Description later. Check svn for current version. Should be refactored to multiple files.
1
2 #!/usr/bin/env python2.5
3 from copy import deepcopy
4 ## utilities ##
5 def partition(f, l):
6 good = []; bad = []
7 for x in l:
8 if f(x):
9 good.append(x)
10 else:
11 bad.append(x)
12 return good, bad
13 ## grammars ##
14 def parse_grammar(g):
15 """Ironically, it would be a good idea to have a Real Parser here"""
16 g2 = {}
17 for sym,rule in g.items():
18 alternatives = map(str.strip, rule.split("|"))
19 g2[sym] = map(str.split, alternatives)
20 return g2
21 def issymbol(s):
22 return s[0].isupper()
23 def isliteral(s):
24 return not issymbol(s)
25 def isempty(s): #Some better representation is needed.
26 return s==["e"]
27 ## Convert to CNF (should probably be in cnf.py) ###
28 def cnf(g, start="S"):
29 remove_empty(g)
30 remove_unit(g)
31 remove_non_productive(g)
32 remove_non_reachable(g, start)
33 normalise(g)
34
35 def remove_empty(g):
36 """Right now, this only works if
37 (1) the e production is the only thing in its alternate
38 (2) None of the symbols are the empty string.
39 But you'd never do a thing like that."""
40 while True:
41 empty_sym = find_empty(g)
42 if empty_sym:
43 inline_empty(g, empty_sym)
44 else:
45 break
46 return g # only for debugging, this is a destructive operation
47 def find_empty(g):
48 for sym,rule in g.items():
49 for alt in rule:
50 if isempty(alt):
51 return sym
52 return None
53 def contains_symbol(sym):
54 "Check to see if some alternate contains a symbol"
55 def check_alt(alt):
56 return sym in alt
57 return check_alt
58 def inline_empty(g, empty_sym):
59 g[empty_sym].remove(["e"])
60 for sym,rule in g.items():
61 needs_fixed, ok = partition(contains_symbol(empty_sym), rule)
62 g[sym] = ok + inline_rule(needs_fixed, empty_sym)
63 remove_empty_rule(g, empty_sym)
64 def inline_rule(needs_fixed, empty_sym):
65 acc = []
66 for alt in needs_fixed:
67 for i,sym in enumerate(alt):
68 if sym==empty_sym:
69 if alt not in acc: acc.append(alt)
70 copy = list(alt)
71 del copy[i]
72 if copy != [] and copy not in acc: acc.append(copy)
73 return acc
74 def remove_empty_rule(g, empty_sym):
75 if g[empty_sym]==[]: # truly empty
76 del g[empty_sym]
77 for sym,rule in g.items():
78 acc = []
79 for alt in rule:
80 # propogate true empties upward
81 if alt==[empty_sym]:
82 acc.append(["e"])
83 # otherwise only keep alts that don't use the empty symbol
84 elif empty_sym not in alt:
85 acc.append(alt)
86 g[sym] = acc
87
88 def remove_unit(g):
89 # TODO: Remember to check for rules like A -> A
90 while True:
91 unit_symbol = find_unit(g)
92 if unit_symbol:
93 inline_unit(g, *unit_symbol)
94 else:
95 break
96 return g # only for debugging, this is a destructive operation
97 def find_unit(g):
98 for sym,rule in g.items():
99 for alt in rule:
100 if isunit(alt):
101 return sym, alt[0]
102 return None
103 def inline_unit(g, unit_sym, replacement_sym):
104 g[unit_sym].remove([replacement_sym])
105 g[unit_sym].extend(g[replacement_sym])
106 def isunit(alt):
107 return len(alt)==1 and issymbol(alt[0])
108
109 def remove_non_productive(g):
110 productive = set()
111 previous_size = -1
112 while len(productive) > previous_size:
113 previous_size = len(productive)
114 mark_productives(g, productive)
115 subset_grammar(g, productive)
116 def subset_grammar(g, subset):
117 for remove in set(g) - subset:
118 del g[remove]
119 def mark_productives(g, productive):
120 for sym,rule in g.items():
121 for alt in rule:
122 if any(isliteral(term) or term in productive for term in alt):
123 productive.add(sym)
124 break
125
126 def remove_non_reachable(g, start):
127 reachable = set([start])
128 def traverse(sym):
129 for alt in g[sym]:
130 for term in alt:
131 if term not in reachable and issymbol(term):
132 reachable.add(term)
133 traverse(sym)
134 traverse(start)
135 subset_grammar(g, reachable)
136
137 def normalise(g):
138 #newrules = []
139 for sym,rule in g.items():
140 for alt in rule:
141 if len(alt) > 2:
142 new = make_new_rules(sym, alt)
143 g[sym].remove(alt)
144 g[sym] += new[0][1] # the first alternate
145 g.update(new[1:])
146 #newrules.extend(new[1:])
147 #print newrules
148 #g.update(newrules)
149 #print newrules[0][0] in g
150 #return g
151 def make_new_rules(sym, alt):
152 newnames = [new_rule_name(sym, alt[0], i) for i in range(len(alt) - 2)]
153 return [(sym,[[term,next]]) for sym,term,next # sym -> term next
154 in zip([sym] + newnames, alt, newnames + [alt[-1]])]
155 def new_rule_name(original, *extras):
156 return original + '.' + '.'.join(map(str, extras))
157 ## test grammars ##
158 math = parse_grammar(dict(S = "Expr",
159 Expr = "Expr + Term | Term",
160 Term = "Term x Factor | Factor",
161 Factor = "( Expr ) | i"))
162 numbers = parse_grammar(dict(Number = "Integer | Real",
163 Integer = "Digit | Integer Digit",
164 Real = "Integer Fraction Scale",
165 Fraction = ". Integer",
166 Scale = "x Sign Integer | Empty",
167 Digit = "0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9",
168 Empty = "e",
169 Sign = "+ | -"))
170 tricky = parse_grammar(dict(S = "L a M",
171 L = "L M | e",
172 M = "M M | e"))
173 cyclic = parse_grammar(dict(A = "B",
174 B = "C | b",
175 C = "A"))
176 cyclic_only = parse_grammar(dict(A = "B", # this grammar has no terminals
177 B = "C",
178 C = "A"))
179 useless = parse_grammar(dict(A = "B | foo",
180 B = "bar",
181 C = "baz",
182 Z = "qux"))
183 pretty_flat = parse_grammar(dict(A = "B | C | a",
184 B = "A A B C",
185 C = "B B B"))
186 tricky_no_empty = deepcopy(tricky)
187 remove_empty(tricky_no_empty)
188 num = deepcopy(numbers)
189 # cnf(num, 'Number')
