構文解析が楽しいわけで
最近LALR(1)とか勉強中
とりあえずLR(0)表を作れるようにした
- 参考文献
- http://nicosia.is.s.u-tokyo.ac.jp/pub/staff/hagiya/kougiroku/compiler/lalr1
http://nicosia.is.s.u-tokyo.ac.jp/pub/staff/hagiya/kougiroku/compiler/lr
このコードにはバグが含まれています(>_<)
#!/usr/bin/env python # -*- coding: utf-8 -*- import itertools class Grammer(object): start = '$start' end = '$end' def __init__(self, term, nonterm, productions, start): self.terminals = term self.nonterminals = nonterm self.productions_dict = {} self.productions = [Production(self.start, (start, self.end))] for left, rights in productions.iteritems(): self.productions.extend([Production(left, right) for right in rights]) for p in self.productions: if self.productions_dict.has_key(p.left): self.productions_dict[p.left].append(p) else: self.productions_dict[p.left] = [p] def __str__(self): str_terms = 'Sigma = { %s }' % (', '.join(self.terminals)) str_nonterms = 'N = { %s }' % (', '.join(self.nonterminals)) str_p = 'P = {\n\t%s\n}' % '\n\t'.join([str(p) for p in self.productions]) str_start = 'start = %s' % (self.start, ) return '\n'.join((str_terms, str_nonterms, str_p, str_start)) def __repr__(self): return self.__str__() class Production(object): def __init__(self, left, right): self.left = left self.right = right def __str__(self): return '%s -> %s' % (self.left, ' '.join(self.right)) def __repr__(self): return self.__str__() def __eq__(self, other): return self.left == other.left and self.right and other.right def __ne__(self, other): return not (self == other) class LRItem(object): def __init__(self, production, readed=0): self.production = production self.readed = readed def __str__(self): str_seq = [self.production.left, '->'] for i in range(len(self.production.right)): if i == self.readed: str_seq.append('.') str_seq.append(self.production.right[i]) if self.readed == len(self.production.right): str_seq.append('.') return ' '.join(str_seq) def __repr__(self): return self.__str__() def __eq__(self, other): return self.readed == other.readed and self.production == other.production def __ne__(self, other): return not (self == other) def nextSymbol(self): try: return self.production.right[self.readed] except IndexError: return None def transit(self, symbol): if self.nextSymbol() == symbol: return LRItem(self.production, self.readed + 1) return None class LR(object): def __init__(self, grammer): self.debug = 1 self.grammer = grammer self.goto = {} self.states = {} self.init_state = ((LRItem(self.grammer.productions_dict[self.grammer.start][0], 0), )) self.calcAllGoto() def closure(self, item): c = [] sub_c = [] next_symbol = item.nextSymbol() if next_symbol is None or next_symbol not in self.grammer.nonterminals: return () else: for p in self.grammer.productions_dict[next_symbol]: new_item = LRItem(p, 0) c.append(new_item) if p.left != p.right[0]: sub_c.extend(self.closure(new_item)) c.extend(sub_c) return tuple(c) def calcAllGoto(self): states = (self.init_state, ) count = 0 while True: new_states = [] for stat in states: # self.states['I%d' % count] = stat if self.debug > 0: print 'I%d = %s' % (count, stat) if stat not in self.goto.keys(): self.goto[stat] = {} for sym in itertools.chain(self.grammer.nonterminals, self.grammer.terminals): new_stat = self.calcGoto(stat, sym) self.goto[stat][sym] = new_stat #if not self.goto.has_key(new_stat): if new_stat not in self.goto.keys() and new_stat is not None: new_states.append(new_stat) count += 1 if not new_states: break if self.debug > 0: print '----' states = new_states def calcGoto(self, state, symbol): if not self.goto.has_key(state): self.goto[state] = {} c = [x for x in itertools.chain(*[self.closure(i) for i in state])] new_stat = tuple(x for x in [i.transit(symbol) for i in itertools.chain(state, c) ] if x is not None) if new_stat: return new_stat return None def main(): terminals = ['+', '*', '(', ')', '1'] nonterminals = ['E', 'T', 'F'] productions = { 'E': (('E', '+', 'T', ), ('T', ), ), 'T': (('T', '*', 'F', ), ('F', ), ), 'F': (('(', 'E', ')', ), ('1', ), ), } start = 'E' lr = LR(Grammer(terminals, nonterminals, productions, start)) # print lr.goto if __name__ == '__main__': main()
今度解説する
LALRのためにLookahead setもつくれるようにする