構文解析が楽しいわけで

最近LALR(1)とか勉強中

とりあえずLR(0)表を作れるようにした

参考文献
http://nicosia.is.s.u-tokyo.ac.jp/pub/staff/hagiya/kougiroku/compiler/lalr1
http://nicosia.is.s.u-tokyo.ac.jp/pub/staff/hagiya/kougiroku/compiler/lr

このコードにはバグが含まれています(>_<)

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import itertools


class Grammer(object):
    start = '$start'
    end = '$end'
    def __init__(self, term, nonterm, productions, start):
        self.terminals = term
        self.nonterminals = nonterm
        self.productions_dict = {}
        self.productions = [Production(self.start, (start, self.end))]
        for left, rights in productions.iteritems():
            self.productions.extend([Production(left, right) for right in rights])
        for p in self.productions:
            if self.productions_dict.has_key(p.left):
                self.productions_dict[p.left].append(p)
            else:
                self.productions_dict[p.left] = [p]
        
    def __str__(self):
        str_terms = 'Sigma = { %s }' % (', '.join(self.terminals))
        str_nonterms = 'N = { %s }' % (', '.join(self.nonterminals))
        str_p = 'P = {\n\t%s\n}' % '\n\t'.join([str(p) for p in self.productions])
        str_start = 'start = %s' % (self.start, )
        return '\n'.join((str_terms, str_nonterms, str_p, str_start))

    def __repr__(self):
        return self.__str__()


class Production(object):
    def __init__(self, left, right):
        self.left = left
        self.right = right
    
    def __str__(self):
        return '%s -> %s' % (self.left, ' '.join(self.right))

    def __repr__(self):
        return self.__str__()

    def __eq__(self, other):
        return self.left == other.left and self.right and other.right

    def __ne__(self, other):
        return not (self == other)
        

class LRItem(object):
    def __init__(self, production, readed=0):
        self.production = production
        self.readed = readed

    def __str__(self):
        str_seq = [self.production.left, '->']
        for i in range(len(self.production.right)):
            if i == self.readed:
                str_seq.append('.')
            str_seq.append(self.production.right[i])
        if self.readed == len(self.production.right):
            str_seq.append('.')
        return ' '.join(str_seq)

    def __repr__(self):
        return self.__str__()

    def __eq__(self, other):
        return self.readed == other.readed and self.production == other.production

    def __ne__(self, other):
        return not (self == other)

    def nextSymbol(self):
        try:
            return self.production.right[self.readed]
        except IndexError:
            return None

    def transit(self, symbol):
        if self.nextSymbol() == symbol:
            return LRItem(self.production, self.readed + 1)
        return None


class LR(object):
    def __init__(self, grammer):
        self.debug = 1
        self.grammer = grammer
        self.goto = {}
        self.states = {}
        self.init_state = ((LRItem(self.grammer.productions_dict[self.grammer.start][0], 0), ))
        self.calcAllGoto()
        
    def closure(self, item):
        c = []
        sub_c = []
        next_symbol = item.nextSymbol()
        if next_symbol is None or next_symbol not in self.grammer.nonterminals:
            return ()
        else:
            for p in self.grammer.productions_dict[next_symbol]:
                new_item = LRItem(p, 0)
                c.append(new_item)
                if p.left != p.right[0]:
                    sub_c.extend(self.closure(new_item))
            c.extend(sub_c)
        return tuple(c)

    def calcAllGoto(self):
        states = (self.init_state, )
        count = 0
        while True:
            new_states = []
            for stat in states:
                # self.states['I%d' % count] = stat
                if self.debug > 0:
                    print 'I%d = %s' % (count, stat)
                if stat not in self.goto.keys():
                    self.goto[stat] = {}
                for sym in itertools.chain(self.grammer.nonterminals, self.grammer.terminals):
                    new_stat = self.calcGoto(stat, sym)
                    self.goto[stat][sym] = new_stat
                    #if not self.goto.has_key(new_stat):
                    if new_stat not in self.goto.keys() and new_stat is not None:
                        new_states.append(new_stat)
                count += 1
            if not new_states:
                break
            if self.debug > 0:
                print '----'
            states = new_states

    def calcGoto(self, state, symbol):
        if not self.goto.has_key(state):
            self.goto[state] = {}
        c = [x for x in itertools.chain(*[self.closure(i) for i in state])]
        new_stat = tuple(x for x in
                         [i.transit(symbol)
                          for i in itertools.chain(state, c)
                          ] if x is not None)
        if new_stat:
            return new_stat
        return None


def main():
    terminals = ['+', '*', '(', ')', '1']
    nonterminals = ['E', 'T', 'F']
    productions = { 'E': (('E', '+', 'T', ), ('T', ), ),
                    'T': (('T', '*', 'F', ), ('F', ), ),
                    'F': (('(', 'E', ')', ), ('1', ), ),                        
                    }
    start = 'E'

    lr = LR(Grammer(terminals, nonterminals, productions, start))
    # print lr.goto

if __name__ == '__main__':
    main()


今度解説する

LALRのためにLookahead setもつくれるようにする