def SplitProductions(productions):
    ret = []
    for production in productions:
        if '|' in production:
            head = production[:len("N -> ")]
            ret += [head + sub.strip() + "$" for sub in production[len(head):].split('|')]
        else:
            ret += [production + "$"]

    return ret

def GetLR0(production):
    head = production[:len("N -> ")]
    tail = production[len(head):-1] # except $

    items = [head + tail[:pos] + '.' + tail[pos:] + '$'
            for pos in range(len(tail))]
    return items

def GetNewStartProduction(productions):
    return '0 -> %c$' % productions[0][0]

def GetMarkSymbol(item):
    pos = item.find('.')
    return item[pos + 1]

def IsNonterminal(symbol):
    return symbol.isupper()

def Closure(item, productions):
    # add self
    ret = [item]

    markSymbol = GetMarkSymbol(item)

    if IsNonterminal(markSymbol) and markSymbol[0] != item[0]:
        relationProductions = [production
                for production in productions
                    if production[0] == markSymbol]

        # add relation production closures
        relationClosures = [Closure(GetLR0(production)[0], productions)
            for production in relationProductions]

        for relationClosure in relationClosures:
            ret += relationClosure

    return ret

def Goto(items, nextToken, productions):
    movableItems = [item
        for item in items
            if item[item.find('.') + 1] == nextToken]

    nextItems = []

    ret = []
    for item in movableItems:
        curPos = item.find('.')
        nextPos = curPos + 1
        nextItem = item[:curPos] + item[nextPos] + '.' + item[nextPos + 1:]
        ret += Closure(nextItem, productions)

    return ret

def GetMarkSymbols(items):
    markSymbols = [GetMarkSymbol(item) for item in items]

    ret = []
    for markSymbol in markSymbols:
        if markSymbol in ret:
            pass
        else:
            ret.append(markSymbol)

    return ret

class Node:
    index = 0

    def __init__(self, items, productions, gotoList):
        gotoList.append(self)

        self.name = "I%d" % (Node.index)
        Node.index += 1

        self.items = items

        self.nextTokens = [markSymbol
            for markSymbol in GetMarkSymbols(items)
                if markSymbol != '$']

        print self.name, self.items

    def FindSameNode(self, items, gotoList):
        for node in gotoList:
            if node.items == items:
                return node
        return None

    def MakeChildren(self, stack, productions, gotoList):
        print self.name, "MakeChildren"
        gotos = [(nextToken, Goto(self.items, nextToken, productions))
            for nextToken in self.nextTokens]

        self.children = []
        for nextToken, goto in gotos:
            sameNode = self.FindSameNode(goto, gotoList)
            if sameNode:
                print "FoundSameNode! Redirect[%s]: %s" % (sameNode.name, goto)
                self.children.append((nextToken, sameNode))
            else:
                child = Node(goto, productions, gotoList)
                self.children.append((nextToken, child))
                stack.append(child)
        #raw_input()


def MakeTransitionGraph(productions):
    gotoList = []

    productions = SplitProductions(productions)
    productions = [GetNewStartProduction(productions)] + productions

    newStartProduction = GetLR0(productions[0])[0]

    print productions
    argmentedProductions = Closure(newStartProduction, productions)
    rootNode = Node(argmentedProductions, productions, gotoList)

    stack = [rootNode]

    while stack:
        parent = stack.pop(0)
        parent.MakeChildren(stack, productions, gotoList)

    return rootNode

"""
productions = [
    "E -> E+T | T",
    "T -> T*F | F",
    "F -> (E) | a",
]
"""
productions = [
    "S -> (L) | a",
    "L -> L,S | S",
]


print "-" * 80

MakeTransitionGraph(productions)

결과보기

more..


이올린에 북마크하기(0) 이올린에 추천하기(0)
2007/10/13 13:53 2007/10/13 13:53

컴파일러 LL 파싱 FOLLOW 구하기

Posted at 2007/09/30 15:50// Posted in study/compiler
TERMINAL = "abcdefghijklmnopqrstuvwxyz.+*()"

def RING_SUM(a, b):
    if "." in a:
        a_except_upsilon = [] + a
        a_except_upsilon.remove(".")
        return a_except_upsilon + b
    else:
        return a


def UNION(a, b):
    for e in b:
        if e in a:
            pass
        else:
            a.append(e)

    a.sort()

    return a

def FIRST(rules):
    firstDict = {}

    # step 1
    for rule in rules:
        var, eq, stmt = rule.split()
        firstDict[var] = []


    # step 2
    for rule in rules:
        var, eq, stmt = rule.split()
        if stmt[0] in TERMINAL:
            firstDict[var] = UNION(firstDict[var], [stmt[0]])


    # step 3
    while True:
        oldRepr = repr(firstDict)

        for rule in rules:
            var, eq, stmt = rule.split()
            if stmt[0] in TERMINAL:
                pass
            else:
                oldFirst = firstDict[var]

                addFirst = firstDict[stmt[0]]
                for next in stmt[1:]:
                    addFirst = RING_SUM(addFirst, firstDict.get(next, [next]))

                newFirst = UNION(oldFirst, addFirst)

                firstDict[var] = newFirst

        newRepr = repr(firstDict)
        if oldRepr == newRepr:
            break

    return firstDict

def UNION_EXCEPT_UPSILON(a, b):
    if "." in b:
        b_except_upsilon = [] + b
        b_except_upsilon.remove(".")
        return UNION(a, b_except_upsilon)
    else:
        return UNION(a, b)

def PRINT_FOLLOW(rules):
    firstDict = FIRST(rules)


    followDict = {}

    # step1 시작 심벌만 $ 로 초기화
    for idx, rule in enumerate(rules):
        var, eq, stmt = rule.split()

        if idx == 0:
            followDict[var] = ["$"]
        else:
            followDict[var] = []


    print followDict

    # step 2 A = ?BN 모양을 찾은 다음

    # FOLLOW(B) 에 입실론_제외_FIRST(N) 을 합한다
    for rule in rules:
        var, eq, stmt = rule.split()

        for nonTerminal in firstDict:
            nonTerminalPos = stmt.find(nonTerminal)
            if nonTerminalPos >= 0:
                nonTerminalNextPos = nonTerminalPos + 1
                if nonTerminalNextPos < len(stmt):
                    nextChar = stmt[nonTerminalNextPos]
                    nextFirst = firstDict.get(nextChar, [nextChar])

                    if "." == nextFirst:
                        pass
                    else:
                        followDict[nonTerminal] = UNION_EXCEPT_UPSILON(followDict[nonTerminal], nextFirst)

    print followDict

    # step 3 A = B or
    #        A = ?B or
    #        A = ?BN 이고 FIRST(N) 에 입실론이 있을 경우
    #        FOLLOW(B) 에 FOLLOW(A) 를 합한다
    while True:
        oldRepr = repr(followDict)

        for rule in rules:
            var, eq, stmt = rule.split()

            for nonTerminal in firstDict:
                nonTerminalPos = stmt.find(nonTerminal)
                if nonTerminalPos >= 0:
                    nonTerminalNextPos = nonTerminalPos + 1
                    if nonTerminalNextPos < len(stmt):
                        nextChar = stmt[nonTerminalNextPos]
                        nextFirst = firstDict.get(nextChar, [nextChar])

                        if "." in nextFirst:
                            #print "upsilon in follow"
                            pass
                        else:
                            continue
                    else:
                        #print "not exist follow"
                        pass

                    followDict[nonTerminal] = UNION(followDict[nonTerminal], followDict[var])

        newRepr = repr(followDict)

        if oldRepr == newRepr:
            break

        print followDict


rules = [
    "E = TX",
    "X = +TX",
    "X = .",
    "T = FY",
    "Y = *FY",
    "Y = .",
    "F = (E)",
    "F = a",
]

PRINT_FOLLOW(rules)

결과

{'Y': [], 'X': [], 'E': ['$'], 'T': [], 'F': []}
{'Y': [], 'X': [], 'E': ['$', ')'], 'T': ['+'], 'F': ['*']}
{'Y': ['$', ')', '+'], 'X': ['$', ')'], 'E': ['$', ')'], 'T': ['$', ')', '+'], 'F': ['$', ')', '*', '+']}
Hit any key to close this window...

이올린에 북마크하기(0) 이올린에 추천하기(0)
2007/09/30 15:50 2007/09/30 15:50

컴파일러 LL 파싱 FIRST 구하기

Posted at 2007/09/30 14:30// Posted in study/compiler
TERMINAL = "abcdefghijklmnopqrstuvwxyz.+*()"

def RING_SUM(a, b):
    if "." in a:
        a_except_upsilon = [] + a
        a_except_upsilon.remove(".")
        return a_except_upsilon + b
    else:
        return a

def UNION(a, b):
    for e in b:
        if e in a:
            pass
        else:
            a.append(e)

    a.sort()

    return a

def PRINT_FIRST(rules):
    firstDict = {}

    # step 1
    for rule in rules:
        var, eq, stmt = rule.split()
        firstDict[var] = []

    print firstDict

    # step 2
    for rule in rules:
        var, eq, stmt = rule.split()
        if stmt[0] in TERMINAL:
            firstDict[var] = UNION(firstDict[var], [stmt[0]])

    print firstDict

    # step 3
    while True:
        oldRepr = repr(firstDict)

        for rule in rules:
            var, eq, stmt = rule.split()
            if stmt[0] in TERMINAL:
                pass
            else:
                oldFirst = firstDict[var]

                addFirst = firstDict[stmt[0]]
                for next in stmt[1:]:
                    addFirst = RING_SUM(addFirst, firstDict.get(next, [next]))

                newFirst = UNION(oldFirst, addFirst)

                print "update", var, newFirst
                firstDict[var] = newFirst

        newRepr = repr(firstDict)
        if oldRepr == newRepr:
            break

    firstDict = firstDict
    print firstDict

rules = [
    "E = TX",
    "X = +TX",
    "X = .",
    "T = FY",
    "Y = *FY",
    "Y = .",
    "F = (E)",
    "F = a",
]

PRINT_FIRST(rules)


결과
{'Y': [], 'X': [], 'E': [], 'T': [], 'F': []}
{'Y': ['*', '.'], 'X': ['+', '.'], 'E': [], 'T': [], 'F': ['(', 'a']}
update E []
update T ['(', 'a']
update E ['(', 'a']
update T ['(', 'a']
update E ['(', 'a']
update T ['(', 'a']
{'Y': ['*', '.'], 'X': ['+', '.'], 'E': ['(', 'a'], 'T': ['(', 'a'], 'F': ['(', 'a']}

이올린에 북마크하기(0) 이올린에 추천하기(0)
2007/09/30 14:30 2007/09/30 14:30