普通文本  |  940行  |  21.81 KB

import unittest
import textwrap
import antlr3
import antlr3.tree
import testbase
import sys

class T(testbase.ANTLRTest):
    def parserClass(self, base):
        class TParser(base):
            def __init__(self, *args, **kwargs):
                super().__init__(*args, **kwargs)

                self._output = ""


            def capture(self, t):
                self._output += t


            def traceIn(self, ruleName, ruleIndex):
                self.traces.append('>'+ruleName)


            def traceOut(self, ruleName, ruleIndex):
                self.traces.append('<'+ruleName)


            def recover(self, input, re):
                # no error recovery yet, just crash!
                raise

        return TParser


    def lexerClass(self, base):
        class TLexer(base):
            def __init__(self, *args, **kwargs):
                super().__init__(*args, **kwargs)

                self._output = ""


            def capture(self, t):
                self._output += t


            def traceIn(self, ruleName, ruleIndex):
                self.traces.append('>'+ruleName)


            def traceOut(self, ruleName, ruleIndex):
                self.traces.append('<'+ruleName)


            def recover(self, input, re):
                # no error recovery yet, just crash!
                raise

        return TLexer


    def execParser(self, grammar, grammarEntry, input):
        lexerCls, parserCls = self.compileInlineGrammar(grammar)

        cStream = antlr3.StringStream(input)
        lexer = lexerCls(cStream)
        tStream = antlr3.CommonTokenStream(lexer)
        parser = parserCls(tStream)
        r = getattr(parser, grammarEntry)()

        if r:
            return r.tree.toStringTree()

        return ""


    def execTreeParser(self, grammar, grammarEntry, treeGrammar, treeEntry, input):
        lexerCls, parserCls = self.compileInlineGrammar(grammar)
        walkerCls = self.compileInlineGrammar(treeGrammar)

        cStream = antlr3.StringStream(input)
        lexer = lexerCls(cStream)
        tStream = antlr3.CommonTokenStream(lexer)
        parser = parserCls(tStream)
        r = getattr(parser, grammarEntry)()
        nodes = antlr3.tree.CommonTreeNodeStream(r.tree)
        nodes.setTokenStream(tStream)
        walker = walkerCls(nodes)
        r = getattr(walker, treeEntry)()

        if r:
            return r.tree.toStringTree()

        return ""


    # PARSERS -- AUTO AST

    def testToken(self):
        grammar = textwrap.dedent(
        r'''
        grammar T1;
        options {
            language=Python3;
            output=AST;
        }
        @header {
        class V(CommonTree):
            def toString(self):
                return self.token.text + "<V>"
            __str__ = toString

        }
        a : ID<V> ;
        ID : 'a'..'z'+ ;
        WS : (' '|'\n') {$channel=HIDDEN;} ;
        ''')

        found = self.execParser(
            grammar, 'a',
            input="a"
            )

        self.assertEqual("a<V>", found)


    def testTokenCommonTree(self):
        grammar = textwrap.dedent(
            r'''
            grammar T;
            options {
                language=Python3;
                output=AST;
            }
            a : ID<CommonTree> ;
            ID : 'a'..'z'+ ;
            WS : (' '|'\n') {$channel=HIDDEN;} ;
            ''')

        found = self.execParser(
            grammar, 'a',
            input="a")

        self.assertEqual("a", found)


    def testTokenWithQualifiedType(self):
        grammar = textwrap.dedent(
            r'''
            grammar T;
            options {
                language=Python3;
                output=AST;
            }
            @members {
            class V(CommonTree):
                def toString(self):
                    return self.token.text + "<V>"
                __str__ = toString
            }
            a : ID<TParser.V> ; // TParser.V is qualified name
            ID : 'a'..'z'+ ;
            WS : (' '|'\n') {$channel=HIDDEN;} ;
            ''')

        found = self.execParser(
            grammar, 'a',
            input="a"
            )

        self.assertEqual("a<V>", found)


    def testNamedType(self):
        grammar = textwrap.dedent(
            r"""
            grammar $T;
            options {
                language=Python3;
                output=AST;
            }
            @header {
            class V(CommonTree):
                def toString(self):
                    return self.token.text + "<V>"
                __str__ = toString
            }
            a : ID<node=V> ;
            ID : 'a'..'z'+ ;
            WS : (' '|'\\n') {$channel=HIDDEN;} ;
            """)

        found = self.execParser(grammar, 'a', input="a")
        self.assertEqual("a<V>", found)


    def testTokenWithLabel(self):
        grammar = textwrap.dedent(
        r'''
        grammar T2;
        options {
            language=Python3;
            output=AST;
        }
        @header {
        class V(CommonTree):
            def toString(self):
                return self.token.text + "<V>"
            __str__ = toString

        }
        a : x=ID<V> ;
        ID : 'a'..'z'+ ;
        WS : (' '|'\n') {$channel=HIDDEN;} ;
        ''')

        found = self.execParser(
            grammar, 'a',
            input="a"
            )

        self.assertEqual("a<V>", found)


    def testTokenWithListLabel(self):
        grammar = textwrap.dedent(
        r'''
        grammar T3;
        options {
            language=Python3;
            output=AST;
        }
        @header {
        class V(CommonTree):
            def toString(self):
                return self.token.text + "<V>"
            __str__ = toString

        }
        a : x+=ID<V> ;
        ID : 'a'..'z'+ ;
        WS : (' '|'\n') {$channel=HIDDEN;} ;
        ''')

        found = self.execParser(
            grammar, 'a',
            input="a"
            )

        self.assertEqual("a<V>", found)


    def testTokenRoot(self):
        grammar = textwrap.dedent(
        r'''
        grammar T4;
        options {
            language=Python3;
            output=AST;
        }
        @header {
        class V(CommonTree):
            def toString(self):
                return self.token.text + "<V>"
            __str__ = toString

        }
        a : ID<V>^ ;
        ID : 'a'..'z'+ ;
        WS : (' '|'\n') {$channel=HIDDEN;} ;
        ''')

        found = self.execParser(
            grammar, 'a',
            input="a"
            )

        self.assertEqual("a<V>", found)


    def testTokenRootWithListLabel(self):
        grammar = textwrap.dedent(
        r'''
        grammar T5;
        options {
            language=Python3;
            output=AST;
        }
        @header {
        class V(CommonTree):
            def toString(self):
                return self.token.text + "<V>"
            __str__ = toString

        }
        a : x+=ID<V>^ ;
        ID : 'a'..'z'+ ;
        WS : (' '|'\n') {$channel=HIDDEN;} ;
        ''')

        found = self.execParser(
            grammar, 'a',
            input="a"
            )

        self.assertEqual("a<V>", found)


    def testString(self):
        grammar = textwrap.dedent(
        r'''
        grammar T6;
        options {
            language=Python3;
            output=AST;
        }
        @header {
        class V(CommonTree):
            def toString(self):
                return self.token.text + "<V>"
            __str__ = toString

        }
        a : 'begin'<V> ;
        ID : 'a'..'z'+ ;
        WS : (' '|'\n') {$channel=HIDDEN;} ;
        ''')

        found = self.execParser(
            grammar, 'a',
            input="begin"
            )

        self.assertEqual("begin<V>", found)


    def testStringRoot(self):
        grammar = textwrap.dedent(
        r'''
        grammar T7;
        options {
            language=Python3;
            output=AST;
        }
        @header {
        class V(CommonTree):
            def toString(self):
                return self.token.text + "<V>"
            __str__ = toString

        }
        a : 'begin'<V>^ ;
        ID : 'a'..'z'+ ;
        WS : (' '|'\n') {$channel=HIDDEN;} ;
        ''')

        found = self.execParser(
            grammar, 'a',
            input="begin"
            )

        self.assertEqual("begin<V>", found)


    # PARSERS -- REWRITE AST

    def testRewriteToken(self):
        grammar = textwrap.dedent(
        r'''
        grammar T8;
        options {
            language=Python3;
            output=AST;
        }
        @header {
        class V(CommonTree):
            def toString(self):
                return self.token.text + "<V>"
            __str__ = toString

        }
        a : ID -> ID<V> ;
        ID : 'a'..'z'+ ;
        WS : (' '|'\n') {$channel=HIDDEN;} ;
        ''')

        found = self.execParser(
            grammar, 'a',
            input="a"
            )

        self.assertEqual("a<V>", found)


    def testRewriteTokenWithArgs(self):
        grammar = textwrap.dedent(
        r'''
        grammar T9;
        options {
            language=Python3;
            output=AST;
        }
        @header {
        class V(CommonTree):
            def __init__(self, *args):
                if len(args) == 4:
                    ttype = args[0]
                    x = args[1]
                    y = args[2]
                    z = args[3]
                    token = CommonToken(type=ttype, text="")

                elif len(args) == 3:
                    ttype = args[0]
                    token = args[1]
                    x = args[2]
                    y, z = 0, 0

                else:
                    raise TypeError("Invalid args {!r}".format(args))

                super().__init__(token)
                self.x = x
                self.y = y
                self.z = z

            def toString(self):
                txt = ""
                if self.token:
                    txt += self.token.text
                txt +="<V>;{0.x}{0.y}{0.z}".format(self)
                return txt
            __str__ = toString

        }
        a : ID -> ID<V>[42,19,30] ID<V>[$ID,99];
        ID : 'a'..'z'+ ;
        WS : (' '|'\n') {$channel=HIDDEN;} ;
        ''')

        found = self.execParser(
            grammar, 'a',
            input="a"
            )

        self.assertEqual("<V>;421930 a<V>;9900", found)


    def testRewriteTokenRoot(self):
        grammar = textwrap.dedent(
        r'''
        grammar T10;
        options {
            language=Python3;
            output=AST;
        }
        @header {
        class V(CommonTree):
            def toString(self):
                return self.token.text + "<V>"
            __str__ = toString

        }
        a : ID INT -> ^(ID<V> INT) ;
        ID : 'a'..'z'+ ;
        INT : '0'..'9'+ ;
        WS : (' '|'\n') {$channel=HIDDEN;} ;
        ''')

        found = self.execParser(
            grammar, 'a',
            input="a 2"
            )

        self.assertEqual("(a<V> 2)", found)


    def testRewriteString(self):
        grammar = textwrap.dedent(
        r'''
        grammar T11;
        options {
            language=Python3;
            output=AST;
        }
        @header {
        class V(CommonTree):
            def toString(self):
                return self.token.text + "<V>"
            __str__ = toString

        }
        a : 'begin' -> 'begin'<V> ;
        ID : 'a'..'z'+ ;
        WS : (' '|'\n') {$channel=HIDDEN;} ;
        ''')

        found = self.execParser(
            grammar, 'a',
            input="begin"
            )

        self.assertEqual("begin<V>", found)


    def testRewriteStringRoot(self):
        grammar = textwrap.dedent(
        r'''
        grammar T12;
        options {
            language=Python3;
            output=AST;
        }
        @header {
        class V(CommonTree):
            def toString(self):
                return self.token.text + "<V>"
            __str__ = toString

        }
        a : 'begin' INT -> ^('begin'<V> INT) ;
        ID : 'a'..'z'+ ;
        INT : '0'..'9'+ ;
        WS : (' '|'\n') {$channel=HIDDEN;} ;
        ''')

        found = self.execParser(
            grammar, 'a',
            input="begin 2"
            )

        self.assertEqual("(begin<V> 2)", found)

    def testRewriteRuleResults(self):
        grammar = textwrap.dedent(
            r'''
            grammar T;
            options {
                language=Python3;
                output=AST;
            }
            tokens {LIST;}
            @header {
            class V(CommonTree):
                def toString(self):
                    return self.token.text + "<V>"
                __str__ = toString

            class W(CommonTree):
                def __init__(self, tokenType, txt):
                    super().__init__(
                        CommonToken(type=tokenType, text=txt))

                def toString(self):
                    return self.token.text + "<W>"
                __str__ = toString

            }
            a : id (',' id)* -> ^(LIST<W>["LIST"] id+);
            id : ID -> ID<V>;
            ID : 'a'..'z'+ ;
            WS : (' '|'\n') {$channel=HIDDEN;} ;
            ''')

        found = self.execParser(
            grammar, 'a',
            input="a,b,c")

        self.assertEqual("(LIST<W> a<V> b<V> c<V>)", found)

    def testCopySemanticsWithHetero(self):
        grammar = textwrap.dedent(
            r'''
            grammar T;
            options {
                language=Python3;
                output=AST;
            }
            @header {
            class V(CommonTree):
                def dupNode(self):
                    return V(self)

                def toString(self):
                    return self.token.text + "<V>"
                __str__ = toString

            }
            a : type ID (',' ID)* ';' -> ^(type ID)+;
            type : 'int'<V> ;
            ID : 'a'..'z'+ ;
            INT : '0'..'9'+;
            WS : (' '|'\\n') {$channel=HIDDEN;} ;
            ''')

        found = self.execParser(
            grammar, 'a',
            input="int a, b, c;")
        self.assertEqual("(int<V> a) (int<V> b) (int<V> c)", found)

    # TREE PARSERS -- REWRITE AST

    def testTreeParserRewriteFlatList(self):
        grammar = textwrap.dedent(
        r'''
        grammar T13;
        options {
            language=Python3;
            output=AST;
        }
        a : ID INT;
        ID : 'a'..'z'+ ;
        INT : '0'..'9'+;
        WS : (' '|'\n') {$channel=HIDDEN;} ;
        ''')

        treeGrammar = textwrap.dedent(
        r'''
        tree grammar TP13;
        options {
            language=Python3;
            output=AST;
            ASTLabelType=CommonTree;
            tokenVocab=T13;
        }
        @header {
        class V(CommonTree):
            def toString(self):
                return self.token.text + "<V>"
            __str__ = toString

        class W(CommonTree):
            def toString(self):
                return self.token.text + "<W>"
            __str__ = toString

        }
        a : ID INT -> INT<V> ID<W>
          ;
        ''')

        found = self.execTreeParser(
            grammar, 'a',
            treeGrammar, 'a',
            input="abc 34"
            )

        self.assertEqual("34<V> abc<W>", found)


    def testTreeParserRewriteTree(self):
        grammar = textwrap.dedent(
        r'''
        grammar T14;
        options {
            language=Python3;
            output=AST;
        }
        a : ID INT;
        ID : 'a'..'z'+ ;
        INT : '0'..'9'+;
        WS : (' '|'\n') {$channel=HIDDEN;} ;
        ''')

        treeGrammar = textwrap.dedent(
        r'''
        tree grammar TP14;
        options {
            language=Python3;
            output=AST;
            ASTLabelType=CommonTree;
            tokenVocab=T14;
        }
        @header {
        class V(CommonTree):
            def toString(self):
                return self.token.text + "<V>"
            __str__ = toString

        class W(CommonTree):
            def toString(self):
                return self.token.text + "<W>"
            __str__ = toString

        }
        a : ID INT -> ^(INT<V> ID<W>)
          ;
        ''')

        found = self.execTreeParser(
            grammar, 'a',
            treeGrammar, 'a',
            input="abc 34"
            )

        self.assertEqual("(34<V> abc<W>)", found)


    def testTreeParserRewriteImaginary(self):
        grammar = textwrap.dedent(
        r'''
        grammar T15;
        options {
            language=Python3;
            output=AST;
        }
        a : ID ;
        ID : 'a'..'z'+ ;
        INT : '0'..'9'+;
        WS : (' '|'\n') {$channel=HIDDEN;} ;
        ''')

        treeGrammar = textwrap.dedent(
        r'''
        tree grammar TP15;
        options {
            language=Python3;
            output=AST;
            ASTLabelType=CommonTree;
            tokenVocab=T15;
        }
        tokens { ROOT; }
        @header {
        class V(CommonTree):
            def __init__(self, tokenType):
                super().__init__(CommonToken(tokenType))

            def toString(self):
                return tokenNames[self.token.type] + "<V>"
            __str__ = toString


        }
        a : ID -> ROOT<V> ID
          ;
        ''')

        found = self.execTreeParser(
            grammar, 'a',
            treeGrammar, 'a',
            input="abc"
            )

        self.assertEqual("ROOT<V> abc", found)


    def testTreeParserRewriteImaginaryWithArgs(self):
        grammar = textwrap.dedent(
        r'''
        grammar T16;
        options {
            language=Python3;
            output=AST;
        }
        a : ID ;
        ID : 'a'..'z'+ ;
        INT : '0'..'9'+;
        WS : (' '|'\n') {$channel=HIDDEN;} ;
        ''')

        treeGrammar = textwrap.dedent(
        r'''
        tree grammar TP16;
        options {
            language=Python3;
            output=AST;
            ASTLabelType=CommonTree;
            tokenVocab=T16;
        }
        tokens { ROOT; }
        @header {
        class V(CommonTree):
            def __init__(self, tokenType, x):
                super().__init__(CommonToken(tokenType))
                self.x = x

            def toString(self):
                return tokenNames[self.token.type] + "<V>;" + str(self.x)
            __str__ = toString

        }
        a : ID -> ROOT<V>[42] ID
          ;
        ''')

        found = self.execTreeParser(
            grammar, 'a',
            treeGrammar, 'a',
            input="abc"
            )

        self.assertEqual("ROOT<V>;42 abc", found)


    def testTreeParserRewriteImaginaryRoot(self):
        grammar = textwrap.dedent(
        r'''
        grammar T17;
        options {
            language=Python3;
            output=AST;
        }
        a : ID ;
        ID : 'a'..'z'+ ;
        INT : '0'..'9'+;
        WS : (' '|'\n') {$channel=HIDDEN;} ;
        ''')

        treeGrammar = textwrap.dedent(
        r'''
        tree grammar TP17;
        options {
            language=Python3;
            output=AST;
            ASTLabelType=CommonTree;
            tokenVocab=T17;
        }
        tokens { ROOT; }
        @header {
        class V(CommonTree):
            def __init__(self, tokenType):
                super().__init__(CommonToken(tokenType))

            def toString(self):
                return tokenNames[self.token.type] + "<V>"
            __str__ = toString

        }
        a : ID -> ^(ROOT<V> ID)
          ;
        ''')

        found = self.execTreeParser(
            grammar, 'a',
            treeGrammar, 'a',
            input="abc"
            )

        self.assertEqual("(ROOT<V> abc)", found)


    def testTreeParserRewriteImaginaryFromReal(self):
        grammar = textwrap.dedent(
        r'''
        grammar T18;
        options {
            language=Python3;
            output=AST;
        }
        a : ID ;
        ID : 'a'..'z'+ ;
        INT : '0'..'9'+;
        WS : (' '|'\n') {$channel=HIDDEN;} ;
        ''')

        treeGrammar = textwrap.dedent(
        r'''
        tree grammar TP18;
        options {
            language=Python3;
            output=AST;
            ASTLabelType=CommonTree;
            tokenVocab=T18;
        }
        tokens { ROOT; }
        @header {
        class V(CommonTree):
            def __init__(self, tokenType, tree=None):
                if tree is None:
                    super().__init__(CommonToken(tokenType))
                else:
                    super().__init__(tree)
                    self.token.type = tokenType

            def toString(self):
                return tokenNames[self.token.type]+"<V>@"+str(self.token.line)
            __str__ = toString

        }
        a : ID -> ROOT<V>[$ID]
          ;
        ''')

        found = self.execTreeParser(
            grammar, 'a',
            treeGrammar, 'a',
            input="abc"
            )

        self.assertEqual("ROOT<V>@1", found)


    def testTreeParserAutoHeteroAST(self):
        grammar = textwrap.dedent(
            r'''
            grammar T;
            options {
                language=Python3;
                output=AST;
            }
            a : ID ';' ;
            ID : 'a'..'z'+ ;
            INT : '0'..'9'+;
            WS : (' '|'\n') {$channel=HIDDEN;} ;
            ''')

        treeGrammar = textwrap.dedent(
            r'''
            tree grammar TP;
            options {
                language=Python3;
                output=AST;
                ASTLabelType=CommonTree;
                tokenVocab=T;
            }
            tokens { ROOT; }
            @header {
            class V(CommonTree):
                def toString(self):
                    return CommonTree.toString(self) + "<V>"
                __str__ = toString

            }

            a : ID<V> ';'<V>;
            ''')

        found = self.execTreeParser(
            grammar, 'a',
            treeGrammar, 'a',
            input="abc;"
            )

        self.assertEqual("abc<V> ;<V>", found)


if __name__ == '__main__':
    unittest.main()