# Plain text  |  311 lines  |  9.51 KB

"""ANTLR3 runtime package"""

# begin[licence]
#
# [The "BSD licence"]
# Copyright (c) 2005-2012 Terence Parr
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# end[licence]

from .constants import DEFAULT_CHANNEL, EOF, INVALID_TOKEN_TYPE

############################################################################
#
# basic token interface
#
############################################################################

class Token(object):
    """@brief Abstract token baseclass.

    Stores the attributes common to every token implementation (type,
    channel, text, index, line, charPositionInLine and input) and exposes
    most of them as properties backed by underscore-prefixed fields.
    getInputStream() and setInputStream() are left for subclasses.
    """

    # Optional mapping from token type to a readable token name.  It is
    # None until registerTokenNamesMap() has been called.
    TOKEN_NAMES_MAP = None

    @classmethod
    def registerTokenNamesMap(cls, tokenNamesMap):
        """@brief Store a mapping from token type to token name.

        This enables token.typeName to give something more meaningful
        than, e.g., '6'.

        The mapping is stored by reference on the class this method is
        called on, and an "EOF" entry is added to it in place.

        @param tokenNamesMap Mutable mapping from token type to name.
        """
        cls.TOKEN_NAMES_MAP = tokenNamesMap
        cls.TOKEN_NAMES_MAP[EOF] = "EOF"

    def __init__(self, type=None, channel=DEFAULT_CHANNEL, text=None,
                 index=-1, line=0, charPositionInLine=-1, input=None):
        """@brief Initialise the common token attributes.

        @param type Token type.
        @param channel Channel the token is on.
        @param text Text of the token, or None.
        @param index Index of the token in the token stream; -1 is used
            as the invalid index.
        @param line Line of the token (lines are numbered 1..n).
        @param charPositionInLine Column of the token; -1 is used as the
            invalid position.
        @param input Stream the token was created from, or None.
        """
        # We use -1 for index and charPositionInLine as an invalid index
        self._type = type
        self._channel = channel
        self._text = text
        self._index = index
        # Store the value that was passed in.  This used to be hard-coded
        # to 0, which silently dropped the line given by the caller (for
        # instance when a token is copied from another token).
        self._line = line
        self._charPositionInLine = charPositionInLine
        self.input = input

    # To override a property, you'll need to override both the getter and setter.
    @property
    def text(self):
        """Text of the token, or None if no text was set."""
        return self._text

    @text.setter
    def text(self, value):
        self._text = value

    @property
    def type(self):
        """Token type."""
        return self._type

    @type.setter
    def type(self, value):
        self._type = value

    # For compatibility
    def getType(self):
        """Return the token type (same value as the type property)."""
        return self._type

    @property
    def typeName(self):
        """Readable name of the token type.

        If a non-empty names map has been registered, the name is looked
        up there and "INVALID_TOKEN_TYPE" is returned for unknown types.
        Otherwise the type itself is returned as a string.
        """
        if self.TOKEN_NAMES_MAP:
            return self.TOKEN_NAMES_MAP.get(self._type, "INVALID_TOKEN_TYPE")
        else:
            return str(self._type)

    @property
    def line(self):
        """Lines are numbered 1..n."""
        return self._line

    @line.setter
    def line(self, value):
        self._line = value

    @property
    def charPositionInLine(self):
        """Columns are numbered 0..n-1."""
        return self._charPositionInLine

    @charPositionInLine.setter
    def charPositionInLine(self, pos):
        self._charPositionInLine = pos

    @property
    def channel(self):
        """Channel the token is on."""
        return self._channel

    @channel.setter
    def channel(self, value):
        self._channel = value

    @property
    def index(self):
        """
        An index from 0..n-1 of the token object in the input stream.
        This must be valid in order to use the ANTLRWorks debugger.
        """
        return self._index

    @index.setter
    def index(self, value):
        self._index = value

    def getInputStream(self):
        """@brief From what character stream was this token created.

        You don't have to implement but it's nice to know where a Token
        comes from if you have include files etc... on the input.

        @throws NotImplementedError Always; subclasses provide this.
        """

        raise NotImplementedError

    def setInputStream(self, input):
        """@brief Set the character stream this token was created from.

        You don't have to implement but it's nice to know where a Token
        comes from if you have include files etc... on the input.

        @param input The stream to associate with this token.
        @throws NotImplementedError Always; subclasses provide this.
        """

        raise NotImplementedError


############################################################################
#
# token implementations
#
# Token
# +- CommonToken
# \- ClassicToken
#
############################################################################

class CommonToken(Token):
    """@brief Basic token implementation.

    A CommonToken does not take a copy of its text when it is created.
    It remembers the start/stop character indexes into its input stream
    and fetches the text from the stream on demand, unless an explicit
    text has been assigned.
    """

    def __init__(self, type=None, channel=DEFAULT_CHANNEL, text=None,
                 input=None, start=None, stop=None, oldToken=None):
        """@brief Create a token from scratch or as a copy of oldToken.

        When oldToken is given, its type, channel, text, index, line,
        charPositionInLine and input are forwarded to the base
        constructor; the type, channel, text and input arguments of this
        call are then ignored.  start/stop are taken from oldToken if it
        is a CommonToken, otherwise from the arguments.
        """
        if not oldToken:
            super().__init__(type=type, channel=channel, input=input)

            # Explicit text for this token.  When this is not None the
            # text property returns it instead of reading the input;
            # start/stop are not affected by it.
            self._text = text

            # Index of the first character of the token in the input.
            self.start = start

            # Index of the last character of the token in the input
            # (inclusive, *not* one past the end).
            self.stop = stop
            return

        super().__init__(oldToken.type, oldToken.channel, oldToken.text,
                         oldToken.index, oldToken.line,
                         oldToken.charPositionInLine, oldToken.input)

        # Only another CommonToken carries start/stop of its own.
        fromCommonToken = isinstance(oldToken, CommonToken)
        self.start = oldToken.start if fromCommonToken else start
        self.stop = oldToken.stop if fromCommonToken else stop

    @property
    def text(self):
        """Text of the token.

        Returns the explicitly assigned text if there is one (this
        includes the empty string), None if there is no input, the
        substring start..stop of the input if both indexes are below the
        input size, and '<EOF>' otherwise.
        """
        explicit = self._text
        if explicit is not None:
            return explicit

        stream = self.input
        if not stream:
            return None

        withinInput = (self.start < stream.size()
                       and self.stop < stream.size())
        return stream.substring(self.start, self.stop) if withinInput \
            else '<EOF>'

    @text.setter
    def text(self, value):
        """
        Replace the text of this token.  From now on the text property
        returns this value instead of reading it from the input buffer.
        The start/stop indexes are left untouched.
        """
        self._text = value

    def getInputStream(self):
        """Return the input this token refers to."""
        return self.input

    def setInputStream(self, input):
        """Set the input this token refers to."""
        self.input = input

    def __str__(self):
        """Return a one-line description of the token for debugging."""
        if self.type == EOF:
            return "<EOF>"

        # The channel is only mentioned when it is greater than 0.
        channelPart = ",channel=" + str(self.channel) \
            if self.channel > 0 else ""

        shown = self.text
        if not shown:
            shown = "<no text>"
        else:
            # Put 2 backslashes in front of each character
            escapes = (("\n", r"\\n"), ("\r", r"\\r"), ("\t", r"\\t"))
            for raw, escaped in escapes:
                shown = shown.replace(raw, escaped)

        template = ("[@{0.index},{0.start}:{0.stop}={txt!r},"
                    "<{0.typeName}>{channelStr},"
                    "{0.line}:{0.charPositionInLine}]")
        return template.format(self, txt=shown, channelStr=channelPart)


class ClassicToken(Token):
    """@brief Alternative token implementation.

    A token in the style of ANTLR 2.x that owns an actual string.  Such
    tokens are needed for imaginary tree nodes with payload objects: the
    tree node points at a token, and that token has to carry a string.
    CommonToken only has indexes into a char stream and therefore cannot
    be used to introduce new strings.
    """

    def __init__(self, type=None, text=None, channel=DEFAULT_CHANNEL,
                 oldToken=None):
        """@brief Create a token from scratch or as a copy of oldToken.

        Without oldToken, type, channel and text are passed to the base
        constructor together with None for index, line and
        charPositionInLine.  With oldToken, its type, channel, text, line
        and charPositionInLine are passed on instead and the other
        arguments of this call are ignored; the index is not forwarded.
        """
        if not oldToken:
            super().__init__(type=type, channel=channel, text=text,
                             index=None, line=None, charPositionInLine=None)
            return

        super().__init__(type=oldToken.type, channel=oldToken.channel,
                         text=oldToken.text, line=oldToken.line,
                         charPositionInLine=oldToken.charPositionInLine)

    def getInputStream(self):
        """A ClassicToken has no input stream; always returns None."""
        return None

    def setInputStream(self, input):
        """Accepted for interface compatibility; the value is ignored."""
        pass

    def toString(self):
        """Return a one-line description of the token for debugging."""
        # The channel is only mentioned when it is greater than 0.
        channelPart = ",channel=" + str(self.channel) \
            if self.channel > 0 else ""

        shown = self.text or "<no text>"

        template = ("[@{0.index!r},{txt!r},<{0.type!r}>{channelStr},"
                    "{0.line!r}:{0.charPositionInLine!r}]")
        return template.format(self, txt=shown, channelStr=channelPart)

    # str() and repr() both use the same description.
    __str__ = toString
    __repr__ = toString


# Module-level token object whose type is INVALID_TOKEN_TYPE.  It is created
# without text, input or start/stop, so those keep their None defaults.
INVALID_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE)

# In an action, a lexer rule can set token to this SKIP_TOKEN and ANTLR
# will avoid creating a token for this symbol and try to fetch another.
# This is a separate object from INVALID_TOKEN although it has the same
# type; presumably it is recognised by identity -- verify against the lexer.
SKIP_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE)