"""ANTLR3 runtime package""" # begin[licence] # # [The "BSD licence"] # Copyright (c) 2005-2012 Terence Parr # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # 3. The name of the author may not be used to endorse or promote products # derived from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. # IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # end[licence] from .constants import DEFAULT_CHANNEL, EOF, INVALID_TOKEN_TYPE ############################################################################ # # basic token interface # ############################################################################ class Token(object): """@brief Abstract token baseclass.""" TOKEN_NAMES_MAP = None @classmethod def registerTokenNamesMap(cls, tokenNamesMap): """@brief Store a mapping from token type to token name. This enables token.typeName to give something more meaningful than, e.g., '6'. """ cls.TOKEN_NAMES_MAP = tokenNamesMap cls.TOKEN_NAMES_MAP[EOF] = "EOF" def __init__(self, type=None, channel=DEFAULT_CHANNEL, text=None, index=-1, line=0, charPositionInLine=-1, input=None): # We use -1 for index and charPositionInLine as an invalid index self._type = type self._channel = channel self._text = text self._index = index self._line = 0 self._charPositionInLine = charPositionInLine self.input = input # To override a property, you'll need to override both the getter and setter. @property def text(self): return self._text @text.setter def text(self, value): self._text = value @property def type(self): return self._type @type.setter def type(self, value): self._type = value # For compatibility def getType(self): return self._type @property def typeName(self): if self.TOKEN_NAMES_MAP: return self.TOKEN_NAMES_MAP.get(self._type, "INVALID_TOKEN_TYPE") else: return str(self._type) @property def line(self): """Lines are numbered 1..n.""" return self._line @line.setter def line(self, value): self._line = value @property def charPositionInLine(self): """Columns are numbered 0..n-1.""" return self._charPositionInLine @charPositionInLine.setter def charPositionInLine(self, pos): self._charPositionInLine = pos @property def channel(self): return self._channel @channel.setter def channel(self, value): self._channel = value @property def index(self): """ An index from 0..n-1 of the token object in the input stream. This must be valid in order to use the ANTLRWorks debugger. """ return self._index @index.setter def index(self, value): self._index = value def getInputStream(self): """@brief From what character stream was this token created. You don't have to implement but it's nice to know where a Token comes from if you have include files etc... on the input.""" raise NotImplementedError def setInputStream(self, input): """@brief From what character stream was this token created. You don't have to implement but it's nice to know where a Token comes from if you have include files etc... on the input.""" raise NotImplementedError ############################################################################ # # token implementations # # Token # +- CommonToken # \- ClassicToken # ############################################################################ class CommonToken(Token): """@brief Basic token implementation. This implementation does not copy the text from the input stream upon creation, but keeps start/stop pointers into the stream to avoid unnecessary copy operations. """ def __init__(self, type=None, channel=DEFAULT_CHANNEL, text=None, input=None, start=None, stop=None, oldToken=None): if oldToken: super().__init__(oldToken.type, oldToken.channel, oldToken.text, oldToken.index, oldToken.line, oldToken.charPositionInLine, oldToken.input) if isinstance(oldToken, CommonToken): self.start = oldToken.start self.stop = oldToken.stop else: self.start = start self.stop = stop else: super().__init__(type=type, channel=channel, input=input) # We need to be able to change the text once in a while. If # this is non-null, then getText should return this. Note that # start/stop are not affected by changing this. self._text = text # The char position into the input buffer where this token starts self.start = start # The char position into the input buffer where this token stops # This is the index of the last char, *not* the index after it! self.stop = stop @property def text(self): # Could be the empty string, and we want to return that. if self._text is not None: return self._text if not self.input: return None if self.start < self.input.size() and self.stop < self.input.size(): return self.input.substring(self.start, self.stop) return '<EOF>' @text.setter def text(self, value): """ Override the text for this token. getText() will return this text rather than pulling from the buffer. Note that this does not mean that start/stop indexes are not valid. It means that that input was converted to a new string in the token object. """ self._text = value def getInputStream(self): return self.input def setInputStream(self, input): self.input = input def __str__(self): if self.type == EOF: return "<EOF>" channelStr = "" if self.channel > 0: channelStr = ",channel=" + str(self.channel) txt = self.text if txt: # Put 2 backslashes in front of each character txt = txt.replace("\n", r"\\n") txt = txt.replace("\r", r"\\r") txt = txt.replace("\t", r"\\t") else: txt = "<no text>" return ("[@{0.index},{0.start}:{0.stop}={txt!r}," "<{0.typeName}>{channelStr}," "{0.line}:{0.charPositionInLine}]" .format(self, txt=txt, channelStr=channelStr)) class ClassicToken(Token): """@brief Alternative token implementation. A Token object like we'd use in ANTLR 2.x; has an actual string created and associated with this object. These objects are needed for imaginary tree nodes that have payload objects. We need to create a Token object that has a string; the tree node will point at this token. CommonToken has indexes into a char stream and hence cannot be used to introduce new strings. """ def __init__(self, type=None, text=None, channel=DEFAULT_CHANNEL, oldToken=None): if oldToken: super().__init__(type=oldToken.type, channel=oldToken.channel, text=oldToken.text, line=oldToken.line, charPositionInLine=oldToken.charPositionInLine) else: super().__init__(type=type, channel=channel, text=text, index=None, line=None, charPositionInLine=None) def getInputStream(self): return None def setInputStream(self, input): pass def toString(self): channelStr = "" if self.channel > 0: channelStr = ",channel=" + str(self.channel) txt = self.text if not txt: txt = "<no text>" return ("[@{0.index!r},{txt!r},<{0.type!r}>{channelStr}," "{0.line!r}:{0.charPositionInLine!r}]" .format(self, txt=txt, channelStr=channelStr)) __str__ = toString __repr__ = toString INVALID_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE) # In an action, a lexer rule can set token to this SKIP_TOKEN and ANTLR # will avoid creating a token for this symbol and try to fetch another. SKIP_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE)