/** \file
 * Defines the interface for an ANTLR3 common token stream. Custom token streams
 * can derive from these classes and override whichever member functions they
 * need in order to supply their own behaviour.
 */
#ifndef	_ANTLR3_TOKENSTREAM_HPP
#define	_ANTLR3_TOKENSTREAM_HPP

// [The "BSD licence"]
// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB

//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include    "antlr3defs.hpp"

/** Definition of a token source, which provides a function that returns the
 *  next token (using a token factory if it is to be efficient) and a pointer
 *  to an ANTLR3_INPUT_STREAM. This is slightly different from the Java
 *  interface because we have no way to implement multiple interfaces without
 *  defining them in the interface structure or casting (void *), which is too
 *  convoluted.
 */
ANTLR_BEGIN_NAMESPACE()

// We are not making this a subclass of AllocPolicy, as it will always be a base class.
template<class ImplTraits>
class TokenSource
{
public:
	typedef typename ImplTraits::CommonTokenType TokenType;
	typedef TokenType CommonTokenType;
	typedef typename ImplTraits::StringType StringType;
	typedef typename ImplTraits::LexerType LexerType;

private:
    /** A special pre-allocated token that signifies End Of Tokens. Because this must
     *  be set up with the current input index and so on, we embed the structure and
     *  return its address. It is marked as factoryMade so that no attempt is ever
     *  made to free it.
     */
    TokenType				m_eofToken;

	/// A special pre-allocated token, which is returned by mTokens() if the
	/// lexer rule said to just skip the generated token altogether.
	/// Having this single token stops us wasting memory by having the token factory
	/// actually create something that we are just going to SKIP() anyway.
	///
	TokenType				m_skipToken;

    /** When the token source is constructed, it is populated with the file
     *  name from which the lexer produced the tokens. This is a copy of the
     *  name supplied by the CharStream (and may be empty), so it should not be
     *  manipulated other than to copy or print it.
     */
    StringType				m_fileName;

public:
	TokenType& get_eofToken();
	const TokenType& get_eofToken() const;
	TokenType& get_skipToken();
	StringType& get_fileName();
	LexerType* get_super();

	void set_fileName( const StringType& fileName );

	/**
	 * \brief
	 * Default implementation of the nextToken() call for a lexer.
	 *
	 * The lexer implementation itself is reached via get_super().
	 *
	 * \returns
	 * The next token in the current input stream, or the EOF token
	 * if there are no more tokens in any input stream on the stack.
	 *
	 * \see nextTokenStr
	 */
    TokenType*  nextToken();
	CommonTokenType* nextToken( BoolForwarder<true> /*isFiltered*/ );
	CommonTokenType* nextToken( BoolForwarder<false> /*isFiltered*/ );
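	/// The BoolForwarder overloads above are compile-time tag-dispatch variants of
	/// nextToken(): the BoolForwarder<true> form is intended for filtering lexers
	/// (isFiltered) and the BoolForwarder<false> form for ordinary lexers, so the
	/// choice is made at compile time rather than via a runtime flag.
	///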

	///
	/// \brief
	/// Returns the next available token from the current input stream.
	///
	/// The lexer implementation itself is reached via get_super().
	///
	/// \returns
	/// The next token in the current input stream, or the EOF token
	/// if there are no more tokens.
	///
	/// \see nextToken
	///
	TokenType*	nextTokenStr();

protected:
	TokenSource();
};

/** Definition of the ANTLR3 common token stream interface.
 * \remark
 * Much of the documentation for this interface is stolen from Ter's Java implementation.
 */
template<class ImplTraits>
class TokenStream  : public ImplTraits::TokenIntStreamType
{
public:
	typedef typename ImplTraits::TokenSourceType TokenSourceType;
	typedef typename ImplTraits::TokenIntStreamType IntStreamType;
	typedef typename ImplTraits::CommonTokenType TokenType;
	typedef TokenType UnitType;
	typedef typename ImplTraits::StringType StringType;
	typedef typename ImplTraits::DebugEventListenerType DebugEventListenerType;
	typedef typename ImplTraits::TokenStreamType TokenStreamType;
	typedef typename ImplTraits::ParserType ComponentType;

protected:
    /** Pointer to the token source for this stream
     */
    TokenSourceType*    m_tokenSource;

	/// Debugger interface, if this is a debugging token stream
	///
	DebugEventListenerType*	m_debugger;

	/// Indicates the initial stream state for dbgConsume()
	///
	bool				m_initialStreamState;

public:
	TokenStream(TokenSourceType* source, DebugEventListenerType* debugger);
	IntStreamType* get_istream();
	TokenSourceType* get_tokenSource() const;
	void set_tokenSource( TokenSourceType* tokenSource );

    /** Get the token at the current input pointer + i ahead, where i=1 is the next
     *  token. i<0 indicates tokens in the past: -1 is the previous token and -2 is
     *  two tokens ago. LT(0) is undefined. For i>=n, return the EOF token.
     *  Return NULL for LT(0) and any index that results in an absolute position
     *  that is negative.
     */
    const TokenType*  _LT(ANTLR_INT32 k);
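
    /// A minimal lookahead sketch (the variable and stream names here are
    /// illustrative only, not part of this interface):
    /// \code
    ///   const TokenType* ahead  = tstream->_LT(1);   // the next token to be consumed
    ///   const TokenType* behind = tstream->_LT(-1);  // the most recently consumed token
    /// \endcode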

    /** Where is this stream pulling tokens from? This is not the name, but a
     *  pointer to the token source interface. The token source holds a pointer
     *  to the input stream and provides the function that returns the next token.
     */
    TokenSourceType*   getTokenSource();

    /** Function that installs a token source for the stream
     */
    void	setTokenSource(TokenSourceType*   tokenSource);

    /** Return the text of all the tokens in the stream, as the old tramp in
     *  Leeds market used to say: "Get the lot!"
     */
    StringType	toString();

    /** Return the text of all tokens from start to stop, inclusive.
     *  If the stream does not buffer all the tokens then it can just
     *  return an empty ANTLR3_STRING or NULL; grammars should not access $ruleLabel.text in
     *  an action in that case.
     */
    StringType	 toStringSS(ANTLR_MARKER start, ANTLR_MARKER stop);

    /** Because the user is not required to use a token with an index stored
     *  in it, we must provide a means for two token objects themselves to
     *  indicate the start/end location.  Most often this will just delegate
     *  to the other toString(int,int).  This is also parallel with
     *  the pTREENODE_STREAM->toString(Object,Object).
     */
    StringType	 toStringTT(const TokenType* start, const TokenType* stop);


    /** Function that sets the token stream into debugging mode
     */
    void	setDebugListener(DebugEventListenerType* debugger);

	TokenStream();

};

/** Common token stream is an implementation of ANTLR_TOKEN_STREAM for the default
 *  parsers and recognizers. You may of course build your own implementation if
 *  you are so inclined.
 */
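
/** Helper that selects the container in which CommonTokenStream stores its tokens:
 *  a list type by default, or (via the specialization below) an ordered map keyed
 *  by token index when TOKENS_ACCESSED_FROM_OWNING_RULE is set, so that tokens can
 *  be discarded once the rule that owns them has completed.
 */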
template<bool TOKENS_ACCESSED_FROM_OWNING_RULE, class ListType, class MapType>
class TokenStoreSelector
{
public:
	typedef ListType TokensType;
};

template<class ListType, class MapType>
class TokenStoreSelector<true, ListType, MapType>
{
public:
	typedef MapType TokensType;
};

template<class ImplTraits>
class	CommonTokenStream : public TokenStream<ImplTraits>
{
public:
	typedef typename ImplTraits::AllocPolicyType AllocPolicyType;
	typedef typename ImplTraits::BitsetType BitsetType;
	typedef typename ImplTraits::CommonTokenType TokenType;
	typedef typename ImplTraits::TokenSourceType TokenSourceType;
	typedef typename ImplTraits::DebugEventListenerType DebugEventListenerType;
	typedef typename AllocPolicyType::template ListType<TokenType> TokensListType;
	typedef typename AllocPolicyType::template OrderedMapType<ANTLR_MARKER, TokenType> TokensMapType;
	typedef typename TokenStoreSelector< ImplTraits::TOKENS_ACCESSED_FROM_OWNING_RULE,
	                                       TokensListType, TokensMapType >::TokensType TokensType;

	typedef typename AllocPolicyType::template UnOrderedMapType<ANTLR_UINT32, ANTLR_UINT32> ChannelOverridesType;
	typedef typename AllocPolicyType::template OrderedSetType<ANTLR_UINT32> DiscardSetType;
	typedef typename AllocPolicyType::template ListType<ANTLR_UINT32> IntListType;
	typedef TokenStream<ImplTraits> BaseType;

private:
    /** Records every single token pulled from the source, indexed by the token index.
     *  There might be more efficient ways to do this, such as referencing directly into
     *  the token factory pools, but for now this is convenient, and the list is not a
     *  huge overhead while still allowing iteration and so on.
     */
    TokensType			m_tokens;

    /** Override map of tokens. If a token type has an entry in here, then the
     *  entry holds the override channel number that should always be used for
     *  this token type.
     */
    ChannelOverridesType	m_channelOverrides;

    /** Discard set. If a token's type has an entry in this set, then the token
     *  is thrown away.
     */
    DiscardSetType			m_discardSet;

    /* The channel number that this token stream is tuned to. For instance, whitespace
     * is usually tuned to channel 99, which no token stream would normally tune to and
     * so it is thrown away.
     */
    ANTLR_UINT32			m_channel;

	/** The index into the tokens list of the current token (the next one that will be
     *  consumed). p = -1 indicates that the token list is empty.
     */
    ANTLR_INT32				m_p;

	/* The total number of tokens issued so far. For streams that delete tokens,
	   this helps in assigning the index of the next token.
	 */
	ANTLR_UINT32			m_nissued;

    /** If this flag is set to true, then tokens that the stream sees but that are
     *  not on the channel this stream is tuned to are not tracked in the tokens
     *  table. When set to false, ALL tokens are added to the tracking.
     */
    bool					m_discardOffChannel;

public:
	CommonTokenStream(ANTLR_UINT32 hint, TokenSourceType* source = NULL,
										DebugEventListenerType* debugger = NULL);
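
	/// A sketch of how a common token stream is typically wired between a lexer and
	/// a parser (MyTraits, MyLexer, MyParser, ANTLR_SIZE_HINT and get_tokSource()
	/// are illustrative assumptions drawn from typical generated driver code, not
	/// guaranteed by this header):
	/// \code
	///   MyLexer                   lxr(&input);                                   // input is an ImplTraits input stream
	///   MyTraits::TokenStreamType tstream(ANTLR_SIZE_HINT, lxr.get_tokSource()); // buffer-size hint + token source
	///   MyParser                  psr(&tstream);                                 // parser pulls tokens from the stream
	/// \endcode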
	~CommonTokenStream();
	TokensType& get_tokens();
	const TokensType& get_tokens() const;
	DiscardSetType& get_discardSet();
	const DiscardSetType& get_discardSet() const;
	ANTLR_INT32 get_p() const;
	void set_p( ANTLR_INT32 p );
	void inc_p();
	void dec_p();

    /** A simple filter mechanism whereby you can tell this token stream to force
     *  all tokens of type ttype onto the given channel. For example, when
     *  interpreting, we cannot execute actions, so we need to tell the stream to
     *  force all WS and NEWLINE tokens onto a different, ignored channel.
     */
    void setTokenTypeChannel(ANTLR_UINT32 ttype, ANTLR_UINT32 channel);
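
    /// For example (WS, NEWLINE and HIDDEN are assumed token type / channel
    /// constants from the generated lexer, used here purely for illustration):
    /// \code
    ///   tstream.setTokenTypeChannel(WS,      HIDDEN); // park whitespace on an ignored channel
    ///   tstream.setTokenTypeChannel(NEWLINE, HIDDEN); // likewise for newlines
    /// \endcode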

    /** Add a particular token type to the discard set. If a token is found to belong
     *  to this set, then it is skipped/thrown away.
     */
    void discardTokenType(ANTLR_INT32 ttype);
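
    /// For example (COMMENT is an assumed token type constant):
    /// \code
    ///   tstream.discardTokenType(COMMENT); // comments are dropped rather than buffered
    /// \endcode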

	// Discards the tokens of a particular rule once that rule has completed execution.
	void discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop );
	void discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop, 
								BoolForwarder<true>  tokens_accessed_from_owning_rule  );
	void discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop, 
								BoolForwarder<false>  tokens_accessed_from_owning_rule  );

	void insertToken( const TokenType& tok );
	void insertToken( const TokenType& tok, BoolForwarder<true>  tokens_accessed_from_owning_rule  );
	void insertToken( const TokenType& tok, BoolForwarder<false>  tokens_accessed_from_owning_rule  );

	/** Get a token at an absolute index i; 0..n-1.  This is really only
     *  needed for profiling and debugging and token stream rewriting.
     *  If you don't want to buffer up tokens, then this method makes no
     *  sense for you.  Naturally you can't use the rewrite stream feature.
     *  I believe DebugTokenStream can easily be altered to not use
     *  this method, removing the dependency.
     */
    const TokenType*   get(ANTLR_MARKER i);
	const TokenType*   getToken(ANTLR_MARKER i);
	const TokenType* getToken( ANTLR_MARKER tok_idx, BoolForwarder<true>  tokens_accessed_from_owning_rule );
	const TokenType* getToken( ANTLR_MARKER tok_idx, BoolForwarder<false>  tokens_accessed_from_owning_rule  );

    /** Signal to discard off-channel tokens from here on in.
     */
    void discardOffChannelToks(bool discard);

    /** Function that returns a pointer to the list of all tokens
     *  in the stream (this causes the buffer to fill if it has not been filled yet)
     */
    TokensType*	getTokens();

    /** Function that returns all the tokens between a start and a stop index.
     */
    void getTokenRange(ANTLR_UINT32 start, ANTLR_UINT32 stop, TokensListType& tokenRange);

    /** Function that returns all the tokens indicated by the specified bitset, within a range of tokens
     */
    void getTokensSet(ANTLR_UINT32 start, ANTLR_UINT32 stop, BitsetType* types, TokensListType& tokenSet);

    /** Function that returns all the tokens indicated by being a member of the supplied List
     */
    void getTokensList(ANTLR_UINT32 start, ANTLR_UINT32 stop,
									const IntListType& list, TokensListType& tokenList);

    /** Function that returns all tokens of a certain type within a range.
     */
    void getTokensType(ANTLR_UINT32 start, ANTLR_UINT32 stop, ANTLR_UINT32 type, TokensListType& tokens);
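
    /// For example, collecting every identifier token in a range (ID is an assumed
    /// token type constant):
    /// \code
    ///   TokensListType ids;
    ///   tstream.getTokensType(0, 100, ID, ids); // all ID tokens with indexes 0..100
    /// \endcode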

    /** Function that resets the token stream so that it can be reused, but
     *  that does not free up any resources, such as the token factory, the
     *  factory pool and so on. This prevents the need to keep freeing and
     *  reallocating the token pools if the thing you are building is a
     *  multi-shot daemon or something like that. It is much faster to
     *  just reuse all the vectors.
     */
    void  reset();

	const TokenType* LB(ANTLR_INT32 k);


	void fillBufferExt();
	void fillBuffer();

	bool hasReachedFillbufferTarget( ANTLR_UINT32 cnt, BoolForwarder<true>  tokens_accessed_from_owning_rule  );
	bool hasReachedFillbufferTarget( ANTLR_UINT32 cnt, BoolForwarder<false>  tokens_accessed_from_owning_rule  );

	ANTLR_UINT32 skipOffTokenChannels(ANTLR_INT32 i);
	ANTLR_UINT32 skipOffTokenChannelsReverse(ANTLR_INT32 x);
	ANTLR_MARKER index_impl();
};

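/** Exception thrown when an attempt is made to access a token that has already
 *  been deleted from the stream (for instance after discardTokens() has dropped
 *  the tokens of a completed rule).
 */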
class TokenAccessException : public std::exception
{
public:
	virtual const char* what() const throw()
	{
		return "Attempted access on deleted token";
	}
};

ANTLR_END_NAMESPACE()

#include "antlr3tokenstream.inl"

#endif