/* C++ program  |  368 lines  |  13.3 KB */

/** \file
 * \brief Defines the interface for a common token.
 *
 * All token streams should provide their tokens using an instance
 * of this common token. A custom pointer is provided, where you may attach
 * a further structure to enhance the common token if you feel the need
 * to do so. The C runtime will assume that a token provides implementations
 * of the interface functions, but all of them may be replaced by your own
 * implementation if you require it.
 */
#ifndef	_ANTLR3_COMMON_TOKEN_H
#define	_ANTLR3_COMMON_TOKEN_H

// [The "BSD licence"]
// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
// http://www.temporal-wave.com
// http://www.linkedin.com/in/jimidle
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include    <antlr3defs.h>

/** How many tokens to allocate at once in the token factory
 */
#define	ANTLR3_FACTORY_POOL_SIZE    1024

/* Base token types, which all lexer/parser tokens come after in sequence.
 */

/** Indicator of an invalid token
 */
#define	ANTLR3_TOKEN_INVALID	0

/** Token type reserved for the end-of-rule (EOR) marker.
 */
#define	ANTLR3_EOR_TOKEN_TYPE	1

/** Imaginary token type to cause a traversal of child nodes in a tree parser
 */
#define	ANTLR3_TOKEN_DOWN		2

/** Imaginary token type to signal the end of a stream of child nodes.
 */
#define	ANTLR3_TOKEN_UP		3

/** First token that can be used by users/generated code.
 *
 *  The expansion is parenthesized so that the macro acts as a single value
 *  when it appears inside a larger expression, for example
 *  ANTLR3_MIN_TOKEN_TYPE * 2 or n - ANTLR3_MIN_TOKEN_TYPE.
 */
#define	ANTLR3_MIN_TOKEN_TYPE	(ANTLR3_TOKEN_UP + 1)

/** End of file token
 */
#define	ANTLR3_TOKEN_EOF	(ANTLR3_CHARSTREAM_EOF & 0xFFFFFFFF)

/** Default channel for a token
 */
#define	ANTLR3_TOKEN_DEFAULT_CHANNEL	0

/** Reserved channel number for a HIDDEN token - a token that
 *  is hidden from the parser.
 */
#define	HIDDEN				99

#ifdef __cplusplus
extern "C" {
#endif

/* Values for the textState member of a token. They record what kind of
 * text, if any, the token is currently carrying:
 *
 *   ANTLR3_TEXT_NONE   (0)  Nothing (neither rewrite text, nor setText)
 *   ANTLR3_TEXT_CHARP  (1)  char * to user supplied rewrite text
 *   ANTLR3_TEXT_STRING (2)  pANTLR3_STRING because of setText or similar action
 */
#define ANTLR3_TEXT_NONE    0
#define ANTLR3_TEXT_CHARP   1
#define ANTLR3_TEXT_STRING  2

/** The definition of an ANTLR3 common token structure, which all implementations
 * of a token stream should provide, installing any further structures in the
 * custom pointer element of this structure.
 *
 * \remark
 * Token streams are in essence provided by lexers or other programs that serve
 * as lexers.
 */
/* Layout note: the order and types of the members below are part of the
 * binary interface of this structure and must not be rearranged.
 */
typedef struct ANTLR3_COMMON_TOKEN_struct
{
    /** The actual type of this token
     */
    ANTLR3_UINT32   type;

    /** Indicates that a token was produced from the token factory and therefore
     *  the freeToken() method should not do anything itself because the
     *  token factory is responsible for deleting it.
     */
    ANTLR3_BOOLEAN  factoryMade;

    /** A string factory that we can use if we ever need the text of a token
     *  and need to manufacture a pANTLR3_STRING
     */
    pANTLR3_STRING_FACTORY  strFactory;

    /** The line number in the input stream where this token was derived from
     */
    ANTLR3_UINT32   line;

    /** The offset into the input stream at which the line that this
     *  token resides in starts.
     */
    void            * lineStart;

    /** The character position in the line that this token was derived from
     */
    ANTLR3_INT32    charPosition;

    /** The virtual channel that this token exists in.
     */
    ANTLR3_UINT32   channel;

    /** Pointer to the input stream that this token originated in.
     */
    pANTLR3_INPUT_STREAM    input;

    /** What the index of this token is, 0, 1, .., n-2, n-1 tokens
     */
    ANTLR3_MARKER   index;

    /** The character offset in the input stream where the text for this token
     *  starts.
     */
    ANTLR3_MARKER   start;

    /** The character offset in the input stream where the text for this token
     *  stops.
     */
    ANTLR3_MARKER   stop;

    /** Indicates whether this token is carrying:
     *
     *  State | Meaning
     *  ------+--------------------------------------
     *      0 | Nothing (neither rewrite text, nor setText)
     *      1 | char * to user supplied rewrite text
     *      2 | pANTLR3_STRING because of setText or similar action
     *
     *  The states correspond to ANTLR3_TEXT_NONE, ANTLR3_TEXT_CHARP and
     *  ANTLR3_TEXT_STRING. Affects the union structure tokText below
     *  (uses 32 bit so alignment is always good).
     */
    ANTLR3_UINT32   textState;

    union
    {
        /** Pointer that is used when the token just has a pointer to
         *  a char *, such as when a rewrite of an imaginary token supplies
         *  a string in the grammar. No sense in constructing a pANTLR3_STRING just
         *  for that, as mostly the text will not be accessed - if it is, then
         *  we will build a pANTLR3_STRING for it at that point.
         */
        pANTLR3_UCHAR   chars;

        /** Some token types actually do carry around their associated text, hence
         *  (*getText)() will return this pointer if it is not NULL
         */
        pANTLR3_STRING  text;
    }
        tokText;

    /** Because it is a bit more of a hassle to override an ANTLR3_COMMON_TOKEN
     *  as the standard structure for a token, a number of user programmable
     *  elements are allowed in a token. This is the first of them.
     */
    ANTLR3_UINT32   user1;

    /** Second user programmable element, see user1.
     */
    ANTLR3_UINT32   user2;

    /** Third user programmable element, see user1.
     */
    ANTLR3_UINT32   user3;

    /** Pointer to a custom element that the ANTLR3 programmer may define and install
     */
    void            * custom;

    /** Pointer to a function that knows how to free the custom structure when the
     *  token is destroyed.
     */
    void            (*freeCustom)   (void * custom);

    /* ==============================
     * API
     */

    /** Pointer to function that returns the text of a token as a pANTLR3_STRING.
     *  See also toString(), which yields a printable representation of the token.
     */
    pANTLR3_STRING  (*getText)      (struct ANTLR3_COMMON_TOKEN_struct * token);

    /** Pointer to a function that 'might' be able to set the text associated
     *  with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually
     *  do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actually have
     *  strings associated with them but just point into the current input stream. These
     *  tokens will implement this function with a function that errors out (probably
     *  drastically).
     */
    void            (*setText)      (struct ANTLR3_COMMON_TOKEN_struct * token, pANTLR3_STRING text);

    /** Variant of setText() that accepts the new text as a pANTLR3_UINT8 pointer
     *  rather than as a pANTLR3_STRING (presumably an 8 bit character string - confirm
     *  against the implementation). The same caveat applies as for setText(): tokens
     *  that merely point into the current input stream may install a function here
     *  that errors out.
     */
    void            (*setText8)     (struct ANTLR3_COMMON_TOKEN_struct * token, pANTLR3_UINT8 text);

    /** Pointer to a function that returns the token type of this token
     */
    ANTLR3_UINT32   (*getType)      (struct ANTLR3_COMMON_TOKEN_struct * token);

    /** Pointer to a function that sets the type of this token
     */
    void            (*setType)      (struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_UINT32 ttype);

    /** Pointer to a function that gets the 'line' number where this token resides
     */
    ANTLR3_UINT32   (*getLine)      (struct ANTLR3_COMMON_TOKEN_struct * token);

    /** Pointer to a function that sets the 'line' number where this token resides
     */
    void            (*setLine)      (struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_UINT32 line);

    /** Pointer to a function that gets the offset in the line where this token exists
     */
    ANTLR3_INT32    (*getCharPositionInLine)    (struct ANTLR3_COMMON_TOKEN_struct * token);

    /** Pointer to a function that sets the offset in the line where this token exists
     */
    void            (*setCharPositionInLine)    (struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_INT32 pos);

    /** Pointer to a function that gets the channel that this token was placed in (parsers
     *  can 'tune' to these channels).
     */
    ANTLR3_UINT32   (*getChannel)   (struct ANTLR3_COMMON_TOKEN_struct * token);

    /** Pointer to a function that sets the channel that this token should belong to
     */
    void            (*setChannel)   (struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_UINT32 channel);

    /** Pointer to a function that returns an index 0...n-1 of the token in the token
     *  input stream.
     */
    ANTLR3_MARKER   (*getTokenIndex)    (struct ANTLR3_COMMON_TOKEN_struct * token);

    /** Pointer to a function that can set the token index of this token in the token
     *  input stream.
     */
    void            (*setTokenIndex)    (struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_MARKER index);

    /** Pointer to a function that gets the start index in the input stream for this token.
     */
    ANTLR3_MARKER   (*getStartIndex)    (struct ANTLR3_COMMON_TOKEN_struct * token);

    /** Pointer to a function that sets the start index in the input stream for this token.
     */
    void            (*setStartIndex)    (struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_MARKER index);

    /** Pointer to a function that gets the stop index in the input stream for this token.
     */
    ANTLR3_MARKER   (*getStopIndex)     (struct ANTLR3_COMMON_TOKEN_struct * token);

    /** Pointer to a function that sets the stop index in the input stream for this token.
     */
    void            (*setStopIndex)     (struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_MARKER index);

    /** Pointer to a function that returns this token as a text representation that can be
     *  printed with embedded control codes such as \n replaced with the printable sequence "\\n"
     *  This also yields a string structure that can be used more easily than the pointer to
     *  the input stream in certain situations.
     */
    pANTLR3_STRING  (*toString)     (struct ANTLR3_COMMON_TOKEN_struct * token);
}
    ANTLR3_COMMON_TOKEN;

/** \brief ANTLR3 Token factory interface to create lots of tokens efficiently
 *  rather than creating and freeing lots of little bits of memory.
 */
typedef struct ANTLR3_TOKEN_FACTORY_struct
{
    /// Pointers to the array of tokens that this factory has produced so far
    ///
    pANTLR3_COMMON_TOKEN    * pools;

    /// Current pool tokens we are allocating from
    ///
    ANTLR3_INT32            thisPool;

    /// Maximum pool count we have available
    ///
    ANTLR3_INT32            maxPool;

    /// The next token to throw out from the pool, will cause a new pool allocation
    /// if this exceeds the available tokenCount
    ///
    ANTLR3_UINT32           nextToken;

    /// Trick to initialize tokens and their API quickly, we set up this token when the
    /// factory is created, then just copy the memory it uses into the new token.
    ///
    ANTLR3_COMMON_TOKEN     unTruc;

    /// Pointer to an input stream that is using this token factory (may be NULL)
    /// which will be assigned to the tokens automatically.
    ///
    pANTLR3_INPUT_STREAM    input;

    /// Pointer to a function that returns a new token
    ///
    pANTLR3_COMMON_TOKEN    (*newToken)         (struct ANTLR3_TOKEN_FACTORY_struct * factory);

    /// Pointer to a function that resets the factory so you can reuse the pools it
    /// has already allocated
    ///
    void                    (*reset)            (struct ANTLR3_TOKEN_FACTORY_struct * factory);

    /// Pointer to a function that changes the current input stream so that
    /// new tokens are created with reference to their originating text.
    ///
    void                    (*setInputStream)   (struct ANTLR3_TOKEN_FACTORY_struct * factory, pANTLR3_INPUT_STREAM input);

    /// Pointer to a function that destroys the factory
    ///
    void                    (*close)            (struct ANTLR3_TOKEN_FACTORY_struct * factory);
}
    ANTLR3_TOKEN_FACTORY;

#ifdef __cplusplus
}
#endif

#endif