/** \file
 * Defines the class interface for an antlr3 INTSTREAM.
 * 
 * Certain functionality (such as DFAs for instance) abstracts the stream of tokens
 * or characters into a stream of integers. Hence this structure should be included
 * in any stream that is able to provide its output as a stream of integers (which is
 * basically anything).
 *
 * There are no specific implementations of the methods in this interface in general. Though
 * for purposes of casting and so on, it may be necessary to implement a function with
 * the signature in this interface which abstracts the base implementation. In essence though
 * the base stream provides a pointer to this interface, within which it installs its
 * normal match() functions and so on. Interfaces such as DFA are then passed the pANTLR3_INT_STREAM
 * and can treat any input as an int stream.
 *
 * For instance, a lexer implements a pANTLR3_BASE_RECOGNIZER, within which there is a pANTLR3_INT_STREAM.
 * However, a pANTLR3_INPUT_STREAM also provides a pANTLR3_INT_STREAM, which it has constructed from
 * its normal interface when it was created. This is then pointed at by the pANTLR3_BASE_RECOGNIZER
 * when it is initialized with a pANTLR3_INPUT_STREAM.
 *
 * Similarly if a pANTLR3_BASE_RECOGNIZER is initialized with a pANTLR3_TOKEN_STREAM, then the 
 * pANTLR3_INT_STREAM is taken from the pANTLR3_TOKEN_STREAM. 
 *
 * If a pANTLR3_BASE_RECOGNIZER is initialized with a pANTLR3_TREENODE_STREAM, then guess where
 * the pANTLR3_INT_STREAM comes from?
 *
 * Note that because the context pointer points to the actual interface structure that is providing
 * the ANTLR3_INT_STREAM, it is defined as a (void *) in this interface. There is no direct implementation
 * of an ANTLR3_INT_STREAM (unless someone did not understand what I was doing here =;?P).
 */
#ifndef	_ANTLR3_INTSTREAM_HPP
#define	_ANTLR3_INTSTREAM_HPP

// [The "BSD licence"]
// Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB

//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include  <cassert>

#include    "antlr3defs.hpp"

ANTLR_BEGIN_NAMESPACE()

/** Bit flags identifying what kind of input a stream delivers.
 *
 *  Each stream kind owns one bit, so a custom stream that can also be
 *  treated as one of these kinds may OR the matching flag into its own
 *  type indicator.
 *
 *  \remark DO NOT USE 0x0000 as a stream type!
 */
enum STREAM_TYPE
{
	/// Character stream (bit 0, value 0x0001).
	CHARSTREAM     = 1 << 0,

	/// Token stream (bit 1, value 0x0002).
	TOKENSTREAM    = 1 << 1,

	/// Common tree node stream (bit 2, value 0x0004).
	COMMONTREENODE = 1 << 2,

	/// Mask covering all of the stream kinds above (value 0x0007), so the
	/// kind bits can be isolated and switched on.
	INPUT_MASK     = CHARSTREAM | TOKENSTREAM | COMMONTREENODE
};

// Empty tag types. The stream classes in this file take them wrapped in
// ClassForwarder<> as an extra argument to choose between the overloads
// of _LA() and consume().

/// Selects the overloads that work out the endian handling while running.
class RESOLVE_ENDIAN_AT_RUNTIME
{
};

/// Selects the overloads for the byte-order-agnostic case.
/// NOTE(review): presumably used when no byte swapping is needed - confirm
/// against the definitions.
class BYTE_AGNOSTIC
{
};

/// Selects the overloads written for little endian input.
class ANTLR_LITTLE_ENDIAN
{
};

/// Selects the overloads written for big endian input.
class ANTLR_BIG_ENDIAN
{
};

/** Common integer-stream interface shared by the stream classes in this file.
 *
 *  Only declarations appear here; this header includes antlr3intstream.inl
 *  at its end, which is presumably where the members are defined.
 *
 *  \tparam ImplTraits  Traits bundle; supplies StringType and the
 *                      AllocPolicyType this class derives from.
 *  \tparam SuperType   Type handed back by get_super(). NOTE(review): presumably
 *                      the concrete stream that derives from this class
 *                      (CRTP-style) - confirm against the .inl definitions.
 */
template<class ImplTraits, class SuperType>
class IntStream : public ImplTraits::AllocPolicyType
{
public:
	typedef typename ImplTraits::StringType StringType;
	
protected:
    /** Potentially useful in error reporting and so on, this string is
     *  an identification of the input source. Anything reading it should be
     *  prepared for it to be unset and substitute a sensible default.
     *  NOTE(review): older wording said "may be NULL"; the member is a
     *  StringType value, so confirm what "unset" looks like for that type.
     */
    StringType		m_streamName;

    /** Last marker position allocated
     */
    ANTLR_MARKER	m_lastMarker;
	
    bool		m_upper_case; //if set, values should be returned in upper case	

    /// Indicates whether we should implement endian-specific logic
    /// 0 - Undefined 1 - Default(machine and input are both same), 2 - Little Endian, 3 - Big Endian
    ANTLR_UINT8		m_endian_spec;	

public:
	IntStream();
	
	// Return a string that identifies the input source
	//
	StringType		getSourceName();

	// Accessors for the protected members above: mutable and read-only
	// views of m_streamName, and the value of m_lastMarker.
	// NOTE(review): inferred from the names - confirm in the .inl.
	StringType& 	get_streamName();
	const StringType& 	get_streamName() const;
	ANTLR_MARKER get_lastMarker() const;

	// Returns a pointer to the SuperType template argument.
	// NOTE(review): presumably a downcast of this object - confirm in the .inl.
	SuperType* get_super();
	/**
     * Function that installs a version of LA that always
     * returns upper case. Only valid for character streams and creates a case
     * insensitive lexer if the lexer tokens are described in upper case. The
     * tokens will preserve case in the token text.
     */
    void	setUcaseLA(bool flag);

    /** Consume the next 'ANTLR_UINT32' in the stream
     */
    void		    consume();

    /** Get the ANTLR_UINT32 at current input pointer + i ahead, where i=1 is the next ANTLR_UINT32
     */
    ANTLR_UINT32	_LA( ANTLR_INT32 i);

    /** Tell the stream to start buffering if it hasn't already.  Return
     *  current input position, index(), or some other marker so that
     *  when passed to rewind() you get back to the same spot.
     *  rewind(mark()) should not affect the input cursor.
     */
    ANTLR_MARKER	    mark();
    
    /** Return the current input symbol index 0..n where n indicates the
     *  last symbol has been read.
     */
    ANTLR_MARKER	    index();

    /** Reset the stream so that next call to index would return marker.
     *  The marker will usually be index() but it doesn't have to be.  It's
     *  just a marker to indicate what state the stream was in.  This is
     *  essentially calling release() and seek().  If there are markers
     *  created after this marker argument, this routine must unroll them
     *  like a stack.  Assume the state the stream was in when this marker
     *  was created.
     */
    void	rewind(ANTLR_MARKER marker);

    /** Reset the stream to the last marker position, without destroying the
     *  last marker position.
     */
    void	rewindLast();

    /** You may want to commit to a backtrack but don't want to force the
     *  stream to keep bookkeeping objects around for a marker that is
     *  no longer necessary.  This will have the same behavior as
     *  rewind() except it releases resources without the backward seek.
     */
    void	release(ANTLR_MARKER mark);

    /** Set the input cursor to the position indicated by index.  This is
     *  normally used to seek ahead in the input stream.  No buffering is
     *  required to do this unless you know your stream will use seek to
     *  move backwards such as when backtracking.
     *
     *  This is different from rewind in its multi-directional
     *  requirement and in that its argument is strictly an input cursor (index).
     *
     *  For char streams, seeking forward must update the stream state such
     *  as line number.  For seeking backwards, you will be presumably
     *  backtracking using the mark/rewind mechanism that restores state and
     *  so this method does not need to update state when seeking backwards.
     *
     *  Currently, this method is only used for efficient backtracking, but
     *  in the future it may be used for incremental parsing.
     */
    void	seek(ANTLR_MARKER index);

	/// Debug only method to flag consumption of initial off-channel
	/// tokens in the input stream
	///
	void consumeInitialHiddenTokens();

	// NOTE(review): these two carry no description in this header.
	// TokenIntStream redeclares tindex() and TreeNodeIntStream redeclares
	// both; see the .inl for what the base versions do.
	void  rewindMark(ANTLR_MARKER marker);
	ANTLR_MARKER tindex();

    /** Frees any resources that were allocated for the implementation of this
     *  interface. Usually this is just releasing the memory allocated
     *  for the structure itself, but it may of course do anything it needs to
     *  so long as it does not stamp on anything else.
     *
     *  Note that the destructor is declared non-virtual.
     */
	~IntStream();

protected:
	// Set-up helpers taking the byte order of the machine and of the input.
	// NOTE(review): findout_endian_spec presumably derives m_endian_spec
	// from the two flags - confirm in the .inl.
	void setupIntStream(bool machineBigEndian, bool inputBigEndian);
	void findout_endian_spec(bool machineBigEndian, bool inputBigEndian);

	//If the user chooses this option, then we will be resolving the endian handling at run-time
	ANTLR_UINT32	_LA( ANTLR_INT32 i, ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> );

	//resolve into one of the three categories (BYTE_AGNOSTIC, ANTLR_LITTLE_ENDIAN,
	//ANTLR_BIG_ENDIAN) at runtime
	void	consume( ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> );
};

/** IntStream variant for EBCDIC input (per the class name).
 *
 *  Declares its own _LA(), which hides the IntStream overloads of that name
 *  for code using this type, and a setupIntStream() that takes no byte-order
 *  arguments, unlike the two-argument version in IntStream.
 *  NOTE(review): presumably _LA() translates EBCDIC to the internal
 *  encoding - confirm in antlr3intstream.inl.
 */
template<class ImplTraits, class SuperType>
class EBCDIC_IntStream : public IntStream<ImplTraits, SuperType>
{
public:
	/// Look ahead in the stream; \p i is the look-ahead offset
	/// (IntStream::_LA documents i=1 as the next element).
	ANTLR_UINT32	_LA( ANTLR_INT32 i);

protected:
	/// Set-up hook for this stream type; takes no endian flags.
	void setupIntStream();
};

/** IntStream variant for UTF-8 input (per the class name).
 *
 *  Redeclares _LA() and consume(), hiding the IntStream versions for code
 *  using this type.
 */
template<class ImplTraits, class SuperType>
class UTF8_IntStream : public IntStream<ImplTraits, SuperType>
{
public:
	/// Look ahead in the stream; \p i is the look-ahead offset
	/// (IntStream::_LA documents i=1 as the next element).
	ANTLR_UINT32	_LA( ANTLR_INT32 i);

	/// Consume the next element of the stream.
	void consume();

protected:
	/// Set-up hook taking the byte order of the machine and of the input.
	void setupIntStream(bool machineBigEndian, bool inputBigEndian);

private:
	// Static accessors returning pointers to constant data.
	// NOTE(review): presumably the trailing-byte-count and offset lookup
	// tables used when decoding UTF-8 - confirm in the .inl.
	static const ANTLR_UINT32* TrailingBytesForUTF8();
	static const UTF32* OffsetsFromUTF8();
};

/** IntStream variant for UTF16 input.
 *
 *  The public _LA()/consume()/index()/seek() hide the IntStream versions for
 *  code using this type. The protected overloads taking a ClassForwarder<>
 *  tag are the per-byte-order variants.
 */
template<class ImplTraits, class SuperType>
class UTF16_IntStream : public IntStream<ImplTraits, SuperType>
{
public:
	/// Look ahead in the stream; \p i is the look-ahead offset
	/// (IntStream::_LA documents i=1 as the next element).
	ANTLR_UINT32	_LA( ANTLR_INT32 i);

	/// Consume the next character of the stream.
	void		    consume();

	/// Current index in the stream, as an ANTLR_MARKER.
	ANTLR_MARKER	index();

	/// Move the input cursor to \p seekPoint.
	void seek(ANTLR_MARKER seekPoint);

protected:
	/// Set-up hook taking the byte order of the machine and of the input.
	void setupIntStream(bool machineBigEndian, bool inputBigEndian);

	/// \brief Return the input element for the byte-order-agnostic case
	///
	/// NOTE(review): the earlier text here read "assuming an 8 bit ascii input",
	/// which looks copied from another routine; presumably this is the path
	/// where input and machine byte order agree - confirm in the .inl.
	///
	/// \param[in] i 1 based offset of next input stream element
	///
	/// \return Next input character in internal ANTLR3 encoding (UTF32)
	///
	ANTLR_UINT32	_LA( ANTLR_INT32 i, ClassForwarder<BYTE_AGNOSTIC> );

	/// \brief Return the input element assuming a UTF16 input when the input is Little Endian and the machine is not
	///
	/// \param[in] i 1 based offset of next input stream element
	///
	/// \return Next input character in internal ANTLR3 encoding (UTF32)
	///
	ANTLR_UINT32	_LA( ANTLR_INT32 i, ClassForwarder<ANTLR_LITTLE_ENDIAN> );
	
	/// \brief Return the input element assuming a UTF16 input when the input is Big Endian and the machine is not
	///
	/// \param[in] i 1 based offset of next input stream element
	///
	/// \return Next input character in internal ANTLR3 encoding (UTF32)
	///
	ANTLR_UINT32	_LA( ANTLR_INT32 i, ClassForwarder<ANTLR_BIG_ENDIAN> );

	/// \brief Consume the next character in a UTF16 input stream
	/// (byte-order-agnostic variant, selected by the BYTE_AGNOSTIC tag)
	///
	void	consume( ClassForwarder<BYTE_AGNOSTIC> );

	/// \brief Consume the next character in a UTF16 input stream when the input is Little Endian and the machine is not
	/// Note that the UTF16 routines do not do any substantial verification of the input stream as for performance
	/// sake, we assume it is validly encoded. So if a low surrogate is found at the current input position then we
	/// just consume it. Surrogate pairs should be seen as Hi, Lo. So if we have a Lo first, then the input stream
	/// is fubar but we just ignore that.
	///
	void	consume( ClassForwarder<ANTLR_LITTLE_ENDIAN> );

	/// \brief Consume the next character in a UTF16 input stream when the input is Big Endian and the machine is not
	///
	void	consume( ClassForwarder<ANTLR_BIG_ENDIAN> );
};



/** IntStream variant for UTF32 input (per the class name).
 *
 *  The public _LA()/consume()/index()/seek() hide the IntStream versions for
 *  code using this type. The protected overloads taking a ClassForwarder<>
 *  tag cover the run-time-resolved, byte-order-agnostic, little endian and
 *  big endian cases.
 */
template<class ImplTraits, class SuperType>
class UTF32_IntStream : public IntStream<ImplTraits, SuperType>
{
public:
	/// Look ahead in the stream; \p i is the look-ahead offset
	/// (IntStream::_LA documents i=1 as the next element).
	ANTLR_UINT32	_LA( ANTLR_INT32 i);

	/// Consume the next element of the stream.
	void		    consume();
	
	/// \brief Calculate the current index in the output stream.
	///
	ANTLR_MARKER	index();

	/// Move the input cursor to \p seekPoint.
	void seek(ANTLR_MARKER seekPoint);

protected:
	/// Set-up hook taking the byte order of the machine and of the input.
	void setupIntStream(bool machineBigEndian, bool inputBigEndian);

	// Look-ahead overloads, one per tag type.
	// NOTE(review): the RESOLVE_ENDIAN_AT_RUNTIME overload presumably
	// dispatches to one of the other three - confirm in the .inl.
	ANTLR_UINT32	_LA( ANTLR_INT32 i, ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> );
	ANTLR_UINT32	_LA( ANTLR_INT32 i, ClassForwarder<BYTE_AGNOSTIC> );
	ANTLR_UINT32	_LA( ANTLR_INT32 i, ClassForwarder<ANTLR_LITTLE_ENDIAN> );
	ANTLR_UINT32	_LA( ANTLR_INT32 i, ClassForwarder<ANTLR_BIG_ENDIAN> );

	// Consume overloads, one per tag type, mirroring the _LA set above.
	void	consume( ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> );
	void	consume( ClassForwarder<BYTE_AGNOSTIC> );
	void	consume( ClassForwarder<ANTLR_LITTLE_ENDIAN> );
	void	consume( ClassForwarder<ANTLR_BIG_ENDIAN> );
};

/** Integer-stream interface for token streams.
 *
 *  Derives from IntStream with ImplTraits::TokenStreamType as the SuperType
 *  argument, and redeclares the stream operations so that these versions
 *  hide the IntStream ones for code using this type.
 *
 *  \tparam ImplTraits  Traits bundle; supplies CommonTokenType, StringType
 *                      and TokenStreamType.
 */
template<class ImplTraits>
class TokenIntStream : public IntStream<ImplTraits, typename ImplTraits::TokenStreamType >
{
public:
	typedef typename ImplTraits::CommonTokenType CommonTokenType;
	typedef typename ImplTraits::StringType StringType;
	typedef typename ImplTraits::TokenStreamType TokenStreamType;
	typedef IntStream<ImplTraits, TokenStreamType > BaseType;

private:
	/** Cached size of the stream. The cost of an indirect call, though small
     *  in individual cases, can mount up if there are thousands of tokens
     *  (very large input streams), so callers of size can optionally use
     *  this cached size field instead.
     */
    ANTLR_UINT32	    m_cachedSize;

public:
	TokenIntStream();

	// Getter and setter for the cached size field above.
	ANTLR_UINT32 get_cachedSize() const;
	void set_cachedSize( ANTLR_UINT32 cachedSize );

	// Token-stream versions of the IntStream operations; see the
	// descriptions on the IntStream declarations of the same names.
	void consume();
	void  consumeInitialHiddenTokens();
	ANTLR_UINT32  _LA( ANTLR_INT32 i );
	ANTLR_MARKER  mark();
	ANTLR_UINT32  size();
	// NOTE(review): takes no argument, whereas IntStream::release takes an
	// ANTLR_MARKER; this declaration hides the base one for this type.
	void release();
	ANTLR_MARKER  tindex();
	void rewindLast();
	void rewind(ANTLR_MARKER marker);
	void seek(ANTLR_MARKER index);
	StringType getSourceName();

};

/** Integer-stream interface for tree node streams.
 *
 *  Derives from IntStream with ImplTraits::CommonTreeNodeStreamType as the
 *  SuperType argument, and redeclares the stream operations so that these
 *  versions hide the IntStream ones for code using this type.
 *
 *  \tparam ImplTraits  Traits bundle; supplies CommonTreeNodeStreamType,
 *                      TreeType and CommonTokenType.
 */
template<class ImplTraits>
class TreeNodeIntStream : public IntStream<ImplTraits, typename ImplTraits::CommonTreeNodeStreamType>
{
public:
	typedef typename ImplTraits::CommonTreeNodeStreamType CommonTreeNodeStreamType;
	typedef IntStream<ImplTraits, CommonTreeNodeStreamType > BaseType;
	// TreeType and CommonTokenType are not used by the declarations below;
	// presumably they are for the out-of-line definitions - confirm in the .inl.
	typedef typename ImplTraits::TreeType TreeType;
	typedef typename ImplTraits::CommonTokenType CommonTokenType;

public:
	// Tree-node-stream versions of the IntStream operations; see the
	// descriptions on the IntStream declarations of the same names.
	// size() has no counterpart declared in IntStream.
	void				consume();
	ANTLR_MARKER		tindex();
	ANTLR_UINT32		_LA(ANTLR_INT32 i);
	ANTLR_MARKER		mark();
	void				release(ANTLR_MARKER marker);
	void				rewindMark(ANTLR_MARKER marker);
	void				rewindLast();
	void				seek(ANTLR_MARKER index);
	ANTLR_UINT32		size();
};

ANTLR_END_NAMESPACE()

#include "antlr3intstream.inl"

#endif