C++程序  |  514行  |  19.65 KB

#ifndef	ANTLR3COLLECTIONS_H
#define	ANTLR3COLLECTIONS_H

// [The "BSD licence"]
// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
// http://www.temporal-wave.com
// http://www.linkedin.com/in/jimidle
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include    <antlr3defs.h>
#include    <antlr3bitset.h>

#define	ANTLR3_HASH_TYPE_INT	0   /**< Indicates the hashed file has integer keys */
#define	ANTLR3_HASH_TYPE_STR	1   /**< Indicates the hashed file has numeric keys */

#ifdef __cplusplus
extern "C" {
#endif

typedef struct ANTLR3_HASH_KEY_struct
{
	ANTLR3_UINT8	type;	/**< One of ##ANTLR3_HASH_TYPE_INT or ##ANTLR3_HASH_TYPE_STR	*/

	union
	{
		pANTLR3_UINT8   sKey;	/**< Used if type is ANTLR3_HASH_TYPE_STR			*/
		ANTLR3_INTKEY   iKey;	/**< used if type is ANTLR3_HASH_TYPE_INT			*/
	}
	key;

} ANTLR3_HASH_KEY, *pANTLR3_HASH_KEY;

/** Internal structure representing an element in a hash bucket.
 *  Stores the original key so that duplicate keys can be rejected
 *  if necessary, and contains function can be supported. If the hash key
 *  could be unique I would have invented the perfect compression algorithm ;-)
 */
typedef	struct	ANTLR3_HASH_ENTRY_struct
{
    /** Key that created this particular entry
     */
    ANTLR3_HASH_KEY 	keybase;

    /** Pointer to the data for this particular entry
     */
    void	    * data;

    /** Pointer to routine that knows how to release the memory
     *  structure pointed at by data. If this is NULL then we assume
     *  that the data pointer does not need to be freed when the entry
     *  is deleted from the table.
     */
    void	    (ANTLR3_CDECL *free)(void * data);

    /** Pointer to the next entry in this bucket if there
     *  is one. Sometimes different keys will hash to the same bucket (especially
     *  if the number of buckets is small). We could implement dual hashing algorithms
     *  to minimize this, but that seems over the top for what this is needed for.
     */
    struct	ANTLR3_HASH_ENTRY_struct * nextEntry;
}
    ANTLR3_HASH_ENTRY;

/** Internal structure of a hash table bucket, which tracks
 *  all keys that hash to the same bucket.
 */
typedef struct	ANTLR3_HASH_BUCKET_struct
{
    /** Pointer to the first entry in the bucket (if any, it
     *  may be NULL). Duplicate entries are chained from
     * here.
     */
    pANTLR3_HASH_ENTRY	entries;
    
}
    ANTLR3_HASH_BUCKET;

/** Structure that tracks a hash table
 */
typedef	struct	ANTLR3_HASH_TABLE_struct
{
    /** Indicates whether the table allows duplicate keys
     */
    int					allowDups;

    /** Number of buckets available in this table
     */
    ANTLR3_UINT32		modulo;

    /** Points to the memory where the array of buckets
     * starts.
     */
    pANTLR3_HASH_BUCKET	buckets;

    /** How many elements currently exist in the table.
     */
    ANTLR3_UINT32		count;

    /** Whether the hash table should strdup the keys it is given or not.
     */
    ANTLR3_BOOLEAN              doStrdup;

    /** Pointer to function to completely delete this table
     */
    void				(*free)	    (struct ANTLR3_HASH_TABLE_struct * table);
    
    /* String keyed hashtable functions */
    void				(*del)	    (struct ANTLR3_HASH_TABLE_struct * table, void * key);
    pANTLR3_HASH_ENTRY	(*remove)   (struct ANTLR3_HASH_TABLE_struct * table, void * key);
    void *				(*get)	    (struct ANTLR3_HASH_TABLE_struct * table, void * key);
    ANTLR3_INT32		(*put)	    (struct ANTLR3_HASH_TABLE_struct * table, void * key, void * element, void (ANTLR3_CDECL *freeptr)(void *));

    /* Integer based hash functions */
    void				(*delI)	    (struct ANTLR3_HASH_TABLE_struct * table, ANTLR3_INTKEY key);
    pANTLR3_HASH_ENTRY	(*removeI)  (struct ANTLR3_HASH_TABLE_struct * table, ANTLR3_INTKEY key);
    void *				(*getI)	    (struct ANTLR3_HASH_TABLE_struct * table, ANTLR3_INTKEY key);
    ANTLR3_INT32		(*putI)	    (struct ANTLR3_HASH_TABLE_struct * table, ANTLR3_INTKEY key, void * element, void (ANTLR3_CDECL *freeptr)(void *));

    ANTLR3_UINT32		(*size)	    (struct ANTLR3_HASH_TABLE_struct * table);
}
    ANTLR3_HASH_TABLE;


/** Internal structure representing an enumeration of a table.
 *  This is returned by antlr3Enumeration()
 *  Allows the programmer to traverse the table in hash order without 
 *  knowing what is in the actual table.
 *
 *  Note that it is up to the caller to ensure that the table
 *  structure does not change in the hash bucket that is currently being
 *  enumerated as this structure just tracks the next pointers in the
 *  bucket series.
 */
typedef struct	ANTLR3_HASH_ENUM_struct
{
    /* Pointer to the table we are enumerating
     */
    pANTLR3_HASH_TABLE	table;

    /* Bucket we are currently enumerating (if NULL then we are done)
     */
    ANTLR3_UINT32	bucket;

    /* Next entry to return, if NULL, then move to next bucket if any
     */
    pANTLR3_HASH_ENTRY	entry;

    /* Interface
     */
    int		(*next)	    (struct ANTLR3_HASH_ENUM_struct * en, pANTLR3_HASH_KEY *key, void ** data);
    void	(*free)	    (struct ANTLR3_HASH_ENUM_struct * table);
}
    ANTLR3_HASH_ENUM;

/** Structure that represents a LIST collection
 */
typedef	struct	ANTLR3_LIST_struct
{
    /** Hash table that is storing the list elements
     */
    pANTLR3_HASH_TABLE	table;

    void			(*free)		(struct ANTLR3_LIST_struct * list);
    void			(*del)		(struct ANTLR3_LIST_struct * list, ANTLR3_INTKEY key);
    void *			(*get)		(struct ANTLR3_LIST_struct * list, ANTLR3_INTKEY key);
    void *			(*remove)	(struct ANTLR3_LIST_struct * list, ANTLR3_INTKEY key);
    ANTLR3_INT32    (*add)		(struct ANTLR3_LIST_struct * list, void * element, void (ANTLR3_CDECL *freeptr)(void *));
    ANTLR3_INT32    (*put)		(struct ANTLR3_LIST_struct * list, ANTLR3_INTKEY key, void * element, void (ANTLR3_CDECL *freeptr)(void *));
    ANTLR3_UINT32   (*size)		(struct ANTLR3_LIST_struct * list);
    
}
    ANTLR3_LIST;

/** Structure that represents a Stack collection
 */
typedef	struct	ANTLR3_STACK_struct
{
    /** List that supports the stack structure
     */
    pANTLR3_VECTOR  vector;

    /** Used for quick access to the top of the stack
     */
    void *	    top;
    void			(*free)	(struct ANTLR3_STACK_struct * stack);
    void *			(*pop)	(struct ANTLR3_STACK_struct * stack);
    void *			(*get)	(struct ANTLR3_STACK_struct * stack, ANTLR3_INTKEY key);
    ANTLR3_BOOLEAN  (*push)	(struct ANTLR3_STACK_struct * stack, void * element, void (ANTLR3_CDECL *freeptr)(void *));
    ANTLR3_UINT32   (*size)	(struct ANTLR3_STACK_struct * stack);
    void *			(*peek)	(struct ANTLR3_STACK_struct * stack);

}
    ANTLR3_STACK;

/* Structure that represents a vector element
 */
typedef struct ANTLR3_VECTOR_ELEMENT_struct
{
    void    * element;
    void (ANTLR3_CDECL *freeptr)(void *);
}
    ANTLR3_VECTOR_ELEMENT, *pANTLR3_VECTOR_ELEMENT;

#define ANTLR3_VECTOR_INTERNAL_SIZE     16
/* Structure that represents a vector collection. A vector is a simple list
 * that contains a pointer to the element and a pointer to a function that
 * that can free the element if it is removed. It auto resizes but does not
 * use hash techniques as it is referenced by a simple numeric index. It is not a 
 * sparse list, so if any element is deleted, then the ones following are moved
 * down in memory and the count is adjusted.
 */
typedef struct ANTLR3_VECTOR_struct
{
    /** Array of pointers to vector elements
     */
    pANTLR3_VECTOR_ELEMENT  elements;

    /** Number of entries currently in the list;
     */
    ANTLR3_UINT32   count;

    /** Many times, a vector holds just a few nodes in an AST and it
     * is too much overhead to malloc the space for elements so
     * at the expense of a few bytes of memory, we hold the first
     * few elements internally. It means we must copy them when
     * we grow beyond this initial size, but that is less overhead than
     * the malloc/free callas we would otherwise require.
     */
    ANTLR3_VECTOR_ELEMENT   internal[ANTLR3_VECTOR_INTERNAL_SIZE];

    /** Indicates if the structure was made by a factory, in which
     *  case only the factory can free the memory for the actual vector,
     *  though the vector free function is called and will recurse through its
     *  entries calling any free pointers for each entry.
     */
    ANTLR3_BOOLEAN  factoryMade;

    /** Total number of entries in elements at any point in time
     */
    ANTLR3_UINT32   elementsSize;

    void			(ANTLR3_CDECL *free)	(struct ANTLR3_VECTOR_struct * vector);
    void			(*del)					(struct ANTLR3_VECTOR_struct * vector, ANTLR3_UINT32 entry);
    void *			(*get)					(struct ANTLR3_VECTOR_struct * vector, ANTLR3_UINT32 entry);
    void *			(*remove)				(struct ANTLR3_VECTOR_struct * vector, ANTLR3_UINT32 entry);
    void			(*clear)				(struct ANTLR3_VECTOR_struct * vector);
    ANTLR3_BOOLEAN              (*swap)                 (struct ANTLR3_VECTOR_struct *, ANTLR3_UINT32 entry1, ANTLR3_UINT32 entry2);
    ANTLR3_UINT32   (*add)					(struct ANTLR3_VECTOR_struct * vector, void * element, void (ANTLR3_CDECL *freeptr)(void *));
    ANTLR3_UINT32   (*set)					(struct ANTLR3_VECTOR_struct * vector, ANTLR3_UINT32 entry, void * element, void (ANTLR3_CDECL *freeptr)(void *), ANTLR3_BOOLEAN freeExisting);
    ANTLR3_UINT32   (*size)					(struct ANTLR3_VECTOR_struct * vector);
}
    ANTLR3_VECTOR;

/** Default vector pool size if otherwise unspecified
 */
#define ANTLR3_FACTORY_VPOOL_SIZE 256

/** Structure that tracks vectors in a vector and auto deletes the vectors
 *  in the vector factory when closed.
 */
typedef struct ANTLR3_VECTOR_FACTORY_struct
{

        /** List of all vector pools allocated so far
         */
        pANTLR3_VECTOR      *pools;

        /** Count of the vector pools allocated so far (current active pool)
         */
        ANTLR3_INT32         thisPool;

        /** The next vector available in the pool
         */
        ANTLR3_UINT32        nextVector;

        /** Trick to quickly initialize a new vector via memcpy and not a function call
         */
        ANTLR3_VECTOR        unTruc;

		/** Consumers from the factory can release a factory produced vector 
		 * back to the factory so that it may be reused (and thus conserve memory)
		 * by another caller. The available vectors are stored here. Note that
		 * the only vectors avaible in the free chain are produced by this factory, so they
		 * need not be explicitly freed when the factory is closed.
		 */
		pANTLR3_STACK		 freeStack;

       	/** Function to close the vector factory
	 */
	void                (*close)	    (struct ANTLR3_VECTOR_FACTORY_struct * factory);

	/** Function to supply a new vector
	 */
	pANTLR3_VECTOR      (*newVector)    (struct ANTLR3_VECTOR_FACTORY_struct * factory);

	/// Function to return a vector to the factory for reuse
	///
	void				(*returnVector)	(struct ANTLR3_VECTOR_FACTORY_struct * factory, pANTLR3_VECTOR vector);

}
ANTLR3_VECTOR_FACTORY; 
    
    
/* -------------- TRIE Interfaces ---------------- */


/** Structure that holds the payload entry in an ANTLR3_INT_TRIE or ANTLR3_STRING_TRIE
 */
typedef struct ANTLR3_TRIE_ENTRY_struct
{
	ANTLR3_UINT32   type;
	void (ANTLR3_CDECL *freeptr)(void *);
	union
	{
		ANTLR3_INTKEY     intVal;
		void		* ptr;
	} data;

	struct ANTLR3_TRIE_ENTRY_struct	* next;	    /* Allows duplicate entries for same key in insertion order	*/
}
ANTLR3_TRIE_ENTRY, * pANTLR3_TRIE_ENTRY;


/** Structure that defines an element/node in an ANTLR3_INT_TRIE
 */
typedef struct ANTLR3_INT_TRIE_NODE_struct
{
    ANTLR3_UINT32							  bitNum;	/**< This is the left/right bit index for traversal along the nodes				*/
    ANTLR3_INTKEY							  key;		/**< This is the actual key that the entry represents if it is a terminal node  */
    pANTLR3_TRIE_ENTRY						  buckets;	/**< This is the data bucket(s) that the key indexes, which may be NULL			*/
    struct ANTLR3_INT_TRIE_NODE_struct	    * leftN;	/**< Pointer to the left node from here when sKey & bitNum = 0					*/
    struct ANTLR3_INT_TRIE_NODE_struct	    * rightN;	/**< Pointer to the right node from here when sKey & bitNum, = 1				*/
}
    ANTLR3_INT_TRIE_NODE, * pANTLR3_INT_TRIE_NODE;

/** Structure that defines an ANTLR3_INT_TRIE. For this particular implementation,
 *  as you might expect, the key is turned into a "string" by looking at bit(key, depth)
 *  of the integer key. Using 64 bit keys gives us a depth limit of 64 (or bit 0..63)
 *  and potentially a huge trie. This is the algorithm for a Patricia Trie.
 *  Note also that this trie [can] accept multiple entries for the same key and is
 *  therefore a kind of elastic bucket patricia trie.
 *
 *  If you find this code useful, please feel free to 'steal' it for any purpose
 *  as covered by the BSD license under which ANTLR is issued. You can cut the code
 *  but as the ANTLR library is only about 50K (Windows Vista), you might find it 
 *  easier to just link the library. Please keep all comments and licenses and so on
 *  in any version of this you create of course.
 *
 *  Jim Idle.
 *  
 */
typedef struct ANTLR3_INT_TRIE_struct
{
    pANTLR3_INT_TRIE_NODE   root;			/* Root node of this integer trie					*/
    pANTLR3_INT_TRIE_NODE   current;		/* Used to traverse the TRIE with the next() method	*/
    ANTLR3_UINT32			count;			/* Current entry count								*/
    ANTLR3_BOOLEAN			allowDups;		/* Whether this trie accepts duplicate keys			*/

    
    pANTLR3_TRIE_ENTRY	(*get)	(struct ANTLR3_INT_TRIE_struct * trie, ANTLR3_INTKEY key);
    ANTLR3_BOOLEAN		(*del)	(struct ANTLR3_INT_TRIE_struct * trie, ANTLR3_INTKEY key);
    ANTLR3_BOOLEAN		(*add)	(struct ANTLR3_INT_TRIE_struct * trie, ANTLR3_INTKEY key, ANTLR3_UINT32 type, ANTLR3_INTKEY intVal, void * data, void (ANTLR3_CDECL *freeptr)(void *));
    void				(*free)	(struct ANTLR3_INT_TRIE_struct * trie);

}
    ANTLR3_INT_TRIE;

/**
 * A topological sort system that given a set of dependencies of a node m on node n,
 * can sort them in dependency order. This is a generally useful utility object
 * that does not care what the things are it is sorting. Generally the set
 * to be sorted will be numeric indexes into some other structure such as an ANTLR3_VECTOR.
 * I have provided a sort method that given ANTLR3_VECTOR as an input will sort
 * the vector entries in place, as well as a sort method that just returns an
 * array of the sorted noded indexes, in case you are not sorting ANTLR3_VECTORS but
 * some set of your own device.
 *
 * Of the two main algorithms that could be used, I chose to use the depth first
 * search for unvisited nodes as a) This runs in linear time, and b) it is what
 * we used in the ANTLR Tool to perform a topological sort of the input grammar files
 * based on their dependencies.
 */
typedef struct ANTLR3_TOPO_struct
{
    /**
     * A vector of vectors of edges, built by calling the addEdge method()
     * to indicate that node number n depends on node number m. Each entry in the vector
     * contains a bitset, which has a bit index set for each node upon which the
     * entry node depends.
     */
    pANTLR3_BITSET  * edges;

    /**
     * A vector used to build up the sorted output order. Note that
     * as the vector contains UINT32 then the maximum node index is
     * 'limited' to 2^32, as nodes should be zero based.
     */
    pANTLR3_UINT32    sorted;

    /**
     * A vector used to detect cycles in the edge dependecies. It is used
     * as a stack and each time we descend a node to one of its edges we
     * add the node into this stack. If we find a node that we have already
     * visited in the stack, then it means there wasa cycle such as 9->8->1->9
     * as the only way a node can be on the stack is if we are currently
     * descnding from it as we remove it from the stack as we exit from
     * descending its dependencies
     */
    pANTLR3_UINT32    cycle;

    /**
     * A flag that indicates the algorithm found a cycle in the edges
     * such as 9->8->1->9
     * If this flag is set after you have called one of the sort routines
     * then the detected cycle will be contained in the cycle array and
     * cycleLimit will point to the one after the last entry in the cycle.
     */
    ANTLR3_BOOLEAN    hasCycle;

    /**
     * A watermark used to accumulate potential cycles in the cycle array.
     * This should be zero when we are done. Check hasCycle after calling one
     * of the sort methods and if it is ANTLR3_TRUE then you can find the cycle
     * in cycle[0]...cycle[cycleMark-1]
     */
    ANTLR3_UINT32     cycleMark;
    
    /**
     * One more than the largest node index that is contained in edges/sorted.
     */
    ANTLR3_UINT32     limit;

    /**
     * The set of visited nodes as determined by a set entry in
     * the bitmap.
     */
    pANTLR3_BITSET    visited;

    /**
     * A method that adds an edge from one node to another. An edge
     * of n -> m indicates that node n is dependent on node m. Note that
     * while building these edges, it is perfectly OK to add nodes out of
     * sequence. So, if you have edges:
     *
     * 3 -> 0
     * 2 -> 1
     * 1 -> 3
     *
     * The you can add them in that order and so add node 3 before nodes 2 and 1
     *
     */
    void            (*addEdge)          (struct ANTLR3_TOPO_struct * topo, ANTLR3_UINT32 edge, ANTLR3_UINT32 dependency);


    /**
     * A method that returns a pointer to an array of sorted node indexes.
     * The array is sorted in topological sorted order. Note that the array
     * is only as large as the largest node index you created an edge for. This means
     * that if you had an input of 32 nodes, but that largest node with an edge
     * was 16, then the returned array will be the sorted order of the first 16
     * nodes and the last 16 nodes of your array are basically fine as they are
     * as they had no dependencies and do not need any particular sort order.
     *
     * NB: If the structure that contains the array is freed, then the sorted
     * array will be freed too so you should use the value of limit to
     * make a long term copy of this array if you do not want to keep the topo
     * structure around as well.
     */
    pANTLR3_UINT32  (*sortToArray)      (struct ANTLR3_TOPO_struct * topo);

    /** 
     * A method that sorts the supplied ANTLR3_VECTOR in place based
     * on the previously supplied edge data.
     */
    void            (*sortVector)       (struct ANTLR3_TOPO_struct * topo, pANTLR3_VECTOR v);

    /**
     *  A method to free this structure and any associated memory.
     */
    void            (*free)             (struct ANTLR3_TOPO_struct * topo);
}
    ANTLR3_TOPO;
    
#ifdef __cplusplus
}
#endif

#endif