// C++ source file  |  181 lines  |  7.6 KB

/*---------------------------------------------------------------------------*
 *  grxmldoc.h  *
 *                                                                           *
 *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
 *                                                                           *
 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
 *  you may not use this file except in compliance with the License.         *
 *                                                                           *
 *  You may obtain a copy of the License at                                  *
 *      http://www.apache.org/licenses/LICENSE-2.0                           *
 *                                                                           *
 *  Unless required by applicable law or agreed to in writing, software      *
 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
 *  See the License for the specific language governing permissions and      *
 *  limitations under the License.                                           *
 *                                                                           *
 *---------------------------------------------------------------------------*/


#ifndef __grxmldoc_h__
#define  __grxmldoc_h__

// #define MEMTRACE // Uses mtrace() to detect leaks

#include "hashmap.h"
#include "tinyxml.h"
#include <stack>
#include "vocab.h"

// Prefix string and its length in characters; the two macros must be kept in
// sync (the length of "_" is 1).
// NOTE(review): presumably the prefix marks script labels -- confirm against
// the users of these macros in the implementation files.
#define SCRIPT_LABEL_PREFIX "_"
#define SCRIPT_LABEL_PREFIX_LEN 1
// Forward declarations of types referenced by the GRXMLDoc declaration below.
// Graph and SubGraph are used there only through pointers and references.
// HashMap is held by value as a data member, so its complete definition is
// still required (presumably supplied by "hashmap.h" -- confirm).
class Node;
template <typename T1, typename T2> class HashMap;
class Graph;
class SubGraph;

// GRXMLDoc: takes a TinyXML DOM node for a grammar document and, per
// parseGrammar() below, creates the word graph together with the SubGraph,
// rule, tag and label lists. It also declares writers for the map, script,
// graph and params output files.
//
// NOTE(review): the class holds raw pointers (m_pGraph, m_pVocab, m_pModel)
// and declares a destructor but no copy constructor / copy assignment.
// Whether copying is safe depends on what the destructor releases, which is
// not visible from this header -- confirm before copying instances.
class GRXMLDoc
{
public:
    typedef TiXmlNode XMLNode;

    // Some convenience items for string comparison.
    // Declared as a plain enum: the previous "typedef enum KeywordValues {...};"
    // had no declarator, so the typedef declared nothing and only triggered
    // compiler warnings. Enumerator order (and therefore values) is unchanged.
    // NOTE(review): NodeTypeCount sits in the middle of the list, so it looks
    // like a node kind (cf. beginCount/endCount) rather than an enumerator
    // count -- confirm.
    enum KeywordValues {
        NodeTypeGrammar,
        NodeTypeRule,
        NodeTypeRuleReference,
        NodeTypeOneOf,
        NodeTypeItem,
        NodeTypeTag,
        NodeTypeCount,
        NodeTypeMeta,
        NodeTypeBadValue
    };
    typedef std::map<std::string, KeywordValues> KEYWDPAIR;

    // Per-item state; pointers to these are kept on m_ItemVarsStack.
    struct ItemData {
        bool hasRuleRef;
        std::string RuleRefName;
        int tagID;
    };

    GRXMLDoc();
    ~GRXMLDoc();

    // Optional use of voc and model files
    // TODO: Rearrange access to voc and models
#ifndef OPENFSTSDK
    void initialize_SR(char* parfile);
    void shutdown_SR();
    // Accessors return the stored raw pointers as-is.
    Vocabulary *getVocabulary() { return m_pVocab; }
    AcousticModel* getModel() { return m_pModel; }
    int addPhonemeToList( std::string const& s );
    bool findPhoneme( int i, std::string & s );
    bool getHMMSequence (int centre, int left, int right, std::vector<int> & modelSequence);
#endif

    //  Lookup functions
    //  Each returns bool and delivers its result through the trailing
    //  reference parameter (presumably true means "found" -- confirm in the
    //  implementation).
    bool findSubGraph(std::string & s, SubGraph *&p_SubGraph);
    bool findRule(int i, std::string &s );
    bool findTag(int i, std::string &s );
    bool findLabel(int i, std::string &s );
    bool findSubGraphIndex( SubGraph *p_SubGraph, std::string &s );
    bool findRuleIndex( std::string s, int &i );
    bool findTagIndex( std::string s, int &i );
    bool findLabelIndex( std::string s, int &i );
    bool findSortedLabel(int i, std::string &s );
    bool findSortedLabelIndex( int i, int &sortedIndex );
    bool findMeta(const std::string & sn, std::string &s);
    bool setMeta(const std::string & sn, const std::string &s);
    void sortLabels();
    void addOLabelToOList( std::string & s);
    bool WriteOLabels(const std::string& fileName);

    // Take DOM object and create word graph. Creates SubGraph, rule, tag and label lists.
    bool parseGrammar( XMLNode &node, std::string & xMLFileName );

    // Generate output files
    void writeMapFile( std::string & fileName );
    void writeScriptFile( std::string & fileName );
    void writeGraphFiles( std::string & fileName, bool bDoWriteRecogGraphs );
    void writeParamsFile( std::string & fileName );
    void printLists();
    void printSubgraphs();

protected:
    void initializeLists();

    // Node handlers. The begin*/end* pairs take the XML node being handled
    // and, where present, the current SubGraph pointer by reference.
    bool parseNode( XMLNode &node, SubGraph *&p_SubGraph, const unsigned int level );
    bool beginNode( XMLNode &node, SubGraph *&p_SubGraph, const unsigned int level );
    bool endNode( XMLNode &node, SubGraph *&p_SubGraph, const unsigned int level );
    bool beginParseGrammarNode( XMLNode &node );
    bool endParseGrammarNode( XMLNode &node );
    bool beginParseMetaNode( XMLNode &node );
    bool endParseMetaNode( XMLNode &node );
    bool beginParseRuleNode( XMLNode &node, SubGraph *&p_SubGraph);
    bool endParseRuleNode( XMLNode &node, SubGraph *&p_SubGraph );
    bool beginItem( XMLNode &node, SubGraph *&p_SubGraph );
    bool endItem( XMLNode &node, SubGraph *&p_SubGraph );
    bool processCDATA( XMLNode &node, SubGraph *&p_SubGraph );
    bool beginOneOf( XMLNode &node, SubGraph *&p_SubGraph );
    bool endOneOf( XMLNode &node, SubGraph *&p_SubGraph );
    bool beginRuleRef( XMLNode &grmNode, SubGraph *&p_SubGraph );
    bool endRuleRef(XMLNode &node, SubGraph *&p_SubGraph );
    bool fixRuleRef( SubGraph *&p_SubGraph );
    bool getRuleRefName(XMLNode &node, std::string &ruleName);
    bool extendAltExpression( XMLNode &node, int level );
    bool beginTag( XMLNode &node, SubGraph *&p_SubGraph );
    bool endTag( XMLNode &node, SubGraph *&p_SubGraph );
    bool beginCount( XMLNode &node, SubGraph *&p_SubGraph );
    bool endCount( XMLNode &node, SubGraph *&p_SubGraph );
    void printNode( XMLNode &node, int level );

    // List maintenance helpers
    bool addRuleToList(std::string const& ruleName, SubGraph *&p_SubGraph);
    bool deleteRules();
    bool addTagToList( std::string const& s );
    bool addLabelToList( std::string const& s );
    void printSubgraph( SubGraph &p_SubGraph );

private:

    Graph *m_pGraph;    // The top-level container object for the word graph;
    KEYWDPAIR  m_NodeKeyWords;

    // The unique attributes of the GRXML doc
    std::string m_XMLMode;
    std::string m_XMLLanguage;
    std::string m_RootRule;
    std::string m_XMLTagFormat;
    std::string m_XMLVersion;
    std::string m_XMLBase;
    std::string m_XMLFileName;

    //  We store indices for all labels used in the word graph.
    //  Store all these labels in the m_LabelList table, which is auto-indexed.
    //  We need a list of the rule names so that we can distinguish them from other labels.
    //  Store these rule names in the m_RuleList table with an index equal to the label index for the rule.
    //  Thus, when we need the index of a rule, we go straight to m_RuleList
    //  and when we need the label of a rule or any other item we use m_LabelList.

    HashMap<std::string,SubGraph*> m_SubgraphList;
    HashMap<int,std::string> m_TagList;         // <item tag = ...
    HashMap<int,std::string> m_LabelList;       // Stores all network label IDs, including rule names
    HashMap<int,std::string> m_SortedLabelList; // Used to sort the labels (presumably filled by sortLabels() -- confirm)
    HashMap<int, std::string> m_PhonemeList;    // Stores triphones
    HashMap<std::string,int> m_RuleList;        // Stores rule name and index used in the LabelList. Use to distinguish which are rules.
    HashMap<int, std::string> m_RuleScope;
    HashMap<int, std::string> m_SlotList;
    HashMap<std::string, std::string> m_MetaKeyValPairs; // Store word-penalty value
    HashMap<std::string, int> m_OutputPtxtLabels;

    std::stack<ItemData*> m_ItemVarsStack;
    std::stack<std::string> m_RuleListStack;

    // Auto-index counters for the lists above.
    int m_RuleAutoIndex;
    int m_TagAutoIndex;
    int m_LabelAutoIndex;
    int m_PhonemeAutoIndex;
    int m_ExpandedRulesAutoIndex;
    int m_TagID; // Use to stash tag index for items.
    // Note the subgraph list does not have an auto-index as it is string-indexed.
    // All these lists also have an internal numeric index which can be used.

#ifndef OPENFSTSDK
    Vocabulary *m_pVocab;
    AcousticModel *m_pModel;
#endif

};

#endif // __grxmldoc_h__