C++程序  |  378行  |  12.56 KB

/* FILE:		sub_grph.h
 *  DATE MODIFIED:	31-Aug-07
 *  DESCRIPTION:	Part of the  SREC graph compiler project source files.
 *
 *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
 *                                                                           *
 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
 *  you may not use this file except in compliance with the License.         *
 *                                                                           *
 *  You may obtain a copy of the License at                                  *
 *      http://www.apache.org/licenses/LICENSE-2.0                           *
 *                                                                           *
 *  Unless required by applicable law or agreed to in writing, software      *
 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * 
 *  See the License for the specific language governing permissions and      *
 *  limitations under the License.                                           *
 *                                                                           *
 *---------------------------------------------------------------------------*/

#ifndef __sub_grph_h__
#define __sub_grph_h__

#include "netw_arc.h"

#include "vocab.h"

//  TODO:
//  Guards various stages 1.  Compiling  2.  Processing

#define SCOPE_ROOT         0
#define SCOPE_ITEM        -1
#define SCOPE_RULE        -2
#define SCOPE_ONEOF       -3
#define SCOPE_COUNT       -4
#define SCOPE_OPT         -5
#define SCOPE_REPEAT      -6

#define NONE_LABEL        -100000           //  Null transition
#define TAG_LABEL         -100001           //  Tag transition
#define WB_LABEL          -100002           //  Word boundary transition
#define TERMINAL_LABEL    -100003           //  Terminal transition
#define BEGINSCOPE_LABEL  -100004           //  Start of scope
#define ENDSCOPE_LABEL    -100005           //  End of scope
#define BEGINRULE_LABEL   -100006           //  Start of rule
#define ENDRULE_LABEL     -100007           //  End of rule
#define DISCARD_LABEL     -100010           //  Discard (internal)
#define INITIAL_LABEL     -100011           //  Initial silence
#define FINAL_LABEL       -100012           //  Final silence

#define MAXNUM             100000           //  A large number
#define BLKSIZE             10000           //  Buffer size
#define MAXITS                  5           //  Maximum equivalence reduction iterations

// #define MAX(X,Y)        ((X) > (Y) ? (X) : (Y))
// #define MIN(X,Y)        ((X) > (Y) ? (X) : (Y))

// General functions
bool IsSlot (std::string label);


class DetCache {
    public:

        DetCache () { num= 0; dataPack= 0; };

        void AddEntry (int comb, int first, int second) {
            if (num == 0)
                dataPack= new int [3*BLKSIZE];
            else if ((num%BLKSIZE) == 0) {
                int *newPack = new int [num+3*BLKSIZE];
                for (int ii= 0; ii < (3*num); ii++)
                    newPack[ii]= dataPack[ii];
                delete [] dataPack;
                dataPack= newPack;
            }
            dataPack[3*num]= first;
            dataPack[3*num+1]= second;
            dataPack[3*num+2]= comb;
	    num++;
            return;
        };

        int QueryEntry (int first, int second) {
            if (!dataPack)
                return -1;
            for (int ii= 0; ii < num; ii++)
                if (dataPack[3*ii] == first && dataPack[3*ii+1] == second)
                    return (dataPack[3*ii+2]);
            return -1;
        }

        ~DetCache () { if (dataPack) delete [] dataPack; };

    private:
        int num;
        int *dataPack;
};


class SubGraph
{
public:

    SubGraph (char *name, int ruleNo)
    {
        int count= strlen(name);
        title= new char [count+1];
        strcpy (title, name);
	// printf ("New Rule %s\n", title);
        ruleId= ruleNo;
        popOp= 0;
        numArc= 0;
        numVertex= 0;
        lastScope= SCOPE_ROOT;
        startId= NewVertexId();
        lastId= startId;
        endId= lastId;
        forwardList= 0;
        backwardList= 0;
        sortNum= 0;
        vertexProp= 0;
        sizeProp= 0;
        return;
    };

    SubGraph (SubGraph *baseG);

    ~SubGraph ()
    {
        delete [] title;
        for (int ii= 0; ii < numArc; ii++)
            delete arc[ii];
        if (numArc >0)
            delete [] arc;
        if (forwardList)
            delete [] forwardList;
        if (backwardList)
            delete [] backwardList;
        return;
    }

    void getName (char *name, int maxlen)
    {
        strncpy (name, title, maxlen);
    }

    int getRuleId () {
        return ruleId;
    }

    /**  Creates a copy of the graph */
    void CopyFastArcs (SubGraph *subG, int startLoc, int endLoc, int offset, int headId, int newHeadId, int tailId, int newTailId);

    /**  Graph construction routines */
    void BeginScope (int scopeType, int arg1, int arg2);
    void EndScope ();
    int  AddItem (int inputLabel, int tagLabel);
    int  AddTag (int tagLabel);
    void ExpandRules (SubGraph **subList, int *lookupList, int numSubs);

    /**  Reverse subgraph */
    void ReverseGraphForOutput ();
    void SortLanguage ();
    void SortLanguageForMin ();
    void SortLanguageReverse ();
    void UpdateSortForLanguage ();

    void RemoveRuleConnections ();
    void RemoveInternalConnections ();
    void RemoveUnreachedConnections (int startPoint, int endPoint);
    void RemoveUnreachedConnectionsDebug (int startPoint, int endPoint);
    void RemoveTagConnections (int startPoint, int endPoint);
    void AddTerminalConnections ();

    void Print ();
    void PrintText ();
    void PrintWithLabels( GRXMLDoc &p_Doc );
    void RemapForSortedOutput ( GRXMLDoc & doc );

    void WriteForwardGraphWithSemantic ( std::string & fileName, GRXMLDoc &p_Doc );
    void WriteForwardGraphFile ( std::string & fileName, GRXMLDoc &p_Doc );
    void WriteFile ( std::string & fileName, GRXMLDoc &p_Doc );
    void WritePhonemeGraphFile( std::string & fileName, GRXMLDoc &p_Doc );
    void WriteHMMGraphFile( std::string & fileName, GRXMLDoc &p_Doc );

    void DeterminizeArcs ();
    int IsDeterminized (int currId);
    void ReduceArcsByEquivalence ();

    void ExpandPhonemes ( GRXMLDoc & doc );
    void AddLeftContexts ();
    void AddRightContexts ();
    void ExpandToHMMs ( GRXMLDoc & doc );
    void ExpandIntraWordSilence ( GRXMLDoc & doc );
    void ClearOutputs ();
    void ShiftOutputsToLeft ();
    void FinalProcessing ( GRXMLDoc & doc );

    void DebugPrintDirective (char *dirrData);
    void DebugPrintLabel (int labId);

private:

    int        NewVertexId ();
    int        numVertex;

    void       AllocateSpaceForArc ();
    NUANArc        *CreateArc (int iLabel, int oLabel, int from, int to);
    NUANArc        *InheritArc (NUANArc *arcBase, int id);
    NUANArc        *InheritReverseArc (NUANArc *arcBase, int id);
    NUANArc        *CreateCopyWithOutput (NUANArc *arcBase, int id);
    NUANArc        *InheritReverseArcWithTag (NUANArc *arcBase, int id, int tagId);
    int        numArc;
    NUANArc        **arc;
    int        sortNum;
    int        sortRevNum;
    int        *forwardList;
    int        *backwardList;

    int  ConnectLastScope (int beginScopeId, int endScopeId);
    int  CloseScope ();
    void UpdateVertexCount (int startLoc);

    int FindFromIndex (int element);
    int FindToIndex (int element);
    void PullUpBegins (int currId, int baseId, int endId, int procLabel, int *nodeList, int currNum, int maxNum);
    void ProcessBegins (int currId, int endId, int procLabel, int *nodeList, int currNum, int *visitMark, int maxNum);
    void PullUpEnds (int currId, int baseId, int initialId, int procLabel, int *nodeList, int currNum, int maxNum);
    void ProcessEnds (int currId, int initialId, int procLabel, int *nodeList, int currNum, int *visitMark, int maxNum);
    bool PullUpTags (int currId, int baseId, int initialId, int outTag, int *nodeList, int currNum, int maxNum);
    void ProcessTags (int currId, int initialId, int *nodeList, int currNum, int *visitMark, int maxNum);
    void ClearDuplicateArcs ();

    void RemoveRuleStarts (int startId, int endId);
    void RemoveRuleEnds (int startId, int endId);
    void RemoveNulls (int startPoint, int endPoint);
    void RemoveForwardConnections (int startId, int endId);
    void RemoveBackwardConnections (int startId, int endId);
    void ClearLabelledConnections (int labItem);
    void RemoveUnvisitedArcs (int initialId, int finalId);
    void RemoveMarkedNodes (int *forwardDepth, int *reverseDepth);
    void RemoveDiscardedArcs ();
    void RenumberNodes ();

    bool EquivalenceTestForward (int firstId, int secondId, int *equivMap);
    void ForwardDepthData (int startId, int *depthMap, int depth);
    void ReverseDepthData (int startId, int *depthMap, int depth);
    void ReverseDepthOnTerminal (int *depthMap);

    void MapGraphVertices (int *equivMap);
    void IdentifyEquivalence (int *depthMap, int *equivMap);
    void CheckForChangeAndResort (int currId, int *mapList);

    void SortLanguageAtIndices (int begIx, int endIx);
    void SortLanguageAtIndicesForMin (int begIx, int endIx);
    void SortLanguageAtSortIndices (int begIx, int endIx);
    bool CheckSort ();
    bool CheckSortReverse ();
    void RemoveDuplicatesAtNode (int rixBegin, int rixEnd);
    void MergeVertices (int newId, int firstId, int secondId);
    void DeterminizeAtVertex (int baseId, DetCache *cache);
    int  PairwiseDeterminize (int baseId, int firstId, int fixStart, int secondId, int sixStart, DetCache *cache);
    void ClearRuleIds ();
    void ViewNode (int currId);

    void ReverseMarkArcs ();
    void ReverseMarkOutput (int currId, int initialId, int outId);
    void MarkNodesByOutputAndClearArcs ();
    void ExpandWordBoundaries ( GRXMLDoc & doc );
    void AddInitialFinalSilences ( GRXMLDoc & doc );

    void UpdateVisitationCache (int startNo);
    void ClearVisitationCache ();
    void SetupVisitationCache ();
    void DecVisitationCache (int currId);
    void IncVisitationCache (int currId);
    int VisitationConsistencyCheck ();

    void CreateNodeProperty () {
        vertexProp= new int [BLKSIZE];
        vertexMinned= new bool [BLKSIZE];
        sizeProp= BLKSIZE;
        for (int ii= 0; ii < sizeProp; ii++) {
            vertexProp[ii]= 0;
            vertexMinned[ii]= false;
	}
        return;
    }

    void IncNodeProperty (int vertNo) {
        if (vertNo >= sizeProp) {
            int newSize= (vertNo/BLKSIZE + 1)*BLKSIZE;
            int *newPack = new int [newSize];
            bool *newMinned = new bool [newSize];
            int ii;
            for (ii= 0; ii < sizeProp; ii++) {
                newPack[ii]= vertexProp[ii];
                newMinned[ii]= vertexMinned[ii];
	    }
            for (ii= sizeProp; ii < newSize; ii++) {
                newPack[ii]= 0;
                newMinned[ii]= false;
	    }
            delete [] vertexProp;
            delete [] vertexMinned;
            vertexProp= newPack;
            vertexMinned= newMinned;
	    sizeProp= newSize;
        }
        vertexProp[vertNo]++;
        return;
    }

    void DecNodeProperty (int vertNo) {
        if (vertNo < sizeProp)
            vertexProp[vertNo]--;
        return;
    }

    void SetNodeProperty (int vertNo, int value) {
        if (vertNo < sizeProp)
            vertexProp[vertNo]= value;
        return;
    }

    void SetMinProperty (int vertNo) {
        if (vertNo < sizeProp)
            vertexMinned[vertNo]= true;
        return;
    }

    int QueryNodeProperty (int vertNo) {
        if (vertNo < sizeProp)
            return (vertexProp[vertNo]);
        return -1;
    }

    int QueryMinProperty (int vertNo) {
        if (vertNo < sizeProp)
            return (vertexMinned[vertNo]);
        return false;
    }

    void DeleteNodeProperty () {
        delete [] vertexProp;
        delete [] vertexMinned;
        vertexProp= 0;
        vertexMinned= 0;
    }


    /*  Stacking of operations and related data
    */
    char    *title;
    int     ruleId;
    int     lastScope;
    int     startId;
    int     endId;
    int     prevStartId;
    int     prevEndId;
    int     lastId;
    int     arg1;
    int     arg2;
    int     arcLoc;
    int     opStack[100000];      // TODO: remove hard coded number
    int     popOp;
    void    pushScope ();
    void    popScope ();
    int     silenceId;
    int     intraId;
    int     *vertexProp;          //  Vertex properties
    bool    *vertexMinned;          //  Vertex properties
    int     sizeProp;
};

#endif // __sub_grph_h__