/* C++ program  |  648 lines  |  17.32 KB */

/*---------------------------------------------------------------------------*
 *  parseStringTest.c  *
 *                                                                           *
 *  Copyright 2007, 2008 Nuance Communications, Inc.                               *
 *                                                                           *
 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
 *  you may not use this file except in compliance with the License.         *
 *                                                                           *
 *  You may obtain a copy of the License at                                  *
 *      http://www.apache.org/licenses/LICENSE-2.0                           *
 *                                                                           *
 *  Unless required by applicable law or agreed to in writing, software      *
 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * 
 *  See the License for the specific language governing permissions and      *
 *  limitations under the License.                                           *
 *                                                                           *
 *---------------------------------------------------------------------------*/



#include "pstdio.h"
#include "pmemory.h"
#include "plog.h"


#include "HashMap.h"
#include "SR_Grammar.h"
#include "SR_SemanticResult.h"
#include "ESR_Session.h"
#include "ESR_Locale.h"
#include "LCHAR.h"

#include "PFileSystem.h"
#include "PANSIFileSystem.h"

/* for testing RecognizerImpl.c, see below */
#include"buildopt.h"
#include"setting.h"
#include"srec_sizes.h"
#include"SR_GrammarImpl.h"

/* defines */
/* Capacity of the buffers that hold one input line, a transcription or a
   token copied out of an input line. */
#define MAX_LINE_LENGTH 256
/* Capacity of the buffers that receive a semantic value from getValue(). */
#define MAX_STR_LENGTH  512
/* Number of semantic result holders created for each parse request. */
#define MAX_SEM_RESULTS   3
/* Capacity of the key arrays handed to getKeyList(). */
#define MAX_KEYS         30

/* protos */
/* Both functions read one line of a test file from fin, check it against the
   grammar and write the verdict to fout; they are defined after main(). */
ESR_ReturnCode process_single_key_line(SR_Grammar* grammar, PFile* fin, PFile* fout);
ESR_ReturnCode process_multi_key_line(SR_Grammar* grammar, const LCHAR* rootrule, PFile* fin, PFile* fout);

/* struct */
/* Run-time options filled in by main() from the command line and read by Parse(). */
typedef struct Opts
{
  /* non-zero: Parse() converts the transcription to word IDs and calls
     checkParseByWordID instead of checkParse (set by -ids and -allids) */
  int use_parse_by_string_ids;
  /* non-zero: Parse() ignores its transcription argument and parses every
     word of the grammar's output-label word map instead (set by -allids) */
  int do_check_all_ids;
}
Opts;

/**
 * Print the command-line synopsis on PSTDOUT.
 *
 * The synopsis lists every option that main() recognises, including the
 * -ids and -allids switches.
 *
 * @param exename program name to show in the synopsis
 * @return always 1, so callers can write "return usage(argv[0]);"
 */
int usage(LCHAR* exename)
{
  pfprintf(PSTDOUT, "usage: %s -base <basefilename> [-in <input file>] [-out <output file>] [-itest <testfilename>] [-ids] [-allids]\n", exename);
  return 1;
}

/**
 * Collapse every run of consecutive ' ' characters in trans into a single
 * space. The string is rewritten in place; characters other than ' ' are
 * copied unchanged.
 */
void lstr_strip_multiple_spaces(LCHAR* trans)
{
  char *reader = trans;
  char *writer = trans;

  /* copy first, then decide whether the copied character is kept */
  while ((*writer = *reader) != L('\0'))
  {
    /* a space directly followed by another space is overwritten next round */
    if (!(*writer == ' ' && reader[1] == ' '))
      writer++;
    reader++;
  }
}

/**
 * Write every key/value pair of one semantic result to fout.
 *
 * Each pair is printed as "{key : value}"; a value that cannot be fetched is
 * replaced by an "Error: ..." line. The listing ends with "--Done--". If the
 * key list itself cannot be fetched, only a single "Error: ..." line is
 * printed.
 *
 * @param result semantic result to dump
 * @param fout   destination file
 */
void display_results(SR_SemanticResult *result, PFile* fout)
{
  LCHAR* key_list[MAX_KEYS];  /* array of pointers to strings */
  LCHAR  text[MAX_STR_LENGTH];
  size_t key_total = MAX_KEYS;
  size_t text_len;
  size_t k;
  ESR_ReturnCode status;

  /* get the key list */
  status = result->getKeyList(result, (LCHAR**) & key_list, &key_total);
  if (status != ESR_SUCCESS)
  {
    pfprintf(fout, "Error: %s\n", ESR_rc2str(status));
    return;
  }

  k = 0;
  while (k < key_total)
  {
    text_len = MAX_STR_LENGTH; /* in: capacity of text */
    status = result->getValue(result, key_list[k], text, &text_len);
    if (status == ESR_SUCCESS)
      pfprintf(fout, "{%s : %s}\n", key_list[k], text);
    else
      pfprintf(fout, "Error: %s\n", ESR_rc2str(status));
    k++;
  }
  pfprintf(fout, "--Done--\n");
}

/**
 * Parse one transcription against the grammar and report the outcome on fout.
 *
 * When opts->do_check_all_ids is set, the trans argument is ignored and every
 * word of the grammar's output-label word map (from index 4 on) is parsed on
 * its own; the results of those individual parses are not propagated.
 *
 * Otherwise trans is trimmed and its runs of spaces are collapsed in place.
 * It is then parsed either as a string (checkParse) or, when
 * opts->use_parse_by_string_ids is set, as a sequence of word IDs
 * (checkParseByWordID). A transcription containing a word that is not in the
 * word map is reported as "no parse".
 *
 * All semantic result holders created here are destroyed on every path.
 *
 * @param grammar grammar to parse against
 * @param trans   transcription; modified in place
 * @param fout    destination of the report
 * @param opts    parse options
 * @return ESR_SUCCESS when the transcription was processed (parse or no
 *         parse); ESR_INVALID_ARGUMENT when the transcription does not fit
 *         the internal word-ID buffers; otherwise the first error returned
 *         by the library
 */
ESR_ReturnCode Parse(SR_Grammar* grammar, LCHAR* trans, PFile* fout, Opts* opts)
{
  ESR_ReturnCode rc = ESR_SUCCESS;
  ESR_ReturnCode destroy_rc;
  size_t i, result_count, key_count;
  size_t created = 0; /* number of result holders that must be destroyed */
  SR_SemanticResult* semanticResults[MAX_SEM_RESULTS];
  wordID wordIDs[32], *wordIDptr;
  SR_GrammarImpl* pgrammar = (SR_GrammarImpl*)grammar;
  wordmap* wmap;

  if (opts->do_check_all_ids)
  {
    wordID id;
    Opts myopts;
    memcpy(&myopts, opts, sizeof(myopts));
    myopts.do_check_all_ids = 0; /* each recursive call parses a single word */
    wmap = pgrammar->syntax->synx->olabels;
    /* start at word 4; the original note names the skipped entries as
       eps, -pau-, -pau2- and @root */
    for (id = 4; id < wmap->num_words; id++)
    {
      trans = wmap->words[id];
      Parse(grammar, trans, fout, &myopts);
    }
    return ESR_SUCCESS;
  }

  lstrtrim(trans);
  /* check for multiple space separators! */
  lstr_strip_multiple_spaces(trans);

  /* create the result holders */
  for (created = 0; created < MAX_SEM_RESULTS; created++)
  {
    rc = SR_SemanticResultCreate(&semanticResults[created]);
    if (rc != ESR_SUCCESS)
      break;
  }
  result_count = created; /* in: number of holders available */

  if (rc == ESR_SUCCESS)
  {
    if (!opts->use_parse_by_string_ids)
    {
      rc = grammar->checkParse(grammar, trans, semanticResults, &result_count);
    }
    else
    {
      char copy_of_trans[256], *p;
      /* the last slot of wordIDs is reserved for the MAXwordID terminator */
      const size_t max_word_ids = sizeof(wordIDs) / sizeof(wordIDs[0]) - 1;

      wmap = pgrammar->syntax->synx->olabels;
      wordIDs[0] = wordIDs[1] = MAXwordID;
      wordIDptr = &wordIDs[0];

      if (strlen(trans) >= sizeof(copy_of_trans))
      {
        /* strtok needs a scratch copy and trans would not fit into it */
        rc = ESR_INVALID_ARGUMENT;
      }
      else
      {
        strcpy(copy_of_trans, trans);
        for (p = strtok(copy_of_trans, " "); p; p = strtok(NULL, " "))
        {
          if ((size_t)(wordIDptr - wordIDs) >= max_word_ids)
          {
            /* more words than wordIDs can hold */
            rc = ESR_INVALID_ARGUMENT;
            break;
          }
          for (i = 0; i < wmap->num_words; i++)
            if (!strcmp(wmap->words[i], p))
            {
              *wordIDptr++ = (wordID)i;
              break;
            }
          if (i == wmap->num_words)
          {
            /* unknown word: mark the whole sequence as empty */
            wordIDs[0] = MAXwordID;
            break;
          }
        }
        *wordIDptr++ = MAXwordID;
      }

      if (rc == ESR_SUCCESS)
      {
        if (wordIDs[0] == MAXwordID)
        {
          /* nothing to parse; reported below as "no parse" */
          result_count = 0;
        }
        else
        {
          rc = pgrammar->semproc->flush(pgrammar->semproc);
          if (rc == ESR_SUCCESS)
            rc = pgrammar->semproc->setParam(pgrammar->semproc, L("literal"), trans);
          if (rc == ESR_SUCCESS)
            rc = pgrammar->semproc->checkParseByWordID(pgrammar->semproc, pgrammar->semgraph,
                 wordIDs, semanticResults, &result_count);
        }
      }
    }
  }

  if (rc != ESR_SUCCESS)
  {
    pfprintf(fout, "error (%s)\n\n", trans);
  }
  else if (result_count < 1)
  {
    pfprintf(fout, "no parse (%s)\n\n", trans);
  }
  else
  {
    /* 0xffff is printed as the key count if the query below does not
       overwrite it; its return code does not affect the result of Parse */
    key_count = 0xffff;
    (void) SR_SemanticResultGetKeyCount(semanticResults[0], &key_count);
    /* the counts are size_t, so convert them for the %d conversions */
    pfprintf(fout, "parse ok (%d results) (%s) (%d)\n", (int) result_count, trans, (int) key_count);
    for (i = 0; i < result_count; i++)
      display_results(semanticResults[i], fout);
  }

  /* destroy every holder that was created, whatever happened above;
     the first error encountered is the one reported */
  for (i = 0; i < created; i++)
  {
    destroy_rc = semanticResults[i]->destroy(semanticResults[i]);
    if (rc == ESR_SUCCESS)
      rc = destroy_rc;
  }
  return rc;
}

/**
 * Test one transcription against the grammar and decide, based on what the
 * test expected, whether the outcome counts as a pass or a failure.
 *
 * The report written to fout has the form
 *   transcription | key | reference | result | PASSED/FAILED
 *
 * - No parse: PASSED only when result is "FAIL".
 * - Parse:    for every semantic result, PASSED only when the value stored
 *             under key equals ref and result is "PASS".
 *
 * All semantic result holders created here are destroyed on every path.
 *
 * @param grammar grammar to parse against
 * @param trans   transcription; trimmed and space-collapsed in place
 * @param key     semantic key whose value is compared
 * @param ref     expected value of key
 * @param result  expected verdict, "PASS" or "FAIL"
 * @param fout    destination of the report
 * @return ESR_SUCCESS when the test ran (whatever its verdict), otherwise the
 *         first error returned by the library
 */
ESR_ReturnCode ParseTestSet(SR_Grammar* grammar, LCHAR* trans, LCHAR* key, LCHAR* ref, LCHAR* result, PFile* fout)
{
  size_t len;
  ESR_ReturnCode rc = ESR_SUCCESS;
  ESR_ReturnCode value_rc, destroy_rc;
  /* size_t, because checkParse writes the count through a size_t pointer */
  size_t i, result_count;
  size_t created = 0; /* number of result holders that must be destroyed */
  SR_SemanticResult* semanticResults[MAX_SEM_RESULTS];
  LCHAR  value[MAX_STR_LENGTH];

  for (created = 0; created < MAX_SEM_RESULTS; created++)
  {
    rc = SR_SemanticResultCreate(&semanticResults[created]);
    if (rc != ESR_SUCCESS)
      goto CLEANUP;
  }
  result_count = created; /* in: number of holders available */

  lstrtrim(trans);
  /* check for multiple space separators! */
  lstr_strip_multiple_spaces(trans);

  pfprintf(fout, "checking (%s) ref(%s) res(%s)\n", trans, ref, result);
  rc = grammar->checkParse(grammar, trans, semanticResults, &result_count);
  if (rc != ESR_SUCCESS)
    goto CLEANUP;

  if (result_count < 1) /* failed to parse, but this could still be a pass if you expected a failure */
  {
    pfprintf(fout, "NO PARSE FOR: %s|%s|%s|  |", trans, key, ref);
    if (strcmp("FAIL", result) == 0)
      pfprintf(fout, "PASSED (%s)\n", trans);
    else
      pfprintf(fout, "FAILED (%s)\n", trans);
  }
  else /* parsed, look at what was expected, what was returned and which of PASS/FAIL is expected */
  {
    for (i = 0; i < result_count; i++)
    {
      len = MAX_STR_LENGTH; /* in: capacity of value */
      value_rc = semanticResults[i]->getValue(semanticResults[i], key, value, &len);
      if (value_rc == ESR_SUCCESS)
      {
        pfprintf(fout, "%s|%s|%s|%s|", trans, key, ref, value);

        if (strcmp(value, ref) == 0 && strcmp("PASS", result) == 0)
          pfprintf(fout, "PASSED\n");
        else
          pfprintf(fout, "FAILED\n");
      }
      else
      {
        /* a lookup failure is reported but does not abort the test */
        pfprintf(fout, "ERROR: %s, while checking key='%s'\n", ESR_rc2str(value_rc), key);
      }
    }
  }

CLEANUP:
  /* deallocate every semantic result that was created; the first error
     encountered is the one reported */
  for (i = 0; i < created; i++)
  {
    destroy_rc = semanticResults[i]->destroy(semanticResults[i]);
    if (rc == ESR_SUCCESS)
      rc = destroy_rc;
  }
  return rc;
}

/**
 * Consume the value that follows the option at argv[*index].
 *
 * @param dst   destination buffer of P_PATH_MAX characters
 * @param argc  argument count as passed to main
 * @param argv  argument vector as passed to main
 * @param index in: position of the option; out: position of its value
 * @return 1 when a value was present and fitted into dst, 0 otherwise
 *         (dst and *index are then left untouched)
 */
static int take_option_value(LCHAR* dst, int argc, char** argv, int* index)
{
  if (*index + 1 >= argc)
    return 0; /* option is the last argument: no value */
  if (strlen(argv[*index + 1]) >= P_PATH_MAX)
    return 0; /* value would overflow dst */
  ++*index;
  LSTRCPY(dst, argv[*index]);
  return 1;
}

/**
 * Semantic parser test program.
 *
 * Loads the grammar named by -base and then runs in one of four modes,
 * checked in this order:
 *   -allids          parse every word of the grammar's word map
 *   -in <file>       parse each line of <file> ("-" selects PSTDIN);
 *                    lines starting with '#' are skipped
 *   -itest <file>    run the "transcription : key=value;..." test file
 *   (none of these)  read transcriptions interactively until "qqq"
 * Output goes to the file given with -out, or to PSTDOUT.
 *
 * @return the last ESR_ReturnCode (1 when a file could not be opened or the
 *         command line was invalid)
 */
int main(int argc, char **argv)
{
  LCHAR trans[MAX_LINE_LENGTH];
  SR_Grammar* grammar = NULL;
  ESR_ReturnCode rc;
  LCHAR base[P_PATH_MAX] = L("");
  LCHAR infilename[P_PATH_MAX] = L("");
  LCHAR inRTfilename[P_PATH_MAX] = L("");
  LCHAR outfilename[P_PATH_MAX] = L("");
  PFile *fin = NULL, *fout = NULL;
  int i;
  LCHAR *rootrule = L("myRoot"), *p;
  Opts opts = { 0, 0 };

  /*
   * Initialize portable library.
   */
  CHKLOG(rc, PMemInit());

  fin = PSTDIN;
  fout = PSTDOUT;

  if (argc < 3)
  {
    usage(argv[0]);
    exit(EXIT_FAILURE);
  }
  for (i = 1; i < argc; ++i)
  {
    /* options that take a value reject a missing or over-long value */
    if (!LSTRCMP(argv[i], L("-base")))
    {
      if (!take_option_value(base, argc, argv, &i))
        return usage(argv[0]);
    }
    else if (!LSTRCMP(argv[i], L("-in")))
    {
      if (!take_option_value(infilename, argc, argv, &i))
        return usage(argv[0]);
    }
    else if (!LSTRCMP(argv[i], L("-out")))
    {
      if (!take_option_value(outfilename, argc, argv, &i))
        return usage(argv[0]);
    }
    else if (!LSTRCMP(argv[i], L("-itest")))
    {
      if (!take_option_value(inRTfilename, argc, argv, &i))
        return usage(argv[0]);
    }
    else if (!LSTRCMP(argv[i], L("-ids")))
    {
      opts.use_parse_by_string_ids = 1;
    }
    else if (!LSTRCMP(argv[i], L("-allids")))
    {
      opts.do_check_all_ids = 1;
      opts.use_parse_by_string_ids = 1;
    }
    else
      return usage(argv[0]);
  }

  CHK(rc, PLogInit(NULL, 0));

  rc = SR_GrammarLoad(base, &grammar);
  if (rc != ESR_SUCCESS)
    goto CLEANUP;

  if (*outfilename)
  {
    if ((fout = pfopen(outfilename, "w")) == NULL)
    {
      pfprintf(PSTDOUT, "Could not open file: %s\n", outfilename);
      rc = 1;
      goto CLEANUP;
    }
  }

  if (opts.do_check_all_ids)
  {
    /* Parse ignores the transcription argument in this mode */
    rc = Parse(grammar, NULL, fout, &opts);
  }
  else if (*infilename)
  {
    if (LSTRCMP(infilename, "-") == 0)
    {
      fin = PSTDIN;
    }
    else if ((fin = pfopen(infilename, "r")) == NULL)
    {
      pfprintf(PSTDOUT, "Could not open file: %s\n", infilename);
      rc = 1;
      goto CLEANUP;
    }
    for (;;)
    {
      if (pfgets(trans, MAX_LINE_LENGTH, fin) == NULL)
      {
        /* end of file ends the loop quietly; anything else is an error */
        if (!pfeof(fin))
        {
          rc = ESR_READ_ERROR;
          PLogError(ESR_rc2str(rc));
        }
        break;
      }
      if (trans[0] == '#') continue; /* comment line */
      lstrtrim(trans);
      /* check for multiple space separators! */
      lstr_strip_multiple_spaces(trans);
      pfprintf(fout, "Transcription: %s\n", trans);
      if ((rc = Parse(grammar, trans, fout, &opts)) != ESR_SUCCESS)
        goto CLEANUP;
      pfprintf(fout, "\n");
    }
  }
  else if (*inRTfilename) /* using a test file */
  {
    if ((fin = pfopen(inRTfilename, "r")) == NULL)
    {
      pfprintf(PSTDOUT, "Could not open test file: %s\n", inRTfilename);
      rc = 1;
      goto CLEANUP;
    }

    /* read through the test file one line at a time; the single-key format
       ("the transcription" key "value") is compiled in but disabled, the
       multi-key format (transcription : key=value;...) is the one in use */
    while (ESR_TRUE)
    {
      if (0) rc = process_single_key_line(grammar, fin, fout);
      else  rc = process_multi_key_line(grammar, rootrule, fin, fout);
      /* both readers signal end of input with ESR_READ_ERROR */
      if (rc == ESR_READ_ERROR)
      {
        rc = ESR_SUCCESS;
        break;
      }
    }
  }
  else
  {
    /* get some transcriptions from the user */
    pfprintf(PSTDOUT, "\nSemantic Parser Test Program for esr (Nuance Communicaitions, 2007)\n");
    pfprintf(PSTDOUT, "'qqq' to quit\n");

    while (ESR_TRUE)
    {
      pfprintf(PSTDOUT, "> ");

      if (!fgets(trans, MAX_LINE_LENGTH, PSTDIN))
        break;
      /* cut the line at the first newline or carriage return */
      for (p = &trans[0]; *p != 0 && *p != '\n' && *p != '\r'; p++) {}
      *p = 0;

      if (!LSTRCMP("qqq", trans))
        break;
      else
        if ((rc = Parse(grammar, trans, fout, &opts)) != ESR_SUCCESS)
          goto CLEANUP;
    }
  }
CLEANUP:
  /* never close the standard streams */
  if (fin && fin != PSTDIN)
    pfclose(fin);
  if (fout && fout != PSTDOUT)
    pfclose(fout);
  if (grammar) grammar->destroy(grammar);
  PLogShutdown();
  PMemShutdown();
  return rc;
}

/**
 * Read one line of a single-key test file and run it through ParseTestSet.
 *
 * Line format:  "the transcription" key "value" [PASS|FAIL]
 * The verdict is optional; PASS is assumed when it is missing. Lines that
 * start with '#' are skipped and a line starting with "__END__" ends the
 * input.
 *
 * @param grammar grammar to parse against
 * @param fin     test file to read from
 * @param fout    destination of the report
 * @return ESR_SUCCESS for a processed or skipped line; ESR_READ_ERROR at end
 *         of input, at "__END__" or on a read failure; ESR_INVALID_ARGUMENT
 *         for a malformed line; otherwise the error returned by ParseTestSet
 */
ESR_ReturnCode process_single_key_line(SR_Grammar* grammar, PFile* fin, PFile* fout)
{
  LCHAR* position;
  LCHAR line[MAX_LINE_LENGTH];
  LCHAR trans[MAX_LINE_LENGTH];
  LCHAR key[MAX_LINE_LENGTH];
  LCHAR refValue[MAX_LINE_LENGTH];
  LCHAR result[MAX_LINE_LENGTH];
  ESR_ReturnCode rc;

  /* check the read before looking at line: after a failed read the buffer
     holds no valid data */
  position = pfgets(line, MAX_LINE_LENGTH, fin);
  if (position == NULL)
  {
    if (!pfeof(fin))
      PLogError(L("ESR_READ_ERROR"));
    return ESR_READ_ERROR;
  }
  if (line[0] == '#')
    return ESR_SUCCESS;
  if (!strncmp(line, "__END__", 7))
    return ESR_READ_ERROR;

  /* get the transcription to test */
  if ((position = strtok(line, "\"")) != NULL)
  {
    LSTRCPY(trans, position);
  }
  else
  {
    pfprintf(fout, "INVALID FORMAT for input line 1 \n");
    rc = ESR_INVALID_ARGUMENT;
    goto CLEANUP;
  }

  /* get the key (meaning) */
  if ((position = strtok(NULL, " \t")) != NULL)
  {
    LSTRCPY(key, position);
  }
  else
  {
    pfprintf(fout, "INVALID FORMAT for input line 2\n");
    rc = ESR_INVALID_ARGUMENT;
    goto CLEANUP;
  }

  /* get the expected return string */
  if ((position = strtok(NULL, "\"")) != NULL)
  {
    LSTRCPY(refValue, position);
  }
  else
  {
    pfprintf(fout, "INVALID FORMAT for input line 3\n");
    rc = ESR_INVALID_ARGUMENT;
    goto CLEANUP;
  }

  /* get the expected result PASS/FAIL;
     there is no need to write PASS, if nothing is written PASS is assumed */
  if ((position = strtok(NULL, " \t\r\n\"")) != NULL)
  {
    LSTRCPY(result, position);

    if (strcmp(result, "PASS") != 0 && strcmp(result, "FAIL") != 0)
    {
      pfprintf(fout, "INVALID FORMAT for input line, use either PASS or FAIL\n");
      rc = ESR_INVALID_ARGUMENT;
      goto CLEANUP;
    }

    if ((rc = ParseTestSet(grammar, trans, key, refValue, result, fout)) != ESR_SUCCESS)
      goto CLEANUP;
  }
  else
  {
    if ((rc = ParseTestSet(grammar, trans, key, refValue, "PASS", fout)) != ESR_SUCCESS)
      goto CLEANUP;
  }
  rc = ESR_SUCCESS;
CLEANUP:
  return rc;
}

/**
 * Read one line of a multi-key test file, parse its transcription and compare
 * the semantic results with the expected key/value pairs.
 *
 * Line format:  transcription : key1='value1';key2='value2'
 *          or:  transcription : FAIL        (also "-": no parse expected)
 * Each key is looked up as "<rootrule>.<key>". Lines that start with '#' are
 * skipped and a line starting with "__END__" ends the input.
 *
 * The report written to fout has the form
 *   transcription | key | reference | result | PASSED/FAILED
 *
 * All semantic result holders created here are destroyed on every path.
 *
 * @param grammar  grammar to parse against
 * @param rootrule prefix prepended to every key from the test file
 * @param fin      test file to read from
 * @param fout     destination of the report
 * @return ESR_SUCCESS for a processed or skipped line; ESR_READ_ERROR at end
 *         of input, at "__END__" or on a read failure; ESR_INVALID_ARGUMENT
 *         for a line without a "transcription : expectation" structure;
 *         otherwise the error returned while creating the results or parsing
 */
ESR_ReturnCode process_multi_key_line(SR_Grammar* grammar, const LCHAR* rootrule, PFile* fin, PFile* fout)
{
  LCHAR *p, *eq;
  LCHAR line[MAX_LINE_LENGTH];
  LCHAR trans[MAX_LINE_LENGTH];
  LCHAR keyvals[MAX_LINE_LENGTH];
  ESR_ReturnCode rc = ESR_SUCCESS;
  ESR_ReturnCode aux_rc; /* outcome of calls that must not change the return value */
  SR_SemanticResult* semanticResults[MAX_SEM_RESULTS];
  LCHAR refkey[MAX_LINE_LENGTH];
  LCHAR refval[MAX_LINE_LENGTH], value[MAX_STR_LENGTH];
  size_t i, j, len;
  size_t result_count;
  size_t created = 0; /* number of result holders that must be destroyed */

  /* check the read before looking at line: after a failed read the buffer
     holds no valid data */
  if (pfgets(line, MAX_LINE_LENGTH, fin) == NULL)
  {
    if (!pfeof(fin))
      PLogError(L("ESR_READ_ERROR"));
    return ESR_READ_ERROR;
  }
  if (line[0] == '#')
    return ESR_SUCCESS;
  if (!strncmp(line, "__END__", 7))
    return ESR_READ_ERROR;

  /* we're trying to parse
    Hello there : BONJOUR
   */
  p = strtok(line, ":");
  if (p == NULL)
  {
    pfprintf(fout, "INVALID FORMAT for input line\n");
    return ESR_INVALID_ARGUMENT;
  }
  LSTRCPY(trans, p);
  /* strip trailing spaces */
  for (len = strlen(trans); len > 0 && trans[len-1] == ' '; len--)
    trans[len-1] = 0;

  p = strtok(NULL, "\n\r");
  if (p == NULL)
  {
    /* no ':' on the line, or nothing after it */
    pfprintf(fout, "INVALID FORMAT for input line\n");
    return ESR_INVALID_ARGUMENT;
  }
  /* strip leading spaces */
  while (*p == ' ' || *p == '\t')  p++;
  LSTRCPY(keyvals, p);

  for (created = 0; created < MAX_SEM_RESULTS; created++)
  {
    rc = SR_SemanticResultCreate(&semanticResults[created]);
    if (rc != ESR_SUCCESS)
      goto CLEANUP;
  }
  result_count = created; /* in: number of holders available */

  rc = grammar->checkParse(grammar, trans, semanticResults, &result_count);
  if (rc != ESR_SUCCESS)
    goto CLEANUP;

  if (result_count < 1) /* failed to parse, but this could still be a pass if you expected a failure */
  {
    pfprintf(fout, "%s|%s|  |", trans, keyvals);
    if (!strcmp("FAIL", keyvals) || !strcmp(keyvals, "-"))
      pfprintf(fout, "PASSED\n");
    else
      pfprintf(fout, "FAILED\n");
  }
  else /* parsed, look at what was expected, what was returned and which of PASS/FAIL is expected */
  {
    size_t size;
    LCHAR* keys_available[MAX_KEYS]; /* array of pointers to strings */

    size = MAX_KEYS;
    aux_rc = semanticResults[0]->getKeyList(semanticResults[0], (LCHAR**) & keys_available, &size);
    if (aux_rc != ESR_SUCCESS)
    {
      /* keys_available cannot be trusted; with no keys every reference key
         is reported below as <NOSUCHKEY> */
      PLogError("%s while listing keys", ESR_rc2str(aux_rc));
      size = 0;
    }

    for (p = strtok(keyvals, ";"); p; p = strtok(NULL, ";"))
    {
      /* "<rootrule>.<pair>" must fit into refkey */
      if (strlen(rootrule) + 1 + strlen(p) >= MAX_LINE_LENGTH)
      {
        pfprintf(fout, "INVALID FORMAT for input line, key=value pair too long\n");
        continue;
      }
      sprintf(refkey, "%s.%s", rootrule, p);
      eq = strchr(refkey, '=');
      if (eq == NULL)
      {
        pfprintf(fout, "INVALID FORMAT for input line, expected key=value (%s)\n", p);
        continue;
      }
      *eq = 0; /* refkey now ends before the '=' */
      eq++;
      /* the value may be wrapped in single quotes */
      if (*eq == '\'') eq++;
      LSTRCPY(refval, eq);
      len = strlen(refval);
      if (len > 0 && refval[len-1] == '\'') refval[len-1] = 0;

      for (i = 0; i < result_count; i++)
      {
        len = MAX_STR_LENGTH; /* in: capacity of value */
        for (j = 0; j < size; j++)
          if (!strcmp(keys_available[j], refkey)) break;
        if (j < size)
        {
          /* start from an empty value so that a failed lookup does not
             print stale data */
          value[0] = 0;
          aux_rc = semanticResults[i]->getValue(semanticResults[i], refkey, value, &len);
          if (aux_rc != ESR_SUCCESS)
            PLogError("%s while reading key", ESR_rc2str(aux_rc));
        }
        else
        {
          LSTRCPY(value, "<NOSUCHKEY>");
        }
        pfprintf(fout, "%s|%s|%s|%s|", trans, refkey, refval, value);
        if (strcmp(value, refval) == 0)
          pfprintf(fout, "PASSED\n");
        else
          pfprintf(fout, "FAILED\n");
      }
    }
  }

CLEANUP:
  /* deallocate every semantic result that was created; a failure here is
     logged but does not change the return value */
  for (i = 0; i < created; i++)
  {
    aux_rc = semanticResults[i]->destroy(semanticResults[i]);
    if (aux_rc != ESR_SUCCESS)
      PLogError("%s while destroying", ESR_rc2str(aux_rc));
  }
  return rc;
}