/*---------------------------------------------------------------------------*
 *  parseStringTest.c                                                        *
 *                                                                           *
 *  Copyright 2007, 2008 Nuance Communciations, Inc.                         *
 *                                                                           *
 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
 *  you may not use this file except in compliance with the License.         *
 *                                                                           *
 *  You may obtain a copy of the License at                                  *
 *      http://www.apache.org/licenses/LICENSE-2.0                           *
 *                                                                           *
 *  Unless required by applicable law or agreed to in writing, software      *
 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
 *  See the License for the specific language governing permissions and      *
 *  limitations under the License.                                           *
 *                                                                           *
 *---------------------------------------------------------------------------*/

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "pstdio.h"
#include "pmemory.h"
#include "plog.h"
#include "HashMap.h"
#include "SR_Grammar.h"
#include "SR_SemanticResult.h"
#include "ESR_Session.h"
#include "ESR_Locale.h"
#include "LCHAR.h"
#include "PFileSystem.h"
#include "PANSIFileSystem.h"

/* for testing RecognizerImpl.c, see below */
#include "buildopt.h"
#include "setting.h"
#include "srec_sizes.h"
#include "SR_GrammarImpl.h"

/* defines */
#define MAX_LINE_LENGTH 256
#define MAX_STR_LENGTH  512
#define MAX_SEM_RESULTS   3
#define MAX_KEYS         30
#define MAX_WORD_IDS     32  /* capacity of the word-id array, terminator included */

/* protos */
ESR_ReturnCode process_single_key_line(SR_Grammar* grammar, PFile* fin, PFile* fout);
ESR_ReturnCode process_multi_key_line(SR_Grammar* grammar, const LCHAR* rootrule, PFile* fin, PFile* fout);

/* struct */
typedef struct Opts
{
  int use_parse_by_string_ids; /* parse via word ids (checkParseByWordID) instead of checkParse */
  int do_check_all_ids;        /* parse every word of the grammar's output-label word map */
}
Opts;

/**
 * Print the command line synopsis to PSTDOUT.
 *
 * @param exename Program name shown in the synopsis
 * @return Always 1, so callers can write "return usage(...)"
 */
int usage(LCHAR* exename)
{
  pfprintf(PSTDOUT, "usage: %s -base <basefilename> [-in <input file>] [-out <output file>] [-itest <testfilename>]\n", exename);
  return 1;
}

/**
 * Collapse every run of consecutive spaces in a string to a single space,
 * in place. Only ' ' is considered; tabs are left untouched.
 *
 * @param trans NUL-terminated string to rewrite
 */
void lstr_strip_multiple_spaces(LCHAR* trans)
{
  LCHAR *src = trans, *dst = trans;

  while ((*dst = *src) != L('\0'))
  {
    /* keep the character unless it is a space that is followed by a space */
    if (*dst != ' ' || src[1] != ' ')
      dst++;
    src++;
  }
}

/**
 * Create 'count' semantic result holders.
 *
 * All slots are set to NULL first, so that destroy_semantic_results() can be
 * called on the array even when creation stops half way.
 *
 * @param results Array with room for 'count' pointers
 * @param count Number of holders to create
 * @return ESR_SUCCESS, or the code of the first creation that failed
 */
static ESR_ReturnCode create_semantic_results(SR_SemanticResult** results, size_t count)
{
  ESR_ReturnCode rc = ESR_SUCCESS;
  size_t i;

  for (i = 0; i < count; i++)
    results[i] = NULL;
  for (i = 0; i < count; i++)
  {
    rc = SR_SemanticResultCreate(&results[i]);
    if (rc != ESR_SUCCESS)
    {
      results[i] = NULL;
      break;
    }
  }
  return rc;
}

/**
 * Destroy every non-NULL semantic result holder in the array and reset the
 * slot to NULL. A failing destroy does not stop the remaining ones from
 * being released.
 *
 * @param results Array of 'count' pointers
 * @param count Number of slots in the array
 * @return ESR_SUCCESS, or the code of the first destroy that failed
 */
static ESR_ReturnCode destroy_semantic_results(SR_SemanticResult** results, size_t count)
{
  ESR_ReturnCode first_error = ESR_SUCCESS;
  size_t i;

  for (i = 0; i < count; i++)
  {
    ESR_ReturnCode rc;

    if (results[i] == NULL)
      continue;
    rc = results[i]->destroy(results[i]);
    results[i] = NULL;
    if (rc != ESR_SUCCESS && first_error == ESR_SUCCESS)
      first_error = rc;
  }
  return first_error;
}

/**
 * Display the Semantic Result: one "{key : value}" line per key, followed by
 * "--Done--". Lookup failures are written as "Error: ..." lines.
 *
 * @param result Semantic result to dump
 * @param fout Output stream
 */
void display_results(SR_SemanticResult *result, PFile* fout)
{
  size_t i, size, len;
  LCHAR* keys[MAX_KEYS]; /* array of pointers to strings */
  LCHAR  value[MAX_STR_LENGTH];
  ESR_ReturnCode rc;

  size = MAX_KEYS;
  rc = result->getKeyList(result, keys, &size); /* get the key list */
  if (rc != ESR_SUCCESS)
  {
    pfprintf(fout, "Error: %s\n", ESR_rc2str(rc));
    return;
  }

  for (i = 0; i < size; i++)
  {
    len = MAX_STR_LENGTH;
    rc = result->getValue(result, keys[i], value, &len);
    if (rc == ESR_SUCCESS)
      pfprintf(fout, "{%s : %s}\n", keys[i], value);
    else
      pfprintf(fout, "Error: %s\n", ESR_rc2str(rc));
  }
  pfprintf(fout, "--Done--\n");
}

/**
 * Parse one transcription against the grammar and write the outcome to fout.
 *
 * When opts->do_check_all_ids is set, 'trans' is ignored and every word of the
 * grammar's output-label word map (from index 4 on) is parsed on its own.
 * Otherwise 'trans' is trimmed and space-normalized in place, then parsed
 * either with grammar->checkParse() or, when opts->use_parse_by_string_ids is
 * set, by mapping each word to its id and calling checkParseByWordID().
 *
 * @param grammar Grammar to parse against
 * @param trans Transcription; modified in place (may be NULL only in
 *              do_check_all_ids mode)
 * @param fout Output stream
 * @param opts Parsing options
 * @return ESR_SUCCESS when the parse attempt itself ran (with or without a
 *         resulting parse), otherwise the error code of the failing step
 */
ESR_ReturnCode Parse(SR_Grammar* grammar, LCHAR* trans, PFile* fout, Opts* opts)
{
  ESR_ReturnCode rc = ESR_SUCCESS;
  size_t i, result_count, key_count;
  SR_SemanticResult* semanticResults[MAX_SEM_RESULTS];
  wordID wordIDs[MAX_WORD_IDS];
  SR_GrammarImpl* pgrammar = (SR_GrammarImpl*)grammar;
  wordmap* wmap;

  if (opts->do_check_all_ids)
  {
    wordID id;
    Opts myopts = *opts;

    myopts.do_check_all_ids = 0; /* the recursive calls parse a single word each */
    wmap = pgrammar->syntax->synx->olabels;
    /* start at word 4 because "eps, -pau- -pau2- @root */
    for (id = 4; id < wmap->num_words; id++)
      Parse(grammar, wmap->words[id], fout, &myopts);
    return ESR_SUCCESS;
  }

  lstrtrim(trans);
  /* check for multiple space separators! */
  lstr_strip_multiple_spaces(trans);

  result_count = MAX_SEM_RESULTS; /* initially not greater than MAX */
  rc = create_semantic_results(semanticResults, MAX_SEM_RESULTS); /* create the result holders */

  if (rc != ESR_SUCCESS)
  {
    /* nothing to parse with; reported by the common error path below */
  }
  else if (!opts->use_parse_by_string_ids)
  {
    rc = grammar->checkParse(grammar, trans, semanticResults, &result_count);
  }
  else if (strlen(trans) >= MAX_LINE_LENGTH)
  {
    /* would not fit the tokenizing buffer below */
    rc = ESR_INVALID_ARGUMENT;
  }
  else
  {
    LCHAR copy_of_trans[MAX_LINE_LENGTH];
    LCHAR* word;
    size_t id_count = 0;
    int all_mapped = 1;

    /* strtok() writes into its input, so tokenize a copy and keep 'trans' intact */
    strcpy(copy_of_trans, trans);
    wmap = pgrammar->syntax->synx->olabels;
    for (word = strtok(copy_of_trans, " "); word; word = strtok(NULL, " "))
    {
      for (i = 0; i < wmap->num_words; i++)
        if (!strcmp(wmap->words[i], word))
          break;
      /* unknown word, or no room left for this id plus the terminator */
      if (i == wmap->num_words || id_count >= MAX_WORD_IDS - 1)
      {
        all_mapped = 0;
        break;
      }
      wordIDs[id_count++] = (wordID)i;
    }
    wordIDs[id_count] = MAXwordID; /* terminator */

    if (!all_mapped || id_count == 0)
    {
      result_count = 0;
    }
    else
    {
      rc = pgrammar->semproc->flush(pgrammar->semproc);
      if (rc == ESR_SUCCESS)
        rc = pgrammar->semproc->setParam(pgrammar->semproc, L("literal"), trans);
      if (rc == ESR_SUCCESS)
        rc = pgrammar->semproc->checkParseByWordID(pgrammar->semproc, pgrammar->semgraph,
             wordIDs, semanticResults, &result_count);
    }
  }

  if (rc != ESR_SUCCESS)
  {
    pfprintf(fout, "error (%s)\n\n", trans);
    destroy_semantic_results(semanticResults, MAX_SEM_RESULTS);
    return rc;
  }

  if (result_count < 1)
  {
    pfprintf(fout, "no parse (%s)\n\n", trans);
  }
  else
  {
    key_count = 0xffff; /* printed as-is if the key count cannot be obtained */
    SR_SemanticResultGetKeyCount(semanticResults[0], &key_count);
    /* the format uses %d, so narrow the size_t values explicitly */
    pfprintf(fout, "parse ok (%d results) (%s) (%d)\n", (int) result_count, trans, (int) key_count);
    for (i = 0; i < result_count; i++)
      display_results(semanticResults[i], fout);
  }

  /* the holders are released on every path, not only after a successful parse */
  return destroy_semantic_results(semanticResults, MAX_SEM_RESULTS);
}

/**
 * Tests the transcription against the grammar and then decides, based on what
 * was expected of the test, whether it is considered a pass or a fail.
 *
 * The result file will contain:
 *   transcription | key | reference | result | PASSED/FAILED
 *
 * @param grammar Grammar to parse against
 * @param trans Transcription; trimmed and space-normalized in place
 * @param key Semantic key whose value is compared
 * @param ref Reference (expected) value for the key
 * @param result Expected outcome, "PASS" or "FAIL"
 * @param fout Output stream
 * @return ESR_SUCCESS, or the error code of checkParse / result handling
 */
ESR_ReturnCode ParseTestSet(SR_Grammar* grammar, LCHAR* trans, LCHAR* key, LCHAR* ref, LCHAR* result, PFile* fout)
{
  size_t len;
  ESR_ReturnCode rc;
  size_t i, result_count; /* size_t: checkParse writes through a size_t pointer */
  SR_SemanticResult* semanticResults[MAX_SEM_RESULTS];
  LCHAR value[MAX_STR_LENGTH];

  result_count = MAX_SEM_RESULTS;
  rc = create_semantic_results(semanticResults, MAX_SEM_RESULTS);
  if (rc != ESR_SUCCESS)
  {
    destroy_semantic_results(semanticResults, MAX_SEM_RESULTS);
    return rc;
  }

  lstrtrim(trans);
  /* check for multiple space separators! */
  lstr_strip_multiple_spaces(trans);

  pfprintf(fout, "checking (%s) ref(%s) res(%s)\n", trans, ref, result);
  rc = grammar->checkParse(grammar, trans, semanticResults, &result_count);
  if (rc != ESR_SUCCESS)
  {
    destroy_semantic_results(semanticResults, MAX_SEM_RESULTS);
    return rc;
  }

  if (result_count < 1) /* failed to parse, but this could still be a pass if you expected a failure */
  {
    pfprintf(fout, "NO PARSE FOR: %s|%s|%s|  |", trans, key, ref);
    if (strcmp("FAIL", result) == 0)
      pfprintf(fout, "PASSED (%s)\n", trans);
    else
      pfprintf(fout, "FAILED (%s)\n", trans);
  }
  else /* parsed, look at what was expected, what was returned and which of PASS/FAIL is expected */
  {
    for (i = 0; i < result_count; i++)
    {
      len = MAX_STR_LENGTH;
      rc = semanticResults[i]->getValue(semanticResults[i], key, value, &len);
      if (rc == ESR_SUCCESS)
      {
        pfprintf(fout, "%s|%s|%s|%s|", trans, key, ref, value);
        if (strcmp(value, ref) == 0 && strcmp("PASS", result) == 0)
          pfprintf(fout, "PASSED\n");
        else
          pfprintf(fout, "FAILED\n");
      }
      else
      {
        pfprintf(fout, "ERROR: %s, while checking key='%s'\n", ESR_rc2str(rc), key);
      }
    }
  }

  /* deallocate semantic results on every path */
  return destroy_semantic_results(semanticResults, MAX_SEM_RESULTS);
}

/**
 * Entry point.
 *
 * Options: -base <grammar base name>, -in <file, or "-" for stdin>,
 * -out <file>, -itest <test file>, -ids, -allids.
 * Without -in / -itest / -allids, transcriptions are read interactively
 * until "qqq" is entered.
 *
 * @return The last ESR return code (or 1 for usage / file-open failures)
 */
int main(int argc, char **argv)
{
  LCHAR trans[MAX_LINE_LENGTH];
  SR_Grammar* grammar = NULL;
  ESR_ReturnCode rc;
  LCHAR base[P_PATH_MAX] = L("");
  LCHAR infilename[P_PATH_MAX] = L("");
  LCHAR inRTfilename[P_PATH_MAX] = L("");
  LCHAR outfilename[P_PATH_MAX] = L("");
  PFile *fin = NULL, *fout = NULL;
  int i;
  const LCHAR *rootrule = L("myRoot");
  LCHAR *p;
  Opts opts = { 0, 0 };

  /*
   * Initialize portable library.
   */
  CHKLOG(rc, PMemInit());

  fin = PSTDIN;
  fout = PSTDOUT;

  if (argc < 3)
  {
    usage(argv[0]);
    exit(EXIT_FAILURE);
  }
  for (i = 1; i < argc; ++i)
  {
    LCHAR* target = NULL; /* buffer receiving the option's value, if it takes one */

    if (!LSTRCMP(argv[i], L("-base")))
      target = base;
    else if (!LSTRCMP(argv[i], L("-in")))
      target = infilename;
    else if (!LSTRCMP(argv[i], L("-out")))
      target = outfilename;
    else if (!LSTRCMP(argv[i], L("-itest")))
      target = inRTfilename;
    else if (!LSTRCMP(argv[i], L("-ids")))
      opts.use_parse_by_string_ids = 1;
    else if (!LSTRCMP(argv[i], L("-allids")))
    {
      opts.do_check_all_ids = 1;
      opts.use_parse_by_string_ids = 1;
    }
    else
      return usage(argv[0]);

    if (target != NULL)
    {
      /* the value must exist and fit the P_PATH_MAX sized buffer */
      if (++i >= argc || strlen(argv[i]) >= P_PATH_MAX)
        return usage(argv[0]);
      LSTRCPY(target, argv[i]);
    }
  }

  CHK(rc, PLogInit(NULL, 0));

  rc = SR_GrammarLoad(base, &grammar);
  if (rc != ESR_SUCCESS)
    goto CLEANUP;

  if (*outfilename)
  {
    if ((fout = pfopen(outfilename, "w")) == NULL)
    {
      pfprintf(PSTDOUT, "Could not open file: %s\n", outfilename);
      rc = 1;
      goto CLEANUP;
    }
  }

  if (opts.do_check_all_ids)
  {
    rc = Parse(grammar, NULL, fout, &opts);
  }
  else if (*infilename)
  {
    if (LSTRCMP(infilename, "-") == 0)
    {
      fin = PSTDIN;
    }
    else if ((fin = pfopen(infilename, "r")) == NULL)
    {
      pfprintf(PSTDOUT, "Could not open file: %s\n", infilename);
      rc = 1;
      goto CLEANUP;
    }
    for (;;)
    {
      if (pfgets(trans, MAX_LINE_LENGTH, fin) == NULL)
      {
        if (!pfeof(fin))
        {
          rc = ESR_READ_ERROR;
          PLogError(L("%s"), ESR_rc2str(rc));
        }
        break;
      }
      if (trans[0] == '#')
        continue;
      lstrtrim(trans);
      /* check for multiple space separators! */
      lstr_strip_multiple_spaces(trans);
      pfprintf(fout, "Transcription: %s\n", trans);
      if ((rc = Parse(grammar, trans, fout, &opts)) != ESR_SUCCESS)
        goto CLEANUP;
      pfprintf(fout, "\n");
    }
  }
  else if (*inRTfilename) /* using a test file */
  {
    if ((fin = pfopen(inRTfilename, "r")) == NULL)
    {
      pfprintf(PSTDOUT, "Could not open test file: %s\n", inRTfilename);
      rc = 1;
      goto CLEANUP;
    }

    /* read through the test file parsing it into the variables
       FORMAT: "the transcription" key "value"
    */
    while (ESR_TRUE)
    {
      /* the single-key line format is kept but currently disabled */
      if (0)
        rc = process_single_key_line(grammar, fin, fout);
      else
        rc = process_multi_key_line(grammar, rootrule, fin, fout);
      /* ESR_READ_ERROR marks the end of the test file; other codes skip to the next line */
      if (rc == ESR_READ_ERROR)
      {
        rc = ESR_SUCCESS;
        break;
      }
    }
  }
  else
  {
    /* get some transcriptions from the user */
    pfprintf(PSTDOUT, "\nSemantic Parser Test Program for esr (Nuance Communicaitions, 2007)\n");
    pfprintf(PSTDOUT, "'qqq' to quit\n");
    while (ESR_TRUE)
    {
      pfprintf(PSTDOUT, "> ");
      if (!pfgets(trans, MAX_LINE_LENGTH, PSTDIN))
        break;
      /* cut the line at the first end-of-line character */
      for (p = &trans[0]; *p != 0 && *p != '\n' && *p != '\r'; p++)
      {}
      *p = 0;

      if (!LSTRCMP("qqq", trans))
        break;
      else if ((rc = Parse(grammar, trans, fout, &opts)) != ESR_SUCCESS)
        goto CLEANUP;
    }
  }

CLEANUP:
  if (fin && fin != PSTDIN)
    pfclose(fin);
  if (fout && fout != PSTDOUT)
    pfclose(fout);
  if (grammar)
    grammar->destroy(grammar);
  PLogShutdown();
  /* PANSIFileSystemDestroy();
     PFileSystemDestroy(); */
  PMemShutdown();
  return rc;
}

/**
 * Read one line of the form
 *     "the transcription" key "value" [PASS|FAIL]
 * from fin and run it through ParseTestSet(). PASS is assumed when the last
 * field is missing. Lines starting with '#' are skipped.
 *
 * @param grammar Grammar to parse against
 * @param fin Test file
 * @param fout Output stream
 * @return ESR_READ_ERROR at end of input, on a read failure, or on an
 *         "__END__" line; ESR_INVALID_ARGUMENT for a malformed line;
 *         otherwise the result of ParseTestSet()
 */
ESR_ReturnCode process_single_key_line(SR_Grammar* grammar, PFile* fin, PFile* fout)
{
  LCHAR* position;
  LCHAR line[MAX_LINE_LENGTH];
  LCHAR trans[MAX_LINE_LENGTH];
  LCHAR key[MAX_LINE_LENGTH];
  LCHAR refValue[MAX_LINE_LENGTH];
  LCHAR result[MAX_LINE_LENGTH];

  /* 'line' only holds valid data when the read succeeded, so test that first */
  if (pfgets(line, MAX_LINE_LENGTH, fin) == NULL)
  {
    if (!pfeof(fin))
      PLogError(L("ESR_READ_ERROR"));
    return ESR_READ_ERROR;
  }
  if (line[0] == '#')
    return ESR_SUCCESS;
  if (!strncmp(line, "__END__", 7))
    return ESR_READ_ERROR;

  /* get the transcription to test */
  if ((position = strtok(line, "\"")) == NULL)
  {
    pfprintf(fout, "INVALID FORMAT for input line 1 \n");
    return ESR_INVALID_ARGUMENT;
  }
  LSTRCPY(trans, position);

  /* get the key (meaning) */
  if ((position = strtok(NULL, " \t")) == NULL)
  {
    pfprintf(fout, "INVALID FORMAT for input line 2\n");
    return ESR_INVALID_ARGUMENT;
  }
  LSTRCPY(key, position);

  /* get the expected return string */
  if ((position = strtok(NULL, "\"")) == NULL)
  {
    pfprintf(fout, "INVALID FORMAT for input line 3\n");
    return ESR_INVALID_ARGUMENT;
  }
  LSTRCPY(refValue, position);

  /* get the expected result PASS/FAIL
     there is no need to write PASS, if nothing is written PASS is assumed */
  if ((position = strtok(NULL, " \t\r\n\"")) == NULL)
    return ParseTestSet(grammar, trans, key, refValue, "PASS", fout);

  LSTRCPY(result, position);
  if (strcmp(result, "PASS") != 0 && strcmp(result, "FAIL") != 0)
  {
    pfprintf(fout, "INVALID FORMAT for input line, use either PASS or FAIL\n");
    return ESR_INVALID_ARGUMENT;
  }
  return ParseTestSet(grammar, trans, key, refValue, result, fout);
}

/**
 * Read one line of the form
 *     Hello there : key1='value1';key2='value2'
 * from fin, parse the part before ':' and compare every listed key (prefixed
 * with "<rootrule>.") against the semantic results. An expectation of "FAIL"
 * or "-" passes when the transcription does not parse. Lines starting with
 * '#' and blank lines are skipped.
 *
 * The result file will contain:
 *   transcription | key | reference | result | PASSED/FAILED
 *
 * @param grammar Grammar to parse against
 * @param rootrule Name of the root rule used as key prefix
 * @param fin Test file
 * @param fout Output stream
 * @return ESR_READ_ERROR at end of input, on a read failure, or on an
 *         "__END__" line; ESR_INVALID_ARGUMENT for a line without
 *         expectations; the checkParse error code if parsing failed;
 *         ESR_SUCCESS otherwise
 */
ESR_ReturnCode process_multi_key_line(SR_Grammar* grammar, const LCHAR* rootrule, PFile* fin, PFile* fout)
{
  LCHAR *p, *token, *eq;
  LCHAR line[MAX_LINE_LENGTH];
  LCHAR trans[MAX_LINE_LENGTH];
  LCHAR keyvals[MAX_LINE_LENGTH];
  LCHAR refkey[MAX_LINE_LENGTH];
  LCHAR refval[MAX_LINE_LENGTH], value[MAX_STR_LENGTH];
  LCHAR* keys_available[MAX_KEYS]; /* array of pointers to strings */
  SR_SemanticResult* semanticResults[MAX_SEM_RESULTS];
  ESR_ReturnCode rc;
  size_t i, j, len, size;
  size_t result_count;

  /* 'line' only holds valid data when the read succeeded, so test that first */
  if (pfgets(line, MAX_LINE_LENGTH, fin) == NULL)
  {
    if (!pfeof(fin))
      PLogError(L("ESR_READ_ERROR"));
    return ESR_READ_ERROR;
  }
  if (line[0] == '#')
    return ESR_SUCCESS;
  if (!strncmp(line, "__END__", 7))
    return ESR_READ_ERROR;

  /* skip lines that contain nothing but whitespace */
  for (p = line; *p == ' ' || *p == '\t' || *p == '\r' || *p == '\n'; p++)
  {}
  if (*p == 0)
    return ESR_SUCCESS;

  /* we're trying to parse
       Hello there : BONJOUR
  */
  p = strtok(line, ":");
  if (p == NULL)
  {
    pfprintf(fout, "INVALID FORMAT for input line\n");
    return ESR_INVALID_ARGUMENT;
  }
  LSTRCPY(trans, p);
  /* strip trailing spaces */
  for (len = strlen(trans); len > 0 && trans[len-1] == ' '; len--)
    trans[len-1] = 0;

  p = strtok(NULL, "\n\r");
  if (p == NULL) /* no ':' on the line, or nothing after it */
  {
    pfprintf(fout, "INVALID FORMAT for input line\n");
    return ESR_INVALID_ARGUMENT;
  }
  /* strip leading spaces */
  while (*p == ' ' || *p == '\t')
    p++;
  LSTRCPY(keyvals, p);

  result_count = MAX_SEM_RESULTS;
  rc = create_semantic_results(semanticResults, MAX_SEM_RESULTS);
  if (rc != ESR_SUCCESS)
  {
    destroy_semantic_results(semanticResults, MAX_SEM_RESULTS);
    return rc;
  }

  /* pfprintf(fout,"checking (%s) ref(%s)\n", trans, keyvals); */
  rc = grammar->checkParse(grammar, trans, semanticResults, &result_count);
  if (rc != ESR_SUCCESS)
  {
    destroy_semantic_results(semanticResults, MAX_SEM_RESULTS);
    return rc;
  }

  if (result_count < 1) /* failed to parse, but this could still be a pass if you expected a failure */
  {
    pfprintf(fout, "%s|%s|  |", trans, keyvals);
    if (!strcmp("FAIL", keyvals) || !strcmp(keyvals, "-"))
      pfprintf(fout, "PASSED\n");
    else
      pfprintf(fout, "FAILED\n");
  }
  else /* parsed, look at what was expected, what was returned and which of PASS/FAIL is expected */
  {
    /* the key list of the first result is used to pre-check keys for all results */
    size = MAX_KEYS;
    rc = semanticResults[0]->getKeyList(semanticResults[0], keys_available, &size);
    if (rc != ESR_SUCCESS)
      size = 0; /* no usable key list: every key is reported as <NOSUCHKEY> */

    for (token = strtok(keyvals, ";"); token; token = strtok(NULL, ";"))
    {
      /* bounded: rootrule + '.' + token may not fit MAX_LINE_LENGTH */
      snprintf(refkey, MAX_LINE_LENGTH, "%s.%s", rootrule, token);
      eq = strchr(refkey, '=');
      if (eq == NULL)
      {
        /* not a key=value pair (e.g. an expected FAIL that did parse) */
        pfprintf(fout, "%s|%s|  |FAILED\n", trans, token);
        continue;
      }
      *eq++ = 0;
      if (*eq == '\'')
        eq++;
      LSTRCPY(refval, eq);
      len = strlen(refval);
      if (len > 0 && refval[len-1] == '\'')
        refval[len-1] = 0;

      for (i = 0; i < result_count; i++)
      {
        len = MAX_STR_LENGTH;
        value[0] = 0; /* stays empty if getValue() fails */
        for (j = 0; j < size; j++)
          if (!strcmp(keys_available[j], refkey))
            break;
        if (j < size)
          rc = semanticResults[i]->getValue(semanticResults[i], refkey, value, &len);
        else
        {
          LSTRCPY(value, "<NOSUCHKEY>");
          rc = ESR_NO_MATCH_ERROR;
        }
        pfprintf(fout, "%s|%s|%s|%s|", trans, refkey, refval, value);
        if (strcmp(value, refval) == 0)
          pfprintf(fout, "PASSED\n");
        else
          pfprintf(fout, "FAILED\n");
      }
    }
  }

  /* deallocate semantic results on every path */
  rc = destroy_semantic_results(semanticResults, MAX_SEM_RESULTS);
  if (rc != ESR_SUCCESS)
    PLogError("%s while destroying", ESR_rc2str(rc));
  return ESR_SUCCESS;
}