Main Page | Modules | Data Structures | Directories | File List | Data Fields | Globals | Related Pages

wchmm_check.c

Go to the documentation of this file.
00001 
00029 /*
00030  * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
00031  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00032  * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology, Nagoya Institute of Technology
00033  * All rights reserved
00034  */
00035 
00036 #include <julius.h>
00037 
00052 static void
00053 print_winfo_w(WORD_INFO *winfo, WORD_ID word)
00054 {
00055   int i;
00056   if (word >= winfo->num) return;
00057   j_printf("--winfo\n");
00058   j_printf("wname   = %s\n",winfo->wname[word]);
00059   j_printf("woutput = %s\n",winfo->woutput[word]);
00060   j_printf("\ntransp  = %s\n", (winfo->is_transparent[word]) ? "yes" : "no");
00061   j_printf("wlen    = %d\n",winfo->wlen[word]);
00062   j_printf("wseq    =");
00063   for (i=0;i<winfo->wlen[word];i++) {
00064     j_printf(" %s",winfo->wseq[word][i]->name);
00065   }
00066   j_printf("\nwseq_def=");
00067   for (i=0;i<winfo->wlen[word];i++) {
00068     if (winfo->wseq[word][i]->is_pseudo) {
00069       j_printf(" (%s)", winfo->wseq[word][i]->body.pseudo->name);
00070     } else {
00071       j_printf(" %s",winfo->wseq[word][i]->body.defined->name);
00072     }
00073   }
00074   j_printf("\nwton    = %d\n",winfo->wton[word]);
00075 #ifdef CLASS_NGRAM
00076   j_printf("cprob   = %f(%f)\n", winfo->cprob[word], pow(10.0, winfo->cprob[word]));
00077 #endif
00078   
00079 }
00080 
00095 static void
00096 print_wchmm_w(WCHMM_INFO *wchmm, WORD_ID word)
00097 {
00098   int i;
00099   if (word >= wchmm->winfo->num) return;
00100   j_printf("--wchmm (word)\n");
00101   j_printf("offset  =");
00102   for (i=0;i<wchmm->winfo->wlen[word];i++) {
00103     j_printf(" %d",wchmm->offset[word][i]);
00104   }
00105   j_printf("\n");
00106 #ifdef MULTIPATH_VERSION
00107   j_printf("wordbegin = %d\n",wchmm->wordbegin[word]);
00108 #endif
00109   j_printf("wordend = %d\n",wchmm->wordend[word]);
00110 }
00111 
00126 static void
00127 print_wchmm_s(WCHMM_INFO *wchmm, int node)
00128 {
00129   j_printf("--wchmm (node)\n");
00130 #ifndef MULTIPATH_VERSION
00131   j_printf("ststart = %d\n",wchmm->ststart[node]);
00132 #endif
00133   j_printf("stend   = %d\n",wchmm->stend[node]);
00134 #ifdef MULTIPATH_VERSION
00135   if (wchmm->state[node].out.state == NULL) {
00136     j_printf("NO OUTPUT\n");
00137     return;
00138   }
00139 #endif
00140 #ifdef PASS1_IWCD
00141   j_printf("outstyle= ");
00142   switch(wchmm->outstyle[node]) {
00143   case AS_STATE:
00144     j_printf("AS_STATE (id=%d)\n", (wchmm->state[node].out.state)->id);
00145     break;
00146   case AS_LSET:
00147     j_printf("AS_LSET  (%d variants)\n", (wchmm->state[node].out.lset)->num);
00148     break;
00149   case AS_RSET:
00150     if ((wchmm->state[node].out.rset)->hmm->is_pseudo) {
00151       j_printf("AS_RSET  (name=\"%s\", pseudo=\"%s\", loc=%d)\n",
00152                (wchmm->state[node].out.rset)->hmm->name,
00153                (wchmm->state[node].out.rset)->hmm->body.pseudo->name,
00154                (wchmm->state[node].out.rset)->state_loc);
00155     } else {
00156       j_printf("AS_RSET  (name=\"%s\", defined=\"%s\", loc=%d)\n",
00157                (wchmm->state[node].out.rset)->hmm->name,
00158                (wchmm->state[node].out.rset)->hmm->body.defined->name,
00159                (wchmm->state[node].out.rset)->state_loc);
00160     }
00161     break;
00162   case AS_LRSET:
00163     if ((wchmm->state[node].out.rset)->hmm->is_pseudo) {
00164       j_printf("AS_LRSET  (name=\"%s\", pseudo=\"%s\", loc=%d)\n",
00165                (wchmm->state[node].out.lrset)->hmm->name,
00166                (wchmm->state[node].out.lrset)->hmm->body.pseudo->name,
00167                (wchmm->state[node].out.lrset)->state_loc);
00168     } else {
00169       j_printf("AS_LRSET  (name=\"%s\", defined=\"%s\", loc=%d)\n",
00170                (wchmm->state[node].out.lrset)->hmm->name,
00171                (wchmm->state[node].out.lrset)->hmm->body.defined->name,
00172                (wchmm->state[node].out.lrset)->state_loc);
00173     }
00174     break;
00175   default:
00176     j_printf("UNKNOWN???\n");
00177   }
00178 #endif /* PASS1_IWCD */
00179 }
00180 
00195 static void
00196 print_wchmm_s_arc(WCHMM_INFO *wchmm, int node)
00197 {
00198   A_CELL *ac;
00199   int i = 0;
00200   j_printf("arcs:\n");
00201   for (ac=wchmm->state[node].ac;ac;ac=ac->next) {
00202     j_printf(" %d %f(%f)\n",ac->arc,ac->a,pow(10.0, ac->a));
00203     i++;
00204   }
00205   j_printf(" total %d arcs\n",i);
00206 }
00207 
00208 #ifndef CATEGORY_TREE
00209 
00223 static void
00224 print_wchmm_s_successor(WCHMM_INFO *wchmm, int node)
00225 {
00226   S_CELL *sc;
00227   int i = 0;
00228   int scid;
00229 
00230   scid = wchmm->state[node].scid;
00231   if (scid == 0) {
00232     j_printf("no successors\n");
00233   } else if (scid < 0) {
00234     j_printf("successor id: %d\n", scid);
00235 #ifdef UNIGRAM_FACTORING
00236     j_printf("1-gram factoring node: score=%f\n",wchmm->fscore[-scid]);
00237 #endif
00238   } else {
00239     j_printf("successor id: %d\n", scid);
00240     for (sc=wchmm->sclist[scid];sc;sc=sc->next) {
00241       j_printf(" %d\n",sc->word);
00242       i++;
00243     }
00244     j_printf(" total %d successors\n",i);
00245   }
00246 }
00247 #endif
00248 
00261 static void
00262 print_hmminfo(char *name)
00263 {
00264   HMM_Logical *l;
00265 
00266   l = htk_hmmdata_lookup_logical(hmminfo, name);
00267   if (l == NULL) {
00268     j_printf("no HMM named \"%s\"\n", name);
00269   } else {
00270     put_logical_hmm(l);
00271   }
00272 }
00273 
#ifdef USE_NGRAM

/**
 * Print one N-gram vocabulary entry: its id and the name stored for it
 * in ngram->wname[].
 *
 * NOTE(review): nid is used as an index without a range check here;
 * callers are assumed to pass a valid id -- confirm.
 *
 * @param ngram [in] N-gram data
 * @param nid [in] N-gram entry id to display
 */
static void
print_ngraminfo(NGRAM_INFO *ngram, int nid)
{
  char *entry_name;

  j_printf("-- N-gram entry --\n");
  j_printf("nid  = %d\n", nid);

  entry_name = ngram->wname[nid];
  j_printf("name = %s\n", entry_name);
}
#endif
00297 
00298 
00312 void
00313 wchmm_check_interactive(WCHMM_INFO *wchmm) /* interactive check */
00314 {
00315   char buf1[24], buf2[24];
00316   int arg;
00317   WORD_ID argw;
00318   boolean endflag;
00319 
00320   j_printf("\n\n");
00321   j_printf("********************************************\n");
00322   j_printf("********  LM & LEXICON CHECK MODE  *********\n");
00323   j_printf("********************************************\n");
00324   j_printf("\n");
00325 
00326   for (endflag = FALSE; endflag == FALSE;) {
00327     j_printf("===== syntax: command arg (\"H H\" for help) > ");
00328     scanf("%s %s",buf1, buf2);
00329     if (strlen(buf1)==0) continue;
00330 
00331     arg = atoi(buf2);
00332     
00333     switch(buf1[0]) {
00334     case 'w':                   /* word info */
00335       argw = arg;
00336       print_winfo_w(wchmm->winfo, argw);
00337       print_wchmm_w(wchmm, argw);
00338       break;
00339     case 'n':                   /* node info */
00340       print_wchmm_s(wchmm, arg);
00341       break;
00342     case 'a':                   /* arc list */
00343       print_wchmm_s_arc(wchmm, arg);
00344       break;
00345 #if 0
00346     case 'r':                   /* reverse arc list */
00347       print_wchmm_r_arc(arg);
00348       break;
00349 #endif
00350 #ifndef CATEGORY_TREE
00351     case 's':                   /* successor word list */
00352       print_wchmm_s_successor(wchmm, arg);
00353       break;
00354 #endif
00355     case 't':                   /* node total info of above */
00356       print_wchmm_s(wchmm, arg);
00357       print_wchmm_s_arc(wchmm, arg);
00358 #if 0
00359       print_wchmm_r_arc(arg);
00360 #endif
00361 #ifndef CATEGORY_TREE
00362       print_wchmm_s_successor(wchmm, arg);
00363 #endif
00364       break;
00365     case 'h':                   /* hmm state info */
00366       print_hmminfo(buf2);
00367       break;
00368 #ifdef USE_NGRAM
00369     case 'l':                   /* N-gram language model info */
00370       print_ngraminfo(wchmm->ngram, arg);
00371       break;
00372 #endif
00373     case 'H':                   /* help */
00374       j_printf("syntax: [command_character] [number(#)]\n");
00375       j_printf("  w [word_id] ... show word info\n");
00376       j_printf("  n [state]   ... show wchmm state info\n");
00377       j_printf("  a [state]   ... show arcs from the state\n");
00378 #if 0
00379       j_printf("  r [state]   ... show arcs  to  the state\n");
00380 #endif
00381       j_printf("  s [state]   ... show successor list of the state\n");
00382       j_printf("  h [hmmname] ... show HMM info of the name\n");
00383 #ifdef USE_NGRAM
00384       j_printf("  l [nwid]    ... N-gram entry info\n");
00385 #endif
00386       j_printf("  H           ... print this help\n");
00387       break;
00388     case 'q':                   /* quit */
00389       endflag = TRUE;
00390       break;
00391     }
00392   }
00393   j_printf("\n");
00394   j_printf("********************************************\n");
00395   j_printf("*****  END OF LM & LEXICON CHECK MODE  *****\n");
00396   j_printf("********************************************\n");
00397   j_printf("\n");
00398 }
00399 
00400 
00413 void
00414 check_wchmm(WCHMM_INFO *wchmm)
00415 {
00416   int i,n;
00417   boolean ok_flag;
00418   A_CELL *ac;
00419 #ifdef MULTIPATH_VERSION
00420   int node;
00421   WORD_ID w;
00422 #endif
00423 
00424   ok_flag = TRUE;
00425 
00426 #ifdef MULTIPATH_VERSION
00427   
00428   /* check word-beginning nodes */
00429   for(i=0;i<wchmm->startnum;i++) {
00430     node = wchmm->startnode[i];
00431     if (wchmm->state[node].out.state != NULL) {
00432       j_printf("Error: word-beginning node %d has output function!\n, node");
00433       ok_flag = FALSE;
00434     }
00435   }
00436   /* examine if word->state and state->word mapping is correct */
00437   for(w=0;w<wchmm->winfo->num;w++) {
00438     if (wchmm->stend[wchmm->wordend[w]] != w) {
00439       j_printf("Error: no match of word end for word %d!!\n", w);
00440       ok_flag = FALSE;
00441     }
00442   }
00443   
00444 #else
00445   
00446   /* examine if word->state and state->word mapping is correct */
00447   for (i=0;i<winfo->num;i++) {
00448     if (wchmm->stend[wchmm->wordend[i]]!=i) {
00449       j_printf("end ga awanai!!!: word=%d, node=%d, value=%d\n",
00450                i, wchmm->wordend[i], wchmm->stend[wchmm->wordend[i]]);
00451       ok_flag = FALSE;
00452     }
00453     if (wchmm->ststart[wchmm->offset[i][0]] == WORD_INVALID) {
00454       j_printf("word start node is WORD_INVALID:word=%d, node=%d, value=%d\n",
00455                i, wchmm->offset[i][0], wchmm->ststart[wchmm->offset[i][0]]);
00456       ok_flag = FALSE;
00457     }
00458   }
00459 
00460 #endif /* MULTIPATH_VERSION */
00461 
00462   /* check if the last state is unique and has only one output arc */
00463   i = 0;
00464   for (n=0;n<wchmm->n;n++) {
00465     if (wchmm->stend[n] != WORD_INVALID) {
00466       i++;
00467       for (ac=wchmm->state[n].ac; ac; ac=ac->next) {
00468         if (ac->arc == n) continue;
00469 #ifndef MULTIPATH_VERSION
00470         if (wchmm->ststart[ac->arc] != WORD_INVALID) continue;
00471 #endif
00472         break;
00473       }
00474       if (ac != NULL) {
00475         j_printf("node %d is shared?\n",n);
00476         ok_flag = FALSE;
00477       }
00478     }
00479   }
00480   if (i != wchmm->winfo->num ) {
00481     j_printf("num of heads of words in wchmm not match word num!!\n");
00482     j_printf("from wchmm->stend:%d != from winfo:%d ?\n",i,wchmm->winfo->num);
00483     ok_flag = FALSE;
00484   }
00485 
00486   /* if check failed, go into interactive mode */
00487   if (!ok_flag) {
00488     wchmm_check_interactive(wchmm);
00489   }
00490 
00491   VERMES("  coordination check passed\n");
00492 }
00493 

Generated on Tue Mar 28 16:01:39 2006 for Julius by  doxygen 1.4.2