julius/wchmm_check.c

Go to the documentation of this file.
00001 
00029 /*
00030  * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
00031  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00032  * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology
00033  * All rights reserved
00034  */
00035 
00036 #include <julius.h>
00037 
00052 static void
00053 print_winfo_w(WORD_INFO *winfo, WORD_ID word)
00054 {
00055   int i;
00056   if (word >= winfo->num) return;
00057   j_printf("--winfo\n");
00058   j_printf("wname   = %s\n",winfo->wname[word]);
00059   j_printf("woutput = %s\n",winfo->woutput[word]);
00060   j_printf("\ntransp  = %s\n", (winfo->is_transparent[word]) ? "yes" : "no");
00061   j_printf("wlen    = %d\n",winfo->wlen[word]);
00062   j_printf("wseq    =");
00063   for (i=0;i<winfo->wlen[word];i++) {
00064     j_printf(" %s",winfo->wseq[word][i]->name);
00065   }
00066   j_printf("\nwseq_def=");
00067   for (i=0;i<winfo->wlen[word];i++) {
00068     if (winfo->wseq[word][i]->is_pseudo) {
00069       j_printf(" (%s)", winfo->wseq[word][i]->body.pseudo->name);
00070     } else {
00071       j_printf(" %s",winfo->wseq[word][i]->body.defined->name);
00072     }
00073   }
00074   j_printf("\nwton    = %d\n",winfo->wton[word]);
00075 #ifdef CLASS_NGRAM
00076   j_printf("cprob   = %f(%f)\n", winfo->cprob[word], pow(10.0, winfo->cprob[word]));
00077 #endif
00078   
00079 }
00080 
00095 static void
00096 print_wchmm_w(WCHMM_INFO *wchmm, WORD_ID word)
00097 {
00098   int i;
00099   if (word >= wchmm->winfo->num) return;
00100   j_printf("--wchmm (word)\n");
00101   j_printf("offset  =");
00102   for (i=0;i<wchmm->winfo->wlen[word];i++) {
00103     j_printf(" %d",wchmm->offset[word][i]);
00104   }
00105   j_printf("\n");
00106 #ifdef MULTIPATH_VERSION
00107   j_printf("wordbegin = %d\n",wchmm->wordbegin[word]);
00108 #endif
00109   j_printf("wordend = %d\n",wchmm->wordend[word]);
00110 }
00111 
00126 static void
00127 print_wchmm_s(WCHMM_INFO *wchmm, int node)
00128 {
00129   j_printf("--wchmm (node)\n");
00130 #ifndef MULTIPATH_VERSION
00131   j_printf("ststart = %d\n",wchmm->ststart[node]);
00132 #endif
00133   j_printf("stend   = %d\n",wchmm->stend[node]);
00134 #ifdef MULTIPATH_VERSION
00135   if (wchmm->state[node].out.state == NULL) {
00136     j_printf("NO OUTPUT\n");
00137     return;
00138   }
00139 #endif
00140 #ifdef PASS1_IWCD
00141   j_printf("outstyle= ");
00142   switch(wchmm->outstyle[node]) {
00143   case AS_STATE:
00144     j_printf("AS_STATE (id=%d)\n", (wchmm->state[node].out.state)->id);
00145     break;
00146   case AS_LSET:
00147     j_printf("AS_LSET  (%d variants)\n", (wchmm->state[node].out.lset)->num);
00148     break;
00149   case AS_RSET:
00150     if ((wchmm->state[node].out.rset)->hmm->is_pseudo) {
00151       j_printf("AS_RSET  (name=\"%s\", pseudo=\"%s\", loc=%d)\n",
00152                (wchmm->state[node].out.rset)->hmm->name,
00153                (wchmm->state[node].out.rset)->hmm->body.pseudo->name,
00154                (wchmm->state[node].out.rset)->state_loc);
00155     } else {
00156       j_printf("AS_RSET  (name=\"%s\", defined=\"%s\", loc=%d)\n",
00157                (wchmm->state[node].out.rset)->hmm->name,
00158                (wchmm->state[node].out.rset)->hmm->body.defined->name,
00159                (wchmm->state[node].out.rset)->state_loc);
00160     }
00161     break;
00162   case AS_LRSET:
00163     if ((wchmm->state[node].out.rset)->hmm->is_pseudo) {
00164       j_printf("AS_LRSET  (name=\"%s\", pseudo=\"%s\", loc=%d)\n",
00165                (wchmm->state[node].out.lrset)->hmm->name,
00166                (wchmm->state[node].out.lrset)->hmm->body.pseudo->name,
00167                (wchmm->state[node].out.lrset)->state_loc);
00168     } else {
00169       j_printf("AS_LRSET  (name=\"%s\", defined=\"%s\", loc=%d)\n",
00170                (wchmm->state[node].out.lrset)->hmm->name,
00171                (wchmm->state[node].out.lrset)->hmm->body.defined->name,
00172                (wchmm->state[node].out.lrset)->state_loc);
00173     }
00174     break;
00175   default:
00176     j_printf("UNKNOWN???\n");
00177   }
00178 #endif /* PASS1_IWCD */
00179 }
00180 
00195 static void
00196 print_wchmm_s_arc(WCHMM_INFO *wchmm, int node)
00197 {
00198   A_CELL *ac;
00199   int i = 0;
00200   j_printf("arcs:\n");
00201   for (ac=wchmm->state[node].ac;ac;ac=ac->next) {
00202     j_printf(" %d %f(%f)\n",ac->arc,ac->a,pow(10.0, ac->a));
00203     i++;
00204   }
00205   j_printf(" total %d arcs\n",i);
00206 }
00207 
00208 #ifndef CATEGORY_TREE
00209 
00223 static void
00224 print_wchmm_s_successor(WCHMM_INFO *wchmm, int node)
00225 {
00226   S_CELL *sc;
00227   int i = 0;
00228   int scid;
00229 
00230   scid = wchmm->state[node].scid;
00231   if (scid == 0) {
00232     j_printf("no successors\n");
00233   } else if (scid < 0) {
00234     j_printf("successor id: %d\n", scid);
00235 #ifdef UNIGRAM_FACTORING
00236     j_printf("1-gram factoring node: score=%f\n",wchmm->fscore[-scid]);
00237 #endif
00238   } else {
00239     j_printf("successor id: %d\n", scid);
00240     for (sc=wchmm->sclist[scid];sc;sc=sc->next) {
00241       j_printf(" %d\n",sc->word);
00242       i++;
00243     }
00244     j_printf(" total %d successors\n",i);
00245   }
00246 }
00247 #endif
00248 
00261 static void
00262 print_hmminfo(char *name)
00263 {
00264   HMM_Logical *l;
00265 
00266   l = htk_hmmdata_lookup_logical(hmminfo, name);
00267   if (l == NULL) {
00268     j_printf("no HMM named \"%s\"\n", name);
00269   } else {
00270     put_logical_hmm(l);
00271   }
00272 }
00273 
00274 #ifdef USE_NGRAM
00275 
00289 static void
00290 print_ngraminfo(NGRAM_INFO *ngram, int nid)
00291 {
00292   j_printf("-- N-gram entry --\n");
00293   j_printf("nid  = %d\n", nid);
00294   j_printf("name = %s\n", ngram->wname[nid]);
00295 }
00296 #endif
00297 
00298 
00312 void
00313 wchmm_check_interactive(WCHMM_INFO *wchmm) /* interactive check */
00314 {
00315   static const int len = 24;
00316   char buf[len], *name;
00317   int arg, newline;
00318   WORD_ID argw;
00319   boolean endflag;
00320 
00321   j_printf("\n\n");
00322   j_printf("********************************************\n");
00323   j_printf("********  LM & LEXICON CHECK MODE  *********\n");
00324   j_printf("********************************************\n");
00325   j_printf("\n");
00326 
00327   for (endflag = FALSE; endflag == FALSE;) {
00328     j_printf("===== syntax: command arg (\"H\" for help) > ");
00329     if (fgets(buf, len, stdin) == NULL) break;
00330     name = "";
00331     arg = 0;
00332     if (isalpha(buf[0]) != 0 && buf[1] == ' ') {
00333       newline = strlen(buf)-1;
00334       if (buf[newline] == '\n') {
00335         buf[newline] = '\0';
00336       }
00337       if (buf[2] != '\0') {
00338         name = buf + 2;
00339         arg = atoi(name);
00340       }
00341     }
00342     switch(buf[0]) {
00343     case 'w':                   /* word info */
00344       argw = arg;
00345       print_winfo_w(wchmm->winfo, argw);
00346       print_wchmm_w(wchmm, argw);
00347       break;
00348     case 'n':                   /* node info */
00349       print_wchmm_s(wchmm, arg);
00350       break;
00351     case 'a':                   /* arc list */
00352       print_wchmm_s_arc(wchmm, arg);
00353       break;
00354 #if 0
00355     case 'r':                   /* reverse arc list */
00356       print_wchmm_r_arc(arg);
00357       break;
00358 #endif
00359 #ifndef CATEGORY_TREE
00360     case 's':                   /* successor word list */
00361       print_wchmm_s_successor(wchmm, arg);
00362       break;
00363 #endif
00364     case 't':                   /* node total info of above */
00365       print_wchmm_s(wchmm, arg);
00366       print_wchmm_s_arc(wchmm, arg);
00367 #if 0
00368       print_wchmm_r_arc(arg);
00369 #endif
00370 #ifndef CATEGORY_TREE
00371       print_wchmm_s_successor(wchmm, arg);
00372 #endif
00373       break;
00374     case 'h':                   /* hmm state info */
00375       print_hmminfo(name);
00376       break;
00377 #ifdef USE_NGRAM
00378     case 'l':                   /* N-gram language model info */
00379       print_ngraminfo(wchmm->ngram, arg);
00380       break;
00381 #endif
00382     case 'q':                   /* quit */
00383       endflag = TRUE;
00384       break;
00385     default:                    /* help */
00386       j_printf("syntax: [command_character] [number(#)]\n");
00387       j_printf("  w [word_id] ... show word info\n");
00388       j_printf("  n [state]   ... show wchmm state info\n");
00389       j_printf("  a [state]   ... show arcs from the state\n");
00390 #if 0
00391       j_printf("  r [state]   ... show arcs  to  the state\n");
00392 #endif
00393       j_printf("  s [state]   ... show successor list of the state\n");
00394       j_printf("  h [hmmname] ... show HMM info of the name\n");
00395 #ifdef USE_NGRAM
00396       j_printf("  l [nwid]    ... N-gram entry info\n");
00397 #endif
00398       j_printf("  H           ... print this help\n");
00399       j_printf("  q           ... quit\n");
00400       break;
00401     }
00402   }
00403   j_printf("\n");
00404   j_printf("********************************************\n");
00405   j_printf("*****  END OF LM & LEXICON CHECK MODE  *****\n");
00406   j_printf("********************************************\n");
00407   j_printf("\n");
00408 }
00409 
00410 
00423 void
00424 check_wchmm(WCHMM_INFO *wchmm)
00425 {
00426   int i,n;
00427   boolean ok_flag;
00428   A_CELL *ac;
00429 #ifdef MULTIPATH_VERSION
00430   int node;
00431   WORD_ID w;
00432 #endif
00433 
00434   ok_flag = TRUE;
00435 
00436 #ifdef MULTIPATH_VERSION
00437   
00438   /* check word-beginning nodes */
00439   for(i=0;i<wchmm->startnum;i++) {
00440     node = wchmm->startnode[i];
00441     if (wchmm->state[node].out.state != NULL) {
00442       j_printf("Error: word-beginning node %d has output function!\n, node");
00443       ok_flag = FALSE;
00444     }
00445   }
00446   /* examine if word->state and state->word mapping is correct */
00447   for(w=0;w<wchmm->winfo->num;w++) {
00448     if (wchmm->stend[wchmm->wordend[w]] != w) {
00449       j_printf("Error: no match of word end for word %d!!\n", w);
00450       ok_flag = FALSE;
00451     }
00452   }
00453   
00454 #else
00455   
00456   /* examine if word->state and state->word mapping is correct */
00457   for (i=0;i<winfo->num;i++) {
00458     if (wchmm->stend[wchmm->wordend[i]]!=i) {
00459       j_printf("end ga awanai!!!: word=%d, node=%d, value=%d\n",
00460                i, wchmm->wordend[i], wchmm->stend[wchmm->wordend[i]]);
00461       ok_flag = FALSE;
00462     }
00463     if (wchmm->ststart[wchmm->offset[i][0]] == WORD_INVALID) {
00464       j_printf("word start node is WORD_INVALID:word=%d, node=%d, value=%d\n",
00465                i, wchmm->offset[i][0], wchmm->ststart[wchmm->offset[i][0]]);
00466       ok_flag = FALSE;
00467     }
00468   }
00469 
00470 #endif /* MULTIPATH_VERSION */
00471 
00472   /* check if the last state is unique and has only one output arc */
00473   i = 0;
00474   for (n=0;n<wchmm->n;n++) {
00475     if (wchmm->stend[n] != WORD_INVALID) {
00476       i++;
00477       for (ac=wchmm->state[n].ac; ac; ac=ac->next) {
00478         if (ac->arc == n) continue;
00479 #ifndef MULTIPATH_VERSION
00480         if (wchmm->ststart[ac->arc] != WORD_INVALID) continue;
00481 #endif
00482         break;
00483       }
00484       if (ac != NULL) {
00485         j_printf("node %d is shared?\n",n);
00486         ok_flag = FALSE;
00487       }
00488     }
00489   }
00490   if (i != wchmm->winfo->num ) {
00491     j_printf("num of heads of words in wchmm not match word num!!\n");
00492     j_printf("from wchmm->stend:%d != from winfo:%d ?\n",i,wchmm->winfo->num);
00493     ok_flag = FALSE;
00494   }
00495 
00496   /* if check failed, go into interactive mode */
00497   if (!ok_flag) {
00498     wchmm_check_interactive(wchmm);
00499   }
00500 
00501   VERMES("  coordination check passed\n");
00502 }
00503 

Generated on Tue Dec 26 16:16:33 2006 for Julius by  doxygen 1.5.0