libjulius/src/wchmm_check.c

Go to the documentation of this file.
00001 
00030 /*
00031  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
00032  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00033  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
00034  * All rights reserved
00035  */
00036 
00037 #include <julius/julius.h>
00038 
00055 static void
00056 print_winfo_w(WORD_INFO *winfo, WORD_ID word, boolean ngram_exist)
00057 {
00058   int i;
00059   if (word >= winfo->num) return;
00060   printf("--winfo\n");
00061   printf("wname   = %s\n",winfo->wname[word]);
00062   printf("woutput = %s\n",winfo->woutput[word]);
00063   printf("\ntransp  = %s\n", (winfo->is_transparent[word]) ? "yes" : "no");
00064   printf("wlen    = %d\n",winfo->wlen[word]);
00065   printf("wseq    =");
00066   for (i=0;i<winfo->wlen[word];i++) {
00067     printf(" %s",winfo->wseq[word][i]->name);
00068   }
00069   printf("\nwseq_def=");
00070   for (i=0;i<winfo->wlen[word];i++) {
00071     if (winfo->wseq[word][i]->is_pseudo) {
00072       printf(" (%s)", winfo->wseq[word][i]->body.pseudo->name);
00073     } else {
00074       printf(" %s",winfo->wseq[word][i]->body.defined->name);
00075     }
00076   }
00077   if (ngram_exist) {
00078     printf("\nwton    = %d\n",winfo->wton[word]);
00079 #ifdef CLASS_NGRAM
00080     printf("cprob   = %f(%f)\n", winfo->cprob[word], pow(10.0, winfo->cprob[word]));
00081 #endif
00082   }
00083   
00084 }
00085 
00100 static void
00101 print_wchmm_w(WCHMM_INFO *wchmm, WORD_ID word)
00102 {
00103   int i;
00104   if (word >= wchmm->winfo->num) return;
00105   printf("--wchmm (word)\n");
00106   printf("offset  =");
00107   for (i=0;i<wchmm->winfo->wlen[word];i++) {
00108     printf(" %d",wchmm->offset[word][i]);
00109   }
00110   printf("\n");
00111   if (wchmm->hmminfo->multipath) {
00112     printf("wordbegin = %d\n",wchmm->wordbegin[word]);
00113   }
00114   printf("wordend = %d\n",wchmm->wordend[word]);
00115 }
00116 
00131 static void
00132 print_wchmm_s(WCHMM_INFO *wchmm, int node)
00133 {
00134   printf("--wchmm (node)\n");
00135   printf("stend   = %d\n",wchmm->stend[node]);
00136   if (wchmm->hmminfo->multipath) {
00137     if (wchmm->state[node].out.state == NULL) {
00138       printf("NO OUTPUT\n");
00139       return;
00140     }
00141   }
00142 #ifdef PASS1_IWCD
00143   printf("outstyle= ");
00144   switch(wchmm->outstyle[node]) {
00145   case AS_STATE:
00146     printf("AS_STATE (id=%d)\n", (wchmm->state[node].out.state)->id);
00147     break;
00148   case AS_LSET:
00149     printf("AS_LSET  (%d variants)\n", (wchmm->state[node].out.lset)->num);
00150     break;
00151   case AS_RSET:
00152     if ((wchmm->state[node].out.rset)->hmm->is_pseudo) {
00153       printf("AS_RSET  (name=\"%s\", pseudo=\"%s\", loc=%d)\n",
00154                (wchmm->state[node].out.rset)->hmm->name,
00155                (wchmm->state[node].out.rset)->hmm->body.pseudo->name,
00156                (wchmm->state[node].out.rset)->state_loc);
00157     } else {
00158       printf("AS_RSET  (name=\"%s\", defined=\"%s\", loc=%d)\n",
00159                (wchmm->state[node].out.rset)->hmm->name,
00160                (wchmm->state[node].out.rset)->hmm->body.defined->name,
00161                (wchmm->state[node].out.rset)->state_loc);
00162     }
00163     break;
00164   case AS_LRSET:
00165     if ((wchmm->state[node].out.rset)->hmm->is_pseudo) {
00166       printf("AS_LRSET  (name=\"%s\", pseudo=\"%s\", loc=%d)\n",
00167                (wchmm->state[node].out.lrset)->hmm->name,
00168                (wchmm->state[node].out.lrset)->hmm->body.pseudo->name,
00169                (wchmm->state[node].out.lrset)->state_loc);
00170     } else {
00171       printf("AS_LRSET  (name=\"%s\", defined=\"%s\", loc=%d)\n",
00172                (wchmm->state[node].out.lrset)->hmm->name,
00173                (wchmm->state[node].out.lrset)->hmm->body.defined->name,
00174                (wchmm->state[node].out.lrset)->state_loc);
00175     }
00176     break;
00177   default:
00178     printf("UNKNOWN???\n");
00179   }
00180 #endif /* PASS1_IWCD */
00181 }
00182 
00197 static void
00198 print_wchmm_s_arc(WCHMM_INFO *wchmm, int node)
00199 {
00200   A_CELL2 *ac;
00201   int i = 0;
00202   int j;
00203   printf("arcs:\n");
00204   if (wchmm->self_a[node] != LOG_ZERO) {
00205     printf(" %d %f(%f)\n", node, wchmm->self_a[node], pow(10.0, wchmm->self_a[node]));
00206     i++;
00207   }
00208   if (wchmm->next_a[node] != LOG_ZERO) {
00209     printf(" %d %f(%f)\n", node + 1, wchmm->next_a[node], pow(10.0, wchmm->next_a[node]));
00210     i++;
00211   }
00212   for(ac = wchmm->ac[node]; ac; ac = ac->next) {
00213     for (j=0;j<ac->n;j++) {
00214       printf(" %d %f(%f)\n",ac->arc[j],ac->a[j],pow(10.0, ac->a[j]));
00215       i++;
00216     }
00217   }
00218   printf(" total %d arcs\n",i);
00219 }
00220 
00235 static void
00236 print_wchmm_s_successor(WCHMM_INFO *wchmm, int node)
00237 {
00238   S_CELL *sc;
00239   int i = 0;
00240   int scid;
00241 
00242   scid = wchmm->state[node].scid;
00243   if (scid == 0) {
00244     printf("no successors\n");
00245   } else if (scid < 0) {
00246     printf("successor id: %d\n", scid);
00247 #ifdef UNIGRAM_FACTORING
00248     if (wchmm->lmtype == LM_PROB) {
00249       printf("1-gram factoring node: score=%f\n",wchmm->fscore[-scid]);
00250     }
00251 #endif
00252   } else {
00253     printf("successor id: %d\n", scid);
00254     for (sc=wchmm->sclist[scid];sc;sc=sc->next) {
00255       printf(" %d\n",sc->word);
00256       i++;
00257     }
00258     printf(" total %d successors\n",i);
00259   }
00260 }
00261 
00276 static void
00277 print_hmminfo(char *name, HTK_HMM_INFO *hmminfo)
00278 {
00279   HMM_Logical *l;
00280 
00281   l = htk_hmmdata_lookup_logical(hmminfo, name);
00282   if (l == NULL) {
00283     printf("no HMM named \"%s\"\n", name);
00284   } else {
00285     put_logical_hmm(stdout, l);
00286   }
00287 }
00288 
00303 static void
00304 print_ngraminfo(NGRAM_INFO *ngram, int nid)
00305 {
00306   printf("-- N-gram entry --\n");
00307   printf("nid  = %d\n", nid);
00308   printf("name = %s\n", ngram->wname[nid]);
00309 }
00310 
00311 
00327 void
00328 wchmm_check_interactive(WCHMM_INFO *wchmm) /* interactive check */
00329 {
00330   static const int len = 24;
00331   char buf[len], *name;
00332   int arg, newline;
00333   WORD_ID argw;
00334   boolean endflag;
00335 
00336   printf("\n\n");
00337   printf("********************************************\n");
00338   printf("********  LM & LEXICON CHECK MODE  *********\n");
00339   printf("********************************************\n");
00340   printf("\n");
00341 
00342   for (endflag = FALSE; endflag == FALSE;) {
00343     printf("===== syntax: command arg (\"H\" for help) > ");
00344     if (fgets(buf, len, stdin) == NULL) break;
00345     name = "";
00346     arg = 0;
00347     if (isalpha(buf[0]) != 0 && buf[1] == ' ') {
00348       newline = strlen(buf)-1;
00349       if (buf[newline] == '\n') {
00350         buf[newline] = '\0';
00351       }
00352       if (buf[2] != '\0') {
00353         name = buf + 2;
00354         arg = atoi(name);
00355       }
00356     }
00357     switch(buf[0]) {
00358     case 'w':                   /* word info */
00359       argw = arg;
00360       print_winfo_w(wchmm->winfo, argw, (wchmm->ngram) ? TRUE : FALSE);
00361       print_wchmm_w(wchmm, argw);
00362       break;
00363     case 'n':                   /* node info */
00364       print_wchmm_s(wchmm, arg);
00365       break;
00366     case 'a':                   /* arc list */
00367       print_wchmm_s_arc(wchmm, arg);
00368       break;
00369 #if 0
00370     case 'r':                   /* reverse arc list */
00371       print_wchmm_r_arc(arg);
00372       break;
00373 #endif
00374     case 's':                   /* successor word list */
00375       if (wchmm->category_tree) {
00376         printf("Error: this is category tree (no successor list)\n");
00377       } else {
00378         print_wchmm_s_successor(wchmm, arg);
00379       }
00380       break;
00381     case 't':                   /* node total info of above */
00382       print_wchmm_s(wchmm, arg);
00383       print_wchmm_s_arc(wchmm, arg);
00384 #if 0
00385       print_wchmm_r_arc(arg);
00386 #endif
00387       if (!wchmm->category_tree) {
00388         print_wchmm_s_successor(wchmm, arg);
00389       }
00390       break;
00391     case 'h':                   /* hmm state info */
00392       print_hmminfo(name, wchmm->hmminfo);
00393       break;
00394     case 'l':                   /* N-gram language model info */
00395       if (wchmm->lmtype == LM_PROB) {
00396         print_ngraminfo(wchmm->ngram, arg);
00397       } else {
00398         printf("Error: this is not an N-gram model\n");
00399       }
00400       break;
00401     case 'q':                   /* quit */
00402       endflag = TRUE;
00403       break;
00404     default:                    /* help */
00405       printf("syntax: [command_character] [number(#)]\n");
00406       printf("  w [word_id] ... show word info\n");
00407       printf("  n [state]   ... show wchmm state info\n");
00408       printf("  a [state]   ... show arcs from the state\n");
00409 #if 0
00410       printf("  r [state]   ... show arcs  to  the state\n");
00411 #endif
00412       printf("  s [state]   ... show successor list of the state\n");
00413       printf("  h [hmmname] ... show HMM info of the name\n");
00414       printf("  l [nwid]    ... N-gram entry info\n");
00415       printf("  H           ... print this help\n");
00416       printf("  q           ... quit\n");
00417       break;
00418     }
00419   }
00420   printf("\n");
00421   printf("********************************************\n");
00422   printf("*****  END OF LM & LEXICON CHECK MODE  *****\n");
00423   printf("********************************************\n");
00424   printf("\n");
00425 }
00426 
00427 
00442 void
00443 check_wchmm(WCHMM_INFO *wchmm)
00444 {
00445   int i;
00446   boolean ok_flag;
00447   int node;
00448   WORD_ID w;
00449 
00450   ok_flag = TRUE;
00451 
00452   if (wchmm->hmminfo->multipath) {
00453   
00454     /* check word-beginning nodes */
00455     for(i=0;i<wchmm->startnum;i++) {
00456       node = wchmm->startnode[i];
00457       if (wchmm->state[node].out.state != NULL) {
00458         printf("Error: word-beginning node %d has output function!\n", node);
00459         ok_flag = FALSE;
00460       }
00461     }
00462     /* examine if word->state and state->word mapping is correct */
00463     for(w=0;w<wchmm->winfo->num;w++) {
00464       if (wchmm->stend[wchmm->wordend[w]] != w) {
00465         printf("Error: no match of word end for word %d!!\n", w);
00466         ok_flag = FALSE;
00467       }
00468     }
00469     
00470   } else {
00471   
00472     /* examine if word->state and state->word mapping is correct */
00473     for (i=0;i<wchmm->winfo->num;i++) {
00474       if (wchmm->stend[wchmm->wordend[i]]!=i) {
00475         printf("end ga awanai!!!: word=%d, node=%d, value=%d\n",
00476                i, wchmm->wordend[i], wchmm->stend[wchmm->wordend[i]]);
00477         ok_flag = FALSE;
00478       }
00479     }
00480   }
00481 
00482 #if 0
00483   /* check if the last state is unique and has only one output arc */
00484   {
00485     int n;
00486     A_CELL *ac;
00487 
00488     i = 0;
00489     for (n=0;n<wchmm->n;n++) {
00490       if (wchmm->stend[n] != WORD_INVALID) {
00491         i++;
00492         for (ac=wchmm->state[n].ac; ac; ac=ac->next) {
00493           if (ac->arc == n) continue;
00494           if (!wchmm->hmminfo->multipath && wchmm->ststart[ac->arc] != WORD_INVALID) continue;
00495           break;
00496         }
00497         if (ac != NULL) {
00498           printf("node %d is shared?\n",n);
00499           ok_flag = FALSE;
00500         }
00501       }
00502     }
00503     if (i != wchmm->winfo->num ) {
00504       printf("num of heads of words in wchmm not match word num!!\n");
00505       printf("from wchmm->stend:%d != from winfo:%d ?\n",i,wchmm->winfo->num);
00506       ok_flag = FALSE;
00507     }
00508   }
00509 #endif
00510 
00511   /* if check failed, go into interactive mode */
00512   if (!ok_flag) {
00513     wchmm_check_interactive(wchmm);
00514   }
00515 
00516   jlog("STAT: coordination check passed\n");
00517 }
00518 
00519 /* end of file */

Generated on Tue Dec 18 15:59:53 2007 for Julius by  doxygen 1.5.4