libsent/src/hmminfo/chkhmmlist.c

Go to the documentation of this file.
00001 
00031 /*
00032  * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
00033  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00034  * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology
00035  * All rights reserved
00036  */
00037 
00038 #include <sent/htk_hmm.h>
00039 #include <sent/vocabulary.h>
00040 
00046 void
00047 make_hmm_basephone_list(HTK_HMM_INFO *hmminfo)
00048 {
00049   HMM_Logical *lg;
00050   static char p[MAX_HMMNAME_LEN];
00051   BASEPHONE *match = NULL, *new;
00052   APATNODE *root;
00053   int n;
00054 
00055   n = 0;
00056   root = NULL;
00057   for(lg=hmminfo->lgstart; lg; lg=lg->next) {
00058     center_name(lg->name, p);
00059     if (root != NULL) {
00060       match = aptree_search_data(p, root);
00061       if (strmatch(match->name, p)) continue;
00062     }
00063     new = (BASEPHONE *)mybmalloc2(sizeof(BASEPHONE), &(hmminfo->mroot));
00064     new->bgnflag = FALSE;
00065     new->endflag = FALSE;
00066     new->name = (char *)mybmalloc2(strlen(p)+1, &(hmminfo->mroot));
00067     strcpy(new->name, p);
00068     if (root == NULL) root = aptree_make_root_node(new);
00069     else aptree_add_entry(new->name, new, match->name, &root);
00070     n++;
00071   }
00072   hmminfo->basephone.num = n;
00073   hmminfo->basephone.root = root;
00074 }
00075 
00081 static void
00082 print_callback_detail(void *x)
00083 {
00084   BASEPHONE *b = x;
00085   j_printf("\"%s\": bgn=%d, end=%d\n", b->name, b->bgnflag, b->endflag);
00086 }
00087 
00093 static void
00094 print_callback_name(void *x)
00095 {
00096   BASEPHONE *b = x;
00097   j_printf("%s, ", b->name);
00098 }
00104 void
00105 print_all_basephone_detail(HMM_basephone *base)
00106 {
00107   aptree_traverse_and_do(base->root, print_callback_detail);
00108 }
00114 void
00115 print_all_basephone_name(HMM_basephone *base)
00116 {
00117   aptree_traverse_and_do(base->root, print_callback_name);
00118   j_printf("\n");
00119 }
00120 
00121 static int bncnt;               /* running count of base phones with bgnflag set; reset by count_all_phone(), incremented by count_callback() */
00122 static int edcnt;               /* running count of base phones with endflag set; reset by count_all_phone(), incremented by count_callback() */
00123 
00129 static void
00130 count_callback(void *x)
00131 {
00132   BASEPHONE *b = x;
00133   if (b->bgnflag) bncnt++;
00134   if (b->endflag) edcnt++;
00135 }
00136 
00143 static void
00144 count_all_phone(HMM_basephone *base)
00145 {
00146   bncnt = edcnt = 0;
00147   aptree_traverse_and_do(base->root, count_callback);
00148   base->bgnnum = bncnt;
00149   base->endnum = edcnt;
00150 }
00151 
00158 static void
00159 mark_word_edge(WORD_INFO *winfo, HMM_basephone *base)
00160 {
00161   WORD_ID w;
00162   static char p[MAX_HMMNAME_LEN];
00163   char *key;
00164   BASEPHONE *match;
00165 
00166   /* mark what is at beginning of word (can be right context) */
00167   for(w=0;w<winfo->num;w++) {
00168     if (w == winfo->head_silwid) continue;
00169     key = center_name(winfo->wseq[w][0]->name, p);
00170     match = aptree_search_data(key, base->root);
00171     if (strmatch(match->name, key)) {
00172       match->bgnflag = TRUE;
00173     } else {
00174       /* not found!!! */
00175       j_error("InternalError: basephone \"%s\" specified in dict, but not found in HMM\n");
00176     }
00177   }
00178   /* mark what is at end of word (can be left context) */
00179   for(w=0;w<winfo->num;w++) {
00180     if (w == winfo->tail_silwid) continue;
00181     key = center_name(winfo->wseq[w][winfo->wlen[w]-1]->name, p);
00182     match = aptree_search_data(key, base->root);
00183     if (strmatch(match->name, key)) {
00184       match->endflag = TRUE;
00185     } else {
00186       /* not found!!! */
00187       j_error("InternalError: basephone \"%s\" specified in dict, but not found in HMM\n");
00188     }
00189   }
00190 }
00191 
00192 
00193 /* Check whether all possible inter-word triphones exist among the logical HMMs. */
00194 /* Temporary storage shared with the aptree traversal callbacks below. */
00195 static HTK_HMM_INFO *local_hmminfo; /* HMM definition used for lookups; set by test_interword_triphone() */
00196 static WORD_INFO *local_winfo;  /* word dictionary being tested; set by test_interword_triphone() */
00197 static APATNODE *local_root;    /* base phone tree (hmminfo->basephone.root), traversed again by triphone_callback_left() */
00198 static WORD_ID current_w;       /* ID of the word currently being checked */
00199 static char gbuf[MAX_HMMNAME_LEN];              /* scratch buffer in which candidate triphone names are composed */
00200 
00201 static APATNODE *error_root;    /* tree of missing triphone names collected by add_to_error(); NULL while none is missing */
00202 static int error_num;           /* number of distinct names stored under error_root */
00203 
00210 static void
00211 add_to_error(char *lostname, HTK_HMM_INFO *hmminfo)
00212 {
00213   char *match = NULL, *new;
00214   if (error_root != NULL) {
00215     match = aptree_search_data(lostname, error_root);
00216     if (strmatch(match, lostname)) return;
00217   }
00218   new = (char *)mybmalloc2(strlen(lostname)+1, &(hmminfo->mroot));
00219   strcpy(new, lostname);
00220   if (error_root == NULL) error_root = aptree_make_root_node(new);
00221   else aptree_add_entry(new, new, match, &error_root);
00222 
00223   error_num++;
00224 }
00225 
/**
 * Tree-traversal callback: print one missing triphone name on its own
 * line.
 *
 * @param x Pointer to the name string stored in the visited node.
 */
static void
print_error_callback(void *x)
{
  const char *name = (const char *)x;

  j_printf("%s\n", name);
}
00237 
00245 static void
00246 triphone_callback_normal(void *x)
00247 {
00248   BASEPHONE *b = x;
00249   WORD_ID w = current_w;
00250   HMM_Logical *lg, *found;
00251 
00252   if (b->endflag) {             /* x can appear as end of word */
00253     lg = local_winfo->wseq[w][0];
00254     strcpy(gbuf, lg->name);
00255     add_left_context(gbuf, b->name);
00256     /* printf("checking \"%s\" - \"%s\"\n", b->name, lg->name); */
00257     if ((found = htk_hmmdata_lookup_logical(local_hmminfo, gbuf)) == NULL) {
00258       if (lg->is_pseudo) {
00259         j_printerr("Error: \"%s\" not found, fallback to pseudo {%s}\n", gbuf, lg->name);
00260         add_to_error(gbuf, local_hmminfo);
00261       }
00262     }
00263   }
00264   if (b->bgnflag) {             /* x can appear as beginning of word */
00265     lg = local_winfo->wseq[w][local_winfo->wlen[w]-1];
00266     strcpy(gbuf, lg->name);
00267     add_right_context(gbuf, b->name);
00268     /* printf("checking \"%s\" - \"%s\"\n", lg->name, b->name); */
00269     if ((found = htk_hmmdata_lookup_logical(local_hmminfo, gbuf)) == NULL) {
00270       if (lg->is_pseudo) {
00271         j_printerr("Error: \"%s\" not found, fallback to pseudo {%s}\n", gbuf, lg->name);
00272         add_to_error(gbuf, local_hmminfo);
00273       }
00274     }
00275   }
00276 }
00277 
00278 /* for words with only one phone, all combination of "x - current_w + x"
00279    should be checked */
00287 static void
00288 triphone_callback_right(void *x)
00289 {
00290   BASEPHONE *b = x;
00291   WORD_ID w = current_w;
00292   HMM_Logical *lg, *found;
00293   static char buf[MAX_HMMNAME_LEN];
00294 
00295   if (b->bgnflag) {
00296     lg = local_winfo->wseq[w][0];
00297     strcpy(buf, gbuf);
00298     add_right_context(buf, b->name);
00299     /* printf("    checking \"%s\" - \"%s\"\n", gbuf, b->name); */
00300     if ((found = htk_hmmdata_lookup_logical(local_hmminfo, buf)) == NULL) {
00301       if (lg->is_pseudo) {
00302         j_printerr("Error: \"%s\" not found, fallback to pseudo {%s}\n", buf, lg->name);
00303         add_to_error(buf, local_hmminfo);
00304       }
00305     }
00306   }
00307 }
00308 
00316 static void
00317 triphone_callback_left(void *x)
00318 {
00319   BASEPHONE *b = x;
00320   WORD_ID w = current_w;
00321   HMM_Logical *lg;
00322 
00323   if (b->endflag) {
00324     lg = local_winfo->wseq[w][0];
00325     strcpy(gbuf, lg->name);
00326     add_left_context(gbuf, b->name);
00327     /*printf("continue checking \"%s\" - \"%s\"\n", b->name, lg->name);*/
00328     aptree_traverse_and_do(local_root, triphone_callback_right);
00329   }
00330 }
00331 
00339 void
00340 test_interword_triphone(HTK_HMM_INFO *hmminfo, WORD_INFO *winfo)
00341 {
00342   WORD_ID w;
00343   local_hmminfo = hmminfo;
00344   local_winfo = winfo;
00345   local_root = hmminfo->basephone.root;
00346   error_root = NULL;
00347   error_num = 0;
00348 
00349   j_printf("Inter-word triphone existence test...\n");
00350   for(w=0;w<winfo->num;w++) {
00351     current_w = w;
00352     if (winfo->wlen[w] > 1) {
00353       /* check beginning phone and ending phone of this word */
00354       aptree_traverse_and_do(hmminfo->basephone.root, triphone_callback_normal);
00355     } else {
00356       /* for word of only 1 phoneme, check both */
00357       aptree_traverse_and_do(hmminfo->basephone.root, triphone_callback_left);
00358     }
00359   }
00360   if (error_root == NULL) {
00361     j_printf("passed\n");
00362   } else {
00363     j_printf("following triphones are missing in HMMList:\n");
00364     aptree_traverse_and_do(error_root, print_error_callback);
00365     j_printf("total %d missing inter-word triphones\n", error_num);
00366   }
00367 }
00368 
00369 
00370 
00380 void
00381 make_base_phone(HTK_HMM_INFO *hmminfo, WORD_INFO *winfo)
00382 {
00383   /* gather base phones and word-{head,tail} phones */
00384   j_printf("Exploring HMM database and lexicon tree:\n");
00385   mark_word_edge(winfo, &(hmminfo->basephone));
00386   count_all_phone(&(hmminfo->basephone));
00387 }
00388 
00394 void
00395 print_phone_info(HTK_HMM_INFO *hmminfo)
00396 {
00397   /* output information */
00398   j_printf("%5d physical HMMs defined in hmmdefs\n", hmminfo->totalhmmnum);
00399   if (hmminfo->totalhmmnum == hmminfo->totallogicalnum - hmminfo->totalpseudonum) {
00400     j_printf("   no HMMList, physical HMM names are redirected to logicalHMM\n");
00401   } else {
00402     if (hmminfo->is_triphone) {
00403       j_printf("%5d triphones listed in hmmlist\n", hmminfo->totallogicalnum - hmminfo->totalpseudonum);
00404     } else {
00405       j_printf("%5d phones in hmmlist\n", hmminfo->totallogicalnum - hmminfo->totalpseudonum);
00406     }
00407   }
00408   if (hmminfo->totalpseudonum != 0) {
00409     j_printf("%5d pseudo HMM generated for missing mono/bi-phones\n",hmminfo->totalpseudonum);
00410   }
00411   j_printf("%5d TOTAL logical HMMs\n", hmminfo->totallogicalnum);
00412   j_printf("%5d base phones in logical HMM\n", hmminfo->basephone.num);
00413   j_printf("%5d phones appear on word head, %d phones on word tail\n", hmminfo->basephone.bgnnum, hmminfo->basephone.endnum);
00414 
00415 }

Generated on Tue Dec 26 16:16:33 2006 for Julius by  doxygen 1.5.0