00001
00032
00033
00034
00035
00036
00037
00038
00039 #include <sent/htk_hmm.h>
00040 #include <sent/vocabulary.h>
00041
00047 void
00048 make_hmm_basephone_list(HTK_HMM_INFO *hmminfo)
00049 {
00050 HMM_Logical *lg;
00051 static char p[MAX_HMMNAME_LEN];
00052 BASEPHONE *match = NULL, *new;
00053 APATNODE *root;
00054 int n;
00055
00056 n = 0;
00057 root = NULL;
00058 for(lg=hmminfo->lgstart; lg; lg=lg->next) {
00059 center_name(lg->name, p);
00060 if (root != NULL) {
00061 match = aptree_search_data(p, root);
00062 if (match != NULL && strmatch(match->name, p)) continue;
00063 }
00064 new = (BASEPHONE *)mybmalloc2(sizeof(BASEPHONE), &(hmminfo->mroot));
00065 new->bgnflag = FALSE;
00066 new->endflag = FALSE;
00067 new->name = (char *)mybmalloc2(strlen(p)+1, &(hmminfo->mroot));
00068 strcpy(new->name, p);
00069 if (root == NULL) root = aptree_make_root_node(new);
00070 else aptree_add_entry(new->name, new, match->name, &root);
00071 n++;
00072 }
00073 hmminfo->basephone.num = n;
00074 hmminfo->basephone.root = root;
00075 }
00076
00082 static void
00083 print_callback_detail(void *x)
00084 {
00085 BASEPHONE *b = x;
00086 printf("\"%s\": bgn=%d, end=%d\n", b->name, b->bgnflag, b->endflag);
00087 }
00088
00094 static void
00095 print_callback_name(void *x)
00096 {
00097 BASEPHONE *b = x;
00098 printf("%s, ", b->name);
00099 }
00105 void
00106 print_all_basephone_detail(HMM_basephone *base)
00107 {
00108 aptree_traverse_and_do(base->root, print_callback_detail);
00109 }
00115 void
00116 print_all_basephone_name(HMM_basephone *base)
00117 {
00118 aptree_traverse_and_do(base->root, print_callback_name);
00119 printf("\n");
00120 }
00121
00122 static int bncnt; /* work counter: base phones with bgnflag set; reset by count_all_phone(), incremented by count_callback() */
00123 static int edcnt; /* work counter: base phones with endflag set; reset by count_all_phone(), incremented by count_callback() */
00124
00130 static void
00131 count_callback(void *x)
00132 {
00133 BASEPHONE *b = x;
00134 if (b->bgnflag) bncnt++;
00135 if (b->endflag) edcnt++;
00136 }
00137
00144 static void
00145 count_all_phone(HMM_basephone *base)
00146 {
00147 bncnt = edcnt = 0;
00148 aptree_traverse_and_do(base->root, count_callback);
00149 base->bgnnum = bncnt;
00150 base->endnum = edcnt;
00151 }
00152
00159 static boolean
00160 mark_word_edge(WORD_INFO *winfo, HMM_basephone *base)
00161 {
00162 WORD_ID w;
00163 static char p[MAX_HMMNAME_LEN];
00164 char *key;
00165 BASEPHONE *match;
00166 boolean ok_p = TRUE;
00167
00168
00169 for(w=0;w<winfo->num;w++) {
00170 if (w == winfo->head_silwid) continue;
00171 key = center_name(winfo->wseq[w][0]->name, p);
00172 match = aptree_search_data(key, base->root);
00173 if (match != NULL && strmatch(match->name, key)) {
00174 match->bgnflag = TRUE;
00175 } else {
00176
00177 jlog("Error: chkhmmlist: basephone \"%s\" used in dictionary not exist in HMM definition\n", key);
00178 ok_p = FALSE;
00179 }
00180 }
00181
00182 for(w=0;w<winfo->num;w++) {
00183 if (w == winfo->tail_silwid) continue;
00184 key = center_name(winfo->wseq[w][winfo->wlen[w]-1]->name, p);
00185 match = aptree_search_data(key, base->root);
00186 if (match != NULL && strmatch(match->name, key)) {
00187 match->endflag = TRUE;
00188 } else {
00189
00190 jlog("Error: chkhmmlist: basephone \"%s\" used in dictionary not exist in HMM definition\n", key);
00191 ok_p = FALSE;
00192 }
00193 }
00194
00195 return ok_p;
00196 }
00197
00198
00199
00200
00201 static HTK_HMM_INFO *local_hmminfo; /* HMM set under test; set by test_interword_triphone() and read by the triphone callbacks */
00202 static WORD_INFO *local_winfo; /* dictionary under test; set by test_interword_triphone() and read by the triphone callbacks */
00203 static APATNODE *local_root; /* root of the base phone index; traversed again from triphone_callback_left() */
00204 static WORD_ID current_w; /* ID of the word currently being tested; updated in the loop of test_interword_triphone() */
00205 static char gbuf[MAX_HMMNAME_LEN]; /* work buffer for a context-expanded HMM name; filled by triphone_callback_normal()/triphone_callback_left(), read by triphone_callback_right() */
00206
00207 static APATNODE *error_root; /* root of the index of missing HMM names, NULL while nothing is missing */
00208 static int error_num; /* number of distinct missing names recorded by add_to_error() */
00209
00216 static void
00217 add_to_error(char *lostname, HTK_HMM_INFO *hmminfo)
00218 {
00219 char *match = NULL, *new;
00220 if (error_root != NULL) {
00221 match = aptree_search_data(lostname, error_root);
00222 if (match != NULL && strmatch(match, lostname)) return;
00223 }
00224 new = (char *)mybmalloc2(strlen(lostname)+1, &(hmminfo->mroot));
00225 strcpy(new, lostname);
00226 if (error_root == NULL) error_root = aptree_make_root_node(new);
00227 else aptree_add_entry(new, new, match, &error_root);
00228
00229 error_num++;
00230 }
00231
/**
 * Traverse callback: print one recorded missing HMM name on its own line
 * to standard output.
 *
 * @param x  pointer to the NUL-terminated name string
 */
static void
print_error_callback(void *x)
{
  char *name = (char *)x;

  printf("%s\n", name);
}
00243
00251 static void
00252 triphone_callback_normal(void *x)
00253 {
00254 BASEPHONE *b = x;
00255 WORD_ID w = current_w;
00256 HMM_Logical *lg, *found;
00257
00258 if (b->endflag) {
00259 lg = local_winfo->wseq[w][0];
00260 strcpy(gbuf, lg->name);
00261 add_left_context(gbuf, b->name);
00262
00263 if ((found = htk_hmmdata_lookup_logical(local_hmminfo, gbuf)) == NULL) {
00264 if (lg->is_pseudo) {
00265 printf("Error: chkhmmlist: \"%s\" not found, fallback to pseudo {%s}\n", gbuf, lg->name);
00266 add_to_error(gbuf, local_hmminfo);
00267 }
00268 }
00269 }
00270 if (b->bgnflag) {
00271 lg = local_winfo->wseq[w][local_winfo->wlen[w]-1];
00272 strcpy(gbuf, lg->name);
00273 add_right_context(gbuf, b->name);
00274
00275 if ((found = htk_hmmdata_lookup_logical(local_hmminfo, gbuf)) == NULL) {
00276 if (lg->is_pseudo) {
00277 printf("Error: chkhmmlist: \"%s\" not found, fallback to pseudo {%s}\n", gbuf, lg->name);
00278 add_to_error(gbuf, local_hmminfo);
00279 }
00280 }
00281 }
00282 }
00283
00284
00285
00293 static void
00294 triphone_callback_right(void *x)
00295 {
00296 BASEPHONE *b = x;
00297 WORD_ID w = current_w;
00298 HMM_Logical *lg, *found;
00299 static char buf[MAX_HMMNAME_LEN];
00300
00301 if (b->bgnflag) {
00302 lg = local_winfo->wseq[w][0];
00303 strcpy(buf, gbuf);
00304 add_right_context(buf, b->name);
00305
00306 if ((found = htk_hmmdata_lookup_logical(local_hmminfo, buf)) == NULL) {
00307 if (lg->is_pseudo) {
00308 printf("Error: chkhmmlist: \"%s\" not found, fallback to pseudo {%s}\n", buf, lg->name);
00309 add_to_error(buf, local_hmminfo);
00310 }
00311 }
00312 }
00313 }
00314
00322 static void
00323 triphone_callback_left(void *x)
00324 {
00325 BASEPHONE *b = x;
00326 WORD_ID w = current_w;
00327 HMM_Logical *lg;
00328
00329 if (b->endflag) {
00330 lg = local_winfo->wseq[w][0];
00331 strcpy(gbuf, lg->name);
00332 add_left_context(gbuf, b->name);
00333 aptree_traverse_and_do(local_root, triphone_callback_right);
00334 }
00335 }
00336
00344 void
00345 test_interword_triphone(HTK_HMM_INFO *hmminfo, WORD_INFO *winfo)
00346 {
00347 WORD_ID w;
00348 local_hmminfo = hmminfo;
00349 local_winfo = winfo;
00350 local_root = hmminfo->basephone.root;
00351 error_root = NULL;
00352 error_num = 0;
00353
00354 printf("Inter-word triphone existence test...\n");
00355 for(w=0;w<winfo->num;w++) {
00356 current_w = w;
00357 if (winfo->wlen[w] > 1) {
00358
00359 aptree_traverse_and_do(hmminfo->basephone.root, triphone_callback_normal);
00360 } else {
00361
00362 aptree_traverse_and_do(hmminfo->basephone.root, triphone_callback_left);
00363 }
00364 }
00365 if (error_root == NULL) {
00366 printf("passed\n");
00367 } else {
00368 printf("following triphones are missing in HMMList:\n");
00369 aptree_traverse_and_do(error_root, print_error_callback);
00370 printf("total %d missing inter-word triphones\n", error_num);
00371 }
00372 }
00373
00374
00375
00385 boolean
00386 make_base_phone(HTK_HMM_INFO *hmminfo, WORD_INFO *winfo)
00387 {
00388
00389 jlog("Stat: chkhmmlist: Exploring HMM database and lexicon tree:\n");
00390 if (mark_word_edge(winfo, &(hmminfo->basephone)) == FALSE) {
00391 return FALSE;
00392 }
00393 count_all_phone(&(hmminfo->basephone));
00394 return TRUE;
00395 }
00396
00403 void
00404 print_phone_info(FILE *fp, HTK_HMM_INFO *hmminfo)
00405 {
00406
00407 fprintf(fp, "%5d physical HMMs defined in hmmdefs\n", hmminfo->totalhmmnum);
00408 if (hmminfo->totalhmmnum == hmminfo->totallogicalnum - hmminfo->totalpseudonum) {
00409 fprintf(fp, " no HMMList, physical HMM names are redirected to logicalHMM\n");
00410 } else {
00411 if (hmminfo->is_triphone) {
00412 fprintf(fp, "%5d triphones listed in hmmlist\n", hmminfo->totallogicalnum - hmminfo->totalpseudonum);
00413 } else {
00414 fprintf(fp, "%5d phones in hmmlist\n", hmminfo->totallogicalnum - hmminfo->totalpseudonum);
00415 }
00416 }
00417 if (hmminfo->totalpseudonum != 0) {
00418 fprintf(fp, "%5d pseudo HMM generated for missing mono/bi-phones\n",hmminfo->totalpseudonum);
00419 }
00420 fprintf(fp, "%5d TOTAL logical HMMs\n", hmminfo->totallogicalnum);
00421 fprintf(fp, "%5d base phones in logical HMM\n", hmminfo->basephone.num);
00422 fprintf(fp, "%5d phones appear on word head, %d phones on word tail\n", hmminfo->basephone.bgnnum, hmminfo->basephone.endnum);
00423 }