00001
00031
00032
00033
00034
00035
00036
00037
00038 #include <sent/htk_hmm.h>
00039 #include <sent/vocabulary.h>
00040
00046 void
00047 make_hmm_basephone_list(HTK_HMM_INFO *hmminfo)
00048 {
00049 HMM_Logical *lg;
00050 static char p[MAX_HMMNAME_LEN];
00051 BASEPHONE *match = NULL, *new;
00052 APATNODE *root;
00053 int n;
00054
00055 n = 0;
00056 root = NULL;
00057 for(lg=hmminfo->lgstart; lg; lg=lg->next) {
00058 center_name(lg->name, p);
00059 if (root != NULL) {
00060 match = aptree_search_data(p, root);
00061 if (strmatch(match->name, p)) continue;
00062 }
00063 new = (BASEPHONE *)mybmalloc2(sizeof(BASEPHONE), &(hmminfo->mroot));
00064 new->bgnflag = FALSE;
00065 new->endflag = FALSE;
00066 new->name = (char *)mybmalloc2(strlen(p)+1, &(hmminfo->mroot));
00067 strcpy(new->name, p);
00068 if (root == NULL) root = aptree_make_root_node(new);
00069 else aptree_add_entry(new->name, new, match->name, &root);
00070 n++;
00071 }
00072 hmminfo->basephone.num = n;
00073 hmminfo->basephone.root = root;
00074 }
00075
00081 static void
00082 print_callback_detail(void *x)
00083 {
00084 BASEPHONE *b = x;
00085 j_printf("\"%s\": bgn=%d, end=%d\n", b->name, b->bgnflag, b->endflag);
00086 }
00087
00093 static void
00094 print_callback_name(void *x)
00095 {
00096 BASEPHONE *b = x;
00097 j_printf("%s, ", b->name);
00098 }
00104 void
00105 print_all_basephone_detail(HMM_basephone *base)
00106 {
00107 aptree_traverse_and_do(base->root, print_callback_detail);
00108 }
00114 void
00115 print_all_basephone_name(HMM_basephone *base)
00116 {
00117 aptree_traverse_and_do(base->root, print_callback_name);
00118 j_printf("\n");
00119 }
00120
/* Work counters updated by count_callback() while the base phone tree is
   being traversed; count_all_phone() resets them before each traversal. */
static int bncnt = 0;   /* entries seen with bgnflag set */
static int edcnt = 0;   /* entries seen with endflag set */
00123
00129 static void
00130 count_callback(void *x)
00131 {
00132 BASEPHONE *b = x;
00133 if (b->bgnflag) bncnt++;
00134 if (b->endflag) edcnt++;
00135 }
00136
00143 static void
00144 count_all_phone(HMM_basephone *base)
00145 {
00146 bncnt = edcnt = 0;
00147 aptree_traverse_and_do(base->root, count_callback);
00148 base->bgnnum = bncnt;
00149 base->endnum = edcnt;
00150 }
00151
00158 static void
00159 mark_word_edge(WORD_INFO *winfo, HMM_basephone *base)
00160 {
00161 WORD_ID w;
00162 static char p[MAX_HMMNAME_LEN];
00163 char *key;
00164 BASEPHONE *match;
00165
00166
00167 for(w=0;w<winfo->num;w++) {
00168 if (w == winfo->head_silwid) continue;
00169 key = center_name(winfo->wseq[w][0]->name, p);
00170 match = aptree_search_data(key, base->root);
00171 if (strmatch(match->name, key)) {
00172 match->bgnflag = TRUE;
00173 } else {
00174
00175 j_error("InternalError: basephone \"%s\" specified in dict, but not found in HMM\n");
00176 }
00177 }
00178
00179 for(w=0;w<winfo->num;w++) {
00180 if (w == winfo->tail_silwid) continue;
00181 key = center_name(winfo->wseq[w][winfo->wlen[w]-1]->name, p);
00182 match = aptree_search_data(key, base->root);
00183 if (strmatch(match->name, key)) {
00184 match->endflag = TRUE;
00185 } else {
00186
00187 j_error("InternalError: basephone \"%s\" specified in dict, but not found in HMM\n");
00188 }
00189 }
00190 }
00191
00192
00193
00194
00195 static HTK_HMM_INFO *local_hmminfo;
00196 static WORD_INFO *local_winfo;
00197 static APATNODE *local_root;
00198 static WORD_ID current_w;
00199 static char gbuf[MAX_HMMNAME_LEN];
00200
00201 static APATNODE *error_root;
00202 static int error_num;
00203
00210 static void
00211 add_to_error(char *lostname, HTK_HMM_INFO *hmminfo)
00212 {
00213 char *match = NULL, *new;
00214 if (error_root != NULL) {
00215 match = aptree_search_data(lostname, error_root);
00216 if (strmatch(match, lostname)) return;
00217 }
00218 new = (char *)mybmalloc2(strlen(lostname)+1, &(hmminfo->mroot));
00219 strcpy(new, lostname);
00220 if (error_root == NULL) error_root = aptree_make_root_node(new);
00221 else aptree_add_entry(new, new, match, &error_root);
00222
00223 error_num++;
00224 }
00225
/**
 * Tree traversal callback: print one recorded missing-phone name on its
 * own line.
 *
 * @param x  pointer to the name string stored in the error index
 */
static void
print_error_callback(void *x)
{
  char *name = (char *)x;

  j_printf("%s\n", name);
}
00237
00245 static void
00246 triphone_callback_normal(void *x)
00247 {
00248 BASEPHONE *b = x;
00249 WORD_ID w = current_w;
00250 HMM_Logical *lg, *found;
00251
00252 if (b->endflag) {
00253 lg = local_winfo->wseq[w][0];
00254 strcpy(gbuf, lg->name);
00255 add_left_context(gbuf, b->name);
00256
00257 if ((found = htk_hmmdata_lookup_logical(local_hmminfo, gbuf)) == NULL) {
00258 if (lg->is_pseudo) {
00259 j_printerr("Error: \"%s\" not found, fallback to pseudo {%s}\n", gbuf, lg->name);
00260 add_to_error(gbuf, local_hmminfo);
00261 }
00262 }
00263 }
00264 if (b->bgnflag) {
00265 lg = local_winfo->wseq[w][local_winfo->wlen[w]-1];
00266 strcpy(gbuf, lg->name);
00267 add_right_context(gbuf, b->name);
00268
00269 if ((found = htk_hmmdata_lookup_logical(local_hmminfo, gbuf)) == NULL) {
00270 if (lg->is_pseudo) {
00271 j_printerr("Error: \"%s\" not found, fallback to pseudo {%s}\n", gbuf, lg->name);
00272 add_to_error(gbuf, local_hmminfo);
00273 }
00274 }
00275 }
00276 }
00277
00278
00279
00287 static void
00288 triphone_callback_right(void *x)
00289 {
00290 BASEPHONE *b = x;
00291 WORD_ID w = current_w;
00292 HMM_Logical *lg, *found;
00293 static char buf[MAX_HMMNAME_LEN];
00294
00295 if (b->bgnflag) {
00296 lg = local_winfo->wseq[w][0];
00297 strcpy(buf, gbuf);
00298 add_right_context(buf, b->name);
00299
00300 if ((found = htk_hmmdata_lookup_logical(local_hmminfo, buf)) == NULL) {
00301 if (lg->is_pseudo) {
00302 j_printerr("Error: \"%s\" not found, fallback to pseudo {%s}\n", buf, lg->name);
00303 add_to_error(buf, local_hmminfo);
00304 }
00305 }
00306 }
00307 }
00308
00316 static void
00317 triphone_callback_left(void *x)
00318 {
00319 BASEPHONE *b = x;
00320 WORD_ID w = current_w;
00321 HMM_Logical *lg;
00322
00323 if (b->endflag) {
00324 lg = local_winfo->wseq[w][0];
00325 strcpy(gbuf, lg->name);
00326 add_left_context(gbuf, b->name);
00327
00328 aptree_traverse_and_do(local_root, triphone_callback_right);
00329 }
00330 }
00331
00339 void
00340 test_interword_triphone(HTK_HMM_INFO *hmminfo, WORD_INFO *winfo)
00341 {
00342 WORD_ID w;
00343 local_hmminfo = hmminfo;
00344 local_winfo = winfo;
00345 local_root = hmminfo->basephone.root;
00346 error_root = NULL;
00347 error_num = 0;
00348
00349 j_printf("Inter-word triphone existence test...\n");
00350 for(w=0;w<winfo->num;w++) {
00351 current_w = w;
00352 if (winfo->wlen[w] > 1) {
00353
00354 aptree_traverse_and_do(hmminfo->basephone.root, triphone_callback_normal);
00355 } else {
00356
00357 aptree_traverse_and_do(hmminfo->basephone.root, triphone_callback_left);
00358 }
00359 }
00360 if (error_root == NULL) {
00361 j_printf("passed\n");
00362 } else {
00363 j_printf("following triphones are missing in HMMList:\n");
00364 aptree_traverse_and_do(error_root, print_error_callback);
00365 j_printf("total %d missing inter-word triphones\n", error_num);
00366 }
00367 }
00368
00369
00370
00380 void
00381 make_base_phone(HTK_HMM_INFO *hmminfo, WORD_INFO *winfo)
00382 {
00383
00384 j_printf("Exploring HMM database and lexicon tree:\n");
00385 mark_word_edge(winfo, &(hmminfo->basephone));
00386 count_all_phone(&(hmminfo->basephone));
00387 }
00388
00394 void
00395 print_phone_info(HTK_HMM_INFO *hmminfo)
00396 {
00397
00398 j_printf("%5d physical HMMs defined in hmmdefs\n", hmminfo->totalhmmnum);
00399 if (hmminfo->totalhmmnum == hmminfo->totallogicalnum - hmminfo->totalpseudonum) {
00400 j_printf(" no HMMList, physical HMM names are redirected to logicalHMM\n");
00401 } else {
00402 if (hmminfo->is_triphone) {
00403 j_printf("%5d triphones listed in hmmlist\n", hmminfo->totallogicalnum - hmminfo->totalpseudonum);
00404 } else {
00405 j_printf("%5d phones in hmmlist\n", hmminfo->totallogicalnum - hmminfo->totalpseudonum);
00406 }
00407 }
00408 if (hmminfo->totalpseudonum != 0) {
00409 j_printf("%5d pseudo HMM generated for missing mono/bi-phones\n",hmminfo->totalpseudonum);
00410 }
00411 j_printf("%5d TOTAL logical HMMs\n", hmminfo->totallogicalnum);
00412 j_printf("%5d base phones in logical HMM\n", hmminfo->basephone.num);
00413 j_printf("%5d phones appear on word head, %d phones on word tail\n", hmminfo->basephone.bgnnum, hmminfo->basephone.endnum);
00414
00415 }