00001
00031
00032
00033
00034
00035
00036
00037
00038 #include <sent/htk_hmm.h>
00039 #include <sent/vocabulary.h>
00040
00046 void
00047 make_hmm_basephone_list(HTK_HMM_INFO *hmminfo)
00048 {
00049 HMM_Logical *lg;
00050 static char p[MAX_HMMNAME_LEN];
00051 BASEPHONE *match = NULL, *new;
00052 APATNODE *root;
00053 int n;
00054
00055 n = 0;
00056 root = NULL;
00057 for(lg=hmminfo->lgstart; lg; lg=lg->next) {
00058 center_name(lg->name, p);
00059 if (root != NULL) {
00060 match = aptree_search_data(p, root);
00061 if (strmatch(match->name, p)) continue;
00062 }
00063 new = (BASEPHONE *)mybmalloc(sizeof(BASEPHONE));
00064 new->bgnflag = FALSE;
00065 new->endflag = FALSE;
00066 new->name = (char *)mybmalloc(strlen(p)+1);
00067 strcpy(new->name, p);
00068 if (root == NULL) root = aptree_make_root_node(new);
00069 else aptree_add_entry(new->name, new, match->name, &root);
00070 n++;
00071 }
00072 hmminfo->basephone.num = n;
00073 hmminfo->basephone.root = root;
00074 }
00075
00081 static void
00082 print_callback_detail(void *x)
00083 {
00084 BASEPHONE *b = x;
00085 j_printf("\"%s\": bgn=%d, end=%d\n", b->name, b->bgnflag, b->endflag);
00086 }
00087
00093 static void
00094 print_callback_name(void *x)
00095 {
00096 BASEPHONE *b = x;
00097 j_printf("%s, ", b->name);
00098 }
00104 void
00105 print_all_basephone_detail(HMM_basephone *base)
00106 {
00107 aptree_traverse_and_do(base->root, print_callback_detail);
00108 }
00114 void
00115 print_all_basephone_name(HMM_basephone *base)
00116 {
00117 aptree_traverse_and_do(base->root, print_callback_name);
00118 j_printf("\n");
00119 }
00120
/* Running totals shared between count_all_phone() and count_callback(). */
static int bncnt = 0;   /* entries whose bgnflag is set */
static int edcnt = 0;   /* entries whose endflag is set */
00123
00129 static void
00130 count_callback(void *x)
00131 {
00132 BASEPHONE *b = x;
00133 if (b->bgnflag) bncnt++;
00134 if (b->endflag) edcnt++;
00135 }
00136
00143 static void
00144 count_all_phone(HMM_basephone *base)
00145 {
00146 bncnt = edcnt = 0;
00147 aptree_traverse_and_do(base->root, count_callback);
00148 base->bgnnum = bncnt;
00149 base->endnum = edcnt;
00150 }
00151
00158 static void
00159 mark_word_edge(WORD_INFO *winfo, HMM_basephone *base)
00160 {
00161 WORD_ID w;
00162 static char p[MAX_HMMNAME_LEN];
00163 char *key;
00164 BASEPHONE *match;
00165
00166
00167 for(w=0;w<winfo->num;w++) {
00168 if (w == winfo->head_silwid) continue;
00169 key = center_name(winfo->wseq[w][0]->name, p);
00170 match = aptree_search_data(key, base->root);
00171 if (strmatch(match->name, key)) {
00172 match->bgnflag = TRUE;
00173 } else {
00174
00175 j_error("InternalError: basephone \"%s\" specified in dict, but not found in HMM\n");
00176 }
00177 }
00178
00179 for(w=0;w<winfo->num;w++) {
00180 if (w == winfo->tail_silwid) continue;
00181 key = center_name(winfo->wseq[w][winfo->wlen[w]-1]->name, p);
00182 match = aptree_search_data(key, base->root);
00183 if (strmatch(match->name, key)) {
00184 match->endflag = TRUE;
00185 } else {
00186
00187 j_error("InternalError: basephone \"%s\" specified in dict, but not found in HMM\n");
00188 }
00189 }
00190 }
00191
00192
00193
00194
00195 static HTK_HMM_INFO *local_hmminfo;
00196 static WORD_INFO *local_winfo;
00197 static APATNODE *local_root;
00198 static WORD_ID current_w;
00199 static char gbuf[MAX_HMMNAME_LEN];
00200
00201 static APATNODE *error_root;
00202 static int error_num;
00203
00209 static void
00210 add_to_error(char *lostname)
00211 {
00212 char *match = NULL, *new;
00213 if (error_root != NULL) {
00214 match = aptree_search_data(lostname, error_root);
00215 if (strmatch(match, lostname)) return;
00216 }
00217 new = (char *)mybmalloc(strlen(lostname)+1);
00218 strcpy(new, lostname);
00219 if (error_root == NULL) error_root = aptree_make_root_node(new);
00220 else aptree_add_entry(new, new, match, &error_root);
00221
00222 error_num++;
00223 }
00224
/**
 * Traversal callback: print one recorded missing name on its own line.
 *
 * @param x  pointer to the stored name string
 */
static void
print_error_callback(void *x)
{
  const char *name = (const char *)x;

  j_printf("%s\n", name);
}
00236
00244 static void
00245 triphone_callback_normal(void *x)
00246 {
00247 BASEPHONE *b = x;
00248 WORD_ID w = current_w;
00249 HMM_Logical *lg, *found;
00250
00251 if (b->endflag) {
00252 lg = local_winfo->wseq[w][0];
00253 strcpy(gbuf, lg->name);
00254 add_left_context(gbuf, b->name);
00255
00256 if ((found = htk_hmmdata_lookup_logical(local_hmminfo, gbuf)) == NULL) {
00257 if (lg->is_pseudo) {
00258 j_printerr("Error: \"%s\" not found, fallback to pseudo {%s}\n", gbuf, lg->name);
00259 add_to_error(gbuf);
00260 }
00261 }
00262 }
00263 if (b->bgnflag) {
00264 lg = local_winfo->wseq[w][local_winfo->wlen[w]-1];
00265 strcpy(gbuf, lg->name);
00266 add_right_context(gbuf, b->name);
00267
00268 if ((found = htk_hmmdata_lookup_logical(local_hmminfo, gbuf)) == NULL) {
00269 if (lg->is_pseudo) {
00270 j_printerr("Error: \"%s\" not found, fallback to pseudo {%s}\n", gbuf, lg->name);
00271 add_to_error(gbuf);
00272 }
00273 }
00274 }
00275 }
00276
00277
00278
00286 static void
00287 triphone_callback_right(void *x)
00288 {
00289 BASEPHONE *b = x;
00290 WORD_ID w = current_w;
00291 HMM_Logical *lg, *found;
00292 static char buf[MAX_HMMNAME_LEN];
00293
00294 if (b->bgnflag) {
00295 lg = local_winfo->wseq[w][0];
00296 strcpy(buf, gbuf);
00297 add_right_context(buf, b->name);
00298
00299 if ((found = htk_hmmdata_lookup_logical(local_hmminfo, buf)) == NULL) {
00300 if (lg->is_pseudo) {
00301 j_printerr("Error: \"%s\" not found, fallback to pseudo {%s}\n", buf, lg->name);
00302 add_to_error(buf);
00303 }
00304 }
00305 }
00306 }
00307
00315 static void
00316 triphone_callback_left(void *x)
00317 {
00318 BASEPHONE *b = x;
00319 WORD_ID w = current_w;
00320 HMM_Logical *lg;
00321
00322 if (b->endflag) {
00323 lg = local_winfo->wseq[w][0];
00324 strcpy(gbuf, lg->name);
00325 add_left_context(gbuf, b->name);
00326
00327 aptree_traverse_and_do(local_root, triphone_callback_right);
00328 }
00329 }
00330
00338 void
00339 test_interword_triphone(HTK_HMM_INFO *hmminfo, WORD_INFO *winfo)
00340 {
00341 WORD_ID w;
00342 local_hmminfo = hmminfo;
00343 local_winfo = winfo;
00344 local_root = hmminfo->basephone.root;
00345 error_root = NULL;
00346 error_num = 0;
00347
00348 j_printf("Inter-word triphone existence test...\n");
00349 for(w=0;w<winfo->num;w++) {
00350 current_w = w;
00351 if (winfo->wlen[w] > 1) {
00352
00353 aptree_traverse_and_do(hmminfo->basephone.root, triphone_callback_normal);
00354 } else {
00355
00356 aptree_traverse_and_do(hmminfo->basephone.root, triphone_callback_left);
00357 }
00358 }
00359 if (error_root == NULL) {
00360 j_printf("passed\n");
00361 } else {
00362 j_printf("following triphones are missing in HMMList:\n");
00363 aptree_traverse_and_do(error_root, print_error_callback);
00364 j_printf("total %d missing inter-word triphones\n", error_num);
00365 }
00366 }
00367
00368
00369
00379 void
00380 make_base_phone(HTK_HMM_INFO *hmminfo, WORD_INFO *winfo)
00381 {
00382
00383 j_printf("Exploring HMM database and lexicon tree:\n");
00384 mark_word_edge(winfo, &(hmminfo->basephone));
00385 count_all_phone(&(hmminfo->basephone));
00386 }
00387
00393 void
00394 print_phone_info(HTK_HMM_INFO *hmminfo)
00395 {
00396
00397 j_printf("%5d physical HMMs defined in hmmdefs\n", hmminfo->totalhmmnum);
00398 if (hmminfo->totalhmmnum == hmminfo->totallogicalnum - hmminfo->totalpseudonum) {
00399 j_printf(" no HMMList, physical HMM names are redirected to logicalHMM\n");
00400 } else {
00401 if (hmminfo->is_triphone) {
00402 j_printf("%5d triphones listed in hmmlist\n", hmminfo->totallogicalnum - hmminfo->totalpseudonum);
00403 } else {
00404 j_printf("%5d phones in hmmlist\n", hmminfo->totallogicalnum - hmminfo->totalpseudonum);
00405 }
00406 }
00407 if (hmminfo->totalpseudonum != 0) {
00408 j_printf("%5d pseudo HMM generated for missing mono/bi-phones\n",hmminfo->totalpseudonum);
00409 }
00410 j_printf("%5d TOTAL logical HMMs\n", hmminfo->totallogicalnum);
00411 j_printf("%5d base phones in logical HMM\n", hmminfo->basephone.num);
00412 j_printf("%5d phones appear on word head, %d phones on word tail\n", hmminfo->basephone.bgnnum, hmminfo->basephone.endnum);
00413
00414 }