00001
00074
00075
00076
00077
00078
00079
00080
00081 #include <julius/julius.h>
00082
00083 #ifdef PASS1_IWCD
00084
00099 void
00100 outprob_style_cache_init(WCHMM_INFO *wchmm)
00101 {
00102 int n;
00103 for(n=0;n<wchmm->n;n++) {
00104 if (wchmm->state[n].out.state == NULL) continue;
00105 if (wchmm->outstyle[n] == AS_RSET) {
00106 (wchmm->state[n].out.rset)->cache.state = NULL;
00107 } else if (wchmm->outstyle[n] == AS_LRSET) {
00108 (wchmm->state[n].out.lrset)->cache.state = NULL;
00109 }
00110 }
00111 }
00112
00113
00114
00115 static char lccbuf[MAX_HMMNAME_LEN+7];
00116 static char lccbuf2[MAX_HMMNAME_LEN+7];
00117
00146 CD_Set *
00147 lcdset_lookup_with_category(WCHMM_INFO *wchmm, HMM_Logical *hmm, WORD_ID category)
00148 {
00149 CD_Set *cd;
00150
00151 leftcenter_name(hmm->name, lccbuf);
00152 sprintf(lccbuf2, "%s::%04d", lccbuf, category);
00153 if (wchmm->lcdset_category_root != NULL) {
00154 cd = aptree_search_data(lccbuf2, wchmm->lcdset_category_root);
00155 if (cd == NULL) return NULL;
00156 if (strmatch(lccbuf2, cd->name)) {
00157 return cd;
00158 }
00159 }
00160 return NULL;
00161 }
00162
00208 static void
00209 lcdset_register_with_category(WCHMM_INFO *wchmm, HMM_Logical *hmm, WORD_ID category)
00210 {
00211 WORD_ID c2, i, w;
00212 HMM_Logical *ltmp;
00213
00214 int cnt_c, cnt_w, cnt_p;
00215
00216 if (lcdset_lookup_with_category(wchmm, hmm, category) == NULL) {
00217 leftcenter_name(hmm->name, lccbuf);
00218 sprintf(lccbuf2, "%s::%04d", lccbuf, category);
00219 if (debug2_flag) {
00220 jlog("DEBUG: category-aware lcdset {%s}...", lccbuf2);
00221 }
00222 cnt_c = cnt_w = cnt_p = 0;
00223
00224 for(c2=0;c2<wchmm->dfa->term_num;c2++) {
00225 if (! dfa_cp(wchmm->dfa, category, c2)) continue;
00226
00227
00228 for(i=0;i<wchmm->dfa->term.wnum[c2];i++) {
00229 w = wchmm->dfa->term.tw[c2][i];
00230 ltmp = get_right_context_HMM(hmm, wchmm->winfo->wseq[w][0]->name, wchmm->hmminfo);
00231 if (ltmp == NULL) {
00232 ltmp = hmm;
00233 if (ltmp->is_pseudo) {
00234 error_missing_right_triphone(hmm, wchmm->winfo->wseq[w][0]->name);
00235 }
00236 }
00237 if (! ltmp->is_pseudo) {
00238 if (regist_cdset(&(wchmm->lcdset_category_root), ltmp->body.defined, lccbuf2)) {
00239 cnt_p++;
00240 }
00241 }
00242 }
00243 cnt_c++;
00244 cnt_w += wchmm->dfa->term.wnum[c2];
00245 }
00246 if (debug2_flag) {
00247 jlog("%d categories (%d words) can follow, %d HMMs registered\n", cnt_c, cnt_w, cnt_p);
00248 }
00249 }
00250 }
00251
00269 void
00270 lcdset_register_with_category_all(WCHMM_INFO *wchmm)
00271 {
00272 WORD_INFO *winfo;
00273 WORD_ID c1, w, w_prev;
00274 int i;
00275 HMM_Logical *ltmp;
00276
00277 winfo = wchmm->winfo;
00278
00279
00280
00281 for(w=0;w<winfo->num;w++) {
00282 ltmp = winfo->wseq[w][winfo->wlen[w]-1];
00283 lcdset_register_with_category(wchmm, ltmp, winfo->wton[w]);
00284 }
00285
00286
00287 for(w=0;w<winfo->num;w++) {
00288 if (winfo->wlen[w] > 1) continue;
00289 for(c1=0;c1<wchmm->dfa->term_num;c1++) {
00290 if (! dfa_cp(wchmm->dfa, c1, winfo->wton[w])) continue;
00291 for(i=0;i<wchmm->dfa->term.wnum[c1];i++) {
00292 w_prev = wchmm->dfa->term.tw[c1][i];
00293 ltmp = get_left_context_HMM(winfo->wseq[w][0], winfo->wseq[w_prev][winfo->wlen[w_prev]-1]->name, wchmm->hmminfo);
00294 if (ltmp == NULL) continue;
00295 if (ltmp->is_pseudo) continue;
00296 lcdset_register_with_category(wchmm, ltmp, winfo->wton[w]);
00297 }
00298 }
00299 }
00300 }
00301
00319 void
00320 lcdset_remove_with_category_all(WCHMM_INFO *wchmm)
00321 {
00322 free_cdset(&(wchmm->lcdset_category_root));
00323 }
00324
00325 #endif
00326
00356 LOGPROB
00357 outprob_style(WCHMM_INFO *wchmm, int node, int last_wid, int t, HTK_Param *param)
00358 {
00359 static char rbuf[MAX_HMMNAME_LEN];
00360
00361 #ifndef PASS1_IWCD
00362
00363
00364
00365 return(outprob_state(wchmm->hmmwrk, t, wchmm->state[node].out, param));
00366
00367 #else
00368
00369
00370 HMM_Logical *ohmm, *rhmm;
00371 RC_INFO *rset;
00372 LRC_INFO *lrset;
00373 CD_Set *lcd;
00374 WORD_INFO *winfo = wchmm->winfo;
00375 HTK_HMM_INFO *hmminfo = wchmm->hmminfo;
00376
00377
00378
00379 switch(wchmm->outstyle[node]) {
00380 case AS_STATE:
00381
00382
00383 return(outprob_state(wchmm->hmmwrk, t, wchmm->state[node].out.state, param));
00384 case AS_LSET:
00385
00386
00387 return(outprob_cd(wchmm->hmmwrk, t, wchmm->state[node].out.lset, param));
00388 case AS_RSET:
00389
00390
00391 rset = wchmm->state[node].out.rset;
00392
00393 if (rset->cache.state == NULL || rset->lastwid_cache != last_wid) {
00394
00395
00396 if (last_wid != WORD_INVALID) {
00397
00398 if ((ohmm = get_left_context_HMM(rset->hmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name, hmminfo)) != NULL) {
00399 rhmm = ohmm;
00400 } else {
00401
00402 rhmm = rset->hmm;
00403
00404
00405
00406 if (debug2_flag) {
00407 if (rhmm->is_pseudo) {
00408 error_missing_left_triphone(rset->hmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name);
00409 }
00410 }
00411 }
00412 } else {
00413
00414 rhmm = rset->hmm;
00415
00416
00417 if (debug2_flag) {
00418 if (rhmm->is_pseudo) {
00419 error_missing_left_triphone(rset->hmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name);
00420 }
00421 }
00422 }
00423
00424
00425 if (rhmm->is_pseudo) {
00426 rset->last_is_lset = TRUE;
00427 rset->cache.lset = &(rhmm->body.pseudo->stateset[rset->state_loc]);
00428 } else {
00429 rset->last_is_lset = FALSE;
00430 rset->cache.state = rhmm->body.defined->s[rset->state_loc];
00431 }
00432 rset->lastwid_cache = last_wid;
00433 }
00434
00435 if (rset->last_is_lset) {
00436 return(outprob_cd(wchmm->hmmwrk, t, rset->cache.lset, param));
00437 } else {
00438 return(outprob_state(wchmm->hmmwrk, t, rset->cache.state, param));
00439 }
00440 case AS_LRSET:
00441
00442 lrset = wchmm->state[node].out.lrset;
00443 if (lrset->cache.state == NULL || lrset->lastwid_cache != last_wid) {
00444
00445 rhmm = lrset->hmm;
00446
00447 strcpy(rbuf, rhmm->name);
00448 if (last_wid != WORD_INVALID) {
00449 add_left_context(rbuf, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name);
00450 }
00451 if (wchmm->category_tree) {
00452 #ifdef USE_OLD_IWCD
00453 lcd = lcdset_lookup_by_hmmname(hmminfo, rbuf);
00454 #else
00455
00456 if (last_wid != WORD_INVALID &&
00457 (ohmm = get_left_context_HMM(rhmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name, hmminfo)) != NULL) {
00458 lcd = lcdset_lookup_with_category(wchmm, ohmm, lrset->category);
00459 } else {
00460 lcd = lcdset_lookup_with_category(wchmm, rhmm, lrset->category);
00461 }
00462 #endif
00463 } else {
00464 lcd = lcdset_lookup_by_hmmname(hmminfo, rbuf);
00465 }
00466 if (lcd != NULL) {
00467 lrset->last_is_lset = TRUE;
00468 lrset->cache.lset = &(lcd->stateset[lrset->state_loc]);
00469 lrset->lastwid_cache = last_wid;
00470 } else {
00471
00472 if (rhmm->is_pseudo) {
00473 lrset->last_is_lset = TRUE;
00474 lrset->cache.lset = &(rhmm->body.pseudo->stateset[lrset->state_loc]);
00475 lrset->lastwid_cache = last_wid;
00476 } else {
00477 lrset->last_is_lset = FALSE;
00478 lrset->cache.state = rhmm->body.defined->s[lrset->state_loc];
00479 lrset->lastwid_cache = last_wid;
00480 }
00481 }
00482
00483 }
00484
00485 if (lrset->last_is_lset) {
00486 return(outprob_cd(wchmm->hmmwrk, t, lrset->cache.lset, param));
00487 } else {
00488 return(outprob_state(wchmm->hmmwrk, t, lrset->cache.state, param));
00489 }
00490 default:
00491
00492 j_internal_error("outprob_style: no outprob style??\n");
00493 return(LOG_ZERO);
00494 }
00495
00496 #endif
00497
00498 }
00499
00522 void
00523 error_missing_right_triphone(HMM_Logical *base, char *rc_name)
00524 {
00525 static char rbuf[MAX_HMMNAME_LEN];
00526
00527 strcpy(rbuf, base->name);
00528 add_right_context(rbuf, rc_name);
00529 jlog("WARNING: IW-triphone for word end \"%s\" not found, fallback to pseudo {%s}\n", rbuf, base->name);
00530 }
00531
00554 void
00555 error_missing_left_triphone(HMM_Logical *base, char *lc_name)
00556 {
00557 static char rbuf[MAX_HMMNAME_LEN];
00558
00559 strcpy(rbuf, base->name);
00560 add_left_context(rbuf, lc_name);
00561 jlog("WARNING: IW-triphone for word head \"%s\" not found, fallback to pseudo {%s}\n", rbuf, base->name);
00562 }
00563
00564