00001
00074
00075
00076
00077
00078
00079
00080
00081 #include <julius/julius.h>
00082
00083 #ifdef PASS1_IWCD
00084
00099 void
00100 outprob_style_cache_init(WCHMM_INFO *wchmm)
00101 {
00102 int n;
00103 for(n=0;n<wchmm->n;n++) {
00104 if (wchmm->state[n].out.state == NULL) continue;
00105 if (wchmm->outstyle[n] == AS_RSET) {
00106 (wchmm->state[n].out.rset)->cache.state = NULL;
00107 } else if (wchmm->outstyle[n] == AS_LRSET) {
00108 (wchmm->state[n].out.lrset)->cache.state = NULL;
00109 }
00110 }
00111 }
00112
00113
00114
00143 CD_Set *
00144 lcdset_lookup_with_category(WCHMM_INFO *wchmm, HMM_Logical *hmm, WORD_ID category)
00145 {
00146 CD_Set *cd;
00147
00148 leftcenter_name(hmm->name, wchmm->lccbuf);
00149 sprintf(wchmm->lccbuf2, "%s::%04d", wchmm->lccbuf, category);
00150 if (wchmm->lcdset_category_root != NULL) {
00151 cd = aptree_search_data(wchmm->lccbuf2, wchmm->lcdset_category_root);
00152 if (cd == NULL) return NULL;
00153 if (strmatch(wchmm->lccbuf2, cd->name)) {
00154 return cd;
00155 }
00156 }
00157 return NULL;
00158 }
00159
00205 static void
00206 lcdset_register_with_category(WCHMM_INFO *wchmm, HMM_Logical *hmm, WORD_ID category)
00207 {
00208 WORD_ID c2, i, w;
00209 HMM_Logical *ltmp;
00210
00211 int cnt_c, cnt_w, cnt_p;
00212
00213 if (lcdset_lookup_with_category(wchmm, hmm, category) == NULL) {
00214 leftcenter_name(hmm->name, wchmm->lccbuf);
00215 sprintf(wchmm->lccbuf2, "%s::%04d", wchmm->lccbuf, category);
00216 if (debug2_flag) {
00217 jlog("DEBUG: category-aware lcdset {%s}...", wchmm->lccbuf2);
00218 }
00219 cnt_c = cnt_w = cnt_p = 0;
00220
00221 for(c2=0;c2<wchmm->dfa->term_num;c2++) {
00222 if (! dfa_cp(wchmm->dfa, category, c2)) continue;
00223
00224
00225 for(i=0;i<wchmm->dfa->term.wnum[c2];i++) {
00226 w = wchmm->dfa->term.tw[c2][i];
00227 ltmp = get_right_context_HMM(hmm, wchmm->winfo->wseq[w][0]->name, wchmm->hmminfo);
00228 if (ltmp == NULL) {
00229 ltmp = hmm;
00230 if (ltmp->is_pseudo) {
00231 error_missing_right_triphone(hmm, wchmm->winfo->wseq[w][0]->name);
00232 }
00233 }
00234 if (! ltmp->is_pseudo) {
00235 if (regist_cdset(&(wchmm->lcdset_category_root), ltmp->body.defined, wchmm->lccbuf2, &(wchmm->lcdset_mroot))) {
00236 cnt_p++;
00237 }
00238 }
00239 }
00240 cnt_c++;
00241 cnt_w += wchmm->dfa->term.wnum[c2];
00242 }
00243 if (debug2_flag) {
00244 jlog("%d categories (%d words) can follow, %d HMMs registered\n", cnt_c, cnt_w, cnt_p);
00245 }
00246 }
00247 }
00248
00266 void
00267 lcdset_register_with_category_all(WCHMM_INFO *wchmm)
00268 {
00269 WORD_INFO *winfo;
00270 WORD_ID c1, w, w_prev;
00271 int i;
00272 HMM_Logical *ltmp;
00273
00274 winfo = wchmm->winfo;
00275
00276
00277
00278 for(w=0;w<winfo->num;w++) {
00279 ltmp = winfo->wseq[w][winfo->wlen[w]-1];
00280 lcdset_register_with_category(wchmm, ltmp, winfo->wton[w]);
00281 }
00282
00283
00284 for(w=0;w<winfo->num;w++) {
00285 if (winfo->wlen[w] > 1) continue;
00286 for(c1=0;c1<wchmm->dfa->term_num;c1++) {
00287 if (! dfa_cp(wchmm->dfa, c1, winfo->wton[w])) continue;
00288 for(i=0;i<wchmm->dfa->term.wnum[c1];i++) {
00289 w_prev = wchmm->dfa->term.tw[c1][i];
00290 ltmp = get_left_context_HMM(winfo->wseq[w][0], winfo->wseq[w_prev][winfo->wlen[w_prev]-1]->name, wchmm->hmminfo);
00291 if (ltmp == NULL) continue;
00292 if (ltmp->is_pseudo) continue;
00293 lcdset_register_with_category(wchmm, ltmp, winfo->wton[w]);
00294 }
00295 }
00296 }
00297 }
00298
00316 void
00317 lcdset_remove_with_category_all(WCHMM_INFO *wchmm)
00318 {
00319 free_cdset(&(wchmm->lcdset_category_root), &(wchmm->lcdset_mroot));
00320 }
00321
00322 #endif
00323
00353 LOGPROB
00354 outprob_style(WCHMM_INFO *wchmm, int node, int last_wid, int t, HTK_Param *param)
00355 {
00356 char rbuf[MAX_HMMNAME_LEN];
00357
00358 #ifndef PASS1_IWCD
00359
00360
00361
00362 return(outprob_state(wchmm->hmmwrk, t, wchmm->state[node].out, param));
00363
00364 #else
00365
00366
00367 HMM_Logical *ohmm, *rhmm;
00368 RC_INFO *rset;
00369 LRC_INFO *lrset;
00370 CD_Set *lcd;
00371 WORD_INFO *winfo = wchmm->winfo;
00372 HTK_HMM_INFO *hmminfo = wchmm->hmminfo;
00373
00374
00375
00376 switch(wchmm->outstyle[node]) {
00377 case AS_STATE:
00378
00379
00380 return(outprob_state(wchmm->hmmwrk, t, wchmm->state[node].out.state, param));
00381 case AS_LSET:
00382
00383
00384 return(outprob_cd(wchmm->hmmwrk, t, wchmm->state[node].out.lset, param));
00385 case AS_RSET:
00386
00387
00388 rset = wchmm->state[node].out.rset;
00389
00390 if (rset->cache.state == NULL || rset->lastwid_cache != last_wid) {
00391
00392
00393 if (last_wid != WORD_INVALID) {
00394
00395 if ((ohmm = get_left_context_HMM(rset->hmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name, hmminfo)) != NULL) {
00396 rhmm = ohmm;
00397 } else {
00398
00399 rhmm = rset->hmm;
00400
00401
00402
00403 if (debug2_flag) {
00404 if (rhmm->is_pseudo) {
00405 error_missing_left_triphone(rset->hmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name);
00406 }
00407 }
00408 }
00409 } else {
00410
00411 rhmm = rset->hmm;
00412
00413
00414 if (debug2_flag) {
00415 if (rhmm->is_pseudo) {
00416 error_missing_left_triphone(rset->hmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name);
00417 }
00418 }
00419 }
00420
00421
00422 if (rhmm->is_pseudo) {
00423 rset->last_is_lset = TRUE;
00424 rset->cache.lset = &(rhmm->body.pseudo->stateset[rset->state_loc]);
00425 } else {
00426 rset->last_is_lset = FALSE;
00427 rset->cache.state = rhmm->body.defined->s[rset->state_loc];
00428 }
00429 rset->lastwid_cache = last_wid;
00430 }
00431
00432 if (rset->last_is_lset) {
00433 return(outprob_cd(wchmm->hmmwrk, t, rset->cache.lset, param));
00434 } else {
00435 return(outprob_state(wchmm->hmmwrk, t, rset->cache.state, param));
00436 }
00437 case AS_LRSET:
00438
00439 lrset = wchmm->state[node].out.lrset;
00440 if (lrset->cache.state == NULL || lrset->lastwid_cache != last_wid) {
00441
00442 rhmm = lrset->hmm;
00443
00444 strcpy(rbuf, rhmm->name);
00445 if (last_wid != WORD_INVALID) {
00446 add_left_context(rbuf, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name);
00447 }
00448 if (wchmm->category_tree) {
00449 #ifdef USE_OLD_IWCD
00450 lcd = lcdset_lookup_by_hmmname(hmminfo, rbuf);
00451 #else
00452
00453 if (last_wid != WORD_INVALID &&
00454 (ohmm = get_left_context_HMM(rhmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name, hmminfo)) != NULL) {
00455 lcd = lcdset_lookup_with_category(wchmm, ohmm, lrset->category);
00456 } else {
00457 lcd = lcdset_lookup_with_category(wchmm, rhmm, lrset->category);
00458 }
00459 #endif
00460 } else {
00461 lcd = lcdset_lookup_by_hmmname(hmminfo, rbuf);
00462 }
00463 if (lcd != NULL) {
00464 lrset->last_is_lset = TRUE;
00465 lrset->cache.lset = &(lcd->stateset[lrset->state_loc]);
00466 lrset->lastwid_cache = last_wid;
00467 } else {
00468
00469 if (rhmm->is_pseudo) {
00470 lrset->last_is_lset = TRUE;
00471 lrset->cache.lset = &(rhmm->body.pseudo->stateset[lrset->state_loc]);
00472 lrset->lastwid_cache = last_wid;
00473 } else {
00474 lrset->last_is_lset = FALSE;
00475 lrset->cache.state = rhmm->body.defined->s[lrset->state_loc];
00476 lrset->lastwid_cache = last_wid;
00477 }
00478 }
00479
00480 }
00481
00482 if (lrset->last_is_lset) {
00483 return(outprob_cd(wchmm->hmmwrk, t, lrset->cache.lset, param));
00484 } else {
00485 return(outprob_state(wchmm->hmmwrk, t, lrset->cache.state, param));
00486 }
00487 default:
00488
00489 j_internal_error("outprob_style: no outprob style??\n");
00490 return(LOG_ZERO);
00491 }
00492
00493 #endif
00494
00495 }
00496
00519 void
00520 error_missing_right_triphone(HMM_Logical *base, char *rc_name)
00521 {
00522 char rbuf[MAX_HMMNAME_LEN];
00523
00524 strcpy(rbuf, base->name);
00525 add_right_context(rbuf, rc_name);
00526 jlog("WARNING: IW-triphone for word end \"%s\" not found, fallback to pseudo {%s}\n", rbuf, base->name);
00527 }
00528
00551 void
00552 error_missing_left_triphone(HMM_Logical *base, char *lc_name)
00553 {
00554 char rbuf[MAX_HMMNAME_LEN];
00555
00556 strcpy(rbuf, base->name);
00557 add_left_context(rbuf, lc_name);
00558 jlog("WARNING: IW-triphone for word head \"%s\" not found, fallback to pseudo {%s}\n", rbuf, base->name);
00559 }
00560
00561