00001 
00074 
00075 
00076 
00077 
00078 
00079 
00080 
00081 #include <julius/julius.h>
00082 
00083 #ifdef PASS1_IWCD
00084 
00099 void
00100 outprob_style_cache_init(WCHMM_INFO *wchmm)
00101 {
00102   int n;
00103   for(n=0;n<wchmm->n;n++) {
00104     if (wchmm->state[n].out.state == NULL) continue;
00105     if (wchmm->outstyle[n] == AS_RSET) {
00106       (wchmm->state[n].out.rset)->cache.state = NULL;
00107     } else if (wchmm->outstyle[n] == AS_LRSET) {
00108       (wchmm->state[n].out.lrset)->cache.state = NULL;
00109     }
00110   }
00111 }
00112 
00113 
00114 
00115 static char lccbuf[MAX_HMMNAME_LEN+7]; 
00116 static char lccbuf2[MAX_HMMNAME_LEN+7]; 
00117 
00146 CD_Set *
00147 lcdset_lookup_with_category(WCHMM_INFO *wchmm, HMM_Logical *hmm, WORD_ID category)
00148 {
00149   CD_Set *cd;
00150 
00151   leftcenter_name(hmm->name, lccbuf);
00152   sprintf(lccbuf2, "%s::%04d", lccbuf, category);
00153   if (wchmm->lcdset_category_root != NULL) {
00154     cd = aptree_search_data(lccbuf2, wchmm->lcdset_category_root);
00155     if (cd == NULL) return NULL;
00156     if (strmatch(lccbuf2, cd->name)) {
00157       return cd;
00158     }
00159   }
00160   return NULL;
00161 }
00162 
00208 static void
00209 lcdset_register_with_category(WCHMM_INFO *wchmm, HMM_Logical *hmm, WORD_ID category)
00210 {
00211   WORD_ID c2, i, w;
00212   HMM_Logical *ltmp;
00213 
00214   int cnt_c, cnt_w, cnt_p;
00215 
00216   if (lcdset_lookup_with_category(wchmm, hmm, category) == NULL) {
00217     leftcenter_name(hmm->name, lccbuf);
00218     sprintf(lccbuf2, "%s::%04d", lccbuf, category);
00219     if (debug2_flag) {
00220       jlog("DEBUG: category-aware lcdset {%s}...", lccbuf2);
00221     }
00222     cnt_c = cnt_w = cnt_p = 0;
00223     
00224     for(c2=0;c2<wchmm->dfa->term_num;c2++) {
00225       if (! dfa_cp(wchmm->dfa, category, c2)) continue;
00226       
00227 
00228       for(i=0;i<wchmm->dfa->term.wnum[c2];i++) {
00229         w = wchmm->dfa->term.tw[c2][i];
00230         ltmp = get_right_context_HMM(hmm, wchmm->winfo->wseq[w][0]->name, wchmm->hmminfo);
00231         if (ltmp == NULL) {
00232           ltmp = hmm;
00233           if (ltmp->is_pseudo) {
00234             error_missing_right_triphone(hmm, wchmm->winfo->wseq[w][0]->name);
00235           }
00236         }
00237         if (! ltmp->is_pseudo) {
00238           if (regist_cdset(&(wchmm->lcdset_category_root), ltmp->body.defined, lccbuf2)) {
00239             cnt_p++;
00240           }
00241         }
00242       }
00243       cnt_c++;
00244       cnt_w += wchmm->dfa->term.wnum[c2];
00245     }
00246     if (debug2_flag) {
00247       jlog("%d categories (%d words) can follow, %d HMMs registered\n", cnt_c, cnt_w, cnt_p);
00248     }
00249   }
00250 }
00251 
00269 void
00270 lcdset_register_with_category_all(WCHMM_INFO *wchmm)
00271 {
00272   WORD_INFO *winfo;
00273   WORD_ID c1, w, w_prev;
00274   int i;
00275   HMM_Logical *ltmp;
00276 
00277   winfo = wchmm->winfo;
00278 
00279   
00280   
00281   for(w=0;w<winfo->num;w++) {
00282     ltmp = winfo->wseq[w][winfo->wlen[w]-1];
00283     lcdset_register_with_category(wchmm, ltmp, winfo->wton[w]);
00284   }
00285   
00286   
00287   for(w=0;w<winfo->num;w++) {
00288     if (winfo->wlen[w] > 1) continue;
00289     for(c1=0;c1<wchmm->dfa->term_num;c1++) {
00290       if (! dfa_cp(wchmm->dfa, c1, winfo->wton[w])) continue;
00291       for(i=0;i<wchmm->dfa->term.wnum[c1];i++) {
00292         w_prev = wchmm->dfa->term.tw[c1][i];
00293         ltmp = get_left_context_HMM(winfo->wseq[w][0], winfo->wseq[w_prev][winfo->wlen[w_prev]-1]->name, wchmm->hmminfo);
00294         if (ltmp == NULL) continue; 
00295         if (ltmp->is_pseudo) continue; 
00296         lcdset_register_with_category(wchmm, ltmp, winfo->wton[w]);
00297       }
00298     }
00299   }
00300 }
00301 
00319 void
00320 lcdset_remove_with_category_all(WCHMM_INFO *wchmm)
00321 {
00322   free_cdset(&(wchmm->lcdset_category_root));
00323 }
00324 
00325 #endif 
00326 
00356 LOGPROB
00357 outprob_style(WCHMM_INFO *wchmm, int node, int last_wid, int t, HTK_Param *param)
00358 {
00359   static char rbuf[MAX_HMMNAME_LEN]; 
00360 
00361 #ifndef PASS1_IWCD
00362   
00363   
00364 
00365   return(outprob_state(wchmm->hmmwrk, t, wchmm->state[node].out, param));
00366   
00367 #else  
00368 
00369   
00370   HMM_Logical *ohmm, *rhmm;
00371   RC_INFO *rset;
00372   LRC_INFO *lrset;
00373   CD_Set *lcd;
00374   WORD_INFO *winfo = wchmm->winfo;
00375   HTK_HMM_INFO *hmminfo = wchmm->hmminfo;
00376 
00377   
00378 
00379   switch(wchmm->outstyle[node]) {
00380   case AS_STATE:
00381     
00382     
00383     return(outprob_state(wchmm->hmmwrk, t, wchmm->state[node].out.state, param));
00384   case AS_LSET:
00385     
00386     
00387     return(outprob_cd(wchmm->hmmwrk, t, wchmm->state[node].out.lset, param));
00388   case AS_RSET:
00389     
00390     
00391     rset = wchmm->state[node].out.rset;
00392     
00393     if (rset->cache.state == NULL || rset->lastwid_cache != last_wid) {
00394       
00395       
00396       if (last_wid != WORD_INVALID) {
00397         
00398         if ((ohmm = get_left_context_HMM(rset->hmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name, hmminfo)) != NULL) {
00399           rhmm = ohmm;
00400         } else {
00401           
00402           rhmm = rset->hmm;
00403           
00404 
00405 
00406           if (debug2_flag) {
00407             if (rhmm->is_pseudo) {
00408             error_missing_left_triphone(rset->hmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name);
00409             }
00410           }
00411         }
00412       } else {
00413         
00414         rhmm = rset->hmm;
00415         
00416 
00417         if (debug2_flag) {
00418           if (rhmm->is_pseudo) {
00419             error_missing_left_triphone(rset->hmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name);
00420           }
00421         }
00422       }
00423       
00424       
00425       if (rhmm->is_pseudo) {
00426         rset->last_is_lset  = TRUE;
00427         rset->cache.lset    = &(rhmm->body.pseudo->stateset[rset->state_loc]);
00428       } else {
00429         rset->last_is_lset  = FALSE;
00430         rset->cache.state   = rhmm->body.defined->s[rset->state_loc];
00431       }
00432       rset->lastwid_cache = last_wid;
00433     }
00434     
00435     if (rset->last_is_lset) {
00436       return(outprob_cd(wchmm->hmmwrk, t, rset->cache.lset, param));
00437     } else {
00438       return(outprob_state(wchmm->hmmwrk, t, rset->cache.state, param));
00439     }
00440   case AS_LRSET:
00441     
00442     lrset = wchmm->state[node].out.lrset;
00443     if (lrset->cache.state == NULL || lrset->lastwid_cache != last_wid) {
00444       
00445       rhmm = lrset->hmm;
00446       
00447       strcpy(rbuf, rhmm->name);
00448       if (last_wid != WORD_INVALID) {
00449         add_left_context(rbuf, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name);
00450       }
00451       if (wchmm->category_tree) {
00452 #ifdef USE_OLD_IWCD
00453         lcd = lcdset_lookup_by_hmmname(hmminfo, rbuf);
00454 #else
00455         
00456         if (last_wid != WORD_INVALID &&
00457             (ohmm = get_left_context_HMM(rhmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name, hmminfo)) != NULL) {
00458           lcd = lcdset_lookup_with_category(wchmm, ohmm, lrset->category);
00459         } else {
00460           lcd = lcdset_lookup_with_category(wchmm, rhmm, lrset->category);
00461         }
00462 #endif
00463       } else {
00464         lcd = lcdset_lookup_by_hmmname(hmminfo, rbuf);
00465       }
00466       if (lcd != NULL) {        
00467         lrset->last_is_lset  = TRUE;
00468         lrset->cache.lset    = &(lcd->stateset[lrset->state_loc]);
00469         lrset->lastwid_cache = last_wid;
00470       } else {
00471         
00472         if (rhmm->is_pseudo) {
00473           lrset->last_is_lset  = TRUE;
00474           lrset->cache.lset    = &(rhmm->body.pseudo->stateset[lrset->state_loc]);
00475           lrset->lastwid_cache = last_wid;
00476         } else {
00477           lrset->last_is_lset  = FALSE;
00478           lrset->cache.state   = rhmm->body.defined->s[lrset->state_loc];
00479           lrset->lastwid_cache = last_wid;
00480         }
00481       }
00482       
00483     }
00484     
00485     if (lrset->last_is_lset) {
00486       return(outprob_cd(wchmm->hmmwrk, t, lrset->cache.lset, param));
00487     } else {
00488       return(outprob_state(wchmm->hmmwrk, t, lrset->cache.state, param));
00489     }
00490   default:
00491     
00492     j_internal_error("outprob_style: no outprob style??\n");
00493     return(LOG_ZERO);
00494   }
00495 
00496 #endif  
00497 
00498 }
00499 
00522 void
00523 error_missing_right_triphone(HMM_Logical *base, char *rc_name)
00524 {
00525   static char rbuf[MAX_HMMNAME_LEN]; 
00526   
00527   strcpy(rbuf, base->name);
00528   add_right_context(rbuf, rc_name);
00529   jlog("WARNING: IW-triphone for word end \"%s\" not found, fallback to pseudo {%s}\n", rbuf, base->name);
00530 }
00531 
00554 void
00555 error_missing_left_triphone(HMM_Logical *base, char *lc_name)
00556 {
00557   static char rbuf[MAX_HMMNAME_LEN]; 
00558   
00559   strcpy(rbuf, base->name);
00560   add_left_context(rbuf, lc_name);
00561   jlog("WARNING: IW-triphone for word head \"%s\" not found, fallback to pseudo {%s}\n", rbuf, base->name);
00562 }
00563 
00564