julius/search_bestfirst_v2.c

Go to the documentation of this file.
00001 
00054 /*
00055  * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
00056  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00057  * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology
00058  * All rights reserved
00059  */
00060 
00061 /* By "fast" setting (default), search_bestfirst_v1.c is used for faster
00062    decoding.  Please specify option "--enable-setup=standard" or
00063    "--enable-strict-iwcd2" at "./configure" to activate this. */
00064 
00065 #include <julius.h>
00066 
00067 #ifdef PASS2_STRICT_IWCD
00068 
00069 #undef TCD                      
00070 
00071 
00072 /**********************************************************************/
00073 /************ 仮説ノードの基本操作                         ************/
00074 /************ Basic functions for hypothesis node handling ************/
00075 /**********************************************************************/
00076 
00077 #undef STOCKER_DEBUG
00078 
00079 static NODE *stocker_root = NULL; 
00080 
00081 #ifdef STOCKER_DEBUG
00082 static int stocked_num = 0;
00083 static int reused_num = 0;
00084 static int new_num = 0;
00085 static int request_num = 0;
00086 #endif
00087 
00100 void
00101 free_node_exec(NODE *node)
00102 {
00103   if (node == NULL) return;
00104   free(node->g);
00105 #ifdef GRAPHOUT
00106 #ifdef GRAPHOUT_PRECISE_BOUNDARY
00107   free(node->wordend_frame);
00108   free(node->wordend_gscore);
00109 #endif
00110 #endif
00111   free(node);
00112 }
00113 
00126 void
00127 free_node(NODE *node)
00128 {
00129   if (node == NULL) return;
00130 
00131 #ifdef GRAPHOUT
00132   if (node->prevgraph != NULL && node->prevgraph->saved == FALSE) {
00133     wordgraph_free(node->prevgraph);
00134   }
00135 #endif
00136 
00137   /* save to stocker */
00138   node->next = stocker_root;
00139   stocker_root = node;
00140 
00141 #ifdef STOCKER_DEBUG
00142   stocked_num++;
00143 #endif
00144 }
00145 
00156 void
00157 clear_stocker()
00158 {
00159   NODE *node, *tmp;
00160   node = stocker_root;
00161   while(node) {
00162     tmp = node->next;
00163     free_node_exec(node);
00164     node = tmp;
00165   }
00166   stocker_root = NULL;
00167 
00168 #ifdef STOCKER_DEBUG
00169   printf("%d times requested, %d times newly allocated, %d times reused\n", request_num, new_num, reused_num);
00170   stocked_num = 0;
00171   reused_num = 0;
00172   new_num = 0;
00173   request_num = 0;
00174 #endif
00175 }
00176 
00195 NODE *
00196 cpy_node(NODE *dst, NODE *src)
00197 {
00198   
00199   dst->next = src->next;
00200   dst->prev = src->prev;
00201   memcpy(dst->g, src->g, sizeof(LOGPROB) * peseqlen);
00202   memcpy(dst->seq, src->seq, sizeof(WORD_ID) * MAXSEQNUM);
00203 #ifdef CM_SEARCH
00204 #ifdef CM_MULTIPLE_ALPHA
00205   {
00206     int w;
00207     for(w=0;w<src->seqnum;w++) {
00208       memcpy(dst->cmscore[w], src->cmscore[w], sizeof(LOGPROB) * cm_alpha_num);
00209     }
00210   }     
00211 #else
00212   memcpy(dst->cmscore, src->cmscore, sizeof(LOGPROB) * MAXSEQNUM);
00213 #endif
00214 #endif /* CM_SEARCH */
00215   dst->seqnum = src->seqnum;
00216   dst->score = src->score;
00217   dst->bestt = src->bestt;
00218   dst->estimated_next_t = src->estimated_next_t;
00219   dst->endflag = src->endflag;
00220 #ifdef USE_DFA
00221   dst->state = src->state;
00222 #endif
00223   dst->tre = src->tre;
00224   if (ccd_flag) {
00225     dst->last_ph = src->last_ph;
00226 #ifdef MULTIPATH_VERSION
00227     dst->last_ph_sp_attached = src->last_ph_sp_attached;
00228 #endif
00229   }
00230 #ifdef USE_NGRAM
00231   dst->totallscore = src->totallscore;
00232 #endif
00233 #ifdef MULTIPATH_VERSION
00234   dst->final_g = src->final_g;
00235 #endif
00236 #ifdef VISUALIZE
00237   dst->popnode = src->popnode;
00238 #endif
00239 #ifdef GRAPHOUT
00240 #ifdef GRAPHOUT_PRECISE_BOUNDARY
00241   memcpy(dst->wordend_frame, src->wordend_frame, sizeof(short)*peseqlen);
00242   memcpy(dst->wordend_gscore, src->wordend_gscore, sizeof(LOGPROB)*peseqlen);
00243 #endif
00244   dst->prevgraph = src->prevgraph;
00245   dst->lastcontext = src->lastcontext;
00246 #ifndef GRAPHOUT_PRECISE_BOUNDARY
00247   dst->tail_g_score = src->tail_g_score;
00248 #endif
00249 #endif
00250   return(dst);
00251 }
00252 
00267 NODE *
00268 newnode()
00269 {
00270   NODE *tmp;
00271   int i;
00272 
00273 #ifdef STOCKER_DEBUG
00274   request_num++;
00275 #endif
00276   if (stocker_root != NULL) {
00277     /* re-use ones in the stocker */
00278     tmp = stocker_root;
00279     stocker_root = tmp->next;
00280 #ifdef STOCKER_DEBUG
00281     stocked_num--;
00282     reused_num++;
00283 #endif
00284   } else {
00285     /* allocate new */
00286     if ((tmp=(NODE *)mymalloc(sizeof(NODE)))==NULL) {
00287       j_error("can't malloc\n");
00288     }
00289     tmp->g = (LOGPROB *)mymalloc(sizeof(LOGPROB)*peseqlen);
00290 #ifdef GRAPHOUT
00291 #ifdef GRAPHOUT_PRECISE_BOUNDARY
00292     tmp->wordend_frame = (short *)mymalloc(sizeof(short)*peseqlen);
00293     tmp->wordend_gscore = (LOGPROB *)mymalloc(sizeof(LOGPROB)*peseqlen);
00294 #endif
00295 #endif
00296 #ifdef STOCKER_DEBUG
00297     new_num++;
00298 #endif
00299   }
00300 
00301   /* clear the data */
00302   /*bzero(tmp,sizeof(NODE));*/
00303   tmp->next=NULL;
00304   tmp->prev=NULL;
00305   tmp->last_ph = NULL;
00306 #ifdef MULTIPATH_VERSION
00307   tmp->last_ph_sp_attached = FALSE;
00308 #endif  
00309   if (ccd_flag) {
00310 #ifdef USE_NGRAM
00311     tmp->totallscore = LOG_ZERO;
00312 #endif
00313   }
00314   tmp->endflag = FALSE;
00315   tmp->seqnum = 0;
00316   for(i=0;i<peseqlen;i++) {
00317     tmp->g[i] = LOG_ZERO;
00318   }
00319 #ifdef MULTIPATH_VERSION
00320   tmp->final_g = LOG_ZERO;
00321 #endif
00322 #ifdef VISUALIZE
00323   tmp->popnode = NULL;
00324 #endif
00325 #ifdef GRAPHOUT
00326   tmp->prevgraph = NULL;
00327   tmp->lastcontext = NULL;
00328 #endif
00329 
00330   return(tmp);
00331 }
00332 
00333 
00334 /**********************************************************************/
00335 /************ 前向きトレリス展開と尤度計算             ****************/
00336 /************ Expand trellis and update forward score *****************/
00337 /**********************************************************************/
00338 
00339 static LOGPROB *wordtrellis[2]; 
00340 static int tn;                 
00341 static int tl;                 
00342 static LOGPROB *g;              
00343 static HMM_Logical **phmmseq;   
00344 static int phmmlen_max;         
00345 static HMM_Logical *tailph;     
00346 #ifdef MULTIPATH_VERSION
00347 static boolean *has_sp;         
00348 #endif
00349 
00350 #ifdef GRAPHOUT
00351 #ifdef GRAPHOUT_PRECISE_BOUNDARY
00352 static short *wend_token_frame[2]; 
00353 static LOGPROB *wend_token_gscore[2]; 
00354 static short *wef;              
00355 static LOGPROB *wes;            
00356 #endif
00357 #endif
00358 
00369 void
00370 malloc_wordtrellis()
00371 {
00372   int maxwn;
00373 
00374   maxwn = winfo->maxwn + 10;    /* CCDによる変動を考慮 */
00375   wordtrellis[0] = (LOGPROB *)mymalloc(sizeof(LOGPROB) * maxwn);
00376   wordtrellis[1] = (LOGPROB *)mymalloc(sizeof(LOGPROB) * maxwn);
00377 
00378   g = (LOGPROB *)mymalloc(sizeof(LOGPROB) * peseqlen);
00379 
00380   phmmlen_max = winfo->maxwlen + 2;
00381   phmmseq = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * phmmlen_max);
00382 #ifdef MULTIPATH_VERSION
00383   has_sp = (boolean *)mymalloc(sizeof(boolean) * phmmlen_max);
00384 #endif
00385 
00386 #ifdef GRAPHOUT
00387 #ifdef GRAPHOUT_PRECISE_BOUNDARY
00388   wef = (short *)mymalloc(sizeof(short) * peseqlen);
00389   wes = (LOGPROB *)mymalloc(sizeof(LOGPROB) * peseqlen);
00390   wend_token_frame[0] = (short *)mymalloc(sizeof(short) * maxwn);
00391   wend_token_frame[1] = (short *)mymalloc(sizeof(short) * maxwn);
00392   wend_token_gscore[0] = (LOGPROB *)mymalloc(sizeof(LOGPROB) * maxwn);
00393   wend_token_gscore[1] = (LOGPROB *)mymalloc(sizeof(LOGPROB) * maxwn);
00394 #endif
00395 #endif
00396 }
00397 
00408 void
00409 free_wordtrellis()
00410 {
00411   free(wordtrellis[0]);
00412   free(wordtrellis[1]);
00413   free(g);
00414   free(phmmseq);
00415 #ifdef MULTIPATH_VERSION
00416   free(has_sp);
00417 #endif
00418 #ifdef GRAPHOUT
00419 #ifdef GRAPHOUT_PRECISE_BOUNDARY
00420   free(wef);
00421   free(wes);
00422   free(wend_token_frame[0]);
00423   free(wend_token_frame[1]);
00424   free(wend_token_gscore[0]);
00425   free(wend_token_gscore[1]);
00426 #endif
00427 #endif
00428 }
00429 
00430 
00431 /**********************************************************************/
00432 /************ 仮説の前向き尤度計算                  *******************/
00433 /************ Compute forward score of a hypothesis *******************/
00434 /**********************************************************************/
00435 
00436 /* 与えられた音素のならび phmmseq[0..phmmlen-1]に対してviterbi計算を行う.
00437    g[0..framelen-1] のスコアを初期値として g_new[0..framelen-1]に更新値を代入.
00438    最低 least_frame まではscanする.*/
00439 /* Viterbi computation for the given phoneme sequence 'phmmseq[0..phmmlen-1]'
00440    with g[0..framelen-1] as initial values.  The results are stored in
00441    g_new[0..framelen-1].  Scan should not terminate at least it reaches
00442    'least_frame'. */
00477 static void
00478 do_viterbi(LOGPROB *g, LOGPROB *g_new, HMM_Logical **phmmseq
00479 #ifdef MULTIPATH_VERSION
00480            , boolean *has_sp
00481 #endif
00482            , int phmmlen, HTK_Param *param, int framelen, int least_frame
00483 #ifdef MULTIPATH_VERSION
00484            , LOGPROB *final_g
00485 #endif
00486 #ifdef GRAPHOUT
00487 #ifdef GRAPHOUT_PRECISE_BOUNDARY
00488            , short *wordend_frame_src, short *wordend_frame_dst
00489            , LOGPROB *wordend_gscore_src, LOGPROB *wordend_gscore_dst
00490 #endif
00491 #endif
00492            )
00493 {
00494   HMM *whmm;                    /* HMM */
00495   int wordhmmnum;               /* length of above */
00496   int startt;                   /* scan start frame */
00497   LOGPROB tmpmax,tmpscore;      /* variables for Viterbi process */
00498   A_CELL *ac;
00499   int t,i,j;
00500   boolean node_exist_p;
00501 
00502 #ifdef TCD
00503   j_printf("scan for:");
00504   for (i=0;i<phmmlen;i++) {
00505     j_printf(" %s", phmmseq[i]->name);
00506   }
00507   j_printf("\n");
00508 #endif
00509   
00510   /* 単語HMMを作る */
00511   /* make word HMM */
00512   whmm = new_make_word_hmm(hmminfo, phmmseq, phmmlen
00513 #ifdef MULTIPATH_VERSION
00514                            , has_sp
00515 #endif
00516                            );
00517   wordhmmnum = whmm->len;
00518   if (wordhmmnum >= winfo->maxwn + 10) {
00519     j_error("scan_word: word too long\n");
00520   }
00521 
00522   /* scan開始点を検索 -> starttへ*/
00523   /* search for the start frame -> set to startt */
00524   for(t = framelen-1; t >=0 ; t--) {
00525     if (
00526 #ifdef SCAN_BEAM
00527         g[t] > framemaxscore[t] - scan_beam_thres &&
00528 #endif
00529         g[t] > LOG_ZERO) {
00530       break;
00531     }
00532   }
00533   if (t < 0) {                  /* no node has score > LOG_ZERO */
00534     /* reset all scores and end */
00535     for(t=0;t<framelen;t++) {
00536       g_new[t] = LOG_ZERO;
00537 #ifdef GRAPHOUT
00538 #ifdef GRAPHOUT_PRECISE_BOUNDARY
00539       wordend_frame_dst[t] = -1;
00540       wordend_gscore_dst[t] = LOG_ZERO;
00541 #endif
00542 #endif
00543     }
00544     free_hmm(whmm);
00545     return;
00546   }
00547   startt = t;
00548   
00549   /* 開始点以降[startt+1..framelen-1] の g_new[] をリセット */
00550   /* clear g_new[] for [startt+1..framelen-1] */
00551   for(t=framelen-1;t>startt;t--) {
00552     g_new[t] = LOG_ZERO;
00553 #ifdef GRAPHOUT
00554 #ifdef GRAPHOUT_PRECISE_BOUNDARY
00555     wordend_frame_dst[t] = -1;
00556     wordend_gscore_dst[t] = LOG_ZERO;
00557 #endif
00558 #endif
00559   }
00560 
00561   /*****************/
00562   /* viterbi start */
00563   /*****************/
00564 
00565   /* set initial swap buffer */
00566   tn = 0; tl = 1;
00567 
00568 #ifdef GRAPHOUT
00569 #ifdef GRAPHOUT_PRECISE_BOUNDARY
00570   for(i=0;i<wordhmmnum;i++) {
00571     wend_token_frame[tn][i] = -1;
00572     wend_token_gscore[tn][i] = LOG_ZERO;
00573   }    
00574 #endif
00575 #endif
00576 
00577 #ifndef MULTIPATH_VERSION
00578   /* 時間 [startt] 上の値を初期化 */
00579   /* initialize scores on frame [startt] */
00580   for(i=0;i<wordhmmnum-1;i++) wordtrellis[tn][i] = LOG_ZERO;
00581   wordtrellis[tn][wordhmmnum-1] = g[startt] + outprob(startt, &(whmm->state[wordhmmnum-1]), param);
00582   g_new[startt] = wordtrellis[tn][0];
00583 #ifdef GRAPHOUT
00584 #ifdef GRAPHOUT_PRECISE_BOUNDARY
00585   wend_token_frame[tn][wordhmmnum-1] = wordend_frame_src[startt];
00586   wend_token_gscore[tn][wordhmmnum-1] = wordend_gscore_src[startt];
00587   wordend_frame_dst[startt] = wend_token_frame[tn][0];
00588   wordend_gscore_dst[startt] = wend_token_gscore[tn][0];
00589 #endif
00590 #endif
00591 #endif /* ~MULTIPATH_VERSION */
00592   
00593   /* メインループ: startt から始まり 0 に向かって Viterbi 計算 */
00594   /* main loop: start from [startt], and compute Viterbi toward [0] */
00595   for(t=
00596 #ifdef MULTIPATH_VERSION
00597         startt
00598 #else
00599         startt-1
00600 #endif
00601         ; t>=0; t--) {
00602     
00603     /* wordtrellisのワークエリアをスワップ */
00604     /* swap workarea of wordtrellis */
00605     i = tn; tn = tl; tl = i;
00606 
00607     node_exist_p = FALSE;       /* TRUE if there is at least 1 survived node in this frame */
00608 
00609 #ifndef MULTIPATH_VERSION
00610     
00611     /* 端のノード [t][wordhmmnum-1]は,内部遷移 か g[]の高い方になる */
00612     /* the edge node [t][wordhmmnum-1] is either internal transitin or g[] */
00613     tmpscore = LOG_ZERO;
00614     for (ac=whmm->state[wordhmmnum-1].ac;ac;ac=ac->next) {
00615       if (tmpscore < wordtrellis[tl][ac->arc] + ac->a)
00616         j = ac->arc;
00617         tmpscore = wordtrellis[tl][ac->arc] + ac->a;
00618     }
00619     if (g[t] > tmpscore) {
00620       tmpmax = g[t];
00621 #ifdef GRAPHOUT
00622 #ifdef GRAPHOUT_PRECISE_BOUNDARY
00623       wend_token_frame[tn][wordhmmnum-1] = wordend_frame_src[t];
00624       wend_token_gscore[tn][wordhmmnum-1] = wordend_gscore_src[t];
00625 #endif
00626 #endif
00627     } else {
00628       tmpmax = tmpscore;
00629 #ifdef GRAPHOUT
00630 #ifdef GRAPHOUT_PRECISE_BOUNDARY
00631       wend_token_frame[tn][wordhmmnum-1] = wend_token_frame[tl][j];
00632       wend_token_gscore[tn][wordhmmnum-1] = wend_token_gscore[tl][j];
00633 #endif
00634 #endif
00635     }
00636 
00637     /* 端のノードのスコアエンベロープチェック: 一定幅外なら落とす */
00638     /* check if the edge node is within score envelope */
00639     if (
00640 #ifdef SCAN_BEAM
00641         tmpmax <= framemaxscore[t] - scan_beam_thres ||
00642 #endif
00643         tmpmax <= LOG_ZERO
00644         ) {
00645       wordtrellis[tn][wordhmmnum-1] = LOG_ZERO;
00646 #ifdef GRAPHOUT
00647 #ifdef GRAPHOUT_PRECISE_BOUNDARY
00648       wend_token_frame[tn][wordhmmnum-1] = -1;
00649       wend_token_gscore[tn][wordhmmnum-1] = LOG_ZERO;
00650 #endif
00651 #endif
00652     } else {
00653       node_exist_p = TRUE;
00654       wordtrellis[tn][wordhmmnum-1] = tmpmax + outprob(t, &(whmm->state[wordhmmnum-1]), param);
00655     }
00656 
00657 #endif /* ~MULTIPATH_VERSION */
00658 
00659     /* node[wordhmmnum-2..0]についてトレリスを展開 */
00660     /* expand trellis for node [t][wordhmmnum-2..0] */
00661     for(i=wordhmmnum-2;i>=0;i--) {
00662       
00663       /* 最尤パスと最尤スコア tmpmax を見つける */
00664       /* find most likely path and the max score 'tmpmax' */
00665       tmpmax = LOG_ZERO;
00666       for (ac=whmm->state[i].ac;ac;ac=ac->next) {
00667 #ifdef MULTIPATH_VERSION
00668         if (ac->arc == wordhmmnum-1) tmpscore = g[t];
00669         else if (t + 1 > startt) tmpscore = LOG_ZERO;
00670         else tmpscore = wordtrellis[tl][ac->arc];
00671         tmpscore += ac->a;
00672 #else
00673         tmpscore = wordtrellis[tl][ac->arc] + ac->a;
00674 #endif
00675         if (tmpmax < tmpscore) {
00676           tmpmax = tmpscore;
00677           j = ac->arc;
00678         }
00679       }
00680       
00681       /* スコアエンベロープチェック: 一定幅外なら落とす */
00682       /* check if score of this node is within the score envelope */
00683       if (
00684 #ifdef SCAN_BEAM
00685           tmpmax <= framemaxscore[t] - scan_beam_thres ||
00686 #endif
00687           tmpmax <= LOG_ZERO
00688           ) {
00689         /* invalid node */
00690         wordtrellis[tn][i] = LOG_ZERO;
00691 #ifdef GRAPHOUT
00692 #ifdef GRAPHOUT_PRECISE_BOUNDARY
00693         wend_token_frame[tn][i] = -1;
00694         wend_token_gscore[tn][i] = LOG_ZERO;
00695 #endif
00696 #endif
00697       } else {
00698         /* survived node */
00699         node_exist_p = TRUE;
00700 #ifdef MULTIPATH_VERSION
00701         wordtrellis[tn][i] = tmpmax;
00702         if (i > 0) wordtrellis[tn][i] += outprob(t, &(whmm->state[i]), param);
00703 #else
00704         wordtrellis[tn][i] = tmpmax + outprob(t, &(whmm->state[i]), param);
00705 #endif
00706 #ifdef GRAPHOUT
00707 #ifdef GRAPHOUT_PRECISE_BOUNDARY
00708 #ifdef MULTIPATH_VERSION
00709         if (j == wordhmmnum-1) {
00710           wend_token_frame[tn][i] = wordend_frame_src[t];
00711           wend_token_gscore[tn][i] = wordend_gscore_src[t];
00712         } else {
00713           wend_token_frame[tn][i] = wend_token_frame[tl][j];
00714           wend_token_gscore[tn][i] = wend_token_gscore[tl][j];
00715         }
00716 #else
00717         wend_token_frame[tn][i] = wend_token_frame[tl][j];
00718         wend_token_gscore[tn][i] = wend_token_gscore[tl][j];
00719 #endif
00720 #endif
00721 #endif
00722       }
00723     } /* end of node loop */
00724 
00725     /* 時間 t のViterbi計算終了.新たな前向きスコア g_new[t] をセット */
00726     /* Viterbi end for frame [t].  set the new forward score g_new[t] */
00727     g_new[t] = wordtrellis[tn][0];
00728 #ifdef GRAPHOUT
00729 #ifdef GRAPHOUT_PRECISE_BOUNDARY
00730     /* new wordend */
00731     wordend_frame_dst[t] = wend_token_frame[tn][0];
00732     wordend_gscore_dst[t] = wend_token_gscore[tn][0];
00733 #endif
00734 #endif
00735     /* 指定された least_frame より先まで t が進んでおり,かつこの t において
00736        スコアエンベロープによって生き残ったノードが一つも無かった場合,
00737        このフレームで計算を打ち切りそれ以上先([0..t-1])は計算しない */
00738     /* if frame 't' already reached the 'least_frame' and no node was
00739        survived in this frame (all nodes pruned by score envelope),
00740        terminate computation at this frame and do not computer further
00741        frame ([0..t-1]). */
00742     if (t < least_frame && (!node_exist_p)) {
00743       /* crear the rest scores */
00744       for (i=t-1;i>=0;i--) {
00745         g_new[i] = LOG_ZERO;
00746 #ifdef GRAPHOUT
00747 #ifdef GRAPHOUT_PRECISE_BOUNDARY
00748         wordend_frame_dst[i] = -1;
00749         wordend_gscore_dst[i] = LOG_ZERO;
00750 #endif
00751 #endif
00752       }
00753       /* terminate loop */
00754       break;
00755     }
00756     
00757   } /* end of time loop */
00758 
00759 #ifdef MULTIPATH_VERSION
00760   /* 前向きスコアの最終値を計算 (状態 0 から時間 0 への遷移) */
00761   /* compute the total forward score (transition from state 0 to frame 0 */
00762   if (t < 0) {                  /* computed till the end */
00763     tmpmax = LOG_ZERO;
00764     for(ac=whmm->state[0].ac;ac;ac=ac->next) {
00765       tmpscore = wordtrellis[tn][ac->arc] + ac->a;
00766       if (tmpmax < tmpscore) tmpmax = tmpscore;
00767     }
00768     *final_g = tmpmax;
00769   } else {
00770     *final_g = LOG_ZERO;
00771   }
00772 #endif
00773 
00774   /* free work area */
00775   free_hmm(whmm);
00776 }
00777 
00797 static void
00798 do_viterbi_next_word(NODE *now, NODE *new, HMM_Logical *lastphone
00799 #ifdef MULTIPATH_VERSION
00800                      , boolean sp
00801 #endif
00802                      , HTK_Param *param)
00803 {
00804   int t, n;
00805 #ifndef MULTIPATH_VERSION
00806   LOGPROB a_value;
00807 
00808   /* もし展開元仮説の最後の単語の音素長が 1 であれば,その音素は
00809      直前の scan_word で計算されていない.この場合, now->g[] に以前の
00810      初期値が格納されている.
00811      もし音素長が1以上であれば,now->g[] はその手前まで計算した状態
00812      のスコアが入っているので,now->g[t] から初期値を設定する必要がある */
00813   /* If the length of last word is 1, it means the last phone was not
00814      scanned in the last call of scan_word().  In this case, now->g[]
00815      keeps the previous initial value, so start viterbi with the old scores.
00816      If the length is more than 1, the now->g[] keeps the values of the
00817      scan result till the previous phone, so make initial value
00818      considering last transition probability. */
00819   if (winfo->wlen[now->seq[now->seqnum-1]] > 1) {
00820     n = hmm_logical_state_num(lastphone);
00821     a_value = (hmm_logical_trans(lastphone))->a[n-2][n-1];
00822     for(t=0; t<peseqlen-1; t++) g[t] = now->g[t+1] + a_value;
00823     g[peseqlen-1] = LOG_ZERO;
00824   } else {
00825     for(t=0; t<peseqlen; t++) g[t] = now->g[t];
00826   }
00827   
00828 # else /* MULTIPATH_VERSION */
00829   
00830   for(t=0; t<peseqlen; t++) g[t] = now->g[t];
00831   phmmseq[0] = lastphone;
00832   has_sp[0] = sp;
00833   
00834 #endif /* MULTIPATH_VERSION */
00835   
00836   do_viterbi(g, new->g
00837 #ifdef MULTIPATH_VERSION
00838              , phmmseq, has_sp, 1, param
00839 #else
00840              , &lastphone, 1, param
00841 #endif
00842              , peseqlen, now->estimated_next_t
00843 #ifdef MULTIPATH_VERSION
00844              , &(new->final_g)
00845 #endif
00846 #ifdef GRAPHOUT
00847 #ifdef GRAPHOUT_PRECISE_BOUNDARY
00848              , now->wordend_frame, new->wordend_frame
00849              , now->wordend_gscore, new->wordend_gscore
00850 #endif
00851 #endif
00852              );
00853 
00854 #ifndef MULTIPATH_VERSION
00855 #ifdef GRAPHOUT
00856 #ifdef GRAPHOUT_PRECISE_BOUNDARY
00857   /* 次回の next_word 用に境界情報を調整 */
00858   /* proceed word boundary for one step for next_word */
00859   new->wordend_frame[peseqlen-1] = new->wordend_frame[0];
00860   new->wordend_gscore[peseqlen-1] = new->wordend_gscore[0];
00861   for (t=0;t<peseqlen-1;t++) {
00862     new->wordend_frame[t] = new->wordend_frame[t+1];
00863     new->wordend_gscore[t] = new->wordend_gscore[t+1];
00864   }
00865 #endif
00866 #endif
00867 #endif
00868 }
00869 
00885 void
00886 scan_word(NODE *now, HTK_Param *param)
00887 {
00888   int   i,t;
00889   WORD_ID word;
00890   int phmmlen;
00891 #ifdef MULTIPATH_VERSION
00892   boolean tail_ph_sp_attached;
00893 #endif
00894   
00895 #ifdef GRAPHOUT
00896 #ifndef GRAPHOUT_PRECISE_BOUNDARY
00897   if (ccd_flag) {
00898     now->tail_g_score = now->g[now->bestt];
00899   }
00900 #endif
00901 #endif
00902 
00903   /* ----------------------- prepare phoneme sequence ------------------ */
00904   /* triphoneなら先頭の1音素はここでは対象外(あとでnext_wordでやる) */
00905   /*             末尾の1音素はコンテキストにしたがって置換 */
00906   /* with triphone, modify the tail phone of the last word according to the
00907      previous word, and do not compute the head phone here (that will be
00908      computed later in next_word() */
00909   word = now->seq[now->seqnum-1];
00910   
00911 #ifdef TCD
00912     j_printf("w=");
00913     for(i=0;i<winfo->wlen[word];i++) {
00914       j_printf(" %s",(winfo->wseq[word][i])->name);
00915     }
00916     if (ccd_flag) {
00917       if (now->last_ph != NULL) {
00918         j_printf(" | %s", (now->last_ph)->name);
00919       }
00920     }
00921     j_printf("\n");
00922 #endif /* TCD */
00923     
00924   if (ccd_flag) {
00925     
00926     /* the tail triphone of the last word varies by context */
00927     if (now->last_ph != NULL) {
00928       tailph = get_right_context_HMM(winfo->wseq[word][winfo->wlen[word]-1], now->last_ph->name, hmminfo);
00929       if (tailph == NULL) {
00930         /* fallback to the original bi/mono-phone */
00931         /* error if the original is pseudo phone (not explicitly defined
00932            in hmmdefs/hmmlist) */
00933         /* exception: word with 1 phone (triphone may exist in the next expansion */
00934         if (winfo->wlen[word] > 1 && winfo->wseq[word][winfo->wlen[word]-1]->is_pseudo){
00935           error_missing_right_triphone(winfo->wseq[word][winfo->wlen[word]-1], now->last_ph->name);
00936         }
00937 
00938         tailph = winfo->wseq[word][winfo->wlen[word]-1];
00939       }
00940     } else {
00941       tailph = winfo->wseq[word][winfo->wlen[word]-1];
00942     }
00943     /* 長さ1の単語は次のnextwordでさらに変化するのでここではscanしない */
00944     /* do not scan word if the length is 1, as it further varies in the
00945        following next_word() */
00946     if (winfo->wlen[word] == 1) {
00947       now->last_ph = tailph;
00948 #ifdef MULTIPATH_VERSION
00949       now->last_ph_sp_attached = TRUE;
00950 #endif
00951 #ifdef GRAPHOUT
00952 #ifdef GRAPHOUT_PRECISE_BOUNDARY
00953       /* 単語境界伝搬情報を初期化 */
00954       /* initialize word boundary propagation info */
00955       for (t=0;t<peseqlen;t++) {
00956         now->wordend_frame[t] = t;
00957         now->wordend_gscore[t] = now->g[t];
00958       }
00959 #endif
00960 #endif
00961 #ifdef TCD
00962       j_printf("suspended as %s\n", (now->last_ph)->name);
00963 #endif
00964       return;
00965     }
00966 
00967     /* scan範囲の音素列を準備 */
00968     /* prepare HMM of the scan range */
00969     phmmlen = winfo->wlen[word] - 1;
00970     if (phmmlen > phmmlen_max) {
00971       j_error("short of phmmlen\n");
00972     }
00973     for (i=0;i<phmmlen-1;i++) {
00974       phmmseq[i] = winfo->wseq[word][i+1];
00975 #ifdef MULTIPATH_VERSION
00976       has_sp[i] = FALSE;
00977 #endif
00978     }
00979     phmmseq[phmmlen-1] = tailph;
00980 #ifdef MULTIPATH_VERSION
00981     has_sp[phmmlen-1] = (enable_iwsp) ? TRUE : FALSE;
00982 #endif
00983   } else {
00984     phmmlen = winfo->wlen[word];
00985 #ifdef MULTIPATH_VERSION
00986     for (i=0;i<phmmlen;i++) {
00987       phmmseq[i] = winfo->wseq[word][i];
00988       has_sp[i] = FALSE;
00989     }
00990     if (enable_iwsp) has_sp[phmmlen-1] = TRUE;
00991 #else
00992     for (i=0;i<phmmlen;i++) phmmseq[i] = winfo->wseq[word][i];
00993 #endif
00994   }
00995 
00996   /* 元のg[]をいったん待避しておく */
00997   /* temporally keeps the original g[] */
00998   for (t=0;t<peseqlen;t++) g[t] = now->g[t];
00999 
01000 #ifdef GRAPHOUT
01001 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01002   /* 単語境界伝搬情報を初期化 */
01003   /* initialize word boundary propagation info */
01004   for (t=0;t<peseqlen;t++) {
01005     wef[t] = t;
01006     wes[t] = now->g[t];
01007   }
01008 #endif
01009 #endif
01010 
01011   /* viterbiを実行して g[] から now->g[] を更新する */
01012   /* do viterbi computation for phmmseq from g[] to now->g[] */
01013   do_viterbi(g, now->g, phmmseq
01014 #ifdef MULTIPATH_VERSION
01015              , has_sp
01016 #endif
01017              , phmmlen, param, peseqlen, now->estimated_next_t
01018 #ifdef MULTIPATH_VERSION
01019              , &(now->final_g)
01020 #endif
01021 #ifdef GRAPHOUT
01022 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01023              /* 単語境界情報 we[] から now->wordend_frame[] を更新する */
01024              /* propagate word boundary info from we[] to now->wordend_frame[] */
01025              , wef, now->wordend_frame
01026              , wes, now->wordend_gscore
01027 #endif
01028 #endif
01029              );
01030 #ifndef MULTIPATH_VERSION
01031 #ifdef GRAPHOUT
01032 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01033   /* 次回の next_word 用に境界情報を調整 */
01034   /* proceed word boundary for one step for next_word */
01035   now->wordend_frame[peseqlen-1] = now->wordend_frame[0];
01036   now->wordend_gscore[peseqlen-1] = now->wordend_gscore[0];
01037   for (t=0;t<peseqlen-1;t++) {
01038     now->wordend_frame[t] = now->wordend_frame[t+1];
01039     now->wordend_gscore[t] = now->wordend_gscore[t+1];
01040   }
01041 #endif
01042 #endif
01043 #endif
01044 
01045   if (ccd_flag) {
01046     /* 次回のために now->last_ph を更新 */
01047     /* update 'now->last_ph' for future scan_word() */
01048     now->last_ph = winfo->wseq[word][0];
01049 #ifdef MULTIPATH_VERSION
01050     now->last_ph_sp_attached = FALSE; /* wlen > 1 here */
01051 #endif
01052 #ifdef TCD
01053     j_printf("last_ph = %s\n", (now->last_ph)->name);
01054 #endif
01055   }
01056 }
01057 
01058 
01059 /**************************************************************************/
01060 /*** 新仮説の展開とヒューリスティックを繋いだ全体スコアを計算           ***/
01061 /*** Expand new hypothesis and compute the total score (with heuristic) ***/
01062 /**************************************************************************/
01063 
01064 static HMM_Logical *lastphone, *newphone;
01065 static LOGPROB *g_src;
01066 
01090 void
01091 next_word(NODE *now, NODE *new, NEXTWORD *nword, HTK_Param *param, BACKTRELLIS *backtrellis)
01092 {
01093   int   t;
01094   int lastword;
01095   int   i;
01096   LOGPROB a_value;
01097   LOGPROB tmpp;
01098   int   startt;
01099   int word;
01100   TRELLIS_ATOM *tre;
01101   LOGPROB totalscore;
01102 #ifdef GRAPHOUT
01103 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01104   short *wendf;
01105   LOGPROB *wends;
01106 #endif
01107 #endif
01108 
01109 
01110   word = nword->id;
01111   lastword = now->seq[now->seqnum-1];
01112 
01113   /* lastphone (直前単語の先頭音素) を準備 */
01114   /* prepare lastphone (head phone of previous word) */
01115   if (ccd_flag) {
01116     /* 最終音素 triphone を接続単語に会わせて変化 */
01117     /* modify triphone of last phone according to the next word */
01118     lastphone = get_left_context_HMM(now->last_ph, winfo->wseq[word][winfo->wlen[word]-1]->name, hmminfo);
01119     if (lastphone == NULL) {
01120       /* fallback to the original bi/mono-phone */
01121       /* error if the original is pseudo phone (not explicitly defined
01122          in hmmdefs/hmmlist) */
01123       /* exception: word with 1 phone (triphone may exist in the next expansion */
01124       if (now->last_ph->is_pseudo){
01125         error_missing_left_triphone(now->last_ph, winfo->wseq[word][winfo->wlen[word]-1]->name);
01126       }
01127       lastphone = now->last_ph;
01128     }
01129   }
01130 
01131   /* newphone (接続単語の末尾音素) を準備 */
01132   /* prepare newphone (tail phone of next word) */
01133   if (ccd_flag) {
01134     newphone = get_right_context_HMM(winfo->wseq[word][winfo->wlen[word]-1], now->last_ph->name, hmminfo);
01135     if (newphone == NULL) {
01136       /* fallback to the original bi/mono-phone */
01137       /* error if the original is pseudo phone (not explicitly defined
01138          in hmmdefs/hmmlist) */
01139       /* exception: word with 1 phone (triphone may exist in the next expansion */
01140       if (winfo->wlen[word] > 1 && winfo->wseq[word][winfo->wlen[word]-1]->is_pseudo){
01141         error_missing_right_triphone(winfo->wseq[word][winfo->wlen[word]-1], now->last_ph->name);
01142       }
01143       newphone = winfo->wseq[word][winfo->wlen[word]-1];
01144     }
01145   } else {
01146     newphone = winfo->wseq[word][winfo->wlen[word]-1];
01147   }
01148   
01149   /* 単語並び、DFA状態番号、言語スコアを new へ継承・更新 */
01150   /* inherit and update word sequence, DFA state and total LM score to 'new' */
01151   new->score = LOG_ZERO;
01152   for (i=0;i< now->seqnum;i++){
01153     new->seq[i] = now->seq[i];
01154 #ifdef CM_SEARCH
01155 #ifdef CM_MULTIPLE_ALPHA
01156     memcpy(new->cmscore[i], now->cmscore[i], sizeof(LOGPROB) * cm_alpha_num);
01157 #else
01158     new->cmscore[i] = now->cmscore[i];
01159 #endif
01160 #endif /* CM_SEARCH */
01161   }
01162   new->seq[i] = word;
01163   new->seqnum = now->seqnum+1;
01164 #ifdef USE_DFA
01165   new->state = nword->next_state;
01166 #endif
01167 #ifdef USE_NGRAM
01168   new->totallscore = now->totallscore + nword->lscore;
01169 #endif  
01170   if (ccd_flag) {
01171     /* 次仮説の履歴情報として保存 */
01172     /* keep the lastphone for next scan_word() */
01173     new->last_ph = lastphone;
01174 #ifdef MULTIPATH_VERSION
01175     new->last_ph_sp_attached = now->last_ph_sp_attached;
01176 #endif
01177   }
01178 
01179   if (ccd_flag) {
01180     /* 最後の1音素(lastphone)分をscanし,更新したスコアを new に保存 */
01181     /* scan the lastphone and set the updated score to new->g[] */
01182     do_viterbi_next_word(now, new, lastphone
01183 #ifdef MULTIPATH_VERSION
01184                          , now->last_ph_sp_attached
01185 #endif
01186                          , param);
01187     g_src = new->g;
01188   } else {
01189     g_src = now->g;
01190 #ifdef GRAPHOUT
01191 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01192     memcpy(new->wordend_frame, now->wordend_frame, sizeof(short)*peseqlen);
01193     memcpy(new->wordend_gscore, now->wordend_gscore, sizeof(LOGPROB)*peseqlen);
01194 #endif
01195 #endif
01196   }
01197       
01198   /* 次回の scan_word に備えて new->g[] を変更しておく */
01199   /* prepare new->g[] for next scan_word() */
01200 #ifdef MULTIPATH_VERSION
01201   startt = peseqlen-1;
01202 #else
01203   startt = peseqlen-2;
01204 #endif
01205   i = hmm_logical_state_num(newphone);
01206   a_value = (hmm_logical_trans(newphone))->a[i-2][i-1];
01207   for(t=0; t <= startt; t++) {
01208     new->g[t] =
01209 #ifdef MULTIPATH_VERSION
01210       g_src[t]
01211 #else
01212       g_src[t+1] + a_value
01213 #endif
01214 #ifdef USE_NGRAM
01215       + nword->lscore
01216 #else
01217       + penalty2
01218 #endif
01219       ;
01220   }
01221 
01222   /***************************************************************************/
01223   /* 前向き(第2パス),後ろ向き(第1パス)トレリスを接続し最尤接続点を見つける */
01224   /* connect forward/backward trellis to look for the best connection time   */
01225   /***************************************************************************/
01226   /*-----------------------------------------------------------------*/
01227   /* 単語トレリスを探して, 次単語の最尤接続点を発見する */
01228   /* determine the best connection time of the new word, seeking the word
01229      trellis */
01230   /*-----------------------------------------------------------------*/
01231 
01232 #ifdef USE_DFA
01233   if (!looktrellis_flag) {
01234     /* すべてのフレームにわたって最尤を探す */
01235     /* search for best trellis word throughout all frame */
01236     for(t = startt; t >= 0; t--) {
01237       tre = bt_binsearch_atom(backtrellis, t, (WORD_ID) word);
01238       if (tre == NULL) continue;
01239 #ifdef MULTIPATH_VERSION
01240       totalscore = new->g[t] + tre->backscore;
01241 #else
01242       if (newphone->is_pseudo) {
01243         tmpp = outprob_cd(t, &(newphone->body.pseudo->stateset[newphone->body.pseudo->state_num-2]), param);
01244       } else {
01245         tmpp = outprob_state(t, newphone->body.defined->s[newphone->body.defined->state_num-2], param);
01246       }
01247       totalscore = new->g[t] + tre->backscore + tmpp;
01248 #endif
01249       if (new->score < totalscore) {
01250         new->score = totalscore;
01251         new->bestt = t;
01252         new->estimated_next_t = tre->begintime - 1;
01253         new->tre = tre;
01254       }
01255     }
01256   } else {
01257 #endif /* USE_DFA */
01258     
01259   /* 最後に参照したTRELLIS_ATOMの終端時間の前後 */
01260   /* newの推定時間は,上記で採用したTRELLIS_ATOMの始端時間 */
01261 
01262   /* この展開単語のトレリス上の終端時間の前後のみスキャンする
01263      前後に連続して存在するフレームについてのみ計算 */
01264   /* search for best trellis word only around the estimated time */
01265   /* 1. search forward */
01266   for(t = (nword->tre)->endtime; t >= 0; t--) {
01267     tre = bt_binsearch_atom(backtrellis, t, (WORD_ID) word);
01268     if (tre == NULL) break;     /* go to 2 if the trellis word disappear */
01269 #ifdef MULTIPATH_VERSION
01270     totalscore = new->g[t] + tre->backscore;
01271 #else
01272     if (newphone->is_pseudo) {
01273       tmpp = outprob_cd(t, &(newphone->body.pseudo->stateset[newphone->body.pseudo->state_num-2]), param);
01274     } else {
01275       tmpp = outprob_state(t, newphone->body.defined->s[newphone->body.defined->state_num-2], param);
01276     }
01277     totalscore = new->g[t] + tre->backscore + tmpp;
01278 #endif
01279     if (new->score < totalscore) {
01280       new->score = totalscore;
01281       new->bestt = t;
01282       new->estimated_next_t = tre->begintime - 1;
01283       new->tre = tre;
01284     }
01285   }
01286   /* 2. search bckward */
01287   for(t = (nword->tre)->endtime + 1; t <= startt; t++) {
01288     tre = bt_binsearch_atom(backtrellis, t, (WORD_ID) word);
01289     if (tre == NULL) break;     /* end if the trellis word disapper */
01290 #ifdef MULTIPATH_VERSION
01291     totalscore = new->g[t] + tre->backscore;
01292 #else
01293     if (newphone->is_pseudo) {
01294       tmpp = outprob_cd(t, &(newphone->body.pseudo->stateset[newphone->body.pseudo->state_num-2]), param);
01295     } else {
01296       tmpp = outprob_state(t, newphone->body.defined->s[newphone->body.defined->state_num-2], param);
01297     }
01298     totalscore = new->g[t] + tre->backscore + tmpp;
01299 #endif
01300     if (new->score < totalscore) {
01301       new->score = totalscore;
01302       new->bestt = t;
01303       new->estimated_next_t = tre->begintime - 1;
01304       new->tre = tre;
01305     }
01306   }
01307 
01308 #ifdef USE_DFA
01309   }
01310 #endif
01311 
01312 #ifdef USE_NGRAM
01313   /* set current LM score */
01314   new->lscore = nword->lscore;
01315 #endif
01316   
01317 }
01318 
01319 
01320 /**********************************************************************/
01321 /********** 初期仮説の生成                 ****************************/
01322 /********** Generate an initial hypothesis ****************************/
01323 /**********************************************************************/
01324 
01344 void
01345 start_word(NODE *new, NEXTWORD *nword, HTK_Param *param, BACKTRELLIS *backtrellis)
01346 {
01347   HMM_Logical *newphone;
01348   WORD_ID word;
01349   TRELLIS_ATOM *tre = NULL;
01350   LOGPROB tmpp;
01351   int t;
01352 
01353   /* initialize data */
01354   word = nword->id;
01355   new->score = LOG_ZERO;
01356   new->seqnum = 1;
01357   new->seq[0] = word;
01358 
01359 #ifdef USE_DFA
01360   new->state = nword->next_state;
01361 #endif
01362 #ifdef USE_NGRAM
01363   new->totallscore = nword->lscore;
01364 #endif  
01365 
01366 #ifdef USE_NGRAM
01367   /* set current LM score */
01368   new->lscore = nword->lscore;
01369 #endif
01370 
01371   /* cross-word triphone need not be handled on startup */
01372   newphone = winfo->wseq[word][winfo->wlen[word]-1];
01373   if (ccd_flag) {
01374     new->last_ph = NULL;
01375   }
01376   
01377 #ifdef USE_NGRAM
01378   new->g[peseqlen-1] = nword->lscore;
01379 #else  /* USE_DFA */
01380   new->g[peseqlen-1] = 0;
01381 #endif
01382   
01383   for (t=peseqlen-1; t>=0; t--) {
01384     tre = bt_binsearch_atom(backtrellis, t, word);
01385     if (tre != NULL) {
01386 #ifdef GRAPHOUT
01387       new->bestt = peseqlen-1;
01388 #else
01389       new->bestt = t;
01390 #endif
01391 #ifdef MULTIPATH_VERSION
01392       new->score = new->g[peseqlen-1] + tre->backscore;
01393 #else
01394       if (newphone->is_pseudo) {
01395         tmpp = outprob_cd(peseqlen-1, &(newphone->body.pseudo->stateset[newphone->body.pseudo->state_num-2]), param);
01396       } else {
01397         tmpp = outprob_state(peseqlen-1, newphone->body.defined->s[newphone->body.defined->state_num-2], param);
01398       }
01399       new->score = new->g[peseqlen-1] + tre->backscore + tmpp;
01400 #endif
01401       new->estimated_next_t = tre->begintime - 1;
01402       new->tre = tre;
01403       break;
01404     }
01405   }
01406   if (tre == NULL) {            /* no word in backtrellis */
01407     new->score = LOG_ZERO;
01408   }
01409 }
01410 
01428 void
01429 last_next_word(NODE *now, NODE *new, HTK_Param *param)
01430 {
01431   cpy_node(new, now);
01432   if (ccd_flag) {
01433     /* 最終音素分を viterbi して最終スコアを設定 */
01434     /* scan the last phone and update the final score */
01435     do_viterbi_next_word(now, new, now->last_ph
01436 #ifdef MULTIPATH_VERSION
01437                          , now->last_ph_sp_attached
01438 #endif
01439                          , param);
01440 #ifdef MULTIPATH_VERSION
01441     new->score = new->final_g;
01442 #else
01443     new->score = new->g[0];
01444 #endif
01445   } else {
01446 #ifdef MULTIPATH_VERSION
01447     new->score = now->final_g;
01448 #else
01449     new->score = now->g[0];
01450 #endif
01451 #ifdef GRAPHOUT
01452 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01453     /* last boundary has moved to [peseqlen-1] in last scan_word() */
01454     memcpy(new->wordend_frame, now->wordend_frame, sizeof(short)*peseqlen);
01455     memcpy(new->wordend_gscore, now->wordend_gscore, sizeof(LOGPROB)*peseqlen);
01456 #endif
01457 #endif
01458   }
01459 }
01460 
01461 #endif /* PASS2_STRICT_IWCD */

Generated on Tue Dec 26 16:16:33 2006 for Julius by  doxygen 1.5.0