00001 
00046 
00047 
00048 
00049 
00050 
00051 
00052 
00053 #include <julius.h>
00054 
00055 
00056 static NODE *get_best_from_stack(NODE **start, int *stacknum);
00057 static int put_to_stack(NODE *new, NODE **start, NODE **bottom, int *stacknum, int stacksize);
00058 static void put_all_in_stack(NODE **start, int *stacknum);
00059 static void free_all_nodes(NODE *node);
00060 static void put_hypo_woutput(NODE *hypo);
00061 static void put_hypo_wname(NODE *hypo);
00062 
00063 
00064 
00065 
00066 
00067 
00086 static NEXTWORD **
00087 nw_malloc(int *maxlen, NEXTWORD **root)
00088 {
00089   NEXTWORD *nwtmp;
00090   NEXTWORD **nw;
00091   int i;
00092   int max;
00093 
00094   
00095   max = winfo->num;
00096   
00097   nw = (NEXTWORD **)mymalloc(max * sizeof(NEXTWORD *));
00098   nwtmp = (NEXTWORD *)mymalloc(max * sizeof(NEXTWORD));
00099   for (i=0;i<max; i++) {
00100     nw[i] = &(nwtmp[i]);
00101   }
00102   *maxlen = max;
00103   *root = nwtmp;
00104   return nw;
00105 }
00106 
00122 static void
00123 nw_free(NEXTWORD **nw, NEXTWORD *root)
00124 {
00125   free(root);
00126   free(nw);
00127 }
00128 
00129 #ifdef USE_DFA
00130 
00166 static NEXTWORD **
00167 nw_expand(NEXTWORD **nwold, int *maxlen, NEXTWORD **root)
00168 {
00169   NEXTWORD *nwtmp;
00170   NEXTWORD **nw;
00171   int i;
00172   int nwmaxlen;
00173 
00174   nwmaxlen = *maxlen + winfo->num;
00175 
00176   nwtmp = (NEXTWORD *)myrealloc(*root, nwmaxlen * sizeof(NEXTWORD));
00177   nw = (NEXTWORD **)myrealloc(nwold, nwmaxlen * sizeof(NEXTWORD *));
00178   nw[0] = nwtmp;
00179   for (i=1;i<nwmaxlen; i++) {
00180     nw[i] = &(nwtmp[i]);
00181   }
00182   *maxlen = nwmaxlen;
00183   *root = nwtmp;
00184   return nw;
00185 }
00186 #endif
00187 
00188 
00189 
00190 
00191 
00192 
00193 
00194 
00195 
00196 
00216 static NODE *
00217 get_best_from_stack(NODE **start, int *stacknum)
00218 {
00219   NODE *tmp;
00220 
00221   
00222   tmp=(*start);
00223   if ((*start)!=NULL) {         
00224     (*start)=(*start)->next;
00225     if ((*start)!=NULL) (*start)->prev=NULL;
00226     (*stacknum)--;
00227     return(tmp);
00228   }
00229   else {
00230     return(NULL);
00231   }
00232 }
00233 
00260 static int
00261 can_put_to_stack(NODE *new, NODE **bottom, int *stacknum, int stacksize)
00262 {
00263   
00264   if ((*stacknum + 1) > stacksize && (*bottom)->score >= new->score) {
00265     
00266     return(-1);
00267   }
00268   return(0);
00269 }
00270 
00299 static int
00300 put_to_stack(NODE *new, NODE **start, NODE **bottom, int *stacknum, int stacksize)
00301 {
00302   NODE *tmp;
00303 
00304   
00305   (*stacknum)++;
00306 
00307   
00308   if ((*stacknum)>stacksize) {
00309     
00310     (*stacknum)--;
00311     if ((*bottom)->score < new->score) {
00312       
00313       tmp=(*bottom);
00314       (*bottom)->prev->next=NULL;
00315       (*bottom)=(*bottom)->prev;
00316       free_node(tmp);
00317     } else {
00318       
00319       free_node(new);
00320       return(-1);
00321     }
00322   }
00323 
00324   
00325   if ((*start)==NULL) {         
00326     
00327     (*start)=new;
00328     (*bottom)=new;
00329     new->next=NULL;
00330     new->prev=NULL;
00331     return(0);
00332   }
00333   if ((*start)->score <= new->score) {
00334     
00335     new->next = (*start);
00336     new->next->prev = new;
00337     (*start)=new;
00338     new->prev=NULL;
00339     return(0);
00340   }
00341   if ((*bottom)->score >= new->score) {
00342     
00343     new->prev = (*bottom);
00344     new->prev->next = new;
00345     (*bottom)=new;
00346     new->next=NULL;
00347     return(0);
00348   }
00349     
00350   
00351   if (((*start)->score + (*bottom)->score) / 2 > new->score) {
00352     
00353     tmp=(*bottom);
00354     while(tmp->score < new->score) tmp=tmp->prev;
00355     new->prev=tmp;
00356     new->next=tmp->next;
00357     tmp->next->prev=new;
00358     tmp->next=new;
00359   } else {
00360     
00361     tmp=(*start);
00362     while(tmp->score > new->score) tmp=tmp->next;
00363     new->next=tmp;
00364     new->prev=tmp->prev;
00365     tmp->prev->next=new;
00366     tmp->prev=new;
00367   }
00368   return(0);
00369 }
00370 
00385 static void
00386 put_all_in_stack(NODE **start, int *stacknum)
00387 {
00388   NODE *ntmp;
00389   
00390   j_printf("stack remains::\n");
00391   while ((ntmp = get_best_from_stack(start, stacknum)) != NULL) {
00392     j_printf("%3d: s=%f ",*stacknum, ntmp->score);
00393     put_hypo_woutput(ntmp);
00394     free_node(ntmp);
00395   }
00396 }
00397 
00410 static void
00411 free_all_nodes(NODE *start)
00412 {
00413   NODE *tmp;
00414   NODE *next;
00415 
00416   tmp=start;
00417   while(tmp) {
00418     next=tmp->next;
00419     free_node(tmp);
00420     tmp=next;
00421   }
00422 }
00423 
00424 
00425 #ifdef CONFIDENCE_MEASURE
00426 
00427 
00428 
00429 
00430 
00431 
00432 
00433 #ifdef CM_MULTIPLE_ALPHA
00434 static LOGPROB *cmsumlist = NULL; 
00435 #endif
00436 
00437 
00438 #ifdef CM_SEARCH
00439 
00440 
00441 
00442 
00443 
00444 
00445 static LOGPROB cm_tmpbestscore; 
00446 #ifndef CM_MULTIPLE_ALPHA
00447 static LOGPROB cm_tmpsum;       
00448 #endif
00449 
00450 
00451 static int l_stacksize;         
00452 static int l_stacknum;          
00453 static NODE *l_start = NULL;    
00454 static NODE *l_bottom = NULL;   
00455 
00469 static void
00470 cm_init(WORD_INFO *winfo)
00471 {
00472   l_stacksize = winfo->num;
00473   l_start = l_bottom = NULL;
00474   l_stacknum = 0;
00475 #ifdef CM_MULTIPLE_ALPHA
00476   if (cmsumlist == NULL) {      
00477     cmsumlist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * cm_alpha_num);
00478   }
00479 #endif    
00480 }
00481 
00494 static void
00495 cm_store(NODE *new)
00496 {
00497   
00498   put_to_stack(new, &l_start, &l_bottom, &l_stacknum, l_stacksize);
00499 }
00500 
00512 static void
00513 cm_sum_score()
00514 {
00515   NODE *node;
00516   LOGPROB sum;
00517 #ifdef CM_MULTIPLE_ALPHA
00518   LOGPROB cm_alpha;
00519   int j;
00520 #endif
00521 
00522   if (l_start == NULL) return;  
00523   cm_tmpbestscore = l_start->score; 
00524 
00525 #ifdef CM_MULTIPLE_ALPHA
00526   for (j = 0, cm_alpha = cm_alpha_bgn; cm_alpha <= cm_alpha_end; cm_alpha += cm_alpha_step) {
00527 #endif
00528     sum = 0.0;
00529     for(node = l_start; node; node = node->next) {
00530       sum += pow(10, cm_alpha * (node->score - cm_tmpbestscore));
00531     }
00532 #ifdef CM_MULTIPLE_ALPHA
00533     cmsumlist[j++] = sum;       
00534 #else
00535     cm_tmpsum = sum;            
00536 #endif
00537 
00538 #ifdef CM_MULTIPLE_ALPHA
00539   }
00540 #endif
00541 }
00542 
00543 
00558 static void
00559 cm_set_score(NODE *node)
00560 {
00561 #ifdef CM_MULTIPLE_ALPHA
00562   int j;
00563   LOGPROB cm_alpha;
00564 #endif
00565 
00566 #ifdef CM_MULTIPLE_ALPHA
00567   for (j = 0, cm_alpha = cm_alpha_bgn; cm_alpha <= cm_alpha_end; cm_alpha += cm_alpha_step) {
00568     node->cmscore[node->seqnum-1][j] = pow(10, cm_alpha * (node->score - cm_tmpbestscore)) / cmsumlist[j];
00569     j++;
00570   }
00571 #else
00572   node->cmscore[node->seqnum-1] = pow(10, cm_alpha * (node->score - cm_tmpbestscore)) / cm_tmpsum;
00573 #endif
00574 }
00575 
00588 static NODE *
00589 cm_get_node()
00590 {
00591   return(get_best_from_stack(&l_start, &l_stacknum));
00592 }
00593 
00594 #endif 
00595 
00596 #ifdef CM_NBEST
00597 
00598 
00599 
00600 
00601 
00602 
00603 
00604 static LOGPROB *sentcm = NULL;  
00605 static LOGPROB *wordcm = NULL;  
00606 static int sentnum;             
00607 
00623 static void
00624 cm_compute_from_nbest(NODE *start, int stacknum)
00625 {
00626   NODE *node;
00627   LOGPROB bestscore, sum, s;
00628   WORD_ID w;
00629   int i;
00630 #ifdef CM_MULTIPLE_ALPHA
00631   LOGPROB cm_alpha;
00632   int j;
00633 #endif
00634 
00635   
00636 #ifdef CM_MULTIPLE_ALPHA
00637   if (cmsumlist == NULL) {
00638     cmsumlist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * cm_alpha_num);
00639   }
00640 #endif    
00641   if (sentcm == NULL) {         
00642     sentcm = (LOGPROB *)mymalloc(sizeof(LOGPROB)*stacknum);
00643     sentnum = stacknum;
00644   } else if (sentnum < stacknum) { 
00645     sentcm = (LOGPROB *)myrealloc(sentcm, sizeof(LOGPROB)*stacknum);
00646     sentnum = stacknum;
00647   }
00648   if (wordcm == NULL) {
00649     wordcm = (LOGPROB *)mymalloc(sizeof(LOGPROB) * winfo->num);
00650   }
00651 #ifdef CM_MULTIPLE_ALPHA
00652   for (j = 0, cm_alpha = cm_alpha_bgn; cm_alpha <= cm_alpha_end; cm_alpha += cm_alpha_step) {
00653 #endif
00654     
00655     for(w=0;w<winfo->num;w++) {
00656       wordcm[w] = 0.0;
00657     }
00658     
00659     bestscore = start->score;
00660     
00661     sum = 0.0;
00662     for (node = start; node != NULL; node = node->next) {
00663       sum += pow(10, cm_alpha * (node->score - bestscore));
00664     }
00665     
00666     i = 0;
00667     for (node = start; node != NULL; node = node->next) {
00668       sentcm[i] = pow(10, cm_alpha * (node->score - bestscore)) / sum;
00669       i++;
00670     }
00671     
00672     i = 0;
00673     for (node = start; node != NULL; node = node->next) {
00674       for (w=0;w<node->seqnum;w++) {
00675         wordcm[node->seq[w]] += sentcm[i];
00676       }
00677       i++;
00678     }
00679     
00680     for (node = start; node != NULL; node = node->next) {
00681       for (w=0;w<node->seqnum;w++) {
00682 #ifdef CM_MULTIPLE_ALPHA
00683         node->cmscore[w][j] = wordcm[node->seq[w]];
00684 #else   
00685         node->cmscore[w] = wordcm[node->seq[w]];
00686 #endif
00687       }
00688     }
00689 #ifdef CM_MULTIPLE_ALPHA
00690     j++;
00691   }
00692 #endif
00693 }
00694 
00695 #endif 
00696 
00697 #endif 
00698 
00699 
00700 
00701 
00702 
00703 
00704 
00705 
00706 
00707 
00708 
00709 
00710 
00711 
00712 
00713 
00714 
00715 
00716 
00717 static int hypo_len_count[MAXSEQNUM+1]; 
00718 static int maximum_filled_length; 
00719 
00730 static void
00731 wb_init()
00732 {
00733   int i;
00734   for(i=0;i<=MAXSEQNUM;i++) hypo_len_count[i] = 0;
00735   maximum_filled_length = -1;
00736 }
00737 
00760 static boolean
00761 wb_ok(NODE *now)
00762 {
00763   if (now->seqnum <= maximum_filled_length) {
00764     
00765 
00766     if (debug2_flag) {
00767       j_printf("popped but pruned by word envelope: ");
00768       put_hypo_woutput(now);
00769     }
00770     return FALSE;
00771   } else {
00772     
00773 
00774     hypo_len_count[now->seqnum]++;
00775     if (hypo_len_count[now->seqnum] > enveloped_bestfirst_width) {
00776       
00777 
00778 
00779       if (maximum_filled_length < now->seqnum) maximum_filled_length = now->seqnum;
00780     }
00781     return TRUE;
00782   }
00783 }
00784 
00785 #ifdef SCAN_BEAM
00786 
00787 
00788 
00789 
00790 
00791 
00792 
00793 
00794 
00795 
00796 
00797 
00798 
00799 
00800 
00801 
00802 
00803 
00804 
00805 
00806 
00807 
00808 
00809 
00810 
00824 static void
00825 envl_init(int framenum)
00826 {
00827   int i;
00828   for(i=0;i<framenum;i++) framemaxscore[i] = LOG_ZERO;
00829 }
00830 
00845 static void
00846 envl_update(NODE *n, int framenum)
00847 {
00848   int t;
00849   for(t=framenum-1;t>=0;t--) {
00850     if (framemaxscore[t] < n->g[t]) framemaxscore[t] = n->g[t];
00851   }
00852 }
00853 #endif 
00854 
00855 
00856 #ifdef USE_DFA
00857 
00861 static NEXTWORD fornoise;
00862 #endif
00863 
00864 
00865 #ifdef SP_BREAK_CURRENT_FRAME
00866 
00867 
00868 
00869 
00887 void
00888 sp_segment_set_last_nword(NODE *hypo)
00889 {
00890   int i;
00891   WORD_ID w;
00892   for(i=0;i<hypo->seqnum;i++) {
00893     w = hypo->seq[i];
00894     if (w != sp_break_last_word
00895        && !is_sil(w, winfo, hmminfo)
00896        && !winfo->is_transparent[w]
00897        ) {
00898       sp_break_last_nword = w;
00899       break;
00900     }
00901   }
00902 #ifdef SP_BREAK_DEBUG
00903   printf("sp_break_last_nword=%d[%s]\n", sp_break_last_nword, winfo->woutput[sp_break_last_nword]);
00904 #endif
00905 }
00906 #endif
00907 
00908 
00909 
00910 
00911 
00912 
00913 
00914 
00915 
00916 static HTK_Param *tparam;       
00917 
00918 
00931 static void
00932 put_hypo_woutput(NODE *hypo)
00933 {
00934   int i,w;
00935 
00936   if (hypo != NULL) {
00937     for (i=hypo->seqnum-1;i>=0;i--) {
00938       w = hypo->seq[i];
00939       j_printf(" %s",winfo->woutput[w]);
00940     }
00941   }
00942   j_printf("\n");  
00943 }
00944 
00957 static void
00958 put_hypo_wname(NODE *hypo)
00959 {
00960   int i,w;
00961 
00962   if (hypo != NULL) {
00963     for (i=hypo->seqnum-1;i>=0;i--) {
00964       w = hypo->seq[i];
00965       j_printf(" %s",winfo->wname[w]);
00966     }
00967   }
00968   j_printf("\n");  
00969 }
00970 
00971 
00972 
00973 
00974 
01010 static void
01011 result_reorder_and_output(NODE **r_start, NODE **r_bottom, int *r_stacknum, int ncan, WORD_INFO *winfo)
01012 {
01013   NODE *now;
01014   int num;
01015 
01016 #ifdef CM_NBEST 
01017   
01018   cm_compute_from_nbest(*r_start, *r_stacknum);
01019 #endif
01020   num = 0;
01021   result_pass2_begin();
01022   while ((now = get_best_from_stack(r_start,r_stacknum)) != NULL && num < ncan) {
01023     num++;
01024     
01025     result_pass2(now, num, winfo);
01026 #ifdef VISUALIZE
01027     visual2_best(now, winfo);
01028 #endif
01029 #ifdef SP_BREAK_CURRENT_FRAME
01030     
01031     if (sp_break_last_nword_allow_override && num == 1) sp_segment_set_last_nword(now);
01032 #endif
01033     
01034     if (align_result_word_flag) word_rev_align(now->seq, now->seqnum, tparam);
01035     if (align_result_phoneme_flag) phoneme_rev_align(now->seq, now->seqnum, tparam);
01036     if (align_result_state_flag) state_rev_align(now->seq, now->seqnum, tparam);
01037     free_node(now);
01038   }
01039   result_pass2_end();
01040   
01041   if (now != NULL) free_node(now);
01042   free_all_nodes(*r_start);
01043 }  
01044 
01045 
01046 
01047 
01048 
01049 
01050 
01051 
01052 
01053 
01054 
01055 
01082 void
01083 wchmm_fbs(HTK_Param *param, BACKTRELLIS *backtrellis, LOGPROB backmax, int stacksize, int ncan, int maxhypo, int cate_bgn, int cate_num)
01084 {
01085   
01086   
01087   int stacknum;                 
01088   NODE *start = NULL;           
01089   NODE *bottom = NULL;          
01090 
01091   
01092   
01093   int r_stacksize = ncan;
01094   int r_stacknum;
01095   NODE *r_start = NULL;
01096   NODE *r_bottom = NULL;
01097 
01098   
01099   
01100   int popctr = 0;               
01101   int genectr = 0;              
01102   int pushctr = 0;              
01103   int finishnum = 0;            
01104 
01105   
01106   
01107   NEXTWORD **nextword, *nwroot; 
01108   int maxnwnum;                 
01109   int nwnum;                    
01110   NODE *now, *new;              
01111 #ifdef USE_DFA
01112   NODE *now_noise;             
01113   boolean now_noise_calced;
01114   int t;
01115 #endif
01116   int w,i,j;
01117   LOGPROB last_score = LOG_ZERO;
01118 #ifdef GRAPHOUT
01119   LOGPROB prev_score;
01120   WordGraph *wordgraph_root = NULL;
01121   boolean merged_p;
01122 #ifdef GRAPHOUT_DYNAMIC
01123   int dynamic_merged_num = 0;
01124   WordGraph *wtmp;
01125 #endif
01126 #ifdef GRAPHOUT_SEARCH
01127   int terminate_search_num = 0;
01128 #endif
01129 #endif
01130 
01131   
01132   if (align_result_word_flag || align_result_phoneme_flag || align_result_state_flag) tparam = param;
01133 
01134 #ifdef USE_DFA
01135   if (debug2_flag) j_printf("limiting category: %d-%d\n", cate_bgn, cate_bgn + cate_num-1);
01136 #endif
01137 
01138   
01139 
01140 
01141 
01142   peseqlen = backtrellis->framelen; 
01143   
01144   
01145   nextword = nw_malloc(&maxnwnum, &nwroot);
01146   
01147   
01148   malloc_wordtrellis();         
01149   
01150   
01151   start = bottom = NULL;
01152   stacknum = 0;
01153   
01154   
01155   if (result_reorder_flag) {
01156     r_start = r_bottom = NULL;
01157     r_stacknum = 0;
01158   }
01159   
01160 #ifdef CM_SEARCH
01161   
01162   cm_init(winfo);
01163 #endif
01164 #ifdef SCAN_BEAM
01165   
01166   framemaxscore = (LOGPROB *)mymalloc(sizeof(LOGPROB)*peseqlen);
01167   envl_init(peseqlen);
01168 #endif 
01169   
01170   
01171   
01172   if (enveloped_bestfirst_width >= 0) wb_init();
01173 
01174   if (verbose_flag) j_printf("samplenum=%d\n", peseqlen);
01175   if (result_reorder_flag) {
01176     if (debug2_flag) VERMES("getting %d candidates...\n",ncan);
01177   }
01178 
01179 #ifdef VISUALIZE
01180   visual2_init(maxhypo);
01181 #endif
01182 
01183   
01184 
01185 
01186 
01187 
01188   
01189 #ifdef USE_NGRAM
01190   nwnum = ngram_firstwords(nextword, peseqlen, maxnwnum, winfo, backtrellis);
01191 #else  
01192   nwnum = dfa_firstwords(nextword, peseqlen, maxnwnum, dfa);
01193   
01194   
01195 
01196   while (nwnum < 0) {
01197     nextword = nw_expand(nextword, &maxnwnum, &nwroot);
01198     nwnum = dfa_firstwords(nextword, peseqlen, maxnwnum, dfa);
01199   }
01200 #endif
01201 
01202   if (debug2_flag) {
01203     j_printf("%d words in wordtrellis as first hypothesis\n", nwnum);
01204   }
01205   
01206   
01207   for (w = 0; w < nwnum; w++) {
01208 #ifdef USE_DFA
01209     
01210     if (! (winfo->wton[nextword[w]->id] >= cate_bgn && winfo->wton[nextword[w]->id] < cate_bgn + cate_num)) {
01211       continue;
01212     }
01213 #endif
01214     
01215     new = newnode();
01216     start_word(new,nextword[w],param,backtrellis);
01217 #ifdef USE_DFA
01218     if (new->score <= LOG_ZERO) { 
01219       free_node(new);
01220       continue;
01221     }
01222 #endif
01223     genectr++;
01224 #ifdef CM_SEARCH
01225     
01226     cm_store(new);
01227 #else 
01228     
01229     if (put_to_stack(new, &start, &bottom, &stacknum, stacksize) != -1) {
01230 #ifdef VISUALIZE
01231       visual2_next_word(new, NULL, popctr);
01232 #endif
01233 #ifdef GRAPHOUT
01234       new->prevgraph = NULL;
01235       new->lastcontext = NULL;
01236 #endif
01237       pushctr++;
01238     }
01239 #endif
01240   }
01241 #ifdef CM_SEARCH
01242   
01243   cm_sum_score();
01244   
01245   while ((new = cm_get_node()) != NULL) {
01246     cm_set_score(new);
01247 #ifdef CM_SEARCH_LIMIT
01248     if (new->cmscore[new->seqnum-1] < cm_cut_thres
01249 #ifdef CM_SEARCH_LIMIT_AFTER
01250         && finishnum > 0
01251 #endif
01252         ) {
01253       free_node(new);
01254       continue;
01255     }
01256 #endif 
01257     
01258     if (put_to_stack(new, &start, &bottom, &stacknum, stacksize) != -1) {
01259 #ifdef VISUALIZE
01260       visual2_next_word(new, NULL, popctr);
01261 #endif
01262 #ifdef GRAPHOUT
01263       new->prevgraph = NULL;
01264       new->lastcontext = NULL;
01265 #endif
01266       pushctr++;
01267     }
01268   }
01269 #endif
01270 
01271   if (debug2_flag) {
01272     j_printf("%d pushed\n", pushctr);
01273   }
01274   
01275   
01276   
01277   
01278 
01279   for (;;) {
01280 
01281     if (module_mode) {
01282       
01283       if (module_wants_terminate()) {
01284         j_printf("terminated\n");
01285         break;
01286       }
01287     }
01288     
01289     
01290 
01291 
01292 
01293 #ifdef DEBUG
01294     VERMES("get one hypothesis\n");
01295 #endif
01296     now = get_best_from_stack(&start,&stacknum);
01297     if (now == NULL) {  
01298       j_printf("stack empty, search terminate now\n");
01299       j_printf("%d sentences have found\n", finishnum);
01300       break;
01301     }
01302     
01303     if (now->score <= LOG_ZERO) {
01304       free_node(now);
01305       continue;
01306     }
01307 
01308 #ifdef GRAPHOUT
01309     
01310     prev_score = now->score;
01311 #endif
01312 
01313     
01314     
01315     if (enveloped_bestfirst_width >= 0) {
01316       if (!wb_ok(now)) {
01317         
01318 
01319         
01320 
01321 
01322         free_node(now);
01323         continue;
01324       }
01325     }
01326     
01327 #ifdef CM_SEARCH_LIMIT_POP
01328     if (now->cmscore[now->seqnum-1] < cm_cut_thres_pop) {
01329       free_node(now);
01330       continue;
01331     }
01332 #endif 
01333 
01334     popctr++;
01335 
01336     
01337     
01338     if (debug2_flag) {
01339       j_printf("--- pop %d:\n", popctr);
01340       j_printf("  "); put_hypo_woutput(now);
01341       j_printf("  "); put_hypo_wname(now);
01342       j_printf("  %d words, f=%f, g=%f\n", now->seqnum, now->score, now->g[now->bestt]);
01343       j_printf("  last word on trellis: [%d-%d]\n", now->estimated_next_t + 1, now->bestt);
01344     }
01345 
01346 #ifdef VISUALIZE
01347     visual2_popped(now, popctr);
01348 #endif
01349 
01350 #ifdef GRAPHOUT
01351 #ifdef GRAPHOUT_DYNAMIC
01352     
01353     wtmp = wordgraph_check_merge(now->prevgraph, &wordgraph_root, now->seq[now->seqnum-1], &merged_p);
01354     if (wtmp != NULL) {         
01355       dynamic_merged_num++;
01356       if (now->prevgraph != NULL) {
01357         if (now->prevgraph->saved) {
01358           j_error("ERROR: already saved??\n");
01359         }
01360         wordgraph_free(now->prevgraph);
01361       }
01362       now->prevgraph = wtmp;    
01363 
01364       if (now->lastcontext != NULL
01365           && now->lastcontext != now->prevgraph 
01366           ) {
01367         wordgraph_check_and_add_leftword(now->lastcontext, now->prevgraph);
01368 #ifdef GRAPHOUT_SEARCH_CONSIDER_RIGHT
01369         if (merged_p) {
01370           if (wordgraph_check_and_add_rightword(now->prevgraph, now->lastcontext) == FALSE) {
01371             merged_p = TRUE;
01372           } else {
01373             merged_p = FALSE;
01374           }
01375         } else {
01376           wordgraph_check_and_add_rightword(now->prevgraph, now->lastcontext);
01377         }
01378 #else
01379         wordgraph_check_and_add_rightword(now->prevgraph, now->lastcontext);
01380 #endif    
01381       }
01382       
01383       
01384     } else {
01385       wordgraph_save(now->prevgraph, now->lastcontext, &wordgraph_root);
01386     }
01387 #ifdef GRAPHOUT_SEARCH
01388     
01389     if (merged_p && now->endflag == FALSE
01390 #ifdef GRAPHOUT_SEARCH_DELAY_TERMINATION
01391         
01392 
01393         && (graphout_search_delay == FALSE || finishnum > 0)
01394 #endif
01395         ) {
01396       terminate_search_num++;
01397       free_node(now);
01398       continue;
01399     }
01400 #endif
01401 #else  
01402     
01403     wordgraph_save(now->prevgraph, now->lastcontext, &wordgraph_root);
01404 #endif 
01405 #endif 
01406 
01407     
01408     
01409     envl_update(now, peseqlen);
01410 
01411     
01412 
01413 
01414 
01415 
01416 
01417 
01418 #ifdef DEBUG
01419     VERMES("endflag check\n");
01420 #endif
01421     
01422     if (now->endflag) {
01423       if (debug2_flag) {
01424         j_printf("  This is a final sentence candidate\n");
01425       }
01426       
01427       if (now->score == last_score) {
01428         free_node(now);
01429         continue;
01430       } else {
01431         last_score = now->score;
01432       }
01433       
01434       finishnum++;
01435       if (debug2_flag) {
01436         j_printf("  %d-th sentence found\n", finishnum);
01437       }
01438 
01439       if (result_reorder_flag) {
01440         
01441 
01442         
01443 
01444         put_to_stack(now, &r_start, &r_bottom, &r_stacknum, r_stacksize);
01445         
01446         
01447         if (finishnum >= ncan) {
01448           if (debug2_flag) VERMES("%d\n",finishnum);
01449           break;
01450         } else {
01451           if (debug2_flag) VERMES("%d..", finishnum);
01452           continue;
01453         }
01454       } else {                  
01455         
01456         
01457         if (finishnum == 1) result_pass2_begin(); 
01458         result_pass2(now, finishnum, winfo);
01459 #ifdef VISUALIZE
01460         visual2_best(now, winfo);
01461 #endif
01462 #ifdef SP_BREAK_CURRENT_FRAME
01463         
01464 
01465         
01466         if (sp_break_last_nword_allow_override && finishnum == 1) sp_segment_set_last_nword(now);
01467 #endif
01468         
01469         
01470         if (align_result_word_flag) word_rev_align(now->seq, now->seqnum, tparam);
01471         if (align_result_phoneme_flag) phoneme_rev_align(now->seq, now->seqnum, tparam);
01472         if (align_result_state_flag) state_rev_align(now->seq, now->seqnum, tparam);
01473         
01474         
01475         free_node(now);
01476         
01477         
01478         if (finishnum >= ncan) {
01479           result_pass2_end();
01480           break;                
01481         }
01482         else continue;
01483       }
01484       
01485     } 
01486 
01487     
01488     
01489 
01490 
01491 
01492 
01493 
01494 
01495 #ifdef DEBUG
01496     VERMES("loop end check\n");
01497 #endif
01498     if (popctr >= maxhypo) {
01499       j_printf("num of hypotheses overflow\n");
01500       
01501       
01502       if (debug2_flag) put_all_in_stack(&start, &stacknum);
01503       free_node(now);
01504       break;                    
01505     }
01506     
01507     
01508     if (now->seqnum >= MAXSEQNUM) {
01509       j_printerr("sentence length exceeded ( > %d)\n",MAXSEQNUM);
01510       free_node(now);
01511       continue;
01512     }
01513 
01514 #ifdef GRAPHOUT
01515 #ifndef GRAPHOUT_PRECISE_BOUNDARY
01516     
01517     
01518     if(!ccd_flag) {
01519       now->tail_g_score = now->g[now->bestt];
01520     }
01521 #endif
01522 #endif
01523 
01524     
01525 
01526 
01527 
01528 #ifdef DEBUG
01529     VERMES("scan_word\n");
01530 #endif
01531     scan_word(now, param);
01532     if (now->score < backmax + LOG_ZERO) { 
01533       j_printf("now->score = %f?\n",now->score);
01534       put_hypo_woutput(now);
01535       free_node(now);
01536       continue;
01537     }
01538 
01539     
01540 
01541 
01542 
01543 
01544 
01545 
01546 
01547 
01548 #ifdef DEBUG
01549     VERMES("accept check\n");
01550 #endif
01551     if (
01552 #ifdef USE_NGRAM
01553         ngram_acceptable(now, winfo)
01554 #else  
01555         dfa_acceptable(now, dfa)
01556 #endif
01557         && now->estimated_next_t <= 5) {
01558       new = newnode();
01559       
01560       
01561       last_next_word(now, new, param);
01562       if (debug2_flag) {
01563         j_printf("  This is acceptable as a sentence candidate\n");
01564       }
01565       
01566       
01567       if (new->score <= LOG_ZERO) {
01568         if (debug2_flag) {
01569           j_printf("  But invalid because Viterbi pass does not reach the 0th frame\n");
01570         }
01571         free_node(new);
01572         free_node(now);
01573         continue;
01574       }
01575       
01576       
01577       if (debug2_flag) {
01578         j_printf("  This hypo itself was pushed with final score=%f\n", new->score);
01579       }
01580       new->endflag = TRUE;
01581       if (put_to_stack(new, &start, &bottom, &stacknum, stacksize) != -1) {
01582 #ifdef GRAPHOUT
01583         if (new->score > LOG_ZERO) {
01584           new->lastcontext = now->prevgraph;
01585           new->prevgraph = wordgraph_assign(new->seq[new->seqnum-1],
01586                                             WORD_INVALID,
01587                                             (new->seqnum >= 2) ? new->seq[new->seqnum-2] : WORD_INVALID,
01588                                             0,
01589 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01590                                             
01591 #ifdef PASS2_STRICT_IWCD
01592                                             new->wordend_frame[0],
01593 #else
01594                                             now->wordend_frame[0],
01595 #endif
01596 #else
01597                                             now->bestt,
01598 #endif
01599                                             new->score,
01600                                             prev_score,
01601                                             now->g[0],
01602 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01603 #ifdef PASS2_STRICT_IWCD
01604                                             new->wordend_gscore[0],
01605 #else
01606                                             now->wordend_gscore[0],
01607 #endif
01608 #else
01609                                             now->tail_g_score,
01610 #endif
01611 #ifdef USE_NGRAM
01612                                             now->lscore,
01613 #else
01614                                             LOG_ZERO,
01615 #endif
01616 #ifdef CM_SEARCH
01617                                             new->cmscore[new->seqnum-1]
01618 #else
01619                                             LOG_ZERO
01620 #endif
01621                                             );
01622         } else {
01623           new->lastcontext = now->lastcontext;
01624           new->prevgraph = now->prevgraph;
01625         }
01626 #endif
01627       }
01628       
01629       
01630     }
01631 
01632     
01633 
01634 
01635 
01636 
01637 
01638 
01639 
01640     
01641 
01642 
01643 
01644 
01645 
01646 
01647 
01648 
01649 #ifdef DEBUG
01650     VERMES("get next words\n");
01651 #endif
01652 #ifdef USE_NGRAM
01653     nwnum = ngram_nextwords(now, nextword, maxnwnum, ngram, winfo, backtrellis);
01654 #else  
01655     nwnum = dfa_nextwords(now, nextword, maxnwnum, dfa);
01656     
01657     
01658 
01659     while (nwnum < 0) {
01660       nextword = nw_expand(nextword, &maxnwnum, &nwroot);
01661       nwnum = dfa_nextwords(now, nextword, maxnwnum, dfa);
01662     }
01663 #endif
01664     if (debug2_flag) {
01665       j_printf("  %d words extracted from wordtrellis\n", nwnum);
01666     }
01667 
01668     
01669 
01670 
01671     
01672 
01673 
01674 
01675 #ifdef DEBUG
01676     VERMES("generate hypo\n");
01677 #endif
01678 
01679 #ifdef USE_DFA
01680     now_noise_calced = FALSE;   
01681 #endif
01682     i = pushctr;                
01683 
01684 #ifdef CM_SEARCH
01685     
01686     cm_init(winfo);
01687 #endif
01688 
01689     
01690     for (w = 0; w < nwnum; w++) {
01691 #ifdef USE_DFA
01692       
01693       if (! (winfo->wton[nextword[w]->id] >= cate_bgn && winfo->wton[nextword[w]->id] < cate_bgn + cate_num)) {
01694         continue;
01695       }
01696 #endif
01697       new = newnode();
01698 #ifdef USE_DFA
01699       if (nextword[w]->can_insert_sp == TRUE) {
01700         
01701         
01702         
01703         if (now_noise_calced == FALSE) {
01704           
01705           
01706 
01707           fornoise.id = dfa->sp_id;
01708           now_noise = newnode();
01709           cpy_node(now_noise, now);
01710 #if 0
01711           now_noise_tmp = newnode();
01712           next_word(now, now_noise_tmp, &fornoise, param, backtrellis);
01713           scan_word(now_noise_tmp, param);
01714           for(t=0;t<peseqlen;t++) {
01715             now_noise->g[t] = max(now_noise_tmp->g[t], now->g[t]);
01716           }
01717           free_node(now_noise_tmp);
01718 #else
01719           
01720           
01721           if (looktrellis_flag) {
01722             if(!dfa_look_around(&fornoise, now, backtrellis)){
01723               free_node(now_noise);
01724               free_node(new);
01725               continue;
01726             }
01727           }
01728           
01729 
01730           
01731 
01732           
01733 
01734           next_word(now, now_noise, &fornoise, param, backtrellis);
01735           scan_word(now_noise, param);
01736           for(t=0;t<peseqlen;t++) {
01737             now_noise->g[t] = max(now_noise->g[t], now->g[t]);
01738           }
01739           
01740 
01741           
01742 
01743           now_noise->seqnum--;
01744 #endif
01745           now_noise_calced = TRUE;
01746         }
01747 
01748         
01749         
01750         if (looktrellis_flag) {
01751           if(!dfa_look_around(nextword[w], now_noise, backtrellis)){
01752             free_node(new);
01753             continue;
01754           }
01755         }
01756         
01757         
01758         
01759         
01760         next_word(now_noise, new, nextword[w], param, backtrellis);
01761         
01762       } else {
01763         
01764         
01765         
01766         if (looktrellis_flag) {
01767           if(!dfa_look_around(nextword[w], now, backtrellis)){
01768             free_node(new);
01769             continue;
01770           }
01771         }
01772         
01773         
01774         
01775         
01776         next_word(now, new, nextword[w], param, backtrellis);
01777         
01778       }
01779 #else  
01780 
01781       
01782 
01783       
01784 
01785       next_word(now, new, nextword[w], param, backtrellis);
01786 
01787 #endif 
01788 
01789       if (new->score <= LOG_ZERO) { 
01790         free_node(new);
01791         continue;
01792       }
01793         
01794       genectr++;
01795 
01796 #ifdef CM_SEARCH
01797       
01798       cm_store(new);
01799 #else 
01800       
01801       
01802 
01803       
01804       if (can_put_to_stack(new, &bottom, &stacknum, stacksize) == -1) {
01805         free_node(new);
01806         continue;
01807       }
01808       
01809 #ifdef GRAPHOUT
01810       
01811       new->lastcontext = now->prevgraph;
01812       new->prevgraph = wordgraph_assign(new->seq[new->seqnum-2],
01813                                         new->seq[new->seqnum-1],
01814                                         (new->seqnum >= 3) ? new->seq[new->seqnum-3] : WORD_INVALID,
01815                                         new->bestt + 1,
01816 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01817 #ifdef PASS2_STRICT_IWCD
01818                                         
01819                                         new->wordend_frame[new->bestt],
01820 #else
01821                                         now->wordend_frame[new->bestt],
01822 #endif
01823 #else
01824                                         now->bestt,
01825 #endif
01826                                         new->score, prev_score,
01827                                         now->g[new->bestt+1],
01828 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01829 #ifdef PASS2_STRICT_IWCD
01830                                         
01831                                         new->wordend_gscore[new->bestt],
01832 #else
01833                                         now->wordend_gscore[new->bestt],
01834 #endif
01835 #else
01836                                         now->tail_g_score,
01837 #endif
01838 #ifdef USE_NGRAM
01839                                         now->lscore,
01840 #else
01841                                         LOG_ZERO,
01842 #endif
01843 #ifdef CM_SEARCH
01844                                         new->cmscore[new->seqnum-2]
01845 #else
01846                                         LOG_ZERO
01847 #endif
01848                                         );
01849 #endif 
01850       put_to_stack(new, &start, &bottom, &stacknum, stacksize);
01851       if (debug2_flag) {
01852         j = new->seq[new->seqnum-1];
01853         j_printf("  %15s [%15s](id=%5d)(%f) [%d-%d] pushed\n",winfo->wname[j], winfo->woutput[j], j, new->score, new->estimated_next_t + 1, new->bestt);
01854       }
01855 #ifdef VISUALIZE
01856       visual2_next_word(new, now, popctr);
01857 #endif
01858       pushctr++;
01859 #endif
01860     } 
01861 
01862 #ifdef CM_SEARCH
01863     
01864     cm_sum_score();
01865     
01866     while ((new = cm_get_node()) != NULL) {
01867       cm_set_score(new);
01868 #ifdef CM_SEARCH_LIMIT
01869       if (new->cmscore[new->seqnum-1] < cm_cut_thres
01870 #ifdef CM_SEARCH_LIMIT_AFTER
01871           && finishnum > 0
01872 #endif
01873           ) {
01874         free_node(new);
01875         continue;
01876       }
01877 #endif 
01878       
01879 
01880 
01881       
01882       if (can_put_to_stack(new, &bottom, &stacknum, stacksize) == -1) {
01883         free_node(new);
01884         continue;
01885       }
01886 
01887 #ifdef GRAPHOUT
01888 
01889       
01890       new->lastcontext = now->prevgraph;
01891       new->prevgraph = wordgraph_assign(new->seq[new->seqnum-2],
01892                                         new->seq[new->seqnum-1],
01893                                         (new->seqnum >= 3) ? new->seq[new->seqnum-3] : WORD_INVALID,
01894                                         new->bestt + 1,
01895 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01896 #ifdef PASS2_STRICT_IWCD
01897                                         new->wordend_frame[new->bestt],
01898 #else
01899                                         now->wordend_frame[new->bestt],
01900 #endif
01901 #else
01902                                         now->bestt,
01903 #endif
01904                                         new->score, prev_score,
01905                                         now->g[new->bestt+1],
01906 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01907 #ifdef PASS2_STRICT_IWCD
01908                                         new->wordend_gscore[new->bestt],
01909 #else
01910                                         now->wordend_gscore[new->bestt],
01911 #endif
01912 #else
01913                                         now->tail_g_score,
01914 #endif
01915 #ifdef USE_NGRAM
01916                                         now->lscore,
01917 #else
01918                                         LOG_ZERO,
01919 #endif
01920 #ifdef CM_SEARCH
01921                                         new->cmscore[new->seqnum-2]
01922 #else
01923                                         LOG_ZERO
01924 #endif
01925                                         );
01926 #endif 
01927       
01928       put_to_stack(new, &start, &bottom, &stacknum, stacksize);
01929       if (debug2_flag) {
01930         j = new->seq[new->seqnum-1];
01931         j_printf("  %15s [%15s](id=%5d)(%f) [%d-%d] pushed\n",winfo->wname[j], winfo->woutput[j], j, new->score, new->estimated_next_t + 1, new->bestt);
01932       }
01933 #ifdef VISUALIZE
01934       visual2_next_word(new, now, popctr);
01935 #endif
01936       pushctr++;
01937     }
01938 #endif
01939 
01940     if (debug2_flag) {
01941       j_printf("%d pushed\n",pushctr-i);
01942     }
01943 #ifdef USE_DFA
01944     if (now_noise_calced == TRUE) free_node(now_noise);
01945 #endif
01946 
01947     
01948 
01949 
01950 
01951     free_node(now);
01952 
01953   }
01954   
01955   
01956   
01957 
01958   
01959   if (!result_reorder_flag && finishnum < ncan) {
01960     result_pass2_end();
01961   }
01962   
01963   
01964 
01965   if (finishnum == 0) {         
01966     if (verbose_flag) {
01967       j_printf("got no candidates, output 1st pass result as a final result\n",finishnum);
01968     }
01969     
01970     now = newnode();
01971     for (i=0;i<pass1_wnum;i++) {
01972       now->seq[i] = pass1_wseq[pass1_wnum-1-i];
01973     }
01974     now->seqnum = pass1_wnum;
01975     now->score = pass1_score;
01976 #ifdef CONFIDENCE_MEASURE
01977     
01978 #ifdef CM_MULTIPLE_ALPHA
01979     for(j=0;j<cm_alpha_num;j++) {
01980       for(i=0;i<now->seqnum;i++) now->cmscore[i][j] = 0.0;
01981     }
01982 #else
01983     for(i=0;i<now->seqnum;i++) now->cmscore[i] = 0.0;
01984 #endif
01985 #endif 
01986     
01987     
01988     if (module_mode) {
01989       
01990       result_pass2_failed(winfo);
01991     } else {
01992       
01993       result_pass2_begin();
01994       result_pass2(now, 1, winfo);
01995       result_pass2_end();
01996     }
01997 #ifdef SP_BREAK_CURRENT_FRAME
01998     
01999     if (sp_break_last_nword_allow_override) sp_segment_set_last_nword(now);
02000 #endif
02001     
02002     if (align_result_word_flag) word_rev_align(now->seq, now->seqnum, tparam);
02003     if (align_result_phoneme_flag) phoneme_rev_align(now->seq, now->seqnum, tparam);
02004     if (align_result_state_flag) state_rev_align(now->seq, now->seqnum, tparam);
02005     free_node(now);
02006   } else {                      
02007     if (debug2_flag) {
02008       j_printf("got %d candidates\n",finishnum);
02009     }
02010     if (result_reorder_flag) {
02011       
02012 
02013       
02014 
02015       if (debug2_flag) VERMES("done\n");
02016       result_reorder_and_output(&r_start, &r_bottom, &r_stacknum, output_hypo_maxnum, winfo);
02017     }
02018   }
02019 
02020   
02021   
02022   if (verbose_flag) {
02023     j_printf("%d generated, %d pushed, %d nodes popped in %d\n",
02024              genectr, pushctr, popctr, backtrellis->framelen);
02025     j_flushprint();
02026 #ifdef GRAPHOUT_DYNAMIC
02027     j_printf("graph: %d merged", dynamic_merged_num);
02028 #ifdef GRAPHOUT_SEARCH
02029     j_printf(", %d terminated", terminate_search_num);
02030 #endif
02031     j_printf(" in %d\n", popctr);
02032 #endif
02033   }
02034     
02035 #ifdef GRAPHOUT
02036   printf("------ wordgraph post-processing begin ------\n");
02037   
02038   
02039   wordgraph_purge_leaf_nodes(&wordgraph_root);
02040 #ifdef GRAPHOUT_DEPTHCUT
02041   
02042   wordgraph_depth_cut(&wordgraph_root);
02043 #endif
02044   
02045 
02046 
02047 
02048   wordgraph_adjust_boundary(&wordgraph_root);
02049   
02050   wordgraph_compaction_thesame(&wordgraph_root);
02051   
02052   wordgraph_compaction_exacttime(&wordgraph_root);
02053   
02054   wordgraph_compaction_neighbor(&wordgraph_root);
02055   
02056   wordgraph_sort_and_annotate_id(&wordgraph_root);
02057   
02058   wordgraph_check_coherence(wordgraph_root);
02059   printf("------ wordgraph post-processing end ------\n");
02060   
02061   wordgraph_dump(wordgraph_root);
02062   
02063   result_graph(wordgraph_root, winfo);
02064   
02065   wordgraph_clean(&wordgraph_root);
02066 #endif
02067   
02068   
02069   
02070   nw_free(nextword, nwroot);
02071   free_all_nodes(start);
02072   free_wordtrellis();
02073 #ifdef SCAN_BEAM
02074   free(framemaxscore);
02075 #endif
02076   clear_stocker();
02077 }