00001 
00046 
00047 
00048 
00049 
00050 
00051 
00052 
00053 #include <julius.h>
00054 
00055 
00056 static NODE *get_best_from_stack(NODE **start, int *stacknum);
00057 static int put_to_stack(NODE *new, NODE **start, NODE **bottom, int *stacknum, int stacksize);
00058 static void put_all_in_stack(NODE **start, int *stacknum);
00059 static void free_all_nodes(NODE *node);
00060 static void put_hypo_woutput(NODE *hypo);
00061 static void put_hypo_wname(NODE *hypo);
00062 
00063 
00064 
00065 
00066 
00067 
00086 static NEXTWORD **
00087 nw_malloc(int *maxlen, NEXTWORD **root)
00088 {
00089   NEXTWORD *nwtmp;
00090   NEXTWORD **nw;
00091   int i;
00092   int max;
00093 
00094   
00095   max = winfo->num;
00096   
00097   nw = (NEXTWORD **)mymalloc(max * sizeof(NEXTWORD *));
00098   nwtmp = (NEXTWORD *)mymalloc(max * sizeof(NEXTWORD));
00099   for (i=0;i<max; i++) {
00100     nw[i] = &(nwtmp[i]);
00101   }
00102   *maxlen = max;
00103   *root = nwtmp;
00104   return nw;
00105 }
00106 
00122 static void
00123 nw_free(NEXTWORD **nw, NEXTWORD *root)
00124 {
00125   free(root);
00126   free(nw);
00127 }
00128 
00129 #ifdef USE_DFA
00130 
00166 static NEXTWORD **
00167 nw_expand(NEXTWORD **nwold, int *maxlen, NEXTWORD **root)
00168 {
00169   NEXTWORD *nwtmp;
00170   NEXTWORD **nw;
00171   int i;
00172   int nwmaxlen;
00173 
00174   nwmaxlen = *maxlen + winfo->num;
00175 
00176   nwtmp = (NEXTWORD *)myrealloc(*root, nwmaxlen * sizeof(NEXTWORD));
00177   nw = (NEXTWORD **)myrealloc(nwold, nwmaxlen * sizeof(NEXTWORD *));
00178   nw[0] = nwtmp;
00179   for (i=1;i<nwmaxlen; i++) {
00180     nw[i] = &(nwtmp[i]);
00181   }
00182   *maxlen = nwmaxlen;
00183   *root = nwtmp;
00184   return nw;
00185 }
00186 #endif
00187 
00188 
00189 
00190 
00191 
00192 
00193 
00194 
00195 
00196 
00216 static NODE *
00217 get_best_from_stack(NODE **start, int *stacknum)
00218 {
00219   NODE *tmp;
00220 
00221   
00222   tmp=(*start);
00223   if ((*start)!=NULL) {         
00224     (*start)=(*start)->next;
00225     if ((*start)!=NULL) (*start)->prev=NULL;
00226     (*stacknum)--;
00227     return(tmp);
00228   }
00229   else {
00230     return(NULL);
00231   }
00232 }
00233 
00260 static int
00261 can_put_to_stack(NODE *new, NODE **bottom, int *stacknum, int stacksize)
00262 {
00263   
00264   if ((*stacknum + 1) > stacksize && (*bottom)->score >= new->score) {
00265     
00266     return(-1);
00267   }
00268   return(0);
00269 }
00270 
00299 static int
00300 put_to_stack(NODE *new, NODE **start, NODE **bottom, int *stacknum, int stacksize)
00301 {
00302   NODE *tmp;
00303 
00304   
00305   (*stacknum)++;
00306 
00307   
00308   if ((*stacknum)>stacksize) {
00309     
00310     (*stacknum)--;
00311     if ((*bottom)->score < new->score) {
00312       
00313       tmp=(*bottom);
00314       (*bottom)->prev->next=NULL;
00315       (*bottom)=(*bottom)->prev;
00316       free_node(tmp);
00317     } else {
00318       
00319       free_node(new);
00320       return(-1);
00321     }
00322   }
00323 
00324   
00325   if ((*start)==NULL) {         
00326     
00327     (*start)=new;
00328     (*bottom)=new;
00329     new->next=NULL;
00330     new->prev=NULL;
00331     return(0);
00332   }
00333   if ((*start)->score <= new->score) {
00334     
00335     new->next = (*start);
00336     new->next->prev = new;
00337     (*start)=new;
00338     new->prev=NULL;
00339     return(0);
00340   }
00341   if ((*bottom)->score >= new->score) {
00342     
00343     new->prev = (*bottom);
00344     new->prev->next = new;
00345     (*bottom)=new;
00346     new->next=NULL;
00347     return(0);
00348   }
00349     
00350   
00351   if (((*start)->score + (*bottom)->score) / 2 > new->score) {
00352     
00353     tmp=(*bottom);
00354     while(tmp->score < new->score) tmp=tmp->prev;
00355     new->prev=tmp;
00356     new->next=tmp->next;
00357     tmp->next->prev=new;
00358     tmp->next=new;
00359   } else {
00360     
00361     tmp=(*start);
00362     while(tmp->score > new->score) tmp=tmp->next;
00363     new->next=tmp;
00364     new->prev=tmp->prev;
00365     tmp->prev->next=new;
00366     tmp->prev=new;
00367   }
00368   return(0);
00369 }
00370 
00385 static void
00386 put_all_in_stack(NODE **start, int *stacknum)
00387 {
00388   NODE *ntmp;
00389   
00390   j_printf("stack remains::\n");
00391   while ((ntmp = get_best_from_stack(start, stacknum)) != NULL) {
00392     j_printf("%3d: s=%f ",*stacknum, ntmp->score);
00393     put_hypo_woutput(ntmp);
00394     free_node(ntmp);
00395   }
00396 }
00397 
00410 static void
00411 free_all_nodes(NODE *start)
00412 {
00413   NODE *tmp;
00414   NODE *next;
00415 
00416   tmp=start;
00417   while(tmp) {
00418     next=tmp->next;
00419     free_node(tmp);
00420     tmp=next;
00421   }
00422 }
00423 
00424 
00425 #ifdef CONFIDENCE_MEASURE
00426 
00427 
00428 
00429 
00430 
00431 
00432 
00433 #ifdef CM_MULTIPLE_ALPHA
00434 static LOGPROB *cmsumlist = NULL; 
00435 #endif
00436 
00437 
00438 #ifdef CM_SEARCH
00439 
00440 
00441 
00442 
00443 
00444 
00445 static LOGPROB cm_tmpbestscore; 
00446 #ifndef CM_MULTIPLE_ALPHA
00447 static LOGPROB cm_tmpsum;       
00448 #endif
00449 
00450 
00451 static int l_stacksize;         
00452 static int l_stacknum;          
00453 static NODE *l_start = NULL;    
00454 static NODE *l_bottom = NULL;   
00455 
00469 static void
00470 cm_init(WORD_INFO *winfo)
00471 {
00472   l_stacksize = winfo->num;
00473   l_start = l_bottom = NULL;
00474   l_stacknum = 0;
00475 #ifdef CM_MULTIPLE_ALPHA
00476   if (cmsumlist == NULL) {      
00477     cmsumlist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * cm_alpha_num);
00478   }
00479 #endif    
00480 }
00481 
00494 static void
00495 cm_store(NODE *new)
00496 {
00497   
00498   put_to_stack(new, &l_start, &l_bottom, &l_stacknum, l_stacksize);
00499 }
00500 
00512 static void
00513 cm_sum_score()
00514 {
00515   NODE *node;
00516   LOGPROB sum;
00517 #ifdef CM_MULTIPLE_ALPHA
00518   LOGPROB cm_alpha;
00519   int j;
00520 #endif
00521 
00522   if (l_start == NULL) return;  
00523   cm_tmpbestscore = l_start->score; 
00524 
00525 #ifdef CM_MULTIPLE_ALPHA
00526   for (j = 0, cm_alpha = cm_alpha_bgn; cm_alpha <= cm_alpha_end; cm_alpha += cm_alpha_step) {
00527 #endif
00528     sum = 0.0;
00529     for(node = l_start; node; node = node->next) {
00530       sum += pow(10, cm_alpha * (node->score - cm_tmpbestscore));
00531     }
00532 #ifdef CM_MULTIPLE_ALPHA
00533     cmsumlist[j++] = sum;       
00534 #else
00535     cm_tmpsum = sum;            
00536 #endif
00537 
00538 #ifdef CM_MULTIPLE_ALPHA
00539   }
00540 #endif
00541 }
00542 
00543 
00558 static void
00559 cm_set_score(NODE *node)
00560 {
00561 #ifdef CM_MULTIPLE_ALPHA
00562   int j;
00563   LOGPROB cm_alpha;
00564 #endif
00565 
00566 #ifdef CM_MULTIPLE_ALPHA
00567   for (j = 0, cm_alpha = cm_alpha_bgn; cm_alpha <= cm_alpha_end; cm_alpha += cm_alpha_step) {
00568     node->cmscore[node->seqnum-1][j] = pow(10, cm_alpha * (node->score - cm_tmpbestscore)) / cmsumlist[j];
00569     j++;
00570   }
00571 #else
00572   node->cmscore[node->seqnum-1] = pow(10, cm_alpha * (node->score - cm_tmpbestscore)) / cm_tmpsum;
00573 #endif
00574 }
00575 
00588 static NODE *
00589 cm_get_node()
00590 {
00591   return(get_best_from_stack(&l_start, &l_stacknum));
00592 }
00593 
00594 #endif 
00595 
00596 #ifdef CM_NBEST
00597 
00598 
00599 
00600 
00601 
00602 
00603 
00604 static LOGPROB *sentcm = NULL;  
00605 static LOGPROB *wordcm = NULL;  
00606 static int sentnum;             
00607 
00623 static void
00624 cm_compute_from_nbest(NODE *start, int stacknum)
00625 {
00626   NODE *node;
00627   LOGPROB bestscore, sum, s;
00628   WORD_ID w;
00629   int i;
00630 #ifdef CM_MULTIPLE_ALPHA
00631   LOGPROB cm_alpha;
00632   int j;
00633 #endif
00634 
00635   
00636 #ifdef CM_MULTIPLE_ALPHA
00637   if (cmsumlist == NULL) {
00638     cmsumlist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * cm_alpha_num);
00639   }
00640 #endif    
00641   if (sentcm == NULL) {         
00642     sentcm = (LOGPROB *)mymalloc(sizeof(LOGPROB)*stacknum);
00643     sentnum = stacknum;
00644   } else if (sentnum < stacknum) { 
00645     sentcm = (LOGPROB *)myrealloc(sentcm, sizeof(LOGPROB)*stacknum);
00646     sentnum = stacknum;
00647   }
00648   if (wordcm == NULL) {
00649     wordcm = (LOGPROB *)mymalloc(sizeof(LOGPROB) * winfo->num);
00650   }
00651 #ifdef CM_MULTIPLE_ALPHA
00652   for (j = 0, cm_alpha = cm_alpha_bgn; cm_alpha <= cm_alpha_end; cm_alpha += cm_alpha_step) {
00653 #endif
00654     
00655     for(w=0;w<winfo->num;w++) {
00656       wordcm[w] = 0.0;
00657     }
00658     
00659     bestscore = start->score;
00660     
00661     sum = 0.0;
00662     for (node = start; node != NULL; node = node->next) {
00663       sum += pow(10, cm_alpha * (node->score - bestscore));
00664     }
00665     
00666     i = 0;
00667     for (node = start; node != NULL; node = node->next) {
00668       sentcm[i] = pow(10, cm_alpha * (node->score - bestscore)) / sum;
00669       i++;
00670     }
00671     
00672     i = 0;
00673     for (node = start; node != NULL; node = node->next) {
00674       for (w=0;w<node->seqnum;w++) {
00675         wordcm[node->seq[w]] += sentcm[i];
00676       }
00677       i++;
00678     }
00679     
00680     for (node = start; node != NULL; node = node->next) {
00681       for (w=0;w<node->seqnum;w++) {
00682 #ifdef CM_MULTIPLE_ALPHA
00683         node->cmscore[w][j] = wordcm[node->seq[w]];
00684 #else   
00685         node->cmscore[w] = wordcm[node->seq[w]];
00686 #endif
00687       }
00688     }
00689 #ifdef CM_MULTIPLE_ALPHA
00690     j++;
00691   }
00692 #endif
00693 }
00694 
00695 #endif 
00696 
00697 #endif 
00698 
00699 
00700 
00701 
00702 
00703 
00704 
00705 
00706 
00707 
00708 
00709 
00710 
00711 
00712 
00713 
00714 
00715 
00716 
00717 static int hypo_len_count[MAXSEQNUM+1]; 
00718 static int maximum_filled_length; 
00719 
00730 static void
00731 wb_init()
00732 {
00733   int i;
00734   for(i=0;i<=MAXSEQNUM;i++) hypo_len_count[i] = 0;
00735   maximum_filled_length = -1;
00736 }
00737 
00760 static boolean
00761 wb_ok(NODE *now)
00762 {
00763   if (now->seqnum <= maximum_filled_length) {
00764     
00765 
00766     if (debug2_flag) {
00767       j_printf("popped but pruned by word envelope: ");
00768       put_hypo_woutput(now);
00769     }
00770     return FALSE;
00771   } else {
00772     
00773 
00774     hypo_len_count[now->seqnum]++;
00775     if (hypo_len_count[now->seqnum] > enveloped_bestfirst_width) {
00776       
00777 
00778 
00779       if (maximum_filled_length < now->seqnum) maximum_filled_length = now->seqnum;
00780     }
00781     return TRUE;
00782   }
00783 }
00784 
00785 #ifdef SCAN_BEAM
00786 
00787 
00788 
00789 
00790 
00791 
00792 
00793 
00794 
00795 
00796 
00797 
00798 
00799 
00800 
00801 
00802 
00803 
00804 
00805 
00806 
00807 
00808 
00809 
00810 
00824 static void
00825 envl_init(int framenum)
00826 {
00827   int i;
00828   for(i=0;i<framenum;i++) framemaxscore[i] = LOG_ZERO;
00829 }
00830 
00845 static void
00846 envl_update(NODE *n, int framenum)
00847 {
00848   int t;
00849   for(t=framenum-1;t>=0;t--) {
00850     if (framemaxscore[t] < n->g[t]) framemaxscore[t] = n->g[t];
00851   }
00852 }
00853 #endif 
00854 
00855 
00856 #ifdef USE_DFA
00857 
00861 static NEXTWORD fornoise;
00862 #endif
00863 
00864 
00865 #ifdef SP_BREAK_CURRENT_FRAME
00866 
00867 
00868 
00869 
00887 void
00888 sp_segment_set_last_nword(NODE *hypo)
00889 {
00890   int i;
00891   WORD_ID w;
00892   for(i=0;i<hypo->seqnum;i++) {
00893     w = hypo->seq[i];
00894     if (w != sp_break_last_word
00895        && !is_sil(w, winfo, hmminfo)
00896        && !winfo->is_transparent[w]
00897        ) {
00898       sp_break_last_nword = w;
00899       break;
00900     }
00901   }
00902 #ifdef SP_BREAK_DEBUG
00903   printf("sp_break_last_nword=%d[%s]\n", sp_break_last_nword, winfo->woutput[sp_break_last_nword]);
00904 #endif
00905 }
00906 #endif
00907 
00908 
00909 
00910 
00911 
00912 
00913 
00914 
00915 
00916 static HTK_Param *tparam;       
00917 
00918 
00931 static void
00932 put_hypo_woutput(NODE *hypo)
00933 {
00934   int i,w;
00935 
00936   if (hypo != NULL) {
00937     for (i=hypo->seqnum-1;i>=0;i--) {
00938       w = hypo->seq[i];
00939       j_printf(" %s",winfo->woutput[w]);
00940     }
00941   }
00942   j_printf("\n");  
00943 }
00944 
00957 static void
00958 put_hypo_wname(NODE *hypo)
00959 {
00960   int i,w;
00961 
00962   if (hypo != NULL) {
00963     for (i=hypo->seqnum-1;i>=0;i--) {
00964       w = hypo->seq[i];
00965       j_printf(" %s",winfo->wname[w]);
00966     }
00967   }
00968   j_printf("\n");  
00969 }
00970 
00971 
00972 
00973 
00974 
01010 static void
01011 result_reorder_and_output(NODE **r_start, NODE **r_bottom, int *r_stacknum, int ncan, WORD_INFO *winfo)
01012 {
01013   NODE *now;
01014   int num;
01015 
01016 #ifdef CM_NBEST 
01017   
01018   cm_compute_from_nbest(*r_start, *r_stacknum);
01019 #endif
01020   num = 0;
01021   result_pass2_begin();
01022   while ((now = get_best_from_stack(r_start,r_stacknum)) != NULL && num < ncan) {
01023     num++;
01024     
01025     result_pass2(now, num, winfo);
01026 #ifdef VISUALIZE
01027     visual2_best(now, winfo);
01028 #endif
01029 #ifdef SP_BREAK_CURRENT_FRAME
01030     
01031     if (sp_break_last_nword_allow_override && num == 1) sp_segment_set_last_nword(now);
01032 #endif
01033     
01034     if (align_result_word_flag) word_rev_align(now->seq, now->seqnum, tparam);
01035     if (align_result_phoneme_flag) phoneme_rev_align(now->seq, now->seqnum, tparam);
01036     if (align_result_state_flag) state_rev_align(now->seq, now->seqnum, tparam);
01037     free_node(now);
01038   }
01039   result_pass2_end();
01040   
01041   if (now != NULL) free_node(now);
01042   free_all_nodes(*r_start);
01043 }  
01044 
01045 
01046 
01047 
01048 
01049 
01050 
01051 
01052 
01053 
01054 
01055 
01082 void
01083 wchmm_fbs(HTK_Param *param, BACKTRELLIS *backtrellis, LOGPROB backmax, int stacksize, int ncan, int maxhypo, int cate_bgn, int cate_num)
01084 {
01085   
01086   
01087   int stacknum;                 
01088   NODE *start = NULL;           
01089   NODE *bottom = NULL;          
01090 
01091   
01092   
01093   int r_stacksize = ncan;
01094   int r_stacknum;
01095   NODE *r_start = NULL;
01096   NODE *r_bottom = NULL;
01097 
01098   
01099   
01100   int popctr = 0;               
01101   int genectr = 0;              
01102   int pushctr = 0;              
01103   int finishnum = 0;            
01104 
01105   
01106   
01107   NEXTWORD **nextword, *nwroot; 
01108   int maxnwnum;                 
01109   int nwnum;                    
01110   NODE *now, *new;              
01111 #ifdef USE_DFA
01112   NODE *now_noise;             
01113   boolean now_noise_calced;
01114   int t;
01115 #endif
01116   int w,i,j;
01117   LOGPROB last_score = LOG_ZERO;
01118 #ifdef GRAPHOUT
01119   LOGPROB prev_score;
01120   WordGraph *wordgraph_root = NULL;
01121   boolean merged_p;
01122 #ifdef GRAPHOUT_DYNAMIC
01123   int dynamic_merged_num = 0;
01124   WordGraph *wtmp;
01125 #endif
01126 #ifdef GRAPHOUT_SEARCH
01127   int terminate_search_num = 0;
01128 #endif
01129 #endif
01130 
01131   
01132   if (align_result_word_flag || align_result_phoneme_flag || align_result_state_flag) tparam = param;
01133 
01134 #ifdef USE_DFA
01135   if (debug2_flag) j_printf("limiting category: %d-%d\n", cate_bgn, cate_bgn + cate_num-1);
01136 #endif
01137 
01138   
01139 
01140 
01141 
01142   peseqlen = backtrellis->framelen; 
01143   
01144   
01145   nextword = nw_malloc(&maxnwnum, &nwroot);
01146   
01147   
01148   malloc_wordtrellis();         
01149   
01150   
01151   start = bottom = NULL;
01152   stacknum = 0;
01153   
01154   
01155   if (result_reorder_flag) {
01156     r_start = r_bottom = NULL;
01157     r_stacknum = 0;
01158   }
01159   
01160 #ifdef CM_SEARCH
01161   
01162   cm_init(winfo);
01163 #endif
01164 #ifdef SCAN_BEAM
01165   
01166   framemaxscore = (LOGPROB *)mymalloc(sizeof(LOGPROB)*peseqlen);
01167   envl_init(peseqlen);
01168 #endif 
01169   
01170   
01171   
01172   if (enveloped_bestfirst_width >= 0) wb_init();
01173 
01174   if (verbose_flag) j_printf("samplenum=%d\n", peseqlen);
01175   if (result_reorder_flag) {
01176     if (debug2_flag) VERMES("getting %d candidates...\n",ncan);
01177   }
01178 
01179 #ifdef VISUALIZE
01180   visual2_init(maxhypo);
01181 #endif
01182 
01183   
01184 
01185 
01186 
01187 
01188   
01189 #ifdef USE_NGRAM
01190   nwnum = ngram_firstwords(nextword, peseqlen, maxnwnum, winfo, backtrellis);
01191 #else  
01192   nwnum = dfa_firstwords(nextword, peseqlen, maxnwnum, dfa);
01193   
01194   
01195 
01196   while (nwnum < 0) {
01197     nextword = nw_expand(nextword, &maxnwnum, &nwroot);
01198     nwnum = dfa_firstwords(nextword, peseqlen, maxnwnum, dfa);
01199   }
01200 #endif
01201 
01202   if (debug2_flag) {
01203     j_printf("%d words in wordtrellis as first hypothesis\n", nwnum);
01204   }
01205   
01206   
01207   for (w = 0; w < nwnum; w++) {
01208 #ifdef USE_DFA
01209     
01210     if (! (winfo->wton[nextword[w]->id] >= cate_bgn && winfo->wton[nextword[w]->id] < cate_bgn + cate_num)) {
01211       continue;
01212     }
01213 #endif
01214     
01215     new = newnode();
01216     start_word(new,nextword[w],param,backtrellis);
01217 #ifdef USE_DFA
01218     if (new->score <= LOG_ZERO) { 
01219       free_node(new);
01220       continue;
01221     }
01222 #endif
01223     genectr++;
01224 #ifdef CM_SEARCH
01225     
01226     cm_store(new);
01227 #else 
01228     
01229     if (put_to_stack(new, &start, &bottom, &stacknum, stacksize) != -1) {
01230 #ifdef VISUALIZE
01231       visual2_next_word(new, NULL, popctr);
01232 #endif
01233 #ifdef GRAPHOUT
01234       new->prevgraph = NULL;
01235       new->lastcontext = NULL;
01236 #endif
01237       pushctr++;
01238     }
01239 #endif
01240   }
01241 #ifdef CM_SEARCH
01242   
01243   cm_sum_score();
01244   
01245   while ((new = cm_get_node()) != NULL) {
01246     cm_set_score(new);
01247 #ifdef CM_SEARCH_LIMIT
01248     if (new->cmscore[new->seqnum-1] < cm_cut_thres
01249 #ifdef CM_SEARCH_LIMIT_AFTER
01250         && finishnum > 0
01251 #endif
01252         ) {
01253       free_node(new);
01254       continue;
01255     }
01256 #endif 
01257     
01258     if (put_to_stack(new, &start, &bottom, &stacknum, stacksize) != -1) {
01259 #ifdef VISUALIZE
01260       visual2_next_word(new, NULL, popctr);
01261 #endif
01262 #ifdef GRAPHOUT
01263       new->prevgraph = NULL;
01264       new->lastcontext = NULL;
01265 #endif
01266       pushctr++;
01267     }
01268   }
01269 #endif
01270 
01271   if (debug2_flag) {
01272     j_printf("%d pushed\n", pushctr);
01273   }
01274   
01275   
01276   
01277   
01278 
01279   for (;;) {
01280 
01281     if (module_mode) {
01282       
01283       if (module_wants_terminate()) {
01284         j_printf("terminated\n");
01285         break;
01286       }
01287     }
01288     
01289     
01290 
01291 
01292 
01293 #ifdef DEBUG
01294     VERMES("get one hypothesis\n");
01295 #endif
01296     now = get_best_from_stack(&start,&stacknum);
01297     if (now == NULL) {  
01298       j_printf("stack empty, search terminate now\n");
01299       j_printf("%d sentences have found\n", finishnum);
01300       break;
01301     }
01302     
01303     if (now->score <= LOG_ZERO) {
01304       free_node(now);
01305       continue;
01306     }
01307 
01308 #ifdef GRAPHOUT
01309     
01310     prev_score = now->score;
01311 #endif
01312 
01313     
01314     
01315     if (enveloped_bestfirst_width >= 0) {
01316       if (!wb_ok(now)) {
01317         
01318 
01319         
01320 
01321 
01322         free_node(now);
01323         continue;
01324       }
01325     }
01326     
01327 #ifdef CM_SEARCH_LIMIT_POP
01328     if (now->cmscore[now->seqnum-1] < cm_cut_thres_pop) {
01329       free_node(now);
01330       continue;
01331     }
01332 #endif 
01333 
01334     popctr++;
01335 
01336     
01337     
01338     if (debug2_flag) {
01339       j_printf("--- pop %d:\n", popctr);
01340       j_printf("  "); put_hypo_woutput(now);
01341       j_printf("  "); put_hypo_wname(now);
01342       j_printf("  %d words, f=%f, g=%f\n", now->seqnum, now->score, now->g[now->bestt]);
01343       j_printf("  last word on trellis: [%d-%d]\n", now->estimated_next_t + 1, now->bestt);
01344     }
01345 
01346 #ifdef VISUALIZE
01347     visual2_popped(now, popctr);
01348 #endif
01349 
01350 #ifdef GRAPHOUT
01351 #ifdef GRAPHOUT_DYNAMIC
01352     
01353     wtmp = wordgraph_check_merge(now->prevgraph, &wordgraph_root, now->seq[now->seqnum-1], &merged_p);
01354     if (wtmp != NULL) {         
01355       dynamic_merged_num++;
01356       if (now->prevgraph != NULL) {
01357         if (now->prevgraph->saved) {
01358           j_error("ERROR: already saved??\n");
01359         }
01360         wordgraph_free(now->prevgraph);
01361       }
01362       now->prevgraph = wtmp;    
01363 
01364       if (now->lastcontext != NULL
01365           && now->lastcontext != now->prevgraph 
01366           ) {
01367         wordgraph_check_and_add_leftword(now->lastcontext, now->prevgraph);
01368 #ifdef GRAPHOUT_SEARCH_CONSIDER_RIGHT
01369         if (merged_p) {
01370           if (wordgraph_check_and_add_rightword(now->prevgraph, now->lastcontext) == FALSE) {
01371             merged_p = TRUE;
01372           } else {
01373             merged_p = FALSE;
01374           }
01375         } else {
01376           wordgraph_check_and_add_rightword(now->prevgraph, now->lastcontext);
01377         }
01378 #else
01379         wordgraph_check_and_add_rightword(now->prevgraph, now->lastcontext);
01380 #endif    
01381       }
01382       
01383       
01384     } else {
01385       wordgraph_save(now->prevgraph, now->lastcontext, &wordgraph_root);
01386     }
01387 #ifdef GRAPHOUT_SEARCH
01388     
01389     if (merged_p && now->endflag == FALSE) {
01390       terminate_search_num++;
01391       free_node(now);
01392       continue;
01393     }
01394 #endif
01395 #else  
01396     
01397     wordgraph_save(now->prevgraph, now->lastcontext, &wordgraph_root);
01398 #endif 
01399 #endif 
01400 
01401     
01402     
01403     envl_update(now, peseqlen);
01404 
01405     
01406 
01407 
01408 
01409 
01410 
01411 
01412 #ifdef DEBUG
01413     VERMES("endflag check\n");
01414 #endif
01415     
01416     if (now->endflag) {
01417       if (debug2_flag) {
01418         j_printf("  This is a final sentence candidate\n");
01419       }
01420       
01421       if (now->score == last_score) {
01422         free_node(now);
01423         continue;
01424       } else {
01425         last_score = now->score;
01426       }
01427       
01428       finishnum++;
01429       if (debug2_flag) {
01430         j_printf("  %d-th sentence found\n", finishnum);
01431       }
01432 
01433       if (result_reorder_flag) {
01434         
01435 
01436         
01437 
01438         put_to_stack(now, &r_start, &r_bottom, &r_stacknum, r_stacksize);
01439         
01440         
01441         if (finishnum >= ncan) {
01442           if (debug2_flag) VERMES("%d\n",finishnum);
01443           break;
01444         } else {
01445           if (debug2_flag) VERMES("%d..", finishnum);
01446           continue;
01447         }
01448       } else {                  
01449         
01450         
01451         if (finishnum == 1) result_pass2_begin(); 
01452         result_pass2(now, finishnum, winfo);
01453 #ifdef VISUALIZE
01454         visual2_best(now, winfo);
01455 #endif
01456 #ifdef SP_BREAK_CURRENT_FRAME
01457         
01458 
01459         
01460         if (sp_break_last_nword_allow_override && finishnum == 1) sp_segment_set_last_nword(now);
01461 #endif
01462         
01463         
01464         if (align_result_word_flag) word_rev_align(now->seq, now->seqnum, tparam);
01465         if (align_result_phoneme_flag) phoneme_rev_align(now->seq, now->seqnum, tparam);
01466         if (align_result_state_flag) state_rev_align(now->seq, now->seqnum, tparam);
01467         
01468         
01469         free_node(now);
01470         
01471         
01472         if (finishnum >= ncan) {
01473           result_pass2_end();
01474           break;                
01475         }
01476         else continue;
01477       }
01478       
01479     } 
01480 
01481     
01482     
01483 
01484 
01485 
01486 
01487 
01488 
01489 #ifdef DEBUG
01490     VERMES("loop end check\n");
01491 #endif
01492     if (popctr >= maxhypo) {
01493       j_printf("num of hypotheses overflow\n");
01494       
01495       
01496       if (debug2_flag) put_all_in_stack(&start, &stacknum);
01497       free_node(now);
01498       break;                    
01499     }
01500     
01501     
01502     if (now->seqnum >= MAXSEQNUM) {
01503       j_printerr("sentence length exceeded ( > %d)\n",MAXSEQNUM);
01504       free_node(now);
01505       continue;
01506     }
01507 
01508 #ifdef GRAPHOUT
01509 #ifndef GRAPHOUT_PRECISE_BOUNDARY
01510     
01511     
01512     if(!ccd_flag) {
01513       now->tail_g_score = now->g[now->bestt];
01514     }
01515 #endif
01516 #endif
01517 
01518     
01519 
01520 
01521 
01522 #ifdef DEBUG
01523     VERMES("scan_word\n");
01524 #endif
01525     scan_word(now, param);
01526     if (now->score < backmax + LOG_ZERO) { 
01527       j_printf("now->score = %f?\n",now->score);
01528       put_hypo_woutput(now);
01529       free_node(now);
01530       break;
01531     }
01532 
01533     
01534 
01535 
01536 
01537 
01538 
01539 
01540 
01541 
01542 #ifdef DEBUG
01543     VERMES("accept check\n");
01544 #endif
01545     if (
01546 #ifdef USE_NGRAM
01547         ngram_acceptable(now, winfo)
01548 #else  
01549         dfa_acceptable(now, dfa)
01550 #endif
01551         && now->estimated_next_t <= 5) {
01552       new = newnode();
01553       
01554       
01555       last_next_word(now, new, param);
01556       if (debug2_flag) {
01557         j_printf("  This is acceptable as a sentence candidate\n");
01558       }
01559       
01560       
01561       if (new->score <= LOG_ZERO) {
01562         if (debug2_flag) {
01563           j_printf("  But invalid because Viterbi pass does not reach the 0th frame\n");
01564         }
01565         free_node(new);
01566         free_node(now);
01567         continue;
01568       }
01569       
01570       
01571       if (debug2_flag) {
01572         j_printf("  This hypo itself was pushed with final score=%f\n", new->score);
01573       }
01574       new->endflag = TRUE;
01575       if (put_to_stack(new, &start, &bottom, &stacknum, stacksize) != -1) {
01576 #ifdef GRAPHOUT
01577         if (new->score > LOG_ZERO) {
01578           new->lastcontext = now->prevgraph;
01579           new->prevgraph = wordgraph_assign(new->seq[new->seqnum-1], 0,
01580 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01581                                             
01582 #ifdef PASS2_STRICT_IWCD
01583                                             new->wordend_frame[0],
01584 #else
01585                                             now->wordend_frame[0],
01586 #endif
01587 #else
01588                                             now->bestt,
01589 #endif
01590                                             new->score,
01591                                             prev_score,
01592                                             now->g[0],
01593 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01594 #ifdef PASS2_STRICT_IWCD
01595                                             new->wordend_gscore[0],
01596 #else
01597                                             now->wordend_gscore[0],
01598 #endif
01599 #else
01600                                             now->tail_g_score,
01601 #endif
01602 #ifdef USE_NGRAM
01603                                             now->lscore,
01604 #else
01605                                             LOG_ZERO,
01606 #endif
01607 #ifdef CM_SEARCH
01608                                             new->cmscore[new->seqnum-1]
01609 #else
01610                                             LOG_ZERO
01611 #endif
01612                                             );
01613         } else {
01614           new->lastcontext = now->lastcontext;
01615           new->prevgraph = now->prevgraph;
01616         }
01617 #endif
01618       }
01619       
01620       
01621     }
01622 
01623     
01624 
01625 
01626 
01627 
01628 
01629 
01630 
01631     
01632 
01633 
01634 
01635 
01636 
01637 
01638 
01639 
01640 #ifdef DEBUG
01641     VERMES("get next words\n");
01642 #endif
01643 #ifdef USE_NGRAM
01644     nwnum = ngram_nextwords(now, nextword, maxnwnum, ngram, winfo, backtrellis);
01645 #else  
01646     nwnum = dfa_nextwords(now, nextword, maxnwnum, dfa);
01647     
01648     
01649 
01650     while (nwnum < 0) {
01651       nextword = nw_expand(nextword, &maxnwnum, &nwroot);
01652       nwnum = dfa_nextwords(now, nextword, maxnwnum, dfa);
01653     }
01654 #endif
01655     if (debug2_flag) {
01656       j_printf("  %d words extracted from wordtrellis\n", nwnum);
01657     }
01658 
01659     
01660 
01661 
01662     
01663 
01664 
01665 
01666 #ifdef DEBUG
01667     VERMES("generate hypo\n");
01668 #endif
01669 
01670 #ifdef USE_DFA
01671     now_noise_calced = FALSE;   
01672 #endif
01673     i = pushctr;                
01674 
01675 #ifdef CM_SEARCH
01676     
01677     cm_init(winfo);
01678 #endif
01679 
01680     
01681     for (w = 0; w < nwnum; w++) {
01682 #ifdef USE_DFA
01683       
01684       if (! (winfo->wton[nextword[w]->id] >= cate_bgn && winfo->wton[nextword[w]->id] < cate_bgn + cate_num)) {
01685         continue;
01686       }
01687 #endif
01688       new = newnode();
01689 #ifdef USE_DFA
01690       if (nextword[w]->can_insert_sp == TRUE) {
01691         
01692         
01693         
01694         if (now_noise_calced == FALSE) {
01695           
01696           
01697 
01698           fornoise.id = dfa->sp_id;
01699           now_noise = newnode();
01700           cpy_node(now_noise, now);
01701 #if 0
01702           now_noise_tmp = newnode();
01703           next_word(now, now_noise_tmp, &fornoise, param, backtrellis);
01704           scan_word(now_noise_tmp, param);
01705           for(t=0;t<peseqlen;t++) {
01706             now_noise->g[t] = max(now_noise_tmp->g[t], now->g[t]);
01707           }
01708           free_node(now_noise_tmp);
01709 #else
01710           
01711           
01712           if (looktrellis_flag) {
01713             if(!dfa_look_around(&fornoise, now, backtrellis)){
01714               free_node(now_noise);
01715               free_node(new);
01716               continue;
01717             }
01718           }
01719           
01720 
01721           
01722 
01723           
01724 
01725           next_word(now, now_noise, &fornoise, param, backtrellis);
01726           scan_word(now_noise, param);
01727           for(t=0;t<peseqlen;t++) {
01728             now_noise->g[t] = max(now_noise->g[t], now->g[t]);
01729           }
01730           
01731 
01732           
01733 
01734           now_noise->seqnum--;
01735 #endif
01736           now_noise_calced = TRUE;
01737         }
01738 
01739         
01740         
01741         if (looktrellis_flag) {
01742           if(!dfa_look_around(nextword[w], now_noise, backtrellis)){
01743             free_node(new);
01744             continue;
01745           }
01746         }
01747         
01748         
01749         
01750         
01751         next_word(now_noise, new, nextword[w], param, backtrellis);
01752         
01753       } else {
01754         
01755         
01756         
01757         if (looktrellis_flag) {
01758           if(!dfa_look_around(nextword[w], now, backtrellis)){
01759             free_node(new);
01760             continue;
01761           }
01762         }
01763         
01764         
01765         
01766         
01767         next_word(now, new, nextword[w], param, backtrellis);
01768         
01769       }
01770 #else  
01771 
01772       
01773 
01774       
01775 
01776       next_word(now, new, nextword[w], param, backtrellis);
01777 
01778 #endif 
01779 
01780       if (new->score <= LOG_ZERO) { 
01781         free_node(new);
01782         continue;
01783       }
01784         
01785       genectr++;
01786 
01787 #ifdef CM_SEARCH
01788       
01789       cm_store(new);
01790 #else 
01791       
01792       
01793 
01794       
01795       if (can_put_to_stack(new, &bottom, &stacknum, stacksize) == -1) {
01796         free_node(new);
01797         continue;
01798       }
01799       
01800 #ifdef GRAPHOUT
01801       
01802       new->lastcontext = now->prevgraph;
01803       new->prevgraph = wordgraph_assign(new->seq[new->seqnum-2], new->bestt + 1,
01804 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01805 #ifdef PASS2_STRICT_IWCD
01806                                         
01807                                         new->wordend_frame[new->bestt],
01808 #else
01809                                         now->wordend_frame[new->bestt],
01810 #endif
01811 #else
01812                                         now->bestt,
01813 #endif
01814                                         new->score, prev_score,
01815                                         now->g[new->bestt+1],
01816 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01817 #ifdef PASS2_STRICT_IWCD
01818                                         
01819                                         new->wordend_gscore[new->bestt],
01820 #else
01821                                         now->wordend_gscore[new->bestt],
01822 #endif
01823 #else
01824                                         now->tail_g_score,
01825 #endif
01826 #ifdef USE_NGRAM
01827                                         now->lscore,
01828 #else
01829                                         LOG_ZERO,
01830 #endif
01831 #ifdef CM_SEARCH
01832                                         new->cmscore[new->seqnum-2]
01833 #else
01834                                         LOG_ZERO
01835 #endif
01836                                         );
01837 #endif 
01838       put_to_stack(new, &start, &bottom, &stacknum, stacksize);
01839       if (debug2_flag) {
01840         j = new->seq[new->seqnum-1];
01841         j_printf("  %15s [%15s](id=%5d)(%f) [%d-%d] pushed\n",winfo->wname[j], winfo->woutput[j], j, new->score, new->estimated_next_t + 1, new->bestt);
01842       }
01843 #ifdef VISUALIZE
01844       visual2_next_word(new, now, popctr);
01845 #endif
01846       pushctr++;
01847 #endif
01848     } 
01849 
01850 #ifdef CM_SEARCH
01851     
01852     cm_sum_score();
01853     
01854     while ((new = cm_get_node()) != NULL) {
01855       cm_set_score(new);
01856 #ifdef CM_SEARCH_LIMIT
01857       if (new->cmscore[new->seqnum-1] < cm_cut_thres
01858 #ifdef CM_SEARCH_LIMIT_AFTER
01859           && finishnum > 0
01860 #endif
01861           ) {
01862         free_node(new);
01863         continue;
01864       }
01865 #endif 
01866       
01867 
01868 
01869       
01870       if (can_put_to_stack(new, &bottom, &stacknum, stacksize) == -1) {
01871         free_node(new);
01872         continue;
01873       }
01874 
01875 #ifdef GRAPHOUT
01876 
01877       
01878       new->lastcontext = now->prevgraph;
01879       new->prevgraph = wordgraph_assign(new->seq[new->seqnum-2], new->bestt + 1,
01880 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01881 #ifdef PASS2_STRICT_IWCD
01882                                         new->wordend_frame[new->bestt],
01883 #else
01884                                         now->wordend_frame[new->bestt],
01885 #endif
01886 #else
01887                                         now->bestt,
01888 #endif
01889                                         new->score, prev_score,
01890                                         now->g[new->bestt+1],
01891 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01892 #ifdef PASS2_STRICT_IWCD
01893                                         new->wordend_gscore[new->bestt],
01894 #else
01895                                         now->wordend_gscore[new->bestt],
01896 #endif
01897 #else
01898                                         now->tail_g_score,
01899 #endif
01900 #ifdef USE_NGRAM
01901                                         now->lscore,
01902 #else
01903                                         LOG_ZERO,
01904 #endif
01905 #ifdef CM_SEARCH
01906                                         new->cmscore[new->seqnum-2]
01907 #else
01908                                         LOG_ZERO
01909 #endif
01910                                         );
01911 #endif 
01912       
01913       put_to_stack(new, &start, &bottom, &stacknum, stacksize);
01914       if (debug2_flag) {
01915         j = new->seq[new->seqnum-1];
01916         j_printf("  %15s [%15s](id=%5d)(%f) [%d-%d] pushed\n",winfo->wname[j], winfo->woutput[j], j, new->score, new->estimated_next_t + 1, new->bestt);
01917       }
01918 #ifdef VISUALIZE
01919       visual2_next_word(new, now, popctr);
01920 #endif
01921       pushctr++;
01922     }
01923 #endif
01924 
01925     if (debug2_flag) {
01926       j_printf("%d pushed\n",pushctr-i);
01927     }
01928 #ifdef USE_DFA
01929     if (now_noise_calced == TRUE) free_node(now_noise);
01930 #endif
01931 
01932     
01933 
01934 
01935 
01936     free_node(now);
01937 
01938     if (catch_intr_flag) break;
01939     
01940   }
01941   
01942   
01943   
01944 
01945   
01946   if (!result_reorder_flag && finishnum < ncan) {
01947     result_pass2_end();
01948   }
01949   
01950   
01951 
01952   if (finishnum == 0) {         
01953     if (verbose_flag) {
01954       j_printf("got no candidates, output 1st pass result as a final result\n",finishnum);
01955     }
01956     
01957     now = newnode();
01958     for (i=0;i<pass1_wnum;i++) {
01959       now->seq[i] = pass1_wseq[pass1_wnum-1-i];
01960     }
01961     now->seqnum = pass1_wnum;
01962     now->score = pass1_score;
01963 #ifdef CONFIDENCE_MEASURE
01964     
01965 #ifdef CM_MULTIPLE_ALPHA
01966     for(j=0;j<cm_alpha_num;j++) {
01967       for(i=0;i<now->seqnum;i++) now->cmscore[i][j] = 0.0;
01968     }
01969 #else
01970     for(i=0;i<now->seqnum;i++) now->cmscore[i] = 0.0;
01971 #endif
01972 #endif 
01973     
01974     
01975     if (module_mode) {
01976       
01977       result_pass2_failed(winfo);
01978     } else {
01979       
01980       result_pass2_begin();
01981       result_pass2(now, 1, winfo);
01982       result_pass2_end();
01983     }
01984 #ifdef SP_BREAK_CURRENT_FRAME
01985     
01986     if (sp_break_last_nword_allow_override) sp_segment_set_last_nword(now);
01987 #endif
01988     
01989     if (align_result_word_flag) word_rev_align(now->seq, now->seqnum, tparam);
01990     if (align_result_phoneme_flag) phoneme_rev_align(now->seq, now->seqnum, tparam);
01991     if (align_result_state_flag) state_rev_align(now->seq, now->seqnum, tparam);
01992     free_node(now);
01993   } else {                      
01994     if (debug2_flag) {
01995       j_printf("got %d candidates\n",finishnum);
01996     }
01997     if (result_reorder_flag) {
01998       
01999 
02000       
02001 
02002       if (debug2_flag) VERMES("done\n");
02003       result_reorder_and_output(&r_start, &r_bottom, &r_stacknum, output_hypo_maxnum, winfo);
02004     }
02005   }
02006 
02007   
02008   
02009   if (verbose_flag) {
02010     j_printf("%d generated, %d pushed, %d nodes popped in %d\n",
02011              genectr, pushctr, popctr, backtrellis->framelen);
02012     j_flushprint();
02013 #ifdef GRAPHOUT_DYNAMIC
02014     j_printf("graph: %d merged", dynamic_merged_num);
02015 #ifdef GRAPHOUT_SEARCH
02016     j_printf(", %d terminated", terminate_search_num);
02017 #endif
02018     j_printf(" in %d\n", popctr);
02019 #endif
02020   }
02021     
02022 #ifdef GRAPHOUT
02023   printf("------ wordgraph post-processing begin ------\n");
02024   
02025   
02026   wordgraph_purge_leaf_nodes(&wordgraph_root);
02027   
02028 
02029 
02030 
02031   wordgraph_adjust_boundary(&wordgraph_root);
02032   
02033   wordgraph_compaction_thesame(&wordgraph_root);
02034   
02035   wordgraph_compaction_exacttime(&wordgraph_root);
02036   
02037   wordgraph_compaction_neighbor(&wordgraph_root);
02038   
02039   wordgraph_sort_and_annotate_id(&wordgraph_root);
02040   
02041   wordgraph_check_coherence(wordgraph_root);
02042   printf("------ wordgraph post-processing end ------\n");
02043   
02044   wordgraph_dump(wordgraph_root);
02045   
02046   result_graph(wordgraph_root, winfo);
02047   
02048   wordgraph_clean(&wordgraph_root);
02049 #endif
02050   
02051   
02052   
02053   nw_free(nextword, nwroot);
02054   free_all_nodes(start);
02055   free_wordtrellis();
02056 #ifdef SCAN_BEAM
02057   free(framemaxscore);
02058 #endif
02059 }