Julius: julius/search_bestfirst

00001 
00046 /*
00047  * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
00048  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00049  * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology, Nagoya Institute of Technology
00050  * All rights reserved
00051  */
00052 
00053 #include <julius.h>
00054 
00055 /* declaration of local functions */
00056 static NODE *get_best_from_stack(NODE **start, int *stacknum);
00057 static int put_to_stack(NODE *new, NODE **start, NODE **bottom, int *stacknum, int stacksize);
00058 static void put_all_in_stack(NODE **start, int *stacknum);
00059 static void free_all_nodes(NODE *node);
00060 static void put_hypo_woutput(NODE *hypo);
00061 static void put_hypo_wname(NODE *hypo);
00062 
00063 /**********************************************************************/
00064 /********** ¼¡Ã±¸ì³ÊÇ¼ÎÎ°è¤Î³ä¤êÅö¤Æ          *************************/
00065 /********** allocate memory for nextword data *************************/
00066 /**********************************************************************/
00067 
00086 static NEXTWORD **
00087 nw_malloc(int *maxlen, NEXTWORD **root)
00088 {
00089   NEXTWORD *nwtmp;
00090   NEXTWORD **nw;
00091   int i;
00092   int max;
00093 
00094   /* the initial maximum number of nextwords is the size of vocabulary */
00095   max = winfo->num;
00096   
00097   nw = (NEXTWORD **)mymalloc(max * sizeof(NEXTWORD *));
00098   nwtmp = (NEXTWORD *)mymalloc(max * sizeof(NEXTWORD));
00099   for (i=0;i<max; i++) {
00100     nw[i] = &(nwtmp[i]);
00101   }
00102   *maxlen = max;
00103   *root = nwtmp;
00104   return nw;
00105 }
00106 
00122 static void
00123 nw_free(NEXTWORD **nw, NEXTWORD *root)
00124 {
00125   free(root);
00126   free(nw);
00127 }
00128 
00129 #ifdef USE_DFA
00130 
00166 static NEXTWORD **
00167 nw_expand(NEXTWORD **nwold, int *maxlen, NEXTWORD **root)
00168 {
00169   NEXTWORD *nwtmp;
00170   NEXTWORD **nw;
00171   int i;
00172   int nwmaxlen;
00173 
00174   nwmaxlen = *maxlen + winfo->num;
00175 
00176   nwtmp = (NEXTWORD *)myrealloc(*root, nwmaxlen * sizeof(NEXTWORD));
00177   nw = (NEXTWORD **)myrealloc(nwold, nwmaxlen * sizeof(NEXTWORD *));
00178   nw[0] = nwtmp;
00179   for (i=1;i<nwmaxlen; i++) {
00180     nw[i] = &(nwtmp[i]);
00181   }
00182   *maxlen = nwmaxlen;
00183   *root = nwtmp;
00184   return nw;
00185 }
00186 #endif
00187 
00188 
00189 /**********************************************************************/
00190 /********** ²¾Àâ¥¹¥¿¥Ã¥¯¤ÎÁàºî         ********************************/
00191 /********** Hypothesis stack operation ********************************/
00192 /**********************************************************************/
00193 
00194 /* Ê¸²¾Àâ¥¹¥¿¥Ã¥¯¤Ï double linked list ¤Ç¤¢¤ë¡¥¤Þ¤¿¾ï¤Ë¥¹¥³¥¢½ç¤¬ÊÝ¤¿¤ì¤ë */
00195 /* The hypothesis stack is double-linked list, and holds entries in score sorted order */
00196 
00216 static NODE *
00217 get_best_from_stack(NODE **start, int *stacknum)
00218 {
00219   NODE *tmp;
00220 
00221   /* return top */
00222   tmp=(*start);
00223   if ((*start)!=NULL) {         /* delete it from stack */
00224     (*start)=(*start)->next;
00225     if ((*start)!=NULL) (*start)->prev=NULL;
00226     (*stacknum)--;
00227     return(tmp);
00228   }
00229   else {
00230     return(NULL);
00231   }
00232 }
00233 
00260 static int
00261 can_put_to_stack(NODE *new, NODE **bottom, int *stacknum, int stacksize)
00262 {
00263   /* stack size check */
00264   if ((*stacknum + 1) > stacksize && (*bottom)->score >= new->score) {
00265     /* new node is below the bottom: discard it */
00266     return(-1);
00267   }
00268   return(0);
00269 }
00270 
00299 static int
00300 put_to_stack(NODE *new, NODE **start, NODE **bottom, int *stacknum, int stacksize)
00301 {
00302   NODE *tmp;
00303 
00304   /* stack size is going to increase... */
00305   (*stacknum)++;
00306 
00307   /* stack size check */
00308   if ((*stacknum)>stacksize) {
00309     /* stack size overflow */
00310     (*stacknum)--;
00311     if ((*bottom)->score < new->score) {
00312       /* new node will be inserted in the stack: free the bottom */
00313       tmp=(*bottom);
00314       (*bottom)->prev->next=NULL;
00315       (*bottom)=(*bottom)->prev;
00316       free_node(tmp);
00317     } else {
00318       /* new node is below the bottom: discard it */
00319       free_node(new);
00320       return(-1);
00321     }
00322   }
00323 
00324   /* insert new node on edge */
00325   if ((*start)==NULL) {         /* no node in stack */
00326     /* new node is the only node */
00327     (*start)=new;
00328     (*bottom)=new;
00329     new->next=NULL;
00330     new->prev=NULL;
00331     return(0);
00332   }
00333   if ((*start)->score <= new->score) {
00334     /* insert on the top */
00335     new->next = (*start);
00336     new->next->prev = new;
00337     (*start)=new;
00338     new->prev=NULL;
00339     return(0);
00340   }
00341   if ((*bottom)->score >= new->score) {
00342     /* insert on the bottom */
00343     new->prev = (*bottom);
00344     new->prev->next = new;
00345     (*bottom)=new;
00346     new->next=NULL;
00347     return(0);
00348   }
00349     
00350   /* now the new node is between (*start) and (*bottom) */
00351   if (((*start)->score + (*bottom)->score) / 2 > new->score) {
00352     /* search from bottom */
00353     tmp=(*bottom);
00354     while(tmp->score < new->score) tmp=tmp->prev;
00355     new->prev=tmp;
00356     new->next=tmp->next;
00357     tmp->next->prev=new;
00358     tmp->next=new;
00359   } else {
00360     /* search from start */
00361     tmp=(*start);
00362     while(tmp->score > new->score) tmp=tmp->next;
00363     new->next=tmp;
00364     new->prev=tmp->prev;
00365     tmp->prev->next=new;
00366     tmp->prev=new;
00367   }
00368   return(0);
00369 }
00370 
00385 static void
00386 put_all_in_stack(NODE **start, int *stacknum)
00387 {
00388   NODE *ntmp;
00389   
00390   j_printf("stack remains::\n");
00391   while ((ntmp = get_best_from_stack(start, stacknum)) != NULL) {
00392     j_printf("%3d: s=%f ",*stacknum, ntmp->score);
00393     put_hypo_woutput(ntmp);
00394     free_node(ntmp);
00395   }
00396 }
00397 
00410 static void
00411 free_all_nodes(NODE *start)
00412 {
00413   NODE *tmp;
00414   NODE *next;
00415 
00416   tmp=start;
00417   while(tmp) {
00418     next=tmp->next;
00419     free_node(tmp);
00420     tmp=next;
00421   }
00422 }
00423 
00424 
00425 #ifdef CONFIDENCE_MEASURE
00426 
00427 /**********************************************************************/
00428 /********** Ã±¸ì¿®ÍêÅÙ¤Î·×»» ******************************************/
00429 /********** Confidence scoring ****************************************/
00430 /**********************************************************************/
00431 /* scoring using multiple cmalpha value will be enabled by CM_MULTIPLE_ALPHA */
00432 
00433 #ifdef CM_MULTIPLE_ALPHA
00434 static LOGPROB *cmsumlist = NULL; 
00435 #endif
00436 
00437 
00438 #ifdef CM_SEARCH
00439 /**************************************************************/
00440 /**** CM computation method 1(default):                  ******/
00441 /****   - Use local posterior probabilities while search ******/
00442 /****   - Obtain CM at hypothesis expansion time         ******/
00443 /**************************************************************/
00444 
00445 static LOGPROB cm_tmpbestscore; 
00446 #ifndef CM_MULTIPLE_ALPHA
00447 static LOGPROB cm_tmpsum;       
00448 #endif
00449 
00450 /* local stack for CM */
00451 static int l_stacksize;         
00452 static int l_stacknum;          
00453 static NODE *l_start = NULL;    
00454 static NODE *l_bottom = NULL;   
00455 
00469 static void
00470 cm_init(WORD_INFO *winfo)
00471 {
00472   l_stacksize = winfo->num;
00473   l_start = l_bottom = NULL;
00474   l_stacknum = 0;
00475 #ifdef CM_MULTIPLE_ALPHA
00476   if (cmsumlist == NULL) {      /* allocate if not yet */
00477     cmsumlist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * cm_alpha_num);
00478   }
00479 #endif    
00480 }
00481 
00494 static void
00495 cm_store(NODE *new)
00496 {
00497   /* store the generated hypo into local stack */
00498   put_to_stack(new, &l_start, &l_bottom, &l_stacknum, l_stacksize);
00499 }
00500 
00512 static void
00513 cm_sum_score()
00514 {
00515   NODE *node;
00516   LOGPROB sum;
00517 #ifdef CM_MULTIPLE_ALPHA
00518   LOGPROB cm_alpha;
00519   int j;
00520 #endif
00521 
00522   if (l_start == NULL) return;  /* no hypo */
00523   cm_tmpbestscore = l_start->score; /* best hypo is at the top of the stack */
00524 
00525 #ifdef CM_MULTIPLE_ALPHA
00526   for (j = 0, cm_alpha = cm_alpha_bgn; cm_alpha <= cm_alpha_end; cm_alpha += cm_alpha_step) {
00527 #endif
00528     sum = 0.0;
00529     for(node = l_start; node; node = node->next) {
00530       sum += pow(10, cm_alpha * (node->score - cm_tmpbestscore));
00531     }
00532 #ifdef CM_MULTIPLE_ALPHA
00533     cmsumlist[j++] = sum;       /* store sums for each alpha coef. */
00534 #else
00535     cm_tmpsum = sum;            /* store sum */
00536 #endif
00537 
00538 #ifdef CM_MULTIPLE_ALPHA
00539   }
00540 #endif
00541 }
00542 
00543 /* compute CM scores of the new word in the local stack from the sum above */
00558 static void
00559 cm_set_score(NODE *node)
00560 {
00561 #ifdef CM_MULTIPLE_ALPHA
00562   int j;
00563   LOGPROB cm_alpha;
00564 #endif
00565 
00566 #ifdef CM_MULTIPLE_ALPHA
00567   for (j = 0, cm_alpha = cm_alpha_bgn; cm_alpha <= cm_alpha_end; cm_alpha += cm_alpha_step) {
00568     node->cmscore[node->seqnum-1][j] = pow(10, cm_alpha * (node->score - cm_tmpbestscore)) / cmsumlist[j];
00569     j++;
00570   }
00571 #else
00572   node->cmscore[node->seqnum-1] = pow(10, cm_alpha * (node->score - cm_tmpbestscore)) / cm_tmpsum;
00573 #endif
00574 }
00575 
00588 static NODE *
00589 cm_get_node()
00590 {
00591   return(get_best_from_stack(&l_start, &l_stacknum));
00592 }
00593 
00594 #endif /* CM_SEARCH */
00595 
00596 #ifdef CM_NBEST
00597 /*****************************************************************/
00598 /**** CM computation method 2: conventional N-best scoring *******/
00599 /**** NOTE: enough N-best should be computed (-n 10 ~ -n 100) ****/
00600 /*****************************************************************/
00601 
00602 /* CM of each word is calculated after all search had finished */
00603 
00604 static LOGPROB *sentcm = NULL;  
00605 static LOGPROB *wordcm = NULL;  
00606 static int sentnum;             
00607 
00623 static void
00624 cm_compute_from_nbest(NODE *start, int stacknum)
00625 {
00626   NODE *node;
00627   LOGPROB bestscore, sum, s;
00628   WORD_ID w;
00629   int i;
00630 #ifdef CM_MULTIPLE_ALPHA
00631   LOGPROB cm_alpha;
00632   int j;
00633 #endif
00634 
00635   /* prepare buffer */
00636 #ifdef CM_MULTIPLE_ALPHA
00637   if (cmsumlist == NULL) {
00638     cmsumlist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * cm_alpha_num);
00639   }
00640 #endif    
00641   if (sentcm == NULL) {         /* not allocated yet */
00642     sentcm = (LOGPROB *)mymalloc(sizeof(LOGPROB)*stacknum);
00643     sentnum = stacknum;
00644   } else if (sentnum < stacknum) { /* need expanded */
00645     sentcm = (LOGPROB *)myrealloc(sentcm, sizeof(LOGPROB)*stacknum);
00646     sentnum = stacknum;
00647   }
00648   if (wordcm == NULL) {
00649     wordcm = (LOGPROB *)mymalloc(sizeof(LOGPROB) * winfo->num);
00650   }
00651 #ifdef CM_MULTIPLE_ALPHA
00652   for (j = 0, cm_alpha = cm_alpha_bgn; cm_alpha <= cm_alpha_end; cm_alpha += cm_alpha_step) {
00653 #endif
00654     /* clear whole word cm buffer */
00655     for(w=0;w<winfo->num;w++) {
00656       wordcm[w] = 0.0;
00657     }
00658     /* get best score */
00659     bestscore = start->score;
00660     /* compute sum score of all hypothesis */
00661     sum = 0.0;
00662     for (node = start; node != NULL; node = node->next) {
00663       sum += pow(10, cm_alpha * (node->score - bestscore));
00664     }
00665     /* compute sentence posteriori probabilities */
00666     i = 0;
00667     for (node = start; node != NULL; node = node->next) {
00668       sentcm[i] = pow(10, cm_alpha * (node->score - bestscore)) / sum;
00669       i++;
00670     }
00671     /* compute word posteriori probabilities */
00672     i = 0;
00673     for (node = start; node != NULL; node = node->next) {
00674       for (w=0;w<node->seqnum;w++) {
00675         wordcm[node->seq[w]] += sentcm[i];
00676       }
00677       i++;
00678     }
00679     /* store the probabilities to node */
00680     for (node = start; node != NULL; node = node->next) {
00681       for (w=0;w<node->seqnum;w++) {
00682 #ifdef CM_MULTIPLE_ALPHA
00683         node->cmscore[w][j] = wordcm[node->seq[w]];
00684 #else   
00685         node->cmscore[w] = wordcm[node->seq[w]];
00686 #endif
00687       }
00688     }
00689 #ifdef CM_MULTIPLE_ALPHA
00690     j++;
00691   }
00692 #endif
00693 }
00694 
00695 #endif /* CM_NBEST */
00696 
00697 #endif /* CONFIDENCE_MEASURE */
00698 
00699 
00700 /**********************************************************************/
00701 /********** Enveloped best-first search *******************************/
00702 /**********************************************************************/
00703 
00704 /*
00705  * 1. Word envelope
00706  *
00707  * °ì¼ï¤Î²¾Àâ¥Ó¡¼¥àÉý¤òÀßÄê: Å¸³«¸µ¤È¤Ê¤Ã¤¿²¾Àâ¤Î¿ô¤ò¤½¤Î²¾ÀâÄ¹(Ã±¸ì¿ô)
00708  * ¤´¤È¤Ë¥«¥¦¥ó¥È¤¹¤ë¡¥°ìÄê¿ô¤ò±Û¤¨¤¿¤é¤½¤ì¤è¤êÃ»¤¤²¾Àâ¤Ï°Ê¸åÅ¸³«¤·¤Ê¤¤¡¥
00709  * 
00710  * Introduce a kind of beam width to search tree: count the number of
00711  * popped hypotheses per the depth of the hypotheses, and when a count
00712  * in a certain depth reaches the threshold, all hypotheses shorter than
00713  * the depth will be dropped from candidates.
00714  *
00715  */
00716 
00717 static int hypo_len_count[MAXSEQNUM+1]; 
00718 static int maximum_filled_length; 
00719 
00730 static void
00731 wb_init()
00732 {
00733   int i;
00734   for(i=0;i<=MAXSEQNUM;i++) hypo_len_count[i] = 0;
00735   maximum_filled_length = -1;
00736 }
00737 
00760 static boolean
00761 wb_ok(NODE *now)
00762 {
00763   if (now->seqnum <= maximum_filled_length) {
00764     /* word expansion is not allowed because a word expansion count
00765        at deeper level has already reached the limit */
00766     if (debug2_flag) {
00767       j_printf("popped but pruned by word envelope: ");
00768       put_hypo_woutput(now);
00769     }
00770     return FALSE;
00771   } else {
00772     /* word expansion is possible.  Increment the word expansion count
00773        of the given depth */
00774     hypo_len_count[now->seqnum]++;
00775     if (hypo_len_count[now->seqnum] > enveloped_bestfirst_width) {
00776       /* the word expansion count of this level has reached the limit, so
00777          set the beam-filled depth to this level to inhibit further
00778          expansion of shorter hypotheses. */
00779       if (maximum_filled_length < now->seqnum) maximum_filled_length = now->seqnum;
00780     }
00781     return TRUE;
00782   }
00783 }
00784 
00785 #ifdef SCAN_BEAM
00786 /*
00787  * 2. Score envelope
00788  *
00789  * Viterbi·×»»ÎÌ¤Îºï¸º: ÆþÎÏ¥Õ¥ì¡¼¥à¤´¤È¤ÎºÇÂçÌàÅÙ (score envelope) ¤ò
00790  * Á´²¾Àâ¤Ë¤ï¤¿¤Ã¤ÆµÏ¿¤·¤Æ¤ª¤¯¡¥²¾Àâ¤ÎÁ°¸þ¤ÌàÅÙ·×»»»þ¤Ë¡¤¤½¤Î envelope
00791  * ¤«¤é°ìÄêÉý°Ê¾å¥¹¥³¥¢¤¬²¼²ó¤ë¤È¤¡¤Viterbi ¥Ñ¥¹¤Î±é»»¤òÃæÃÇ¤¹¤ë¡¥
00792  *
00793  * ¤³¤³¤Ç¤Ï¡¤¼è¤ê½Ð¤·¤¿²¾Àâ¤«¤é¥Õ¥ì¡¼¥à¤´¤È¤Î score envelope ¤ò¹¹¿·¤¹¤ë
00794  * ÉôÊ¬¤¬µ½Ò¤µ¤ì¤Æ¤¤¤ë¡¥Envelope ¤ò¹ÍÎ¸¤·¤¿ Viterbi ·×»»¤Î¼ÂºÝ¤Ï
00795  * scan_word() ¤ò»²¾È¤Î¤³¤È¡¥
00796  *
00797  * Reduce computation cost of hypothesis Viterbi processing by setting a
00798  * "score envelope" that holds the maximum scores at every frames
00799  * throughout the expanded hypotheses.  When calculating Viterbi path
00800  * on HMM trellis for updating score of popped hypothesis, Viterbi paths
00801  * that goes below a certain range from the score envelope are dropped.
00802  *
00803  * These functions are for updating the score envelope according to the
00804  * popped hypothesis.  For actual Viterbi process with score envelope, 
00805  * see scan_word().
00806  *
00807  */
00808 
00809 /* the score envelope is kept in framemaxscore[0..framelen-1] */
00810 
00824 static void
00825 envl_init(int framenum)
00826 {
00827   int i;
00828   for(i=0;i<framenum;i++) framemaxscore[i] = LOG_ZERO;
00829 }
00830 
00845 static void
00846 envl_update(NODE *n, int framenum)
00847 {
00848   int t;
00849   for(t=framenum-1;t>=0;t--) {
00850     if (framemaxscore[t] < n->g[t]) framemaxscore[t] = n->g[t];
00851   }
00852 }
00853 #endif /* SCAN_BEAM */
00854 
00855 
00856 #ifdef USE_DFA
00857 
00861 static NEXTWORD fornoise;
00862 #endif
00863 
00864 
00865 #ifdef SP_BREAK_CURRENT_FRAME
00866 /**********************************************************************/
00867 /********** Short pause segmentation **********************************/
00868 /**********************************************************************/
00869 
00887 void
00888 sp_segment_set_last_nword(NODE *hypo)
00889 {
00890   int i;
00891   WORD_ID w;
00892   for(i=0;i<hypo->seqnum;i++) {
00893     w = hypo->seq[i];
00894     if (w != sp_break_last_word
00895        && !is_sil(w, winfo, hmminfo)
00896        && !winfo->is_transparent[w]
00897        ) {
00898       sp_break_last_nword = w;
00899       break;
00900     }
00901   }
00902 #ifdef SP_BREAK_DEBUG
00903   printf("sp_break_last_nword=%d[%s]\n", sp_break_last_nword, winfo->woutput[sp_break_last_nword]);
00904 #endif
00905 }
00906 #endif
00907 
00908 
00909 /**********************************************************************/
00910 /********* Debug output of hypothesis while search ********************/
00911 /**********************************************************************/
00912 
00913 /* ÄÌ¾ï¤Î½ÐÎÏ´Ø¿ô¤Ï result_*.c ¤Ë¤¢¤ë */
00914 /* normal output functions are in result_*.c */
00915 
00916 static HTK_Param *tparam;       
00917 
00918 /* output word sequence of a hypothesis */
00931 static void
00932 put_hypo_woutput(NODE *hypo)
00933 {
00934   int i,w;
00935 
00936   if (hypo != NULL) {
00937     for (i=hypo->seqnum-1;i>=0;i--) {
00938       w = hypo->seq[i];
00939       j_printf(" %s",winfo->woutput[w]);
00940     }
00941   }
00942   j_printf("\n");  
00943 }
00944 
00957 static void
00958 put_hypo_wname(NODE *hypo)
00959 {
00960   int i,w;
00961 
00962   if (hypo != NULL) {
00963     for (i=hypo->seqnum-1;i>=0;i--) {
00964       w = hypo->seq[i];
00965       j_printf(" %s",winfo->wname[w]);
00966     }
00967   }
00968   j_printf("\n");  
00969 }
00970 
00971 /**********************************************************************/
00972 /******** Output top 'ncan' hypotheses in a stack and free all ********/
00973 /**********************************************************************/
00974 
01010 static void
01011 result_reorder_and_output(NODE **r_start, NODE **r_bottom, int *r_stacknum, int ncan, WORD_INFO *winfo)
01012 {
01013   NODE *now;
01014   int num;
01015 
01016 #ifdef CM_NBEST 
01017   /* compute CM from the N-best sentence candidates */
01018   cm_compute_from_nbest(*r_start, *r_stacknum);
01019 #endif
01020   num = 0;
01021   result_pass2_begin();
01022   while ((now = get_best_from_stack(r_start,r_stacknum)) != NULL && num < ncan) {
01023     num++;
01024     /* output result */
01025     result_pass2(now, num, winfo);
01026 #ifdef VISUALIZE
01027     visual2_best(now, winfo);
01028 #endif
01029 #ifdef SP_BREAK_CURRENT_FRAME
01030     /* set the last context-aware word for next recognition */
01031     if (sp_break_last_nword_allow_override && num == 1) sp_segment_set_last_nword(now);
01032 #endif
01033     /* do forced alignment if needed */
01034     if (align_result_word_flag) word_rev_align(now->seq, now->seqnum, tparam);
01035     if (align_result_phoneme_flag) phoneme_rev_align(now->seq, now->seqnum, tparam);
01036     if (align_result_state_flag) state_rev_align(now->seq, now->seqnum, tparam);
01037     free_node(now);
01038   }
01039   result_pass2_end();
01040   /* free the rest */
01041   if (now != NULL) free_node(now);
01042   free_all_nodes(*r_start);
01043 }  
01044 
01045 
01046 /**********************************************************************/
01047 /********* Main stack decoding function *******************************/
01048 /**********************************************************************/
01049 
01050 /* subfunctions called by this main function:
01051      for word prediction: ngram_decode.c or dfa_decode.c
01052      for hypothesis expansion and forward viterbi: search_bestfirst_v{1,2}.c
01053                                   (v1: for efficiency   v2: for accuracy)
01054 */
01055 
01082 void
01083 wchmm_fbs(HTK_Param *param, BACKTRELLIS *backtrellis, LOGPROB backmax, int stacksize, int ncan, int maxhypo, int cate_bgn, int cate_num)
01084 {
01085   /* Ê¸²¾Àâ¥¹¥¿¥Ã¥¯ */
01086   /* hypothesis stack (double-linked list) */
01087   int stacknum;                 /* current stack size */
01088   NODE *start = NULL;           /* top node */
01089   NODE *bottom = NULL;          /* bottom node */
01090 
01091   /* Ç§¼±·ë²Ì³ÊÇ¼¥¹¥¿¥Ã¥¯(·ë²Ì¤Ï¤³¤³¤Ø¤¤¤Ã¤¿¤ó½¸¤á¤é¤ì¤ë) */
01092   /* result sentence stack (found results will be stored here and then re-ordered) */
01093   int r_stacksize = ncan;
01094   int r_stacknum;
01095   NODE *r_start = NULL;
01096   NODE *r_bottom = NULL;
01097 
01098   /* ¼ï¡¹¤Î¥«¥¦¥ó¥¿¡¼ */
01099   /* monitoring counters */
01100   int popctr = 0;               /* num of popped hypotheses from stack */
01101   int genectr = 0;              /* num of generated hypotheses */
01102   int pushctr = 0;              /* num of hypotheses actually pushed to stack */
01103   int finishnum = 0;            /* num of found sentence hypothesis */
01104 
01105   /* ¥ï¡¼¥¯¥¨¥ê¥¢ */
01106   /* work area */
01107   NEXTWORD **nextword, *nwroot; /* buffer to store predicted words */
01108   int maxnwnum;                 /* current allocated number of words in nextword */
01109   int nwnum;                    /* current number of words in nextword */
01110   NODE *now, *new;              /* popped current hypo., expanded new hypo. */
01111 #ifdef USE_DFA
01112   NODE *now_noise;             /* for inserting/deleting noise word */
01113   boolean now_noise_calced;
01114   int t;
01115 #endif
01116   int w,i,j;
01117   LOGPROB last_score = LOG_ZERO;
01118 #ifdef GRAPHOUT
01119   LOGPROB prev_score;
01120   WordGraph *wordgraph_root = NULL;
01121   boolean merged_p;
01122 #ifdef GRAPHOUT_DYNAMIC
01123   int dynamic_merged_num = 0;
01124   WordGraph *wtmp;
01125 #endif
01126 #ifdef GRAPHOUT_SEARCH
01127   int terminate_search_num = 0;
01128 #endif
01129 #endif
01130 
01131   /* copy parameter pointer beforehand for forced alignment */
01132   if (align_result_word_flag || align_result_phoneme_flag || align_result_state_flag) tparam = param;
01133 
01134 #ifdef USE_DFA
01135   if (debug2_flag) j_printf("limiting category: %d-%d\n", cate_bgn, cate_bgn + cate_num-1);
01136 #endif
01137 
01138   /*
01139    * ½é´ü²½
01140    * Initialize
01141    */
01142   peseqlen = backtrellis->framelen; /* (just for quick access) */
01143   /* Í½Â¬Ã±¸ì³ÊÇ¼ÎÎ°è¤ò³ÎÊÝ */
01144   /* malloc area for word prediction */
01145   nextword = nw_malloc(&maxnwnum, &nwroot);
01146   /* Á°¸þ¤¥¹¥³¥¢·×»»ÍÑ¤ÎÎÎ°è¤ò³ÎÊÝ */
01147   /* malloc are for forward viterbi (scan_word()) */
01148   malloc_wordtrellis();         /* scan_wordÍÑÎÎ°è */
01149   /* ²¾Àâ¥¹¥¿¥Ã¥¯½é´ü²½ */
01150   /* initialize hypothesis stack */
01151   start = bottom = NULL;
01152   stacknum = 0;
01153   /* ·ë²Ì³ÊÇ¼¥¹¥¿¥Ã¥¯½é´ü²½ */
01154   /* initialize result stack */
01155   if (result_reorder_flag) {
01156     r_start = r_bottom = NULL;
01157     r_stacknum = 0;
01158   }
01159   
01160 #ifdef CM_SEARCH
01161   /* initialize local stack */
01162   cm_init(winfo);
01163 #endif
01164 #ifdef SCAN_BEAM
01165   /* prepare and initialize score envelope */
01166   framemaxscore = (LOGPROB *)mymalloc(sizeof(LOGPROB)*peseqlen);
01167   envl_init(peseqlen);
01168 #endif /* SCAN_BEAM */
01169   
01170   /* ¥¨¥ó¥Ù¥í¡¼¥×Ãµº÷ÍÑ¤ÎÃ±¸ìÄ¹ÊÌÅ¸³«¿ô¥«¥¦¥ó¥¿¤ò½é´ü²½ */
01171   /* initialize counters for envelope search */
01172   if (enveloped_bestfirst_width >= 0) wb_init();
01173 
01174   if (verbose_flag) j_printf("samplenum=%d\n", peseqlen);
01175   if (result_reorder_flag) {
01176     if (debug2_flag) VERMES("getting %d candidates...\n",ncan);
01177   }
01178 
01179 #ifdef VISUALIZE
01180   visual2_init(maxhypo);
01181 #endif
01182 
01183   /* 
01184    * ½é´ü²¾Àâ(1Ã±¸ì¤«¤é¤Ê¤ë)¤òÆÀ, Ê¸²¾Àâ¥¹¥¿¥Ã¥¯¤Ë¤¤¤ì¤ë
01185    * get a set of initial words from LM function and push them as initial
01186    * hypotheses
01187    */
01188   /* the first words will be stored in nextword[] */
01189 #ifdef USE_NGRAM
01190   nwnum = ngram_firstwords(nextword, peseqlen, maxnwnum, winfo, backtrellis);
01191 #else  /* USE_DFA */
01192   nwnum = dfa_firstwords(nextword, peseqlen, maxnwnum, dfa);
01193   /* °î¤ì¤¿¤é¡¢¥Ð¥Ã¥Õ¥¡¤òÁý¤ä¤·¤ÆºÆ¥Á¥ã¥ì¥ó¥¸ */
01194   /* If the number of nextwords can exceed the buffer size, expand the
01195      nextword data area */
01196   while (nwnum < 0) {
01197     nextword = nw_expand(nextword, &maxnwnum, &nwroot);
01198     nwnum = dfa_firstwords(nextword, peseqlen, maxnwnum, dfa);
01199   }
01200 #endif
01201 
01202   if (debug2_flag) {
01203     j_printf("%d words in wordtrellis as first hypothesis\n", nwnum);
01204   }
01205   
01206   /* store them to stack */
01207   for (w = 0; w < nwnum; w++) {
01208 #ifdef USE_DFA
01209     /* limit word hypothesis */
01210     if (! (winfo->wton[nextword[w]->id] >= cate_bgn && winfo->wton[nextword[w]->id] < cate_bgn + cate_num)) {
01211       continue;
01212     }
01213 #endif
01214     /* generate new hypothesis */
01215     new = newnode();
01216     start_word(new,nextword[w],param,backtrellis);
01217 #ifdef USE_DFA
01218     if (new->score <= LOG_ZERO) { /* not on trellis */
01219       free_node(new);
01220       continue;
01221     }
01222 #endif
01223     genectr++;
01224 #ifdef CM_SEARCH
01225     /* store the local hypothesis to temporal stack */
01226     cm_store(new);
01227 #else 
01228     /* put to stack */
01229     if (put_to_stack(new, &start, &bottom, &stacknum, stacksize) != -1) {
01230 #ifdef VISUALIZE
01231       visual2_next_word(new, NULL, popctr);
01232 #endif
01233 #ifdef GRAPHOUT
01234       new->prevgraph = NULL;
01235       new->lastcontext = NULL;
01236 #endif
01237       pushctr++;
01238     }
01239 #endif
01240   }
01241 #ifdef CM_SEARCH
01242   /* compute score sum */
01243   cm_sum_score();
01244   /* compute CM and put the generated hypotheses to global stack */
01245   while ((new = cm_get_node()) != NULL) {
01246     cm_set_score(new);
01247 #ifdef CM_SEARCH_LIMIT
01248     if (new->cmscore[new->seqnum-1] < cm_cut_thres
01249 #ifdef CM_SEARCH_LIMIT_AFTER
01250         && finishnum > 0
01251 #endif
01252         ) {
01253       free_node(new);
01254       continue;
01255     }
01256 #endif /* CM_SEARCH_LIMIT */
01257     
01258     if (put_to_stack(new, &start, &bottom, &stacknum, stacksize) != -1) {
01259 #ifdef VISUALIZE
01260       visual2_next_word(new, NULL, popctr);
01261 #endif
01262 #ifdef GRAPHOUT
01263       new->prevgraph = NULL;
01264       new->lastcontext = NULL;
01265 #endif
01266       pushctr++;
01267     }
01268   }
01269 #endif
01270 
01271   if (debug2_flag) {
01272     j_printf("%d pushed\n", pushctr);
01273   }
01274   
01275   /********************/
01276   /* main search loop */
01277   /********************/
01278 
01279   for (;;) {
01280 
01281     if (module_mode) {
01282       /* if terminate signal has been received, cancel this input */
01283       if (module_wants_terminate()) {
01284         j_printf("terminated\n");
01285         break;
01286       }
01287     }
01288     
01289     /* 
01290      * ²¾Àâ¥¹¥¿¥Ã¥¯¤«¤éºÇ¤â¥¹¥³¥¢¤Î¹â¤¤²¾Àâ¤ò¼è¤ê½Ð¤¹
01291      * pop the top hypothesis from stack
01292      */
01293 #ifdef DEBUG
01294     VERMES("get one hypothesis\n");
01295 #endif
01296     now = get_best_from_stack(&start,&stacknum);
01297     if (now == NULL) {  /* stack empty ---> Ãµº÷½ªÎ»*/
01298       j_printf("stack empty, search terminate now\n");
01299       j_printf("%d sentences have found\n", finishnum);
01300       break;
01301     }
01302     /* (bogus score check) */
01303     if (now->score <= LOG_ZERO) {
01304       free_node(now);
01305       continue;
01306     }
01307 
01308 #ifdef GRAPHOUT
01309     /* Ã±¸ì¥°¥é¥ÕÍÑ¤Ë pop ²¾Àâ¤Î f ¥¹¥³¥¢¤ò°ì»þÊÝÂ¸ */
01310     prev_score = now->score;
01311 #endif
01312 
01313     /* word envelope ¥Á¥§¥Ã¥¯ */
01314     /* consult word envelope */
01315     if (enveloped_bestfirst_width >= 0) {
01316       if (!wb_ok(now)) {
01317         /* ¤³¤Î²¾ÀâÄ¹¤Ë¤ª¤±¤ëÅ¸³«¸µ²¾Àâ¿ô¤ÎÎß·×¿ô¤Ï´û¤ËïçÃÍ¤ò±Û¤¨¤Æ¤¤¤ë¡¥
01318            ¤½¤Î¤¿¤á¡¤¤³¤Î²¾Àâ¤Ï¼Î¤Æ¤ë¡¥*/
01319         /* the number of popped hypotheses at the length already
01320            reaches its limit, so the current popped hypothesis should
01321            be discarded here with no expansion */
01322         free_node(now);
01323         continue;
01324       }
01325     }
01326     
01327 #ifdef CM_SEARCH_LIMIT_POP
01328     if (now->cmscore[now->seqnum-1] < cm_cut_thres_pop) {
01329       free_node(now);
01330       continue;
01331     }
01332 #endif /* CM_SEARCH_LIMIT_POP */
01333 
01334     popctr++;
01335 
01336     /* (for debug) ¼è¤ê½Ð¤·¤¿²¾Àâ¤È¤½¤Î¥¹¥³¥¢¤ò½ÐÎÏ */
01337     /*             output information of the popped hypothesis to stdout */
01338     if (debug2_flag) {
01339       j_printf("--- pop %d:\n", popctr);
01340       j_printf("  "); put_hypo_woutput(now);
01341       j_printf("  "); put_hypo_wname(now);
01342       j_printf("  %d words, f=%f, g=%f\n", now->seqnum, now->score, now->g[now->bestt]);
01343       j_printf("  last word on trellis: [%d-%d]\n", now->estimated_next_t + 1, now->bestt);
01344     }
01345 
01346 #ifdef VISUALIZE
01347     visual2_popped(now, popctr);
01348 #endif
01349 
01350 #ifdef GRAPHOUT
01351 #ifdef GRAPHOUT_DYNAMIC
01352     /* merge last word in popped hypo if possible */
01353     wtmp = wordgraph_check_merge(now->prevgraph, &wordgraph_root, now->seq[now->seqnum-1], &merged_p);
01354     if (wtmp != NULL) {         /* wtmp holds merged word */
01355       dynamic_merged_num++;
01356       if (now->prevgraph != NULL) {
01357         if (now->prevgraph->saved) {
01358           j_error("ERROR: already saved??\n");
01359         }
01360         wordgraph_free(now->prevgraph);
01361       }
01362       now->prevgraph = wtmp;    /* change word to the merged one */
01363 
01364       if (now->lastcontext != NULL
01365           && now->lastcontext != now->prevgraph /* avoid self loop */
01366           ) {
01367         wordgraph_check_and_add_leftword(now->lastcontext, now->prevgraph);
01368 #ifdef GRAPHOUT_SEARCH_CONSIDER_RIGHT
01369         if (merged_p) {
01370           if (wordgraph_check_and_add_rightword(now->prevgraph, now->lastcontext) == FALSE) {
01371             merged_p = TRUE;
01372           } else {
01373             merged_p = FALSE;
01374           }
01375         } else {
01376           wordgraph_check_and_add_rightword(now->prevgraph, now->lastcontext);
01377         }
01378 #else
01379         wordgraph_check_and_add_rightword(now->prevgraph, now->lastcontext);
01380 #endif    
01381       }
01382       /*printf("last word merged\n");*/
01383       /* previous still remains at memory here... (will be purged later) */
01384     } else {
01385       wordgraph_save(now->prevgraph, now->lastcontext, &wordgraph_root);
01386     }
01387 #ifdef GRAPHOUT_SEARCH
01388     /* if recent hypotheses are included in the existing graph, terminate */
01389     if (merged_p && now->endflag == FALSE) {
01390       terminate_search_num++;
01391       free_node(now);
01392       continue;
01393     }
01394 #endif
01395 #else  /* ~GRAPHOUT_DYNAMIC */
01396     /* always save */
01397     wordgraph_save(now->prevgraph, now->lastcontext, &wordgraph_root);
01398 #endif /* ~GRAPHOUT_DYNAMIC */
01399 #endif /* GRAPHOUT */
01400 
01401     /* ¼è¤ê½Ð¤·¤¿²¾Àâ¤Î¥¹¥³¥¢¤ò¸µ¤Ë score envelope ¤ò¹¹¿· */
01402     /* update score envelope using the popped hypothesis */
01403     envl_update(now, peseqlen);
01404 
01405     /* 
01406      * ¼è¤ê½Ð¤·¤¿²¾Àâ¤Î¼õÍý¥Õ¥é¥°¤¬´û¤ËÎ©¤Ã¤Æ¤¤¤ì¤Ð¡¤
01407      * ¤½¤Î²¾Àâ¤ÏÃµº÷½ªÎ»¤È¤ß¤Ê¤·¡¤·ë²Ì¤È¤·¤Æ½ÐÎÏ¤·¤Æ¼¡¤Î¥ë¡¼¥×¤Ø¡¥
01408      *
01409      * If the popped hypothesis already reached to the end, 
01410      * we can treat it as a recognition result.
01411      */
01412 #ifdef DEBUG
01413     VERMES("endflag check\n");
01414 #endif
01415     
01416     if (now->endflag) {
01417       if (debug2_flag) {
01418         j_printf("  This is a final sentence candidate\n");
01419       }
01420       /* quick, dirty hack */
01421       if (now->score == last_score) {
01422         free_node(now);
01423         continue;
01424       } else {
01425         last_score = now->score;
01426       }
01427       
01428       finishnum++;
01429       if (debug2_flag) {
01430         j_printf("  %d-th sentence found\n", finishnum);
01431       }
01432 
01433       if (result_reorder_flag) {
01434         /* °ìÄê¿ô¤Î²¾Àâ¤¬ÆÀ¤é¤ì¤¿¤¢¤È¥¹¥³¥¢¤Ç¥½¡¼¥È¤¹¤ë¤¿¤á¡¤
01435            °ì»þÅª¤ËÊÌ¤Î¥¹¥¿¥Ã¥¯¤Ë³ÊÇ¼¤·¤Æ¤ª¤¯ */
01436         /* store the result to result stack
01437            after search is finished, they will be re-ordered and output */
01438         put_to_stack(now, &r_start, &r_bottom, &r_stacknum, r_stacksize);
01439         /* »ØÄê¿ô¤ÎÊ¸²¾Àâ¤¬ÆÀ¤é¤ì¤¿¤Ê¤éÃµº÷¤ò½ªÎ»¤¹¤ë */
01440         /* finish search if specified number of results are found */
01441         if (finishnum >= ncan) {
01442           if (debug2_flag) VERMES("%d\n",finishnum);
01443           break;
01444         } else {
01445           if (debug2_flag) VERMES("%d..", finishnum);
01446           continue;
01447         }
01448       } else {                  /* result_reorder ¤Ç¤Ê¤¤¾ì¹ç¤Ï */
01449         /* ·ë²Ì¤Ï¸«¤Ä¤«¤ê¼¡Âè¤¹¤°¤Ë½ÐÎÏ */
01450         /* result are directly output as soon as found */
01451         if (finishnum == 1) result_pass2_begin(); /* first time */
01452         result_pass2(now, finishnum, winfo);
01453 #ifdef VISUALIZE
01454         visual2_best(now, winfo);
01455 #endif
01456 #ifdef SP_BREAK_CURRENT_FRAME
01457         /* ¥·¥ç¡¼¥È¥Ý¡¼¥º¥»¥°¥á¥ó¥Æ¡¼¥·¥ç¥ó»þ¤Ï¼¡¤Î¥»¥°¥á¥ó¥ÈÇ§¼±³«»Ï»þ¤Ë
01458            Ã±¸ìÍúÎò¤òÅÁÈÂ¤µ¤»¤ë */
01459         /* set the last context-aware word for next recognition */
01460         if (sp_break_last_nword_allow_override && finishnum == 1) sp_segment_set_last_nword(now);
01461 #endif
01462         /* É¬Í×¤Ç¤¢¤ì¤Ð, Ç§¼±·ë²Ì¤òÍÑ¤¤¤ÆÆþÎÏ¤Î¥»¥°¥á¥ó¥Æ¡¼¥·¥ç¥ó¤ò¹Ô¤Ê¤¦ */
01463         /* do forced alignment with the result if needed */
01464         if (align_result_word_flag) word_rev_align(now->seq, now->seqnum, tparam);
01465         if (align_result_phoneme_flag) phoneme_rev_align(now->seq, now->seqnum, tparam);
01466         if (align_result_state_flag) state_rev_align(now->seq, now->seqnum, tparam);
01467         /* ²¾Àâ¤ò²òÊü¤·¤Æ¼¡¤Î²¾Àâ¤Ø */
01468         /* free the hypothesis and continue to next */
01469         free_node(now);
01470         /* ¤³¤ì¤Ç»ØÄê¿ô¤ÎÊ¸²¾Àâ¤¬ÆÀ¤é¤ì¤¿¤Ê¤é, ¤³¤³¤ÇÃµº÷¤ò½ªÎ»¤¹¤ë */
01471         /* finish search if specified number of results are found */
01472         if (finishnum >= ncan) {
01473           result_pass2_end();
01474           break;                /* end of search */
01475         }
01476         else continue;
01477       }
01478       
01479     } /* end of now->endflag */
01480 
01481     
01482     /* 
01483      * Ãµº÷¼ºÇÔ¤ò¸¡½Ð¤¹¤ë¡¥
01484      * ²¾Àâ¿ô¤¬ maxhypo °Ê¾åÅ¸³«¤µ¤ì¤¿¤é, ¤â¤¦¤³¤ì°Ê¾å¤ÏÃµº÷¤·¤Ê¤¤
01485      *
01486      * detecting search failure:
01487      * if the number of expanded hypotheses reaches maxhypo, giveup further search
01488      */
01489 #ifdef DEBUG
01490     VERMES("loop end check\n");
01491 #endif
01492     if (popctr >= maxhypo) {
01493       j_printf("num of hypotheses overflow\n");
01494       /* (for debug) Ãµº÷¼ºÇÔ»þ¤Ë¡¢¥¹¥¿¥Ã¥¯¤Ë»Ä¤Ã¤¿¾ðÊó¤òÅÇ¤½Ð¤¹ */
01495       /* (for debug) output all hypothesis remaining in the stack */
01496       if (debug2_flag) put_all_in_stack(&start, &stacknum);
01497       free_node(now);
01498       break;                    /* end of search */
01499     }
01500     /* ²¾ÀâÄ¹¤¬°ìÄêÃÍ¤ò±Û¤¨¤¿¤È¤¡¤¤½¤Î²¾Àâ¤òÇË´þ¤¹¤ë */
01501     /* check hypothesis word length overflow */
01502     if (now->seqnum >= MAXSEQNUM) {
01503       j_printerr("sentence length exceeded ( > %d)\n",MAXSEQNUM);
01504       free_node(now);
01505       continue;
01506     }
01507 
01508 #ifdef GRAPHOUT
01509 #ifndef GRAPHOUT_PRECISE_BOUNDARY
01510     /* if monophone (= no backscan), the tail g score should be kept here */
01511     /* else, updated tail g score will be computed in scan_word()  */
01512     if(!ccd_flag) {
01513       now->tail_g_score = now->g[now->bestt];
01514     }
01515 #endif
01516 #endif
01517 
01518     /*
01519      * Á°¸þ¤¥¹¥³¥¢¤ò¹¹¿·¤¹¤ë¡§ ºÇ¸å¤ÎÃ±¸ì¤ÎÉôÊ¬¤ÎÁ°¸þ¤¥¹¥³¥¢¤ò·×»»¤¹¤ë¡¥
01520      * update forward score: compute forward trellis for the last word
01521      */
01522 #ifdef DEBUG
01523     VERMES("scan_word\n");
01524 #endif
01525     scan_word(now, param);
01526     if (now->score < backmax + LOG_ZERO) { /* another end-of-search detecter */
01527       j_printf("now->score = %f?\n",now->score);
01528       put_hypo_woutput(now);
01529       free_node(now);
01530       break;
01531     }
01532 
01533     /* 
01534      * ¼è¤ê½Ð¤·¤¿²¾Àâ¤¬Ê¸¤È¤·¤Æ¼õÍý²ÄÇ½¤Ç¤¢¤ì¤Ð¡¤
01535      * ¼õÍý¥Õ¥é¥°¤òÎ©¤Æ¤Æ¤ò¥¹¥¿¥Ã¥¯¤Ë¤¤¤ìÄ¾¤·¤Æ¤ª¤¯¡¥
01536      * (¼¡¤Ë¼è¤ê½Ð¤µ¤ì¤¿¤é²ò¤È¤Ê¤ë)
01537      *
01538      * if the current popped hypothesis is acceptable, set endflag
01539      * and return it to stack: it will become the recognition result
01540      * when popped again.
01541      */
01542 #ifdef DEBUG
01543     VERMES("accept check\n");
01544 #endif
01545     if (
01546 #ifdef USE_NGRAM
01547         ngram_acceptable(now, winfo)
01548 #else  /* USE_DFA */
01549         dfa_acceptable(now, dfa)
01550 #endif
01551         && now->estimated_next_t <= 5) {
01552       new = newnode();
01553       /* new ¤Ë now ¤ÎÃæ¿È¤ò¥³¥Ô¡¼¤·¤Æ¡¤ºÇ½ªÅª¤Ê¥¹¥³¥¢¤ò·×»» */
01554       /* copy content of 'now' to 'new', and compute the final score */
01555       last_next_word(now, new, param);
01556       if (debug2_flag) {
01557         j_printf("  This is acceptable as a sentence candidate\n");
01558       }
01559       /* g[] ¤¬ÆþÎÏ»ÏÃ¼¤ËÃ£¤·¤Æ¤¤¤Ê¤±¤ì¤Ð´þµÑ */
01560       /* reject this sentence candidate if g[] does not reach the end */
01561       if (new->score <= LOG_ZERO) {
01562         if (debug2_flag) {
01563           j_printf("  But invalid because Viterbi pass does not reach the 0th frame\n");
01564         }
01565         free_node(new);
01566         free_node(now);
01567         continue;
01568       }
01569       /* ¼õÍý¥Õ¥é¥°¤òÎ©¤Æ¤ÆÆþ¤ìÄ¾¤¹ */
01570       /* set endflag and push again  */
01571       if (debug2_flag) {
01572         j_printf("  This hypo itself was pushed with final score=%f\n", new->score);
01573       }
01574       new->endflag = TRUE;
01575       if (put_to_stack(new, &start, &bottom, &stacknum, stacksize) != -1) {
01576 #ifdef GRAPHOUT
01577         if (new->score > LOG_ZERO) {
01578           new->lastcontext = now->prevgraph;
01579           new->prevgraph = wordgraph_assign(new->seq[new->seqnum-1], 0,
01580 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01581                                             /* wordend are shifted to the last */
01582 #ifdef PASS2_STRICT_IWCD
01583                                             new->wordend_frame[0],
01584 #else
01585                                             now->wordend_frame[0],
01586 #endif
01587 #else
01588                                             now->bestt,
01589 #endif
01590                                             new->score,
01591                                             prev_score,
01592                                             now->g[0],
01593 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01594 #ifdef PASS2_STRICT_IWCD
01595                                             new->wordend_gscore[0],
01596 #else
01597                                             now->wordend_gscore[0],
01598 #endif
01599 #else
01600                                             now->tail_g_score,
01601 #endif
01602 #ifdef USE_NGRAM
01603                                             now->lscore,
01604 #else
01605                                             LOG_ZERO,
01606 #endif
01607 #ifdef CM_SEARCH
01608                                             new->cmscore[new->seqnum-1]
01609 #else
01610                                             LOG_ZERO
01611 #endif
01612                                             );
01613         } else {
01614           new->lastcontext = now->lastcontext;
01615           new->prevgraph = now->prevgraph;
01616         }
01617 #endif
01618       }
01619       /* ¤³¤Î²¾Àâ¤Ï¤³¤³¤Ç½ª¤ï¤é¤º¤Ë, ¤³¤³¤«¤é¤µ¤é¤ËÃ±¸ìÅ¸³«¤¹¤ë */
01620       /* continue with the 'now' hypothesis, not terminate here */
01621     }
01622 
01623     /*
01624      * ¤³¤Î²¾Àâ¤«¤é¡¤¼¡Ã±¸ì½¸¹ç¤ò·èÄê¤¹¤ë¡¥
01625      * ¼¡Ã±¸ì½¸¹ç¤Ï, ¤³¤Î²¾Àâ¤Î¿äÄê»ÏÃ¼¥Õ¥ì¡¼¥à¼þÊÕ¤ËÂ¸ºß¤·¤¿
01626      * Âè£±¥Ñ¥¹¤Î¥È¥ì¥ê¥¹Ã±¸ì½¸¹ç¡¥
01627      *
01628      * N-gram¤Î¾ì¹ç¤Ï³ÆÃ±¸ì¤Î n-gram ÀÜÂ³³ÎÎ¨¤¬´Þ¤Þ¤ì¤ë¡¥
01629      * DFA ¤Î¾ì¹ç¤Ï, ¤½¤ÎÃæ¤Ç¤µ¤é¤Ë DFA ¾å¤ÇÀÜÂ³²ÄÇ½¤Ê¤â¤Î¤Î¤ß¤¬ÊÖ¤Ã¤Æ¤¯¤ë
01630      */
01631     /*
01632      * Determine next word set that can connect to this hypothesis.
01633      * They come from the trellis word that has been survived at near the
01634      * beginning of the last word.
01635      *
01636      * In N-gram mode, they also contain N-gram probabilities toward the
01637      * source hypothesis.  In DFA mode, the word set is further reduced
01638      * by the grammatical constraint
01639      */
01640 #ifdef DEBUG
01641     VERMES("get next words\n");
01642 #endif
01643 #ifdef USE_NGRAM
01644     nwnum = ngram_nextwords(now, nextword, maxnwnum, ngram, winfo, backtrellis);
01645 #else  /* USE_DFA */
01646     nwnum = dfa_nextwords(now, nextword, maxnwnum, dfa);
01647     /* nextword ¤¬°î¤ì¤¿¤é¡¢¥Ð¥Ã¥Õ¥¡¤òÁý¤ä¤·¤ÆºÆ¥Á¥ã¥ì¥ó¥¸ */
01648     /* If the number of nextwords can exceed the buffer size, expand the
01649        nextword data area */
01650     while (nwnum < 0) {
01651       nextword = nw_expand(nextword, &maxnwnum, &nwroot);
01652       nwnum = dfa_nextwords(now, nextword, maxnwnum, dfa);
01653     }
01654 #endif
01655     if (debug2_flag) {
01656       j_printf("  %d words extracted from wordtrellis\n", nwnum);
01657     }
01658 
01659     /* 
01660      * ²¾Àâ¤È¼¡Ã±¸ì½¸¹ç¤«¤é¿·¤¿¤ÊÊ¸²¾Àâ¤òÀ¸À®¤·¡¤¥¹¥¿¥Ã¥¯¤Ë¤¤¤ì¤ë¡¥
01661      */
01662     /*
01663      * generate new hypotheses from 'now' and 'nextword', 
01664      * and push them to stack
01665      */
01666 #ifdef DEBUG
01667     VERMES("generate hypo\n");
01668 #endif
01669 
01670 #ifdef USE_DFA
01671     now_noise_calced = FALSE;   /* TRUE is noise-inserted score has been calculated */
01672 #endif
01673     i = pushctr;                /* store old value */
01674 
01675 #ifdef CM_SEARCH
01676     /* initialize local stack */
01677     cm_init(winfo);
01678 #endif
01679 
01680     /* for each nextword, generate a new hypothesis */
01681     for (w = 0; w < nwnum; w++) {
01682 #ifdef USE_DFA
01683       /* limit word hypothesis */
01684       if (! (winfo->wton[nextword[w]->id] >= cate_bgn && winfo->wton[nextword[w]->id] < cate_bgn + cate_num)) {
01685         continue;
01686       }
01687 #endif
01688       new = newnode();
01689 #ifdef USE_DFA
01690       if (nextword[w]->can_insert_sp == TRUE) {
01691         /* ¥Î¥¤¥º¤ò¶´¤ó¤À¥È¥ì¥ê¥¹¥¹¥³¥¢¤ò·×»»¤·¡¤¶´¤Þ¤Ê¤¤¾ì¹ç¤È¤ÎºÇÂçÃÍ¤ò¼è¤ë */
01692         /* compute hypothesis score with noise inserted */
01693         
01694         if (now_noise_calced == FALSE) {
01695           /* now ¤Ë sp ¤ò¤Ä¤±¤¿²¾Àâ now_noise ¤òºî¤ê,¤½¤Î¥¹¥³¥¢¤ò·×»» */
01696           /* generate temporal hypothesis 'now_noise' which has short-pause
01697              word after the original 'now' */
01698           fornoise.id = dfa->sp_id;
01699           now_noise = newnode();
01700           cpy_node(now_noise, now);
01701 #if 0
01702           now_noise_tmp = newnode();
01703           next_word(now, now_noise_tmp, &fornoise, param, backtrellis);
01704           scan_word(now_noise_tmp, param);
01705           for(t=0;t<peseqlen;t++) {
01706             now_noise->g[t] = max(now_noise_tmp->g[t], now->g[t]);
01707           }
01708           free_node(now_noise_tmp);
01709 #else
01710           /* expand NOISE only if it exists in backward trellis */
01711           /* begin patch by kashima */
01712           if (looktrellis_flag) {
01713             if(!dfa_look_around(&fornoise, now, backtrellis)){
01714               free_node(now_noise);
01715               free_node(new);
01716               continue;
01717             }
01718           }
01719           /* end patch by kashima */
01720 
01721           /* now_nosie ¤Î ¥¹¥³¥¢ g[] ¤ò·×»»¤·¡¤¸µ¤Î now ¤Î g[] ¤ÈÈæ³Ó¤·¤Æ
01722              ¹â¤¤Êý¤òºÎÍÑ */
01723           /* compute trellis score g[], and adopt the maximum score
01724              for each frame compared with now->g[] */
01725           next_word(now, now_noise, &fornoise, param, backtrellis);
01726           scan_word(now_noise, param);
01727           for(t=0;t<peseqlen;t++) {
01728             now_noise->g[t] = max(now_noise->g[t], now->g[t]);
01729           }
01730           /* ¥Î¥¤¥º¤ò¶´¤ó¤ÀºÝ¤ò¹ÍÎ¸¤·¤¿¥¹¥³¥¢¤ò·×»»¤·¤¿¤Î¤Ç¡¤
01731              ¤³¤³¤ÇºÇ¸å¤Î¥Î¥¤¥ºÃ±¸ì¤ò now_noise ¤«¤é¾Ã¤¹ */
01732           /* now that score has been computed considering pause insertion,
01733              we can delete the last noise word from now_noise here */
01734           now_noise->seqnum--;
01735 #endif
01736           now_noise_calced = TRUE;
01737         }
01738 
01739         /* expand word only if it exists in backward trellis */
01740         /* begin patch by kashima */
01741         if (looktrellis_flag) {
01742           if(!dfa_look_around(nextword[w], now_noise, backtrellis)){
01743             free_node(new);
01744             continue;
01745           }
01746         }
01747         /* end patch by kashima */
01748         
01749         /* ¿·¤·¤¤²¾Àâ' new' ¤ò 'now_noise' ¤«¤éÀ¸À® */
01750         /* generate a new hypothesis 'new' from 'now_noise' */
01751         next_word(now_noise, new, nextword[w], param, backtrellis);
01752         
01753       } else {
01754         
01755         /* expand word only if it exists in backward trellis */
01756         /* begin patch by kashima */
01757         if (looktrellis_flag) {
01758           if(!dfa_look_around(nextword[w], now, backtrellis)){
01759             free_node(new);
01760             continue;
01761           }
01762         }
01763         /* end patch by kashima */
01764         
01765         /* ¿·¤·¤¤²¾Àâ' new' ¤ò 'now_noise' ¤«¤éÀ¸À® */
01766         /* generate a new hypothesis 'new' from 'now_noise' */
01767         next_word(now, new, nextword[w], param, backtrellis);
01768         
01769       }
01770 #else  /* not USE_DFA */
01771 
01772       /* ¿·¤·¤¤²¾Àâ' new' ¤ò 'now_noise' ¤«¤éÀ¸À®
01773          N-gram ¤Î¾ì¹ç¤Ï¥Î¥¤¥º¤òÆÃÊÌ°·¤¤¤·¤Ê¤¤ */
01774       /* generate a new hypothesis 'new' from 'now'.
01775          pause insertion is treated as same as normal words in N-gram mode. */
01776       next_word(now, new, nextword[w], param, backtrellis);
01777 
01778 #endif /* USE_DFA */
01779 
01780       if (new->score <= LOG_ZERO) { /* not on trellis */
01781         free_node(new);
01782         continue;
01783       }
01784         
01785       genectr++;
01786 
01787 #ifdef CM_SEARCH
01788       /* store the local hypothesis to temporal stack */
01789       cm_store(new);
01790 #else 
01791       /* À¸À®¤·¤¿²¾Àâ 'new' ¤ò¥¹¥¿¥Ã¥¯¤ËÆþ¤ì¤ë */
01792       /* push the generated hypothesis 'new' to stack */
01793 
01794       /* stack overflow */
01795       if (can_put_to_stack(new, &bottom, &stacknum, stacksize) == -1) {
01796         free_node(new);
01797         continue;
01798       }
01799       
01800 #ifdef GRAPHOUT
01801       /* assign a word arc to the last fixed word */
01802       new->lastcontext = now->prevgraph;
01803       new->prevgraph = wordgraph_assign(new->seq[new->seqnum-2], new->bestt + 1,
01804 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01805 #ifdef PASS2_STRICT_IWCD
01806                                         /* most up-to-date wordend_gscore is on new, because the last phone of 'now' will be computed at next_word() */
01807                                         new->wordend_frame[new->bestt],
01808 #else
01809                                         now->wordend_frame[new->bestt],
01810 #endif
01811 #else
01812                                         now->bestt,
01813 #endif
01814                                         new->score, prev_score,
01815                                         now->g[new->bestt+1],
01816 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01817 #ifdef PASS2_STRICT_IWCD
01818                                         /* most up-to-date wordend_gscore is on new, because the last phone of 'now' will be computed at next_word() */
01819                                         new->wordend_gscore[new->bestt],
01820 #else
01821                                         now->wordend_gscore[new->bestt],
01822 #endif
01823 #else
01824                                         now->tail_g_score,
01825 #endif
01826 #ifdef USE_NGRAM
01827                                         now->lscore,
01828 #else
01829                                         LOG_ZERO,
01830 #endif
01831 #ifdef CM_SEARCH
01832                                         new->cmscore[new->seqnum-2]
01833 #else
01834                                         LOG_ZERO
01835 #endif
01836                                         );
01837 #endif /* GRAPHOUT */
01838       put_to_stack(new, &start, &bottom, &stacknum, stacksize);
01839       if (debug2_flag) {
01840         j = new->seq[new->seqnum-1];
01841         j_printf("  %15s [%15s](id=%5d)(%f) [%d-%d] pushed\n",winfo->wname[j], winfo->woutput[j], j, new->score, new->estimated_next_t + 1, new->bestt);
01842       }
01843 #ifdef VISUALIZE
01844       visual2_next_word(new, now, popctr);
01845 #endif
01846       pushctr++;
01847 #endif
01848     } /* end of nextword loop */
01849 
01850 #ifdef CM_SEARCH
01851     /* compute score sum */
01852     cm_sum_score();
01853     /* compute CM and put the generated hypotheses to global stack */
01854     while ((new = cm_get_node()) != NULL) {
01855       cm_set_score(new);
01856 #ifdef CM_SEARCH_LIMIT
01857       if (new->cmscore[new->seqnum-1] < cm_cut_thres
01858 #ifdef CM_SEARCH_LIMIT_AFTER
01859           && finishnum > 0
01860 #endif
01861           ) {
01862         free_node(new);
01863         continue;
01864       }
01865 #endif /* CM_SEARCH_LIMIT */
01866       /*      j = new->seq[new->seqnum-1];
01867               printf("  %15s [%15s](id=%5d)(%f) [%d-%d] cm=%f\n",winfo->wname[j], winfo->woutput[j], j, new->score, new->estimated_next_t + 1, new->bestt, new->cmscore[new->seqnum-1]);*/
01868 
01869       /* stack overflow */
01870       if (can_put_to_stack(new, &bottom, &stacknum, stacksize) == -1) {
01871         free_node(new);
01872         continue;
01873       }
01874 
01875 #ifdef GRAPHOUT
01876 
01877       /* assign a word arc to the last fixed word */
01878       new->lastcontext = now->prevgraph;
01879       new->prevgraph = wordgraph_assign(new->seq[new->seqnum-2], new->bestt + 1,
01880 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01881 #ifdef PASS2_STRICT_IWCD
01882                                         new->wordend_frame[new->bestt],
01883 #else
01884                                         now->wordend_frame[new->bestt],
01885 #endif
01886 #else
01887                                         now->bestt,
01888 #endif
01889                                         new->score, prev_score,
01890                                         now->g[new->bestt+1],
01891 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01892 #ifdef PASS2_STRICT_IWCD
01893                                         new->wordend_gscore[new->bestt],
01894 #else
01895                                         now->wordend_gscore[new->bestt],
01896 #endif
01897 #else
01898                                         now->tail_g_score,
01899 #endif
01900 #ifdef USE_NGRAM
01901                                         now->lscore,
01902 #else
01903                                         LOG_ZERO,
01904 #endif
01905 #ifdef CM_SEARCH
01906                                         new->cmscore[new->seqnum-2]
01907 #else
01908                                         LOG_ZERO
01909 #endif
01910                                         );
01911 #endif /* GRAPHOUT */
01912       
01913       put_to_stack(new, &start, &bottom, &stacknum, stacksize);
01914       if (debug2_flag) {
01915         j = new->seq[new->seqnum-1];
01916         j_printf("  %15s [%15s](id=%5d)(%f) [%d-%d] pushed\n",winfo->wname[j], winfo->woutput[j], j, new->score, new->estimated_next_t + 1, new->bestt);
01917       }
01918 #ifdef VISUALIZE
01919       visual2_next_word(new, now, popctr);
01920 #endif
01921       pushctr++;
01922     }
01923 #endif
01924 
01925     if (debug2_flag) {
01926       j_printf("%d pushed\n",pushctr-i);
01927     }
01928 #ifdef USE_DFA
01929     if (now_noise_calced == TRUE) free_node(now_noise);
01930 #endif
01931 
01932     /* 
01933      * ¼è¤ê½Ð¤·¤¿²¾Àâ¤ò¼Î¤Æ¤ë
01934      * free the source hypothesis
01935      */
01936     free_node(now);
01937 
01938     if (catch_intr_flag) break;
01939     
01940   }
01941   /***************/
01942   /* End of Loop */
01943   /***************/
01944 
01945   /* output */
01946   if (!result_reorder_flag && finishnum < ncan) {
01947     result_pass2_end();
01948   }
01949   /* Ãµº÷¼ºÇÔ¤Ç°ì¤Ä¤â¸õÊä¤¬ÆÀ¤é¤ì¤Ê¤±¤ì¤Ð¡¤Âè1¥Ñ¥¹¤Î·ë²Ì¤ò¤½¤Î¤Þ¤Þ½ÐÎÏ¤¹¤ë */
01950   /* If search failed and no candidate obtained in this pass, output
01951      the result of the previous 1st pass as a final result. */
01952   if (finishnum == 0) {         /* if search failed */
01953     if (verbose_flag) {
01954       j_printf("got no candidates, output 1st pass result as a final result\n",finishnum);
01955     }
01956     /* make hypothesis data from the result of previous 1st pass */
01957     now = newnode();
01958     for (i=0;i<pass1_wnum;i++) {
01959       now->seq[i] = pass1_wseq[pass1_wnum-1-i];
01960     }
01961     now->seqnum = pass1_wnum;
01962     now->score = pass1_score;
01963 #ifdef CONFIDENCE_MEASURE
01964     /* fill in null values */
01965 #ifdef CM_MULTIPLE_ALPHA
01966     for(j=0;j<cm_alpha_num;j++) {
01967       for(i=0;i<now->seqnum;i++) now->cmscore[i][j] = 0.0;
01968     }
01969 #else
01970     for(i=0;i<now->seqnum;i++) now->cmscore[i] = 0.0;
01971 #endif
01972 #endif /* CONFIDENCE_MEASURE */
01973     
01974     /* output it */
01975     if (module_mode) {
01976       /* ¥â¥¸¥å¡¼¥ë¥â¡¼¥É¤Ç¤Ï¡¤Âè2¥Ñ¥¹¤¬¼ºÇÔ¤·¤¿¤é²¿¤â½ÐÎÏ¤·¤Ê¤¤ */
01977       result_pass2_failed(winfo);
01978     } else {
01979       /* output the result of the first pass as the final output */
01980       result_pass2_begin();
01981       result_pass2(now, 1, winfo);
01982       result_pass2_end();
01983     }
01984 #ifdef SP_BREAK_CURRENT_FRAME
01985     /* segment restart processing */
01986     if (sp_break_last_nword_allow_override) sp_segment_set_last_nword(now);
01987 #endif
01988     /* do forced alignment if needed */
01989     if (align_result_word_flag) word_rev_align(now->seq, now->seqnum, tparam);
01990     if (align_result_phoneme_flag) phoneme_rev_align(now->seq, now->seqnum, tparam);
01991     if (align_result_state_flag) state_rev_align(now->seq, now->seqnum, tparam);
01992     free_node(now);
01993   } else {                      /* if at least 1 candidate found */
01994     if (debug2_flag) {
01995       j_printf("got %d candidates\n",finishnum);
01996     }
01997     if (result_reorder_flag) {
01998       /* ·ë²Ì¤Ï¤Þ¤À½ÐÎÏ¤µ¤ì¤Æ¤¤¤Ê¤¤¤Î¤Ç¡¤Ê¸¸õÊäÍÑ¥¹¥¿¥Ã¥¯Æâ¤ò¥½¡¼¥È¤·¤Æ
01999          ¤³¤³¤Ç½ÐÎÏ¤¹¤ë */
02000       /* As all of the found candidate are in result stack, we sort them
02001          and output them here  */
02002       if (debug2_flag) VERMES("done\n");
02003       result_reorder_and_output(&r_start, &r_bottom, &r_stacknum, output_hypo_maxnum, winfo);
02004     }
02005   }
02006 
02007   /* ³Æ¼ï¥«¥¦¥ó¥¿¤ò½ÐÎÏ */
02008   /* output counters */
02009   if (verbose_flag) {
02010     j_printf("%d generated, %d pushed, %d nodes popped in %d\n",
02011              genectr, pushctr, popctr, backtrellis->framelen);
02012     j_flushprint();
02013 #ifdef GRAPHOUT_DYNAMIC
02014     j_printf("graph: %d merged", dynamic_merged_num);
02015 #ifdef GRAPHOUT_SEARCH
02016     j_printf(", %d terminated", terminate_search_num);
02017 #endif
02018     j_printf(" in %d\n", popctr);
02019 #endif
02020   }
02021     
02022 #ifdef GRAPHOUT
02023   printf("------ wordgraph post-processing begin ------\n");
02024   /* garbage collection and word merging */
02025   /* words with no following word (except end of sentence) will be erased */
02026   wordgraph_purge_leaf_nodes(&wordgraph_root);
02027   /* if GRAPHOUT_PRECISE_BOUNDARY defined,
02028      propagate exact time boundary to the right context.
02029      words of different boundary will be duplicated here.
02030   */
02031   wordgraph_adjust_boundary(&wordgraph_root);
02032   /* merge words with the same time and same score */
02033   wordgraph_compaction_thesame(&wordgraph_root);
02034   /* merge words with the same time */
02035   wordgraph_compaction_exacttime(&wordgraph_root);
02036   /* merge words of near time if "-graphrange" is other than 0 */
02037   wordgraph_compaction_neighbor(&wordgraph_root);
02038   /* finalize: sort and annotate ID */
02039   wordgraph_sort_and_annotate_id(&wordgraph_root);
02040   /* check coherence */
02041   wordgraph_check_coherence(wordgraph_root);
02042   printf("------ wordgraph post-processing end ------\n");
02043   /* debug: output all graph word info */
02044   wordgraph_dump(wordgraph_root);
02045   /* output graph */
02046   result_graph(wordgraph_root, winfo);
02047   /* clear all wordgraph */
02048   wordgraph_clean(&wordgraph_root);
02049 #endif
02050   
02051   /* ½ªÎ»½èÍý */
02052   /* finalize */
02053   nw_free(nextword, nwroot);
02054   free_all_nodes(start);
02055   free_wordtrellis();
02056 #ifdef SCAN_BEAM
02057   free(framemaxscore);
02058 #endif
02059 }
search_bestfirst_main.c