julius/beam.c

00001 
00052 /*
00053  * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
00054  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00055  * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology
00056  * All rights reserved
00057  */
00058 
00059 #include <julius.h>
00060 
00061 #undef DEBUG
00062 
00063 static boolean idc_on;          
00064 static int progout_interval_frame; 
00065 
00066 
00067 /* -------------------------------------------------------------------- */
00068 /*                     第1パスの結果出力と終了処理                     */
00069 /*              result output and end procedure of 1st pass             */
00070 /* -------------------------------------------------------------------- */
00071 
00072 #ifdef WORD_GRAPH
00073 /* 第1パス結果から単語グラフを生成 */
00074 /* generate word graphs from result of 1st pass (=backtrellis) */
00075 static int glevel;              
00076 static int gnodes;              
00077 static int garcs;               
00078 
00107 static void
00108 generate_lattice(int frame, BACKTRELLIS *bt, WORD_INFO *winfo)
00109 {
00110   TRELLIS_ATOM *ta;
00111   int i, j;
00112   boolean new_node = FALSE;
00113   LOGPROB l;
00114 
00115   glevel++;
00116 
00117   if (frame >= 0) {
00118     for (i=0;i<bt->num[frame];i++) {
00119       ta = bt->rw[frame][i];
00120       /* a word will be saved as part of the graph only if at least one of
00121          its following words has survived the beam */
00122       if (! ta->within_context) continue; /* not a candidate */
00123       if (ta->within_wordgraph) continue; /* already marked */
00124       /* mark  */
00125       ta->within_wordgraph = TRUE;
00126       new_node = TRUE;
00127       if (debug2_flag) {
00128         for(j=0;j<glevel;j++) j_printf(" ");
00129         j_printf("%s: [%d..%d]\n", winfo->wname[ta->wid], ta->begintime, ta->endtime);
00130       }
00131       if (verbose_flag) {
00132         j_printf("%d: [%d..%d] wid=%d name=\"%s\" lname=\"%s\" score=%f", garcs, ta->begintime, ta->endtime, ta->wid, winfo->woutput[ta->wid], winfo->wname[ta->wid], ta->backscore);
00133       }
00134 #ifdef USE_NGRAM
00135       j_printf(" lscore=%f", ta->lscore);
00136 #endif
00137       l = ta->backscore;
00138       if (ta->last_tre->wid != WORD_INVALID) {
00139         l -= ta->last_tre->backscore;
00140       }
00141 #ifdef USE_NGRAM
00142       l -= ta->lscore;
00143 #endif
00144       j_printf(" AMavg=%f\n", l / (ta->endtime - ta->begintime + 1));
00145 
00146       garcs++;
00147       /* recursive call */
00148       generate_lattice(ta->last_tre->endtime, bt, winfo);
00149     }
00150   }
00151   if (new_node) {
00152     gnodes++;
00153   }
00154   glevel--;
00155 }
00156 #endif
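
#if 0
/* Illustrative sketch (editor addition, not part of the original Julius source):
   generate_lattice() above does not build a graph structure by itself; it only
   sets the within_wordgraph flag on trellis atoms (WORD_GRAPH only).  A
   hypothetical caller could count the marked atoms on one frame like this,
   assuming 'bt' is a filled BACKTRELLIS and 'frame' a valid frame index. */
static int
count_marked_atoms(BACKTRELLIS *bt, int frame)
{
  int i, n = 0;
  for (i = 0; i < bt->num[frame]; i++) {
    if (bt->rw[frame][i]->within_wordgraph) n++;
  }
  return n;
}
#endif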
00157 
00172 static void
00173 put_atom(TRELLIS_ATOM *atom, WORD_INFO *winfo)
00174 {
00175   int i;
00176   j_printf("%3d,%3d %f %16s (id=%5d)", atom->begintime, atom->endtime,
00177          atom->backscore, winfo->wname[atom->wid], atom->wid);
00178   for (i=0;i<winfo->wlen[atom->wid]; i++) {
00179     j_printf(" %s",winfo->wseq[atom->wid][i]->name);
00180   }
00181   j_printf("\n");
00182 }
00183 
00216 static LOGPROB
00217 trace_backptr(WORD_ID wordseq_rt[MAXSEQNUM], int *rt_wordlen, TRELLIS_ATOM *atom, BACKTRELLIS *backtrellis, WORD_INFO *winfo)
00218 {
00219   int wordlen = 0;              /* word length of best sentence hypothesis */
00220   TRELLIS_ATOM *tretmp;
00221   LOGPROB langscore = 0.0;
00222   static WORD_ID wordseq[MAXSEQNUM];    /* temporary buffer: in reverse order */
00223   int i;
00224   
00225   /* initialize */
00226   wordseq[0] = atom->wid;       /* start from specified atom */
00227   wordlen = 1;
00228   tretmp = atom;
00229 #ifdef USE_NGRAM
00230   langscore += tretmp->lscore;
00231 #endif
00232   if (debug2_flag) {
00233     put_atom(tretmp, winfo);
00234   }
00235   
00236   /* trace the backtrellis */
00237   while (tretmp->begintime > 0) {/* until beginning of input */
00238     tretmp = tretmp->last_tre;
00239 /*    t = tretmp->boundtime - 1;
00240     tretmp = bt_binsearch_atom(backtrellis, tretmp->boundtime-1, tretmp->last_wid);*/
00241     if (tretmp == NULL) {       /* should not happen */
00242       j_error("ERROR: BackTrellis Pass missing??\n");
00243     }
00244 #ifdef USE_NGRAM
00245     langscore += tretmp->lscore;
00246 #endif
00247     wordseq[wordlen] = tretmp->wid;
00248     wordlen++;
00249     if (debug2_flag) {
00250       put_atom(tretmp, winfo);
00251     }
00252     if (wordlen >= MAXSEQNUM) {
00253       j_error("sentence length exceeded ( > %d)\n",MAXSEQNUM);
00254     }
00255   }
00256   *rt_wordlen = wordlen;
00257   /* reverse order -> normal order */
00258   for(i=0;i<wordlen;i++) wordseq_rt[i] = wordseq[wordlen-i-1];
00259   return(langscore);
00260 }
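
#if 0
/* Illustrative sketch (editor addition, not part of the original source):
   minimal use of trace_backptr().  Given a word-end trellis atom 'best', it
   fills 'seq' with the word sequence in normal (time) order and returns the
   accumulated LM score (meaningful only when USE_NGRAM is defined).  The
   function name is hypothetical. */
static void
print_best_sequence(TRELLIS_ATOM *best, BACKTRELLIS *bt, WORD_INFO *winfo)
{
  WORD_ID seq[MAXSEQNUM];
  int len, i;
  LOGPROB lm;
  lm = trace_backptr(seq, &len, best, bt, winfo);
  for (i = 0; i < len; i++) j_printf(" %s", winfo->woutput[seq[i]]);
  j_printf("  (LM score: %f)\n", lm);
}
#endif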
00261 
00311 static LOGPROB
00312 print_1pass_result(BACKTRELLIS *backtrellis, int framelen, WORD_INFO *winfo)
00313 {
00314   WORD_ID wordseq[MAXSEQNUM];
00315   int wordlen;
00316   int i;
00317   TRELLIS_ATOM *best;
00318   int last_time;
00319   LOGPROB total_lscore;
00320   LOGPROB maxscore;
00321   TRELLIS_ATOM *tmp;
00322 
00323   /* look for the last trellis word */
00324   for (last_time = framelen - 1; last_time >= 0; last_time--) {
00325 #ifdef USE_NGRAM
00326 #ifdef SP_BREAK_CURRENT_FRAME   /*  in case of sp segmentation */
00327     /* 最終フレームに残った最大スコアの単語 */
00328     /* it should be the best trellis word on the last frame */
00329     maxscore = LOG_ZERO;
00330     for (i=0;i<backtrellis->num[last_time];i++) {
00331       tmp = backtrellis->rw[last_time][i];
00332 #ifdef WORD_GRAPH
00333       /* treat only words on a graph path */
00334       if (!tmp->within_context) continue;
00335 #endif
00336       if (maxscore < tmp->backscore) {
00337         maxscore = tmp->backscore;
00338         best = tmp;
00339       }
00340     }
00341     if (maxscore != LOG_ZERO) break;
00342 #else  /* normal mode */
00343     /* 最終単語は winfo->tail_silwid に固定 */
00344     /* it is fixed to the tail silence model (winfo->tail_silwid) */
00345     maxscore = LOG_ZERO;
00346     for (i=0;i<backtrellis->num[last_time];i++) {
00347       tmp = backtrellis->rw[last_time][i];
00348 #ifdef WORD_GRAPH
00349       /* treat only words on a graph path */
00350       if (!tmp->within_context) continue;
00351 #endif
00352       if (tmp->wid == winfo->tail_silwid && maxscore < tmp->backscore) {
00353         maxscore = tmp->backscore;
00354         best = tmp;
00355         break;
00356       }
00357     }
00358     if (maxscore != LOG_ZERO) break;
00359 #endif
00360 #else  /* USE_DFA */
00361     /* 末尾に残った単語の中で最大スコアの単語(cp_endは使用しない) */
00362     /* the best trellis word on the last frame (not use cp_end[]) */
00363     maxscore = LOG_ZERO;
00364     for (i=0;i<backtrellis->num[last_time];i++) {
00365       tmp = backtrellis->rw[last_time][i];
00366 #ifdef WORD_GRAPH
00367       /* treat only words on a graph path */
00368       if (!tmp->within_context) continue;
00369 #endif
00370       /*      if (dfa->cp_end[winfo->wton[tmp->wid]] == TRUE) {*/
00371         if (maxscore < tmp->backscore) {
00372           maxscore = tmp->backscore;
00373           best = tmp;
00374         }
00375         /*      }*/
00376     }
00377     if (maxscore != LOG_ZERO) break;
00378 #endif /* USE_NGRAM */
00379   }
00380   if (last_time < 0) {          /* not found */
00381     j_printerr("[no sentence-end word survived on last beam]\n");
00382     /* print null result */
00383     result_pass1_final(NULL, 0, LOG_ZERO, LOG_ZERO, winfo);
00384     return(LOG_ZERO);
00385   }
00386   
00387   /* traceback word trellis from the best word */
00388   total_lscore = trace_backptr(wordseq, &wordlen, best, backtrellis, winfo);
00389 
00390   if (progout_flag) {           /* just flush last progress output */
00391     result_pass1_current(last_time, wordseq, wordlen, best->backscore, total_lscore, winfo);
00392   }
00393 
00394   /* output 1st pass result */    
00395   if (verbose_flag || !progout_flag) {
00396     result_pass1_final(wordseq, wordlen, best->backscore, total_lscore, winfo);
00397   }
00398   result_pass1_end();
00399   
00400   /* store the result to global val (notice: in reverse order) */
00401   for(i=0;i<wordlen;i++) pass1_wseq[i] = wordseq[i];
00402   pass1_wnum = wordlen;
00403   pass1_score = best->backscore;
00404 
00405 #ifdef WORD_GRAPH
00406   /* 単語トレリスから,ラティスを生成する */
00407   /* 実際には within_wordgraph を on にするだけ */
00408   /* generate word graph from the word trellis */
00409   /* actually generate_lattice() does not construct graph:
00410      it only marks words in trellis that are on the word graph */
00411   glevel = 0;
00412   gnodes = 0;                   /* for frame = 0 */
00413   garcs = 0;
00414   j_printf("--- begin wordgraph data pass1 ---\n");
00415   generate_lattice(last_time, backtrellis, winfo);
00416   if (verbose_flag) j_printf("word graph generated (nodes=%d,arcs=%d)\n",gnodes, garcs);
00417   j_printf("--- end wordgraph data pass1 ---\n");
00418 #endif
00419 
00420   /* return maximum score */
00421   return(best->backscore);
00422 }
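
#if 0
/* Illustrative sketch (editor addition): print_1pass_result() returns LOG_ZERO
   when no sentence-end word survived on the last frame, and the score of the
   best word otherwise.  A hypothetical caller at the end of the first pass
   could check the outcome like this (function name is hypothetical). */
static boolean
report_1pass_example(BACKTRELLIS *backtrellis, int framelen, WORD_INFO *winfo)
{
  LOGPROB score;
  score = print_1pass_result(backtrellis, framelen, winfo);
  if (score <= LOG_ZERO) {
    j_printerr("1st pass failed: no valid result\n");
    return FALSE;
  }
  return TRUE;
}
#endif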
00423 
00441 static void
00442 bt_current_max(BACKTRELLIS *bt, int t, WORD_INFO *winfo)
00443 {
00444   static WORD_ID wordseq[MAXSEQNUM];
00445   int wordlen;
00446   TRELLIS_ATOM *tre;
00447   TRELLIS_ATOM *tremax;
00448   LOGPROB maxscore;
00449   LOGPROB lscore;
00450 
00451   /* bt->list is ordered by time frame */
00452   maxscore = LOG_ZERO;
00453   tremax = NULL;
00454   tre = bt->list;
00455   while (tre != NULL && tre->endtime == t) {
00456     if (maxscore < tre->backscore) {
00457       maxscore = tre->backscore;
00458       tremax = tre;
00459     }
00460     tre = tre->next;
00461   }
00462 
00463   if (maxscore != LOG_ZERO) {
00464     lscore = trace_backptr(wordseq, &wordlen, tremax, bt, winfo);
00465     result_pass1_current(t, wordseq, wordlen, tremax->backscore, lscore, winfo);
00466   } else {
00467     wordlen = 0;
00468     result_pass1_current(t, wordseq, wordlen, LOG_ZERO, LOG_ZERO, winfo);
00469   }
00470 }
00471 
00489 static void
00490 bt_current_max_word(BACKTRELLIS *bt, int t, WORD_INFO *winfo)
00491 {
00492   TRELLIS_ATOM *tre;
00493   TRELLIS_ATOM *tremax;
00494   LOGPROB maxscore;
00495   WORD_ID w;
00496 
00497   /* bt->list は時間順に格納されている */
00498   /* bt->list is ordered by time */
00499   maxscore = LOG_ZERO;
00500   tremax = NULL;
00501   tre = bt->list;
00502   while (tre != NULL && tre->endtime == t) {
00503     if (maxscore < tre->backscore) {
00504       maxscore = tre->backscore;
00505       tremax = tre;
00506     }
00507     tre = tre->next;
00508   }
00509 
00510   if (maxscore != LOG_ZERO) {
00511     j_printf("%3d: ",t);
00512     w = tremax->wid;
00513     j_printf("\"%s [%s]\"(id=%d)",
00514            winfo->wname[w], winfo->woutput[w], w);
00515     j_printf(" [%d-%d] %f <- ", tremax->begintime, t, tremax->backscore);
00516     w = tremax->last_tre->wid;
00517     if (w != WORD_INVALID) {
00518       j_printf("\"%s [%s]\"(id=%d)\n",
00519              winfo->wname[w], winfo->woutput[w], w);
00520     } else {
00521       j_printf("bgn\n");
00522     }
00523   }
00524 }
00525 
00526 /* -------------------------------------------------------------------- */
00527 /*                 ビーム探索中のトークンを扱うサブ関数                 */
00528 /*                functions to handle hypothesis tokens                  */
00529 /* -------------------------------------------------------------------- */
00530 
00531 /* Token structure: */
00532    
00533 /* How tokens are managed:
00534    o  tlist[][] is a token stocker.  It holds all tokens in a sequential
00535       buffer.  They are malloced once on startup and referred to by ID
00536       during the Viterbi procedure.  In word-pair mode, each token also has
00537       a link to another token, to allow a node to hold more than one token.
00538       
00539    o  token[n] holds the current ID number of the token associated with
00540       lexicon tree node 'n'.
00541 
00542  */
00543 
00544 /* Token space */
00545 static TOKEN2 *tlist[2];        
00546 static TOKENID *tindex[2];      
00547 static int maxtnum = 0;         
00548 static int expand_step = 0;     
00549 static int tnum[2];             
00550 static int n_start;             
00551 static int n_end;               
00552 static int tl;          
00553 static int tn;          
00554 
00555 /* Active token list */
00556 static TOKENID *token;          
00557 static int totalnodenum;        
00558 
00559 
00560 static TRELLIS_ATOM bos;        
00561 static boolean nodes_malloced = FALSE; 
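
#if 0
/* Illustrative sketch (editor addition, not part of the original Julius source):
   how the declarations above relate.  tnum[tt] tokens live in
   tlist[tt][0..tnum[tt]-1]; token[] maps a lexicon tree node to the ID of the
   token currently assigned to it; tn/tl select which of the two buffers is
   "this frame" and "last frame".  Walking all tokens created in buffer 'tt': */
static void
walk_tokens_example(int tt)
{
  int i;
  TOKEN2 *tk;
  for (i = 0; i < tnum[tt]; i++) {
    tk = &(tlist[tt][i]);
    j_printf("token %d: node=%d score=%f\n", i, tk->node, tk->score);
  }
}
#endif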
00562 
00581 static void
00582 malloc_nodes(int n, int ntoken_init, int ntoken_step)
00583 {
00584   totalnodenum = n;
00585   token        = mymalloc(sizeof(TOKENID)*totalnodenum);
00586   if (maxtnum < ntoken_init) maxtnum = ntoken_init;
00587   tlist[0]     = mymalloc(sizeof(TOKEN2)*maxtnum);
00588   tlist[1]     = mymalloc(sizeof(TOKEN2)*maxtnum);
00589   tindex[0]     = mymalloc(sizeof(TOKENID)*maxtnum);
00590   tindex[1]     = mymalloc(sizeof(TOKENID)*maxtnum);
00591   tnum[0] = tnum[1] = 0;
00592   if (expand_step < ntoken_step) expand_step = ntoken_step;
00593   nodes_malloced = TRUE;
00594 }
00595 
00604 static void
00605 expand_tlist()
00606 {
00607   maxtnum += expand_step;
00608   tlist[0]     = myrealloc(tlist[0],sizeof(TOKEN2)*maxtnum);
00609   tlist[1]     = myrealloc(tlist[1],sizeof(TOKEN2)*maxtnum);
00610   tindex[0]     = myrealloc(tindex[0],sizeof(TOKENID)*maxtnum);
00611   tindex[1]     = myrealloc(tindex[1],sizeof(TOKENID)*maxtnum);
00612   /*j_printerr("warn: token space expanded\n");*/
00613 }
00614 
00623 static void
00624 free_nodes()
00625 {
00626   if (nodes_malloced) {
00627     free(token);
00628     free(tlist[0]);
00629     free(tlist[1]);
00630     free(tindex[0]);
00631     free(tindex[1]);
00632     nodes_malloced = FALSE;
00633   }
00634 }
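
#if 0
/* Illustrative sketch (editor addition): typical life cycle of the token work
   area.  The initial size follows the assumption documented later in
   get_back_trellis_init(): roughly (beam width * 2 + number of tree root
   nodes) tokens, growing by the step size whenever create_token() hits the
   current limit.  The function name is hypothetical. */
static void
token_space_lifecycle_example(WCHMM_INFO *wchmm)
{
  malloc_nodes(wchmm->n, trellis_beam_width * 2 + wchmm->startnum,
               trellis_beam_width);
  /* ... run the frame-synchronous beam search ... */
  free_nodes();
}
#endif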
00635 
00648 static void
00649 clear_tlist(int tt)
00650 {
00651   tnum[tt] = 0;
00652 }
00653 
00666 static void
00667 clear_tokens(int tt)
00668 {
00669   int j;
00670   /* initialize active token list: only clear ones used in the last call */
00671   for (j=0; j<tnum[tt]; j++) {
00672     token[tlist[tt][j].node] = TOKENID_UNDEFINED;
00673   }
00674 }
00675 
00688 static TOKENID
00689 create_token()
00690 {
00691   TOKENID newid;
00692   newid = tnum[tn];
00693   tnum[tn]++;
00694   if (tnum[tn]>=maxtnum) expand_tlist();
00695   tindex[tn][newid] = newid;
00696 #ifdef WPAIR
00697   /* initialize link */
00698   tlist[tn][newid].next = TOKENID_UNDEFINED;
00699 #endif
00700   return(newid);
00701 }
00702 
00730 static void
00731 node_assign_token(int node, TOKENID tkid)
00732 {
00733 #ifdef WPAIR
00734   /* add to link list */
00735   tlist[tn][tkid].next = token[node];
00736 #endif
00737   token[node] = tkid;
00738   tlist[tn][tkid].node = node;
00739 }
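
#if 0
/* Illustrative sketch (editor addition): the create-and-assign pattern used in
   the Viterbi loops below.  A token is taken from the current buffer 'tn',
   filled, and attached to a lexicon tree node; field names follow their usage
   elsewhere in this file, the function name is hypothetical. */
static void
new_token_example(int nd, LOGPROB score, TRELLIS_ATOM *last)
{
  TOKENID id;
  TOKEN2 *tk;
  id = create_token();            /* may expand the token space */
  tk = &(tlist[tn][id]);
  tk->score = score;
  tk->last_tre = last;
  node_assign_token(nd, id);      /* register on the node: token[nd] = id */
}
#endif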
00740 
00775 static TOKENID
00776 node_exist_token(int tt, int node, WORD_ID wid)
00777 {
00778 #ifdef WPAIR
00779   /* In word-pair mode, multiple tokens are assigned to a node as a list,
00780      so we have to search for the token with the same last word ID */
00781 #ifdef WPAIR_KEEP_NLIMIT
00782   /* set an upper limit on the number of tokens kept per node */
00783   /* if no matching token exists but the limit has been reached, overwrite the lowest-scoring token */
00784   /* N-best: limit number of assigned tokens to a node */
00785   int i = 0;
00786   TOKENID lowest_token = TOKENID_UNDEFINED;
00787 #endif
00788   TOKENID tmp;
00789   for(tmp=token[node]; tmp != TOKENID_UNDEFINED; tmp=tlist[tt][tmp].next) {
00790     if (tlist[tt][tmp].last_tre->wid == wid) {
00791       return(tmp);
00792     }
00793 #ifdef WPAIR_KEEP_NLIMIT
00794     if (lowest_token == TOKENID_UNDEFINED ||
00795         tlist[tt][lowest_token].score < tlist[tt][tmp].score)
00796       lowest_token = tmp;
00797     if (++i >= wpair_keep_nlimit) break;
00798 #endif
00799   }
00800 #ifdef WPAIR_KEEP_NLIMIT
00801   if (i >= wpair_keep_nlimit) { /* overflow, overwrite lowest score */
00802     return(lowest_token);
00803   } else {
00804     return(TOKENID_UNDEFINED);
00805   }
00806 #else 
00807   return(TOKENID_UNDEFINED);
00808 #endif
00809   
00810 #else  /* not WPAIR */
00811   /* 1つだけ保持,これを常に上書き */
00812   /* Only one token is kept in 1-best mode (default), so
00813      simply return the ID */
00814   return(token[node]);
00815 #endif
00816 }
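
#if 0
/* Illustrative sketch (editor addition): under WPAIR a node may hold a linked
   list of tokens (one per distinct previous word), chained through the 'next'
   field set in create_token(); without WPAIR, token[node] holds a single ID or
   TOKENID_UNDEFINED.  Counting the tokens currently assigned to a node
   (function name is hypothetical): */
static int
count_tokens_on_node_example(int tt, int nd)
{
#ifdef WPAIR
  int n = 0;
  TOKENID tmp;
  for (tmp = token[nd]; tmp != TOKENID_UNDEFINED; tmp = tlist[tt][tmp].next) n++;
  return n;
#else
  return (token[nd] != TOKENID_UNDEFINED) ? 1 : 0;
#endif
}
#endif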
00817 
00818 #ifdef DEBUG
00819 /* tlist と token の対応をチェックする(debug) */
00820 /* for debug: check tlist <-> token correspondence
00821    where  tlist[tt][tokenID].node = nodeID and
00822           token[nodeID] = tokenID
00823  */
00824 static void
00825 node_check_token(int tt)
00826 {
00827   int i;
00828   for(i=0;i<tnum[tt];i++) {
00829     if (node_exist_token(tt, tlist[tt][i].node, tlist[tt][i].last_tre->wid) != i) {
00830       j_printerr("token %d not found on node %d\n", i, tlist[tt][i].node);
00831     }
00832   }
00833 }
00834 #endif
00835 
00836 
00837 
00838 /* -------------------------------------------------------------------- */
00839 /*       トークンをソートし 上位 N トークンを判別する (heap sort)       */
00840 /*        Sort generated tokens and get N-best (use heap sort)          */
00841 /* -------------------------------------------------------------------- */
00842 /* ビームの閾値として上位 N 番目のスコアが欲しいだけであり,実際にソート
00843    される必要はない */
00844 /* we only want to know the N-th score for determining beam threshold, so
00845    order is not considered here */
00846 
00847 #define SD(A) tindex[tn][A-1]   
00848 #define SCOPY(D,S) D = S        
00849 #define SVAL(A) (tlist[tn][tindex[tn][A-1]].score) 
00850 #define STVAL (tlist[tn][s].score) 
00851 
00852 
00874 static void
00875 sort_token_upward(int neednum, int totalnum)
00876 {
00877   int n,root,child,parent;
00878   TOKENID s;
00879   for (root = totalnum/2; root >= 1; root--) {
00880     SCOPY(s, SD(root));
00881     parent = root;
00882     while ((child = parent * 2) <= totalnum) {
00883       if (child < totalnum && SVAL(child) < SVAL(child+1)) {
00884         child++;
00885       }
00886       if (STVAL >= SVAL(child)) {
00887         break;
00888       }
00889       SCOPY(SD(parent), SD(child));
00890       parent = child;
00891     }
00892     SCOPY(SD(parent), s);
00893   }
00894   n = totalnum;
00895   while ( n > totalnum - neednum) {
00896     SCOPY(s, SD(n));
00897     SCOPY(SD(n), SD(1));
00898     n--;
00899     parent = 1;
00900     while ((child = parent * 2) <= n) {
00901       if (child < n && SVAL(child) < SVAL(child+1)) {
00902         child++;
00903       }
00904       if (STVAL >= SVAL(child)) {
00905         break;
00906       }
00907       SCOPY(SD(parent), SD(child));
00908       parent = child;
00909     }
00910     SCOPY(SD(parent), s);
00911   }
00912 }
00913 
00938 static void
00939 sort_token_downward(int neednum, int totalnum)
00940 {
00941   int n,root,child,parent;
00942   TOKENID s;
00943   for (root = totalnum/2; root >= 1; root--) {
00944     SCOPY(s, SD(root));
00945     parent = root;
00946     while ((child = parent * 2) <= totalnum) {
00947       if (child < totalnum && SVAL(child) > SVAL(child+1)) {
00948         child++;
00949       }
00950       if (STVAL <= SVAL(child)) {
00951         break;
00952       }
00953       SCOPY(SD(parent), SD(child));
00954       parent = child;
00955     }
00956     SCOPY(SD(parent), s);
00957   }
00958   n = totalnum;
00959   while ( n > totalnum - neednum) {
00960     SCOPY(s, SD(n));
00961     SCOPY(SD(n), SD(1));
00962     n--;
00963     parent = 1;
00964     while ((child = parent * 2) <= n) {
00965       if (child < n && SVAL(child) > SVAL(child+1)) {
00966         child++;
00967       }
00968       if (STVAL <= SVAL(child)) {
00969         break;
00970       }
00971       SCOPY(SD(parent), SD(child));
00972       parent = child;
00973     }
00974     SCOPY(SD(parent), s);
00975   }
00976 }
00977 
01008 static void
01009 sort_token_no_order(int neednum, int *start, int *end)
01010 {
01011   int totalnum = tnum[tn];
01012   int restnum;
01013 
01014   restnum = totalnum - neednum;
01015 
01016   if (neednum >= totalnum) {
01017     /* no need to sort */
01018     *start = 0;
01019     *end = totalnum - 1;
01020   } else if (neednum < restnum)  {
01021     /* needed num is smaller than rest, so sort for the needed tokens */
01022     sort_token_upward(neednum,totalnum);
01023     *start = totalnum - neednum;
01024     *end = totalnum - 1;
01025   } else {
01026     /* needed num is bigger than rest, so sort for the rest token */
01027     sort_token_downward(restnum,totalnum);
01028     *start = 0;
01029     *end = neednum - 1;
01030   }
01031 }
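
#if 0
/* Illustrative sketch (editor addition): how the beam is applied with
   sort_token_no_order().  After all tokens of the current frame have been
   generated in buffer 'tn', only the best 'trellis_beam_width' of them are
   kept; their IDs end up in tindex[tn][n_start..n_end] (in no particular
   order), and the next frame iterates over exactly that range.  The function
   name is hypothetical. */
static void
apply_beam_example(void)
{
  int j;
  TOKEN2 *tk;
  sort_token_no_order(trellis_beam_width, &n_start, &n_end);
  for (j = n_start; j <= n_end; j++) {
    tk = &(tlist[tn][tindex[tn][j]]);
    /* ... process the surviving token 'tk' ... */
  }
}
#endif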
01032     
01033 
01034 #ifdef SP_BREAK_CURRENT_FRAME
01035 /* -------------------------------------------------------------------- */
01036 /*                     ショートポーズ・セグメンテーション               */
01037 /*                     short pause segmentation                         */
01038 /* -------------------------------------------------------------------- */
01039 /* ====================================================================== */
01040 /* sp segmentation */
01041 /*
01042   |---************-----*********************-------*******--|
01043 [input segments]
01044   |-------------------|
01045                   |-------------------------------|
01046                                             |---------------|
01047 
01048                      
01049   |-------------------|t-2
01050                        |t-1 ... token processed (= lastlen)
01051                         |t  ... outprob computed
01052                        
01053 */
01054 
01055 static boolean in_sparea;       /* TRUE if we are within a pause area */
01056 static int sparea_start;        /* start frame of pause area */
01057 static int tmp_sparea_start;
01058 #ifdef SP_BREAK_RESUME_WORD_BEGIN
01059 static WORD_ID tmp_sp_break_last_word;
01060 #else
01061 static WORD_ID last_tre_word;
01062 #endif
01063 static boolean first_sparea;    /* TRUE if this is the first pause segment in an input stream */
01064 static int sp_duration;         /* number of current successive sp frame */
01065 
01092 boolean
01093 is_sil(WORD_ID w, WORD_INFO *winfo, HTK_HMM_INFO *hmm)
01094 {
01095   /* num of phones should be 1 */
01096   if (winfo->wlen[w] > 1) return FALSE;
01097 
01098   /* short pause (specified by "-spmodel") */
01099   if (winfo->wseq[w][0] == hmm->sp) return TRUE;
01100 
01101   /* head/tail sil */
01102   if (w == winfo->head_silwid || w == winfo->tail_silwid) return TRUE;
01103 
01104   /* otherwise */
01105   return FALSE;
01106 }
01107 
01131 static boolean
01132 detect_end_of_segment(BACKTRELLIS *backtrellis, int time, WCHMM_INFO *wchmm)
01133 {
01134   TRELLIS_ATOM *tre;
01135   LOGPROB maxscore = LOG_ZERO;
01136   TRELLIS_ATOM *tremax = NULL;
01137   int count = 0;
01138   boolean detected = FALSE;
01139 
01140   /* look for the best trellis word on the given time frame */
01141   for(tre = backtrellis->list; tre != NULL && tre->endtime == time; tre = tre->next) {
01142     if (maxscore < tre->backscore) {
01143       maxscore = tre->backscore;
01144       tremax = tre;
01145     }
01146     count++;
01147   }
01148   if (tremax == NULL) { /* no word end: possible at the very beginning of input */
01149     detected = TRUE;            /* assume it's in the short-pause duration */
01150   } else if (count > 0) {       /* some word ends found --- check if the best one is sp */
01151     if (is_sil(tremax->wid, wchmm->winfo, wchmm->hmminfo)) {
01152       detected = TRUE;
01153     }
01154   }
01155   
01156   /* sp区間持続チェック */
01157   /* check sp segment duration */
01158   if (in_sparea && detected) {  /* we are already in sp segment and sp continues */
01159     sp_duration++;              /* increment count */
01160 #ifdef SP_BREAK_RESUME_WORD_BEGIN
01161     /* resume word at the "beginning" of the sp segment */
01162     /* if this segment was triggered by (tremax == NULL) (as in the first
01163        several frames of input), the sp word (to be used as the resuming
01164        word in the next segment) has not been set yet; it will be detected here */
01165     if (tmp_sp_break_last_word == WORD_INVALID) {
01166       if (tremax != NULL) tmp_sp_break_last_word = tremax->wid;
01167     }
01168 #else
01169     /* resume word at the "end" of sp segment */
01170     /* simply update the best sp word */
01171     if (tremax != NULL) last_tre_word = tremax->wid;
01172 #endif
01173   }
01174 
01175   /* sp区間開始チェック */
01176   /* check if sp segment begins at this frame */
01177   else if (!in_sparea && detected) {
01178     /* 一時的に開始フレームとしてマーク */
01179     /* tentatively mark this frame as the beginning of the short-pause segment */
01180     tmp_sparea_start = time;
01181 #ifdef SP_BREAK_RESUME_WORD_BEGIN
01182     /* sp 区間開始時点の最尤単語を保存 */
01183     /* store the best word in this frame as resuming word */
01184     tmp_sp_break_last_word = tremax ? tremax->wid : WORD_INVALID;
01185 #endif
01186     in_sparea = TRUE;           /* yes, we are in sp segment */
01187     sp_duration = 1;            /* initialize duration count */
01188 #ifdef SP_BREAK_DEBUG
01189     printf("sp start %d\n", time);
01190 #endif /* SP_BREAK_DEBUG */
01191   }
01192   
01193   /* sp 区間終了チェック */
01194   /* check if sp segment ends at this frame */
01195   else if (in_sparea && !detected) {
01196     /* (time-1) is end frame of pause segment */
01197     in_sparea = FALSE;          /* we are not in sp segment */
01198 #ifdef SP_BREAK_DEBUG
01199     printf("sp end %d\n", time);
01200 #endif /* SP_BREAK_DEBUG */
01201     /* sp 区間長チェック */
01202     /* check length of the duration*/
01203     if (sp_duration < sp_frame_duration) {
01204       /* 短すぎる: 第1パスを中断せず続行 */
01205       /* too short segment: not break, continue 1st pass */
01206 #ifdef SP_BREAK_DEBUG
01207       printf("too short (%d<%d), ignored\n", sp_duration, sp_frame_duration);
01208 #endif /* SP_BREAK_DEBUG */
01209     } else if (first_sparea) {
01210       /* 最初のsp区間は silB にあたるので,第1パスを中断せず続行 */
01211       /* do not break at first sp segment: they are silB */
01212       first_sparea = FALSE;
01213 #ifdef SP_BREAK_DEBUG
01214       printf("first silence, ignored\n");
01215 #endif /* SP_BREAK_DEBUG */
01216     } else {
01217       /* segment end confirmed: stop the 1st pass here and move on to the 2nd pass */
01218       /* break 1st pass */
01219 #ifdef SP_BREAK_DEBUG
01220       printf(">> segment [%d..%d]\n", sparea_start, time-1);
01221 #else
01222       if (idc_on) j_printerr("|");
01223 #endif /* SP_BREAK_DEBUG */
01224       /* store the beginning frame of the segment */
01225       sparea_start = tmp_sparea_start;
01226 #ifdef SP_BREAK_RESUME_WORD_BEGIN
01227       /* resume word = most likely sp word on beginning frame of the segment */
01228       sp_break_last_word = tmp_sp_break_last_word;
01229 #else
01230       /* resume word = most likely sp word on end frame of the segment */
01231       sp_break_last_word = last_tre_word;
01232 #endif
01233 
01234       /*** segment: [sparea_start - time-1] ***/
01235       return(TRUE);
01236     }
01237   }
01238     
01239 #ifdef SP_BREAK_EVAL
01240   printf("[%d %d %d]\n", time, count, (detected) ? 50 : 0);
01241 #endif
01242   return (FALSE);
01243 }
01244 
01245 #endif /* SP_BREAK_CURRENT_FRAME */
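
#if 0
/* Illustrative sketch (editor addition): hypothetical per-frame use of the
   short-pause segmentation above (SP_BREAK_CURRENT_FRAME only).
   detect_end_of_segment() is fed the frame whose word ends are already stored
   in the backtrellis; it returns TRUE when a long-enough pause has just ended,
   at which point the caller would stop the first pass for this segment and
   hand the trellis over to the second pass.  The function name is
   hypothetical. */
static boolean
check_segment_example(BACKTRELLIS *backtrellis, int t, WCHMM_INFO *wchmm)
{
  if (detect_end_of_segment(backtrellis, t - 1, wchmm)) {
    /* segment boundary detected: terminate the first pass for this segment */
    return TRUE;
  }
  return FALSE;
}
#endif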
01246 
01247 
01248 
01249 /* -------------------------------------------------------------------- */
01250 /*             第1パス(フレーム同期ビームサーチ) メイン                */
01251 /*           main routines of 1st pass (frame-synchronous beam search)  */
01252 /* -------------------------------------------------------------------- */
01253 
01272 static void
01273 init_nodescore(HTK_Param *param, WCHMM_INFO *wchmm)
01274 {
01275   TOKENID newid;
01276   TOKEN2 *new;
01277 #ifdef USE_NGRAM
01278   WORD_ID beginword;
01279 #endif
01280   int node;
01281 #ifdef USE_DFA
01282   int i;
01283 #endif
01284 
01285   /* 初期仮説用単語履歴 */
01286   /* setup initial word context */
01287 #ifdef SP_BREAK_CURRENT_FRAME
01288   /* initial word context = last non-sp word of previous 2nd pass at last segment*/
01289   if (sp_break_last_nword == wchmm->winfo->tail_silwid) {
01290     /* if end with silE, initialize as normal start of sentence */
01291     bos.wid = WORD_INVALID;
01292   } else {
01293     bos.wid = sp_break_last_nword;
01294   }
01295 #else
01296   bos.wid = WORD_INVALID;       /* no context */
01297 #endif
01298   bos.begintime = bos.endtime = -1;
01299 
01300   /* ノード・トークンを初期化 */
01301   /* clear tree lexicon nodes and tokens */
01302   for(node=0;node<totalnodenum;node++) {
01303     token[node] = TOKENID_UNDEFINED;
01304   }
01305   tnum[0] = tnum[1]  = 0;
01306   
01307 #ifdef PASS1_IWCD
01308   /* 出力確率計算キャッシュを初期化 */
01309   /* initialize outprob cache */
01310   outprob_style_cache_init(wchmm);
01311 #endif
01312 
01313   /* create the initial hypothesis: determine the initial word(s) and generate the initial token(s) */
01314   /* initial word hypothesis */
01315 #ifdef USE_NGRAM
01316   
01317 #ifdef SP_BREAK_CURRENT_FRAME
01318   if (sp_break_last_word != WORD_INVALID) { /* last segment exist */
01319     /* 開始単語=前のセグメント計算時の最後の最尤単語 */
01320     /* 文終了単語(silE,句点(IPAモデル))なら,silB で開始 */
01321     /* initial word = best last word hypothesis on the last segment */
01322     /* if silE or sp, begin with silB */
01323     /*if (is_sil(sp_break_last_word, wchmm->winfo, wchmm->hmminfo)) {*/
01324     if (sp_break_last_word == wchmm->winfo->tail_silwid) {
01325       beginword = wchmm->winfo->head_silwid;
01326       bos.wid = WORD_INVALID;   /* reset initial word context */
01327     } else {
01328       beginword = sp_break_last_word;
01329     }
01330   } else {
01331     /* initial word fixed to silB */
01332     beginword = wchmm->winfo->head_silwid;
01333   }
01334 #else
01335   /* initial word fixed to silB */
01336   beginword = wchmm->winfo->head_silwid;
01337 #endif
01338 #ifdef SP_BREAK_DEBUG
01339   printf("startword=[%s], last_nword=[%s]\n",
01340          (beginword == WORD_INVALID) ? "WORD_INVALID" : wchmm->winfo->wname[beginword],
01341          (bos.wid == WORD_INVALID) ? "WORD_INVALID" : wchmm->winfo->wname[bos.wid]);
01342 #endif
01343   /* create the first token at the first node of initial word */
01344   newid = create_token();
01345   new = &(tlist[tn][newid]);
01346   /* initial node = head node of the beginword */
01347 #ifdef MULTIPATH_VERSION
01348   node = wchmm->wordbegin[beginword];
01349 #else
01350   node = wchmm->offset[beginword][0];
01351 #endif /* MULTIPATH_VERSION */
01352   /* set initial LM score */
01353   if (wchmm->state[node].scid != 0) {
01354     /* if initial node is on a factoring branch, use the factored score */
01355     new->last_lscore = max_successor_prob(wchmm, bos.wid, node);
01356   } else {
01357     /* else, set to 0.0 */
01358     new->last_lscore = 0.0;
01359   }
01360 #ifdef FIX_PENALTY
01361   new->last_lscore = new->last_lscore * lm_weight;
01362 #else
01363   new->last_lscore = new->last_lscore * lm_weight + lm_penalty;
01364 #endif
01365   /* set initial word history */
01366   new->last_tre = &bos;
01367   new->last_cword = bos.wid;
01368 #ifdef MULTIPATH_VERSION
01369   /* set initial score using the initial LM score */
01370   new->score = new->last_lscore;
01371 #else
01372   /* set initial score using the initial LM score and AM score of the first state */
01373   new->score = outprob_style(wchmm, node, bos.wid, 0, param) + new->last_lscore;
01374 #endif /* MULTIPATH_VERSION */
01375   /* assign the initial node to token list */
01376   node_assign_token(node, newid);
01377   
01378 #else  /* USE_DFA */
01379   
01380   /* 初期仮説: 文法上文頭に接続しうる単語集合 */
01381   /* initial words: all words that can be begin of sentence grammatically */
01382   /* アクティブな文法に属する単語のみ許す */
01383   /* only words in active grammars are allowed to be an initial words */
01384   {
01385     MULTIGRAM *m;
01386     int t,tb,te;
01387     WORD_ID iw;
01388     boolean flag;
01389 
01390     flag = FALSE;
01391     /* for all active grammar */
01392     for(m = gramlist; m; m = m->next) {
01393       if (m->active) {
01394         tb = m->cate_begin;
01395         te = tb + m->dfa->term_num;
01396         for(t=tb;t<te;t++) {
01397           /* for all word categories that belong to the grammar */
01398           if (dfa_cp_begin(dfa, t) == TRUE) {
01399             /* if the category can appear at beginning of sentence, */
01400             flag = TRUE;
01401             for (iw=0;iw<dfa->term.wnum[t];iw++) {
01402               /* create the initial token at the first node of every word that belongs to the category */
01403               i = dfa->term.tw[t][iw];
01404 #ifdef MULTIPATH_VERSION
01405               node = wchmm->wordbegin[i];
01406 #else
01407               node = wchmm->offset[i][0];
01408 #endif /* MULTIPATH_VERSION */
01409               /* in the tree lexicon, words in the same category may share the same root node, so skip it if the node already has a token */
01410               if (node_exist_token(tn, node, bos.wid) != TOKENID_UNDEFINED) continue;
01411               newid = create_token();
01412               new = &(tlist[tn][newid]);
01413               new->last_tre = &bos;
01414 #ifdef MULTIPATH_VERSION
01415               new->score = 0.0;
01416 #else
01417               new->score = outprob_style(wchmm, node, bos.wid, 0, param);
01418 #endif /* MULTIPATH_VERSION */
01419               node_assign_token(node, newid);
01420             }
01421           }
01422         }
01423       }
01424     }
01425     if (!flag) {
01426       j_printerr("Error: no initial state found in active DFA grammar!\n");
01427     }
01428   }
01429 /* 
01430  *   for (i=0;i<wchmm->winfo->num;i++) {
01431  *     if (dfa->cp_begin[wchmm->winfo->wton[i]] == TRUE) {
01432  *       node = wchmm->offset[i][0];
01433  *       if (node_exist_token(tn, node, bos.wid) != TOKENID_UNDEFINED) continue;
01434  *       newid = create_token();
01435  *       new = &(tlist[tn][newid]);
01436  *       new->last_tre = &bos;
01437  *       new->score = outprob_style(wchmm, node, bos.wid, 0, param);
01438  *       node_assign_token(node, newid);
01439  *     }
01440  *   }
01441  */
01442   
01443 #endif /* USE_DFA */
01444 }
01445 
01446 /******************************************************/
01447 /* フレーム同期ビーム探索の実行 --- 最初のフレーム用  */
01448 /* frame synchronous beam search --- first frame only */
01449 /******************************************************/
01450 
01476 void
01477 get_back_trellis_init(HTK_Param *param, WCHMM_INFO *wchmm, BACKTRELLIS *backtrellis)
01478 {
01479   
01480   /* Viterbi演算用ワークエリアのスイッチャー tl,tn の初期値設定 */
01481   /* tn: このフレーム用ID   tl: 1フレーム前のID */
01482   /* initialize switch tl, tn for Viterbi computation */
01483   /* tn: this frame  tl: last frame */
01484   tn = 0;
01485   tl = 1;
01486 
01487   /* 結果の単語トレリスを格納するバックトレリス構造体を初期化 */
01488   /* initialize backtrellis structure to store resulting word trellis */
01489   bt_prepare(backtrellis);
01490 
01491   /* ワークエリアを確保 */
01492   /* malloc work area */
01493   /* 使用するトークン量 = viterbi時に遷移先となる状態候補の数
01494    * 予測: ビーム数 x 2 (自己遷移+次状態) + 木構造化辞書のルートノード数
01495    */
01496   /* assumed initial number of needed tokens: beam width x 2 (self + next trans.)
01497    * + root node on the tree lexicon (for inter-word trans.)
01498    */
01499   /*malloc_nodes(wchmm->n);*/
01500   malloc_nodes(wchmm->n, trellis_beam_width * 2 + wchmm->startnum, trellis_beam_width);
01501   
01502   /* 初期スコアを nodescore[tn] にセット */
01503   /* set initial score to nodescore[tn] */
01504   init_nodescore(param, wchmm);
01505   sort_token_no_order(trellis_beam_width, &n_start, &n_end);
01506 
01507   /* テキスト出力を初期化 */
01508   /* initialize message output */
01509   result_pass1_begin();
01510   /* 漸次出力を行なう場合のインターバルを計算 */
01511   /* set interval frame for progout */
01512   progout_interval_frame = (int)((float)progout_interval / ((float)param->header.wshift / 10000.0));
01513 
01514   /* 進行状況表示を初期化 */
01515   /* progress bar setting */
01516   if (!realtime_flag && verbose_flag && (!progout_flag) && isatty(1)) {
01517     idc_on = TRUE;
01518   } else { 
01519     idc_on = FALSE;
01520   }
01521   
01522 #ifdef SP_BREAK_CURRENT_FRAME
01523   /* ショートポーズセグメンテーション用パラメータの初期化 */
01524   /* initialize parameter for short pause segmentation */
01525   in_sparea = TRUE;             /* assume beginning is silence */
01526   sparea_start = tmp_sparea_start = 0; /* set start frame to 0 */
01527 #ifdef SP_BREAK_RESUME_WORD_BEGIN
01528   tmp_sp_break_last_word = WORD_INVALID;
01529 #endif
01530   sp_break_last_word = WORD_INVALID;
01531   /* 最初のセグメント: 次の非ポーズフレームで第2パスへ移行しない */
01532   /* the first pause segment should always correspond to silB, so
01533      skip the first segment */
01534   first_sparea = TRUE;
01535   sp_break_2_begin_word = WORD_INVALID;
01536 #endif
01537 
01538   if (gmm != NULL) {
01539     /* initialize GMM computation */
01540     gmm_prepare(gmm);
01541   }
01542 }
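
#if 0
/* Illustrative sketch (editor addition): hypothetical shape of a first-pass
   driver using get_back_trellis_init() above and get_back_trellis_proceed()
   below.  The real callers live elsewhere in Julius (the non-realtime and
   realtime front ends); this only shows the intended calling order, assuming
   'param' holds 'framenum' frames of features.  The function name is
   hypothetical. */
static void
first_pass_driver_example(HTK_Param *param, WCHMM_INFO *wchmm,
                          BACKTRELLIS *backtrellis, int framenum)
{
  int t;
  get_back_trellis_init(param, wchmm, backtrellis);
  for (t = 1; t < framenum; t++) {
    if (get_back_trellis_proceed(t, param, wchmm, backtrellis
#ifdef MULTIPATH_VERSION
                                 , (t == framenum - 1)
#endif
                                 ) == FALSE) {
      break;   /* e.g. segmented by short-pause detection */
    }
  }
  /* ... then finalize the pass and output the result ... */
}
#endif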
01543 
01544 /* local work areas for get_back_trellis_proceed() */
01545 static TRELLIS_ATOM *tre; 
01546 static int node; 
01547 static int stid; 
01548 static A_CELL *ac; 
01549 static int next_node2;          
01550 #ifdef USE_NGRAM
01551 static LOGPROB tmpprob; 
01552 static LOGPROB *iwparray; 
01553 #endif
01554 #ifdef UNIGRAM_FACTORING
01555 /* for wordend processing with 1-gram factoring */
01556 static LOGPROB wordend_best_score; 
01557 static int wordend_best_node;   
01558 static TRELLIS_ATOM *wordend_best_tre; 
01559 static WORD_ID wordend_best_last_cword; 
01560 static int isoid; 
01561 #endif
01562 
01563 /*****************************************************/
01564 /* frame synchronous beam search --- proceed 1 frame */
01565 /* フレーム同期ビーム探索の実行 --- 1フレーム進める  */
01566 /*****************************************************/
01567 
01607 boolean
01608 get_back_trellis_proceed(int t, HTK_Param *param, WCHMM_INFO *wchmm, BACKTRELLIS *backtrellis
01609 #ifdef MULTIPATH_VERSION
01610                          , boolean final /* TRUE if this is final frame */
01611 #endif
01612                          )
01613 {
01614   LOGPROB tmpsum;
01615   int j, next_node, sword;
01616   TOKEN2  *tk, *tknext;
01617   TOKENID  tknextid;
01618 #ifdef USE_NGRAM
01619   LOGPROB ngram_score_cache;
01620 #endif
01621 
01622   /*********************/
01623   /* 1. 初期化         */
01624   /*    initialization */
01625   /*********************/
01626 
01627   /* tl と tn を入れ替えて作業領域を切り替え */
01628   /* tl (= 直前の tn) は直前フレームの結果を持つ */
01629   /* swap tl and tn to switch work buffer */
01630   /* tl (= last tn) holds result of the previous frame */
01631   tl = tn;
01632   if (tn == 0) tn = 1; else tn = 0;
01633 
01634   /* 進行状況表示を更新 */
01635   /* update progress bar */
01636   if (idc_on) {
01637 #ifdef SP_BREAK_CURRENT_FRAME
01638     if (in_sparea) j_printerr("."); else j_printerr("-");
01639 #else  /* normal */
01640     j_printerr(".");
01641 #endif /* SP_BREAK_CURRENT_FRAME */
01642   }
01643 
01644 #ifdef UNIGRAM_FACTORING
01645   /* 1-gram factoring では単語先頭での言語確率が一定で直前単語に依存しない
01646      ため,単語間 Viterbi において選ばれる直前単語は,次単語によらず共通である.
01647      よって単語終端からfactoring値のある単語先頭への遷移は1つにまとめられる.
01648      ただし,木から独立した単語については, 単語先頭で履歴に依存した2-gramが
01649      与えられるため, 最尤の単語間 Viterbi パスは次単語ごとに異なる.
01650      よってそれらについてはまとめずに別に計算する */
01651   /* In 1-gram factoring, the language score on a word-head node is constant
01652      and independent of the previous word.  So the same word hypothesis will
01653      be selected as the best previous word in the inter-word Viterbi
01654      processing.  Thus, for inter-word processing we can (1) select only
01655      the best word-end hypothesis, and then (2) run Viterbi from that node
01656      to each word-head node.  On the other hand, an isolated word,
01657      i.e. a word that shares no node with any other word, has its own
01658      word-head node, and the true 2-gram language score is determined there.
01659      In that case the best previous word hypothesis for each node differs
01660      according to the language scores, so such words must be handled separately. */
01661   /* initialize the max value used to select the best word-end hypothesis */
01662   wordend_best_score = LOG_ZERO;
01663 #endif
01664 
01665 #ifdef DEBUG
01666   /* debug */
01667   /* node_check_token(tl); */
01668 #endif
01669 
01670   /* トークンバッファを初期化: 直前フレームで使われた部分だけクリアすればよい */
01671   /* initialize token buffer: for speedup, only ones used in the last call will be cleared */
01672   clear_tokens(tl);
01673 
01674   /**************************/
01675   /* 2. Viterbi計算         */
01676   /*    Viterbi computation */
01677   /**************************/
01678   /* 直前フレームからこのフレームへの Viterbi 計算を行なう */
01679   /* tindex[tl][n_start..n_end] に直前フレーム上位ノードのIDが格納されている */
01680   /* do one viterbi computation from last frame to this frame */
01681   /* tindex[tl][n_start..n_end] holds IDs of survived nodes in last frame */
01682   for (j=n_start;j<=n_end;j++) {
01683 
01684     /* tk: 対象トークン  node: そのトークンを持つ木構造化辞書ノードID */
01685     /* tk: token data  node: lexicon tree node ID that holds the 'tk' */
01686     tk = &(tlist[tl][tindex[tl][j]]);
01687     node = tk->node;
01688     if (tk->score <= LOG_ZERO) continue; /* invalid node */
01689 
01690     /*********************************/
01691     /* 2.1. 単語内遷移               */
01692     /*      word-internal transition */
01693     /*********************************/
01694     for (ac = wchmm->state[node].ac; ac; ac = ac->next) {
01695       next_node = ac->arc;
01696       /* now, 'node' is the source node, 'next_node' is the destination node,
01697          and ac->a holds the transition probability */
01698       /* tk->score is the accumulated score at the 'node' on previous frame */
01699 
01700       /******************************************************************/
01701       /* 2.1.1 遷移先へのスコア計算(遷移確率+言語スコア)               */
01702       /*       compute score of destination node (transition prob + LM) */
01703       /******************************************************************/
01704       tmpsum = tk->score + ac->a;
01705 #ifdef USE_NGRAM
01706       ngram_score_cache = LOG_ZERO;
01707 #endif
01708       /* the next score at 'next_node' will be computed on 'tmpsum', and
01709          the new LM probability (if updated) will be stored on 'ngram_score_cache' at below */
01710 
01711 #ifndef CATEGORY_TREE
01712       /* 言語スコア factoring:
01713          arcが自己遷移でない単語内の遷移で,かつ遷移先にsuccessorリスト
01714          があれば,lexicon tree の分岐部分の遷移である */
01715       /* LM factoring:
01716          if this is not a self-transition and the destination node has a
01717          successor list, this is a branching transition
01718        */
01719       if (next_node != node) {
01720         if (wchmm->state[next_node].scid != 0
01721 #ifdef UNIGRAM_FACTORING
01722             /* 1-gram factoring 使用時は, 複数で共有される枝では
01723                wchmm->state[node].scid は負の値となり,その絶対値を
01724                添字として wchmm->fscore[] に単語集合の1-gramの最大値が格納
01725                されている.末端の枝(複数単語で共有されない)では,
01726                wchmm->state[node].scid は正の値となり,
01727                1単語を sc として持つのでそこで正確な2-gramを計算する */
01728             /* With 1-gram factoring,
01729                wchmm->state[node].scid is negative for shared branches.
01730                In that case the maximum 1-gram probability of the sub-tree
01731                is stored in wchmm->fscore[- wchmm->state[node].scid].
01732                For leaf branches (with only one successor word), the scid is
01733                greater than 0 and the word ID is held in
01734                wchmm->sclist[wchmm->state[node].scid],
01735                so the precise 2-gram is computed at this point */
01736 #endif
01737             ){
01738 #ifdef USE_NGRAM
01739           /* ここで言語モデル確率を更新する */
01740           /* the LM value should be updated at this transition */
01741           /* N-gram確率からfactoring 値を計算 */
01742           /* compute new factoring value from N-gram probabilities */
01743 #ifdef FIX_PENALTY
01744           /* at the beginning of a sentence, do not add lm_penalty */
01745           if (tk->last_cword == WORD_INVALID) {
01746             ngram_score_cache = max_successor_prob(wchmm, tk->last_cword, next_node) * lm_weight;
01747           } else {
01748             ngram_score_cache = max_successor_prob(wchmm, tk->last_cword, next_node) * lm_weight + lm_penalty;
01749           }
01750 #else
01751           ngram_score_cache = max_successor_prob(wchmm, tk->last_cword, next_node) * lm_weight + lm_penalty;
01752 #endif
01753           /* スコアの更新: tk->last_lscore に単語内での最後のfactoring値が
01754            入っているので, それをスコアから引いてリセットし, 新たなスコアを
01755            セットする */
01756           /* update score: since 'tk->last_lscore' holds the last LM factoring
01757              value in this word, we first remove the score from the current
01758              score, and then add the new LM value computed above. */
01759           tmpsum -= tk->last_lscore;
01760           tmpsum += ngram_score_cache;
01761           
01762 #else  /* USE_DFA --- not CATEGORY_TREE */
01763           /* 文法を用いる場合, カテゴリ単位の木構造化がなされていれば,
01764              接続制約は単語間遷移のみで扱われるので,factoring は必要ない.
01765              カテゴリ単位木構造化が行われない場合, 文法間の接続制約はここ
01766              で factoring で行われることになる.*/
01767           /* With DFA, we use the category-pair constraint extracted from the
01768              DFA in this 1st pass.  If the tree lexicon is composed per word
01769              category, each category tree has the same connection
01770              constraint, and the constraint can be applied at the cross-word
01771              transition.  This per-category tree lexicon is enabled by default,
01772              in which case the constraint is applied at the word end.
01773              If the per-category tree lexicon is disabled by undefining
01774              'CATEGORY_TREE', the word-pair constraint is applied here in a
01775              factoring style.
01776           */
01777           
01778           /* 決定的factoring: 直前単語に対して,sub-tree内にカテゴリ対制約上
01779              接続しうる単語が1つもなければ, この遷移は不可 */
01780           /* deterministic factoring in grammar mode:
01781              the transition is disabled if no word in the sub-tree can
01782              grammatically (under the category-pair constraint) connect
01783              to the previous word.
01784            */
01785           if (!can_succeed(wchmm, tk->last_tre->wid, next_node)) {
01786             tmpsum = LOG_ZERO;
01787           }
01788           
01789 #endif /* USE_NGRAM */
01790         }
01791       }
01792 #endif /* CATEGORY_TREE */
01793       /* factoring not needed when DFA mode and uses category-tree */
01794 
01795       /****************************************/
01796       /* 2.1.2 遷移先ノードへトークン伝搬     */
01797       /*       pass token to destination node */
01798       /****************************************/
01799 
01800       if ((tknextid = node_exist_token(tn, next_node, tk->last_tre->wid)) != TOKENID_UNDEFINED) {
01801         /* 遷移先ノードには既に他ノードから伝搬済み: スコアが高いほうを残す */
01802         /* the destination node already has a token: compare score */
01803         tknext = &(tlist[tn][tknextid]);
01804         if (tknext->score < tmpsum) {
01805           /* その遷移先ノードが持つトークンの内容を上書きする(新規トークンは作らない) */
01806           /* overwrite the content of existing destination token: not create a new token */
01807           tknext->last_tre = tk->last_tre; /* propagate last word info */
01808 #ifdef USE_NGRAM
01809           tknext->last_cword = tk->last_cword; /* propagate last context word info */
01810           tknext->last_lscore = (ngram_score_cache != LOG_ZERO) ? ngram_score_cache : tk->last_lscore; /* set new LM score */
01811 #endif /* USE_NGRAM */
01812           tknext->score = tmpsum; /* set new score */
01813         }
01814       } else {
01815         /* 遷移先ノードは未伝搬: 新規トークンを作って割り付ける */
01816         /* token unassigned: create new token and assign */
01817         if (tmpsum > LOG_ZERO) { /* valid token */
01818           tknextid = create_token(); /* get new token */
01819           tknext = &(tlist[tn][tknextid]);
01820           /* if the work area has been expanded in 'create_token()' above,
01821              the internal 'myrealloc()' may have moved the buffer and
01822              invalidated the 'tk' pointer, so re-derive tk from the token index */
01823           tk = &(tlist[tl][tindex[tl][j]]);
01824           tknext->last_tre = tk->last_tre; /* propagate last word info */
01825 #ifdef USE_NGRAM
01826           tknext->last_cword = tk->last_cword; /* propagate last context word info */
01827           tknext->last_lscore = (ngram_score_cache != LOG_ZERO) ? ngram_score_cache : tk->last_lscore; /* set new LM score */
01828 #endif /* USE_NGRAM */
01829           tknext->score = tmpsum; /* set new score */
01830           node_assign_token(next_node, tknextid); /* assign this new token to the next node */
01831         }
01832       }
01833     } /* END OF WORD-INTERNAL TRANSITION */
01834 
01835 #ifdef MULTIPATH_VERSION
01836     
01837   }
01838   /*******************************************************/
01839   /* 3. スコアでトークンをソートしビーム幅分の上位を決定 */
01840   /*    sort tokens by score up to beam width            */
01841   /*******************************************************/
01842   /* use heap sort to extract the top (bwidth) tokens of this stage */
01843   /* (ordering within the top set is not needed) */
01844   sort_token_no_order(trellis_beam_width, &n_start, &n_end);
01845   
01846   /*************************/
01847   /* 4. 単語間Viterbi計算  */
01848   /*    cross-word viterbi */
01849   /*************************/
01850   
01851   for(j=n_start; j<=n_end; j++) {
01852     tk = &(tlist[tn][tindex[tn][j]]);
01853     node = tk->node;
01854      
01855 #endif /* MULTIPATH_VERSION */
01856 
01857     /* 遷移元ノードが単語終端ならば */
01858     /* if source node is end state of a word, */
01859     if (wchmm->stend[node] != WORD_INVALID) {
01860 
01861       sword = wchmm->stend[node];
01862 
01863       /**************************/
01864       /* 2.2. トレリス単語保存  */
01865       /*      save trellis word */
01866       /**************************/
01867 
01868       /* この遷移元の単語終端ノードは「直前フレームで」生き残ったノード.
01869          (「このフレーム」でないことに注意!!)
01870          よってここで, 時間(t-1) を単語終端とするトレリス上の単語仮説
01871          (TRELLIS_ATOM)として,単語トレリス構造体に保存する.*/
01872       /* This source node (= word end node) has survived in the
01873          "last" frame (notice: not "this" frame!!).  So this word end
01874          is saved here to the word trellis structure (BACKTRELLIS) as a
01875          trellis word (TRELLIS_ATOM) with end frame (t-1). */
01876       tre = (TRELLIS_ATOM *)mybmalloc2(sizeof(TRELLIS_ATOM), &(backtrellis->root));
01877       tre->wid = sword;         /* word ID */
01878       tre->backscore = tk->score; /* log score (AM + LM) */
01879       tre->begintime = tk->last_tre->endtime + 1; /* word beginning frame */
01880       tre->endtime   = t-1;     /* word end frame */
01881       tre->last_tre  = tk->last_tre; /* link to previous trellis word */
01882 #ifdef USE_NGRAM
01883       tre->lscore    = tk->last_lscore; /* log score (LM only) */
01884 #endif
01885       bt_store(backtrellis, tre); /* save to backtrellis */
01886 #ifdef WORD_GRAPH
01887       if (tre->last_tre != NULL) {
01888         /* mark the previous word to indicate that a following word has survived in the beam */
01889         tre->last_tre->within_context = TRUE;
01890       }
01891 #ifdef MULTIPATH_VERSION
01892       if (final) {
01893         /* last node */
01894         if (tre->wid == winfo->tail_silwid) {
01895           tre->within_context = TRUE;
01896         }
01897       }
01898 #endif
01899 #endif /* WORD_GRAPH */
01900       
01901       /******************************/
01902       /* 2.3. 単語間遷移            */
01903       /*      cross-word transition */
01904       /******************************/
01905 
01906 #ifdef MULTIPATH_VERSION
01907       /* 最終フレームであればここまで:遷移はさせない */
01908       /* If this is a final frame, does not do cross-word transition */
01909       if (final) continue;
01910 #endif
01911 
01912       /* 先ほどのトレリス単語treが,ここからの遷移先の単語先頭ノード以降の
01913          直前単語情報としても参照される */
01914       /* The trellis atom 'tre' will be referred to as the word context
01915          information for the next word-beginning nodes */
01916 
01917 #ifdef UNIGRAM_FACTORING
01918       /* ここで処理されるのは isolated words のみ,
01919          shared nodes はまとめてこのループの外で計算する */
01920       /* Only the isolated words will be processed here.
01921          The shared nodes with constant factoring values will be computed
01922          after this loop */
01923 #endif
01924 
01925 #ifdef USE_NGRAM
01926       /* 遷移元単語が末尾単語の終端なら,どこへも遷移させない */
01927       /* do not allow transition if the source word is end-of-sentence word */
01928       if (sword == wchmm->winfo->tail_silwid) continue;
01929 
01930 #ifdef UNIGRAM_FACTORING
01931       /* あとで共有単語先頭ノードに対して単語間遷移をまとめて計算するため,*/
01932       /* このループ内では最大尤度を持つ単語終端ノードを記録しておく */
01933       /* here we will record the best wordend node of maximum likelihood
01934          at this frame, to compute later the cross-word transitions toward
01935          shared factoring word-head node */
01936       if (wordend_best_score < tk->score
01937 #ifndef MULTIPATH_VERSION
01938           + wchmm->wordend_a[sword]
01939 #endif
01940           ) {
01941         wordend_best_score = tk->score
01942 #ifndef MULTIPATH_VERSION
01943           + wchmm->wordend_a[sword]
01944 #endif
01945           ;
01946         wordend_best_node = node;
01947         wordend_best_tre = tre;
01948         wordend_best_last_cword = tk->last_cword;
01949       }
01950 #endif
01951       
01952       /* N-gramにおいては常に全単語の接続を考慮する必要があるため,
01953          ここで単語間の言語確率値をすべて計算しておく.
01954          キャッシュは max_successor_prob_iw() 内で考慮.*/
01955       /* Since any word can follow any word under the N-gram model, we first
01956          compute all the inter-word LM probabilities here.
01957          Caching is handled inside max_successor_prob_iw(). */
01958       if (wchmm->winfo->is_transparent[sword]) {
01959         iwparray = max_successor_prob_iw(wchmm, tk->last_cword);
01960       } else {
01961         iwparray = max_successor_prob_iw(wchmm, sword);
01962       }
01963 #endif
01964 
01965       /* すべての単語始端ノードに対して以下を実行 */
01966       /* for all beginning-of-word nodes, */
01967       /* wchmm->startnode[0..stid-1] ... 単語始端ノードリスト */
01968       /* wchmm->startnode[0..stid-1] ... list of word start node (shared) */
01969       for (stid = wchmm->startnum - 1; stid >= 0; stid--) {
01970         next_node = wchmm->startnode[stid];
01971 #ifdef MULTIPATH_VERSION
01972 #ifdef USE_NGRAM
01973         /* connection to the head silence word is not allowed */
01974         if (wchmm->wordbegin[wchmm->winfo->head_silwid] == next_node) continue;
01975 #endif
01976 #endif
01977 
01978         /*****************************************/
01979         /* 2.3.1. 単語間言語制約を適用           */
01980         /*        apply cross-word LM constraint */
01981         /*****************************************/
01982         
01983 #ifdef USE_NGRAM
01984         /* N-gram確率を計算 */
01985         /* compute N-gram probability */
01986 #ifdef UNIGRAM_FACTORING
01987         /* 1-gram factoring における単語間言語確率キャッシュの効率化:
01988            1-gram factoring は単語履歴に依存しないので,
01989            ここで参照する factoring 値の多くは
01990            wchmm->fscore[] に既に格納され, 探索中も不変である.
01991            よって計算が必要な単語(どの単語ともノードを共有しない単語)
01992            についてのみ iwparray[] で計算・キャッシュする. */
01993         /* Efficient cross-word LM cache:
01994            As 1-gram factoring values are independent of the word context,
01995            they remain unchanged during the search.  So, in cross-word LM
01996            computation, beginning-of-word states that share nodes with
01997            others and have a factoring value in wchmm need no cache;
01998            only the unshared beginning-of-word states are computed and
01999            cached here in iwparray[].
02000          */
02001         /* wchmm,start2isolate[0..stid-1] ... ノードを共有しない単語は
02002            その通しID, 共有する(キャッシュの必要のない)単語は -1 */
02003         /* wchmm->start2isolate[0..stid-1] ... isolate ID for each
02004            beginning-of-word state.  Value: -1 for states that have a
02005            1-gram factoring value (i.e. share nodes with other words),
02006            and a unique ID for unshared words
02007          */
02008         isoid = wchmm->start2isolate[stid];
02009         /* 計算が必要でない単語先頭ノードはパスをまとめて後に計算するので
02010            ここではスキップ */
02011         /* the shared nodes will be computed afterward, so just skip them
02012            here */
02013         if (isoid == -1) continue;
02014         tmpprob = iwparray[isoid];
02015 #else
02016         tmpprob = iwparray[stid];
02017 #endif
02018 #endif
02019 
02020 #ifdef USE_NGRAM
02021         /* 遷移先の単語が先頭単語なら遷移させない.
02022            これは wchmm.c で該当単語に stid を割り振らないことで対応
02023            しているので,ここでは何もしなくてよい */
02024         /* do not allow transition if the destination word is
02025            beginning-of-sentence word.  This limitation is realized by
02026            not assigning 'stid' for the word in wchmm.c, so we have nothing
02027            to do here.
02028         */
02029 #endif
02030         
02031 #ifdef CATEGORY_TREE
02032         /* 文法の場合, 制約は決定的: カテゴリ対制約上許されない場合は遷移させない */
02033         /* With DFA and per-category tree lexicon,
02034            LM constraint is deterministic:
02035            do not allow transition if the category connection is not allowed
02036            (with category tree, constraint can be determined on top node) */
02037         if (dfa_cp(dfa, wchmm->winfo->wton[sword],
02038 #ifdef MULTIPATH_VERSION
02039                    wchmm->winfo->wton[wchmm->start2wid[stid]]
02040 #else
02041                    wchmm->winfo->wton[wchmm->ststart[next_node]]
02042 #endif /* MULTIPATH_VERSION */
02043                    ) == FALSE) continue;
02044 #endif
02045 
02046         /*******************************************************************/
02047         /* 2.3.2. 遷移先の単語先頭へのスコア計算(遷移確率+言語スコア)     */
02048         /*        compute score of destination node (transition prob + LM) */
02049         /*******************************************************************/
02050         tmpsum = tk->score
02051 #ifndef MULTIPATH_VERSION
02052           + wchmm->wordend_a[sword]
02053 #endif
02054           ;
02055         /* 'tmpsum' now holds outgoing score from the wordend node */
02056 #ifdef USE_NGRAM
02057         /* 言語スコアを追加 */
02058         /* add LM score */
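        /* note: tmpprob is the (factored) N-gram log probability; it is scaled
           by the LM weight and offset by the per-word insertion penalty */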
02059         ngram_score_cache = tmpprob * lm_weight + lm_penalty;
02060         tmpsum += ngram_score_cache;
02061         if (wchmm->winfo->is_transparent[sword] && wchmm->winfo->is_transparent[tk->last_cword]) {
02062           
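          /* both this word and its previous context word are transparent
             (filler) words: add the additional transparent-word penalty */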
02063           tmpsum += lm_penalty_trans;
02064         }
02065 #else  /* USE_DFA */
02066         /* grammar: 単語挿入ペナルティを追加 */
02067         /* grammar: add insertion penalty */
02068         tmpsum += penalty1;
02069 
02070         /* grammar: deterministic factoring (in case category-tree not enabled) */
02071 #ifdef CATEGORY_TREE
02072         /* no need to factoring */
02073 #else
02074         if (!can_succeed(wchmm, sword, next_node)) {
02075           tmpsum = LOG_ZERO;
02076         }
02077 #endif /* CATEGORY_TREE */
02078 #endif /* USE_NGRAM */
02079 
02080 
02081         /*********************************************************************/
02082         /* 2.3.3. 遷移先ノードへトークン伝搬(単語履歴情報は更新)             */
02083         /*        pass token to destination node (updating word-context info) */
02084         /*********************************************************************/
02085 
02086 #ifndef MULTIPATH_VERSION
02087         next_node2 = next_node;
02088 #else
02089         for(ac=wchmm->state[next_node].ac; ac; ac=ac->next) {
02090           next_node2 = ac->arc;
02091 #endif
02092           
02093           if ((tknextid = node_exist_token(tn, next_node2, tre->wid)) != TOKENID_UNDEFINED) {
02094             /* 遷移先ノードには既に他ノードから伝搬済み: スコアが高いほうを残す */
02095             /* the destination node already has a token: compare score */
02096             tknext = &(tlist[tn][tknextid]);
02097             if (tknext->score < tmpsum) {
02098               /* その遷移先ノードが持つトークンの内容を上書きする(新規トークンは作らない) */
02099               /* overwrite the content of the existing destination token (do not create a new token) */
02100 #ifdef USE_NGRAM
02101               tknext->last_lscore = ngram_score_cache; /* set new LM score */
02102               /* set last context word */
02103               if (wchmm->winfo->is_transparent[sword]) {
02104                 /* if this word is a "transparent" word, this word should be
02105                    excluded from a word history as a context for LM. In the case,
02106                    propagate the last context word to the new node */
02107                 tknext->last_cword = tk->last_cword;
02108               } else {
02109                 /* otherwise, set last context word to this word */
02110                 tknext->last_cword = sword;
02111               }
02112 #endif
02113               /* set new score */
02114               tknext->score = tmpsum;
02115 #ifdef MULTIPATH_VERSION
02116               tknext->score += ac->a;
02117 #endif
02118               /* 遷移先に,先ほど保存したswordのトレリス単語へのポインタを
02119                  「直前単語履歴情報」として伝搬 */
02120               /* pass the destination the pointer to the saved trellis atom
02121                  corresponding to 'sword' as its "previous word-context info" */
02122               tknext->last_tre = tre;
02123             }
02124           } else {
02125             /* 遷移先ノードは未伝搬: 新規トークンを作って割り付ける */
02126             /* token unassigned: create new token and assign */
02127             if (tmpsum > LOG_ZERO) { /* valid token */
02128               tknextid = create_token();
02129               tknext = &(tlist[tn][tknextid]);
02130               /* if the work area has been expanded at 'create_token()' above,
02131                  the realloc() inside may have invalidated the 'tk' pointer,
02132                  so we set tk again here from the token index */
02133 #ifdef MULTIPATH_VERSION
02134               tk = &(tlist[tn][tindex[tn][j]]);
02135 #else
02136               tk = &(tlist[tl][tindex[tl][j]]);
02137 #endif
02138 #ifdef USE_NGRAM
02139               tknext->last_lscore = ngram_score_cache; /* set new LM score */
02140               /* set last context word */
02141               if (wchmm->winfo->is_transparent[sword]) {
02142                 /* if this word is a "transparent" word, this word should be
02143                    excluded from a word history as a context for LM. In the case,
02144                    propagate the last context word to the new node */
02145                 tknext->last_cword = tk->last_cword;
02146               } else {
02147                 /* otherwise, set last context word to this word */
02148                 tknext->last_cword = sword;
02149               }
02150 #endif
02151               tknext->score = tmpsum;
02152 #ifdef MULTIPATH_VERSION
02153               tknext->score += ac->a;
02154 #endif
02155               /* 遷移先に,先ほど保存したswordのトレリス単語へのポインタを
02156                  「直前単語履歴情報」として伝搬 */
02157               /* pass the destination the pointer to the saved trellis atom
02158                  corresponding to 'sword' as its "previous word-context info" */
02159               tknext->last_tre = tre;
02160               /* assign to the next node */
02161               node_assign_token(next_node2, tknextid);
02162             }
02163           }
02164 #ifdef MULTIPATH_VERSION
02165         }
02166 #endif
02167         
02168       } /* end of next word heads */
02169     } /* end of cross-word processing */
02170     
02171   } /* end of main viterbi loop */
02172 #ifdef UNIGRAM_FACTORING
02173   /***********************************************************/
02174   /* 2.4 単語終端からfactoring付き単語先頭への遷移 ***********/
02175   /*    transition from wordend to shared (factorized) nodes */
02176   /***********************************************************/
02177   /* wordend_best_* hold the best word-end token found at this frame. */
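  /* Since the 1-gram factoring score added below does not depend on the
     preceding word, only the single best word-end token of this frame can
     win at the shared (factorized) word-start nodes, so only that token is
     propagated to them here. */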
02178   if (wordend_best_score > LOG_ZERO) {
02179     node = wordend_best_node;
02180     sword = wchmm->stend[node];
02181     for (stid = wchmm->startnum - 1; stid >= 0; stid--) {
02182       next_node = wchmm->startnode[stid];
02183       /* compute transition from 'node' at end of word 'sword' to 'next_node' */
02184       /* skip isolated words already calculated in the above main loop */
02185       if (wchmm->start2isolate[stid] != -1) continue;
02186       /* rest should have 1-gram factoring score at wchmm->fscore */
02187       if (wchmm->state[next_node].scid >= 0) {
02188         j_error("InternalError: scid mismatch at 1-gram factoring of shared states\n");
02189       }
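      /* a negative scid is an index into the precomputed 1-gram factoring
         scores: the value is stored at wchmm->fscore[-scid] */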
02190       tmpprob = wchmm->fscore[- wchmm->state[next_node].scid];
02191       ngram_score_cache = tmpprob * lm_weight + lm_penalty;
02192       tmpsum = wordend_best_score;
02193       tmpsum += ngram_score_cache;
02194       if (wchmm->winfo->is_transparent[sword] && wchmm->winfo->is_transparent[wordend_best_last_cword]) {
02195         tmpsum += lm_penalty_trans;
02196       }
02197       
02198 #ifndef MULTIPATH_VERSION
02199       next_node2 = next_node;
02200 #else
02201       for(ac=wchmm->state[next_node].ac; ac; ac=ac->next) {
02202         next_node2 = ac->arc;
02203 #endif
02204         if ((tknextid = node_exist_token(tn, next_node2, sword)) != TOKENID_UNDEFINED) {
02205           tknext = &(tlist[tn][tknextid]);
02206           if (tknext->score < tmpsum) {
02207             tknext->last_lscore = ngram_score_cache;
02208             if (wchmm->winfo->is_transparent[sword]) {
02209               tknext->last_cword = wordend_best_last_cword;
02210             } else {
02211               tknext->last_cword = sword;
02212             }
02213             tknext->score = tmpsum;
02214 #ifdef MULTIPATH_VERSION
02215             tknext->score += ac->a;
02216 #endif
02217             tknext->last_tre = wordend_best_tre;
02218           }
02219         } else {
02220           if (tmpsum > LOG_ZERO) { /* valid token */
02221             tknextid = create_token();
02222             tknext = &(tlist[tn][tknextid]);
02223             tknext->last_lscore = ngram_score_cache;
02224             if (wchmm->winfo->is_transparent[sword]) {
02225               tknext->last_cword = wordend_best_last_cword;
02226             } else {
02227               tknext->last_cword = sword;
02228             }
02229             tknext->score = tmpsum;
02230 #ifdef MULTIPATH_VERSION
02231             tknext->score += ac->a;
02232 #endif
02233             tknext->last_tre = wordend_best_tre;
02234             node_assign_token(next_node2, tknextid);
02235           }
02236         }
02237 #ifdef MULTIPATH_VERSION
02238       }
02239 #endif
02240       
02241     }
02242   }
02243 #endif /* UNIGRAM_FACTORING */
02244 
02245   /***************************************/
02246   /* 3. 状態の出力確率計算               */
02247   /*    compute state output probability */
02248   /***************************************/
02249 
02250   /* 次段の有効ノードについて出力確率を計算してスコアに加える */
02251   /* compute outprob for new valid (token assigned) nodes and add to score */
02252 #ifdef MULTIPATH_VERSION
02253   /* 今扱っているのが入力の最終フレームの場合出力確率は計算しない */
02254   /* do not compute output probabilities for the final input frame (transitions only) */
02255   if (! final) {
02256 #endif
02257     for (j=0;j<tnum[tn];j++) {
02258       tk = &(tlist[tn][tindex[tn][j]]);
02259 #ifdef MULTIPATH_VERSION
02260       /* skip non-output state */
02261       if (wchmm->state[tk->node].out.state == NULL) continue;
02262 #endif
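      /* add the acoustic output log-likelihood of this node's HMM state
         at frame t to the token score */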
02263       tk->score += outprob_style(wchmm, tk->node, tk->last_tre->wid, t, param);
02264     }
02265 #ifdef MULTIPATH_VERSION
02266   }
02267 #endif
02268 
02269   if (gmm != NULL) {
02270     /* perform GMM computation for this frame */
02271     gmm_proceed(gmm, param, t);
02272   }
02273 
02274   /*******************************************************/
02275   /* 4. スコアでトークンをソートしビーム幅分の上位を決定 */
02276   /*    sort tokens by score up to beam width            */
02277   /*******************************************************/
02278 
02279   /* reset tlist[tl] for the next frame */
02280   clear_tlist(tl);
02281 
02282   /* use heap sort to select the top (bwidth) tokens of this frame */
02283   /* (their relative order within the selected set is not needed) */
02284   sort_token_no_order(trellis_beam_width, &n_start, &n_end);
02285 
02286   /* check for sort result */
02287 /* 
02288  *     j_printf("%d: vwidth=%d / %d\n",t, vwidth[tn], wchmm->n);
02289  */
02290   /* make sure no LOG_ZERO exists within vwidth[tn] */
02291 /* 
02292  *     for (j=0;j<vwidth[tn];j++) {
02293  *       if (nodescore[tn][hsindex[tn][j+1]] <= LOG_ZERO) {
02294  *         j_error("dsadsadas %d %d\n",hsindex[tn][j+1], nodescore[tn][hsindex[tn][j+1]]);
02295  *       }
02296  *     }
02297  */
02298   
02299   /***************/
02300   /* 5. 終了処理 */
02301   /*    finalize */
02302   /***************/
02303   if (progout_flag) {
02304     /* 漸次出力: 現フレームのベストパスを一定時間おきに上書き出力 */
02305     /* progressive result output: output current best path in certain time interval */
02306     if ((t % progout_interval_frame) == 0) {
02307       bt_current_max(backtrellis, t-1, wchmm->winfo);
02308     }
02309   }
02310   /* j_printf("%d: %d\n",t,tnum[tn]); */
02311   /* for debug: output current max word */
02312   if (debug2_flag) {
02313     bt_current_max_word(backtrellis, t-1, wchmm->winfo);
02314   }
02315 
02316 #ifdef SP_BREAK_CURRENT_FRAME
02317   /* short-pause segmentation: check whether a segment boundary was detected at the previous frame */
02318   if (detect_end_of_segment(backtrellis, t-1, wchmm)) {
02319     /* end of segment detected: suspend the 1st pass here */
02320     return FALSE;               /* segment: [sparea_start..t-2] */
02321   }
02322 #endif
02323 
02324   /* if the number of nodes in the beam has dropped to 0, force termination */
02325   if (tnum[tn] == 0) {
02326     j_printerr("Error: %dth frame: no nodes left in beam! model mismatch or wrong input?\n", t);
02327     return(FALSE);
02328   }
02329 
02330   return(TRUE);
02331     
02332 }
02333 
02334 /*************************************************/
02335 /* frame synchronous beam search --- last frame  */
02336 /* フレーム同期ビーム探索の実行 --- 最終フレーム */
02337 /*************************************************/
02338 
02362 void
02363 get_back_trellis_end(HTK_Param *param, WCHMM_INFO *wchmm, BACKTRELLIS *backtrellis)
02364 {
02365   int t, node, j;
02366   TOKEN2 *tk;
02367 
02368   /* 最後にビーム内に残った単語終端トークンを処理する */
02369   /* process the last wordend tokens */
02370 
02371 #ifdef MULTIPATH_VERSION
02372   
02373   /* 単語末ノードへの遷移のみ計算 */
02374   /* only transitions to word-end nodes are computed */
02375   get_back_trellis_proceed(param->samplenum, param, wchmm, backtrellis, TRUE);
02376   
02377 #else  /* ~MULTIPATH_VERSION */
02378   
02379   t = param->samplenum;
02380   tl = tn;
02381   if (tn == 0) tn = 1; else tn = 0;
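  /* the token lists are double-buffered; the swap above makes 'tl' refer to
     the list of the last processed frame, which is scanned below */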
02382   for (j=n_start; j<=n_end; j++) {
02383     tk = &(tlist[tl][tindex[tl][j]]);
02384     node = tk->node;
02385     if (wchmm->stend[node] != WORD_INVALID) {
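      /* this node is a word-end state: store the word that survived to the
         last frame as a trellis atom in the backtrellis (input for the 2nd pass) */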
02386       tre = (TRELLIS_ATOM *)mybmalloc2(sizeof(TRELLIS_ATOM), &(backtrellis->root));
02387       tre->wid = wchmm->stend[node];
02388       tre->backscore = tk->score;
02389       tre->begintime = tk->last_tre->endtime + 1;
02390       tre->endtime   = t-1;
02391       tre->last_tre  = tk->last_tre;
02392 #ifdef USE_NGRAM
02393       tre->lscore    = tk->last_lscore;
02394 #endif
02395       bt_store(backtrellis, tre);
02396 #ifdef WORD_GRAPH
02397       if (tre->last_tre != NULL) {
02398         /* mark to indicate that a following word survived in the beam */
02399         tre->last_tre->within_context = TRUE;
02400       }
02401       /* last node */
02402       if (tre->wid == winfo->tail_silwid) {
02403         tre->within_context = TRUE;
02404       }
02405 #endif
02406     }
02407   }
02408 
02409 #endif /* MULTIPATH_VERSION */
02410 
02411 #ifdef SP_BREAK_CURRENT_FRAME
02412   /*if (detect_end_of_segment(backtrellis, param->samplenum-1, winfo)) {
02413     return;
02414     }*/
02415 #endif
02416 }
02417 
02418 /*************************/
02419 /* 探索終了 --- 終了処理 */
02420 /* end of search         */
02421 /*************************/
02454 LOGPROB
02455 finalize_1st_pass(BACKTRELLIS *backtrellis, WORD_INFO *winfo, int len)
02456 {
02457   LOGPROB lastscore;
02458   int mseclen;
02459   boolean ok_p;
02460  
02461   backtrellis->framelen = len;
02462 
02463   /* rejectshort 指定時, 入力が短ければここで第1パス結果を出力しない */
02464   /* ここでは reject はまだ行わず, 後で reject する */
02465   /* suppress 1st pass output if -rejectshort and input shorter than specified */
02466   /* the actual rejection will be done later at main */
02467   ok_p = TRUE;
02468   if (rejectshortlen > 0) {
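    /* convert the input length from frames to milliseconds:
       len (frames) x frameshift (samples/frame) x smp_period
       (in 100 ns units, HTK convention) / 10000 = length in msec */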
02469     mseclen = (float)len * (float)para.smp_period * (float)para.frameshift / 10000.0;
02470     if (mseclen < rejectshortlen) {
02471       ok_p = FALSE;
02472     }
02473   }
02474   
02475   /* 単語トレリス(backtrellis) を整理: トレリス単語の再配置とソート */
02476   /* re-arrange backtrellis: index them by frame, and sort by word ID */
02477   if (ok_p) {
02478     bt_relocate_rw(backtrellis);
02479     bt_sort_rw(backtrellis);
02480     if (backtrellis->num == NULL) {
02481       if (backtrellis->framelen > 0) j_printerr("no survived word!\n");
02482       ok_p = FALSE;
02483     }
02484   }
02485 
02486   /* 結果を表示する (best 仮説)*/
02487   /* output 1st pass result (best hypothesis) */
02488   if (verbose_flag && (!progout_flag)) j_printerr("\n");
02489   if (ok_p) {
02490     lastscore = print_1pass_result(backtrellis, len, winfo);
02491   } else {
02492     lastscore = LOG_ZERO;
02493   }
02494 
02495 #ifdef USE_NGRAM
02496   /* free successor cache */
02497   /* 次の認識でwchmm->winfoともに無変更の場合 free する必要なし */
02498   /* no need to free if wchmm and its winfo are unchanged for the next recognition */
02499   /* max_successor_cache_free(); */
02500 #endif
02501   /* free nodes */
02502   free_nodes();
02503   
02504   if (ok_p) {
02505     if (gmm != NULL) {
02507       /* finish GMM computation */
02507       gmm_end(gmm);
02508     }
02509   }
02510 
02511   /* return the best score */
02512   return(lastscore);
02513 }
02514 
02515 #ifdef SP_BREAK_CURRENT_FRAME
02516 /*******************************************************************/
02517 /* 第1パスセグメント終了処理 (ショートポーズセグメンテーション用) */
02518 /* end of 1st pass for a segment (for short pause segmentation)    */
02519 /*******************************************************************/
02549 void
02550 finalize_segment(BACKTRELLIS *backtrellis, HTK_Param *param, int len)
02551 {
02552   int t;
02553 
02554   /* トレリス始終端における最尤単語を第2パスの始終端単語として格納 */
02555   /* fix initial/last word hypothesis of the next 2nd pass to the best
02556      word hypothesis at the first/last frame in backtrellis*/
02557   set_terminal_words(backtrellis);
02558 
02559   /* パラメータを, 今第1パスが終了したセグメント区間と残りの区間に分割する.
02560      ただし接続部のsp区間部分(sparea_start..len-1)は「のりしろ」として両方に
02561      コピーする */
02562   /* Divide the input parameter into two: the segment just processed and
02563      the rest.  The short-pause area (sparea_start..len-1) serves as an
02564      overlap margin and is copied into both parameters.
02565    */
02566   /* param[sparea_start..framelen] -> rest_param
02567      param[0..len-1] -> param
02568      [sparea_start...len-1] overlapped
02569   */
02570 
02571   if (len != param->samplenum) {
02572 
02573     VERMES("segmented: processed length=%d\n", len);
02574     VERMES("segmented: next decoding will restart from %d\n", sparea_start);
02575     
02576     /* copy rest parameters for next process */
02577     rest_param = new_param();
02578     memcpy(&(rest_param->header), &(param->header), sizeof(HTK_Param_Header));
02579     rest_param->samplenum = param->samplenum - sparea_start;
02580     rest_param->header.samplenum = rest_param->samplenum;
02581     rest_param->veclen = param->veclen;
02582     rest_param->parvec = (VECT **)mymalloc(sizeof(VECT *) * rest_param->samplenum);
02583     /* copy 1: overlap area (copy) */
02584     for(t=sparea_start;t<len;t++) {
02585       rest_param->parvec[t-sparea_start] = (VECT *)mymalloc(sizeof(VECT) * rest_param->veclen);
02586       memcpy(rest_param->parvec[t-sparea_start], param->parvec[t], sizeof(VECT) * rest_param->veclen);
02587     }
02588     /* copy 2: rest area (move) */
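    /* only the frame-vector pointers are moved here; the overlap frames above
       were deep-copied so that both the shrunk param and rest_param own them */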
02589     for(t=len;t<param->samplenum;t++) {
02590       rest_param->parvec[t-sparea_start] = param->parvec[t];
02591     }
02592 
02593     /* shrink original param to [0..len-1] */
02594     /* just shrink the parvec */
02595     param->samplenum = len;
02596     param->parvec = (VECT **)myrealloc(param->parvec, sizeof(VECT *) * param->samplenum);
02597     sp_break_last_nword_allow_override = TRUE;
02598     
02599   } else {
02600     
02601     /* last segment is on end of input: no rest parameter */
02602     rest_param = NULL;
02603     /* reset last_word info */
02604     sp_break_last_word = WORD_INVALID;
02605     sp_break_last_nword = WORD_INVALID;
02606     sp_break_last_nword_allow_override = FALSE;
02607   }
02608 }
02609 #endif /* SP_BREAK_CURRENT_FRAME */
02610   
02611 /********************************************************************/
02612 /* 第1パスを実行するメイン関数                                     */
02613 /* 入力をパイプライン処理する場合は realtime_1stpass.c を参照のこと */
02614 /* main function to execute 1st pass                                */
02615 /* the pipeline processing is not here: see realtime_1stpass.c      */
02616 /********************************************************************/
02617 
02655 void
02656 get_back_trellis(HTK_Param *param, WCHMM_INFO *wchmm, BACKTRELLIS *backtrellis,
02657 LOGPROB *backmax)
02658 {
02659   int t;
02660 
02661   /* 初期化及び t=0 を計算 */
02662   /* initialize and compute frame = 0 */
02663   get_back_trellis_init(param, wchmm, backtrellis);
02664 
02665   /* メインループ */
02666   /* main loop */
02667   for (
02668 #ifdef MULTIPATH_VERSION
02669        /* t = 0 should be computed here */
02670        t = 0
02671 #else
02672        /* t = 0 is already computed in get_back_trellis_init() */
02673        t = 1
02674 #endif
02675          ; t < param->samplenum; t++) {
02676     if (get_back_trellis_proceed(t, param, wchmm, backtrellis
02677 #ifdef MULTIPATH_VERSION
02678                                  ,FALSE
02679 #endif
02680                                  ) == FALSE
02681         || (module_mode && module_wants_terminate())) {
02682       /* 探索中断: 処理された入力は 0 から t-2 まで */
02683       /* search terminated: processed input = [0..t-2] */
02684       /* この時点で第1パスを終了する */
02685       /* end the 1st pass at this point */
02686       *backmax = finalize_1st_pass(backtrellis, wchmm->winfo, t-1);
02687 #ifdef SP_BREAK_CURRENT_FRAME
02688       /* ショートポーズセグメンテーションの場合,
02689          入力パラメータ分割などの最終処理も行なう */
02690       /* When short-pause segmentation enabled */
02691       finalize_segment(backtrellis, param, t-1);
02692 #endif
02693       /* terminate 1st pass here */
02694       return;
02695     }
02696   }
02697   /* 最終フレームを計算 */
02698   /* compute the last frame of input */
02699   get_back_trellis_end(param, wchmm, backtrellis);
02700   /* 第1パス終了処理 */
02701   /* end of 1st pass */
02702   *backmax = finalize_1st_pass(backtrellis, wchmm->winfo, param->samplenum);
02703 #ifdef SP_BREAK_CURRENT_FRAME
02704   /* ショートポーズセグメンテーションの場合,
02705      入力パラメータ分割などの最終処理も行なう */
02706   /* When short-pause segmentation enabled */
02707   finalize_segment(backtrellis, param, param->samplenum);
02708 #endif
02709 }
02710 
02711 /* end of file */
