libjulius/src/beam.c

00048 /*
00049  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
00050  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00051  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
00052  * All rights reserved
00053  */
00054 
00055 #include <julius/julius.h>
00056 
00057 #undef DEBUG
00058 
00059 
00060 /* ---------------------------------------------------------- */
00061 /*                     第1パスの結果処理                     */
00062 /*              end procedure to get result of 1st pass       */
00063 /* ---------------------------------------------------------- */
00064 
00065 #ifdef WORD_GRAPH
00066 
00096 static void
00097 generate_lattice(int frame, RecogProcess *r)
00098 {
00099   BACKTRELLIS *bt;
00100   WORD_INFO *winfo;
00101   TRELLIS_ATOM *ta;
00102   int i, j;
00103   LOGPROB l;
00104   WordGraph *new;
00105 
00106   bt = r->backtrellis;
00107   winfo = r->lm->winfo;
00108 
00109   if (frame >= 0) {
00110     for (i=0;i<bt->num[frame];i++) {
00111       ta = bt->rw[frame][i];
00112       /* a word is saved as part of the graph only if at least one of
00113          its following words has survived in the beam */
00114       if (! ta->within_context) continue; /* not a candidate */
00115       if (ta->within_wordgraph) continue; /* already marked */
00116       /* mark  */
00117       ta->within_wordgraph = TRUE;
00118 
00119       new = (WordGraph *)mymalloc(sizeof(WordGraph));
00120       new->wid = ta->wid;
00121       new->lefttime = ta->begintime;
00122       new->righttime = ta->endtime;
00123       new->fscore_head = ta->backscore;
00124       new->fscore_tail = 0.0;
00125       new->gscore_head = 0.0;
00126       new->gscore_tail = 0.0;
00127       new->lscore_tmp = ta->lscore;
00128 #ifdef CM_SEARCH
00129       new->cmscore = 0.0;
00130 #endif
00131       new->forward_score = new->backward_score = 0.0;
00132       new->headphone = winfo->wseq[ta->wid][0];
00133       new->tailphone = winfo->wseq[ta->wid][winfo->wlen[ta->wid]-1];
00134 
00135       new->leftwordmaxnum = FANOUTSTEP;
00136       new->leftword = (WordGraph **)mymalloc(sizeof(WordGraph *) * new->leftwordmaxnum);
00137       new->left_lscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * new->leftwordmaxnum);
00138       new->leftwordnum = 0;
00139       new->rightwordmaxnum = FANOUTSTEP;
00140       new->rightword = (WordGraph **)mymalloc(sizeof(WordGraph *) * new->rightwordmaxnum);
00141       new->right_lscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * new->rightwordmaxnum);
00142       new->rightwordnum = 0;
00143 
00144       l = ta->backscore;
00145       if (ta->last_tre->wid != WORD_INVALID) {
00146         l -= ta->last_tre->backscore;
00147       }
00148       l -= ta->lscore;
00149       new->amavg = l / (float)(ta->endtime - ta->begintime + 1);
00150 
00151 #ifdef GRAPHOUT_DYNAMIC
00152       new->purged = FALSE;
00153 #endif
00154       new->saved = FALSE;
00155       new->graph_cm = 0.0;
00156       new->mark = FALSE;
00157 
00158       new->next = r->result.wg1;
00159       r->result.wg1 = new;
00160 
00161       /* recursive call */
00162       generate_lattice(ta->last_tre->endtime, r);
00163     }
00164   }
00165 }
00166 
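/* For illustration: a minimal sketch of how the lattice built above can be
   inspected.  generate_lattice() links every saved word into r->result.wg1
   through its 'next' pointer, so a plain list walk visits all of them; only
   WordGraph fields that appear in this listing are used. */
static void
example_dump_wordgraph(WordGraph *root, WORD_INFO *winfo)
{
  WordGraph *wg;

  /* 'root' is r->result.wg1 after generate_lattice() has finished */
  for (wg = root; wg; wg = wg->next) {
    jlog("DEBUG: lattice word [%3d..%3d] %s amavg=%f\n",
         wg->lefttime, wg->righttime, winfo->wname[wg->wid], wg->amavg);
  }
}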
00183 static void
00184 link_lattice_by_time(WordGraph *root)
00185 {
00186   WordGraph *wg;
00187   WordGraph *wtmp;
00188   int lefttime, righttime;
00189   
00190   for(wg=root;wg;wg=wg->next) {
00191     
00192     for(wtmp=root;wtmp;wtmp=wtmp->next) {
00193       if (wg->righttime + 1 == wtmp->lefttime) {
00194         wordgraph_check_and_add_leftword(wtmp, wg, wtmp->lscore_tmp);
00195         wordgraph_check_and_add_rightword(wg, wtmp, wtmp->lscore_tmp);
00196       }
00197       if (wtmp->righttime + 1 == wg->lefttime) {
00198         wordgraph_check_and_add_leftword(wg, wtmp, wg->lscore_tmp);
00199         wordgraph_check_and_add_rightword(wtmp, wg, wg->lscore_tmp);
00200       }
00201     }
00202   }
00203 
00204 }
00205 
00219 static void
00220 re_compute_lattice_lm(WordGraph *root, WCHMM_INFO *wchmm)
00221 {
00222   WordGraph *wg;
00223   int i;
00224   
00225   for(wg=root;wg;wg=wg->next) {
00226     for(i=0;i<wg->leftwordnum;i++) {
00227       wg->left_lscore[i] = (*(wchmm->ngram->bigram_prob))(wchmm->ngram, wchmm->winfo->wton[wg->leftword[i]->wid], wchmm->winfo->wton[wg->wid]);
00228     }
00229     for(i=0;i<wg->rightwordnum;i++) {
00230       wg->right_lscore[i] = (*(wchmm->ngram->bigram_prob))(wchmm->ngram, wchmm->winfo->wton[wg->wid], wchmm->winfo->wton[wg->rightword[i]->wid]);
00231     }
00232   }
00233 }
00234 
00235 #endif
00236 
00250 static void
00251 put_atom(TRELLIS_ATOM *atom, WORD_INFO *winfo)
00252 {
00253   int i;
00254   jlog("DEBUG: %3d,%3d %f %16s (id=%5d)", atom->begintime, atom->endtime,
00255        atom->backscore, winfo->wname[atom->wid], atom->wid);
00256   for (i=0;i<winfo->wlen[atom->wid]; i++) {
00257     jlog(" %s",winfo->wseq[atom->wid][i]->name);
00258   }
00259   jlog("\n");
00260 }
00261 
00292 static LOGPROB
00293 trace_backptr(WORD_ID wordseq_rt[MAXSEQNUM], int *rt_wordlen, TRELLIS_ATOM *atom, WORD_INFO *winfo)
00294 {
00295   int wordlen = 0;              /* word length of best sentence hypothesis */
00296   TRELLIS_ATOM *tretmp;
00297   LOGPROB langscore = 0.0;
00298   static WORD_ID wordseq[MAXSEQNUM];    /* temporary buffer: in reverse order */
00299   int i;
00300 
00301   /* initialize */
00302   wordseq[0] = atom->wid;       /* start from specified atom */
00303   wordlen = 1;
00304   tretmp = atom;
00305   langscore += tretmp->lscore;
00306   if (debug2_flag) {
00307     put_atom(tretmp, winfo);
00308   }
00309   
00310   /* trace the backtrellis */
00311   while (tretmp->begintime > 0) {/* until beginning of input */
00312     tretmp = tretmp->last_tre;
00313 /*    t = tretmp->boundtime - 1;
00314     tretmp = bt_binsearch_atom(backtrellis, tretmp->boundtime-1, tretmp->last_wid);*/
00315     if (tretmp == NULL) {       /* should not happen */
00316       j_internal_error("trace_backptr: last trellis missing while backtracking");
00317     }
00318     langscore += tretmp->lscore;
00319     wordseq[wordlen] = tretmp->wid;
00320     wordlen++;
00321     if (debug2_flag) {
00322       put_atom(tretmp, winfo);
00323     }
00324     if (wordlen >= MAXSEQNUM) {
00325       j_internal_error("trace_backptr: sentence length exceeded ( > %d)\n",MAXSEQNUM);
00326     }
00327   }
00328   *rt_wordlen = wordlen;
00329   /* reverse order -> normal order */
00330   for(i=0;i<wordlen;i++) wordseq_rt[i] = wordseq[wordlen-i-1];
00331   return(langscore);
00332 }
00333 
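/* For illustration: the value returned by trace_backptr() is the sum of the
   per-word lscore values along the traced path, so subtracting it from the
   accumulated score of the final trellis word yields the acoustic part.
   This is exactly how find_1pass_result() below fills score_lm / score_am;
   a minimal sketch of that bookkeeping: */
static void
example_split_scores(TRELLIS_ATOM *best, WORD_INFO *winfo)
{
  WORD_ID wordseq[MAXSEQNUM];
  int wordlen;
  LOGPROB lm_part, am_part;

  lm_part = trace_backptr(wordseq, &wordlen, best, winfo);
  am_part = best->backscore - lm_part;  /* 1st-pass total = AM part + LM part */
  jlog("DEBUG: %d words, score_lm=%f score_am=%f\n", wordlen, lm_part, am_part);
}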
00370 static void
00371 find_1pass_result(int framelen, RecogProcess *r)
00372 {
00373   BACKTRELLIS *backtrellis;
00374   WORD_INFO *winfo;
00375   WORD_ID wordseq[MAXSEQNUM];
00376   int wordlen;
00377   int i;
00378   TRELLIS_ATOM *best;
00379   int last_time;
00380   LOGPROB total_lscore;
00381   LOGPROB maxscore;
00382   TRELLIS_ATOM *tmp;
00383 #ifdef SPSEGMENT_NAIST
00384   boolean ok_p;
00385 #endif
00386 
00387   backtrellis = r->backtrellis;
00388   winfo = r->lm->winfo;
00389 
00390   /* look for the last trellis word */
00391 
00392   if (r->lmtype == LM_PROB) {
00393 
00394     for (last_time = framelen - 1; last_time >= 0; last_time--) {
00395 
00396       maxscore = LOG_ZERO;
00397       for (i=0;i<backtrellis->num[last_time];i++) {
00398         tmp = backtrellis->rw[last_time][i];
00399 #ifdef WORD_GRAPH
00400         /* treat only words on a graph path */
00401         if (!tmp->within_context) continue;
00402 #endif
00403         if (r->config->successive.enabled) {
00404           /* short-pause segmentation mode */
00405           /* 最終フレームに残った最大スコアの単語 */
00406           /* it should be the best trellis word on the last frame */
00407           if (maxscore < tmp->backscore) {
00408             maxscore = tmp->backscore;
00409             best = tmp;
00410           }
00411         } else {
00412           /* not segmentation mode */
00413           /* 最終単語は winfo->tail_silwid に固定 */
00414           /* it is fixed to the tail silence model (winfo->tail_silwid) */
00415           if (tmp->wid == winfo->tail_silwid && maxscore < tmp->backscore) {
00416             maxscore = tmp->backscore;
00417             best = tmp;
00418             break;
00419           }
00420         }
00421       }
00422       if (maxscore != LOG_ZERO) break;
00423     }
00424 
00425     if (last_time < 0) {                /* not found */
00426       jlog("WARNING: %02d %s: no tail silence word survived on the last frame, search failed\n", r->config->id, r->config->name);
00427       r->result.status = J_RESULT_STATUS_FAIL;
00428       //callback_exec(CALLBACK_RESULT, r);
00429       return;
00430     }
00431   
00432   }
00433 
00434   if (r->lmtype == LM_DFA) {
00435 
00436     for (last_time = framelen - 1; last_time >= 0; last_time--) {
00437 
00438       /* 末尾に残った単語の中で最大スコアの単語(cp_endは使用しない) */
00439       /* the best trellis word on the last frame (not use cp_end[]) */
00440       maxscore = LOG_ZERO;
00441       for (i=0;i<backtrellis->num[last_time];i++) {
00442         tmp = backtrellis->rw[last_time][i];
00443 #ifdef WORD_GRAPH
00444         /* treat only words on a graph path */
00445         if (!tmp->within_context) continue;
00446 #endif
00447         /*      if (dfa->cp_end[winfo->wton[tmp->wid]] == TRUE) {*/
00448         if (maxscore < tmp->backscore) {
00449           maxscore = tmp->backscore;
00450           best = tmp;
00451         }
00452         /*      }*/
00453       }
00454       if (maxscore != LOG_ZERO) break;
00455     }
00456 
00457     if (last_time < 0) {                /* not found */
00458       jlog("WARNING: %02d %s: no sentence-end word survived on last beam\n", r->config->id, r->config->name);
00459       r->result.status = J_RESULT_STATUS_FAIL;
00460       //callback_exec(CALLBACK_RESULT, r);
00461       return;
00462     }
00463   
00464   }
00465 
00466   /* traceback word trellis from the best word */
00467   total_lscore = trace_backptr(wordseq, &wordlen, best, r->lm->winfo);
00468 #ifdef SPSEGMENT_NAIST
00469   if (r->config->successive.enabled) {
00470     /* in segmentation mode, a recognition result that consists only of
00471        short-pause words is treated as a recognition rejection */
00472     ok_p = FALSE;
00473     for(i=0;i<wordlen;i++) {
00474       if (! is_sil(wordseq[i], r)) ok_p = TRUE;
00475     }
00476     if (ok_p == FALSE) {
00477       r->result.status = J_RESULT_STATUS_ONLY_SILENCE;
00478       return;
00479     }
00480   }
00481 #endif
00482 
00483   /* just flush last progress output */
00484   /*
00485   if (recog->jconf->output.progout_flag) {
00486     recog->result.status = 1;
00487     recog->result.num_frame = last_time;
00488     recog->result.pass1.word = wordseq;
00489     recog->result.pass1.word_num = wordlen;
00490     recog->result.pass1.score = best->backscore;
00491     recog->result.pass1.score_lm = total_lscore;
00492     recog->result.pass1.score_am = best->backscore - total_lscore;
00493     //callback_exec(CALLBACK_RESULT_PASS1_INTERIM, recog);
00494     }*/
00495 
00496   /* output 1st pass result */    
00497   if (verbose_flag || ! r->config->output.progout_flag) {
00498     r->result.status = J_RESULT_STATUS_SUCCESS;
00499     r->result.num_frame = framelen;
00500     for(i=0;i<wordlen;i++) r->result.pass1.word[i] = wordseq[i];
00501     r->result.pass1.word_num = wordlen;
00502     r->result.pass1.score = best->backscore;
00503     r->result.pass1.score_lm = total_lscore;
00504     r->result.pass1.score_am = best->backscore - total_lscore;
00505     //callback_exec(CALLBACK_RESULT_PASS1, r);
00506   }
00507 
00508   /* store the result to global val (notice: in reverse order) */
00509   for(i=0;i<wordlen;i++) r->pass1_wseq[i] = wordseq[i];
00510   r->pass1_wnum = wordlen;
00511   r->pass1_score = best->backscore;
00512 
00513 #ifdef WORD_GRAPH
00514   /* 単語トレリスから,ラティスを生成する */
00515   /* generate word graph from the word trellis */
00516   r->peseqlen = backtrellis->framelen;
00517   r->result.wg1 = NULL;
00518   generate_lattice(last_time, r);
00519   link_lattice_by_time(r->result.wg1);
00520   if (r->lmtype == LM_PROB) re_compute_lattice_lm(r->result.wg1, r->wchmm);
00521   r->result.wg1_num = wordgraph_sort_and_annotate_id(&(r->result.wg1), r);
00522   /* compute graph CM by forward-backward processing */
00523   graph_forward_backward(r->result.wg1, r);
00524   //callback_exec(CALLBACK_RESULT_PASS1_GRAPH, r);
00525   //wordgraph_clean(&(r->result.wg1));
00526 #endif
00527 
00528 }
00529 
00548 static int
00549 compare_backscore(TRELLIS_ATOM **x1, TRELLIS_ATOM **x2)
00550 {
00551   return((*x1)->backscore < (*x2)->backscore);
00552 }
00553 
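/* For illustration: qsort() expects a comparator returning negative, zero or
   positive.  The two-valued form above is enough in practice to obtain a
   roughly descending order by backscore, but a strictly conforming sketch of
   the same "larger score first" ordering would look like this: */
static int
example_compare_backscore_3way(const void *a, const void *b)
{
  const TRELLIS_ATOM *t1 = *(TRELLIS_ATOM * const *)a;
  const TRELLIS_ATOM *t2 = *(TRELLIS_ATOM * const *)b;

  if (t1->backscore > t2->backscore) return -1;  /* better score sorts first */
  if (t1->backscore < t2->backscore) return 1;
  return 0;
}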
00573 static void
00574 find_1pass_result_word(int framelen, RecogProcess *r)
00575 {
00576   BACKTRELLIS *bt;
00577   TRELLIS_ATOM *best, *tmp;
00578   int last_time;
00579   Sentence *s;
00580 #ifdef CONFIDENCE_MEASURE
00581   LOGPROB sum;
00582 #endif
00583   LOGPROB maxscore;
00584   int i;
00585   TRELLIS_ATOM **idx;
00586 
00587   if (r->lmvar != LM_DFA_WORD) return;
00588 
00589   bt = r->backtrellis;
00590   for (last_time = framelen - 1; last_time >= 0; last_time--) {
00591     maxscore = LOG_ZERO;
00592     for (i=0;i<bt->num[last_time];i++) {
00593       tmp = bt->rw[last_time][i];
00594 #ifdef WORD_GRAPH
00595       /* treat only words on a graph path */
00596       if (!tmp->within_context) continue;
00597 #endif
00598       if (maxscore < tmp->backscore) {
00599         maxscore = tmp->backscore;
00600         best = tmp;
00601       }
00602     }
00603     if (maxscore != LOG_ZERO) break;
00604   }
00605 
00606   if (last_time < 0) {          /* not found */
00607     jlog("WARNING: %02d %s: no word survived on the last frame, search failed\n", r->config->id, r->config->name);
00608     r->result.status = J_RESULT_STATUS_FAIL;
00609     //callback_exec(CALLBACK_RESULT, r);
00610     return;
00611   }
00612 
00613 #ifdef CONFIDENCE_MEASURE
00614   sum = 0.0;
00615   for (i=0;i<bt->num[last_time];i++) {
00616     tmp = bt->rw[last_time][i];
00617 #ifdef WORD_GRAPH
00618     /* treat only words on a graph path */
00619     if (!tmp->within_context) continue;
00620 #endif
00621     sum += pow(10, r->config->annotate.cm_alpha * (tmp->backscore - maxscore));
00622   }
00623 #endif
00624 
00625   /* set recognition result status to normal */
00626   r->result.status = J_RESULT_STATUS_SUCCESS;
00627 
00628   if (r->config->output.output_hypo_maxnum > 1) {
00629     /* more than one candidate is requested */
00630 
00631     /* get actual number of candidates to output */
00632     r->result.sentnum = r->config->output.output_hypo_maxnum;
00633     if (r->result.sentnum > bt->num[last_time]) {
00634       r->result.sentnum = bt->num[last_time];
00635     }
00636 
00637     /* prepare result storage */
00638     r->result.sent = (Sentence *)mymalloc(sizeof(Sentence)* r->result.sentnum);
00639 
00640     /* sort by score */
00641     idx = (TRELLIS_ATOM **)mymalloc(sizeof(TRELLIS_ATOM *)*bt->num[last_time]);
00642     for (i=0;i<bt->num[last_time];i++) {
00643       idx[i] = bt->rw[last_time][i];
00644     }
00645     qsort(idx, bt->num[last_time], sizeof(TRELLIS_ATOM *),
00646           (int (*)(const void *,const void *))compare_backscore);
00647     
00648     /* store to result storage */
00649     for(i=0;i<r->result.sentnum;i++) {
00650       s = &(r->result.sent[i]);
00651       tmp = idx[i];
00652       s->word_num = 1;
00653       s->word[0] = tmp->wid;
00654 #ifdef CONFIDENCE_MEASURE
00655       s->confidence[0] = pow(10, r->config->annotate.cm_alpha * (tmp->backscore - maxscore)) / sum;
00656 #endif
00657       s->score = tmp->backscore;
00658       s->score_lm = 0.0;
00659       s->score_am = tmp->backscore;
00660       s->gram_id = 0;
00661       s->align.filled = FALSE;
00662     }
00663     /* free work area for sort */
00664     free(idx);
00665 
00666   } else {                      /* only max is needed */
00667 
00668     /* prepare result storage */
00669     r->result.sent = (Sentence *)mymalloc(sizeof(Sentence));
00670     r->result.sentnum = 1;
00671     s = &(r->result.sent[0]);
00672     s->word_num = 1;
00673     s->word[0] = best->wid;
00674 #ifdef CONFIDENCE_MEASURE
00675     s->confidence[0] = 1.0 / sum;
00676 #endif
00677     s->score = best->backscore;
00678     s->score_lm = 0.0;
00679     s->score_am = best->backscore;
00680     s->gram_id = 0;
00681     s->align.filled = FALSE;
00682   }
00683 
00684   /* copy as 1st pass result */
00685   memcpy(&(r->result.pass1), &(r->result.sent[0]), sizeof(Sentence));
00686 
00687   //callback_exec(CALLBACK_RESULT, r);
00688   //free(r->result.sent);
00689 }
00690 
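/* For illustration: the per-word confidence assigned above (under
   CONFIDENCE_MEASURE) is a posterior-style normalization over the word-end
   hypotheses that survived on the last frame, smoothed by cm_alpha:
     CM(w_i) = 10^(alpha*(s_i - s_max)) / sum_j 10^(alpha*(s_j - s_max)).
   A minimal self-contained sketch of the same computation: */
static LOGPROB
example_word_cm(TRELLIS_ATOM **atoms, int num, int target, LOGPROB cm_alpha)
{
  LOGPROB maxscore, sum;
  int i;

  maxscore = LOG_ZERO;
  for (i = 0; i < num; i++) {
    if (maxscore < atoms[i]->backscore) maxscore = atoms[i]->backscore;
  }
  sum = 0.0;
  for (i = 0; i < num; i++) {
    sum += pow(10, cm_alpha * (atoms[i]->backscore - maxscore));
  }
  return pow(10, cm_alpha * (atoms[target]->backscore - maxscore)) / sum;
}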
00691 
00692 #ifdef DETERMINE
00693 
00721 static TRELLIS_ATOM *
00722 determine_word(RecogProcess *r, int t, TRELLIS_ATOM *tremax, LOGPROB thres, int countthres)
00723 {
00724   TRELLIS_ATOM *ret;
00725   WORD_ID w;
00726 
00727   //LOGPROB sum;
00728   //LOGPROB cm;
00729 
00730   int j;
00731   FSBeam *d;
00732   TOKEN2 *tk;
00733     
00734   if (tremax == NULL) {
00735     /* initialize */
00736     r->determine_count = 0;
00737     r->determine_maxnodescore = LOG_ZERO;
00738     r->determined = FALSE;
00739     r->determine_last_wid = WORD_INVALID;
00740     r->have_determine = FALSE;
00741     return NULL;
00742   }
00743 
00744   ret = NULL;
00745 
00746   /* get confidence score of the maximum word hypothesis */
00747 /* 
00748  *   sum = 0.0;
00749  *   tre = recog->backtrellis->list;
00750  *   while (tre != NULL && tre->endtime == t) {
00751  *     sum += pow(10, recog->jconf->annotate.cm_alpha * (tre->backscore - tremax->backscore));
00752  *     tre = tre->next;
00753  *   }
00754  *   cm = 1.0 / sum;
00755  */
00756 
00757   /* determinization decision */
00758   w = tremax->wid;
00759 
00760   r->have_determine = FALSE;
00761 
00762   /* determine by score threshold from maximum node score to maximum word end node score */
00763   if (r->determine_last_wid == w && r->determine_maxnodescore - tremax->backscore <= thres) {
00764     r->determine_count++;
00765     if (r->determine_count > countthres) {
00766       if (r->determined == FALSE) {
00767         ret = tremax;
00768         r->determined = TRUE;
00769         r->have_determine = TRUE;
00770       }
00771     }
00772   } else {
00773     r->determine_count = 0;
00774   }
00775 
00776   //printf("determine: %d: %s: cm=%f, relscore=%f, count=%d, phase=%d\n", t, recog->model->winfo->woutput[w], cm, determine_maxnodescore - tremax->backscore, count, phase);
00777   r->determine_last_wid = w;
00778 
00779   /* update maximum node score here for next call, since
00780      the word path determination is always one frame later */
00781   d = &(r->pass1);
00782   r->determine_maxnodescore = LOG_ZERO;
00783   for (j = d->n_start; j <= d->n_end; j++) {
00784     tk = &(d->tlist[d->tn][d->tindex[d->tn][j]]);
00785     if (r->determine_maxnodescore < tk->score) r->determine_maxnodescore = tk->score;
00786   }
00787 
00788   return(ret);
00789 }
00790 
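/* For illustration: the early-determination rule above can be read as a
   per-frame predicate: the current best word-end hypothesis is committed once
   the same word has stayed within 'thres' of the best node score for more
   than 'countthres' consecutive frames.  A sketch with the per-frame state
   passed explicitly instead of being kept in RecogProcess: */
static boolean
example_is_determined(WORD_ID best_wid, WORD_ID last_wid, LOGPROB gap,
                      LOGPROB thres, int *count, int countthres)
{
  /* 'gap' corresponds to determine_maxnodescore - tremax->backscore above */
  if (best_wid == last_wid && gap <= thres) {
    (*count)++;
    if (*count > countthres) return TRUE;
    return FALSE;
  }
  *count = 0;                   /* condition broken: restart counting */
  return FALSE;
}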
00810 static void
00811 check_determine_word(RecogProcess *r, int t)
00812 {
00813   TRELLIS_ATOM *tre;
00814   TRELLIS_ATOM *tremax;
00815   LOGPROB maxscore;
00816 
00817   /* bt->list is ordered by time frame */
00818   maxscore = LOG_ZERO;
00819   tremax = NULL;
00820   tre = r->backtrellis->list;
00821   while (tre != NULL && tre->endtime == t) {
00822     if (maxscore < tre->backscore) {
00823       maxscore = tre->backscore;
00824       tremax = tre;
00825     }
00826     tre = tre->next;
00827   }
00828 
00829   r->result.status = J_RESULT_STATUS_SUCCESS;
00830   r->result.num_frame = t;
00831 
00832   if (maxscore != LOG_ZERO) {
00833     //    if ((tre = determine_word(recog, t, tremax, 0.9, 17)) != NULL) {
00834     if ((tre = determine_word(r, t, tremax, r->config->pass1.determine_score_thres, r->config->pass1.determine_duration_thres)) != NULL) {
00835       r->result.pass1.word[0] = tremax->wid;
00836       r->result.pass1.word_num = 1;
00837       r->result.pass1.score = tremax->backscore;
00838       r->result.pass1.score_lm = 0.0;
00839       r->result.pass1.score_am = tremax->backscore;
00840       r->result.num_frame = t;
00841       //callback_exec(CALLBACK_RESULT_PASS1_DETERMINED, r);
00842     }
00843   }
00844 
00845   
00846 }
00847 
00848 #endif /* DETERMINE */
00849 
00865 static void
00866 bt_current_max(RecogProcess *r, int t)
00867 {
00868   int wordlen;
00869   TRELLIS_ATOM *tre;
00870   TRELLIS_ATOM *tremax;
00871   LOGPROB maxscore;
00872   LOGPROB lscore;
00873 
00874   /* bt->list is ordered by time frame */
00875   maxscore = LOG_ZERO;
00876   tremax = NULL;
00877   tre = r->backtrellis->list;
00878   while (tre != NULL && tre->endtime == t) {
00879     if (maxscore < tre->backscore) {
00880       maxscore = tre->backscore;
00881       tremax = tre;
00882     }
00883     tre = tre->next;
00884   }
00885 
00886   r->result.status = J_RESULT_STATUS_SUCCESS;
00887   r->result.num_frame = t;
00888 
00889   if (maxscore == LOG_ZERO) {
00890     r->result.pass1.word_num = 0;
00891   } else {
00892     if (r->lmvar == LM_DFA_WORD) {
00893       r->result.pass1.word[0] = tremax->wid;
00894       r->result.pass1.word_num = 1;
00895       r->result.pass1.score = tremax->backscore;
00896       r->result.pass1.score_lm = 0.0;
00897       r->result.pass1.score_am = tremax->backscore;
00898     } else {
00899       lscore = trace_backptr(r->result.pass1.word, &wordlen, tremax, r->lm->winfo);
00900       r->result.pass1.word_num = wordlen;
00901       r->result.pass1.score = tremax->backscore;
00902       r->result.pass1.score_lm = lscore;
00903       r->result.pass1.score_am = tremax->backscore;
00904     }
00905   }
00906   //callback_exec(CALLBACK_RESULT_PASS1_INTERIM, r);
00907 }
00908 
00924 static void
00925 bt_current_max_word(RecogProcess *r, int t)
00926 {
00927 
00928   TRELLIS_ATOM *tre;
00929   TRELLIS_ATOM *tremax;
00930   LOGPROB maxscore;
00931   WORD_ID w;
00932 
00933   /* bt->list は時間順に格納されている */
00934   /* bt->list is ordered by time */
00935   maxscore = LOG_ZERO;
00936   tremax = NULL;
00937   tre = r->backtrellis->list;
00938   while (tre != NULL && tre->endtime == t) {
00939     if (maxscore < tre->backscore) {
00940       maxscore = tre->backscore;
00941       tremax = tre;
00942     }
00943     tre = tre->next;
00944   }
00945 
00946   if (maxscore != LOG_ZERO) {
00947     jlog("DEBUG: %3d: ",t);
00948     w = tremax->wid;
00949     jlog("\"%s [%s]\"(id=%d)",
00950          r->lm->winfo->wname[w], r->lm->winfo->woutput[w], w);
00951     jlog(" [%d-%d] %f", tremax->begintime, t, tremax->backscore);
00952     w = tremax->last_tre->wid;
00953     if (w != WORD_INVALID) {
00954       jlog(" <- \"%s [%s]\"(id=%d)\n",
00955                r->lm->winfo->wname[w], r->lm->winfo->woutput[w], w);
00956     } else {
00957       jlog(" <- bgn\n");
00958     }
00959   }
00960 }
00961 
00962 
00963 /* -------------------------------------------------------------------- */
00964 /*                 ビーム探索中のトークンを扱うサブ関数                 */
00965 /*                functions to handle hypothesis tokens                 */
00966 /* -------------------------------------------------------------------- */
00967 
00986 static void
00987 malloc_nodes(FSBeam *d, int n, int ntoken_init)
00988 {
00989   d->totalnodenum = n;
00990   d->token     = (TOKENID *)mymalloc(sizeof(TOKENID) * d->totalnodenum);
00991   //d->maxtnum = ntoken_init;
00992   if (d->maxtnum < ntoken_init) d->maxtnum = ntoken_init;
00993   d->tlist[0]  = (TOKEN2 *)mymalloc(sizeof(TOKEN2) * d->maxtnum);
00994   d->tlist[1]  = (TOKEN2 *)mymalloc(sizeof(TOKEN2) * d->maxtnum);
00995   d->tindex[0] = (TOKENID *)mymalloc(sizeof(TOKENID) * d->maxtnum);
00996   d->tindex[1] = (TOKENID *)mymalloc(sizeof(TOKENID) * d->maxtnum);
00997   //d->expand_step = ntoken_step;
00998   d->nodes_malloced = TRUE;
00999   d->expanded = FALSE;
01000 }
01001 
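/* For illustration: the work area allocated above holds one TOKENID slot per
   lexicon node plus a double buffer (previous / current frame) of token
   bodies and token index arrays.  A rough sketch of its size in bytes: */
static size_t
example_workarea_bytes(int n_nodes, int maxtnum)
{
  return (size_t)n_nodes * sizeof(TOKENID)
    + 2 * (size_t)maxtnum * (sizeof(TOKEN2) + sizeof(TOKENID));
}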
01014 static void
01015 expand_tlist(FSBeam *d)
01016 {
01017   d->maxtnum += d->expand_step;
01018   d->tlist[0]  = (TOKEN2 *)myrealloc(d->tlist[0],sizeof(TOKEN2) * d->maxtnum);
01019   d->tlist[1]  = (TOKEN2 *)myrealloc(d->tlist[1],sizeof(TOKEN2) * d->maxtnum);
01020   d->tindex[0] = (TOKENID *)myrealloc(d->tindex[0],sizeof(TOKENID) * d->maxtnum);
01021   d->tindex[1] = (TOKENID *)myrealloc(d->tindex[1],sizeof(TOKENID) * d->maxtnum);
01022   if (debug2_flag) jlog("STAT: token space expanded to %d\n", d->maxtnum);
01023   d->expanded = TRUE;
01024 }
01025 
01043 static void
01044 prepare_nodes(FSBeam *d, int ntoken_step)
01045 {
01046   d->tnum[0] = d->tnum[1] = 0;
01047   if (d->expand_step < ntoken_step) d->expand_step = ntoken_step;
01048 }
01049 
01064 static void
01065 free_nodes(FSBeam *d)
01066 {
01067   if (d->nodes_malloced) {
01068     free(d->token);
01069     free(d->tlist[0]);
01070     free(d->tlist[1]);
01071     free(d->tindex[0]);
01072     free(d->tindex[1]);
01073     d->nodes_malloced = FALSE;
01074   }
01075 }
01076 
01091 static void
01092 clear_tlist(FSBeam *d, int tt)
01093 {
01094   d->tnum[tt] = 0;
01095 }
01096 
01111 static void
01112 clear_tokens(FSBeam *d, int tt)
01113 {
01114   int j;
01115   /* initialize active token list: only clear ones used in the last call */
01116   for (j=0; j<d->tnum[tt]; j++) {
01117     d->token[d->tlist[tt][j].node] = TOKENID_UNDEFINED;
01118   }
01119 }
01120 
01137 static TOKENID
01138 create_token(FSBeam *d)
01139 {
01140   TOKENID newid;
01141   int tn;
01142   tn = d->tn;
01143   newid = d->tnum[tn];
01144   d->tnum[tn]++;
01145   while (d->tnum[tn]>=d->maxtnum) expand_tlist(d);
01146   d->tindex[tn][newid] = newid;
01147 #ifdef WPAIR
01148   /* initialize link */
01149   d->tlist[tn][newid].next = TOKENID_UNDEFINED;
01150 #endif
01151   return(newid);
01152 }
01153 
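/* For illustration: create_token() may trigger expand_tlist(), whose
   realloc() can move d->tlist[] in memory, so any TOKEN2 pointer held across
   the call becomes stale.  Callers therefore re-derive pointers from the
   index arrays, as in this sketch (beam_intra_word_core() below does the
   same whenever d->expanded is set): */
static TOKEN2 *
example_refresh_token_pointer(FSBeam *d, int tt, int j)
{
  /* indexes stay valid across realloc(); only raw pointers go stale */
  return &(d->tlist[tt][d->tindex[tt][j]]);
}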
01183 static void
01184 node_assign_token(FSBeam *d, int node, TOKENID tkid)
01185 {
01186 #ifdef WPAIR
01187   /* add to link list */
01188   d->tlist[d->tn][tkid].next = d->token[node];
01189 #endif
01190   d->token[node] = tkid;
01191   d->tlist[d->tn][tkid].node = node;
01192 }
01193 
01230 static TOKENID
01231 node_exist_token(FSBeam *d, int tt, int node, WORD_ID wid)
01232 {
01233 #ifdef WPAIR
01234   /* In word-pair mode, multiple tokens are assigned to a node as a list,
01235      so we have to search for the token with the same last word ID */
01236 #ifdef WPAIR_KEEP_NLIMIT
01237   /* 1ノードごとに保持するtoken数の上限を設定 */
01238   /* tokenが無いが上限に達しているときは一番スコアの低いtokenを上書きする */
01239   /* N-best: limit number of assigned tokens to a node */
01240   int i = 0;
01241   TOKENID lowest_token = TOKENID_UNDEFINED;
01242 #endif
01243   TOKENID tmp;
01244   for(tmp=d->token[node]; tmp != TOKENID_UNDEFINED; tmp=d->tlist[tt][tmp].next) {
01245     if (d->tlist[tt][tmp].last_tre->wid == wid) {
01246       return(tmp);
01247     }
01248 #ifdef WPAIR_KEEP_NLIMIT
01249     if (lowest_token == TOKENID_UNDEFINED ||
01250         d->tlist[tt][lowest_token].score < d->tlist[tt][tmp].score)
01251       lowest_token = tmp;
01252     if (++i >= d->wpair_keep_nlimit) break;
01253 #endif
01254   }
01255 #ifdef WPAIR_KEEP_NLIMIT
01256   if (i >= d->wpair_keep_nlimit) { /* overflow, overwrite lowest score */
01257     return(lowest_token);
01258   } else {
01259     return(TOKENID_UNDEFINED);
01260   }
01261 #else 
01262   return(TOKENID_UNDEFINED);
01263 #endif
01264   
01265 #else  /* not WPAIR */
01266   /* 1つだけ保持,これを常に上書き */
01267   /* Only one token is kept in 1-best mode (default), so
01268      simply return the ID */
01269   return(d->token[node]);
01270 #endif
01271 }
01272 
01273 #ifdef DEBUG
01274 /* tlist と token の対応をチェックする(debug) */
01275 /* for debug: check tlist <-> token correspondence
01276    where  tlist[tt][tokenID].node = nodeID and
01277           token[nodeID] = tokenID
01278  */
01279 static void
01280 node_check_token(FSBeam *d, int tt)
01281 {
01282   int i;
01283   for(i=0;i<d->tnum[tt];i++) {
01284     if (node_exist_token(d, tt, d->tlist[tt][i].node, d->tlist[tt][i].last_tre->wid) != i) {
01285       jlog("ERROR: token %d not found on node %d\n", i, d->tlist[tt][i].node);
01286     }
01287   }
01288 }
01289 #endif
01290 
01291 
01292 
01293 /* -------------------------------------------------------------------- */
01294 /*       トークンをソートし 上位 N トークンを判別する (heap sort)       */
01295 /*        Sort generated tokens and get N-best (use heap sort)          */
01296 /* -------------------------------------------------------------------- */
01297 /* ビームの閾値として上位 N 番目のスコアが欲しいだけであり,実際にソート
01298    される必要はない */
01299 /* we only want to know the N-th score for determining beam threshold, so
01300    order is not considered here */
01301 
01302 #define SD(A) tindex_local[A-1] 
01303 #define SCOPY(D,S) D = S        
01304 #define SVAL(A) (tlist_local[tindex_local[A-1]].score) 
01305 #define STVAL (tlist_local[s].score) 
01306 
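/* For illustration: the macros above implement 1-based heap indexing over
   the tindex[] array only; the token bodies in tlist[] are never moved, and
   every comparison dereferences the score of the token that an index entry
   points to.  Written without macros, the access made by SVAL(A) is: */
static LOGPROB
example_heap_value(TOKEN2 *tlist_local, TOKENID *tindex_local, int heap_pos)
{
  /* heap_pos is 1-based while tindex_local[] is 0-based, hence the -1 */
  return tlist_local[tindex_local[heap_pos - 1]].score;
}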
01307 
01331 static void
01332 sort_token_upward(FSBeam *d, int neednum, int totalnum)
01333 {
01334   int n,root,child,parent;
01335   TOKENID s;
01336   TOKEN2 *tlist_local;
01337   TOKENID *tindex_local;
01338 
01339   tlist_local = d->tlist[d->tn];
01340   tindex_local = d->tindex[d->tn];
01341 
01342   for (root = totalnum/2; root >= 1; root--) {
01343     SCOPY(s, SD(root));
01344     parent = root;
01345     while ((child = parent * 2) <= totalnum) {
01346       if (child < totalnum && SVAL(child) < SVAL(child+1)) {
01347         child++;
01348       }
01349       if (STVAL >= SVAL(child)) {
01350         break;
01351       }
01352       SCOPY(SD(parent), SD(child));
01353       parent = child;
01354     }
01355     SCOPY(SD(parent), s);
01356   }
01357   n = totalnum;
01358   while ( n > totalnum - neednum) {
01359     SCOPY(s, SD(n));
01360     SCOPY(SD(n), SD(1));
01361     n--;
01362     parent = 1;
01363     while ((child = parent * 2) <= n) {
01364       if (child < n && SVAL(child) < SVAL(child+1)) {
01365         child++;
01366       }
01367       if (STVAL >= SVAL(child)) {
01368         break;
01369       }
01370       SCOPY(SD(parent), SD(child));
01371       parent = child;
01372     }
01373     SCOPY(SD(parent), s);
01374   }
01375 }
01376 
01403 static void
01404 sort_token_downward(FSBeam *d, int neednum, int totalnum)
01405 {
01406   int n,root,child,parent;
01407   TOKENID s;
01408   TOKEN2 *tlist_local;
01409   TOKENID *tindex_local;
01410 
01411   tlist_local = d->tlist[d->tn];
01412   tindex_local = d->tindex[d->tn];
01413 
01414   for (root = totalnum/2; root >= 1; root--) {
01415     SCOPY(s, SD(root));
01416     parent = root;
01417     while ((child = parent * 2) <= totalnum) {
01418       if (child < totalnum && SVAL(child) > SVAL(child+1)) {
01419         child++;
01420       }
01421       if (STVAL <= SVAL(child)) {
01422         break;
01423       }
01424       SCOPY(SD(parent), SD(child));
01425       parent = child;
01426     }
01427     SCOPY(SD(parent), s);
01428   }
01429   n = totalnum;
01430   while ( n > totalnum - neednum) {
01431     SCOPY(s, SD(n));
01432     SCOPY(SD(n), SD(1));
01433     n--;
01434     parent = 1;
01435     while ((child = parent * 2) <= n) {
01436       if (child < n && SVAL(child) > SVAL(child+1)) {
01437         child++;
01438       }
01439       if (STVAL <= SVAL(child)) {
01440         break;
01441       }
01442       SCOPY(SD(parent), SD(child));
01443       parent = child;
01444     }
01445     SCOPY(SD(parent), s);
01446   }
01447 }
01448 
01481 static void
01482 sort_token_no_order(FSBeam *d, int neednum, int *start, int *end)
01483 {
01484   int totalnum;
01485   int restnum;
01486 
01487   totalnum = d->tnum[d->tn];
01488 
01489   restnum = totalnum - neednum;
01490 
01491   if (neednum >= totalnum) {
01492     /* no need to sort */
01493     *start = 0;
01494     *end = totalnum - 1;
01495   } else if (neednum < restnum)  {
01496     /* needed num is smaller than rest, so sort for the needed tokens */
01497     sort_token_upward(d, neednum,totalnum);
01498     *start = totalnum - neednum;
01499     *end = totalnum - 1;
01500   } else {
01501     /* needed num is bigger than rest, so sort only the remaining tokens */
01502     sort_token_downward(d, restnum,totalnum);
01503     *start = 0;
01504     *end = neednum - 1;
01505   }
01506 }
01507 
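/* For illustration: callers do not rely on any ordering inside the selected
   range; they only scan tindex[tn][start..end] to touch the tokens that
   survived the beam, as determine_word() does above with d->n_start and
   d->n_end.  A minimal usage sketch: */
static LOGPROB
example_best_surviving_score(FSBeam *d)
{
  LOGPROB best;
  TOKEN2 *tk;
  int j;

  best = LOG_ZERO;
  for (j = d->n_start; j <= d->n_end; j++) {
    tk = &(d->tlist[d->tn][d->tindex[d->tn][j]]);
    if (best < tk->score) best = tk->score;
  }
  return best;
}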
01508 /* -------------------------------------------------------------------- */
01509 /*             第1パス(フレーム同期ビームサーチ) メイン                */
01510 /*           main routines of 1st pass (frame-synchronous beam search)  */
01511 /* -------------------------------------------------------------------- */
01512 
01541 static boolean
01542 init_nodescore(HTK_Param *param, RecogProcess *r)
01543 {
01544   WCHMM_INFO *wchmm;
01545   FSBeam *d;
01546   TOKENID newid;
01547   TOKEN2 *new;
01548   WORD_ID beginword;
01549   int node;
01550   int i;
01551 
01552   wchmm = r->wchmm;
01553   d = &(r->pass1);
01554 
01555   /* 初期仮説用単語履歴 */
01556   /* setup initial word context */
01557   if (r->config->successive.enabled) { /* sp segment mode */
01558     /* initial word context = last non-sp word of previous 2nd pass at last segment*/
01559     if (r->lmtype == LM_PROB) {
01560       if (r->sp_break_last_nword == wchmm->winfo->tail_silwid) {
01561         /* if end with silE, initialize as normal start of sentence */
01562         d->bos.wid = WORD_INVALID;
01563       } else {
01564         d->bos.wid = r->sp_break_last_nword;
01565       }
01566     } else {
01567       d->bos.wid = WORD_INVALID;
01568     }
01569   } else {                      /* not sp segment mode */
01570     d->bos.wid = WORD_INVALID;  /* no context */
01571   }
01572 
01573   d->bos.begintime = d->bos.endtime = -1;
01574 
01575   /* ノード・トークンを初期化 */
01576   /* clear tree lexicon nodes and tokens */
01577   for(node = 0; node < d->totalnodenum; node++) {
01578     d->token[node] = TOKENID_UNDEFINED;
01579   }
01580   d->tnum[0] = d->tnum[1]  = 0;
01581   
01582 #ifdef PASS1_IWCD
01583   /* 出力確率計算キャッシュを初期化 */
01584   /* initialize outprob cache */
01585   outprob_style_cache_init(wchmm);
01586 #endif
01587 
01588   /* 初期仮説の作成: 初期単語の決定と初期トークンの生成 */
01589   /* initial word hypothesis */
01590 
01591   if (r->lmtype == LM_PROB) {
01592 
01593     if (r->config->successive.enabled) { /* sp segment mode */
01594       if (r->sp_break_last_word != WORD_INVALID) { /* last segment exist */
01595         /* 開始単語=前のセグメント計算時の最後の最尤単語 */
01596         /* 文終了単語(silE,句点(IPAモデル))なら,silB で開始 */
01597         /* initial word = best last word hypothesis on the last segment */
01598         /* if silE or sp, begin with silB */
01599         /*if (is_sil(recog.sp_break_last_word, wchmm->winfo, wchmm->hmminfo)) {*/
01600         if (r->sp_break_last_word == wchmm->winfo->tail_silwid) {
01601           beginword = wchmm->winfo->head_silwid;
01602           d->bos.wid = WORD_INVALID;    /* reset initial word context */
01603         } else {
01604           beginword = r->sp_break_last_word;
01605         }
01606       } else {
01607         /* initial segment: initial word set to silB */
01608         beginword = wchmm->winfo->head_silwid;
01609       }
01610     } else {                    /* not sp segment mode */
01611       /* initial word fixed to silB */
01612       beginword = wchmm->winfo->head_silwid;
01613     }
01614 
01615 #ifdef SP_BREAK_DEBUG
01616     jlog("DEBUG: startword=[%s], last_nword=[%s]\n",
01617            (beginword == WORD_INVALID) ? "WORD_INVALID" : wchmm->winfo->wname[beginword],
01618            (d->bos.wid == WORD_INVALID) ? "WORD_INVALID" : wchmm->winfo->wname[d->bos.wid]);
01619 #endif
01620     /* create the first token at the first node of initial word */
01621     newid = create_token(d);
01622     new = &(d->tlist[d->tn][newid]);
01623 
01624     /* initial node = head node of the beginword */
01625     if (wchmm->hmminfo->multipath) {
01626       node = wchmm->wordbegin[beginword];
01627     } else {
01628       node = wchmm->offset[beginword][0];
01629     }
01630 
01631     /* set initial LM score */
01632     if (wchmm->state[node].scid != 0) {
01633       /* if initial node is on a factoring branch, use the factored score */
01634       new->last_lscore = max_successor_prob(wchmm, d->bos.wid, node);
01635     } else {
01636       /* else, set to 0.0 */
01637       new->last_lscore = 0.0;
01638     }
01639 #ifdef FIX_PENALTY
01640     new->last_lscore = new->last_lscore * d->lm_weight;
01641 #else
01642     new->last_lscore = new->last_lscore * d->lm_weight + d->lm_penalty;
01643 #endif
01644     /* set initial word history */
01645     new->last_tre = &(d->bos);
01646     new->last_cword = d->bos.wid;
01647     if (wchmm->hmminfo->multipath) {
01648       /* set initial score using the initial LM score */
01649       new->score = new->last_lscore;
01650     } else {
01651       /* set initial score using the initial LM score and AM score of the first state */
01652       new->score = outprob_style(wchmm, node, d->bos.wid, 0, param) + new->last_lscore;
01653     }
01654     /* assign the initial node to token list */
01655     node_assign_token(d, node, newid);
01656 
01657   }
01658 
01659   if (r->lmtype == LM_DFA && r->lmvar == LM_DFA_GRAMMAR) {
01660   
01661     /* 初期仮説: 文法上文頭に接続しうる単語集合 */
01662     /* initial words: all words that can grammatically begin a sentence */
01663     /* アクティブな文法に属する単語のみ許す */
01664     /* only words in active grammars are allowed to be initial words */
01665     MULTIGRAM *m;
01666     int t,tb,te;
01667     WORD_ID iw;
01668     boolean flag;
01669     DFA_INFO *gdfa;
01670 
01671     gdfa = r->lm->dfa;
01672 
01673     flag = FALSE;
01674     /* for all active grammar */
01675     for(m = r->lm->grammars; m; m = m->next) {
01676       if (m->active) {
01677         tb = m->cate_begin;
01678         te = tb + m->dfa->term_num;
01679         for(t=tb;t<te;t++) {
01680           /* for all word categories that belong to the grammar */
01681           if (dfa_cp_begin(gdfa, t) == TRUE) {
01682             /* if the category can appear at beginning of sentence, */
01683             flag = TRUE;
01684             for (iw=0;iw<gdfa->term.wnum[t];iw++) {
01685               /* create the initial token at the first node of all words that belong to the category */
01686               i = gdfa->term.tw[t][iw];
01687               if (wchmm->hmminfo->multipath) {
01688                 node = wchmm->wordbegin[i];
01689               } else {
01690                 node = wchmm->offset[i][0];
01691               }
01692               /* in the tree lexicon, words in the same category may share the same root node, so skip the node if a token has already been assigned to it */
01693               if (node_exist_token(d, d->tn, node, d->bos.wid) != TOKENID_UNDEFINED) continue;
01694               newid = create_token(d);
01695               new = &(d->tlist[d->tn][newid]);
01696               new->last_tre = &(d->bos);
01697               new->last_lscore = 0.0;
01698               if (wchmm->hmminfo->multipath) {
01699                 new->score = 0.0;
01700               } else {
01701                 new->score = outprob_style(wchmm, node, d->bos.wid, 0, param);
01702               }
01703               node_assign_token(d, node, newid);
01704             }
01705           }
01706         }
01707       }
01708     }
01709     if (!flag) {
01710       jlog("ERROR: init_nodescore: no initial state found in active DFA grammar\n");
01711       return FALSE;
01712     }
01713   }
01714 
01715   if (r->lmtype == LM_DFA && r->lmvar == LM_DFA_WORD) {
01716     /* all words can appear at start */
01717     for (i=0;i<wchmm->winfo->num;i++) {
01718       if (wchmm->hmminfo->multipath) {
01719         node = wchmm->wordbegin[i];
01720       } else {
01721         node = wchmm->offset[i][0];
01722       }
01723       if (node_exist_token(d, d->tn, node, d->bos.wid) != TOKENID_UNDEFINED) continue;
01724       newid = create_token(d);
01725       new = &(d->tlist[d->tn][newid]);
01726       new->last_tre = &(d->bos);
01727       new->last_lscore = 0.0;
01728       if (wchmm->hmminfo->multipath) {
01729         new->score = 0.0;
01730       } else {
01731         new->score = outprob_style(wchmm, node, d->bos.wid, 0, param);
01732       }
01733       node_assign_token(d, node, newid);
01734     }
01735   }
01736 
01737   return TRUE;
01738 
01739 }
01740 
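/* For illustration: in all three LM branches above the initial token is
   placed on the first HMM node of a word, and which node that is depends
   only on whether the acoustic model is a multipath model.  The common
   pattern, factored out as a sketch: */
static int
example_word_head_node(WCHMM_INFO *wchmm, WORD_ID w)
{
  /* multipath models have an explicit word-begin node;
     otherwise the word starts at its first emitting state */
  return wchmm->hmminfo->multipath ? wchmm->wordbegin[w] : wchmm->offset[w][0];
}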
01741 /******************************************************/
01742 /* フレーム同期ビーム探索の実行 --- 最初のフレーム用  */
01743 /* frame synchronous beam search --- first frame only */
01744 /******************************************************/
01745 
01770 boolean
01771 get_back_trellis_init(HTK_Param *param, RecogProcess *r)
01772 {
01773   WCHMM_INFO *wchmm;
01774   BACKTRELLIS *backtrellis;
01775   FSBeam *d;
01776 
01777   wchmm = r->wchmm;
01778   backtrellis = r->backtrellis;
01779   d = &(r->pass1);
01780 
01781   /* Viterbi演算用ワークエリアのスイッチャー tl,tn の初期値設定 */
01782   /* tn: このフレーム用ID   tl: 1フレーム前のID */
01783   /* initialize switch tl, tn for Viterbi computation */
01784   /* tn: this frame  tl: last frame */
01785   d->tn = 0;
01786   d->tl = 1;
01787 
01788   /* 結果の単語トレリスを格納するバックトレリス構造体を初期化 */
01789   /* initialize backtrellis structure to store resulting word trellis */
01790   bt_prepare(backtrellis);
01791 
01792   /* 計算用ワークエリアを初期化 */
01793   /* initialize some data on work area */
01794 
01795   if (r->lmtype == LM_PROB) {
01796     d->lm_weight  = r->config->lmp.lm_weight;
01797     d->lm_penalty = r->config->lmp.lm_penalty;
01798   }
01799   if (r->lmtype == LM_DFA) {
01800     d->penalty1 = r->config->lmp.penalty1;
01801   }
01802 #if defined(WPAIR) && defined(WPAIR_KEEP_NLIMIT)
01803   d->wpair_keep_nlimit = r->config->pass1.wpair_keep_nlimit;
01804 #endif
01805 
01806   /* ワークエリアを確保 */
01807   /* malloc work area */
01808   /* 使用するトークン量 = viterbi時に遷移先となる状態候補の数
01809    * 予測: ビーム数 x 2 (自己遷移+次状態) + 木構造化辞書のルートノード数
01810    */
01811   /* assumed initial number of needed tokens: beam width x 2 (self + next trans.)
01812    * + root node on the tree lexicon (for inter-word trans.)
01813    */
01814   if (d->totalnodenum != wchmm->n) {
01815     free_nodes(d);
01816   }
01817   if (d->nodes_malloced == FALSE) {
01818     malloc_nodes(d, wchmm->n, r->trellis_beam_width * 2 + wchmm->startnum);
01819   }
01820   prepare_nodes(d, r->trellis_beam_width);
01821   
01822   /* 初期スコアを nodescore[tn] にセット */
01823   /* set initial score to nodescore[tn] */
01824   if (init_nodescore(param, r) == FALSE) {
01825     jlog("ERROR: get_back_trellis_init: failed to set initial node scores\n");
01826     return FALSE;
01827   }
01828 
01829   sort_token_no_order(d, r->trellis_beam_width, &(d->n_start), &(d->n_end));
01830 
01831   /* 漸次出力を行なう場合のインターバルを計算 */
01832   /* set interval frame for progout */
01833   r->config->output.progout_interval_frame = (int)((float)r->config->output.progout_interval / ((float)param->header.wshift / 10000.0));
01834 
01835   if (r->config->successive.enabled) {
01836     /* ショートポーズセグメンテーション用パラメータの初期化 */
01837     /* initialize parameter for short pause segmentation */
01838     d->in_sparea = TRUE;                /* assume beginning is silence */
01839     r->am->mfcc->sparea_start = d->tmp_sparea_start = 0; /* set start frame to 0 */
01840 #ifdef SP_BREAK_RESUME_WORD_BEGIN
01841     d->tmp_sp_break_last_word = WORD_INVALID;
01842 #endif
01843     r->sp_break_last_word = WORD_INVALID;
01844     /* 最初のセグメント: 次の非ポーズフレームで第2パスへ移行しない */
01845     /* the end of the first pause segment should always be silB, so
01846        skip the first segment */
01847     d->first_sparea = TRUE;
01848     r->sp_break_2_begin_word = WORD_INVALID;
01849   }
01850 
01851 #ifdef DETERMINE
01852   if (r->lmvar == LM_DFA_WORD) {
01853     /* initialize */
01854     determine_word(r, 0, NULL, 0, 0);
01855   }
01856 #endif
01857 
01858   return TRUE;
01859 }
01860 
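/* For illustration: d->tn and d->tl initialized above form a double buffer
   that the per-frame search code (not fully included in this excerpt) is
   expected to flip once per input frame before propagating tokens.  A rough
   sketch of that flip, using only helpers defined earlier in this file: */
static void
example_flip_frame_buffers(FSBeam *d)
{
  /* what was "this frame" becomes "last frame" */
  d->tl = d->tn;
  if (d->tn == 0) d->tn = 1; else d->tn = 0;
  /* release node assignments made on the old frame ... */
  clear_tokens(d, d->tl);
  /* ... and start the new frame with an empty token list */
  clear_tlist(d, d->tn);
}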
01861 /*****************************************************/
01862 /* frame synchronous beam search --- proceed 1 frame */
01863 /* フレーム同期ビーム探索の実行 --- 1フレーム進める  */
01864 /*****************************************************/
01865 
01884 static void
01885 propagate_token(FSBeam *d, int next_node, LOGPROB next_score, TRELLIS_ATOM *last_tre, WORD_ID last_cword, LOGPROB last_lscore)
01886 {
01887   TOKEN2 *tknext;
01888   TOKENID tknextid;
01889 
01890   if ((tknextid = node_exist_token(d, d->tn, next_node, last_tre->wid)) != TOKENID_UNDEFINED) {
01891     /* 遷移先ノードには既に他ノードから伝搬済み: スコアが高いほうを残す */
01892     /* the destination node already has a token: compare score */
01893     tknext = &(d->tlist[d->tn][tknextid]);
01894     if (tknext->score < next_score) {
01895       /* その遷移先ノードが持つトークンの内容を上書きする(新規トークンは作らない) */
01896       /* overwrite the content of existing destination token: not create a new token */
01897       tknext->last_tre = last_tre; /* propagate last word info */
01898       tknext->last_cword = last_cword; /* propagate last context word info */
01899       tknext->last_lscore = last_lscore; /* set new LM score */
01900       tknext->score = next_score; /* set new score */
01901     }
01902   } else {
01903     /* 遷移先ノードは未伝搬: 新規トークンを作って割り付ける */
01904     /* token unassigned: create new token and assign */
01905     if (next_score > LOG_ZERO) { /* valid token */
01906       tknextid = create_token(d); /* get new token */
01907     }
01908     tknext = &(d->tlist[d->tn][tknextid]);
01909     tknext->last_tre = last_tre; /* propagate last word info */
01910     tknext->last_cword = last_cword; /* propagate last context word info */
01911     tknext->last_lscore = last_lscore;
01912     tknext->score = next_score; /* set new score */
01913     node_assign_token(d, next_node, tknextid); /* assign this new token to the next node */
01914   }
01915 }
01916 
01940 static void
01941 beam_intra_word_core(WCHMM_INFO *wchmm, FSBeam *d, TOKEN2 **tk_ret, int j, int next_node, LOGPROB next_a)
01942 {
01943   int node; 
01944   LOGPROB tmpsum;
01945   LOGPROB ngram_score_cache;
01946   TOKEN2 *tk;
01947 
01948   tk = *tk_ret;
01949 
01950   node = tk->node;
01951 
01952   /* now, 'node' is the source node, 'next_node' is the destination node,
01953      and 'next_a' holds the transition probability */
01954   /* tk->score is the accumulated score at the 'node' on previous frame */
01955   
01956   /******************************************************************/
01957   /* 2.1.1 遷移先へのスコア計算(遷移確率+言語スコア)               */
01958   /*       compute score of destination node (transition prob + LM) */
01959   /******************************************************************/
01960   tmpsum = tk->score + next_a;
01961   ngram_score_cache = LOG_ZERO;
01962   /* the next score at 'next_node' will be computed from 'tmpsum', and
01963      the new LM probability (if updated) will be stored in 'ngram_score_cache' below */
01964   
01965   if (!wchmm->category_tree) {
01966     /* 言語スコア factoring:
01967        arcが自己遷移でない単語内の遷移で,かつ遷移先にsuccessorリスト
01968        があれば,lexicon tree の分岐部分の遷移である */
01969     /* LM factoring:
01970        If this is not a self transition and destination node has successor
01971        list, this is branching transition
01972     */
01973     if (next_node != node) {
01974       if (wchmm->state[next_node].scid != 0
01975 #ifdef UNIGRAM_FACTORING
01976           /* 1-gram factoring 使用時は, 複数で共有される枝では
01977              wchmm->state[node].scid は負の値となり,その絶対値を
01978              添字として wchmm->fscore[] に単語集合の1-gramの最大値が格納
01979              されている. 末端の枝(複数単語で共有されない)では,
01980              wchmm->state[node].scid は正の値となり,
01981              1単語を sc として持つのでそこで正確な2-gramを計算する */
01982           /* When uni-gram factoring,
01983              wchmm->state[node].scid is below 0 for shared branches.
01984              In this case the maximum uni-gram probability for sub-tree
01985              is stored in wchmm->fscore[- wchmm->state[node].scid].
01986              Leaf branches (with only one successor word): the scid is
01987              larger than 0, and has
01988              the word ID in wchmm->sclist[wchmm->state[node].scid].
01989              So precise 2-gram is computed in this point */
01990 #endif
01991           ){
01992         
01993         if (wchmm->lmtype == LM_PROB) {
01994           /* ここで言語モデル確率を更新する */
01995           /* LM value should be update at this transition */
01996           /* N-gram確率からfactoring 値を計算 */
01997           /* compute new factoring value from N-gram probabilities */
01998 #ifdef FIX_PENALTY
01999           /* at the beginning of the sentence, do not add lm_penalty */
02000           if (tk->last_cword == WORD_INVALID) {
02001             ngram_score_cache = max_successor_prob(wchmm, tk->last_cword, next_node) * d->lm_weight;
02002           } else {
02003             ngram_score_cache = max_successor_prob(wchmm, tk->last_cword, next_node) * d->lm_weight + d->lm_penalty;
02004           }
02005 #else
02006           ngram_score_cache = max_successor_prob(wchmm, tk->last_cword, next_node) * d->lm_weight + d->lm_penalty;
02007 #endif
02008           /* スコアの更新: tk->last_lscore に単語内での最後のfactoring値が
02009              入っているので, それをスコアから引いてリセットし, 新たなスコアを
02010              セットする */
02011           /* update score: since 'tk->last_lscore' holds the last LM factoring
02012              value in this word, we first remove the score from the current
02013              score, and then add the new LM value computed above. */
02014           tmpsum -= tk->last_lscore;
02015           tmpsum += ngram_score_cache;
02016         }
02017         
02018         if (wchmm->lmtype == LM_DFA && wchmm->lmvar == LM_DFA_GRAMMAR) {
02019           /* 文法を用いる場合, カテゴリ単位の木構造化がなされていれば,
02020              接続制約は単語間遷移のみで扱われるので,factoring は必要ない. 
02021              カテゴリ単位木構造化が行われない場合, 文法間の接続制約はここ
02022              で factoring で行われることになる. */
02023           /* With DFA, we use category-pair constraint extracted from the DFA
02024              at this 1st pass.  So if we compose a tree lexicon per word's
02025              category, each category tree has the same connection
02026              constraint and thus we can apply the constraint on the cross-word
02027              transition.  This per-category tree lexicon is enabled by default,
02028              and in that case the constraint will be applied at the word end.
02029              If you disable the per-category tree lexicon by undefining
02030              'CATEGORY_TREE', the word-pair constraint will be applied in a
02031              factoring style here.
02032           */
02033           
02034           /* 決定的factoring: 直前単語に対して,sub-tree内にカテゴリ対制約上
02035              接続しうる単語が1つもなければ, この遷移は不可 */
02036           /* deterministic factoring in grammar mode:
02037              the transition is disabled if no word in the sub-tree can
02038              grammatically (under the category-pair constraint) connect
02039              to the previous word.
02040           */
02041           if (!can_succeed(wchmm, tk->last_tre->wid, next_node)) {
02042             tmpsum = LOG_ZERO;
02043           }
02044         }
02045         
02046       }
02047     }
02048   }
02049   /* factoring is not needed in DFA mode when the category tree is used */
02050   
02051   /****************************************/
02052   /* 2.1.2 遷移先ノードへトークン伝搬     */
02053   /*       pass token to destination node */
02054   /****************************************/
02055   
02056   if (ngram_score_cache == LOG_ZERO) ngram_score_cache = tk->last_lscore;
02057   propagate_token(d, next_node, tmpsum, tk->last_tre, tk->last_cword, ngram_score_cache);
02058   
02059   if (d->expanded) {
02060     /* if work area has been expanded at 'create_token()' above,
02061        the inside 'realloc()' will destroy the pointers.
02062        so, reset local pointers from token index */
02063     tk = &(d->tlist[d->tl][d->tindex[d->tl][j]]);
02064     d->expanded = FALSE;
02065   }
02066   
02067   *tk_ret = tk;
02068 
02069 }
02070 
02090 static void
02091 beam_intra_word(WCHMM_INFO *wchmm, FSBeam *d, TOKEN2 **tk_ret, int j)
02092 {
02093   A_CELL2 *ac; 
02094   TOKEN2 *tk;
02095   int node;
02096   int k;
02097 
02098   tk = *tk_ret;
02099 
02100   node = tk->node;
02101 
02102   if (wchmm->self_a[node] != LOG_ZERO) {
02103     beam_intra_word_core(wchmm, d, tk_ret, j, node, wchmm->self_a[node]);
02104   }
02105 
02106   if (wchmm->next_a[node] != LOG_ZERO) {
02107     beam_intra_word_core(wchmm, d, tk_ret, j, node+1, wchmm->next_a[node]);
02108   }
02109 
02110   for(ac=wchmm->ac[node];ac;ac=ac->next) {
02111     for(k=0;k<ac->n;k++) {
02112       beam_intra_word_core(wchmm, d, tk_ret, j, ac->arc[k], ac->a[k]);
02113     }
02114   }
02115 }
02116 
02117 /**************************/
02118 /* 2.2. トレリス単語保存  */
02119 /*      save trellis word */
02120 /**************************/
02145 static TRELLIS_ATOM *
02146 save_trellis(BACKTRELLIS *bt, WCHMM_INFO *wchmm, TOKEN2 *tk, int t, boolean final_for_multipath)
02147 {
02148   TRELLIS_ATOM *tre;
02149   int sword;
02150  
02151   sword = wchmm->stend[tk->node];
02152 
02153   /* この遷移元の単語終端ノードは「直前フレームで」生き残ったノード. 
02154      (「このフレーム」でないことに注意!!)
02155      よってここで, 時間(t-1) を単語終端とするトレリス上の単語仮説
02156      (TRELLIS_ATOM)として,単語トレリス構造体に保存する. */
02157   /* This source node (= word end node) has survived in the
02158      "last" frame (notice: not "this" frame!!).  So this word end
02159      is saved here to the word trellis structure (BACKTRELLIS) as a
02160      trellis word (TRELLIS_ATOM) with end frame (t-1). */
02161   tre = bt_new(bt);
02162   tre->wid = sword;             /* word ID */
02163   tre->backscore = tk->score; /* log score (AM + LM) */
02164   tre->begintime = tk->last_tre->endtime + 1; /* word beginning frame */
02165   tre->endtime   = t-1; /* word end frame */
02166   tre->last_tre  = tk->last_tre; /* link to previous trellis word */
02167   if (wchmm->lmtype == LM_PROB) {
02168     tre->lscore    = tk->last_lscore;   /* log score (LM only) */
02169   } else if (wchmm->lmtype == LM_DFA) {
02170     tre->lscore = 0.0;
02171   }
02172   bt_store(bt, tre); /* save to backtrellis */
02173 #ifdef WORD_GRAPH
02174   if (tre->last_tre != NULL) {
02175     /* mark to indicate that the following word has survived in the beam */
02176     tre->last_tre->within_context = TRUE;
02177   }
02178   if (final_for_multipath) {
02179     /* last node */
02180     if (tre->wid == wchmm->winfo->tail_silwid) {
02181       tre->within_context = TRUE;
02182     }
02183   }
02184 #endif /* WORD_GRAPH */
02185 
02186   return tre;
02187 }
02188       
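/* For illustration: the bookkeeping above makes consecutive trellis words
   tile the frame axis without gap or overlap; the word recorded while
   processing frame t always covers [last_tre->endtime + 1, t - 1].  A small
   sanity-check sketch of that invariant: */
static void
example_check_trellis_times(TRELLIS_ATOM *tre, int t)
{
  if (tre->endtime != t - 1) {
    jlog("WARNING: trellis word does not end at frame %d\n", t - 1);
  }
  if (tre->last_tre != NULL && tre->last_tre->wid != WORD_INVALID &&
      tre->begintime != tre->last_tre->endtime + 1) {
    jlog("WARNING: trellis word does not start right after its predecessor\n");
  }
}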
02189 
02210 static void
02211 beam_inter_word(WCHMM_INFO *wchmm, FSBeam *d, TOKEN2 **tk_ret, TRELLIS_ATOM *tre, int j)
02212 {
02213   A_CELL2 *ac;
02214   TOKEN2 *tk;
02215   int sword;
02216   int node, next_node;
02217   LOGPROB *iwparray; 
02218   int stid;
02219 #ifdef UNIGRAM_FACTORING
02220   int isoid; 
02221 #endif
02222   LOGPROB tmpprob, tmpsum, ngram_score_cache;
02223   int k;
02224   WORD_ID last_word;
02225 
02226   tk = *tk_ret;
02227  
02228   node = tk->node;
02229   sword = wchmm->stend[node];
02230   last_word = wchmm->winfo->is_transparent[sword] ? tk->last_cword : sword;
02231 
02232   if (wchmm->lmtype == LM_PROB) {
02233 
02234     /* 遷移元単語が末尾単語の終端なら,どこへも遷移させない */
02235     /* do not allow transition if the source word is end-of-sentence word */
02236     if (sword == wchmm->winfo->tail_silwid) return;
02237 
02238 #ifdef UNIGRAM_FACTORING
02239     /* あとで共有単語先頭ノードに対して単語間遷移をまとめて計算するため,*/
02240     /* このループ内では最大尤度を持つ単語終端ノードを記録しておく */
02241     /* here we will record the best wordend node of maximum likelihood
02242        at this frame, to compute later the cross-word transitions toward
02243        shared factoring word-head node */
02244     tmpprob = tk->score;
02245     if (!wchmm->hmminfo->multipath) tmpprob += wchmm->wordend_a[sword];
02246     if (d->wordend_best_score < tmpprob) {
02247       d->wordend_best_score = tmpprob;
02248       d->wordend_best_node = node;
02249       d->wordend_best_tre = tre;
02250       d->wordend_best_last_cword = tk->last_cword;
02251     }
02252 #endif
02253     
02254     /* N-gramにおいては常に全単語の接続を考慮する必要があるため,
02255        ここで単語間の言語確率値をすべて計算しておく. 
02256        キャッシュは max_successor_prob_iw() 内で考慮. */
02257     /* Since any word can follow any other word in an N-gram, we first
02258        compute all the inter-word LM probabilities here.
02259        Caching is handled inside max_successor_prob_iw(). */
02260     if (wchmm->winfo->is_transparent[sword]) {
02261       iwparray = max_successor_prob_iw(wchmm, tk->last_cword);
02262     } else {
02263       iwparray = max_successor_prob_iw(wchmm, sword);
02264     }
02265   }
02266 
02267   /* すべての単語始端ノードに対して以下を実行 */
02268   /* for all beginning-of-word nodes, */
02269   /* wchmm->startnode[0..stid-1] ... 単語始端ノードリスト */
02270   /* wchmm->startnode[0..stid-1] ... list of word start node (shared) */
02271   for (stid = wchmm->startnum - 1; stid >= 0; stid--) {
02272     next_node = wchmm->startnode[stid];
02273     if (wchmm->hmminfo->multipath) {
02274       if (wchmm->lmtype == LM_PROB) {
02275         /* connection to the head silence word is not allowed */
02276         if (wchmm->wordbegin[wchmm->winfo->head_silwid] == next_node) continue;
02277       }
02278     }
02279     
02280     /*****************************************/
02281     /* 2.3.1. 単語間言語制約を適用           */
02282     /*        apply cross-word LM constraint */
02283     /*****************************************/
02284         
02285     if (wchmm->lmtype == LM_PROB) {
02286       /* N-gram確率を計算 */
02287       /* compute N-gram probability */
02288 #ifdef UNIGRAM_FACTORING
02289       /* 1-gram factoring における単語間言語確率キャッシュの効率化:
02290          1-gram factoring は単語履歴に依存しないので,
02291          ここで参照する factoring 値の多くは
02292          wchmm->fscore[] に既に格納され, 探索中も不変である. 
02293          よって計算が必要な単語(どの単語ともノードを共有しない単語)
02294          についてのみ iwparray[] で計算・キャッシュする.  */
02295       /* Efficient cross-word LM cache:
02296          As 1-gram factoring values are independent of the word context,
02297          they remain unchanged during the search.  So, in cross-word LM
02298          computation, word-beginning states that share nodes with other
02299          words already have their factoring value in wchmm and need no cache.
02300          Only the unshared word-beginning states are computed and
02301          cached here in iwparray[].
02302       */
02303       /* wchmm->start2isolate[0..stid-1] ... ノードを共有しない単語は
02304          その通しID, 共有する(キャッシュの必要のない)単語は -1 */
02305       /* wchmm->start2isolate[0..stid-1] ... isolated-word ID for each
02306          beginning-of-word state.  value: -1 for states that have a
02307          1-gram factoring value (i.e. share nodes with some other words),
02308          and a serial ID for unshared words
02309       */
02310       isoid = wchmm->start2isolate[stid];
02311       /* 計算が必要でない単語先頭ノードはパスをまとめて後に計算するので
02312          ここではスキップ */
02313       /* the shared nodes will be computed afterward, so just skip them
02314          here */
02315       if (isoid == -1) continue;
02316       tmpprob = iwparray[isoid];
02317 #else
02318       tmpprob = iwparray[stid];
02319 #endif
02320     }
02321 
02322     /* 遷移先の単語が先頭単語なら遷移させない. 
02323        これは wchmm.c で該当単語に stid を割り振らないことで対応
02324        しているので,ここでは何もしなくてよい */
02325     /* do not allow transition if the destination word is the
02326        beginning-of-sentence word.  This limitation is realized by
02327        not assigning 'stid' for the word in wchmm.c, so we have nothing
02328        to do here.
02329     */
02330     
02331     if (wchmm->category_tree) {
02332       /* 文法の場合, 制約は決定的: カテゴリ対制約上許されない場合は遷移させない */
02333       /* With DFA and per-category tree lexicon,
02334          LM constraint is deterministic:
02335          do not allow transition if the category connection is not allowed
02336          (with category tree, constraint can be determined on top node) */
02337       if (dfa_cp(wchmm->dfa, wchmm->winfo->wton[sword], wchmm->winfo->wton[wchmm->start2wid[stid]]) == FALSE) continue;
02338     }
02339 
02340     /*******************************************************************/
02341     /* 2.3.2. 遷移先の単語先頭へのスコア計算(遷移確率+言語スコア)     */
02342     /*        compute score of destination node (transition prob + LM) */
02343     /*******************************************************************/
02344     tmpsum = tk->score;
02345     if (!wchmm->hmminfo->multipath) tmpsum += wchmm->wordend_a[sword];
02346 
02347     /* 'tmpsum' now holds outgoing score from the wordend node */
02348     if (wchmm->lmtype == LM_PROB) {
02349       /* 言語スコアを追加 */
02350       /* add LM score */
02351       ngram_score_cache = tmpprob * d->lm_weight + d->lm_penalty;
02352       tmpsum += ngram_score_cache;
02353       if (wchmm->winfo->is_transparent[sword] && wchmm->winfo->is_transparent[tk->last_cword]) {
02354             
02355         tmpsum += d->lm_penalty_trans;
02356       }
02357     }
02358     if (wchmm->lmtype == LM_DFA) {
02359       /* grammar: 単語挿入ペナルティを追加 */
02360       /* grammar: add insertion penalty */
02361       tmpsum += d->penalty1;
02362 
02363       /* grammar: deterministic factoring (in case category-tree not enabled) */
02364       if (!wchmm->category_tree) {
02365         if (!can_succeed(wchmm, sword, next_node)) {
02366           tmpsum = LOG_ZERO;
02367         }
02368       }
02369     }
02370     
02371     /*********************************************************************/
02372     /* 2.3.3. 遷移先ノードへトークン伝搬(単語履歴情報は更新)             */
02373     /*        pass token to destination node (updating word-context info) */
02374     /*********************************************************************/
02375 
02376     if (wchmm->hmminfo->multipath) {
02377       /* since top node has no output, we should go one more step further */
02378       if (wchmm->self_a[next_node] != LOG_ZERO) {
02379         propagate_token(d, next_node, tmpsum + wchmm->self_a[next_node], tre, last_word, ngram_score_cache);
02380         if (d->expanded) {
02381           /* if the token work area has been expanded in 'create_token()' above,
02382              the realloc() inside it may have invalidated the pointers,
02383              so reset the local pointer from the token index */
02384           tk = &(d->tlist[d->tn][d->tindex[d->tn][j]]);
02385           d->expanded = FALSE;
02386         }
02387       }
02388       if (wchmm->next_a[next_node] != LOG_ZERO) {
02389         propagate_token(d, next_node+1, tmpsum + wchmm->next_a[next_node], tre, last_word, ngram_score_cache);
02390         if (d->expanded) {
02391           /* if the token work area has been expanded in 'create_token()' above,
02392              the realloc() inside it may have invalidated the pointers,
02393              so reset the local pointer from the token index */
02394           tk = &(d->tlist[d->tn][d->tindex[d->tn][j]]);
02395           d->expanded = FALSE;
02396         }
02397       }
02398       for(ac=wchmm->ac[next_node];ac;ac=ac->next) {
02399         for(k=0;k<ac->n;k++) {
02400           propagate_token(d, ac->arc[k], tmpsum + ac->a[k], tre, last_word, ngram_score_cache);
02401           if (d->expanded) {
02402             /* if the token work area has been expanded in 'create_token()' above,
02403                the realloc() inside it may have invalidated the pointers,
02404                so reset the local pointer from the token index */
02405             tk = &(d->tlist[d->tn][d->tindex[d->tn][j]]);
02406             d->expanded = FALSE;
02407           }
02408         }
02409       }
02410     } else {
02411       propagate_token(d, next_node, tmpsum, tre, last_word, ngram_score_cache);
02412       if (d->expanded) {
02413         /* if the token work area has been expanded in 'create_token()' above,
02414            the realloc() inside it may have invalidated the pointers,
02415            so reset the local pointer from the token index */
02416         tk = &(d->tlist[d->tl][d->tindex[d->tl][j]]);
02417         d->expanded = FALSE;
02418       }
02419     }
02420         
02421   }     /* end of next word heads */
02422 
02423   *tk_ret = tk;
02424 
02425 
02426 } /* end of cross-word processing */
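/* Editor's illustrative sketch (not part of beam.c): the score composition of
   one cross-word transition in the N-gram, non-multipath case of
   beam_inter_word() above.  'lm_prob' stands for the factored LM probability
   taken from iwparray[]; the helper itself is hypothetical, but all field
   names mirror the code above. */
static LOGPROB
cross_word_score_sketch(WCHMM_INFO *wchmm, FSBeam *d, TOKEN2 *tk, int sword, LOGPROB lm_prob)
{
  LOGPROB score;

  score  = tk->score + wchmm->wordend_a[sword];     /* AM score leaving the word-end node */
  score += lm_prob * d->lm_weight + d->lm_penalty;  /* scaled LM score plus word insertion penalty */
  if (wchmm->winfo->is_transparent[sword] &&
      wchmm->winfo->is_transparent[tk->last_cword]) {
    score += d->lm_penalty_trans;                   /* extra penalty between transparent words */
  }
  return score;
}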
02427 
02428 
02429 #ifdef UNIGRAM_FACTORING
02430 
02457 static void
02458 beam_inter_word_factoring(WCHMM_INFO *wchmm, FSBeam *d)
02459 {
02460   int sword;
02461   int node, next_node;
02462   int stid;
02463   LOGPROB tmpprob, tmpsum, ngram_score_cache;
02464   A_CELL2 *ac;
02465   int j;
02466   WORD_ID last_word;
02467 
02468   node = d->wordend_best_node;
02469   sword = wchmm->stend[node];
02470   last_word = wchmm->winfo->is_transparent[sword] ? d->wordend_best_last_cword : sword;
02471 
02472   for (stid = wchmm->startnum - 1; stid >= 0; stid--) {
02473     next_node = wchmm->startnode[stid];
02474     /* compute transition from 'node' at end of word 'sword' to 'next_node' */
02475     /* skip isolated words already calculated in the above main loop */
02476     if (wchmm->start2isolate[stid] != -1) continue;
02477     /* rest should have 1-gram factoring score at wchmm->fscore */
02478     if (wchmm->state[next_node].scid >= 0) {
02479       j_internal_error("get_back_trellis_proceed: scid mismatch at 1-gram factoring of shared states\n");
02480     }
02481     tmpprob = wchmm->fscore[- wchmm->state[next_node].scid];
02482     ngram_score_cache = tmpprob * d->lm_weight + d->lm_penalty;
02483     tmpsum = d->wordend_best_score;
02484     tmpsum += ngram_score_cache;
02485     if (wchmm->winfo->is_transparent[sword] && wchmm->winfo->is_transparent[d->wordend_best_last_cword]) {
02486       tmpsum += d->lm_penalty_trans;
02487     }
02488 
02489     if (wchmm->hmminfo->multipath) {
02490       /* since top node has no output, we should go one more step further */
02491       if (wchmm->self_a[next_node] != LOG_ZERO) {
02492         propagate_token(d, next_node, tmpsum + wchmm->self_a[next_node], d->wordend_best_tre, last_word, ngram_score_cache);
02493         if (d->expanded) {
02494           d->expanded = FALSE;
02495         }
02496       }
02497       if (wchmm->next_a[next_node] != LOG_ZERO) {
02498         propagate_token(d, next_node+1, tmpsum + wchmm->next_a[next_node], d->wordend_best_tre, last_word, ngram_score_cache);
02499         if (d->expanded) {
02500           d->expanded = FALSE;
02501         }
02502       }
02503       for(ac=wchmm->ac[next_node];ac;ac=ac->next) {
02504         for(j=0;j<ac->n;j++) {
02505           propagate_token(d, ac->arc[j], tmpsum + ac->a[j], d->wordend_best_tre, last_word, ngram_score_cache);
02506           if (d->expanded) {
02507             d->expanded = FALSE;
02508           }
02509         }
02510       }
02511       
02512     } else {
02513       propagate_token(d, next_node, tmpsum, d->wordend_best_tre, last_word, ngram_score_cache);
02514       if (d->expanded) {
02515         d->expanded = FALSE;
02516       }
02517     }
02518 
02519   }
02520 }
02521 
02522 #endif /* UNIGRAM_FACTORING */
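/* Editor's illustrative sketch (not part of beam.c): reading the constant
   1-gram factoring value of a shared word-head node, as done inside
   beam_inter_word_factoring() above.  A non-negative scid means the node has
   no stored factoring value; the negated scid indexes wchmm->fscore[]. */
static LOGPROB
shared_head_factoring_score_sketch(WCHMM_INFO *wchmm, int node)
{
  int scid = wchmm->state[node].scid;

  if (scid >= 0) return LOG_ZERO;   /* not a shared word-head node with a stored factoring value */
  return wchmm->fscore[-scid];      /* pre-computed 1-gram factoring score */
}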
02523 
02524 
02566 boolean
02567 get_back_trellis_proceed(int t, HTK_Param *param, RecogProcess *r, boolean final_for_multipath)
02568 {
02569   /* local static work area for get_back_trellis_proceed() */
02570   /* these are local work areas and need not be kept between calls */
02571   TRELLIS_ATOM *tre; 
02572   int node; 
02573   int lmtype, lmvar;
02574 
02575   WCHMM_INFO *wchmm;
02576   FSBeam *d;
02577   int j;
02578   TOKEN2  *tk;
02579 
02580   /* local copied variables */
02581   int tn, tl;
02582 
02583   /* store pointer to local for rapid access */
02584   wchmm = r->wchmm;
02585   d = &(r->pass1);
02586   
02587 
02588   lmtype = r->lmtype;
02589   lmvar  = r->lmvar;
02590 
02591   /*********************/
02592   /* 1. 初期化         */
02593   /*    initialization */
02594   /*********************/
02595 
02596   /* tl と tn を入れ替えて作業領域を切り替え */
02597   /* tl (= 直前の tn) は直前フレームの結果を持つ */
02598   /* swap tl and tn to switch work buffer */
02599   /* tl (= last tn) holds result of the previous frame */
02600   d->tl = d->tn;
02601   if (d->tn == 0) d->tn = 1; else d->tn = 0;
02602   
02603   /* store locally for rapid access */
02604   tl = d->tl;
02605   tn = d->tn;
02606 
02607 #ifdef UNIGRAM_FACTORING
02608   /* 1-gram factoring では単語先頭での言語確率が一定で直前単語に依存しない
02609      ため,単語間 Viterbi において選ばれる直前単語は,次単語によらず共通である. 
02610      よって単語終端からfactoring値のある単語先頭への遷移は1つにまとめられる. 
02611      ただし,木から独立した単語については, 単語先頭で履歴に依存した2-gramが
02612      与えられるため, 最尤の単語間 Viterbi パスは次単語ごとに異なる. 
02613      よってそれらについてはまとめずに別に計算する */
02614   /* In 1-gram factoring, the language score on the word-head node is constant
02615      and independent of the previous word.  So, the same word hypothesis will
02616      be selected as the best previous word at the inter-word Viterbi
02617      processing.  So, in inter-word processing, we can (1) select only
02618      the best word-end hypothesis, and then (2) process viterbi from the node
02619      to each word-head node.  On the other hand, isolated words,
02620      i.e. words that share no node with any other word, have their own
02621      word-head nodes, and the true 2-gram language score is determined at
02622      those top nodes.  In such cases the best previous word hypothesis will
02623      differ according to the language scores, so we have to handle such words separately. */
02624   /* initialize the max value used to select the best word-end hypothesis */
02625   if (lmtype == LM_PROB) {
02626     d->wordend_best_score = LOG_ZERO;
02627   }
02628 #endif
02629 
02630 #ifdef DEBUG
02631   /* debug */
02632   /* node_check_token(d, tl); */
02633 #endif
02634 
02635   /* トークンバッファを初期化: 直前フレームで使われた部分だけクリアすればよい */
02636   /* initialize token buffer: for speedup, only the parts used in the last call are cleared */
02637   clear_tokens(d, tl);
02638 
02639   /**************************/
02640   /* 2. Viterbi計算         */
02641   /*    Viterbi computation */
02642   /**************************/
02643   /* 直前フレームからこのフレームへの Viterbi 計算を行なう */
02644   /* tindex[tl][n_start..n_end] に直前フレーム上位ノードのIDが格納されている */
02645   /* do one viterbi computation from last frame to this frame */
02646   /* tindex[tl][n_start..n_end] holds IDs of survived nodes in last frame */
02647 
02648   if (wchmm->hmminfo->multipath) {
02649     /*********************************/
02650     /* MULTIPATH MODE */
02651     /*********************************/
02652 
02653     for (j = d->n_start; j <= d->n_end; j++) {
02654       /* tk: 対象トークン  node: そのトークンを持つ木構造化辞書ノードID */
02655       /* tk: token data  node: lexicon tree node ID that holds the 'tk' */
02656       tk = &(d->tlist[tl][d->tindex[tl][j]]);
02657       if (tk->score <= LOG_ZERO) continue; /* invalid node */
02658       node = tk->node;
02659       /*********************************/
02660       /* 2.1. 単語内遷移               */
02661       /*      word-internal transition */
02662       /*********************************/
02663       beam_intra_word(wchmm, d, &tk, j);
02664     }
02665     /*******************************************************/
02666     /* 2.2. スコアでトークンをソートしビーム幅分の上位を決定 */
02667     /*    sort tokens by score up to beam width            */
02668     /*******************************************************/
02669     sort_token_no_order(d, r->trellis_beam_width, &(d->n_start), &(d->n_end));
02670   
02671     /*************************/
02672     /* 2.3. 単語間Viterbi計算  */
02673     /*    cross-word viterbi */
02674     /*************************/
02675     for(j = d->n_start; j <= d->n_end; j++) {
02676       tk = &(d->tlist[tn][d->tindex[tn][j]]);
02677       node = tk->node;
02678       
02679       /* 遷移元ノードが単語終端ならば */
02680       /* if source node is end state of a word, */
02681       if (wchmm->stend[node] != WORD_INVALID) {
02682 
02683         /**************************/
02684         /* 2.4. トレリス単語保存  */
02685         /*      save trellis word */
02686         /**************************/
02687 #ifdef SPSEGMENT_NAIST
02688         if (r->config->successive.enabled && !d->after_trigger) {
02689           tre = tk->last_tre;   /* dummy */
02690         } else {
02691           tre = save_trellis(r->backtrellis, wchmm, tk, t, final_for_multipath);
02692         }
02693 #else
02694         tre = save_trellis(r->backtrellis, wchmm, tk, t, final_for_multipath);
02695 #endif
02696         /* 最終フレームであればここまで:遷移はさせない */
02697         /* If this is the final frame, do not perform cross-word transition */
02698         if (final_for_multipath) continue;
02699         /* in isolated word recognition mode, cross-word transition is not needed */
02700         if (lmvar == LM_DFA_WORD) continue;
02701 
02702         /******************************/
02703         /* 2.5. 単語間遷移            */
02704         /*      cross-word transition */
02705         /******************************/
02706 
02707 #ifdef UNIGRAM_FACTORING
02708         /* ここで処理されるのは isolated words のみ,
02709            shared nodes はまとめてこのループの外で計算する */
02710         /* Only the isolated words will be processed here.
02711            The shared nodes with constant factoring values will be computed
02712            after this loop */
02713 #endif
02714         beam_inter_word(wchmm, d, &tk, tre, j);
02715 
02716       } /* end of cross-word processing */
02717     
02718     } /* end of main viterbi loop */
02719 
02720 
02721 
02722   } else {
02723 
02724     /*********************************/
02725     /* NORMAL MODE */
02726     /*********************************/
02727 
02728     for (j = d->n_start; j <= d->n_end; j++) {
02729       /* tk: 対象トークン  node: そのトークンを持つ木構造化辞書ノードID */
02730       /* tk: token data  node: lexicon tree node ID that holds the 'tk' */
02731       tk = &(d->tlist[tl][d->tindex[tl][j]]);
02732       if (tk->score <= LOG_ZERO) continue; /* invalid node */
02733       node = tk->node;
02734       
02735       /*********************************/
02736       /* 2.1. 単語内遷移               */
02737       /*      word-internal transition */
02738       /*********************************/
02739       beam_intra_word(wchmm, d, &tk, j);
02740 
02741       /* 遷移元ノードが単語終端ならば */
02742       /* if source node is end state of a word, */
02743       if (wchmm->stend[node] != WORD_INVALID) {
02744         
02745         /**************************/
02746         /* 2.2. トレリス単語保存  */
02747         /*      save trellis word */
02748         /**************************/
02749 #ifdef SPSEGMENT_NAIST
02750         if (r->config->successive.enabled && !d->after_trigger) {
02751           tre = tk->last_tre;   /* dummy */
02752         } else {
02753           tre = save_trellis(r->backtrellis, wchmm, tk, t, final_for_multipath);
02754         }
02755 #else
02756         tre = save_trellis(r->backtrellis, wchmm, tk, t, final_for_multipath);
02757 #endif
02758         /* in isolated word recognition mode, cross-word transition is not needed */
02759         if (lmvar == LM_DFA_WORD) continue;
02760 
02761         /******************************/
02762         /* 2.3. 単語間遷移            */
02763         /*      cross-word transition */
02764         /******************************/
02765         
02766 #ifdef UNIGRAM_FACTORING
02767         /* ここで処理されるのは isolated words のみ,
02768            shared nodes はまとめてこのループの外で計算する */
02769         /* Only the isolated words will be processed here.
02770            The shared nodes with constant factoring values will be computed
02771            after this loop */
02772 #endif
02773 
02774         beam_inter_word(wchmm, d, &tk, tre, j);
02775 
02776       } /* end of cross-word processing */
02777       
02778     } /* end of main viterbi loop */
02779 
02780 
02781   }
02782 
02783 #ifdef UNIGRAM_FACTORING
02784 
02785   if (lmtype == LM_PROB) {
02786 
02787     /***********************************************************/
02788     /* 2.x 単語終端からfactoring付き単語先頭への遷移 ***********/
02789     /*    transition from wordend to shared (factorized) nodes */
02790     /***********************************************************/
02791     /* d->wordend_best_* hold the best word-end hypothesis at this frame. */
02792     if (d->wordend_best_score > LOG_ZERO) {
02793       beam_inter_word_factoring(wchmm, d);
02794     }
02795   }
02796 #endif /* UNIGRAM_FACTORING */
02797 
02798   /***************************************/
02799   /* 3. 状態の出力確率計算               */
02800   /*    compute state output probability */
02801   /***************************************/
02802 
02803   /* 次段の有効ノードについて出力確率を計算してスコアに加える */
02804   /* compute outprob for new valid (token assigned) nodes and add to score */
02805   /* 今扱っているのが入力の最終フレームの場合出力確率は計算しない */
02806   /* don't calculate the last frame (transition only) */
02807 
02808   if (wchmm->hmminfo->multipath) {
02809     if (! final_for_multipath) {
02810       for (j = 0; j < d->tnum[tn]; j++) {
02811         tk = &(d->tlist[tn][d->tindex[tn][j]]);
02812         /* skip non-output state */
02813         if (wchmm->state[tk->node].out.state == NULL) continue;
02814         tk->score += outprob_style(wchmm, tk->node, tk->last_tre->wid, t, param);
02815       }
02816     }
02817   } else {
02818     for (j = 0; j < d->tnum[tn]; j++) {
02819       tk = &(d->tlist[tn][d->tindex[tn][j]]);
02820       tk->score += outprob_style(wchmm, tk->node, tk->last_tre->wid, t, param);
02821     }
02822   }
02823 
02824   /*******************************************************/
02825   /* 4. スコアでトークンをソートしビーム幅分の上位を決定 */
02826   /*    sort tokens by score up to beam width            */
02827   /*******************************************************/
02828 
02829   /* reset tlist[tl] for the next frame */
02830   clear_tlist(d, tl);
02831 
02832   /* use heap sort to pick the top (beam width) tokens from the node set of this frame */
02833   /* (ordering among the selected tokens is not needed) */
02834   sort_token_no_order(d, r->trellis_beam_width, &(d->n_start), &(d->n_end));
02835   /***************/
02836   /* 5. 終了処理 */
02837   /*    finalize */
02838   /***************/
02839 
02840 #ifdef SPSEGMENT_NAIST
02841   if (!r->config->successive.enabled || d->after_trigger) {
02842 #endif
02843 
02844     /* call frame-wise callback */
02845     r->have_interim = FALSE;
02846     if (t > 0) {
02847       if (r->config->output.progout_flag) {
02848         /* 漸次出力: 現フレームのベストパスを一定時間おきに上書き出力 */
02849         /* progressive result output: output the current best path at regular frame intervals */
02850         if (((t-1) % r->config->output.progout_interval_frame) == 0) {
02851           r->have_interim = TRUE;
02852           bt_current_max(r, t-1);
02853         }
02854       }
02855     }
02856     
02857     /* jlog("DEBUG: %d: %d\n",t,tnum[tn]); */
02858     /* for debug: output current max word */
02859     if (debug2_flag) {
02860       bt_current_max_word(r, t-1);
02861     }
02862 
02863 #ifdef DETERMINE
02864     if (lmvar == LM_DFA_WORD) {
02865       check_determine_word(r, t-1);
02866     }
02867 #endif
02868 
02869 #ifdef SPSEGMENT_NAIST
02870   }
02871 #endif
02872     
02873   /* if no nodes are left within the beam, force termination of the search */
02874   if (d->tnum[tn] == 0) {
02875     jlog("ERROR: get_back_trellis_proceed: %02d %s: frame %d: no nodes left in beam, now terminates search\n", r->config->id, r->config->name, t);
02876     return(FALSE);
02877   }
02878 
02879   return(TRUE);
02880     
02881 }
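/* Editor's illustrative sketch (not part of beam.c): the double buffering of
   token lists performed at the top of get_back_trellis_proceed().  Buffers 0
   and 1 are swapped every frame, so 'tl' always refers to the tokens that
   survived the previous frame and 'tn' receives the tokens of the current
   frame. */
static void
swap_token_buffer_sketch(FSBeam *d)
{
  d->tl = d->tn;                  /* previous frame's buffer becomes the read side */
  d->tn = (d->tn == 0) ? 1 : 0;   /* the other buffer becomes the write side */
}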
02882 
02883 /*************************************************/
02884 /* frame synchronous beam search --- last frame  */
02885 /* フレーム同期ビーム探索の実行 --- 最終フレーム */
02886 /*************************************************/
02887 
02913 void
02914 get_back_trellis_end(HTK_Param *param, RecogProcess *r)
02915 {
02916   WCHMM_INFO *wchmm;
02917   FSBeam *d;
02918   int j;
02919   TOKEN2 *tk;
02920 
02921   wchmm = r->wchmm;
02922   d = &(r->pass1);
02923 
02924   /* 最後にビーム内に残った単語終端トークンを処理する */
02925   /* process the last wordend tokens */
02926 
02927 
02928   if (r->am->hmminfo->multipath) {
02929     /* MULTI-PATH VERSION */
02930 
02931     /* 単語末ノードへの遷移のみ計算 */
02932     /* only arcs to the word-end nodes are calculated */
02933     get_back_trellis_proceed(param->samplenum, param, r, TRUE);
02934 
02935   } else {
02936     /* NORMAL VERSION */
02937 
02938     /* 最後の遷移のあとの単語終端処理を行う */
02939     /* process the word-ends at the last frame */
02940     d->tl = d->tn;
02941     if (d->tn == 0) d->tn = 1; else d->tn = 0;
02942     for (j = d->n_start; j <= d->n_end; j++) {
02943       tk = &(d->tlist[d->tl][d->tindex[d->tl][j]]);
02944       if (wchmm->stend[tk->node] != WORD_INVALID) {
02945         save_trellis(r->backtrellis, wchmm, tk, param->samplenum, TRUE);
02946       }
02947     }
02948 
02949   }
02950     
02951 }
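/* Editor's illustrative sketch (not part of beam.c): how the frame-synchronous
   functions of this file fit together for one utterance.  Initialization of
   the first frame and the surrounding engine logic are omitted; 'param' and
   'r' are assumed to be already prepared by the caller, and the starting
   frame index used here is an assumption of this sketch. */
static void
run_first_pass_sketch(HTK_Param *param, RecogProcess *r)
{
  int t;

  for (t = 1; t < param->samplenum; t++) {
    /* one Viterbi step from frame t-1 to frame t;
       FALSE means no token survived and the search was terminated */
    if (get_back_trellis_proceed(t, param, r, FALSE) == FALSE) break;
  }
  get_back_trellis_end(param, r);          /* process word ends at the last frame */
  finalize_1st_pass(r, param->samplenum);  /* re-arrange the trellis and store the pass-1 result */
}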
02952 
02953 /*************************/
02954 /* 探索終了 --- 終了処理 */
02955 /* end of search         */
02956 /*************************/
02991 void
02992 finalize_1st_pass(RecogProcess *r, int len)
02993 {
02994   BACKTRELLIS *backtrellis;
02995 
02996   backtrellis = r->backtrellis;
02997  
02998   backtrellis->framelen = len;
02999 
03000   /* 単語トレリス(backtrellis) を整理: トレリス単語の再配置とソート */
03001   /* re-arrange backtrellis: index them by frame, and sort by word ID */
03002 
03003   bt_relocate_rw(backtrellis);
03004   bt_sort_rw(backtrellis);
03005   if (backtrellis->num == NULL) {
03006     if (backtrellis->framelen > 0) {
03007       jlog("WARNING: %02d %s: input processed, but no survived word found\n", r->config->id, r->config->name);
03008     }
03009     /* recognition failed */
03010     r->result.status = J_RESULT_STATUS_FAIL;
03011     return;
03012   }
03013 
03014   /* 第1パスのベストパスを結果に格納する */
03015   /* store 1st pass result (best hypothesis) to result */
03016   if (r->lmvar == LM_DFA_WORD) {
03017     find_1pass_result_word(len, r);
03018   } else {
03019     find_1pass_result(len, r);
03020   }
03021 }
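/* Editor's illustrative sketch (not part of beam.c): a minimal check a caller
   could perform after finalize_1st_pass().  J_RESULT_STATUS_FAIL is the
   status set above when no trellis word survived the first pass; any further
   handling is outside the scope of this file. */
static boolean
first_pass_has_result_sketch(RecogProcess *r)
{
  return (r->result.status == J_RESULT_STATUS_FAIL) ? FALSE : TRUE;
}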
03022 
03037 void
03038 fsbeam_free(FSBeam *d)
03039 {
03040   free_nodes(d);
03041   if (d->pausemodelnames != NULL) {
03042     free(d->pausemodelnames);
03043     free(d->pausemodel);
03044   }
03045 }
03046 
03047 /* end of file */
