libjulius/src/beam.c

See description.
00001 
00048 /*
00049  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
00050  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00051  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
00052  * All rights reserved
00053  */
00054 
00055 #include <julius/julius.h>
00056 
00057 #undef DEBUG
00058 
00059 
00060 /* ---------------------------------------------------------- */
00061 /*                     第1パスの結果処理                     */
00062 /*              end procedure to get result of 1st pass       */
00063 /* ---------------------------------------------------------- */
00064 
00065 #ifdef WORD_GRAPH
00066 
/** 
 * Build word graph (lattice) nodes from the 1st-pass word trellis.
 *
 * For every trellis word ending at the given frame that lies on a
 * surviving context path, allocate a WordGraph node, fill it from the
 * trellis atom, prepend it to r->result.wg1, and recurse into the
 * frame where its predecessor word ends.
 *
 * @param frame [in] frame on which the target words end; negative does nothing
 * @param r [i/o] recognition process instance
 */
static void
generate_lattice(int frame, RecogProcess *r)
{
  BACKTRELLIS *bt;
  WORD_INFO *winfo;
  TRELLIS_ATOM *ta;
  int i, j;
  LOGPROB l;
  WordGraph *new;

  bt = r->backtrellis;
  winfo = r->lm->winfo;

  if (frame >= 0) {
    for (i=0;i<bt->num[frame];i++) {
      ta = bt->rw[frame][i];
      /* words will be saved as a part of graph only if any of its
         following word has been survived in a beam */
      if (! ta->within_context) continue; /* not a candidate */
      if (ta->within_wordgraph) continue; /* already marked */
      /* mark  */
      ta->within_wordgraph = TRUE;

      /* allocate a new graph word and fill it from the trellis atom */
      new = (WordGraph *)mymalloc(sizeof(WordGraph));
      new->wid = ta->wid;
      new->lefttime = ta->begintime;
      new->righttime = ta->endtime;
      new->fscore_head = ta->backscore;
      new->fscore_tail = 0.0;
      new->gscore_head = 0.0;
      new->gscore_tail = 0.0;
      new->lscore_tmp = ta->lscore;
#ifdef CM_SEARCH
      new->cmscore = 0.0;
#endif
      new->forward_score = new->backward_score = 0.0;
      /* head/tail phones are the first/last entries of the word's phone sequence */
      new->headphone = winfo->wseq[ta->wid][0];
      new->tailphone = winfo->wseq[ta->wid][winfo->wlen[ta->wid]-1];

      /* context link arrays start with FANOUTSTEP entries; grown later as needed */
      new->leftwordmaxnum = FANOUTSTEP;
      new->leftword = (WordGraph **)mymalloc(sizeof(WordGraph *) * new->leftwordmaxnum);
      new->left_lscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * new->leftwordmaxnum);
      new->leftwordnum = 0;
      new->rightwordmaxnum = FANOUTSTEP;
      new->rightword = (WordGraph **)mymalloc(sizeof(WordGraph *) * new->rightwordmaxnum);
      new->right_lscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * new->rightwordmaxnum);
      new->rightwordnum = 0;

      /* average acoustic score per frame:
         (accumulated score - predecessor's accumulated score - LM score)
         divided by the word duration in frames */
      l = ta->backscore;
      if (ta->last_tre->wid != WORD_INVALID) {
        l -= ta->last_tre->backscore;
      }
      l -= ta->lscore;
      new->amavg = l / (float)(ta->endtime - ta->begintime + 1);

#ifdef GRAPHOUT_DYNAMIC
      new->purged = FALSE;
#endif
      new->saved = FALSE;
      new->graph_cm = 0.0;
      new->mark = FALSE;

      /* prepend to the graph word list of this process */
      new->next = r->result.wg1;
      r->result.wg1 = new;

      /* recursive call */
      generate_lattice(ta->last_tre->endtime, r);
    }
  }
}
00166 
00183 static void
00184 link_lattice_by_time(WordGraph *root)
00185 {
00186   WordGraph *wg;
00187   WordGraph *wtmp;
00188   int lefttime, righttime;
00189   
00190   for(wg=root;wg;wg=wg->next) {
00191     
00192     for(wtmp=root;wtmp;wtmp=wtmp->next) {
00193       if (wg->righttime + 1 == wtmp->lefttime) {
00194         wordgraph_check_and_add_leftword(wtmp, wg, wtmp->lscore_tmp);
00195         wordgraph_check_and_add_rightword(wg, wtmp, wtmp->lscore_tmp);
00196       }
00197       if (wtmp->righttime + 1 == wg->lefttime) {
00198         wordgraph_check_and_add_leftword(wg, wtmp, wg->lscore_tmp);
00199         wordgraph_check_and_add_rightword(wtmp, wg, wg->lscore_tmp);
00200       }
00201     }
00202   }
00203 
00204 }
00205 
00219 static void
00220 re_compute_lattice_lm(WordGraph *root, WCHMM_INFO *wchmm)
00221 {
00222   int i;
00223   
00224   for(wg=root;wg;wg=wg->next) {
00225     for(i=0;i<wg->leftwordnum;i++) {
00226       wg->left_lscore[i] = (*(wchmm->ngram->bigram_prob))(wchmm->ngram, wchmm->winfo->wton[wg->leftword[i]->wid], wchmm->winfo->wton[wg->wid]);
00227     }
00228     for(i=0;i<wg->rightwordnum;i++) {
00229       wg->right_lscore[i] = (*(wchmm->ngram->bigram_prob))(wchmm->ngram, wchmm->winfo->wton[wg->wid], wchmm->winfo->wton[wg->rightword[i]->wid]);
00230     }
00231   }
00232 }
00233 
00234 #endif
00235 
00250 static void
00251 put_atom(TRELLIS_ATOM *atom, WORD_INFO *winfo)
00252 {
00253   int i;
00254   jlog("DEBUG: %3d,%3d %f %16s (id=%5d)", atom->begintime, atom->endtime,
00255        atom->backscore, winfo->wname[atom->wid], atom->wid);
00256   for (i=0;i<winfo->wlen[atom->wid]; i++) {
00257     jlog(" %s",winfo->wseq[atom->wid][i]->name);
00258   }
00259   jlog("\n");
00260 }
00261 
00292 static LOGPROB
00293 trace_backptr(WORD_ID wordseq_rt[MAXSEQNUM], int *rt_wordlen, TRELLIS_ATOM *atom, WORD_INFO *winfo)
00294 {
00295   int wordlen = 0;              /* word length of best sentence hypothesis */
00296   TRELLIS_ATOM *tretmp;
00297   LOGPROB langscore = 0.0;
00298   WORD_ID wordseq[MAXSEQNUM];   /* temporal: in reverse order */
00299   int i;
00300 
00301   /* initialize */
00302   wordseq[0] = atom->wid;       /* start from specified atom */
00303   wordlen = 1;
00304   tretmp = atom;
00305   langscore += tretmp->lscore;
00306   if (debug2_flag) {
00307     put_atom(tretmp, winfo);
00308   }
00309   
00310   /* trace the backtrellis */
00311   while (tretmp->begintime > 0) {/* until beginning of input */
00312     tretmp = tretmp->last_tre;
00313 /*    t = tretmp->boundtime - 1;
00314     tretmp = bt_binsearch_atom(backtrellis, tretmp->boundtime-1, tretmp->last_wid);*/
00315     if (tretmp == NULL) {       /* should not happen */
00316       j_internal_error("trace_backptr: last trellis missing while backtracking");
00317     }
00318     langscore += tretmp->lscore;
00319     wordseq[wordlen] = tretmp->wid;
00320     wordlen++;
00321     if (debug2_flag) {
00322       put_atom(tretmp, winfo);
00323     }
00324     if (wordlen >= MAXSEQNUM) {
00325       j_internal_error("trace_backptr: sentence length exceeded ( > %d)\n",MAXSEQNUM);
00326     }
00327   }
00328   *rt_wordlen = wordlen;
00329   /* reverse order -> normal order */
00330   for(i=0;i<wordlen;i++) wordseq_rt[i] = wordseq[wordlen-i-1];
00331   return(langscore);
00332 }
00333 
/** 
 * Find the best word sequence at the end of the 1st pass by tracing
 * back the word trellis from the best word on the last frame, and
 * store it to r->result (and r->pass1_*) as the pass-1 result.
 * When WORD_GRAPH is defined, a word graph is also generated here.
 *
 * @param framelen [in] number of frames processed
 * @param r [i/o] recognition process instance
 */
static void
find_1pass_result(int framelen, RecogProcess *r)
{
  BACKTRELLIS *backtrellis;
  WORD_INFO *winfo;
  WORD_ID wordseq[MAXSEQNUM];
  int wordlen;
  int i;
  TRELLIS_ATOM *best;
  int last_time;
  LOGPROB total_lscore;
  LOGPROB maxscore;
  TRELLIS_ATOM *tmp;
#ifdef SPSEGMENT_NAIST
  boolean ok_p;
#endif

  backtrellis = r->backtrellis;
  winfo = r->lm->winfo;

  /* look for the last trellis word */

  if (r->lmtype == LM_PROB) {

    /* scan backward from the final frame until a valid end word is found */
    for (last_time = framelen - 1; last_time >= 0; last_time--) {

      maxscore = LOG_ZERO;
      for (i=0;i<backtrellis->num[last_time];i++) {
        tmp = backtrellis->rw[last_time][i];
#ifdef WORD_GRAPH
        /* treat only words on a graph path */
        if (!tmp->within_context) continue;
#endif
        if (r->config->successive.enabled) {
          /* short-pause segmentation mode */
          /* it should be the best trellis word on the last frame */
          if (maxscore < tmp->backscore) {
            maxscore = tmp->backscore;
            best = tmp;
          }
        } else {
          /* not segmentation mode */
          /* the sentence-end word is fixed to the tail silence model
             (winfo->tail_silwid), so stop at its first occurrence */
          if (tmp->wid == winfo->tail_silwid && maxscore < tmp->backscore) {
            maxscore = tmp->backscore;
            best = tmp;
            break;
          }
        }
      }
      if (maxscore != LOG_ZERO) break;
    }

    if (last_time < 0) {                /* not found */
      jlog("WARNING: %02d %s: no tail silence word survived on the last frame, search failed\n", r->config->id, r->config->name);
      r->result.status = J_RESULT_STATUS_FAIL;
      //callback_exec(CALLBACK_RESULT, r);
      return;
    }
  
  }

  if (r->lmtype == LM_DFA) {

    for (last_time = framelen - 1; last_time >= 0; last_time--) {

      /* pick the best trellis word surviving on the last frame
         (category-pair end info cp_end[] is not used here) */
      maxscore = LOG_ZERO;
      for (i=0;i<backtrellis->num[last_time];i++) {
        tmp = backtrellis->rw[last_time][i];
#ifdef WORD_GRAPH
        /* treat only words on a graph path */
        if (!tmp->within_context) continue;
#endif
        /*      if (dfa->cp_end[winfo->wton[tmp->wid]] == TRUE) {*/
        if (maxscore < tmp->backscore) {
          maxscore = tmp->backscore;
          best = tmp;
        }
        /*      }*/
      }
      if (maxscore != LOG_ZERO) break;
    }

    if (last_time < 0) {                /* not found */
      jlog("WARNING: %02d %s: no sentence-end word survived on last beam\n", r->config->id, r->config->name);
      r->result.status = J_RESULT_STATUS_FAIL;
      //callback_exec(CALLBACK_RESULT, r);
      return;
    }
  
  }

  /* traceback word trellis from the best word */
  total_lscore = trace_backptr(wordseq, &wordlen, best, r->lm->winfo);
#ifdef SPSEGMENT_NAIST
  if (r->config->successive.enabled) {
    /* on segmentation mode, recognition result that only consists of
       short-pause words will be treated as recognition rejection */
    ok_p = FALSE;
    for(i=0;i<wordlen;i++) {
      if (! is_sil(wordseq[i], r)) ok_p = TRUE;
    }
    if (ok_p == FALSE) {
      r->result.status = J_RESULT_STATUS_ONLY_SILENCE;
      return;
    }
  }
#endif

  /* just flush last progress output */
  /*
  if (recog->jconf->output.progout_flag) {
    recog->result.status = 1;
    recog->result.num_frame = last_time;
    recog->result.pass1.word = wordseq;
    recog->result.pass1.word_num = wordlen;
    recog->result.pass1.score = best->backscore;
    recog->result.pass1.score_lm = total_lscore;
    recog->result.pass1.score_am = best->backscore - total_lscore;
    //callback_exec(CALLBACK_RESULT_PASS1_INTERIM, recog);
    }*/

  /* output 1st pass result */    
  if (verbose_flag || ! r->config->output.progout_flag) {
    r->result.status = J_RESULT_STATUS_SUCCESS;
    r->result.num_frame = framelen;
    for(i=0;i<wordlen;i++) r->result.pass1.word[i] = wordseq[i];
    r->result.pass1.word_num = wordlen;
    r->result.pass1.score = best->backscore;
    r->result.pass1.score_lm = total_lscore;
    /* acoustic score = total accumulated score minus language score */
    r->result.pass1.score_am = best->backscore - total_lscore;
    //callback_exec(CALLBACK_RESULT_PASS1, r);
  }

  /* store the result to global val (notice: in reverse order) */
  for(i=0;i<wordlen;i++) r->pass1_wseq[i] = wordseq[i];
  r->pass1_wnum = wordlen;
  r->pass1_score = best->backscore;

#ifdef WORD_GRAPH
  /* generate word graph from the word trellis */
  r->peseqlen = backtrellis->framelen;
  r->result.wg1 = NULL;
  generate_lattice(last_time, r);
  link_lattice_by_time(r->result.wg1);
  if (r->lmtype == LM_PROB) re_compute_lattice_lm(r->result.wg1, r->wchmm);
  r->result.wg1_num = wordgraph_sort_and_annotate_id(&(r->result.wg1), r);
  /* compute graph CM by forward-backward processing */
  graph_forward_backward(r->result.wg1, r);
  //callback_exec(CALLBACK_RESULT_PASS1_GRAPH, r);
  //wordgraph_clean(&(r->result.wg1));
#endif

}
00529 
00548 static int
00549 compare_backscore(TRELLIS_ATOM **x1, TRELLIS_ATOM **x2)
00550 {
00551   return((*x2)->backscore - (*x1)->backscore);
00552 }
00553 
/** 
 * Find the 1st-pass result for isolated word recognition
 * (lmvar == LM_DFA_WORD): pick the best word(s) on the last surviving
 * frame and store them as the sentence result and the pass-1 result.
 * With CONFIDENCE_MEASURE, per-word confidence is computed by
 * normalizing the exponentiated score differences on that frame.
 *
 * @param framelen [in] number of frames processed
 * @param r [i/o] recognition process instance
 */
static void
find_1pass_result_word(int framelen, RecogProcess *r)
{
  BACKTRELLIS *bt;
  TRELLIS_ATOM *best, *tmp;
  int last_time;
  Sentence *s;
#ifdef CONFIDENCE_MEASURE
  LOGPROB sum;
#endif
  LOGPROB maxscore;
  int i;
  TRELLIS_ATOM **idx;
  int num;

  if (r->lmvar != LM_DFA_WORD) return;

  /* scan backward for the latest frame where any word survived,
     and find the maximum-score word on it */
  bt = r->backtrellis;
  for (last_time = framelen - 1; last_time >= 0; last_time--) {
    maxscore = LOG_ZERO;
    for (i=0;i<bt->num[last_time];i++) {
      tmp = bt->rw[last_time][i];
#ifdef WORD_GRAPH
      /* treat only words on a graph path */
      if (!tmp->within_context) continue;
#endif
      if (maxscore < tmp->backscore) {
        maxscore = tmp->backscore;
        best = tmp;
      }
    }
    if (maxscore != LOG_ZERO) break;
  }

  if (last_time < 0) {          /* not found */
    jlog("WARNING: %02d %s: no word survived on the last frame, search failed\n", r->config->id, r->config->name);
    r->result.status = J_RESULT_STATUS_FAIL;
    //callback_exec(CALLBACK_RESULT, r);
    return;
  }

#ifdef CONFIDENCE_MEASURE
  /* sum of scaled scores over the last frame, used below to
     normalize the per-word confidence values */
  sum = 0.0;
  for (i=0;i<bt->num[last_time];i++) {
    tmp = bt->rw[last_time][i];
#ifdef WORD_GRAPH
    /* treat only words on a graph path */
    if (!tmp->within_context) continue;
#endif
    sum += pow(10, r->config->annotate.cm_alpha * (tmp->backscore - maxscore));
  }
#endif

  /* set recognition result status to normal */
  r->result.status = J_RESULT_STATUS_SUCCESS;

  if (r->config->output.output_hypo_maxnum > 1) {
    /* more than one candidate is requested */

    /* get actual number of candidates to output */
    num = r->config->output.output_hypo_maxnum;
    if (num > bt->num[last_time]) {
      num = bt->num[last_time];
    }

    /* prepare result storage */
    result_sentence_malloc(r, num);
    r->result.sentnum = num;

    /* sort by score */
    idx = (TRELLIS_ATOM **)mymalloc(sizeof(TRELLIS_ATOM *)*bt->num[last_time]);
    for (i=0;i<bt->num[last_time];i++) {
      idx[i] = bt->rw[last_time][i];
    }
    qsort(idx, bt->num[last_time], sizeof(TRELLIS_ATOM *),
          (int (*)(const void *,const void *))compare_backscore);
    
    /* store the top candidates to result storage, one word each */
    for(i=0;i<r->result.sentnum;i++) {
      s = &(r->result.sent[i]);
      tmp = idx[i];
      s->word_num = 1;
      s->word[0] = tmp->wid;
#ifdef CONFIDENCE_MEASURE
      s->confidence[0] = pow(10, r->config->annotate.cm_alpha * (tmp->backscore - maxscore)) / sum;
#endif
      s->score = tmp->backscore;
      s->score_lm = 0.0;
      s->score_am = tmp->backscore;
      if (multigram_get_all_num(r->lm) > 0) {
        s->gram_id = multigram_get_gram_from_wid(s->word[0], r->lm);
      } else {
        s->gram_id = 0;
      }
    }
    /* free work area for sort */
    free(idx);

  } else {                      /* only max is needed */

    /* prepare result storage */
    result_sentence_malloc(r, 1);
    r->result.sentnum = 1;
    s = &(r->result.sent[0]);
    s->word_num = 1;
    s->word[0] = best->wid;
#ifdef CONFIDENCE_MEASURE
    s->confidence[0] = 1.0 / sum;
#endif
    s->score = best->backscore;
    s->score_lm = 0.0;
    s->score_am = best->backscore;
    if (multigram_get_all_num(r->lm) > 0) {
      s->gram_id = multigram_get_gram_from_wid(s->word[0], r->lm);
    } else {
      s->gram_id = 0;
    }
  }

  /* copy as 1st pass result */
  memcpy(&(r->result.pass1), &(r->result.sent[0]), sizeof(Sentence));
  r->result.pass1.align = NULL;

  //callback_exec(CALLBACK_RESULT, r);
  //free(r->result.sent);
}
00699 
00700 
00701 #ifdef DETERMINE
00702 
/** 
 * Try to determine (fix) the current best word early, for isolated
 * word recognition.  The word is determined when the same word stays
 * the best — with its end score within `thres` of the current maximum
 * node score — for more than `countthres` successive frames.
 *
 * Call with tremax == NULL to reset the determination state in r.
 *
 * @param r [i/o] recognition process instance
 * @param t [in] current frame
 * @param tremax [in] best trellis word at frame t, or NULL to initialize
 * @param thres [in] score margin threshold for determinization
 * @param countthres [in] frame-count threshold for determinization
 *
 * @return the word once at the moment it is determined, else NULL.
 */
static TRELLIS_ATOM *
determine_word(RecogProcess *r, int t, TRELLIS_ATOM *tremax, LOGPROB thres, int countthres)
{
  TRELLIS_ATOM *ret;
  WORD_ID w;

  //LOGPROB sum;
  //LOGPROB cm;

  int j;
  FSBeam *d;
  TOKEN2 *tk;
    
  if (tremax == NULL) {
    /* initialize determination state */
    r->determine_count = 0;
    r->determine_maxnodescore = LOG_ZERO;
    r->determined = FALSE;
    r->determine_last_wid = WORD_INVALID;
    r->have_determine = FALSE;
    return NULL;
  }

  ret = NULL;

  /* get confidence score of the maximum word hypothesis */
/* 
 *   sum = 0.0;
 *   tre = recog->backtrellis->list;
 *   while (tre != NULL && tre->endtime == t) {
 *     sum += pow(10, recog->jconf->annotate.cm_alpha * (tre->backscore - tremax->backscore));
 *     tre = tre->next;
 *   }
 *   cm = 1.0 / sum;
 */

  /* determinization decision */
  w = tremax->wid;

  r->have_determine = FALSE;

  /* determine by score threshold from maximum node score to maximum word end node score */
  if (r->determine_last_wid == w && r->determine_maxnodescore - tremax->backscore <= thres) {
    /* the same word stayed best and within the margin: count it up */
    r->determine_count++;
    if (r->determine_count > countthres) {
      if (r->determined == FALSE) {
        /* duration threshold passed for the first time: determine now */
        ret = tremax;
        r->determined = TRUE;
        r->have_determine = TRUE;
      }
    }
  } else {
    /* best word changed or dropped out of the margin: restart counting */
    r->determine_count = 0;
  }

  //printf("determine: %d: %s: cm=%f, relscore=%f, count=%d, phase=%d\n", t, recog->model->winfo->woutput[w], cm, determine_maxnodescore - tremax->backscore, count, phase);
  r->determine_last_wid = w;

  /* update maximum node score here for next call, since
     the word path determination is always one frame later */
  d = &(r->pass1);
  r->determine_maxnodescore = LOG_ZERO;
  for (j = d->n_start; j <= d->n_end; j++) {
    tk = &(d->tlist[d->tn][d->tindex[d->tn][j]]);
    if (r->determine_maxnodescore < tk->score) r->determine_maxnodescore = tk->score;
  }

  return(ret);
}
00799 
00819 static void
00820 check_determine_word(RecogProcess *r, int t)
00821 {
00822   TRELLIS_ATOM *tre;
00823   TRELLIS_ATOM *tremax;
00824   LOGPROB maxscore;
00825 
00826   /* bt->list is ordered by time frame */
00827   maxscore = LOG_ZERO;
00828   tremax = NULL;
00829   tre = r->backtrellis->list;
00830   while (tre != NULL && tre->endtime == t) {
00831     if (maxscore < tre->backscore) {
00832       maxscore = tre->backscore;
00833       tremax = tre;
00834     }
00835     tre = tre->next;
00836   }
00837 
00838   r->result.status = J_RESULT_STATUS_SUCCESS;
00839   r->result.num_frame = t;
00840 
00841   if (maxscore != LOG_ZERO) {
00842     //    if ((tre = determine_word(recog, t, tremax, 0.9, 17)) != NULL) {
00843     if ((tre = determine_word(r, t, tremax, r->config->pass1.determine_score_thres, r->config->pass1.determine_duration_thres)) != NULL) {
00844       r->result.pass1.word[0] = tremax->wid;
00845       r->result.pass1.word_num = 1;
00846       r->result.pass1.score = tremax->backscore;
00847       r->result.pass1.score_lm = 0.0;
00848       r->result.pass1.score_am = tremax->backscore;
00849       r->result.num_frame = t;
00850       //callback_exec(CALLBACK_RESULT_PASS1_DETERMINED, r);
00851     }
00852   }
00853 
00854   
00855 }
00856 
00857 #endif /* DETERMINE */
00858 
00874 static void
00875 bt_current_max(RecogProcess *r, int t)
00876 {
00877   int wordlen;
00878   TRELLIS_ATOM *tre;
00879   TRELLIS_ATOM *tremax;
00880   LOGPROB maxscore;
00881   LOGPROB lscore;
00882 
00883   /* bt->list is ordered by time frame */
00884   maxscore = LOG_ZERO;
00885   tremax = NULL;
00886   tre = r->backtrellis->list;
00887   while (tre != NULL && tre->endtime == t) {
00888     if (maxscore < tre->backscore) {
00889       maxscore = tre->backscore;
00890       tremax = tre;
00891     }
00892     tre = tre->next;
00893   }
00894 
00895   r->result.status = J_RESULT_STATUS_SUCCESS;
00896   r->result.num_frame = t;
00897 
00898   if (maxscore == LOG_ZERO) {
00899     r->result.pass1.word_num = 0;
00900   } else {
00901     if (r->lmvar == LM_DFA_WORD) {
00902       r->result.pass1.word[0] = tremax->wid;
00903       r->result.pass1.word_num = 1;
00904       r->result.pass1.score = tremax->backscore;
00905       r->result.pass1.score_lm = 0.0;
00906       r->result.pass1.score_am = tremax->backscore;
00907     } else {
00908       lscore = trace_backptr(r->result.pass1.word, &wordlen, tremax, r->lm->winfo);
00909       r->result.pass1.word_num = wordlen;
00910       r->result.pass1.score = tremax->backscore;
00911       r->result.pass1.score_lm = lscore;
00912       r->result.pass1.score_am = tremax->backscore;
00913     }
00914   }
00915   //callback_exec(CALLBACK_RESULT_PASS1_INTERIM, r);
00916 }
00917 
00933 static void
00934 bt_current_max_word(RecogProcess *r, int t)
00935 {
00936 
00937   TRELLIS_ATOM *tre;
00938   TRELLIS_ATOM *tremax;
00939   LOGPROB maxscore;
00940   WORD_ID w;
00941 
00942   /* bt->list は時間順に格納されている */
00943   /* bt->list is order by time */
00944   maxscore = LOG_ZERO;
00945   tremax = NULL;
00946   tre = r->backtrellis->list;
00947   while (tre != NULL && tre->endtime == t) {
00948     if (maxscore < tre->backscore) {
00949       maxscore = tre->backscore;
00950       tremax = tre;
00951     }
00952     tre = tre->next;
00953   }
00954 
00955   if (maxscore != LOG_ZERO) {
00956     jlog("DEBUG: %3d: ",t);
00957     w = tremax->wid;
00958     jlog("\"%s [%s]\"(id=%d)",
00959          r->lm->winfo->wname[w], r->lm->winfo->woutput[w], w);
00960     jlog(" [%d-%d] %f", tremax->begintime, t, tremax->backscore);
00961     w = tremax->last_tre->wid;
00962     if (w != WORD_INVALID) {
00963       jlog(" <- \"%s [%s]\"(id=%d)\n",
00964                r->lm->winfo->wname[w], r->lm->winfo->woutput[w], w);
00965     } else {
00966       jlog(" <- bgn\n");
00967     }
00968   }
00969 }
00970 
00971 
00972 /* -------------------------------------------------------------------- */
00973 /*                 ビーム探索中のトークンを扱うサブ関数                 */
00974 /*                functions to handle hypothesis tokens                 */
00975 /* -------------------------------------------------------------------- */
00976 
00995 static void
00996 malloc_nodes(FSBeam *d, int n, int ntoken_init)
00997 {
00998   d->totalnodenum = n;
00999   d->token     = (TOKENID *)mymalloc(sizeof(TOKENID) * d->totalnodenum);
01000   //d->maxtnum = ntoken_init;
01001   if (d->maxtnum < ntoken_init) d->maxtnum = ntoken_init;
01002   d->tlist[0]  = (TOKEN2 *)mymalloc(sizeof(TOKEN2) * d->maxtnum);
01003   d->tlist[1]  = (TOKEN2 *)mymalloc(sizeof(TOKEN2) * d->maxtnum);
01004   d->tindex[0] = (TOKENID *)mymalloc(sizeof(TOKENID) * d->maxtnum);
01005   d->tindex[1] = (TOKENID *)mymalloc(sizeof(TOKENID) * d->maxtnum);
01006   //d->expand_step = ntoken_step;
01007   d->nodes_malloced = TRUE;
01008   d->expanded = FALSE;
01009 }
01010 
01023 static void
01024 expand_tlist(FSBeam *d)
01025 {
01026   d->maxtnum += d->expand_step;
01027   d->tlist[0]  = (TOKEN2 *)myrealloc(d->tlist[0],sizeof(TOKEN2) * d->maxtnum);
01028   d->tlist[1]  = (TOKEN2 *)myrealloc(d->tlist[1],sizeof(TOKEN2) * d->maxtnum);
01029   d->tindex[0] = (TOKENID *)myrealloc(d->tindex[0],sizeof(TOKENID) * d->maxtnum);
01030   d->tindex[1] = (TOKENID *)myrealloc(d->tindex[1],sizeof(TOKENID) * d->maxtnum);
01031   if (debug2_flag) jlog("STAT: token space expanded to %d\n", d->maxtnum);
01032   d->expanded = TRUE;
01033 }
01034 
01052 static void
01053 prepare_nodes(FSBeam *d, int ntoken_step)
01054 {
01055   d->tnum[0] = d->tnum[1] = 0;
01056   if (d->expand_step < ntoken_step) d->expand_step = ntoken_step;
01057 }
01058 
01073 static void
01074 free_nodes(FSBeam *d)
01075 {
01076   if (d->nodes_malloced) {
01077     free(d->token);
01078     free(d->tlist[0]);
01079     free(d->tlist[1]);
01080     free(d->tindex[0]);
01081     free(d->tindex[1]);
01082     d->nodes_malloced = FALSE;
01083   }
01084 }
01085 
01100 static void
01101 clear_tlist(FSBeam *d, int tt)
01102 {
01103   d->tnum[tt] = 0;
01104 }
01105 
01120 static void
01121 clear_tokens(FSBeam *d, int tt)
01122 {
01123   int j;
01124   /* initialize active token list: only clear ones used in the last call */
01125   for (j=0; j<d->tnum[tt]; j++) {
01126     d->token[d->tlist[tt][j].node] = TOKENID_UNDEFINED;
01127   }
01128 }
01129 
01146 static TOKENID
01147 create_token(FSBeam *d)
01148 {
01149   TOKENID newid;
01150   int tn;
01151   tn = d->tn;
01152   newid = d->tnum[tn];
01153   d->tnum[tn]++;
01154   while (d->tnum[tn]>=d->maxtnum) expand_tlist(d);
01155   d->tindex[tn][newid] = newid;
01156 #ifdef WPAIR
01157   /* initialize link */
01158   d->tlist[tn][newid].next = TOKENID_UNDEFINED;
01159 #endif
01160   return(newid);
01161 }
01162 
/** 
 * Assign a token to a lexicon node for the current frame.  In WPAIR
 * mode the token is prepended to the node's token list; otherwise it
 * simply overwrites the node's single token slot.
 *
 * @param d [i/o] work area for the 1st pass
 * @param node [in] node ID to assign the token to
 * @param tkid [in] token ID to assign
 */
static void
node_assign_token(FSBeam *d, int node, TOKENID tkid)
{
#ifdef WPAIR
  /* add to link list */
  d->tlist[d->tn][tkid].next = d->token[node];
#endif
  d->token[node] = tkid;
  d->tlist[d->tn][tkid].node = node;
}
01202 
01239 static TOKENID
01240 node_exist_token(FSBeam *d, int tt, int node, WORD_ID wid)
01241 {
01242 #ifdef WPAIR
01243   /* In word-pair mode, multiple tokens are assigned to a node as a list.
01244      so we have to search for tokens with same last word ID */
01245 #ifdef WPAIR_KEEP_NLIMIT
01246   /* 1ノードごとに保持するtoken数の上限を設定 */
01247   /* tokenが無いが上限に達しているときは一番スコアの低いtokenを上書きする */
01248   /* N-best: limit number of assigned tokens to a node */
01249   int i = 0;
01250   TOKENID lowest_token = TOKENID_UNDEFINED;
01251 #endif
01252   TOKENID tmp;
01253   for(tmp=d->token[node]; tmp != TOKENID_UNDEFINED; tmp=d->tlist[tt][tmp].next) {
01254     if (d->tlist[tt][tmp].last_tre->wid == wid) {
01255       return(tmp);
01256     }
01257 #ifdef WPAIR_KEEP_NLIMIT
01258     if (lowest_token == TOKENID_UNDEFINED ||
01259         d->tlist[tt][lowest_token].score < d->tlist[tt][tmp].score)
01260       lowest_token = tmp;
01261     if (++i >= d->wpair_keep_nlimit) break;
01262 #endif
01263   }
01264 #ifdef WPAIR_KEEP_NLIMIT
01265   if (i >= d->wpair_keep_nlimit) { /* overflow, overwrite lowest score */
01266     return(lowest_token);
01267   } else {
01268     return(TOKENID_UNDEFINED);
01269   }
01270 #else 
01271   return(TOKENID_UNDEFINED);
01272 #endif
01273   
01274 #else  /* not WPAIR */
01275   /* 1つだけ保持,これを常に上書き */
01276   /* Only one token is kept in 1-best mode (default), so
01277      simply return the ID */
01278   return(d->token[node]);
01279 #endif
01280 }
01281 
01282 #ifdef DEBUG
01283 /* tlist と token の対応をチェックする(debug) */
01284 /* for debug: check tlist <-> token correspondence
01285    where  tlist[tt][tokenID].node = nodeID and
01286           token[nodeID] = tokenID
01287  */
01288 static void
01289 node_check_token(FSBeam *d, int tt)
01290 {
01291   int i;
01292   for(i=0;i<d->tnum[tt];i++) {
01293     if (node_exist_token(d, tt, d->tlist[tt][i].node, d->tlist[tt][i].last_tre->wid) != i) {
01294       jlog("ERROR: token %d not found on node %d\n", i, d->tlist[tt][i].node);
01295     }
01296   }
01297 }
01298 #endif
01299 
01300 
01301 
01302 /* -------------------------------------------------------------------- */
01303 /*       トークンをソートし 上位 N トークンを判別する (heap sort)       */
01304 /*        Sort generated tokens and get N-best (use heap sort)          */
01305 /* -------------------------------------------------------------------- */
01306 /* ビームの閾値として上位 N 番目のスコアが欲しいだけであり,実際にソート
01307    される必要はない */
01308 /* we only want to know the N-th score for determining beam threshold, so
01309    order is not considered here */
01310 
01311 #define SD(A) tindex_local[A-1] 
01312 #define SCOPY(D,S) D = S        
01313 #define SVAL(A) (tlist_local[tindex_local[A-1]].score) 
01314 #define STVAL (tlist_local[s].score) 
01315 
01316 
/** 
 * Partial heap sort so that the `neednum` highest-scoring token
 * indices end up at the tail of d->tindex[d->tn] (positions
 * totalnum-neednum .. totalnum-1).  Only the boundary score matters
 * for beam pruning, so the overall order is not significant.
 *
 * @param d [i/o] work area for the 1st pass
 * @param neednum [in] number of top tokens to separate out
 * @param totalnum [in] total number of tokens in the current buffer
 */
static void
sort_token_upward(FSBeam *d, int neednum, int totalnum)
{
  int n,root,child,parent;
  TOKENID s;
  TOKEN2 *tlist_local;
  TOKENID *tindex_local;

  tlist_local = d->tlist[d->tn];
  tindex_local = d->tindex[d->tn];

  /* build a max-heap over the whole index array (1-based via SD) */
  for (root = totalnum/2; root >= 1; root--) {
    SCOPY(s, SD(root));
    parent = root;
    while ((child = parent * 2) <= totalnum) {
      if (child < totalnum && SVAL(child) < SVAL(child+1)) {
        child++;
      }
      if (STVAL >= SVAL(child)) {
        break;
      }
      SCOPY(SD(parent), SD(child));
      parent = child;
    }
    SCOPY(SD(parent), s);
  }
  /* pop the maximum `neednum` times, moving it to the tail each time */
  n = totalnum;
  while ( n > totalnum - neednum) {
    SCOPY(s, SD(n));
    SCOPY(SD(n), SD(1));
    n--;
    parent = 1;
    while ((child = parent * 2) <= n) {
      if (child < n && SVAL(child) < SVAL(child+1)) {
        child++;
      }
      if (STVAL >= SVAL(child)) {
        break;
      }
      SCOPY(SD(parent), SD(child));
      parent = child;
    }
    SCOPY(SD(parent), s);
  }
}
01385 
/** 
 * Partial heap sort in the opposite direction: the `neednum`
 * lowest-scoring token indices end up at the tail of
 * d->tindex[d->tn], so the higher-scoring tokens remain at the head.
 * Used when the number of tokens to discard is smaller than the
 * number to keep.
 *
 * @param d [i/o] work area for the 1st pass
 * @param neednum [in] number of bottom tokens to separate out
 * @param totalnum [in] total number of tokens in the current buffer
 */
static void
sort_token_downward(FSBeam *d, int neednum, int totalnum)
{
  int n,root,child,parent;
  TOKENID s;
  TOKEN2 *tlist_local;
  TOKENID *tindex_local;

  tlist_local = d->tlist[d->tn];
  tindex_local = d->tindex[d->tn];

  /* build a min-heap over the whole index array (1-based via SD) */
  for (root = totalnum/2; root >= 1; root--) {
    SCOPY(s, SD(root));
    parent = root;
    while ((child = parent * 2) <= totalnum) {
      if (child < totalnum && SVAL(child) > SVAL(child+1)) {
        child++;
      }
      if (STVAL <= SVAL(child)) {
        break;
      }
      SCOPY(SD(parent), SD(child));
      parent = child;
    }
    SCOPY(SD(parent), s);
  }
  /* pop the minimum `neednum` times, moving it to the tail each time */
  n = totalnum;
  while ( n > totalnum - neednum) {
    SCOPY(s, SD(n));
    SCOPY(SD(n), SD(1));
    n--;
    parent = 1;
    while ((child = parent * 2) <= n) {
      if (child < n && SVAL(child) > SVAL(child+1)) {
        child++;
      }
      if (STVAL <= SVAL(child)) {
        break;
      }
      SCOPY(SD(parent), SD(child));
      parent = child;
    }
    SCOPY(SD(parent), s);
  }
}
01457 
01490 static void
01491 sort_token_no_order(FSBeam *d, int neednum, int *start, int *end)
01492 {
01493   int totalnum;
01494   int restnum;
01495 
01496   totalnum = d->tnum[d->tn];
01497 
01498   restnum = totalnum - neednum;
01499 
01500   if (neednum >= totalnum) {
01501     /* no need to sort */
01502     *start = 0;
01503     *end = totalnum - 1;
01504   } else if (neednum < restnum)  {
01505     /* needed num is smaller than rest, so sort for the needed tokens */
01506     sort_token_upward(d, neednum,totalnum);
01507     *start = totalnum - neednum;
01508     *end = totalnum - 1;
01509   } else {
01510     /* needed num is bigger than rest, so sort for the rest token */
01511     sort_token_downward(d, restnum,totalnum);
01512     *start = 0;
01513     *end = neednum - 1;
01514   }
01515 }
01516 
01517 /* -------------------------------------------------------------------- */
01518 /*             第1パス(フレーム同期ビームサーチ) メイン                */
01519 /*           main routines of 1st pass (frame-synchronous beam search)  */
01520 /* -------------------------------------------------------------------- */
01521 
/**
 * Set up the initial hypotheses for the 1st pass: decide the initial
 * word context, clear all lexicon-tree nodes and tokens, and create
 * the initial token(s) at the head node(s) of the allowed first words.
 *
 * @param param [in] input acoustic parameter vectors
 * @param r [i/o] recognition process instance
 *
 * @return TRUE on success, FALSE on failure (no valid initial word).
 */
static boolean
init_nodescore(HTK_Param *param, RecogProcess *r)
{
  WCHMM_INFO *wchmm;
  FSBeam *d;
  TOKENID newid;
  TOKEN2 *new;
  WORD_ID beginword;
  int node;
  int i;

  wchmm = r->wchmm;
  d = &(r->pass1);

  /* set up the word context of the initial hypothesis */
  /* setup initial word context */
  if (r->config->successive.enabled) { /* sp segment mode */
    /* initial word context = last non-sp word of previous 2nd pass at last segment*/
    if (r->lmtype == LM_PROB) {
      if (r->sp_break_last_nword == wchmm->winfo->tail_silwid) {
        /* if end with silE, initialize as normal start of sentence */
        d->bos.wid = WORD_INVALID;
      } else {
        d->bos.wid = r->sp_break_last_nword;
      }
    } else {
      d->bos.wid = WORD_INVALID;
    }
  } else {                      /* not sp segment mode */
    d->bos.wid = WORD_INVALID;  /* no context */
  }

  d->bos.begintime = d->bos.endtime = -1;

  /* reset all lexicon-tree nodes and the token work area */
  /* clear tree lexicon nodes and tokens */
  for(node = 0; node < d->totalnodenum; node++) {
    d->token[node] = TOKENID_UNDEFINED;
  }
  d->tnum[0] = d->tnum[1]  = 0;
  
#ifdef PASS1_IWCD
  /* initialize output probability calculation cache */
  /* initialize outprob cache */
  outprob_style_cache_init(wchmm);
#endif

  /* build initial hypotheses: choose initial word(s) and create their tokens */
  /* initial word hypothesis */

  if (r->lmtype == LM_PROB) {

    if (r->config->successive.enabled) { /* sp segment mode */
      if (r->sp_break_last_word != WORD_INVALID) { /* last segment exist */
        /* initial word = best last word hypothesis on the last segment */
        /* if the last word was a sentence-end word (silE, or period in
           the IPA model), restart from silB instead */
        /* if silE or sp, begin with silB */
        /*if (is_sil(recog.sp_break_last_word, wchmm->winfo, wchmm->hmminfo)) {*/
        if (r->sp_break_last_word == wchmm->winfo->tail_silwid) {
          beginword = wchmm->winfo->head_silwid;
          d->bos.wid = WORD_INVALID;    /* reset initial word context */
        } else {
          beginword = r->sp_break_last_word;
        }
      } else {
        /* initial segment: initial word set to silB */
        beginword = wchmm->winfo->head_silwid;
      }
    } else {                    /* not sp segment mode */
      /* initial word fixed to silB */
      beginword = wchmm->winfo->head_silwid;
    }

#ifdef SP_BREAK_DEBUG
    jlog("DEBUG: startword=[%s], last_nword=[%s]\n",
           (beginword == WORD_INVALID) ? "WORD_INVALID" : wchmm->winfo->wname[beginword],
           (d->bos.wid == WORD_INVALID) ? "WORD_INVALID" : wchmm->winfo->wname[d->bos.wid]);
#endif
    /* create the first token at the first node of initial word */
    newid = create_token(d);
    new = &(d->tlist[d->tn][newid]);

    /* initial node = head node of the beginword */
    if (wchmm->hmminfo->multipath) {
      node = wchmm->wordbegin[beginword];
    } else {
      node = wchmm->offset[beginword][0];
    }

    /* set initial LM score */
    if (wchmm->state[node].scid != 0) {
      /* if initial node is on a factoring branch, use the factored score */
      new->last_lscore = max_successor_prob(wchmm, d->bos.wid, node);
    } else {
      /* else, set to 0.0 */
      new->last_lscore = 0.0;
    }
#ifdef FIX_PENALTY
    new->last_lscore = new->last_lscore * d->lm_weight;
#else
    new->last_lscore = new->last_lscore * d->lm_weight + d->lm_penalty;
#endif
    /* set initial word history */
    new->last_tre = &(d->bos);
    new->last_cword = d->bos.wid;
    if (wchmm->hmminfo->multipath) {
      /* set initial score using the initial LM score */
      new->score = new->last_lscore;
    } else {
      /* set initial score using the initial LM score and AM score of the first state */
      new->score = outprob_style(wchmm, node, d->bos.wid, 0, param) + new->last_lscore;
    }
    /* assign the initial node to token list */
    node_assign_token(d, node, newid);

  }

  if (r->lmtype == LM_DFA && r->lmvar == LM_DFA_GRAMMAR) {
  
    /* initial hypotheses: the set of words that may grammatically
       start a sentence */
    /* initial words: all words that can be begin of sentence grammatically */
    /* only words that belong to an active grammar are allowed */
    /* only words in active grammars are allowed to be an initial words */
    MULTIGRAM *m;
    int t,tb,te;
    WORD_ID iw;
    boolean flag;
    DFA_INFO *gdfa;

    gdfa = r->lm->dfa;

    flag = FALSE;
    /* for all active grammar */
    for(m = r->lm->grammars; m; m = m->next) {
      if (m->active) {
        tb = m->cate_begin;
        te = tb + m->dfa->term_num;
        for(t=tb;t<te;t++) {
          /* for all word categories that belong to the grammar */
          if (dfa_cp_begin(gdfa, t) == TRUE) {
            /* if the category can appear at beginning of sentence, */
            flag = TRUE;
            for (iw=0;iw<gdfa->term.wnum[t];iw++) {
              /* create the initial token at the first node of all words that belongs to the category */
              i = gdfa->term.tw[t][iw];
              if (wchmm->hmminfo->multipath) {
                node = wchmm->wordbegin[i];
              } else {
                node = wchmm->offset[i][0];
              }
              /* in tree lexicon, words in the same category may share the same root node, so skip it if the node has already existed */
              if (node_exist_token(d, d->tn, node, d->bos.wid) != TOKENID_UNDEFINED) continue;
              newid = create_token(d);
              new = &(d->tlist[d->tn][newid]);
              new->last_tre = &(d->bos);
              new->last_lscore = 0.0;
              if (wchmm->hmminfo->multipath) {
                new->score = 0.0;
              } else {
                new->score = outprob_style(wchmm, node, d->bos.wid, 0, param);
              }
              node_assign_token(d, node, newid);
            }
          }
        }
      }
    }
    if (!flag) {
      jlog("ERROR: init_nodescore: no initial state found in active DFA grammar\n");
      return FALSE;
    }
  }

  if (r->lmtype == LM_DFA && r->lmvar == LM_DFA_WORD) {
    /* only words that belong to an active grammar are allowed
       as initial words */
    /* only words in active grammars are allowed to be an initial words */
    MULTIGRAM *m;

    for(m = r->lm->grammars; m; m = m->next) {
      if (m->active) {
        for(i = m->word_begin; i < m->word_begin + m->winfo->num; i++) {
          if (wchmm->hmminfo->multipath) {
            node = wchmm->wordbegin[i];
          } else {
            node = wchmm->offset[i][0];
          }
          /* words may share a root node; skip if already assigned */
          if (node_exist_token(d, d->tn, node, d->bos.wid) != TOKENID_UNDEFINED) continue;
          newid = create_token(d);
          new = &(d->tlist[d->tn][newid]);
          new->last_tre = &(d->bos);
          new->last_lscore = 0.0;
          if (wchmm->hmminfo->multipath) {
            new->score = 0.0;
          } else {
            new->score = outprob_style(wchmm, node, d->bos.wid, 0, param);
          }
          node_assign_token(d, node, newid);
        }
      }
    }
  }

  return TRUE;

}
01756 
01757 /******************************************************/
01758 /* フレーム同期ビーム探索の実行 --- 最初のフレーム用  */
01759 /* frame synchronous beam search --- first frame only */
01760 /******************************************************/
01761 
/**
 * Initialization of the 1st pass (frame-synchronous beam search):
 * prepare the token work area and the backtrellis, set search
 * parameters from the configuration, and create the initial
 * hypotheses for frame 0.
 *
 * @param param [in] input acoustic parameter vectors
 * @param r [i/o] recognition process instance
 *
 * @return TRUE on success, FALSE on failure.
 */
boolean
get_back_trellis_init(HTK_Param *param, RecogProcess *r)
{
  WCHMM_INFO *wchmm;
  BACKTRELLIS *backtrellis;
  FSBeam *d;

  wchmm = r->wchmm;
  backtrellis = r->backtrellis;
  d = &(r->pass1);

  /* set initial values of the double-buffer switch tl,tn used in
     Viterbi computation */
  /* initialize switch tl, tn for Viterbi computation */
  /* tn: this frame  tl: last frame */
  d->tn = 0;
  d->tl = 1;

  /* initialize backtrellis structure to store resulting word trellis */
  bt_prepare(backtrellis);

  /* set up search parameters on the work area */
  /* initialize some data on work area */

  if (r->lmtype == LM_PROB) {
    d->lm_weight  = r->config->lmp.lm_weight;
    d->lm_penalty = r->config->lmp.lm_penalty;
  }
  if (r->lmtype == LM_DFA) {
    d->penalty1 = r->config->lmp.penalty1;
  }
#if defined(WPAIR) && defined(WPAIR_KEEP_NLIMIT)
  d->wpair_keep_nlimit = r->config->pass1.wpair_keep_nlimit;
#endif

  /* allocate work area */
  /* malloc work area */
  /* assumed initial number of needed tokens: beam width x 2 (self + next trans.)
   * + root node on the tree lexicon (for inter-word trans.)
   */
  if (d->totalnodenum != wchmm->n) {
    /* lexicon size changed since last utterance: rebuild node area */
    free_nodes(d);
  }
  if (d->nodes_malloced == FALSE) {
    malloc_nodes(d, wchmm->n, r->trellis_beam_width * 2 + wchmm->startnum);
  }
  prepare_nodes(d, r->trellis_beam_width);
  
  /* set initial score to nodescore[tn] */
  if (init_nodescore(param, r) == FALSE) {
    jlog("ERROR: get_back_trellis_init: failed to set initial node scores\n");
    return FALSE;
  }

  /* locate the surviving tokens within the beam (no full sort needed) */
  sort_token_no_order(d, r->trellis_beam_width, &(d->n_start), &(d->n_end));

  /* compute the frame interval for progressive output */
  /* set interval frame for progout */
  r->config->output.progout_interval_frame = (int)((float)r->config->output.progout_interval / ((float)param->header.wshift / 10000.0));

  if (r->config->successive.enabled) {
    /* initialize parameter for short pause segmentation */
    d->in_sparea = TRUE;                /* assume beginning is silence */
    r->am->mfcc->sparea_start = d->tmp_sparea_start = 0; /* set start frame to 0 */
#ifdef SP_BREAK_RESUME_WORD_BEGIN
    d->tmp_sp_break_last_word = WORD_INVALID;
#endif
    r->sp_break_last_word = WORD_INVALID;
    /* first segment: do not move to the 2nd pass at the next
       non-pause frame */
    /* the first end of pause segment should be always silB, so
       skip the first segment */
    d->first_sparea = TRUE;
    r->sp_break_2_begin_word = WORD_INVALID;
  }

#ifdef DETERMINE
  if (r->lmvar == LM_DFA_WORD) {
    /* initialize */
    determine_word(r, 0, NULL, 0, 0);
  }
#endif

  return TRUE;
}
01876 
01877 /*****************************************************/
01878 /* frame synchronous beam search --- proceed 1 frame */
01879 /* フレーム同期ビーム探索の実行 --- 1フレーム進める  */
01880 /*****************************************************/
01881 
01900 static void
01901 propagate_token(FSBeam *d, int next_node, LOGPROB next_score, TRELLIS_ATOM *last_tre, WORD_ID last_cword, LOGPROB last_lscore)
01902 {
01903   TOKEN2 *tknext;
01904   TOKENID tknextid;
01905 
01906   if ((tknextid = node_exist_token(d, d->tn, next_node, last_tre->wid)) != TOKENID_UNDEFINED) {
01907     /* 遷移先ノードには既に他ノードから伝搬済み: スコアが高いほうを残す */
01908     /* the destination node already has a token: compare score */
01909     tknext = &(d->tlist[d->tn][tknextid]);
01910     if (tknext->score < next_score) {
01911       /* その遷移先ノードが持つトークンの内容を上書きする(新規トークンは作らない) */
01912       /* overwrite the content of existing destination token: not create a new token */
01913       tknext->last_tre = last_tre; /* propagate last word info */
01914       tknext->last_cword = last_cword; /* propagate last context word info */
01915       tknext->last_lscore = last_lscore; /* set new LM score */
01916       tknext->score = next_score; /* set new score */
01917     }
01918   } else {
01919     /* 遷移先ノードは未伝搬: 新規トークンを作って割り付ける */
01920     /* token unassigned: create new token and assign */
01921     if (next_score > LOG_ZERO) { /* valid token */
01922       tknextid = create_token(d); /* get new token */
01923     }
01924     tknext = &(d->tlist[d->tn][tknextid]);
01925     tknext->last_tre = last_tre; /* propagate last word info */
01926     tknext->last_cword = last_cword; /* propagate last context word info */
01927     tknext->last_lscore = last_lscore;
01928     tknext->score = next_score; /* set new score */
01929     node_assign_token(d, next_node, tknextid); /* assign this new token to the next node */
01930   }
01931 }
01932 
/**
 * Compute a score for a single within-word state transition and pass a
 * token to the destination node, applying LM factoring when the tree
 * lexicon is not category-organized.
 *
 * @param wchmm [in] tree lexicon
 * @param d [i/o] work area for the 1st-pass beam search
 * @param tk_ret [i/o] source token; may be re-pointed if the token
 *                     work area was expanded during propagation
 * @param j [in] index of the source token on the previous frame list
 * @param next_node [in] destination node id
 * @param next_a [in] transition log probability to the destination
 */
static void
beam_intra_word_core(WCHMM_INFO *wchmm, FSBeam *d, TOKEN2 **tk_ret, int j, int next_node, LOGPROB next_a)
{
  int node; 
  LOGPROB tmpsum;
  LOGPROB ngram_score_cache;
  TOKEN2 *tk;

  tk = *tk_ret;

  node = tk->node;

  /* now, 'node' is the source node, 'next_node' is the destination node,
     and 'next_a' holds the transition probability */
  /* tk->score is the accumulated score at the 'node' on previous frame */
  
  /******************************************************************/
  /* 2.1.1 compute score of destination node (transition prob + LM) */
  /******************************************************************/
  tmpsum = tk->score + next_a;
  ngram_score_cache = LOG_ZERO;
  /* the next score at 'next_node' will be computed on 'tmpsum', and
     the new LM probability (if updated) will be stored on 'ngram_score_cache' at below */
  
  if (!wchmm->category_tree) {
    /* LM factoring:
       If this is not a self transition and destination node has successor
       list, this is branching transition
    */
    if (next_node != node) {
      if (wchmm->state[next_node].scid != 0
#ifdef UNIGRAM_FACTORING
          /* When uni-gram factoring,
             wchmm->state[node].scid is below 0 for shared branches.
             In this case the maximum uni-gram probability for sub-tree
             is stored in wchmm->fscore[- wchmm->state[node].scid].
             Leaf branches (with only one successor word): the scid is
             larger than 0, and has
             the word ID in wchmm->sclist[wchmm->state[node].scid].
             So precise 2-gram is computed in this point */
#endif
          ){
        
        if (wchmm->lmtype == LM_PROB) {
          /* LM value should be update at this transition */
          /* compute new factoring value from N-gram probabilities */
#ifdef FIX_PENALTY
          /* if at the beginning of sentence, not add lm_penalty */
          if (tk->last_cword == WORD_INVALID) {
            ngram_score_cache = max_successor_prob(wchmm, tk->last_cword, next_node) * d->lm_weight;
          } else {
            ngram_score_cache = max_successor_prob(wchmm, tk->last_cword, next_node) * d->lm_weight + d->lm_penalty;
          }
#else
          ngram_score_cache = max_successor_prob(wchmm, tk->last_cword, next_node) * d->lm_weight + d->lm_penalty;
#endif
          /* update score: since 'tk->last_lscore' holds the last LM factoring
             value in this word, we first remove the score from the current
             score, and then add the new LM value computed above. */
          tmpsum -= tk->last_lscore;
          tmpsum += ngram_score_cache;
        }
        
        if (wchmm->lmtype == LM_DFA && wchmm->lmvar == LM_DFA_GRAMMAR) {
          /* With DFA, we use category-pair constraint extracted from the DFA
             at this 1st pass.  So if we compose a tree lexicon per word's
             category, the each category tree has the same connection
             constraint and thus we can apply the constraint on the cross-word
             transition.  This per-category tree lexicon is enabled by default,
             and in the case the constraint will be applied at the word end.
             If you disable per-category tree lexicon by undefining
             'CATEGORY_TREE', the word-pair contrained will be applied in a
             factoring style at here.
          */
          
          /* deterministic factoring in grammar mode:
             transition disabled if there are totally no sub-tree word that can
             grammatically (in category-pair constraint) connect
             to the previous word.
          */
          if (!can_succeed(wchmm, tk->last_tre->wid, next_node)) {
            tmpsum = LOG_ZERO;
          }
        }
        
      }
    }
  }
  /* factoring not needed when DFA mode and uses category-tree */
  
  /****************************************/
  /* 2.1.2 pass token to destination node */
  /****************************************/
  
  /* if LM was not updated above, keep propagating the last LM value */
  if (ngram_score_cache == LOG_ZERO) ngram_score_cache = tk->last_lscore;
  propagate_token(d, next_node, tmpsum, tk->last_tre, tk->last_cword, ngram_score_cache);
  
  if (d->expanded) {
    /* if work area has been expanded at 'create_token()' above,
       the inside 'realloc()' will destroy the pointers.
       so, reset local pointers from token index */
    tk = &(d->tlist[d->tl][d->tindex[d->tl][j]]);
    d->expanded = FALSE;
  }
  
  *tk_ret = tk;

}
02086 
02106 static void
02107 beam_intra_word(WCHMM_INFO *wchmm, FSBeam *d, TOKEN2 **tk_ret, int j)
02108 {
02109   A_CELL2 *ac; 
02110   TOKEN2 *tk;
02111   int node;
02112   int k;
02113 
02114   tk = *tk_ret;
02115 
02116   node = tk->node;
02117 
02118   if (wchmm->self_a[node] != LOG_ZERO) {
02119     beam_intra_word_core(wchmm, d, tk_ret, j, node, wchmm->self_a[node]);
02120   }
02121 
02122   if (wchmm->next_a[node] != LOG_ZERO) {
02123     beam_intra_word_core(wchmm, d, tk_ret, j, node+1, wchmm->next_a[node]);
02124   }
02125 
02126   for(ac=wchmm->ac[node];ac;ac=ac->next) {
02127     for(k=0;k<ac->n;k++) {
02128       beam_intra_word_core(wchmm, d, tk_ret, j, ac->arc[k], ac->a[k]);
02129     }
02130   }
02131 }
02132 
02133 /**************************/
02134 /* 2.2. トレリス単語保存  */
02135 /*      save trellis word */
02136 /**************************/
02161 static TRELLIS_ATOM *
02162 save_trellis(BACKTRELLIS *bt, WCHMM_INFO *wchmm, TOKEN2 *tk, int t, boolean final_for_multipath)
02163 {
02164   TRELLIS_ATOM *tre;
02165   int sword;
02166  
02167   sword = wchmm->stend[tk->node];
02168 
02169   /* この遷移元の単語終端ノードは「直前フレームで」生き残ったノード. 
02170      (「このフレーム」でないことに注意!!)
02171      よってここで, 時間(t-1) を単語終端とするトレリス上の単語仮説
02172      (TRELLIS_ATOM)として,単語トレリス構造体に保存する. */
02173   /* This source node (= word end node) has been survived in the
02174      "last" frame (notice: not "this" frame!!).  So this word end
02175      is saved here to the word trellis structure (BACKTRELLIS) as a
02176      trellis word (TRELLIS_ATOM) with end frame (t-1). */
02177   tre = bt_new(bt);
02178   tre->wid = sword;             /* word ID */
02179   tre->backscore = tk->score; /* log score (AM + LM) */
02180   tre->begintime = tk->last_tre->endtime + 1; /* word beginning frame */
02181   tre->endtime   = t-1; /* word end frame */
02182   tre->last_tre  = tk->last_tre; /* link to previous trellis word */
02183   if (wchmm->lmtype == LM_PROB) {
02184     tre->lscore    = tk->last_lscore;   /* log score (LM only) */
02185   } else if (wchmm->lmtype == LM_DFA) {
02186     tre->lscore = 0.0;
02187   }
02188   bt_store(bt, tre); /* save to backtrellis */
02189 #ifdef WORD_GRAPH
02190   if (tre->last_tre != NULL) {
02191     /* mark to indicate that the following words was survived in beam */
02192     tre->last_tre->within_context = TRUE;
02193   }
02194   if (final_for_multipath) {
02195     /* last node */
02196     if (tre->wid == wchmm->winfo->tail_silwid) {
02197       tre->within_context = TRUE;
02198     }
02199   }
02200 #endif /* WORD_GRAPH */
02201 
02202   return tre;
02203 }
02204       
02205 
02226 static void
02227 beam_inter_word(WCHMM_INFO *wchmm, FSBeam *d, TOKEN2 **tk_ret, TRELLIS_ATOM *tre, int j)
02228 {
02229   A_CELL2 *ac;
02230   TOKEN2 *tk;
02231   int sword;
02232   int node, next_node;
02233   LOGPROB *iwparray; 
02234   int stid;
02235 #ifdef UNIGRAM_FACTORING
02236   int isoid; 
02237 #endif
02238   LOGPROB tmpprob, tmpsum, ngram_score_cache;
02239   int k;
02240   WORD_ID last_word;
02241 
02242   tk = *tk_ret;
02243  
02244   node = tk->node;
02245   sword = wchmm->stend[node];
02246   last_word = wchmm->winfo->is_transparent[sword] ? tk->last_cword : sword;
02247 
02248   if (wchmm->lmtype == LM_PROB) {
02249 
02250     /* 遷移元単語が末尾単語の終端なら,どこへも遷移させない */
02251     /* do not allow transition if the source word is end-of-sentence word */
02252     if (sword == wchmm->winfo->tail_silwid) return;
02253 
02254 #ifdef UNIGRAM_FACTORING
02255     /* あとで共有単語先頭ノードに対して単語間遷移をまとめて計算するため,*/
02256     /* このループ内では最大尤度を持つ単語終端ノードを記録しておく */
02257     /* here we will record the best wordend node of maximum likelihood
02258        at this frame, to compute later the cross-word transitions toward
02259        shared factoring word-head node */
02260     tmpprob = tk->score;
02261     if (!wchmm->hmminfo->multipath) tmpprob += wchmm->wordend_a[sword];
02262     if (d->wordend_best_score < tmpprob) {
02263       d->wordend_best_score = tmpprob;
02264       d->wordend_best_node = node;
02265       d->wordend_best_tre = tre;
02266       d->wordend_best_last_cword = tk->last_cword;
02267     }
02268 #endif
02269     
02270     /* N-gramにおいては常に全単語の接続を考慮する必要があるため,
02271        ここで単語間の言語確率値をすべて計算しておく. 
02272        キャッシュは max_successor_prob_iw() 内で考慮. */
02273     /* As all words are possible to connect in N-gram, we first compute
02274        all the inter-word LM probability here.
02275        Cache is onsidered in max_successor_prob_iw(). */
02276     if (wchmm->winfo->is_transparent[sword]) {
02277       iwparray = max_successor_prob_iw(wchmm, tk->last_cword);
02278     } else {
02279       iwparray = max_successor_prob_iw(wchmm, sword);
02280     }
02281   }
02282 
02283   /* すべての単語始端ノードに対して以下を実行 */
02284   /* for all beginning-of-word nodes, */
02285   /* wchmm->startnode[0..stid-1] ... 単語始端ノードリスト */
02286   /* wchmm->startnode[0..stid-1] ... list of word start node (shared) */
02287   for (stid = wchmm->startnum - 1; stid >= 0; stid--) {
02288     next_node = wchmm->startnode[stid];
02289     if (wchmm->hmminfo->multipath) {
02290       if (wchmm->lmtype == LM_PROB) {
02291         /* connection to the head silence word is not allowed */
02292         if (wchmm->wordbegin[wchmm->winfo->head_silwid] == next_node) continue;
02293       }
02294     }
02295     
02296     /*****************************************/
02297     /* 2.3.1. 単語間言語制約を適用           */
02298     /*        apply cross-word LM constraint */
02299     /*****************************************/
02300         
02301     if (wchmm->lmtype == LM_PROB) {
02302       /* N-gram確率を計算 */
02303       /* compute N-gram probability */
02304 #ifdef UNIGRAM_FACTORING
02305       /* 1-gram factoring における単語間言語確率キャッシュの効率化:
02306          1-gram factoring は単語履歴に依存しないので,
02307          ここで参照する factoring 値の多くは
02308          wchmm->fscore[] に既に格納され, 探索中も不変である. 
02309          よって計算が必要な単語(どの単語ともノードを共有しない単語)
02310          についてのみ iwparray[] で計算・キャッシュする.  */
02311       /* Efficient cross-word LM cache:
02312          As 1-gram factoring values are independent of word context,
02313          they remain unchanged while search.  So, in cross-word LM
02314          computation, beginning-of-word states which share nodes with
02315          others and has factoring value in wchmm does not need cache.
02316          So only the unshared beginning-of-word states are computed and
02317          cached here in iwparray[].
02318       */
02319       /* wchmm,start2isolate[0..stid-1] ... ノードを共有しない単語は
02320          その通しID, 共有する(キャッシュの必要のない)単語は -1 */
02321       /* wchmm->start2isolate[0..stid-1] ... isolate ID for
02322          beginning-of-word state.  value: -1 for states that has
02323          1-gram factoring value (share nodes with some other words),
02324          and ID for unshared words
02325       */
02326       isoid = wchmm->start2isolate[stid];
02327       /* 計算が必要でない単語先頭ノードはパスをまとめて後に計算するので
02328          ここではスキップ */
02329       /* the shared nodes will be computed afterward, so just skip them
02330          here */
02331       if (isoid == -1) continue;
02332       tmpprob = iwparray[isoid];
02333 #else
02334       tmpprob = iwparray[stid];
02335 #endif
02336     }
02337 
02338     /* 遷移先の単語が先頭単語なら遷移させない. 
02339        これは wchmm.c で該当単語に stid を割り振らないことで対応
02340        しているので,ここでは何もしなくてよい */
02341     /* do not allow transition if the destination word is
02342        beginning-of-sentence word.  This limitation is realized by
02343        not assigning 'stid' for the word in wchmm.c, so we have nothing
02344        to do here.
02345     */
02346     
02347     if (wchmm->category_tree) {
02348       /* 文法の場合, 制約は決定的: カテゴリ対制約上許されない場合は遷移させない */
02349       /* With DFA and per-category tree lexicon,
02350          LM constraint is deterministic:
02351          do not allow transition if the category connection is not allowed
02352          (with category tree, constraint can be determined on top node) */
02353       if (dfa_cp(wchmm->dfa, wchmm->winfo->wton[sword], wchmm->winfo->wton[wchmm->start2wid[stid]]) == FALSE) continue;
02354     }
02355 
02356     /*******************************************************************/
02357     /* 2.3.2. 遷移先の単語先頭へのスコア計算(遷移確率+言語スコア)     */
02358     /*        compute score of destination node (transition prob + LM) */
02359     /*******************************************************************/
02360     tmpsum = tk->score;
02361     if (!wchmm->hmminfo->multipath) tmpsum += wchmm->wordend_a[sword];
02362 
02363     /* 'tmpsum' now holds outgoing score from the wordend node */
02364     if (wchmm->lmtype == LM_PROB) {
02365       /* 言語スコアを追加 */
02366       /* add LM score */
02367       ngram_score_cache = tmpprob * d->lm_weight + d->lm_penalty;
02368       tmpsum += ngram_score_cache;
02369       if (wchmm->winfo->is_transparent[sword] && wchmm->winfo->is_transparent[tk->last_cword]) {
02370             
02371         tmpsum += d->lm_penalty_trans;
02372       }
02373     }
02374     if (wchmm->lmtype == LM_DFA) {
02375       /* grammar: 単語挿入ペナルティを追加 */
02376       /* grammar: add insertion penalty */
02377       tmpsum += d->penalty1;
02378 
02379       /* grammar: deterministic factoring (in case category-tree not enabled) */
02380       if (!wchmm->category_tree) {
02381         if (!can_succeed(wchmm, sword, next_node)) {
02382           tmpsum = LOG_ZERO;
02383         }
02384       }
02385     }
02386     
02387     /*********************************************************************/
02388     /* 2.3.3. 遷移先ノードへトークン伝搬(単語履歴情報は更新)             */
02389     /*        pass token to destination node (updating word-context info */
02390     /*********************************************************************/
02391 
02392     if (wchmm->hmminfo->multipath) {
02393       /* since top node has no ouput, we should go one more step further */
02394       if (wchmm->self_a[next_node] != LOG_ZERO) {
02395         propagate_token(d, next_node, tmpsum + wchmm->self_a[next_node], tre, last_word, ngram_score_cache);
02396         if (d->expanded) {
02397           /* if work area has been expanded at 'create_token()' above,
02398              the inside 'realloc()' will destroy the pointers.
02399              so, reset local pointers from token index */
02400           tk = &(d->tlist[d->tn][d->tindex[d->tn][j]]);
02401           d->expanded = FALSE;
02402         }
02403       }
02404       if (wchmm->next_a[next_node] != LOG_ZERO) {
02405         propagate_token(d, next_node+1, tmpsum + wchmm->next_a[next_node], tre, last_word, ngram_score_cache);
02406         if (d->expanded) {
02407           /* if work area has been expanded at 'create_token()' above,
02408              the inside 'realloc()' will destroy the pointers.
02409              so, reset local pointers from token index */
02410           tk = &(d->tlist[d->tn][d->tindex[d->tn][j]]);
02411           d->expanded = FALSE;
02412         }
02413       }
02414       for(ac=wchmm->ac[next_node];ac;ac=ac->next) {
02415         for(k=0;k<ac->n;k++) {
02416           propagate_token(d, ac->arc[k], tmpsum + ac->a[k], tre, last_word, ngram_score_cache);
02417           if (d->expanded) {
02418             /* if work area has been expanded at 'create_token()' above,
02419                the inside 'realloc()' will destroy the pointers.
02420                so, reset local pointers from token index */
02421             tk = &(d->tlist[d->tn][d->tindex[d->tn][j]]);
02422             d->expanded = FALSE;
02423           }
02424         }
02425       }
02426     } else {
02427       propagate_token(d, next_node, tmpsum, tre, last_word, ngram_score_cache);
02428       if (d->expanded) {
02429         /* if work area has been expanded at 'create_token()' above,
02430            the inside 'realloc()' will destroy the pointers.
02431            so, reset local pointers from token index */
02432         tk = &(d->tlist[d->tl][d->tindex[d->tl][j]]);
02433         d->expanded = FALSE;
02434       }
02435     }
02436         
02437   }     /* end of next word heads */
02438 
02439   *tk_ret = tk;
02440 
02441 
02442 } /* end of cross-word processing */
02443 
02444 
02445 #ifdef UNIGRAM_FACTORING
02446 
02473 static void
02474 beam_inter_word_factoring(WCHMM_INFO *wchmm, FSBeam *d)
02475 {
02476   int sword;
02477   int node, next_node;
02478   int stid;
02479   LOGPROB tmpprob, tmpsum, ngram_score_cache;
02480   A_CELL2 *ac;
02481   int j;
02482   WORD_ID last_word;
02483 
02484   node = d->wordend_best_node;
02485   sword = wchmm->stend[node];
02486   last_word = wchmm->winfo->is_transparent[sword] ? d->wordend_best_last_cword : sword;
02487 
02488   for (stid = wchmm->startnum - 1; stid >= 0; stid--) {
02489     next_node = wchmm->startnode[stid];
02490     /* compute transition from 'node' at end of word 'sword' to 'next_node' */
02491     /* skip isolated words already calculated in the above main loop */
02492     if (wchmm->start2isolate[stid] != -1) continue;
02493     /* rest should have 1-gram factoring score at wchmm->fscore */
02494     if (wchmm->state[next_node].scid >= 0) {
02495       j_internal_error("get_back_trellis_proceed: scid mismatch at 1-gram factoring of shared states\n");
02496     }
02497     tmpprob = wchmm->fscore[- wchmm->state[next_node].scid];
02498     ngram_score_cache = tmpprob * d->lm_weight + d->lm_penalty;
02499     tmpsum = d->wordend_best_score;
02500     tmpsum += ngram_score_cache;
02501     if (wchmm->winfo->is_transparent[sword] && wchmm->winfo->is_transparent[d->wordend_best_last_cword]) {
02502       tmpsum += d->lm_penalty_trans;
02503     }
02504 
02505     if (wchmm->hmminfo->multipath) {
02506       /* since top node has no ouput, we should go one more step further */
02507       if (wchmm->self_a[next_node] != LOG_ZERO) {
02508         propagate_token(d, next_node, tmpsum + wchmm->self_a[next_node], d->wordend_best_tre, last_word, ngram_score_cache);
02509         if (d->expanded) {
02510           d->expanded = FALSE;
02511         }
02512       }
02513       if (wchmm->next_a[next_node] != LOG_ZERO) {
02514         propagate_token(d, next_node+1, tmpsum + wchmm->next_a[next_node], d->wordend_best_tre, last_word, ngram_score_cache);
02515         if (d->expanded) {
02516           d->expanded = FALSE;
02517         }
02518       }
02519       for(ac=wchmm->ac[next_node];ac;ac=ac->next) {
02520         for(j=0;j<ac->n;j++) {
02521           propagate_token(d, ac->arc[j], tmpsum + ac->a[j], d->wordend_best_tre, last_word, ngram_score_cache);
02522           if (d->expanded) {
02523             d->expanded = FALSE;
02524           }
02525         }
02526       }
02527       
02528     } else {
02529       propagate_token(d, next_node, tmpsum, d->wordend_best_tre, last_word, ngram_score_cache);
02530       if (d->expanded) {
02531         d->expanded = FALSE;
02532       }
02533     }
02534 
02535   }
02536 }
02537 
02538 #endif /* UNIGRAM_FACTORING */
02539 
02540 
/** 
 * Frame-synchronous beam search: proceed one frame of Viterbi
 * computation on the tree lexicon, from the previous frame to frame t.
 *
 * @param t [in] current frame index
 * @param param [in] input parameter vectors
 * @param r [i/o] recognition process instance
 * @param final_for_multipath [in] TRUE when called for the last frame
 * in multipath mode (transition only; no output probability is added)
 *
 * @return TRUE on success, or FALSE when no token survived within the
 * beam and the search must be terminated.
 */
boolean
get_back_trellis_proceed(int t, HTK_Param *param, RecogProcess *r, boolean final_for_multipath)
{
  /* local static work area for get_back_trellis_proceed() */
  /* these are local work area and need not to be kept for another call */
  TRELLIS_ATOM *tre; 
  int node; 
  int lmtype, lmvar;

  WCHMM_INFO *wchmm;
  FSBeam *d;
  int j;
  TOKEN2  *tk;

  /* local copied variables */
  int tn, tl;

  /* store pointer to local for rapid access */
  wchmm = r->wchmm;
  d = &(r->pass1);
  

  lmtype = r->lmtype;
  lmvar  = r->lmvar;

  /*********************/
  /* 1. initialization */
  /*********************/

  /* swap tl and tn to switch the double-buffered token work area;
     tl (= the previous tn) holds the result of the previous frame */
  d->tl = d->tn;
  if (d->tn == 0) d->tn = 1; else d->tn = 0;
  
  /* store locally for rapid access */
  tl = d->tl;
  tn = d->tn;

#ifdef UNIGRAM_FACTORING
  /* In 1-gram factoring, the language score on a word-head node is constant
     and independent of the previous word.  So the same word hypothesis will
     be selected as the best previous word at the inter-word Viterbi
     processing.  Therefore, in inter-word processing, we can (1) select only
     the best word-end hypothesis, and then (2) process Viterbi from that node
     to each word-head node.  On the other hand, isolated words,
     i.e. words not sharing any node with another word, have a unique word-head
     node and the true 2-gram language score is determined at the top node.
     In that case the best word hypothesis prior to each node differs
     according to the language scores, so such words are handled separately. */
  /* initialize max value to detect the best word-end hypothesis */
  if (lmtype == LM_PROB) {
    d->wordend_best_score = LOG_ZERO;
  }
#endif

#ifdef DEBUG
  /* debug */
  /* node_check_token(d, tl); */
#endif

  /* initialize token buffer: for speedup, only the part used in the last
     call needs to be cleared */
  clear_tokens(d, tl);

  /**************************/
  /* 2. Viterbi computation */
  /**************************/
  /* do one Viterbi step from the last frame to this frame */
  /* tindex[tl][n_start..n_end] holds IDs of nodes survived in the last frame */

  if (wchmm->hmminfo->multipath) {
    /*********************************/
    /* MULTIPATH MODE */
    /*********************************/

    for (j = d->n_start; j <= d->n_end; j++) {
      /* tk: token data  node: lexicon tree node ID that holds the 'tk' */
      tk = &(d->tlist[tl][d->tindex[tl][j]]);
      if (tk->score <= LOG_ZERO) continue; /* invalid node */
      node = tk->node;
      /*********************************/
      /* 2.1. word-internal transition */
      /*********************************/
      beam_intra_word(wchmm, d, &tk, j);
    }
    /*******************************************************/
    /* 2.2. sort tokens by score up to beam width          */
    /*******************************************************/
    sort_token_no_order(d, r->trellis_beam_width, &(d->n_start), &(d->n_end));
  
    /*************************/
    /* 2.3. cross-word Viterbi */
    /*************************/
    for(j = d->n_start; j <= d->n_end; j++) {
      tk = &(d->tlist[tn][d->tindex[tn][j]]);
      node = tk->node;
      
      /* if source node is end state of a word, */
      if (wchmm->stend[node] != WORD_INVALID) {

        /**************************/
        /* 2.4. save trellis word */
        /**************************/
#ifdef SPSEGMENT_NAIST
        /* before the trigger of successive decoding, do not register
           the word to the back trellis */
        if (r->config->successive.enabled && !d->after_trigger) {
          tre = tk->last_tre;   /* dummy */
        } else {
          tre = save_trellis(r->backtrellis, wchmm, tk, t, final_for_multipath);
        }
#else
        tre = save_trellis(r->backtrellis, wchmm, tk, t, final_for_multipath);
#endif
        /* if this is the final frame, stop here: no cross-word transition */
        if (final_for_multipath) continue;
        /* in isolated word recognition mode, no inter-word transition is needed */
        if (lmvar == LM_DFA_WORD) continue;

        /******************************/
        /* 2.5. cross-word transition */
        /******************************/

#ifdef UNIGRAM_FACTORING
        /* Only the isolated words will be processed here.
           The shared nodes with constant factoring values will be computed
           after this loop */
#endif
        beam_inter_word(wchmm, d, &tk, tre, j);

      } /* end of cross-word processing */
    
    } /* end of main viterbi loop */



  } else {

    /*********************************/
    /* NORMAL MODE */
    /*********************************/

    for (j = d->n_start; j <= d->n_end; j++) {
      /* tk: token data  node: lexicon tree node ID that holds the 'tk' */
      tk = &(d->tlist[tl][d->tindex[tl][j]]);
      if (tk->score <= LOG_ZERO) continue; /* invalid node */
      node = tk->node;
      
      /*********************************/
      /* 2.1. word-internal transition */
      /*********************************/
      beam_intra_word(wchmm, d, &tk, j);

      /* if source node is end state of a word, */
      if (wchmm->stend[node] != WORD_INVALID) {
        
        /**************************/
        /* 2.2. save trellis word */
        /**************************/
#ifdef SPSEGMENT_NAIST
        /* before the trigger of successive decoding, do not register
           the word to the back trellis */
        if (r->config->successive.enabled && !d->after_trigger) {
          tre = tk->last_tre;   /* dummy */
        } else {
          tre = save_trellis(r->backtrellis, wchmm, tk, t, final_for_multipath);
        }
#else
        tre = save_trellis(r->backtrellis, wchmm, tk, t, final_for_multipath);
#endif
        /* in isolated word recognition mode, no inter-word transition is needed */
        if (lmvar == LM_DFA_WORD) continue;

        /******************************/
        /* 2.3. cross-word transition */
        /******************************/
        
#ifdef UNIGRAM_FACTORING
        /* Only the isolated words will be processed here.
           The shared nodes with constant factoring values will be computed
           after this loop */
#endif

        beam_inter_word(wchmm, d, &tk, tre, j);

      } /* end of cross-word processing */
      
    } /* end of main viterbi loop */


  }

#ifdef UNIGRAM_FACTORING

  if (lmtype == LM_PROB) {

    /***********************************************************/
    /* 2.x transition from word end to shared (factored) nodes */
    /***********************************************************/
    /* d->wordend_best_* holds the best word ends at this frame. */
    if (d->wordend_best_score > LOG_ZERO) {
      beam_inter_word_factoring(wchmm, d);
    }
  }
#endif /* UNIGRAM_FACTORING */

  /***************************************/
  /* 3. compute state output probability */
  /***************************************/

  /* compute outprob for new valid (token-assigned) nodes and add to score */
  /* don't calculate the last frame (transition only) */

  if (wchmm->hmminfo->multipath) {
    if (! final_for_multipath) {
      for (j = 0; j < d->tnum[tn]; j++) {
        tk = &(d->tlist[tn][d->tindex[tn][j]]);
        /* skip non-output state */
        if (wchmm->state[tk->node].out.state == NULL) continue;
        /* NOTE(review): tk->last_tre is dereferenced without a NULL
           check here — presumably every surviving token carries a valid
           last trellis atom at this point; confirm against propagate_token */
        tk->score += outprob_style(wchmm, tk->node, tk->last_tre->wid, t, param);
      }
    }
  } else {
    for (j = 0; j < d->tnum[tn]; j++) {
      tk = &(d->tlist[tn][d->tindex[tn][j]]);
      tk->score += outprob_style(wchmm, tk->node, tk->last_tre->wid, t, param);
    }
  }

  /*******************************************************/
  /* 4. sort tokens by score up to beam width            */
  /*******************************************************/

  /* reset tlist[tl] for the next frame */
  clear_tlist(d, tl);

  /* use heap sort to get the top (bwidth) nodes of this frame
     (order within the top nodes is not required) */
  sort_token_no_order(d, r->trellis_beam_width, &(d->n_start), &(d->n_end));
  /***************/
  /* 5. finalize */
  /***************/

#ifdef SPSEGMENT_NAIST
  if (!r->config->successive.enabled || d->after_trigger) {
#endif

    /* call frame-wise callback */
    r->have_interim = FALSE;
    if (t > 0) {
      if (r->config->output.progout_flag) {
        /* progressive result output: output current best path in certain time interval */
        if (((t-1) % r->config->output.progout_interval_frame) == 0) {
          r->have_interim = TRUE;
          bt_current_max(r, t-1);
        }
      }
    }
    
    /* jlog("DEBUG: %d: %d\n",t,tnum[tn]); */
    /* for debug: output current max word */
    if (debug2_flag) {
      bt_current_max_word(r, t-1);
    }

#ifdef DETERMINE
    if (lmvar == LM_DFA_WORD) {
      check_determine_word(r, t-1);
    }
#endif

#ifdef SPSEGMENT_NAIST
  }
#endif
    
  /* if no node is left within the beam, terminate the search */
  if (d->tnum[tn] == 0) {
    jlog("ERROR: get_back_trellis_proceed: %02d %s: frame %d: no nodes left in beam, now terminates search\n", r->config->id, r->config->name, t);
    return(FALSE);
  }

  return(TRUE);
    
}
02898 
02899 /*************************************************/
02900 /* frame synchronous beam search --- last frame  */
02901 /* フレーム同期ビーム探索の実行 --- 最終フレーム */
02902 /*************************************************/
02903 
02929 void
02930 get_back_trellis_end(HTK_Param *param, RecogProcess *r)
02931 {
02932   WCHMM_INFO *wchmm;
02933   FSBeam *d;
02934   int j;
02935   TOKEN2 *tk;
02936 
02937   wchmm = r->wchmm;
02938   d = &(r->pass1);
02939 
02940   /* 最後にビーム内に残った単語終端トークンを処理する */
02941   /* process the last wordend tokens */
02942 
02943 
02944   if (r->am->hmminfo->multipath) {
02945     /* MULTI-PATH VERSION */
02946 
02947     /* 単語末ノードへの遷移のみ計算 */
02948     /* only arcs to word-end node is calculated */
02949     get_back_trellis_proceed(param->samplenum, param, r, TRUE);
02950 
02951   } else {
02952     /* NORMAL VERSION */
02953 
02954     /* 最後の遷移のあとの単語終端処理を行う */
02955     /* process the word-ends at the last frame */
02956     d->tl = d->tn;
02957     if (d->tn == 0) d->tn = 1; else d->tn = 0;
02958     for (j = d->n_start; j <= d->n_end; j++) {
02959       tk = &(d->tlist[d->tl][d->tindex[d->tl][j]]);
02960       if (wchmm->stend[tk->node] != WORD_INVALID) {
02961         save_trellis(r->backtrellis, wchmm, tk, param->samplenum, TRUE);
02962       }
02963     }
02964 
02965   }
02966     
02967 }
02968 
02969 /*************************/
02970 /* 探索終了 --- 終了処理 */
02971 /* end of search         */
02972 /*************************/
03007 void
03008 finalize_1st_pass(RecogProcess *r, int len)
03009 {
03010   BACKTRELLIS *backtrellis;
03011 
03012   backtrellis = r->backtrellis;
03013  
03014   backtrellis->framelen = len;
03015 
03016   /* 単語トレリス(backtrellis) を整理: トレリス単語の再配置とソート */
03017   /* re-arrange backtrellis: index them by frame, and sort by word ID */
03018 
03019   bt_relocate_rw(backtrellis);
03020   bt_sort_rw(backtrellis);
03021   if (backtrellis->num == NULL) {
03022     if (backtrellis->framelen > 0) {
03023       jlog("WARNING: %02d %s: input processed, but no survived word found\n", r->config->id, r->config->name);
03024     }
03025     /* reognition failed */
03026     r->result.status = J_RESULT_STATUS_FAIL;
03027     return;
03028   }
03029 
03030   /* 第1パスのベストパスを結果に格納する */
03031   /* store 1st pass result (best hypothesis) to result */
03032   if (r->lmvar == LM_DFA_WORD) {
03033     find_1pass_result_word(len, r);
03034   } else {
03035     find_1pass_result(len, r);
03036   }
03037 }
03038 
03053 void
03054 fsbeam_free(FSBeam *d)
03055 {
03056   free_nodes(d);
03057   if (d->pausemodelnames != NULL) {
03058     free(d->pausemodelnames);
03059     free(d->pausemodel);
03060   }
03061 }
03062 
03063 /* end of file */

Juliusに対してThu Jul 23 12:16:22 2009に生成されました。  doxygen 1.5.1