Julius: julius/ngram_decode.c ソースファイル

00001 
00041 /*
00042  * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
00043  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00044  * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology
00045  * All rights reserved
00046  */
00047    
00048 #include <julius.h>
00049 
00050 #ifdef USE_NGRAM
00051 
00070 static int
00071 compare_nw(NEXTWORD **a, NEXTWORD **b)
00072 {
00073   if ((*a)->id > (*b)->id) return 1;
00074   if ((*a)->id < (*b)->id) return -1;
00075   return 0;
00076 }
00077 
00099 /* find next word candiate whose id 'w' */
00100 static NEXTWORD *
00101 search_nw(NEXTWORD **nw, WORD_ID w, int num)
00102 {
00103   int left,right,mid;
00104   NEXTWORD *tmp;
00105 
00106   if (num == 0) return NULL;
00107   left = 0;
00108   right = num - 1;
00109   while (left < right) {
00110     mid = (left + right) / 2;
00111     if ((nw[mid])->id < w) {
00112       left = mid + 1;
00113     } else {
00114       right = mid;
00115     }
00116   }
00117   tmp = nw[left];
00118   if (tmp->id == w) {
00119     return tmp;
00120   } else {
00121     return NULL;
00122   }
00123 }
00124 
00125 /*********** transparent words handling on 2nd pass **********/
00126 static WORD_ID cnword[2];       
00127 static int cnnum;               
00128 static int last_trans;          
00129 
00146 static void
00147 set_word_context(WORD_ID *cseq, int n, WORD_INFO *winfo)
00148 {
00149   int i;
00150 
00151   cnnum = 0;
00152   last_trans = 0;
00153   for(i=n-1;i>=0;i--) {
00154     if (! winfo->is_transparent[cseq[i]]) {
00155       cnword[cnnum++] = winfo->wton[cseq[i]];
00156       if (cnnum >= 2) break;
00157     } else {
00158       last_trans++;
00159     }
00160   }
00161 }
00162 
00163 /*************************************************************/
00164 
00165 /* lookup survived words on specified frame in backtrellis, 
00166    compute their N-gram probabilities, and add them to NEXTWORD data */
00204 static int
00205 pick_backtrellis_words(BACKTRELLIS *bt, WORD_INFO *winfo, NGRAM_INFO *ngram, NEXTWORD **nw, int oldnum, NODE *hypo, short t)
00206 {
00207   int i;
00208   WORD_ID w;
00209   LOGPROB rawscore;
00210 #ifdef WPAIR
00211   int w_old = WORD_INVALID;
00212 #endif
00213   int num;
00214 
00215   num = oldnum;
00216   /* set word context to cnword[0], cnword[1] */
00217   set_word_context(hypo->seq, hypo->seqnum, winfo);
00218   /* lookup survived words in backtrellis on time frame 't' */
00219   for (i=0;i<bt->num[t];i++) {
00220     w = (bt->rw[t][i])->wid;
00221 #ifdef WORD_GRAPH
00222     /* only words on the word graphs are expanded */
00223     if (!(bt->rw[t][i])->within_wordgraph) continue;
00224 #endif /* not WORD_GRAPH */
00225 #ifdef WPAIR
00226     /* some word have same word ID with different previous word, so
00227        only one will be opened (best word will be selected later
00228        by next_word() */
00229     if (w == w_old) continue;   /* backtrellis is sorted by word ID */
00230     else w_old = w;
00231 #endif /* WPAIR */
00232     /* skip if already exist */
00233     if (search_nw(nw, w, oldnum) != NULL) continue;
00234     switch(cnnum) {             /* length of valid context */
00235     case 0:                     /* unigram */
00236       rawscore = uni_prob(ngram, winfo->wton[w]);
00237       break;
00238     case 1:                     /* bigram */
00239       rawscore = bi_prob_rl(ngram, winfo->wton[w], cnword[0]);
00240       break;
00241     default:                    /* trigram */
00242       rawscore = tri_prob_rl(ngram, winfo->wton[w], cnword[0], cnword[1]);
00243       break;
00244     }
00245 #ifdef CLASS_NGRAM
00246     rawscore += winfo->cprob[w];
00247 #endif
00248     nw[num]->tre   = bt->rw[t][i];
00249     nw[num]->id    = w;
00250     nw[num]->lscore = lm_weight2 * rawscore + lm_penalty2;
00251     if (winfo->is_transparent[w]) {
00252       /*nw[num]->lscore -= (LOGPROB)last_trans * TRANS_RENZOKU_PENALTY;*/
00253       if (winfo->is_transparent[hypo->seq[hypo->seqnum-1]]) {
00254         nw[num]->lscore += lm_penalty_trans;
00255       }
00256     }
00257     
00258     /* j_printf("%d: %s added\n", num, winfo->wname[nw[num]->id]); */
00259     num++;
00260   }
00261   return num;
00262 }
00263 
00264 /* Look for survived backtrellis words near the specified frame, and
00265    make NEXTWORD list.
00266    Words in frame [tm-lookup_range..tm+lookup_range-1] will be picked up.
00267    If a word exist in several frames, only one near the center frame
00268    will be taken: the true connection point will be determined later at
00269    next_word() */
00308 int
00309 get_backtrellis_words(BACKTRELLIS *bt, WORD_INFO *winfo, NGRAM_INFO *ngram, NEXTWORD **nw, NODE *hypo, short tm, short t_end)
00310 {
00311   int num = 0;
00312   int t, t_step;
00313   int oldnum=0;
00314 
00315   if (tm < 0) return(0);
00316 
00317 #ifdef PREFER_CENTER_ON_TRELLIS_LOOKUP
00318   /* fix for 3.2 (01/10/18 by ri) */
00319   /* before and after (one near center frame has high priority) */
00320   for (t_step = 0; t_step < lookup_range; t_step++) {
00321     /* before or center */
00322     t = tm - t_step;
00323     if (t < 0 || t > bt->framelen - 1 || t >= t_end) continue;
00324     num = pick_backtrellis_words(bt, winfo, ngram, nw, oldnum, hypo, t);
00325     if (num > oldnum) {
00326       qsort(nw, num, sizeof(NEXTWORD *),
00327             (int (*)(const void *,const void *))compare_nw);
00328       oldnum = num;
00329     }
00330     if (t_step == 0) continue;  /* center */
00331     /* after */
00332     t = tm + t_step;
00333     if (t < 0 || t > bt->framelen - 1 || t >= t_end) continue;
00334     num = pick_backtrellis_words(bt, winfo, ngram, nw, oldnum, hypo, t);
00335     if (num > oldnum) {
00336       qsort(nw, num, sizeof(NEXTWORD *),
00337             (int (*)(const void *,const void *))compare_nw);
00338       oldnum = num;
00339     }
00340   }
00341 
00342 #else
00343 
00344   /* before the center frame */
00345   for(t = tm; t >= tm - lookup_range; t--) {
00346     if (t < 0) break;
00347     num = pick_backtrellis_words(bt, winfo, ngram, nw, oldnum, hypo, t);
00348     if (num > oldnum) {
00349       qsort(nw, num, sizeof(NEXTWORD *),
00350             (int (*)(const void *,const void *))compare_nw);
00351       oldnum = num;
00352     }
00353   }
00354   /* after the center frame */
00355   for(t = tm + 1; t < tm + lookup_range; t++) {
00356     if (t > bt->framelen - 1) break;
00357     if (t >= t_end) break;
00358     num = pick_backtrellis_words(bt, winfo, ngram, nw, oldnum, hypo, t);
00359     if (num > oldnum) {
00360       qsort(nw, num, sizeof(NEXTWORD *),
00361             (int (*)(const void *,const void *))compare_nw);
00362       oldnum = num;
00363     }
00364   }
00365 #endif
00366 
00367   return num;
00368 }
00369 
00390 int
00391 limit_nw(NEXTWORD **nw, NODE *hypo, int num)
00392 {
00393   int src,dst;
00394   int newnum;
00395 
00396   /* <s>からは何も展開しない */
00397   /* no hypothesis will be generated after "<s>" */
00398   if (hypo->seq[hypo->seqnum-1] == winfo->head_silwid) {
00399     return(0);
00400   }
00401 
00402   dst = 0;
00403   for (src=0; src<num; src++) {
00404     if (nw[src]->id == winfo->tail_silwid) {
00405       /* </s> は展開しない */
00406       /* do not expand </s> (it only appears at start) */
00407       continue;
00408     }
00409 #ifdef FIX_35_INHIBIT_SAME_WORD_EXPANSION
00410     /* 直前単語と同じトレリス単語は展開しない */
00411     /* inhibit expanding the exactly the same trellis word twice */
00412     if (nw[src]->tre == hypo->tre) continue;
00413 #endif
00414     
00415     if (src != dst) memcpy(nw[dst], nw[src], sizeof(NEXTWORD));
00416     dst++;
00417   }
00418   newnum = dst;
00419 
00420   return newnum;
00421 }
00422         
00423 
00424 
00425 /* 最初の単語群を返す．返り値: 単語数 (-1 on error) */
00426 /* return initial word set.  return value: num of words (-1 on error) */
00427 
00461 int
00462 ngram_firstwords(NEXTWORD **nw, int peseqlen, int maxnw, WORD_INFO *winfo, BACKTRELLIS *bt)
00463 {
00464 #ifdef SP_BREAK_CURRENT_FRAME
00465   if (rest_param != NULL) {
00466     /* 初期仮説は 最終フレームに残った単語トレリス上の最尤単語 */
00467     /* the initial hypothesis is the best word survived on the last frame of
00468        the segment */
00469     nw[0]->id = sp_break_2_begin_word;
00470   } else {
00471     /* 最終セグメント: 初期仮説は 単語の末尾の無音単語(=winfo->tail_silwid) */
00472     /* we are in the last of sentence: initial hypothesis is word-end silence word */
00473     nw[0]->id = winfo->tail_silwid;
00474   }
00475 #else
00476   /* initial hypothesis is word-end silence word */
00477   nw[0]->id = winfo->tail_silwid;
00478 #endif
00479 #ifdef FIX_PENALTY
00480   nw[0]->lscore = 0.0;
00481 #else
00482   nw[0]->lscore = lm_penalty2;
00483 #endif
00484 
00485   return 1;                     /* number of words = 1 */
00486 }
00487 
00488 /* ある仮説の次の接続単語群を返す．帰り値: 単語数 (-1 on error) */
00489 /* return next word set from the hypothesis.  return value:
00490    num of words (-1 on error) */
00530 int
00531 ngram_nextwords(
00532                 NODE *hypo,
00533                 NEXTWORD **nw,
00534                 int maxnw,      /* hypo: source */
00535                 NGRAM_INFO *ngram, /* N-gram info */
00536                 WORD_INFO *winfo, /* word dictionary */
00537                 BACKTRELLIS *bt) /* backtrellis info */
00538 {
00539   int num, num2;
00540 
00541   if (hypo->seqnum == 0) {
00542     j_error("gs_get_next_words: hypo contains no word\n");
00543   }
00544 
00545   /* 仮説の推定終端時刻において backtrellis内に残っている単語を得る */
00546   /* get survived words on backtrellis at the estimated end frame */
00547   num = get_backtrellis_words(bt, winfo, ngram, nw, hypo, hypo->estimated_next_t, hypo->bestt);
00548 
00549   if (debug2_flag) j_printf("%d",num);
00550 
00551   /* 展開できない単語をチェックして外す */
00552   /* exclude unallowed words */
00553   num2 = limit_nw(nw, hypo, num);
00554 
00555   if (debug2_flag) j_printf("-%d=%d unfolded\n",num-num2,num2);
00556 
00557   return(num2);
00558 }
00559 
00560 /* return if the hypothesis is "acceptable" */
00583 boolean
00584 ngram_acceptable(NODE *hypo, WORD_INFO *winfo)
00585 {
00586   if (
00587 #ifdef SP_BREAK_CURRENT_FRAME
00588       /* 最後の仮説が第１パス最尤仮説の最初の単語と一致しなければならない */
00589       /* the last word should be equal to the first word on the best hypothesis on 1st pass */
00590       hypo->seq[hypo->seqnum-1] == sp_break_2_end_word
00591 #else
00592       /* 最後の仮説が文頭無音単語でなければならない */
00593       /* the last word should be head silence word */
00594       hypo->seq[hypo->seqnum-1] == winfo->head_silwid
00595 #endif
00596       ) {
00597     return TRUE;
00598   } else {
00599     return FALSE;
00600   }
00601 }
00602 
00603 #endif /* USE_NGRAM */