Julius: julius/ngram_decode.c Source File

00001 
00041 /*
00042  * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
00043  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00044  * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology, Nagoya Institute of Technology
00045  * All rights reserved
00046  */
00047    
00048 #include <julius.h>
00049 
00050 #ifdef USE_NGRAM
00051 
00070 static int
00071 compare_nw(NEXTWORD **a, NEXTWORD **b)
00072 {
00073   if ((*a)->id > (*b)->id) return 1;
00074   if ((*a)->id < (*b)->id) return -1;
00075   return 0;
00076 }
00077 
00099 /* find next word candiate whose id 'w' */
00100 static NEXTWORD *
00101 search_nw(NEXTWORD **nw, WORD_ID w, int num)
00102 {
00103   int left,right,mid;
00104   NEXTWORD *tmp;
00105 
00106   if (num == 0) return NULL;
00107   left = 0;
00108   right = num - 1;
00109   while (left < right) {
00110     mid = (left + right) / 2;
00111     if ((nw[mid])->id < w) {
00112       left = mid + 1;
00113     } else {
00114       right = mid;
00115     }
00116   }
00117   tmp = nw[left];
00118   if (tmp->id == w) {
00119     return tmp;
00120   } else {
00121     return NULL;
00122   }
00123 }
00124 
00125 /*********** transparent words handling on 2nd pass **********/
00126 static WORD_ID cnword[2];       
00127 static int cnnum;               
00128 static int last_trans;          
00129 
00146 static void
00147 set_word_context(WORD_ID *cseq, int n, WORD_INFO *winfo)
00148 {
00149   int i;
00150 
00151   cnnum = 0;
00152   last_trans = 0;
00153   for(i=n-1;i>=0;i--) {
00154     if (! winfo->is_transparent[cseq[i]]) {
00155       cnword[cnnum++] = winfo->wton[cseq[i]];
00156       if (cnnum >= 2) break;
00157     } else {
00158       last_trans++;
00159     }
00160   }
00161 }
00162 
00163 /*************************************************************/
00164 
00165 /* lookup survived words on specified frame in backtrellis, 
00166    compute their N-gram probabilities, and add them to NEXTWORD data */
00204 static int
00205 pick_backtrellis_words(BACKTRELLIS *bt, WORD_INFO *winfo, NGRAM_INFO *ngram, NEXTWORD **nw, int oldnum, NODE *hypo, short t)
00206 {
00207   int i;
00208   WORD_ID w;
00209   LOGPROB rawscore;
00210 #ifndef WORD_GRAPH
00211 #ifdef WPAIR
00212   int w_old = WORD_INVALID;
00213 #endif
00214 #endif
00215   int num;
00216 
00217   num = oldnum;
00218   /* set word context to cnword[0], cnword[1] */
00219   set_word_context(hypo->seq, hypo->seqnum, winfo);
00220   /* lookup survived words in backtrellis on time frame 't' */
00221   for (i=0;i<bt->num[t];i++) {
00222     w = (bt->rw[t][i])->wid;
00223 #ifdef WORD_GRAPH
00224     /* only words on the word graphs are expanded */
00225     if (!(bt->rw[t][i])->within_wordgraph) continue;
00226     /* word connection is restricted by the graph structure */
00227     if (w != ((hypo->tre)->last_tre)->wid) continue;
00228 #else /* not WORD_GRAPH */
00229 #ifdef WPAIR
00230     /* some word have same word ID with different previous word, so
00231        only one will be opened (best word will be selected later
00232        by next_word() */
00233     if (w == w_old) continue;   /* backtrellis is sorted by word ID */
00234     else w_old = w;
00235 #endif /* WPAIR */
00236 #endif /* not WORD_GRAPH */
00237     /* skip if already exist */
00238     if (search_nw(nw, w, oldnum) != NULL) continue;
00239     switch(cnnum) {             /* length of valid context */
00240     case 0:                     /* unigram */
00241       rawscore = uni_prob(ngram, winfo->wton[w]);
00242       break;
00243     case 1:                     /* bigram */
00244       rawscore = bi_prob_rl(ngram, winfo->wton[w], cnword[0]);
00245       break;
00246     default:                    /* trigram */
00247       rawscore = tri_prob_rl(ngram, winfo->wton[w], cnword[0], cnword[1]);
00248       break;
00249     }
00250 #ifdef CLASS_NGRAM
00251     rawscore += winfo->cprob[w];
00252 #endif
00253     nw[num]->tre   = bt->rw[t][i];
00254     nw[num]->id    = w;
00255     nw[num]->lscore = lm_weight2 * rawscore + lm_penalty2;
00256     if (winfo->is_transparent[w]) {
00257       /*nw[num]->lscore -= (LOGPROB)last_trans * TRANS_RENZOKU_PENALTY;*/
00258       if (winfo->is_transparent[hypo->seq[hypo->seqnum-1]]) {
00259         nw[num]->lscore += lm_penalty_trans;
00260       }
00261     }
00262     
00263     /* j_printf("%d: %s added\n", num, winfo->wname[nw[num]->id]); */
00264     num++;
00265   }
00266   return num;
00267 }
00268 
00269 /* Look for survived backtrellis words near the specified frame, and
00270    make NEXTWORD list.
00271    Words in frame [tm-lookup_range..tm+lookup_range-1] will be picked up.
00272    If a word exist in several frames, only one near the center frame
00273    will be taken: the true connection point will be determined later at
00274    next_word() */
00313 int
00314 get_backtrellis_words(BACKTRELLIS *bt, WORD_INFO *winfo, NGRAM_INFO *ngram, NEXTWORD **nw, NODE *hypo, short tm, short t_end)
00315 {
00316   int num = 0;
00317   int t, t_step;
00318   int oldnum=0;
00319 
00320   if (tm < 0) return(0);
00321 
00322 #ifdef PREFER_CENTER_ON_TRELLIS_LOOKUP
00323   /* fix for 3.2 (01/10/18 by ri) */
00324   /* before and after (one near center frame has high priority) */
00325   for (t_step = 0; t_step < lookup_range; t_step++) {
00326     /* before or center */
00327     t = tm - t_step;
00328     if (t < 0 || t > bt->framelen - 1 || t >= t_end) continue;
00329     num = pick_backtrellis_words(bt, winfo, ngram, nw, oldnum, hypo, t);
00330     if (num > oldnum) {
00331       qsort(nw, num, sizeof(NEXTWORD *),
00332             (int (*)(const void *,const void *))compare_nw);
00333       oldnum = num;
00334     }
00335     if (t_step == 0) continue;  /* center */
00336     /* after */
00337     t = tm + t_step;
00338     if (t < 0 || t > bt->framelen - 1 || t >= t_end) continue;
00339     num = pick_backtrellis_words(bt, winfo, ngram, nw, oldnum, hypo, t);
00340     if (num > oldnum) {
00341       qsort(nw, num, sizeof(NEXTWORD *),
00342             (int (*)(const void *,const void *))compare_nw);
00343       oldnum = num;
00344     }
00345   }
00346 
00347 #else
00348 
00349   /* before the center frame */
00350   for(t = tm; t >= tm - lookup_range; t--) {
00351     if (t < 0) break;
00352     num = pick_backtrellis_words(bt, winfo, ngram, nw, oldnum, hypo, t);
00353     if (num > oldnum) {
00354       qsort(nw, num, sizeof(NEXTWORD *),
00355             (int (*)(const void *,const void *))compare_nw);
00356       oldnum = num;
00357     }
00358   }
00359   /* after the center frame */
00360   for(t = tm + 1; t < tm + lookup_range; t++) {
00361     if (t > bt->framelen - 1) break;
00362     if (t >= t_end) break;
00363     num = pick_backtrellis_words(bt, winfo, ngram, nw, oldnum, hypo, t);
00364     if (num > oldnum) {
00365       qsort(nw, num, sizeof(NEXTWORD *),
00366             (int (*)(const void *,const void *))compare_nw);
00367       oldnum = num;
00368     }
00369   }
00370 #endif
00371 
00372   return num;
00373 }
00374 
00395 int
00396 limit_nw(NEXTWORD **nw, NODE *hypo, int num)
00397 {
00398   int src,dst;
00399   int newnum;
00400 
00401   /* <s>¤«¤é¤Ï²¿¤âÅ¸³«¤·¤Ê¤¤ */
00402   /* no hypothesis will be generated after "<s>" */
00403   if (hypo->seq[hypo->seqnum-1] == winfo->head_silwid) {
00404     return(0);
00405   }
00406 
00407   dst = 0;
00408   for (src=0; src<num; src++) {
00409     if (nw[src]->id == winfo->tail_silwid) {
00410       /* </s> ¤ÏÅ¸³«¤·¤Ê¤¤ */
00411       /* do not expand </s> (it only appears at start) */
00412       continue;
00413     }
00414 #ifdef FIX_35_INHIBIT_SAME_WORD_EXPANSION
00415     /* Ä¾Á°Ã±¸ì¤ÈÆ±¤¸¥È¥ì¥ê¥¹Ã±¸ì¤ÏÅ¸³«¤·¤Ê¤¤ */
00416     /* inhibit expanding the exactly the same trellis word twice */
00417     if (nw[src]->tre == hypo->tre) continue;
00418 #endif
00419     
00420     if (src != dst) memcpy(nw[dst], nw[src], sizeof(NEXTWORD));
00421     dst++;
00422   }
00423   newnum = dst;
00424 
00425   return newnum;
00426 }
00427         
00428 
00429 
00430 /* ºÇ½é¤ÎÃ±¸ì·²¤òÊÖ¤¹¡¥ÊÖ¤êÃÍ: Ã±¸ì¿ô (-1 on error) */
00431 /* return initial word set.  return value: num of words (-1 on error) */
00432 
00466 int
00467 ngram_firstwords(NEXTWORD **nw, int peseqlen, int maxnw, WORD_INFO *winfo, BACKTRELLIS *bt)
00468 {
00469 #ifdef WORD_GRAPH
00470   int last_time;
00471 #endif
00472 
00473 #ifdef SP_BREAK_CURRENT_FRAME
00474   if (rest_param != NULL) {
00475     /* ½é´ü²¾Àâ¤Ï ºÇ½ª¥Õ¥ì¡¼¥à¤Ë»Ä¤Ã¤¿Ã±¸ì¥È¥ì¥ê¥¹¾å¤ÎºÇÌàÃ±¸ì */
00476     /* the initial hypothesis is the best word survived on the last frame of
00477        the segment */
00478     nw[0]->id = sp_break_2_begin_word;
00479   } else {
00480     /* ºÇ½ª¥»¥°¥á¥ó¥È: ½é´ü²¾Àâ¤Ï Ã±¸ì¤ÎËöÈø¤ÎÌµ²»Ã±¸ì(=winfo->tail_silwid) */
00481     /* we are in the last of sentence: initial hypothesis is word-end silence word */
00482     nw[0]->id = winfo->tail_silwid;
00483   }
00484 #else
00485   /* initial hypothesis is word-end silence word */
00486   nw[0]->id = winfo->tail_silwid;
00487 #endif
00488 #ifdef FIX_PENALTY
00489   nw[0]->lscore = 0.0;
00490 #else
00491   nw[0]->lscore = lm_penalty2;
00492 #endif
00493 #ifdef WORD_GRAPH
00494   /* ½é´ü²¾Àâ¤ÎTRELLIS_ATOM¤ò¤³¤³¤Ç·èÄê¤¹¤ë */
00495   /* in word-graph mode, the first trellis atom should be defined here */
00496   for (last_time = peseqlen - 1; last_time >= 0; last_time--) {
00497     nw[0]->tre = bt_binsearch_atom(&backtrellis, last_time, winfo->tail_silwid);
00498     if (nw[0]->tre != NULL) break;
00499   }
00500   if (nw[0]->tre == NULL) {             /* </s> not found */
00501     j_printerr("no wordend left in beam!\n");
00502     j_printerr("beam width too small or input speech too short\n");
00503     return 0;
00504   }
00505 #endif
00506 
00507   return 1;                     /* number of words = 1 */
00508 }
00509 
00510 /* ¤¢¤ë²¾Àâ¤Î¼¡¤ÎÀÜÂ³Ã±¸ì·²¤òÊÖ¤¹¡¥µ¢¤êÃÍ: Ã±¸ì¿ô (-1 on error) */
00511 /* return next word set from the hypothesis.  return value:
00512    num of words (-1 on error) */
00552 int
00553 ngram_nextwords(
00554                 NODE *hypo,
00555                 NEXTWORD **nw,
00556                 int maxnw,      /* hypo: source */
00557                 NGRAM_INFO *ngram, /* N-gram info */
00558                 WORD_INFO *winfo, /* word dictionary */
00559                 BACKTRELLIS *bt) /* backtrellis info */
00560 {
00561   int num, num2;
00562 
00563   if (hypo->seqnum == 0) {
00564     j_error("gs_get_next_words: hypo contains no word\n");
00565   }
00566 
00567   /* ²¾Àâ¤Î¿äÄê½ªÃ¼»þ¹ï¤Ë¤ª¤¤¤Æ backtrellisÆâ¤Ë»Ä¤Ã¤Æ¤¤¤ëÃ±¸ì¤òÆÀ¤ë */
00568   /* get survived words on backtrellis at the estimated end frame */
00569   num = get_backtrellis_words(bt, winfo, ngram, nw, hypo, hypo->estimated_next_t, hypo->bestt);
00570 
00571   if (debug2_flag) j_printf("%d",num);
00572 
00573   /* Å¸³«¤Ç¤¤Ê¤¤Ã±¸ì¤ò¥Á¥§¥Ã¥¯¤·¤Æ³°¤¹ */
00574   /* exclude unallowed words */
00575   num2 = limit_nw(nw, hypo, num);
00576 
00577   if (debug2_flag) j_printf("-%d=%d unfolded\n",num-num2,num2);
00578 
00579   return(num2);
00580 }
00581 
00582 /* return if the hypothesis is "acceptable" */
00605 boolean
00606 ngram_acceptable(NODE *hypo, WORD_INFO *winfo)
00607 {
00608   if (
00609 #ifdef SP_BREAK_CURRENT_FRAME
00610       /* ºÇ¸å¤Î²¾Àâ¤¬Âè£±¥Ñ¥¹ºÇÌà²¾Àâ¤ÎºÇ½é¤ÎÃ±¸ì¤È°ìÃ×¤·¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤ */
00611       /* the last word should be equal to the first word on the best hypothesis on 1st pass */
00612       hypo->seq[hypo->seqnum-1] == sp_break_2_end_word
00613 #else
00614       /* ºÇ¸å¤Î²¾Àâ¤¬Ê¸Æ¬Ìµ²»Ã±¸ì¤Ç¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤ */
00615       /* the last word should be head silence word */
00616       hypo->seq[hypo->seqnum-1] == winfo->head_silwid
00617 #endif
00618       ) {
00619     return TRUE;
00620   } else {
00621     return FALSE;
00622   }
00623 }
00624 
00625 #endif /* USE_NGRAM */