00001
00041
00042
00043
00044
00045
00046
00047
00048 #include <julius/julius.h>
00049
00068 static int
00069 compare_nw(NEXTWORD **a, NEXTWORD **b)
00070 {
00071 if ((*a)->id > (*b)->id) return 1;
00072 if ((*a)->id < (*b)->id) return -1;
00073 return 0;
00074 }
00075
00097
00098 static NEXTWORD *
00099 search_nw(NEXTWORD **nw, WORD_ID w, int num)
00100 {
00101 int left,right,mid;
00102 NEXTWORD *tmp;
00103
00104 if (num == 0) return NULL;
00105 left = 0;
00106 right = num - 1;
00107 while (left < right) {
00108 mid = (left + right) / 2;
00109 if ((nw[mid])->id < w) {
00110 left = mid + 1;
00111 } else {
00112 right = mid;
00113 }
00114 }
00115 tmp = nw[left];
00116 if (tmp->id == w) {
00117 return tmp;
00118 } else {
00119 return NULL;
00120 }
00121 }
00122
00138 static LOGPROB
00139 ngram_forw2back(NGRAM_INFO *ngram, WORD_ID *w, int wlen)
00140 {
00141 int i;
00142 LOGPROB p1, p2;
00143
00144 p1 = 0.0;
00145 for(i = 1; i < ngram->n; i++) {
00146 if (i >= wlen) break;
00147 p1 += ngram_prob(ngram, i, &(w[1]));
00148 }
00149 p2 = 0.0;
00150 for(i = 0; i < ngram->n; i++) {
00151 if (i >= wlen) break;
00152 p2 += ngram_prob(ngram, i+1, w);
00153 }
00154
00155 return(p2 - p1);
00156 }
00157
00191 static int
00192 pick_backtrellis_words(RecogProcess *r, NEXTWORD **nw, int oldnum, NODE *hypo, short t)
00193 {
00194 int i;
00195 WORD_ID w;
00196 LOGPROB rawscore;
00197 #ifdef WPAIR
00198 int w_old = WORD_INVALID;
00199 #endif
00200 int num;
00201 static WORD_ID cnword[MAX_N];
00202 static WORD_ID cnwordrev[MAX_N];
00203 int cnnum;
00204 int last_trans;
00205
00206 BACKTRELLIS *bt;
00207 WORD_INFO *winfo;
00208 NGRAM_INFO *ngram;
00209 LOGPROB lm_weight2, lm_penalty2, lm_penalty_trans;
00210
00211 num = oldnum;
00212 bt = r->backtrellis;
00213 winfo = r->lm->winfo;
00214 ngram = r->lm->ngram;
00215 lm_weight2 = r->config->lmp.lm_weight2;
00216 lm_penalty2 = r->config->lmp.lm_penalty2;
00217 lm_penalty_trans = r->config->lmp.lm_penalty_trans;
00218
00219
00220 if (ngram) {
00221 cnnum = 0;
00222 last_trans = 0;
00223 for(i=hypo->seqnum-1;i>=0;i--) {
00224 if (! winfo->is_transparent[hypo->seq[i]]) {
00225 cnword[cnnum+1] = hypo->seq[i];
00226 cnnum++;
00227 if (cnnum >= ngram->n - 1) break;
00228 } else {
00229 last_trans++;
00230 }
00231 }
00232 if (ngram->dir == DIR_RL) {
00233 for(i=0;i<cnnum;i++) {
00234 cnwordrev[cnnum-1-i] = cnword[i+1];
00235 }
00236 }
00237
00238 if (ngram->dir == DIR_RL) {
00239 for(i=0;i<cnnum;i++) cnwordrev[i] = winfo->wton[cnwordrev[i]];
00240 } else {
00241 for(i=0;i<cnnum;i++) cnword[i+1] = winfo->wton[cnword[i+1]];
00242 }
00243 }
00244
00245
00246 for (i=0;i<bt->num[t];i++) {
00247 w = (bt->rw[t][i])->wid;
00248 #ifdef WORD_GRAPH
00249
00250 if (!(bt->rw[t][i])->within_wordgraph) continue;
00251 #endif
00252 #ifdef WPAIR
00253
00254
00255
00256 if (w == w_old) continue;
00257 else w_old = w;
00258 #endif
00259
00260 if (search_nw(nw, w, oldnum) != NULL) continue;
00261
00262
00263 if (ngram) {
00264
00265 if (ngram->dir == DIR_RL) {
00266
00267 cnwordrev[cnnum] = winfo->wton[w];
00268 rawscore = ngram_prob(ngram, cnnum + 1, cnwordrev);
00269 } else {
00270 cnword[0] = winfo->wton[w];
00271 rawscore = ngram_forw2back(ngram, cnword, cnnum + 1);
00272 }
00273 #ifdef CLASS_NGRAM
00274 rawscore += winfo->cprob[w];
00275 #endif
00276 }
00277 if (r->lmvar == LM_NGRAM_USER) {
00278
00279
00280 rawscore = (*(r->lm->lmfunc.lmprob))(winfo, hypo->seq, hypo->seqnum, w, rawscore);
00281 }
00282
00283 nw[num]->tre = bt->rw[t][i];
00284 nw[num]->id = w;
00285 nw[num]->lscore = rawscore * lm_weight2 + lm_penalty2;
00286 if (winfo->is_transparent[w]) {
00287
00288 if (winfo->is_transparent[hypo->seq[hypo->seqnum-1]]) {
00289 nw[num]->lscore += lm_penalty_trans;
00290 }
00291 }
00292
00293
00294 num++;
00295 }
00296
00297 return num;
00298 }
00299
00333 static int
00334 get_backtrellis_words(RecogProcess *r, NEXTWORD **nw, NODE *hypo, short tm, short t_end)
00335 {
00336 int num = 0;
00337 int t, t_step;
00338 int oldnum=0;
00339
00340 BACKTRELLIS *bt;
00341 int lookup_range;
00342
00343 if (tm < 0) return(0);
00344
00345 bt = r->backtrellis;
00346 lookup_range = r->config->pass2.lookup_range;
00347
00348 #ifdef PREFER_CENTER_ON_TRELLIS_LOOKUP
00349
00350
00351 for (t_step = 0; t_step < lookup_range; t_step++) {
00352
00353 t = tm - t_step;
00354 if (t < 0 || t > bt->framelen - 1 || t >= t_end) continue;
00355 num = pick_backtrellis_words(r, nw, oldnum, hypo, t);
00356 if (num > oldnum) {
00357 qsort(nw, num, sizeof(NEXTWORD *),
00358 (int (*)(const void *,const void *))compare_nw);
00359 oldnum = num;
00360 }
00361 if (t_step == 0) continue;
00362
00363 t = tm + t_step;
00364 if (t < 0 || t > bt->framelen - 1 || t >= t_end) continue;
00365 num = pick_backtrellis_words(r, nw, oldnum, hypo, t);
00366 if (num > oldnum) {
00367 qsort(nw, num, sizeof(NEXTWORD *),
00368 (int (*)(const void *,const void *))compare_nw);
00369 oldnum = num;
00370 }
00371 }
00372
00373 #else
00374
00375
00376 for(t = tm; t >= tm - lookup_range; t--) {
00377 if (t < 0) break;
00378 num = pick_backtrellis_words(r, nw, oldnum, hypo, t);
00379 if (num > oldnum) {
00380 qsort(nw, num, sizeof(NEXTWORD *),
00381 (int (*)(const void *,const void *))compare_nw);
00382 oldnum = num;
00383 }
00384 }
00385
00386 for(t = tm + 1; t < tm + lookup_range; t++) {
00387 if (t > bt->framelen - 1) break;
00388 if (t >= t_end) break;
00389 num = pick_backtrellis_words(r, nw, oldnum, hypo, t);
00390 if (num > oldnum) {
00391 qsort(nw, num, sizeof(NEXTWORD *),
00392 (int (*)(const void *,const void *))compare_nw);
00393 oldnum = num;
00394 }
00395 }
00396 #endif
00397
00398 return num;
00399 }
00400
00427 static int
00428 limit_nw(NEXTWORD **nw, NODE *hypo, int num, WORD_INFO *winfo)
00429 {
00430 int src,dst;
00431 int newnum;
00432
00433
00434
00435 if (hypo->seq[hypo->seqnum-1] == winfo->head_silwid) {
00436 return(0);
00437 }
00438
00439 dst = 0;
00440 for (src=0; src<num; src++) {
00441 if (nw[src]->id == winfo->tail_silwid) {
00442
00443
00444 continue;
00445 }
00446 #ifdef FIX_35_INHIBIT_SAME_WORD_EXPANSION
00447
00448
00449 if (nw[src]->tre == hypo->tre) continue;
00450 #endif
00451
00452 if (src != dst) memcpy(nw[dst], nw[src], sizeof(NEXTWORD));
00453 dst++;
00454 }
00455 newnum = dst;
00456
00457 return newnum;
00458 }
00459
00460
00495 int
00496 ngram_firstwords(NEXTWORD **nw, int peseqlen, int maxnw, RecogProcess *r)
00497 {
00498
00499 if (r->config->successive.enabled) {
00500
00501 if (r->sp_break_2_begin_word != WORD_INVALID) {
00502
00503
00504
00505 nw[0]->id = r->sp_break_2_begin_word;
00506 } else {
00507
00508
00509 nw[0]->id = r->lm->winfo->tail_silwid;
00510 }
00511 } else {
00512
00513 nw[0]->id = r->lm->winfo->tail_silwid;
00514 }
00515
00516 #ifdef FIX_PENALTY
00517 nw[0]->lscore = 0.0;
00518 #else
00519 nw[0]->lscore = r->config->lmp.lm_penalty2;
00520 #endif
00521
00522 return 1;
00523 }
00524
00562 int
00563 ngram_nextwords(NODE *hypo, NEXTWORD **nw, int maxnw, RecogProcess *r)
00564 {
00565 int num, num2;
00566
00567 if (hypo->seqnum == 0) {
00568 j_internal_error("ngram_nextwords: hypo contains no word\n");
00569 }
00570
00571
00572
00573 num = get_backtrellis_words(r, nw, hypo, hypo->estimated_next_t, hypo->bestt);
00574
00575
00576
00577 num2 = limit_nw(nw, hypo, num, r->lm->winfo);
00578
00579 if (debug2_flag) jlog("DEBUG: ngram_decode: %d-%d=%d unfolded\n",num, num-num2,num2);
00580
00581 return(num2);
00582 }
00583
00612 boolean
00613 ngram_acceptable(NODE *hypo, RecogProcess *r)
00614 {
00615
00616 if (r->config->successive.enabled) {
00617
00618
00619 if (hypo->seq[hypo->seqnum-1] == r->sp_break_2_end_word) {
00620 return TRUE;
00621 }
00622 } else {
00623
00624
00625 if (hypo->seq[hypo->seqnum-1] == r->lm->winfo->head_silwid) {
00626 return TRUE;
00627 }
00628 }
00629 return FALSE;
00630 }
00631
00632