00001
00159
00160
00161
00162
00163
00164
00165
00166 #include <julius/julius.h>
00167
00168
00169
00190 static void
00191 add_successor(WCHMM_INFO *wchmm, int node, WORD_ID w)
00192 {
00193 S_CELL *sctmp, *sc;
00194
00195
00196 sctmp=(S_CELL *) mymalloc(sizeof(S_CELL));
00197
00198 sctmp->word = w;
00199
00200 if (wchmm->state[node].scid == 0) {
00201 j_internal_error("add_successor: sclist id not assigned to branch node?\n");
00202 }
00203 sc = wchmm->sclist[wchmm->state[node].scid];
00204 if (sc == NULL || sctmp->word < sc->word) {
00205 sctmp->next = sc;
00206 wchmm->sclist[wchmm->state[node].scid] = sctmp;
00207 } else {
00208 for(;sc;sc=sc->next) {
00209 if (sc->next == NULL || sctmp->word < (sc->next)->word) {
00210 if (sctmp->word == sc->word) break;
00211 sctmp->next = sc->next;
00212 sc->next = sctmp;
00213 break;
00214 }
00215 }
00216 }
00217 }
00218
00239 static boolean
00240 match_successor(WCHMM_INFO *wchmm, int node1, int node2)
00241 {
00242 S_CELL *sc1,*sc2;
00243
00244
00245 if (wchmm->state[node1].scid == 0 || wchmm->state[node2].scid == 0) {
00246 j_internal_error("match_successor: sclist id not assigned to branch node?\n");
00247 }
00248 sc1 = wchmm->sclist[wchmm->state[node1].scid];
00249 sc2 = wchmm->sclist[wchmm->state[node2].scid];
00250 for (;;) {
00251 if (sc1 == NULL || sc2 == NULL) {
00252 if (sc1 == NULL && sc2 == NULL) {
00253 return TRUE;
00254 } else {
00255 return FALSE;
00256 }
00257 } else if (sc1->word != sc2->word) {
00258 return FALSE;
00259 }
00260 sc1 = sc1->next;
00261 sc2 = sc2->next;
00262 }
00263 }
00264
00279 static void
00280 free_successor(WCHMM_INFO *wchmm, int scid)
00281 {
00282 S_CELL *sc;
00283 S_CELL *sctmp;
00284
00285
00286 sc = wchmm->sclist[scid];
00287 while (sc != NULL) {
00288 sctmp = sc;
00289 sc = sc->next;
00290 free(sctmp);
00291 }
00292 }
00293
00308 static void
00309 compaction_successor(WCHMM_INFO *wchmm)
00310 {
00311 int src, dst;
00312
00313 dst = 1;
00314 for(src=1;src<wchmm->scnum;src++) {
00315 if (wchmm->state[wchmm->sclist2node[src]].scid <= 0) {
00316
00317 continue;
00318 }
00319 if (dst != src) {
00320 wchmm->sclist[dst] = wchmm->sclist[src];
00321 wchmm->sclist2node[dst] = wchmm->sclist2node[src];
00322 wchmm->state[wchmm->sclist2node[dst]].scid = dst;
00323 }
00324 dst++;
00325 }
00326 if (debug2_flag) {
00327 jlog("DEBUG: successor list shrinked from %d to %d\n", wchmm->scnum, dst);
00328 }
00329 wchmm->scnum = dst;
00330 }
00331
00346 static void
00347 shrink_successor(WCHMM_INFO *wchmm)
00348 {
00349 if (wchmm->sclist) {
00350 wchmm->sclist = (S_CELL **)myrealloc(wchmm->sclist, sizeof(S_CELL *) * wchmm->scnum);
00351 }
00352 if (wchmm->sclist2node) {
00353 wchmm->sclist2node = (int *)myrealloc(wchmm->sclist2node, sizeof(int) * wchmm->scnum);
00354 }
00355 }
00356
00373 void
00374 make_successor_list(WCHMM_INFO *wchmm)
00375 {
00376 int node;
00377 WORD_ID w;
00378 int i;
00379 boolean *freemark;
00380 int s;
00381
00382 jlog("STAT: make successor lists for factoring\n");
00383
00384
00385
00386 for (node=0;node<wchmm->n;node++) wchmm->state[node].scid = 0;
00387
00388
00389 s = 1;
00390 for (w=0;w<wchmm->winfo->num;w++) {
00391 for (i=0;i<wchmm->winfo->wlen[w];i++) {
00392 if (wchmm->state[wchmm->offset[w][i]].scid == 0) {
00393 wchmm->state[wchmm->offset[w][i]].scid = s;
00394 s++;
00395 }
00396 }
00397 if (wchmm->state[wchmm->wordend[w]].scid == 0) {
00398 wchmm->state[wchmm->wordend[w]].scid = s;
00399 s++;
00400 }
00401 }
00402 wchmm->scnum = s;
00403 if (debug2_flag) {
00404 jlog("DEBUG: initial successor list size = %d\n", wchmm->scnum);
00405 }
00406
00407
00408 wchmm->sclist = (S_CELL **)mymalloc(sizeof(S_CELL *) * wchmm->scnum);
00409 for (i=1;i<wchmm->scnum;i++) wchmm->sclist[i] = NULL;
00410 wchmm->sclist2node = (int *)mymalloc(sizeof(int) * wchmm->scnum);
00411
00412
00413 freemark = (boolean *)mymalloc(sizeof(boolean) * wchmm->scnum);
00414 for (i=1;i<wchmm->scnum;i++) freemark[i] = FALSE;
00415
00416
00417 for (w=0;w<wchmm->winfo->num;w++) {
00418
00419 for (i=0;i<wchmm->winfo->wlen[w];i++) {
00420 wchmm->sclist2node[wchmm->state[wchmm->offset[w][i]].scid] = wchmm->offset[w][i];
00421 add_successor(wchmm, wchmm->offset[w][i], w);
00422 }
00423
00424 wchmm->sclist2node[wchmm->state[wchmm->wordend[w]].scid] = wchmm->wordend[w];
00425 add_successor(wchmm, wchmm->wordend[w], w);
00426 }
00427
00428
00429
00430
00431 for (w=0;w<wchmm->winfo->num;w++) {
00432 node = wchmm->wordend[w];
00433 i = wchmm->winfo->wlen[w]-1;
00434 while (i >= 0) {
00435 if (node == wchmm->offset[w][i]) {
00436
00437 i--;
00438 continue;
00439 }
00440 if (match_successor(wchmm, node, wchmm->offset[w][i])) {
00441 freemark[wchmm->state[node].scid] = TRUE;
00442 }
00443
00444
00445
00446
00447
00448 node = wchmm->offset[w][i];
00449 i--;
00450 }
00451 }
00452
00453 for (i=1;i<wchmm->scnum;i++) {
00454 if (freemark[i] == TRUE) {
00455 free_successor(wchmm, i);
00456
00457 wchmm->state[wchmm->sclist2node[i]].scid = 0;
00458 }
00459 }
00460
00461 compaction_successor(wchmm);
00462
00463 free(freemark);
00464
00465 jlog("STAT: done\n");
00466 }
00467
00468 #ifdef UNIGRAM_FACTORING
00469
00486 void
00487 make_successor_list_unigram_factoring(WCHMM_INFO *wchmm)
00488 {
00489
00490 #ifndef FAST_FACTOR1_SUCCESSOR_LIST
00491
00492
00493 make_successor_list(wchmm);
00494 calc_all_unigram_factoring_values(wchmm);
00495
00496 #else
00497
00498
00499
00500 int node, node2;
00501 WORD_ID w, w2;
00502 int i, j, n, f;
00503 int s;
00504 LOGPROB tmpprob;
00505
00506 jlog("STAT: make successor lists for unigram factoring\n");
00507
00508
00509
00510 for (node=0;node<wchmm->n;node++) wchmm->state[node].scid = 0;
00511
00512
00513 wchmm->scnum = wchmm->winfo->num + 1;
00514 if (debug2_flag) {
00515 jlog("DEBUG: successor list size = %d\n", wchmm->scnum);
00516 }
00517
00518
00519 wchmm->sclist = (S_CELL **)mymalloc(sizeof(S_CELL *) * wchmm->scnum);
00520 for (i=1;i<wchmm->scnum;i++) wchmm->sclist[i] = NULL;
00521
00522
00523
00524 f = 1;
00525 s = 1;
00526 for (w=0;w<wchmm->winfo->num;w++) {
00527 for (i=0;i<wchmm->winfo->wlen[w] + 1;i++) {
00528 if (i < wchmm->winfo->wlen[w]) {
00529 node = wchmm->offset[w][i];
00530 } else {
00531 node = wchmm->wordend[w];
00532 }
00533 if (wchmm->state[node].scid == 0) {
00534
00535 wchmm->state[node].scid = s++;
00536 if (s > wchmm->scnum) {
00537 jlog("InternalError: make_successor_list_unigram_factoring: scid num exceeded?\n");
00538 return;
00539 }
00540 add_successor(wchmm, node, w);
00541 break;
00542 } else if (wchmm->state[node].scid > 0) {
00543
00544
00545 w2 = wchmm->sclist[wchmm->state[node].scid]->word;
00546 for(j=i+1;j<wchmm->winfo->wlen[w2] + 1;j++) {
00547 if (j < wchmm->winfo->wlen[w2]) {
00548 node2 = wchmm->offset[w2][j];
00549 } else {
00550 node2 = wchmm->wordend[w2];
00551 }
00552 if (wchmm->state[node2].scid == 0) {
00553
00554 wchmm->state[node2].scid = wchmm->state[node].scid;
00555 break;
00556 }
00557 }
00558 if (j >= wchmm->winfo->wlen[w2] + 1) {
00559
00560 jlog("InternalError: make_successor_list_unigram_factoring: no isolated word for %d\n", w2);
00561 return;
00562 }
00563
00564 n = f++;
00565 wchmm->state[node].scid = -n;
00566
00567 }
00568
00569 }
00570 }
00571
00572
00573 wchmm->fsnum = f;
00574 wchmm->fscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * wchmm->fsnum);
00575 for(n=0;n<wchmm->fsnum;n++) wchmm->fscore[n] = LOG_ZERO;
00576
00577
00578 for (w=0;w<wchmm->winfo->num;w++) {
00579 for (i=0;i<wchmm->winfo->wlen[w] + 1;i++) {
00580 if (i < wchmm->winfo->wlen[w]) {
00581 node = wchmm->offset[w][i];
00582 } else {
00583 node = wchmm->wordend[w];
00584 }
00585 if (wchmm->state[node].scid < 0) {
00586
00587 if (wchmm->ngram) {
00588 tmpprob = uni_prob(wchmm->ngram, wchmm->winfo->wton[w])
00589 #ifdef CLASS_NGRAM
00590 + wchmm->winfo->cprob[w]
00591 #endif
00592 ;
00593 } else {
00594 tmpprob = LOG_ZERO;
00595 }
00596 if (wchmm->lmvar == LM_NGRAM_USER) {
00597 tmpprob = (*(wchmm->uni_prob_user))(wchmm->winfo, w, tmpprob);
00598 }
00599 n = - wchmm->state[node].scid;
00600 if (wchmm->fscore[n] < tmpprob) {
00601 wchmm->fscore[n] = tmpprob;
00602 }
00603 }
00604
00605 }
00606 }
00607
00608 #endif
00609
00610 jlog("STAT: done\n");
00611 }
00612
00613 #endif
00614
00615
00634 void
00635 adjust_sc_index(WCHMM_INFO *wchmm)
00636 {
00637 WORD_ID w;
00638 int i,j,k;
00639 HMM_Logical *ltmp;
00640 int ltmp_state_num;
00641 int ato;
00642 LOGPROB prob;
00643 int node, scid;
00644 A_CELL2 *ac;
00645
00646
00647 for(w=0;w<wchmm->winfo->num;w++) {
00648 for(k=0;k<wchmm->winfo->wlen[w];k++) {
00649 node = wchmm->offset[w][k];
00650 scid = wchmm->state[node].scid;
00651 if (scid == 0) continue;
00652 ltmp = wchmm->winfo->wseq[w][k];
00653 ltmp_state_num = hmm_logical_state_num(ltmp);
00654 if ((hmm_logical_trans(ltmp))->a[0][ltmp_state_num-1] != LOG_ZERO) {
00655 j = k + 1;
00656 if (j == wchmm->winfo->wlen[w]) {
00657 if (wchmm->state[wchmm->wordend[w]].scid == 0) {
00658 jlog("STAT: word %d: factoring node copied for skip phone\n", w);
00659 wchmm->state[wchmm->wordend[w]].scid = scid;
00660 }
00661 } else {
00662 if (wchmm->state[wchmm->offset[w][j]].scid == 0) {
00663 jlog("STAT: word %d: factoring node copied for skip phone\n", w);
00664 wchmm->state[wchmm->offset[w][j]].scid = scid;
00665 }
00666 }
00667 }
00668 for(ato=1;ato<ltmp_state_num;ato++) {
00669 prob = (hmm_logical_trans(ltmp))->a[0][ato];
00670 if (prob != LOG_ZERO) {
00671 wchmm->state[node+ato-1].scid = scid;
00672 }
00673 }
00674 }
00675 }
00676
00677
00678 for(i=0;i<wchmm->startnum;i++) {
00679 node = wchmm->startnode[i];
00680 if (wchmm->state[node].out.state != NULL) {
00681 j_internal_error("adjust_sc_index: outprob exist in word-head node??\n");
00682 }
00683 if (wchmm->next_a[node] != LOG_ZERO) {
00684 if (wchmm->state[node+1].scid != 0) {
00685 if (wchmm->state[node].scid != 0 && wchmm->state[node].scid != wchmm->state[node+1].scid) {
00686 j_internal_error("adjust_sc_index: different successor list within word-head phone?\n");
00687 }
00688 wchmm->state[node].scid = wchmm->state[node+1].scid;
00689 wchmm->state[node+1].scid = 0;
00690 }
00691 }
00692 for(ac=wchmm->ac[node];ac;ac=ac->next) {
00693 for(k=0;k<ac->n;k++) {
00694 if (wchmm->state[ac->arc[k]].scid != 0) {
00695 if (wchmm->state[node].scid != 0 && wchmm->state[node].scid != wchmm->state[ac->arc[k]].scid) {
00696 j_internal_error("adjust_sc_index: different successor list within word-head phone?\n");
00697 }
00698 wchmm->state[node].scid = wchmm->state[ac->arc[k]].scid;
00699 wchmm->state[ac->arc[k]].scid = 0;
00700 }
00701 }
00702 }
00703 }
00704 }
00705
00706
00707
00708
00709
00728 void
00729 max_successor_cache_init(WCHMM_INFO *wchmm)
00730 {
00731 int i;
00732 LM_PROB_CACHE *l;
00733 WORD_ID wnum;
00734
00735
00736 shrink_successor(wchmm);
00737
00738
00739 l = &(wchmm->lmcache);
00740
00741 l->probcache = (LOGPROB *) mymalloc(sizeof(LOGPROB) * wchmm->scnum);
00742 l->lastwcache = (WORD_ID *) mymalloc(sizeof(WORD_ID) * wchmm->scnum);
00743 for (i=0;i<wchmm->scnum;i++) {
00744 l->lastwcache[i] = WORD_INVALID;
00745 }
00746
00747 if (wchmm->ngram) {
00748 wnum = wchmm->ngram->max_word_num;
00749 } else {
00750 wnum = wchmm->winfo->num;
00751 }
00752 #ifdef HASH_CACHE_IW
00753 l->iw_cache_num = wnum * jconf.search.pass1.iw_cache_rate / 100;
00754 if (l->iw_cache_num < 10) l->iw_cache_num = 10;
00755 #else
00756 l->iw_cache_num = wnum;
00757 #endif
00758 l->iw_sc_cache = (LOGPROB **)mymalloc(sizeof(LOGPROB *) * l->iw_cache_num);
00759 for (i=0;i<l->iw_cache_num;i++) {
00760 l->iw_sc_cache[i] = NULL;
00761 }
00762 #ifdef HASH_CACHE_IW
00763 l->iw_lw_cache = (WORD_ID *)mymalloc(sizeof(WORD_ID) * l->iw_cache_num);
00764 for (i=0;i<l->iw_cache_num;i++) {
00765 l->iw_lw_cache[i] = WORD_INVALID;
00766 }
00767 #endif
00768 }
00769
00782 static void
00783 max_successor_prob_iw_free(WCHMM_INFO *wchmm)
00784 {
00785 int i;
00786 LM_PROB_CACHE *l;
00787 l = &(wchmm->lmcache);
00788 for (i=0;i<l->iw_cache_num;i++) {
00789 if (l->iw_sc_cache[i] != NULL) free(l->iw_sc_cache[i]);
00790 l->iw_sc_cache[i] = NULL;
00791 }
00792 }
00793
00810 void
00811 max_successor_cache_free(WCHMM_INFO *wchmm)
00812 {
00813 free(wchmm->lmcache.probcache);
00814 free(wchmm->lmcache.lastwcache);
00815 max_successor_prob_iw_free(wchmm);
00816 free(wchmm->lmcache.iw_sc_cache);
00817 #ifdef HASH_CACHE_IW
00818 free(wchmm->lmcache.iw_lw_cache);
00819 #endif
00820 }
00821
00822 #ifdef UNIGRAM_FACTORING
00823
00864 void
00865 make_iwcache_index(WCHMM_INFO *wchmm)
00866 {
00867 int i, node, num;
00868
00869 wchmm->start2isolate = (int *)mymalloc(sizeof(int) * wchmm->startnum);
00870 num = 0;
00871 for(i=0;i<wchmm->startnum;i++) {
00872 node = wchmm->startnode[i];
00873 if (wchmm->state[node].scid >= 0) {
00874 wchmm->start2isolate[i] = num;
00875 num++;
00876 } else {
00877 wchmm->start2isolate[i] = -1;
00878 }
00879 }
00880 wchmm->isolatenum = num;
00881 }
00882
00927 void
00928 calc_all_unigram_factoring_values(WCHMM_INFO *wchmm)
00929 {
00930 S_CELL *sc, *sctmp;
00931 LOGPROB tmpprob, maxprob;
00932 int i, n;
00933
00934
00935 n = 0;
00936 for (i=1;i<wchmm->scnum;i++) {
00937 sc = wchmm->sclist[i];
00938 if (sc == NULL) {
00939 j_internal_error("call_all_unigram_factoring_values: sclist has no sc?\n");
00940 }
00941 if (sc->next != NULL) {
00942
00943 n++;
00944 }
00945 }
00946 wchmm->fsnum = n + 1;
00947
00948 wchmm->fscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * wchmm->fsnum);
00949
00950 n = 1;
00951 for (i=1;i<wchmm->scnum;i++) {
00952 sc = wchmm->sclist[i];
00953 if (sc->next != NULL) {
00954 maxprob = LOG_ZERO;
00955 for (sctmp = sc; sctmp; sctmp = sctmp->next) {
00956 if (wchmm->ngram) {
00957 tmpprob = uni_prob(wchmm->ngram, wchmm->winfo->wton[sctmp->word])
00958 #ifdef CLASS_NGRAM
00959 + wchmm->winfo->cprob[sctmp->word]
00960 #endif
00961 ;
00962 } else {
00963 tmpprob = LOG_ZERO;
00964 }
00965 if (wchmm->lmvar == LM_NGRAM_USER) {
00966 tmpprob = (*(wchmm->uni_prob_user))(wchmm->winfo, sctmp->word, tmpprob);
00967 }
00968 if (maxprob < tmpprob) maxprob = tmpprob;
00969 }
00970 wchmm->fscore[n] = maxprob;
00971 free_successor(wchmm, i);
00972 wchmm->state[wchmm->sclist2node[i]].scid = - n;
00973 n++;
00974 }
00975 }
00976
00977 compaction_successor(wchmm);
00978 }
00979
00980 #else
00981
01004 static LOGPROB
01005 calc_successor_prob(WCHMM_INFO *wchmm, WORD_ID lastword, int node)
01006 {
01007 S_CELL *sc;
01008 LOGPROB tmpprob, maxprob;
01009 WORD_ID lw;
01010
01011 maxprob = LOG_ZERO;
01012 if (wchmm->ngram) {
01013 lw = wchmm->winfo->wton[lastword];
01014 }
01015
01016 for (sc = wchmm->sclist[wchmm->state[node].scid]; sc; sc = sc->next) {
01017 if (wchmm->ngram) {
01018 tmpprob = (*(wchmm->ngram->bigram_prob))(wchmm->ngram, lw , wchmm->winfo->wton[sc->word])
01019 #ifdef CLASS_NGRAM
01020 + wchmm->winfo->cprob[sc->word]
01021 #endif
01022 ;
01023 } else {
01024 tmpprob = LOG_ZERO;
01025 }
01026 if (wchmm->lmvar == LM_NGRAM_USER) {
01027 tmpprob = (*(wchmm->bi_prob_user))(wchmm->winfo, lastword, sc->word, tmpprob);
01028 }
01029 if (maxprob < tmpprob) maxprob = tmpprob;
01030 }
01031
01032 return(maxprob);
01033 }
01034
01035 #endif
01036
01079 LOGPROB
01080 max_successor_prob(WCHMM_INFO *wchmm, WORD_ID lastword, int node)
01081 {
01082 LOGPROB maxprob;
01083 WORD_ID last_nword, w;
01084 int scid;
01085 LM_PROB_CACHE *l;
01086
01087 l = &(wchmm->lmcache);
01088
01089 if (lastword != WORD_INVALID) {
01090 if (wchmm->ngram) {
01091 last_nword = wchmm->winfo->wton[lastword];
01092 } else {
01093 last_nword = lastword;
01094 }
01095 scid = wchmm->state[node].scid;
01096 #ifdef UNIGRAM_FACTORING
01097 if (scid < 0) {
01098
01099 return(wchmm->fscore[(- scid)]);
01100 } else {
01101
01102
01103 if (last_nword != l->lastwcache[scid]) {
01104
01105 w = (wchmm->sclist[scid])->word;
01106 if (wchmm->ngram) {
01107 maxprob = (*(wchmm->ngram->bigram_prob))(wchmm->ngram, last_nword, wchmm->winfo->wton[w])
01108 #ifdef CLASS_NGRAM
01109 + wchmm->winfo->cprob[w]
01110 #endif
01111 ;
01112 } else {
01113 maxprob = LOG_ZERO;
01114 }
01115 if (wchmm->lmvar == LM_NGRAM_USER) {
01116 maxprob = (*(wchmm->bi_prob_user))(wchmm->winfo, lastword, w, maxprob);
01117 }
01118 l->lastwcache[scid] = last_nword;
01119 l->probcache[scid] = maxprob;
01120 return(maxprob);
01121 } else {
01122
01123 return (l->probcache[scid]);
01124 }
01125 }
01126 #else
01127
01128 if (last_nword != l->lastwcache[scid]) {
01129 maxprob = calc_successor_prob(wchmm, lastword, node);
01130
01131 l->lastwcache[scid] = last_nword;
01132 l->probcache[scid] = maxprob;
01133 return(maxprob);
01134 } else {
01135 return (l->probcache[scid]);
01136 }
01137 #endif
01138 } else {
01139 return(0.0);
01140 #if 0
01141 maxprob = LOG_ZERO;
01142 for (sc=wchmm->state[node].sc;sc;sc=sc->next) {
01143 tmpprob = uni_prob(wchmm->ngram, sc->word);
01144 if (maxprob < tmpprob) maxprob = tmpprob;
01145 }
01146 return(maxprob);
01147 #endif
01148 }
01149
01150 }
01151
01186 LOGPROB *
01187 max_successor_prob_iw(WCHMM_INFO *wchmm, WORD_ID lastword)
01188 {
01189 int i, j, x, node;
01190 int last_nword;
01191 WORD_ID w;
01192 LM_PROB_CACHE *l;
01193 LOGPROB p;
01194
01195 l = &(wchmm->lmcache);
01196
01197 if (wchmm->ngram) {
01198 last_nword = wchmm->winfo->wton[lastword];
01199 } else {
01200 last_nword = lastword;
01201 }
01202
01203 #ifdef HASH_CACHE_IW
01204 x = last_nword % l->iw_cache_num;
01205 if (l->iw_lw_cache[x] == last_nword) {
01206 return(l->iw_sc_cache[x]);
01207 }
01208 #else
01209 if (l->iw_sc_cache[last_nword] != NULL) {
01210 return(l->iw_sc_cache[last_nword]);
01211 }
01212 x = last_nword;
01213
01214 #endif
01215
01216 if (l->iw_sc_cache[x] == NULL) {
01217 #ifdef UNIGRAM_FACTORING
01218 l->iw_sc_cache[x] = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->isolatenum);
01219 #else
01220 l->iw_sc_cache[x] = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->startnum);
01221 #endif
01222 if (l->iw_sc_cache[x] == NULL) {
01223
01224 max_successor_prob_iw_free(wchmm);
01225 jlog("STAT: inter-word LM cache (%dMB) rehashed\n",
01226 (l->iw_cache_num *
01227 #ifdef UNIGRAM_FACTORING
01228 wchmm->isolatenum
01229 #else
01230 wchmm->startnum
01231 #endif
01232 ) / 1000 * sizeof(LOGPROB) / 1000);
01233 #ifdef UNIGRAM_FACTORING
01234 l->iw_sc_cache[x] = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->isolatenum);
01235 #else
01236 l->iw_sc_cache[x] = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->startnum);
01237 #endif
01238 if (l->iw_sc_cache[x] == NULL) {
01239 j_internal_error("max_successor_prob_iw: cannot malloc\n");
01240 }
01241 }
01242 }
01243
01244
01245 #ifdef UNIGRAM_FACTORING
01246 for (j=0;j<wchmm->startnum;j++) {
01247 i = wchmm->start2isolate[j];
01248 if (i == -1) continue;
01249 node = wchmm->startnode[j];
01250 if (wchmm->state[node].scid <= 0) {
01251
01252 j_internal_error("max_successor_prob_iw: isolated (not shared) tree root node has unigram factoring value??\n");
01253 } else {
01254 w = (wchmm->sclist[wchmm->state[node].scid])->word;
01255 if (wchmm->ngram) {
01256 p = (*(wchmm->ngram->bigram_prob))(wchmm->ngram, last_nword, wchmm->winfo->wton[w])
01257 #ifdef CLASS_NGRAM
01258 + wchmm->winfo->cprob[w]
01259 #endif
01260 ;
01261 } else {
01262 p = LOG_ZERO;
01263 }
01264 if (wchmm->lmvar == LM_NGRAM_USER) {
01265 p = (*(wchmm->bi_prob_user))(wchmm->winfo, lastword, w, p);
01266 }
01267 l->iw_sc_cache[x][i] = p;
01268 }
01269 }
01270 #else
01271 for (i=0;i<wchmm->startnum;i++) {
01272 node = wchmm->startnode[i];
01273 l->iw_sc_cache[x][i] = calc_successor_prob(wchmm, lastword, node);
01274 }
01275 #endif
01276 #ifdef HASH_CACHE_IW
01277 l->iw_lw_cache[x] = last_nword;
01278 #endif
01279
01280 return(l->iw_sc_cache[x]);
01281 }
01282
01332 boolean
01333 can_succeed(WCHMM_INFO *wchmm, WORD_ID lastword, int node)
01334 {
01335 int lc;
01336 S_CELL *sc;
01337
01338
01339
01340 if (lastword == WORD_INVALID) {
01341 for (sc=wchmm->sclist[wchmm->state[node].scid];sc;sc=sc->next) {
01342 if (dfa_cp_begin(wchmm->dfa, sc->word) == TRUE) return(TRUE);
01343 }
01344 return(FALSE);
01345 } else {
01346 lc = wchmm->winfo->wton[lastword];
01347 for (sc=wchmm->sclist[wchmm->state[node].scid];sc;sc=sc->next) {
01348 if (dfa_cp(wchmm->dfa, lc, sc->word) == TRUE) return(TRUE);
01349 }
01350 return(FALSE);
01351 }
01352 }
01353
01354