00001
00018
00019
00020
00021
00022
00023
00024
00025 #include <julius/julius.h>
00026
00028 #undef GDEBUG
00029
00031 #undef GDEBUG2
00032
00033 #if defined(GDEBUG) || defined(GDEBUG2)
00034 static WCHMM_INFO *wchmm_local;
00035 #endif
00036
00053 void
00054 wordgraph_init(WCHMM_INFO *wchmm)
00055 {
00056 #if defined(GDEBUG) || defined(GDEBUG2)
00057 wchmm_local = wchmm;
00058 #endif
00059 }
00060
00061
00062
00063
00064
00101 static WordGraph *
00102 wordgraph_new(WORD_ID wid, HMM_Logical *headphone, HMM_Logical *tailphone, int leftframe, int rightframe, LOGPROB fscore_head, LOGPROB fscore_tail, LOGPROB gscore_head, LOGPROB gscore_tail, LOGPROB lscore, LOGPROB cm)
00103 {
00104 WordGraph *new;
00105
00106 new = (WordGraph *)mymalloc(sizeof(WordGraph));
00107 new->wid = wid;
00108 new->lefttime = leftframe;
00109 new->righttime = rightframe;
00110 new->fscore_head = fscore_head;
00111 new->fscore_tail = fscore_tail;
00112 new->gscore_head = gscore_head;
00113 new->gscore_tail = gscore_tail;
00114 new->lscore_tmp = lscore;
00115 #ifdef CM_SEARCH
00116 new->cmscore = cm;
00117 #endif
00118 new->forward_score = new->backward_score = 0.0;
00119 if (rightframe - leftframe + 1 != 0) {
00120
00121 new->amavg = (gscore_head - gscore_tail) / (float)(rightframe - leftframe + 1);
00122 }
00123 new->headphone = headphone;
00124 new->tailphone = tailphone;
00125 new->leftwordmaxnum = FANOUTSTEP;
00126 new->leftword = (WordGraph **)mymalloc(sizeof(WordGraph *) * new->leftwordmaxnum);
00127 new->left_lscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * new->leftwordmaxnum);
00128 new->leftwordnum = 0;
00129 new->rightwordmaxnum = FANOUTSTEP;
00130 new->rightword = (WordGraph **)mymalloc(sizeof(WordGraph *) * new->rightwordmaxnum);
00131 new->right_lscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * new->rightwordmaxnum);
00132 new->rightwordnum = 0;
00133
00134 new->mark = FALSE;
00135 #ifdef GRAPHOUT_DYNAMIC
00136 new->purged = FALSE;
00137 #endif
00138 new->next = NULL;
00139 new->saved = FALSE;
00140
00141 new->graph_cm = 0.0;
00142
00143 #ifdef GDEBUG
00144 {
00145 int i;
00146 WordGraph *w;
00147 jlog("DEBUG: NEW: \"%s\"[%d..%d]\n", wchmm_local->winfo->woutput[new->wid], new->lefttime, new->righttime);
00148 for(i=0;i<new->leftwordnum;i++) {
00149 w = new->leftword[i];
00150 jlog("DEBUG: \t left%d: \"%15s\"[%d..%d]\n", i, wchmm_local->winfo->woutput[w->wid], w->lefttime, w->righttime);
00151 }
00152 for(i=0;i<new->rightwordnum;i++) {
00153 w = new->rightword[i];
00154 jlog("DEBUG: \tright%d: \"%15s\"[%d..%d]\n", i, wchmm_local->winfo->woutput[w->wid], w->lefttime, w->righttime);
00155 }
00156 jlog("DEBUG: \headphone: %s\n", new->headphone->name);
00157 jlog("DEBUG: \tailphone: %s\n", new->tailphone->name);
00158 }
00159 #endif
00160
00161 return(new);
00162 }
00163
00180 void
00181 wordgraph_free(WordGraph *wg)
00182 {
00183 free(wg->rightword);
00184 free(wg->right_lscore);
00185 free(wg->leftword);
00186 free(wg->left_lscore);
00187 free(wg);
00188 }
00189
00190
00191
00192
00209 static void
00210 wordgraph_add_leftword(WordGraph *wg, WordGraph *left, LOGPROB lscore)
00211 {
00212 if (wg == NULL) return;
00213 if (left == NULL) return;
00214 if (wg->leftwordnum >= wg->leftwordmaxnum) {
00215
00216 wg->leftwordmaxnum += FANOUTSTEP;
00217 wg->leftword = (WordGraph **)myrealloc(wg->leftword, sizeof(WordGraph *) * wg->leftwordmaxnum);
00218 wg->left_lscore = (LOGPROB *)myrealloc(wg->left_lscore, sizeof(LOGPROB) * wg->leftwordmaxnum);
00219 }
00220 wg->leftword[wg->leftwordnum] = left;
00221 wg->left_lscore[wg->leftwordnum] = lscore;
00222 wg->leftwordnum++;
00223 #ifdef GDEBUG
00224 jlog("DEBUG: addleft: \"%s\"[%d..%d] added as %dth left of \"%s\"[%d..%d]\n", wchmm_local->winfo->woutput[left->wid], left->lefttime, left->righttime, wg->leftwordnum, wchmm_local->winfo->woutput[wg->wid], wg->lefttime, wg->righttime);
00225 #endif
00226 }
00227
00246 static void
00247 wordgraph_add_rightword(WordGraph *wg, WordGraph *right, LOGPROB lscore)
00248 {
00249 if (wg == NULL) return;
00250 if (right == NULL) return;
00251 if (wg->rightwordnum >= wg->rightwordmaxnum) {
00252
00253 wg->rightwordmaxnum += FANOUTSTEP;
00254 wg->rightword = (WordGraph **)myrealloc(wg->rightword, sizeof(WordGraph *) * wg->rightwordmaxnum);
00255 wg->right_lscore = (LOGPROB *)myrealloc(wg->right_lscore, sizeof(LOGPROB) * wg->rightwordmaxnum);
00256 }
00257 wg->rightword[wg->rightwordnum] = right;
00258 wg->right_lscore[wg->rightwordnum] = lscore;
00259 wg->rightwordnum++;
00260 #ifdef GDEBUG
00261 jlog("DEBUG: addright: \"%s\"[%d..%d] added as %dth right of \"%s\"[%d..%d]\n", wchmm_local->winfo->woutput[right->wid], right->lefttime, right->righttime, wg->rightwordnum, wchmm_local->winfo->woutput[wg->wid], wg->lefttime, wg->righttime);
00262 #endif
00263 }
00264
00294 boolean
00295 wordgraph_check_and_add_leftword(WordGraph *wg, WordGraph *left, LOGPROB lscore)
00296 {
00297 int i;
00298
00299 if (wg == NULL) return FALSE;
00300 if (left == NULL) return FALSE;
00301 for(i=0;i<wg->leftwordnum;i++) {
00302 if (wg->leftword[i] == left) {
00303 break;
00304 }
00305 }
00306 if (i >= wg->leftwordnum) {
00307 wordgraph_add_leftword(wg, left, lscore);
00308 return TRUE;
00309 } else if (wg->left_lscore[i] < lscore) {
00310
00311 if (debug2_flag) jlog("DEBUG: check_and_add_leftword: update left\n");
00312 wg->left_lscore[i] = lscore;
00313 }
00314 return FALSE;
00315 }
00316
00346 boolean
00347 wordgraph_check_and_add_rightword(WordGraph *wg, WordGraph *right, LOGPROB lscore)
00348 {
00349 int i;
00350
00351 if (wg == NULL) return FALSE;
00352 if (right == NULL) return FALSE;
00353 for(i=0;i<wg->rightwordnum;i++) {
00354 if (wg->rightword[i] == right) {
00355 break;
00356 }
00357 }
00358 if (i >= wg->rightwordnum) {
00359 wordgraph_add_rightword(wg, right, lscore);
00360 return TRUE;
00361 } else if (wg->right_lscore[i] < lscore) {
00362
00363 if (debug2_flag) jlog("DEBUG: check_and_add_rightword: update right\n");
00364 wg->right_lscore[i] = lscore;
00365 }
00366 return FALSE;
00367 }
00368
00389 static boolean
00390 merge_contexts(WordGraph *dst, WordGraph *src)
00391 {
00392 int s, d;
00393 WordGraph *adding;
00394 boolean ret;
00395
00396 #ifdef GDEBUG
00397 jlog("DEBUG: merge_contexts: merging context of \"%s\"[%d..%d] to \"%s\"[%d..%d]...\n",
00398 wchmm_local->winfo->woutput[src->wid], src->lefttime, src->righttime,
00399 wchmm_local->winfo->woutput[dst->wid], dst->lefttime, dst->righttime);
00400 #endif
00401
00402 ret = FALSE;
00403
00404
00405 for(s=0;s<src->leftwordnum;s++) {
00406 adding = src->leftword[s];
00407 if (adding->mark) continue;
00408
00409 if (adding == dst) {
00410 #ifdef GDEBUG
00411 jlog("DEBUG: merge_contexts: skipping direct link (dst) -> (src)\n");
00412 #endif
00413 continue;
00414 }
00415 for(d=0;d<dst->leftwordnum;d++) {
00416 if (dst->leftword[d]->mark) continue;
00417 if (dst->leftword[d] == adding) {
00418 break;
00419 }
00420 }
00421 if (d >= dst->leftwordnum) {
00422 wordgraph_add_leftword(dst, adding, src->left_lscore[s]);
00423 #ifdef GDEBUG
00424 jlog("DEBUG: merge_contexts: added \"%s\"[%d..%d] as a new left context\n",
00425 wchmm_local->winfo->woutput[adding->wid], adding->lefttime, adding->righttime);
00426 #endif
00427 ret = TRUE;
00428 } else if (dst->left_lscore[d] < src->left_lscore[s]) {
00429 jlog("DEBUG: merge_context: update left\n");
00430 dst->left_lscore[d] = src->left_lscore[s];
00431 }
00432 #ifdef GDEBUG
00433 else {
00434 jlog("DEBUG: merge_contexts: \"%s\"[%d..%d] already exist\n",
00435 wchmm_local->winfo->woutput[adding->wid], adding->lefttime, adding->righttime);
00436 }
00437 #endif
00438 }
00439
00440
00441 for(s=0;s<src->rightwordnum;s++) {
00442 adding = src->rightword[s];
00443 if (adding->mark) continue;
00444
00445 if (adding == dst) {
00446 #ifdef GDEBUG
00447 jlog("DEBUG: merge_contexts: skipping direct link (src) -> (dst)\n");
00448 #endif
00449 continue;
00450 }
00451 for(d=0;d<dst->rightwordnum;d++) {
00452 if (dst->rightword[d]->mark) continue;
00453 if (dst->rightword[d] == adding) {
00454 break;
00455 }
00456 }
00457 if (d >= dst->rightwordnum) {
00458 wordgraph_add_rightword(dst, adding, src->right_lscore[s]);
00459 #ifdef GDEBUG
00460 jlog("DEBUG: merge_contexts: added \"%s\"[%d..%d] as a new right context\n",
00461 wchmm_local->winfo->woutput[adding->wid], adding->lefttime, adding->righttime);
00462 #endif
00463 ret = TRUE;
00464 } else if (dst->right_lscore[d] < src->right_lscore[s]) {
00465 jlog("DEBUG: merge_context: update right\n");
00466 dst->right_lscore[d] = src->right_lscore[s];
00467 }
00468 #ifdef GDEBUG
00469 else {
00470 jlog("DEBUG: merge_contexts: \"%s\"[%d..%d] already exist\n",
00471 wchmm_local->winfo->woutput[adding->wid], adding->lefttime, adding->righttime);
00472 }
00473 #endif
00474 }
00475
00476 return(ret);
00477 }
00478
00497 static void
00498 swap_leftword(WordGraph *wg, WordGraph *from, WordGraph *to, LOGPROB lscore)
00499 {
00500 int i;
00501
00502 #ifdef GDEBUG
00503 jlog("DEBUG: swapleft: replacing left of \"%s\"[%d..%d] from \"%s\"[%d..%d] to \"%s\"[%d..%d]...\n",
00504 wchmm_local->winfo->woutput[wg->wid], wg->lefttime, wg->righttime,
00505 wchmm_local->winfo->woutput[from->wid], from->lefttime, from->righttime,
00506 wchmm_local->winfo->woutput[to->wid], to->lefttime, to->righttime);
00507 #endif
00508
00509 for(i=0;i<wg->leftwordnum;i++) {
00510 if (wg->leftword[i] == from) {
00511 wg->leftword[i] = to;
00512 wg->left_lscore[i] = lscore;
00513 }
00514 }
00515 }
00516
00535 static void
00536 swap_rightword(WordGraph *wg, WordGraph *from, WordGraph *to, LOGPROB lscore)
00537 {
00538 int i;
00539
00540 #ifdef GDEBUG
00541 jlog("DEBUG: swapright: replacing right of \"%s\"[%d..%d] from \"%s\"[%d..%d] to \"%s\"[%d..%d]...\n",
00542 wchmm_local->winfo->woutput[wg->wid], wg->lefttime, wg->righttime,
00543 wchmm_local->winfo->woutput[from->wid], from->lefttime, from->righttime,
00544 wchmm_local->winfo->woutput[to->wid], to->lefttime, to->righttime);
00545 #endif
00546
00547 for(i=0;i<wg->rightwordnum;i++) {
00548 if (wg->rightword[i] == from) {
00549 wg->rightword[i] = to;
00550 wg->right_lscore[i] = lscore;
00551 }
00552 }
00553 }
00554
00567 static void
00568 uniq_leftword(WordGraph *wg)
00569 {
00570 int i, j, dst;
00571 boolean ok;
00572
00573 dst = 0;
00574 for(i=0;i<wg->leftwordnum;i++) {
00575 ok = TRUE;
00576 for(j=0;j<dst;j++) {
00577 if (wg->leftword[i] == wg->leftword[j]) {
00578 ok = FALSE;
00579 break;
00580 }
00581 }
00582 if (ok == TRUE) {
00583 wg->leftword[dst] = wg->leftword[i];
00584 wg->left_lscore[dst] = wg->left_lscore[i];
00585 dst++;
00586 }
00587 }
00588 wg->leftwordnum = dst;
00589 }
00590
00603 static void
00604 uniq_rightword(WordGraph *wg)
00605 {
00606 int i, j, dst;
00607 boolean ok;
00608
00609 dst = 0;
00610 for(i=0;i<wg->rightwordnum;i++) {
00611 ok = TRUE;
00612 for(j=0;j<dst;j++) {
00613 if (wg->rightword[i] == wg->rightword[j]) {
00614 ok = FALSE;
00615 break;
00616 }
00617 }
00618 if (ok == TRUE) {
00619 wg->rightword[dst] = wg->rightword[i];
00620 wg->right_lscore[dst] = wg->right_lscore[i];
00621 dst++;
00622 }
00623 }
00624 wg->rightwordnum = dst;
00625 }
00626
00639 static void
00640 wordgraph_remove_context(WordGraph *wg)
00641 {
00642 WordGraph *w;
00643 int i,j,k;
00644
00645 if (wg == NULL) return;
00646
00647 for(i=0;i<wg->leftwordnum;i++) {
00648 w = wg->leftword[i];
00649 k=0;
00650 for(j=0;j<w->rightwordnum;j++) {
00651 if (w->rightword[j] != wg) {
00652 if (j != k) {
00653 w->rightword[k] = w->rightword[j];
00654 w->right_lscore[k] = w->right_lscore[j];
00655 }
00656 k++;
00657 }
00658 }
00659 w->rightwordnum = k;
00660 }
00661 for(i=0;i<wg->rightwordnum;i++) {
00662 w = wg->rightword[i];
00663 k=0;
00664 for(j=0;j<w->leftwordnum;j++) {
00665 if (w->leftword[j] != wg) {
00666 if (j != k) {
00667 w->leftword[k] = w->leftword[j];
00668 w->left_lscore[k] = w->left_lscore[j];
00669 }
00670 k++;
00671 }
00672 }
00673 w->leftwordnum = k;
00674 #ifdef GDEBUG2
00675 if (w->leftwordnum == 0) {
00676 jlog("DEBUG: leftword becomes 0 by remove_context\n");
00677 put_wordgraph(jlog_get_fp(), w, wchmm_local->winfo);
00678 jlog("DEBUG: by deleting its left context:\n");
00679 put_wordgraph(jlog_get_fp(), wg, wchmm_local->winfo);
00680 }
00681 #endif
00682 }
00683 }
00684
00697 static void
00698 wordgraph_link_context(WordGraph *wg)
00699 {
00700 int i,j;
00701 WordGraph *left, *right;
00702
00703 if (wg == NULL) return;
00704 for(i=0;i<wg->leftwordnum;i++) {
00705 left = wg->leftword[i];
00706 if (left->mark) continue;
00707 if (left == wg) continue;
00708 for(j=0;j<wg->rightwordnum;j++) {
00709 right = wg->rightword[j];
00710 if (right->mark) continue;
00711 if (right == wg) continue;
00712 if (left == right) continue;
00713 wordgraph_check_and_add_leftword(right, left, wg->left_lscore[i]);
00714 wordgraph_check_and_add_rightword(left, right, wg->right_lscore[j]);
00715 }
00716 }
00717 }
00718
00719
00720
00721
00738 static int
00739 wordgraph_exec_erase(WordGraph **rootp)
00740 {
00741 WordGraph *wg, *we, *wtmp;
00742 int count;
00743
00744 if (*rootp == NULL) return(0);
00745
00746 wg = *rootp;
00747 count = 0;
00748 while (wg != NULL) {
00749 we = wg->next;
00750 while(we != NULL && we->mark == TRUE) {
00751 wtmp = we->next;
00752 wordgraph_free(we); count++;
00753 we = wtmp;
00754 }
00755 wg->next = we;
00756 wg = we;
00757 }
00758 if ((*rootp)->mark == TRUE) {
00759 wtmp = (*rootp)->next;
00760 wordgraph_free(*rootp); count++;
00761 *rootp = wtmp;
00762 }
00763
00764 return(count);
00765 }
00766
00785 static int
00786 compare_lefttime(WordGraph **x, WordGraph **y)
00787 {
00788 if ((*x)->lefttime > (*y)->lefttime) return 1;
00789 else if ((*x)->lefttime < (*y)->lefttime) return -1;
00790 else {
00791 if ((*x)->righttime > (*y)->righttime) return 1;
00792 else if ((*x)->righttime < (*y)->righttime) return -1;
00793 else {
00794 if ((*x)->fscore_head < (*y)->fscore_head) return 1;
00795 else if ((*x)->fscore_head > (*y)->fscore_head) return -1;
00796 else return 0;
00797 }
00798 }
00799 }
00800
00819 int
00820 wordgraph_sort_and_annotate_id(WordGraph **rootp, RecogProcess *r)
00821 {
00822 WordGraph *wg;
00823 int cnt;
00824 WordGraph **wlist;
00825 int i;
00826 WordGraph *wo;
00827
00828
00829 cnt = 0;
00830 for(wg=*rootp;wg;wg=wg->next) cnt++;
00831 if (cnt == 0) return 0;
00832
00833 wlist = (WordGraph **)mymalloc(sizeof(WordGraph *) * cnt);
00834 i = 0;
00835 for(wg=*rootp;wg;wg=wg->next) {
00836 wlist[i++] = wg;
00837 }
00838 qsort(wlist, cnt, sizeof(WordGraph *), (int (*)(const void *, const void *))compare_lefttime);
00839
00840
00841 wo = NULL;
00842 for(i=cnt-1;i>=0;i--) {
00843 wg = wlist[i];
00844 wg->id = i;
00845 wg->next = wo;
00846 wo = wg;
00847 }
00848 *rootp = wo;
00849 free(wlist);
00850
00851 return cnt;
00852 }
00853
00870 void
00871 wordgraph_clean(WordGraph **rootp)
00872 {
00873 WordGraph *wg, *wtmp;
00874
00875 wg = *rootp;
00876 while(wg != NULL) {
00877 wtmp = wg->next;
00878 wordgraph_free(wg);
00879 wg = wtmp;
00880 }
00881 *rootp = NULL;
00882
00883 }
00884
00885
00886
00887
00908 static int
00909 compare_beam(WordGraph **x, WordGraph **y)
00910 {
00911 if ((*x)->fscore_head < (*y)->fscore_head) return 1;
00912 else if ((*x)->fscore_head > (*y)->fscore_head) return -1;
00913 else return 0;
00914 }
00915
00940 void
00941 wordgraph_purge_leaf_nodes(WordGraph **rootp, RecogProcess *r)
00942 {
00943 WordGraph *wg;
00944 int i, dst;
00945 boolean changed;
00946 int count, erased, del_left, del_right;
00947
00948
00949 count = 0;
00950 for(wg=*rootp;wg;wg=wg->next) count++;
00951 if (verbose_flag) jlog("STAT: graphout: %d initial word arcs generated\n", count);
00952 if (count == 0) return;
00953
00954 if (verbose_flag) jlog("STAT: graphout: step 1: purge leaf nodes\n");
00955
00956
00957 del_left = del_right = 0;
00958 do {
00959 changed = FALSE;
00960 for(wg=*rootp;wg;wg=wg->next) {
00961 if (wg->mark == TRUE) continue;
00962
00963 if (wg->lefttime != 0) {
00964 for(i=0;i<wg->leftwordnum;i++) {
00965 if (wg->leftword[i]->mark == FALSE) break;
00966 }
00967 if (i >= wg->leftwordnum) {
00968 wg->mark = TRUE;
00969 changed = TRUE;
00970 del_left++;
00971 continue;
00972 }
00973 }
00974
00975 if (wg->righttime != r->peseqlen - 1) {
00976 for(i=0;i<wg->rightwordnum;i++) {
00977 if (wg->rightword[i]->mark == FALSE) break;
00978 }
00979 if (i >= wg->rightwordnum) {
00980 wg->mark = TRUE;
00981 changed = TRUE;
00982 del_right++;
00983 continue;
00984 }
00985 }
00986 }
00987 } while (changed == TRUE);
00988
00989 if (verbose_flag) jlog("STAT: graphout: %d leaf words found (left_blank=%d, right_blank=%d)\n", del_left + del_right, del_left, del_right);
00990
00991
00992 for(wg=*rootp;wg;wg=wg->next) {
00993 if (wg->mark) continue;
00994 dst = 0;
00995 for(i=0;i<wg->leftwordnum;i++) {
00996 if (wg->leftword[i]->mark == FALSE) {
00997 if (dst != i) {
00998 wg->leftword[dst] = wg->leftword[i];
00999 wg->left_lscore[dst] = wg->left_lscore[i];
01000 }
01001 dst++;
01002 }
01003 }
01004 wg->leftwordnum = dst;
01005 }
01006 for(wg=*rootp;wg;wg=wg->next) {
01007 if (wg->mark) continue;
01008 dst = 0;
01009 for(i=0;i<wg->rightwordnum;i++) {
01010 if (wg->rightword[i]->mark == FALSE) {
01011 if (dst != i) {
01012 wg->rightword[dst] = wg->rightword[i];
01013 wg->right_lscore[dst] = wg->right_lscore[i];
01014 }
01015 dst++;
01016 }
01017 }
01018 wg->rightwordnum = dst;
01019 }
01020
01021
01022 erased = wordgraph_exec_erase(rootp);
01023 if (verbose_flag) jlog("STAT: graphout: %d words purged, %d words left in lattice\n", erased, count - erased);
01024
01025 }
01026
01049 void
01050 wordgraph_depth_cut(WordGraph **rootp, RecogProcess *r)
01051 {
01052 #ifdef GRAPHOUT_DEPTHCUT
01053
01054 WordGraph *wg;
01055 int i, dst;
01056 boolean changed;
01057 int count, erased, del_left, del_right;
01058 WordGraph **wlist;
01059 boolean f;
01060 int *wc;
01061 int t;
01062 int pruned;
01063
01064
01065 if (r->config->graph.graphout_cut_depth < 0) return;
01066
01067 if (verbose_flag) jlog("STAT: graphout: step 1.5: cut less likely hypothesis by depth of %d\n", r->config->graph.graphout_cut_depth);
01068
01069
01070 count = 0;
01071 for(wg=*rootp;wg;wg=wg->next) count++;
01072 if (count == 0) return;
01073
01074
01075 wc = (int *)mymalloc(sizeof(int) * r->peseqlen);
01076 for (t=0;t<r->peseqlen;t++) wc[t] = 0;
01077
01078 wlist = (WordGraph **)mymalloc(sizeof(WordGraph *) * count);
01079 i = 0;
01080 for(wg=*rootp;wg;wg=wg->next) {
01081 wlist[i++] = wg;
01082 }
01083 qsort(wlist, count, sizeof(WordGraph *), (int (*)(const void *, const void *))compare_beam);
01084
01085 pruned = 0;
01086 for (i=0;i<count;i++) {
01087 wg = wlist[i];
01088 f = TRUE;
01089 for (t=wg->lefttime;t<=wg->righttime;t++) {
01090 wc[t]++;
01091 if (wc[t] <= r->config->graph.graphout_cut_depth) f = FALSE;
01092 }
01093 if (f) {
01094
01095 wg->mark = TRUE;
01096 pruned++;
01097 }
01098 }
01099 #ifdef GDEBUG2
01100 jlog("DEBUG: GRAPH DEPTH STATISTICS: NUMBER OF WORDS PER FRAME\n");
01101 for(t=0;t<r->peseqlen;t++) {
01102 if (wc[t] > r->config->graph.graphout_cut_depth) {
01103 jlog("*");
01104 } else {
01105 jlog(" ");
01106 }
01107 jlog("%4d: %d\n", t, wc[t]);
01108 }
01109 #endif
01110 if (verbose_flag) jlog("STAT: graphout: %d words out of %d are going to be pruned by depth cutting\n", pruned, count);
01111 free(wlist);
01112 free(wc);
01113
01114
01115 del_left = del_right = 0;
01116 do {
01117 changed = FALSE;
01118 for(wg=*rootp;wg;wg=wg->next) {
01119 if (wg->mark == TRUE) continue;
01120
01121 if (wg->lefttime != 0) {
01122 for(i=0;i<wg->leftwordnum;i++) {
01123 if (wg->leftword[i]->mark == FALSE) break;
01124 }
01125 if (i >= wg->leftwordnum) {
01126 wg->mark = TRUE;
01127 changed = TRUE;
01128 del_left++;
01129 continue;
01130 }
01131 }
01132
01133 if (wg->righttime != r->peseqlen - 1) {
01134 for(i=0;i<wg->rightwordnum;i++) {
01135 if (wg->rightword[i]->mark == FALSE) break;
01136 }
01137 if (i >= wg->rightwordnum) {
01138 wg->mark = TRUE;
01139 changed = TRUE;
01140 del_right++;
01141 continue;
01142 }
01143 }
01144 }
01145 } while (changed == TRUE);
01146
01147 if (verbose_flag) jlog("STAT: graphout: %d new leaves found (left_blank=%d, right_blank=%d)\n", del_left + del_right, del_left, del_right);
01148
01149
01150 for(wg=*rootp;wg;wg=wg->next) {
01151 if (wg->mark) continue;
01152 dst = 0;
01153 for(i=0;i<wg->leftwordnum;i++) {
01154 if (wg->leftword[i]->mark == FALSE) {
01155 if (dst != i) {
01156 wg->leftword[dst] = wg->leftword[i];
01157 wg->left_lscore[dst] = wg->left_lscore[i];
01158 }
01159 dst++;
01160 }
01161 }
01162 wg->leftwordnum = dst;
01163 }
01164 for(wg=*rootp;wg;wg=wg->next) {
01165 if (wg->mark) continue;
01166 dst = 0;
01167 for(i=0;i<wg->rightwordnum;i++) {
01168 if (wg->rightword[i]->mark == FALSE) {
01169 if (dst != i) {
01170 wg->rightword[dst] = wg->rightword[i];
01171 wg->right_lscore[dst] = wg->right_lscore[i];
01172 }
01173 dst++;
01174 }
01175 }
01176 wg->rightwordnum = dst;
01177 }
01178
01179
01180 erased = wordgraph_exec_erase(rootp);
01181 if (verbose_flag) jlog("STAT: graphout: total %d words purged, %d words left in lattice\n", erased, count - erased);
01182
01183 #else
01184
01185 if (verbose_flag) jlog("STAT: graphout: step 1.5: graph depth cutting has been disabled, skipped\n");
01186
01187 #endif
01188
01189 }
01190
01236 static boolean
01237 wordgraph_adjust_boundary_sub(WordGraph **rootp, int *mov_num_ret, int *dup_num_ret, int *del_num_ret, int *mod_num_ret, int count, int *maxfnum, int peseqlen, int lmtype, int **p_framelist, LOGPROB **p_framescorelist)
01238 {
01239 WordGraph *wg, *left, *new;
01240 int i, j, k;
01241 int fnum;
01242 int mov_num, dup_num, del_num, mod_num;
01243 boolean changed = FALSE;
01244 int *framelist;
01245 LOGPROB *framescorelist;
01246
01247 mov_num = dup_num = del_num = mod_num = 0;
01248
01249 framelist = *p_framelist;
01250 framescorelist = *p_framescorelist;
01251
01252
01253
01254
01255 if (*maxfnum == 0) {
01256
01257 *maxfnum = count;
01258 framelist = (int *)mymalloc(sizeof(int) * (*maxfnum));
01259 framescorelist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * (*maxfnum));
01260 #ifdef GDEBUG
01261 jlog("DEBUG: Notice: maxfnum starts at %d\n", *maxfnum);
01262 #endif
01263 } else if (*maxfnum < count) {
01264
01265 free(framescorelist);
01266 free(framelist);
01267 *maxfnum = count;
01268 framelist = (int *)mymalloc(sizeof(int) * (*maxfnum));
01269 framescorelist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * (*maxfnum));
01270 #ifdef GDEBUG
01271 jlog("DEBUG: Notice: maxfnum expanded by count (%d)\n", *maxfnum);
01272 #endif
01273 }
01274
01275 #ifdef GDEBUG2
01276 jlog("DEBUG: ***CHECK LOOP BEGIN***\n");
01277 #endif
01278 for(wg=*rootp;wg;wg=wg->next) {
01279 if (wg->mark) continue;
01280 #ifdef GDEBUG2
01281 jlog("DEBUG: [%d..%d] \"%s\"\n", wg->lefttime, wg->righttime, wchmm_local->winfo->woutput[wg->wid]);
01282 #endif
01283 if (wg->leftwordnum == 0) {
01284 if (wg->lefttime != 0) {
01285
01286 #ifdef GDEBUG2
01287 jlog("DEBUG: -> no leftword at middle of lattice, eliminate this\n");
01288 #endif
01289 wordgraph_remove_context(wg);
01290 wg->mark = TRUE;
01291 del_num++;
01292 changed = TRUE;
01293 }
01294
01295 continue;
01296 }
01297 if (wg->rightwordnum == 0) {
01298 if (wg->righttime != peseqlen-1) {
01299
01300 #ifdef GDEBUG2
01301 jlog("DEBUG: -> no rightword at middle of lattice, eliminate this\n");
01302 #endif
01303 wordgraph_remove_context(wg);
01304 wg->mark = TRUE;
01305 del_num++;
01306 changed = TRUE;
01307 continue;
01308 }
01309
01310 }
01311
01312 fnum = 0;
01313
01314 if (wg->leftwordnum > (*maxfnum)) {
01315
01316 free(framescorelist);
01317 free(framelist);
01318 *maxfnum = wg->leftwordnum;
01319 framelist = (int *)mymalloc(sizeof(int) * (*maxfnum));
01320 framescorelist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * (*maxfnum));
01321 #ifdef GDEBUG
01322 jlog("DEBUG: Notice: wg->leftwordnum exceeds maxfnum (%d > %d), expanded\n", wg->leftwordnum, *maxfnum);
01323 #endif
01324 }
01325 for(i=0;i<wg->leftwordnum;i++) {
01326 left = wg->leftword[i];
01327 if (left->mark) continue;
01328 for(j=0;j<fnum;j++) {
01329 if (framelist[j] == left->righttime + 1) break;
01330 }
01331 if (j >= fnum) {
01332 framelist[fnum] = left->righttime + 1;
01333
01334
01335 framescorelist[fnum] = left->gscore_tail - wg->left_lscore[i];
01336 fnum++;
01337 }
01338 }
01339 #ifdef GDEBUG2
01340 jlog("DEBUG: possible boundary of left words:");
01341 if (fnum == 0) {
01342 jlog(" (not exist)\n");
01343 } else {
01344 for(j=0;j<fnum;j++) jlog(" %d", framelist[j]);
01345 jlog("\n");
01346 }
01347 #endif
01348 if (fnum == 0) continue;
01349
01350 if (fnum == 1) {
01351 if (wg->lefttime != framelist[0]) {
01352 #ifdef GDEBUG2
01353 jlog("DEBUG: !moving as [%d..%d]", framelist[0], wg->righttime);
01354 #endif
01355
01356
01357
01358
01359
01360 if (framelist[0] > wg->righttime) {
01361 #ifdef GDEBUG2
01362 jlog(" : eliminated");
01363 #endif
01364 wordgraph_link_context(wg);
01365 wordgraph_remove_context(wg);
01366 wg->mark = TRUE;
01367 del_num++;
01368 } else {
01369 #ifdef GDEBUG2
01370 jlog(" : ok");
01371 #endif
01372
01373 wg->lefttime = framelist[0];
01374 wg->gscore_head = framescorelist[0];
01375 mov_num++;
01376 }
01377 #ifdef GDEBUG2
01378 jlog("\n");
01379 #endif
01380 changed = TRUE;
01381 } else if (wg->gscore_head != framescorelist[0]) {
01382
01383 #ifdef GDEBUG2
01384 jlog("DEBUG: !ghead score changed: %f -> %f\n", wg->gscore_head, framescorelist[0]);
01385 #endif
01386 wg->gscore_head = framescorelist[0];
01387 mod_num++;
01388 changed = TRUE;
01389 }
01390 }
01391 if (fnum > 1) {
01392
01393 for(j=0;j<fnum;j++) {
01394
01395 dup_num++;
01396 #ifdef GDEBUG2
01397 jlog("DEBUG: !duping as [%d..%d]", framelist[j], wg->righttime);
01398 #endif
01399
01400 if (framelist[j] > wg->righttime) {
01401
01402 #ifdef GDEBUG2
01403 jlog(" : eliminated");
01404 #endif
01405 for(i=0;i<wg->leftwordnum;i++) {
01406 left = wg->leftword[i];
01407 if (left->mark) continue;
01408 if (left->righttime + 1 == framelist[j]) {
01409 for(k=0;k<wg->rightwordnum;k++) {
01410 if ((wg->rightword[k])->mark) continue;
01411 if (wg->rightword[k] == left) continue;
01412 wordgraph_check_and_add_leftword(wg->rightword[k], left, wg->left_lscore[i]);
01413 wordgraph_check_and_add_rightword(left, wg->rightword[k], wg->right_lscore[k]);
01414 }
01415 }
01416 }
01417 del_num++;
01418
01419 } else {
01420
01421 #ifdef GDEBUG2
01422 jlog(" : ok");
01423 #endif
01424 new = wordgraph_new(wg->wid, wg->headphone, wg->tailphone, framelist[j], wg->righttime, wg->fscore_head, wg->fscore_tail, framescorelist[j], wg->gscore_tail, wg->lscore_tmp
01425 #ifdef CM_SEARCH
01426 , wg->cmscore
01427 #else
01428 , LOG_ZERO
01429 #endif
01430 );
01431
01432 for(i=0;i<wg->leftwordnum;i++) {
01433 if ((wg->leftword[i])->mark) continue;
01434 if ((wg->leftword[i])->righttime + 1 == framelist[j]) {
01435 wordgraph_add_leftword(new, wg->leftword[i], wg->left_lscore[i]);
01436 wordgraph_add_rightword(wg->leftword[i], new, wg->left_lscore[i]);
01437 }
01438 }
01439 for(i=0;i<wg->rightwordnum;i++) {
01440 if ((wg->rightword[i])->mark) continue;
01441 wordgraph_add_rightword(new, wg->rightword[i], wg->right_lscore[i]);
01442 wordgraph_add_leftword(wg->rightword[i], new, wg->right_lscore[i]);
01443 }
01444 new->saved = TRUE;
01445 new->next = *rootp;
01446 *rootp = new;
01447 }
01448
01449 #ifdef GDEBUG2
01450 jlog("\n");
01451 #endif
01452 }
01453
01454
01455 #ifdef GDEBUG2
01456 jlog("DEBUG: !delete original [%d..%d]\n", wg->lefttime, wg->righttime);
01457 #endif
01458 wordgraph_remove_context(wg);
01459 wg->mark = TRUE;
01460 dup_num--;
01461
01462 changed = TRUE;
01463 }
01464 }
01465
01466 *mov_num_ret = mov_num;
01467 *dup_num_ret = dup_num;
01468 *del_num_ret = del_num;
01469 *mod_num_ret = mod_num;
01470
01471 *p_framelist = framelist;
01472 *p_framescorelist = framescorelist;
01473
01474 #ifdef GDEBUG2
01475 if (changed) {
01476 jlog("DEBUG: *** some graph has been altered, check loop continues\n");
01477 } else {
01478 jlog("DEBUG: *** graph not changed at last loop, check ends here\n");
01479 }
01480 #endif
01481
01482 return (changed);
01483 }
01484
01501 static void
01502 wordgraph_compaction_thesame_sub(WordGraph **rootp, int *rest_ret, int *merged_ret)
01503 {
01504 WordGraph *wg, *we;
01505 int i, count, erased, merged;
01506
01507 count = 0;
01508 merged = 0;
01509 for(wg=*rootp;wg;wg=wg->next) {
01510 count++;
01511 if (wg->mark == TRUE) continue;
01512 for(we=wg->next;we;we=we->next) {
01513 if (we->mark == TRUE) continue;
01514
01515 if (wg->wid == we->wid &&
01516 wg->headphone == we->headphone &&
01517 wg->tailphone == we->tailphone &&
01518 wg->lefttime == we->lefttime &&
01519 wg->righttime == we->righttime &&
01520 wg->fscore_head == we->fscore_head &&
01521 wg->fscore_tail == we->fscore_tail) {
01522
01523 merge_contexts(wg, we);
01524
01525 for(i=0;i<we->leftwordnum;i++) {
01526 if (we->leftword[i]->mark) continue;
01527
01528 swap_rightword(we->leftword[i], we, wg, we->left_lscore[i]);
01529 }
01530 for(i=0;i<we->rightwordnum;i++) {
01531 if (we->rightword[i]->mark) continue;
01532
01533 swap_leftword(we->rightword[i], we, wg, we->right_lscore[i]);
01534 }
01535 we->mark = TRUE;
01536 merged++;
01537 }
01538 }
01539 }
01540
01541 erased = wordgraph_exec_erase(rootp);
01542
01543 for(wg=*rootp;wg;wg=wg->next) {
01544 uniq_leftword(wg);
01545 uniq_rightword(wg);
01546 }
01547
01548 *rest_ret = count - erased;
01549 *merged_ret = merged;
01550 }
01551
01594 void
01595 wordgraph_adjust_boundary(WordGraph **rootp, RecogProcess *r)
01596 {
01597 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01598 WordGraph *wg;
01599 int mov_num, dup_num, del_num, mod_num;
01600 int count, merged;
01601 boolean flag;
01602 int loopcount;
01603 int maxfnum;
01604 int *framelist;
01605 LOGPROB *framescorelist;
01606
01607 loopcount = 0;
01608
01609 if (verbose_flag) jlog("STAT: graphout: step 2: adjust boundaries\n");
01610 mov_num = dup_num = del_num = 0;
01611
01612
01613 count = 0;
01614 for(wg=*rootp;wg;wg=wg->next) count++;
01615 maxfnum = 0;
01616
01617 do {
01618
01619 flag = wordgraph_adjust_boundary_sub(rootp, &mov_num, &dup_num, &del_num, &mod_num, count, &maxfnum, r->peseqlen, r->lmtype, &framelist, &framescorelist);
01620
01621 wordgraph_compaction_thesame_sub(rootp, &count, &merged);
01622 if (verbose_flag) jlog("STAT: graphout: #%d: %d moved, %d duplicated, %d purged, %d modified, %d idential, %d left\n", loopcount + 1, mov_num, dup_num, del_num, mod_num, merged, count);
01623 #ifdef GRAPHOUT_LIMIT_BOUNDARY_LOOP
01624 if (++loopcount >= r->config->graph.graphout_limit_boundary_loop_num) {
01625 if (verbose_flag) jlog("STAT: graphout: loop count reached %d, terminate loop now\n", r->config->graph.graphout_limit_boundary_loop_num);
01626 break;
01627 }
01628 #endif
01629 } while (flag);
01630
01631
01632 if (maxfnum > 0) {
01633 free(framescorelist);
01634 free(framelist);
01635 }
01636
01637
01638 wordgraph_exec_erase(rootp);
01639
01640 #else
01641
01642 if (verbose_flag) jlog("STAT: graphout: step 2: SKIP (adjusting boundaries)\n");
01643
01644 #endif
01645
01646 }
01647
01648
01670 void
01671 wordgraph_compaction_thesame(WordGraph **rootp)
01672 {
01673 int rest, erased;
01674
01675 if (verbose_flag) jlog("STAT: graphout: step 3: merge idential hypotheses (same score, boundary, context)\n");
01676 wordgraph_compaction_thesame_sub(rootp, &rest, &erased);
01677 if (verbose_flag) jlog("STAT: graphout: %d words merged, %d words left in lattice\n", erased, rest);
01678 }
01679
01708 void
01709 wordgraph_compaction_exacttime(WordGraph **rootp, RecogProcess *r)
01710 {
01711 WordGraph *wg, *we;
01712 int i, count, erased;
01713
01714 if (r->config->graph.graph_merge_neighbor_range < 0) {
01715 if (verbose_flag) jlog("STAT: graphout: step 4: SKIP (merge the same words with same boundary to the most likely one\n");
01716 return;
01717 }
01718
01719 if (verbose_flag) jlog("STAT: graphout: step 4: merge same words with same boundary to the most likely one\n");
01720
01721 count = 0;
01722 for(wg=*rootp;wg;wg=wg->next) {
01723 count++;
01724 if (wg->mark == TRUE) continue;
01725 for(we=wg->next;we;we=we->next) {
01726 if (we->mark == TRUE) continue;
01727
01728 if (wg->wid == we->wid &&
01729 wg->lefttime == we->lefttime &&
01730 wg->righttime == we->righttime) {
01731
01732 merge_contexts(wg, we);
01733
01734 for(i=0;i<we->leftwordnum;i++) {
01735 swap_rightword(we->leftword[i], we, wg, we->left_lscore[i]);
01736 }
01737 for(i=0;i<we->rightwordnum;i++) {
01738 swap_leftword(we->rightword[i], we, wg, we->right_lscore[i]);
01739 }
01740
01741 if (wg->fscore_head < we->fscore_head) {
01742 wg->headphone = we->headphone;
01743 wg->tailphone = we->tailphone;
01744 wg->fscore_head = we->fscore_head;
01745 wg->fscore_tail = we->fscore_tail;
01746 wg->gscore_head = we->gscore_head;
01747 wg->gscore_tail = we->gscore_tail;
01748 wg->lscore_tmp = we->lscore_tmp;
01749 #ifdef CM_SEARCH
01750 wg->cmscore = we->cmscore;
01751 #endif
01752 wg->amavg = we->amavg;
01753 }
01754 we->mark = TRUE;
01755 }
01756 }
01757 }
01758 erased = wordgraph_exec_erase(rootp);
01759 if (verbose_flag) jlog("STAT: graphout: %d words merged, %d words left in lattice\n", erased, count-erased);
01760
01761 for(wg=*rootp;wg;wg=wg->next) {
01762 uniq_leftword(wg);
01763 uniq_rightword(wg);
01764 }
01765 }
01766
01794 void
01795 wordgraph_compaction_neighbor(WordGraph **rootp, RecogProcess *r)
01796 {
01797 WordGraph *wg, *we;
01798 int i, count, erased;
01799
01800 if (r->config->graph.graph_merge_neighbor_range <= 0) {
01801 if (verbose_flag) jlog("STAT: graphout: step 5: SKIP (merge the same words around)\n");
01802 return;
01803 }
01804
01805 if (verbose_flag) jlog("STAT: graphout: step 5: merge same words around, with %d frame margin\n", r->config->graph.graph_merge_neighbor_range);
01806
01807 count = 0;
01808 for(wg=*rootp;wg;wg=wg->next) {
01809 count++;
01810 if (wg->mark == TRUE) continue;
01811 for(we=wg->next;we;we=we->next) {
01812 if (we->mark == TRUE) continue;
01813 if (wg->wid == we->wid &&
01814 abs(wg->lefttime - we->lefttime) <= r->config->graph.graph_merge_neighbor_range &&
01815 abs(wg->righttime - we->righttime) <= r->config->graph.graph_merge_neighbor_range) {
01816
01817 merge_contexts(wg, we);
01818
01819 for(i=0;i<we->leftwordnum;i++) {
01820 swap_rightword(we->leftword[i], we, wg, we->left_lscore[i]);
01821 }
01822 for(i=0;i<we->rightwordnum;i++) {
01823 swap_leftword(we->rightword[i], we, wg, we->right_lscore[i]);
01824 }
01825
01826 if (wg->fscore_head < we->fscore_head) {
01827 wg->headphone = we->headphone;
01828 wg->tailphone = we->tailphone;
01829 wg->fscore_head = we->fscore_head;
01830 wg->fscore_tail = we->fscore_tail;
01831 wg->gscore_head = we->gscore_head;
01832 wg->gscore_tail = we->gscore_tail;
01833 wg->lscore_tmp = we->lscore_tmp;
01834 #ifdef CM_SEARCH
01835 wg->cmscore = we->cmscore;
01836 #endif
01837 wg->amavg = we->amavg;
01838 }
01839 we->mark = TRUE;
01840 }
01841 }
01842 }
01843 erased = wordgraph_exec_erase(rootp);
01844 if (verbose_flag) jlog("STAT: graphout: %d words merged, %d words left in lattice\n", erased, count-erased);
01845
01846 for(wg=*rootp;wg;wg=wg->next) {
01847 uniq_leftword(wg);
01848 uniq_rightword(wg);
01849 }
01850
01851 }
01852
01853
01854
01855
01900 WordGraph *
01901 wordgraph_assign(WORD_ID wid, WORD_ID wid_left, WORD_ID wid_right, int leftframe, int rightframe, LOGPROB fscore_head, LOGPROB fscore_tail, LOGPROB gscore_head, LOGPROB gscore_tail, LOGPROB lscore, LOGPROB cm, RecogProcess *r)
01902 {
01903 WordGraph *newarc;
01904 HMM_Logical *l, *ret, *head, *tail;
01905 WORD_INFO *winfo;
01906
01907 winfo = r->lm->winfo;
01908
01909
01910 l = winfo->wseq[wid][winfo->wlen[wid]-1];
01911 if (wid_right != WORD_INVALID) {
01912 ret = get_right_context_HMM(l, winfo->wseq[wid_right][0]->name, r->am->hmminfo);
01913 if (ret != NULL) l = ret;
01914 }
01915 if (winfo->wlen[wid] > 1) {
01916 tail = l;
01917 l = winfo->wseq[wid][0];
01918 }
01919 if (wid_left != WORD_INVALID) {
01920 ret = get_left_context_HMM(l, winfo->wseq[wid_left][winfo->wlen[wid_left]-1]->name, r->am->hmminfo);
01921 if (ret != NULL) l = ret;
01922 }
01923 head = l;
01924 if (winfo->wlen[wid] <= 1) {
01925 tail = l;
01926 }
01927
01928
01929 newarc = wordgraph_new(wid, head, tail, leftframe, rightframe, fscore_head, fscore_tail, gscore_head, gscore_tail, lscore, cm);
01930
01931 return newarc;
01932 }
01933
01956 void
01957 wordgraph_save(WordGraph *wg, WordGraph *right, WordGraph **root)
01958 {
01959 if (wg != NULL) {
01960 wg->next = *root;
01961 *root = wg;
01962 wg->saved = TRUE;
01963 wordgraph_add_leftword(right, wg, wg->lscore_tmp);
01964 wordgraph_add_rightword(wg, right, wg->lscore_tmp);
01965 }
01966 }
01967
01968 #ifdef GRAPHOUT_DYNAMIC
01969
02019 WordGraph *
02020 wordgraph_check_merge(WordGraph *now, WordGraph **root, WORD_ID next_wid, boolean *merged_p, JCONF_SEARCH *jconf)
02021 {
02022 WordGraph *wg;
02023 int i;
02024 #ifdef GDEBUG
02025 WordGraph *w;
02026 #endif
02027
02028 #ifdef GRAPHOUT_SEARCH
02029 *merged_p = FALSE;
02030 #endif
02031
02032 if (now == NULL) return(NULL);
02033
02034 #ifdef GDEBUG
02035 jlog("DEBUG: check_merge: checking \"%s\"[%d..%d]\n", wchmm_local->winfo->woutput[now->wid], now->lefttime, now->righttime);
02036 for(i=0;i<now->leftwordnum;i++) {
02037 w = now->leftword[i];
02038 jlog("DEBUG: \t left%d: \"%15s\"[%d..%d]\n", i, wchmm_local->winfo->woutput[w->wid], w->lefttime, w->righttime);
02039 }
02040 for(i=0;i<now->rightwordnum;i++) {
02041 w = now->rightword[i];
02042 jlog("DEBUG: \tright%d: \"%15s\"[%d..%d]\n", i, wchmm_local->winfo->woutput[w->wid], w->lefttime, w->righttime);
02043 }
02044 #endif
02045
02046 for(wg=*root;wg;wg=wg->next) {
02047 if (wg == now) continue;
02048 #ifdef GRAPHOUT_DYNAMIC
02049
02050 if (wg->purged) continue;
02051 #endif
02052 if (jconf->graph.graph_merge_neighbor_range < 0) {
02053
02054
02055 if (wg->headphone != now->headphone || wg->tailphone != now->tailphone) {
02056 continue;
02057 }
02058 }
02059 if (wg->wid == now->wid
02060 && wg->lefttime == now->lefttime
02061 && wg->righttime == now->righttime) {
02062
02063 #ifdef GDEBUG
02064 jlog("DEBUG: check_merge: same word found: \"%s\"[%d..%d]\n", wchmm_local->winfo->woutput[wg->wid], wg->lefttime, wg->righttime);
02065 for(i=0;i<wg->leftwordnum;i++) {
02066 w = wg->leftword[i];
02067 jlog("DEBUG: \t left%d: \"%15s\"[%d..%d]\n", i, wchmm_local->winfo->woutput[w->wid], w->lefttime, w->righttime);
02068 }
02069 for(i=0;i<wg->rightwordnum;i++) {
02070 w = wg->rightword[i];
02071 jlog("DEBUG: \tright%d: \"%15s\"[%d..%d]\n", i, wchmm_local->winfo->woutput[w->wid], w->lefttime, w->righttime);
02072 }
02073 #endif
02074
02075 merge_contexts(wg, now);
02076
02077 for(i=0;i<now->leftwordnum;i++) {
02078 swap_rightword(now->leftword[i], now, wg, now->left_lscore[i]);
02079 uniq_rightword(now->leftword[i]);
02080 }
02081 for(i=0;i<now->rightwordnum;i++) {
02082 swap_leftword(now->rightword[i], now, wg, now->right_lscore[i]);
02083 uniq_leftword(now->rightword[i]);
02084 }
02085 #ifdef GRAPHOUT_SEARCH
02086
02087
02088
02089
02090
02091
02092
02093 for(i=0;i<wg->leftwordnum;i++) {
02094 if (wg->leftword[i]->wid == next_wid) break;
02095 }
02096 if (i < wg->leftwordnum) {
02097 *merged_p = TRUE;
02098 }
02099 #endif
02100 #ifdef GRAPHOUT_OVERWRITE
02101
02102
02103 if (
02104 #ifdef GRAPHOUT_OVERWRITE_GSCORE
02105
02106 wg->amavg < now->amavg;
02107 #else
02108 wg->fscore_head < now->fscore_head
02109 #endif
02110 ) {
02111 wg->headphone = now->headphone;
02112 wg->tailphone = now->tailphone;
02113 wg->fscore_head = now->fscore_head;
02114 wg->fscore_tail = now->fscore_tail;
02115 wg->gscore_head = now->gscore_head;
02116 wg->gscore_tail = now->gscore_tail;
02117 wg->lscore_tmp = now->lscore_tmp;
02118 #ifdef CM_SEARCH
02119 wg->cmscore = now->cmscore;
02120 #endif
02121 wg->amavg = now->amavg;
02122 #ifdef GRAPHOUT_SEARCH
02123 *merged_p = FALSE;
02124 #endif
02125 }
02126 #endif
02127
02128
02129 now->purged = TRUE;
02130
02131
02132 return wg;
02133 }
02134 }
02135
02136 return NULL;
02137 }
02138 #endif
02139
02140
02141
02142
02192 void
02193 put_wordgraph(FILE *fp, WordGraph *wg, WORD_INFO *winfo)
02194 {
02195 int i;
02196 if (fp == NULL) return;
02197 if (wg == NULL) {
02198 fprintf(fp, "(NULL)\n");
02199 } else {
02200 fprintf(fp, "%d:", wg->id);
02201 fprintf(fp, " [%d..%d]", wg->lefttime, wg->righttime);
02202 for(i=0;i<wg->leftwordnum;i++) {
02203 fprintf(fp, (i == 0) ? " left=%d" : ",%d", wg->leftword[i]->id);
02204 }
02205 for(i=0;i<wg->rightwordnum;i++) {
02206 fprintf(fp, (i == 0) ? " right=%d" : ",%d", wg->rightword[i]->id);
02207 }
02208 for(i=0;i<wg->leftwordnum;i++) {
02209 fprintf(fp, (i == 0) ? " left_lscore=%f" : ",%f", wg->left_lscore[i]);
02210 }
02211 for(i=0;i<wg->rightwordnum;i++) {
02212 fprintf(fp, (i == 0) ? " right_lscore=%f" : ",%f", wg->right_lscore[i]);
02213 }
02214 fprintf(fp, " lscore_tmp=%f", wg->lscore_tmp);
02215
02216 fprintf(fp, " wid=%d name=\"%s\" lname=\"%s\" f=%f f_prev=%f g_head=%f g_prev=%f", wg->wid, winfo->woutput[wg->wid], winfo->wname[wg->wid], wg->fscore_head, wg->fscore_tail, wg->gscore_head, wg->gscore_tail);
02217 fprintf(fp, " forward_score=%f backword_score=%f", wg->forward_score, wg->backward_score);
02218 if (wg->righttime - wg->lefttime + 1 != 0) {
02219 fprintf(fp, " AMavg=%f", wg->amavg);
02220 }
02221 #ifdef CM_SEARCH
02222 fprintf(fp, " cmscore=%f", wg->cmscore);
02223 #endif
02224 fprintf(fp, " graphcm=%f", wg->graph_cm);
02225 fprintf(fp, " headphone=%s", wg->headphone->name);
02226 fprintf(fp, " tailphone=%s", wg->tailphone->name);
02227 fprintf(fp, "\n");
02228 }
02229 }
02230
02251 void
02252 wordgraph_dump(FILE *fp, WordGraph *root, WORD_INFO *winfo)
02253 {
02254 WordGraph *wg;
02255 fprintf(fp, "--- begin wordgraph data ---\n");
02256 for(wg=root;wg;wg=wg->next) {
02257 put_wordgraph(fp, wg, winfo);
02258 }
02259 fprintf(fp, "--- end wordgraph data ---\n");
02260 }
02261
02280 void
02281 wordgraph_check_coherence(WordGraph *rootp, RecogProcess *r)
02282 {
02283 WordGraph *wg, *wl, *wr;
02284 int nl, nr;
02285 WORD_INFO *winfo;
02286
02287 winfo = r->lm->winfo;
02288
02289 for(wg=rootp;wg;wg=wg->next) {
02290
02291 if (wg->id < 0 || wg->id >= r->graph_totalwordnum) {
02292 jlog("ERROR: invalid graph word id \"%d\" (should be [0..%d])\n", wg->id, r->graph_totalwordnum-1);
02293 put_wordgraph(jlog_get_fp(), wg, winfo);
02294 continue;
02295 }
02296
02297 for(nl=0;nl<wg->leftwordnum;nl++){
02298 wl = wg->leftword[nl];
02299 if (wl->id < 0 || wl->id >= r->graph_totalwordnum) {
02300 jlog("ERROR: invalid graph word id \"%d\" (should be [0..%d]) in left context\n", wl->id, r->graph_totalwordnum-1);
02301 put_wordgraph(jlog_get_fp(), wg, winfo);
02302 continue;
02303 }
02304 for(nr=0;nr<wl->rightwordnum;nr++){
02305 if (wl->rightword[nr] == wg) break;
02306 }
02307 if (nr >= wl->rightwordnum) {
02308 jlog("ERROR: on graph, reverse link not found in left context\n");
02309 put_wordgraph(jlog_get_fp(), wg, winfo);
02310 put_wordgraph(jlog_get_fp(), wl, winfo);
02311 continue;
02312 }
02313 }
02314 for(nr=0;nr<wg->rightwordnum;nr++){
02315 wr = wg->rightword[nr];
02316 if (wr->id < 0 || wr->id >= r->graph_totalwordnum) {
02317 jlog("ERROR: invalid graph word id \"%d\" (should be [0..%d]) in right context\n", wr->id, r->graph_totalwordnum-1);
02318 put_wordgraph(jlog_get_fp(), wg, winfo);
02319 continue;
02320 }
02321 for(nl=0;nl<wr->leftwordnum;nl++){
02322 if (wr->leftword[nl] == wg) break;
02323 }
02324 if (nl >= wr->leftwordnum) {
02325 jlog("ERROR: on graph, reverse link not found in left context\n");
02326 put_wordgraph(jlog_get_fp(), wg, winfo);
02327 put_wordgraph(jlog_get_fp(), wr, winfo);
02328 continue;
02329 }
02330 }
02331 }
02332 }
02333
02334
02335
02336
02351 static int
02352 compare_forward(WordGraph **x, WordGraph **y)
02353 {
02354 if ((*x)->righttime < (*y)->righttime) return 1;
02355 else if ((*x)->righttime > (*y)->righttime) return -1;
02356 else return 0;
02357 }
02358
02373 static int
02374 compare_backward(WordGraph **x, WordGraph **y)
02375 {
02376 if ((*x)->lefttime < (*y)->lefttime) return -1;
02377 else if ((*x)->lefttime > (*y)->lefttime) return 1;
02378 else return 0;
02379 }
02380
02395 static LOGPROB
02396 addlog10(LOGPROB x, LOGPROB y)
02397 {
02398 if (x < y) {
02399
02400 return(y + log(1 + pow(10, x-y)) * INV_LOG_TEN);
02401 } else {
02402 return(x + log(1 + pow(10, y-x)) * INV_LOG_TEN);
02403 }
02404 }
02405
02429 void
02430 graph_forward_backward(WordGraph *root, RecogProcess *r)
02431 {
02432 WordGraph *wg, *left, *right;
02433 int i, j;
02434 LOGPROB s;
02435 LOGPROB sum1, sum2;
02436 int count;
02437 WordGraph **wlist;
02438 LOGPROB cm_alpha;
02439
02440 cm_alpha = r->config->annotate.cm_alpha;
02441
02442
02443 count = 0;
02444 for(wg=root;wg;wg=wg->next) count++;
02445 if (count == 0) return;
02446 wlist = (WordGraph **)mymalloc(sizeof(WordGraph *) * count);
02447 i = 0;
02448 for(wg=root;wg;wg=wg->next) {
02449 wlist[i++] = wg;
02450 }
02451
02452
02453 qsort(wlist, count, sizeof(WordGraph *), (int (*)(const void *, const void *))compare_forward);
02454
02455 for(wg=root;wg;wg=wg->next) {
02456 wg->forward_score = LOG_ZERO;
02457 }
02458
02459 sum1 = LOG_ZERO;
02460 for(i=0;i<count;i++) {
02461 wg = wlist[i];
02462 if (wg->righttime == r->peseqlen - 1) {
02463
02464 wg->forward_score = 0.0;
02465
02466 } else {
02467
02468 if (wg->forward_score == LOG_ZERO) {
02469 wordgraph_dump(stdout, root, r->lm->winfo);
02470 put_wordgraph(stdout, wg, r->lm->winfo);
02471 j_internal_error("NO CONTEXT?\n");
02472 }
02473 }
02474
02475 s = wg->amavg * (wg->righttime - wg->lefttime + 1);
02476 s *= cm_alpha;
02477 s += wg->forward_score;
02478 if (wg->lefttime == 0) {
02479
02480 sum1 = addlog10(sum1, s);
02481 } else {
02482
02483 for(j=0;j<wg->leftwordnum;j++) {
02484 left = wg->leftword[j];
02485 left->forward_score = addlog10(left->forward_score, s + wg->left_lscore[j] * cm_alpha);
02486 }
02487 }
02488 }
02489
02490
02491 qsort(wlist, count, sizeof(WordGraph *), (int (*)(const void *, const void *))compare_backward);
02492
02493 for(wg=root;wg;wg=wg->next) {
02494 wg->backward_score = LOG_ZERO;
02495 }
02496
02497 sum2 = LOG_ZERO;
02498 for(i=0;i<count;i++) {
02499 wg = wlist[i];
02500 if (wg->lefttime == 0) {
02501
02502 wg->backward_score = 0.0;
02503 } else {
02504
02505 if (wg->backward_score == LOG_ZERO) {
02506 put_wordgraph(stdout, wg, r->lm->winfo);
02507 j_internal_error("NO CONTEXT?\n");
02508 }
02509 }
02510
02511 s = wg->amavg * (wg->righttime - wg->lefttime + 1);
02512 s *= cm_alpha;
02513 s += wg->backward_score;
02514 if (wg->righttime == r->peseqlen - 1) {
02515
02516
02517 sum2 = addlog10(sum2, s);
02518 } else {
02519 for(j=0;j<wg->rightwordnum;j++) {
02520 right = wg->rightword[j];
02521 right->backward_score = addlog10(right->backward_score, s + wg->right_lscore[j] * cm_alpha);
02522 }
02523 }
02524 }
02525
02526 if (verbose_flag) jlog("STAT: graph_cm: forward score = %f, backward score = %f\n", sum1, sum2);
02527
02528
02529 for(wg=root;wg;wg=wg->next) {
02530 s = wg->amavg * (wg->righttime - wg->lefttime + 1);
02531 s *= cm_alpha;
02532 s = wg->backward_score + s + wg->forward_score;
02533 wg->graph_cm = pow(10, s - sum1);
02534
02535 }
02536
02537 free(wlist);
02538
02539 }
02540