00001
00017
00018
00019
00020
00021
00022
00023
00024 #include <julius.h>
00025
00026 #ifdef GRAPHOUT
00027
00029 #undef GDEBUG
00030
00031
00032
00033
00034
00067 static WordGraph *
00068 wordgraph_new(WORD_ID wid, int leftframe, int rightframe, LOGPROB fscore_head, LOGPROB fscore_tail, LOGPROB gscore_head, LOGPROB gscore_tail, LOGPROB lscore, LOGPROB cm)
00069 {
00070 WordGraph *new;
00071
00072 new = (WordGraph *)mymalloc(sizeof(WordGraph));
00073 new->wid = wid;
00074 new->lefttime = leftframe;
00075 new->righttime = rightframe;
00076 new->fscore_head = fscore_head;
00077 new->fscore_tail = fscore_tail;
00078 new->gscore_head = gscore_head;
00079 new->gscore_tail = gscore_tail;
00080 #ifdef USE_NGRAM
00081 new->lscore = lscore;
00082 #endif
00083 #ifdef CM_SEARCH
00084 new->cmscore = cm;
00085 #endif
00086 new->leftwordmaxnum = FANOUTSTEP;
00087 new->leftword = (WordGraph **)mymalloc(sizeof(WordGraph *) * new->leftwordmaxnum);
00088 new->leftwordnum = 0;
00089 new->rightwordmaxnum = FANOUTSTEP;
00090 new->rightword = (WordGraph **)mymalloc(sizeof(WordGraph *) * new->rightwordmaxnum);
00091 new->rightwordnum = 0;
00092 new->mark = FALSE;
00093 #ifdef GRAPHOUT_DYNAMIC
00094 new->purged = FALSE;
00095 #endif
00096 new->next = NULL;
00097 new->saved = FALSE;
00098
00099 #ifdef GDEBUG
00100 {
00101 int i;
00102 WordGraph *w;
00103 printf("NEW: \"%s\"[%d..%d]\n", wchmm->winfo->woutput[new->wid], new->lefttime, new->righttime);
00104 for(i=0;i<new->leftwordnum;i++) {
00105 w = new->leftword[i];
00106 printf("\t left%d: \"%15s\"[%d..%d]\n", i, wchmm->winfo->woutput[w->wid], w->lefttime, w->righttime);
00107 }
00108 for(i=0;i<new->rightwordnum;i++) {
00109 w = new->rightword[i];
00110 printf("\tright%d: \"%15s\"[%d..%d]\n", i, wchmm->winfo->woutput[w->wid], w->lefttime, w->righttime);
00111 }
00112 }
00113 #endif
00114
00115 return(new);
00116 }
00117
00130 void
00131 wordgraph_free(WordGraph *wg)
00132 {
00133 free(wg->rightword);
00134 free(wg->leftword);
00135 free(wg);
00136 }
00137
00138
00139
00140
00141
00156 static void
00157 wordgraph_add_leftword(WordGraph *wg, WordGraph *left)
00158 {
00159 if (wg == NULL) return;
00160 if (left == NULL) return;
00161 if (wg->leftwordnum >= wg->leftwordmaxnum) {
00162
00163 wg->leftwordmaxnum += FANOUTSTEP;
00164 wg->leftword = (WordGraph **)myrealloc(wg->leftword, sizeof(WordGraph *) * wg->leftwordmaxnum);
00165 }
00166 wg->leftword[wg->leftwordnum] = left;
00167 wg->leftwordnum++;
00168 #ifdef GDEBUG
00169 printf("addleft: \"%s\"[%d..%d] added as %dth left of \"%s\"[%d..%d]\n", wchmm->winfo->woutput[left->wid], left->lefttime, left->righttime, wg->leftwordnum, wchmm->winfo->woutput[wg->wid], wg->lefttime, wg->righttime);
00170 #endif
00171 }
00172
00189 static void
00190 wordgraph_add_rightword(WordGraph *wg, WordGraph *right)
00191 {
00192 if (wg == NULL) return;
00193 if (right == NULL) return;
00194 if (wg->rightwordnum >= wg->rightwordmaxnum) {
00195
00196 wg->rightwordmaxnum += FANOUTSTEP;
00197 wg->rightword = (WordGraph **)myrealloc(wg->rightword, sizeof(WordGraph *) * wg->rightwordmaxnum);
00198 }
00199 wg->rightword[wg->rightwordnum] = right;
00200 wg->rightwordnum++;
00201 #ifdef GDEBUG
00202 printf("addright: \"%s\"[%d..%d] added as %dth right of \"%s\"[%d..%d]\n", wchmm->winfo->woutput[right->wid], right->lefttime, right->righttime, wg->rightwordnum, wchmm->winfo->woutput[wg->wid], wg->lefttime, wg->righttime);
00203 #endif
00204 }
00205
00229 boolean
00230 wordgraph_check_and_add_leftword(WordGraph *wg, WordGraph *left)
00231 {
00232 int i;
00233
00234 if (wg == NULL) return FALSE;
00235 if (left == NULL) return FALSE;
00236 for(i=0;i<wg->leftwordnum;i++) {
00237 if (wg->leftword[i] == left) {
00238 break;
00239 }
00240 }
00241 if (i >= wg->leftwordnum) {
00242 wordgraph_add_leftword(wg, left);
00243 return TRUE;
00244 }
00245 return FALSE;
00246 }
00247
00271 boolean
00272 wordgraph_check_and_add_rightword(WordGraph *wg, WordGraph *right)
00273 {
00274 int i;
00275
00276 if (wg == NULL) return FALSE;
00277 if (right == NULL) return FALSE;
00278 for(i=0;i<wg->rightwordnum;i++) {
00279 if (wg->rightword[i] == right) {
00280 break;
00281 }
00282 }
00283 if (i >= wg->rightwordnum) {
00284 wordgraph_add_rightword(wg, right);
00285 return TRUE;
00286 }
00287 return FALSE;
00288 }
00289
00310 static boolean
00311 merge_contexts(WordGraph *dst, WordGraph *src)
00312 {
00313 int s, d;
00314 WordGraph *adding;
00315 boolean ret;
00316
00317 #ifdef GDEBUG
00318 printf("merge_contexts: merging context of \"%s\"[%d..%d] to \"%s\"[%d..%d]...\n",
00319 wchmm->winfo->woutput[src->wid], src->lefttime, src->righttime,
00320 wchmm->winfo->woutput[dst->wid], dst->lefttime, dst->righttime);
00321 #endif
00322
00323 ret = FALSE;
00324
00325
00326 for(s=0;s<src->leftwordnum;s++) {
00327 adding = src->leftword[s];
00328
00329 if (adding == dst) {
00330 #ifdef GDEBUG
00331 printf("merge_contexts: skipping direct link (dst) -> (src)\n");
00332 #endif
00333 continue;
00334 }
00335 for(d=0;d<dst->leftwordnum;d++) {
00336 if (dst->leftword[d] == adding) {
00337 break;
00338 }
00339 }
00340 if (d >= dst->leftwordnum) {
00341 wordgraph_add_leftword(dst, adding);
00342 #ifdef GDEBUG
00343 printf("merge_contexts: added \"%s\"[%d..%d] as a new left context\n",
00344 wchmm->winfo->woutput[adding->wid], adding->lefttime, adding->righttime);
00345 #endif
00346 ret = TRUE;
00347 }
00348 #ifdef GDEBUG
00349 else {
00350 printf("merge_contexts: \"%s\"[%d..%d] already exist\n",
00351 wchmm->winfo->woutput[adding->wid], adding->lefttime, adding->righttime);
00352 }
00353 #endif
00354 }
00355
00356
00357 for(s=0;s<src->rightwordnum;s++) {
00358 adding = src->rightword[s];
00359
00360 if (adding == dst) {
00361 #ifdef GDEBUG
00362 printf("merge_contexts: skipping direct link (src) -> (dst)\n");
00363 #endif
00364 continue;
00365 }
00366 for(d=0;d<dst->rightwordnum;d++) {
00367 if (dst->rightword[d] == adding) {
00368 break;
00369 }
00370 }
00371 if (d >= dst->rightwordnum) {
00372 wordgraph_add_rightword(dst, adding);
00373 #ifdef GDEBUG
00374 printf("merge_contexts: added \"%s\"[%d..%d] as a new right context\n",
00375 wchmm->winfo->woutput[adding->wid], adding->lefttime, adding->righttime);
00376 #endif
00377 ret = TRUE;
00378 }
00379 #ifdef GDEBUG
00380 else {
00381 printf("merge_contexts: \"%s\"[%d..%d] already exist\n",
00382 wchmm->winfo->woutput[adding->wid], adding->lefttime, adding->righttime);
00383 }
00384 #endif
00385 }
00386
00387 return(ret);
00388 }
00389
00406 static void
00407 swap_leftword(WordGraph *wg, WordGraph *from, WordGraph *to)
00408 {
00409 int i;
00410
00411 #ifdef GDEBUG
00412 printf("swapleft: replacing left of \"%s\"[%d..%d] from \"%s\"[%d..%d] to \"%s\"[%d..%d]...\n",
00413 wchmm->winfo->woutput[wg->wid], wg->lefttime, wg->righttime,
00414 wchmm->winfo->woutput[from->wid], from->lefttime, from->righttime,
00415 wchmm->winfo->woutput[to->wid], to->lefttime, to->righttime);
00416 #endif
00417
00418 for(i=0;i<wg->leftwordnum;i++) {
00419 if (wg->leftword[i] == from) {
00420 wg->leftword[i] = to;
00421 }
00422 }
00423 }
00424
00441 static void
00442 swap_rightword(WordGraph *wg, WordGraph *from, WordGraph *to)
00443 {
00444 int i;
00445
00446 #ifdef GDEBUG
00447 printf("swapright: replacing right of \"%s\"[%d..%d] from \"%s\"[%d..%d] to \"%s\"[%d..%d]...\n",
00448 wchmm->winfo->woutput[wg->wid], wg->lefttime, wg->righttime,
00449 wchmm->winfo->woutput[from->wid], from->lefttime, from->righttime,
00450 wchmm->winfo->woutput[to->wid], to->lefttime, to->righttime);
00451 #endif
00452
00453 for(i=0;i<wg->rightwordnum;i++) {
00454 if (wg->rightword[i] == from) {
00455 wg->rightword[i] = to;
00456 }
00457 }
00458 }
00459
00472 static void
00473 uniq_leftword(WordGraph *wg)
00474 {
00475 int i, j, dst;
00476 boolean ok;
00477
00478 dst = 0;
00479 for(i=0;i<wg->leftwordnum;i++) {
00480 ok = TRUE;
00481 for(j=0;j<dst;j++) {
00482 if (wg->leftword[i] == wg->leftword[j]) {
00483 ok = FALSE;
00484 break;
00485 }
00486 }
00487 if (ok == TRUE) {
00488 wg->leftword[dst] = wg->leftword[i];
00489 dst++;
00490 }
00491 }
00492 wg->leftwordnum = dst;
00493 }
00494
00507 static void
00508 uniq_rightword(WordGraph *wg)
00509 {
00510 int i, j, dst;
00511 boolean ok;
00512
00513 dst = 0;
00514 for(i=0;i<wg->rightwordnum;i++) {
00515 ok = TRUE;
00516 for(j=0;j<dst;j++) {
00517 if (wg->rightword[i] == wg->rightword[j]) {
00518 ok = FALSE;
00519 break;
00520 }
00521 }
00522 if (ok == TRUE) {
00523 wg->rightword[dst] = wg->rightword[i];
00524 dst++;
00525 }
00526 }
00527 wg->rightwordnum = dst;
00528 }
00529
00542 static void
00543 wordgraph_remove_context(WordGraph *wg)
00544 {
00545 WordGraph *w;
00546 int i,j,k;
00547
00548 if (wg == NULL) return;
00549
00550 for(i=0;i<wg->leftwordnum;i++) {
00551 w = wg->leftword[i];
00552 k=0;
00553 for(j=0;j<w->rightwordnum;j++) {
00554 if (w->rightword[j] != wg) {
00555 if (j != k) {
00556 w->rightword[k] = w->rightword[j];
00557 }
00558 k++;
00559 }
00560 }
00561 w->rightwordnum = k;
00562 }
00563 for(i=0;i<wg->rightwordnum;i++) {
00564 w = wg->rightword[i];
00565 k=0;
00566 for(j=0;j<w->leftwordnum;j++) {
00567 if (w->leftword[j] != wg) {
00568 if (j != k) {
00569 w->leftword[k] = w->leftword[j];
00570 }
00571 k++;
00572 }
00573 }
00574 w->leftwordnum = k;
00575 }
00576 }
00577
00590 static void
00591 wordgraph_link_context(WordGraph *wg)
00592 {
00593 WordGraph *w;
00594 int i,j;
00595
00596 if (wg == NULL) return;
00597 for(i=0;i<wg->leftwordnum;i++) {
00598 for(j=0;j<wg->rightwordnum;j++) {
00599 wordgraph_check_and_add_leftword(wg->rightword[j], wg->leftword[i]);
00600 wordgraph_check_and_add_rightword(wg->leftword[i], wg->rightword[j]);
00601 }
00602 }
00603 }
00604
00605
00606
00607
00608
00625 static int
00626 wordgraph_exec_erase(WordGraph **rootp)
00627 {
00628 WordGraph *wg, *we, *wtmp;
00629 int count;
00630
00631 if (*rootp == NULL) return(0);
00632
00633 wg = *rootp;
00634 count = 0;
00635 while (wg != NULL) {
00636 we = wg->next;
00637 while(we != NULL && we->mark == TRUE) {
00638 wtmp = we->next;
00639 wordgraph_free(we); count++;
00640 we = wtmp;
00641 }
00642 wg->next = we;
00643 wg = we;
00644 }
00645 if ((*rootp)->mark == TRUE) {
00646 wtmp = (*rootp)->next;
00647 wordgraph_free(*rootp); count++;
00648 *rootp = wtmp;
00649 }
00650
00651 return(count);
00652 }
00653
00672 static int
00673 compare_lefttime(WordGraph **x, WordGraph **y)
00674 {
00675 if ((*x)->lefttime > (*y)->lefttime) return 1;
00676 else if ((*x)->lefttime < (*y)->lefttime) return -1;
00677 else {
00678 if ((*x)->righttime > (*y)->righttime) return 1;
00679 else if ((*x)->righttime < (*y)->righttime) return -1;
00680 else {
00681 if ((*x)->fscore_head < (*y)->fscore_head) return 1;
00682 else if ((*x)->fscore_head > (*y)->fscore_head) return -1;
00683 else return 0;
00684 }
00685 }
00686 }
00687
00700 void
00701 wordgraph_sort_and_annotate_id(WordGraph **rootp)
00702 {
00703 WordGraph *wg;
00704 int cnt;
00705 WordGraph **wlist;
00706 int i;
00707 WordGraph *wo;
00708
00709
00710 cnt = 0;
00711 for(wg=*rootp;wg;wg=wg->next) cnt++;
00712 graph_totalwordnum = cnt;
00713 if (graph_totalwordnum == 0) return;
00714
00715 wlist = (WordGraph **)mymalloc(sizeof(WordGraph *) * graph_totalwordnum);
00716 i = 0;
00717 for(wg=*rootp;wg;wg=wg->next) {
00718 wlist[i++] = wg;
00719 }
00720 qsort(wlist, graph_totalwordnum, sizeof(WordGraph *), (int (*)(const void *, const void *))compare_lefttime);
00721
00722
00723 wo = NULL;
00724 for(i=graph_totalwordnum-1;i>=0;i--) {
00725 wg = wlist[i];
00726 wg->id = i;
00727 wg->next = wo;
00728 wo = wg;
00729 }
00730 *rootp = wo;
00731 free(wlist);
00732 }
00733
00746 void
00747 wordgraph_clean(WordGraph **rootp)
00748 {
00749 WordGraph *wg, *wtmp;
00750
00751 wg = *rootp;
00752 while(wg != NULL) {
00753 wtmp = wg->next;
00754 wordgraph_free(wg);
00755 wg = wtmp;
00756 }
00757 *rootp = NULL;
00758
00759 }
00760
00761
00762
00763
00764
00783 void
00784 wordgraph_purge_leaf_nodes(WordGraph **rootp)
00785 {
00786 WordGraph *wg;
00787 boolean changed;
00788 int i, count, erased;
00789
00790
00791 count = 0;
00792 for(wg=*rootp;wg;wg=wg->next) count++;
00793 j_printf("- %d initial word arcs generated\n", count);
00794
00795 j_printf("Step 1: Purge leaf words\n");
00796
00797 do {
00798 changed = FALSE;
00799 for(wg=*rootp;wg;wg=wg->next) {
00800
00801 if (wg->mark == TRUE || wg->lefttime == 0) continue;
00802 for(i=0;i<wg->leftwordnum;i++) {
00803 if (wg->leftword[i]->mark == FALSE) break;
00804 }
00805 if (i >= wg->leftwordnum) {
00806 wg->mark = TRUE;
00807 changed = TRUE;
00808 }
00809 }
00810 } while (changed == TRUE);
00811
00812
00813 {
00814 int dst;
00815 for(wg=*rootp;wg;wg=wg->next) {
00816 dst = 0;
00817 for(i=0;i<wg->leftwordnum;i++) {
00818 if (wg->leftword[i]->mark == FALSE) {
00819 if (dst != i) wg->leftword[dst] = wg->leftword[i];
00820 dst++;
00821 }
00822 }
00823 wg->leftwordnum = dst;
00824 }
00825 }
00826
00827
00828 erased = wordgraph_exec_erase(rootp);
00829 j_printf("- %d words purged, %d words left in lattice\n", erased, count - erased);
00830 }
00831
00856 void
00857 wordgraph_adjust_boundary(WordGraph **rootp)
00858 {
00859 #ifdef GRAPHOUT_PRECISE_BOUNDARY
00860 WordGraph *wg, *left, *new;
00861 int i, j;
00862 int new_lefttime;
00863 int *framelist;
00864 LOGPROB *framescorelist;
00865 int maxfnum;
00866 int fnum;
00867 int mov_num, dup_num, del_num;
00868 int count;
00869 boolean changed;
00870
00871 j_printf("Step 2: Fit word boundaries\n");
00872 mov_num = dup_num = del_num = 0;
00873
00874
00875 count = 0;
00876 maxfnum = 0;
00877 for(wg=*rootp;wg;wg=wg->next) {
00878 count++;
00879 if (maxfnum < wg->leftwordnum) maxfnum = wg->leftwordnum;
00880 }
00881
00882 framelist = (int *)mymalloc(sizeof(int) * maxfnum);
00883 framescorelist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * maxfnum);
00884
00885 do {
00886
00887 changed = FALSE;
00888 for(wg=*rootp;wg;wg=wg->next) {
00889 if (wg->mark) continue;
00890 if (wg->leftwordnum == 0) continue;
00891
00892 fnum = 0;
00893 for(i=0;i<wg->leftwordnum;i++) {
00894 left = wg->leftword[i];
00895 if (left->mark) continue;
00896 for(j=0;j<fnum;j++) {
00897 if (framelist[j] == left->righttime + 1) break;
00898 }
00899 if (j >= fnum) {
00900 framelist[fnum] = left->righttime + 1;
00901 framescorelist[fnum] = left->gscore_tail
00902 #ifdef USE_NGRAM
00903
00904
00905 - left->lscore
00906 #endif
00907 ;
00908 fnum++;
00909 }
00910 }
00911 if (fnum == 0) continue;
00912
00913 if (fnum == 1) {
00914 if (wg->lefttime != framelist[0]) {
00915
00916
00917 wg->lefttime = framelist[0];
00918 wg->gscore_head = framescorelist[0];
00919
00920
00921
00922
00923
00924 if (wg->lefttime > wg->righttime) {
00925 wordgraph_link_context(wg);
00926 wordgraph_remove_context(wg);
00927 wg->mark = TRUE;
00928 del_num++;
00929 } else {
00930 mov_num++;
00931 }
00932 changed = TRUE;
00933 } else if (wg->gscore_head != framescorelist[0]) {
00934
00935 wg->gscore_head = framescorelist[0];
00936 changed = TRUE;
00937 }
00938 }
00939 if (fnum > 1) {
00940
00941 for(j=0;j<fnum;j++) {
00942
00943 dup_num++;
00944
00945
00946
00947
00948
00949 if (framelist[j] > wg->righttime) {
00950 del_num++;
00951 continue;
00952 }
00953
00954
00955 new = wordgraph_new(wg->wid, framelist[j], wg->righttime, wg->fscore_head, wg->fscore_tail, framescorelist[j], wg->gscore_tail
00956 #ifdef USE_NGRAM
00957 , wg->lscore
00958 #else
00959 , LOG_ZERO
00960 #endif
00961 #ifdef CM_SEARCH
00962 , wg->cmscore
00963 #else
00964 , LOG_ZERO
00965 #endif
00966 );
00967
00968 for(i=0;i<wg->leftwordnum;i++) {
00969 if ((wg->leftword[i])->mark) continue;
00970 if ((wg->leftword[i])->righttime + 1 == framelist[j]) {
00971 wordgraph_add_leftword(new, wg->leftword[i]);
00972 wordgraph_add_rightword(wg->leftword[i], new);
00973 }
00974 }
00975 for(i=0;i<wg->rightwordnum;i++) {
00976 if ((wg->rightword[i])->mark) continue;
00977 wordgraph_add_rightword(new, wg->rightword[i]);
00978 wordgraph_add_leftword(wg->rightword[i], new);
00979 }
00980 new->saved = TRUE;
00981 new->next = *rootp;
00982 *rootp = new;
00983 }
00984
00985 wordgraph_remove_context(wg);
00986
00987 wg->mark = TRUE;
00988 dup_num--;
00989 changed = TRUE;
00990 }
00991 }
00992 } while (changed == TRUE);
00993
00994 wordgraph_exec_erase(rootp);
00995 free(framelist);
00996 free(framescorelist);
00997 j_printf("- %d moved, %d duplicated, %d purged (bad align), %d words left in lattice\n", mov_num, dup_num, del_num, count + dup_num - del_num);
00998
00999 #else
01000
01001 j_printf("# Step 2: SKIP (adjusting re-aligned word boundaries)\n");
01002
01003 #endif
01004
01005 }
01006
01007
01025 void
01026 wordgraph_compaction_thesame(WordGraph **rootp)
01027 {
01028 WordGraph *wg, *we;
01029 int i, count, erased;
01030 WordGraph *wtmp;
01031
01032 j_printf("Step 3: Merge same words with exactly same time and score\n");
01033
01034 count = 0;
01035 for(wg=*rootp;wg;wg=wg->next) {
01036 count++;
01037 if (wg->mark == TRUE) continue;
01038 for(we=wg->next;we;we=we->next) {
01039 if (we->mark == TRUE) continue;
01040
01041 if (wg->wid == we->wid &&
01042 wg->lefttime == we->lefttime &&
01043 wg->righttime == we->righttime &&
01044 wg->fscore_head == we->fscore_head &&
01045 wg->fscore_tail == we->fscore_tail) {
01046
01047 merge_contexts(wg, we);
01048
01049 for(i=0;i<we->leftwordnum;i++) {
01050 swap_rightword(we->leftword[i], we, wg);
01051 }
01052 for(i=0;i<we->rightwordnum;i++) {
01053 swap_leftword(we->rightword[i], we, wg);
01054 }
01055 we->mark = TRUE;
01056 }
01057 }
01058 }
01059 erased = wordgraph_exec_erase(rootp);
01060 j_printf("- %d words merged, %d words left in lattice\n", erased, count - erased);
01061 for(wg=*rootp;wg;wg=wg->next) {
01062 uniq_leftword(wg);
01063 uniq_rightword(wg);
01064 }
01065 }
01066
01087 void
01088 wordgraph_compaction_exacttime(WordGraph **rootp)
01089 {
01090 WordGraph *wg, *we;
01091 int i, count, erased;
01092 WordGraph *wtmp;
01093
01094 j_printf("Step 4: Merge same words at same time, keep max score\n");
01095
01096 count = 0;
01097 for(wg=*rootp;wg;wg=wg->next) {
01098 count++;
01099 if (wg->mark == TRUE) continue;
01100 for(we=wg->next;we;we=we->next) {
01101 if (we->mark == TRUE) continue;
01102
01103 if (wg->wid == we->wid &&
01104 wg->lefttime == we->lefttime &&
01105 wg->righttime == we->righttime) {
01106
01107 merge_contexts(wg, we);
01108
01109 for(i=0;i<we->leftwordnum;i++) {
01110 swap_rightword(we->leftword[i], we, wg);
01111 }
01112 for(i=0;i<we->rightwordnum;i++) {
01113 swap_leftword(we->rightword[i], we, wg);
01114 }
01115
01116 if (wg->fscore_head < we->fscore_head) {
01117 wg->fscore_head = we->fscore_head;
01118 wg->fscore_tail = we->fscore_tail;
01119 wg->gscore_head = we->gscore_head;
01120 wg->gscore_tail = we->gscore_tail;
01121 #ifdef USE_NGRAM
01122 wg->lscore = we->lscore;
01123 #endif
01124 #ifdef CM_SEARCH
01125 wg->cmscore = we->cmscore;
01126 #endif
01127 }
01128 we->mark = TRUE;
01129 }
01130 }
01131 }
01132 erased = wordgraph_exec_erase(rootp);
01133 j_printf("- %d words merged, %d words left in lattice\n", erased, count-erased);
01134
01135 for(wg=*rootp;wg;wg=wg->next) {
01136 uniq_leftword(wg);
01137 uniq_rightword(wg);
01138 }
01139 }
01140
01160 void
01161 wordgraph_compaction_neighbor(WordGraph **rootp)
01162 {
01163 WordGraph *wg, *we;
01164 int i, count, erased;
01165 WordGraph *wtmp;
01166
01167 if (graph_merge_neighbor_range == 0) {
01168 j_printf("# Step 5: SKIP (merge same words at near place)\n");
01169 return;
01170 }
01171
01172 j_printf("Step 5: Merge same words at near place (allow %d frame margin)\n", graph_merge_neighbor_range);
01173
01174 count = 0;
01175 for(wg=*rootp;wg;wg=wg->next) {
01176 count++;
01177 if (wg->mark == TRUE) continue;
01178 for(we=wg->next;we;we=we->next) {
01179 if (we->mark == TRUE) continue;
01180 if (wg->wid == we->wid &&
01181 abs(wg->lefttime - we->lefttime) <= graph_merge_neighbor_range &&
01182 abs(wg->righttime - we->righttime) <= graph_merge_neighbor_range) {
01183
01184 merge_contexts(wg, we);
01185
01186 for(i=0;i<we->leftwordnum;i++) {
01187 swap_rightword(we->leftword[i], we, wg);
01188 }
01189 for(i=0;i<we->rightwordnum;i++) {
01190 swap_leftword(we->rightword[i], we, wg);
01191 }
01192
01193 if (wg->fscore_head < we->fscore_head) {
01194 wg->fscore_head = we->fscore_head;
01195 wg->fscore_tail = we->fscore_tail;
01196 wg->gscore_head = we->gscore_head;
01197 wg->gscore_tail = we->gscore_tail;
01198 #ifdef USE_NGRAM
01199 wg->lscore = we->lscore;
01200 #endif
01201 #ifdef CM_SEARCH
01202 wg->cmscore = we->cmscore;
01203 #endif
01204 }
01205 we->mark = TRUE;
01206 }
01207 }
01208 }
01209 erased = wordgraph_exec_erase(rootp);
01210 j_printf("- %d words merged, %d words left in lattice\n", erased, count-erased);
01211
01212 for(wg=*rootp;wg;wg=wg->next) {
01213 uniq_leftword(wg);
01214 uniq_rightword(wg);
01215 }
01216
01217 }
01218
01219
01220
01221
01222
01253 WordGraph *
01254 wordgraph_assign(WORD_ID wid, int leftframe, int rightframe, LOGPROB fscore_head, LOGPROB fscore_tail, LOGPROB gscore_head, LOGPROB gscore_tail, LOGPROB lscore, LOGPROB cm)
01255 {
01256
01257 WordGraph *newarc;
01258 newarc = wordgraph_new(wid, leftframe, rightframe, fscore_head, fscore_tail, gscore_head, gscore_tail, lscore, cm);
01259
01260 return newarc;
01261 }
01262
01281 void
01282 wordgraph_save(WordGraph *wg, WordGraph *right, WordGraph **root)
01283 {
01284 if (wg != NULL) {
01285 wg->next = *root;
01286 *root = wg;
01287 wg->saved = TRUE;
01288 wordgraph_add_leftword(right, wg);
01289 wordgraph_add_rightword(wg, right);
01290 }
01291 }
01292
01293 #ifdef GRAPHOUT_DYNAMIC
01294
01338 WordGraph *
01339 wordgraph_check_merge(WordGraph *now, WordGraph **root, WORD_ID next_wid, boolean *merged_p)
01340 {
01341 WordGraph *wg;
01342 int i;
01343 #ifdef GDEBUG
01344 WordGraph *w;
01345 #endif
01346
01347 #ifdef GRAPHOUT_SEARCH
01348 *merged_p = FALSE;
01349 #endif
01350
01351 if (now == NULL) return(NULL);
01352
01353 #ifdef GDEBUG
01354 printf("check_merge: checking \"%s\"[%d..%d]\n", wchmm->winfo->woutput[now->wid], now->lefttime, now->righttime);
01355 for(i=0;i<now->leftwordnum;i++) {
01356 w = now->leftword[i];
01357 printf("\t left%d: \"%15s\"[%d..%d]\n", i, wchmm->winfo->woutput[w->wid], w->lefttime, w->righttime);
01358 }
01359 for(i=0;i<now->rightwordnum;i++) {
01360 w = now->rightword[i];
01361 printf("\tright%d: \"%15s\"[%d..%d]\n", i, wchmm->winfo->woutput[w->wid], w->lefttime, w->righttime);
01362 }
01363 #endif
01364
01365 for(wg=*root;wg;wg=wg->next) {
01366 if (wg == now) continue;
01367 #ifdef GRAPHOUT_DYNAMIC
01368
01369 if (wg->purged) continue;
01370 #endif
01371 if (wg->wid == now->wid
01372 && wg->lefttime == now->lefttime
01373 && wg->righttime == now->righttime) {
01374
01375 #ifdef GDEBUG
01376 printf("check_merge: same word found: \"%s\"[%d..%d]\n", wchmm->winfo->woutput[wg->wid], wg->lefttime, wg->righttime);
01377 for(i=0;i<wg->leftwordnum;i++) {
01378 w = wg->leftword[i];
01379 printf("\t left%d: \"%15s\"[%d..%d]\n", i, wchmm->winfo->woutput[w->wid], w->lefttime, w->righttime);
01380 }
01381 for(i=0;i<wg->rightwordnum;i++) {
01382 w = wg->rightword[i];
01383 printf("\tright%d: \"%15s\"[%d..%d]\n", i, wchmm->winfo->woutput[w->wid], w->lefttime, w->righttime);
01384 }
01385 #endif
01386
01387 merge_contexts(wg, now);
01388
01389 for(i=0;i<now->leftwordnum;i++) {
01390 swap_rightword(now->leftword[i], now, wg);
01391 uniq_rightword(now->leftword[i]);
01392 }
01393 for(i=0;i<now->rightwordnum;i++) {
01394 swap_leftword(now->rightword[i], now, wg);
01395 uniq_leftword(now->rightword[i]);
01396 }
01397 #ifdef GRAPHOUT_SEARCH
01398
01399
01400
01401
01402
01403
01404
01405 for(i=0;i<wg->leftwordnum;i++) {
01406 if (wg->leftword[i]->wid == next_wid) break;
01407 }
01408 if (i < wg->leftwordnum) {
01409 *merged_p = TRUE;
01410 }
01411 #endif
01412
01413
01414 now->purged = TRUE;
01415
01416
01417 return wg;
01418 }
01419 }
01420
01421 return NULL;
01422 }
01423 #endif
01424
01425
01426
01427
01428
01471 void
01472 put_wordgraph(WordGraph *wg)
01473 {
01474 int i;
01475 if (wg == NULL) {
01476 j_printf("(NULL)\n");
01477 } else {
01478 j_printf("%d:", wg->id);
01479 for(i=0;i<wg->leftwordnum;i++) {
01480 j_printf((i == 0) ? " left=%d" : ",%d", wg->leftword[i]->id);
01481 }
01482 for(i=0;i<wg->rightwordnum;i++) {
01483 j_printf((i == 0) ? " right=%d" : ",%d", wg->rightword[i]->id);
01484 }
01485 j_printf(" [%d..%d]", wg->lefttime, wg->righttime);
01486 j_printf(" wid=%d name=\"%s\" lname=\"%s\" f=%f f_prev=%f g_head=%f g_prev=%f", wg->wid, wchmm->winfo->woutput[wg->wid], wchmm->winfo->wname[wg->wid], wg->fscore_head, wg->fscore_tail, wg->gscore_head, wg->gscore_tail);
01487 #ifdef USE_NGRAM
01488 j_printf(" lscore=%f", wg->lscore);
01489 if (wg->righttime - wg->lefttime + 1 != 0) {
01490 j_printf(" AMavg=%f", (wg->gscore_head - wg->gscore_tail - wg->lscore) / (float)(wg->righttime - wg->lefttime + 1));
01491 }
01492 #else
01493 if (wg->righttime - wg->lefttime + 1 != 0) {
01494 j_printf(" AMavg=%f", (wg->gscore_head - wg->gscore_tail) / (float)(wg->righttime - wg->lefttime + 1));
01495 }
01496 #endif
01497 #ifdef CM_SEARCH
01498 j_printf(" cmscore=%f", wg->cmscore);
01499 #endif
01500 j_printf("\n");
01501 }
01502 }
01503
01516 void
01517 wordgraph_dump(WordGraph *root)
01518 {
01519 WordGraph *wg;
01520 j_printf("--- begin wordgraph data ---\n");
01521 for(wg=root;wg;wg=wg->next) {
01522 put_wordgraph(wg);
01523 }
01524 j_printf("--- end wordgraph data ---\n");
01525 }
01526
01539 void
01540 wordgraph_check_coherence(WordGraph *rootp)
01541 {
01542 WordGraph *wg, *wl, *wr;
01543 int l,r;
01544
01545 for(wg=rootp;wg;wg=wg->next) {
01546
01547 if (wg->id < 0 || wg->id >= graph_totalwordnum) {
01548 j_printf("ERROR: invalid id\n");
01549 put_wordgraph(wg);
01550 continue;
01551 }
01552
01553 for(l=0;l<wg->leftwordnum;l++){
01554 wl = wg->leftword[l];
01555 if (wl->id < 0 || wl->id >= graph_totalwordnum) {
01556 j_printf("ERROR: invalid id in left context\n");
01557 put_wordgraph(wg);
01558 continue;
01559 }
01560 for(r=0;r<wl->rightwordnum;r++){
01561 if (wl->rightword[r] == wg) break;
01562 }
01563 if (r >= wl->rightwordnum) {
01564 j_printf("ERROR: reverse link not found in left context\n");
01565 put_wordgraph(wg);
01566 put_wordgraph(wl);
01567 continue;
01568 }
01569 }
01570 for(r=0;r<wg->rightwordnum;r++){
01571 wr = wg->rightword[r];
01572 if (wr->id < 0 || wr->id >= graph_totalwordnum) {
01573 j_printf("ERROR: invalid id in right context\n");
01574 put_wordgraph(wg);
01575 continue;
01576 }
01577 for(l=0;l<wr->leftwordnum;l++){
01578 if (wr->leftword[l] == wg) break;
01579 }
01580 if (l >= wr->leftwordnum) {
01581 j_printf("ERROR: reverse link not found in right context\n");
01582 put_wordgraph(wg);
01583 put_wordgraph(wr);
01584 continue;
01585 }
01586 }
01587 }
01588 }
01589
01590 #endif