julius/graphout.c

説明を見る。
00001 
00017 /*
00018  * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
00019  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00020  * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology
00021  * All rights reserved
00022  */
00023 
00024 #include <julius.h>
00025 
00026 #ifdef GRAPHOUT
00027 
00029 #undef GDEBUG
00030 
00032 #undef GDEBUG2
00033 
00034 static int *framelist;          
00035 static LOGPROB *framescorelist; 
00036 
00037 
00038 /**************************************************************/
00039 /* allocation and free of a WordGraph instance */
00040 
00073 static WordGraph *
00074 wordgraph_new(WORD_ID wid, HMM_Logical *headphone, HMM_Logical *tailphone, int leftframe, int rightframe, LOGPROB fscore_head, LOGPROB fscore_tail, LOGPROB gscore_head, LOGPROB gscore_tail, LOGPROB lscore, LOGPROB cm)
00075 {
00076   WordGraph *new;
00077 
00078   new = (WordGraph *)mymalloc(sizeof(WordGraph));
00079   new->wid = wid;
00080   new->lefttime = leftframe;
00081   new->righttime = rightframe;
00082   new->fscore_head = fscore_head;
00083   new->fscore_tail = fscore_tail;
00084   new->gscore_head = gscore_head;
00085   new->gscore_tail = gscore_tail;
00086 #ifdef USE_NGRAM
00087   new->lscore = lscore;
00088 #endif
00089 #ifdef CM_SEARCH
00090   new->cmscore = cm;
00091 #endif
00092   new->headphone = headphone;
00093   new->tailphone = tailphone;
00094   new->leftwordmaxnum = FANOUTSTEP;
00095   new->leftword = (WordGraph **)mymalloc(sizeof(WordGraph *) * new->leftwordmaxnum);
00096   new->leftwordnum = 0;
00097   new->rightwordmaxnum = FANOUTSTEP;
00098   new->rightword = (WordGraph **)mymalloc(sizeof(WordGraph *) * new->rightwordmaxnum);
00099   new->rightwordnum = 0;
00100 
00101   new->mark = FALSE;
00102 #ifdef GRAPHOUT_DYNAMIC
00103   new->purged = FALSE;
00104 #endif
00105   new->next = NULL;
00106   new->saved = FALSE;
00107 
00108 #ifdef GDEBUG
00109  {
00110    int i;
00111    WordGraph *w;
00112    printf("NEW: \"%s\"[%d..%d]\n", wchmm->winfo->woutput[new->wid], new->lefttime, new->righttime);
00113    for(i=0;i<new->leftwordnum;i++) {
00114      w = new->leftword[i];
00115      printf("\t left%d:  \"%15s\"[%d..%d]\n", i, wchmm->winfo->woutput[w->wid], w->lefttime, w->righttime);
00116    }
00117    for(i=0;i<new->rightwordnum;i++) {
00118      w = new->rightword[i];
00119      printf("\tright%d:  \"%15s\"[%d..%d]\n", i, wchmm->winfo->woutput[w->wid], w->lefttime, w->righttime);
00120    }
00121    printf("\headphone: %s\n", new->headphone->name);
00122    printf("\tailphone: %s\n", new->tailphone->name);
00123  }
00124 #endif
00125 
00126   return(new);
00127 }
00128 
00141 void
00142 wordgraph_free(WordGraph *wg)
00143 {
00144   free(wg->rightword);
00145   free(wg->leftword);
00146   free(wg);
00147 }
00148 
00149 
00150 /**************************************************************/
00151 /* Handling contexts */
00152 
00167 static void
00168 wordgraph_add_leftword(WordGraph *wg, WordGraph *left)
00169 {
00170   if (wg == NULL) return;
00171   if (left == NULL) return;
00172   if (wg->leftwordnum >= wg->leftwordmaxnum) {
00173     /* expand */
00174     wg->leftwordmaxnum += FANOUTSTEP;
00175     wg->leftword = (WordGraph **)myrealloc(wg->leftword, sizeof(WordGraph *) * wg->leftwordmaxnum);
00176   }
00177   wg->leftword[wg->leftwordnum] = left;
00178   wg->leftwordnum++;
00179 #ifdef GDEBUG
00180   printf("addleft: \"%s\"[%d..%d] added as %dth left of \"%s\"[%d..%d]\n", wchmm->winfo->woutput[left->wid], left->lefttime, left->righttime, wg->leftwordnum, wchmm->winfo->woutput[wg->wid], wg->lefttime, wg->righttime);
00181 #endif
00182 }
00183 
00200 static void
00201 wordgraph_add_rightword(WordGraph *wg, WordGraph *right)
00202 {
00203   if (wg == NULL) return;
00204   if (right == NULL) return;
00205   if (wg->rightwordnum >= wg->rightwordmaxnum) {
00206     /* expand */
00207     wg->rightwordmaxnum += FANOUTSTEP;
00208     wg->rightword = (WordGraph **)myrealloc(wg->rightword, sizeof(WordGraph *) * wg->rightwordmaxnum);
00209   }
00210   wg->rightword[wg->rightwordnum] = right;
00211   wg->rightwordnum++;
00212 #ifdef GDEBUG
00213   printf("addright: \"%s\"[%d..%d] added as %dth right of \"%s\"[%d..%d]\n", wchmm->winfo->woutput[right->wid], right->lefttime, right->righttime, wg->rightwordnum, wchmm->winfo->woutput[wg->wid], wg->lefttime, wg->righttime);
00214 #endif
00215 }
00216 
00240 boolean
00241 wordgraph_check_and_add_leftword(WordGraph *wg, WordGraph *left)
00242 {
00243   int i;
00244 
00245   if (wg == NULL) return FALSE;
00246   if (left == NULL) return FALSE;
00247   for(i=0;i<wg->leftwordnum;i++) {
00248     if (wg->leftword[i] == left) {
00249       break;
00250     }
00251   }
00252   if (i >= wg->leftwordnum) { /* no leftword matched */
00253     wordgraph_add_leftword(wg, left);
00254     return TRUE;
00255   }
00256   return FALSE;
00257 }
00258 
00282 boolean
00283 wordgraph_check_and_add_rightword(WordGraph *wg, WordGraph *right)
00284 {
00285   int i;
00286 
00287   if (wg == NULL) return FALSE;
00288   if (right == NULL) return FALSE;
00289   for(i=0;i<wg->rightwordnum;i++) {
00290     if (wg->rightword[i] == right) {
00291       break;
00292     }
00293   }
00294   if (i >= wg->rightwordnum) { /* no rightword matched */
00295     wordgraph_add_rightword(wg, right);
00296     return TRUE;
00297   }
00298   return FALSE;
00299 }
00300 
00321 static boolean
00322 merge_contexts(WordGraph *dst, WordGraph *src)
00323 {
00324   int s, d;
00325   WordGraph *adding;
00326   boolean ret;
00327 
00328 #ifdef GDEBUG
00329   printf("merge_contexts: merging context of \"%s\"[%d..%d] to \"%s\"[%d..%d]...\n",
00330          wchmm->winfo->woutput[src->wid], src->lefttime, src->righttime,
00331          wchmm->winfo->woutput[dst->wid], dst->lefttime, dst->righttime);
00332 #endif
00333 
00334   ret = FALSE;
00335   
00336   /* left context */
00337   for(s=0;s<src->leftwordnum;s++) {
00338     adding = src->leftword[s];
00339     if (adding->mark) continue;
00340     /* direct link between dst and src will disapper to avoid unneccesary loop */
00341     if (adding == dst) {
00342 #ifdef GDEBUG
00343       printf("merge_contexts: skipping direct link (dst) -> (src)\n");
00344 #endif
00345       continue;
00346     }
00347     for(d=0;d<dst->leftwordnum;d++) {
00348       if (dst->leftword[d]->mark) continue;
00349       if (dst->leftword[d] == adding) {
00350         break;
00351       }
00352     }
00353     if (d >= dst->leftwordnum) { /* no leftword matched */
00354       wordgraph_add_leftword(dst, adding);
00355 #ifdef GDEBUG
00356       printf("merge_contexts: added \"%s\"[%d..%d] as a new left context\n",
00357              wchmm->winfo->woutput[adding->wid], adding->lefttime, adding->righttime);
00358 #endif
00359       ret = TRUE;
00360     }
00361 #ifdef GDEBUG
00362     else {
00363       printf("merge_contexts: \"%s\"[%d..%d] already exist\n",
00364              wchmm->winfo->woutput[adding->wid], adding->lefttime, adding->righttime);
00365     }
00366 #endif
00367   }
00368 
00369   /* right context */
00370   for(s=0;s<src->rightwordnum;s++) {
00371     adding = src->rightword[s];
00372     if (adding->mark) continue;
00373     /* direct link between dst and src will disapper to avoid unneccesary loop */
00374     if (adding == dst) {
00375 #ifdef GDEBUG
00376       printf("merge_contexts: skipping direct link (src) -> (dst)\n");
00377 #endif
00378       continue;
00379     }
00380     for(d=0;d<dst->rightwordnum;d++) {
00381       if (dst->rightword[d]->mark) continue;
00382       if (dst->rightword[d] == adding) {
00383         break;
00384       }
00385     }
00386     if (d >= dst->rightwordnum) { /* no rightword matched */
00387       wordgraph_add_rightword(dst, adding);
00388 #ifdef GDEBUG
00389       printf("merge_contexts: added \"%s\"[%d..%d] as a new right context\n",
00390              wchmm->winfo->woutput[adding->wid], adding->lefttime, adding->righttime);
00391 #endif
00392       ret = TRUE;
00393     }
00394 #ifdef GDEBUG
00395     else {
00396       printf("merge_contexts: \"%s\"[%d..%d] already exist\n",
00397              wchmm->winfo->woutput[adding->wid], adding->lefttime, adding->righttime);
00398     }
00399 #endif
00400   }
00401   
00402   return(ret);
00403 }
00404 
00421 static void
00422 swap_leftword(WordGraph *wg, WordGraph *from, WordGraph *to)
00423 {
00424   int i;
00425   
00426 #ifdef GDEBUG
00427   printf("swapleft: replacing left of \"%s\"[%d..%d] from \"%s\"[%d..%d] to \"%s\"[%d..%d]...\n",
00428          wchmm->winfo->woutput[wg->wid], wg->lefttime, wg->righttime,
00429          wchmm->winfo->woutput[from->wid], from->lefttime, from->righttime,
00430          wchmm->winfo->woutput[to->wid], to->lefttime, to->righttime);
00431 #endif
00432   
00433   for(i=0;i<wg->leftwordnum;i++) {
00434     if (wg->leftword[i] == from) {
00435       wg->leftword[i] = to;
00436     }
00437   }
00438 }
00439 
00456 static void
00457 swap_rightword(WordGraph *wg, WordGraph *from, WordGraph *to)
00458 {
00459   int i;
00460   
00461 #ifdef GDEBUG
00462   printf("swapright: replacing right of \"%s\"[%d..%d] from \"%s\"[%d..%d] to \"%s\"[%d..%d]...\n",
00463          wchmm->winfo->woutput[wg->wid], wg->lefttime, wg->righttime,
00464          wchmm->winfo->woutput[from->wid], from->lefttime, from->righttime,
00465          wchmm->winfo->woutput[to->wid], to->lefttime, to->righttime);
00466 #endif
00467 
00468   for(i=0;i<wg->rightwordnum;i++) {
00469     if (wg->rightword[i] == from) {
00470       wg->rightword[i] = to;
00471     }
00472   }
00473 }
00474 
00487 static void
00488 uniq_leftword(WordGraph *wg)
00489 {
00490   int i, j, dst;
00491   boolean ok;
00492 
00493   dst = 0;
00494   for(i=0;i<wg->leftwordnum;i++) {
00495     ok = TRUE;
00496     for(j=0;j<dst;j++) {
00497       if (wg->leftword[i] == wg->leftword[j]) {
00498         ok = FALSE;
00499         break;
00500       }
00501     }
00502     if (ok == TRUE) {
00503       wg->leftword[dst] = wg->leftword[i];
00504       dst++;
00505     }
00506   }
00507   wg->leftwordnum = dst;
00508 }
00509 
00522 static void
00523 uniq_rightword(WordGraph *wg)
00524 {
00525   int i, j, dst;
00526   boolean ok;
00527 
00528   dst = 0;
00529   for(i=0;i<wg->rightwordnum;i++) {
00530     ok = TRUE;
00531     for(j=0;j<dst;j++) {
00532       if (wg->rightword[i] == wg->rightword[j]) {
00533         ok = FALSE;
00534         break;
00535       }
00536     }
00537     if (ok == TRUE) {
00538       wg->rightword[dst] = wg->rightword[i];
00539       dst++;
00540     }
00541   }
00542   wg->rightwordnum = dst;
00543 }
00544 
00557 static void
00558 wordgraph_remove_context(WordGraph *wg)
00559 {
00560   WordGraph *w;
00561   int i,j,k;
00562 
00563   if (wg == NULL) return;
00564 
00565   for(i=0;i<wg->leftwordnum;i++) {
00566     w = wg->leftword[i];
00567     k=0;
00568     for(j=0;j<w->rightwordnum;j++) {
00569       if (w->rightword[j] != wg) {
00570         if (j != k) {
00571           w->rightword[k] = w->rightword[j];
00572         }
00573         k++;
00574       }
00575     }
00576     w->rightwordnum = k;
00577   }
00578   for(i=0;i<wg->rightwordnum;i++) {
00579     w = wg->rightword[i];
00580     k=0;
00581     for(j=0;j<w->leftwordnum;j++) {
00582       if (w->leftword[j] != wg) {
00583         if (j != k) {
00584           w->leftword[k] = w->leftword[j];
00585         }
00586         k++;
00587       }
00588     }
00589     w->leftwordnum = k;
00590 #ifdef GDEBUG2
00591     if (w->leftwordnum == 0) {
00592       j_printf("leftword becomes 0 by remove_context\n");
00593       put_wordgraph(w);
00594       j_printf("by deleting its left context:\n");
00595       put_wordgraph(wg);
00596     }
00597 #endif
00598   }
00599 }
00600 
00613 static void
00614 wordgraph_link_context(WordGraph *wg)
00615 {
00616   int i,j;
00617   WordGraph *left, *right;
00618   
00619   if (wg == NULL) return;
00620   for(i=0;i<wg->leftwordnum;i++) {
00621     left = wg->leftword[i];
00622     if (left->mark) continue;
00623     if (left == wg) continue;
00624     for(j=0;j<wg->rightwordnum;j++) {
00625       right = wg->rightword[j];
00626       if (right->mark) continue;
00627       if (right == wg) continue;
00628       if (left == right) continue;
00629       wordgraph_check_and_add_leftword(right, left);
00630       wordgraph_check_and_add_rightword(left, right);
00631     }
00632   }
00633 }
00634 
00635 
00636 /**************************************************************/
00637 /* Operations for organizing WordGraph set */
00638 
00655 static int
00656 wordgraph_exec_erase(WordGraph **rootp)
00657 {
00658   WordGraph *wg, *we, *wtmp;
00659   int count;
00660 
00661   if (*rootp == NULL) return(0);
00662   
00663   wg = *rootp;
00664   count = 0;
00665   while (wg != NULL) {
00666     we = wg->next;
00667     while(we != NULL && we->mark == TRUE) {
00668       wtmp = we->next;
00669       wordgraph_free(we); count++;
00670       we = wtmp;
00671     }
00672     wg->next = we;
00673     wg = we;
00674   }
00675   if ((*rootp)->mark == TRUE) {
00676     wtmp = (*rootp)->next;
00677     wordgraph_free(*rootp); count++;
00678     *rootp = wtmp;
00679   }
00680 
00681   return(count);
00682 }
00683 
00702 static int
00703 compare_lefttime(WordGraph **x, WordGraph **y)
00704 {
00705   if ((*x)->lefttime > (*y)->lefttime) return 1;
00706   else if ((*x)->lefttime < (*y)->lefttime) return -1;
00707   else {
00708     if ((*x)->righttime > (*y)->righttime) return 1;
00709     else if ((*x)->righttime < (*y)->righttime) return -1;
00710     else {
00711       if ((*x)->fscore_head < (*y)->fscore_head) return 1;
00712       else if ((*x)->fscore_head > (*y)->fscore_head) return -1;
00713       else return 0;
00714     }
00715   }
00716 }
00717 
00730 void
00731 wordgraph_sort_and_annotate_id(WordGraph **rootp)
00732 {
00733   WordGraph *wg;
00734   int cnt;
00735   WordGraph **wlist;
00736   int i;
00737   WordGraph *wo;
00738 
00739   /* count total number of words in the graph */
00740   cnt = 0;
00741   for(wg=*rootp;wg;wg=wg->next) cnt++;
00742   graph_totalwordnum = cnt;
00743   if (graph_totalwordnum == 0) return;
00744   /* sort them by lefttime */
00745   wlist = (WordGraph **)mymalloc(sizeof(WordGraph *) * graph_totalwordnum);
00746   i = 0;
00747   for(wg=*rootp;wg;wg=wg->next) {
00748     wlist[i++] = wg;
00749   }
00750   qsort(wlist, graph_totalwordnum, sizeof(WordGraph *), (int (*)(const void *, const void *))compare_lefttime);
00751 
00752   /* annotated id and re-order the link by the id */
00753   wo = NULL;
00754   for(i=graph_totalwordnum-1;i>=0;i--) {
00755     wg = wlist[i];
00756     wg->id = i;
00757     wg->next = wo;
00758     wo = wg;
00759   }
00760   *rootp = wo;
00761   free(wlist);
00762 }
00763 
00776 void
00777 wordgraph_clean(WordGraph **rootp)
00778 {
00779   WordGraph *wg, *wtmp;
00780 
00781   wg = *rootp;
00782   while(wg != NULL) {
00783     wtmp = wg->next;
00784     wordgraph_free(wg);
00785     wg = wtmp;
00786   }
00787   *rootp = NULL;
00788 
00789 }
00790 
00791 
00792 /*********************************************************************/
00793 /* Post-processing of generated word arcs after search has been done */
00794 
00815 static int
00816 compare_beam(WordGraph **x, WordGraph **y)
00817 {
00818   if ((*x)->fscore_head < (*y)->fscore_head) return 1;
00819   else if ((*x)->fscore_head > (*y)->fscore_head) return -1;
00820   else return 0;
00821 }
00822 
00841 void
00842 wordgraph_purge_leaf_nodes(WordGraph **rootp)
00843 {
00844   WordGraph *wg;
00845   int i, dst;
00846   boolean changed;
00847   int count, erased, del_left, del_right;
00848 
00849   /* count whole */
00850   count = 0;
00851   for(wg=*rootp;wg;wg=wg->next) count++;
00852   j_printf("- %d initial word arcs generated\n", count);
00853   if (count == 0) return;
00854   
00855   j_printf("Step 1: purge leaf nodes\n");
00856 
00857   /* mark words to be erased */
00858   del_left = del_right = 0;
00859   do {
00860     changed = FALSE;
00861     for(wg=*rootp;wg;wg=wg->next) {
00862       if (wg->mark == TRUE) continue;
00863       /* mark if wg has no left context, or all leftwords are marked */
00864       if (wg->lefttime != 0) {
00865         for(i=0;i<wg->leftwordnum;i++) {
00866           if (wg->leftword[i]->mark == FALSE) break;
00867         }
00868         if (i >= wg->leftwordnum) {
00869           wg->mark = TRUE;
00870           changed = TRUE;
00871           del_left++;
00872           continue;
00873         }
00874       }
00875       /* mark if wg has no right context, or all rightwords are marked */
00876       if (wg->righttime != peseqlen - 1) {
00877         for(i=0;i<wg->rightwordnum;i++) {
00878           if (wg->rightword[i]->mark == FALSE) break;
00879         }
00880         if (i >= wg->rightwordnum) {
00881           wg->mark = TRUE;
00882           changed = TRUE;
00883           del_right++;
00884           continue;
00885         }
00886       }
00887     }
00888   } while (changed == TRUE);
00889 
00890   j_printf("- %d leaf words found (left_blank=%d, right_blank=%d)\n", del_left + del_right, del_left, del_right);
00891 
00892   /* do compaction of left/rightwords */
00893     for(wg=*rootp;wg;wg=wg->next) {
00894       if (wg->mark) continue;
00895       dst = 0;
00896       for(i=0;i<wg->leftwordnum;i++) {
00897         if (wg->leftword[i]->mark == FALSE) {
00898           if (dst != i) wg->leftword[dst] = wg->leftword[i];
00899           dst++;
00900         }
00901       }
00902       wg->leftwordnum = dst;
00903     }
00904     for(wg=*rootp;wg;wg=wg->next) {
00905       if (wg->mark) continue;
00906       dst = 0;
00907       for(i=0;i<wg->rightwordnum;i++) {
00908         if (wg->rightword[i]->mark == FALSE) {
00909           if (dst != i) wg->rightword[dst] = wg->rightword[i];
00910           dst++;
00911         }
00912       }
00913       wg->rightwordnum = dst;
00914     }
00915 
00916   /* execute erase of marked words */
00917   erased = wordgraph_exec_erase(rootp);
00918   j_printf("- %d words purged, %d words left in lattice\n", erased, count - erased);
00919 
00920 }
00921 
00938 void
00939 wordgraph_depth_cut(WordGraph **rootp)
00940 {
00941 #ifdef GRAPHOUT_DEPTHCUT
00942 
00943   WordGraph *wg;
00944   int i, dst;
00945   boolean changed;
00946   int count, erased, del_left, del_right;
00947   WordGraph **wlist;
00948   boolean f;
00949   int *wc;
00950   int t;
00951   int pruned;
00952 
00953 
00954   if (graphout_cut_depth < 0) return;
00955 
00956   j_printf("Step 1.5: cut less likely hypothesis by depth of %d\n", graphout_cut_depth);
00957 
00958   /* count whole */
00959   count = 0;
00960   for(wg=*rootp;wg;wg=wg->next) count++;
00961   if (count == 0) return;
00962   
00963   /* prepare buffer to count words per frame */
00964   wc = (int *)mymalloc(sizeof(int) * peseqlen);
00965   for (t=0;t<peseqlen;t++) wc[t] = 0;
00966   /* sort words by fscore_head */
00967   wlist = (WordGraph **)mymalloc(sizeof(WordGraph *) * count);
00968   i = 0;
00969   for(wg=*rootp;wg;wg=wg->next) {
00970     wlist[i++] = wg;
00971   }
00972   qsort(wlist, count, sizeof(WordGraph *), (int (*)(const void *, const void *))compare_beam);
00973   /* count words per frame, and unlink/mark them if below beam width */
00974   pruned = 0;
00975   for (i=0;i<count;i++) {
00976     wg = wlist[i];
00977     f = TRUE;
00978     for (t=wg->lefttime;t<=wg->righttime;t++) {
00979       wc[t]++;
00980       if (wc[t] <= graphout_cut_depth) f = FALSE;
00981     }
00982     if (f) {
00983       //wordgraph_remove_context(wg);
00984       wg->mark = TRUE;
00985       pruned++;
00986     }
00987   }
00988 #ifdef GDEBUG2
00989   printf("GRAPH DEPTH STATISTICS: NUMBER OF WORDS PER FRAME\n");
00990   for(t=0;t<peseqlen;t++) {
00991     if (wc[t] > graphout_cut_depth) {
00992       printf("*");
00993     } else {
00994       printf(" ");
00995     }
00996     printf("%4d: %d\n", t, wc[t]);
00997   }
00998 #endif
00999   j_printf("- %d words out of %d are going to be pruned by depth cutting\n", pruned, count);
01000   free(wlist);
01001   free(wc);
01002 
01003   /* mark words to be erased */
01004   del_left = del_right = 0;
01005   do {
01006     changed = FALSE;
01007     for(wg=*rootp;wg;wg=wg->next) {
01008       if (wg->mark == TRUE) continue;
01009       /* mark if wg has no left context, or all leftwords are marked */
01010       if (wg->lefttime != 0) {
01011         for(i=0;i<wg->leftwordnum;i++) {
01012           if (wg->leftword[i]->mark == FALSE) break;
01013         }
01014         if (i >= wg->leftwordnum) {
01015           wg->mark = TRUE;
01016           changed = TRUE;
01017           del_left++;
01018           continue;
01019         }
01020       }
01021       /* mark if wg has no right context, or all rightwords are marked */
01022       if (wg->righttime != peseqlen - 1) {
01023         for(i=0;i<wg->rightwordnum;i++) {
01024           if (wg->rightword[i]->mark == FALSE) break;
01025         }
01026         if (i >= wg->rightwordnum) {
01027           wg->mark = TRUE;
01028           changed = TRUE;
01029           del_right++;
01030           continue;
01031         }
01032       }
01033     }
01034   } while (changed == TRUE);
01035 
01036   j_printf("- %d new leaves found (left_blank=%d, right_blank=%d)\n", del_left + del_right, del_left, del_right);
01037 
01038   /* do compaction of left/rightwords */
01039     for(wg=*rootp;wg;wg=wg->next) {
01040       if (wg->mark) continue;
01041       dst = 0;
01042       for(i=0;i<wg->leftwordnum;i++) {
01043         if (wg->leftword[i]->mark == FALSE) {
01044           if (dst != i) wg->leftword[dst] = wg->leftword[i];
01045           dst++;
01046         }
01047       }
01048       wg->leftwordnum = dst;
01049     }
01050     for(wg=*rootp;wg;wg=wg->next) {
01051       if (wg->mark) continue;
01052       dst = 0;
01053       for(i=0;i<wg->rightwordnum;i++) {
01054         if (wg->rightword[i]->mark == FALSE) {
01055           if (dst != i) wg->rightword[dst] = wg->rightword[i];
01056           dst++;
01057         }
01058       }
01059       wg->rightwordnum = dst;
01060     }
01061 
01062   /* execute erase of marked words */
01063   erased = wordgraph_exec_erase(rootp);
01064   j_printf("- total %d words purged, %d words left in lattice\n", erased, count - erased);
01065 
01066 #else  /* ~GRAPHOUT_DEPTHCUT */
01067 
01068   j_printf("Warning: Step 1.5: graph depth cutting has been disabled, skipped\n");
01069 
01070 #endif
01071 
01072 }
01073 
01108 static boolean
01109 wordgraph_adjust_boundary_sub(WordGraph **rootp, int *mov_num_ret, int *dup_num_ret, int *del_num_ret, int *mod_num_ret, int count, int *maxfnum)
01110 {
01111   WordGraph *wg, *left, *new;
01112   int i, j, k;
01113   int fnum;
01114   int mov_num, dup_num, del_num, mod_num;
01115   boolean changed = FALSE;
01116 
01117   mov_num = dup_num = del_num = mod_num = 0;
01118 
01119   /* maximum number of left context words does not exceed total word num */
01120   /* allocate temporal work area.  these are permanent buffer that will
01121      be kept between recognition sessions. */
01122   if (*maxfnum == 0) {
01123     /* when this is called for the first time, allocate buffer */
01124     *maxfnum = count;
01125     framelist = (int *)mymalloc(sizeof(int) * (*maxfnum));
01126     framescorelist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * (*maxfnum));
01127 #ifdef GDEBUG
01128     j_printerr("Notice: maxfnum starts at %d\n", *maxfnum);
01129 #endif
01130   } else if (*maxfnum < count) {
01131     /* for later call, expand buffer if necessary */
01132     free(framescorelist);
01133     free(framelist);
01134     *maxfnum = count;
01135     framelist = (int *)mymalloc(sizeof(int) * (*maxfnum));
01136     framescorelist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * (*maxfnum));
01137 #ifdef GDEBUG
01138     j_printerr("Notice: maxfnum expanded by count (%d)\n", *maxfnum);
01139 #endif
01140   }
01141 
01142 #ifdef GDEBUG2
01143   printf("***CHECK LOOP BEGIN***\n");
01144 #endif
01145   for(wg=*rootp;wg;wg=wg->next) {
01146     if (wg->mark) continue;     /* already marked */
01147 #ifdef GDEBUG2
01148     printf("  [%d..%d] \"%s\"\n", wg->lefttime, wg->righttime, wchmm->winfo->woutput[wg->wid]);
01149 #endif
01150     if (wg->leftwordnum == 0) { /* no leftword */
01151       if (wg->lefttime != 0) {
01152         /* some fraction found by former elimination: remove this */
01153 #ifdef GDEBUG2
01154         printf("  -> no leftword at middle of lattice, eliminate this\n");
01155 #endif
01156         wordgraph_remove_context(wg);
01157         wg->mark = TRUE;
01158         del_num++;
01159         changed = TRUE;
01160       }
01161       /* if has no leftword, adjustment of this word is not needed */
01162       continue;
01163     }
01164     if (wg->rightwordnum == 0) {        /* no rightword */
01165       if (wg->righttime != peseqlen-1) {
01166         /* some fraction found by former elimination: remove this */
01167 #ifdef GDEBUG2
01168         printf("  -> no rightword at middle of lattice, eliminate this\n");
01169 #endif
01170         wordgraph_remove_context(wg);
01171         wg->mark = TRUE;
01172         del_num++;
01173         changed = TRUE;
01174         continue;
01175       }
01176       /* if on right edge, continue adjusting */
01177     }
01178     /* correct lefttime variation to framelist[] and framescorelist[] */
01179     fnum = 0;
01180     /* check for buffer overrun */
01181     if (wg->leftwordnum > (*maxfnum)) {
01182       /* expand buffer if necessary */
01183       free(framescorelist);
01184       free(framelist);
01185       *maxfnum = wg->leftwordnum;
01186       framelist = (int *)mymalloc(sizeof(int) * (*maxfnum));
01187       framescorelist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * (*maxfnum));
01188 #ifdef GDEBUG
01189       j_printerr("Notice: wg->leftwordnum exceeds maxfnum (%d > %d), expanded\n", wg->leftwordnum, *maxfnum);
01190 #endif
01191     }
01192     for(i=0;i<wg->leftwordnum;i++) {
01193       left = wg->leftword[i];
01194       if (left->mark) continue;
01195       for(j=0;j<fnum;j++) {
01196         if (framelist[j] == left->righttime + 1) break;
01197       }
01198       if (j >= fnum) {
01199         framelist[fnum] = left->righttime + 1;
01200         framescorelist[fnum] = left->gscore_tail
01201 #ifdef USE_NGRAM
01202           /* the tail gscore contains the language score of the word,
01203              so the head gscore of its right context should consider this */
01204           - left->lscore
01205 #endif
01206           ;
01207         fnum++;
01208       }
01209     }
01210 #ifdef GDEBUG2
01211     printf("  possible boundary of left words:");
01212     if (fnum == 0) {
01213       printf(" (not exist)\n");
01214     } else {
01215       for(j=0;j<fnum;j++) printf(" %d", framelist[j]);
01216       printf("\n");
01217     }
01218 #endif
01219     if (fnum == 0) continue;    /* no left context */
01220     /* one candidate: just move the original (or not move) */
01221     if (fnum == 1) {
01222       if (wg->lefttime != framelist[0]) {
01223 #ifdef GDEBUG2
01224         printf("    !moving as [%d..%d]", framelist[0], wg->righttime);
01225 #endif
01226         /* check the time correctness: if the lefttime is larger than
01227            righttime, this graph word has been completely overridden by
01228            the left word (i.e. the aligned frames are absorbed by
01229            re-alignment.  In this case this word should be removed.
01230         */
01231         if (framelist[0] > wg->righttime) {
01232 #ifdef GDEBUG2
01233           printf(" : eliminated");
01234 #endif
01235           wordgraph_link_context(wg);
01236           wordgraph_remove_context(wg);
01237           wg->mark = TRUE;
01238           del_num++;
01239         } else {
01240 #ifdef GDEBUG2
01241           printf(" : ok");
01242 #endif
01243           /* adjust time and score */
01244           wg->lefttime = framelist[0];
01245           wg->gscore_head = framescorelist[0];
01246           mov_num++;
01247         }
01248 #ifdef GDEBUG2
01249         printf("\n");
01250 #endif
01251         changed = TRUE;
01252       } else if (wg->gscore_head != framescorelist[0]) {
01253         /* adjust only score */
01254 #ifdef GDEBUG2
01255         printf("    !ghead score changed: %f -> %f\n", wg->gscore_head, framescorelist[0]);
01256 #endif
01257         wg->gscore_head = framescorelist[0];
01258         mod_num++;
01259         changed = TRUE;
01260       }
01261     }
01262     if (fnum > 1) {
01263       /* multiple candidate: make copy for each (fnum)*/
01264       for(j=0;j<fnum;j++) {
01265         /* duplicate */
01266         dup_num++;
01267 #ifdef GDEBUG2
01268         printf("    !duping as [%d..%d]", framelist[j], wg->righttime);
01269 #endif
01270         
01271         if (framelist[j] > wg->righttime) {
01272           /* bogus link: link leftwords and rightwords, and delete this */
01273 #ifdef GDEBUG2
01274           printf(" : eliminated");
01275 #endif
01276           for(i=0;i<wg->leftwordnum;i++) {
01277             left = wg->leftword[i];
01278             if (left->mark) continue;
01279             if (left->righttime + 1 == framelist[j]) {
01280               for(k=0;k<wg->rightwordnum;k++) {
01281                 if ((wg->rightword[k])->mark) continue;
01282                 if (wg->rightword[k] == left) continue;
01283                 wordgraph_check_and_add_leftword(wg->rightword[k], left);
01284                 wordgraph_check_and_add_rightword(left, wg->rightword[k]);
01285               }
01286             }
01287           }
01288           del_num++;
01289           
01290         } else {
01291           /* really duplicate */
01292 #ifdef GDEBUG2
01293           printf(" : ok");
01294 #endif
01295           new = wordgraph_new(wg->wid, wg->headphone, wg->tailphone, framelist[j], wg->righttime, wg->fscore_head, wg->fscore_tail, framescorelist[j], wg->gscore_tail
01296 #ifdef USE_NGRAM
01297                               , wg->lscore
01298 #else
01299                               , LOG_ZERO
01300 #endif
01301 #ifdef CM_SEARCH
01302                               , wg->cmscore
01303 #else
01304                               , LOG_ZERO
01305 #endif
01306                               );
01307           /* copy corresponding link */
01308           for(i=0;i<wg->leftwordnum;i++) {
01309             if ((wg->leftword[i])->mark) continue;
01310             if ((wg->leftword[i])->righttime + 1 == framelist[j]) {
01311               wordgraph_add_leftword(new, wg->leftword[i]);
01312               wordgraph_add_rightword(wg->leftword[i], new);
01313             }
01314           }
01315           for(i=0;i<wg->rightwordnum;i++) {
01316             if ((wg->rightword[i])->mark) continue;
01317             wordgraph_add_rightword(new, wg->rightword[i]);
01318             wordgraph_add_leftword(wg->rightword[i], new);
01319           }
01320           new->saved = TRUE;
01321           new->next = *rootp;
01322           *rootp = new;
01323         }
01324 #ifdef GDEBUG2
01325         printf("\n");
01326 #endif
01327       }
01328       
01329       /* remove the original */
01330 #ifdef GDEBUG2
01331       printf("    !delete original [%d..%d]\n", wg->lefttime, wg->righttime);
01332 #endif
01333       wordgraph_remove_context(wg);
01334       wg->mark = TRUE;
01335       dup_num--;
01336       
01337       changed = TRUE;
01338     }
01339   }
01340 
01341   *mov_num_ret = mov_num;
01342   *dup_num_ret = dup_num;
01343   *del_num_ret = del_num;
01344   *mod_num_ret = mod_num;
01345 
01346 #ifdef GDEBUG2
01347   if (changed) {
01348     printf("*** some graph has been altered, check loop continues\n");
01349   } else {
01350     printf("*** graph not changed at last loop, check ends here\n");
01351   }
01352 #endif
01353 
01354   return (changed);
01355 }
01356 
01373 static void
01374 wordgraph_compaction_thesame_sub(WordGraph **rootp, int *rest_ret, int *merged_ret)
01375 {
01376   WordGraph *wg, *we;
01377   int i, count, erased, merged;
01378 
01379   count = 0;
01380   merged = 0;
01381   for(wg=*rootp;wg;wg=wg->next) {
01382     count++;
01383     if (wg->mark == TRUE) continue;
01384     for(we=wg->next;we;we=we->next) {
01385       if (we->mark == TRUE) continue;
01386       /* find the word with exactly the same time and score */
01387       if (wg->wid == we->wid &&
01388           wg->headphone == we->headphone &&
01389           wg->tailphone == we->tailphone &&
01390           wg->lefttime == we->lefttime &&
01391           wg->righttime == we->righttime &&
01392           wg->fscore_head == we->fscore_head &&
01393           wg->fscore_tail == we->fscore_tail) {
01394         /* merge contexts */
01395         merge_contexts(wg, we);
01396         /* swap contexts of left / right contexts */
01397         for(i=0;i<we->leftwordnum;i++) {
01398           if (we->leftword[i]->mark) continue;
01399           //if (we->leftword[i] == wg) continue;
01400           swap_rightword(we->leftword[i], we, wg);
01401         }
01402         for(i=0;i<we->rightwordnum;i++) {
01403           if (we->rightword[i]->mark) continue;
01404           //if (we->rightword[i] == wg) continue;
01405           swap_leftword(we->rightword[i], we, wg);
01406         }
01407         we->mark = TRUE;
01408         merged++;
01409       }
01410     }
01411   }
01412 
01413   erased = wordgraph_exec_erase(rootp);
01414 
01415   for(wg=*rootp;wg;wg=wg->next) {
01416     uniq_leftword(wg);
01417     uniq_rightword(wg);
01418   }
01419 
01420   *rest_ret = count - erased;
01421   *merged_ret = merged;
01422 }
01423 
01460 void
01461 wordgraph_adjust_boundary(WordGraph **rootp)
01462 {
01463 #ifdef GRAPHOUT_PRECISE_BOUNDARY
01464   WordGraph *wg;
01465   int mov_num, dup_num, del_num, mod_num;
01466   int count, merged;
01467   boolean flag;
01468   int loopcount;
01469   int maxfnum;
01470 
01471   loopcount = 0;
01472 
01473   j_printf("Step 2: adjust boundaries\n");
01474   mov_num = dup_num = del_num = 0;
01475 
01476   /* count number of all words */
01477   count = 0;
01478   for(wg=*rootp;wg;wg=wg->next) count++;
01479   maxfnum = 0;
01480 
01481   do {
01482     /* do adjust */
01483     flag = wordgraph_adjust_boundary_sub(rootp, &mov_num, &dup_num, &del_num, &mod_num, count, &maxfnum);
01484     /* do compaction */
01485     wordgraph_compaction_thesame_sub(rootp, &count, &merged);
01486     /*j_printf("- %d moved, %d duplicated, %d purged (bad align), %d words left\n", mov_num, dup_num, del_num, count + dup_num - del_num);*/
01487     j_printf("- #%d: %d moved, %d duplicated, %d purged, %d modified, %d idential, %d left\n", loopcount + 1, mov_num, dup_num, del_num, mod_num, merged, count);
01488 #ifdef GRAPHOUT_LIMIT_BOUNDARY_LOOP
01489     if (++loopcount >= graphout_limit_boundary_loop_num) {
01490       j_printf("*** loop count reached %d, terminate loop now\n", graphout_limit_boundary_loop_num);
01491       break;
01492     }
01493 #endif
01494   } while (flag);
01495 
01496   /* free work area allocated in adjust_boundary_sub */
01497   if (maxfnum > 0) {
01498     free(framescorelist);
01499     free(framelist);
01500   }
01501 
01502   /* execute erase of marked words */
01503   wordgraph_exec_erase(rootp);
01504 
01505 #else
01506 
01507   j_printf("# Step 2: SKIP (adjusting boundaries)\n");
01508 
01509 #endif /* GRAPHOUT_PRECISE_BOUNDARY */
01510 
01511 }
01512 
01513  
01531 void
01532 wordgraph_compaction_thesame(WordGraph **rootp)
01533 {
01534   int rest, erased;
01535 
01536   j_printf("Step 3: merge idential hypotheses (same score, boundary, context)\n");
01537   wordgraph_compaction_thesame_sub(rootp, &rest, &erased);
01538   j_printf("- %d words merged, %d words left in lattice\n", erased, rest);
01539 }
01540 
01563 void
01564 wordgraph_compaction_exacttime(WordGraph **rootp)
01565 {
01566   WordGraph *wg, *we;
01567   int i, count, erased;
01568   WordGraph *wtmp;
01569 
01570   if (graph_merge_neighbor_range < 0) {
01571     j_printf("# Step 4: SKIP (merge the same words with same boundary to the most likely one\n");
01572     return;
01573   }
01574 
01575   j_printf("Step 4: merge same words with same boundary to the most likely one\n");
01576 
01577   count = 0;
01578   for(wg=*rootp;wg;wg=wg->next) {
01579     count++;
01580     if (wg->mark == TRUE) continue;
01581     for(we=wg->next;we;we=we->next) {
01582       if (we->mark == TRUE) continue;
01583       /* find same words at same position */
01584       if (wg->wid == we->wid &&
01585           wg->lefttime == we->lefttime &&
01586           wg->righttime == we->righttime) {
01587         /* merge contexts */
01588         merge_contexts(wg, we);
01589         /* swap contexts of left / right contexts */
01590         for(i=0;i<we->leftwordnum;i++) {
01591           swap_rightword(we->leftword[i], we, wg);
01592         }
01593         for(i=0;i<we->rightwordnum;i++) {
01594           swap_leftword(we->rightword[i], we, wg);
01595         }
01596         /* keep the max score */
01597         if (wg->fscore_head < we->fscore_head) {
01598           wg->headphone = we->headphone;
01599           wg->tailphone = we->tailphone;
01600           wg->fscore_head = we->fscore_head;
01601           wg->fscore_tail = we->fscore_tail;
01602           wg->gscore_head = we->gscore_head;
01603           wg->gscore_tail = we->gscore_tail;
01604 #ifdef USE_NGRAM
01605           wg->lscore = we->lscore;
01606 #endif
01607 #ifdef CM_SEARCH
01608           wg->cmscore = we->cmscore;
01609 #endif
01610         }
01611         we->mark = TRUE;
01612       }
01613     }
01614   }
01615   erased = wordgraph_exec_erase(rootp);
01616   j_printf("- %d words merged, %d words left in lattice\n", erased, count-erased);
01617 
01618   for(wg=*rootp;wg;wg=wg->next) {
01619     uniq_leftword(wg);
01620     uniq_rightword(wg);
01621   }
01622 }
01623 
01645 void
01646 wordgraph_compaction_neighbor(WordGraph **rootp)
01647 {
01648   WordGraph *wg, *we;
01649   int i, count, erased;
01650 
01651   if (graph_merge_neighbor_range <= 0) {
01652     j_printf("# Step 5: SKIP (merge the same words around)\n");
01653     return;
01654   }
01655 
01656   j_printf("Step 5: merge same words around, with %d frame margin\n", graph_merge_neighbor_range);
01657 
01658   count = 0;
01659   for(wg=*rootp;wg;wg=wg->next) {
01660     count++;
01661     if (wg->mark == TRUE) continue;
01662     for(we=wg->next;we;we=we->next) {
01663       if (we->mark == TRUE) continue;
01664       if (wg->wid == we->wid &&
01665           abs(wg->lefttime - we->lefttime) <= graph_merge_neighbor_range &&
01666           abs(wg->righttime - we->righttime) <= graph_merge_neighbor_range) {
01667         /* merge contexts */
01668         merge_contexts(wg, we);
01669         /* swap contexts of left / right contexts */
01670         for(i=0;i<we->leftwordnum;i++) {
01671           swap_rightword(we->leftword[i], we, wg);
01672         }
01673         for(i=0;i<we->rightwordnum;i++) {
01674           swap_leftword(we->rightword[i], we, wg);
01675         }
01676         /* keep the max score */
01677         if (wg->fscore_head < we->fscore_head) {
01678           wg->headphone = we->headphone;
01679           wg->tailphone = we->tailphone;
01680           wg->fscore_head = we->fscore_head;
01681           wg->fscore_tail = we->fscore_tail;
01682           wg->gscore_head = we->gscore_head;
01683           wg->gscore_tail = we->gscore_tail;
01684 #ifdef USE_NGRAM
01685           wg->lscore = we->lscore;
01686 #endif
01687 #ifdef CM_SEARCH
01688           wg->cmscore = we->cmscore;
01689 #endif
01690         }
01691         we->mark = TRUE;
01692       }
01693     }
01694   }
01695   erased = wordgraph_exec_erase(rootp);
01696   j_printf("- %d words merged, %d words left in lattice\n", erased, count-erased);
01697 
01698   for(wg=*rootp;wg;wg=wg->next) {
01699     uniq_leftword(wg);
01700     uniq_rightword(wg);
01701   }
01702  
01703 }
01704 
01705 
01706 /**************************************************************/
01707 /* generation of graph word candidates while search */
01708 
01739 WordGraph *
01740 wordgraph_assign(WORD_ID wid, WORD_ID wid_left, WORD_ID wid_right, int leftframe, int rightframe, LOGPROB fscore_head, LOGPROB fscore_tail, LOGPROB gscore_head, LOGPROB gscore_tail, LOGPROB lscore, LOGPROB cm)
01741 {
01742   WordGraph *newarc;
01743   HMM_Logical *l, *ret, *head, *tail;
01744 
01745   /* find context dependent phones at head and tail */
01746   l = wchmm->winfo->wseq[wid][winfo->wlen[wid]-1];
01747   if (wid_right != WORD_INVALID) {
01748     ret = get_right_context_HMM(l, wchmm->winfo->wseq[wid_right][0]->name, wchmm->hmminfo);
01749     if (ret != NULL) l = ret;
01750   }
01751   if (wchmm->winfo->wlen[wid] > 1) {
01752     tail = l;
01753     l = wchmm->winfo->wseq[wid][0];
01754   }
01755   if (wid_left != WORD_INVALID) {
01756     ret = get_left_context_HMM(l, wchmm->winfo->wseq[wid_left][winfo->wlen[wid_left]-1]->name, wchmm->hmminfo);
01757     if (ret != NULL) l = ret;
01758   }
01759   head = l;
01760   if (wchmm->winfo->wlen[wid] <= 1) {
01761     tail = l;
01762   }
01763 
01764   /* generate a new graph word hypothesis */
01765   newarc = wordgraph_new(wid, head, tail, leftframe, rightframe, fscore_head, fscore_tail, gscore_head, gscore_tail, lscore, cm);
01766   //printf("    [%d..%d] %d\n", leftframe, rightframe, wid);
01767   return newarc;
01768 }
01769 
01788 void
01789 wordgraph_save(WordGraph *wg, WordGraph *right, WordGraph **root)
01790 {
01791   if (wg != NULL) {
01792     wg->next = *root;
01793     *root = wg;
01794     wg->saved = TRUE;
01795     wordgraph_add_leftword(right, wg);
01796     wordgraph_add_rightword(wg, right);
01797   }
01798 }
01799 
01800 #ifdef GRAPHOUT_DYNAMIC
01801 
01845 WordGraph *
01846 wordgraph_check_merge(WordGraph *now, WordGraph **root, WORD_ID next_wid, boolean *merged_p)
01847 {
01848   WordGraph *wg;
01849   int i;
01850 #ifdef GDEBUG
01851   WordGraph *w;
01852 #endif
01853 
01854 #ifdef GRAPHOUT_SEARCH
01855   *merged_p = FALSE;
01856 #endif
01857 
01858   if (now == NULL) return(NULL);
01859 
01860 #ifdef GDEBUG
01861   printf("check_merge: checking \"%s\"[%d..%d]\n", wchmm->winfo->woutput[now->wid], now->lefttime, now->righttime);
01862   for(i=0;i<now->leftwordnum;i++) {
01863     w = now->leftword[i];
01864     printf("\t left%d:  \"%15s\"[%d..%d]\n", i, wchmm->winfo->woutput[w->wid], w->lefttime, w->righttime);
01865   }
01866   for(i=0;i<now->rightwordnum;i++) {
01867     w = now->rightword[i];
01868     printf("\tright%d:  \"%15s\"[%d..%d]\n", i, wchmm->winfo->woutput[w->wid], w->lefttime, w->righttime);
01869   }
01870 #endif
01871 
01872   for(wg=*root;wg;wg=wg->next) {
01873     if (wg == now) continue;
01874 #ifdef GRAPHOUT_DYNAMIC
01875     /* skip already merged word */
01876     if (wg->purged) continue;
01877 #endif
01878     if (graph_merge_neighbor_range < 0) {
01879       /* when no merging, words with different triphone context at word edge
01880          should be differenciated */
01881       if (wg->headphone != now->headphone || wg->tailphone != now->tailphone) {
01882         continue;
01883       }
01884     }
01885     if (wg->wid == now->wid
01886         && wg->lefttime == now->lefttime
01887         && wg->righttime == now->righttime) {
01888       /* same word on the same position is found in current word graph */
01889 #ifdef GDEBUG
01890       printf("check_merge: same word found: \"%s\"[%d..%d]\n", wchmm->winfo->woutput[wg->wid], wg->lefttime, wg->righttime);
01891       for(i=0;i<wg->leftwordnum;i++) {
01892         w = wg->leftword[i];
01893         printf("\t left%d:  \"%15s\"[%d..%d]\n", i, wchmm->winfo->woutput[w->wid], w->lefttime, w->righttime);
01894       }
01895       for(i=0;i<wg->rightwordnum;i++) {
01896         w = wg->rightword[i];
01897         printf("\tright%d:  \"%15s\"[%d..%d]\n", i, wchmm->winfo->woutput[w->wid], w->lefttime, w->righttime);
01898       }
01899 #endif
01900       /* merge contexts */
01901       merge_contexts(wg, now);
01902       /* swap contexts of left / right contexts */
01903       for(i=0;i<now->leftwordnum;i++) {
01904         swap_rightword(now->leftword[i], now, wg);
01905         uniq_rightword(now->leftword[i]);
01906       }
01907       for(i=0;i<now->rightwordnum;i++) {
01908         swap_leftword(now->rightword[i], now, wg);
01909         uniq_leftword(now->rightword[i]);
01910       }
01911 #ifdef GRAPHOUT_SEARCH
01912       /* if the left and right contexts of now are already included in wg,
01913          and wg already has left node of next word,
01914          it means that
01915          the current word and the last word context is
01916          already included in the existing word graph.
01917          So, in the case this partial path should be abandoned.
01918       */
01919       for(i=0;i<wg->leftwordnum;i++) {
01920         if (wg->leftword[i]->wid == next_wid) break;
01921       }
01922       if (i < wg->leftwordnum) {
01923         *merged_p = TRUE;
01924       }
01925 #endif /* GRAPHOUT_SEARCH */
01926 #ifdef GRAPHOUT_OVERWRITE
01927       /*  if current hypothesis score is higher than saved,
01928           overwrite the scores and not terminate */
01929       if (
01930 #ifdef GRAPHOUT_OVERWRITE_GSCORE
01931           wg->gscore_head < now->gscore_head
01932 #else
01933           wg->fscore_head < now->fscore_head
01934 #endif
01935           ) {
01936           wg->headphone = now->headphone;
01937           wg->tailphone = now->tailphone;
01938           wg->fscore_head = now->fscore_head;
01939           wg->fscore_tail = now->fscore_tail;
01940           wg->gscore_head = now->gscore_head;
01941           wg->gscore_tail = now->gscore_tail;
01942 #ifdef USE_NGRAM
01943           wg->lscore = now->lscore;
01944 #endif
01945 #ifdef CM_SEARCH
01946           wg->cmscore = now->cmscore;
01947 #endif
01948 #ifdef GRAPHOUT_SEARCH
01949           *merged_p = FALSE;
01950 #endif
01951       }
01952 #endif /* GRAPHOUT_OVERWRITE */
01953       /* the merged word should be discarded for later merging from
01954          another word, so disable this */
01955       now->purged = TRUE;
01956       
01957       /* return the found one */
01958       return wg;
01959     }
01960   }
01961   /* if the same word not found, return NULL */
01962   return NULL;
01963 }
01964 #endif /* GRAPHOUT_DYNAMIC */
01965 
01966 
01967 /**************************************************************/
01968 /* misc. functions */
01969 
02012 void
02013 put_wordgraph(WordGraph *wg)
02014 {
02015   int i;
02016   if (wg == NULL) {
02017     j_printf("(NULL)\n");
02018   } else {
02019     j_printf("%d:", wg->id);
02020     for(i=0;i<wg->leftwordnum;i++) {
02021       j_printf((i == 0) ? " left=%d" : ",%d", wg->leftword[i]->id);
02022     }
02023     for(i=0;i<wg->rightwordnum;i++) {
02024       j_printf((i == 0) ? " right=%d" : ",%d", wg->rightword[i]->id);
02025     }
02026     j_printf(" [%d..%d]", wg->lefttime, wg->righttime);
02027     j_printf(" wid=%d name=\"%s\" lname=\"%s\" f=%f f_prev=%f g_head=%f g_prev=%f", wg->wid, wchmm->winfo->woutput[wg->wid], wchmm->winfo->wname[wg->wid], wg->fscore_head, wg->fscore_tail, wg->gscore_head, wg->gscore_tail);
02028 #ifdef USE_NGRAM
02029     j_printf(" lscore=%f", wg->lscore);
02030     if (wg->righttime - wg->lefttime + 1 != 0) {
02031       j_printf(" AMavg=%f", (wg->gscore_head - wg->gscore_tail - wg->lscore) / (float)(wg->righttime - wg->lefttime + 1));
02032     }
02033 #else
02034     if (wg->righttime - wg->lefttime + 1 != 0) {
02035       j_printf(" AMavg=%f", (wg->gscore_head - wg->gscore_tail) / (float)(wg->righttime - wg->lefttime + 1));
02036     }
02037 #endif
02038 #ifdef CM_SEARCH
02039     j_printf(" cmscore=%f", wg->cmscore);
02040 #endif
02041     j_printf(" headphone=%s", wg->headphone->name);
02042     j_printf(" tailphone=%s", wg->tailphone->name);
02043     j_printf("\n");
02044   }
02045 }
02046 
02059 void
02060 wordgraph_dump(WordGraph *root)
02061 {
02062   WordGraph *wg;
02063   j_printf("--- begin wordgraph data ---\n");
02064   for(wg=root;wg;wg=wg->next) {
02065     put_wordgraph(wg);
02066   }
02067   j_printf("--- end wordgraph data ---\n");
02068 }
02069 
02082 void
02083 wordgraph_check_coherence(WordGraph *rootp)
02084 {
02085   WordGraph *wg, *wl, *wr;
02086   int l,r;
02087 
02088   for(wg=rootp;wg;wg=wg->next) {
02089     /* check ID overflow */
02090     if (wg->id < 0 || wg->id >= graph_totalwordnum) {
02091       j_printf("ERROR: invalid id\n");
02092       put_wordgraph(wg);
02093       continue;
02094     }
02095     /* check link */
02096     for(l=0;l<wg->leftwordnum;l++){
02097       wl = wg->leftword[l];
02098       if (wl->id < 0 || wl->id >= graph_totalwordnum) {
02099         j_printf("ERROR: invalid id in left context\n");
02100         put_wordgraph(wg);
02101         continue;
02102       }
02103       for(r=0;r<wl->rightwordnum;r++){
02104         if (wl->rightword[r] == wg) break;
02105       }
02106       if (r >= wl->rightwordnum) {
02107         j_printf("ERROR: reverse link not found in left context\n");
02108         put_wordgraph(wg);
02109         put_wordgraph(wl);
02110         continue;
02111       }
02112     }
02113     for(r=0;r<wg->rightwordnum;r++){
02114       wr = wg->rightword[r];
02115       if (wr->id < 0 || wr->id >= graph_totalwordnum) {
02116         j_printf("ERROR: invalid id in right context\n");
02117         put_wordgraph(wg);
02118         continue;
02119       }
02120       for(l=0;l<wr->leftwordnum;l++){
02121         if (wr->leftword[l] == wg) break;
02122       }
02123       if (l >= wr->leftwordnum) {
02124         j_printf("ERROR: reverse link not found in right context\n");
02125         put_wordgraph(wg);
02126         put_wordgraph(wr);
02127         continue;
02128       }
02129     }
02130   }
02131 }
02132 
02133 #endif /* GRAPHOUT */

Juliusに対してTue Dec 26 16:19:27 2006に生成されました。  doxygen 1.5.0