Main Page | Modules | Data Structures | Directories | File List | Data Fields | Globals | Related Pages

vsegment.c

Go to the documentation of this file.
00001 
00017 /*
00018  * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
00019  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00020  * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology, Nagoya Institute of Technology
00021  * All rights reserved
00022  */
00023 
00024 #include <sent/stddefs.h>
00025 #include <sent/htk_param.h>
00026 #include <sent/hmm.h>
00027 
00049 LOGPROB
00050 viterbi_segment(HMM *hmm, HTK_Param *param, int *endstates, int ulen, int **id_ret, int **seg_ret, LOGPROB **uscore_ret, int *slen_ret)
00051 {
00052   /* for viterbi */
00053   LOGPROB *nodescore[2];        /* node buffer */
00054   SEGTOKEN **tokenp[2];         /* propagating token which holds segment info */
00055   int *from_node;
00056 #ifndef MULTIPATH_VERSION
00057   int *u_end, *u_start; /* the node is an end of the word, or -1 */
00058 #endif
00059   int i, n;
00060   unsigned int t;
00061   int tl,tn;
00062   LOGPROB tmpsum;
00063   A_CELL *ac;
00064   SEGTOKEN *newtoken, *token, *tmptoken, *root;
00065   LOGPROB result_score;
00066   LOGPROB maxscore, minscore;   /* for debug */
00067   int maxnode;                  /* for debug */
00068   int *id, *seg, slen;
00069   LOGPROB *uscore;
00070 
00071   /* assume more than 1 units */
00072   if (ulen < 1) {
00073     j_printerr("Error: viterbi_segment: no unit?\n");
00074     return LOG_ZERO;
00075   }
00076 
00077 #ifndef MULTIPATH_VERSION
00078   /* initialize unit start/end marker */
00079   u_start = (int *)mymalloc(hmm->len * sizeof(int));
00080   u_end   = (int *)mymalloc(hmm->len * sizeof(int));
00081   for (n = 0; n < hmm->len; n++) {
00082     u_start[n] = -1;
00083     u_end[n] = -1;
00084   }
00085   u_start[0] = 0;
00086   u_end[endstates[0]] = 0;
00087   for (i=1;i<ulen;i++) {
00088     u_start[endstates[i-1]+1] = i;
00089     u_end[endstates[i]] = i;
00090   }
00091 #if 0
00092   for (i=0;i<hmm->len;i++) {
00093     printf("unit %d: start=%d, end=%d\n", i, u_start[i], u_end[i]);
00094   }
00095 #endif
00096 #endif /* MULTIPATH_VERSION */
00097 
00098   /* initialize node buffers */
00099   tn = 0;
00100   tl = 1;
00101   root = NULL;
00102   for (i=0;i<2;i++){
00103     nodescore[i] = (LOGPROB *)mymalloc(hmm->len * sizeof(LOGPROB));
00104     tokenp[i] = (SEGTOKEN **)mymalloc(hmm->len * sizeof(SEGTOKEN *));
00105     for (n = 0; n < hmm->len; n++) {
00106       tokenp[i][n] = NULL;
00107     }
00108   }
00109   for (n = 0; n < hmm->len; n++) {
00110     nodescore[tn][n] = LOG_ZERO;
00111     newtoken = (SEGTOKEN *)mymalloc(sizeof(SEGTOKEN));
00112     newtoken->last_id = -1;
00113     newtoken->last_end_frame = -1;
00114     newtoken->last_end_score = 0.0;
00115     newtoken->list = root;
00116     root = newtoken;
00117     newtoken->next = NULL;
00118     tokenp[tn][n] = newtoken;
00119   }
00120   from_node = (int *)mymalloc(sizeof(int) * hmm->len);
00121   
00122   /* first frame: only set initial score */
00123   /*if (hmm->state[0].is_pseudo_state) {
00124     j_printerr("Warning: state %d: pseudo state?\n", 0);
00125     }*/
00126 #ifdef MULTIPATH_VERSION
00127   nodescore[tn][0] = 0.0;
00128 #else
00129   nodescore[tn][0] = outprob(0, &(hmm->state[0]), param);
00130 #endif
00131 
00132   /* do viterbi for rest frame */
00133   for (
00134 #ifdef MULTIPATH_VERSION
00135        t = 0; t <= param->samplenum; t++
00136 #else
00137        t = 1; t < param->samplenum; t++
00138 #endif
00139        ) {
00140     i = tl;
00141     tl = tn;
00142     tn = i;
00143     maxscore = LOG_ZERO;
00144     minscore = 0.0;
00145 
00146     /* clear next scores */
00147     for (i=0;i<hmm->len;i++) {
00148       nodescore[tn][i] = LOG_ZERO;
00149       from_node[i] = -1;
00150     }
00151 
00152     /* select viterbi path for each node */
00153     for (n = 0; n < hmm->len; n++) {
00154       if (nodescore[tl][n] <= LOG_ZERO) continue;
00155       for (ac = hmm->state[n].ac; ac; ac = ac->next) {
00156         tmpsum = nodescore[tl][n] + ac->a;
00157         if (nodescore[tn][ac->arc] < tmpsum) {
00158           nodescore[tn][ac->arc] = tmpsum;
00159           from_node[ac->arc] = n;
00160         }
00161       }
00162     }
00163     /* propagate token, appending new if path was selected between units */
00164     for (n = 0; n < hmm->len; n++) {
00165 #ifdef MULTIPATH_VERSION
00166       if (from_node[n] == -1 || nodescore[tn][n] <= LOG_ZERO) {
00167         /*tokenp[tn][n] = NULL;*/
00168       } else {
00169         i=0;
00170         while (from_node[n] > endstates[i]) i++;
00171         if (n > endstates[i]) {
00172           newtoken = (SEGTOKEN *)mymalloc(sizeof(SEGTOKEN));
00173           newtoken->last_id = i;
00174 #else
00175       if (from_node[n] == -1) {
00176         tokenp[tn][n] = NULL;
00177       } else if (nodescore[tn][n] <= LOG_ZERO) {
00178         tokenp[tn][n] = tokenp[tl][from_node[n]];
00179       } else {
00180         if (u_end[from_node[n]] != -1 && u_start[n] != -1
00181             && from_node[n] !=  n) {
00182           newtoken = (SEGTOKEN *)mymalloc(sizeof(SEGTOKEN));
00183           newtoken->last_id = u_end[from_node[n]];
00184 #endif
00185           newtoken->last_end_frame = t-1;
00186           newtoken->last_end_score = nodescore[tl][from_node[n]];
00187           newtoken->list = root;
00188           root = newtoken;
00189           newtoken->next = tokenp[tl][from_node[n]];
00190           tokenp[tn][n] = newtoken;
00191         } else {
00192           tokenp[tn][n] = tokenp[tl][from_node[n]];
00193         }
00194       }
00195     }
00196 
00197 #ifdef MULTIPATH_VERSION
00198     /* if this is next of last frame, loop ends here */
00199     if (t == param->samplenum) break;
00200 #endif
00201         
00202     /* calc outprob to new nodes */
00203     for (n = 0; n < hmm->len; n++) {
00204 #ifdef MULTIPATH_VERSION
00205       if (hmm->state[n].out.state == NULL) continue;
00206 #endif
00207       if (nodescore[tn][n] > LOG_ZERO) {
00208         if (hmm->state[n].is_pseudo_state) {
00209           j_printerr("Warning: state %d: pseudo state?\n", n);
00210         }
00211         nodescore[tn][n] += outprob(t, &(hmm->state[n]), param);
00212       }
00213       if (nodescore[tn][n] > maxscore) { /* for debug */
00214         maxscore = nodescore[tn][n];
00215         maxnode = n;
00216       }
00217     }
00218     
00219 #if 0
00220     for (i=0;i<ulen;i++) {
00221       printf("%d: unit %d(%d-%d): begin_frame = %d\n", t - 1, i,
00222              (i > 0) ? endstates[i-1]+1 : 0, endstates[i],
00223 #ifdef MULTIPATH_VERSION
00224              (tokenp[tl][endstates[i]] == NULL) ? -1 :
00225 #endif
00226              tokenp[tl][endstates[i]]->last_end_frame + 1);
00227     }
00228 #endif
00229 
00230     /* printf("t=%3d max=%f n=%d\n",t,maxscore, maxnode); */
00231     
00232   }
00233 
00234   result_score = nodescore[tn][hmm->len-1];
00235 
00236   /* parse back the last token to see the trail of best viterbi path */
00237   /* and store the informations to returning buffer */
00238 #ifdef MULTIPATH_VERSION
00239   slen = 0;
00240 #else
00241   slen = 1;
00242 #endif
00243   for(token = tokenp[tn][hmm->len-1]; token; token = token->next) {
00244     if (token->last_end_frame == -1) break;
00245     slen++;
00246   }
00247   id = (int *)mymalloc(sizeof(int)*slen);
00248   seg = (int *)mymalloc(sizeof(int)*slen);
00249   uscore = (LOGPROB *)mymalloc(sizeof(LOGPROB)*slen);
00250 
00251 #ifdef MULTIPATH_VERSION
00252   i = slen - 1;
00253 #else
00254   id[slen-1] = ulen - 1;
00255   seg[slen-1] = t - 1;
00256   uscore[slen-1] = result_score;
00257   i = slen - 2;
00258 #endif
00259   for(token = tokenp[tn][hmm->len-1]; token; token = token->next) {
00260     if (i < 0 || token->last_end_frame == -1) break;
00261     id[i] = token->last_id;
00262     seg[i] = token->last_end_frame;
00263     uscore[i] = token->last_end_score;
00264     i--;
00265   }
00266 
00267   /* normalize scores by frame */
00268   for (i=slen-1;i>0;i--) {
00269     uscore[i] = (uscore[i] - uscore[i-1]) / (seg[i] - seg[i-1]);
00270   }
00271   uscore[0] = uscore[0] / (seg[0] + 1);
00272 
00273   /* set return value */
00274   *id_ret = id;
00275   *seg_ret = seg;
00276   *uscore_ret = uscore;
00277   *slen_ret = slen;
00278 
00279   /* free memory */
00280 #ifndef MULTIPATH_VERSION
00281   free(u_start);
00282   free(u_end);
00283 #endif
00284   free(from_node);
00285   token = root;
00286   while(token) {
00287     tmptoken = token->list;
00288     free(token);
00289     token = tmptoken;
00290   }
00291   for (i=0;i<2;i++) {
00292     free(nodescore[i]);
00293     free(tokenp[i]);
00294   }
00295 
00296   return(result_score);
00297 
00298 }

Generated on Tue Mar 28 16:01:39 2006 for Julius by  doxygen 1.4.2