libsent/src/phmm/mkwhmm.c

説明を見る。
00001 
00017 /*
00018  * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
00019  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00020  * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology
00021  * All rights reserved
00022  */
00023 
00024 #ifndef MULTIPATH_VERSION
00025 /* initial & accept arc will be stripped */
00026 /* trans prob to accept state will be stored in accept_ac_a */
00027 #endif
00028 
00029 #include <sent/stddefs.h>
00030 #include <sent/hmm.h>
00031 
00040 static int
00041 totalstatelen(HMM_Logical **hdseq, int hdseqlen
00042 #ifdef MULTIPATH_VERSION
00043               , boolean *has_sp, HTK_HMM_INFO *hmminfo
00044 #endif
00045               )
00046 {
00047   int i, len;
00048 
00049   len = 0;
00050   for (i=0;i<hdseqlen;i++) {
00051     len += hmm_logical_state_num(hdseq[i]) - 2;
00052 #ifdef MULTIPATH_VERSION
00053     if (has_sp[i]) {
00054       if (hmminfo->sp == NULL) j_error("Error: no hmminfo->sp!!\n");
00055       len += hmm_logical_state_num(hmminfo->sp) - 2;
00056     }
00057 #endif
00058   }
00059 #ifdef MULTIPATH_VERSION
00060   /* initial and final state */
00061   len += 2;
00062 #endif
00063   return(len);
00064 }
00065 
00073 static void
00074 add_arc(HMM_STATE *state, int arc, LOGPROB a)
00075 {
00076   A_CELL *atmp;
00077 
00078   atmp = (A_CELL *)mymalloc(sizeof(A_CELL));
00079   atmp->a = a;
00080   atmp->arc = arc;
00081   atmp->next = state->ac;
00082   state->ac = atmp;
00083 }
00084 
00085 /* make word(phrase) HMM from HTK_HMM_INFO */
00086 /* LM prob will be assigned for cross-word arcs */
00087 /* new HMM is malloced and returned */
00088 
00101 HMM *
00102 new_make_word_hmm_with_lm(HTK_HMM_INFO *hmminfo, HMM_Logical **hdseq, int hdseqlen
00103 #ifdef MULTIPATH_VERSION
00104                           , boolean *has_sp
00105 #endif
00106                           , LOGPROB *lscore)
00107 {
00108   HMM *new;
00109   int i,j,n;
00110   int afrom, ato;
00111   LOGPROB logprob;
00112   HTK_HMM_Trans *tr;
00113   int state_num;
00114 
00115   /* allocate needed states */
00116   new = (HMM *)mymalloc(sizeof(HMM));
00117 #ifdef MULTIPATH_VERSION
00118   new->len = totalstatelen(hdseq, hdseqlen, has_sp, hmminfo);
00119 #else
00120   new->len = totalstatelen(hdseq, hdseqlen);
00121 #endif
00122   new->state = (HMM_STATE *)mymalloc(sizeof(HMM_STATE) * new->len);
00123   for (i=0;i<new->len;i++) {
00124     new->state[i].ac = NULL;
00125     new->state[i].is_pseudo_state = FALSE;
00126     new->state[i].out.state = NULL;
00127     new->state[i].out.cdset = NULL;
00128   }
00129 
00130   /* assign outprob informations into the states  */
00131 #ifdef MULTIPATH_VERSION
00132   n = 1;                        /* skip first state */
00133 #else
00134   n = 0;
00135 #endif
00136   for (i = 0; i < hdseqlen; i++) {
00137     if (hdseq[i]->is_pseudo) {
00138       for (j = 1; j < hdseq[i]->body.pseudo->state_num - 1; j++) {
00139         new->state[n].is_pseudo_state = TRUE;
00140         new->state[n].out.cdset = &(hdseq[i]->body.pseudo->stateset[j]);
00141         n++;
00142       }
00143     } else {
00144       for (j = 1; j < hdseq[i]->body.defined->state_num - 1; j++) {
00145         new->state[n].is_pseudo_state = FALSE;
00146         new->state[n].out.state = hdseq[i]->body.defined->s[j];
00147         n++;
00148       }
00149     }
00150 #ifdef MULTIPATH_VERSION
00151     if (has_sp[i]) {
00152       /* append sp at the end of the phone */
00153       if (hmminfo->sp->is_pseudo) {
00154         for (j = 1; j < hmm_logical_state_num(hmminfo->sp) - 1; j++) {
00155           new->state[n].is_pseudo_state = TRUE;
00156           new->state[n].out.cdset = &(hmminfo->sp->body.pseudo->stateset[j]);
00157           n++;
00158         }
00159       } else {
00160         for (j = 1; j < hmm_logical_state_num(hmminfo->sp) - 1; j++) {
00161           new->state[n].is_pseudo_state = FALSE;
00162           new->state[n].out.state = hmminfo->sp->body.defined->s[j];
00163           n++;
00164         }
00165       }
00166     }
00167 #endif
00168   }
00169   
00170   /* make transition arcs between each state*/
00171 /* 
00172  *   for (i=0;i<hdseq[0]->def->state_num;i++) {
00173  *     if (i != 1 && (hdseq[0]->def->tr->a[0][i]) != LOG_ZERO) {
00174  *       j_printerr("initial state contains more than 1 arc.\n");
00175  *     }
00176  *   }
00177  */
00178 
00179 #ifdef MULTIPATH_VERSION
00180 
00181   {
00182     int *out_from, *out_from_next;
00183     LOGPROB *out_a, *out_a_next;
00184     int out_num_prev, out_num_next;
00185     out_from = (int *)mymalloc(sizeof(int) * new->len);
00186     out_from_next = (int *)mymalloc(sizeof(int) * new->len);
00187     out_a = (LOGPROB *)mymalloc(sizeof(LOGPROB) * new->len);
00188     out_a_next = (LOGPROB *)mymalloc(sizeof(LOGPROB) * new->len);
00189 
00190     n = 0;                      /* n points to previous state */
00191 
00192     out_from[0] = 0;
00193     out_a[0] = 0.0;
00194     out_num_prev = 1;
00195     for (i = 0; i < hdseqlen; i++) {
00196       state_num = hmm_logical_state_num(hdseq[i]);
00197       tr = hmm_logical_trans(hdseq[i]);
00198       out_num_next = 0;
00199       /* arc from initial state */
00200       for (ato = 1; ato < state_num; ato++) {
00201         logprob = tr->a[0][ato];
00202         if (logprob != LOG_ZERO) {
00203           /* expand arc */
00204           if (ato == state_num-1) {
00205             /* from initial to final ... register all previously registered arcs for next expansion */
00206             if (lscore != NULL) logprob += lscore[i];
00207             for(j=0;j<out_num_prev;j++) {
00208               out_from_next[out_num_next] = out_from[j];
00209               out_a_next[out_num_next] = out_a[j] + logprob;
00210               out_num_next++;
00211             }
00212           } else {
00213             for(j=0;j<out_num_prev;j++) {
00214               add_arc(&(new->state[out_from[j]]), n + ato,
00215                       out_a[j] + logprob);
00216             }
00217           }
00218         }
00219       }
00220       /* arc from output state */
00221       for(afrom = 1; afrom < state_num - 1; afrom++) {
00222         for (ato = 1; ato < state_num; ato++) {
00223           logprob = tr->a[afrom][ato];
00224           if (logprob != LOG_ZERO) {
00225             if (ato == state_num - 1) {
00226               /* from output state to final ... register the arc for next expansion */
00227               if (lscore != NULL) logprob += lscore[i];
00228               out_from_next[out_num_next] = n+afrom;
00229               out_a_next[out_num_next++] = logprob;
00230             } else {
00231               add_arc(&(new->state[n+afrom]), n + ato, logprob);
00232             }
00233           }
00234         }
00235       }
00236       n += state_num - 2;
00237       for(j=0;j<out_num_next;j++) {
00238         out_from[j] = out_from_next[j];
00239         out_a[j] = out_a_next[j];
00240       }
00241       out_num_prev = out_num_next;
00242 
00243       /* inter-word short pause handling */
00244       if (has_sp[i]) {
00245       
00246         out_num_next = 0;
00247 
00248         /* arc from initial state */
00249         for (ato = 1; ato < hmm_logical_state_num(hmminfo->sp); ato++) {
00250           logprob = hmm_logical_trans(hmminfo->sp)->a[0][ato];
00251           if (logprob != LOG_ZERO) {
00252             /* to control short pause insertion, transition probability toward
00253                the word-end short pause will be given a penalty */
00254             logprob += hmminfo->iwsp_penalty;
00255             /* expand arc */
00256             if (ato == hmm_logical_state_num(hmminfo->sp)-1) {
00257               /* from initial to final ... register all previously registered arcs for next expansion */
00258               for(j=0;j<out_num_prev;j++) {
00259                 out_from_next[out_num_next] = out_from[j];
00260                 out_a_next[out_num_next] = out_a[j] + logprob;
00261                 out_num_next++;
00262               }
00263             } else {
00264               for(j=0;j<out_num_prev;j++) {
00265                 add_arc(&(new->state[out_from[j]]), n + ato,
00266                         out_a[j] + logprob);
00267               }
00268             }
00269           }
00270         }
00271         /* if short pause model doesn't have a model skip transition, also add it */
00272         if (hmm_logical_trans(hmminfo->sp)->a[0][hmm_logical_state_num(hmminfo->sp)-1] == LOG_ZERO) {
00273           /* to make insertion sp model to have no effect on the original path,
00274              the skip transition probability should be 0.0 (=100%) */
00275           logprob = 0.0;
00276           for(j=0; j<out_num_prev; j++) {
00277             out_from_next[out_num_next] = out_from[j];
00278             out_a_next[out_num_next] = out_a[j] + logprob;
00279             out_num_next++;
00280           }
00281         }
00282         /* arc from output state */
00283         for(afrom = 1; afrom < hmm_logical_state_num(hmminfo->sp) - 1; afrom++) {
00284           for (ato = 1; ato < hmm_logical_state_num(hmminfo->sp); ato++) {
00285             logprob = hmm_logical_trans(hmminfo->sp)->a[afrom][ato];
00286             if (logprob != LOG_ZERO) {
00287               if (ato == hmm_logical_state_num(hmminfo->sp) - 1) {
00288                 /* from output state to final ... register the arc for next expansion */
00289                 out_from_next[out_num_next] = n+afrom;
00290                 out_a_next[out_num_next++] = logprob;
00291               } else {
00292                 add_arc(&(new->state[n+afrom]), n + ato, logprob);
00293               }
00294             }
00295           }
00296         }
00297         n += hmm_logical_state_num(hmminfo->sp) - 2;
00298         for(j=0;j<out_num_next;j++) {
00299           out_from[j] = out_from_next[j];
00300           out_a[j] = out_a_next[j];
00301         }
00302         out_num_prev = out_num_next;
00303       }
00304     }
00305       
00306     
00307     for(j=0;j<out_num_prev;j++) {
00308       add_arc(&(new->state[out_from[j]]), new->len-1, out_a[j]);
00309     }
00310     free(out_from);
00311     free(out_from_next);
00312     free(out_a);
00313     free(out_a_next);
00314   }
00315 
00316 #else  /* ~MULTIPATH_VERSION */
00317   
00318   new->accept_ac_a = LOG_ZERO;
00319   n = 0;
00320   for (i = 0; i < hdseqlen; i++) {
00321     state_num = hmm_logical_state_num(hdseq[i]);
00322     tr = hmm_logical_trans(hdseq[i]);
00323     /* for each phoneme, consult the transition matrix to form HMM instance */
00324     for (afrom = 1; afrom < state_num - 1; afrom++) {
00325       for (ato = 1; ato < state_num; ato++) {
00326         logprob = tr->a[afrom][ato];
00327         if (logprob != LOG_ZERO) {
00328           /* if emitting transition, add connection probability to the arc */
00329           if (ato == state_num - 1 && lscore != NULL){
00330             logprob += lscore[i];
00331           }
00332           if (n + (ato - afrom) >= new->len) { /* arc to accept node */
00333             if (new->accept_ac_a != LOG_ZERO) {
00334               j_error("more than 1 arc to accept node found.\n");
00335             } else {
00336               new->accept_ac_a = logprob;
00337             }
00338           } else {
00339             add_arc(&(new->state[n]), n + (ato - afrom), logprob);
00340           }
00341         }
00342       }
00343       n++;
00344     }
00345   }
00346   
00347 #endif /* ~MULTIPATH_VERSION */
00348 
00349   return (new);
00350 }
00351 
00361 HMM *
00362 new_make_word_hmm(HTK_HMM_INFO *hmminfo, HMM_Logical **hdseq, int hdseqlen
00363 #ifdef MULTIPATH_VERSION
00364                   , boolean *has_sp
00365 #endif
00366                   )
00367 {
00368   return(new_make_word_hmm_with_lm(hmminfo, hdseq, hdseqlen
00369 #ifdef MULTIPATH_VERSION
00370                                    , has_sp
00371 #endif
00372                                    , NULL));
00373 }
00374 
00380 void
00381 free_hmm(HMM *d)
00382 {
00383   A_CELL *ac, *atmp;
00384   int i;
00385 
00386   for (i=0;i<d->len;i++) {
00387     ac = d->state[i].ac;
00388     while (ac) {
00389       atmp = ac->next;
00390       free(ac);
00391       ac = atmp;
00392     }
00393   }
00394   free(d->state);
00395   free(d);
00396 }

Juliusに対してTue Dec 26 16:19:28 2006に生成されました。  doxygen 1.5.0