00001
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028 #include <sent/stddefs.h>
00029 #include <sent/hmm.h>
00030
00041 static int
00042 totalstatelen(HMM_Logical **hdseq, int hdseqlen, boolean *has_sp, HTK_HMM_INFO *hmminfo)
00043 {
00044 int i, len;
00045
00046 len = 0;
00047 for (i=0;i<hdseqlen;i++) {
00048 len += hmm_logical_state_num(hdseq[i]) - 2;
00049 if (hmminfo->multipath) {
00050 if (has_sp[i]) {
00051 len += hmm_logical_state_num(hmminfo->sp) - 2;
00052 }
00053 }
00054 }
00055 if (hmminfo->multipath) {
00056
00057 len += 2;
00058 }
00059 return(len);
00060 }
00061
00069 static void
00070 add_arc(HMM_STATE *state, int arc, LOGPROB a)
00071 {
00072 A_CELL *atmp;
00073
00074 atmp = (A_CELL *)mymalloc(sizeof(A_CELL));
00075 atmp->a = a;
00076 atmp->arc = arc;
00077 atmp->next = state->ac;
00078 state->ac = atmp;
00079 }
00080
00081
00082
00083
00084
00098 HMM *
00099 new_make_word_hmm_with_lm(HTK_HMM_INFO *hmminfo, HMM_Logical **hdseq, int hdseqlen, boolean *has_sp, LOGPROB *lscore)
00100 {
00101 HMM *new;
00102 int i,j,n;
00103 int afrom, ato;
00104 LOGPROB logprob;
00105 HTK_HMM_Trans *tr;
00106 int state_num;
00107
00108 if (hmminfo->multipath) {
00109 if (hmminfo->sp == NULL) {
00110 jlog("Error: mkwhmm: no short-pause model in hmminfo\n");
00111 return NULL;
00112 }
00113 }
00114
00115
00116 new = (HMM *)mymalloc(sizeof(HMM));
00117 new->len = totalstatelen(hdseq, hdseqlen, hmminfo->multipath ? has_sp : NULL, hmminfo);
00118 new->state = (HMM_STATE *)mymalloc(sizeof(HMM_STATE) * new->len);
00119 for (i=0;i<new->len;i++) {
00120 new->state[i].ac = NULL;
00121 new->state[i].is_pseudo_state = FALSE;
00122 new->state[i].out.state = NULL;
00123 new->state[i].out.cdset = NULL;
00124 }
00125
00126
00127 n = 0;
00128 if (hmminfo->multipath) n++;
00129 for (i = 0; i < hdseqlen; i++) {
00130 if (hdseq[i]->is_pseudo) {
00131 for (j = 1; j < hdseq[i]->body.pseudo->state_num - 1; j++) {
00132 new->state[n].is_pseudo_state = TRUE;
00133 new->state[n].out.cdset = &(hdseq[i]->body.pseudo->stateset[j]);
00134 n++;
00135 }
00136 } else {
00137 for (j = 1; j < hdseq[i]->body.defined->state_num - 1; j++) {
00138 new->state[n].is_pseudo_state = FALSE;
00139 new->state[n].out.state = hdseq[i]->body.defined->s[j];
00140 n++;
00141 }
00142 }
00143 if (hmminfo->multipath) {
00144 if (has_sp[i]) {
00145
00146 if (hmminfo->sp->is_pseudo) {
00147 for (j = 1; j < hmm_logical_state_num(hmminfo->sp) - 1; j++) {
00148 new->state[n].is_pseudo_state = TRUE;
00149 new->state[n].out.cdset = &(hmminfo->sp->body.pseudo->stateset[j]);
00150 n++;
00151 }
00152 } else {
00153 for (j = 1; j < hmm_logical_state_num(hmminfo->sp) - 1; j++) {
00154 new->state[n].is_pseudo_state = FALSE;
00155 new->state[n].out.state = hmminfo->sp->body.defined->s[j];
00156 n++;
00157 }
00158 }
00159 }
00160 }
00161 }
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172 if (hmminfo->multipath) {
00173
00174 int *out_from, *out_from_next;
00175 LOGPROB *out_a, *out_a_next;
00176 int out_num_prev, out_num_next;
00177 out_from = (int *)mymalloc(sizeof(int) * new->len);
00178 out_from_next = (int *)mymalloc(sizeof(int) * new->len);
00179 out_a = (LOGPROB *)mymalloc(sizeof(LOGPROB) * new->len);
00180 out_a_next = (LOGPROB *)mymalloc(sizeof(LOGPROB) * new->len);
00181
00182 n = 0;
00183
00184 out_from[0] = 0;
00185 out_a[0] = 0.0;
00186 out_num_prev = 1;
00187 for (i = 0; i < hdseqlen; i++) {
00188 state_num = hmm_logical_state_num(hdseq[i]);
00189 tr = hmm_logical_trans(hdseq[i]);
00190 out_num_next = 0;
00191
00192 for (ato = 1; ato < state_num; ato++) {
00193 logprob = tr->a[0][ato];
00194 if (logprob != LOG_ZERO) {
00195
00196 if (ato == state_num-1) {
00197
00198 if (lscore != NULL) logprob += lscore[i];
00199 for(j=0;j<out_num_prev;j++) {
00200 out_from_next[out_num_next] = out_from[j];
00201 out_a_next[out_num_next] = out_a[j] + logprob;
00202 out_num_next++;
00203 }
00204 } else {
00205 for(j=0;j<out_num_prev;j++) {
00206 add_arc(&(new->state[out_from[j]]), n + ato,
00207 out_a[j] + logprob);
00208 }
00209 }
00210 }
00211 }
00212
00213 for(afrom = 1; afrom < state_num - 1; afrom++) {
00214 for (ato = 1; ato < state_num; ato++) {
00215 logprob = tr->a[afrom][ato];
00216 if (logprob != LOG_ZERO) {
00217 if (ato == state_num - 1) {
00218
00219 if (lscore != NULL) logprob += lscore[i];
00220 out_from_next[out_num_next] = n+afrom;
00221 out_a_next[out_num_next++] = logprob;
00222 } else {
00223 add_arc(&(new->state[n+afrom]), n + ato, logprob);
00224 }
00225 }
00226 }
00227 }
00228 n += state_num - 2;
00229 for(j=0;j<out_num_next;j++) {
00230 out_from[j] = out_from_next[j];
00231 out_a[j] = out_a_next[j];
00232 }
00233 out_num_prev = out_num_next;
00234
00235
00236 if (has_sp[i]) {
00237
00238 out_num_next = 0;
00239
00240
00241 for (ato = 1; ato < hmm_logical_state_num(hmminfo->sp); ato++) {
00242 logprob = hmm_logical_trans(hmminfo->sp)->a[0][ato];
00243 if (logprob != LOG_ZERO) {
00244
00245
00246 logprob += hmminfo->iwsp_penalty;
00247
00248 if (ato == hmm_logical_state_num(hmminfo->sp)-1) {
00249
00250 for(j=0;j<out_num_prev;j++) {
00251 out_from_next[out_num_next] = out_from[j];
00252 out_a_next[out_num_next] = out_a[j] + logprob;
00253 out_num_next++;
00254 }
00255 } else {
00256 for(j=0;j<out_num_prev;j++) {
00257 add_arc(&(new->state[out_from[j]]), n + ato,
00258 out_a[j] + logprob);
00259 }
00260 }
00261 }
00262 }
00263
00264 if (hmm_logical_trans(hmminfo->sp)->a[0][hmm_logical_state_num(hmminfo->sp)-1] == LOG_ZERO) {
00265
00266
00267 logprob = 0.0;
00268 for(j=0; j<out_num_prev; j++) {
00269 out_from_next[out_num_next] = out_from[j];
00270 out_a_next[out_num_next] = out_a[j] + logprob;
00271 out_num_next++;
00272 }
00273 }
00274
00275 for(afrom = 1; afrom < hmm_logical_state_num(hmminfo->sp) - 1; afrom++) {
00276 for (ato = 1; ato < hmm_logical_state_num(hmminfo->sp); ato++) {
00277 logprob = hmm_logical_trans(hmminfo->sp)->a[afrom][ato];
00278 if (logprob != LOG_ZERO) {
00279 if (ato == hmm_logical_state_num(hmminfo->sp) - 1) {
00280
00281 out_from_next[out_num_next] = n+afrom;
00282 out_a_next[out_num_next++] = logprob;
00283 } else {
00284 add_arc(&(new->state[n+afrom]), n + ato, logprob);
00285 }
00286 }
00287 }
00288 }
00289 n += hmm_logical_state_num(hmminfo->sp) - 2;
00290 for(j=0;j<out_num_next;j++) {
00291 out_from[j] = out_from_next[j];
00292 out_a[j] = out_a_next[j];
00293 }
00294 out_num_prev = out_num_next;
00295 }
00296 }
00297
00298
00299 for(j=0;j<out_num_prev;j++) {
00300 add_arc(&(new->state[out_from[j]]), new->len-1, out_a[j]);
00301 }
00302 free(out_from);
00303 free(out_from_next);
00304 free(out_a);
00305 free(out_a_next);
00306
00307 } else {
00308
00309
00310 new->accept_ac_a = LOG_ZERO;
00311 n = 0;
00312 for (i = 0; i < hdseqlen; i++) {
00313 state_num = hmm_logical_state_num(hdseq[i]);
00314 tr = hmm_logical_trans(hdseq[i]);
00315
00316 for (afrom = 1; afrom < state_num - 1; afrom++) {
00317 for (ato = 1; ato < state_num; ato++) {
00318 logprob = tr->a[afrom][ato];
00319 if (logprob != LOG_ZERO) {
00320
00321 if (ato == state_num - 1 && lscore != NULL){
00322 logprob += lscore[i];
00323 }
00324 if (n + (ato - afrom) >= new->len) {
00325 if (new->accept_ac_a != LOG_ZERO) {
00326 jlog("Error: mkwhmm: more than 1 arc to accept node found\n");
00327 return NULL;
00328
00329 } else {
00330 new->accept_ac_a = logprob;
00331 }
00332 } else {
00333 add_arc(&(new->state[n]), n + (ato - afrom), logprob);
00334 }
00335 }
00336 }
00337 n++;
00338 }
00339 }
00340 }
00341
00342 return (new);
00343 }
00344
00355 HMM *
00356 new_make_word_hmm(HTK_HMM_INFO *hmminfo, HMM_Logical **hdseq, int hdseqlen, boolean *has_sp)
00357 {
00358 return(new_make_word_hmm_with_lm(hmminfo, hdseq, hdseqlen, has_sp, NULL));
00359 }
00360
00366 void
00367 free_hmm(HMM *d)
00368 {
00369 A_CELL *ac, *atmp;
00370 int i;
00371
00372 for (i=0;i<d->len;i++) {
00373 ac = d->state[i].ac;
00374 while (ac) {
00375 atmp = ac->next;
00376 free(ac);
00377 ac = atmp;
00378 }
00379 }
00380 free(d->state);
00381 free(d);
00382 }