00001
00039
00040
00041
00042
00043
00044
00045
00046 #include <julius/julius.h>
00047
00077 static HMM_Logical **
00078 make_phseq(WORD_ID *wseq, short num, boolean **has_sp_ret, int *num_ret, int **end_ret, int per_what,
00079 RecogProcess *r)
00080 {
00081 HMM_Logical **ph;
00082 boolean *has_sp;
00083 int k;
00084 int phnum;
00085 WORD_ID tmpw, w;
00086 int i, j, pn, st, endn;
00087 HMM_Logical *tmpp, *ret;
00088 WORD_INFO *winfo;
00089 HTK_HMM_INFO *hmminfo;
00090 boolean enable_iwsp;
00091
00092 winfo = r->lm->winfo;
00093 hmminfo = r->am->hmminfo;
00094 if (hmminfo->multipath) enable_iwsp = r->lm->config->enable_iwsp;
00095
00096
00097
00098 phnum = 0;
00099 for (w=0;w<num;w++) phnum += winfo->wlen[wseq[w]];
00100 ph = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * phnum);
00101
00102 if (hmminfo->multipath) {
00103 has_sp = (boolean *)mymalloc(sizeof(boolean) * phnum);
00104 }
00105
00106 st = 0;
00107 if (hmminfo->multipath) st++;
00108 pn = 0;
00109 endn = 0;
00110 for (w=0;w<num;w++) {
00111 tmpw = wseq[w];
00112 for (i=0;i<winfo->wlen[tmpw];i++) {
00113 tmpp = winfo->wseq[tmpw][i];
00114
00115 if (r->ccd_flag) {
00116 if (w > 0 && i == 0) {
00117
00118 if ((ret = get_left_context_HMM(tmpp, ph[pn-1]->name, hmminfo)) != NULL) {
00119 tmpp = ret;
00120 }
00121
00122
00123 }
00124 if (w < num-1 && i == winfo->wlen[tmpw] - 1) {
00125 if ((ret = get_right_context_HMM(tmpp, winfo->wseq[wseq[w+1]][0]->name, hmminfo)) != NULL) {
00126 tmpp = ret;
00127 }
00128 }
00129 }
00130 ph[pn] = tmpp;
00131 if (hmminfo->multipath) {
00132 if (enable_iwsp && i == winfo->wlen[tmpw] - 1) {
00133 has_sp[pn] = TRUE;
00134 } else {
00135 has_sp[pn] = FALSE;
00136 }
00137 }
00138 if (per_what == PER_STATE) {
00139 for (j=0;j<hmm_logical_state_num(tmpp)-2;j++) {
00140 (*end_ret)[endn++] = st + j;
00141 }
00142 if (hmminfo->multipath && enable_iwsp && has_sp[pn]) {
00143 for (k=0;k<hmm_logical_state_num(hmminfo->sp)-2;k++) {
00144 (*end_ret)[endn++] = st + j + k;
00145 }
00146 }
00147 }
00148 st += hmm_logical_state_num(tmpp) - 2;
00149 if (hmminfo->multipath && enable_iwsp && has_sp[pn]) {
00150 st += hmm_logical_state_num(hmminfo->sp) - 2;
00151 }
00152 if (per_what == PER_PHONEME) (*end_ret)[endn++] = st - 1;
00153 pn++;
00154 }
00155 if (per_what == PER_WORD) (*end_ret)[endn++] = st - 1;
00156 }
00157 *num_ret = phnum;
00158 if (hmminfo->multipath) *has_sp_ret = has_sp;
00159 return ph;
00160 }
00161
00162
00185 static void
00186 do_align(WORD_ID *words, short wnum, HTK_Param *param, int per_what, Sentence *s, RecogProcess *r)
00187 {
00188 HMM_Logical **phones;
00189 boolean *has_sp;
00190 int k;
00191 int phonenum;
00192 HMM *shmm;
00193 int *end_state;
00194 int *end_frame;
00195 LOGPROB *end_score;
00196 LOGPROB allscore;
00197 WORD_ID w;
00198 int i, rlen;
00199 int end_num = 0;
00200 int *id_seq, *phloc = NULL, *stloc = NULL;
00201 int j,n,p;
00202 WORD_INFO *winfo;
00203 HTK_HMM_INFO *hmminfo;
00204 boolean enable_iwsp;
00205
00206 winfo = r->lm->winfo;
00207 hmminfo = r->am->hmminfo;
00208 if (hmminfo->multipath) enable_iwsp = r->lm->config->enable_iwsp;
00209
00210
00211 switch(per_what) {
00212 case PER_WORD:
00213 jlog("ALIGN: === word alignment begin ===\n");
00214 end_num = wnum;
00215 phloc = (int *)mymalloc(sizeof(int)*wnum);
00216 i = 0;
00217 for(w=0;w<wnum;w++) {
00218 phloc[w] = i;
00219 i += winfo->wlen[words[w]];
00220 }
00221 break;
00222 case PER_PHONEME:
00223 jlog("ALIGN: === phoneme alignment begin ===\n");
00224 end_num = 0;
00225 for(w=0;w<wnum;w++) end_num += winfo->wlen[words[w]];
00226 break;
00227 case PER_STATE:
00228 jlog("ALIGN: === state alignment begin ===\n");
00229 end_num = 0;
00230 for(w=0;w<wnum;w++) {
00231 for (i=0;i<winfo->wlen[words[w]]; i++) {
00232 end_num += hmm_logical_state_num(winfo->wseq[words[w]][i]) - 2;
00233 }
00234 if (hmminfo->multipath && enable_iwsp) {
00235 end_num += hmm_logical_state_num(hmminfo->sp) - 2;
00236 }
00237 }
00238 phloc = (int *)mymalloc(sizeof(int)*end_num);
00239 stloc = (int *)mymalloc(sizeof(int)*end_num);
00240 {
00241 n = 0;
00242 p = 0;
00243 for(w=0;w<wnum;w++) {
00244 for(i=0;i<winfo->wlen[words[w]]; i++) {
00245 for(j=0; j<hmm_logical_state_num(winfo->wseq[words[w]][i]) - 2; j++) {
00246 phloc[n] = p;
00247 stloc[n] = j + 1;
00248 n++;
00249 }
00250 if (hmminfo->multipath && enable_iwsp && i == winfo->wlen[words[w]] - 1) {
00251 for(k=0;k<hmm_logical_state_num(hmminfo->sp)-2;k++) {
00252 phloc[n] = p;
00253 stloc[n] = j + 1 + k + end_num;
00254 n++;
00255 }
00256 }
00257 p++;
00258 }
00259 }
00260 }
00261
00262 break;
00263 }
00264 end_state = (int *)mymalloc(sizeof(int) * end_num);
00265
00266
00267 phones = make_phseq(words, wnum, hmminfo->multipath ? &has_sp : NULL, &phonenum, &end_state, per_what, r);
00268
00269 shmm = new_make_word_hmm(hmminfo, phones, phonenum, hmminfo->multipath ? has_sp : NULL);
00270 if (shmm == NULL) {
00271 j_internal_error("Error: failed to make word hmm for alignment\n");
00272 }
00273
00274
00275 allscore = viterbi_segment(shmm, param, r->wchmm->hmmwrk, hmminfo->multipath, end_state, end_num, &id_seq, &end_frame, &end_score, &rlen);
00276
00277
00278 s->align.num = rlen;
00279 s->align.unittype = per_what;
00280 s->align.begin_frame = (int *)mymalloc(sizeof(int) * rlen);
00281 s->align.end_frame = (int *)mymalloc(sizeof(int) * rlen);
00282 s->align.avgscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * rlen);
00283 for(i=0;i<rlen;i++) {
00284 s->align.begin_frame[i] = (i == 0) ? 0 : end_frame[i-1] + 1;
00285 s->align.end_frame[i] = end_frame[i];
00286 s->align.avgscore[i] = end_score[i];
00287 }
00288 switch(per_what) {
00289 case PER_WORD:
00290 s->align.w = (WORD_ID *)mymalloc(sizeof(WORD_ID) * rlen);
00291 for(i=0;i<rlen;i++) {
00292 s->align.w[i] = words[id_seq[i]];
00293 }
00294 break;
00295 case PER_PHONEME:
00296 s->align.ph = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * rlen);
00297 for(i=0;i<rlen;i++) {
00298 s->align.ph[i] = phones[id_seq[i]];
00299 }
00300 break;
00301 case PER_STATE:
00302 s->align.ph = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * rlen);
00303 s->align.loc = (short *)mymalloc(sizeof(short) * rlen);
00304 if (hmminfo->multipath) s->align.is_iwsp = (boolean *)mymalloc(sizeof(boolean) * rlen);
00305 for(i=0;i<rlen;i++) {
00306 s->align.ph[i] = phones[phloc[id_seq[i]]];
00307 if (hmminfo->multipath) {
00308 if (enable_iwsp && stloc[id_seq[i]] > end_num) {
00309 s->align.loc[i] = stloc[id_seq[i]] - end_num;
00310 s->align.is_iwsp[i] = TRUE;
00311 } else {
00312 s->align.loc[i] = stloc[id_seq[i]];
00313 s->align.is_iwsp[i] = FALSE;
00314 }
00315 } else {
00316 s->align.loc[i] = stloc[id_seq[i]];
00317 }
00318 }
00319 break;
00320 }
00321
00322 s->align.allscore = allscore;
00323
00324 s->align.filled = TRUE;
00325
00326 free_hmm(shmm);
00327 free(id_seq);
00328 free(phones);
00329 if (hmminfo->multipath) free(has_sp);
00330 free(end_score);
00331 free(end_frame);
00332 free(end_state);
00333
00334 switch(per_what) {
00335 case PER_WORD:
00336 free(phloc);
00337 break;
00338 case PER_PHONEME:
00339 break;
00340 case PER_STATE:
00341 free(phloc);
00342 free(stloc);
00343 }
00344
00345 }
00346
00369 void
00370 word_align(WORD_ID *words, short wnum, HTK_Param *param, Sentence *s, RecogProcess *r)
00371 {
00372 do_align(words, wnum, param, PER_WORD, s, r);
00373 }
00374
00397 void
00398 word_rev_align(WORD_ID *revwords, short wnum, HTK_Param *param, Sentence *s, RecogProcess *r)
00399 {
00400 WORD_ID *words;
00401 int w;
00402 words = (WORD_ID *)mymalloc(sizeof(WORD_ID) * wnum);
00403 for (w=0;w<wnum;w++) words[w] = revwords[wnum-w-1];
00404 do_align(words, wnum, param, PER_WORD, s, r);
00405 free(words);
00406 }
00407
00430 void
00431 phoneme_align(WORD_ID *words, short num, HTK_Param *param, Sentence *s, RecogProcess *r)
00432 {
00433 do_align(words, num, param, PER_PHONEME, s, r);
00434 }
00435
00458 void
00459 phoneme_rev_align(WORD_ID *revwords, short num, HTK_Param *param, Sentence *s, RecogProcess *r)
00460 {
00461 WORD_ID *words;
00462 int p;
00463 words = (WORD_ID *)mymalloc(sizeof(WORD_ID) * num);
00464 for (p=0;p<num;p++) words[p] = revwords[num-p-1];
00465 do_align(words, num, param, PER_PHONEME, s, r);
00466 free(words);
00467 }
00468
00491 void
00492 state_align(WORD_ID *words, short num, HTK_Param *param, Sentence *s, RecogProcess *r)
00493 {
00494 do_align(words, num, param, PER_STATE, s, r);
00495 }
00496
00519 void
00520 state_rev_align(WORD_ID *revwords, short num, HTK_Param *param, Sentence *s, RecogProcess *r)
00521 {
00522 WORD_ID *words;
00523 int p;
00524 words = (WORD_ID *)mymalloc(sizeof(WORD_ID) * num);
00525 for (p=0;p<num;p++) words[p] = revwords[num-p-1];
00526 do_align(words, num, param, PER_STATE, s, r);
00527 free(words);
00528 }
00529
00530