00001
00039
00040
00041
00042
00043
00044
00045
00046 #include <julius/julius.h>
00047
00077 static HMM_Logical **
00078 make_phseq(WORD_ID *wseq, short num, boolean **has_sp_ret, int *num_ret, int **end_ret, int per_what,
00079 RecogProcess *r)
00080 {
00081 HMM_Logical **ph;
00082 boolean *has_sp;
00083 int k;
00084 int phnum;
00085 WORD_ID tmpw, w;
00086 int i, j, pn, st, endn;
00087 HMM_Logical *tmpp, *ret;
00088 WORD_INFO *winfo;
00089 HTK_HMM_INFO *hmminfo;
00090 boolean enable_iwsp;
00091
00092 winfo = r->lm->winfo;
00093 hmminfo = r->am->hmminfo;
00094 if (hmminfo->multipath) enable_iwsp = r->lm->config->enable_iwsp;
00095
00096
00097
00098 phnum = 0;
00099 for (w=0;w<num;w++) phnum += winfo->wlen[wseq[w]];
00100 ph = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * phnum);
00101
00102 if (hmminfo->multipath && enable_iwsp) {
00103 has_sp = (boolean *)mymalloc(sizeof(boolean) * phnum);
00104 } else {
00105 has_sp = NULL;
00106 }
00107
00108 st = 0;
00109 if (hmminfo->multipath) st++;
00110 pn = 0;
00111 endn = 0;
00112 for (w=0;w<num;w++) {
00113 tmpw = wseq[w];
00114 for (i=0;i<winfo->wlen[tmpw];i++) {
00115 tmpp = winfo->wseq[tmpw][i];
00116
00117 if (r->ccd_flag) {
00118 if (w > 0 && i == 0) {
00119
00120 if ((ret = get_left_context_HMM(tmpp, ph[pn-1]->name, hmminfo)) != NULL) {
00121 tmpp = ret;
00122 }
00123
00124
00125 }
00126 if (w < num-1 && i == winfo->wlen[tmpw] - 1) {
00127 if ((ret = get_right_context_HMM(tmpp, winfo->wseq[wseq[w+1]][0]->name, hmminfo)) != NULL) {
00128 tmpp = ret;
00129 }
00130 }
00131 }
00132 ph[pn] = tmpp;
00133 if (hmminfo->multipath && enable_iwsp) {
00134 if (i == winfo->wlen[tmpw] - 1) {
00135 has_sp[pn] = TRUE;
00136 } else {
00137 has_sp[pn] = FALSE;
00138 }
00139 }
00140 if (per_what == PER_STATE) {
00141 for (j=0;j<hmm_logical_state_num(tmpp)-2;j++) {
00142 (*end_ret)[endn++] = st + j;
00143 }
00144 if (hmminfo->multipath && enable_iwsp && has_sp[pn]) {
00145 for (k=0;k<hmm_logical_state_num(hmminfo->sp)-2;k++) {
00146 (*end_ret)[endn++] = st + j + k;
00147 }
00148 }
00149 }
00150 st += hmm_logical_state_num(tmpp) - 2;
00151 if (hmminfo->multipath && enable_iwsp && has_sp[pn]) {
00152 st += hmm_logical_state_num(hmminfo->sp) - 2;
00153 }
00154 if (per_what == PER_PHONEME) (*end_ret)[endn++] = st - 1;
00155 pn++;
00156 }
00157 if (per_what == PER_WORD) (*end_ret)[endn++] = st - 1;
00158 }
00159 *num_ret = phnum;
00160 *has_sp_ret = has_sp;
00161 return ph;
00162 }
00163
00164
00187 static void
00188 do_align(WORD_ID *words, short wnum, HTK_Param *param, int per_what, SentenceAlign *align, RecogProcess *r)
00189 {
00190 HMM_Logical **phones;
00191 boolean *has_sp;
00192 int k;
00193 int phonenum;
00194 HMM *shmm;
00195 int *end_state;
00196 int *end_frame;
00197 LOGPROB *end_score;
00198 LOGPROB allscore;
00199 WORD_ID w;
00200 int i, rlen;
00201 int end_num = 0;
00202 int *id_seq, *phloc = NULL, *stloc = NULL;
00203 int j,n,p;
00204 WORD_INFO *winfo;
00205 HTK_HMM_INFO *hmminfo;
00206 boolean enable_iwsp;
00207
00208 winfo = r->lm->winfo;
00209 hmminfo = r->am->hmminfo;
00210 if (hmminfo->multipath) enable_iwsp = r->lm->config->enable_iwsp;
00211
00212
00213 switch(per_what) {
00214 case PER_WORD:
00215 jlog("ALIGN: === word alignment begin ===\n");
00216 end_num = wnum;
00217 phloc = (int *)mymalloc(sizeof(int)*wnum);
00218 i = 0;
00219 for(w=0;w<wnum;w++) {
00220 phloc[w] = i;
00221 i += winfo->wlen[words[w]];
00222 }
00223 break;
00224 case PER_PHONEME:
00225 jlog("ALIGN: === phoneme alignment begin ===\n");
00226 end_num = 0;
00227 for(w=0;w<wnum;w++) end_num += winfo->wlen[words[w]];
00228 break;
00229 case PER_STATE:
00230 jlog("ALIGN: === state alignment begin ===\n");
00231 end_num = 0;
00232 for(w=0;w<wnum;w++) {
00233 for (i=0;i<winfo->wlen[words[w]]; i++) {
00234 end_num += hmm_logical_state_num(winfo->wseq[words[w]][i]) - 2;
00235 }
00236 if (hmminfo->multipath && enable_iwsp) {
00237 end_num += hmm_logical_state_num(hmminfo->sp) - 2;
00238 }
00239 }
00240 phloc = (int *)mymalloc(sizeof(int)*end_num);
00241 stloc = (int *)mymalloc(sizeof(int)*end_num);
00242 {
00243 n = 0;
00244 p = 0;
00245 for(w=0;w<wnum;w++) {
00246 for(i=0;i<winfo->wlen[words[w]]; i++) {
00247 for(j=0; j<hmm_logical_state_num(winfo->wseq[words[w]][i]) - 2; j++) {
00248 phloc[n] = p;
00249 stloc[n] = j + 1;
00250 n++;
00251 }
00252 if (hmminfo->multipath && enable_iwsp && i == winfo->wlen[words[w]] - 1) {
00253 for(k=0;k<hmm_logical_state_num(hmminfo->sp)-2;k++) {
00254 phloc[n] = p;
00255 stloc[n] = j + 1 + k + end_num;
00256 n++;
00257 }
00258 }
00259 p++;
00260 }
00261 }
00262 }
00263
00264 break;
00265 }
00266 end_state = (int *)mymalloc(sizeof(int) * end_num);
00267
00268
00269 phones = make_phseq(words, wnum, &has_sp, &phonenum, &end_state, per_what, r);
00270
00271 shmm = new_make_word_hmm(hmminfo, phones, phonenum, has_sp);
00272 if (shmm == NULL) {
00273 j_internal_error("Error: failed to make word hmm for alignment\n");
00274 }
00275
00276
00277 allscore = viterbi_segment(shmm, param, r->wchmm->hmmwrk, hmminfo->multipath, end_state, end_num, &id_seq, &end_frame, &end_score, &rlen);
00278
00279
00280 align->num = rlen;
00281 align->unittype = per_what;
00282 align->begin_frame = (int *)mymalloc(sizeof(int) * rlen);
00283 align->end_frame = (int *)mymalloc(sizeof(int) * rlen);
00284 align->avgscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * rlen);
00285 for(i=0;i<rlen;i++) {
00286 align->begin_frame[i] = (i == 0) ? 0 : end_frame[i-1] + 1;
00287 align->end_frame[i] = end_frame[i];
00288 align->avgscore[i] = end_score[i];
00289 }
00290 switch(per_what) {
00291 case PER_WORD:
00292 align->w = (WORD_ID *)mymalloc(sizeof(WORD_ID) * rlen);
00293 for(i=0;i<rlen;i++) {
00294 align->w[i] = words[id_seq[i]];
00295 }
00296 break;
00297 case PER_PHONEME:
00298 align->ph = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * rlen);
00299 for(i=0;i<rlen;i++) {
00300 align->ph[i] = phones[id_seq[i]];
00301 }
00302 break;
00303 case PER_STATE:
00304 align->ph = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * rlen);
00305 align->loc = (short *)mymalloc(sizeof(short) * rlen);
00306 if (hmminfo->multipath) align->is_iwsp = (boolean *)mymalloc(sizeof(boolean) * rlen);
00307 for(i=0;i<rlen;i++) {
00308 align->ph[i] = phones[phloc[id_seq[i]]];
00309 if (hmminfo->multipath) {
00310 if (enable_iwsp && stloc[id_seq[i]] > end_num) {
00311 align->loc[i] = stloc[id_seq[i]] - end_num;
00312 align->is_iwsp[i] = TRUE;
00313 } else {
00314 align->loc[i] = stloc[id_seq[i]];
00315 align->is_iwsp[i] = FALSE;
00316 }
00317 } else {
00318 align->loc[i] = stloc[id_seq[i]];
00319 }
00320 }
00321 break;
00322 }
00323
00324 align->allscore = allscore;
00325
00326 free_hmm(shmm);
00327 free(id_seq);
00328 free(phones);
00329 if (has_sp) free(has_sp);
00330 free(end_score);
00331 free(end_frame);
00332 free(end_state);
00333
00334 switch(per_what) {
00335 case PER_WORD:
00336 free(phloc);
00337 break;
00338 case PER_PHONEME:
00339 break;
00340 case PER_STATE:
00341 free(phloc);
00342 free(stloc);
00343 }
00344
00345 }
00346
/**
 * Align a word sequence to the input, per word.
 *
 * Calls do_align() with PER_WORD on the words in the given order.
 *
 * @param words  word sequence
 * @param wnum   number of words in @a words
 * @param param  input parameter vectors
 * @param align  [out] alignment result
 * @param r      recognition process instance
 */
void
word_align(WORD_ID *words, short wnum, HTK_Param *param, SentenceAlign *align, RecogProcess *r)
{
  do_align(words, wnum, param, PER_WORD, align, r);
}
00374
00397 void
00398 word_rev_align(WORD_ID *revwords, short wnum, HTK_Param *param, SentenceAlign *align, RecogProcess *r)
00399 {
00400 WORD_ID *words;
00401 int w;
00402 words = (WORD_ID *)mymalloc(sizeof(WORD_ID) * wnum);
00403 for (w=0;w<wnum;w++) words[w] = revwords[wnum-w-1];
00404 do_align(words, wnum, param, PER_WORD, align, r);
00405 free(words);
00406 }
00407
/**
 * Align a word sequence to the input, per phoneme.
 *
 * Calls do_align() with PER_PHONEME on the words in the given order.
 *
 * @param words  word sequence
 * @param num    number of words in @a words
 * @param param  input parameter vectors
 * @param align  [out] alignment result
 * @param r      recognition process instance
 */
void
phoneme_align(WORD_ID *words, short num, HTK_Param *param, SentenceAlign *align, RecogProcess *r)
{
  do_align(words, num, param, PER_PHONEME, align, r);
}
00435
00458 void
00459 phoneme_rev_align(WORD_ID *revwords, short num, HTK_Param *param, SentenceAlign *align, RecogProcess *r)
00460 {
00461 WORD_ID *words;
00462 int p;
00463 words = (WORD_ID *)mymalloc(sizeof(WORD_ID) * num);
00464 for (p=0;p<num;p++) words[p] = revwords[num-p-1];
00465 do_align(words, num, param, PER_PHONEME, align, r);
00466 free(words);
00467 }
00468
/**
 * Align a word sequence to the input, per HMM state.
 *
 * Calls do_align() with PER_STATE on the words in the given order.
 *
 * @param words  word sequence
 * @param num    number of words in @a words
 * @param param  input parameter vectors
 * @param align  [out] alignment result
 * @param r      recognition process instance
 */
void
state_align(WORD_ID *words, short num, HTK_Param *param, SentenceAlign *align, RecogProcess *r)
{
  do_align(words, num, param, PER_STATE, align, r);
}
00496
00519 void
00520 state_rev_align(WORD_ID *revwords, short num, HTK_Param *param, SentenceAlign *align, RecogProcess *r)
00521 {
00522 WORD_ID *words;
00523 int p;
00524 words = (WORD_ID *)mymalloc(sizeof(WORD_ID) * num);
00525 for (p=0;p<num;p++) words[p] = revwords[num-p-1];
00526 do_align(words, num, param, PER_STATE, align, r);
00527 free(words);
00528 }
00529
00546 void
00547 do_alignment_all(RecogProcess *r, HTK_Param *param)
00548 {
00549 int n;
00550 Sentence *s;
00551 SentenceAlign *now, *prev;
00552
00553 for(n = 0; n < r->result.sentnum; n++) {
00554 s = &(r->result.sent[n]);
00555
00556 if (r->config->annotate.align_result_word_flag) {
00557 now = result_align_new();
00558 word_align(s->word, s->word_num, param, now, r);
00559 if (s->align == NULL) s->align = now;
00560 else prev->next = now;
00561 prev = now;
00562 }
00563 if (r->config->annotate.align_result_phoneme_flag) {
00564 now = result_align_new();
00565 phoneme_align(s->word, s->word_num, param, now, r);
00566 if (s->align == NULL) s->align = now;
00567 else prev->next = now;
00568 prev = now;
00569 }
00570 if (r->config->annotate.align_result_state_flag) {
00571 now = result_align_new();
00572 state_align(s->word, s->word_num, param, now, r);
00573 if (s->align == NULL) s->align = now;
00574 else prev->next = now;
00575 prev = now;
00576 }
00577 }
00578 }
00579
00580