00001
00028
00029
00030
00031
00032
00033
00034 #include <sent/stddefs.h>
00035 #include <sent/htk_param.h>
00036 #include <sent/htk_hmm.h>
00037
00038 #undef DMES
00039
00040 static boolean gzfile;
00041
00042 #define rdn(A,B,C,D) if (rdnfunc(A,B,C,D) == FALSE) return FALSE
00043 #define rdn_str(A,B,C) if ((C = rdn_strfunc(A,B)) == NULL) return FALSE
00044
00053 static boolean
00054 rdnfunc(FILE *fp, void *buf, size_t unitbyte, int unitnum)
00055 {
00056 size_t tmp;
00057 if (gzfile) {
00058 tmp = myfread(buf, unitbyte, unitnum, fp);
00059 } else {
00060 tmp = fread(buf, unitbyte, unitnum, fp);
00061 }
00062 if (tmp < (size_t)unitnum) {
00063 jlog("Error: read_binhmm: failed to read %d bytes\n", unitbyte * unitnum);
00064 return FALSE;
00065 }
00066 #ifndef WORDS_BIGENDIAN
00067 if (unitbyte != 1) {
00068 swap_bytes(buf, unitbyte, unitnum);
00069 }
00070 #endif
00071 return TRUE;
00072 }
00073
00074 static char buf[MAXLINELEN];
00075 static char nostr = '\0';
00084 static char *
00085 rdn_strfunc(FILE *fp, HTK_HMM_INFO *hmm)
00086 {
00087 int c;
00088 int len;
00089 char *p;
00090
00091 len = 0;
00092 while ((c = gzfile ? myfgetc(fp) : fgetc(fp)) != -1) {
00093 if (len >= MAXLINELEN) {
00094 jlog("Error: read_binhmm: string len exceeded %d bytes\n", MAXLINELEN);
00095 jlog("Error: read_binhmm: please check the value of MAXLINELEN\n");
00096 return NULL;
00097 }
00098 buf[len++] = c;
00099 if (c == '\0') break;
00100 }
00101 if (len == 0) return NULL;
00102 if (len == 1) {
00103 p = &nostr;
00104 } else {
00105 p = (char *)mybmalloc2(len, &(hmm->mroot));
00106 strcpy(p, buf);
00107 }
00108 return(p);
00109 }
00110
00111
00112 static char *binhmm_header = BINHMM_HEADER;
00113 static char *binhmm_header_v2 = BINHMM_HEADER_V2;
00114
00121 static boolean
00122 rd_para(FILE *fp, Value *para)
00123 {
00124 short version;
00125 float dummy;
00126
00127
00128 rdn(fp, &version, sizeof(short), 1);
00129
00130 if (version > VALUE_VERSION) {
00131 jlog("Error: read_binhmm: unknown embedded parameter format version: %d\n", version);
00132 return FALSE;
00133 }
00134
00135
00136 rdn(fp, &(para->smp_period), sizeof(long), 1);
00137 rdn(fp, &(para->smp_freq), sizeof(long), 1);
00138 rdn(fp, &(para->framesize), sizeof(int), 1);
00139 rdn(fp, &(para->frameshift), sizeof(int), 1);
00140 rdn(fp, &(para->preEmph), sizeof(float), 1);
00141 rdn(fp, &(para->lifter), sizeof(int), 1);
00142 rdn(fp, &(para->fbank_num), sizeof(int), 1);
00143 rdn(fp, &(para->delWin), sizeof(int), 1);
00144 rdn(fp, &(para->accWin), sizeof(int), 1);
00145 rdn(fp, &(para->silFloor), sizeof(float), 1);
00146 rdn(fp, &(para->escale), sizeof(float), 1);
00147 rdn(fp, &(para->hipass), sizeof(int), 1);
00148 rdn(fp, &(para->lopass), sizeof(int), 1);
00149 rdn(fp, &(para->enormal), sizeof(int), 1);
00150 rdn(fp, &(para->raw_e), sizeof(int), 1);
00151 if (version == 1) {
00152
00153
00154 rdn(fp, &dummy, sizeof(float), 1);
00155 rdn(fp, &dummy, sizeof(float), 1);
00156 }
00157 rdn(fp, &(para->zmeanframe), sizeof(int), 1);
00158
00159 return(TRUE);
00160 }
00161
00172 static boolean
00173 rd_header(FILE *fp, HTK_HMM_INFO *hmm, Value *para)
00174 {
00175 char *p, *q;
00176 boolean emp, inv;
00177
00178 rdn_str(fp, hmm, p);
00179 if (strmatch(p, binhmm_header)) {
00180
00181 hmm->variance_inversed = FALSE;
00182 } else if (strmatch(p, binhmm_header_v2)) {
00183
00184 emp = inv = FALSE;
00185 rdn_str(fp, hmm, q);
00186 if (*q != '\0') {
00187 while(*q == '_') {
00188 q++;
00189 switch (*q) {
00190 case BINHMM_HEADER_V2_EMBEDPARA:
00191
00192 emp = TRUE;
00193 break;
00194 case BINHMM_HEADER_V2_VARINV:
00195 inv = TRUE;
00196 break;
00197 }
00198 q++;
00199 }
00200 }
00201 if (emp) {
00202 para->loaded = 1;
00203 if (rd_para(fp, para) == FALSE) {
00204 jlog("Error: read_binhmm: failed to read embeded parameter\n");
00205 return FALSE;
00206 }
00207 jlog("Stat: read_binhmm: has acoutic analysis configurations in its header\n");
00208 }
00209 if (inv) {
00210 hmm->variance_inversed = TRUE;
00211 jlog("Stat: read_binhmm: has inversed variances\n");
00212 } else {
00213 hmm->variance_inversed = FALSE;
00214 }
00215 } else {
00216
00217 return FALSE;
00218 }
00219 return TRUE;
00220 }
00221
00222
00223
00231 static boolean
00232 rd_opt(FILE *fp, HTK_HMM_Options *opt)
00233 {
00234 rdn(fp, &(opt->stream_info.num), sizeof(short), 1);
00235 rdn(fp, opt->stream_info.vsize, sizeof(short), 50);
00236 rdn(fp, &(opt->vec_size), sizeof(short), 1);
00237 rdn(fp, &(opt->cov_type), sizeof(short), 1);
00238 rdn(fp, &(opt->dur_type), sizeof(short), 1);
00239 rdn(fp, &(opt->param_type), sizeof(short), 1);
00240
00241 return(TRUE);
00242 }
00243
00250 static boolean
00251 rd_type(FILE *fp, HTK_HMM_INFO *hmm)
00252 {
00253 rdn(fp, &(hmm->is_tied_mixture), sizeof(boolean), 1);
00254 rdn(fp, &(hmm->maxmixturenum), sizeof(int), 1);
00255 return TRUE;
00256 }
00257
00258
00259
00260 static HTK_HMM_Trans **tr_index;
00261 static unsigned int tr_num;
00262
00273 static boolean
00274 rd_trans(FILE *fp, HTK_HMM_INFO *hmm)
00275 {
00276 HTK_HMM_Trans *t;
00277 unsigned int idx;
00278 int i;
00279 PROB *atmp;
00280 char *p;
00281
00282 rdn(fp, &tr_num, sizeof(unsigned int), 1);
00283 tr_index = (HTK_HMM_Trans **)mymalloc(sizeof(HTK_HMM_Trans *) * tr_num);
00284
00285 hmm->trstart = NULL;
00286 hmm->tr_root = NULL;
00287 for (idx = 0; idx < tr_num; idx++) {
00288 t = (HTK_HMM_Trans *)mybmalloc2(sizeof(HTK_HMM_Trans), &(hmm->mroot));
00289 rdn_str(fp, hmm, p);
00290 t->name = (*p == '\0') ? NULL : p;
00291 rdn(fp, &(t->statenum), sizeof(short), 1);
00292 t->a = (PROB **)mybmalloc2(sizeof(PROB *) * t->statenum, &(hmm->mroot));
00293 atmp = (PROB *)mybmalloc2(sizeof(PROB) * t->statenum * t->statenum, &(hmm->mroot));
00294 for (i=0;i<t->statenum;i++) {
00295 t->a[i] = &(atmp[i*t->statenum]);
00296 rdn(fp, t->a[i], sizeof(PROB), t->statenum);
00297 }
00298 trans_add(hmm, t);
00299 tr_index[idx] = t;
00300 }
00301
00302 #ifdef DMES
00303 jlog("Stat: read_binhmm: %d transition maxtix read\n", tr_num);
00304 #endif
00305 return TRUE;
00306 }
00307
00308
00309 static HTK_HMM_Var **vr_index;
00310 static unsigned int vr_num;
00311
00322 static boolean
00323 rd_var(FILE *fp, HTK_HMM_INFO *hmm)
00324 {
00325 HTK_HMM_Var *v;
00326 unsigned int idx;
00327 char *p;
00328
00329 rdn(fp, &vr_num, sizeof(unsigned int), 1);
00330 vr_index = (HTK_HMM_Var **)mymalloc(sizeof(HTK_HMM_Var *) * vr_num);
00331
00332 hmm->vrstart = NULL;
00333 hmm->vr_root = NULL;
00334 for (idx = 0; idx < vr_num; idx++) {
00335 v = (HTK_HMM_Var *)mybmalloc2(sizeof(HTK_HMM_Var), &(hmm->mroot));
00336 rdn_str(fp, hmm, p);
00337 v->name = (*p == '\0') ? NULL : p;
00338 rdn(fp, &(v->len), sizeof(short), 1);
00339 v->vec = (VECT *)mybmalloc2(sizeof(VECT) * v->len, &(hmm->mroot));
00340 rdn(fp, v->vec, sizeof(VECT), v->len);
00341 vr_index[idx] = v;
00342 var_add(hmm, v);
00343 }
00344 #ifdef DMES
00345 jlog("Stat: read_binhmm: %d variance read\n", vr_num);
00346 #endif
00347 return TRUE;
00348 }
00349
00350
00351
00352 static HTK_HMM_Dens **dens_index;
00353 static unsigned int dens_num;
00354
00366 static boolean
00367 rd_dens(FILE *fp, HTK_HMM_INFO *hmm)
00368 {
00369 HTK_HMM_Dens *d;
00370 unsigned int idx;
00371 unsigned int vid;
00372 char *p;
00373
00374 rdn(fp, &dens_num, sizeof(unsigned int), 1);
00375 hmm->totalmixnum = dens_num;
00376 dens_index = (HTK_HMM_Dens **)mymalloc(sizeof(HTK_HMM_Dens *) * dens_num);
00377
00378 hmm->dnstart = NULL;
00379 hmm->dn_root = NULL;
00380 for (idx = 0; idx < dens_num; idx++) {
00381 d = (HTK_HMM_Dens *)mybmalloc2(sizeof(HTK_HMM_Dens), &(hmm->mroot));
00382 rdn_str(fp, hmm, p);
00383 d->name = (*p == '\0') ? NULL : p;
00384 rdn(fp, &(d->meanlen), sizeof(short), 1);
00385 d->mean = (VECT *)mybmalloc2(sizeof(VECT) * d->meanlen, &(hmm->mroot));
00386 rdn(fp, d->mean, sizeof(VECT), d->meanlen);
00387 rdn(fp, &vid, sizeof(unsigned int), 1);
00388 d->var = vr_index[vid];
00389 rdn(fp, &(d->gconst), sizeof(LOGPROB), 1);
00390 dens_index[idx] = d;
00391 dens_add(hmm, d);
00392 }
00393 #ifdef DMES
00394 jlog("Stat: read_binhmm: %d gaussian densities read\n", dens_num);
00395 #endif
00396 return TRUE;
00397 }
00398
00399
00400
00401 static GCODEBOOK **tm_index;
00402 static unsigned int tm_num;
00403
00415 static boolean
00416 rd_tmix(FILE *fp, HTK_HMM_INFO *hmm)
00417 {
00418 GCODEBOOK *tm;
00419 unsigned int idx;
00420 unsigned int did;
00421 int i;
00422 char *p;
00423
00424 rdn(fp, &tm_num, sizeof(unsigned int), 1);
00425 hmm->codebooknum = tm_num;
00426 tm_index = (GCODEBOOK **)mymalloc(sizeof(GCODEBOOK *) * tm_num);
00427 hmm->maxcodebooksize = 0;
00428
00429 hmm->codebook_root = NULL;
00430 for (idx = 0; idx < tm_num; idx++) {
00431 tm = (GCODEBOOK *)mybmalloc2(sizeof(GCODEBOOK), &(hmm->mroot));
00432 rdn_str(fp, hmm, p);
00433 tm->name = (*p == '\0') ? NULL : p;
00434 rdn(fp, &(tm->num), sizeof(int), 1);
00435 if (hmm->maxcodebooksize < tm->num) hmm->maxcodebooksize = tm->num;
00436 tm->d = (HTK_HMM_Dens **)mybmalloc2(sizeof(HTK_HMM_Dens *) * tm->num, &(hmm->mroot));
00437 for(i=0;i<tm->num;i++) {
00438 rdn(fp, &did, sizeof(unsigned int), 1);
00439 if (did >= dens_num) {
00440 tm->d[i] = NULL;
00441 } else {
00442 tm->d[i] = dens_index[did];
00443 }
00444 }
00445 tm->id = idx;
00446 tm_index[idx] = tm;
00447 codebook_add(hmm, tm);
00448 }
00449 #ifdef DMES
00450 jlog("Stat: read_binhmm: %d tied-mixture codebooks read\n", tm_num);
00451 #endif
00452 return TRUE;
00453 }
00454
00455
00456 static HTK_HMM_State **st_index;
00457 static unsigned int st_num;
00458
00471 static boolean
00472 rd_state(FILE *fp, HTK_HMM_INFO *hmm)
00473 {
00474 HTK_HMM_State *s;
00475 unsigned int idx;
00476 unsigned int did;
00477 int i;
00478 char *p;
00479
00480 rdn(fp, &st_num, sizeof(unsigned int), 1);
00481 hmm->totalstatenum = st_num;
00482 st_index = (HTK_HMM_State **)mymalloc(sizeof(HTK_HMM_State *) * st_num);
00483
00484 hmm->ststart = NULL;
00485 hmm->st_root = NULL;
00486 for (idx = 0; idx < st_num; idx++) {
00487 s = (HTK_HMM_State *)mybmalloc2(sizeof(HTK_HMM_State), &(hmm->mroot));
00488 rdn_str(fp, hmm, p);
00489 s->name = (*p == '\0') ? NULL : p;
00490 rdn(fp, &(s->mix_num), sizeof(short), 1);
00491 if (s->mix_num == -1) {
00492
00493 rdn(fp, &did, sizeof(unsigned int), 1);
00494 s->b = (HTK_HMM_Dens **)tm_index[did];
00495 s->mix_num = (tm_index[did])->num;
00496 } else {
00497
00498 s->b = (HTK_HMM_Dens **)mybmalloc2(sizeof(HTK_HMM_Dens *) * s->mix_num, &(hmm->mroot));
00499 for (i=0;i<s->mix_num;i++) {
00500 rdn(fp, &did, sizeof(unsigned int), 1);
00501 if (did >= dens_num) {
00502 s->b[i] = NULL;
00503 } else {
00504 s->b[i] = dens_index[did];
00505 }
00506 }
00507 }
00508 s->bweight = (PROB *)mybmalloc2(sizeof(PROB) * s->mix_num, &(hmm->mroot));
00509 rdn(fp, s->bweight, sizeof(PROB), s->mix_num);
00510 s->id = idx;
00511 st_index[idx] = s;
00512 state_add(hmm, s);
00513 }
00514 #ifdef DMES
00515 jlog("Stat: read_binhmm: %d states read\n", st_num);
00516 #endif
00517 return TRUE;
00518 }
00519
00531 static boolean
00532 rd_data(FILE *fp, HTK_HMM_INFO *hmm)
00533 {
00534 HTK_HMM_Data *d;
00535 unsigned int md_num;
00536 unsigned int sid, tid;
00537 unsigned int idx;
00538 int i;
00539 char *p;
00540
00541 rdn(fp, &(md_num), sizeof(unsigned int), 1);
00542 hmm->totalhmmnum = md_num;
00543
00544 hmm->start = NULL;
00545 hmm->physical_root = NULL;
00546 for (idx = 0; idx < md_num; idx++) {
00547 d = (HTK_HMM_Data *)mybmalloc2(sizeof(HTK_HMM_Data), &(hmm->mroot));
00548 rdn_str(fp, hmm, p);
00549 d->name = (*p == '\0') ? NULL : p;
00550 rdn(fp, &(d->state_num), sizeof(short), 1);
00551 d->s = (HTK_HMM_State **)mybmalloc2(sizeof(HTK_HMM_State *) * d->state_num, &(hmm->mroot));
00552 for (i=0;i<d->state_num;i++) {
00553 rdn(fp, &sid, sizeof(unsigned int), 1);
00554 if (sid > (unsigned int)hmm->totalstatenum) {
00555 d->s[i] = NULL;
00556 } else {
00557 d->s[i] = st_index[sid];
00558 }
00559 }
00560 rdn(fp, &tid, sizeof(unsigned int), 1);
00561 d->tr = tr_index[tid];
00562 htk_hmmdata_add(hmm, d);
00563 }
00564 #ifdef DMES
00565 jlog("Stat: read_binhmm: %d HMM model definition read\n", md_num);
00566 #endif
00567 return TRUE;
00568 }
00569
00570
00571
00582 boolean
00583 read_binhmm(FILE *fp, HTK_HMM_INFO *hmm, boolean gzfile_p, Value *para)
00584 {
00585
00586 gzfile = gzfile_p;
00587
00588
00589 if (rd_header(fp, hmm, para) == FALSE) {
00590 return FALSE;
00591 }
00592
00593 jlog("Stat: read_binhmm: binary format HMM definition\n");
00594
00595
00596 if (rd_opt(fp, &(hmm->opt)) == FALSE) {
00597 jlog("Error: read_binhmm: failed to read HMM options\n");
00598 return FALSE;
00599 }
00600
00601
00602 if (rd_type(fp, hmm) == FALSE) {
00603 jlog("Error: read_binhmm: failed to read HMM type of mixture tying\n");
00604 return FALSE;
00605 }
00606
00607
00608 if (rd_trans(fp, hmm) == FALSE) {
00609 jlog("Error: read_binhmm: failed to read HMM transition data\n");
00610 return FALSE;
00611 }
00612
00613
00614 if (rd_var(fp, hmm) == FALSE) {
00615 jlog("Error: read_binhmm: failed to read HMM variance data\n");
00616 return FALSE;
00617 }
00618
00619
00620 if (rd_dens(fp, hmm) == FALSE) {
00621 jlog("Error: read_binhmm: failed to read HMM density data\n");
00622 return FALSE;
00623 }
00624
00625
00626 if (hmm->is_tied_mixture) {
00627 if (rd_tmix(fp, hmm) == FALSE) {
00628 jlog("Error: read_binhmm: failed to read HMM tied-mixture codebook data\n");
00629 return FALSE;
00630 }
00631 }
00632
00633
00634 if (rd_state(fp, hmm) == FALSE) {
00635 jlog("Error: read_binhmm: failed to read HMM state data\n");
00636 return FALSE;
00637 }
00638
00639
00640 if (rd_data(fp, hmm) == FALSE) {
00641 jlog("Error: read_binhmm: failed to read HMM data\n");
00642 return FALSE;
00643 }
00644
00645
00646 free(tr_index);
00647 free(vr_index);
00648 free(dens_index);
00649 if (hmm->is_tied_mixture) free(tm_index);
00650 free(st_index);
00651
00652
00653 {
00654 HTK_HMM_Data *dtmp;
00655 int maxlen = 0;
00656 for (dtmp = hmm->start; dtmp; dtmp = dtmp->next) {
00657 if (maxlen < dtmp->state_num) maxlen = dtmp->state_num;
00658 }
00659 hmm->maxstatenum = maxlen;
00660 }
00661
00662
00663 hmm->need_multipath = htk_hmm_has_several_arc_on_edge(hmm);
00664 if (hmm->need_multipath) {
00665 jlog("Stat: read_binhmm: this HMM requires multipath handling at decoding\n");
00666 } else {
00667 jlog("Stat: read_binhmm: this HMM does not need multipath handling\n");
00668 }
00669
00670 if (! hmm->variance_inversed) {
00671
00672 htk_hmm_inverse_variances(hmm);
00673 hmm->variance_inversed = TRUE;
00674 }
00675
00676 return (TRUE);
00677 }