libsent/src/hmminfo/read_binhmm.c

説明を見る。
00001 
00027 /*
00028  * Copyright (c) 2003-2005 Shikano Lab., Nara Institute of Science and Technology
00029  * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology
00030  * All rights reserved
00031  */
00032 
00033 #include <sent/stddefs.h>
00034 #include <sent/htk_param.h>
00035 #include <sent/htk_hmm.h>
00036 
00037 #undef DMES                     /* define to enable debug message */
00038 
00039 static boolean gzfile;        
00040 
00049 static void
00050 rdn(FILE *fp, void *buf, size_t unitbyte, int unitnum)
00051 {
00052   size_t tmp;
00053   if (gzfile) {
00054     tmp = myfread(buf, unitbyte, unitnum, fp);
00055   } else {
00056     tmp = fread(buf, unitbyte, unitnum, fp);
00057   }
00058   if (tmp < (size_t)unitnum) {
00059     perror("ngram_read_bin");
00060     j_error("read failed\n");
00061   }
00062 #ifndef WORDS_BIGENDIAN
00063   if (unitbyte != 1) {
00064     swap_bytes(buf, unitbyte, unitnum);
00065   }
00066 #endif
00067 }
00068 
00069 static char buf[MAXLINELEN];    
00070 
00078 static char *
00079 rdn_str(FILE *fp, HTK_HMM_INFO *hmm)
00080 {
00081   int c;
00082   int len;
00083   char *p;
00084 
00085   len = 0;
00086   while ((c = gzfile ? myfgetc(fp) : fgetc(fp)) != -1) {
00087     if (len >= MAXLINELEN) j_error("Error: string len exceeded %d bytes\n", len);
00088     buf[len++] = c;
00089     if (c == '\0') break;
00090   }
00091   if (len == 1) {
00092     p = NULL;
00093   } else {
00094     p = (char *)mybmalloc2(len, &(hmm->mroot));
00095     strcpy(p, buf);
00096   }
00097   return(p);
00098 }
00099 
00100 
00101 
00102 static char *binhmm_header = BINHMM_HEADER; 
00103 static char *binhmm_header_v2 = BINHMM_HEADER_V2; 
00104 
00111 static void
00112 rd_para(FILE *fp, Value *para)
00113 {
00114   short version;
00115 
00116   rdn(fp, &version, sizeof(short), 1);
00117   switch(version) {
00118   case 1:
00119     rdn(fp, &(para->smp_period), sizeof(long), 1);      
00120     rdn(fp, &(para->smp_freq), sizeof(long), 1);        
00121     rdn(fp, &(para->framesize), sizeof(int), 1);        
00122     rdn(fp, &(para->frameshift), sizeof(int), 1);       
00123     rdn(fp, &(para->preEmph), sizeof(float), 1);        
00124     rdn(fp, &(para->lifter), sizeof(int), 1);           
00125     rdn(fp, &(para->fbank_num), sizeof(int), 1);        
00126     rdn(fp, &(para->delWin), sizeof(int), 1);           
00127     rdn(fp, &(para->accWin), sizeof(int), 1);           
00128     rdn(fp, &(para->silFloor), sizeof(float), 1);       
00129     rdn(fp, &(para->escale), sizeof(float), 1);         
00130     rdn(fp, &(para->hipass), sizeof(int), 1);           
00131     rdn(fp, &(para->lopass), sizeof(int), 1);           
00132     rdn(fp, &(para->enormal), sizeof(int), 1);          
00133     rdn(fp, &(para->raw_e), sizeof(int), 1);            
00134     rdn(fp, &(para->ss_alpha), sizeof(float), 1);       
00135     rdn(fp, &(para->ss_floor), sizeof(float), 1);       
00136     rdn(fp, &(para->zmeanframe), sizeof(int), 1);       
00137     break;
00138   default:
00139     j_error("Error: read_binhmm: unknown embedded parameter format version: %d\n", version);
00140   }
00141 }
00142 
00153 static boolean
00154 rd_header(FILE *fp, HTK_HMM_INFO *hmm, Value *para)
00155 {
00156   char *p, *q;
00157   boolean emp, inv;
00158   
00159   p = rdn_str(fp, hmm);
00160   if (strmatch(p, binhmm_header)) {
00161     /* version 1 */
00162     hmm->variance_inversed = FALSE;
00163   } else if (strmatch(p, binhmm_header_v2)) {
00164     /* version 2 */
00165     emp = inv = FALSE;
00166     q = rdn_str(fp, hmm);
00167     if (q != NULL) {
00168       while(*q == '_') {
00169         q++;
00170         switch (*q) {
00171         case BINHMM_HEADER_V2_EMBEDPARA:
00172           /* read in embedded acoutic condition parameters */
00173           emp = TRUE;
00174           break;
00175         case BINHMM_HEADER_V2_VARINV:
00176           inv = TRUE;
00177           break;
00178         }
00179         q++;
00180       }
00181     }
00182     if (emp) {
00183       para->loaded = 1;
00184       rd_para(fp, para);
00185       j_printerr("(acoutic analysis conf embedded)...");
00186     }
00187     if (inv) {
00188       hmm->variance_inversed = TRUE;
00189       j_printerr("(varinv)...");
00190     } else {
00191       hmm->variance_inversed = FALSE;
00192     }
00193   } else {
00194     /* failed to read header */
00195     return FALSE;
00196   }
00197   return TRUE;
00198 }
00199 
00200 
00201 
00209 static void
00210 rd_opt(FILE *fp, HTK_HMM_Options *opt)
00211 {
00212   rdn(fp, &(opt->stream_info.num), sizeof(short), 1);
00213   rdn(fp, opt->stream_info.vsize, sizeof(short), 50);
00214   rdn(fp, &(opt->vec_size), sizeof(short), 1);
00215   rdn(fp, &(opt->cov_type), sizeof(short), 1);
00216   rdn(fp, &(opt->dur_type), sizeof(short), 1);
00217   rdn(fp, &(opt->param_type), sizeof(short), 1);
00218 }
00219 
00226 static void
00227 rd_type(FILE *fp, HTK_HMM_INFO *hmm)
00228 {
00229   rdn(fp, &(hmm->is_tied_mixture), sizeof(boolean), 1);
00230   rdn(fp, &(hmm->maxmixturenum), sizeof(int), 1);
00231 }
00232 
00233 
00234 /* read transition data */
00235 static HTK_HMM_Trans **tr_index; 
00236 static unsigned int tr_num;     
00237 
00248 static void
00249 rd_trans(FILE *fp, HTK_HMM_INFO *hmm)
00250 {
00251   HTK_HMM_Trans *t;
00252   unsigned int idx;
00253   int i;
00254   PROB *atmp;
00255 
00256   rdn(fp, &tr_num, sizeof(unsigned int), 1);
00257   tr_index = (HTK_HMM_Trans **)mymalloc(sizeof(HTK_HMM_Trans *) * tr_num);
00258 
00259   hmm->trstart = NULL;
00260   hmm->tr_root = NULL;
00261   for (idx = 0; idx < tr_num; idx++) {
00262     t = (HTK_HMM_Trans *)mybmalloc2(sizeof(HTK_HMM_Trans), &(hmm->mroot));
00263     t->name = rdn_str(fp, hmm);
00264     rdn(fp, &(t->statenum), sizeof(short), 1);
00265     t->a = (PROB **)mybmalloc2(sizeof(PROB *) * t->statenum, &(hmm->mroot));
00266     atmp = (PROB *)mybmalloc2(sizeof(PROB) * t->statenum * t->statenum, &(hmm->mroot));
00267     for (i=0;i<t->statenum;i++) {
00268       t->a[i] = &(atmp[i*t->statenum]);
00269       rdn(fp, t->a[i], sizeof(PROB), t->statenum);
00270     }
00271     trans_add(hmm, t);
00272     tr_index[idx] = t;
00273   }
00274 
00275 #ifdef DMES
00276   j_printf("%d transition maxtix read\n", tr_num);
00277 #endif
00278 }
00279 
00280 
00281 static HTK_HMM_Var **vr_index;  
00282 static unsigned int vr_num;     
00283 
00294 static void
00295 rd_var(FILE *fp, HTK_HMM_INFO *hmm)
00296 {
00297   HTK_HMM_Var *v;
00298   unsigned int idx;
00299 
00300   rdn(fp, &vr_num, sizeof(unsigned int), 1);
00301   vr_index = (HTK_HMM_Var **)mymalloc(sizeof(HTK_HMM_Var *) * vr_num);
00302   
00303   hmm->vrstart = NULL;
00304   hmm->vr_root = NULL;
00305   for (idx = 0; idx < vr_num; idx++) {
00306     v = (HTK_HMM_Var *)mybmalloc2(sizeof(HTK_HMM_Var), &(hmm->mroot));
00307     v->name = rdn_str(fp, hmm);
00308     rdn(fp, &(v->len), sizeof(short), 1);
00309     v->vec = (VECT *)mybmalloc2(sizeof(VECT) * v->len, &(hmm->mroot));
00310     rdn(fp, v->vec, sizeof(VECT), v->len);
00311     vr_index[idx] = v;
00312     var_add(hmm, v);
00313   }
00314 #ifdef DMES
00315   j_printf("%d variance read\n", vr_num);
00316 #endif
00317 }
00318 
00319 
00320 /* read density data */
00321 static HTK_HMM_Dens **dens_index; 
00322 static unsigned int dens_num;   
00323 
00335 static void
00336 rd_dens(FILE *fp, HTK_HMM_INFO *hmm)
00337 {
00338   HTK_HMM_Dens *d;
00339   unsigned int idx;
00340   unsigned int vid;
00341 
00342   rdn(fp, &dens_num, sizeof(unsigned int), 1);
00343   hmm->totalmixnum = dens_num;
00344   dens_index = (HTK_HMM_Dens **)mymalloc(sizeof(HTK_HMM_Dens *) * dens_num);
00345 
00346   hmm->dnstart = NULL;
00347   hmm->dn_root = NULL;
00348   for (idx = 0; idx < dens_num; idx++) {
00349     d = (HTK_HMM_Dens *)mybmalloc2(sizeof(HTK_HMM_Dens), &(hmm->mroot));
00350     d->name = rdn_str(fp, hmm);
00351     rdn(fp, &(d->meanlen), sizeof(short), 1);
00352     d->mean = (VECT *)mybmalloc2(sizeof(VECT) * d->meanlen, &(hmm->mroot));
00353     rdn(fp, d->mean, sizeof(VECT), d->meanlen);
00354     rdn(fp, &vid, sizeof(unsigned int), 1);
00355     d->var = vr_index[vid];
00356     rdn(fp, &(d->gconst), sizeof(LOGPROB), 1);
00357     dens_index[idx] = d;
00358     dens_add(hmm, d);
00359   }
00360 #ifdef DMES
00361   j_printf("%d gaussian densities read\n", dens_num);
00362 #endif
00363 }
00364 
00365 
00366 /* read tmix data */
00367 static GCODEBOOK **tm_index;    
00368 static unsigned int tm_num;     
00369 
00381 static void
00382 rd_tmix(FILE *fp, HTK_HMM_INFO *hmm)
00383 {
00384   GCODEBOOK *tm;
00385   unsigned int idx;
00386   unsigned int did;
00387   int i;
00388 
00389   rdn(fp, &tm_num, sizeof(unsigned int), 1);
00390   hmm->codebooknum = tm_num;
00391   tm_index = (GCODEBOOK **)mymalloc(sizeof(GCODEBOOK *) * tm_num);
00392   hmm->maxcodebooksize = 0;
00393 
00394   hmm->codebook_root = NULL;
00395   for (idx = 0; idx < tm_num; idx++) {
00396     tm = (GCODEBOOK *)mybmalloc2(sizeof(GCODEBOOK), &(hmm->mroot));
00397     tm->name = rdn_str(fp, hmm);
00398     rdn(fp, &(tm->num), sizeof(int), 1);
00399     if (hmm->maxcodebooksize < tm->num) hmm->maxcodebooksize = tm->num;
00400     tm->d = (HTK_HMM_Dens **)mybmalloc2(sizeof(HTK_HMM_Dens *) * tm->num, &(hmm->mroot));
00401     for(i=0;i<tm->num;i++) {
00402       rdn(fp, &did, sizeof(unsigned int), 1);
00403       if (did >= dens_num) {
00404         tm->d[i] = NULL;
00405       } else {
00406         tm->d[i] = dens_index[did];
00407       }
00408     }
00409     tm->id = idx;
00410     tm_index[idx] = tm;
00411     codebook_add(hmm, tm);
00412   }
00413 #ifdef DMES
00414   j_printf("%d tied-mixture codebooks read\n", tm_num);
00415 #endif  
00416 }
00417 
00418 /* read state data */
00419 static HTK_HMM_State **st_index; 
00420 static unsigned int st_num;     
00421 
00434 static void
00435 rd_state(FILE *fp, HTK_HMM_INFO *hmm)
00436 {
00437   HTK_HMM_State *s;
00438   unsigned int idx;
00439   unsigned int did;
00440   int i;
00441 
00442   rdn(fp, &st_num, sizeof(unsigned int), 1);
00443   hmm->totalstatenum = st_num;
00444   st_index = (HTK_HMM_State **)mymalloc(sizeof(HTK_HMM_State *) * st_num);
00445 
00446   hmm->ststart = NULL;
00447   hmm->st_root = NULL;
00448   for (idx = 0; idx < st_num; idx++) {
00449     s = (HTK_HMM_State *)mybmalloc2(sizeof(HTK_HMM_State), &(hmm->mroot));
00450     s->name = rdn_str(fp, hmm);
00451     rdn(fp, &(s->mix_num), sizeof(short), 1);
00452     if (s->mix_num == -1) {
00453       /* tmix */
00454       rdn(fp, &did, sizeof(unsigned int), 1);
00455       s->b = (HTK_HMM_Dens **)tm_index[did];
00456       s->mix_num = (tm_index[did])->num;
00457     } else {
00458       /* mixture */
00459       s->b = (HTK_HMM_Dens **)mybmalloc2(sizeof(HTK_HMM_Dens *) * s->mix_num, &(hmm->mroot));
00460       for (i=0;i<s->mix_num;i++) {
00461         rdn(fp, &did, sizeof(unsigned int), 1);
00462         if (did >= dens_num) {
00463           s->b[i] = NULL;
00464         } else {
00465           s->b[i] = dens_index[did];
00466         }
00467       }
00468     }
00469     s->bweight = (PROB *)mybmalloc2(sizeof(PROB) * s->mix_num, &(hmm->mroot));
00470     rdn(fp, s->bweight, sizeof(PROB), s->mix_num);
00471     s->id = idx;
00472     st_index[idx] = s;
00473     state_add(hmm, s);
00474   }
00475 #ifdef DMES
00476   j_printf("%d states read\n", st_num);
00477 #endif
00478 }
00479 
00491 static void
00492 rd_data(FILE *fp, HTK_HMM_INFO *hmm)
00493 {
00494   HTK_HMM_Data *d;
00495   unsigned int md_num;
00496   unsigned int sid, tid;
00497   unsigned int idx;
00498   int i;
00499 
00500   rdn(fp, &(md_num), sizeof(unsigned int), 1);
00501   hmm->totalhmmnum = md_num;
00502 
00503   hmm->start = NULL;
00504   hmm->physical_root = NULL;
00505   for (idx = 0; idx < md_num; idx++) {
00506     d = (HTK_HMM_Data *)mybmalloc2(sizeof(HTK_HMM_Data), &(hmm->mroot));
00507     d->name = rdn_str(fp, hmm);
00508     rdn(fp, &(d->state_num), sizeof(short), 1);
00509     d->s = (HTK_HMM_State **)mybmalloc2(sizeof(HTK_HMM_State *) * d->state_num, &(hmm->mroot));
00510     for (i=0;i<d->state_num;i++) {
00511       rdn(fp, &sid, sizeof(unsigned int), 1);
00512       if (sid > hmm->totalstatenum) {
00513         d->s[i] = NULL;
00514       } else {
00515         d->s[i] = st_index[sid];
00516       }
00517     }
00518     rdn(fp, &tid, sizeof(unsigned int), 1);
00519     d->tr = tr_index[tid];
00520     htk_hmmdata_add(hmm, d);
00521   }
00522 #ifdef DMES
00523   j_printf("%d HMM model definition read\n", md_num);
00524 #endif
00525 }
00526 
00527 
00528 
00539 boolean
00540 read_binhmm(FILE *fp, HTK_HMM_INFO *hmm, boolean gzfile_p, Value *para)
00541 {
00542 
00543   gzfile = gzfile_p;
00544 
00545   /* read header */
00546   if (rd_header(fp, hmm, para) == FALSE) {
00547     return FALSE;
00548   }
00549 
00550   j_printerr("(binary)...");
00551   
00552   /* read option data */
00553   rd_opt(fp, &(hmm->opt));
00554 
00555   /* read type data */
00556   rd_type(fp, hmm);
00557 
00558   /* read transition data */
00559   rd_trans(fp, hmm);
00560 
00561   /* read variance data */
00562   rd_var(fp, hmm);
00563 
00564   /* read density data */
00565   rd_dens(fp, hmm);
00566 
00567   /* read tmix data */
00568   if (hmm->is_tied_mixture) {
00569     rd_tmix(fp, hmm);
00570   }
00571 
00572   /* read state data */
00573   rd_state(fp, hmm);
00574 
00575   /* read model data */
00576   rd_data(fp, hmm);
00577 
00578   /* free pointer->index work area */
00579   free(tr_index);
00580   free(vr_index);
00581   free(dens_index);
00582   if (hmm->is_tied_mixture) free(tm_index);
00583   free(st_index);
00584 
00585   /* count maximum state num (it is not stored in binhmm... */
00586   {
00587     HTK_HMM_Data *dtmp;
00588     int maxlen = 0;
00589     for (dtmp = hmm->start; dtmp; dtmp = dtmp->next) {
00590       if (maxlen < dtmp->state_num) maxlen = dtmp->state_num;
00591     }
00592     hmm->maxstatenum = maxlen;
00593   }
00594 
00595   if (! hmm->variance_inversed) {
00596     /* inverse all variance values for faster computation */
00597     htk_hmm_inverse_variances(hmm);
00598     hmm->variance_inversed = TRUE;
00599   }
00600 
00601   j_printerr("finished\n");
00602 
00603   return (TRUE);
00604 }

Julianに対してTue Dec 26 12:56:19 2006に生成されました。  doxygen 1.5.0