libsent/src/hmminfo/cdset.c

Go to the documentation of this file.
00001 
00066 /*
00067  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
00068  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00069  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
00070  * All rights reserved
00071  */
00072 
00073 #include <sent/stddefs.h>
00074 #include <sent/htk_param.h>
00075 #include <sent/htk_hmm.h>
00076 
00078 
00079 
00080 #define CD_STATE_SET_STEP 10    /* number of state-pointer slots given to a per-state array when first allocated, and added each time it is grown */
00081 
00082 
00087 static void
00088 cdset_init(HTK_HMM_INFO *hmminfo)
00089 {
00090   hmminfo->cdset_info.cdtree = NULL;
00091 }
00092 
00098 static CD_Set *
00099 cdset_new()
00100 {
00101   return((CD_Set *)mymalloc(sizeof(CD_Set)));
00102 }
00103 
00112 CD_Set *
00113 cdset_lookup(HTK_HMM_INFO *hmminfo, char *cdstr)
00114 {
00115   CD_Set *cd;
00116   cd = aptree_search_data(cdstr, hmminfo->cdset_info.cdtree);
00117   if (cd != NULL && strmatch(cdstr, cd->name)) {
00118     return cd;
00119   } else {
00120     return NULL;
00121   }
00122 }
00123 
00132 CD_Set *
00133 lcdset_lookup_by_hmmname(HTK_HMM_INFO *hmminfo, char *hmmname)
00134 {
00135   static char buf[MAX_HMMNAME_LEN];
00136 
00137   return(cdset_lookup(hmminfo, leftcenter_name(hmmname, buf)));
00138 }
00139 
00148 CD_Set *
00149 rcdset_lookup_by_hmmname(HTK_HMM_INFO *hmminfo, char *hmmname)
00150 {
00151   static char buf[MAX_HMMNAME_LEN];
00152 
00153   return(cdset_lookup(hmminfo, rightcenter_name(hmmname, buf)));
00154 }
00155 
00156 
00162 static void
00163 put_cdset(void *ptr)
00164 {
00165   int i;
00166   CD_Set *a;
00167 
00168   a = ptr;
00169   printf("name: %s\n", a->name);
00170   /* printf("state_num: %d\n", a->state_num); */
00171   for(i=0;i<a->state_num;i++) {
00172     if (a->stateset[i].num == 0) {
00173       printf("\t[state %d]  not exist\n", i);
00174     } else {
00175       printf("\t[state %d]  %d variants\n", i, a->stateset[i].num);
00176     }
00177     /*
00178       for(j=0;j<a->stateset[i].num;j++) {
00179         put_htk_state(stdout, a->stateset[i].s[j]);
00180       }
00181     */
00182   }
00183 }
00184 
00190 void
00191 put_all_cdinfo(HTK_HMM_INFO *hmminfo)
00192 {
00193   aptree_traverse_and_do(hmminfo->cdset_info.cdtree, put_cdset);
00194 }
00195 
00196 
00206 boolean
00207 regist_cdset(APATNODE **root, HTK_HMM_Data *d, char *cdname)
00208 {
00209   boolean need_new;
00210   CD_State_Set *tmp;
00211   CD_Set *lset = NULL, *lmatch = NULL;
00212   int j,n;
00213   boolean changed = FALSE;
00214 
00215   if (strlen(cdname) >= MAX_HMMNAME_LEN) {
00216     jlog("Error: cdset: HMM name exceeds limit (%d): %s!\n", MAX_HMMNAME_LEN, cdname);
00217     jlog("Error: cdset: Please increase the value of MAX_HMMNAME_LEN (current = %d)\n", MAX_HMMNAME_LEN);
00218     exit(1);
00219   }
00220   
00221   /* check if the cdset already exist */
00222   need_new = TRUE;
00223   if (*root != NULL) {
00224     lmatch = aptree_search_data(cdname, *root);
00225     if (lmatch != NULL && strmatch(lmatch->name, cdname)) {
00226       /* exist, add to it later */
00227       lset = lmatch;
00228       need_new = FALSE;
00229       /* if the state num is larger than allocated, expand the lset */
00230       if (d->state_num > lset->state_num) {
00231         lset->stateset = (CD_State_Set *)myrealloc(lset->stateset, sizeof(CD_State_Set) * d->state_num);
00232         /* 0 1 ... (lset->state_num-1) */
00233         /* N A ... N                   */
00234         /* 0 1 ...                     ... (d->state_num-1) */
00235         /* N A ... A ..................... N                */
00236         /* malloc new area to expanded state (N to A above) */
00237         for(j = lset->state_num - 1; j < d->state_num - 1; j++) {
00238           lset->stateset[j].maxnum = CD_STATE_SET_STEP;
00239           lset->stateset[j].s = (HTK_HMM_State **)mymalloc(sizeof(HTK_HMM_State *) * lset->stateset[j].maxnum);
00240           lset->stateset[j].num = 0;
00241         }
00242         lset->stateset[d->state_num-1].s = NULL;
00243         lset->stateset[d->state_num-1].num = 0;
00244         lset->stateset[d->state_num-1].maxnum = 0;
00245         
00246         lset->state_num = d->state_num;
00247 
00248         /* update transition table */
00249         lset->tr = d->tr;
00250 
00251         changed = TRUE;
00252       }
00253     }
00254   }
00255 
00256   if (need_new) {
00257     /* allocate as new with blank data */
00258     lset = cdset_new();
00259     lset->name = strdup(cdname);
00260     lset->state_num = d->state_num;
00261     lset->stateset = (CD_State_Set *)mymalloc(sizeof(CD_State_Set) * lset->state_num);
00262     /* assume first and last state has no outprob */
00263     lset->stateset[0].s = lset->stateset[lset->state_num-1].s = NULL;
00264     lset->stateset[0].num = lset->stateset[lset->state_num-1].num = 0;
00265     lset->stateset[0].maxnum = lset->stateset[lset->state_num-1].maxnum = 0;
00266     for(j=1;j<lset->state_num-1; j++) {
00267       /* pre-allocate only the first step */
00268       lset->stateset[j].maxnum = CD_STATE_SET_STEP;
00269       lset->stateset[j].s = (HTK_HMM_State **)mymalloc(sizeof(HTK_HMM_State *) * lset->stateset[j].maxnum);
00270       lset->stateset[j].num = 0;
00271     }
00272     /* assign transition table of first found %HMM (ad-hoc?) */
00273     lset->tr = d->tr;
00274     /* add to search index tree */
00275     if (*root == NULL) {
00276       *root = aptree_make_root_node(lset);
00277     } else {
00278       aptree_add_entry(lset->name, lset, lmatch->name, root);
00279     }
00280 
00281     changed = TRUE;
00282   }
00283     
00284   /* register each HMM states to the lcdset */
00285   for (j=1;j<d->state_num-1;j++) {
00286     tmp = &(lset->stateset[j]);
00287     /* check if the state has already registered */
00288     for(n = 0; n < tmp->num ; n++) {
00289       if (tmp->s[n] == d->s[j]) { /* compare by pointer */
00290         /*jlog("\tstate %d has same\n", n);*/
00291         break;
00292       }
00293     }
00294     if (n < tmp->num ) continue;        /* same state found, cancel regist. */
00295     
00296     /* expand storage area if necessary */
00297     if (tmp->num >= tmp->maxnum) {
00298       tmp->maxnum += CD_STATE_SET_STEP;
00299       tmp->s = (HTK_HMM_State **)myrealloc(tmp->s, sizeof(HTK_HMM_State *) * tmp->maxnum);
00300     }
00301     
00302     tmp->s[tmp->num] = d->s[j];
00303     tmp->num++;
00304 
00305     changed = TRUE;
00306   }
00307 
00308   return(changed);
00309 }
00310 
00319 boolean
00320 remove_cdset(HTK_HMM_INFO *hmminfo, char *cdname)
00321 {
00322   CD_Set *lmatch;
00323   
00324   if (hmminfo->cdset_info.cdtree == NULL) return TRUE;
00325 
00326   lmatch = aptree_search_data(cdname, hmminfo->cdset_info.cdtree);
00327   if (lmatch != NULL && strmatch(lmatch->name, cdname)) {
00328     jlog("Stat: cdset: [%s] found, removed from cdset\n", lmatch->name);
00329     /* found */
00330     /*
00331     for(j=1;j<lmatch->state_num-1;j++) {
00332       free(lmatch->stateset[j].s);
00333     }
00334     free(lmatch->stateset);
00335     */
00336     aptree_remove_entry(cdname, &(hmminfo->cdset_info.cdtree));
00337   } else {
00338     return FALSE;
00339   }
00340   return TRUE;
00341 }
00342   
00343 
00352 boolean
00353 make_cdset(HTK_HMM_INFO *hmminfo)
00354 {
00355   HMM_Logical *lg;
00356   static char buf[MAX_HMMNAME_LEN];
00357 
00358   cdset_init(hmminfo);
00359   /* make cdset name from logical HMM name */
00360   /* left-context set: "a-k" for /a-k+i/, /a-k+o/, ...
00361      for 1st pass (word end) */
00362   for(lg = hmminfo->lgstart; lg; lg = lg->next) {
00363     if (lg->is_pseudo) continue;
00364     regist_cdset(&(hmminfo->cdset_info.cdtree), lg->body.defined, leftcenter_name(lg->name, buf));
00365   }
00366   /* right-context set: "a+o" for /b-a+o/, /t-a+o/, ...
00367      for 2nd pass (word beginning) */
00368   for(lg = hmminfo->lgstart; lg; lg = lg->next) {
00369     if (lg->is_pseudo) continue;
00370     regist_cdset(&(hmminfo->cdset_info.cdtree), lg->body.defined, rightcenter_name(lg->name, buf));
00371   }
00372   /* both-context set: "a" for all triphone with same base phone "a"
00373      for 1st pass (1 phoneme word, with no previous word hypo.) */
00374   for(lg = hmminfo->lgstart; lg; lg = lg->next) {
00375     if (lg->is_pseudo) continue;
00376     regist_cdset(&(hmminfo->cdset_info.cdtree), lg->body.defined, center_name(lg->name, buf));
00377   }
00378 
00379   /* now that cdset is completely built */
00380   
00381   return(TRUE);
00382 }
00383 
00389 static void
00390 callback_free_lcdset_content(void *arg)
00391 {
00392   CD_Set *d;
00393   int j;
00394 
00395   d = arg;
00396   for(j=0;j<d->state_num;j++) {
00397     if (d->stateset[j].s != NULL) free(d->stateset[j].s);
00398   }
00399   free(d->stateset);
00400   free(d->name);
00401   free(d);
00402 }
00403 
00411 void
00412 free_cdset(APATNODE **root)
00413 {
00414   if (*root != NULL) {
00415     aptree_traverse_and_do(*root, callback_free_lcdset_content);
00416     free_aptree(*root);
00417     *root = NULL;
00418   }
00419 }
00420 

Generated on Tue Dec 18 15:59:55 2007 for Julius by  doxygen 1.5.4