00001
00036
00037
00038
00039
00040
00041
00042
00043 #include <sent/stddefs.h>
00044 #include <sent/htk_param.h>
00045 #include <sent/htk_hmm.h>
00046
/* Size in bytes of the line buffer that read_token() hands to getl(). */
#define MAXBUFLEN 4096

/* Token most recently produced by read_token(); NULL when no further token
   could be read.  Not static, so it is visible outside this file. */
char *rdhmmdef_token;

/* Line counter printed by rderr(); (re)initialised by read_token() when it
   allocates the buffer. */
static int line = 0;

/* Line buffer; stays NULL until the first call to read_token(). */
static char *buf;
00052
00053
00054
00060 void
00061 rderr(char *str)
00062 {
00063 if (rdhmmdef_token == NULL) {
00064 jlog("Error: rdhmmdef: %s on end of file\n", str);
00065 } else {
00066 jlog("Error: rdhmmdef: read error at line %d: %s\n", line, (str) ? str : "parse error");
00067 }
00068 jlog_flush();
00069 exit(1);
00070 }
00071
00079 char *
00080 read_token(FILE *fp)
00081 {
00082 if (buf != NULL) {
00083
00084 if ((rdhmmdef_token = mystrtok_quote(NULL, HMMDEF_DELM)) != NULL) {
00085
00086 return rdhmmdef_token;
00087 }
00088 } else {
00089
00090 buf = (char *)mymalloc(MAXBUFLEN);
00091 line = 1;
00092 }
00093
00094 if (getl(buf, MAXBUFLEN, fp) == NULL) {
00095 rdhmmdef_token = NULL;
00096 } else {
00097 rdhmmdef_token = mystrtok_quote(buf, HMMDEF_DELM);
00098 line++;
00099 }
00100 return rdhmmdef_token;
00101 }
00102
00108 static void
00109 conv_log_arc(HTK_HMM_INFO *hmm)
00110 {
00111 HTK_HMM_Trans *tr;
00112 int i,j;
00113 LOGPROB l;
00114
00115 for (tr = hmm->trstart; tr; tr = tr->next) {
00116 for(i=0;i<tr->statenum;i++) {
00117 for(j=0;j<tr->statenum;j++) {
00118 l = tr->a[i][j];
00119 tr->a[i][j] = (l != 0.0) ? (float)log10(l) : LOG_ZERO;
00120 }
00121 }
00122 }
00123 }
00129 void
00130 htk_hmm_inverse_variances(HTK_HMM_INFO *hmm)
00131 {
00132 HTK_HMM_Var *v;
00133 int i;
00134
00135 for (v = hmm->vrstart; v; v = v->next) {
00136 for(i=0;i<v->len;i++) {
00137 v->vec[i] = 1.0 / v->vec[i];
00138 }
00139 }
00140 }
00141
#ifdef ENABLE_MSD

/**
 * Decide whether the model has to be treated as an MSD-HMM and record the
 * result in hmm->has_msd.
 *
 * For every PDF in the list starting at hmm->pdfstart whose `tmix` flag is
 * not set, the mean length of each mixture component is compared with the
 * vector size registered for the PDF's stream
 * (hmm->opt.stream_info.vsize[stream_id]).  The first mismatch sets
 * has_msd to TRUE, logs a message and ends the scan; if none is found
 * has_msd stays FALSE.
 *
 * @param hmm  HMM definition to examine; its has_msd field is overwritten.
 */
void
htk_hmm_check_msd(HTK_HMM_INFO *hmm)
{
  HTK_HMM_PDF *pdf;
  int expected_len;
  int k;

  hmm->has_msd = FALSE;
  for (pdf = hmm->pdfstart; pdf != NULL; pdf = pdf->next) {
    /* PDFs flagged as tmix are left out of the check
       (presumably tied-mixture codebooks -- confirm against the type). */
    if (!pdf->tmix) {
      expected_len = hmm->opt.stream_info.vsize[pdf->stream_id];
      for (k = 0; k < pdf->mix_num; k++) {
        if (pdf->b[k]->meanlen == expected_len) {
          continue;
        }
        jlog("Stat: rdhmmdef: assume MSD-HMM since Gaussian dimension are not consistent\n");
        hmm->has_msd = TRUE;
        return;
      }
    }
  }
}
#endif
00171
00183 boolean
00184 rdhmmdef(FILE *fp, HTK_HMM_INFO *hmm)
00185 {
00186 char macrosw;
00187 char *name;
00188
00189
00190 hmm->variance_inversed = FALSE;
00191
00192
00193 read_token(fp);
00194
00195
00196 while (rdhmmdef_token != NULL) {
00197 if (rdhmmdef_token[0] != '~') {
00198 return FALSE;
00199 }
00200 macrosw = rdhmmdef_token[1];
00201 read_token(fp);
00202 switch(macrosw) {
00203 case 'o':
00204 if (set_global_opt(fp,hmm) == FALSE) {
00205 return FALSE;
00206 }
00207 break;
00208 case 't':
00209 name = mybstrdup2(rdhmmdef_token, &(hmm->mroot));
00210 if (strlen(name) >= MAX_HMMNAME_LEN) rderr("Macro name too long");
00211 read_token(fp);
00212 def_trans_macro(name, fp, hmm);
00213 break;
00214 case 's':
00215 name = mybstrdup2(rdhmmdef_token, &(hmm->mroot));
00216 if (strlen(name) >= MAX_HMMNAME_LEN) rderr("Macro name too long");
00217 read_token(fp);
00218 def_state_macro(name, fp, hmm);
00219 break;
00220 case 'm':
00221 name = mybstrdup2(rdhmmdef_token, &(hmm->mroot));
00222 if (strlen(name) >= MAX_HMMNAME_LEN) rderr("Macro name too long");
00223 read_token(fp);
00224 def_dens_macro(name, fp, hmm);
00225 break;
00226 case 'h':
00227 name = mybstrdup2(rdhmmdef_token, &(hmm->mroot));
00228 if (strlen(name) >= MAX_HMMNAME_LEN) rderr("Macro name too long");
00229 read_token(fp);
00230 def_HMM(name, fp, hmm);
00231 break;
00232 case 'v':
00233 name = mybstrdup2(rdhmmdef_token, &(hmm->mroot));
00234 if (strlen(name) >= MAX_HMMNAME_LEN) rderr("Macro name too long");
00235 read_token(fp);
00236 def_var_macro(name, fp, hmm);
00237 break;
00238 case 'w':
00239 name = mybstrdup2(rdhmmdef_token, &(hmm->mroot));
00240 if (strlen(name) >= MAX_HMMNAME_LEN) rderr("Macro name too long");
00241 read_token(fp);
00242 def_streamweight_macro(name, fp, hmm);
00243 break;
00244 case 'r':
00245 name = mybstrdup2(rdhmmdef_token, &(hmm->mroot));
00246 if (strlen(name) >= MAX_HMMNAME_LEN) rderr("Macro name too long");
00247 read_token(fp);
00248 def_regtree_macro(name, fp, hmm);
00249 break;
00250 case 'p':
00251 name = mybstrdup2(rdhmmdef_token, &(hmm->mroot));
00252 if (strlen(name) >= MAX_HMMNAME_LEN) rderr("Macro name too long");
00253 read_token(fp);
00254 def_mpdf_macro(name, fp, hmm);
00255 break;
00256 }
00257 }
00258
00259
00260 conv_log_arc(hmm);
00261
00262 jlog("Stat: rdhmmdef: ascii format HMM definition\n");
00263
00264
00265 if (check_all_hmm_limit(hmm)) {
00266 jlog("Stat: rdhmmdef: limit check passed\n");
00267 } else {
00268 jlog("Error: rdhmmdef: cannot handle this HMM due to system limitation\n");
00269 return FALSE;
00270 }
00271
00272
00273 hmm->need_multipath = htk_hmm_has_several_arc_on_edge(hmm);
00274 if (hmm->need_multipath) {
00275 jlog("Stat: rdhmmdef: this HMM requires multipath handling at decoding\n");
00276 } else {
00277 jlog("Stat: rdhmmdef: this HMM does not need multipath handling\n");
00278 }
00279
00280
00281 if (! hmm->variance_inversed) {
00282 htk_hmm_inverse_variances(hmm);
00283 hmm->variance_inversed = TRUE;
00284 }
00285
00286
00287 if (!check_hmm_options(hmm)) {
00288 jlog("Error: rdhmmdef: hmm options check failed\n");
00289 return FALSE;
00290 }
00291
00292
00293
00294 {
00295 HTK_HMM_State *stmp;
00296 int n, max, s, mix;
00297 n = 0;
00298 max = 0;
00299 for (stmp = hmm->ststart; stmp; stmp = stmp->next) {
00300 for(s=0;s<stmp->nstream;s++) {
00301 mix = stmp->pdf[s]->mix_num;
00302 if (max < mix) max = mix;
00303 }
00304 stmp->id = n++;
00305 if (n >= MAX_STATE_NUM) {
00306 jlog("Error: rdhmmdef: too much states in a model > %d\n", MAX_STATE_NUM);
00307 return FALSE;
00308 }
00309 }
00310 hmm->totalstatenum = n;
00311 hmm->maxmixturenum = max;
00312 }
00313
00314 {
00315 HTK_HMM_Data *dtmp;
00316 int n, maxlen;
00317 n = 0;
00318 maxlen = 0;
00319 for (dtmp = hmm->start; dtmp; dtmp = dtmp->next) {
00320 if (maxlen < dtmp->state_num) maxlen = dtmp->state_num;
00321 n++;
00322 }
00323 hmm->maxstatenum = maxlen;
00324 hmm->totalhmmnum = n;
00325 }
00326
00327 {
00328 HTK_HMM_Dens *dtmp;
00329 int n = 0;
00330 for (dtmp = hmm->dnstart; dtmp; dtmp = dtmp->next) {
00331 n++;
00332 }
00333 hmm->totalmixnum = n;
00334 }
00335
00336 {
00337 HTK_HMM_Dens *dtmp;
00338 int n = 0;
00339 for (dtmp = hmm->dnstart; dtmp; dtmp = dtmp->next) {
00340 n++;
00341 }
00342 hmm->totalmixnum = n;
00343 }
00344
00345 {
00346 HTK_HMM_PDF *p;
00347 int n = 0;
00348 for (p = hmm->pdfstart; p; p = p->next) {
00349 n++;
00350 }
00351 hmm->totalpdfnum = n;
00352 }
00353 #ifdef ENABLE_MSD
00354
00355 htk_hmm_check_msd(hmm);
00356 #endif
00357
00358 return(TRUE);
00359 }