libsent/src/wav2mfcc/para.c

Go to the documentation of this file.
00001 
00022 /*
00023  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
00024  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00025  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
00026  * All rights reserved
00027  */
00028 
00029 #include <sent/mfcc.h>
00030 #include <sent/speech.h>
00031 
00038 void
00039 undef_para(Value *para)
00040 {
00041   para->smp_period = -1;
00042   para->smp_freq   = -1;
00043   para->framesize  = -1;
00044   para->frameshift = -1;
00045   para->preEmph    = -1;
00046   para->mfcc_dim   = -1;
00047   para->lifter     = -1;
00048   para->fbank_num  = -1;
00049   para->delWin     = -1;
00050   para->accWin     = -1;
00051   para->silFloor   = -1;
00052   para->escale     = -1;
00053   para->enormal    = -1;
00054   para->hipass     = -2;        /* undef */
00055   para->lopass     = -2;        /* undef */
00056   para->cmn        = -1;
00057   para->raw_e      = -1;
00058   para->c0         = -1;
00059   //para->ss_alpha   = -1;
00060   //para->ss_floor   = -1;
00061   para->zmeanframe = -1;
00062   para->delta      = -1;
00063   para->acc        = -1;
00064   para->energy     = -1;
00065   para->absesup    = -1;
00066   para->baselen    = -1;
00067   para->vecbuflen  = -1;
00068   para->veclen     = -1;
00069 
00070   para->loaded     = 0;
00071 }
00072 
00079 void
00080 make_default_para(Value *para)
00081 {
00082   para->smp_period = 625;       /* 16kHz = 625 100ns unit */
00083   para->smp_freq   = 16000;     /* 16kHz = 625 100ns unit */
00084   para->framesize  = DEF_FRAMESIZE;
00085   para->frameshift = DEF_FRAMESHIFT;
00086   para->preEmph    = DEF_PREENPH;
00087   para->fbank_num  = DEF_FBANK;
00088   para->lifter     = DEF_CEPLIF;
00089   para->delWin     = DEF_DELWIN;
00090   para->accWin     = DEF_ACCWIN;
00091   para->raw_e      = FALSE;
00092   para->enormal    = FALSE;
00093   para->escale     = DEF_ESCALE;
00094   para->silFloor   = DEF_SILFLOOR;
00095   para->hipass     = -1;        /* disabled */
00096   para->lopass     = -1;        /* disabled */
00097   //para->ss_alpha    = DEF_SSALPHA;
00098   //para->ss_floor    = DEF_SSFLOOR;
00099   para->zmeanframe = FALSE;
00100 }
00101 
00109 void
00110 make_default_para_htk(Value *para)
00111 {
00112   para->framesize  = 256000.0;  /* dummy! */
00113   para->preEmph    = 0.97;
00114   para->fbank_num  = 20;
00115   para->lifter     = 22;
00116   para->delWin     = 2;
00117   para->accWin     = 2;
00118   para->raw_e      = TRUE;
00119   para->enormal    = TRUE;
00120   para->escale     = 0.1;
00121   para->silFloor   = 50.0;
00122   para->hipass     = -1;        /* disabled */
00123   para->lopass     = -1;        /* disabled */
00124   para->zmeanframe = FALSE;
00125 }
00126 
00134 void
00135 apply_para(Value *dst, Value *src)
00136 {
00137   if (dst->smp_period == -1) dst->smp_period = src->smp_period;
00138   if (dst->smp_freq   == -1) dst->smp_freq = src->smp_freq; 
00139   if (dst->framesize  == -1) dst->framesize = src->framesize; 
00140   if (dst->frameshift == -1) dst->frameshift = src->frameshift; 
00141   if (dst->preEmph    == -1) dst->preEmph = src->preEmph; 
00142   if (dst->mfcc_dim   == -1) dst->mfcc_dim = src->mfcc_dim; 
00143   if (dst->lifter     == -1) dst->lifter = src->lifter; 
00144   if (dst->fbank_num  == -1) dst->fbank_num = src->fbank_num; 
00145   if (dst->delWin     == -1) dst->delWin = src->delWin; 
00146   if (dst->accWin     == -1) dst->accWin = src->accWin; 
00147   if (dst->silFloor   == -1) dst->silFloor = src->silFloor; 
00148   if (dst->escale     == -1) dst->escale = src->escale; 
00149   if (dst->enormal    == -1) dst->enormal = src->enormal; 
00150   if (dst->hipass     == -2) dst->hipass = src->hipass;
00151   if (dst->lopass     == -2) dst->lopass = src->lopass;
00152   if (dst->cmn        == -1) dst->cmn = src->cmn; 
00153   if (dst->raw_e      == -1) dst->raw_e = src->raw_e; 
00154   if (dst->c0         == -1) dst->c0 = src->c0; 
00155   //if (dst->ss_alpha   == -1) dst->ss_alpha = src->ss_alpha; 
00156   //if (dst->ss_floor   == -1) dst->ss_floor = src->ss_floor; 
00157   if (dst->zmeanframe == -1) dst->zmeanframe = src->zmeanframe; 
00158   if (dst->delta      == -1) dst->delta = src->delta; 
00159   if (dst->acc        == -1) dst->acc = src->acc; 
00160   if (dst->energy     == -1) dst->energy = src->energy; 
00161   if (dst->absesup    == -1) dst->absesup = src->absesup; 
00162   if (dst->baselen    == -1) dst->baselen = src->baselen; 
00163   if (dst->vecbuflen  == -1) dst->vecbuflen = src->vecbuflen; 
00164   if (dst->veclen     == -1) dst->veclen = src->veclen; 
00165 }
00166 
00167 #define ISTOKEN(A) (A == ' ' || A == '\t' || A == '\n') 
00168 
00169 
00177 boolean
00178 htk_config_file_parse(char *HTKconffile, Value *para)
00179 {
00180   FILE *fp;
00181   char buf[512];
00182   char *p, *d, *a;
00183   float srate;
00184   boolean skipped;
00185 
00186   jlog("Stat: para: parsing HTK Config file: %s\n", HTKconffile);
00187   
00188   /* convert the content into argument list c_argv[1..c_argc-1] */
00189   /* c_argv[0] will be the original conffile name */
00190   if ((fp = fopen(HTKconffile, "r")) == NULL) {
00191     jlog("Error: para: failed to open HTK Config file: %s\n", HTKconffile);
00192     return FALSE;
00193   }
00194 
00195   while (getl_fp(buf, 512, fp) != NULL) {
00196     p = buf;
00197     if (*p == 35) { /* skip comment line */
00198       continue;
00199     }
00200 
00201     /* parse the input line to get directive and argument */
00202     while (*p != '\0' && ISTOKEN(*p)) p++;
00203     if (*p == '\0') continue;
00204     d = p;
00205     while (*p != '\0' && (!ISTOKEN(*p)) && *p != '=') p++;
00206     if (*p == '\0') continue;
00207     *p = '\0'; p++;
00208     while (*p != '\0' && ((ISTOKEN(*p)) || *p == '=')) p++;
00209     if (*p == '\0') continue;
00210     a = p;
00211     while (*p != '\0' && (!ISTOKEN(*p))) p++;
00212     *p = '\0';
00213 
00214     /* process arguments */
00215     skipped = FALSE;
00216     if (strmatch(d, "SOURCERATE")) { /* -smpPeriod */
00217       srate = atof(a);
00218     } else if (strmatch(d, "TARGETRATE")) { /* -fshift */
00219       para->frameshift = atof(a);
00220     } else if (strmatch(d, "WINDOWSIZE")) { /* -fsize */
00221       para->framesize = atof(a);
00222     } else if (strmatch(d, "ZMEANSOURCE")) { /* -zmeansource */
00223       para->zmeanframe = (a[0] == 'T') ? TRUE : FALSE;
00224     } else if (strmatch(d, "PREEMCOEF")) { /* -preemph */
00225       para->preEmph = atof(a);
00226     } else if (strmatch(d, "USEHAMMING")) { /* (fixed to T) */
00227       if (a[0] != 'T') {
00228         jlog("Error: para: USEHAMMING should be T\n", HTKconffile);
00229         return FALSE;
00230       }
00231     } else if (strmatch(d, "NUMCHANS")) { /* -fbank */
00232       para->fbank_num = atoi(a);
00233     } else if (strmatch(d, "CEPLIFTER")) { /* -ceplif */
00234       para->lifter = atoi(a);
00235     } else if (strmatch(d, "DELTAWINDOW")) { /* -delwin */
00236       para->delWin = atoi(a);
00237     } else if (strmatch(d, "ACCWINDOW")) { /* -accwin */
00238       para->accWin = atoi(a);
00239     } else if (strmatch(d, "LOFREQ")) { /* -lofreq */
00240       para->lopass = atof(a);
00241     } else if (strmatch(d, "HIFREQ")) { /* -hifreq */
00242       para->hipass = atof(a);
00243     } else if (strmatch(d, "RAWENERGY")) { /* -rawe */
00244       para->raw_e = (a[0] == 'T') ? TRUE : FALSE;
00245     } else if (strmatch(d, "ENORMALISE")) { /* -enormal */
00246       para->enormal = (a[0] == 'T') ? TRUE : FALSE;
00247     } else if (strmatch(d, "ESCALE")) { /* -escale */
00248       para->escale = atof(a);
00249     } else if (strmatch(d, "SILFLOOR")) { /* -silfloor */
00250       para->silFloor = atof(a);
00251     } else if (strmatch(d, "TARGETKIND")) {
00252       jlog("Stat: para: TARGETKIND skipped (will be set from AM header)\n");
00253       skipped = TRUE;
00254     } else if (strmatch(d, "NUMCEPS")) {
00255       jlog("Stat: para: NUMCEPS skipped (will be set from AM header)\n");
00256       skipped = TRUE;
00257     } else {
00258       skipped = TRUE;
00259     }
00260     if (!skipped) {
00261       jlog("Stat: para: %s=%s\n", d, a);
00262     }
00263   }
00264 
00265   para->smp_period = srate;
00266   para->smp_freq = period2freq(para->smp_period);
00267   para->frameshift /= srate;
00268   para->framesize /= srate;
00269 
00270   if (fclose(fp) == -1) {
00271     jlog("Error: para: failed to close file\n");
00272     return FALSE;
00273   }
00274 
00275   para->loaded = 1;
00276 
00277   return TRUE;
00278 }
00279 
00287 void
00288 calc_para_from_header(Value *para, short param_type, short vec_size)
00289 {
00290   int dim;
00291 
00292   /* decode required parameter extraction types */
00293   para->delta = (param_type & F_DELTA) ? TRUE : FALSE;
00294   para->acc = (param_type & F_ACCL) ? TRUE : FALSE;
00295   para->energy = (param_type & F_ENERGY) ? TRUE : FALSE;
00296   para->c0 = (param_type & F_ZEROTH) ? TRUE : FALSE;
00297   para->absesup = (param_type & F_ENERGY_SUP) ? TRUE : FALSE;
00298   para->cmn = (param_type & F_CEPNORM) ? TRUE : FALSE;
00299   /* guess MFCC dimension from the vector size and parameter type in the
00300      acoustic HMM */
00301   dim = vec_size;
00302   if (para->absesup) dim++;
00303   dim /= 1 + (para->delta ? 1 : 0) + (para->acc ? 1 : 0);
00304   if (para->energy) dim--;
00305   if (para->c0) dim--;
00306   para->mfcc_dim = dim;
00307     
00308   /* determine base size */
00309   para->baselen = para->mfcc_dim + (para->c0 ? 1 : 0) + (para->energy ? 1 : 0);
00310   /* set required size of parameter vector for MFCC computation */
00311   para->vecbuflen = para->baselen * (1 + (para->delta ? 1 : 0) + (para->acc ? 1 : 0));
00312   /* set size of final parameter vector */
00313   para->veclen = para->vecbuflen - (para->absesup ? 1 : 0);
00314 }
00315 
00323 void
00324 put_para(FILE *fp, Value *para)
00325 {
00326   fprintf(fp, " Acoustic analysis condition:\n");
00327   fprintf(fp, "\t       parameter = MFCC");
00328   if (para->c0) fprintf(fp, "_0");
00329   if (para->energy) fprintf(fp, "_E");
00330   if (para->delta) fprintf(fp, "_D");
00331   if (para->acc) fprintf(fp, "_A");
00332   if (para->absesup) fprintf(fp, "_N");
00333   if (para->cmn) fprintf(fp, "_Z");
00334   fprintf(fp, " (%d dimension from %d cepstrum)\n", para->veclen, para->mfcc_dim);
00335   fprintf(fp, "\tsample frequency = %5ld Hz\n", para->smp_freq);
00336   fprintf(fp, "\t   sample period = %4ld  (1 = 100ns)\n", para->smp_period);
00337   fprintf(fp, "\t     window size = %4d samples (%.1f ms)\n", para->framesize,
00338            (float)para->smp_period * (float)para->framesize / 10000.0);
00339   fprintf(fp, "\t     frame shift = %4d samples (%.1f ms)\n", para->frameshift,
00340            (float)para->smp_period * (float)para->frameshift / 10000.0);
00341   fprintf(fp, "\t    pre-emphasis = %.2f\n", para->preEmph);
00342   fprintf(fp, "\t    # filterbank = %d\n", para->fbank_num);
00343   fprintf(fp, "\t   cepst. lifter = %d\n", para->lifter);
00344   fprintf(fp, "\t      raw energy = %s\n", para->raw_e ? "True" : "False");
00345   if (para->enormal) {
00346     fprintf(fp, "\tenergy normalize = True (scale = %.1f, silence floor = %.1f dB)\n", para->escale, para->silFloor);
00347   } else {
00348     fprintf(fp, "\tenergy normalize = False\n");
00349   }
00350   if (para->delta) {
00351     fprintf(fp, "\t    delta window = %d frames (%.1f ms) around\n", para->delWin,  (float)para->delWin * (float)para->smp_period * (float)para->frameshift / 10000.0);
00352   }
00353   if (para->acc) {
00354     fprintf(fp, "\t      acc window = %d frames (%.1f ms) around\n", para->accWin, (float)para->accWin * (float)para->smp_period * (float)para->frameshift / 10000.0);
00355   }
00356   fprintf(fp, "\t     hi freq cut = ");
00357   if (para->hipass < 0) fprintf(fp, "OFF\n"); 
00358   else fprintf(fp, "%5d Hz\n", para->hipass);
00359   fprintf(fp, "\t     lo freq cut = ");
00360   if (para->lopass < 0) fprintf(fp, "OFF\n"); 
00361   else fprintf(fp, "%5d Hz\n", para->lopass);
00362   fprintf(fp, "\t zero mean frame = ");
00363   if (para->zmeanframe) fprintf(fp, "ON\n");
00364   else fprintf(fp, "OFF\n");
00365 }

Generated on Tue Dec 18 15:59:57 2007 for Julius by  doxygen 1.5.4