libsent/src/wav2mfcc/para.c

説明を見る。
00001 
00021 /*
00022  * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
00023  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00024  * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology
00025  * All rights reserved
00026  */
00027 
00028 #include <sent/mfcc.h>
00029 #include <sent/speech.h>
00030 
00037 void
00038 undef_para(Value *para)
00039 {
00040   para->smp_period = -1;
00041   para->smp_freq   = -1;
00042   para->framesize  = -1;
00043   para->frameshift = -1;
00044   para->preEmph    = -1;
00045   para->mfcc_dim   = -1;
00046   para->lifter     = -1;
00047   para->fbank_num  = -1;
00048   para->delWin     = -1;
00049   para->accWin     = -1;
00050   para->silFloor   = -1;
00051   para->escale     = -1;
00052   para->enormal    = -1;
00053   para->hipass     = -2;        /* undef */
00054   para->lopass     = -2;        /* undef */
00055   para->cmn        = -1;
00056   para->raw_e      = -1;
00057   para->c0         = -1;
00058   para->ss_alpha   = -1;
00059   para->ss_floor   = -1;
00060   para->zmeanframe = -1;
00061   para->delta      = -1;
00062   para->acc        = -1;
00063   para->energy     = -1;
00064   para->absesup    = -1;
00065   para->baselen    = -1;
00066   para->vecbuflen  = -1;
00067   para->veclen     = -1;
00068 
00069   para->loaded     = 0;
00070 }
00071 
00078 void
00079 make_default_para(Value *para)
00080 {
00081   para->smp_period = 625;       /* 16kHz = 625 100ns unit */
00082   para->smp_freq   = 16000;     /* 16kHz = 625 100ns unit */
00083   para->framesize  = DEF_FRAMESIZE;
00084   para->frameshift = DEF_FRAMESHIFT;
00085   para->preEmph    = DEF_PREENPH;
00086   para->fbank_num  = DEF_FBANK;
00087   para->lifter     = DEF_CEPLIF;
00088   para->delWin     = DEF_DELWIN;
00089   para->accWin     = DEF_ACCWIN;
00090   para->raw_e      = FALSE;
00091   para->enormal    = FALSE;
00092   para->escale     = DEF_ESCALE;
00093   para->silFloor   = DEF_SILFLOOR;
00094   para->hipass     = -1;        /* disabled */
00095   para->lopass     = -1;        /* disabled */
00096   para->ss_alpha    = DEF_SSALPHA;
00097   para->ss_floor    = DEF_SSFLOOR;
00098   para->zmeanframe = FALSE;
00099 }
00100 
00108 void
00109 make_default_para_htk(Value *para)
00110 {
00111   para->framesize  = 256000.0;  /* dummy! */
00112   para->preEmph    = 0.97;
00113   para->fbank_num  = 20;
00114   para->lifter     = 22;
00115   para->delWin     = 2;
00116   para->accWin     = 2;
00117   para->raw_e      = TRUE;
00118   para->enormal    = TRUE;
00119   para->escale     = 0.1;
00120   para->silFloor   = 50.0;
00121   para->hipass     = -1;        /* disabled */
00122   para->lopass     = -1;        /* disabled */
00123   para->zmeanframe = FALSE;
00124 }
00125 
00133 void
00134 apply_para(Value *dst, Value *src)
00135 {
00136   if (dst->smp_period == -1) dst->smp_period = src->smp_period;
00137   if (dst->smp_freq   == -1) dst->smp_freq = src->smp_freq; 
00138   if (dst->framesize  == -1) dst->framesize = src->framesize; 
00139   if (dst->frameshift == -1) dst->frameshift = src->frameshift; 
00140   if (dst->preEmph    == -1) dst->preEmph = src->preEmph; 
00141   if (dst->mfcc_dim   == -1) dst->mfcc_dim = src->mfcc_dim; 
00142   if (dst->lifter     == -1) dst->lifter = src->lifter; 
00143   if (dst->fbank_num  == -1) dst->fbank_num = src->fbank_num; 
00144   if (dst->delWin     == -1) dst->delWin = src->delWin; 
00145   if (dst->accWin     == -1) dst->accWin = src->accWin; 
00146   if (dst->silFloor   == -1) dst->silFloor = src->silFloor; 
00147   if (dst->escale     == -1) dst->escale = src->escale; 
00148   if (dst->enormal    == -1) dst->enormal = src->enormal; 
00149   if (dst->hipass     == -2) dst->hipass = src->hipass;
00150   if (dst->lopass     == -2) dst->lopass = src->lopass;
00151   if (dst->cmn        == -1) dst->cmn = src->cmn; 
00152   if (dst->raw_e      == -1) dst->raw_e = src->raw_e; 
00153   if (dst->c0         == -1) dst->c0 = src->c0; 
00154   if (dst->ss_alpha   == -1) dst->ss_alpha = src->ss_alpha; 
00155   if (dst->ss_floor   == -1) dst->ss_floor = src->ss_floor; 
00156   if (dst->zmeanframe == -1) dst->zmeanframe = src->zmeanframe; 
00157   if (dst->delta      == -1) dst->delta = src->delta; 
00158   if (dst->acc        == -1) dst->acc = src->acc; 
00159   if (dst->energy     == -1) dst->energy = src->energy; 
00160   if (dst->absesup    == -1) dst->absesup = src->absesup; 
00161   if (dst->baselen    == -1) dst->baselen = src->baselen; 
00162   if (dst->vecbuflen  == -1) dst->vecbuflen = src->vecbuflen; 
00163   if (dst->veclen     == -1) dst->veclen = src->veclen; 
00164 }
00165 
00166 
/* True when character A is a token separator (space, tab or newline).
   The argument is parenthesized so that an expression argument keeps its
   meaning; note that A is still evaluated up to three times. */
#define ISTOKEN(A) ((A) == ' ' || (A) == '\t' || (A) == '\n')
00168 
00169 
00177 boolean
00178 htk_config_file_parse(char *HTKconffile, Value *para)
00179 {
00180   FILE *fp;
00181   char buf[512];
00182   char *p, *d, *a;
00183   float srate;
00184   boolean skipped;
00185 
00186   j_printerr("include HTK Config: %s\n", HTKconffile);
00187   
00188   /* convert the content into argument list c_argv[1..c_argc-1] */
00189   /* c_argv[0] will be the original conffile name */
00190   if ((fp = fopen(HTKconffile, "r")) == NULL) {
00191     j_printerr("Error: rdhtkconf: failed to open HTK Config file: %s\n", HTKconffile);
00192   }
00193 
00194   while (getl_fp(buf, 512, fp) != NULL) {
00195     p = buf;
00196     if (*p == 35) { /* skip comment line */
00197       continue;
00198     }
00199 
00200     /* parse the input line to get directive and argument */
00201     while (*p != '\0' && ISTOKEN(*p)) p++;
00202     if (*p == '\0') continue;
00203     d = p;
00204     while (*p != '\0' && (!ISTOKEN(*p)) && *p != '=') p++;
00205     if (*p == '\0') continue;
00206     *p = '\0'; p++;
00207     while (*p != '\0' && ((ISTOKEN(*p)) || *p == '=')) p++;
00208     if (*p == '\0') continue;
00209     a = p;
00210     while (*p != '\0' && (!ISTOKEN(*p))) p++;
00211     *p = '\0';
00212 
00213     /* process arguments */
00214     skipped = FALSE;
00215     if (strmatch(d, "SOURCERATE")) { /* -smpPeriod */
00216       srate = atof(a);
00217     } else if (strmatch(d, "TARGETRATE")) { /* -fshift */
00218       para->frameshift = atof(a);
00219     } else if (strmatch(d, "WINDOWSIZE")) { /* -fsize */
00220       para->framesize = atof(a);
00221     } else if (strmatch(d, "ZMEANSOURCE")) { /* -zmeansource */
00222       para->zmeanframe = (a[0] == 'T') ? TRUE : FALSE;
00223     } else if (strmatch(d, "PREEMCOEF")) { /* -preemph */
00224       para->preEmph = atof(a);
00225     } else if (strmatch(d, "USEHAMMING")) { /* (fixed to T) */
00226       if (a[0] != 'T') {
00227         j_error("\nError: in HTK Config \"%s\": USEHAMMING should be T\n", HTKconffile);
00228       }
00229     } else if (strmatch(d, "NUMCHANS")) { /* -fbank */
00230       para->fbank_num = atoi(a);
00231     } else if (strmatch(d, "CEPLIFTER")) { /* -ceplif */
00232       para->lifter = atoi(a);
00233     } else if (strmatch(d, "DELTAWINDOW")) { /* -delwin */
00234       para->delWin = atoi(a);
00235     } else if (strmatch(d, "ACCWINDOW")) { /* -accwin */
00236       para->accWin = atoi(a);
00237     } else if (strmatch(d, "LOFREQ")) { /* -lofreq */
00238       para->lopass = atof(a);
00239     } else if (strmatch(d, "HIFREQ")) { /* -hifreq */
00240       para->hipass = atof(a);
00241     } else if (strmatch(d, "RAWENERGY")) { /* -rawe */
00242       para->raw_e = (a[0] == 'T') ? TRUE : FALSE;
00243     } else if (strmatch(d, "ENORMALISE")) { /* -enormal */
00244       para->enormal = (a[0] == 'T') ? TRUE : FALSE;
00245     } else if (strmatch(d, "ESCALE")) { /* -escale */
00246       para->escale = atof(a);
00247     } else if (strmatch(d, "SILFLOOR")) { /* -silfloor */
00248       para->silFloor = atof(a);
00249     } else if (strmatch(d, "TARGETKIND")) {
00250       j_printerr("TARGETKIND specified but skipped (will be set from AM header)\n");
00251       skipped = TRUE;
00252     } else if (strmatch(d, "NUMCEPS")) {
00253       j_printerr("NUMCEPS specified but skipped (will be set from AM header)\n");
00254       skipped = TRUE;
00255     } else {
00256       skipped = TRUE;
00257     }
00258     if (!skipped) {
00259       j_printerr("%s=%s\n", d, a);
00260     }
00261   }
00262 
00263   para->smp_period = srate;
00264   para->smp_freq = period2freq(para->smp_period);
00265   para->frameshift /= srate;
00266   para->framesize /= srate;
00267 
00268   if (fclose(fp) == -1) {
00269     j_printerr("Error: rdhtkconf: jconf file cannot close\n");
00270   }
00271 
00272   para->loaded = 1;
00273 
00274   return TRUE;
00275 }
00276 
00277 
00285 void
00286 calc_para_from_header(Value *para, short param_type, short vec_size)
00287 {
00288   int dim;
00289 
00290   /* decode required parameter extraction types */
00291   para->delta = (param_type & F_DELTA) ? TRUE : FALSE;
00292   para->acc = (param_type & F_ACCL) ? TRUE : FALSE;
00293   para->energy = (param_type & F_ENERGY) ? TRUE : FALSE;
00294   para->c0 = (param_type & F_ZEROTH) ? TRUE : FALSE;
00295   para->absesup = (param_type & F_ENERGY_SUP) ? TRUE : FALSE;
00296   para->cmn = (param_type & F_CEPNORM) ? TRUE : FALSE;
00297   /* guess MFCC dimension from the vector size and parameter type in the
00298      acoustic HMM */
00299   dim = vec_size;
00300   if (para->absesup) dim++;
00301   dim /= 1 + (para->delta ? 1 : 0) + (para->acc ? 1 : 0);
00302   if (para->energy) dim--;
00303   if (para->c0) dim--;
00304   para->mfcc_dim = dim;
00305     
00306   /* determine base size */
00307   para->baselen = para->mfcc_dim + (para->c0 ? 1 : 0) + (para->energy ? 1 : 0);
00308   /* set required size of parameter vector for MFCC computation */
00309   para->vecbuflen = para->baselen * (1 + (para->delta ? 1 : 0) + (para->acc ? 1 : 0));
00310   /* set size of final parameter vector */
00311   para->veclen = para->vecbuflen - (para->absesup ? 1 : 0);
00312 }
00313 
00314 
00321 void
00322 put_para(Value *para)
00323 {
00324   j_printf("Acoustic analysis condition:\n");
00325   j_printf("\t       parameter = MFCC");
00326   if (para->c0) j_printf("_0");
00327   if (para->energy) j_printf("_E");
00328   if (para->delta) j_printf("_D");
00329   if (para->acc) j_printf("_A");
00330   if (para->absesup) j_printf("_N");
00331   if (para->cmn) j_printf("_Z");
00332   j_printf(" (%d dimension from %d cepstrum)\n", para->veclen, para->mfcc_dim);
00333   j_printf("\tsample frequency = %5d Hz\n", para->smp_freq);
00334   j_printf("\t   sample period = %4d  (100ns unit)\n", para->smp_period);
00335   j_printf("\t     window size = %4d samples (%.1f ms)\n", para->framesize,
00336            (float)para->smp_period * (float)para->framesize / 10000.0);
00337   j_printf("\t     frame shift = %4d samples (%.1f ms)\n", para->frameshift,
00338            (float)para->smp_period * (float)para->frameshift / 10000.0);
00339   j_printf("\t    pre-emphasis = %.2f\n", para->preEmph);
00340   j_printf("\t    # filterbank = %d\n", para->fbank_num);
00341   j_printf("\t   cepst. lifter = %d\n", para->lifter);
00342   j_printf("\t      raw energy = %s\n", para->raw_e ? "True" : "False");
00343   if (para->enormal) {
00344     j_printf("\tenergy normalize = True (scale = %.1f, silence floor = %.1f dB)\n", para->escale, para->silFloor);
00345   } else {
00346     j_printf("\tenergy normalize = False\n");
00347   }
00348   if (para->delta) {
00349     j_printf("\t    delta window = %d frames (%.1f ms) around\n", para->delWin,  (float)para->delWin * (float)para->smp_period * (float)para->frameshift / 10000.0);
00350   }
00351   if (para->acc) {
00352     j_printf("\t      acc window = %d frames (%.1f ms) around\n", para->accWin, (float)para->accWin * (float)para->smp_period * (float)para->frameshift / 10000.0);
00353   }
00354   j_printf("\t        hi freq. = ");
00355   if (para->hipass < 0) j_printf("OFF\n"); 
00356   else j_printf("%5d Hz\n", para->hipass);
00357   j_printf("\t        lo freq. = ");
00358   if (para->lopass < 0) j_printf("OFF\n"); 
00359   else j_printf("%5d Hz\n", para->lopass);
00360   j_printf("\t zero mean frame = ");
00361   if (para->zmeanframe) j_printf("ON\n");
00362   else j_printf("OFF\n");
00363 }

Julianに対してTue Dec 26 12:56:19 2006に生成されました。  doxygen 1.5.0