00001
00022
00023
00024
00025
00026
00027
00028
00029 #include <sent/mfcc.h>
00030 #include <sent/speech.h>
00031
00038 void
00039 undef_para(Value *para)
00040 {
00041 para->smp_period = -1;
00042 para->smp_freq = -1;
00043 para->framesize = -1;
00044 para->frameshift = -1;
00045 para->preEmph = -1;
00046 para->mfcc_dim = -1;
00047 para->lifter = -1;
00048 para->fbank_num = -1;
00049 para->delWin = -1;
00050 para->accWin = -1;
00051 para->silFloor = -1;
00052 para->escale = -1;
00053 para->enormal = -1;
00054 para->hipass = -2;
00055 para->lopass = -2;
00056 para->cmn = -1;
00057 para->raw_e = -1;
00058 para->c0 = -1;
00059
00060
00061 para->zmeanframe = -1;
00062 para->delta = -1;
00063 para->acc = -1;
00064 para->energy = -1;
00065 para->absesup = -1;
00066 para->baselen = -1;
00067 para->vecbuflen = -1;
00068 para->veclen = -1;
00069
00070 para->loaded = 0;
00071 }
00072
00079 void
00080 make_default_para(Value *para)
00081 {
00082 para->smp_period = 625;
00083 para->smp_freq = 16000;
00084 para->framesize = DEF_FRAMESIZE;
00085 para->frameshift = DEF_FRAMESHIFT;
00086 para->preEmph = DEF_PREENPH;
00087 para->fbank_num = DEF_FBANK;
00088 para->lifter = DEF_CEPLIF;
00089 para->delWin = DEF_DELWIN;
00090 para->accWin = DEF_ACCWIN;
00091 para->raw_e = FALSE;
00092 para->enormal = FALSE;
00093 para->escale = DEF_ESCALE;
00094 para->silFloor = DEF_SILFLOOR;
00095 para->hipass = -1;
00096 para->lopass = -1;
00097
00098
00099 para->zmeanframe = FALSE;
00100 }
00101
00109 void
00110 make_default_para_htk(Value *para)
00111 {
00112 para->framesize = 256000.0;
00113 para->preEmph = 0.97;
00114 para->fbank_num = 20;
00115 para->lifter = 22;
00116 para->delWin = 2;
00117 para->accWin = 2;
00118 para->raw_e = TRUE;
00119 para->enormal = TRUE;
00120 para->escale = 0.1;
00121 para->silFloor = 50.0;
00122 para->hipass = -1;
00123 para->lopass = -1;
00124 para->zmeanframe = FALSE;
00125 }
00126
00134 void
00135 apply_para(Value *dst, Value *src)
00136 {
00137 if (dst->smp_period == -1) dst->smp_period = src->smp_period;
00138 if (dst->smp_freq == -1) dst->smp_freq = src->smp_freq;
00139 if (dst->framesize == -1) dst->framesize = src->framesize;
00140 if (dst->frameshift == -1) dst->frameshift = src->frameshift;
00141 if (dst->preEmph == -1) dst->preEmph = src->preEmph;
00142 if (dst->mfcc_dim == -1) dst->mfcc_dim = src->mfcc_dim;
00143 if (dst->lifter == -1) dst->lifter = src->lifter;
00144 if (dst->fbank_num == -1) dst->fbank_num = src->fbank_num;
00145 if (dst->delWin == -1) dst->delWin = src->delWin;
00146 if (dst->accWin == -1) dst->accWin = src->accWin;
00147 if (dst->silFloor == -1) dst->silFloor = src->silFloor;
00148 if (dst->escale == -1) dst->escale = src->escale;
00149 if (dst->enormal == -1) dst->enormal = src->enormal;
00150 if (dst->hipass == -2) dst->hipass = src->hipass;
00151 if (dst->lopass == -2) dst->lopass = src->lopass;
00152 if (dst->cmn == -1) dst->cmn = src->cmn;
00153 if (dst->raw_e == -1) dst->raw_e = src->raw_e;
00154 if (dst->c0 == -1) dst->c0 = src->c0;
00155
00156
00157 if (dst->zmeanframe == -1) dst->zmeanframe = src->zmeanframe;
00158 if (dst->delta == -1) dst->delta = src->delta;
00159 if (dst->acc == -1) dst->acc = src->acc;
00160 if (dst->energy == -1) dst->energy = src->energy;
00161 if (dst->absesup == -1) dst->absesup = src->absesup;
00162 if (dst->baselen == -1) dst->baselen = src->baselen;
00163 if (dst->vecbuflen == -1) dst->vecbuflen = src->vecbuflen;
00164 if (dst->veclen == -1) dst->veclen = src->veclen;
00165 }
00166
/* Non-zero when character A is a blank that separates tokens in a config
   line (space, tab or newline).  The argument is parenthesized so that an
   expression argument keeps its own precedence; note that it is still
   evaluated up to three times, so do not pass expressions with side
   effects. */
#define ISTOKEN(A) ((A) == ' ' || (A) == '\t' || (A) == '\n')
00168
00169
00177 boolean
00178 htk_config_file_parse(char *HTKconffile, Value *para)
00179 {
00180 FILE *fp;
00181 char buf[512];
00182 char *p, *d, *a;
00183 float srate;
00184 boolean skipped;
00185
00186 jlog("Stat: para: parsing HTK Config file: %s\n", HTKconffile);
00187
00188
00189
00190 if ((fp = fopen(HTKconffile, "r")) == NULL) {
00191 jlog("Error: para: failed to open HTK Config file: %s\n", HTKconffile);
00192 return FALSE;
00193 }
00194
00195 while (getl_fp(buf, 512, fp) != NULL) {
00196 p = buf;
00197 if (*p == 35) {
00198 continue;
00199 }
00200
00201
00202 while (*p != '\0' && ISTOKEN(*p)) p++;
00203 if (*p == '\0') continue;
00204 d = p;
00205 while (*p != '\0' && (!ISTOKEN(*p)) && *p != '=') p++;
00206 if (*p == '\0') continue;
00207 *p = '\0'; p++;
00208 while (*p != '\0' && ((ISTOKEN(*p)) || *p == '=')) p++;
00209 if (*p == '\0') continue;
00210 a = p;
00211 while (*p != '\0' && (!ISTOKEN(*p))) p++;
00212 *p = '\0';
00213
00214
00215 skipped = FALSE;
00216 if (strmatch(d, "SOURCERATE")) {
00217 srate = atof(a);
00218 } else if (strmatch(d, "TARGETRATE")) {
00219 para->frameshift = atof(a);
00220 } else if (strmatch(d, "WINDOWSIZE")) {
00221 para->framesize = atof(a);
00222 } else if (strmatch(d, "ZMEANSOURCE")) {
00223 para->zmeanframe = (a[0] == 'T') ? TRUE : FALSE;
00224 } else if (strmatch(d, "PREEMCOEF")) {
00225 para->preEmph = atof(a);
00226 } else if (strmatch(d, "USEHAMMING")) {
00227 if (a[0] != 'T') {
00228 jlog("Error: para: USEHAMMING should be T\n", HTKconffile);
00229 return FALSE;
00230 }
00231 } else if (strmatch(d, "NUMCHANS")) {
00232 para->fbank_num = atoi(a);
00233 } else if (strmatch(d, "CEPLIFTER")) {
00234 para->lifter = atoi(a);
00235 } else if (strmatch(d, "DELTAWINDOW")) {
00236 para->delWin = atoi(a);
00237 } else if (strmatch(d, "ACCWINDOW")) {
00238 para->accWin = atoi(a);
00239 } else if (strmatch(d, "LOFREQ")) {
00240 para->lopass = atof(a);
00241 } else if (strmatch(d, "HIFREQ")) {
00242 para->hipass = atof(a);
00243 } else if (strmatch(d, "RAWENERGY")) {
00244 para->raw_e = (a[0] == 'T') ? TRUE : FALSE;
00245 } else if (strmatch(d, "ENORMALISE")) {
00246 para->enormal = (a[0] == 'T') ? TRUE : FALSE;
00247 } else if (strmatch(d, "ESCALE")) {
00248 para->escale = atof(a);
00249 } else if (strmatch(d, "SILFLOOR")) {
00250 para->silFloor = atof(a);
00251 } else if (strmatch(d, "TARGETKIND")) {
00252 jlog("Stat: para: TARGETKIND skipped (will be set from AM header)\n");
00253 skipped = TRUE;
00254 } else if (strmatch(d, "NUMCEPS")) {
00255 jlog("Stat: para: NUMCEPS skipped (will be set from AM header)\n");
00256 skipped = TRUE;
00257 } else {
00258 skipped = TRUE;
00259 }
00260 if (!skipped) {
00261 jlog("Stat: para: %s=%s\n", d, a);
00262 }
00263 }
00264
00265 para->smp_period = srate;
00266 para->smp_freq = period2freq(para->smp_period);
00267 para->frameshift /= srate;
00268 para->framesize /= srate;
00269
00270 if (fclose(fp) == -1) {
00271 jlog("Error: para: failed to close file\n");
00272 return FALSE;
00273 }
00274
00275 para->loaded = 1;
00276
00277 return TRUE;
00278 }
00279
00287 void
00288 calc_para_from_header(Value *para, short param_type, short vec_size)
00289 {
00290 int dim;
00291
00292
00293 para->delta = (param_type & F_DELTA) ? TRUE : FALSE;
00294 para->acc = (param_type & F_ACCL) ? TRUE : FALSE;
00295 para->energy = (param_type & F_ENERGY) ? TRUE : FALSE;
00296 para->c0 = (param_type & F_ZEROTH) ? TRUE : FALSE;
00297 para->absesup = (param_type & F_ENERGY_SUP) ? TRUE : FALSE;
00298 para->cmn = (param_type & F_CEPNORM) ? TRUE : FALSE;
00299
00300
00301 dim = vec_size;
00302 if (para->absesup) dim++;
00303 dim /= 1 + (para->delta ? 1 : 0) + (para->acc ? 1 : 0);
00304 if (para->energy) dim--;
00305 if (para->c0) dim--;
00306 para->mfcc_dim = dim;
00307
00308
00309 para->baselen = para->mfcc_dim + (para->c0 ? 1 : 0) + (para->energy ? 1 : 0);
00310
00311 para->vecbuflen = para->baselen * (1 + (para->delta ? 1 : 0) + (para->acc ? 1 : 0));
00312
00313 para->veclen = para->vecbuflen - (para->absesup ? 1 : 0);
00314 }
00315
00323 void
00324 put_para(FILE *fp, Value *para)
00325 {
00326 fprintf(fp, " Acoustic analysis condition:\n");
00327 fprintf(fp, "\t parameter = MFCC");
00328 if (para->c0) fprintf(fp, "_0");
00329 if (para->energy) fprintf(fp, "_E");
00330 if (para->delta) fprintf(fp, "_D");
00331 if (para->acc) fprintf(fp, "_A");
00332 if (para->absesup) fprintf(fp, "_N");
00333 if (para->cmn) fprintf(fp, "_Z");
00334 fprintf(fp, " (%d dimension from %d cepstrum)\n", para->veclen, para->mfcc_dim);
00335 fprintf(fp, "\tsample frequency = %5ld Hz\n", para->smp_freq);
00336 fprintf(fp, "\t sample period = %4ld (1 = 100ns)\n", para->smp_period);
00337 fprintf(fp, "\t window size = %4d samples (%.1f ms)\n", para->framesize,
00338 (float)para->smp_period * (float)para->framesize / 10000.0);
00339 fprintf(fp, "\t frame shift = %4d samples (%.1f ms)\n", para->frameshift,
00340 (float)para->smp_period * (float)para->frameshift / 10000.0);
00341 fprintf(fp, "\t pre-emphasis = %.2f\n", para->preEmph);
00342 fprintf(fp, "\t # filterbank = %d\n", para->fbank_num);
00343 fprintf(fp, "\t cepst. lifter = %d\n", para->lifter);
00344 fprintf(fp, "\t raw energy = %s\n", para->raw_e ? "True" : "False");
00345 if (para->enormal) {
00346 fprintf(fp, "\tenergy normalize = True (scale = %.1f, silence floor = %.1f dB)\n", para->escale, para->silFloor);
00347 } else {
00348 fprintf(fp, "\tenergy normalize = False\n");
00349 }
00350 if (para->delta) {
00351 fprintf(fp, "\t delta window = %d frames (%.1f ms) around\n", para->delWin, (float)para->delWin * (float)para->smp_period * (float)para->frameshift / 10000.0);
00352 }
00353 if (para->acc) {
00354 fprintf(fp, "\t acc window = %d frames (%.1f ms) around\n", para->accWin, (float)para->accWin * (float)para->smp_period * (float)para->frameshift / 10000.0);
00355 }
00356 fprintf(fp, "\t hi freq cut = ");
00357 if (para->hipass < 0) fprintf(fp, "OFF\n");
00358 else fprintf(fp, "%5d Hz\n", para->hipass);
00359 fprintf(fp, "\t lo freq cut = ");
00360 if (para->lopass < 0) fprintf(fp, "OFF\n");
00361 else fprintf(fp, "%5d Hz\n", para->lopass);
00362 fprintf(fp, "\t zero mean frame = ");
00363 if (para->zmeanframe) fprintf(fp, "ON\n");
00364 else fprintf(fp, "OFF\n");
00365 }