00001
00022
00023
00024
00025
00026
00027
00028
00029 #include <sent/mfcc.h>
00030 #include <sent/speech.h>
00031
00038 void
00039 undef_para(Value *para)
00040 {
00041 para->smp_period = -1;
00042 para->smp_freq = -1;
00043 para->framesize = -1;
00044 para->frameshift = -1;
00045 para->preEmph = -1;
00046 para->mfcc_dim = -1;
00047 para->lifter = -1;
00048 para->fbank_num = -1;
00049 para->delWin = -1;
00050 para->accWin = -1;
00051 para->silFloor = -1;
00052 para->escale = -1;
00053 para->enormal = -1;
00054 para->hipass = -2;
00055 para->lopass = -2;
00056 para->cmn = -1;
00057 para->cvn = -1;
00058 para->raw_e = -1;
00059 para->c0 = -1;
00060
00061
00062 para->vtln_alpha = -1;
00063 para->vtln_upper = -1;
00064 para->vtln_lower = -1;
00065 para->zmeanframe = -1;
00066 para->usepower = -1;
00067 para->delta = -1;
00068 para->acc = -1;
00069 para->energy = -1;
00070 para->absesup = -1;
00071 para->baselen = -1;
00072 para->vecbuflen = -1;
00073 para->veclen = -1;
00074
00075 para->loaded = 0;
00076 }
00077
00084 void
00085 make_default_para(Value *para)
00086 {
00087 para->smp_period = 625;
00088 para->smp_freq = 16000;
00089 para->framesize = DEF_FRAMESIZE;
00090 para->frameshift = DEF_FRAMESHIFT;
00091 para->preEmph = DEF_PREENPH;
00092 para->fbank_num = DEF_FBANK;
00093 para->lifter = DEF_CEPLIF;
00094 para->delWin = DEF_DELWIN;
00095 para->accWin = DEF_ACCWIN;
00096 para->raw_e = FALSE;
00097 para->enormal = FALSE;
00098 para->escale = DEF_ESCALE;
00099 para->silFloor = DEF_SILFLOOR;
00100 para->cvn = 0;
00101 para->hipass = -1;
00102 para->lopass = -1;
00103
00104
00105 para->vtln_alpha = 1.0;
00106 para->zmeanframe = FALSE;
00107 para->usepower = FALSE;
00108 }
00109
00117 void
00118 make_default_para_htk(Value *para)
00119 {
00120 para->framesize = 256000.0;
00121 para->preEmph = 0.97;
00122 para->fbank_num = 20;
00123 para->lifter = 22;
00124 para->delWin = 2;
00125 para->accWin = 2;
00126 para->raw_e = TRUE;
00127 para->enormal = TRUE;
00128 para->escale = 0.1;
00129 para->silFloor = 50.0;
00130 para->hipass = -1;
00131 para->lopass = -1;
00132 para->vtln_alpha = 1.0;
00133 para->zmeanframe = FALSE;
00134 para->usepower = FALSE;
00135 }
00136
00144 void
00145 apply_para(Value *dst, Value *src)
00146 {
00147 if (dst->smp_period == -1) dst->smp_period = src->smp_period;
00148 if (dst->smp_freq == -1) dst->smp_freq = src->smp_freq;
00149 if (dst->framesize == -1) dst->framesize = src->framesize;
00150 if (dst->frameshift == -1) dst->frameshift = src->frameshift;
00151 if (dst->preEmph == -1) dst->preEmph = src->preEmph;
00152 if (dst->mfcc_dim == -1) dst->mfcc_dim = src->mfcc_dim;
00153 if (dst->lifter == -1) dst->lifter = src->lifter;
00154 if (dst->fbank_num == -1) dst->fbank_num = src->fbank_num;
00155 if (dst->delWin == -1) dst->delWin = src->delWin;
00156 if (dst->accWin == -1) dst->accWin = src->accWin;
00157 if (dst->silFloor == -1) dst->silFloor = src->silFloor;
00158 if (dst->escale == -1) dst->escale = src->escale;
00159 if (dst->enormal == -1) dst->enormal = src->enormal;
00160 if (dst->hipass == -2) dst->hipass = src->hipass;
00161 if (dst->lopass == -2) dst->lopass = src->lopass;
00162 if (dst->cmn == -1) dst->cmn = src->cmn;
00163 if (dst->cvn == -1) dst->cvn = src->cvn;
00164 if (dst->raw_e == -1) dst->raw_e = src->raw_e;
00165 if (dst->c0 == -1) dst->c0 = src->c0;
00166
00167
00168 if (dst->vtln_alpha == -1) dst->vtln_alpha = src->vtln_alpha;
00169 if (dst->vtln_upper == -1) dst->vtln_upper = src->vtln_upper;
00170 if (dst->vtln_lower == -1) dst->vtln_lower = src->vtln_lower;
00171 if (dst->zmeanframe == -1) dst->zmeanframe = src->zmeanframe;
00172 if (dst->usepower == -1) dst->usepower = src->usepower;
00173 if (dst->delta == -1) dst->delta = src->delta;
00174 if (dst->acc == -1) dst->acc = src->acc;
00175 if (dst->energy == -1) dst->energy = src->energy;
00176 if (dst->absesup == -1) dst->absesup = src->absesup;
00177 if (dst->baselen == -1) dst->baselen = src->baselen;
00178 if (dst->vecbuflen == -1) dst->vecbuflen = src->vecbuflen;
00179 if (dst->veclen == -1) dst->veclen = src->veclen;
00180 }
00181
/**
 * True when character A is one of the separators recognized while
 * splitting a config line: space, horizontal tab or newline.
 *
 * The parameter is parenthesized so that an expression argument (for
 * example a conditional expression) is compared as a whole.  The argument
 * is still evaluated up to three times, so it must be free of side effects.
 */
#define ISTOKEN(A) ((A) == ' ' || (A) == '\t' || (A) == '\n')
00183
00184
00192 boolean
00193 htk_config_file_parse(char *HTKconffile, Value *para)
00194 {
00195 FILE *fp;
00196 char buf[512];
00197 char *p, *d, *a;
00198 float srate;
00199 boolean skipped;
00200
00201 jlog("Stat: para: parsing HTK Config file: %s\n", HTKconffile);
00202
00203
00204
00205 if ((fp = fopen(HTKconffile, "r")) == NULL) {
00206 jlog("Error: para: failed to open HTK Config file: %s\n", HTKconffile);
00207 return FALSE;
00208 }
00209
00210 srate = 0.0;
00211
00212 while (getl_fp(buf, 512, fp) != NULL) {
00213 p = buf;
00214 if (*p == 35) {
00215 continue;
00216 }
00217
00218
00219 while (*p != '\0' && ISTOKEN(*p)) p++;
00220 if (*p == '\0') continue;
00221 d = p;
00222 while (*p != '\0' && (!ISTOKEN(*p)) && *p != '=') p++;
00223 if (*p == '\0') continue;
00224 *p = '\0'; p++;
00225 while (*p != '\0' && ((ISTOKEN(*p)) || *p == '=')) p++;
00226 if (*p == '\0') continue;
00227 a = p;
00228 while (*p != '\0' && (!ISTOKEN(*p))) p++;
00229 *p = '\0';
00230
00231
00232 skipped = FALSE;
00233 if (strmatch(d, "SOURCERATE")) {
00234 srate = atof(a);
00235 } else if (strmatch(d, "TARGETRATE")) {
00236 para->frameshift = atof(a);
00237 } else if (strmatch(d, "WINDOWSIZE")) {
00238 para->framesize = atof(a);
00239 } else if (strmatch(d, "ZMEANSOURCE")) {
00240 para->zmeanframe = (a[0] == 'T') ? TRUE : FALSE;
00241 } else if (strmatch(d, "USEPOWER")) {
00242 para->zmeanframe = (a[0] == 'T') ? TRUE : FALSE;
00243 } else if (strmatch(d, "PREEMCOEF")) {
00244 para->preEmph = atof(a);
00245 } else if (strmatch(d, "USEHAMMING")) {
00246 if (a[0] != 'T') {
00247 jlog("Error: para: USEHAMMING should be T\n", HTKconffile);
00248 return FALSE;
00249 }
00250 } else if (strmatch(d, "NUMCHANS")) {
00251 para->fbank_num = atoi(a);
00252 } else if (strmatch(d, "CEPLIFTER")) {
00253 para->lifter = atoi(a);
00254 } else if (strmatch(d, "DELTAWINDOW")) {
00255 para->delWin = atoi(a);
00256 } else if (strmatch(d, "ACCWINDOW")) {
00257 para->accWin = atoi(a);
00258 } else if (strmatch(d, "LOFREQ")) {
00259 para->lopass = atof(a);
00260 } else if (strmatch(d, "HIFREQ")) {
00261 para->hipass = atof(a);
00262 } else if (strmatch(d, "RAWENERGY")) {
00263 para->raw_e = (a[0] == 'T') ? TRUE : FALSE;
00264 } else if (strmatch(d, "ENORMALISE")) {
00265 para->enormal = (a[0] == 'T') ? TRUE : FALSE;
00266 } else if (strmatch(d, "ESCALE")) {
00267 para->escale = atof(a);
00268 } else if (strmatch(d, "SILFLOOR")) {
00269 para->silFloor = atof(a);
00270 } else if (strmatch(d, "WARPFREQ")) {
00271 para->vtln_alpha = atof(a);
00272 } else if (strmatch(d, "WARPLCUTOFF")) {
00273 para->vtln_lower = atof(a);
00274 } else if (strmatch(d, "WARPUCUTOFF")) {
00275 para->vtln_upper = atof(a);
00276 } else if (strmatch(d, "TARGETKIND")) {
00277 jlog("Warning: para: TARGETKIND skipped (will be determined by AM header)\n");
00278 skipped = TRUE;
00279 } else if (strmatch(d, "NUMCEPS")) {
00280 jlog("Warning: para: NUMCEPS skipped (will be determined by AM header)\n");
00281 skipped = TRUE;
00282 } else {
00283 jlog("Warning: para: \"%s\" ignored (not supported, or irrelevant)\n", d);
00284 skipped = TRUE;
00285 }
00286 if (!skipped) {
00287 jlog("Stat: para: %s=%s\n", d, a);
00288 }
00289 }
00290
00291 if (srate == 0.0) {
00292 jlog("Warning: no SOURCERATE found\n");
00293 jlog("Warning: assume source waveform sample rate to 625 (16kHz)\n");
00294 srate = 625;
00295 }
00296
00297 para->smp_period = srate;
00298 para->smp_freq = period2freq(para->smp_period);
00299 para->frameshift /= srate;
00300 para->framesize /= srate;
00301
00302 if (fclose(fp) == -1) {
00303 jlog("Error: para: failed to close file\n");
00304 return FALSE;
00305 }
00306
00307 para->loaded = 1;
00308
00309 return TRUE;
00310 }
00311
00319 void
00320 calc_para_from_header(Value *para, short param_type, short vec_size)
00321 {
00322 int dim;
00323
00324
00325 para->delta = (param_type & F_DELTA) ? TRUE : FALSE;
00326 para->acc = (param_type & F_ACCL) ? TRUE : FALSE;
00327 para->energy = (param_type & F_ENERGY) ? TRUE : FALSE;
00328 para->c0 = (param_type & F_ZEROTH) ? TRUE : FALSE;
00329 para->absesup = (param_type & F_ENERGY_SUP) ? TRUE : FALSE;
00330 para->cmn = (param_type & F_CEPNORM) ? TRUE : FALSE;
00331
00332
00333 dim = vec_size;
00334 if (para->absesup) dim++;
00335 dim /= 1 + (para->delta ? 1 : 0) + (para->acc ? 1 : 0);
00336 if (para->energy) dim--;
00337 if (para->c0) dim--;
00338 para->mfcc_dim = dim;
00339
00340
00341 para->baselen = para->mfcc_dim + (para->c0 ? 1 : 0) + (para->energy ? 1 : 0);
00342
00343 para->vecbuflen = para->baselen * (1 + (para->delta ? 1 : 0) + (para->acc ? 1 : 0));
00344
00345 para->veclen = para->vecbuflen - (para->absesup ? 1 : 0);
00346 }
00347
00355 void
00356 put_para(FILE *fp, Value *para)
00357 {
00358 fprintf(fp, " Acoustic analysis condition:\n");
00359 fprintf(fp, "\t parameter = MFCC");
00360 if (para->c0) fprintf(fp, "_0");
00361 if (para->energy) fprintf(fp, "_E");
00362 if (para->delta) fprintf(fp, "_D");
00363 if (para->acc) fprintf(fp, "_A");
00364 if (para->absesup) fprintf(fp, "_N");
00365 if (para->cmn) fprintf(fp, "_Z");
00366 fprintf(fp, " (%d dim. from %d cepstrum", para->veclen, para->mfcc_dim);
00367 if (para->c0) fprintf(fp, " + c0");
00368 if (para->energy) fprintf(fp, " + energy");
00369 if (para->absesup) fprintf(fp, ", abs energy supressed");
00370 if (para->cmn) fprintf(fp, " with CMN");
00371 fprintf(fp, ")\n");
00372 fprintf(fp, "\tsample frequency = %5ld Hz\n", para->smp_freq);
00373 fprintf(fp, "\t sample period = %4ld (1 = 100ns)\n", para->smp_period);
00374 fprintf(fp, "\t window size = %4d samples (%.1f ms)\n", para->framesize,
00375 (float)para->smp_period * (float)para->framesize / 10000.0);
00376 fprintf(fp, "\t frame shift = %4d samples (%.1f ms)\n", para->frameshift,
00377 (float)para->smp_period * (float)para->frameshift / 10000.0);
00378 fprintf(fp, "\t pre-emphasis = %.2f\n", para->preEmph);
00379 fprintf(fp, "\t # filterbank = %d\n", para->fbank_num);
00380 fprintf(fp, "\t cepst. lifter = %d\n", para->lifter);
00381 fprintf(fp, "\t raw energy = %s\n", para->raw_e ? "True" : "False");
00382 if (para->enormal) {
00383 fprintf(fp, "\tenergy normalize = True (scale = %.1f, silence floor = %.1f dB)\n", para->escale, para->silFloor);
00384 } else {
00385 fprintf(fp, "\tenergy normalize = False\n");
00386 }
00387 if (para->delta) {
00388 fprintf(fp, "\t delta window = %d frames (%.1f ms) around\n", para->delWin, (float)para->delWin * (float)para->smp_period * (float)para->frameshift / 10000.0);
00389 }
00390 if (para->acc) {
00391 fprintf(fp, "\t acc window = %d frames (%.1f ms) around\n", para->accWin, (float)para->accWin * (float)para->smp_period * (float)para->frameshift / 10000.0);
00392 }
00393 fprintf(fp, "\t hi freq cut = ");
00394 if (para->hipass < 0) fprintf(fp, "OFF\n");
00395 else fprintf(fp, "%5d Hz\n", para->hipass);
00396 fprintf(fp, "\t lo freq cut = ");
00397 if (para->lopass < 0) fprintf(fp, "OFF\n");
00398 else fprintf(fp, "%5d Hz\n", para->lopass);
00399 fprintf(fp, "\t zero mean frame = ");
00400 if (para->zmeanframe) fprintf(fp, "ON\n");
00401 else fprintf(fp, "OFF\n");
00402 fprintf(fp, "\t use power = ");
00403 if (para->usepower) fprintf(fp, "ON\n");
00404 else fprintf(fp, "OFF\n");
00405 fprintf(fp, "\t CVN = ");
00406 if (para->cvn) fprintf(fp, "ON\n");
00407 else fprintf(fp, "OFF\n");
00408 fprintf(fp, "\t VTLN = ");
00409 if(para->vtln_alpha != 1.0) {
00410 fprintf(fp, "ON, alpha=%.3f, f_low=%.1f, f_high=%.1f\n", para->vtln_alpha, para->vtln_lower, para->vtln_upper);
00411 } else fprintf(fp, "OFF\n");
00412 }