00001
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046 #include <sent/stddefs.h>
00047 #include <sent/mfcc.h>
00048 #include <sent/htk_param.h>
00049
00050
00059 DeltaBuf *
00060 WMP_deltabuf_new(int veclen, int windowlen)
00061 {
00062 int i;
00063 DeltaBuf *db;
00064
00065 db = (DeltaBuf *)mymalloc(sizeof(DeltaBuf));
00066 db->veclen = veclen;
00067 db->win = windowlen;
00068 db->len = windowlen * 2 + 1;
00069 db->mfcc = (float **)mymalloc(sizeof(float *) * db->len);
00070 db->is_on = (boolean *) mymalloc(sizeof(boolean) * db->len);
00071 for (i=0;i<db->len;i++) {
00072 db->mfcc[i] = (float *)mymalloc(sizeof(float) * veclen * 2);
00073 }
00074 db->B = 0;
00075 for(i = 1; i <= windowlen; i++) db->B += i * i;
00076 db->B *= 2;
00077
00078 return (db);
00079 }
00080
00086 void
00087 WMP_deltabuf_free(DeltaBuf *db)
00088 {
00089 int i;
00090
00091 for (i=0;i<db->len;i++) {
00092 free(db->mfcc[i]);
00093 }
00094 free(db->is_on);
00095 free(db->mfcc);
00096 free(db);
00097 }
00098
00104 void
00105 WMP_deltabuf_prepare(DeltaBuf *db)
00106 {
00107 int i;
00108 db->store = 0;
00109 for (i=0;i<db->len;i++) {
00110 db->is_on[i] = FALSE;
00111 }
00112 }
00113
00120 static void
00121 WMP_deltabuf_calc(DeltaBuf *db, int cur)
00122 {
00123 int n, theta, p;
00124 float A1, A2, sum;
00125 int last_valid_left, last_valid_right;
00126
00127 for (n = 0; n < db->veclen; n++) {
00128 sum = 0.0;
00129 last_valid_left = last_valid_right = cur;
00130 for (theta = 1; theta <= db->win; theta++) {
00131 p = cur - theta;
00132 if (p < 0) p += db->len;
00133 if (db->is_on[p]) {
00134 A1 = db->mfcc[p][n];
00135 last_valid_left = p;
00136 } else {
00137 A1 = db->mfcc[last_valid_left][n];
00138 }
00139 p = cur + theta;
00140 if (p >= db->len) p -= db->len;
00141 if (db->is_on[p]) {
00142 A2 = db->mfcc[p][n];
00143 last_valid_right = p;
00144 } else {
00145 A2 = db->mfcc[last_valid_right][n];
00146 }
00147 sum += theta * (A2 - A1);
00148 }
00149 db->mfcc[cur][db->veclen + n] = sum / db->B;
00150 }
00151 }
00152
00163 boolean
00164 WMP_deltabuf_proceed(DeltaBuf *db, float *new_mfcc)
00165 {
00166 int cur;
00167 boolean ret;
00168
00169
00170 memcpy(db->mfcc[db->store], new_mfcc, sizeof(float) * db->veclen);
00171 db->is_on[db->store] = TRUE;
00172
00173
00174 cur = db->store - db->win;
00175 if (cur < 0) cur += db->len;
00176
00177
00178 if (db->is_on[cur]) {
00179 WMP_deltabuf_calc(db, cur);
00180 db->vec = db->mfcc[cur];
00181 ret = TRUE;
00182 } else {
00183 ret = FALSE;
00184 }
00185
00186
00187 db->store++;
00188 if (db->store >= db->len) db->store -= db->len;
00189
00190
00191 return (ret);
00192 }
00193
00205 boolean
00206 WMP_deltabuf_flush(DeltaBuf *db)
00207 {
00208 int cur;
00209 boolean ret;
00210
00211
00212 db->is_on[db->store] = FALSE;
00213
00214
00215 cur = db->store - db->win;
00216 if (cur < 0) cur += db->len;
00217
00218
00219 if (db->is_on[cur]) {
00220 WMP_deltabuf_calc(db, cur);
00221 db->vec = db->mfcc[cur];
00222 ret = TRUE;
00223 } else {
00224 ret = FALSE;
00225 }
00226
00227
00228 db->store++;
00229 if (db->store >= db->len) db->store -= db->len;
00230
00231
00232 return (ret);
00233 }
00234
00235
00236
00237
00238
00246 CMNWork *
00247 CMN_realtime_new(Value *para, float weight)
00248 {
00249 int i;
00250
00251 CMNWork *c;
00252
00253 c = (CMNWork *)mymalloc(sizeof(CMNWork));
00254
00255 c->cweight = weight;
00256 c->mfcc_dim = para->mfcc_dim + (para->c0 ? 1 : 0);
00257 c->veclen = para->veclen;
00258 c->mean = para->cmn ? TRUE : FALSE;
00259 c->var = para->cvn ? TRUE : FALSE;
00260 c->clist_max = CPSTEP;
00261 c->clist_num = 0;
00262 c->clist = (CMEAN *)mymalloc(sizeof(CMEAN) * c->clist_max);
00263 for(i=0;i<c->clist_max;i++) {
00264 c->clist[i].mfcc_sum = (float *)mymalloc(sizeof(float)*c->veclen);
00265 if (c->var) c->clist[i].mfcc_var = (float *)mymalloc(sizeof(float)*c->veclen);
00266 c->clist[i].framenum = 0;
00267 }
00268 c->now.mfcc_sum = (float *)mymalloc(sizeof(float) * c->veclen);
00269 if (c->var) c->now.mfcc_var = (float *)mymalloc(sizeof(float) * c->veclen);
00270
00271 c->cmean_init = (float *)mymalloc(sizeof(float) * c->veclen);
00272 if (c->var) c->cvar_init = (float *)mymalloc(sizeof(float) * c->veclen);
00273 c->cmean_init_set = FALSE;
00274
00275 return c;
00276 }
00277
00284 void
00285 CMN_realtime_free(CMNWork *c)
00286 {
00287 int i;
00288
00289 free(c->cmean_init);
00290 free(c->now.mfcc_sum);
00291 if (c->var) {
00292 free(c->cvar_init);
00293 free(c->now.mfcc_var);
00294 }
00295 for(i=0;i<c->clist_max;i++) {
00296 if (c->var) free(c->clist[i].mfcc_var);
00297 free(c->clist[i].mfcc_sum);
00298 }
00299 free(c->clist);
00300 free(c);
00301 }
00302
00308 void
00309 CMN_realtime_prepare(CMNWork *c)
00310 {
00311 int d;
00312
00313 for(d=0;d<c->veclen;d++) c->now.mfcc_sum[d] = 0.0;
00314 if (c->var) {
00315 for(d=0;d<c->veclen;d++) c->now.mfcc_var[d] = 0.0;
00316 }
00317 c->now.framenum = 0;
00318 }
00319
00327 void
00328 CMN_realtime(CMNWork *c, float *mfcc)
00329 {
00330 int d;
00331 double x, y;
00332
00333 c->now.framenum++;
00334 if (c->cmean_init_set) {
00335
00336 for(d=0;d<c->veclen;d++) {
00337
00338 c->now.mfcc_sum[d] += mfcc[d];
00339
00340 x = c->now.mfcc_sum[d] + c->cweight * c->cmean_init[d];
00341 y = (double)c->now.framenum + c->cweight;
00342 x /= y;
00343 if (c->var) {
00344
00345 c->now.mfcc_var[d] += (mfcc[d] - x) * (mfcc[d] - x);
00346 }
00347 if (c->mean && d < c->mfcc_dim) {
00348
00349 mfcc[d] -= x;
00350 }
00351 if (c->var) {
00352
00353 x = c->now.mfcc_var[d] + c->cweight * c->cvar_init[d];
00354 y = (double)c->now.framenum + c->cweight;
00355 mfcc[d] /= sqrt(x / y);
00356 }
00357 }
00358 } else {
00359
00360 for(d=0;d<c->veclen;d++) {
00361
00362 c->now.mfcc_sum[d] += mfcc[d];
00363
00364 x = c->now.mfcc_sum[d] / c->now.framenum;
00365 if (c->var) {
00366
00367 c->now.mfcc_var[d] += (mfcc[d] - x) * (mfcc[d] - x);
00368 }
00369 if (c->mean && d < c->mfcc_dim) {
00370
00371 mfcc[d] -= x;
00372 }
00373 #if 0
00374 if (c->var) {
00375
00376 mfcc[d] /= sqrt(c->now.mfcc_var[d] / c->now.framenum);
00377 }
00378 #endif
00379 }
00380 }
00381 }
00382
00388 void
00389 CMN_realtime_update(CMNWork *c, HTK_Param *param)
00390 {
00391 float *tmp, *tmp2;
00392 int i, d;
00393 int frames;
00394
00395
00396
00397 if (c->now.framenum == 0) return;
00398
00399
00400 if (c->var && param != NULL) {
00401 float m, x;
00402 if (param->samplenum != c->now.framenum) {
00403 jlog("InternalError: CMN_realtime_update: param->samplenum != c->now.framenum\n");
00404 } else if (param->veclen != c->veclen) {
00405 jlog("InternalError: CMN_realtime_update: param->veclen != c->veclen\n");
00406 } else {
00407 for(d=0;d<c->veclen;d++) {
00408 m = c->now.mfcc_sum[d] / (float) c->now.framenum;
00409 x = 0;
00410 for(i=0;i<param->samplenum;i++) {
00411 x += (param->parvec[i][d] - m) * (param->parvec[i][d] - m);
00412 }
00413 c->now.mfcc_var[d] = x;
00414 }
00415 }
00416 }
00417
00418
00419 for(d=0;d<c->veclen;d++) c->cmean_init[d] = c->now.mfcc_sum[d];
00420 if (c->var) {
00421 for(d=0;d<c->veclen;d++) c->cvar_init[d] = c->now.mfcc_var[d];
00422 }
00423 frames = c->now.framenum;
00424 for(i=0;i<c->clist_num;i++) {
00425 for(d=0;d<c->veclen;d++) c->cmean_init[d] += c->clist[i].mfcc_sum[d];
00426 if (c->var) {
00427 for(d=0;d<c->veclen;d++) c->cvar_init[d] += c->clist[i].mfcc_var[d];
00428 }
00429 frames += c->clist[i].framenum;
00430 if (frames >= CPMAX) break;
00431 }
00432 for(d=0;d<c->veclen;d++) c->cmean_init[d] /= (float) frames;
00433 if (c->var) {
00434 for(d=0;d<c->veclen;d++) c->cvar_init[d] /= (float) frames;
00435 }
00436
00437 c->cmean_init_set = TRUE;
00438
00439
00440 if (c->clist_num == c->clist_max && frames < CPMAX) {
00441 c->clist_max += CPSTEP;
00442 c->clist = (CMEAN *)myrealloc(c->clist, sizeof(CMEAN) * c->clist_max);
00443 for(i=c->clist_num;i<c->clist_max;i++) {
00444 c->clist[i].mfcc_sum = (float *)mymalloc(sizeof(float)*c->veclen);
00445 if (c->var) c->clist[i].mfcc_var = (float *)mymalloc(sizeof(float)*c->veclen);
00446 c->clist[i].framenum = 0;
00447 }
00448 }
00449
00450
00451 tmp = c->clist[c->clist_max-1].mfcc_sum;
00452 if (c->var) tmp2 = c->clist[c->clist_max-1].mfcc_var;
00453 memmove(&(c->clist[1]), &(c->clist[0]), sizeof(CMEAN) * (c->clist_max - 1));
00454 c->clist[0].mfcc_sum = tmp;
00455 if (c->var) c->clist[0].mfcc_var = tmp2;
00456
00457 memcpy(c->clist[0].mfcc_sum, c->now.mfcc_sum, sizeof(float) * c->veclen);
00458 if (c->var) memcpy(c->clist[0].mfcc_var, c->now.mfcc_var, sizeof(float) * c->veclen);
00459 c->clist[0].framenum = c->now.framenum;
00460
00461 if (c->clist_num < c->clist_max) c->clist_num++;
00462
00463 }
00464
00475 static boolean
00476 myread(void *buf, size_t unitbyte, int unitnum, FILE *fp)
00477 {
00478 if (myfread(buf, unitbyte, unitnum, fp) < (size_t)unitnum) {
00479 return(FALSE);
00480 }
00481 #ifndef WORDS_BIGENDIAN
00482 swap_bytes(buf, unitbyte, unitnum);
00483 #endif
00484 return(TRUE);
00485 }
00486
00497 static boolean
00498 mywrite(void *buf, size_t unitbyte, size_t unitnum, int fd)
00499 {
00500 #ifndef WORDS_BIGENDIAN
00501 swap_bytes(buf, unitbyte, unitnum);
00502 #endif
00503 if (write(fd, buf, unitbyte * unitnum) < unitbyte * unitnum) {
00504 return(FALSE);
00505 }
00506 #ifndef WORDS_BIGENDIAN
00507 swap_bytes(buf, unitbyte, unitnum);
00508 #endif
00509 return(TRUE);
00510 }
00511
00521 boolean
00522 CMN_load_from_file(CMNWork *c, char *filename)
00523 {
00524 FILE *fp;
00525 int veclen;
00526
00527 jlog("Stat: wav2mfcc-pipe: reading initial CMN from file \"%s\"\n", filename);
00528 if ((fp = fopen_readfile(filename)) == NULL) {
00529 jlog("Error: wav2mfcc-pipe: failed to open\n");
00530 return(FALSE);
00531 }
00532
00533 if (myread(&veclen, sizeof(int), 1, fp) == FALSE) {
00534 jlog("Error: wav2mfcc-pipe: failed to read header\n");
00535 fclose_readfile(fp);
00536 return(FALSE);
00537 }
00538
00539 if (veclen != c->veclen) {
00540 jlog("Error: wav2mfcc-pipe: cepstral dimension mismatch\n");
00541 jlog("Error: wav2mfcc-pipe: process = %d, file = %d\n", c->veclen, veclen);
00542 fclose_readfile(fp);
00543 return(FALSE);
00544 }
00545
00546 if (myread(c->cmean_init, sizeof(float), c->veclen, fp) == FALSE) {
00547 jlog("Error: wav2mfcc-pipe: failed to read mean for CMN\n");
00548 fclose_readfile(fp);
00549 return(FALSE);
00550 }
00551 if (c->var) {
00552 if (myread(c->cvar_init, sizeof(float), c->veclen, fp) == FALSE) {
00553 jlog("Error: wav2mfcc-pipe: failed to read variance for CVN\n");
00554 fclose_readfile(fp);
00555 return(FALSE);
00556 }
00557 }
00558
00559 if (fclose_readfile(fp) == -1) {
00560 jlog("Error: wav2mfcc-pipe: failed to close\n");
00561 return(FALSE);
00562 }
00563
00564 c->cmean_init_set = TRUE;
00565 jlog("Stat: wav2mfcc-pipe: read CMN parameter\n");
00566
00567 return(TRUE);
00568 }
00569
00578 boolean
00579 CMN_save_to_file(CMNWork *c, char *filename)
00580 {
00581 int fd;
00582
00583 jlog("Stat: wav2mfcc-pipe: writing current cepstral data to file \"%s\"\n", filename);
00584
00585 if ((fd = creat(filename, 0644)) == -1) {
00586 jlog("Error: wav2mfcc-pipe: failed to open \"%s\" to write current cepstral data\n", filename);
00587 return(FALSE);
00588 }
00589
00590 if (mywrite(&(c->veclen), sizeof(int), 1, fd) == FALSE) {
00591 jlog("Error: wav2mfcc-pipe: cannot write header to \"%s\" as current cepstral data\n", filename);
00592 close(fd);
00593 return(FALSE);
00594 }
00595
00596 if (mywrite(c->cmean_init, sizeof(float), c->veclen, fd) == FALSE) {
00597 jlog("Error: wav2mfcc-pipe: cannot write mean to \"%s\" as current cepstral data\n", filename);
00598 close(fd);
00599 return(FALSE);
00600 }
00601 if (c->var) {
00602 if (mywrite(c->cvar_init, sizeof(float), c->veclen, fd) == FALSE) {
00603 jlog("Error: wav2mfcc-pipe: cannot write variance to \"%s\" as current cepstrum\n", filename);
00604 close(fd);
00605 return(FALSE);
00606 }
00607 }
00608
00609 close(fd);
00610
00611 jlog("Stat: wav2mfcc-pipe: current cepstral data written to \"%s\"\n", filename);
00612
00613 return(TRUE);
00614 }
00615
00616
00617
00618
00619
00620
00628 void
00629 energy_max_init(ENERGYWork *energy)
00630 {
00631 energy->max = 5.0;
00632 }
00633
00641 void
00642 energy_max_prepare(ENERGYWork *energy, Value *para)
00643 {
00644 energy->max_last = energy->max;
00645 energy->min_last = energy->max - (para->silFloor * LOG_TEN) / 10.0;
00646 energy->max = 0.0;
00647 }
00648
00658 LOGPROB
00659 energy_max_normalize(ENERGYWork *energy, LOGPROB f, Value *para)
00660 {
00661 if (energy->max < f) energy->max = f;
00662 if (f < energy->min_last) f = energy->min_last;
00663 return(1.0 - (energy->max_last - f) * para->escale);
00664 }