Julius: libsent/src/wav2mfcc/wav2mfcc-pipe.c ソースファイル

00001 
00026 /*
00027  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
00028  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00029  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
00030  * All rights reserved
00031  */
00032 
00033 /* wav2mfcc-pipe.c --- split Wav2MFCC to perform per-frame-basis,
00034    and also realtime CMN for 1st-pass pipe-lining */
00035 
00036 /************************************************************************/
00037 /*    wav2mfcc.c   Convert Speech file to MFCC_E_D_(Z) file             */
00038 /*----------------------------------------------------------------------*/
00039 /*    Author    : Yuichiro Nakano                                       */
00040 /*                                                                      */
00041 /*    Copyright(C) Yuichiro Nakano 1996-1998                            */
00042 /*----------------------------------------------------------------------*/
00043 /************************************************************************/
00044 
00045 
00046 #include <sent/stddefs.h>
00047 #include <sent/mfcc.h>
00048 #include <sent/htk_param.h>
00049 
00050 /***********************************************************************/
00059 DeltaBuf *
00060 WMP_deltabuf_new(int veclen, int windowlen)
00061 {
00062   int i;
00063   DeltaBuf *db;
00064 
00065   db = (DeltaBuf *)mymalloc(sizeof(DeltaBuf));
00066   db->veclen = veclen;
00067   db->win = windowlen;
00068   db->len = windowlen * 2 + 1;
00069   db->mfcc = (float **)mymalloc(sizeof(float *) * db->len);
00070   db->is_on = (boolean *) mymalloc(sizeof(boolean) * db->len);
00071   for (i=0;i<db->len;i++) {
00072     db->mfcc[i] = (float *)mymalloc(sizeof(float) * veclen * 2);
00073   }
00074   db->B = 0;
00075   for(i = 1; i <= windowlen; i++) db->B += i * i;
00076   db->B *= 2;
00077 
00078   return (db);
00079 }
00080 
00086 void
00087 WMP_deltabuf_free(DeltaBuf *db)
00088 {
00089   int i;
00090 
00091   for (i=0;i<db->len;i++) {
00092     free(db->mfcc[i]);
00093   }
00094   free(db->is_on);
00095   free(db->mfcc);
00096   free(db);
00097 }
00098 
00104 void
00105 WMP_deltabuf_prepare(DeltaBuf *db)
00106 {
00107   int i;
00108   db->store = 0;
00109   for (i=0;i<db->len;i++) {
00110     db->is_on[i] = FALSE;
00111   }
00112 }
00113 
00120 static void
00121 WMP_deltabuf_calc(DeltaBuf *db, int cur)
00122 {
00123   int n, theta, p;
00124   float A1, A2, sum;
00125   int last_valid_left, last_valid_right;
00126   
00127   for (n = 0; n < db->veclen; n++) {
00128     sum = 0.0;
00129     last_valid_left = last_valid_right = cur;
00130     for (theta = 1; theta <= db->win; theta++) {
00131       p = cur - theta;
00132       if (p < 0) p += db->len;
00133       if (db->is_on[p]) {
00134         A1 = db->mfcc[p][n];
00135         last_valid_left = p;
00136       } else {
00137         A1 = db->mfcc[last_valid_left][n];
00138       }
00139       p = cur + theta;
00140       if (p >= db->len) p -= db->len;
00141       if (db->is_on[p]) {
00142         A2 = db->mfcc[p][n];
00143         last_valid_right = p;
00144       } else {
00145         A2 = db->mfcc[last_valid_right][n];
00146       }
00147       sum += theta * (A2 - A1);
00148     }
00149     db->mfcc[cur][db->veclen + n] = sum / db->B;
00150   }
00151 }
00152 
00163 boolean
00164 WMP_deltabuf_proceed(DeltaBuf *db, float *new_mfcc) 
00165 {
00166   int cur;
00167   boolean ret;
00168 
00169   /* copy data to store point */
00170   memcpy(db->mfcc[db->store], new_mfcc, sizeof(float) * db->veclen);
00171   db->is_on[db->store] = TRUE;
00172 
00173   /* get current calculation point */
00174   cur = db->store - db->win;
00175   if (cur < 0) cur += db->len;
00176 
00177   /* if the current point is fulfilled, compute delta  */
00178   if (db->is_on[cur]) {
00179     WMP_deltabuf_calc(db, cur);
00180     db->vec = db->mfcc[cur];
00181     ret = TRUE;
00182   } else {
00183     ret = FALSE;
00184   }
00185 
00186   /* move store pointer to next */
00187   db->store++;
00188   if (db->store >= db->len) db->store -= db->len;
00189 
00190   /* return TRUE if delta computed for current, or -1 if not calculated yet */
00191   return (ret);
00192 }
00193 
00205 boolean
00206 WMP_deltabuf_flush(DeltaBuf *db) 
00207 {
00208   int cur;
00209   boolean ret;
00210 
00211   /* clear store point */
00212   db->is_on[db->store] = FALSE;
00213 
00214   /* get current calculation point */
00215   cur = db->store - db->win;
00216   if (cur < 0) cur += db->len;
00217 
00218   /* if the current point if fulfilled, compute delta  */
00219   if (db->is_on[cur]) {
00220     WMP_deltabuf_calc(db, cur);
00221     db->vec = db->mfcc[cur];
00222     ret = TRUE;
00223   } else {
00224     ret = FALSE;
00225   }
00226 
00227   /* move store pointer to next */
00228   db->store++;
00229   if (db->store >= db->len) db->store -= db->len;
00230 
00231   /* return TRUE if delta computed for current, or -1 if not calculated yet */
00232   return (ret);
00233 }
00234 
00235 /***********************************************************************/
00236 /* MAP-CMN */
00237 /***********************************************************************/
00238 
00246 CMNWork *
00247 CMN_realtime_new(Value *para, float weight)
00248 {
00249   int i;
00250 
00251   CMNWork *c;
00252 
00253   c = (CMNWork *)mymalloc(sizeof(CMNWork));
00254 
00255   c->cweight = weight;
00256   c->mfcc_dim = para->mfcc_dim + (para->c0 ? 1 : 0);
00257   c->veclen = para->veclen;
00258   c->mean = para->cmn ? TRUE : FALSE;
00259   c->var = para->cvn ? TRUE : FALSE;
00260   c->clist_max = CPSTEP;
00261   c->clist_num = 0;
00262   c->clist = (CMEAN *)mymalloc(sizeof(CMEAN) * c->clist_max);
00263   for(i=0;i<c->clist_max;i++) {
00264     c->clist[i].mfcc_sum = (float *)mymalloc(sizeof(float)*c->veclen);
00265     if (c->var) c->clist[i].mfcc_var = (float *)mymalloc(sizeof(float)*c->veclen);
00266     c->clist[i].framenum = 0;
00267   }
00268   c->now.mfcc_sum = (float *)mymalloc(sizeof(float) * c->veclen);
00269   if (c->var) c->now.mfcc_var = (float *)mymalloc(sizeof(float) * c->veclen);
00270 
00271   c->cmean_init = (float *)mymalloc(sizeof(float) * c->veclen);
00272   if (c->var) c->cvar_init = (float *)mymalloc(sizeof(float) * c->veclen);
00273   c->cmean_init_set = FALSE;
00274 
00275   return c;
00276 }
00277 
00284 void
00285 CMN_realtime_free(CMNWork *c)
00286 {
00287   int i;
00288 
00289   free(c->cmean_init);
00290   free(c->now.mfcc_sum);
00291   if (c->var) {
00292     free(c->cvar_init);
00293     free(c->now.mfcc_var);
00294   }
00295   for(i=0;i<c->clist_max;i++) {
00296     if (c->var) free(c->clist[i].mfcc_var);
00297     free(c->clist[i].mfcc_sum);
00298   }
00299   free(c->clist);
00300   free(c);
00301 }
00302 
00308 void
00309 CMN_realtime_prepare(CMNWork *c)
00310 {
00311   int d;
00312   
00313   for(d=0;d<c->veclen;d++) c->now.mfcc_sum[d] = 0.0;
00314   if (c->var) {
00315     for(d=0;d<c->veclen;d++) c->now.mfcc_var[d] = 0.0;
00316   }
00317   c->now.framenum = 0;
00318 }
00319 
00327 void
00328 CMN_realtime(CMNWork *c, float *mfcc)
00329 {
00330   int d;
00331   double x, y;
00332 
00333   c->now.framenum++;
00334   if (c->cmean_init_set) {
00335     /* initial data exists */
00336     for(d=0;d<c->veclen;d++) {
00337       /* accumulate current MFCC to sum */
00338       c->now.mfcc_sum[d] += mfcc[d];
00339       /* calculate map-mean */
00340       x = c->now.mfcc_sum[d] + c->cweight * c->cmean_init[d];
00341       y = (double)c->now.framenum + c->cweight;
00342       x /= y;
00343       if (c->var) {
00344         /* calculate map-var */
00345         c->now.mfcc_var[d] += (mfcc[d] - x) * (mfcc[d] - x);
00346       }
00347       if (c->mean && d < c->mfcc_dim) {
00348         /* mean normalization */
00349         mfcc[d] -= x;
00350       }
00351       if (c->var) {
00352         /* variance normalization */
00353         x = c->now.mfcc_var[d] + c->cweight * c->cvar_init[d];
00354         y = (double)c->now.framenum + c->cweight;
00355         mfcc[d] /= sqrt(x / y);
00356       }
00357     }
00358   } else {
00359     /* no initial data */
00360     for(d=0;d<c->veclen;d++) {
00361       /* accumulate current MFCC to sum */
00362       c->now.mfcc_sum[d] += mfcc[d];
00363       /* calculate current mean */
00364       x = c->now.mfcc_sum[d] / c->now.framenum;
00365       if (c->var) {
00366         /* calculate current variance */
00367         c->now.mfcc_var[d] += (mfcc[d] - x) * (mfcc[d] - x);
00368       }
00369       if (c->mean && d < c->mfcc_dim) {
00370         /* mean normalization */
00371         mfcc[d] -= x;
00372       }
00373 #if 0      /* not perform variance normalization on no initial data */
00374       if (c->var) {
00375         /* variance normalization */
00376         mfcc[d] /= sqrt(c->now.mfcc_var[d] / c->now.framenum);
00377       }
00378 #endif
00379     }
00380   }
00381 }
00382 
00388 void
00389 CMN_realtime_update(CMNWork *c, HTK_Param *param)
00390 {
00391   float *tmp, *tmp2;
00392   int i, d;
00393   int frames;
00394 
00395   /* if CMN_realtime was never called before this, return immediately */
00396   /* this may occur by pausing just after startup */
00397   if (c->now.framenum == 0) return;
00398 
00399   /* re-calculate variance based on the final mean at the given param */
00400   if (c->var && param != NULL) {
00401     float m, x;
00402     if (param->samplenum != c->now.framenum) {
00403       jlog("InternalError: CMN_realtime_update: param->samplenum != c->now.framenum\n");
00404     } else if (param->veclen != c->veclen) {
00405       jlog("InternalError: CMN_realtime_update: param->veclen != c->veclen\n");
00406     } else {
00407       for(d=0;d<c->veclen;d++) {
00408         m = c->now.mfcc_sum[d] / (float) c->now.framenum;
00409         x = 0;
00410         for(i=0;i<param->samplenum;i++) {
00411           x += (param->parvec[i][d] - m) * (param->parvec[i][d] - m);
00412         }
00413         c->now.mfcc_var[d] = x;
00414       }
00415     }
00416   }
00417 
00418   /* compute cepstral mean from now and previous sums up to CPMAX frames */
00419   for(d=0;d<c->veclen;d++) c->cmean_init[d] = c->now.mfcc_sum[d];
00420   if (c->var) {
00421     for(d=0;d<c->veclen;d++) c->cvar_init[d] = c->now.mfcc_var[d];
00422   }
00423   frames = c->now.framenum;
00424   for(i=0;i<c->clist_num;i++) {
00425     for(d=0;d<c->veclen;d++) c->cmean_init[d] += c->clist[i].mfcc_sum[d];
00426     if (c->var) {
00427       for(d=0;d<c->veclen;d++) c->cvar_init[d] += c->clist[i].mfcc_var[d];
00428     }
00429     frames += c->clist[i].framenum;
00430     if (frames >= CPMAX) break;
00431   }
00432   for(d=0;d<c->veclen;d++) c->cmean_init[d] /= (float) frames;
00433   if (c->var) {
00434     for(d=0;d<c->veclen;d++) c->cvar_init[d] /= (float) frames;
00435   }
00436 
00437   c->cmean_init_set = TRUE;
00438 
00439   /* expand clist if neccessary */
00440   if (c->clist_num == c->clist_max && frames < CPMAX) {
00441     c->clist_max += CPSTEP;
00442     c->clist = (CMEAN *)myrealloc(c->clist, sizeof(CMEAN) * c->clist_max);
00443     for(i=c->clist_num;i<c->clist_max;i++) {
00444       c->clist[i].mfcc_sum = (float *)mymalloc(sizeof(float)*c->veclen);
00445       if (c->var) c->clist[i].mfcc_var = (float *)mymalloc(sizeof(float)*c->veclen);
00446       c->clist[i].framenum = 0;
00447     }
00448   }
00449   
00450   /* shift clist */
00451   tmp = c->clist[c->clist_max-1].mfcc_sum;
00452   if (c->var) tmp2 = c->clist[c->clist_max-1].mfcc_var;
00453   memmove(&(c->clist[1]), &(c->clist[0]), sizeof(CMEAN) * (c->clist_max - 1));
00454   c->clist[0].mfcc_sum = tmp;
00455   if (c->var) c->clist[0].mfcc_var = tmp2;
00456   /* copy now to clist[0] */
00457   memcpy(c->clist[0].mfcc_sum, c->now.mfcc_sum, sizeof(float) * c->veclen);
00458   if (c->var) memcpy(c->clist[0].mfcc_var, c->now.mfcc_var, sizeof(float) * c->veclen);
00459   c->clist[0].framenum = c->now.framenum;
00460 
00461   if (c->clist_num < c->clist_max) c->clist_num++;
00462 
00463 }
00464 
00475 static boolean
00476 myread(void *buf, size_t unitbyte, int unitnum, FILE *fp)
00477 {
00478   if (myfread(buf, unitbyte, unitnum, fp) < (size_t)unitnum) {
00479     return(FALSE);
00480   }
00481 #ifndef WORDS_BIGENDIAN
00482   swap_bytes(buf, unitbyte, unitnum);
00483 #endif
00484   return(TRUE);
00485 }
00486 
00497 static boolean
00498 mywrite(void *buf, size_t unitbyte, size_t unitnum, int fd)
00499 {
00500 #ifndef WORDS_BIGENDIAN
00501   swap_bytes(buf, unitbyte, unitnum);
00502 #endif
00503   if (write(fd, buf, unitbyte * unitnum) < unitbyte * unitnum) {
00504     return(FALSE);
00505   }
00506 #ifndef WORDS_BIGENDIAN
00507   swap_bytes(buf, unitbyte, unitnum);
00508 #endif
00509   return(TRUE);
00510 }
00511 
00521 boolean
00522 CMN_load_from_file(CMNWork *c, char *filename)
00523 {
00524   FILE *fp;
00525   int veclen;
00526 
00527   jlog("Stat: wav2mfcc-pipe: reading initial CMN from file \"%s\"\n", filename);
00528   if ((fp = fopen_readfile(filename)) == NULL) {
00529     jlog("Error: wav2mfcc-pipe: failed to open\n");
00530     return(FALSE);
00531   }
00532   /* read header */
00533   if (myread(&veclen, sizeof(int), 1, fp) == FALSE) {
00534     jlog("Error: wav2mfcc-pipe: failed to read header\n");
00535     fclose_readfile(fp);
00536     return(FALSE);
00537   }
00538   /* check length */
00539   if (veclen != c->veclen) {
00540     jlog("Error: wav2mfcc-pipe: cepstral dimension mismatch\n");
00541     jlog("Error: wav2mfcc-pipe: process = %d, file = %d\n", c->veclen, veclen);
00542     fclose_readfile(fp);
00543     return(FALSE);
00544   }
00545   /* read body */
00546   if (myread(c->cmean_init, sizeof(float), c->veclen, fp) == FALSE) {
00547     jlog("Error: wav2mfcc-pipe: failed to read mean for CMN\n");
00548     fclose_readfile(fp);
00549     return(FALSE);
00550   }
00551   if (c->var) {
00552     if (myread(c->cvar_init, sizeof(float), c->veclen, fp) == FALSE) {
00553       jlog("Error: wav2mfcc-pipe: failed to read variance for CVN\n");
00554       fclose_readfile(fp);
00555       return(FALSE);
00556     }
00557   }
00558 
00559   if (fclose_readfile(fp) == -1) {
00560     jlog("Error: wav2mfcc-pipe: failed to close\n");
00561     return(FALSE);
00562   }
00563 
00564   c->cmean_init_set = TRUE;
00565   jlog("Stat: wav2mfcc-pipe: read CMN parameter\n");
00566 
00567   return(TRUE);
00568 }
00569 
00578 boolean
00579 CMN_save_to_file(CMNWork *c, char *filename)
00580 {
00581   int fd;
00582 
00583   jlog("Stat: wav2mfcc-pipe: writing current cepstral data to file \"%s\"\n", filename);
00584 
00585   if ((fd = creat(filename, 0644)) == -1) {
00586     jlog("Error: wav2mfcc-pipe: failed to open \"%s\" to write current cepstral data\n", filename);
00587     return(FALSE);
00588   }
00589   /* write header */
00590   if (mywrite(&(c->veclen), sizeof(int), 1, fd) == FALSE) {
00591     jlog("Error: wav2mfcc-pipe: cannot write header to \"%s\" as current cepstral data\n", filename);
00592     close(fd);
00593     return(FALSE);
00594   }
00595   /* write body */
00596   if (mywrite(c->cmean_init, sizeof(float), c->veclen, fd) == FALSE) {
00597     jlog("Error: wav2mfcc-pipe: cannot write mean to \"%s\" as current cepstral data\n", filename);
00598     close(fd);
00599     return(FALSE);
00600   }
00601   if (c->var) {
00602     if (mywrite(c->cvar_init, sizeof(float), c->veclen, fd) == FALSE) {
00603       jlog("Error: wav2mfcc-pipe: cannot write variance to \"%s\" as current cepstrum\n", filename);
00604       close(fd);
00605       return(FALSE);
00606     }
00607   }
00608 
00609   close(fd);
00610 
00611   jlog("Stat: wav2mfcc-pipe: current cepstral data written to \"%s\"\n", filename);
00612   
00613   return(TRUE);
00614 }
00615 
00616 
00617 /***********************************************************************/
00618 /* energy normalization and scaling on live input */
00619 /***********************************************************************/
00620 
00628 void
00629 energy_max_init(ENERGYWork *energy)
00630 {
00631   energy->max = 5.0;
00632 }
00633 
00641 void
00642 energy_max_prepare(ENERGYWork *energy, Value *para)
00643 {
00644   energy->max_last = energy->max;
00645   energy->min_last = energy->max - (para->silFloor * LOG_TEN) / 10.0;
00646   energy->max = 0.0;
00647 }
00648 
00658 LOGPROB
00659 energy_max_normalize(ENERGYWork *energy, LOGPROB f, Value *para)
00660 {
00661   if (energy->max < f) energy->max = f;
00662   if (f < energy->min_last) f = energy->min_last;
00663   return(1.0 - (energy->max_last - f) * para->escale);
00664 }