libsent/src/wav2mfcc/wav2mfcc-pipe.c

Go to the documentation of this file.
00001 
00026 /*
00027  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
00028  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00029  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
00030  * All rights reserved
00031  */
00032 
00033 /* wav2mfcc-pipe.c --- split Wav2MFCC to perform per-frame-basis,
00034    and also realtime CMN for 1st-pass pipe-lining */
00035 
00036 /************************************************************************/
00037 /*    wav2mfcc.c   Convert Speech file to MFCC_E_D_(Z) file             */
00038 /*----------------------------------------------------------------------*/
00039 /*    Author    : Yuichiro Nakano                                       */
00040 /*                                                                      */
00041 /*    Copyright(C) Yuichiro Nakano 1996-1998                            */
00042 /*----------------------------------------------------------------------*/
00043 /************************************************************************/
00044 
00045 
00046 #include <sent/stddefs.h>
00047 #include <sent/mfcc.h>
00048 
00049 /***********************************************************************/
00058 DeltaBuf *
00059 WMP_deltabuf_new(int veclen, int windowlen)
00060 {
00061   int i;
00062   DeltaBuf *db;
00063 
00064   db = (DeltaBuf *)mymalloc(sizeof(DeltaBuf));
00065   db->veclen = veclen;
00066   db->win = windowlen;
00067   db->len = windowlen * 2 + 1;
00068   db->mfcc = (float **)mymalloc(sizeof(float *) * db->len);
00069   db->is_on = (boolean *) mymalloc(sizeof(boolean) * db->len);
00070   for (i=0;i<db->len;i++) {
00071     db->mfcc[i] = (float *)mymalloc(sizeof(float) * veclen * 2);
00072   }
00073   db->B = 0;
00074   for(i = 1; i <= windowlen; i++) db->B += i * i;
00075   db->B *= 2;
00076 
00077   return (db);
00078 }
00079 
00085 void
00086 WMP_deltabuf_free(DeltaBuf *db)
00087 {
00088   int i;
00089 
00090   for (i=0;i<db->len;i++) {
00091     free(db->mfcc[i]);
00092   }
00093   free(db->is_on);
00094   free(db->mfcc);
00095   free(db);
00096 }
00097 
00103 void
00104 WMP_deltabuf_prepare(DeltaBuf *db)
00105 {
00106   int i;
00107   db->store = 0;
00108   for (i=0;i<db->len;i++) {
00109     db->is_on[i] = FALSE;
00110   }
00111 }
00112 
00119 static void
00120 WMP_deltabuf_calc(DeltaBuf *db, int cur)
00121 {
00122   int n, theta, p;
00123   float A1, A2, sum;
00124   int last_valid_left, last_valid_right;
00125   
00126   for (n = 0; n < db->veclen; n++) {
00127     sum = 0.0;
00128     last_valid_left = last_valid_right = cur;
00129     for (theta = 1; theta <= db->win; theta++) {
00130       p = cur - theta;
00131       if (p < 0) p += db->len;
00132       if (db->is_on[p]) {
00133         A1 = db->mfcc[p][n];
00134         last_valid_left = p;
00135       } else {
00136         A1 = db->mfcc[last_valid_left][n];
00137       }
00138       p = cur + theta;
00139       if (p >= db->len) p -= db->len;
00140       if (db->is_on[p]) {
00141         A2 = db->mfcc[p][n];
00142         last_valid_right = p;
00143       } else {
00144         A2 = db->mfcc[last_valid_right][n];
00145       }
00146       sum += theta * (A2 - A1);
00147     }
00148     db->mfcc[cur][db->veclen + n] = sum / db->B;
00149   }
00150 }
00151 
00162 boolean
00163 WMP_deltabuf_proceed(DeltaBuf *db, float *new_mfcc) 
00164 {
00165   int cur;
00166   boolean ret;
00167 
00168   /* copy data to store point */
00169   memcpy(db->mfcc[db->store], new_mfcc, sizeof(float) * db->veclen);
00170   db->is_on[db->store] = TRUE;
00171 
00172   /* get current calculation point */
00173   cur = db->store - db->win;
00174   if (cur < 0) cur += db->len;
00175 
00176   /* if the current point is fulfilled, compute delta  */
00177   if (db->is_on[cur]) {
00178     WMP_deltabuf_calc(db, cur);
00179     db->vec = db->mfcc[cur];
00180     ret = TRUE;
00181   } else {
00182     ret = FALSE;
00183   }
00184 
00185   /* move store pointer to next */
00186   db->store++;
00187   if (db->store >= db->len) db->store -= db->len;
00188 
00189   /* return TRUE if delta computed for current, or -1 if not calculated yet */
00190   return (ret);
00191 }
00192 
00204 boolean
00205 WMP_deltabuf_flush(DeltaBuf *db) 
00206 {
00207   int cur;
00208   boolean ret;
00209 
00210   /* clear store point */
00211   db->is_on[db->store] = FALSE;
00212 
00213   /* get current calculation point */
00214   cur = db->store - db->win;
00215   if (cur < 0) cur += db->len;
00216 
00217   /* if the current point if fulfilled, compute delta  */
00218   if (db->is_on[cur]) {
00219     WMP_deltabuf_calc(db, cur);
00220     db->vec = db->mfcc[cur];
00221     ret = TRUE;
00222   } else {
00223     ret = FALSE;
00224   }
00225 
00226   /* move store pointer to next */
00227   db->store++;
00228   if (db->store >= db->len) db->store -= db->len;
00229 
00230   /* return TRUE if delta computed for current, or -1 if not calculated yet */
00231   return (ret);
00232 }
00233 
00234 /***********************************************************************/
00235 /* MAP-CMN */
00236 /***********************************************************************/
00237 
00245 CMNWork *
00246 CMN_realtime_new(int dimension, float weight)
00247 {
00248   int i;
00249 
00250   CMNWork *c;
00251 
00252   c = (CMNWork *)mymalloc(sizeof(CMNWork));
00253 
00254   c->dim = dimension;
00255   c->cweight = weight;
00256 
00257   c->clist_max = CPSTEP;
00258   c->clist_num = 0;
00259   c->clist = (CMEAN *)mymalloc(sizeof(CMEAN) * c->clist_max);
00260   for(i=0;i<c->clist_max;i++) {
00261     c->clist[i].mfcc_sum = (float *)mymalloc(sizeof(float)*c->dim);
00262     c->clist[i].framenum = 0;
00263   }
00264   c->now.mfcc_sum = (float *)mymalloc(sizeof(float) * c->dim);
00265 
00266   c->cmean_init = (float *)mymalloc(sizeof(float) * c->dim);
00267   c->cmean_init_set = FALSE;
00268 
00269   return c;
00270 }
00271 
00278 void
00279 CMN_realtime_free(CMNWork *c)
00280 {
00281   int i;
00282 
00283   free(c->cmean_init);
00284   free(c->now.mfcc_sum);
00285   for(i=0;i<c->clist_max;i++) {
00286     free(c->clist[i].mfcc_sum);
00287   }
00288   free(c->clist);
00289   free(c);
00290 }
00291 
00297 void
00298 CMN_realtime_prepare(CMNWork *c)
00299 {
00300   int d;
00301   
00302   for(d=0;d<c->dim;d++) c->now.mfcc_sum[d] = 0.0;
00303   c->now.framenum = 0;
00304 }
00305 
00313 void
00314 CMN_realtime(CMNWork *c, float *mfcc)
00315 {
00316   int d;
00317   double x, y;
00318 
00319   c->now.framenum++;
00320   if (c->cmean_init_set) {
00321     for(d=0;d<c->dim;d++) {
00322       /* accumulate value of given MFCC to sum */
00323       c->now.mfcc_sum[d] += mfcc[d];
00324       /* calculate map-cmn and perform subtraction to the given vector */
00325       x = c->now.mfcc_sum[d] + c->cweight * c->cmean_init[d];
00326       y = (double)c->now.framenum + c->cweight;
00327       mfcc[d] -= x / y;
00328     }
00329   } else {
00330     for(d=0;d<c->dim;d++) {
00331       c->now.mfcc_sum[d] += mfcc[d];
00332       mfcc[d] -= c->now.mfcc_sum[d] / c->now.framenum;
00333     }
00334   }
00335 }
00336 
00342 void
00343 CMN_realtime_update(CMNWork *c)
00344 {
00345   float *tmp;
00346   int i, d;
00347   int frames;
00348 
00349   /* if CMN_realtime was never called before this, return immediately */
00350   /* this may occur by pausing just after startup */
00351   if (c->now.framenum == 0) return;
00352 
00353   /* compute cepstral mean from now and previous sums up to CPMAX frames */
00354   for(d=0;d<c->dim;d++) c->cmean_init[d] = c->now.mfcc_sum[d];
00355   frames = c->now.framenum;
00356   for(i=0;i<c->clist_num;i++) {
00357     for(d=0;d<c->dim;d++) c->cmean_init[d] += c->clist[i].mfcc_sum[d];
00358     frames += c->clist[i].framenum;
00359     if (frames >= CPMAX) break;
00360   }
00361   for(d=0;d<c->dim;d++) c->cmean_init[d] /= (float) frames;
00362   c->cmean_init_set = TRUE;
00363 
00364   /* expand clist if neccessary */
00365   if (c->clist_num == c->clist_max && frames < CPMAX) {
00366     c->clist_max += CPSTEP;
00367     c->clist = (CMEAN *)myrealloc(c->clist, sizeof(CMEAN) * c->clist_max);
00368     for(i=c->clist_num;i<c->clist_max;i++) {
00369       c->clist[i].mfcc_sum = (float *)mymalloc(sizeof(float)*c->dim);
00370       c->clist[i].framenum = 0;
00371     }
00372   }
00373   
00374   /* shift clist */
00375   tmp = c->clist[c->clist_max-1].mfcc_sum;
00376   memmove(&(c->clist[1]), &(c->clist[0]), sizeof(CMEAN) * (c->clist_max - 1));
00377   c->clist[0].mfcc_sum = tmp;
00378   /* copy now to clist[0] */
00379   memcpy(c->clist[0].mfcc_sum, c->now.mfcc_sum, sizeof(float) * c->dim);
00380   c->clist[0].framenum = c->now.framenum;
00381 
00382   if (c->clist_num < c->clist_max) c->clist_num++;
00383 
00384 }
00385 
00396 static boolean
00397 myread(void *buf, size_t unitbyte, int unitnum, FILE *fp)
00398 {
00399   if (myfread(buf, unitbyte, unitnum, fp) < (size_t)unitnum) {
00400     return(FALSE);
00401   }
00402 #ifndef WORDS_BIGENDIAN
00403   swap_bytes(buf, unitbyte, unitnum);
00404 #endif
00405   return(TRUE);
00406 }
00407 
00418 static boolean
00419 mywrite(void *buf, size_t unitbyte, size_t unitnum, int fd)
00420 {
00421 #ifndef WORDS_BIGENDIAN
00422   swap_bytes(buf, unitbyte, unitnum);
00423 #endif
00424   if (write(fd, buf, unitbyte * unitnum) < unitbyte * unitnum) {
00425     return(FALSE);
00426   }
00427 #ifndef WORDS_BIGENDIAN
00428   swap_bytes(buf, unitbyte, unitnum);
00429 #endif
00430   return(TRUE);
00431 }
00432 
00442 boolean
00443 CMN_load_from_file(CMNWork *c, char *filename)
00444 {
00445   FILE *fp;
00446   int veclen;
00447 
00448   jlog("Stat: wav2mfcc-pipe: reading initial CMN from file \"%s\"\n", filename);
00449   if ((fp = fopen_readfile(filename)) == NULL) {
00450     jlog("Error: wav2mfcc-pipe: failed to open\n");
00451     return(FALSE);
00452   }
00453   /* read header */
00454   if (myread(&veclen, sizeof(int), 1, fp) == FALSE) {
00455     jlog("Error: wav2mfcc-pipe: failed to read header\n");
00456     fclose_readfile(fp);
00457     return(FALSE);
00458   }
00459   /* check length */
00460   if (veclen != c->dim) {
00461     jlog("Error: wav2mfcc-pipe: vector dimension mismatch\n");
00462     fclose_readfile(fp);
00463     return(FALSE);
00464   }
00465   /* read body */
00466   if (myread(c->cmean_init, sizeof(float), c->dim, fp) == FALSE) {
00467     jlog("Error: wav2mfcc-pipe: failed to read\n");
00468     fclose_readfile(fp);
00469     return(FALSE);
00470   }
00471   if (fclose_readfile(fp) == -1) {
00472     jlog("Error: wav2mfcc-pipe: failed to close\n");
00473     return(FALSE);
00474   }
00475 
00476   c->cmean_init_set = TRUE;
00477   jlog("Stat: wav2mfcc-pipe: read CMN parameter\n");
00478 
00479   return(TRUE);
00480 }
00481 
00490 boolean
00491 CMN_save_to_file(CMNWork *c, char *filename)
00492 {
00493   int fd;
00494 
00495   jlog("Stat: wav2mfcc-pipe: writing current CM to file \"%s\"\n", filename);
00496 
00497   if ((fd = creat(filename, 0644)) == -1) {
00498     jlog("Error: wav2mfcc-pipe: failed to open\n");
00499     return(FALSE);
00500   }
00501   /* write header */
00502   if (mywrite(&(c->dim), sizeof(int), 1, fd) == FALSE) {
00503     jlog("Error: wav2mfcc-pipe: failed to write header\n");
00504     close(fd);
00505     return(FALSE);
00506   }
00507   /* write body */
00508   if (mywrite(c->cmean_init, sizeof(float), c->dim, fd) == FALSE) {
00509     jlog("Error: wav2mfcc-pipe: failed to write header\n");
00510     close(fd);
00511     return(FALSE);
00512   }
00513   close(fd);
00514 
00515   jlog("Stat: wav2mfcc-pipe: wrote current CM\n");
00516   
00517   return(TRUE);
00518 }
00519 
00520 
00521 /***********************************************************************/
00522 /* energy normalization and scaling on live input */
00523 /***********************************************************************/
00524 
00532 void
00533 energy_max_init(ENERGYWork *energy)
00534 {
00535   energy->max = 5.0;
00536 }
00537 
00545 void
00546 energy_max_prepare(ENERGYWork *energy, Value *para)
00547 {
00548   energy->max_last = energy->max;
00549   energy->min_last = energy->max - (para->silFloor * LOG_TEN) / 10.0;
00550   energy->max = 0.0;
00551 }
00552 
00562 LOGPROB
00563 energy_max_normalize(ENERGYWork *energy, LOGPROB f, Value *para)
00564 {
00565   if (energy->max < f) energy->max = f;
00566   if (f < energy->min_last) f = energy->min_last;
00567   return(1.0 - (energy->max_last - f) * para->escale);
00568 }

Generated on Tue Dec 18 15:59:57 2007 for Julius by  doxygen 1.5.4