libsent/src/wav2mfcc/wav2mfcc-pipe.c

説明を見る。
00001 
00025 /*
00026  * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
00027  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00028  * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology
00029  * All rights reserved
00030  */
00031 
00032 /* wav2mfcc-pipe.c --- split Wav2MFCC to perform per-frame-basis,
00033    and also realtime CMN for 1st-pass pipe-lining */
00034 
00035 /************************************************************************/
00036 /*    wav2mfcc.c   Convert Speech file to MFCC_E_D_(Z) file             */
00037 /*----------------------------------------------------------------------*/
00038 /*    Author    : Yuichiro Nakano                                       */
00039 /*                                                                      */
00040 /*    Copyright(C) Yuichiro Nakano 1996-1998                            */
00041 /*----------------------------------------------------------------------*/
00042 /************************************************************************/
00043 
00044 
00045 #include <sent/stddefs.h>
00046 #include <sent/mfcc.h>
00047 
00056 void
00057 WMP_init(Value para, float **bf, float *ssbuf, int ssbuflen)
00058 {
00059   int bflen;
00060 
00061   /* initialize module */
00062   WMP_calc_init(para, bf, &bflen);
00063 
00064   if (ssbuf != NULL) {
00065     /* check ssbuf length */
00066     if (ssbuflen != bflen) {
00067       j_error("Error: Wav2MFCC: noise spectrum length not match\n");
00068     }
00069   }
00070 
00071 }
00072 
00073 /***********************************************************************/
00082 DeltaBuf *
00083 WMP_deltabuf_new(int veclen, int windowlen)
00084 {
00085   int i;
00086   DeltaBuf *db;
00087 
00088   db = (DeltaBuf *)mymalloc(sizeof(DeltaBuf));
00089   db->veclen = veclen;
00090   db->win = windowlen;
00091   db->len = windowlen * 2 + 1;
00092   db->mfcc = (float **)mymalloc(sizeof(float *) * db->len);
00093   db->is_on = (boolean *) mymalloc(sizeof(boolean) * db->len);
00094   for (i=0;i<db->len;i++) {
00095     db->mfcc[i] = (float *)mymalloc(sizeof(float) * veclen * 2);
00096   }
00097   db->B = 0;
00098   for(i = 1; i <= windowlen; i++) db->B += i * i;
00099   db->B *= 2;
00100 
00101   return (db);
00102 }
00103 
00109 void
00110 WMP_deltabuf_free(DeltaBuf *db)
00111 {
00112   int i;
00113 
00114   for (i=0;i<db->len;i++) {
00115     free(db->mfcc[i]);
00116   }
00117   free(db->is_on);
00118   free(db->mfcc);
00119   free(db);
00120 }
00121 
00127 void
00128 WMP_deltabuf_prepare(DeltaBuf *db)
00129 {
00130   int i;
00131   db->store = 0;
00132   for (i=0;i<db->len;i++) {
00133     db->is_on[i] = FALSE;
00134   }
00135 }
00136 
00143 static void
00144 WMP_deltabuf_calc(DeltaBuf *db, int cur)
00145 {
00146   int n, theta, p;
00147   float A1, A2, sum;
00148   int last_valid_left, last_valid_right;
00149   
00150   for (n = 0; n < db->veclen; n++) {
00151     sum = 0.0;
00152     last_valid_left = last_valid_right = cur;
00153     for (theta = 1; theta <= db->win; theta++) {
00154       p = cur - theta;
00155       if (p < 0) p += db->len;
00156       if (db->is_on[p]) {
00157         A1 = db->mfcc[p][n];
00158         last_valid_left = p;
00159       } else {
00160         A1 = db->mfcc[last_valid_left][n];
00161       }
00162       p = cur + theta;
00163       if (p >= db->len) p -= db->len;
00164       if (db->is_on[p]) {
00165         A2 = db->mfcc[p][n];
00166         last_valid_right = p;
00167       } else {
00168         A2 = db->mfcc[last_valid_right][n];
00169       }
00170       sum += theta * (A2 - A1);
00171     }
00172     db->mfcc[cur][db->veclen + n] = sum / db->B;
00173   }
00174 }
00175 
00186 boolean
00187 WMP_deltabuf_proceed(DeltaBuf *db, float *new_mfcc) 
00188 {
00189   int cur;
00190   boolean ret;
00191 
00192   /* copy data to store point */
00193   memcpy(db->mfcc[db->store], new_mfcc, sizeof(float) * db->veclen);
00194   db->is_on[db->store] = TRUE;
00195 
00196   /* get current calculation point */
00197   cur = db->store - db->win;
00198   if (cur < 0) cur += db->len;
00199 
00200   /* if the current point is fulfilled, compute delta  */
00201   if (db->is_on[cur]) {
00202     WMP_deltabuf_calc(db, cur);
00203     db->vec = db->mfcc[cur];
00204     ret = TRUE;
00205   } else {
00206     ret = FALSE;
00207   }
00208 
00209   /* move store pointer to next */
00210   db->store++;
00211   if (db->store >= db->len) db->store -= db->len;
00212 
00213   /* return TRUE if delta computed for current, or -1 if not calculated yet */
00214   return (ret);
00215 }
00216 
00228 boolean
00229 WMP_deltabuf_flush(DeltaBuf *db) 
00230 {
00231   int cur;
00232   boolean ret;
00233 
00234   /* clear store point */
00235   db->is_on[db->store] = FALSE;
00236 
00237   /* get current calculation point */
00238   cur = db->store - db->win;
00239   if (cur < 0) cur += db->len;
00240 
00241   /* if the current point if fulfilled, compute delta  */
00242   if (db->is_on[cur]) {
00243     WMP_deltabuf_calc(db, cur);
00244     db->vec = db->mfcc[cur];
00245     ret = TRUE;
00246   } else {
00247     ret = FALSE;
00248   }
00249 
00250   /* move store pointer to next */
00251   db->store++;
00252   if (db->store >= db->len) db->store -= db->len;
00253 
00254   /* return TRUE if delta computed for current, or -1 if not calculated yet */
00255   return (ret);
00256 }
00257 
00258 /***********************************************************************/
00259 /* MAP-CMN */
00260 /***********************************************************************/
00261 
00262 #define CPMAX 500               
00263 
00264 
00268 typedef struct {
00269   float *mfcc_sum;              
00270   int framenum;                 
00271 } CMEAN;
00272 #define CPSTEP 5                
00273 static CMEAN *clist;            
00274 static int clist_max;           
00275 static int clist_num;           
00276 static int dim;                 
00277 static float cweight;           
00278 static float *cmean_init;       
00279 static boolean cmean_init_set;  
00280 static CMEAN now;               
00281 
00282 
00289 void
00290 CMN_realtime_init(int dimension, float weight)
00291 {
00292   int i;
00293 
00294   dim = dimension;
00295   cweight = weight;
00296 
00297   clist_max = CPSTEP;
00298   clist_num = 0;
00299   clist = (CMEAN *)mymalloc(sizeof(CMEAN) * clist_max);
00300   for(i=0;i<clist_max;i++) {
00301     clist[i].mfcc_sum = (float *)mymalloc(sizeof(float)*dim);
00302     clist[i].framenum = 0;
00303   }
00304 
00305   now.mfcc_sum = (float *)mymalloc(sizeof(float) * dim);
00306 
00307   cmean_init = (float *)mymalloc(sizeof(float) * dim);
00308   cmean_init_set = FALSE;
00309 
00310 }
00311 
00316 void
00317 CMN_realtime_prepare()
00318 {
00319   int d;
00320   for(d=0;d<dim;d++) now.mfcc_sum[d] = 0.0;
00321   now.framenum = 0;
00322 }
00323 
00331 void
00332 CMN_realtime(float *mfcc, int dim)
00333 {
00334   int d;
00335   double x, y;
00336 
00337   now.framenum++;
00338   if (cmean_init_set) {
00339     for(d=0;d<dim;d++) {
00340       /* accumulate value of given MFCC to sum */
00341       now.mfcc_sum[d] += mfcc[d];
00342       /* calculate map-cmn and perform subtraction to the given vector */
00343       x = now.mfcc_sum[d] + cweight * cmean_init[d];
00344       y = (double)now.framenum + cweight;
00345       mfcc[d] -= x / y;
00346     }
00347   } else {
00348     for(d=0;d<dim;d++) {
00349       now.mfcc_sum[d] += mfcc[d];
00350       mfcc[d] -= now.mfcc_sum[d] / now.framenum;
00351     }
00352   }
00353 }
00354 
00359 void
00360 CMN_realtime_update()
00361 {
00362   float *tmp;
00363   int i, d;
00364   int frames;
00365 
00366   /* if CMN_realtime was never called before this, return immediately */
00367   /* this may occur by pausing just after startup */
00368   if (now.framenum == 0) return;
00369 
00370   /* compute cepstral mean from now and previous sums up to CPMAX frames */
00371   for(d=0;d<dim;d++) cmean_init[d] = now.mfcc_sum[d];
00372   frames = now.framenum;
00373   for(i=0;i<clist_num;i++) {
00374     for(d=0;d<dim;d++) cmean_init[d] += clist[i].mfcc_sum[d];
00375     frames += clist[i].framenum;
00376     if (frames >= CPMAX) break;
00377   }
00378   for(d=0;d<dim;d++) cmean_init[d] /= (float) frames;
00379   cmean_init_set = TRUE;
00380 
00381   /* expand clist if neccessary */
00382   if (clist_num == clist_max && frames < CPMAX) {
00383     clist_max += CPSTEP;
00384     clist = (CMEAN *)myrealloc(clist, sizeof(CMEAN) * clist_max);
00385     for(i=clist_num;i<clist_max;i++) {
00386       clist[i].mfcc_sum = (float *)mymalloc(sizeof(float)*dim);
00387       clist[i].framenum = 0;
00388     }
00389   }
00390   
00391   /* shift clist */
00392   tmp = clist[clist_max-1].mfcc_sum;
00393   memcpy(&(clist[1]), &(clist[0]), sizeof(CMEAN) * (clist_max - 1));
00394   clist[0].mfcc_sum = tmp;
00395   /* copy now to clist[0] */
00396   memcpy(clist[0].mfcc_sum, now.mfcc_sum, sizeof(float) * dim);
00397   clist[0].framenum = now.framenum;
00398 
00399   if (clist_num < clist_max) clist_num++;
00400 
00401 }
00402 
00413 static boolean
00414 myread(void *buf, size_t unitbyte, int unitnum, FILE *fp)
00415 {
00416   if (myfread(buf, unitbyte, unitnum, fp) < (size_t)unitnum) {
00417     return(FALSE);
00418   }
00419 #ifndef WORDS_BIGENDIAN
00420   swap_bytes(buf, unitbyte, unitnum);
00421 #endif
00422   return(TRUE);
00423 }
00424 
00435 static boolean
00436 mywrite(void *buf, size_t unitbyte, int unitnum, int fd)
00437 {
00438 #ifndef WORDS_BIGENDIAN
00439   swap_bytes(buf, unitbyte, unitnum);
00440 #endif
00441   if (write(fd, buf, unitbyte * unitnum) < unitbyte * unitnum) {
00442     return(FALSE);
00443   }
00444 #ifndef WORDS_BIGENDIAN
00445   swap_bytes(buf, unitbyte, unitnum);
00446 #endif
00447   return(TRUE);
00448 }
00449 
00459 boolean
00460 CMN_load_from_file(char *filename, int dim)
00461 {
00462   FILE *fp;
00463   int veclen;
00464   if ((fp = fopen_readfile(filename)) == NULL) {
00465     j_printerr("Error: CMN_load_from_file: failed to open\n");
00466     return(FALSE);
00467   }
00468   /* read header */
00469   if (myread(&veclen, sizeof(int), 1, fp) == FALSE) {
00470     j_printerr("Error: CMN_load_from_file: failed to read header\n");
00471     fclose_readfile(fp);
00472     return(FALSE);
00473   }
00474   /* check length */
00475   if (veclen != dim) {
00476     j_printerr("Error: CMN_load_from_file: vector dimension mismatch\n");
00477     fclose_readfile(fp);
00478     return(FALSE);
00479   }
00480   /* read body */
00481   if (myread(cmean_init, sizeof(float), dim, fp) == FALSE) {
00482     j_printerr("Error: CMN_load_from_file: failed to read\n");
00483     fclose_readfile(fp);
00484     return(FALSE);
00485   }
00486   if (fclose_readfile(fp) == -1) {
00487     j_printerr("Error: CMN_load_from_file: failed to close\n");
00488     return(FALSE);
00489   }
00490 
00491   cmean_init_set = TRUE;
00492 
00493   return(TRUE);
00494 }
00495 
00503 boolean
00504 CMN_save_to_file(char *filename)
00505 {
00506   int fd;
00507 
00508   if ((fd = creat(filename, 0644)) == -1) {
00509     j_printerr("Error: CMN_save_to_file: failed to open\n");
00510     return(FALSE);
00511   }
00512   /* write header */
00513   if (mywrite(&dim, sizeof(int), 1, fd) == FALSE) {
00514     j_printerr("Error: CMN_save_to_file: failed to write header\n");
00515     close(fd);
00516     return(FALSE);
00517   }
00518   /* write body */
00519   if (mywrite(cmean_init, sizeof(float), dim, fd) == FALSE) {
00520     j_printerr("Error: CMN_save_to_file: failed to write header\n");
00521     close(fd);
00522     return(FALSE);
00523   }
00524   close(fd);
00525   
00526   return(TRUE);
00527 }
00528 
00529 
00530 /***********************************************************************/
00531 /* energy normalization and scaling on live input */
00532 /***********************************************************************/
00533 static LOGPROB energy_max_last; 
00534 static LOGPROB energy_min_last; 
00535 static LOGPROB energy_max;      
00536 
00542 void
00543 energy_max_init()
00544 {
00545   energy_max = 5.0;
00546 }
00547 
00554 void
00555 energy_max_prepare(Value *para)
00556 {
00557   energy_max_last = energy_max;
00558   energy_min_last = energy_max - (para->silFloor * LOG_TEN) / 10.0;
00559   energy_max = 0.0;
00560 }
00561 
00570 LOGPROB
00571 energy_max_normalize(LOGPROB f, Value *para)
00572 {
00573   if (energy_max < f) energy_max = f;
00574   if (f < energy_min_last) f = energy_min_last;
00575   return(1.0 - (energy_max_last - f) * para->escale);
00576 }
00577 
00578   

Juliusに対してTue Dec 26 16:19:29 2006に生成されました。  doxygen 1.5.0