Julius: libsent/include/sent/mfcc.h Source File

00001 
00035 /************************************************************************/
00036 /*    mfcc.h                                                            */
00037 /*                                                                      */
00038 /*    Author    : Yuichiro Nakano                                       */
00039 /************************************************************************/
00040 
00041 #ifndef __MFCC_H__
00042 #define __MFCC_H__
00043 
/* Build-time switches and limits.
 * NOTE(review): the code that consumes these macros is not visible in this
 * header; the notes below are inferred from the names -- confirm in the
 * implementation files. */
00045 #undef MFCC_TABLE_DEBUG   /* force the table-debug switch off; presumably gates debug output for the sin/cos tables (cf. MFCC_SINCOS_TABLE) */
00046 
00047 #define CPMAX 500               /* upper limit used by the cepstral-mean bookkeeping (presumably for CMNWork.clist) -- TODO confirm what is counted */
00048 #define CPSTEP 5                /* presumably the allocation step when the cepstral-mean list grows -- TODO confirm */
00049 
00050 #include <sent/stddefs.h>
00051 #include <sent/htk_defs.h>
00052 #include <ctype.h>
00053 
/* Default values for the MFCC analysis parameters.
 * NOTE(review): which Value field each default feeds, and the units, are
 * inferred from the names (the code that applies them, e.g. in para.c, is not
 * visible here) -- confirm before relying on them. */
00054 #define DEF_SMPPERIOD   625     /* default sampling period; presumably in 100 ns units (625 -> 16 kHz) -- TODO confirm */
00055 #define DEF_FRAMESIZE   400     /* default analysis window length, presumably in samples */
00056 #define DEF_FFTNUM      512     /* default number of FFT points */
00057 #define DEF_FRAMESHIFT  160     /* default frame shift, presumably in samples */
00058 #define DEF_PREENPH     0.97    /* default pre-emphasis coefficient */
00059 #define DEF_MFCCDIM     12      /* default number of MFCC dimensions */
00060 #define DEF_CEPLIF      22      /* default cepstral liftering coefficient */
00061 #define DEF_FBANK       24      /* default number of filterbank channels */
00062 #define DEF_DELWIN      2       /* default delta window length */
00063 #define DEF_ACCWIN      2       /* default acceleration window length */
00064 #define DEF_SILFLOOR    50.0    /* default energy silence floor; presumably in dB -- TODO confirm */
00065 #define DEF_ESCALE      1.0     /* default scaling coefficient for log energy */
00066 
00067 #define DEF_SSALPHA     2.0     /* default alpha coefficient for spectral subtraction */
00068 #define DEF_SSFLOOR     0.5     /* default flooring coefficient for spectral subtraction */
00069 
00070 /* Version number associated with the Value parameter set.
         Version 2: ss_floor and ss_alpha were removed from it (in this header
         they are members of MFCCWork instead).
         NOTE(review): where this number is stored or checked is not visible
         here -- confirm in the code that embeds or loads Value. */
00071 #define VALUE_VERSION 2 
00072 
/*
 * Value: the MFCC analysis parameter set; passed as `Value *para` to most
 * functions declared in this header.
 * NOTE(review): the per-field notes are inferred from the field names and the
 * DEF_* defaults in this header; the code that reads or writes the fields is
 * not visible here -- confirm against para.c and mfcc-core.c.
 */
00074 typedef struct {
00075   long smp_period;      /* sampling period (presumably 100 ns units, cf. DEF_SMPPERIOD -- TODO confirm) */
00076   long smp_freq;        /* sampling frequency */
00077   int framesize;        /* analysis window length, presumably in samples */
00078   int frameshift;       /* frame shift, presumably in samples */
00079   float preEmph;        /* pre-emphasis coefficient */
00080   int lifter;           /* cepstral liftering coefficient */
00081   int fbank_num;        /* number of filterbank channels */
00082   int delWin;           /* delta window length */
00083   int accWin;           /* acceleration window length */
00084   float silFloor;       /* energy silence floor (presumably dB) */
00085   float escale;         /* scaling coefficient for log energy */
00086   int hipass;           /* high-frequency cut-off for filterbank analysis -- TODO confirm unit and "disabled" value */
00087   int lopass;           /* low-frequency cut-off for filterbank analysis -- TODO confirm unit and "disabled" value */
00088   int enormal;          /* flag: presumably enables energy normalisation */
00089   int raw_e;            /* flag: presumably selects raw energy */
00090   int zmeanframe;       /* flag: presumably enables per-frame zero-mean (cf. ZMeanFrame()) */
00091 
00092   /* items below do not need to be embedded, because they can be
00093      determined from the acoustic model header, or should be computed
00094      from run-time variables */
00095   int delta;            /* flag: presumably set when delta coefficients are used */
00096   int acc;              /* flag: presumably set when acceleration coefficients are used */
00097   int energy;           /* flag: presumably set when an energy term is used */
00098   int c0;               /* flag: presumably set when the 0'th cepstral parameter is used (cf. CalcC0()) */
00099   int absesup;          /* flag: presumably set when absolute energy is suppressed */
00100   int cmn;              /* flag: presumably set when cepstral mean normalization is used */
00101   int mfcc_dim;         /* number of MFCC dimensions */
00102   int baselen;          /* length of the base part of the vector -- TODO confirm what it includes */
00103   int vecbuflen;        /* vector length needed while computing -- TODO confirm */
00104   int veclen;           /* length of the resulting feature vector */
00105 
00106   int loaded;           /* flag: presumably non-zero once the parameters were loaded from an external source -- TODO confirm */
00107 }Value;
00108 
/*
 * FBankInfo: filterbank work data; embedded in MFCCWork as `fb` and released
 * through FreeFBank().
 * NOTE(review): field notes are inferred from the names; array lengths and
 * who allocates them are not visible in this header -- confirm in
 * mfcc-core.c (InitFBank).
 */
00110 typedef struct {
00111    int fftN;            /* number of FFT points */
00112    int n;               /* presumably log2(fftN) -- TODO confirm */
00113    int klo;             /* presumably the FFT index of the low cut-off */
00114    int khi;             /* presumably the FFT index of the high cut-off */
00115    float fres;          /* frequency resolution term (cf. the `fres` argument of Mel()) */
00116    float *cf;           /* presumably the channel centre frequencies */
00117    short *loChan;       /* presumably the lower filterbank channel index for each FFT bin */
00118    float *loWt;         /* presumably the weight of the lower channel for each FFT bin */
00119    float *Re;           /* work vector for the FFT, real part */
00120    float *Im;           /* work vector for the FFT, imaginary part */
00121 } FBankInfo;
00122 
/*
 * DeltaBuf: work buffer handled by the WMP_deltabuf_*() functions declared
 * in this header.
 * NOTE(review): field notes are inferred from the names; the buffering
 * scheme itself is implemented in wav2mfcc-pipe.c, which is not visible
 * here -- confirm there.
 */
00124 typedef struct {
00125   float **mfcc;                 /* buffer of MFCC vectors */
00126   int veclen;                   /* length of each vector in the buffer */
00127   float *vec;                   /* presumably points at the vector currently being output */
00128   int win;                      /* delta window length */
00129   int len;                      /* number of slots in the buffer -- TODO confirm relation to win */
00130   int store;                    /* presumably the index of the next slot to store into */
00131   boolean *is_on;               /* per-slot flag, presumably TRUE when the slot holds data */
00132   int B;                        /* presumably a precomputed coefficient for the delta formula -- TODO confirm */
00133 } DeltaBuf;
00134 
/*
 * MFCCWork: per-computation work area; created by WMP_work_new(), released by
 * WMP_free(), and passed as `MFCCWork *w` to the analysis functions declared
 * in this header.
 * The lookup-table members exist only when MFCC_SINCOS_TABLE is defined, so
 * the struct layout depends on that macro.
 * NOTE(review): field notes are inferred from the names and the existing
 * comments; buffer sizes and ownership are not visible here -- confirm in
 * mfcc-core.c.
 */
00136 typedef struct {
00137   float *bf;                    /* work buffer, presumably holding the windowed waveform */
00138   double *fbank;   /* work buffer for the filterbank values */
00139   FBankInfo fb; /* filterbank information (set up by InitFBank()) */
00140   int bflen;                    /* length of bf */
00141 #ifdef MFCC_SINCOS_TABLE
00142   double *costbl_hamming; /* cos table, presumably for Hamming() */
00143   int costbl_hamming_len; /* length of costbl_hamming */
00144   /* cos/-sin table for FFT */
00145   double *costbl_fft; /* cos part of the FFT table */
00146   double *sintbl_fft; /* -sin part of the FFT table */
00147   int tbllen; /* presumably the length of the two FFT tables */
00148   /* cos table for MakeMFCC */
00149   double *costbl_makemfcc; /* cos table used by MakeMFCC() */
00150   int costbl_makemfcc_len; /* length of costbl_makemfcc */
00151   /* sin table for WeightCepstrum */
00152   double *sintbl_wcep; /* sin table used by WeightCepstrum() */
00153   int sintbl_wcep_len; /* length of sintbl_wcep */
00154 #endif /* MFCC_SINCOS_TABLE */
00155   float sqrt2var; /* presumably a cached sqrt(2)-based constant -- TODO confirm exact value */
00156   float *ssbuf;                 /* presumably the noise spectrum for spectral subtraction */
00157   int ssbuflen;                 /* length of ssbuf */
00158   float ss_floor;               /* flooring coefficient for spectral subtraction (cf. DEF_SSFLOOR) */
00159   float ss_alpha;               /* alpha coefficient for spectral subtraction (cf. DEF_SSALPHA) */
00160 } MFCCWork;
00161 
/*
 * CMEAN: one cepstral-mean accumulator; used by CMNWork both as list element
 * (`clist`) and as the current accumulator (`now`).
 * NOTE(review): field notes are inferred from the names -- confirm in
 * wav2mfcc-pipe.c.
 */
00166 typedef struct {
00167   float *mfcc_sum;              /* presumably the per-dimension sum of MFCC values */
00168   int framenum;                 /* presumably the number of frames accumulated in mfcc_sum */
00169 } CMEAN;
00170 
/*
 * CMNWork: work area handled by the CMN_realtime_*(), CMN_load_from_file()
 * and CMN_save_to_file() functions declared in this header.
 * NOTE(review): field notes are inferred from the names; the update scheme
 * is implemented in wav2mfcc-pipe.c, which is not visible here -- confirm
 * there.
 */
00175 typedef struct {
00176   CMEAN *clist;         /* list of accumulators, presumably from previous inputs */
00177   int clist_max;                /* presumably the allocated number of entries in clist */
00178   int clist_num;                /* presumably the number of entries in clist currently in use */
00179   int dim;                      /* number of dimensions (cf. `dimension` of CMN_realtime_new()) */
00180   float cweight;                /* weight (cf. `weight` of CMN_realtime_new()); presumably applied to the initial mean */
00181   float *cmean_init;    /* initial cepstral mean */
00182   boolean cmean_init_set;       /* presumably TRUE once cmean_init holds valid values */
00183   CMEAN now;            /* accumulator for the current input */
00184 } CMNWork;
00185 
/*
 * ENERGYWork: work area handled by the energy_max_*() functions declared in
 * this header.
 * NOTE(review): field notes are inferred from the names -- confirm in
 * wav2mfcc-pipe.c.
 */
00190 typedef struct {
00191   LOGPROB max_last;     /* presumably the maximum energy observed in the previous input */
00192   LOGPROB min_last;     /* presumably the minimum energy observed in the previous input */
00193   LOGPROB max;  /* presumably the maximum energy observed so far in the current input */
00194 } ENERGYWork;
00195 
00196 /**** mfcc-core.c ****/
/* Core MFCC computation steps.
 * NOTE(review): only the prototypes are visible here; the one-line
 * descriptions restate the existing comments or are inferred from the
 * names -- confirm details (in-place modification, ownership) in
 * mfcc-core.c. */
/* Create a work area for the given parameters (counterpart: WMP_free) */
00197 MFCCWork *WMP_work_new(Value *para);
/* Compute one MFCC vector into `mfcc` using work area `w` -- TODO confirm where the input frame is taken from */
00198 void WMP_calc(MFCCWork *w, float *mfcc, Value *para);
/* Release a work area obtained from WMP_work_new() */
00199 void WMP_free(MFCCWork *w);
00200 /* Get filterbank information */
00201 void InitFBank(MFCCWork *w, Value *para);
/* Release the contents of a filterbank information structure */
00202 void FreeFBank(FBankInfo *fb);
00203 /* Apply hamming window */
00204 void Hamming (float *wave, int framesize, MFCCWork *w);
00205 /* Apply pre-emphasis filter */
00206 void PreEmphasise (float *wave, int framesize, float preEmph);
00207 /* Return mel-frequency */
00208 float Mel(int k, float fres);
00209 /* Apply FFT */
00210 void FFT(float *xRe, float *xIm, int p, MFCCWork *w);
00211 /* Convert wave -> mel-frequency filterbank */
00212 void MakeFBank(float *wave, MFCCWork *w, Value *para);
00213 /* Apply the DCT to filterbank */ 
00214 void MakeMFCC(float *mfcc, Value *para, MFCCWork *w);
00215 /* Calculate 0'th Cepstral parameter*/
00216 float CalcC0(MFCCWork *w, Value *para);
00217 /* Calculate Log Raw Energy */
00218 float CalcLogRawE(float *wave, int framesize);
00219 /* Zero Mean Source by frame */
00220 void ZMeanFrame(float *wave, int framesize);
00221 /* Re-scale cepstral coefficients */
00222 void WeightCepstrum (float *mfcc, Value *para, MFCCWork *w);
00223 
00224 /**** wav2mfcc-buffer.c ****/
/* Batch (whole-buffer) conversion functions.
 * NOTE(review): only the prototypes are visible here; the meaning of the
 * int return value of Wav2MFCC and the required size of the `mfcc` / `c`
 * arrays are not shown -- confirm in wav2mfcc-buffer.c. */
00225 /* Convert wave -> MFCC_E_D_(Z) (batch) */
00226 int Wav2MFCC(SP16 *wave, float **mfcc, Value *para, int nSamples, MFCCWork *w);
00227 /* Calculate delta coefficients (batch) */
00228 void Delta(float **c, int frame, Value *para);
00229 /* Calculate acceleration coefficients (batch) */
00230 void Accel(float **c, int frame, Value *para);
00231 /* Normalise log energy (batch) */
00232 void NormaliseLogE(float **c, int frame_num, Value *para);
00233 /* Cepstrum Mean Normalization (batch) */
00234 void CMN(float **mfcc, int frame_num, int dim);
00235 
00236 /**** wav2mfcc-pipe.c ****/
/* Frame-by-frame (pipeline) processing: delta buffering, real-time CMN and
 * energy normalisation.
 * NOTE(review): only the prototypes are visible here; the descriptions are
 * inferred from the names, and the meaning of the boolean return values is
 * not shown -- confirm in wav2mfcc-pipe.c. */

/* Delta buffer: create / release / reset, then feed frames and flush the tail */
00237 DeltaBuf *WMP_deltabuf_new(int veclen, int windowlen);
00238 void WMP_deltabuf_free(DeltaBuf *db);
00239 void WMP_deltabuf_prepare(DeltaBuf *db);
00240 boolean WMP_deltabuf_proceed(DeltaBuf *db, float *new_mfcc);
00241 boolean WMP_deltabuf_flush(DeltaBuf *db);
00242 
/* Real-time cepstral mean normalization: create / release / per-input
 * preparation, per-frame processing, update, and file load / save */
00243 CMNWork *CMN_realtime_new(int dimension, float weight);
00244 void CMN_realtime_free(CMNWork *c);
00245 void CMN_realtime_prepare(CMNWork *c);
00246 void CMN_realtime(CMNWork *c, float *mfcc);
00247 void CMN_realtime_update(CMNWork *c);
00248 boolean CMN_load_from_file(CMNWork *c, char *filename);
00249 boolean CMN_save_to_file(CMNWork *c, char *filename);
00250 
/* Energy normalisation based on tracked maximum energy (see ENERGYWork) */
00251 void energy_max_init(ENERGYWork *energy);
00252 void energy_max_prepare(ENERGYWork *energy, Value *para);
00253 LOGPROB energy_max_normalize(ENERGYWork *energy, LOGPROB f, Value *para);
00254 
00255 /**** ss.c ****/
00256 /* spectral subtraction */
/* Both functions return a float array and report a length through `slen`.
 * NOTE(review): presumably the array is a newly allocated noise spectrum
 * that the caller must free (suggested by the `new_` prefix) -- ownership
 * and the failure return value are not visible here; confirm in ss.c. */
00257 float *new_SS_load_from_file(char *filename, int *slen);
00258 float *new_SS_calculate(SP16 *wave, int wavelen, int *slen, MFCCWork *w, Value *para);
00259 
00260 /**** para.c *****/
/* Handling of the Value parameter set.
 * NOTE(review): only the prototypes are visible here; the descriptions are
 * inferred from the names -- confirm in para.c. */
/* Presumably marks every parameter in `para` as unspecified */
00261 void undef_para(Value *para);
/* Presumably fills `para` with this library's defaults (cf. the DEF_* macros) */
00262 void make_default_para(Value *para);
/* Presumably fills `para` with HTK's default values */
00263 void make_default_para_htk(Value *para);
/* Presumably copies parameters from `src` into `dst` -- TODO confirm whether already-set values in `dst` are kept */
00264 void apply_para(Value *dst, Value *src);
/* Parse an HTK configuration file into `para`; the meaning of the boolean result is not visible here */
00265 boolean htk_config_file_parse(char *HTKconffile, Value *para);
/* Presumably derives the header-dependent members of `para` from an acoustic model's parameter type and vector size */
00266 void calc_para_from_header(Value *para, short param_type, short vec_size);
/* Presumably prints the parameters in `para` to `fp` */
00267 void put_para(FILE *fp, Value *para);
00268 
00269 
00270 #endif /* __MFCC_H__ */