libsent/include/sent/mfcc.h

説明を見る。
00001 
00035 /************************************************************************/
00036 /*    mfcc.h                                                            */
00037 /*                                                                      */
00038 /*    Author    : Yuichiro Nakano                                       */
00039 /************************************************************************/
00040 
00041 #ifndef __MFCC_H__
00042 #define __MFCC_H__
00043 
/* Compile-time switch and sizing constants.
   NOTE(review): none of these three names is referenced again in this
   header, so the notes below are inferred from the identifiers only --
   confirm in the .c files that use them. */
00045 #undef MFCC_TABLE_DEBUG  /* forced to "not defined" here; presumably a debug switch for the sin/cos tables -- TODO confirm */
00046 
00047 #define CPMAX 500               /* presumably a maximum count used by the cepstral-mean bookkeeping -- TODO confirm */
00048 #define CPSTEP 5                /* presumably an allocation growth step for a list -- TODO confirm */
00049 
00050 #include <sent/stddefs.h>
00051 #include <sent/htk_defs.h>
00052 #include <sent/htk_param.h>
00053 #include <ctype.h>
00054 
/* Numeric defaults for the front-end analysis parameters.
   NOTE(review): neither the units nor the fields these values feed are stated
   in this header. The notes are inferred from the names and should be
   confirmed against make_default_para() (declared below under para.c). */
00055 #define DEF_SMPPERIOD   625     /* sampling period; if the unit is 100 ns this is 62.5 us (16 kHz) -- TODO confirm unit */
00056 #define DEF_FRAMESIZE   400     /* analysis window length, presumably in samples (25 ms if sampling is 16 kHz) */
00057 #define DEF_FFTNUM      512     /* presumably the FFT size in points */
00058 #define DEF_FRAMESHIFT  160     /* frame shift, presumably in samples (10 ms if sampling is 16 kHz) */
00059 #define DEF_PREENPH     0.97    /* presumably the pre-emphasis coefficient (cf. Value.preEmph) */
00060 #define DEF_MFCCDIM     12      /* presumably the number of cepstral coefficients (cf. Value.mfcc_dim) */
00061 #define DEF_CEPLIF      22      /* presumably the cepstral liftering coefficient (cf. Value.lifter) */
00062 #define DEF_FBANK       24      /* presumably the number of filterbank channels (cf. Value.fbank_num) */
00063 #define DEF_DELWIN      2       /* presumably the delta window width (cf. Value.delWin) */
00064 #define DEF_ACCWIN      2       /* presumably the acceleration window width (cf. Value.accWin) */
00065 #define DEF_SILFLOOR    50.0    /* presumably the energy silence floor, likely in dB (cf. Value.silFloor) */
00066 #define DEF_ESCALE      1.0     /* presumably the log-energy scaling factor (cf. Value.escale) */
00067 
00068 #define DEF_SSALPHA     2.0     /* presumably the spectral-subtraction alpha (cf. MFCCWork.ss_alpha) */
00069 #define DEF_SSFLOOR     0.5     /* presumably the spectral-subtraction floor (cf. MFCCWork.ss_floor) */
00070 
/* Version number with its change history as recorded by the original author.
   NOTE(review): the name and the history lines (usepower is a field of Value;
   ss_floor/ss_alpha now appear only in MFCCWork) suggest that this number
   versions the Value parameter set -- confirm where it is read or written. */
00071 /* version 2: ss_floor and ss_alpha removed */
00072 /* version 3: usepower added */
00073 #define VALUE_VERSION 3 
00074 
/* Parameter set for the MFCC front end (typedef name: Value).
   The struct has three parts: a first group of fields, a second group that
   (per the original note in the middle of the struct) does not need to be
   embedded, and a trailing `loaded` flag.
   NOTE(review): the original per-field documentation is absent from this
   listing. Every trailing "assumed:" note is a reviewer inference from the
   field name and common HTK-style front-end terminology -- confirm against
   the implementation before relying on it. Field order is left untouched
   because the layout may be part of a stored format (see VALUE_VERSION). */
00076 typedef struct {
00077   long smp_period;      /* assumed: sampling period (unit not shown here) */
00078   long smp_freq;        /* assumed: sampling frequency */
00079   int framesize;        /* assumed: analysis window length in samples */
00080   int frameshift;       /* assumed: frame shift in samples */
00081   float preEmph;        /* assumed: pre-emphasis coefficient */
00082   int lifter;           /* assumed: cepstral liftering coefficient */
00083   int fbank_num;        /* assumed: number of filterbank channels */
00084   int delWin;           /* assumed: delta window width */
00085   int accWin;           /* assumed: acceleration window width */
00086   float silFloor;       /* assumed: energy silence floor */
00087   float escale;         /* assumed: log-energy scaling factor */
00088   int hipass;           /* assumed: upper cut-off frequency for filterbank analysis */
00089   int lopass;           /* assumed: lower cut-off frequency for filterbank analysis */
00090   int enormal;          /* assumed: flag, normalise energy */
00091   int raw_e;            /* assumed: flag, use raw energy */
00092   int zmeanframe;       /* assumed: flag, remove the mean per frame (cf. ZMeanFrame()) */
00093   int usepower;         /* assumed: flag, use power instead of magnitude; added in VALUE_VERSION 3 per the history note */
00094   float vtln_alpha;     /* assumed: VTLN warping factor */
00095   float vtln_upper;     /* assumed: VTLN upper cut-off frequency */
00096   float vtln_lower;     /* assumed: VTLN lower cut-off frequency */
00097 
00098   /* The items below do not need to be embedded, because they can be
00099      determined from the acoustic model header, or should be computed
00100      from run-time variables. */
00101   int delta;            /* assumed: flag, delta coefficients are used */
00102   int acc;              /* assumed: flag, acceleration coefficients are used */
00103   int energy;           /* assumed: flag, energy term is used */
00104   int c0;               /* assumed: flag, 0'th cepstral coefficient is used (cf. CalcC0()) */
00105   int absesup;          /* assumed: flag, absolute energy is suppressed */
00106   int cmn;              /* assumed: flag, cepstral mean normalisation is applied */
00107   int cvn;              /* assumed: flag, cepstral variance normalisation is applied */
00108   int mfcc_dim;         /* assumed: number of MFCC dimensions */
00109   int baselen;          /* assumed: length of the base (static) part of the vector */
00110   int vecbuflen;        /* assumed: vector length needed during computation */
00111   int veclen;           /* assumed: length of the final feature vector */
00112 
00113   int loaded;           /* assumed: flag, set once parameters were loaded from an external source */
00114 }Value;
00115 
/* Filterbank analysis state (typedef name: FBankInfo). An instance is held
   by value in MFCCWork as member `fb`, and FreeFBank() takes a pointer to one.
   NOTE(review): the "assumed:" notes are inferred from the field names and the
   usual layout of an HTK-style filterbank; buffer lengths and ownership are
   not visible in this header -- confirm in mfcc-core.c. */
00117 typedef struct {
00118    int fftN;            /* assumed: number of FFT points */
00119    int n;               /* assumed: log2 of fftN */
00120    int klo;             /* assumed: FFT bin index of the low cut-off */
00121    int khi;             /* assumed: FFT bin index of the high cut-off */
00122    float fres;          /* assumed: (scaled) FFT frequency resolution; Mel() takes a parameter with this name */
00123    float *cf;           /* assumed: array of channel centre frequencies */
00124    short *loChan;       /* assumed: per-bin index of the lower channel */
00125    float *loWt;         /* assumed: per-bin weight for the lower channel */
00126    float *Re;           /* assumed: real part of the FFT buffer */
00127    float *Im;           /* assumed: imaginary part of the FFT buffer */
00128 } FBankInfo;
00129 
/* Buffer state used by the WMP_deltabuf_* functions declared under
   wav2mfcc-pipe.c (typedef name: DeltaBuf).
   NOTE(review): the "assumed:" notes are inferred from the field names and
   the parameters of WMP_deltabuf_new(int veclen, int windowlen); the
   buffering scheme itself is not visible here -- confirm in wav2mfcc-pipe.c. */
00131 typedef struct {
00132   float **mfcc;                 /* assumed: buffered MFCC vectors */
00133   int veclen;                   /* assumed: length of each buffered vector */
00134   float *vec;                   /* assumed: pointer to the current output vector */
00135   int win;                      /* assumed: delta window width */
00136   int len;                      /* assumed: number of slots in the buffer */
00137   int store;                    /* assumed: index of the next slot to write */
00138   boolean *is_on;               /* assumed: per-slot flag, slot holds valid data */
00139   int B;                        /* assumed: normalisation constant of the delta formula */
00140 } DeltaBuf;
00141 
/* Work area passed as `w` to the mfcc-core.c functions (typedef name:
   MFCCWork).
   The struct layout depends on MFCC_SINCOS_TABLE: the lookup-table members
   exist only when that macro is defined. It is not defined in this header, so
   every translation unit that uses MFCCWork must see the same setting.
   NOTE(review): the "assumed:" notes are inferred from the field names;
   buffer sizes and ownership are not visible here -- confirm in mfcc-core.c
   and ss.c. */
00143 typedef struct {
00144   float *bf;                    /* assumed: buffer holding the windowed waveform */
00145   double *fbank;   /* assumed: buffer holding filterbank outputs */
00146   FBankInfo fb; /* filterbank state, stored by value */
00147   int bflen;                    /* assumed: length of bf */
00148 #ifdef MFCC_SINCOS_TABLE
00149   double *costbl_hamming; /* assumed: cos table for the Hamming window */
00150   int costbl_hamming_len; /* assumed: number of entries in costbl_hamming */
00151   /* cos/-sin table for FFT */
00152   double *costbl_fft; 
00153   double *sintbl_fft; 
00154   int tbllen; /* assumed: number of entries in the two FFT tables */
00155   /* cos table for MakeMFCC */
00156   double *costbl_makemfcc; 
00157   int costbl_makemfcc_len; /* assumed: number of entries in costbl_makemfcc */
00158   /* sin table for WeightCepstrum */
00159   double *sintbl_wcep; 
00160   int sintbl_wcep_len; /* assumed: number of entries in sintbl_wcep */
00161 #endif /* MFCC_SINCOS_TABLE */
00162   float sqrt2var; /* assumed: cached scaling constant for the DCT -- TODO confirm formula */
00163   float *ssbuf;                 /* assumed: noise spectrum used for spectral subtraction */
00164   int ssbuflen;                 /* assumed: length of ssbuf */
00165   float ss_floor;               /* assumed: spectral-subtraction flooring coefficient */
00166   float ss_alpha;               /* assumed: spectral-subtraction alpha coefficient */
00167 } MFCCWork;
00168 
/* Accumulator record used by CMNWork, both as the array `clist` and as the
   member `now` (typedef name: CMEAN).
   NOTE(review): the "assumed:" notes are inferred from the field names;
   array lengths are not visible here -- confirm in wav2mfcc-pipe.c. */
00173 typedef struct {
00174   float *mfcc_sum;              /* assumed: per-dimension sum of MFCC values */
00175   float *mfcc_var;              /* assumed: per-dimension accumulator for the variance */
00176   int framenum;                 /* assumed: number of frames accumulated */
00177 } CMEAN;
00178 
/* Work area handled by the CMN_realtime_* / CMN_load_from_file /
   CMN_save_to_file functions declared under wav2mfcc-pipe.c (typedef name:
   CMNWork).
   NOTE(review): the "assumed:" notes are inferred from the field names and
   from the parameters of CMN_realtime_new(Value *para, float weight) --
   confirm in wav2mfcc-pipe.c. */
00183 typedef struct {
00184   CMEAN *clist;         /* assumed: list of accumulators from previous inputs */
00185   int clist_max;                /* assumed: allocated number of entries in clist */
00186   int clist_num;                /* assumed: number of entries of clist in use */
00187   float cweight;                /* assumed: weight given to the initial cepstral mean */
00188   float *cmean_init;    /* assumed: initial cepstral mean */
00189   float *cvar_init;             /* assumed: initial cepstral variance (or deviation -- TODO confirm) */
00190   int mfcc_dim;                 /* assumed: number of base dimensions covered by mean normalisation */
00191   int veclen;                   /* assumed: full feature vector length */
00192   boolean mean;                 /* assumed: flag, mean normalisation enabled */
00193   boolean var;                  /* assumed: flag, variance normalisation enabled */
00194   boolean cmean_init_set;       /* assumed: flag, cmean_init holds valid data */
00195   CMEAN now;            /* assumed: accumulator for the input currently being processed */
00196 } CMNWork;
00197 
/* State passed to the energy_max_* functions declared under wav2mfcc-pipe.c
   (typedef name: ENERGYWork). LOGPROB is a project type not defined in this
   header.
   NOTE(review): the "assumed:" notes are inferred from the field names --
   confirm in wav2mfcc-pipe.c. */
00202 typedef struct {
00203   LOGPROB max_last;     /* assumed: maximum energy observed in the previous input */
00204   LOGPROB min_last;     /* assumed: minimum energy observed in the previous input */
00205   LOGPROB max;  /* assumed: maximum energy observed so far in the current input */
00206 } ENERGYWork;
00207 
/* Core MFCC routines. The section marker names mfcc-core.c as the file that
   defines them. One-line descriptions come from the original comments;
   trailing notes marked "presumably" are reviewer inferences to confirm
   against that file. */
00208 /**** mfcc-core.c ****/
00209 MFCCWork *WMP_work_new(Value *para);  /* presumably allocates a work area for `para`; pair with WMP_free() */
00210 void WMP_calc(MFCCWork *w, float *mfcc, Value *para);  /* presumably computes one MFCC vector into `mfcc` from data held in `w` */
00211 void WMP_free(MFCCWork *w);  /* presumably releases a work area obtained from WMP_work_new() */
00212 /* Get filterbank information */
00213 boolean InitFBank(MFCCWork *w, Value *para);
00214 void FreeFBank(FBankInfo *fb);  /* presumably releases the buffers referenced by `fb` */
00215 /* Apply hamming window */
00216 void Hamming (float *wave, int framesize, MFCCWork *w);
00217 /* Apply pre-emphasis filter */
00218 void PreEmphasise (float *wave, int framesize, float preEmph);
00219 /* Return mel-frequency */
00220 float Mel(int k, float fres);
00221 /* Apply FFT */
00222 void FFT(float *xRe, float *xIm, int p, MFCCWork *w);
00223 /* Convert wave -> mel-frequency filterbank */
00224 void MakeFBank(float *wave, MFCCWork *w, Value *para);
00225 /* Apply the DCT to filterbank */ 
00226 void MakeMFCC(float *mfcc, Value *para, MFCCWork *w);
00227 /* Calculate 0'th Cepstral parameter */
00228 float CalcC0(MFCCWork *w, Value *para);
00229 /* Calculate Log Raw Energy */
00230 float CalcLogRawE(float *wave, int framesize);
00231 /* Zero Mean Source by frame */
00232 void ZMeanFrame(float *wave, int framesize);
00233 /* Re-scale cepstral coefficients */
00234 void WeightCepstrum (float *mfcc, Value *para, MFCCWork *w);
00235 
/* Batch (whole-input) feature computation. The section marker names
   wav2mfcc-buffer.c as the file that defines these. SP16 is a project type not
   defined in this header. Return values and array dimensions are not
   documented here -- confirm in that file. */
00236 /**** wav2mfcc-buffer.c ****/
00237 /* Convert wave -> MFCC_E_D_(Z) (batch) */
00238 int Wav2MFCC(SP16 *wave, float **mfcc, Value *para, int nSamples, MFCCWork *w);
00239 /* Calculate delta coefficients (batch) */
00240 void Delta(float **c, int frame, Value *para);
00241 /* Calculate acceleration coefficients (batch) */
00242 void Accel(float **c, int frame, Value *para);
00243 /* Normalise log energy (batch) */
00244 void NormaliseLogE(float **c, int frame_num, Value *para);
00245 /* Cepstrum Mean Normalization (batch) */
00246 void CMN(float **mfcc, int frame_num, int dim);
00247 void MVN(float **mfcc, int frame_num, Value *para);  /* presumably mean and variance normalisation (batch) -- TODO confirm */
00248 
/* Incremental (frame-by-frame) processing. The section marker names
   wav2mfcc-pipe.c as the file that defines these. The original listing has no
   per-function comments in this section; all notes are reviewer inferences
   from the names and signatures -- confirm in that file. */
00249 /**** wav2mfcc-pipe.c ****/
00250 DeltaBuf *WMP_deltabuf_new(int veclen, int windowlen);  /* presumably allocates a DeltaBuf; pair with WMP_deltabuf_free() */
00251 void WMP_deltabuf_free(DeltaBuf *db);  /* presumably releases a DeltaBuf */
00252 void WMP_deltabuf_prepare(DeltaBuf *db);  /* presumably resets the buffer before a new input */
00253 boolean WMP_deltabuf_proceed(DeltaBuf *db, float *new_mfcc);  /* presumably feeds one vector; meaning of the return value not shown here */
00254 boolean WMP_deltabuf_flush(DeltaBuf *db);  /* presumably drains buffered vectors at end of input; meaning of the return value not shown here */
00255 
00256 CMNWork *CMN_realtime_new(Value *para, float weight);  /* presumably allocates a CMNWork; pair with CMN_realtime_free() */
00257 void CMN_realtime_free(CMNWork *c);  /* presumably releases a CMNWork */
00258 void CMN_realtime_prepare(CMNWork *c);  /* presumably resets per-input state before a new input */
00259 void CMN_realtime(CMNWork *c, float *mfcc);  /* presumably normalises one vector in place */
00260 void CMN_realtime_update(CMNWork *c, HTK_Param *param);  /* presumably folds the finished input into the stored history */
00261 boolean CMN_load_from_file(CMNWork *c, char *filename);  /* presumably reads initial normalisation data; return is presumably a success flag */
00262 boolean CMN_save_to_file(CMNWork *c, char *filename);  /* presumably writes normalisation data; return is presumably a success flag */
00263 
00264 void energy_max_init(ENERGYWork *energy);  /* presumably one-time initialisation of `energy` */
00265 void energy_max_prepare(ENERGYWork *energy, Value *para);  /* presumably per-input preparation */
00266 LOGPROB energy_max_normalize(ENERGYWork *energy, LOGPROB f, Value *para);  /* presumably returns the normalised form of energy value `f` */
00267 
/* Spectral subtraction helpers. The section marker names ss.c as the file
   that defines them. NOTE(review): both presumably return a newly allocated
   float array and store its length through `slen`; who frees it and what is
   returned on failure are not shown here -- confirm in ss.c. */
00268 /**** ss.c ****/
00269 /* spectral subtraction */
00270 float *new_SS_load_from_file(char *filename, int *slen);
00271 float *new_SS_calculate(SP16 *wave, int wavelen, int *slen, MFCCWork *w, Value *para);
00272 
/* Handling of the Value parameter set. The section marker names para.c as
   the file that defines these. The original listing has no per-function
   comments in this section; all notes are reviewer inferences from the names
   and signatures -- confirm in para.c. put_para() uses FILE, so <stdio.h>
   must be visible through one of the headers included above. */
00273 /**** para.c *****/
00274 void undef_para(Value *para);  /* presumably marks every parameter in `para` as unspecified */
00275 void make_default_para(Value *para);  /* presumably fills `para` with this library's defaults (cf. the DEF_* macros) */
00276 void make_default_para_htk(Value *para);  /* presumably fills `para` with HTK's defaults */
00277 void apply_para(Value *dst, Value *src);  /* presumably copies parameters from `src` into `dst`; exact merge rule not shown here */
00278 boolean htk_config_file_parse(char *HTKconffile, Value *para);  /* presumably reads an HTK config file into `para`; return is presumably a success flag */
00279 void calc_para_from_header(Value *para, short param_type, short vec_size);  /* presumably derives the run-time fields of `para` from acoustic model header values */
00280 void put_para(FILE *fp, Value *para);  /* presumably prints the parameters to `fp` */
00281 
00282 
00283 #endif /* __MFCC_H__ */

Juliusに対してThu Jul 23 12:16:23 2009に生成されました。  doxygen 1.5.1