libsent/include/sent/mfcc.h

説明を見る。
00001 
00034 /************************************************************************/
00035 /*    mfcc.h                                                            */
00036 /*                                                                      */
00037 /*    Author    : Yuichiro Nakano                                       */
00038 /************************************************************************/
00039 
00040 #ifndef __MFCC_H__
00041 #define __MFCC_H__
00042 
00044 #undef MFCC_TABLE_DEBUG
00045 
00046 #include <sent/stddefs.h>
00047 #include <sent/htk_defs.h>
00048 #include <ctype.h>
00049 
00050 #define DEF_SMPPERIOD   625     /* Default parameter values start here; meanings are inferred from the macro names - confirm in para.c. Sampling period (unit not shown here; presumably 100 ns units, i.e. 16 kHz - confirm) */
00051 #define DEF_FRAMESIZE   400     /* Analysis window size (presumably in samples - confirm) */
00052 #define DEF_FFTNUM      512     /* FFT size (presumably number of points - confirm) */
00053 #define DEF_FRAMESHIFT  160     /* Frame shift (presumably in samples - confirm) */
00054 #define DEF_PREENPH     0.97    /* Pre-emphasis coefficient (presumably the default for Value.preEmph) */
00055 #define DEF_MFCCDIM     12      /* Number of MFCC dimensions (presumably the default for Value.mfcc_dim) */
00056 #define DEF_CEPLIF      22      /* Cepstral liftering value (presumably the default for Value.lifter) */
00057 #define DEF_FBANK       24      /* Number of filterbank channels (presumably the default for Value.fbank_num) */
00058 #define DEF_DELWIN      2       /* Delta window width (presumably the default for Value.delWin) */
00059 #define DEF_ACCWIN      2       /* Acceleration window width (presumably the default for Value.accWin) */
00060 #define DEF_SILFLOOR    50.0    /* Silence floor for energy normalisation (presumably dB - confirm) */
00061 #define DEF_ESCALE      1.0     /* Energy scaling factor (presumably the default for Value.escale) */
00062 #define DEF_SSALPHA     2.0     /* Spectral subtraction alpha (presumably the default for Value.ss_alpha) */
00063 #define DEF_SSFLOOR     0.5     /* Spectral subtraction floor (presumably the default for Value.ss_floor) */
00064 
00065 #define VALUE_VERSION 1 /* Version number; NOTE(review): presumably identifies the layout of the Value parameter set - its use is not visible in this header, confirm */
00066 
00068 typedef struct {      /* Analysis parameter set handed to the MFCC routines declared in this header (by value or by pointer). Field notes are inferred from names - confirm in para.c */
00069   long smp_period;      /* Sampling period (unit not shown here; cf. DEF_SMPPERIOD) */
00070   long smp_freq;        /* Sampling frequency (presumably Hz - confirm) */
00071   int framesize;        /* Analysis window size (cf. DEF_FRAMESIZE) */
00072   int frameshift;       /* Frame shift (cf. DEF_FRAMESHIFT) */
00073   float preEmph;        /* Pre-emphasis coefficient (cf. DEF_PREENPH) */
00074   int lifter;           /* Cepstral liftering value (cf. DEF_CEPLIF) */
00075   int fbank_num;        /* Number of filterbank channels (cf. DEF_FBANK) */
00076   int delWin;           /* Delta window width (cf. DEF_DELWIN) */
00077   int accWin;           /* Acceleration window width (cf. DEF_ACCWIN) */
00078   float silFloor;       /* Silence floor for energy normalisation (cf. DEF_SILFLOOR) */
00079   float escale;         /* Energy scaling factor (cf. DEF_ESCALE) */
00080   int hipass;           /* Presumably the high-pass cut-off frequency - confirm unit and "unset" value */
00081   int lopass;           /* Presumably the low-pass cut-off frequency - confirm unit and "unset" value */
00082   int enormal;          /* Presumably a flag: normalise log energy - confirm */
00083   int raw_e;            /* Presumably a flag: use raw energy (cf. CalcLogRawE()) - confirm */
00084   float ss_alpha;       /* Spectral subtraction alpha (cf. DEF_SSALPHA) */
00085   float ss_floor;       /* Spectral subtraction floor (cf. DEF_SSFLOOR) */
00086   int zmeanframe;       /* Presumably a flag: zero-mean each frame (cf. ZMeanFrame()) - confirm */
00087 
00088   /* The items below do not need to be embedded, because they can be
00089      determined from the acoustic model header, or should be computed
00090      from run-time variables */
00091   int delta;            /* Presumably a flag: delta coefficients are used - confirm */
00092   int acc;              /* Presumably a flag: acceleration coefficients are used - confirm */
00093   int energy;           /* Presumably a flag: energy term is used - confirm */
00094   int c0;               /* Presumably a flag: 0th cepstral coefficient is used (cf. CalcC0()) - confirm */
00095   int absesup;          /* Presumably a flag: absolute energy is suppressed - confirm */
00096   int cmn;              /* Presumably a flag: cepstral mean normalisation is applied - confirm */
00097   int mfcc_dim;         /* Number of MFCC dimensions (cf. DEF_MFCCDIM) */
00098   int baselen;          /* Presumably the length of the base (static) part of a vector - confirm */
00099   int vecbuflen;        /* Presumably the length of the vector work buffer - confirm */
00100   int veclen;           /* Presumably the final feature vector length - confirm */
00101 
00102   int loaded;           /* Presumably non-zero once the parameters have been set/loaded - confirm */
00103 }Value;
00104 
00106 typedef struct {     /* Filterbank work data: returned by InitFBank(), consumed by MakeFBank() and FreeFBank(). Field notes are inferred from names - confirm in mfcc-core.c */
00107    int fftN;            /* Presumably the number of FFT points - confirm */
00108    int n;               /* Presumably log2(fftN) - confirm */
00109    int klo;             /* Presumably the lowest FFT index used by the filterbank - confirm */
00110    int khi;             /* Presumably the highest FFT index used by the filterbank - confirm */
00111    float fres;          /* Presumably the frequency resolution term passed to Mel() - confirm */
00112    float *cf;           /* Presumably an array of channel centre frequencies - confirm length and owner */
00113    short *loChan;       /* Presumably the lower channel index for each FFT bin - confirm */
00114    float *loWt;         /* Presumably the lower channel weight for each FFT bin - confirm */
00115    float *Re;           /* Presumably the real-part work buffer for FFT() - confirm */
00116    float *Im;           /* Presumably the imaginary-part work buffer for FFT() - confirm */
00117 } FBankInfo;
00118 
00120 typedef struct {     /* Buffer object returned by WMP_deltabuf_new() and taken by the other WMP_deltabuf_*() functions. Field notes are inferred from names - confirm in wav2mfcc-pipe.c */
00121   float **mfcc;                 /* Presumably the buffered MFCC vectors - confirm layout */
00122   int veclen;                   /* Presumably the length of one vector (cf. WMP_deltabuf_new() veclen) */
00123   float *vec;                   /* Presumably the most recent output vector - confirm */
00124   int win;                      /* Presumably the delta window width (cf. WMP_deltabuf_new() windowlen) */
00125   int len;                      /* Presumably the number of slots in the buffer - confirm */
00126   int store;                    /* Presumably the current storing position - confirm */
00127   boolean *is_on;               /* Presumably per-slot flags marking valid data - confirm */
00128   int B;                        /* Presumably a precomputed constant for the delta computation - confirm */
00129 } DeltaBuf;
00130 
00131 /**** mfcc-core.c ****/ /* Core per-frame MFCC computation routines; implementations are not visible in this header */
00132 void WMP_calc_init(Value para, float **bf, int *bflen); /* Presumably prepares the work buffer *bf and reports its length in *bflen - confirm */
00133 void WMP_calc(float *mfcc, float *bf, Value para, float *ssbuf); /* Presumably computes one MFCC vector into mfcc from the frame in bf; ssbuf relates to spectral subtraction - confirm */
00134 void WMP_calc_fin(float *bf); /* Presumably releases what WMP_calc_init() set up - confirm */
00135 #ifdef MFCC_SINCOS_TABLE
00136 /* Functions for making tables (declared only when MFCC_SINCOS_TABLE is defined) */
00137 void make_costbl_hamming(int framesize);
00138 void make_fft_table(int n);
00139 void make_costbl_makemfcc(int fbank_num, int mfcc_dim);
00140 void make_sintbl_wcep(int lifter, int mfcc_dim);
00141 #endif
00142 /* Get filterbank information */
00143 FBankInfo InitFBank(Value para);
00144 void FreeFBank(FBankInfo fb); /* Presumably frees the arrays held by fb - confirm */
00145 /* Apply Hamming window */
00146 void Hamming (float *wave, int framesize);
00147 /* Apply pre-emphasis filter */
00148 void PreEmphasise (float *wave, Value para);
00149 /* Return mel-frequency */
00150 float Mel(int k, float fres);
00151 /* Apply FFT */
00152 void FFT(float *xRe, float *xIm, int p);
00153 /* Convert wave -> mel-frequency filterbank */
00154 void MakeFBank(float *wave, double *fbank, FBankInfo info, Value para, float *ssbuf);
00155 /* Apply the DCT to filterbank */ 
00156 void MakeMFCC(double *fbank, float *mfcc, Value para);
00157 /* Calculate 0'th cepstral parameter */
00158 float CalcC0(double *fbank, Value para);
00159 /* Calculate log raw energy */
00160 float CalcLogRawE(float *wave, int framesize);
00161 /* Zero-mean the source, frame by frame */
00162 void ZMeanFrame(float *wave, int framesize);
00163 /* Re-scale cepstral coefficients */
00164 void WeightCepstrum (float *mfcc, Value para);
00164 void WeightCepstrum (float *mfcc, Value para);
00165 
00166 /**** wav2mfcc-buffer.c ****/ /* Batch (whole-input) conversion routines; implementations are not visible in this header */
00167 /* Convert wave -> MFCC_E_D_(Z) (batch); NOTE(review): meaning of the int return value is not shown here (presumably a frame count) - confirm */
00168 int Wav2MFCC(SP16 *wave, float **mfcc, Value para, int nSamples, float *ssbuf, int ssbuflen);
00169 /* Calculate delta coefficients (batch) */
00170 void Delta(float **c, int frame, Value para);
00171 /* Calculate acceleration coefficients (batch) */
00172 void Accel(float **c, int frame, Value para);
00173 /* Normalise log energy (batch) */
00174 void NormaliseLogE(float **c, int frame_num, Value para);
00175 /* Cepstrum Mean Normalization (batch) */
00176 void CMN(float **mfcc, int frame_num, int dim);
00177 
00178 /**** wav2mfcc-pipe.c ****/ /* Incremental (frame-by-frame) processing routines; implementations are not visible in this header. Zero-argument functions are declared with (void) so that calls with arguments are rejected at compile time. */
00179 void WMP_init(Value para, float **bf, float *ssbuf, int ssbuflen); /* Presumably sets up the work buffer *bf for pipeline processing - confirm */
00180 DeltaBuf *WMP_deltabuf_new(int veclen, int windowlen); /* Returns a DeltaBuf; presumably newly allocated and to be released with WMP_deltabuf_free() - confirm */
00181 void WMP_deltabuf_free(DeltaBuf *db);
00182 void WMP_deltabuf_prepare(DeltaBuf *db); /* Presumably resets db before a new input - confirm */
00183 boolean WMP_deltabuf_proceed(DeltaBuf *db, float *new_mfcc); /* Presumably feeds one vector; return presumably tells whether an output vector is available - confirm */
00184 boolean WMP_deltabuf_flush(DeltaBuf *db); /* Presumably drains remaining vectors at end of input - confirm */
00185 void CMN_realtime_init(int dimension, float weight);
00186 void CMN_realtime_prepare(void);
00187 void CMN_realtime(float *mfcc, int dim);
00188 void CMN_realtime_update(void);
00189 boolean CMN_load_from_file(char *filename, int dim); /* NOTE(review): presumably returns TRUE on success - confirm */
00190 boolean CMN_save_to_file(char *filename); /* NOTE(review): presumably returns TRUE on success - confirm */
00191 void energy_max_init(void);
00192 void energy_max_prepare(Value *para);
00193 LOGPROB energy_max_normalize(LOGPROB f, Value *para);
00194 
00195 /**** ss.c ****/
00196 /* Spectral subtraction: both functions return a float array and report a length through *slen; NOTE(review): presumably the array is newly allocated and owned by the caller - confirm */
00197 float *new_SS_load_from_file(char *filename, int *slen);
00198 float *new_SS_calculate(SP16 *wave, int wavelen, Value para, int *slen);
00199 
00200 /**** para.c *****/ /* Handling of the Value parameter set; implementations are not visible in this header */
00201 void undef_para(Value *para); /* Presumably marks every field of *para as unspecified - confirm */
00202 void make_default_para(Value *para); /* Presumably fills *para with the DEF_* defaults - confirm */
00203 void make_default_para_htk(Value *para); /* Presumably fills *para with HTK's default values - confirm */
00204 void apply_para(Value *dst, Value *src); /* Presumably copies specified values from *src into *dst - confirm which side wins */
00205 boolean htk_config_file_parse(char *HTKconffile, Value *para); /* Presumably reads an HTK config file into *para and returns TRUE on success - confirm */
00206 void calc_para_from_header(Value *para, short param_type, short vec_size); /* Presumably derives the run-time fields of *para from acoustic model header values - confirm */
00207 void put_para(Value *para); /* Presumably prints the contents of *para - confirm output stream */
00208 
00209 
00210 #endif /* __MFCC_H__ */

Julianに対してTue Dec 26 12:56:19 2006に生成されました。  doxygen 1.5.0