libsent/include/sent/mfcc.h File Reference

Definitions for MFCC computation. More...

#include <sent/stddefs.h>
#include <sent/htk_defs.h>
#include <ctype.h>

Include dependency graph for mfcc.h:

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  Value
 mfcc configuration parameter values More...
struct  FBankInfo
 Workspace for filterbank analysis. More...
struct  DeltaBuf
 Cycle buffer for delta computation. More...

Defines

#define DEF_SMPPERIOD   625
 DEBUG: define if you want to enable debug messages for sin/cos table operation. Default sampling period in 100ns units (625 = 16kHz).
#define DEF_FRAMESIZE   400
 Default Window size in samples, similar to WINDOWSIZE in HTK (unit is different).
#define DEF_FFTNUM   512
 Number of FFT steps.
#define DEF_FRAMESHIFT   160
 Default frame shift length in samples.
#define DEF_PREENPH   0.97
 Default pre-emphasis coefficient, corresponds to PREEMCOEF in HTK.
#define DEF_MFCCDIM   12
 Default number of MFCC dimensions, corresponds to NUMCEPS in HTK.
#define DEF_CEPLIF   22
 Default cepstral Liftering coefficient, corresponds to CEPLIFTER in HTK.
#define DEF_FBANK   24
 Default number of filterbank channels, corresponds to NUMCHANS in HTK.
#define DEF_DELWIN   2
 Default delta window size, corresponds to DELTAWINDOW in HTK.
#define DEF_ACCWIN   2
 Default acceleration window size, corresponds to ACCWINDOW in HTK.
#define DEF_SILFLOOR   50.0
 Default energy silence floor in dBs, corresponds to SILFLOOR in HTK.
#define DEF_ESCALE   1.0
 Default scaling coefficient of log energy, corresponds to ESCALE in HTK.
#define DEF_SSALPHA   2.0
 Default alpha coefficient for spectral subtraction.
#define DEF_SSFLOOR   0.5
 Default flooring coefficient for spectral subtraction.
#define VALUE_VERSION   1
 Integer version number of Value, for embedding.

Functions

void WMP_calc_init (Value para, float **bf, int *bflen)
void WMP_calc (float *mfcc, float *bf, Value para, float *ssbuf)
void WMP_calc_fin (float *bf)
void make_costbl_hamming (int framesize)
void make_fft_table (int n)
void make_costbl_makemfcc (int fbank_num, int mfcc_dim)
void make_sintbl_wcep (int lifter, int mfcc_dim)
FBankInfo InitFBank (Value para)
void FreeFBank (FBankInfo fb)
void Hamming (float *wave, int framesize)
void PreEmphasise (float *wave, Value para)
float Mel (int k, float fres)
void FFT (float *xRe, float *xIm, int p)
void MakeFBank (float *wave, double *fbank, FBankInfo info, Value para, float *ssbuf)
void MakeMFCC (double *fbank, float *mfcc, Value para)
float CalcC0 (double *fbank, Value para)
float CalcLogRawE (float *wave, int framesize)
void ZMeanFrame (float *wave, int framesize)
void WeightCepstrum (float *mfcc, Value para)
int Wav2MFCC (SP16 *wave, float **mfcc, Value para, int nSamples, float *ssbuf, int ssbuflen)
void Delta (float **c, int frame, Value para)
void Accel (float **c, int frame, Value para)
void NormaliseLogE (float **c, int frame_num, Value para)
void CMN (float **mfcc, int frame_num, int dim)
void WMP_init (Value para, float **bf, float *ssbuf, int ssbuflen)
DeltaBuf * WMP_deltabuf_new (int veclen, int windowlen)
void WMP_deltabuf_free (DeltaBuf *db)
void WMP_deltabuf_prepare (DeltaBuf *db)
boolean WMP_deltabuf_proceed (DeltaBuf *db, float *new_mfcc)
boolean WMP_deltabuf_flush (DeltaBuf *db)
void CMN_realtime_init (int dimension, float weight)
void CMN_realtime_prepare ()
void CMN_realtime (float *mfcc, int dim)
void CMN_realtime_update ()
boolean CMN_load_from_file (char *filename, int dim)
boolean CMN_save_to_file (char *filename)
void energy_max_init ()
void energy_max_prepare (Value *para)
LOGPROB energy_max_normalize (LOGPROB f, Value *para)
float * new_SS_load_from_file (char *filename, int *slen)
float * new_SS_calculate (SP16 *wave, int wavelen, Value para, int *slen)
void undef_para (Value *para)
void make_default_para (Value *para)
void make_default_para_htk (Value *para)
void apply_para (Value *dst, Value *src)
boolean htk_config_file_parse (char *HTKconffile, Value *para)
void calc_para_from_header (Value *para, short param_type, short vec_size)
void put_para (Value *para)


Detailed Description

Definitions for MFCC computation.

Author:
Akinobu LEE
Date:
Fri Feb 11 03:40:52 2005
This file contains structures and default values for extracting speech parameter vectors of Mel-Frequency Cepstral Coefficients (MFCC). The default values here are the ones used in the standard acoustic models distributed together with Julius, and some of them have different values from the HTK defaults. So be careful of the default values.

See also:
libsent/src/wav2mfcc/wav2mfcc.c

libsent/src/wav2mfcc/wav2mfcc-pipe.c

julius/wav2mfcc.c

julius/realtime-1stpass.c

Revision
1.8

Definition in file mfcc.h.


Function Documentation

void WMP_calc_init ( Value  para,
float **  bf,
int *  bflen 
)

Initialize calculation functions and work areas.

Parameters:
para [in] configuration parameters
bf [out] returns pointer to newly allocated window buffer
bflen [out] length of bf

Definition at line 605 of file mfcc-core.c.

Referenced by WMP_init().

void WMP_calc ( float *  mfcc,
float *  bf,
Value  para,
float *  ssbuf 
)

Calculate MFCC and log energy for one frame. Perform spectral subtraction if ssbuf is specified.

Parameters:
mfcc [out] buffer to hold the resulting MFCC vector
bf [i/o] work area for FFT
para [in] configuration parameters
ssbuf [in] noise spectrum, or NULL if not using spectral subtraction

Definition at line 632 of file mfcc-core.c.

Referenced by RealTimePipeLine(), and Wav2MFCC().

void WMP_calc_fin ( float *  bf  ) 

Free work area for MFCC computation

Parameters:
bf [in] window buffer previously allocated by WMP_calc_init()

Definition at line 674 of file mfcc-core.c.

void make_costbl_hamming ( int  framesize  ) 

Generate table for hamming window.

Parameters:
framesize [in] window size

Definition at line 63 of file mfcc-core.c.

Referenced by new_SS_calculate().

void make_fft_table ( int  n  ) 

Build tables for FFT.

Parameters:
n [in] 2^n = FFT point

Definition at line 94 of file mfcc-core.c.

Referenced by new_SS_calculate().

void make_costbl_makemfcc ( int  fbank_num,
int  mfcc_dim 
)

Generate table for DCT operation to make mfcc from fbank.

Parameters:
fbank_num [in] number of filter banks
mfcc_dim [in] number of dimensions in MFCC

Definition at line 127 of file mfcc-core.c.

void make_sintbl_wcep ( int  lifter,
int  mfcc_dim 
)

Generate table for weighting cepstrum.

Parameters:
lifter [in] cepstral liftering coefficient
mfcc_dim [in] number of dimensions in MFCC

Definition at line 165 of file mfcc-core.c.

FBankInfo InitFBank ( Value  para  ) 

Build filterbank information and generate tables for MFCC computation.

Parameters:
para [in] configuration parameters
Returns:
the generated filterbank information.

Definition at line 211 of file mfcc-core.c.

Referenced by WMP_calc_init().

void FreeFBank ( FBankInfo  fb  ) 

Free FBankInfo.

Parameters:
fb [in] filterbank information

Definition at line 305 of file mfcc-core.c.

Referenced by WMP_calc_fin().

void Hamming ( float *  wave,
int  framesize 
)

Apply hamming window.

Parameters:
wave [i/o] waveform data in the current frame
framesize [in] frame size

Definition at line 375 of file mfcc-core.c.

Referenced by new_SS_calculate(), and WMP_calc().

void PreEmphasise ( float *  wave,
Value  para 
)

Apply pre-emphasis filter.

Parameters:
wave [i/o] waveform data in the current frame
para [in] configuration parameters

Definition at line 360 of file mfcc-core.c.

Referenced by new_SS_calculate(), and WMP_calc().

float Mel ( int  k,
float  fres 
)

Return mel-frequency.

Parameters:
k [in] channel number of filter bank
fres [in] constant value computed by "1.0E7 / (para.smp_period * fb.fftN * 700.0)"
Returns:
the mel frequency.

Definition at line 199 of file mfcc-core.c.

Referenced by InitFBank().

void FFT ( float *  xRe,
float *  xIm,
int  p 
)

Apply FFT

Parameters:
xRe [i/o] real part of waveform
xIm [i/o] imaginary part of waveform
p [in] 2^p = FFT point

Definition at line 396 of file mfcc-core.c.

void MakeFBank ( float *  wave,
double *  fbank,
FBankInfo  fb,
Value  para,
float *  ssbuf 
)

Convert wave -> (spectral subtraction) -> mel-frequency filterbank

Parameters:
wave [in] waveform data in the current frame
fbank [out] the resulting mel-frequency filterbank
fb [in] filterbank information
para [in] configuration parameters
ssbuf [in] noise spectrum, or NULL if not applying spectral subtraction

Definition at line 450 of file mfcc-core.c.

Referenced by WMP_calc().

void MakeMFCC ( double *  fbank,
float *  mfcc,
Value  para 
)

Apply DCT to filterbank to make MFCC.

Parameters:
fbank [in] filterbank
mfcc [out] output MFCC vector
para [in] configuration parameters

Definition at line 528 of file mfcc-core.c.

Referenced by WMP_calc().

float CalcC0 ( double *  fbank,
Value  para 
)

Calculate 0'th cepstral coefficient.

Parameters:
fbank [in] filterbank
para [in] configuration parameters
Returns:
the calculated 0'th cepstral coefficient.

Definition at line 510 of file mfcc-core.c.

Referenced by WMP_calc().

float CalcLogRawE ( float *  wave,
int  framesize 
)

Calculate Log Raw Energy.

Parameters:
wave [in] waveform data in the current frame
framesize [in] frame size
Returns:
the calculated log raw energy.

Definition at line 341 of file mfcc-core.c.

Referenced by WMP_calc().

void ZMeanFrame ( float *  wave,
int  framesize 
)

Remove DC offset per frame

Parameters:
wave [i/o] waveform data in the current frame
framesize [in] frame size

Definition at line 322 of file mfcc-core.c.

Referenced by new_SS_calculate(), and WMP_calc().

void WeightCepstrum ( float *  mfcc,
Value  para 
)

Re-scale cepstral coefficients.

Parameters:
mfcc [i/o] a MFCC vector
para [in] configuration parameters

Definition at line 562 of file mfcc-core.c.

Referenced by WMP_calc().

int Wav2MFCC ( SP16 *  wave,
float **  mfcc,
Value  para,
int  nSamples,
float *  ssbuf,
int  ssbuflen 
)

Convert wave data to MFCC. Also does spectral subtraction if ssbuf specified.

Parameters:
wave [in] waveform data
mfcc [out] buffer to store the resulting MFCC parameter vector [t][0..veclen-1], should be already allocated
para [in] configuration parameters
nSamples [in] length of waveform data
ssbuf [in] buffer that holds noise spectrum to be subtracted from input, or NULL if not using spectral subtraction
ssbuflen [in] length of above, ignored when ssbuf is NULL
Returns:
the number of processed frames.

Definition at line 56 of file wav2mfcc-buffer.c.

Referenced by new_wav2mfcc().

void Delta ( float **  c,
int  frame,
Value  para 
)

Calculate delta coefficients

Parameters:
c [i/o] MFCC vectors, in which the delta coeff. will be appended.
frame [in] number of frames
para [in] configuration parameters

Definition at line 140 of file wav2mfcc-buffer.c.

void Accel ( float **  c,
int  frame,
Value  para 
)

Calculate acceleration coefficients.

Parameters:
c [i/o] MFCC vectors, in which the acceleration coeff. will be appended.
frame [in] number of frames
para [in] configuration parameters

Definition at line 188 of file wav2mfcc-buffer.c.

void NormaliseLogE ( float **  mfcc,
int  frame_num,
Value  para 
)

Normalise log energy

Parameters:
mfcc [i/o] array of MFCC vectors
frame_num [in] number of frames
para [in] configuration parameters

Definition at line 108 of file wav2mfcc-buffer.c.

void CMN ( float **  mfcc,
int  frame_num,
int  dim 
)

Cepstrum Mean Normalization (buffered). The cepstral mean will be computed within the given MFCC vectors.

Parameters:
mfcc [i/o] array of MFCC vectors
frame_num [in] number of frames
dim [in] total dimension of MFCC vectors

Definition at line 227 of file wav2mfcc-buffer.c.

void WMP_init ( Value  para,
float **  bf,
float *  ssbuf,
int  ssbuflen 
)

Initialize and set up buffers for MFCC computation.

Parameters:
para [in] configuration parameters
bf [out] pointer to the entry point of workspace for FFT
ssbuf [in] noise spectrum, or NULL if not using spectral subtraction
ssbuflen [in] length of above, ignored when ssbuf is NULL

Definition at line 57 of file wav2mfcc-pipe.c.

Referenced by RealTimeInit(), and Wav2MFCC().

DeltaBuf* WMP_deltabuf_new ( int  veclen,
int  windowlen 
)

Allocate a new delta cycle buffer.

Parameters:
veclen [in] length of a vector
windowlen [in] window width for computing delta
Returns:
pointer to newly allocated delta cycle buffer structure.

Definition at line 83 of file wav2mfcc-pipe.c.

Referenced by RealTimeInit().

void WMP_deltabuf_free ( DeltaBuf *  db  ) 

Destroy the delta cycle buffer.

Parameters:
db [i/o] delta cycle buffer

Definition at line 110 of file wav2mfcc-pipe.c.

void WMP_deltabuf_prepare ( DeltaBuf *  db  ) 

Reset and clear the delta cycle buffer.

Parameters:
db [i/o] delta cycle buffer

Definition at line 128 of file wav2mfcc-pipe.c.

Referenced by RealTimePipeLinePrepare().

boolean WMP_deltabuf_proceed ( DeltaBuf *  db,
float *  new_mfcc 
)

Store the given MFCC vector into the delta cycle buffer, and compute the latest delta coefficients.

Parameters:
db [i/o] delta cycle buffer
new_mfcc [in] MFCC vector
Returns:
TRUE if next delta coeff. computed, in that case it is saved in db->delta[], or FALSE if delta has not yet been computed because of insufficient data.

Definition at line 187 of file wav2mfcc-pipe.c.

Referenced by RealTimeParam(), and RealTimePipeLine().

boolean WMP_deltabuf_flush ( DeltaBuf *  db  ) 

Flush the delta coefficients left in the delta cycle buffer.

Parameters:
db [i/o] delta cycle buffer
Returns:
TRUE if next delta coeff. computed, in that case it is saved in db->delta[], or FALSE if all delta computation has been flushed and no data is available.

Definition at line 229 of file wav2mfcc-pipe.c.

Referenced by RealTimeParam().

void CMN_realtime_init ( int  dimension,
float  weight 
)

Initialize MAP-CMN at startup.

Parameters:
dimension [in] vector dimension
weight [in] initial cepstral mean weight

Definition at line 290 of file wav2mfcc-pipe.c.

Referenced by RealTimeInit().

void CMN_realtime_prepare (  ) 

Prepare for MAP-CMN at start of each input

Definition at line 317 of file wav2mfcc-pipe.c.

Referenced by RealTimePipeLinePrepare().

void CMN_realtime ( float *  mfcc,
int  dim 
)

Perform MAP-CMN for incoming MFCC vectors

Parameters:
mfcc [in] MFCC vector
dim [in] dimension

Definition at line 332 of file wav2mfcc-pipe.c.

Referenced by RealTimeParam(), and RealTimePipeLine().

void CMN_realtime_update (  ) 

Update initial cepstral mean from previous utterances for next input.

Definition at line 360 of file wav2mfcc-pipe.c.

Referenced by RealTimeCMNUpdate().

boolean CMN_load_from_file ( char *  filename,
int  dim 
)

Load CMN parameter from file. If the number of MFCC dimension in the file does not match the specified one, an error will occur.

Parameters:
filename [in] file name
dim [in] required number of MFCC dimensions
Returns:
TRUE on success, FALSE on failure.

Definition at line 460 of file wav2mfcc-pipe.c.

Referenced by RealTimeInit().

boolean CMN_save_to_file ( char *  filename  ) 

Save the current CMN vector to a file.

Parameters:
filename [in] filename to save the data.
Returns:
TRUE on success, FALSE on failure.

Definition at line 504 of file wav2mfcc-pipe.c.

Referenced by RealTimeCMNUpdate().

void energy_max_init (  ) 

Initialize work area for energy normalization on live input. This should be called once on startup.

Definition at line 543 of file wav2mfcc-pipe.c.

Referenced by RealTimeInit().

void energy_max_prepare ( Value *  para  ) 

Prepare values for energy normalization on live input. This should be called before each input segment.

Parameters:
para [in] MFCC computation configuration parameter

Definition at line 555 of file wav2mfcc-pipe.c.

Referenced by RealTimePipeLinePrepare().

LOGPROB energy_max_normalize ( LOGPROB  f,
Value *  para 
)

Perform energy normalization using maximum of last input.

Parameters:
f [in] raw energy
para [in] MFCC computation configuration parameter
Returns:
value of the normalized log energy.

Definition at line 571 of file wav2mfcc-pipe.c.

Referenced by RealTimePipeLine().

float* new_SS_load_from_file ( char *  filename,
int *  slen 
)

Load a noise spectrum from file.

Parameters:
filename [in] path name of noise spectrum file
slen [out] length of the returned buffer
Returns:
a newly allocated buffer that holds the loaded noise spectrum.

Definition at line 65 of file ss.c.

Referenced by new_wav2mfcc(), and RealTimeInit().

float* new_SS_calculate ( SP16 *  wave,
int  wavelen,
Value  para,
int *  slen 
)

Compute average spectrum of audio input. This is used to estimate a noise spectrum from input samples.

Parameters:
wave [in] input audio data sequence
wavelen [in] length of above
para [in] parameter
slen [out] length of returned buffer
Returns:
a newly allocated buffer that contains the calculated spectrum.

Definition at line 109 of file ss.c.

Referenced by new_wav2mfcc().

void undef_para ( Value *  para  ) 

Reset configuration parameters for MFCC computation.

Parameters:
para [out] feature extraction parameters

Definition at line 38 of file para.c.

Referenced by initialize_GMM(), initialize_GSHMM(), and system_bootup().

void make_default_para ( Value *  para  ) 

Set Julius default parameters for MFCC computation.

Parameters:
para [out] feature extraction parameters

Definition at line 79 of file para.c.

Referenced by system_bootup().

void make_default_para_htk ( Value *  para  ) 

Set HTK default configuration parameters for MFCC computation. This will be referred to when parameters are given as an HTK Config file.

Parameters:
para [out] feature extraction parameters

Definition at line 109 of file para.c.

Referenced by system_bootup().

void apply_para ( Value *  dst,
Value *  src 
)

Merge two configuration parameters for MFCC computation.

Parameters:
dst [out] feature extraction parameters to set to
src [in] feature extraction parameters to set from

Definition at line 134 of file para.c.

Referenced by final_fusion().

boolean htk_config_file_parse ( char *  HTKconffile,
Value *  para 
)

Read and parse an HTK Config file, and set the specified option values.

Parameters:
HTKconffile [in] HTK Config file path name
para [out] MFCC parameter to set
Returns:
TRUE on success, FALSE on failure.

Definition at line 178 of file para.c.

Referenced by opt_parse().

void calc_para_from_header ( Value *  para,
short  param_type,
short  vec_size 
)

Set acoustic analysis parameters from HTK HMM definition header information.

Parameters:
para [out] acoustic analysis parameters
param_type [in] parameter type specified at HMM header
vec_size [in] vector size type specified at HMM header

Definition at line 286 of file para.c.

Referenced by initialize_HMM().

void put_para ( Value *  para  ) 

Output acoustic analysis configuration parameters to stdout.

Parameters:
para [in] configuration parameter

Definition at line 322 of file para.c.

Referenced by print_info().


Generated on Tue Dec 26 16:17:15 2006 for Julius by  doxygen 1.5.0