#include <sent/stddefs.h>
#include <sent/htk_defs.h>
#include <sent/htk_param.h>
#include <ctype.h>
データ構造 | |
struct | Value |
mfcc configuration parameter values [詳細] | |
struct | FBankInfo |
Workspace for filterbank analysis [詳細] | |
struct | DeltaBuf |
Cycle buffer for delta computation [詳細] | |
struct | MFCCWork |
Work area for MFCC computation [詳細] | |
struct | CMEAN |
Structure to hold sentence sum of MFCC for realtime CMN [詳細] | |
struct | CMNWork |
Work area for real-time CMN [詳細] | |
struct | ENERGYWork |
work area for energy normalization on real time input [詳細] | |
マクロ定義 | |
#define | CPMAX 500 |
DEBUG: define if you want to enable debug messages for sin/cos table operation. Maximum number of frames to store cepstral mean for realtime CMN update | |
#define | CPSTEP 5 |
allocate step of cmean list per sentence | |
#define | DEF_SMPPERIOD 625 |
Default sampling period in 100ns (625 = 16kHz) | |
#define | DEF_FRAMESIZE 400 |
Default Window size in samples, similar to WINDOWSIZE in HTK (unit is different) | |
#define | DEF_FFTNUM 512 |
Number of FFT steps | |
#define | DEF_FRAMESHIFT 160 |
Default frame shift length in samples | |
#define | DEF_PREENPH 0.97 |
Default pre-emphasis coefficient, corresponds to PREEMCOEF in HTK | |
#define | DEF_MFCCDIM 12 |
Default number of MFCC dimension, corresponds to NUMCEPS in HTK | |
#define | DEF_CEPLIF 22 |
Default cepstral Liftering coefficient, corresponds to CEPLIFTER in HTK | |
#define | DEF_FBANK 24 |
Default number of filterbank channels, corresponds to NUMCHANS in HTK | |
#define | DEF_DELWIN 2 |
Default delta window size, corresponds to DELTAWINDOW in HTK | |
#define | DEF_ACCWIN 2 |
Default acceleration window size, corresponds to ACCWINDOW in HTK | |
#define | DEF_SILFLOOR 50.0 |
Default energy silence floor in dBs, corresponds to SILFLOOR in HTK | |
#define | DEF_ESCALE 1.0 |
Default scaling coefficient of log energy, corresponds to ESCALE in HTK | |
#define | DEF_SSALPHA 2.0 |
Default alpha coefficient for spectral subtraction | |
#define | DEF_SSFLOOR 0.5 |
Default flooring coefficient for spectral subtraction | |
#define | VALUE_VERSION 3 |
Integer version number of Value, for embedding | |
関数 | |
MFCCWork * | WMP_work_new (Value *para) |
Set up work area for parameters, values, buffers, and tables to compute MFCC vectors, with the given parameter configuration | |
void | WMP_calc (MFCCWork *w, float *mfcc, Value *para) |
Calculate MFCC and log energy for one frame. | |
void | WMP_free (MFCCWork *w) |
Free all work area for MFCC computation | |
boolean | InitFBank (MFCCWork *w, Value *para) |
Build filterbank information and generate tables for MFCC computation. | |
void | FreeFBank (FBankInfo *fb) |
Free FBankInfo. | |
void | Hamming (float *wave, int framesize, MFCCWork *w) |
Apply hamming window. | |
void | PreEmphasise (float *wave, int framesize, float preEmph) |
Apply pre-emphasis filter. | |
float | Mel (int k, float fres) |
Return mel-frequency. | |
void | FFT (float *xRe, float *xIm, int p, MFCCWork *w) |
Apply FFT | |
void | MakeFBank (float *wave, MFCCWork *w, Value *para) |
Convert wave -> (spectral subtraction) -> mel-frequency filterbank | |
void | MakeMFCC (float *mfcc, Value *para, MFCCWork *w) |
Apply DCT to filterbank to make MFCC. | |
float | CalcC0 (MFCCWork *w, Value *para) |
Calculate 0'th cepstral coefficient. | |
float | CalcLogRawE (float *wave, int framesize) |
Calculate Log Raw Energy. | |
void | ZMeanFrame (float *wave, int framesize) |
Remove DC offset per frame | |
void | WeightCepstrum (float *mfcc, Value *para, MFCCWork *w) |
Re-scale cepstral coefficients. | |
int | Wav2MFCC (SP16 *wave, float **mfcc, Value *para, int nSamples, MFCCWork *w) |
Convert wave data to MFCC. | |
void | Delta (float **c, int frame, Value *para) |
Calculate delta coefficients | |
void | Accel (float **c, int frame, Value *para) |
Calculate acceleration coefficients. | |
void | NormaliseLogE (float **c, int frame_num, Value *para) |
Normalise log energy | |
void | CMN (float **mfcc, int frame_num, int dim) |
Cepstrum Mean Normalization (buffered) Cepstral mean will be computed within the given MFCC vectors. | |
void | MVN (float **mfcc, int frame_num, Value *para) |
Cepstrum Mean/Variance Normalization (buffered) | |
DeltaBuf * | WMP_deltabuf_new (int veclen, int windowlen) |
Allocate a new delta cycle buffer. | |
void | WMP_deltabuf_free (DeltaBuf *db) |
Destroy the delta cycle buffer. | |
void | WMP_deltabuf_prepare (DeltaBuf *db) |
Reset and clear the delta cycle buffer. | |
boolean | WMP_deltabuf_proceed (DeltaBuf *db, float *new_mfcc) |
Store the given MFCC vector into the delta cycle buffer, and compute the latest delta coefficients. | |
boolean | WMP_deltabuf_flush (DeltaBuf *db) |
Flush the delta cycle buffer to obtain the delta coefficients left in the cycle buffer. | |
CMNWork * | CMN_realtime_new (Value *para, float weight) |
Initialize MAP-CMN at startup. | |
void | CMN_realtime_free (CMNWork *c) |
Free work area for real-time CMN. | |
void | CMN_realtime_prepare (CMNWork *c) |
Prepare for MAP-CMN at start of each input | |
void | CMN_realtime (CMNWork *c, float *mfcc) |
Perform MAP-CMN for incoming MFCC vectors | |
void | CMN_realtime_update (CMNWork *c, HTK_Param *param) |
Update initial cepstral mean from previous utterances for next input. | |
boolean | CMN_load_from_file (CMNWork *c, char *filename) |
Load CMN parameter from file. | |
boolean | CMN_save_to_file (CMNWork *c, char *filename) |
Save the current CMN vector to a file. | |
void | energy_max_init (ENERGYWork *energy) |
Initialize work area for energy normalization on live input. | |
void | energy_max_prepare (ENERGYWork *energy, Value *para) |
Prepare values for energy normalization on live input. | |
LOGPROB | energy_max_normalize (ENERGYWork *energy, LOGPROB f, Value *para) |
Perform energy normalization using maximum of last input. | |
float * | new_SS_load_from_file (char *filename, int *slen) |
Load a noise spectrum from file. | |
float * | new_SS_calculate (SP16 *wave, int wavelen, int *slen, MFCCWork *w, Value *para) |
Compute average spectrum of audio input. | |
void | undef_para (Value *para) |
Reset configuration parameters for MFCC computation. | |
void | make_default_para (Value *para) |
Set Julius default parameters for MFCC computation. | |
void | make_default_para_htk (Value *para) |
Set HTK default configuration parameters for MFCC computation. | |
void | apply_para (Value *dst, Value *src) |
Merge two configuration parameters for MFCC computation. | |
boolean | htk_config_file_parse (char *HTKconffile, Value *para) |
Read and parse an HTK Config file, and set the specified option values. | |
void | calc_para_from_header (Value *para, short param_type, short vec_size) |
Set acoustic analysis parameters from HTK HMM definition header information. | |
void | put_para (FILE *fp, Value *para) |
Output acoustic analysis configuration parameters to the given file pointer. |
このファイルには,音声波形データからMFCC形式の特徴量ベクトル系列を 計算するための構造体の定義およびデフォルト値が含まれています. デフォルト値は Julius とともに配布されている音響モデルで使用している 値であり,HTKのデフォルトとは値が異なる部分がありますので注意して下さい.
libsent/src/wav2mfcc/wav2mfcc-pipe.c
julius/wav2mfcc.c
julius/realtime-1stpass.c
mfcc.h で定義されています。
Set up work area for parameters, values, buffers, and tables to compute MFCC vectors, with the given parameter configuration
para | [in] configuration parameters |
mfcc-core.c の 614 行で定義されています。
Calculate MFCC and log energy for one frame.
Perform spectral subtraction if ssbuf is specified.
w | [i/o] MFCC calculation work area | |
mfcc | [out] buffer to hold the resulting MFCC vector | |
para | [in] configuration parameters |
mfcc-core.c の 652 行で定義されています。
参照元 RealTimeMFCC()・Wav2MFCC().
void WMP_free | ( | MFCCWork * | w | ) |
Free all work area for MFCC computation
w | [i/o] MFCC calculation work area |
mfcc-core.c の 694 行で定義されています。
参照元 j_mfcccalc_free().
Build filterbank information and generate tables for MFCC computation.
w | [i/o] MFCC calculation work area | |
para | [in] configuration parameters |
mfcc-core.c の 223 行で定義されています。
参照元 WMP_work_new().
void FreeFBank | ( | FBankInfo * | fb | ) |
void Hamming | ( | float * | wave, | |
int | framesize, | |||
MFCCWork * | w | |||
) |
Apply hamming window.
wave | [i/o] waveform data in the current frame | |
framesize | [in] frame size | |
w | [i/o] MFCC calculation work area |
mfcc-core.c の 377 行で定義されています。
void PreEmphasise | ( | float * | wave, | |
int | framesize, | |||
float | preEmph | |||
) |
Apply pre-emphasis filter.
wave | [i/o] waveform data in the current frame | |
framesize | [in] frame size in samples | |
preEmph | [in] pre-emphasis coef. |
mfcc-core.c の 361 行で定義されています。
float Mel | ( | int | k, | |
float | fres | |||
) |
Return mel-frequency.
k | [in] channel number of filter bank | |
fres | [in] constant value computed by "1.0E7 / (para.smp_period * fb.fftN * 700.0)" |
mfcc-core.c の 155 行で定義されています。
参照元 InitFBank().
void FFT | ( | float * | xRe, | |
float * | xIm, | |||
int | p, | |||
MFCCWork * | w | |||
) |
Apply FFT
xRe | [i/o] real part of waveform | |
xIm | [i/o] imaginary part of waveform | |
p | [in] 2^p = FFT point | |
w | [i/o] MFCC calculation work area |
mfcc-core.c の 399 行で定義されています。
Convert wave -> (spectral subtraction) -> mel-frequency filterbank
wave | [in] waveform data in the current frame | |
w | [i/o] MFCC calculation work area | |
para | [in] configuration parameters |
mfcc-core.c の 452 行で定義されています。
参照元 WMP_calc().
Apply DCT to filterbank to make MFCC.
mfcc | [out] output MFCC vector | |
para | [in] configuration parameters | |
w | [i/o] MFCC calculation work area |
mfcc-core.c の 541 行で定義されています。
参照元 WMP_calc().
Calculate 0'th cepstral coefficient.
w | [i/o] MFCC calculation work area | |
para | [in] configuration parameters |
mfcc-core.c の 523 行で定義されています。
参照元 WMP_calc().
float CalcLogRawE | ( | float * | wave, | |
int | framesize | |||
) |
Calculate Log Raw Energy.
wave | [in] waveform data in the current frame | |
framesize | [in] frame size |
mfcc-core.c の 341 行で定義されています。
参照元 WMP_calc().
void ZMeanFrame | ( | float * | wave, | |
int | framesize | |||
) |
Remove DC offset per frame
wave | [i/o] waveform data in the current frame | |
framesize | [in] frame size |
mfcc-core.c の 322 行で定義されています。
Re-scale cepstral coefficients.
mfcc | [i/o] a MFCC vector | |
para | [in] configuration parameters | |
w | [i/o] MFCC calculation work area |
mfcc-core.c の 576 行で定義されています。
参照元 WMP_calc().
Convert wave data to MFCC.
Also does spectral subtraction if ssbuf specified.
wave | [in] waveform data | |
mfcc | [out] buffer to store the resulting MFCC parameter vector [t][0..veclen-1], should be already allocated | |
para | [in] configuration parameters | |
nSamples | [in] length of waveform data | |
w | [i/o] MFCC calculation work area |
wav2mfcc-buffer.c の 57 行で定義されています。
参照元 wav2mfcc().
void Delta | ( | float ** | c, | |
int | frame, | |||
Value * | para | |||
) |
Calculate delta coefficients
c | [i/o] MFCC vectors, in which the delta coeff. will be appended. | |
frame | [in] number of frames | |
para | [in] configuration parameters |
wav2mfcc-buffer.c の 142 行で定義されています。
void Accel | ( | float ** | c, | |
int | frame, | |||
Value * | para | |||
) |
Calculate acceleration coefficients.
c | [i/o] MFCC vectors, in which the delta coeff. will be appended. | |
frame | [in] number of frames | |
para | [in] configuration parameters |
wav2mfcc-buffer.c の 180 行で定義されています。
void NormaliseLogE | ( | float ** | mfcc, | |
int | frame_num, | |||
Value * | para | |||
) |
Normalise log energy
mfcc | [i/o] array of MFCC vectors | |
frame_num | [in] number of frames | |
para | [in] configuration parameters |
wav2mfcc-buffer.c の 110 行で定義されています。
void CMN | ( | float ** | mfcc, | |
int | frame_num, | |||
int | dim | |||
) |
Cepstrum Mean Normalization (buffered) Cepstral mean will be computed within the given MFCC vectors.
mfcc | [i/o] array of MFCC vectors | |
frame_num | [in] number of frames | |
dim | [in] total dimension of MFCC vectors |
wav2mfcc-buffer.c の 219 行で定義されています。
void MVN | ( | float ** | mfcc, | |
int | frame_num, | |||
Value * | para | |||
) |
Cepstrum Mean/Variance Normalization (buffered)
mfcc | [i/o] array of MFCC vectors | |
frame_num | [in] number of frames | |
para | [in] configuration parameters |
wav2mfcc-buffer.c の 248 行で定義されています。
DeltaBuf* WMP_deltabuf_new | ( | int | veclen, | |
int | windowlen | |||
) |
Allocate a new delta cycle buffer.
veclen | [in] length of a vector | |
windowlen | [in] window width for computing delta |
wav2mfcc-pipe.c の 60 行で定義されています。
参照元 RealTimeInit().
void WMP_deltabuf_free | ( | DeltaBuf * | db | ) |
Destroy the delta cycle buffer.
db | [i/o] delta cycle buffer |
wav2mfcc-pipe.c の 87 行で定義されています。
参照元 j_mfcccalc_free().
void WMP_deltabuf_prepare | ( | DeltaBuf * | db | ) |
Reset and clear the delta cycle buffer.
db | [i/o] delta cycle buffer |
wav2mfcc-pipe.c の 105 行で定義されています。
参照元 reset_mfcc().
Store the given MFCC vector into the delta cycle buffer, and compute the latest delta coefficients.
db | [i/o] delta cycle buffer | |
new_mfcc | [in] MFCC vector |
wav2mfcc-pipe.c の 164 行で定義されています。
Flush the delta cycle buffer to obtain the delta coefficients left in the cycle buffer.
db | [i/o] delta cycle buffer |
wav2mfcc-pipe.c の 206 行で定義されています。
参照元 RealTimeParam().
Initialize MAP-CMN at startup.
para | [in] MFCC computation configuration parameter | |
weight | [in] initial cepstral mean weight |
wav2mfcc-pipe.c の 247 行で定義されています。
参照元 RealTimeInit().
void CMN_realtime_free | ( | CMNWork * | c | ) |
Free work area for real-time CMN.
c | [i/o] CMN calculation work area |
wav2mfcc-pipe.c の 285 行で定義されています。
参照元 j_mfcccalc_free().
void CMN_realtime_prepare | ( | CMNWork * | c | ) |
Prepare for MAP-CMN at start of each input
c | [i/o] CMN calculation work area |
wav2mfcc-pipe.c の 309 行で定義されています。
void CMN_realtime | ( | CMNWork * | c, | |
float * | mfcc | |||
) |
Perform MAP-CMN for incoming MFCC vectors
c | [i/o] CMN calculation work area | |
mfcc | [in] MFCC vector |
wav2mfcc-pipe.c の 328 行で定義されています。
Update initial cepstral mean from previous utterances for next input.
c | [i/o] CMN calculation work area |
wav2mfcc-pipe.c の 389 行で定義されています。
参照元 RealTimeCMNUpdate().
Load CMN parameter from file.
If the number of MFCC dimension in the file does not match the specified one, an error will occur.
c | [i/o] CMN calculation work area | |
filename | [in] file name |
wav2mfcc-pipe.c の 522 行で定義されています。
参照元 RealTimeInit().
Save the current CMN vector to a file.
c | [i/o] CMN calculation work area | |
filename | [in] filename to save the data. |
wav2mfcc-pipe.c の 579 行で定義されています。
参照元 RealTimeCMNUpdate().
void energy_max_init | ( | ENERGYWork * | energy | ) |
Initialize work area for energy normalization on live input.
This should be called once on startup.
energy | [in] energy normalization work area |
wav2mfcc-pipe.c の 629 行で定義されています。
参照元 RealTimeInit().
void energy_max_prepare | ( | ENERGYWork * | energy, | |
Value * | para | |||
) |
Prepare values for energy normalization on live input.
This should be called before each input segment.
energy | [in] energy normalization work area | |
para | [in] MFCC computation configuration parameter |
wav2mfcc-pipe.c の 642 行で定義されています。
参照元 reset_mfcc().
LOGPROB energy_max_normalize | ( | ENERGYWork * | energy, | |
LOGPROB | f, | |||
Value * | para | |||
) |
Perform energy normalization using maximum of last input.
energy | [in] energy normalization work area | |
f | [in] raw energy | |
para | [in] MFCC computation configuration parameter |
wav2mfcc-pipe.c の 659 行で定義されています。
参照元 RealTimeMFCC().
float* new_SS_load_from_file | ( | char * | filename, | |
int * | slen | |||
) |
Load a noise spectrum from file.
filename | [in] path name of noise spectrum file | |
slen | [out] length of the returned buffer |
参照元 RealTimeInit()・wav2mfcc().
Compute average spectrum of audio input.
This is used to estimate a noise spectrum from input samples.
wave | [in] input audio data sequence | |
wavelen | [in] length of above | |
slen | [out] length of returned buffer | |
w | [i/o] MFCC calculation work area | |
para | [in] parameter |
参照元 wav2mfcc().
void undef_para | ( | Value * | para | ) |
Reset configuration parameters for MFCC computation.
para | [out] feature extraction parameters |
void make_default_para | ( | Value * | para | ) |
Set Julius default parameters for MFCC computation.
para | [out] feature extraction parameters |
void make_default_para_htk | ( | Value * | para | ) |
Set HTK default configuration parameters for MFCC computation.
This will be referred to when parameters are given as an HTK Config file.
para | [out] feature extraction parameters |
Merge two configuration parameters for MFCC computation.
dst | [out] feature extraction parameters to set to | |
src | [in] feature extraction parameters to set from |
Read and parse an HTK Config file, and set the specified option values.
HTKconffile | [in] HTK Config file path name | |
para | [out] MFCC parameter to set |
参照元 opt_parse().
void calc_para_from_header | ( | Value * | para, | |
short | param_type, | |||
short | vec_size | |||
) |
void put_para | ( | FILE * | fp, | |
Value * | para | |||
) |
Output acoustic analysis configuration parameters to the given file pointer.
fp | [in] file pointer | |
para | [in] configuration parameter |
参照元 print_engine_info().