00001
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041 #include <sent/stddefs.h>
00042 #include <sent/mfcc.h>
00043
00056 int
00057 Wav2MFCC(SP16 *wave, float **mfcc, Value *para, int nSamples, MFCCWork *w)
00058 {
00059 int i, k, t;
00060 int end = 0, start = 1;
00061 int frame_num;
00062
00063
00064 if (w->ssbuf != NULL) {
00065
00066 if (w->ssbuflen != w->bflen) {
00067 jlog("Error: mfcc-core: noise spectrum length not match\n");
00068 return FALSE;
00069 }
00070 }
00071
00072 frame_num = (int)((nSamples - para->framesize) / para->frameshift) + 1;
00073
00074 for(t = 0; t < frame_num; t++){
00075 if(end != 0) start = end - (para->framesize - para->frameshift) - 1;
00076
00077 k = 1;
00078 for(i = start; i <= start + para->framesize; i++){
00079 w->bf[k] = (float)wave[i - 1]; k++;
00080 }
00081 end = i;
00082
00083
00084 WMP_calc(w, mfcc[t], para);
00085 }
00086
00087
00088 if (para->energy && para->enormal) NormaliseLogE(mfcc, frame_num, para);
00089
00090
00091 if (para->delta) Delta(mfcc, frame_num, para);
00092
00093
00094 if (para->acc) Accel(mfcc, frame_num, para);
00095
00096
00097 if (para->cmn && ! para->cvn) CMN(mfcc, frame_num, para->mfcc_dim + (para->c0 ? 1 : 0));
00098 else if (para->cmn || para->cvn) MVN(mfcc, frame_num, para);
00099
00100 return(frame_num);
00101 }
00102
00110 void NormaliseLogE(float **mfcc, int frame_num, Value *para)
00111 {
00112 float max, min, f;
00113 int t;
00114 int l;
00115
00116 l = para->mfcc_dim;
00117 if (para->c0) l++;
00118
00119
00120 max = mfcc[0][l];
00121 for(t = 0; t < frame_num; t++)
00122 if(mfcc[t][l] > max) max = mfcc[t][l];
00123
00124
00125 min = max - (para->silFloor * LOG_TEN) / 10.0;
00126
00127
00128 for(t = 0; t < frame_num; t++){
00129 f = mfcc[t][l];
00130 if (f < min) f = min;
00131 mfcc[t][l] = 1.0 - (max - f) * para->escale;
00132 }
00133 }
00134
00142 void Delta(float **c, int frame, Value *para)
00143 {
00144 int theta, t, n, B = 0;
00145 float A1, A2, sum;
00146
00147 for(theta = 1; theta <= para->delWin; theta++)
00148 B += theta * theta;
00149
00150 for(n = para->baselen - 1; n >=0; n--){
00151 for(t = 0; t < frame; t++){
00152 sum = 0;
00153 for(theta = 1; theta <= para->delWin; theta++){
00154
00155
00156 if (t - theta < 0) A1 = c[0][n];
00157 else A1 = c[t - theta][n];
00158 if (t + theta >= frame) A2 = c[frame - 1][n];
00159 else A2 = c[t + theta][n];
00160 sum += theta * (A2 - A1);
00161 }
00162 sum /= (2.0 * B);
00163 if (para->absesup) {
00164 c[t][para->baselen + n - 1] = sum;
00165 } else {
00166 c[t][para->baselen + n] = sum;
00167 }
00168 }
00169 }
00170 }
00171
00172
00180 void Accel(float **c, int frame, Value *para)
00181 {
00182 int theta, t, n, B = 0;
00183 int src, dst;
00184 float A1, A2, sum;
00185
00186 for(theta = 1; theta <= para->accWin; theta++)
00187 B += theta * theta;
00188
00189 for(t = 0; t < frame; t++){
00190 src = para->baselen * 2 - 1;
00191 if (para->absesup) src--;
00192 dst = src + para->baselen;
00193 for(n = 0; n < para->baselen; n++){
00194 sum = 0;
00195 for(theta = 1; theta <= para->accWin; theta++){
00196
00197
00198 if (t - theta < 0) A1 = c[0][src];
00199 else A1 = c[t - theta][src];
00200 if (t + theta >= frame) A2 = c[frame - 1][src];
00201 else A2 = c[t + theta][src];
00202 sum += theta * (A2 - A1);
00203 }
00204 c[t][dst] = sum / (2 * B);
00205 src--;
00206 dst--;
00207 }
00208 }
00209 }
00210
/**
 * Cepstral mean normalisation, in place.
 *
 * For each of the first @a dim columns, the mean over all frames is
 * computed and subtracted from every frame.  Columns at index @a dim and
 * above are left untouched.  The mean is kept in a scalar per column, so no
 * temporary buffers are allocated.
 *
 * @param mfcc       [i/o] array of feature vectors, one per frame
 * @param frame_num  [in]  number of frames in @a mfcc
 * @param dim        [in]  number of leading columns to normalise
 */
void CMN(float **mfcc, int frame_num, int dim)
{
  int i, t;
  float sum, ave;

  /* no frames: nothing to normalise, and avoids dividing by zero */
  if (frame_num <= 0) return;

  for (i = 0; i < dim; i++) {
    /* mean of column i over the whole sequence */
    sum = 0.0;
    for (t = 0; t < frame_num; t++) {
      sum += mfcc[t][i];
    }
    ave = sum / frame_num;

    /* remove it from every frame */
    for (t = 0; t < frame_num; t++) {
      mfcc[t][i] = mfcc[t][i] - ave;
    }
  }
}
00240
00248 void MVN(float **mfcc, int frame_num, Value *para)
00249 {
00250 int i, t;
00251 float *mfcc_mean, *mfcc_sd;
00252 float x;
00253 int basedim;
00254
00255 basedim = para->mfcc_dim + (para->c0 ? 1 : 0);
00256
00257 mfcc_mean = (float *)mycalloc(para->veclen, sizeof(float));
00258 if (para->cvn) mfcc_sd = (float *)mycalloc(para->veclen, sizeof(float));
00259
00260
00261 for(i = 0; i < para->veclen; i++){
00262 mfcc_mean[i] = 0.0;
00263 for(t = 0; t < frame_num; t++)
00264 mfcc_mean[i] += mfcc[t][i];
00265 mfcc_mean[i] /= (float)frame_num;
00266 }
00267 if (para->cvn) {
00268
00269 for(i = 0; i < para->veclen; i++){
00270 mfcc_sd[i] = 0.0;
00271 for(t = 0; t < frame_num; t++) {
00272 x = mfcc[t][i] - mfcc_mean[i];
00273 mfcc_sd[i] += x * x;
00274 }
00275 mfcc_sd[i] = sqrt(mfcc_sd[i] / (float)frame_num);
00276 }
00277 }
00278 for(t = 0; t < frame_num; t++){
00279 if (para->cmn) {
00280
00281 for(i = 0; i < basedim; i++) mfcc[t][i] -= mfcc_mean[i];
00282 }
00283 if (para->cvn) {
00284
00285 for(i = 0; i < para->veclen; i++) mfcc[t][i] /= mfcc_sd[i];
00286 }
00287 }
00288
00289 if (para->cvn) free(mfcc_sd);
00290 free(mfcc_mean);
00291 }