00001
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040 #include <sent/stddefs.h>
00041 #include <sent/mfcc.h>
00042
00043
00044 #ifdef MFCC_SINCOS_TABLE
00045
00046
00047
00048 static double *costbl_hamming;
00049 static int costbl_hamming_len = 0;
00050
00051 static double *costbl_fft;
00052 static double *sintbl_fft;
00053 static int tbllen = 0;
00054
00055 static double *costbl_makemfcc;
00056 static int costbl_makemfcc_len = 0;
00057
00058 static double *sintbl_wcep;
00059 static int sintbl_wcep_len = 0;
00060
00061 #endif
00062
00063 static float sqrt2var;
00064
00065 #ifdef MFCC_SINCOS_TABLE
00066
00067
00073 void
00074 make_costbl_hamming(int framesize)
00075 {
00076 int i;
00077 float a;
00078
00079 if (costbl_hamming_len == framesize) return;
00080 if (costbl_hamming_len != 0) {
00081 j_printerr("WARNING!! frame size changed!! (%d -> %d)\n",
00082 costbl_hamming_len, framesize);
00083 free(costbl_hamming);
00084 }
00085 costbl_hamming = (double *)mymalloc(sizeof(double) * framesize);
00086
00087 a = 2.0 * PI / (framesize - 1);
00088 for(i=1;i<=framesize;i++) {
00089
00090 costbl_hamming[i-1] = 0.54 - 0.46 * cos(a * (i - 1));
00091 }
00092 costbl_hamming_len = framesize;
00093 #ifdef MFCC_TABLE_DEBUG
00094 j_printerr("generated Hamming cos table (%d bytes)\n",
00095 costbl_hamming_len * sizeof(double));
00096 #endif
00097 }
00098
00104 void
00105 make_fft_table(int n)
00106 {
00107 int m;
00108 int me, me1;
00109
00110 if (tbllen == n) return;
00111
00112 if (tbllen != 0) {
00113 j_printerr("WARNING!! fft size changed!! (%d -> %d)\n", tbllen, n);
00114 free(costbl_fft);
00115 free(sintbl_fft);
00116 }
00117 costbl_fft = (double *)mymalloc(sizeof(double) * n);
00118 sintbl_fft = (double *)mymalloc(sizeof(double) * n);
00119 for (m = 1; m <= n; m++) {
00120 me = 1 << m;
00121 me1 = me / 2;
00122 costbl_fft[m-1] = cos(PI / me1);
00123 sintbl_fft[m-1] = -sin(PI / me1);
00124 }
00125 tbllen = n;
00126 #ifdef MFCC_TABLE_DEBUG
00127 j_printerr("generated FFT sin/cos table (%d bytes)\n", tbllen * sizeof(double));
00128 #endif
00129 }
00130
00137 void
00138 make_costbl_makemfcc(int fbank_num, int mfcc_dim)
00139 {
00140 int size;
00141 int i, j, k;
00142 float B, C;
00143
00144 size = fbank_num * mfcc_dim;
00145 if (costbl_makemfcc_len == size) return;
00146 if (costbl_makemfcc_len != 0) {
00147 j_printerr("WARNING!! fbank num or mfcc dim changed!! (%d -> %d)\n",
00148 costbl_makemfcc_len, size);
00149 free(costbl_makemfcc);
00150 }
00151 costbl_makemfcc = (double *)mymalloc(sizeof(double) * size);
00152
00153 B = PI / fbank_num;
00154 k = 0;
00155 for(i=1;i<=mfcc_dim;i++) {
00156 C = i * B;
00157 for(j=1;j<=fbank_num;j++) {
00158 costbl_makemfcc[k] = cos(C * (j - 0.5));
00159 k++;
00160 }
00161 }
00162 costbl_makemfcc_len = size;
00163 #ifdef MFCC_TABLE_DEBUG
00164 j_printerr("generated MakeMFCC cos table (%d bytes)\n",
00165 costbl_makemfcc_len * sizeof(double));
00166 #endif
00167 }
00168
00175 void
00176 make_sintbl_wcep(int lifter, int mfcc_dim)
00177 {
00178 int i;
00179 float a, b;
00180
00181 if (sintbl_wcep_len == mfcc_dim) return;
00182 if (sintbl_wcep_len != 0) {
00183 j_printerr("WARNING!! mfcc dim changed!! (%d -> %d)\n",
00184 sintbl_wcep_len, mfcc_dim);
00185 free(sintbl_wcep);
00186 }
00187 sintbl_wcep = (double *)mymalloc(sizeof(double) * mfcc_dim);
00188 a = PI / lifter;
00189 b = lifter / 2.0;
00190 for(i=0;i<mfcc_dim;i++) {
00191 sintbl_wcep[i] = 1.0 + b * sin((i+1) * a);
00192 }
00193 sintbl_wcep_len = mfcc_dim;
00194 #ifdef MFCC_TABLE_DEBUG
00195 j_printerr("generated WeightCepstrum sin table (%d bytes)\n",
00196 sintbl_wcep_len * sizeof(double));
00197 #endif
00198 }
00199
00200 #endif
00201
00202
00203
00217 int Wav2MFCC(SP16 *wave, float **mfcc, Value para, int nSamples, float *ssbuf, int ssbuflen)
00218 {
00219 float *bf;
00220 double *fbank;
00221 int i, k, t;
00222 int end = 0, start = 1;
00223 int frame_num;
00224 FBankInfo fb;
00225 float energy = 0.0;
00226 float c0 = 0.0;
00227 int p;
00228
00229
00230 fb = InitFBank(para);
00231
00232 if((fbank = (double *)mymalloc((para.fbank_num+1)*sizeof(double))) == NULL){
00233 j_error("Error: Wav2MFCC: failed to malloc\n");
00234 }
00235 if((bf = (float *)mymalloc(fb.fftN * sizeof(float))) == NULL){
00236 j_error("Error: Wav2MFCC: failed to malloc\n");
00237 }
00238
00239 if (ssbuf != NULL) {
00240
00241 if (fb.fftN != ssbuflen) {
00242 j_error("Error: Wav2MFCC: noise spectrum length not match\n");
00243 }
00244 }
00245
00246 frame_num = (int)((nSamples - para.framesize) / para.frameshift) + 1;
00247
00248 for(t = 0; t < frame_num; t++){
00249 if(end != 0) start = end - (para.framesize - para.frameshift) - 1;
00250
00251 k = 1;
00252 for(i = start; i <= start + para.framesize; i++){
00253 bf[k] = (float)wave[i - 1]; k++;
00254 }
00255 end = i;
00256
00257 if (para.zmeanframe) {
00258 ZMeanFrame(bf, para.framesize);
00259 }
00260
00261
00262
00263 if (para.energy && para.raw_e) {
00264
00265 energy = CalcLogRawE(bf, para.framesize);
00266 }
00267
00268 PreEmphasise(bf, para);
00269
00270 Hamming(bf, para.framesize);
00271 if (para.energy && ! para.raw_e) {
00272
00273 energy = CalcLogRawE(bf, para.framesize);
00274 }
00275
00276 MakeFBank(bf, fbank, fb, para, ssbuf);
00277
00278 if (para.c0) c0 = CalcC0(fbank, para);
00279
00280 MakeMFCC(fbank, mfcc[t], para);
00281
00282 WeightCepstrum(mfcc[t], para);
00283
00284 p = para.mfcc_dim;
00285 if (para.c0) mfcc[t][p++] = c0;
00286 if (para.energy) mfcc[t][p++] = energy;
00287 }
00288
00289
00290 if (para.energy && para.enormal) NormaliseLogE(mfcc, frame_num, para);
00291
00292
00293 if (para.delta) Delta(mfcc, frame_num, para);
00294
00295
00296 if (para.acc) Accel(mfcc, frame_num, para);
00297
00298
00299 if(para.cmn) CMN(mfcc, frame_num, para.mfcc_dim);
00300
00301 FreeFBank(fb);
00302 free(fbank);
00303 free(bf);
00304 return(frame_num);
00305 }
00306
00307
00316 float CalcLogRawE(float *wave, int framesize)
00317 {
00318 int i;
00319 double raw_E = 0.0;
00320 float energy;
00321
00322 for(i = 1; i <= framesize; i++)
00323 raw_E += wave[i] * wave[i];
00324 energy = (float)log(raw_E);
00325
00326 return(energy);
00327 }
00328
00336 void
00337 ZMeanFrame(float *wave, int framesize)
00338 {
00339 int i;
00340 float mean;
00341
00342 mean = 0.0;
00343 for(i = 1; i <= framesize; i++) mean += wave[i];
00344 mean /= framesize;
00345 for(i = 1; i <= framesize; i++) wave[i] -= mean;
00346 }
00347
00354 void Hamming(float *wave, int framesize)
00355 {
00356 int i;
00357 #ifdef MFCC_SINCOS_TABLE
00358 for(i = 1; i <= framesize; i++)
00359 wave[i] *= costbl_hamming[i-1];
00360 #else
00361 float a;
00362 a = 2 * PI / (framesize - 1);
00363 for(i = 1; i <= framesize; i++)
00364 wave[i] *= 0.54 - 0.46 * cos(a * (i - 1));
00365 #endif
00366 }
00367
00374 void PreEmphasise (float *wave, Value para)
00375 {
00376 int i;
00377
00378 for(i = para.framesize; i >= 2; i--)
00379 wave[i] -= wave[i - 1] * para.preEmph;
00380 wave[1] *= 1.0 - para.preEmph;
00381 }
00382
00389 void WeightCepstrum (float *mfcc, Value para)
00390 {
00391 #ifdef MFCC_SINCOS_TABLE
00392 int i;
00393 for(i=0;i<para.mfcc_dim;i++) {
00394 mfcc[i] *= sintbl_wcep[i];
00395 }
00396 #else
00397 int i;
00398 float a, b, *cepWin;
00399
00400 if((cepWin = (float *)mymalloc(para.mfcc_dim * sizeof(float))) == NULL){
00401 j_error("WeightCepstrum: failed to malloc\n");
00402 }
00403 a = PI / para.lifter;
00404 b = para.lifter / 2.0;
00405
00406 for(i = 0; i < para.mfcc_dim; i++){
00407 cepWin[i] = 1.0 + b * sin((i + 1) * a);
00408 mfcc[i] *= cepWin[i];
00409 }
00410
00411 free(cepWin);
00412 #endif
00413 }
00414
00423 float Mel(int k, float fres)
00424 {
00425 return(1127 * log(1 + (k-1) * fres));
00426 }
00427
00435 FBankInfo InitFBank(Value para)
00436 {
00437 FBankInfo fb;
00438 float mlo, mhi, ms, melk;
00439 int k, chan, maxChan, nv2;
00440
00441
00442 fb.fftN = 2; fb.n = 1;
00443 while(para.framesize > fb.fftN){
00444 fb.fftN *= 2; fb.n++;
00445 }
00446
00447 nv2 = fb.fftN / 2;
00448 fb.fres = 1.0E7 / (para.smp_period * fb.fftN * 700.0);
00449 maxChan = para.fbank_num + 1;
00450 fb.klo = 2; fb.khi = nv2;
00451 mlo = 0; mhi = Mel(nv2 + 1, fb.fres);
00452
00453
00454 if (para.lopass >= 0) {
00455 mlo = 1127*log(1+(float)para.lopass/700.0);
00456 fb.klo = ((float)para.lopass * para.smp_period * 1.0e-7 * fb.fftN) + 2.5;
00457 if (fb.klo<2) fb.klo = 2;
00458 }
00459
00460 if (para.hipass >= 0) {
00461 mhi = 1127*log(1+(float)para.hipass/700.0);
00462 fb.khi = ((float)para.hipass * para.smp_period * 1.0e-7 * fb.fftN) + 0.5;
00463 if (fb.khi>nv2) fb.khi = nv2;
00464 }
00465
00466
00467 if((fb.cf = (float *)mymalloc((maxChan + 1) * sizeof(float))) == NULL){
00468 j_error("InitFBank: failed to malloc\n");
00469 }
00470 ms = mhi - mlo;
00471 for (chan = 1; chan <= maxChan; chan++)
00472 fb.cf[chan] = ((float)chan / maxChan)*ms + mlo;
00473
00474
00475 if((fb.loChan = (short *)mymalloc((nv2 + 1) * sizeof(short))) == NULL){
00476 j_error("InitFBank: failed to malloc\n");
00477 }
00478 for(k = 1, chan = 1; k <= nv2; k++){
00479 if (k < fb.klo || k > fb.khi) fb.loChan[k] = -1;
00480 else {
00481 melk = Mel(k, fb.fres);
00482 while (fb.cf[chan] < melk && chan <= maxChan) ++chan;
00483 fb.loChan[k] = chan - 1;
00484 }
00485 }
00486
00487
00488 if((fb.loWt = (float *)mymalloc((nv2 + 1) * sizeof(float))) == NULL){
00489 j_error("InitFBank: failed to malloc\n");
00490 }
00491 for(k = 1; k <= nv2; k++) {
00492 chan = fb.loChan[k];
00493 if (k < fb.klo || k > fb.khi) fb.loWt[k] = 0.0;
00494 else {
00495 if (chan > 0)
00496 fb.loWt[k] = (fb.cf[chan + 1] - Mel(k, fb.fres)) / (fb.cf[chan + 1] - fb.cf[chan]);
00497 else
00498 fb.loWt[k] = (fb.cf[1] - Mel(k, fb.fres)) / (fb.cf[1] - mlo);
00499 }
00500 }
00501
00502
00503 if((fb.Re = (float *)mymalloc((fb.fftN + 1) * sizeof(float))) == NULL){
00504 j_error("InitFBank: failed to malloc\n");
00505 }
00506 if((fb.Im = (float *)mymalloc((fb.fftN + 1) * sizeof(float))) == NULL){
00507 j_error("InitFBank: failed to malloc\n");
00508 }
00509
00510 #ifdef MFCC_SINCOS_TABLE
00511
00512 make_costbl_hamming(para.framesize);
00513 make_fft_table(fb.n);
00514 make_costbl_makemfcc(para.fbank_num, para.mfcc_dim);
00515 make_sintbl_wcep(para.lifter, para.mfcc_dim);
00516 #endif
00517
00518 sqrt2var = sqrt(2.0 / para.fbank_num);
00519
00520 return(fb);
00521 }
00522
00528 void
00529 FreeFBank(FBankInfo fb)
00530 {
00531 free(fb.cf);
00532 free(fb.loChan);
00533 free(fb.loWt);
00534 free(fb.Re);
00535 free(fb.Im);
00536 }
00537
00547 void MakeFBank(float *wave, double *fbank, FBankInfo fb, Value para, float *ssbuf)
00548 {
00549 int k, bin, i;
00550 double Re, Im, A, P, NP, H, temp;
00551
00552 for(k = 1; k <= para.framesize; k++){
00553 fb.Re[k - 1] = wave[k]; fb.Im[k - 1] = 0.0;
00554 }
00555 for(k = para.framesize + 1; k <= fb.fftN; k++){
00556 fb.Re[k - 1] = 0.0; fb.Im[k - 1] = 0.0;
00557 }
00558
00559
00560 FFT(fb.Re, fb.Im, fb.n);
00561
00562 if (ssbuf != NULL) {
00563
00564 for(k = 1; k <= fb.fftN; k++){
00565 Re = fb.Re[k - 1]; Im = fb.Im[k - 1];
00566 P = sqrt(Re * Re + Im * Im);
00567 NP = ssbuf[k - 1];
00568 if((P * P - para.ss_alpha * NP * NP) < 0){
00569 H = para.ss_floor;
00570 }else{
00571 H = sqrt(P * P - para.ss_alpha * NP * NP) / P;
00572 }
00573 fb.Re[k - 1] = H * Re;
00574 fb.Im[k - 1] = H * Im;
00575 }
00576 }
00577
00578
00579 for(i = 1; i <= para.fbank_num; i++)
00580 fbank[i] = 0.0;
00581
00582 for(k = fb.klo; k <= fb.khi; k++){
00583 Re = fb.Re[k-1]; Im = fb.Im[k-1];
00584 A = sqrt(Re * Re + Im * Im);
00585 bin = fb.loChan[k];
00586 Re = fb.loWt[k] * A;
00587 if(bin > 0) fbank[bin] += Re;
00588 if(bin < para.fbank_num) fbank[bin + 1] += A - Re;
00589 }
00590
00591
00592 for(bin = 1; bin <= para.fbank_num; bin++){
00593 temp = fbank[bin];
00594 if(temp < 1.0) temp = 1.0;
00595 fbank[bin] = log(temp);
00596 }
00597 }
00598
00599
00607 void MakeMFCC(double *fbank, float *mfcc, Value para)
00608 {
00609 #ifdef MFCC_SINCOS_TABLE
00610 int i, j, k;
00611 k = 0;
00612
00613 for(i = 0; i < para.mfcc_dim; i++){
00614 mfcc[i] = 0.0;
00615 for(j = 1; j <= para.fbank_num; j++)
00616 mfcc[i] += fbank[j] * costbl_makemfcc[k++];
00617 mfcc[i] *= sqrt2var;
00618 }
00619 #else
00620 int i, j;
00621 float B, C;
00622
00623 B = PI / para.fbank_num;
00624
00625 for(i = 1; i <= para.mfcc_dim; i++){
00626 mfcc[i - 1] = 0.0;
00627 C = i * B;
00628 for(j = 1; j <= para.fbank_num; j++)
00629 mfcc[i - 1] += fbank[j] * cos(C * (j - 0.5));
00630 mfcc[i - 1] *= sqrt2var;
00631 }
00632 #endif
00633 }
00634
00635
00644 float CalcC0(double *fbank, Value para)
00645 {
00646 int i;
00647 float S;
00648
00649 S = 0.0;
00650 for(i = 1; i <= para.fbank_num; i++)
00651 S += fbank[i];
00652 return S * sqrt2var;
00653 }
00654
00655
00663 void NormaliseLogE(float **mfcc, int frame_num, Value para)
00664 {
00665 float max, min, f;
00666 int t;
00667 int l;
00668
00669 l = para.mfcc_dim;
00670 if (para.c0) l++;
00671
00672
00673 max = mfcc[0][l];
00674 for(t = 0; t < frame_num; t++)
00675 if(mfcc[t][l] > max) max = mfcc[t][l];
00676
00677
00678 min = max - (para.silFloor * log(10.0)) / 10.0;
00679
00680
00681 for(t = 0; t < frame_num; t++){
00682 f = mfcc[t][l];
00683 if (f < min) f = min;
00684 mfcc[t][l] = 1.0 - (max - f) * para.escale;
00685 }
00686 }
00687
00695 void Delta(float **c, int frame, Value para)
00696 {
00697 int theta, t, n, B = 0;
00698 float A1, A2, sum;
00699 int offset;
00700 float *ed;
00701
00702 for(theta = 1; theta <= para.delWin; theta++)
00703 B += theta * theta;
00704
00705 if (para.absesup) ed = (float *)mymalloc(sizeof(float) * frame);
00706
00707 for(t = 0; t < frame; t++){
00708 for(n = 0; n < para.baselen; n++){
00709 sum = 0;
00710 for(theta = 1; theta <= para.delWin; theta++){
00711
00712
00713 if (t - theta < 0) A1 = c[0][n];
00714 else A1 = c[t - theta][n];
00715 if (t + theta >= frame) A2 = c[frame - 1][n];
00716 else A2 = c[t + theta][n];
00717 sum += theta * (A2 - A1);
00718 }
00719 if (para.absesup && n == para.baselen-1) {
00720 ed[t] = sum / (2 * B);
00721 } else {
00722 c[t][para.baselen + n] = sum / (2 * B);
00723 }
00724 }
00725 }
00726
00727 if (para.absesup) {
00728 for (t=0;t<frame;t++) {
00729 memmove(&(c[t][para.baselen-1]), &(c[t][para.baselen]), sizeof(float) * para.baselen - 1);
00730 c[t][para.baselen * 2 - 2] = ed[t];
00731 }
00732 free(ed);
00733 }
00734 }
00735
00736
00744 void Accel(float **c, int frame, Value para)
00745 {
00746 int theta, t, n, B = 0;
00747 int src, dst;
00748 float A1, A2, sum;
00749
00750 for(theta = 1; theta <= para.accWin; theta++)
00751 B += theta * theta;
00752
00753 for(t = 0; t < frame; t++){
00754 src = para.baselen * 2 - 1;
00755 if (para.absesup) src--;
00756 dst = src + para.baselen;
00757 for(n = 0; n < para.baselen; n++){
00758 sum = 0;
00759 for(theta = 1; theta <= para.accWin; theta++){
00760
00761
00762 if (t - theta < 0) A1 = c[0][src];
00763 else A1 = c[t - theta][src];
00764 if (t + theta >= frame) A2 = c[frame - 1][src];
00765 else A2 = c[t + theta][src];
00766 sum += theta * (A2 - A1);
00767 }
00768 c[t][dst] = sum / (2 * B);
00769 src--;
00770 dst--;
00771 }
00772 }
00773 }
00774
00782 void FFT(float *xRe, float *xIm, int p)
00783 {
00784 int i, ip, j, k, m, me, me1, n, nv2;
00785 double uRe, uIm, vRe, vIm, wRe, wIm, tRe, tIm;
00786
00787 n = 1<<p;
00788 nv2 = n / 2;
00789
00790 j = 0;
00791 for(i = 0; i < n-1; i++){
00792 if(j > i){
00793 tRe = xRe[j]; tIm = xIm[j];
00794 xRe[j] = xRe[i]; xIm[j] = xIm[i];
00795 xRe[i] = tRe; xIm[i] = tIm;
00796 }
00797 k = nv2;
00798 while(j >= k){
00799 j -= k; k /= 2;
00800 }
00801 j += k;
00802 }
00803
00804 for(m = 1; m <= p; m++){
00805 me = 1<<m; me1 = me / 2;
00806 uRe = 1.0; uIm = 0.0;
00807 #ifdef MFCC_SINCOS_TABLE
00808 wRe = costbl_fft[m-1]; wIm = sintbl_fft[m-1];
00809 #else
00810 wRe = cos(PI / me1); wIm = -sin(PI / me1);
00811 #endif
00812 for(j = 0; j < me1; j++){
00813 for(i = j; i < n; i += me){
00814 ip = i + me1;
00815 tRe = xRe[ip] * uRe - xIm[ip] * uIm;
00816 tIm = xRe[ip] * uIm + xIm[ip] * uRe;
00817 xRe[ip] = xRe[i] - tRe; xIm[ip] = xIm[i] - tIm;
00818 xRe[i] += tRe; xIm[i] += tIm;
00819 }
00820 vRe = uRe * wRe - uIm * wIm; vIm = uRe * wIm + uIm * wRe;
00821 uRe = vRe; uIm = vIm;
00822 }
00823 }
00824 }
00825
00826
00835 void CMN(float **mfcc, int frame_num, int dim)
00836 {
00837 int i, t;
00838 float *mfcc_ave, *sum;
00839
00840 mfcc_ave = (float *)mycalloc(dim, sizeof(float));
00841 sum = (float *)mycalloc(dim, sizeof(float));
00842
00843 for(i = 0; i < dim; i++){
00844 sum[i] = 0.0;
00845 for(t = 0; t < frame_num; t++)
00846 sum[i] += mfcc[t][i];
00847 mfcc_ave[i] = sum[i] / frame_num;
00848 }
00849 for(t = 0; t < frame_num; t++){
00850 for(i = 0; i < dim; i++)
00851 mfcc[t][i] = mfcc[t][i] - mfcc_ave[i];
00852 }
00853 free(sum);
00854 free(mfcc_ave);
00855 }