00001
00046
00047
00048
00049
00050
00051
00052
00053 #include <julius/julius.h>
00054
00055 #include <sys/stat.h>
00056
00084 boolean
00085 wav2mfcc(SP16 speech[], int speechlen, Recog *recog)
00086 {
00087 int framenum;
00088 int len;
00089 Value *para;
00090 MFCCCalc *mfcc;
00091
00092
00093 framenum = (int)((speechlen - recog->jconf->input.framesize) / recog->jconf->input.frameshift) + 1;
00094 if (framenum < 1) {
00095 jlog("WARNING: input too short (%d samples), ignored\n", speechlen);
00096 return FALSE;
00097 }
00098
00099 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00100
00101 if (mfcc->frontend.ssload_filename) {
00102
00103 if (mfcc->frontend.ssbuf == NULL) {
00104
00105 if ((mfcc->frontend.ssbuf = new_SS_load_from_file(mfcc->frontend.ssload_filename, &(mfcc->frontend.sslen))) == NULL) {
00106 jlog("ERROR: wav2mfcc: failed to read noise spectrum from file \"%s\"\n", mfcc->frontend.ssload_filename);
00107 return FALSE;
00108 }
00109 }
00110 }
00111
00112 if (mfcc->frontend.sscalc) {
00113
00114 len = mfcc->frontend.sscalc_len * recog->jconf->input.sfreq / 1000;
00115 if (len > speechlen) len = speechlen;
00116 #ifdef SSDEBUG
00117 jlog("DEBUG: [%d]\n", len);
00118 #endif
00119 mfcc->frontend.ssbuf = new_SS_calculate(speech, len, &(mfcc->frontend.sslen), mfcc->frontend.mfccwrk_ss, mfcc->para);
00120 }
00121
00122 }
00123
00124
00125 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00126
00127 para = mfcc->para;
00128
00129
00130 param_init_content(mfcc->param);
00131 if (param_alloc(mfcc->param, framenum, para->veclen) == FALSE) {
00132 jlog("ERROR: failed to allocate memory for converted parameter vectors\n");
00133 return FALSE;
00134 }
00135
00136 if (mfcc->frontend.ssload_filename || mfcc->frontend.sscalc) {
00137
00138 mfcc->wrk->ssbuf = mfcc->frontend.ssbuf;
00139 mfcc->wrk->ssbuflen = mfcc->frontend.sslen;
00140 mfcc->wrk->ss_alpha = mfcc->frontend.ss_alpha;
00141 mfcc->wrk->ss_floor = mfcc->frontend.ss_floor;
00142 }
00143
00144
00145 if (Wav2MFCC(speech, mfcc->param->parvec, para, speechlen, mfcc->wrk) == FALSE) {
00146 jlog("ERROR: failed to compute MFCC from input speech\n");
00147 if (mfcc->frontend.sscalc) {
00148 free(mfcc->frontend.ssbuf);
00149 mfcc->frontend.ssbuf = NULL;
00150 }
00151 return FALSE;
00152 }
00153
00154
00155 mfcc->param->header.samplenum = framenum;
00156 mfcc->param->header.wshift = para->smp_period * para->frameshift;
00157 mfcc->param->header.sampsize = para->veclen * sizeof(VECT);
00158 mfcc->param->header.samptype = F_MFCC;
00159 if (para->delta) mfcc->param->header.samptype |= F_DELTA;
00160 if (para->acc) mfcc->param->header.samptype |= F_ACCL;
00161 if (para->energy) mfcc->param->header.samptype |= F_ENERGY;
00162 if (para->c0) mfcc->param->header.samptype |= F_ZEROTH;
00163 if (para->absesup) mfcc->param->header.samptype |= F_ENERGY_SUP;
00164 if (para->cmn) mfcc->param->header.samptype |= F_CEPNORM;
00165 mfcc->param->veclen = para->veclen;
00166 mfcc->param->samplenum = framenum;
00167
00168 if (mfcc->frontend.sscalc) {
00169 free(mfcc->frontend.ssbuf);
00170 mfcc->frontend.ssbuf = NULL;
00171 }
00172 }
00173
00174 return TRUE;
00175 }
00176
00177