libjulius/src/realtime-1stpass.c

説明を見る。
00001 
00117 /*
00118  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
00119  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00120  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
00121  * All rights reserved
00122  */
00123 
00124 #include <julius/julius.h>
00125 
00126 #undef RDEBUG                   
00127 
00128 
00158 static void
00159 init_param(MFCCCalc *mfcc)
00160 {
00161   Value *para;
00162 
00163   para = mfcc->para;
00164 
00165   /* これから計算されるパラメータの型をヘッダに設定 */
00166   /* set header types */
00167   mfcc->param->header.samptype = F_MFCC;
00168   if (para->delta) mfcc->param->header.samptype |= F_DELTA;
00169   if (para->acc) mfcc->param->header.samptype |= F_ACCL;
00170   if (para->energy) mfcc->param->header.samptype |= F_ENERGY;
00171   if (para->c0) mfcc->param->header.samptype |= F_ZEROTH;
00172   if (para->absesup) mfcc->param->header.samptype |= F_ENERGY_SUP;
00173   if (para->cmn) mfcc->param->header.samptype |= F_CEPNORM;
00174   
00175   mfcc->param->header.wshift = para->smp_period * para->frameshift;
00176   mfcc->param->header.sampsize = para->veclen * sizeof(VECT); /* not compressed */
00177   mfcc->param->veclen = para->veclen;
00178   
00179   /* 認識処理中/終了後にセットされる変数:
00180      param->parvec (パラメータベクトル系列)
00181      param->header.samplenum, param->samplenum (全フレーム数)
00182   */
00183   /* variables that will be set while/after computation has been done:
00184      param->parvec (parameter vector sequence)
00185      param->header.samplenum, param->samplenum (total number of frames)
00186   */
00187   /* MAP-CMN の初期化 */
00188   /* Prepare for MAP-CMN */
00189   if (mfcc->para->cmn || mfcc->para->cvn) CMN_realtime_prepare(mfcc->cmn.wrk);
00190 }
00191 
00219 boolean
00220 RealTimeInit(Recog *recog)
00221 {
00222   Value *para;
00223   Jconf *jconf;
00224   RealBeam *r;
00225   MFCCCalc *mfcc;
00226 
00227 
00228   jconf = recog->jconf;
00229   r = &(recog->real);
00230 
00231   /* 最大フレーム長を最大入力時間数から計算 */
00232   /* set maximum allowed frame length */
00233   r->maxframelen = MAXSPEECHLEN / recog->jconf->input.frameshift;
00234 
00235   /* -ssload 指定時, SS用のノイズスペクトルをファイルから読み込む */
00236   /* if "-ssload", load noise spectrum for spectral subtraction from file */
00237   for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00238     if (mfcc->frontend.ssload_filename && mfcc->frontend.ssbuf == NULL) {
00239       if ((mfcc->frontend.ssbuf = new_SS_load_from_file(mfcc->frontend.ssload_filename, &(mfcc->frontend.sslen))) == NULL) {
00240         jlog("ERROR: failed to read \"%s\"\n", mfcc->frontend.ssload_filename);
00241         return FALSE;
00242       }
00243       /* check ssbuf length */
00244       if (mfcc->frontend.sslen != mfcc->wrk->bflen) {
00245         jlog("ERROR: noise spectrum length not match\n");
00246         return FALSE;
00247       }
00248       mfcc->wrk->ssbuf = mfcc->frontend.ssbuf;
00249       mfcc->wrk->ssbuflen = mfcc->frontend.sslen;
00250       mfcc->wrk->ss_alpha = mfcc->frontend.ss_alpha;
00251       mfcc->wrk->ss_floor = mfcc->frontend.ss_floor;
00252     }
00253   }
00254 
00255   for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00256   
00257     para = mfcc->para;
00258 
00259     /* 対数エネルギー正規化のための初期値 */
00260     /* set initial value for log energy normalization */
00261     if (para->energy && para->enormal) energy_max_init(&(mfcc->ewrk));
00262     /* デルタ計算のためのサイクルバッファを用意 */
00263     /* initialize cycle buffers for delta and accel coef. computation */
00264     if (para->delta) mfcc->db = WMP_deltabuf_new(para->baselen, para->delWin);
00265     if (para->acc) mfcc->ab = WMP_deltabuf_new(para->baselen * 2, para->accWin);
00266     /* デルタ計算のためのワークエリアを確保 */
00267     /* allocate work area for the delta computation */
00268     mfcc->tmpmfcc = (VECT *)mymalloc(sizeof(VECT) * para->vecbuflen);
00269     /* MAP-CMN 用の初期ケプストラム平均を読み込んで初期化する */
00270     /* Initialize the initial cepstral mean data from file for MAP-CMN */
00271     if (para->cmn || para->cvn) mfcc->cmn.wrk = CMN_realtime_new(para, mfcc->cmn.map_weight);
00272     /* -cmnload 指定時, CMN用のケプストラム平均の初期値をファイルから読み込む */
00273     /* if "-cmnload", load initial cepstral mean data from file for CMN */
00274     if (mfcc->cmn.load_filename) {
00275       if (para->cmn) {
00276         if ((mfcc->cmn.loaded = CMN_load_from_file(mfcc->cmn.wrk, mfcc->cmn.load_filename))== FALSE) {
00277           jlog("WARNING: failed to read initial cepstral mean from \"%s\", do flat start\n", mfcc->cmn.load_filename);
00278         }
00279       } else {
00280         jlog("WARNING: CMN not required on AM, file \"%s\" ignored\n", mfcc->cmn.load_filename);
00281       }
00282     }
00283 
00284   }
00285   /* 窓長をセット */
00286   /* set window length */
00287   r->windowlen = recog->jconf->input.framesize + 1;
00288   /* 窓かけ用バッファを確保 */
00289   /* set window buffer */
00290   r->window = mymalloc(sizeof(SP16) * r->windowlen);
00291 
00292   return TRUE;
00293 }
00294 
00319 void
00320 reset_mfcc(Recog *recog) 
00321 {
00322   Value *para;
00323   MFCCCalc *mfcc;
00324   RealBeam *r;
00325 
00326   r = &(recog->real);
00327 
00328   /* 特徴抽出モジュールを初期化 */
00329   /* initialize parameter extraction module */
00330   for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00331 
00332     para = mfcc->para;
00333 
00334     /* 対数エネルギー正規化のための初期値をセット */
00335     /* set initial value for log energy normalization */
00336     if (para->energy && para->enormal) energy_max_prepare(&(mfcc->ewrk), para);
00337     /* デルタ計算用バッファを準備 */
00338     /* set the delta cycle buffer */
00339     if (para->delta) WMP_deltabuf_prepare(mfcc->db);
00340     if (para->acc) WMP_deltabuf_prepare(mfcc->ab);
00341   }
00342 
00343 }
00344 
00371 boolean
00372 RealTimePipeLinePrepare(Recog *recog)
00373 {
00374   RealBeam *r;
00375   PROCESS_AM *am;
00376   MFCCCalc *mfcc;
00377 #ifdef SPSEGMENT_NAIST
00378   RecogProcess *p;
00379 #endif
00380 
00381   r = &(recog->real);
00382 
00383   /* 計算用の変数を初期化 */
00384   /* initialize variables for computation */
00385   r->windownum = 0;
00386   /* parameter check */
00387   for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00388     /* パラメータ初期化 */
00389     /* parameter initialization */
00390     if (recog->jconf->input.speech_input == SP_MFCMODULE) {
00391       if (mfc_module_set_header(mfcc, recog) == FALSE) return FALSE;
00392     } else {
00393       init_param(mfcc);
00394     }
00395     /* フレームごとのパラメータベクトル保存の領域を確保 */
00396     /* あとで必要に応じて伸長される */
00397     if (param_alloc(mfcc->param, 1, mfcc->param->veclen) == FALSE) {
00398       j_internal_error("ERROR: segmented: failed to allocate memory for rest param\n");
00399     }
00400     /* フレーム数をリセット */
00401     /* reset frame count */
00402     mfcc->f = 0;
00403   }
00404   /* 準備した param 構造体のデータのパラメータ型を音響モデルとチェックする */
00405   /* check type coherence between param and hmminfo here */
00406   if (recog->jconf->input.paramtype_check_flag) {
00407     for(am=recog->amlist;am;am=am->next) {
00408       if (!check_param_coherence(am->hmminfo, am->mfcc->param)) {
00409         jlog("ERROR: input parameter type does not match AM\n");
00410         return FALSE;
00411       }
00412     }
00413   }
00414 
00415   /* 計算用のワークエリアを準備 */
00416   /* prepare work area for calculation */
00417   if (recog->jconf->input.type == INPUT_WAVEFORM) {
00418     reset_mfcc(recog);
00419   }
00420   /* 音響尤度計算用キャッシュを準備 */
00421   /* prepare cache area for acoustic computation of HMM states and mixtures */
00422   for(am=recog->amlist;am;am=am->next) {
00423     outprob_prepare(&(am->hmmwrk), r->maxframelen);
00424   }
00425 
00426 #ifdef BACKEND_VAD
00427   if (recog->jconf->decodeopt.segment) {
00428     /* initialize segmentation parameters */
00429     spsegment_init(recog);
00430   }
00431 #else
00432   recog->triggered = FALSE;
00433 #endif
00434 
00435 #ifdef DEBUG_VTLN_ALPHA_TEST
00436   /* store speech */
00437   recog->speechlen = 0;
00438 #endif
00439 
00440   return TRUE;
00441 }
00442 
00475 boolean
00476 RealTimeMFCC(MFCCCalc *mfcc, SP16 *window, int windowlen)
00477 {
00478   int i;
00479   boolean ret;
00480   VECT *tmpmfcc;
00481   Value *para;
00482 
00483   tmpmfcc = mfcc->tmpmfcc;
00484   para = mfcc->para;
00485 
00486   /* 音声波形から base MFCC を計算 (recog->mfccwrk を利用) */
00487   /* calculate base MFCC from waveform (use recog->mfccwrk) */
00488   for (i=0; i < windowlen; i++) {
00489     mfcc->wrk->bf[i+1] = (float) window[i];
00490   }
00491   WMP_calc(mfcc->wrk, tmpmfcc, para);
00492 
00493   if (para->energy && para->enormal) {
00494     /* 対数エネルギー項を正規化する */
00495     /* normalize log energy */
00496     /* リアルタイム入力では発話ごとの最大エネルギーが得られないので
00497        直前の発話のパワーで代用する */
00498     /* Since the maximum power of the whole input utterance cannot be
00499        obtained at real-time input, the maximum of last input will be
00500        used to normalize.
00501     */
00502     tmpmfcc[para->baselen-1] = energy_max_normalize(&(mfcc->ewrk), tmpmfcc[para->baselen-1], para);
00503   }
00504 
00505   if (para->delta) {
00506     /* デルタを計算する */
00507     /* calc delta coefficients */
00508     ret = WMP_deltabuf_proceed(mfcc->db, tmpmfcc);
00509 #ifdef RDEBUG
00510     printf("DeltaBuf: ret=%d, status=", ret);
00511     for(i=0;i<mfcc->db->len;i++) {
00512       printf("%d", mfcc->db->is_on[i]);
00513     }
00514     printf(", nextstore=%d\n", mfcc->db->store);
00515 #endif
00516     /* ret == FALSE のときはまだディレイ中なので認識処理せず次入力へ */
00517     /* if ret == FALSE, there is no available frame.  So just wait for
00518        next input */
00519     if (! ret) {
00520       return FALSE;
00521     }
00522 
00523     /* db->vec に現在の元データとデルタ係数が入っているので tmpmfcc にコピー */
00524     /* now db->vec holds the current base and full delta, so copy them to tmpmfcc */
00525     memcpy(tmpmfcc, mfcc->db->vec, sizeof(VECT) * para->baselen * 2);
00526   }
00527 
00528   if (para->acc) {
00529     /* Accelerationを計算する */
00530     /* calc acceleration coefficients */
00531     /* base+delta をそのまま入れる */
00532     /* send the whole base+delta to the cycle buffer */
00533     ret = WMP_deltabuf_proceed(mfcc->ab, tmpmfcc);
00534 #ifdef RDEBUG
00535     printf("AccelBuf: ret=%d, status=", ret);
00536     for(i=0;i<mfcc->ab->len;i++) {
00537       printf("%d", mfcc->ab->is_on[i]);
00538     }
00539     printf(", nextstore=%d\n", mfcc->ab->store);
00540 #endif
00541     /* ret == FALSE のときはまだディレイ中なので認識処理せず次入力へ */
00542     /* if ret == FALSE, there is no available frame.  So just wait for
00543        next input */
00544     if (! ret) {
00545       return FALSE;
00546     }
00547     /* ab->vec には,(base+delta) とその差分係数が入っている. 
00548        [base] [delta] [delta] [acc] の順で入っているので,
00549        [base] [delta] [acc] を tmpmfcc にコピーする. */
00550     /* now ab->vec holds the current (base+delta) and their delta coef. 
00551        it holds a vector in the order of [base] [delta] [delta] [acc], 
00552        so copy the [base], [delta] and [acc] to tmpmfcc.  */
00553     memcpy(tmpmfcc, mfcc->ab->vec, sizeof(VECT) * para->baselen * 2);
00554     memcpy(&(tmpmfcc[para->baselen*2]), &(mfcc->ab->vec[para->baselen*3]), sizeof(VECT) * para->baselen);
00555   }
00556 
00557 #ifdef POWER_REJECT
00558   if (para->energy || para->c0) {
00559     mfcc->avg_power += tmpmfcc[para->baselen-1];
00560   }
00561 #endif
00562 
00563   if (para->delta && (para->energy || para->c0) && para->absesup) {
00564     /* 絶対値パワーを除去 */
00565     /* suppress absolute power */
00566     memmove(&(tmpmfcc[para->baselen-1]), &(tmpmfcc[para->baselen]), sizeof(VECT) * (para->vecbuflen - para->baselen));
00567   }
00568 
00569   /* この時点で tmpmfcc に現時点での最新の特徴ベクトルが格納されている */
00570   /* tmpmfcc[] now holds the latest parameter vector */
00571 
00572   /* CMN を計算 */
00573   /* perform CMN */
00574   if (para->cmn || para->cvn) CMN_realtime(mfcc->cmn.wrk, tmpmfcc);
00575 
00576   return TRUE;
00577 }
00578 
/**
 * Advance the 1st pass by one frame for the MFCC instances marked valid.
 *
 * Issues the recognition / segment / pass1 begin callbacks when this is
 * the initial frame, calls decode_proceed(), and, when
 * spsegment_need_restart() asks for it, rewinds the MFCC instances and
 * optionally re-processes the rewound frames.  The frame-wise callback is
 * executed when at least one instance is valid at the end.
 *
 * @param recog [i/o] engine instance
 *
 * @return 0 on success, 1 when decode_proceed() reported segmentation
 * (recog->real.last_is_segmented is set to TRUE), -1 when
 * decode_proceed() reported an error.
 */
00579 static int
00580 proceed_one_frame(Recog *recog)
00581 {
00582   MFCCCalc *mfcc;
00583   RealBeam *r;
00584   int maxf;
00585   PROCESS_AM *am;
00586   int rewind_frame;
00587   boolean reprocess;
00588   boolean ok_p;
00589 
00590   r = &(recog->real);
00591 
00592   /* detect the initial frame: ok_p = some valid instance is at frame 0, maxf = largest frame among valid instances */
00593   ok_p = FALSE;
00594   maxf = 0;
00595   for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00596     if (!mfcc->valid) continue;
00597     if (maxf < mfcc->f) maxf = mfcc->f;
00598     if (mfcc->f == 0) {
00599       ok_p = TRUE;
00600     }
00601   }
00602   if (ok_p && maxf == 0) {
00603     /* call callback when at least one of MFCC has initial frame */
00604     if (recog->jconf->decodeopt.segment) {
00605 #ifdef BACKEND_VAD
00606       /* not exec pass1 begin callback here: with BACKEND_VAD it is done after the trigger check below */
00607 #else
00608       if (!recog->process_segment) {
00609         callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
00610       }
00611       callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);
00612       callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
00613       recog->triggered = TRUE;
00614 #endif
00615     } else {
00616       callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
00617       callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
00618       recog->triggered = TRUE;
00619     }
00620   }
00621   /* proceed the recognition by one frame, at frame mfcc->f, for each instance */
00622   switch (decode_proceed(recog)) {
00623   case -1: /* error */
00624     return -1;
00625     break;
00626   case 0:                       /* success */
00627     break;
00628   case 1:                       /* segmented */
00629     /* the recognition process requested a segmentation */
00630     /* set flag which indicates that the input has ended with segmentation request */
00631     r->last_is_segmented = TRUE;
00632     /* tell the caller to be segmented by this function */
00633     /* (the caller is expected to cut the input here) */
00634     return 1;
00635   }
00636 #ifdef BACKEND_VAD
00637   /* check up trigger in case of VAD segmentation */
00638   if (recog->jconf->decodeopt.segment) {
00639     if (recog->triggered == FALSE) {
00640       if (spsegment_trigger_sync(recog)) {
00641         if (!recog->process_segment) {
00642           callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
00643         }
00644         callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);
00645         callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
00646         recog->triggered = TRUE;
00647       }
00648     }
00649   }
00650 #endif
00651   
00652   if (spsegment_need_restart(recog, &rewind_frame, &reprocess) == TRUE) {
00653     /* set total length to the current frame */
00654     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00655       if (!mfcc->valid) continue;
00656       mfcc->param->header.samplenum = mfcc->f + 1;
00657       mfcc->param->samplenum = mfcc->f + 1;
00658     }
00659     /* do rewind for all mfcc here */
00660     spsegment_restart_mfccs(recog, rewind_frame, reprocess);
00661     /* also tell adin module to rehash the concurrent audio input */
00662     recog->adin->rehash = TRUE;
00663     /* reset outprob cache for all AM */
00664     for(am=recog->amlist;am;am=am->next) {
00665       outprob_prepare(&(am->hmmwrk), am->mfcc->param->samplenum);
00666     }
00667     if (reprocess) {
00668       /* process the backstep MFCCs here */
00669       while(1) {
00670         ok_p = TRUE;
00671         for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00672           if (! mfcc->valid) continue;
00673           mfcc->f++;
00674           if (mfcc->f < mfcc->param->samplenum) {
00675             mfcc->valid = TRUE;
00676             ok_p = FALSE;
00677           } else {
00678             mfcc->valid = FALSE;
00679           }
00680         }
00681         if (ok_p) {
00682           /* all MFCC has been processed, end of loop */
00683           /* NOTE(review): when ok_p is still TRUE every instance that was valid has just been set to valid = FALSE above, so the f-- below appears never to execute - confirm the intent */
00684           for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00685             if (! mfcc->valid) continue;
00686             mfcc->f--;
00687           }
00688           break;
00689         }
00690         /* proceed the recognition by one frame, at frame mfcc->f, for each instance */
00691         switch (decode_proceed(recog)) {
00692         case -1: /* error */
00693           return -1;
00694           break;
00695         case 0:                 /* success */
00696           break;
00697         case 1:                 /* segmented */
00698           /* ignore segmentation while in the backstep segment */
00699           break;
00700         }
00701         /* call frame-wise callback */
00702         callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog);
00703       }
00704     }
00705   }
00706   /* call frame-wise callback if at least one of MFCC is valid at this frame */
00707   for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00708     if (mfcc->valid) {
00709       callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog);
00710       break;
00711     }
00712   }
00713   
00714   return 0;
00715 }
00716 
00717 
/**
 * Process a fragment of incoming speech samples on the fly.
 *
 * Samples are appended to the window buffer r->window.  Each time the
 * window becomes full, a feature vector is computed for every MFCC
 * instance through recog->calc_vector, stored to the instance's param at
 * frame mfcc->f, and proceed_one_frame() is called.  The window is then
 * shifted by the frame shift.  Samples that do not fill a window are kept
 * in r->window for the next call.
 *
 * @param Speech [in] input samples
 * @param nowlen [in] number of samples in @a Speech
 * @param recog [i/o] engine instance
 *
 * @return 0 when the whole fragment was consumed, 1 when an instance has
 * reached r->maxframelen frames or proceed_one_frame() reported
 * segmentation, -1 on error (parameter area allocation failure or error
 * from proceed_one_frame()).
 */
00786 int
00787 RealTimePipeLine(SP16 *Speech, int nowlen, Recog *recog) /* Speech[0..nowlen-1] = input */
00788 {
00789   int i, now, ret;
00790   MFCCCalc *mfcc;
00791   RealBeam *r;
00792 
00793   r = &(recog->real);
00794 
00795 #ifdef DEBUG_VTLN_ALPHA_TEST
00796   /* store speech */
00797   adin_cut_callback_store_buffer(Speech, nowlen, recog);
00798 #endif
00799 
00800   /* on entry the window buffer may already be partly filled: */
00801   /* window[0..windownum-1] are speech data left from previous call */
00802 
00803   /* "now" is the index of the next unread sample in Speech */
00804   /* initialize pointer for local processing */
00805   now = 0;
00806   
00807   /* proceed_one_frame() sets this flag again when segmentation occurs */
00808   /* reset flag which indicates whether the input has ended with segmentation request */
00809   r->last_is_segmented = FALSE;
00810 
00811 #ifdef RDEBUG
00812   printf("got %d samples\n", nowlen);
00813 #endif
00814 
00815   while (now < nowlen) {        /* till whole input is processed */
00816     /* checked for every MFCC instance before each new frame */
00817     /* if input length reaches maximum buffer size, terminate 1st pass here */
00818     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00819       if (mfcc->f >= r->maxframelen) return(1);
00820     }
00821     /* copy min(free room in window, unread samples) samples */
00822     /* fill window buffer as many as possible */
00823     for(i = min(r->windowlen - r->windownum, nowlen - now); i > 0 ; i--)
00824       r->window[r->windownum++] = (float) Speech[now++];
00825     /* NOTE(review): r->window is allocated and shifted in units of sizeof(SP16),
00826        so the (float) cast above looks unnecessary - confirm. */
00827     /* if window buffer was not filled, end processing here, keeping the
00828        rest samples (window[0..windownum-1]) in the window buffer. */
00829     if (r->windownum < r->windowlen) break;
00830 #ifdef RDEBUG
00831     /*    printf("%d used, %d rest\n", now, nowlen - now);
00832 
00833           printf("[f = %d]\n", f);*/
00834 #endif
00835 
00836     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00837       mfcc->valid = FALSE;
00838       /* valid stays FALSE for an instance whose vector is not yet available */
00839       /* calculate a parameter vector from current waveform windows
00840          and store to mfcc->tmpmfcc */
00841       if ((*(recog->calc_vector))(mfcc, r->window, r->windowlen)) {
00842 #ifdef ENABLE_PLUGIN
00843         /* call post-process plugin if exist */
00844         plugin_exec_vector_postprocess(mfcc->tmpmfcc, mfcc->param->veclen, mfcc->f);
00845 #endif
00846         /* the vector is complete: mark this instance as having a frame */
00847         mfcc->valid = TRUE;
00848         /* now get the MFCC vector of current frame, now store it to param */
00849         if (param_alloc(mfcc->param, mfcc->f + 1, mfcc->param->veclen) == FALSE) {
00850           jlog("ERROR: failed to allocate memory for incoming MFCC vectors\n");
00851           return -1;
00852         }
00853         memcpy(mfcc->param->parvec[mfcc->f], mfcc->tmpmfcc, sizeof(VECT) * mfcc->param->veclen);
00854 #ifdef RDEBUG
00855         printf("DeltaBuf: %02d: got frame %d\n", mfcc->id, mfcc->f);
00856 #endif
00857       }
00858     }
00859 
00860     /* run the recognition for the frame just stored */
00861     /* proceed one frame */
00862     ret = proceed_one_frame(recog);
00863 
00864     if (ret == 1 && recog->jconf->decodeopt.segment) {
00865       /* Speech[now..nowlen-1] has not been read yet; it is copied to
00866          r->rest_Speech, which RealTimeResume() passes back to this function */
00867       /* short pause segmentation: there is some data left in buffer, so
00868          we should keep them for next processing */
00869       r->rest_len = nowlen - now;
00870       if (r->rest_len > 0) {
00871         /* copy rest samples to rest_Speech, allocating or growing it as needed */
00872         if (r->rest_Speech == NULL) {
00873           r->rest_alloc_len = r->rest_len;
00874           r->rest_Speech = (SP16 *)mymalloc(sizeof(SP16)*r->rest_alloc_len);
00875         } else if (r->rest_alloc_len < r->rest_len) {
00876           r->rest_alloc_len = r->rest_len;
00877           r->rest_Speech = (SP16 *)myrealloc(r->rest_Speech, sizeof(SP16)*r->rest_alloc_len);
00878         }
00879         memcpy(r->rest_Speech, &(Speech[now]), sizeof(SP16) * r->rest_len);
00880       }
00881     }
00882     if (ret != 0) return ret;
00883 
00884     /* one frame has been processed: advance only the instances that got a vector */
00885     /* proceed frame pointer */
00886     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00887       if (!mfcc->valid) continue;
00888       mfcc->f++;
00889     }
00890 
00891     /* drop the first frameshift samples, which have now been processed */
00892     /* shift window */
00893     memmove(r->window, &(r->window[recog->jconf->input.frameshift]), sizeof(SP16) * (r->windowlen - recog->jconf->input.frameshift));
00894     r->windownum -= recog->jconf->input.frameshift;
00895   }
00896 
00897   /* reached when all samples were consumed, or when the last samples
00898      did not fill a window */
00899   /* input segment is fully processed
00900      tell the caller to continue input */
00901   return(0);                    
00902 }
00903 
00937 int
00938 RealTimeResume(Recog *recog)
00939 {
00940   MFCCCalc *mfcc;
00941   RealBeam *r;
00942   boolean ok_p;
00943 #ifdef SPSEGMENT_NAIST
00944   RecogProcess *p;
00945 #endif
00946   PROCESS_AM *am;
00947 
00948   r = &(recog->real);
00949 
00950   /* 計算用のワークエリアを準備 */
00951   /* prepare work area for calculation */
00952   if (recog->jconf->input.type == INPUT_WAVEFORM) {
00953     reset_mfcc(recog);
00954   }
00955   /* 音響尤度計算用キャッシュを準備 */
00956   /* prepare cache area for acoustic computation of HMM states and mixtures */
00957   for(am=recog->amlist;am;am=am->next) {
00958     outprob_prepare(&(am->hmmwrk), r->maxframelen);
00959   }
00960 
00961   /* param にある全パラメータを処理する準備 */
00962   /* prepare to process all data in param */
00963   for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00964     if (mfcc->param->samplenum == 0) mfcc->valid = FALSE;
00965     else mfcc->valid = TRUE;
00966 #ifdef RDEBUG
00967     printf("Resume: %02d: f=%d\n", mfcc->id, mfcc->mfcc->param->samplenum-1);
00968 #endif
00969     /* フレーム数をリセット */
00970     /* reset frame count */
00971     mfcc->f = 0;
00972     /* MAP-CMN の初期化 */
00973     /* Prepare for MAP-CMN */
00974     if (mfcc->para->cmn || mfcc->para->cvn) CMN_realtime_prepare(mfcc->cmn.wrk);
00975   }
00976 
00977 #ifdef BACKEND_VAD
00978   if (recog->jconf->decodeopt.segment) {
00979     spsegment_init(recog);
00980   }
00981   /* not exec pass1 begin callback here */
00982 #else
00983   recog->triggered = FALSE;
00984   for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00985     if (!mfcc->valid) continue;
00986     callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);
00987     callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
00988     recog->triggered = TRUE;
00989     break;
00990   }
00991 #endif
00992 
00993   /* param 内の全フレームについて認識処理を進める */
00994   /* proceed recognition for all frames in param */
00995 
00996   while(1) {
00997     ok_p = TRUE;
00998     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00999       if (! mfcc->valid) continue;
01000       if (mfcc->f < mfcc->param->samplenum) {
01001         mfcc->valid = TRUE;
01002         ok_p = FALSE;
01003       } else {
01004         mfcc->valid = FALSE;
01005       }
01006     }
01007     if (ok_p) {
01008       /* すべての MFCC が終わりに達したのでループ終了 */
01009       /* all MFCC has been processed, end of loop  */
01010       break;
01011     }
01012 
01013     /* 各インスタンスについて mfcc->f の認識処理を1フレーム進める */
01014     switch (decode_proceed(recog)) {
01015     case -1: /* error */
01016       return -1;
01017       break;
01018     case 0:                     /* success */
01019       break;
01020     case 1:                     /* segmented */
01021       /* segmented, end procs ([0..f])*/
01022       r->last_is_segmented = TRUE;
01023       return 1;         /* segmented by this function */
01024     }
01025 
01026 #ifdef BACKEND_VAD
01027     /* check up trigger in case of VAD segmentation */
01028     if (recog->jconf->decodeopt.segment) {
01029       if (recog->triggered == FALSE) {
01030         if (spsegment_trigger_sync(recog)) {
01031           callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);
01032           callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
01033           recog->triggered = TRUE;
01034         }
01035       }
01036     }
01037 #endif
01038 
01039     /* call frame-wise callback */
01040     callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog);
01041 
01042     /* 1フレーム処理が進んだのでポインタを進める */
01043     /* proceed frame pointer */
01044     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01045       if (!mfcc->valid) continue;
01046       mfcc->f++;
01047     }
01048 
01049   }
01050   /* 前回のセグメント時に入力をシフトしていない分をシフトする */
01051   /* do the last shift here */
01052   if (recog->jconf->input.type == INPUT_WAVEFORM) {
01053     memmove(r->window, &(r->window[recog->jconf->input.frameshift]), sizeof(SP16) * (r->windowlen - recog->jconf->input.frameshift));
01054     r->windownum -= recog->jconf->input.frameshift;
01055     /* これで再開の準備が整ったので,まずは前回の処理で残っていた音声データから
01056        処理する */
01057     /* now that the search status has been prepared for the next input, we
01058        first process the rest unprocessed samples at the last session */
01059     if (r->rest_len > 0) {
01060       return(RealTimePipeLine(r->rest_Speech, r->rest_len, recog));
01061     }
01062   }
01063 
01064   /* 新規の入力に対して認識処理は続く… */
01065   /* the recognition process will continue for the newly incoming samples... */
01066   return 0;
01067 
01068 }
01069 
01070 
01104 boolean
01105 RealTimeParam(Recog *recog)
01106 {
01107   boolean ret1, ret2;
01108   RealBeam *r;
01109   int ret;
01110   int maxf;
01111   boolean ok_p;
01112   MFCCCalc *mfcc;
01113   Value *para;
01114 #ifdef RDEBUG
01115   int i;
01116 #endif
01117 
01118   r = &(recog->real);
01119 
01120   if (r->last_is_segmented) {
01121 
01122     /* RealTimePipeLine で認識処理側の理由により認識が中断した場合,
01123        現状態のMFCC計算データをそのまま次回へ保持する必要があるので,
01124        MFCC計算終了処理を行わずに第1パスの結果のみ出力して終わる. */
01125     /* When input segmented by recognition process in RealTimePipeLine(),
01126        we have to keep the whole current status of MFCC computation to the
01127        next call.  So here we only output the 1st pass result. */
01128     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01129       mfcc->param->header.samplenum = mfcc->f + 1;/* len = lastid + 1 */
01130       mfcc->param->samplenum = mfcc->f + 1;
01131     }
01132     decode_end_segmented(recog);
01133 
01134     /* この区間の param データを第2パスのために返す */
01135     /* return obtained parameter for 2nd pass */
01136     return(TRUE);
01137   }
01138 
01139   if (recog->jconf->input.type == INPUT_VECTOR) {
01140     /* finalize real-time 1st pass */
01141     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01142       mfcc->param->header.samplenum = mfcc->f;
01143       mfcc->param->samplenum = mfcc->f;
01144     }
01145     /* 最終フレーム処理を行い,認識の結果出力と終了処理を行う */
01146     decode_end(recog);
01147     return TRUE;
01148   }
01149 
01150   /* MFCC計算の終了処理を行う: 最後の遅延フレーム分を処理 */
01151   /* finish MFCC computation for the last delayed frames */
01152   for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01153     if (mfcc->para->delta || mfcc->para->acc) {
01154       mfcc->valid = TRUE;
01155     } else {
01156       mfcc->valid = FALSE;
01157     }
01158   }
01159 
01160   /* loop until all data has been flushed */
01161   while (1) {
01162 
01163     /* if all mfcc became invalid, exit loop here */
01164     ok_p = FALSE;
01165     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01166       if (mfcc->valid) {
01167         ok_p = TRUE;
01168         break;
01169       }
01170     }
01171     if (!ok_p) break;
01172 
01173     /* try to get 1 frame for all mfcc instances */
01174     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01175       
01176       para = mfcc->para;
01177       
01178       if (! mfcc->valid) continue;
01179       
01180       /* check if there is data in cycle buffer of delta */
01181       ret1 = WMP_deltabuf_flush(mfcc->db);
01182 #ifdef RDEBUG
01183       printf("DeltaBufLast: ret=%d, status=", ret1);
01184       for(i=0;i<mfcc->db->len;i++) {
01185         printf("%d", mfcc->db->is_on[i]);
01186       }
01187       printf(", nextstore=%d\n", mfcc->db->store);
01188 #endif
01189       if (ret1) {
01190         /* uncomputed delta has flushed, compute it with tmpmfcc */
01191         if (para->energy && para->absesup) {
01192           memcpy(mfcc->tmpmfcc, mfcc->db->vec, sizeof(VECT) * (para->baselen - 1));
01193           memcpy(&(mfcc->tmpmfcc[para->baselen-1]), &(mfcc->db->vec[para->baselen]), sizeof(VECT) * para->baselen);
01194         } else {
01195           memcpy(mfcc->tmpmfcc, mfcc->db->vec, sizeof(VECT) * para->baselen * 2);
01196         }
01197         if (para->acc) {
01198           /* this new delta should be given to the accel cycle buffer */
01199           ret2 = WMP_deltabuf_proceed(mfcc->ab, mfcc->tmpmfcc);
01200 #ifdef RDEBUG
01201           printf("AccelBuf: ret=%d, status=", ret2);
01202           for(i=0;i<mfcc->ab->len;i++) {
01203             printf("%d", mfcc->ab->is_on[i]);
01204           }
01205           printf(", nextstore=%d\n", mfcc->ab->store);
01206 #endif
01207           if (ret2) {
01208             /* uncomputed accel was given, compute it with tmpmfcc */
01209             memcpy(mfcc->tmpmfcc, mfcc->ab->vec, sizeof(VECT) * (para->veclen - para->baselen));
01210             memcpy(&(mfcc->tmpmfcc[para->veclen - para->baselen]), &(mfcc->ab->vec[para->veclen - para->baselen]), sizeof(VECT) * para->baselen);
01211           } else {
01212             /* still no input is given: */
01213             /* in case of very short input: go on to the next input */
01214             continue;
01215           }
01216         }
01217         
01218       } else {
01219       
01220         /* no data left in the delta buffer */
01221         if (para->acc) {
01222           /* no new data, just flush the accel buffer */
01223           ret2 = WMP_deltabuf_flush(mfcc->ab);
01224 #ifdef RDEBUG
01225           printf("AccelBuf: ret=%d, status=", ret2);
01226           for(i=0;i<mfcc->ab->len;i++) {
01227             printf("%d", mfcc->ab->is_on[i]);
01228           }
01229           printf(", nextstore=%d\n", mfcc->ab->store);
01230 #endif
01231           if (ret2) {
01232             /* uncomputed data has flushed, compute it with tmpmfcc */
01233             memcpy(mfcc->tmpmfcc, mfcc->ab->vec, sizeof(VECT) * (para->veclen - para->baselen));
01234             memcpy(&(mfcc->tmpmfcc[para->veclen - para->baselen]), &(mfcc->ab->vec[para->veclen - para->baselen]), sizeof(VECT) * para->baselen);
01235           } else {
01236             /* actually no data exists in both delta and accel */
01237             mfcc->valid = FALSE; /* disactivate this instance */
01238             continue;           /* end this loop */
01239           }
01240         } else {
01241           /* only delta: input fully flushed */
01242           mfcc->valid = FALSE; /* disactivate this instance */
01243           continue;             /* end this loop */
01244         }
01245       }
01246       /* a new frame has been obtained from delta buffer to tmpmfcc */
01247       if(para->cmn || para->cvn) CMN_realtime(mfcc->cmn.wrk, mfcc->tmpmfcc);
01248       if (param_alloc(mfcc->param, mfcc->f + 1, mfcc->param->veclen) == FALSE) {
01249         jlog("ERROR: failed to allocate memory for incoming MFCC vectors\n");
01250         return FALSE;
01251       }
01252       /* store to mfcc->f */
01253       memcpy(mfcc->param->parvec[mfcc->f], mfcc->tmpmfcc, sizeof(VECT) * mfcc->param->veclen);
01254 #ifdef ENABLE_PLUGIN
01255       /* call postprocess plugin if any */
01256       plugin_exec_vector_postprocess(mfcc->param->parvec[mfcc->f], mfcc->param->veclen, mfcc->f);
01257 #endif
01258     }
01259 
01260     /* call recognition start callback */
01261     ok_p = FALSE;
01262     maxf = 0;
01263     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01264       if (!mfcc->valid) continue;
01265       if (maxf < mfcc->f) maxf = mfcc->f;
01266       if (mfcc->f == 0) {
01267         ok_p = TRUE;
01268       }
01269     }
01270 
01271     if (ok_p && maxf == 0) {
01272       /* call callback when at least one of MFCC has initial frame */
01273       if (recog->jconf->decodeopt.segment) {
01274 #ifdef BACKEND_VAD
01275           /* not exec pass1 begin callback here */
01276 #else
01277         if (!recog->process_segment) {
01278           callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
01279         }
01280         callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);
01281         callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
01282         recog->triggered = TRUE;
01283 #endif
01284       } else {
01285         callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
01286         callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
01287         recog->triggered = TRUE;
01288       }
01289     }
01290 
01291     /* proceed for the current frame */
01292     ret = decode_proceed(recog);
01293     if (ret == -1) {            /* error */
01294       return -1;
01295     } else if (ret == 1) {      /* segmented */
01296       /* loop out */
01297       break;
01298     } /* else no event occured */
01299 
01300 #ifdef BACKEND_VAD
01301     /* check up trigger in case of VAD segmentation */
01302     if (recog->jconf->decodeopt.segment) {
01303       if (recog->triggered == FALSE) {
01304         if (spsegment_trigger_sync(recog)) {
01305           if (!recog->process_segment) {
01306             callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
01307           }
01308           callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);
01309           callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
01310           recog->triggered = TRUE;
01311         }
01312       }
01313     }
01314 #endif
01315 
01316     /* call frame-wise callback */
01317     callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog);
01318 
01319     /* move to next */
01320     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01321       if (! mfcc->valid) continue;
01322       mfcc->f++;
01323       if (mfcc->f > r->maxframelen) mfcc->valid = FALSE;
01324     }
01325   }
01326 
01327   /* finalize real-time 1st pass */
01328   for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01329     mfcc->param->header.samplenum = mfcc->f;
01330     mfcc->param->samplenum = mfcc->f;
01331   }
01332   /* 最終フレーム処理を行い,認識の結果出力と終了処理を行う */
01333   decode_end(recog);
01334 
01335   return(TRUE);
01336 }
01337 
01356 void
01357 RealTimeCMNUpdate(MFCCCalc *mfcc, Recog *recog)
01358 {
01359   boolean cmn_update_p;
01360   Value *para;
01361   Jconf *jconf;
01362   RecogProcess *r;
01363 
01364   jconf = recog->jconf;
01365   para = mfcc->para;
01366   
01367   /* update CMN vector for next speech */
01368   if(para->cmn) {
01369     if (mfcc->cmn.update) {
01370       cmn_update_p = TRUE;
01371       for(r=recog->process_list;r;r=r->next) {
01372         if (!r->live) continue;
01373         if (r->am->mfcc != mfcc) continue;
01374         if (r->result.status < 0) { /* input rejected */
01375           cmn_update_p = FALSE;
01376           break;
01377         }
01378       }
01379       if (cmn_update_p) {
01380         /* update last CMN parameter for next spech */
01381         CMN_realtime_update(mfcc->cmn.wrk, mfcc->param);
01382       } else {
01383         /* do not update, because the last input is bogus */
01384         if (verbose_flag) {
01385 #ifdef BACKEND_VAD
01386           if (!recog->jconf->decodeopt.segment || recog->triggered) {
01387             jlog("STAT: skip CMN parameter update since last input was invalid\n");
01388           }
01389 #else
01390           jlog("STAT: skip CMN parameter update since last input was invalid\n");
01391 #endif
01392         }
01393       }
01394     }
01395     /* if needed, save the updated CMN parameter to a file */
01396     if (mfcc->cmn.save_filename) {
01397       if (CMN_save_to_file(mfcc->cmn.wrk, mfcc->cmn.save_filename) == FALSE) {
01398         jlog("WARNING: failed to save CMN parameter to \"%s\"\n", mfcc->cmn.save_filename);
01399       }
01400     }
01401   }
01402 }
01403 
01416 void
01417 RealTimeTerminate(Recog *recog)
01418 {
01419   MFCCCalc *mfcc;
01420 
01421   for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01422     mfcc->param->header.samplenum = mfcc->f;
01423     mfcc->param->samplenum = mfcc->f;
01424   }
01425 
01426   /* 最終フレーム処理を行い,認識の結果出力と終了処理を行う */
01427   decode_end(recog);
01428 }
01429 
01441 void
01442 realbeam_free(Recog *recog)
01443 {
01444   RealBeam *r;
01445 
01446   r = &(recog->real);
01447 
01448   if (recog->real.window) {
01449     free(recog->real.window);
01450     recog->real.window = NULL;
01451   }
01452   if (recog->real.rest_Speech) {
01453     free(recog->real.rest_Speech);
01454     recog->real.rest_Speech = NULL;
01455   }
01456 }
01457 
01458 
01459 
01460 /************************************************************************/
01461 /************************************************************************/
01462 /************************************************************************/
01463 /************************************************************************/
01464 
01465 /* MFCC realtime input */
01483 int
01484 mfcc_go(Recog *recog, int (*ad_check)(Recog *))
01485 {
01486   RealBeam *r;
01487   MFCCCalc *mfcc;
01488   int new_f;
01489   int ret, ret3;
01490 
01491   r = &(recog->real);
01492 
01493   r->last_is_segmented = FALSE;
01494   
01495   while(1/*in_data_vec*/) {
01496 
01497     ret = mfc_module_read(recog->mfcclist, &new_f);
01498 
01499     if (debug2_flag) {
01500       if (recog->mfcclist->f < new_f) {
01501         jlog("%d: %d (%d)\n", recog->mfcclist->f, new_f, ret);
01502       }
01503     }
01504  
01505     /* callback poll */
01506     if (ad_check != NULL) {
01507       if ((ret3 = (*(ad_check))(recog)) < 0) {
01508         if ((ret3 == -1 && mfcc->f == 0) || ret3 == -2) {
01509           return(-2);
01510         }
01511       }
01512     }
01513 
01514     while(recog->mfcclist->f < new_f) {
01515 
01516       recog->mfcclist->valid = TRUE;
01517 
01518 #ifdef ENABLE_PLUGIN
01519       /* call post-process plugin if exist */
01520       plugin_exec_vector_postprocess(recog->mfcclist->param->parvec[recog->mfcclist->f], recog->mfcclist->param->veclen, mfcc->f);
01521 #endif
01522 
01523       /* 処理を1フレーム進める */
01524       /* proceed one frame */
01525       
01526       switch(proceed_one_frame(recog)) {
01527       case -1:                  /* error */
01528         return -1;
01529       case 0:                   /* normal */
01530         break;
01531       case 1:                   /* segmented by process */
01532         return 2;
01533       }
01534 
01535       /* 1フレーム処理が進んだのでポインタを進める */
01536       /* proceed frame pointer */
01537       for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01538         if (!mfcc->valid) continue;
01539         mfcc->f++;
01540       }
01541     }
01542     
01543     /* check if input end */
01544     switch(ret) {
01545     case -1:                    /* end of input */
01546       return 0;
01547     case -2:                    /* error */
01548       return -1;
01549     case -3:                    /* end of segment request */
01550       return 1;
01551     }
01552   }
01553   /* 与えられた音声セグメントに対する認識処理が全て終了
01554      呼び出し元に, 入力を続けるよう伝える */
01555   /* input segment is fully processed
01556      tell the caller to continue input */
01557   return(1);
01558 }
01559 
01560 /* end of file */
01561 
01562 

Juliusに対してThu Jul 23 12:16:22 2009に生成されました。  doxygen 1.5.1