libjulius/src/realtime-1stpass.c

Go to the documentation of this file.
00001 
00117 /*
00118  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
00119  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00120  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
00121  * All rights reserved
00122  */
00123 
00124 #include <julius/julius.h>
00125 
00126 #undef RDEBUG                   
00127 
00128 
00158 static void
00159 init_param(MFCCCalc *mfcc)
00160 {
00161   Value *para;
00162 
00163   para = mfcc->para;
00164 
00165   /* これから計算されるパラメータの型をヘッダに設定 */
00166   /* set header types */
00167   mfcc->param->header.samptype = F_MFCC;
00168   if (para->delta) mfcc->param->header.samptype |= F_DELTA;
00169   if (para->acc) mfcc->param->header.samptype |= F_ACCL;
00170   if (para->energy) mfcc->param->header.samptype |= F_ENERGY;
00171   if (para->c0) mfcc->param->header.samptype |= F_ZEROTH;
00172   if (para->absesup) mfcc->param->header.samptype |= F_ENERGY_SUP;
00173   if (para->cmn) mfcc->param->header.samptype |= F_CEPNORM;
00174   
00175   mfcc->param->header.wshift = para->smp_period * para->frameshift;
00176   mfcc->param->header.sampsize = para->veclen * sizeof(VECT); /* not compressed */
00177   mfcc->param->veclen = para->veclen;
00178   /* フレームごとのパラメータベクトル保存の領域を確保 */
00179   /* あとで必要に応じて伸長される */
00180   if (param_alloc(mfcc->param, 1, mfcc->param->veclen) == FALSE) {
00181     j_internal_error("ERROR: segmented: failed to allocate memory for rest param\n");
00182   }
00183   
00184   /* 認識処理中/終了後にセットされる変数:
00185      param->parvec (パラメータベクトル系列)
00186      param->header.samplenum, param->samplenum (全フレーム数)
00187   */
00188   /* variables that will be set while/after computation has been done:
00189      param->parvec (parameter vector sequence)
00190      param->header.samplenum, param->samplenum (total number of frames)
00191   */
00192 }
00193 
00221 boolean
00222 RealTimeInit(Recog *recog)
00223 {
00224   Value *para;
00225   Jconf *jconf;
00226   RealBeam *r;
00227   MFCCCalc *mfcc;
00228 
00229 
00230   jconf = recog->jconf;
00231   r = &(recog->real);
00232 
00233   /* -ssload 指定時, SS用のノイズスペクトルをファイルから読み込む */
00234   /* if "-ssload", load noise spectrum for spectral subtraction from file */
00235   for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00236     if (mfcc->frontend.ssload_filename && mfcc->frontend.ssbuf == NULL) {
00237       if ((mfcc->frontend.ssbuf = new_SS_load_from_file(mfcc->frontend.ssload_filename, &(mfcc->frontend.sslen))) == NULL) {
00238         jlog("ERROR: failed to read \"%s\"\n", mfcc->frontend.ssload_filename);
00239         return FALSE;
00240       }
00241       /* check ssbuf length */
00242       if (mfcc->frontend.sslen != mfcc->wrk->bflen) {
00243         jlog("ERROR: noise spectrum length not match\n");
00244         return FALSE;
00245       }
00246       mfcc->wrk->ssbuf = mfcc->frontend.ssbuf;
00247       mfcc->wrk->ssbuflen = mfcc->frontend.sslen;
00248       mfcc->wrk->ss_alpha = mfcc->frontend.ss_alpha;
00249       mfcc->wrk->ss_floor = mfcc->frontend.ss_floor;
00250     }
00251   }
00252 
00253   for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00254   
00255     para = mfcc->para;
00256 
00257     /* 対数エネルギー正規化のための初期値 */
00258     /* set initial value for log energy normalization */
00259     if (para->energy && para->enormal) energy_max_init(&(mfcc->ewrk));
00260     /* デルタ計算のためのサイクルバッファを用意 */
00261     /* initialize cycle buffers for delta and accel coef. computation */
00262     if (para->delta) mfcc->db = WMP_deltabuf_new(para->baselen, para->delWin);
00263     if (para->acc) mfcc->ab = WMP_deltabuf_new(para->baselen * 2, para->accWin);
00264     /* デルタ計算のためのワークエリアを確保 */
00265     /* allocate work area for the delta computation */
00266     mfcc->tmpmfcc = (VECT *)mymalloc(sizeof(VECT) * para->vecbuflen);
00267     /* MAP-CMN 用の初期ケプストラム平均を読み込んで初期化する */
00268     /* Initialize the initial cepstral mean data from file for MAP-CMN */
00269     if (para->cmn) mfcc->cmn.wrk = CMN_realtime_new(para->mfcc_dim, mfcc->cmn.map_weight);
00270     /* -cmnload 指定時, CMN用のケプストラム平均の初期値をファイルから読み込む */
00271     /* if "-cmnload", load initial cepstral mean data from file for CMN */
00272     if (mfcc->cmn.load_filename) {
00273       if (para->cmn) {
00274         if ((mfcc->cmn.loaded = CMN_load_from_file(mfcc->cmn.wrk, mfcc->cmn.load_filename))== FALSE) {
00275           jlog("WARNING: failed to read initial cepstral mean from \"%s\", do flat start\n", mfcc->cmn.load_filename);
00276         }
00277       } else {
00278         jlog("WARNING: CMN not required on AM, file \"%s\" ignored\n", mfcc->cmn.load_filename);
00279       }
00280     }
00281 
00282   }
00283   /* 最大フレーム長を最大入力時間数から計算 */
00284   /* set maximum allowed frame length */
00285   r->maxframelen = MAXSPEECHLEN / recog->jconf->input.frameshift;
00286   /* 窓長をセット */
00287   /* set window length */
00288   r->windowlen = recog->jconf->input.framesize + 1;
00289   /* 窓かけ用バッファを確保 */
00290   /* set window buffer */
00291   r->window = mymalloc(sizeof(SP16) * r->windowlen);
00292 
00293   return TRUE;
00294 }
00295 
00320 void
00321 reset_mfcc(Recog *recog) 
00322 {
00323   Value *para;
00324   PROCESS_AM *am;
00325   MFCCCalc *mfcc;
00326   RealBeam *r;
00327 
00328   r = &(recog->real);
00329 
00330   /* 特徴抽出モジュールを初期化 */
00331   /* initialize parameter extraction module */
00332   for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00333 
00334     para = mfcc->para;
00335 
00336     /* 対数エネルギー正規化のための初期値をセット */
00337     /* set initial value for log energy normalization */
00338     if (para->energy && para->enormal) energy_max_prepare(&(mfcc->ewrk), para);
00339     /* デルタ計算用バッファを準備 */
00340     /* set the delta cycle buffer */
00341     if (para->delta) WMP_deltabuf_prepare(mfcc->db);
00342     if (para->acc) WMP_deltabuf_prepare(mfcc->ab);
00343   }
00344 
00345   /* 音響尤度計算用キャッシュを準備 */
00346   /* prepare cache area for acoustic computation of HMM states and mixtures */
00347   for(am=recog->amlist;am;am=am->next) {
00348     outprob_prepare(&(am->hmmwrk), r->maxframelen);
00349   }
00350 }
00351 
00378 boolean
00379 RealTimePipeLinePrepare(Recog *recog)
00380 {
00381   RealBeam *r;
00382   PROCESS_AM *am;
00383   MFCCCalc *mfcc;
00384 #ifdef SPSEGMENT_NAIST
00385   RecogProcess *p;
00386 #endif
00387 
00388   r = &(recog->real);
00389 
00390   /* 計算用の変数を初期化 */
00391   /* initialize variables for computation */
00392   r->windownum = 0;
00393   /* parameter check */
00394   for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00395     /* パラメータ初期化 */
00396     /* parameter initialization */
00397     init_param(mfcc);
00398     /* フレーム数をリセット */
00399     /* reset frame count */
00400     mfcc->f = 0;
00401     /* MAP-CMN の初期化 */
00402     /* Prepare for MAP-CMN */
00403     if (mfcc->para->cmn) CMN_realtime_prepare(mfcc->cmn.wrk);
00404   }
00405   /* 準備した param 構造体のデータのパラメータ型を音響モデルとチェックする */
00406   /* check type coherence between param and hmminfo here */
00407   for(am=recog->amlist;am;am=am->next) {
00408     if (!check_param_coherence(am->hmminfo, am->mfcc->param)) {
00409       jlog("ERROR: input parameter type does not match AM\n");
00410       return FALSE;
00411     }
00412   }
00413 
00414   /* 計算用のワークエリアを準備 */
00415   /* prepare work area for calculation */
00416   reset_mfcc(recog);
00417 
00418 #ifdef BACKEND_VAD
00419   if (recog->jconf->decodeopt.segment) {
00420     /* initialize segmentation parameters */
00421     spsegment_init(recog);
00422   }
00423 #else
00424   recog->triggered = FALSE;
00425 #endif
00426 
00427   return TRUE;
00428 }
00429 
00462 boolean
00463 RealTimeMFCC(MFCCCalc *mfcc, SP16 *window, int windowlen)
00464 {
00465   int i;
00466   boolean ret;
00467   VECT *tmpmfcc;
00468   Value *para;
00469 
00470   tmpmfcc = mfcc->tmpmfcc;
00471   para = mfcc->para;
00472 
00473   /* 音声波形から base MFCC を計算 (recog->mfccwrk を利用) */
00474   /* calculate base MFCC from waveform (use recog->mfccwrk) */
00475   for (i=0; i < windowlen; i++) {
00476     mfcc->wrk->bf[i+1] = (float) window[i];
00477   }
00478   WMP_calc(mfcc->wrk, tmpmfcc, para);
00479 
00480   if (para->energy && para->enormal) {
00481     /* 対数エネルギー項を正規化する */
00482     /* normalize log energy */
00483     /* リアルタイム入力では発話ごとの最大エネルギーが得られないので
00484        直前の発話のパワーで代用する */
00485     /* Since the maximum power of the whole input utterance cannot be
00486        obtained at real-time input, the maximum of last input will be
00487        used to normalize.
00488     */
00489     tmpmfcc[para->baselen-1] = energy_max_normalize(&(mfcc->ewrk), tmpmfcc[para->baselen-1], para);
00490   }
00491 
00492   if (para->delta) {
00493     /* デルタを計算する */
00494     /* calc delta coefficients */
00495     ret = WMP_deltabuf_proceed(mfcc->db, tmpmfcc);
00496 #ifdef RDEBUG
00497     printf("DeltaBuf: ret=%d, status=", ret);
00498     for(i=0;i<mfcc->db->len;i++) {
00499       printf("%d", mfcc->db->is_on[i]);
00500     }
00501     printf(", nextstore=%d\n", mfcc->db->store);
00502 #endif
00503     /* ret == FALSE のときはまだディレイ中なので認識処理せず次入力へ */
00504     /* if ret == FALSE, there is no available frame.  So just wait for
00505        next input */
00506     if (! ret) {
00507       return FALSE;
00508     }
00509 
00510     /* db->vec に現在の元データとデルタ係数が入っているので tmpmfcc にコピー */
00511     /* now db->vec holds the current base and full delta, so copy them to tmpmfcc */
00512     memcpy(tmpmfcc, mfcc->db->vec, sizeof(VECT) * para->baselen * 2);
00513   }
00514 
00515   if (para->acc) {
00516     /* Accelerationを計算する */
00517     /* calc acceleration coefficients */
00518     /* base+delta をそのまま入れる */
00519     /* send the whole base+delta to the cycle buffer */
00520     ret = WMP_deltabuf_proceed(mfcc->ab, tmpmfcc);
00521 #ifdef RDEBUG
00522     printf("AccelBuf: ret=%d, status=", ret);
00523     for(i=0;i<mfcc->ab->len;i++) {
00524       printf("%d", mfcc->ab->is_on[i]);
00525     }
00526     printf(", nextstore=%d\n", mfcc->ab->store);
00527 #endif
00528     /* ret == FALSE のときはまだディレイ中なので認識処理せず次入力へ */
00529     /* if ret == FALSE, there is no available frame.  So just wait for
00530        next input */
00531     if (! ret) {
00532       return FALSE;
00533     }
00534     /* ab->vec には,(base+delta) とその差分係数が入っている. 
00535        [base] [delta] [delta] [acc] の順で入っているので,
00536        [base] [delta] [acc] を tmpmfcc にコピーする. */
00537     /* now ab->vec holds the current (base+delta) and their delta coef. 
00538        it holds a vector in the order of [base] [delta] [delta] [acc], 
00539        so copy the [base], [delta] and [acc] to tmpmfcc.  */
00540     memcpy(tmpmfcc, mfcc->ab->vec, sizeof(VECT) * para->baselen * 2);
00541     memcpy(&(tmpmfcc[para->baselen*2]), &(mfcc->ab->vec[para->baselen*3]), sizeof(VECT) * para->baselen);
00542   }
00543 
00544 #ifdef POWER_REJECT
00545   if (para->energy || para->c0) {
00546     mfcc->avg_power += tmpmfcc[para->baselen-1];
00547   }
00548 #endif
00549 
00550   if (para->delta && (para->energy || para->c0) && para->absesup) {
00551     /* 絶対値パワーを除去 */
00552     /* suppress absolute power */
00553     memmove(&(tmpmfcc[para->baselen-1]), &(tmpmfcc[para->baselen]), sizeof(VECT) * (para->vecbuflen - para->baselen));
00554   }
00555 
00556   /* この時点で tmpmfcc に現時点での最新の特徴ベクトルが格納されている */
00557   /* tmpmfcc[] now holds the latest parameter vector */
00558 
00559   /* CMN を計算 */
00560   /* perform CMN */
00561   if (para->cmn) CMN_realtime(mfcc->cmn.wrk, tmpmfcc);
00562   
00563   return TRUE;
00564 }
00565 
00634 int
00635 RealTimePipeLine(SP16 *Speech, int nowlen, Recog *recog) /* Speech[0...nowlen] = input */
00636 {
00637   int i, now;
00638   MFCCCalc *mfcc;
00639   RealBeam *r;
00640   int maxf;
00641   boolean ok_p;
00642 
00643   RecogProcess *p;
00644   PROCESS_AM *am;
00645   int rewind_frame;
00646   boolean reprocess;
00647   boolean all_false, all_true;
00648 
00649   r = &(recog->real);
00650 
00651   /* window[0..windownum-1] は前回の呼び出しで残った音声データが格納されている */
00652   /* window[0..windownum-1] are speech data left from previous call */
00653 
00654   /* 処理用ポインタを初期化 */
00655   /* initialize pointer for local processing */
00656   now = 0;
00657   
00658   /* 認識処理がセグメント要求で終わったのかどうかのフラグをリセット */
00659   /* reset flag which indicates whether the input has ended with segmentation request */
00660   r->last_is_segmented = FALSE;
00661 
00662 #ifdef RDEBUG
00663   printf("got %d samples\n", nowlen);
00664 #endif
00665 
00666   while (now < nowlen) {        /* till whole input is processed */
00667     /* 入力長が maxframelen に達したらここで強制終了 */
00668     /* if input length reaches maximum buffer size, terminate 1st pass here */
00669     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00670       if (mfcc->f >= r->maxframelen) return(1);
00671     }
00672     /* 窓バッファを埋められるだけ埋める */
00673     /* fill window buffer as many as possible */
00674     for(i = min(r->windowlen - r->windownum, nowlen - now); i > 0 ; i--)
00675       r->window[r->windownum++] = (float) Speech[now++];
00676     /* もし窓バッファが埋まらなければ, このセグメントの処理はここで終わる. 
00677        処理されなかったサンプル (window[0..windownum-1]) は次回に持ち越し. */
00678     /* if window buffer was not filled, end processing here, keeping the
00679        rest samples (window[0..windownum-1]) in the window buffer. */
00680     if (r->windownum < r->windowlen) break;
00681 #ifdef RDEBUG
00682     /*    printf("%d used, %d rest\n", now, nowlen - now);
00683 
00684           printf("[f = %d]\n", f);*/
00685 #endif
00686 
00687     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00688       mfcc->valid = FALSE;
00689       /* 窓内の音声波形から特徴量を計算して r->tmpmfcc に格納  */
00690       /* calculate a parameter vector from current waveform windows
00691          and store to r->tmpmfcc */
00692       if ((*(recog->calc_vector))(mfcc, r->window, r->windowlen)) {
00693         mfcc->valid = TRUE;
00694         /* MFCC完成,登録 */
00695         /* now get the MFCC vector of current frame, now store it to param */
00696         if (param_alloc(mfcc->param, mfcc->f + 1, mfcc->param->veclen) == FALSE) {
00697           jlog("ERROR: failed to allocate memory for incoming MFCC vectors\n");
00698           return -1;
00699         }
00700         memcpy(mfcc->param->parvec[mfcc->f], mfcc->tmpmfcc, sizeof(VECT) * mfcc->param->veclen);
00701 #ifdef RDEBUG
00702         printf("DeltaBuf: %02d: got frame %d\n", mfcc->id, mfcc->f);
00703 #endif
00704       }
00705     }
00706 
00707     /* call recognition start callback */
00708     ok_p = FALSE;
00709     maxf = 0;
00710     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00711       if (!mfcc->valid) continue;
00712       if (maxf < mfcc->f) maxf = mfcc->f;
00713       if (mfcc->f == 0) {
00714         ok_p = TRUE;
00715       }
00716     }
00717     if (ok_p && maxf == 0) {
00718       /* call callback when at least one of MFCC has initial frame */
00719       if (recog->jconf->decodeopt.segment) {
00720 #ifdef BACKEND_VAD
00721         /* not exec pass1 begin callback here */
00722 #else
00723         if (!recog->process_segment) {
00724           callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
00725         }
00726         callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);
00727         callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
00728         recog->triggered = TRUE;
00729 #endif
00730       } else {
00731         callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
00732         callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
00733         recog->triggered = TRUE;
00734       }
00735     }
00736 
00737     /* 各インスタンスについて mfcc->f の認識処理を1フレーム進める */
00738     switch (decode_proceed(recog)) {
00739     case -1: /* error */
00740       return -1;
00741       break;
00742     case 0:                     /* success */
00743       break;
00744     case 1:                     /* segmented */
00745       /* 認識処理のセグメント要求で終わったことをフラグにセット */
00746       /* set flag which indicates that the input has ended with segmentation request */
00747       r->last_is_segmented = TRUE;
00748       if (recog->jconf->decodeopt.segment) {
00749         /* ショートポーズセグメンテーション: バッファに残っているデータを
00750            別に保持して,次回の最初に処理する */
00751         /* short pause segmentation: there is some data left in buffer, so
00752            we should keep them for next processing */
00753         r->rest_len = nowlen - now;
00754         if (r->rest_len > 0) {
00755           /* copy rest samples to rest_Speech */
00756           if (r->rest_Speech == NULL) {
00757             r->rest_alloc_len = r->rest_len;
00758             r->rest_Speech = (SP16 *)mymalloc(sizeof(SP16)*r->rest_alloc_len);
00759           } else if (r->rest_alloc_len < r->rest_len) {
00760             r->rest_alloc_len = r->rest_len;
00761             r->rest_Speech = (SP16 *)myrealloc(r->rest_Speech, sizeof(SP16)*r->rest_alloc_len);
00762           }
00763           memcpy(r->rest_Speech, &(Speech[now]), sizeof(SP16) * r->rest_len);
00764         }
00765       }
00766       /* tell the caller to be segmented by this function */
00767       /* 呼び出し元に,ここで入力を切るよう伝える */
00768       return 1;
00769     }
00770 
00771 #ifdef BACKEND_VAD
00772     /* check up trigger in case of VAD segmentation */
00773     if (recog->jconf->decodeopt.segment) {
00774       if (recog->triggered == FALSE) {
00775         if (spsegment_trigger_sync(recog)) {
00776           if (!recog->process_segment) {
00777             callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
00778           }
00779           callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);
00780           callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
00781           recog->triggered = TRUE;
00782         }
00783       }
00784     }
00785 #endif
00786 
00787     if (spsegment_need_restart(recog, &rewind_frame, &reprocess) == TRUE) {
00788       /* set total length to the current frame */
00789       for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00790         if (!mfcc->valid) continue;
00791         mfcc->param->header.samplenum = mfcc->f + 1;
00792         mfcc->param->samplenum = mfcc->f + 1;
00793       }
00794       /* do rewind for all mfcc here */
00795       spsegment_restart_mfccs(recog, rewind_frame, reprocess);
00796       /* also tell adin module to rehash the concurrent audio input */
00797       recog->adin->rehash = TRUE;
00798       /* reset outprob cache for all AM */
00799       for(am=recog->amlist;am;am=am->next) {
00800         outprob_prepare(&(am->hmmwrk), am->mfcc->param->samplenum);
00801       }
00802       if (reprocess) {
00803         /* process the backstep MFCCs here */
00804         while(1) {
00805           ok_p = TRUE;
00806           for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00807             if (! mfcc->valid) continue;
00808             mfcc->f++;
00809             if (mfcc->f < mfcc->param->samplenum) {
00810               mfcc->valid = TRUE;
00811               ok_p = FALSE;
00812             } else {
00813               mfcc->valid = FALSE;
00814             }
00815           }
00816           if (ok_p) {
00817             /* すべての MFCC が終わりに達したのでループ終了 */
00818             /* all MFCC has been processed, end of loop  */
00819             for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00820               if (! mfcc->valid) continue;
00821               mfcc->f--;
00822             }
00823             break;
00824           }
00825           /* 各インスタンスについて mfcc->f の認識処理を1フレーム進める */
00826           switch (decode_proceed(recog)) {
00827           case -1: /* error */
00828             return -1;
00829             break;
00830           case 0:                       /* success */
00831             break;
00832           case 1:                       /* segmented */
00833             /* ignore segmentation while in the backstep segment */
00834             break;
00835           }
00836           /* call frame-wise callback */
00837           callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog);
00838         }
00839       }
00840     }
00841 
00842     /* call frame-wise callback if at least one of MFCC is valid at this frame */
00843     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00844       if (mfcc->valid) {
00845         callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog);
00846         break;
00847       }
00848     }
00849 
00850     /* 1フレーム処理が進んだのでポインタを進める */
00851     /* proceed frame pointer */
00852     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00853       if (!mfcc->valid) continue;
00854       mfcc->f++;
00855     }
00856 
00857     /* 窓バッファを処理が終わった分シフト */
00858     /* shift window */
00859     memmove(r->window, &(r->window[recog->jconf->input.frameshift]), sizeof(SP16) * (r->windowlen - recog->jconf->input.frameshift));
00860     r->windownum -= recog->jconf->input.frameshift;
00861   }
00862 
00863   /* 与えられた音声セグメントに対する認識処理が全て終了
00864      呼び出し元に, 入力を続けるよう伝える */
00865   /* input segment is fully processed
00866      tell the caller to continue input */
00867   return(0);                    
00868 }
00869 
00903 int
00904 RealTimeResume(Recog *recog)
00905 {
00906   MFCCCalc *mfcc;
00907   RealBeam *r;
00908   boolean ok_p;
00909 #ifdef SPSEGMENT_NAIST
00910   RecogProcess *p;
00911 #endif
00912 
00913   r = &(recog->real);
00914 
00915   /* 計算用のワークエリアを準備 */
00916   /* prepare work area for calculation */
00917   reset_mfcc(recog);
00918 
00919   /* param にある全パラメータを処理する準備 */
00920   /* prepare to process all data in param */
00921   for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00922     if (mfcc->param->samplenum == 0) mfcc->valid = FALSE;
00923     else mfcc->valid = TRUE;
00924 #ifdef RDEBUG
00925     printf("Resume: %02d: f=%d\n", mfcc->id, mfcc->mfcc->param->samplenum-1);
00926 #endif
00927     /* フレーム数をリセット */
00928     /* reset frame count */
00929     mfcc->f = 0;
00930   }
00931 
00932 #ifdef BACKEND_VAD
00933   if (recog->jconf->decodeopt.segment) {
00934     spsegment_init(recog);
00935   }
00936   /* not exec pass1 begin callback here */
00937 #else
00938   recog->triggered = FALSE;
00939   for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00940     if (!mfcc->valid) continue;
00941     callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);
00942     callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
00943     recog->triggered = TRUE;
00944     break;
00945   }
00946 #endif
00947 
00948   /* param 内の全フレームについて認識処理を進める */
00949   /* proceed recognition for all frames in param */
00950 
00951   while(1) {
00952     ok_p = TRUE;
00953     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00954       if (! mfcc->valid) continue;
00955       if (mfcc->f < mfcc->param->samplenum) {
00956         mfcc->valid = TRUE;
00957         ok_p = FALSE;
00958       } else {
00959         mfcc->valid = FALSE;
00960       }
00961     }
00962     if (ok_p) {
00963       /* すべての MFCC が終わりに達したのでループ終了 */
00964       /* all MFCC has been processed, end of loop  */
00965       break;
00966     }
00967 
00968     /* 各インスタンスについて mfcc->f の認識処理を1フレーム進める */
00969     switch (decode_proceed(recog)) {
00970     case -1: /* error */
00971       return -1;
00972       break;
00973     case 0:                     /* success */
00974       break;
00975     case 1:                     /* segmented */
00976       /* segmented, end procs ([0..f])*/
00977       r->last_is_segmented = TRUE;
00978       return 1;         /* segmented by this function */
00979     }
00980 
00981 #ifdef BACKEND_VAD
00982     /* check up trigger in case of VAD segmentation */
00983     if (recog->jconf->decodeopt.segment) {
00984       if (recog->triggered == FALSE) {
00985         if (spsegment_trigger_sync(recog)) {
00986           callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);
00987           callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
00988           recog->triggered = TRUE;
00989         }
00990       }
00991     }
00992 #endif
00993 
00994     /* call frame-wise callback */
00995     callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog);
00996 
00997     /* 1フレーム処理が進んだのでポインタを進める */
00998     /* proceed frame pointer */
00999     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01000       if (!mfcc->valid) continue;
01001       mfcc->f++;
01002     }
01003 
01004   }
01005   /* 前回のセグメント時に入力をシフトしていない分をシフトする */
01006   /* do the last shift here */
01007   memmove(r->window, &(r->window[recog->jconf->input.frameshift]), sizeof(SP16) * (r->windowlen - recog->jconf->input.frameshift));
01008   r->windownum -= recog->jconf->input.frameshift;
01009 
01010   /* これで再開の準備が整ったので,まずは前回の処理で残っていた音声データから
01011      処理する */
01012   /* now that the search status has been prepared for the next input, we
01013      first process the rest unprocessed samples at the last session */
01014   if (r->rest_len > 0) {
01015     return(RealTimePipeLine(r->rest_Speech, r->rest_len, recog));
01016   }
01017 
01018   /* 新規の入力に対して認識処理は続く… */
01019   /* the recognition process will continue for the newly incoming samples... */
01020   return 0;
01021 
01022 }
01023 
01024 
01058 boolean
01059 RealTimeParam(Recog *recog)
01060 {
01061   boolean ret1, ret2;
01062   RealBeam *r;
01063   int ret;
01064   int maxf;
01065   boolean ok_p;
01066   MFCCCalc *mfcc;
01067   Value *para;
01068 #ifdef RDEBUG
01069   int i;
01070 #endif
01071 
01072   r = &(recog->real);
01073 
01074   if (r->last_is_segmented) {
01075 
01076     /* RealTimePipeLine で認識処理側の理由により認識が中断した場合,
01077        現状態のMFCC計算データをそのまま次回へ保持する必要があるので,
01078        MFCC計算終了処理を行わずに第1パスの結果のみ出力して終わる. */
01079     /* When input segmented by recognition process in RealTimePipeLine(),
01080        we have to keep the whole current status of MFCC computation to the
01081        next call.  So here we only output the 1st pass result. */
01082     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01083       mfcc->param->header.samplenum = mfcc->f + 1;/* len = lastid + 1 */
01084       mfcc->param->samplenum = mfcc->f + 1;
01085     }
01086     decode_end_segmented(recog);
01087 
01088     /* この区間の param データを第2パスのために返す */
01089     /* return obtained parameter for 2nd pass */
01090     return(TRUE);
01091   }
01092 
01093   /* MFCC計算の終了処理を行う: 最後の遅延フレーム分を処理 */
01094   /* finish MFCC computation for the last delayed frames */
01095   for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01096     if (mfcc->para->delta || mfcc->para->acc) {
01097       mfcc->valid = TRUE;
01098     } else {
01099       mfcc->valid = FALSE;
01100     }
01101   }
01102 
01103   /* loop until all data has been flushed */
01104   while (1) {
01105 
01106     /* if all mfcc became invalid, exit loop here */
01107     ok_p = FALSE;
01108     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01109       if (mfcc->valid) {
01110         ok_p = TRUE;
01111         break;
01112       }
01113     }
01114     if (!ok_p) break;
01115 
01116     /* try to get 1 frame for all mfcc instances */
01117     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01118       
01119       para = mfcc->para;
01120       
01121       if (! mfcc->valid) continue;
01122       
01123       /* check if there is data in cycle buffer of delta */
01124       ret1 = WMP_deltabuf_flush(mfcc->db);
01125 #ifdef RDEBUG
01126       printf("DeltaBufLast: ret=%d, status=", ret1);
01127       for(i=0;i<mfcc->db->len;i++) {
01128         printf("%d", mfcc->db->is_on[i]);
01129       }
01130       printf(", nextstore=%d\n", mfcc->db->store);
01131 #endif
01132       if (ret1) {
01133         /* uncomputed delta has flushed, compute it with tmpmfcc */
01134         if (para->energy && para->absesup) {
01135           memcpy(mfcc->tmpmfcc, mfcc->db->vec, sizeof(VECT) * (para->baselen - 1));
01136           memcpy(&(mfcc->tmpmfcc[para->baselen-1]), &(mfcc->db->vec[para->baselen]), sizeof(VECT) * para->baselen);
01137         } else {
01138           memcpy(mfcc->tmpmfcc, mfcc->db->vec, sizeof(VECT) * para->baselen * 2);
01139         }
01140         if (para->acc) {
01141           /* this new delta should be given to the accel cycle buffer */
01142           ret2 = WMP_deltabuf_proceed(mfcc->ab, mfcc->tmpmfcc);
01143 #ifdef RDEBUG
01144           printf("AccelBuf: ret=%d, status=", ret2);
01145           for(i=0;i<mfcc->ab->len;i++) {
01146             printf("%d", mfcc->ab->is_on[i]);
01147           }
01148           printf(", nextstore=%d\n", mfcc->ab->store);
01149 #endif
01150           if (ret2) {
01151             /* uncomputed accel was given, compute it with tmpmfcc */
01152             memcpy(mfcc->tmpmfcc, mfcc->ab->vec, sizeof(VECT) * (para->veclen - para->baselen));
01153             memcpy(&(mfcc->tmpmfcc[para->veclen - para->baselen]), &(mfcc->ab->vec[para->veclen - para->baselen]), sizeof(VECT) * para->baselen);
01154           } else {
01155             /* still no input is given: */
01156             /* in case of very short input: go on to the next input */
01157             continue;
01158           }
01159         }
01160         
01161       } else {
01162       
01163         /* no data left in the delta buffer */
01164         if (para->acc) {
01165           /* no new data, just flush the accel buffer */
01166           ret2 = WMP_deltabuf_flush(mfcc->ab);
01167 #ifdef RDEBUG
01168           printf("AccelBuf: ret=%d, status=", ret2);
01169           for(i=0;i<mfcc->ab->len;i++) {
01170             printf("%d", mfcc->ab->is_on[i]);
01171           }
01172           printf(", nextstore=%d\n", mfcc->ab->store);
01173 #endif
01174           if (ret2) {
01175             /* uncomputed data has flushed, compute it with tmpmfcc */
01176             memcpy(mfcc->tmpmfcc, mfcc->ab->vec, sizeof(VECT) * (para->veclen - para->baselen));
01177             memcpy(&(mfcc->tmpmfcc[para->veclen - para->baselen]), &(mfcc->ab->vec[para->veclen - para->baselen]), sizeof(VECT) * para->baselen);
01178           } else {
01179             /* actually no data exists in both delta and accel */
01180             mfcc->valid = FALSE; /* disactivate this instance */
01181             continue;           /* end this loop */
01182           }
01183         } else {
01184           /* only delta: input fully flushed */
01185           mfcc->valid = FALSE; /* disactivate this instance */
01186           continue;             /* end this loop */
01187         }
01188       }
01189       /* a new frame has been obtained from delta buffer to tmpmfcc */
01190       if(para->cmn) CMN_realtime(mfcc->cmn.wrk, mfcc->tmpmfcc);
01191       if (param_alloc(mfcc->param, mfcc->f + 1, mfcc->param->veclen) == FALSE) {
01192         jlog("ERROR: failed to allocate memory for incoming MFCC vectors\n");
01193         return FALSE;
01194       }
01195       /* store to mfcc->f */
01196       memcpy(mfcc->param->parvec[mfcc->f], mfcc->tmpmfcc, sizeof(VECT) * mfcc->param->veclen);
01197     }
01198 
01199     /* call recognition start callback */
01200     ok_p = FALSE;
01201     maxf = 0;
01202     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01203       if (!mfcc->valid) continue;
01204       if (maxf < mfcc->f) maxf = mfcc->f;
01205       if (mfcc->f == 0) {
01206         ok_p = TRUE;
01207       }
01208     }
01209 
01210     if (ok_p && maxf == 0) {
01211       /* call callback when at least one of MFCC has initial frame */
01212       if (recog->jconf->decodeopt.segment) {
01213 #ifdef BACKEND_VAD
01214           /* not exec pass1 begin callback here */
01215 #else
01216         if (!recog->process_segment) {
01217           callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
01218         }
01219         callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);
01220         callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
01221         recog->triggered = TRUE;
01222 #endif
01223       } else {
01224         callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
01225         callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
01226         recog->triggered = TRUE;
01227       }
01228     }
01229 
01230     /* proceed for the current frame */
01231     ret = decode_proceed(recog);
01232     if (ret == -1) {            /* error */
01233       return -1;
01234     } else if (ret == 1) {      /* segmented */
01235       /* loop out */
01236       break;
01237     } /* else no event occured */
01238 
01239 #ifdef BACKEND_VAD
01240     /* check up trigger in case of VAD segmentation */
01241     if (recog->jconf->decodeopt.segment) {
01242       if (recog->triggered == FALSE) {
01243         if (spsegment_trigger_sync(recog)) {
01244           if (!recog->process_segment) {
01245             callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
01246           }
01247           callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);
01248           callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
01249           recog->triggered = TRUE;
01250         }
01251       }
01252     }
01253 #endif
01254 
01255     /* call frame-wise callback */
01256     callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog);
01257 
01258     /* move to next */
01259     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01260       if (! mfcc->valid) continue;
01261       mfcc->f++;
01262       if (mfcc->f > r->maxframelen) mfcc->valid = FALSE;
01263     }
01264   }
01265 
01266   /* finalize real-time 1st pass */
01267   for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01268     mfcc->param->header.samplenum = mfcc->f;
01269     mfcc->param->samplenum = mfcc->f;
01270   }
01271   /* perform final-frame processing, then output the recognition result and do the termination processing */
01272   decode_end(recog);
01273 
01274   return(TRUE);
01275 }
01276 
01295 void
01296 RealTimeCMNUpdate(MFCCCalc *mfcc, Recog *recog)
01297 {
01298   float mseclen;
01299   boolean cmn_update_p;
01300   Value *para;
01301   Jconf *jconf;
01302   RecogProcess *r;
01303 
01304   jconf = recog->jconf;
01305   para = mfcc->para;
01306   
01307   /* update CMN vector for next speech */
01308   if(para->cmn) {
01309     if (mfcc->cmn.update) {
01310       cmn_update_p = TRUE;
01311       for(r=recog->process_list;r;r=r->next) {
01312         if (!r->live) continue;
01313         if (r->am->mfcc != mfcc) continue;
01314         if (r->result.status < 0) { /* input rejected */
01315           cmn_update_p = FALSE;
01316           break;
01317         }
01318       }
01319       if (cmn_update_p) {
01320         /* update last CMN parameter for next spech */
01321         CMN_realtime_update(mfcc->cmn.wrk);
01322       } else {
01323         /* do not update, because the last input is bogus */
01324         if (verbose_flag) {
01325 #ifdef BACKEND_VAD
01326           if (!recog->jconf->decodeopt.segment || recog->triggered) {
01327             jlog("STAT: skip CMN parameter update since last input was invalid\n");
01328           }
01329 #else
01330           jlog("STAT: skip CMN parameter update since last input was invalid\n");
01331 #endif
01332         }
01333       }
01334     }
01335     /* if needed, save the updated CMN parameter to a file */
01336     if (mfcc->cmn.save_filename) {
01337       if (CMN_save_to_file(mfcc->cmn.wrk, mfcc->cmn.save_filename) == FALSE) {
01338         jlog("WARNING: failed to save CMN parameter to \"%s\"\n", mfcc->cmn.save_filename);
01339       }
01340     }
01341   }
01342 }
01343 
01356 void
01357 RealTimeTerminate(Recog *recog)
01358 {
01359   MFCCCalc *mfcc;
01360 
01361   for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
01362     mfcc->param->header.samplenum = mfcc->f;
01363     mfcc->param->samplenum = mfcc->f;
01364   }
01365 
01366   /* 最終フレーム処理を行い,認識の結果出力と終了処理を行う */
01367   decode_end(recog);
01368 }
01369 
01381 void
01382 realbeam_free(Recog *recog)
01383 {
01384   RealBeam *r;
01385 
01386   r = &(recog->real);
01387 
01388   if (recog->real.window) {
01389     free(recog->real.window);
01390     recog->real.window = NULL;
01391   }
01392   if (recog->real.rest_Speech) {
01393     free(recog->real.rest_Speech);
01394     recog->real.rest_Speech = NULL;
01395   }
01396 }
01397 
01398 /* end of file */

Generated on Tue Dec 18 15:59:52 2007 for Julius by  doxygen 1.5.4