libjulius/src/pass1.c

Go to the documentation of this file.
00001 
00042 /*
00043  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
00044  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00045  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
00046  * All rights reserved
00047  */
00048 
00049 #include <julius/julius.h>
00050 
00051 /********************************************************************/
00052 /* 第1パスを実行するメイン関数                                     */
00053 /* 入力をパイプライン処理する場合は realtime_1stpass.c を参照のこと */
00054 /* main function to execute 1st pass                                */
00055 /* the pipeline processing is not here: see realtime_1stpass.c      */
00056 /********************************************************************/
00057 
00111 int
00112 decode_proceed(Recog *recog)
00113 {
00114   MFCCCalc *mfcc;
00115   boolean break_flag;
00116   boolean break_decode;
00117   RecogProcess *p;
00118   boolean ok_p;
00119 #ifdef GMM_VAD
00120   GMMCalc *gmm;
00121   boolean break_gmm;
00122 #endif
00123   
00124   break_decode = FALSE;
00125 
00126   for(p = recog->process_list; p; p = p->next) {
00127     p->have_determine = FALSE;
00128     p->have_interim = FALSE;
00129   }
00130   for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00131     mfcc->segmented = FALSE;
00132   }
00133 
00134 #ifdef POWER_REJECT
00135   for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00136     if (mfcc->f == 0) {
00137       mfcc->avg_power = 0.0;
00138       if (debug2_flag) jlog("STAT: power_reject: reset\n");
00139     }
00140   }
00141 #endif
00142 
00143 
00144   if (recog->gmm != NULL) {
00145     /* GMM 計算を行う */
00146     if (recog->gmmmfcc->f == 0) {
00147       /* GMM 計算の初期化 */
00148       gmm_prepare(recog);
00149     }
00150     /* このフレームに対するGMMの尤度を計算 */
00151     gmm_proceed(recog);
00152 #ifdef GMM_VAD
00153     /* reset break flag */
00154     break_gmm = FALSE;
00155     /* Check for GMM-based VAD */
00156     gmm = recog->gc;
00157     gmm->want_rewind = FALSE;
00158     gmm_check_trigger(recog);
00159     if (gmm->after_trigger) {
00160       /* after trigger, in speech area */
00161       if (gmm->down_trigger) {
00162         /* down trigger, end segment */
00163 #ifdef GMM_VAD_DEBUG
00164         printf("GMM_VAD: %d: down trigger\n", recog->gmmmfcc->f);
00165 #endif
00166         recog->gmmmfcc->sparea_start = recog->gmmmfcc->f - recog->jconf->detect.gmm_margin;
00167         if (recog->gmmmfcc->sparea_start < 0) recog->gmmmfcc->sparea_start = 0;
00168         gmm->after_trigger = FALSE;
00169         recog->gmmmfcc->segmented = TRUE;
00170         break_gmm = TRUE;
00171       } else {
00172         /* keep recognition */
00173       }
00174     } else {
00175       /* before trigger, in noise area */
00176       if (gmm->up_trigger) {
00177         /* start recognition */
00178         /* request caller to rewind to the backstep point and
00179            re-start with normal search */
00180         if (recog->gmmmfcc->f < recog->jconf->detect.gmm_margin) {
00181           gmm->rewind_frame = 0;
00182         } else {
00183           gmm->rewind_frame = recog->gmmmfcc->f - recog->jconf->detect.gmm_margin;
00184         }
00185 #ifdef GMM_VAD_DEBUG
00186         printf("GMM_VAD: %d: up trigger, start recognition with %d frame rewind\n", recog->gmmmfcc->f, recog->gmmmfcc->f - gmm->rewind_frame);
00187 #endif
00188         gmm->want_rewind = TRUE;
00189         gmm->want_rewind_reprocess = TRUE;
00190         gmm->after_trigger = TRUE;
00191         return 0;
00192       } else {
00193         /* before trigger, noise continues */
00194 
00195         /* if noise goes more than a certain frame, shrink the noise area
00196            to avoid unlimited memory usage */
00197         if (recog->gmmmfcc->f > GMM_VAD_AUTOSHRINK_LIMIT) {
00198           gmm->want_rewind = TRUE;
00199           gmm->want_rewind_reprocess = FALSE;
00200           gmm->rewind_frame = recog->gmmmfcc->f - recog->jconf->detect.gmm_margin;
00201           if (debug2_flag) {
00202             jlog("DEBUG: GMM_VAD: pause exceeded %d, rewind\n", GMM_VAD_AUTOSHRINK_LIMIT);
00203           }
00204         }
00205 
00206         /* skip recognition processing */
00207         return 0;
00208       }
00209     }
00210 #endif /* GMM_VAD */
00211   }
00212 
00213   for(p = recog->process_list; p; p = p->next) {
00214     if (!p->live) continue;
00215     mfcc = p->am->mfcc;
00216     if (!mfcc->valid) {
00217       /* このフレームの処理をスキップ */
00218       /* skip processing the frame */
00219       continue;
00220     }
00221 
00222     /* mfcc-f のフレームについて認識処理(フレーム同期ビーム探索)を進める */
00223     /* proceed beam search for mfcc->f */
00224     if (mfcc->f == 0) {
00225       /* 最初のフレーム: 探索処理を初期化 */
00226       /* initial frame: initialize search process */
00227       if (get_back_trellis_init(mfcc->param, p) == FALSE) {
00228         jlog("ERROR: %02d %s: failed to initialize the 1st pass\n", p->config->id, p->config->name);
00229         return -1;
00230       }
00231     }
00232     if (mfcc->f > 0 || p->am->hmminfo->multipath) {
00233       /* 1フレーム探索を進める */
00234       /* proceed search for 1 frame */
00235       if (get_back_trellis_proceed(mfcc->f, mfcc->param, p, FALSE) == FALSE) {
00236         mfcc->segmented = TRUE;
00237         break_decode = TRUE;
00238       }
00239       if (p->config->successive.enabled) {
00240         if (detect_end_of_segment(p, mfcc->f - 1)) {
00241           /* セグメント終了検知: 第1パスここで中断 */
00242           mfcc->segmented = TRUE;
00243           break_decode = TRUE;
00244         }
00245       }
00246     }
00247   }
00248 
00249   /* セグメントすべきかどうか最終的な判定を行う.
00250      デコーダベースVADあるいは spsegment の場合,複数インスタンス間で OR
00251      を取る.また,GMMなど複数基準がある場合は基準間で AND を取る.*/
00252   /* determine whether to segment at here
00253      If multiple segmenter exists, take their AND */
00254   break_flag = FALSE;
00255   if (break_decode
00256 #ifdef GMM_VAD
00257       && (recog->gmm != NULL && break_gmm)
00258 #endif
00259       ) {
00260     break_flag = TRUE;
00261   }
00262 
00263   if (break_flag) {
00264     /* 探索処理の終了が発生したのでここで認識を終える. 
00265        最初のフレームから [f-1] 番目までが認識されたことになる
00266     */
00267     /* the recognition process tells us to stop recognition, so
00268        recognition should be terminated here.
00269        the recognized data are [0..f-1] */
00270 
00271     /* 最終フレームを last_time にセット */
00272     /* set the last frame to last_time */
00273     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00274       mfcc->last_time = mfcc->f - 1;
00275     }
00276 
00277     if (! recog->jconf->decodeopt.segment) {
00278       /* ショートポーズ以外で切れた場合,残りのサンプルは認識せずに捨てる */
00279       /* drop rest inputs if segmented by error */
00280       for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00281         mfcc->param->header.samplenum = mfcc->f;
00282         mfcc->param->samplenum = mfcc->f;
00283       }
00284     }
00285 
00286     return 1;
00287   }
00288 
00289   /* call frame-wise callback for the processing results if any */
00290   ok_p = FALSE;
00291   for(p=recog->process_list;p;p=p->next) {
00292     if (!p->live) continue;
00293     if (p->have_determine) {
00294       ok_p = TRUE;
00295     }
00296   }
00297   if (ok_p) callback_exec(CALLBACK_RESULT_PASS1_DETERMINED, recog);
00298   ok_p = FALSE;
00299   for(p=recog->process_list;p;p=p->next) {
00300     if (!p->live) continue;
00301     if (p->have_interim) {
00302       ok_p = TRUE;
00303     }
00304   }
00305   if (ok_p) callback_exec(CALLBACK_RESULT_PASS1_INTERIM, recog);
00306   
00307   return 0;
00308 }
00309 
#ifdef POWER_REJECT
/**
 * Tell whether the input should be rejected because of low average power.
 *
 * Every MFCC instance with a non-zero accumulated power is examined: the
 * accumulated power divided by the number of samples of its parameter
 * is compared with the configured threshold (reject.powerthres).
 * Instances whose accumulated power is exactly zero are skipped.
 *
 * @param recog [in] engine instance
 *
 * @return TRUE as soon as one instance falls below the threshold,
 * FALSE when none does.
 */
boolean
power_reject(Recog *recog)
{
  MFCCCalc *m;
  double mean_power;

  m = recog->mfcclist;
  while (m != NULL) {
    /* zero means no power was accumulated for this instance */
    if (m->avg_power != 0.0) {
      mean_power = m->avg_power / m->param->samplenum;
      if (debug2_flag) {
        jlog("STAT: power_reject: MFCC%02d: avg_power = %f\n", m->id, mean_power);
      }
      if (mean_power < recog->jconf->reject.powerthres) {
        return TRUE;
      }
    }
    m = m->next;
  }

  return FALSE;
}
#endif
00325 
00362 void
00363 decode_end_segmented(Recog *recog)
00364 {
00365   boolean ok_p;
00366   int mseclen;
00367   RecogProcess *p;
00368   int last_status;
00369 
00370   /* rejectshort 指定時, 入力が短ければここで第1パス結果を出力しない */
00371   /* suppress 1st pass output if -rejectshort and input shorter than specified */
00372   ok_p = TRUE;
00373   if (recog->jconf->reject.rejectshortlen > 0) {
00374     mseclen = (float)recog->mfcclist->last_time * (float)recog->jconf->input.period * (float)recog->jconf->input.frameshift / 10000.0;
00375     if (mseclen < recog->jconf->reject.rejectshortlen) {
00376       last_status = J_RESULT_STATUS_REJECT_SHORT;
00377       ok_p = FALSE;
00378     }
00379   }
00380 
00381 #ifdef POWER_REJECT
00382   if (ok_p) {
00383     if (power_reject(recog)) {
00384       last_status = J_RESULT_STATUS_REJECT_POWER;
00385       ok_p = FALSE;
00386     }
00387   }
00388 #endif
00389 
00390   if (ok_p) {
00391     for(p=recog->process_list;p;p=p->next) {
00392       if (!p->live) continue;
00393       finalize_1st_pass(p, p->am->mfcc->last_time);
00394     }
00395   } else {
00396     for(p=recog->process_list;p;p=p->next) {
00397       if (!p->live) continue;
00398       p->result.status = last_status;
00399     }
00400   }
00401   if (recog->jconf->decodeopt.segment) {
00402     finalize_segment(recog);
00403   }
00404   if (recog->gmm != NULL) {
00405     /* GMM 計算の終了 */
00406     gmm_end(recog);
00407   }
00408 }
00409 
00439 void
00440 decode_end(Recog *recog)
00441 {
00442   MFCCCalc *mfcc;
00443   int mseclen;
00444   boolean ok_p;
00445   RecogProcess *p;
00446   int last_status;
00447 
00448   for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00449     mfcc->segmented = FALSE;
00450   }
00451   if (recog->gmm != NULL) {
00452     /* GMM 計算の終了 */
00453     gmm_end(recog);
00454   }
00455 
00456 #ifdef GMM_VAD
00457   /* もしトリガがかからないまま入力終了に達したのなら,そのままエラー終了 */
00458   if (recog->jconf->decodeopt.segment) {
00459     if (recog->gmm) {
00460       if (recog->gc->after_trigger == FALSE) {
00461         for(p=recog->process_list;p;p=p->next) {
00462           p->result.status = J_RESULT_STATUS_ONLY_SILENCE;      /* reject by decoding */
00463         }
00464         /* ショートポーズセグメンテーションの場合,
00465            入力パラメータ分割などの最終処理も行なう */
00466         /* When short-pause segmentation enabled */
00467         finalize_segment(recog);
00468         return;
00469       }
00470     }
00471   }
00472 #endif
00473 
00474   /* 第1パスの最後のフレームの認識処理を行う */
00475   /* finalize 1st pass */
00476   for(p=recog->process_list;p;p=p->next) {
00477     if (!p->live) continue;
00478 #ifdef SPSEGMENT_NAIST
00479     if (recog->jconf->decodeopt.segment) {
00480       if (p->pass1.after_trigger == FALSE) continue;
00481     }
00482 #endif
00483     mfcc = p->am->mfcc;
00484     if (mfcc->f > 0) {
00485       get_back_trellis_end(mfcc->param, p);
00486     }
00487   }
00488 
00489   /* 終了処理 */
00490   for(p=recog->process_list;p;p=p->next) {
00491     if (!p->live) continue;
00492 
00493     ok_p = TRUE;
00494 
00495     /* check rejection by no input */
00496     if (ok_p) {
00497       mfcc = p->am->mfcc;
00498       /* 入力長がデルタの計算に十分でない場合,入力無しとする. */
00499       /* if input is short for compute all the delta coeff., terminate here */
00500       if (mfcc->f == 0) {
00501         jlog("STAT: no input frame\n");
00502         last_status = J_RESULT_STATUS_FAIL;
00503         ok_p = FALSE;
00504       }
00505     }
00506 
00507     /* check rejection by input length */
00508     if (ok_p) {
00509       if (recog->jconf->reject.rejectshortlen > 0) {
00510         mseclen = (float)mfcc->param->samplenum * (float)recog->jconf->input.period * (float)recog->jconf->input.frameshift / 10000.0;
00511         if (mseclen < recog->jconf->reject.rejectshortlen) {
00512           last_status = J_RESULT_STATUS_REJECT_SHORT;
00513           ok_p = FALSE;
00514         }
00515       }
00516     }
00517 
00518 #ifdef POWER_REJECT
00519     /* check rejection by average power */
00520     if (ok_p) {
00521       if (power_reject(recog)) {
00522         last_status = J_RESULT_STATUS_REJECT_POWER;
00523         ok_p = FALSE;
00524       }
00525     }
00526 #endif
00527 
00528 #ifdef SPSEGMENT_NAIST
00529     /* check rejection non-triggered input segment */
00530     if (ok_p) {
00531       if (recog->jconf->decodeopt.segment) {
00532         if (p->pass1.after_trigger == FALSE) {
00533           last_status = J_RESULT_STATUS_ONLY_SILENCE;   /* reject by decoding */
00534           ok_p = FALSE;
00535         }
00536       }
00537     }
00538 #endif
00539 
00540     if (ok_p) {
00541       /* valid input segment, finalize it */
00542       finalize_1st_pass(p, mfcc->param->samplenum);
00543     } else {
00544       /* invalid input segment */
00545       p->result.status = last_status;
00546     }
00547   }
00548   if (recog->jconf->decodeopt.segment) {
00549     /* ショートポーズセグメンテーションの場合,
00550        入力パラメータ分割などの最終処理も行なう */
00551     /* When short-pause segmentation enabled */
00552     finalize_segment(recog);
00553   }
00554 }
00555 
00556 
00590 boolean
00591 get_back_trellis(Recog *recog)
00592 {
00593   boolean ok_p;
00594   MFCCCalc *mfcc;
00595   int rewind_frame;
00596   PROCESS_AM *am;
00597   RecogProcess *p;
00598   boolean reprocess;
00599   boolean started;
00600 
00601   /* initialize mfcc instances */
00602   for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00603     /* mark all as valid, since all frames are fully prepared beforehand */
00604     if (mfcc->param->samplenum == 0) mfcc->valid = FALSE;
00605     else mfcc->valid = TRUE;
00606     /* set frame pointers to 0 */
00607     mfcc->f = 0;
00608   }
00609 
00610   /* callback of process start */
00611 #ifdef BACKEND_VAD
00612   if (recog->jconf->decodeopt.segment) {
00613     /* at first time, recognition does not start yet */
00614     /* reset segmentation flags */
00615     spsegment_init(recog);
00616   } else {
00617     /* execute callback for pass1 begin here */
00618     callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
00619     callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
00620     recog->triggered = TRUE;
00621   }
00622 #else
00623   if (recog->jconf->decodeopt.segment) {
00624     if (!recog->process_segment) {
00625       callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
00626     }
00627     callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);
00628   } else {
00629     callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
00630   }
00631   callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
00632   recog->triggered = TRUE;
00633 #endif
00634 
00635   while(1) {
00636     ok_p = TRUE;
00637     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00638       if (! mfcc->valid) continue;
00639       if (mfcc->f < mfcc->param->samplenum) {
00640         mfcc->valid = TRUE;
00641         ok_p = FALSE;
00642       } else {
00643         mfcc->valid = FALSE;
00644       }
00645     }
00646     if (ok_p) {
00647       /* すべての MFCC が終わりに達したのでループ終了 */
00648       /* all MFCC has been processed, end of loop  */
00649       break;
00650     }
00651 
00652     switch (decode_proceed(recog)) {
00653     case -1: /* error */
00654       return FALSE;
00655       break;
00656     case 0:                     /* success */
00657       break;
00658     case 1:                     /* segmented */
00659       /* 探索中断: 処理された入力は 0 から t-2 まで */
00660       /* search terminated: processed input = [0..t-2] */
00661       /* この時点で第1パスを終了する */
00662       /* end the 1st pass at this point */
00663       decode_end_segmented(recog);
00664       /* terminate 1st pass here */
00665       return TRUE;
00666     }
00667 
00668 #ifdef BACKEND_VAD
00669     /* check up trigger in case of VAD segmentation */
00670     if (recog->jconf->decodeopt.segment) {
00671       if (recog->triggered == FALSE) {
00672         if (spsegment_trigger_sync(recog)) {
00673           if (!recog->process_segment) {
00674             callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
00675           }
00676           callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);
00677           callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
00678           recog->triggered = TRUE;
00679         }
00680       }
00681     }
00682 #endif
00683 
00684     if (spsegment_need_restart(recog, &rewind_frame, &reprocess) == TRUE) {
00685       /* do rewind for all mfcc here */
00686       spsegment_restart_mfccs(recog, rewind_frame, reprocess);
00687       /* reset outprob cache for all AM */
00688       for(am=recog->amlist;am;am=am->next) {
00689         outprob_prepare(&(am->hmmwrk), am->mfcc->param->samplenum);
00690       }
00691     }
00692     /* call frame-wise callback */
00693     callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog);
00694 
00695     /* 1フレーム処理が進んだのでポインタを進める */
00696     /* proceed frame pointer */
00697     for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00698       if (!mfcc->valid) continue;
00699       mfcc->f++;
00700     }
00701 
00702     if (recog->process_want_terminate) {
00703       /* termination requested */
00704       decode_end_segmented(recog);
00705       return TRUE;
00706     }
00707   }
00708 
00709   /* 最終フレーム処理を行い,認識の結果出力と終了処理を行う */
00710   decode_end(recog);
00711 
00712   return TRUE;
00713 }
00714 
00715 /* end of file */

Generated on Tue Dec 18 15:59:52 2007 for Julius by  doxygen 1.5.4