00001
00042
00043
00044
00045
00046
00047
00048
00049 #include <julius/julius.h>
00050
00051
00052
00053
00054
00055
00056
00057
00111 int
00112 decode_proceed(Recog *recog)
00113 {
00114 MFCCCalc *mfcc;
00115 boolean break_flag;
00116 boolean break_decode;
00117 RecogProcess *p;
00118 boolean ok_p;
00119 #ifdef GMM_VAD
00120 GMMCalc *gmm;
00121 boolean break_gmm;
00122 #endif
00123
00124 break_decode = FALSE;
00125
00126 for(p = recog->process_list; p; p = p->next) {
00127 #ifdef DETERMINE
00128 p->have_determine = FALSE;
00129 #endif
00130 p->have_interim = FALSE;
00131 }
00132 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00133 mfcc->segmented = FALSE;
00134 }
00135
00136 #ifdef POWER_REJECT
00137 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00138 if (!mfcc->valid) continue;
00139 if (mfcc->f == 0) {
00140 mfcc->avg_power = 0.0;
00141 if (debug2_flag) jlog("STAT: power_reject: reset\n");
00142 }
00143 }
00144 #endif
00145
00146
00147 #ifdef GMM_VAD
00148 if (recog->gmm != NULL) {
00149
00150 break_gmm = FALSE;
00151 recog->gc->want_rewind = FALSE;
00152 }
00153 #endif
00154 if (recog->gmm != NULL && recog->gmmmfcc->valid) {
00155
00156 if (recog->gmmmfcc->f == 0) {
00157
00158 gmm_prepare(recog);
00159 }
00160
00161 gmm_proceed(recog);
00162 #ifdef GMM_VAD
00163
00164 gmm = recog->gc;
00165 gmm_check_trigger(recog);
00166 if (gmm->after_trigger) {
00167
00168 if (gmm->down_trigger) {
00169
00170 #ifdef GMM_VAD_DEBUG
00171 printf("GMM_VAD: %d: down trigger\n", recog->gmmmfcc->f);
00172 #endif
00173 recog->gmmmfcc->sparea_start = recog->gmmmfcc->f + 1 - recog->jconf->detect.gmm_margin;
00174 if (recog->gmmmfcc->sparea_start < 0) recog->gmmmfcc->sparea_start = 0;
00175 gmm->after_trigger = FALSE;
00176 recog->gmmmfcc->segmented = TRUE;
00177 break_gmm = TRUE;
00178 } else {
00179
00180 }
00181 } else {
00182
00183 if (gmm->up_trigger) {
00184
00185
00186
00187 if (recog->gmmmfcc->f + 1 < recog->jconf->detect.gmm_margin) {
00188 gmm->rewind_frame = 0;
00189 } else {
00190 gmm->rewind_frame = recog->gmmmfcc->f + 1 - recog->jconf->detect.gmm_margin;
00191 }
00192 #ifdef GMM_VAD_DEBUG
00193 printf("GMM_VAD: %d: up trigger, start recognition with %d frame rewind\n", recog->gmmmfcc->f, recog->gmmmfcc->f - gmm->rewind_frame);
00194 #endif
00195 gmm->want_rewind = TRUE;
00196 gmm->want_rewind_reprocess = TRUE;
00197 gmm->after_trigger = TRUE;
00198 return 0;
00199 } else {
00200
00201
00202
00203
00204 if (recog->gmmmfcc->f + 1 > GMM_VAD_AUTOSHRINK_LIMIT) {
00205 gmm->want_rewind = TRUE;
00206 gmm->want_rewind_reprocess = FALSE;
00207 gmm->rewind_frame = recog->gmmmfcc->f + 1 - recog->jconf->detect.gmm_margin;
00208 if (debug2_flag) {
00209 jlog("DEBUG: GMM_VAD: pause exceeded %d, rewind\n", GMM_VAD_AUTOSHRINK_LIMIT);
00210 }
00211 }
00212
00213
00214 return 0;
00215 }
00216 }
00217 #endif
00218 }
00219
00220 for(p = recog->process_list; p; p = p->next) {
00221 if (!p->live) continue;
00222 mfcc = p->am->mfcc;
00223 if (!mfcc->valid) {
00224
00225
00226 continue;
00227 }
00228
00229
00230
00231 if (mfcc->f == 0) {
00232
00233
00234 if (get_back_trellis_init(mfcc->param, p) == FALSE) {
00235 jlog("ERROR: %02d %s: failed to initialize the 1st pass\n", p->config->id, p->config->name);
00236 return -1;
00237 }
00238 }
00239 if (mfcc->f > 0 || p->am->hmminfo->multipath) {
00240
00241
00242 if (get_back_trellis_proceed(mfcc->f, mfcc->param, p, FALSE) == FALSE) {
00243 mfcc->segmented = TRUE;
00244 break_decode = TRUE;
00245 }
00246 if (p->config->successive.enabled) {
00247 if (detect_end_of_segment(p, mfcc->f - 1)) {
00248
00249 mfcc->segmented = TRUE;
00250 break_decode = TRUE;
00251 }
00252 }
00253 }
00254 }
00255
00256
00257
00258
00259
00260
00261 break_flag = FALSE;
00262 if (break_decode
00263 #ifdef GMM_VAD
00264 || (recog->gmm != NULL && break_gmm)
00265 #endif
00266 ) {
00267 break_flag = TRUE;
00268 }
00269
00270 if (break_flag) {
00271
00272
00273
00274
00275
00276
00277
00278
00279
00280 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00281 mfcc->last_time = mfcc->f - 1;
00282 }
00283
00284 if (! recog->jconf->decodeopt.segment) {
00285
00286
00287 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00288 mfcc->param->header.samplenum = mfcc->f;
00289 mfcc->param->samplenum = mfcc->f;
00290 }
00291 }
00292
00293 return 1;
00294 }
00295
00296
00297 #ifdef DETERMINE
00298 ok_p = FALSE;
00299 for(p=recog->process_list;p;p=p->next) {
00300 if (!p->live) continue;
00301 if (p->have_determine) {
00302 ok_p = TRUE;
00303 }
00304 }
00305 if (ok_p) callback_exec(CALLBACK_RESULT_PASS1_DETERMINED, recog);
00306 #endif
00307 ok_p = FALSE;
00308 for(p=recog->process_list;p;p=p->next) {
00309 if (!p->live) continue;
00310 if (p->have_interim) {
00311 ok_p = TRUE;
00312 }
00313 }
00314 if (ok_p) callback_exec(CALLBACK_RESULT_PASS1_INTERIM, recog);
00315
00316 return 0;
00317 }
00318
#ifdef POWER_REJECT
/**
 * Check whether the input should be rejected by its average power.
 *
 * For every MFCC instance whose accumulated power is non-zero, the
 * accumulated power divided by the number of samples in its parameter
 * is compared with the configured threshold (reject.powerthres).
 *
 * @param recog engine instance
 *
 * @return TRUE as soon as one instance falls below the threshold,
 * FALSE when none does.
 */
boolean
power_reject(Recog *recog)
{
  MFCCCalc *cur;

  cur = recog->mfcclist;
  while (cur != NULL) {
    /* instances with zero accumulated power are not examined */
    if (cur->avg_power != 0.0) {
      if (debug2_flag) {
        jlog("STAT: power_reject: MFCC%02d: avg_power = %f\n", cur->id, cur->avg_power / cur->param->samplenum);
      }
      if (cur->avg_power / cur->param->samplenum < recog->jconf->reject.powerthres) {
        return TRUE;
      }
    }
    cur = cur->next;
  }

  return FALSE;
}
#endif
00334
00371 void
00372 decode_end_segmented(Recog *recog)
00373 {
00374 boolean ok_p;
00375 int mseclen;
00376 RecogProcess *p;
00377 int last_status;
00378
00379
00380
00381 ok_p = TRUE;
00382 if (recog->jconf->reject.rejectshortlen > 0) {
00383 mseclen = (float)recog->mfcclist->last_time * (float)recog->jconf->input.period * (float)recog->jconf->input.frameshift / 10000.0;
00384 if (mseclen < recog->jconf->reject.rejectshortlen) {
00385 last_status = J_RESULT_STATUS_REJECT_SHORT;
00386 ok_p = FALSE;
00387 }
00388 }
00389
00390 #ifdef POWER_REJECT
00391 if (ok_p) {
00392 if (power_reject(recog)) {
00393 last_status = J_RESULT_STATUS_REJECT_POWER;
00394 ok_p = FALSE;
00395 }
00396 }
00397 #endif
00398
00399 if (ok_p) {
00400 for(p=recog->process_list;p;p=p->next) {
00401 if (!p->live) continue;
00402 finalize_1st_pass(p, p->am->mfcc->last_time);
00403 }
00404 } else {
00405 for(p=recog->process_list;p;p=p->next) {
00406 if (!p->live) continue;
00407 p->result.status = last_status;
00408 }
00409 }
00410 if (recog->jconf->decodeopt.segment) {
00411 finalize_segment(recog);
00412 }
00413
00414 if (recog->gmm != NULL) {
00415
00416 gmm_end(recog);
00417 }
00418 }
00419
00449 void
00450 decode_end(Recog *recog)
00451 {
00452 MFCCCalc *mfcc;
00453 int mseclen;
00454 boolean ok_p;
00455 RecogProcess *p;
00456 int last_status;
00457
00458 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00459 mfcc->segmented = FALSE;
00460 }
00461
00462 if (recog->gmm != NULL) {
00463
00464 gmm_end(recog);
00465 }
00466
00467 #ifdef GMM_VAD
00468
00469 if (recog->jconf->decodeopt.segment) {
00470 if (recog->gmm) {
00471 if (recog->gc->after_trigger == FALSE) {
00472 for(p=recog->process_list;p;p=p->next) {
00473 p->result.status = J_RESULT_STATUS_ONLY_SILENCE;
00474 }
00475
00476
00477
00478 finalize_segment(recog);
00479 return;
00480 }
00481 }
00482 }
00483 #endif
00484
00485
00486
00487 for(p=recog->process_list;p;p=p->next) {
00488 if (!p->live) continue;
00489 #ifdef SPSEGMENT_NAIST
00490 if (recog->jconf->decodeopt.segment) {
00491 if (p->pass1.after_trigger == FALSE) continue;
00492 }
00493 #endif
00494 mfcc = p->am->mfcc;
00495 if (mfcc->f > 0) {
00496 get_back_trellis_end(mfcc->param, p);
00497 }
00498 }
00499
00500
00501 for(p=recog->process_list;p;p=p->next) {
00502 if (!p->live) continue;
00503
00504 ok_p = TRUE;
00505
00506
00507 if (ok_p) {
00508 mfcc = p->am->mfcc;
00509
00510
00511 if (mfcc->f == 0) {
00512 jlog("STAT: no input frame\n");
00513 last_status = J_RESULT_STATUS_FAIL;
00514 ok_p = FALSE;
00515 }
00516 }
00517
00518
00519 if (ok_p) {
00520 if (recog->jconf->reject.rejectshortlen > 0) {
00521 mseclen = (float)mfcc->param->samplenum * (float)recog->jconf->input.period * (float)recog->jconf->input.frameshift / 10000.0;
00522 if (mseclen < recog->jconf->reject.rejectshortlen) {
00523 last_status = J_RESULT_STATUS_REJECT_SHORT;
00524 ok_p = FALSE;
00525 }
00526 }
00527 }
00528
00529 #ifdef POWER_REJECT
00530
00531 if (ok_p) {
00532 if (power_reject(recog)) {
00533 last_status = J_RESULT_STATUS_REJECT_POWER;
00534 ok_p = FALSE;
00535 }
00536 }
00537 #endif
00538
00539 #ifdef SPSEGMENT_NAIST
00540
00541 if (ok_p) {
00542 if (recog->jconf->decodeopt.segment) {
00543 if (p->pass1.after_trigger == FALSE) {
00544 last_status = J_RESULT_STATUS_ONLY_SILENCE;
00545 ok_p = FALSE;
00546 }
00547 }
00548 }
00549 #endif
00550
00551 if (ok_p) {
00552
00553 finalize_1st_pass(p, mfcc->param->samplenum);
00554 } else {
00555
00556 p->result.status = last_status;
00557 }
00558 }
00559 if (recog->jconf->decodeopt.segment) {
00560
00561
00562
00563 finalize_segment(recog);
00564 }
00565 }
00566
00567
00601 boolean
00602 get_back_trellis(Recog *recog)
00603 {
00604 boolean ok_p;
00605 MFCCCalc *mfcc;
00606 int rewind_frame;
00607 PROCESS_AM *am;
00608 boolean reprocess;
00609
00610
00611 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00612
00613 if (mfcc->param->samplenum == 0) mfcc->valid = FALSE;
00614 else mfcc->valid = TRUE;
00615
00616 mfcc->f = 0;
00617 }
00618
00619
00620 #ifdef BACKEND_VAD
00621 if (recog->jconf->decodeopt.segment) {
00622
00623
00624 spsegment_init(recog);
00625 } else {
00626
00627 callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
00628 callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
00629 recog->triggered = TRUE;
00630 }
00631 #else
00632 if (recog->jconf->decodeopt.segment) {
00633 if (!recog->process_segment) {
00634 callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
00635 }
00636 callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);
00637 } else {
00638 callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
00639 }
00640 callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
00641 recog->triggered = TRUE;
00642 #endif
00643
00644 while(1) {
00645 ok_p = TRUE;
00646 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00647 if (! mfcc->valid) continue;
00648 if (mfcc->f < mfcc->param->samplenum) {
00649 mfcc->valid = TRUE;
00650 ok_p = FALSE;
00651 } else {
00652 mfcc->valid = FALSE;
00653 }
00654 }
00655 if (ok_p) {
00656
00657
00658 break;
00659 }
00660
00661 switch (decode_proceed(recog)) {
00662 case -1:
00663 return FALSE;
00664 break;
00665 case 0:
00666 break;
00667 case 1:
00668
00669
00670
00671
00672 decode_end_segmented(recog);
00673
00674 return TRUE;
00675 }
00676
00677 #ifdef BACKEND_VAD
00678
00679 if (recog->jconf->decodeopt.segment) {
00680 if (recog->triggered == FALSE) {
00681 if (spsegment_trigger_sync(recog)) {
00682 if (!recog->process_segment) {
00683 callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
00684 }
00685 callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);
00686 callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
00687 recog->triggered = TRUE;
00688 }
00689 }
00690 }
00691 #endif
00692
00693 if (spsegment_need_restart(recog, &rewind_frame, &reprocess) == TRUE) {
00694
00695 spsegment_restart_mfccs(recog, rewind_frame, reprocess);
00696
00697 for(am=recog->amlist;am;am=am->next) {
00698 outprob_prepare(&(am->hmmwrk), am->mfcc->param->samplenum);
00699 }
00700 }
00701
00702 callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog);
00703
00704
00705
00706 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00707 if (!mfcc->valid) continue;
00708 mfcc->f++;
00709 }
00710
00711 if (recog->process_want_terminate) {
00712
00713 decode_end_segmented(recog);
00714 return TRUE;
00715 }
00716 }
00717
00718
00719 decode_end(recog);
00720
00721 return TRUE;
00722 }
00723
00724