00001
00042
00043
00044
00045
00046
00047
00048
00049 #include <julius/julius.h>
00050
00051
00052
00053
00054
00055
00056
00057
00111 int
00112 decode_proceed(Recog *recog)
00113 {
00114 MFCCCalc *mfcc;
00115 boolean break_flag;
00116 boolean break_decode;
00117 RecogProcess *p;
00118 boolean ok_p;
00119 #ifdef GMM_VAD
00120 GMMCalc *gmm;
00121 boolean break_gmm;
00122 #endif
00123
00124 break_decode = FALSE;
00125
00126 for(p = recog->process_list; p; p = p->next) {
00127 p->have_determine = FALSE;
00128 p->have_interim = FALSE;
00129 }
00130 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00131 mfcc->segmented = FALSE;
00132 }
00133
00134 #ifdef POWER_REJECT
00135 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00136 if (mfcc->f == 0) {
00137 mfcc->avg_power = 0.0;
00138 if (debug2_flag) jlog("STAT: power_reject: reset\n");
00139 }
00140 }
00141 #endif
00142
00143
00144 if (recog->gmm != NULL) {
00145
00146 if (recog->gmmmfcc->f == 0) {
00147
00148 gmm_prepare(recog);
00149 }
00150
00151 gmm_proceed(recog);
00152 #ifdef GMM_VAD
00153
00154 break_gmm = FALSE;
00155
00156 gmm = recog->gc;
00157 gmm->want_rewind = FALSE;
00158 gmm_check_trigger(recog);
00159 if (gmm->after_trigger) {
00160
00161 if (gmm->down_trigger) {
00162
00163 #ifdef GMM_VAD_DEBUG
00164 printf("GMM_VAD: %d: down trigger\n", recog->gmmmfcc->f);
00165 #endif
00166 recog->gmmmfcc->sparea_start = recog->gmmmfcc->f - recog->jconf->detect.gmm_margin;
00167 if (recog->gmmmfcc->sparea_start < 0) recog->gmmmfcc->sparea_start = 0;
00168 gmm->after_trigger = FALSE;
00169 recog->gmmmfcc->segmented = TRUE;
00170 break_gmm = TRUE;
00171 } else {
00172
00173 }
00174 } else {
00175
00176 if (gmm->up_trigger) {
00177
00178
00179
00180 if (recog->gmmmfcc->f < recog->jconf->detect.gmm_margin) {
00181 gmm->rewind_frame = 0;
00182 } else {
00183 gmm->rewind_frame = recog->gmmmfcc->f - recog->jconf->detect.gmm_margin;
00184 }
00185 #ifdef GMM_VAD_DEBUG
00186 printf("GMM_VAD: %d: up trigger, start recognition with %d frame rewind\n", recog->gmmmfcc->f, recog->gmmmfcc->f - gmm->rewind_frame);
00187 #endif
00188 gmm->want_rewind = TRUE;
00189 gmm->want_rewind_reprocess = TRUE;
00190 gmm->after_trigger = TRUE;
00191 return 0;
00192 } else {
00193
00194
00195
00196
00197 if (recog->gmmmfcc->f > GMM_VAD_AUTOSHRINK_LIMIT) {
00198 gmm->want_rewind = TRUE;
00199 gmm->want_rewind_reprocess = FALSE;
00200 gmm->rewind_frame = recog->gmmmfcc->f - recog->jconf->detect.gmm_margin;
00201 if (debug2_flag) {
00202 jlog("DEBUG: GMM_VAD: pause exceeded %d, rewind\n", GMM_VAD_AUTOSHRINK_LIMIT);
00203 }
00204 }
00205
00206
00207 return 0;
00208 }
00209 }
00210 #endif
00211 }
00212
00213 for(p = recog->process_list; p; p = p->next) {
00214 if (!p->live) continue;
00215 mfcc = p->am->mfcc;
00216 if (!mfcc->valid) {
00217
00218
00219 continue;
00220 }
00221
00222
00223
00224 if (mfcc->f == 0) {
00225
00226
00227 if (get_back_trellis_init(mfcc->param, p) == FALSE) {
00228 jlog("ERROR: %02d %s: failed to initialize the 1st pass\n", p->config->id, p->config->name);
00229 return -1;
00230 }
00231 }
00232 if (mfcc->f > 0 || p->am->hmminfo->multipath) {
00233
00234
00235 if (get_back_trellis_proceed(mfcc->f, mfcc->param, p, FALSE) == FALSE) {
00236 mfcc->segmented = TRUE;
00237 break_decode = TRUE;
00238 }
00239 if (p->config->successive.enabled) {
00240 if (detect_end_of_segment(p, mfcc->f - 1)) {
00241
00242 mfcc->segmented = TRUE;
00243 break_decode = TRUE;
00244 }
00245 }
00246 }
00247 }
00248
00249
00250
00251
00252
00253
00254 break_flag = FALSE;
00255 if (break_decode
00256 #ifdef GMM_VAD
00257 && (recog->gmm != NULL && break_gmm)
00258 #endif
00259 ) {
00260 break_flag = TRUE;
00261 }
00262
00263 if (break_flag) {
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00274 mfcc->last_time = mfcc->f - 1;
00275 }
00276
00277 if (! recog->jconf->decodeopt.segment) {
00278
00279
00280 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00281 mfcc->param->header.samplenum = mfcc->f;
00282 mfcc->param->samplenum = mfcc->f;
00283 }
00284 }
00285
00286 return 1;
00287 }
00288
00289
00290 ok_p = FALSE;
00291 for(p=recog->process_list;p;p=p->next) {
00292 if (!p->live) continue;
00293 if (p->have_determine) {
00294 ok_p = TRUE;
00295 }
00296 }
00297 if (ok_p) callback_exec(CALLBACK_RESULT_PASS1_DETERMINED, recog);
00298 ok_p = FALSE;
00299 for(p=recog->process_list;p;p=p->next) {
00300 if (!p->live) continue;
00301 if (p->have_interim) {
00302 ok_p = TRUE;
00303 }
00304 }
00305 if (ok_p) callback_exec(CALLBACK_RESULT_PASS1_INTERIM, recog);
00306
00307 return 0;
00308 }
00309
#ifdef POWER_REJECT
/**
 * Check whether the input should be rejected because of low power.
 *
 * For every MFCC instance whose accumulated power is non-zero, the
 * accumulated power divided by the number of parameter frames is
 * compared against the configured power threshold.
 *
 * @param recog [in] engine instance
 *
 * @return TRUE as soon as one MFCC instance falls below the threshold,
 * FALSE if none does.
 */
boolean
power_reject(Recog *recog)
{
  MFCCCalc *m;

  for (m = recog->mfcclist; m != NULL; m = m->next) {
    /* instances with no accumulated power are not examined */
    if (m->avg_power != 0.0) {
      if (debug2_flag) {
        jlog("STAT: power_reject: MFCC%02d: avg_power = %f\n", m->id, m->avg_power / m->param->samplenum);
      }
      if (m->avg_power / m->param->samplenum < recog->jconf->reject.powerthres) {
        return TRUE;
      }
    }
  }

  return FALSE;
}
#endif
00325
00362 void
00363 decode_end_segmented(Recog *recog)
00364 {
00365 boolean ok_p;
00366 int mseclen;
00367 RecogProcess *p;
00368 int last_status;
00369
00370
00371
00372 ok_p = TRUE;
00373 if (recog->jconf->reject.rejectshortlen > 0) {
00374 mseclen = (float)recog->mfcclist->last_time * (float)recog->jconf->input.period * (float)recog->jconf->input.frameshift / 10000.0;
00375 if (mseclen < recog->jconf->reject.rejectshortlen) {
00376 last_status = J_RESULT_STATUS_REJECT_SHORT;
00377 ok_p = FALSE;
00378 }
00379 }
00380
00381 #ifdef POWER_REJECT
00382 if (ok_p) {
00383 if (power_reject(recog)) {
00384 last_status = J_RESULT_STATUS_REJECT_POWER;
00385 ok_p = FALSE;
00386 }
00387 }
00388 #endif
00389
00390 if (ok_p) {
00391 for(p=recog->process_list;p;p=p->next) {
00392 if (!p->live) continue;
00393 finalize_1st_pass(p, p->am->mfcc->last_time);
00394 }
00395 } else {
00396 for(p=recog->process_list;p;p=p->next) {
00397 if (!p->live) continue;
00398 p->result.status = last_status;
00399 }
00400 }
00401 if (recog->jconf->decodeopt.segment) {
00402 finalize_segment(recog);
00403 }
00404 if (recog->gmm != NULL) {
00405
00406 gmm_end(recog);
00407 }
00408 }
00409
00439 void
00440 decode_end(Recog *recog)
00441 {
00442 MFCCCalc *mfcc;
00443 int mseclen;
00444 boolean ok_p;
00445 RecogProcess *p;
00446 int last_status;
00447
00448 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00449 mfcc->segmented = FALSE;
00450 }
00451 if (recog->gmm != NULL) {
00452
00453 gmm_end(recog);
00454 }
00455
00456 #ifdef GMM_VAD
00457
00458 if (recog->jconf->decodeopt.segment) {
00459 if (recog->gmm) {
00460 if (recog->gc->after_trigger == FALSE) {
00461 for(p=recog->process_list;p;p=p->next) {
00462 p->result.status = J_RESULT_STATUS_ONLY_SILENCE;
00463 }
00464
00465
00466
00467 finalize_segment(recog);
00468 return;
00469 }
00470 }
00471 }
00472 #endif
00473
00474
00475
00476 for(p=recog->process_list;p;p=p->next) {
00477 if (!p->live) continue;
00478 #ifdef SPSEGMENT_NAIST
00479 if (recog->jconf->decodeopt.segment) {
00480 if (p->pass1.after_trigger == FALSE) continue;
00481 }
00482 #endif
00483 mfcc = p->am->mfcc;
00484 if (mfcc->f > 0) {
00485 get_back_trellis_end(mfcc->param, p);
00486 }
00487 }
00488
00489
00490 for(p=recog->process_list;p;p=p->next) {
00491 if (!p->live) continue;
00492
00493 ok_p = TRUE;
00494
00495
00496 if (ok_p) {
00497 mfcc = p->am->mfcc;
00498
00499
00500 if (mfcc->f == 0) {
00501 jlog("STAT: no input frame\n");
00502 last_status = J_RESULT_STATUS_FAIL;
00503 ok_p = FALSE;
00504 }
00505 }
00506
00507
00508 if (ok_p) {
00509 if (recog->jconf->reject.rejectshortlen > 0) {
00510 mseclen = (float)mfcc->param->samplenum * (float)recog->jconf->input.period * (float)recog->jconf->input.frameshift / 10000.0;
00511 if (mseclen < recog->jconf->reject.rejectshortlen) {
00512 last_status = J_RESULT_STATUS_REJECT_SHORT;
00513 ok_p = FALSE;
00514 }
00515 }
00516 }
00517
00518 #ifdef POWER_REJECT
00519
00520 if (ok_p) {
00521 if (power_reject(recog)) {
00522 last_status = J_RESULT_STATUS_REJECT_POWER;
00523 ok_p = FALSE;
00524 }
00525 }
00526 #endif
00527
00528 #ifdef SPSEGMENT_NAIST
00529
00530 if (ok_p) {
00531 if (recog->jconf->decodeopt.segment) {
00532 if (p->pass1.after_trigger == FALSE) {
00533 last_status = J_RESULT_STATUS_ONLY_SILENCE;
00534 ok_p = FALSE;
00535 }
00536 }
00537 }
00538 #endif
00539
00540 if (ok_p) {
00541
00542 finalize_1st_pass(p, mfcc->param->samplenum);
00543 } else {
00544
00545 p->result.status = last_status;
00546 }
00547 }
00548 if (recog->jconf->decodeopt.segment) {
00549
00550
00551
00552 finalize_segment(recog);
00553 }
00554 }
00555
00556
00590 boolean
00591 get_back_trellis(Recog *recog)
00592 {
00593 boolean ok_p;
00594 MFCCCalc *mfcc;
00595 int rewind_frame;
00596 PROCESS_AM *am;
00597 RecogProcess *p;
00598 boolean reprocess;
00599 boolean started;
00600
00601
00602 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00603
00604 if (mfcc->param->samplenum == 0) mfcc->valid = FALSE;
00605 else mfcc->valid = TRUE;
00606
00607 mfcc->f = 0;
00608 }
00609
00610
00611 #ifdef BACKEND_VAD
00612 if (recog->jconf->decodeopt.segment) {
00613
00614
00615 spsegment_init(recog);
00616 } else {
00617
00618 callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
00619 callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
00620 recog->triggered = TRUE;
00621 }
00622 #else
00623 if (recog->jconf->decodeopt.segment) {
00624 if (!recog->process_segment) {
00625 callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
00626 }
00627 callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);
00628 } else {
00629 callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
00630 }
00631 callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
00632 recog->triggered = TRUE;
00633 #endif
00634
00635 while(1) {
00636 ok_p = TRUE;
00637 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00638 if (! mfcc->valid) continue;
00639 if (mfcc->f < mfcc->param->samplenum) {
00640 mfcc->valid = TRUE;
00641 ok_p = FALSE;
00642 } else {
00643 mfcc->valid = FALSE;
00644 }
00645 }
00646 if (ok_p) {
00647
00648
00649 break;
00650 }
00651
00652 switch (decode_proceed(recog)) {
00653 case -1:
00654 return FALSE;
00655 break;
00656 case 0:
00657 break;
00658 case 1:
00659
00660
00661
00662
00663 decode_end_segmented(recog);
00664
00665 return TRUE;
00666 }
00667
00668 #ifdef BACKEND_VAD
00669
00670 if (recog->jconf->decodeopt.segment) {
00671 if (recog->triggered == FALSE) {
00672 if (spsegment_trigger_sync(recog)) {
00673 if (!recog->process_segment) {
00674 callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
00675 }
00676 callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);
00677 callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
00678 recog->triggered = TRUE;
00679 }
00680 }
00681 }
00682 #endif
00683
00684 if (spsegment_need_restart(recog, &rewind_frame, &reprocess) == TRUE) {
00685
00686 spsegment_restart_mfccs(recog, rewind_frame, reprocess);
00687
00688 for(am=recog->amlist;am;am=am->next) {
00689 outprob_prepare(&(am->hmmwrk), am->mfcc->param->samplenum);
00690 }
00691 }
00692
00693 callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog);
00694
00695
00696
00697 for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
00698 if (!mfcc->valid) continue;
00699 mfcc->f++;
00700 }
00701
00702 if (recog->process_want_terminate) {
00703
00704 decode_end_segmented(recog);
00705 return TRUE;
00706 }
00707 }
00708
00709
00710 decode_end(recog);
00711
00712 return TRUE;
00713 }
00714
00715