libjulius/src/recogmain.c

Go to the documentation of this file.
00001 
00019 /*
00020  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
00021  * Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan
00022  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00023  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
00024  * All rights reserved
00025  */
00161 #define GLOBAL_VARIABLE_DEFINE  
00162 #include <julius/julius.h>
00163 #include <signal.h>
00164 #if defined(_WIN32) && !defined(__CYGWIN32__)
00165 #include <mbctype.h>
00166 #include <mbstring.h>
00167 #endif
00168 
00169 /* ---------- utility functions -----------------------------------------*/
00170 #ifdef REPORT_MEMORY_USAGE
00171 
/**
 * Print the memory usage of the current process (debug aid, compiled only
 * when REPORT_MEMORY_USAGE is defined).
 *
 * Builds a "ps -o vsz,rss -p <pid>" command line for this process, runs it
 * through system(), and then flushes stdout and stderr.
 */
static void
print_mem(void)
{
  char cmd[200];
  int written;

  /* bounded formatting; pid_t is cast because "%d" requires an int */
  written = snprintf(cmd, sizeof(cmd), "ps -o vsz,rss -p %d", (int)getpid());
  if (written < 0 || (size_t)written >= sizeof(cmd)) {
    /* formatting failed or was truncated: do not run a broken command */
    return;
  }
  /* best-effort diagnostic: the exit status of ps is deliberately ignored */
  (void)system(cmd);
  fflush(stdout);
  fflush(stderr);
}
00190 #endif
00191           
00192 
00208 void
00209 result_sentence_malloc(RecogProcess *r, int num)
00210 {
00211   int i;
00212   r->result.sent = (Sentence *)mymalloc(sizeof(Sentence) * num);
00213   for(i=0;i<num;i++) {
00214     r->result.sent[i].align.filled = FALSE;
00215     r->result.sent[i].align.w = NULL;
00216     r->result.sent[i].align.ph = NULL;
00217     r->result.sent[i].align.loc = NULL;
00218     r->result.sent[i].align.begin_frame = NULL;
00219     r->result.sent[i].align.end_frame = NULL;
00220     r->result.sent[i].align.avgscore = NULL;
00221     r->result.sent[i].align.is_iwsp = NULL;
00222   }
00223   r->result.sentnum = 0;
00224 }
00225 
00239 void
00240 result_sentence_free(RecogProcess *r)
00241 {  
00242   int i;
00243   if (r->result.sent) {
00244     for(i=0;i<r->result.sentnum;i++) {
00245       if (r->result.sent[i].align.w) free(r->result.sent[i].align.w);
00246       if (r->result.sent[i].align.ph) free(r->result.sent[i].align.ph);
00247       if (r->result.sent[i].align.loc) free(r->result.sent[i].align.loc);
00248       if (r->result.sent[i].align.begin_frame) free(r->result.sent[i].align.begin_frame);
00249       if (r->result.sent[i].align.end_frame) free(r->result.sent[i].align.end_frame);
00250       if (r->result.sent[i].align.avgscore) free(r->result.sent[i].align.avgscore);
00251       if (r->result.sent[i].align.is_iwsp) free(r->result.sent[i].align.is_iwsp);
00252     }
00253     free(r->result.sent);
00254     r->result.sent = NULL;
00255   }
00256 }
00257 
00271 void
00272 clear_result(RecogProcess *r)
00273 {
00274 #ifdef WORD_GRAPH
00275   /* clear 1st pass word graph output */
00276   wordgraph_clean(&(r->result.wg1));
00277 #endif
00278 
00279   if (r->lmvar == LM_DFA_WORD) {
00280     if (r->result.status == J_RESULT_STATUS_SUCCESS) {
00281       /* clear word recog result of first pass as in final result */
00282       free(r->result.sent);
00283     }
00284   } else {
00285     if (r->graphout) {
00286       if (r->config->graph.confnet) {
00287         /* free confusion network clusters */
00288         cn_free_all(&(r->result.confnet));
00289       } else if (r->config->graph.lattice) {
00290       }
00291       /* clear all wordgraph */
00292       wordgraph_clean(&(r->result.wg));
00293     }
00294     result_sentence_free(r);
00295   }
00296 }
00297 
00298 /* --------------------- speech buffering ------------------ */
00299 
00332 static int
00333 adin_cut_callback_store_buffer(SP16 *now, int len, Recog *recog)
00334 {
00335   if (recog->speechlen == 0) {          /* first part of a segment */
00336     if (!recog->process_active) {
00337       return(1);
00338     }
00339   }
00340 
00341   if (recog->speechlen + len > recog->speechalloclen) {
00342     while (recog->speechlen + len > recog->speechalloclen) {
00343       recog->speechalloclen += MAX_SPEECH_ALLOC_STEP;
00344     }
00345     if (recog->speech == NULL) {
00346       recog->speech = (SP16 *)mymalloc(sizeof(SP16) * recog->speechalloclen);
00347     } else {
00348       if (debug2_flag) {
00349         jlog("STAT: expanding recog->speech to %d samples\n", recog->speechalloclen);
00350       }
00351       recog->speech = (SP16 *)myrealloc(recog->speech, sizeof(SP16) * recog->speechalloclen);
00352     }
00353   }
00354 
00355   /* store now[0..len] to recog->speech[recog->speechlen] */
00356   memcpy(&(recog->speech[recog->speechlen]), now, len * sizeof(SP16));
00357   recog->speechlen += len;
00358   return(0);                    /* tell adin_go to continue reading */
00359 }
00360 
00361 
00362 /* --------------------- adin check callback --------------- */
00390 static int
00391 callback_check_in_adin(Recog *recog)
00392 {
00393   /* module: check command and terminate recording when requested */
00394   callback_exec(CALLBACK_POLL, recog);
00395   /* With audio input via adinnet, TERMINATE command will issue terminate
00396      command to the adinnet client.  The client then stops recording
00397      immediately and return end-of-segment ack.  Then it will cause this
00398      process to stop recognition as normal.  So we need not to
00399      perform immediate termination at this callback, but just ignore the
00400      results in the main.c.  */
00401 #if 1
00402 /* 
00403  *   if (recog->jconf->input.speech_input != SP_ADINNET) {
00404  *     if (recog->process_want_terminate) {
00405  *       return(-2);
00406  *     }
00407  *     if (recog->process_want_reload) {
00408  *       return(-1);
00409  *     }
00410  *   }
00411  */
00412   if (recog->process_want_terminate) { /* TERMINATE ... force termination */
00413     return(-2);
00414   }
00415   if (recog->process_want_reload) {
00416     return(-1);
00417   }
00418 #else
00419   if (recog->process_want_terminate /* TERMINATE ... force termination */
00420       && recog->jconf->input.speech_input != SP_ADINNET) {
00421     return(-2);
00422   }
00423   if (recog->process_want_reload) {
00424     return(-1);
00425   }
00426 #endif
00427   return(0);
00428 }
00429 
00430 /*********************/
00431 /* open input stream */
00432 /*********************/
00450 int
00451 j_open_stream(Recog *recog, char *file_or_dev_name)
00452 {
00453   Jconf *jconf;
00454 
00455   jconf = recog->jconf;
00456 
00457   if (jconf->input.speech_input == SP_MFCFILE) {
00458     /* read parameter file */
00459     param_init_content(recog->mfcclist->param);
00460     if (rdparam(file_or_dev_name, recog->mfcclist->param) == FALSE) {
00461       jlog("ERROR: error in reading parameter file: %s\n", file_or_dev_name);
00462       return -1;
00463     }
00464     /* check and strip invalid frames */
00465     if (jconf->preprocess.strip_zero_sample) {
00466       param_strip_zero(recog->mfcclist->param);
00467     }
00468 
00469     /* output frame length */
00470     callback_exec(CALLBACK_STATUS_PARAM, recog);
00471   } else {                      /* raw speech input */
00472     /* begin A/D input */
00473     if (adin_begin(recog->adin) == FALSE) {
00474       return -2;
00475     }
00476   }
00477     
00478 #if 0
00479     /* if not module mode, process becomes online after all initialize done */
00480     process_online = TRUE;
00481     callback_exec(CALLBACK_EVENT_PROCESS_ONLINE, recog);
00482 #endif
00483 
00484   return 0;
00485 
00486 }
00487 
00488 /**********************************************************************/
00489 /**********************************************************************/
00490 /**********************************************************************/
00491 
00504 static void
00505 result_error(Recog *recog, int status)
00506 {
00507   MFCCCalc *mfcc;
00508   RecogProcess *r;
00509   boolean ok_p;
00510 
00511   for(r=recog->process_list;r;r=r->next) r->result.status = status;
00512 
00513   ok_p = FALSE;
00514   for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00515     if (mfcc->f > 0) {
00516       ok_p = TRUE;
00517       break;
00518     }
00519   }
00520   if (ok_p) {                   /* had some input */
00521     /* output as rejected */
00522     callback_exec(CALLBACK_RESULT, recog);
00523   }
00524 }
00525 
00561 static int
00562 j_recognize_stream_core(Recog *recog)
00563 {
00564   Jconf *jconf;
00565   int ret;
00566   float seclen, mseclen;
00567   RecogProcess *r;
00568   MFCCCalc *mfcc;
00569   PROCESS_AM *am;
00570   PROCESS_LM *lm;
00571   boolean ok_p;
00572   boolean process_segment_last;
00573 
00574   jconf = recog->jconf;
00575 
00576   if (jconf->input.speech_input != SP_MFCFILE) {
00577     for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00578       param_init_content(mfcc->param);
00579     }
00580   }
00581 
00582   /* if no process instance exist, start with terminated */
00583   if (recog->process_list == NULL) {
00584     jlog("STAT: no recog process, engine inactive\n");
00585     j_request_pause(recog);
00586   }
00587 
00588   /* update initial recognition process status */
00589   for(r=recog->process_list;r;r=r->next) {
00590     if (r->active > 0) {
00591       r->live = TRUE;
00592     } else if (r->active < 0) {
00593       r->live = FALSE;
00594     }
00595     r->active = 0;
00596   }
00597 
00598   /******************************************************************/
00599   /* do recognition for each incoming segment from the input stream */
00600   /******************************************************************/
00601   while (1) {
00602     
00603   start_recog:
00604 
00605     /*************************************/
00606     /* Update recognition process status */
00607     /*************************************/
00608     for(r=recog->process_list;r;r=r->next) {
00609       if (r->active > 0) {
00610         r->live = TRUE;
00611         jlog("STAT: SR%02d %s now active\n", r->config->id, r->config->name);
00612       } else if (r->active < 0) {
00613         r->live = FALSE;
00614         jlog("STAT: SR%02d %s now inactive\n", r->config->id, r->config->name);
00615       }
00616       r->active = 0;
00617     }
00618     if (debug2_flag) {
00619       for(r=recog->process_list;r;r=r->next) {
00620         jlog("DEBUG: %s: SR%02d %s\n", r->live ? "live" : "dead", r->config->id, r->config->name);
00621       }
00622     }
00623     /* check if any process is live */
00624     if (recog->process_active) {
00625       ok_p = FALSE;
00626       for(r=recog->process_list;r;r=r->next) {
00627         if (r->live) ok_p = TRUE;
00628       }
00629       if (!ok_p) {              /* no process is alive */
00630         /* make whole process as inactive */
00631         jlog("STAT: all recog process inactive, pause engine now\n");
00632         j_request_pause(recog);
00633       }
00634     }
00635 
00636     /* Check whether process status was changed while in the last run */
00637     if (recog->process_online != recog->process_active) {
00638       recog->process_online = recog->process_active;
00639       if (recog->process_online) callback_exec(CALLBACK_EVENT_PROCESS_ONLINE, recog);
00640       else callback_exec(CALLBACK_EVENT_PROCESS_OFFLINE, recog);
00641     }
00642     /* execute poll callback */
00643     if (recog->process_active) {
00644       callback_exec(CALLBACK_POLL, recog);
00645     }
00646     /* reset reload flag here */
00647     j_reset_reload(recog);
00648 
00649     if (!recog->process_active) {
00650       /* now sleeping, return */
00651       /* in the next call, we will resume from here */
00652       return 1;
00653     }
00654     /* update process status */
00655     if (recog->process_online != recog->process_active) {
00656       recog->process_online = recog->process_active;
00657       if (recog->process_online) callback_exec(CALLBACK_EVENT_PROCESS_ONLINE, recog);
00658       else callback_exec(CALLBACK_EVENT_PROCESS_OFFLINE, recog);
00659     }
00660 
00661     /*********************************************************/
00662     /* check for grammar to change, and rebuild if necessary */
00663     /*********************************************************/
00664     for(lm=recog->lmlist;lm;lm=lm->next) {
00665       if (lm->lmtype == LM_DFA) {
00666         multigram_update(lm); /* some modification occured if return TRUE*/
00667       }
00668     }
00669     for(r=recog->process_list;r;r=r->next) {
00670       if (!r->live) continue;
00671       if (r->lmtype == LM_DFA && r->lm->global_modified) {
00672         multigram_build(r);
00673       }
00674     }
00675     for(lm=recog->lmlist;lm;lm=lm->next) {
00676       if (lm->lmtype == LM_DFA) lm->global_modified = FALSE;
00677     }
00678 
00679     ok_p = FALSE;
00680     for(r=recog->process_list;r;r=r->next) {
00681       if (!r->live) continue;
00682       if (r->lmtype == LM_DFA) {
00683         if (r->lm->winfo == NULL ||
00684             (r->lmvar == LM_DFA_GRAMMAR && r->lm->dfa == NULL)) {
00685           /* make this instance inactive */
00686           r->active = -1;
00687           ok_p = TRUE;
00688         }
00689       }
00690     }
00691     if (ok_p) {                 /* at least one instance has no grammar */
00692       goto start_recog;
00693     }
00694 
00695     /**************************************/
00696     /* getting input and perform 1st pass */
00697     /**************************************/
00698     if (jconf->input.speech_input == SP_MFCFILE) {
00699       /************************/
00700       /* parameter file input */
00701       /************************/
00702       /********************************/
00703       /* check the analized parameter */
00704       /********************************/
00705       /* parameter type check --- compare the type to that of HMM,
00706          and adjust them if necessary */
00707       if (jconf->input.paramtype_check_flag) {
00708         for(am=recog->amlist;am;am=am->next) {
00709           /* return param itself or new malloced param */
00710           if (param_check_and_adjust(am->hmminfo, am->mfcc->param, verbose_flag) == -1) {       /* failed */
00711             
00712             for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00713               param_init_content(mfcc->param);
00714             }
00715             /* tell failure */
00716             result_error(recog, J_RESULT_STATUS_FAIL);
00717             goto end_recog;
00718           }
00719         }
00720       }
00721       /* whole input is already read, so set input status to end of stream */
00722       /* and jump to the start point of 1st pass */
00723       ret = 0;
00724     } else {
00725       /****************************************************/
00726       /* raw wave data input (mic, file, adinnet, etc...) */
00727       /****************************************************/
00728       if (jconf->decodeopt.realtime_flag) {
00729         /********************************************/
00730         /* REALTIME ON-THE-FLY DECODING OF 1ST-PASS */
00731         /********************************************/
00732         /* store, analysis and search in a pipeline  */
00733         /* main function is RealTimePipeLine() at realtime-1stpass.c, and
00734            it will be periodically called for each incoming input segment
00735            from the AD-in function adin_go().  RealTimePipeLine() will be
00736            called as a callback function from adin_go() */
00737         /* after this part, directly jump to the beginning of the 2nd pass */
00738 
00739         if (recog->process_segment) {
00740           /*****************************************************************/
00741           /* short-pause segmentation: process last remaining frames first */
00742           /*****************************************************************/
00743           /* last was segmented by short pause */
00744           /* the margin segment in the last input will be re-processed first,
00745              and then the speech input will be processed */
00746           /* output listening start message */
00747           callback_exec(CALLBACK_EVENT_SPEECH_READY, recog);
00748           /* process the last remaining parameters */
00749           ret = RealTimeResume(recog);
00750           if (ret < 0) {                /* error end in the margin */
00751             jlog("ERROR: failed to process last remaining samples on RealTimeResume\n"); /* exit now! */
00752             return -1;
00753           }
00754           if (ret != 1) {       /* if segmented again in the margin, not process the rest */
00755             /* last parameters has been processed, so continue with the
00756                current input as normal */
00757             /* process the incoming input */
00758             ret = adin_go(RealTimePipeLine, callback_check_in_adin, recog);
00759             if (ret < 0) {              /* error end in adin_go */
00760               if (ret == -2 || recog->process_want_terminate) {
00761                 /* terminated by callback */
00762                 RealTimeTerminate(recog);
00763                 /* reset param */
00764                 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00765                   param_init_content(mfcc->param);
00766                 }
00767                 /* execute callback at end of pass1 */
00768                 if (recog->triggered) {
00769                   callback_exec(CALLBACK_EVENT_PASS1_END, recog);
00770                   /* output result terminate */
00771                   result_error(recog, J_RESULT_STATUS_TERMINATE);
00772                 }
00773                 goto end_recog; /* cancel this recognition */
00774               }
00775               jlog("ERROR: an error occured at on-the-fly 1st pass decoding\n");          /* exit now! */
00776               return(-1);
00777             }
00778           }
00779           
00780         } else {
00781 
00782           /***********************************************************/
00783           /* last was not segmented, process the new incoming input  */
00784           /***********************************************************/
00785           /* end of this input will be determined by either end of stream
00786              (in case of file input), or silence detection by adin_go(), or
00787              'TERMINATE' command from module (if module mode) */
00788           /* prepare work area for on-the-fly processing */
00789           if (RealTimePipeLinePrepare(recog) == FALSE) {
00790             jlog("ERROR: failed to prepare for on-the-fly 1st pass decoding");
00791             return (-1);
00792           }
00793           /* output 'listening start' message */
00794           callback_exec(CALLBACK_EVENT_SPEECH_READY, recog);
00795           /* process the incoming input */
00796           ret = adin_go(RealTimePipeLine, callback_check_in_adin, recog);
00797           if (ret < 0) {                /* error end in adin_go */
00798             if (ret == -2 || recog->process_want_terminate) {   
00799               /* terminated by callback */
00800               RealTimeTerminate(recog);
00801               /* reset param */
00802               for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00803                 param_init_content(mfcc->param);
00804               }
00805               /* execute callback at end of pass1 */
00806               if (recog->triggered) {
00807                 callback_exec(CALLBACK_EVENT_PASS1_END, recog);
00808                 /* output result terminate */
00809                 result_error(recog, J_RESULT_STATUS_TERMINATE);
00810               }
00811               goto end_recog;
00812             }
00813             jlog("ERROR: an error occured at on-the-fly 1st pass decoding\n");          /* exit now! */
00814             return(-1);
00815           }
00816         }
00817         /******************************************************************/
00818         /* speech stream has been processed on-the-fly, and 1st pass ends */
00819         /******************************************************************/
00820         /* last procedure of 1st-pass */
00821         if (RealTimeParam(recog) == FALSE) {
00822           jlog("ERROR: fatal error occured, program terminates now\n");
00823           return -1;
00824         }
00825 
00826 #ifdef BACKEND_VAD
00827         /* if not triggered, skip this segment */
00828         if (recog->jconf->decodeopt.segment && ! recog->triggered) {
00829           goto end_recog;
00830         }
00831 #endif
00832 
00833         /* execute callback for 1st pass result */
00834         /* result.status <0 must be skipped inside callback */
00835         callback_exec(CALLBACK_RESULT_PASS1, recog);
00836 #ifdef WORD_GRAPH
00837         /* result.wg1 == NULL should be skipped inside callback */
00838         callback_exec(CALLBACK_RESULT_PASS1_GRAPH, recog);
00839 #endif
00840         /* execute callback at end of pass1 */
00841         callback_exec(CALLBACK_EVENT_PASS1_END, recog);
00842         /* output frame length */
00843         callback_exec(CALLBACK_STATUS_PARAM, recog);
00844         /* if terminate signal has been received, discard this input */
00845         if (recog->process_want_terminate) {
00846           result_error(recog, J_RESULT_STATUS_TERMINATE);
00847           goto end_recog;
00848         }
00849 
00850         /* end of 1st pass, jump to 2nd pass */
00851         goto end_1pass;
00852         
00853       } /* end of realtime_flag && speech stream input */
00854       
00855       /******************************************/
00856       /* buffered speech input (not on-the-fly) */
00857       /******************************************/
00858       if (!recog->process_segment) { /* no segment left */
00859 
00860         /****************************************/
00861         /* store raw speech samples to speech[] */
00862         /****************************************/
00863         recog->speechlen = 0;
00864         for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00865           param_init_content(mfcc->param);
00866         }
00867         /* output 'listening start' message */
00868         callback_exec(CALLBACK_EVENT_SPEECH_READY, recog);
00869         /* tell module to start recording */
00870         /* the "adin_cut_callback_store_buffer" simply stores
00871            the input speech to a buffer "speech[]" */
00872         /* end of this input will be determined by either end of stream
00873            (in case of file input), or silence detection by adin_go(), or
00874            'TERMINATE' command from module (if module mode) */
00875         ret = adin_go(adin_cut_callback_store_buffer, callback_check_in_adin, recog);
00876         if (ret < 0) {          /* error end in adin_go */
00877           if (ret == -2 || recog->process_want_terminate) {
00878             /* terminated by module */
00879             /* output fail */
00880             result_error(recog, J_RESULT_STATUS_TERMINATE);
00881             goto end_recog;
00882           }
00883           jlog("ERROR: an error occured while recording input\n");
00884           return -1;
00885         }
00886         
00887         /* output recorded length */
00888         seclen = (float)recog->speechlen / (float)jconf->input.sfreq;
00889         jlog("STAT: %d samples (%.2f sec.)\n", recog->speechlen, seclen);
00890         
00891         /* -rejectshort 指定時, 入力が指定時間以下であれば
00892            ここで入力を棄却する */
00893         /* when using "-rejectshort", and input was shorter than
00894            specified, reject the input here */
00895         if (jconf->reject.rejectshortlen > 0) {
00896           if (seclen * 1000.0 < jconf->reject.rejectshortlen) {
00897             result_error(recog, J_RESULT_STATUS_REJECT_SHORT);
00898             goto end_recog;
00899           }
00900         }
00901         
00902         /**********************************************/
00903         /* acoustic analysis and encoding of speech[] */
00904         /**********************************************/
00905         jlog("STAT: ### speech analysis (waveform -> MFCC)\n");
00906         /* CMN will be computed for the whole buffered input */
00907         if (wav2mfcc(recog->speech, recog->speechlen, recog) == FALSE) {
00908           /* error end, end stream */
00909           ret = -1;
00910           /* tell failure */
00911           result_error(recog, J_RESULT_STATUS_FAIL);
00912           goto end_recog;
00913         }
00914         
00915         /* if terminate signal has been received, cancel this input */
00916         if (recog->process_want_terminate) {
00917           result_error(recog, J_RESULT_STATUS_TERMINATE);
00918           goto end_recog;
00919         }
00920         
00921         /* output frame length */
00922         callback_exec(CALLBACK_STATUS_PARAM, recog);
00923         
00924       }
00925     }   /* end of data input */
00926     /* parameter has been got in 'param' */
00927     
00928     /******************************************************/
00929     /* 1st-pass --- backward search to compute heuristics */
00930     /******************************************************/
00931     /* (for buffered speech input and HTK parameter file input) */
00932     if (!jconf->decodeopt.realtime_flag) {
00933       /* prepare for outprob cache for each HMM state and time frame */
00934       /* assume all MFCCCalc has params of the same sample num */
00935       for(am=recog->amlist;am;am=am->next) {
00936         outprob_prepare(&(am->hmmwrk), am->mfcc->param->samplenum);
00937       }
00938     }
00939 
00940     /* if terminate signal has been received, cancel this input */
00941     if (recog->process_want_terminate) {
00942       result_error(recog, J_RESULT_STATUS_TERMINATE);
00943       goto end_recog;
00944     }
00945     
00946 
00947     /****************************************************/
00948     /* execute computation of left-to-right backtrellis */
00949     /****************************************************/
00950     if (get_back_trellis(recog) == FALSE) {
00951       jlog("ERROR: fatal error occured, program terminates now\n");
00952       return -1;
00953     }
00954 #ifdef BACKEND_VAD
00955     /* if not triggered, skip this segment */
00956     if (recog->jconf->decodeopt.segment && ! recog->triggered) {
00957       goto end_recog;
00958     }
00959 #endif
00960 
00961     /* execute callback for 1st pass result */
00962     /* result.status <0 must be skipped inside callback */
00963     callback_exec(CALLBACK_RESULT_PASS1, recog);
00964 #ifdef WORD_GRAPH
00965     /* result.wg1 == NULL should be skipped inside callback */
00966     callback_exec(CALLBACK_RESULT_PASS1_GRAPH, recog);
00967 #endif
00968 
00969     /* execute callback at end of pass1 */
00970     if (recog->triggered) {
00971       callback_exec(CALLBACK_EVENT_PASS1_END, recog);
00972     }
00973 
00974   end_1pass:
00975 
00976     /**********************************/
00977     /* end processing of the 1st-pass */
00978     /**********************************/
00979     /* on-the-fly 1st pass processing will join here */
00980     
00981     /* -rejectshort 指定時, 入力が指定時間以下であれば探索失敗として */
00982     /* 第2パスを実行せずにここで終了する */
00983     /* when using "-rejectshort", and input was shorter than the specified
00984        length, terminate search here and output recognition failure */
00985     if (jconf->reject.rejectshortlen > 0) {
00986       mseclen = (float)recog->mfcclist->param->samplenum * (float)jconf->input.period * (float)jconf->input.frameshift / 10000.0;
00987       if (mseclen < jconf->reject.rejectshortlen) {
00988         result_error(recog, J_RESULT_STATUS_REJECT_SHORT);
00989         goto end_recog;
00990       }
00991     }
00992 #ifdef POWER_REJECT
00993     if (power_reject(recog)) {
00994       result_error(recog, J_RESULT_STATUS_REJECT_POWER);
00995       goto end_recog;
00996     }
00997 #endif
00998     
00999     /* if terminate signal has been received, cancel this input */
01000     if (recog->process_want_terminate) {
01001       result_error(recog, J_RESULT_STATUS_TERMINATE);
01002       goto end_recog;
01003     }
01004     
01005     /* if GMM is specified and result are to be rejected, terminate search here */
01006     if (jconf->reject.gmm_reject_cmn_string != NULL) {
01007       if (! gmm_valid_input(recog)) {
01008         result_error(recog, J_RESULT_STATUS_REJECT_GMM);
01009         goto end_recog;
01010       }
01011     }
01012 
01013     /***********************************************/
01014     /* 2nd-pass --- forward search with heuristics */
01015     /***********************************************/
01016 #if !defined(PASS2_STRICT_IWCD) || defined(FIX_35_PASS2_STRICT_SCORE)    
01017     /* adjust trellis score not to contain outprob of the last frames */
01018     for(r=recog->process_list;r;r=r->next) {
01019       if (!r->live) continue;
01020       /* if [-1pass] is specified, skip 2nd pass */
01021       if (r->config->compute_only_1pass) continue;
01022       /* if search already failed on 1st pass, skip 2nd pass */
01023       if (r->result.status < 0) continue;
01024       if (! r->am->hmminfo->multipath) {
01025         bt_discount_pescore(r->wchmm, r->backtrellis, r->am->mfcc->param);
01026       }
01027 #ifdef LM_FIX_DOUBLE_SCORING
01028       if (r->lmtype == LM_PROB) {
01029         bt_discount_lm(r->backtrellis);
01030       }
01031 #endif
01032     }
01033 #endif
01034     
01035     /* execute stack-decoding search */
01036     callback_exec(CALLBACK_EVENT_PASS2_BEGIN, recog);
01037 
01038     for(r=recog->process_list;r;r=r->next) {
01039       if (!r->live) continue;
01040       /* if [-1pass] is specified, skip 2nd pass */
01041       if (r->config->compute_only_1pass) continue;
01042       /* if search already failed on 1st pass, skip 2nd pass */
01043       if (r->result.status < 0) continue;
01044       if (r->lmtype == LM_PROB) {
01045         wchmm_fbs(r->am->mfcc->param, r, 0, 0);
01046       } else if (r->lmtype == LM_DFA) {
01047         if (r->config->output.multigramout_flag) {
01048           /* execute 2nd pass multiple times for each grammar sequencially */
01049           /* to output result for each grammar */
01050           MULTIGRAM *m;
01051           for(m = r->lm->grammars; m; m = m->next) {
01052             if (m->active) {
01053               jlog("STAT: execute 2nd pass limiting words for gram #%d\n", m->id);
01054               wchmm_fbs(r->am->mfcc->param, r, m->cate_begin, m->dfa->term_num);
01055             }
01056           }
01057         } else {
01058           /* only the best among all grammar will be output */
01059           wchmm_fbs(r->am->mfcc->param, r, 0, r->lm->dfa->term_num);
01060         }
01061       }
01062     }
01063 
01064     /* output result */
01065     callback_exec(CALLBACK_RESULT, recog);
01066     /* output graph */
01067     /* r->result.wg == NULL should be skipped inside the callback */
01068     ok_p = FALSE;
01069     for(r=recog->process_list;r;r=r->next) {
01070       if (!r->live) continue;
01071       if (r->config->graph.lattice) ok_p = TRUE;
01072     }
01073     if (ok_p) callback_exec(CALLBACK_RESULT_GRAPH, recog);
01074     /* output confnet */
01075     /* r->result.confnet == NULL should be skipped inside the callback */
01076     ok_p = FALSE;
01077     for(r=recog->process_list;r;r=r->next) {
01078       if (!r->live) continue;
01079       if (r->config->graph.confnet) ok_p = TRUE;
01080     }
01081     if (ok_p) callback_exec(CALLBACK_RESULT_CONFNET, recog);
01082 
01083     /* clear work area for output */
01084     for(r=recog->process_list;r;r=r->next) {
01085       if (!r->live) continue;
01086       clear_result(r);
01087     }
01088     
01089     /* output end of 2nd pass */
01090     callback_exec(CALLBACK_EVENT_PASS2_END, recog);
01091 
01092   end_recog:
01093     /**********************/
01094     /* end of recognition */
01095     /**********************/
01096 
01097     process_segment_last = recog->process_segment;
01098     if (jconf->decodeopt.segment) { /* sp-segment mode */
01099       /* param is now shrunk to hold only the processed input, and */
01100       /* the rest is held in (newly allocated) "rest_param" */
01101       /* if this is the last segment, rest_param is NULL */
01102       /* assume all segmentation are synchronized */
01103       recog->process_segment = FALSE;
01104       for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
01105         if (mfcc->rest_param != NULL) {
01106           /* process the rest parameters in the next loop */
01107           recog->process_segment = TRUE;
01108           free_param(mfcc->param);
01109           mfcc->param = mfcc->rest_param;
01110           mfcc->rest_param = NULL;
01111         }
01112       }
01113     }
01114 
01115     /* callback of recognition end */
01116     if (jconf->decodeopt.segment) {
01117 #ifdef BACKEND_VAD
01118       if (recog->triggered) callback_exec(CALLBACK_EVENT_SEGMENT_END, recog);
01119       if (process_segment_last && !recog->process_segment) callback_exec(CALLBACK_EVENT_RECOGNITION_END, recog);
01120 #else
01121       callback_exec(CALLBACK_EVENT_SEGMENT_END, recog);
01122       if (!recog->process_segment) callback_exec(CALLBACK_EVENT_RECOGNITION_END, recog);
01123 #endif
01124     } else {
01125       callback_exec(CALLBACK_EVENT_RECOGNITION_END, recog);
01126     }
01127 
01128 
01129     /* update CMN info for next input (in case of realtime wave input) */
01130     if (jconf->input.speech_input != SP_MFCFILE && jconf->decodeopt.realtime_flag) {
01131       for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
01132         if (mfcc->param->samplenum > 0) {
01133           RealTimeCMNUpdate(mfcc, recog);
01134         }
01135       }
01136     }
01137     
01138     if (verbose_flag) jlog("\n");
01139     jlog_flush();
01140 
01141     if (jconf->decodeopt.segment) { /* sp-segment mode */
01142       if (recog->process_segment == TRUE) {
01143         if (verbose_flag) jlog("STAT: <<<restart the rest>>>\n\n");
01144       } else {
01145         /* input has reached end of stream, terminate program */
01146         if (ret <= 0 && ret != -2) break;
01147       }
01148     } else {                    /* not sp-segment mode */
01149       /* input has reached end of stream, terminate program */
01150       if (ret <= 0 && ret != -2) break;
01151     }
01152 
01153     /* recognition continues for next (silence-separated) segment */
01154       
01155   } /* END OF STREAM LOOP */
01156     
01157     /* input stream ended. it will happen when
01158        - input speech file has reached the end of file, 
01159        - adinnet input has received end of segment mark from client,
01160        - adinnet input has received end of input from client,
01161        - adinnet client disconnected.
01162     */
01163 
01164   if (jconf->input.speech_input != SP_MFCFILE) {
01165     /* close the stream */
01166     adin_end(recog->adin);
01167   }
01168 
01169   /* return to the opening of input stream */
01170 
01171   return(0);
01172 
01173 }
01174 
01219 int
01220 j_recognize_stream(Recog *recog)
01221 {
01222   int ret;
01223 
01224   do {
01225     
01226     ret = j_recognize_stream_core(recog);
01227 
01228     switch(ret) {
01229     case 1:           /* paused by a callback (stream will continue) */
01230       /* call pause event callbacks */
01231       callback_exec(CALLBACK_EVENT_PAUSE, recog);
01232       /* call pause functions */
01233       /* block until all pause functions exits */
01234       if (! callback_exist(recog, CALLBACK_PAUSE_FUNCTION)) {
01235         jlog("WARNING: pause requested but no pause function specified\n");
01236         jlog("WARNING: engine will resume now immediately\n");
01237       }
01238       callback_exec(CALLBACK_PAUSE_FUNCTION, recog);
01239       /* after here, recognition will restart for the rest input */
01240       /* call resume event callbacks */
01241       callback_exec(CALLBACK_EVENT_RESUME, recog);
01242       break;
01243     case 0:                     /* end of stream */
01244       /* go on to the next input */
01245       break;
01246     case -1:            /* error */
01247       jlog("ERROR: an error occured while recognition, terminate stream\n");
01248       return -1;
01249     }
01250   } while (ret == 1);           /* loop when paused by callback */
01251 
01252   return 0;
01253 }
01254 
01255 /* end of file */

Generated on Tue Dec 18 15:59:52 2007 for Julius by  doxygen 1.5.4