libjulius/src/recogmain.c

説明を見る。
00001 
00019 /*
00020  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
00021  * Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan
00022  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00023  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
00024  * All rights reserved
00025  */
00161 #define GLOBAL_VARIABLE_DEFINE  
00162 #include <julius/julius.h>
00163 #include <signal.h>
00164 #if defined(_WIN32) && !defined(__CYGWIN32__)
00165 #include <mbctype.h>
00166 #include <mbstring.h>
00167 #endif
00168 
00169 /* ---------- utility functions -----------------------------------------*/
00170 #ifdef REPORT_MEMORY_USAGE
00171 
/**
 * Report the memory usage of the running process.
 *
 * Builds a "ps -o vsz,rss -p <pid>" command line for the current process,
 * runs it through the shell with system(), and then flushes stdout and
 * stderr.
 */
static void
print_mem(void)
{
  char buf[200];

  /* bounded formatting; pid_t is cast explicitly because its width is
     not guaranteed to match the "%d" conversion */
  snprintf(buf, sizeof(buf), "ps -o vsz,rss -p %d", (int)getpid());
  system(buf);
  fflush(stdout);
  fflush(stderr);
}
00190 #endif
00191           
00192 
00209 SentenceAlign *
00210 result_align_new()
00211 {
00212   SentenceAlign *new;
00213   new = (SentenceAlign *)mymalloc(sizeof(SentenceAlign));
00214   new->w = NULL;
00215   new->ph = NULL;
00216   new->loc = NULL;
00217   new->begin_frame = NULL;
00218   new->end_frame = NULL;
00219   new->avgscore = NULL;
00220   new->is_iwsp = NULL;
00221   new->next = NULL;
00222   return new;
00223 }
00224 
00241 void
00242 result_align_free(SentenceAlign *a)
00243 {
00244   if (a->w) free(a->w);
00245   if (a->ph) free(a->ph);
00246   if (a->loc) free(a->loc);
00247   if (a->begin_frame) free(a->begin_frame);
00248   if (a->end_frame) free(a->end_frame);
00249   if (a->avgscore) free(a->avgscore);
00250   if (a->is_iwsp) free(a->is_iwsp);
00251   free(a);
00252 }
00253 
00269 void
00270 result_sentence_malloc(RecogProcess *r, int num)
00271 {
00272   int i;
00273   r->result.sent = (Sentence *)mymalloc(sizeof(Sentence) * num);
00274   for(i=0;i<num;i++) r->result.sent[i].align = NULL;
00275   r->result.sentnum = 0;
00276 }
00277 
00291 void
00292 result_sentence_free(RecogProcess *r)
00293 {  
00294   int i;
00295   SentenceAlign *a, *atmp;
00296   if (r->result.sent) {
00297     for(i=0;i<r->result.sentnum;i++) {
00298       a = r->result.sent[i].align;
00299       while(a) {
00300         atmp = a->next;
00301         result_align_free(a);
00302         a = atmp;
00303       }
00304     }
00305     free(r->result.sent);
00306     r->result.sent = NULL;
00307   }
00308 }
00309 
00323 void
00324 clear_result(RecogProcess *r)
00325 {
00326 #ifdef WORD_GRAPH
00327   /* clear 1st pass word graph output */
00328   wordgraph_clean(&(r->result.wg1));
00329 #endif
00330 
00331   if (r->lmvar == LM_DFA_WORD) {
00332     if (r->result.status == J_RESULT_STATUS_SUCCESS) {
00333       /* clear word recog result of first pass as in final result */
00334       free(r->result.sent);
00335     }
00336   } else {
00337     if (r->graphout) {
00338       if (r->config->graph.confnet) {
00339         /* free confusion network clusters */
00340         cn_free_all(&(r->result.confnet));
00341       } else if (r->config->graph.lattice) {
00342       }
00343       /* clear all wordgraph */
00344       wordgraph_clean(&(r->result.wg));
00345     }
00346     result_sentence_free(r);
00347   }
00348 }
00349 
00350 /* --------------------- speech buffering ------------------ */
00351 
00384 int
00385 adin_cut_callback_store_buffer(SP16 *now, int len, Recog *recog)
00386 {
00387   if (recog->speechlen == 0) {          /* first part of a segment */
00388     if (!recog->process_active) {
00389       return(1);
00390     }
00391   }
00392 
00393   if (recog->speechlen + len > recog->speechalloclen) {
00394     while (recog->speechlen + len > recog->speechalloclen) {
00395       recog->speechalloclen += MAX_SPEECH_ALLOC_STEP;
00396     }
00397     if (recog->speech == NULL) {
00398       recog->speech = (SP16 *)mymalloc(sizeof(SP16) * recog->speechalloclen);
00399     } else {
00400       if (debug2_flag) {
00401         jlog("STAT: expanding recog->speech to %d samples\n", recog->speechalloclen);
00402       }
00403       recog->speech = (SP16 *)myrealloc(recog->speech, sizeof(SP16) * recog->speechalloclen);
00404     }
00405   }
00406 
00407   /* store now[0..len] to recog->speech[recog->speechlen] */
00408   memcpy(&(recog->speech[recog->speechlen]), now, len * sizeof(SP16));
00409   recog->speechlen += len;
00410   return(0);                    /* tell adin_go to continue reading */
00411 }
00412 
00413 
00414 /* --------------------- adin check callback --------------- */
00442 static int
00443 callback_check_in_adin(Recog *recog)
00444 {
00445   /* module: check command and terminate recording when requested */
00446   callback_exec(CALLBACK_POLL, recog);
00447   /* With audio input via adinnet, TERMINATE command will issue terminate
00448      command to the adinnet client.  The client then stops recording
00449      immediately and return end-of-segment ack.  Then it will cause this
00450      process to stop recognition as normal.  So we need not to
00451      perform immediate termination at this callback, but just ignore the
00452      results in the main.c.  */
00453 #if 1
00454   if (recog->process_want_terminate) { /* TERMINATE ... force termination */
00455     return(-2);
00456   }
00457   if (recog->process_want_reload) {
00458     return(-1);
00459   }
00460 #else
00461   if (recog->process_want_terminate /* TERMINATE ... force termination */
00462       && recog->jconf->input.speech_input != SP_ADINNET) {
00463     return(-2);
00464   }
00465   if (recog->process_want_reload) {
00466     return(-1);
00467   }
00468 #endif
00469   return(0);
00470 }
00471 
00472 /*********************/
00473 /* open input stream */
00474 /*********************/
00492 int
00493 j_open_stream(Recog *recog, char *file_or_dev_name)
00494 {
00495   Jconf *jconf;
00496 
00497   jconf = recog->jconf;
00498 
00499   if (jconf->input.type == INPUT_WAVEFORM) {
00500     /* begin A/D input */
00501     if (adin_begin(recog->adin) == FALSE) {
00502       return -2;
00503     }
00504     /* create A/D-in thread here */
00505 #ifdef HAVE_PTHREAD
00506     if (recog->adin->enable_thread && ! recog->adin->input_side_segment) {
00507       if (adin_thread_create(recog) == FALSE) {
00508         return -2;
00509       }
00510     }
00511 #endif
00512   } else {
00513     switch(jconf->input.speech_input) {
00514     case SP_MFCMODULE:
00515       param_init_content(recog->mfcclist->param);
00516       if (mfc_module_begin(recog->mfcclist) == FALSE) return -2;
00517       break;
00518     case SP_MFCFILE:
00519       /* read parameter file */
00520       param_init_content(recog->mfcclist->param);
00521       if (rdparam(file_or_dev_name, recog->mfcclist->param) == FALSE) {
00522         jlog("ERROR: error in reading parameter file: %s\n", file_or_dev_name);
00523         return -1;
00524       }
00525       /* check and strip invalid frames */
00526       if (jconf->preprocess.strip_zero_sample) {
00527         param_strip_zero(recog->mfcclist->param);
00528       }
00529       /* output frame length */
00530       callback_exec(CALLBACK_STATUS_PARAM, recog);
00531       break;
00532     default:
00533       jlog("ERROR: j_open_stream: none of SP_MFC_*??\n");
00534       return -1;
00535     }
00536   }
00537       
00538   return 0;
00539 
00540 }
00541 
00559 int
00560 j_close_stream(Recog *recog)
00561 {
00562   Jconf *jconf;
00563 
00564   jconf = recog->jconf;
00565 
00566   if (jconf->input.type == INPUT_WAVEFORM) {
00567 #ifdef HAVE_PTHREAD
00568     /* close A/D-in thread here */
00569     if (recog->adin->enable_thread && ! recog->adin->input_side_segment) {
00570       if (adin_thread_cancel(recog) == FALSE) {
00571         return -2;
00572       }
00573     }
00574 #endif
00575     /* end A/D input */
00576     if (adin_end(recog->adin) == FALSE) {
00577       return -2;
00578     }
00579   } else {
00580     switch(jconf->input.speech_input) {
00581     case SP_MFCMODULE:
00582       if (mfc_module_end(recog->mfcclist) == FALSE) return -2;
00583       break;
00584     case SP_MFCFILE:
00585       /* nothing to do */
00586       break;
00587     default:
00588       jlog("ERROR: j_close_stream: none of SP_MFC_*??\n");
00589       return -1;
00590     }
00591   }
00592       
00593   return 0;
00594 
00595 }
00596 
00597 /**********************************************************************/
00598 /**********************************************************************/
00599 /**********************************************************************/
00600 
00613 static void
00614 result_error(Recog *recog, int status)
00615 {
00616   MFCCCalc *mfcc;
00617   RecogProcess *r;
00618   boolean ok_p;
00619 
00620   for(r=recog->process_list;r;r=r->next) r->result.status = status;
00621 
00622   ok_p = FALSE;
00623   for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00624     if (mfcc->f > 0) {
00625       ok_p = TRUE;
00626       break;
00627     }
00628   }
00629   if (ok_p) {                   /* had some input */
00630     /* output as rejected */
00631     callback_exec(CALLBACK_RESULT, recog);
00632 #ifdef ENABLE_PLUGIN
00633     plugin_exec_process_result(recog);
00634 #endif
00635   }
00636 }
00637 
00673 static int
00674 j_recognize_stream_core(Recog *recog)
00675 {
00676   Jconf *jconf;
00677   int ret;
00678   float seclen, mseclen;
00679   RecogProcess *r;
00680   MFCCCalc *mfcc;
00681   PROCESS_AM *am;
00682   PROCESS_LM *lm;
00683   boolean ok_p;
00684   boolean process_segment_last;
00685   boolean on_the_fly;
00686   boolean pass2_p;
00687 
00688   jconf = recog->jconf;
00689 
00690   /* determine whether on-the-fly decoding should be done */
00691   on_the_fly = FALSE;
00692   switch(jconf->input.type) {
00693   case INPUT_VECTOR:
00694     switch(jconf->input.speech_input) {
00695     case SP_MFCFILE: 
00696       on_the_fly = FALSE;
00697       break;
00698     case SP_MFCMODULE:
00699       on_the_fly = TRUE;
00700       break;
00701     }
00702     break;
00703   case INPUT_WAVEFORM:
00704     if (jconf->decodeopt.realtime_flag) {
00705       on_the_fly = TRUE;
00706     } else {
00707       on_the_fly = FALSE;
00708     }
00709     break;
00710   }
00711 
00712   if (jconf->input.type == INPUT_WAVEFORM || jconf->input.speech_input == SP_MFCMODULE) {
00713     for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00714       param_init_content(mfcc->param);
00715     }
00716   }
00717 
00718   /* if no process instance exist, start with terminated */
00719   if (recog->process_list == NULL) {
00720     jlog("STAT: no recog process, engine inactive\n");
00721     j_request_pause(recog);
00722   }
00723 
00724   /* update initial recognition process status */
00725   for(r=recog->process_list;r;r=r->next) {
00726     if (r->active > 0) {
00727       r->live = TRUE;
00728     } else if (r->active < 0) {
00729       r->live = FALSE;
00730     }
00731     r->active = 0;
00732   }
00733 
00734   /******************************************************************/
00735   /* do recognition for each incoming segment from the input stream */
00736   /******************************************************************/
00737   while (1) {
00738     
00739   start_recog:
00740 
00741     /*************************************/
00742     /* Update recognition process status */
00743     /*************************************/
00744     for(r=recog->process_list;r;r=r->next) {
00745       if (r->active > 0) {
00746         r->live = TRUE;
00747         jlog("STAT: SR%02d %s now active\n", r->config->id, r->config->name);
00748       } else if (r->active < 0) {
00749         r->live = FALSE;
00750         jlog("STAT: SR%02d %s now inactive\n", r->config->id, r->config->name);
00751       }
00752       r->active = 0;
00753     }
00754     if (debug2_flag) {
00755       for(r=recog->process_list;r;r=r->next) {
00756         jlog("DEBUG: %s: SR%02d %s\n", r->live ? "live" : "dead", r->config->id, r->config->name);
00757       }
00758     }
00759     /* check if any process is live */
00760     if (recog->process_active) {
00761       ok_p = FALSE;
00762       for(r=recog->process_list;r;r=r->next) {
00763         if (r->live) ok_p = TRUE;
00764       }
00765       if (!ok_p) {              /* no process is alive */
00766         /* make whole process as inactive */
00767         jlog("STAT: all recog process inactive, pause engine now\n");
00768         j_request_pause(recog);
00769       }
00770     }
00771 
00772     /* Check whether process status was changed while in the last run */
00773     if (recog->process_online != recog->process_active) {
00774       recog->process_online = recog->process_active;
00775       if (recog->process_online) callback_exec(CALLBACK_EVENT_PROCESS_ONLINE, recog);
00776       else callback_exec(CALLBACK_EVENT_PROCESS_OFFLINE, recog);
00777     }
00778     /* execute poll callback */
00779     if (recog->process_active) {
00780       callback_exec(CALLBACK_POLL, recog);
00781     }
00782     /* reset reload flag here */
00783     j_reset_reload(recog);
00784 
00785     if (!recog->process_active) {
00786       /* now sleeping, return */
00787       /* in the next call, we will resume from here */
00788       return 1;
00789     }
00790     /* update process status */
00791     if (recog->process_online != recog->process_active) {
00792       recog->process_online = recog->process_active;
00793       if (recog->process_online) callback_exec(CALLBACK_EVENT_PROCESS_ONLINE, recog);
00794       else callback_exec(CALLBACK_EVENT_PROCESS_OFFLINE, recog);
00795     }
00796 
00797     /*********************************************************/
00798     /* check for grammar to change, and rebuild if necessary */
00799     /*********************************************************/
00800     for(lm=recog->lmlist;lm;lm=lm->next) {
00801       if (lm->lmtype == LM_DFA) {
00802         multigram_update(lm); /* some modification occured if return TRUE*/
00803       }
00804     }
00805     for(r=recog->process_list;r;r=r->next) {
00806       if (!r->live) continue;
00807       if (r->lmtype == LM_DFA && r->lm->global_modified) {
00808         multigram_build(r);
00809       }
00810     }
00811     for(lm=recog->lmlist;lm;lm=lm->next) {
00812       if (lm->lmtype == LM_DFA) lm->global_modified = FALSE;
00813     }
00814 
00815     ok_p = FALSE;
00816     for(r=recog->process_list;r;r=r->next) {
00817       if (!r->live) continue;
00818       if (r->lmtype == LM_DFA) {
00819         if (r->lm->winfo == NULL ||
00820             (r->lmvar == LM_DFA_GRAMMAR && r->lm->dfa == NULL)) {
00821           /* make this instance inactive */
00822           r->active = -1;
00823           ok_p = TRUE;
00824         }
00825       }
00826     }
00827     if (ok_p) {                 /* at least one instance has no grammar */
00828       goto start_recog;
00829     }
00830 
00831 
00832     /******************/
00833     /* start 1st pass */
00834     /******************/
00835     if (on_the_fly) {
00836 
00837       /********************************************/
00838       /* REALTIME ON-THE-FLY DECODING OF 1ST-PASS */
00839       /********************************************/
00840       /* store, analysis and search in a pipeline  */
00841       /* main function is RealTimePipeLine() at realtime-1stpass.c, and
00842          it will be periodically called for each incoming input segment
00843          from the AD-in function adin_go().  RealTimePipeLine() will be
00844          called as a callback function from adin_go() */
00845       /* after this part, directly jump to the beginning of the 2nd pass */
00846       
00847       if (recog->process_segment) {
00848         /*****************************************************************/
00849         /* short-pause segmentation: process last remaining frames first */
00850         /*****************************************************************/
00851         /* last was segmented by short pause */
00852         /* the margin segment in the last input will be re-processed first,
00853            and then the speech input will be processed */
00854         /* process the last remaining parameters */
00855         ret = RealTimeResume(recog);
00856         if (ret < 0) {          /* error end in the margin */
00857           jlog("ERROR: failed to process last remaining samples on RealTimeResume\n"); /* exit now! */
00858           return -1;
00859         }
00860         if (ret != 1) { /* if segmented again in the margin, not process the rest */
00861           /* last parameters has been processed, so continue with the
00862              current input as normal */
00863           /* process the incoming input */
00864           if (jconf->input.type == INPUT_WAVEFORM) {
00865             /* get speech and process it on real-time */
00866             ret = adin_go(RealTimePipeLine, callback_check_in_adin, recog);
00867           } else {
00868             /* get feature vector and process it */
00869             ret = mfcc_go(recog, callback_check_in_adin);
00870           }
00871           if (ret < 0) {                /* error end in adin_go */
00872             if (ret == -2 || recog->process_want_terminate) {
00873               /* terminated by callback */
00874               RealTimeTerminate(recog);
00875               /* reset param */
00876               for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00877                 param_init_content(mfcc->param);
00878               }
00879               /* execute callback at end of pass1 */
00880               if (recog->triggered) {
00881                 callback_exec(CALLBACK_EVENT_PASS1_END, recog);
00882                 /* output result terminate */
00883                 result_error(recog, J_RESULT_STATUS_TERMINATE);
00884               }
00885               goto end_recog; /* cancel this recognition */
00886             }
00887             jlog("ERROR: an error occured at on-the-fly 1st pass decoding\n");          /* exit now! */
00888             return(-1);
00889           }
00890         }
00891         
00892       } else {
00893 
00894         /***********************************************************/
00895         /* last was not segmented, process the new incoming input  */
00896         /***********************************************************/
00897         /* end of this input will be determined by either end of stream
00898            (in case of file input), or silence detection by adin_go(), or
00899            'TERMINATE' command from module (if module mode) */
00900         /* prepare work area for on-the-fly processing */
00901         if (RealTimePipeLinePrepare(recog) == FALSE) {
00902           jlog("ERROR: failed to prepare for on-the-fly 1st pass decoding\n");
00903           return (-1);
00904         }
00905         /* process the incoming input */
00906         if (jconf->input.type == INPUT_WAVEFORM) {
00907           /* get speech and process it on real-time */
00908           ret = adin_go(RealTimePipeLine, callback_check_in_adin, recog);
00909         } else {
00910           /* get feature vector and process it */
00911           ret = mfcc_go(recog, callback_check_in_adin);
00912         }
00913         
00914         if (ret < 0) {          /* error end in adin_go */
00915           if (ret == -2 || recog->process_want_terminate) {     
00916             /* terminated by callback */
00917             RealTimeTerminate(recog);
00918             /* reset param */
00919             for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00920               param_init_content(mfcc->param);
00921             }
00922             /* execute callback at end of pass1 */
00923             if (recog->triggered) {
00924               callback_exec(CALLBACK_EVENT_PASS1_END, recog);
00925               /* output result terminate */
00926               result_error(recog, J_RESULT_STATUS_TERMINATE);
00927             }
00928             goto end_recog;
00929           }
00930           jlog("ERROR: an error occured at on-the-fly 1st pass decoding\n");          /* exit now! */
00931           return(-1);
00932         }
00933       }
00934       /******************************************************************/
00935       /* speech stream has been processed on-the-fly, and 1st pass ends */
00936       /******************************************************************/
00937       /* last procedure of 1st-pass */
00938       if (RealTimeParam(recog) == FALSE) {
00939         jlog("ERROR: fatal error occured, program terminates now\n");
00940         return -1;
00941       }
00942       
00943 #ifdef BACKEND_VAD
00944       /* if not triggered, skip this segment */
00945       if (recog->jconf->decodeopt.segment && ! recog->triggered) {
00946         goto end_recog;
00947       }
00948 #endif
00949 
00950       /* execute callback for 1st pass result */
00951       /* result.status <0 must be skipped inside callback */
00952       callback_exec(CALLBACK_RESULT_PASS1, recog);
00953 #ifdef WORD_GRAPH
00954       /* result.wg1 == NULL should be skipped inside callback */
00955       callback_exec(CALLBACK_RESULT_PASS1_GRAPH, recog);
00956 #endif
00957       /* execute callback at end of pass1 */
00958       callback_exec(CALLBACK_EVENT_PASS1_END, recog);
00959       /* output frame length */
00960       callback_exec(CALLBACK_STATUS_PARAM, recog);
00961       /* if terminate signal has been received, discard this input */
00962       if (recog->process_want_terminate) {
00963         result_error(recog, J_RESULT_STATUS_TERMINATE);
00964         goto end_recog;
00965       }
00966 
00967       /* END OF ON-THE-FLY INPUT AND DECODING OF 1ST PASS */
00968 
00969     } else {
00970 
00971       /******************/
00972       /* buffered input */
00973       /******************/
00974 
00975       if (jconf->input.type == INPUT_VECTOR) {
00976         /***********************/
00977         /* feature vector input */
00978         /************************/
00979         if (jconf->input.speech_input == SP_MFCFILE) {
00980           /************************/
00981           /* parameter file input */
00982           /************************/
00983           /* parameter type check --- compare the type to that of HMM,
00984              and adjust them if necessary */
00985           if (jconf->input.paramtype_check_flag) {
00986             for(am=recog->amlist;am;am=am->next) {
00987               /* return param itself or new malloced param */
00988               if (param_check_and_adjust(am->hmminfo, am->mfcc->param, verbose_flag) == -1) {   /* failed */
00989                 
00990                 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00991                   param_init_content(mfcc->param);
00992                 }
00993                 /* tell failure */
00994                 result_error(recog, J_RESULT_STATUS_FAIL);
00995                 goto end_recog;
00996               }
00997             }
00998           }
00999           /* whole input is already read, so set input status to end of stream */
01000           /* and jump to the start point of 1st pass */
01001           ret = 0;
01002         }
01003       } else {
01004         /*************************/
01005         /* buffered speech input */
01006         /*************************/
01007         if (!recog->process_segment) { /* no segment left */
01008 
01009           /****************************************/
01010           /* store raw speech samples to speech[] */
01011           /****************************************/
01012           recog->speechlen = 0;
01013           for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
01014             param_init_content(mfcc->param);
01015           }
01016           /* tell module to start recording */
01017           /* the "adin_cut_callback_store_buffer" simply stores
01018              the input speech to a buffer "speech[]" */
01019           /* end of this input will be determined by either end of stream
01020              (in case of file input), or silence detection by adin_go(), or
01021              'TERMINATE' command from module (if module mode) */
01022           ret = adin_go(adin_cut_callback_store_buffer, callback_check_in_adin, recog);
01023           if (ret < 0) {                /* error end in adin_go */
01024             if (ret == -2 || recog->process_want_terminate) {
01025               /* terminated by module */
01026               /* output fail */
01027               result_error(recog, J_RESULT_STATUS_TERMINATE);
01028               goto end_recog;
01029             }
01030             jlog("ERROR: an error occured while recording input\n");
01031             return -1;
01032           }
01033           
01034           /* output recorded length */
01035           seclen = (float)recog->speechlen / (float)jconf->input.sfreq;
01036           jlog("STAT: %d samples (%.2f sec.)\n", recog->speechlen, seclen);
01037           
01038           /* -rejectshort 指定時, 入力が指定時間以下であれば
01039              ここで入力を棄却する */
01040           /* when using "-rejectshort", and input was shorter than
01041              specified, reject the input here */
01042           if (jconf->reject.rejectshortlen > 0) {
01043             if (seclen * 1000.0 < jconf->reject.rejectshortlen) {
01044               result_error(recog, J_RESULT_STATUS_REJECT_SHORT);
01045               goto end_recog;
01046             }
01047           }
01048         
01049           /**********************************************/
01050           /* acoustic analysis and encoding of speech[] */
01051           /**********************************************/
01052           jlog("STAT: ### speech analysis (waveform -> MFCC)\n");
01053           /* CMN will be computed for the whole buffered input */
01054           if (wav2mfcc(recog->speech, recog->speechlen, recog) == FALSE) {
01055             /* error end, end stream */
01056             ret = -1;
01057             /* tell failure */
01058             result_error(recog, J_RESULT_STATUS_FAIL);
01059             goto end_recog;
01060           }
01061           
01062           /* if terminate signal has been received, cancel this input */
01063           if (recog->process_want_terminate) {
01064             result_error(recog, J_RESULT_STATUS_TERMINATE);
01065             goto end_recog;
01066           }
01067           
01068           /* output frame length */
01069           callback_exec(CALLBACK_STATUS_PARAM, recog);
01070         }
01071       }
01072 
01073 #ifdef ENABLE_PLUGIN
01074       /* call post-process plugin if exist */
01075       plugin_exec_vector_postprocess_all(recog->mfcclist->param);
01076 #endif
01077 
01078       /******************************************************/
01079       /* 1st-pass --- backward search to compute heuristics */
01080       /******************************************************/
01081       if (!jconf->decodeopt.realtime_flag) {
01082         /* prepare for outprob cache for each HMM state and time frame */
01083         /* assume all MFCCCalc has params of the same sample num */
01084         for(am=recog->amlist;am;am=am->next) {
01085           outprob_prepare(&(am->hmmwrk), am->mfcc->param->samplenum);
01086         }
01087       }
01088       
01089       /* if terminate signal has been received, cancel this input */
01090       if (recog->process_want_terminate) {
01091         result_error(recog, J_RESULT_STATUS_TERMINATE);
01092         goto end_recog;
01093       }
01094     
01095       /* execute computation of left-to-right backtrellis */
01096       if (get_back_trellis(recog) == FALSE) {
01097         jlog("ERROR: fatal error occured, program terminates now\n");
01098         return -1;
01099       }
01100 #ifdef BACKEND_VAD
01101       /* if not triggered, skip this segment */
01102       if (recog->jconf->decodeopt.segment && ! recog->triggered) {
01103         goto end_recog;
01104       }
01105 #endif
01106       
01107       /* execute callback for 1st pass result */
01108       /* result.status <0 must be skipped inside callback */
01109       callback_exec(CALLBACK_RESULT_PASS1, recog);
01110 #ifdef WORD_GRAPH
01111       /* result.wg1 == NULL should be skipped inside callback */
01112       callback_exec(CALLBACK_RESULT_PASS1_GRAPH, recog);
01113 #endif
01114       
01115       /* execute callback at end of pass1 */
01116       if (recog->triggered) {
01117         callback_exec(CALLBACK_EVENT_PASS1_END, recog);
01118       }
01119 
01120       /* END OF BUFFERED 1ST PASS */
01121 
01122     }
01123 
01124     /**********************************/
01125     /* end processing of the 1st-pass */
01126     /**********************************/
01127     /* on-the-fly 1st pass processing will join here */
01128     
01129     /* -rejectshort 指定時, 入力が指定時間以下であれば探索失敗として */
01130     /* 第2パスを実行せずにここで終了する */
01131     /* when using "-rejectshort", and input was shorter than the specified
01132        length, terminate search here and output recognition failure */
01133     if (jconf->reject.rejectshortlen > 0) {
01134       mseclen = (float)recog->mfcclist->param->samplenum * (float)jconf->input.period * (float)jconf->input.frameshift / 10000.0;
01135       if (mseclen < jconf->reject.rejectshortlen) {
01136         result_error(recog, J_RESULT_STATUS_REJECT_SHORT);
01137         goto end_recog;
01138       }
01139     }
01140 #ifdef POWER_REJECT
01141     if (power_reject(recog)) {
01142       result_error(recog, J_RESULT_STATUS_REJECT_POWER);
01143       goto end_recog;
01144     }
01145 #endif
01146     
01147     /* if terminate signal has been received, cancel this input */
01148     if (recog->process_want_terminate) {
01149       result_error(recog, J_RESULT_STATUS_TERMINATE);
01150       goto end_recog;
01151     }
01152     
01153     /* if GMM is specified and result are to be rejected, terminate search here */
01154     if (jconf->reject.gmm_reject_cmn_string != NULL) {
01155       if (! gmm_valid_input(recog)) {
01156         result_error(recog, J_RESULT_STATUS_REJECT_GMM);
01157         goto end_recog;
01158       }
01159     }
01160 
01161     /* for instances with "-1pass", copy 1st pass result as final */
01162     /* execute stack-decoding search */
01163     /* they will be skipepd in the next pass */
01164     for(r=recog->process_list;r;r=r->next) {
01165       if (!r->live) continue;
01166       /* skip if 1st pass was failed */
01167       if (r->result.status < 0) continue;
01168       /* already stored on word recognition, so skip this */
01169       if (r->lmvar == LM_DFA_WORD) continue;
01170       if (r->config->compute_only_1pass) {
01171         if (verbose_flag) {
01172           jlog("%02d %s: \"-1pass\" specified, output 1st pass result as a final result\n", r->config->id, r->config->name);
01173         }
01174         /* prepare result storage */
01175         result_sentence_malloc(r, 1);
01176         /* finalize result when no hypothesis was obtained */
01177         pass2_finalize_on_no_result(r, TRUE);
01178       }
01179     }
01180 
01181     /***********************************************/
01182     /* 2nd-pass --- forward search with heuristics */
01183     /***********************************************/
01184     pass2_p = FALSE;
01185     for(r=recog->process_list;r;r=r->next) {
01186       if (!r->live) continue;
01187       /* if [-1pass] is specified, skip 2nd pass */
01188       if (r->config->compute_only_1pass) continue;
01189       /* if search already failed on 1st pass, skip 2nd pass */
01190       if (r->result.status < 0) continue;
01191       pass2_p = TRUE;
01192     }
01193     if (pass2_p) callback_exec(CALLBACK_EVENT_PASS2_BEGIN, recog);
01194 
01195 #if !defined(PASS2_STRICT_IWCD) || defined(FIX_35_PASS2_STRICT_SCORE)    
01196     /* adjust trellis score not to contain outprob of the last frames */
01197     for(r=recog->process_list;r;r=r->next) {
01198       if (!r->live) continue;
01199       /* if [-1pass] is specified, skip 2nd pass */
01200       if (r->config->compute_only_1pass) continue;
01201       /* if search already failed on 1st pass, skip 2nd pass */
01202       if (r->result.status < 0) continue;
01203       if (! r->am->hmminfo->multipath) {
01204         bt_discount_pescore(r->wchmm, r->backtrellis, r->am->mfcc->param);
01205       }
01206 #ifdef LM_FIX_DOUBLE_SCORING
01207       if (r->lmtype == LM_PROB) {
01208         bt_discount_lm(r->backtrellis);
01209       }
01210 #endif
01211     }
01212 #endif
01213     
01214     /* execute stack-decoding search */
01215     for(r=recog->process_list;r;r=r->next) {
01216       if (!r->live) continue;
01217       /* if [-1pass] is specified, just copy from 1st pass result */
01218       if (r->config->compute_only_1pass) continue;
01219       /* if search already failed on 1st pass, skip 2nd pass */
01220       if (r->result.status < 0) continue;
01221       /* prepare result storage */
01222       if (r->lmtype == LM_DFA && r->config->output.multigramout_flag) {
01223         result_sentence_malloc(r, r->config->output.output_hypo_maxnum * multigram_get_all_num(r->lm));
01224       } else {
01225         result_sentence_malloc(r, r->config->output.output_hypo_maxnum);
01226       }
01227       /* do 2nd pass */
01228       if (r->lmtype == LM_PROB) {
01229         wchmm_fbs(r->am->mfcc->param, r, 0, 0);
01230       } else if (r->lmtype == LM_DFA) {
01231         if (r->config->output.multigramout_flag) {
01232           /* execute 2nd pass multiple times for each grammar sequencially */
01233           /* to output result for each grammar */
01234           MULTIGRAM *m;
01235           boolean has_success = FALSE;
01236           for(m = r->lm->grammars; m; m = m->next) {
01237             if (m->active) {
01238               jlog("STAT: execute 2nd pass limiting words for gram #%d\n", m->id);
01239               wchmm_fbs(r->am->mfcc->param, r, m->cate_begin, m->dfa->term_num);
01240               if (r->result.status == J_RESULT_STATUS_SUCCESS) {
01241                 has_success = TRUE;
01242               }
01243             }
01244           }
01245           r->result.status = (has_success == TRUE) ? J_RESULT_STATUS_SUCCESS : J_RESULT_STATUS_FAIL;
01246         } else {
01247           /* only the best among all grammar will be output */
01248           wchmm_fbs(r->am->mfcc->param, r, 0, r->lm->dfa->term_num);
01249         }
01250       }
01251     }
01252 
01253     /* do forced alignment if needed */
01254     for(r=recog->process_list;r;r=r->next) {
01255       if (!r->live) continue;
01256       /* if search failed on 2nd pass, skip this */
01257       if (r->result.status < 0) continue;
01258       /* do needed alignment */
01259       do_alignment_all(r, r->am->mfcc->param);
01260     }
01261 
01262     /* output result */
01263     callback_exec(CALLBACK_RESULT, recog);
01264 #ifdef ENABLE_PLUGIN
01265     plugin_exec_process_result(recog);
01266 #endif
01267     /* output graph */
01268     /* r->result.wg == NULL should be skipped inside the callback */
01269     ok_p = FALSE;
01270     for(r=recog->process_list;r;r=r->next) {
01271       if (!r->live) continue;
01272       if (r->config->compute_only_1pass) continue;
01273       if (r->result.status < 0) continue;
01274       if (r->config->graph.lattice) ok_p = TRUE;
01275     }
01276     if (ok_p) callback_exec(CALLBACK_RESULT_GRAPH, recog);
01277     /* output confnet */
01278     /* r->result.confnet == NULL should be skipped inside the callback */
01279     ok_p = FALSE;
01280     for(r=recog->process_list;r;r=r->next) {
01281       if (!r->live) continue;
01282       if (r->config->compute_only_1pass) continue;
01283       if (r->result.status < 0) continue;
01284       if (r->config->graph.confnet) ok_p = TRUE;
01285     }
01286     if (ok_p) callback_exec(CALLBACK_RESULT_CONFNET, recog);
01287 
01288     /* clear work area for output */
01289     for(r=recog->process_list;r;r=r->next) {
01290       if (!r->live) continue;
01291       clear_result(r);
01292     }
01293     
01294     /* output end of 2nd pass */
01295     if (pass2_p) callback_exec(CALLBACK_EVENT_PASS2_END, recog);
01296 
01297 #ifdef DEBUG_VTLN_ALPHA_TEST
01298     if (r->am->mfcc->para->vtln_alpha == 1.0) {
01299       /* if vtln parameter remains default, search for VTLN parameter */
01300       vtln_alpha(recog, r);
01301     }
01302 #endif
01303 
01304   end_recog:
01305     /**********************/
01306     /* end of recognition */
01307     /**********************/
01308 
01309     /* update CMN info for next input (in case of realtime wave input) */
01310     if (jconf->input.type == INPUT_WAVEFORM && jconf->decodeopt.realtime_flag) {
01311       for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
01312         if (mfcc->param->samplenum > 0) {
01313           RealTimeCMNUpdate(mfcc, recog);
01314         }
01315       }
01316     }
01317     
01318     process_segment_last = recog->process_segment;
01319     if (jconf->decodeopt.segment) { /* sp-segment mode */
01320       /* param is now shrinked to hold only the processed input, and */
01321       /* the rests are holded in (newly allocated) "rest_param" */
01322       /* if this is the last segment, rest_param is NULL */
01323       /* assume all segmentation are synchronized */
01324       recog->process_segment = FALSE;
01325       for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
01326         if (mfcc->rest_param != NULL) {
01327           /* process the rest parameters in the next loop */
01328           recog->process_segment = TRUE;
01329           free_param(mfcc->param);
01330           mfcc->param = mfcc->rest_param;
01331           mfcc->rest_param = NULL;
01332         }
01333       }
01334     }
01335 
01336     /* callback of recognition end */
01337     if (jconf->decodeopt.segment) {
01338 #ifdef BACKEND_VAD
01339       if (recog->triggered) callback_exec(CALLBACK_EVENT_SEGMENT_END, recog);
01340       if (process_segment_last && !recog->process_segment) callback_exec(CALLBACK_EVENT_RECOGNITION_END, recog);
01341 #else
01342       callback_exec(CALLBACK_EVENT_SEGMENT_END, recog);
01343       if (!recog->process_segment) callback_exec(CALLBACK_EVENT_RECOGNITION_END, recog);
01344 #endif
01345     } else {
01346       callback_exec(CALLBACK_EVENT_RECOGNITION_END, recog);
01347     }
01348 
01349 
01350     if (verbose_flag) jlog("\n");
01351     jlog_flush();
01352 
01353     if (jconf->decodeopt.segment) { /* sp-segment mode */
01354       if (recog->process_segment == TRUE) {
01355         if (verbose_flag) jlog("STAT: <<<restart the rest>>>\n\n");
01356       } else {
01357         /* input has reached end of stream, terminate program */
01358         if (ret <= 0 && ret != -2) break;
01359       }
01360     } else {                    /* not sp-segment mode */
01361       /* input has reached end of stream, terminate program */
01362       if (ret <= 0 && ret != -2) break;
01363     }
01364 
01365     /* recognition continues for next (silence-aparted) segment */
01366       
01367   } /* END OF STREAM LOOP */
01368     
01369   /* close the stream */
01370   if (jconf->input.type == INPUT_WAVEFORM) {
01371     if (adin_end(recog->adin) == FALSE) return -1;
01372   }
01373   if (jconf->input.speech_input == SP_MFCMODULE) {
01374     if (mfc_module_end(recog->mfcclist) == FALSE) return -1;
01375   }
01376 
01377   /* return to the opening of input stream */
01378 
01379   return(0);
01380 
01381 }
01382 
01427 int
01428 j_recognize_stream(Recog *recog)
01429 {
01430   int ret;
01431 
01432   do {
01433     
01434     ret = j_recognize_stream_core(recog);
01435 
01436     switch(ret) {
01437     case 1:           /* paused by a callback (stream will continue) */
01438       /* call pause event callbacks */
01439       callback_exec(CALLBACK_EVENT_PAUSE, recog);
01440       /* call pause functions */
01441       /* block until all pause functions exits */
01442       if (! callback_exist(recog, CALLBACK_PAUSE_FUNCTION)) {
01443         jlog("WARNING: pause requested but no pause function specified\n");
01444         jlog("WARNING: engine will resume now immediately\n");
01445       }
01446       callback_exec(CALLBACK_PAUSE_FUNCTION, recog);
01447       /* after here, recognition will restart for the rest input */
01448       /* call resume event callbacks */
01449       callback_exec(CALLBACK_EVENT_RESUME, recog);
01450       break;
01451     case 0:                     /* end of stream */
01452       /* go on to the next input */
01453       break;
01454     case -1:            /* error */
01455       jlog("ERROR: an error occured while recognition, terminate stream\n");
01456       return -1;
01457     }
01458   } while (ret == 1);           /* loop when paused by callback */
01459 
01460   return 0;
01461 }
01462 
01463 /* end of file */

Juliusに対してThu Jul 23 12:16:23 2009に生成されました。  doxygen 1.5.1