00001 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 #define GLOBAL_VARIABLE_DEFINE  
00026 #include <julius.h>
00027 #include <signal.h>
00028 #if defined(_WIN32) && !defined(__CYGWIN32__)
00029 #include <mbctype.h>
00030 #include <mbstring.h>
00031 #endif
00032 
00033 
00034 
00035 #ifdef REPORT_MEMORY_USAGE
00036 
/**
 * Report the memory usage of this process by running
 * "ps -o vsz,rss -p <pid>" through system(), then flush the program's own
 * output streams.
 *
 * Only compiled when REPORT_MEMORY_USAGE is defined.
 */
static void
print_mem(void)
{
  char cmd[200];

  /* pid_t is not guaranteed to be int, so convert explicitly for printf. */
  snprintf(cmd, sizeof(cmd), "ps -o vsz,rss -p %ld", (long)getpid());
  system(cmd);
  j_flushprint();
  fflush(stderr);
}
00055 #endif
00056           
00057 
00058 
00066 static SP16 *overflowed_samples = NULL;
00071 static int overflowed_samplenum;
00102 int
00103 adin_cut_callback_store_buffer(SP16 *now, int len)
00104 {
00105   if (module_mode) {
00106     
00107     msock_check_and_process_command();
00108   }
00109   if (speechlen == 0) {         
00110     
00111     status_recstart();
00112     if (module_mode) {
00113       
00114       if (module_wants_terminate() ||
00115           !module_is_active()) { 
00116         return(-2);
00117       }
00118     }
00119     if (overflowed_samples) {   
00120       
00121       memcpy(&(speech[0]), overflowed_samples, sizeof(SP16)*overflowed_samplenum);
00122       speechlen += overflowed_samplenum;
00123       free(overflowed_samples);
00124       overflowed_samples = NULL;
00125     }
00126   }
00127   if (speechlen + len > MAXSPEECHLEN) {
00128     
00129     j_printerr("Warning: too long input (> %d samples), segmented now\n", MAXSPEECHLEN);
00130     
00131     {
00132       int getlen, restlen;
00133       getlen = MAXSPEECHLEN - speechlen;
00134       restlen = len - getlen;
00135       overflowed_samples = (SP16 *)mymalloc(sizeof(SP16)*restlen);
00136       memcpy(overflowed_samples, &(now[getlen]), restlen * sizeof(SP16));
00137       if (record_dirname != NULL) {
00138         record_sample_write(&(now[getlen]), restlen);
00139       }
00140       overflowed_samplenum = restlen;
00141       memcpy(&(speech[speechlen]), now, getlen * sizeof(SP16));
00142       if (record_dirname != NULL) {
00143         record_sample_write(now, getlen);
00144       }
00145       speechlen += getlen;
00146     }
00147     return(1);                  
00148   }
00149   if (module_mode) {
00150     
00151     if (module_wants_terminate()) {
00152       speechlen = 0;
00153       return(-2);
00154     }
00155   }
00156   
00157   memcpy(&(speech[speechlen]), now, len * sizeof(SP16));
00158   if (record_dirname != NULL) {
00159     record_sample_write(now, len);
00160   }
00161   speechlen += len;
00162   return(0);                    
00163 }
00164 
00165 
00201 static char *
00202 mfcfilelist_nextfile()
00203 {
00204   static FILE *mfclist = NULL;  
00205   static char *buf;
00206   
00207   if (mfclist == NULL) {        
00208     if ((mfclist = fopen(inputlist_filename, "r")) == NULL) { 
00209       j_error("inputlist open error\n");
00210     }
00211   }
00212   buf = mymalloc(MAXLINELEN);
00213   while(getl_fp(buf, MAXLINELEN, mfclist) != NULL) {
00214     if (buf[0] == '\0') continue; 
00215     if (buf[0] == '#') continue; 
00216     
00217     return buf;
00218   }
00219   
00220   free(buf);
00221   fclose(mfclist);
00222   mfclist = NULL;
00223   return NULL;
00224 }
00225 
00226                                       
00227 
00228 
00229 
00230 
00252 void
00253 main_recognition_loop()
00254 {
00255   char *speechfilename; 
00256   HTK_Param *param = NULL;              
00257   HTK_Param *selected_param;
00258   int ret;
00259   int file_counter;
00260   float seclen, mseclen;
00261   boolean process_online = FALSE; 
00262 
00263   
00264   
00265   
00266   
00267   final_fusion();
00268 
00269   
00270   adin_initialize();
00271   
00272   
00273   print_info();
00274 
00275   
00276   file_counter = 0;
00277 
00278 #ifdef VISUALIZE
00279   
00280   visual_init();
00281 #endif
00282 
00283   
00284   
00286   
00287   
00288   for (;;) {
00289 
00290     j_printf("\n");
00291     if (verbose_flag) j_printf("------\n");
00292     j_flushprint();
00293 
00294     
00295     
00296     
00297     if (speech_input == SP_MFCFILE) {
00298       
00299       
00300       
00301       VERMES("### read analyzed parameter\n");
00302       
00303       if (inputlist_filename != NULL) { 
00304         speechfilename = mfcfilelist_nextfile();
00305       } else {
00306         speechfilename = get_line("enter MFCC filename->");
00307       }
00308       if (speechfilename == NULL) {
00309         
00310         j_printerr("%d files processed\n", file_counter);
00311 #ifdef REPORT_MEMORY_USAGE
00312         print_mem();
00313 #endif
00314         j_exit();
00315       }
00316       if (verbose_flag) j_printf("\ninput MFCC file: %s\n",speechfilename);
00317       
00318       param = new_param();
00319       if (rdparam(speechfilename, param) == FALSE) {
00320         j_printerr("error in reading parameter file: %s\n",speechfilename);
00321         free(speechfilename);
00322         free_param(param);
00323         continue;
00324       }
00325       
00326       if (strip_zero_sample) {
00327         param_strip_zero(param);
00328       }
00329       free(speechfilename);
00330       
00331       status_param(param);
00332       
00333       file_counter++;
00334     } else {                    
00335       
00336       
00337       
00338       VERMES("### read waveform input\n");
00339       
00340       if (adin_begin() == FALSE) {
00341         
00342         if (speech_input == SP_RAWFILE) {
00343           j_printerr("%d files processed\n", file_counter);
00344           j_exit();  
00345         } else if (speech_input == SP_STDIN) {
00346           j_exit();  
00347         } else {
00348           j_error("failed to begin input stream\n");
00349         }
00350       }
00351       
00352       if (speech_input == SP_RAWFILE) {
00353         file_counter++;
00354       }
00355     }
00356     
00357 #ifdef USE_DFA
00358     
00359     if (module_mode) {
00360       if (dfa == NULL || winfo == NULL) { 
00361         msock_exec_command("PAUSE");
00362       }
00363     }
00364 #endif
00365 
00366     if (!module_mode) {
00367       
00368       process_online = TRUE;
00369       status_process_online();
00370     }
00371   
00372     
00373     
00374     
00375     while (1) {
00376 
00377     start_recog:
00378 
00379       if (module_mode) {
00380         
00381         
00382         
00383         
00384 
00385 
00386 
00387 
00388 
00389         
00390         if (process_online != module_is_active()) {
00391           process_online = module_is_active();
00392           if (process_online) status_process_online();
00393           else status_process_offline();
00394         }
00395         if (module_is_active()) {
00396           
00397           msock_check_and_process_command();
00398         }
00399         module_reset_reload();  
00400         while (! module_is_active()) {    
00401           
00402           
00403           
00404           msock_process_command();
00405         }
00406         
00407         if (process_online != module_is_active()) {
00408           process_online = module_is_active();
00409           if (process_online) status_process_online();
00410           else status_process_offline();
00411         }
00412 #ifdef USE_DFA
00413         
00414         
00415         
00416         multigram_exec();
00417         if (dfa == NULL || winfo == NULL) { 
00418           msock_exec_command("PAUSE");
00419           goto start_recog;
00420         }
00421 #endif
00422       }
00423 
00424       if (speech_input == SP_MFCFILE) {
00425         
00426         
00427         
00428         
00429         
00430         
00431         
00432 
00433         if (paramtype_check_flag) {
00434           
00435           selected_param = new_param_check_and_adjust(hmminfo, param, verbose_flag);
00436           if (selected_param == NULL) { 
00437             free_param(param);
00438             param = NULL;
00439             goto end_recog;
00440           }
00441           param = selected_param;
00442         }
00443         
00444         
00445         ret = 0;
00446       } else {
00447         
00448         
00449         
00450         if (realtime_flag) {
00451           
00452           
00453           
00454           
00455           
00456 
00457 
00458 
00459           
00460 #ifdef SP_BREAK_CURRENT_FRAME
00461           if (rest_param) {
00462             
00463             
00464             
00465             
00466             
00467             
00468             ret = RealTimeResume();
00469             if (ret < 0) {              
00470               j_error("error in resuming last fragment\n"); 
00471             }
00472             if (ret != 1) {     
00473               
00474 
00475               
00476               status_recready();
00477               if (module_mode) {
00478                 
00479                 ret = adin_go(RealTimePipeLine, msock_check_in_adin);
00480               } else {
00481                 
00482                 ret = adin_go(RealTimePipeLine, NULL);
00483               }
00484               if (ret < 0) {            
00485                 if (module_mode && (ret == -2 || module_wants_terminate())) {   
00486                   RealTimeTerminate();
00487                   param = NULL;
00488                   goto end_recog; 
00489                 }
00490                 j_error("error in adin_go\n");          
00491               }
00492             }
00493             
00494           } else {
00495             
00496 #endif
00497             
00498             
00499             
00500             
00501 
00502 
00503             
00504             RealTimePipeLinePrepare();
00505             
00506             status_recready();
00507             
00508             if (module_mode) {
00509               ret = adin_go(RealTimePipeLine, msock_check_in_adin);
00510             } else {
00511               ret = adin_go(RealTimePipeLine, NULL); 
00512             }
00513             if (ret < 0) {              
00514               if (module_mode && (ret == -2 || module_wants_terminate())) {     
00515                 RealTimeTerminate();
00516                 param = NULL;
00517                 goto end_recog;
00518               }
00519               j_error("error in adin_go\n");            
00520             }
00521 #ifdef SP_BREAK_CURRENT_FRAME
00522           }
00523 #endif
00524           
00525           
00526           
00527           
00528           param = RealTimeParam(&backmax);
00529           
00530           status_recend();
00531           
00532           status_param(param);
00533           if (module_mode) {
00534             
00535             if (module_wants_terminate()) goto end_recog;
00536           }
00537           
00538           goto end_1pass;
00539           
00540         } 
00541         
00542         
00543         
00544         
00545 #ifdef SP_BREAK_CURRENT_FRAME
00546         if (rest_param == NULL) { 
00547 #endif
00548           
00549           
00550           
00551           speechlen = 0;
00552           param = NULL;
00553           
00554           if (record_dirname != NULL) {
00555             record_sample_open();
00556           }
00557           
00558           status_recready();
00559           if (module_mode) {
00560             
00561             
00562 
00563             
00564 
00565 
00566             ret = adin_go(adin_cut_callback_store_buffer, msock_check_in_adin);
00567           } else {
00568             ret = adin_go(adin_cut_callback_store_buffer, NULL);
00569           }
00570           if (ret < 0) {                
00571             if (module_mode && (ret == -2 || module_wants_terminate())) {       
00572               goto end_recog;
00573             }
00574             j_error("error in adin_go\n");              
00575           }
00576           
00577           status_recend();
00578 
00579           
00580           seclen = (float)speechlen / (float)para.smp_freq;
00581           j_printf("%d samples (%.2f sec.)\n", speechlen, seclen);
00582 
00583           
00584 
00585           
00586 
00587           if (rejectshortlen > 0) {
00588             if (seclen * 1000.0 < rejectshortlen) {
00589               result_rejected("too short input");
00590               goto end_recog;
00591             }
00592           }
00593       
00594           
00595           
00596           
00597           VERMES("### speech analysis (waveform -> MFCC)\n");
00598           
00599           param = new_wav2mfcc(speech, speechlen);
00600           if (param == NULL) {
00601             ret = -1;
00602             goto end_recog;
00603           }
00604 
00605           
00606           if (module_mode && module_wants_terminate()) goto end_recog;
00607 
00608           
00609           status_param(param);
00610 
00611 #ifdef SP_BREAK_CURRENT_FRAME
00612         }
00613 #endif
00614       } 
00615       
00616 
00617       
00618       
00619       
00620       
00621 #ifdef USE_NGRAM
00622       VERMES("### Recognition: 1st pass (LR beam with 2-gram)\n");
00623 #else
00624       VERMES("### Recognition: 1st pass (LR beam with word-pair grammar)\n");
00625 #endif
00626 
00627 
00628 
00629 
00630 
00631 
00632 
00633 
00634 
00635 
00636 
00637 
00638 
00639 
00640       if (!realtime_flag) {
00641         
00642         outprob_prepare(param->samplenum);
00643       }
00644 
00645       if (module_mode) {
00646         
00647         if (module_wants_terminate()) goto end_recog;
00648       }
00649 
00650       
00651       get_back_trellis(param, wchmm, &backtrellis, &backmax);
00652 
00653     end_1pass:
00654 
00655       
00656       
00657       
00658       
00659       
00660       
00661       
00662       
00663 
00664       if (rejectshortlen > 0) {
00665         mseclen = (float)param->samplenum * (float)para.smp_period * (float)para.frameshift / 10000.0;
00666         if (mseclen < rejectshortlen) {
00667           result_rejected("too short input");
00668           goto end_recog;
00669         }
00670       }
00671   
00672       
00673       if (compute_only_1pass) {
00674         goto end_recog;
00675       }
00676 
00677       
00678       if (backmax == LOG_ZERO) {
00679         
00680         result_pass2_failed(wchmm->winfo);
00681         ret = -1;
00682         goto end_recog;
00683       }
00684 
00685       
00686       if (module_mode && module_wants_terminate()) goto end_recog;
00687 
00688       
00689       if (gmm_reject_cmn_string != NULL) {
00690         if (! gmm_valid_input()) {
00691           result_rejected("by GMM");
00692           goto end_recog;
00693         }
00694       }
00695       
00696       
00697       
00698 #if !defined(PASS2_STRICT_IWCD) || defined(FIX_35_PASS2_STRICT_SCORE)    
00699       
00700       bt_discount_pescore(wchmm, &backtrellis, param);
00701 #endif
00702 
00703 #ifdef USE_NGRAM
00704       VERMES("### Recognition: 2nd pass (RL heuristic best-first with 3-gram)\n");
00705 #else
00706       VERMES("### Recognition: 2nd pass (RL heuristic best-first with DFA)\n");
00707 #endif
00708 
00709       
00710 #ifdef USE_NGRAM
00711       wchmm_fbs(param, &backtrellis, backmax, stack_size, nbest, hypo_overflow, 0, 0);
00712 #else  
00713       if (multigramout_flag) {
00714         
00715         
00716         MULTIGRAM *m;
00717         for(m = gramlist; m; m = m->next) {
00718           if (m->active) {
00719             j_printf("## search for gram #%d\n", m->id);
00720             wchmm_fbs(param, &backtrellis, backmax, stack_size, nbest, hypo_overflow, m->cate_begin, m->dfa->term_num);
00721           }
00722         }
00723       } else {
00724         
00725         wchmm_fbs(param, &backtrellis, backmax, stack_size, nbest, hypo_overflow, 0, dfa->term_num);
00726       }
00727 #endif
00728 
00729     end_recog:
00730       
00731       
00732       
00733 
00734       
00735       if (speech_input != SP_MFCFILE && realtime_flag && param != NULL) {
00736         RealTimeCMNUpdate(param);
00737       }
00738 
00739 #ifdef VISUALIZE
00740       
00741       visual_show(&backtrellis);
00742 #endif
00743 
00744       
00745       if (param != NULL) free_param(param);
00746 
00747       
00748       if (record_dirname != NULL) {
00749         record_sample_close();
00750       }
00751 
00752       VERMES("\n");
00753 
00754 #ifdef SP_BREAK_CURRENT_FRAME
00755       
00756       
00757       
00758       if (rest_param != NULL) {
00759         
00760         VERMES("<<<restart the rest>>>\n\n");
00761         param = rest_param;
00762       } else {
00763         
00764         if (ret <= 0 && ret != -2) break;
00765       }
00766 #else
00767       
00768       if (ret <= 0 && ret != -2) break;
00769 #endif
00770 
00771       
00772       
00773     } 
00774     
00775     
00776 
00777 
00778 
00779 
00780 
00781 
00782     if (speech_input != SP_MFCFILE) {
00783       
00784       adin_end();
00785     }
00786     
00787   }
00788 
00789 }
00790 
00791 
00792 
00793 
00794 
00821 int
00822 main(int argc, char *argv[])
00823 {
00824   
00825   
00826   
00827   
00828   system_bootup();
00829   
00830   opt_parse(argc,argv,NULL);
00831 
00832 #ifdef CHARACTER_CONVERSION
00833   if (j_printf_set_charconv(from_code, to_code) == FALSE) {
00834     j_error("Error: character set conversion setup failed\n");
00835   }
00836 #endif 
00837 
00838   
00839   check_specs();
00840 
00841   
00842   
00843   
00844   if (module_mode) {
00845     
00846     
00847     main_module_loop();
00848   } else {
00849     
00850     main_recognition_loop();
00851   }
00852 
00853   
00854   opt_release();
00855 
00856   return 0;
00857 }