00001 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 #define GLOBAL_VARIABLE_DEFINE  
00026 #include <julius.h>
00027 #include <signal.h>
00028 #if defined(_WIN32) && !defined(__CYGWIN32__)
00029 #include <mbctype.h>
00030 #include <mbstring.h>
00031 #endif
00032 
00033 
00034 
00035 #ifdef REPORT_MEMORY_USAGE
00036 
/*
 * Report the memory usage of this process.
 *
 * Builds the shell command "ps -o vsz,rss -p <pid>" for the current
 * process ID, runs it through system(), then flushes the program's own
 * output (j_flushprint()) and stderr.
 */
static void
print_mem(void)
{
  char cmd[200];

  /* bounded formatting; pid_t is cast so that it matches "%d" */
  snprintf(cmd, sizeof(cmd), "ps -o vsz,rss -p %d", (int)getpid());
  system(cmd);
  j_flushprint();
  fflush(stderr);
}
00055 #endif
00056           
00057 
00058 
00066 static SP16 *overflowed_samples = NULL;
00071 static int overflowed_samplenum;
00102 int
00103 adin_cut_callback_store_buffer(SP16 *now, int len)
00104 {
00105   if (module_mode) {
00106     
00107     msock_check_and_process_command();
00108   }
00109   if (speechlen == 0) {         
00110     
00111     status_recstart();
00112     if (module_mode) {
00113       
00114       if (module_wants_terminate() ||
00115           !module_is_active()) { 
00116         return(-2);
00117       }
00118     }
00119     if (overflowed_samples) {   
00120       
00121       memcpy(&(speech[0]), overflowed_samples, sizeof(SP16)*overflowed_samplenum);
00122       speechlen += overflowed_samplenum;
00123       free(overflowed_samples);
00124       overflowed_samples = NULL;
00125     }
00126   }
00127   if (speechlen + len > MAXSPEECHLEN) {
00128     
00129     j_printerr("Warning: too long input (> %d samples), segmented now\n", MAXSPEECHLEN);
00130     
00131     {
00132       int getlen, restlen;
00133       getlen = MAXSPEECHLEN - speechlen;
00134       restlen = len - getlen;
00135       overflowed_samples = (SP16 *)mymalloc(sizeof(SP16)*restlen);
00136       memcpy(overflowed_samples, &(now[getlen]), restlen * sizeof(SP16));
00137       if (record_dirname != NULL) {
00138         record_sample_write(&(now[getlen]), restlen);
00139       }
00140       overflowed_samplenum = restlen;
00141       memcpy(&(speech[speechlen]), now, getlen * sizeof(SP16));
00142       if (record_dirname != NULL) {
00143         record_sample_write(now, getlen);
00144       }
00145       speechlen += getlen;
00146     }
00147     return(1);                  
00148   }
00149   if (module_mode) {
00150     
00151     if (module_wants_terminate()) {
00152       speechlen = 0;
00153       return(-2);
00154     }
00155   }
00156   
00157   memcpy(&(speech[speechlen]), now, len * sizeof(SP16));
00158   if (record_dirname != NULL) {
00159     record_sample_write(now, len);
00160   }
00161   speechlen += len;
00162   return(0);                    
00163 }
00164 
00165 
00201 static char *
00202 mfcfilelist_nextfile()
00203 {
00204   static FILE *mfclist = NULL;  
00205   static char *buf;
00206   
00207   if (mfclist == NULL) {        
00208     if ((mfclist = fopen(inputlist_filename, "r")) == NULL) { 
00209       j_error("inputlist open error\n");
00210     }
00211   }
00212   buf = mymalloc(MAXLINELEN);
00213   while(getl_fp(buf, MAXLINELEN, mfclist) != NULL) {
00214     if (buf[0] == '\0') continue; 
00215     if (buf[0] == '#') continue; 
00216     
00217     return buf;
00218   }
00219   
00220   free(buf);
00221   fclose(mfclist);
00222   mfclist = NULL;
00223   return NULL;
00224 }
00225 
00226                                       
00227 
00228 
00229 
00230 
00252 void
00253 main_recognition_loop()
00254 {
00255   char *speechfilename; 
00256   HTK_Param *param = NULL;              
00257   HTK_Param *selected_param;
00258   int ret;
00259   int file_counter;
00260   float seclen, mseclen;
00261   boolean process_online = FALSE; 
00262 
00263   
00264   
00265   
00266   
00267   adin_initialize();
00268   
00269   
00270   final_fusion();
00271 
00272   
00273   print_info();
00274 
00275   
00276   file_counter = 0;
00277 
00278 #ifdef VISUALIZE
00279   
00280   visual_init();
00281 #endif
00282 
00283   
00284   
00286   
00287   
00288   for (;;) {
00289 
00290     j_printf("\n");
00291     if (verbose_flag) j_printf("------\n");
00292     j_flushprint();
00293 
00294     
00295     
00296     
00297     if (speech_input == SP_MFCFILE) {
00298       
00299       
00300       
00301       VERMES("### read analyzed parameter\n");
00302       
00303       if (inputlist_filename != NULL) { 
00304         speechfilename = mfcfilelist_nextfile();
00305       } else {
00306         speechfilename = get_line("enter MFCC filename->");
00307       }
00308       if (speechfilename == NULL) {
00309         
00310         j_printerr("%d files processed\n", file_counter);
00311 #ifdef REPORT_MEMORY_USAGE
00312         print_mem();
00313 #endif
00314         j_exit();
00315       }
00316       if (verbose_flag) j_printf("\ninput MFCC file: %s\n",speechfilename);
00317       
00318       param = new_param();
00319       if (rdparam(speechfilename, param) == FALSE) {
00320         j_printerr("error in reading MFCC file: %s\n",speechfilename);
00321         free(speechfilename);
00322         free_param(param);
00323         continue;
00324       }
00325       
00326       if (strip_zero_sample) {
00327         param_strip_zero(param);
00328       }
00329       free(speechfilename);
00330       
00331       status_param(param);
00332       
00333       file_counter++;
00334     } else {                    
00335       
00336       
00337       
00338       VERMES("### read waveform input\n");
00339       
00340       if (adin_begin() == FALSE) {
00341         
00342         if (speech_input == SP_RAWFILE) {
00343           j_printerr("%d files processed\n", file_counter);
00344           j_exit();  
00345         } else if (speech_input == SP_STDIN) {
00346           j_exit();  
00347         } else {
00348           j_error("failed to begin input stream\n");
00349         }
00350       }
00351       
00352       if (speech_input == SP_RAWFILE) {
00353         file_counter++;
00354       }
00355     }
00356     
00357 #ifdef USE_DFA
00358     
00359     if (module_mode) {
00360       if (dfa == NULL || winfo == NULL) { 
00361         msock_exec_command("PAUSE");
00362       }
00363     }
00364 #endif
00365 
00366     if (!module_mode) {
00367       
00368       process_online = TRUE;
00369       status_process_online();
00370     }
00371   
00372     
00373     
00374     
00375     while (1) {
00376 
00377     start_recog:
00378 
00379       if (module_mode) {
00380         
00381         
00382         
00383         
00384 
00385 
00386 
00387 
00388 
00389         
00390         if (process_online != module_is_active()) {
00391           process_online = module_is_active();
00392           if (process_online) status_process_online();
00393           else status_process_offline();
00394         }
00395         if (module_is_active()) {
00396           
00397           msock_check_and_process_command();
00398         }
00399         module_reset_reload();  
00400         while (! module_is_active()) {    
00401           
00402           
00403           
00404           msock_process_command();
00405         }
00406         
00407         if (process_online != module_is_active()) {
00408           process_online = module_is_active();
00409           if (process_online) status_process_online();
00410           else status_process_offline();
00411         }
00412 #ifdef USE_DFA
00413         
00414         
00415         
00416         multigram_exec();
00417         if (dfa == NULL || winfo == NULL) { 
00418           msock_exec_command("PAUSE");
00419           goto start_recog;
00420         }
00421 #endif
00422       }
00423 
00424       if (speech_input == SP_MFCFILE) {
00425         
00426         
00427         
00428         
00429         
00430         
00431         
00432 
00433         if (paramtype_check_flag) {
00434           
00435           selected_param = new_param_check_and_adjust(hmminfo, param, verbose_flag);
00436           if (selected_param == NULL) { 
00437             free_param(param);
00438             param = NULL;
00439             goto end_recog;
00440           }
00441           param = selected_param;
00442         }
00443         
00444         
00445         ret = 0;
00446       } else {
00447         
00448         
00449         
00450         if (realtime_flag) {
00451           
00452           
00453           
00454           
00455           
00456 
00457 
00458 
00459           
00460 #ifdef SP_BREAK_CURRENT_FRAME
00461           if (rest_param) {
00462             
00463             
00464             
00465             
00466             
00467             
00468             ret = RealTimeResume();
00469             if (ret < 0) {              
00470               j_error("error in resuming last fragment\n"); 
00471             }
00472             if (ret != 1) {     
00473               
00474 
00475               
00476               status_recready();
00477               if (module_mode) {
00478                 
00479                 ret = adin_go(RealTimePipeLine, msock_check_in_adin);
00480               } else {
00481                 
00482                 ret = adin_go(RealTimePipeLine, NULL);
00483               }
00484               if (ret < 0) {            
00485                 if (module_mode && (ret == -2 || module_wants_terminate())) {   
00486                   RealTimeTerminate();
00487                   param = NULL;
00488                   goto end_recog; 
00489                 }
00490                 j_error("error in adin_go\n");          
00491               }
00492             }
00493             
00494           } else {
00495             
00496 #endif
00497             
00498             
00499             
00500             
00501 
00502 
00503             
00504             RealTimePipeLinePrepare();
00505             
00506             status_recready();
00507             
00508             if (module_mode) {
00509               ret = adin_go(RealTimePipeLine, msock_check_in_adin);
00510             } else {
00511               ret = adin_go(RealTimePipeLine, NULL); 
00512             }
00513             if (ret < 0) {              
00514               if (module_mode && (ret == -2 || module_wants_terminate())) {     
00515                 RealTimeTerminate();
00516                 param = NULL;
00517                 goto end_recog;
00518               }
00519               j_error("error in adin_go\n");            
00520             }
00521 #ifdef SP_BREAK_CURRENT_FRAME
00522           }
00523 #endif
00524           
00525           
00526           
00527           
00528           param = RealTimeParam(&backmax);
00529           
00530           status_recend();
00531           
00532           status_param(param);
00533           if (module_mode) {
00534             
00535             if (module_wants_terminate()) goto end_recog;
00536           }
00537           
00538           goto end_1pass;
00539           
00540         } 
00541         
00542         
00543         
00544         
00545 #ifdef SP_BREAK_CURRENT_FRAME
00546         if (rest_param == NULL) { 
00547 #endif
00548           
00549           
00550           
00551           speechlen = 0;
00552           param = NULL;
00553           
00554           if (record_dirname != NULL) {
00555             record_sample_open();
00556           }
00557           
00558           status_recready();
00559           if (module_mode) {
00560             
00561             
00562 
00563             
00564 
00565 
00566             ret = adin_go(adin_cut_callback_store_buffer, msock_check_in_adin);
00567           } else {
00568             ret = adin_go(adin_cut_callback_store_buffer, NULL);
00569           }
00570           if (ret < 0) {                
00571             if (module_mode && (ret == -2 || module_wants_terminate())) {       
00572               goto end_recog;
00573             }
00574             j_error("error in adin_go\n");              
00575           }
00576           
00577           status_recend();
00578 
00579           
00580           seclen = (float)speechlen / (float)smpFreq;
00581           j_printf("%d samples (%.2f sec.)\n", speechlen, seclen);
00582 
00583           
00584 
00585           
00586 
00587           if (rejectshortlen > 0) {
00588             if (seclen * 1000.0 < rejectshortlen) {
00589               result_rejected("too short input");
00590               goto end_recog;
00591             }
00592           }
00593       
00594           
00595           
00596           
00597           VERMES("### speech analysis (waveform -> MFCC)\n");
00598           
00599           param = new_wav2mfcc(speech, speechlen);
00600           if (param == NULL) goto end_recog;
00601 
00602           
00603           if (module_mode && module_wants_terminate()) goto end_recog;
00604 
00605           
00606           status_param(param);
00607 
00608 #ifdef SP_BREAK_CURRENT_FRAME
00609         }
00610 #endif
00611       } 
00612       
00613 
00614       
00615       
00616       
00617       
00618 #ifdef USE_NGRAM
00619       VERMES("### Recognition: 1st pass (LR beam with 2-gram)\n");
00620 #else
00621       VERMES("### Recognition: 1st pass (LR beam with word-pair grammar)\n");
00622 #endif
00623 
00624 
00625 
00626 
00627 
00628 
00629 
00630 
00631 
00632 
00633 
00634 
00635 
00636 
00637       if (!realtime_flag) {
00638         
00639         outprob_prepare(param->samplenum);
00640       }
00641 
00642       if (module_mode) {
00643         
00644         if (module_wants_terminate()) goto end_recog;
00645       }
00646 
00647       
00648       get_back_trellis(param, wchmm, &backtrellis, &backmax);
00649 
00650     end_1pass:
00651 
00652       
00653       
00654       
00655       
00656       
00657       
00658       
00659       
00660 
00661       if (rejectshortlen > 0) {
00662         mseclen = (float)param->samplenum * (float)smpPeriod * (float)fshift / 10000.0;
00663         if (mseclen < rejectshortlen) {
00664           result_rejected("too short input");
00665           goto end_recog;
00666         }
00667       }
00668   
00669       
00670       if (compute_only_1pass) {
00671         goto end_recog;
00672       }
00673 
00674       
00675       if (backmax == LOG_ZERO) {
00676         
00677         result_pass2_failed(wchmm->winfo);
00678         goto end_recog;
00679       }
00680 
00681       
00682       if (module_mode && module_wants_terminate()) goto end_recog;
00683 
00684       
00685       if (gmm_reject_cmn_string != NULL) {
00686         if (! gmm_valid_input()) {
00687           result_rejected("by GMM");
00688           goto end_recog;
00689         }
00690       }
00691       
00692       
00693       
00694 #if !defined(PASS2_STRICT_IWCD) || defined(FIX_35_PASS2_STRICT_SCORE)    
00695       
00696       bt_discount_pescore(wchmm, &backtrellis, param);
00697 #endif
00698 
00699 #ifdef USE_NGRAM
00700       VERMES("### Recognition: 2nd pass (RL heuristic best-first with 3-gram)\n");
00701 #else
00702       VERMES("### Recognition: 2nd pass (RL heuristic best-first with DFA)\n");
00703 #endif
00704 
00705       
00706 #ifdef USE_NGRAM
00707       wchmm_fbs(param, &backtrellis, backmax, stack_size, nbest, hypo_overflow, 0, 0);
00708 #else  
00709       if (multigramout_flag) {
00710         
00711         
00712         MULTIGRAM *m;
00713         for(m = gramlist; m; m = m->next) {
00714           if (m->active) {
00715             j_printf("## search for gram #%d\n", m->id);
00716             wchmm_fbs(param, &backtrellis, backmax, stack_size, nbest, hypo_overflow, m->cate_begin, m->dfa->term_num);
00717           }
00718         }
00719       } else {
00720         
00721         wchmm_fbs(param, &backtrellis, backmax, stack_size, nbest, hypo_overflow, 0, dfa->term_num);
00722       }
00723 #endif
00724 
00725     end_recog:
00726       
00727       
00728       
00729 
00730       
00731       if (speech_input != SP_MFCFILE && realtime_flag && param != NULL) {
00732         RealTimeCMNUpdate(param);
00733       }
00734 
00735 #ifdef VISUALIZE
00736       
00737       visual_show(&backtrellis);
00738 #endif
00739 
00740       
00741       if (param != NULL) free_param(param);
00742 
00743       
00744       if (record_dirname != NULL) {
00745         record_sample_close();
00746       }
00747 
00748       VERMES("\n");
00749 
00750 #ifdef SP_BREAK_CURRENT_FRAME
00751       
00752       
00753       
00754       if (rest_param != NULL) {
00755         
00756         VERMES("<<<restart the rest>>>\n\n");
00757         param = rest_param;
00758       } else {
00759         
00760         if (ret <= 0 && ret != -2) break;
00761       }
00762 #else
00763       
00764       if (ret <= 0 && ret != -2) break;
00765 #endif
00766 
00767       
00768       
00769     } 
00770     
00771     
00772 
00773 
00774 
00775 
00776 
00777 
00778     if (speech_input != SP_MFCFILE) {
00779       
00780       adin_end();
00781     }
00782     
00783   }
00784 
00785 }
00786 
00787 
00788 
00789 
00790 
00817 int
00818 main(int argc, char *argv[])
00819 {
00820   
00821   
00822   
00823   
00824   system_bootup();
00825   
00826   opt_parse(argc,argv,NULL);
00827 
00828 #ifdef CHARACTER_CONVERSION
00829   if (j_printf_set_charconv(from_code, to_code) == FALSE) {
00830     j_error("Error: character set conversion setup failed\n");
00831   }
00832 #endif 
00833 
00834   
00835   check_specs();
00836 
00837   
00838   
00839   
00840   if (module_mode) {
00841     
00842     
00843     main_module_loop();
00844   } else {
00845     
00846     main_recognition_loop();
00847   }
00848 
00849   return 0;
00850 }