00001
00026
00027
00028
00029
00030
00031
00032
00033 #include <julius/julius.h>
00034
00072 static HTK_HMM_INFO *
00073 initialize_HMM(JCONF_AM *amconf, Jconf *jconf)
00074 {
00075 HTK_HMM_INFO *hmminfo;
00076
00077
00078
00079 if (amconf->analysis.para_hmm.loaded == 1) {
00080 jlog("Warning: you seems to read more than one acoustic model for recognition, but\n");
00081 jlog("Warning: previous one already has header-embedded acoustic parameters\n");
00082 jlog("Warning: if you have different parameters, result may be wrong!\n");
00083 }
00084
00085
00086 hmminfo = hmminfo_new();
00087
00088 if (init_hmminfo(hmminfo, amconf->hmmfilename, amconf->mapfilename, &(amconf->analysis.para_hmm)) == FALSE) {
00089 hmminfo_free(hmminfo);
00090 return NULL;
00091 }
00092
00093
00094 if (amconf->force_multipath) {
00095 jlog("STAT: m_fusion: force multipath HMM handling by user request\n");
00096 hmminfo->multipath = TRUE;
00097 } else {
00098 hmminfo->multipath = hmminfo->need_multipath;
00099 }
00100
00101
00102
00103
00104 if (jconf->input.type == INPUT_WAVEFORM) {
00105
00106
00107 if ((hmminfo->opt.param_type & F_BASEMASK) != F_MFCC) {
00108 jlog("ERROR: m_fusion: for direct speech input, only HMM trained by MFCC is supported\n");
00109 hmminfo_free(hmminfo);
00110 return NULL;
00111 }
00112
00113 calc_para_from_header(&(amconf->analysis.para), hmminfo->opt.param_type, hmminfo->opt.vec_size);
00114 }
00115
00116 if (hmminfo->is_tied_mixture && hmminfo->codebooknum <= 0) {
00117 jlog("ERROR: m_fusion: this tied-mixture model has no codebook!?\n");
00118 hmminfo_free(hmminfo);
00119 return NULL;
00120 }
00121
00122 #ifdef PASS1_IWCD
00123
00124 if (hmminfo->is_triphone) {
00125 jlog("STAT: making pseudo bi/mono-phone for IW-triphone\n");
00126 if (make_cdset(hmminfo) == FALSE) {
00127 jlog("ERROR: m_fusion: failed to make context-dependent state set\n");
00128 hmminfo_free(hmminfo);
00129 return NULL;
00130 }
00131
00132
00133 hmm_add_pseudo_phones(hmminfo);
00134 }
00135 #endif
00136
00137
00138 htk_hmm_set_pause_model(hmminfo, amconf->spmodel_name);
00139
00140
00141 hmminfo->cdset_method = amconf->iwcdmethod;
00142 hmminfo->cdmax_num = amconf->iwcdmaxn;
00143
00144 if (amconf->analysis.para_htk.loaded == 1) apply_para(&(amconf->analysis.para), &(amconf->analysis.para_htk));
00145 if (amconf->analysis.para_hmm.loaded == 1) apply_para(&(amconf->analysis.para), &(amconf->analysis.para_hmm));
00146 apply_para(&(amconf->analysis.para), &(amconf->analysis.para_default));
00147
00148 return(hmminfo);
00149
00150 }
00151
00165 static HTK_HMM_INFO *
00166 initialize_GSHMM(JCONF_AM *amconf)
00167 {
00168 HTK_HMM_INFO *hmm_gs;
00169 Value para_dummy;
00170
00171 jlog("STAT: Reading GS HMMs:\n");
00172 hmm_gs = hmminfo_new();
00173 undef_para(¶_dummy);
00174 if (init_hmminfo(hmm_gs, amconf->hmm_gs_filename, NULL, ¶_dummy) == FALSE) {
00175 hmminfo_free(hmm_gs);
00176 return NULL;
00177 }
00178 return(hmm_gs);
00179 }
00180
00197 static HTK_HMM_INFO *
00198 initialize_GMM(Jconf *jconf)
00199 {
00200 HTK_HMM_INFO *gmm;
00201
00202 jlog("STAT: reading GMM: %s\n", jconf->reject.gmm_filename);
00203
00204 if (jconf->gmm == NULL) {
00205
00206
00207 jlog("STAT: -AM_GMM not used, use parameter of the first AM\n");
00208 jconf->gmm = j_jconf_am_new();
00209 memcpy(jconf->gmm, jconf->am_root, sizeof(JCONF_AM));
00210 jconf->gmm->hmmfilename = NULL;
00211 jconf->gmm->mapfilename = NULL;
00212 jconf->gmm->spmodel_name = NULL;
00213 jconf->gmm->hmm_gs_filename = NULL;
00214 if (jconf->am_root->analysis.cmnload_filename) {
00215 jconf->gmm->analysis.cmnload_filename = strcpy((char *)mymalloc(strlen(jconf->am_root->analysis.cmnload_filename)+ 1), jconf->am_root->analysis.cmnload_filename);
00216 }
00217 if (jconf->am_root->analysis.cmnsave_filename) {
00218 jconf->gmm->analysis.cmnsave_filename = strcpy((char *)mymalloc(strlen(jconf->am_root->analysis.cmnsave_filename)+ 1), jconf->am_root->analysis.cmnsave_filename);
00219 }
00220 if (jconf->am_root->frontend.ssload_filename) {
00221 jconf->gmm->frontend.ssload_filename = strcpy((char *)mymalloc(strlen(jconf->am_root->frontend.ssload_filename)+ 1), jconf->am_root->frontend.ssload_filename);
00222 }
00223 }
00224
00225 gmm = hmminfo_new();
00226 if (init_hmminfo(gmm, jconf->reject.gmm_filename, NULL, &(jconf->gmm->analysis.para_hmm)) == FALSE) {
00227 hmminfo_free(gmm);
00228 return NULL;
00229 }
00230
00231 if (jconf->input.type == INPUT_WAVEFORM) {
00232
00233
00234 if ((gmm->opt.param_type & F_BASEMASK) != F_MFCC) {
00235 jlog("ERROR: m_fusion: for direct speech input, only GMM trained by MFCC is supported\n");
00236 hmminfo_free(gmm);
00237 return NULL;
00238 }
00239 }
00240
00241
00242 calc_para_from_header(&(jconf->gmm->analysis.para), gmm->opt.param_type, gmm->opt.vec_size);
00243
00244 if (jconf->gmm->analysis.para_htk.loaded == 1) apply_para(&(jconf->gmm->analysis.para), &(jconf->gmm->analysis.para_htk));
00245 if (jconf->gmm->analysis.para_hmm.loaded == 1) apply_para(&(jconf->gmm->analysis.para), &(jconf->gmm->analysis.para_hmm));
00246 apply_para(&(jconf->gmm->analysis.para), &(jconf->gmm->analysis.para_default));
00247
00248 return(gmm);
00249 }
00250
00284 static WORD_INFO *
00285 initialize_dict(JCONF_LM *lmconf, HTK_HMM_INFO *hmminfo)
00286 {
00287 WORD_INFO *winfo;
00288
00289
00290 winfo = word_info_new();
00291
00292 if ( !
00293 #ifdef MONOTREE
00294
00295 init_voca(winfo, lmconf->dictfilename, hmminfo, TRUE, lmconf->forcedict_flag)
00296 #else
00297 init_voca(winfo, lmconf->dictfilename, hmminfo, FALSE, lmconf->forcedict_flag)
00298 #endif
00299 ) {
00300 jlog("ERROR: m_fusion: failed to read dictionary, terminated\n");
00301 word_info_free(winfo);
00302 return NULL;
00303 }
00304
00305 if (lmconf->lmtype == LM_PROB) {
00306
00307 if (lmconf->enable_iwspword) {
00308 if (
00309 #ifdef MONOTREE
00310 voca_append_htkdict(lmconf->iwspentry, winfo, hmminfo, TRUE)
00311 #else
00312 voca_append_htkdict(lmconf->iwspentry, winfo, hmminfo, FALSE)
00313 #endif
00314 == FALSE) {
00315 jlog("ERROR: m_fusion: failed to make IW-sp word entry \"%s\"\n", lmconf->iwspentry);
00316 word_info_free(winfo);
00317 return NULL;
00318 } else {
00319 jlog("STAT: 1 IW-sp word entry added\n");
00320 }
00321 }
00322
00323 winfo->head_silwid = voca_lookup_wid(lmconf->head_silname, winfo);
00324 if (winfo->head_silwid == WORD_INVALID) {
00325 jlog("ERROR: m_fusion: head sil word \"%s\" not exist in voca\n", lmconf->head_silname);
00326 word_info_free(winfo);
00327 return NULL;
00328 }
00329 winfo->tail_silwid = voca_lookup_wid(lmconf->tail_silname, winfo);
00330 if (winfo->tail_silwid == WORD_INVALID) {
00331 jlog("ERROR: m_fusion: tail sil word \"%s\" not exist in voca\n", lmconf->tail_silname);
00332 word_info_free(winfo);
00333 return NULL;
00334 }
00335 }
00336
00337 return(winfo);
00338
00339 }
00340
00341
00374 static NGRAM_INFO *
00375 initialize_ngram(JCONF_LM *lmconf, WORD_INFO *winfo)
00376 {
00377 NGRAM_INFO *ngram;
00378 boolean ret;
00379
00380
00381 ngram = ngram_info_new();
00382
00383 if (lmconf->ngram_filename != NULL) {
00384 ret = init_ngram_bin(ngram, lmconf->ngram_filename);
00385 } else {
00386
00387
00388
00389 if (lmconf->ngram_filename_rl_arpa) {
00390 ret = init_ngram_arpa(ngram, lmconf->ngram_filename_rl_arpa, DIR_RL);
00391 if (ret == FALSE) {
00392 ngram_info_free(ngram);
00393 return NULL;
00394 }
00395 if (lmconf->ngram_filename_lr_arpa) {
00396 ret = init_ngram_arpa_additional(ngram, lmconf->ngram_filename_lr_arpa);
00397 if (ret == FALSE) {
00398 ngram_info_free(ngram);
00399 return NULL;
00400 }
00401 }
00402 } else if (lmconf->ngram_filename_lr_arpa) {
00403 ret = init_ngram_arpa(ngram, lmconf->ngram_filename_lr_arpa, DIR_LR);
00404 }
00405 }
00406 if (ret == FALSE) {
00407 ngram_info_free(ngram);
00408 return NULL;
00409 }
00410
00411
00412 set_unknown_id(ngram, lmconf->unknown_name);
00413
00414
00415 if (make_voca_ref(ngram, winfo) == FALSE) {
00416 ngram_info_free(ngram);
00417 return NULL;
00418 }
00419
00420
00421 fix_uniprob_srilm(ngram, winfo);
00422
00423 return(ngram);
00424 }
00425
00458 boolean
00459 j_load_am(Recog *recog, JCONF_AM *amconf)
00460 {
00461 PROCESS_AM *am;
00462
00463 jlog("STAT: *** loading AM%02d %s\n", amconf->id, amconf->name);
00464
00465
00466 am = j_process_am_new(recog, amconf);
00467
00468
00469 if ((am->hmminfo = initialize_HMM(amconf, recog->jconf)) == NULL) {
00470 jlog("ERROR: m_fusion: failed to initialize AM\n");
00471 return FALSE;
00472 }
00473 if (amconf->hmm_gs_filename != NULL) {
00474 if ((am->hmm_gs = initialize_GSHMM(amconf)) == NULL) {
00475 jlog("ERROR: m_fusion: failed to initialize GS HMM\n");
00476 return FALSE;
00477 }
00478 }
00479
00480
00481
00482
00483 if (am->config->gprune_method == GPRUNE_SEL_UNDEF) {
00484 if (am->hmminfo->is_tied_mixture) {
00485
00486 #if defined(GPRUNE_DEFAULT_SAFE)
00487 am->config->gprune_method = GPRUNE_SEL_SAFE;
00488 #elif defined(GPRUNE_DEFAULT_HEURISTIC)
00489 am->config->gprune_method = GPRUNE_SEL_HEURISTIC;
00490 #elif defined(GPRUNE_DEFAULT_BEAM)
00491 am->config->gprune_method = GPRUNE_SEL_BEAM;
00492 #endif
00493 } else {
00494
00495 am->config->gprune_method = GPRUNE_SEL_NONE;
00496 }
00497 }
00498
00499
00500
00501 amconf->analysis.para.loaded = 0;
00502
00503 jlog("STAT: *** AM%02d %s loaded\n", amconf->id, amconf->name);
00504
00505 return TRUE;
00506 }
00507
00547 boolean
00548 j_load_lm(Recog *recog, JCONF_LM *lmconf)
00549 {
00550 JCONF_SEARCH *sh;
00551 PROCESS_LM *lm;
00552 PROCESS_AM *am, *atmp;
00553
00554 jlog("STAT: *** loading LM%02d %s\n", lmconf->id, lmconf->name);
00555
00556
00557 am = NULL;
00558 for(sh=recog->jconf->search_root;sh;sh=sh->next) {
00559 if (sh->lmconf == lmconf) {
00560 for(atmp=recog->amlist;atmp;atmp=atmp->next) {
00561 if (sh->amconf == atmp->config) {
00562 am = atmp;
00563 }
00564 }
00565 }
00566 }
00567 if (am == NULL) {
00568 jlog("ERROR: cannot find corresponding AM for LM%02d %s\n", lmconf->id, lmconf->name);
00569 jlog("ERROR: you should write all AM/LM combinations to be used for recognition with \"-SR\"\n");
00570 return FALSE;
00571 }
00572
00573
00574 lm = j_process_lm_new(recog, lmconf);
00575
00576
00577 lm->am = am;
00578
00579
00580 if (lm->lmtype == LM_PROB) {
00581
00582 if ((lm->winfo = initialize_dict(lm->config, lm->am->hmminfo)) == NULL) {
00583 jlog("ERROR: m_fusion: failed to initialize dictionary\n");
00584 return FALSE;
00585 }
00586 if (lm->config->ngram_filename_lr_arpa || lm->config->ngram_filename_rl_arpa || lm->config->ngram_filename) {
00587 if ((lm->ngram = initialize_ngram(lm->config, lm->winfo)) == NULL) {
00588 jlog("ERROR: m_fusion: failed to initialize N-gram\n");
00589 return FALSE;
00590 }
00591 }
00592 }
00593 if (lm->lmtype == LM_DFA) {
00594
00595 if (lm->config->dfa_filename != NULL && lm->config->dictfilename != NULL) {
00596
00597 multigram_add_gramlist(lm->config->dfa_filename, lm->config->dictfilename, lm->config, LM_DFA_GRAMMAR);
00598 }
00599
00600 if (multigram_load_all_gramlist(lm) == FALSE) {
00601 jlog("ERROR: m_fusion: some error occured in reading grammars\n");
00602 return FALSE;
00603 }
00604
00605 multigram_update(lm);
00606
00607
00608 lm->global_modified = FALSE;
00609 }
00610
00611 jlog("STAT: *** LM%02d %s loaded\n", lmconf->id, lmconf->name);
00612
00613 return TRUE;
00614 }
00615
00616
00645 boolean
00646 j_load_all(Recog *recog, Jconf *jconf)
00647 {
00648 JCONF_AM *amconf;
00649 JCONF_LM *lmconf;
00650
00651
00652 recog->jconf = jconf;
00653
00654
00655 for(amconf=jconf->am_root;amconf;amconf=amconf->next) {
00656 if (j_load_am(recog, amconf) == FALSE) return FALSE;
00657 }
00658
00659
00660 for(lmconf=jconf->lm_root;lmconf;lmconf=lmconf->next) {
00661 if (j_load_lm(recog, lmconf) == FALSE) return FALSE;
00662 }
00663
00664
00665 if (jconf->reject.gmm_filename != NULL) {
00666 jlog("STAT: loading GMM\n");
00667 if ((recog->gmm = initialize_GMM(jconf)) == NULL) {
00668 jlog("ERROR: m_fusion: failed to initialize GMM\n");
00669 return FALSE;
00670 }
00671 }
00672
00673
00674 {
00675 boolean ok_p;
00676
00677
00678 jconf->input.sfreq = jconf->am_root->analysis.para.smp_freq;
00679 jconf->input.period = jconf->am_root->analysis.para.smp_period;
00680 jconf->input.frameshift = jconf->am_root->analysis.para.frameshift;
00681 jconf->input.framesize = jconf->am_root->analysis.para.framesize;
00682
00683 ok_p = TRUE;
00684 for(amconf = jconf->am_root; amconf; amconf = amconf->next) {
00685 if (jconf->input.sfreq != amconf->analysis.para.smp_freq) ok_p = FALSE;
00686 }
00687 if (!ok_p) {
00688 jlog("ERROR: required sampling rate differs in AMs!\n");
00689 for(amconf = jconf->am_root; amconf; amconf = amconf->next) {
00690 jlog("ERROR: AM%02d %s: %dHz\n", amconf->analysis.para.smp_freq);
00691 }
00692 return FALSE;
00693 }
00694
00695 if (recog->gmm) {
00696 if (jconf->input.sfreq != jconf->gmm->analysis.para.smp_freq) {
00697 jlog("ERROR: required sampling rate differs between AM and GMM!\n");
00698 jlog("ERROR: AM : %dHz\n", jconf->input.sfreq);
00699 jlog("ERROR: GMM: %dHz\n", jconf->gmm->analysis.para.smp_freq);
00700 return FALSE;
00701 }
00702 }
00703 for(amconf = jconf->am_root; amconf; amconf = amconf->next) {
00704 if (jconf->input.frameshift != amconf->analysis.para.frameshift) ok_p = FALSE;
00705 }
00706 if (!ok_p) {
00707 jlog("ERROR: requested frame shift differs in AMs!\n");
00708 for(amconf = jconf->am_root; amconf; amconf = amconf->next) {
00709 jlog("ERROR: AM%02d %s: %d samples\n", amconf->analysis.para.frameshift);
00710 }
00711 return FALSE;
00712 }
00713
00714 if (recog->gmm) {
00715 if (jconf->input.frameshift != jconf->gmm->analysis.para.frameshift) {
00716 jlog("ERROR: required frameshift differs between AM and GMM!\n");
00717 jlog("ERROR: AM : %d samples\n", jconf->input.frameshift);
00718 jlog("ERROR: GMM: %d samples\n", jconf->gmm->analysis.para.frameshift);
00719 return FALSE;
00720 }
00721 }
00722 for(amconf = jconf->am_root; amconf; amconf = amconf->next) {
00723 if (jconf->input.framesize != amconf->analysis.para.framesize) ok_p = FALSE;
00724 }
00725 if (!ok_p) {
00726 jlog("ERROR: requested frame size (window length) differs in AMs!\n");
00727 for(amconf = jconf->am_root; amconf; amconf = amconf->next) {
00728 jlog("ERROR: AM%02d %s: %d samples\n", amconf->analysis.para.framesize);
00729 }
00730 return FALSE;
00731 }
00732
00733 if (recog->gmm) {
00734 if (jconf->input.framesize != jconf->gmm->analysis.para.framesize) {
00735 jlog("ERROR: requested frame size differs between AM and GMM!\n");
00736 jlog("ERROR: AM : %d samples\n", jconf->input.framesize);
00737 jlog("ERROR: GMM: %d samples\n", jconf->gmm->analysis.para.framesize);
00738 return FALSE;
00739 }
00740 }
00741 }
00742
00743 return TRUE;
00744 }
00745
00763 static boolean
00764 mfcc_config_is_same(JCONF_AM *amconf, MFCCCalc *mfcc)
00765 {
00766 char *s1, *s2;
00767
00768
00769
00770 if (&(amconf->analysis.para) == mfcc->para || memcmp(&(amconf->analysis.para), mfcc->para, sizeof(Value)) == 0) {
00771 s1 = amconf->analysis.cmnload_filename;
00772 s2 = mfcc->cmn.load_filename;
00773 if (s1 == s2 || (s1 && s2 && strmatch(s1, s2))) {
00774 s1 = amconf->analysis.cmnsave_filename;
00775 s2 = mfcc->cmn.save_filename;
00776 if (s1 == s2 || (s1 && s2 && strmatch(s1, s2))) {
00777 if (amconf->analysis.cmn_update == mfcc->cmn.update
00778 && amconf->analysis.cmn_map_weight == mfcc->cmn.map_weight) {
00779 if (amconf->frontend.ss_alpha == mfcc->frontend.ss_alpha
00780 && amconf->frontend.ss_floor == mfcc->frontend.ss_floor
00781 && amconf->frontend.sscalc == mfcc->frontend.sscalc
00782 && amconf->frontend.sscalc_len == mfcc->frontend.sscalc_len) {
00783 s1 = amconf->frontend.ssload_filename;
00784 s2 = mfcc->frontend.ssload_filename;
00785 if (s1 == s2 || (s1 && s2 && strmatch(s1, s2))) {
00786 return TRUE;
00787 }
00788 }
00789 }
00790 }
00791 }
00792 }
00793
00794 return FALSE;
00795 }
00796
00797
00798
00799
00800
00825 void
00826 create_mfcc_calc_instances(Recog *recog)
00827 {
00828 PROCESS_AM *am;
00829 MFCCCalc *mfcc;
00830 int count;
00831
00832 jlog("STAT: *** create MFCC calculation modules from AM\n");
00833 count = 0;
00834 for(am=recog->amlist;am;am=am->next) {
00835 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00836 if (mfcc_config_is_same(am->config, mfcc)) {
00837
00838 jlog("STAT: AM%02d %s: share MFCC%02d\n", am->config->id, am->config->name, mfcc->id);
00839 am->mfcc = mfcc;
00840 break;
00841 }
00842 }
00843 if (!mfcc) {
00844
00845 count++;
00846
00847 mfcc = j_mfcccalc_new(am->config);
00848 mfcc->id = count;
00849
00850 am->mfcc = mfcc;
00851
00852 mfcc->next = recog->mfcclist;
00853 recog->mfcclist = mfcc;
00854 jlog("STAT: AM%2d %s: create a new module MFCC%02d\n", am->config->id, am->config->name, mfcc->id);
00855 }
00856 }
00857
00858
00859 if (recog->gmm) {
00860
00861 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
00862 if (mfcc_config_is_same(recog->jconf->gmm, mfcc)) {
00863
00864 jlog("STAT: GMM: share MFCC%02d\n", mfcc->id);
00865 recog->gmmmfcc = mfcc;
00866 break;
00867 }
00868 }
00869 if (!mfcc) {
00870
00871 count++;
00872
00873 mfcc = j_mfcccalc_new(recog->jconf->gmm);
00874 mfcc->id = count;
00875
00876 recog->gmmmfcc = mfcc;
00877
00878 mfcc->next = recog->mfcclist;
00879 recog->mfcclist = mfcc;
00880 jlog("STAT: GMM: create a new module MFCC%02d\n", mfcc->id);
00881 }
00882 }
00883
00884 jlog("STAT: %d MFCC modules created\n", count);
00885 }
00886
00919 boolean
00920 j_launch_recognition_instance(Recog *recog, JCONF_SEARCH *sconf)
00921 {
00922 RecogProcess *p;
00923 PROCESS_AM *am;
00924 PROCESS_LM *lm;
00925
00926 jlog("STAT: composing recognizer instance SR%02d %s (AM%02d %s, LM%02d %s)\n", sconf->id, sconf->name, sconf->amconf->id, sconf->amconf->name, sconf->lmconf->id, sconf->lmconf->name);
00927
00928
00929 p = j_recogprocess_new(recog, sconf);
00930
00931
00932 for(lm=recog->lmlist;lm;lm=lm->next) {
00933 if (sconf->lmconf == lm->config) {
00934 for(am=recog->amlist;am;am=am->next) {
00935 if (sconf->amconf == am->config) {
00936 p->am = am;
00937 p->lm = lm;
00938 }
00939 }
00940 }
00941 }
00942
00943 if (p->config->sw.triphone_check_flag && p->am->hmminfo->is_triphone) {
00944
00945 hmm_check(p);
00946 }
00947
00948
00949
00950
00951
00952
00953
00954 p->lmtype = p->lm->lmtype;
00955 p->lmvar = p->lm->lmvar;
00956 p->graphout = p->config->graph.enabled;
00957
00958
00959 if (p->config->force_ccd_handling) {
00960 p->ccd_flag = p->config->ccd_handling;
00961 } else {
00962 if (p->am->hmminfo->is_triphone) {
00963 p->ccd_flag = TRUE;
00964 } else {
00965 p->ccd_flag = FALSE;
00966 }
00967 }
00968
00969
00970 if (p->lm->config->enable_iwsp) {
00971 if (p->am->hmminfo->multipath) {
00972
00973 if (p->am->hmminfo->sp == NULL) {
00974 jlog("ERROR: iwsp enabled but no short pause model \"%s\" in hmmdefs\n", p->am->config->spmodel_name);
00975 return FALSE;
00976 }
00977 p->am->hmminfo->iwsp_penalty = p->am->config->iwsp_penalty;
00978 } else {
00979 jlog("Warning: \"-iwsp\" is supported on multi-path mode, ignored\n");
00980 }
00981 }
00982
00983
00984 if (p->config->successive.enabled) {
00985 if (p->config->successive.pausemodelname) {
00986
00987 char *s;
00988 int n;
00989 p->pass1.pausemodelnames = (char*)mymalloc(strlen(p->config->successive.pausemodelname)+1);
00990 strcpy(p->pass1.pausemodelnames, p->config->successive.pausemodelname);
00991 n = 0;
00992 for (s = strtok(p->pass1.pausemodelnames, " ,"); s; s = strtok(NULL, " ,")) {
00993 n++;
00994 }
00995 p->pass1.pausemodelnum = n;
00996 p->pass1.pausemodel = (char **)mymalloc(sizeof(char *) * n);
00997 strcpy(p->pass1.pausemodelnames, p->config->successive.pausemodelname);
00998 n = 0;
00999 for (s = strtok(p->pass1.pausemodelnames, " ,"); s; s = strtok(NULL, " ,")) {
01000 p->pass1.pausemodel[n++] = s;
01001 }
01002 } else {
01003 p->pass1.pausemodel = NULL;
01004 }
01005
01006 {
01007 WORD_ID w;
01008 boolean ok_p;
01009 ok_p = FALSE;
01010 for(w=0;w<p->lm->winfo->num;w++) {
01011 if (is_sil(w, p)) {
01012 ok_p = TRUE;
01013 break;
01014 }
01015 }
01016 if (!ok_p) {
01017 #ifdef SPSEGMENT_NAIST
01018 jlog("Error: no pause word in dictionary needed for decoder-based VAD\n");
01019 #else
01020 jlog("Error: no pause word in dictionary needed for short-pause segmentation\n");
01021 #endif
01022 jlog("Error: you should have at least one pause word in dictionary\n");
01023 jlog("Error: you can specify pause model names by \"-pausemodels\"\n");
01024 return FALSE;
01025 }
01026 }
01027 }
01028
01029
01030
01031
01032 if (p->lmtype == LM_PROB) {
01033
01034 if (!p->config->lmp.lmp_specified) {
01035 if (p->am->hmminfo->is_triphone) {
01036 p->config->lmp.lm_weight = DEFAULT_LM_WEIGHT_TRI_PASS1;
01037 p->config->lmp.lm_penalty = DEFAULT_LM_PENALTY_TRI_PASS1;
01038 } else {
01039 p->config->lmp.lm_weight = DEFAULT_LM_WEIGHT_MONO_PASS1;
01040 p->config->lmp.lm_penalty = DEFAULT_LM_PENALTY_MONO_PASS1;
01041 }
01042 }
01043 if (!p->config->lmp.lmp2_specified) {
01044 if (p->am->hmminfo->is_triphone) {
01045 p->config->lmp.lm_weight2 = DEFAULT_LM_WEIGHT_TRI_PASS2;
01046 p->config->lmp.lm_penalty2 = DEFAULT_LM_PENALTY_TRI_PASS2;
01047 } else {
01048 p->config->lmp.lm_weight2 = DEFAULT_LM_WEIGHT_MONO_PASS2;
01049 p->config->lmp.lm_penalty2 = DEFAULT_LM_PENALTY_MONO_PASS2;
01050 }
01051 }
01052 if (p->config->lmp.lmp_specified != p->config->lmp.lmp2_specified) {
01053 jlog("WARNING: m_fusion: only -lmp or -lmp2 specified, LM weights may be unbalanced\n");
01054 }
01055 }
01056
01057
01058
01059
01060 if (p->lmtype == LM_DFA) {
01061
01062 multigram_build(p);
01063 }
01064
01065 if (p->lmtype == LM_PROB) {
01066
01067 p->wchmm = wchmm_new();
01068 p->wchmm->lmtype = p->lmtype;
01069 p->wchmm->lmvar = p->lmvar;
01070 p->wchmm->ccd_flag = p->ccd_flag;
01071 p->wchmm->category_tree = FALSE;
01072 p->wchmm->hmmwrk = &(p->am->hmmwrk);
01073
01074 p->wchmm->ngram = p->lm->ngram;
01075 if (p->lmvar == LM_NGRAM_USER) {
01076
01077 p->wchmm->uni_prob_user = p->lm->lmfunc.uniprob;
01078 p->wchmm->bi_prob_user = p->lm->lmfunc.biprob;
01079 }
01080 p->wchmm->winfo = p->lm->winfo;
01081 p->wchmm->hmminfo = p->am->hmminfo;
01082 if (p->wchmm->category_tree) {
01083 if (p->config->pass1.old_tree_function_flag) {
01084 if (build_wchmm(p->wchmm, p->lm->config) == FALSE) {
01085 jlog("ERROR: m_fusion: error in bulding wchmm\n");
01086 return FALSE;
01087 }
01088 } else {
01089 if (build_wchmm2(p->wchmm, p->lm->config) == FALSE) {
01090 jlog("ERROR: m_fusion: error in bulding wchmm\n");
01091 return FALSE;
01092 }
01093 }
01094 } else {
01095 if (build_wchmm2(p->wchmm, p->lm->config) == FALSE) {
01096 jlog("ERROR: m_fusion: error in bulding wchmm\n");
01097 return FALSE;
01098 }
01099 }
01100
01101
01102 if (p->config->sw.wchmm_check_flag) {
01103 wchmm_check_interactive(p->wchmm);
01104 }
01105
01106
01107
01108 p->trellis_beam_width = set_beam_width(p->wchmm, p->config->pass1.specified_trellis_beam_width);
01109
01110
01111 max_successor_cache_init(p->wchmm);
01112 }
01113
01114
01115 p->backtrellis = (BACKTRELLIS *)mymalloc(sizeof(BACKTRELLIS));
01116 bt_init(p->backtrellis);
01117
01118 jlog("STAT: SR%02d %s composed\n", sconf->id, sconf->name);
01119
01120 if (sconf->sw.start_inactive) {
01121
01122 p->active = -1;
01123 } else {
01124
01125 p->active = 1;
01126 }
01127 if (p->lmtype == LM_DFA) {
01128 if (p->lm->winfo == NULL ||
01129 (p->lmvar == LM_DFA_GRAMMAR && p->lm->dfa == NULL)) {
01130
01131 p->active = -1;
01132 }
01133 }
01134
01135 return TRUE;
01136 }
01137
01138
01192 boolean
01193 j_final_fusion(Recog *recog)
01194 {
01195 MFCCCalc *mfcc;
01196 JCONF_SEARCH *sconf;
01197 PROCESS_AM *am;
01198
01199 jlog("STAT: ------\n");
01200 jlog("STAT: All models are ready, go for final fusion\n");
01201 jlog("STAT: [1] create MFCC extraction instance(s)\n");
01202 if (recog->jconf->input.type == INPUT_WAVEFORM) {
01203
01204
01205
01206
01207 create_mfcc_calc_instances(recog);
01208 }
01209
01210
01211
01212
01213 jlog("STAT: [2] create recognition processing instance(s) with AM and LM\n");
01214 for(sconf=recog->jconf->search_root;sconf;sconf=sconf->next) {
01215 if (j_launch_recognition_instance(recog, sconf) == FALSE) return FALSE;
01216 }
01217
01218
01219
01220
01221 if (recog->gmm != NULL) {
01222 jlog("STAT: [2.5] create GMM instance\n");
01223 if (gmm_init(recog) == FALSE) {
01224 jlog("ERROR: m_fusion: error in initializing GMM\n");
01225 return FALSE;
01226 }
01227 }
01228
01229
01230 jlog("STAT: [3] initialize for acoustic HMM calculation\n");
01231 for(am=recog->amlist;am;am=am->next) {
01232 #ifdef ENABLE_PLUGIN
01233
01234 if (am->config->gprune_method == GPRUNE_SEL_USER) {
01235 am->hmmwrk.compute_gaussset = (void (*)(HMMWork *, HTK_HMM_Dens **, int, int *, int)) plugin_get_func(am->config->gprune_plugin_source, "calcmix");
01236 if (am->hmmwrk.compute_gaussset == NULL) {
01237 jlog("ERROR: calcmix plugin has no function \"calcmix\"\n");
01238 return FALSE;
01239 }
01240 am->hmmwrk.compute_gaussset_init = (boolean (*)(HMMWork *)) plugin_get_func(am->config->gprune_plugin_source, "calcmix_init");
01241 if (am->hmmwrk.compute_gaussset_init == NULL) {
01242 jlog("ERROR: calcmix plugin has no function \"calcmix_init\"\n");
01243 return FALSE;
01244 }
01245 am->hmmwrk.compute_gaussset_free = (void (*)(HMMWork *)) plugin_get_func(am->config->gprune_plugin_source, "calcmix_free");
01246 if (am->hmmwrk.compute_gaussset_free == NULL) {
01247 jlog("ERROR: calcmix plugin has no function \"calcmix_free\"\n");
01248 return FALSE;
01249 }
01250 }
01251 #endif
01252 if (am->config->hmm_gs_filename != NULL) {
01253 if (outprob_init(&(am->hmmwrk), am->hmminfo, am->hmm_gs, am->config->gs_statenum, am->config->gprune_method, am->config->mixnum_thres) == FALSE) {
01254 return FALSE;
01255 }
01256 } else {
01257 if (outprob_init(&(am->hmmwrk), am->hmminfo, NULL, 0, am->config->gprune_method, am->config->mixnum_thres) == FALSE) {
01258 return FALSE;
01259 }
01260 }
01261 }
01262
01263
01264
01265 jlog("STAT: [4] prepare MFCC storage(s)\n");
01266 if (recog->jconf->input.type == INPUT_VECTOR) {
01267
01268
01269 recog->mfcclist = j_mfcccalc_new(NULL);
01270 recog->mfcclist->id = 1;
01271
01272 for(am=recog->amlist;am;am=am->next) {
01273 am->mfcc = recog->mfcclist;
01274 }
01275 if (recog->gmm) recog->gmmmfcc = recog->mfcclist;
01276 }
01277
01278 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
01279 mfcc->param = new_param();
01280 }
01281
01282
01283 if (recog->jconf->input.type == INPUT_WAVEFORM) {
01284 for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
01285 if (mfcc->frontend.sscalc) {
01286 mfcc->frontend.mfccwrk_ss = WMP_work_new(mfcc->para);
01287 if (mfcc->frontend.mfccwrk_ss == NULL) {
01288 jlog("ERROR: m_fusion: failed to initialize MFCC computation for SS\n");
01289 return FALSE;
01290 }
01291 if (mfcc->frontend.sscalc_len * recog->jconf->input.sfreq / 1000 < mfcc->para->framesize) {
01292 jlog("ERROR: m_fusion: head sil length for SS (%d msec) is shorter than a frame (%d msec)\n", mfcc->frontend.sscalc_len, mfcc->para->framesize * 1000 / recog->jconf->input.sfreq);
01293 return FALSE;
01294 }
01295 }
01296 }
01297 }
01298
01299 if (recog->jconf->decodeopt.realtime_flag) {
01300 jlog("STAT: [5] prepare for real-time decoding\n");
01301
01302 if (recog->jconf->input.type == INPUT_WAVEFORM) {
01303 if (RealTimeInit(recog) == FALSE) {
01304 jlog("ERROR: m_fusion: failed to initialize recognition process\n");
01305 return FALSE;
01306 }
01307 }
01308 }
01309
01310
01311 jlog("STAT: All init successfully done\n\n");
01312
01313
01314 #ifdef ENABLE_PLUGIN
01315 if (plugin_exec_engine_startup(recog) == FALSE) {
01316 jlog("ERROR: m_fusion: failed to execute callback setup in plugin\n");
01317 return FALSE;
01318 }
01319 #endif
01320
01321 return TRUE;
01322 }
01323
01324