julius/m_fusion.c

Go to the documentation of this file.
00001 
00017 /*
00018  * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
00019  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00020  * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology
00021  * All rights reserved
00022  */
00023 
00024 #include <julius.h>
00025 
00034 static void
00035 initialize_HMM()
00036 {
00037   /* at here, global variable "para" holds values specified by user or
00038      by user-specified HTK config file */
00039   
00040   /* allocate new hmminfo */
00041   hmminfo = hmminfo_new();
00042   /* load hmmdefs */
00043   init_hmminfo(hmminfo, hmmfilename, mapfilename, &para_hmm);
00044   /* only MFCC is supported for audio input */
00045   /* MFCC_{0|E}[_D][_A][_Z][_N] is supported */
00046   /* check parameter type of this acoustic HMM */
00047   if (speech_input != SP_MFCFILE) {
00048     /* Decode parameter extraction type according to the training
00049        parameter type in the header of the given acoustic HMM */
00050     if ((hmminfo->opt.param_type & F_BASEMASK) != F_MFCC) {
00051       j_error("Error: for direct speech input, only HMM trained by MFCC is supported\n");
00052     }
00053     /* set acoustic analysis parameters from HMM header */
00054     calc_para_from_header(&para, hmminfo->opt.param_type, hmminfo->opt.vec_size);
00055   }
00056   /* check if tied_mixture */
00057   if (hmminfo->is_tied_mixture && hmminfo->codebooknum <= 0) {
00058     j_error("%s: this tied-mixture model has no codebook!?\n", EXECNAME);
00059   }
00060 #ifdef PASS1_IWCD
00061   /* make state clusters of same context for inter-word triphone approx. */
00062   if (hmminfo->is_triphone) {
00063     j_printerr("Making pseudo bi/mono-phone for IW-triphone...");
00064     if (make_cdset(hmminfo) == FALSE) {
00065       j_error("\nError: failed to make context-dependent state set\n");
00066     }
00067     /* add those `pseudo' biphone and monophone to the logical HMM names */
00068     /* they points not to the defined HMM, but to the CD_Set structure */
00069     hmm_add_pseudo_phones(hmminfo);
00070     j_printerr("done\n");
00071   }
00072 #endif
00073 
00074   /* find short pause model and set to hmminfo->sp */
00075   htk_hmm_set_pause_model(hmminfo, spmodel_name);
00076 
00077   /* set flag for context dependent handling (if not specified in command arg)*/
00078   if (!ccd_flag_force) {
00079     if (hmminfo->is_triphone) {
00080       ccd_flag = TRUE;
00081     } else {
00082       ccd_flag = FALSE;
00083     }
00084   }
00085   /* set which iwcd1 method to use */
00086   hmminfo->cdset_method = iwcdmethod;
00087   hmminfo->cdmax_num = iwcdmaxn;
00088 
00089 #ifdef MULTIPATH_VERSION
00090   /* find short-pause model */
00091   if (enable_iwsp) {
00092     if (hmminfo->sp == NULL) {
00093       j_error("cannot find short pause model \"%s\" in hmmdefs\n", spmodel_name);
00094     }
00095     hmminfo->iwsp_penalty = iwsp_penalty;
00096   }
00097 #endif
00098   
00099 }
00100 
00110 static void
00111 initialize_GSHMM()
00112 {
00113   j_printerr("Reading GS HMMs:\n");
00114   hmm_gs = hmminfo_new();
00115   undef_para(&para_dummy);
00116   init_hmminfo(hmm_gs, hmm_gs_filename, NULL, &para_dummy);
00117 }
00118 
00119 /* initialize GMM for utterance verification */
00131 static void
00132 initialize_GMM()
00133 {
00134   j_printerr("Reading GMM:\n");
00135   gmm = hmminfo_new();
00136   undef_para(&para_dummy);
00137   init_hmminfo(gmm, gmm_filename, NULL, &para_dummy);
00138 
00139   gmm_init(gmm, gmm_gprune_num);
00140 }
00141 
00142 /* initialize word dictionary */
00153 static void
00154 initialize_dict()
00155 {
00156   /* allocate new word dictionary */
00157   winfo = word_info_new();
00158   /* read in dictinary from file */
00159   if ( ! 
00160 #ifdef MONOTREE
00161       /* leave winfo monophone for 1st pass lexicon tree */
00162        init_voca(winfo, dictfilename, hmminfo, TRUE, forcedict_flag)
00163 #else 
00164        init_voca(winfo, dictfilename, hmminfo, FALSE, forcedict_flag)
00165 #endif
00166        ) {
00167     j_error("ERROR: failed to read dictionary, terminated\n");
00168   }
00169 
00170 #ifdef USE_NGRAM
00171   /* if necessary, append a IW-sp word to the dict if "-iwspword" specified */
00172   if (enable_iwspword) {
00173     if (
00174 #ifdef MONOTREE
00175         voca_append_htkdict(iwspentry, winfo, hmminfo, TRUE)
00176 #else 
00177         voca_append_htkdict(iwspentry, winfo, hmminfo, FALSE)
00178 #endif
00179         == FALSE) {
00180       j_error("Error: failed to make IW-sp word entry \"%s\"\n", iwspentry);
00181     } else {
00182       j_printerr("1 IW-sp word entry added\n");
00183     }
00184   }
00185   /* set {head,tail}_silwid */
00186   winfo->head_silwid = voca_lookup_wid(head_silname, winfo);
00187   if (winfo->head_silwid == WORD_INVALID) { /* not exist */
00188     j_error("ERROR: head sil word \"%s\" not exist in voca\n", head_silname);
00189   }
00190   winfo->tail_silwid = voca_lookup_wid(tail_silname, winfo);
00191   if (winfo->tail_silwid == WORD_INVALID) { /* not exist */
00192     j_error("ERROR: tail sil word \"%s\" not exist in voca\n", tail_silname);
00193   }
00194 #endif
00195   
00196 #ifdef PASS1_IWCD
00197   if (triphone_check_flag && hmminfo->is_triphone) {
00198     /* go into interactive triphone HMM check mode */
00199     hmm_check(hmminfo, winfo);
00200   }
00201 #endif
00202 
00203 
00204 }
00205 
00206 
00207 #ifdef USE_NGRAM
00208 
00219 static void
00220 initialize_ngram()
00221 {
00222   /* allocate new */
00223   ngram = ngram_info_new();
00224   /* load LM */
00225   if (ngram_filename != NULL) { /* binary format */
00226     init_ngram_bin(ngram, ngram_filename);
00227   } else {                      /* ARPA format */
00228     init_ngram_arpa(ngram, ngram_filename_lr_arpa, ngram_filename_rl_arpa);
00229   }
00230 
00231   /* map dict item to N-gram entry */
00232   make_voca_ref(ngram, winfo);
00233 }
00234 
00235 #endif /* USE_NGRAM */
00236 
00237 /* set params whose default will change by models and not specified in arg */
00256 static void
00257 configure_param()
00258 {
00259 #ifdef USE_NGRAM
00260   /* set default lm parameter */
00261   if (!lmp_specified) set_lm_weight();
00262   if (!lmp2_specified) set_lm_weight2();
00263   if (lmp_specified != lmp2_specified) {
00264     j_printerr("Warning: only -lmp or -lmp2 specified, LM weights may be unbalanced\n");
00265   }
00266 #endif
00267   /* select Gaussian pruning function */
00268   if (gprune_method == GPRUNE_SEL_UNDEF) {/* set default if not specified */
00269     if (hmminfo->is_tied_mixture) {
00270       /* enabled by default for tied-mixture models */
00271 #ifdef GPRUNE_DEFAULT_SAFE
00272       gprune_method = GPRUNE_SEL_SAFE;
00273 #elif GPRUNE_DEFAULT_HEURISTIC
00274       gprune_method = GPRUNE_SEL_HEURISTIC;
00275 #elif GPRUNE_DEFAULT_BEAM
00276       gprune_method = GPRUNE_SEL_BEAM;
00277 #endif
00278     } else {
00279       /* disabled by default for non tied-mixture model */
00280       gprune_method = GPRUNE_SEL_NONE;
00281     }
00282   }
00283 }
00284 
00295 void
00296 select_result_output()
00297 {
00298   switch(result_output) {
00299   case SP_RESULT_TTY: setup_result_tty(); break; /* in result_tty.c */
00300   case SP_RESULT_MSOCK: setup_result_msock(); break; /* in result_msock.c */
00301   default:
00302     j_printerr("Internal Error: no such result output device: id = %d\n", result_output);
00303     break;
00304   }
00305 
00306 }
00307 
00308 
00309 
00310 /**********************************************************************/
00330 void
00331 final_fusion()
00332 {
00333   VERMES("###### build up system\n");
00334 
00335   /* stage 1: load models */
00336   initialize_HMM();
00337   if (hmm_gs_filename != NULL) initialize_GSHMM();
00338   if (gmm_filename != NULL) initialize_GMM();
00339 #ifdef USE_NGRAM
00340   initialize_dict();
00341   initialize_ngram();
00342 #endif
00343 
00344   /* stage 2: fixate params */
00345   /* set params whose default will change by models and not specified in arg */
00346   configure_param();
00347   /* 
00348      gather all the MFCC configuration parameters to form final config.
00349        preference: Julian option > HTK config > HMM > Julian default
00350      With HTK config, the default values are overridden to HTK values.
00351   */
00352   if (para_htk.loaded == 1) apply_para(&para, &para_htk);
00353   if (para_hmm.loaded == 1) apply_para(&para, &para_hmm);
00354   apply_para(&para, &para_default);
00355 
00356   /* stage 3: build lexicon tree */
00357 #ifdef USE_DFA
00358 
00359   /* read and setup all the initial grammars */
00360   if (dfa_filename != NULL && dictfilename != NULL) {
00361     multigram_add_gramlist(dfa_filename, dictfilename);
00362   }
00363   multigram_read_all_gramlist();
00364   
00365   /* execute generation of global grammar and (re)building of wchmm */
00366   multigram_exec();
00367 
00368 #else  /* ~USE_DFA */
00369 
00370   wchmm = wchmm_new();
00371   wchmm->ngram = ngram;
00372   wchmm->winfo = winfo;
00373   wchmm->hmminfo = hmminfo;
00374 #ifdef CATEGORY_TREE
00375   if (old_tree_function_flag) {
00376     build_wchmm(wchmm);
00377   } else {
00378     build_wchmm2(wchmm);
00379   }
00380 #else
00381   build_wchmm2(wchmm);
00382 #endif /* CATEGORY_TREE */
00383   /* set actual beam width */
00384   /* guess beam width from models, when not specified */
00385   trellis_beam_width = set_beam_width(wchmm, specified_trellis_beam_width);
00386 
00387 #endif                  /* USE_DFA */
00388 
00389 #ifdef MONOTREE
00390   /* after building tree lexocon, */
00391   /* convert monophone to triphone in winfo for 2nd pass */
00392   if (hmminfo->is_triphone) {
00393     j_printerr("convert monophone dictionary to word-internal triphone...");
00394     if (voca_mono2tri(winfo, hmminfo) == FALSE) {
00395       j_error("failed\n");
00396     }
00397     j_printerr("done\n");
00398   }
00399 #endif
00400   
00401   /* stage 4: setup output function */
00402   if (hmm_gs_filename != NULL) {/* with GMS */
00403     outprob_init(hmminfo, hmm_gs, gs_statenum, gprune_method, mixnum_thres);
00404   } else {
00405     outprob_init(hmminfo, NULL, 0, gprune_method, mixnum_thres);
00406   }
00407 
00408   /* stage 5: initialize work area and misc. */
00409   bt_init(&backtrellis);        /* backtrellis initialization */
00410 #ifdef USE_NGRAM
00411   max_successor_cache_init(wchmm);      /* initialize cache for factoring */
00412 #endif
00413   if (realtime_flag) {
00414     RealTimeInit();             /* prepare for 1st pass pipeline processing */
00415   }
00416   /* setup result output function */
00417   select_result_output();
00418 
00419   /* finished! */
00420   VERMES("All init successfully done\n\n");
00421 }

Generated on Tue Dec 26 16:16:32 2006 for Julius by  doxygen 1.5.0