Main Page | Modules | Data Structures | Directories | File List | Data Fields | Globals | Related Pages

m_fusion.c

Go to the documentation of this file.
00001 
00017 /*
00018  * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
00019  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00020  * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology, Nagoya Institute of Technology
00021  * All rights reserved
00022  */
00023 
00024 #include <julius.h>
00025 
00034 static void
00035 initialize_HMM()
00036 {
00037   int dim;
00038   
00039   /* allocate new hmminfo */
00040   hmminfo = hmminfo_new();
00041   /* load hmmdefs */
00042   init_hmminfo(hmminfo, hmmfilename, mapfilename);
00043   /* check parameter type of this acoustic HMM */
00044   /* only MFCC is supported for audio input */
00045   /* MFCC_{0|E}[_D][_A][_Z][_N] is supported */
00046   /* Decode parameter extraction type according to the training
00047      parameter type in the header of the given acoustic HMM */
00048   if (speech_input != SP_MFCFILE) {
00049     if ((hmminfo->opt.param_type & F_BASEMASK) != F_MFCC) {
00050       j_error("Error: for direct speech input, only HMM trained by MFCC is supported\n");
00051     }
00052     /* decode required parameter extraction types */
00053     delta_required = (hmminfo->opt.param_type & F_DELTA) ? TRUE : FALSE;
00054     acc_required = (hmminfo->opt.param_type & F_ACCL) ? TRUE : FALSE;
00055     energy_required = (hmminfo->opt.param_type & F_ENERGY) ? TRUE : FALSE;
00056     c0_required = (hmminfo->opt.param_type & F_ZEROTH) ? TRUE : FALSE;
00057     abs_energy_suppress = (hmminfo->opt.param_type & F_ENERGY_SUP) ? TRUE : FALSE;
00058     cmn_required = (hmminfo->opt.param_type & F_CEPNORM) ? TRUE : FALSE;
00059     /* guess MFCC dimension from the vector size and parameter type in the
00060        acoustic HMM */
00061     dim = hmminfo->opt.vec_size;
00062     if (abs_energy_suppress) dim++;
00063     dim /= 1 + (delta_required ? 1 : 0) + (acc_required ? 1 : 0);
00064     if (energy_required) dim--;
00065     if (c0_required) dim--;
00066     model_mfcc_dim = dim;
00067   }
00068   /* check if tied_mixture */
00069   if (hmminfo->is_tied_mixture && hmminfo->codebooknum <= 0) {
00070     j_error("%s: this tied-mixture model has no codebook!?\n", EXECNAME);
00071   }
00072 #ifdef PASS1_IWCD
00073   /* make state clusters of same context for inter-word triphone approx. */
00074   if (hmminfo->is_triphone) {
00075     j_printerr("Making pseudo bi/mono-phone for IW-triphone...");
00076     if (make_cdset(hmminfo) == FALSE) {
00077       j_error("\nError: failed to make context-dependent state set\n");
00078     }
00079     j_printerr("done\n");
00080   }
00081 #endif
00082 
00083   /* find short pause model and set to hmminfo->sp */
00084   htk_hmm_set_pause_model(hmminfo, spmodel_name);
00085 
00086   /* set flag for context dependent handling (if not specified in command arg)*/
00087   if (!ccd_flag_force) {
00088     if (hmminfo->is_triphone) {
00089       ccd_flag = TRUE;
00090     } else {
00091       ccd_flag = FALSE;
00092     }
00093   }
00094   /* set which iwcd1 method to use */
00095   hmminfo->cdset_method = iwcdmethod;
00096   hmminfo->cdmax_num = iwcdmaxn;
00097 
00098 #ifdef MULTIPATH_VERSION
00099   /* find short-pause model */
00100   if (enable_iwsp) {
00101     if (hmminfo->sp == NULL) {
00102       j_error("cannot find short pause model \"%s\" in hmmdefs\n", spmodel_name);
00103     }
00104     hmminfo->iwsp_penalty = iwsp_penalty;
00105   }
00106 #endif
00107   
00108 }
00109 
00119 static void
00120 initialize_GSHMM()
00121 {
00122   j_printerr("Reading GS HMMs:\n");
00123   hmm_gs = hmminfo_new();
00124   init_hmminfo(hmm_gs, hmm_gs_filename, NULL);
00125 }
00126 
00127 /* initialize GMM for utterance verification */
00139 static void
00140 initialize_GMM()
00141 {
00142   j_printerr("Reading GMM:\n");
00143   gmm = hmminfo_new();
00144   init_hmminfo(gmm, gmm_filename, NULL);
00145 
00146   gmm_init(gmm, gmm_gprune_num);
00147 }
00148 
00149 /* initialize word dictionary */
00160 static void
00161 initialize_dict()
00162 {
00163   /* allocate new word dictionary */
00164   winfo = word_info_new();
00165   /* read in dictinary from file */
00166   if ( ! 
00167 #ifdef MONOTREE
00168       /* leave winfo monophone for 1st pass lexicon tree */
00169        init_voca(winfo, dictfilename, hmminfo, TRUE, forcedict_flag)
00170 #else 
00171        init_voca(winfo, dictfilename, hmminfo, FALSE, forcedict_flag)
00172 #endif
00173        ) {
00174     j_error("ERROR: failed to read dictionary, terminated\n");
00175   }
00176 
00177 #ifdef USE_NGRAM
00178   /* if necessary, append a IW-sp word to the dict if "-iwspword" specified */
00179   if (enable_iwspword) {
00180     if (
00181 #ifdef MONOTREE
00182         voca_append_htkdict(iwspentry, winfo, hmminfo, TRUE)
00183 #else 
00184         voca_append_htkdict(iwspentry, winfo, hmminfo, FALSE)
00185 #endif
00186         == FALSE) {
00187       j_error("Error: failed to make IW-sp word entry \"%s\"\n", iwspentry);
00188     } else {
00189       j_printerr("1 IW-sp word entry added\n");
00190     }
00191   }
00192   /* set {head,tail}_silwid */
00193   winfo->head_silwid = voca_lookup_wid(head_silname, winfo);
00194   if (winfo->head_silwid == WORD_INVALID) { /* not exist */
00195     j_error("ERROR: head sil word \"%s\" not exist in voca\n", head_silname);
00196   }
00197   winfo->tail_silwid = voca_lookup_wid(tail_silname, winfo);
00198   if (winfo->tail_silwid == WORD_INVALID) { /* not exist */
00199     j_error("ERROR: tail sil word \"%s\" not exist in voca\n", tail_silname);
00200   }
00201 #endif
00202   
00203 #ifdef PASS1_IWCD
00204   if (triphone_check_flag && hmminfo->is_triphone) {
00205     /* go into interactive triphone HMM check mode */
00206     hmm_check(hmminfo, winfo);
00207   }
00208 #endif
00209 
00210 
00211 }
00212 
00213 
00214 #ifdef USE_NGRAM
00215 
00226 static void
00227 initialize_ngram()
00228 {
00229   /* allocate new */
00230   ngram = ngram_info_new();
00231   /* load LM */
00232   if (ngram_filename != NULL) { /* binary format */
00233     init_ngram_bin(ngram, ngram_filename);
00234   } else {                      /* ARPA format */
00235     init_ngram_arpa(ngram, ngram_filename_lr_arpa, ngram_filename_rl_arpa);
00236   }
00237 
00238   /* map dict item to N-gram entry */
00239   make_voca_ref(ngram, winfo);
00240 }
00241 
00242 #endif /* USE_NGRAM */
00243 
00244 /* set params whose default will change by models and not specified in arg */
00263 static void
00264 configure_param()
00265 {
00266 #ifdef USE_NGRAM
00267   /* set default lm parameter */
00268   if (!lmp_specified) set_lm_weight();
00269   if (!lmp2_specified) set_lm_weight2();
00270   if (lmp_specified != lmp2_specified) {
00271     j_printerr("Warning: only -lmp or -lmp2 specified, LM weights may be unbalanced\n");
00272   }
00273 #endif
00274   /* select Gaussian pruning function */
00275   if (gprune_method == GPRUNE_SEL_UNDEF) {/* set default if not specified */
00276     if (hmminfo->is_tied_mixture) {
00277       /* enabled by default for tied-mixture models */
00278 #ifdef GPRUNE_DEFAULT_SAFE
00279       gprune_method = GPRUNE_SEL_SAFE;
00280 #elif GPRUNE_DEFAULT_HEURISTIC
00281       gprune_method = GPRUNE_SEL_HEURISTIC;
00282 #elif GPRUNE_DEFAULT_BEAM
00283       gprune_method = GPRUNE_SEL_BEAM;
00284 #endif
00285     } else {
00286       /* disabled by default for non tied-mixture model */
00287       gprune_method = GPRUNE_SEL_NONE;
00288     }
00289   }
00290 }
00291 
00302 void
00303 select_result_output()
00304 {
00305   switch(result_output) {
00306   case SP_RESULT_TTY: setup_result_tty(); break; /* in result_tty.c */
00307   case SP_RESULT_MSOCK: setup_result_msock(); break; /* in result_msock.c */
00308   default:
00309     j_printerr("Internal Error: no such result output device: id = %d\n", result_output);
00310     break;
00311   }
00312 
00313 }
00314 
00315 
00316 
00317 /**********************************************************************/
00337 void
00338 final_fusion()
00339 {
00340   VERMES("###### build up system\n");
00341 
00342   /* stage 1: load models */
00343   initialize_HMM();
00344   if (hmm_gs_filename != NULL) initialize_GSHMM();
00345   if (gmm_filename != NULL) initialize_GMM();
00346 #ifdef USE_NGRAM
00347   initialize_dict();
00348   initialize_ngram();
00349 #endif
00350 
00351   /* stage 2: fixate params */
00352   /* set params whose default will change by models and not specified in arg */
00353   configure_param();
00354 
00355   /* stage 3: build lexicon tree */
00356 #ifdef USE_DFA
00357 
00358   /* read and setup all the initial grammars */
00359   if (dfa_filename != NULL && dictfilename != NULL) {
00360     multigram_add_gramlist(dfa_filename, dictfilename);
00361   }
00362   multigram_read_all_gramlist();
00363   
00364   /* execute generation of global grammar and (re)building of wchmm */
00365   multigram_exec();
00366 
00367 #else  /* ~USE_DFA */
00368 
00369   wchmm = wchmm_new();
00370   wchmm->ngram = ngram;
00371   wchmm->winfo = winfo;
00372   wchmm->hmminfo = hmminfo;
00373 #ifdef CATEGORY_TREE
00374   if (old_tree_function_flag) {
00375     build_wchmm(wchmm);
00376   } else {
00377     build_wchmm2(wchmm);
00378   }
00379 #else
00380   build_wchmm2(wchmm);
00381 #endif /* CATEGORY_TREE */
00382   /* set actual beam width */
00383   /* guess beam width from models, when not specified */
00384   trellis_beam_width = set_beam_width(wchmm, specified_trellis_beam_width);
00385 
00386 #endif                  /* USE_DFA */
00387 
00388 #ifdef MONOTREE
00389   /* after building tree lexocon, */
00390   /* convert monophone to triphone in winfo for 2nd pass */
00391   if (hmminfo->is_triphone) {
00392     j_printerr("convert monophone dictionary to word-internal triphone...");
00393     if (voca_mono2tri(winfo, hmminfo) == FALSE) {
00394       j_error("failed\n");
00395     }
00396     j_printerr("done\n");
00397   }
00398 #endif
00399   
00400   /* stage 4: setup output function */
00401   if (hmm_gs_filename != NULL) {/* with GMS */
00402     outprob_init(hmminfo, hmm_gs, gs_statenum, gprune_method, mixnum_thres);
00403   } else {
00404     outprob_init(hmminfo, NULL, 0, gprune_method, mixnum_thres);
00405   }
00406 
00407   /* stage 5: initialize work area and misc. */
00408   bt_init(&backtrellis);        /* backtrellis initialization */
00409 #ifdef USE_NGRAM
00410   max_successor_cache_init(wchmm);      /* initialize cache for factoring */
00411 #endif
00412   if (realtime_flag) {
00413     RealTimeInit();             /* prepare for 1st pass pipeline processing */
00414   }
00415   /* setup result output function */
00416   select_result_output();
00417 
00418   /* finished! */
00419   VERMES("All init successfully done\n\n");
00420 }

Generated on Tue Mar 28 16:01:38 2006 for Julius by  doxygen 1.4.2