00001
00017
00018
00019
00020
00021
00022
00023
00024 #include <julius.h>
00025
00036 void
00037 print_setting()
00038 {
00039 #ifdef USE_NETAUDIO
00040 char *p;
00041 #endif
00042 #ifdef USE_DFA
00043 GRAMLIST *g;
00044 int n;
00045 #endif
00046
00047 j_printf(" hmmfilename=%s\n",hmmfilename);
00048 if (mapfilename != NULL) {
00049 j_printf(" hmmmapfilename=%s\n",mapfilename);
00050 }
00051 #ifdef USE_NGRAM
00052 j_printf(" vocabulary filename=%s\n",dictfilename);
00053 if (ngram_filename != NULL) {
00054 j_printf(" n-gram filename=%s (binary format)\n",ngram_filename);
00055 } else {
00056 j_printf(" LR 2-gram filename=%s\n",ngram_filename_lr_arpa);
00057 if (ngram_filename_rl_arpa != NULL) {
00058 j_printf(" RL 3-gram filename=%s\n",ngram_filename_rl_arpa);
00059 }
00060 }
00061 #else
00062 n = 1;
00063 for(g = gramlist_root; g; g = g->next) {
00064 j_printf(" grammar #%d:\n", n++);
00065 j_printf(" dfa = %s\n", g->dfafile);
00066 j_printf(" dict = %s\n", g->dictfile);
00067 }
00068 #endif
00069 if (hmm_gs_filename != NULL) {
00070 j_printf(" hmmfile for Gaussian Selection: %s\n", hmm_gs_filename);
00071 }
00072 if (gmm_filename != NULL) {
00073 j_printf(" GMM file for utterance verification: %s\n", gmm_filename);
00074 }
00075 }
00076
00087 void
00088 print_info()
00089 {
00090 j_printf("------------- System Info begin -------------\n");
00091 put_header(stdout);
00092 if (verbose_flag) {
00093 put_compile_defs(stdout);
00094 j_printf("\n");
00095 #ifdef USE_NGRAM
00096 j_printf("Large Vocabulary Continuous Speech Recognition Based on N-gram\n\n");
00097 #else
00098 j_printf("Continuous Speech Recognition Parser based on automaton grammar\n\n");
00099 #endif
00100 }
00101
00102
00103 j_printf("Files:\n");
00104 print_setting();
00105 j_printf("\n");
00106
00107
00108 if (speech_input == SP_RAWFILE) {
00109 j_printf("Speech input source: file\n\n");
00110 } else if (speech_input == SP_MFCFILE) {
00111 j_printf("Speech input source: MFCC parameter file (HTK format)\n\n");
00112 }
00113
00114 if (speech_input != SP_MFCFILE) {
00115 j_printf("Acoustic analysis condition:\n");
00116 j_printf("\t parameter = MFCC");
00117 if (c0_required) j_printf("_0");
00118 if (energy_required) j_printf("_E");
00119 if (delta_required) j_printf("_D");
00120 if (acc_required) j_printf("_A");
00121 if (abs_energy_suppress) j_printf("_N");
00122 if (cmn_required) j_printf("_Z");
00123 j_printf(" (%d dimension from %d cepstrum)\n", hmminfo->opt.vec_size, model_mfcc_dim);
00124 j_printf("\t sample period = %4d ns (%5d Hz)\n", smpPeriod, smpFreq);
00125 j_printf("\t window size = %4d samples (%.1f ms)\n", fsize,
00126 (float)smpPeriod * (float)fsize / 10000.0);
00127 j_printf("\t frame shift = %4d samples (%.1f ms)\n", fshift,
00128 (float)smpPeriod * (float)fshift / 10000.0);
00129 j_printf("\t pre-emphasis = %.2f\n", preemph);
00130 j_printf("\t # filterbank = %d\n", fbank_num);
00131 j_printf("\t cepst. lifter = %d\n", ceplifter);
00132 j_printf("\t raw energy = %s\n", rawe_required ? "True" : "False");
00133 if (enormal_required) {
00134 j_printf("\tenergy normalize = True (scale = %.1f, silence floor = %.1f dB)\n", enormal_escale, enormal_silfloor);
00135 } else {
00136 j_printf("\tenergy normalize = False\n");
00137 }
00138 if (delta_required) {
00139 j_printf("\t delta window = %d frames (%.1f ms) around\n", delwin,
00140 (float)delwin * (float)smpPeriod * (float)fshift / 10000.0);
00141 }
00142 if (acc_required) {
00143 j_printf("\t acc window = %d frames (%.1f ms) around\n", accwin,
00144 (float)accwin * (float)smpPeriod * (float)fshift / 10000.0);
00145 }
00146 j_printf("\t hi freq. = ");
00147 if (hipass < 0) j_printf("OFF\n");
00148 else j_printf("%5d Hz\n", hipass);
00149 j_printf("\t lo freq. = ");
00150 if (lopass < 0) j_printf("OFF\n");
00151 else j_printf("%5d Hz\n", lopass);
00152
00153 j_printf(" spectral subtraction = ");
00154 if (ssload_filename || sscalc) {
00155 if (sscalc) {
00156 j_printf("use head silence of each input\n");
00157 if (speech_input != SP_RAWFILE) {
00158 j_error("Error: ss calculation with head silence only for rawfile input\n");
00159 }
00160 j_printf("\t head sil length = %d msec\n", sscalc_len);
00161 } else {
00162 j_printf("use a constant value from file\n");
00163 j_printf(" noise spectrum file = \"%s\"\n", ssload_filename);
00164 }
00165 j_printf("\t alpha coef. = %f\n", ssalpha);
00166 j_printf("\t spectral floor = %f\n", ssfloor);
00167 } else {
00168 j_printf("off\n");
00169 }
00170 j_printf("\n");
00171 }
00172
00173 print_hmmdef_info(hmminfo); j_printf("\n");
00174 if (hmm_gs_filename != NULL) {
00175 j_printf("GS ");
00176 print_hmmdef_info(hmm_gs); j_printf("\n");
00177 }
00178 if (winfo != NULL) {
00179 print_voca_info(winfo); j_printf("\n");
00180 }
00181 if (wchmm != NULL) {
00182 print_wchmm_info(wchmm); j_printf("\n");
00183 }
00184 #ifdef USE_NGRAM
00185 print_ngram_info(ngram);
00186 #else
00187 if (dfa != NULL) {
00188 print_dfa_info(dfa);
00189 if (debug2_flag) print_dfa_cp(dfa);
00190 }
00191 #endif
00192
00193 #ifdef USE_NGRAM
00194 j_printf(" inter-word N-gram cache: \n");
00195 {
00196 int num, len;
00197 #ifdef UNIGRAM_FACTORING
00198 len = wchmm->isolatenum;
00199 j_printf("\t root node to be cached = %d / %d (isolated only)\n",
00200 len, wchmm->startnum);
00201 #else
00202 len = wchmm->startnum;
00203 j_printf("\t root node to be cached = %d (all)\n", len);
00204 #endif
00205 #ifdef HASH_CACHE_IW
00206 num = (iw_cache_rate * ngram->max_word_num) / 100;
00207 j_printf("\tword end num to be cached = %d / %d\n", num, ngram->max_word_num);
00208 #else
00209 num = ngram->max_word_num;
00210 j_printf("\tword end num to be cached = %d (all)\n", num);
00211 #endif
00212 j_printf("\t maximum allocation size = %dMB\n", num * len / 1000 * sizeof(LOGPROB) / 1000);
00213 }
00214
00215 #endif
00216
00217 j_printf("\nWeights and words: \n");
00218 #ifdef USE_NGRAM
00219 j_printf("\t(-lmp) pass1 LM weight = %2.1f ins. penalty = %+2.1f\n", lm_weight, lm_penalty);
00220 j_printf("\t(-lmp2) pass2 LM weight = %2.1f ins. penalty = %+2.1f\n", lm_weight2, lm_penalty2);
00221 j_printf("\t(-transp)trans. penalty = %+2.1f per word\n", lm_penalty_trans);
00222 j_printf("\t(-silhead)head sil word = ");
00223 put_voca(winfo, winfo->head_silwid);
00224 j_printf("\t(-siltail)tail sil word = ");
00225 put_voca(winfo, winfo->tail_silwid);
00226 #else
00227 j_printf("\t(-penalty1) IW penalty1 = %+2.1f\n", penalty1);
00228 j_printf("\t(-penalty2) IW penalty2 = %+2.1f\n", penalty2);
00229 #endif
00230
00231 #ifdef CONFIDENCE_MEASURE
00232 #ifdef CM_MULTIPLE_ALPHA
00233 j_printf("\t(-cmalpha)CM alpha coef = from %f to %f by step of %f (%d outputs)\n", cm_alpha_bgn, cm_alpha_end, cm_alpha_step, cm_alpha_num);
00234 #else
00235 j_printf("\t(-cmalpha)CM alpha coef = %f\n", cm_alpha);
00236 #endif
00237 #ifdef CM_SEARCH_LIMIT
00238 j_printf("\t(-cmthres) CM cut thres = %f for hypo generation\n", cm_cut_thres);
00239 #endif
00240 #ifdef CM_SEARCH_LIMIT_POP
00241 j_printf("\t(-cmthres2)CM cut thres = %f for popped hypo\n", cm_cut_thres_pop);
00242 #endif
00243 #endif
00244 j_printf("\t(-sp)shortpause HMM name= \"%s\" specified", spmodel_name);
00245 if (hmminfo->sp != NULL) {
00246 j_printf(", \"%s\" applied", hmminfo->sp->name);
00247 if (hmminfo->sp->is_pseudo) {
00248 j_printf(" (pseudo)");
00249 } else {
00250 j_printf(" (physical)");
00251 }
00252 }
00253 j_printf("\n");
00254 #ifdef USE_DFA
00255 if (dfa != NULL) {
00256 int i;
00257 j_printf("\t found sp category IDs =");
00258 for(i=0;i<dfa->term_num;i++) {
00259 if (dfa->is_sp[i]) {
00260 j_printf(" %d", i);
00261 }
00262 }
00263 j_printf("\n");
00264 }
00265 #endif
00266 #ifdef MULTIPATH_VERSION
00267 if (enable_iwsp) {
00268 j_printf("\t inter-word short pause = on (append \"%s\" for each word tail)\n", hmminfo->sp->name);
00269 j_printf("\t sp transition penalty = %+2.1f\n", iwsp_penalty);
00270 }
00271 #endif
00272 #ifdef USE_NGRAM
00273 if (enable_iwspword) {
00274 j_printf("\tIW-sp word added to dict= \"%s\"\n", iwspentry);
00275 }
00276 #endif
00277
00278 if (gmm != NULL) {
00279 j_printf("\nUtterance verification by GMM\n");
00280 j_printf(" GMM defs file = %s\n", gmm_filename);
00281 j_printf(" GMM gprune num = %d\n", gmm_gprune_num);
00282 if (gmm_reject_cmn_string != NULL) {
00283 j_printf(" GMM names to reject = %s\n", gmm_reject_cmn_string);
00284 }
00285 j_printf(" ");
00286 print_hmmdef_info(gmm);
00287 }
00288
00289 if (realtime_flag && cmn_required) {
00290 j_printf("\nMAP-CMN on realtime input: \n");
00291 if (cmnload_filename) {
00292 if (cmn_loaded) {
00293 j_printf("\t initial CMN param = from \"%s\"\n", cmnload_filename);
00294 } else {
00295 j_printf("\t initial CMN param = from \"%s\" (failed, ignored)\n", cmnload_filename);
00296 }
00297 } else {
00298 j_printf("\t initial CMN param = not specified\n");
00299 }
00300 j_printf("\t initial mean weight = %6.2f\n", cmn_map_weight);
00301 if (cmn_update) {
00302 j_printf("\t CMN param update = yes, update from last inputs\n");
00303 } else {
00304 j_printf("\t CMN param update = no, keep initial\n");
00305 }
00306 if (cmnsave_filename) {
00307 if (realtime_flag) {
00308 j_printf("\t save CMN param to = %s\n", cmnsave_filename);
00309 } else {
00310 j_printf("\t save CMN param to = %s (not realtime CMN, ignored)\n", cmnsave_filename);
00311 }
00312 }
00313 }
00314
00315 j_printf("\nSearch parameters: \n");
00316
00317 j_printf("\t 1st pass decoding = ");
00318 if (force_realtime_flag) j_printf("(forced) ");
00319 if (realtime_flag) {
00320 j_printf("on-the-fly");
00321 if (cmn_required) j_printf(" with MAP-CMN");
00322 j_printf("\n");
00323 } else {
00324 j_printf("batch");
00325 if (cmn_required) j_printf(" with sentence CMN");
00326 j_printf("\n");
00327 }
00328 j_printf("\t 1st pass method = ");
00329 #ifdef WPAIR
00330 # ifdef WPAIR_KEEP_NLIMIT
00331 j_printf("word-pair approx., keeping only N tokens ");
00332 # else
00333 j_printf("word-pair approx. ");
00334 # endif
00335 #else
00336 j_printf("1-best approx. ");
00337 #endif
00338 #ifdef WORD_GRAPH
00339 j_printf("generating word_graph\n");
00340 #else
00341 j_printf("generating indexed trellis\n");
00342 #endif
00343
00344 j_printf("\t(-b) trellis beam width = %d", trellis_beam_width);
00345 if (specified_trellis_beam_width == -1) {
00346 j_printf(" (-1 or not specified - guessed)\n");
00347 } else if (specified_trellis_beam_width == 0) {
00348 j_printf(" (0 - full)\n");
00349 } else {
00350 j_printf("\n");
00351 }
00352 j_printf("\t(-n)search candidate num= %d\n", nbest);
00353 j_printf("\t(-s) search stack size = %d\n", stack_size);
00354 j_printf("\t(-m) search overflow = after %d hypothesis poped\n", hypo_overflow);
00355 j_printf("\t 2nd pass method = ");
00356 #ifdef GRAPHOUT
00357 #ifdef GRAPHOUT_DYNAMIC
00358 #ifdef GRAPHOUT_SEARCH
00359 j_printf("searching graph, generating dynamic graph\n");
00360 #else
00361 j_printf("searching sentence, generating dynamic graph\n");
00362 #endif
00363 #else
00364 j_printf("searching sentence, generating static graph from N-best\n");
00365 #endif
00366 #else
00367 j_printf("searching sentence, generating N-best\n");
00368 #endif
00369 if (enveloped_bestfirst_width >= 0) {
00370 j_printf("\t(-b2) pass2 beam width = %d\n", enveloped_bestfirst_width);
00371 }
00372 j_printf("\t(-lookuprange)lookup range= %d (tm-%d <= t <tm+%d)\n",lookup_range,lookup_range,lookup_range);
00373 #ifdef SCAN_BEAM
00374 j_printf("\t(-sb)2nd scan beamthres = %.1f (in logscore)\n",scan_beam_thres);
00375 #endif
00376 j_printf("\t(-gprune)Gauss. pruning = ");
00377 switch(gprune_method){
00378 case GPRUNE_SEL_NONE: j_printf("none (full computation)\n"); break;
00379 case GPRUNE_SEL_BEAM: j_printf("beam\n"); break;
00380 case GPRUNE_SEL_HEURISTIC: j_printf("heuristic\n"); break;
00381 case GPRUNE_SEL_SAFE: j_printf("safe\n"); break;
00382 }
00383 if (gprune_method != GPRUNE_SEL_NONE) {
00384 j_printf("\t(-tmix) mixture thres = %d / %d\n", mixnum_thres, hmminfo->maxcodebooksize);
00385 }
00386 if (hmm_gs_filename != NULL) {
00387 j_printf("\t(-gsnum) GS state num = %d / %d selected\n", gs_statenum, hmm_gs->totalstatenum);
00388 }
00389
00390 j_printf("\t(-n) search till = %d candidates found\n", nbest);
00391 j_printf("\t(-output) and output = %d candidates out of above\n", output_hypo_maxnum);
00392 #ifdef GRAPHOUT
00393 j_printf("\t(-graphrange) margin = %d frames\n",graph_merge_neighbor_range);
00394 #endif
00395
00396 if (ccd_flag) {
00397 j_printf("\t IWCD handling:\n");
00398 #ifdef PASS1_IWCD
00399 j_printf("\t 1st pass: approximation ");
00400 switch(hmminfo->cdset_method) {
00401 case IWCD_AVG:
00402 j_printf("(use average prob. of same LC)\n");
00403 break;
00404 case IWCD_MAX:
00405 j_printf("(use max. prob. of same LC)\n");
00406 break;
00407 case IWCD_NBEST:
00408 j_printf("(use %d-best of same LC)\n", hmminfo->cdmax_num);
00409 break;
00410 }
00411 #else
00412 j_printf("\t 1st pass: ignored\n");
00413 #endif
00414 #ifdef PASS2_STRICT_IWCD
00415 j_printf("\t 2nd pass: strict (apply when expanding hypo. )\n");
00416 #else
00417 j_printf("\t 2nd pass: loose (apply when hypo. is popped and scanned)\n");
00418 #endif
00419 }
00420
00421 #ifdef USE_NGRAM
00422 j_printf("\t factoring score: ");
00423 #ifdef UNIGRAM_FACTORING
00424 j_printf("1-gram prob. (statically assigned beforehand)\n");
00425 #else
00426 j_printf("2-gram prob. (dynamically computed while search)\n");
00427 #endif
00428 #endif
00429
00430 if (align_result_word_flag) {
00431 j_printf("\t output word alignments\n");
00432 }
00433 if (align_result_phoneme_flag) {
00434 j_printf("\t output phoneme alignments\n");
00435 }
00436 if (align_result_state_flag) {
00437 j_printf("\t output state alignments\n");
00438 }
00439 #ifdef USE_DFA
00440 if (looktrellis_flag) {
00441 j_printf("\t only words in backtrellis will be expanded in 2nd pass\n");
00442 } else {
00443 j_printf("\t all possible words will be expanded in 2nd pass\n");
00444 }
00445 #endif
00446 #ifdef CATEGORY_TREE
00447 if (old_tree_function_flag) {
00448 j_printf("\t build_wchmm() used\n");
00449 } else {
00450 j_printf("\t build_wchmm2() used\n");
00451 }
00452 #ifdef PASS1_IWCD
00453 if (old_iwcd_flag) {
00454 j_printf("\t full lcdset used\n");
00455 } else {
00456 j_printf("\t lcdset limited by word-pair constraint\n");
00457 }
00458 #endif
00459 #endif
00460 if (progout_flag) j_printf("\tprogressive output on 1st pass\n");
00461
00462
00463
00464 if (compute_only_1pass) {
00465 j_printf("\tCompute only 1-pass\n");
00466 }
00467 #ifdef CONFIDENCE_MEASURE
00468 j_printf("\t output word confidence measure ");
00469 #ifdef CM_NBEST
00470 j_printf("based on N-best candidates\n");
00471 #endif
00472 #ifdef CM_SEARCH
00473 j_printf("based on search-time scores\n");
00474 #endif
00475 #endif
00476
00477 j_printf("\nSystem I/O configuration:\n");
00478 j_printf("\t speech input source = ");
00479 if (speech_input == SP_RAWFILE) {
00480 j_printf("speech file\n");
00481 j_printf("\t input filelist = ");
00482 if (inputlist_filename == NULL) {
00483 j_printf("(none, enter filenames from stdin)\n");
00484 } else {
00485 j_printf("%s\n", inputlist_filename);
00486 }
00487 } else if (speech_input == SP_MFCFILE) {
00488 j_printf("MFCC parameter file (HTK format)\n");
00489 j_printf("\t filelist = ");
00490 if (inputlist_filename == NULL) {
00491 j_printf("(none, enter filenames from stdin)\n");
00492 } else {
00493 j_printf("%s\n", inputlist_filename);
00494 }
00495 } else if (speech_input == SP_STDIN) {
00496 j_printf("standard input\n");
00497 } else if (speech_input == SP_ADINNET) {
00498 j_printf("adinnet client\n");
00499 #ifdef USE_NETAUDIO
00500 } else if (speech_input == SP_NETAUDIO) {
00501 char *p;
00502 j_printf("NetAudio server on ");
00503 if (netaudio_devname != NULL) {
00504 j_printf("%s\n", netaudio_devname);
00505 } else if ((p = getenv("AUDIO_DEVICE")) != NULL) {
00506 j_printf("%s\n", p);
00507 } else {
00508 j_printf("local port\n");
00509 }
00510 #endif
00511 } else if (speech_input == SP_MIC) {
00512 j_printf("microphone\n");
00513 }
00514 if (speech_input != SP_MFCFILE) {
00515 if (speech_input == SP_RAWFILE || speech_input == SP_STDIN || speech_input == SP_ADINNET) {
00516 j_printf("\t sampling freq. = %d Hz required\n", smpFreq);
00517 } else {
00518 j_printf("\t sampling freq. = %d Hz\n", smpFreq);
00519 }
00520 }
00521 if (speech_input != SP_MFCFILE) {
00522 j_printf("\t threaded A/D-in = ");
00523 #ifdef HAVE_PTHREAD
00524 if (query_thread_on()) {
00525 j_printf("supported, on\n");
00526 } else {
00527 j_printf("supported, off\n");
00528 }
00529 #else
00530 j_printf("not supported (live input may be dropped)\n");
00531 #endif
00532 }
00533 if (strip_zero_sample) {
00534 j_printf("\t zero frames stripping = on\n");
00535 } else {
00536 j_printf("\t zero frames stripping = off\n");
00537 }
00538 if (speech_input != SP_MFCFILE) {
00539 if (query_segment_on()) {
00540 j_printf("\t silence cutting = on\n");
00541 j_printf("\t level thres = %d / 32767\n", level_thres);
00542 j_printf("\t zerocross thres = %d / sec.\n", zero_cross_num);
00543 j_printf("\t head margin = %d msec.\n", head_margin_msec);
00544 j_printf("\t tail margin = %d msec.\n", tail_margin_msec);
00545 } else {
00546 j_printf("\t silence cutting = off\n");
00547 }
00548 if (use_zmean || zmean_frame) {
00549 j_printf("\t remove DC offset = on");
00550 if (zmean_frame) {
00551 j_printf(" (frame-wise)\n");
00552 }
00553 if (speech_input == SP_RAWFILE) {
00554 j_printf(" (will compute for each file)\n");
00555 } else {
00556 j_printf(" (will compute from first %.1f sec)\n",
00557 (float)ZMEANSAMPLES / (float)smpFreq);
00558 }
00559 } else {
00560 j_printf("\t remove DC offset = off\n");
00561 }
00562 }
00563 j_printf("\t reject short input = ");
00564 if (rejectshortlen > 0) {
00565 j_printf("< %d msec\n", rejectshortlen);
00566 } else {
00567 j_printf("off\n");
00568 }
00569 #ifdef SP_BREAK_CURRENT_FRAME
00570 j_printf("\tshort pause segmentation= on\n");
00571 j_printf("\t sp duration length = %d frames\n", sp_frame_duration);
00572 #else
00573 j_printf("\tshort pause segmentation= off\n");
00574 #endif
00575 j_printf("\t result output to = ");
00576 switch(result_output) {
00577 case SP_RESULT_TTY:
00578 j_printf("tty (standard out)\n"); break;
00579 case SP_RESULT_MSOCK:
00580 j_printf("msock\n"); break;
00581 }
00582 if (progout_flag) {
00583 j_printf("\t progout interval = %d msec\n", progout_interval);
00584 }
00585 if (speech_input != SP_MFCFILE) {
00586 if (record_dirname != NULL) {
00587 j_printf("\tspeech data stored to = %s/\n", record_dirname);
00588 }
00589 }
00590 j_printf("\t output charset conv. = ");
00591 #ifdef CHARACTER_CONVERSION
00592 if (to_code == NULL) {
00593 j_printf("disabled\n");
00594 } else {
00595 if (from_code != NULL) {
00596 j_printf("from \"%s\" ", from_code);
00597 }
00598 j_printf("to \"%s\"\n", to_code);
00599 }
00600 #else
00601 j_printf("not supported\n");
00602 #endif
00603 j_printf("\n------------- System Info end -------------\n");
00604
00605 #ifdef USE_MIC
00606 if (realtime_flag) {
00607 if (cmn_required) {
00608 if (cmn_loaded) {
00609 j_printf("\ninitial CMN parameter loaded from file\n");
00610 } else {
00611 j_printf("\n");
00612 j_printf("\t************************************************************\n");
00613 j_printf("\t* NOTICE: The first input may not be correctly recoginized *\n");
00614 j_printf("\t* since no CMN parameter is available on startup. *\n");
00615 j_printf("\t************************************************************\n");
00616 }
00617 }
00618 }
00619 #endif
00620 }