julius/define.h

Go to the documentation of this file.
00001 
00032 /*
00033  * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
00034  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00035  * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology
00036  * All rights reserved
00037  */
00038 
00039 /*****************************************************************************/
00041 /*****************************************************************************/
00042 
00043 /* switch N-gram mode (julius) <-> grammar mode (julian) */
00044 #ifdef USE_DFA
00045 #undef  USE_NGRAM
00046 #else
00047 #define USE_NGRAM
00048 #endif
00049 
00050 /* delete incoherent option */
00051 #ifdef USE_DFA
00052 #ifdef UNIGRAM_FACTORING
00053 #undef UNIGRAM_FACTORING
00054 #endif
00055 #define CATEGORY_TREE
00056 #else  /* USE_NGRAM */
00057 #ifdef CATEGORY_TREE
00058 #undef CATEGORY_TREE
00059 #endif
00060 #endif /* USE_DFA */
00061 
00062 /* abbreviations for verbose message output */
00063 #define VERMES if (verbose_flag) j_printerr
00064 
00065 /* define this to report memory usage on exit (Linux only) */
00066 #undef REPORT_MEMORY_USAGE
00067 
00068 #ifdef USE_NGRAM
00069 /*** tree construction ***/
00070 /* With 1-best approximation, constructing a single tree from all words
00071    causes many errors due to factoring.  Listing each word flatly with no
00072    tree organization will not cause this error, but the network becomes
00073    much larger and, especially, the inter-word LM handling becomes much more
00074    complex (O(n^2)).  The cost may be eased by LM caching, but that needs much
00075    memory. */
00076 /* This is a trade-off between accuracy and cost */
00077 #define SHORT_WORD_LEN 2
00078 #ifdef LOWMEM
00079 /* don't separate, construct a single tree from all words */
00080 /* root nodes are about 50 in monophone, cache size will be 5MB at most */
00081 #define NO_SEPARATE_SHORT_WORD
00082 #else
00083 #ifdef LOWMEM2
00084 /* experimental: separate words that frequently appear in the corpus (1-gram) */
00085 /* root nodes will be "-sepnum num" + 50, cache size will be 10MB or so */
00086 #define NO_SEPARATE_SHORT_WORD
00087 #define SEPARATE_BY_UNIGRAM
00088 #define DEFAULT_SEPARATE_WNUM 150
00089 #else
00090 /* separate all short words (<= 2 phonemes) */
00091 /* root nodes are about 1100 in 20k (proportional to vocabulary),
00092    cache size will be about 100MB at most */
00093 #endif /* LOWMEM2 */
00094 #endif /* LOWMEM */
00095 
00096 /*#define HASH_CACHE_IW*/
00097 /* "./configure --enable-lowmem" defines NO_SEPARATE_SHORT_WORD instead */
00098 
00099 #endif /* USE_NGRAM */
00100 
00101 #ifdef USE_NGRAM
00102 /* default language model weight and insertion penalty for pass1 and pass2 */
00103 /* these values come from the best parameters in IPA evaluation result */
00104 #define DEFAULT_LM_WEIGHT_MONO_PASS1   5.0
00105 #define DEFAULT_LM_PENALTY_MONO_PASS1 -1.0
00106 #define DEFAULT_LM_WEIGHT_MONO_PASS2   6.0
00107 #define DEFAULT_LM_PENALTY_MONO_PASS2  0.0
00108 #ifdef PASS1_IWCD
00109 #define DEFAULT_LM_WEIGHT_TRI_PASS1   8.0
00110 #define DEFAULT_LM_PENALTY_TRI_PASS1 -2.0
00111 #define DEFAULT_LM_WEIGHT_TRI_PASS2   8.0
00112 #define DEFAULT_LM_PENALTY_TRI_PASS2 -2.0
00113 #else
00114 #define DEFAULT_LM_WEIGHT_TRI_PASS1   9.0
00115 #define DEFAULT_LM_PENALTY_TRI_PASS1  8.0
00116 #define DEFAULT_LM_WEIGHT_TRI_PASS2  11.0
00117 #define DEFAULT_LM_PENALTY_TRI_PASS2 -2.0
00118 #endif /* PASS1_IWCD */
00119 #endif /* USE_NGRAM */
00120 
00121 /* Switch head/tail word insertion penalty to be inserted */
00122 #undef FIX_PENALTY
00123 
00124 /* some definitions for short-pause segmentation */
00125 #ifdef SP_BREAK_CURRENT_FRAME
00126 #undef SP_BREAK_EVAL            /* output messages for evaluation */
00127 #undef SP_BREAK_DEBUG           /* output messages for debug */
00128 #undef SP_BREAK_RESUME_WORD_BEGIN /* resume word = maxword at beginning of sp area */
00129 #endif
00130 
00131 /* '01/10/18 by ri: enable fix for trellis lookup order */
00132 #define PREFER_CENTER_ON_TRELLIS_LOOKUP
00133 
00134 #ifdef MULTIPATH_VERSION
00135 /* '01/11/28 by ri: malloc step for startnode */
00136 # define STARTNODE_STEP 300
00137 /* default value of iwsp penalty */
00138 # define IWSP_PENALTY_DEFAULT -1.0
00139 #endif
00140 
00141 /* default dict entry for IW-sp word that will be added to dict with -iwspword */
00142 #ifdef USE_NGRAM
00143 #define IWSPENTRY_DEFAULT "<UNK> [sp] sp sp"
00144 #endif
00145 
00146 /* confidence scoring method */
00147 #ifdef CONFIDENCE_MEASURE
00148 # ifndef CM_NBEST       /* use conventional N-best CM, will be defined if "--enable-cm-nbest" specified */
00149 #  define CM_SEARCH     /* otherwise, use on-the-fly CM scoring */
00150 # endif
00151 #endif
00152 
00153 /* dynamic word graph generation */
00154 #ifdef GRAPHOUT        /* output result in word-graph format */
00155 
00156 #undef GRAPHOUT_SEARCH_CONSIDER_RIGHT /* if defined, only hypotheses whose
00157                                          left/right contexts are already
00158                                          included in the popped hypothesis
00159                                          will be merged.  EXPERIMENTAL, should not be defined.
00160                                        */
00161 #ifdef CM_SEARCH_LIMIT
00162 #undef CM_SEARCH_LIMIT_AFTER    /* enable above only after 1 sentence found */
00163 #undef CM_SEARCH_LIMIT_POP      /* terminate hypo of low CM on pop */
00164 #endif
00165 
00166 /* compute exact boundary instead of using 1st pass result */
00167 /* also propagate exact time boundary to the right context after generation */
00168 /* this may produce precise word boundary, but cause bigger word graph output */
00169 #define GRAPHOUT_PRECISE_BOUNDARY
00170 
00171 #undef GDEBUG                   /* enable debug message in graphout.c */
00172 
00173 #endif /* GRAPHOUT */
00174 
00175 /* some decoding fix candidates */
00176 #undef FIX_35_PASS2_STRICT_SCORE /* fix hypothesis scores by enabling
00177                                       bt_discount_pescore() in standard mode
00178                                       with PASS2_STRICT_IWCD
00179                                    */
00180 #define FIX_35_INHIBIT_SAME_WORD_EXPANSION /* prevent connecting the same trellis word in 2nd pass */
00181 
00182 
00183 /* below are new since 3.5.2 */
00184 #ifdef GRAPHOUT
00185 
00196 #define GRAPHOUT_OVERWRITE
00197 
00198 /* with GRAPHOUT_OVERWRITE, use gscore_head instead of fscore_head */
00204 #undef GRAPHOUT_OVERWRITE_GSCORE
00205 
00212 #define GRAPHOUT_LIMIT_BOUNDARY_LOOP
00213 
00220 #define GRAPHOUT_LIMIT_BOUNDARY_LOOP_NUM_DEFAULT 20
00221 
00234 #define GRAPHOUT_SEARCH_DELAY_TERMINATION
00235 
00241 #define GRAPHOUT_DEPTHCUT
00242 
00248 #define GRAPHOUT_DEPTHCUT_DEFAULT 80
00249 
00250 #endif /* GRAPHOUT */

Generated on Tue Dec 26 16:16:32 2006 for Julius by  doxygen 1.5.0