00001
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086 #ifndef __J_RECOG_H__
00087 #define __J_RECOG_H__
00088
00089 #include <sent/stddefs.h>
00090 #include <sent/hmm.h>
00091 #include <sent/vocabulary.h>
00092 #include <sent/ngram2.h>
00093 #include <sent/dfa.h>
00094 #include <julius/wchmm.h>
00095 #include <julius/search.h>
00096 #include <julius/callback.h>
00097 #include <julius/jconf.h>
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
/*
 * FSBeam: work area holding token buffers, word-end bookkeeping, language
 * score weights and short-pause segmentation state. RecogProcess embeds one
 * instance by value as its member "pass1".
 * NOTE(review): the code that reads and writes these members is not part of
 * this header; member descriptions marked "presumably" are inferred from
 * names and types only and should be confirmed against the users.
 */
00114 typedef struct __FSBeam__ {
00115
00116 TOKEN2 *tlist[2]; /* two TOKEN2 arrays; presumably a double buffer selected by tl and tn */
00117 TOKENID *tindex[2]; /* two TOKENID arrays, one per tlist entry; presumably index/sort work */
00118 int maxtnum; /* presumably the allocated capacity of the token arrays */
00119 int expand_step; /* presumably the growth increment used when the arrays are enlarged */
00120 boolean expanded; /* presumably set when the arrays have been enlarged */
00121 int tnum[2]; /* one counter per tlist entry; presumably tokens currently in use */
00122 int n_start; /* presumably the first index of the active token range */
00123 int n_end; /* presumably the last index of the active token range */
00124 int tl; /* presumably which of the two buffers is the current one -- confirm */
00125 int tn; /* presumably which of the two buffers is the next one -- confirm */
00126
00127
00128 TOKENID *token; /* TOKENID array; presumably one entry per lexicon node (see totalnodenum) */
00129 #ifdef UNIGRAM_FACTORING
00130
00131 LOGPROB wordend_best_score; /* presumably the best word-end score seen so far */
00132 int wordend_best_node; /* presumably the node that produced wordend_best_score */
00133 TRELLIS_ATOM *wordend_best_tre; /* presumably the trellis atom tied to that best word end */
00134 WORD_ID wordend_best_last_cword; /* presumably the last context word for that best word end */
00135 #endif
00136
00137 int totalnodenum; /* presumably the number of nodes the work area was sized for */
00138 TRELLIS_ATOM bos; /* a TRELLIS_ATOM stored by value; presumably the beginning-of-sentence atom */
00139 boolean nodes_malloced; /* presumably TRUE once the node-dependent buffers are allocated */
00140 LOGPROB lm_weight; /* presumably a cached language model weight */
00141 LOGPROB lm_penalty; /* presumably a cached word insertion penalty */
00142 LOGPROB lm_penalty_trans; /* presumably a cached penalty for transparent words */
00143 LOGPROB penalty1; /* presumably a cached insertion penalty used with grammars */
00144 #if defined(WPAIR) && defined(WPAIR_KEEP_NLIMIT)
00145 boolean wpair_keep_nlimit; /* exists only when both WPAIR and WPAIR_KEEP_NLIMIT are defined */
00146 #endif
00147
00148 boolean in_sparea; /* presumably TRUE while inside a short-pause area */
00149 int tmp_sparea_start; /* presumably the frame where the current short-pause area began */
00150 #ifdef SP_BREAK_RESUME_WORD_BEGIN
00151 WORD_ID tmp_sp_break_last_word; /* only this member exists when SP_BREAK_RESUME_WORD_BEGIN is defined */
00152 #else
00153 WORD_ID last_tre_word; /* only this member exists when SP_BREAK_RESUME_WORD_BEGIN is not defined */
00154 #endif
00155 boolean first_sparea; /* presumably TRUE while still in the first short-pause area */
00156 int sp_duration; /* presumably the length of the current short-pause run, in frames */
00157 #ifdef SPSEGMENT_NAIST
00158 boolean after_trigger; /* presumably TRUE once speech has been triggered */
00159 int trigger_duration; /* presumably the number of frames since the trigger */
00160 boolean want_rewind; /* presumably a request to move input back to rewind_frame */
00161 int rewind_frame; /* presumably the frame to rewind to when want_rewind is set */
00162 boolean want_rewind_reprocess; /* presumably requests re-processing after the rewind */
00163 #endif
00164 char *pausemodelnames; /* single string; presumably the unsplit list of pause model names */
00165 char **pausemodel; /* array of strings; presumably the individual pause model names */
00166 int pausemodelnum; /* presumably the number of entries in pausemodel */
00167 } FSBeam;
00168
00169
/*
 * RealBeam: buffers for windowed processing of incoming speech samples.
 * Recog embeds one instance by value as its member "real".
 * NOTE(review): member roles are inferred from names and types; confirm
 * against the code that uses them.
 */
00174 typedef struct __RealBeam__ {
00175
00176 int maxframelen; /* presumably the maximum number of frames that can be handled */
00177
00178 SP16 *window; /* SP16 sample buffer; presumably holds the current analysis window */
00179 int windowlen; /* presumably the length of window, in samples */
00180 int windownum; /* presumably the number of samples currently stored in window */
00181
00182
00183 boolean last_is_segmented; /* presumably TRUE when the previous input ended by segmentation */
00184 SP16 *rest_Speech; /* SP16 sample buffer; presumably samples left over for the next segment */
00185 int rest_alloc_len; /* presumably the allocated length of rest_Speech */
00186 int rest_len; /* presumably the number of samples currently held in rest_Speech */
00187
00188 } RealBeam;
00189
/*
 * StackDecode: counters, hypothesis stack pointers and confidence-measure
 * work data. RecogProcess embeds one instance by value as its member "pass2".
 * NOTE(review): member roles marked "presumably" are inferred from names and
 * types; confirm against the code that uses them.
 */
00194 typedef struct __StackDecode__ {
00195 int hypo_len_count[MAXSEQNUM+1]; /* one counter per hypothesis length 0..MAXSEQNUM */
00196 int maximum_filled_length; /* presumably the longest length whose counter is considered full */
00197 #ifdef SCAN_BEAM
00198 LOGPROB *framemaxscore; /* LOGPROB array; presumably the maximum score per frame */
00199 #endif
00200 NODE *stocker_root; /* presumably the head of a pool of reusable NODE objects */
00201 int popctr; /* presumably the number of hypotheses popped */
00202 int genectr; /* presumably the number of hypotheses generated */
00203 int pushctr; /* presumably the number of hypotheses pushed */
00204 int finishnum; /* presumably the number of completed sentence hypotheses */
00205 NODE *current; /* presumably the hypothesis currently being processed */
00206
00207 #ifdef CONFIDENCE_MEASURE
00208 LOGPROB cm_alpha; /* presumably the scaling factor used for confidence scoring */
00209 # ifdef CM_MULTIPLE_ALPHA
00210 LOGPROB *cmsumlist; /* LOGPROB array; presumably one sum per alpha value */
00211 int cmsumlistlen; /* presumably the allocated length of cmsumlist */
00212 # endif
00213 # ifdef CM_SEARCH
00214 LOGPROB cm_tmpbestscore; /* presumably a temporary best score used while computing confidence */
00215 # ifndef CM_MULTIPLE_ALPHA
00216 LOGPROB cm_tmpsum; /* exists only without CM_MULTIPLE_ALPHA; presumably a temporary score sum */
00217 # endif
00218 int l_stacksize; /* presumably the capacity of the local stack */
00219 int l_stacknum; /* presumably the number of entries in the local stack */
00220 NODE *l_start; /* presumably the top entry of the local stack */
00221 NODE *l_bottom; /* presumably the bottom entry of the local stack */
00222 # endif
00223 # ifdef CM_NBEST
00224 LOGPROB *sentcm; /* no "= NULL" here: C forbids initializers on struct members, so whoever sets up this struct must assign NULL itself; presumably per-sentence confidence scores */
00225 LOGPROB *wordcm; /* same rule as sentcm: must be set to NULL at setup time; presumably per-word confidence scores */
00226 int sentnum; /* presumably the allocated length of sentcm */
00227 # endif
00228 #endif
00229
00230 } StackDecode;
00231
/*
 * LMFunc: table of three function pointers that each return a LOGPROB.
 * PROCESS_LM embeds one instance by value as its member "lmfunc".
 * NOTE(review): the meaning of each argument is not visible in this header;
 * the descriptions below are inferred from the member names and signatures.
 */
00236 typedef struct {
00237 LOGPROB (*uniprob)(WORD_INFO *, WORD_ID, LOGPROB); /* takes one WORD_ID; presumably a unigram score hook */
00238 LOGPROB (*biprob)(WORD_INFO *, WORD_ID, WORD_ID, LOGPROB); /* takes two WORD_IDs; presumably a bigram score hook */
00239 LOGPROB (*lmprob)(WORD_INFO *, WORD_ID *, int, WORD_ID, LOGPROB); /* takes a WORD_ID array plus an int (presumably its length) and one WORD_ID */
00240 } LMFunc;
00241
/*
 * GMMCalc: work area for GMM score computation and, when GMM_VAD is defined,
 * GMM-based voice activity state. Recog holds a pointer to one as member "gc".
 * NOTE(review): member roles are inferred from names and types; confirm
 * against the code that uses them.
 */
00246 typedef struct __gmm_calc__{
00247 LOGPROB *gmm_score; /* LOGPROB array; presumably one accumulated score per GMM */
00248 boolean *is_voice; /* boolean array; presumably flags which GMMs count as voice */
00249 int framecount; /* presumably the number of frames processed so far */
00250 LOGPROB *OP_calced_score; /* LOGPROB array; presumably scores of the computed Gaussians */
00251 int *OP_calced_id; /* int array; presumably ids matching OP_calced_score entries */
00252 int OP_calced_num; /* presumably the number of valid entries in the two arrays above */
00253 int OP_calced_maxnum; /* presumably the allocated capacity of the two arrays above */
00254 int OP_gprune_num; /* presumably the number of Gaussians kept after pruning */
00255 VECT *OP_vec; /* presumably the input vector currently being scored */
00256 short OP_veclen; /* presumably the length of OP_vec */
00257 HTK_HMM_Data *max_d; /* presumably the model with the maximum score */
00258 int max_i; /* presumably the index of the model with the maximum score */
00259 #ifdef CONFIDENCE_MEASURE
00260 LOGPROB gmm_max_cm; /* presumably the confidence score of the best model */
00261 #endif
00262 #ifdef GMM_VAD
00263 LOGPROB *rates; /* LOGPROB array; presumably a window of per-frame values (see nframe, framep) */
00264 int nframe; /* presumably the length of rates */
00265 boolean filled; /* presumably TRUE once rates has been completely filled */
00266 int framep; /* presumably the current write position inside rates */
00267
00268 boolean in_voice; /* presumably TRUE while the input is judged to be voice */
00269 boolean up_trigger; /* presumably set when voice starts */
00270 boolean down_trigger; /* presumably set when voice ends */
00271 boolean after_trigger; /* presumably TRUE once a start trigger has occurred */
00272 boolean want_rewind; /* presumably a request to move input back to rewind_frame */
00273 boolean want_rewind_reprocess; /* presumably requests re-processing after the rewind */
00274 int rewind_frame; /* presumably the frame to rewind to when want_rewind is set */
00275 int duration; /* presumably a frame count for the current state */
00276 #endif
00277 } GMMCalc;
00278
/*
 * Sentence: one sentence hypothesis as a word sequence with scores, plus an
 * optional alignment sub-structure. Output uses it both by pointer ("sent")
 * and by value ("pass1").
 * NOTE(review): member roles marked "presumably" are inferred from names and
 * types; confirm against the code that fills them in.
 */
00283 typedef struct __sentence__ {
00284 WORD_ID word[MAXSEQNUM]; /* word id sequence; capacity is fixed at MAXSEQNUM */
00285 int word_num; /* presumably the number of valid entries in word */
00286 LOGPROB score; /* presumably the total score of the sentence */
00287 LOGPROB confidence[MAXSEQNUM]; /* one value per word slot; presumably word confidence scores */
00288 LOGPROB score_lm; /* presumably the language model part of score */
00289 LOGPROB score_am; /* presumably the acoustic model part of score */
00290 int gram_id; /* presumably the id of the grammar the sentence belongs to */
00291
00296 struct {
00297 boolean filled; /* presumably TRUE when the alignment data below is valid */
00298 int num; /* presumably the number of aligned units, i.e. the length of the arrays below */
00299 short unittype; /* presumably selects the unit kind, deciding which of w, ph, loc applies */
00300
00301 WORD_ID *w; /* WORD_ID array; presumably used for word-unit alignment */
00302 HMM_Logical **ph; /* array of HMM_Logical pointers; presumably used for phoneme-unit alignment */
00303 short *loc; /* short array; presumably used for state-unit alignment */
00304 boolean *is_iwsp; /* boolean array; presumably marks inter-word short-pause units */
00305
00306 int *begin_frame; /* int array; presumably the first frame of each unit */
00307 int *end_frame; /* int array; presumably the last frame of each unit */
00308 LOGPROB *avgscore; /* LOGPROB array; presumably the average score of each unit */
00309
00310 LOGPROB allscore; /* presumably the score of the whole alignment */
00311 } align;
00312
00313 } Sentence;
00314
/*
 * ADIn: audio input state. It groups the input device function pointers,
 * detection parameters, working buffers and, when HAVE_PTHREAD is defined,
 * data shared with an input thread. Recog holds a pointer to one as "adin".
 * NOTE(review): member roles marked "presumably" are inferred from names and
 * types; confirm against the audio input code.
 * NOTE(review): pthread_mutex_t is used below but no <pthread.h> include is
 * visible among this header's includes; presumably one of them provides it.
 */
00319 typedef struct __adin__ {
00320
00322 boolean (*ad_standby)(int, void *); /* presumably prepares the input device; argument meaning not visible here */
00324 boolean (*ad_begin)(); /* declared without a prototype, so arguments are unchecked; presumably starts recording */
00326 boolean (*ad_end)(); /* declared without a prototype; presumably stops recording */
00328 boolean (*ad_resume)(); /* declared without a prototype; presumably resumes a paused input */
00330 boolean (*ad_pause)(); /* declared without a prototype; presumably pauses the input */
00332 int (*ad_read)(SP16 *, int); /* takes an SP16 buffer and an int; presumably reads samples and returns a count or status */
00333
00334
00335 int thres; /* presumably the input level threshold */
00336 int noise_zerocross; /* presumably the zero-cross count used for detection */
00337 int nc_max; /* presumably the upper limit compared against nc */
00338 boolean adin_cut_on; /* presumably TRUE when silence cutting is enabled */
00339 boolean silence_cut_default; /* presumably the default value for adin_cut_on */
00340 boolean strip_flag; /* presumably TRUE when invalid samples are stripped */
00341 boolean enable_thread; /* presumably TRUE when input runs in a separate thread */
00342 boolean need_zmean; /* presumably TRUE when zero-mean processing is required */
00343
00344
00345 int c_length; /* presumably the length of the cycle buffer, in samples */
00346 int c_offset; /* presumably an offset into the cycle buffer */
00347 SP16 *swapbuf; /* SP16 sample buffer; presumably temporary storage for swapping data */
00348 int sbsize; /* presumably the allocated size of swapbuf */
00349 int sblen; /* presumably the number of samples currently in swapbuf */
00350 int rest_tail; /* presumably the number of trailing samples still to be processed */
00351
00352 ZEROCROSS zc; /* ZEROCROSS work data stored by value */
00353
00354 #ifdef HAVE_PTHREAD
00355
00356 pthread_mutex_t mutex; /* presumably guards the members in this HAVE_PTHREAD section -- confirm */
00357 SP16 *speech; /* SP16 sample buffer; presumably samples handed over from the input thread */
00358 int speechlen; /* presumably the number of samples currently in speech */
00359
00360
00361
00362
00363
00364
00365
00366
00367
00368 boolean transfer_online; /* presumably TRUE while captured samples should be passed on */
00373 boolean adinthread_buffer_overflowed; /* presumably set when the thread's buffer ran out of space */
00374
00375 boolean ignore_speech_while_recog; /* presumably TRUE to discard input while recognition runs */
00376
00377 #endif
00378
00379
00380 SP16 *buffer; /* SP16 sample buffer; presumably the main input buffer */
00381 int bpmax; /* presumably the capacity of buffer */
00382 int bp; /* presumably the current position inside buffer */
00383 int current_len; /* presumably the number of valid samples in buffer */
00384 SP16 *cbuf; /* SP16 sample buffer; presumably the cycle buffer sized by c_length */
00385 boolean down_sample; /* presumably TRUE when input must be down-sampled */
00386 SP16 *buffer48; /* SP16 sample buffer; presumably holds input before down-sampling */
00387 int io_rate; /* presumably the ratio between input and output sample counts */
00388
00389 boolean is_valid_data; /* presumably TRUE while the input is inside a detected speech region */
00390 int nc; /* presumably a counter compared against nc_max */
00391 boolean end_of_stream; /* presumably TRUE once the input has reached its end */
00392 boolean need_init; /* presumably TRUE when buffers must be reset before the next input */
00393
00394 DS_BUFFER *ds; /* presumably the work area used for down-sampling */
00395
00396 boolean rehash; /* presumably TRUE when buffered data must be re-aligned -- confirm */
00397 } ADIn;
00398
/*
 * Output: recognition result container. RecogProcess embeds one instance by
 * value as its member "result".
 * NOTE(review): member roles marked "presumably" are inferred from names and
 * types; the set of valid "status" codes is not visible in this header.
 */
00404 typedef struct __Output__ {
00413 int status; /* presumably a result or error code; valid values not visible here */
00414
00415 int num_frame; /* presumably the length of the processed input, in frames */
00416 int length_msec; /* presumably the length of the processed input, in milliseconds */
00417
00418 Sentence *sent; /* array of Sentence; presumably the final sentence results */
00419 int sentnum; /* presumably the number of entries in sent */
00420
00421 WordGraph *wg1; /* presumably a word graph produced by the first pass */
00422 int wg1_num; /* presumably the number of words in wg1 */
00423
00424 WordGraph *wg; /* presumably the final word graph */
00425
00426 CN_CLUSTER *confnet; /* presumably a confusion network built from the result */
00427
00428 Sentence pass1; /* a Sentence stored by value; presumably the first-pass result */
00429
00430 } Output;
00431
00432
00433
00434
00435
00436
/*
 * MFCCCalc: one feature-extraction instance with its parameters, work areas,
 * cepstral mean normalization (cmn) and front-end (frontend) settings, and
 * per-input progress state. Instances can be chained through "next"; Recog
 * points to them as "mfcclist" and "gmmmfcc", and PROCESS_AM as "mfcc".
 * NOTE(review): member roles marked "presumably" are inferred from names and
 * types; confirm against the feature-extraction code.
 */
00441 typedef struct __mfcc_calc__ {
00442
00447 short id; /* presumably a unique id for this instance */
00448
00453 Value *para; /* presumably the analysis parameters used by this instance */
00454
00459 boolean htk_loaded; /* presumably TRUE when parameters were loaded from an HTK config */
00464 boolean hmm_loaded; /* presumably TRUE when parameters were loaded from an HMM header */
00465
00470 boolean paramtype_check_flag; /* presumably enables checking of the input parameter type */
00471
00476 MFCCWork *wrk; /* presumably the work area for MFCC computation */
00477
00482 HTK_Param *param; /* presumably the feature vectors of the current input */
00483
00487 HTK_Param *rest_param; /* presumably feature vectors carried over to the next segment */
00488
00493 struct {
00497 char *load_filename; /* presumably the file to read initial CMN data from */
00502 boolean update; /* presumably TRUE when CMN data is updated per input */
00506 char *save_filename; /* presumably the file to write CMN data to */
00510 float map_weight; /* presumably the weight given to the initial CMN data */
00511
00515 boolean loaded; /* presumably TRUE once CMN data has been loaded */
00516
00521 CMNWork *wrk; /* presumably the work area for CMN computation */
00522
00523 } cmn;
00524
00529 struct {
00533 float *ssbuf; /* float array; presumably the noise spectrum for spectral subtraction */
00534
00538 int sslen; /* presumably the length of ssbuf */
00539
00544 float ss_alpha; /* presumably the subtraction coefficient */
00545
00550 float ss_floor; /* presumably the flooring coefficient */
00551
00555 boolean sscalc; /* presumably TRUE when the noise spectrum is computed from the input */
00556
00560 int sscalc_len; /* presumably the length of input used when sscalc is set; unit not visible here */
00561
00565 char *ssload_filename; /* presumably the file to read the noise spectrum from */
00566
00571 MFCCWork *mfccwrk_ss; /* presumably a separate MFCC work area for the noise spectrum */
00572
00573 } frontend;
00574
00579 ENERGYWork ewrk; /* ENERGYWork data stored by value */
00580
00585 DeltaBuf *db; /* presumably the buffer for delta coefficients */
00590 DeltaBuf *ab; /* presumably the buffer for acceleration coefficients */
00595 VECT *tmpmfcc; /* presumably a temporary feature vector */
00596
00602 boolean valid; /* presumably TRUE when this instance has usable data for the current frame */
00603
00608 int f; /* presumably the current frame index */
00609
00614 int last_time; /* presumably the last processed frame */
00615
00620 int sparea_start; /* presumably the frame where the re-start area begins */
00621
00626 boolean segmented; /* presumably TRUE when the input was cut by segmentation */
00627
00628 #ifdef POWER_REJECT
00629 float avg_power; /* exists only with POWER_REJECT; presumably the average input power */
00630 #endif
00631
00636 struct __mfcc_calc__ *next; /* pointer to another MFCCCalc; presumably the next list element */
00637
00638 } MFCCCalc;
00639
/*
 * PROCESS_AM: one acoustic model instance, tying its configuration to the
 * loaded HMM data and the MFCCCalc it uses. Instances can be chained through
 * "next"; Recog points to them as "amlist".
 * NOTE(review): member roles marked "presumably" are inferred from names and
 * types; confirm against the model setup code.
 */
00644 typedef struct __process_am__ {
00645
00650 JCONF_AM *config; /* presumably the configuration this instance was built from */
00651
00656 MFCCCalc *mfcc; /* the MFCCCalc instance associated with this model */
00657
00661 HTK_HMM_INFO *hmminfo; /* presumably the main HMM definition */
00662
00666 HTK_HMM_INFO *hmm_gs; /* presumably an HMM definition used for Gaussian selection */
00667
00671 HMMWork hmmwrk; /* HMMWork data stored by value */
00672
00677 struct __process_am__ *next; /* pointer to another PROCESS_AM; presumably the next list element */
00678
00679 } PROCESS_AM;
00680
/*
 * PROCESS_LM: one language model instance, with its configuration, the
 * acoustic model it refers to, vocabulary, N-gram or grammar data and the
 * LMFunc hook table. Instances can be chained through "next"; Recog points
 * to them as "lmlist".
 * NOTE(review): member roles marked "presumably" are inferred from names and
 * types; valid values of lmtype and lmvar are not visible in this header.
 */
00685 typedef struct __process_lm__ {
00686
00691 JCONF_LM *config; /* presumably the configuration this instance was built from */
00692
00697 PROCESS_AM *am; /* the PROCESS_AM instance this language model refers to */
00698
00699
00704 int lmtype; /* presumably selects the model kind; values not visible here */
00705
00711 int lmvar; /* presumably selects a variant within lmtype; values not visible here */
00712
00716 WORD_INFO *winfo; /* presumably the vocabulary */
00717
00721 NGRAM_INFO *ngram; /* presumably the N-gram data, when an N-gram is used */
00722
00726 MULTIGRAM *grammars; /* presumably the set of loaded grammars */
00727
00733 int gram_maxid; /* presumably the highest grammar id issued so far */
00734
00739 DFA_INFO *dfa; /* presumably the combined grammar automaton used for recognition */
00740
00745 boolean global_modified; /* presumably TRUE when the grammar set has changed and needs rebuilding */
00746
00751 LMFunc lmfunc; /* LMFunc hook table stored by value */
00752
00757 struct __process_lm__ *next; /* pointer to another PROCESS_LM; presumably the next list element */
00758
00759 } PROCESS_LM;
00760
/*
 * RecogProcess: one recognition process instance. It links a search
 * configuration to an acoustic model (am) and a language model (lm), and owns
 * the work areas for the two passes (pass1, pass2) and the result (result).
 * Instances can be chained through "next"; Recog points to them as
 * "process_list".
 * NOTE(review): member roles marked "presumably" are inferred from names and
 * types; confirm against the search code.
 */
00765 typedef struct __recogprocess__ {
00766
00771 boolean live; /* presumably TRUE while this instance takes part in recognition */
00772
00779 short active; /* presumably a requested activation state; values not visible here */
00780
00785 JCONF_SEARCH *config; /* presumably the search configuration this instance was built from */
00786
00791 PROCESS_AM *am; /* the acoustic model instance used by this process */
00792
00797 PROCESS_LM *lm; /* the language model instance used by this process */
00798
00803 int lmtype; /* same name as PROCESS_LM.lmtype; presumably a copy of it */
00804
00810 int lmvar; /* same name as PROCESS_LM.lmvar; presumably a copy of it */
00811
00815 boolean ccd_flag; /* presumably TRUE when cross-word context dependency is handled */
00816
00820 WCHMM_INFO *wchmm; /* presumably the lexicon network built for the first pass */
00821
00825 int trellis_beam_width; /* presumably the beam width actually used for the first pass */
00826
00830 BACKTRELLIS *backtrellis; /* presumably the word trellis passed from the first to the second pass */
00831
00835 FSBeam pass1; /* FSBeam work area stored by value */
00836
00841 StackDecode pass2; /* StackDecode work area stored by value */
00842
00846 WORD_ID pass1_wseq[MAXSEQNUM]; /* word id sequence; capacity is fixed at MAXSEQNUM */
00847
00851 int pass1_wnum; /* presumably the number of valid entries in pass1_wseq */
00852
00856 LOGPROB pass1_score; /* presumably the score of the pass1_wseq result */
00857
00861 WORD_ID sp_break_last_word; /* presumably the last word before a short-pause break */
00865 WORD_ID sp_break_last_nword; /* presumably the last non-transparent word before the break */
00869 boolean sp_break_last_nword_allow_override; /* presumably allows sp_break_last_nword to be replaced later */
00873 WORD_ID sp_break_2_begin_word; /* presumably the start word for the second pass on a segment */
00877 WORD_ID sp_break_2_end_word; /* presumably the end word for the second pass on a segment */
00878
00882 int peseqlen; /* same name as Recog.peseqlen; presumably the input length in frames */
00883
00887 int graph_totalwordnum; /* presumably the total number of words in the generated graph */
00888
00893 Output result; /* Output result container stored by value */
00894
00899 boolean graphout; /* presumably TRUE when word graph output is enabled */
00900
00901 #ifdef DETERMINE
00902 int determine_count; /* presumably counts frames in which the best word stayed the same */
00903 LOGPROB determine_maxnodescore; /* presumably the best node score used for that decision */
00904 boolean determined; /* presumably TRUE once the result has been fixed early */
00905 LOGPROB determine_last_wid; /* NOTE(review): typed LOGPROB although the name suggests a word id (WORD_ID); confirm whether this is intended before changing it */
00906 boolean have_determine; /* presumably TRUE when early determination is enabled */
00907 #endif
00908
00913 boolean have_interim; /* presumably TRUE when interim results are available */
00914
00919 void *hook; /* untyped pointer; presumably free for application data */
00920
00925 struct __recogprocess__ *next; /* pointer to another RecogProcess; presumably the next list element */
00926
00927 } RecogProcess;
00928
/*
 * Recog: top-level engine instance. It ties the global configuration, audio
 * input, feature extraction list, acoustic and language model lists and
 * recognition process list together, and holds the input speech buffer, GMM
 * data, process control flags and the callback registry.
 * NOTE(review): member roles marked "presumably" are inferred from names and
 * types; confirm against the engine code.
 */
00933 typedef struct __Recog__ {
00934
00935
00940 Jconf *jconf; /* presumably the global configuration */
00941
00942
00947 ADIn *adin; /* audio input state */
00948
00952 RealBeam real; /* RealBeam work area stored by value */
00953
00958 MFCCCalc *mfcclist; /* presumably the head of the MFCCCalc chain */
00959
00964 PROCESS_AM *amlist; /* presumably the head of the PROCESS_AM chain */
00965
00970 PROCESS_LM *lmlist; /* presumably the head of the PROCESS_LM chain */
00971
00976 RecogProcess *process_list; /* presumably the head of the RecogProcess chain */
00977
00978
00983 boolean process_segment; /* presumably TRUE when the last input was ended by segmentation */
00984
00985
00986
00987
00991 SP16 *speech; /* SP16 sample buffer; presumably the whole input speech */
00992
00997 int speechalloclen; /* presumably the allocated length of speech */
00998
01002 int speechlen; /* presumably the number of samples currently in speech */
01003
01007 int peseqlen; /* presumably the input length in frames */
01008
01009
01010
01015 HTK_HMM_INFO *gmm; /* presumably the GMM definitions */
01016
01021 MFCCCalc *gmmmfcc; /* presumably the MFCCCalc instance used for GMM input */
01022
01027 GMMCalc *gc; /* GMM computation work area */
01028
01029
01030
01031
01043 boolean process_active; /* presumably TRUE while recognition is running rather than paused */
01044
01050 boolean process_want_terminate; /* presumably a request to stop recognition immediately */
01051
01059 boolean process_want_reload; /* presumably a request to reload models or grammars */
01060
01066 short gram_switch_input_method; /* presumably selects when a grammar change takes effect; values not visible here */
01067
01074 boolean process_online; /* presumably TRUE while the input stream is open */
01075
01081 boolean (*calc_vector)(MFCCCalc *, SP16 *, int); /* takes an MFCCCalc, an SP16 buffer and an int; presumably computes one feature vector */
01082
01088 boolean triggered; /* presumably TRUE once speech input has been triggered */
01089
01094 void (*callback_function[SIZEOF_CALLBACK_ID][MAX_CALLBACK_HOOK])(); /* up to MAX_CALLBACK_HOOK unprototyped function pointers per callback id */
01099 void *callback_user_data[SIZEOF_CALLBACK_ID][MAX_CALLBACK_HOOK]; /* same dimensions as callback_function; presumably the data passed to each callback */
01104 int callback_function_num[SIZEOF_CALLBACK_ID]; /* one counter per callback id; presumably the number of registered functions */
01109 int callback_list_code[MAX_CALLBACK_HOOK*SIZEOF_CALLBACK_ID]; /* one slot per possible registration; presumably the callback id of each */
01114 int callback_list_loc[MAX_CALLBACK_HOOK*SIZEOF_CALLBACK_ID]; /* one slot per possible registration; presumably its index inside callback_function */
01119 int callback_num; /* presumably the total number of registered callbacks */
01120
01121
01122
01127 void *hook; /* untyped pointer; presumably free for application data */
01128
01129 } Recog;
01130
01131 #endif