00001
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086 #ifndef __J_RECOG_H__
00087 #define __J_RECOG_H__
00088
00089 #include <sent/stddefs.h>
00090 #include <sent/hmm.h>
00091 #include <sent/vocabulary.h>
00092 #include <sent/ngram2.h>
00093 #include <sent/dfa.h>
00094 #include <julius/wchmm.h>
00095 #include <julius/search.h>
00096 #include <julius/callback.h>
00097 #include <julius/jconf.h>
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00114 typedef struct __FSBeam__ { /* Work area embedded by value in RecogProcess as `pass1`. NOTE(review): presumably first-pass beam search state -- confirm */
00115
00116 TOKEN2 *tlist[2]; /* two token buffers */
00117 TOKENID *tindex[2]; /* two token-id arrays (same count as tlist[]) */
00118 int maxtnum; /* NOTE(review): presumably allocated capacity of tlist[]/tindex[] -- confirm */
00119 int expand_step; /* NOTE(review): presumably growth increment when buffers are enlarged -- confirm */
00120 boolean expanded;
00121 int tnum[2]; /* one counter per tlist[] slot */
00122 int n_start;
00123 int n_end;
00124 int tl; /* NOTE(review): tl/tn look like indices (0/1) selecting a slot of the [2] arrays -- confirm */
00125 int tn;
00126
00127
00128 TOKENID *token;
00129 #ifdef UNIGRAM_FACTORING /* the four wordend_best_* members exist only in UNIGRAM_FACTORING builds */
00130
00131 LOGPROB wordend_best_score;
00132 int wordend_best_node;
00133 TRELLIS_ATOM *wordend_best_tre;
00134 WORD_ID wordend_best_last_cword;
00135 #endif
00136
00137 int totalnodenum;
00138 TRELLIS_ATOM bos; /* stored by value, not as a pointer */
00139 boolean nodes_malloced; /* NOTE(review): presumably records whether the token buffers above are allocated -- confirm */
00140 LOGPROB lm_weight;
00141 LOGPROB lm_penalty;
00142 LOGPROB lm_penalty_trans;
00143 LOGPROB penalty1;
00144 #if defined(WPAIR) && defined(WPAIR_KEEP_NLIMIT) /* member below exists only when both macros are defined */
00145 boolean wpair_keep_nlimit;
00146 #endif
00147
00148 boolean in_sparea;
00149 int tmp_sparea_start;
00150 #ifdef SP_BREAK_RESUME_WORD_BEGIN /* exactly one of the next two WORD_ID members exists, chosen by this macro */
00151 WORD_ID tmp_sp_break_last_word;
00152 #else
00153 WORD_ID last_tre_word;
00154 #endif
00155 boolean first_sparea;
00156 int sp_duration;
00157 #ifdef SPSEGMENT_NAIST /* trigger/rewind members exist only in SPSEGMENT_NAIST builds */
00158 boolean after_trigger;
00159 int trigger_duration;
00160 boolean want_rewind;
00161 int rewind_frame;
00162 boolean want_rewind_reprocess;
00163 #endif
00164 char *pausemodelnames; /* single string; NOTE(review): relation to pausemodel[] below is not visible here -- confirm */
00165 char **pausemodel; /* array of strings */
00166 int pausemodelnum; /* NOTE(review): presumably the element count of pausemodel[] -- confirm */
00167 } FSBeam;
00168
00169
00174 typedef struct __RealBeam__ { /* Work area embedded by value in Recog as `real`. NOTE(review): presumably state for on-the-fly (real-time) input processing -- confirm */
00175
00176 int maxframelen;
00177
00178 SP16 *window; /* sample buffer; windowlen/windownum are its companion integers */
00179 int windowlen;
00180 int windownum;
00181
00182
00183 boolean last_is_segmented;
00184 SP16 *rest_Speech; /* second sample buffer, tracked by rest_alloc_len and rest_len */
00185 int rest_alloc_len; /* NOTE(review): presumably allocated capacity of rest_Speech -- confirm */
00186 int rest_len; /* NOTE(review): presumably number of valid samples in rest_Speech -- confirm */
00187
00188 } RealBeam;
00189
00194 typedef struct __StackDecode__ { /* Work area embedded by value in RecogProcess as `pass2`. NOTE(review): presumably second-pass stack decoding state -- confirm */
00195 int hypo_len_count[MAXSEQNUM+1]; /* one counter per index 0..MAXSEQNUM */
00196 int maximum_filled_length;
00197 #ifdef SCAN_BEAM /* member below exists only in SCAN_BEAM builds */
00198 LOGPROB *framemaxscore;
00199 #endif
00200 NODE *stocker_root;
00201 int popctr;
00202 int genectr;
00203 int pushctr;
00204 int finishnum;
00205 NODE *current;
00206
00207 #ifdef CONFIDENCE_MEASURE /* everything down to the matching #endif exists only in CONFIDENCE_MEASURE builds */
00208 LOGPROB cm_alpha;
00209 # ifdef CM_MULTIPLE_ALPHA
00210 LOGPROB *cmsumlist;
00211 int cmsumlistlen;
00212 # endif
00213 # ifdef CM_SEARCH
00214 LOGPROB cm_tmpbestscore;
00215 # ifndef CM_MULTIPLE_ALPHA /* cm_tmpsum exists only when CM_SEARCH is on and CM_MULTIPLE_ALPHA is off */
00216 LOGPROB cm_tmpsum;
00217 # endif
00218 int l_stacksize;
00219 int l_stacknum;
00220 NODE *l_start;
00221 NODE *l_bottom;
00222 # endif
00223 # ifdef CM_NBEST
00224 LOGPROB *sentcm; /* no initializer: C struct members cannot have one, so the code that creates a StackDecode must set this to NULL itself */
00225 LOGPROB *wordcm; /* no initializer, same reason as sentcm */
00226 int sentnum;
00227 # endif
00228 #endif
00229
00230 LOGPROB *wordtrellis[2]; /* two score buffers */
00231 LOGPROB *g;
00232 HMM_Logical **phmmseq; /* array of HMM_Logical pointers */
00233 int phmmlen_max; /* NOTE(review): presumably allocated length of phmmseq/has_sp -- confirm */
00234 boolean *has_sp;
00235 #ifdef GRAPHOUT_PRECISE_BOUNDARY /* word-end frame/score buffers exist only in GRAPHOUT_PRECISE_BOUNDARY builds */
00236 short *wend_token_frame[2];
00237 LOGPROB *wend_token_gscore[2];
00238 short *wef;
00239 LOGPROB *wes;
00240 #endif
00241
00242 } StackDecode;
00243
00248 typedef struct { /* Table of three function pointers that each return a LOGPROB; embedded by value in PROCESS_LM as `lmfunc`. NOTE(review): presumably hooks for user-supplied LM scoring -- confirm */
00249 LOGPROB (*uniprob)(WORD_INFO *, WORD_ID, LOGPROB); /* takes one WORD_ID */
00250 LOGPROB (*biprob)(WORD_INFO *, WORD_ID, WORD_ID, LOGPROB); /* takes two WORD_IDs */
00251 LOGPROB (*lmprob)(WORD_INFO *, WORD_ID *, int, WORD_ID, LOGPROB); /* takes a WORD_ID array plus an int (presumably its length -- confirm) and one more WORD_ID */
00252 } LMFunc;
00253
00258 typedef struct __gmm_calc__{ /* GMM computation work area; Recog holds a pointer to one in its `gc` member */
00259 LOGPROB *gmm_score;
00260 boolean *is_voice;
00261 int framecount;
00262
00263 short OP_nstream; /* NOTE(review): presumably the number of used entries in the two *_stream arrays below -- confirm */
00264 VECT *OP_vec_stream[MAXSTREAMNUM]; /* one vector pointer per stream slot */
00265 short OP_veclen_stream[MAXSTREAMNUM]; /* one length per stream slot */
00266
00267 LOGPROB *OP_calced_score;
00268 int *OP_calced_id;
00269 int OP_calced_num;
00270 int OP_calced_maxnum;
00271 int OP_gprune_num;
00272 VECT *OP_vec;
00273 short OP_veclen;
00274 HTK_HMM_Data *max_d;
00275 int max_i;
00276 #ifdef CONFIDENCE_MEASURE /* member below exists only in CONFIDENCE_MEASURE builds */
00277 LOGPROB gmm_max_cm;
00278 #endif
00279 #ifdef GMM_VAD /* all members down to #endif exist only in GMM_VAD builds */
00280 LOGPROB *rates;
00281 int nframe;
00282 boolean filled;
00283 int framep;
00284
00285 boolean in_voice;
00286 boolean up_trigger;
00287 boolean down_trigger;
00288 boolean after_trigger;
00289 boolean want_rewind;
00290 boolean want_rewind_reprocess;
00291 int rewind_frame;
00292 int duration;
00293 #endif
00294 } GMMCalc;
00295
00300 typedef struct __sentence_align__ { /* One alignment record; records chain through `next`, and Sentence points at one via its `align` member */
00301 int num; /* NOTE(review): presumably the element count of the per-unit arrays below -- confirm */
00302 short unittype;
00303 WORD_ID *w;
00304 HMM_Logical **ph;
00305 short *loc;
00306 boolean *is_iwsp;
00307 int *begin_frame;
00308 int *end_frame;
00309 LOGPROB *avgscore; /* array of scores, in contrast to the single allscore below */
00310 LOGPROB allscore;
00311 struct __sentence_align__ *next; /* link to another record of the same type */
00312 } SentenceAlign;
00313
00318 typedef struct __sentence__ { /* One sentence result; Output holds an array pointer (`sent`) and one by-value instance (`pass1`) */
00319 WORD_ID word[MAXSEQNUM]; /* fixed-capacity word id array */
00320 int word_num; /* NOTE(review): presumably the number of valid entries in word[] -- confirm */
00321 LOGPROB score;
00322 LOGPROB confidence[MAXSEQNUM]; /* same capacity as word[] */
00323 LOGPROB score_lm;
00324 LOGPROB score_am;
00325 int gram_id;
00326 SentenceAlign *align;
00327
00328 } Sentence;
00329
00334 typedef struct __adin__ { /* Audio input work area; Recog holds a pointer to one in its `adin` member */
00335
00337 boolean (*ad_standby)(int, void *); /* the ad_* members are function pointers; only ad_standby and ad_read declare their parameter types */
00339 boolean (*ad_begin)(); /* empty parentheses: parameter list left unspecified (not `void`) */
00341 boolean (*ad_end)();
00343 boolean (*ad_resume)();
00345 boolean (*ad_pause)();
00347 boolean (*ad_terminate)();
00349 int (*ad_read)(SP16 *, int); /* takes a sample buffer and an int; returns int */
00350
00351
00352 int thres;
00353 int noise_zerocross;
00354 int nc_max;
00355 boolean adin_cut_on;
00356 boolean silence_cut_default;
00357 boolean strip_flag;
00358 boolean enable_thread;
00359 boolean need_zmean;
00360
00361
00362 int c_length;
00363 int c_offset;
00364 SP16 *swapbuf;
00365 int sbsize;
00366 int sblen;
00367 int rest_tail;
00368
00369 ZEROCROSS zc; /* stored by value */
00370
00371 #ifdef HAVE_PTHREAD /* thread handle, mutex and related members exist only in HAVE_PTHREAD builds */
00372
00373 pthread_t adin_thread;
00374 pthread_mutex_t mutex; /* NOTE(review): which members this mutex guards is not visible here -- confirm */
00375 SP16 *speech;
00376 int speechlen;
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386 boolean transfer_online;
00391 boolean adinthread_buffer_overflowed;
00396 boolean adinthread_ended;
00397
00398 boolean ignore_speech_while_recog;
00399
00400 #endif
00401
00402
00403 SP16 *buffer;
00404 int bpmax;
00405 int bp;
00406 int current_len;
00407 SP16 *cbuf;
00408 boolean down_sample;
00409 SP16 *buffer48;
00410 int io_rate;
00411
00412 boolean is_valid_data;
00413 int nc;
00414 boolean end_of_stream;
00415 boolean need_init;
00416
00417 DS_BUFFER *ds;
00418
00419 boolean rehash;
00420
00421 boolean input_side_segment;
00422
00423 unsigned int total_captured_len; /* unsigned, unlike the other length members in this struct */
00424 unsigned int last_trigger_sample;
00425
00426 } ADIn;
00427
00433 typedef struct __Output__ { /* Result container embedded by value in RecogProcess as `result` */
00442 int status; /* NOTE(review): meaning of the status codes is not visible here -- confirm */
00443
00444 int num_frame;
00445 int length_msec;
00446
00447 Sentence *sent; /* NOTE(review): presumably an array of sentnum entries -- confirm */
00448 int sentnum;
00449
00450 WordGraph *wg1;
00451 int wg1_num;
00452
00453 WordGraph *wg; /* second WordGraph pointer; has no companion count member, unlike wg1 */
00454
00455 CN_CLUSTER *confnet;
00456
00457 Sentence pass1; /* stored by value, separate from the sent pointer above */
00458
00459 } Output;
00460
00461
00462
00463
00464
00465
00470 typedef struct __mfcc_calc__ { /* Per-instance feature (MFCC) computation state; instances chain through `next`, and Recog (`mfcclist`, `gmmmfcc`) and PROCESS_AM (`mfcc`) point at them */
00471
00476 short id;
00477
00482 Value *para;
00483
00488 boolean htk_loaded;
00493 boolean hmm_loaded;
00494
00499 boolean paramtype_check_flag;
00500
00505 MFCCWork *wrk;
00506
00511 HTK_Param *param;
00512
00516 HTK_Param *rest_param;
00517
00522 struct { /* anonymous nested struct, accessed as `.cmn` */
00526 char *load_filename;
00531 boolean update;
00535 char *save_filename;
00539 float map_weight;
00540
00544 boolean loaded;
00545
00550 CMNWork *wrk; /* distinct from the outer `wrk` member, which is an MFCCWork pointer */
00551
00552 } cmn;
00553
00558 struct { /* anonymous nested struct, accessed as `.frontend` */
00562 float *ssbuf;
00563
00567 int sslen;
00568
00573 float ss_alpha;
00574
00579 float ss_floor;
00580
00584 boolean sscalc;
00585
00589 int sscalc_len;
00590
00594 char *ssload_filename;
00595
00600 MFCCWork *mfccwrk_ss;
00601
00602 } frontend;
00603
00608 ENERGYWork ewrk; /* stored by value */
00609
00614 DeltaBuf *db;
00619 DeltaBuf *ab;
00624 VECT *tmpmfcc;
00625
00631 boolean valid;
00632
00637 int f;
00638
00643 int last_time;
00644
00649 int sparea_start;
00650
00655 boolean segmented;
00656
00661 boolean segmented_by_input;
00662
00667 int plugin_source;
00668
00673 struct { /* anonymous nested struct of function pointers, accessed as `.func` */
00675 boolean (*fv_standby)(); /* empty parentheses: parameter list left unspecified */
00677 boolean (*fv_begin)();
00679 int (*fv_read)(VECT *, int); /* the only fv_* entry that declares its parameter types */
00681 boolean (*fv_end)();
00683 boolean (*fv_resume)();
00685 boolean (*fv_pause)();
00687 boolean (*fv_terminate)();
00688 } func;
00689
00690 #ifdef POWER_REJECT /* member below exists only in POWER_REJECT builds */
00691 float avg_power;
00692 #endif
00693
00698 struct __mfcc_calc__ *next; /* link to another instance of the same type */
00699
00700 } MFCCCalc;
00701
00706 typedef struct __process_am__ { /* Acoustic-model process instance; instances chain through `next`, and Recog (`amlist`), PROCESS_LM (`am`) and RecogProcess (`am`) point at them */
00707
00712 JCONF_AM *config;
00713
00718 MFCCCalc *mfcc;
00719
00723 HTK_HMM_INFO *hmminfo;
00724
00728 HTK_HMM_INFO *hmm_gs; /* second model pointer of the same type as hmminfo */
00729
00733 HMMWork hmmwrk; /* stored by value */
00734
00739 struct __process_am__ *next; /* link to another instance of the same type */
00740
00741 } PROCESS_AM;
00742
00747 typedef struct __process_lm__ { /* Language-model process instance; instances chain through `next`, and Recog (`lmlist`) and RecogProcess (`lm`) point at them */
00748
00753 JCONF_LM *config;
00754
00759 PROCESS_AM *am;
00760
00761
00766 int lmtype; /* RecogProcess declares members with the same names (lmtype, lmvar) */
00767
00773 int lmvar;
00774
00778 WORD_INFO *winfo;
00779
00783 NGRAM_INFO *ngram;
00784
00788 MULTIGRAM *grammars;
00789
00795 int gram_maxid;
00796
00801 DFA_INFO *dfa;
00802
00807 boolean global_modified;
00808
00813 LMFunc lmfunc; /* function-pointer table stored by value */
00814
00819 struct __process_lm__ *next; /* link to another instance of the same type */
00820
00821 } PROCESS_LM;
00822
00827 typedef struct __recogprocess__ { /* One recognition process instance tying together an AM, an LM, search work areas and results; instances chain through `next`, and Recog points at them via `process_list` */
00828
00833 boolean live;
00834
00841 short active; /* declared short rather than boolean; NOTE(review): presumably more than two states -- confirm */
00842
00847 JCONF_SEARCH *config;
00848
00853 PROCESS_AM *am;
00854
00859 PROCESS_LM *lm;
00860
00865 int lmtype; /* same member names as in PROCESS_LM */
00866
00872 int lmvar;
00873
00877 boolean ccd_flag;
00878
00882 WCHMM_INFO *wchmm;
00883
00887 int trellis_beam_width;
00888
00892 BACKTRELLIS *backtrellis;
00893
00897 FSBeam pass1; /* stored by value */
00898
00903 StackDecode pass2; /* stored by value */
00904
00908 WORD_ID pass1_wseq[MAXSEQNUM]; /* fixed-capacity word id array */
00909
00913 int pass1_wnum; /* NOTE(review): presumably the number of valid entries in pass1_wseq[] -- confirm */
00914
00918 LOGPROB pass1_score;
00919
00923 WORD_ID sp_break_last_word;
00927 WORD_ID sp_break_last_nword;
00931 boolean sp_break_last_nword_allow_override;
00935 WORD_ID sp_break_2_begin_word;
00939 WORD_ID sp_break_2_end_word;
00940
00944 int peseqlen; /* Recog declares a member with the same name */
00945
00949 int graph_totalwordnum;
00950
00955 Output result; /* stored by value */
00956
00961 boolean graphout;
00962
00968 char *order_matrix;
00969
00975 int order_matrix_count;
00976
00977 #ifdef DETERMINE /* members down to #endif exist only in DETERMINE builds */
00978 int determine_count;
00979 LOGPROB determine_maxnodescore;
00980 boolean determined;
00981 LOGPROB determine_last_wid; /* NOTE(review): typed LOGPROB although the name suggests a word id (other word members here use WORD_ID) -- confirm before changing, since the type affects layout */
00982 boolean have_determine;
00983 #endif
00984
00989 boolean have_interim;
00990
00995 void *hook; /* untyped pointer; NOTE(review): presumably reserved for application data -- confirm */
00996
01001 struct __recogprocess__ *next; /* link to another instance of the same type */
01002
01003 } RecogProcess;
01004
01009 typedef struct __Recog__ { /* Top-level engine instance: holds the configuration, audio input, the MFCC/AM/LM/process lists, GMM state, control flags and callback tables */
01010
01011
01016 Jconf *jconf;
01017
01018
01023 ADIn *adin;
01024
01028 RealBeam real; /* stored by value */
01029
01034 MFCCCalc *mfcclist; /* the four pointers from here to process_list refer to types that each carry a `next` link */
01035
01040 PROCESS_AM *amlist;
01041
01046 PROCESS_LM *lmlist;
01047
01052 RecogProcess *process_list;
01053
01054
01059 boolean process_segment;
01060
01061
01062
01063
01067 SP16 *speech; /* sample buffer, tracked by speechalloclen and speechlen */
01068
01073 int speechalloclen; /* NOTE(review): presumably allocated capacity of speech -- confirm */
01074
01078 int speechlen; /* NOTE(review): presumably number of valid samples in speech -- confirm */
01079
01083 int peseqlen; /* RecogProcess declares a member with the same name */
01084
01085
01086
01091 HTK_HMM_INFO *gmm;
01092
01097 MFCCCalc *gmmmfcc; /* separate MFCCCalc pointer from mfcclist above */
01098
01103 GMMCalc *gc;
01104
01105
01106
01107
01119 boolean process_active;
01120
01126 boolean process_want_terminate;
01127
01135 boolean process_want_reload;
01136
01142 short gram_switch_input_method;
01143
01150 boolean process_online;
01151
01157 boolean (*calc_vector)(MFCCCalc *, SP16 *, int); /* function pointer taking an MFCCCalc, a sample buffer and an int */
01158
01164 boolean triggered;
01165
01170 void (*callback_function[SIZEOF_CALLBACK_ID][MAX_CALLBACK_HOOK])(); /* 2-D table of function pointers; first dimension SIZEOF_CALLBACK_ID, second MAX_CALLBACK_HOOK */
01175 void *callback_user_data[SIZEOF_CALLBACK_ID][MAX_CALLBACK_HOOK]; /* same dimensions as callback_function */
01180 int callback_function_num[SIZEOF_CALLBACK_ID]; /* one counter per first-dimension index */
01185 int callback_list_code[MAX_CALLBACK_HOOK*SIZEOF_CALLBACK_ID]; /* flat array with one entry per cell of the 2-D tables */
01190 int callback_list_loc[MAX_CALLBACK_HOOK*SIZEOF_CALLBACK_ID]; /* same size as callback_list_code */
01195 int callback_num;
01196
01197
01198
01203 void *hook; /* untyped pointer; NOTE(review): presumably reserved for application data -- confirm */
01204
01205 } Recog;
01206
01207 #endif