libjulius/include/julius/recog.h

Go to the documentation of this file.
00001 
00076 /*
00077  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
00078  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00079  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
00080  * All rights reserved
00081  */
00082 
00083 /*
00084 */
00085 
00086 #ifndef __J_RECOG_H__
00087 #define __J_RECOG_H__
00088 
00089 #include <sent/stddefs.h>
00090 #include <sent/hmm.h>
00091 #include <sent/vocabulary.h>
00092 #include <sent/ngram2.h>
00093 #include <sent/dfa.h>
00094 #include <julius/wchmm.h>
00095 #include <julius/search.h>
00096 #include <julius/callback.h>
00097 #include <julius/jconf.h>
00098 
00099 /*
00100   How tokens are managed:
00101    o  tlist[][] is a token stocker.  It holds all tokens in a sequential
00102       buffer.  They are malloced once at startup, and referred to by ID during
00103       the Viterbi procedure.  In word-pair mode, each token also has a link to
00104       another token to allow a node to have more than 1 token.
00105       
00106    o  token[n] holds the current ID number of the token associated with
00107       lexicon tree node 'n'.
00108 
00109   */
/*
 * FSBeam: token and segmentation work area.  RecogProcess embeds one by
 * value as its `pass1` member.
 *
 * It groups the token stocker and active token list (see the "How tokens
 * are managed" comment above), optional word-end state for 1-gram
 * factoring (UNIGRAM_FACTORING), LOGPROB weights/penalties, and
 * short-pause segmentation state (extended under SPSEGMENT_NAIST).
 *
 * NOTE(review): member usage is not visible in this header.  Notes worded
 * "presumably" are inferred from names/types only -- confirm against the
 * search code before relying on them.
 */
00114 typedef struct __FSBeam__ {
00115   /* token stocker */
00116   TOKEN2 *tlist[2];     /* the two token stocker buffers; tokens are referred to by ID */
00117   TOKENID *tindex[2];   /* one TOKENID array per stocker buffer; presumably indexes tlist[] -- TODO confirm */
00118   int maxtnum;          /* presumably the allocated token capacity -- TODO confirm */
00119   int expand_step;      /* presumably the growth increment when the stocker is enlarged -- TODO confirm */
00120   boolean expanded;     /* presumably set once the stocker has been enlarged -- TODO confirm */
00121   int tnum[2];          /* one counter per stocker buffer; presumably tokens in use */
00122   int n_start;          /* presumably start of a range within tindex[] -- TODO confirm */
00123   int n_end;            /* presumably end of that range -- TODO confirm */
00124   int tl;               /* presumably selects the current one of the two buffers -- TODO confirm */
00125   int tn;               /* presumably selects the other (next) buffer -- TODO confirm */
00126     
00127   /* Active token list */
00128   TOKENID *token;       /* token[n] is the current token ID for lexicon tree node n */
00129 #ifdef UNIGRAM_FACTORING
00130   /* for wordend processing with 1-gram factoring */
00131   LOGPROB wordend_best_score; /* presumably the score of the best word-end candidate */
00132   int wordend_best_node;        /* presumably the node of that candidate */
00133   TRELLIS_ATOM *wordend_best_tre; /* presumably the trellis atom of that candidate */
00134   WORD_ID wordend_best_last_cword;      /* presumably its last context word -- TODO confirm */
00135 #endif
00136 
00137   int totalnodenum;     /* presumably the number of entries in token[] -- TODO confirm */
00138   TRELLIS_ATOM bos;     /* trellis atom held by value; name suggests beginning-of-sentence -- TODO confirm */
00139   boolean nodes_malloced; /* presumably TRUE once node storage has been allocated */
00140   LOGPROB lm_weight;           /* LOGPROB-typed weight; presumably a copy of a configured LM weight */
00141   LOGPROB lm_penalty;          /* presumably a copy of a configured LM penalty */
00142   LOGPROB lm_penalty_trans; /* presumably a penalty variant for "trans" words -- TODO confirm */
00143   LOGPROB penalty1; /* presumably another configured penalty -- TODO confirm which */
  /* Member below exists only when both WPAIR and WPAIR_KEEP_NLIMIT are defined. */
00144 #if defined(WPAIR) && defined(WPAIR_KEEP_NLIMIT)
00145   boolean wpair_keep_nlimit; 
00146 #endif
00147   /* for short-pause segmentation */
00148   boolean in_sparea;         /* presumably TRUE while inside a short-pause area */
00149   int tmp_sparea_start;         /* presumably where the current pause area started */
  /* Exactly one of the next two members exists, chosen by SP_BREAK_RESUME_WORD_BEGIN. */
00150 #ifdef SP_BREAK_RESUME_WORD_BEGIN
00151   WORD_ID tmp_sp_break_last_word; 
00152 #else
00153   WORD_ID last_tre_word;        
00154 #endif
00155   boolean first_sparea;  /* presumably TRUE while in the first pause area */
00156   int sp_duration;   /* presumably a running short-pause length counter */
00157 #ifdef SPSEGMENT_NAIST
  /* Trigger/rewind state, compiled in only with SPSEGMENT_NAIST. */
00158   boolean after_trigger;        
00159   int trigger_duration;         
00160   boolean want_rewind;          
00161   int rewind_frame;             
00162   boolean want_rewind_reprocess; 
00163 #endif
00164   char *pausemodelnames;        /* presumably the pause model names as one string -- TODO confirm */
00165   char **pausemodel;            /* presumably the individual pause model names */
00166   int pausemodelnum;            /* presumably the number of entries in pausemodel[] */
00167 } FSBeam;
00168 
00169 
/*
 * RealBeam: input-side work area.  Recog embeds one by value as `real`.
 *
 * Holds a frame-length limit, an SP16 sample window and, for short-pause
 * segmentation, a buffer of leftover samples.
 *
 * NOTE(review): notes worded "presumably" are inferred from member names
 * only -- confirm against the code that uses this struct.
 */
00174 typedef struct __RealBeam__ {
00175   /* input parameter */
00176   int maxframelen;              /* presumably the maximum number of input frames */
00177 
00178   SP16 *window;         /* SP16 sample buffer; presumably the analysis window */
00179   int windowlen;                /* presumably the length of window[] in samples */
00180   int windownum;                /* presumably the number of samples currently held in window[] */
00181 
00182   /* for short-pause segmentation */
00183   boolean last_is_segmented; /* presumably TRUE if the previous input ended by segmentation */
00184   SP16 *rest_Speech; /* SP16 buffer; presumably samples carried over to the next segment */
00185   int rest_alloc_len;   /* presumably the allocated size of rest_Speech */
00186   int rest_len;         /* presumably the number of valid samples in rest_Speech */
00187 
00188 } RealBeam;
00189 
/*
 * StackDecode: work area embedded by value as the `pass2` member of
 * RecogProcess.
 *
 * Contains per-length hypothesis counters, hypothesis/stack counters and
 * NODE pointers, plus optional sections selected at compile time:
 * SCAN_BEAM, CONFIDENCE_MEASURE and, inside the latter, CM_MULTIPLE_ALPHA,
 * CM_SEARCH and CM_NBEST.
 *
 * NOTE(review): notes worded "presumably" are inferred from member names
 * only -- confirm against the decoder implementation.
 */
00194 typedef struct __StackDecode__ {
00195   int hypo_len_count[MAXSEQNUM+1];      /* one counter per hypothesis length 0..MAXSEQNUM */
00196   int maximum_filled_length; /* presumably the largest length whose counter has reached a limit -- TODO confirm */
00197 #ifdef SCAN_BEAM
00198   LOGPROB *framemaxscore; /* presumably the maximum score per frame */
00199 #endif
00200   NODE *stocker_root; /* presumably the root of a pool of reusable NODEs */
00201   int popctr;           /* presumably the number of hypotheses popped */
00202   int genectr;          /* presumably the number of hypotheses generated */
00203   int pushctr;          /* presumably the number of hypotheses pushed */
00204   int finishnum;        /* presumably the number of finished hypotheses */
00205   NODE *current;                /* presumably the hypothesis being processed */
00206 
00207 #ifdef CONFIDENCE_MEASURE
00208   LOGPROB cm_alpha;             /* presumably the confidence scaling factor */
00209 # ifdef CM_MULTIPLE_ALPHA
00210   LOGPROB *cmsumlist;        /* presumably one sum per alpha value */
00211   int cmsumlistlen;             /* presumably the length of cmsumlist[] */
00212 # endif
00213 # ifdef CM_SEARCH
00214   LOGPROB cm_tmpbestscore; 
  /* cm_tmpsum exists only when CM_MULTIPLE_ALPHA is NOT defined. */
00215 #  ifndef CM_MULTIPLE_ALPHA
00216   LOGPROB cm_tmpsum;            
00217 #  endif
00218   int l_stacksize;              /* presumably the capacity of a local stack */
00219   int l_stacknum;               /* presumably the number of entries in that stack */
00220   NODE *l_start;        /* presumably the first node of that stack */
00221   NODE *l_bottom;       /* presumably the last node of that stack */
00222 # endif
00223 # ifdef CM_NBEST
  /* No initializers here: C does not allow them on struct members.  Code
     that creates a StackDecode must set these pointers itself. */
00224   LOGPROB *sentcm;       /* presumably per-sentence confidence scores */
00225   LOGPROB *wordcm;       /* presumably per-word confidence scores */
00226   int sentnum;          /* presumably the length of sentcm[] -- TODO confirm */
00227 # endif
00228 #endif /* CONFIDENCE_MEASURE */
00229 
00230 } StackDecode;
00231 
/*
 * LMFunc: table of three function pointers, each returning LOGPROB.
 * PROCESS_LM embeds one by value as `lmfunc`.
 *
 * The parameters are unnamed in this header, so their meaning is not
 * visible here.  Judging by the member names these are presumably
 * unigram / bigram / general language-model probability hooks -- TODO
 * confirm against the code that installs and calls them.
 */
00236 typedef struct {
00237   LOGPROB (*uniprob)(WORD_INFO *, WORD_ID, LOGPROB); /* one WORD_ID argument */
00238   LOGPROB (*biprob)(WORD_INFO *, WORD_ID, WORD_ID, LOGPROB); /* two WORD_ID arguments */
00239   LOGPROB (*lmprob)(WORD_INFO *, WORD_ID *, int, WORD_ID, LOGPROB); /* WORD_ID array + int (presumably its length) + one WORD_ID */
00240 } LMFunc;
00241 
/*
 * GMMCalc: work area for GMM score computation.  Recog refers to one
 * through its `gc` pointer.
 *
 * The OP_* members form one group; a confidence value is added under
 * CONFIDENCE_MEASURE and voice-activity trigger/rewind state under GMM_VAD.
 *
 * NOTE(review): notes worded "presumably" are inferred from member names
 * only -- confirm against the GMM code.
 */
00246 typedef struct __gmm_calc__{
00247   LOGPROB *gmm_score;   /* presumably one accumulated score per GMM */
00248   boolean *is_voice;            /* presumably one voice/non-voice flag per GMM -- TODO confirm */
00249   int framecount;               /* presumably the number of frames processed so far */
00250   LOGPROB *OP_calced_score; /* presumably scores of the computed mixture components */
00251   int *OP_calced_id; /* presumably the IDs matching OP_calced_score[] */
00252   int OP_calced_num; /* presumably the number of valid entries in the two arrays above */
00253   int OP_calced_maxnum; /* presumably the allocated size of those arrays */
00254   int OP_gprune_num; /* presumably the number of components kept by pruning */
00255   VECT *OP_vec;         /* presumably the input vector being scored */
00256   short OP_veclen;              /* presumably the length of OP_vec */
00257   HTK_HMM_Data *max_d;  /* presumably the best-scoring model */
00258   int max_i;                    /* presumably the index of max_d */
00259 #ifdef CONFIDENCE_MEASURE
00260   LOGPROB gmm_max_cm;   /* presumably the confidence of the best model */
00261 #endif
00262 #ifdef GMM_VAD
  /* Voice-activity detection state, compiled in only with GMM_VAD. */
00263   LOGPROB *rates;   /* presumably a per-frame history buffer -- TODO confirm */
00264   int nframe;                   /* presumably the length of rates[] */
00265   boolean filled;
00266   int framep;                   /* presumably the current position within rates[] */
00267 
00268   boolean in_voice;             
00269   boolean up_trigger;           
00270   boolean down_trigger;         
00271   boolean after_trigger;        
00272   boolean want_rewind;          
00273   boolean want_rewind_reprocess; 
00274   int rewind_frame;             
00275   int duration;                 
00276 #endif
00277 } GMMCalc;
00278 
/*
 * Sentence: one recognized word sequence with its scores.  Output holds
 * these both as an array (`sent`) and by value (`pass1`).
 *
 * word[] and confidence[] are fixed arrays of MAXSEQNUM entries; word_num
 * is presumably the number of entries actually used.  The nested `align`
 * struct holds pointer members for alignment data.
 *
 * NOTE(review): notes worded "presumably" are inferred from member names
 * only -- confirm against the result-output code.
 */
00283 typedef struct __sentence__ {
00284   WORD_ID word[MAXSEQNUM];      /* word IDs of the sequence */
00285   int word_num;                 /* presumably the number of valid entries in word[] */
00286   LOGPROB score;                /* presumably the total score */
00287   LOGPROB confidence[MAXSEQNUM]; /* presumably one confidence value per word[] entry */
00288   LOGPROB score_lm;             /* presumably the language-model part of the score */
00289   LOGPROB score_am;             /* presumably the acoustic-model part of the score */
00290   int gram_id;                  /* presumably the ID of the matching grammar */
00291 
  /* Alignment data; the arrays below are presumably `num` entries long. */
00296   struct {
00297     boolean filled;             /* presumably TRUE once this struct has valid data */
00298     int num;                    /* presumably the number of aligned units */
00299     short unittype;             /* presumably selects which of w / ph / loc is meaningful -- TODO confirm */
00300 
00301     WORD_ID *w;                 
00302     HMM_Logical **ph;     
00303     short *loc; 
00304     boolean *is_iwsp;           
00305 
00306     int *begin_frame;           /* presumably the start frame of each unit */
00307     int *end_frame;             /* presumably the end frame of each unit */
00308     LOGPROB *avgscore;          /* presumably the average score of each unit */
00309    
00310     LOGPROB allscore;           /* presumably the score of the whole alignment */
00311   } align;
00312 
00313 } Sentence;
00314 
/*
 * ADIn: audio input (A/D-in) state.  Recog refers to one through its
 * `adin` pointer.
 *
 * Sections: device function pointers, configuration parameters, work
 * area, POSIX-thread state (only with HAVE_PTHREAD) and the input data
 * buffer.
 *
 * NOTE(review): notes worded "presumably" are inferred from member names
 * only -- confirm against the A/D-in implementation.
 */
00319 typedef struct __adin__ {
00320   /* functions */
00322   boolean (*ad_standby)(int, void *); /* prototyped: (int, void *) */
  /* The next four are declared with empty parameter lists, i.e. without a
     prototype, so argument types are not checked at the call site. */
00324   boolean (*ad_begin)();
00326   boolean (*ad_end)();
00328   boolean (*ad_resume)();
00330   boolean (*ad_pause)();
00332   int (*ad_read)(SP16 *, int); /* prototyped: (SP16 *, int); meaning of the int result is not visible here */
00333 
00334   /* configuration parameters */
00335   int thres;            /* presumably the input level threshold */
00336   int noise_zerocross;  /* presumably a zero-cross count threshold */
00337   int nc_max;           /* presumably the upper limit for `nc` below -- TODO confirm */
00338   boolean adin_cut_on;  /* presumably enables silence cutting */
00339   boolean silence_cut_default; 
00340   boolean strip_flag;   
00341   boolean enable_thread;        /* presumably selects threaded input */
00342   boolean need_zmean;   
00343 
00344   /* work area */
00345   int c_length; 
00346   int c_offset; 
00347   SP16 *swapbuf;                /* SP16 scratch buffer */
00348   int sbsize;    /* presumably the allocated size of swapbuf */
00349   int sblen;    /* presumably the used length of swapbuf */
00350   int rest_tail;                
00351 
00352   ZEROCROSS zc;                 /* zero-cross work area, held by value */
00353 
00354 #ifdef HAVE_PTHREAD
00355   /* Variables related to POSIX threading */
00356   pthread_mutex_t mutex;        /* presumably guards the shared members below -- TODO confirm */
00357   SP16 *speech;         /* sample buffer written by the A/D-in thread (see comment below) */
00358   int speechlen;                /* presumably the number of samples in speech */
00359 /*
00360  * Semaphore to start/stop recognition.
00361  * 
00362  * If TRUE, the A/D-in thread stores incoming samples to @a speech and
00363  * the main thread detects and processes them.
00364  * If FALSE, the A/D-in thread still reads input and checks the trigger the
00365  * same way as in the TRUE case, but does not store the samples to @a speech.
00366  * 
00367  */
00368   boolean transfer_online;
00373   boolean adinthread_buffer_overflowed;
00374 
00375   boolean ignore_speech_while_recog; 
00376 
00377 #endif
00378 
00379   /* Input data buffer */
00380   SP16 *buffer; 
00381   int bpmax;            /* presumably the capacity of buffer */
00382   int bp;                       /* presumably the current position in buffer */
00383   int current_len;              
00384   SP16 *cbuf;           
00385   boolean down_sample; /* presumably TRUE when input is down-sampled */
00386   SP16 *buffer48; /* presumably holds the input before down-sampling -- TODO confirm */
00387   int io_rate; 
00388 
00389   boolean is_valid_data;        
00390   int nc;               
00391   boolean end_of_stream;        
00392   boolean need_init;    
00393 
00394   DS_BUFFER *ds;           /* presumably the down-sampling work area */
00395 
00396   boolean rehash; 
00397 } ADIn;
00398 
/*
 * Output: recognition result container.  RecogProcess embeds one by value
 * as its `result` member.
 *
 * Carries a status code, input length information, an array of Sentence
 * results, word graphs, a confusion-network pointer and one Sentence held
 * by value (`pass1`).
 *
 * NOTE(review): notes worded "presumably" are inferred from member names
 * only; the set of `status` values is not visible in this header.
 */
00404 typedef struct __Output__ {
00413   int status;
00414 
00415   int num_frame;                /* presumably the input length in frames */
00416   int length_msec;              /* presumably the input length in milliseconds */
00417 
00418   Sentence *sent;               /* array of results */
00419   int sentnum;                  /* presumably the number of entries in sent[] */
00420 
00421   WordGraph *wg1;               /* presumably a word graph from the first pass -- TODO confirm */
00422   int wg1_num;                  /* presumably the number of words in wg1 */
00423 
00424   WordGraph *wg;                /* presumably the final word graph */
00425 
00426   CN_CLUSTER *confnet;          /* presumably a confusion network */
00427 
00428   Sentence pass1;               /* presumably the first-pass result */
00429 
00430 } Output;  
00431 
00432 
00433 /**********************************************************************/
00434 /**********************************************************************/
00435 /**********************************************************************/
00436 
/*
 * MFCCCalc: one feature-extraction instance.  Instances are chained
 * through `next`; Recog points at them via `mfcclist` and `gmmmfcc`, and
 * PROCESS_AM via `mfcc`.
 *
 * Besides top-level parameters and work areas it embeds two anonymous
 * structs: `cmn` (file names, a weight and a CMNWork pointer) and
 * `frontend` (the ss* members and a second MFCCWork pointer).
 *
 * NOTE(review): notes worded "presumably" are inferred from member names
 * only -- confirm against the feature-extraction code.
 */
00441 typedef struct __mfcc_calc__ {
00442 
00447   short id;
00448 
00453   Value *para; /* presumably the analysis parameters for this instance */
00454 
00459   boolean htk_loaded;
00464   boolean hmm_loaded;
00465 
00470   boolean paramtype_check_flag;
00471 
00476   MFCCWork *wrk; /* work area */
00477 
00482   HTK_Param *param; /* presumably the computed parameter vectors */
00483 
00487   HTK_Param *rest_param; /* presumably vectors carried over to the next segment -- TODO confirm */
00488 
  /* Presumably cepstral mean normalization settings and state. */
00493   struct {
00497     char *load_filename;
00502     boolean update;
00506     char *save_filename;     
00510     float map_weight;
00511 
00515     boolean loaded;
00516 
00521     CMNWork *wrk;
00522 
00523   } cmn;
00524 
  /* Presumably spectral-subtraction ("ss") front-end settings and state. */
00529   struct {
00533     float *ssbuf;
00534     
00538     int sslen; /* presumably the length of ssbuf */
00539     
00544     float ss_alpha;
00545 
00550     float ss_floor;
00551 
00555     boolean sscalc;
00556 
00560     int sscalc_len;
00561 
00565     char *ssload_filename;
00566 
00571     MFCCWork *mfccwrk_ss;
00572     
00573   } frontend;
00574 
00579   ENERGYWork ewrk; /* energy work area, held by value */
00580 
00585   DeltaBuf *db; /* presumably the delta coefficient buffer */
00590   DeltaBuf *ab; /* presumably the acceleration coefficient buffer */
00595   VECT *tmpmfcc; /* scratch vector */
00596 
00602   boolean valid;
00603 
00608   int f; /* presumably the current frame index -- TODO confirm */
00609 
00614   int last_time;
00615 
00620   int sparea_start;
00621 
00626   boolean segmented;
00627 
00628 #ifdef POWER_REJECT
00629   float avg_power; /* present only with POWER_REJECT */
00630 #endif
00631 
00636   struct __mfcc_calc__ *next; /* link to another instance; presumably NULL at the end of the list */
00637 
00638 } MFCCCalc;
00639 
/*
 * PROCESS_AM: one acoustic-model processing instance.  Instances are
 * chained through `next`; Recog points at them via `amlist`, and
 * PROCESS_LM and RecogProcess each refer to one via `am`.
 *
 * NOTE(review): notes worded "presumably" are inferred from member names
 * only -- confirm against the model-loading code.
 */
00644 typedef struct __process_am__ {
00645 
00650   JCONF_AM *config; /* configuration for this instance */
00651 
00656   MFCCCalc *mfcc; /* feature-extraction instance referred to by this model */
00657 
00661   HTK_HMM_INFO *hmminfo; /* presumably the main HMM definitions */
00662 
00666   HTK_HMM_INFO *hmm_gs; /* presumably a second HMM set for Gaussian selection -- TODO confirm */
00667 
00671   HMMWork hmmwrk; /* work area, held by value */
00672 
00677   struct __process_am__ *next; /* link to another instance; presumably NULL at the end of the list */
00678   
00679 } PROCESS_AM;
00680 
/*
 * PROCESS_LM: one language-model processing instance.  Instances are
 * chained through `next`; Recog points at them via `lmlist`, and
 * RecogProcess refers to one via `lm`.
 *
 * It carries pointers for both an N-gram (`ngram`) and grammar data
 * (`grammars`, `dfa`); `lmtype` / `lmvar` presumably select which are in
 * use (their values are not visible in this header).
 *
 * NOTE(review): notes worded "presumably" are inferred from member names
 * only -- confirm against the model-loading code.
 */
00685 typedef struct __process_lm__ {
00686 
00691   JCONF_LM *config; /* configuration for this instance */
00692 
00697   PROCESS_AM *am; /* acoustic-model instance referred to by this LM */
00698 
00699 
00704   int lmtype;
00705 
00711   int lmvar;
00712 
00716   WORD_INFO *winfo; /* presumably the vocabulary */
00717 
00721   NGRAM_INFO *ngram;
00722 
00726   MULTIGRAM *grammars; /* presumably a list of grammars */
00727 
00733   int gram_maxid; /* presumably the highest grammar ID handed out so far -- TODO confirm */
00734 
00739   DFA_INFO *dfa;
00740 
00745   boolean global_modified; /* presumably TRUE when the grammar set has changed */
00746 
00751   LMFunc lmfunc; /* LM probability function table, held by value */
00752 
00757   struct __process_lm__ *next; /* link to another instance; presumably NULL at the end of the list */
00758 
00759 } PROCESS_LM;
00760 
/*
 * RecogProcess: one recognition process instance.  Instances are chained
 * through `next`; Recog points at them via `process_list`.
 *
 * It refers to one PROCESS_AM and one PROCESS_LM, embeds the work areas
 * FSBeam (`pass1`) and StackDecode (`pass2`) by value, and embeds the
 * result container Output (`result`).  `lmtype` and `lmvar` also exist in
 * PROCESS_LM, and `peseqlen` also exists in Recog.
 *
 * NOTE(review): notes worded "presumably" are inferred from member names
 * only -- confirm against the recognition code.
 */
00765 typedef struct __recogprocess__ {
00766 
00771   boolean live;
00772 
00779   short active; /* declared short rather than boolean; the set of values is not visible here */
00780 
00785   JCONF_SEARCH *config; /* configuration for this instance */
00786 
00791   PROCESS_AM *am;
00792 
00797   PROCESS_LM *lm;
00798 
00803   int lmtype;
00804 
00810   int lmvar;
00811 
00815   boolean ccd_flag;
00816 
00820   WCHMM_INFO *wchmm; /* presumably the lexicon tree */
00821 
00825   int trellis_beam_width;
00826 
00830   BACKTRELLIS *backtrellis; /* presumably the word trellis -- TODO confirm */
00831 
00835   FSBeam pass1; /* work area, held by value */
00836 
00841   StackDecode pass2; /* work area, held by value */
00842 
00846   WORD_ID pass1_wseq[MAXSEQNUM]; /* presumably the first-pass word sequence */
00847 
00851   int pass1_wnum; /* presumably the number of valid entries in pass1_wseq[] */
00852 
00856   LOGPROB pass1_score;
00857 
00861   WORD_ID sp_break_last_word;
00865   WORD_ID sp_break_last_nword;
00869   boolean sp_break_last_nword_allow_override;
00873   WORD_ID sp_break_2_begin_word;
00877   WORD_ID sp_break_2_end_word;
00878 
00882   int peseqlen;         
00883 
00887   int graph_totalwordnum;
00888 
00893   Output result; /* result container, held by value */
00894 
00899   boolean graphout;
00900 
00901 #ifdef DETERMINE
00902   int determine_count;
00903   LOGPROB determine_maxnodescore;
00904   boolean determined;
  /* NOTE(review): named like a word ID but declared LOGPROB, unlike the
     WORD_ID members above -- confirm whether WORD_ID was intended. */
00905   LOGPROB determine_last_wid;
00906   boolean have_determine;
00907 #endif
00908 
00913   boolean have_interim;
00914 
00919   void *hook; /* untyped pointer; presumably for user data */
00920 
00925   struct __recogprocess__ *next; /* link to another instance; presumably NULL at the end of the list */
00926 
00927 } RecogProcess;
00928 
/*
 * Recog: top-level recognizer instance.
 *
 * It points at the configuration (Jconf), the audio input (ADIn) and the
 * heads of the MFCCCalc, PROCESS_AM, PROCESS_LM and RecogProcess chains.
 * It also embeds a RealBeam work area, holds the input speech buffer and
 * GMM-related pointers, a set of process control flags, and the callback
 * registration tables.
 *
 * NOTE(review): notes worded "presumably" are inferred from member names
 * only -- confirm against the engine code.
 */
00933 typedef struct __Recog__ {
00934 
00935   /*******************************************/
00940   Jconf *jconf; /* configuration */
00941 
00942   /*******************************************/
00947   ADIn *adin; /* audio input state */
00948 
00952   RealBeam real; /* work area, held by value */
00953 
00958   MFCCCalc *mfcclist; /* head of the MFCCCalc chain */
00959 
00964   PROCESS_AM *amlist; /* head of the PROCESS_AM chain */
00965 
00970   PROCESS_LM *lmlist; /* head of the PROCESS_LM chain */
00971 
00976   RecogProcess *process_list; /* head of the RecogProcess chain */
00977 
00978 
00983   boolean process_segment;
00984 
00985   /*******************************************/
00986   /* inputs */
00987 
00991   SP16 *speech; /* input sample buffer */
00992 
00997   int speechalloclen; /* presumably the allocated size of speech */
00998 
01002   int speechlen;                /* presumably the number of samples in speech */
01003 
01007   int peseqlen;         
01008 
01009   /*******************************************/
01010 
01015   HTK_HMM_INFO *gmm;
01016 
01021   MFCCCalc *gmmmfcc; /* feature-extraction instance referred to for the GMM */
01022 
01027   GMMCalc *gc; /* GMM work area */
01028 
01029   /*******************************************/
01030   /* misc. */
01031 
01043   boolean process_active;
01044 
01050   boolean process_want_terminate;
01051 
01059   boolean process_want_reload;
01060 
01066   short gram_switch_input_method; /* declared short; the set of values is not visible here */
01067 
01074   boolean process_online;
01075 
01081   boolean (*calc_vector)(MFCCCalc *, SP16 *, int); /* prototyped: (MFCCCalc *, SP16 *, int) */
01082 
01088   boolean triggered;
01089 
  /* Callback tables.  The first two are indexed [callback id][hook slot];
     the function pointers have empty parameter lists (no prototype). */
01094   void (*callback_function[SIZEOF_CALLBACK_ID][MAX_CALLBACK_HOOK])();
01099   void *callback_user_data[SIZEOF_CALLBACK_ID][MAX_CALLBACK_HOOK];
01104   int callback_function_num[SIZEOF_CALLBACK_ID]; /* presumably the number of hooks registered per callback id */
  /* The next two arrays have one slot per possible (id, hook) pair;
     callback_num is presumably the number of slots in use -- TODO confirm. */
01109   int callback_list_code[MAX_CALLBACK_HOOK*SIZEOF_CALLBACK_ID];
01114   int callback_list_loc[MAX_CALLBACK_HOOK*SIZEOF_CALLBACK_ID];
01119   int callback_num;
01120 
01121   /*******************************************/
01122 
01127   void *hook; /* untyped pointer; presumably for user data */
01128 
01129 } Recog;
01130 
01131 #endif /* __J_RECOG_H__ */

Generated on Tue Dec 18 15:59:50 2007 for Julius by  doxygen 1.5.4