libjulius/include/julius/recog.h

説明を見る。
00001 
00076 /*
00077  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
00078  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00079  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
00080  * All rights reserved
00081  */
00082 
00083 /*
00084 */
00085 
00086 #ifndef __J_RECOG_H__
00087 #define __J_RECOG_H__
00088 
00089 #include <sent/stddefs.h>
00090 #include <sent/hmm.h>
00091 #include <sent/vocabulary.h>
00092 #include <sent/ngram2.h>
00093 #include <sent/dfa.h>
00094 #include <julius/wchmm.h>
00095 #include <julius/search.h>
00096 #include <julius/callback.h>
00097 #include <julius/jconf.h>
00098 
00099 /*
00100   How tokens are managed:
00101    o  tlist[][] is a token stocker.  It holds all tokens in a sequential
00102       buffer.  They are malloced first on startup, and referred to by ID during
00103       the Viterbi procedure.  In word-pair mode, each token also has a link to
00104       another token to allow a node to have more than 1 token.
00105       
00106    o  token[n] holds the current ID number of a token associated to a
00107       lexicon tree node 'n'.
00108 
00109   */
00114 typedef struct __FSBeam__ { /* Beam-search work area (used as RecogProcess.pass1): the token stocker and active token list described in the "How tokens are managed" comment. Notes marked "presumably" are inferred from names only -- confirm against the search code. */
00115   /* token stocker */
00116   TOKEN2 *tlist[2];     /* two sequential token buffers; tokens are referred to by ID */
00117   TOKENID *tindex[2];   /* one token-ID array per tlist buffer; presumably an index/sort order -- confirm */
00118   int maxtnum;          /* presumably allocated capacity of each tlist buffer */
00119   int expand_step;      /* presumably growth increment used when the buffers are enlarged */
00120   boolean expanded;     /* presumably set once the buffers have been enlarged */
00121   int tnum[2];          /* presumably number of tokens in use in each tlist buffer */
00122   int n_start;          /* presumably first index of the token range being processed -- confirm */
00123   int n_end;            /* presumably last index of the token range being processed -- confirm */
00124   int tl;               /* presumably selects (0/1) the tlist buffer holding the previous tokens -- confirm */
00125   int tn;               /* presumably selects (0/1) the tlist buffer receiving the next tokens -- confirm */
00126     
00127   /* Active token list */
00128   TOKENID *token;       /* token[n] is the current token ID associated with lexicon tree node n */
00129 #ifdef UNIGRAM_FACTORING
00130   /* for wordend processing with 1-gram factoring */
00131   LOGPROB wordend_best_score; /* presumably best word-end score found so far */
00132   int wordend_best_node;        /* presumably the node that produced wordend_best_score */
00133   TRELLIS_ATOM *wordend_best_tre; /* presumably the trellis atom belonging to that best word end */
00134   WORD_ID wordend_best_last_cword;      /* presumably the last context word of that best word end */
00135 #endif
00136 
00137   int totalnodenum;     /* presumably number of lexicon tree nodes (length of token[]) -- confirm */
00138   TRELLIS_ATOM bos;     /* trellis atom stored by value; presumably the beginning-of-sentence entry */
00139   boolean nodes_malloced; /* presumably TRUE once the node work areas have been allocated */
00140   LOGPROB lm_weight;           /* presumably the language model weight used in this pass */
00141   LOGPROB lm_penalty;          /* presumably the language model (word insertion) penalty used in this pass */
00142   LOGPROB lm_penalty_trans; /* presumably an LM penalty variant for transparent words -- confirm */
00143   LOGPROB penalty1; /* presumably a first-pass penalty value -- confirm */
00144 #if defined(WPAIR) && defined(WPAIR_KEEP_NLIMIT)
00145   boolean wpair_keep_nlimit; /* exists only when both macros are defined. NOTE(review): declared boolean although the name suggests a count -- verify */
00146 #endif
00147   /* for short-pause segmentation */
00148   boolean in_sparea;         /* presumably TRUE while inside a short-pause area */
00149   int tmp_sparea_start;         /* presumably tentative start frame of the current short-pause area */
00150 #ifdef SP_BREAK_RESUME_WORD_BEGIN
00151   WORD_ID tmp_sp_break_last_word; /* present only with SP_BREAK_RESUME_WORD_BEGIN; presumably the tentative resume word */
00152 #else
00153   WORD_ID last_tre_word;        /* present only without SP_BREAK_RESUME_WORD_BEGIN; presumably the last trellis word */
00154 #endif
00155   boolean first_sparea;  /* presumably TRUE while in the first short-pause area of the input */
00156   int sp_duration;   /* presumably length of the current short-pause area; unit not visible here */
00157 #ifdef SPSEGMENT_NAIST
00158   boolean after_trigger;        /* presumably TRUE once speech has been triggered */
00159   int trigger_duration;         /* presumably length of the triggered stretch; unit not visible here */
00160   boolean want_rewind;          /* presumably requests the caller to rewind the input */
00161   int rewind_frame;             /* presumably the frame to rewind to when want_rewind is set */
00162   boolean want_rewind_reprocess; /* presumably requests re-processing after the rewind */
00163 #endif
00164   char *pausemodelnames;        /* string; presumably the list of pause model names before splitting -- confirm */
00165   char **pausemodel;            /* array of strings; presumably the individual pause model names */
00166   int pausemodelnum;            /* presumably number of entries in pausemodel */
00167 } FSBeam;
00168 
00169 
00174 typedef struct __RealBeam__ { /* Work area embedded by value as Recog.real. Notes marked "presumably" are inferred from names only -- confirm against the implementation. */
00175   /* input parameter */
00176   int maxframelen;              /* presumably the maximum number of input frames that can be handled */
00177 
00178   SP16 *window;         /* sample buffer; presumably the analysis window */
00179   int windowlen;                /* presumably the capacity of window, in samples */
00180   int windownum;                /* presumably the number of samples currently held in window */
00181 
00182   /* for short-pause segmentation */
00183   boolean last_is_segmented; /* presumably TRUE when the previous input ended by segmentation */
00184   SP16 *rest_Speech; /* sample buffer; presumably samples left over for the next segment */
00185   int rest_alloc_len;   /* presumably the allocated capacity of rest_Speech */
00186   int rest_len;         /* presumably the number of valid samples in rest_Speech */
00187 
00188 } RealBeam;
00189 
00194 typedef struct __StackDecode__ { /* Stack-decoding work area (used as RecogProcess.pass2). Notes marked "presumably" are inferred from names only -- confirm against the implementation. */
00195   int hypo_len_count[MAXSEQNUM+1];      /* one counter per hypothesis length 0..MAXSEQNUM; presumably hypotheses popped at each length */
00196   int maximum_filled_length; /* presumably the largest length whose counter has reached its limit -- confirm */
00197 #ifdef SCAN_BEAM
00198   LOGPROB *framemaxscore; /* presumably the maximum score per frame, used for score pruning */
00199 #endif
00200   NODE *stocker_root; /* presumably the head of a list of recycled NODE objects */
00201   int popctr;           /* presumably the number of hypotheses popped */
00202   int genectr;          /* presumably the number of hypotheses generated */
00203   int pushctr;          /* presumably the number of hypotheses pushed */
00204   int finishnum;        /* presumably the number of completed hypotheses */
00205   NODE *current;                /* presumably the hypothesis currently being processed */
00206 
00207 #ifdef CONFIDENCE_MEASURE
00208   LOGPROB cm_alpha;             /* presumably the scaling factor used for confidence scoring */
00209 # ifdef CM_MULTIPLE_ALPHA
00210   LOGPROB *cmsumlist;        /* presumably one accumulated sum per alpha value */
00211   int cmsumlistlen;             /* presumably the allocated length of cmsumlist */
00212 # endif
00213 # ifdef CM_SEARCH
00214   LOGPROB cm_tmpbestscore; /* presumably the temporary best score used while computing confidence */
00215 #  ifndef CM_MULTIPLE_ALPHA
00216   LOGPROB cm_tmpsum;            /* present only without CM_MULTIPLE_ALPHA; presumably the single accumulated sum */
00217 #  endif
00218   int l_stacksize;              /* presumably the capacity of the local stack */
00219   int l_stacknum;               /* presumably the number of entries in the local stack */
00220   NODE *l_start;        /* presumably the top of the local stack */
00221   NODE *l_bottom;       /* presumably the bottom of the local stack */
00222 # endif
00223 # ifdef CM_NBEST
00224   LOGPROB *sentcm;       /* no initializer: C forbids member initializers in a struct declaration; set to NULL when the struct is set up */
00225   LOGPROB *wordcm;       /* no initializer, same reason as sentcm; set to NULL when the struct is set up */
00226   int sentnum;          /* presumably the number of sentences covered by sentcm */
00227 # endif
00228 #endif /* CONFIDENCE_MEASURE */
00229 
00230   LOGPROB *wordtrellis[2]; /* two score buffers; presumably swapped while scanning a word's trellis */
00231   LOGPROB *g;           /* score buffer; presumably accumulated scores per frame -- confirm */
00232   HMM_Logical **phmmseq;        /* array of logical HMM pointers; presumably the phone sequence of the current word */
00233   int phmmlen_max;              /* presumably the allocated length of phmmseq */
00234   boolean *has_sp;              /* presumably one short-pause flag per phmmseq entry */
00235 #ifdef GRAPHOUT_PRECISE_BOUNDARY
00236   short *wend_token_frame[2]; /* two buffers; presumably word-end frame per token */
00237   LOGPROB *wend_token_gscore[2]; /* two buffers; presumably word-end score per token */
00238   short *wef;           /* presumably word-end frames -- confirm */
00239   LOGPROB *wes;         /* presumably word-end scores -- confirm */
00240 #endif
00241 
00242 } StackDecode;
00243 
00248 typedef struct { /* Set of three function pointers returning LOGPROB (used as PROCESS_LM.lmfunc). Argument meanings are not visible here; notes are inferred from the member names -- confirm. */
00249   LOGPROB (*uniprob)(WORD_INFO *, WORD_ID, LOGPROB); /* takes one word ID; presumably a unigram probability hook */
00250   LOGPROB (*biprob)(WORD_INFO *, WORD_ID, WORD_ID, LOGPROB); /* takes two word IDs; presumably a bigram probability hook */
00251   LOGPROB (*lmprob)(WORD_INFO *, WORD_ID *, int, WORD_ID, LOGPROB); /* takes a word ID array with a count plus one word ID; presumably a context-based probability hook */
00252 } LMFunc;
00253 
00258 typedef struct __gmm_calc__{ /* Work area for GMM computation (referenced as Recog.gc). Notes marked "presumably" are inferred from names only -- confirm against the GMM code. */
00259   LOGPROB *gmm_score;   /* score array; presumably one accumulated score per GMM */
00260   boolean *is_voice;            /* flag array; presumably marks which GMMs count as voice */
00261   int framecount;               /* presumably the number of frames processed so far */
00262 
00263   short OP_nstream;             /* presumably the number of feature streams in use */
00264   VECT *OP_vec_stream[MAXSTREAMNUM]; /* one vector pointer per stream slot (MAXSTREAMNUM slots) */
00265   short OP_veclen_stream[MAXSTREAMNUM]; /* one vector length per stream slot (MAXSTREAMNUM slots) */
00266 
00267   LOGPROB *OP_calced_score; /* presumably scores of the Gaussians computed for the current frame */
00268   int *OP_calced_id; /* presumably IDs of the Gaussians computed for the current frame */
00269   int OP_calced_num; /* presumably the number of valid entries in the two arrays above */
00270   int OP_calced_maxnum; /* presumably the allocated length of the two arrays above */
00271   int OP_gprune_num; /* presumably the number of Gaussians kept by pruning */
00272   VECT *OP_vec;         /* presumably the input vector currently being scored */
00273   short OP_veclen;              /* presumably the length of OP_vec */
00274   HTK_HMM_Data *max_d;  /* presumably the model with the best score */
00275   int max_i;                    /* presumably the index of the model with the best score */
00276 #ifdef CONFIDENCE_MEASURE
00277   LOGPROB gmm_max_cm;   /* presumably the confidence value of the best model */
00278 #endif
00279 #ifdef GMM_VAD
00280   LOGPROB *rates;   /* presumably a history of per-frame voice rates */
00281   int nframe;                   /* presumably the length of rates */
00282   boolean filled; /* presumably TRUE once rates has been filled at least once */
00283   int framep;                   /* presumably the current write position in rates */
00284 
00285   boolean in_voice;             /* presumably TRUE while the input is judged to be voice */
00286   boolean up_trigger;           /* presumably set when speech start is detected */
00287   boolean down_trigger;         /* presumably set when speech end is detected */
00288   boolean after_trigger;        /* presumably TRUE once speech has been triggered */
00289   boolean want_rewind;          /* presumably requests the caller to rewind the input */
00290   boolean want_rewind_reprocess; /* presumably requests re-processing after the rewind */
00291   int rewind_frame;             /* presumably the frame to rewind to when want_rewind is set */
00292   int duration;                 /* presumably the length of the current stretch; unit not visible here */
00293 #endif
00294 } GMMCalc;
00295 
00300 typedef struct __sentence_align__ { /* One alignment result; instances chain through next and hang off Sentence.align. Notes marked "presumably" are inferred from names only -- confirm. */
00301   int num;                    /* presumably the number of aligned units (length of the arrays below) */
00302   short unittype;             /* presumably selects the alignment unit (word / phoneme / state); values not visible here */
00303   WORD_ID *w;                 /* word ID array; presumably used for word-unit alignment */
00304   HMM_Logical **ph;     /* logical HMM pointer array; presumably used for phoneme-unit alignment */
00305   short *loc; /* presumably a per-unit location index (e.g. state position) -- confirm */
00306   boolean *is_iwsp;           /* per-unit flag; presumably marks inter-word short pauses -- confirm */
00307   int *begin_frame;           /* presumably the start frame of each unit */
00308   int *end_frame;             /* presumably the end frame of each unit */
00309   LOGPROB *avgscore;          /* presumably the average score of each unit */
00310   LOGPROB allscore;           /* presumably the total score of the alignment */
00311   struct __sentence_align__ *next; /* pointer to another SentenceAlign; presumably the next entry in a list */
00312 } SentenceAlign;
00313 
00318 typedef struct __sentence__ { /* One sentence hypothesis as a fixed-capacity word sequence (used by Output.sent and Output.pass1). Notes marked "presumably" are inferred from names only -- confirm. */
00319   WORD_ID word[MAXSEQNUM];      /* word ID sequence, capacity MAXSEQNUM */
00320   int word_num;                 /* presumably the number of valid entries in word[] */
00321   LOGPROB score;                /* presumably the total score of the sentence */
00322   LOGPROB confidence[MAXSEQNUM]; /* one value per word slot; presumably the word confidence scores */
00323   LOGPROB score_lm;             /* presumably the language model part of the score */
00324   LOGPROB score_am;             /* presumably the acoustic model part of the score */
00325   int gram_id;                  /* presumably the ID of the grammar the sentence belongs to */
00326   SentenceAlign *align; /* alignment result(s); SentenceAlign entries link through their next member */
00327 
00328 } Sentence;
00329 
00334 typedef struct __adin__ { /* A/D-in (audio input) state: device function pointers, configuration, work buffers and optional threading data (referenced as Recog.adin). Notes marked "presumably" are inferred from names only -- confirm. */
00335   /* functions */
00337   boolean (*ad_standby)(int, void *); /* presumably prepares the input device; argument meanings not visible here */
00339   boolean (*ad_begin)(); /* presumably starts capturing */
00341   boolean (*ad_end)(); /* presumably stops capturing */
00343   boolean (*ad_resume)(); /* presumably resumes a paused capture */
00345   boolean (*ad_pause)(); /* presumably pauses capturing */
00347   boolean (*ad_terminate)(); /* presumably aborts capturing */
00349   int (*ad_read)(SP16 *, int); /* takes a sample buffer and an int, returns int; presumably reads samples and returns a count or status */
00350 
00351   /* configuration parameters */
00352   int thres;            /* presumably the input level threshold */
00353   int noise_zerocross;  /* presumably the zero-cross count threshold for noise */
00354   int nc_max;           /* presumably the upper limit for the nc counter below */
00355   boolean adin_cut_on;  /* presumably enables silence cutting */
00356   boolean silence_cut_default; /* presumably the device's default for silence cutting */
00357   boolean strip_flag;   /* presumably enables stripping of invalid samples */
00358   boolean enable_thread;        /* presumably selects threaded input */
00359   boolean need_zmean;   /* presumably enables zero-mean (DC offset) removal */
00360 
00361   /* work area */
00362   int c_length; /* presumably the length of the cycle buffer -- confirm */
00363   int c_offset; /* presumably an offset into the cycle buffer -- confirm */
00364   SP16 *swapbuf;                /* sample buffer; presumably scratch space for re-ordering samples */
00365   int sbsize;    /* presumably the allocated size of swapbuf */
00366   int sblen;    /* presumably the number of valid samples in swapbuf */
00367   int rest_tail;                /* presumably the number of tail samples still to process */
00368 
00369   ZEROCROSS zc;                 /* zero-cross work area, stored by value */
00370 
00371 #ifdef HAVE_PTHREAD
00372   /* Variables related to POSIX threading */
00373   pthread_t adin_thread;        /* presumably the handle of the A/D-in thread */
00374   pthread_mutex_t mutex;        /* presumably guards the buffer shared between the two threads -- confirm what it protects */
00375   SP16 *speech;         /* buffer the A/D-in thread stores samples to (see the comment below) */
00376   int speechlen;                /* presumably the number of samples currently in speech */
00377 /*
00378  * Semaphore to start/stop recognition.
00379  * 
00380  * If TRUE, A/D-in thread will store incoming samples to @a speech and
00381  * main thread will detect and process them.
00382  * If FALSE, A/D-in thread will still get input and check trigger as the same
00383  * as TRUE case, but does not store them to @a speech.
00384  * 
00385  */
00386   boolean transfer_online;
00391   boolean adinthread_buffer_overflowed; /* presumably set when the thread's buffer overflowed */
00396   boolean adinthread_ended; /* presumably set when the A/D-in thread has finished */
00397 
00398   boolean ignore_speech_while_recog; /* presumably drops input that arrives while recognition is running */
00399 
00400 #endif
00401 
00402   /* Input data buffer */
00403   SP16 *buffer; /* sample buffer; presumably the main input buffer */
00404   int bpmax;            /* presumably the capacity of buffer */
00405   int bp;                       /* presumably the current position in buffer */
00406   int current_len;              /* presumably the number of samples currently being handled */
00407   SP16 *cbuf;           /* sample buffer; presumably a cycle buffer sized by c_length -- confirm */
00408   boolean down_sample; /* presumably enables down-sampling of the input */
00409   SP16 *buffer48; /* sample buffer; presumably holds input before down-sampling (name suggests 48 kHz) -- confirm */
00410   int io_rate; /* presumably the ratio between input and output rates -- confirm */
00411 
00412   boolean is_valid_data;        /* presumably TRUE while the input is in a triggered section */
00413   int nc;               /* counter; presumably compared against nc_max -- confirm */
00414   boolean end_of_stream;        /* presumably set when the input stream has ended */
00415   boolean need_init;    /* presumably requests re-initialization before the next input */
00416 
00417   DS_BUFFER *ds;           /* presumably the down-sampling work area */
00418 
00419   boolean rehash; /* presumably requests a flush/re-read of buffered input -- confirm */
00420 
00421   boolean input_side_segment;   /* presumably set when the input side has segmented the stream */
00422 
00423   unsigned int total_captured_len; /* presumably the total number of samples captured so far */
00424   unsigned int last_trigger_sample; /* presumably the sample position of the last trigger */
00425 
00426 } ADIn;
00427 
00433 typedef struct __Output__ { /* Recognition result container (embedded as RecogProcess.result). Notes marked "presumably" are inferred from names only -- confirm. */
00442   int status; /* result status code; its values are not visible in this file */
00443 
00444   int num_frame;                /* presumably the length of the processed input in frames */
00445   int length_msec;              /* presumably the length of the processed input in milliseconds */
00446 
00447   Sentence *sent;               /* presumably an array of result sentences */
00448   int sentnum;                  /* presumably the number of entries in sent */
00449 
00450   WordGraph *wg1;               /* presumably the word graph from the first pass */
00451   int wg1_num;                  /* presumably the number of words in wg1 */
00452 
00453   WordGraph *wg;                /* presumably the final word graph */
00454 
00455   CN_CLUSTER *confnet;          /* presumably the resulting confusion network */
00456 
00457   Sentence pass1;               /* a Sentence stored by value; presumably the first-pass result */
00458 
00459 } Output;  
00460 
00461 
00462 /**********************************************************************/
00463 /**********************************************************************/
00464 /**********************************************************************/
00465 
00470 typedef struct __mfcc_calc__ { /* One feature-extraction instance; instances chain through next (see Recog.mfcclist). Notes marked "presumably" are inferred from names only -- confirm. */
00471 
00476   short id; /* presumably a unique ID for this instance */
00477 
00482   Value *para; /* presumably the analysis parameters for this instance */
00483 
00488   boolean htk_loaded; /* presumably TRUE when parameters were loaded from an HTK configuration */
00493   boolean hmm_loaded; /* presumably TRUE when parameters were taken from an HMM definition */
00494 
00499   boolean paramtype_check_flag; /* presumably enables a parameter-type check against the acoustic model */
00500 
00505   MFCCWork *wrk; /* presumably the MFCC computation work area */
00506 
00511   HTK_Param *param; /* presumably the extracted parameter vectors */
00512 
00516   HTK_Param *rest_param; /* presumably parameter vectors carried over to the next segment */
00517 
00522   struct { /* cepstral mean normalization (CMN) settings and work area, per the member name cmn */
00526     char *load_filename; /* presumably the file to load an initial mean from */
00531     boolean update; /* presumably enables updating the mean with each input */
00535     char *save_filename;     /* presumably the file to save the mean to */
00539     float map_weight; /* presumably the weight of the initial mean in MAP estimation -- confirm */
00540 
00544     boolean loaded; /* presumably TRUE once an initial mean has been loaded */
00545 
00550     CMNWork *wrk; /* presumably the CMN computation work area */
00551 
00552   } cmn;
00553 
00558   struct { /* front-end settings; the ss* names suggest spectral subtraction -- confirm */
00562     float *ssbuf; /* presumably the noise spectrum used for subtraction */
00563     
00567     int sslen; /* presumably the length of ssbuf */
00568     
00573     float ss_alpha; /* presumably the subtraction coefficient */
00574 
00579     float ss_floor; /* presumably the flooring coefficient */
00580 
00584     boolean sscalc; /* presumably enables estimating the noise spectrum from the input */
00585 
00589     int sscalc_len; /* presumably the length of input used for that estimate; unit not visible here */
00590 
00594     char *ssload_filename; /* presumably the file to load a noise spectrum from */
00595 
00600     MFCCWork *mfccwrk_ss; /* presumably a separate MFCC work area for the noise estimate */
00601     
00602   } frontend;
00603 
00608   ENERGYWork ewrk; /* energy work area, stored by value */
00609 
00614   DeltaBuf *db; /* presumably the buffer for delta coefficients */
00619   DeltaBuf *ab; /* presumably the buffer for acceleration coefficients */
00624   VECT *tmpmfcc; /* presumably a scratch vector for the current frame */
00625 
00631   boolean valid; /* presumably TRUE when a frame is available for processing */
00632 
00637   int f; /* presumably the current frame index */
00638 
00643   int last_time; /* presumably the last processed frame */
00644 
00649   int sparea_start; /* presumably the start frame of a short-pause area */
00650 
00655   boolean segmented; /* presumably set when the input was segmented */
00656 
00661   boolean segmented_by_input; /* presumably set when segmentation was requested by the input side */
00662 
00667   int plugin_source; /* presumably identifies a feature-vector input plugin; value meaning not visible here */
00668 
00673   struct { /* function pointers for feature-vector input, per the fv_ prefix; presumably supplied by a plugin */
00675     boolean (*fv_standby)(); /* presumably prepares the input */
00677     boolean (*fv_begin)(); /* presumably starts the input */
00679     int (*fv_read)(VECT *, int); /* takes a vector buffer and an int, returns int; presumably reads one vector */
00681     boolean (*fv_end)(); /* presumably ends the input */
00683     boolean (*fv_resume)(); /* presumably resumes the input */
00685     boolean (*fv_pause)(); /* presumably pauses the input */
00687     boolean (*fv_terminate)(); /* presumably aborts the input */
00688   } func;
00689 
00690 #ifdef POWER_REJECT
00691   float avg_power; /* present only with POWER_REJECT; presumably the average input power */
00692 #endif
00693 
00698   struct __mfcc_calc__ *next; /* pointer to another MFCCCalc; presumably the next entry in a list */
00699 
00700 } MFCCCalc;
00701 
00706 typedef struct __process_am__ { /* One acoustic model instance; instances chain through next (see Recog.amlist). Notes marked "presumably" are inferred from names only -- confirm. */
00707 
00712   JCONF_AM *config; /* presumably the configuration this instance was created from */
00713 
00718   MFCCCalc *mfcc; /* the MFCCCalc instance associated with this model */
00719 
00723   HTK_HMM_INFO *hmminfo; /* presumably the main HMM definition */
00724 
00728   HTK_HMM_INFO *hmm_gs; /* a second HMM definition; presumably used for Gaussian selection -- confirm */
00729 
00733   HMMWork hmmwrk; /* HMM work area, stored by value */
00734 
00739   struct __process_am__ *next; /* pointer to another PROCESS_AM; presumably the next entry in a list */
00740   
00741 } PROCESS_AM;
00742 
00747 typedef struct __process_lm__ { /* One language model instance; instances chain through next (see Recog.lmlist). Notes marked "presumably" are inferred from names only -- confirm. */
00748 
00753   JCONF_LM *config; /* presumably the configuration this instance was created from */
00754 
00759   PROCESS_AM *am; /* the acoustic model instance associated with this language model */
00760 
00761 
00766   int lmtype; /* language model type code; its values are not visible in this file */
00767 
00773   int lmvar; /* language model variation code; its values are not visible in this file */
00774 
00778   WORD_INFO *winfo; /* presumably the word dictionary */
00779 
00783   NGRAM_INFO *ngram; /* presumably the N-gram data, when an N-gram model is used */
00784 
00788   MULTIGRAM *grammars; /* presumably the set of loaded grammars, when grammars are used */
00789 
00795   int gram_maxid; /* presumably the highest grammar ID assigned so far */
00796 
00801   DFA_INFO *dfa; /* presumably the DFA grammar currently used for recognition */
00802 
00807   boolean global_modified; /* presumably set when the grammar set changed and needs rebuilding -- confirm */
00808 
00813   LMFunc lmfunc; /* LM function pointers (uniprob / biprob / lmprob), stored by value */
00814 
00819   struct __process_lm__ *next; /* pointer to another PROCESS_LM; presumably the next entry in a list */
00820 
00821 } PROCESS_LM;
00822 
00827 typedef struct __recogprocess__ { /* One recognition process: an AM/LM pair with its search work areas and result; instances chain through next (see Recog.process_list). Notes marked "presumably" are inferred from names only -- confirm. */
00828 
00833   boolean live; /* presumably TRUE while this process takes part in recognition */
00834 
00841   short active; /* a short rather than a boolean, so presumably more than two states; values not visible here */
00842 
00847   JCONF_SEARCH *config; /* presumably the search configuration this process was created from */
00848 
00853   PROCESS_AM *am; /* the acoustic model instance used by this process */
00854 
00859   PROCESS_LM *lm; /* the language model instance used by this process */
00860 
00865   int lmtype; /* same name and type as PROCESS_LM.lmtype; presumably a copy -- confirm */
00866 
00872   int lmvar; /* same name and type as PROCESS_LM.lmvar; presumably a copy -- confirm */
00873 
00877   boolean ccd_flag; /* presumably enables cross-word context dependency handling -- confirm */
00878 
00882   WCHMM_INFO *wchmm; /* presumably the word-conjunction HMM (lexicon tree) */
00883 
00887   int trellis_beam_width; /* presumably the beam width actually used in the first pass */
00888 
00892   BACKTRELLIS *backtrellis; /* presumably the word trellis passed from the first pass to the second */
00893 
00897   FSBeam pass1; /* first-pass work area, stored by value */
00898 
00903   StackDecode pass2; /* second-pass work area, stored by value */
00904 
00908   WORD_ID pass1_wseq[MAXSEQNUM]; /* word ID sequence, capacity MAXSEQNUM; presumably the first-pass best sequence */
00909 
00913   int pass1_wnum; /* presumably the number of valid entries in pass1_wseq */
00914 
00918   LOGPROB pass1_score; /* presumably the score of the first-pass best sequence */
00919 
00923   WORD_ID sp_break_last_word; /* presumably the last word before a short-pause segment break */
00927   WORD_ID sp_break_last_nword; /* presumably the last non-pause context word at a segment break -- confirm */
00931   boolean sp_break_last_nword_allow_override; /* presumably allows the second pass to override sp_break_last_nword */
00935   WORD_ID sp_break_2_begin_word; /* presumably the word the second pass starts from after a break */
00939   WORD_ID sp_break_2_end_word; /* presumably the word the second pass ends with after a break */
00940 
00944   int peseqlen;         /* presumably the input length in frames for this process */
00945 
00949   int graph_totalwordnum; /* presumably the total number of words in the generated graph */
00950 
00955   Output result; /* recognition result, stored by value */
00956 
00961   boolean graphout; /* presumably enables word graph output */
00962 
00968   char *order_matrix; /* byte array; presumably temporary word-order data for confusion network generation -- confirm */
00969 
00975   int order_matrix_count; /* presumably the size of order_matrix */
00976 
00977 #ifdef DETERMINE
00978   int determine_count; /* presumably counts frames for early result determination */
00979   LOGPROB determine_maxnodescore; /* presumably the best node score seen for determination */
00980   boolean determined; /* presumably set once the result has been determined */
00981   LOGPROB determine_last_wid; /* NOTE(review): typed LOGPROB although the name suggests a word ID (WORD_ID) -- verify before relying on it */
00982   boolean have_determine; /* presumably set when a determined result is available */
00983 #endif
00984 
00989   boolean have_interim; /* presumably set when an interim result is available */
00990 
00995   void *hook; /* untyped pointer; presumably free for application use */
00996 
01001   struct __recogprocess__ *next; /* pointer to another RecogProcess; presumably the next entry in a list */
01002 
01003 } RecogProcess;
01004 
01009 typedef struct __Recog__ { /* Top-level engine instance: configuration, audio input, model and process lists, input buffers, GMM data, control flags and callback tables. Notes marked "presumably" are inferred from names only -- confirm. */
01010 
01011   /*******************************************/
01016   Jconf *jconf; /* presumably the global configuration */
01017 
01018   /*******************************************/
01023   ADIn *adin; /* audio input state */
01024 
01028   RealBeam real; /* RealBeam work area, stored by value */
01029 
01034   MFCCCalc *mfcclist; /* head of the MFCCCalc instances (linked through MFCCCalc.next) */
01035 
01040   PROCESS_AM *amlist; /* head of the acoustic model instances (linked through PROCESS_AM.next) */
01041 
01046   PROCESS_LM *lmlist; /* head of the language model instances (linked through PROCESS_LM.next) */
01047 
01052   RecogProcess *process_list; /* head of the recognition processes (linked through RecogProcess.next) */
01053 
01054 
01059   boolean process_segment; /* presumably set when the last input was ended by segmentation */
01060 
01061   /*******************************************/
01062   /* inputs */
01063 
01067   SP16 *speech; /* sample buffer; presumably the whole input speech */
01068 
01073   int speechalloclen; /* presumably the allocated capacity of speech */
01074 
01078   int speechlen;                /* presumably the number of valid samples in speech */
01079 
01083   int peseqlen;         /* presumably the input length in frames */
01084 
01085   /*******************************************/
01086 
01091   HTK_HMM_INFO *gmm; /* presumably the GMM definition, when a GMM is used */
01092 
01097   MFCCCalc *gmmmfcc; /* presumably the MFCCCalc instance feeding the GMM */
01098 
01103   GMMCalc *gc; /* GMM computation work area */
01104 
01105   /*******************************************/
01106   /* misc. */
01107 
01119   boolean process_active; /* presumably TRUE while recognition is allowed to run */
01120 
01126   boolean process_want_terminate; /* presumably requests that the current recognition be aborted */
01127 
01135   boolean process_want_reload; /* presumably requests a reload (e.g. of grammars) before the next input -- confirm */
01136 
01142   short gram_switch_input_method; /* presumably selects how a grammar change treats ongoing input; values not visible here */
01143 
01150   boolean process_online; /* presumably TRUE while an input is being processed */
01151 
01157   boolean (*calc_vector)(MFCCCalc *, SP16 *, int); /* takes an MFCCCalc, a sample buffer and an int; presumably computes feature vectors from samples */
01158 
01164   boolean triggered; /* presumably set once speech input has been triggered */
01165 
01170   void (*callback_function[SIZEOF_CALLBACK_ID][MAX_CALLBACK_HOOK])(); /* callback function table indexed by [callback ID][hook slot] */
01175   void *callback_user_data[SIZEOF_CALLBACK_ID][MAX_CALLBACK_HOOK]; /* user data pointer per entry, same indexing as callback_function */
01180   int callback_function_num[SIZEOF_CALLBACK_ID]; /* one count per callback ID; presumably the number of registered hooks */
01185   int callback_list_code[MAX_CALLBACK_HOOK*SIZEOF_CALLBACK_ID]; /* flat list; presumably the callback ID of each registered callback */
01190   int callback_list_loc[MAX_CALLBACK_HOOK*SIZEOF_CALLBACK_ID]; /* flat list; presumably the hook slot of each registered callback */
01195   int callback_num; /* presumably the number of valid entries in the two flat lists */
01196 
01197   /*******************************************/
01198 
01203   void *hook; /* untyped pointer; presumably free for application use */
01204 
01205 } Recog;
01206 
01207 #endif /* __J_RECOG_H__ */

Juliusに対してThu Jul 23 12:16:22 2009に生成されました。  doxygen 1.5.1