#include <sent/stddefs.h>
#include <sent/vocabulary.h>
#include <sent/htk_hmm.h>
Go to the source code of this file.
Data Structures | |
struct | _dfa_arc |
Transition arc of DFA. More... | |
struct | DFA_STATE |
State of DFA. More... | |
struct | TERM_INFO |
Information of each terminal symbol (=category). More... | |
struct | DFA_INFO |
Top structure of a DFA. More... | |
Defines | |
#define | DFA_STATESTEP 1000 |
Allocation step of DFA state. | |
#define | DFA_CP_MINSTEP 20 |
Minimum initial CP data size per category. | |
#define | INITIAL_S 0x10000000 |
Status flag mask specifying an initial state. | |
#define | ACCEPT_S 0x00000001 |
Status flag mask specifying an accept state. | |
Typedefs | |
typedef _dfa_arc | DFA_ARC |
Transition arc of DFA. | |
Functions | |
DFA_INFO * | dfa_info_new () |
Allocate a new grammar information data structure and initialize it. | |
void | dfa_info_free (DFA_INFO *dfa) |
Free all information in the DFA_INFO. | |
void | dfa_state_init (DFA_INFO *dinfo) |
Initialize and allocate DFA state information list in the grammar. | |
void | dfa_state_expand (DFA_INFO *dinfo, int needed) |
Expand the state information list to the required length. | |
boolean | rddfa (FILE *fp, DFA_INFO *dinfo) |
Top loop function to read DFA grammar via file pointer (gzip enabled). | |
boolean | rddfa_fp (FILE *fp, DFA_INFO *dinfo) |
Top loop function to read DFA grammar via file descriptor. | |
boolean | rddfa_line (char *line, DFA_INFO *dinfo, int *state_max, int *arc_num, int *terminal_max) |
Parse the input line and set grammar information, one by line. | |
void | dfa_append (DFA_INFO *dst, DFA_INFO *src, int soffset, int coffset) |
Append the DFA state information to other. | |
boolean | init_dfa (DFA_INFO *dinfo, char *filename) |
Read in a grammar file and set to DFA grammar structure. | |
WORD_ID | dfa_symbol_lookup (DFA_INFO *dinfo, char *terminalname) |
Return category id corresponding to the given terminal name. | |
boolean | extract_cpair (DFA_INFO *dinfo) |
Extract category-pair constraints from the DFA grammar and newly set the category-pair matrix of the given DFA. | |
boolean | cpair_append (DFA_INFO *dst, DFA_INFO *src, int coffset) |
Append the category pair matrix at the last. | |
void | print_dfa_info (FILE *fp, DFA_INFO *dinfo) |
Output overall grammar information to the given file pointer. | |
void | print_dfa_cp (FILE *fp, DFA_INFO *dinfo) |
Output the category-pair matrix in text format to the given file pointer. | |
boolean | dfa_cp (DFA_INFO *dfa, int i, int j) |
Return whether the two given categories can be connected or not. | |
boolean | dfa_cp_begin (DFA_INFO *dfa, int i) |
Return whether the category can appear at the beginning of a sentence. | |
boolean | dfa_cp_end (DFA_INFO *dfa, int i) |
Return whether the category can appear at the end of a sentence. | |
void | set_dfa_cp (DFA_INFO *dfa, int i, int j, boolean value) |
Set a category-pair matrix bit. | |
void | set_dfa_cp_begin (DFA_INFO *dfa, int i, boolean value) |
Set a category-pair matrix bit for the beginning of sentence. | |
void | set_dfa_cp_end (DFA_INFO *dfa, int i, boolean value) |
Set a category-pair matrix bit for the end of sentence. | |
void | init_dfa_cp (DFA_INFO *dfa) |
Initialize category pair matrix in the grammar data. | |
void | malloc_dfa_cp (DFA_INFO *dfa, int term_num, int size) |
Allocate memory for category pair matrix and initialize it. | |
void | realloc_dfa_cp (DFA_INFO *dfa, int old_term_num, int new_term_num) |
void | free_dfa_cp (DFA_INFO *dfa) |
Free the category pair matrix from DFA grammar. | |
void | dfa_cp_output_rawdata (FILE *fp, DFA_INFO *dfa) |
void | dfa_cp_count_size (DFA_INFO *dfa, unsigned long *size_ret, unsigned long *allocsize_ret) |
boolean | dfa_cp_append (DFA_INFO *dfa, DFA_INFO *src, int offset) |
Append a category-pair matrix to another. | |
boolean | make_dfa_voca_ref (DFA_INFO *dinfo, WORD_INFO *winfo) |
Make correspondence between all words in dictionary and categories in grammar, both from a word to a category and from a category to words. | |
void | make_terminfo (TERM_INFO *tinfo, DFA_INFO *dinfo, WORD_INFO *winfo) |
Make a word list for each category. | |
void | free_terminfo (TERM_INFO *tinfo) |
Free word list for each category. | |
void | terminfo_append (TERM_INFO *dst, TERM_INFO *src, int coffset, int woffset) |
Append the terminal(category) word list. | |
void | dfa_find_pause_word (DFA_INFO *dfa, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo) |
Find pause word and pause category information, and set to the grammar data. | |
boolean | dfa_pause_word_append (DFA_INFO *dst, DFA_INFO *src, int coffset) |
Append the pause word/category information at the last. |
This file includes definitions for a finite state grammar called DFA.
DFA is a deterministic finite state automaton describing grammatical constraints, using the category number of each dictionary word as an input. It also holds the list of words belonging to each category.
Additionally, the category-pair information will be generated from the given DFA by extracting the allowed connections between categories. It will be used as a degenerated constraint on word connection at the 1st pass.
Definition in file dfa.h.
DFA_INFO* dfa_info_new | ( | ) |
Allocate a new grammar information data structure and initialize it.
Definition at line 34 of file dfa_malloc.c.
Referenced by multigram_read_file_and_add(), and multigram_update().
void dfa_info_free | ( | DFA_INFO * | dfa | ) |
Free all information in the DFA_INFO.
dfa | [i/o] grammar information data to be freed. |
Definition at line 55 of file dfa_malloc.c.
Referenced by j_process_lm_free(), multigram_exec_delete(), multigram_free_all(), multigram_read_file_and_add(), and multigram_update().
void dfa_state_init | ( | DFA_INFO * | dinfo | ) |
Initialize and allocate DFA state information list in the grammar.
dinfo | [i/o] DFA grammar |
Definition at line 36 of file rddfa.c.
Referenced by multigram_update(), rddfa(), and rddfa_fp().
void dfa_state_expand | ( | DFA_INFO * | dinfo, | |
int | needed | |||
) |
Expand the state information list to the required length.
dinfo | [i/o] DFA grammar | |
needed | [in] required new length |
Definition at line 57 of file rddfa.c.
Referenced by dfa_append(), and rddfa_line().
Top loop function to read DFA grammar via file pointer (gzip enabled).
fp | [in] file pointer that points to the DFA grammar data | |
dinfo | [out] the read data will be stored in this DFA grammar structure |
Definition at line 80 of file rddfa.c.
Referenced by init_dfa().
boolean rddfa_line | ( | char * | line, | |
DFA_INFO * | dinfo, | |||
int * | state_max, | |||
int * | arc_num, | |||
int * | terminal_max | |||
) |
Parse the input line and set grammar information, one by line.
line | [in] text buffer that holds a line of DFA file | |
dinfo | [i/o] the read data will be appended to this DFA data | |
state_max | [i/o] maximum number of state id appeared, will be updated | |
arc_num | [i/o] number of read arcs, will be updated | |
terminal_max | [i/o] maximum terminal (category) id that has appeared, will be updated |
Definition at line 143 of file rddfa.c.
Referenced by rddfa(), and rddfa_fp().
Append the DFA state information to other.
dst | [i/o] DFA grammar | |
src | [i/o] DFA grammar to be appended to dst | |
soffset | [in] offset state number in dst where the new state should be stored | |
coffset | [in] category id offset in dst where the new data should be stored |
Definition at line 218 of file rddfa.c.
Referenced by multigram_append_to_global().
Read in a grammar file and set to DFA grammar structure.
dinfo | [i/o] a blank DFA data | |
filename | [in] DFA grammar file name |
Definition at line 46 of file init_dfa.c.
Referenced by multigram_read_file_and_add().
Return category id corresponding to the given terminal name.
Actually they are mere strings of ID itself.
dinfo | [in] DFA grammar information | |
terminalname | [in] name string |
Definition at line 45 of file dfa_lookup.c.
Referenced by make_dfa_voca_ref().
Extract category-pair constraints from the DFA grammar and newly set the category-pair matrix of the given DFA.
dinfo | [i/o] DFA grammar, in which the category-pair matrix will be created. |
Definition at line 61 of file mkcpair.c.
Referenced by multigram_update().
Append the category pair matrix at the last.
dst | [i/o] DFA grammar | |
src | [in] DFA grammar to be appended to dst | |
coffset | [in] category id in dst where the new data should be stored |
Definition at line 123 of file mkcpair.c.
Referenced by multigram_append_to_global().
void print_dfa_info | ( | FILE * | fp, | |
DFA_INFO * | dinfo | |||
) |
Output overall grammar information to the given file pointer.
fp | [in] file pointer | |
dinfo | [in] DFA grammar |
Definition at line 35 of file dfa_util.c.
Referenced by print_engine_info().
void print_dfa_cp | ( | FILE * | fp, | |
DFA_INFO * | dinfo | |||
) |
Output the category-pair matrix in text format to the given file pointer.
fp | [in] file pointer | |
dinfo | [in] DFA grammar that holds category pair matrix |
Definition at line 54 of file dfa_util.c.
Referenced by print_engine_info().
Return whether the two given categories can be connected or not.
dfa | [in] DFA grammar holding category pair matrix | |
i | [in] category id of left word | |
j | [in] category id of right word |
Definition at line 91 of file cpair.c.
Referenced by beam_inter_word(), can_succeed(), lcdset_register_with_category(), and lcdset_register_with_category_all().
Return whether the category can appear at the beginning of a sentence.
dfa | [in] DFA grammar holding category pair matrix | |
i | [in] category id of the word |
Definition at line 109 of file cpair.c.
Referenced by can_succeed(), and init_nodescore().
Set a category-pair matrix bit for the end of sentence.
dfa | [out] DFA grammar holding category pair matrix | |
i | [in] category id of the word | |
value | TRUE if the category can appear at the end of sentence, FALSE if not. |
Definition at line 251 of file cpair.c.
Referenced by extract_cpair().
void init_dfa_cp | ( | DFA_INFO * | dfa | ) |
Initialize category pair matrix in the grammar data.
dfa | [out] DFA grammar to hold category pair matrix |
Definition at line 274 of file cpair.c.
Referenced by dfa_info_new().
void malloc_dfa_cp | ( | DFA_INFO * | dfa, | |
int | term_num, | |||
int | size | |||
) |
Allocate memory for category pair matrix and initialize it.
dfa | [out] DFA grammar to hold category pair matrix | |
term_num | [in] number of categories in the grammar | |
size | [in] memory allocation length for each cp list as initial |
Definition at line 287 of file cpair.c.
Referenced by extract_cpair().
void free_dfa_cp | ( | DFA_INFO * | dfa | ) |
Free the category pair matrix from DFA grammar.
dfa | [i/o] DFA grammar holding category pair matrix |
Definition at line 396 of file cpair.c.
Referenced by dfa_info_free().
Append a category-pair matrix to another.
This function assumes that other grammar information has been already appended and dfa->term_num contains the new size.
dfa | [i/o] DFA grammar to which the new category pair will be appended | |
src | [in] source DFA | |
offset | [in] appending point at dfa |
Definition at line 319 of file cpair.c.
Referenced by cpair_append().
Make correspondence between all words in dictionary and categories in grammar, both from a word to a category and from a category to words.
dinfo | [i/o] DFA grammar, category information will be built here. | |
winfo | [i/o] Word dictionary, word-to-category information will be built here. |
Definition at line 74 of file init_dfa.c.
Referenced by multigram_add_words_to_grammar(), and multigram_update().
Make a word list for each category.
tinfo | [i/o] terminal data structure to hold the result | |
dinfo | [in] DFA grammar to supply the number of categories in the grammar | |
winfo | [in] word dictionary. |
Definition at line 39 of file mkterminfo.c.
void free_terminfo | ( | TERM_INFO * | tinfo | ) |
Free word list for each category.
tinfo | [in] terminal data structure holding the content. |
Definition at line 75 of file mkterminfo.c.
Referenced by dfa_info_free(), and multigram_add_words_to_grammar().
Append the terminal(category) word list.
dst | [i/o] category data | |
src | [i/o] category data to be appended to dst | |
coffset | [in] category id offset in dst where the new data should be stored | |
woffset | [in] word id offset where the new data should be stored |
Definition at line 97 of file mkterminfo.c.
Referenced by multigram_append_to_global().
void dfa_find_pause_word | ( | DFA_INFO * | dfa, | |
WORD_INFO * | winfo, | |||
HTK_HMM_INFO * | hmminfo | |||
) |
Find pause word and pause category information, and set to the grammar data.
dfa | [i/o] DFA grammar, sp_id and is_sp will be built here. | |
winfo | [in] Word dictionary | |
hmminfo | [in] HTK HMM to provide which is short pause HMM |
Definition at line 107 of file init_dfa.c.
Referenced by multigram_update().
Append the pause word/category information at the last.
dst | [i/o] DFA grammar | |
src | [in] DFA grammar to be appended to dst | |
coffset | appending category point in dst |
Definition at line 138 of file init_dfa.c.
Referenced by multigram_append_to_global().