#include <sent/stddefs.h>
#include <sent/vocabulary.h>
#include <sent/htk_hmm.h>
Go to the source code of this file.
Data Structures | |
struct | DFA_ARC |
Transition arc of DFA. More... | |
struct | DFA_STATE |
State of DFA. More... | |
struct | TERM_INFO |
Information of each terminal symbol (=category). More... | |
struct | DFA_INFO |
Top structure of a DFA. More... | |
Defines | |
#define | DFA_STATESTEP 1000 |
Allocation step of DFA state. | |
#define | INITIAL_S 0x10000000 |
Status flag mask specifying an initial state. | |
#define | ACCEPT_S 0x00000001 |
Status flag mask specifying an accept state. | |
Functions | |
DFA_INFO * | dfa_info_new () |
Allocate a new grammar information data structure and initialize it. | |
void | dfa_info_free (DFA_INFO *dfa) |
Free all information in the DFA_INFO. | |
void | dfa_state_init (DFA_INFO *dinfo) |
Initialize and allocate DFA state information list in the grammar. | |
void | dfa_state_expand (DFA_INFO *dinfo, int needed) |
Expand the state information list to the required length. | |
boolean | rddfa (FILE *fp, DFA_INFO *dinfo) |
Top loop function to read DFA grammar via file pointer. | |
boolean | rddfa_fd (int fd, DFA_INFO *dinfo) |
Top loop function to read DFA grammar via file descriptor. | |
boolean | rddfa_sd (int sd, DFA_INFO *dinfo) |
Top loop function to read DFA grammar via socket descriptor. | |
boolean | rddfa_line (char *line, DFA_INFO *dinfo, int *state_max, int *arc_num, int *terminal_max) |
Parse the input line and set grammar information, one by line. | |
void | dfa_append (DFA_INFO *dst, DFA_INFO *src, int soffset, int coffset) |
Append the DFA state information to other. | |
boolean | init_dfa (DFA_INFO *dinfo, char *filename) |
Read in a grammar file and set to DFA grammar structure. | |
WORD_ID | dfa_symbol_lookup (DFA_INFO *dinfo, char *terminalname) |
Return category id corresponding to the given terminal name. | |
boolean | extract_cpair (DFA_INFO *dinfo) |
Extract category-pair constraint from DFA grammar and newly set the category pair matrix of the given DFA. | |
boolean | cpair_append (DFA_INFO *dst, DFA_INFO *src, int coffset) |
Append the category pair matrix at the last. | |
void | print_dfa_info (FILE *fp, DFA_INFO *dinfo) |
Output overall grammar information to the given file pointer. | |
void | print_dfa_cp (FILE *fp, DFA_INFO *dinfo) |
Output the category-pair matrix in text format to the given file pointer. | |
boolean | dfa_cp (DFA_INFO *dfa, int i, int j) |
Return whether the two given categories can be connected or not. | |
boolean | dfa_cp_begin (DFA_INFO *dfa, int i) |
Return whether the category can appear at the beginning of a sentence. | |
boolean | dfa_cp_end (DFA_INFO *dfa, int i) |
Return whether the category can appear at the end of a sentence. | |
void | set_dfa_cp (DFA_INFO *dfa, int i, int j, boolean value) |
Set the category-pair matrix bit. | |
void | set_dfa_cp_begin (DFA_INFO *dfa, int i, boolean value) |
Set the category-pair matrix bit at the beginning of sentence. | |
void | set_dfa_cp_end (DFA_INFO *dfa, int i, boolean value) |
Set the category-pair matrix bit at the end of sentence. | |
void | init_dfa_cp (DFA_INFO *dfa) |
Initialize category pair matrix in the grammar data. | |
void | malloc_dfa_cp (DFA_INFO *dfa, int term_num) |
Allocate memory for category pair matrix and initialize it. | |
void | realloc_dfa_cp (DFA_INFO *dfa, int old_term_num, int new_term_num) |
Re-allocate memory for the category pair matrix; can be called when the number of categories is expanded. | |
void | free_dfa_cp (DFA_INFO *dfa) |
Free the category pair matrix from DFA grammar. | |
boolean | make_dfa_voca_ref (DFA_INFO *dinfo, WORD_INFO *winfo) |
Make correspondence between all words in dictionary and categories in grammar, both from a word to a category and from a category to words. | |
void | make_terminfo (TERM_INFO *tinfo, DFA_INFO *dinfo, WORD_INFO *winfo) |
Make a word list for each category. | |
void | free_terminfo (TERM_INFO *tinfo) |
Free word list for each category. | |
void | terminfo_append (TERM_INFO *dst, TERM_INFO *src, int coffset, int woffset) |
Append the terminal(category) word list. | |
void | dfa_find_pause_word (DFA_INFO *dfa, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo) |
Find pause word and pause category information, and set to the grammar data. | |
boolean | dfa_pause_word_append (DFA_INFO *dst, DFA_INFO *src, int coffset) |
Append the pause word/category information at the last. | |
boolean | read_grammar_from_socket (int sd, DFA_INFO **ret_dfa, WORD_INFO **ret_winfo, HTK_HMM_INFO *hmminfo) |
Read grammar (DFA and dictionary) from socket and returns newly allocated grammars. |
This file includes definitions for a finite state grammar called DFA.
DFA is a deterministic finite state automaton describing grammatical constraints, using the category number of each dictionary word as an input. It also holds lists of words belonging to each category.
Additionally, the category-pair information will be generated from the given DFA by extracting allowed connections between categories. It will be used as a degenerated constraint of word connection at the 1st pass.
Definition in file dfa.h.
DFA_INFO* dfa_info_new | ( | ) |
Allocate a new grammar information data structure and initialize it.
Definition at line 34 of file dfa_malloc.c.
Referenced by multigram_read_file_and_add(), multigram_update(), and read_grammar_from_socket().
void dfa_info_free | ( | DFA_INFO * | dfa | ) |
Free all information in the DFA_INFO.
dfa | [i/o] grammar information data to be freed. |
Definition at line 55 of file dfa_malloc.c.
Referenced by j_process_lm_free(), multigram_exec_delete(), multigram_free_all(), multigram_read_file_and_add(), multigram_update(), and read_grammar_from_socket().
void dfa_state_init | ( | DFA_INFO * | dinfo | ) |
Initialize and allocate DFA state information list in the grammar.
dinfo | [i/o] DFA grammar |
Definition at line 36 of file rddfa.c.
Referenced by multigram_update(), rddfa(), rddfa_fd(), and rddfa_sd().
void dfa_state_expand | ( | DFA_INFO * | dinfo, | |
int | needed | |||
) |
Expand the state information list to the required length.
dinfo | [i/o] DFA grammar | |
needed | [in] required new length |
Definition at line 57 of file rddfa.c.
Referenced by dfa_append(), and rddfa_line().
boolean rddfa | ( | FILE * | fp, | |
DFA_INFO * | dinfo | |||
) |
Top loop function to read DFA grammar via file pointer.
fp | [in] file pointer that points to the DFA grammar data | |
dinfo | [out] the read data will be stored in this DFA grammar structure |
Definition at line 80 of file rddfa.c.
Referenced by init_dfa().
boolean rddfa_fd | ( | int | fd, | |
DFA_INFO * | dinfo | |||
) |
Top loop function to read DFA grammar via file descriptor.
fd | [in] file descriptor that points to the DFA grammar data | |
dinfo | [out] the read data will be stored in this DFA grammar structure |
Definition at line 110 of file rddfa.c.
Referenced by read_grammar_from_socket().
boolean rddfa_sd | ( | int | sd, | |
DFA_INFO * | dinfo | |||
) |
Top loop function to read DFA grammar via socket descriptor.
sd | [in] socket descriptor that points to the DFA grammar data | |
dinfo | [out] the read data will be stored in this DFA grammar structure |
Definition at line 140 of file rddfa.c.
Referenced by read_grammar_from_socket().
boolean rddfa_line | ( | char * | line, | |
DFA_INFO * | dinfo, | |||
int * | state_max, | |||
int * | arc_num, | |||
int * | terminal_max | |||
) |
Parse the input line and set grammar information, one by line.
line | [in] text buffer that holds a line of DFA file | |
dinfo | [i/o] the read data will be appended to this DFA data | |
state_max | [i/o] maximum number of state id appeared, will be updated | |
arc_num | [i/o] number of read arcs, will be updated | |
terminal_max | [i/o] maximum terminal (category) id that has appeared, will be updated |
Definition at line 173 of file rddfa.c.
Referenced by rddfa(), rddfa_fd(), and rddfa_sd().
Append the DFA state information to other.
dst | [i/o] DFA grammar | |
src | [i/o] DFA grammar to be appended to dst | |
soffset | [in] offset state number in dst where the new state should be stored | |
coffset | [in] category id offset in dst where the new data should be stored |
Definition at line 248 of file rddfa.c.
Referenced by multigram_append_to_global().
boolean init_dfa | ( | DFA_INFO * | dinfo, | |
char * | filename | |||
) |
Read in a grammar file and set to DFA grammar structure.
dinfo | [i/o] a blank DFA data | |
filename | [in] DFA grammar file name |
Definition at line 46 of file init_dfa.c.
Referenced by multigram_read_file_and_add().
Return category id corresponding to the given terminal name.
In practice, a terminal name is simply the string form of the category ID itself.
dinfo | [in] DFA grammar information | |
terminalname | [in] name string |
Definition at line 45 of file dfa_lookup.c.
Referenced by make_dfa_voca_ref().
boolean extract_cpair | ( | DFA_INFO * | dinfo | ) |
Extract category-pair constraint from DFA grammar and newly set the category pair matrix of the given DFA.
dinfo | [i/o] DFA grammar, in which the category-pair matrix will be created. |
Definition at line 61 of file mkcpair.c.
Referenced by multigram_update().
Append the category pair matrix at the last.
dst | [i/o] DFA grammar | |
src | [in] DFA grammar to be appended to dst | |
coffset | [in] category id in dst where the new data should be stored |
Definition at line 119 of file mkcpair.c.
Referenced by multigram_append_to_global().
void print_dfa_info | ( | FILE * | fp, | |
DFA_INFO * | dinfo | |||
) |
Output overall grammar information to the given file pointer.
fp | [in] file pointer | |
dinfo | [in] DFA grammar |
Definition at line 35 of file dfa_util.c.
Referenced by print_engine_info().
void print_dfa_cp | ( | FILE * | fp, | |
DFA_INFO * | dinfo | |||
) |
Output the category-pair matrix in text format to the given file pointer.
fp | [in] file pointer | |
dinfo | [in] DFA grammar that holds category pair matrix |
Definition at line 52 of file dfa_util.c.
Referenced by print_engine_info().
boolean dfa_cp | ( | DFA_INFO * | dfa, | |
int | i, | |||
int | j | |||
) |
Return whether the two given categories can be connected or not.
dfa | [in] DFA grammar holding category pair matrix | |
i | [in] category id of left word | |
j | [in] category id of right word |
Definition at line 49 of file cpair.c.
Referenced by cpair_append(), and print_dfa_cp().
boolean dfa_cp_begin | ( | DFA_INFO * | dfa, | |
int | i | |||
) |
Return whether the category can appear at the beginning of a sentence.
dfa | [in] DFA grammar holding category pair matrix | |
i | [in] category id of the word |
Definition at line 64 of file cpair.c.
Referenced by cpair_append(), and print_dfa_cp().
boolean dfa_cp_end | ( | DFA_INFO * | dfa, | |
int | i | |||
) |
Return whether the category can appear at the end of a sentence.
dfa | [in] DFA grammar holding category pair matrix | |
i | [in] category id of the word |
Definition at line 79 of file cpair.c.
Referenced by cpair_append(), and print_dfa_cp().
void set_dfa_cp | ( | DFA_INFO * | dfa, | |
int | i, | |||
int | j, | |||
boolean | value | |||
) |
Set the category-pair matrix bit.
dfa | [out] DFA grammar holding category pair matrix | |
i | [in] category id of left word | |
j | [in] category id of right word | |
value | TRUE if connection allowed, FALSE if connection prohibited. |
Definition at line 94 of file cpair.c.
Referenced by cpair_append(), extract_cpair(), malloc_dfa_cp(), and realloc_dfa_cp().
void set_dfa_cp_begin | ( | DFA_INFO * | dfa, | |
int | i, | |||
boolean | value | |||
) |
Set the category-pair matrix bit at the beginning of sentence.
dfa | [out] DFA grammar holding category pair matrix | |
i | [in] category id of the word | |
value | TRUE if the category can appear at the beginning of sentence, FALSE if not. |
Definition at line 113 of file cpair.c.
Referenced by cpair_append(), extract_cpair(), malloc_dfa_cp(), and realloc_dfa_cp().
void set_dfa_cp_end | ( | DFA_INFO * | dfa, | |
int | i, | |||
boolean | value | |||
) |
Set the category-pair matrix bit at the end of sentence.
dfa | [out] DFA grammar holding category pair matrix | |
i | [in] category id of the word | |
value | TRUE if the category can appear at the end of sentence, FALSE if not. |
Definition at line 132 of file cpair.c.
Referenced by cpair_append(), extract_cpair(), malloc_dfa_cp(), and realloc_dfa_cp().
void init_dfa_cp | ( | DFA_INFO * | dfa | ) |
Initialize category pair matrix in the grammar data.
dfa | [out] DFA grammar to hold category pair matrix |
Definition at line 148 of file cpair.c.
Referenced by dfa_info_new().
void malloc_dfa_cp | ( | DFA_INFO * | dfa, | |
int | term_num | |||
) |
Allocate memory for category pair matrix and initialize it.
dfa | [out] DFA grammar to hold category pair matrix | |
term_num | [in] number of categories in the grammar |
Definition at line 163 of file cpair.c.
Referenced by extract_cpair(), and realloc_dfa_cp().
void realloc_dfa_cp | ( | DFA_INFO * | dfa, | |
int | old_term_num, | |||
int | new_term_num | |||
) |
Re-allocate memory for the category pair matrix; can be called when the number of categories is expanded.
dfa | [i/o] DFA grammar holding category pair matrix | |
old_term_num | [in] number of categories when the last category pair matrix was allocated | |
new_term_num | [in] new number of categories in the grammar |
Definition at line 194 of file cpair.c.
Referenced by cpair_append().
void free_dfa_cp | ( | DFA_INFO * | dfa | ) |
Free the category pair matrix from DFA grammar.
dfa | [i/o] DFA grammar holding category pair matrix |
Definition at line 255 of file cpair.c.
Referenced by dfa_info_free().
Make correspondence between all words in dictionary and categories in grammar, both from a word to a category and from a category to words.
dinfo | [i/o] DFA grammar, category information will be built here. | |
winfo | [i/o] Word dictionary, word-to-category information will be built here. |
Definition at line 74 of file init_dfa.c.
Referenced by multigram_add_words_to_grammar(), and multigram_update().
Make a word list for each category.
tinfo | [i/o] terminal data structure to hold the result | |
dinfo | [in] DFA grammar to supply the number of categories in the grammar | |
winfo | [in] word dictionary. |
Definition at line 39 of file mkterminfo.c.
Referenced by make_dfa_voca_ref().
void free_terminfo | ( | TERM_INFO * | tinfo | ) |
Free word list for each category.
tinfo | [in] terminal data structure holding the content. |
Definition at line 75 of file mkterminfo.c.
Referenced by dfa_info_free(), and multigram_add_words_to_grammar().
Append the terminal(category) word list.
dst | [i/o] category data | |
src | [i/o] category data to be appended to dst | |
coffset | [in] category id offset in dst where the new data should be stored | |
woffset | [in] word id offset where the new data should be stored |
Definition at line 97 of file mkterminfo.c.
Referenced by multigram_append_to_global().
void dfa_find_pause_word | ( | DFA_INFO * | dfa, | |
WORD_INFO * | winfo, | |||
HTK_HMM_INFO * | hmminfo | |||
) |
Find pause word and pause category information, and set to the grammar data.
dfa | [i/o] DFA grammar, sp_id and is_sp will be built here. | |
winfo | [in] Word dictionary | |
hmminfo | [in] HTK HMM to provide which is short pause HMM |
Definition at line 107 of file init_dfa.c.
Referenced by multigram_update().
Append the pause word/category information at the last.
dst | [i/o] DFA grammar | |
src | [in] DFA grammar to be appended to dst | |
coffset | appending category point in dst |
Definition at line 138 of file init_dfa.c.
Referenced by multigram_append_to_global().
boolean read_grammar_from_socket | ( | int | sd, | |
DFA_INFO ** | ret_dfa, | |||
WORD_INFO ** | ret_winfo, | |||
HTK_HMM_INFO * | hmminfo | |||
) |
Read grammar (DFA and dictionary) from socket and returns newly allocated grammars.
sd | [in] socket descriptor | |
ret_dfa | [out] read DFA | |
ret_winfo | [out] read dictionary | |
hmminfo | [in] HMM definition |
Definition at line 178 of file init_dfa.c.