libsent/include/sent/dfa.h File Reference

Definitions for DFA grammar and category-pair information. More...

#include <sent/stddefs.h>
#include <sent/vocabulary.h>
#include <sent/htk_hmm.h>

Go to the source code of this file.

Data Structures

struct  _dfa_arc
 Transition arc of DFA. More...
struct  DFA_STATE
 State of DFA. More...
struct  TERM_INFO
 Information of each terminal symbol (=category). More...
struct  DFA_INFO
 Top structure of a DFA. More...

Defines

#define DFA_STATESTEP   1000
 Allocation step of DFA state.
#define DFA_CP_MINSTEP   20
 Minimum initial CP data size per category.
#define INITIAL_S   0x10000000
 Status flag mask specifying an initial state.
#define ACCEPT_S   0x00000001
 Status flag mask specifying an accept state.

Typedefs

typedef _dfa_arc DFA_ARC
 Transition arc of DFA.

Functions

DFA_INFO * dfa_info_new ()
 Allocate a new grammar information data structure and initialize it.
void dfa_info_free (DFA_INFO *dfa)
 Free all information in the DFA_INFO.
void dfa_state_init (DFA_INFO *dinfo)
 Initialize and allocate DFA state information list in the grammar.
void dfa_state_expand (DFA_INFO *dinfo, int needed)
 Expand the state information list to the required length.
boolean rddfa (FILE *fp, DFA_INFO *dinfo)
 Top loop function to read DFA grammar via file pointer (gzip enabled).
boolean rddfa_fp (FILE *fp, DFA_INFO *dinfo)
 Top loop function to read DFA grammar via file descriptor.
boolean rddfa_line (char *line, DFA_INFO *dinfo, int *state_max, int *arc_num, int *terminal_max)
 Parse the input line and set grammar information, one line at a time.
void dfa_append (DFA_INFO *dst, DFA_INFO *src, int soffset, int coffset)
 Append the DFA state information to another DFA.
boolean init_dfa (DFA_INFO *dinfo, char *filename)
 Read in a grammar file and set to DFA grammar structure.
WORD_ID dfa_symbol_lookup (DFA_INFO *dinfo, char *terminalname)
 Return category id corresponding to the given terminal name.
boolean extract_cpair (DFA_INFO *dinfo)
 Extract category-pair constraint from DFA grammar and newly set the category pair matrix of the given DFA.
boolean cpair_append (DFA_INFO *dst, DFA_INFO *src, int coffset)
 Append the category pair matrix at the end.
void print_dfa_info (FILE *fp, DFA_INFO *dinfo)
 Output overall grammar information to stdout.
void print_dfa_cp (FILE *fp, DFA_INFO *dinfo)
 Output the category-pair matrix in text format to stdout.
boolean dfa_cp (DFA_INFO *dfa, int i, int j)
 Return whether the two given categories can be connected or not.
boolean dfa_cp_begin (DFA_INFO *dfa, int i)
 Return whether the category can appear at the beginning of sentence.
boolean dfa_cp_end (DFA_INFO *dfa, int i)
 Return whether the category can appear at the end of sentence.
void set_dfa_cp (DFA_INFO *dfa, int i, int j, boolean value)
 Set a category-pair matrix bit.
void set_dfa_cp_begin (DFA_INFO *dfa, int i, boolean value)
 Set a category-pair matrix bit for the beginning of sentence.
void set_dfa_cp_end (DFA_INFO *dfa, int i, boolean value)
 Set a category-pair matrix bit for the end of sentence.
void init_dfa_cp (DFA_INFO *dfa)
 Initialize category pair matrix in the grammar data.
void malloc_dfa_cp (DFA_INFO *dfa, int term_num, int size)
 Allocate memory for category pair matrix and initialize it.
void realloc_dfa_cp (DFA_INFO *dfa, int old_term_num, int new_term_num)
void free_dfa_cp (DFA_INFO *dfa)
 Free the category pair matrix from DFA grammar.
void dfa_cp_output_rawdata (FILE *fp, DFA_INFO *dfa)
void dfa_cp_count_size (DFA_INFO *dfa, unsigned long *size_ret, unsigned long *allocsize_ret)
boolean dfa_cp_append (DFA_INFO *dfa, DFA_INFO *src, int offset)
 Append a category-pair matrix to another.
boolean make_dfa_voca_ref (DFA_INFO *dinfo, WORD_INFO *winfo)
 Make correspondence between all words in dictionary and categories in grammar, both from a word to a category and from a category to words.
void make_terminfo (TERM_INFO *tinfo, DFA_INFO *dinfo, WORD_INFO *winfo)
 Make a word list for each category.
void free_terminfo (TERM_INFO *tinfo)
 Free word list for each category.
void terminfo_append (TERM_INFO *dst, TERM_INFO *src, int coffset, int woffset)
 Append the terminal(category) word list.
void dfa_find_pause_word (DFA_INFO *dfa, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo)
 Find pause word and pause category information, and set to the grammar data.
boolean dfa_pause_word_append (DFA_INFO *dst, DFA_INFO *src, int coffset)
 Append the pause word/category information at the end.


Detailed Description

Definitions for DFA grammar and category-pair information.

This file includes definitions for a finite state grammar called DFA.

DFA is a deterministic finite state automaton describing grammatical constraints, using the category number of each dictionary word as an input. It also holds lists of the words belonging to each category.

Additionally, the category-pair information will be generated from the given DFA by extracting allowed connections between categories. It will be used as a degenerated constraint of word connection at the 1st pass.

Author:
Akinobu LEE
Date:
Thu Feb 10 18:21:27 2005
Revision
1.4

Definition in file dfa.h.


Function Documentation

DFA_INFO* dfa_info_new (  ) 

Allocate a new grammar information data structure and initialize it.

Returns:
pointer to the newly allocated DFA_INFO.

Definition at line 34 of file dfa_malloc.c.

Referenced by multigram_read_file_and_add(), and multigram_update().

void dfa_info_free ( DFA_INFO *  dfa  ) 

Free all information in the DFA_INFO.

Parameters:
dfa [i/o] grammar information data to be freed.

Definition at line 55 of file dfa_malloc.c.

Referenced by j_process_lm_free(), multigram_exec_delete(), multigram_free_all(), multigram_read_file_and_add(), and multigram_update().

void dfa_state_init ( DFA_INFO *  dinfo  ) 

Initialize and allocate DFA state information list in the grammar.

Parameters:
dinfo [i/o] DFA grammar

Definition at line 36 of file rddfa.c.

Referenced by multigram_update(), rddfa(), and rddfa_fp().

void dfa_state_expand ( DFA_INFO *  dinfo,
int  needed 
)

Expand the state information list to the required length.

Parameters:
dinfo [i/o] DFA grammar
needed [in] required new length

Definition at line 57 of file rddfa.c.

Referenced by dfa_append(), and rddfa_line().

boolean rddfa ( FILE *  fp,
DFA_INFO *  dinfo 
)

Top loop function to read DFA grammar via file pointer (gzip enabled).

Parameters:
fp [in] file pointer that points to the DFA grammar data
dinfo [out] the read data will be stored in this DFA grammar structure
Returns:
TRUE on success, FALSE on failure.

Definition at line 80 of file rddfa.c.

Referenced by init_dfa().

boolean rddfa_fp ( FILE *  fp,
DFA_INFO *  dinfo 
)

Top loop function to read DFA grammar via file descriptor.

Parameters:
fp [in] file pointer that points to the DFA grammar data
dinfo [out] the read data will be stored in this DFA grammar structure
Returns:
TRUE on success, FALSE on failure.

Definition at line 110 of file rddfa.c.

boolean rddfa_line ( char *  line,
DFA_INFO *  dinfo,
int *  state_max,
int *  arc_num,
int *  terminal_max 
)

Parse the input line and set grammar information, one line at a time.

Parameters:
line [in] text buffer that holds a line of DFA file
dinfo [i/o] the read data will be appended to this DFA data
state_max [i/o] maximum number of state id appeared, will be updated
arc_num [i/o] number of read arcs, will be updated
terminal_max [i/o] maximum number of state id appended, will be updated
Returns:
TRUE if the line was successfully parsed, FALSE if failed.

Definition at line 143 of file rddfa.c.

Referenced by rddfa(), and rddfa_fp().

void dfa_append ( DFA_INFO *  dst,
DFA_INFO *  src,
int  soffset,
int  coffset 
)

Append the DFA state information to another DFA.

Parameters:
dst [i/o] DFA grammar
src [i/o] DFA grammar to be appended to dst
soffset [in] offset state number in dst where the new state should be stored
coffset [in] category id offset in dst where the new data should be stored

Definition at line 218 of file rddfa.c.

Referenced by multigram_append_to_global().

boolean init_dfa ( DFA_INFO *  dinfo,
char *  filename 
)

Read in a grammar file and set to DFA grammar structure.

Parameters:
dinfo [i/o] a blank DFA data
filename [in] DFA grammar file name

Definition at line 46 of file init_dfa.c.

Referenced by multigram_read_file_and_add().

WORD_ID dfa_symbol_lookup ( DFA_INFO *  dinfo,
char *  terminalname 
)

Return category id corresponding to the given terminal name.

Actually, the terminal names are merely the category IDs themselves written as strings.

Parameters:
dinfo [in] DFA grammar information
terminalname [in] name string
Returns:
the category id.

Definition at line 45 of file dfa_lookup.c.

Referenced by make_dfa_voca_ref().

boolean extract_cpair ( DFA_INFO *  dinfo  ) 

Extract category-pair constraint from DFA grammar and newly set the category pair matrix of the given DFA.

Parameters:
dinfo [i/o] DFA grammar, in which the category-pair matrix will be created.

Definition at line 61 of file mkcpair.c.

Referenced by multigram_update().

boolean cpair_append ( DFA_INFO *  dst,
DFA_INFO *  src,
int  coffset 
)

Append the category pair matrix at the end.

Parameters:
dst [i/o] DFA grammar
src [in] DFA grammar to be appended to dst
coffset [in] category id in dst where the new data should be stored

Definition at line 123 of file mkcpair.c.

Referenced by multigram_append_to_global().

void print_dfa_info ( FILE *  fp,
DFA_INFO *  dinfo 
)

Output overall grammar information to stdout.

Parameters:
fp [in] file pointer
dinfo [in] DFA grammar

Definition at line 35 of file dfa_util.c.

Referenced by print_engine_info().

void print_dfa_cp ( FILE *  fp,
DFA_INFO *  dinfo 
)

Output the category-pair matrix in text format to stdout.

Parameters:
fp [in] file pointer
dinfo [in] DFA grammar that holds category pair matrix

Definition at line 54 of file dfa_util.c.

Referenced by print_engine_info().

boolean dfa_cp ( DFA_INFO *  dfa,
int  i,
int  j 
)

Return whether the two given categories can be connected or not.

Parameters:
dfa [in] DFA grammar holding category pair matrix
i [in] category id of left word
j [in] category id of right word
Returns:
TRUE if connection is allowed by the grammar, FALSE if prohibited.

Definition at line 91 of file cpair.c.

Referenced by beam_inter_word(), can_succeed(), lcdset_register_with_category(), and lcdset_register_with_category_all().

boolean dfa_cp_begin ( DFA_INFO *  dfa,
int  i 
)

Return whether the category can appear at the beginning of sentence.

Parameters:
dfa [in] DFA grammar holding category pair matrix
i [in] category id of the word
Returns:
TRUE if it can appear at the beginning of sentence, FALSE if not.

Definition at line 109 of file cpair.c.

Referenced by can_succeed(), and init_nodescore().

boolean dfa_cp_end ( DFA_INFO *  dfa,
int  i 
)

Return whether the category can appear at the end of sentence.

Parameters:
dfa [in] DFA grammar holding category pair matrix
i [in] category id of the word
Returns:
TRUE if it can appear at the end of sentence, FALSE if not.

Definition at line 126 of file cpair.c.

void set_dfa_cp ( DFA_INFO *  dfa,
int  i,
int  j,
boolean  value 
)

Set a category-pair matrix bit.

Parameters:
dfa [out] DFA grammar holding category pair matrix
i [in] category id of left word
j [in] category id of right word
value TRUE if connection allowed, FALSE if connection prohibited.

Definition at line 200 of file cpair.c.

void set_dfa_cp_begin ( DFA_INFO *  dfa,
int  i,
boolean  value 
)

Set a category-pair matrix bit for the beginning of sentence.

Parameters:
dfa [out] DFA grammar holding category pair matrix
i [in] category id of the word
value TRUE if the category can appear at the beginning of sentence, FALSE if not.

Definition at line 225 of file cpair.c.

void set_dfa_cp_end ( DFA_INFO *  dfa,
int  i,
boolean  value 
)

Set a category-pair matrix bit for the end of sentence.

Parameters:
dfa [out] DFA grammar holding category pair matrix
i [in] category id of the word
value TRUE if the category can appear at the end of sentence, FALSE if not.

Definition at line 251 of file cpair.c.

Referenced by extract_cpair().

void init_dfa_cp ( DFA_INFO *  dfa  ) 

Initialize category pair matrix in the grammar data.

Parameters:
dfa [out] DFA grammar to hold category pair matrix

Definition at line 274 of file cpair.c.

Referenced by dfa_info_new().

void malloc_dfa_cp ( DFA_INFO *  dfa,
int  term_num,
int  size 
)

Allocate memory for category pair matrix and initialize it.

Parameters:
dfa [out] DFA grammar to hold category pair matrix
term_num [in] number of categories in the grammar
size [in] initial memory allocation length for each cp list

Definition at line 287 of file cpair.c.

Referenced by extract_cpair().

void free_dfa_cp ( DFA_INFO *  dfa  ) 

Free the category pair matrix from DFA grammar.

Parameters:
dfa [i/o] DFA grammar holding category pair matrix

Definition at line 396 of file cpair.c.

Referenced by dfa_info_free().

boolean dfa_cp_append ( DFA_INFO *  dfa,
DFA_INFO *  src,
int  offset 
)

Append a category-pair matrix to another.

This function assumes that other grammar information has been already appended and dfa->term_num contains the new size.

Parameters:
dfa [i/o] DFA grammar to which the new category pair will be appended
src [in] source DFA
offset [in] appending point at dfa
Returns:
TRUE on success, FALSE on error.

Definition at line 319 of file cpair.c.

Referenced by cpair_append().

boolean make_dfa_voca_ref ( DFA_INFO *  dinfo,
WORD_INFO *  winfo 
)

Make correspondence between all words in dictionary and categories in grammar, both from a word to a category and from a category to words.

Parameters:
dinfo [i/o] DFA grammar, category information will be built here.
winfo [i/o] Word dictionary, word-to-category information will be built here.

Definition at line 74 of file init_dfa.c.

Referenced by multigram_add_words_to_grammar(), and multigram_update().

void make_terminfo ( TERM_INFO *  tinfo,
DFA_INFO *  dinfo,
WORD_INFO *  winfo 
)

Make a word list for each category.

Parameters:
tinfo [i/o] terminal data structure to hold the result
dinfo [in] DFA grammar to supply the number of categories in the grammar
winfo [in] word dictionary.

Definition at line 39 of file mkterminfo.c.

void free_terminfo ( TERM_INFO *  tinfo  ) 

Free word list for each category.

Parameters:
tinfo [in] terminal data structure holding the content.

Definition at line 75 of file mkterminfo.c.

Referenced by dfa_info_free(), and multigram_add_words_to_grammar().

void terminfo_append ( TERM_INFO *  dst,
TERM_INFO *  src,
int  coffset,
int  woffset 
)

Append the terminal(category) word list.

Parameters:
dst [i/o] category data
src [i/o] category data to be appended to dst
coffset [in] category id offset in dst where the new data should be stored
woffset [in] word id offset where the new data should be stored

Definition at line 97 of file mkterminfo.c.

Referenced by multigram_append_to_global().

void dfa_find_pause_word ( DFA_INFO *  dfa,
WORD_INFO *  winfo,
HTK_HMM_INFO *  hmminfo 
)

Find pause word and pause category information, and set to the grammar data.

Parameters:
dfa [i/o] DFA grammar, sp_id and is_sp will be built here.
winfo [in] Word dictionary
hmminfo [in] HTK HMM to provide which is short pause HMM

Definition at line 107 of file init_dfa.c.

Referenced by multigram_update().

boolean dfa_pause_word_append ( DFA_INFO *  dst,
DFA_INFO *  src,
int  coffset 
)

Append the pause word/category information at the end.

Parameters:
dst [i/o] DFA grammar
src [in] DFA grammar to be appended to dst
coffset [in] category id in dst at which the new data is appended

Definition at line 138 of file init_dfa.c.

Referenced by multigram_append_to_global().


Generated on Thu Jul 23 12:14:19 2009 for Julius by  doxygen 1.5.1