#include <sent/stddefs.h>
#include <sent/ngram2.h>
#include <sent/vocabulary.h>
Go to the source code of this file.
Functions | |
boolean | init_ngram_bin (NGRAM_INFO *ndata, char *bin_ngram_file) |
Read and set up N-gram data from a binary format file. | |
boolean | init_ngram_arpa (NGRAM_INFO *ndata, char *ngram_file, int dir) |
Read and set up N-gram data from an ARPA format file. | |
boolean | init_ngram_arpa_additional (NGRAM_INFO *ndata, char *bigram_file) |
Read an additional LR 2-gram for the 1st pass. | |
void | make_voca_ref (NGRAM_INFO *ndata, WORD_INFO *winfo) |
Make correspondence between word dictionary and N-gram vocabulary. | |
void | set_unknown_id (NGRAM_INFO *ndata) |
Set the unknown word ID in the N-gram data. |
Definition in file init_ngram.c.
boolean init_ngram_bin | ( | NGRAM_INFO * | ndata, | |
char * | bin_ngram_file | |||
) |
Read and set up N-gram data from a binary format file.
ndata | [out] pointer to N-gram data structure to store the data | |
bin_ngram_file | [in] file name of the binary N-gram |
Definition at line 36 of file init_ngram.c.
Referenced by initialize_ngram().
boolean init_ngram_arpa | ( | NGRAM_INFO * | ndata, | |
char * | ngram_file, | |||
int | dir | |||
) |
Read and set up N-gram data from an ARPA format file.
ndata | [out] pointer to N-gram data structure to store the data | |
ngram_file | [in] file name of ARPA (reverse) 3-gram file | |
dir | [in] direction (DIR_LR | DIR_RL) |
Definition at line 65 of file init_ngram.c.
Referenced by initialize_ngram().
boolean init_ngram_arpa_additional | ( | NGRAM_INFO * | ndata, | |
char * | bigram_file | |||
) |
Read an additional LR 2-gram for the 1st pass.
ndata | [out] pointer to N-gram data structure to store the data | |
bigram_file | [in] file name of ARPA 2-gram file |
Definition at line 98 of file init_ngram.c.
Referenced by initialize_ngram().
void make_voca_ref | ( | NGRAM_INFO * | ndata, | |
WORD_INFO * | winfo | |||
) |
Make correspondence between word dictionary and N-gram vocabulary.
ndata | [i/o] word/class N-gram, the unknown word information will be set. | |
winfo | [i/o] word dictionary, the word-to-ngram-entry mapping will be done here. |
Definition at line 127 of file init_ngram.c.
Referenced by initialize_ngram().
void set_unknown_id | ( | NGRAM_INFO * | ndata | ) |
Set the unknown word ID in the N-gram data.
In the CMU-Cam SLM toolkit, OOV words are always mapped to UNK, which always appears at the very beginning of the N-gram entries, so we fix the unknown word ID at "0".
ndata | [out] N-gram data to set unknown word ID. |
Definition at line 157 of file init_ngram.c.
Referenced by ngram_read_arpa() and ngram_read_bin().