#include <sent/stddefs.h>
#include <sent/ngram2.h>
#include <sent/vocabulary.h>
Go to the source code of this file.
Functions | |
boolean | init_ngram_bin (NGRAM_INFO *ndata, char *bin_ngram_file) |
Read and setup N-gram data from binary format file. | |
boolean | init_ngram_arpa (NGRAM_INFO *ndata, char *ngram_file, int dir) |
Read and setup N-gram data from ARPA format file. | |
boolean | init_ngram_arpa_additional (NGRAM_INFO *ndata, char *bigram_file) |
Read additional LR 2-gram for 1st pass. | |
boolean | make_voca_ref (NGRAM_INFO *ndata, WORD_INFO *winfo) |
Make correspondence between word dictionary and N-gram vocabulary. | |
void | set_unknown_id (NGRAM_INFO *ndata, char *str) |
Set unknown word ID to the N-gram data. | |
void | fix_uniprob_srilm (NGRAM_INFO *ndata, WORD_INFO *winfo) |
Fix unigram probability of BOS / EOS word. |
Definition in file init_ngram.c.
boolean init_ngram_bin | ( | NGRAM_INFO * | ndata, | |
char * | bin_ngram_file | |||
) |
Read and setup N-gram data from binary format file.
ndata | [out] pointer to N-gram data structure to store the data | |
bin_ngram_file | [in] file name of the binary N-gram |
Definition at line 36 of file init_ngram.c.
Referenced by initialize_ngram().
boolean init_ngram_arpa | ( | NGRAM_INFO * | ndata, | |
char * | ngram_file, | |||
int | dir | |||
) |
Read and setup N-gram data from ARPA format file.
ndata | [out] pointer to N-gram data structure to store the data | |
ngram_file | [in] file name of ARPA (reverse) 3-gram file | |
dir | [in] direction (DIR_LR | DIR_RL) |
Definition at line 65 of file init_ngram.c.
Referenced by initialize_ngram().
boolean init_ngram_arpa_additional | ( | NGRAM_INFO * | ndata, | |
char * | bigram_file | |||
) |
Read additional LR 2-gram for 1st pass.
ndata | [out] pointer to N-gram data structure to store the data | |
bigram_file | [in] file name of ARPA 2-gram file |
Definition at line 98 of file init_ngram.c.
Referenced by initialize_ngram().
boolean make_voca_ref | ( | NGRAM_INFO * | ndata, | |
WORD_INFO * | winfo | |||
) |
Make correspondence between word dictionary and N-gram vocabulary.
ndata | [i/o] word/class N-gram; the unknown word information will be set here. | |
winfo | [i/o] word dictionary; the word-to-N-gram-entry mapping will be done here. |
Definition at line 127 of file init_ngram.c.
Referenced by initialize_ngram().
void set_unknown_id | ( | NGRAM_INFO * | ndata, | |
char * | str | |||
) |
Set unknown word ID to the N-gram data.
ndata | [out] N-gram data in which to set the unknown word ID. | |
str | [in] word name string of unknown word |
Definition at line 169 of file init_ngram.c.
Referenced by initialize_ngram().
void fix_uniprob_srilm | ( | NGRAM_INFO * | ndata, | |
WORD_INFO * | winfo | |||
) |
Fix unigram probability of BOS / EOS word.
This function checks the unigram probabilities of the BOS and EOS words, and if one of them is set to "-99", assigns it the same probability as the other. This happens when the LM is trained with SRILM, which assigns a unigram probability of "-99" to the beginning-of-sentence word and thereby causes the search in the reverse direction to fail.
ndata | [i/o] N-gram data | |
winfo | [i/o] Vocabulary information |
Definition at line 206 of file init_ngram.c.
Referenced by initialize_ngram().