#include <sent/stddefs.h>
#include <sent/ngram2.h>
Include dependency graph for ngram_read_arpa.c:
Go to the source code of this file.
Functions | |
static WORD_ID | lookup_word (NGRAM_INFO *ndata, char *str) |
void | set_unknown_id (NGRAM_INFO *ndata) |
Set the unknown word ID in the N-gram data. | |
static void | set_total_info (FILE *fp, NGRAM_INFO *ndata) |
static void | set_and_check_total_info (FILE *fp, NGRAM_INFO *ndata) |
static void | set_unigram (FILE *fp, NGRAM_INFO *ndata) |
static void | add_unigram (FILE *fp, NGRAM_INFO *ndata) |
static void | set_bigram (FILE *fp, NGRAM_INFO *ndata) |
static void | add_bigram_rl (FILE *fp, NGRAM_INFO *ndata) |
static void | set_trigram (FILE *fp, NGRAM_INFO *ndata) |
boolean | ngram_read_arpa (FILE *fp, NGRAM_INFO *ndata, int direction) |
void | ngram_compact_bigram_context (NGRAM_INFO *ndata) |
Variables | |
static char | buf [800] |
Local buffer for reading. | |
static char | pbuf [800] |
Local buffer for error string. | |
static boolean | LR_2gram_read = FALSE |
TRUE if the LR 2-gram has already been read. |
Definition in file ngram_read_arpa.c.
static WORD_ID lookup_word | ( | NGRAM_INFO * | ndata, | |
char * | str | |||
) | [static] |
Get the N-gram word/class ID of a string, and terminate the program if it is not found.
ndata | [in] N-gram data | |
str | [in] name string of N-gram entry |
Definition at line 52 of file ngram_read_arpa.c.
Referenced by add_bigram_rl(), add_unigram(), and set_bigram().
void set_unknown_id | ( | NGRAM_INFO * | ndata | ) |
Set the unknown word ID in the N-gram data.
In the CMU-Cam SLM toolkit, OOV words are always mapped to UNK, which always appears at the very beginning of the N-gram entries, so we fix the unknown word ID at "0".
ndata | [out] N-gram data in which to set the unknown word ID. |
Definition at line 72 of file ngram_read_arpa.c.
Referenced by ngram_read_arpa().
static void set_total_info | ( | FILE * | fp, | |
NGRAM_INFO * | ndata | |||
) | [static] |
Set the number of N-gram entries; used when reading the first file (the LR 2-gram).
fp | [in] file pointer | |
ndata | [out] N-gram data to set it. |
Definition at line 97 of file ngram_read_arpa.c.
Referenced by ngram_read_arpa().
static void set_and_check_total_info | ( | FILE * | fp, | |
NGRAM_INFO * | ndata | |||
) | [static] |
Read the number of N-gram entries from the second file (the RL 3-gram), and check that those values are exactly the same as the previously read LR values.
fp | [in] file pointer | |
ndata | [i/o] N-gram data |
Definition at line 121 of file ngram_read_arpa.c.
Referenced by ngram_read_arpa().
static void set_unigram | ( | FILE * | fp, | |
NGRAM_INFO * | ndata | |||
) | [static] |
Read word/class entry names and 1-gram data from LR 2-gram file.
fp | [in] file pointer | |
ndata | [out] N-gram to set the read data. |
Definition at line 155 of file ngram_read_arpa.c.
Referenced by ngram_read_arpa().
static void add_unigram | ( | FILE * | fp, | |
NGRAM_INFO * | ndata | |||
) | [static] |
Read 1-gram data from RL 3-gram file. Only the back-off weights are stored.
fp | [in] file pointer | |
ndata | [out] N-gram to store the read data. |
Definition at line 220 of file ngram_read_arpa.c.
Referenced by ngram_read_arpa().
static void set_bigram | ( | FILE * | fp, | |
NGRAM_INFO * | ndata | |||
) | [static] |
Read 2-gram data from LR 2-gram file.
fp | [in] file pointer | |
ndata | [out] N-gram to set the read data. |
Definition at line 260 of file ngram_read_arpa.c.
Referenced by ngram_read_arpa().
static void add_bigram_rl | ( | FILE * | fp, | |
NGRAM_INFO * | ndata | |||
) | [static] |
Read reverse 2-gram data from the RL 3-gram file, and store the RL 2-gram probabilities and the back-off values for the RL 3-gram in the corresponding LR 2-gram data.
fp | [in] file pointer | |
ndata | [i/o] N-gram to set the read data. |
Definition at line 337 of file ngram_read_arpa.c.
Referenced by ngram_read_arpa().
static void set_trigram | ( | FILE * | fp, | |
NGRAM_INFO * | ndata | |||
) | [static] |
Read reverse 3-gram data from RL 3-gram file and store them.
fp | [in] file pointer | |
ndata | [i/o] N-gram to set the read data. |
Definition at line 374 of file ngram_read_arpa.c.
Referenced by ngram_read_arpa().
boolean ngram_read_arpa | ( | FILE * | fp, | |
NGRAM_INFO * | ndata, | |||
int | direction | |||
) |
Read in one ARPA N-gram file, either LR 2-gram or RL 3-gram.
fp | [in] file pointer | |
ndata | [out] N-gram data to store the read data | |
direction | [in] specify whether this is LR 2-gram or RL 3-gram |
Definition at line 518 of file ngram_read_arpa.c.
Referenced by init_ngram_arpa().
void ngram_compact_bigram_context | ( | NGRAM_INFO * | ndata | ) |
Compact the 2-gram context information.
ndata | [i/o] N-gram data |
Definition at line 630 of file ngram_read_arpa.c.
Referenced by ngram_read_arpa().