#include <sent/stddefs.h>
#include <sent/ngram2.h>
Include dependency graph for ngram_read_arpa.c:

Go to the source code of this file.
Functions | |
| static WORD_ID | lookup_word (NGRAM_INFO *ndata, char *str) | 
| void | set_unknown_id (NGRAM_INFO *ndata) | 
| Set the unknown word ID in the N-gram data.   | |
| static void | set_total_info (FILE *fp, NGRAM_INFO *ndata) | 
| static void | set_and_check_total_info (FILE *fp, NGRAM_INFO *ndata) | 
| static void | set_unigram (FILE *fp, NGRAM_INFO *ndata) | 
| static void | add_unigram (FILE *fp, NGRAM_INFO *ndata) | 
| static void | set_bigram (FILE *fp, NGRAM_INFO *ndata) | 
| static void | add_bigram_rl (FILE *fp, NGRAM_INFO *ndata) | 
| static void | set_trigram (FILE *fp, NGRAM_INFO *ndata) | 
| boolean | ngram_read_arpa (FILE *fp, NGRAM_INFO *ndata, int direction) | 
| void | ngram_compact_bigram_context (NGRAM_INFO *ndata) | 
Variables | |
| static char | buf [800] | 
| Local buffer for reading.  | |
| static char | pbuf [800] | 
| Local buffer for error string.  | |
| static boolean | LR_2gram_read = FALSE | 
| TRUE if LR 2gram has already been read.  | |
Definition in file ngram_read_arpa.c.
      
  | 
  static WORD_ID lookup_word (NGRAM_INFO *ndata, char *str)
| 
 Get the N-gram word/class ID of a string, and terminate the program if it is not found. 
 
 
 Definition at line 52 of file ngram_read_arpa.c. Referenced by add_bigram_rl(), add_unigram(), set_bigram(), and set_trigram().  | 
  
      
  | 
  void set_unknown_id (NGRAM_INFO *ndata)
| 
 Set the unknown word ID in the N-gram data. In the CMU-Cam SLM toolkit, OOV words are always mapped to <unk>, which always appears at the very beginning of the N-gram entries, so we fix the unknown word ID at "0". 
 
 Definition at line 72 of file ngram_read_arpa.c. Referenced by ngram_read_arpa(), and ngram_read_bin().  | 
  
      
  | 
  static void set_total_info (FILE *fp, NGRAM_INFO *ndata)
| 
 Set the number of N-gram entries, for reading the first LR 2-gram. 
 
 Definition at line 97 of file ngram_read_arpa.c. Referenced by ngram_read_arpa().  | 
  
      
  | 
  static void set_and_check_total_info (FILE *fp, NGRAM_INFO *ndata)
| 
 Read the number of N-gram entries of the second RL 3-gram, and check that those values are exactly the same as the previous LR values. 
 
 Definition at line 121 of file ngram_read_arpa.c. Referenced by ngram_read_arpa().  | 
  
      
  | 
  static void set_unigram (FILE *fp, NGRAM_INFO *ndata)
| 
 Read word/class entry names and 1-gram data from LR 2-gram file. 
 
 Definition at line 155 of file ngram_read_arpa.c. Referenced by ngram_read_arpa().  | 
  
      
  | 
  static void add_unigram (FILE *fp, NGRAM_INFO *ndata)
| 
 Read 1-gram data from RL 3-gram file. Only the back-off weights are stored. 
 
 Definition at line 220 of file ngram_read_arpa.c. Referenced by ngram_read_arpa().  | 
  
      
  | 
  static void set_bigram (FILE *fp, NGRAM_INFO *ndata)
| 
 Read 2-gram data from LR 2-gram file. 
 
 Definition at line 260 of file ngram_read_arpa.c. Referenced by ngram_read_arpa().  | 
  
      
  | 
  static void add_bigram_rl (FILE *fp, NGRAM_INFO *ndata)
| 
 Read reverse 2-gram data from RL 3-gram file, and set RL 2-gram probabilities and back-off values for RL 3-gram to the corresponding LR 2-gram data. 
 
 Definition at line 337 of file ngram_read_arpa.c. Referenced by ngram_read_arpa().  | 
  
      
  | 
  static void set_trigram (FILE *fp, NGRAM_INFO *ndata)
| 
 Read reverse 3-gram data from RL 3-gram file and store them. 
 
 Definition at line 374 of file ngram_read_arpa.c. Referenced by ngram_read_arpa().  | 
  
      
  | 
  boolean ngram_read_arpa (FILE *fp, NGRAM_INFO *ndata, int direction)
| 
 Read in one ARPA N-gram file, either LR 2-gram or RL 3-gram. 
 
 
 Definition at line 518 of file ngram_read_arpa.c. Referenced by init_ngram_arpa().  | 
  
      
  | 
  void ngram_compact_bigram_context (NGRAM_INFO *ndata)
| 
 Compact the 2-gram context information. 
 
 Definition at line 630 of file ngram_read_arpa.c. Referenced by ngram_read_arpa(), and ngram_read_bin().  | 
  
 1.4.2