Julius: libsent/src/ngram/ngram_read_bin.c ソースファイル

00001 
00054 /*
00055  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
00056  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00057  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
00058  * All rights reserved
00059  */
00060 
00061 #include <sent/stddefs.h>
00062 #include <sent/ngram2.h>
00063 
00064 static int file_version;  
00065 static boolean need_swap; 
00066 #ifdef WORDS_INT
00067 static boolean need_conv;       
00068 static boolean words_int_retry = FALSE; 
00069 #endif
00070 
00075 #define rdn(A,B,C,D) if (rdnfunc(A,B,C,D) == FALSE) return FALSE
00076 #define rdn_wordid(A,B,C,D) if (rdn_wordid_func(A,B,C,D) == FALSE) return FALSE
00077 
00085 static boolean
00086 rdnfunc(FILE *fp, void *buf, size_t unitbyte, size_t unitnum)
00087 {
00088   size_t tmp;
00089   if ((tmp = myfread(buf, unitbyte, unitnum, fp)) < unitnum) {
00090     jlog("Error: ngram_read_bin: failed to read %d bytes\n", unitbyte*unitnum);
00091     return FALSE;
00092   }
00093   if (need_swap) {
00094     if (unitbyte != 1) {
00095       swap_bytes(buf, unitbyte, unitnum);
00096     }
00097   }
00098   return TRUE;
00099 }
00100 
#ifdef WORDS_INT

/*
 * Read unitnum word IDs from fp into buf.
 *
 * When need_conv is set the file holds unsigned short IDs: they are read
 * into the front of buf and then widened to WORD_ID in place.  Otherwise
 * the IDs are read directly as WORD_ID.  In both cases buf must have room
 * for unitnum WORD_ID items.
 *
 * Returns TRUE on success, FALSE if the underlying read failed.
 */
static boolean
rdn_wordid_func(FILE *fp, void *buf, int unitnum, boolean need_conv)
{
  unsigned short *narrow;
  WORD_ID *wide;
  int idx;

  if (!need_conv) {
    /* same width on disk and in memory: plain read */
    if (rdnfunc(fp, buf, sizeof(WORD_ID), unitnum) == FALSE) return FALSE;
    return TRUE;
  }

  /* pull in the narrow on-disk values first */
  if (rdnfunc(fp, buf, sizeof(unsigned short), unitnum) == FALSE) return FALSE;

  /* widen from the last element backwards so that no narrow value is
     overwritten before it has been converted */
  narrow = (unsigned short *)buf;
  wide = (WORD_ID *)buf;
  idx = unitnum;
  while (idx-- > 0) {
    wide[idx] = (WORD_ID)narrow[idx];
  }
  return TRUE;
}
#endif
00135 
00141 static boolean
00142 check_header(FILE *fp)
00143 {
00144   char buf[BINGRAM_HDSIZE], *p;
00145 
00146   rdn(fp, buf, 1, BINGRAM_HDSIZE);
00147   p = buf;
00148 #ifdef WORDS_INT
00149   need_conv = FALSE;
00150 #endif
00151 
00152   /* version check */
00153   if (strnmatch(p, BINGRAM_IDSTR, strlen(BINGRAM_IDSTR))) {
00154     /* bingram file made by mkbingram before 3.4.2 */
00155     file_version = 3;
00156     p += strlen(BINGRAM_IDSTR) + 1;
00157   } else if (strnmatch(p, BINGRAM_IDSTR_V4, strlen(BINGRAM_IDSTR_V4))) {
00158     /* bingram file made by mkbingram later than 3.5 */
00159     file_version = 4;
00160     p += strlen(BINGRAM_IDSTR_V4) + 1;
00161   } else if (strnmatch(p, BINGRAM_IDSTR_V5, strlen(BINGRAM_IDSTR_V5))) {
00162     /* bingram file made by JuliusLib-4 and later */
00163     file_version = 5;
00164     p += strlen(BINGRAM_IDSTR_V5) + 1;
00165   } else {
00166     /* not a bingram file */
00167     jlog("Error: ngram_read_bin: invalid header\n");
00168     return FALSE;
00169   }
00170   /* word size check (for bingram build by mkbingram 3.3p5 and later */
00171   if (strnmatch(p, BINGRAM_SIZESTR_HEAD, strlen(BINGRAM_SIZESTR_HEAD))) {
00172     p += strlen(BINGRAM_SIZESTR_HEAD);
00173     if (! strnmatch(p, BINGRAM_SIZESTR_BODY, strlen(BINGRAM_SIZESTR_BODY))) {
00174       /* word size does not match (int / short) */
00175 #ifdef WORDS_INT
00176       if (strnmatch(p, BINGRAM_SIZESTR_BODY_2BYTE, strlen(BINGRAM_SIZESTR_BODY_2BYTE))) {
00177         /* this is 2-byte word ID, will convert while reading */
00178         jlog("Warning: ngram_read_bin: 2-bytes bingram, converting to 4 bytes\n");
00179         need_conv = TRUE;
00180         p += strlen(BINGRAM_SIZESTR_BODY_2BYTE) + 1;
00181       } else {
00182         jlog("Error: ngram_read_bin: unknown word byte size!\n");
00183         return FALSE;
00184       }
00185 #else
00186       if (strnmatch(p, BINGRAM_SIZESTR_BODY_4BYTE, strlen(BINGRAM_SIZESTR_BODY_4BYTE))) {
00187         /*** 4bytes to 2bytes not implemented, just terminate here... ***/
00188         jlog("Error: ngram_read_bin: cannot handle 4-bytes bingram\n");
00189         jlog("Error: ngram_read_bin: please use Julius compiled with --enable-words-int\n");
00190         return FALSE;
00191         //p += strlen(BINGRAM_SIZESTR_BODY_4BYTE) + 1;
00192       } else {
00193         jlog("Error: ngram_read_bin: unknown word byte size!\n");
00194         return FALSE;
00195       }
00196 #endif
00197     } else {
00198       p += strlen(BINGRAM_SIZESTR_BODY) + 1;
00199     }
00200 
00201     /* byte order check (v4 (rev.3.5) and later) */
00202     if (file_version >= 4) {
00203       if (!strnmatch(p, BINGRAM_BYTEORDER_HEAD, strlen(BINGRAM_BYTEORDER_HEAD))) {
00204         jlog("Error: ngram_read_bin: no information for byte order??\n");
00205         return FALSE;
00206       }
00207       p += strlen(BINGRAM_BYTEORDER_HEAD);
00208       if (! strnmatch(p, BINGRAM_NATURAL_BYTEORDER, strlen(BINGRAM_NATURAL_BYTEORDER))) {
00209         /* file endian and running endian is different, need swapping */
00210         need_swap = TRUE;
00211       } else {
00212         need_swap = FALSE;
00213       }
00214       p += strlen(BINGRAM_NATURAL_BYTEORDER) + 1;
00215     }
00216   } /* if no BINGRAM_SIZESTR_HEAD found, just pass it */
00217 
00218   /* in case of V3 bingram file, the unit size of word_id and its byte order
00219      cannot be determined from the header.  In that case, we assume 
00220      byteorder to be a BIG ENDIAN.  The word_id unit size (2byte in normal,
00221      or 4byte if bingram generated with mkbingram with --enable-words-int)
00222      will be automagically detected.
00223      */
00224 
00225   if (file_version < 4) {
00226     /* assume input as big endian */
00227 #ifdef WORDS_BIGENDIAN
00228     need_swap = FALSE;
00229 #else
00230     need_swap = TRUE;
00231 #endif
00232   }
00233     
00234   /*jlog("%s",buf);*/
00235 
00236   return TRUE;
00237 }
00238 
00239 static boolean
00240 ngram_read_bin_v5(FILE *fp, NGRAM_INFO *ndata)
00241 {
00242   int i,n,len;
00243   char *w, *p;
00244 #ifdef WORDS_INT
00245   unsigned short *buf;
00246 #endif
00247   NGRAM_TUPLE_INFO *t;
00248 
00249   /* read some info extended from version 5 */
00250   rdn(fp, &(ndata->n), sizeof(int), 1);
00251   rdn(fp, &(ndata->dir), sizeof(int), 1);
00252   rdn(fp, &(ndata->bigram_index_reversed), sizeof(boolean), 1);
00253 
00254   jlog("Stat: ngram_read_bin_v5: this is %s %d-gram file\n", (ndata->dir == DIR_LR) ? "forward" : "backward", ndata->n);
00255 
00256   if (ndata->n > MAX_N) {
00257     jlog("Error: ngram_read_bin_v5: too long N-gram (N=%d)\n", n);
00258     jlog("Error: ngram_read_bin_v5: current maximum length of N-gram is set to %d\n", MAX_N);
00259     jlog("Error: ngram_read_bin_v5: you can expand the limit by setting MAX_N in \"sent/ngram.h\"\n");
00260     return FALSE;
00261   }
00262 
00263   /* read total info and set max_word_num */
00264   for(n=0;n<ndata->n;n++) {
00265     rdn(fp, &(ndata->d[n].totalnum), sizeof(NNID), 1);
00266   }
00267   ndata->max_word_num = ndata->d[0].totalnum;
00268 
00269   /* read wname */
00270   rdn(fp, &len, sizeof(int), 1);
00271   w = mymalloc(len);
00272   rdn(fp, w, 1, len);
00273   /* assign... */
00274   ndata->wname = (char **)mymalloc(sizeof(char *) * ndata->max_word_num);
00275   p = w; i = 0;
00276   while (p < w + len) {
00277     ndata->wname[i++] = p;
00278     while(*p != '\0') p++;
00279     p++;
00280   }
00281   if (i != ndata->max_word_num) {
00282     jlog("Error: ngram_read_bin_v5: wname error??\n");
00283     return FALSE;
00284   }
00285 
00286   /* read N-gram */
00287   for(n=0;n<ndata->n;n++) {
00288     jlog("stat: ngram_read_bin_v5: reading %d-gram\n", n+1);
00289 
00290     t = &(ndata->d[n]);
00291     
00292     rdn(fp, &(t->is24bit), sizeof(boolean), 1);
00293     rdn(fp, &(t->ct_compaction), sizeof(boolean), 1);
00294     rdn(fp, &(t->bgnlistlen), sizeof(NNID), 1);
00295     rdn(fp, &(t->context_num), sizeof(NNID), 1);
00296 
00297     if (n > 0) {
00298       if (t->is24bit) {
00299         t->bgn_upper = (NNID_UPPER *)mymalloc_big(sizeof(NNID_UPPER), t->bgnlistlen);
00300         rdn(fp, t->bgn_upper, sizeof(NNID_UPPER), t->bgnlistlen);
00301         t->bgn_lower = (NNID_LOWER *)mymalloc_big(sizeof(NNID_LOWER), t->bgnlistlen);
00302         rdn(fp, t->bgn_lower, sizeof(NNID_LOWER), t->bgnlistlen);
00303       } else {
00304         t->bgn = (NNID *)mymalloc_big(sizeof(NNID), t->bgnlistlen);
00305         rdn(fp, t->bgn, sizeof(NNID), t->bgnlistlen);
00306       }
00307       t->num = (WORD_ID *)mymalloc_big(sizeof(WORD_ID), t->bgnlistlen);
00308       rdn(fp, t->num, sizeof(WORD_ID), t->bgnlistlen);
00309       t->nnid2wid = (WORD_ID *)mymalloc_big(sizeof(WORD_ID), t->totalnum);
00310       rdn(fp, t->nnid2wid, sizeof(WORD_ID), t->totalnum);
00311     } else {
00312       t->bgn_upper = NULL;
00313       t->bgn_lower = NULL;
00314       t->bgn = NULL;
00315       t->num = NULL;
00316       t->bgnlistlen = 0;
00317       t->nnid2wid = NULL;
00318     }
00319 
00320     t->prob = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), t->totalnum);
00321     rdn(fp, t->prob, sizeof(LOGPROB), t->totalnum);
00322 
00323     rdn(fp, &i, sizeof(int), 1);
00324     if (i == 1) {
00325       t->bo_wt = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), t->context_num);
00326       rdn(fp, t->bo_wt, sizeof(LOGPROB), t->context_num);
00327     } else {
00328       t->bo_wt = NULL;
00329     }
00330     rdn(fp, &i, sizeof(int), 1);
00331     if (i == 1) {
00332       t->nnid2ctid_upper = (NNID_UPPER *)mymalloc_big(sizeof(NNID_UPPER), t->totalnum);
00333       t->nnid2ctid_lower = (NNID_LOWER *)mymalloc_big(sizeof(NNID_LOWER), t->totalnum);
00334       rdn(fp, t->nnid2ctid_upper, sizeof(NNID_UPPER), t->totalnum);
00335       rdn(fp, t->nnid2ctid_lower, sizeof(NNID_LOWER), t->totalnum);
00336     } else {
00337       t->nnid2ctid_upper = NULL;
00338       t->nnid2ctid_lower = NULL;
00339     }
00340   }
00341   rdn(fp, &i, sizeof(int), 1);
00342   if (i == 1) {
00343     ndata->bo_wt_1 = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), ndata->d[0].context_num);
00344     rdn(fp, ndata->bo_wt_1, sizeof(LOGPROB), ndata->d[0].context_num);
00345   } else {
00346     ndata->bo_wt_1 = NULL;
00347   }
00348   rdn(fp, &i, sizeof(int), 1);
00349   if (i == 1) {
00350     ndata->p_2 = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), ndata->d[1].totalnum);
00351     rdn(fp, ndata->p_2, sizeof(LOGPROB), ndata->d[1].totalnum);
00352   } else {
00353     ndata->p_2 = NULL;
00354   }
00355 
00356   return TRUE;
00357 }
00358 
00359 static boolean
00360 ngram_read_bin_compat(FILE *fp, NGRAM_INFO *ndata, int *retry_ret)
00361 {
00362   int i,n,len;
00363   char *w, *p;
00364   NNID *n3_bgn;
00365   NNID d, ntmp;
00366 #ifdef WORDS_INT
00367   unsigned short *buf;
00368 #endif
00369   NGRAM_TUPLE_INFO *t, *tt, *ttt;
00370 
00371   /* old binary N-gram assumes these types */
00372   ndata->bigram_index_reversed = TRUE;
00373   ndata->n = 3;
00374   ndata->dir = DIR_RL;
00375 
00376   /* read total info and set max_word_num */
00377   for(n=0;n<ndata->n;n++) {
00378     rdn(fp, &(ndata->d[n].totalnum), sizeof(NNID), 1);
00379   }
00380   ndata->max_word_num = ndata->d[0].totalnum;
00381 
00382   if (file_version == 4) {
00383     rdn(fp, &(ndata->d[1].context_num), sizeof(NNID), 1);
00384   }
00385 
00386   for(n=0;n<ndata->n;n++) {
00387     if (n < 2) {
00388       ndata->d[n].is24bit = FALSE;
00389     } else {
00390       if (ndata->d[n].totalnum >= NNID_MAX_24) {
00391         jlog("Warning: ngram_read_bin_compat: num of %d-gram exceeds 24bit, now switch to %dbit index\n", n+1, sizeof(NNID) * 8);
00392         ndata->d[n].is24bit = FALSE;
00393       } else {
00394         ndata->d[n].is24bit = TRUE;
00395       }
00396     }
00397     ndata->d[n].nnid2ctid_upper = NULL;
00398     ndata->d[n].nnid2ctid_lower = NULL;
00399   }
00400   /* always do back-off compaction for 3-gram and up */
00401   /* mark 2-gram and up */
00402   ndata->d[0].ct_compaction = FALSE;
00403   for(n=1;n<ndata->n;n++) {
00404     ndata->d[n].ct_compaction = TRUE;
00405   }
00406 
00407   /* read wname */
00408   rdn(fp, &len, sizeof(int), 1);
00409   w = mymalloc(len);
00410   rdn(fp, w, 1, len);
00411   /* assign... */
00412   ndata->wname = (char **)mymalloc(sizeof(char *) * ndata->max_word_num);
00413   p = w; i = 0;
00414   while (p < w + len) {
00415     ndata->wname[i++] = p;
00416     while(*p != '\0') p++;
00417     p++;
00418   }
00419   if (i != ndata->max_word_num) {
00420     jlog("Error: ngram_read_bin_compat: wname error??\n");
00421     return FALSE;
00422   }
00423 
00424   /* malloc 1-gram */
00425   t = &(ndata->d[0]);
00426   tt = &(ndata->d[1]);
00427   ttt = &(ndata->d[2]);
00428 
00429   t->bgn_upper = NULL;
00430   t->bgn_lower = NULL;
00431   t->bgn = NULL;
00432   t->num = NULL;
00433   t->bgnlistlen = 0;
00434   t->nnid2wid = NULL;
00435   t->nnid2ctid_upper = NULL;
00436   t->nnid2ctid_lower = NULL;
00437 
00438   t->context_num = t->totalnum;
00439   t->prob = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), t->totalnum);
00440   ndata->bo_wt_1 = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), t->context_num);
00441   t->bo_wt = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), t->context_num);
00442   tt->bgnlistlen = t->context_num;
00443   tt->bgn = (NNID *)mymalloc_big(sizeof(NNID), tt->bgnlistlen);
00444   tt->num = (WORD_ID *)mymalloc_big(sizeof(WORD_ID), tt->bgnlistlen);
00445 
00446   /* read 1-gram */
00447   jlog("stat: ngram_read_bin_compat: reading 1-gram\n");
00448   rdn(fp, t->prob, sizeof(LOGPROB), t->totalnum);
00449   rdn(fp, ndata->bo_wt_1, sizeof(LOGPROB), t->context_num);
00450   rdn(fp, t->bo_wt, sizeof(LOGPROB), t->context_num);
00451   rdn(fp, tt->bgn, sizeof(NNID), tt->bgnlistlen);
00452 #ifdef WORDS_INT
00453   rdn_wordid(fp, tt->num, tt->bgnlistlen, need_conv);
00454 #else
00455   rdn(fp, tt->num, sizeof(WORD_ID), tt->bgnlistlen);
00456 #endif
00457 
00458 #ifdef WORDS_INT
00459   {
00460     /* check if we are wrongly reading word_id=2byte bingram
00461        (if bingram version >= 4, this should not be happen because
00462         header correctly tells the word_id byte size.  This will 
00463         occur only if matches all the conditions below:
00464         - you run Julius with --enable-words-int,
00465         - you use old bingram of version <= 3, and
00466         - you use bingram file converted without --enable-words-int
00467      */
00468     WORD_ID w;
00469     for(w=0;w<ndata->max_word_num;w++) {
00470       if (ndata->d[1].num[w] > ndata->max_word_num) {
00471         if (words_int_retry) {
00472           jlog("Error: ngram_read_bin_compat: retry failed, wrong bingram format\n");
00473           return FALSE;
00474         }
00475         jlog("Warning: ngram_read_bin_compat: incorrect data, may be a 2-byte v3 bingram, retry with conversion\n");
00476         free(ndata->wname[0]);
00477         free(ndata->wname);
00478         free(t->prob);
00479         free(ndata->bo_wt_1);
00480         free(t->bo_wt);
00481         free(tt->bgn);
00482         free(tt->num);
00483         myfrewind(fp);
00484         words_int_retry = TRUE;
00485         *retry_ret = 1;
00486         return FALSE;
00487       }
00488     }
00489   }
00490 #endif
00491 
00492   /* malloc the rest */
00493   tt->nnid2wid = (WORD_ID *)mymalloc_big(sizeof(WORD_ID), tt->totalnum);
00494   tt->prob = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), tt->totalnum);
00495   ndata->p_2 = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), tt->totalnum);
00496   if (file_version == 4) {      /* context compaction and 24bit */
00497     tt->nnid2ctid_upper = (NNID_UPPER *)mymalloc_big(sizeof(NNID_UPPER), tt->totalnum);
00498     tt->nnid2ctid_lower = (NNID_LOWER *)mymalloc_big(sizeof(NNID_LOWER), tt->totalnum);
00499     tt->bo_wt = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), tt->context_num);
00500     ttt->bgnlistlen = tt->context_num;
00501     ttt->bgn_upper = (NNID_UPPER *)mymalloc_big(sizeof(NNID_UPPER), ttt->bgnlistlen);
00502     ttt->bgn_lower = (NNID_LOWER *)mymalloc_big(sizeof(NNID_LOWER), ttt->bgnlistlen);
00503     ttt->num = (WORD_ID *)mymalloc_big(sizeof(WORD_ID), ttt->bgnlistlen);
00504   } else {
00505     tt->context_num = tt->totalnum;
00506     tt->bo_wt = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), tt->context_num);
00507     ttt->bgnlistlen = tt->context_num;
00508     ttt->num = (WORD_ID *)mymalloc_big(sizeof(WORD_ID), ttt->bgnlistlen);
00509     if (ttt->is24bit) {
00510       ttt->bgn_upper = (NNID_UPPER *)mymalloc_big(sizeof(NNID_UPPER), ttt->bgnlistlen);
00511       ttt->bgn_lower = (NNID_LOWER *)mymalloc_big(sizeof(NNID_LOWER), ttt->bgnlistlen);
00512       n3_bgn = (NNID *)mymalloc_big(sizeof(NNID), ttt->bgnlistlen);
00513     } else {
00514       ttt->bgn = (NNID *)mymalloc_big(sizeof(NNID), ttt->bgnlistlen);
00515     }
00516   }
00517       
00518   ttt->nnid2wid = (WORD_ID *)mymalloc_big(sizeof(WORD_ID), ttt->totalnum);
00519   ttt->prob = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), ttt->totalnum);
00520   ttt->bo_wt = NULL;
00521   
00522   /* read 2-gram*/
00523   jlog("Stat: ngram_read_bin_compat: reading 2-gram\n");
00524 #ifdef WORDS_INT
00525   rdn_wordid(fp, tt->nnid2wid, tt->totalnum, need_conv);
00526 #else
00527   rdn(fp, tt->nnid2wid, sizeof(WORD_ID), tt->totalnum);
00528 #endif
00529   rdn(fp, ndata->p_2, sizeof(LOGPROB), tt->totalnum);
00530   rdn(fp, tt->prob, sizeof(LOGPROB), tt->totalnum);
00531   if (file_version == 4) {
00532     rdn(fp, tt->nnid2ctid_upper, sizeof(NNID_UPPER), tt->totalnum);
00533     rdn(fp, tt->nnid2ctid_lower, sizeof(NNID_LOWER), tt->totalnum);
00534     rdn(fp, tt->bo_wt, sizeof(LOGPROB), tt->context_num);
00535     rdn(fp, ttt->bgn_upper, sizeof(NNID_UPPER), ttt->bgnlistlen);
00536     rdn(fp, ttt->bgn_lower, sizeof(NNID_LOWER), ttt->bgnlistlen);
00537 #ifdef WORDS_INT
00538     rdn_wordid(fp, ttt->num, ttt->bgnlistlen, need_conv);
00539 #else
00540     rdn(fp, ttt->num, sizeof(WORD_ID), ttt->bgnlistlen);
00541 #endif
00542   } else {
00543     rdn(fp, tt->bo_wt, sizeof(LOGPROB), tt->context_num);
00544     if (ttt->is24bit) {
00545       rdn(fp, n3_bgn, sizeof(NNID), ttt->bgnlistlen);
00546       for(d=0;d<ttt->bgnlistlen;d++) {
00547         if (n3_bgn[d] == NNID_INVALID) {
00548           ttt->bgn_lower[d] = 0;
00549           ttt->bgn_upper[d] = NNID_INVALID_UPPER;
00550         } else {
00551           ntmp = n3_bgn[d] & 0xffff;
00552           ttt->bgn_lower[d] = ntmp;
00553           ntmp = n3_bgn[d] >> 16;
00554           ttt->bgn_upper[d] = ntmp;
00555         }
00556       }
00557     } else {
00558       rdn(fp, ttt->bgn, sizeof(NNID), ttt->bgnlistlen);
00559     }
00560 #ifdef WORDS_INT
00561     rdn_wordid(fp, ttt->num, ttt->bgnlistlen, need_conv);
00562 #else
00563     rdn(fp, ttt->num, sizeof(WORD_ID), ttt->bgnlistlen);
00564 #endif
00565   }
00566 
00567   /* read 3-gram*/
00568   jlog("Stat: ngram_read_bin_compat: reading 3-gram\n");
00569 #ifdef WORDS_INT
00570   rdn_wordid(fp, ttt->nnid2wid, ttt->totalnum, need_conv);
00571 #else
00572   rdn(fp, ttt->nnid2wid, sizeof(WORD_ID), ttt->totalnum);
00573 #endif
00574   rdn(fp, ttt->prob, sizeof(LOGPROB), ttt->totalnum);
00575 
00576   /* compact the 2-gram back-off and 3-gram links */
00577   if (file_version != 4) {
00578     if (ttt->is24bit) {
00579       free(n3_bgn);
00580       if (ngram_compact_context(ndata, 2) == FALSE) return FALSE;
00581     }
00582   }
00583   
00584   return TRUE;
00585 }
00586 
00587 
00596 boolean
00597 ngram_read_bin(FILE *fp, NGRAM_INFO *ndata)
00598 {
00599   int retry;
00600 
00601 #ifdef WORDS_INT
00602   /* reset retry flag */
00603   words_int_retry = FALSE;
00604   /* when retrying, it restarts from here with words_int_retry = TRUE */
00605  ngram_read_bin_start:
00606 #endif
00607   
00608   ndata->from_bin = TRUE;
00609 
00610   /* check initial header */
00611   if (check_header(fp) == FALSE) return FALSE;
00612   
00613 #ifdef WORDS_INT
00614   /* in retry mode, force word_id conversion  */
00615   if (words_int_retry) need_conv = TRUE;
00616 #endif
00617   
00618 #ifdef WORDS_INT
00619   if (need_conv) jlog("Stat: ngram_read_bin: word-id size conversion enabled\n");
00620 #endif
00621 
00622   if (file_version <= 4) {
00623     retry = 0;
00624     if (ngram_read_bin_compat(fp, ndata, &retry) == FALSE) {
00625 #ifdef WORDS_INT
00626       if (retry == 1) {
00627         goto ngram_read_bin_start;
00628       } else {
00629         return FALSE;
00630       }
00631 #else
00632       return FALSE;
00633 #endif
00634     }
00635   } else {
00636     if (ngram_read_bin_v5(fp, ndata) == FALSE) return FALSE;
00637   }
00638 
00639 
00640   /* make word search tree for later lookup */
00641   jlog("Stat: ngram_read_bin: making entry name index\n");
00642   ngram_make_lookup_tree(ndata);
00643 
00644   bi_prob_func_set(ndata);
00645 
00646   return TRUE;
00647 }
00648