Julius: libsent/src/ngram/ngram_read

00001 
00025 /*
00026  * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
00027  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00028  * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology
00029  * All rights reserved
00030  */
00031 
00032 /* $Id: ngram_read_arpa.c,v 1.3 2005/11/22 14:34:07 sumomo Exp $ */
00033 
00034 /* words should be alphabetically sorted */
00035 
00036 #include <sent/stddefs.h>
00037 #include <sent/ngram2.h>
00038 
/* Size in bytes of the two line buffers below. */
enum { ARPA_LINE_BUFLEN = 800 };

/* Work buffer for the line most recently fetched with getl(); it is also
   inspected by ngram_read_arpa() to recognise the next section marker. */
static char buf[ARPA_LINE_BUFLEN];
/* Copy of the current line taken before it is tokenized, so that error
   messages can quote the entry as it appeared in the file. */
static char pbuf[ARPA_LINE_BUFLEN];
00041 
00042 
00051 static WORD_ID
00052 lookup_word(NGRAM_INFO *ndata, char *str)
00053 {
00054   WORD_ID wid;
00055   
00056   if ((wid = ngram_lookup_word(ndata, str)) == WORD_INVALID) {
00057     j_error("word %s not in N-gram vocabulary.\n",str);
00058   }
00059   return wid;
00060 }
00061 
00071 void
00072 set_unknown_id(NGRAM_INFO *ndata)
00073 {
00074 #if 0
00075   ndata->unk_id = ngram_lookup_word(ndata, unkword);
00076   if (ndata->unk_id == WORD_INVALID) {
00077     j_printerr("word %s not found, so assume this is a closed vocabulary model\n",
00078             unkword);
00079     ndata->isopen = FALSE;
00080   } else {
00081     ndata->isopen = TRUE;
00082   }
00083 #endif
00084   ndata->isopen = TRUE;
00085   ndata->unk_id = 0;            /* unknown (OOV) words are always mapped to
00086                                    the number 0 (by CMU-TK)*/
00087 }
00088 
00089 
00096 static void
00097 set_total_info(FILE *fp, NGRAM_INFO *ndata)
00098 {
00099   char *p;
00100   int n;
00101 
00102   while (getl(buf, sizeof(buf), fp) != NULL && buf[0] != '\\') {
00103     if (strnmatch(buf, "ngram", 5)) { /* n-gram num */
00104       p = strtok(buf, "=");
00105       n = p[strlen(p)-1] - '0' - 1;
00106       p = strtok(NULL, "=");
00107       ndata->ngram_num[n] = atoi(p);
00108     }
00109   }
00110 }
00111 
00112 /* read total info and check it with LR data (RL) */
00120 static void
00121 set_and_check_total_info(FILE *fp, NGRAM_INFO *ndata)
00122 {
00123   char *p;
00124   int n;
00125 
00126   while (getl(buf, sizeof(buf), fp) != NULL && buf[0] != '\\') {
00127     if (strnmatch(buf, "ngram", 5)) { /* n-gram num */
00128       p = strtok(buf, "=");
00129       n = p[strlen(p)-1] - '0' - 1;
00130       p = strtok(NULL, "=");
00131 /* 
00132  *       if (n <= 2 && ndata->ngram_num[n] != atoi(p)) {
00133  *         j_printerr("LR and RL don't match at ngram_num!\n");
00134  *         j_error("cut-off value when building LM differ?\n");
00135  *       }
00136  */
00137       if (n == 2) {             /* 3-gram */
00138         ndata->ngram_num[n] = atoi(p);
00139       } else {
00140         if (n <= 1 && ndata->ngram_num[n] != atoi(p)) {
00141           j_printerr("Warning: %d-gram total num differ! may cause read error\n",n+1);
00142         }
00143       }
00144     }
00145   }
00146 }
00147 
00154 static void
00155 set_unigram(FILE *fp, NGRAM_INFO *ndata)
00156 {
00157   WORD_ID read_word_num;        /* # of words already read */
00158   WORD_ID nid, resid;
00159   LOGPROB prob, bo_wt;
00160   char *name, *p;
00161 
00162   /* malloc area */
00163   ndata->wname = (char **)mymalloc(sizeof(char *)*ndata->ngram_num[0]);
00164   ndata->p = (LOGPROB *)mymalloc(sizeof(LOGPROB)*ndata->ngram_num[0]);
00165   ndata->bo_wt_lr = (LOGPROB *)mymalloc(sizeof(LOGPROB)*ndata->ngram_num[0]);
00166   ndata->bo_wt_rl = (LOGPROB *)mymalloc(sizeof(LOGPROB)*ndata->ngram_num[0]);
00167   ndata->n2_bgn = (NNID *)mymalloc(sizeof(NNID)*ndata->ngram_num[0]);
00168   ndata->n2_num = (WORD_ID *)mymalloc(sizeof(WORD_ID)*ndata->ngram_num[0]);
00169   read_word_num = 0;
00170   
00171   while (getl(buf, sizeof(buf), fp) != NULL && buf[0] != '\\') {
00172     prob = (LOGPROB)atof(first_token(buf));
00173     p = next_token();
00174     name = strcpy((char *)mymalloc(strlen(p)+1), p);
00175     bo_wt = (LOGPROB)atof(next_token());
00176 
00177     /* register unigram */
00178     nid = read_word_num;
00179     ndata->wname[nid] = name;
00180     /* add entry name to index tree */
00181     if (ndata->root == NULL) {
00182       ndata->root = ptree_make_root_node(nid);
00183     } else {
00184       resid = ptree_search_data(name, ndata->root);
00185       if (strmatch(name, ndata->wname[resid])) { /* already exist */
00186         j_error("Error: word \"%s\" multiply defined at (#%d and #%d)\n",
00187                    name, resid, nid);
00188       } else {
00189         ptree_add_entry(name, nid, ndata->wname[resid], &(ndata->root));
00190       }
00191     }
00192     ndata->p[nid] = prob;
00193     ndata->bo_wt_lr[nid] = bo_wt;
00194     ndata->n2_bgn[nid] = NNID_INVALID;
00195     ndata->n2_num[nid] = 0;
00196   
00197     read_word_num++;
00198     if (read_word_num > ndata->max_word_num) {
00199       j_printerr("Error: actual n-gram word num exceeded header value\n");
00200       j_error("%d > %d\n", read_word_num, ndata->max_word_num);
00201     }
00202   }
00203 
00204   if (read_word_num != ndata->ngram_num[0]) {
00205     j_printerr("Error: actual n-gram word num not match the header value\n");
00206     j_error("%d != %d ?\n", read_word_num, ndata->ngram_num[0]);
00207   }
00208   j_printerr("  1-gram read %d end\n", read_word_num);
00209 }
00210 
00211 /* read-in 1-gram (RL) --- only add back-off weight */
00219 static void
00220 add_unigram(FILE *fp, NGRAM_INFO *ndata)
00221 {
00222   WORD_ID read_word_num;
00223   WORD_ID nid;
00224   LOGPROB prob, bo_wt;
00225   char *name, *p;
00226 
00227   read_word_num = 0;
00228   while (getl(buf, sizeof(buf), fp) != NULL && buf[0] != '\\') {
00229     prob = atof(first_token(buf));
00230     p = next_token();
00231     name = strcpy((char *)mymalloc(strlen(p)+1), p);
00232     bo_wt = (LOGPROB)atof(next_token());
00233   
00234     /* add bo_wt_rl to existing 1-gram entry */
00235     nid = lookup_word(ndata, name);
00236     if (nid == WORD_INVALID) {
00237       j_printerr("Warning: n-gram word \"%s\" in RL not exist in LR (ignored)\n", name);
00238     } else {
00239       ndata->bo_wt_rl[nid] = bo_wt;
00240     }
00241   
00242     read_word_num++;
00243     if (read_word_num > ndata->max_word_num) {
00244       j_printerr("Error: actual n-gram word num exceeded header value\n");
00245       j_error("%d > %d\n", read_word_num, ndata->max_word_num);
00246     }
00247     free(name);
00248   }
00249   j_printerr("  1-gram read %d end\n", read_word_num);
00250   
00251 }
00252 
00259 static void
00260 set_bigram(FILE *fp, NGRAM_INFO *ndata)
00261 {
00262   int w_l, w_r;
00263   int w_last, w_r_last;
00264   LOGPROB p;
00265   NNID n2;
00266 
00267   ndata->n2tonid = (WORD_ID *)mymalloc(sizeof(WORD_ID)*ndata->ngram_num[1]);
00268   ndata->p_lr = (LOGPROB *)mymalloc(sizeof(LOGPROB)*ndata->ngram_num[1]);
00269   ndata->p_rl = (LOGPROB *)mymalloc(sizeof(LOGPROB)*ndata->ngram_num[1]);
00270   ndata->bo_wt_rrl = (LOGPROB *)mymalloc(sizeof(LOGPROB)*ndata->ngram_num[1]);
00271 
00272   n2 = 0;
00273   
00274   /* read in LR 2-gram */
00275   w_last = -1; w_r_last = -1;
00276   for (;;) {
00277     if (getl(buf, sizeof(buf), fp) == NULL || buf[0] == '\\') break;
00278     strcpy(pbuf, buf);
00279     if ( n2 % 100000 == 0) {
00280       j_printerr("  2-gram read %d (%d%%)\n", n2, n2 * 100 / ndata->ngram_num[1]);
00281     }
00282 
00283     /* 2-gram probability */
00284     p = (LOGPROB)atof(first_token(buf));
00285     /* read in left (context) word and lookup the ID */
00286     w_l = lookup_word(ndata, next_token());
00287     /* increment n2_bgn and n2_num if context word changed */
00288     if (w_l != w_last) {
00289       if (w_last != -1) ndata->n2_num[w_last] = n2 - ndata->n2_bgn[w_last];
00290       /* the next context word should be an new entry */
00291       if (ndata->n2_bgn[w_l] != NNID_INVALID) {
00292         j_printerr("Error: entry not sorted (same left context not sequenced)\n");
00293         j_error("at 2-gram #%d: \"%s\"\n", n2+1, pbuf);
00294       }
00295       ndata->n2_bgn[w_l] = n2;
00296       w_r_last = -1;
00297     }
00298     /* read in right word and set */
00299     w_r = lookup_word(ndata, next_token());
00300     if (w_r == w_r_last) {
00301       j_printerr("Error: duplicated entry\n");
00302       j_error("at 2-gram #%d: \"%s\"\n", n2+1, pbuf);
00303     } else if (w_r < w_r_last) {
00304       j_printerr("Error: entry not sorted downward\n");
00305       j_error("at 2-gram #%d: \"%s\"\n", n2+1, pbuf);
00306     }
00307     ndata->n2tonid[n2] = w_r;
00308     ndata->p_lr[n2] = p;
00309 
00310     n2++;
00311     w_last = w_l;
00312     w_r_last = w_r;
00313 
00314     /* check total num */
00315     if (n2 > ndata->ngram_num[1]) {
00316       j_printerr("Error: actual 2-gram num not match the header value\n");
00317       j_error("%d != %d ?\n", n2, ndata->ngram_num[1]);
00318     }
00319   }
00320   
00321   /* set the last entry */
00322   ndata->n2_num[w_last] = n2 - ndata->n2_bgn[w_last];
00323 
00324   j_printerr("  2-gram read %d end\n", n2);
00325 
00326 }
00327 
00336 static void
00337 add_bigram_rl(FILE *fp, NGRAM_INFO *ndata)
00338 {
00339   WORD_ID w_l, w_r;
00340   LOGPROB prob, bo_wt;
00341   int bi_count = 0;
00342   NNID n2;
00343 
00344   while (getl(buf, sizeof(buf), fp) != NULL && buf[0] != '\\') {
00345     /* p(w_l|w_r) w_r w_l bo_wt_rl */
00346     if ( ++bi_count % 100000 == 0) {
00347       j_printerr("  2-gram read %d (%d%%)\n", bi_count, bi_count * 100 / ndata->ngram_num[1]);
00348     }
00349     prob = (LOGPROB)atof(first_token(buf));
00350     w_r = lookup_word(ndata, next_token());
00351     w_l = lookup_word(ndata, next_token());
00352     bo_wt = (LOGPROB)atof(next_token());
00353     n2 = search_bigram(ndata, w_l, w_r);
00354     if (n2 == NNID_INVALID) {
00355       j_printerr("Warning: (%s,%s) not exist in LR 2-gram (ignored)\n",
00356               ndata->wname[w_l], ndata->wname[w_r]);
00357     } else {
00358       ndata->p_rl[n2] = prob;
00359       ndata->bo_wt_rrl[n2] = bo_wt;
00360     }
00361   }
00362   j_printerr("  2-gram read %d end\n", bi_count);
00363   
00364 }
00365     
00366 
00373 static void
00374 set_trigram(FILE *fp, NGRAM_INFO *ndata)
00375 {
00376   int w_l, w_m, w_r;
00377   LOGPROB p_rl;
00378   int w_r_last, w_m_last, w_l_last;
00379   NNID n2, n2_last;
00380   NNID n3;
00381   NNID ntmp;
00382 
00383   /* allocate pointer from 2gram to 3gram */
00384   switch(ndata->version) {
00385   case 3:
00386     ndata->n3_bgn = (NNID *)mymalloc(sizeof(NNID)*ndata->ngram_num[1]);
00387     for(n2=0;n2<ndata->ngram_num[1];n2++) ndata->n3_bgn[n2] = NNID_INVALID;
00388     break;
00389   case 4:
00390     ndata->n3_bgn_upper = (NNID_UPPER *)mymalloc(sizeof(NNID_UPPER)*ndata->ngram_num[1]);
00391     ndata->n3_bgn_lower = (NNID_LOWER *)mymalloc(sizeof(NNID_LOWER)*ndata->ngram_num[1]);
00392     for(n2=0;n2<ndata->ngram_num[1];n2++) {    
00393       ndata->n3_bgn_upper[n2] = NNID_INVALID_UPPER;
00394       ndata->n3_bgn_lower[n2] = 0;
00395     }
00396     break;
00397   }
00398   ndata->n3_num = (WORD_ID *)mymalloc(sizeof(WORD_ID)*ndata->ngram_num[1]);
00399   for(n2=0;n2<ndata->ngram_num[1];n2++) ndata->n3_num[n2] = 0;
00400 
00401   /* allocate data area for 3-gram */
00402   ndata->n3tonid = (WORD_ID *)mymalloc(sizeof(WORD_ID)*ndata->ngram_num[2]);
00403   ndata->p_rrl = (LOGPROB *)mymalloc(sizeof(LOGPROB)*ndata->ngram_num[2]);
00404   n3 = 0;
00405 
00406   n2 = n2_last = NNID_INVALID;
00407   w_r_last = w_m_last = w_l_last = -1;
00408   for (;;) {
00409 
00410     if (getl(buf, sizeof(buf), fp) == NULL || buf[0] == '\\') break;
00411     strcpy(pbuf, buf);
00412     if (n3 % 100000 == 0) {
00413       j_printerr("  3-gram read %d (%d%%)\n", n3, n3 * 100 / ndata->ngram_num[2]);
00414     }
00415 
00416     /* N-gram probability */
00417     p_rl = (LOGPROB)atof(first_token(buf));
00418     /* read in right (first) word and lookup its ID */
00419     w_r = lookup_word(ndata, next_token());
00420     /* read in middle word and lookup its ID */
00421     w_m = lookup_word(ndata, next_token());
00422 
00423     /* if context changed, create the next structure */
00424     if (w_r != w_r_last || w_m != w_m_last) {
00425       n2 = search_bigram(ndata, (WORD_ID)w_m, (WORD_ID)w_r);
00426       if (n2 == NNID_INVALID) { /* no context */
00427         j_printerr("Warning: context (%s,%s) not exist in LR 2-gram (ignored)\n",
00428                    ndata->wname[w_m], ndata->wname[w_r]);
00429         continue;
00430       }
00431       switch(ndata->version) {
00432       case 3:
00433         ntmp = ndata->n3_bgn[n2_last];
00434         break;
00435       case 4:
00436         ntmp = ((NNID)(ndata->n3_bgn_upper[n2_last]) << 16) + (NNID)(ndata->n3_bgn_lower[n2_last]);
00437         break;
00438       }
00439       if (n2_last != NNID_INVALID) ndata->n3_num[n2_last] = n3 - ntmp;
00440       /* check: the next 'n2' should be an new entry */
00441       switch(ndata->version) {
00442       case 3:
00443         if (ndata->n3_bgn[n2] != NNID_INVALID) {
00444           j_printerr("Error: entry not sorted (same left context not sequenced)\n");
00445           j_error("at 3-gram #%d: \"%s\"\n", n3+1, pbuf);
00446         }
00447         ndata->n3_bgn[n2] = n3;
00448         break;
00449       case 4:
00450         if (ndata->n3_bgn_upper[n2] != NNID_INVALID_UPPER) {
00451           j_printerr("Error: entry not sorted (same left context not sequenced)\n");
00452           j_error("at 3-gram #%d: \"%s\"\n", n3+1, pbuf);
00453         }
00454         ntmp = n3 & 0xffff;
00455         ndata->n3_bgn_lower[n2] = ntmp;
00456         ntmp = n3 >> 16;
00457         ndata->n3_bgn_upper[n2] = ntmp;
00458         break;
00459       }
00460 
00461       n2_last = n2;
00462       w_l_last = -1;
00463     } else {
00464       if (n2 == NNID_INVALID) continue;
00465     }
00466     
00467     /* read in left (last) word and store */
00468     w_l = lookup_word(ndata, next_token());
00469     if (w_l == w_l_last) {
00470       j_printerr("Error: duplicated entry\n");
00471       j_error("at 3-gram #%d: \"%s\"\n", n3+1, pbuf);
00472     } else if (w_l < w_l_last) {
00473       j_printerr("Error: entry not sorted downward\n");
00474       j_error("at 3-gram #%d: \"%s\"\n", n3+1, pbuf);
00475     }
00476     ndata->n3tonid[n3] = w_l;
00477     ndata->p_rrl[n3] = p_rl;
00478 
00479     n3++;
00480     w_m_last = w_m;
00481     w_r_last = w_r;
00482     w_l_last = w_l;
00483 
00484     /* check the 3-gram num */
00485     if (n3 > ndata->ngram_num[2]) {
00486       j_printerr("Error: actual 3-gram num not match the header value\n");
00487       j_error("%d != %d ?\n", n3, ndata->ngram_num[2]);
00488     }
00489   }
00490 
00491   /* store the last n3_num */
00492   switch(ndata->version) {
00493   case 3:
00494     ntmp = ndata->n3_bgn[n2_last];
00495     break;
00496   case 4:
00497     ntmp = ((NNID)(ndata->n3_bgn_upper[n2_last]) << 16) + (NNID)(ndata->n3_bgn_lower[n2_last]);
00498     break;
00499   }
00500   ndata->n3_num[n2_last] = n3 - ntmp;
00501 
00502   j_printerr("  3-gram read %d end\n", n3);
00503 }
00504 
00505 
00506 static boolean LR_2gram_read = FALSE; 
00507 
00517 boolean
00518 ngram_read_arpa(FILE *fp, NGRAM_INFO *ndata, int direction)
00519 {
00520   int n;
00521 
00522   ndata->from_bin = FALSE;
00523 
00524   if (!LR_2gram_read && direction == DIR_RL) {
00525     j_printerr("you should first read LR 2-gram\n");
00526     return FALSE;
00527   }
00528 
00529   if (direction == DIR_LR) {
00530     n = 2;
00531   } else {
00532     n = 3;
00533   }
00534 
00535   /* read until `\data\' found */
00536   while (getl(buf, sizeof(buf), fp) != NULL && strncmp(buf,"\\data\\",6) != 0);
00537     
00538   /* read n-gram total info */
00539   if (direction == DIR_LR) {
00540     set_total_info(fp, ndata);
00541   } else {
00542     set_and_check_total_info(fp, ndata);
00543   }
00544   if (ndata->ngram_num[0] > MAX_WORD_NUM) {
00545     j_error("Error: vocabulary size exceeded limit (%d)\n", MAX_WORD_NUM);
00546   }
00547   ndata->max_word_num = ndata->ngram_num[0];
00548 
00549   /* version requirement check (determined by 3-gram entry limit) */
00550   if (n >= 3) {
00551     if (ndata->ngram_num[2] >= NNIDMAX) {
00552       j_printerr("Warning: more than %d 3-gram tuples, use old structure\n", NNIDMAX);
00553       ndata->version = 3;
00554     } else {
00555       ndata->version = 4;
00556     }
00557   }
00558   
00559   /* read 1-gram data */
00560   if (!strnmatch(buf,"\\1-grams",8)) {
00561     j_error("data format error: 1-gram not found\n");
00562   }
00563   j_printerr("  reading 1-gram part...\n");
00564   if (direction == DIR_LR) {
00565     set_unigram(fp, ndata);
00566   } else {
00567     add_unigram(fp, ndata);
00568   }
00569   
00570   if (n >= 2) {
00571     /* read 2-gram data */
00572     if (!strnmatch(buf,"\\2-grams", 8)) {
00573       j_error("data format error: 2-gram not found\n");
00574     }
00575     j_printerr("  reading 2-gram part...\n");
00576     if (direction == DIR_LR) {
00577       set_bigram(fp, ndata);
00578     } else {
00579       add_bigram_rl(fp, ndata);
00580     }
00581   }
00582 
00583   if (n >= 3) {
00584       /* read 3-gram data */
00585     if (!strnmatch(buf,"\\3-grams", 8)) {
00586       j_error("data format error: 3-gram not found\n");
00587     }
00588     if ( direction == DIR_LR) {
00589       j_error("should not happen..\n");
00590     } else {
00591       j_printerr("  reading 3-gram part...\n");
00592       set_trigram(fp, ndata);
00593     }
00594   }
00595 
00596   /* finished */
00597   if (!strnmatch(buf, "\\end", 4)) {
00598     j_error("data format error: data end marker \"\\end\" not found\n");
00599   }
00600 #ifdef CLASS_NGRAM
00601   /* skip in-class word entries (they should be in word dictionary) */
00602   if (getl(buf, sizeof(buf), fp) != NULL) {
00603     if (strnmatch(buf, "\\class", 6)) {
00604       j_printerr("  skipping in-class word entries...\n");
00605     }
00606   }
00607 #endif
00608 
00609   if (n >= 3 && ndata->version == 4) {
00610     /* compact the 2-gram back-off and 3-gram links */
00611     ngram_compact_bigram_context(ndata);
00612   }
00613   
00614   /* set unknown (=OOV) word id */
00615   set_unknown_id(ndata);
00616 
00617   if (direction == DIR_LR) {
00618     LR_2gram_read = TRUE;
00619   }
00620 
00621   return TRUE;
00622 }
00623 
00629 void
00630 ngram_compact_bigram_context(NGRAM_INFO *ndata)
00631 {
00632   NNID i;
00633   int c;
00634   int dst;
00635   NNID ntmp;
00636 
00637   /* version check */
00638   if (ndata->version != 4) {
00639     j_error("InternalError: bigram context compaction called for version != 4\n");
00640   }
00641 
00642   /* count number of valid bigram context */
00643   c = 0;
00644   for(i=0;i<ndata->ngram_num[1];i++) {
00645     if (ndata->n3_bgn_upper[i] != NNID_INVALID_UPPER) {
00646       c++;
00647     } else {
00648       if (ndata->n3_num[i] != 0) {
00649         printf("bgn=%d|%d, num=%d, bo_wt_rrl=%f\n",
00650                ndata->n3_bgn_upper[i], 
00651                ndata->n3_bgn_lower[i], 
00652                ndata->n3_num[i],
00653                ndata->bo_wt_rrl[i]);
00654         j_error("Error: ngram_compact_bigram_context: internal error\n");
00655       }
00656       if (ndata->bo_wt_rrl[i] != 0.0) {
00657         j_error("Error: 2-gram has no upper 3-gram, but not 0.0 back-off weight\n");
00658       }
00659     }
00660   }
00661   ndata->bigram_bo_num = c;
00662   j_printerr("num: %d -> %d\n", ndata->ngram_num[1], ndata->bigram_bo_num);
00663   
00664   /* allocate index buffer */
00665   ndata->n2bo_upper = (NNID_UPPER *)mymalloc(sizeof(NNID_UPPER) * ndata->ngram_num[1]);
00666   ndata->n2bo_lower = (NNID_LOWER *)mymalloc(sizeof(NNID_LOWER) * ndata->ngram_num[1]);
00667   /* make index and do compaction of context informations */
00668   dst = 0;
00669   for(i=0;i<ndata->ngram_num[1];i++) {
00670     if (ndata->n3_bgn_upper[i] != NNID_INVALID_UPPER) {
00671       ndata->bo_wt_rrl[dst] = ndata->bo_wt_rrl[i];
00672       ndata->n3_bgn_upper[dst] = ndata->n3_bgn_upper[i];
00673       ndata->n3_bgn_lower[dst] = ndata->n3_bgn_lower[i];
00674       ndata->n3_num[dst] = ndata->n3_num[i];
00675       ntmp = dst & 0xffff;
00676       ndata->n2bo_lower[i] = ntmp;
00677       ntmp = dst >> 16;
00678       ndata->n2bo_upper[i] = ntmp;
00679       dst++;
00680     } else {
00681       ndata->n2bo_upper[i] = NNID_INVALID_UPPER;
00682       ndata->n2bo_lower[i] = 0;
00683     }
00684   }
00685   /* really shrink the memory area */
00686   ndata->bo_wt_rrl = (LOGPROB *)myrealloc(ndata->bo_wt_rrl, sizeof(LOGPROB) * ndata->bigram_bo_num);
00687   ndata->n3_bgn_upper = (NNID_UPPER *)myrealloc(ndata->n3_bgn_upper, sizeof(NNID_UPPER) * ndata->bigram_bo_num);
00688   ndata->n3_bgn_lower = (NNID_LOWER *)myrealloc(ndata->n3_bgn_lower, sizeof(NNID_LOWER) * ndata->bigram_bo_num);
00689   ndata->n3_num = (WORD_ID *)myrealloc(ndata->n3_num, sizeof(WORD_ID) * ndata->bigram_bo_num);
00690 }