libsent/src/ngram/ngram_compact_context.c

Go to the documentation of this file.
00001 
00018 /*
00019  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
00020  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00021  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
00022  * All rights reserved
00023  */
00024 
00025 #include <sent/stddefs.h>
00026 #include <sent/ngram2.h>
00027 
00038 boolean
00039 ngram_compact_context(NGRAM_INFO *ndata, int n)
00040 {
00041   NNID i;
00042   int c;
00043   int dst;
00044   NNID ntmp;
00045   NGRAM_TUPLE_INFO *this, *up;
00046 
00047   this = &(ndata->d[n-1]);
00048   up   = &(ndata->d[n]);
00049 
00050   /* count number of valid bigram context */
00051   c = 0;
00052   for(i=0;i<up->bgnlistlen;i++) {
00053     if ((up->is24bit == TRUE && up->bgn_upper[i] != NNID_INVALID_UPPER)
00054         || (up->is24bit == FALSE && up->bgn[i] != NNID_INVALID)) {
00055       c++;
00056       if (c >= NNIDMAX) {
00057         jlog("Error: ngram_compact_context: num of %d-gram valid context exceeds 24bit limit\n", n+1);
00058         return FALSE;
00059       }
00060     } else {
00061       if (up->num[i] != 0) {
00062         jlog("Error: ngram_compact_context: internal error\n");
00063         return FALSE;
00064       }
00065       if (this->bo_wt[i] != 0.0) {
00066         jlog("Error: ngram_compact_context: 2-gram has no upper 3-gram, but not 0.0 back-off weight\n");
00067         return FALSE;
00068       }
00069     }
00070   }
00071   
00072   this->context_num = c;
00073   jlog("Stat: ngram_compact_context: bigram bowt compaction: %d -> %d\n", this->totalnum, this->context_num);
00074 
00075   
00076   /* allocate index buffer */
00077   this->nnid2ctid_upper = (NNID_UPPER *)mymalloc(sizeof(NNID_UPPER) * this->totalnum);
00078   this->nnid2ctid_lower = (NNID_LOWER *)mymalloc(sizeof(NNID_LOWER) * this->totalnum);
00079   /* make index and do compaction of context informations */
00080   dst = 0;
00081   for(i=0;i<up->bgnlistlen;i++) {
00082     if ((up->is24bit == TRUE && up->bgn_upper[i] != NNID_INVALID_UPPER)
00083         || (up->is24bit == FALSE && up->bgn[i] != NNID_INVALID)) {
00084       this->bo_wt[dst] = this->bo_wt[i];
00085       up->bgn_upper[dst] = up->bgn_upper[i];
00086       up->bgn_lower[dst] = up->bgn_lower[i];
00087       up->num[dst] = up->num[i];
00088       ntmp = dst & 0xffff;
00089       this->nnid2ctid_lower[i] = ntmp;
00090       ntmp = dst >> 16;
00091       this->nnid2ctid_upper[i] = ntmp;
00092       dst++;
00093     } else {
00094       this->nnid2ctid_upper[i] = NNID_INVALID_UPPER;
00095       this->nnid2ctid_lower[i] = 0;
00096     }
00097   }
00098   up->bgnlistlen = this->context_num;
00099 
00100   /* shrink the memory area */
00101   this->bo_wt = (LOGPROB *)myrealloc(this->bo_wt, sizeof(LOGPROB) * this->context_num);
00102   up->bgn_upper = (NNID_UPPER *)myrealloc(up->bgn_upper, sizeof(NNID_UPPER) * up->bgnlistlen);
00103   up->bgn_lower = (NNID_LOWER *)myrealloc(up->bgn_lower, sizeof(NNID_LOWER) * up->bgnlistlen);
00104   up->num = (WORD_ID *)myrealloc(up->num, sizeof(WORD_ID) * up->bgnlistlen);
00105 
00106   return TRUE;
00107 
00108 }

Generated on Tue Dec 18 15:59:55 2007 for Julius by  doxygen 1.5.4