00001
00018
00019
00020
00021
00022
00023
00024
00025 #include <sent/stddefs.h>
00026 #include <sent/ngram2.h>
00027
00038 boolean
00039 ngram_compact_context(NGRAM_INFO *ndata, int n)
00040 {
00041 NNID i;
00042 int c;
00043 int dst;
00044 NNID ntmp;
00045 NGRAM_TUPLE_INFO *this, *up;
00046
00047 this = &(ndata->d[n-1]);
00048 up = &(ndata->d[n]);
00049
00050
00051 c = 0;
00052 for(i=0;i<up->bgnlistlen;i++) {
00053 if ((up->is24bit == TRUE && up->bgn_upper[i] != NNID_INVALID_UPPER)
00054 || (up->is24bit == FALSE && up->bgn[i] != NNID_INVALID)) {
00055 c++;
00056 if (c >= NNIDMAX) {
00057 jlog("Error: ngram_compact_context: num of %d-gram valid context exceeds 24bit limit\n", n+1);
00058 return FALSE;
00059 }
00060 } else {
00061 if (up->num[i] != 0) {
00062 jlog("Error: ngram_compact_context: internal error\n");
00063 return FALSE;
00064 }
00065 if (this->bo_wt[i] != 0.0) {
00066 jlog("Error: ngram_compact_context: 2-gram has no upper 3-gram, but not 0.0 back-off weight\n");
00067 return FALSE;
00068 }
00069 }
00070 }
00071
00072 this->context_num = c;
00073 jlog("Stat: ngram_compact_context: bigram bowt compaction: %d -> %d\n", this->totalnum, this->context_num);
00074
00075
00076
00077 this->nnid2ctid_upper = (NNID_UPPER *)mymalloc(sizeof(NNID_UPPER) * this->totalnum);
00078 this->nnid2ctid_lower = (NNID_LOWER *)mymalloc(sizeof(NNID_LOWER) * this->totalnum);
00079
00080 dst = 0;
00081 for(i=0;i<up->bgnlistlen;i++) {
00082 if ((up->is24bit == TRUE && up->bgn_upper[i] != NNID_INVALID_UPPER)
00083 || (up->is24bit == FALSE && up->bgn[i] != NNID_INVALID)) {
00084 this->bo_wt[dst] = this->bo_wt[i];
00085 up->bgn_upper[dst] = up->bgn_upper[i];
00086 up->bgn_lower[dst] = up->bgn_lower[i];
00087 up->num[dst] = up->num[i];
00088 ntmp = dst & 0xffff;
00089 this->nnid2ctid_lower[i] = ntmp;
00090 ntmp = dst >> 16;
00091 this->nnid2ctid_upper[i] = ntmp;
00092 dst++;
00093 } else {
00094 this->nnid2ctid_upper[i] = NNID_INVALID_UPPER;
00095 this->nnid2ctid_lower[i] = 0;
00096 }
00097 }
00098 up->bgnlistlen = this->context_num;
00099
00100
00101 this->bo_wt = (LOGPROB *)myrealloc(this->bo_wt, sizeof(LOGPROB) * this->context_num);
00102 up->bgn_upper = (NNID_UPPER *)myrealloc(up->bgn_upper, sizeof(NNID_UPPER) * up->bgnlistlen);
00103 up->bgn_lower = (NNID_LOWER *)myrealloc(up->bgn_lower, sizeof(NNID_LOWER) * up->bgnlistlen);
00104 up->num = (WORD_ID *)myrealloc(up->num, sizeof(WORD_ID) * up->bgnlistlen);
00105
00106 return TRUE;
00107
00108 }