00001
00018
00019
00020
00021
00022
00023
00024
00025 #include <sent/stddefs.h>
00026 #include <sent/ngram2.h>
00027
00038 boolean
00039 ngram_compact_context(NGRAM_INFO *ndata, int n)
00040 {
00041 NNID i;
00042 NNID c;
00043 NNID dst;
00044 NNID ntmp;
00045 NGRAM_TUPLE_INFO *this, *up;
00046
00047 this = &(ndata->d[n-1]);
00048 up = &(ndata->d[n]);
00049
00050
00051 c = 0;
00052 for(i=0;i<up->bgnlistlen;i++) {
00053 if ((up->is24bit == TRUE && up->bgn_upper[i] != NNID_INVALID_UPPER)
00054 || (up->is24bit == FALSE && up->bgn[i] != NNID_INVALID)) {
00055 c++;
00056 } else {
00057 if (up->num[i] != 0) {
00058 jlog("Error: ngram_compact_context: internal error\n");
00059 return FALSE;
00060 }
00061 if (this->bo_wt[i] != 0.0) {
00062 jlog("Warning: ngram_compact_context: found a %d-gram that has non-zero back-off weight but not a context of upper N-gram (%f)\n", n, this->bo_wt[i]);
00063 jlog("Warning: ngram_compact_context: context compaction disabled\n");
00064 ndata->d[n-1].ct_compaction = FALSE;
00065 return TRUE;
00066 }
00067 }
00068 }
00069
00070 if (this->totalnum == c) {
00071 jlog("Stat: ngram_compact_context: %d-gram has full bo_wt, compaction disabled\n", n);
00072 ndata->d[n-1].ct_compaction = FALSE;
00073 return TRUE;
00074 }
00075
00076 if (c >= NNID_MAX_24) {
00077 jlog("Stat: ngram_compact_context: %d-gram bo_wt exceeds 24bit, compaction diabled\n", n);
00078 ndata->d[n-1].ct_compaction = FALSE;
00079 return TRUE;
00080 }
00081
00082 this->context_num = c;
00083 jlog("Stat: ngram_compact_context: %d-gram back-off weight compaction: %d -> %d\n", n, this->totalnum, this->context_num);
00084
00085
00086 this->nnid2ctid_upper = (NNID_UPPER *)mymalloc(sizeof(NNID_UPPER) * this->totalnum);
00087 this->nnid2ctid_lower = (NNID_LOWER *)mymalloc(sizeof(NNID_LOWER) * this->totalnum);
00088
00089 dst = 0;
00090 for(i=0;i<up->bgnlistlen;i++) {
00091 if ((up->is24bit == TRUE && up->bgn_upper[i] != NNID_INVALID_UPPER)
00092 || (up->is24bit == FALSE && up->bgn[i] != NNID_INVALID)) {
00093 this->bo_wt[dst] = this->bo_wt[i];
00094 if (up->is24bit) {
00095 up->bgn_upper[dst] = up->bgn_upper[i];
00096 up->bgn_lower[dst] = up->bgn_lower[i];
00097 } else {
00098 up->bgn[dst] = up->bgn[i];
00099 }
00100 up->num[dst] = up->num[i];
00101 ntmp = dst & 0xffff;
00102 this->nnid2ctid_lower[i] = ntmp;
00103 ntmp = dst >> 16;
00104 this->nnid2ctid_upper[i] = ntmp;
00105 dst++;
00106 } else {
00107 this->nnid2ctid_upper[i] = NNID_INVALID_UPPER;
00108 this->nnid2ctid_lower[i] = 0;
00109 }
00110 }
00111 up->bgnlistlen = this->context_num;
00112
00113
00114 this->bo_wt = (LOGPROB *)myrealloc(this->bo_wt, sizeof(LOGPROB) * this->context_num);
00115 if (up->is24bit) {
00116 up->bgn_upper = (NNID_UPPER *)myrealloc(up->bgn_upper, sizeof(NNID_UPPER) * up->bgnlistlen);
00117 up->bgn_lower = (NNID_LOWER *)myrealloc(up->bgn_lower, sizeof(NNID_LOWER) * up->bgnlistlen);
00118 } else {
00119 up->bgn = (NNID *)myrealloc(up->bgn, sizeof(NNID) * up->bgnlistlen);
00120 }
00121 up->num = (WORD_ID *)myrealloc(up->num, sizeof(WORD_ID) * up->bgnlistlen);
00122
00123
00124 ndata->d[n-1].ct_compaction = TRUE;
00125
00126 return TRUE;
00127
00128 }