00001
00018
00019
00020
00021
00022
00023
00024
00025 #include <sent/stddefs.h>
00026 #include <sent/ngram2.h>
00027
00036 static int
00037 get_ngram_tuple_bytes(NGRAM_TUPLE_INFO *t)
00038 {
00039 int size, unit;
00040
00041 size = 0;
00042 if (t->num != NULL) {
00043
00044 if (t->is24bit) {
00045 unit = sizeof(NNID_UPPER) + sizeof(NNID_LOWER);
00046 } else {
00047 unit = sizeof(NNID);
00048 }
00049
00050 unit += sizeof(WORD_ID);
00051 size += unit * t->bgnlistlen;
00052 }
00053
00054 unit = sizeof(LOGPROB);
00055
00056 if (t->nnid2wid) unit += sizeof(WORD_ID);
00057 size += unit * t->totalnum;
00058
00059 if (t->bo_wt) {
00060 if (t->ct_compaction) {
00061
00062 unit = sizeof(NNID_UPPER) + sizeof(NNID_LOWER);
00063 size += unit * t->totalnum;
00064 }
00065
00066 size += sizeof(LOGPROB) * t->context_num;
00067 }
00068
00069 return size;
00070 }
00071
00078 void
00079 print_ngram_info(FILE *fp, NGRAM_INFO *ndata)
00080 {
00081 int i;
00082 fprintf(fp, " N-gram info:\n");
00083
00084
00085 fprintf(fp, "\t spec = %d-gram", ndata->n);
00086 if (ndata->dir == DIR_RL) {
00087 fprintf(fp, ", backward (right-to-left)\n");
00088 } else {
00089 fprintf(fp, ", forward (left-to-right)\n");
00090 }
00091 if (ndata->isopen) {
00092 fprintf(fp, "\t OOV word = %s(id=%d)\n", ndata->wname[ndata->unk_id],ndata->unk_id);
00093 fprintf(fp, "\t OOV size = %d words in dict\n", ndata->unk_num);
00094 } else {
00095 fprintf(fp, "\t OOV word = none\n");
00096 }
00097 fprintf(fp, "\t wordset size = %d\n", ndata->max_word_num);
00098 for(i=0;i<ndata->n;i++) {
00099 fprintf(fp, "\t %d-gram entries = %8d (%5.1f MB)", i+1, ndata->d[i].totalnum, get_ngram_tuple_bytes(&(ndata->d[i])) / 1048576.0);
00100 if (ndata->d[i].bo_wt != NULL && ndata->d[i].totalnum != ndata->d[i].context_num) {
00101 fprintf(fp, " (%d%% are valid contexts)", ndata->d[i].context_num * 100 / ndata->d[i].totalnum);
00102 }
00103 fprintf(fp, "\n");
00104 }
00105
00106 if (ndata->bo_wt_1) {
00107 fprintf(fp, "\tLR 2-gram entries= %8d (%5.1f MB)\n", ndata->d[1].totalnum,
00108 (sizeof(LOGPROB) * ndata->d[1].totalnum + sizeof(LOGPROB) * ndata->d[0].context_num) / 1048576.0);
00109 }
00110 fprintf(fp, "\t pass1 = ");
00111 if (ndata->dir == DIR_RL) {
00112 if (ndata->bo_wt_1) {
00113 fprintf(fp, "given additional forward 2-gram\n");
00114 } else {
00115 fprintf(fp, "estimate 2-gram from the backward 2-gram\n");
00116 }
00117 } else {
00118 fprintf(fp, "2-gram in the forward n-gram\n");
00119 }
00120 }