Main Page | Modules | Data Structures | Directories | File List | Data Fields | Globals | Related Pages

ngram_util.c

Go to the documentation of this file.
00001 
00017 /*
00018  * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
00019  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00020  * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology, Nagoya Institute of Technology
00021  * All rights reserved
00022  */
00023 
00024 #include <sent/stddefs.h>
00025 #include <sent/ngram2.h>
00026 
00035 static int
00036 get_unigram_size(NGRAM_INFO *ndata)
00037 {
00038   int unitsize;
00039   unitsize = sizeof(LOGPROB) * 3 + sizeof(NNID) + sizeof(WORD_ID);
00040   return(unitsize * ndata->ngram_num[0]);
00041 }
00042 
00051 static int
00052 get_bigram_size(NGRAM_INFO *ndata)
00053 {
00054   int unitsize;
00055   int size;
00056 
00057   switch(ndata->version) {
00058   case 4:
00059     unitsize = sizeof(WORD_ID) + sizeof(LOGPROB) * 2 + sizeof(NNID_UPPER) + sizeof(NNID_LOWER);
00060     size = unitsize * ndata->ngram_num[1];
00061     unitsize = sizeof(LOGPROB) + sizeof(NNID_UPPER) + sizeof(NNID_LOWER) + sizeof(WORD_ID);
00062     size += unitsize * ndata->bigram_bo_num;
00063     break;
00064   case 3:
00065     unitsize = sizeof(WORD_ID) * 2 + sizeof(LOGPROB) * 3 + sizeof(NNID);
00066     size = unitsize * ndata->ngram_num[1];
00067     break;
00068   }
00069     
00070   return(size);
00071 }
00072   
00081 static int
00082 get_trigram_size(NGRAM_INFO *ndata)
00083 {
00084   int unitsize;
00085 
00086   unitsize = sizeof(WORD_ID) + sizeof(LOGPROB);
00087   return(unitsize * ndata->ngram_num[2]);
00088 }
00089   
00090 
00096 void
00097 print_ngram_info(NGRAM_INFO *ndata)
00098 {
00099   j_printf("N-gram info:\n");
00100   j_printf("\t  struct version = %d\n", ndata->version);
00101   if (ndata->isopen) {
00102     j_printf("\t        OOV word = %s(id=%d)\n", ndata->wname[ndata->unk_id],ndata->unk_id);
00103     j_printf("\t        OOV size = %d words in dict\n", ndata->unk_num);
00104   } else {
00105     j_printf("\t        OOV word = none\n");
00106   }
00107   j_printf("\t   wordset size  = %8d\n", ndata->max_word_num);
00108   j_printf("\tuni-gram entries = %8d (%8d bytes)\n",
00109          ndata->ngram_num[0], get_unigram_size(ndata));
00110   j_printf("\t  bi-gram tuples = %8d (%8d bytes)\n",
00111          ndata->ngram_num[1], get_bigram_size(ndata));
00112   j_printf("\t tri-gram tuples = %8d (%8d bytes)\n",
00113          ndata->ngram_num[2], get_trigram_size(ndata));
00114 }

Generated on Tue Mar 28 16:01:39 2006 for Julius by  doxygen 1.4.2