Main Page | Modules | Data Structures | Directories | File List | Data Fields | Globals | Related Pages

ngram_lookup.c

Go to the documentation of this file.
00001 
00017 /*
00018  * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
00019  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00020  * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology, Nagoya Institute of Technology
00021  * All rights reserved
00022  */
00023 
00024 #include <sent/stddefs.h>
00025 #include <sent/ngram2.h>
00026 #include <sent/ptree.h>
00027 
00033 void
00034 ngram_make_lookup_tree(NGRAM_INFO *ndata)
00035 {
00036   int i;
00037   int *windex;
00038   char **wnameindex;
00039   
00040   windex = (int *)mymalloc(sizeof(int)*ndata->max_word_num);
00041   for (i=0;i<ndata->max_word_num;i++) {
00042     windex[i] = i;
00043   }
00044   wnameindex = (char **)mymalloc(sizeof(char *)*ndata->max_word_num);
00045   for (i=0;i<ndata->max_word_num;i++) {
00046     wnameindex[i] = ndata->wname[i];
00047   }
00048 
00049   ndata->root = make_ptree(wnameindex, windex, ndata->max_word_num, 0);
00050 
00051   free(windex);
00052   free(wnameindex);
00053 }
00054 
00063 WORD_ID
00064 ngram_lookup_word(NGRAM_INFO *ndata, char *wordstr)
00065 {
00066   int data;
00067   data = ptree_search_data(wordstr, ndata->root);
00068   if (strcmp(wordstr, ndata->wname[data]) != 0) {
00069     return WORD_INVALID;
00070   } else {
00071     return(data);
00072   }
00073 }
00074 
00083 WORD_ID
00084 make_ngram_ref(NGRAM_INFO *ndata, char *wstr)
00085 {
00086   WORD_ID nw;
00087 
00088   nw = ngram_lookup_word(ndata, wstr);
00089   if (nw == WORD_INVALID) {     /* not found */
00090     j_printf("word %s not exist in N-gram, treat as <UNK>\n", wstr);
00091     return(ndata->unk_id);
00092   } else {
00093     return(nw);
00094   }
00095 }

Generated on Tue Mar 28 16:01:39 2006 for Julius by  doxygen 1.4.2