00001
00018
00019
00020
00021
00022
00023
00024
00025 #include <sent/stddefs.h>
00026 #include <sent/ngram2.h>
00027 #include <sent/ptree.h>
00028
00034 void
00035 ngram_make_lookup_tree(NGRAM_INFO *ndata)
00036 {
00037 int i;
00038 int *windex;
00039 char **wnameindex;
00040
00041 windex = (int *)mymalloc(sizeof(int)*ndata->max_word_num);
00042 for (i=0;i<ndata->max_word_num;i++) {
00043 windex[i] = i;
00044 }
00045 wnameindex = (char **)mymalloc(sizeof(char *)*ndata->max_word_num);
00046 for (i=0;i<ndata->max_word_num;i++) {
00047 wnameindex[i] = ndata->wname[i];
00048 }
00049
00050 ndata->root = make_ptree(wnameindex, windex, ndata->max_word_num, 0);
00051
00052 free(windex);
00053 free(wnameindex);
00054 }
00055
00064 WORD_ID
00065 ngram_lookup_word(NGRAM_INFO *ndata, char *wordstr)
00066 {
00067 int data;
00068 data = ptree_search_data(wordstr, ndata->root);
00069 if (data == -1 || strcmp(wordstr, ndata->wname[data]) != 0) {
00070 return WORD_INVALID;
00071 } else {
00072 return(data);
00073 }
00074 }
00075
00084 WORD_ID
00085 make_ngram_ref(NGRAM_INFO *ndata, char *wstr)
00086 {
00087 WORD_ID nw;
00088
00089 nw = ngram_lookup_word(ndata, wstr);
00090 if (nw == WORD_INVALID) {
00091 if (ndata->isopen) {
00092 jlog("Warning: ngram_lookup: \"%s\" not exist in N-gram, treat as unknown\n", wstr);
00093 return(ndata->unk_id);
00094 } else {
00095 jlog("Error: ngram_lookup: \"%s\" not exist in N-gram\n", wstr);
00096 return WORD_INVALID;
00097 }
00098 } else {
00099 return(nw);
00100 }
00101 }