Main Page | Modules | Data Structures | Directories | File List | Data Fields | Globals | Related Pages

voca_lookup.c

Go to the documentation of this file.
00001 
00023 /*
00024  * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
00025  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00026  * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology, Nagoya Institute of Technology
00027  * All rights reserved
00028  */
00029 
00030 #include <sent/stddefs.h>
00031 #include <sent/vocabulary.h>
00032 
00041 WORD_ID
00042 voca_lookup_wid(char *keyword, WORD_INFO *winfo)
00043 {
00044   WORD_ID i, found;
00045   int plen,totallen;
00046   boolean numflag = TRUE;
00047   int wid;
00048   char *c;
00049 
00050   if (keyword == NULL) return WORD_INVALID;
00051   
00052   if (keyword[0] == '#') {
00053     
00054     for(i=1;i<strlen(keyword);i++) {
00055       if (keyword[i] < '0' || keyword[i] > '9') {
00056         numflag = FALSE;
00057         break;
00058       }
00059     }
00060     if (numflag) {
00061       wid = atoi(&(keyword[1]));
00062       if (wid < 0 || wid >= winfo->num) {
00063         return(WORD_INVALID);
00064       } else {
00065         return(wid);
00066       }
00067     } else {
00068       return(WORD_INVALID);
00069     }
00070   }
00071       
00072   found = WORD_INVALID;
00073   totallen = strlen(keyword);
00074   if ((c = strchr(keyword, '[')) != NULL) {
00075     plen = c - keyword;
00076     for (i=0;i<winfo->num;i++) {
00077       if (strnmatch(keyword,winfo->wname[i], plen)
00078           && strnmatch(c+1, winfo->woutput[i], totallen-plen-2)) {
00079         if (found == WORD_INVALID) {
00080           found = i;
00081         } else {
00082           j_printerr("Warning: several \"%s\" found in dictionary, use the first one..\n");
00083           break;
00084         }
00085       }
00086     }
00087   } else {
00088     for (i=0;i<winfo->num;i++) {
00089       if (strmatch(keyword,winfo->wname[i])) {
00090         if (found == WORD_INVALID) {
00091           found = i;
00092         } else {
00093           j_printerr("Warning: several \"%s\" found in dictionary, use the first one..\n");
00094           break;
00095         }
00096       }
00097     }
00098   }
00099   return found;
00100 }
00101 
00102 /* convert space-separated words string -> array of wid */
00103 /* return malloced array */
00104 #define WSSTEP 10 
00105 
00106 
00115 WORD_ID *
00116 new_str2wordseq(WORD_INFO *winfo, char *s, int *len_return)
00117 {
00118   char *p;
00119   int num;
00120   int maxnum;
00121   WORD_ID *wseq;
00122 
00123   maxnum = WSSTEP;
00124   wseq = (WORD_ID *)mymalloc(sizeof(WORD_ID)*maxnum);
00125   num = 0;
00126   for (p = strtok(s, " "); p != NULL; p = strtok(NULL, " ")) {
00127     if (num >= maxnum) {
00128       maxnum += WSSTEP;
00129       wseq = (WORD_ID *)myrealloc(wseq, sizeof(WORD_ID) * maxnum);
00130     }
00131     if ((wseq[num] = voca_lookup_wid(p, winfo)) == WORD_INVALID) {
00132       /* not found */
00133       j_printerr("word \"%s\" not found in dict\n", p);
00134       free(wseq);
00135       return NULL;
00136     }
00137     num++;
00138   }
00139 
00140   *len_return = num;
00141   return(wseq);
00142 }

Generated on Tue Mar 28 16:01:39 2006 for Julius by  doxygen 1.4.2