00001
00024
00025
00026
00027
00028
00029
00030
00031 #include <sent/stddefs.h>
00032 #include <sent/vocabulary.h>
00033
00042 WORD_ID
00043 voca_lookup_wid(char *keyword, WORD_INFO *winfo)
00044 {
00045 WORD_ID i, found;
00046 int plen,totallen;
00047 boolean numflag = TRUE;
00048 int wid;
00049 char *c;
00050
00051 if (keyword == NULL) return WORD_INVALID;
00052
00053 if (keyword[0] == '#') {
00054
00055 for(i=1;i<strlen(keyword);i++) {
00056 if (keyword[i] < '0' || keyword[i] > '9') {
00057 numflag = FALSE;
00058 break;
00059 }
00060 }
00061 if (numflag) {
00062 wid = atoi(&(keyword[1]));
00063 if (wid < 0 || wid >= winfo->num) {
00064 return(WORD_INVALID);
00065 } else {
00066 return(wid);
00067 }
00068 } else {
00069 return(WORD_INVALID);
00070 }
00071 }
00072
00073 found = WORD_INVALID;
00074 totallen = strlen(keyword);
00075 if ((c = strchr(keyword, '[')) != NULL) {
00076 plen = c - keyword;
00077 for (i=0;i<winfo->num;i++) {
00078 if (strnmatch(keyword,winfo->wname[i], plen)
00079 && strnmatch(c+1, winfo->woutput[i], totallen-plen-2)) {
00080 if (found == WORD_INVALID) {
00081 found = i;
00082 } else {
00083 jlog("Warning: voca_lookup: several \"%s\" found in dictionary, use the first one..\n");
00084 break;
00085 }
00086 }
00087 }
00088 } else {
00089 for (i=0;i<winfo->num;i++) {
00090 if (strmatch(keyword,winfo->wname[i])) {
00091 if (found == WORD_INVALID) {
00092 found = i;
00093 } else {
00094 jlog("Warning: voca_lookup: several \"%s\" found in dictionary, use the first one..\n");
00095 break;
00096 }
00097 }
00098 }
00099 }
00100 return found;
00101 }
00102
00103
00104
00105 #define WSSTEP 10
00106
00107
00116 WORD_ID *
00117 new_str2wordseq(WORD_INFO *winfo, char *s, int *len_return)
00118 {
00119 char *p;
00120 int num;
00121 int maxnum;
00122 WORD_ID *wseq;
00123
00124 maxnum = WSSTEP;
00125 wseq = (WORD_ID *)mymalloc(sizeof(WORD_ID)*maxnum);
00126 num = 0;
00127 for (p = strtok(s, " "); p != NULL; p = strtok(NULL, " ")) {
00128 if (num >= maxnum) {
00129 maxnum += WSSTEP;
00130 wseq = (WORD_ID *)myrealloc(wseq, sizeof(WORD_ID) * maxnum);
00131 }
00132 if ((wseq[num] = voca_lookup_wid(p, winfo)) == WORD_INVALID) {
00133
00134 jlog("Error: voca_lookup: word \"%s\" not found in dict\n", p);
00135 free(wseq);
00136 return NULL;
00137 }
00138 num++;
00139 }
00140
00141 *len_return = num;
00142 return(wseq);
00143 }