00001
00018
00019
00020
00021
00022
00023
00024
00025 #include <sent/stddefs.h>
00026 #include <sent/dfa.h>
00027
00028 static char buf[MAXLINELEN];
00029
00035 void
00036 dfa_state_init(DFA_INFO *dinfo)
00037 {
00038 int i;
00039 dinfo->maxstatenum = DFA_STATESTEP;
00040 dinfo->st = (DFA_STATE *)mymalloc(sizeof(DFA_STATE) * dinfo->maxstatenum);
00041 for (i=0;i<dinfo->maxstatenum;i++) {
00042 dinfo->st[i].number = i;
00043 dinfo->st[i].status = 0;
00044 dinfo->st[i].arc = NULL;
00045 }
00046 dinfo->state_num = dinfo->arc_num = dinfo->term_num = 0;
00047 dinfo->sp_id = WORD_INVALID;
00048 }
00049
00056 void
00057 dfa_state_expand(DFA_INFO *dinfo, int needed)
00058 {
00059 int oldnum, i;
00060 oldnum = dinfo->maxstatenum;
00061 dinfo->maxstatenum += DFA_STATESTEP;
00062 if (dinfo->maxstatenum < needed) dinfo->maxstatenum = needed;
00063 dinfo->st = (DFA_STATE *)myrealloc(dinfo->st, sizeof(DFA_STATE) * dinfo->maxstatenum);
00064 for (i=oldnum;i<dinfo->maxstatenum;i++) {
00065 dinfo->st[i].number = i;
00066 dinfo->st[i].status = 0;
00067 dinfo->st[i].arc = NULL;
00068 }
00069 }
00070
00079 boolean
00080 rddfa(FILE *fp, DFA_INFO *dinfo)
00081 {
00082 int state_max, arc_num, terminal_max;
00083
00084
00085 dfa_state_init(dinfo);
00086 state_max = 0;
00087 arc_num = 0;
00088 terminal_max = 0;
00089
00090 while (getl(buf, MAXLINELEN, fp) != NULL) {
00091 if (rddfa_line(buf, dinfo, &state_max, &arc_num, &terminal_max) == FALSE) {
00092 break;
00093 }
00094 }
00095 dinfo->state_num = state_max + 1;
00096 dinfo->arc_num = arc_num;
00097 dinfo->term_num = terminal_max + 1;
00098 return(TRUE);
00099 }
00100
00109 boolean
00110 rddfa_fd(int fd, DFA_INFO *dinfo)
00111 {
00112 int state_max, arc_num, terminal_max;
00113
00114
00115 dfa_state_init(dinfo);
00116 state_max = 0;
00117 arc_num = 0;
00118 terminal_max = 0;
00119
00120 while(getl_fd(buf, MAXLINELEN, fd) != NULL) {
00121 if (rddfa_line(buf, dinfo, &state_max, &arc_num, &terminal_max) == FALSE) {
00122 break;
00123 }
00124 }
00125 dinfo->state_num = state_max + 1;
00126 dinfo->arc_num = arc_num;
00127 dinfo->term_num = terminal_max + 1;
00128 return(TRUE);
00129 }
00130
00139 boolean
00140 rddfa_sd(int sd, DFA_INFO *dinfo)
00141 {
00142 int state_max, arc_num, terminal_max;
00143
00144
00145 dfa_state_init(dinfo);
00146 state_max = 0;
00147 arc_num = 0;
00148 terminal_max = 0;
00149
00150 while(getl_sd(buf, MAXLINELEN, sd) != NULL) {
00151 if (rddfa_line(buf, dinfo, &state_max, &arc_num, &terminal_max) == FALSE) {
00152 break;
00153 }
00154 }
00155 dinfo->state_num = state_max + 1;
00156 dinfo->arc_num = arc_num;
00157 dinfo->term_num = terminal_max + 1;
00158 return(TRUE);
00159 }
00160
00172 boolean
00173 rddfa_line(char *line, DFA_INFO *dinfo, int *state_max, int *arc_num, int *terminal_max)
00174 {
00175 DFA_ARC *newarc;
00176 int state, terminal, next_state;
00177 unsigned int status;
00178 char *p;
00179
00180 if (strmatch(buf, "DFAEND")) return(FALSE);
00181
00182 if ((p = strtok(line, DELM)) == NULL) {
00183 jlog("Error: rddfa: failed to parse, corrupted or invalid data?\n");
00184 return FALSE;
00185 }
00186 state = atoi(p);
00187 if ((p = strtok(NULL, DELM)) == NULL) {
00188 jlog("Error: rddfa: failed to parse, corrupted or invalid data?\n");
00189 return FALSE;
00190 }
00191 terminal = atoi(p);
00192 if ((p = strtok(NULL, DELM)) == NULL) {
00193 jlog("Error: rddfa: failed to parse, corrupted or invalid data?\n");
00194 return FALSE;
00195 }
00196 next_state = atoi(p);
00197 if ((p = strtok(NULL, DELM)) == NULL) {
00198 jlog("Error: rddfa: failed to parse, corrupted or invalid data?\n");
00199 return FALSE;
00200 }
00201 sscanf(p, "%x", &status);
00202
00203 if (state >= dinfo->maxstatenum) {
00204 dfa_state_expand(dinfo, state+1);
00205 }
00206 if (next_state >= dinfo->maxstatenum) {
00207 dfa_state_expand(dinfo, next_state+1);
00208 }
00209
00210
00211 if (status & ACCEPT_S) {
00212 dinfo->st[state].status |= ACCEPT_S;
00213 }
00214
00215 if (state == 0) {
00216 dinfo->st[state].status |= INITIAL_S;
00217 }
00218
00219
00220 if (terminal > 0 || next_state > 0) {
00221
00222 newarc = (DFA_ARC *)mymalloc(sizeof(DFA_ARC));
00223 newarc->label = terminal;
00224 newarc->to_state = next_state;
00225 newarc->next = dinfo->st[state].arc;
00226 dinfo->st[state].arc = newarc;
00227 (*arc_num)++;
00228 }
00229
00230 if (*state_max < state) *state_max = state;
00231 if (*terminal_max < terminal) *terminal_max = terminal;
00232
00233 return(TRUE);
00234 }
00235
00236
00237
00238
00247 void
00248 dfa_append(DFA_INFO *dst, DFA_INFO *src, int soffset, int coffset)
00249 {
00250 DFA_ARC *arc, *newarc;
00251 int s, state, terminal, next_state;
00252 unsigned int status;
00253
00254 for (s = 0; s < src->state_num; s++) {
00255 state = s + soffset;
00256 status = src->st[s].status;
00257 if (state >= dst->maxstatenum) {
00258 dfa_state_expand(dst, state+1);
00259 }
00260
00261 if (status & ACCEPT_S) {
00262 dst->st[state].status |= ACCEPT_S;
00263 }
00264
00265 if (s == 0) {
00266 dst->st[state].status |= INITIAL_S;
00267 }
00268 for (arc = src->st[s].arc; arc; arc = arc->next) {
00269 terminal = arc->label + coffset;
00270 next_state = arc->to_state + soffset;
00271
00272 if (next_state >= dst->maxstatenum) {
00273 dfa_state_expand(dst, next_state+1);
00274 }
00275
00276 newarc = (DFA_ARC *)mymalloc(sizeof(DFA_ARC));
00277 newarc->label = terminal;
00278 newarc->to_state = next_state;
00279 newarc->next = dst->st[state].arc;
00280 dst->st[state].arc = newarc;
00281
00282 dst->arc_num++;
00283 if (dst->term_num < terminal + 1) dst->term_num = terminal + 1;
00284 }
00285 if (dst->state_num < state + 1) dst->state_num = state + 1;
00286 }
00287 }