libsent/src/dfa/rddfa.c

Go to the documentation of this file.
00001 
00018 /*
00019  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
00020  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00021  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
00022  * All rights reserved
00023  */
00024 
00025 #include <sent/stddefs.h>
00026 #include <sent/dfa.h>
00027 
00028 static char buf[MAXLINELEN];    
00029 
00035 void
00036 dfa_state_init(DFA_INFO *dinfo)
00037 {
00038   int i;
00039   dinfo->maxstatenum = DFA_STATESTEP;
00040   dinfo->st = (DFA_STATE *)mymalloc(sizeof(DFA_STATE) * dinfo->maxstatenum);
00041   for (i=0;i<dinfo->maxstatenum;i++) {
00042     dinfo->st[i].number = i;
00043     dinfo->st[i].status = 0;
00044     dinfo->st[i].arc = NULL;
00045   }
00046   dinfo->state_num = dinfo->arc_num = dinfo->term_num = 0;
00047   dinfo->sp_id = WORD_INVALID;
00048 }
00049 
00056 void
00057 dfa_state_expand(DFA_INFO *dinfo, int needed)
00058 {
00059   int oldnum, i;
00060   oldnum = dinfo->maxstatenum;
00061   dinfo->maxstatenum += DFA_STATESTEP;
00062   if (dinfo->maxstatenum < needed) dinfo->maxstatenum = needed;
00063   dinfo->st = (DFA_STATE *)myrealloc(dinfo->st, sizeof(DFA_STATE) * dinfo->maxstatenum);
00064   for (i=oldnum;i<dinfo->maxstatenum;i++) {
00065     dinfo->st[i].number = i;
00066     dinfo->st[i].status = 0;
00067     dinfo->st[i].arc = NULL;
00068   }
00069 }
00070 
00079 boolean
00080 rddfa(FILE *fp, DFA_INFO *dinfo)
00081 {
00082   int state_max, arc_num, terminal_max;
00083 
00084   /* initialize */
00085   dfa_state_init(dinfo);
00086   state_max = 0;
00087   arc_num = 0;
00088   terminal_max = 0;
00089 
00090   while (getl(buf, MAXLINELEN, fp) != NULL) {
00091     if (rddfa_line(buf, dinfo, &state_max, &arc_num, &terminal_max) == FALSE) {
00092       break;
00093     }
00094   }
00095   dinfo->state_num = state_max + 1;
00096   dinfo->arc_num = arc_num;
00097   dinfo->term_num = terminal_max + 1;
00098   return(TRUE);
00099 }
00100 
00109 boolean
00110 rddfa_fd(int fd, DFA_INFO *dinfo)
00111 {
00112   int state_max, arc_num, terminal_max;
00113 
00114   /* initialize */
00115   dfa_state_init(dinfo);
00116   state_max = 0;
00117   arc_num = 0;
00118   terminal_max = 0;
00119 
00120   while(getl_fd(buf, MAXLINELEN, fd) != NULL) {
00121     if (rddfa_line(buf, dinfo, &state_max, &arc_num, &terminal_max) == FALSE) {
00122       break;
00123     }
00124   }
00125   dinfo->state_num = state_max + 1;
00126   dinfo->arc_num = arc_num;
00127   dinfo->term_num = terminal_max + 1;
00128   return(TRUE);
00129 }
00130 
00139 boolean
00140 rddfa_sd(int sd, DFA_INFO *dinfo)
00141 {
00142   int state_max, arc_num, terminal_max;
00143 
00144   /* initialize */
00145   dfa_state_init(dinfo);
00146   state_max = 0;
00147   arc_num = 0;
00148   terminal_max = 0;
00149 
00150   while(getl_sd(buf, MAXLINELEN, sd) != NULL) {
00151     if (rddfa_line(buf, dinfo, &state_max, &arc_num, &terminal_max) == FALSE) {
00152       break;
00153     }
00154   }
00155   dinfo->state_num = state_max + 1;
00156   dinfo->arc_num = arc_num;
00157   dinfo->term_num = terminal_max + 1;
00158   return(TRUE);
00159 }
00160 
00172 boolean
00173 rddfa_line(char *line, DFA_INFO *dinfo, int *state_max, int *arc_num, int *terminal_max)
00174 {
00175   DFA_ARC *newarc;
00176   int state, terminal, next_state;
00177   unsigned int status;
00178   char *p;
00179 
00180   if (strmatch(buf, "DFAEND")) return(FALSE);
00181   /* format: state terminalID nextstate statuscode_of_state */
00182   if ((p = strtok(line, DELM)) == NULL) {
00183     jlog("Error: rddfa: failed to parse, corrupted or invalid data?\n");
00184     return FALSE;
00185   }
00186   state = atoi(p);
00187   if ((p = strtok(NULL, DELM)) == NULL) {
00188     jlog("Error: rddfa: failed to parse, corrupted or invalid data?\n");
00189     return FALSE;
00190   }
00191   terminal = atoi(p);
00192   if ((p = strtok(NULL, DELM)) == NULL) {
00193     jlog("Error: rddfa: failed to parse, corrupted or invalid data?\n");
00194     return FALSE;
00195   }
00196   next_state = atoi(p);
00197   if ((p = strtok(NULL, DELM)) == NULL) {
00198     jlog("Error: rddfa: failed to parse, corrupted or invalid data?\n");
00199     return FALSE;
00200   }
00201   sscanf(p, "%x", &status);
00202 
00203   if (state >= dinfo->maxstatenum) {      /* expand */
00204     dfa_state_expand(dinfo, state+1);
00205   }
00206   if (next_state >= dinfo->maxstatenum) { /* expand */
00207     dfa_state_expand(dinfo, next_state+1);
00208   }
00209 
00210   /* set state status (accept / initial) */
00211   if (status & ACCEPT_S) {
00212     dinfo->st[state].status |= ACCEPT_S;
00213   }
00214   /* the state #0 is an initial state */
00215   if (state == 0) {
00216     dinfo->st[state].status |= INITIAL_S;
00217   }
00218   
00219   /* skip line with negative terminalID/nextstate */
00220   if (terminal > 0 || next_state > 0) {
00221     /* add new arc to the state */
00222     newarc = (DFA_ARC *)mymalloc(sizeof(DFA_ARC));
00223     newarc->label    = terminal;
00224     newarc->to_state = next_state;
00225     newarc->next     = dinfo->st[state].arc;
00226     dinfo->st[state].arc = newarc;
00227     (*arc_num)++;
00228   }
00229 
00230   if (*state_max < state) *state_max = state;
00231   if (*terminal_max < terminal) *terminal_max = terminal;
00232 
00233   return(TRUE);
00234 }
00235 
00236 /* append dfa info to other */
00237 /* soffset: state offset  coffset: category(terminal) offset */
00238 
00247 void
00248 dfa_append(DFA_INFO *dst, DFA_INFO *src, int soffset, int coffset)
00249 {
00250   DFA_ARC *arc, *newarc;
00251   int s, state, terminal, next_state;
00252   unsigned int status;
00253 
00254   for (s = 0; s < src->state_num; s++) {
00255     state = s + soffset;
00256     status = src->st[s].status;
00257     if (state >= dst->maxstatenum) {      /* expand */
00258       dfa_state_expand(dst, state+1);
00259     }
00260     /* set state status (accept / initial) */
00261     if (status & ACCEPT_S) {
00262       dst->st[state].status |= ACCEPT_S;
00263     }
00264     /* the state #0 is an initial state */
00265     if (s == 0) {
00266       dst->st[state].status |= INITIAL_S;
00267     }
00268     for (arc = src->st[s].arc; arc; arc = arc->next) {
00269       terminal = arc->label + coffset;
00270       next_state = arc->to_state + soffset;
00271 
00272       if (next_state >= dst->maxstatenum) { /* expand */
00273         dfa_state_expand(dst, next_state+1);
00274       }
00275       /* add new arc to the state */
00276       newarc = (DFA_ARC *)mymalloc(sizeof(DFA_ARC));
00277       newarc->label    = terminal;
00278       newarc->to_state = next_state;
00279       newarc->next     = dst->st[state].arc;
00280       dst->st[state].arc = newarc;
00281 
00282       dst->arc_num++;
00283       if (dst->term_num < terminal + 1) dst->term_num = terminal + 1;
00284     }
00285     if (dst->state_num < state + 1) dst->state_num = state + 1;
00286   }
00287 }

Generated on Tue Dec 18 15:59:55 2007 for Julius by  doxygen 1.5.4