00001
00024
00025
00026
00027
00028
00029
00030
00031 #include <sent/stddefs.h>
00032 #include <sent/dfa.h>
00033
00046 static int
00047 cp_find(int *list, int len, int key, int *loc)
00048 {
00049 int left, right, mid;
00050 int ret;
00051
00052 if (len == 0) {
00053 *loc = 0;
00054 return -1;
00055 }
00056
00057 left = 0;
00058 right = len - 1;
00059 while (left < right) {
00060 mid = (left + right) / 2;
00061 if (list[mid] < key) {
00062 left = mid + 1;
00063 } else {
00064 right = mid;
00065 }
00066 }
00067 if (list[left] == key) {
00068 *loc = left;
00069 ret = left;
00070 } else {
00071 ret = -1;
00072 if (list[left] > key) {
00073 *loc = left;
00074 } else {
00075 *loc = left + 1;
00076 }
00077 }
00078 return ret;
00079 }
00080
00090 boolean
00091 dfa_cp(DFA_INFO *dfa, int i, int j)
00092 {
00093 int loc;
00094
00095
00096
00097 return(cp_find(dfa->cp[i], dfa->cplen[i], j, &loc) != -1 ? TRUE : FALSE);
00098 }
00099
00108 boolean
00109 dfa_cp_begin(DFA_INFO *dfa, int i)
00110 {
00111 int loc;
00112
00113
00114 return(cp_find(dfa->cp_begin, dfa->cp_begin_len, i, &loc) != -1 ? TRUE : FALSE);
00115 }
00116
00125 boolean
00126 dfa_cp_end(DFA_INFO *dfa, int i)
00127 {
00128 int loc;
00129
00130
00131 return(cp_find(dfa->cp_end, dfa->cp_end_len, i, &loc) != -1 ? TRUE : FALSE);
00132 }
00133
00146 static boolean
00147 cp_add(int **list, int *len, int *alloclen, int val, int loc)
00148 {
00149 if (loc > *len) {
00150 jlog("InternalError: skip?\n");
00151 return FALSE;
00152 }
00153 if (*len + 1 > *alloclen) {
00154
00155 *alloclen *= 2;
00156 *list = (int *)myrealloc(*list, sizeof(int) * (*alloclen));
00157 }
00158 if (loc < *len) {
00159 memmove(&((*list)[loc+1]), &((*list)[loc]), sizeof(int) * (*len - loc));
00160 }
00161 (*list)[loc] = val;
00162 (*len)++;
00163 return TRUE;
00164 }
00165 static boolean
00176 cp_remove(int **list, int *len, int loc)
00177 {
00178 if (*len == 0) return TRUE;
00179 if (loc >= *len) {
00180 jlog("InternalError: skip?\n");
00181 return FALSE;
00182 }
00183 if (loc < *len - 1) {
00184 memmove(&((*list)[loc]), &((*list)[loc+1]), sizeof(int) * (*len - loc - 1));
00185 }
00186 (*len) --;
00187 return TRUE;
00188 }
00189
00190
00199 void
00200 set_dfa_cp(DFA_INFO *dfa, int i, int j, boolean value)
00201 {
00202 int loc;
00203 if (value) {
00204
00205 if (cp_find(dfa->cp[i], dfa->cplen[i], j, &loc) == -1) {
00206 cp_add(&(dfa->cp[i]), &(dfa->cplen[i]), &(dfa->cpalloclen[i]), j, loc);
00207 }
00208 } else {
00209
00210 if (cp_find(dfa->cp[i], dfa->cplen[i], j, &loc) != -1) {
00211 cp_remove(&(dfa->cp[i]), &(dfa->cplen[i]), loc);
00212 }
00213 }
00214 }
00215
00224 void
00225 set_dfa_cp_begin(DFA_INFO *dfa, int i, boolean value)
00226 {
00227 int loc;
00228
00229 if (value) {
00230
00231 if (cp_find(dfa->cp_begin, dfa->cp_begin_len, i, &loc) == -1) {
00232 cp_add(&(dfa->cp_begin), &(dfa->cp_begin_len), &(dfa->cp_begin_alloclen), i, loc);
00233 }
00234 } else {
00235
00236 if (cp_find(dfa->cp_begin, dfa->cp_begin_len, i, &loc) != -1) {
00237 cp_remove(&(dfa->cp_begin), &(dfa->cp_begin_len), loc);
00238 }
00239 }
00240 }
00241
00250 void
00251 set_dfa_cp_end(DFA_INFO *dfa, int i, boolean value)
00252 {
00253 int loc;
00254
00255 if (value) {
00256
00257 if (cp_find(dfa->cp_end, dfa->cp_end_len, i, &loc) == -1) {
00258 cp_add(&(dfa->cp_end), &(dfa->cp_end_len), &(dfa->cp_end_alloclen), i, loc);
00259 }
00260 } else {
00261
00262 if (cp_find(dfa->cp_end, dfa->cp_end_len, i, &loc) != -1) {
00263 cp_remove(&(dfa->cp_end), &(dfa->cp_end_len), loc);
00264 }
00265 }
00266 }
00267
00273 void
00274 init_dfa_cp(DFA_INFO *dfa)
00275 {
00276 dfa->cp = NULL;
00277 }
00278
00286 void
00287 malloc_dfa_cp(DFA_INFO *dfa, int term_num, int size)
00288 {
00289 int i;
00290
00291 dfa->cp = (int **)mymalloc(sizeof(int *) * term_num);
00292 dfa->cplen = (int *)mymalloc(sizeof(int) * term_num);
00293 dfa->cpalloclen = (int *)mymalloc(sizeof(int) * term_num);
00294 for(i=0;i<term_num;i++) {
00295 dfa->cp[i] = (int *)mymalloc(sizeof(int) * size);
00296 dfa->cpalloclen[i] = size;
00297 dfa->cplen[i] = 0;
00298 }
00299 dfa->cp_begin = (int *)mymalloc(sizeof(int) * size);
00300 dfa->cp_begin_alloclen = size;
00301 dfa->cp_begin_len = 0;
00302 dfa->cp_end = (int *)mymalloc(sizeof(int) * size);
00303 dfa->cp_end_alloclen = size;
00304 dfa->cp_end_len = 0;
00305 }
00306
00318 boolean
00319 dfa_cp_append(DFA_INFO *dfa, DFA_INFO *src, int offset)
00320 {
00321 int i, j, size;
00322
00323 if (dfa->cp == NULL) {
00324
00325 if (dfa->term_num != src->term_num) {
00326 jlog("InternalError: dfa_cp_append\n");
00327 return FALSE;
00328 }
00329
00330 dfa->cp = (int **)mymalloc(sizeof(int *) * dfa->term_num);
00331 dfa->cplen = (int *)mymalloc(sizeof(int) * dfa->term_num);
00332 dfa->cpalloclen = (int *)mymalloc(sizeof(int) * dfa->term_num);
00333 for(i=0;i<dfa->term_num;i++) {
00334 size = src->cplen[i];
00335 if (size < DFA_CP_MINSTEP) size = DFA_CP_MINSTEP;
00336 dfa->cp[i] = (int *)mymalloc(sizeof(int) * size);
00337 dfa->cpalloclen[i] = size;
00338 memcpy(dfa->cp[i], src->cp[i], sizeof(int) * src->cplen[i]);
00339 dfa->cplen[i] = src->cplen[i];
00340 }
00341 size = src->cp_begin_len;
00342 if (size < DFA_CP_MINSTEP) size = DFA_CP_MINSTEP;
00343 dfa->cp_begin = (int *)mymalloc(sizeof(int) * size);
00344 dfa->cp_begin_alloclen = size;
00345 memcpy(dfa->cp_begin, src->cp_begin, sizeof(int) * src->cp_begin_len);
00346 dfa->cp_begin_len = src->cp_begin_len;
00347 size = src->cp_end_len;
00348 if (size < DFA_CP_MINSTEP) size = DFA_CP_MINSTEP;
00349 dfa->cp_end = (int *)mymalloc(sizeof(int) * size);
00350 dfa->cp_end_alloclen = size;
00351 memcpy(dfa->cp_end, src->cp_end, sizeof(int) * src->cp_end_len);
00352 dfa->cp_end_len = src->cp_end_len;
00353 return TRUE;
00354 }
00355
00356 dfa->cp = (int **)myrealloc(dfa->cp, sizeof(int *) * dfa->term_num);
00357 dfa->cplen = (int *)myrealloc(dfa->cplen, sizeof(int) * dfa->term_num);
00358 dfa->cpalloclen = (int *)myrealloc(dfa->cpalloclen, sizeof(int) * dfa->term_num);
00359
00360 for(i=offset;i<dfa->term_num;i++) {
00361 size = src->cplen[i-offset];
00362 if (size < DFA_CP_MINSTEP) size = DFA_CP_MINSTEP;
00363 dfa->cp[i] = (int *)mymalloc(sizeof(int) * size);
00364 dfa->cpalloclen[i] = size;
00365 for(j=0;j<src->cplen[i-offset];j++) {
00366 dfa->cp[i][j] = src->cp[i-offset][j] + offset;
00367 }
00368 dfa->cplen[i] = src->cplen[i-offset];
00369 }
00370 if (dfa->cp_begin_alloclen < dfa->cp_begin_len + src->cp_begin_len) {
00371 dfa->cp_begin_alloclen = dfa->cp_begin_len + src->cp_begin_len;
00372 dfa->cp_begin = (int *)myrealloc(dfa->cp_begin, sizeof(int) * dfa->cp_begin_alloclen);
00373 }
00374 for(j=0;j<src->cp_begin_len;j++) {
00375 dfa->cp_begin[dfa->cp_begin_len + j] = src->cp_begin[j] + offset;
00376 }
00377 dfa->cp_begin_len += src->cp_begin_len;
00378 if (dfa->cp_end_alloclen < dfa->cp_end_len + src->cp_end_len) {
00379 dfa->cp_end_alloclen = dfa->cp_end_len + src->cp_end_len;
00380 dfa->cp_end = (int *)myrealloc(dfa->cp_end, sizeof(int) * dfa->cp_end_alloclen);
00381 }
00382 for(j=0;j<src->cp_end_len;j++) {
00383 dfa->cp_end[dfa->cp_end_len + j] = src->cp_end[j] + offset;
00384 }
00385 dfa->cp_end_len += src->cp_end_len;
00386
00387 return TRUE;
00388 }
00389
00395 void
00396 free_dfa_cp(DFA_INFO *dfa)
00397 {
00398 int i;
00399
00400 if (dfa->cp != NULL) {
00401 free(dfa->cp_end);
00402 free(dfa->cp_begin);
00403 for(i=0;i<dfa->term_num;i++) free(dfa->cp[i]);
00404 free(dfa->cpalloclen);
00405 free(dfa->cplen);
00406 free(dfa->cp);
00407 dfa->cp = NULL;
00408 }
00409 }
00410
00411 void
00412 dfa_cp_output_rawdata(FILE *fp, DFA_INFO *dfa)
00413 {
00414 int i, j;
00415
00416 for(i=0;i<dfa->term_num;i++) {
00417 fprintf(fp, "%d:", i);
00418 for(j=0;j<dfa->cplen[i];j++) {
00419 fprintf(fp, " %d", dfa->cp[i][j]);
00420 }
00421 fprintf(fp, "\n");
00422 }
00423 fprintf(fp, "bgn:");
00424 for(j=0;j<dfa->cp_begin_len;j++) {
00425 fprintf(fp, " %d", dfa->cp_begin[j]);
00426 }
00427 fprintf(fp, "\n");
00428 fprintf(fp, "end:");
00429 for(j=0;j<dfa->cp_end_len;j++) {
00430 fprintf(fp, " %d", dfa->cp_end[j]);
00431 }
00432 fprintf(fp, "\n");
00433 }
00434
00435 void
00436 dfa_cp_count_size(DFA_INFO *dfa, unsigned long *size_ret, unsigned long *allocsize_ret)
00437 {
00438 int i;
00439 unsigned long size, allocsize;
00440
00441 size = 0;
00442 allocsize = 0;
00443 for(i=0;i<dfa->term_num;i++) {
00444 size += sizeof(int) * dfa->cplen[i];
00445 allocsize += sizeof(int) * dfa->cpalloclen[i];
00446 }
00447 size += sizeof(int) * dfa->cp_begin_len;
00448 allocsize += sizeof(int) * dfa->cp_begin_alloclen;
00449 size += sizeof(int) * dfa->cp_end_len;
00450 allocsize += sizeof(int) * dfa->cp_end_alloclen;
00451
00452 allocsize += (sizeof(int *) + sizeof(int) + sizeof(int)) * dfa->term_num;
00453
00454 *size_ret = size;
00455 *allocsize_ret = allocsize;
00456 }