00001
00052
00053
00054
00055
00056
00057
00058
00059
00060 #include <julius.h>
00061
00062 #ifdef USE_DFA
00063
/* Enables the extra j_printf() progress messages guarded by #ifdef MDEBUG below. */
00065 #define MDEBUG
00066
/* Global grammar: multigram_exec() creates it on demand and appends each
   grammar of gramlist to it; NULL while no global grammar exists. */
00072 static DFA_INFO *global_dfa = NULL;
/* Global dictionary created, filled and freed together with global_dfa. */
00078 static WORD_INFO *global_winfo = NULL;
/* ID given to the next grammar registered by multigram_add(); it is
   incremented after every registration. */
00084 static int gram_maxid = 0;
00085
00114 static void
00115 multigram_setup(DFA_INFO *d, WORD_INFO *w)
00116 {
00117 if (d == NULL || w == NULL) {
00118
00119 dfa = NULL;
00120 winfo = NULL;
00121 return;
00122 }
00123
00124
00125 dfa = d;
00126 winfo = w;
00127
00128
00129 if (wchmm != NULL) {
00130 wchmm_free(wchmm);
00131 }
00132 wchmm = wchmm_new();
00133 wchmm->dfa = d;
00134 wchmm->winfo = w;
00135 wchmm->hmminfo = hmminfo;
00136 #ifdef CATEGORY_TREE
00137 if (old_tree_function_flag) {
00138 build_wchmm(wchmm);
00139 } else {
00140 build_wchmm2(wchmm);
00141 }
00142 #else
00143 build_wchmm2(wchmm);
00144 #endif
00145
00146
00147 trellis_beam_width = set_beam_width(wchmm, specified_trellis_beam_width);
00148 if (specified_trellis_beam_width == 0) {
00149 j_printf("now beam width = %d (full)\n", trellis_beam_width);
00150 } else if (specified_trellis_beam_width == -1) {
00151 j_printf("now beam width = %d (guess)\n", trellis_beam_width);
00152 }
00153
00154 #ifdef USE_NGRAM
00155
00156 max_successor_cache_free();
00157 max_successor_cache_init(wchmm);
00158 #endif
00159
00160
00161 }
00162
00164 static char *hookstr[] = {"", "delete", "activate", "deactivate"};
00175 static void
00176 print_all_gram()
00177 {
00178 MULTIGRAM *m;
00179
00180 j_printf("[grammars]\n");
00181 for(m=gramlist;m;m=m->next) {
00182 j_printf(" #%2d: [%-11s] %4d words, %3d categories, %4d nodes",
00183 m->id,
00184 m->active ? "active" : "inactive",
00185 m->winfo->num, m->dfa->term_num, m->dfa->state_num);
00186 if (m->newbie) j_printf(" (new)");
00187 if (m->hook != MULTIGRAM_DEFAULT) {
00188 j_printf(" (next: %s)", hookstr[m->hook]);
00189 }
00190 j_printf(" \"%s\"\n", m->name);
00191 }
00192 if (global_dfa != NULL) {
00193 j_printf(" Global: %4d words, %3d categories, %4d nodes\n",
00194 global_winfo->num, global_dfa->term_num, global_dfa->state_num);
00195 }
00196 }
00197
00208 static void
00209 send_gram_info()
00210 {
00211 MULTIGRAM *m;
00212
00213 module_send(module_sd, "<GRAMINFO>\n");
00214 for(m=gramlist;m;m=m->next) {
00215 module_send(module_sd, " #%2d: [%-11s] %4d words, %3d categories, %4d nodes",
00216 m->id,
00217 m->active ? "active" : "inactive",
00218 m->winfo->num, m->dfa->term_num, m->dfa->state_num);
00219 if (m->newbie) module_send(module_sd, " (new)");
00220 if (m->hook != MULTIGRAM_DEFAULT) {
00221 module_send(module_sd, " (next: %s)", hookstr[m->hook]);
00222 }
00223 module_send(module_sd, " \"%s\"\n", m->name);
00224 }
00225 if (global_dfa != NULL) {
00226 module_send(module_sd, " Global: %4d words, %3d categories, %4d nodes\n",
00227 global_winfo->num, global_dfa->term_num, global_dfa->state_num);
00228 }
00229 module_send(module_sd, "</GRAMINFO>\n.\n");
00230 }
00231
00248 static void
00249 multigram_build_append(DFA_INFO *gdfa, WORD_INFO *gwinfo, MULTIGRAM *m)
00250 {
00251
00252 m->state_begin = gdfa->state_num;
00253 m->cate_begin = gdfa->term_num;
00254 m->word_begin = gwinfo->num;
00255
00256
00257
00258
00259 dfa_append(gdfa, m->dfa, m->state_begin, m->cate_begin);
00260
00261 voca_append(gwinfo, m->winfo, m->cate_begin, m->word_begin);
00262
00263 terminfo_append(&(gdfa->term), &(m->dfa->term), m->cate_begin, m->word_begin);
00264
00265
00266 cpair_append(gdfa, m->dfa, m->cate_begin);
00267
00268 dfa_pause_word_append(gdfa, m->dfa, m->cate_begin);
00269 #ifdef MDEBUG
00270 j_printf("- Gram #%d: installed\n", m->id);
00271 #endif
00272 }
00273
00290 void
00291 multigram_add(DFA_INFO *dfa, WORD_INFO *winfo, char *name)
00292 {
00293 MULTIGRAM *new;
00294
00295
00296 new = (MULTIGRAM *)mymalloc(sizeof(MULTIGRAM));
00297 if (name != NULL) {
00298 strncpy(new->name, name, MAXGRAMNAMELEN);
00299 } else {
00300 strncpy(new->name, "(no name)", MAXGRAMNAMELEN);
00301 }
00302
00303 new->id = gram_maxid;
00304 new->dfa = dfa;
00305 new->winfo = winfo;
00306 new->hook = MULTIGRAM_DEFAULT;
00307 new->newbie = TRUE;
00308 new->active = TRUE;
00309
00310
00311 new->next = gramlist;
00312 gramlist = new;
00313
00314 j_printf("- Gram #%d: read\n", new->id);
00315 if (module_mode) {
00316 send_gram_info();
00317 }
00318 #ifdef MDEBUG
00319 print_all_gram();
00320 #endif
00321 gram_maxid++;
00322 }
00323
00341 boolean
00342 multigram_delete(int delid)
00343 {
00344 MULTIGRAM *m;
00345 for(m=gramlist;m;m=m->next) {
00346 if (m->id == delid) {
00347 m->hook = MULTIGRAM_DELETE;
00348 j_printf("- Gram #%d: marked delete\n", m->id);
00349 break;
00350 }
00351 }
00352 if (! m) {
00353 j_printf("- Gram #%d: not found\n", delid);
00354 if (module_mode) {
00355 module_send(module_sd, "<ERROR MESSAGE=\"Gram #%d not found\"/>\n.\n", delid);
00356 }
00357 return FALSE;
00358 }
00359 return TRUE;
00360 }
00361
00371 void
00372 multigram_delete_all()
00373 {
00374 MULTIGRAM *m;
00375 for(m=gramlist;m;m=m->next) {
00376 m->hook = MULTIGRAM_DELETE;
00377 }
00378 }
00379
00392 static boolean
00393 multigram_exec_delete()
00394 {
00395 MULTIGRAM *m, *mtmp, *mprev;
00396 boolean ret_flag = FALSE;
00397 #ifdef MDEBUG
00398 int n;
00399 #endif
00400
00401
00402 mprev = NULL;
00403 m = gramlist;
00404 while(m) {
00405 mtmp = m->next;
00406 if (m->hook == MULTIGRAM_DELETE) {
00407
00408
00409 if (! m->newbie) ret_flag = TRUE;
00410 dfa_info_free(m->dfa);
00411 word_info_free(m->winfo);
00412 n=m->id;
00413 free(m);
00414 j_printf("- Gram #%d: purged\n", n);
00415 if (mprev != NULL) {
00416 mprev->next = mtmp;
00417 } else {
00418 gramlist = mtmp;
00419 }
00420 } else {
00421 mprev = m;
00422 }
00423 m = mtmp;
00424 }
00425
00426 return(ret_flag);
00427 }
00428
00443 void
00444 multigram_activate(int gid)
00445 {
00446 MULTIGRAM *m;
00447 for(m=gramlist;m;m=m->next) {
00448 if (m->id == gid) {
00449 if (m->hook == MULTIGRAM_ACTIVATE) {
00450 j_printf("- Gram #%d: already active\n", m->id);
00451 if (module_mode) {
00452 module_send(module_sd, "<WARN MESSAGE=\"Gram #%d already active\"/>\n.\n", m->id);
00453 }
00454 } else {
00455 m->hook = MULTIGRAM_ACTIVATE;
00456 j_printf("- Gram #%d: marked activate\n", m->id);
00457 }
00458 break;
00459 }
00460 }
00461 if (! m) {
00462 j_printf("- Gram #%d: not found, activation ignored\n", gid);
00463 if (module_mode) {
00464 module_send(module_sd, "<WARN MESSAGE=\"Gram #%d not found\"/>\n.\n", gid);
00465 }
00466 }
00467 }
00468
00489 void
00490 multigram_deactivate(int gid)
00491 {
00492 MULTIGRAM *m;
00493 for(m=gramlist;m;m=m->next) {
00494 if (m->id == gid) {
00495 m->hook = MULTIGRAM_DEACTIVATE;
00496 j_printf("- Gram #%d: marked deactivate\n", m->id);
00497 break;
00498 }
00499 }
00500 if (! m) {
00501 j_printf("- Gram #%d: not found, deactivation ignored\n", gid);
00502 if (module_mode) {
00503 module_send(module_sd, "<WARN MESSAGE=\"Gram #%d not found\"/>\n.\n", gid);
00504 }
00505 }
00506 }
00507
00522 static boolean
00523 multigram_exec_activate()
00524 {
00525 MULTIGRAM *m;
00526 boolean modified;
00527
00528 modified = FALSE;
00529 for(m=gramlist;m;m=m->next) {
00530 if (m->hook == MULTIGRAM_ACTIVATE) {
00531 m->hook = MULTIGRAM_DEFAULT;
00532 if (!m->active) {
00533 j_printf("- Gram #%d: turn on active\n", m->id);
00534 }
00535 m->active = TRUE;
00536 modified = TRUE;
00537 } else if (m->hook == MULTIGRAM_DEACTIVATE) {
00538 m->hook = MULTIGRAM_DEFAULT;
00539 if (m->active) {
00540 j_printf("- Gram #%d: turn off inactive\n, m->id");
00541 }
00542 m->active = FALSE;
00543 modified = TRUE;
00544 }
00545 }
00546 return(modified);
00547 }
00548
00549
00550
00551
00586 boolean
00587 multigram_exec()
00588 {
00589 MULTIGRAM *m;
00590 boolean global_modified = FALSE;
00591 boolean active_changed = FALSE;
00592
00593 #ifdef MDEBUG
00594 j_printf("- Grammar update check\n");
00595 #endif
00596
00597
00598 for(m=gramlist;m;m=m->next) {
00599 if (m->newbie) {
00600
00601 make_dfa_voca_ref(m->dfa, m->winfo);
00602
00603 dfa_find_pause_word(m->dfa, m->winfo, hmminfo);
00604
00605 extract_cpair(m->dfa);
00606 }
00607 }
00608
00609
00610 if (multigram_exec_delete()) {
00611
00612
00613
00614 #ifdef MDEBUG
00615 j_printf("- Re-build whole global grammar...\n");
00616 #endif
00617 if (global_dfa != NULL) {
00618 dfa_info_free(global_dfa);
00619 word_info_free(global_winfo);
00620 global_dfa = NULL;
00621 }
00622 for(m=gramlist;m;m=m->next) {
00623 if (global_dfa == NULL) {
00624 global_dfa = dfa_info_new();
00625 dfa_state_init(global_dfa);
00626 global_winfo = word_info_new();
00627 winfo_init(global_winfo);
00628 }
00629 if (m->newbie) m->newbie = FALSE;
00630 multigram_build_append(global_dfa, global_winfo, m);
00631 }
00632 global_modified = TRUE;
00633 } else {
00634
00635 for(m=gramlist;m;m=m->next) {
00636 if (m->newbie) {
00637 if (global_dfa == NULL) {
00638 global_dfa = dfa_info_new();
00639 dfa_state_init(global_dfa);
00640 global_winfo = word_info_new();
00641 winfo_init(global_winfo);
00642 }
00643 if (m->newbie) m->newbie = FALSE;
00644 multigram_build_append(global_dfa, global_winfo, m);
00645 global_modified = TRUE;
00646 }
00647 }
00648 }
00649
00650
00651 active_changed = multigram_exec_activate();
00652
00653 if (global_modified) {
00654
00655
00656 multigram_setup(global_dfa, global_winfo);
00657 #ifdef MDEBUG
00658 j_printf("- update completed\n");
00659 #endif
00660 }
00661
00662
00663 if (global_modified || active_changed) {
00664 print_all_gram();
00665 if (module_mode) {
00666 send_gram_info();
00667 }
00668 }
00669
00670 return(TRUE);
00671 }
00672
00673
00688 void
00689 multigram_read_file(char *dfa_file, char *dict_file)
00690 {
00691 WORD_INFO *new_winfo;
00692 DFA_INFO *new_dfa;
00693 char buf[MAXGRAMNAMELEN], *p, *q;
00694
00695 j_printf("reading [%s] and [%s]...\n", dfa_file, dict_file);
00696
00697
00698 new_winfo = word_info_new();
00699 if ( !
00700 #ifdef MONOTREE
00701
00702 init_voca(new_winfo, dict_file, hmminfo, TRUE, forcedict_flag)
00703 #else
00704 init_voca(new_winfo, dict_file, hmminfo, FALSE, forcedict_flag)
00705 #endif
00706 ) {
00707 j_error("ERROR: failed to read dictionary, terminated\n");
00708 }
00709 #ifdef PASS1_IWCD
00710 if (triphone_check_flag && hmminfo->is_triphone) {
00711
00712 hmm_check(hmminfo, new_winfo);
00713 }
00714 #endif
00715
00716
00717 new_dfa = dfa_info_new();
00718 init_dfa(new_dfa, dfa_file);
00719
00720
00721 p = &(dfa_file[0]);
00722 q = p;
00723 while(*p != '\0') {
00724 if (*p == '/') q = p + 1;
00725 p++;
00726 }
00727 p = q;
00728 while(*p != '\0' && *p != '.') {
00729 buf[p-q] = *p;
00730 p++;
00731 }
00732 buf[p-q] = '\0';
00733
00734
00735 multigram_add(new_dfa, new_winfo, buf);
00736
00737 j_printf("gram \"%s\" registered\n", buf);
00738
00739 }
00740
00755 void
00756 multigram_add_gramlist(char *dfafile, char *dictfile)
00757 {
00758 GRAMLIST *new;
00759
00760 new = (GRAMLIST *)mymalloc(sizeof(GRAMLIST));
00761 new->dfafile = strcpy((char *)mymalloc(strlen(dfafile)+1), dfafile);
00762 new->dictfile = strcpy((char *)mymalloc(strlen(dictfile)+1), dictfile);
00763 new->next = gramlist_root;
00764 gramlist_root = new;
00765 }
00766
00777 void
00778 multigram_remove_gramlist()
00779 {
00780 GRAMLIST *g;
00781 GRAMLIST *tmp;
00782
00783 g = gramlist_root;
00784 while (g) {
00785 tmp = g->next;
00786 free(g->dfafile);
00787 free(g->dictfile);
00788 free(g);
00789 g = tmp;
00790 }
00791 gramlist_root = NULL;
00792 }
00793
00804 void
00805 multigram_read_all_gramlist()
00806 {
00807 GRAMLIST *g;
00808
00809 for(g = gramlist_root; g; g = g->next) {
00810 multigram_read_file(g->dfafile, g->dictfile);
00811 }
00812 }
00813
00839 void
00840 multigram_add_prefix_list(char *prefix_list, char *cwd)
00841 {
00842 char buf[MAXGRAMNAMELEN], *p, *q;
00843 char buf2_d[MAXGRAMNAMELEN], *buf_d;
00844 char buf2_v[MAXGRAMNAMELEN], *buf_v;
00845
00846 if (prefix_list == NULL) return;
00847
00848 p = &(prefix_list[0]);
00849
00850 while(*p != '\0') {
00851
00852 q = p;
00853 while(*p != '\0' && *p != ',') {
00854 buf[p-q] = *p;
00855 p++;
00856 }
00857 buf[p-q] = '\0';
00858
00859
00860 strcpy(buf2_d, buf);
00861 strcat(buf2_d, ".dfa");
00862 buf_d = filepath(buf2_d, cwd);
00863 checkpath(buf_d);
00864 strcpy(buf2_v, buf);
00865 strcat(buf2_v, ".dict");
00866 buf_v = filepath(buf2_v, cwd);
00867 checkpath(buf_v);
00868 multigram_add_gramlist(buf_d, buf_v);
00869
00870
00871 if (*p == ',') p++;
00872 }
00873 }
00874
00901 void
00902 multigram_add_prefix_filelist(char *listfile)
00903 {
00904 FILE *fp;
00905 char buf[MAXGRAMNAMELEN], *p, *src_bgn, *src_end, *dst;
00906 char *cdir;
00907 char buf2_d[MAXGRAMNAMELEN], *buf_d;
00908 char buf2_v[MAXGRAMNAMELEN], *buf_v;
00909
00910 if (listfile == NULL) return;
00911 if ((fp = fopen(listfile, "r")) == NULL) {
00912 j_printerr("failed to open %s\n", listfile);
00913 return;
00914 }
00915 while(getl_fp(buf, MAXGRAMNAMELEN, fp) != NULL) {
00916
00917 p = &(buf[0]);
00918 while(*p != '\0') {
00919 if (*p == '#') {
00920 *p = '\0';
00921 break;
00922 }
00923 p++;
00924 }
00925 if (buf[0] == '\0') continue;
00926
00927
00928 p = (&buf[0]);
00929 while(*p == ' ' || *p == '\t' || *p == '\r') p++;
00930 if (*p == '\0') continue;
00931 src_bgn = p;
00932 p = (&buf[strlen(buf) - 1]);
00933 while((*p == ' ' || *p == '\t' || *p == '\r') && p > src_bgn) p--;
00934 src_end = p;
00935 dst = (&buf[0]);
00936 p = src_bgn;
00937 while(p <= src_end) *dst++ = *p++;
00938 *dst = '\0';
00939 if (buf[0] == '\0') continue;
00940
00941
00942
00943 cdir = strcpy((char *)mymalloc(strlen(listfile)+1), listfile);
00944 get_dirname(cdir);
00945 strcpy(buf2_d, buf);
00946 strcat(buf2_d, ".dfa");
00947 buf_d = filepath(buf2_d, cdir);
00948 checkpath(buf_d);
00949 strcpy(buf2_v, buf);
00950 strcat(buf2_v, ".dict");
00951 buf_v = filepath(buf2_v, cdir);
00952 checkpath(buf_v);
00953 multigram_add_gramlist(buf_d, buf_v);
00954 free(cdir);
00955 }
00956 fclose(fp);
00957 }
00958
00971 int
00972 multigram_get_all_num()
00973 {
00974 MULTIGRAM *m;
00975 int cnt;
00976
00977 cnt = 0;
00978 for(m=gramlist;m;m=m->next) cnt++;
00979 return(cnt);
00980 }
00981
00998 int
00999 multigram_get_gram_from_category(int category)
01000 {
01001 MULTIGRAM *m;
01002 int tb, te;
01003 for(m = gramlist; m; m = m->next) {
01004 if (m->newbie) continue;
01005 tb = m->cate_begin;
01006 te = tb + m->dfa->term_num;
01007 if (tb <= category && category < te) {
01008 return(m->id);
01009 }
01010 }
01011 return(-1);
01012 }
01013
01014 #endif