00001
00053
00054
00055
00056
00057
00058
00059
00060
00061 #include <julius/julius.h>
00062
00064 #define MDEBUG
00065
00088 static boolean
00089 multigram_rebuild_wchmm(RecogProcess *r)
00090 {
00091 boolean ret;
00092
00093
00094 if (r->wchmm != NULL) {
00095 wchmm_free(r->wchmm);
00096 }
00097 r->wchmm = wchmm_new();
00098 r->wchmm->lmtype = r->lmtype;
00099 r->wchmm->lmvar = r->lmvar;
00100 r->wchmm->ccd_flag = r->ccd_flag;
00101 r->wchmm->category_tree = TRUE;
00102 r->wchmm->hmmwrk = &(r->am->hmmwrk);
00103
00104 r->wchmm->dfa = r->lm->dfa;
00105 r->wchmm->winfo = r->lm->winfo;
00106 r->wchmm->hmminfo = r->am->hmminfo;
00107 if (r->wchmm->category_tree) {
00108 if (r->config->pass1.old_tree_function_flag) {
00109 ret = build_wchmm(r->wchmm, r->lm->config);
00110 } else {
00111 ret = build_wchmm2(r->wchmm, r->lm->config);
00112 }
00113 } else {
00114 ret = build_wchmm2(r->wchmm, r->lm->config);
00115 }
00116
00117
00118 if (r->config->sw.wchmm_check_flag) {
00119 wchmm_check_interactive(r->wchmm);
00120 }
00121
00122 if (ret == FALSE) {
00123 jlog("ERROR: multi-gram: failed to build (global) lexicon tree for recognition\n");
00124 return FALSE;
00125 }
00126
00127
00128 r->trellis_beam_width = set_beam_width(r->wchmm, r->config->pass1.specified_trellis_beam_width);
00129 switch(r->config->pass1.specified_trellis_beam_width) {
00130 case 0:
00131 jlog("STAT: multi-gram: beam width set to %d (full) by lexicon change\n", r->trellis_beam_width);
00132 break;
00133 case -1:
00134 jlog("STAT: multi-gram: beam width set to %d (guess) by lexicon change\n", r->trellis_beam_width);
00135 }
00136
00137
00138
00139
00140
00141
00142
00143
00144 return TRUE;
00145 }
00146
00172 boolean
00173 multigram_build(RecogProcess *r)
00174 {
00175 if (r->lm->winfo != NULL) {
00176
00177 if (multigram_rebuild_wchmm(r) == FALSE) {
00178 jlog("ERROR: multi-gram: failed to re-build tree lexicon\n");
00179 return FALSE;
00180 }
00181 #ifdef MDEBUG
00182 jlog("STAT: wchmm (re)build completed\n");
00183 #endif
00184 }
00185 return(TRUE);
00186 }
00187
00210 static boolean
00211 multigram_append_to_global(DFA_INFO *gdfa, WORD_INFO *gwinfo, MULTIGRAM *m)
00212 {
00213
00214 m->state_begin = gdfa->state_num;
00215 m->cate_begin = gdfa->term_num;
00216 m->word_begin = gwinfo->num;
00217
00218
00219
00220
00221 dfa_append(gdfa, m->dfa, m->state_begin, m->cate_begin);
00222
00223 if (voca_append(gwinfo, m->winfo, m->cate_begin, m->word_begin) == FALSE) {
00224 return FALSE;
00225 }
00226
00227 terminfo_append(&(gdfa->term), &(m->dfa->term), m->cate_begin, m->word_begin);
00228
00229
00230 if (cpair_append(gdfa, m->dfa, m->cate_begin) == FALSE) {
00231 return FALSE;
00232 }
00233
00234 if (dfa_pause_word_append(gdfa, m->dfa, m->cate_begin) == FALSE) {
00235 return FALSE;
00236 }
00237
00238 jlog("STAT: Gram #%d %s: installed\n", m->id, m->name);
00239
00240 return TRUE;
00241 }
00242
00276 int
00277 multigram_add(DFA_INFO *dfa, WORD_INFO *winfo, char *name, PROCESS_LM *lm)
00278 {
00279 MULTIGRAM *new;
00280
00281
00282 new = (MULTIGRAM *)mymalloc(sizeof(MULTIGRAM));
00283 if (name != NULL) {
00284 strncpy(new->name, name, MAXGRAMNAMELEN);
00285 } else {
00286 strncpy(new->name, "(no name)", MAXGRAMNAMELEN);
00287 }
00288
00289 new->id = lm->gram_maxid;
00290 new->dfa = dfa;
00291 new->winfo = winfo;
00292 new->hook = MULTIGRAM_DEFAULT;
00293 new->newbie = TRUE;
00294 new->active = TRUE;
00295
00296
00297 new->next = lm->grammars;
00298 lm->grammars = new;
00299
00300 jlog("STAT: Gram #%d %s registered\n", new->id, new->name);
00301 lm->gram_maxid++;
00302
00303 return new->id;
00304 }
00305
00331 boolean
00332 multigram_delete(int delid, PROCESS_LM *lm)
00333 {
00334 MULTIGRAM *m;
00335 for(m=lm->grammars;m;m=m->next) {
00336 if (m->id == delid) {
00337 m->hook |= MULTIGRAM_DELETE;
00338 jlog("STAT: Gram #%d %s: marked delete\n", m->id, m->name);
00339 break;
00340 }
00341 }
00342 if (! m) {
00343 jlog("STAT: Gram #%d: not found\n", delid);
00344 return FALSE;
00345 }
00346 return TRUE;
00347 }
00348
00364 void
00365 multigram_delete_all(PROCESS_LM *lm)
00366 {
00367 MULTIGRAM *m;
00368 for(m=lm->grammars;m;m=m->next) {
00369 m->hook |= MULTIGRAM_DELETE;
00370 }
00371 }
00372
00389 static boolean
00390 multigram_exec_delete(PROCESS_LM *lm)
00391 {
00392 MULTIGRAM *m, *mtmp, *mprev;
00393 boolean ret_flag = FALSE;
00394
00395
00396 mprev = NULL;
00397 m = lm->grammars;
00398 while(m) {
00399 mtmp = m->next;
00400 if (m->hook & MULTIGRAM_DELETE) {
00401
00402
00403 if (! m->newbie) ret_flag = TRUE;
00404 if (m->dfa) dfa_info_free(m->dfa);
00405 word_info_free(m->winfo);
00406 jlog("STAT: Gram #%d %s: purged\n", m->id, m->name);
00407 free(m);
00408 if (mprev != NULL) {
00409 mprev->next = mtmp;
00410 } else {
00411 lm->grammars = mtmp;
00412 }
00413 } else {
00414 mprev = m;
00415 }
00416 m = mtmp;
00417 }
00418
00419 return(ret_flag);
00420 }
00421
00444 int
00445 multigram_activate(int gid, PROCESS_LM *lm)
00446 {
00447 MULTIGRAM *m;
00448 int ret;
00449
00450 for(m=lm->grammars;m;m=m->next) {
00451 if (m->id == gid) {
00452 if (m->hook & MULTIGRAM_DEACTIVATE) {
00453 ret = 0;
00454 m->hook &= ~(MULTIGRAM_DEACTIVATE);
00455 m->hook |= MULTIGRAM_ACTIVATE;
00456 jlog("STAT: Gram #%d %s: marked active, superceding deactivate\n", m->id, m->name);
00457 } else {
00458 if (m->hook & MULTIGRAM_ACTIVATE) {
00459 jlog("STAT: Gram #%d %s: already marked active\n", m->id, m->name);
00460 ret = 1;
00461 } else {
00462 ret = 0;
00463 m->hook |= MULTIGRAM_ACTIVATE;
00464 jlog("STAT: Gram #%d %s: marked activate\n", m->id, m->name);
00465 }
00466 }
00467 break;
00468 }
00469 }
00470 if (! m) {
00471 jlog("WARNING: Gram #%d: not found, activation ignored\n", gid);
00472 ret = -1;
00473 }
00474
00475 return(ret);
00476 }
00477
00506 int
00507 multigram_deactivate(int gid, PROCESS_LM *lm)
00508 {
00509 MULTIGRAM *m;
00510 int ret;
00511
00512 for(m=lm->grammars;m;m=m->next) {
00513 if (m->id == gid) {
00514 if (m->hook & MULTIGRAM_ACTIVATE) {
00515 ret = 0;
00516 m->hook &= ~(MULTIGRAM_ACTIVATE);
00517 m->hook |= MULTIGRAM_DEACTIVATE;
00518 jlog("STAT: Gram #%d %s: marked deactivate, superceding activate\n", m->id, m->name);
00519 } else {
00520 if (m->hook & MULTIGRAM_DEACTIVATE) {
00521 jlog("STAT: Gram #%d %s: already marked deactivate\n", m->id, m->name);
00522 ret = 1;
00523 } else {
00524 ret = 0;
00525 m->hook |= MULTIGRAM_DEACTIVATE;
00526 jlog("STAT: Gram #%d %s: marked deactivate\n", m->id, m->name);
00527 }
00528 }
00529 break;
00530 }
00531 }
00532 if (! m) {
00533 jlog("WARNING: - Gram #%d: not found, deactivation ignored\n", gid);
00534 ret = -1;
00535 }
00536
00537 return(ret);
00538 }
00539
00558 static boolean
00559 multigram_exec_activate(PROCESS_LM *lm)
00560 {
00561 MULTIGRAM *m;
00562 boolean modified;
00563
00564 modified = FALSE;
00565 for(m=lm->grammars;m;m=m->next) {
00566 if (m->hook & MULTIGRAM_ACTIVATE) {
00567 m->hook &= ~(MULTIGRAM_ACTIVATE);
00568 if (!m->active) {
00569 jlog("STAT: Gram #%d %s: turn on active\n", m->id, m->name);
00570 }
00571 m->active = TRUE;
00572 modified = TRUE;
00573 } else if (m->hook & MULTIGRAM_DEACTIVATE) {
00574 m->hook &= ~(MULTIGRAM_DEACTIVATE);
00575 if (m->active) {
00576 jlog("STAT: Gram #%d %s: turn off inactive\n", m->id, m->name);
00577 }
00578 m->active = FALSE;
00579 modified = TRUE;
00580 }
00581 }
00582 return(modified);
00583 }
00584
00618 boolean
00619 multigram_update(PROCESS_LM *lm)
00620 {
00621 MULTIGRAM *m;
00622 boolean active_changed = FALSE;
00623 boolean rebuild_flag;
00624
00625 if (lm->lmvar == LM_DFA_GRAMMAR) {
00626
00627 for(m=lm->grammars;m;m=m->next) {
00628 if (m->newbie) {
00629 jlog("STAT: Gram #%d %s: new grammar loaded, now mash it up for recognition\n", m->id, m->name);
00630
00631 if (make_dfa_voca_ref(m->dfa, m->winfo) == FALSE) {
00632 jlog("ERROR: failed to map dict <-> DFA. This grammar will be deleted\n");
00633
00634 m->hook |= MULTIGRAM_DELETE;
00635 continue;
00636 }
00637
00638 dfa_find_pause_word(m->dfa, m->winfo, lm->am->hmminfo);
00639
00640 jlog("STAT: Gram #%d %s: extracting category-pair constraint for the 1st pass\n", m->id, m->name);
00641 if (extract_cpair(m->dfa) == FALSE) {
00642 jlog("ERROR: failed to extract category pair. This grammar will be deleted\n");
00643
00644 m->hook |= MULTIGRAM_DELETE;
00645 }
00646 }
00647 }
00648 }
00649
00650 rebuild_flag = FALSE;
00651
00652 if (multigram_exec_delete(lm)) {
00653 rebuild_flag = TRUE;
00654 }
00655
00656 for(m=lm->grammars;m;m=m->next) {
00657 if (m->hook & MULTIGRAM_MODIFIED) {
00658 rebuild_flag = TRUE;
00659 m->hook &= ~(MULTIGRAM_MODIFIED);
00660 }
00661 }
00662
00663 if (rebuild_flag) {
00664
00665
00666
00667 #ifdef MDEBUG
00668 jlog("STAT: re-build whole global grammar...\n");
00669 #endif
00670
00671 if (lm->dfa != NULL) {
00672 dfa_info_free(lm->dfa);
00673 lm->dfa = NULL;
00674 }
00675 if (lm->winfo != NULL) {
00676 word_info_free(lm->winfo);
00677 lm->winfo = NULL;
00678 }
00679
00680 for(m=lm->grammars;m;m=m->next) {
00681 if (lm->lmvar == LM_DFA_GRAMMAR && lm->dfa == NULL) {
00682 lm->dfa = dfa_info_new();
00683 dfa_state_init(lm->dfa);
00684 }
00685 if (lm->winfo == NULL) {
00686 lm->winfo = word_info_new();
00687 winfo_init(lm->winfo);
00688 }
00689 if (m->newbie) m->newbie = FALSE;
00690 if (lm->lmvar == LM_DFA_WORD) {
00691
00692 m->word_begin = lm->winfo->num;
00693 if (voca_append(lm->winfo, m->winfo, m->id, m->word_begin) == FALSE) {
00694 jlog("ERROR: multi-gram: failed to add dictionary #%d to recognition network\n", m->id);
00695
00696 m->hook |= MULTIGRAM_DELETE;
00697 }
00698 } else {
00699 if (multigram_append_to_global(lm->dfa, lm->winfo, m) == FALSE) {
00700 jlog("ERROR: multi-gram: failed to add grammar #%d to recognition network\n", m->id);
00701
00702 m->hook |= MULTIGRAM_DELETE;
00703 }
00704 }
00705 }
00706
00707 if (multigram_exec_delete(lm)) {
00708 jlog("ERROR: errorous grammar deleted\n");
00709 }
00710 lm->global_modified = TRUE;
00711 } else {
00712
00713 for(m=lm->grammars;m;m=m->next) {
00714 if (m->newbie) {
00715 if (lm->lmvar == LM_DFA_GRAMMAR && lm->dfa == NULL) {
00716 lm->dfa = dfa_info_new();
00717 dfa_state_init(lm->dfa);
00718 }
00719 if (lm->winfo == NULL) {
00720 lm->winfo = word_info_new();
00721 winfo_init(lm->winfo);
00722 }
00723 if (m->newbie) m->newbie = FALSE;
00724 if (lm->lmvar == LM_DFA_WORD) {
00725
00726 m->word_begin = lm->winfo->num;
00727 if (voca_append(lm->winfo, m->winfo, m->id, m->word_begin) == FALSE) {
00728 jlog("ERROR: multi-gram: failed to add dictionary #%d to recognition network\n", m->id);
00729
00730 m->hook |= MULTIGRAM_DELETE;
00731 }
00732 } else {
00733 if (multigram_append_to_global(lm->dfa, lm->winfo, m) == FALSE) {
00734 jlog("ERROR: multi-gram: failed to add grammar #%d to recognition network\n", m->id);
00735
00736 m->hook |= MULTIGRAM_DELETE;
00737 }
00738 }
00739 lm->global_modified = TRUE;
00740 }
00741 }
00742 }
00743
00744
00745 active_changed = multigram_exec_activate(lm);
00746
00747 if (lm->global_modified) {
00748
00749
00750 if (lm->lmvar == LM_DFA_GRAMMAR) {
00751 if (lm->dfa == NULL || lm->winfo == NULL) {
00752 if (lm->dfa != NULL) {
00753 dfa_info_free(lm->dfa);
00754 lm->dfa = NULL;
00755 }
00756 if (lm->winfo != NULL) {
00757 word_info_free(lm->winfo);
00758 lm->winfo = NULL;
00759 }
00760 }
00761 }
00762 #ifdef MDEBUG
00763 jlog("STAT: grammar update completed\n");
00764 #endif
00765 }
00766
00767 if (lm->global_modified || active_changed) {
00768 return (TRUE);
00769 }
00770
00771 return FALSE;
00772 }
00773
00790 static boolean
00791 multigram_read_file_and_add(char *dfa_file, char *dict_file, PROCESS_LM *lm)
00792 {
00793 WORD_INFO *new_winfo;
00794 DFA_INFO *new_dfa;
00795 char buf[MAXGRAMNAMELEN], *p, *q;
00796 boolean ret;
00797
00798 if (dfa_file != NULL) {
00799 jlog("STAT: reading [%s] and [%s]...\n", dfa_file, dict_file);
00800 } else {
00801 jlog("STAT: reading [%s]...\n", dict_file);
00802 }
00803
00804
00805 new_winfo = word_info_new();
00806
00807 if (lm->lmvar == LM_DFA_GRAMMAR) {
00808 ret = init_voca(new_winfo, dict_file, lm->am->hmminfo,
00809 #ifdef MONOTREE
00810 TRUE,
00811 #else
00812 FALSE,
00813 #endif
00814 lm->config->forcedict_flag);
00815 if ( ! ret ) {
00816 jlog("ERROR: failed to read dictionary \"%s\"\n", dict_file);
00817 word_info_free(new_winfo);
00818 return FALSE;
00819 }
00820 } else if (lm->lmvar == LM_DFA_WORD) {
00821 ret = init_wordlist(new_winfo, dict_file, lm->am->hmminfo,
00822 lm->config->wordrecog_head_silence_model_name,
00823 lm->config->wordrecog_tail_silence_model_name,
00824 (lm->config->wordrecog_silence_context_name[0] == '\0') ? NULL : lm->config->wordrecog_silence_context_name,
00825 lm->config->forcedict_flag);
00826 if ( ! ret ) {
00827 jlog("ERROR: failed to read word list \"%s\"\n", dict_file);
00828 word_info_free(new_winfo);
00829 return FALSE;
00830 }
00831 }
00832
00833 new_dfa = NULL;
00834 if (lm->lmvar == LM_DFA_GRAMMAR) {
00835
00836 new_dfa = dfa_info_new();
00837 if (init_dfa(new_dfa, dfa_file) == FALSE) {
00838 jlog("ERROR: multi-gram: error in reading DFA\n");
00839 word_info_free(new_winfo);
00840 dfa_info_free(new_dfa);
00841 return FALSE;
00842 }
00843 }
00844
00845 jlog("STAT: done\n");
00846
00847
00848 p = &(dict_file[0]);
00849 q = p;
00850 while(*p != '\0') {
00851 if (*p == '/') q = p + 1;
00852 p++;
00853 }
00854 p = q;
00855 while(*p != '\0' && *p != '.') {
00856 buf[p-q] = *p;
00857 p++;
00858 }
00859 buf[p-q] = '\0';
00860
00861
00862 multigram_add(new_dfa, new_winfo, buf, lm);
00863
00864 return TRUE;
00865
00866 }
00867
00868
00885 boolean
00886 multigram_load_all_gramlist(PROCESS_LM *lm)
00887 {
00888 GRAMLIST *g;
00889 GRAMLIST *groot;
00890 boolean ok_p;
00891
00892 switch(lm->config->lmvar) {
00893 case LM_DFA_GRAMMAR: groot = lm->config->gramlist_root; break;
00894 case LM_DFA_WORD: groot = lm->config->wordlist_root; break;
00895 }
00896
00897 ok_p = TRUE;
00898 for(g = groot; g; g = g->next) {
00899 if (multigram_read_file_and_add(g->dfafile, g->dictfile, lm) == FALSE) {
00900 ok_p = FALSE;
00901 }
00902 }
00903 return(ok_p);
00904 }
00905
00925 int
00926 multigram_get_all_num(PROCESS_LM *lm)
00927 {
00928 MULTIGRAM *m;
00929 int cnt;
00930
00931 cnt = 0;
00932 for(m=lm->grammars;m;m=m->next) cnt++;
00933 return(cnt);
00934 }
00935
00957 int
00958 multigram_get_gram_from_category(int category, PROCESS_LM *lm)
00959 {
00960 MULTIGRAM *m;
00961 int tb, te;
00962 for(m = lm->grammars; m; m = m->next) {
00963 if (m->newbie) continue;
00964 tb = m->cate_begin;
00965 te = tb + m->dfa->term_num;
00966 if (tb <= category && category < te) {
00967 return(m->id);
00968 }
00969 }
00970 return(-1);
00971 }
00972
00994 int
00995 multigram_get_gram_from_wid(WORD_ID wid, PROCESS_LM *lm)
00996 {
00997 MULTIGRAM *m;
00998 int wb, we;
00999
01000 for(m = lm->grammars; m; m = m->next) {
01001 if (m->newbie) continue;
01002 wb = m->word_begin;
01003 we = wb + m->winfo->num;
01004 if (wb <= wid && wid < we) {
01005 return(m->id);
01006 }
01007 }
01008 return(-1);
01009 }
01010
01011
01026 void
01027 multigram_free_all(MULTIGRAM *root)
01028 {
01029 MULTIGRAM *m, *mtmp;
01030
01031 m = root;
01032 while(m) {
01033 mtmp = m->next;
01034 if (m->dfa) dfa_info_free(m->dfa);
01035 word_info_free(m->winfo);
01036 free(m);
01037 m = mtmp;
01038 }
01039 }
01040
01059 int
01060 multigram_get_id_by_name(PROCESS_LM *lm, char *gramname)
01061 {
01062 MULTIGRAM *m;
01063
01064 for(m=lm->grammars;m;m=m->next) {
01065 if (strmatch(m->name, gramname)) break;
01066 }
01067 if (!m) {
01068 jlog("ERROR: multigram: cannot find grammar \"%s\"\n", gramname);
01069 return -1;
01070 }
01071 return m->id;
01072 }
01073
01092 MULTIGRAM *
01093 multigram_get_grammar_by_name(PROCESS_LM *lm, char *gramname)
01094 {
01095 MULTIGRAM *m;
01096
01097 for(m=lm->grammars;m;m=m->next) {
01098 if (strmatch(m->name, gramname)) break;
01099 }
01100 if (!m) {
01101 jlog("ERROR: multigram: cannot find grammar \"%s\"\n", gramname);
01102 return NULL;
01103 }
01104 return m;
01105 }
01106
01125 MULTIGRAM *
01126 multigram_get_grammar_by_id(PROCESS_LM *lm, unsigned short id)
01127 {
01128 MULTIGRAM *m;
01129
01130 for(m=lm->grammars;m;m=m->next) {
01131 if (m->id == id) break;
01132 }
01133 if (!m) {
01134 jlog("ERROR: multi-gram: cannot find grammar id \"%d\"\n", id);
01135 return NULL;
01136 }
01137 return m;
01138 }
01139
01174 boolean
01175 multigram_add_words_to_grammar(PROCESS_LM *lm, MULTIGRAM *m, WORD_INFO *winfo)
01176 {
01177 int offset;
01178
01179 if (lm == NULL || m == NULL || winfo == NULL) return FALSE;
01180
01181 offset = m->winfo->num;
01182 printf("adding %d words to grammar #%d (%d words)\n", winfo->num, m->id, m->winfo->num);
01183
01184 if (voca_append(m->winfo, winfo, m->id, offset) == FALSE) {
01185 jlog("ERROR: multi-gram: failed to add words to dict in grammar #%d \"%s\"\n", m->id, m->name);
01186 return FALSE;
01187 }
01188
01189 if (lm->lmvar == LM_DFA_GRAMMAR) {
01190 if (m->dfa->term_num != 0) free_terminfo(&(m->dfa->term));
01191 if (make_dfa_voca_ref(m->dfa, m->winfo) == FALSE) {
01192 jlog("ERROR: failed to map dict <-> DFA. This grammar will be deleted\n");
01193 return FALSE;
01194 }
01195 }
01196
01197 m->hook |= MULTIGRAM_MODIFIED;
01198
01199 return TRUE;
01200 }
01201
01227 boolean
01228 multigram_add_words_to_grammar_by_name(PROCESS_LM *lm, char *gramname, WORD_INFO *winfo)
01229 {
01230 return(multigram_add_words_to_grammar(lm, multigram_get_grammar_by_name(lm, gramname), winfo));
01231 }
01232
01258 boolean
01259 multigram_add_words_to_grammar_by_id(PROCESS_LM *lm, unsigned short id, WORD_INFO *winfo)
01260 {
01261 return(multigram_add_words_to_grammar(lm, multigram_get_grammar_by_id(lm, id), winfo));
01262 }
01263
01264