00001
00053
00054
00055
00056
00057
00058
00059
00060
00061 #include <julius/julius.h>
00062
00064 #define MDEBUG
00065
00088 static boolean
00089 multigram_rebuild_wchmm(RecogProcess *r)
00090 {
00091 boolean ret;
00092
00093
00094 if (r->wchmm != NULL) {
00095 wchmm_free(r->wchmm);
00096 }
00097 r->wchmm = wchmm_new();
00098 r->wchmm->lmtype = r->lmtype;
00099 r->wchmm->lmvar = r->lmvar;
00100 r->wchmm->ccd_flag = r->ccd_flag;
00101 r->wchmm->category_tree = TRUE;
00102 r->wchmm->hmmwrk = &(r->am->hmmwrk);
00103
00104 r->wchmm->dfa = r->lm->dfa;
00105 r->wchmm->winfo = r->lm->winfo;
00106 r->wchmm->hmminfo = r->am->hmminfo;
00107 if (r->wchmm->category_tree) {
00108 if (r->config->pass1.old_tree_function_flag) {
00109 ret = build_wchmm(r->wchmm, r->lm->config);
00110 } else {
00111 ret = build_wchmm2(r->wchmm, r->lm->config);
00112 }
00113 } else {
00114 ret = build_wchmm2(r->wchmm, r->lm->config);
00115 }
00116
00117
00118 if (r->config->sw.wchmm_check_flag) {
00119 wchmm_check_interactive(r->wchmm);
00120 }
00121
00122 if (ret == FALSE) {
00123 jlog("ERROR: multi-gram: failed to build (global) lexicon tree for recognition\n");
00124 return FALSE;
00125 }
00126
00127
00128 r->trellis_beam_width = set_beam_width(r->wchmm, r->config->pass1.specified_trellis_beam_width);
00129 switch(r->config->pass1.specified_trellis_beam_width) {
00130 case 0:
00131 jlog("STAT: multi-gram: beam width set to %d (full) by lexicon change\n", r->trellis_beam_width);
00132 break;
00133 case -1:
00134 jlog("STAT: multi-gram: beam width set to %d (guess) by lexicon change\n", r->trellis_beam_width);
00135 }
00136
00137
00138
00139
00140
00141
00142
00143
00144 return TRUE;
00145 }
00146
00172 boolean
00173 multigram_build(RecogProcess *r)
00174 {
00175 if (r->lm->winfo != NULL) {
00176
00177 if (multigram_rebuild_wchmm(r) == FALSE) {
00178 jlog("ERROR: multi-gram: failed to re-build tree lexicon\n");
00179 return FALSE;
00180 }
00181 }
00182 #ifdef MDEBUG
00183 jlog("STAT: wchmm (re)build completed\n");
00184 #endif
00185 return(TRUE);
00186 }
00187
00210 static boolean
00211 multigram_append_to_global(DFA_INFO *gdfa, WORD_INFO *gwinfo, MULTIGRAM *m)
00212 {
00213
00214 m->state_begin = gdfa->state_num;
00215 m->cate_begin = gdfa->term_num;
00216 m->word_begin = gwinfo->num;
00217
00218
00219
00220
00221 dfa_append(gdfa, m->dfa, m->state_begin, m->cate_begin);
00222
00223 if (voca_append(gwinfo, m->winfo, m->cate_begin, m->word_begin) == FALSE) {
00224 return FALSE;
00225 }
00226
00227 terminfo_append(&(gdfa->term), &(m->dfa->term), m->cate_begin, m->word_begin);
00228
00229
00230 if (cpair_append(gdfa, m->dfa, m->cate_begin) == FALSE) {
00231 return FALSE;
00232 }
00233
00234 if (dfa_pause_word_append(gdfa, m->dfa, m->cate_begin) == FALSE) {
00235 return FALSE;
00236 }
00237
00238 jlog("STAT: Gram #%d: installed\n", m->id);
00239
00240 return TRUE;
00241 }
00242
00272 void
00273 multigram_add(DFA_INFO *dfa, WORD_INFO *winfo, char *name, PROCESS_LM *lm)
00274 {
00275 MULTIGRAM *new;
00276
00277
00278 new = (MULTIGRAM *)mymalloc(sizeof(MULTIGRAM));
00279 if (name != NULL) {
00280 strncpy(new->name, name, MAXGRAMNAMELEN);
00281 } else {
00282 strncpy(new->name, "(no name)", MAXGRAMNAMELEN);
00283 }
00284
00285 new->id = lm->gram_maxid;
00286 new->dfa = dfa;
00287 new->winfo = winfo;
00288 new->hook = MULTIGRAM_DEFAULT;
00289 new->newbie = TRUE;
00290 new->active = TRUE;
00291
00292
00293 new->next = lm->grammars;
00294 lm->grammars = new;
00295
00296 jlog("STAT: Gram #%d: read\n", new->id);
00297 lm->gram_maxid++;
00298 }
00299
00325 boolean
00326 multigram_delete(int delid, PROCESS_LM *lm)
00327 {
00328 MULTIGRAM *m;
00329 for(m=lm->grammars;m;m=m->next) {
00330 if (m->id == delid) {
00331 m->hook |= MULTIGRAM_DELETE;
00332 jlog("STAT: Gram #%d: marked delete\n", m->id);
00333 break;
00334 }
00335 }
00336 if (! m) {
00337 jlog("STAT: Gram #%d: not found\n", delid);
00338 return FALSE;
00339 }
00340 return TRUE;
00341 }
00342
00358 void
00359 multigram_delete_all(PROCESS_LM *lm)
00360 {
00361 MULTIGRAM *m;
00362 for(m=lm->grammars;m;m=m->next) {
00363 m->hook |= MULTIGRAM_DELETE;
00364 }
00365 }
00366
00383 static boolean
00384 multigram_exec_delete(PROCESS_LM *lm)
00385 {
00386 MULTIGRAM *m, *mtmp, *mprev;
00387 boolean ret_flag = FALSE;
00388 int n;
00389
00390
00391 mprev = NULL;
00392 m = lm->grammars;
00393 while(m) {
00394 mtmp = m->next;
00395 if (m->hook & MULTIGRAM_DELETE) {
00396
00397
00398 if (! m->newbie) ret_flag = TRUE;
00399 if (m->dfa) dfa_info_free(m->dfa);
00400 word_info_free(m->winfo);
00401 n=m->id;
00402 free(m);
00403 jlog("STAT: Gram #%d: purged\n", n);
00404 if (mprev != NULL) {
00405 mprev->next = mtmp;
00406 } else {
00407 lm->grammars = mtmp;
00408 }
00409 } else {
00410 mprev = m;
00411 }
00412 m = mtmp;
00413 }
00414
00415 return(ret_flag);
00416 }
00417
00440 int
00441 multigram_activate(int gid, PROCESS_LM *lm)
00442 {
00443 MULTIGRAM *m;
00444 int ret;
00445
00446 for(m=lm->grammars;m;m=m->next) {
00447 if (m->id == gid) {
00448 if (m->hook & MULTIGRAM_DEACTIVATE) {
00449 ret = 0;
00450 m->hook &= ~(MULTIGRAM_DEACTIVATE);
00451 m->hook |= MULTIGRAM_ACTIVATE;
00452 jlog("STAT: Gram #%d: marked active, superceding deactivate\n", m->id);
00453 } else {
00454 if (m->hook & MULTIGRAM_ACTIVATE) {
00455 jlog("STAT: Gram #%d: already marked active\n", m->id);
00456 ret = 1;
00457 } else {
00458 ret = 0;
00459 m->hook |= MULTIGRAM_ACTIVATE;
00460 jlog("STAT: Gram #%d: marked activate\n", m->id);
00461 }
00462 }
00463 break;
00464 }
00465 }
00466 if (! m) {
00467 jlog("WARNING: Gram #%d: not found, activation ignored\n", gid);
00468 ret = -1;
00469 }
00470
00471 return(ret);
00472 }
00473
00502 int
00503 multigram_deactivate(int gid, PROCESS_LM *lm)
00504 {
00505 MULTIGRAM *m;
00506 int ret;
00507
00508 for(m=lm->grammars;m;m=m->next) {
00509 if (m->id == gid) {
00510 if (m->hook & MULTIGRAM_ACTIVATE) {
00511 ret = 0;
00512 m->hook &= ~(MULTIGRAM_ACTIVATE);
00513 m->hook |= MULTIGRAM_DEACTIVATE;
00514 jlog("STAT: Gram #%d: marked deactivate, superceding activate\n", m->id);
00515 } else {
00516 if (m->hook & MULTIGRAM_DEACTIVATE) {
00517 jlog("STAT: Gram #%d: already marked deactivate\n", m->id);
00518 ret = 1;
00519 } else {
00520 ret = 0;
00521 m->hook |= MULTIGRAM_DEACTIVATE;
00522 jlog("STAT: Gram #%d: marked deactivate\n", m->id);
00523 }
00524 }
00525 break;
00526 }
00527 }
00528 if (! m) {
00529 jlog("WARNING: - Gram #%d: not found, deactivation ignored\n", gid);
00530 ret = -1;
00531 }
00532
00533 return(ret);
00534 }
00535
00554 static boolean
00555 multigram_exec_activate(PROCESS_LM *lm)
00556 {
00557 MULTIGRAM *m;
00558 boolean modified;
00559
00560 modified = FALSE;
00561 for(m=lm->grammars;m;m=m->next) {
00562 if (m->hook & MULTIGRAM_ACTIVATE) {
00563 m->hook &= ~(MULTIGRAM_ACTIVATE);
00564 if (!m->active) {
00565 jlog("STAT: Gram #%d: turn on active\n", m->id);
00566 }
00567 m->active = TRUE;
00568 modified = TRUE;
00569 } else if (m->hook & MULTIGRAM_DEACTIVATE) {
00570 m->hook &= ~(MULTIGRAM_DEACTIVATE);
00571 if (m->active) {
00572 jlog("STAT: Gram #%d: turn off inactive\n", m->id);
00573 }
00574 m->active = FALSE;
00575 modified = TRUE;
00576 }
00577 }
00578 return(modified);
00579 }
00580
00614 boolean
00615 multigram_update(PROCESS_LM *lm)
00616 {
00617 MULTIGRAM *m;
00618 boolean active_changed = FALSE;
00619 boolean rebuild_flag;
00620
00621 if (lm->lmvar == LM_DFA_GRAMMAR) {
00622
00623 for(m=lm->grammars;m;m=m->next) {
00624 if (m->newbie) {
00625 jlog("STAT: Gram #%d: new grammar found, setup it for recognition\n", m->id);
00626
00627 if (make_dfa_voca_ref(m->dfa, m->winfo) == FALSE) {
00628 jlog("ERROR: failed to map dict <-> DFA. This grammar will be deleted\n");
00629
00630 m->hook |= MULTIGRAM_DELETE;
00631 continue;
00632 }
00633
00634 dfa_find_pause_word(m->dfa, m->winfo, lm->am->hmminfo);
00635
00636 if (extract_cpair(m->dfa) == FALSE) {
00637 jlog("ERROR: failed to extracting category pair. This grammar will be deleted\n");
00638
00639 m->hook |= MULTIGRAM_DELETE;
00640 }
00641 }
00642 }
00643 }
00644
00645 rebuild_flag = FALSE;
00646
00647 if (multigram_exec_delete(lm)) {
00648 rebuild_flag = TRUE;
00649 }
00650
00651 for(m=lm->grammars;m;m=m->next) {
00652 if (m->hook & MULTIGRAM_MODIFIED) {
00653 rebuild_flag = TRUE;
00654 m->hook &= ~(MULTIGRAM_MODIFIED);
00655 }
00656 }
00657
00658 if (rebuild_flag) {
00659
00660
00661
00662 #ifdef MDEBUG
00663 jlog("STAT: re-build whole global grammar...\n");
00664 #endif
00665
00666 if (lm->dfa != NULL) {
00667 dfa_info_free(lm->dfa);
00668 lm->dfa = NULL;
00669 }
00670 if (lm->winfo != NULL) {
00671 word_info_free(lm->winfo);
00672 lm->winfo = NULL;
00673 }
00674
00675 for(m=lm->grammars;m;m=m->next) {
00676 if (lm->lmvar == LM_DFA_GRAMMAR && lm->dfa == NULL) {
00677 lm->dfa = dfa_info_new();
00678 dfa_state_init(lm->dfa);
00679 }
00680 if (lm->winfo == NULL) {
00681 lm->winfo = word_info_new();
00682 winfo_init(lm->winfo);
00683 }
00684 if (m->newbie) m->newbie = FALSE;
00685 if (lm->lmvar == LM_DFA_WORD) {
00686
00687 m->word_begin = lm->winfo->num;
00688 if (voca_append(lm->winfo, m->winfo, 0, m->word_begin) == FALSE) {
00689 jlog("ERROR: multi-gram: failed to add dictionary #%d to recognition network\n", m->id);
00690
00691 m->hook |= MULTIGRAM_DELETE;
00692 }
00693 } else {
00694 if (multigram_append_to_global(lm->dfa, lm->winfo, m) == FALSE) {
00695 jlog("ERROR: multi-gram: failed to add grammar #%d to recognition network\n", m->id);
00696
00697 m->hook |= MULTIGRAM_DELETE;
00698 }
00699 }
00700 }
00701
00702 if (multigram_exec_delete(lm)) {
00703 jlog("ERROR: errorous grammar deleted\n");
00704 }
00705 lm->global_modified = TRUE;
00706 } else {
00707
00708 for(m=lm->grammars;m;m=m->next) {
00709 if (m->newbie) {
00710 if (lm->lmvar == LM_DFA_GRAMMAR && lm->dfa == NULL) {
00711 lm->dfa = dfa_info_new();
00712 dfa_state_init(lm->dfa);
00713 }
00714 if (lm->winfo == NULL) {
00715 lm->winfo = word_info_new();
00716 winfo_init(lm->winfo);
00717 }
00718 if (m->newbie) m->newbie = FALSE;
00719 if (lm->lmvar == LM_DFA_WORD) {
00720
00721 m->word_begin = lm->winfo->num;
00722 if (voca_append(lm->winfo, m->winfo, 0, m->word_begin) == FALSE) {
00723 jlog("ERROR: multi-gram: failed to add dictionary #%d to recognition network\n", m->id);
00724
00725 m->hook |= MULTIGRAM_DELETE;
00726 }
00727 } else {
00728 if (multigram_append_to_global(lm->dfa, lm->winfo, m) == FALSE) {
00729 jlog("ERROR: multi-gram: failed to add grammar #%d to recognition network\n", m->id);
00730
00731 m->hook |= MULTIGRAM_DELETE;
00732 }
00733 }
00734 lm->global_modified = TRUE;
00735 }
00736 }
00737 }
00738
00739
00740 active_changed = multigram_exec_activate(lm);
00741
00742 if (lm->global_modified) {
00743
00744
00745 if (lm->lmvar == LM_DFA_GRAMMAR) {
00746 if (lm->dfa == NULL || lm->winfo == NULL) {
00747 if (lm->dfa != NULL) {
00748 dfa_info_free(lm->dfa);
00749 lm->dfa = NULL;
00750 }
00751 if (lm->winfo != NULL) {
00752 word_info_free(lm->winfo);
00753 lm->winfo = NULL;
00754 }
00755 }
00756 }
00757 #ifdef MDEBUG
00758 jlog("STAT: grammar update completed\n");
00759 #endif
00760 }
00761
00762 if (lm->global_modified || active_changed) {
00763 return (TRUE);
00764 }
00765
00766 return FALSE;
00767 }
00768
00785 static boolean
00786 multigram_read_file_and_add(char *dfa_file, char *dict_file, PROCESS_LM *lm)
00787 {
00788 WORD_INFO *new_winfo;
00789 DFA_INFO *new_dfa;
00790 char buf[MAXGRAMNAMELEN], *p, *q;
00791 boolean ret;
00792
00793 if (dfa_file != NULL) {
00794 jlog("STAT: reading [%s] and [%s]...\n", dfa_file, dict_file);
00795 } else {
00796 jlog("STAT: reading [%s]...\n", dict_file);
00797 }
00798
00799
00800 new_winfo = word_info_new();
00801
00802 if (lm->lmvar == LM_DFA_GRAMMAR) {
00803 ret = init_voca(new_winfo, dict_file, lm->am->hmminfo,
00804 #ifdef MONOTREE
00805 TRUE,
00806 #else
00807 FALSE,
00808 #endif
00809 lm->config->forcedict_flag);
00810 if ( ! ret ) {
00811 jlog("ERROR: failed to read dictionary \"%s\"\n", dict_file);
00812 word_info_free(new_winfo);
00813 return FALSE;
00814 }
00815 } else if (lm->lmvar == LM_DFA_WORD) {
00816 ret = init_wordlist(new_winfo, dict_file, lm->am->hmminfo,
00817 lm->config->wordrecog_head_silence_model_name,
00818 lm->config->wordrecog_tail_silence_model_name,
00819 (lm->config->wordrecog_silence_context_name[0] == '\0') ? NULL : lm->config->wordrecog_silence_context_name,
00820 lm->config->forcedict_flag);
00821 if ( ! ret ) {
00822 jlog("ERROR: failed to read word list \"%s\"\n", dict_file);
00823 word_info_free(new_winfo);
00824 return FALSE;
00825 }
00826 }
00827
00828 new_dfa = NULL;
00829 if (lm->lmvar == LM_DFA_GRAMMAR) {
00830
00831 new_dfa = dfa_info_new();
00832 if (init_dfa(new_dfa, dfa_file) == FALSE) {
00833 jlog("ERROR: multi-gram: error in reading DFA\n");
00834 word_info_free(new_winfo);
00835 dfa_info_free(new_dfa);
00836 return FALSE;
00837 }
00838 }
00839
00840 jlog("STAT: done\n");
00841
00842
00843 p = &(dict_file[0]);
00844 q = p;
00845 while(*p != '\0') {
00846 if (*p == '/') q = p + 1;
00847 p++;
00848 }
00849 p = q;
00850 while(*p != '\0' && *p != '.') {
00851 buf[p-q] = *p;
00852 p++;
00853 }
00854 buf[p-q] = '\0';
00855
00856
00857 multigram_add(new_dfa, new_winfo, buf, lm);
00858
00859 jlog("STAT: gram \"%s\" registered\n", buf);
00860
00861 return TRUE;
00862
00863 }
00864
00865
00882 boolean
00883 multigram_load_all_gramlist(PROCESS_LM *lm)
00884 {
00885 GRAMLIST *g;
00886 GRAMLIST *groot;
00887 boolean ok_p;
00888
00889 switch(lm->config->lmvar) {
00890 case LM_DFA_GRAMMAR: groot = lm->config->gramlist_root; break;
00891 case LM_DFA_WORD: groot = lm->config->wordlist_root; break;
00892 }
00893
00894 ok_p = TRUE;
00895 for(g = groot; g; g = g->next) {
00896 if (multigram_read_file_and_add(g->dfafile, g->dictfile, lm) == FALSE) {
00897 ok_p = FALSE;
00898 }
00899 }
00900 return(ok_p);
00901 }
00902
00922 int
00923 multigram_get_all_num(PROCESS_LM *lm)
00924 {
00925 MULTIGRAM *m;
00926 int cnt;
00927
00928 cnt = 0;
00929 for(m=lm->grammars;m;m=m->next) cnt++;
00930 return(cnt);
00931 }
00932
00954 int
00955 multigram_get_gram_from_category(int category, PROCESS_LM *lm)
00956 {
00957 MULTIGRAM *m;
00958 int tb, te;
00959 for(m = lm->grammars; m; m = m->next) {
00960 if (m->newbie) continue;
00961 tb = m->cate_begin;
00962 te = tb + m->dfa->term_num;
00963 if (tb <= category && category < te) {
00964 return(m->id);
00965 }
00966 }
00967 return(-1);
00968 }
00969
00970
00985 void
00986 multigram_free_all(MULTIGRAM *root)
00987 {
00988 MULTIGRAM *m, *mtmp;
00989
00990 m = root;
00991 while(m) {
00992 mtmp = m->next;
00993 if (m->dfa) dfa_info_free(m->dfa);
00994 word_info_free(m->winfo);
00995 free(m);
00996 m = mtmp;
00997 }
00998 }
00999
01018 MULTIGRAM *
01019 multigram_get_grammar_by_name(PROCESS_LM *lm, char *gramname)
01020 {
01021 MULTIGRAM *m;
01022
01023 for(m=lm->grammars;m;m=m->next) {
01024 if (strmatch(m->name, gramname)) break;
01025 }
01026 if (!m) {
01027 jlog("ERROR: multigram: cannot find grammar \"%s\"\n", gramname);
01028 return NULL;
01029 }
01030 return m;
01031 }
01032
01051 MULTIGRAM *
01052 multigram_get_grammar_by_id(PROCESS_LM *lm, unsigned short id)
01053 {
01054 MULTIGRAM *m;
01055
01056 for(m=lm->grammars;m;m=m->next) {
01057 if (m->id == id) break;
01058 }
01059 if (!m) {
01060 jlog("ERROR: multi-gram: cannot find grammar id \"%d\"\n", id);
01061 return NULL;
01062 }
01063 return m;
01064 }
01065
01100 boolean
01101 multigram_add_words_to_grammar(PROCESS_LM *lm, MULTIGRAM *m, WORD_INFO *winfo)
01102 {
01103 int offset;
01104
01105 if (lm == NULL || m == NULL || winfo == NULL) return FALSE;
01106
01107 offset = m->winfo->num;
01108 printf("adding %d words to grammar #%d (%d words)\n", winfo->num, m->id, m->winfo->num);
01109
01110 if (voca_append(m->winfo, winfo, 0, offset) == FALSE) {
01111 jlog("ERROR: multi-gram: failed to add words to dict in grammar #%d \"%s\"\n", m->id, m->name);
01112 return FALSE;
01113 }
01114
01115 if (lm->lmvar == LM_DFA_GRAMMAR) {
01116 if (m->dfa->term_num != 0) free_terminfo(&(m->dfa->term));
01117 if (make_dfa_voca_ref(m->dfa, m->winfo) == FALSE) {
01118 jlog("ERROR: failed to map dict <-> DFA. This grammar will be deleted\n");
01119 return FALSE;
01120 }
01121 }
01122
01123 m->hook |= MULTIGRAM_MODIFIED;
01124
01125 return TRUE;
01126 }
01127
01153 boolean
01154 multigram_add_words_to_grammar_by_name(PROCESS_LM *lm, char *gramname, WORD_INFO *winfo)
01155 {
01156 return(multigram_add_words_to_grammar(lm, multigram_get_grammar_by_name(lm, gramname), winfo));
01157 }
01158
01184 boolean
01185 multigram_add_words_to_grammar_by_id(PROCESS_LM *lm, unsigned short id, WORD_INFO *winfo)
01186 {
01187 return(multigram_add_words_to_grammar(lm, multigram_get_grammar_by_id(lm, id), winfo));
01188 }
01189
01190