Main Page | Modules | Data Structures | Directories | File List | Data Fields | Globals | Related Pages

multi-gram.c

Go to the documentation of this file.
00001 
00052 /*
00053  * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
00054  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00055  * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology, Nagoya Institute of Technology
00056  * All rights reserved
00057  */
00058 
00059 
00060 #include <julius.h>
00061 
00062 #ifdef USE_DFA
00063 
00065 #define MDEBUG
00066 
00072 static DFA_INFO *global_dfa = NULL;
00078 static WORD_INFO *global_winfo = NULL;
00084 static int gram_maxid = 0;
00085 
00114 static void
00115 multigram_setup(DFA_INFO *d, WORD_INFO *w)
00116 {
00117   if (d == NULL || w == NULL) {
00118     /* no grammar was specified */
00119     dfa = NULL;                 /* clear */
00120     winfo = NULL;
00121     return;
00122   }
00123 
00124   /* set the global grammar and vocabulary pointer */
00125   dfa = d;
00126   winfo = w;
00127 
00128   /* re-build wchmm */
00129   if (wchmm != NULL) {
00130     wchmm_free(wchmm);
00131   }
00132   wchmm = wchmm_new();
00133   wchmm->dfa = d;
00134   wchmm->winfo = w;
00135   wchmm->hmminfo = hmminfo;
00136 #ifdef CATEGORY_TREE
00137   if (old_tree_function_flag) {
00138     build_wchmm(wchmm);
00139   } else {
00140     build_wchmm2(wchmm);
00141   }
00142 #else
00143   build_wchmm2(wchmm);
00144 #endif /* CATEGORY_TREE */
00145   
00146   /* guess beam width from models, when not specified */
00147   trellis_beam_width = set_beam_width(wchmm, specified_trellis_beam_width);
00148   if (specified_trellis_beam_width == 0) {
00149     j_printf("now beam width = %d (full)\n", trellis_beam_width);
00150   } else if (specified_trellis_beam_width == -1) {
00151     j_printf("now beam width = %d (guess)\n", trellis_beam_width);
00152   }
00153 
00154 #ifdef USE_NGRAM
00155   /* re-allocate factoring cache for the tree lexicon*/
00156   max_successor_cache_free();
00157   max_successor_cache_init(wchmm);
00158 #endif
00159 
00160   /* finished! */
00161 }
00162 
/**
 * Printable names of the pending operation of a grammar, indexed by the
 * grammar's `hook` value.
 * NOTE(review): the order is assumed to match the MULTIGRAM_DEFAULT /
 * MULTIGRAM_DELETE / MULTIGRAM_ACTIVATE / MULTIGRAM_DEACTIVATE constants,
 * which are defined outside this file -- confirm before reordering.
 * The table is read-only, hence const on both the strings and the array.
 */
static const char *const hookstr[] = {"", "delete", "activate", "deactivate"};
00175 static void 
00176 print_all_gram()
00177 {
00178   MULTIGRAM *m;
00179 
00180   j_printf("[grammars]\n");
00181   for(m=gramlist;m;m=m->next) {
00182     j_printf("  #%2d: [%-11s] %4d words, %3d categories, %4d nodes",
00183              m->id,
00184              m->active ? "active" : "inactive",
00185              m->winfo->num, m->dfa->term_num, m->dfa->state_num);
00186     if (m->newbie) j_printf(" (new)");
00187     if (m->hook != MULTIGRAM_DEFAULT) {
00188       j_printf(" (next: %s)", hookstr[m->hook]);
00189     }
00190     j_printf(" \"%s\"\n", m->name);
00191   }
00192   if (global_dfa != NULL) {
00193     j_printf("  Global:            %4d words, %3d categories, %4d nodes\n",
00194              global_winfo->num, global_dfa->term_num, global_dfa->state_num);
00195   }
00196 }
00197 
00208 static void
00209 send_gram_info()
00210 {
00211   MULTIGRAM *m;
00212 
00213   module_send(module_sd, "<GRAMINFO>\n");
00214   for(m=gramlist;m;m=m->next) {
00215     module_send(module_sd, "  #%2d: [%-11s] %4d words, %3d categories, %4d nodes",
00216                 m->id,
00217                 m->active ? "active" : "inactive",
00218                 m->winfo->num, m->dfa->term_num, m->dfa->state_num);
00219     if (m->newbie) module_send(module_sd, " (new)");
00220     if (m->hook != MULTIGRAM_DEFAULT) {
00221       module_send(module_sd, " (next: %s)", hookstr[m->hook]);
00222     }
00223     module_send(module_sd, " \"%s\"\n", m->name);
00224   }
00225   if (global_dfa != NULL) {
00226     module_send(module_sd, "  Global:            %4d words, %3d categories, %4d nodes\n",
00227                 global_winfo->num, global_dfa->term_num, global_dfa->state_num);
00228   }
00229   module_send(module_sd, "</GRAMINFO>\n.\n");
00230 }
00231 
00248 static void
00249 multigram_build_append(DFA_INFO *gdfa, WORD_INFO *gwinfo, MULTIGRAM *m)
00250 {
00251   /* the new grammar 'm' will be appended to the last of gdfa and gwinfo */
00252   m->state_begin = gdfa->state_num;     /* initial state ID */
00253   m->cate_begin = gdfa->term_num;       /* initial terminal ID */
00254   m->word_begin = gwinfo->num;  /* initial word ID */
00255   
00256   /* append category ID and node number of src DFA */
00257   /* Julius allow multiple initial states: connect each initial node
00258      is not necesarry. */
00259   dfa_append(gdfa, m->dfa, m->state_begin, m->cate_begin);
00260   /* append words of src vocabulary to global winfo */
00261   voca_append(gwinfo, m->winfo, m->cate_begin, m->word_begin);
00262   /* append category->word mapping table */
00263   terminfo_append(&(gdfa->term), &(m->dfa->term), m->cate_begin, m->word_begin);
00264   /* append catergory-pair information */
00265   /* pause has already been considered on m->dfa, so just append here */
00266   cpair_append(gdfa, m->dfa, m->cate_begin);
00267   /* re-set noise entry by merging */
00268   dfa_pause_word_append(gdfa, m->dfa, m->cate_begin);
00269 #ifdef MDEBUG
00270   j_printf("- Gram #%d: installed\n", m->id);
00271 #endif
00272 }
00273 
00290 void
00291 multigram_add(DFA_INFO *dfa, WORD_INFO *winfo, char *name)
00292 {
00293   MULTIGRAM *new;
00294 
00295   /* allocate new gram */
00296   new = (MULTIGRAM *)mymalloc(sizeof(MULTIGRAM));
00297   if (name != NULL) {
00298     strncpy(new->name, name, MAXGRAMNAMELEN);
00299   } else {
00300     strncpy(new->name, "(no name)", MAXGRAMNAMELEN);
00301   }
00302 
00303   new->id = gram_maxid;
00304   new->dfa = dfa;
00305   new->winfo = winfo;
00306   new->hook = MULTIGRAM_DEFAULT;
00307   new->newbie = TRUE;           /* need to setup */
00308   new->active = TRUE;           /* default: active */
00309 
00310   /* the new grammar is now added to gramlist */
00311   new->next = gramlist;
00312   gramlist = new;
00313 
00314   j_printf("- Gram #%d: read\n", new->id);
00315   if (module_mode) {
00316     send_gram_info();
00317   }
00318 #ifdef MDEBUG
00319   print_all_gram();
00320 #endif
00321   gram_maxid++;
00322 }
00323 
00341 boolean
00342 multigram_delete(int delid)
00343 {
00344   MULTIGRAM *m;
00345   for(m=gramlist;m;m=m->next) {
00346     if (m->id == delid) {
00347       m->hook = MULTIGRAM_DELETE;
00348       j_printf("- Gram #%d: marked delete\n", m->id);
00349       break;
00350     }
00351   }
00352   if (! m) {
00353     j_printf("- Gram #%d: not found\n", delid);
00354     if (module_mode) {
00355       module_send(module_sd, "<ERROR MESSAGE=\"Gram #%d not found\"/>\n.\n", delid);
00356     }
00357     return FALSE;
00358   }
00359   return TRUE;
00360 }
00361 
00371 void
00372 multigram_delete_all()
00373 {
00374   MULTIGRAM *m;
00375   for(m=gramlist;m;m=m->next) {
00376     m->hook = MULTIGRAM_DELETE;
00377   }
00378 }
00379 
00392 static boolean
00393 multigram_exec_delete()
00394 {
00395   MULTIGRAM *m, *mtmp, *mprev;
00396   boolean ret_flag = FALSE;
00397 #ifdef MDEBUG
00398   int n;
00399 #endif
00400 
00401   /* exec delete */
00402   mprev = NULL;
00403   m = gramlist;
00404   while(m) {
00405     mtmp = m->next;
00406     if (m->hook == MULTIGRAM_DELETE) {
00407       /* if any grammar is deleted, we need to rebuild lexicons etc. */
00408       /* so tell it to the caller */
00409       if (! m->newbie) ret_flag = TRUE;
00410       dfa_info_free(m->dfa);
00411       word_info_free(m->winfo);
00412       n=m->id;
00413       free(m);
00414       j_printf("- Gram #%d: purged\n", n);
00415       if (mprev != NULL) {
00416         mprev->next = mtmp;
00417       } else {
00418         gramlist = mtmp;
00419       }
00420     } else {
00421       mprev = m;
00422     }
00423     m = mtmp;
00424   }
00425 
00426   return(ret_flag);
00427 }
00428 
00443 void
00444 multigram_activate(int gid)     /* only mark */
00445 {
00446   MULTIGRAM *m;
00447   for(m=gramlist;m;m=m->next) {
00448     if (m->id == gid) {
00449       if (m->hook == MULTIGRAM_ACTIVATE) {
00450         j_printf("- Gram #%d: already active\n", m->id);
00451         if (module_mode) {
00452           module_send(module_sd, "<WARN MESSAGE=\"Gram #%d already active\"/>\n.\n", m->id);
00453         }
00454       } else {
00455         m->hook = MULTIGRAM_ACTIVATE;
00456         j_printf("- Gram #%d: marked activate\n", m->id);
00457       }
00458       break;
00459     }
00460   }
00461   if (! m) {
00462     j_printf("- Gram #%d: not found, activation ignored\n", gid);
00463     if (module_mode) {
00464       module_send(module_sd, "<WARN MESSAGE=\"Gram #%d not found\"/>\n.\n", gid);
00465     }
00466   }
00467 }
00468 
00489 void
00490 multigram_deactivate(int gid)   /* only mark */
00491 {
00492   MULTIGRAM *m;
00493   for(m=gramlist;m;m=m->next) {
00494     if (m->id == gid) {
00495       m->hook = MULTIGRAM_DEACTIVATE;
00496       j_printf("- Gram #%d: marked deactivate\n", m->id);
00497       break;
00498     }
00499   }
00500   if (! m) {
00501     j_printf("- Gram #%d: not found, deactivation ignored\n", gid);
00502     if (module_mode) {
00503       module_send(module_sd, "<WARN MESSAGE=\"Gram #%d not found\"/>\n.\n", gid);
00504     }
00505   }
00506 }
00507 
00522 static boolean
00523 multigram_exec_activate()
00524 {
00525   MULTIGRAM *m;
00526   boolean modified;
00527   
00528   modified = FALSE;
00529   for(m=gramlist;m;m=m->next) {
00530     if (m->hook == MULTIGRAM_ACTIVATE) {
00531       m->hook = MULTIGRAM_DEFAULT;
00532       if (!m->active) {
00533         j_printf("- Gram #%d: turn on active\n", m->id);
00534       }
00535       m->active = TRUE;
00536       modified = TRUE;
00537     } else if (m->hook == MULTIGRAM_DEACTIVATE) {
00538       m->hook = MULTIGRAM_DEFAULT;
00539       if (m->active) {
00540         j_printf("- Gram #%d: turn off inactive\n, m->id");
00541       }
00542       m->active = FALSE;
00543       modified = TRUE;
00544     }
00545   }
00546   return(modified);
00547 }
00548  
00549 /************************************************************************/
00550 /* update grammar if needed */
00551 /************************************************************************/
00586 boolean                         /* return FALSE if no gram */
00587 multigram_exec()
00588 {
00589   MULTIGRAM *m;
00590   boolean global_modified = FALSE;
00591   boolean active_changed = FALSE;
00592 
00593 #ifdef MDEBUG
00594   j_printf("- Grammar update check\n");
00595 #endif
00596 
00597   /* setup additional grammar info of new ones */
00598   for(m=gramlist;m;m=m->next) {
00599     if (m->newbie) {
00600       /* map dict item to dfa terminal symbols */
00601       make_dfa_voca_ref(m->dfa, m->winfo);
00602       /* set dfa->sp_id and dfa->is_sp */
00603       dfa_find_pause_word(m->dfa, m->winfo, hmminfo);
00604       /* build catergory-pair information */
00605       extract_cpair(m->dfa);
00606     }
00607   }
00608 
00609   /* delete grammars marked as "delete" */
00610   if (multigram_exec_delete()) { /* some built grammars deleted */
00611     /* rebuild global grammar from scratch (including new) */
00612     /* active status not changed here (inactive grammar will also included) */
00613     /* activate/deactivate hook will be handled later, so just keep it here */
00614 #ifdef MDEBUG
00615     j_printf("- Re-build whole global grammar...\n");
00616 #endif
00617     if (global_dfa != NULL) {    /* free old global */
00618       dfa_info_free(global_dfa);
00619       word_info_free(global_winfo);
00620       global_dfa = NULL;
00621     }
00622     for(m=gramlist;m;m=m->next) {
00623       if (global_dfa == NULL) {
00624         global_dfa = dfa_info_new();
00625         dfa_state_init(global_dfa);
00626         global_winfo = word_info_new();
00627         winfo_init(global_winfo);
00628       }
00629       if (m->newbie) m->newbie = FALSE;
00630       multigram_build_append(global_dfa, global_winfo, m);
00631     }
00632     global_modified = TRUE;
00633   } else {                      /* global not need changed by the deletion */
00634     /* append only new grammars */
00635     for(m=gramlist;m;m=m->next) {
00636       if (m->newbie) {
00637         if (global_dfa == NULL) {
00638           global_dfa = dfa_info_new();
00639           dfa_state_init(global_dfa);
00640           global_winfo = word_info_new();
00641           winfo_init(global_winfo);
00642         }
00643         if (m->newbie) m->newbie = FALSE;
00644         multigram_build_append(global_dfa, global_winfo, m);
00645         global_modified = TRUE;
00646       }
00647     }
00648   }
00649 
00650   /* process activate/deactivate hook */
00651   active_changed = multigram_exec_activate();
00652 
00653   if (global_modified) {                /* if global lexicon has changed */
00654     /* now global grammar info has been updated, */
00655     /* build up tree lexicon for recognition process */
00656     multigram_setup(global_dfa, global_winfo);
00657 #ifdef MDEBUG
00658     j_printf("- update completed\n");
00659 #endif
00660   }
00661   
00662   /* output grammar info when any change has been made */
00663   if (global_modified || active_changed) {
00664     print_all_gram();
00665     if (module_mode) {
00666       send_gram_info();
00667     }
00668   }
00669 
00670   return(TRUE);
00671 }
00672 
00673 /***********************************************************************/
00688 void
00689 multigram_read_file(char *dfa_file, char *dict_file)
00690 {
00691   WORD_INFO *new_winfo;
00692   DFA_INFO *new_dfa;
00693   char buf[MAXGRAMNAMELEN], *p, *q;
00694 
00695   j_printf("reading [%s] and [%s]...\n", dfa_file, dict_file);
00696   
00697   /* read dict*/
00698   new_winfo = word_info_new();
00699   if ( ! 
00700 #ifdef MONOTREE
00701       /* leave winfo monophone for 1st pass lexicon tree */
00702        init_voca(new_winfo, dict_file, hmminfo, TRUE, forcedict_flag)
00703 #else 
00704        init_voca(new_winfo, dict_file, hmminfo, FALSE, forcedict_flag)
00705 #endif
00706        ) {
00707     j_error("ERROR: failed to read dictionary, terminated\n");
00708   }
00709 #ifdef PASS1_IWCD
00710   if (triphone_check_flag && hmminfo->is_triphone) {
00711     /* go into interactive triphone HMM check mode */
00712     hmm_check(hmminfo, new_winfo);
00713   }
00714 #endif
00715   
00716   /* read dfa */
00717   new_dfa = dfa_info_new();
00718   init_dfa(new_dfa, dfa_file);
00719 
00720   /* extract name */
00721   p = &(dfa_file[0]);
00722   q = p;
00723   while(*p != '\0') {
00724     if (*p == '/') q = p + 1;
00725     p++;
00726   }
00727   p = q;
00728   while(*p != '\0' && *p != '.') {
00729     buf[p-q] = *p;
00730     p++;
00731   }
00732   buf[p-q] = '\0';
00733   
00734   /* register the new grammar to multi-gram tree */
00735   multigram_add(new_dfa, new_winfo, buf);
00736 
00737   j_printf("gram \"%s\" registered\n", buf);
00738 
00739 }
00740 
00755 void
00756 multigram_add_gramlist(char *dfafile, char *dictfile)
00757 {
00758   GRAMLIST *new;
00759 
00760   new = (GRAMLIST *)mymalloc(sizeof(GRAMLIST));
00761   new->dfafile = strcpy((char *)mymalloc(strlen(dfafile)+1), dfafile);
00762   new->dictfile = strcpy((char *)mymalloc(strlen(dictfile)+1), dictfile);
00763   new->next = gramlist_root;
00764   gramlist_root = new;
00765 }
00766 
00777 void
00778 multigram_remove_gramlist()
00779 {
00780   GRAMLIST *g;
00781   GRAMLIST *tmp;
00782 
00783   g = gramlist_root;
00784   while (g) {
00785     tmp = g->next;
00786     free(g->dfafile);
00787     free(g->dictfile);
00788     free(g);
00789     g = tmp;
00790   }
00791   gramlist_root = NULL;
00792 }
00793 
00804 void
00805 multigram_read_all_gramlist()
00806 {
00807   GRAMLIST *g;
00808 
00809   for(g = gramlist_root; g; g = g->next) {
00810     multigram_read_file(g->dfafile, g->dictfile);
00811   }
00812 }
00813 
00839 void
00840 multigram_add_prefix_list(char *prefix_list, char *cwd)
00841 {
00842   char buf[MAXGRAMNAMELEN], *p, *q;
00843   char buf2_d[MAXGRAMNAMELEN], *buf_d;
00844   char buf2_v[MAXGRAMNAMELEN], *buf_v;
00845 
00846   if (prefix_list == NULL) return;
00847   
00848   p = &(prefix_list[0]);
00849   
00850   while(*p != '\0') {
00851     /* extract one prefix to buf[] */
00852     q = p;
00853     while(*p != '\0' && *p != ',') {
00854       buf[p-q] = *p;
00855       p++;
00856     }
00857     buf[p-q] = '\0';
00858 
00859     /* register the new grammar to the grammar list to be read later */
00860     strcpy(buf2_d, buf);
00861     strcat(buf2_d, ".dfa");
00862     buf_d = filepath(buf2_d, cwd);
00863     checkpath(buf_d);
00864     strcpy(buf2_v, buf);
00865     strcat(buf2_v, ".dict");
00866     buf_v = filepath(buf2_v, cwd);
00867     checkpath(buf_v);
00868     multigram_add_gramlist(buf_d, buf_v);
00869 
00870     /* move to next */
00871     if (*p == ',') p++;
00872   }
00873 }
00874 
00901 void
00902 multigram_add_prefix_filelist(char *listfile)
00903 {
00904   FILE *fp;
00905   char buf[MAXGRAMNAMELEN], *p, *src_bgn, *src_end, *dst;
00906   char *cdir;
00907   char buf2_d[MAXGRAMNAMELEN], *buf_d;
00908   char buf2_v[MAXGRAMNAMELEN], *buf_v;
00909 
00910   if (listfile == NULL) return;
00911   if ((fp = fopen(listfile, "r")) == NULL) {
00912     j_printerr("failed to open %s\n", listfile);
00913     return;
00914   }
00915   while(getl_fp(buf, MAXGRAMNAMELEN, fp) != NULL) {
00916     /* remove comment */
00917     p = &(buf[0]);
00918     while(*p != '\0') {
00919       if (*p == '#') {
00920         *p = '\0';
00921         break;
00922       }
00923       p++;
00924     }
00925     if (buf[0] == '\0') continue;
00926     
00927     /* trim head/tail blanks */
00928     p = (&buf[0]);
00929     while(*p == ' ' || *p == '\t' || *p == '\r') p++;
00930     if (*p == '\0') continue;
00931     src_bgn = p;
00932     p = (&buf[strlen(buf) - 1]);
00933     while((*p == ' ' || *p == '\t' || *p == '\r') && p > src_bgn) p--;
00934     src_end = p;
00935     dst = (&buf[0]);
00936     p = src_bgn;
00937     while(p <= src_end) *dst++ = *p++;
00938     *dst = '\0';
00939     if (buf[0] == '\0') continue;
00940     
00941     /* register the new grammar to the grammar list to be read later */
00942     /* converting relative paths as relative to this list file */
00943     cdir = strcpy((char *)mymalloc(strlen(listfile)+1), listfile);
00944     get_dirname(cdir);
00945     strcpy(buf2_d, buf);
00946     strcat(buf2_d, ".dfa");
00947     buf_d = filepath(buf2_d, cdir);
00948     checkpath(buf_d);
00949     strcpy(buf2_v, buf);
00950     strcat(buf2_v, ".dict");
00951     buf_v = filepath(buf2_v, cdir);
00952     checkpath(buf_v);
00953     multigram_add_gramlist(buf_d, buf_v);
00954     free(cdir);
00955   }
00956   fclose(fp);
00957 }
00958 
00971 int
00972 multigram_get_all_num()
00973 {
00974   MULTIGRAM *m;
00975   int cnt;
00976   
00977   cnt = 0;
00978   for(m=gramlist;m;m=m->next) cnt++;
00979   return(cnt);
00980 }
00981 
00998 int
00999 multigram_get_gram_from_category(int category)
01000 {
01001   MULTIGRAM *m;
01002   int tb, te;
01003   for(m = gramlist; m; m = m->next) {
01004     if (m->newbie) continue;
01005     tb = m->cate_begin;
01006     te = tb + m->dfa->term_num;
01007     if (tb <= category && category < te) { /* found */
01008       return(m->id);
01009     }
01010   }
01011   return(-1);
01012 }
01013 
01014 #endif /* USE_DFA */

Generated on Tue Mar 28 16:01:38 2006 for Julius by  doxygen 1.4.2