Main Page | Modules | Data Structures | Directories | File List | Data Fields | Globals | Related Pages

voca_load_htkdict.c

Go to the documentation of this file.
00001 
00024 /*
00025  * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
00026  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00027  * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology, Nagoya Institute of Technology
00028  * All rights reserved
00029  */
00030 
00031 #include <sent/stddefs.h>
00032 #include <sent/vocabulary.h>
00033 #include <sent/htk_hmm.h>
00034 
00035 /* 
00036  * dictionary format:
00037  * 
00038  * 1 word per line.
00039  * 
00040  * fields: GrammarEntry [OutputString] phone1 phone2 ....
00041  * 
00042  *     GrammarEntry
00043  *                 (for N-gram)
00044  *                 word name in N-gram
00045  *                 (for DFA)
00046  *                 terminal symbol ID
00047  *
00048  *     [OutputString]
00049  *                 String to output when the word is recognized.
00050  *
00051  *     {OutputString}
00052  *                 String to output when the word is recognized.
00053  *                 Also specifies that this word is transparent
00054  * 
00055  *     phone1 phone2 ....
00056  *                 sequence of logical HMM name (normally phoneme)
00057  *                 to express the pronunciation
00058  */
00059 
00060 #define PHONEMELEN_STEP  10     
00061 static char buf[MAXLINELEN];    
00062 static char bufbak[MAXLINELEN]; 
00063 
00064 static char trbuf[3][20];       
00065 static char chbuf[30];       
00066 static char nophone[1];         
00067 static int  trp_l;              
00068 static int  trp;                
00069 static int  trp_r;              
00070 
00078 char *
00079 cycle_triphone(char *p)
00080 {
00081   int i;
00082   
00083   if (p == NULL) {              /* initialize */
00084     nophone[0]='\0';
00085     for(i=0;i<3;i++) trbuf[i][0] = '\0';
00086     trp_l = 0;
00087     trp   = 1;
00088     trp_r = 2;
00089     return NULL;
00090   }
00091 
00092   strcpy(trbuf[trp_r],p);
00093 
00094   chbuf[0]='\0';
00095   if (trbuf[trp_l][0] != '\0') {
00096     strcat(chbuf,trbuf[trp_l]);
00097     strcat(chbuf,HMM_LC_DLIM);
00098   }
00099   if (trbuf[trp][0] == '\0') {
00100     i = trp_l;
00101     trp_l = trp;
00102     trp = trp_r;
00103     trp_r = i;
00104     return NULL;
00105   }
00106   strcat(chbuf, trbuf[trp]);
00107   if (trbuf[trp_r][0] != '\0') {
00108     strcat(chbuf,HMM_RC_DLIM);
00109     strcat(chbuf,trbuf[trp_r]);
00110   }
00111   i = trp_l;
00112   trp_l = trp;
00113   trp = trp_r;
00114   trp_r = i;
00115 
00116   return(chbuf);
00117 }
00118 
00124 char *
00125 cycle_triphone_flush()
00126 {
00127   return(cycle_triphone(nophone));
00128 }
00129 
00136 static void
00137 add_to_error(WORD_INFO *winfo, char *name)
00138 {
00139   char *buf;
00140   char *match;
00141 
00142   buf = (char *)mymalloc(strlen(name) + 1);
00143   strcpy(buf, name);
00144   if (winfo->errph_root == NULL) {
00145     winfo->errph_root = aptree_make_root_node(buf);
00146   } else {
00147     match = aptree_search_data(buf, winfo->errph_root);
00148     if (!strmatch(match, buf)) {
00149       aptree_add_entry(buf, buf, match, &(winfo->errph_root));
00150     }
00151   }
00152 }
00153 
/**
 * Traversal callback: print one stored error entry on its own line.
 *
 * @param x [in] entry data, treated as a NUL-terminated string
 */
static void
callback_list_error(void *x)
{
  j_printf("%s\n", (char *)x);
}
00171 static void
00172 list_error(WORD_INFO *winfo)
00173 {
00174   j_printf("
00175   aptree_traverse_and_do(winfo->errph_root, callback_list_error);
00176   j_printf("
00177 }
00178 
00184 static void
00185 set_maxwn(WORD_INFO *winfo)
00186 {
00187   int w,p,n;
00188   int maxwn;
00189 
00190   maxwn = 0;
00191   for (w=0;w<winfo->num;w++) {
00192     n = 0;
00193     for (p=0;p<winfo->wlen[w];p++) {
00194       n += hmm_logical_state_num(winfo->wseq[w][p]) - 2;
00195     }
00196     if (maxwn < n) maxwn = n;
00197   }
00198   winfo->maxwn = maxwn;
00199 }
00200 
00206 static void
00207 set_maxwlen(WORD_INFO *winfo)
00208 {
00209   WORD_ID w;
00210   int maxwlen;
00211   maxwlen = 0;
00212   for(w=0;w<winfo->num;w++) {
00213     if (maxwlen < winfo->wlen[w]) maxwlen = winfo->wlen[w];
00214   }
00215   winfo->maxwlen = maxwlen;
00216 }
00217 
00228 boolean
00229 voca_load_htkdict(FILE *fp, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv)
00230 {
00231   boolean ok_flag = TRUE;
00232   WORD_ID vnum;
00233   boolean do_conv = FALSE;
00234 
00235   if (hmminfo != NULL && hmminfo->is_triphone && (! ignore_tri_conv))
00236     do_conv = TRUE;
00237 
00238   winfo_init(winfo);
00239 
00240   vnum = 0;
00241   while (getl(buf, sizeof(buf), fp) != NULL) {
00242     if (vnum >= winfo->maxnum) winfo_expand(winfo);
00243     if (voca_load_htkdict_line(buf, vnum, winfo, hmminfo, do_conv, &ok_flag) == FALSE) break;
00244     vnum++;
00245   }
00246   winfo->num = vnum;
00247 
00248   if (winfo->errph_root != NULL) list_error(winfo);
00249 
00250   /* compute maxwn */
00251   set_maxwn(winfo);
00252   set_maxwlen(winfo);
00253 
00254   return(ok_flag);
00255 }
00256 
00257 
00268 boolean
00269 voca_load_htkdict_fd(int fd, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv)
00270 {
00271   boolean ok_flag = TRUE;
00272   WORD_ID vnum;
00273   boolean do_conv = FALSE;
00274 
00275   if (hmminfo != NULL && hmminfo->is_triphone && (! ignore_tri_conv))
00276     do_conv = TRUE;
00277 
00278   winfo_init(winfo);
00279 
00280   vnum = 0;
00281   while(getl_fd(buf, MAXLINELEN, fd) != NULL) {
00282     if (vnum >= winfo->maxnum) winfo_expand(winfo);
00283     if (voca_load_htkdict_line(buf, vnum, winfo, hmminfo, do_conv, &ok_flag) == FALSE) break;
00284     vnum++;
00285   }
00286   winfo->num = vnum;
00287 
00288   if (winfo->errph_root != NULL) list_error(winfo);
00289 
00290   /* compute maxwn */
00291   set_maxwn(winfo);
00292   set_maxwlen(winfo);
00293 
00294   return(ok_flag);
00295 }
00296 
00307 boolean
00308 voca_load_htkdict_sd(int sd, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv)
00309 {
00310   boolean ok_flag = TRUE;
00311   WORD_ID vnum;
00312   boolean do_conv = FALSE;
00313   
00314   if (hmminfo != NULL && hmminfo->is_triphone && (! ignore_tri_conv))
00315     do_conv = TRUE;
00316   
00317   winfo_init(winfo);
00318   
00319   vnum = 0;
00320   while(getl_sd(buf, MAXLINELEN, sd) != NULL) {
00321     if (vnum >= winfo->maxnum) winfo_expand(winfo);
00322     if (voca_load_htkdict_line(buf, vnum, winfo, hmminfo, do_conv, &ok_flag) == FALSE) break;
00323     vnum++;
00324   }
00325   winfo->num = vnum;
00326   
00327   if (winfo->errph_root != NULL) list_error(winfo);
00328 
00329   /* compute maxwn */
00330   set_maxwn(winfo);
00331   set_maxwlen(winfo);
00332   
00333   return(ok_flag);
00334 }
00335 
00346 boolean
00347 voca_append_htkdict(char *entry, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv)
00348 {
00349   boolean ok_flag = TRUE;
00350   boolean do_conv = FALSE;
00351 
00352   if (hmminfo != NULL && hmminfo->is_triphone && (! ignore_tri_conv))
00353     do_conv = TRUE;
00354 
00355   if (winfo->num >= winfo->maxnum) winfo_expand(winfo);
00356   strcpy(buf, entry);           /* const buffer not allowed in voca_load_htkdict_line() */
00357   voca_load_htkdict_line(buf, winfo->num, winfo, hmminfo, do_conv, &ok_flag);
00358 
00359   if (ok_flag == TRUE) {
00360     winfo->num++;
00361     /* re-compute maxwn */
00362     set_maxwn(winfo);
00363     set_maxwlen(winfo);
00364   } else {
00365     if (winfo->errph_root != NULL) list_error(winfo);
00366   }
00367 
00368   return(ok_flag);
00369 }
00370 
00383 boolean
00384 voca_load_htkdict_line(char *buf, int vnum, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean do_conv, boolean *ok_flag)
00385 {
00386   char *ptmp, *lp = NULL, *p;
00387   static char cbuf[MAX_HMMNAME_LEN];
00388   int tmpmaxlen, len;
00389   HMM_Logical **tmpwseq;
00390   HMM_Logical *tmplg;
00391   boolean pok;
00392 
00393   if (strmatch(buf, "DICEND")) return FALSE;
00394 
00395   /* backup whole line for debug output */
00396   strcpy(bufbak, buf);
00397   
00398   /* GrammarEntry */
00399   if ((ptmp = mystrtok(buf, " \t\n")) == NULL) {
00400     j_printerr("line %d: corrupted data:\n> %s\n", vnum+1, bufbak);
00401     winfo->errnum++;
00402     *ok_flag = FALSE;
00403     return TRUE;
00404   }
00405   winfo->wname[vnum] = strcpy((char *)mymalloc(strlen(ptmp)+1), ptmp);
00406 
00407   /* just move pointer to next token */
00408   if ((ptmp = mystrtok_movetonext(NULL, " \t\n")) == NULL) {
00409     j_printerr("line %d: corrupted data:\n> %s\n", vnum+1, bufbak);
00410     winfo->errnum++;
00411     *ok_flag = FALSE;
00412     free(winfo->wname[vnum]);
00413     return TRUE;
00414   }
00415 #ifdef CLASS_NGRAM
00416   winfo->cprob[vnum] = 0.0;     /* prob = 1.0, logprob = 0.0 */
00417 #endif
00418   
00419   if (ptmp[0] == '@') {         /* class N-gram prob */
00420 #ifdef CLASS_NGRAM
00421     /* word probability within the class (for class N-gram) */
00422     /* format: classname @classprob wordname [output] phoneseq */
00423     /* classname equals to wname, and wordname will be omitted */
00424     /* format: @%f (log scale) */
00425     /* if "@" not found or "@0", it means class == word */
00426     if ((ptmp = mystrtok(NULL, " \t\n")) == NULL) {
00427       j_printerr("line %d: corrupted data:\n> %s\n", vnum+1, bufbak);
00428       winfo->errnum++;
00429       *ok_flag = FALSE;
00430       return TRUE;
00431     }
00432     if (ptmp[1] == '\0') {      /* space between '@' and figures */
00433       j_printerr("line %d: value after '@' missing, maybe wrong space?\n> %s\n", vnum+1, bufbak);
00434       winfo->errnum++;
00435       *ok_flag = FALSE;
00436       free(winfo->wname[vnum]);
00437       return TRUE;
00438     }
00439     winfo->cprob[vnum] = atof(&(ptmp[1]));
00440     if (winfo->cprob[vnum] != 0.0) winfo->cwnum++;
00441     /* read next word entry (just skip them) */
00442     if ((ptmp = mystrtok(NULL, " \t\n")) == NULL) {
00443       j_printerr("line %d: corrupted data:\n> %s\n", vnum+1,bufbak);
00444       winfo->errnum++;
00445       *ok_flag = FALSE;
00446       free(winfo->wname[vnum]);
00447       return TRUE;
00448     }
00449     /* move to the next word entry */
00450     if ((ptmp = mystrtok_movetonext(NULL, " \t\n")) == NULL) {
00451       j_printerr("line %d: corrupted data:\n> %s\n", vnum+1, bufbak);
00452       winfo->errnum++;
00453       *ok_flag = FALSE;
00454       free(winfo->wname[vnum]);
00455       return TRUE;
00456     }
00457 #else  /* ~CLASS_NGRAM */
00458     j_printerr("line %d: cannot handle in-class word probability\n> %s\n", vnum+1, ptmp, bufbak);
00459     winfo->errnum++;
00460     *ok_flag = FALSE;
00461     free(winfo->wname[vnum]);
00462     return TRUE;
00463 #endif /* CLASS_NGRAM */
00464   }
00465 
00466   /* OutputString */
00467   switch(ptmp[0]) {
00468   case '[':                     /* not transparent word */
00469     winfo->is_transparent[vnum] = FALSE;
00470     ptmp = mystrtok_quotation(NULL, " \t\n", '[', ']', 0);
00471     break;
00472   case '{':                     /* transparent word */
00473     winfo->is_transparent[vnum] = TRUE;
00474     ptmp = mystrtok_quotation(NULL, " \t\n", '{', '}', 0);
00475     break;
00476   default:
00477     j_printerr("line %d: missing output string??\n> %s\n", vnum+1, bufbak);
00478     winfo->errnum++;
00479     *ok_flag = FALSE;
00480     free(winfo->wname[vnum]);
00481     return TRUE;
00482   }
00483   if (ptmp == NULL) {
00484     j_printerr("line %d: corrupted data:\n> %s\n", vnum+1, bufbak);
00485     winfo->errnum++;
00486     *ok_flag = FALSE;
00487     free(winfo->wname[vnum]);
00488     return TRUE;
00489   }
00490   winfo->woutput[vnum] = strcpy((char *)mymalloc(strlen(ptmp)+1), ptmp);
00491     
00492   /* phoneme sequence */
00493   if (hmminfo == NULL) {
00494     /* don't read */
00495     winfo->wseq[vnum] = NULL;
00496     winfo->wlen[vnum] = 0;
00497   } else {
00498     tmpmaxlen = PHONEMELEN_STEP;
00499     tmpwseq = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * tmpmaxlen);
00500     len = 0;
00501       
00502     if (do_conv) {
00503       /* convert phoneme to triphone expression (word-internal) */
00504       cycle_triphone(NULL);
00505       if ((lp = mystrtok(NULL, " \t\n")) == NULL) {
00506         j_printerr("line %d: word %s has no phoneme:\n> %s\n", vnum+1, winfo->wname[vnum], bufbak);
00507         winfo->errnum++;
00508         *ok_flag = FALSE;
00509         free(winfo->wname[vnum]);
00510         free(winfo->woutput[vnum]);
00511         return TRUE;
00512       }
00513       cycle_triphone(lp);
00514     }
00515 
00516     pok = TRUE;
00517     for (;;) {
00518       if (do_conv) {
00519 /*      if (lp != NULL) j_printf(" %d%s",len,lp);*/
00520         if (lp != NULL) lp = mystrtok(NULL, " \t\n");
00521         if (lp != NULL) p = cycle_triphone(lp);
00522         else p = cycle_triphone_flush();
00523       } else {
00524         p = mystrtok(NULL, " \t\n");
00525       }
00526       if (p == NULL) break;
00527 
00528       /* both defined/pseudo phone is allowed */
00529       tmplg = htk_hmmdata_lookup_logical(hmminfo, p);
00530       if (tmplg == NULL) {
00531         /* not found */
00532         if (do_conv) {
00533           /* both defined or pseudo phone are not found */
00534           if (len == 0 && lp == NULL) {
00535             j_printerr("line %d: triphone \"*-%s+*\" or monophone \"%s\" not found\n", vnum+1, p, p);
00536             snprintf(cbuf,MAX_HMMNAME_LEN,"*-%s+* or monophone %s", p, p);
00537           } else if (len == 0) {
00538             j_printerr("line %d: triphone \"*-%s\" or biphone \"%s\" not found\n", vnum+1, p, p);
00539             snprintf(cbuf,MAX_HMMNAME_LEN,"*-%s or biphone %s", p, p);
00540           } else if (lp == NULL) {
00541             j_printerr("line %d: triphone \"%s+*\" or biphone \"%s\" not found\n", vnum+1, p, p);
00542             snprintf(cbuf,MAX_HMMNAME_LEN,"%s+* or biphone %s", p, p);
00543           } else {
00544             j_printerr("line %d: triphone \"%s\" not found\n", vnum+1, p);
00545             snprintf(cbuf,MAX_HMMNAME_LEN,"%s", p);
00546           }
00547         } else {
00548           j_printerr("line %d: phone \"%s\" not found\n", vnum+1, p);
00549           snprintf(cbuf, MAX_HMMNAME_LEN, "%s", p);
00550         }
00551         add_to_error(winfo, cbuf);
00552         pok = FALSE;
00553       } else {
00554         /* found */
00555         if (len >= tmpmaxlen) {
00556           /* expand wseq area by PHONEMELEN_STEP */
00557           tmpmaxlen += PHONEMELEN_STEP;
00558           tmpwseq = (HMM_Logical **)myrealloc(tmpwseq, sizeof(HMM_Logical *) * tmpmaxlen);
00559         }
00560         tmpwseq[len] = tmplg;
00561       }
00562       len++;
00563     }
00564     if (!pok) {                 /* error in phoneme */
00565       j_printerr("> %s\n", bufbak);
00566       winfo->errnum++;
00567       *ok_flag = FALSE;
00568       free(winfo->wname[vnum]);
00569       free(winfo->woutput[vnum]);
00570       return TRUE;
00571     }
00572     if (len == 0) {
00573       j_printerr("line %d: no phone specified:\n> %s\n", vnum+1, bufbak);
00574       winfo->errnum++;
00575       *ok_flag = FALSE;
00576       free(winfo->wname[vnum]);
00577       free(winfo->woutput[vnum]);
00578       return TRUE;
00579     }
00580     winfo->wseq[vnum] = tmpwseq;
00581     winfo->wlen[vnum] = len;
00582   }
00583   
00584   return(TRUE);
00585 }
00586 
00598 boolean
00599 voca_mono2tri(WORD_INFO *winfo, HTK_HMM_INFO *hmminfo)
00600 {
00601   WORD_ID w;
00602   int ph;
00603   char *p;
00604   HMM_Logical *tmplg;
00605   boolean ok_flag = TRUE;
00606   
00607   for (w=0;w<winfo->num;w++) {
00608     cycle_triphone(NULL);
00609     cycle_triphone(winfo->wseq[w][0]->name);
00610 
00611     for (ph = 0; ph < winfo->wlen[w] ; ph++) {
00612       if (ph == winfo->wlen[w] - 1) {
00613         p = cycle_triphone_flush();
00614       } else {
00615         p = cycle_triphone(winfo->wseq[w][ph + 1]->name);
00616       }
00617       if ((tmplg = htk_hmmdata_lookup_logical(hmminfo, p)) == NULL) {
00618         j_printerr("voca_mono2tri: word \"%s[%s]\"(id=%d): HMM \"%s\" not found\n", winfo->wname[w], winfo->woutput[w], w, p);
00619         ok_flag = FALSE;
00620         continue;
00621       }
00622       winfo->wseq[w][ph] = tmplg;
00623     }
00624   }
00625   return (ok_flag);
00626 }
00627 
00639 void
00640 voca_append(WORD_INFO *dstinfo, WORD_INFO *srcinfo, int coffset, int woffset)
00641 {
00642   WORD_ID n, w;
00643   int i;
00644 
00645   n = woffset;
00646   for(w=0;w<srcinfo->num;w++) {
00647     /* copy data */
00648     dstinfo->wlen[n] = srcinfo->wlen[w];
00649     dstinfo->wname[n] = strcpy((char *)mymalloc(strlen(srcinfo->wname[w])+1), srcinfo->wname[w]);
00650     dstinfo->woutput[n] = strcpy((char *)mymalloc(strlen(srcinfo->woutput[w])+1), srcinfo->woutput[w]);
00651     dstinfo->wseq[n] = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * srcinfo->wlen[w]);
00652     for(i=0;i<srcinfo->wlen[w];i++) {
00653       dstinfo->wseq[n][i] = srcinfo->wseq[w][i];
00654     }
00655     dstinfo->is_transparent[n] = srcinfo->is_transparent[w];
00656     /* offset category ID by coffset */
00657     dstinfo->wton[n] = srcinfo->wton[w] + coffset;
00658     
00659     n++;
00660     if (n >= dstinfo->maxnum) winfo_expand(dstinfo);
00661   }
00662   dstinfo->num = n;
00663 
00664   /* compute maxwn */
00665   set_maxwn(dstinfo);
00666   set_maxwlen(dstinfo);
00667 }
00668 

Generated on Tue Mar 28 16:17:43 2006 for Julius by  doxygen 1.4.2