libsent/src/voca/voca_load_htkdict.c

Go to the documentation of this file.
00001 
00024 /*
00025  * Copyright (c) 1991-2006 Kawahara Lab., Kyoto University
00026  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00027  * Copyright (c) 2005-2006 Julius project team, Nagoya Institute of Technology
00028  * All rights reserved
00029  */
00030 
00031 #include <sent/stddefs.h>
00032 #include <sent/vocabulary.h>
00033 #include <sent/htk_hmm.h>
00034 
00035 /* 
00036  * dictionary format:
00037  * 
00038  * 1 word per line.
00039  * 
00040  * fields: GrammarEntry [OutputString] phone1 phone2 ....
00041  * 
00042  *     GrammarEntry
00043  *                 (for N-gram)
00044  *                 word name in N-gram
00045  *                 (for DFA)
00046  *                 terminal symbol ID
00047  *
00048  *     [OutputString]
00049  *                 String to output when the word is recognized.
00050  *
00051  *     {OutputString}
00052  *                 String to output when the word is recognized.
00053  *                 Also specifies that this word is transparent
00054  * 
00055  *     phone1 phone2 ....
00056  *                 sequence of logical HMM name (normally phoneme)
00057  *                 to express the pronunciation
00058  */
00059 
00060 #define PHONEMELEN_STEP  30     
00061 static char buf[MAXLINELEN];    
00062 static char bufbak[MAXLINELEN]; 
00063 
00064 static char trbuf[3][20];       
00065 static char chbuf[30];       
00066 static char nophone[1];         
00067 static int  trp_l;              
00068 static int  trp;                
00069 static int  trp_r;              
00070 
00078 char *
00079 cycle_triphone(char *p)
00080 {
00081   int i;
00082   
00083   if (p == NULL) {              /* initialize */
00084     nophone[0]='\0';
00085     for(i=0;i<3;i++) trbuf[i][0] = '\0';
00086     trp_l = 0;
00087     trp   = 1;
00088     trp_r = 2;
00089     return NULL;
00090   }
00091 
00092   strcpy(trbuf[trp_r],p);
00093 
00094   chbuf[0]='\0';
00095   if (trbuf[trp_l][0] != '\0') {
00096     strcat(chbuf,trbuf[trp_l]);
00097     strcat(chbuf,HMM_LC_DLIM);
00098   }
00099   if (trbuf[trp][0] == '\0') {
00100     i = trp_l;
00101     trp_l = trp;
00102     trp = trp_r;
00103     trp_r = i;
00104     return NULL;
00105   }
00106   strcat(chbuf, trbuf[trp]);
00107   if (trbuf[trp_r][0] != '\0') {
00108     strcat(chbuf,HMM_RC_DLIM);
00109     strcat(chbuf,trbuf[trp_r]);
00110   }
00111   i = trp_l;
00112   trp_l = trp;
00113   trp = trp_r;
00114   trp_r = i;
00115 
00116   return(chbuf);
00117 }
00118 
00124 char *
00125 cycle_triphone_flush()
00126 {
00127   return(cycle_triphone(nophone));
00128 }
00129 
00136 static void
00137 add_to_error(WORD_INFO *winfo, char *name)
00138 {
00139   char *buf;
00140   char *match;
00141 
00142   buf = (char *)mymalloc(strlen(name) + 1);
00143   strcpy(buf, name);
00144   if (winfo->errph_root == NULL) {
00145     winfo->errph_root = aptree_make_root_node(buf);
00146   } else {
00147     match = aptree_search_data(buf, winfo->errph_root);
00148     if (!strmatch(match, buf)) {
00149       aptree_add_entry(buf, buf, match, &(winfo->errph_root));
00150     }
00151   }
00152 }
00153 
/**
 * Traversal callback: print one stored phone name on its own line.
 *
 * @param x  tree payload; list_error() passes entries of the error index,
 *           which add_to_error() stores as C strings
 */
static void
callback_list_error(void *x)
{
  j_printf("%s\n", (char *)x);
}
00171 static void
00172 list_error(WORD_INFO *winfo)
00173 {
00174   j_printf("////// Missing phones:\n");
00175   aptree_traverse_and_do(winfo->errph_root, callback_list_error);
00176   j_printf("//////////////////////\n");
00177 }
00178 
00184 static void
00185 set_maxwn(WORD_INFO *winfo)
00186 {
00187   int w,p,n;
00188   int maxwn;
00189 
00190   maxwn = 0;
00191   for (w=0;w<winfo->num;w++) {
00192     n = 0;
00193     for (p=0;p<winfo->wlen[w];p++) {
00194       n += hmm_logical_state_num(winfo->wseq[w][p]) - 2;
00195     }
00196     if (maxwn < n) maxwn = n;
00197   }
00198   winfo->maxwn = maxwn;
00199 }
00200 
00206 static void
00207 set_maxwlen(WORD_INFO *winfo)
00208 {
00209   WORD_ID w;
00210   int maxwlen;
00211   maxwlen = 0;
00212   for(w=0;w<winfo->num;w++) {
00213     if (maxwlen < winfo->wlen[w]) maxwlen = winfo->wlen[w];
00214   }
00215   winfo->maxwlen = maxwlen;
00216 }
00217 
00228 boolean
00229 voca_load_htkdict(FILE *fp, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv)
00230 {
00231   boolean ok_flag = TRUE;
00232   WORD_ID vnum;
00233   boolean do_conv = FALSE;
00234 
00235   if (hmminfo != NULL && hmminfo->is_triphone && (! ignore_tri_conv))
00236     do_conv = TRUE;
00237 
00238   winfo_init(winfo);
00239 
00240   vnum = 0;
00241   while (getl(buf, sizeof(buf), fp) != NULL) {
00242     if (vnum >= winfo->maxnum) winfo_expand(winfo);
00243     if (voca_load_htkdict_line(buf, vnum, winfo, hmminfo, do_conv, &ok_flag) == FALSE) break;
00244     vnum++;
00245   }
00246   winfo->num = vnum;
00247 
00248   if (winfo->errph_root != NULL) list_error(winfo);
00249 
00250   /* compute maxwn */
00251   set_maxwn(winfo);
00252   set_maxwlen(winfo);
00253 
00254   return(ok_flag);
00255 }
00256 
00257 
00268 boolean
00269 voca_load_htkdict_fd(int fd, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv)
00270 {
00271   boolean ok_flag = TRUE;
00272   WORD_ID vnum;
00273   boolean do_conv = FALSE;
00274 
00275   if (hmminfo != NULL && hmminfo->is_triphone && (! ignore_tri_conv))
00276     do_conv = TRUE;
00277 
00278   winfo_init(winfo);
00279 
00280   vnum = 0;
00281   while(getl_fd(buf, MAXLINELEN, fd) != NULL) {
00282     if (vnum >= winfo->maxnum) winfo_expand(winfo);
00283     if (voca_load_htkdict_line(buf, vnum, winfo, hmminfo, do_conv, &ok_flag) == FALSE) break;
00284     vnum++;
00285   }
00286   winfo->num = vnum;
00287 
00288   if (winfo->errph_root != NULL) list_error(winfo);
00289 
00290   /* compute maxwn */
00291   set_maxwn(winfo);
00292   set_maxwlen(winfo);
00293 
00294   return(ok_flag);
00295 }
00296 
00307 boolean
00308 voca_load_htkdict_sd(int sd, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv)
00309 {
00310   boolean ok_flag = TRUE;
00311   WORD_ID vnum;
00312   boolean do_conv = FALSE;
00313   
00314   if (hmminfo != NULL && hmminfo->is_triphone && (! ignore_tri_conv))
00315     do_conv = TRUE;
00316   
00317   winfo_init(winfo);
00318   
00319   vnum = 0;
00320   while(getl_sd(buf, MAXLINELEN, sd) != NULL) {
00321     if (vnum >= winfo->maxnum) winfo_expand(winfo);
00322     if (voca_load_htkdict_line(buf, vnum, winfo, hmminfo, do_conv, &ok_flag) == FALSE) break;
00323     vnum++;
00324   }
00325   winfo->num = vnum;
00326   
00327   if (winfo->errph_root != NULL) list_error(winfo);
00328 
00329   /* compute maxwn */
00330   set_maxwn(winfo);
00331   set_maxwlen(winfo);
00332   
00333   return(ok_flag);
00334 }
00335 
00346 boolean
00347 voca_append_htkdict(char *entry, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv)
00348 {
00349   boolean ok_flag = TRUE;
00350   boolean do_conv = FALSE;
00351 
00352   if (hmminfo != NULL && hmminfo->is_triphone && (! ignore_tri_conv))
00353     do_conv = TRUE;
00354 
00355   if (winfo->num >= winfo->maxnum) winfo_expand(winfo);
00356   strcpy(buf, entry);           /* const buffer not allowed in voca_load_htkdict_line() */
00357   voca_load_htkdict_line(buf, winfo->num, winfo, hmminfo, do_conv, &ok_flag);
00358 
00359   if (ok_flag == TRUE) {
00360     winfo->num++;
00361     /* re-compute maxwn */
00362     set_maxwn(winfo);
00363     set_maxwlen(winfo);
00364   } else {
00365     if (winfo->errph_root != NULL) list_error(winfo);
00366   }
00367 
00368   return(ok_flag);
00369 }
00370 
00383 boolean
00384 voca_load_htkdict_line(char *buf, int vnum, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean do_conv, boolean *ok_flag)
00385 {
00386   char *ptmp, *lp = NULL, *p;
00387   static char cbuf[MAX_HMMNAME_LEN];
00388   static HMM_Logical **tmpwseq = NULL;
00389   static int tmpmaxlen;
00390   int len;
00391   HMM_Logical *tmplg;
00392   boolean pok;
00393 
00394   if (strmatch(buf, "DICEND")) return FALSE;
00395 
00396   /* allocate temporal work area for the first call */
00397   if (tmpwseq == NULL) {
00398     tmpmaxlen = PHONEMELEN_STEP;
00399     tmpwseq = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * tmpmaxlen);
00400   }
00401 
00402   /* backup whole line for debug output */
00403   strcpy(bufbak, buf);
00404   
00405   /* GrammarEntry */
00406   if ((ptmp = mystrtok(buf, " \t\n")) == NULL) {
00407     j_printerr("line %d: corrupted data:\n> %s\n", vnum+1, bufbak);
00408     winfo->errnum++;
00409     *ok_flag = FALSE;
00410     return TRUE;
00411   }
00412   winfo->wname[vnum] = strcpy((char *)mybmalloc2(strlen(ptmp)+1, &(winfo->mroot)), ptmp);
00413 
00414   /* just move pointer to next token */
00415   if ((ptmp = mystrtok_movetonext(NULL, " \t\n")) == NULL) {
00416     j_printerr("line %d: corrupted data:\n> %s\n", vnum+1, bufbak);
00417     winfo->errnum++;
00418     *ok_flag = FALSE;
00419     return TRUE;
00420   }
00421 #ifdef CLASS_NGRAM
00422   winfo->cprob[vnum] = 0.0;     /* prob = 1.0, logprob = 0.0 */
00423 #endif
00424   
00425   if (ptmp[0] == '@') {         /* class N-gram prob */
00426 #ifdef CLASS_NGRAM
00427     /* word probability within the class (for class N-gram) */
00428     /* format: classname @classprob wordname [output] phoneseq */
00429     /* classname equals to wname, and wordname will be omitted */
00430     /* format: @%f (log scale) */
00431     /* if "@" not found or "@0", it means class == word */
00432     if ((ptmp = mystrtok(NULL, " \t\n")) == NULL) {
00433       j_printerr("line %d: corrupted data:\n> %s\n", vnum+1, bufbak);
00434       winfo->errnum++;
00435       *ok_flag = FALSE;
00436       return TRUE;
00437     }
00438     if (ptmp[1] == '\0') {      /* space between '@' and figures */
00439       j_printerr("line %d: value after '@' missing, maybe wrong space?\n> %s\n", vnum+1, bufbak);
00440       winfo->errnum++;
00441       *ok_flag = FALSE;
00442       return TRUE;
00443     }
00444     winfo->cprob[vnum] = atof(&(ptmp[1]));
00445     if (winfo->cprob[vnum] != 0.0) winfo->cwnum++;
00446     /* read next word entry (just skip them) */
00447     if ((ptmp = mystrtok(NULL, " \t\n")) == NULL) {
00448       j_printerr("line %d: corrupted data:\n> %s\n", vnum+1,bufbak);
00449       winfo->errnum++;
00450       *ok_flag = FALSE;
00451       return TRUE;
00452     }
00453     /* move to the next word entry */
00454     if ((ptmp = mystrtok_movetonext(NULL, " \t\n")) == NULL) {
00455       j_printerr("line %d: corrupted data:\n> %s\n", vnum+1, bufbak);
00456       winfo->errnum++;
00457       *ok_flag = FALSE;
00458       return TRUE;
00459     }
00460 #else  /* ~CLASS_NGRAM */
00461     j_printerr("line %d: cannot handle in-class word probability\n> %s\n", vnum+1, ptmp, bufbak);
00462     winfo->errnum++;
00463     *ok_flag = FALSE;
00464     return TRUE;
00465 #endif /* CLASS_NGRAM */
00466   }
00467 
00468   /* OutputString */
00469   switch(ptmp[0]) {
00470   case '[':                     /* not transparent word */
00471     winfo->is_transparent[vnum] = FALSE;
00472     ptmp = mystrtok_quotation(NULL, " \t\n", '[', ']', 0);
00473     break;
00474   case '{':                     /* transparent word */
00475     winfo->is_transparent[vnum] = TRUE;
00476     ptmp = mystrtok_quotation(NULL, " \t\n", '{', '}', 0);
00477     break;
00478   default:
00479     j_printerr("line %d: missing output string??\n> %s\n", vnum+1, bufbak);
00480     winfo->errnum++;
00481     *ok_flag = FALSE;
00482     return TRUE;
00483   }
00484   if (ptmp == NULL) {
00485     j_printerr("line %d: corrupted data:\n> %s\n", vnum+1, bufbak);
00486     winfo->errnum++;
00487     *ok_flag = FALSE;
00488     return TRUE;
00489   }
00490   winfo->woutput[vnum] = strcpy((char *)mybmalloc2(strlen(ptmp)+1, &(winfo->mroot)), ptmp);
00491     
00492   /* phoneme sequence */
00493   if (hmminfo == NULL) {
00494     /* don't read */
00495     winfo->wseq[vnum] = NULL;
00496     winfo->wlen[vnum] = 0;
00497   } else {
00498 
00499     /* store converted phone sequence to temporal bufffer */
00500     len = 0;
00501       
00502     if (do_conv) {
00503       /* convert phoneme to triphone expression (word-internal) */
00504       cycle_triphone(NULL);
00505       if ((lp = mystrtok(NULL, " \t\n")) == NULL) {
00506         j_printerr("line %d: word %s has no phoneme:\n> %s\n", vnum+1, winfo->wname[vnum], bufbak);
00507         winfo->errnum++;
00508         *ok_flag = FALSE;
00509         return TRUE;
00510       }
00511       cycle_triphone(lp);
00512     }
00513 
00514     pok = TRUE;
00515     for (;;) {
00516       if (do_conv) {
00517 /*      if (lp != NULL) j_printf(" %d%s",len,lp);*/
00518         if (lp != NULL) lp = mystrtok(NULL, " \t\n");
00519         if (lp != NULL) p = cycle_triphone(lp);
00520         else p = cycle_triphone_flush();
00521       } else {
00522         p = mystrtok(NULL, " \t\n");
00523       }
00524       if (p == NULL) break;
00525 
00526       /* both defined/pseudo phone is allowed */
00527       tmplg = htk_hmmdata_lookup_logical(hmminfo, p);
00528       if (tmplg == NULL) {
00529         /* not found */
00530         if (do_conv) {
00531           /* both defined or pseudo phone are not found */
00532           if (len == 0 && lp == NULL) {
00533             j_printerr("line %d: triphone \"*-%s+*\" or monophone \"%s\" not found\n", vnum+1, p, p);
00534             snprintf(cbuf,MAX_HMMNAME_LEN,"*-%s+* or monophone %s", p, p);
00535           } else if (len == 0) {
00536             j_printerr("line %d: triphone \"*-%s\" or biphone \"%s\" not found\n", vnum+1, p, p);
00537             snprintf(cbuf,MAX_HMMNAME_LEN,"*-%s or biphone %s", p, p);
00538           } else if (lp == NULL) {
00539             j_printerr("line %d: triphone \"%s+*\" or biphone \"%s\" not found\n", vnum+1, p, p);
00540             snprintf(cbuf,MAX_HMMNAME_LEN,"%s+* or biphone %s", p, p);
00541           } else {
00542             j_printerr("line %d: triphone \"%s\" not found\n", vnum+1, p);
00543             snprintf(cbuf,MAX_HMMNAME_LEN,"%s", p);
00544           }
00545         } else {
00546           j_printerr("line %d: phone \"%s\" not found\n", vnum+1, p);
00547           snprintf(cbuf, MAX_HMMNAME_LEN, "%s", p);
00548         }
00549         add_to_error(winfo, cbuf);
00550         pok = FALSE;
00551       } else {
00552         /* found */
00553         if (len >= tmpmaxlen) {
00554           /* expand wseq area by PHONEMELEN_STEP */
00555           tmpmaxlen += PHONEMELEN_STEP;
00556           tmpwseq = (HMM_Logical **)myrealloc(tmpwseq, sizeof(HMM_Logical *) * tmpmaxlen);
00557         }
00558         /* store to temporal buffer */
00559         tmpwseq[len] = tmplg;
00560       }
00561       len++;
00562     }
00563     if (!pok) {                 /* error in phoneme */
00564       j_printerr("> %s\n", bufbak);
00565       winfo->errnum++;
00566       *ok_flag = FALSE;
00567       return TRUE;
00568     }
00569     if (len == 0) {
00570       j_printerr("line %d: no phone specified:\n> %s\n", vnum+1, bufbak);
00571       winfo->errnum++;
00572       *ok_flag = FALSE;
00573       return TRUE;
00574     }
00575     /* store to winfo */
00576     winfo->wseq[vnum] = (HMM_Logical **)mybmalloc2(sizeof(HMM_Logical *) * len, &(winfo->mroot));
00577     memcpy(winfo->wseq[vnum], tmpwseq, sizeof(HMM_Logical *) * len);
00578     winfo->wlen[vnum] = len;
00579   }
00580   
00581   return(TRUE);
00582 }
00583 
00595 boolean
00596 voca_mono2tri(WORD_INFO *winfo, HTK_HMM_INFO *hmminfo)
00597 {
00598   WORD_ID w;
00599   int ph;
00600   char *p;
00601   HMM_Logical *tmplg;
00602   boolean ok_flag = TRUE;
00603   
00604   for (w=0;w<winfo->num;w++) {
00605     cycle_triphone(NULL);
00606     cycle_triphone(winfo->wseq[w][0]->name);
00607 
00608     for (ph = 0; ph < winfo->wlen[w] ; ph++) {
00609       if (ph == winfo->wlen[w] - 1) {
00610         p = cycle_triphone_flush();
00611       } else {
00612         p = cycle_triphone(winfo->wseq[w][ph + 1]->name);
00613       }
00614       if ((tmplg = htk_hmmdata_lookup_logical(hmminfo, p)) == NULL) {
00615         j_printerr("voca_mono2tri: word \"%s[%s]\"(id=%d): HMM \"%s\" not found\n", winfo->wname[w], winfo->woutput[w], w, p);
00616         ok_flag = FALSE;
00617         continue;
00618       }
00619       winfo->wseq[w][ph] = tmplg;
00620     }
00621   }
00622   return (ok_flag);
00623 }
00624 
00636 void
00637 voca_append(WORD_INFO *dstinfo, WORD_INFO *srcinfo, int coffset, int woffset)
00638 {
00639   WORD_ID n, w;
00640   int i;
00641 
00642   n = woffset;
00643   for(w=0;w<srcinfo->num;w++) {
00644     /* copy data */
00645     dstinfo->wlen[n] = srcinfo->wlen[w];
00646     dstinfo->wname[n] = strcpy((char *)mymalloc(strlen(srcinfo->wname[w])+1), srcinfo->wname[w]);
00647     dstinfo->woutput[n] = strcpy((char *)mymalloc(strlen(srcinfo->woutput[w])+1), srcinfo->woutput[w]);
00648     dstinfo->wseq[n] = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * srcinfo->wlen[w]);
00649     for(i=0;i<srcinfo->wlen[w];i++) {
00650       dstinfo->wseq[n][i] = srcinfo->wseq[w][i];
00651     }
00652     dstinfo->is_transparent[n] = srcinfo->is_transparent[w];
00653     /* offset category ID by coffset */
00654     dstinfo->wton[n] = srcinfo->wton[w] + coffset;
00655     
00656     n++;
00657     if (n >= dstinfo->maxnum) winfo_expand(dstinfo);
00658   }
00659   dstinfo->num = n;
00660 
00661   /* compute maxwn */
00662   set_maxwn(dstinfo);
00663   set_maxwlen(dstinfo);
00664 }
00665 

Generated on Tue Dec 26 16:16:33 2006 for Julius by  doxygen 1.5.0