libsent/src/voca/voca_load_htkdict.c

Go to the documentation of this file.
00001 
00025 /*
00026  * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
00027  * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
00028  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
00029  * All rights reserved
00030  */
00031 
00032 #include <sent/stddefs.h>
00033 #include <sent/vocabulary.h>
00034 #include <sent/htk_hmm.h>
00035 
00036 /* 
00037  * dictionary format:
00038  * 
00039  * one word per line.
00040  * 
00041  * fields: GrammarEntry [OutputString] phone1 phone2 ....
00042  * 
00043  *     GrammarEntry
00044  *                 (for N-gram)
00045  *                 word name in N-gram
00046  *                 (for DFA)
00047  *                 terminal symbol ID
00048  *
00049  *     [OutputString]
00050  *                 String to output when the word is recognized.
00051  *
00052  *     {OutputString}
00053  *                 String to output when the word is recognized.
00054  *                 Also specifies that this word is transparent
00055  * 
00056  *     phone1 phone2 ....
00057  *                 sequence of logical HMM name (normally phoneme)
00058  *                 to express the pronunciation
00059  */
00060 
00061 #define PHONEMELEN_STEP  30     
00062 static char buf[MAXLINELEN];    
00063 static char bufbak[MAXLINELEN]; 
00064 
00065 static char trbuf[3][20];       
00066 static char chbuf[30];       
00067 static char nophone[1];         
00068 static int  trp_l;              
00069 static int  trp;                
00070 static int  trp_r;              
00071 
00079 char *
00080 cycle_triphone(char *p)
00081 {
00082   int i;
00083   
00084   if (p == NULL) {              /* initialize */
00085     nophone[0]='\0';
00086     for(i=0;i<3;i++) trbuf[i][0] = '\0';
00087     trp_l = 0;
00088     trp   = 1;
00089     trp_r = 2;
00090     return NULL;
00091   }
00092 
00093   strcpy(trbuf[trp_r],p);
00094 
00095   chbuf[0]='\0';
00096   if (trbuf[trp_l][0] != '\0') {
00097     strcat(chbuf,trbuf[trp_l]);
00098     strcat(chbuf,HMM_LC_DLIM);
00099   }
00100   if (trbuf[trp][0] == '\0') {
00101     i = trp_l;
00102     trp_l = trp;
00103     trp = trp_r;
00104     trp_r = i;
00105     return NULL;
00106   }
00107   strcat(chbuf, trbuf[trp]);
00108   if (trbuf[trp_r][0] != '\0') {
00109     strcat(chbuf,HMM_RC_DLIM);
00110     strcat(chbuf,trbuf[trp_r]);
00111   }
00112   i = trp_l;
00113   trp_l = trp;
00114   trp = trp_r;
00115   trp_r = i;
00116 
00117   return(chbuf);
00118 }
00119 
00125 char *
00126 cycle_triphone_flush()
00127 {
00128   return(cycle_triphone(nophone));
00129 }
00130 
00137 static void
00138 add_to_error(WORD_INFO *winfo, char *name)
00139 {
00140   char *buf;
00141   char *match;
00142 
00143   buf = (char *)mymalloc(strlen(name) + 1);
00144   strcpy(buf, name);
00145   if (winfo->errph_root == NULL) {
00146     winfo->errph_root = aptree_make_root_node(buf);
00147   } else {
00148     match = aptree_search_data(buf, winfo->errph_root);
00149     if (match == NULL || !strmatch(match, buf)) {
00150       aptree_add_entry(buf, buf, match, &(winfo->errph_root));
00151     }
00152   }
00153 }
00154 
/**
 * Tree traversal callback: log one recorded phone name.
 *
 * @param x [in] data stored in the tree node, used as a C string
 */
static void
callback_list_error(void *x)
{
  jlog("Error: voca_load_htkdict: %s\n", (char *)x);
}
00172 static void
00173 list_error(WORD_INFO *winfo)
00174 {
00175   jlog("Error: voca_load_htkdict: begin missing phones\n");
00176   aptree_traverse_and_do(winfo->errph_root, callback_list_error);
00177   jlog("Error: voca_load_htkdict: end missing phones\n");
00178 }
00179 
00185 void
00186 voca_set_stats(WORD_INFO *winfo)
00187 {
00188   int w,p,n;
00189   int maxwn;
00190   int maxwlen;
00191   int states;
00192   int models;
00193   int trnum;
00194 
00195   maxwn = 0;
00196   maxwlen = 0;
00197   states = 0;
00198   models = 0;
00199   trnum = 0;
00200   for (w=0;w<winfo->num;w++) {
00201     models += winfo->wlen[w];
00202     if (maxwlen < winfo->wlen[w]) maxwlen = winfo->wlen[w];
00203     n = 0;
00204     for (p=0;p<winfo->wlen[w];p++) {
00205       n += hmm_logical_state_num(winfo->wseq[w][p]) - 2;
00206     }
00207     if (maxwn < n) maxwn = n;
00208     states += n;
00209     if (winfo->is_transparent[w]) trnum++;
00210   }
00211   winfo->maxwn = maxwn;
00212   winfo->maxwlen = maxwlen;
00213   winfo->totalstatenum = states;
00214   winfo->totalmodelnum = models;
00215   winfo->totaltransnum = trnum;
00216 }
00217 
00227 void
00228 voca_load_start(WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv)
00229 {
00230   winfo->ok_flag = TRUE;
00231   winfo->linenum = 0;
00232   if (hmminfo != NULL && hmminfo->is_triphone && (! ignore_tri_conv)) {
00233     winfo->do_conv = TRUE;
00234   } else {
00235     winfo->do_conv = FALSE;
00236   }
00237   winfo_init(winfo);
00238   winfo->num = 0;
00239 }
00240 
00254 boolean
00255 voca_load_line(char *buf, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo)
00256 {
00257   WORD_ID vnum;
00258 
00259   winfo->linenum++;
00260   vnum = winfo->num;
00261   if (vnum >= winfo->maxnum) {
00262     if (winfo_expand(winfo) == FALSE) return FALSE;
00263   }
00264   if (voca_load_htkdict_line(buf, &vnum, winfo->linenum, winfo, hmminfo, winfo->do_conv, &(winfo->ok_flag)) == FALSE) {
00265     return FALSE;
00266   }
00267   winfo->num = vnum;
00268   return TRUE;
00269 }
00270 
00283 boolean
00284 voca_load_end(WORD_INFO *winfo)
00285 {
00286   voca_set_stats(winfo);
00287   if (!winfo->ok_flag) {
00288     if (winfo->errph_root != NULL) list_error(winfo);
00289   }
00290   return(winfo->ok_flag);
00291 }
00292 
00293 
00304 boolean
00305 voca_load_htkdict(FILE *fp, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv)
00306 {
00307   boolean ret;
00308 
00309   voca_load_start(winfo, hmminfo, ignore_tri_conv);
00310   while (getl(buf, sizeof(buf), fp) != NULL) {
00311     if (voca_load_line(buf, winfo, hmminfo) == FALSE) break;
00312   }
00313   ret = voca_load_end(winfo);
00314 
00315   return(ret);
00316 }
00317 
00318 
00329 boolean
00330 voca_load_htkdict_fd(int fd, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv)
00331 {
00332   boolean ret;
00333 
00334   voca_load_start(winfo, hmminfo, ignore_tri_conv);
00335   while(getl_fd(buf, MAXLINELEN, fd) != NULL) {
00336     if (voca_load_line(buf, winfo, hmminfo) == FALSE) break;
00337   }
00338   ret = voca_load_end(winfo);
00339 
00340   return(ret);
00341 }
00342 
00353 boolean
00354 voca_load_htkdict_sd(int sd, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv)
00355 {
00356   boolean ret;
00357 
00358   voca_load_start(winfo, hmminfo, ignore_tri_conv);
00359   while(getl_sd(buf, MAXLINELEN, sd) != NULL) {
00360     if (voca_load_line(buf, winfo, hmminfo) == FALSE) break;
00361   }
00362   ret = voca_load_end(winfo);
00363 
00364   return(ret);
00365 }
00366 
00377 boolean
00378 voca_append_htkdict(char *entry, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv)
00379 {
00380   voca_load_line(entry, winfo, hmminfo);
00381   return(voca_load_end(winfo));
00382 }
00383 
00397 boolean
00398 voca_load_htkdict_line(char *buf, WORD_ID *vnum_p, int linenum, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean do_conv, boolean *ok_flag)
00399 {
00400   char *ptmp, *lp = NULL, *p;
00401   static char cbuf[MAX_HMMNAME_LEN];
00402   static HMM_Logical **tmpwseq = NULL;
00403   static int tmpmaxlen;
00404   int len;
00405   HMM_Logical *tmplg;
00406   boolean pok;
00407   int vnum;
00408 
00409   vnum = *vnum_p;
00410 
00411   if (strmatch(buf, "DICEND")) return FALSE;
00412 
00413   /* allocate temporal work area for the first call */
00414   if (tmpwseq == NULL) {
00415     tmpmaxlen = PHONEMELEN_STEP;
00416     tmpwseq = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * tmpmaxlen);
00417   }
00418 
00419   /* backup whole line for debug output */
00420   strcpy(bufbak, buf);
00421   
00422   /* GrammarEntry */
00423   if ((ptmp = mystrtok_quote(buf, " \t\n")) == NULL) {
00424     jlog("Error: voca_load_htkdict: line %d: corrupted data:\n> %s\n", linenum, bufbak);
00425     winfo->errnum++;
00426     *ok_flag = FALSE;
00427     return TRUE;
00428   }
00429   winfo->wname[vnum] = strcpy((char *)mybmalloc2(strlen(ptmp)+1, &(winfo->mroot)), ptmp);
00430 
00431   /* just move pointer to next token */
00432   if ((ptmp = mystrtok_movetonext(NULL, " \t\n")) == NULL) {
00433     jlog("Error: voca_load_htkdict: line %d: corrupted data:\n> %s\n", linenum, bufbak);
00434     winfo->errnum++;
00435     *ok_flag = FALSE;
00436     return TRUE;
00437   }
00438 #ifdef CLASS_NGRAM
00439   winfo->cprob[vnum] = 0.0;     /* prob = 1.0, logprob = 0.0 */
00440 #endif
00441   
00442   if (ptmp[0] == '@') {         /* class N-gram prob */
00443 #ifdef CLASS_NGRAM
00444     /* word probability within the class (for class N-gram) */
00445     /* format: classname @classprob wordname [output] phoneseq */
00446     /* classname equals to wname, and wordname will be omitted */
00447     /* format: @%f (log scale) */
00448     /* if "@" not found or "@0", it means class == word */
00449     if ((ptmp = mystrtok(NULL, " \t\n")) == NULL) {
00450       jlog("Error: voca_load_htkdict: line %d: corrupted data:\n> %s\n", linenum, bufbak);
00451       winfo->errnum++;
00452       *ok_flag = FALSE;
00453       return TRUE;
00454     }
00455     if (ptmp[1] == '\0') {      /* space between '@' and figures */
00456       jlog("Error: voca_load_htkdict: line %d: value after '@' missing, maybe wrong space?\n> %s\n", linenum, bufbak);
00457       winfo->errnum++;
00458       *ok_flag = FALSE;
00459       return TRUE;
00460     }
00461     winfo->cprob[vnum] = atof(&(ptmp[1]));
00462     if (winfo->cprob[vnum] != 0.0) winfo->cwnum++;
00463     /* read next word entry (just skip them) */
00464     if ((ptmp = mystrtok(NULL, " \t\n")) == NULL) {
00465       jlog("Error: voca_load_htkdict: line %d: corrupted data:\n> %s\n", linenum,bufbak);
00466       winfo->errnum++;
00467       *ok_flag = FALSE;
00468       return TRUE;
00469     }
00470     /* move to the next word entry */
00471     if ((ptmp = mystrtok_movetonext(NULL, " \t\n")) == NULL) {
00472       jlog("Error: voca_load_htkdict: line %d: corrupted data:\n> %s\n", linenum, bufbak);
00473       winfo->errnum++;
00474       *ok_flag = FALSE;
00475       return TRUE;
00476     }
00477 #else  /* ~CLASS_NGRAM */
00478     jlog("Error: voca_load_htkdict: line %d: cannot handle in-class word probability\n> %s\n", linenum, ptmp, bufbak);
00479     winfo->errnum++;
00480     *ok_flag = FALSE;
00481     return TRUE;
00482 #endif /* CLASS_NGRAM */
00483   }
00484 
00485   /* OutputString */
00486   switch(ptmp[0]) {
00487   case '[':                     /* not transparent word */
00488     winfo->is_transparent[vnum] = FALSE;
00489     ptmp = mystrtok_quotation(NULL, " \t\n", '[', ']', 0);
00490     break;
00491   case '{':                     /* transparent word */
00492     winfo->is_transparent[vnum] = TRUE;
00493     ptmp = mystrtok_quotation(NULL, " \t\n", '{', '}', 0);
00494     break;
00495   default:
00496 #if 1
00497     /* ALLOW no entry for output */
00498     /* same as wname is used */
00499     winfo->is_transparent[vnum] = FALSE;
00500     ptmp = winfo->wname[vnum];
00501 #else
00502     /* error */
00503     jlog("Error: voca_load_htkdict: line %d: missing output string??\n> %s\n", linenum, bufbak);
00504     winfo->errnum++;
00505     *ok_flag = FALSE;
00506     return TRUE;
00507 #endif
00508   }
00509   if (ptmp == NULL) {
00510     jlog("Error: voca_load_htkdict: line %d: corrupted data:\n> %s\n", linenum, bufbak);
00511     winfo->errnum++;
00512     *ok_flag = FALSE;
00513     return TRUE;
00514   }
00515   winfo->woutput[vnum] = strcpy((char *)mybmalloc2(strlen(ptmp)+1, &(winfo->mroot)), ptmp);
00516     
00517   /* phoneme sequence */
00518   if (hmminfo == NULL) {
00519     /* don't read */
00520     winfo->wseq[vnum] = NULL;
00521     winfo->wlen[vnum] = 0;
00522   } else {
00523 
00524     /* store converted phone sequence to temporal bufffer */
00525     len = 0;
00526       
00527     if (do_conv) {
00528       /* convert phoneme to triphone expression (word-internal) */
00529       cycle_triphone(NULL);
00530       if ((lp = mystrtok(NULL, " \t\n")) == NULL) {
00531         jlog("Error: voca_load_htkdict: line %d: word %s has no phoneme:\n> %s\n", linenum, winfo->wname[vnum], bufbak);
00532         winfo->errnum++;
00533         *ok_flag = FALSE;
00534         return TRUE;
00535       }
00536       cycle_triphone(lp);
00537     }
00538 
00539     pok = TRUE;
00540     for (;;) {
00541       if (do_conv) {
00542 /*      if (lp != NULL) jlog(" %d%s",len,lp);*/
00543         if (lp != NULL) lp = mystrtok(NULL, " \t\n");
00544         if (lp != NULL) p = cycle_triphone(lp);
00545         else p = cycle_triphone_flush();
00546       } else {
00547         p = mystrtok(NULL, " \t\n");
00548       }
00549       if (p == NULL) break;
00550 
00551       /* both defined/pseudo phone is allowed */
00552       tmplg = htk_hmmdata_lookup_logical(hmminfo, p);
00553       if (tmplg == NULL) {
00554         /* not found */
00555         if (do_conv) {
00556           /* both defined or pseudo phone are not found */
00557           if (len == 0 && lp == NULL) {
00558             jlog("Error: voca_load_htkdict: line %d: triphone \"*-%s+*\" or monophone \"%s\" not found\n", linenum, p, p);
00559             snprintf(cbuf,MAX_HMMNAME_LEN,"*-%s+* or monophone %s", p, p);
00560           } else if (len == 0) {
00561             jlog("Error: voca_load_htkdict: line %d: triphone \"*-%s\" or biphone \"%s\" not found\n", linenum, p, p);
00562             snprintf(cbuf,MAX_HMMNAME_LEN,"*-%s or biphone %s", p, p);
00563           } else if (lp == NULL) {
00564             jlog("Error: voca_load_htkdict: line %d: triphone \"%s+*\" or biphone \"%s\" not found\n", linenum, p, p);
00565             snprintf(cbuf,MAX_HMMNAME_LEN,"%s+* or biphone %s", p, p);
00566           } else {
00567             jlog("Error: voca_load_htkdict: line %d: triphone \"%s\" not found\n", linenum, p);
00568             snprintf(cbuf,MAX_HMMNAME_LEN,"%s", p);
00569           }
00570         } else {
00571           jlog("Error: voca_load_htkdict: line %d: phone \"%s\" not found\n", linenum, p);
00572           snprintf(cbuf, MAX_HMMNAME_LEN, "%s", p);
00573         }
00574         add_to_error(winfo, cbuf);
00575         pok = FALSE;
00576       } else {
00577         /* found */
00578         if (len >= tmpmaxlen) {
00579           /* expand wseq area by PHONEMELEN_STEP */
00580           tmpmaxlen += PHONEMELEN_STEP;
00581           tmpwseq = (HMM_Logical **)myrealloc(tmpwseq, sizeof(HMM_Logical *) * tmpmaxlen);
00582         }
00583         /* store to temporal buffer */
00584         tmpwseq[len] = tmplg;
00585       }
00586       len++;
00587     }
00588     if (!pok) {                 /* error in phoneme */
00589       jlog("Error: voca_load_htkdict: the line content was: %s\n", bufbak);
00590       winfo->errnum++;
00591       *ok_flag = FALSE;
00592       return TRUE;
00593     }
00594     if (len == 0) {
00595       jlog("Error: voca_load_htkdict: line %d: no phone specified:\n> %s\n", linenum, bufbak);
00596       winfo->errnum++;
00597       *ok_flag = FALSE;
00598       return TRUE;
00599     }
00600     /* store to winfo */
00601     winfo->wseq[vnum] = (HMM_Logical **)mybmalloc2(sizeof(HMM_Logical *) * len, &(winfo->mroot));
00602     memcpy(winfo->wseq[vnum], tmpwseq, sizeof(HMM_Logical *) * len);
00603     winfo->wlen[vnum] = len;
00604   }
00605 
00606   vnum++;
00607 
00608   *vnum_p = vnum;
00609   
00610   return(TRUE);
00611 }
00612 
00624 boolean
00625 voca_mono2tri(WORD_INFO *winfo, HTK_HMM_INFO *hmminfo)
00626 {
00627   WORD_ID w;
00628   int ph;
00629   char *p;
00630   HMM_Logical *tmplg;
00631   boolean ok_flag = TRUE;
00632   
00633   for (w=0;w<winfo->num;w++) {
00634     cycle_triphone(NULL);
00635     cycle_triphone(winfo->wseq[w][0]->name);
00636 
00637     for (ph = 0; ph < winfo->wlen[w] ; ph++) {
00638       if (ph == winfo->wlen[w] - 1) {
00639         p = cycle_triphone_flush();
00640       } else {
00641         p = cycle_triphone(winfo->wseq[w][ph + 1]->name);
00642       }
00643       if ((tmplg = htk_hmmdata_lookup_logical(hmminfo, p)) == NULL) {
00644         jlog("Error: voca_load_htkdict: word \"%s[%s]\"(id=%d): HMM \"%s\" not found\n", winfo->wname[w], winfo->woutput[w], w, p);
00645         ok_flag = FALSE;
00646         continue;
00647       }
00648       winfo->wseq[w][ph] = tmplg;
00649     }
00650   }
00651   return (ok_flag);
00652 }
00653 
00665 boolean
00666 voca_append(WORD_INFO *dstinfo, WORD_INFO *srcinfo, int coffset, int woffset)
00667 {
00668   WORD_ID n, w;
00669   int i;
00670 
00671   n = woffset;
00672   for(w=0;w<srcinfo->num;w++) {
00673     /* copy data */
00674     dstinfo->wlen[n] = srcinfo->wlen[w];
00675     if (srcinfo->wname[w]) dstinfo->wname[n] = strcpy((char *)mybmalloc2(strlen(srcinfo->wname[w])+1, &(dstinfo->mroot)), srcinfo->wname[w]);
00676     if (srcinfo->woutput[w]) dstinfo->woutput[n] = strcpy((char *)mybmalloc2(strlen(srcinfo->woutput[w])+1, &(dstinfo->mroot)), srcinfo->woutput[w]);
00677     if (srcinfo->wlen[w] > 0) dstinfo->wseq[n] = (HMM_Logical **)mybmalloc2(sizeof(HMM_Logical *) * srcinfo->wlen[w], &(dstinfo->mroot));
00678     for(i=0;i<srcinfo->wlen[w];i++) {
00679       dstinfo->wseq[n][i] = srcinfo->wseq[w][i];
00680     }
00681     dstinfo->is_transparent[n] = srcinfo->is_transparent[w];
00682     /* offset category ID by coffset */
00683     dstinfo->wton[n] = srcinfo->wton[w] + coffset;
00684     
00685     n++;
00686     if (n >= dstinfo->maxnum) {
00687       if (winfo_expand(dstinfo) == FALSE) return FALSE;
00688     }
00689   }
00690   dstinfo->num = n;
00691 
00692   /* compute maxwn */
00693   voca_set_stats(dstinfo);
00694 
00695   return TRUE;
00696 }
00697 

Generated on Tue Dec 18 15:59:57 2007 for Julius by  doxygen 1.5.4