libjulius/src/gmm.c

Go to the documentation of this file.
00001 
00048 /*
00049  * Copyright (c) 2003-2005 Shikano Lab., Nara Institute of Science and Technology
00050  * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
00051  * All rights reserved
00052  */
00053 
00054 #include <julius/julius.h>
00055 
00056 #undef MES
00057 
00079 static int
00080 gmm_find_insert_point(GMMCalc *gc, LOGPROB score, int len)
00081 {
00082   /* binary search on score */
00083   int left = 0;
00084   int right = len - 1;
00085   int mid;
00086 
00087   while (left < right) {
00088     mid = (left + right) / 2;
00089     if (gc->OP_calced_score[mid] > score) {
00090       left = mid + 1;
00091     } else {
00092       right = mid;
00093     }
00094   }
00095   return(left);
00096 }
00097 
00120 static int
00121 gmm_cache_push(GMMCalc *gc, int id, LOGPROB score, int len)
00122 {
00123   int insertp;
00124 
00125   if (len == 0) {               /* first one */
00126     gc->OP_calced_score[0] = score;
00127     gc->OP_calced_id[0] = id;
00128     return(1);
00129   }
00130   if (gc->OP_calced_score[len-1] >= score) { /* bottom */
00131     if (len < gc->OP_gprune_num) {          /* append to bottom */
00132       gc->OP_calced_score[len] = score;
00133       gc->OP_calced_id[len] = id;
00134       len++;
00135     }
00136     return len;
00137   }
00138   if (gc->OP_calced_score[0] < score) {
00139     insertp = 0;
00140   } else {
00141     insertp = gmm_find_insert_point(gc, score, len);
00142   }
00143   if (len < gc->OP_gprune_num) {
00144     memmove(&(gc->OP_calced_score[insertp+1]), &(gc->OP_calced_score[insertp]), sizeof(LOGPROB)*(len - insertp));
00145     memmove(&(gc->OP_calced_id[insertp+1]), &(gc->OP_calced_id[insertp]), sizeof(int)*(len - insertp));    
00146   } else if (insertp < len - 1) {
00147     memmove(&(gc->OP_calced_score[insertp+1]), &(gc->OP_calced_score[insertp]), sizeof(LOGPROB)*(len - insertp - 1));
00148     memmove(&(gc->OP_calced_id[insertp+1]), &(gc->OP_calced_id[insertp]), sizeof(int)*(len - insertp - 1));
00149   }
00150   gc->OP_calced_score[insertp] = score;
00151   gc->OP_calced_id[insertp] = id;
00152   if (len < gc->OP_gprune_num) len++;
00153   return(len);
00154 }
00155 
00176 static LOGPROB
00177 gmm_compute_g_base(GMMCalc *gc, HTK_HMM_Dens *binfo)
00178 {
00179   VECT tmp, x;
00180   VECT *mean;
00181   VECT *var;
00182   VECT *vec = gc->OP_vec;
00183   short veclen = gc->OP_veclen;
00184 
00185   if (binfo == NULL) return(LOG_ZERO);
00186   mean = binfo->mean;
00187   var = binfo->var->vec;
00188   tmp = 0.0;
00189   for (; veclen > 0; veclen--) {
00190     x = *(vec++) - *(mean++);
00191     tmp += x * x * *(var++);
00192   }
00193   return((tmp + binfo->gconst) * -0.5);
00194 }
00195 
00218 static LOGPROB
00219 gmm_compute_g_safe(GMMCalc *gc, HTK_HMM_Dens *binfo, LOGPROB thres)
00220 {
00221   VECT tmp, x;
00222   VECT *mean;
00223   VECT *var;
00224   VECT *vec = gc->OP_vec;
00225   short veclen = gc->OP_veclen;
00226   VECT fthres = thres * (-2.0);
00227 
00228   if (binfo == NULL) return(LOG_ZERO);
00229   mean = binfo->mean;
00230   var = binfo->var->vec;
00231   tmp = binfo->gconst;
00232   for (; veclen > 0; veclen--) {
00233     x = *(vec++) - *(mean++);
00234     tmp += x * x * *(var++);
00235     if (tmp > fthres)  return LOG_ZERO;
00236   }
00237   return(tmp * -0.5);
00238 }
00239 
00256 static void
00257 gmm_gprune_safe_init(GMMCalc *gc, HTK_HMM_INFO *hmminfo, int prune_num)
00258 {
00259   /* store the pruning num to local area */
00260   gc->OP_gprune_num = prune_num;
00261   /* maximum Gaussian set size = maximum mixture size */
00262   gc->OP_calced_maxnum = hmminfo->maxmixturenum;
00263   /* allocate memory for storing list of currently computed Gaussian in a frame */
00264   gc->OP_calced_score = (LOGPROB *)mymalloc(sizeof(LOGPROB) * gc->OP_gprune_num);
00265   gc->OP_calced_id = (int *)mymalloc(sizeof(int) * gc->OP_gprune_num);
00266 }
00267 
00295 static void
00296 gmm_gprune_safe(GMMCalc *gc, HTK_HMM_Dens **g, int gnum)
00297 {
00298   int i, num = 0;
00299   LOGPROB score, thres;
00300 
00301   thres = LOG_ZERO;
00302   for (i = 0; i < gnum; i++) {
00303     if (num < gc->OP_gprune_num) {
00304       score = gmm_compute_g_base(gc, g[i]);
00305     } else {
00306       score = gmm_compute_g_safe(gc, g[i], thres);
00307       if (score <= thres) continue;
00308     }
00309     num = gmm_cache_push(gc, i, score, num);
00310     thres = gc->OP_calced_score[num-1];
00311   }
00312   gc->OP_calced_num = num;
00313 }
00314 
00333 static LOGPROB
00334 gmm_calc_mix(GMMCalc *gc, HTK_HMM_State *s)
00335 {
00336   int i;
00337   LOGPROB logprob = LOG_ZERO;
00338 
00339   /* compute Gaussian set */
00340   gmm_gprune_safe(gc, s->b, s->mix_num);
00341   /* computed Gaussians will be set in:
00342      score ... OP_calced_score[0..OP_calced_num]
00343      id    ... OP_calced_id[0..OP_calced_num] */
00344   
00345   /* sum */
00346   for(i=0;i<gc->OP_calced_num;i++) {
00347     gc->OP_calced_score[i] += s->bweight[gc->OP_calced_id[i]];
00348   }
00349   logprob = addlog_array(gc->OP_calced_score, gc->OP_calced_num);
00350   if (logprob <= LOG_ZERO) return LOG_ZERO;
00351   return (logprob * INV_LOG_TEN);
00352 }
00353 
00377 static LOGPROB
00378 outprob_state_nocache(GMMCalc *gc, int t, HTK_HMM_State *stateinfo, HTK_Param *param)
00379 {
00380   /* set global values for outprob functions to access them */
00381   gc->OP_vec = param->parvec[t];
00382   gc->OP_veclen = param->veclen;
00383   return(gmm_calc_mix(gc, stateinfo));
00384 }
00385 
00386 /************************************************************************/
00387 /* global functions */
00388 
00406 boolean
00407 gmm_init(Recog *recog)
00408 {
00409   HTK_HMM_INFO *gmm;
00410   HTK_HMM_Data *d;
00411   GMMCalc *gc;
00412   int i;
00413 
00414   gmm = recog->gmm;
00415 
00416   /* check GMM format */
00417   /* tied-mixture GMM is not supported */
00418   if (gmm->is_tied_mixture) {
00419     jlog("ERROR: gmm_init: tied-mixture GMM is not supported\n");
00420     return FALSE;
00421   }
00422   /* assume 3 state GMM (only one output state) */
00423   for(d=gmm->start;d;d=d->next) {
00424     if (d->state_num > 3) {
00425       jlog("ERROR: gmm_init: more than three states (one output state) defined in GMM [%s]\n", d->name);
00426       return FALSE;
00427     }
00428   }
00429 
00430   /* check if CMN needed */
00431 
00432   /* allocate work area */
00433   if (recog->gc == NULL) {
00434     gc = (GMMCalc *)mymalloc(sizeof(GMMCalc));
00435     recog->gc = gc;
00436   }
00437   
00438   /* allocate buffers */
00439   gc->gmm_score = (LOGPROB *)mymalloc(sizeof(LOGPROB) * gmm->totalhmmnum);
00440 
00441 #ifdef GMM_VAD
00442   gc->nframe = recog->jconf->detect.gmm_margin;
00443   gc->rates = (LOGPROB *)mymalloc(sizeof(LOGPROB) * gc->nframe);
00444 #endif
00445 
00446   gc->is_voice = (boolean *)mymalloc(sizeof(boolean) * gmm->totalhmmnum);
00447   i = 0;
00448   if (recog->jconf->reject.gmm_reject_cmn_string) {
00449     for(d=recog->gmm->start;d;d=d->next) {
00450       if (strstr(recog->jconf->reject.gmm_reject_cmn_string, d->name)) {
00451         gc->is_voice[i] = FALSE;
00452       } else {
00453         gc->is_voice[i] = TRUE;
00454       }
00455       i++;
00456     }
00457   } else {
00458     for(d=recog->gmm->start;d;d=d->next) {
00459       gc->is_voice[i] = TRUE;
00460       i++;
00461     }
00462   }
00463 
00464   /* initialize work area */
00465   gmm_gprune_safe_init(gc, gmm, recog->jconf->reject.gmm_gprune_num);
00466 
00467   /* check if variances are inversed */
00468   if (!gmm->variance_inversed) {
00469     /* here, inverse all variance values for faster computation */
00470     htk_hmm_inverse_variances(gmm);
00471     gmm->variance_inversed = TRUE;
00472   }
00473 
00474   return TRUE;
00475 }
00476 
00493 void
00494 gmm_prepare(Recog *recog)
00495 {
00496   HTK_HMM_Data *d;
00497   int i;
00498 
00499   /* initialize score buffer and frame count */
00500   i = 0;
00501   for(d=recog->gmm->start;d;d=d->next) {
00502     recog->gc->gmm_score[i] = 0.0;
00503     i++;
00504   }
00505 #ifdef GMM_VAD
00506   for(i=0;i<recog->gc->nframe;i++) recog->gc->rates[i] = 0.0;
00507   recog->gc->framep = 0;
00508   recog->gc->filled = FALSE;
00509   recog->gc->in_voice = FALSE;
00510 #endif
00511 
00512   recog->gc->framecount = 0;
00513 
00514 #ifdef GMM_VAD_DEBUG
00515   printf("GMM_VAD: init\n");
00516 #endif
00517 }
00518 
00545 void
00546 gmm_proceed(Recog *recog)
00547 {
00548   HTK_HMM_Data *d;
00549   GMMCalc *gc;
00550   int i;
00551   MFCCCalc *mfcc;
00552   LOGPROB score;
00553 #ifdef GMM_VAD
00554   LOGPROB max_n;
00555   LOGPROB max_v;
00556 #endif
00557 
00558   mfcc = recog->gmmmfcc;
00559   gc = recog->gc;
00560 
00561   if (!mfcc->valid) return;
00562 
00563   gc->framecount++;
00564 
00565 #ifdef GMM_VAD
00566   max_n = max_v = LOG_ZERO;
00567 #endif
00568 
00569   i = 0;
00570   for(d=recog->gmm->start;d;d=d->next) {
00571     score = outprob_state_nocache(gc, mfcc->f, d->s[1], mfcc->param);
00572     gc->gmm_score[i] += score;
00573 #ifdef GMM_VAD
00574     if (gc->is_voice[i]) {
00575       if (max_v < score) max_v = score;
00576     } else {
00577       if (max_n < score) max_n = score;
00578     }
00579 #endif
00580 #ifdef MES
00581     jlog("DEBUG: [%s: total=%f avg=%f]\n", d->name, gc->gmm_score[i], gc->gmm_score[i] / (float)gc->framecount);
00582 #endif
00583     i++;
00584   }
00585 #ifdef GMM_VAD
00586 #ifdef GMM_VAD_DEBUG
00587   //printf("GMM_VAD: max_v = %f, max_n = %f, rate = %f\n", max_v, max_n, max_v - max_n, gc->framep);
00588 #endif
00589   /* set rate of this frame */
00590   gc->rates[gc->framep] = max_v - max_n;
00591 #ifdef GMM_VAD_DEBUG
00592   printf("GMM_VAD: %f\n", max_v - max_n);
00593 #endif
00594   /* increment current frame pointer */
00595   gc->framep++;
00596   /* if reached end, go to start point */
00597   if (gc->framep >= gc->nframe) {
00598     gc->filled = TRUE;
00599     gc->framep = 0;
00600   }
00601 #endif
00602 }
00603 
00628 void
00629 gmm_end(Recog *recog)
00630 {
00631   HTK_HMM_INFO *gmm;
00632   LOGPROB *score;
00633   HTK_HMM_Data *d;
00634   LOGPROB maxprob;
00635   HTK_HMM_Data *dmax;
00636 #ifdef CONFIDENCE_MEASURE
00637   LOGPROB sum;
00638 #endif
00639   int i;
00640   int maxid;
00641 
00642   if (recog->gc->framecount == 0) return;
00643 
00644   gmm = recog->gmm;
00645   score = recog->gc->gmm_score;
00646 
00647   /* get max score */
00648   i = 0;
00649   maxprob = LOG_ZERO;
00650   dmax = NULL;
00651   for(d=gmm->start;d;d=d->next) {
00652     if (maxprob < score[i]) {
00653       dmax = d;
00654       maxprob = score[i];
00655       maxid = i;
00656     }
00657     i++;
00658   }
00659   recog->gc->max_d = dmax;
00660   recog->gc->max_i = maxid;
00661 
00662 #ifdef CONFIDENCE_MEASURE
00663   /* compute CM */
00664   sum = 0.0;
00665   i = 0;
00666   for(d=gmm->start;d;d=d->next) {
00667     //sum += pow(10, recog->jconf->annotate.cm_alpha * (score[i] - maxprob));
00668     sum += pow(10, 0.05 * (score[i] - maxprob));
00669     i++;
00670   }
00671   recog->gc->gmm_max_cm = 1.0 / sum;
00672 #endif
00673   
00674   /* output result */
00675   callback_exec(CALLBACK_RESULT_GMM, recog);
00676 
00677 }
00678 
00679 
00704 boolean
00705 gmm_valid_input(Recog *recog)
00706 {
00707   if (recog->gc->max_d == NULL) return FALSE;
00708   if (recog->gc->is_voice[recog->gc->max_i]) {
00709     return TRUE;
00710   }
00711   return FALSE;
00712 }
00713 
00728 void
00729 gmm_free(Recog *recog)
00730 {
00731   if (recog->gc) {
00732     free(recog->gc->OP_calced_score);
00733     free(recog->gc->OP_calced_id);
00734     free(recog->gc->is_voice);
00735 #ifdef GMM_VAD
00736     free(recog->gc->rates);
00737 #endif
00738     free(recog->gc->gmm_score);
00739     free(recog->gc);
00740     recog->gc = NULL;
00741   }
00742 }
00743 
00744 #ifdef GMM_VAD
00745 
00762 static void
00763 voice_activity_score(GMMCalc *gc, float *mean_ret, float *var_ret, int *count_ret)
00764 {
00765   int i, len;
00766   LOGPROB mean;
00767   LOGPROB var;
00768   LOGPROB x;
00769   int count;
00770 
00771   if (!gc->filled) {
00772     /* cycle buffer not filled yet */
00773     *mean_ret = 0.0;
00774     *var_ret = 0.0;
00775     *count_ret = 0;
00776     return;
00777   }
00778 
00779   if (gc->filled) {
00780     len = gc->nframe;
00781   } else {
00782     len = gc->framep;
00783   }
00784 
00785   mean = 0;
00786   count = 0;
00787   for(i=0;i<len;i++) {
00788     mean += gc->rates[i];
00789     if (gc->rates[i] > 0.0) count++;
00790   }
00791   mean /= (float)len;
00792   var = 0.0;
00793   for(i=0;i<len;i++) {
00794     x = mean - gc->rates[i];
00795     var += x * x;
00796   }
00797   var /= (float)len;
00798 
00799   *mean_ret = mean;
00800   *var_ret = var;
00801   *count_ret = count;
00802 }
00803 
00823 void
00824 gmm_check_trigger(Recog *recog)
00825 {
00826   GMMCalc *gc;
00827   gc = recog->gc;
00828   float mean;
00829   float var;
00830   int count;
00831 
00832   gc->up_trigger = gc->down_trigger = FALSE;
00833 
00834   voice_activity_score(gc, &mean, &var, &count);
00835 
00836   if (gc->in_voice) {
00837     if (mean < -0.2) {
00838       gc->down_trigger = TRUE;
00839       gc->in_voice = FALSE;
00840     }
00841   } else {
00842     if (mean > 0.7) {
00843       gc->up_trigger = TRUE;
00844       gc->in_voice = TRUE;
00845     }
00846   }
00847 
00848 #ifdef GMM_VAD_DEBUG
00849   printf("GMM_VAD: %s: %f %f %d", gc->in_voice ? "VOICE" : "NOISE", mean, var, count);
00850   if (gc->up_trigger) printf(": BEGIN");
00851   if (gc->down_trigger) printf(": END");
00852   printf("\n");
00853 #endif
00854 
00855 }
00856 #endif /* GMM_VAD */
00857 
00858 /* end of file */

Generated on Tue Dec 18 15:59:51 2007 for Julius by  doxygen 1.5.4