00001
00048
00049
00050
00051
00052
00053
00054 #include <julius/julius.h>
00055
00056 #undef MES
00057
00079 static int
00080 gmm_find_insert_point(GMMCalc *gc, LOGPROB score, int len)
00081 {
00082
00083 int left = 0;
00084 int right = len - 1;
00085 int mid;
00086
00087 while (left < right) {
00088 mid = (left + right) / 2;
00089 if (gc->OP_calced_score[mid] > score) {
00090 left = mid + 1;
00091 } else {
00092 right = mid;
00093 }
00094 }
00095 return(left);
00096 }
00097
00120 static int
00121 gmm_cache_push(GMMCalc *gc, int id, LOGPROB score, int len)
00122 {
00123 int insertp;
00124
00125 if (len == 0) {
00126 gc->OP_calced_score[0] = score;
00127 gc->OP_calced_id[0] = id;
00128 return(1);
00129 }
00130 if (gc->OP_calced_score[len-1] >= score) {
00131 if (len < gc->OP_gprune_num) {
00132 gc->OP_calced_score[len] = score;
00133 gc->OP_calced_id[len] = id;
00134 len++;
00135 }
00136 return len;
00137 }
00138 if (gc->OP_calced_score[0] < score) {
00139 insertp = 0;
00140 } else {
00141 insertp = gmm_find_insert_point(gc, score, len);
00142 }
00143 if (len < gc->OP_gprune_num) {
00144 memmove(&(gc->OP_calced_score[insertp+1]), &(gc->OP_calced_score[insertp]), sizeof(LOGPROB)*(len - insertp));
00145 memmove(&(gc->OP_calced_id[insertp+1]), &(gc->OP_calced_id[insertp]), sizeof(int)*(len - insertp));
00146 } else if (insertp < len - 1) {
00147 memmove(&(gc->OP_calced_score[insertp+1]), &(gc->OP_calced_score[insertp]), sizeof(LOGPROB)*(len - insertp - 1));
00148 memmove(&(gc->OP_calced_id[insertp+1]), &(gc->OP_calced_id[insertp]), sizeof(int)*(len - insertp - 1));
00149 }
00150 gc->OP_calced_score[insertp] = score;
00151 gc->OP_calced_id[insertp] = id;
00152 if (len < gc->OP_gprune_num) len++;
00153 return(len);
00154 }
00155
00176 static LOGPROB
00177 gmm_compute_g_base(GMMCalc *gc, HTK_HMM_Dens *binfo)
00178 {
00179 VECT tmp, x;
00180 VECT *mean;
00181 VECT *var;
00182 VECT *vec = gc->OP_vec;
00183 short veclen = gc->OP_veclen;
00184
00185 if (binfo == NULL) return(LOG_ZERO);
00186 mean = binfo->mean;
00187 var = binfo->var->vec;
00188 tmp = 0.0;
00189 for (; veclen > 0; veclen--) {
00190 x = *(vec++) - *(mean++);
00191 tmp += x * x * *(var++);
00192 }
00193 return((tmp + binfo->gconst) * -0.5);
00194 }
00195
00218 static LOGPROB
00219 gmm_compute_g_safe(GMMCalc *gc, HTK_HMM_Dens *binfo, LOGPROB thres)
00220 {
00221 VECT tmp, x;
00222 VECT *mean;
00223 VECT *var;
00224 VECT *vec = gc->OP_vec;
00225 short veclen = gc->OP_veclen;
00226 VECT fthres = thres * (-2.0);
00227
00228 if (binfo == NULL) return(LOG_ZERO);
00229 mean = binfo->mean;
00230 var = binfo->var->vec;
00231 tmp = binfo->gconst;
00232 for (; veclen > 0; veclen--) {
00233 x = *(vec++) - *(mean++);
00234 tmp += x * x * *(var++);
00235 if (tmp > fthres) return LOG_ZERO;
00236 }
00237 return(tmp * -0.5);
00238 }
00239
00256 static void
00257 gmm_gprune_safe_init(GMMCalc *gc, HTK_HMM_INFO *hmminfo, int prune_num)
00258 {
00259
00260 gc->OP_gprune_num = prune_num;
00261
00262 gc->OP_calced_maxnum = hmminfo->maxmixturenum;
00263
00264 gc->OP_calced_score = (LOGPROB *)mymalloc(sizeof(LOGPROB) * gc->OP_gprune_num);
00265 gc->OP_calced_id = (int *)mymalloc(sizeof(int) * gc->OP_gprune_num);
00266 }
00267
00295 static void
00296 gmm_gprune_safe(GMMCalc *gc, HTK_HMM_Dens **g, int gnum)
00297 {
00298 int i, num = 0;
00299 LOGPROB score, thres;
00300
00301 thres = LOG_ZERO;
00302 for (i = 0; i < gnum; i++) {
00303 if (num < gc->OP_gprune_num) {
00304 score = gmm_compute_g_base(gc, g[i]);
00305 } else {
00306 score = gmm_compute_g_safe(gc, g[i], thres);
00307 if (score <= thres) continue;
00308 }
00309 num = gmm_cache_push(gc, i, score, num);
00310 thres = gc->OP_calced_score[num-1];
00311 }
00312 gc->OP_calced_num = num;
00313 }
00314
00333 static LOGPROB
00334 gmm_calc_mix(GMMCalc *gc, HTK_HMM_State *s)
00335 {
00336 int i;
00337 LOGPROB logprob = LOG_ZERO;
00338
00339
00340 gmm_gprune_safe(gc, s->b, s->mix_num);
00341
00342
00343
00344
00345
00346 for(i=0;i<gc->OP_calced_num;i++) {
00347 gc->OP_calced_score[i] += s->bweight[gc->OP_calced_id[i]];
00348 }
00349 logprob = addlog_array(gc->OP_calced_score, gc->OP_calced_num);
00350 if (logprob <= LOG_ZERO) return LOG_ZERO;
00351 return (logprob * INV_LOG_TEN);
00352 }
00353
00377 static LOGPROB
00378 outprob_state_nocache(GMMCalc *gc, int t, HTK_HMM_State *stateinfo, HTK_Param *param)
00379 {
00380
00381 gc->OP_vec = param->parvec[t];
00382 gc->OP_veclen = param->veclen;
00383 return(gmm_calc_mix(gc, stateinfo));
00384 }
00385
00386
00387
00388
00406 boolean
00407 gmm_init(Recog *recog)
00408 {
00409 HTK_HMM_INFO *gmm;
00410 HTK_HMM_Data *d;
00411 GMMCalc *gc;
00412 int i;
00413
00414 gmm = recog->gmm;
00415
00416
00417
00418 if (gmm->is_tied_mixture) {
00419 jlog("ERROR: gmm_init: tied-mixture GMM is not supported\n");
00420 return FALSE;
00421 }
00422
00423 for(d=gmm->start;d;d=d->next) {
00424 if (d->state_num > 3) {
00425 jlog("ERROR: gmm_init: more than three states (one output state) defined in GMM [%s]\n", d->name);
00426 return FALSE;
00427 }
00428 }
00429
00430
00431
00432
00433 if (recog->gc == NULL) {
00434 gc = (GMMCalc *)mymalloc(sizeof(GMMCalc));
00435 recog->gc = gc;
00436 }
00437
00438
00439 gc->gmm_score = (LOGPROB *)mymalloc(sizeof(LOGPROB) * gmm->totalhmmnum);
00440
00441 #ifdef GMM_VAD
00442 gc->nframe = recog->jconf->detect.gmm_margin;
00443 gc->rates = (LOGPROB *)mymalloc(sizeof(LOGPROB) * gc->nframe);
00444 #endif
00445
00446 gc->is_voice = (boolean *)mymalloc(sizeof(boolean) * gmm->totalhmmnum);
00447 i = 0;
00448 if (recog->jconf->reject.gmm_reject_cmn_string) {
00449 for(d=recog->gmm->start;d;d=d->next) {
00450 if (strstr(recog->jconf->reject.gmm_reject_cmn_string, d->name)) {
00451 gc->is_voice[i] = FALSE;
00452 } else {
00453 gc->is_voice[i] = TRUE;
00454 }
00455 i++;
00456 }
00457 } else {
00458 for(d=recog->gmm->start;d;d=d->next) {
00459 gc->is_voice[i] = TRUE;
00460 i++;
00461 }
00462 }
00463
00464
00465 gmm_gprune_safe_init(gc, gmm, recog->jconf->reject.gmm_gprune_num);
00466
00467
00468 if (!gmm->variance_inversed) {
00469
00470 htk_hmm_inverse_variances(gmm);
00471 gmm->variance_inversed = TRUE;
00472 }
00473
00474 return TRUE;
00475 }
00476
00493 void
00494 gmm_prepare(Recog *recog)
00495 {
00496 HTK_HMM_Data *d;
00497 int i;
00498
00499
00500 i = 0;
00501 for(d=recog->gmm->start;d;d=d->next) {
00502 recog->gc->gmm_score[i] = 0.0;
00503 i++;
00504 }
00505 #ifdef GMM_VAD
00506 for(i=0;i<recog->gc->nframe;i++) recog->gc->rates[i] = 0.0;
00507 recog->gc->framep = 0;
00508 recog->gc->filled = FALSE;
00509 recog->gc->in_voice = FALSE;
00510 #endif
00511
00512 recog->gc->framecount = 0;
00513
00514 #ifdef GMM_VAD_DEBUG
00515 printf("GMM_VAD: init\n");
00516 #endif
00517 }
00518
00545 void
00546 gmm_proceed(Recog *recog)
00547 {
00548 HTK_HMM_Data *d;
00549 GMMCalc *gc;
00550 int i;
00551 MFCCCalc *mfcc;
00552 LOGPROB score;
00553 #ifdef GMM_VAD
00554 LOGPROB max_n;
00555 LOGPROB max_v;
00556 #endif
00557
00558 mfcc = recog->gmmmfcc;
00559 gc = recog->gc;
00560
00561 if (!mfcc->valid) return;
00562
00563 gc->framecount++;
00564
00565 #ifdef GMM_VAD
00566 max_n = max_v = LOG_ZERO;
00567 #endif
00568
00569 i = 0;
00570 for(d=recog->gmm->start;d;d=d->next) {
00571 score = outprob_state_nocache(gc, mfcc->f, d->s[1], mfcc->param);
00572 gc->gmm_score[i] += score;
00573 #ifdef GMM_VAD
00574 if (gc->is_voice[i]) {
00575 if (max_v < score) max_v = score;
00576 } else {
00577 if (max_n < score) max_n = score;
00578 }
00579 #endif
00580 #ifdef MES
00581 jlog("DEBUG: [%s: total=%f avg=%f]\n", d->name, gc->gmm_score[i], gc->gmm_score[i] / (float)gc->framecount);
00582 #endif
00583 i++;
00584 }
00585 #ifdef GMM_VAD
00586 #ifdef GMM_VAD_DEBUG
00587
00588 #endif
00589
00590 gc->rates[gc->framep] = max_v - max_n;
00591 #ifdef GMM_VAD_DEBUG
00592 printf("GMM_VAD: %f\n", max_v - max_n);
00593 #endif
00594
00595 gc->framep++;
00596
00597 if (gc->framep >= gc->nframe) {
00598 gc->filled = TRUE;
00599 gc->framep = 0;
00600 }
00601 #endif
00602 }
00603
00628 void
00629 gmm_end(Recog *recog)
00630 {
00631 HTK_HMM_INFO *gmm;
00632 LOGPROB *score;
00633 HTK_HMM_Data *d;
00634 LOGPROB maxprob;
00635 HTK_HMM_Data *dmax;
00636 #ifdef CONFIDENCE_MEASURE
00637 LOGPROB sum;
00638 #endif
00639 int i;
00640 int maxid;
00641
00642 if (recog->gc->framecount == 0) return;
00643
00644 gmm = recog->gmm;
00645 score = recog->gc->gmm_score;
00646
00647
00648 i = 0;
00649 maxprob = LOG_ZERO;
00650 dmax = NULL;
00651 for(d=gmm->start;d;d=d->next) {
00652 if (maxprob < score[i]) {
00653 dmax = d;
00654 maxprob = score[i];
00655 maxid = i;
00656 }
00657 i++;
00658 }
00659 recog->gc->max_d = dmax;
00660 recog->gc->max_i = maxid;
00661
00662 #ifdef CONFIDENCE_MEASURE
00663
00664 sum = 0.0;
00665 i = 0;
00666 for(d=gmm->start;d;d=d->next) {
00667
00668 sum += pow(10, 0.05 * (score[i] - maxprob));
00669 i++;
00670 }
00671 recog->gc->gmm_max_cm = 1.0 / sum;
00672 #endif
00673
00674
00675 callback_exec(CALLBACK_RESULT_GMM, recog);
00676
00677 }
00678
00679
00704 boolean
00705 gmm_valid_input(Recog *recog)
00706 {
00707 if (recog->gc->max_d == NULL) return FALSE;
00708 if (recog->gc->is_voice[recog->gc->max_i]) {
00709 return TRUE;
00710 }
00711 return FALSE;
00712 }
00713
00728 void
00729 gmm_free(Recog *recog)
00730 {
00731 if (recog->gc) {
00732 free(recog->gc->OP_calced_score);
00733 free(recog->gc->OP_calced_id);
00734 free(recog->gc->is_voice);
00735 #ifdef GMM_VAD
00736 free(recog->gc->rates);
00737 #endif
00738 free(recog->gc->gmm_score);
00739 free(recog->gc);
00740 recog->gc = NULL;
00741 }
00742 }
00743
00744 #ifdef GMM_VAD
00745
00762 static void
00763 voice_activity_score(GMMCalc *gc, float *mean_ret, float *var_ret, int *count_ret)
00764 {
00765 int i, len;
00766 LOGPROB mean;
00767 LOGPROB var;
00768 LOGPROB x;
00769 int count;
00770
00771 if (!gc->filled) {
00772
00773 *mean_ret = 0.0;
00774 *var_ret = 0.0;
00775 *count_ret = 0;
00776 return;
00777 }
00778
00779 if (gc->filled) {
00780 len = gc->nframe;
00781 } else {
00782 len = gc->framep;
00783 }
00784
00785 mean = 0;
00786 count = 0;
00787 for(i=0;i<len;i++) {
00788 mean += gc->rates[i];
00789 if (gc->rates[i] > 0.0) count++;
00790 }
00791 mean /= (float)len;
00792 var = 0.0;
00793 for(i=0;i<len;i++) {
00794 x = mean - gc->rates[i];
00795 var += x * x;
00796 }
00797 var /= (float)len;
00798
00799 *mean_ret = mean;
00800 *var_ret = var;
00801 *count_ret = count;
00802 }
00803
00823 void
00824 gmm_check_trigger(Recog *recog)
00825 {
00826 GMMCalc *gc;
00827 gc = recog->gc;
00828 float mean;
00829 float var;
00830 int count;
00831
00832 gc->up_trigger = gc->down_trigger = FALSE;
00833
00834 voice_activity_score(gc, &mean, &var, &count);
00835
00836 if (gc->in_voice) {
00837 if (mean < -0.2) {
00838 gc->down_trigger = TRUE;
00839 gc->in_voice = FALSE;
00840 }
00841 } else {
00842 if (mean > 0.7) {
00843 gc->up_trigger = TRUE;
00844 gc->in_voice = TRUE;
00845 }
00846 }
00847
00848 #ifdef GMM_VAD_DEBUG
00849 printf("GMM_VAD: %s: %f %f %d", gc->in_voice ? "VOICE" : "NOISE", mean, var, count);
00850 if (gc->up_trigger) printf(": BEGIN");
00851 if (gc->down_trigger) printf(": END");
00852 printf("\n");
00853 #endif
00854
00855 }
00856 #endif
00857
00858