00001
00048
00049
00050
00051
00052
00053
00054 #include <julius/julius.h>
00055
00056 #undef MES
00057
00079 static int
00080 gmm_find_insert_point(GMMCalc *gc, LOGPROB score, int len)
00081 {
00082
00083 int left = 0;
00084 int right = len - 1;
00085 int mid;
00086
00087 while (left < right) {
00088 mid = (left + right) / 2;
00089 if (gc->OP_calced_score[mid] > score) {
00090 left = mid + 1;
00091 } else {
00092 right = mid;
00093 }
00094 }
00095 return(left);
00096 }
00097
00120 static int
00121 gmm_cache_push(GMMCalc *gc, int id, LOGPROB score, int len)
00122 {
00123 int insertp;
00124
00125 if (len == 0) {
00126 gc->OP_calced_score[0] = score;
00127 gc->OP_calced_id[0] = id;
00128 return(1);
00129 }
00130 if (gc->OP_calced_score[len-1] >= score) {
00131 if (len < gc->OP_gprune_num) {
00132 gc->OP_calced_score[len] = score;
00133 gc->OP_calced_id[len] = id;
00134 len++;
00135 }
00136 return len;
00137 }
00138 if (gc->OP_calced_score[0] < score) {
00139 insertp = 0;
00140 } else {
00141 insertp = gmm_find_insert_point(gc, score, len);
00142 }
00143 if (len < gc->OP_gprune_num) {
00144 memmove(&(gc->OP_calced_score[insertp+1]), &(gc->OP_calced_score[insertp]), sizeof(LOGPROB)*(len - insertp));
00145 memmove(&(gc->OP_calced_id[insertp+1]), &(gc->OP_calced_id[insertp]), sizeof(int)*(len - insertp));
00146 } else if (insertp < len - 1) {
00147 memmove(&(gc->OP_calced_score[insertp+1]), &(gc->OP_calced_score[insertp]), sizeof(LOGPROB)*(len - insertp - 1));
00148 memmove(&(gc->OP_calced_id[insertp+1]), &(gc->OP_calced_id[insertp]), sizeof(int)*(len - insertp - 1));
00149 }
00150 gc->OP_calced_score[insertp] = score;
00151 gc->OP_calced_id[insertp] = id;
00152 if (len < gc->OP_gprune_num) len++;
00153 return(len);
00154 }
00155
00176 static LOGPROB
00177 gmm_compute_g_base(GMMCalc *gc, HTK_HMM_Dens *binfo)
00178 {
00179 VECT tmp, x;
00180 VECT *mean;
00181 VECT *var;
00182 VECT *vec = gc->OP_vec;
00183 short veclen = gc->OP_veclen;
00184
00185 if (binfo == NULL) return(LOG_ZERO);
00186 mean = binfo->mean;
00187 var = binfo->var->vec;
00188 tmp = 0.0;
00189 for (; veclen > 0; veclen--) {
00190 x = *(vec++) - *(mean++);
00191 tmp += x * x * *(var++);
00192 }
00193 return((tmp + binfo->gconst) * -0.5);
00194 }
00195
00218 static LOGPROB
00219 gmm_compute_g_safe(GMMCalc *gc, HTK_HMM_Dens *binfo, LOGPROB thres)
00220 {
00221 VECT tmp, x;
00222 VECT *mean;
00223 VECT *var;
00224 VECT *vec = gc->OP_vec;
00225 short veclen = gc->OP_veclen;
00226 VECT fthres = thres * (-2.0);
00227
00228 if (binfo == NULL) return(LOG_ZERO);
00229 mean = binfo->mean;
00230 var = binfo->var->vec;
00231 tmp = binfo->gconst;
00232 for (; veclen > 0; veclen--) {
00233 x = *(vec++) - *(mean++);
00234 tmp += x * x * *(var++);
00235 if (tmp > fthres) return LOG_ZERO;
00236 }
00237 return(tmp * -0.5);
00238 }
00239
00256 static void
00257 gmm_gprune_safe_init(GMMCalc *gc, HTK_HMM_INFO *hmminfo, int prune_num)
00258 {
00259
00260 gc->OP_gprune_num = prune_num;
00261
00262 gc->OP_calced_maxnum = hmminfo->maxmixturenum * gc->OP_nstream;
00263
00264 gc->OP_calced_score = (LOGPROB *)mymalloc(sizeof(LOGPROB) * gc->OP_calced_maxnum);
00265 gc->OP_calced_id = (int *)mymalloc(sizeof(int) * gc->OP_calced_maxnum);
00266 }
00267
00295 static void
00296 gmm_gprune_safe(GMMCalc *gc, HTK_HMM_Dens **g, int gnum)
00297 {
00298 int i, num = 0;
00299 LOGPROB score, thres;
00300
00301 thres = LOG_ZERO;
00302 for (i = 0; i < gnum; i++) {
00303 if (num < gc->OP_gprune_num) {
00304 score = gmm_compute_g_base(gc, g[i]);
00305 } else {
00306 score = gmm_compute_g_safe(gc, g[i], thres);
00307 if (score <= thres) continue;
00308 }
00309 num = gmm_cache_push(gc, i, score, num);
00310 thres = gc->OP_calced_score[num-1];
00311 }
00312 gc->OP_calced_num = num;
00313 }
00314
00333 static LOGPROB
00334 gmm_calc_mix(GMMCalc *gc, HTK_HMM_State *state)
00335 {
00336 int i;
00337 LOGPROB logprob, logprobsum;
00338 int s;
00339 PROB stream_weight;
00340
00341
00342
00343 logprobsum = 0.0;
00344 for(s=0;s<gc->OP_nstream;s++) {
00345
00346 if (state->w) stream_weight = state->w->weight[s];
00347 else stream_weight = 1.0;
00348
00349 gc->OP_vec = gc->OP_vec_stream[s];
00350 gc->OP_veclen = gc->OP_veclen_stream[s];
00351
00352 gmm_gprune_safe(gc, state->pdf[s]->b, state->pdf[s]->mix_num);
00353
00354
00355
00356
00357 for(i=0;i<gc->OP_calced_num;i++) {
00358 gc->OP_calced_score[i] += state->pdf[s]->bweight[gc->OP_calced_id[i]];
00359 }
00360
00361 logprob = addlog_array(gc->OP_calced_score, gc->OP_calced_num);
00362
00363 if (logprob <= LOG_ZERO) continue;
00364
00365 logprobsum += logprob * stream_weight;
00366
00367 }
00368 if (logprobsum == 0.0) return(LOG_ZERO);
00369 if (logprobsum <= LOG_ZERO) return(LOG_ZERO);
00370 return (logprob * INV_LOG_TEN);
00371 }
00372
00396 static LOGPROB
00397 outprob_state_nocache(GMMCalc *gc, int t, HTK_HMM_State *stateinfo, HTK_Param *param)
00398 {
00399 int d, i;
00400
00401 for(d=0,i=0;i<gc->OP_nstream;i++) {
00402 gc->OP_vec_stream[i] = &(param->parvec[t][d]);
00403 d += gc->OP_veclen_stream[i];
00404 }
00405 return(gmm_calc_mix(gc, stateinfo));
00406 }
00407
00408
00409
00410
00428 boolean
00429 gmm_init(Recog *recog)
00430 {
00431 HTK_HMM_INFO *gmm;
00432 HTK_HMM_Data *d;
00433 GMMCalc *gc;
00434 int i;
00435
00436 gmm = recog->gmm;
00437
00438
00439
00440 if (gmm->is_tied_mixture) {
00441 jlog("ERROR: gmm_init: tied-mixture GMM is not supported\n");
00442 return FALSE;
00443 }
00444
00445 for(d=gmm->start;d;d=d->next) {
00446 if (d->state_num > 3) {
00447 jlog("ERROR: gmm_init: more than three states (one output state) defined in GMM [%s]\n", d->name);
00448 return FALSE;
00449 }
00450 }
00451
00452
00453
00454
00455 if (recog->gc == NULL) {
00456 gc = (GMMCalc *)mymalloc(sizeof(GMMCalc));
00457 recog->gc = gc;
00458 } else {
00459 gc = recog->gc;
00460 }
00461
00462
00463 gc->gmm_score = (LOGPROB *)mymalloc(sizeof(LOGPROB) * gmm->totalhmmnum);
00464
00465 #ifdef GMM_VAD
00466 gc->nframe = recog->jconf->detect.gmm_margin;
00467 gc->rates = (LOGPROB *)mymalloc(sizeof(LOGPROB) * gc->nframe);
00468 #endif
00469
00470 gc->is_voice = (boolean *)mymalloc(sizeof(boolean) * gmm->totalhmmnum);
00471 i = 0;
00472 if (recog->jconf->reject.gmm_reject_cmn_string) {
00473 for(d=recog->gmm->start;d;d=d->next) {
00474 if (strstr(recog->jconf->reject.gmm_reject_cmn_string, d->name)) {
00475 gc->is_voice[i] = FALSE;
00476 } else {
00477 gc->is_voice[i] = TRUE;
00478 }
00479 i++;
00480 }
00481 } else {
00482 for(d=recog->gmm->start;d;d=d->next) {
00483 gc->is_voice[i] = TRUE;
00484 i++;
00485 }
00486 }
00487
00488
00489 gc->OP_nstream = gmm->opt.stream_info.num;
00490 for(i=0;i<gc->OP_nstream;i++) {
00491 gc->OP_veclen_stream[i] = gmm->opt.stream_info.vsize[i];
00492 }
00493 gmm_gprune_safe_init(gc, gmm, recog->jconf->reject.gmm_gprune_num);
00494
00495
00496 if (!gmm->variance_inversed) {
00497
00498 htk_hmm_inverse_variances(gmm);
00499 gmm->variance_inversed = TRUE;
00500 }
00501
00502 return TRUE;
00503 }
00504
00521 void
00522 gmm_prepare(Recog *recog)
00523 {
00524 HTK_HMM_Data *d;
00525 int i;
00526
00527
00528 i = 0;
00529 for(d=recog->gmm->start;d;d=d->next) {
00530 recog->gc->gmm_score[i] = 0.0;
00531 i++;
00532 }
00533 #ifdef GMM_VAD
00534 for(i=0;i<recog->gc->nframe;i++) recog->gc->rates[i] = 0.0;
00535 recog->gc->framep = 0;
00536 recog->gc->filled = FALSE;
00537 recog->gc->in_voice = FALSE;
00538 #endif
00539
00540 recog->gc->framecount = 0;
00541
00542 #ifdef GMM_VAD_DEBUG
00543 printf("GMM_VAD: init\n");
00544 #endif
00545 }
00546
00573 void
00574 gmm_proceed(Recog *recog)
00575 {
00576 HTK_HMM_Data *d;
00577 GMMCalc *gc;
00578 int i;
00579 MFCCCalc *mfcc;
00580 LOGPROB score;
00581 #ifdef GMM_VAD
00582 LOGPROB max_n;
00583 LOGPROB max_v;
00584 #endif
00585
00586 mfcc = recog->gmmmfcc;
00587 gc = recog->gc;
00588
00589 if (!mfcc->valid) return;
00590
00591 gc->framecount++;
00592
00593 #ifdef GMM_VAD
00594 max_n = max_v = LOG_ZERO;
00595 #endif
00596
00597 i = 0;
00598 for(d=recog->gmm->start;d;d=d->next) {
00599 score = outprob_state_nocache(gc, mfcc->f, d->s[1], mfcc->param);
00600 gc->gmm_score[i] += score;
00601 #ifdef GMM_VAD
00602 if (gc->is_voice[i]) {
00603 if (max_v < score) max_v = score;
00604 } else {
00605 if (max_n < score) max_n = score;
00606 }
00607 #endif
00608 #ifdef MES
00609 jlog("DEBUG: [%s: total=%f avg=%f]\n", d->name, gc->gmm_score[i], gc->gmm_score[i] / (float)gc->framecount);
00610 #endif
00611 i++;
00612 }
00613 #ifdef GMM_VAD
00614 #ifdef GMM_VAD_DEBUG
00615
00616 #endif
00617
00618 gc->rates[gc->framep] = max_v - max_n;
00619 #ifdef GMM_VAD_DEBUG
00620 printf("GMM_VAD: %f\n", max_v - max_n);
00621 #endif
00622
00623 gc->framep++;
00624
00625 if (gc->framep >= gc->nframe) {
00626 gc->filled = TRUE;
00627 gc->framep = 0;
00628 }
00629 #endif
00630 }
00631
00656 void
00657 gmm_end(Recog *recog)
00658 {
00659 HTK_HMM_INFO *gmm;
00660 LOGPROB *score;
00661 HTK_HMM_Data *d;
00662 LOGPROB maxprob;
00663 HTK_HMM_Data *dmax;
00664 #ifdef CONFIDENCE_MEASURE
00665 LOGPROB sum;
00666 #endif
00667 int i;
00668 int maxid;
00669
00670 if (recog->gc->framecount == 0) return;
00671
00672 gmm = recog->gmm;
00673 score = recog->gc->gmm_score;
00674
00675
00676 i = 0;
00677 maxprob = LOG_ZERO;
00678 dmax = NULL;
00679 maxid = 0;
00680 for(d=gmm->start;d;d=d->next) {
00681 if (maxprob < score[i]) {
00682 dmax = d;
00683 maxprob = score[i];
00684 maxid = i;
00685 }
00686 i++;
00687 }
00688 recog->gc->max_d = dmax;
00689 recog->gc->max_i = maxid;
00690
00691 #ifdef CONFIDENCE_MEASURE
00692
00693 sum = 0.0;
00694 i = 0;
00695 for(d=gmm->start;d;d=d->next) {
00696
00697 sum += pow(10, 0.05 * (score[i] - maxprob));
00698 i++;
00699 }
00700 recog->gc->gmm_max_cm = 1.0 / sum;
00701 #endif
00702
00703
00704 callback_exec(CALLBACK_RESULT_GMM, recog);
00705
00706 }
00707
00708
00733 boolean
00734 gmm_valid_input(Recog *recog)
00735 {
00736 if (recog->gc->max_d == NULL) return FALSE;
00737 if (recog->gc->is_voice[recog->gc->max_i]) {
00738 return TRUE;
00739 }
00740 return FALSE;
00741 }
00742
00757 void
00758 gmm_free(Recog *recog)
00759 {
00760 if (recog->gc) {
00761 free(recog->gc->OP_calced_score);
00762 free(recog->gc->OP_calced_id);
00763 free(recog->gc->is_voice);
00764 #ifdef GMM_VAD
00765 free(recog->gc->rates);
00766 #endif
00767 free(recog->gc->gmm_score);
00768 free(recog->gc);
00769 recog->gc = NULL;
00770 }
00771 }
00772
00773 #ifdef GMM_VAD
00774
00791 static void
00792 voice_activity_score(GMMCalc *gc, float *mean_ret, float *var_ret, int *count_ret)
00793 {
00794 int i, len;
00795 LOGPROB mean;
00796 LOGPROB var;
00797 LOGPROB x;
00798 int count;
00799
00800 if (!gc->filled) {
00801
00802 *mean_ret = 0.0;
00803 *var_ret = 0.0;
00804 *count_ret = 0;
00805 return;
00806 }
00807
00808 if (gc->filled) {
00809 len = gc->nframe;
00810 } else {
00811 len = gc->framep;
00812 }
00813
00814 mean = 0;
00815 count = 0;
00816 for(i=0;i<len;i++) {
00817 mean += gc->rates[i];
00818 if (gc->rates[i] > 0.0) count++;
00819 }
00820 mean /= (float)len;
00821 var = 0.0;
00822 for(i=0;i<len;i++) {
00823 x = mean - gc->rates[i];
00824 var += x * x;
00825 }
00826 var /= (float)len;
00827
00828 *mean_ret = mean;
00829 *var_ret = var;
00830 *count_ret = count;
00831 }
00832
00852 void
00853 gmm_check_trigger(Recog *recog)
00854 {
00855 GMMCalc *gc;
00856 gc = recog->gc;
00857 float mean;
00858 float var;
00859 int count;
00860
00861 gc->up_trigger = gc->down_trigger = FALSE;
00862
00863 voice_activity_score(gc, &mean, &var, &count);
00864
00865 if (gc->in_voice) {
00866 if (mean <= recog->jconf->detect.gmm_downtrigger_thres) {
00867 gc->down_trigger = TRUE;
00868 gc->in_voice = FALSE;
00869 }
00870 } else {
00871 if (mean >= recog->jconf->detect.gmm_uptrigger_thres) {
00872 gc->up_trigger = TRUE;
00873 gc->in_voice = TRUE;
00874 }
00875 }
00876
00877 #ifdef GMM_VAD_DEBUG
00878 printf("GMM_VAD: %s: %f %f %d", gc->in_voice ? "VOICE" : "NOISE", mean, var, count);
00879 if (gc->up_trigger) printf(": BEGIN");
00880 if (gc->down_trigger) printf(": END");
00881 printf("\n");
00882 #endif
00883
00884 }
00885 #endif
00886
00887