00001
00030
00031 #include <julius/juliuslib.h>
00032
00037 static void
00038 status_recready(Recog *recog, void *dummy)
00039 {
00040 if (recog->jconf->input.speech_input == SP_MIC || recog->jconf->input.speech_input == SP_NETAUDIO) {
00041 fprintf(stderr, "<<< please speak >>>");
00042 }
00043 }
00044
00049 static void
00050 status_recstart(Recog *recog, void *dummy)
00051 {
00052 if (recog->jconf->input.speech_input == SP_MIC || recog->jconf->input.speech_input == SP_NETAUDIO) {
00053 fprintf(stderr, "\r \r");
00054 }
00055 }
00056
00061 static void
00062 put_hypo_phoneme(WORD_ID *seq, int n, WORD_INFO *winfo)
00063 {
00064 int i,j;
00065 WORD_ID w;
00066 static char buf[MAX_HMMNAME_LEN];
00067
00068 if (seq != NULL) {
00069 for (i=0;i<n;i++) {
00070 if (i > 0) printf(" |");
00071 w = seq[i];
00072 for (j=0;j<winfo->wlen[w];j++) {
00073 center_name(winfo->wseq[w][j]->name, buf);
00074 printf(" %s", buf);
00075 }
00076 }
00077 }
00078 printf("\n");
00079 }
00085 static void
00086 output_result(Recog *recog, void *dummy)
00087 {
00088 int i, j;
00089 int len;
00090 WORD_INFO *winfo;
00091 WORD_ID *seq;
00092 int seqnum;
00093 int n;
00094 Sentence *s;
00095 RecogProcess *r;
00096
00097
00098
00099 for(r=recog->process_list;r;r=r->next) {
00100
00101
00102 if (! r->live) continue;
00103
00104
00105
00106
00107 if (r->result.status < 0) {
00108
00109 switch(r->result.status) {
00110 case J_RESULT_STATUS_REJECT_POWER:
00111 printf("<input rejected by power>\n");
00112 break;
00113 case J_RESULT_STATUS_TERMINATE:
00114 printf("<input teminated by request>\n");
00115 break;
00116 case J_RESULT_STATUS_ONLY_SILENCE:
00117 printf("<input rejected by decoder (silence input result)>\n");
00118 break;
00119 case J_RESULT_STATUS_REJECT_GMM:
00120 printf("<input rejected by GMM>\n");
00121 break;
00122 case J_RESULT_STATUS_REJECT_SHORT:
00123 printf("<input rejected by short input>\n");
00124 break;
00125 case J_RESULT_STATUS_FAIL:
00126 printf("<search failed>\n");
00127 break;
00128 }
00129
00130 continue;
00131 }
00132
00133
00134 winfo = r->lm->winfo;
00135
00136 for(n = 0; n < r->result.sentnum; n++) {
00137
00138 s = &(r->result.sent[n]);
00139 seq = s->word;
00140 seqnum = s->word_num;
00141
00142
00143 printf("sentence%d:", n+1);
00144 for(i=0;i<seqnum;i++) printf(" %s", winfo->woutput[seq[i]]);
00145 printf("\n");
00146
00147 printf("wseq%d:", n+1);
00148 for(i=0;i<seqnum;i++) printf(" %s", winfo->wname[seq[i]]);
00149 printf("\n");
00150
00151 printf("phseq%d:", n+1);
00152 put_hypo_phoneme(seq, seqnum, winfo);
00153 printf("\n");
00154
00155 printf("cmscore%d:", n+1);
00156 for (i=0;i<seqnum; i++) printf(" %5.3f", s->confidence[i]);
00157 printf("\n");
00158
00159 printf("score%d: %f", n+1, s->score);
00160 if (r->lmtype == LM_PROB) {
00161 printf(" (AM: %f LM: %f)", s->score_am, s->score_lm);
00162 }
00163 printf("\n");
00164 if (r->lmtype == LM_DFA) {
00165
00166
00167 if (multigram_get_all_num(r->lm) > 1) {
00168 printf("grammar%d: %d\n", n+1, s->gram_id);
00169 }
00170 }
00171
00172
00173 if (s->align.filled) {
00174 HMM_Logical *p;
00175 int i;
00176
00177 printf("=== begin forced alignment ===\n");
00178 printf(" id: from to n_score unit\n");
00179 printf(" ----------------------------------------\n");
00180 for(i=0;i<s->align.num;i++) {
00181 printf("[%4d %4d] %f ", s->align.begin_frame[i], s->align.end_frame[i], s->align.avgscore[i]);
00182 switch(s->align.unittype) {
00183 case PER_WORD:
00184 printf("%s\t[%s]\n", winfo->wname[s->align.w[i]], winfo->woutput[s->align.w[i]]);
00185 break;
00186 case PER_PHONEME:
00187 p = s->align.ph[i];
00188 if (p->is_pseudo) {
00189 printf("{%s}\n", p->name);
00190 } else if (strmatch(p->name, p->body.defined->name)) {
00191 printf("%s\n", p->name);
00192 } else {
00193 printf("%s[%s]\n", p->name, p->body.defined->name);
00194 }
00195 break;
00196 case PER_STATE:
00197 p = s->align.ph[i];
00198 if (p->is_pseudo) {
00199 printf("{%s}", p->name);
00200 } else if (strmatch(p->name, p->body.defined->name)) {
00201 printf("%s", p->name);
00202 } else {
00203 printf("%s[%s]", p->name, p->body.defined->name);
00204 }
00205 if (r->am->hmminfo->multipath) {
00206 if (s->align.is_iwsp[i]) {
00207 printf(" #%d (sp)\n", s->align.loc[i]);
00208 } else {
00209 printf(" #%d\n", s->align.loc[i]);
00210 }
00211 } else {
00212 printf(" #%d\n", s->align.loc[i]);
00213 }
00214 break;
00215 }
00216 }
00217
00218 printf("re-computed AM score: %f\n", s->align.allscore);
00219
00220 printf("=== end forced alignment ===\n");
00221 }
00222 }
00223 }
00224
00225
00226 fflush(stdout);
00227 }
00228
00229
00234 int
00235 main(int argc, char *argv[])
00236 {
00241 Jconf *jconf;
00242
00247 Recog *recog;
00248
00253 static char speechfilename[MAXPATHLEN];
00254
00255 int ret;
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265 if (argc == 1) {
00266 fprintf(stderr, "Julius rev.%s - based on ", JULIUS_VERSION);
00267 j_put_version(stderr);
00268 fprintf(stderr, "Try '-setting' for built-in engine configuration.\n");
00269 fprintf(stderr, "Try '-help' for run time options.\n");
00270 return -1;
00271 }
00272
00273
00274
00275
00276
00277 jconf = j_config_load_args_new(argc, argv);
00278
00279
00280 if (jconf == NULL) {
00281 fprintf(stderr, "Try `-help' for more information.\n");
00282 return -1;
00283 }
00284
00285
00286
00287
00288 recog = j_create_instance_from_jconf(jconf);
00289 if (recog == NULL) {
00290 fprintf(stderr, "Error in startup\n");
00291 return -1;
00292 }
00293
00294
00295
00296
00297
00298 callback_add(recog, CALLBACK_EVENT_SPEECH_READY, status_recready, NULL);
00299 callback_add(recog, CALLBACK_EVENT_SPEECH_START, status_recstart, NULL);
00300 callback_add(recog, CALLBACK_RESULT, output_result, NULL);
00301
00302
00303
00304
00305
00306
00307 if (j_adin_init(recog) == FALSE) {
00308 return -1;
00309 }
00310
00311
00312 j_recog_info(recog);
00313
00314
00315
00316
00317
00318 if (jconf->input.speech_input == SP_MFCFILE) {
00319
00320
00321 while (get_line_from_stdin(speechfilename, MAXPATHLEN, "enter MFCC filename->") != NULL) {
00322 if (verbose_flag) printf("\ninput MFCC file: %s\n", speechfilename);
00323
00324 ret = j_open_stream(recog, speechfilename);
00325 switch(ret) {
00326 case 0:
00327 break;
00328 case -1:
00329
00330 continue;
00331 case -2:
00332 return;
00333 }
00334
00335 ret = j_recognize_stream(recog);
00336 if (ret == -1) return -1;
00337
00338 }
00339
00340 } else {
00341
00342
00343 switch(j_open_stream(recog, NULL)) {
00344 case 0:
00345 break;
00346 case -1:
00347 fprintf(stderr, "error in input stream\n");
00348 return;
00349 case -2:
00350 fprintf(stderr, "failed to begin input stream\n");
00351 return;
00352 }
00353
00354
00355
00356
00357
00358
00359 ret = j_recognize_stream(recog);
00360 if (ret == -1) return -1;
00361
00362
00363
00364
00365 }
00366
00367 j_recog_free(recog);
00368
00369
00370 return(0);
00371 }