Julius: julius-simple/julius-simple.c Source File

00001 
00030 /* include top Julius library header */
00031 #include <julius/juliuslib.h>
00032 
00037 static void
00038 status_recready(Recog *recog, void *dummy)
00039 {
00040   if (recog->jconf->input.speech_input == SP_MIC || recog->jconf->input.speech_input == SP_NETAUDIO) {
00041     fprintf(stderr, "<<< please speak >>>");
00042   }
00043 }
00044 
00049 static void
00050 status_recstart(Recog *recog, void *dummy)
00051 {
00052   if (recog->jconf->input.speech_input == SP_MIC || recog->jconf->input.speech_input == SP_NETAUDIO) {
00053     fprintf(stderr, "\r                    \r");
00054   }
00055 }
00056 
00061 static void
00062 put_hypo_phoneme(WORD_ID *seq, int n, WORD_INFO *winfo)
00063 {
00064   int i,j;
00065   WORD_ID w;
00066   static char buf[MAX_HMMNAME_LEN];
00067 
00068   if (seq != NULL) {
00069     for (i=0;i<n;i++) {
00070       if (i > 0) printf(" |");
00071       w = seq[i];
00072       for (j=0;j<winfo->wlen[w];j++) {
00073         center_name(winfo->wseq[w][j]->name, buf);
00074         printf(" %s", buf);
00075       }
00076     }
00077   }
00078   printf("\n");  
00079 }
00085 static void
00086 output_result(Recog *recog, void *dummy)
00087 {
00088   int i, j;
00089   int len;
00090   WORD_INFO *winfo;
00091   WORD_ID *seq;
00092   int seqnum;
00093   int n;
00094   Sentence *s;
00095   RecogProcess *r;
00096 
00097   /* all recognition results are stored at each recognition process
00098      instance */
00099   for(r=recog->process_list;r;r=r->next) {
00100 
00101     /* skip the process if the process is not alive */
00102     if (! r->live) continue;
00103 
00104     /* result are in r->result.  See recog.h for details */
00105 
00106     /* check result status */
00107     if (r->result.status < 0) {      /* no results obtained */
00108       /* outout message according to the status code */
00109       switch(r->result.status) {
00110       case J_RESULT_STATUS_REJECT_POWER:
00111         printf("<input rejected by power>\n");
00112         break;
00113       case J_RESULT_STATUS_TERMINATE:
00114         printf("<input teminated by request>\n");
00115         break;
00116       case J_RESULT_STATUS_ONLY_SILENCE:
00117         printf("<input rejected by decoder (silence input result)>\n");
00118         break;
00119       case J_RESULT_STATUS_REJECT_GMM:
00120         printf("<input rejected by GMM>\n");
00121         break;
00122       case J_RESULT_STATUS_REJECT_SHORT:
00123         printf("<input rejected by short input>\n");
00124         break;
00125       case J_RESULT_STATUS_FAIL:
00126         printf("<search failed>\n");
00127         break;
00128       }
00129       /* continue to next process instance */
00130       continue;
00131     }
00132 
00133     /* output results for all the obtained sentences */
00134     winfo = r->lm->winfo;
00135 
00136     for(n = 0; n < r->result.sentnum; n++) { /* for all sentences */
00137 
00138       s = &(r->result.sent[n]);
00139       seq = s->word;
00140       seqnum = s->word_num;
00141 
00142       /* output word sequence like Julius */
00143       printf("sentence%d:", n+1);
00144       for(i=0;i<seqnum;i++) printf(" %s", winfo->woutput[seq[i]]);
00145       printf("\n");
00146       /* LM entry sequence */
00147       printf("wseq%d:", n+1);
00148       for(i=0;i<seqnum;i++) printf(" %s", winfo->wname[seq[i]]);
00149       printf("\n");
00150       /* phoneme sequence */
00151       printf("phseq%d:", n+1);
00152       put_hypo_phoneme(seq, seqnum, winfo);
00153       printf("\n");
00154       /* confidence scores */
00155       printf("cmscore%d:", n+1);
00156       for (i=0;i<seqnum; i++) printf(" %5.3f", s->confidence[i]);
00157       printf("\n");
00158       /* AM and LM scores */
00159       printf("score%d: %f", n+1, s->score);
00160       if (r->lmtype == LM_PROB) { /* if this process uses N-gram */
00161         printf(" (AM: %f  LM: %f)", s->score_am, s->score_lm);
00162       }
00163       printf("\n");
00164       if (r->lmtype == LM_DFA) { /* if this process uses DFA grammar */
00165         /* output which grammar the hypothesis belongs to
00166            when using multiple grammars */
00167         if (multigram_get_all_num(r->lm) > 1) {
00168           printf("grammar%d: %d\n", n+1, s->gram_id);
00169         }
00170       }
00171       
00172       /* output alignment result if exist */
00173       if (s->align.filled) {
00174         HMM_Logical *p;
00175         int i;
00176         
00177         printf("=== begin forced alignment ===\n");
00178         printf(" id: from  to    n_score    unit\n");
00179         printf(" ----------------------------------------\n");
00180         for(i=0;i<s->align.num;i++) {
00181           printf("[%4d %4d]  %f  ", s->align.begin_frame[i], s->align.end_frame[i], s->align.avgscore[i]);
00182           switch(s->align.unittype) {
00183           case PER_WORD:
00184             printf("%s\t[%s]\n", winfo->wname[s->align.w[i]], winfo->woutput[s->align.w[i]]);
00185             break;
00186           case PER_PHONEME:
00187             p = s->align.ph[i];
00188             if (p->is_pseudo) {
00189               printf("{%s}\n", p->name);
00190             } else if (strmatch(p->name, p->body.defined->name)) {
00191               printf("%s\n", p->name);
00192             } else {
00193               printf("%s[%s]\n", p->name, p->body.defined->name);
00194             }
00195             break;
00196           case PER_STATE:
00197             p = s->align.ph[i];
00198             if (p->is_pseudo) {
00199               printf("{%s}", p->name);
00200             } else if (strmatch(p->name, p->body.defined->name)) {
00201               printf("%s", p->name);
00202             } else {
00203               printf("%s[%s]", p->name, p->body.defined->name);
00204             }
00205             if (r->am->hmminfo->multipath) {
00206               if (s->align.is_iwsp[i]) {
00207                 printf(" #%d (sp)\n", s->align.loc[i]);
00208               } else {
00209                 printf(" #%d\n", s->align.loc[i]);
00210               }
00211             } else {
00212               printf(" #%d\n", s->align.loc[i]);
00213             }
00214             break;
00215           }
00216         }
00217         
00218         printf("re-computed AM score: %f\n", s->align.allscore);
00219         
00220         printf("=== end forced alignment ===\n");
00221       }
00222     }
00223   }
00224 
00225   /* flush output buffer */
00226   fflush(stdout);
00227 }
00228 
00229 
00234 int
00235 main(int argc, char *argv[])
00236 {
00241   Jconf *jconf;
00242 
00247   Recog *recog;
00248 
00253   static char speechfilename[MAXPATHLEN];
00254 
00255   int ret;
00256 
00257   /* by default, all messages will be output to standard out */
00258   /* to disable output, uncomment below */
00259   //jlog_set_output(NULL);
00260 
00261   /* output log to a file */
00262   //FILE *fp; fp = fopen("log.txt", "w"); jlog_set_output(fp);
00263 
00264   /* if no argument, output usage and exit */
00265   if (argc == 1) {
00266     fprintf(stderr, "Julius rev.%s - based on ", JULIUS_VERSION);
00267     j_put_version(stderr);
00268     fprintf(stderr, "Try '-setting' for built-in engine configuration.\n");
00269     fprintf(stderr, "Try '-help' for run time options.\n");
00270     return -1;
00271   }
00272 
00273   /************/
00274   /* Start up */
00275   /************/
00276   /* 1. load configurations from command arguments */
00277   jconf = j_config_load_args_new(argc, argv);
00278   /* else, you can load configurations from a jconf file */
00279   //jconf = j_config_load_file_new(jconf_filename);
00280   if (jconf == NULL) {          /* error */
00281     fprintf(stderr, "Try `-help' for more information.\n");
00282     return -1;
00283   }
00284   
00285   /* 2. create recognition instance according to the jconf */
00286   /* it loads models, setup final parameters, build lexicon
00287      and set up work area for recognition */
00288   recog = j_create_instance_from_jconf(jconf);
00289   if (recog == NULL) {
00290     fprintf(stderr, "Error in startup\n");
00291     return -1;
00292   }
00293 
00294   /*********************/
00295   /* Register callback */
00296   /*********************/
00297   /* register result callback functions */
00298   callback_add(recog, CALLBACK_EVENT_SPEECH_READY, status_recready, NULL);
00299   callback_add(recog, CALLBACK_EVENT_SPEECH_START, status_recstart, NULL);
00300   callback_add(recog, CALLBACK_RESULT, output_result, NULL);
00301 
00302   /**************************/
00303   /* Initialize audio input */
00304   /**************************/
00305   /* initialize audio input device */
00306   /* ad-in thread starts at this time for microphone */
00307   if (j_adin_init(recog) == FALSE) {    /* error */
00308     return -1;
00309   }
00310 
00311   /* output system information to log */
00312   j_recog_info(recog);
00313 
00314   /***********************************/
00315   /* Open input stream and recognize */
00316   /***********************************/
00317 
00318   if (jconf->input.speech_input == SP_MFCFILE) {
00319     /* MFCC file input */
00320 
00321     while (get_line_from_stdin(speechfilename, MAXPATHLEN, "enter MFCC filename->") != NULL) {
00322       if (verbose_flag) printf("\ninput MFCC file: %s\n", speechfilename);
00323       /* open the input file */
00324       ret = j_open_stream(recog, speechfilename);
00325       switch(ret) {
00326       case 0:                   /* succeeded */
00327         break;
00328       case -1:                  /* error */
00329         /* go on to the next input */
00330         continue;
00331       case -2:                  /* end of recognition */
00332         return;
00333       }
00334       /* recognition loop */
00335       ret = j_recognize_stream(recog);
00336       if (ret == -1) return -1; /* error */
00337       /* reach here when an input ends */
00338     }
00339 
00340   } else {
00341     /* raw speech input (microphone etc.) */
00342 
00343     switch(j_open_stream(recog, NULL)) {
00344     case 0:                     /* succeeded */
00345       break;
00346     case -1:                    /* error */
00347       fprintf(stderr, "error in input stream\n");
00348       return;
00349     case -2:                    /* end of recognition process */
00350       fprintf(stderr, "failed to begin input stream\n");
00351       return;
00352     }
00353     
00354     /**********************/
00355     /* Recognization Loop */
00356     /**********************/
00357     /* enter main loop to recognize the input stream */
00358     /* finish after whole input has been processed and input reaches end */
00359     ret = j_recognize_stream(recog);
00360     if (ret == -1) return -1;   /* error */
00361     
00362     /*******/
00363     /* End */
00364     /*******/
00365   }
00366 
00367   j_recog_free(recog);
00368 
00369   /* exit program */
00370   return(0);
00371 }