#include <sent/stddefs.h>
#include <sent/htk_hmm.h>
Go to the source code of this file.
Data Structures | |
| struct | WORD_INFO |
| Word dictionary structure to hold vocabulary. More... | |
Defines | |
| #define | MAXWSTEP 4000 |
| Memory allocation step in number of words when loading a word dictionary. | |
Functions | |
| WORD_INFO * | word_info_new () |
| Allocate a new word dictionary structure. | |
| void | word_info_free (WORD_INFO *winfo) |
| Free all information in the WORD_INFO. | |
| void | winfo_init (WORD_INFO *winfo) |
| Initialize a new word dictionary structure. | |
| boolean | winfo_expand (WORD_INFO *winfo) |
| Expand the word dictionary. | |
| boolean | init_voca (WORD_INFO *winfo, char *filename, HTK_HMM_INFO *hmminfo, boolean, boolean) |
| Load and initialize a word dictionary. | |
| boolean | init_wordlist (WORD_INFO *winfo, char *filename, HTK_HMM_INFO *hmminfo, char *headphone, char *tailphone, char *contextphone, boolean force_dict) |
| Load and initialize a word list for isolated word recognition. | |
| void | voca_set_stats (WORD_INFO *winfo) |
| Parse a word dictionary and set the maximum state length per word. | |
| void | voca_load_start (WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv) |
| Start loading a dictionary. | |
| boolean | voca_load_line (char *buf, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo) |
| Load a line from buffer and set parameters to the dictionary. | |
| boolean | voca_load_end (WORD_INFO *winfo) |
| End loading dictionary entries. | |
| boolean | voca_load_htkdict (FILE *, WORD_INFO *, HTK_HMM_INFO *, boolean) |
| Top function to read word dictionary via file pointer (gzip enabled). | |
| boolean | voca_load_htkdict_fp (FILE *, WORD_INFO *, HTK_HMM_INFO *, boolean) |
| Top function to read word dictionary via normal file pointer. | |
| boolean | voca_append_htkdict (char *entry, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv) |
| Append a single entry to the existing word dictionary. | |
| boolean | voca_append (WORD_INFO *dstinfo, WORD_INFO *srcinfo, int coffset, int woffset) |
| Append one word dictionary to another, for multiple grammar handling. | |
| boolean | voca_load_htkdict_line (char *buf, WORD_ID *vnum, int linenum, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean do_conv, boolean *ok_flag) |
| Sub function to Add a dictionary entry line to the word dictionary. | |
| boolean | voca_load_word_line (char *buf, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, char *headphone, char *tailphone, char *contextphone) |
| Load a line from buffer and set parameters to the dictionary. | |
| boolean | voca_load_wordlist (FILE *fp, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, char *headphone, char *tailphone, char *contextphone) |
| Top function to read word list via text. | |
| boolean | voca_load_wordlist_fp (FILE *fp, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, char *headphone, char *tailphone, char *contextphone) |
| Top function to read word list via file pointer. | |
| boolean | voca_load_wordlist_line (char *buf, WORD_ID *vnum, int linenum, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean do_conv, boolean *ok_flag, char *headphone, char *tailphone, char *contextphone) |
| Sub function to Add a dictionary entry line to the word dictionary. | |
| boolean | voca_mono2tri (WORD_INFO *winfo, HTK_HMM_INFO *hmminfo) |
| Convert whole words in word dictionary to word-internal triphone. | |
| WORD_ID | voca_lookup_wid (char *, WORD_INFO *) |
| Look up a word on dictionary by string. | |
| WORD_ID * | new_str2wordseq (WORD_INFO *, char *, int *) |
| Convert string of space-separated word strings to array of word ids. | |
| char * | cycle_triphone (char *p) |
| Return the triphone name string composed from the last 3 calls. | |
| char * | cycle_triphone_flush () |
| Flush the triphone buffer and return the last biphone. | |
| void | print_voca_info (FILE *fp, WORD_INFO *) |
| Output overall word dictionary information to stdout. | |
| void | put_voca (FILE *fp, WORD_INFO *winfo, WORD_ID wid) |
| Output information of a word in dictionary to stdout. | |
| boolean | make_base_phone (HTK_HMM_INFO *hmminfo, WORD_INFO *winfo) |
| Build basephone information. | |
| void | print_phone_info (FILE *fp, HTK_HMM_INFO *hmminfo) |
| Output general information concerning phone mapping in HMM definition. | |
| void | print_all_basephone_detail (HMM_basephone *base) |
| Output all basephone information to stdout. | |
| void | print_all_basephone_name (HMM_basephone *base) |
| Output all basephone names to stdout. | |
| void | test_interword_triphone (HTK_HMM_INFO *hmminfo, WORD_INFO *winfo) |
| Top function to check if all the possible triphones on given word dictionary actually exist in the logical HMM. | |
This file defines the data structure for the word dictionary used in recognition. It stores each word's string, output string, phoneme sequence, and transparency. The beginning-of-sentence and end-of-sentence words guessed from the runtime environment are also stored here.
Please note that the N-gram vocabulary is stored in NGRAM_INFO and can differ from this word dictionary. The mapping from the word dictionary to the N-gram vocabulary is held in the wton[] member of WORD_INFO. When used with a DFA, wton[] instead holds the category number to which each word belongs.
Definition in file vocabulary.h.
| WORD_INFO* word_info_new | ( | ) |
Allocate a new word dictionary structure.
Definition at line 35 of file voca_malloc.c.
Referenced by initialize_dict(), multigram_read_file_and_add(), and multigram_update().
| void word_info_free | ( | WORD_INFO * | winfo | ) |
Free all information in the WORD_INFO.
| winfo | [i/o] word dictionary data to be freed. |
Definition at line 51 of file voca_malloc.c.
Referenced by initialize_dict(), j_process_lm_free(), multigram_exec_delete(), multigram_free_all(), multigram_read_file_and_add(), and multigram_update().
| void winfo_init | ( | WORD_INFO * | winfo | ) |
Initialize a new word dictionary structure.
| winfo | [i/o] word dictionary to be initialized. |
Definition at line 75 of file voca_malloc.c.
Referenced by multigram_update(), and voca_load_start().
Expand the word dictionary.
| winfo | [i/o] word dictionary to be expanded. |
Definition at line 105 of file voca_malloc.c.
Referenced by voca_append(), voca_load_line(), and voca_load_word_line().
| boolean init_voca | ( | WORD_INFO * | winfo, | |
| char * | filename, | |||
| HTK_HMM_INFO * | hmminfo, | |||
| boolean | not_conv_tri, | |||
| boolean | force_dict | |||
| ) |
Load and initialize a word dictionary.
| winfo | [out] pointer to a word dictionary data to store the read data | |
| filename | [in] file name of the word dictionary to read | |
| hmminfo | [in] HMM definition data, needed for triphone conversion. | |
| not_conv_tri | [in] TRUE if not converting monophone to triphone. | |
| force_dict | [in] TRUE if want to ignore the error words in the dictionary |
Definition at line 41 of file init_voca.c.
Referenced by initialize_dict(), and multigram_read_file_and_add().
| boolean init_wordlist | ( | WORD_INFO * | winfo, | |
| char * | filename, | |||
| HTK_HMM_INFO * | hmminfo, | |||
| char * | headphone, | |||
| char * | tailphone, | |||
| char * | contextphone, | |||
| boolean | force_dict | |||
| ) |
Load and initialize a word list for isolated word recognition.
| winfo | [out] pointer to a word dictionary data to store the read data | |
| filename | [in] file name of the word dictionary to read | |
| hmminfo | [in] HMM definition data, needed for triphone conversion. | |
| headphone | [in] word head silence phone name | |
| tailphone | [in] word tail silence phone name | |
| contextphone | [in] silence context name at head and tail phoneme | |
| force_dict | [in] TRUE if want to ignore the error words in the dictionary |
Definition at line 81 of file init_voca.c.
Referenced by multigram_read_file_and_add().
| void voca_set_stats | ( | WORD_INFO * | winfo | ) |
Parse a word dictionary and set the maximum state length per word.
| winfo | [i/o] |
Definition at line 186 of file voca_load_htkdict.c.
Referenced by voca_load_end().
| void voca_load_start | ( | WORD_INFO * | winfo, | |
| HTK_HMM_INFO * | hmminfo, | |||
| boolean | ignore_tri_conv | |||
| ) |
Start loading a dictionary.
See voca_load_htkdict() for an example of using this function.
| winfo | [i/o] dictionary data where the data will be loaded | |
| hmminfo | [in] phoneme HMM definition | |
| ignore_tri_conv | [in] if TRUE, skip triphone conversion while loading |
Definition at line 228 of file voca_load_htkdict.c.
Referenced by voca_load_htkdict(), voca_load_htkdict_fp(), voca_load_wordlist(), and voca_load_wordlist_fp().
| boolean voca_load_line | ( | char * | buf, | |
| WORD_INFO * | winfo, | |||
| HTK_HMM_INFO * | hmminfo | |||
| ) |
Load a line from buffer and set parameters to the dictionary.
See voca_load_htkdict() for an example of using this function.
| buf | [in] input buffer containing a word entry | |
| winfo | [i/o] word dictionary to append the entry | |
| hmminfo | [in] phoneme HMM definition |
Definition at line 255 of file voca_load_htkdict.c.
Referenced by voca_append_htkdict(), voca_load_htkdict(), and voca_load_htkdict_fp().
End loading dictionary entries.
It calculates some statistics for the entries read, reports any errors encountered during the last load, and returns a status indicating whether an error occurred while loading.
| winfo | [i/o] word dictionary just read by voca_load_line() calls |
Definition at line 284 of file voca_load_htkdict.c.
Referenced by voca_append_htkdict(), voca_load_htkdict(), voca_load_htkdict_fp(), voca_load_wordlist(), and voca_load_wordlist_fp().
| boolean voca_load_htkdict | ( | FILE * | fp, | |
| WORD_INFO * | winfo, | |||
| HTK_HMM_INFO * | hmminfo, | |||
| boolean | ignore_tri_conv | |||
| ) |
Top function to read word dictionary via file pointer (gzip enabled).
| fp | [in] file pointer | |
| winfo | [out] pointer to word dictionary to store the read data. | |
| hmminfo | [in] HTK HMM definition data. if NULL, phonemes are ignored. | |
| ignore_tri_conv | [in] TRUE if triphone conversion is ignored |
Definition at line 305 of file voca_load_htkdict.c.
Referenced by init_voca().
| boolean voca_load_htkdict_fp | ( | FILE * | fp, | |
| WORD_INFO * | winfo, | |||
| HTK_HMM_INFO * | hmminfo, | |||
| boolean | ignore_tri_conv | |||
| ) |
Top function to read word dictionary via normal file pointer.
| fp | [in] file pointer | |
| winfo | [out] pointer to word dictionary to store the read data. | |
| hmminfo | [in] HTK HMM definition data. if NULL, phonemes are ignored. | |
| ignore_tri_conv | [in] TRUE if triphone conversion is ignored |
Definition at line 330 of file voca_load_htkdict.c.
| boolean voca_append_htkdict | ( | char * | entry, | |
| WORD_INFO * | winfo, | |||
| HTK_HMM_INFO * | hmminfo, | |||
| boolean | ignore_tri_conv | |||
| ) |
Append a single entry to the existing word dictionary.
| entry | [in] dictionary entry string to be appended. | |
| winfo | [out] pointer to word dictionary to append the data. | |
| hmminfo | [in] HTK HMM definition data. if NULL, phonemes are ignored. | |
| ignore_tri_conv | [in] TRUE if triphone conversion is ignored |
Definition at line 354 of file voca_load_htkdict.c.
Referenced by initialize_dict().
Append one word dictionary to another, for multiple grammar handling.
Assumes that the same HMM definition is used for both word dictionaries.
| dstinfo | [i/o] word dictionary | |
| srcinfo | [in] word dictionary to be appended to dst | |
| coffset | [in] category id offset in dst where the new data should be stored | |
| woffset | [in] word id offset in dst where the new data should be stored |
Definition at line 642 of file voca_load_htkdict.c.
Referenced by multigram_add_words_to_grammar(), multigram_append_to_global(), and multigram_update().
| boolean voca_load_htkdict_line | ( | char * | buf, | |
| WORD_ID * | vnum_p, | |||
| int | linenum, | |||
| WORD_INFO * | winfo, | |||
| HTK_HMM_INFO * | hmminfo, | |||
| boolean | do_conv, | |||
| boolean * | ok_flag | |||
| ) |
Sub function to Add a dictionary entry line to the word dictionary.
| buf | [i/o] buffer to hold the input string, will be modified in this function | |
| vnum_p | [in] current number of words in winfo | |
| linenum | [in] current line number of the input | |
| winfo | [out] pointer to word dictionary to append the data. | |
| hmminfo | [in] HTK HMM definition data. if NULL, phonemes are ignored. | |
| do_conv | [in] TRUE if performing triphone conversion | |
| ok_flag | [out] will be set to FALSE if an error occurred for this input. |
Definition at line 374 of file voca_load_htkdict.c.
Referenced by voca_load_line().
| boolean voca_load_word_line | ( | char * | buf, | |
| WORD_INFO * | winfo, | |||
| HTK_HMM_INFO * | hmminfo, | |||
| char * | headphone, | |||
| char * | tailphone, | |||
| char * | contextphone | |||
| ) |
Load a line from buffer and set parameters to the dictionary.
| buf | [in] input buffer containing a word entry | |
| winfo | [i/o] word dictionary to append the entry | |
| hmminfo | [in] phoneme HMM definition | |
| headphone | [in] word head silence model name | |
| tailphone | [in] word tail silence model name | |
| contextphone | [in] silence context name to be used at head and tail |
Definition at line 114 of file voca_load_wordlist.c.
Referenced by voca_load_wordlist(), and voca_load_wordlist_fp().
| boolean voca_load_wordlist | ( | FILE * | fp, | |
| WORD_INFO * | winfo, | |||
| HTK_HMM_INFO * | hmminfo, | |||
| char * | headphone, | |||
| char * | tailphone, | |||
| char * | contextphone | |||
| ) |
Top function to read word list via text.
| fp | [in] file pointer | |
| winfo | [out] pointer to word dictionary to store the read data. | |
| hmminfo | [in] HTK HMM definition data. if NULL, phonemes are ignored. | |
| headphone | [in] word head silence model name | |
| tailphone | [in] word tail silence model name | |
| contextphone | [in] silence context name to be used at head and tail |
Definition at line 142 of file voca_load_wordlist.c.
Referenced by init_wordlist().
| boolean voca_load_wordlist_fp | ( | FILE * | fp, | |
| WORD_INFO * | winfo, | |||
| HTK_HMM_INFO * | hmminfo, | |||
| char * | headphone, | |||
| char * | tailphone, | |||
| char * | contextphone | |||
| ) |
Top function to read word list via file pointer.
| fp | [in] file pointer | |
| winfo | [out] pointer to word dictionary to store the read data. | |
| hmminfo | [in] HTK HMM definition data. if NULL, phonemes are ignored. | |
| headphone | [in] word head silence model name | |
| tailphone | [in] word tail silence model name | |
| contextphone | [in] silence context name to be used at head and tail |
Definition at line 169 of file voca_load_wordlist.c.
| boolean voca_load_wordlist_line | ( | char * | buf, | |
| WORD_ID * | vnum_p, | |||
| int | linenum, | |||
| WORD_INFO * | winfo, | |||
| HTK_HMM_INFO * | hmminfo, | |||
| boolean | do_conv, | |||
| boolean * | ok_flag, | |||
| char * | headphone, | |||
| char * | tailphone, | |||
| char * | contextphone | |||
| ) |
Sub function to Add a dictionary entry line to the word dictionary.
| buf | [i/o] buffer to hold the input string, will be modified in this function | |
| vnum_p | [in] current number of words in winfo | |
| linenum | [in] current line number of the input | |
| winfo | [out] pointer to word dictionary to append the data. | |
| hmminfo | [in] HTK HMM definition data. if NULL, phonemes are ignored. | |
| do_conv | [in] TRUE if performing triphone conversion | |
| ok_flag | [out] will be set to FALSE if an error occurred for this input. | |
| headphone | [in] word head silence model name | |
| tailphone | [in] word tail silence model name | |
| contextphone | [in] silence context name to be used at head and tail |
Definition at line 199 of file voca_load_wordlist.c.
Referenced by voca_load_word_line().
| boolean voca_mono2tri | ( | WORD_INFO * | winfo, | |
| HTK_HMM_INFO * | hmminfo | |||
| ) |
Convert whole words in word dictionary to word-internal triphone.
Normally triphone conversion will be performed directly when reading dictionary file. This function is for post conversion only.
| winfo | [i/o] word dictionary information | |
| hmminfo | [in] HTK HMM definition |
Definition at line 601 of file voca_load_htkdict.c.
Look up a word on dictionary by string.
| keyword | [in] keyword to search | |
| winfo | [in] word dictionary |
Definition at line 43 of file voca_lookup.c.
Referenced by initialize_dict(), and new_str2wordseq().
Convert string of space-separated word strings to array of word ids.
| winfo | [in] word dictionary | |
| s | [in] string of space-separated word strings | |
| len_return | [out] number of found words |
Definition at line 117 of file voca_lookup.c.
| char* cycle_triphone | ( | char * | p | ) |
Return the triphone name string composed from the last 3 calls.
| p | [in] next phone string |
Definition at line 80 of file voca_load_htkdict.c.
Referenced by cycle_triphone_flush(), voca_load_htkdict_line(), voca_load_wordlist_line(), and voca_mono2tri().
| char* cycle_triphone_flush | ( | ) |
Flush the triphone buffer and return the last biphone.
Definition at line 126 of file voca_load_htkdict.c.
Referenced by voca_load_htkdict_line(), voca_load_wordlist_line(), and voca_mono2tri().
| void print_voca_info | ( | FILE * | fp, | |
| WORD_INFO * | winfo | |||
| ) |
Output overall word dictionary information to stdout.
| fp | [in] file descriptor | |
| winfo | [in] word dictionary |
Definition at line 35 of file voca_util.c.
Referenced by print_engine_info().
Output information of a word in dictionary to stdout.
| fp | [in] file descriptor | |
| winfo | [in] word dictionary | |
| wid | [in] word id to be output |
Definition at line 67 of file voca_util.c.
Referenced by hmm_check(), make_dfa_voca_ref(), and print_engine_info().
| boolean make_base_phone | ( | HTK_HMM_INFO * | hmminfo, | |
| WORD_INFO * | winfo | |||
| ) |
Build basephone information.
Extract base phones from HMM definition, mark them whether they appear on word head or word tail, and count the number.
| hmminfo | [i/o] HMM definition information, basephone list will be added. | |
| winfo | [in] word dictionary information |
Definition at line 386 of file chkhmmlist.c.
Referenced by hmm_check().
| void print_phone_info | ( | FILE * | fp, | |
| HTK_HMM_INFO * | hmminfo | |||
| ) |
Output general information concerning phone mapping in HMM definition.
| fp | [in] file descriptor | |
| hmminfo | [in] HMM definition data. |
Definition at line 404 of file chkhmmlist.c.
Referenced by hmm_check().
| void print_all_basephone_detail | ( | HMM_basephone * | base | ) |
Output all basephone information to stdout.
| base | [in] pointer to the top basephone data holder. |
Definition at line 106 of file chkhmmlist.c.
Referenced by hmm_check().
| void print_all_basephone_name | ( | HMM_basephone * | base | ) |
Output all basephone names to stdout.
| base | [in] pointer to the top basephone data holder. |
Definition at line 116 of file chkhmmlist.c.
Referenced by hmm_check().
| void test_interword_triphone | ( | HTK_HMM_INFO * | hmminfo, | |
| WORD_INFO * | winfo | |||
| ) |
Top function to check if all the possible triphones on given word dictionary actually exist in the logical HMM.
| hmminfo | [in] HMM definition information, with basephone list. | |
| winfo | [in] word dictionary information |
Definition at line 345 of file chkhmmlist.c.
Referenced by hmm_check().
1.5.1