#include <sent/stddefs.h>
#include <sent/htk_hmm.h>
Go to the source code of this file.
Data Structures | |
struct | WORD_INFO |
Word dictionary structure to hold vocabulary. More... | |
Defines | |
#define | BEGIN_WORD_DEFAULT "<s>" |
Default word string of beginning-of-sentence word. | |
#define | END_WORD_DEFAULT "</s>" |
Default word string of end-of-sentence word. | |
#define | MAXWSTEP 4000 |
Memory allocation step in number of words when loading a word dictionary. | |
Functions | |
WORD_INFO * | word_info_new () |
Allocate a new word dictionary structure. | |
void | word_info_free (WORD_INFO *winfo) |
Free all information in the WORD_INFO. | |
void | winfo_init (WORD_INFO *winfo) |
Initialize a new word dictionary structure. | |
boolean | winfo_expand (WORD_INFO *winfo) |
Expand the word dictionary. | |
boolean | init_voca (WORD_INFO *winfo, char *filename, HTK_HMM_INFO *hmminfo, boolean, boolean) |
Load and initialize a word dictionary. | |
boolean | init_wordlist (WORD_INFO *winfo, char *filename, HTK_HMM_INFO *hmminfo, char *headphone, char *tailphone, char *contextphone, boolean force_dict) |
Load and initialize a word list for isolated word recognition. | |
void | voca_set_stats (WORD_INFO *winfo) |
Parse a word dictionary and set the maximum state length per word. | |
void | voca_load_start (WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv) |
Start loading a dictionary. | |
boolean | voca_load_line (char *buf, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo) |
Load a line from buffer and set parameters to the dictionary. | |
boolean | voca_load_end (WORD_INFO *winfo) |
End loading dictionary entries. | |
boolean | voca_load_htkdict (FILE *, WORD_INFO *, HTK_HMM_INFO *, boolean) |
Top function to read word dictionary via file pointer. | |
boolean | voca_load_htkdict_fd (int, WORD_INFO *, HTK_HMM_INFO *, boolean) |
Top function to read word dictionary via file descriptor. | |
boolean | voca_load_htkdict_sd (int, WORD_INFO *, HTK_HMM_INFO *, boolean) |
Top function to read word dictionary via socket descriptor. | |
boolean | voca_append_htkdict (char *entry, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv) |
Append a single entry to the existing word dictionary. | |
boolean | voca_append (WORD_INFO *dstinfo, WORD_INFO *srcinfo, int coffset, int woffset) |
Append one word dictionary to another, for multiple grammar handling. | |
boolean | voca_load_htkdict_line (char *buf, WORD_ID *vnum, int linenum, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean do_conv, boolean *ok_flag) |
Sub-function to add a dictionary entry line to the word dictionary. | |
boolean | voca_load_word_line (char *buf, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, char *headphone, char *tailphone, char *contextphone) |
Load a line from buffer and set parameters to the dictionary. | |
boolean | voca_load_wordlist (FILE *fp, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, char *headphone, char *tailphone, char *contextphone) |
Top function to read word list via file pointer. | |
boolean | voca_load_wordlist_line (char *buf, WORD_ID *vnum, int linenum, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean do_conv, boolean *ok_flag, char *headphone, char *tailphone, char *contextphone) |
Sub-function to add a dictionary entry line to the word dictionary. | |
boolean | voca_mono2tri (WORD_INFO *winfo, HTK_HMM_INFO *hmminfo) |
Convert whole words in word dictionary to word-internal triphone. | |
WORD_ID | voca_lookup_wid (char *, WORD_INFO *) |
Look up a word in the dictionary by string. | |
WORD_ID * | new_str2wordseq (WORD_INFO *, char *, int *) |
Convert string of space-separated word strings to array of word ids. | |
char * | cycle_triphone (char *p) |
Return string of triphone name composed from last 3 calls. | |
char * | cycle_triphone_flush () |
Flush the triphone buffer and return the last biphone. | |
void | print_voca_info (FILE *fp, WORD_INFO *) |
Output overall word dictionary information to stdout. | |
void | put_voca (FILE *fp, WORD_INFO *winfo, WORD_ID wid) |
Output information of a word in dictionary to stdout. | |
boolean | make_base_phone (HTK_HMM_INFO *hmminfo, WORD_INFO *winfo) |
Build basephone information. | |
void | print_phone_info (FILE *fp, HTK_HMM_INFO *hmminfo) |
Output general information concerning phone mapping in HMM definition. | |
void | print_all_basephone_detail (HMM_basephone *base) |
Output all basephone information to stdout. | |
void | print_all_basephone_name (HMM_basephone *base) |
Output all basephone names to stdout. | |
void | test_interword_triphone (HTK_HMM_INFO *hmminfo, WORD_INFO *winfo) |
Top function to check if all the possible triphones on given word dictionary actually exist in the logical HMM. |
This file defines the data structure for the word dictionary used in recognition. It stores each word's string, output string, phoneme sequence, and transparency. The beginning-of-sentence and end-of-sentence words, guessed from the runtime environment, are also stored here.
Please note that the N-gram vocabulary is stored in NGRAM_INFO and can differ from this word dictionary. The reference from the word dictionary to the N-gram vocabulary is made through the wton[] member of WORD_INFO. When used with DFA, wton[] holds the category number to which each word belongs.
Definition in file vocabulary.h.
WORD_INFO* word_info_new | ( | ) |
Allocate a new word dictionary structure.
Definition at line 35 of file voca_malloc.c.
Referenced by initialize_dict(), multigram_read_file_and_add(), multigram_update(), and read_grammar_from_socket().
void word_info_free | ( | WORD_INFO * | winfo | ) |
Free all information in the WORD_INFO.
winfo | [i/o] word dictionary data to be freed. |
Definition at line 51 of file voca_malloc.c.
Referenced by initialize_dict(), j_process_lm_free(), multigram_exec_delete(), multigram_free_all(), multigram_read_file_and_add(), and multigram_update().
void winfo_init | ( | WORD_INFO * | winfo | ) |
Initialize a new word dictionary structure.
winfo | [i/o] word dictionary to be initialized. |
Definition at line 75 of file voca_malloc.c.
Referenced by multigram_update(), and voca_load_start().
boolean winfo_expand | ( | WORD_INFO * | winfo | ) |
Expand the word dictionary.
winfo | [i/o] word dictionary to be expanded. |
Definition at line 105 of file voca_malloc.c.
Referenced by voca_load_line(), and voca_load_word_line().
boolean init_voca | ( | WORD_INFO * | winfo, | |
char * | filename, | |||
HTK_HMM_INFO * | hmminfo, | |||
boolean | not_conv_tri, | |||
boolean | force_dict | |||
) |
Load and initialize a word dictionary.
winfo | [out] pointer to a word dictionary data to store the read data | |
filename | [in] file name of the word dictionary to read | |
hmminfo | [in] HMM definition data, needed for triphone conversion. | |
not_conv_tri | [in] TRUE if not converting monophone to triphone. | |
force_dict | [in] TRUE to ignore erroneous words in the dictionary |
Definition at line 41 of file init_voca.c.
Referenced by initialize_dict(), and multigram_read_file_and_add().
boolean init_wordlist | ( | WORD_INFO * | winfo, | |
char * | filename, | |||
HTK_HMM_INFO * | hmminfo, | |||
char * | headphone, | |||
char * | tailphone, | |||
char * | contextphone, | |||
boolean | force_dict | |||
) |
Load and initialize a word list for isolated word recognition.
winfo | [out] pointer to a word dictionary data to store the read data | |
filename | [in] file name of the word dictionary to read | |
hmminfo | [in] HMM definition data, needed for triphone conversion. | |
headphone | [in] word head silence phone name | |
tailphone | [in] word tail silence phone name | |
contextphone | [in] silence context name at head and tail phoneme | |
force_dict | [in] TRUE to ignore erroneous words in the dictionary |
Definition at line 81 of file init_voca.c.
Referenced by multigram_read_file_and_add().
void voca_set_stats | ( | WORD_INFO * | winfo | ) |
Parse a word dictionary and set the maximum state length per word.
winfo | [i/o] |
Definition at line 186 of file voca_load_htkdict.c.
Referenced by voca_load_end().
void voca_load_start | ( | WORD_INFO * | winfo, | |
HTK_HMM_INFO * | hmminfo, | |||
boolean | ignore_tri_conv | |||
) |
Start loading a dictionary.
See voca_load_htkdict() for an example of using this function.
winfo | [i/o] dictionary data where the data will be loaded | |
hmminfo | [in] phoneme HMM definition | |
ignore_tri_conv | [in] if TRUE, skip triphone conversion while loading |
Definition at line 228 of file voca_load_htkdict.c.
Referenced by voca_load_htkdict(), voca_load_htkdict_fd(), voca_load_htkdict_sd(), and voca_load_wordlist().
boolean voca_load_line | ( | char * | buf, | |
WORD_INFO * | winfo, | |||
HTK_HMM_INFO * | hmminfo | |||
) |
Load a line from buffer and set parameters to the dictionary.
See voca_load_htkdict() for an example of using this function.
buf | [in] input buffer containing a word entry | |
winfo | [i/o] word dictionary to append the entry | |
hmminfo | [in] phoneme HMM definition |
Definition at line 255 of file voca_load_htkdict.c.
Referenced by voca_append_htkdict(), voca_load_htkdict(), voca_load_htkdict_fd(), and voca_load_htkdict_sd().
boolean voca_load_end | ( | WORD_INFO * | winfo | ) |
End loading dictionary entries.
It calculates some statistics for the entries that were read, outputs any errors encountered during the last load, and returns a status indicating whether an error occurred while loading.
winfo | [i/o] word dictionary just read by voca_load_line() calls |
Definition at line 284 of file voca_load_htkdict.c.
Referenced by voca_append_htkdict(), voca_load_htkdict(), voca_load_htkdict_fd(), voca_load_htkdict_sd(), and voca_load_wordlist().
boolean voca_load_htkdict | ( | FILE * | fp, | |
WORD_INFO * | winfo, | |||
HTK_HMM_INFO * | hmminfo, | |||
boolean | ignore_tri_conv | |||
) |
Top function to read word dictionary via file pointer.
fp | [in] file pointer | |
winfo | [out] pointer to word dictionary to store the read data. | |
hmminfo | [in] HTK HMM definition data. if NULL, phonemes are ignored. | |
ignore_tri_conv | [in] TRUE if triphone conversion is ignored |
Definition at line 305 of file voca_load_htkdict.c.
Referenced by init_voca().
boolean voca_load_htkdict_fd | ( | int | fd, | |
WORD_INFO * | winfo, | |||
HTK_HMM_INFO * | hmminfo, | |||
boolean | ignore_tri_conv | |||
) |
Top function to read word dictionary via file descriptor.
fd | [in] file descriptor | |
winfo | [out] pointer to word dictionary to store the read data. | |
hmminfo | [in] HTK HMM definition data. if NULL, phonemes are ignored. | |
ignore_tri_conv | [in] TRUE if triphone conversion is ignored |
Definition at line 330 of file voca_load_htkdict.c.
Referenced by read_grammar_from_socket().
boolean voca_load_htkdict_sd | ( | int | sd, | |
WORD_INFO * | winfo, | |||
HTK_HMM_INFO * | hmminfo, | |||
boolean | ignore_tri_conv | |||
) |
Top function to read word dictionary via socket descriptor.
sd | [in] socket descriptor | |
winfo | [out] pointer to word dictionary to store the read data. | |
hmminfo | [in] HTK HMM definition data. if NULL, phonemes are ignored. | |
ignore_tri_conv | [in] TRUE if triphone conversion is ignored |
Definition at line 354 of file voca_load_htkdict.c.
Referenced by read_grammar_from_socket().
boolean voca_append_htkdict | ( | char * | entry, | |
WORD_INFO * | winfo, | |||
HTK_HMM_INFO * | hmminfo, | |||
boolean | ignore_tri_conv | |||
) |
Append a single entry to the existing word dictionary.
entry | [in] dictionary entry string to be appended. | |
winfo | [out] pointer to word dictionary to append the data. | |
hmminfo | [in] HTK HMM definition data. if NULL, phonemes are ignored. | |
ignore_tri_conv | [in] TRUE if triphone conversion is ignored |
Definition at line 378 of file voca_load_htkdict.c.
Referenced by initialize_dict().
Append one word dictionary to another, for multiple grammar handling.
Assumes that the same HMM definition is used on both word dictionaries.
dstinfo | [i/o] word dictionary | |
srcinfo | [in] word dictionary to be appended to dst | |
coffset | [in] category id offset in dst where the new data should be stored | |
woffset | [in] word id offset in dst where the new data should be stored |
Definition at line 664 of file voca_load_htkdict.c.
Referenced by multigram_add_words_to_grammar(), multigram_append_to_global(), and multigram_update().
boolean voca_load_htkdict_line | ( | char * | buf, | |
WORD_ID * | vnum_p, | |||
int | linenum, | |||
WORD_INFO * | winfo, | |||
HTK_HMM_INFO * | hmminfo, | |||
boolean | do_conv, | |||
boolean * | ok_flag | |||
) |
Sub-function to add a dictionary entry line to the word dictionary.
buf | [i/o] buffer to hold the input string, will be modified in this function | |
vnum_p | [in] current number of words in winfo | |
linenum | [in] current line number of the input | |
winfo | [out] pointer to word dictionary to append the data. | |
hmminfo | [in] HTK HMM definition data. if NULL, phonemes are ignored. | |
do_conv | [in] TRUE if performing triphone conversion | |
ok_flag | [out] will be set to FALSE if an error occurred for this input. |
Definition at line 398 of file voca_load_htkdict.c.
Referenced by voca_load_line().
boolean voca_load_word_line | ( | char * | buf, | |
WORD_INFO * | winfo, | |||
HTK_HMM_INFO * | hmminfo, | |||
char * | headphone, | |||
char * | tailphone, | |||
char * | contextphone | |||
) |
Load a line from buffer and set parameters to the dictionary.
buf | [in] input buffer containing a word entry | |
winfo | [i/o] word dictionary to append the entry | |
hmminfo | [in] phoneme HMM definition | |
headphone | [in] word head silence model name | |
tailphone | [in] word tail silence model name | |
contextphone | [in] silence context name to be used at head and tail |
Definition at line 114 of file voca_load_wordlist.c.
Referenced by voca_load_wordlist().
boolean voca_load_wordlist | ( | FILE * | fp, | |
WORD_INFO * | winfo, | |||
HTK_HMM_INFO * | hmminfo, | |||
char * | headphone, | |||
char * | tailphone, | |||
char * | contextphone | |||
) |
Top function to read word list via file pointer.
fp | [in] file pointer | |
winfo | [out] pointer to word dictionary to store the read data. | |
hmminfo | [in] HTK HMM definition data. if NULL, phonemes are ignored. | |
headphone | [in] word head silence model name | |
tailphone | [in] word tail silence model name | |
contextphone | [in] silence context name to be used at head and tail |
Definition at line 142 of file voca_load_wordlist.c.
Referenced by init_wordlist().
boolean voca_load_wordlist_line | ( | char * | buf, | |
WORD_ID * | vnum_p, | |||
int | linenum, | |||
WORD_INFO * | winfo, | |||
HTK_HMM_INFO * | hmminfo, | |||
boolean | do_conv, | |||
boolean * | ok_flag, | |||
char * | headphone, | |||
char * | tailphone, | |||
char * | contextphone | |||
) |
Sub-function to add a dictionary entry line to the word dictionary.
buf | [i/o] buffer to hold the input string, will be modified in this function | |
vnum_p | [in] current number of words in winfo | |
linenum | [in] current line number of the input | |
winfo | [out] pointer to word dictionary to append the data. | |
hmminfo | [in] HTK HMM definition data. if NULL, phonemes are ignored. | |
do_conv | [in] TRUE if performing triphone conversion | |
ok_flag | [out] will be set to FALSE if an error occurred for this input. | |
headphone | [in] word head silence model name | |
tailphone | [in] word tail silence model name | |
contextphone | [in] silence context name to be used at head and tail |
Definition at line 172 of file voca_load_wordlist.c.
Referenced by voca_load_word_line().
boolean voca_mono2tri | ( | WORD_INFO * | winfo, | |
HTK_HMM_INFO * | hmminfo | |||
) |
Convert whole words in word dictionary to word-internal triphone.
Normally triphone conversion will be performed directly when reading dictionary file. This function is for post conversion only.
winfo | [i/o] word dictionary information | |
hmminfo | [in] HTK HMM definition |
Definition at line 623 of file voca_load_htkdict.c.
Look up a word in the dictionary by string.
keyword | [in] keyword to search | |
winfo | [in] word dictionary |
Definition at line 43 of file voca_lookup.c.
Referenced by initialize_dict(), and new_str2wordseq().
Convert string of space-separated word strings to array of word ids.
winfo | [in] word dictionary | |
s | [in] string of space-separated word strings | |
len_return | [out] number of found words |
Definition at line 117 of file voca_lookup.c.
char* cycle_triphone | ( | char * | p | ) |
Return string of triphone name composed from last 3 calls.
p | [in] next phone string |
Definition at line 80 of file voca_load_htkdict.c.
Referenced by cycle_triphone_flush(), new_str2phseq(), voca_load_htkdict_line(), and voca_load_wordlist_line().
char* cycle_triphone_flush | ( | ) |
Flush the triphone buffer and return the last biphone.
Definition at line 126 of file voca_load_htkdict.c.
Referenced by new_str2phseq(), voca_load_htkdict_line(), and voca_load_wordlist_line().
void print_voca_info | ( | FILE * | fp, | |
WORD_INFO * | winfo | |||
) |
Output overall word dictionary information to stdout.
fp | [in] file pointer | |
winfo | [in] word dictionary |
Definition at line 35 of file voca_util.c.
Referenced by print_engine_info().
Output information of a word in dictionary to stdout.
fp | [in] file pointer | |
winfo | [in] word dictionary | |
wid | [in] word id to be output |
Definition at line 67 of file voca_util.c.
Referenced by hmm_check(), make_dfa_voca_ref(), print_engine_info(), and wchmm_add_word().
boolean make_base_phone | ( | HTK_HMM_INFO * | hmminfo, | |
WORD_INFO * | winfo | |||
) |
Build basephone information.
Extract base phones from HMM definition, mark them whether they appear on word head or word tail, and count the number.
hmminfo | [i/o] HMM definition information, basephone list will be added. | |
winfo | [in] word dictionary information |
Definition at line 386 of file chkhmmlist.c.
Referenced by hmm_check().
void print_phone_info | ( | FILE * | fp, | |
HTK_HMM_INFO * | hmminfo | |||
) |
Output general information concerning phone mapping in HMM definition.
fp | [in] file pointer | |
hmminfo | [in] HMM definition data. |
Definition at line 404 of file chkhmmlist.c.
Referenced by hmm_check().
void print_all_basephone_detail | ( | HMM_basephone * | base | ) |
Output all basephone information to stdout.
base | [in] pointer to the top basephone data holder. |
Definition at line 106 of file chkhmmlist.c.
Referenced by hmm_check().
void print_all_basephone_name | ( | HMM_basephone * | base | ) |
Output all basephone names to stdout.
base | [in] pointer to the top basephone data holder. |
Definition at line 116 of file chkhmmlist.c.
Referenced by hmm_check().
void test_interword_triphone | ( | HTK_HMM_INFO * | hmminfo, | |
WORD_INFO * | winfo | |||
) |
Top function to check if all the possible triphones on given word dictionary actually exist in the logical HMM.
hmminfo | [in] HMM definition information, with basephone list. | |
winfo | [in] word dictionary information |
Definition at line 345 of file chkhmmlist.c.
Referenced by hmm_check().