#include <julius/julius.h>
Go to the source code of this file.
Defines | |
#define | WCHMM_SIZE_CHECK |
If defined, do wchmm size estimation (for debug only). | |
#define | COUNT_STEP 500 |
Word count step for debug progress output. | |
Functions | |
WCHMM_INFO * | wchmm_new () |
Allocate a new tree lexicon structure. | |
static void | wchmm_init (WCHMM_INFO *wchmm) |
Initialize content of a lexicon tree. | |
static void | wchmm_extend (WCHMM_INFO *wchmm) |
Expand state-related area in a tree lexicon by MAXWCNSTEP. | |
static void | wchmm_extend_startnode (WCHMM_INFO *wchmm) |
Expand word-start nodes area in a tree lexicon by STARTNODE_STEP. | |
void | wchmm_free (WCHMM_INFO *w) |
Free all data in a tree lexicon. | |
static int | compare_wseq (WORD_ID *widx1, WORD_ID *widx2, WORD_INFO *winfo) |
qsort_reentrant function to sort words by their phoneme sequence. | |
static void | wchmm_sort_idx_by_wseq (WORD_INFO *winfo, WORD_ID *windex, WORD_ID bgn, WORD_ID len) |
Sort word IDs in windex[bgn..bgn+len-1] by their phoneme sequence order. | |
static int | compare_category (WORD_ID *widx1, WORD_ID *widx2, WORD_INFO *winfo) |
qsort function to sort words by their category ID. | |
static void | wchmm_sort_idx_by_category (WORD_INFO *winfo, WORD_ID *windex, WORD_ID len) |
Sort word IDs in windex[0..len-1] by their category ID. | |
static int | wchmm_check_match (WORD_INFO *winfo, int i, int j) |
Compare two words from word head per phoneme to see how many phones can be shared among the two. | |
static void | acc_init (WCHMM_INFO *wchmm, int node) |
Initialize transition information on a node. | |
static void | add_ac (WCHMM_INFO *wchmm, int node, LOGPROB a, int arc) |
Add an arc to a node. | |
static void | add_wacc (WCHMM_INFO *wchmm, int node, LOGPROB a, int arc) |
Add a transition arc between two nodes on the tree lexicon. | |
static void | get_outtrans_list (WCHMM_INFO *wchmm, WORD_ID w, int pos, int *node, LOGPROB *a, int *num, int maxnum, boolean insert_sp) |
Make outgoing transition list for given phone position of a word. | |
static void | wchmm_link_hmm (WCHMM_INFO *wchmm, int from_node, int to_node, HTK_HMM_Trans *tinfo) |
Add a transition from end node of a phone to start node of another phone. | |
static void | wchmm_link_subword (WCHMM_INFO *wchmm, int from_word, int from_seq, int to_word, int to_seq) |
Connect two phonemes in tree lexicon. | |
static void | wchmm_duplicate_state (WCHMM_INFO *wchmm, int node, int word) |
Isolation of word-end nodes for homophones: duplicate the word-end state, link as the same as original, and make it the new word-end node of the given new word. | |
static int | wchmm_duplicate_leafnode (WCHMM_INFO *wchmm) |
Scan the whole lexicon tree to find already registered homophones, and make word-end nodes of the found homophones isolated from others. | |
static boolean | wchmm_add_word (WCHMM_INFO *wchmm, int word, int matchlen, int matchword, boolean enable_iwsp) |
Add a new word to the lexicon tree. | |
static void | wchmm_calc_wordend_arc (WCHMM_INFO *wchmm) |
Scan the lexicon tree to make list of emission probability from the word end state. | |
static int | compare_prob (LOGPROB *a, LOGPROB *b) |
qsort callback function to sort unigram values. | |
static LOGPROB | get_nbest_uniprob (WCHMM_INFO *wchmm, int n) |
Get the Nth-best unigram probability from all words. | |
boolean | build_wchmm (WCHMM_INFO *wchmm, JCONF_LM *lmconf) |
Build a tree lexicon from given word dictionary and language model. | |
boolean | build_wchmm2 (WCHMM_INFO *wchmm, JCONF_LM *lmconf) |
Build a tree lexicon from given word dictionary and language model. | |
void | print_wchmm_info (WCHMM_INFO *wchmm) |
Output some specifications of the tree lexicon (size etc.). |
Functions to build a tree lexicon (called a word-conjunction HMM here) from a word dictionary, HMM and language models are defined here. The constructed tree lexicon will be used for the recognition of the 1st pass. The lexicon is composed per HMM state unit, and various pieces of information about output probabilities, arcs, language model constraints, and others are assembled in the lexicon.
Note that the word "wchmm" in the source code is a synonym of "tree lexicon".
Definition in file wchmm.c.
WCHMM_INFO* wchmm_new | ( | ) |
Allocate a new tree lexicon structure.
Definition at line 70 of file wchmm.c.
Referenced by j_launch_recognition_instance(), and multigram_rebuild_wchmm().
Here is the caller graph for this function:
static void wchmm_init | ( | WCHMM_INFO * | wchmm | ) | [static] |
Initialize content of a lexicon tree.
wchmm | [out] pointer to the lexicon tree structure |
Definition at line 104 of file wchmm.c.
Referenced by build_wchmm(), and build_wchmm2().
static void wchmm_extend | ( | WCHMM_INFO * | wchmm | ) | [static] |
Expand state-related area in a tree lexicon by MAXWCNSTEP.
wchmm | [i/o] tree lexicon |
Definition at line 162 of file wchmm.c.
Referenced by wchmm_add_word(), and wchmm_duplicate_state().
static void wchmm_extend_startnode | ( | WCHMM_INFO * | wchmm | ) | [static] |
Expand word-start nodes area in a tree lexicon by STARTNODE_STEP.
(multipath)
wchmm | [i/o] tree lexicon |
Definition at line 189 of file wchmm.c.
Referenced by wchmm_add_word(), and wchmm_duplicate_state().
void wchmm_free | ( | WCHMM_INFO * | w | ) |
Free all data in a tree lexicon.
w | [in] tree lexicon |
Definition at line 213 of file wchmm.c.
Referenced by j_recogprocess_free(), and multigram_rebuild_wchmm().
Here is the caller graph for this function:
qsort_reentrant function to sort words by their phoneme sequence.
widx1 | [in] pointer to word id #1 | |
widx2 | [in] pointer to word id #2 |
Definition at line 298 of file wchmm.c.
Referenced by wchmm_sort_idx_by_wseq().
static void wchmm_sort_idx_by_wseq | ( | WORD_INFO * | winfo, | |
WORD_ID * | windex, | |||
WORD_ID | bgn, | |||
WORD_ID | len | |||
) | [static] |
Sort word IDs in windex[bgn..bgn+len-1] by their phoneme sequence order.
winfo | [in] word lexicon | |
windex | [i/o] index sequence of word IDs, (will be sorted in this function) | |
bgn | [in] start point to sort in windex | |
len | [in] length of indexes to be sorted from bgn |
Definition at line 347 of file wchmm.c.
Referenced by build_wchmm2().
qsort function to sort words by their category ID.
widx1 | [in] pointer to element #1 | |
widx2 | [in] pointer to element #2 |
Definition at line 371 of file wchmm.c.
Referenced by wchmm_sort_idx_by_category().
static void wchmm_sort_idx_by_category | ( | WORD_INFO * | winfo, | |
WORD_ID * | windex, | |||
WORD_ID | len | |||
) | [static] |
Sort word IDs in windex[0..len-1] by their category ID.
winfo | [in] tree lexicon | |
windex | [i/o] index sequence of word IDs, (will be sorted in this function) | |
len | [in] number of elements in windex |
Definition at line 396 of file wchmm.c.
Referenced by build_wchmm2().
static int wchmm_check_match | ( | WORD_INFO * | winfo, | |
int | i, | |||
int | j | |||
) | [static] |
static void acc_init | ( | WCHMM_INFO * | wchmm, | |
int | node | |||
) | [static] |
Initialize transition information on a node.
wchmm | [i/o] tree lexicon | |
node | [in] node id |
Definition at line 455 of file wchmm.c.
Referenced by wchmm_add_word(), and wchmm_duplicate_state().
static void add_ac | ( | WCHMM_INFO * | wchmm, | |
int | node, | |||
LOGPROB | a, | |||
int | arc | |||
) | [static] |
Add an arc to a node.
This function is for transition other than self and next node.
wchmm | [i/o] tree lexicon | |
node | [in] node id | |
a | [in] transition probability in log10 | |
arc | [in] transition destination node id |
Definition at line 479 of file wchmm.c.
Referenced by add_wacc().
static void add_wacc | ( | WCHMM_INFO * | wchmm, | |
int | node, | |||
LOGPROB | a, | |||
int | arc | |||
) | [static] |
Add a transition arc between two nodes on the tree lexicon.
wchmm | [i/o] tree lexicon | |
node | [in] node number of source node | |
a | [in] transition probability in log scale | |
arc | [in] node number of destination node |
Definition at line 516 of file wchmm.c.
Referenced by wchmm_duplicate_state(), and wchmm_link_hmm().
static void get_outtrans_list | ( | WCHMM_INFO * | wchmm, | |
WORD_ID | w, | |||
int | pos, | |||
int * | node, | |||
LOGPROB * | a, | |||
int * | num, | |||
int | maxnum, | |||
boolean | insert_sp | |||
) | [static] |
Make outgoing transition list for given phone position of a word.
(multipath)
wchmm | [in] tree lexicon | |
w | [in] word ID | |
pos | [in] location of target phone to be inspected in the word w | |
node | [out] list of wchmm states that possibly have outgoing transitions | |
a | [out] transition probabilities of the outgoing transitions in node | |
num | [out] number of elements in node and a (found num will be added) | |
maxnum | [in] maximum number of elements that can be stored in node | |
insert_sp | [in] TRUE if consider short-pause insertion on word end |
Definition at line 554 of file wchmm.c.
Referenced by wchmm_add_word().
static void wchmm_link_hmm | ( | WCHMM_INFO * | wchmm, | |
int | from_node, | |||
int | to_node, | |||
HTK_HMM_Trans * | tinfo | |||
) | [static] |
Add a transition from end node of a phone to start node of another phone.
wchmm | [i/o] tree lexicon | |
from_node | [in] end node of a phone | |
to_node | [in] start node of a phone | |
tinfo | [in] transition prob. matrix of the from_node phone. |
Definition at line 640 of file wchmm.c.
Referenced by wchmm_link_subword().
static void wchmm_link_subword | ( | WCHMM_INFO * | wchmm, | |
int | from_word, | |||
int | from_seq, | |||
int | to_word, | |||
int | to_seq | |||
) | [static] |
Connect two phonemes in tree lexicon.
wchmm | [i/o] tree lexicon | |
from_word | [in] source word ID | |
from_seq | [in] index of source phoneme in from_word from which the other will be connected | |
to_word | [in] destination word ID | |
to_seq | [in] index of destination phoneme in to_word to which the other will connect |
static void wchmm_duplicate_state | ( | WCHMM_INFO * | wchmm, | |
int | node, | |||
int | word | |||
) | [static] |
Isolation of word-end nodes for homophones: duplicate the word-end state, link as the same as original, and make it the new word-end node of the given new word.
wchmm | [i/o] tree lexicon | |
node | [in] the word end node of the already existing homophone | |
word | [in] word ID to be added to the tree |
Definition at line 752 of file wchmm.c.
Referenced by wchmm_duplicate_leafnode().
static int wchmm_duplicate_leafnode | ( | WCHMM_INFO * | wchmm | ) | [static] |
static boolean wchmm_add_word | ( | WCHMM_INFO * | wchmm, | |
int | word, | |||
int | matchlen, | |||
int | matchword, | |||
boolean | enable_iwsp | |||
) | [static] |
Add a new word to the lexicon tree.
The longest matched word in the current lexicon tree and the length of the matched phoneme from the word head should be specified to tell where to insert the new word to the tree.
wchmm | [i/o] tree lexicon | |
word | [in] word id to be added to the lexicon | |
matchlen | [in] phoneme match length between word and matchword. | |
matchword | [in] the longest matched word with word in the current lexicon tree | |
enable_iwsp | [in] should be TRUE when using inter-word short pause option |
Definition at line 970 of file wchmm.c.
Referenced by build_wchmm().
static void wchmm_calc_wordend_arc | ( | WCHMM_INFO * | wchmm | ) | [static] |
static LOGPROB get_nbest_uniprob | ( | WCHMM_INFO * | wchmm, | |
int | n | |||
) | [static] |
Get the Nth-best unigram probability from all words.
wchmm | [in] tree lexicon (presumably providing the word dictionary) | |
n | [in] required rank |
Definition at line 1481 of file wchmm.c.
Referenced by build_wchmm(), and build_wchmm2().
boolean build_wchmm | ( | WCHMM_INFO * | wchmm, | |
JCONF_LM * | lmconf | |||
) |
Build a tree lexicon from given word dictionary and language model.
This function is slow and only used when the "-oldtree" option is specified in Julian. Julian without that option and Julius use build_wchmm2() instead of this.
wchmm | [i/o] lexicon tree | |
lmconf | [in] language model (LM) configuration parameters |
Definition at line 1553 of file wchmm.c.
Referenced by j_launch_recognition_instance(), and multigram_rebuild_wchmm().
Here is the caller graph for this function:
boolean build_wchmm2 | ( | WCHMM_INFO * | wchmm, | |
JCONF_LM * | lmconf | |||
) |
Build a tree lexicon from given word dictionary and language model.
This function does the same job as build_wchmm(), but it is much faster because the longest matched word for a word being added is found by first sorting all the words in the dictionary by their phoneme sequence order. This function will be used instead of build_wchmm() by default.
wchmm | [i/o] lexicon tree | |
lmconf | [in] language model (LM) configuration parameters |
Definition at line 1766 of file wchmm.c.
Referenced by j_launch_recognition_instance(), and multigram_rebuild_wchmm().
Here is the caller graph for this function:
void print_wchmm_info | ( | WCHMM_INFO * | wchmm | ) |