#include <julius/julius.h>
Go to the source code of this file.
Defines | |
#define | SD(A) tindex_local[A-1] |
Index locater for sort_token_*(). | |
#define | SCOPY(D, S) D = S |
Content copier for sort_token_*(). | |
#define | SVAL(A) (tlist_local[tindex_local[A-1]].score) |
Score locater for sort_token_*(). | |
#define | STVAL (tlist_local[s].score) |
Indexed score locater for sort_token_*(). | |
Functions | |
static void | put_atom (TRELLIS_ATOM *atom, WORD_INFO *winfo) |
Output a trellis word information in text (for debug). | |
static LOGPROB | trace_backptr (WORD_ID wordseq_rt[MAXSEQNUM], int *rt_wordlen, TRELLIS_ATOM *atom, WORD_INFO *winfo) |
Find the best word sequence in the word trellis. | |
static void | find_1pass_result (int framelen, RecogProcess *r) |
Find best path from the first pass result and set result status. | |
static int | compare_backscore (TRELLIS_ATOM **x1, TRELLIS_ATOM **x2) |
qsort function to sort trellis words by their score. | |
static void | find_1pass_result_word (int framelen, RecogProcess *r) |
Isolated word recognition version of find_1pass_result(). | |
static void | bt_current_max (RecogProcess *r, int t) |
Output the current best word sequence ending at a specified time frame in the course of the 1st pass. | |
static void | bt_current_max_word (RecogProcess *r, int t) |
Output the current best word on a specified time frame in the course of the 1st pass. | |
static void | malloc_nodes (FSBeam *d, int n, int ntoken_init) |
Allocate initial work area for beam search on the 1st pass. | |
static void | expand_tlist (FSBeam *d) |
Re-allocate work area for beam search on the 1st pass. | |
static void | prepare_nodes (FSBeam *d, int ntoken_step) |
Clear nodes for the next input. | |
static void | free_nodes (FSBeam *d) |
Free all the work area for beam search on the 1st pass. | |
static void | clear_tlist (FSBeam *d, int tt) |
Reset the token space. | |
static void | clear_tokens (FSBeam *d, int tt) |
Clear the active token list. | |
static TOKENID | create_token (FSBeam *d) |
Assign a new token from token space. | |
static void | node_assign_token (FSBeam *d, int node, TOKENID tkid) |
Assign token to a node on tree lexicon. | |
static TOKENID | node_exist_token (FSBeam *d, int tt, int node, WORD_ID wid) |
Check if a node holds any token. | |
static void | sort_token_upward (FSBeam *d, int neednum, int totalnum) |
Sort the token space upward by score. | |
static void | sort_token_downward (FSBeam *d, int neednum, int totalnum) |
Sort the token space downward by score. | |
static void | sort_token_no_order (FSBeam *d, int neednum, int *start, int *end) |
Sort the token space to find which tokens to be survived in the beam. | |
static boolean | init_nodescore (HTK_Param *param, RecogProcess *r) |
Generate initial hypotheses. | |
boolean | get_back_trellis_init (HTK_Param *param, RecogProcess *r) |
Initialization of the frame synchronous beam search. | |
static void | propagate_token (FSBeam *d, int next_node, LOGPROB next_score, TRELLIS_ATOM *last_tre, WORD_ID last_cword, LOGPROB last_lscore) |
Propagate a token to next node. | |
static void | beam_intra_word_core (WCHMM_INFO *wchmm, FSBeam *d, TOKEN2 **tk_ret, int j, int next_node, LOGPROB next_a) |
Word-internal transition for a set of nodes. | |
static void | beam_intra_word (WCHMM_INFO *wchmm, FSBeam *d, TOKEN2 **tk_ret, int j) |
Word-internal transition. | |
static TRELLIS_ATOM * | save_trellis (BACKTRELLIS *bt, WCHMM_INFO *wchmm, TOKEN2 *tk, int t, boolean final_for_multipath) |
Store a new trellis word on the token. | |
static void | beam_inter_word (WCHMM_INFO *wchmm, FSBeam *d, TOKEN2 **tk_ret, TRELLIS_ATOM *tre, int j) |
Cross-word transition processing from word-end token. | |
boolean | get_back_trellis_proceed (int t, HTK_Param *param, RecogProcess *r, boolean final_for_multipath) |
Frame synchronous beam search: proceed for 2nd frame and later. | |
void | get_back_trellis_end (HTK_Param *param, RecogProcess *r) |
Frame synchronous beam search: last frame. | |
void | finalize_1st_pass (RecogProcess *r, int len) |
Finalize the 1st pass. | |
void | fsbeam_free (FSBeam *d) |
Free work area for the first pass. |
These are core functions of frame-synchronous beam search using a static lexicon tree, as the first pass of Julius. These functions will be called from pass1.c, to execute for each recognition process instance in turn. Functions for initialization, frame-wise recognition processing, end procedure, finding best path, detecting end of segment on short-pause segmentation mode, are defined here.
About algorithm: 1-best approximation will be performed for word context approximation, but normal word-pair approximation is also supported. With word/class N-gram, Julius computes the language score using 1-gram factoring (can be changed to 2-gram factoring if you want). With DFA grammar, Julius can compute the connection constraint of words using the category-pair constraint on the beginning of the words, since Julian makes a per-category tree lexicon. On isolated word recognition mode, the cross-word transitions are ignored.
Definition in file beam.c.
static void put_atom | ( | TRELLIS_ATOM * | atom, | |
WORD_INFO * | winfo | |||
) | [static] |
static LOGPROB trace_backptr | ( | WORD_ID | wordseq_rt[MAXSEQNUM], | |
int * | rt_wordlen, | |||
TRELLIS_ATOM * | atom, | |||
WORD_INFO * | winfo | |||
) | [static] |
Find the best word sequence in the word trellis.
This function trace back through the word trellis to the beginning of input, to find the best word sequence. The traceback starting point should be specified as a trellis word.
wordseq_rt | [out] buffer to store the best word sequence as result | |
rt_wordlen | [out] length of wordseq_rt | |
atom | [in] a trellis word as the starting point of the traceback | |
winfo | [in] word dictionary |
static void find_1pass_result | ( | int | framelen, | |
RecogProcess * | r | |||
) | [static] |
Find best path from the first pass result and set result status.
This function find the best word sequence from the resulting word trellis of the 1st pass, and store them to result.pass1 in the recognition process instance. If no candidate was found, it sets error code -1 (recognition failure) and exit.
On short-pause segmentation, it sets error code -4 (decoder rejection) if the found best path consists of only silence words.
Also, if WORD_GRAPH is defined, this function also calls generate_lattice() to extract word graph from the word trellis.
framelen | [in] frame length that has been processed | |
r | [in] recognition process instance |
static int compare_backscore | ( | TRELLIS_ATOM ** | x1, | |
TRELLIS_ATOM ** | x2 | |||
) | [static] |
static void find_1pass_result_word | ( | int | framelen, | |
RecogProcess * | r | |||
) | [static] |
Isolated word recognition version of find_1pass_result().
Since Julius executes only the 1st pass on Isolated word recognition mode, the result candidate will be stored as the final result.
framelen | [in] frame length that has been processed | |
r | [i/o] recognition process instance |
static void bt_current_max | ( | RecogProcess * | r, | |
int | t | |||
) | [static] |
static void bt_current_max_word | ( | RecogProcess * | r, | |
int | t | |||
) | [static] |
static void malloc_nodes | ( | FSBeam * | d, | |
int | n, | |||
int | ntoken_init | |||
) | [static] |
Allocate initial work area for beam search on the 1st pass.
If filled while search, they will be expanded on demand.
d | [i/o] work area for 1st pass recognition processing | |
n | [in] number of nodes in lexicon tree | |
ntoken_init | [in] number of token space to be allocated at first |
static void expand_tlist | ( | FSBeam * | d | ) | [static] |
static void prepare_nodes | ( | FSBeam * | d, | |
int | ntoken_step | |||
) | [static] |
Clear nodes for the next input.
Julius will call this function for each input to re-set the work area for the beam search. If the size of tree lexicon has been changed since the last input, Julius will free and re-allocate the work area.
d | [i/o] work area for 1st pass recognition processing | |
ntoken_step | [in] required token step |
static void free_nodes | ( | FSBeam * | d | ) | [static] |
static void clear_tlist | ( | FSBeam * | d, | |
int | tt | |||
) | [static] |
static void clear_tokens | ( | FSBeam * | d, | |
int | tt | |||
) | [static] |
Assign token to a node on tree lexicon.
Save the token id to the specified node in the active token list. Also saves the link to the node from the token in token space.
If a token already exist on the node, it will be overridden by the new one. If WPAIR is defined, the new token will be simply added to the list of active tokens on the node.
d | [i/o] work area for 1st pass recognition processing | |
node | [in] node id on the tree lexicon | |
tkid | [in] token id to be assigned |
Check if a node holds any token.
This function checks if a node on the tree lexicon already holds a token.
If WPAIR is defined, a node has multiple tokens according to the previous word context. In this case, only token with the same previous word will be checked.
d | [i/o] work area for 1st pass recognition processing | |
tt | [in] work area id (0 or 1) | |
node | [in] node id of lexicon tree | |
wid | [in] word id of previous word (ignored if WPAIR is not defined) |
static void sort_token_upward | ( | FSBeam * | d, | |
int | neednum, | |||
int | totalnum | |||
) | [static] |
Sort the token space upward by score.
This function sort the whole token space in upward direction, according to their accumulated score. This function terminates sort as soon as the top neednum tokens has been found.
d | [i/o] work area for 1st pass recognition processing | |
neednum | [in] sort until top neednum tokens has been found | |
totalnum | [in] total number of assigned tokens in the token space |
static void sort_token_downward | ( | FSBeam * | d, | |
int | neednum, | |||
int | totalnum | |||
) | [static] |
Sort the token space downward by score.
This function sort the whole token space in downward direction, according to their accumulated score for hypothesis pruning.
This function terminates sort as soon as the bottom neednum tokens has been found.
d | [i/o] work area for 1st pass recognition processing | |
neednum | [in] sort until bottom neednum tokens has been found | |
totalnum | [in] total number of assigned tokens in the token space |
static void sort_token_no_order | ( | FSBeam * | d, | |
int | neednum, | |||
int * | start, | |||
int * | end | |||
) | [static] |
Sort the token space to find which tokens to be survived in the beam.
This function sorts the currrent tokens in the token space to find the tokens to be survived in the current frame. Only getting the top neednum tokens is required, so the sort will be terminate just after the top neednum tokens are determined. Actually, either sort_token_upward() or sort_token_downward() will be used depending of the number of needed tokens and total tokens.
d | [i/o] work area for 1st pass recognition processing | |
neednum | [in] number of top tokens to be found | |
start | [out] start index of the top neednum nodes | |
end | [out] end index of the top neednum nodes |
static boolean init_nodescore | ( | HTK_Param * | param, | |
RecogProcess * | r | |||
) | [static] |
Generate initial hypotheses.
The initial hypothesis is: 1) winfo->head_silwid for N-gram, 2) all words that can be at beginning of sentence for DFA, or 3) all words in dictionary for isolated word recognition mode.
If acoustic model is NOT a multi-path model, the output probabilities for the first frame (t=0) will also be computed in this function.
param | [in] input vectors (only the first frame will be used) | |
r | [in] recognition process instance |
boolean get_back_trellis_init | ( | HTK_Param * | param, | |
RecogProcess * | r | |||
) |
Initialization of the frame synchronous beam search.
This function will initialize work area for the 1st pass. Generation of initial hypotheses will be performed in init_nodescore().
param | [in] input vectors (only the first frame will be used) | |
r | [i/o] recognition process instance |
Definition at line 1763 of file beam.c.
Referenced by decode_proceed().
static void propagate_token | ( | FSBeam * | d, | |
int | next_node, | |||
LOGPROB | next_score, | |||
TRELLIS_ATOM * | last_tre, | |||
WORD_ID | last_cword, | |||
LOGPROB | last_lscore | |||
) | [static] |
Propagate a token to next node.
d | [i/o] work area for 1st pass recognition processing | |
next_node | [in] next node id | |
next_score | [in] score when transmitted to the next node | |
last_tre | [in] previous word context for the next node | |
last_cword | [in] previous context-valid word for the next node | |
last_lscore | [in] LM score to be propagated |
static void beam_intra_word_core | ( | WCHMM_INFO * | wchmm, | |
FSBeam * | d, | |||
TOKEN2 ** | tk_ret, | |||
int | j, | |||
int | next_node, | |||
LOGPROB | next_a | |||
) | [static] |
Word-internal transition for a set of nodes.
wchmm | [in] tree lexicon | |
d | [i/o] work area for the 1st pass | |
tk_ret | [in] source token (if pointer updated, overwrite this) | |
j | [in] the token ID of tk_ret | |
next_node | [in] id of next node | |
next_a | [in] transition probability |
< Temporal work to hold the current node number on the lexicon tree
static void beam_intra_word | ( | WCHMM_INFO * | wchmm, | |
FSBeam * | d, | |||
TOKEN2 ** | tk_ret, | |||
int | j | |||
) | [static] |
static TRELLIS_ATOM* save_trellis | ( | BACKTRELLIS * | bt, | |
WCHMM_INFO * | wchmm, | |||
TOKEN2 * | tk, | |||
int | t, | |||
boolean | final_for_multipath | |||
) | [static] |
Store a new trellis word on the token.
bt | [i/o] backtrellis data to save it | |
wchmm | [in] tree lexicon | |
tk | [in] source token at word edge | |
t | [in] current time frame | |
final_for_multipath | [in] TRUE if this is final frame |
static void beam_inter_word | ( | WCHMM_INFO * | wchmm, | |
FSBeam * | d, | |||
TOKEN2 ** | tk_ret, | |||
TRELLIS_ATOM * | tre, | |||
int | j | |||
) | [static] |
Cross-word transition processing from word-end token.
wchmm | [in] tree lexicon | |
d | [i/o] work area for the 1st pass | |
tk_ret | [in] source token where the propagation is from | |
tre | [in] the trellis word generated from tk_ret | |
j | [in] the token ID of tk_ret |
< Temporal pointer to hold inter-word cache array
boolean get_back_trellis_proceed | ( | int | t, | |
HTK_Param * | param, | |||
RecogProcess * | r, | |||
boolean | final_for_multipath | |||
) |
Frame synchronous beam search: proceed for 2nd frame and later.
This is the main function of beam search on the 1st pass. Given a input vector of a frame, it proceeds the computation for the one frame, and store the words survived in the beam width to the word trellis structure. get_back_trellis_init() should be used for the first frame. For detailed procedure, please see the comments in this function.
t | [in] current frame to be computed in param | |
param | [in] input vector structure (only the vector at t will be used) | |
r | [in] recognition process instance | |
final_for_multipath | [i/o] TRUE if this is last frame of an input |
< Local workarea to hold the generated trellis word
< Temporal work to hold the current node number on the lexicon tree
Definition at line 2558 of file beam.c.
Referenced by decode_proceed().
void get_back_trellis_end | ( | HTK_Param * | param, | |
RecogProcess * | r | |||
) |
Frame synchronous beam search: last frame.
This function should be called at the end of the 1st pass. The last procedure will be done for the (param->samplenum - 1) frame.
param | [in] input vectors (only param->samplenum is referred) | |
r | [in] recognition process instance |
Definition at line 2905 of file beam.c.
Referenced by decode_end().
void finalize_1st_pass | ( | RecogProcess * | r, | |
int | len | |||
) |
Finalize the 1st pass.
This function will be called just after get_back_trellis_end() to finalize the 1st pass. It processes the resulting word trellis structure to be accessible from the 2nd pass, and output the best sentence hypothesis by backtracing the word trellis.
r | [in] recoginirion process instance | |
len | [in] total number of processed frames |
Definition at line 2983 of file beam.c.
Referenced by decode_end(), and decode_end_segmented().
void fsbeam_free | ( | FSBeam * | d | ) |
Free work area for the first pass.
d | [in] work are for 1st pass input handling |
Definition at line 3029 of file beam.c.
Referenced by j_recogprocess_free().