#include <julius/julius.h>
Go to the source code of this file.
Defines | |
#define | SD(A) tindex_local[A-1] |
Index locater for sort_token_*(). | |
#define | SCOPY(D, S) D = S |
Content copier for sort_token_*(). | |
#define | SVAL(A) (tlist_local[tindex_local[A-1]].score) |
Score locater for sort_token_*(). | |
#define | STVAL (tlist_local[s].score) |
Indexed score locater for sort_token_*(). | |
Functions | |
static void | put_atom (TRELLIS_ATOM *atom, WORD_INFO *winfo) |
Output a trellis word information in text (for debug). | |
static LOGPROB | trace_backptr (WORD_ID wordseq_rt[MAXSEQNUM], int *rt_wordlen, TRELLIS_ATOM *atom, WORD_INFO *winfo) |
Find the best word sequence in the word trellis. | |
static void | find_1pass_result (int framelen, RecogProcess *r) |
Find best path from the first pass result and set result status. | |
static int | compare_backscore (TRELLIS_ATOM **x1, TRELLIS_ATOM **x2) |
qsort function to sort trellis words by their score. | |
static void | find_1pass_result_word (int framelen, RecogProcess *r) |
Isolated word recognition version of find_1pass_result(). | |
static void | bt_current_max (RecogProcess *r, int t) |
Output the current best word sequence ending at a specified time frame in the course of the 1st pass. | |
static void | bt_current_max_word (RecogProcess *r, int t) |
Output the current best word on a specified time frame in the course of the 1st pass. | |
static void | malloc_nodes (FSBeam *d, int n, int ntoken_init) |
Allocate initial work area for beam search on the 1st pass. | |
static void | expand_tlist (FSBeam *d) |
Re-allocate work area for beam search on the 1st pass. | |
static void | prepare_nodes (FSBeam *d, int ntoken_step) |
Clear nodes for the next input. | |
static void | free_nodes (FSBeam *d) |
Free all the work area for beam search on the 1st pass. | |
static void | clear_tlist (FSBeam *d, int tt) |
Reset the token space. | |
static void | clear_tokens (FSBeam *d, int tt) |
Clear the active token list. | |
static TOKENID | create_token (FSBeam *d) |
Assign a new token from token space. | |
static void | node_assign_token (FSBeam *d, int node, TOKENID tkid) |
Assign token to a node on tree lexicon. | |
static TOKENID | node_exist_token (FSBeam *d, int tt, int node, WORD_ID wid) |
Check if a node holds any token. | |
static void | sort_token_upward (FSBeam *d, int neednum, int totalnum) |
Sort the token space upward by score. | |
static void | sort_token_downward (FSBeam *d, int neednum, int totalnum) |
Sort the token space downward by score. | |
static void | sort_token_no_order (FSBeam *d, int neednum, int *start, int *end) |
Sort the token space to find which tokens survive in the beam. | |
static boolean | init_nodescore (HTK_Param *param, RecogProcess *r) |
Generate initial hypotheses. | |
boolean | get_back_trellis_init (HTK_Param *param, RecogProcess *r) |
Initialization of the frame synchronous beam search. | |
static void | propagate_token (FSBeam *d, int next_node, LOGPROB next_score, TRELLIS_ATOM *last_tre, WORD_ID last_cword, LOGPROB last_lscore) |
Propagate a token to next node. | |
static void | beam_intra_word_core (WCHMM_INFO *wchmm, FSBeam *d, TOKEN2 **tk_ret, int j, int next_node, LOGPROB next_a) |
Word-internal transition for a set of nodes. | |
static void | beam_intra_word (WCHMM_INFO *wchmm, FSBeam *d, TOKEN2 **tk_ret, int j) |
Word-internal transition. | |
static TRELLIS_ATOM * | save_trellis (BACKTRELLIS *bt, WCHMM_INFO *wchmm, TOKEN2 *tk, int t, boolean final_for_multipath) |
Store a new trellis word on the token. | |
static void | beam_inter_word (WCHMM_INFO *wchmm, FSBeam *d, TOKEN2 **tk_ret, TRELLIS_ATOM *tre, int j) |
Cross-word transition processing from word-end token. | |
static void | beam_inter_word_factoring (WCHMM_INFO *wchmm, FSBeam *d) |
Additional cross-word transition processing for 1-gram factoring. | |
boolean | get_back_trellis_proceed (int t, HTK_Param *param, RecogProcess *r, boolean final_for_multipath) |
Frame synchronous beam search: proceed for 2nd frame and later. | |
void | get_back_trellis_end (HTK_Param *param, RecogProcess *r) |
Frame synchronous beam search: last frame. | |
void | finalize_1st_pass (RecogProcess *r, int len) |
Finalize the 1st pass. | |
void | fsbeam_free (FSBeam *d) |
Free work area for the first pass. |
These are core functions of frame-synchronous beam search using a static lexicon tree, as the first pass of Julius. These functions will be called from pass1.c, to execute for each recognition process instance in turn. Functions for initialization, frame-wise recognition processing, end procedure, finding best path, detecting end of segment on short-pause segmentation mode, are defined here.
About algorithm: 1-best approximation will be performed for word context approximation, but normal word-pair approximation is also supported. With word/class N-gram, Julius computes the language score using 1-gram factoring (can be changed to 2-gram factoring if you want). With DFA grammar, Julius can compute the connection constraint of words using the category-pair constraint on the beginning of the words, since Julian makes a per-category tree lexicon. On isolated word recognition mode, the cross-word transitions are ignored.
Definition in file beam.c.
static void put_atom | ( | TRELLIS_ATOM * | atom, | |
WORD_INFO * | winfo | |||
) | [static] |
Output a trellis word information in text (for debug).
atom | [in] trellis word to output | |
winfo | [in] word dictionary |
Definition at line 251 of file beam.c.
Referenced by trace_backptr().
static LOGPROB trace_backptr | ( | WORD_ID | wordseq_rt[MAXSEQNUM], | |
int * | rt_wordlen, | |||
TRELLIS_ATOM * | atom, | |||
WORD_INFO * | winfo | |||
) | [static] |
Find the best word sequence in the word trellis.
This function traces back through the word trellis to the beginning of the input to find the best word sequence. The traceback starting point should be specified as a trellis word.
wordseq_rt | [out] buffer to store the best word sequence as result | |
rt_wordlen | [out] length of wordseq_rt | |
atom | [in] a trellis word as the starting point of the traceback | |
winfo | [in] word dictionary |
Definition at line 293 of file beam.c.
Referenced by bt_current_max().
static void find_1pass_result | ( | int | framelen, | |
RecogProcess * | r | |||
) | [static] |
Find best path from the first pass result and set result status.
This function finds the best word sequence from the resulting word trellis of the 1st pass, and stores it to result.pass1 in the recognition process instance. If no candidate was found, it sets error code -1 (recognition failure) and exits.
On short-pause segmentation, it sets error code -4 (decoder rejection) if the found best path consists of only silence words.
If WORD_GRAPH is defined, this function also calls generate_lattice() to extract a word graph from the word trellis.
framelen | [in] frame length that has been processed | |
r | [in] recognition process instance |
Definition at line 371 of file beam.c.
Referenced by finalize_1st_pass().
static int compare_backscore | ( | TRELLIS_ATOM ** | x1, | |
TRELLIS_ATOM ** | x2 | |||
) | [static] |
static void find_1pass_result_word | ( | int | framelen, | |
RecogProcess * | r | |||
) | [static] |
Isolated word recognition version of find_1pass_result().
Since Julius executes only the 1st pass on Isolated word recognition mode, the result candidate will be stored as the final result.
framelen | [in] frame length that has been processed | |
r | [i/o] recognition process instance |
Definition at line 574 of file beam.c.
Referenced by finalize_1st_pass().
static void bt_current_max | ( | RecogProcess * | r, | |
int | t | |||
) | [static] |
Output the current best word sequence ending at a specified time frame in the course of the 1st pass.
r | [i/o] recognition process instance | |
t | [in] current input frame |
Definition at line 875 of file beam.c.
Referenced by get_back_trellis_proceed().
static void bt_current_max_word | ( | RecogProcess * | r, | |
int | t | |||
) | [static] |
Output the current best word on a specified time frame in the course of the 1st pass.
r | [i/o] recognition process instance | |
t | [in] current input frame |
Definition at line 934 of file beam.c.
Referenced by get_back_trellis_proceed().
static void malloc_nodes | ( | FSBeam * | d, | |
int | n, | |||
int | ntoken_init | |||
) | [static] |
Allocate initial work area for beam search on the 1st pass.
If the work area becomes full during the search, it will be expanded on demand.
d | [i/o] work area for 1st pass recognition processing | |
n | [in] number of nodes in lexicon tree | |
ntoken_init | [in] number of token space to be allocated at first |
Definition at line 996 of file beam.c.
Referenced by get_back_trellis_init().
static void expand_tlist | ( | FSBeam * | d | ) | [static] |
Re-allocate work area for beam search on the 1st pass.
d | [i/o] work area for 1st pass recognition processing |
Definition at line 1024 of file beam.c.
Referenced by create_token().
static void prepare_nodes | ( | FSBeam * | d, | |
int | ntoken_step | |||
) | [static] |
Clear nodes for the next input.
Julius will call this function for each input to re-set the work area for the beam search. If the size of tree lexicon has been changed since the last input, Julius will free and re-allocate the work area.
d | [i/o] work area for 1st pass recognition processing | |
ntoken_step | [in] required token step |
Definition at line 1053 of file beam.c.
Referenced by get_back_trellis_init().
static void free_nodes | ( | FSBeam * | d | ) | [static] |
Free all the work area for beam search on the 1st pass.
d | [i/o] work area for 1st pass recognition processing |
Definition at line 1074 of file beam.c.
Referenced by fsbeam_free(), and get_back_trellis_init().
static void clear_tlist | ( | FSBeam * | d, | |
int | tt | |||
) | [static] |
Reset the token space.
d | [i/o] work area for 1st pass recognition processing | |
tt | [in] work area id (0 or 1) |
Definition at line 1101 of file beam.c.
Referenced by get_back_trellis_proceed().
static void clear_tokens | ( | FSBeam * | d, | |
int | tt | |||
) | [static] |
Clear the active token list.
d | [i/o] work area for 1st pass recognition processing | |
tt | [in] work area id of previous frame (0 or 1) |
Definition at line 1121 of file beam.c.
Referenced by get_back_trellis_proceed().
Assign a new token from token space.
d | [i/o] work area for 1st pass recognition processing |
Definition at line 1147 of file beam.c.
Referenced by init_nodescore(), and propagate_token().
Assign token to a node on tree lexicon.
Save the token id to the specified node in the active token list. Also saves the link to the node from the token in token space.
If a token already exists on the node, it will be overridden by the new one. If WPAIR is defined, the new token will simply be added to the list of active tokens on the node.
d | [i/o] work area for 1st pass recognition processing | |
node | [in] node id on the tree lexicon | |
tkid | [in] token id to be assigned |
Definition at line 1193 of file beam.c.
Referenced by init_nodescore(), and propagate_token().
Check if a node holds any token.
This function checks if a node on the tree lexicon already holds a token.
If WPAIR is defined, a node has multiple tokens according to the previous word context. In this case, only token with the same previous word will be checked.
d | [i/o] work area for 1st pass recognition processing | |
tt | [in] work area id (0 or 1) | |
node | [in] node id of lexicon tree | |
wid | [in] word id of previous word (ignored if WPAIR is not defined) |
Definition at line 1240 of file beam.c.
Referenced by init_nodescore(), and propagate_token().
static void sort_token_upward | ( | FSBeam * | d, | |
int | neednum, | |||
int | totalnum | |||
) | [static] |
Sort the token space upward by score.
This function sorts the whole token space in the upward direction, according to the tokens' accumulated scores. It terminates the sort as soon as the top neednum tokens have been found.
d | [i/o] work area for 1st pass recognition processing | |
neednum | [in] sort until top neednum tokens have been found | |
totalnum | [in] total number of assigned tokens in the token space |
Definition at line 1341 of file beam.c.
Referenced by sort_token_no_order().
static void sort_token_downward | ( | FSBeam * | d, | |
int | neednum, | |||
int | totalnum | |||
) | [static] |
Sort the token space downward by score.
This function sorts the whole token space in the downward direction, according to the tokens' accumulated scores, for hypothesis pruning.
It terminates the sort as soon as the bottom neednum tokens have been found.
d | [i/o] work area for 1st pass recognition processing | |
neednum | [in] sort until bottom neednum tokens have been found | |
totalnum | [in] total number of assigned tokens in the token space |
Definition at line 1413 of file beam.c.
Referenced by sort_token_no_order().
static void sort_token_no_order | ( | FSBeam * | d, | |
int | neednum, | |||
int * | start, | |||
int * | end | |||
) | [static] |
Sort the token space to find which tokens survive in the beam.
This function sorts the current tokens in the token space to find the tokens that survive in the current frame. Only the top neednum tokens are required, so the sort is terminated as soon as the top neednum tokens are determined. Internally, either sort_token_upward() or sort_token_downward() is used, depending on the number of needed tokens and the total number of tokens.
d | [i/o] work area for 1st pass recognition processing | |
neednum | [in] number of top tokens to be found | |
start | [out] start index of the top neednum nodes | |
end | [out] end index of the top neednum nodes |
Definition at line 1491 of file beam.c.
Referenced by get_back_trellis_init(), and get_back_trellis_proceed().
static boolean init_nodescore | ( | HTK_Param * | param, | |
RecogProcess * | r | |||
) | [static] |
Generate initial hypotheses.
The initial hypothesis is: 1) winfo->head_silwid for N-gram, 2) all words that can be at beginning of sentence for DFA, or 3) all words in dictionary for isolated word recognition mode.
If acoustic model is NOT a multi-path model, the output probabilities for the first frame (t=0) will also be computed in this function.
param | [in] input vectors (only the first frame will be used) | |
r | [in] recognition process instance |
Definition at line 1551 of file beam.c.
Referenced by get_back_trellis_init().
boolean get_back_trellis_init | ( | HTK_Param * | param, | |
RecogProcess * | r | |||
) |
Initialization of the frame synchronous beam search.
This function will initialize work area for the 1st pass. Generation of initial hypotheses will be performed in init_nodescore().
param | [in] input vectors (only the first frame will be used) | |
r | [i/o] recognition process instance |
Definition at line 1787 of file beam.c.
Referenced by decode_proceed().
Here is the caller graph for this function:
static void propagate_token | ( | FSBeam * | d, | |
int | next_node, | |||
LOGPROB | next_score, | |||
TRELLIS_ATOM * | last_tre, | |||
WORD_ID | last_cword, | |||
LOGPROB | last_lscore | |||
) | [static] |
Propagate a token to next node.
d | [i/o] work area for 1st pass recognition processing | |
next_node | [in] next node id | |
next_score | [in] score when transmitted to the next node | |
last_tre | [in] previous word context for the next node | |
last_cword | [in] previous context-valid word for the next node | |
last_lscore | [in] LM score to be propagated |
Definition at line 1901 of file beam.c.
Referenced by beam_inter_word(), beam_inter_word_factoring(), and beam_intra_word_core().
static void beam_intra_word_core | ( | WCHMM_INFO * | wchmm, | |
FSBeam * | d, | |||
TOKEN2 ** | tk_ret, | |||
int | j, | |||
int | next_node, | |||
LOGPROB | next_a | |||
) | [static] |
Word-internal transition for a set of nodes.
wchmm | [in] tree lexicon | |
d | [i/o] work area for the 1st pass | |
tk_ret | [in] source token (if pointer updated, overwrite this) | |
j | [in] the token ID of tk_ret | |
next_node | [in] id of next node | |
next_a | [in] transition probability |
Temporary work variable to hold the current node number on the lexicon tree
Definition at line 1957 of file beam.c.
Referenced by beam_intra_word().
static void beam_intra_word | ( | WCHMM_INFO * | wchmm, | |
FSBeam * | d, | |||
TOKEN2 ** | tk_ret, | |||
int | j | |||
) | [static] |
Word-internal transition.
wchmm | [in] tree lexicon | |
d | [i/o] work area for the 1st pass | |
tk_ret | [in] source token (if pointer updated, overwrite this) | |
j | [in] the token ID of tk_ret |
Temporary work variable to hold the next states of a node
Definition at line 2107 of file beam.c.
Referenced by get_back_trellis_proceed().
static TRELLIS_ATOM* save_trellis | ( | BACKTRELLIS * | bt, | |
WCHMM_INFO * | wchmm, | |||
TOKEN2 * | tk, | |||
int | t, | |||
boolean | final_for_multipath | |||
) | [static] |
Store a new trellis word on the token.
bt | [i/o] backtrellis data to save it | |
wchmm | [in] tree lexicon | |
tk | [in] source token at word edge | |
t | [in] current time frame | |
final_for_multipath | [in] TRUE if this is final frame |
Definition at line 2162 of file beam.c.
Referenced by get_back_trellis_end(), and get_back_trellis_proceed().
static void beam_inter_word | ( | WCHMM_INFO * | wchmm, | |
FSBeam * | d, | |||
TOKEN2 ** | tk_ret, | |||
TRELLIS_ATOM * | tre, | |||
int | j | |||
) | [static] |
Cross-word transition processing from word-end token.
wchmm | [in] tree lexicon | |
d | [i/o] work area for the 1st pass | |
tk_ret | [in] source token where the propagation is from | |
tre | [in] the trellis word generated from tk_ret | |
j | [in] the token ID of tk_ret |
Temporary pointer to hold the inter-word cache array
Temporary work variable to hold an isolated node
Definition at line 2227 of file beam.c.
Referenced by get_back_trellis_proceed().
static void beam_inter_word_factoring | ( | WCHMM_INFO * | wchmm, | |
FSBeam * | d | |||
) | [static] |
Additional cross-word transition processing for 1-gram factoring.
When using 1-gram factoring, the word end of maximum likelihood will be chosen at the cross-word Viterbi step for factored word-head nodes, since the LM factoring value is independent of the context. This function performs Viterbi processing to the factored word-head nodes from the previously stored maximum-likelihood word end.
wchmm | [in] tree lexicon | |
d | [in] work area for the 1st pass |
Definition at line 2474 of file beam.c.
Referenced by get_back_trellis_proceed().
boolean get_back_trellis_proceed | ( | int | t, | |
HTK_Param * | param, | |||
RecogProcess * | r, | |||
boolean | final_for_multipath | |||
) |
Frame synchronous beam search: proceed for 2nd frame and later.
This is the main function of beam search on the 1st pass. Given an input vector of a frame, it proceeds with the computation for that one frame, and stores the words that survived within the beam width to the word trellis structure. get_back_trellis_init() should be used for the first frame. For the detailed procedure, please see the comments in this function.
t | [in] current frame to be computed in param | |
param | [in] input vector structure (only the vector at t will be used) | |
r | [in] recognition process instance | |
final_for_multipath | [i/o] TRUE if this is last frame of an input |
Local work area to hold the generated trellis word
Temporary work variable to hold the current node number on the lexicon tree
Definition at line 2583 of file beam.c.
Referenced by decode_proceed(), and get_back_trellis_end().
Here is the caller graph for this function:
void get_back_trellis_end | ( | HTK_Param * | param, | |
RecogProcess * | r | |||
) |
Frame synchronous beam search: last frame.
This function should be called at the end of the 1st pass. The last procedure will be done for the (param->samplenum - 1) frame.
param | [in] input vectors (only param->samplenum is referred) | |
r | [in] recognition process instance |
Definition at line 2930 of file beam.c.
Referenced by decode_end().
Here is the caller graph for this function:
void finalize_1st_pass | ( | RecogProcess * | r, | |
int | len | |||
) |
Finalize the 1st pass.
This function will be called just after get_back_trellis_end() to finalize the 1st pass. It processes the resulting word trellis structure to make it accessible from the 2nd pass, and outputs the best sentence hypothesis by backtracing the word trellis.
r | [in] recognition process instance | |
len | [in] total number of processed frames |
Definition at line 3008 of file beam.c.
Referenced by decode_end(), and decode_end_segmented().
Here is the caller graph for this function:
void fsbeam_free | ( | FSBeam * | d | ) |
Free work area for the first pass.
d | [in] work area for 1st pass input handling |
Definition at line 3054 of file beam.c.
Referenced by j_recogprocess_free().
Here is the caller graph for this function: