#include <sent/stddefs.h>
#include <sent/speech.h>
#include <sent/adin.h>
#include <pthread.h>
adin-cut.cのインクルード依存関係図
マクロ定義 | |
#define | TMP_FIX_200602 |
Enable some fixes relating to adinnet+module | |
関数 | |
static void | adin_thread_create () |
create and start A/D-in and detection thread | |
void | adin_setup_func (int(*cad_read)(SP16 *, int), boolean(*cad_pause)(), boolean(*cad_resume)(), boolean use_cut_def, boolean need_thread) |
void | adin_setup_param (int silence_cut, boolean strip_zero, int cthres, int czc, int head_margin, int tail_margin, int sample_freq, boolean ignore_speech, boolean need_zeromean) |
boolean | query_segment_on () |
boolean | query_thread_on () |
void | adin_reset_zmean () |
static void | adin_purge (int from) |
static int | adin_cut (int(*ad_process)(SP16 *, int), int(*ad_check)()) |
Main A/D-in function | |
static int | adin_store_buffer (SP16 *now, int len) |
void | adin_thread_input_main (void *dummy) |
static int | adin_thread_process (int(*ad_process)(SP16 *, int), int(*ad_check)()) |
Main function of processing triggered samples at main thread. | |
int | adin_go (int(*ad_process)(SP16 *, int), int(*ad_check)()) |
Top function to start input processing | |
変数 | |
Variables of zero-cross parameters and buffer sizes | |
static int | c_length = 5000 |
Computed length of the cycle buffer for zero-cross; actually equal to the head margin length | |
static int | c_offset = 0 |
Static data DC offset (obsolete, should be 0) | |
static int | wstep = DEFAULT_WSTEP |
Data fragment size | |
static int | thres |
Input Level threshold (0-32767) | |
static int | noise_zerocross |
Computed threshold of zerocross num in the cycle buffer | |
static int | nc_max |
Computed number of fragments for tail margin | |
Variables for delayed tail silence processing | |
static SP16 * | swapbuf |
Buffer for re-triggering in tail margin | |
static int | sbsize |
static int | sblen |
Size and current length of swapbuf | |
static int | rest_tail |
Samples not processed yet in swap buffer | |
Work area for device configurations for local use | |
static boolean(*) | ad_resume () |
Function pointer to (re)start input | |
static boolean(*) | ad_pause () |
Function pointer to stop input | |
static int(*) | ad_read (SP16 *, int) |
Function pointer to read in input samples | |
static boolean | adin_cut_on |
TRUE if do input segmentation by silence | |
static boolean | silence_cut_default |
Device-dependent default value of adin_cut_on | |
static boolean | strip_flag |
TRUE if skip invalid zero samples | |
static boolean | enable_thread = FALSE |
TRUE if input device needs threading | |
static boolean | ignore_speech_while_recog = TRUE |
TRUE if speech input between calls is ignored while waiting for the recognition process | |
static boolean | need_zmean |
TRUE if zero-mean subtraction (zmeansource) is performed | |
Variables related to POSIX threading | |
static pthread_t | adin_thread |
Thread information | |
static pthread_mutex_t | mutex |
Lock primitive | |
static SP16 * | speech |
Unprocessed samples recorded by A/D-in thread | |
static int | speechlen |
Current length of speech | |
static boolean | transfer_online = FALSE |
Semaphore to start/stop recognition. | |
static boolean | adinthread_buffer_overflowed = FALSE |
Will be set to TRUE if speech has been overflowed. | |
Input data buffer | |
static SP16 * | buffer = NULL |
Temporary buffer to hold input samples | |
static int | bpmax |
Maximum length of buffer | |
static int | bp |
Current point to store the next data | |
static int | current_len |
Current length of stored samples | |
static SP16 * | cbuf |
Buffer for flushing cycle buffer just after detecting trigger |
音声区間の検出は,振幅レベルと零交差数を用いて行なっています. 入力断片ごとに,レベルしきい値を越える振幅について零交差数をカウントし, それが指定した数以上になれば,音声区間開始検出として 取り込みを開始します.取り込み中に零交差数が指定数以下になれば, 取り込みを停止します.実際には頑健に切り出しを行なうため,開始部と 停止部の前後にマージンを持たせて切り出します. また必要であれば DC offset の調整を行ないます.
音声データの取り込みと並行して入力音声の処理を行ないます.このため, 取り込んだ音声データはその取り込み単位(live入力では一定時間,音声ファイル ではバッファサイズ)ごとに,それらを引数としてコールバック関数が呼ばれます. このコールバック関数としてデータの保存や特徴量抽出, (フレーム同期の)認識処理を進める関数を指定しておきます.
マイク入力や NetAudio 入力などの Live 入力を直接読み込む場合, コールバック内の処理が重く処理が入力の速度に追い付かないと, デバイスのバッファが溢れ,入力断片がロストする場合があります. このエラーを防ぐために,もし実行環境で pthread が使用可能であれば, 音声取り込み・音声区間検出部は本体と独立したスレッドとして動作します. この場合,このスレッドは本スレッドとバッファ speech を介して以下のように 協調動作します.
定義される関数の概要は以下のとおりです. Juliusのメイン部から呼び出される関数は adin_go() です. 音声取り込みと区間検出処理の本体は adin_cut() です. 音声入力ソースの切替えは, adin_setup_func() を対象となる入力ストリームの 開始・読み込み・停止の関数を引数として呼び出すことで行なわれます. また切り出し処理のための各種パラメータは adin_setup_param() でセットします.
adin-cut.c で定義されています。
static void adin_thread_create | ( | ) | [static] |
create and start A/D-in and detection thread
Start new A/D-in thread, and also initialize buffer speech.
adin-cut.c の 957 行で定義されています。
参照元 adin_setup_param().
void adin_setup_func | ( | int(*)(SP16 *, int) | cad_read, | |
boolean(*)() | cad_pause, | |||
boolean(*)() | cad_resume, | |||
boolean | use_cut_def, | |||
boolean | need_thread | |||
) |
Store the given device-dependent functions and configuration values to local work area. This function will be called from adin_select() via adin_register_func().
cad_read | [in] function to read input samples |
cad_pause | [in] function to stop input |
cad_resume | [in] function to (re-)start input |
use_cut_def | [in] TRUE if the device needs speech segment detection by default |
need_thread | [in] TRUE if the device is live input and needs threading |
adin-cut.c の 179 行で定義されています。
参照元 adin_register_func().
void adin_setup_param | ( | int | silence_cut, | |
boolean | strip_zero, | |||
int | cthres, | |||
int | czc, | |||
int | head_margin, | |||
int | tail_margin, | |||
int | sample_freq, | |||
boolean | ignore_speech, | |||
boolean | need_zeromean | |||
) |
Setup silence detection parameters (should be called after adin_select()). If using pthread, the A/D-in and detection thread will be started at the end of this function.
silence_cut | [in] whether to perform silence cutting. 0=force off, 1=force on, 2=keep device-specific default | |
strip_zero | [in] TRUE to enable stripping of zero samples | |
cthres | [in] input level threshold (0-32767) | |
czc | [in] zero-cross count threshold in a second | |
head_margin | [in] header margin length in msec | |
tail_margin | [in] tail margin length in msec | |
sample_freq | [in] sampling frequency: just providing value for computing other variables | |
ignore_speech | [in] TRUE to ignore speech input between calls, while waiting for the recognition process | |
need_zeromean | [in] TRUE if perform zero-mean subtraction |
adin-cut.c の 216 行で定義されています。
参照元 adin_initialize().
boolean query_segment_on | ( | ) |
Query function to check whether the input speech detection is on or off.
adin-cut.c の 254 行で定義されています。
boolean query_thread_on | ( | ) |
Query function to check whether the input threading is on or off.
adin-cut.c の 265 行で定義されています。
void adin_reset_zmean | ( | ) |
Reset zero mean data to re-estimate zero mean at the next input.
adin-cut.c の 275 行で定義されています。
static void adin_purge | ( | int | from | ) | [static] |
Purge samples already processed in the temporary buffer buffer.
from | [in] Purge samples in range [0..from-1]. |
adin-cut.c の 324 行で定義されています。
参照元 adin_cut().
static int adin_cut | ( | int(*)(SP16 *, int) | ad_process, | |
int(*)() | ad_check | |||
) | [static] |
Main A/D-in function
In threaded mode, this function will detach and loop forever in ad-in thread, storing triggered samples in speech, and telling the status to another process thread via transfer_online. The process thread, called from adin_go(), polls the length of speech and transfer_online, and if there are stored samples, process them.
In non-threaded mode, this function will be called directly from adin_go(), and triggered samples are immediately processed within here.
In module mode, the function argument ad_check should be specified to poll for incoming commands from the client during recognition.
TRUE if we are now triggered
ad_process | function to process the triggered samples |
ad_check | function periodically called while input processing |
adin-cut.c の 351 行で定義されています。
static int adin_store_buffer | ( | SP16 * | now, | |
int | len | |||
) | [static] |
Callback for storing triggered samples to speech in A/D-in thread.
now | [in] triggered fragment | |
len | [in] length of above |
adin-cut.c の 920 行で定義されています。
void adin_thread_input_main | ( | void * | dummy | ) |
A/D-in thread main function: just call adin_cut() with storing function.
dummy | [in] a dummy data, not used. |
adin-cut.c の 947 行で定義されています。
参照元 adin_thread_create().
static int adin_thread_process | ( | int(*)(SP16 *, int) | ad_process, | |
int(*)() | ad_check | |||
) | [static] |
Main function of processing triggered samples at main thread.
Wait for the new samples to be stored in speech by A/D-in thread, and if found, process them.
ad_process | [in] function to process the recorded fragments | |
ad_check | [in] function to be called periodically for checking incoming user command in module mode. |
adin-cut.c の 997 行で定義されています。
参照元 adin_go().
int adin_go | ( | int(*)(SP16 *, int) | ad_process, | |
int(*)() | ad_check | |||
) |
Top function to start input processing
If threading mode is enabled, this function simply enters adin_thread_process() to process triggered samples detected by another running A/D-in thread.
If threading mode is not available or disabled by either device requirement or OS capability, this function simply calls adin_cut() to detect speech segments from the input device and process them concurrently within one process.
ad_process | [in] function to process the recorded fragments | |
ad_check | [in] function to be called periodically for checking incoming user command in module mode. |
adin-cut.c の 1126 行で定義されています。
boolean transfer_online = FALSE [static] |
Semaphore to start/stop recognition.
If TRUE, the A/D-in thread will store incoming samples to speech and the main thread will detect and process them. If FALSE, the A/D-in thread will still get input and check for the trigger in the same way as in the TRUE case, but will not store the samples to speech.
adin-cut.c の 300 行で定義されています。