1- #include < cstdio>
2- #include < cstdlib>
31#include < iostream>
4- #include < memory>
5- #include < filesystem>
6-
7- int main (int argc, char **argv) {
8- // default cmake-build-debug/main
9- const char filename[] = " ../pcm/16k_1.pcm" ;
10- const char output_dir[] = " output_pcm" ;
11- const char output_filename_prefix[] = " 16k_1.pcm" ;
12- if (!std::filesystem::exists (output_dir)) {
13- std::filesystem::create_directories (output_dir);
2+ #include < vector>
3+ #include < cstdint>
4+ #include < whisper.h>
5+
6+ #include " ../stream/stream_components_service.h"
7+ #include " ../stream/stream_components.h"
8+ #include " ../common/utils.h"
9+ #include " ../common/common.h"
10+ #include < speex/speex_preprocess.h>
11+
12+ using namespace stream_components ;
13+
14+
15+ int main () {
16+ std::string wav_file_path = " ../samples/jfk.wav" ; // 替换为您的 WAV 文件路径
17+ // audio arrays
18+ std::vector<float > pcmf32; // mono-channel F32 PCM
19+ std::vector<std::vector<float >> pcmf32s; // stereo-channel F32 PCM
20+ ::read_wav (wav_file_path, pcmf32, pcmf32s, false );
21+
22+ printf (" size of samples %lu\n " , pcmf32.size ());
23+
24+
25+ whisper_local_stream_params params;
26+ struct whisper_context_params cparams{};
27+ cparams.use_gpu = params.service .use_gpu ;
28+ // Instantiate the service
29+ stream_components::WhisperService whisperService (params.service , params.audio , cparams);
30+
31+ // Simulate websokcet by adding 1500 data each time.
32+ std::vector<float > audio_buffer;
33+ int chunk_size = 160 ; // 适用于 16 kHz 采样率的 100 毫秒帧
34+ SpeexPreprocessState *st = speex_preprocess_state_init (chunk_size, WHISPER_SAMPLE_RATE);
35+
36+ int vad = 1 ;
37+ speex_preprocess_ctl (st, SPEEX_PREPROCESS_SET_VAD, &vad);
38+
39+ bool last_is_speech = false ;
40+ // 处理音频帧
41+ for (size_t i = 0 ; i < pcmf32.size (); i += chunk_size) {
42+ spx_int16_t frame[chunk_size];
43+ for (int j = 0 ; j < chunk_size; ++j) {
44+ if (i + j < pcmf32.size ()) {
45+ frame[j] = (spx_int16_t )(pcmf32[i + j] * 32768 );
46+ } else {
47+ frame[j] = 0 ; // 对于超出范围的部分填充 0
48+ }
49+ }
50+ int is_speech = speex_preprocess_run (st, frame);
51+
52+ // 将当前帧添加到 audio_buffer
53+ audio_buffer.insert (audio_buffer.end (), pcmf32.begin () + i, pcmf32.begin () + std::min (i + chunk_size, pcmf32.size ()));
54+ printf (" is_speech %d \n " ,is_speech);
55+ if (!is_speech && last_is_speech) {
56+ bool b = whisperService.process (pcmf32.data (), pcmf32.size ());
57+ const nlohmann::json &json_array = get_result (whisperService.ctx );
58+ const std::basic_string<char , std::char_traits<char >, std::allocator<char >> &string = json_array.dump ();
59+ printf (" %s\n " ,string.c_str ());
60+ return 0 ;
61+ audio_buffer.clear ();
62+ }
63+
64+ last_is_speech = is_speech != 0 ;
1465 }
66+
67+ speex_preprocess_state_destroy (st);
1568}
0 commit comments