99#include < whisper.h>
1010#include < sstream>
1111
12- struct PerSocketData {
13- wav_writer wavWriter;
14- };
15-
16- bool process_vad (float *pDouble, unsigned long size);
17-
18- std::vector<float > extract_first_voice_segment (std::vector<float > vector1);
19-
2012using namespace stream_components ;
13+ nlohmann::json getResult (whisper_context *ctx);
14+ bool processAudio (WhisperService service, std::vector<float > pcm32, const whisper_local_stream_params& params);
2115
2216int main (int argc, char **argv) {
2317 // Read parameters...
2418 whisper_local_stream_params params;
2519
26- if (whisper_params_parse (argc, argv, params) == false ) {
20+ if (! whisper_params_parse (argc, argv, params)) {
2721 return 1 ;
2822 }
2923
@@ -40,7 +34,7 @@ int main(int argc, char **argv) {
4034 }
4135
4236 // Instantiate the service
43- struct whisper_context_params cparams;
37+ struct whisper_context_params cparams{} ;
4438 cparams.use_gpu = params.service .use_gpu ;
4539 stream_components::WhisperService whisperService (params.service , params.audio , cparams);
4640
@@ -66,7 +60,7 @@ int main(int argc, char **argv) {
6660 };
6761 // Save Audio
6862 auto ws_save_handler=[](auto *ws,std::string_view message,uWS::OpCode opCode){
69- std::string * userData = (std::string*)ws->getUserData ();
63+ auto * userData = (std::string*)ws->getUserData ();
7064 printf (" %s: User Data: %s\n " , get_current_time ().c_str (), userData->c_str ());
7165 thread_local wav_writer wavWriter;
7266 thread_local std::string filename;
@@ -144,16 +138,36 @@ int main(int argc, char **argv) {
144138// nlohmann::json response = {{"name",filename},{"signal", signal}};
145139 response = {{" name" , filename},
146140 {" signal" , signal}};
147- response[" result" ] = final_results;
141+
142+ std::vector<float > pcm32 (audioBuffer.size ());
143+ std::transform (audioBuffer.begin (), audioBuffer.end (), pcm32.begin (), [](int16_t sample) {
144+ return static_cast <float >(sample) / 32768 .0f ;
145+ });
146+ audioBuffer.clear ();
147+ // 如果开启了VAD
148+ bool isOk;
149+ if (params.audio .use_vad ) {
150+ printf (" %s: vad: %d \n " , get_current_time ().c_str (), params.audio .use_vad );
151+ // TODO: 实现VAD处理,
152+ // bool containsVoice = vad_simple(audioBuffer, WHISPER_SAMPLE_RATE, 1000, params.audio.vad_thold, params.audio.freq_thold, false);
153+ isOk=whisperService.process (pcm32.data (), pcm32.size ());
154+ } else {
155+ // asr
156+ isOk= whisperService.process (pcm32.data (), pcm32.size ());
157+ }
158+ if (isOk) {
159+ final_results = getResult (whisperService.ctx );
160+ response[" result" ] = final_results;
161+ }
148162 ws->send (response.dump (), uWS::OpCode::TEXT);
149163 }
150164 // other process logic...
151165 } catch (const std::exception &e) {
152166 std::cerr << " JSON parse error: " << e.what () << std::endl;
167+ auto size = message.size ();
153168 }
154169 } else if (opCode == uWS::OpCode::BINARY) {
155170 // process binary message(PCM16 data)
156- auto size = message.size ();
157171 std::basic_string_view<char , std::char_traits<char >>::const_pointer data = message.data ();
158172 printf (" %s: Received message size on /paddlespeech/asr/streaming: %zu\n " , get_current_time ().c_str (), size);
159173 // add received PCM16 to audio cache
@@ -172,32 +186,18 @@ int main(int argc, char **argv) {
172186 });
173187 audioBuffer.clear ();
174188 // 如果开启了VAD
175- bool isOk = false ;
189+ bool isOk;
176190 if (params.audio .use_vad ) {
177- printf (" %s: vad: %n \n " , get_current_time ().c_str (), params.audio .use_vad );
191+ printf (" %s: vad: %d \n " , get_current_time ().c_str (), params.audio .use_vad );
178192 // TODO: 实现VAD处理,
179193 // bool containsVoice = vad_simple(audioBuffer, WHISPER_SAMPLE_RATE, 1000, params.audio.vad_thold, params.audio.freq_thold, false);
180- isOk = whisperService.process (pcm32.data (), pcm32.size ());
194+ isOk= whisperService.process (pcm32.data (), pcm32.size ());
181195 } else {
182196 // asr
183- isOk = whisperService.process (pcm32.data (), pcm32.size ());
197+ isOk= whisperService.process (pcm32.data (), pcm32.size ());
184198 }
185199 if (isOk) {
186- const int n_segments = whisper_full_n_segments (whisperService.ctx );
187- nlohmann::json results = nlohmann::json (nlohmann::json::array ());
188- for (int i = 0 ; i < n_segments; ++i) {
189- nlohmann::json segment;
190- int64_t t0 = whisper_full_get_segment_t0 (whisperService.ctx , i);
191- int64_t t1 = whisper_full_get_segment_t1 (whisperService.ctx , i);
192- const char *sentence = whisper_full_get_segment_text (whisperService.ctx , i);
193- auto result = std::to_string (t0) + " -->" + std::to_string (t1) + " :" + sentence + " \n " ;
194- printf (" %s: result:%s\n " , get_current_time ().c_str (), result.c_str ());
195- segment[" t0" ] = t0;
196- segment[" t1" ] = t1;
197- segment[" sentence" ] = sentence;
198- results.push_back (segment);
199- }
200- final_results = results;
200+ final_results = getResult (whisperService.ctx );
201201 response[" result" ] = final_results;
202202 }
203203 }
@@ -215,7 +215,7 @@ int main(int argc, char **argv) {
215215 // only_save_audio
216216 .ws <std::string>(" /streaming/save" , {.open =[](auto *ws){
217217 // 初始化用户数据
218- std::string * userData = (std::string*)ws->getUserData ();
218+ auto * userData = (std::string*)ws->getUserData ();
219219 *userData = " Create User Id" ; // 设置初始值
220220 },.message = ws_save_handler})
221221 // streaming asr
@@ -224,7 +224,34 @@ int main(int argc, char **argv) {
224224 .listen (port, started_handler).run ();
225225}
226226
227- std::vector<float > extract_first_voice_segment (std::vector<float > vector1) {
228- return std::vector<float >();
227+ bool processAudio (WhisperService whisperService, std::vector<float > pcm32, const whisper_local_stream_params& params) {
228+ if (params.audio .use_vad ) {
229+ printf (" %s: vad: %d \n " , get_current_time ().c_str (), params.audio .use_vad );
230+ // TODO: 实现VAD处理,
231+ // bool containsVoice = vad_simple(audioBuffer, WHISPER_SAMPLE_RATE, 1000, params.audio.vad_thold, params.audio.freq_thold, false);
232+ return whisperService.process (pcm32.data (), pcm32.size ());
233+ } else {
234+ // asr
235+ return whisperService.process (pcm32.data (), pcm32.size ());
236+ }
229237}
230238
239+ nlohmann::json getResult (whisper_context *ctx) {
240+ nlohmann::json results = nlohmann::json (nlohmann::json::array ());
241+ const int n_segments = whisper_full_n_segments (ctx);
242+ for (int i = 0 ; i < n_segments; ++i) {
243+ nlohmann::json segment;
244+ int64_t t0 = whisper_full_get_segment_t0 (ctx, i);
245+ int64_t t1 = whisper_full_get_segment_t1 (ctx, i);
246+ const char *sentence = whisper_full_get_segment_text (ctx, i);
247+ auto result = std::to_string (t0) + " -->" + std::to_string (t1) + " :" + sentence + " \n " ;
248+ printf (" %s: result:%s\n " , get_current_time ().c_str (), result.c_str ());
249+ segment[" t0" ] = t0;
250+ segment[" t1" ] = t1;
251+ segment[" sentence" ] = sentence;
252+ results.push_back (segment);
253+ }
254+ return results;
255+ }
256+
257+
0 commit comments