@@ -24,24 +24,18 @@ cpp::result<void, InferResult> InferenceService::HandleChatCompletion(
2424 return cpp::fail (std::make_pair (stt, res));
2525 }
2626
27+ auto cb = [q, tool_choice](Json::Value status, Json::Value res) {
28+ if (!tool_choice.isNull ()) {
29+ res[" tool_choice" ] = tool_choice;
30+ }
31+ q->push (std::make_pair (status, res));
32+ };
2733 if (std::holds_alternative<EngineI*>(engine_result.value ())) {
2834 std::get<EngineI*>(engine_result.value ())
29- ->HandleChatCompletion (
30- json_body, [q, tool_choice](Json::Value status, Json::Value res) {
31- if (!tool_choice.isNull ()) {
32- res[" tool_choice" ] = tool_choice;
33- }
34- q->push (std::make_pair (status, res));
35- });
35+ ->HandleChatCompletion (json_body, std::move (cb));
3636 } else {
3737 std::get<RemoteEngineI*>(engine_result.value ())
38- ->HandleChatCompletion (
39- json_body, [q, tool_choice](Json::Value status, Json::Value res) {
40- if (!tool_choice.isNull ()) {
41- res[" tool_choice" ] = tool_choice;
42- }
43- q->push (std::make_pair (status, res));
44- });
38+ ->HandleChatCompletion (json_body, std::move (cb));
4539 }
4640
4741 return {};
@@ -66,16 +60,15 @@ cpp::result<void, InferResult> InferenceService::HandleEmbedding(
6660 return cpp::fail (std::make_pair (stt, res));
6761 }
6862
63+ auto cb = [q](Json::Value status, Json::Value res) {
64+ q->push (std::make_pair (status, res));
65+ };
6966 if (std::holds_alternative<EngineI*>(engine_result.value ())) {
7067 std::get<EngineI*>(engine_result.value ())
71- ->HandleEmbedding (json_body, [q](Json::Value status, Json::Value res) {
72- q->push (std::make_pair (status, res));
73- });
68+ ->HandleEmbedding (json_body, std::move (cb));
7469 } else {
7570 std::get<RemoteEngineI*>(engine_result.value ())
76- ->HandleEmbedding (json_body, [q](Json::Value status, Json::Value res) {
77- q->push (std::make_pair (status, res));
78- });
71+ ->HandleEmbedding (json_body, std::move (cb));
7972 }
8073 return {};
8174}
@@ -104,18 +97,16 @@ InferResult InferenceService::LoadModel(
10497 // might need mutex here
10598 auto engine_result = engine_service_->GetLoadedEngine (engine_type);
10699
100+ auto cb = [&stt, &r](Json::Value status, Json::Value res) {
101+ stt = status;
102+ r = res;
103+ };
107104 if (std::holds_alternative<EngineI*>(engine_result.value ())) {
108105 std::get<EngineI*>(engine_result.value ())
109- ->LoadModel (json_body, [&stt, &r](Json::Value status, Json::Value res) {
110- stt = status;
111- r = res;
112- });
106+ ->LoadModel (json_body, std::move (cb));
113107 } else {
114108 std::get<RemoteEngineI*>(engine_result.value ())
115- ->LoadModel (json_body, [&stt, &r](Json::Value status, Json::Value res) {
116- stt = status;
117- r = res;
118- });
109+ ->LoadModel (json_body, std::move (cb));
119110 }
120111 return std::make_pair (stt, r);
121112}
@@ -139,20 +130,16 @@ InferResult InferenceService::UnloadModel(const std::string& engine_name,
139130 json_body[" model" ] = model_id;
140131
141132 LOG_TRACE << " Start unload model" ;
133+ auto cb = [&r, &stt](Json::Value status, Json::Value res) {
134+ stt = status;
135+ r = res;
136+ };
142137 if (std::holds_alternative<EngineI*>(engine_result.value ())) {
143138 std::get<EngineI*>(engine_result.value ())
144- ->UnloadModel (std::make_shared<Json::Value>(json_body),
145- [&r, &stt](Json::Value status, Json::Value res) {
146- stt = status;
147- r = res;
148- });
139+ ->UnloadModel (std::make_shared<Json::Value>(json_body), std::move (cb));
149140 } else {
150141 std::get<RemoteEngineI*>(engine_result.value ())
151- ->UnloadModel (std::make_shared<Json::Value>(json_body),
152- [&r, &stt](Json::Value status, Json::Value res) {
153- stt = status;
154- r = res;
155- });
142+ ->UnloadModel (std::make_shared<Json::Value>(json_body), std::move (cb));
156143 }
157144
158145 return std::make_pair (stt, r);
@@ -181,20 +168,16 @@ InferResult InferenceService::GetModelStatus(
181168
182169 LOG_TRACE << " Start to get model status" ;
183170
171+ auto cb = [&stt, &r](Json::Value status, Json::Value res) {
172+ stt = status;
173+ r = res;
174+ };
184175 if (std::holds_alternative<EngineI*>(engine_result.value ())) {
185176 std::get<EngineI*>(engine_result.value ())
186- ->GetModelStatus (json_body,
187- [&stt, &r](Json::Value status, Json::Value res) {
188- stt = status;
189- r = res;
190- });
177+ ->GetModelStatus (json_body, std::move (cb));
191178 } else {
192179 std::get<RemoteEngineI*>(engine_result.value ())
193- ->GetModelStatus (json_body,
194- [&stt, &r](Json::Value status, Json::Value res) {
195- stt = status;
196- r = res;
197- });
180+ ->GetModelStatus (json_body, std::move (cb));
198181 }
199182
200183 return std::make_pair (stt, r);
@@ -214,15 +197,20 @@ InferResult InferenceService::GetModels(
214197
215198 LOG_TRACE << " Start to get models" ;
216199 Json::Value resp_data (Json::arrayValue);
200+ auto cb = [&resp_data](Json::Value status, Json::Value res) {
201+ for (auto r : res[" data" ]) {
202+ resp_data.append (r);
203+ }
204+ };
217205 for (const auto & loaded_engine : loaded_engines) {
218- auto e = std::get <EngineI*>(loaded_engine);
219- if (e-> IsSupported ( " GetModels " )) {
220- e->GetModels (json_body,
221- [&resp_data](Json::Value status, Json::Value res) {
222- for ( auto r : res[ " data " ]) {
223- resp_data. append (r);
224- }
225- } );
206+ if ( std::holds_alternative <EngineI*>(loaded_engine)) {
207+ auto e = std::get<EngineI*>(loaded_engine);
208+ if ( e->IsSupported ( " GetModels " )) {
209+ e-> GetModels (json_body, std::move (cb));
210+ }
211+ } else {
212+ std::get<RemoteEngineI*>(loaded_engine)
213+ -> GetModels (json_body, std::move (cb) );
226214 }
227215 }
228216
@@ -283,6 +271,25 @@ InferResult InferenceService::FineTuning(
283271 return std::make_pair (stt, r);
284272}
285273
// Asks the loaded engine identified by `engine_name` to stop inferencing for
// `model_id`.
//
// Returns false when GetLoadedEngine() reports an error for `engine_name`.
// Returns true in every other case, including when the loaded engine is not
// held as an EngineI* or when its IsSupported() check fails; in those cases
// no stop call is made.
// NOTE(review): a `true` result therefore does not confirm that anything was
// actually stopped - confirm callers do not rely on that.
274+ bool InferenceService::StopInferencing (const std::string& engine_name,
275+ const std::string& model_id) {
276+ CTL_DBG (" Stop inferencing" );
277+ auto engine_result = engine_service_->GetLoadedEngine (engine_name);
// Nothing to stop if the engine lookup failed.
278+ if (engine_result.has_error ()) {
279+ LOG_WARN << " Engine is not loaded yet" ;
280+ return false ;
281+ }
282+
// Only the EngineI* alternative is handled here; any other alternative held
// by the result falls through without a stop call.
283+ if (std::holds_alternative<EngineI*>(engine_result.value ())) {
284+ auto engine = std::get<EngineI*>(engine_result.value ());
// Call StopInferencing only when the engine's IsSupported() check passes.
285+ if (engine->IsSupported (" StopInferencing" )) {
286+ engine->StopInferencing (model_id);
287+ CTL_INF (" Stopped inferencing" );
288+ }
289+ }
290+ return true ;
291+ }
292+
286293bool InferenceService::HasFieldInReq (std::shared_ptr<Json::Value> json_body,
287294 const std::string& field) {
288295 if (!json_body || (*json_body)[field].isNull ()) {
0 commit comments