1414#include < sstream>
1515#include < string>
1616#include < vector>
17- #include < ggml-tsavorite .h>
17+ #include < ggml.h>
1818
1919#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
2020#include < signal.h>
@@ -94,6 +94,7 @@ int main(int argc, char ** argv) {
9494 common_params params;
9595 g_params = &params;
9696 if (!common_params_parse (argc, argv, params, LLAMA_EXAMPLE_MAIN, print_usage)) {
97+ ggml_backend_cleanup ();
9798 return 1 ;
9899 }
99100
@@ -110,6 +111,7 @@ int main(int argc, char ** argv) {
110111 LOG_ERR (" ************\n " );
111112 LOG_ERR (" %s: please use the 'embedding' tool for embedding calculations\n " , __func__);
112113 LOG_ERR (" ************\n\n " );
114+ ggml_backend_cleanup ();
113115
114116 return 0 ;
115117 }
@@ -154,7 +156,8 @@ int main(int argc, char ** argv) {
154156
155157 if (model == NULL ) {
156158 LOG_ERR (" %s: error: unable to load model\n " , __func__);
157- ggml_tsi_finalize ();
159+ printf (" \n Unable to load Model\n " );
160+ ggml_backend_cleanup ();
158161 return 1 ;
159162 }
160163
@@ -168,6 +171,7 @@ int main(int argc, char ** argv) {
168171 auto * cpu_dev = ggml_backend_dev_by_type (GGML_BACKEND_DEVICE_TYPE_CPU);
169172 if (!cpu_dev) {
170173 LOG_ERR (" %s: no CPU backend found\n " , __func__);
174+ ggml_backend_cleanup ();
171175 return 1 ;
172176 }
173177 auto * reg = ggml_backend_dev_backend_reg (cpu_dev);
@@ -186,6 +190,7 @@ int main(int argc, char ** argv) {
186190 threadpool_batch = ggml_threadpool_new_fn (&tpp_batch);
187191 if (!threadpool_batch) {
188192 LOG_ERR (" %s: batch threadpool create failed : n_threads %d\n " , __func__, tpp_batch.n_threads );
193+ ggml_backend_cleanup ();
189194 return 1 ;
190195 }
191196
@@ -196,6 +201,7 @@ int main(int argc, char ** argv) {
196201 struct ggml_threadpool * threadpool = ggml_threadpool_new_fn (&tpp);
197202 if (!threadpool) {
198203 LOG_ERR (" %s: threadpool create failed : n_threads %d\n " , __func__, tpp.n_threads );
204+ ggml_backend_cleanup ();
199205 return 1 ;
200206 }
201207
@@ -259,6 +265,7 @@ int main(int argc, char ** argv) {
259265 size_t n_token_count_out = 0 ;
260266 if (!llama_state_load_file (ctx, path_session.c_str (), session_tokens.data (), session_tokens.capacity (), &n_token_count_out)) {
261267 LOG_ERR (" %s: failed to load session file '%s'\n " , __func__, path_session.c_str ());
268+ ggml_backend_cleanup ();
262269 return 1 ;
263270 }
264271 session_tokens.resize (n_token_count_out);
@@ -283,6 +290,7 @@ int main(int argc, char ** argv) {
283290 auto formatted = common_chat_format_single (chat_templates.get (), chat_msgs, new_msg, role == " user" , g_params->use_jinja );
284291 chat_msgs.push_back (new_msg);
285292 LOG_DBG (" formatted: '%s'\n " , formatted.c_str ());
293+ ggml_backend_cleanup ();
286294 return formatted;
287295 };
288296
@@ -333,13 +341,15 @@ int main(int argc, char ** argv) {
333341 LOG_WRN (" embd_inp was considered empty and bos was added: %s\n " , string_from (ctx, embd_inp).c_str ());
334342 } else {
335343 LOG_ERR (" input is empty\n " );
344+ ggml_backend_cleanup ();
336345 return -1 ;
337346 }
338347 }
339348
340349 // Tokenize negative prompt
341350 if ((int ) embd_inp.size () > n_ctx - 4 ) {
342351 LOG_ERR (" %s: prompt is too long (%d tokens, max %d)\n " , __func__, (int ) embd_inp.size (), n_ctx - 4 );
352+ ggml_backend_cleanup ();
343353 return 1 ;
344354 }
345355
@@ -427,6 +437,7 @@ int main(int argc, char ** argv) {
427437 sigaction (SIGINT, &sigint_action, NULL );
428438#elif defined (_WIN32)
429439 auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
440+ ggml_backend_cleanup ();
430441 return (ctrl_type == CTRL_C_EVENT) ? (sigint_handler (SIGINT), true ) : false ;
431442 };
432443 SetConsoleCtrlHandler (reinterpret_cast <PHANDLER_ROUTINE>(console_ctrl_handler), true );
@@ -476,6 +487,7 @@ int main(int argc, char ** argv) {
476487 smpl = common_sampler_init (model, sparams);
477488 if (!smpl) {
478489 LOG_ERR (" %s: failed to initialize sampling subsystem\n " , __func__);
490+ ggml_backend_cleanup ();
479491 return 1 ;
480492 }
481493
@@ -561,6 +573,7 @@ int main(int argc, char ** argv) {
561573
562574 if (llama_encode (ctx, llama_batch_get_one (enc_input_buf, enc_input_size))) {
563575 LOG_ERR (" %s : failed to eval\n " , __func__);
576+ ggml_backend_cleanup ();
564577 return 1 ;
565578 }
566579
0 commit comments