Merge pull request #68 from tsisw/llama-cli-error-cases

akapoor3518 · web-flow · commit 3245eca40516 · 2025-10-22T12:43:30.000-07:00
@FIR1037 - Llama.cpp: Error Cases when Model is wrong or other error …
diff --git a/ggml/include/ggml-tsavorite.h b/ggml/include/ggml-tsavorite.h
@@ -214,8 +214,8 @@ extern void ggml_tsi_log_tensor_data(tensor_log log_data);
 // GGML supports tensors with a maximum rank of 4
 #define MEM_REF_DESCRIPTOR_RANK 4
 #define TSI_TVU_MEM_ALIGN 128
-void
-ggml_tsi_finalize();
+
+void tsi_cleanup();
 
 //
 // backend API
diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
@@ -2591,6 +2591,7 @@ void ggml_perf_accumulate(struct ggml_perf_totals totals[GGML_OP_COUNT], struct
 const char * ggml_backend_type(enum ggml_compute_backend_type backend);
 
 #endif /* GML_PERF-related flags */
+void ggml_backend_cleanup();
 
 #ifdef  __cplusplus
 }
diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
@@ -27,6 +27,10 @@
 #include <sys/sysctl.h>
 #endif
 
+#ifdef GGML_TSAVORITE
+#include "ggml-tsavorite.h"
+#endif /* GGML_TSAVORITE */
+
 
 // backend buffer type
 
@@ -2209,3 +2213,11 @@ ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size)
     GGML_ASSERT((uintptr_t)ptr % TENSOR_ALIGNMENT == 0 && "buffer pointer must be aligned");
     return ggml_backend_buffer_init(ggml_backend_cpu_buffer_from_ptr_type(), ggml_backend_cpu_buffer_from_ptr_i, ptr, size);
 }
+
+// Backend-wide teardown hook: forwards to tsi_cleanup() when the build
+// defines GGML_TSAVORITE and does nothing in every other configuration.
+void ggml_backend_cleanup() {
+#ifdef GGML_TSAVORITE
+    tsi_cleanup();
+#endif /* GGML_TSAVORITE */
+}
diff --git a/ggml/src/ggml-tsavorite/ggml-tsavorite.cpp b/ggml/src/ggml-tsavorite/ggml-tsavorite.cpp
@@ -730,19 +730,19 @@ static void ggml_tsavorite_free(struct ggml_backend_tsavorite_context *ctx) {
 }
 
 void
-ggml_tsi_finalize() {
-  if (runtime_initialized != true)
-      return;
-  tsi_finalize();
-  GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);
-  tsirt::utils::TSIProfiler::finalize();
-  std::cout << "\nOPU Profiling Results:" << std::endl;
-  std::cout << tsirt::utils::TSIProfiler::getFormattedResults(
-                   /*truncateFuncNames*/ true)
-            << std::endl;
-  sleep(2);
-  GGML_TSAVORITE_LOG_INFO("End %s\n", __func__);
-  return;
+tsi_cleanup() {  // Calls tsi_finalize(), finalizes the TSI profiler and prints its formatted results to stdout.
+    if (runtime_initialized != true)  // skip all teardown work unless the runtime_initialized flag is set
+        return;
+    tsi_finalize();
+    GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);  // NOTE(review): this "Start" marker is logged after tsi_finalize() has already been called
+    tsirt::utils::TSIProfiler::finalize();
+    std::cout << "\nOPU Profiling Results:" << std::endl;
+    std::cout << tsirt::utils::TSIProfiler::getFormattedResults(
+                 /*truncateFuncNames*/ true)
+              << std::endl;
+    sleep(2);  // NOTE(review): fixed sleep(2) delay; its purpose is not evident from this hunk - confirm it is still needed
+    GGML_TSAVORITE_LOG_INFO("End %s\n", __func__);
+    return;
 }
 
 #if 0
diff --git a/tools/main/main.cpp b/tools/main/main.cpp
@@ -14,7 +14,7 @@
 #include <sstream>
 #include <string>
 #include <vector>
-#include <ggml-tsavorite.h>
+#include <ggml.h>
 
 #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
 #include <signal.h>
@@ -94,6 +94,7 @@ int main(int argc, char ** argv) {
     common_params params;
     g_params = &params;
     if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_MAIN, print_usage)) {
+        ggml_backend_cleanup();
         return 1;
     }
 
@@ -110,6 +111,7 @@ int main(int argc, char ** argv) {
         LOG_ERR("************\n");
         LOG_ERR("%s: please use the 'embedding' tool for embedding calculations\n", __func__);
         LOG_ERR("************\n\n");
+        ggml_backend_cleanup();
 
         return 0;
     }
@@ -154,7 +156,8 @@ int main(int argc, char ** argv) {
 
     if (model == NULL) {
         LOG_ERR("%s: error: unable to load model\n", __func__);
-        ggml_tsi_finalize();
+	printf("\n Unable to load Model\n");
+        ggml_backend_cleanup();
         return 1;
     }
 
@@ -168,6 +171,7 @@ int main(int argc, char ** argv) {
     auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
     if (!cpu_dev) {
         LOG_ERR("%s: no CPU backend found\n", __func__);
+        ggml_backend_cleanup();
         return 1;
     }
     auto * reg = ggml_backend_dev_backend_reg(cpu_dev);
@@ -186,6 +190,7 @@ int main(int argc, char ** argv) {
         threadpool_batch = ggml_threadpool_new_fn(&tpp_batch);
         if (!threadpool_batch) {
             LOG_ERR("%s: batch threadpool create failed : n_threads %d\n", __func__, tpp_batch.n_threads);
+            ggml_backend_cleanup();
             return 1;
         }
 
@@ -196,6 +201,7 @@ int main(int argc, char ** argv) {
     struct ggml_threadpool * threadpool = ggml_threadpool_new_fn(&tpp);
     if (!threadpool) {
         LOG_ERR("%s: threadpool create failed : n_threads %d\n", __func__, tpp.n_threads);
+        ggml_backend_cleanup();
         return 1;
     }
 
@@ -259,6 +265,7 @@ int main(int argc, char ** argv) {
             size_t n_token_count_out = 0;
             if (!llama_state_load_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.capacity(), &n_token_count_out)) {
                 LOG_ERR("%s: failed to load session file '%s'\n", __func__, path_session.c_str());
+                ggml_backend_cleanup();
                 return 1;
             }
             session_tokens.resize(n_token_count_out);
@@ -283,6 +290,7 @@ int main(int argc, char ** argv) {
         auto formatted = common_chat_format_single(chat_templates.get(), chat_msgs, new_msg, role == "user", g_params->use_jinja);
         chat_msgs.push_back(new_msg);
         LOG_DBG("formatted: '%s'\n", formatted.c_str());
+        ggml_backend_cleanup();
         return formatted;
     };
 
@@ -333,13 +341,15 @@ int main(int argc, char ** argv) {
             LOG_WRN("embd_inp was considered empty and bos was added: %s\n", string_from(ctx, embd_inp).c_str());
         } else {
             LOG_ERR("input is empty\n");
+            ggml_backend_cleanup();
             return -1;
         }
     }
 
     // Tokenize negative prompt
     if ((int) embd_inp.size() > n_ctx - 4) {
         LOG_ERR("%s: prompt is too long (%d tokens, max %d)\n", __func__, (int) embd_inp.size(), n_ctx - 4);
+        ggml_backend_cleanup();
         return 1;
     }
 
@@ -427,6 +437,7 @@ int main(int argc, char ** argv) {
         sigaction(SIGINT, &sigint_action, NULL);
 #elif defined (_WIN32)
         auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
+            ggml_backend_cleanup();
             return (ctrl_type == CTRL_C_EVENT) ? (sigint_handler(SIGINT), true) : false;
         };
         SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
@@ -476,6 +487,7 @@ int main(int argc, char ** argv) {
     smpl = common_sampler_init(model, sparams);
     if (!smpl) {
         LOG_ERR("%s: failed to initialize sampling subsystem\n", __func__);
+        ggml_backend_cleanup();
         return 1;
     }
 
@@ -561,6 +573,7 @@ int main(int argc, char ** argv) {
 
         if (llama_encode(ctx, llama_batch_get_one(enc_input_buf, enc_input_size))) {
             LOG_ERR("%s : failed to eval\n", __func__);
+            ggml_backend_cleanup();
             return 1;
         }
 
diff --git a/tsi-pkg-build.sh b/tsi-pkg-build.sh
@@ -38,11 +38,11 @@ cd ../../
 echo 'building llama.cp, ggml for tsavorite  and other binary for posix'
 if [ "$(echo "$1" | tr '[:upper:]' '[:lower:]')" = "release" ];
 then
-  cmake -B build-posix -DGGML_TSAVORITE=ON -DGGML_TSAVORITE_TARGET=posix -DCMAKE_C_FLAGS="-DGGML_PERF_RELEASE -DGGML_TARGET_POSIX"   -DCMAKE_CXX_FLAGS="-DGGML_PERF_RELEASE -DGGML_TARGET_POSIX"
+  cmake -B build-posix -DGGML_TSAVORITE=ON -DGGML_TSAVORITE_TARGET=posix -DCMAKE_C_FLAGS="-DGGML_PERF_RELEASE -DGGML_TARGET_POSIX -DGGML_TSAVORITE"   -DCMAKE_CXX_FLAGS="-DGGML_PERF_RELEASE -DGGML_TARGET_POSIX -DGGML_TSAVORITE"
 elif [ "$(echo "$1" | tr '[:upper:]' '[:lower:]')" = "debug" ]; then
-  cmake -B build-posix -DGGML_TSAVORITE=ON -DGGML_TSAVORITE_TARGET=posix -DCMAKE_C_FLAGS="-DGGML_PERF_DETAIL -DGGML_TARGET_POSIX"   -DCMAKE_CXX_FLAGS="-DGGML_PERF_DETAIL -DGGML_TARGET_POSIX"
+  cmake -B build-posix -DGGML_TSAVORITE=ON -DGGML_TSAVORITE_TARGET=posix -DCMAKE_C_FLAGS="-DGGML_PERF_DETAIL -DGGML_TARGET_POSIX -DGGML_TSAVORITE"   -DCMAKE_CXX_FLAGS="-DGGML_PERF_DETAIL -DGGML_TARGET_POSIX -DGGML_TSAVORITE"
 else
-  cmake -B build-posix -DGGML_TSAVORITE=ON -DGGML_TSAVORITE_TARGET=posix -DCMAKE_C_FLAGS="-DGGML_PERF -DGGML_TARGET_POSIX"   -DCMAKE_CXX_FLAGS="-DGGML_PERF -DGGML_TARGET_POSIX"
+  cmake -B build-posix -DGGML_TSAVORITE=ON -DGGML_TSAVORITE_TARGET=posix -DCMAKE_C_FLAGS="-DGGML_PERF -DGGML_TARGET_POSIX -DGGML_TSAVORITE"   -DCMAKE_CXX_FLAGS="-DGGML_PERF -DGGML_TARGET_POSIX -DGGML_TSAVORITE"
 fi
 
 cmake --build build-posix --config Release

Original file line number	Diff line number	Diff line change
`@@ -2591,6 +2591,7 @@ void ggml_perf_accumulate(struct ggml_perf_totals totals[GGML_OP_COUNT], struct`
`2591`	`2591`	`const char * ggml_backend_type(enum ggml_compute_backend_type backend);`
`2592`	`2592`
`2593`	`2593`	`#endif /* GML_PERF-related flags */`
	`2594`	`+void ggml_backend_cleanup();`
`2594`	`2595`
`2595`	`2596`	`#ifdef __cplusplus`
`2596`	`2597`	`}`