Skip to content

Commit 3245eca

Browse files
authored
Merge pull request #68 from tsisw/llama-cli-error-cases
@FIR1037 - Llama.cpp: Error Cases when Model is wrong or other error …
2 parents d1d0a11 + dc034f0 commit 3245eca

File tree

6 files changed

+46
-20
lines changed

6 files changed

+46
-20
lines changed

ggml/include/ggml-tsavorite.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -214,8 +214,8 @@ extern void ggml_tsi_log_tensor_data(tensor_log log_data);
214214
// GGML supports tensors with a maximum rank of 4
215215
#define MEM_REF_DESCRIPTOR_RANK 4
216216
#define TSI_TVU_MEM_ALIGN 128
217-
void
218-
ggml_tsi_finalize();
217+
218+
void tsi_cleanup();
219219

220220
//
221221
// backend API

ggml/include/ggml.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2591,6 +2591,7 @@ void ggml_perf_accumulate(struct ggml_perf_totals totals[GGML_OP_COUNT], struct
25912591
const char * ggml_backend_type(enum ggml_compute_backend_type backend);
25922592

25932593
#endif /* GGML_PERF-related flags */
2594+
void ggml_backend_cleanup();
25942595

25952596
#ifdef __cplusplus
25962597
}

ggml/src/ggml-backend.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@
2727
#include <sys/sysctl.h>
2828
#endif
2929

30+
#ifdef GGML_TSAVORITE
31+
#include "ggml-tsavorite.h"
32+
#endif /* GGML_TSAVORITE */
33+
3034

3135
// backend buffer type
3236

@@ -2209,3 +2213,11 @@ ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size)
22092213
GGML_ASSERT((uintptr_t)ptr % TENSOR_ALIGNMENT == 0 && "buffer pointer must be aligned");
22102214
return ggml_backend_buffer_init(ggml_backend_cpu_buffer_from_ptr_type(), ggml_backend_cpu_buffer_from_ptr_i, ptr, size);
22112215
}
2216+
2217+
// Backend-agnostic cleanup entry point.
// When compiled with GGML_TSAVORITE defined it forwards to tsi_cleanup();
// in every other build the body is empty and the call is a no-op.
void ggml_backend_cleanup()
2218+
{
2219+
#ifdef GGML_TSAVORITE
2220+
tsi_cleanup();
2221+
#endif /* GGML_TSAVORITE */
2222+
return;
2223+
}

ggml/src/ggml-tsavorite/ggml-tsavorite.cpp

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -730,19 +730,19 @@ static void ggml_tsavorite_free(struct ggml_backend_tsavorite_context *ctx) {
730730
}
731731

732732
// Teardown routine for the TSI runtime, shown as a diff: the removed side is
// named ggml_tsi_finalize(), the added side tsi_cleanup(); the statements on
// both sides read the same in this view.
// The routine returns immediately unless runtime_initialized is true.
// Otherwise it calls tsi_finalize(), finalizes TSIProfiler, prints the
// formatted profiling results to std::cout, calls sleep(2), and returns.
// NOTE(review): the "Start" log line is emitted after tsi_finalize() has
// already run — confirm that ordering is intended.
void
733-
ggml_tsi_finalize() {
734-
if (runtime_initialized != true)
735-
return;
736-
tsi_finalize();
737-
GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);
738-
tsirt::utils::TSIProfiler::finalize();
739-
std::cout << "\nOPU Profiling Results:" << std::endl;
740-
std::cout << tsirt::utils::TSIProfiler::getFormattedResults(
741-
/*truncateFuncNames*/ true)
742-
<< std::endl;
743-
sleep(2);
744-
GGML_TSAVORITE_LOG_INFO("End %s\n", __func__);
745-
return;
733+
tsi_cleanup() {
734+
if (runtime_initialized != true)
735+
return;
736+
tsi_finalize();
737+
GGML_TSAVORITE_LOG_INFO("Start %s\n", __func__);
738+
tsirt::utils::TSIProfiler::finalize();
739+
std::cout << "\nOPU Profiling Results:" << std::endl;
740+
std::cout << tsirt::utils::TSIProfiler::getFormattedResults(
741+
/*truncateFuncNames*/ true)
742+
<< std::endl;
743+
sleep(2);
744+
GGML_TSAVORITE_LOG_INFO("End %s\n", __func__);
745+
return;
746746
}
747747

748748
#if 0

tools/main/main.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
#include <sstream>
1515
#include <string>
1616
#include <vector>
17-
#include <ggml-tsavorite.h>
17+
#include <ggml.h>
1818

1919
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
2020
#include <signal.h>
@@ -94,6 +94,7 @@ int main(int argc, char ** argv) {
9494
common_params params;
9595
g_params = &params;
9696
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_MAIN, print_usage)) {
97+
ggml_backend_cleanup();
9798
return 1;
9899
}
99100

@@ -110,6 +111,7 @@ int main(int argc, char ** argv) {
110111
LOG_ERR("************\n");
111112
LOG_ERR("%s: please use the 'embedding' tool for embedding calculations\n", __func__);
112113
LOG_ERR("************\n\n");
114+
ggml_backend_cleanup();
113115

114116
return 0;
115117
}
@@ -154,7 +156,8 @@ int main(int argc, char ** argv) {
154156

155157
if (model == NULL) {
156158
LOG_ERR("%s: error: unable to load model\n", __func__);
157-
ggml_tsi_finalize();
159+
printf("\n Unable to load Model\n");
160+
ggml_backend_cleanup();
158161
return 1;
159162
}
160163

@@ -168,6 +171,7 @@ int main(int argc, char ** argv) {
168171
auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
169172
if (!cpu_dev) {
170173
LOG_ERR("%s: no CPU backend found\n", __func__);
174+
ggml_backend_cleanup();
171175
return 1;
172176
}
173177
auto * reg = ggml_backend_dev_backend_reg(cpu_dev);
@@ -186,6 +190,7 @@ int main(int argc, char ** argv) {
186190
threadpool_batch = ggml_threadpool_new_fn(&tpp_batch);
187191
if (!threadpool_batch) {
188192
LOG_ERR("%s: batch threadpool create failed : n_threads %d\n", __func__, tpp_batch.n_threads);
193+
ggml_backend_cleanup();
189194
return 1;
190195
}
191196

@@ -196,6 +201,7 @@ int main(int argc, char ** argv) {
196201
struct ggml_threadpool * threadpool = ggml_threadpool_new_fn(&tpp);
197202
if (!threadpool) {
198203
LOG_ERR("%s: threadpool create failed : n_threads %d\n", __func__, tpp.n_threads);
204+
ggml_backend_cleanup();
199205
return 1;
200206
}
201207

@@ -259,6 +265,7 @@ int main(int argc, char ** argv) {
259265
size_t n_token_count_out = 0;
260266
if (!llama_state_load_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.capacity(), &n_token_count_out)) {
261267
LOG_ERR("%s: failed to load session file '%s'\n", __func__, path_session.c_str());
268+
ggml_backend_cleanup();
262269
return 1;
263270
}
264271
session_tokens.resize(n_token_count_out);
@@ -283,6 +290,7 @@ int main(int argc, char ** argv) {
283290
auto formatted = common_chat_format_single(chat_templates.get(), chat_msgs, new_msg, role == "user", g_params->use_jinja);
284291
chat_msgs.push_back(new_msg);
285292
LOG_DBG("formatted: '%s'\n", formatted.c_str());
293+
ggml_backend_cleanup();
286294
return formatted;
287295
};
288296

@@ -333,13 +341,15 @@ int main(int argc, char ** argv) {
333341
LOG_WRN("embd_inp was considered empty and bos was added: %s\n", string_from(ctx, embd_inp).c_str());
334342
} else {
335343
LOG_ERR("input is empty\n");
344+
ggml_backend_cleanup();
336345
return -1;
337346
}
338347
}
339348

340349
// Tokenize negative prompt
341350
if ((int) embd_inp.size() > n_ctx - 4) {
342351
LOG_ERR("%s: prompt is too long (%d tokens, max %d)\n", __func__, (int) embd_inp.size(), n_ctx - 4);
352+
ggml_backend_cleanup();
343353
return 1;
344354
}
345355

@@ -427,6 +437,7 @@ int main(int argc, char ** argv) {
427437
sigaction(SIGINT, &sigint_action, NULL);
428438
#elif defined (_WIN32)
429439
auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
440+
ggml_backend_cleanup();
430441
return (ctrl_type == CTRL_C_EVENT) ? (sigint_handler(SIGINT), true) : false;
431442
};
432443
SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
@@ -476,6 +487,7 @@ int main(int argc, char ** argv) {
476487
smpl = common_sampler_init(model, sparams);
477488
if (!smpl) {
478489
LOG_ERR("%s: failed to initialize sampling subsystem\n", __func__);
490+
ggml_backend_cleanup();
479491
return 1;
480492
}
481493

@@ -561,6 +573,7 @@ int main(int argc, char ** argv) {
561573

562574
if (llama_encode(ctx, llama_batch_get_one(enc_input_buf, enc_input_size))) {
563575
LOG_ERR("%s : failed to eval\n", __func__);
576+
ggml_backend_cleanup();
564577
return 1;
565578
}
566579

tsi-pkg-build.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,11 @@ cd ../../
3838
echo 'building llama.cp, ggml for tsavorite and other binary for posix'
3939
if [ "$(echo "$1" | tr '[:upper:]' '[:lower:]')" = "release" ];
4040
then
41-
cmake -B build-posix -DGGML_TSAVORITE=ON -DGGML_TSAVORITE_TARGET=posix -DCMAKE_C_FLAGS="-DGGML_PERF_RELEASE -DGGML_TARGET_POSIX" -DCMAKE_CXX_FLAGS="-DGGML_PERF_RELEASE -DGGML_TARGET_POSIX"
41+
cmake -B build-posix -DGGML_TSAVORITE=ON -DGGML_TSAVORITE_TARGET=posix -DCMAKE_C_FLAGS="-DGGML_PERF_RELEASE -DGGML_TARGET_POSIX -DGGML_TSAVORITE" -DCMAKE_CXX_FLAGS="-DGGML_PERF_RELEASE -DGGML_TARGET_POSIX -DGGML_TSAVORITE"
4242
elif [ "$(echo "$1" | tr '[:upper:]' '[:lower:]')" = "debug" ]; then
43-
cmake -B build-posix -DGGML_TSAVORITE=ON -DGGML_TSAVORITE_TARGET=posix -DCMAKE_C_FLAGS="-DGGML_PERF_DETAIL -DGGML_TARGET_POSIX" -DCMAKE_CXX_FLAGS="-DGGML_PERF_DETAIL -DGGML_TARGET_POSIX"
43+
cmake -B build-posix -DGGML_TSAVORITE=ON -DGGML_TSAVORITE_TARGET=posix -DCMAKE_C_FLAGS="-DGGML_PERF_DETAIL -DGGML_TARGET_POSIX -DGGML_TSAVORITE" -DCMAKE_CXX_FLAGS="-DGGML_PERF_DETAIL -DGGML_TARGET_POSIX -DGGML_TSAVORITE"
4444
else
45-
cmake -B build-posix -DGGML_TSAVORITE=ON -DGGML_TSAVORITE_TARGET=posix -DCMAKE_C_FLAGS="-DGGML_PERF -DGGML_TARGET_POSIX" -DCMAKE_CXX_FLAGS="-DGGML_PERF -DGGML_TARGET_POSIX"
45+
cmake -B build-posix -DGGML_TSAVORITE=ON -DGGML_TSAVORITE_TARGET=posix -DCMAKE_C_FLAGS="-DGGML_PERF -DGGML_TARGET_POSIX -DGGML_TSAVORITE" -DCMAKE_CXX_FLAGS="-DGGML_PERF -DGGML_TARGET_POSIX -DGGML_TSAVORITE"
4646
fi
4747

4848
cmake --build build-posix --config Release

0 commit comments

Comments
 (0)