
Commit 4b00537

Author: Anoop Kapoor (committed)
@FIR-1001 - GGML: Tsavorite Performance OPs data
1 parent c806b46 commit 4b00537

File tree: 4 files changed, +33 additions, −18 deletions

ggml/include/ggml.h

Lines changed: 7 additions & 6 deletions
@@ -343,13 +343,13 @@ extern "C" {
     GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4)
     GGML_API void ggml_abort(const char * file, int line, const char * fmt, ...);
 
-#ifdef GGML_PERF
+#if defined(GGML_PERF) || defined(GGML_PERF_DETAIL)
     enum ggml_compute_backend_type {
        GGML_COMPUTE_BACKEND_CPU=0,
        GGML_COMPUTE_BACKEND_TSAVORITE,
        GGML_COMPUTE_BACKEND_COUNT
     };
-#endif /* GGML_PERF */
+#endif /* GGML_PERF || GGML_PERF_DETAIL */
 
     enum ggml_status {
        GGML_STATUS_ALLOC_FAILED = -2,
@@ -659,14 +659,15 @@ extern "C" {
        char name[GGML_MAX_NAME];
 
        void * extra; // extra things e.g. for ggml-cuda.cu
-#ifdef GGML_PERF
+
+#if defined(GGML_PERF) || defined(GGML_PERF_DETAIL)
        int64_t perf_runs;
        int64_t perf_time_us;
        enum ggml_compute_backend_type ggml_compute_backend;
        char padding[4];
 #else
        char padding[8];
-#endif /* GGML_PERF */
+#endif /* GGML_PERF || GGML_PERF_DETAIL */
     };
 
     static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
@@ -2556,7 +2557,7 @@ extern "C" {
     GGML_API void ggml_threadpool_params_init (struct ggml_threadpool_params * p, int n_threads);
     GGML_API bool ggml_threadpool_params_match (const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1);
 
-#ifdef GGML_PERF
+#if defined(GGML_PERF) || defined(GGML_PERF_DETAIL)
 struct ggml_perf_backend_subtotals {
     int64_t total_us;
     int64_t runs;
@@ -2586,7 +2587,7 @@ void ggml_perf_write_detailed_csv(struct ggml_cgraph * cgraph, FILE *fp);
 void ggml_perf_accumulate(struct ggml_perf_totals totals[GGML_OP_COUNT], struct ggml_cgraph * cgraph);
 const char * ggml_backend_type(enum ggml_compute_backend_type backend);
 
-#endif /* GGML_PERF */
+#endif /* GGML_PERF || GGML_PERF_DETAIL */
 
 #ifdef __cplusplus
 }
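
The per-tensor fields above (perf_runs, perf_time_us, ggml_compute_backend) are enough to dump per-node timings once a graph has been computed under either macro. A minimal sketch, not part of the commit — the helper name dump_node_perf and the direct access to cgraph->n_nodes/nodes (as the in-tree ggml_perf_accumulate() uses) are assumptions:

#include <inttypes.h>
#include <stdio.h>
#include "ggml.h"

#if defined(GGML_PERF) || defined(GGML_PERF_DETAIL)
// Sketch: print one line per executed node of an already-computed graph,
// using ggml_op_name() and the ggml_backend_type() helper declared above.
static void dump_node_perf(const struct ggml_cgraph * cgraph) {
    for (int i = 0; i < cgraph->n_nodes; ++i) {
        const struct ggml_tensor * node = cgraph->nodes[i];
        if (node->perf_runs == 0) {
            continue; // node never ran under this perf build
        }
        printf("%-16s %-4s runs=%" PRId64 " total_us=%" PRId64 "\n",
               ggml_op_name(node->op),
               ggml_backend_type(node->ggml_compute_backend),
               node->perf_runs,
               node->perf_time_us);
    }
}
#endif /* GGML_PERF || GGML_PERF_DETAIL */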

ggml/src/ggml-tsavorite/ggml-tsavorite.cpp

Lines changed: 4 additions & 4 deletions
@@ -929,9 +929,9 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend,
 
     for (int i = 0; i < cgraph->n_nodes; i++) {
        int32_t kernel_sub_type=-1;
-#ifdef GGML_PERF
+#if defined(GGML_PERF) || defined(GGML_PERF_DETAIL)
        int64_t t_start = ggml_time_us();
-#endif
+#endif /* GGML_PERF || GGML_PERF_DETAIL */
        node = cgraph->nodes[i];
        src0 = node->src[0];
        src1 = node->src[1];
@@ -1279,7 +1279,7 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend,
                device->stats.op_run_count[kernel_type].max_num_of_elem < max_num_of_elem)
                device->stats.op_run_count[kernel_type].max_num_of_elem = max_num_of_elem;
        }
-#ifdef GGML_PERF
+#if defined(GGML_PERF) || defined(GGML_PERF_DETAIL)
        int64_t t_end = ggml_time_us();
        node->perf_runs++;
        node->ggml_compute_backend = GGML_COMPUTE_BACKEND_TSAVORITE;
@@ -1289,7 +1289,7 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend,
            // Handle wraparound by assuming timer rolls over at max int64_t value
            node->perf_time_us += (INT64_MAX - t_start + t_end + 1);
        }
-#endif
+#endif /* GGML_PERF || GGML_PERF_DETAIL */
    }
 
    // This this need to implement correctly when we have mixture of CPU and accelerator operation
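
The hunk at lines 1289–1291 shows only the wraparound branch of the timing update; the complete elapsed-time computation it implies can be factored as below. This is a sketch (the helper name perf_elapsed_us is ours), assuming, as the in-tree comment does, that the microsecond timer rolls over at INT64_MAX:

#include <stdint.h>

// Sketch: elapsed time between two ggml_time_us() samples, mirroring the
// wraparound handling above.
static inline int64_t perf_elapsed_us(int64_t t_start, int64_t t_end) {
    if (t_end >= t_start) {
        return t_end - t_start;               // normal case
    }
    // timer wrapped: time left before the rollover, plus time after it
    return (INT64_MAX - t_start) + t_end + 1;
}

With that helper, the update in the hunks above amounts to node->perf_time_us += perf_elapsed_us(t_start, t_end);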

ggml/src/ggml.c

Lines changed: 10 additions & 8 deletions
@@ -1020,12 +1020,12 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "GLU",
 };
 
-#ifdef GGML_PERF
+#if defined(GGML_PERF) || defined(GGML_PERF_DETAIL)
 static const char * GGML_BACKEND_TYPE[GGML_COMPUTE_BACKEND_COUNT] = {
     "CPU",
     "OPU"
 };
-#endif /* GGML_PERF */
+#endif /* GGML_PERF || GGML_PERF_DETAIL */
 
 static_assert(GGML_OP_COUNT == 90, "GGML_OP_COUNT != 90");
 
@@ -1262,11 +1262,11 @@ const char * ggml_op_name(enum ggml_op op) {
     return GGML_OP_NAME[op];
 }
 
-#ifdef GGML_PERF
+#if defined(GGML_PERF) || defined(GGML_PERF_DETAIL)
 const char * ggml_backend_type(enum ggml_compute_backend_type backend) {
     return GGML_BACKEND_TYPE[backend];
 }
-#endif /* GGML_PERF */
+#endif /* GGML_PERF || GGML_PERF_DETAIL */
 
 const char * ggml_op_symbol(enum ggml_op op) {
     return GGML_OP_SYMBOL[op];
@@ -1692,11 +1692,11 @@ static struct ggml_tensor * ggml_new_tensor_impl(
        /*.data =*/ obj_alloc_size > 0 ? (void *)(result + 1) : data,
        /*.name =*/ { 0 },
        /*.extra =*/ NULL,
-#ifdef GGML_PERF
+#if defined(GGML_PERF) || defined(GGML_PERF_DETAIL)
        /*.perf_runs =*/ 0,
        /*.perf_time_us =*/ 0,
        /*.ggml_compute_backend =*/ GGML_COMPUTE_BACKEND_CPU,
-#endif /* GGML_PERF */
+#endif /* GGML_PERF || GGML_PERF_DETAIL */
        /*.padding =*/ { 0 },
     };
 
@@ -7231,7 +7231,7 @@ bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, cons
     return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
 }
 
-#ifdef GGML_PERF
+#if defined(GGML_PERF) || defined(GGML_PERF_DETAIL)
 void ggml_perf_accumulate(struct ggml_perf_totals totals[GGML_OP_COUNT], struct ggml_cgraph * cgraph) {
     for (int i = 0; i < cgraph->n_nodes; ++i) {
         struct ggml_tensor * node = cgraph->nodes[i];
@@ -7258,7 +7258,9 @@ void ggml_perf_accumulate(struct ggml_perf_totals totals[GGML_OP_COUNT], struct
         }
     }
 }
+#endif /* GGML_PERF || GGML_PERF_DETAIL */
 
+#if defined(GGML_PERF_DETAIL)
 FILE * ggml_perf_log_open(const char *filename) {
     // Try to delete existing file, ignore error if it doesn't exist
     remove(filename);
@@ -7326,4 +7328,4 @@ void ggml_perf_write_detailed_csv(struct ggml_cgraph * cgraph, FILE *fp) {
 
     fprintf(fp, "--------------------------------------------------------------------------------------------------------\n\n");
 }
-#endif /* GGML_PERF */
+#endif /* GGML_PERF_DETAIL */
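
Splitting the guards this way keeps ggml_perf_accumulate() available under either GGML_PERF or GGML_PERF_DETAIL, while the file-based helpers (ggml_perf_log_open, ggml_perf_write_detailed_csv) now exist only under GGML_PERF_DETAIL. A minimal sketch of how the detailed path composes; the file name is a placeholder, not something the commit defines:

#include <stdio.h>
#include "ggml.h"

#ifdef GGML_PERF_DETAIL
// Sketch: ggml_perf_log_open() removes any existing file before opening,
// so each run starts with a fresh CSV; one detailed block is appended per
// already-computed graph. "perf_detail.csv" is a placeholder name.
static void log_graph_detail(struct ggml_cgraph * cgraph) {
    FILE * fp = ggml_perf_log_open("perf_detail.csv");
    if (!fp) {
        return;
    }
    ggml_perf_write_detailed_csv(cgraph, fp); // per-node data for this graph
    fclose(fp);
}
#endif /* GGML_PERF_DETAIL */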

src/llama-context.cpp

Lines changed: 12 additions & 0 deletions
@@ -1090,6 +1090,18 @@ int llama_context::decode(const llama_batch & batch_inp) {
         ggml_status status;
         const auto * res = process_ubatch(ubatch, LLM_GRAPH_TYPE_DECODER, mctx.get(), status);
 
+#ifdef GGML_PERF
+        ggml_perf_accumulate(perf_totals, res->get_gf());
+#endif /* GGML_PERF */
+
+#ifdef GGML_PERF_DETAIL
+        if (perf_all_shape_fp) {
+            ggml_perf_write_detailed_csv(res->get_gf(), perf_all_shape_fp);
+        }
+        ggml_perf_accumulate(perf_totals, res->get_gf());
+#endif /* GGML_PERF_DETAIL */
+
+
         if (!res) {
             // the last ubatch failed or was aborted -> remove all positions of that ubatch from the memory module
             llama_pos pos_min[LLAMA_MAX_SEQ];
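
Note that the added block calls res->get_gf() before the if (!res) check that immediately follows it. A guarded variant that keeps the same calls but skips profiling when the ubatch failed might look like this (a sketch, not what the commit does):

#if defined(GGML_PERF) || defined(GGML_PERF_DETAIL)
        // Sketch: accumulate only when a graph was actually produced, and
        // layer the detailed CSV dump on top of the shared accumulation.
        if (res) {
            ggml_perf_accumulate(perf_totals, res->get_gf());
#ifdef GGML_PERF_DETAIL
            if (perf_all_shape_fp) {
                ggml_perf_write_detailed_csv(res->get_gf(), perf_all_shape_fp);
            }
#endif /* GGML_PERF_DETAIL */
        }
#endif /* GGML_PERF || GGML_PERF_DETAIL */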
