Skip to content

Commit 4cef4e9

Browse files
committed
Fine-tune the itrace slice names
1 parent 0af283e commit 4cef4e9

File tree

1 file changed

+48
-3
lines changed

1 file changed

+48
-3
lines changed

ggml/src/ggml-hexagon/ggml-hexagon.cpp

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,9 @@ void ggml_hexagon_session::enqueue(struct htp_general_req &req, struct dspqueue_
288288

289289
// Flush the HTP response queue, i.e. wait for all outstanding requests to complete
290290
void ggml_hexagon_session::flush() {
291+
if (opt_trace) {
292+
itrace_start_section(g_itrace_cpu_profiler_handle, "session-flush", NULL);
293+
}
291294
dspqueue_t q = this->queue;
292295

293296
// Repeatedly read packets from the queue until it's empty. We don't
@@ -338,6 +341,10 @@ void ggml_hexagon_session::flush() {
338341

339342
this->op_pending--; // atomic dec
340343
}
344+
if (opt_trace) {
345+
itrace_end_section(g_itrace_cpu_profiler_handle, NULL);
346+
itrace_flush_logs(g_itrace_logger_handle);
347+
}
341348
}
342349

343350
// ** backend buffers
@@ -701,7 +708,7 @@ static void init_row_q4x4x2(block_q4_0 * x, int64_t k) {
701708
// repack q4_0 data into q4x4x2 tensor
702709
static void repack_q4_0_q4x4x2(ggml_tensor * t, const void * data, size_t size) {
703710
if (opt_trace) {
704-
itrace_start_section(g_itrace_cpu_profiler_handle, (std::string("ggml-hex-repack-q4_0-q4x4x2-") + t->name).c_str(), NULL);
711+
itrace_start_section(g_itrace_cpu_profiler_handle, (std::string("repack-q4_0-q4x4x2-") + t->name).c_str(), NULL);
705712
}
706713
int64_t nrows = ggml_nrows(t);
707714

@@ -2336,7 +2343,7 @@ static void hex_dump_dspbuf(const struct ggml_tensor * t, const dspqueue_buffer
23362343

23372344
static void ggml_hexagon_mul_mat(const struct ggml_tensor * op, uint32_t flags) {
23382345
if (opt_trace) {
2339-
itrace_start_section(g_itrace_cpu_profiler_handle, (std::string("ggml-hex-mul-mat-") + op->name).c_str(), NULL);
2346+
itrace_start_section(g_itrace_cpu_profiler_handle, (std::string("mul-mat-") + op->name).c_str(), NULL);
23402347
}
23412348
const struct ggml_tensor * src0 = op->src[0];
23422349
const struct ggml_tensor * src1 = op->src[1];
@@ -2412,6 +2419,9 @@ static void ggml_hexagon_mul_mat(const struct ggml_tensor * op, uint32_t flags)
24122419
}
24132420

24142421
static void ggml_hexagon_mul_mat_id(const struct ggml_tensor * op, uint32_t flags) {
2422+
if (opt_trace) {
2423+
itrace_start_section(g_itrace_cpu_profiler_handle, (std::string("mul-mat-id-") + op->name).c_str(), NULL);
2424+
}
24152425
const struct ggml_tensor * src0 = op->src[0];
24162426
const struct ggml_tensor * src1 = op->src[1];
24172427
const struct ggml_tensor * src2 = op->src[2];
@@ -2488,9 +2498,15 @@ static void ggml_hexagon_mul_mat_id(const struct ggml_tensor * op, uint32_t flag
24882498
(uint32_t) src2->ne[3], dst->name, (uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2],
24892499
(uint32_t) dst->ne[3], sess->prof_usecs, sess->prof_cycles, sess->prof_pkts,
24902500
(float) sess->prof_cycles / sess->prof_pkts, (unsigned long long) t2 - t1);
2501+
if (opt_trace) {
2502+
itrace_end_section(g_itrace_cpu_profiler_handle, NULL);
2503+
}
24912504
}
24922505

24932506
static void ggml_hexagon_binary(const struct ggml_tensor * op, uint32_t flags) {
2507+
if (opt_trace) {
2508+
itrace_start_section(g_itrace_cpu_profiler_handle, "binary", NULL);
2509+
}
24942510
const struct ggml_tensor * node = op;
24952511
const struct ggml_tensor * src0 = node->src[0];
24962512
const struct ggml_tensor * src1 = node->src[1];
@@ -2577,9 +2593,15 @@ static void ggml_hexagon_binary(const struct ggml_tensor * op, uint32_t flags) {
25772593
(uint32_t) src1->ne[2], (uint32_t) src1->ne[3], dst->name, (uint32_t) dst->ne[0], (uint32_t) dst->ne[1],
25782594
(uint32_t) dst->ne[2], (uint32_t) dst->ne[3], sess->prof_usecs, sess->prof_cycles, sess->prof_pkts,
25792595
(float) sess->prof_cycles / sess->prof_pkts, (unsigned long long) t2 - t1);
2596+
if (opt_trace) {
2597+
itrace_end_section(g_itrace_cpu_profiler_handle, NULL);
2598+
}
25802599
}
25812600

25822601
static void ggml_hexagon_add_id(const struct ggml_tensor * op, uint32_t flags) {
2602+
if (opt_trace) {
2603+
itrace_start_section(g_itrace_cpu_profiler_handle, (std::string("add-id-") + op->name).c_str(), NULL);
2604+
}
25832605
const struct ggml_tensor * node = op;
25842606
const struct ggml_tensor * src0 = node->src[0];
25852607
const struct ggml_tensor * src1 = node->src[1];
@@ -2652,9 +2674,15 @@ static void ggml_hexagon_add_id(const struct ggml_tensor * op, uint32_t flags) {
26522674
(uint32_t) src1->ne[2], (uint32_t) src1->ne[3], dst->name, (uint32_t) dst->ne[0], (uint32_t) dst->ne[1],
26532675
(uint32_t) dst->ne[2], (uint32_t) dst->ne[3], sess->prof_usecs, sess->prof_cycles, sess->prof_pkts,
26542676
(float) sess->prof_cycles / sess->prof_pkts, (unsigned long long) t2 - t1);
2677+
if (opt_trace) {
2678+
itrace_end_section(g_itrace_cpu_profiler_handle, NULL);
2679+
}
26552680
}
26562681

26572682
static void ggml_hexagon_unary(const struct ggml_tensor * op, uint32_t flags) {
2683+
if (opt_trace) {
2684+
itrace_start_section(g_itrace_cpu_profiler_handle, "unary", NULL);
2685+
}
26582686
const struct ggml_tensor * src0 = op->src[0];
26592687
const struct ggml_tensor * src1 = op->src[1];
26602688
const struct ggml_tensor * dst = op;
@@ -2785,9 +2813,15 @@ static void ggml_hexagon_unary(const struct ggml_tensor * op, uint32_t flags) {
27852813
(uint32_t) dst->ne[2], (uint32_t) dst->ne[3], sess->prof_usecs, sess->prof_cycles, sess->prof_pkts,
27862814
(float) sess->prof_cycles / sess->prof_pkts, (unsigned long long) t2 - t1);
27872815
}
2816+
if (opt_trace) {
2817+
itrace_end_section(g_itrace_cpu_profiler_handle, NULL);
2818+
}
27882819
}
27892820

27902821
static void ggml_hexagon_rope(const struct ggml_tensor * op, uint32_t flags) {
2822+
if (opt_trace) {
2823+
itrace_start_section(g_itrace_cpu_profiler_handle, "rope", NULL);
2824+
}
27912825
const struct ggml_tensor * src0 = op->src[0];
27922826
const struct ggml_tensor * src1 = op->src[1];
27932827
const struct ggml_tensor * src2 = op->src[2];
@@ -2893,6 +2927,9 @@ static void ggml_hexagon_rope(const struct ggml_tensor * op, uint32_t flags) {
28932927
(uint32_t) dst->ne[2], (uint32_t) dst->ne[3], sess->prof_usecs, sess->prof_cycles, sess->prof_pkts,
28942928
(float) sess->prof_cycles / sess->prof_pkts, (unsigned long long) t2 - t1);
28952929
}
2930+
if (opt_trace) {
2931+
itrace_end_section(g_itrace_cpu_profiler_handle, NULL);
2932+
}
28962933
}
28972934

28982935
static const char * ggml_backend_hexagon_name(ggml_backend_t backend) {
@@ -2937,6 +2974,10 @@ static inline int last_compute_op(ggml_cgraph * graph) {
29372974
static ggml_status ggml_backend_hexagon_graph_compute(ggml_backend_t backend, ggml_cgraph * graph) {
29382975
auto sess = static_cast<ggml_hexagon_session *>(backend->context);
29392976

2977+
if (opt_trace) {
2978+
itrace_start_section(g_itrace_cpu_profiler_handle, (std::string("hexagon-graph-compute-") + sess->name).c_str(), NULL);
2979+
}
2980+
29402981
HEX_VERBOSE("ggml-hex: %s graph-compute n_nodes %d\n", sess->name.c_str(), graph->n_nodes);
29412982

29422983
const int last = last_compute_op(graph);
@@ -3009,6 +3050,10 @@ static ggml_status ggml_backend_hexagon_graph_compute(ggml_backend_t backend, gg
30093050
// Wait until all pending ops complete
30103051
sess->flush();
30113052

3053+
if (opt_trace) {
3054+
itrace_end_section(g_itrace_cpu_profiler_handle, NULL);
3055+
}
3056+
30123057
return GGML_STATUS_SUCCESS;
30133058
}
30143059

@@ -3508,7 +3553,7 @@ static void ggml_hexagon_init(ggml_backend_reg * reg) {
35083553
if (opt_trace) {
35093554
HEX_VERBOSE("ggml-hex: open itrace\n");
35103555
itrace_open_logger(CPU_DOMAIN_ID, &g_itrace_logger_handle);
3511-
itrace_open_profiler(g_itrace_logger_handle, CPU_DOMAIN_ID, 0, &g_itrace_cpu_profiler_handle);
3556+
itrace_open_profiler(g_itrace_logger_handle, CPU_DOMAIN_ID, 0x1000000, &g_itrace_cpu_profiler_handle);
35123557

35133558
itrace_start_section(g_itrace_cpu_profiler_handle, "open-itrace", NULL);
35143559
itrace_end_section(g_itrace_cpu_profiler_handle, NULL);

0 commit comments

Comments
 (0)