@@ -288,6 +288,9 @@ void ggml_hexagon_session::enqueue(struct htp_general_req &req, struct dspqueue_
288288
289289// Flush HTP response queue, i.e. wait for all outstanding requests to complete
290290void ggml_hexagon_session::flush () {
291+ if (opt_trace) {
292+ itrace_start_section (g_itrace_cpu_profiler_handle, " session-flush" , NULL );
293+ }
291294 dspqueue_t q = this ->queue ;
292295
293296 // Repeatedly read packets from the queue until it's empty. We don't
@@ -338,6 +341,10 @@ void ggml_hexagon_session::flush() {
338341
339342 this ->op_pending --; // atomic dec
340343 }
344+ if (opt_trace) {
345+ itrace_end_section (g_itrace_cpu_profiler_handle, NULL );
346+ itrace_flush_logs (g_itrace_logger_handle);
347+ }
341348}
342349
343350// ** backend buffers
@@ -701,7 +708,7 @@ static void init_row_q4x4x2(block_q4_0 * x, int64_t k) {
701708// repack q4_0 data into q4x4x2 tensor
702709static void repack_q4_0_q4x4x2 (ggml_tensor * t, const void * data, size_t size) {
703710 if (opt_trace) {
704- itrace_start_section (g_itrace_cpu_profiler_handle, (std::string (" ggml-hex- repack-q4_0-q4x4x2-" ) + t->name ).c_str (), NULL );
711+ itrace_start_section (g_itrace_cpu_profiler_handle, (std::string (" repack-q4_0-q4x4x2-" ) + t->name ).c_str (), NULL );
705712 }
706713 int64_t nrows = ggml_nrows (t);
707714
@@ -2336,7 +2343,7 @@ static void hex_dump_dspbuf(const struct ggml_tensor * t, const dspqueue_buffer
23362343
23372344static void ggml_hexagon_mul_mat (const struct ggml_tensor * op, uint32_t flags) {
23382345 if (opt_trace) {
2339- itrace_start_section (g_itrace_cpu_profiler_handle, (std::string (" ggml-hex- mul-mat-" ) + op->name ).c_str (), NULL );
2346+ itrace_start_section (g_itrace_cpu_profiler_handle, (std::string (" mul-mat-" ) + op->name ).c_str (), NULL );
23402347 }
23412348 const struct ggml_tensor * src0 = op->src [0 ];
23422349 const struct ggml_tensor * src1 = op->src [1 ];
@@ -2412,6 +2419,9 @@ static void ggml_hexagon_mul_mat(const struct ggml_tensor * op, uint32_t flags)
24122419}
24132420
24142421static void ggml_hexagon_mul_mat_id (const struct ggml_tensor * op, uint32_t flags) {
2422+ if (opt_trace) {
2423+ itrace_start_section (g_itrace_cpu_profiler_handle, (std::string (" mul-mat-id-" ) + op->name ).c_str (), NULL );
2424+ }
24152425 const struct ggml_tensor * src0 = op->src [0 ];
24162426 const struct ggml_tensor * src1 = op->src [1 ];
24172427 const struct ggml_tensor * src2 = op->src [2 ];
@@ -2488,9 +2498,15 @@ static void ggml_hexagon_mul_mat_id(const struct ggml_tensor * op, uint32_t flag
24882498 (uint32_t ) src2->ne [3 ], dst->name , (uint32_t ) dst->ne [0 ], (uint32_t ) dst->ne [1 ], (uint32_t ) dst->ne [2 ],
24892499 (uint32_t ) dst->ne [3 ], sess->prof_usecs , sess->prof_cycles , sess->prof_pkts ,
24902500 (float ) sess->prof_cycles / sess->prof_pkts , (unsigned long long ) t2 - t1);
2501+ if (opt_trace) {
2502+ itrace_end_section (g_itrace_cpu_profiler_handle, NULL );
2503+ }
24912504}
24922505
24932506static void ggml_hexagon_binary (const struct ggml_tensor * op, uint32_t flags) {
2507+ if (opt_trace) {
2508+ itrace_start_section (g_itrace_cpu_profiler_handle, " binary" , NULL );
2509+ }
24942510 const struct ggml_tensor * node = op;
24952511 const struct ggml_tensor * src0 = node->src [0 ];
24962512 const struct ggml_tensor * src1 = node->src [1 ];
@@ -2577,9 +2593,15 @@ static void ggml_hexagon_binary(const struct ggml_tensor * op, uint32_t flags) {
25772593 (uint32_t ) src1->ne [2 ], (uint32_t ) src1->ne [3 ], dst->name , (uint32_t ) dst->ne [0 ], (uint32_t ) dst->ne [1 ],
25782594 (uint32_t ) dst->ne [2 ], (uint32_t ) dst->ne [3 ], sess->prof_usecs , sess->prof_cycles , sess->prof_pkts ,
25792595 (float ) sess->prof_cycles / sess->prof_pkts , (unsigned long long ) t2 - t1);
2596+ if (opt_trace) {
2597+ itrace_end_section (g_itrace_cpu_profiler_handle, NULL );
2598+ }
25802599}
25812600
25822601static void ggml_hexagon_add_id (const struct ggml_tensor * op, uint32_t flags) {
2602+ if (opt_trace) {
2603+ itrace_start_section (g_itrace_cpu_profiler_handle, (std::string (" add-id-" ) + op->name ).c_str (), NULL );
2604+ }
25832605 const struct ggml_tensor * node = op;
25842606 const struct ggml_tensor * src0 = node->src [0 ];
25852607 const struct ggml_tensor * src1 = node->src [1 ];
@@ -2652,9 +2674,15 @@ static void ggml_hexagon_add_id(const struct ggml_tensor * op, uint32_t flags) {
26522674 (uint32_t ) src1->ne [2 ], (uint32_t ) src1->ne [3 ], dst->name , (uint32_t ) dst->ne [0 ], (uint32_t ) dst->ne [1 ],
26532675 (uint32_t ) dst->ne [2 ], (uint32_t ) dst->ne [3 ], sess->prof_usecs , sess->prof_cycles , sess->prof_pkts ,
26542676 (float ) sess->prof_cycles / sess->prof_pkts , (unsigned long long ) t2 - t1);
2677+ if (opt_trace) {
2678+ itrace_end_section (g_itrace_cpu_profiler_handle, NULL );
2679+ }
26552680}
26562681
26572682static void ggml_hexagon_unary (const struct ggml_tensor * op, uint32_t flags) {
2683+ if (opt_trace) {
2684+ itrace_start_section (g_itrace_cpu_profiler_handle, " unary" , NULL );
2685+ }
26582686 const struct ggml_tensor * src0 = op->src [0 ];
26592687 const struct ggml_tensor * src1 = op->src [1 ];
26602688 const struct ggml_tensor * dst = op;
@@ -2785,9 +2813,15 @@ static void ggml_hexagon_unary(const struct ggml_tensor * op, uint32_t flags) {
27852813 (uint32_t ) dst->ne [2 ], (uint32_t ) dst->ne [3 ], sess->prof_usecs , sess->prof_cycles , sess->prof_pkts ,
27862814 (float ) sess->prof_cycles / sess->prof_pkts , (unsigned long long ) t2 - t1);
27872815 }
2816+ if (opt_trace) {
2817+ itrace_end_section (g_itrace_cpu_profiler_handle, NULL );
2818+ }
27882819}
27892820
27902821static void ggml_hexagon_rope (const struct ggml_tensor * op, uint32_t flags) {
2822+ if (opt_trace) {
2823+ itrace_start_section (g_itrace_cpu_profiler_handle, " rope" , NULL );
2824+ }
27912825 const struct ggml_tensor * src0 = op->src [0 ];
27922826 const struct ggml_tensor * src1 = op->src [1 ];
27932827 const struct ggml_tensor * src2 = op->src [2 ];
@@ -2893,6 +2927,9 @@ static void ggml_hexagon_rope(const struct ggml_tensor * op, uint32_t flags) {
28932927 (uint32_t ) dst->ne [2 ], (uint32_t ) dst->ne [3 ], sess->prof_usecs , sess->prof_cycles , sess->prof_pkts ,
28942928 (float ) sess->prof_cycles / sess->prof_pkts , (unsigned long long ) t2 - t1);
28952929 }
2930+ if (opt_trace) {
2931+ itrace_end_section (g_itrace_cpu_profiler_handle, NULL );
2932+ }
28962933}
28972934
28982935static const char * ggml_backend_hexagon_name (ggml_backend_t backend) {
@@ -2937,6 +2974,10 @@ static inline int last_compute_op(ggml_cgraph * graph) {
29372974static ggml_status ggml_backend_hexagon_graph_compute (ggml_backend_t backend, ggml_cgraph * graph) {
29382975 auto sess = static_cast <ggml_hexagon_session *>(backend->context );
29392976
2977+ if (opt_trace) {
2978+ itrace_start_section (g_itrace_cpu_profiler_handle, (std::string (" hexagon-graph-compute-" ) + sess->name ).c_str (), NULL );
2979+ }
2980+
29402981 HEX_VERBOSE (" ggml-hex: %s graph-compute n_nodes %d\n " , sess->name .c_str (), graph->n_nodes );
29412982
29422983 const int last = last_compute_op (graph);
@@ -3009,6 +3050,10 @@ static ggml_status ggml_backend_hexagon_graph_compute(ggml_backend_t backend, gg
30093050 // Wait until all pending ops complete
30103051 sess->flush ();
30113052
3053+ if (opt_trace) {
3054+ itrace_end_section (g_itrace_cpu_profiler_handle, NULL );
3055+ }
3056+
30123057 return GGML_STATUS_SUCCESS;
30133058}
30143059
@@ -3508,7 +3553,7 @@ static void ggml_hexagon_init(ggml_backend_reg * reg) {
35083553 if (opt_trace) {
35093554 HEX_VERBOSE (" ggml-hex: open itrace\n " );
35103555 itrace_open_logger (CPU_DOMAIN_ID, &g_itrace_logger_handle);
3511- itrace_open_profiler (g_itrace_logger_handle, CPU_DOMAIN_ID, 0 , &g_itrace_cpu_profiler_handle);
3556+ itrace_open_profiler (g_itrace_logger_handle, CPU_DOMAIN_ID, 0x1000000 , &g_itrace_cpu_profiler_handle);
35123557
35133558 itrace_start_section (g_itrace_cpu_profiler_handle, " open-itrace" , NULL );
35143559 itrace_end_section (g_itrace_cpu_profiler_handle, NULL );
0 commit comments