Skip to content

Commit df4033c

Browse files
fix: kernel profiler threading mode (#3435)
1 parent 38049de commit df4033c

File tree

1 file changed

+33
-19
lines changed

1 file changed

+33
-19
lines changed

cpp/daal/src/services/service_profiler.h

Lines changed: 33 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@
3232
#include <mutex>
3333
#include <algorithm>
3434
#include <exception>
35-
35+
#include <unordered_map>
3636
#include "services/library_version_info.h"
3737

3838
#ifdef _WIN32
@@ -272,23 +272,35 @@ class profiler_task
272272
inline profiler_task(const char * task_name, int idx, bool thread) : task_name_(task_name), idx_(idx), is_thread_(thread) {}
273273
inline ~profiler_task();
274274

275-
inline profiler_task(const profiler_task & other) : task_name_(other.task_name_), idx_(other.idx_), is_thread_(other.is_thread_) {}
275+
profiler_task(const profiler_task &) = delete;
276+
profiler_task & operator=(const profiler_task &) = delete;
276277

277-
inline profiler_task & operator=(const profiler_task & other)
278+
profiler_task(profiler_task && other) noexcept : task_name_(other.task_name_), idx_(other.idx_), is_thread_(other.is_thread_)
279+
{
280+
other.task_name_ = nullptr;
281+
other.idx_ = -1;
282+
other.is_thread_ = false;
283+
}
284+
285+
profiler_task & operator=(profiler_task && other) noexcept
278286
{
279287
if (this != &other)
280288
{
281289
task_name_ = other.task_name_;
282290
idx_ = other.idx_;
283291
is_thread_ = other.is_thread_;
292+
293+
other.task_name_ = nullptr;
294+
other.idx_ = -1;
295+
other.is_thread_ = false;
284296
}
285297
return *this;
286298
}
287299

288300
private:
289-
const char * task_name_;
290-
int idx_;
291-
bool is_thread_ = false;
301+
const char * task_name_ = nullptr;
302+
int idx_ = -1;
303+
bool is_thread_ = false;
292304
};
293305

294306
class profiler
@@ -319,15 +331,13 @@ class profiler
319331
{
320332
const auto & entry = tasks_info.kernels[i];
321333
std::string prefix;
322-
for (std::int64_t lvl = 0; lvl < entry.level; ++lvl) prefix += "| ";
323-
bool is_last = (i + 1 < tasks_info.kernels.size()) && (tasks_info.kernels[i + 1].level >= entry.level) ? false : true;
324-
prefix += is_last ? "|-- " : "|-- ";
334+
for (std::int64_t lvl = 0; lvl < entry.level; ++lvl) prefix += "| ";
335+
prefix += "|-- ";
325336
std::cerr << prefix << entry.name << " time: " << format_time_for_output(entry.duration) << " " << std::fixed
326337
<< std::setprecision(2) << (total_time > 0 ? (double(entry.duration) / total_time) * 100 : 0.0) << "% " << entry.count
327-
<< " times"
328-
<< " in a " << entry.threading_task << " region" << '\n';
338+
<< " times in a " << (entry.threading_task ? "parallel" : "sequential") << " region" << '\n';
329339
}
330-
std::cerr << "|---(end)" << '\n';
340+
std::cerr << "|--(end)" << '\n';
331341
std::cerr << "DAAL KERNEL_PROFILER: kernels total time " << format_time_for_output(total_time) << '\n';
332342

333343
#if (!defined(DAAL_NOTHROW_EXCEPTIONS))
@@ -353,6 +363,8 @@ class profiler
353363
inline static profiler_task start_task(const char * task_name)
354364
{
355365
if (!task_name) return profiler_task(nullptr, -1);
366+
367+
std::lock_guard<std::mutex> lock(global_mutex());
356368
auto ns_start = get_time();
357369
auto & tasks_info = get_instance()->get_task();
358370
auto & current_level_ = get_instance()->get_current_level();
@@ -377,9 +389,8 @@ class profiler
377389
inline static profiler_task start_threading_task(const char * task_name)
378390
{
379391
if (!task_name) return profiler_task(nullptr, -1);
380-
static std::mutex mutex;
381392

382-
std::lock_guard<std::mutex> lock(mutex);
393+
std::lock_guard<std::mutex> lock(global_mutex());
383394
if (is_logger_enabled())
384395
{
385396
if (!is_service_debug_enabled())
@@ -423,8 +434,8 @@ class profiler
423434
if (!task_name) return;
424435
const std::uint64_t ns_end = get_time();
425436
auto & tasks_info = get_instance()->get_task();
426-
static std::mutex mutex;
427-
std::lock_guard<std::mutex> lock(mutex);
437+
438+
std::lock_guard<std::mutex> lock(global_mutex());
428439
auto & entry = tasks_info.kernels[idx_];
429440
auto duration = ns_end - entry.duration;
430441
entry.duration = duration;
@@ -446,9 +457,7 @@ class profiler
446457
{
447458
if (!task_name) return;
448459

449-
static std::mutex mutex;
450-
451-
std::lock_guard<std::mutex> lock(mutex);
460+
std::lock_guard<std::mutex> lock(global_mutex());
452461
const std::uint64_t ns_end = get_time();
453462
auto & tasks_info = get_instance()->get_task();
454463

@@ -543,6 +552,11 @@ class profiler
543552
std::int64_t current_level_ = 0;
544553
std::int64_t kernel_count_ = 0;
545554
task task_;
555+
static std::mutex & global_mutex()
556+
{
557+
static std::mutex m;
558+
return m;
559+
}
546560
};
547561

548562
inline profiler_task::~profiler_task()

0 commit comments

Comments
 (0)