Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Framework/Core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ add_executable(o2-test-framework-core
test/test_OverrideLabels.cxx
test/test_O2DataModelHelpers.cxx
test/test_RootConfigParamHelpers.cxx
test/test_ResourcesMonitoringHelpers.cxx
test/test_Services.cxx
test/test_StringHelpers.cxx
test/test_StaticFor.cxx
Expand Down
2 changes: 2 additions & 0 deletions Framework/Core/include/Framework/DriverInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ struct DriverInfo {
std::string uniqueWorkflowId = "";
/// Metrics gathering interval
unsigned short resourcesMonitoringInterval = 0;
/// Where to dump the metrics
std::string resourcesMonitoringFilename = "performanceMetrics.json";
/// Metrics gathering dump to disk interval
unsigned short resourcesMonitoringDumpInterval = 0;
/// Port used by the websocket control. 0 means not initialised.
Expand Down
20 changes: 7 additions & 13 deletions Framework/Core/src/ResourcesMonitoringHelper.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ boost::property_tree::ptree fillNodeWithValue(const DeviceMetricsInfo& deviceMet
bool ResourcesMonitoringHelper::dumpMetricsToJSON(const std::vector<DeviceMetricsInfo>& metrics,
const DeviceMetricsInfo& driverMetrics,
const std::vector<DeviceSpec>& specs,
std::vector<std::regex> const& performanceMetricsRegex) noexcept
std::vector<std::regex> const& performanceMetricsRegex,
std::ostream& out) noexcept
{

assert(metrics.size() == specs.size());
Expand All @@ -76,7 +77,7 @@ bool ResourcesMonitoringHelper::dumpMetricsToJSON(const std::vector<DeviceMetric
auto same = [metricLabel](std::regex const& matcher) -> bool {
return std::regex_match(metricLabel.begin(), metricLabel.end(), matcher);
};
//check if we are interested
// check if we are interested
if (std::find_if(std::begin(performanceMetricsRegex), std::end(performanceMetricsRegex), same) == performanceMetricsRegex.end()) {
continue;
}
Expand All @@ -85,7 +86,7 @@ bool ResourcesMonitoringHelper::dumpMetricsToJSON(const std::vector<DeviceMetric
if (deviceMetrics.metrics[mi].filledMetrics == 0) {
continue;
}
//if so
// if so

boost::property_tree::ptree metricNode;

Expand Down Expand Up @@ -122,7 +123,7 @@ bool ResourcesMonitoringHelper::dumpMetricsToJSON(const std::vector<DeviceMetric
return std::regex_match(metricLabel.begin(), metricLabel.end(), matcher);
};

//check if we are interested
// check if we are interested
if (std::find_if(std::begin(performanceMetricsRegex), std::end(performanceMetricsRegex), same) == performanceMetricsRegex.end()) {
continue;
}
Expand All @@ -133,7 +134,7 @@ bool ResourcesMonitoringHelper::dumpMetricsToJSON(const std::vector<DeviceMetric
continue;
}

//if so
// if so
boost::property_tree::ptree metricNode;

switch (driverMetrics.metrics[mi].type) {
Expand Down Expand Up @@ -161,14 +162,7 @@ bool ResourcesMonitoringHelper::dumpMetricsToJSON(const std::vector<DeviceMetric

root.add_child("driver", driverRoot);

std::ofstream file("performanceMetrics.json", std::ios::out);
if (file.is_open()) {
boost::property_tree::json_parser::write_json(file, root);
} else {
return false;
}

file.close();
boost::property_tree::json_parser::write_json(out, root);

return true;
}
8 changes: 3 additions & 5 deletions Framework/Core/src/ResourcesMonitoringHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,27 +13,25 @@
#define O2_FRAMEWORK_RESOURCESMONITORINGHELPER_H_

#include "Framework/DeviceMetricsInfo.h"
#include "Monitoring/ProcessMonitor.h"
#include "Framework/DeviceSpec.h"

#include <vector>
#include <type_traits>
#include <regex>
#include <iosfwd>

namespace o2::framework
{

/// Static helpers used to export the collected resource-monitoring metrics.
struct ResourcesMonitoringHelper {
/// Dump the metrics in @a metrics which match the names specified in @a metricsToDump
/// @a specs are the DeviceSpecs associated to the metrics.
/// A metric is selected when its label fully matches at least one of the
/// regular expressions in @a metricsToDump; metrics with no filled entries are skipped.
/// @a metrics and @a specs are expected to have the same size (asserted by the implementation).
/// @a driverMetrics are the metrics of the driver itself; they are filtered with
/// the same @a metricsToDump and stored under a separate "driver" node.
/// @a out is the stream the resulting JSON document is written to.
/// @return true once the JSON document has been written to @a out.
/// NOTE(review): the implementation does not appear to inspect the state of
/// @a out, so the caller presumably has to verify that the stream is usable
/// (e.g. that the file could be opened) — TODO confirm.
static bool dumpMetricsToJSON(std::vector<DeviceMetricsInfo> const& metrics,
DeviceMetricsInfo const& driverMetrics,
std::vector<DeviceSpec> const& specs,
std::vector<std::regex> const& metricsToDump) noexcept;
std::vector<std::regex> const& metricsToDump,
std::ostream& out) noexcept;
/// Resources monitoring is considered enabled when @a interval is greater than zero.
static bool isResourcesMonitoringEnabled(unsigned short interval) noexcept { return interval > 0; }
};


} // namespace o2::framework

#endif // O2_FRAMEWORK_RESOURCESMONITORINGHELPER_H_
9 changes: 7 additions & 2 deletions Framework/Core/src/runDataProcessing.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -1247,8 +1247,10 @@ void dumpMetricsCallback(uv_timer_t* handle)
auto* context = (DriverServerContext*)handle->data;

static auto performanceMetrics = getDumpableMetrics();
std::ofstream file(context->driver->resourcesMonitoringFilename, std::ios::out);
ResourcesMonitoringHelper::dumpMetricsToJSON(*(context->metrics),
context->driver->metrics, *(context->specs), performanceMetrics);
context->driver->metrics, *(context->specs), performanceMetrics,
file);
}

void dumpRunSummary(DriverServerContext& context, DriverInfo const& driverInfo, DeviceInfos const& infos, DeviceSpecs const& specs)
Expand Down Expand Up @@ -2035,6 +2037,7 @@ int runStateMachine(DataProcessorSpecs const& workflow,
"--fairmq-ipc-prefix",
"--readers",
"--resources-monitoring",
"--resources-monitoring-file",
"--resources-monitoring-dump-interval",
"--time-limit",
};
Expand Down Expand Up @@ -2268,7 +2271,7 @@ int runStateMachine(DataProcessorSpecs const& workflow,
if (driverInfo.resourcesMonitoringDumpInterval) {
uv_timer_stop(&metricDumpTimer);
}
LOG(info) << "Dumping performance metrics to performanceMetrics.json file";
LOGP(info, "Dumping performance metrics to {}.json file", driverInfo.resourcesMonitoringFilename);
dumpMetricsCallback(&metricDumpTimer);
}
dumpRunSummary(serverContext, driverInfo, infos, runningWorkflow.devices);
Expand Down Expand Up @@ -2916,6 +2919,7 @@ int doMain(int argc, char** argv, o2::framework::WorkflowSpec const& workflow,
("no-IPC", bpo::value<bool>()->zero_tokens()->default_value(false), "disable IPC topology optimization") // //
("o2-control,o2", bpo::value<std::string>()->default_value(""), "dump O2 Control workflow configuration under the specified name") //
("resources-monitoring", bpo::value<unsigned short>()->default_value(0), "enable cpu/memory monitoring for provided interval in seconds") //
("resources-monitoring-file", bpo::value<std::string>()->default_value("performanceMetrics.json"), "file where to dump the metrics") //
("resources-monitoring-dump-interval", bpo::value<unsigned short>()->default_value(0), "dump monitoring information to disk every provided seconds"); //
// some of the options must be forwarded by default to the device
executorOptions.add(DeviceSpecHelpers::getForwardedDeviceOptions());
Expand Down Expand Up @@ -3186,6 +3190,7 @@ int doMain(int argc, char** argv, o2::framework::WorkflowSpec const& workflow,
driverInfo.deployHostname = varmap["hostname"].as<std::string>();
driverInfo.resources = varmap["resources"].as<std::string>();
driverInfo.resourcesMonitoringInterval = varmap["resources-monitoring"].as<unsigned short>();
driverInfo.resourcesMonitoringFilename = varmap["resources-monitoring-file"].as<std::string>();
driverInfo.resourcesMonitoringDumpInterval = varmap["resources-monitoring-dump-interval"].as<unsigned short>();

// FIXME: should use the whole dataProcessorInfos, actually...
Expand Down
Loading