From d73b506760f170449d8a339b8d6bcf7c18826dc6 Mon Sep 17 00:00:00 2001 From: Anton Riedel Date: Fri, 14 Feb 2025 14:02:39 +0100 Subject: [PATCH 1/2] Feat: add skeleton for GPUErrorQA task --- Detectors/TPC/qc/CMakeLists.txt | 4 +- Detectors/TPC/qc/include/TPCQC/GPUErrorQA.h | 69 +++++++++++++++++++++ Detectors/TPC/qc/src/GPUErrorQA.cxx | 55 ++++++++++++++++ Detectors/TPC/qc/src/TPCQCLinkDef.h | 1 + 4 files changed, 128 insertions(+), 1 deletion(-) create mode 100644 Detectors/TPC/qc/include/TPCQC/GPUErrorQA.h create mode 100644 Detectors/TPC/qc/src/GPUErrorQA.cxx diff --git a/Detectors/TPC/qc/CMakeLists.txt b/Detectors/TPC/qc/CMakeLists.txt index 6bb4c726a90fa..60195ed6d451a 100644 --- a/Detectors/TPC/qc/CMakeLists.txt +++ b/Detectors/TPC/qc/CMakeLists.txt @@ -19,6 +19,7 @@ o2_add_library(TPCQC src/SACs.cxx src/IDCsVsSACs.cxx src/TrackClusters.cxx + src/GPUErrorQA.cxx PUBLIC_LINK_LIBRARIES O2::TPCBase O2::DataFormatsTPC O2::GPUO2Interface @@ -36,7 +37,8 @@ o2_target_root_dictionary(TPCQC include/TPCQC/DCSPTemperature.h include/TPCQC/SACs.h include/TPCQC/IDCsVsSACs.h - include/TPCQC/TrackClusters.h) + include/TPCQC/TrackClusters.h + include/TPCQC/GPUErrorQA.h) o2_add_test(PID COMPONENT_NAME tpc diff --git a/Detectors/TPC/qc/include/TPCQC/GPUErrorQA.h b/Detectors/TPC/qc/include/TPCQC/GPUErrorQA.h new file mode 100644 index 0000000000000..e9ddcfb66d594 --- /dev/null +++ b/Detectors/TPC/qc/include/TPCQC/GPUErrorQA.h @@ -0,0 +1,69 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
+// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// +/// @file GPUErrorQA.h +/// @author Anton Riedel, anton.riedel@cern.ch +/// + +#ifndef AliceO2_TPC_QC_GPUERRORQA_H +#define AliceO2_TPC_QC_GPUERRORQA_H + +#include +#include + +// root includes +#include "TH1.h" + +// o2 includes +// #include "DataFormatsTPC/Defs.h" + +namespace o2 +{ +namespace tpc +{ +namespace qc +{ + +/// @brief TPC QC task for errors from GPU reconstruction +/// +/// This class is used to retrieve and visualize GPU errors +/// according to corresponding error code and location. +/// +/// origin: TPC +/// @author Anton Riedel, anton.riedel@cern.ch +class GPUErrorQA +{ + public: + /// \brief Constructor. + GPUErrorQA() = default; + + /// process gpu error reported by the reconstruction workflow + void processErrors(gsl::span> errors); + + /// Initialize all histograms + void initializeHistograms(); + + /// Reset all histograms + void resetHistograms(); + + /// Dump results to a file + void dumpToFile(std::string filename); + + private: + std::unique_ptr mHist; + ClassDefNV(GPUErrorQA, 1) +}; +} // namespace qc +} // namespace tpc +} // namespace o2 + +#endif // AliceO2_TPC_QC_GPUERRORQA_H diff --git a/Detectors/TPC/qc/src/GPUErrorQA.cxx b/Detectors/TPC/qc/src/GPUErrorQA.cxx new file mode 100644 index 0000000000000..876c63d6e89f5 --- /dev/null +++ b/Detectors/TPC/qc/src/GPUErrorQA.cxx @@ -0,0 +1,55 @@ +// Copyright 2019-2025 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
+// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#define _USE_MATH_DEFINES + +#include +#include + +// root includes +#include "TFile.h" +#include + +// o2 includes +#include "TPCQC/GPUErrorQA.h" +#include "GPUErrors.h" + +ClassImp(o2::tpc::qc::GPUErrorQA); + +using namespace o2::tpc::qc; + +//______________________________________________________________________________ +void GPUErrorQA::initializeHistograms() +{ + TH1::AddDirectory(false); + mHist = std::make_unique("ErrorCounter", "ErrorCounter", o2::gpu::GPUErrors::getMaxErrors(), 0, o2::gpu::GPUErrors::getMaxErrors()); +} +//______________________________________________________________________________ +void GPUErrorQA::resetHistograms() +{ + mHist->Reset(); +} +//______________________________________________________________________________ +void GPUErrorQA::processErrors(gsl::span> errors) +{ + for (const auto& error : errors) { + uint32_t errorCode = error[0]; + mHist->Fill(static_cast(errorCode)); + } +} + +//______________________________________________________________________________ +void GPUErrorQA::dumpToFile(const std::string filename) +{ + auto f = std::unique_ptr(TFile::Open(filename.c_str(), "recreate")); + mHist->Write(); + f->Close(); +} diff --git a/Detectors/TPC/qc/src/TPCQCLinkDef.h b/Detectors/TPC/qc/src/TPCQCLinkDef.h index c227ebcad8c09..3921d7dfe5649 100644 --- a/Detectors/TPC/qc/src/TPCQCLinkDef.h +++ b/Detectors/TPC/qc/src/TPCQCLinkDef.h @@ -24,6 +24,7 @@ #pragma link C++ class o2::tpc::qc::SACs + ; #pragma link C++ class o2::tpc::qc::IDCsVsSACs + ; #pragma link C++ class o2::tpc::qc::TrackClusters + ; +#pragma link C++ class o2::tpc::qc::GPUErrorQA + ; #pragma link C++ function o2::tpc::qc::helpers::makeLogBinning + ; #pragma link C++ function o2::tpc::qc::helpers::setStyleHistogram1D + ; #pragma link C++ function 
o2::tpc::qc::helpers::setStyleHistogram2D + ;

From addb1ce2250ad234dc54f5ed17c57d68eaba9f7c Mon Sep 17 00:00:00 2001
From: Anton Riedel
Date: Thu, 6 Mar 2025 10:26:24 +0100
Subject: [PATCH 2/2] Feat: add GPUErrorQA class

---
Review notes (text between "---" and the first "diff --git" is not part of the
commit message):
- initializeHistograms: bins are labelled per error code (code c -> bin c + 1),
  the last bin is labelled too, and no unchecked find() result is dereferenced.
- processErrors: "ErrorCounter" is looked up once; the call returns early when
  the histogram has not been created.
- dumpToFile: nothing is written when the output file could not be opened; null
  histograms are skipped.
- Hunk-header and diffstat counts are updated for these changes. The blob ids on
  the "index" lines and the commit id still refer to the original commit.
- Angle-bracket contents that were lost in extraction are left as found on all
  untouched lines.

 Detectors/TPC/qc/include/TPCQC/GPUErrorQA.h |  6 ++-
 Detectors/TPC/qc/src/GPUErrorQA.cxx         | 59 ++++++++++++++++++---
 2 files changed, 57 insertions(+), 8 deletions(-)

diff --git a/Detectors/TPC/qc/include/TPCQC/GPUErrorQA.h b/Detectors/TPC/qc/include/TPCQC/GPUErrorQA.h
index e9ddcfb66d594..3db176e570d6f 100644
--- a/Detectors/TPC/qc/include/TPCQC/GPUErrorQA.h
+++ b/Detectors/TPC/qc/include/TPCQC/GPUErrorQA.h
@@ -18,6 +18,7 @@
 #define AliceO2_TPC_QC_GPUERRORQA_H

 #include
+#include
 #include

 // root includes
@@ -55,11 +56,14 @@ class GPUErrorQA
   /// Reset all histograms
   void resetHistograms();

+  /// return histograms
+  std::unordered_map>& getMapHist() { return mMapHist; };
+
   /// Dump results to a file
   void dumpToFile(std::string filename);

  private:
-  std::unique_ptr mHist;
+  std::unordered_map> mMapHist;
   ClassDefNV(GPUErrorQA, 1)
 };
 } // namespace qc
diff --git a/Detectors/TPC/qc/src/GPUErrorQA.cxx b/Detectors/TPC/qc/src/GPUErrorQA.cxx
index 876c63d6e89f5..529ada53f9444 100644
--- a/Detectors/TPC/qc/src/GPUErrorQA.cxx
+++ b/Detectors/TPC/qc/src/GPUErrorQA.cxx
@@ -13,6 +13,7 @@

 #include
 #include
+#include

 // root includes
 #include "TFile.h"
@@ -20,7 +21,7 @@

 // o2 includes
 #include "TPCQC/GPUErrorQA.h"
-#include "GPUErrors.h"
+#include "GPUDefMacros.h"

 ClassImp(o2::tpc::qc::GPUErrorQA);

@@ -30,26 +31,70 @@ using namespace o2::tpc::qc;
 void GPUErrorQA::initializeHistograms()
 {
   TH1::AddDirectory(false);
-  mHist = std::make_unique("ErrorCounter", "ErrorCounter", o2::gpu::GPUErrors::getMaxErrors(), 0, o2::gpu::GPUErrors::getMaxErrors());
+
+  // get gpu error names
+  // copied from GPUErrors.h
+  static const std::unordered_map<uint32_t, const char*> errorNames = {
+#define GPUCA_ERROR_CODE(num, name, ...) {num, GPUCA_M_STR(name)},
+#include "GPUErrorCodes.h"
+#undef GPUCA_ERROR_CODE
+  };
+
+  // 1D histogram counting all reported errors: one unit-width bin per known
+  // error code, so code c falls into [c, c + 1), which is (1-based) bin c + 1
+  const auto nCodes = errorNames.size();
+  auto& hist = mMapHist["ErrorCounter"];
+  hist = std::make_unique<TH1F>("ErrorCounter", "ErrorCounter", nCodes, 0, nCodes);
+  hist->GetXaxis()->SetTitle("Error Codes");
+  hist->GetYaxis()->SetTitle("Entries");
+  // for convenience, label each bin with the name of the error code that is
+  // filled into it. Iterating over the map instead of over bin numbers labels
+  // the last bin as well and never dereferences a failed lookup
+  for (const auto& [code, name] : errorNames) {
+    if (code < nCodes) {
+      hist->GetXaxis()->SetBinLabel(static_cast<int>(code) + 1, name);
+    }
+  }
 }
 //______________________________________________________________________________
 void GPUErrorQA::resetHistograms()
 {
-  mHist->Reset();
+  for (const auto& pair : mMapHist) {
+    pair.second->Reset();
+  }
 }
 //______________________________________________________________________________
 void GPUErrorQA::processErrors(gsl::span> errors)
 {
+  // look the histogram up once and bail out if initializeHistograms() has not
+  // run yet; operator[] would insert a null entry and then dereference it
+  const auto it = mMapHist.find("ErrorCounter");
+  if (it == mMapHist.end() || !it->second) {
+    return;
+  }
+  auto& hist = *it->second;
   for (const auto& error : errors) {
     uint32_t errorCode = error[0];
-    mHist->Fill(static_cast(errorCode));
+    hist.Fill(static_cast<double>(errorCode));
   }
 }

 //______________________________________________________________________________
 void GPUErrorQA::dumpToFile(const std::string filename)
 {
-  auto f = std::unique_ptr(TFile::Open(filename.c_str(), "recreate"));
-  mHist->Write();
-  f->Close();
+  auto f = std::unique_ptr<TFile>(TFile::Open(filename.data(), "recreate"));
+  // without a usable output file the objects below would be written to
+  // whatever the current directory happens to be, so stop here
+  if (!f || f->IsZombie()) {
+    return;
+  }
+  for (const auto& [name, hist] : mMapHist) {
+    if (!hist) {
+      continue;
+    }
+    TObjArray arr;
+    arr.SetName(name.data());
+    arr.Add(hist.get());
+    arr.Write(arr.GetName(), TObject::kSingleKey);
+  }
 }