diff --git a/Detectors/TPC/qc/CMakeLists.txt b/Detectors/TPC/qc/CMakeLists.txt index 6bb4c726a90fa..60195ed6d451a 100644 --- a/Detectors/TPC/qc/CMakeLists.txt +++ b/Detectors/TPC/qc/CMakeLists.txt @@ -19,6 +19,7 @@ o2_add_library(TPCQC src/SACs.cxx src/IDCsVsSACs.cxx src/TrackClusters.cxx + src/GPUErrorQA.cxx PUBLIC_LINK_LIBRARIES O2::TPCBase O2::DataFormatsTPC O2::GPUO2Interface @@ -36,7 +37,8 @@ o2_target_root_dictionary(TPCQC include/TPCQC/DCSPTemperature.h include/TPCQC/SACs.h include/TPCQC/IDCsVsSACs.h - include/TPCQC/TrackClusters.h) + include/TPCQC/TrackClusters.h + include/TPCQC/GPUErrorQA.h) o2_add_test(PID COMPONENT_NAME tpc diff --git a/Detectors/TPC/qc/include/TPCQC/GPUErrorQA.h b/Detectors/TPC/qc/include/TPCQC/GPUErrorQA.h new file mode 100644 index 0000000000000..ec171a6925a98 --- /dev/null +++ b/Detectors/TPC/qc/include/TPCQC/GPUErrorQA.h @@ -0,0 +1,69 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. +ㅤ+/// +/// @file GPUErrorQA.h +/// @author Anton Riedel, anton.riedel@cern.ch +/// + +#ifndef AliceO2_TPC_QC_GPUERRORQA_H +#define AliceO2_TPC_QC_GPUERRORQA_H + +#include +#include +#include +#include + +// root includes + +// o2 includes +// #include "DataFormatsTPC/Defs.h" + +class TH1; +namespace o2::tpc::qc +{ + +/// @brief TPC QC task for errors from GPU reconstruction +/// +/// This class is used to retrieve and visualize GPU errors +/// according to corresponding error code and location. 
+/// +/// origin: TPC +/// @author Anton Riedel, anton.riedel@cern.ch +class GPUErrorQA +{ + public: + /// \brief Constructor. + GPUErrorQA() = default; + + /// Count the GPU errors reported by the reconstruction workflow: element 0 of each error record is used as the error code + void processErrors(std::vector> errors); + + /// Create the "ErrorCounter" histogram and label its bins with the GPU error names + void initializeHistograms(); + + /// Reset every histogram stored in the map + void resetHistograms(); + + /// Read-only access to the histogram map, keyed by histogram name (initializeHistograms() creates the entry "ErrorCounter") + const std::unordered_map>& getMapHist() const { return mMapHist; } + + /// Write all histograms, packed into one TObjArray named "GPUErrorQA_Hists", to a ROOT file opened in "recreate" mode + void dumpToFile(std::string filename); + + private: + std::unordered_map> mMapHist; + + ClassDefNV(GPUErrorQA, 2); +}; +} // namespace o2::tpc::qc + +#endif // AliceO2_TPC_QC_GPUERRORQA_H diff --git a/Detectors/TPC/qc/src/GPUErrorQA.cxx b/Detectors/TPC/qc/src/GPUErrorQA.cxx new file mode 100644 index 0000000000000..d4848aaefecb7 --- /dev/null +++ b/Detectors/TPC/qc/src/GPUErrorQA.cxx @@ -0,0 +1,80 @@ +// Copyright 2019-2025 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +#define _USE_MATH_DEFINES + +// root includes +#include "TFile.h" +#include "TH1I.h" + +// o2 includes +#include "TPCQC/GPUErrorQA.h" +#include "GPUErrors.h" + +ClassImp(o2::tpc::qc::GPUErrorQA); + +using namespace o2::tpc::qc; + +//______________________________________________________________________________ +void GPUErrorQA::initializeHistograms() +{ + TH1::AddDirectory(false); + + auto const& errorNames = o2::gpu::GPUErrors::getErrorNames(); + + int maxErrorCode = 1; + for (const auto& [key, _] : errorNames) { + if (static_cast(key) > maxErrorCode) { + maxErrorCode = key; + } + } + + // 1D histogram counting all reported errors; NOTE(review): bins are later addressed by index (bin N <-> error code N) while the axis starts at -0.5, so axis values sit one unit below the code - confirm this is intended + mMapHist["ErrorCounter"] = std::make_unique("ErrorCounter", "ErrorCounter", maxErrorCode, -0.5, maxErrorCode - 0.5); + mMapHist["ErrorCounter"]->GetXaxis()->SetTitle("Error Codes"); + mMapHist["ErrorCounter"]->GetYaxis()->SetTitle("Entries"); + // for convenience, label each bin with the error name; codes without an entry in errorNames are labelled "NO_DEF" + for (size_t bin = 1; bin <= maxErrorCode; bin++) { + auto const& it = errorNames.find(bin); + if (it != errorNames.end()) { + mMapHist["ErrorCounter"]->GetXaxis()->SetBinLabel(bin, it->second); + } else { + mMapHist["ErrorCounter"]->GetXaxis()->SetBinLabel(bin, "NO_DEF"); + } + } +} +//______________________________________________________________________________ +void GPUErrorQA::resetHistograms() +{ + for (const auto& pair : mMapHist) { + pair.second->Reset(); + } +} +//______________________________________________________________________________ +void GPUErrorQA::processErrors(std::vector> errors) +{ + for (const auto& error : errors) { + uint32_t errorCode = error[0]; + mMapHist["ErrorCounter"]->AddBinContent(errorCode); // NOTE(review): errorCode is used directly as the bin index with no range check, and operator[] inserts a null entry if initializeHistograms() was not called first - confirm both cannot happen + } +} + +//______________________________________________________________________________ +void GPUErrorQA::dumpToFile(const std::string filename) +{ + auto f = std::unique_ptr(TFile::Open(filename.data(), "recreate")); // NOTE(review): the result of TFile::Open is not checked before writing + TObjArray arr; + arr.SetName("GPUErrorQA_Hists"); + for ([[maybe_unused]] const auto& [name, 
hist] : mMapHist) { + arr.Add(hist.get()); + } + arr.Write(arr.GetName(), TObject::kSingleKey); +} diff --git a/Detectors/TPC/qc/src/TPCQCLinkDef.h b/Detectors/TPC/qc/src/TPCQCLinkDef.h index c227ebcad8c09..3921d7dfe5649 100644 --- a/Detectors/TPC/qc/src/TPCQCLinkDef.h +++ b/Detectors/TPC/qc/src/TPCQCLinkDef.h @@ -24,6 +24,7 @@ #pragma link C++ class o2::tpc::qc::SACs + ; #pragma link C++ class o2::tpc::qc::IDCsVsSACs + ; #pragma link C++ class o2::tpc::qc::TrackClusters + ; +#pragma link C++ class o2::tpc::qc::GPUErrorQA + ; #pragma link C++ function o2::tpc::qc::helpers::makeLogBinning + ; #pragma link C++ function o2::tpc::qc::helpers::setStyleHistogram1D + ; #pragma link C++ function o2::tpc::qc::helpers::setStyleHistogram2D + ; diff --git a/GPU/GPUTracking/Global/GPUErrors.cxx b/GPU/GPUTracking/Global/GPUErrors.cxx index e9d5a74c6567a..4baa299c6b976 100644 --- a/GPU/GPUTracking/Global/GPUErrors.cxx +++ b/GPU/GPUTracking/Global/GPUErrors.cxx @@ -48,15 +48,20 @@ void GPUErrors::clear() memset(mErrors, 0, GPUCA_MAX_ERRORS * sizeof(*mErrors)); } -static std::unordered_map errorNames = { +const std::unordered_map& GPUErrors::getErrorNames() +{ + static std::unordered_map errorNames = { #define GPUCA_ERROR_CODE(num, name, ...) 
{num, GPUCA_M_STR(name)}, #include "GPUErrorCodes.h" #undef GPUCA_ERROR_CODE -}; + }; + return errorNames; +} bool GPUErrors::printErrors(bool silent, uint64_t mask) { bool retVal = 0; + const auto& errorNames = getErrorNames(); for (uint32_t i = 0; i < std::min(*mErrors, GPUCA_MAX_ERRORS); i++) { uint32_t errorCode = mErrors[4 * i + 1]; const auto& it = errorNames.find(errorCode); diff --git a/GPU/GPUTracking/Global/GPUErrors.h b/GPU/GPUTracking/Global/GPUErrors.h index 1cbc4a019601d..535364bf08ce1 100644 --- a/GPU/GPUTracking/Global/GPUErrors.h +++ b/GPU/GPUTracking/Global/GPUErrors.h @@ -16,6 +16,9 @@ #define GPUERRORS_H #include "GPUCommonDef.h" +#ifndef GPUCA_GPUCODE +#include +#endif namespace o2::gpu { @@ -34,6 +37,9 @@ class GPUErrors void setMemory(GPUglobalref() uint32_t* m) { mErrors = m; } void clear(); bool printErrors(bool silent = false, uint64_t mask = 0); +#ifndef GPUCA_GPUCODE + static const std::unordered_map& getErrorNames(); +#endif uint32_t getNErrors() const; const uint32_t* getErrorPtr() const; static uint32_t getMaxErrors();