From a79ab8a8aab3bee52f9539c9d2f9f454b7d24f94 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 15:56:55 +0200 Subject: [PATCH 1/5] GPU: Add some more type trait templates for GPU code --- GPU/Common/GPUCommonTypeTraits.h | 44 +++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/GPU/Common/GPUCommonTypeTraits.h b/GPU/Common/GPUCommonTypeTraits.h index 6d72565d1f1fb..f837019c11875 100644 --- a/GPU/Common/GPUCommonTypeTraits.h +++ b/GPU/Common/GPUCommonTypeTraits.h @@ -22,7 +22,7 @@ #include #endif #else -// We just reimplement some type traits in std for the GPU +// We just reimplement some type traits in std for the GPU // TODO: Check if meanwhile we can get rid of GPUCommonTypeTraits and GPUCommonArray, and just use the std headers. namespace std { template @@ -35,6 +35,7 @@ struct conditional { }; template using contitional_t = typename conditional::type; + template struct is_same { static constexpr bool value = false; @@ -45,6 +46,7 @@ struct is_same { }; template static constexpr bool is_same_v = is_same::value; + template struct enable_if { }; @@ -52,6 +54,7 @@ template struct enable_if { typedef T type; }; + template struct remove_cv { typedef T type; @@ -68,6 +71,9 @@ template struct remove_cv { typedef T type; }; +template +using remove_cv_t = typename remove_cv::type; + template struct remove_const { typedef T type; @@ -76,6 +82,9 @@ template struct remove_const { typedef T type; }; +template +using remove_const_t = typename remove_const::type; + template struct remove_volatile { typedef T type; @@ -84,6 +93,9 @@ template struct remove_volatile { typedef T type; }; +template +using remove_volatile_t = typename remove_volatile::type; + template struct is_pointer_t { static constexpr bool value = false; @@ -95,6 +107,36 @@ struct is_pointer_t { template struct is_pointer : is_pointer_t::type> { }; + +template +struct remove_reference { + typedef T type; +}; +template +struct remove_reference { + typedef T type; +}; +template +struct remove_reference { + typedef T type; +}; +template +using remove_reference_t = typename remove_reference::type; + +template +struct is_member_pointer_helper { + static constexpr bool value = false; +}; +template +struct is_member_pointer_helper { + static constexpr bool value = true; +}; +template +struct is_member_pointer : is_member_pointer_helper::type> { +}; +template +static constexpr bool is_member_pointer_v = is_member_pointer::value; + } // namespace std #endif From 2c4457fd23dcb0116dab301920dc636c9aa51d9c Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 15:57:10 +0200 Subject: [PATCH 2/5] Revert "GPU: Workaround for Clang Frontend issue" This reverts commit 89b35ba2d75113e60b2045ed01e169b28d860a07. --- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx | 4 ---- GPU/GPUTracking/Base/hip/CMakeLists.txt | 5 ----- 2 files changed, 9 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx index 67ad608c13417..acc77648d954b 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx @@ -38,11 +38,7 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) { std::string rtcparam = std::string("#define GPUCA_RTC_CODE\n") + std::string(GetProcessingSettings().rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) __VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") + -#ifndef GPUCA_HIP_WORKAROUND_CONSTEXPR // TODO: Fixme, once we have C++ P2280R4 in Clang std::string(GetProcessingSettings().rtc.optConstexpr ? "#define GPUCA_RTC_CONSTEXPR constexpr\n" : "#define GPUCA_RTC_CONSTEXPR\n") + -#else - std::string("#define GPUCA_RTC_CONSTEXPR\n") + -#endif GPUParamRTC::generateRTCCode(param(), GetProcessingSettings().rtc.optConstexpr); if (filename == "") { filename = "/tmp/o2cagpu_rtc_"; diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index c89ef1769ad81..6eded3499e46e 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -270,8 +270,3 @@ add_dependencies(GPUTrackingHIPExternalProvider O2::GPUTracking) # must not depe if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}") add_dependencies(GPUTrackingHIPExternalProvider ${MODULE}_HIPIFIED) endif() - -set_source_files_properties("${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPGenRTC.cxx" -TARGET_DIRECTORY O2::GPUTrackingHIP -PROPERTIES -COMPILE_DEFINITIONS "GPUCA_HIP_WORKAROUND_CONSTEXPR") From 9a442e5f81ce0fa094383e71765f2c26cb484464 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 18:52:29 +0200 Subject: [PATCH 3/5] GPU: Template workaround to get static constexpr values as constexpr from references --- GPU/GPUTracking/Definitions/GPUGetConstexpr.h | 67 +++++++++++++++++++ GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 5 +- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 11 +-- 3 files changed, 76 insertions(+), 7 deletions(-) create mode 100644 GPU/GPUTracking/Definitions/GPUGetConstexpr.h diff --git a/GPU/GPUTracking/Definitions/GPUGetConstexpr.h b/GPU/GPUTracking/Definitions/GPUGetConstexpr.h new file mode 100644 index 0000000000000..8001b4e98c83f --- /dev/null +++ b/GPU/GPUTracking/Definitions/GPUGetConstexpr.h @@ -0,0 +1,67 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUGetConstexpr.h +/// \author David Rohr + +#ifndef GPUGETCONSTEXPR_H +#define GPUGETCONSTEXPR_H + +#include "GPUCommonDef.h" +#include "GPUCommonTypeTraits.h" + +// This is a temporary workaround required for clang (with c++20), until we can go to C++23 with P2280R4, which allows getting constexpr static values from references + +#if defined(__clang__) && __cplusplus >= 202002L && __cplusplus < 202302L + +namespace o2::gpu::internal +{ + +#define GPUCA_GET_CONSTEXPR(obj, val) ( \ + std::is_member_pointer_v::val)> ? o2::gpu::internal::getConstexpr(&std::remove_reference_t::val, o2::gpu::internal::getConstexprHelper::val), decltype(&obj)>(&obj).value) : o2::gpu::internal::getConstexpr(&std::remove_reference_t::val, o2::gpu::internal::getConstexprHelper::val), decltype(&obj)>().value)) + +template +struct getConstexprHelper; + +template + requires(!std::is_member_pointer_v) +struct getConstexprHelper { + GPUdi() constexpr getConstexprHelper(const void* = nullptr) {} + static constexpr const void* value = nullptr; +}; + +template + requires(std::is_member_pointer_v) +struct getConstexprHelper { + GPUdi() constexpr getConstexprHelper(const S& v) : value(v) {} + GPUdDefault() constexpr getConstexprHelper() = default; + const S value = nullptr; +}; + +GPUdi() constexpr auto getConstexpr(const auto* v, const void* = nullptr) +{ + return *v; +} + +GPUdi() constexpr auto getConstexpr(const auto v, const auto w) +{ + return w->*v; +} + +} // namespace o2::gpu::internal + +#else // __clang__ + +#define GPUCA_GET_CONSTEXPR(obj, val) (obj).val + +#endif + +#endif // GPUGETCONSTEXPR_H diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index eb22ca49e9242..7bb28a9f22e31 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -20,6 +20,7 @@ #include "DataFormatsTPC/PIDResponse.h" #include "TPCFastTransform.h" #include "CorrectionMapsHelper.h" +#include "GPUGetConstexpr.h" #ifndef GPUCA_GPUCODE #include "SimulationDataFormat/ConstMCTruthContainer.h" @@ -141,10 +142,10 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks oTrack.setChi2(tracks[i].GetParam().GetChi2()); auto& outerPar = tracks[i].OuterParam(); - if GPUCA_RTC_CONSTEXPR (param.par.dodEdx) { + if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.par, dodEdx)) { if (param.dodEdxEnabled) { oTrack.setdEdx(tracksdEdx[i]); - if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMask) != GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMaskAlt)) { oTrack.setdEdxAlt(tracksdEdxAlt[i]); } else { oTrack.setdEdxAlt(tracksdEdx[i]); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 4b616fce83f5f..1072e4b178bdf 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -39,6 +39,7 @@ #include "GPUTPCConvertImpl.h" #include "GPUTPCGMMergerTypes.h" #include "GPUParam.inc" +#include "GPUGetConstexpr.h" #ifdef GPUCA_CADEBUG_ENABLED #include "../utils/qconfig.h" @@ -216,12 +217,12 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ continue; } } else if (allowModification && lastRow != 255 && CAMath::Abs(cluster.row - lastRow) > 1) { - if GPUCA_RTC_CONSTEXPR (param.par.dodEdx) { + if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.par, dodEdx)) { bool dodEdx = param.dodEdxEnabled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2 && cluster.leg == clusters[maxN - 1].leg; dodEdx = AttachClustersPropagate(merger, cluster.sector, lastRow, cluster.row, iTrk, cluster.leg == clusters[maxN - 1].leg, prop, inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx); if (dodEdx) { dEdx.fillSubThreshold(lastRow - wayDirection); - if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMask) != GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMaskAlt)) { dEdxAlt.fillSubThreshold(lastRow - wayDirection); } } @@ -371,7 +372,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ CADEBUG(printf("Reinit linearization\n")); prop.SetTrack(this, prop.GetAlpha()); } - if GPUCA_RTC_CONSTEXPR (param.par.dodEdx) { + if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.par, dodEdx)) { if (param.dodEdxEnabled && iWay == nWays - 1 && cluster.leg == clusters[maxN - 1].leg) { // TODO: Costimize flag to remove, and option to remove double-clusters bool acc = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMask) == 0, accAlt = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) == 0; if (acc || accAlt) { @@ -395,7 +396,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ if (acc) { dEdx.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime); } - if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMask) != GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMaskAlt)) { if (accAlt) { dEdxAlt.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime); } @@ -436,7 +437,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ if (param.par.dodEdx && param.dodEdxEnabled) { dEdx.computedEdx(merger->MergedTracksdEdx()[iTrk], param); - if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMask) != GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMaskAlt)) { dEdxAlt.computedEdx(merger->MergedTracksdEdxAlt()[iTrk], param); } } From 26ba7b237205403f8f5580c7c91ed13967df6edb Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 18:52:48 +0200 Subject: [PATCH 4/5] GPU: Simplify some type_traits use, get rid of ::values and ::type --- .../Base/cuda/GPUReconstructionCUDAInternals.h | 2 +- GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 4 ++-- GPU/GPUTracking/qa/GPUQAHelper.h | 8 ++++---- GPU/GPUTracking/utils/bitfield.h | 2 +- GPU/GPUTracking/utils/qconfig.cxx | 8 ++++---- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h index 0813c9d22ea09..493c09e448e5e 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h @@ -79,7 +79,7 @@ class GPUDebugTiming bool mDo; }; -static_assert(std::is_convertible::value, "CUDA event type incompatible to deviceEvent"); +static_assert(std::is_convertible_v, "CUDA event type incompatible to deviceEvent"); } // namespace o2::gpu diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index 949dd6195b262..ce05e159461e5 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -18,7 +18,7 @@ #include -static_assert(std::is_convertible::value, "OpenCL event type incompatible to deviceEvent"); +static_assert(std::is_convertible_v, "OpenCL event type incompatible to deviceEvent"); #define GPUErrorReturn(...) \ { \ diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index f1a0816529c3a..73ca449252d1d 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -235,7 +235,7 @@ int64_t GPUTPCGMMerger::GetTrackLabelA(const S& trk) const { GPUTPCGMSectorTrack* sectorTrack = nullptr; int32_t nClusters = 0; - if constexpr (std::is_same::value) { + if constexpr (std::is_same_v) { sectorTrack = &mSectorTrackInfos[trk.TrackID()]; nClusters = sectorTrack->OrigTrack()->NHits(); } else { @@ -244,7 +244,7 @@ int64_t GPUTPCGMMerger::GetTrackLabelA(const S& trk) const auto acc = GPUTPCTrkLbl(resolveMCLabels(GetConstantMem()->ioPtrs.clustersNative ? GetConstantMem()->ioPtrs.clustersNative->clustersMCTruth : nullptr, GetConstantMem()->ioPtrs.mcLabelsTPC), 0.5f); for (int32_t i = 0; i < nClusters; i++) { int32_t id; - if constexpr (std::is_same::value) { + if constexpr (std::is_same_v) { const GPUTPCTracker& tracker = GetConstantMem()->tpcTrackers[sectorTrack->Sector()]; const GPUTPCHitId& ic = tracker.TrackHits()[sectorTrack->OrigTrack()->FirstHitID() + i]; id = tracker.Data().ClusterDataIndex(tracker.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[sectorTrack->Sector()][0]; diff --git a/GPU/GPUTracking/qa/GPUQAHelper.h b/GPU/GPUTracking/qa/GPUQAHelper.h index a7811c6fd55ed..a830562119467 100644 --- a/GPU/GPUTracking/qa/GPUQAHelper.h +++ b/GPU/GPUTracking/qa/GPUQAHelper.h @@ -48,7 +48,7 @@ class GPUTPCTrkLbl } inline void addLabel(uint32_t elementId) { - if constexpr (std::is_same::value) { + if constexpr (std::is_same_v) { for (uint32_t i = 0; i < sizeof(mClusterLabels[elementId]) / sizeof(mClusterLabels[elementId].fClusterID[0]); i++) { const auto& element = mClusterLabels[elementId].fClusterID[i]; if (element.fMCID >= 0) { @@ -101,7 +101,7 @@ class GPUTPCTrkLbl } } auto& bestLabel = mLabels[bestLabelNum].first; - if constexpr (std::is_same::value && WEIGHT) { + if constexpr (std::is_same_v && WEIGHT) { *labelWeight = bestLabel.fWeight; *totalWeight = mTotalWeight; *maxCount = bestLabelCount; @@ -147,7 +147,7 @@ struct GPUTPCTrkLbl_ret { template class S, typename... Args> static inline auto GPUTPCTrkLbl(const S* x, Args... args) { - if constexpr (std::is_same::value) { + if constexpr (std::is_same_v) { return internal::GPUTPCTrkLbl>(x, args...); } else { return internal::GPUTPCTrkLbl, U>(x, args...); @@ -159,7 +159,7 @@ static inline auto GPUTPCTrkLbl(const AliHLTTPCClusterMCLabel* x, Args... args) { using S = AliHLTTPCClusterMCLabel; using T = AliHLTTPCClusterMCWeight; - if constexpr (std::is_same::value) { + if constexpr (std::is_same_v) { return internal::GPUTPCTrkLbl(x, args...); } else { return internal::GPUTPCTrkLbl(x, args...); diff --git a/GPU/GPUTracking/utils/bitfield.h b/GPU/GPUTracking/utils/bitfield.h index 9730f6c6c234f..a3a3ac9a5bd95 100644 --- a/GPU/GPUTracking/utils/bitfield.h +++ b/GPU/GPUTracking/utils/bitfield.h @@ -93,7 +93,7 @@ class bitfield } #if !defined(GPUCA_GPUCODE_DEVICE) - static_assert(std::is_integral::value, "Storage type non integral"); + static_assert(std::is_integral_v, "Storage type non integral"); static_assert(sizeof(S) >= sizeof(T), "Storage type has insufficient capacity"); #endif diff --git a/GPU/GPUTracking/utils/qconfig.cxx b/GPU/GPUTracking/utils/qconfig.cxx index cdb41ec5813f2..839954e52ded3 100644 --- a/GPU/GPUTracking/utils/qconfig.cxx +++ b/GPU/GPUTracking/utils/qconfig.cxx @@ -126,7 +126,7 @@ static inline int32_t qAddOptionMainTupleElem(qConfigSettings settings = settingsTup; return (qAddOptionType(settings, ref, i, argv, argc, def)); } -template ::value> +template > struct qAddOptionMainTupleStruct { static inline int32_t qAddOptionMainTuple(qConfigSettings::settingsType> settings, T& tup, int32_t& i, const char** argv, const int argc) { @@ -157,13 +157,13 @@ struct qConfigType { // Recursive handling of additional settings static inline void qProcessSetting(qConfigSettings& settings, qmin_t minval) { - static_assert(!std::is_same::value, "min option not supported for boolean settings"); + static_assert(!std::is_same_v, "min option not supported for boolean settings"); settings.checkMin = true; settings.min = minval.v; } static inline void qProcessSetting(qConfigSettings& settings, qmax_t maxval) { - static_assert(!std::is_same::value, "max option not supported for boolean settings"); + static_assert(!std::is_same_v, "max option not supported for boolean settings"); settings.checkMax = true; settings.max = maxval.v; } @@ -244,7 +244,7 @@ struct qConfigType { static inline void qConfigHelpOption(const char* name, const char* type, const char* def, const char* optname, char optnameshort, const char* preopt, char preoptshort, int32_t optionType, const char* help, Args&&... args) { auto settings = qConfigGetSettings(args...); - const bool boolType = optionType != 1 && std::is_same::value; + const bool boolType = optionType != 1 && std::is_same_v; const char* arguments = settings.doSet ? " (" : (settings.doDefault || optionType == 1 || boolType) ? " [arg] (" : optionType == 2 ? " [...] (" : " arg ("; char argBuffer[4] = {0}; uint32_t argBufferPos = 0; From a4240d72f1672aa63ab5a320cd983625180df13c Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 14 May 2025 20:43:33 +0200 Subject: [PATCH 5/5] GPU CMake: Use CUDA/HIP compilers to preprocess CUDA/HIP RTC files --- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 2 +- GPU/GPUTracking/Base/hip/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index c31dd0c8d3fe2..d9ee132d7c5f5 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -68,7 +68,7 @@ set(GPU_RTC_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionCUDArtc) add_custom_command( OUTPUT ${GPU_RTC_BIN}.src COMMAND cp ${GPUDIR}/Base/cuda/GPUReconstructionCUDAIncludesSystem.h ${GPU_RTC_BIN}.src - COMMAND ${CMAKE_CXX_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_CUDA_STANDARD} -D__CUDA_ARCH__=${RTC_CUDA_ARCH} -D__CUDACC__ -x c++ -nostdinc -E -P ${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src + COMMAND ${CMAKE_CUDA_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_CUDA_STANDARD} -D__CUDA_ARCH__=${RTC_CUDA_ARCH} -D__CUDACC__ -x c++ -E -Xcompiler "-nostdinc -P" ${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src MAIN_DEPENDENCY ${GPU_RTC_SRC} IMPLICIT_DEPENDS CXX ${GPU_RTC_SRC} COMMAND_EXPAND_LISTS diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 6eded3499e46e..a47c659c8717d 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -115,7 +115,7 @@ set(GPU_RTC_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionHIPrtc) add_custom_command( OUTPUT ${GPU_RTC_BIN}.src COMMAND cp ${GPUDIR}/Base/hip/GPUReconstructionHIPIncludesSystem.h ${GPU_RTC_BIN}.src - COMMAND ${CMAKE_CXX_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_HIP_STANDARD} -D__HIPCC__ -D__HIP_DEVICE_COMPILE__ -x c++ -nostdinc -E -P ${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src + COMMAND ${CMAKE_HIP_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_HIP_STANDARD} -D__HIPCC__ -D__HIP_DEVICE_COMPILE__ -x c++ -nostdinc -E -P ${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src MAIN_DEPENDENCY ${GPU_RTC_SRC} IMPLICIT_DEPENDS CXX ${GPU_RTC_SRC} DEPENDS ${MODULE}_HIPIFIED