diff --git a/GPU/Common/GPUCommonDef.h b/GPU/Common/GPUCommonDef.h index d7e99f53d4ce8..d9a5bdf92b6ac 100644 --- a/GPU/Common/GPUCommonDef.h +++ b/GPU/Common/GPUCommonDef.h @@ -72,6 +72,10 @@ #define GPUCA_RTC_SPECIAL_CODE(...) #endif +#ifndef GPUCA_RTC_CONSTEXPR + #define GPUCA_RTC_CONSTEXPR +#endif + #ifndef GPUCA_DETERMINISTIC_CODE #ifdef GPUCA_DETERMINISTIC_MODE #define GPUCA_DETERMINISTIC_CODE(det, indet) det // In deterministic mode, take deterministic code path diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx index 5706f32e73e96..67ad608c13417 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx @@ -38,6 +38,11 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) { std::string rtcparam = std::string("#define GPUCA_RTC_CODE\n") + std::string(GetProcessingSettings().rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) __VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") + +#ifndef GPUCA_HIP_WORKAROUND_CONSTEXPR // TODO: Fixme, once we have C++ P2280R4 in Clang + std::string(GetProcessingSettings().rtc.optConstexpr ? "#define GPUCA_RTC_CONSTEXPR constexpr\n" : "#define GPUCA_RTC_CONSTEXPR\n") + +#else + std::string("#define GPUCA_RTC_CONSTEXPR\n") + +#endif GPUParamRTC::generateRTCCode(param(), GetProcessingSettings().rtc.optConstexpr); if (filename == "") { filename = "/tmp/o2cagpu_rtc_"; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu index 805397c9b430e..66c02d6ed251c 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu @@ -18,6 +18,7 @@ // Keep some preprocessor calls unprocessed #define GPUCA_RTC_SPECIAL_CODE(...) GPUCA_RTC_SPECIAL_CODE(__VA_ARGS__) #define GPUCA_DETERMINISTIC_CODE(...) GPUCA_DETERMINISTIC_CODE(__VA_ARGS__) +#define GPUCA_RTC_CONSTEXPR GPUCA_RTC_CONSTEXPR // GPUReconstructionCUDAIncludesSystem.h prependended by CMakewithout preprocessor running #include "GPUReconstructionCUDADef.h" diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 6eded3499e46e..c89ef1769ad81 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -270,3 +270,8 @@ add_dependencies(GPUTrackingHIPExternalProvider O2::GPUTracking) # must not depe if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}") add_dependencies(GPUTrackingHIPExternalProvider ${MODULE}_HIPIFIED) endif() + +set_source_files_properties("${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPGenRTC.cxx" +TARGET_DIRECTORY O2::GPUTrackingHIP +PROPERTIES +COMPILE_DEFINITIONS "GPUCA_HIP_WORKAROUND_CONSTEXPR") diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 2e26622d05291..52848692e7516 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -16,11 +16,12 @@ set(MODULE GPUTracking) if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") +elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_OPTO2}) set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O2") + else() + set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3 -ffast-math") endif() -elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") - set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3 -ffast-math") endif() set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_DENORMALS_FLAGS}") diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 6e7de7ee48ca6..d2aba503be6a6 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -301,7 +301,9 @@ void* GPUTPCGMMerger::SetPointersOutput(void* mem) computePointerWithAlignment(mem, mOutputTracks, mNMaxTracks); if (mRec->GetParam().dodEdxEnabled) { computePointerWithAlignment(mem, mOutputTracksdEdx, mNMaxTracks); - computePointerWithAlignment(mem, mOutputTracksdEdxAlt, mNMaxTracks); + if (mRec->GetParam().rec.tpc.dEdxClusterRejectionFlagMask != mRec->GetParam().rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + computePointerWithAlignment(mem, mOutputTracksdEdxAlt, mNMaxTracks); + } } computePointerWithAlignment(mem, mClusters, mNMaxOutputTrackClusters); if (mRec->GetParam().par.earlyTpcTransform) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 9dc6ddc59c2b4..9ead17ea5c7c0 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -106,6 +106,7 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks const uint32_t flagsRequired = getFlagsRequired(merger.Param().rec); TrackTPC* outputTracks = merger.OutputTracksTPCO2(); uint32_t* clusRefs = merger.OutputClusRefsTPCO2(); + const auto& param = merger.Param(); GPUTPCGMMerger::tmpSort* GPUrestrict() trackSort = merger.TrackSortO2(); uint2* GPUrestrict() tmpData = merger.ClusRefTmp(); @@ -130,9 +131,15 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks oTrack.setChi2(tracks[i].GetParam().GetChi2()); auto& outerPar = tracks[i].OuterParam(); - if (merger.Param().par.dodEdx && merger.Param().dodEdxEnabled) { - oTrack.setdEdx(tracksdEdx[i]); - oTrack.setdEdxAlt(tracksdEdxAlt[i]); + if GPUCA_RTC_CONSTEXPR (param.par.dodEdx) { + if (param.dodEdxEnabled) { + oTrack.setdEdx(tracksdEdx[i]); + if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + oTrack.setdEdxAlt(tracksdEdxAlt[i]); + } else { + oTrack.setdEdxAlt(tracksdEdx[i]); + } + } } auto snpOut = outerPar.P[2]; @@ -148,9 +155,9 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks outerPar.C[6], outerPar.C[7], outerPar.C[8], outerPar.C[9], outerPar.C[10], outerPar.C[11], outerPar.C[12], outerPar.C[13], outerPar.C[14]})); - if (merger.Param().par.dodEdx && merger.Param().dodEdxEnabled && merger.Param().rec.tpc.enablePID) { + if (param.par.dodEdx && param.dodEdxEnabled && param.rec.tpc.enablePID) { PIDResponse pidResponse{}; - auto pid = pidResponse.getMostProbablePID(oTrack, merger.Param().rec.tpc.PID_EKrangeMin, merger.Param().rec.tpc.PID_EKrangeMax, merger.Param().rec.tpc.PID_EPrangeMin, merger.Param().rec.tpc.PID_EPrangeMax, merger.Param().rec.tpc.PID_EDrangeMin, merger.Param().rec.tpc.PID_EDrangeMax, merger.Param().rec.tpc.PID_ETrangeMin, merger.Param().rec.tpc.PID_ETrangeMax, merger.Param().rec.tpc.PID_useNsigma, merger.Param().rec.tpc.PID_sigma); + auto pid = pidResponse.getMostProbablePID(oTrack, param.rec.tpc.PID_EKrangeMin, param.rec.tpc.PID_EKrangeMax, param.rec.tpc.PID_EPrangeMin, param.rec.tpc.PID_EPrangeMax, param.rec.tpc.PID_EDrangeMin, param.rec.tpc.PID_EDrangeMax, param.rec.tpc.PID_ETrangeMin, merger.Param().rec.tpc.PID_ETrangeMax, merger.Param().rec.tpc.PID_useNsigma, merger.Param().rec.tpc.PID_sigma); auto pidRemap = merger.Param().rec.tpc.PID_remap[pid]; if (pidRemap >= 0) { pid = pidRemap; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index f5bfbe985fb8c..0d8547263207b 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -216,11 +216,15 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ continue; } } else if (allowModification && lastRow != 255 && CAMath::Abs(cluster.row - lastRow) > 1) { - bool dodEdx = param.par.dodEdx && param.dodEdxEnabled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2 && cluster.leg == clusters[maxN - 1].leg; - dodEdx = AttachClustersPropagate(merger, cluster.sector, lastRow, cluster.row, iTrk, cluster.leg == clusters[maxN - 1].leg, prop, inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx); - if (dodEdx) { - dEdx.fillSubThreshold(lastRow - wayDirection); - dEdxAlt.fillSubThreshold(lastRow - wayDirection); + if GPUCA_RTC_CONSTEXPR (param.par.dodEdx) { + bool dodEdx = param.dodEdxEnabled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2 && cluster.leg == clusters[maxN - 1].leg; + dodEdx = AttachClustersPropagate(merger, cluster.sector, lastRow, cluster.row, iTrk, cluster.leg == clusters[maxN - 1].leg, prop, inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx); + if (dodEdx) { + dEdx.fillSubThreshold(lastRow - wayDirection); + if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + dEdxAlt.fillSubThreshold(lastRow - wayDirection); + } + } } } @@ -367,31 +371,35 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ CADEBUG(printf("Reinit linearization\n")); prop.SetTrack(this, prop.GetAlpha()); } - if (param.par.dodEdx && param.dodEdxEnabled && iWay == nWays - 1 && cluster.leg == clusters[maxN - 1].leg) { // TODO: Costimize flag to remove, and option to remove double-clusters - bool acc = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMask) == 0, accAlt = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) == 0; - if (acc || accAlt) { - float qtot = 0, qmax = 0, pad = 0, relTime = 0; - const int32_t clusterCount = (ihit - ihitMergeFirst) * wayDirection + 1; - for (int32_t iTmp = ihitMergeFirst; iTmp != ihit + wayDirection; iTmp += wayDirection) { - if (merger->GetConstantMem()->ioPtrs.clustersNative == nullptr) { - qtot += clustersXYZ[ihit].amp; - } else { - const ClusterNative& cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num]; - qtot += cl.qTot; - qmax = CAMath::Max(qmax, cl.qMax); - pad += cl.getPad(); - relTime += cl.getTime(); + if GPUCA_RTC_CONSTEXPR (param.par.dodEdx) { + if (param.dodEdxEnabled && iWay == nWays - 1 && cluster.leg == clusters[maxN - 1].leg) { // TODO: Costimize flag to remove, and option to remove double-clusters + bool acc = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMask) == 0, accAlt = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) == 0; + if (acc || accAlt) { + float qtot = 0, qmax = 0, pad = 0, relTime = 0; + const int32_t clusterCount = (ihit - ihitMergeFirst) * wayDirection + 1; + for (int32_t iTmp = ihitMergeFirst; iTmp != ihit + wayDirection; iTmp += wayDirection) { + if (merger->GetConstantMem()->ioPtrs.clustersNative == nullptr) { + qtot += clustersXYZ[ihit].amp; + } else { + const ClusterNative& cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num]; + qtot += cl.qTot; + qmax = CAMath::Max(qmax, cl.qMax); + pad += cl.getPad(); + relTime += cl.getTime(); + } + } + qtot /= clusterCount; // TODO: Weighted Average + pad /= clusterCount; + relTime /= clusterCount; + relTime = relTime - CAMath::Round(relTime); + if (acc) { + dEdx.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime); + } + if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + if (accAlt) { + dEdxAlt.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime); + } } - } - qtot /= clusterCount; // TODO: Weighted Average - pad /= clusterCount; - relTime /= clusterCount; - relTime = relTime - CAMath::Round(relTime); - if (acc) { - dEdx.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime); - } - if (accAlt) { - dEdxAlt.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime); } } } @@ -428,7 +436,9 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ if (param.par.dodEdx && param.dodEdxEnabled) { dEdx.computedEdx(merger->OutputTracksdEdx()[iTrk], param); - dEdxAlt.computedEdx(merger->OutputTracksdEdxAlt()[iTrk], param); + if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + dEdxAlt.computedEdx(merger->OutputTracksdEdxAlt()[iTrk], param); + } } Alpha = prop.GetAlpha(); MoveToReference(prop, param, Alpha);