From 3499eb3fbf44630f49a00af2fd2aa77c273d513d Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 7 Mar 2025 17:21:54 +0100 Subject: [PATCH 1/2] GPU: Bump required LLVM version for OpenCL --- dependencies/FindO2GPU.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dependencies/FindO2GPU.cmake b/dependencies/FindO2GPU.cmake index c5d53d6359ada..57c820fbe86b1 100644 --- a/dependencies/FindO2GPU.cmake +++ b/dependencies/FindO2GPU.cmake @@ -175,7 +175,7 @@ if(ENABLE_OPENCL) if(Clang_FOUND AND LLVM_FOUND AND NOT LLVM_CLANG STREQUAL "LLVM_CLANG-NOTFOUND" - AND LLVM_PACKAGE_VERSION VERSION_GREATER_EQUAL 13.0) + AND LLVM_PACKAGE_VERSION VERSION_GREATER_EQUAL 18.0) set(OPENCL_COMPATIBLE_CLANG_FOUND ON) endif() if(OpenCL_VERSION_STRING VERSION_GREATER_EQUAL 2.2 From 56423c7a3969536394567c6aa812dbb0df24cdee Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 7 Mar 2025 17:22:01 +0100 Subject: [PATCH 2/2] GPU: Simplify and cleanup code --- .../Global/GPUChainTrackingSectorTracker.cxx | 38 ++++++------------- 1 file changed, 11 insertions(+), 27 deletions(-) diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index df7c513fc1120..dd7fe285265ad 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -93,6 +93,8 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() } bool streamInit[GPUCA_MAX_STREAMS] = {false}; + int32_t streamInitAndOccMap = mRec->NStreams() - 1; + if (doGPU) { for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { processorsShadow()->tpcTrackers[iSector].GPUParametersConst()->gpumem = (char*)mRec->DeviceMemoryBase(); @@ -113,18 +115,12 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() return 2; } - WriteToConstantMemory(RecoStep::TPCSectorTracking, (char*)processors()->tpcTrackers - (char*)processors(), processorsShadow()->tpcTrackers, sizeof(GPUTPCTracker) * NSECTORS, mRec->NStreams() - 1, &mEvents->init); + WriteToConstantMemory(RecoStep::TPCSectorTracking, (char*)processors()->tpcTrackers - (char*)processors(), processorsShadow()->tpcTrackers, sizeof(GPUTPCTracker) * NSECTORS, streamInitAndOccMap, &mEvents->init); - for (int32_t i = 0; i < mRec->NStreams() - 1; i++) { - streamInit[i] = false; - } - streamInit[mRec->NStreams() - 1] = true; - } - if (GPUDebug("Initialization (1)", 0)) { - return (2); + std::fill(streamInit, streamInit + mRec->NStreams(), false); + streamInit[streamInitAndOccMap] = true; } - int32_t streamOccMap = mRec->NStreams() - 1; if (param().rec.tpc.occupancyMapTimeBins || param().rec.tpc.sysClusErrorC12Norm) { AllocateRegisteredMemory(mInputsHost->mResourceOccupancyMap, mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::tpcOccupancyMap)]); } @@ -134,21 +130,21 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() } uint32_t* ptr = doGPU ? mInputsShadow->mTPCClusterOccupancyMap : mInputsHost->mTPCClusterOccupancyMap; auto* ptrTmp = (GPUTPCClusterOccupancyMapBin*)mRec->AllocateVolatileMemory(GPUTPCClusterOccupancyMapBin::getTotalSize(param()), doGPU); - runKernel(GetGridAutoStep(streamOccMap, RecoStep::TPCSectorTracking), ptrTmp, GPUTPCClusterOccupancyMapBin::getTotalSize(param())); - runKernel(GetGridBlk(GPUCA_NSECTORS * GPUCA_ROW_COUNT, streamOccMap), ptrTmp); - runKernel(GetGridBlk(GPUTPCClusterOccupancyMapBin::getNBins(param()), streamOccMap), ptrTmp, ptr + 2); + runKernel(GetGridAutoStep(streamInitAndOccMap, RecoStep::TPCSectorTracking), ptrTmp, GPUTPCClusterOccupancyMapBin::getTotalSize(param())); + runKernel(GetGridBlk(GPUCA_NSECTORS * GPUCA_ROW_COUNT, streamInitAndOccMap), ptrTmp); + runKernel(GetGridBlk(GPUTPCClusterOccupancyMapBin::getNBins(param()), streamInitAndOccMap), ptrTmp, ptr + 2); mRec->ReturnVolatileMemory(); mInputsHost->mTPCClusterOccupancyMap[1] = param().rec.tpc.occupancyMapTimeBins * 0x10000 + param().rec.tpc.occupancyMapTimeBinsAverage; if (doGPU) { - GPUMemCpy(RecoStep::TPCSectorTracking, mInputsHost->mTPCClusterOccupancyMap + 2, mInputsShadow->mTPCClusterOccupancyMap + 2, sizeof(*ptr) * GPUTPCClusterOccupancyMapBin::getNBins(mRec->GetParam()), streamOccMap, false, &mEvents->init); + GPUMemCpy(RecoStep::TPCSectorTracking, mInputsHost->mTPCClusterOccupancyMap + 2, mInputsShadow->mTPCClusterOccupancyMap + 2, sizeof(*ptr) * GPUTPCClusterOccupancyMapBin::getNBins(mRec->GetParam()), streamInitAndOccMap, false, &mEvents->init); } else { - TransferMemoryResourceLinkToGPU(RecoStep::TPCSectorTracking, mInputsHost->mResourceOccupancyMap, streamOccMap, &mEvents->init); + TransferMemoryResourceLinkToGPU(RecoStep::TPCSectorTracking, mInputsHost->mResourceOccupancyMap, streamInitAndOccMap, &mEvents->init); } } if (param().rec.tpc.occupancyMapTimeBins || param().rec.tpc.sysClusErrorC12Norm) { uint32_t& occupancyTotal = *mInputsHost->mTPCClusterOccupancyMap; occupancyTotal = CAMath::Float2UIntRn(mRec->MemoryScalers()->nTPCHits / (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasNHBFPerTF ? mIOPtrs.settingsTF->nHBFPerTF : 128)); - mRec->UpdateParamOccupancyMap(param().rec.tpc.occupancyMapTimeBins ? mInputsHost->mTPCClusterOccupancyMap + 2 : nullptr, param().rec.tpc.occupancyMapTimeBins ? mInputsShadow->mTPCClusterOccupancyMap + 2 : nullptr, occupancyTotal, streamOccMap); + mRec->UpdateParamOccupancyMap(param().rec.tpc.occupancyMapTimeBins ? mInputsHost->mTPCClusterOccupancyMap + 2 : nullptr, param().rec.tpc.occupancyMapTimeBins ? mInputsShadow->mTPCClusterOccupancyMap + 2 : nullptr, occupancyTotal, streamInitAndOccMap); } int32_t streamMap[NSECTORS]; @@ -190,19 +186,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() } } - // Initialize temporary memory where needed - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Copying Sector Data to GPU and initializing temporary memory"); - } runKernel(GetGridAutoStep(useStream, RecoStep::TPCSectorTracking), trkShadow.Data().HitWeights(), trkShadow.Data().NumberOfHitsPlusAlign() * sizeof(*trkShadow.Data().HitWeights())); - - if (!doGPU) { - TransferMemoryResourcesToGPU(RecoStep::TPCSectorTracking, &trk, useStream); // Copy Data to GPU Global Memory - } - if (GPUDebug("Initialization (3)", useStream)) { - throw std::runtime_error("memcpy failure"); - } - runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSector}, {nullptr, streamInit[useStream] ? nullptr : &mEvents->init}}); streamInit[useStream] = true;