AliceO2Group · davidrohr · Aug 27, 2025 · Aug 26, 2025
@@ -194,7 +194,7 @@ int32_t GPUReconstructionCPU::InitDevice()
     ClearAllocatedMemory();
   }
   if (GetProcessingSettings().inKernelParallel) {
-    mBlockCount = mMaxHostThreads;
+    mMultiprocessorCount = mMaxHostThreads;
   }
   mProcShadow.mProcessorsProc = processors();
   return 0;

@@ -94,7 +94,7 @@ class GPUReconstructionCPU : public GPUReconstructionProcessing::KernelInterface
   GPUProcessorProcessors mProcShadow; // Host copy of tracker objects that will be used on the GPU
   GPUConstantMem*& mProcessorsShadow = mProcShadow.mProcessorsProc;
 
-  uint32_t mBlockCount = 1;
+  uint32_t mMultiprocessorCount = 1; // Multiprocessor count of the backend; scales the automatic block count (prop.minBlocks * mMultiprocessorCount) in runKernelInterface
   uint32_t mThreadCount = 1;
   uint32_t mWarpSize = 1;
 

@@ -36,7 +36,7 @@ inline void GPUReconstructionCPU::runKernelInterface(krnlSetup&& setup, Args con
   const uint32_t stream = setup.x.stream;
   auto prop = getKernelProperties<S, I>();
   const int32_t autoThreads = cpuFallback ? 1 : prop.nThreads;
-  const int32_t autoBlocks = cpuFallback ? 1 : (prop.forceBlocks ? prop.forceBlocks : (prop.minBlocks * mBlockCount));
+  const int32_t autoBlocks = cpuFallback ? 1 : (prop.forceBlocks ? prop.forceBlocks : (prop.minBlocks * mMultiprocessorCount));
   if (nBlocks == (uint32_t)-1) {
     nBlocks = (nThreads + autoThreads - 1) / autoThreads;
     nThreads = autoThreads;

@@ -247,8 +247,8 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime()
       throw std::runtime_error("Invalid warp size on GPU");
     }
     mWarpSize = deviceProp.warpSize;
-    mBlockCount = deviceProp.multiProcessorCount;
-    mMaxBackendThreads = std::max<int32_t>(mMaxBackendThreads, deviceProp.maxThreadsPerBlock * mBlockCount);
+    mMultiprocessorCount = deviceProp.multiProcessorCount;
+    mMaxBackendThreads = std::max<int32_t>(mMaxBackendThreads, deviceProp.maxThreadsPerBlock * mMultiprocessorCount);
     mDeviceName = deviceProp.name;
     mDeviceName += " (CUDA GPU)";
 
@@ -329,9 +329,9 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime()
     }
 
 #ifndef __HIPCC__ // CUDA
-    dummyInitKernel<<<mBlockCount, 256>>>(mDeviceMemoryBase);
+    dummyInitKernel<<<mMultiprocessorCount, 256>>>(mDeviceMemoryBase); // TODO: Can't we just use the CUDA version and hipify will take care of the rest?
 #else // HIP
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(dummyInitKernel), dim3(mBlockCount), dim3(256), 0, 0, mDeviceMemoryBase);
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(dummyInitKernel), dim3(mMultiprocessorCount), dim3(256), 0, 0, mDeviceMemoryBase);
 #endif
 
     if (GetProcessingSettings().rtc.enable) {
@@ -373,7 +373,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime()
   } else {
     GPUReconstructionCUDA* master = dynamic_cast<GPUReconstructionCUDA*>(mMaster);
     mDeviceId = master->mDeviceId;
-    mBlockCount = master->mBlockCount;
+    mMultiprocessorCount = master->mMultiprocessorCount;
     mWarpSize = master->mWarpSize;
     mMaxBackendThreads = master->mMaxBackendThreads;
     mDeviceName = master->mDeviceName;

@@ -266,9 +266,9 @@ int32_t GPUReconstructionOCL::InitDevice_Runtime()
 
     mDeviceName = device_name.c_str();
     mDeviceName += " (OpenCL)";
-    mBlockCount = device_shaders;
+    mMultiprocessorCount = device_shaders;
     mWarpSize = 32;
-    mMaxBackendThreads = std::max<int32_t>(mMaxBackendThreads, deviceMaxWorkGroup * mBlockCount);
+    mMaxBackendThreads = std::max<int32_t>(mMaxBackendThreads, deviceMaxWorkGroup * mMultiprocessorCount);
 
     mInternals->context = clCreateContext(nullptr, 1, &mInternals->device, nullptr, nullptr, &ocl_error);
     if (GPUChkErrI(ocl_error)) {
@@ -378,7 +378,7 @@ int32_t GPUReconstructionOCL::InitDevice_Runtime()
     GPUInfo("OPENCL Initialisation successfull (%d: %s %s (Frequency %d, Shaders %d), %ld / %ld bytes host / global memory, Stack frame %d, Constant memory %ld)", bestDevice, device_vendor, device_name, (int32_t)device_freq, (int32_t)device_shaders, (int64_t)mDeviceMemorySize, (int64_t)mHostMemorySize, -1, (int64_t)gGPUConstantMemBufferSize);
   } else {
     GPUReconstructionOCL* master = dynamic_cast<GPUReconstructionOCL*>(mMaster);
-    mBlockCount = master->mBlockCount;
+    mMultiprocessorCount = master->mMultiprocessorCount;
     mWarpSize = master->mWarpSize;
     mMaxBackendThreads = master->mMaxBackendThreads;
     mDeviceName = master->mDeviceName;

@@ -210,7 +210,7 @@ class GPUChain
   krnlExec GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep);
   krnlExec GetGridAutoStep(int32_t stream, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep);
 
-  inline uint32_t BlockCount() const { return mRec->mBlockCount; }
+  inline uint32_t BlockCount() const { return mRec->mMultiprocessorCount; } // Returns the multiprocessor count stored in mRec (mMultiprocessorCount), not a per-kernel block count
   inline uint32_t WarpSize() const { return mRec->mWarpSize; }
   inline uint32_t ThreadCount() const { return mRec->mThreadCount; }