Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Base/GPUReconstructionCPU.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ int32_t GPUReconstructionCPU::InitDevice()
ClearAllocatedMemory();
}
if (GetProcessingSettings().inKernelParallel) {
mBlockCount = mMaxHostThreads;
mMultiprocessorCount = mMaxHostThreads;
}
mProcShadow.mProcessorsProc = processors();
return 0;
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Base/GPUReconstructionCPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ class GPUReconstructionCPU : public GPUReconstructionProcessing::KernelInterface
GPUProcessorProcessors mProcShadow; // Host copy of tracker objects that will be used on the GPU
GPUConstantMem*& mProcessorsShadow = mProcShadow.mProcessorsProc;

uint32_t mBlockCount = 1;
uint32_t mMultiprocessorCount = 1;
uint32_t mThreadCount = 1;
uint32_t mWarpSize = 1;

Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Base/GPUReconstructionCPUKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ inline void GPUReconstructionCPU::runKernelInterface(krnlSetup&& setup, Args con
const uint32_t stream = setup.x.stream;
auto prop = getKernelProperties<S, I>();
const int32_t autoThreads = cpuFallback ? 1 : prop.nThreads;
const int32_t autoBlocks = cpuFallback ? 1 : (prop.forceBlocks ? prop.forceBlocks : (prop.minBlocks * mBlockCount));
const int32_t autoBlocks = cpuFallback ? 1 : (prop.forceBlocks ? prop.forceBlocks : (prop.minBlocks * mMultiprocessorCount));
if (nBlocks == (uint32_t)-1) {
nBlocks = (nThreads + autoThreads - 1) / autoThreads;
nThreads = autoThreads;
Expand Down
10 changes: 5 additions & 5 deletions GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu
Original file line number Diff line number Diff line change
Expand Up @@ -247,8 +247,8 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime()
throw std::runtime_error("Invalid warp size on GPU");
}
mWarpSize = deviceProp.warpSize;
mBlockCount = deviceProp.multiProcessorCount;
mMaxBackendThreads = std::max<int32_t>(mMaxBackendThreads, deviceProp.maxThreadsPerBlock * mBlockCount);
mMultiprocessorCount = deviceProp.multiProcessorCount;
mMaxBackendThreads = std::max<int32_t>(mMaxBackendThreads, deviceProp.maxThreadsPerBlock * mMultiprocessorCount);
mDeviceName = deviceProp.name;
mDeviceName += " (CUDA GPU)";

Expand Down Expand Up @@ -329,9 +329,9 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime()
}

#ifndef __HIPCC__ // CUDA
dummyInitKernel<<<mBlockCount, 256>>>(mDeviceMemoryBase);
dummyInitKernel<<<mMultiprocessorCount, 256>>>(mDeviceMemoryBase); // TODO: Can't we just use the CUDA version and hipify will take care of the rest?
#else // HIP
hipLaunchKernelGGL(HIP_KERNEL_NAME(dummyInitKernel), dim3(mBlockCount), dim3(256), 0, 0, mDeviceMemoryBase);
hipLaunchKernelGGL(HIP_KERNEL_NAME(dummyInitKernel), dim3(mMultiprocessorCount), dim3(256), 0, 0, mDeviceMemoryBase);
#endif

if (GetProcessingSettings().rtc.enable) {
Expand Down Expand Up @@ -373,7 +373,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime()
} else {
GPUReconstructionCUDA* master = dynamic_cast<GPUReconstructionCUDA*>(mMaster);
mDeviceId = master->mDeviceId;
mBlockCount = master->mBlockCount;
mMultiprocessorCount = master->mMultiprocessorCount;
mWarpSize = master->mWarpSize;
mMaxBackendThreads = master->mMaxBackendThreads;
mDeviceName = master->mDeviceName;
Expand Down
6 changes: 3 additions & 3 deletions GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -266,9 +266,9 @@ int32_t GPUReconstructionOCL::InitDevice_Runtime()

mDeviceName = device_name.c_str();
mDeviceName += " (OpenCL)";
mBlockCount = device_shaders;
mMultiprocessorCount = device_shaders;
mWarpSize = 32;
mMaxBackendThreads = std::max<int32_t>(mMaxBackendThreads, deviceMaxWorkGroup * mBlockCount);
mMaxBackendThreads = std::max<int32_t>(mMaxBackendThreads, deviceMaxWorkGroup * mMultiprocessorCount);

mInternals->context = clCreateContext(nullptr, 1, &mInternals->device, nullptr, nullptr, &ocl_error);
if (GPUChkErrI(ocl_error)) {
Expand Down Expand Up @@ -378,7 +378,7 @@ int32_t GPUReconstructionOCL::InitDevice_Runtime()
GPUInfo("OPENCL Initialisation successfull (%d: %s %s (Frequency %d, Shaders %d), %ld / %ld bytes host / global memory, Stack frame %d, Constant memory %ld)", bestDevice, device_vendor, device_name, (int32_t)device_freq, (int32_t)device_shaders, (int64_t)mDeviceMemorySize, (int64_t)mHostMemorySize, -1, (int64_t)gGPUConstantMemBufferSize);
} else {
GPUReconstructionOCL* master = dynamic_cast<GPUReconstructionOCL*>(mMaster);
mBlockCount = master->mBlockCount;
mMultiprocessorCount = master->mMultiprocessorCount;
mWarpSize = master->mWarpSize;
mMaxBackendThreads = master->mMaxBackendThreads;
mDeviceName = master->mDeviceName;
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Global/GPUChain.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ class GPUChain
krnlExec GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep);
krnlExec GetGridAutoStep(int32_t stream, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep);

inline uint32_t BlockCount() const { return mRec->mBlockCount; }
inline uint32_t BlockCount() const { return mRec->mMultiprocessorCount; }
inline uint32_t WarpSize() const { return mRec->mWarpSize; }
inline uint32_t ThreadCount() const { return mRec->mThreadCount; }

Expand Down