From e92bfdd54829446d7e1d0d0b9bc3130a80f74c74 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 22 May 2025 10:16:48 +0200 Subject: [PATCH] GPU: Fix CUDA GetMemInfo must use correct device and simplify context creation / cleanup --- .../Base/cuda/GPUReconstructionCUDA.cu | 35 +++++++------------ 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index c8e5420a8bcf3..740de8c541c7e 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -125,34 +125,25 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() } std::vector devicesOK(count, false); std::vector devMemory(count, 0); - bool contextCreated = false; + std::vector contextCreated(count, false); for (int32_t i = 0; i < count; i++) { if (GetProcessingSettings().debugLevel >= 4) { GPUInfo("Examining device %d", i); } size_t free, total; -#ifndef __HIPCC__ // CUDA - if (GPUChkErrI(cudaInitDevice(i, 0, 0))) { -#else // HIP - if (GPUChkErrI(hipSetDevice(i))) { -#endif + if (GPUChkErrI(cudaSetDevice(i))) { if (GetProcessingSettings().debugLevel >= 4) { GPUWarning("Couldn't create context for device %d. Skipping it.", i); } continue; } - contextCreated = true; + contextCreated[i] = true; if (GPUChkErrI(cudaMemGetInfo(&free, &total))) { if (GetProcessingSettings().debugLevel >= 4) { GPUWarning("Error obtaining CUDA memory info about device %d! Skipping it.", i); } - GPUChkErr(cudaDeviceReset()); continue; } - if (count > 1) { - GPUChkErr(cudaDeviceReset()); - contextCreated = false; - } if (GetProcessingSettings().debugLevel >= 4) { GPUInfo("Obtained current memory usage for device %d", i); } @@ -212,13 +203,20 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() bestDevice = GetProcessingSettings().deviceNum; } } - if (noDevice) { - if (contextCreated) { + for (int32_t i = 0; i < count; i++) { + if (contextCreated[i] && (noDevice || i != bestDevice)) { + GPUChkErrI(cudaSetDevice(i)); GPUChkErrI(cudaDeviceReset()); } + } + if (noDevice) { return (1); } mDeviceId = bestDevice; + if (GPUChkErrI(cudaSetDevice(mDeviceId))) { + GPUError("Could not set CUDA Device!"); + return (1); + } GPUChkErrI(cudaGetDeviceProperties(&deviceProp, mDeviceId)); @@ -262,15 +260,6 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() } #endif -#ifndef __HIPCC__ // CUDA - if (contextCreated == 0 && GPUChkErrI(cudaInitDevice(mDeviceId, 0, 0))) { -#else // HIP - if (contextCreated == 0 && GPUChkErrI(hipSetDevice(mDeviceId))) { -#endif - GPUError("Could not set CUDA Device!"); - return (1); - } - #ifndef __HIPCC__ // CUDA if (GPUChkErrI(cudaDeviceSetLimit(cudaLimitStackSize, GPUCA_GPU_STACK_SIZE))) { GPUError("Error setting CUDA stack size");