diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index 944fcb32e4eda..1365429245fdc 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -60,6 +60,21 @@ GPUReconstructionCPU::~GPUReconstructionCPU() Exit(); // Needs to be identical to GPU backend bahavior in order to avoid calling abstract methods later in the destructor } +int32_t GPUReconstructionCPUBackend::getNOMPThreads() +{ + int32_t ompThreads = 0; + if (mProcessingSettings.ompKernels == 2) { + ompThreads = mProcessingSettings.ompThreads / mNestedLoopOmpFactor; + if ((uint32_t)getOMPThreadNum() < mProcessingSettings.ompThreads % mNestedLoopOmpFactor) { + ompThreads++; + } + ompThreads = std::max(1, ompThreads); + } else { + ompThreads = mProcessingSettings.ompKernels ? mProcessingSettings.ompThreads : 1; + } + return ompThreads; +} + template inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args) { @@ -73,16 +88,7 @@ inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlS } uint32_t num = y.num == 0 || y.num == -1 ? 1 : y.num; for (uint32_t k = 0; k < num; k++) { - int32_t ompThreads = 0; - if (mProcessingSettings.ompKernels == 2) { - ompThreads = mProcessingSettings.ompThreads / mNestedLoopOmpFactor; - if ((uint32_t)getOMPThreadNum() < mProcessingSettings.ompThreads % mNestedLoopOmpFactor) { - ompThreads++; - } - ompThreads = std::max(1, ompThreads); - } else { - ompThreads = mProcessingSettings.ompKernels ? mProcessingSettings.ompThreads : 1; - } + int32_t ompThreads = getNOMPThreads(); if (ompThreads > 1) { if (mProcessingSettings.debugLevel >= 5) { printf("Running %d ompThreads\n", ompThreads); @@ -105,7 +111,12 @@ inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlS template <> inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) { - memset(ptr, 0, size); + int32_t ompThreads = std::max(1, std::min(size / (16 * 1024 * 1024), getNOMPThreads())); + if (ompThreads > 1) { + memset(ptr, 0, size); + } else { + memset(ptr, 0, size); + } return 0; } diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index 27959382e7b67..7903be44907df 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -46,6 +46,7 @@ class GPUReconstructionCPUBackend : public GPUReconstruction uint32_t mNestedLoopOmpFactor = 1; static int32_t getOMPThreadNum(); static int32_t getOMPMaxThreads(); + int32_t getNOMPThreads(); }; class GPUReconstructionCPU : public GPUReconstructionKernels diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index bd6b3b6e51928..e4e336130afa0 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -160,7 +160,7 @@ elseif(GPUCA_CUDA_COMPILE_MODE STREQUAL "perkernel") add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingCUDAKernelModules.o COMMAND cp -u $ ${CMAKE_CURRENT_BINARY_DIR}/cuda_kernel_module_fatbin/ - COMMAND ${CMAKE_LINKER} --relocatable --format binary --output ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingCUDAKernelModules.o $>,PREPEND,${CMAKE_CURRENT_BINARY_DIR}/cuda_kernel_module_fatbin/>,${CMAKE_CURRENT_BINARY_DIR}> + COMMAND ${CMAKE_LINKER} -z noexecstack --relocatable --format binary --output ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingCUDAKernelModules.o $>,PREPEND,${CMAKE_CURRENT_BINARY_DIR}/cuda_kernel_module_fatbin/>,${CMAKE_CURRENT_BINARY_DIR}> DEPENDS GPUTrackingCUDAKernels $ COMMENT "Compiling fatbin kernels ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingCUDAKernelModules.o" VERBATIM diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 727019fa13755..1952c7a0e3567 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -217,7 +217,7 @@ elseif(GPUCA_HIP_COMPILE_MODE STREQUAL "perkernel") add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingHIPKernelModules.o COMMAND cp -u $ ${CMAKE_CURRENT_BINARY_DIR}/hip_kernel_module_fatbin/ - COMMAND ${CMAKE_LINKER} --relocatable --format binary --output ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingHIPKernelModules.o $>,PREPEND,${CMAKE_CURRENT_BINARY_DIR}/hip_kernel_module_fatbin/>,${CMAKE_CURRENT_BINARY_DIR}> + COMMAND ${CMAKE_LINKER} -z noexecstack --relocatable --format binary --output ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingHIPKernelModules.o $>,PREPEND,${CMAKE_CURRENT_BINARY_DIR}/hip_kernel_module_fatbin/>,${CMAKE_CURRENT_BINARY_DIR}> DEPENDS GPUTrackingHIPKernels $ COMMENT "Compiling fatbin kernels ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingHIPKernelModules.o" VERBATIM diff --git a/GPU/GPUTracking/cmake/helpers.cmake b/GPU/GPUTracking/cmake/helpers.cmake index 8d8cf592d8295..f725b870040eb 100644 --- a/GPU/GPUTracking/cmake/helpers.cmake +++ b/GPU/GPUTracking/cmake/helpers.cmake @@ -17,7 +17,7 @@ function(create_binary_resource RESOURCE OUTPUTFILE) FILE(RELATIVE_PATH input-file-rel ${CMAKE_CURRENT_BINARY_DIR} ${input-file-abs}) add_custom_command( OUTPUT ${OUTPUTFILE} - COMMAND ${CMAKE_LINKER} --relocatable --format binary --output ${OUTPUTFILE} ${input-file-rel} + COMMAND ${CMAKE_LINKER} -z noexecstack --relocatable --format binary --output ${OUTPUTFILE} ${input-file-rel} DEPENDS ${input-file-rel} COMMENT "Adding binary resource ${input-file-rel}" VERBATIM