Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,21 @@
#define GPUCA_M_KRNL_NAME(...) GPUCA_M_KRNL_NAME_A(GPUCA_M_STRIP(__VA_ARGS__))

#if defined(GPUCA_GPUCODE) || defined(GPUCA_GPUCODE_HOSTONLY)

#if defined(__HIPCC__) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS)
static_assert(GPUCA_PAR_AMD_EUS_PER_CU > 0);
#define GPUCA_MIN_WARPS_PER_EU(maxThreadsPerBlock, minBlocksPerCU) GPUCA_CEIL_INT_DIV((minBlocksPerCU) * (maxThreadsPerBlock), (GPUCA_WARP_SIZE * GPUCA_PAR_AMD_EUS_PER_CU))

#define GPUCA_LB_ARGS_1(maxThreadsPerBlock) maxThreadsPerBlock
#define GPUCA_LB_ARGS_2(maxThreadsPerBlock, minBlocksPerCU) maxThreadsPerBlock, GPUCA_MIN_WARPS_PER_EU(maxThreadsPerBlock, minBlocksPerCU)

#define GPUCA_LAUNCH_BOUNDS_SELECT(n, ...) GPUCA_M_CAT(GPUCA_LB_ARGS_, n)(__VA_ARGS__)
#define GPUCA_LAUNCH_BOUNDS_DISP(...) GPUCA_LAUNCH_BOUNDS_SELECT(GPUCA_M_COUNT(__VA_ARGS__), __VA_ARGS__)
#define GPUCA_KRNL_REG_DEFAULT(args) __launch_bounds__(GPUCA_LAUNCH_BOUNDS_DISP(GPUCA_M_MAX2_3(GPUCA_M_STRIP(args))))
#elif !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS)
#define GPUCA_KRNL_REG_DEFAULT(args) __launch_bounds__(GPUCA_M_MAX2_3(GPUCA_M_STRIP(args)))
#endif

#ifndef GPUCA_KRNL_REG
#define GPUCA_KRNL_REG(...)
#endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile)
}
fclose(fp);
}
const std::string launchBounds = o2::gpu::internal::GPUDefParametersExport(*mParDevice, true) +
const std::string launchBounds = o2::gpu::internal::GPUDefParametersExport(*mParDevice, true, mParDevice->par_AMD_EUS_PER_CU ? (mParDevice->par_AMD_EUS_PER_CU * mWarpSize) : 0) +
"#define GPUCA_WARP_SIZE " + std::to_string(mWarpSize) + "\n";
if (GetProcessingSettings().rtctech.printLaunchBounds || GetProcessingSettings().debugLevel >= 3) {
GPUInfo("RTC Launch Bounds:\n%s", launchBounds.c_str());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ inline void GPUReconstructionCUDA::runKernelBackend(const krnlSetupTime& _xyz, c
}

#undef GPUCA_KRNL_REG
#define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_MAX2_3(GPUCA_M_STRIP(args)))
#define GPUCA_KRNL_REG(...) GPUCA_KRNL_REG_DEFAULT(__VA_ARGS__)

// clang-format off
#if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE != 1 // ---------- COMPILE_MODE = perkernel ----------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#define GPUCA_GPUCODE_HOSTONLY
#define GPUCA_GPUCODE_NO_LAUNCH_BOUNDS

#define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_MAX2_3(GPUCA_M_STRIP(args)))
#define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_STRIP(args))

#include "GPUReconstructionCUDAIncludesSystem.h"
#include "GPUReconstructionCUDADef.h"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

#define GPUCA_GPUCODE_COMPILEKERNELS
#include "GPUReconstructionCUDAIncludesSystem.h"
#define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_MAX2_3(GPUCA_M_STRIP(args)))
#define GPUCA_KRNL_REG(...) GPUCA_KRNL_REG_DEFAULT(__VA_ARGS__)
#define GPUCA_KRNL(...) GPUCA_KRNLGPU(__VA_ARGS__);
#include "GPUReconstructionCUDADef.h"
#include "GPUReconstructionKernelMacros.h"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

#define GPUCA_GPUCODE_COMPILEKERNELS
#include "GPUReconstructionHIPIncludesSystem.h"
#define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_MAX2_3(GPUCA_M_STRIP(args)))
#define GPUCA_KRNL_REG(...) GPUCA_KRNL_REG_DEFAULT(__VA_ARGS__)
#define GPUCA_KRNL(...) GPUCA_KRNLGPU(__VA_ARGS__);
#include "GPUReconstructionHIPDef.h"
#include "GPUReconstructionKernelMacros.h"
Expand Down
2 changes: 2 additions & 0 deletions GPU/GPUTracking/Definitions/GPUDefMacros.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,5 +50,7 @@
#define GPUCA_UNROLL(...)
#endif

#define GPUCA_CEIL_INT_DIV(a, b) (((a) + (b) - 1) / (b))

#endif
// clang-format on
5 changes: 5 additions & 0 deletions GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
// GPU-architecture-dependent default settings
#if defined(GPUCA_GPUTYPE_MI2xx)
#define GPUCA_WARP_SIZE 64
#define GPUCA_PAR_AMD_EUS_PER_CU 4
#define GPUCA_THREAD_COUNT_DEFAULT 256
#define GPUCA_LB_GPUTPCCreateTrackingData 256
#define GPUCA_LB_GPUTPCStartHitsSorter 512, 1
Expand Down Expand Up @@ -88,6 +89,7 @@
#define GPUCA_PAR_COMP_GATHER_MODE 3
#elif defined(GPUCA_GPUTYPE_VEGA)
#define GPUCA_WARP_SIZE 64
#define GPUCA_PAR_AMD_EUS_PER_CU 4
#define GPUCA_THREAD_COUNT_DEFAULT 256
#define GPUCA_LB_GPUTPCCreateTrackingData 128
#define GPUCA_LB_GPUTPCStartHitsSorter 1024, 2
Expand Down Expand Up @@ -276,6 +278,9 @@
#ifndef GPUCA_WARP_SIZE
#define GPUCA_WARP_SIZE 32
#endif
#ifndef GPUCA_PAR_AMD_EUS_PER_CU
#define GPUCA_PAR_AMD_EUS_PER_CU 0
#endif
#ifndef GPUCA_THREAD_COUNT_DEFAULT
#define GPUCA_THREAD_COUNT_DEFAULT 256
#endif
Expand Down
24 changes: 12 additions & 12 deletions GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc
Original file line number Diff line number Diff line change
Expand Up @@ -39,23 +39,23 @@ static GPUDefParameters GPUDefParametersLoad()
};
}

#define GPUCA_EXPORT_KERNEL_LB(name) \
if (par.par_LB_maxThreads[i] > 0) { \
o << "#define GPUCA_LB_" GPUCA_M_STR(name) " " << par.par_LB_maxThreads[i]; \
if (par.par_LB_minBlocks[i] > 0) { \
o << ", " << par.par_LB_minBlocks[i]; \
} \
if (!forRTC && par.par_LB_forceBlocks[i] > 0) { \
o << ", " << par.par_LB_forceBlocks[i]; \
} \
o << "\n"; \
} \
#define GPUCA_EXPORT_KERNEL_LB(name) \
if (par.par_LB_maxThreads[i] > 0) { \
o << "#define GPUCA_LB_" GPUCA_M_STR(name) " " << par.par_LB_maxThreads[i]; \
if (par.par_LB_minBlocks[i] > 0) { \
o << ", " << GPUCA_CEIL_INT_DIV(par.par_LB_maxThreads[i] * par.par_LB_minBlocks[i], (minBlockFactor ? minBlockFactor : par.par_LB_maxThreads[i])); \
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add this as a function to GPUCommonMath.h? I'd prefer to have proper functions instead of macros in C++ code.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding the function to GPUCommonMath.h makes ROOT complain. Basically it cannot expand GPUd() macros contained in GPUCommonMath.h when using

bash -c "echo -e '#define GPUCA_GPUTYPE_${GPU_ARCH}\\n#define PARAMETER_FILE \"GPUDefParametersDefaults.h\"\\ngInterpreter->AddIncludePath(\"${CMAKE_CURRENT_SOURCE_DIR}/../Common\");\\ngInterpreter->AddIncludePath(\"${CMAKE_CURRENT_SOURCE_DIR}/Definitions\");\\ngInterpreter->AddIncludePath(\"${ON_THE_FLY_DIR}\");\\n.x ${CMAKE_CURRENT_SOURCE_DIR}/Standalone/tools/dumpGPUDefParam.C\\n.x ${CMAKE_CURRENT_SOURCE_DIR}/Standalone/tools/dumpGPUDefParam.C(\"${PARAMFILE}\")\\n.q\\n'" | root -l -b > /dev/null

For generating the .par files. Example:

/home/gcimador/alice/O2/GPU/GPUTracking/../Common/GPUCommonMath.h:51:3: error: a type specifier is required for all declarations
  GPUd() static float2 MakeFloat2(float x, float y); // TODO: Find better appraoch that is constexpr
  ^

Do you know if there is a quick fix? I am afraid ROOT does not expand correctly the macros here

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, then just ignore my comment...

} \
if (!forRTC && par.par_LB_forceBlocks[i] > 0) { \
o << ", " << par.par_LB_forceBlocks[i]; \
} \
o << "\n"; \
} \
i++;

#define GPUCA_EXPORT_KERNEL_PARAM(name) \
o << "#define GPUCA_PAR_" GPUCA_M_STR(name) " " << GPUCA_M_CAT(par.par_, name) << "\n";

static std::string GPUDefParametersExport(const GPUDefParameters& par, bool forRTC)
static std::string GPUDefParametersExport(const GPUDefParameters& par, bool forRTC, int32_t minBlockFactor = 0)
{
std::stringstream o; // clang-format off
int32_t i = 0;
Expand Down
3 changes: 2 additions & 1 deletion GPU/GPUTracking/kernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,8 @@ o2_gpu_kernel_add_parameter(NEIGHBOURS_FINDER_MAX_NNEIGHUP
COMP_GATHER_KERNEL
COMP_GATHER_MODE
SORT_STARTHITS
CF_SCAN_WORKGROUP_SIZE)
CF_SCAN_WORKGROUP_SIZE
AMD_EUS_PER_CU)

o2_gpu_kernel_add_string_parameter(DEDX_STORAGE_TYPE
MERGER_INTERPOLATION_ERROR_TYPE)