diff --git a/GPU/GPUbenchmark/cuda/Kernels.cu b/GPU/GPUbenchmark/cuda/Kernels.cu index ccbd734d1dbeb..be622b30474e5 100644 --- a/GPU/GPUbenchmark/cuda/Kernels.cu +++ b/GPU/GPUbenchmark/cuda/Kernels.cu @@ -301,102 +301,102 @@ void printDeviceProp(int32_t deviceId) cudaDeviceProp props; GPUCHECK(cudaGetDeviceProperties(&props, deviceId)); - int32_t clockRateKHz = 0; - int32_t memoryClockRateKHz = 0; - int32_t computeMode = 0; + int32_t clockRateKHz = 0; + int32_t memoryClockRateKHz = 0; + int32_t computeMode = 0; #if (CUDART_VERSION >= 13000) - GPUCHECK(cudaDeviceGetAttribute(&clockRateKHz, cudaDevAttrClockRate, deviceId)); - GPUCHECK(cudaDeviceGetAttribute(&memoryClockRateKHz, cudaDevAttrMemoryClockRate, deviceId)); - GPUCHECK(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, deviceId)); + GPUCHECK(cudaDeviceGetAttribute(&clockRateKHz, cudaDevAttrClockRate, deviceId)); + GPUCHECK(cudaDeviceGetAttribute(&memoryClockRateKHz, cudaDevAttrMemoryClockRate, deviceId)); + GPUCHECK(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, deviceId)); #else - clockRateKHz = props.clockRate; - memoryClockRateKHz = props.memoryClockRate; - computeMode = props.computeMode; - cooperativeMultiDevice = props.cooperativeMultiDeviceLaunch; + clockRateKHz = props.clockRate; + memoryClockRateKHz = props.memoryClockRate; + computeMode = props.computeMode; + cooperativeMultiDevice = props.cooperativeMultiDeviceLaunch; #endif - std::cout << std::setw(w1) << "Name: " << props.name << std::endl; - std::cout << std::setw(w1) << "pciBusID: " << props.pciBusID << std::endl; - std::cout << std::setw(w1) << "pciDeviceID: " << props.pciDeviceID << std::endl; - std::cout << std::setw(w1) << "pciDomainID: " << props.pciDomainID << std::endl; - std::cout << std::setw(w1) << "multiProcessorCount: " << props.multiProcessorCount << std::endl; - std::cout << std::setw(w1) << "maxThreadsPerMultiProcessor: " << props.maxThreadsPerMultiProcessor - << std::endl; - std::cout << std::setw(w1) << "isMultiGpuBoard: " << props.isMultiGpuBoard << std::endl; - - // Use the variables we populated above for the moved properties - std::cout << std::setw(w1) << "clockRate: " << (float)clockRateKHz / 1000.0 << " Mhz" << std::endl; - std::cout << std::setw(w1) << "memoryClockRate: " << (float)memoryClockRateKHz / 1000.0 << " Mhz" - << std::endl; - - std::cout << std::setw(w1) << "memoryBusWidth: " << props.memoryBusWidth << std::endl; - - // clockInstructionRate is just another name for clockRate in this context - std::cout << std::setw(w1) << "clockInstructionRate: " << (float)clockRateKHz / 1000.0 - << " Mhz" << std::endl; - std::cout << std::setw(w1) << "totalGlobalMem: " << std::fixed << std::setprecision(2) - << bytesToGB(props.totalGlobalMem) << " GB" << std::endl; + std::cout << std::setw(w1) << "Name: " << props.name << std::endl; + std::cout << std::setw(w1) << "pciBusID: " << props.pciBusID << std::endl; + std::cout << std::setw(w1) << "pciDeviceID: " << props.pciDeviceID << std::endl; + std::cout << std::setw(w1) << "pciDomainID: " << props.pciDomainID << std::endl; + std::cout << std::setw(w1) << "multiProcessorCount: " << props.multiProcessorCount << std::endl; + std::cout << std::setw(w1) << "maxThreadsPerMultiProcessor: " << props.maxThreadsPerMultiProcessor + << std::endl; + std::cout << std::setw(w1) << "isMultiGpuBoard: " << props.isMultiGpuBoard << std::endl; + + // Use the variables we populated above for the moved properties + std::cout << std::setw(w1) << "clockRate: " << (float)clockRateKHz / 1000.0 << " Mhz" << std::endl; + std::cout << std::setw(w1) << "memoryClockRate: " << (float)memoryClockRateKHz / 1000.0 << " Mhz" + << std::endl; + + std::cout << std::setw(w1) << "memoryBusWidth: " << props.memoryBusWidth << std::endl; + + // clockInstructionRate is just another name for clockRate in this context + std::cout << std::setw(w1) << "clockInstructionRate: " << (float)clockRateKHz / 1000.0 + << " Mhz" << std::endl; + std::cout << std::setw(w1) << "totalGlobalMem: " << std::fixed << std::setprecision(2) + << bytesToGB(props.totalGlobalMem) << " GB" << std::endl; #if !defined(__CUDACC__) - std::cout << std::setw(w1) << "maxSharedMemoryPerMultiProcessor: " << std::fixed << std::setprecision(2) - << bytesToconfig(props.sharedMemPerMultiprocessor) << " config" << std::endl; + std::cout << std::setw(w1) << "maxSharedMemoryPerMultiProcessor: " << std::fixed << std::setprecision(2) + << bytesToconfig(props.sharedMemPerMultiprocessor) << " config" << std::endl; #endif #if defined(__HIPCC__) - std::cout << std::setw(w1) << "maxSharedMemoryPerMultiProcessor: " << std::fixed << std::setprecision(2) - << bytesToconfig(props.maxSharedMemoryPerMultiProcessor) << " config" << std::endl; + std::cout << std::setw(w1) << "maxSharedMemoryPerMultiProcessor: " << std::fixed << std::setprecision(2) + << bytesToconfig(props.maxSharedMemoryPerMultiProcessor) << " config" << std::endl; #endif - std::cout << std::setw(w1) << "totalConstMem: " << props.totalConstMem << std::endl; - std::cout << std::setw(w1) << "sharedMemPerBlock: " << (float)props.sharedMemPerBlock / 1024.0 << " config" - << std::endl; - std::cout << std::setw(w1) << "canMapHostMemory: " << props.canMapHostMemory << std::endl; - std::cout << std::setw(w1) << "regsPerBlock: " << props.regsPerBlock << std::endl; - std::cout << std::setw(w1) << "warpSize: " << props.warpSize << std::endl; - std::cout << std::setw(w1) << "l2CacheSize: " << props.l2CacheSize << std::endl; - - // Use the variable for computeMode - std::cout << std::setw(w1) << "computeMode: " << computeMode << std::endl; - - std::cout << std::setw(w1) << "maxThreadsPerBlock: " << props.maxThreadsPerBlock << std::endl; - std::cout << std::setw(w1) << "maxThreadsDim.x: " << props.maxThreadsDim[0] << std::endl; - std::cout << std::setw(w1) << "maxThreadsDim.y: " << props.maxThreadsDim[1] << std::endl; - std::cout << std::setw(w1) << "maxThreadsDim.z: " << props.maxThreadsDim[2] << std::endl; - std::cout << std::setw(w1) << "maxGridSize.x: " << props.maxGridSize[0] << std::endl; - std::cout << std::setw(w1) << "maxGridSize.y: " << props.maxGridSize[1] << std::endl; - std::cout << std::setw(w1) << "maxGridSize.z: " << props.maxGridSize[2] << std::endl; - std::cout << std::setw(w1) << "major: " << props.major << std::endl; - std::cout << std::setw(w1) << "minor: " << props.minor << std::endl; - std::cout << std::setw(w1) << "concurrentKernels: " << props.concurrentKernels << std::endl; + std::cout << std::setw(w1) << "totalConstMem: " << props.totalConstMem << std::endl; + std::cout << std::setw(w1) << "sharedMemPerBlock: " << (float)props.sharedMemPerBlock / 1024.0 << " config" + << std::endl; + std::cout << std::setw(w1) << "canMapHostMemory: " << props.canMapHostMemory << std::endl; + std::cout << std::setw(w1) << "regsPerBlock: " << props.regsPerBlock << std::endl; + std::cout << std::setw(w1) << "warpSize: " << props.warpSize << std::endl; + std::cout << std::setw(w1) << "l2CacheSize: " << props.l2CacheSize << std::endl; + + // Use the variable for computeMode + std::cout << std::setw(w1) << "computeMode: " << computeMode << std::endl; + + std::cout << std::setw(w1) << "maxThreadsPerBlock: " << props.maxThreadsPerBlock << std::endl; + std::cout << std::setw(w1) << "maxThreadsDim.x: " << props.maxThreadsDim[0] << std::endl; + std::cout << std::setw(w1) << "maxThreadsDim.y: " << props.maxThreadsDim[1] << std::endl; + std::cout << std::setw(w1) << "maxThreadsDim.z: " << props.maxThreadsDim[2] << std::endl; + std::cout << std::setw(w1) << "maxGridSize.x: " << props.maxGridSize[0] << std::endl; + std::cout << std::setw(w1) << "maxGridSize.y: " << props.maxGridSize[1] << std::endl; + std::cout << std::setw(w1) << "maxGridSize.z: " << props.maxGridSize[2] << std::endl; + std::cout << std::setw(w1) << "major: " << props.major << std::endl; + std::cout << std::setw(w1) << "minor: " << props.minor << std::endl; + std::cout << std::setw(w1) << "concurrentKernels: " << props.concurrentKernels << std::endl; #if defined(__HIPCC__) - std::cout << std::setw(w1) << "arch.hasGlobalInt32Atomics: " << props.arch.hasGlobalInt32Atomics << std::endl; - std::cout << std::setw(w1) << "arch.hasGlobalFloatAtomicExch: " << props.arch.hasGlobalFloatAtomicExch - << std::endl; - std::cout << std::setw(w1) << "arch.hasSharedInt32Atomics: " << props.arch.hasSharedInt32Atomics << std::endl; - std::cout << std::setw(w1) << "arch.hasSharedFloatAtomicExch: " << props.arch.hasSharedFloatAtomicExch - << std::endl; - std::cout << std::setw(w1) << "arch.hasFloatAtomicAdd: " << props.arch.hasFloatAtomicAdd << std::endl; - std::cout << std::setw(w1) << "arch.hasGlobalInt64Atomics: " << props.arch.hasGlobalInt64Atomics << std::endl; - std::cout << std::setw(w1) << "arch.hasSharedInt64Atomics: " << props.arch.hasSharedInt64Atomics << std::endl; - std::cout << std::setw(w1) << "arch.hasDoubles: " << props.arch.hasDoubles << std::endl; - std::cout << std::setw(w1) << "arch.hasWarpVote: " << props.arch.hasWarpVote << std::endl; - std::cout << std::setw(w1) << "arch.hasWarpBallot: " << props.arch.hasWarpBallot << std::endl; - std::cout << std::setw(w1) << "arch.hasWarpShuffle: " << props.arch.hasWarpShuffle << std::endl; - std::cout << std::setw(w1) << "arch.hasFunnelShift: " << props.arch.hasFunnelShift << std::endl; - std::cout << std::setw(w1) << "arch.hasThreadFenceSystem: " << props.arch.hasThreadFenceSystem << std::endl; - std::cout << std::setw(w1) << "arch.hasSyncThreadsExt: " << props.arch.hasSyncThreadsExt << std::endl; - std::cout << std::setw(w1) << "arch.hasSurfaceFuncs: " << props.arch.hasSurfaceFuncs << std::endl; - std::cout << std::setw(w1) << "arch.has3dGrid: " << props.arch.has3dGrid << std::endl; - std::cout << std::setw(w1) << "arch.hasDynamicParallelism: " << props.arch.hasDynamicParallelism << std::endl; - std::cout << std::setw(w1) << "gcnArchName: " << props.gcnArchName << std::endl; + std::cout << std::setw(w1) << "arch.hasGlobalInt32Atomics: " << props.arch.hasGlobalInt32Atomics << std::endl; + std::cout << std::setw(w1) << "arch.hasGlobalFloatAtomicExch: " << props.arch.hasGlobalFloatAtomicExch + << std::endl; + std::cout << std::setw(w1) << "arch.hasSharedInt32Atomics: " << props.arch.hasSharedInt32Atomics << std::endl; + std::cout << std::setw(w1) << "arch.hasSharedFloatAtomicExch: " << props.arch.hasSharedFloatAtomicExch + << std::endl; + std::cout << std::setw(w1) << "arch.hasFloatAtomicAdd: " << props.arch.hasFloatAtomicAdd << std::endl; + std::cout << std::setw(w1) << "arch.hasGlobalInt64Atomics: " << props.arch.hasGlobalInt64Atomics << std::endl; + std::cout << std::setw(w1) << "arch.hasSharedInt64Atomics: " << props.arch.hasSharedInt64Atomics << std::endl; + std::cout << std::setw(w1) << "arch.hasDoubles: " << props.arch.hasDoubles << std::endl; + std::cout << std::setw(w1) << "arch.hasWarpVote: " << props.arch.hasWarpVote << std::endl; + std::cout << std::setw(w1) << "arch.hasWarpBallot: " << props.arch.hasWarpBallot << std::endl; + std::cout << std::setw(w1) << "arch.hasWarpShuffle: " << props.arch.hasWarpShuffle << std::endl; + std::cout << std::setw(w1) << "arch.hasFunnelShift: " << props.arch.hasFunnelShift << std::endl; + std::cout << std::setw(w1) << "arch.hasThreadFenceSystem: " << props.arch.hasThreadFenceSystem << std::endl; + std::cout << std::setw(w1) << "arch.hasSyncThreadsExt: " << props.arch.hasSyncThreadsExt << std::endl; + std::cout << std::setw(w1) << "arch.hasSurfaceFuncs: " << props.arch.hasSurfaceFuncs << std::endl; + std::cout << std::setw(w1) << "arch.has3dGrid: " << props.arch.has3dGrid << std::endl; + std::cout << std::setw(w1) << "arch.hasDynamicParallelism: " << props.arch.hasDynamicParallelism << std::endl; + std::cout << std::setw(w1) << "gcnArchName: " << props.gcnArchName << std::endl; #endif - std::cout << std::setw(w1) << "isIntegrated: " << props.integrated << std::endl; - std::cout << std::setw(w1) << "maxTexture1D: " << props.maxTexture1D << std::endl; - std::cout << std::setw(w1) << "maxTexture2D.width: " << props.maxTexture2D[0] << std::endl; - std::cout << std::setw(w1) << "maxTexture2D.height: " << props.maxTexture2D[1] << std::endl; - std::cout << std::setw(w1) << "maxTexture3D.width: " << props.maxTexture3D[0] << std::endl; - std::cout << std::setw(w1) << "maxTexture3D.height: " << props.maxTexture3D[1] << std::endl; - std::cout << std::setw(w1) << "maxTexture3D.depth: " << props.maxTexture3D[2] << std::endl; + std::cout << std::setw(w1) << "isIntegrated: " << props.integrated << std::endl; + std::cout << std::setw(w1) << "maxTexture1D: " << props.maxTexture1D << std::endl; + std::cout << std::setw(w1) << "maxTexture2D.width: " << props.maxTexture2D[0] << std::endl; + std::cout << std::setw(w1) << "maxTexture2D.height: " << props.maxTexture2D[1] << std::endl; + std::cout << std::setw(w1) << "maxTexture3D.width: " << props.maxTexture3D[0] << std::endl; + std::cout << std::setw(w1) << "maxTexture3D.height: " << props.maxTexture3D[1] << std::endl; + std::cout << std::setw(w1) << "maxTexture3D.depth: " << props.maxTexture3D[2] << std::endl; #if defined(__HIPCC__) - std::cout << std::setw(w1) << "isLargeBar: " << props.isLargeBar << std::endl; - std::cout << std::setw(w1) << "asicRevision: " << props.asicRevision << std::endl; + std::cout << std::setw(w1) << "isLargeBar: " << props.isLargeBar << std::endl; + std::cout << std::setw(w1) << "asicRevision: " << props.asicRevision << std::endl; #endif int32_t deviceCnt;