diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 2bd4c0e937c20..b715b08f52b32 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -811,11 +811,9 @@ void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag) GPUFatal("Tag mismatch when popping non persistent memory from stack : pop %s vs on stack %s", qTag2Str(tag).c_str(), qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str()); } if ((mProcessingSettings.debugLevel >= 3 || mProcessingSettings.allocDebugLevel) && (IsGPU() || mProcessingSettings.forceHostMemoryPoolSize)) { - if (IsGPU()) { - printf("Allocated Device memory after %30s (%8s): %'13zd (non temporary %'13zd, blocked %'13zd)\n", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase) + ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase), mDeviceMemoryPoolBlocked ? ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolBlocked) : 0); - } - printf("Allocated Host memory after %30s (%8s): %'13zd (non temporary %'13zd, blocked %'13zd)\n", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), ptrDiff(mHostMemoryPool, mHostMemoryBase) + ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), ptrDiff(mHostMemoryPool, mHostMemoryBase), mHostMemoryPoolBlocked ? ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolBlocked) : 0); - printf("%16s", ""); + printf("Allocated memory after %30s (%8s) (Stack %zu): ", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), mNonPersistentMemoryStack.size()); + PrintMemoryOverview(); + printf("%76s", ""); PrintMemoryMax(); } mHostMemoryPoolEnd = std::get<0>(mNonPersistentMemoryStack.back()); @@ -888,9 +886,10 @@ void GPUReconstruction::PrintMemoryMax() void GPUReconstruction::PrintMemoryOverview() { if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { - printf("Memory Allocation: Host %'zd / %'zu (Permanent %'zd), Device %'zd / %'zu, (Permanent %'zd) %zu chunks\n", - ptrDiff(mHostMemoryPool, mHostMemoryBase) + ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), mHostMemorySize, ptrDiff(mHostMemoryPermanent, mHostMemoryBase), - ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase) + ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), mDeviceMemorySize, ptrDiff(mDeviceMemoryPermanent, mDeviceMemoryBase), mMemoryResources.size()); + printf("Memory Allocation: Host %'13zd / %'13zu (Permanent %'13zd, Data %'13zd, Scratch %'13zd), Device %'13zd / %'13zu, (Permanent %'13zd, Data %'13zd, Scratch %'13zd) %zu chunks\n", + ptrDiff(mHostMemoryPool, mHostMemoryBase) + ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), mHostMemorySize, ptrDiff(mHostMemoryPermanent, mHostMemoryBase), ptrDiff(mHostMemoryPool, mHostMemoryPermanent), ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), + ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase) + ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), mDeviceMemorySize, ptrDiff(mDeviceMemoryPermanent, mDeviceMemoryBase), ptrDiff(mDeviceMemoryPool, mDeviceMemoryPermanent), ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), + mMemoryResources.size()); } } diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index f397fc51bd407..a4074282da30f 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -215,6 +215,10 @@ int32_t GPUReconstructionCPU::RunChains() mStatNEvents++; mNEventsProcessed++; + if (mProcessingSettings.debugLevel >= 3 || mProcessingSettings.allocDebugLevel) { + printf("Allocated memory when starting processing %34s", ""); + PrintMemoryOverview(); + } mTimerTotal.Start(); const std::clock_t cpuTimerStart = std::clock(); if (mProcessingSettings.doublePipeline) { @@ -232,9 +236,16 @@ int32_t GPUReconstructionCPU::RunChains() return retVal; } } + if (GetProcessingSettings().tpcFreeAllocatedMemoryAfterProcessing) { + ClearAllocatedMemory(); + } } mTimerTotal.Stop(); mStatCPUTime += (double)(std::clock() - cpuTimerStart) / CLOCKS_PER_SEC; + if (mProcessingSettings.debugLevel >= 3 || mProcessingSettings.allocDebugLevel) { + printf("Allocated memory when ending processing %36s", ""); + PrintMemoryOverview(); + } mStatWallTime = (mTimerTotal.GetElapsedTime() * 1000000. / mStatNEvents); std::string nEventReport; diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index b7f761c73ffc0..f7004d76c726c 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -326,6 +326,7 @@ AddOption(oclPlatformNum, int32_t, -1, "", 0, "Platform to use, in case the back AddOption(oclCompileFromSources, bool, false, "", 0, "Compile OpenCL binary from included source code instead of using included spirv code") AddOption(oclOverrideSourceBuildFlags, std::string, "", "", 0, "Override OCL build flags for compilation from source, put a space for empty options") AddOption(printSettings, bool, false, "", 0, "Print all settings when initializing") +AddOption(tpcFreeAllocatedMemoryAfterProcessing, bool, false, "", 0, "Clean all memory allocated by TPC when TPC processing done, only data written to external output resources will remain") AddVariable(eventDisplay, o2::gpu::GPUDisplayFrontendInterface*, nullptr) AddSubConfig(GPUSettingsProcessingRTC, rtc) AddSubConfig(GPUSettingsProcessingParam, param)