From 4ea35b002b61e9ff4f2002c1c051aca01faed3f7 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 21 Oct 2025 09:45:13 +0200 Subject: [PATCH] Revert "GPU: Replace assertions with error counters in ZS decoding." This reverts commit 92548325d37460f34df364291b4d1f15ebc9215d. --- GPU/GPUTracking/Global/GPUErrorCodes.h | 6 +- .../TPCClusterFinder/GPUTPCCFDecodeZS.cxx | 156 ++++++------------ .../TPCClusterFinder/GPUTPCCFDecodeZS.h | 9 +- GPU/GPUTracking/kernels.cmake | 2 +- 4 files changed, 57 insertions(+), 116 deletions(-) diff --git a/GPU/GPUTracking/Global/GPUErrorCodes.h b/GPU/GPUTracking/Global/GPUErrorCodes.h index a4921f478b107..8fec23be00a09 100644 --- a/GPU/GPUTracking/Global/GPUErrorCodes.h +++ b/GPU/GPUTracking/Global/GPUErrorCodes.h @@ -47,10 +47,6 @@ GPUCA_ERROR_CODE(26, ERROR_TPCZS_INVALID_ROW, SectorRow) GPUCA_ERROR_CODE(27, ERROR_TPCZS_INVALID_NADC, SectorCRU, SamplesInPage, SamplesWritten) // Invalid number of ADC samples in header, existing samples were decoded GPUCA_ERROR_CODE(28, ERROR_TPCZS_INCOMPLETE_HBF, SectorCRU, PacketCount, NextPacketCount) // Part of HBF is missing, decoding incomplete GPUCA_ERROR_CODE(29, ERROR_TPCZS_INVALID_OFFSET, SectorEndpoint, Value, Expected) // Raw page is skipped since it contains invalid payload offset -GPUCA_ERROR_CODE(30, ERROR_TPCZS_INVALID_MAGIC_WORD, Value) // ZS header contains wrong magic word -GPUCA_ERROR_CODE(31, ERROR_TPCZS_PAGE_OVERFLOW, Position, PageEnd) // Ran out of page to decode -GPUCA_ERROR_CODE(32, ERROR_TPCZS_VERSION_MISMATCH, Value, Expected) // ZS decoder received page with wrong version -GPUCA_ERROR_CODE(33, ERROR_TPCZS_UNKNOWN, ErrorCode) // Unkown or invalid error code raised in decoder -GPUCA_ERROR_CODE(33, MAX_GPUCA_ERROR_NUMBER) +GPUCA_ERROR_CODE(29, MAX_GPUCA_ERROR_NUMBER) // #define GPUCA_CHECK_TPCZS_CORRUPTION diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx index a548217e26b64..f7bb64106fe4f 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx @@ -604,107 +604,61 @@ GPUd() uint32_t GPUTPCCFDecodeZSDenseLink::DecodePage(GPUSharedMemory& smem, pro const auto* decHeader = Peek(page, raw::RDHUtils::getMemorySize(*rawDataHeader) - sizeof(TPCZSHDRV2)); ConsumeHeader(page); + assert(decHeader->version >= ZSVersionDenseLinkBased); + assert(decHeader->magicWord == tpc::zerosupp_link_based::CommonHeader::MagicWordLinkZSMetaHeader); + uint16_t nSamplesWritten = 0; const uint16_t nSamplesInPage = decHeader->nADCsamples; const auto* payloadEnd = Peek(pageStart, raw::RDHUtils::getMemorySize(*rawDataHeader) - sizeof(TPCZSHDRV2) - ((decHeader->flags & TPCZSHDRV2::ZSFlags::TriggerWordPresent) ? TPCZSHDRV2::TRIGGER_WORD_SIZE : 0)); const auto* nextPage = Peek(pageStart, TPCZSHDR::TPC_ZS_PAGE_SIZE); - const bool extendsToNextPage = decHeader->flags & TPCZSHDRV2::ZSFlags::payloadExtendsToNextPage; - ConsumeBytes(page, decHeader->firstZSDataOffset - sizeof(o2::header::RAWDataHeader)); - int err = GPUErrors::ERROR_NONE; + for (uint16_t i = 0; i < decHeader->nTimebinHeaders; i++) { - if (decHeader->version < ZSVersionDenseLinkBased) { - err = GPUErrors::ERROR_TPCZS_VERSION_MISMATCH; - } + [[maybe_unused]] ptrdiff_t sizeLeftInPage = payloadEnd - page; + assert(sizeLeftInPage > 0); - if (decHeader->magicWord != zerosupp_link_based::CommonHeader::MagicWordLinkZSMetaHeader) { - err = GPUErrors::ERROR_TPCZS_INVALID_MAGIC_WORD; - } - - for (uint16_t i = 0; i < decHeader->nTimebinHeaders && !err; i++) { - - ptrdiff_t sizeLeftInPage = payloadEnd - page; - if (sizeLeftInPage <= 0) { - err = GPUErrors::ERROR_TPCZS_PAGE_OVERFLOW; - break; - } - - int16_t nSamplesWrittenTB = 0; - uint16_t nSamplesLeftInPage = nSamplesInPage - nSamplesWritten; - - if (i == decHeader->nTimebinHeaders - 1 && extendsToNextPage) { - if (raw::RDHUtils::getMemorySize(*rawDataHeader) != TPCZSHDR::TPC_ZS_PAGE_SIZE) { - err = GPUErrors::ERROR_TPCZS_PAGE_OVERFLOW; - break; - } + uint16_t nSamplesWrittenTB = 0; + if (i == decHeader->nTimebinHeaders - 1 && decHeader->flags & o2::tpc::TPCZSHDRV2::ZSFlags::payloadExtendsToNextPage) { + assert(o2::raw::RDHUtils::getMemorySize(*rawDataHeader) == TPCZSHDR::TPC_ZS_PAGE_SIZE); if ((uint16_t)(raw::RDHUtils::getPageCounter(rawDataHeader) + 1) == raw::RDHUtils::getPageCounter(nextPage)) { - nSamplesWrittenTB = DecodeTB(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, decHeader->cruID, nSamplesLeftInPage, payloadEnd, nextPage); + nSamplesWrittenTB = DecodeTB(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, decHeader->cruID, payloadEnd, nextPage); } else { - err = GPUErrors::ERROR_TPCZS_INCOMPLETE_HBF; - break; + nSamplesWrittenTB = FillWithInvalid(clusterer, iThread, nThreads, pageDigitOffset, nSamplesInPage - nSamplesWritten); +#ifdef GPUCA_CHECK_TPCZS_CORRUPTION + if (iThread == 0) { + clusterer.raiseError(GPUErrors::ERROR_TPCZS_INCOMPLETE_HBF, clusterer.mISector * 1000 + decHeader->cruID, raw::RDHUtils::getPageCounter(rawDataHeader), raw::RDHUtils::getPageCounter(nextPage)); + } +#endif } } else { - nSamplesWrittenTB = DecodeTB(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, decHeader->cruID, nSamplesLeftInPage, payloadEnd, nextPage); - } - - // Abort decoding the page if an error was detected. - if (nSamplesWrittenTB < 0) { - err = -nSamplesWrittenTB; - break; + nSamplesWrittenTB = DecodeTB(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, decHeader->cruID, payloadEnd, nextPage); } + assert(nSamplesWritten <= nSamplesInPage); nSamplesWritten += nSamplesWrittenTB; pageDigitOffset += nSamplesWrittenTB; } // for (uint16_t i = 0; i < decHeader->nTimebinHeaders; i++) - if (nSamplesWritten != nSamplesInPage) { - if (nSamplesWritten < nSamplesInPage) { - pageDigitOffset += FillWithInvalid(clusterer, iThread, nThreads, pageDigitOffset, nSamplesInPage - nSamplesWritten); - } - err = !err ? GPUErrors::ERROR_TPCZS_INVALID_NADC : err; // Ensure we don't overwrite any previous error - } - - if (iThread == 0 && err) { - [[maybe_unused]] bool dumpPage = false; - - if (err == GPUErrors::ERROR_TPCZS_VERSION_MISMATCH) { - clusterer.raiseError(err, decHeader->version, ZSVersionDenseLinkBased); - } else if (err == GPUErrors::ERROR_TPCZS_INVALID_MAGIC_WORD) { - clusterer.raiseError(err, decHeader->magicWord); - } else if (err == GPUErrors::ERROR_TPCZS_INCOMPLETE_HBF) { - clusterer.raiseError(err, clusterer.mISector * 1000 + decHeader->cruID, raw::RDHUtils::getPageCounter(rawDataHeader), raw::RDHUtils::getPageCounter(nextPage)); - } else if (err == GPUErrors::ERROR_TPCZS_PAGE_OVERFLOW) { - clusterer.raiseError(err, extendsToNextPage); - dumpPage = true; - } else if (err == GPUErrors::ERROR_TPCZS_INVALID_NADC) { - clusterer.raiseError(err, nSamplesInPage, nSamplesWritten, extendsToNextPage); - dumpPage = true; - } else { - clusterer.raiseError(GPUErrors::ERROR_TPCZS_UNKNOWN, err); - } - #ifdef GPUCA_CHECK_TPCZS_CORRUPTION -#ifndef GPUCA_GPUCODE - if (dumpPage) { - // allocate more space on the stack for fname, so it can be overwritten by hand in a debugger. - const char fname[64] = "dump00.bin"; - FILE* foo = fopen(fname, "w+b"); - fwrite(pageStart, 1, TPCZSHDR::TPC_ZS_PAGE_SIZE, foo); - fclose(foo); - } -#endif -#endif + if (iThread == 0 && nSamplesWritten != nSamplesInPage) { + clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_NADC, clusterer.mISector * 1000 + decHeader->cruID, nSamplesInPage, nSamplesWritten); + /*#ifndef GPUCA_GPUCODE + FILE* foo = fopen("dump.bin", "w+b"); + fwrite(pageSrc, 1, o2::raw::RDHUtils::getMemorySize(*rdHdr), foo); + fclose(foo); + #endif*/ } +#endif return pageDigitOffset; } template -GPUd() int16_t GPUTPCCFDecodeZSDenseLink::DecodeTB( +GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTB( processorType& clusterer, [[maybe_unused]] GPUSharedMemory& smem, int32_t iThread, @@ -713,24 +667,23 @@ GPUd() int16_t GPUTPCCFDecodeZSDenseLink::DecodeTB( const header::RAWDataHeader* rawDataHeader, int32_t firstHBF, int32_t cru, - uint16_t nSamplesLeftInPage, - const uint8_t* payloadEnd, - const uint8_t* nextPage) + [[maybe_unused]] const uint8_t* payloadEnd, + [[maybe_unused]] const uint8_t* nextPage) { if constexpr (DecodeInParallel) { - return DecodeTBMultiThread(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, cru, nSamplesLeftInPage, payloadEnd, nextPage); + return DecodeTBMultiThread(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, cru, payloadEnd, nextPage); } else { - int16_t nSamplesWritten = 0; + uint16_t nSamplesWritten = 0; if (iThread == 0) { - nSamplesWritten = DecodeTBSingleThread(clusterer, page, pageDigitOffset, rawDataHeader, firstHBF, cru, nSamplesLeftInPage, payloadEnd, nextPage); + nSamplesWritten = DecodeTBSingleThread(clusterer, page, pageDigitOffset, rawDataHeader, firstHBF, cru, payloadEnd, nextPage); } return warp_broadcast(nSamplesWritten, 0); } } template -GPUd() int16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread( +GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread( processorType& clusterer, GPUSharedMemory& smem, const int32_t iThread, @@ -739,9 +692,8 @@ GPUd() int16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread( const header::RAWDataHeader* rawDataHeader, int32_t firstHBF, int32_t cru, - uint16_t nSamplesLeftInPage, - const uint8_t* payloadEnd, - const uint8_t* nextPage) + [[maybe_unused]] const uint8_t* payloadEnd, + [[maybe_unused]] const uint8_t* nextPage) { #define MAYBE_PAGE_OVERFLOW(pagePtr) \ if constexpr (PayloadExtendsToNextPage) { \ @@ -751,9 +703,7 @@ GPUd() int16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread( ConsumeBytes(pagePtr, sizeof(header::RAWDataHeader) + diff); \ } \ } else { \ - if (pagePtr > payloadEnd) { \ - return -GPUErrors::ERROR_TPCZS_PAGE_OVERFLOW; \ - } \ + assert(pagePtr <= payloadEnd); \ } #define PEEK_OVERFLOW(pagePtr, offset) \ @@ -778,7 +728,7 @@ GPUd() int16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread( uint16_t linkBC = (tbbHdr & 0xFFF0) >> 4; int32_t timeBin = (linkBC + (uint64_t)(raw::RDHUtils::getHeartBeatOrbit(*rawDataHeader) - firstHBF) * constants::lhc::LHCMaxBunches) / LHCBCPERTIMEBIN; - int16_t nSamplesInTB = 0; + uint16_t nSamplesInTB = 0; // Read timebin link headers for (uint8_t iLink = 0; iLink < nLinksInTimebin; iLink++) { @@ -797,6 +747,7 @@ GPUd() int16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread( } int32_t nBytesBitmask = CAMath::Popcount(bitmaskL2); + assert(nBytesBitmask <= 10); for (int32_t chan = iThread; chan < CAMath::nextMultipleOf(80); chan += NTHREADS) { int32_t chanL2Idx = chan / 8; @@ -805,6 +756,7 @@ GPUd() int16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread( int32_t chanByteOffset = nBytesBitmask - 1 - CAMath::Popcount(bitmaskL2 >> (chanL2Idx + 1)); uint8_t myChannelHasData = (chan < 80 && l2 ? TEST_BIT(PEEK_OVERFLOW(page, chanByteOffset), chan % 8) : 0); + assert(myChannelHasData == 0 || myChannelHasData == 1); int32_t nSamplesStep; int32_t threadSampleOffset = CfUtils::warpPredicateScan(myChannelHasData, &nSamplesStep); @@ -827,17 +779,13 @@ GPUd() int16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread( GPUbarrierWarp(); // Ensure all writes to shared memory are finished, before reading it - if (nSamplesInTB > nSamplesLeftInPage) { - return -GPUErrors::ERROR_TPCZS_INVALID_NADC; - } + const uint8_t* adcData = ConsumeBytes(page, (nSamplesInTB * DECODE_BITS + 7) / 8); + MAYBE_PAGE_OVERFLOW(page); // TODO: We don't need this check? if (not fragment.contains(timeBin)) { return FillWithInvalid(clusterer, iThread, NTHREADS, pageDigitOffset, nSamplesInTB); } - const uint8_t* adcData = ConsumeBytes(page, (nSamplesInTB * DECODE_BITS + 7) / 8); - MAYBE_PAGE_OVERFLOW(page); - // Unpack ADC int32_t iLink = 0; for (uint16_t sample = iThread; sample < nSamplesInTB; sample += NTHREADS) { @@ -873,6 +821,9 @@ GPUd() int16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread( GPUbarrierWarp(); // Ensure all reads to shared memory are finished, before decoding next header into shmem + assert(PayloadExtendsToNextPage || adcData <= page); + assert(PayloadExtendsToNextPage || page <= payloadEnd); + return nSamplesInTB; #undef TEST_BIT @@ -881,14 +832,13 @@ GPUd() int16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread( } template -GPUd() int16_t GPUTPCCFDecodeZSDenseLink::DecodeTBSingleThread( +GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBSingleThread( processorType& clusterer, const uint8_t*& page, uint32_t pageDigitOffset, const header::RAWDataHeader* rawDataHeader, int32_t firstHBF, int32_t cru, - uint16_t nSamplesLeftInPage, [[maybe_unused]] const uint8_t* payloadEnd, [[maybe_unused]] const uint8_t* nextPage) { @@ -900,9 +850,7 @@ GPUd() int16_t GPUTPCCFDecodeZSDenseLink::DecodeTBSingleThread( ConsumeBytes(pagePtr, sizeof(header::RAWDataHeader) + diff); \ } \ } else { \ - if (pagePtr > payloadEnd) { \ - return -GPUErrors::ERROR_TPCZS_PAGE_OVERFLOW; \ - } \ + assert(pagePtr <= payloadEnd); \ } using zerosupp_link_based::ChannelPerTBHeader; @@ -950,18 +898,14 @@ GPUd() int16_t GPUTPCCFDecodeZSDenseLink::DecodeTBSingleThread( } // for (uint8_t iLink = 0; iLink < nLinksInTimebin; iLink++) - if (nSamplesInTB > nSamplesLeftInPage) { - return -GPUErrors::ERROR_TPCZS_INVALID_NADC; - } + const uint8_t* adcData = ConsumeBytes(page, (nSamplesInTB * DECODE_BITS + 7) / 8); + MAYBE_PAGE_OVERFLOW(page); if (not fragment.contains(timeBin)) { FillWithInvalid(clusterer, 0, 1, pageDigitOffset, nSamplesInTB); return nSamplesInTB; } - const uint8_t* adcData = ConsumeBytes(page, (nSamplesInTB * DECODE_BITS + 7) / 8); - MAYBE_PAGE_OVERFLOW(page); - // Unpack ADC uint32_t byte = 0, bits = 0; uint16_t rawFECChannel = 0; @@ -993,6 +937,10 @@ GPUd() int16_t GPUTPCCFDecodeZSDenseLink::DecodeTBSingleThread( } // while (bits >= DECODE_BITS) } // while (nSamplesWritten < nAdc) + assert(PayloadExtendsToNextPage || adcData <= page); + assert(PayloadExtendsToNextPage || page <= payloadEnd); + assert(nSamplesWritten == nSamplesInTB); + return nSamplesWritten; #undef MAYBE_PAGE_OVERFLOW diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h index 4697462a8c504..e476674e030f9 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h @@ -167,17 +167,14 @@ class GPUTPCCFDecodeZSDenseLink : public GPUTPCCFDecodeZSLinkBase GPUd() static bool ChannelIsActive(const uint8_t* chan, uint16_t chanIndex); - // Decode a single timebin within an 8kb page. - // Returns the number of samples decoded from the page - // or negative value to indicate an error (no samples are written in this case) template - GPUd() static int16_t DecodeTB(processorType& clusterer, GPUSharedMemory& smem, int32_t iThread, const uint8_t*& page, uint32_t pageDigitOffset, const header::RAWDataHeader* rawDataHeader, int32_t firstHBF, int32_t cru, uint16_t nSamplesLeftInPage, const uint8_t* payloadEnd, const uint8_t* nextPage); + GPUd() static uint16_t DecodeTB(processorType& clusterer, GPUSharedMemory& smem, int32_t iThread, const uint8_t*& page, uint32_t pageDigitOffset, const header::RAWDataHeader* rawDataHeader, int32_t firstHBF, int32_t cru, const uint8_t* payloadEnd, const uint8_t* nextPage); template - GPUd() static int16_t DecodeTBSingleThread(processorType& clusterer, const uint8_t*& page, uint32_t pageDigitOffset, const header::RAWDataHeader* rawDataHeader, int32_t firstHBF, int32_t cru, uint16_t nSamplesLeftInPage, const uint8_t* payloadEnd, const uint8_t* nextPage); + GPUd() static uint16_t DecodeTBSingleThread(processorType& clusterer, const uint8_t*& page, uint32_t pageDigitOffset, const header::RAWDataHeader* rawDataHeader, int32_t firstHBF, int32_t cru, const uint8_t* payloadEnd, const uint8_t* nextPage); template - GPUd() static int16_t DecodeTBMultiThread(processorType& clusterer, GPUSharedMemory& smem, const int32_t iThread, const uint8_t*& page, uint32_t pageDigitOffset, const header::RAWDataHeader* rawDataHeader, int32_t firstHBF, int32_t cru, uint16_t nSamplesLeftInPage, const uint8_t* payloadEnd, const uint8_t* nextPage); + GPUd() static uint16_t DecodeTBMultiThread(processorType& clusterer, GPUSharedMemory& smem, const int32_t iThread, const uint8_t*& page, uint32_t pageDigitOffset, const header::RAWDataHeader* rawDataHeader, int32_t firstHBF, int32_t cru, const uint8_t* payloadEnd, const uint8_t* nextPage); }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index 84726ea9fb8d0..c8ddcd2e9d81d 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -120,7 +120,7 @@ o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanDown" "= TPC o2_gpu_add_kernel("GPUTPCCFStreamCompaction, compactDigits" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t stage CfChargePos* in CfChargePos* out) o2_gpu_add_kernel("GPUTPCCFDecodeZS" "= TPCCLUSTERFINDER" LB int32_t firstHBF) o2_gpu_add_kernel("GPUTPCCFDecodeZSLink" "GPUTPCCFDecodeZS" LB int32_t firstHBF) -o2_gpu_add_kernel("GPUTPCCFDecodeZSDenseLink" "GPUTPCCFDecodeZS ERRORS" LB int32_t firstHBF) +o2_gpu_add_kernel("GPUTPCCFDecodeZSDenseLink" "GPUTPCCFDecodeZS" LB int32_t firstHBF) o2_gpu_add_kernel("GPUTPCCFGather" "=" LB o2::tpc::ClusterNative* dest) o2_gpu_add_kernel("GPUTrackingRefitKernel, mode0asGPU" "= GLOBALREFIT " LB) o2_gpu_add_kernel("GPUTrackingRefitKernel, mode1asTrackParCov" "= GLOBALREFIT " LB)