From efc8205c1f86fc1d9f07fb99e444816973029318 Mon Sep 17 00:00:00 2001
From: David Rohr <drohr@jwdt.org>
Date: Mon, 1 Sep 2025 15:52:24 +0200
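Subject: [PATCH] TPCFastTransform: fix compilation on GPU with the new splines

Changes in this patch:
- Spline1DSpec.h, Spline2DSpec.h, TPCFastSpaceChargeCorrection.h,
  TPCFastTransform.h: replace structured bindings with a named result
  variable and indexed element access.
- TPCFastSpaceChargeCorrection.h: mark getSpineScaleForZ() and
  convLocalToGridUntruncated() as GPUd(); give the set() parameters a
  trailing underscore so they no longer share names with the members.
- TPCFastTransform.h: rename the local variable `local` to `localval`.
- GPUTPCGMMerger.cxx: remove the `sector` local in MergeLoopersInit().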
---
 GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx     |  1 -
 GPU/TPCFastTransformation/Spline1DSpec.h      |  6 +-
 GPU/TPCFastTransformation/Spline2DSpec.h      | 12 +++-
 .../TPCFastSpaceChargeCorrection.h            | 70 +++++++++----------
 GPU/TPCFastTransformation/TPCFastTransform.h  | 60 ++++++++--------
 5 files changed, 80 insertions(+), 69 deletions(-)
diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx
index 94c0a831edc95..6f27b1755c1ee 100644
--- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx
+++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx
@@ -1917,7 +1917,6 @@ GPUd() void GPUTPCGMMerger::MergeLoopersInit(int32_t nBlocks, int32_t nThreads,
     const auto& p = trk.GetParam();
     const float qptabs = CAMath::Abs(p.GetQPt());
     if (trk.NClusters() && qptabs * Param().qptB5Scaler > 5.f && qptabs * Param().qptB5Scaler <= lowPtThresh) {
-      const int32_t sector = mClusters[trk.FirstClusterRef() + trk.NClusters() - 1].sector;
       const float refz = p.GetZ() + (Param().par.earlyTpcTransform ? p.GetTZOffset() : GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(p.GetTZOffset(), Param().continuousMaxTimeBin)) + (trk.CSide() ? -100 : 100);
       float sinA, cosA;
       CAMath::SinCos(trk.GetAlpha(), sinA, cosA);
diff --git a/GPU/TPCFastTransformation/Spline1DSpec.h b/GPU/TPCFastTransformation/Spline1DSpec.h
index 28be5dd20e235..d72de5a446718 100644
--- a/GPU/TPCFastTransformation/Spline1DSpec.h
+++ b/GPU/TPCFastTransformation/Spline1DSpec.h
@@ -314,7 +314,11 @@ class Spline1DSpec<DataT, YdimT, 0> : public Spline1DContainer<DataT>
     const auto nYdimTmp = SplineUtil::getNdim<YdimT>(inpYdim);
     const auto nYdim = nYdimTmp.get();
 
-    auto [dSdSl, dSdDl, dSdSr, dSdDr] = getSderivativesOverParsAtU<T>(knotL, u);
+    auto val = getSderivativesOverParsAtU<T>(knotL, u);
+    const auto& dSdSl = val[0];
+    const auto& dSdDl = val[1];
+    const auto& dSdSr = val[2];
+    const auto& dSdDr = val[3];
     for (int32_t dim = 0; dim < nYdim; ++dim) {
       S[dim] = dSdSr * Sr[dim] + dSdSl * Sl[dim] + dSdDl * Dl[dim] + dSdDr * Dr[dim];
     }
diff --git a/GPU/TPCFastTransformation/Spline2DSpec.h b/GPU/TPCFastTransformation/Spline2DSpec.h
index 987ce1ad5d256..7c34b0890ce50 100644
--- a/GPU/TPCFastTransformation/Spline2DSpec.h
+++ b/GPU/TPCFastTransformation/Spline2DSpec.h
@@ -334,8 +334,16 @@ class Spline2DSpec<DataT, YdimT, 0>
     const DataT* A = Parameters + (nu * iv + iu) * nYdim4; // values { {Y1,Y2,Y3}, {Y1,Y2,Y3}'v, {Y1,Y2,Y3}'u, {Y1,Y2,Y3}''vu } at {u0, v0}
     const DataT* B = A + nYdim4 * nu;                      // values { ... } at {u0, v1}
 
-    auto [dSl, dDl, dSr, dDr] = mGridX1.template getSderivativesOverParsAtU<DataT>(knotU, u);
-    auto [dSd, dDd, dSu, dDu] = mGridX2.template getSderivativesOverParsAtU<DataT>(knotV, v);
+    auto val1 = mGridX1.template getSderivativesOverParsAtU<DataT>(knotU, u);
+    auto val2 = mGridX2.template getSderivativesOverParsAtU<DataT>(knotV, v);
+    const auto& dSl = val1[0];
+    const auto& dDl = val1[1];
+    const auto& dSr = val1[2];
+    const auto& dDr = val1[3];
+    const auto& dSd = val2[0];
+    const auto& dDd = val2[1];
+    const auto& dSu = val2[2];
+    const auto& dDu = val2[3];
 
     // when nYdim == 1:
     // S = dSl * (dSd * A[0] + dDd * A[1]) + dDl * (dSd * A[2] + dDd * A[3]) +
diff --git a/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h b/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h
index 4ca5b74025743..ffbc8691ea268 100644
--- a/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h
+++ b/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h
@@ -56,15 +56,15 @@ class TPCFastSpaceChargeCorrection : public FlatObject
     float splineScalingWithZ{0.f}; ///< spline scaling factor in the Z region between the zOut and the readout plane
 
    public:
-    void set(float y0, float yScale, float z0, float zScale, float zOut, float zReadout)
+    void set(float y0_, float yScale_, float z0_, float zScale_, float zOut_, float zReadout_)
     {
-      this->y0 = y0;
-      this->yScale = yScale;
-      this->z0 = z0;
-      this->zScale = zScale;
-      this->zOut = zOut;
+      this->y0 = y0_;
+      this->yScale = yScale_;
+      this->z0 = z0_;
+      this->zScale = zScale_;
+      this->zOut = zOut_;
       // no scaling when the distance to the readout is too small
-      this->splineScalingWithZ = fabs(zReadout - zOut) > 1. ? 1. / (zReadout - zOut) : 0.;
+      this->splineScalingWithZ = fabs(zReadout_ - zOut_) > 1. ? 1. / (zReadout_ - zOut_) : 0.;
     }
 
     float getY0() const { return y0; }
@@ -72,13 +72,13 @@ class TPCFastSpaceChargeCorrection : public FlatObject
     float getZ0() const { return z0; }
     float getZscale() const { return zScale; }
 
-    float getSpineScaleForZ(float z) const
+    GPUd() float getSpineScaleForZ(float z) const
     {
       return 1.f - GPUCommonMath::Clamp((z - zOut) * splineScalingWithZ, 0.f, 1.f);
     }
 
     /// convert local y, z to internal grid coordinates u,v, and spline scale
-    std::array<float, 3> convLocalToGridUntruncated(float y, float z) const
+    GPUd() std::array<float, 3> convLocalToGridUntruncated(float y, float z) const
     {
       return {(y - y0) * yScale, (z - z0) * zScale, getSpineScaleForZ(z)};
     }
@@ -458,21 +458,21 @@ GPUdi() std::array<float, 3> TPCFastSpaceChargeCorrection::convLocalToGrid(int32
   /// convert local y, z to internal grid coordinates u,v
   /// return values: u, v, scaling factor
   const SplineType& spline = getSpline(sector, row);
-  auto [gridU, gridV, scale] = getSectorRowInfo(sector, row).gridMeasured.convLocalToGridUntruncated(y, z);
+  auto val = getSectorRowInfo(sector, row).gridMeasured.convLocalToGridUntruncated(y, z);
   // shrink to the grid
-  gridU = GPUCommonMath::Clamp(gridU, 0.f, (float)spline.getGridX1().getUmax());
-  gridV = GPUCommonMath::Clamp(gridV, 0.f, (float)spline.getGridX2().getUmax());
-  return {gridU, gridV, scale};
+  val[0] = GPUCommonMath::Clamp(val[0], 0.f, (float)spline.getGridX1().getUmax());
+  val[1] = GPUCommonMath::Clamp(val[1], 0.f, (float)spline.getGridX2().getUmax());
+  return val;
 }
 
 GPUdi() bool TPCFastSpaceChargeCorrection::isLocalInsideGrid(int32_t sector, int32_t row, float y, float z) const
 {
   /// check if local y, z are inside the grid
-  auto [gridU, gridV, scale] = getSectorRowInfo(sector, row).gridMeasured.convLocalToGridUntruncated(y, z);
+  auto val = getSectorRowInfo(sector, row).gridMeasured.convLocalToGridUntruncated(y, z);
   const auto& spline = getSpline(sector, row);
   // shrink to the grid
-  if (gridU < 0.f || gridU > (float)spline.getGridX1().getUmax() || //
-      gridV < 0.f || gridV > (float)spline.getGridX2().getUmax()) {
+  if (val[0] < 0.f || val[0] > (float)spline.getGridX1().getUmax() || //
+      val[1] < 0.f || val[1] > (float)spline.getGridX2().getUmax()) {
     return false;
   }
   return true;
@@ -481,11 +481,11 @@ GPUdi() bool TPCFastSpaceChargeCorrection::isLocalInsideGrid(int32_t sector, int
 GPUdi() bool TPCFastSpaceChargeCorrection::isRealLocalInsideGrid(int32_t sector, int32_t row, float y, float z) const
 {
   /// check if local y, z are inside the grid
-  auto [gridU, gridV, scale] = getSectorRowInfo(sector, row).gridReal.convLocalToGridUntruncated(y, z);
+  auto val = getSectorRowInfo(sector, row).gridReal.convLocalToGridUntruncated(y, z);
   const auto& spline = getSpline(sector, row);
   // shrink to the grid
-  if (gridU < 0.f || gridU > (float)spline.getGridX1().getUmax() || //
-      gridV < 0.f || gridV > (float)spline.getGridX2().getUmax()) {
+  if (val[0] < 0.f || val[0] > (float)spline.getGridX1().getUmax() || //
+      val[1] < 0.f || val[1] > (float)spline.getGridX2().getUmax()) {
     return false;
   }
   return true;
@@ -501,11 +501,11 @@ GPUdi() std::array<float, 3> TPCFastSpaceChargeCorrection::convRealLocalToGrid(i
 {
   /// convert real y, z to the internal grid coordinates + scale
   const SplineType& spline = getSpline(sector, row);
-  auto [gridU, gridV, scale] = getSectorRowInfo(sector, row).gridReal.convLocalToGridUntruncated(y, z);
+  auto val = getSectorRowInfo(sector, row).gridReal.convLocalToGridUntruncated(y, z);
   // shrink to the grid
-  gridU = GPUCommonMath::Clamp(gridU, 0.f, (float)spline.getGridX1().getUmax());
-  gridV = GPUCommonMath::Clamp(gridV, 0.f, (float)spline.getGridX2().getUmax());
-  return {gridU, gridV, scale};
+  val[0] = GPUCommonMath::Clamp(val[0], 0.f, (float)spline.getGridX1().getUmax());
+  val[1] = GPUCommonMath::Clamp(val[1], 0.f, (float)spline.getGridX2().getUmax());
+  return val;
 }
 
 GPUdi() std::array<float, 2> TPCFastSpaceChargeCorrection::convGridToRealLocal(int32_t sector, int32_t row, float gridU, float gridV) const
@@ -520,35 +520,35 @@ GPUdi() std::array<float, 3> TPCFastSpaceChargeCorrection::getCorrectionLocal(in
   const SplineType& spline = getSpline(sector, row);
   const float* splineData = getSplineData(sector, row);
 
-  auto [gridU, gridV, scale] = convLocalToGrid(sector, row, y, z);
+  auto val = convLocalToGrid(sector, row, y, z);
 
   float dxyz[3];
-  spline.interpolateAtU(splineData, gridU, gridV, dxyz);
+  spline.interpolateAtU(splineData, val[0], val[1], dxyz);
 
-  float dx = scale * GPUCommonMath::Clamp(dxyz[0], info.minCorr[0], info.maxCorr[0]);
-  float dy = scale * GPUCommonMath::Clamp(dxyz[1], info.minCorr[1], info.maxCorr[1]);
-  float dz = scale * GPUCommonMath::Clamp(dxyz[2], info.minCorr[2], info.maxCorr[2]);
+  float dx = val[2] * GPUCommonMath::Clamp(dxyz[0], info.minCorr[0], info.maxCorr[0]);
+  float dy = val[2] * GPUCommonMath::Clamp(dxyz[1], info.minCorr[1], info.maxCorr[1]);
+  float dz = val[2] * GPUCommonMath::Clamp(dxyz[2], info.minCorr[2], info.maxCorr[2]);
   return {dx, dy, dz};
 }
 
 GPUdi() float TPCFastSpaceChargeCorrection::getCorrectionXatRealYZ(int32_t sector, int32_t row, float realY, float realZ) const
 {
   const auto& info = getSectorRowInfo(sector, row);
-  auto [gridU, gridV, scale] = convRealLocalToGrid(sector, row, realY, realZ);
+  auto val = convRealLocalToGrid(sector, row, realY, realZ);
   float dx = 0;
-  getSplineInvX(sector, row).interpolateAtU(getSplineDataInvX(sector, row), gridU, gridV, &dx);
-  dx = scale * GPUCommonMath::Clamp(dx, info.minCorr[0], info.maxCorr[0]);
+  getSplineInvX(sector, row).interpolateAtU(getSplineDataInvX(sector, row), val[0], val[1], &dx);
+  dx = val[2] * GPUCommonMath::Clamp(dx, info.minCorr[0], info.maxCorr[0]);
   return dx;
 }
 
 GPUdi() std::array<float, 2> TPCFastSpaceChargeCorrection::getCorrectionYZatRealYZ(int32_t sector, int32_t row, float realY, float realZ) const
 {
-  auto [gridU, gridV, scale] = convRealLocalToGrid(sector, row, realY, realZ);
+  auto val = convRealLocalToGrid(sector, row, realY, realZ);
   const auto& info = getSectorRowInfo(sector, row);
   float dyz[2];
-  getSplineInvYZ(sector, row).interpolateAtU(getSplineDataInvYZ(sector, row), gridU, gridV, dyz);
-  dyz[0] = scale * GPUCommonMath::Clamp(dyz[0], info.minCorr[1], info.maxCorr[1]);
-  dyz[1] = scale * GPUCommonMath::Clamp(dyz[1], info.minCorr[2], info.maxCorr[2]);
+  getSplineInvYZ(sector, row).interpolateAtU(getSplineDataInvYZ(sector, row), val[0], val[1], dyz);
+  dyz[0] = val[2] * GPUCommonMath::Clamp(dyz[0], info.minCorr[1], info.maxCorr[1]);
+  dyz[1] = val[2] * GPUCommonMath::Clamp(dyz[1], info.minCorr[2], info.maxCorr[2]);
   return {dyz[0], dyz[1]};
 }
 
diff --git a/GPU/TPCFastTransformation/TPCFastTransform.h b/GPU/TPCFastTransformation/TPCFastTransform.h
index e26f501140cad..91e1805253209 100644
--- a/GPU/TPCFastTransformation/TPCFastTransform.h
+++ b/GPU/TPCFastTransformation/TPCFastTransform.h
@@ -348,17 +348,17 @@ class TPCFastTransform : public FlatObject
 GPUdi() void TPCFastTransform::convPadTimeToLocal(int32_t sector, int32_t row, float pad, float time, float& y, float& z, float vertexTime) const
 {
   float l = (time - mT0 - vertexTime) * mVdrift; // drift length [cm]
-  const auto local = getGeometry().convPadDriftLengthToLocal(sector, row, pad, l);
-  y = local[0];
-  z = local[1];
+  const auto localval = getGeometry().convPadDriftLengthToLocal(sector, row, pad, l);
+  y = localval[0];
+  z = localval[1];
 }
 
 GPUdi() void TPCFastTransform::convPadTimeToLocalInTimeFrame(int32_t sector, int32_t row, float pad, float time, float& y, float& z, float maxTimeBin) const
 {
   float l = (time - mT0 - maxTimeBin) * mVdrift; // drift length [cm]
-  const auto local = getGeometry().convPadDriftLengthToLocal(sector, row, pad, l);
-  y = local[0];
-  z = local[1];
+  const auto localval = getGeometry().convPadDriftLengthToLocal(sector, row, pad, l);
+  y = localval[0];
+  z = localval[1];
 }
 
 // ----------------------------------------------------------------------
@@ -423,22 +423,22 @@ GPUdi() void TPCFastTransform::TransformLocal(int32_t sector, int32_t row, float
       dz = corrLocal[2];
       if (ref) {
         if ((scale > 0.f) && (scaleMode == 0)) { // scaling was requested
-          auto [dxRef, dyRef, dzRef] = ref->mCorrection.getCorrectionLocal(sector, row, y, z);
-          dx = (dx - dxRef) * scale + dxRef;
-          dy = (dy - dyRef) * scale + dyRef;
-          dz = (dz - dzRef) * scale + dzRef;
+          auto val = ref->mCorrection.getCorrectionLocal(sector, row, y, z);
+          dx = (dx - val[0]) * scale + val[0];
+          dy = (dy - val[1]) * scale + val[1];
+          dz = (dz - val[2]) * scale + val[2];
         } else if ((scale != 0.f) && ((scaleMode == 1) || (scaleMode == 2))) {
-          auto [dxRef, dyRef, dzRef] = ref->mCorrection.getCorrectionLocal(sector, row, y, z);
-          dx = dxRef * scale + dx;
-          dy = dyRef * scale + dy;
-          dz = dzRef * scale + dz;
+          auto val = ref->mCorrection.getCorrectionLocal(sector, row, y, z);
+          dx = val[0] * scale + dx;
+          dy = val[1] * scale + dy;
+          dz = val[2] * scale + dz;
         }
       }
       if (ref2 && (scale2 != 0)) {
-        auto [dxRef, dyRef, dzRef] = ref2->mCorrection.getCorrectionLocal(sector, row, y, z);
-        dx = dxRef * scale2 + dx;
-        dy = dyRef * scale2 + dy;
-        dz = dzRef * scale2 + dz;
+        auto val = ref2->mCorrection.getCorrectionLocal(sector, row, y, z);
+        dx = val[0] * scale2 + dx;
+        dy = val[1] * scale2 + dy;
+        dz = val[2] * scale2 + dz;
       }
     }
   }
@@ -601,9 +601,9 @@ GPUdi() void TPCFastTransform::TransformIdeal(int32_t sector, int32_t row, float
 
   x = getGeometry().getRowInfo(row).x;
   float driftLength = (time - mT0 - vertexTime) * mVdrift; // drift length cm
-  const auto local = getGeometry().convPadDriftLengthToLocal(sector, row, pad, driftLength);
-  y = local[0];
-  z = local[1];
+  const auto localval = getGeometry().convPadDriftLengthToLocal(sector, row, pad, driftLength);
+  y = localval[0];
+  z = localval[1];
 }
 
 GPUdi() float TPCFastTransform::convTimeToZinTimeFrame(int32_t sector, float time, float maxTimeBin) const
@@ -716,18 +716,18 @@ GPUdi() void TPCFastTransform::InverseTransformYZtoNominalYZ(int32_t sector, int
 
     if (ref) { // scaling was requested
       if (scaleMode == 0 && scale > 0.f) {
-        const auto [dyRef, dzRef] = ref->mCorrection.getCorrectionYZatRealYZ(sector, row, realY, realZ);
-        dy = (dy - dyRef) * scale + dyRef;
-        dz = (dz - dzRef) * scale + dzRef;
+        const auto val = ref->mCorrection.getCorrectionYZatRealYZ(sector, row, realY, realZ);
+        dy = (dy - val[0]) * scale + val[0];
+        dz = (dz - val[1]) * scale + val[1];
       } else if ((scale != 0) && ((scaleMode == 1) || (scaleMode == 2))) {
-        const auto [dyRef, dzRef] = ref->mCorrection.getCorrectionYZatRealYZ(sector, row, realY, realZ);
-        dy = dyRef * scale + dy;
-        dz = dzRef * scale + dz;
+        const auto val = ref->mCorrection.getCorrectionYZatRealYZ(sector, row, realY, realZ);
+        dy = val[0] * scale + dy;
+        dz = val[1] * scale + dz;
       }
       if (ref2 && (scale2 != 0)) {
-        const auto [dyRef, dzRef] = ref2->mCorrection.getCorrectionYZatRealYZ(sector, row, realY, realZ);
-        dy = dyRef * scale2 + dy;
-        dz = dzRef * scale2 + dz;
+        const auto val = ref2->mCorrection.getCorrectionYZatRealYZ(sector, row, realY, realZ);
+        dy = val[0] * scale2 + dy;
+        dz = val[1] * scale2 + dz;
       }
     }
   }