Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,6 @@ class TimeFrameGPU : public TimeFrame
void setDevicePropagator(const o2::base::PropagatorImpl<float>*) override;

// Host-specific getters
gsl::span<int> getHostNTracklets(const int chunkId);
gsl::span<int> getHostNCells(const int chunkId);
gsl::span<int, nLayers - 1> getNTracklets() { return mNTracklets; }
gsl::span<int, nLayers - 2> getNCells() { return mNCells; }

Expand Down
14 changes: 7 additions & 7 deletions Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu
Original file line number Diff line number Diff line change
Expand Up @@ -341,9 +341,9 @@ void TimeFrameGPU<nLayers>::createCellsLUTDevice()
{
START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "creating cells LUTs");
for (auto iLayer{0}; iLayer < nLayers - 2; ++iLayer) {
LOGP(debug, "gpu-transfer: creating cell LUT for {} elements on layer {}, for {} MB.", mTracklets[iLayer].size() + 1, iLayer, (mTracklets[iLayer].size() + 1) * sizeof(int) / MB);
allocMemAsync(reinterpret_cast<void**>(&mCellsLUTDevice[iLayer]), (mTracklets[iLayer].size() + 1) * sizeof(int), nullptr, getExtAllocator());
checkGPUError(cudaMemsetAsync(mCellsLUTDevice[iLayer], 0, (mTracklets[iLayer].size() + 1) * sizeof(int), mGpuStreams[0].get()));
LOGP(debug, "gpu-transfer: creating cell LUT for {} elements on layer {}, for {} MB.", mNTracklets[iLayer] + 1, iLayer, (mNTracklets[iLayer] + 1) * sizeof(int) / MB);
allocMemAsync(reinterpret_cast<void**>(&mCellsLUTDevice[iLayer]), (mNTracklets[iLayer] + 1) * sizeof(int), nullptr, getExtAllocator());
checkGPUError(cudaMemsetAsync(mCellsLUTDevice[iLayer], 0, (mNTracklets[iLayer] + 1) * sizeof(int), mGpuStreams[0].get()));
}
allocMemAsync(reinterpret_cast<void**>(&mCellsLUTDeviceArray), (nLayers - 2) * sizeof(int*), nullptr, getExtAllocator());
checkGPUError(cudaMemcpyAsync(mCellsLUTDeviceArray, mCellsLUTDevice.data(), mCellsLUTDevice.size() * sizeof(int*), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
Expand All @@ -355,7 +355,7 @@ void TimeFrameGPU<nLayers>::createCellsBuffers(const int layer)
{
START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "creating cells buffers");
mNCells[layer] = 0;
checkGPUError(cudaMemcpyAsync(&mNCells[layer], mCellsLUTDevice[layer] + mTracklets[layer].size(), sizeof(int), cudaMemcpyDeviceToHost));
checkGPUError(cudaMemcpyAsync(&mNCells[layer], mCellsLUTDevice[layer] + mNTracklets[layer], sizeof(int), cudaMemcpyDeviceToHost));
LOGP(debug, "gpu-transfer: creating cell buffer for {} elements on layer {}, for {} MB.", mNCells[layer], layer, mNCells[layer] * sizeof(CellSeed) / MB);
allocMemAsync(reinterpret_cast<void**>(&mCellsDevice[layer]), mNCells[layer] * sizeof(CellSeed), nullptr, getExtAllocator());

Expand Down Expand Up @@ -446,9 +446,9 @@ void TimeFrameGPU<nLayers>::downloadCellsLUTDevice()
{
START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "downloading cell luts");
for (auto iLayer{0}; iLayer < nLayers - 3; ++iLayer) {
LOGP(debug, "gpu-transfer: downloading cells lut on layer {} for {} elements", iLayer, (mTracklets[iLayer + 1].size() + 1));
mCellsLookupTable[iLayer].resize(mTracklets[iLayer + 1].size() + 1);
checkGPUError(cudaMemcpyAsync(mCellsLookupTable[iLayer].data(), mCellsLUTDevice[iLayer + 1], (mTracklets[iLayer + 1].size() + 1) * sizeof(int), cudaMemcpyDeviceToHost, mGpuStreams[0].get()));
LOGP(debug, "gpu-transfer: downloading cells lut on layer {} for {} elements", iLayer, (mNTracklets[iLayer + 1] + 1));
mCellsLookupTable[iLayer].resize(mNTracklets[iLayer + 1] + 1);
checkGPUError(cudaMemcpyAsync(mCellsLookupTable[iLayer].data(), mCellsLUTDevice[iLayer + 1], (mNTracklets[iLayer + 1] + 1) * sizeof(int), cudaMemcpyDeviceToHost, mGpuStreams[0].get()));
}
STOP_GPU_STREAM_TIMER(mGpuStreams[0].get());
}
Expand Down
6 changes: 2 additions & 4 deletions Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ int TrackerTraitsGPU<nLayers>::getTFNumberOfClusters() const
template <int nLayers>
int TrackerTraitsGPU<nLayers>::getTFNumberOfTracklets() const
{
return mTimeFrameGPU->getNumberOfTracklets();
return std::accumulate(mTimeFrameGPU->getNTracklets().begin(), mTimeFrameGPU->getNTracklets().end(), 0);
}

template <int nLayers>
Expand All @@ -91,7 +91,7 @@ template <int nLayers>
void TrackerTraitsGPU<nLayers>::computeTrackletsHybrid(const int iteration, int iROFslice, int iVertex)
{
auto& conf = o2::its::ITSGpuTrackingParamConfig::Instance();
TrackerTraits::computeLayerTracklets(iteration, iROFslice, iVertex);
// TrackerTraits::computeLayerTracklets(iteration, iROFslice, iVertex);
mTimeFrameGPU->createTrackletsLUTDevice(iteration);

const Vertex diamondVert({mTrkParams[iteration].Diamond[0], mTrkParams[iteration].Diamond[1], mTrkParams[iteration].Diamond[2]}, {25.e-6f, 0.f, 0.f, 25.e-6f, 0.f, 36.f}, 1, 1.f);
Expand Down Expand Up @@ -169,10 +169,8 @@ void TrackerTraitsGPU<nLayers>::computeCellsHybrid(const int iteration)

for (int iLayer = 0; iLayer < mTrkParams[iteration].CellsPerRoad(); ++iLayer) {
if (!mTimeFrameGPU->getNTracklets()[iLayer + 1] || !mTimeFrameGPU->getNTracklets()[iLayer]) {
LOGP(info, "continuing here");
continue;
}
LOGP(info, "+> {}", mTimeFrameGPU->getNTracklets()[iLayer]);
const int currentLayerTrackletsNum{static_cast<int>(mTimeFrameGPU->getNTracklets()[iLayer])};
countCellsHandler(mTimeFrameGPU->getDeviceArrayClusters(),
mTimeFrameGPU->getDeviceArrayUnsortedClusters(),
Expand Down
1 change: 0 additions & 1 deletion Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -863,7 +863,6 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
thrust::sort(thrust::device, tracklets_ptr, tracklets_ptr + nTracklets[iLayer], gpu::sort_tracklets());
auto unique_end = thrust::unique(thrust::device, tracklets_ptr, tracklets_ptr + nTracklets[iLayer], gpu::equal_tracklets());
nTracklets[iLayer] = unique_end - tracklets_ptr;
LOGP(info, "=> {} {}", nTracklets[iLayer], unique_end - tracklets_ptr);
if (iLayer > 0) {
gpuCheckError(cudaMemset(trackletsLUTsHost[iLayer], 0, nClusters[iLayer] * sizeof(int)));
gpu::compileTrackletsLookupTableKernel<<<nBlocks, nThreads>>>(spanTracklets[iLayer], trackletsLUTsHost[iLayer], nTracklets[iLayer]);
Expand Down
1 change: 0 additions & 1 deletion Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,6 @@ void TrackerTraits::computeLayerTracklets(const int iteration, int iROFslice, in
/// Compute LUT
std::exclusive_scan(lut.begin(), lut.end(), lut.begin(), 0);
lut.push_back(trkl.size());
LOGP(info, "CPU layer {} -> old size: {} - new size: {}", iLayer, oldsize, trkl.size());
}
/// Layer 0 is done outside the loop
std::sort(tf->getTracklets()[0].begin(), tf->getTracklets()[0].end(), [](const Tracklet& a, const Tracklet& b) {
Expand Down
Loading