Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ namespace o2::its::gpu
template <int nLayers = 7>
class TimeFrameGPU : public TimeFrame<nLayers>
{
using typename TimeFrame<nLayers>::CellSeedN;

public:
TimeFrameGPU();
~TimeFrameGPU() = default;
Expand Down Expand Up @@ -64,7 +66,7 @@ class TimeFrameGPU : public TimeFrame<nLayers>
void loadTrackSeedsDevice();
void loadTrackSeedsChi2Device();
void loadRoadsDevice();
void loadTrackSeedsDevice(bounded_vector<CellSeed>&);
void loadTrackSeedsDevice(bounded_vector<CellSeedN>&);
void createTrackletsBuffers(const int);
void createTrackletsBuffersArray(const int);
void createCellsBuffers(const int);
Expand All @@ -75,8 +77,8 @@ class TimeFrameGPU : public TimeFrame<nLayers>
void createNeighboursIndexTablesDevice(const int);
void createNeighboursDevice(const unsigned int layer);
void createNeighboursLUTDevice(const int, const unsigned int);
void createTrackITSExtDevice(bounded_vector<CellSeed>&);
void downloadTrackITSExtDevice(bounded_vector<CellSeed>&);
void createTrackITSExtDevice(bounded_vector<CellSeedN>&);
void downloadTrackITSExtDevice(bounded_vector<CellSeedN>&);
void downloadCellsNeighboursDevice(std::vector<bounded_vector<std::pair<int, int>>>&, const int);
void downloadNeighboursLUTDevice(bounded_vector<int>&, const int);
void downloadCellsDevice();
Expand Down Expand Up @@ -125,8 +127,8 @@ class TimeFrameGPU : public TimeFrame<nLayers>
int** getDeviceArrayTrackletsLUT() const { return mTrackletsLUTDeviceArray; }
int** getDeviceArrayCellsLUT() const { return mCellsLUTDeviceArray; }
int** getDeviceArrayNeighboursCellLUT() const { return mNeighboursCellLUTDeviceArray; }
CellSeed** getDeviceArrayCells() { return mCellsDeviceArray; }
CellSeed* getDeviceTrackSeeds() { return mTrackSeedsDevice; }
CellSeedN** getDeviceArrayCells() { return mCellsDeviceArray; }
CellSeedN* getDeviceTrackSeeds() { return mTrackSeedsDevice; }
o2::track::TrackParCovF** getDeviceArrayTrackSeeds() { return mCellSeedsDeviceArray; }
float** getDeviceArrayTrackSeedsChi2() { return mCellSeedsChi2DeviceArray; }
int* getDeviceNeighboursIndexTables(const int layer) { return mNeighboursIndexTablesDevice[layer]; }
Expand All @@ -145,7 +147,7 @@ class TimeFrameGPU : public TimeFrame<nLayers>
gsl::span<int*> getDeviceTrackletsLUTs() { return mTrackletsLUTDevice; }
gsl::span<int*> getDeviceCellLUTs() { return mCellsLUTDevice; }
gsl::span<Tracklet*> getDeviceTracklets() { return mTrackletsDevice; }
gsl::span<CellSeed*> getDeviceCells() { return mCellsDevice; }
gsl::span<CellSeedN*> getDeviceCells() { return mCellsDevice; }

// Overridden getters
int getNumberOfTracklets() const final;
Expand Down Expand Up @@ -189,10 +191,10 @@ class TimeFrameGPU : public TimeFrame<nLayers>
int** mNeighboursCellDeviceArray{nullptr};
int** mNeighboursCellLUTDeviceArray{nullptr};
int** mTrackletsLUTDeviceArray{nullptr};
std::array<CellSeed*, nLayers - 2> mCellsDevice;
CellSeed** mCellsDeviceArray;
std::array<CellSeedN*, nLayers - 2> mCellsDevice;
CellSeedN** mCellsDeviceArray;
std::array<int*, nLayers - 3> mNeighboursIndexTablesDevice;
CellSeed* mTrackSeedsDevice{nullptr};
CellSeedN* mTrackSeedsDevice{nullptr};
std::array<o2::track::TrackParCovF*, nLayers - 2> mCellSeedsDevice;
o2::track::TrackParCovF** mCellSeedsDeviceArray;
std::array<float*, nLayers - 2> mCellSeedsChi2Device;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class TrackerTraitsGPU final : public TrackerTraits<nLayers>

private:
IndexTableUtils* mDeviceIndexTableUtils;
gpu::TimeFrameGPU<7>* mTimeFrameGPU;
gpu::TimeFrameGPU<nLayers>* mTimeFrameGPU;
};

} // namespace o2::its
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,51 +16,21 @@
#include <gsl/gsl>

#include "ITStracking/BoundedAllocator.h"
#include "ITStracking/Definitions.h"
#include "ITStrackingGPU/Utils.h"
#include "DetectorsBase/Propagator.h"
#include "GPUCommonDef.h"

namespace o2::its
{
template <int>
class CellSeed;
class TrackingFrameInfo;
class Tracklet;
class IndexTableUtils;
class Cluster;
class TrackITSExt;
class ExternalAllocator;
namespace gpu
{

#ifdef GPUCA_GPUCODE // GPUg() global kernels must only when compiled by GPU compiler

GPUdii() int4 getEmptyBinsRect()
{
return int4{0, 0, 0, 0};
}

GPUdii() bool fitTrack(TrackITSExt& track,
int start,
int end,
int step,
float chi2clcut,
float chi2ndfcut,
float maxQoverPt,
int nCl,
float Bz,
TrackingFrameInfo** tfInfos,
const o2::base::Propagator* prop,
o2::base::PropagatorF::MatCorrType matCorrType = o2::base::PropagatorImpl<float>::MatCorrType::USEMatCorrNONE);

template <int nLayers = 7>
GPUg() void fitTrackSeedsKernel(CellSeed* trackSeeds,
const TrackingFrameInfo** foundTrackingFrameInfo,
o2::its::TrackITSExt* tracks,
const float* minPts,
const unsigned int nSeeds,
const float Bz,
const int startLevel,
float maxChi2ClusterAttachment,
float maxChi2NDF,
const o2::base::Propagator* propagator,
const o2::base::PropagatorF::MatCorrType matCorrType = o2::base::PropagatorF::MatCorrType::USEMatCorrLUT);
#endif
} // namespace gpu

template <int nLayers = 7>
void countTrackletsInROFsHandler(const IndexTableUtils* utils,
Expand Down Expand Up @@ -131,14 +101,15 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
const int nThreads,
gpu::Streams& streams);

template <int nLayers>
void countCellsHandler(const Cluster** sortedClusters,
const Cluster** unsortedClusters,
const TrackingFrameInfo** tfInfo,
Tracklet** tracklets,
int** trackletsLUT,
const int nTracklets,
const int layer,
CellSeed* cells,
CellSeed<nLayers>* cells,
int** cellsLUTsDeviceArray,
int* cellsLUTsHost,
const int deltaROF,
Expand All @@ -151,14 +122,15 @@ void countCellsHandler(const Cluster** sortedClusters,
const int nThreads,
gpu::Streams& streams);

template <int nLayers>
void computeCellsHandler(const Cluster** sortedClusters,
const Cluster** unsortedClusters,
const TrackingFrameInfo** tfInfo,
Tracklet** tracklets,
int** trackletsLUT,
const int nTracklets,
const int layer,
CellSeed* cells,
CellSeed<nLayers>* cells,
int** cellsLUTsDeviceArray,
int* cellsLUTsHost,
const int deltaROF,
Expand All @@ -170,7 +142,8 @@ void computeCellsHandler(const Cluster** sortedClusters,
const int nThreads,
gpu::Streams& streams);

void countCellNeighboursHandler(CellSeed** cellsLayersDevice,
template <int nLayers>
void countCellNeighboursHandler(CellSeed<nLayers>** cellsLayersDevice,
int* neighboursLUTs,
int** cellsLUTs,
gpuPair<int, int>* cellNeighbours,
Expand All @@ -188,7 +161,8 @@ void countCellNeighboursHandler(CellSeed** cellsLayersDevice,
const int nThreads,
gpu::Stream& stream);

void computeCellNeighboursHandler(CellSeed** cellsLayersDevice,
template <int nLayers>
void computeCellNeighboursHandler(CellSeed<nLayers>** cellsLayersDevice,
int* neighboursLUTs,
int** cellsLUTs,
gpuPair<int, int>* cellNeighbours,
Expand All @@ -214,14 +188,14 @@ int filterCellNeighboursHandler(gpuPair<int, int>*,
template <int nLayers = 7>
void processNeighboursHandler(const int startLayer,
const int startLevel,
CellSeed** allCellSeeds,
CellSeed* currentCellSeeds,
CellSeed<nLayers>** allCellSeeds,
CellSeed<nLayers>* currentCellSeeds,
std::array<int, nLayers - 2>& nCells,
const unsigned char** usedClusters,
std::array<int*, nLayers - 2>& neighbours,
gsl::span<int*> neighboursDeviceLUTs,
const TrackingFrameInfo** foundTrackingFrameInfo,
bounded_vector<CellSeed>& seedsHost,
bounded_vector<CellSeed<nLayers>>& seedsHost,
const float bz,
const float MaxChi2ClusterAttachment,
const float maxChi2NDF,
Expand All @@ -231,7 +205,8 @@ void processNeighboursHandler(const int startLayer,
const int nBlocks,
const int nThreads);

void trackSeedHandler(CellSeed* trackSeeds,
template <int nLayers = 7>
void trackSeedHandler(CellSeed<nLayers>* trackSeeds,
const TrackingFrameInfo** foundTrackingFrameInfo,
o2::its::TrackITSExt* tracks,
std::vector<float>& minPtsHost,
Expand Down
34 changes: 17 additions & 17 deletions Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu
Original file line number Diff line number Diff line change
Expand Up @@ -355,11 +355,11 @@ void TimeFrameGPU<nLayers>::loadCellsDevice()
{
GPUTimer timer(mGpuStreams, "loading cell seeds", nLayers - 2);
for (auto iLayer{0}; iLayer < nLayers - 2; ++iLayer) {
GPULog("gpu-transfer: loading {} cell seeds on layer {}, for {:.2f} MB.", this->mCells[iLayer].size(), iLayer, this->mCells[iLayer].size() * sizeof(CellSeed) / constants::MB);
allocMemAsync(reinterpret_cast<void**>(&mCellsDevice[iLayer]), this->mCells[iLayer].size() * sizeof(CellSeed), mGpuStreams[iLayer], this->getExtAllocator());
GPULog("gpu-transfer: loading {} cell seeds on layer {}, for {:.2f} MB.", this->mCells[iLayer].size(), iLayer, this->mCells[iLayer].size() * sizeof(CellSeedN) / constants::MB);
allocMemAsync(reinterpret_cast<void**>(&mCellsDevice[iLayer]), this->mCells[iLayer].size() * sizeof(CellSeedN), mGpuStreams[iLayer], this->getExtAllocator());
allocMemAsync(reinterpret_cast<void**>(&mNeighboursIndexTablesDevice[iLayer]), (this->mCells[iLayer].size() + 1) * sizeof(int), mGpuStreams[iLayer], this->getExtAllocator()); // accessory for the neigh. finding.
GPUChkErrS(cudaMemsetAsync(mNeighboursIndexTablesDevice[iLayer], 0, (this->mCells[iLayer].size() + 1) * sizeof(int), mGpuStreams[iLayer].get()));
GPUChkErrS(cudaMemcpyAsync(mCellsDevice[iLayer], this->mCells[iLayer].data(), this->mCells[iLayer].size() * sizeof(CellSeed), cudaMemcpyHostToDevice, mGpuStreams[iLayer].get()));
GPUChkErrS(cudaMemcpyAsync(mCellsDevice[iLayer], this->mCells[iLayer].data(), this->mCells[iLayer].size() * sizeof(CellSeedN), cudaMemcpyHostToDevice, mGpuStreams[iLayer].get()));
}
}

Expand Down Expand Up @@ -387,8 +387,8 @@ void TimeFrameGPU<nLayers>::createCellsBuffersArray(const int iteration)
{
if (!iteration) {
GPUTimer timer("creating cells buffers array");
allocMem(reinterpret_cast<void**>(&mCellsDeviceArray), (nLayers - 2) * sizeof(CellSeed*), this->getExtAllocator());
GPUChkErrS(cudaMemcpy(mCellsDeviceArray, mCellsDevice.data(), mCellsDevice.size() * sizeof(CellSeed*), cudaMemcpyHostToDevice));
allocMem(reinterpret_cast<void**>(&mCellsDeviceArray), (nLayers - 2) * sizeof(CellSeedN*), this->getExtAllocator());
GPUChkErrS(cudaMemcpy(mCellsDeviceArray, mCellsDevice.data(), mCellsDevice.size() * sizeof(CellSeedN*), cudaMemcpyHostToDevice));
}
}

Expand All @@ -399,9 +399,9 @@ void TimeFrameGPU<nLayers>::createCellsBuffers(const int layer)
mNCells[layer] = 0;
GPUChkErrS(cudaMemcpyAsync(&mNCells[layer], mCellsLUTDevice[layer] + mNTracklets[layer], sizeof(int), cudaMemcpyDeviceToHost, mGpuStreams[layer].get()));
mGpuStreams[layer].sync(); // ensure number of cells is correct
GPULog("gpu-transfer: creating cell buffer for {} elements on layer {}, for {:.2f} MB.", mNCells[layer], layer, mNCells[layer] * sizeof(CellSeed) / constants::MB);
allocMemAsync(reinterpret_cast<void**>(&mCellsDevice[layer]), mNCells[layer] * sizeof(CellSeed), mGpuStreams[layer], this->getExtAllocator());
GPUChkErrS(cudaMemcpyAsync(&mCellsDeviceArray[layer], &mCellsDevice[layer], sizeof(CellSeed*), cudaMemcpyHostToDevice, mGpuStreams[layer].get()));
GPULog("gpu-transfer: creating cell buffer for {} elements on layer {}, for {:.2f} MB.", mNCells[layer], layer, mNCells[layer] * sizeof(CellSeedN) / constants::MB);
allocMemAsync(reinterpret_cast<void**>(&mCellsDevice[layer]), mNCells[layer] * sizeof(CellSeedN), mGpuStreams[layer], this->getExtAllocator());
GPUChkErrS(cudaMemcpyAsync(&mCellsDeviceArray[layer], &mCellsDevice[layer], sizeof(CellSeedN*), cudaMemcpyHostToDevice, mGpuStreams[layer].get()));
}

template <int nLayers>
Expand All @@ -426,13 +426,13 @@ void TimeFrameGPU<nLayers>::loadRoadsDevice()
}

template <int nLayers>
void TimeFrameGPU<nLayers>::loadTrackSeedsDevice(bounded_vector<CellSeed>& seeds)
void TimeFrameGPU<nLayers>::loadTrackSeedsDevice(bounded_vector<CellSeedN>& seeds)
{
GPUTimer timer("loading track seeds");
GPULog("gpu-transfer: loading {} track seeds, for {:.2f} MB.", seeds.size(), seeds.size() * sizeof(CellSeed) / constants::MB);
allocMem(reinterpret_cast<void**>(&mTrackSeedsDevice), seeds.size() * sizeof(CellSeed), this->getExtAllocator());
GPUChkErrS(cudaHostRegister(seeds.data(), seeds.size() * sizeof(CellSeed), cudaHostRegisterPortable));
GPUChkErrS(cudaMemcpy(mTrackSeedsDevice, seeds.data(), seeds.size() * sizeof(CellSeed), cudaMemcpyHostToDevice));
GPULog("gpu-transfer: loading {} track seeds, for {:.2f} MB.", seeds.size(), seeds.size() * sizeof(CellSeedN) / constants::MB);
allocMem(reinterpret_cast<void**>(&mTrackSeedsDevice), seeds.size() * sizeof(CellSeedN), this->getExtAllocator());
GPUChkErrS(cudaHostRegister(seeds.data(), seeds.size() * sizeof(CellSeedN), cudaHostRegisterPortable));
GPUChkErrS(cudaMemcpy(mTrackSeedsDevice, seeds.data(), seeds.size() * sizeof(CellSeedN), cudaMemcpyHostToDevice));
}

template <int nLayers>
Expand All @@ -450,7 +450,7 @@ void TimeFrameGPU<nLayers>::createNeighboursDevice(const unsigned int layer)
}

template <int nLayers>
void TimeFrameGPU<nLayers>::createTrackITSExtDevice(bounded_vector<CellSeed>& seeds)
void TimeFrameGPU<nLayers>::createTrackITSExtDevice(bounded_vector<CellSeedN>& seeds)
{
GPUTimer timer("reserving tracks");
mTrackITSExt = bounded_vector<TrackITSExt>(seeds.size(), {}, this->getMemoryPool().get());
Expand All @@ -465,9 +465,9 @@ void TimeFrameGPU<nLayers>::downloadCellsDevice()
{
GPUTimer timer(mGpuStreams, "downloading cells", nLayers - 2);
for (int iLayer{0}; iLayer < nLayers - 2; ++iLayer) {
GPULog("gpu-transfer: downloading {} cells on layer: {}, for {:.2f} MB.", mNCells[iLayer], iLayer, mNCells[iLayer] * sizeof(CellSeed) / constants::MB);
GPULog("gpu-transfer: downloading {} cells on layer: {}, for {:.2f} MB.", mNCells[iLayer], iLayer, mNCells[iLayer] * sizeof(CellSeedN) / constants::MB);
this->mCells[iLayer].resize(mNCells[iLayer]);
GPUChkErrS(cudaMemcpyAsync(this->mCells[iLayer].data(), this->mCellsDevice[iLayer], mNCells[iLayer] * sizeof(CellSeed), cudaMemcpyDeviceToHost, mGpuStreams[iLayer].get()));
GPUChkErrS(cudaMemcpyAsync(this->mCells[iLayer].data(), this->mCellsDevice[iLayer], mNCells[iLayer] * sizeof(CellSeedN), cudaMemcpyDeviceToHost, mGpuStreams[iLayer].get()));
}
}

Expand Down Expand Up @@ -499,7 +499,7 @@ void TimeFrameGPU<nLayers>::downloadNeighboursLUTDevice(bounded_vector<int>& lut
}

template <int nLayers>
void TimeFrameGPU<nLayers>::downloadTrackITSExtDevice(bounded_vector<CellSeed>& seeds)
void TimeFrameGPU<nLayers>::downloadTrackITSExtDevice(bounded_vector<CellSeedN>& seeds)
{
GPUTimer timer("downloading tracks");
GPULog("gpu-transfer: downloading {} tracks, for {:.2f} MB.", mTrackITSExt.size(), mTrackITSExt.size() * sizeof(o2::its::TrackITSExt) / constants::MB);
Expand Down
Loading