Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 9 additions & 18 deletions Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,23 +62,18 @@ class TimeFrameGPU : public TimeFrame<nLayers>
void createCellsDevice();
void createCellsLUTDevice();
void createNeighboursIndexTablesDevice();
void createNeighboursDevice(const unsigned int layer, const unsigned int nNeighbours);
void createNeighboursDevice(const unsigned int layer, std::vector<std::pair<int, int>>& neighbours);
void createNeighboursDevice(const unsigned int layer);
void createNeighboursLUTDevice(const int, const unsigned int);
void createNeighboursDeviceArray();
void createTrackITSExtDevice(bounded_vector<CellSeed>&);
void downloadTrackITSExtDevice(bounded_vector<CellSeed>&);
void downloadCellsNeighboursDevice(std::vector<bounded_vector<std::pair<int, int>>>&, const int);
void downloadNeighboursLUTDevice(bounded_vector<int>&, const int);
void downloadCellsDevice();
void downloadCellsLUTDevice();
void unregisterRest();
template <Task task>
auto& getStream(const size_t stream)
{
return mGpuStreams[stream];
}
// Returns a reference to the entry of mGpuStreams selected by `stream`.
// NOTE(review): mGpuStreams is a Streams object, whose operator[] reduces the index modulo the number of stored streams — confirm callers rely on that wrap-around.
auto& getStream(const size_t stream) { return mGpuStreams[stream]; }
auto& getStreams() { return mGpuStreams; }
void syncStream(const size_t stream);
void syncStreams();
virtual void wipe() final;

/// interface
Expand All @@ -99,7 +94,7 @@ class TimeFrameGPU : public TimeFrame<nLayers>
gpuPair<int, int>* getDeviceNeighbourPairs(const int layer) { return mNeighbourPairsDevice[layer]; }
std::array<int*, nLayers - 2>& getDeviceNeighboursAll() { return mNeighboursDevice; }
int* getDeviceNeighbours(const int layer) { return mNeighboursDevice[layer]; }
int** getDeviceNeighboursArray() { return mNeighboursDeviceArray; }
int** getDeviceNeighboursArray() { return mNeighboursDevice.data(); }
TrackingFrameInfo* getDeviceTrackingFrameInfo(const int);
const TrackingFrameInfo** getDeviceArrayTrackingFrameInfo() const { return mTrackingFrameInfoDeviceArray; }
const Cluster** getDeviceArrayClusters() const { return mClustersDeviceArray; }
Expand All @@ -108,11 +103,11 @@ class TimeFrameGPU : public TimeFrame<nLayers>
std::vector<unsigned int> getClusterSizes();
const unsigned char** getDeviceArrayUsedClusters() const { return mUsedClustersDeviceArray; }
const int** getDeviceROframeClusters() const { return mROFrameClustersDeviceArray; }
Tracklet** getDeviceArrayTracklets() { return mTrackletsDeviceArray; }
Tracklet** getDeviceArrayTracklets() { return mTrackletsDevice.data(); }
int** getDeviceArrayTrackletsLUT() const { return mTrackletsLUTDeviceArray; }
int** getDeviceArrayCellsLUT() const { return mCellsLUTDeviceArray; }
int** getDeviceArrayNeighboursCellLUT() const { return mNeighboursCellLUTDeviceArray; }
CellSeed** getDeviceArrayCells() const { return mCellsDeviceArray; }
CellSeed** getDeviceArrayCells() { return mCellsDevice.data(); }
CellSeed* getDeviceTrackSeeds() { return mTrackSeedsDevice; }
o2::track::TrackParCovF** getDeviceArrayTrackSeeds() { return mCellSeedsDeviceArray; }
float** getDeviceArrayTrackSeedsChi2() { return mCellSeedsChi2DeviceArray; }
Expand Down Expand Up @@ -140,7 +135,8 @@ class TimeFrameGPU : public TimeFrame<nLayers>
int getNumberOfNeighbours() const final;

private:
void allocMemAsync(void**, size_t, Stream&, bool); // Abstract owned and unowned memory allocations
void allocMemAsync(void**, size_t, Stream&, bool); // Abstract owned and unowned memory allocations on specific stream
void allocMem(void**, size_t, bool); // Abstract owned and unowned memory allocations on default stream
bool mHostRegistered = false;
TimeFrameGPUParameters mGpuParams;

Expand All @@ -167,7 +163,6 @@ class TimeFrameGPU : public TimeFrame<nLayers>
const unsigned char** mUsedClustersDeviceArray;
const int** mROFrameClustersDeviceArray;
std::array<Tracklet*, nLayers - 1> mTrackletsDevice;
Tracklet** mTrackletsDeviceArray;
std::array<int*, nLayers - 1> mTrackletsLUTDevice;
std::array<int*, nLayers - 2> mCellsLUTDevice;
std::array<int*, nLayers - 3> mNeighboursLUTDevice;
Expand All @@ -179,7 +174,6 @@ class TimeFrameGPU : public TimeFrame<nLayers>
std::array<CellSeed*, nLayers - 2> mCellsDevice;
std::array<int*, nLayers - 2> mNeighboursIndexTablesDevice;
CellSeed* mTrackSeedsDevice;
CellSeed** mCellsDeviceArray;
std::array<o2::track::TrackParCovF*, nLayers - 2> mCellSeedsDevice;
o2::track::TrackParCovF** mCellSeedsDeviceArray;
std::array<float*, nLayers - 2> mCellSeedsChi2Device;
Expand All @@ -189,14 +183,11 @@ class TimeFrameGPU : public TimeFrame<nLayers>
TrackITSExt* mTrackITSExtDevice;
std::array<gpuPair<int, int>*, nLayers - 2> mNeighbourPairsDevice;
std::array<int*, nLayers - 2> mNeighboursDevice;
int** mNeighboursDeviceArray;
std::array<TrackingFrameInfo*, nLayers> mTrackingFrameInfoDevice;
const TrackingFrameInfo** mTrackingFrameInfoDeviceArray;

// State
Streams mGpuStreams;
size_t mAvailMemGB;
bool mFirstInit = true;

// Temporary buffer for storing output tracks from GPU tracking
bounded_vector<TrackITSExt> mTrackITSExt;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ namespace gpu

#ifdef GPUCA_GPUCODE // GPUg() global kernels must only be visible when compiled by the GPU compiler

GPUdi() int4 getEmptyBinsRect() { return int4{0, 0, 0, 0}; }
// Builds the "empty" bins rectangle this helper is named for: an int4 with all four components set to zero.
GPUdi() int4 getEmptyBinsRect()
{
const int4 emptyRect{0, 0, 0, 0};
return emptyRect;
}

GPUd() bool fitTrack(TrackITSExt& track,
int start,
Expand Down Expand Up @@ -83,6 +86,7 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils,
bounded_vector<float>& resolutions,
std::vector<float>& radii,
bounded_vector<float>& mulScatAng,
o2::its::ExternalAllocator* alloc,
const int nBlocks,
const int nThreads,
gpu::Streams& streams);
Expand Down Expand Up @@ -117,6 +121,7 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
bounded_vector<float>& resolutions,
std::vector<float>& radii,
bounded_vector<float>& mulScatAng,
o2::its::ExternalAllocator* alloc,
const int nBlocks,
const int nThreads,
gpu::Streams& streams);
Expand All @@ -136,8 +141,10 @@ void countCellsHandler(const Cluster** sortedClusters,
const float maxChi2ClusterAttachment,
const float cellDeltaTanLambdaSigma,
const float nSigmaCut,
o2::its::ExternalAllocator* alloc,
const int nBlocks,
const int nThreads);
const int nThreads,
gpu::Streams& streams);

void computeCellsHandler(const Cluster** sortedClusters,
const Cluster** unsortedClusters,
Expand All @@ -155,23 +162,26 @@ void computeCellsHandler(const Cluster** sortedClusters,
const float cellDeltaTanLambdaSigma,
const float nSigmaCut,
const int nBlocks,
const int nThreads);

unsigned int countCellNeighboursHandler(CellSeed** cellsLayersDevice,
int* neighboursLUTs,
int** cellsLUTs,
gpuPair<int, int>* cellNeighbours,
int* neighboursIndexTable,
const Tracklet** tracklets,
const int deltaROF,
const float maxChi2ClusterAttachment,
const float bz,
const int layerIndex,
const unsigned int nCells,
const unsigned int nCellsNext,
const int maxCellNeighbours,
const int nBlocks,
const int nThreads);
const int nThreads,
gpu::Streams& streams);

// Declaration only; the definition is not part of this view.
// Returns nothing and takes a single gpu::Stream& as its last argument.
// NOTE(review): judging by the parameter names, nBlocks/nThreads look like the launch configuration and
// `alloc` like an external allocator for temporary buffers — confirm against the definition.
void countCellNeighboursHandler(CellSeed** cellsLayersDevice,
int* neighboursLUTs,
int** cellsLUTs,
gpuPair<int, int>* cellNeighbours,
int* neighboursIndexTable,
const Tracklet** tracklets,
const int deltaROF,
const float maxChi2ClusterAttachment,
const float bz,
const int layerIndex,
const unsigned int nCells,
const unsigned int nCellsNext,
const int maxCellNeighbours,
o2::its::ExternalAllocator* alloc,
const int nBlocks,
const int nThreads,
gpu::Stream& stream);

void computeCellNeighboursHandler(CellSeed** cellsLayersDevice,
int* neighboursLUTs,
Expand All @@ -187,11 +197,13 @@ void computeCellNeighboursHandler(CellSeed** cellsLayersDevice,
const unsigned int nCellsNext,
const int maxCellNeighbours,
const int nBlocks,
const int nThreads);
const int nThreads,
gpu::Stream& stream);

// Declaration only; parameters are unnamed here and the definition is not part of this view.
// The trailing ExternalAllocator pointer is optional and defaults to nullptr.
// NOTE(review): the int return value presumably reports how many neighbours remain after filtering — confirm against the definition.
int filterCellNeighboursHandler(gpuPair<int, int>*,
int*,
unsigned int,
gpu::Stream&,
o2::its::ExternalAllocator* = nullptr);

template <int nLayers = 7>
Expand All @@ -205,12 +217,12 @@ void processNeighboursHandler(const int startLayer,
gsl::span<int*> neighboursDeviceLUTs,
const TrackingFrameInfo** foundTrackingFrameInfo,
bounded_vector<CellSeed>& seedsHost,
o2::its::ExternalAllocator*,
const float bz,
const float MaxChi2ClusterAttachment,
const float maxChi2NDF,
const o2::base::Propagator* propagator,
const o2::base::PropagatorF::MatCorrType matCorrType,
o2::its::ExternalAllocator* alloc,
const int nBlocks,
const int nThreads);

Expand Down
23 changes: 16 additions & 7 deletions Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,16 +94,19 @@ class Stream
public:
#if defined(__HIPCC__)
using Handle = hipStream_t;
static constexpr Handle Default = 0;
static constexpr Handle DefaultStream = 0;
static constexpr unsigned int DefaultFlag = hipStreamNonBlocking;
#elif defined(__CUDACC__)
using Handle = cudaStream_t;
static constexpr Handle Default = 0;
static constexpr Handle DefaultStream = 0;
static constexpr unsigned int DefaultFlag = cudaStreamNonBlocking;
#else
using Handle = void*;
static constexpr Handle Default = nullptr;
static constexpr Handle DefaultStream = nullptr;
static constexpr unsigned int DefaultFlag = 0;
#endif

Stream(unsigned int flags = 0)
Stream(unsigned int flags = DefaultFlag)
{
#if defined(__HIPCC__)
GPUChkErrS(hipStreamCreateWithFlags(&mHandle, flags));
Expand All @@ -115,7 +118,7 @@ class Stream
Stream(Handle h) : mHandle(h) {}
~Stream()
{
if (mHandle != Default) {
if (mHandle != DefaultStream) {
#if defined(__HIPCC__)
GPUChkErrS(hipStreamDestroy(mHandle));
#elif defined(__CUDACC__)
Expand All @@ -124,7 +127,7 @@ class Stream
}
}

operator bool() const { return mHandle != Default; }
operator bool() const { return mHandle != DefaultStream; }
// Read-only access to the wrapped native stream handle.
// const-qualified so the handle can also be read through a const Stream, in line with
// operator bool() const and sync() const; the function never modifies the object.
const Handle& get() const { return mHandle; }
void sync() const
{
Expand All @@ -136,7 +139,7 @@ class Stream
}

private:
Handle mHandle{Default};
Handle mHandle{DefaultStream};
};
static_assert(sizeof(Stream) == sizeof(void*), "Stream type must match pointer type!");

Expand All @@ -150,6 +153,12 @@ class Streams
// Removes every stored Stream; each element's destructor runs as the underlying vector is emptied.
void clear() { mStreams.clear(); }
// Wrap-around access: the requested index is reduced modulo the number of stored streams,
// so any index maps onto one of the existing entries.
// NOTE(review): with no streams stored the modulo has a zero divisor — the container must be populated first.
auto& operator[](size_t i)
{
const size_t slot = i % mStreams.size();
return mStreams[slot];
}
// Appends a copy of `stream` to the end of the container.
// NOTE(review): Stream's destructor destroys any non-default handle; unless Stream defines its own copy
// semantics elsewhere, the stored copy and the original would share one handle — confirm ownership.
void push_back(const Stream& stream) { mStreams.push_back(stream); }
// Calls sync() on every stored stream, visiting them in storage order.
void sync()
{
for (size_t idx = 0; idx < mStreams.size(); ++idx) {
mStreams[idx].sync();
}
}

private:
std::vector<Stream> mStreams;
Expand Down
Loading