Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 41 additions & 8 deletions Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@ namespace o2::its::gpu
{

template <int nLayers = 7>
class TimeFrameGPU : public TimeFrame<nLayers>
class TimeFrameGPU final : public TimeFrame<nLayers>
{
using typename TimeFrame<nLayers>::CellSeedN;
using typename TimeFrame<nLayers>::IndexTableUtilsN;

public:
TimeFrameGPU();
TimeFrameGPU() = default;
~TimeFrameGPU() = default;

/// Most relevant operations
Expand All @@ -44,13 +44,13 @@ class TimeFrameGPU : public TimeFrame<nLayers>
void loadTrackingFrameInfoDevice(const int, const int);
void createTrackingFrameInfoDeviceArray(const int);
void loadUnsortedClustersDevice(const int, const int);
void createUnsortedClustersDeviceArray(const int);
void createUnsortedClustersDeviceArray(const int, const int = nLayers);
void loadClustersDevice(const int, const int);
void createClustersDeviceArray(const int);
void createClustersDeviceArray(const int, const int = nLayers);
void loadClustersIndexTables(const int, const int);
void createClustersIndexTablesArray(const int iteration);
void createClustersIndexTablesArray(const int);
void createUsedClustersDevice(const int, const int);
void createUsedClustersDeviceArray(const int);
void createUsedClustersDeviceArray(const int, const int = nLayers);
void loadUsedClustersDevice();
void loadROFrameClustersDevice(const int, const int);
void createROFrameClustersDeviceArray(const int);
Expand Down Expand Up @@ -85,6 +85,12 @@ class TimeFrameGPU : public TimeFrame<nLayers>
void downloadCellsDevice();
void downloadCellsLUTDevice();

/// Vertexer
void createVtxTrackletsLUTDevice(const int32_t);
void createVtxTrackletsBuffers(const int32_t);
void createVtxLinesLUTDevice(const int32_t);
void createVtxLinesBuffer(const int32_t);

/// synchronization
auto& getStream(const size_t stream) { return mGpuStreams[stream]; }
auto& getStreams() { return mGpuStreams; }
Expand All @@ -98,6 +104,8 @@ class TimeFrameGPU : public TimeFrame<nLayers>
virtual void wipe() final;

/// interface
virtual bool isGPU() const noexcept final { return true; }
virtual const char* getName() const noexcept { return "GPU"; }
int getNClustersInRofSpan(const int, const int, const int) const;
IndexTableUtilsN* getDeviceIndexTableUtils() { return mIndexTableUtilsDevice; }
int* getDeviceROFramesClusters(const int layer) { return mROFramesClustersDevice[layer]; }
Expand All @@ -122,7 +130,7 @@ class TimeFrameGPU : public TimeFrame<nLayers>
const Cluster** getDeviceArrayUnsortedClusters() const { return mUnsortedClustersDeviceArray; }
const int** getDeviceArrayClustersIndexTables() const { return mClustersIndexTablesDeviceArray; }
std::vector<unsigned int> getClusterSizes();
const unsigned char** getDeviceArrayUsedClusters() const { return mUsedClustersDeviceArray; }
uint8_t** getDeviceArrayUsedClusters() const { return mUsedClustersDeviceArray; }
const int** getDeviceROFrameClusters() const { return mROFramesClustersDeviceArray; }
Tracklet** getDeviceArrayTracklets() { return mTrackletsDeviceArray; }
int** getDeviceArrayTrackletsLUT() const { return mTrackletsLUTDeviceArray; }
Expand All @@ -135,6 +143,19 @@ class TimeFrameGPU : public TimeFrame<nLayers>
int* getDeviceNeighboursIndexTables(const int layer) { return mNeighboursIndexTablesDevice[layer]; }
uint8_t* getDeviceMultCutMask() { return mMultMaskDevice; }

// Vertexer
auto& getDeviceNTrackletsPerROF() const noexcept { return mNTrackletsPerROFDevice; }
auto& getDeviceNTrackletsPerCluster() const noexcept { return mNTrackletsPerClusterDevice; }
auto& getDeviceNTrackletsPerClusterSum() const noexcept { return mNTrackletsPerClusterSumDevice; }
int32_t** getDeviceArrayNTrackletsPerROF() const noexcept { return mNTrackletsPerROFDeviceArray; }
int32_t** getDeviceArrayNTrackletsPerCluster() const noexcept { return mNTrackletsPerClusterDeviceArray; }
int32_t** getDeviceArrayNTrackletsPerClusterSum() const noexcept { return mNTrackletsPerClusterSumDeviceArray; }
uint8_t* getDeviceUsedTracklets() const noexcept { return mUsedTrackletsDevice; }
int32_t* getDeviceNLinesPerCluster() const noexcept { return mNLinesPerClusterDevice; }
int32_t* getDeviceNLinesPerClusterSum() const noexcept { return mNLinesPerClusterSumDevice; }
Line* getDeviceLines() const noexcept { return mLinesDevice; }
gsl::span<int*> getDeviceTrackletsPerROFs() { return mNTrackletsPerROFDevice; }

void setDevicePropagator(const o2::base::PropagatorImpl<float>* p) final { this->mPropagatorDevice = p; }

// Host-specific getters
Expand Down Expand Up @@ -180,7 +201,7 @@ class TimeFrameGPU : public TimeFrame<nLayers>
const Cluster** mClustersDeviceArray;
const Cluster** mUnsortedClustersDeviceArray;
const int** mClustersIndexTablesDeviceArray;
const unsigned char** mUsedClustersDeviceArray;
uint8_t** mUsedClustersDeviceArray;
const int** mROFramesClustersDeviceArray;
std::array<Tracklet*, nLayers - 1> mTrackletsDevice;
std::array<int*, nLayers - 1> mTrackletsLUTDevice;
Expand Down Expand Up @@ -208,6 +229,18 @@ class TimeFrameGPU : public TimeFrame<nLayers>
std::array<TrackingFrameInfo*, nLayers> mTrackingFrameInfoDevice;
const TrackingFrameInfo** mTrackingFrameInfoDeviceArray;

/// Vertexer
std::array<int32_t*, 2> mNTrackletsPerROFDevice;
std::array<int32_t*, 2> mNTrackletsPerClusterDevice;
std::array<int32_t*, 2> mNTrackletsPerClusterSumDevice;
uint8_t* mUsedTrackletsDevice;
int32_t* mNLinesPerClusterDevice;
int32_t* mNLinesPerClusterSumDevice;
int32_t** mNTrackletsPerROFDeviceArray;
int32_t** mNTrackletsPerClusterDeviceArray;
int32_t** mNTrackletsPerClusterSumDeviceArray;
Line* mLinesDevice;

// State
Streams mGpuStreams;
std::bitset<nLayers + 1> mPinnedUnsortedClusters{0};
Expand Down
189 changes: 189 additions & 0 deletions Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,22 @@
#include <string>
#include <tuple>

#include "ITStracking/MathUtils.h"
#include "ITStracking/ExternalAllocator.h"

#include "GPUCommonDef.h"
#include "GPUCommonHelpers.h"
#include "GPUCommonLogger.h"
#include "GPUCommonDefAPI.h"

#ifdef GPUCA_GPUCODE
#include <thrust/device_ptr.h>
#ifndef __HIPCC__
#define THRUST_NAMESPACE thrust::cuda
#else
#define THRUST_NAMESPACE thrust::hip
#endif
#endif

#ifdef ITS_GPU_LOG
#define GPULog(...) LOGP(info, __VA_ARGS__)
Expand All @@ -38,6 +45,10 @@

namespace o2::its
{
// FWD declarations
template <int>
class IndexTableUtils;
class Tracklet;

template <typename T1, typename T2>
using gpuPair = std::pair<T1, T2>;
Expand Down Expand Up @@ -282,6 +293,184 @@ class GPUTimer
}
};
#endif

#ifdef GPUCA_GPUCODE
template <typename T>
struct TypedAllocator {
using value_type = T;
using pointer = thrust::device_ptr<T>;
using const_pointer = thrust::device_ptr<const T>;
using size_type = std::size_t;
using difference_type = std::ptrdiff_t;

TypedAllocator() noexcept : mInternalAllocator(nullptr) {}
explicit TypedAllocator(ExternalAllocator* a) noexcept : mInternalAllocator(a) {}

template <typename U>
TypedAllocator(const TypedAllocator<U>& o) noexcept : mInternalAllocator(o.mInternalAllocator)
{
}

pointer allocate(size_type n)
{
void* raw = mInternalAllocator->allocate(n * sizeof(T));
return thrust::device_pointer_cast(static_cast<T*>(raw));
}

void deallocate(pointer p, size_type n) noexcept
{
if (!p) {
return;
}
void* raw = thrust::raw_pointer_cast(p);
mInternalAllocator->deallocate(static_cast<char*>(raw), n * sizeof(T));
}

bool operator==(TypedAllocator const& o) const noexcept
{
return mInternalAllocator == o.mInternalAllocator;
}
bool operator!=(TypedAllocator const& o) const noexcept
{
return !(*this == o);
}

private:
ExternalAllocator* mInternalAllocator;
};

template <int nLayers>
GPUdii() const int4 getBinsRect(const Cluster& currentCluster, const int layerIndex,
const o2::its::IndexTableUtils<nLayers>* utils,
const float z1, const float z2, float maxdeltaz, float maxdeltaphi)
{
const float zRangeMin = o2::gpu::CAMath::Min(z1, z2) - maxdeltaz;
const float phiRangeMin = (maxdeltaphi > o2::constants::math::PI) ? 0.f : currentCluster.phi - maxdeltaphi;
const float zRangeMax = o2::gpu::CAMath::Max(z1, z2) + maxdeltaz;
const float phiRangeMax = (maxdeltaphi > o2::constants::math::PI) ? o2::constants::math::TwoPI : currentCluster.phi + maxdeltaphi;

if (zRangeMax < -utils->getLayerZ(layerIndex) ||
zRangeMin > utils->getLayerZ(layerIndex) || zRangeMin > zRangeMax) {
return {};
}

return int4{o2::gpu::CAMath::Max(0, utils->getZBinIndex(layerIndex, zRangeMin)),
utils->getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMin)),
o2::gpu::CAMath::Min(utils->getNzBins() - 1, utils->getZBinIndex(layerIndex, zRangeMax)),
utils->getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMax))};
}

GPUdii() gpuSpan<const Vertex> getPrimaryVertices(const int rof,
const int* roframesPV,
const int nROF,
const uint8_t* mask,
const Vertex* vertices)
{
const int start_pv_id = roframesPV[rof];
const int stop_rof = rof >= nROF - 1 ? nROF : rof + 1;
size_t delta = mask[rof] ? roframesPV[stop_rof] - start_pv_id : 0; // return empty span if ROF is excluded
return gpuSpan<const Vertex>(&vertices[start_pv_id], delta);
};

GPUdii() gpuSpan<const Vertex> getPrimaryVertices(const int romin,
const int romax,
const int* roframesPV,
const int nROF,
const Vertex* vertices)
{
const int start_pv_id = roframesPV[romin];
const int stop_rof = romax >= nROF - 1 ? nROF : romax + 1;
return gpuSpan<const Vertex>(&vertices[start_pv_id], roframesPV[stop_rof] - roframesPV[romin]);
};

GPUdii() gpuSpan<const Cluster> getClustersOnLayer(const int rof,
const int totROFs,
const int layer,
const int** roframesClus,
const Cluster** clusters)
{
if (rof < 0 || rof >= totROFs) {
return gpuSpan<const Cluster>();
}
const int start_clus_id{roframesClus[layer][rof]};
const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
const unsigned int delta = roframesClus[layer][stop_rof] - start_clus_id;
return gpuSpan<const Cluster>(&(clusters[layer][start_clus_id]), delta);
}

GPUdii() gpuSpan<const Tracklet> getTrackletsPerCluster(const int rof,
const int totROFs,
const int mode,
const int** roframesClus,
const Tracklet** tracklets)
{
if (rof < 0 || rof >= totROFs) {
return gpuSpan<const Tracklet>();
}
const int start_clus_id{roframesClus[1][rof]};
const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
return gpuSpan<const Tracklet>(&(tracklets[mode][start_clus_id]), delta);
}

GPUdii() gpuSpan<int> getNTrackletsPerCluster(const int rof,
const int totROFs,
const int mode,
const int** roframesClus,
int** ntracklets)
{
if (rof < 0 || rof >= totROFs) {
return gpuSpan<int>();
}
const int start_clus_id{roframesClus[1][rof]};
const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
return gpuSpan<int>(&(ntracklets[mode][start_clus_id]), delta);
}

GPUdii() gpuSpan<const int> getNTrackletsPerCluster(const int rof,
const int totROFs,
const int mode,
const int** roframesClus,
const int** ntracklets)
{
if (rof < 0 || rof >= totROFs) {
return gpuSpan<const int>();
}
const int start_clus_id{roframesClus[1][rof]};
const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
return gpuSpan<const int>(&(ntracklets[mode][start_clus_id]), delta);
}

GPUdii() gpuSpan<int> getNLinesPerCluster(const int rof,
const int totROFs,
const int** roframesClus,
int* nlines)
{
if (rof < 0 || rof >= totROFs) {
return gpuSpan<int>();
}
const int start_clus_id{roframesClus[1][rof]};
const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
return gpuSpan<int>(&(nlines[start_clus_id]), delta);
}

GPUdii() gpuSpan<const int> getNLinesPerCluster(const int rof,
const int totROFs,
const int** roframesClus,
const int* nlines)
{
if (rof < 0 || rof >= totROFs) {
return gpuSpan<const int>();
}
const int start_clus_id{roframesClus[1][rof]};
const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
return gpuSpan<const int>(&(nlines[start_clus_id]), delta);
}
#endif
} // namespace gpu
} // namespace o2::its

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,32 +31,25 @@
namespace o2::its
{

class VertexerTraitsGPU final : public VertexerTraits
template <int nLayers>
class VertexerTraitsGPU final : public VertexerTraits<nLayers>
{
public:
void initialise(const TrackingParameters&, const int iteration = 0) final;
void adoptTimeFrame(TimeFrame<7>*) noexcept final;
void adoptTimeFrame(TimeFrame<nLayers>* tf) noexcept final;
void computeTracklets(const int iteration = 0) final;
void computeTrackletMatching(const int iteration = 0) final;
void computeVertices(const int iteration = 0) final;
void updateVertexingParameters(const std::vector<VertexingParameters>&, const TimeFrameGPUParameters&) final;
void computeVerticesHist();

bool isGPU() const noexcept final { return true; }
const char* getName() const noexcept final { return "GPU"; }

protected:
IndexTableUtils* mDeviceIndexTableUtils;
gpu::TimeFrameGPU<7>* mTimeFrameGPU;
gpu::TimeFrameGPU<nLayers>* mTimeFrameGPU;
TimeFrameGPUParameters mTfGPUParams;
};

inline void VertexerTraitsGPU::adoptTimeFrame(TimeFrame<7>* tf) noexcept
{
mTimeFrameGPU = static_cast<gpu::TimeFrameGPU<7>*>(tf);
mTimeFrame = static_cast<TimeFrame<7>*>(tf);
}

} // namespace o2::its

#endif
Loading