forked from cms-patatrack/pixeltrack-standalone
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTrackingRecHit2DHeterogeneous.h
150 lines (117 loc) · 5.76 KB
/
TrackingRecHit2DHeterogeneous.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#ifndef CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h
#define CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h
#include "CUDADataFormats/TrackingRecHit2DSOAView.h"
#include "CUDADataFormats/HeterogeneousSoA.h"
// Owner of the buffers that back a TrackingRecHit2DSOAView.
//
// The smart-pointer flavour (and therefore where the memory lives) is selected
// by `Traits`, which must provide a `unique_ptr<T>` alias template plus the
// `make_host_unique` / `make_device_unique` factories used by the constructor.
//
// The object is move-only. A default-constructed instance owns nothing:
// `nHits()` is 0 and every pointer accessor returns nullptr.
template <typename Traits>
class TrackingRecHit2DHeterogeneous {
public:
  template <typename T>
  using unique_ptr = typename Traits::template unique_ptr<T>;

  using Hist = TrackingRecHit2DSOAView::Hist;

  TrackingRecHit2DHeterogeneous() = default;

  // Allocates the storage for `nHits` hits and fills a view pointing into it.
  //   nHits           - number of hits to reserve room for (0 skips the per-hit buffers)
  //   cpeParams       - stored as-is in the view
  //   hitsModuleStart - stored as-is in the view and in this object (not owned)
  //   stream          - forwarded to every allocation and to the view transfer
  explicit TrackingRecHit2DHeterogeneous(uint32_t nHits,
                                         pixelCPEforGPU::ParamsOnGPU const* cpeParams,
                                         uint32_t const* hitsModuleStart,
                                         cudaStream_t stream);

  ~TrackingRecHit2DHeterogeneous() = default;

  // move-only: the buffers are uniquely owned
  TrackingRecHit2DHeterogeneous(const TrackingRecHit2DHeterogeneous&) = delete;
  TrackingRecHit2DHeterogeneous& operator=(const TrackingRecHit2DHeterogeneous&) = delete;
  TrackingRecHit2DHeterogeneous(TrackingRecHit2DHeterogeneous&&) = default;
  TrackingRecHit2DHeterogeneous& operator=(TrackingRecHit2DHeterogeneous&&) = default;

  // The view object owned by this instance (nullptr if default-constructed).
  TrackingRecHit2DSOAView* view() { return m_view.get(); }
  TrackingRecHit2DSOAView const* view() const { return m_view.get(); }

  auto nHits() const { return m_nHits; }
  auto hitsModuleStart() const { return m_hitsModuleStart; }

  // Raw pointers cached next to the view so that they can be handed out
  // without going through the view itself ("needed as kernel params").
  // They are nullptr when no per-hit storage has been allocated.
  auto hitsLayerStart() const { return m_hitsLayerStart; }
  auto phiBinner() const { return m_hist; }
  auto iphi() const { return m_iphi; }

  // only the local coord and detector index
  // (declared here, defined elsewhere; presumably each returns a host copy
  // of the corresponding buffer, enqueued on `stream` - confirm at the definition)
  cms::cuda::host::unique_ptr<float[]> localCoordToHostAsync(cudaStream_t stream) const;
  cms::cuda::host::unique_ptr<uint16_t[]> detIndexToHostAsync(cudaStream_t stream) const;
  cms::cuda::host::unique_ptr<uint32_t[]> hitsModuleStartToHostAsync(cudaStream_t stream) const;

private:
  // number of per-hit 16-bit and 32-bit arrays packed into m_store16 / m_store32
  static constexpr uint32_t n16 = 4;
  static constexpr uint32_t n32 = 9;
  static_assert(sizeof(uint32_t) == sizeof(float));  // just stating the obvious

  unique_ptr<uint16_t[]> m_store16;  //!
  unique_ptr<float[]> m_store32;     //!

  unique_ptr<TrackingRecHit2DSOAView::Hist> m_HistStore;                        //!
  unique_ptr<TrackingRecHit2DSOAView::AverageGeometry> m_AverageGeometryStore;  //!

  unique_ptr<TrackingRecHit2DSOAView> m_view;  //!

  // Explicit defaults so that a default-constructed (or empty) object never
  // exposes indeterminate values through the accessors above.
  uint32_t m_nHits = 0;

  uint32_t const* m_hitsModuleStart = nullptr;  // needed for legacy, this is on GPU!

  // needed as kernel params...
  Hist* m_hist = nullptr;
  uint32_t* m_hitsLayerStart = nullptr;
  int16_t* m_iphi = nullptr;
};
#include "CUDACore/copyAsync.h"
#include "CUDACore/cudaCheck.h"
// Builds the hit container for `nHits` hits.
//
// A staging view is filled through a host-side allocation and then either
// handed to cms::cuda::copyAsync (GPUTraits) or adopted directly as m_view
// (every other Traits). When `nHits` is 0 only the view and the average
// geometry are allocated; the per-hit buffers are skipped and the cached
// kernel-parameter pointers (m_hist, m_hitsLayerStart, m_iphi) stay nullptr.
//
//   nHits           - number of hits to allocate storage for
//   cpeParams       - stored in the view, not owned
//   hitsModuleStart - stored in the view and in m_hitsModuleStart, not owned
//   stream          - forwarded to all allocations and to the view transfer
template <typename Traits>
TrackingRecHit2DHeterogeneous<Traits>::TrackingRecHit2DHeterogeneous(uint32_t nHits,
                                                                     pixelCPEforGPU::ParamsOnGPU const* cpeParams,
                                                                     uint32_t const* hitsModuleStart,
                                                                     cudaStream_t stream)
    : m_nHits(nHits),
      m_hitsModuleStart(hitsModuleStart),
      // give the cached pointers a defined value: they are only assigned
      // below when there is at least one hit
      m_hist(nullptr),
      m_hitsLayerStart(nullptr),
      m_iphi(nullptr) {
  // staging copy of the view, filled here before being transferred into m_view
  auto view = Traits::template make_host_unique<TrackingRecHit2DSOAView>(stream);

  view->m_nHits = nHits;
  m_view = Traits::template make_device_unique<TrackingRecHit2DSOAView>(stream);
  m_AverageGeometryStore = Traits::template make_device_unique<TrackingRecHit2DSOAView::AverageGeometry>(stream);
  view->m_averageGeometry = m_AverageGeometryStore.get();
  view->m_cpeParams = cpeParams;
  view->m_hitsModuleStart = hitsModuleStart;

  // if empty do not bother allocating the per-hit storage
  if (nHits != 0) {
    // the single arrays are not 128 bit aligned...
    // the hits are actually accessed in order only in building
    // if ordering is relevant they may have to be stored phi-ordered by layer or so
    // this will break 1to1 correspondence with cluster and module locality
    // so unless proven VERY inefficient we keep it ordered as generated

    // n16 arrays of nHits 16-bit words, and n32 arrays of nHits 32-bit words
    // followed by 11 extra 32-bit words used for hitsLayerStart
    // (presumably number of layers + 1 - TODO confirm)
    m_store16 = Traits::template make_device_unique<uint16_t[]>(nHits * n16, stream);
    m_store32 = Traits::template make_device_unique<float[]>(nHits * n32 + 11, stream);
    m_HistStore = Traits::template make_device_unique<TrackingRecHit2DSOAView::Hist>(stream);

    // start of the i-th array of nHits elements inside each store
    auto get16 = [&](int i) { return m_store16.get() + i * nHits; };
    auto get32 = [&](int i) { return m_store32.get() + i * nHits; };

    // copy all the pointers
    m_hist = view->m_hist = m_HistStore.get();

    view->m_xl = get32(0);
    view->m_yl = get32(1);
    view->m_xerr = get32(2);
    view->m_yerr = get32(3);

    view->m_xg = get32(4);
    view->m_yg = get32(5);
    view->m_zg = get32(6);
    view->m_rg = get32(7);

    m_iphi = view->m_iphi = reinterpret_cast<int16_t*>(get16(0));

    // the charge reuses a float-sized slot as int32_t (same size, see the
    // static_assert in the class definition)
    view->m_charge = reinterpret_cast<int32_t*>(get32(8));
    view->m_xsize = reinterpret_cast<int16_t*>(get16(2));
    view->m_ysize = reinterpret_cast<int16_t*>(get16(3));
    view->m_detInd = get16(1);

    // the layer starts live right after the n32 per-hit arrays
    m_hitsLayerStart = view->m_hitsLayerStart = reinterpret_cast<uint32_t*>(get32(n32));
  }

  // transfer view
  // NOTE(review): in the GPU branch the staging buffer goes out of scope when
  // the constructor returns; presumably the host allocator keeps it valid
  // until the work enqueued on `stream` has completed - confirm.
  if
#ifndef __CUDACC__
      constexpr
#endif
      (std::is_same<Traits, cudaCompat::GPUTraits>::value) {
    cms::cuda::copyAsync(m_view, view, stream);
  } else {
    // adopt the staging view itself, replacing the one allocated above
    m_view.reset(view.release());  // NOLINT: std::move() breaks CUDA version
  }
}
using TrackingRecHit2DGPU = TrackingRecHit2DHeterogeneous<cudaCompat::GPUTraits>;
using TrackingRecHit2DCUDA = TrackingRecHit2DHeterogeneous<cudaCompat::GPUTraits>;
using TrackingRecHit2DCPU = TrackingRecHit2DHeterogeneous<cudaCompat::CPUTraits>;
using TrackingRecHit2DHost = TrackingRecHit2DHeterogeneous<cudaCompat::HostTraits>;
#endif // CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h