forked from cms-patatrack/pixeltrack-standalone
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgpuClusterChargeCut.h
125 lines (100 loc) · 3.82 KB
/
gpuClusterChargeCut.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#ifndef RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h
#define RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h
#include <cstdint>
#include <cstdio>
#include "CUDACore/cuda_assert.h"
#include "CUDACore/prefixScan.h"
#include "gpuClusteringConstants.h"
namespace gpuClustering {
// Kernel: apply a per-module cluster charge cut and compact the ids of the
// surviving clusters.
//
// Launch layout implied by the indexing below: one block per module
// (blockIdx.x selects the module), with the block's threads striding over
// that module's pixels by blockDim.x. moduleStart[0] holds the number of
// modules; moduleStart[1 + b] is the index of the first pixel of the module
// handled by block b.
//
// Static shared memory per block: MaxNumClustersPerModules elements each of
// int32_t (charge), uint8_t (ok) and uint16_t (newclusId), plus a 32-element
// uint16_t scratch array for the block prefix scan.
__global__ void clusterChargeCut(
uint16_t* __restrict__ id, // module id of each pixel (modified: set to InvId for pixels of rejected clusters)
uint16_t const* __restrict__ adc, // charge of each pixel
uint32_t const* __restrict__ moduleStart, // [0] = number of modules; [1 + b] = index of the first pixel of module b
uint32_t* __restrict__ nClustersInModule, // modified: number of clusters found in each module (reduced to survivors)
uint32_t const* __restrict__ moduleId, // module id of each module
int32_t* __restrict__ clusterId, // modified: cluster id of each pixel (renumbered; InvId for rejected clusters)
uint32_t numElements) { // total number of pixels
// blocks beyond the number of modules have nothing to do
if (blockIdx.x >= moduleStart[0])
return;
auto firstPixel = moduleStart[1 + blockIdx.x];
auto thisModuleId = id[firstPixel];
assert(thisModuleId < MaxNumModules);
assert(thisModuleId == moduleId[blockIdx.x]);
auto nclus = nClustersInModule[thisModuleId];
// uniform across the block (all threads read the same global value),
// so this early return is taken by all threads or by none
if (nclus == 0)
return;
// NOTE(review): nclus (uint32_t) is printed with %d; if
// MaxNumClustersPerModules is unsigned too, %u would be the strictly
// matching specifier — confirm against gpuClusteringConstants.h
if (threadIdx.x == 0 && nclus > MaxNumClustersPerModules)
printf("Warning too many clusters in module %d in block %d: %d > %d\n",
thisModuleId,
blockIdx.x,
nclus,
MaxNumClustersPerModules);
auto first = firstPixel + threadIdx.x;
if (nclus > MaxNumClustersPerModules) {
// remove excess FIXME find a way to cut charge first....
// invalidate every pixel whose cluster id exceeds the shared-memory
// capacity, then clamp the cluster count to the capacity
for (auto i = first; i < numElements; i += blockDim.x) {
if (id[i] == InvId)
continue; // not valid
if (id[i] != thisModuleId)
break; // end of module
if (clusterId[i] >= MaxNumClustersPerModules) {
id[i] = InvId;
clusterId[i] = InvId;
}
}
nclus = MaxNumClustersPerModules;
}
#ifdef GPU_DEBUG
if (thisModuleId % 100 == 1)
if (threadIdx.x == 0)
printf("start clusterizer for module %d in block %d\n", thisModuleId, blockIdx.x);
#endif
__shared__ int32_t charge[MaxNumClustersPerModules]; // summed adc per cluster
__shared__ uint8_t ok[MaxNumClustersPerModules]; // 1 if the cluster passes the charge cut, else 0
__shared__ uint16_t newclusId[MaxNumClustersPerModules]; // after the scan: new 1-based id of each surviving cluster
assert(nclus <= MaxNumClustersPerModules);
// zero the per-cluster accumulators (shared memory is uninitialized)
for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
charge[i] = 0;
}
__syncthreads();
// accumulate the total charge of each cluster in this module
for (auto i = first; i < numElements; i += blockDim.x) {
if (id[i] == InvId)
continue; // not valid
if (id[i] != thisModuleId)
break; // end of module
atomicAdd(&charge[clusterId[i]], adc[i]);
}
__syncthreads();
// lower threshold for modules 0-95 (presumably a detector-region
// distinction — confirm), higher for the rest
auto chargeCut = thisModuleId < 96 ? 2000 : 4000; // move in constants (calib?)
// flag survivors; newclusId starts out as a copy of ok, ready for the scan
for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
newclusId[i] = ok[i] = charge[i] > chargeCut ? 1 : 0;
}
__syncthreads();
// renumber
// inclusive prefix scan of the 0/1 flags: for each surviving cluster,
// newclusId[i] becomes its new 1-based id; newclusId[nclus - 1] is the
// total number of survivors
__shared__ uint16_t ws[32];
blockPrefixScan(newclusId, nclus, ws);
assert(nclus >= newclusId[nclus - 1]);
// if every cluster survived there is nothing to rewrite; the condition
// is uniform across the block (newclusId is shared and past the scan's
// internal synchronization), so all threads return together
if (nclus == newclusId[nclus - 1])
return;
// every thread stores the same value, so the redundant writes are benign
nClustersInModule[thisModuleId] = newclusId[nclus - 1];
__syncthreads();
// mark bad cluster again
for (auto i = threadIdx.x; i < nclus; i += blockDim.x) {
if (0 == ok[i])
newclusId[i] = InvId + 1; // so the "- 1" below maps rejected clusters to InvId
}
__syncthreads();
// reassign id
// rewrite each pixel's cluster id to the compacted 0-based id; pixels of
// rejected clusters get clusterId == InvId and their module id invalidated
for (auto i = first; i < numElements; i += blockDim.x) {
if (id[i] == InvId)
continue; // not valid
if (id[i] != thisModuleId)
break; // end of module
clusterId[i] = newclusId[clusterId[i]] - 1;
if (clusterId[i] == InvId)
id[i] = InvId;
}
//done
}
} // namespace gpuClustering
#endif // RecoLocalTracker_SiPixelClusterizer_plugins_gpuClusterChargeCut_h