Skip to content

Commit

Permalink
Merge pull request cms-sw#47084 from SegmentLinking/ariostas/integrat…
Browse files Browse the repository at this point in the history
…ion_pr_followups

LST followups: better work divisions, concrete kernel dimension, some cleanup and fixes
  • Loading branch information
cmsbuild authored Jan 29, 2025
2 parents dd230c0 + baa91b3 commit 878e0b4
Show file tree
Hide file tree
Showing 16 changed files with 255 additions and 442 deletions.
22 changes: 19 additions & 3 deletions HeterogeneousCore/AlpakaInterface/interface/alpakastdAlgorithm.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,28 @@

#include <alpaka/alpaka.hpp>

// reimplementation of std algorithms able to compile with Alpaka,
// mostly by declaring them constexpr (until C++20, which will make it
// constexpr by default. TODO: drop when moving to C++20)
// reimplementation of std algorithms able to work on device code

namespace alpaka_std {

template <typename RandomIt, typename T, typename Compare = std::less<T>>
ALPAKA_FN_HOST_ACC constexpr RandomIt lower_bound(RandomIt first, RandomIt last, const T &value, Compare comp = {}) {
auto count = last - first;

while (count > 0) {
auto it = first;
auto step = count / 2;
it += step;
if (comp(*it, value)) {
first = ++it;
count -= step + 1;
} else {
count = step;
}
}
return first;
}

template <typename RandomIt, typename T, typename Compare = std::less<T>>
ALPAKA_FN_HOST_ACC constexpr RandomIt upper_bound(RandomIt first, RandomIt last, const T &value, Compare comp = {}) {
auto count = last - first;
Expand Down
46 changes: 46 additions & 0 deletions RecoTracker/LSTCore/interface/Circle.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#ifndef RecoTracker_LSTCore_interface_Circle_h
#define RecoTracker_LSTCore_interface_Circle_h

#include <tuple>

#include "HeterogeneousCore/AlpakaInterface/interface/config.h"

namespace lst {

template <typename TAcc>
ALPAKA_FN_ACC ALPAKA_FN_INLINE std::tuple<float, float, float> computeRadiusFromThreeAnchorHits(
TAcc const& acc, float x1, float y1, float x2, float y2, float x3, float y3) {
float radius = 0.f;

//first anchor hit - (x1,y1), second anchor hit - (x2,y2), third anchor hit - (x3, y3)

float denomInv = 1.0f / ((y1 - y3) * (x2 - x3) - (x1 - x3) * (y2 - y3));

float xy1sqr = x1 * x1 + y1 * y1;

float xy2sqr = x2 * x2 + y2 * y2;

float xy3sqr = x3 * x3 + y3 * y3;

float regressionCenterX = 0.5f * ((y3 - y2) * xy1sqr + (y1 - y3) * xy2sqr + (y2 - y1) * xy3sqr) * denomInv;

float regressionCenterY = 0.5f * ((x2 - x3) * xy1sqr + (x3 - x1) * xy2sqr + (x1 - x2) * xy3sqr) * denomInv;

float c = ((x2 * y3 - x3 * y2) * xy1sqr + (x3 * y1 - x1 * y3) * xy2sqr + (x1 * y2 - x2 * y1) * xy3sqr) * denomInv;

if (((y1 - y3) * (x2 - x3) - (x1 - x3) * (y2 - y3) == 0) ||
(regressionCenterX * regressionCenterX + regressionCenterY * regressionCenterY - c < 0)) {
#ifdef WARNINGS
printf("three collinear points or FATAL! r^2 < 0!\n");
#endif
radius = -1.f;
} else
radius =
alpaka::math::sqrt(acc, regressionCenterX * regressionCenterX + regressionCenterY * regressionCenterY - c);

return std::make_tuple(radius, regressionCenterX, regressionCenterY);
}

} //namespace lst

#endif
4 changes: 2 additions & 2 deletions RecoTracker/LSTCore/interface/HitsSoA.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ namespace lst {
SOA_COLUMN(ArrayIx2, hitRanges),
SOA_COLUMN(int, hitRangesLower),
SOA_COLUMN(int, hitRangesUpper),
SOA_COLUMN(int8_t, hitRangesnLower),
SOA_COLUMN(int8_t, hitRangesnUpper))
SOA_COLUMN(int16_t, hitRangesnLower),
SOA_COLUMN(int16_t, hitRangesnUpper))

using HitsSoA = HitsSoALayout<>;
using HitsRangesSoA = HitsRangesSoALayout<>;
Expand Down
28 changes: 1 addition & 27 deletions RecoTracker/LSTCore/interface/alpaka/Common.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,33 +11,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {

using namespace ::lst;

Vec3D constexpr elementsPerThread(Vec3D::all(static_cast<Idx>(1)));

ALPAKA_FN_HOST ALPAKA_FN_INLINE void lstWarning(std::string warning) {
edm::LogWarning("LST") << warning;
return;
}

// Adjust grid and block sizes based on backend configuration
template <typename Vec, typename TAcc = Acc<typename Vec::Dim>>
ALPAKA_FN_HOST ALPAKA_FN_INLINE WorkDiv<typename Vec::Dim> createWorkDiv(const Vec& blocksPerGrid,
const Vec& threadsPerBlock,
const Vec& elementsPerThreadArg) {
Vec adjustedBlocks = blocksPerGrid;
Vec adjustedThreads = threadsPerBlock;

// special overrides for CPU/host cases
if constexpr (std::is_same_v<Platform, alpaka::PlatformCpu>) {
adjustedBlocks = Vec::all(static_cast<Idx>(1));

if constexpr (alpaka::accMatchesTags<TAcc, alpaka::TagCpuSerial>) {
// Serial execution, set threads to 1 as well
adjustedThreads = Vec::all(static_cast<Idx>(1)); // probably redundant
}
}

return WorkDiv<typename Vec::Dim>(adjustedBlocks, adjustedThreads, elementsPerThreadArg);
}
ALPAKA_FN_HOST ALPAKA_FN_INLINE void lstWarning(std::string_view warning) { edm::LogWarning("LST") << warning; }

// The constants below are usually used in functions like alpaka::math::min(),
// expecting a reference (T const&) in the arguments. Hence,
Expand Down
22 changes: 9 additions & 13 deletions RecoTracker/LSTCore/src/alpaka/Hit.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
#ifndef RecoTracker_LSTCore_src_alpaka_Hit_h
#define RecoTracker_LSTCore_src_alpaka_Hit_h

#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
#include "HeterogeneousCore/AlpakaInterface/interface/alpakastdAlgorithm.h"

#include "RecoTracker/LSTCore/interface/alpaka/Common.h"
#include "RecoTracker/LSTCore/interface/ModulesSoA.h"
#include "RecoTracker/LSTCore/interface/alpaka/HitsDeviceCollection.h"
Expand Down Expand Up @@ -57,15 +60,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
}

struct ModuleRangesKernel {
template <typename TAcc>
ALPAKA_FN_ACC void operator()(TAcc const& acc,
ALPAKA_FN_ACC void operator()(Acc1D const& acc,
ModulesConst modules,
HitsRanges hitsRanges,
int nLowerModules) const {
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

for (int lowerIndex = globalThreadIdx[2]; lowerIndex < nLowerModules; lowerIndex += gridThreadExtent[2]) {
for (int lowerIndex : cms::alpakatools::uniform_elements(acc, nLowerModules)) {
uint16_t upperIndex = modules.partnerModuleIndices()[lowerIndex];
if (hitsRanges.hitRanges()[lowerIndex][0] != -1 && hitsRanges.hitRanges()[upperIndex][0] != -1) {
hitsRanges.hitRangesLower()[lowerIndex] = hitsRanges.hitRanges()[lowerIndex][0];
Expand All @@ -80,8 +79,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
};

struct HitLoopKernel {
template <typename TAcc>
ALPAKA_FN_ACC void operator()(TAcc const& acc,
ALPAKA_FN_ACC void operator()(Acc1D const& acc,
uint16_t Endcap, // Integer corresponding to endcap in module subdets
uint16_t TwoS, // Integer corresponding to TwoS in moduleType
unsigned int nModules, // Number of modules
Expand All @@ -94,9 +92,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
{
auto geoMapDetId = endcapGeometry.geoMapDetId(); // DetId's from endcap map
auto geoMapPhi = endcapGeometry.geoMapPhi(); // Phi values from endcap map
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
for (unsigned int ihit = globalThreadIdx[2]; ihit < nHits; ihit += gridThreadExtent[2]) {
for (unsigned int ihit : cms::alpakatools::uniform_elements(acc, nHits)) {
float ihit_x = hits.xs()[ihit];
float ihit_y = hits.ys()[ihit];
float ihit_z = hits.zs()[ihit];
Expand All @@ -108,7 +104,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
((ihit_z > 0) - (ihit_z < 0)) *
alpaka::math::acosh(
acc, alpaka::math::sqrt(acc, ihit_x * ihit_x + ihit_y * ihit_y + ihit_z * ihit_z) / hits.rts()[ihit]);
auto found_pointer = std::lower_bound(modules.mapdetId(), modules.mapdetId() + nModules, iDetId);
auto found_pointer = alpaka_std::lower_bound(modules.mapdetId(), modules.mapdetId() + nModules, iDetId);
int found_index = std::distance(modules.mapdetId(), found_pointer);
if (found_pointer == modules.mapdetId() + nModules)
found_index = -1;
Expand All @@ -117,7 +113,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
hits.moduleIndices()[ihit] = lastModuleIndex;

if (modules.subdets()[lastModuleIndex] == Endcap && modules.moduleType()[lastModuleIndex] == TwoS) {
found_pointer = std::lower_bound(geoMapDetId, geoMapDetId + nEndCapMap, iDetId);
found_pointer = alpaka_std::lower_bound(geoMapDetId, geoMapDetId + nEndCapMap, iDetId);
found_index = std::distance(geoMapDetId, found_pointer);
if (found_pointer == geoMapDetId + nEndCapMap)
found_index = -1;
Expand Down
57 changes: 19 additions & 38 deletions RecoTracker/LSTCore/src/alpaka/Kernels.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#ifndef RecoTracker_LSTCore_src_alpaka_Kernels_h
#define RecoTracker_LSTCore_src_alpaka_Kernels_h

#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"

#include "RecoTracker/LSTCore/interface/alpaka/Common.h"
#include "RecoTracker/LSTCore/interface/ModulesSoA.h"
#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h"
Expand Down Expand Up @@ -139,26 +141,22 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
}

struct RemoveDupQuintupletsAfterBuild {
template <typename TAcc>
ALPAKA_FN_ACC void operator()(TAcc const& acc,
ALPAKA_FN_ACC void operator()(Acc3D const& acc,
ModulesConst modules,
Quintuplets quintuplets,
QuintupletsOccupancyConst quintupletsOccupancy,
ObjectRangesConst ranges) const {
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

for (unsigned int lowmod = globalThreadIdx[0]; lowmod < modules.nLowerModules(); lowmod += gridThreadExtent[0]) {
for (unsigned int lowmod : cms::alpakatools::uniform_elements_z(acc, modules.nLowerModules())) {
unsigned int nQuintuplets_lowmod = quintupletsOccupancy.nQuintuplets()[lowmod];
int quintupletModuleIndices_lowmod = ranges.quintupletModuleIndices()[lowmod];

for (unsigned int ix1 = globalThreadIdx[1]; ix1 < nQuintuplets_lowmod; ix1 += gridThreadExtent[1]) {
for (unsigned int ix1 : cms::alpakatools::uniform_elements_y(acc, nQuintuplets_lowmod)) {
unsigned int ix = quintupletModuleIndices_lowmod + ix1;
float eta1 = __H2F(quintuplets.eta()[ix]);
float phi1 = __H2F(quintuplets.phi()[ix]);
float score_rphisum1 = __H2F(quintuplets.score_rphisum()[ix]);

for (unsigned int jx1 = globalThreadIdx[2] + ix1 + 1; jx1 < nQuintuplets_lowmod; jx1 += gridThreadExtent[2]) {
for (unsigned int jx1 : cms::alpakatools::uniform_elements_x(acc, ix1 + 1, nQuintuplets_lowmod)) {
unsigned int jx = quintupletModuleIndices_lowmod + jx1;

float eta2 = __H2F(quintuplets.eta()[jx]);
Expand Down Expand Up @@ -189,25 +187,20 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
};

struct RemoveDupQuintupletsBeforeTC {
template <typename TAcc>
ALPAKA_FN_ACC void operator()(TAcc const& acc,
ALPAKA_FN_ACC void operator()(Acc2D const& acc,
Quintuplets quintuplets,
QuintupletsOccupancyConst quintupletsOccupancy,
ObjectRangesConst ranges) const {
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

for (unsigned int lowmodIdx1 = globalThreadIdx[1]; lowmodIdx1 < ranges.nEligibleT5Modules();
lowmodIdx1 += gridThreadExtent[1]) {
for (unsigned int lowmodIdx1 : cms::alpakatools::uniform_elements_y(acc, ranges.nEligibleT5Modules())) {
uint16_t lowmod1 = ranges.indicesOfEligibleT5Modules()[lowmodIdx1];
unsigned int nQuintuplets_lowmod1 = quintupletsOccupancy.nQuintuplets()[lowmod1];
if (nQuintuplets_lowmod1 == 0)
continue;

unsigned int quintupletModuleIndices_lowmod1 = ranges.quintupletModuleIndices()[lowmod1];

for (unsigned int lowmodIdx2 = globalThreadIdx[2] + lowmodIdx1; lowmodIdx2 < ranges.nEligibleT5Modules();
lowmodIdx2 += gridThreadExtent[2]) {
for (unsigned int lowmodIdx2 :
cms::alpakatools::uniform_elements_x(acc, lowmodIdx1, ranges.nEligibleT5Modules())) {
uint16_t lowmod2 = ranges.indicesOfEligibleT5Modules()[lowmodIdx2];
unsigned int nQuintuplets_lowmod2 = quintupletsOccupancy.nQuintuplets()[lowmod2];
if (nQuintuplets_lowmod2 == 0)
Expand Down Expand Up @@ -272,13 +265,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
};

struct RemoveDupPixelTripletsFromMap {
template <typename TAcc>
ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelTriplets pixelTriplets) const {
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

for (unsigned int ix = globalThreadIdx[1]; ix < pixelTriplets.nPixelTriplets(); ix += gridThreadExtent[1]) {
for (unsigned int jx = globalThreadIdx[2]; jx < pixelTriplets.nPixelTriplets(); jx += gridThreadExtent[2]) {
ALPAKA_FN_ACC void operator()(Acc2D const& acc, PixelTriplets pixelTriplets) const {
for (unsigned int ix : cms::alpakatools::uniform_elements_y(acc, pixelTriplets.nPixelTriplets())) {
for (unsigned int jx : cms::alpakatools::uniform_elements_x(acc, pixelTriplets.nPixelTriplets())) {
if (ix == jx)
continue;

Expand Down Expand Up @@ -306,15 +295,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
};

struct RemoveDupPixelQuintupletsFromMap {
template <typename TAcc>
ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelQuintuplets pixelQuintuplets) const {
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

ALPAKA_FN_ACC void operator()(Acc2D const& acc, PixelQuintuplets pixelQuintuplets) const {
unsigned int nPixelQuintuplets = pixelQuintuplets.nPixelQuintuplets();
for (unsigned int ix = globalThreadIdx[1]; ix < nPixelQuintuplets; ix += gridThreadExtent[1]) {
for (unsigned int ix : cms::alpakatools::uniform_elements_y(acc, nPixelQuintuplets)) {
float score1 = __H2F(pixelQuintuplets.score()[ix]);
for (unsigned int jx = globalThreadIdx[2]; jx < nPixelQuintuplets; jx += gridThreadExtent[2]) {
for (unsigned int jx : cms::alpakatools::uniform_elements_x(acc, nPixelQuintuplets)) {
if (ix == jx)
continue;

Expand All @@ -333,22 +318,18 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
};

struct CheckHitspLS {
template <typename TAcc>
ALPAKA_FN_ACC void operator()(TAcc const& acc,
ALPAKA_FN_ACC void operator()(Acc2D const& acc,
ModulesConst modules,
SegmentsOccupancyConst segmentsOccupancy,
SegmentsPixel segmentsPixel,
bool secondpass) const {
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

int pixelModuleIndex = modules.nLowerModules();
unsigned int nPixelSegments = segmentsOccupancy.nSegments()[pixelModuleIndex];

if (nPixelSegments > n_max_pixel_segments_per_module)
nPixelSegments = n_max_pixel_segments_per_module;

for (unsigned int ix = globalThreadIdx[1]; ix < nPixelSegments; ix += gridThreadExtent[1]) {
for (unsigned int ix : cms::alpakatools::uniform_elements_y(acc, nPixelSegments)) {
if (secondpass && (!segmentsPixel.isQuad()[ix] || (segmentsPixel.isDup()[ix] & 1)))
continue;

Expand All @@ -360,7 +341,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
float eta_pix1 = segmentsPixel.eta()[ix];
float phi_pix1 = segmentsPixel.phi()[ix];

for (unsigned int jx = ix + 1 + globalThreadIdx[2]; jx < nPixelSegments; jx += gridThreadExtent[2]) {
for (unsigned int jx : cms::alpakatools::uniform_elements_x(acc, ix + 1, nPixelSegments)) {
float eta_pix2 = segmentsPixel.eta()[jx];
float phi_pix2 = segmentsPixel.phi()[jx];

Expand Down
Loading

0 comments on commit 878e0b4

Please sign in to comment.