Skip to content
This repository has been archived by the owner on Dec 9, 2024. It is now read-only.

Commit

Permalink
Merge pull request #394 from SegmentLinking/fix_timings
Browse files: browse the repository at this point in the history
Fix timing issues
  • Loading branch information
GNiendorf authored Apr 30, 2024
2 parents 82d9fe3 + 8f21fbc commit 1931181
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 25 deletions.
32 changes: 15 additions & 17 deletions SDL/Kernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@

namespace SDL {
// Flags the quintuplet at quintupletIndex as a duplicate by writing to
// quintupletsInGPU.isDup[quintupletIndex].
// NOTE(review): this is a rendered diff hunk without +/- markers, so both the
// removed lines (single-index signature, `isDup = true`) and the added lines
// (extra defaulted `secondpass` parameter, `isDup |= 1 + secondpass`) appear below.
// In the added form the bool is promoted to int, so the value ORed into isDup is
// 1 when secondpass is false (the default) and 2 when secondpass is true.
ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmQuintupletFromMemory(struct SDL::quintuplets& quintupletsInGPU,
unsigned int quintupletIndex) {
quintupletsInGPU.isDup[quintupletIndex] = true;
unsigned int quintupletIndex,
bool secondpass = false) {
quintupletsInGPU.isDup[quintupletIndex] |= 1 + secondpass;
};

ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelTripletFromMemory(struct SDL::pixelTriplets& pixelTripletsInGPU,
Expand Down Expand Up @@ -212,10 +213,8 @@ namespace SDL {
float phi1 = __H2F(quintupletsInGPU.phi[ix]);
float score_rphisum1 = __H2F(quintupletsInGPU.score_rphisum[ix]);

for (unsigned int jx1 = globalThreadIdx[2]; jx1 < nQuintuplets_lowmod; jx1 += gridThreadExtent[2]) {
for (unsigned int jx1 = globalThreadIdx[2] + ix1 + 1; jx1 < nQuintuplets_lowmod; jx1 += gridThreadExtent[2]) {
unsigned int jx = quintupletModuleIndices_lowmod + jx1;
if (ix == jx)
continue;

float eta2 = __H2F(quintupletsInGPU.eta[jx]);
float phi2 = __H2F(quintupletsInGPU.phi[jx]);
Expand All @@ -231,10 +230,10 @@ namespace SDL {

int nMatched = checkHitsT5(ix, jx, quintupletsInGPU);
if (nMatched >= 7) {
if (score_rphisum1 > score_rphisum2) {
rmQuintupletFromMemory(quintupletsInGPU, ix);
} else if ((score_rphisum1 == score_rphisum2) && (ix < jx)) {
if (score_rphisum1 >= score_rphisum2) {
rmQuintupletFromMemory(quintupletsInGPU, ix);
} else {
rmQuintupletFromMemory(quintupletsInGPU, jx);
}
}
}
Expand All @@ -260,7 +259,7 @@ namespace SDL {

unsigned int quintupletModuleIndices_lowmod1 = rangesInGPU.quintupletModuleIndices[lowmod1];

for (unsigned int lowmodIdx2 = globalThreadIdx[2]; lowmodIdx2 < *(rangesInGPU.nEligibleT5Modules);
for (unsigned int lowmodIdx2 = globalThreadIdx[2] + lowmodIdx1; lowmodIdx2 < *(rangesInGPU.nEligibleT5Modules);
lowmodIdx2 += gridThreadExtent[2]) {
uint16_t lowmod2 = rangesInGPU.indicesOfEligibleT5Modules[lowmodIdx2];
unsigned int nQuintuplets_lowmod2 = quintupletsInGPU.nQuintuplets[lowmod2];
Expand All @@ -271,15 +270,15 @@ namespace SDL {

for (unsigned int ix1 = 0; ix1 < nQuintuplets_lowmod1; ix1 += 1) {
unsigned int ix = quintupletModuleIndices_lowmod1 + ix1;
if (quintupletsInGPU.partOfPT5[ix])
if (quintupletsInGPU.partOfPT5[ix] || (quintupletsInGPU.isDup[ix] & 1))
continue;

for (unsigned int jx1 = 0; jx1 < nQuintuplets_lowmod2; jx1++) {
unsigned int jx = quintupletModuleIndices_lowmod2 + jx1;
if (ix == jx)
continue;

if (quintupletsInGPU.partOfPT5[jx])
if (quintupletsInGPU.partOfPT5[jx] || (quintupletsInGPU.isDup[jx] & 1))
continue;

float eta1 = __H2F(quintupletsInGPU.eta[ix]);
Expand All @@ -303,12 +302,11 @@ namespace SDL {
int nMatched = checkHitsT5(ix, jx, quintupletsInGPU);
if (dR2 < 0.001f || nMatched >= 5) {
if (score_rphisum1 > score_rphisum2) {
rmQuintupletFromMemory(quintupletsInGPU, ix);
continue;
}
if ((score_rphisum1 == score_rphisum2) && (ix < jx)) {
rmQuintupletFromMemory(quintupletsInGPU, ix);
continue;
rmQuintupletFromMemory(quintupletsInGPU, ix, true);
} else if (score_rphisum1 < score_rphisum2) {
rmQuintupletFromMemory(quintupletsInGPU, jx, true);
} else {
rmQuintupletFromMemory(quintupletsInGPU, (ix < jx ? ix : jx), true);
}
}
}
Expand Down
10 changes: 5 additions & 5 deletions SDL/Quintuplet.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ namespace SDL {
FPX* phi;
FPX* score_rphisum;
uint8_t* layer;
bool* isDup;
char* isDup;
bool* TightCutFlag;
bool* partOfPT5;

Expand Down Expand Up @@ -85,7 +85,7 @@ namespace SDL {
Buf<TDev, FPX> phi_buf;
Buf<TDev, FPX> score_rphisum_buf;
Buf<TDev, uint8_t> layer_buf;
Buf<TDev, bool> isDup_buf;
Buf<TDev, char> isDup_buf;
Buf<TDev, bool> TightCutFlag_buf;
Buf<TDev, bool> partOfPT5_buf;

Expand Down Expand Up @@ -114,7 +114,7 @@ namespace SDL {
phi_buf(allocBufWrapper<FPX>(devAccIn, nTotalQuintuplets, queue)),
score_rphisum_buf(allocBufWrapper<FPX>(devAccIn, nTotalQuintuplets, queue)),
layer_buf(allocBufWrapper<uint8_t>(devAccIn, nTotalQuintuplets, queue)),
isDup_buf(allocBufWrapper<bool>(devAccIn, nTotalQuintuplets, queue)),
isDup_buf(allocBufWrapper<char>(devAccIn, nTotalQuintuplets, queue)),
TightCutFlag_buf(allocBufWrapper<bool>(devAccIn, nTotalQuintuplets, queue)),
partOfPT5_buf(allocBufWrapper<bool>(devAccIn, nTotalQuintuplets, queue)),
regressionRadius_buf(allocBufWrapper<float>(devAccIn, nTotalQuintuplets, queue)),
Expand All @@ -127,7 +127,7 @@ namespace SDL {
nonAnchorChiSquared_buf(allocBufWrapper<float>(devAccIn, nTotalQuintuplets, queue)) {
alpaka::memset(queue, nQuintuplets_buf, 0u);
alpaka::memset(queue, totOccupancyQuintuplets_buf, 0u);
alpaka::memset(queue, isDup_buf, false);
alpaka::memset(queue, isDup_buf, 0u);
alpaka::memset(queue, TightCutFlag_buf, false);
alpaka::memset(queue, partOfPT5_buf, false);
alpaka::wait(queue);
Expand Down Expand Up @@ -181,7 +181,7 @@ namespace SDL {
quintupletsInGPU.phi[quintupletIndex] = __F2H(phi);
quintupletsInGPU.score_rphisum[quintupletIndex] = __F2H(scores);
quintupletsInGPU.layer[quintupletIndex] = layer;
quintupletsInGPU.isDup[quintupletIndex] = false;
quintupletsInGPU.isDup[quintupletIndex] = 0;
quintupletsInGPU.TightCutFlag[quintupletIndex] = TightCutFlag;
quintupletsInGPU.regressionRadius[quintupletIndex] = regressionRadius;
quintupletsInGPU.regressionG[quintupletIndex] = regressionG;
Expand Down
9 changes: 6 additions & 3 deletions bin/sdl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -298,12 +298,15 @@ int main(int argc, char** argv)
void run_sdl()
{
SDL::Dev devAcc = alpaka::getDevByIdx(ALPAKA_ACCELERATOR_NAMESPACE::Platform{}, 0u);
SDL::QueueAcc queue(devAcc);
std::vector<SDL::QueueAcc> queues;
for (int s = 0; s < ana.streams; s++) {
queues.push_back(SDL::QueueAcc(devAcc));
}

// Load various maps used in the SDL reconstruction
TStopwatch full_timer;
full_timer.Start();
loadMaps(devAcc, queue);
loadMaps(devAcc, queues[0]);
float timeForMapLoading = full_timer.RealTime()*1000;

if (ana.do_write_ntuple)
Expand Down Expand Up @@ -385,7 +388,7 @@ void run_sdl()
std::vector<SDL::Event<SDL::Acc>*> events;
for (int s = 0; s < ana.streams; s++)
{
SDL::Event<SDL::Acc> *event = new SDL::Event<SDL::Acc>(ana.verbose>=2, queue);
SDL::Event<SDL::Acc> *event = new SDL::Event<SDL::Acc>(ana.verbose>=2, queues[s]);
events.push_back(event);
}
float timeForEventCreation = full_timer.RealTime()*1000;
Expand Down

0 comments on commit 1931181

Please sign in to comment.