Skip to content
This repository has been archived by the owner on Dec 9, 2024. It is now read-only.

Commit

Permalink
Merge pull request #387 from SegmentLinking/dup_removal_fixes
Browse files: browse the repository at this point in the history
Fix race conditions in duplicate removal
  • Loading branch information
slava77 authored Apr 12, 2024
2 parents fc97a6b + 64f247a commit a8bb417
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 26 deletions.
10 changes: 3 additions & 7 deletions SDL/Event.cc
Original file line number | Diff line number | Diff line change
Expand Up @@ -930,11 +930,8 @@ void SDL::Event<SDL::Acc>::createPixelTriplets() {
createWorkDiv(blocksPerGridDupPixTrip, threadsPerBlockDupPixTrip, elementsPerThread);

SDL::removeDupPixelTripletsInGPUFromMap removeDupPixelTripletsInGPUFromMap_kernel;
auto const removeDupPixelTripletsInGPUFromMapTask(
alpaka::createTaskKernel<Acc>(removeDupPixelTripletsInGPUFromMap_workDiv,
removeDupPixelTripletsInGPUFromMap_kernel,
*pixelTripletsInGPU,
false));
auto const removeDupPixelTripletsInGPUFromMapTask(alpaka::createTaskKernel<Acc>(
removeDupPixelTripletsInGPUFromMap_workDiv, removeDupPixelTripletsInGPUFromMap_kernel, *pixelTripletsInGPU));

alpaka::enqueue(queue, removeDupPixelTripletsInGPUFromMapTask);
alpaka::wait(queue);
Expand Down Expand Up @@ -1158,8 +1155,7 @@ void SDL::Event<SDL::Acc>::createPixelQuintuplets() {
auto const removeDupPixelQuintupletsInGPUFromMapTask(
alpaka::createTaskKernel<Acc>(removeDupPixelQuintupletsInGPUFromMap_workDiv,
removeDupPixelQuintupletsInGPUFromMap_kernel,
*pixelQuintupletsInGPU,
false));
*pixelQuintupletsInGPU));

alpaka::enqueue(queue, removeDupPixelQuintupletsInGPUFromMapTask);

Expand Down
23 changes: 4 additions & 19 deletions SDL/Kernels.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -233,13 +233,8 @@ namespace SDL {
if (nMatched >= 7) {
if (score_rphisum1 > score_rphisum2) {
rmQuintupletFromMemory(quintupletsInGPU, ix);
continue;
} else if ((score_rphisum1 == score_rphisum2) && (ix < jx)) {
rmQuintupletFromMemory(quintupletsInGPU, ix);
continue;
} else {
rmQuintupletFromMemory(quintupletsInGPU, jx);
continue;
}
}
}
Expand Down Expand Up @@ -276,15 +271,15 @@ namespace SDL {

for (unsigned int ix1 = 0; ix1 < nQuintuplets_lowmod1; ix1 += 1) {
unsigned int ix = quintupletModuleIndices_lowmod1 + ix1;
if (quintupletsInGPU.partOfPT5[ix] || quintupletsInGPU.isDup[ix])
if (quintupletsInGPU.partOfPT5[ix])
continue;

for (unsigned int jx1 = 0; jx1 < nQuintuplets_lowmod2; jx1++) {
unsigned int jx = quintupletModuleIndices_lowmod2 + jx1;
if (ix == jx)
continue;

if (quintupletsInGPU.partOfPT5[jx] || quintupletsInGPU.isDup[jx])
if (quintupletsInGPU.partOfPT5[jx])
continue;

float eta1 = __H2F(quintupletsInGPU.eta[ix]);
Expand Down Expand Up @@ -325,9 +320,7 @@ namespace SDL {

struct removeDupPixelTripletsInGPUFromMap {
template <typename TAcc>
ALPAKA_FN_ACC void operator()(TAcc const& acc,
struct SDL::pixelTriplets pixelTripletsInGPU,
bool secondPass) const {
ALPAKA_FN_ACC void operator()(TAcc const& acc, struct SDL::pixelTriplets pixelTripletsInGPU) const {
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

Expand Down Expand Up @@ -360,25 +353,17 @@ namespace SDL {

struct removeDupPixelQuintupletsInGPUFromMap {
template <typename TAcc>
ALPAKA_FN_ACC void operator()(TAcc const& acc,
struct SDL::pixelQuintuplets pixelQuintupletsInGPU,
bool secondPass) const {
ALPAKA_FN_ACC void operator()(TAcc const& acc, struct SDL::pixelQuintuplets pixelQuintupletsInGPU) const {
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

unsigned int nPixelQuintuplets = *pixelQuintupletsInGPU.nPixelQuintuplets;
for (unsigned int ix = globalThreadIdx[1]; ix < nPixelQuintuplets; ix += gridThreadExtent[1]) {
if (secondPass && pixelQuintupletsInGPU.isDup[ix])
continue;

float score1 = __H2F(pixelQuintupletsInGPU.score[ix]);
for (unsigned int jx = globalThreadIdx[2]; jx < nPixelQuintuplets; jx += gridThreadExtent[2]) {
if (ix == jx)
continue;

if (secondPass && pixelQuintupletsInGPU.isDup[jx])
continue;

int nMatched = checkHitspT5(ix, jx, pixelQuintupletsInGPU);
float score2 = __H2F(pixelQuintupletsInGPU.score[jx]);
if (nMatched >= 7) {
Expand Down

0 comments on commit a8bb417

Please sign in to comment.