diff --git a/.gitignore b/.gitignore index ea250b18..f7e304ce 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ mtv +*~ results/ *.o debug.root diff --git a/README.md b/README.md index fc89ede1..9d1e0f6c 100644 --- a/README.md +++ b/README.md @@ -137,7 +137,17 @@ git remote add SegLink git@github.com:SegmentLinking/cmssw.git git fetch SegLink CMSSW_13_3_0_pre3_LST_X git cms-addpkg RecoTracker Configuration git checkout CMSSW_13_3_0_pre3_LST_X -#To include both the CPU library and GPU library into CMSSW, create 2 xml files. Before writing the following xml file, check that libsdl_cpu.so and libsdl_gpu.so can be found under the ../../../TrackLooper/SDL/ folder. +#To include both the CPU library and GPU library into CMSSW, create 3 xml files (headers file has no library). +#Before writing the following xml file, check that libsdl_cpu.so and libsdl_gpu.so can be found under the ../../../TrackLooper/SDL/ folder. +cat <lst_headers.xml + + + + + + + +EOF cat <lst_cpu.xml @@ -160,6 +170,7 @@ cat <lst_cuda.xml EOF +scram setup lst_headers.xml scram setup lst_cpu.xml scram setup lst_cuda.xml cmsenv diff --git a/SDL/Constants.h b/SDL/Constants.h index 67d32ad5..60829b1b 100644 --- a/SDL/Constants.h +++ b/SDL/Constants.h @@ -11,134 +11,140 @@ #include #endif +namespace SDL { // Half precision wrapper functions. #if defined(FP16_Base) #define __F2H __float2half #define __H2F __half2float -typedef __half float FPX; + typedef __half float FPX; #else #define __F2H #define __H2F -typedef float FPX; + typedef float FPX; #endif -using Idx = std::size_t; -using Dim = alpaka::DimInt<3u>; -using Dim1d = alpaka::DimInt<1u>; -using Vec = alpaka::Vec; -using Vec1d = alpaka::Vec; + using Idx = std::size_t; + using Dim = alpaka::DimInt<3u>; + using Dim1d = alpaka::DimInt<1u>; + using Vec = alpaka::Vec; + using Vec1d = alpaka::Vec; #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED) -using QueueProperty = alpaka::NonBlocking; + using QueueProperty = alpaka::NonBlocking; #else -using QueueProperty = alpaka::Blocking; + using QueueProperty = alpaka::Blocking; #endif -using WorkDiv = alpaka::WorkDivMembers; + using WorkDiv = alpaka::WorkDivMembers; -Vec const elementsPerThread(Vec::all(static_cast(1))); + Vec const elementsPerThread(Vec::all(static_cast(1))); // - AccGpuCudaRt // - AccCpuThreads // - AccCpuSerial #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED -using Acc = alpaka::AccGpuCudaRt; + using Acc = alpaka::AccGpuCudaRt; #elif ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED -using Acc = alpaka::AccCpuThreads; + using Acc = alpaka::AccCpuThreads; #elif ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED -using Acc = alpaka::AccCpuSerial; + using Acc = alpaka::AccCpuSerial; #elif ALPAKA_ACC_GPU_HIP_ENABLED -using Acc = alpaka::AccGpuHipRt; + using Acc = alpaka::AccGpuHipRt; #endif + using Dev = alpaka::Dev; // Needed for files that are compiled by g++ to not throw an error. // uint4 is defined only for CUDA, so we will have to revisit this soon when running on other backends. #if !defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !defined(ALPAKA_ACC_GPU_HIP_ENABLED) -struct uint4 { - unsigned int x; - unsigned int y; - unsigned int z; - unsigned int w; -}; + struct uint4 { + unsigned int x; + unsigned int y; + unsigned int z; + unsigned int w; + }; #endif -auto const devHost = alpaka::getDevByIdx(0u); -auto const devAcc = alpaka::getDevByIdx(0u); -using QueueAcc = alpaka::Queue; + auto const devHost = alpaka::getDevByIdx(0u); +#if defined ALPAKA_ACC_GPU_CUDA_ENABLED || defined ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED || \ + defined ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + auto const devAcc = alpaka::getDevByIdx(0u); + using QueueAcc = alpaka::Queue; +#endif -// Buffer type for allocations where auto type can't be used. -template -using Buf = alpaka::Buf; + // Buffer type for allocations where auto type can't be used. + template + using Buf = alpaka::Buf; -// Allocation wrapper function to make integration of the caching allocator easier and reduce code boilerplate. -template -ALPAKA_FN_HOST ALPAKA_FN_INLINE Buf allocBufWrapper(TAcc const& devAccIn, TSize nElements, TQueue queue) { + // Allocation wrapper function to make integration of the caching allocator easier and reduce code boilerplate. + template + ALPAKA_FN_HOST ALPAKA_FN_INLINE Buf, T> allocBufWrapper(TAcc const& devAccIn, + TSize nElements, + TQueue queue) { #ifdef CACHE_ALLOC - return cms::alpakatools::allocCachedBuf(devAccIn, queue, Vec1d(static_cast(nElements))); + return cms::alpakatools::allocCachedBuf(devAccIn, queue, Vec1d(static_cast(nElements))); #else - return alpaka::allocBuf(devAccIn, Vec1d(static_cast(nElements))); + return alpaka::allocBuf(devAccIn, Vec1d(static_cast(nElements))); #endif -} - -// Second allocation wrapper function when queue is not given. Reduces code boilerplate. -template -ALPAKA_FN_HOST ALPAKA_FN_INLINE Buf allocBufWrapper(TAcc const& devAccIn, TSize nElements) { - return alpaka::allocBuf(devAccIn, Vec1d(static_cast(nElements))); -} - -// Wrapper function to reduce code boilerplate for defining grid/block sizes. -ALPAKA_FN_HOST ALPAKA_FN_INLINE Vec createVec(int x, int y, int z) { - return Vec(static_cast(x), static_cast(y), static_cast(z)); -} - -// Adjust grid and block sizes based on backend configuration -template -ALPAKA_FN_HOST ALPAKA_FN_INLINE WorkDiv createWorkDiv(const Vec& blocksPerGrid, - const Vec& threadsPerBlock, - const Vec& elementsPerThread) { - Vec adjustedBlocks = blocksPerGrid; - Vec adjustedThreads = threadsPerBlock; - - // Serial execution, so all launch parameters set to 1. + } + + // Second allocation wrapper function when queue is not given. Reduces code boilerplate. + template + ALPAKA_FN_HOST ALPAKA_FN_INLINE Buf, T> allocBufWrapper(TAcc const& devAccIn, TSize nElements) { + return alpaka::allocBuf(devAccIn, Vec1d(static_cast(nElements))); + } + + // Wrapper function to reduce code boilerplate for defining grid/block sizes. + ALPAKA_FN_HOST ALPAKA_FN_INLINE Vec createVec(int x, int y, int z) { + return Vec(static_cast(x), static_cast(y), static_cast(z)); + } + + // Adjust grid and block sizes based on backend configuration + template + ALPAKA_FN_HOST ALPAKA_FN_INLINE WorkDiv createWorkDiv(const Vec& blocksPerGrid, + const Vec& threadsPerBlock, + const Vec& elementsPerThread) { + Vec adjustedBlocks = blocksPerGrid; + Vec adjustedThreads = threadsPerBlock; + + // Serial execution, so all launch parameters set to 1. #if defined(ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED) - adjustedBlocks = Vec::all(static_cast(1)); - adjustedThreads = Vec::all(static_cast(1)); + adjustedBlocks = Vec::all(static_cast(1)); + adjustedThreads = Vec::all(static_cast(1)); #endif - // Threads enabled, set number of blocks to 1. + // Threads enabled, set number of blocks to 1. #if defined(ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED) - adjustedBlocks = Vec::all(static_cast(1)); + adjustedBlocks = Vec::all(static_cast(1)); #endif - return WorkDiv(adjustedBlocks, adjustedThreads, elementsPerThread); -} + return WorkDiv(adjustedBlocks, adjustedThreads, elementsPerThread); + } // If a compile time flag does not define PT_CUT, default to 0.8 (GeV) #ifndef PT_CUT -constexpr float PT_CUT = 0.8f; + constexpr float PT_CUT = 0.8f; #endif -const unsigned int MAX_BLOCKS = 80; -const unsigned int MAX_CONNECTED_MODULES = 40; + const unsigned int MAX_BLOCKS = 80; + const unsigned int MAX_CONNECTED_MODULES = 40; -const unsigned int N_MAX_PIXEL_SEGMENTS_PER_MODULE = 50000; + const unsigned int N_MAX_PIXEL_SEGMENTS_PER_MODULE = 50000; -const unsigned int N_MAX_PIXEL_MD_PER_MODULES = 2 * N_MAX_PIXEL_SEGMENTS_PER_MODULE; + const unsigned int N_MAX_PIXEL_MD_PER_MODULES = 2 * N_MAX_PIXEL_SEGMENTS_PER_MODULE; -const unsigned int N_MAX_PIXEL_TRIPLETS = 5000; -const unsigned int N_MAX_PIXEL_QUINTUPLETS = 15000; + const unsigned int N_MAX_PIXEL_TRIPLETS = 5000; + const unsigned int N_MAX_PIXEL_QUINTUPLETS = 15000; -const unsigned int N_MAX_PIXEL_TRACK_CANDIDATES = 30000; -const unsigned int N_MAX_NONPIXEL_TRACK_CANDIDATES = 1000; + const unsigned int N_MAX_PIXEL_TRACK_CANDIDATES = 30000; + const unsigned int N_MAX_NONPIXEL_TRACK_CANDIDATES = 1000; -const unsigned int size_superbins = 45000; + const unsigned int size_superbins = 45000; -// Temporary fix for endcap buffer allocation. -const unsigned int endcap_size = 9105; + // Temporary fix for endcap buffer allocation. + const unsigned int endcap_size = 9105; -// Temporary fix for module buffer allocation. -const unsigned int modules_size = 26401; -const unsigned int pix_tot = 1796504; + // Temporary fix for module buffer allocation. + const unsigned int modules_size = 26401; + const unsigned int pix_tot = 1796504; -namespace SDL { //defining the constant host device variables right up here ALPAKA_STATIC_ACC_MEM_GLOBAL const float miniMulsPtScaleBarrel[6] = {0.0052, 0.0038, 0.0034, 0.0034, 0.0032, 0.0034}; ALPAKA_STATIC_ACC_MEM_GLOBAL const float miniMulsPtScaleEndcap[5] = {0.006, 0.006, 0.006, 0.006, 0.006}; @@ -157,7 +163,7 @@ namespace SDL { ALPAKA_STATIC_ACC_MEM_GLOBAL const float magnetic_field = 3.8112; // Since C++ can't represent infinity, SDL_INF = 123456789 was used to represent infinity in the data table ALPAKA_STATIC_ACC_MEM_GLOBAL const float SDL_INF = 123456789; -} // namespace SDL +} //namespace SDL namespace T5DNN { // Working points matching LST fake rate (43.9%) or signal acceptance (82.0%) diff --git a/SDL/EndcapGeometry.h b/SDL/EndcapGeometry.h index 62b75d72..3727ed0d 100644 --- a/SDL/EndcapGeometry.h +++ b/SDL/EndcapGeometry.h @@ -24,8 +24,8 @@ namespace SDL { std::map centroid_zs_; // centroid z public: - Buf geoMapDetId_buf; - Buf geoMapPhi_buf; + Buf geoMapDetId_buf; + Buf geoMapPhi_buf; unsigned int nEndCapMap; diff --git a/SDL/Event.cc b/SDL/Event.cc index 422f4689..18dee6d5 100644 --- a/SDL/Event.cc +++ b/SDL/Event.cc @@ -1,7 +1,8 @@ #include "Event.h" SDL::modules* SDL::modulesInGPU = new SDL::modules(); -SDL::modulesBuffer* SDL::modulesBuffers = new SDL::modulesBuffer(devAcc); +SDL::modulesBuffer* SDL::modulesBuffers = new SDL::modulesBuffer(devAcc); +SDL::modulesBuffer const* SDL::modulesBuffersES = nullptr; std::shared_ptr SDL::pixelMapping = std::make_shared(); uint16_t SDL::nModules; uint16_t SDL::nLowerModules; @@ -165,12 +166,12 @@ void SDL::Event::resetEvent() { void SDL::initModules(const char* moduleMetaDataFilePath) { QueueAcc queue(devAcc); - // Set the relevant data pointers. - modulesInGPU->setData(*modulesBuffers); - // nModules gets filled here - loadModulesFromFile( - modulesInGPU, modulesBuffers, nModules, nLowerModules, *pixelMapping, queue, moduleMetaDataFilePath); + loadModulesFromFile(modulesBuffers, nModules, nLowerModules, *pixelMapping, queue, moduleMetaDataFilePath); + + // Set the relevant data pointers. + modulesBuffersES = modulesBuffers; + modulesInGPU->setData(*modulesBuffersES); } // Temporary solution to the global variables. Should be freed with shared_ptr. @@ -196,13 +197,13 @@ void SDL::Event::addHitToEvent(std::vector x, // Initialize space on device/host for next event. if (hitsInGPU == nullptr) { hitsInGPU = new SDL::hits(); - hitsBuffers = new SDL::hitsBuffer(nModules, nHits, devAcc, queue); + hitsBuffers = new SDL::hitsBuffer(nModules, nHits, devAcc, queue); hitsInGPU->setData(*hitsBuffers); } if (rangesInGPU == nullptr) { rangesInGPU = new SDL::objectRanges(); - rangesBuffers = new SDL::objectRangesBuffer(nModules, nLowerModules, devAcc, queue); + rangesBuffers = new SDL::objectRangesBuffer(nModules, nLowerModules, devAcc, queue); rangesInGPU->setData(*rangesBuffers); } @@ -316,7 +317,7 @@ void SDL::Event::addPixelSegmentToEvent(std::vector hitIndices0, nTotalMDs += N_MAX_PIXEL_MD_PER_MODULES; mdsInGPU = new SDL::miniDoublets(); - miniDoubletsBuffers = new SDL::miniDoubletsBuffer(nTotalMDs, nLowerModules, devAcc, queue); + miniDoubletsBuffers = new SDL::miniDoubletsBuffer(nTotalMDs, nLowerModules, devAcc, queue); mdsInGPU->setData(*miniDoubletsBuffers); alpaka::memcpy(queue, miniDoubletsBuffers->nMemoryLocations_buf, nTotalMDs_view); @@ -347,7 +348,7 @@ void SDL::Event::addPixelSegmentToEvent(std::vector hitIndices0, segmentsInGPU = new SDL::segments(); segmentsBuffers = - new SDL::segmentsBuffer(nTotalSegments, nLowerModules, N_MAX_PIXEL_SEGMENTS_PER_MODULE, devAcc, queue); + new SDL::segmentsBuffer(nTotalSegments, nLowerModules, N_MAX_PIXEL_SEGMENTS_PER_MODULE, devAcc, queue); segmentsInGPU->setData(*segmentsBuffers); alpaka::memcpy(queue, segmentsBuffers->nMemoryLocations_buf, nTotalSegments_view); @@ -459,7 +460,7 @@ void SDL::Event::createMiniDoublets() { if (mdsInGPU == nullptr) { mdsInGPU = new SDL::miniDoublets(); - miniDoubletsBuffers = new SDL::miniDoubletsBuffer(nTotalMDs, nLowerModules, devAcc, queue); + miniDoubletsBuffers = new SDL::miniDoubletsBuffer(nTotalMDs, nLowerModules, devAcc, queue); mdsInGPU->setData(*miniDoubletsBuffers); } @@ -504,7 +505,7 @@ void SDL::Event::createSegmentsWithModuleMap() { if (segmentsInGPU == nullptr) { segmentsInGPU = new SDL::segments(); segmentsBuffers = - new SDL::segmentsBuffer(nTotalSegments, nLowerModules, N_MAX_PIXEL_SEGMENTS_PER_MODULE, devAcc, queue); + new SDL::segmentsBuffer(nTotalSegments, nLowerModules, N_MAX_PIXEL_SEGMENTS_PER_MODULE, devAcc, queue); segmentsInGPU->setData(*segmentsBuffers); } @@ -565,7 +566,7 @@ void SDL::Event::createTriplets() { tripletsInGPU = new SDL::triplets(); tripletsBuffers = - new SDL::tripletsBuffer(*alpaka::getPtrNative(maxTriplets_buf), nLowerModules, devAcc, queue); + new SDL::tripletsBuffer(*alpaka::getPtrNative(maxTriplets_buf), nLowerModules, devAcc, queue); tripletsInGPU->setData(*tripletsBuffers); alpaka::memcpy(queue, tripletsBuffers->nMemoryLocations_buf, maxTriplets_buf, 1); @@ -591,7 +592,7 @@ void SDL::Event::createTriplets() { // Allocate and copy module_nConnectedModules from device to host auto module_nConnectedModules_buf = allocBufWrapper(devHost, nLowerModules, queue); - alpaka::memcpy(queue, module_nConnectedModules_buf, modulesBuffers->nConnectedModules_buf, nLowerModules); + alpaka::memcpy(queue, module_nConnectedModules_buf, modulesBuffersES->nConnectedModules_buf, nLowerModules); alpaka::wait(queue); uint16_t* module_nConnectedModules = alpaka::getPtrNative(module_nConnectedModules_buf); @@ -651,7 +652,7 @@ void SDL::Event::createTriplets() { void SDL::Event::createTrackCandidates() { if (trackCandidatesInGPU == nullptr) { trackCandidatesInGPU = new SDL::trackCandidates(); - trackCandidatesBuffers = new SDL::trackCandidatesBuffer( + trackCandidatesBuffers = new SDL::trackCandidatesBuffer( N_MAX_NONPIXEL_TRACK_CANDIDATES + N_MAX_PIXEL_TRACK_CANDIDATES, devAcc, queue); trackCandidatesInGPU->setData(*trackCandidatesBuffers); } @@ -815,7 +816,7 @@ void SDL::Event::createTrackCandidates() { void SDL::Event::createPixelTriplets() { if (pixelTripletsInGPU == nullptr) { pixelTripletsInGPU = new SDL::pixelTriplets(); - pixelTripletsBuffers = new SDL::pixelTripletsBuffer(N_MAX_PIXEL_TRIPLETS, devAcc, queue); + pixelTripletsBuffers = new SDL::pixelTripletsBuffer(N_MAX_PIXEL_TRIPLETS, devAcc, queue); pixelTripletsInGPU->setData(*pixelTripletsBuffers); } @@ -962,7 +963,7 @@ void SDL::Event::createQuintuplets() { if (quintupletsInGPU == nullptr) { quintupletsInGPU = new SDL::quintuplets(); - quintupletsBuffers = new SDL::quintupletsBuffer(nTotalQuintuplets, nLowerModules, devAcc, queue); + quintupletsBuffers = new SDL::quintupletsBuffer(nTotalQuintuplets, nLowerModules, devAcc, queue); quintupletsInGPU->setData(*quintupletsBuffers); alpaka::memcpy(queue, quintupletsBuffers->nMemoryLocations_buf, nTotalQuintuplets_buf, 1); @@ -1042,12 +1043,12 @@ void SDL::Event::pixelLineSegmentCleaning() { void SDL::Event::createPixelQuintuplets() { if (pixelQuintupletsInGPU == nullptr) { pixelQuintupletsInGPU = new SDL::pixelQuintuplets(); - pixelQuintupletsBuffers = new SDL::pixelQuintupletsBuffer(N_MAX_PIXEL_QUINTUPLETS, devAcc, queue); + pixelQuintupletsBuffers = new SDL::pixelQuintupletsBuffer(N_MAX_PIXEL_QUINTUPLETS, devAcc, queue); pixelQuintupletsInGPU->setData(*pixelQuintupletsBuffers); } if (trackCandidatesInGPU == nullptr) { trackCandidatesInGPU = new SDL::trackCandidates(); - trackCandidatesBuffers = new SDL::trackCandidatesBuffer( + trackCandidatesBuffers = new SDL::trackCandidatesBuffer( N_MAX_NONPIXEL_TRACK_CANDIDATES + N_MAX_PIXEL_TRACK_CANDIDATES, devAcc, queue); trackCandidatesInGPU->setData(*trackCandidatesBuffers); } @@ -1181,10 +1182,10 @@ void SDL::Event::addMiniDoubletsToEventExplicit() { alpaka::memcpy(queue, nMDsCPU_buf, miniDoubletsBuffers->nMDs_buf, nLowerModules); auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules, queue); - alpaka::memcpy(queue, module_subdets_buf, modulesBuffers->subdets_buf, nLowerModules); + alpaka::memcpy(queue, module_subdets_buf, modulesBuffersES->subdets_buf, nLowerModules); auto module_layers_buf = allocBufWrapper(devHost, nLowerModules, queue); - alpaka::memcpy(queue, module_layers_buf, modulesBuffers->layers_buf, nLowerModules); + alpaka::memcpy(queue, module_layers_buf, modulesBuffersES->layers_buf, nLowerModules); auto module_hitRanges_buf = allocBufWrapper(devHost, nLowerModules * 2, queue); alpaka::memcpy(queue, module_hitRanges_buf, hitsBuffers->hitRanges_buf, nLowerModules * 2); @@ -1212,10 +1213,10 @@ void SDL::Event::addSegmentsToEventExplicit() { alpaka::memcpy(queue, nSegmentsCPU_buf, segmentsBuffers->nSegments_buf, nLowerModules); auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules, queue); - alpaka::memcpy(queue, module_subdets_buf, modulesBuffers->subdets_buf, nLowerModules); + alpaka::memcpy(queue, module_subdets_buf, modulesBuffersES->subdets_buf, nLowerModules); auto module_layers_buf = allocBufWrapper(devHost, nLowerModules, queue); - alpaka::memcpy(queue, module_layers_buf, modulesBuffers->layers_buf, nLowerModules); + alpaka::memcpy(queue, module_layers_buf, modulesBuffersES->layers_buf, nLowerModules); alpaka::wait(queue); @@ -1239,10 +1240,10 @@ void SDL::Event::addQuintupletsToEventExplicit() { alpaka::memcpy(queue, nQuintupletsCPU_buf, quintupletsBuffers->nQuintuplets_buf, nLowerModules); auto module_subdets_buf = allocBufWrapper(devHost, nModules, queue); - alpaka::memcpy(queue, module_subdets_buf, modulesBuffers->subdets_buf, nModules); + alpaka::memcpy(queue, module_subdets_buf, modulesBuffersES->subdets_buf, nModules); auto module_layers_buf = allocBufWrapper(devHost, nLowerModules, queue); - alpaka::memcpy(queue, module_layers_buf, modulesBuffers->layers_buf, nLowerModules); + alpaka::memcpy(queue, module_layers_buf, modulesBuffersES->layers_buf, nLowerModules); auto module_quintupletModuleIndices_buf = allocBufWrapper(devHost, nLowerModules, queue); alpaka::memcpy(queue, module_quintupletModuleIndices_buf, rangesBuffers->quintupletModuleIndices_buf, nLowerModules); @@ -1270,10 +1271,10 @@ void SDL::Event::addTripletsToEventExplicit() { alpaka::memcpy(queue, nTripletsCPU_buf, tripletsBuffers->nTriplets_buf, nLowerModules); auto module_subdets_buf = allocBufWrapper(devHost, nLowerModules, queue); - alpaka::memcpy(queue, module_subdets_buf, modulesBuffers->subdets_buf, nLowerModules); + alpaka::memcpy(queue, module_subdets_buf, modulesBuffersES->subdets_buf, nLowerModules); auto module_layers_buf = allocBufWrapper(devHost, nLowerModules, queue); - alpaka::memcpy(queue, module_layers_buf, modulesBuffers->layers_buf, nLowerModules); + alpaka::memcpy(queue, module_layers_buf, modulesBuffersES->layers_buf, nLowerModules); alpaka::wait(queue); int* nTripletsCPU = alpaka::getPtrNative(nTripletsCPU_buf); @@ -1853,23 +1854,23 @@ SDL::modulesBuffer* SDL::Event::getFullModules() { modulesInCPUFull = new SDL::modulesBuffer(devHost, nModules, 1); modulesInCPUFull->setData(*modulesInCPUFull); - alpaka::memcpy(queue, modulesInCPUFull->detIds_buf, modulesBuffers->detIds_buf, nModules); - alpaka::memcpy(queue, modulesInCPUFull->moduleMap_buf, modulesBuffers->moduleMap_buf, 40 * nModules); - alpaka::memcpy(queue, modulesInCPUFull->nConnectedModules_buf, modulesBuffers->nConnectedModules_buf, nModules); - alpaka::memcpy(queue, modulesInCPUFull->drdzs_buf, modulesBuffers->drdzs_buf, nModules); - alpaka::memcpy(queue, modulesInCPUFull->slopes_buf, modulesBuffers->slopes_buf, nModules); - alpaka::memcpy(queue, modulesInCPUFull->nLowerModules_buf, modulesBuffers->nLowerModules_buf, 1); - alpaka::memcpy(queue, modulesInCPUFull->nModules_buf, modulesBuffers->nModules_buf, 1); - alpaka::memcpy(queue, modulesInCPUFull->layers_buf, modulesBuffers->layers_buf, nModules); - alpaka::memcpy(queue, modulesInCPUFull->rings_buf, modulesBuffers->rings_buf, nModules); - alpaka::memcpy(queue, modulesInCPUFull->modules_buf, modulesBuffers->modules_buf, nModules); - alpaka::memcpy(queue, modulesInCPUFull->rods_buf, modulesBuffers->rods_buf, nModules); - alpaka::memcpy(queue, modulesInCPUFull->subdets_buf, modulesBuffers->subdets_buf, nModules); - alpaka::memcpy(queue, modulesInCPUFull->sides_buf, modulesBuffers->sides_buf, nModules); - alpaka::memcpy(queue, modulesInCPUFull->isInverted_buf, modulesBuffers->isInverted_buf, nModules); - alpaka::memcpy(queue, modulesInCPUFull->isLower_buf, modulesBuffers->isLower_buf, nModules); - alpaka::memcpy(queue, modulesInCPUFull->moduleType_buf, modulesBuffers->moduleType_buf, nModules); - alpaka::memcpy(queue, modulesInCPUFull->moduleLayerType_buf, modulesBuffers->moduleLayerType_buf, nModules); + alpaka::memcpy(queue, modulesInCPUFull->detIds_buf, modulesBuffersES->detIds_buf, nModules); + alpaka::memcpy(queue, modulesInCPUFull->moduleMap_buf, modulesBuffersES->moduleMap_buf, 40 * nModules); + alpaka::memcpy(queue, modulesInCPUFull->nConnectedModules_buf, modulesBuffersES->nConnectedModules_buf, nModules); + alpaka::memcpy(queue, modulesInCPUFull->drdzs_buf, modulesBuffersES->drdzs_buf, nModules); + alpaka::memcpy(queue, modulesInCPUFull->slopes_buf, modulesBuffersES->slopes_buf, nModules); + alpaka::memcpy(queue, modulesInCPUFull->nLowerModules_buf, modulesBuffersES->nLowerModules_buf, 1); + alpaka::memcpy(queue, modulesInCPUFull->nModules_buf, modulesBuffersES->nModules_buf, 1); + alpaka::memcpy(queue, modulesInCPUFull->layers_buf, modulesBuffersES->layers_buf, nModules); + alpaka::memcpy(queue, modulesInCPUFull->rings_buf, modulesBuffersES->rings_buf, nModules); + alpaka::memcpy(queue, modulesInCPUFull->modules_buf, modulesBuffersES->modules_buf, nModules); + alpaka::memcpy(queue, modulesInCPUFull->rods_buf, modulesBuffersES->rods_buf, nModules); + alpaka::memcpy(queue, modulesInCPUFull->subdets_buf, modulesBuffersES->subdets_buf, nModules); + alpaka::memcpy(queue, modulesInCPUFull->sides_buf, modulesBuffersES->sides_buf, nModules); + alpaka::memcpy(queue, modulesInCPUFull->isInverted_buf, modulesBuffersES->isInverted_buf, nModules); + alpaka::memcpy(queue, modulesInCPUFull->isLower_buf, modulesBuffersES->isLower_buf, nModules); + alpaka::memcpy(queue, modulesInCPUFull->moduleType_buf, modulesBuffersES->moduleType_buf, nModules); + alpaka::memcpy(queue, modulesInCPUFull->moduleLayerType_buf, modulesBuffersES->moduleLayerType_buf, nModules); alpaka::wait(queue); } return modulesInCPUFull; @@ -1881,19 +1882,19 @@ SDL::modulesBuffer* SDL::Event::getModules() { modulesInCPU = new SDL::modulesBuffer(devHost, nModules, 1); modulesInCPU->setData(*modulesInCPU); - alpaka::memcpy(queue, modulesInCPU->nLowerModules_buf, modulesBuffers->nLowerModules_buf, 1); - alpaka::memcpy(queue, modulesInCPU->nModules_buf, modulesBuffers->nModules_buf, 1); - alpaka::memcpy(queue, modulesInCPU->detIds_buf, modulesBuffers->detIds_buf, nModules); - alpaka::memcpy(queue, modulesInCPU->isLower_buf, modulesBuffers->isLower_buf, nModules); - alpaka::memcpy(queue, modulesInCPU->layers_buf, modulesBuffers->layers_buf, nModules); - alpaka::memcpy(queue, modulesInCPU->subdets_buf, modulesBuffers->subdets_buf, nModules); - alpaka::memcpy(queue, modulesInCPU->rings_buf, modulesBuffers->rings_buf, nModules); - alpaka::memcpy(queue, modulesInCPU->rods_buf, modulesBuffers->rods_buf, nModules); - alpaka::memcpy(queue, modulesInCPU->modules_buf, modulesBuffers->modules_buf, nModules); - alpaka::memcpy(queue, modulesInCPU->sides_buf, modulesBuffers->sides_buf, nModules); - alpaka::memcpy(queue, modulesInCPU->eta_buf, modulesBuffers->eta_buf, nModules); - alpaka::memcpy(queue, modulesInCPU->r_buf, modulesBuffers->r_buf, nModules); - alpaka::memcpy(queue, modulesInCPU->moduleType_buf, modulesBuffers->moduleType_buf, nModules); + alpaka::memcpy(queue, modulesInCPU->nLowerModules_buf, modulesBuffersES->nLowerModules_buf, 1); + alpaka::memcpy(queue, modulesInCPU->nModules_buf, modulesBuffersES->nModules_buf, 1); + alpaka::memcpy(queue, modulesInCPU->detIds_buf, modulesBuffersES->detIds_buf, nModules); + alpaka::memcpy(queue, modulesInCPU->isLower_buf, modulesBuffersES->isLower_buf, nModules); + alpaka::memcpy(queue, modulesInCPU->layers_buf, modulesBuffersES->layers_buf, nModules); + alpaka::memcpy(queue, modulesInCPU->subdets_buf, modulesBuffersES->subdets_buf, nModules); + alpaka::memcpy(queue, modulesInCPU->rings_buf, modulesBuffersES->rings_buf, nModules); + alpaka::memcpy(queue, modulesInCPU->rods_buf, modulesBuffersES->rods_buf, nModules); + alpaka::memcpy(queue, modulesInCPU->modules_buf, modulesBuffersES->modules_buf, nModules); + alpaka::memcpy(queue, modulesInCPU->sides_buf, modulesBuffersES->sides_buf, nModules); + alpaka::memcpy(queue, modulesInCPU->eta_buf, modulesBuffersES->eta_buf, nModules); + alpaka::memcpy(queue, modulesInCPU->r_buf, modulesBuffersES->r_buf, nModules); + alpaka::memcpy(queue, modulesInCPU->moduleType_buf, modulesBuffersES->moduleType_buf, nModules); alpaka::wait(queue); } return modulesInCPU; diff --git a/SDL/Event.h b/SDL/Event.h index e2f48b3c..851fa29e 100644 --- a/SDL/Event.h +++ b/SDL/Event.h @@ -3,6 +3,7 @@ #include "Hit.h" #include "Module.h" +#include "ModuleMethods.h" #include "Segment.h" #include "Triplet.h" #include "Kernels.h" @@ -34,23 +35,23 @@ namespace SDL { //Device stuff unsigned int nTotalSegments; struct objectRanges* rangesInGPU; - struct objectRangesBuffer* rangesBuffers; + struct objectRangesBuffer* rangesBuffers; struct hits* hitsInGPU; - struct hitsBuffer* hitsBuffers; + struct hitsBuffer* hitsBuffers; struct miniDoublets* mdsInGPU; - struct miniDoubletsBuffer* miniDoubletsBuffers; + struct miniDoubletsBuffer* miniDoubletsBuffers; struct segments* segmentsInGPU; - struct segmentsBuffer* segmentsBuffers; + struct segmentsBuffer* segmentsBuffers; struct triplets* tripletsInGPU; - struct tripletsBuffer* tripletsBuffers; + struct tripletsBuffer* tripletsBuffers; struct quintuplets* quintupletsInGPU; - struct quintupletsBuffer* quintupletsBuffers; + struct quintupletsBuffer* quintupletsBuffers; struct trackCandidates* trackCandidatesInGPU; - struct trackCandidatesBuffer* trackCandidatesBuffers; + struct trackCandidatesBuffer* trackCandidatesBuffers; struct pixelTriplets* pixelTripletsInGPU; - struct pixelTripletsBuffer* pixelTripletsBuffers; + struct pixelTripletsBuffer* pixelTripletsBuffers; struct pixelQuintuplets* pixelQuintupletsInGPU; - struct pixelQuintupletsBuffer* pixelQuintupletsBuffers; + struct pixelQuintupletsBuffer* pixelQuintupletsBuffers; //CPU interface stuff objectRangesBuffer* rangesInCPU; @@ -176,7 +177,8 @@ namespace SDL { //global stuff extern SDL::modules* modulesInGPU; - extern SDL::modulesBuffer* modulesBuffers; + extern SDL::modulesBuffer* modulesBuffers; + extern SDL::modulesBuffer const* modulesBuffersES; // not owned const buffers extern uint16_t nModules; extern uint16_t nLowerModules; void initModules(const char* moduleMetaDataFilePath = "data/centroid.txt"); //read from file and init diff --git a/SDL/Hit.h b/SDL/Hit.h index ec18a554..799a5518 100644 --- a/SDL/Hit.h +++ b/SDL/Hit.h @@ -50,27 +50,27 @@ namespace SDL { } }; - template + template struct hitsBuffer : hits { - Buf nHits_buf; - Buf xs_buf; - Buf ys_buf; - Buf zs_buf; - Buf moduleIndices_buf; - Buf idxs_buf; - Buf detid_buf; - Buf rts_buf; - Buf phis_buf; - Buf etas_buf; - Buf highEdgeXs_buf; - Buf highEdgeYs_buf; - Buf lowEdgeXs_buf; - Buf lowEdgeYs_buf; - Buf hitRanges_buf; - Buf hitRangesLower_buf; - Buf hitRangesUpper_buf; - Buf hitRangesnLower_buf; - Buf hitRangesnUpper_buf; + Buf nHits_buf; + Buf xs_buf; + Buf ys_buf; + Buf zs_buf; + Buf moduleIndices_buf; + Buf idxs_buf; + Buf detid_buf; + Buf rts_buf; + Buf phis_buf; + Buf etas_buf; + Buf highEdgeXs_buf; + Buf highEdgeYs_buf; + Buf lowEdgeXs_buf; + Buf lowEdgeYs_buf; + Buf hitRanges_buf; + Buf hitRangesLower_buf; + Buf hitRangesUpper_buf; + Buf hitRangesnLower_buf; + Buf hitRangesnUpper_buf; template hitsBuffer(unsigned int nModules, unsigned int nMaxHits, TDevAcc const& devAccIn, TQueue& queue) @@ -165,7 +165,7 @@ namespace SDL { return dPhi; }; - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE int binary_search(unsigned int* data, // Array that we are searching over + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE int binary_search(const unsigned int* data, // Array that we are searching over unsigned int search_val, // Value we want to find in data array unsigned int ndata) // Number of elements in data array { diff --git a/SDL/Kernels.h b/SDL/Kernels.h index 2bc264f6..4b4b992c 100644 --- a/SDL/Kernels.h +++ b/SDL/Kernels.h @@ -18,12 +18,12 @@ namespace SDL { ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelTripletFromMemory(struct SDL::pixelTriplets& pixelTripletsInGPU, unsigned int pixelTripletIndex) { - pixelTripletsInGPU.isDup[pixelTripletIndex] = 1; + pixelTripletsInGPU.isDup[pixelTripletIndex] = true; }; ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelQuintupletFromMemory(struct SDL::pixelQuintuplets& pixelQuintupletsInGPU, unsigned int pixelQuintupletIndex) { - pixelQuintupletsInGPU.isDup[pixelQuintupletIndex] = 1; + pixelQuintupletsInGPU.isDup[pixelQuintupletIndex] = true; }; ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelSegmentFromMemory(struct SDL::segments& segmentsInGPU, diff --git a/SDL/LST.cc b/SDL/LST.cc index 97cb38a7..dbfac882 100644 --- a/SDL/LST.cc +++ b/SDL/LST.cc @@ -1,58 +1,61 @@ #include "LST.h" -SDL::LST::LST() { TrackLooperDir_ = getenv("LST_BASE"); } - -void SDL::LST::eventSetup() { - static std::once_flag mapsLoaded; - std::call_once(mapsLoaded, &SDL::LST::loadMaps, this); - TString path = get_absolute_path_after_check_file_exists( - TString::Format("%s/data/centroid_CMSSW_12_2_0_pre2.txt", TrackLooperDir_.Data()).Data()); - static std::once_flag modulesInited; - std::call_once(modulesInited, SDL::initModules, path); -} - -void SDL::LST::loadMaps() { - // Module orientation information (DrDz or phi angles) - TString endcap_geom = get_absolute_path_after_check_file_exists( - TString::Format("%s/data/endcap_orientation_data_CMSSW_12_2_0_pre2.txt", TrackLooperDir_.Data()).Data()); - TString tilted_geom = get_absolute_path_after_check_file_exists( - TString::Format("%s/data/tilted_orientation_data_CMSSW_12_2_0_pre2.txt", TrackLooperDir_.Data()).Data()); - SDL::endcapGeometry->load(endcap_geom.Data()); // centroid values added to the map - SDL::tiltedGeometry.load(tilted_geom.Data()); - - // Module connection map (for line segment building) - TString mappath = get_absolute_path_after_check_file_exists( - TString::Format("%s/data/module_connection_tracing_CMSSW_12_2_0_pre2_merged.txt", TrackLooperDir_.Data()).Data()); - SDL::moduleConnectionMap.load(mappath.Data()); - - TString pLSMapDir = TrackLooperDir_ + "/data/pixelmaps_CMSSW_12_2_0_pre2_0p8minPt/pLS_map"; - std::string connects[] = {"_layer1_subdet5", "_layer2_subdet5", "_layer1_subdet4", "_layer2_subdet4"}; - TString path; - - for (std::string& connect : connects) { - auto connectData = connect.data(); - - path = TString::Format("%s%s.txt", pLSMapDir.Data(), connectData).Data(); - SDL::moduleConnectionMap_pLStoLayer.emplace_back( - ModuleConnectionMap(get_absolute_path_after_check_file_exists(path.Data()).Data())); - - path = TString::Format("%s_pos%s.txt", pLSMapDir.Data(), connectData).Data(); - SDL::moduleConnectionMap_pLStoLayer_pos.emplace_back( - ModuleConnectionMap(get_absolute_path_after_check_file_exists(path.Data()).Data())); - - path = TString::Format("%s_neg%s.txt", pLSMapDir.Data(), connectData).Data(); - SDL::moduleConnectionMap_pLStoLayer_neg.emplace_back( - ModuleConnectionMap(get_absolute_path_after_check_file_exists(path.Data()).Data())); +namespace { + TString trackLooperDir() { return getenv("LST_BASE"); } + + TString get_absolute_path_after_check_file_exists(const std::string name) { + std::filesystem::path fullpath = std::filesystem::absolute(name.c_str()); + if (not std::filesystem::exists(fullpath)) { + std::cout << "ERROR: Could not find the file = " << fullpath << std::endl; + exit(2); + } + return TString(fullpath.string().c_str()); } -} -TString SDL::LST::get_absolute_path_after_check_file_exists(const std::string name) { - std::filesystem::path fullpath = std::filesystem::absolute(name.c_str()); - if (not std::filesystem::exists(fullpath)) { - std::cout << "ERROR: Could not find the file = " << fullpath << std::endl; - exit(2); + void loadMaps() { + // Module orientation information (DrDz or phi angles) + TString endcap_geom = get_absolute_path_after_check_file_exists( + TString::Format("%s/data/endcap_orientation_data_CMSSW_12_2_0_pre2.txt", trackLooperDir().Data()).Data()); + TString tilted_geom = get_absolute_path_after_check_file_exists( + TString::Format("%s/data/tilted_orientation_data_CMSSW_12_2_0_pre2.txt", trackLooperDir().Data()).Data()); + SDL::endcapGeometry->load(endcap_geom.Data()); // centroid values added to the map + SDL::tiltedGeometry.load(tilted_geom.Data()); + + // Module connection map (for line segment building) + TString mappath = get_absolute_path_after_check_file_exists( + TString::Format("%s/data/module_connection_tracing_CMSSW_12_2_0_pre2_merged.txt", trackLooperDir().Data()) + .Data()); + SDL::moduleConnectionMap.load(mappath.Data()); + + TString pLSMapDir = trackLooperDir() + "/data/pixelmaps_CMSSW_12_2_0_pre2_0p8minPt/pLS_map"; + std::string connects[] = {"_layer1_subdet5", "_layer2_subdet5", "_layer1_subdet4", "_layer2_subdet4"}; + TString path; + + for (std::string& connect : connects) { + auto connectData = connect.data(); + + path = TString::Format("%s%s.txt", pLSMapDir.Data(), connectData).Data(); + SDL::moduleConnectionMap_pLStoLayer.emplace_back( + SDL::ModuleConnectionMap(get_absolute_path_after_check_file_exists(path.Data()).Data())); + + path = TString::Format("%s_pos%s.txt", pLSMapDir.Data(), connectData).Data(); + SDL::moduleConnectionMap_pLStoLayer_pos.emplace_back( + SDL::ModuleConnectionMap(get_absolute_path_after_check_file_exists(path.Data()).Data())); + + path = TString::Format("%s_neg%s.txt", pLSMapDir.Data(), connectData).Data(); + SDL::moduleConnectionMap_pLStoLayer_neg.emplace_back( + SDL::ModuleConnectionMap(get_absolute_path_after_check_file_exists(path.Data()).Data())); + } } - return TString(fullpath.string().c_str()); + +} // namespace + +void SDL::LST::loadAndFillES(alpaka::QueueCpuBlocking& queue, struct modulesBuffer* modules) { + ::loadMaps(); + + TString path = get_absolute_path_after_check_file_exists( + TString::Format("%s/data/centroid_CMSSW_12_2_0_pre2.txt", trackLooperDir().Data()).Data()); + SDL::loadModulesFromFile(modules, SDL::nModules, SDL::nLowerModules, *SDL::pixelMapping, queue, path.Data()); } void SDL::LST::prepareInput(const std::vector see_px, diff --git a/SDL/LST.h b/SDL/LST.h index dbbac49c..2445eb14 100644 --- a/SDL/LST.h +++ b/SDL/LST.h @@ -15,12 +15,12 @@ #include "Event.h" namespace SDL { - class LST { public: - LST(); + LST() = default; + + static void loadAndFillES(alpaka::QueueCpuBlocking& queue, struct modulesBuffer* modules); - void eventSetup(); template void run(TQueue& queue, bool verbose, @@ -177,8 +177,6 @@ namespace SDL { std::vector trackCandidateType() { return out_tc_trackCandidateType_; } private: - void loadMaps(); - TString get_absolute_path_after_check_file_exists(const std::string name); void prepareInput(const std::vector see_px, const std::vector see_py, const std::vector see_pz, @@ -208,7 +206,6 @@ namespace SDL { const unsigned int* hitIndices); // Input and output vectors - TString TrackLooperDir_; std::vector in_trkX_; std::vector in_trkY_; std::vector in_trkZ_; diff --git a/SDL/Makefile b/SDL/Makefile index 8167def4..da0d2b7e 100644 --- a/SDL/Makefile +++ b/SDL/Makefile @@ -33,7 +33,11 @@ GENCODE_SM89 := -gencode arch=compute_89,code=sm_89 CXX = g++ CXXFLAGS_CPU = -march=native -mtune=native -Ofast -fno-reciprocal-math -fopenmp-simd -g -Wall -Wshadow -Woverloaded-virtual -fPIC -fopenmp -I.. CXXFLAGS_CUDA = -O3 -g --compiler-options -Wall --compiler-options -Wshadow --compiler-options -Woverloaded-virtual --compiler-options -fPIC --compiler-options -fopenmp -dc -lineinfo --ptxas-options=-v --cudart shared $(GENCODE_SM70) $(GENCODE_SM89) --use_fast_math --default-stream per-thread -I.. -ALPAKAINCLUDE = -I${ALPAKA_ROOT}/include -I/${BOOST_ROOT}/include -std=c++17 -I$(CMSSW_BASE)/src +CMSSWINCLUDE := -I${CMSSW_BASE}/src +ifdef CMSSW_RELEASE_BASE +CMSSWINCLUDE := ${CMSSWINCLUDE} -I${CMSSW_RELEASE_BASE}/src +endif +ALPAKAINCLUDE = -I${ALPAKA_ROOT}/include -I/${BOOST_ROOT}/include -std=c++17 ${CMSSWINCLUDE} ALPAKASERIAL = -DALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED ALPAKACUDA = -DALPAKA_ACC_GPU_CUDA_ENABLED -DALPAKA_ACC_GPU_CUDA_ONLY --expt-relaxed-constexpr ROOTCFLAGS = -pthread -m64 -I$(ROOT_ROOT)/include diff --git a/SDL/MiniDoublet.h b/SDL/MiniDoublet.h index 15692eb8..898e97fe 100644 --- a/SDL/MiniDoublet.h +++ b/SDL/MiniDoublet.h @@ -93,50 +93,50 @@ namespace SDL { } }; - template + template struct miniDoubletsBuffer : miniDoublets { - Buf nMemoryLocations_buf; - - Buf anchorHitIndices_buf; - Buf outerHitIndices_buf; - Buf moduleIndices_buf; - Buf nMDs_buf; - Buf totOccupancyMDs_buf; - Buf dphichanges_buf; - - Buf dzs_buf; - Buf dphis_buf; - - Buf shiftedXs_buf; - Buf shiftedYs_buf; - Buf shiftedZs_buf; - Buf noShiftedDzs_buf; - Buf noShiftedDphis_buf; - Buf noShiftedDphiChanges_buf; - - Buf anchorX_buf; - Buf anchorY_buf; - Buf anchorZ_buf; - Buf anchorRt_buf; - Buf anchorPhi_buf; - Buf anchorEta_buf; - Buf anchorHighEdgeX_buf; - Buf anchorHighEdgeY_buf; - Buf anchorLowEdgeX_buf; - Buf anchorLowEdgeY_buf; - Buf anchorLowEdgePhi_buf; - Buf anchorHighEdgePhi_buf; - - Buf outerX_buf; - Buf outerY_buf; - Buf outerZ_buf; - Buf outerRt_buf; - Buf outerPhi_buf; - Buf outerEta_buf; - Buf outerHighEdgeX_buf; - Buf outerHighEdgeY_buf; - Buf outerLowEdgeX_buf; - Buf outerLowEdgeY_buf; + Buf nMemoryLocations_buf; + + Buf anchorHitIndices_buf; + Buf outerHitIndices_buf; + Buf moduleIndices_buf; + Buf nMDs_buf; + Buf totOccupancyMDs_buf; + Buf dphichanges_buf; + + Buf dzs_buf; + Buf dphis_buf; + + Buf shiftedXs_buf; + Buf shiftedYs_buf; + Buf shiftedZs_buf; + Buf noShiftedDzs_buf; + Buf noShiftedDphis_buf; + Buf noShiftedDphiChanges_buf; + + Buf anchorX_buf; + Buf anchorY_buf; + Buf anchorZ_buf; + Buf anchorRt_buf; + Buf anchorPhi_buf; + Buf anchorEta_buf; + Buf anchorHighEdgeX_buf; + Buf anchorHighEdgeY_buf; + Buf anchorLowEdgeX_buf; + Buf anchorLowEdgeY_buf; + Buf anchorLowEdgePhi_buf; + Buf anchorHighEdgePhi_buf; + + Buf outerX_buf; + Buf outerY_buf; + Buf outerZ_buf; + Buf outerRt_buf; + Buf outerPhi_buf; + Buf outerEta_buf; + Buf outerHighEdgeX_buf; + Buf outerHighEdgeY_buf; + Buf outerLowEdgeX_buf; + Buf outerLowEdgeY_buf; template miniDoubletsBuffer(unsigned int nMemoryLoc, uint16_t nLowerModules, TDevAcc const& devAccIn, TQueue& queue) @@ -445,12 +445,12 @@ namespace SDL { float drprime; // The radial shift size in x-y plane projection float drprime_x; // x-component of drprime float drprime_y; // y-component of drprime - float& slope = + const float& slope = modulesInGPU.slopes[lowerModuleIndex]; // The slope of the possible strip hits for a given module in x-y plane float absArctanSlope; float angleM; // the angle M is the angle of rotation of the module in x-y plane if the possible strip hits are along the x-axis, then angleM = 0, and if the possible strip hits are along y-axis angleM = 90 degrees float absdzprime; // The distance between the two points after shifting - float& drdz_ = modulesInGPU.drdzs[lowerModuleIndex]; + const float& drdz_ = modulesInGPU.drdzs[lowerModuleIndex]; // Assign hit pointers based on their hit type if (modulesInGPU.moduleType[lowerModuleIndex] == PS) { // TODO: This is somewhat of an mystery.... somewhat confused why this is the case diff --git a/SDL/Module.h b/SDL/Module.h index 5c47bfe9..14106035 100644 --- a/SDL/Module.h +++ b/SDL/Module.h @@ -1,13 +1,8 @@ #ifndef Module_cuh #define Module_cuh -#include -#include - +#include #include "Constants.h" -#include "TiltedGeometry.h" -#include "EndcapGeometry.h" -#include "ModuleConnectionMap.h" namespace SDL { enum SubDet { InnerPixel = 0, Barrel = 5, Endcap = 4 }; @@ -18,14 +13,6 @@ namespace SDL { enum ModuleLayerType { Pixel, Strip, InnerPixelLayer }; - // TODO: Change this to remove it from global scope. - inline std::map* detIdToIndex; - inline std::map* module_x; - inline std::map* module_y; - inline std::map* module_z; - inline std::map* module_type; // 23 : Ph2PSP, 24 : Ph2PSS, 25 : Ph2SS - // https://github.com/cms-sw/cmssw/blob/5e809e8e0a625578aa265dc4b128a93830cb5429/Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h#L29 - struct objectRanges { int* hitRanges; int* hitRangesLower; @@ -92,36 +79,36 @@ namespace SDL { } }; - template + template struct objectRangesBuffer : objectRanges { - Buf hitRanges_buf; - Buf hitRangesLower_buf; - Buf hitRangesUpper_buf; - Buf hitRangesnLower_buf; - Buf hitRangesnUpper_buf; - Buf mdRanges_buf; - Buf segmentRanges_buf; - Buf trackletRanges_buf; - Buf tripletRanges_buf; - Buf trackCandidateRanges_buf; - Buf quintupletRanges_buf; - - Buf nEligibleT5Modules_buf; - Buf indicesOfEligibleT5Modules_buf; - - Buf quintupletModuleIndices_buf; - Buf quintupletModuleOccupancy_buf; - Buf miniDoubletModuleIndices_buf; - Buf miniDoubletModuleOccupancy_buf; - Buf segmentModuleIndices_buf; - Buf segmentModuleOccupancy_buf; - Buf tripletModuleIndices_buf; - Buf tripletModuleOccupancy_buf; - - Buf device_nTotalMDs_buf; - Buf device_nTotalSegs_buf; - Buf device_nTotalTrips_buf; - Buf device_nTotalQuints_buf; + Buf hitRanges_buf; + Buf hitRangesLower_buf; + Buf hitRangesUpper_buf; + Buf hitRangesnLower_buf; + Buf hitRangesnUpper_buf; + Buf mdRanges_buf; + Buf segmentRanges_buf; + Buf trackletRanges_buf; + Buf tripletRanges_buf; + Buf trackCandidateRanges_buf; + Buf quintupletRanges_buf; + + Buf nEligibleT5Modules_buf; + Buf indicesOfEligibleT5Modules_buf; + + Buf quintupletModuleIndices_buf; + Buf quintupletModuleOccupancy_buf; + Buf miniDoubletModuleIndices_buf; + Buf miniDoubletModuleOccupancy_buf; + Buf segmentModuleIndices_buf; + Buf segmentModuleOccupancy_buf; + Buf tripletModuleIndices_buf; + Buf tripletModuleOccupancy_buf; + + Buf device_nTotalMDs_buf; + Buf device_nTotalSegs_buf; + Buf device_nTotalTrips_buf; + Buf device_nTotalQuints_buf; template objectRangesBuffer(unsigned int nMod, unsigned int nLowerMod, TDevAcc const& devAccIn, TQueue& queue) @@ -167,42 +154,41 @@ namespace SDL { }; struct modules { - unsigned int* detIds; - uint16_t* moduleMap; - unsigned int* mapdetId; - uint16_t* mapIdx; - uint16_t* nConnectedModules; - float* drdzs; - float* slopes; - uint16_t* nModules; - uint16_t* nLowerModules; - uint16_t* partnerModuleIndices; - - short* layers; - short* rings; - short* modules; - short* rods; - short* subdets; - short* sides; - float* eta; - float* r; - bool* isInverted; - bool* isLower; - bool* isAnchor; - ModuleType* moduleType; - ModuleLayerType* moduleLayerType; - int* sdlLayers; - - unsigned int* connectedPixels; - - bool parseIsInverted(short subdet, short side, short module, short layer) { + const unsigned int* detIds; + const uint16_t* moduleMap; + const unsigned int* mapdetId; + const uint16_t* mapIdx; + const uint16_t* nConnectedModules; + const float* drdzs; + const float* slopes; + const uint16_t* nModules; + const uint16_t* nLowerModules; + const uint16_t* partnerModuleIndices; + + const short* layers; + const short* rings; + const short* modules; + const short* rods; + const short* subdets; + const short* sides; + const float* eta; + const float* r; + const bool* isInverted; + const bool* isLower; + const bool* isAnchor; + const ModuleType* moduleType; + const ModuleLayerType* moduleLayerType; + const int* sdlLayers; + const unsigned int* connectedPixels; + + static bool parseIsInverted(short subdet, short side, short module, short layer) { if (subdet == Endcap) { if (side == NegZ) { return module % 2 == 1; } else if (side == PosZ) { return module % 2 == 0; } else { - return 0; + return false; } } else if (subdet == Barrel) { if (side == Center) { @@ -211,7 +197,7 @@ namespace SDL { } else if (layer >= 4) { return module % 2 == 0; } else { - return 0; + return false; } } else if (side == NegZ or side == PosZ) { if (layer <= 2) { @@ -219,24 +205,26 @@ namespace SDL { } else if (layer == 3) { return module % 2 == 0; } else { - return 0; + return false; } } else { - return 0; + return false; } } else { - return 0; + return false; } }; - bool parseIsLower(bool isInvertedx, unsigned int detId) { return (isInvertedx) ? !(detId & 1) : (detId & 1); }; + static bool parseIsLower(bool isInvertedx, unsigned int detId) { + return (isInvertedx) ? !(detId & 1) : (detId & 1); + }; - unsigned int parsePartnerModuleId(unsigned int detId, bool isLowerx, bool isInvertedx) { + static unsigned int parsePartnerModuleId(unsigned int detId, bool isLowerx, bool isInvertedx) { return isLowerx ? (isInvertedx ? detId - 1 : detId + 1) : (isInvertedx ? detId + 1 : detId - 1); }; template - void setData(TBuff& modulesbuf) { + void setData(const TBuff& modulesbuf) { detIds = alpaka::getPtrNative(modulesbuf.detIds_buf); moduleMap = alpaka::getPtrNative(modulesbuf.moduleMap_buf); mapdetId = alpaka::getPtrNative(modulesbuf.mapdetId_buf); @@ -261,41 +249,39 @@ namespace SDL { isAnchor = alpaka::getPtrNative(modulesbuf.isAnchor_buf); moduleType = alpaka::getPtrNative(modulesbuf.moduleType_buf); moduleLayerType = alpaka::getPtrNative(modulesbuf.moduleLayerType_buf); - - connectedPixels = alpaka::getPtrNative(modulesbuf.connectedPixels_buf); sdlLayers = alpaka::getPtrNative(modulesbuf.sdlLayers_buf); + connectedPixels = alpaka::getPtrNative(modulesbuf.connectedPixels_buf); } }; - template + template struct modulesBuffer : modules { - Buf detIds_buf; - Buf moduleMap_buf; - Buf mapdetId_buf; - Buf mapIdx_buf; - Buf nConnectedModules_buf; - Buf drdzs_buf; - Buf slopes_buf; - Buf nModules_buf; - Buf nLowerModules_buf; - Buf partnerModuleIndices_buf; - - Buf layers_buf; - Buf rings_buf; - Buf modules_buf; - Buf rods_buf; - Buf subdets_buf; - Buf sides_buf; - Buf eta_buf; - Buf r_buf; - Buf isInverted_buf; - Buf isLower_buf; - Buf isAnchor_buf; - Buf moduleType_buf; - Buf moduleLayerType_buf; - - Buf connectedPixels_buf; - Buf sdlLayers_buf; + Buf detIds_buf; + Buf moduleMap_buf; + Buf mapdetId_buf; + Buf mapIdx_buf; + Buf nConnectedModules_buf; + Buf drdzs_buf; + Buf slopes_buf; + Buf nModules_buf; + Buf nLowerModules_buf; + Buf partnerModuleIndices_buf; + + Buf layers_buf; + Buf rings_buf; + Buf modules_buf; + Buf rods_buf; + Buf subdets_buf; + Buf sides_buf; + Buf eta_buf; + Buf r_buf; + Buf isInverted_buf; + Buf isLower_buf; + Buf isAnchor_buf; + Buf moduleType_buf; + Buf moduleLayerType_buf; + Buf sdlLayers_buf; + Buf connectedPixels_buf; template modulesBuffer(TDevAcc const& devAccIn, unsigned int nMod = modules_size, unsigned int nPixs = pix_tot) @@ -324,419 +310,48 @@ namespace SDL { moduleType_buf(allocBufWrapper(devAccIn, nMod)), moduleLayerType_buf(allocBufWrapper(devAccIn, nMod)), sdlLayers_buf(allocBufWrapper(devAccIn, nMod)), - connectedPixels_buf(allocBufWrapper(devAccIn, nPixs)) {} - }; - - // PixelMap is never allocated on the device. - // This is also not passed to any of the kernels, so we can combine the structs. - struct pixelMap { - Buf connectedPixelsIndex_buf; - Buf connectedPixelsSizes_buf; - Buf connectedPixelsIndexPos_buf; - Buf connectedPixelsSizesPos_buf; - Buf connectedPixelsIndexNeg_buf; - Buf connectedPixelsSizesNeg_buf; - - unsigned int* connectedPixelsIndex; - unsigned int* connectedPixelsSizes; - unsigned int* connectedPixelsIndexPos; - unsigned int* connectedPixelsSizesPos; - unsigned int* connectedPixelsIndexNeg; - unsigned int* connectedPixelsSizesNeg; - - int* pixelType; - - pixelMap(unsigned int sizef = size_superbins) - : connectedPixelsIndex_buf(allocBufWrapper(devHost, sizef)), - connectedPixelsSizes_buf(allocBufWrapper(devHost, sizef)), - connectedPixelsIndexPos_buf(allocBufWrapper(devHost, sizef)), - connectedPixelsSizesPos_buf(allocBufWrapper(devHost, sizef)), - connectedPixelsIndexNeg_buf(allocBufWrapper(devHost, sizef)), - connectedPixelsSizesNeg_buf(allocBufWrapper(devHost, sizef)) { - connectedPixelsIndex = alpaka::getPtrNative(connectedPixelsIndex_buf); - connectedPixelsSizes = alpaka::getPtrNative(connectedPixelsSizes_buf); - connectedPixelsIndexPos = alpaka::getPtrNative(connectedPixelsIndexPos_buf); - connectedPixelsSizesPos = alpaka::getPtrNative(connectedPixelsSizesPos_buf); - connectedPixelsIndexNeg = alpaka::getPtrNative(connectedPixelsIndexNeg_buf); - connectedPixelsSizesNeg = alpaka::getPtrNative(connectedPixelsSizesNeg_buf); - } - }; - - template - inline void fillPixelMap(struct modulesBuffer* modulesBuf, struct pixelMap& pixelMapping, TQueue queue) { - std::vector connectedModuleDetIds; - std::vector connectedModuleDetIds_pos; - std::vector connectedModuleDetIds_neg; - - int totalSizes = 0; - int totalSizes_pos = 0; - int totalSizes_neg = 0; - for (unsigned int isuperbin = 0; isuperbin < size_superbins; isuperbin++) { - int sizes = 0; - for (auto const& mCM_pLS : moduleConnectionMap_pLStoLayer) { - std::vector connectedModuleDetIds_pLS = - mCM_pLS.getConnectedModuleDetIds(isuperbin + size_superbins); - connectedModuleDetIds.insert( - connectedModuleDetIds.end(), connectedModuleDetIds_pLS.begin(), connectedModuleDetIds_pLS.end()); - sizes += connectedModuleDetIds_pLS.size(); - } - pixelMapping.connectedPixelsIndex[isuperbin] = totalSizes; - pixelMapping.connectedPixelsSizes[isuperbin] = sizes; - totalSizes += sizes; - - int sizes_pos = 0; - for (auto const& mCM_pLS : moduleConnectionMap_pLStoLayer_pos) { - std::vector connectedModuleDetIds_pLS_pos = mCM_pLS.getConnectedModuleDetIds(isuperbin); - connectedModuleDetIds_pos.insert(connectedModuleDetIds_pos.end(), - connectedModuleDetIds_pLS_pos.begin(), - connectedModuleDetIds_pLS_pos.end()); - sizes_pos += connectedModuleDetIds_pLS_pos.size(); - } - pixelMapping.connectedPixelsIndexPos[isuperbin] = totalSizes_pos; - pixelMapping.connectedPixelsSizesPos[isuperbin] = sizes_pos; - totalSizes_pos += sizes_pos; - - int sizes_neg = 0; - for (auto const& mCM_pLS : moduleConnectionMap_pLStoLayer_neg) { - std::vector connectedModuleDetIds_pLS_neg = mCM_pLS.getConnectedModuleDetIds(isuperbin); - connectedModuleDetIds_neg.insert(connectedModuleDetIds_neg.end(), - connectedModuleDetIds_pLS_neg.begin(), - connectedModuleDetIds_pLS_neg.end()); - sizes_neg += connectedModuleDetIds_pLS_neg.size(); - } - pixelMapping.connectedPixelsIndexNeg[isuperbin] = totalSizes_neg; - pixelMapping.connectedPixelsSizesNeg[isuperbin] = sizes_neg; - totalSizes_neg += sizes_neg; - } - int connectedPix_size = totalSizes + totalSizes_pos + totalSizes_neg; - - // Temporary check for module initialization. - if (pix_tot != connectedPix_size) { - std::cerr << "\nError: pix_tot and connectedPix_size are not equal.\n"; - std::cerr << "pix_tot: " << pix_tot << ", connectedPix_size: " << connectedPix_size << "\n"; - std::cerr << "Please change pix_tot in Constants.h to make it equal to connectedPix_size.\n"; - throw std::runtime_error("Mismatched sizes"); - } - - auto connectedPixels_buf = allocBufWrapper(devHost, connectedPix_size); - unsigned int* connectedPixels = alpaka::getPtrNative(connectedPixels_buf); - - for (int icondet = 0; icondet < totalSizes; icondet++) { - connectedPixels[icondet] = (*detIdToIndex)[connectedModuleDetIds[icondet]]; - } - for (int icondet = 0; icondet < totalSizes_pos; icondet++) { - connectedPixels[icondet + totalSizes] = (*detIdToIndex)[connectedModuleDetIds_pos[icondet]]; - } - for (int icondet = 0; icondet < totalSizes_neg; icondet++) { - connectedPixels[icondet + totalSizes + totalSizes_pos] = (*detIdToIndex)[connectedModuleDetIds_neg[icondet]]; - } - - alpaka::memcpy(queue, modulesBuf->connectedPixels_buf, connectedPixels_buf, connectedPix_size); - alpaka::wait(queue); - }; - - template - inline void fillConnectedModuleArrayExplicit(struct modulesBuffer* modulesBuf, - unsigned int nMod, - TQueue queue) { - auto moduleMap_buf = allocBufWrapper(devHost, nMod * 40); - uint16_t* moduleMap = alpaka::getPtrNative(moduleMap_buf); - - auto nConnectedModules_buf = allocBufWrapper(devHost, nMod); - uint16_t* nConnectedModules = alpaka::getPtrNative(nConnectedModules_buf); - - for (auto it = (*detIdToIndex).begin(); it != (*detIdToIndex).end(); ++it) { - unsigned int detId = it->first; - uint16_t index = it->second; - auto& connectedModules = moduleConnectionMap.getConnectedModuleDetIds(detId); - nConnectedModules[index] = connectedModules.size(); - for (uint16_t i = 0; i < nConnectedModules[index]; i++) { - moduleMap[index * 40 + i] = (*detIdToIndex)[connectedModules[i]]; - } + template + inline void copyFromSrc(TQueue queue, const modulesBuffer& src) { + alpaka::memcpy(queue, detIds_buf, src.detIds_buf); + alpaka::memcpy(queue, moduleMap_buf, src.moduleMap_buf); + alpaka::memcpy(queue, mapdetId_buf, src.mapdetId_buf); + alpaka::memcpy(queue, mapIdx_buf, src.mapIdx_buf); + alpaka::memcpy(queue, nConnectedModules_buf, src.nConnectedModules_buf); + alpaka::memcpy(queue, drdzs_buf, src.drdzs_buf); + alpaka::memcpy(queue, slopes_buf, src.slopes_buf); + alpaka::memcpy(queue, nModules_buf, src.nModules_buf); + alpaka::memcpy(queue, nLowerModules_buf, src.nLowerModules_buf); + alpaka::memcpy(queue, partnerModuleIndices_buf, src.partnerModuleIndices_buf); + + alpaka::memcpy(queue, layers_buf, src.layers_buf); + alpaka::memcpy(queue, rings_buf, src.rings_buf); + alpaka::memcpy(queue, modules_buf, src.modules_buf); + alpaka::memcpy(queue, rods_buf, src.rods_buf); + alpaka::memcpy(queue, subdets_buf, src.subdets_buf); + alpaka::memcpy(queue, sides_buf, src.sides_buf); + alpaka::memcpy(queue, eta_buf, src.eta_buf); + alpaka::memcpy(queue, r_buf, src.r_buf); + alpaka::memcpy(queue, isInverted_buf, src.isInverted_buf); + alpaka::memcpy(queue, isLower_buf, src.isLower_buf); + alpaka::memcpy(queue, isAnchor_buf, src.isAnchor_buf); + alpaka::memcpy(queue, moduleType_buf, src.moduleType_buf); + alpaka::memcpy(queue, moduleLayerType_buf, src.moduleLayerType_buf); + alpaka::memcpy(queue, sdlLayers_buf, src.sdlLayers_buf); + alpaka::memcpy(queue, connectedPixels_buf, src.connectedPixels_buf); + alpaka::wait(queue); } - alpaka::memcpy(queue, modulesBuf->moduleMap_buf, moduleMap_buf, nMod * 40); - alpaka::memcpy(queue, modulesBuf->nConnectedModules_buf, nConnectedModules_buf, nMod); - alpaka::wait(queue); - }; - - template - inline void fillMapArraysExplicit(struct modulesBuffer* modulesBuf, unsigned int nMod, TQueue queue) { - auto mapIdx_buf = allocBufWrapper(devHost, nMod); - uint16_t* mapIdx = alpaka::getPtrNative(mapIdx_buf); - - auto mapdetId_buf = allocBufWrapper(devHost, nMod); - unsigned int* mapdetId = alpaka::getPtrNative(mapdetId_buf); - - unsigned int counter = 0; - for (auto it = (*detIdToIndex).begin(); it != (*detIdToIndex).end(); ++it) { - unsigned int detId = it->first; - unsigned int index = it->second; - mapIdx[counter] = index; - mapdetId[counter] = detId; - counter++; + template + modulesBuffer(TQueue queue, + const modulesBuffer& src, + unsigned int nMod = modules_size, + unsigned int nPixs = pix_tot) + : modulesBuffer(alpaka::getDev(queue), nMod, nPixs) { + copyFromSrc(queue, src); } - - alpaka::memcpy(queue, modulesBuf->mapIdx_buf, mapIdx_buf, nMod); - alpaka::memcpy(queue, modulesBuf->mapdetId_buf, mapdetId_buf, nMod); - alpaka::wait(queue); }; - inline void setDerivedQuantities(unsigned int detId, - unsigned short& layer, - unsigned short& ring, - unsigned short& rod, - unsigned short& module, - unsigned short& subdet, - unsigned short& side, - float m_x, - float m_y, - float m_z, - float& eta, - float& r) { - subdet = (detId & (7 << 25)) >> 25; - side = (subdet == Endcap) ? (detId & (3 << 23)) >> 23 : (detId & (3 << 18)) >> 18; - layer = (subdet == Endcap) ? (detId & (7 << 18)) >> 18 : (detId & (7 << 20)) >> 20; - ring = (subdet == Endcap) ? (detId & (15 << 12)) >> 12 : 0; - module = (detId & (127 << 2)) >> 2; - rod = (subdet == Endcap) ? 0 : (detId & (127 << 10)) >> 10; - - r = std::sqrt(m_x * m_x + m_y * m_y + m_z * m_z); - eta = ((m_z > 0) - (m_z < 0)) * std::acosh(r / std::sqrt(m_x * m_x + m_y * m_y)); - }; - - template - void loadModulesFromFile(struct modules* modulesInGPU, - struct modulesBuffer* modulesBuf, - uint16_t& nModules, - uint16_t& nLowerModules, - struct pixelMap& pixelMapping, - TQueue& queue, - const char* moduleMetaDataFilePath) { - detIdToIndex = new std::map; - module_x = new std::map; - module_y = new std::map; - module_z = new std::map; - module_type = new std::map; - - /* Load the whole text file into the map first*/ - - std::ifstream ifile; - ifile.open(moduleMetaDataFilePath); - if (!ifile.is_open()) { - std::cout << "ERROR! module list file not present!" << std::endl; - } - std::string line; - uint16_t counter = 0; - - while (std::getline(ifile, line)) { - std::stringstream ss(line); - std::string token; - int count_number = 0; - - unsigned int temp_detId; - while (std::getline(ss, token, ',')) { - if (count_number == 0) { - temp_detId = stoi(token); - (*detIdToIndex)[temp_detId] = counter; - } - if (count_number == 1) - (*module_x)[temp_detId] = std::stof(token); - if (count_number == 2) - (*module_y)[temp_detId] = std::stof(token); - if (count_number == 3) - (*module_z)[temp_detId] = std::stof(token); - if (count_number == 4) { - (*module_type)[temp_detId] = std::stoi(token); - counter++; - } - count_number++; - if (count_number > 4) - break; - } - } - - (*detIdToIndex)[1] = counter; //pixel module is the last module in the module list - counter++; - nModules = counter; - - // Temporary check for module initialization. - if (modules_size != nModules) { - std::cerr << "\nError: modules_size and nModules are not equal.\n"; - std::cerr << "modules_size: " << modules_size << ", nModules: " << nModules << "\n"; - std::cerr << "Please change modules_size in Constants.h to make it equal to nModules.\n"; - throw std::runtime_error("Mismatched sizes"); - } - - auto detIds_buf = allocBufWrapper(devHost, nModules); - auto layers_buf = allocBufWrapper(devHost, nModules); - auto rings_buf = allocBufWrapper(devHost, nModules); - auto rods_buf = allocBufWrapper(devHost, nModules); - auto modules_buf = allocBufWrapper(devHost, nModules); - auto subdets_buf = allocBufWrapper(devHost, nModules); - auto sides_buf = allocBufWrapper(devHost, nModules); - auto eta_buf = allocBufWrapper(devHost, nModules); - auto r_buf = allocBufWrapper(devHost, nModules); - auto isInverted_buf = allocBufWrapper(devHost, nModules); - auto isLower_buf = allocBufWrapper(devHost, nModules); - auto isAnchor_buf = allocBufWrapper(devHost, nModules); - auto moduleType_buf = allocBufWrapper(devHost, nModules); - auto moduleLayerType_buf = allocBufWrapper(devHost, nModules); - auto slopes_buf = allocBufWrapper(devHost, nModules); - auto drdzs_buf = allocBufWrapper(devHost, nModules); - auto partnerModuleIndices_buf = allocBufWrapper(devHost, nModules); - auto sdlLayers_buf = allocBufWrapper(devHost, nModules); - - // Getting the underlying data pointers - unsigned int* host_detIds = alpaka::getPtrNative(detIds_buf); - short* host_layers = alpaka::getPtrNative(layers_buf); - short* host_rings = alpaka::getPtrNative(rings_buf); - short* host_rods = alpaka::getPtrNative(rods_buf); - short* host_modules = alpaka::getPtrNative(modules_buf); - short* host_subdets = alpaka::getPtrNative(subdets_buf); - short* host_sides = alpaka::getPtrNative(sides_buf); - float* host_eta = alpaka::getPtrNative(eta_buf); - float* host_r = alpaka::getPtrNative(r_buf); - bool* host_isInverted = alpaka::getPtrNative(isInverted_buf); - bool* host_isLower = alpaka::getPtrNative(isLower_buf); - bool* host_isAnchor = alpaka::getPtrNative(isAnchor_buf); - ModuleType* host_moduleType = alpaka::getPtrNative(moduleType_buf); - ModuleLayerType* host_moduleLayerType = alpaka::getPtrNative(moduleLayerType_buf); - float* host_slopes = alpaka::getPtrNative(slopes_buf); - float* host_drdzs = alpaka::getPtrNative(drdzs_buf); - uint16_t* host_partnerModuleIndices = alpaka::getPtrNative(partnerModuleIndices_buf); - int* host_sdlLayers = alpaka::getPtrNative(sdlLayers_buf); - - //reassign detIdToIndex indices here - nLowerModules = (nModules - 1) / 2; - uint16_t lowerModuleCounter = 0; - uint16_t upperModuleCounter = nLowerModules + 1; - //0 to nLowerModules - 1 => only lower modules, nLowerModules - pixel module, nLowerModules + 1 to nModules => upper modules - for (auto it = (*detIdToIndex).begin(); it != (*detIdToIndex).end(); it++) { - unsigned int detId = it->first; - float m_x = (*module_x)[detId]; - float m_y = (*module_y)[detId]; - float m_z = (*module_z)[detId]; - unsigned int m_t = (*module_type)[detId]; - - float eta, r; - - uint16_t index; - unsigned short layer, ring, rod, module, subdet, side; - bool isInverted, isLower; - if (detId == 1) { - layer = 0; - ring = 0; - rod = 0; - module = 0; - subdet = 0; - side = 0; - isInverted = false; - isLower = false; - } else { - setDerivedQuantities(detId, layer, ring, rod, module, subdet, side, m_x, m_y, m_z, eta, r); - isInverted = modulesInGPU->parseIsInverted(subdet, side, module, layer); - isLower = modulesInGPU->parseIsLower(isInverted, detId); - } - if (isLower) { - index = lowerModuleCounter; - lowerModuleCounter++; - } else if (detId != 1) { - index = upperModuleCounter; - upperModuleCounter++; - } else { - index = nLowerModules; //pixel - } - //reassigning indices! - (*detIdToIndex)[detId] = index; - host_detIds[index] = detId; - host_layers[index] = layer; - host_rings[index] = ring; - host_rods[index] = rod; - host_modules[index] = module; - host_subdets[index] = subdet; - host_sides[index] = side; - host_eta[index] = eta; - host_r[index] = r; - host_isInverted[index] = isInverted; - host_isLower[index] = isLower; - - //assigning other variables! - if (detId == 1) { - host_moduleType[index] = PixelModule; - host_moduleLayerType[index] = SDL::InnerPixelLayer; - host_slopes[index] = 0; - host_drdzs[index] = 0; - host_isAnchor[index] = false; - } else { - host_moduleType[index] = (m_t == 25 ? SDL::TwoS : SDL::PS); - host_moduleLayerType[index] = (m_t == 23 ? SDL::Pixel : SDL::Strip); - - if (host_moduleType[index] == SDL::PS and host_moduleLayerType[index] == SDL::Pixel) { - host_isAnchor[index] = true; - } else if (host_moduleType[index] == SDL::TwoS and host_isLower[index]) { - host_isAnchor[index] = true; - } else { - host_isAnchor[index] = false; - } - - host_slopes[index] = (subdet == Endcap) ? endcapGeometry->getSlopeLower(detId) : tiltedGeometry.getSlope(detId); - host_drdzs[index] = (subdet == Barrel) ? tiltedGeometry.getDrDz(detId) : 0; - } - - host_sdlLayers[index] = - layer + 6 * (subdet == SDL::Endcap) + 5 * (subdet == SDL::Endcap and host_moduleType[index] == SDL::TwoS); - } - - //partner module stuff, and slopes and drdz move around - for (auto it = (*detIdToIndex).begin(); it != (*detIdToIndex).end(); it++) { - auto& detId = it->first; - auto& index = it->second; - if (detId != 1) { - host_partnerModuleIndices[index] = - (*detIdToIndex)[modulesInGPU->parsePartnerModuleId(detId, host_isLower[index], host_isInverted[index])]; - //add drdz and slope importing stuff here! - if (host_drdzs[index] == 0) { - host_drdzs[index] = host_drdzs[host_partnerModuleIndices[index]]; - } - if (host_slopes[index] == 0) { - host_slopes[index] = host_slopes[host_partnerModuleIndices[index]]; - } - } - } - - auto src_view_nModules = alpaka::createView(devHost, &nModules, (Idx)1u); - alpaka::memcpy(queue, modulesBuf->nModules_buf, src_view_nModules); - - auto src_view_nLowerModules = alpaka::createView(devHost, &nLowerModules, (Idx)1u); - alpaka::memcpy(queue, modulesBuf->nLowerModules_buf, src_view_nLowerModules); - - alpaka::memcpy(queue, modulesBuf->moduleType_buf, moduleType_buf); - alpaka::memcpy(queue, modulesBuf->moduleLayerType_buf, moduleLayerType_buf); - - alpaka::memcpy(queue, modulesBuf->detIds_buf, detIds_buf); - alpaka::memcpy(queue, modulesBuf->layers_buf, layers_buf); - alpaka::memcpy(queue, modulesBuf->rings_buf, rings_buf); - alpaka::memcpy(queue, modulesBuf->rods_buf, rods_buf); - alpaka::memcpy(queue, modulesBuf->modules_buf, modules_buf); - alpaka::memcpy(queue, modulesBuf->subdets_buf, subdets_buf); - alpaka::memcpy(queue, modulesBuf->sides_buf, sides_buf); - alpaka::memcpy(queue, modulesBuf->eta_buf, eta_buf); - alpaka::memcpy(queue, modulesBuf->r_buf, r_buf); - alpaka::memcpy(queue, modulesBuf->isInverted_buf, isInverted_buf); - alpaka::memcpy(queue, modulesBuf->isLower_buf, isLower_buf); - alpaka::memcpy(queue, modulesBuf->isAnchor_buf, isAnchor_buf); - alpaka::memcpy(queue, modulesBuf->slopes_buf, slopes_buf); - alpaka::memcpy(queue, modulesBuf->drdzs_buf, drdzs_buf); - alpaka::memcpy(queue, modulesBuf->partnerModuleIndices_buf, partnerModuleIndices_buf); - alpaka::memcpy(queue, modulesBuf->sdlLayers_buf, sdlLayers_buf); - alpaka::wait(queue); - - fillConnectedModuleArrayExplicit(modulesBuf, nModules, queue); - fillMapArraysExplicit(modulesBuf, nModules, queue); - fillPixelMap(modulesBuf, pixelMapping, queue); - }; } // namespace SDL #endif diff --git a/SDL/ModuleMethods.h b/SDL/ModuleMethods.h new file mode 100644 index 00000000..54a9fa54 --- /dev/null +++ b/SDL/ModuleMethods.h @@ -0,0 +1,434 @@ +#ifndef ModuleMethods_cuh +#define ModuleMethods_cuh + +#include +#include + +#include "Constants.h" +#include "Module.h" +#include "TiltedGeometry.h" +#include "EndcapGeometry.h" +#include "ModuleConnectionMap.h" + +namespace SDL { + // TODO: Change this to remove it from global scope. + inline std::map* detIdToIndex; + inline std::map* module_x; + inline std::map* module_y; + inline std::map* module_z; + inline std::map* module_type; // 23 : Ph2PSP, 24 : Ph2PSS, 25 : Ph2SS + // https://github.com/cms-sw/cmssw/blob/5e809e8e0a625578aa265dc4b128a93830cb5429/Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h#L29 + + // PixelMap is never allocated on the device. + // This is also not passed to any of the kernels, so we can combine the structs. + struct pixelMap { + Buf connectedPixelsIndex_buf; + Buf connectedPixelsSizes_buf; + Buf connectedPixelsIndexPos_buf; + Buf connectedPixelsSizesPos_buf; + Buf connectedPixelsIndexNeg_buf; + Buf connectedPixelsSizesNeg_buf; + + unsigned int* connectedPixelsIndex; + unsigned int* connectedPixelsSizes; + unsigned int* connectedPixelsIndexPos; + unsigned int* connectedPixelsSizesPos; + unsigned int* connectedPixelsIndexNeg; + unsigned int* connectedPixelsSizesNeg; + + int* pixelType; + + pixelMap(unsigned int sizef = size_superbins) + : connectedPixelsIndex_buf(allocBufWrapper(devHost, sizef)), + connectedPixelsSizes_buf(allocBufWrapper(devHost, sizef)), + connectedPixelsIndexPos_buf(allocBufWrapper(devHost, sizef)), + connectedPixelsSizesPos_buf(allocBufWrapper(devHost, sizef)), + connectedPixelsIndexNeg_buf(allocBufWrapper(devHost, sizef)), + connectedPixelsSizesNeg_buf(allocBufWrapper(devHost, sizef)) { + connectedPixelsIndex = alpaka::getPtrNative(connectedPixelsIndex_buf); + connectedPixelsSizes = alpaka::getPtrNative(connectedPixelsSizes_buf); + connectedPixelsIndexPos = alpaka::getPtrNative(connectedPixelsIndexPos_buf); + connectedPixelsSizesPos = alpaka::getPtrNative(connectedPixelsSizesPos_buf); + connectedPixelsIndexNeg = alpaka::getPtrNative(connectedPixelsIndexNeg_buf); + connectedPixelsSizesNeg = alpaka::getPtrNative(connectedPixelsSizesNeg_buf); + } + }; + + template + inline void fillPixelMap(struct modulesBuffer* modulesBuf, struct pixelMap& pixelMapping, TQueue queue) { + std::vector connectedModuleDetIds; + std::vector connectedModuleDetIds_pos; + std::vector connectedModuleDetIds_neg; + + int totalSizes = 0; + int totalSizes_pos = 0; + int totalSizes_neg = 0; + for (unsigned int isuperbin = 0; isuperbin < size_superbins; isuperbin++) { + int sizes = 0; + for (auto const& mCM_pLS : moduleConnectionMap_pLStoLayer) { + std::vector connectedModuleDetIds_pLS = + mCM_pLS.getConnectedModuleDetIds(isuperbin + size_superbins); + connectedModuleDetIds.insert( + connectedModuleDetIds.end(), connectedModuleDetIds_pLS.begin(), connectedModuleDetIds_pLS.end()); + sizes += connectedModuleDetIds_pLS.size(); + } + pixelMapping.connectedPixelsIndex[isuperbin] = totalSizes; + pixelMapping.connectedPixelsSizes[isuperbin] = sizes; + totalSizes += sizes; + + int sizes_pos = 0; + for (auto const& mCM_pLS : moduleConnectionMap_pLStoLayer_pos) { + std::vector connectedModuleDetIds_pLS_pos = mCM_pLS.getConnectedModuleDetIds(isuperbin); + connectedModuleDetIds_pos.insert(connectedModuleDetIds_pos.end(), + connectedModuleDetIds_pLS_pos.begin(), + connectedModuleDetIds_pLS_pos.end()); + sizes_pos += connectedModuleDetIds_pLS_pos.size(); + } + pixelMapping.connectedPixelsIndexPos[isuperbin] = totalSizes_pos; + pixelMapping.connectedPixelsSizesPos[isuperbin] = sizes_pos; + totalSizes_pos += sizes_pos; + + int sizes_neg = 0; + for (auto const& mCM_pLS : moduleConnectionMap_pLStoLayer_neg) { + std::vector connectedModuleDetIds_pLS_neg = mCM_pLS.getConnectedModuleDetIds(isuperbin); + connectedModuleDetIds_neg.insert(connectedModuleDetIds_neg.end(), + connectedModuleDetIds_pLS_neg.begin(), + connectedModuleDetIds_pLS_neg.end()); + sizes_neg += connectedModuleDetIds_pLS_neg.size(); + } + pixelMapping.connectedPixelsIndexNeg[isuperbin] = totalSizes_neg; + pixelMapping.connectedPixelsSizesNeg[isuperbin] = sizes_neg; + totalSizes_neg += sizes_neg; + } + + int connectedPix_size = totalSizes + totalSizes_pos + totalSizes_neg; + + // Temporary check for module initialization. + if (pix_tot != connectedPix_size) { + std::cerr << "\nError: pix_tot and connectedPix_size are not equal.\n"; + std::cerr << "pix_tot: " << pix_tot << ", connectedPix_size: " << connectedPix_size << "\n"; + std::cerr << "Please change pix_tot in Constants.h to make it equal to connectedPix_size.\n"; + throw std::runtime_error("Mismatched sizes"); + } + + auto connectedPixels_buf = allocBufWrapper(devHost, connectedPix_size); + unsigned int* connectedPixels = alpaka::getPtrNative(connectedPixels_buf); + + for (int icondet = 0; icondet < totalSizes; icondet++) { + connectedPixels[icondet] = (*detIdToIndex)[connectedModuleDetIds[icondet]]; + } + for (int icondet = 0; icondet < totalSizes_pos; icondet++) { + connectedPixels[icondet + totalSizes] = (*detIdToIndex)[connectedModuleDetIds_pos[icondet]]; + } + for (int icondet = 0; icondet < totalSizes_neg; icondet++) { + connectedPixels[icondet + totalSizes + totalSizes_pos] = (*detIdToIndex)[connectedModuleDetIds_neg[icondet]]; + } + + alpaka::memcpy(queue, modulesBuf->connectedPixels_buf, connectedPixels_buf, connectedPix_size); + alpaka::wait(queue); + }; + + template + inline void fillConnectedModuleArrayExplicit(struct modulesBuffer* modulesBuf, + unsigned int nMod, + TQueue queue) { + auto moduleMap_buf = allocBufWrapper(devHost, nMod * 40); + uint16_t* moduleMap = alpaka::getPtrNative(moduleMap_buf); + + auto nConnectedModules_buf = allocBufWrapper(devHost, nMod); + uint16_t* nConnectedModules = alpaka::getPtrNative(nConnectedModules_buf); + + for (auto it = (*detIdToIndex).begin(); it != (*detIdToIndex).end(); ++it) { + unsigned int detId = it->first; + uint16_t index = it->second; + auto& connectedModules = moduleConnectionMap.getConnectedModuleDetIds(detId); + nConnectedModules[index] = connectedModules.size(); + for (uint16_t i = 0; i < nConnectedModules[index]; i++) { + moduleMap[index * 40 + i] = (*detIdToIndex)[connectedModules[i]]; + } + } + + alpaka::memcpy(queue, modulesBuf->moduleMap_buf, moduleMap_buf, nMod * 40); + alpaka::memcpy(queue, modulesBuf->nConnectedModules_buf, nConnectedModules_buf, nMod); + alpaka::wait(queue); + }; + + template + inline void fillMapArraysExplicit(struct modulesBuffer* modulesBuf, unsigned int nMod, TQueue queue) { + auto mapIdx_buf = allocBufWrapper(devHost, nMod); + uint16_t* mapIdx = alpaka::getPtrNative(mapIdx_buf); + + auto mapdetId_buf = allocBufWrapper(devHost, nMod); + unsigned int* mapdetId = alpaka::getPtrNative(mapdetId_buf); + + unsigned int counter = 0; + for (auto it = (*detIdToIndex).begin(); it != (*detIdToIndex).end(); ++it) { + unsigned int detId = it->first; + unsigned int index = it->second; + mapIdx[counter] = index; + mapdetId[counter] = detId; + counter++; + } + + alpaka::memcpy(queue, modulesBuf->mapIdx_buf, mapIdx_buf, nMod); + alpaka::memcpy(queue, modulesBuf->mapdetId_buf, mapdetId_buf, nMod); + alpaka::wait(queue); + }; + + inline void setDerivedQuantities(unsigned int detId, + unsigned short& layer, + unsigned short& ring, + unsigned short& rod, + unsigned short& module, + unsigned short& subdet, + unsigned short& side, + float m_x, + float m_y, + float m_z, + float& eta, + float& r) { + subdet = (detId & (7 << 25)) >> 25; + side = (subdet == Endcap) ? (detId & (3 << 23)) >> 23 : (detId & (3 << 18)) >> 18; + layer = (subdet == Endcap) ? (detId & (7 << 18)) >> 18 : (detId & (7 << 20)) >> 20; + ring = (subdet == Endcap) ? (detId & (15 << 12)) >> 12 : 0; + module = (detId & (127 << 2)) >> 2; + rod = (subdet == Endcap) ? 0 : (detId & (127 << 10)) >> 10; + + r = std::sqrt(m_x * m_x + m_y * m_y + m_z * m_z); + eta = ((m_z > 0) - (m_z < 0)) * std::acosh(r / std::sqrt(m_x * m_x + m_y * m_y)); + }; + + template + void loadModulesFromFile(struct modulesBuffer* modulesBuf, + uint16_t& nModules, + uint16_t& nLowerModules, + struct pixelMap& pixelMapping, + TQueue& queue, + const char* moduleMetaDataFilePath) { + detIdToIndex = new std::map; + module_x = new std::map; + module_y = new std::map; + module_z = new std::map; + module_type = new std::map; + + /* Load the whole text file into the map first*/ + + std::ifstream ifile; + ifile.open(moduleMetaDataFilePath); + if (!ifile.is_open()) { + std::cout << "ERROR! module list file not present!" << std::endl; + } + std::string line; + uint16_t counter = 0; + + while (std::getline(ifile, line)) { + std::stringstream ss(line); + std::string token; + int count_number = 0; + + unsigned int temp_detId; + while (std::getline(ss, token, ',')) { + if (count_number == 0) { + temp_detId = stoi(token); + (*detIdToIndex)[temp_detId] = counter; + } + if (count_number == 1) + (*module_x)[temp_detId] = std::stof(token); + if (count_number == 2) + (*module_y)[temp_detId] = std::stof(token); + if (count_number == 3) + (*module_z)[temp_detId] = std::stof(token); + if (count_number == 4) { + (*module_type)[temp_detId] = std::stoi(token); + counter++; + } + count_number++; + if (count_number > 4) + break; + } + } + + (*detIdToIndex)[1] = counter; //pixel module is the last module in the module list + counter++; + nModules = counter; + + // Temporary check for module initialization. + if (modules_size != nModules) { + std::cerr << "\nError: modules_size and nModules are not equal.\n"; + std::cerr << "modules_size: " << modules_size << ", nModules: " << nModules << "\n"; + std::cerr << "Please change modules_size in Constants.h to make it equal to nModules.\n"; + throw std::runtime_error("Mismatched sizes"); + } + + auto detIds_buf = allocBufWrapper(devHost, nModules); + auto layers_buf = allocBufWrapper(devHost, nModules); + auto rings_buf = allocBufWrapper(devHost, nModules); + auto rods_buf = allocBufWrapper(devHost, nModules); + auto modules_buf = allocBufWrapper(devHost, nModules); + auto subdets_buf = allocBufWrapper(devHost, nModules); + auto sides_buf = allocBufWrapper(devHost, nModules); + auto eta_buf = allocBufWrapper(devHost, nModules); + auto r_buf = allocBufWrapper(devHost, nModules); + auto isInverted_buf = allocBufWrapper(devHost, nModules); + auto isLower_buf = allocBufWrapper(devHost, nModules); + auto isAnchor_buf = allocBufWrapper(devHost, nModules); + auto moduleType_buf = allocBufWrapper(devHost, nModules); + auto moduleLayerType_buf = allocBufWrapper(devHost, nModules); + auto slopes_buf = allocBufWrapper(devHost, nModules); + auto drdzs_buf = allocBufWrapper(devHost, nModules); + auto partnerModuleIndices_buf = allocBufWrapper(devHost, nModules); + auto sdlLayers_buf = allocBufWrapper(devHost, nModules); + + // Getting the underlying data pointers + unsigned int* host_detIds = alpaka::getPtrNative(detIds_buf); + short* host_layers = alpaka::getPtrNative(layers_buf); + short* host_rings = alpaka::getPtrNative(rings_buf); + short* host_rods = alpaka::getPtrNative(rods_buf); + short* host_modules = alpaka::getPtrNative(modules_buf); + short* host_subdets = alpaka::getPtrNative(subdets_buf); + short* host_sides = alpaka::getPtrNative(sides_buf); + float* host_eta = alpaka::getPtrNative(eta_buf); + float* host_r = alpaka::getPtrNative(r_buf); + bool* host_isInverted = alpaka::getPtrNative(isInverted_buf); + bool* host_isLower = alpaka::getPtrNative(isLower_buf); + bool* host_isAnchor = alpaka::getPtrNative(isAnchor_buf); + ModuleType* host_moduleType = alpaka::getPtrNative(moduleType_buf); + ModuleLayerType* host_moduleLayerType = alpaka::getPtrNative(moduleLayerType_buf); + float* host_slopes = alpaka::getPtrNative(slopes_buf); + float* host_drdzs = alpaka::getPtrNative(drdzs_buf); + uint16_t* host_partnerModuleIndices = alpaka::getPtrNative(partnerModuleIndices_buf); + int* host_sdlLayers = alpaka::getPtrNative(sdlLayers_buf); + + //reassign detIdToIndex indices here + nLowerModules = (nModules - 1) / 2; + uint16_t lowerModuleCounter = 0; + uint16_t upperModuleCounter = nLowerModules + 1; + //0 to nLowerModules - 1 => only lower modules, nLowerModules - pixel module, nLowerModules + 1 to nModules => upper modules + for (auto it = (*detIdToIndex).begin(); it != (*detIdToIndex).end(); it++) { + unsigned int detId = it->first; + float m_x = (*module_x)[detId]; + float m_y = (*module_y)[detId]; + float m_z = (*module_z)[detId]; + unsigned int m_t = (*module_type)[detId]; + + float eta, r; + + uint16_t index; + unsigned short layer, ring, rod, module, subdet, side; + bool isInverted, isLower; + if (detId == 1) { + layer = 0; + ring = 0; + rod = 0; + module = 0; + subdet = 0; + side = 0; + isInverted = false; + isLower = false; + eta = 0; + r = 0; + } else { + setDerivedQuantities(detId, layer, ring, rod, module, subdet, side, m_x, m_y, m_z, eta, r); + isInverted = SDL::modules::parseIsInverted(subdet, side, module, layer); + isLower = SDL::modules::parseIsLower(isInverted, detId); + } + if (isLower) { + index = lowerModuleCounter; + lowerModuleCounter++; + } else if (detId != 1) { + index = upperModuleCounter; + upperModuleCounter++; + } else { + index = nLowerModules; //pixel + } + //reassigning indices! + (*detIdToIndex)[detId] = index; + host_detIds[index] = detId; + host_layers[index] = layer; + host_rings[index] = ring; + host_rods[index] = rod; + host_modules[index] = module; + host_subdets[index] = subdet; + host_sides[index] = side; + host_eta[index] = eta; + host_r[index] = r; + host_isInverted[index] = isInverted; + host_isLower[index] = isLower; + + //assigning other variables! + if (detId == 1) { + host_moduleType[index] = PixelModule; + host_moduleLayerType[index] = SDL::InnerPixelLayer; + host_slopes[index] = 0; + host_drdzs[index] = 0; + host_isAnchor[index] = false; + } else { + host_moduleType[index] = (m_t == 25 ? SDL::TwoS : SDL::PS); + host_moduleLayerType[index] = (m_t == 23 ? SDL::Pixel : SDL::Strip); + + if (host_moduleType[index] == SDL::PS and host_moduleLayerType[index] == SDL::Pixel) { + host_isAnchor[index] = true; + } else if (host_moduleType[index] == SDL::TwoS and host_isLower[index]) { + host_isAnchor[index] = true; + } else { + host_isAnchor[index] = false; + } + + host_slopes[index] = (subdet == Endcap) ? endcapGeometry->getSlopeLower(detId) : tiltedGeometry.getSlope(detId); + host_drdzs[index] = (subdet == Barrel) ? tiltedGeometry.getDrDz(detId) : 0; + } + + host_sdlLayers[index] = + layer + 6 * (subdet == SDL::Endcap) + 5 * (subdet == SDL::Endcap and host_moduleType[index] == SDL::TwoS); + } + + //partner module stuff, and slopes and drdz move around + for (auto it = (*detIdToIndex).begin(); it != (*detIdToIndex).end(); it++) { + auto& detId = it->first; + auto& index = it->second; + if (detId != 1) { + host_partnerModuleIndices[index] = + (*detIdToIndex)[SDL::modules::parsePartnerModuleId(detId, host_isLower[index], host_isInverted[index])]; + //add drdz and slope importing stuff here! + if (host_drdzs[index] == 0) { + host_drdzs[index] = host_drdzs[host_partnerModuleIndices[index]]; + } + if (host_slopes[index] == 0) { + host_slopes[index] = host_slopes[host_partnerModuleIndices[index]]; + } + } + } + + auto src_view_nModules = alpaka::createView(devHost, &nModules, (Idx)1u); + alpaka::memcpy(queue, modulesBuf->nModules_buf, src_view_nModules); + + auto src_view_nLowerModules = alpaka::createView(devHost, &nLowerModules, (Idx)1u); + alpaka::memcpy(queue, modulesBuf->nLowerModules_buf, src_view_nLowerModules); + + alpaka::memcpy(queue, modulesBuf->moduleType_buf, moduleType_buf); + alpaka::memcpy(queue, modulesBuf->moduleLayerType_buf, moduleLayerType_buf); + + alpaka::memcpy(queue, modulesBuf->detIds_buf, detIds_buf); + alpaka::memcpy(queue, modulesBuf->layers_buf, layers_buf); + alpaka::memcpy(queue, modulesBuf->rings_buf, rings_buf); + alpaka::memcpy(queue, modulesBuf->rods_buf, rods_buf); + alpaka::memcpy(queue, modulesBuf->modules_buf, modules_buf); + alpaka::memcpy(queue, modulesBuf->subdets_buf, subdets_buf); + alpaka::memcpy(queue, modulesBuf->sides_buf, sides_buf); + alpaka::memcpy(queue, modulesBuf->eta_buf, eta_buf); + alpaka::memcpy(queue, modulesBuf->r_buf, r_buf); + alpaka::memcpy(queue, modulesBuf->isInverted_buf, isInverted_buf); + alpaka::memcpy(queue, modulesBuf->isLower_buf, isLower_buf); + alpaka::memcpy(queue, modulesBuf->isAnchor_buf, isAnchor_buf); + alpaka::memcpy(queue, modulesBuf->slopes_buf, slopes_buf); + alpaka::memcpy(queue, modulesBuf->drdzs_buf, drdzs_buf); + alpaka::memcpy(queue, modulesBuf->partnerModuleIndices_buf, partnerModuleIndices_buf); + alpaka::memcpy(queue, modulesBuf->sdlLayers_buf, sdlLayers_buf); + alpaka::wait(queue); + + fillConnectedModuleArrayExplicit(modulesBuf, nModules, queue); + fillMapArraysExplicit(modulesBuf, nModules, queue); + fillPixelMap(modulesBuf, pixelMapping, queue); + }; +} // namespace SDL +#endif diff --git a/SDL/NeuralNetworkWeights.h b/SDL/NeuralNetworkWeights.h index 02419521..90fb6b21 100644 --- a/SDL/NeuralNetworkWeights.h +++ b/SDL/NeuralNetworkWeights.h @@ -1,6 +1,8 @@ #ifndef NeuralNetworkWeights_cuh #define NeuralNetworkWeights_cuh +#include + namespace T5DNN { ALPAKA_STATIC_ACC_MEM_GLOBAL const float bias_0[32] = { -4.5069356f, -5.8842053f, 1.0793180f, -0.1540973f, -0.4705772f, 6.4027028f, -0.6620818f, -7.0734525f, @@ -306,4 +308,4 @@ namespace T5DNN { }; } // namespace T5DNN -#endif \ No newline at end of file +#endif diff --git a/SDL/PixelTriplet.h b/SDL/PixelTriplet.h index 5ab38402..07fdbc37 100644 --- a/SDL/PixelTriplet.h +++ b/SDL/PixelTriplet.h @@ -67,31 +67,31 @@ namespace SDL { } }; - template + template struct pixelTripletsBuffer : pixelTriplets { - Buf pixelSegmentIndices_buf; - Buf tripletIndices_buf; - Buf nPixelTriplets_buf; - Buf totOccupancyPixelTriplets_buf; - Buf pixelRadius_buf; - Buf tripletRadius_buf; - Buf pt_buf; - Buf eta_buf; - Buf phi_buf; - Buf eta_pix_buf; - Buf phi_pix_buf; - Buf score_buf; - Buf isDup_buf; - Buf partOfPT5_buf; - Buf logicalLayers_buf; - Buf hitIndices_buf; - Buf lowerModuleIndices_buf; - Buf centerX_buf; - Buf centerY_buf; - Buf pixelRadiusError_buf; - Buf rPhiChiSquared_buf; - Buf rPhiChiSquaredInwards_buf; - Buf rzChiSquared_buf; + Buf pixelSegmentIndices_buf; + Buf tripletIndices_buf; + Buf nPixelTriplets_buf; + Buf totOccupancyPixelTriplets_buf; + Buf pixelRadius_buf; + Buf tripletRadius_buf; + Buf pt_buf; + Buf eta_buf; + Buf phi_buf; + Buf eta_pix_buf; + Buf phi_pix_buf; + Buf score_buf; + Buf isDup_buf; + Buf partOfPT5_buf; + Buf logicalLayers_buf; + Buf hitIndices_buf; + Buf lowerModuleIndices_buf; + Buf centerX_buf; + Buf centerY_buf; + Buf pixelRadiusError_buf; + Buf rPhiChiSquared_buf; + Buf rPhiChiSquaredInwards_buf; + Buf rzChiSquared_buf; template pixelTripletsBuffer(unsigned int maxPixelTriplets, TDevAcc const& devAccIn, TQueue& queue) @@ -155,7 +155,7 @@ namespace SDL { pixelTripletsInGPU.phi[pixelTripletIndex] = __F2H(phi); pixelTripletsInGPU.eta_pix[pixelTripletIndex] = __F2H(eta_pix); pixelTripletsInGPU.phi_pix[pixelTripletIndex] = __F2H(phi_pix); - pixelTripletsInGPU.isDup[pixelTripletIndex] = 0; + pixelTripletsInGPU.isDup[pixelTripletIndex] = false; pixelTripletsInGPU.score[pixelTripletIndex] = __F2H(score); pixelTripletsInGPU.centerX[pixelTripletIndex] = __F2H(centerX); @@ -1944,26 +1944,26 @@ namespace SDL { } }; - template + template struct pixelQuintupletsBuffer : pixelQuintuplets { - Buf pixelIndices_buf; - Buf T5Indices_buf; - Buf nPixelQuintuplets_buf; - Buf totOccupancyPixelQuintuplets_buf; - Buf isDup_buf; - Buf score_buf; - Buf eta_buf; - Buf phi_buf; - Buf logicalLayers_buf; - Buf hitIndices_buf; - Buf lowerModuleIndices_buf; - Buf pixelRadius_buf; - Buf quintupletRadius_buf; - Buf centerX_buf; - Buf centerY_buf; - Buf rzChiSquared_buf; - Buf rPhiChiSquared_buf; - Buf rPhiChiSquaredInwards_buf; + Buf pixelIndices_buf; + Buf T5Indices_buf; + Buf nPixelQuintuplets_buf; + Buf totOccupancyPixelQuintuplets_buf; + Buf isDup_buf; + Buf score_buf; + Buf eta_buf; + Buf phi_buf; + Buf logicalLayers_buf; + Buf hitIndices_buf; + Buf lowerModuleIndices_buf; + Buf pixelRadius_buf; + Buf quintupletRadius_buf; + Buf centerX_buf; + Buf centerY_buf; + Buf rzChiSquared_buf; + Buf rPhiChiSquared_buf; + Buf rPhiChiSquaredInwards_buf; template pixelQuintupletsBuffer(unsigned int maxPixelQuintuplets, TDevAcc const& devAccIn, TQueue& queue) @@ -2011,7 +2011,7 @@ namespace SDL { float& centerY) { pixelQuintupletsInGPU.pixelIndices[pixelQuintupletIndex] = pixelIndex; pixelQuintupletsInGPU.T5Indices[pixelQuintupletIndex] = T5Index; - pixelQuintupletsInGPU.isDup[pixelQuintupletIndex] = 0; + pixelQuintupletsInGPU.isDup[pixelQuintupletIndex] = false; pixelQuintupletsInGPU.score[pixelQuintupletIndex] = __F2H(score); pixelQuintupletsInGPU.eta[pixelQuintupletIndex] = __F2H(eta); pixelQuintupletsInGPU.phi[pixelQuintupletIndex] = __F2H(phi); @@ -2320,7 +2320,7 @@ namespace SDL { moduleType = modulesInGPU.moduleType[lowerModuleIndices[i]]; moduleSubdet = modulesInGPU.subdets[lowerModuleIndices[i]]; moduleSide = modulesInGPU.sides[lowerModuleIndices[i]]; - float& drdz = modulesInGPU.drdzs[lowerModuleIndices[i]]; + const float& drdz = modulesInGPU.drdzs[lowerModuleIndices[i]]; slopes[i] = modulesInGPU.slopes[lowerModuleIndices[i]]; //category 1 - barrel PS flat if (moduleSubdet == Barrel and moduleType == PS and moduleSide == Center) { @@ -2687,7 +2687,7 @@ namespace SDL { residual = (moduleSubdet == SDL::Barrel) ? (zs[i] - zPix[0]) - slope * (rts[i] - rtPix[0]) : (rts[i] - rtPix[0]) - (zs[i] - zPix[0]) / slope; - float& drdz = modulesInGPU.drdzs[lowerModuleIndex]; + const float& drdz = modulesInGPU.drdzs[lowerModuleIndex]; //PS Modules if (moduleType == 0) { error = 0.15f; diff --git a/SDL/Quintuplet.h b/SDL/Quintuplet.h index 9048e42d..d3f24f71 100644 --- a/SDL/Quintuplet.h +++ b/SDL/Quintuplet.h @@ -69,35 +69,35 @@ namespace SDL { } }; - template + template struct quintupletsBuffer : quintuplets { - Buf tripletIndices_buf; - Buf lowerModuleIndices_buf; - Buf nQuintuplets_buf; - Buf totOccupancyQuintuplets_buf; - Buf nMemoryLocations_buf; - - Buf innerRadius_buf; - Buf bridgeRadius_buf; - Buf outerRadius_buf; - Buf pt_buf; - Buf eta_buf; - Buf phi_buf; - Buf score_rphisum_buf; - Buf layer_buf; - Buf isDup_buf; - Buf TightCutFlag_buf; - Buf partOfPT5_buf; - - Buf regressionRadius_buf; - Buf regressionG_buf; - Buf regressionF_buf; - - Buf logicalLayers_buf; - Buf hitIndices_buf; - Buf rzChiSquared_buf; - Buf chiSquared_buf; - Buf nonAnchorChiSquared_buf; + Buf tripletIndices_buf; + Buf lowerModuleIndices_buf; + Buf nQuintuplets_buf; + Buf totOccupancyQuintuplets_buf; + Buf nMemoryLocations_buf; + + Buf innerRadius_buf; + Buf bridgeRadius_buf; + Buf outerRadius_buf; + Buf pt_buf; + Buf eta_buf; + Buf phi_buf; + Buf score_rphisum_buf; + Buf layer_buf; + Buf isDup_buf; + Buf TightCutFlag_buf; + Buf partOfPT5_buf; + + Buf regressionRadius_buf; + Buf regressionG_buf; + Buf regressionF_buf; + + Buf logicalLayers_buf; + Buf hitIndices_buf; + Buf rzChiSquared_buf; + Buf chiSquared_buf; + Buf nonAnchorChiSquared_buf; template quintupletsBuffer(unsigned int nTotalQuintuplets, unsigned int nLowerModules, TDevAcc const& devAccIn, TQueue& queue) @@ -612,118 +612,118 @@ namespace SDL { if (layer1 == 7 and layer2 == 8 and layer3 == 9 and layer4 == 10 and layer5 == 11) //0 { if (rzChiSquared < 94.470f) - TightCutFlag = 1; + TightCutFlag = true; return true; } else if (layer1 == 7 and layer2 == 8 and layer3 == 9 and layer4 == 10 and layer5 == 16) //1 { if (rzChiSquared < 22.099f) - TightCutFlag = 1; + TightCutFlag = true; return rzChiSquared < 37.956f; } else if (layer1 == 7 and layer2 == 8 and layer3 == 9 and layer4 == 15 and layer5 == 16) //2 { if (rzChiSquared < 7.992f) - TightCutFlag = 1; + TightCutFlag = true; return rzChiSquared < 11.622f; } else if (layer1 == 1 and layer2 == 7 and layer3 == 8 and layer4 == 9) { if (layer5 == 10) //3 { if (rzChiSquared < 111.390f) - TightCutFlag = 1; + TightCutFlag = true; return true; } if (layer5 == 15) //4 { if (rzChiSquared < 18.351f) - TightCutFlag = 1; + TightCutFlag = true; return rzChiSquared < 37.941f; } } else if (layer1 == 1 and layer2 == 2 and layer3 == 7) { if (layer4 == 8 and layer5 == 9) //5 { if (rzChiSquared < 116.148f) - TightCutFlag = 1; + TightCutFlag = true; return true; } if (layer4 == 8 and layer5 == 14) //6 { if (rzChiSquared < 19.352f) - TightCutFlag = 1; + TightCutFlag = true; return rzChiSquared < 52.561f; } else if (layer4 == 13 and layer5 == 14) //7 { if (rzChiSquared < 10.392f) - TightCutFlag = 1; + TightCutFlag = true; return rzChiSquared < 13.76f; } } else if (layer1 == 1 and layer2 == 2 and layer3 == 3) { if (layer4 == 7 and layer5 == 8) //8 { if (rzChiSquared < 27.824f) - TightCutFlag = 1; + TightCutFlag = true; return rzChiSquared < 44.247f; } else if (layer4 == 7 and layer5 == 13) //9 { if (rzChiSquared < 18.145f) - TightCutFlag = 1; + TightCutFlag = true; return rzChiSquared < 33.752f; } else if (layer4 == 12 and layer5 == 13) //10 { if (rzChiSquared < 13.308f) - TightCutFlag = 1; + TightCutFlag = true; return rzChiSquared < 21.213f; } else if (layer4 == 4 and layer5 == 5) //11 { if (rzChiSquared < 15.627f) - TightCutFlag = 1; + TightCutFlag = true; return rzChiSquared < 29.035f; } else if (layer4 == 4 and layer5 == 12) //12 { if (rzChiSquared < 14.64f) - TightCutFlag = 1; + TightCutFlag = true; return rzChiSquared < 23.037f; } } else if (layer1 == 2 and layer2 == 7 and layer3 == 8) { if (layer4 == 9 and layer5 == 15) //14 { if (rzChiSquared < 24.662f) - TightCutFlag = 1; + TightCutFlag = true; return rzChiSquared < 41.036f; } else if (layer4 == 14 and layer5 == 15) //15 { if (rzChiSquared < 8.866f) - TightCutFlag = 1; + TightCutFlag = true; return rzChiSquared < 14.092f; } } else if (layer1 == 2 and layer2 == 3 and layer3 == 7) { if (layer4 == 8 and layer5 == 14) //16 { if (rzChiSquared < 23.730f) - TightCutFlag = 1; + TightCutFlag = true; return rzChiSquared < 23.748f; } if (layer4 == 13 and layer5 == 14) //17 { if (rzChiSquared < 10.772f) - TightCutFlag = 1; + TightCutFlag = true; return rzChiSquared < 17.945f; } } else if (layer1 == 2 and layer2 == 3 and layer3 == 4) { if (layer4 == 5 and layer5 == 6) //18 { if (rzChiSquared < 6.065f) - TightCutFlag = 1; + TightCutFlag = true; return rzChiSquared < 8.803f; } else if (layer4 == 5 and layer5 == 12) //19 { if (rzChiSquared < 5.693f) - TightCutFlag = 1; + TightCutFlag = true; return rzChiSquared < 7.930f; } else if (layer4 == 12 and layer5 == 13) //20 { if (rzChiSquared < 5.473f) - TightCutFlag = 1; + TightCutFlag = true; return rzChiSquared < 7.626f; } } @@ -1191,7 +1191,7 @@ namespace SDL { moduleType = modulesInGPU.moduleType[lowerModuleIndices[i]]; moduleSubdet = modulesInGPU.subdets[lowerModuleIndices[i]]; moduleSide = modulesInGPU.sides[lowerModuleIndices[i]]; - float& drdz = modulesInGPU.drdzs[lowerModuleIndices[i]]; + const float& drdz = modulesInGPU.drdzs[lowerModuleIndices[i]]; slopes[i] = modulesInGPU.slopes[lowerModuleIndices[i]]; //category 1 - barrel PS flat if (moduleSubdet == Barrel and moduleType == PS and moduleSide == Center) { diff --git a/SDL/Segment.h b/SDL/Segment.h index a3b81920..c9143e1f 100644 --- a/SDL/Segment.h +++ b/SDL/Segment.h @@ -83,42 +83,42 @@ namespace SDL { } }; - template + template struct segmentsBuffer : segments { - Buf dPhis_buf; - Buf dPhiMins_buf; - Buf dPhiMaxs_buf; - Buf dPhiChanges_buf; - Buf dPhiChangeMins_buf; - Buf dPhiChangeMaxs_buf; - Buf innerLowerModuleIndices_buf; - Buf outerLowerModuleIndices_buf; - Buf seedIdx_buf; - Buf mdIndices_buf; - Buf nMemoryLocations_buf; - Buf innerMiniDoubletAnchorHitIndices_buf; - Buf outerMiniDoubletAnchorHitIndices_buf; - Buf charge_buf; - Buf superbin_buf; - Buf nSegments_buf; - Buf totOccupancySegments_buf; - Buf pLSHitsIdxs_buf; - Buf pixelType_buf; - Buf isQuad_buf; - Buf isDup_buf; - Buf partOfPT5_buf; - Buf ptIn_buf; - Buf ptErr_buf; - Buf px_buf; - Buf py_buf; - Buf pz_buf; - Buf etaErr_buf; - Buf eta_buf; - Buf phi_buf; - Buf score_buf; - Buf circleCenterX_buf; - Buf circleCenterY_buf; - Buf circleRadius_buf; + Buf dPhis_buf; + Buf dPhiMins_buf; + Buf dPhiMaxs_buf; + Buf dPhiChanges_buf; + Buf dPhiChangeMins_buf; + Buf dPhiChangeMaxs_buf; + Buf innerLowerModuleIndices_buf; + Buf outerLowerModuleIndices_buf; + Buf seedIdx_buf; + Buf mdIndices_buf; + Buf nMemoryLocations_buf; + Buf innerMiniDoubletAnchorHitIndices_buf; + Buf outerMiniDoubletAnchorHitIndices_buf; + Buf charge_buf; + Buf superbin_buf; + Buf nSegments_buf; + Buf totOccupancySegments_buf; + Buf pLSHitsIdxs_buf; + Buf pixelType_buf; + Buf isQuad_buf; + Buf isDup_buf; + Buf partOfPT5_buf; + Buf ptIn_buf; + Buf ptErr_buf; + Buf px_buf; + Buf py_buf; + Buf pz_buf; + Buf etaErr_buf; + Buf eta_buf; + Buf phi_buf; + Buf score_buf; + Buf circleCenterX_buf; + Buf circleCenterY_buf; + Buf circleRadius_buf; template segmentsBuffer(unsigned int nMemoryLocationsIn, @@ -286,8 +286,8 @@ namespace SDL { bool isOuterTilted = modulesInGPU.subdets[outerLowerModuleIndex] == SDL::Barrel and modulesInGPU.sides[outerLowerModuleIndex] != SDL::Center; - float& drdzInner = modulesInGPU.drdzs[innerLowerModuleIndex]; - float& drdzOuter = modulesInGPU.drdzs[outerLowerModuleIndex]; + const float& drdzInner = modulesInGPU.drdzs[innerLowerModuleIndex]; + const float& drdzOuter = modulesInGPU.drdzs[outerLowerModuleIndex]; float innerModuleGapSize = SDL::moduleGapSize_seg(modulesInGPU, innerLowerModuleIndex); float outerModuleGapSize = SDL::moduleGapSize_seg(modulesInGPU, outerLowerModuleIndex); const float innerminiTilt = isInnerTilted diff --git a/SDL/TrackCandidate.h b/SDL/TrackCandidate.h index 1a9dacc9..ced7c1fb 100644 --- a/SDL/TrackCandidate.h +++ b/SDL/TrackCandidate.h @@ -52,25 +52,25 @@ namespace SDL { } }; - template + template struct trackCandidatesBuffer : trackCandidates { - Buf trackCandidateType_buf; - Buf directObjectIndices_buf; - Buf objectIndices_buf; - Buf nTrackCandidates_buf; - Buf nTrackCandidatespT3_buf; - Buf nTrackCandidatespT5_buf; - Buf nTrackCandidatespLS_buf; - Buf nTrackCandidatesT5_buf; - - Buf logicalLayers_buf; - Buf hitIndices_buf; - Buf pixelSeedIndex_buf; - Buf lowerModuleIndices_buf; - - Buf centerX_buf; - Buf centerY_buf; - Buf radius_buf; + Buf trackCandidateType_buf; + Buf directObjectIndices_buf; + Buf objectIndices_buf; + Buf nTrackCandidates_buf; + Buf nTrackCandidatespT3_buf; + Buf nTrackCandidatespT5_buf; + Buf nTrackCandidatespLS_buf; + Buf nTrackCandidatesT5_buf; + + Buf logicalLayers_buf; + Buf hitIndices_buf; + Buf pixelSeedIndex_buf; + Buf lowerModuleIndices_buf; + + Buf centerX_buf; + Buf centerY_buf; + Buf radius_buf; template trackCandidatesBuffer(unsigned int maxTrackCandidates, TDevAcc const& devAccIn, TQueue& queue) diff --git a/SDL/Triplet.h b/SDL/Triplet.h index 847f2d82..c9e9909b 100644 --- a/SDL/Triplet.h +++ b/SDL/Triplet.h @@ -77,38 +77,38 @@ namespace SDL { } }; - template + template struct tripletsBuffer : triplets { - Buf segmentIndices_buf; - Buf lowerModuleIndices_buf; - Buf nTriplets_buf; - Buf totOccupancyTriplets_buf; - Buf nMemoryLocations_buf; - Buf logicalLayers_buf; - Buf hitIndices_buf; - Buf betaIn_buf; - Buf betaOut_buf; - Buf pt_beta_buf; - Buf partOfPT5_buf; - Buf partOfT5_buf; - Buf partOfPT3_buf; + Buf segmentIndices_buf; + Buf lowerModuleIndices_buf; + Buf nTriplets_buf; + Buf totOccupancyTriplets_buf; + Buf nMemoryLocations_buf; + Buf logicalLayers_buf; + Buf hitIndices_buf; + Buf betaIn_buf; + Buf betaOut_buf; + Buf pt_beta_buf; + Buf partOfPT5_buf; + Buf partOfT5_buf; + Buf partOfPT3_buf; #ifdef CUT_VALUE_DEBUG - Buf zOut_buf; - Buf rtOut_buf; - Buf deltaPhiPos_buf; - Buf deltaPhi_buf; - Buf zLo_buf; - Buf zHi_buf; - Buf zLoPointed_buf; - Buf zHiPointed_buf; - Buf sdlCut_buf; - Buf betaInCut_buf; - Buf betaOutCut_buf; - Buf deltaBetaCut_buf; - Buf rtLo_buf; - Buf rtHi_buf; - Buf kZ_buf; + Buf zOut_buf; + Buf rtOut_buf; + Buf deltaPhiPos_buf; + Buf deltaPhi_buf; + Buf zLo_buf; + Buf zHi_buf; + Buf zLoPointed_buf; + Buf zHiPointed_buf; + Buf sdlCut_buf; + Buf betaInCut_buf; + Buf betaOutCut_buf; + Buf deltaBetaCut_buf; + Buf rtLo_buf; + Buf rtHi_buf; + Buf kZ_buf; #endif template