From 8db5396cc498f34167ddfa06b0bf32721f6a3831 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Fri, 31 Jan 2025 16:04:27 +0100 Subject: [PATCH] Remove outdated pika version checks and workarounds --- .../dlaf/eigensolver/reduction_to_band/impl.h | 5 ++--- .../dlaf/eigensolver/tridiag_solver/merge.h | 13 +++++------- include/dlaf/permutations/general/impl.h | 5 ++--- include/dlaf/sender/continues_on.h | 20 ------------------- include/dlaf/sender/transform.h | 3 +-- include/dlaf/sender/transform_mpi.h | 1 - include/dlaf/tune.h | 2 -- src/c_api/init.cpp | 6 ------ src/init.cpp | 12 +---------- 9 files changed, 11 insertions(+), 56 deletions(-) delete mode 100644 include/dlaf/sender/continues_on.h diff --git a/include/dlaf/eigensolver/reduction_to_band/impl.h b/include/dlaf/eigensolver/reduction_to_band/impl.h index 40edaf4fe9..28593afcad 100644 --- a/include/dlaf/eigensolver/reduction_to_band/impl.h +++ b/include/dlaf/eigensolver/reduction_to_band/impl.h @@ -52,7 +52,6 @@ #include #include #include -#include #include #include #include @@ -323,7 +322,7 @@ void computePanelReflectors(MatrixLikeA& mat_a, MatrixLikeTaus& mat_taus, const std::vector>{}), // w (internally required) mat_taus.readwrite(LocalTileIndex(j_sub, 0)), ex::when_all_vector(std::move(panel_tiles))) | - di::continues_on(di::getBackendScheduler(thread_priority::high)) | + ex::continues_on(di::getBackendScheduler(thread_priority::high)) | ex::bulk(nworkers, [nworkers, cols = panel_view.cols()](const std::size_t index, auto& barrier_ptr, auto& w, auto& taus, auto& tiles) { const auto barrier_busy_wait = getReductionToBandBarrierBusyWait(); @@ -651,7 +650,7 @@ void computePanelReflectors(TriggerSender&& trigger, comm::IndexT_MPI rank_v0, mat_taus.readwrite(GlobalTileIndex(j_sub, 0)), ex::when_all_vector(std::move(panel_tiles)), std::forward(mpi_col_chain_panel), std::forward(trigger)) | - di::continues_on(di::getBackendScheduler(pika::execution::thread_priority::high)) | + ex::continues_on(di::getBackendScheduler(pika::execution::thread_priority::high)) | ex::bulk(nworkers, [nworkers, rank_v0, cols = panel_view.cols()](const std::size_t index, auto& barrier_ptr, auto& w, auto& taus, auto& tiles, auto&& pcomm) { diff --git a/include/dlaf/eigensolver/tridiag_solver/merge.h b/include/dlaf/eigensolver/tridiag_solver/merge.h index e4eed8ff34..e71ebd654a 100644 --- a/include/dlaf/eigensolver/tridiag_solver/merge.h +++ b/include/dlaf/eigensolver/tridiag_solver/merge.h @@ -50,7 +50,6 @@ #include #include #include -#include #include #include #include @@ -824,7 +823,7 @@ void solveRank1Problem(const SizeType i_begin, const SizeType i_end, KSender&& k ex::when_all_vector(tc.readwrite(z)), ex::when_all_vector(tc.readwrite(evals)), ex::when_all_vector(tc.read(i2)), ex::when_all_vector(tc.readwrite(evecs)), ex::just(std::vector>())) | - di::continues_on(di::getBackendScheduler(thread_priority::high)) | + ex::continues_on(di::getBackendScheduler(thread_priority::high)) | ex::bulk(nthreads, [nthreads, n, nb](std::size_t thread_idx, auto& barrier_ptr, auto& k, auto& rho, auto& d_tiles, auto& z_tiles, auto& eval_tiles, const auto& i2_tile_arr, auto& evec_tiles, auto& ws_vecs) { @@ -1032,12 +1031,11 @@ void multiplyEigenvectors(const SizeType sub_offset, const SizeType n, const Siz // └───┴────────┴────┘ └────────────┴────┘ namespace ex = pika::execution::experimental; - using dlaf::internal::continues_on; using pika::execution::thread_priority; ex::start_detached( ex::when_all(std::forward(k), std::forward(n_udl)) | - continues_on(dlaf::internal::getBackendScheduler(thread_priority::high)) | + ex::continues_on(dlaf::internal::getBackendScheduler(thread_priority::high)) | ex::then([sub_offset, n, n_upper, n_lower, e0 = e0.subPipeline(), e1 = e1.subPipelineConst(), e2 = e2.subPipelineConst()](const SizeType k, std::array n_udl) mutable { using dlaf::matrix::internal::MatrixRef; @@ -1335,7 +1333,7 @@ void solveRank1ProblemDist(CommSender&& row_comm, CommSender&& col_comm, const S // additional workspaces ex::just(std::vector>()), ex::just(memory::MemoryView())) | - di::continues_on(hp_scheduler) | + ex::continues_on(hp_scheduler) | ex::let_value([n, dist_sub, bcast_evals, all_reduce_in_place, hp_scheduler]( auto& row_comm_wrapper, auto& col_comm_wrapper, const SizeType k, const SizeType k_lc, const auto& rho, const auto& d_tiles, auto& z_tiles, @@ -1354,7 +1352,7 @@ void solveRank1ProblemDist(CommSender&& row_comm, CommSender&& col_comm, const S return std::clamp(ideal_workers, min_workers, available_workers); }(); - return ex::just(std::make_unique>(nthreads)) | di::continues_on(hp_scheduler) | + return ex::just(std::make_unique>(nthreads)) | ex::continues_on(hp_scheduler) | ex::bulk(nthreads, [&row_comm_wrapper, &col_comm_wrapper, k, k_lc, &rho, &d_tiles, &z_tiles, &eval_tiles, &i4_tiles_arr, &i6_tiles_arr, &i2_tiles_arr, &evec_tiles, &ws_cols, &ws_row, nthreads, n, dist_sub, bcast_evals, @@ -1763,12 +1761,11 @@ void multiplyEigenvectors(const GlobalElementIndex sub_offset, const matrix::Dis // └───┴────────┴────┘ └────────────┴────┘ namespace ex = pika::execution::experimental; - using dlaf::internal::continues_on; using pika::execution::thread_priority; ex::start_detached( ex::when_all(std::forward(k_lc), std::forward(n_udl)) | - continues_on(dlaf::internal::getBackendScheduler(thread_priority::high)) | + ex::continues_on(dlaf::internal::getBackendScheduler(thread_priority::high)) | ex::then([dist_sub, sub_offset, n_upper, n_lower, e0 = e0.subPipeline(), e1 = e1.subPipelineConst(), e2 = e2.subPipelineConst(), sub_comm_row = row_task_chain.sub_pipeline(), diff --git a/include/dlaf/permutations/general/impl.h b/include/dlaf/permutations/general/impl.h index ef3f8686a6..9067324acd 100644 --- a/include/dlaf/permutations/general/impl.h +++ b/include/dlaf/permutations/general/impl.h @@ -170,7 +170,6 @@ void Permutations::call(const SizeType i_begin, const SizeType i_end namespace ex = pika::execution::experimental; namespace dist_extra = dlaf::matrix::internal::distribution; using dist_extra::local_element_distance_from_global_tile; - using dlaf::internal::continues_on; if (i_begin == i_end) return; @@ -211,7 +210,7 @@ void Permutations::call(const SizeType i_begin, const SizeType i_end applyPermutationOnCPU(i_perm, subm_dist, perm_arr, mat_in_tiles, mat_out_tiles); }; - ex::start_detached(std::move(sender) | continues_on(dlaf::internal::getBackendScheduler()) | + ex::start_detached(std::move(sender) | ex::continues_on(dlaf::internal::getBackendScheduler()) | ex::bulk(nperms, std::move(permute_fn))); } else { @@ -431,7 +430,7 @@ void applyPackingIndex(const matrix::Distribution& subm_dist, IndexMapSender&& i applyPermutationOnCPU(i_perm, subm_dist, perm_arr, mat_in_tiles, mat_out_tiles); }; - ex::start_detached(std::move(sender) | di::continues_on(di::getBackendScheduler()) | + ex::start_detached(std::move(sender) | ex::continues_on(di::getBackendScheduler()) | ex::bulk(nperms, std::move(permute_fn))); } else { diff --git a/include/dlaf/sender/continues_on.h b/include/dlaf/sender/continues_on.h deleted file mode 100644 index 12aae7c725..0000000000 --- a/include/dlaf/sender/continues_on.h +++ /dev/null @@ -1,20 +0,0 @@ -// -// Distributed Linear Algebra with Future (DLAF) -// -// Copyright (c) 2018-2024, ETH Zurich -// All rights reserved. -// -// Please, refer to the LICENSE file in the root directory. -// SPDX-License-Identifier: BSD-3-Clause -// -#pragma once - -#include - -namespace dlaf::internal { -#if PIKA_VERSION_FULL < 0x001D00 // < 0.29.0 -inline constexpr pika::execution::experimental::transfer_t continues_on{}; -#else -using pika::execution::experimental::continues_on; -#endif -} diff --git a/include/dlaf/sender/transform.h b/include/dlaf/sender/transform.h index 4e391456b4..957b17492a 100644 --- a/include/dlaf/sender/transform.h +++ b/include/dlaf/sender/transform.h @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -54,7 +53,7 @@ template >> [[nodiscard]] decltype(auto) transform(const Policy policy, F&& f, Sender&& sender) { - using dlaf::internal::continues_on; + using pika::execution::experimental::continues_on; using pika::execution::experimental::drop_operation_state; using pika::execution::experimental::then; diff --git a/include/dlaf/sender/transform_mpi.h b/include/dlaf/sender/transform_mpi.h index 1b62b7db51..3a49d9652c 100644 --- a/include/dlaf/sender/transform_mpi.h +++ b/include/dlaf/sender/transform_mpi.h @@ -19,7 +19,6 @@ #include #include #include -#include #include namespace dlaf::comm::internal { diff --git a/include/dlaf/tune.h b/include/dlaf/tune.h index ddcbb97e77..3508e31c56 100644 --- a/include/dlaf/tune.h +++ b/include/dlaf/tune.h @@ -100,13 +100,11 @@ struct TuneParameters { // Some parameters require the pika runtime to be initialized since they depend on the number of // threads used by the runtime. We initialize them separately in the constructor after checking that // pika is initialized. -#if PIKA_VERSION_FULL >= 0x001600 // >= 0.22.0 if (!pika::is_runtime_initialized()) { std::cerr << "[ERROR] Trying to initialize DLA-Future tune parameters but the pika runtime is not initialized. Make sure pika is initialized first.\n"; std::terminate(); } -#endif const auto default_pool_thread_count = pika::resource::get_thread_pool("default").get_os_thread_count(); diff --git a/src/c_api/init.cpp b/src/c_api/init.cpp index bd34b57ccf..9a691be07e 100644 --- a/src/c_api/init.cpp +++ b/src/c_api/init.cpp @@ -27,13 +27,7 @@ void dlaf_initialize(int argc_pika, const char** argv_pika, int argc_dlaf, // pika initialization pika::init_params params; params.desc_cmdline = desc; - // After pika 0.21.0 pika::start reports errors only by exception and returns void -#if PIKA_VERSION_FULL >= 0x001500 pika::start(argc_pika, argv_pika, params); -#else - auto pika_started = pika::start(nullptr, argc_pika, argv_pika, params); - DLAF_ASSERT(pika_started, pika_started); -#endif // DLA-Future initialization dlaf::initialize(argc_dlaf, argv_dlaf); diff --git a/src/init.cpp b/src/init.cpp index 4fcb5c0e49..b21890fb2c 100644 --- a/src/init.cpp +++ b/src/init.cpp @@ -105,14 +105,8 @@ void initializeGpuPool(int device, std::size_t num_np_streams, std::size_t num_h #else 0 #endif -#if PIKA_VERSION_FULL >= 0x001D00 // >= 0.29.0 , - num_blas_handles, num_lapack_handles -#endif - ); -#if PIKA_VERSION_FULL < 0x001D00 // < 0.29.0 - dlaf::internal::silenceUnusedWarningFor(num_blas_handles, num_lapack_handles); -#endif + num_blas_handles, num_lapack_handles); } void finalizeGpuPool() { @@ -280,10 +274,6 @@ void updateConfiguration(const pika::program_options::variables_map& vm, configu updateConfigurationValue(vm, cfg.umpire_device_memory_pool_coalescing_reallocation_ratio, "UMPIRE_DEVICE_MEMORY_POOL_COALESCING_REALLOCATION_RATIO", "umpire-device-memory-pool-coalescing-reallocation-ratio"); updateConfigurationValue(vm, cfg.num_gpu_blas_handles, "NUM_GPU_BLAS_HANDLES", "num-gpu-blas-handles"); updateConfigurationValue(vm, cfg.num_gpu_lapack_handles, "NUM_GPU_LAPACK_HANDLES", "num-gpu-lapack-handles"); -#if PIKA_VERSION_FULL < 0x001D00 // < 0.29.0 - warnUnusedConfigurationOption(vm, "NUM_GPU_BLAS_HANDLES", "num-gpu-blas-handles", "only supported with pika 0.29.0 or newer"); - warnUnusedConfigurationOption(vm, "NUM_GPU_LAPACK_HANDLES", "num-gpu-lapack-handles", "only supported with pika 0.29.0 or newer"); -#endif // update tune parameters //