From aaba87f19b4e04753105b453e6210df6226737e4 Mon Sep 17 00:00:00 2001 From: Alberto Invernizzi Date: Mon, 10 Feb 2025 17:26:37 +0100 Subject: [PATCH] minor changes --- include/dlaf/eigensolver/bt_reduction_to_band/impl.h | 4 ++-- include/dlaf/factorization/qr.h | 8 ++++---- include/dlaf/factorization/qr/t_factor_impl.h | 7 ++----- src/init.cpp | 4 ++-- 4 files changed, 10 insertions(+), 13 deletions(-) diff --git a/include/dlaf/eigensolver/bt_reduction_to_band/impl.h b/include/dlaf/eigensolver/bt_reduction_to_band/impl.h index 4a94d997ed..1c5cc71154 100644 --- a/include/dlaf/eigensolver/bt_reduction_to_band/impl.h +++ b/include/dlaf/eigensolver/bt_reduction_to_band/impl.h @@ -222,7 +222,6 @@ void BackTransformationReductionToBand::call( const LocalTileIndex t_index{Coord::Col, k}; computeTFactor(panelV, mat_taus.read(taus_index), panelT.readwrite(t_index), panelWS); - panelWS.reset(); // W = V T auto tile_t = panelT.read(t_index); @@ -247,6 +246,7 @@ void BackTransformationReductionToBand::call( panelW.reset(); panelW2.reset(); panelT.reset(); + panelWS.reset(); } } @@ -389,11 +389,11 @@ void BackTransformationReductionToBand::call(comm::CommunicatorGrid& gr splitTile(mat_c.readwrite(ij), mat_c_view(ij))); } - panelWS.reset(); panelV.reset(); panelW.reset(); panelW2.reset(); panelT.reset(); + panelWS.reset(); } } } diff --git a/include/dlaf/factorization/qr.h b/include/dlaf/factorization/qr.h index 1068304076..c1b607bd73 100644 --- a/include/dlaf/factorization/qr.h +++ b/include/dlaf/factorization/qr.h @@ -54,10 +54,10 @@ namespace dlaf::factorization::internal { /// @param t tile where the resulting T factor will be stored in its top-left sub-matrix of size /// TileElementSize(k, k) /// @param workspaces array of tiles used as workspace, with at least one tile per worker (see -/// get_tfactor_num_workers), each tile should have the same size as @param tile_t +/// get_tfactor_num_workers), each tile should be at least of size TileElementSize(k, k) /// /// @pre 
reflectors in hh_panel are well formed (1s on the diagonal and 0s in the upper part) -/// @pre hh_panel.getWidth() <= t.get().size().rows && hh_panel.size().getWidth() <= t.get().size().cols() +/// @pre hh_panel.getWidth() <= t.get().size().rows() && hh_panel.getWidth() <= t.get().size().cols() template void computeTFactor(matrix::Panel& hh_panel, matrix::ReadOnlyTileSender taus, @@ -99,11 +99,11 @@ void computeTFactor(matrix::Panel& hh_panel, /// @param t tile where the resulting T factor will be stored in its top-left sub-matrix of size /// TileElementSize(k, k) /// @param workspaces array of tiles used as workspace, with at least one tile per worker (see -/// get_tfactor_num_workers), each tile should have the same size as @param tile_t +/// get_tfactor_num_workers), each tile should be at least of size TileElementSize(k, k) /// @param mpi_col_task_chain where internal communications are issued /// /// @pre reflectors in hh_panel are well formed (1s on the diagonal and 0s in the upper part) -/// @pre hh_panel.getWidth() <= t.get().size().rows && hh_panel.size().getWidth() <= t.get().size().cols() +/// @pre hh_panel.getWidth() <= t.get().size().rows() && hh_panel.getWidth() <= t.get().size().cols() template void computeTFactor(matrix::Panel& hh_panel, matrix::ReadOnlyTileSender taus, diff --git a/include/dlaf/factorization/qr/t_factor_impl.h b/include/dlaf/factorization/qr/t_factor_impl.h index 2a5a876cb4..6ad1ffe15e 100644 --- a/include/dlaf/factorization/qr/t_factor_impl.h +++ b/include/dlaf/factorization/qr/t_factor_impl.h @@ -158,8 +158,6 @@ struct Helpers { (worker_id == 0 ? 
tile_t : workspaces[worker_id - 1]) .subTileReference({{0, 0}, tile_t.size()}); - DLAF_ASSERT(equal_size(ws_worker, tile_t), ws_worker.size(), tile_t.size()); - tile::internal::set0(ws_worker); lapack::lacpy(blas::Uplo::General, 1, k, taus.get().ptr(), 1, ws_worker.ptr(), ws_worker.ld() + 1); @@ -296,10 +294,9 @@ struct Helpers { di::Policy(thread_priority::high), [k](cublasHandle_t handle, auto&& hh_tiles, auto&& taus, matrix::Tile& tile_t_full) { - matrix::Tile tile_t = tile_t_full.subTileReference({{0, 0}, {k, k}}); - DLAF_ASSERT_MODERATE(k == taus.size().rows(), k, taus.size().rows()); - DLAF_ASSERT(tile_t.size() == TileElementSize(k, k), tile_t.size(), k); + + matrix::Tile tile_t = tile_t_full.subTileReference({{0, 0}, {k, k}}); // Note: // prepare the diagonal of taus in t and reset the rest diff --git a/src/init.cpp b/src/init.cpp index fb24e179fb..b3d797c80f 100644 --- a/src/init.cpp +++ b/src/init.cpp @@ -349,8 +349,8 @@ pika::program_options::options_description getOptionsDescription() { // Tune parameters command line options desc.add_options()("dlaf:tfactor-num-threads", pika::program_options::value(), "The maximum number of threads to use for computing the tfactor."); - desc.add_options()("dlaf:tfactor-num-streams", pika::program_options::value(), "The maximum number of threads to use for computing the tfactor."); - desc.add_options()("dlaf:tfactor-barrier-busy-wait-us", pika::program_options::value(), "The duration in microseconds to busy-wait in barriers in the tfactor t algorithm."); + desc.add_options()("dlaf:tfactor-num-streams", pika::program_options::value(), "The maximum number of GPU streams to use for computing the tfactor."); + desc.add_options()("dlaf:tfactor-barrier-busy-wait-us", pika::program_options::value(), "The duration in microseconds to busy-wait in barriers in the tfactor algorithm."); desc.add_options()("dlaf:red2band-panel-nworkers", pika::program_options::value(), "The maximum number of threads to use for computing the panel in 
the reduction to band algorithm."); desc.add_options()("dlaf:red2band-barrier-busy-wait-us", pika::program_options::value(), "The duration in microseconds to busy-wait in barriers in the reduction to band algorithm."); desc.add_options()("dlaf:eigensolver-min-band", pika::program_options::value(), "The minimum value to start looking for a divisor of the block size. When larger than the block size, the block size will be used instead.");