Skip to content

Commit

Permalink
minor changes
Browse files Browse the repository at this point in the history
  • Loading branch information
albestro committed Feb 10, 2025
1 parent b8b1eb0 commit aaba87f
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 13 deletions.
4 changes: 2 additions & 2 deletions include/dlaf/eigensolver/bt_reduction_to_band/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,6 @@ void BackTransformationReductionToBand<backend, device, T>::call(
const LocalTileIndex t_index{Coord::Col, k};

computeTFactor<backend>(panelV, mat_taus.read(taus_index), panelT.readwrite(t_index), panelWS);
panelWS.reset();

// W = V T
auto tile_t = panelT.read(t_index);
Expand All @@ -247,6 +246,7 @@ void BackTransformationReductionToBand<backend, device, T>::call(
panelW.reset();
panelW2.reset();
panelT.reset();
panelWS.reset();
}
}

Expand Down Expand Up @@ -389,11 +389,11 @@ void BackTransformationReductionToBand<B, D, T>::call(comm::CommunicatorGrid& gr
splitTile(mat_c.readwrite(ij), mat_c_view(ij)));
}

panelWS.reset();
panelV.reset();
panelW.reset();
panelW2.reset();
panelT.reset();
panelWS.reset();
}
}
}
8 changes: 4 additions & 4 deletions include/dlaf/factorization/qr.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,10 @@ namespace dlaf::factorization::internal {
/// @param t tile where the resulting T factor will be stored in its top-left sub-matrix of size
/// TileElementSize(k, k)
/// @param workspaces array of tiles used as workspace, with at least one tile per worker (see
/// get_tfactor_num_workers), each tile should have the same size as @param tile_t
/// get_tfactor_num_workers), each tile should be at least of size TileElementSize(k, k)
///
/// @pre reflectors in hh_panel are well formed (1s on the diagonal and 0s in the upper part)
/// @pre hh_panel.getWidth() <= t.get().size().rows && hh_panel.size().getWidth() <= t.get().size().cols()
/// @pre hh_panel.getWidth() <= t.get().size().rows() && hh_panel.getWidth() <= t.get().size().cols()
template <Backend backend, Device device, class T>
void computeTFactor(matrix::Panel<Coord::Col, T, device>& hh_panel,
matrix::ReadOnlyTileSender<T, Device::CPU> taus,
Expand Down Expand Up @@ -99,11 +99,11 @@ void computeTFactor(matrix::Panel<Coord::Col, T, device>& hh_panel,
/// @param t tile where the resulting T factor will be stored in its top-left sub-matrix of size
/// TileElementSize(k, k)
/// @param workspaces array of tiles used as workspace, with at least one tile per worker (see
/// get_tfactor_num_workers), each tile should have the same size as @param tile_t
/// get_tfactor_num_workers), each tile should be at least of size TileElementSize(k, k)
/// @param mpi_col_task_chain where internal communications are issued
///
/// @pre reflectors in hh_panel are well formed (1s on the diagonal and 0s in the upper part)
/// @pre hh_panel.getWidth() <= t.get().size().rows && hh_panel.size().getWidth() <= t.get().size().cols()
/// @pre hh_panel.getWidth() <= t.get().size().rows() && hh_panel.getWidth() <= t.get().size().cols()
template <Backend backend, Device device, class T>
void computeTFactor(matrix::Panel<Coord::Col, T, device>& hh_panel,
matrix::ReadOnlyTileSender<T, Device::CPU> taus,
Expand Down
7 changes: 2 additions & 5 deletions include/dlaf/factorization/qr/t_factor_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,6 @@ struct Helpers<Backend::MC, Device::CPU, T> {
(worker_id == 0 ? tile_t : workspaces[worker_id - 1])
.subTileReference({{0, 0}, tile_t.size()});

DLAF_ASSERT(equal_size(ws_worker, tile_t), ws_worker.size(), tile_t.size());

tile::internal::set0<T>(ws_worker);
lapack::lacpy(blas::Uplo::General, 1, k, taus.get().ptr(), 1, ws_worker.ptr(),
ws_worker.ld() + 1);
Expand Down Expand Up @@ -296,10 +294,9 @@ struct Helpers<Backend::GPU, Device::GPU, T> {
di::Policy<Backend::GPU>(thread_priority::high),
[k](cublasHandle_t handle, auto&& hh_tiles, auto&& taus,
matrix::Tile<T, Device::GPU>& tile_t_full) {
matrix::Tile<T, Device::GPU> tile_t = tile_t_full.subTileReference({{0, 0}, {k, k}});

DLAF_ASSERT_MODERATE(k == taus.size().rows(), k, taus.size().rows());
DLAF_ASSERT(tile_t.size() == TileElementSize(k, k), tile_t.size(), k);

matrix::Tile<T, Device::GPU> tile_t = tile_t_full.subTileReference({{0, 0}, {k, k}});

// Note:
// prepare the diagonal of taus in t and reset the rest
Expand Down
4 changes: 2 additions & 2 deletions src/init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -349,8 +349,8 @@ pika::program_options::options_description getOptionsDescription() {

// Tune parameters command line options
desc.add_options()("dlaf:tfactor-num-threads", pika::program_options::value<std::size_t>(), "The maximum number of threads to use for computing the tfactor.");
desc.add_options()("dlaf:tfactor-num-streams", pika::program_options::value<std::size_t>(), "The maximum number of threads to use for computing the tfactor.");
desc.add_options()("dlaf:tfactor-barrier-busy-wait-us", pika::program_options::value<std::size_t>(), "The duration in microseconds to busy-wait in barriers in the tfactor t algorithm.");
desc.add_options()("dlaf:tfactor-num-streams", pika::program_options::value<std::size_t>(), "The maximum number of GPU streams to use for computing the tfactor.");
desc.add_options()("dlaf:tfactor-barrier-busy-wait-us", pika::program_options::value<std::size_t>(), "The duration in microseconds to busy-wait in barriers in the tfactor algorithm.");
desc.add_options()("dlaf:red2band-panel-nworkers", pika::program_options::value<std::size_t>(), "The maximum number of threads to use for computing the panel in the reduction to band algorithm.");
desc.add_options()("dlaf:red2band-barrier-busy-wait-us", pika::program_options::value<std::size_t>(), "The duration in microseconds to busy-wait in barriers in the reduction to band algorithm.");
desc.add_options()("dlaf:eigensolver-min-band", pika::program_options::value<SizeType>(), "The minimum value to start looking for a divisor of the block size. When larger than the block size, the block size will be used instead.");
Expand Down

0 comments on commit aaba87f

Please sign in to comment.