From 5f2379f23f826877cf22c12453fcc55ce2d68361 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Sun, 1 Oct 2023 14:19:44 +0200 Subject: [PATCH 01/28] Add batch dense base class, core and kernels Co-authored-by: Aditya Kashi --- core/CMakeLists.txt | 1 + core/base/batch_struct.hpp | 76 +++ core/device_hooks/common_kernels.inc.cpp | 10 + core/matrix/batch_dense.cpp | 203 ++++++++ core/matrix/batch_dense_kernels.hpp | 81 +++ core/test/matrix/batch_dense.cpp | 520 +++++++++++++++++++ cuda/CMakeLists.txt | 1 + cuda/matrix/batch_dense_kernels.cu | 90 ++++ dpcpp/CMakeLists.txt | 1 + dpcpp/matrix/batch_dense_kernels.dp.cpp | 83 +++ hip/CMakeLists.txt | 1 + hip/matrix/batch_dense_kernels.hip.cpp | 94 ++++ include/ginkgo/core/matrix/batch_dense.hpp | 341 ++++++++++++ omp/CMakeLists.txt | 1 + omp/matrix/batch_dense_kernels.cpp | 129 +++++ reference/CMakeLists.txt | 1 + reference/base/batch_struct.hpp | 28 + reference/matrix/batch_dense_kernels.cpp | 128 +++++ reference/matrix/batch_dense_kernels.hpp.inc | 88 ++++ 19 files changed, 1877 insertions(+) create mode 100644 core/matrix/batch_dense.cpp create mode 100644 core/matrix/batch_dense_kernels.hpp create mode 100644 core/test/matrix/batch_dense.cpp create mode 100644 cuda/matrix/batch_dense_kernels.cu create mode 100644 dpcpp/matrix/batch_dense_kernels.dp.cpp create mode 100644 hip/matrix/batch_dense_kernels.hip.cpp create mode 100644 include/ginkgo/core/matrix/batch_dense.hpp create mode 100644 omp/matrix/batch_dense_kernels.cpp create mode 100644 reference/matrix/batch_dense_kernels.cpp create mode 100644 reference/matrix/batch_dense_kernels.hpp.inc diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 7932976d6c9..46ea67abc65 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -39,6 +39,7 @@ target_sources(ginkgo log/vtune.cpp log/record.cpp log/stream.cpp + matrix/batch_dense.cpp matrix/coo.cpp matrix/csr.cpp matrix/dense.cpp diff --git a/core/base/batch_struct.hpp b/core/base/batch_struct.hpp index caca4577cf7..21bd5b0e8ea 100644 --- a/core/base/batch_struct.hpp +++ b/core/base/batch_struct.hpp @@ -81,6 +81,46 @@ struct uniform_batch { } // namespace multi_vector +namespace batch_dense { + + +/** + * Encapsulates one matrix from a batch of multi-vectors. + */ +template +struct batch_item { + using value_type = ValueType; + ValueType* values; + int stride; + int num_rows; + int num_rhs; +}; + + +/** + * A 'simple' structure to store a global uniform batch of multi-vectors. + */ +template +struct uniform_batch { + using value_type = ValueType; + using entry_type = batch_item; + + ValueType* values; + size_type num_batch_items; + int stride; + int num_rows; + int num_rhs; + + size_type get_entry_storage() const + { + return num_rows * stride * sizeof(value_type); + } +}; + + +} // namespace batch_dense + + template GKO_ATTRIBUTES GKO_INLINE multi_vector::batch_item to_const( const multi_vector::batch_item& b) @@ -97,6 +137,22 @@ GKO_ATTRIBUTES GKO_INLINE multi_vector::uniform_batch to_const( } +template +GKO_ATTRIBUTES GKO_INLINE matrix::batch_dense::batch_item +to_const(const matrix::batch_dense::batch_item& b) +{ + return {b.values, b.stride, b.num_rows, b.num_rhs}; +} + + +template +GKO_ATTRIBUTES GKO_INLINE matrix::batch_dense::uniform_batch +to_const(const matrix::batch_dense::uniform_batch& ub) +{ + return {ub.values, ub.num_batch_items, ub.stride, ub.num_rows, ub.num_rhs}; +} + + /** * Extract one object (matrix, vector etc.) 
from a batch of objects * @@ -126,6 +182,26 @@ extract_batch_item(ValueType* const batch_values, const int stride, } +template +GKO_ATTRIBUTES GKO_INLINE matrix::batch_dense::batch_item +extract_batch_item(const matrix::batch_dense::uniform_batch& batch, + const size_type batch_idx) +{ + return {batch.values + batch_idx * batch.stride * batch.num_rows, + batch.stride, batch.num_rows, batch.num_rhs}; +} + +template +GKO_ATTRIBUTES GKO_INLINE matrix::batch_dense::batch_item +extract_batch_item(ValueType* const batch_values, const int stride, + const int num_rows, const int num_rhs, + const size_type batch_idx) +{ + return {batch_values + batch_idx * stride * num_rows, stride, num_rows, + num_rhs}; +} + + } // namespace batch } // namespace gko diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index c8bbd2e0a31..c22f5cd968d 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -299,6 +299,16 @@ GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_COPY_KERNEL); } // namespace batch_multi_vector +namespace batch_dense { + + +GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); +GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_dense + + namespace dense { diff --git a/core/matrix/batch_dense.cpp b/core/matrix/batch_dense.cpp new file mode 100644 index 00000000000..e6dedcf11fd --- /dev/null +++ b/core/matrix/batch_dense.cpp @@ -0,0 +1,203 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#include + + +#include +#include +#include +#include +#include +#include + + +#include "core/matrix/batch_dense_kernels.hpp" + + +namespace gko { +namespace batch { +namespace matrix { +namespace dense { + + +GKO_REGISTER_OPERATION(simple_apply, batch_dense::simple_apply); +GKO_REGISTER_OPERATION(advanced_apply, batch_dense::advanced_apply); + + +} // namespace dense + + +namespace detail { + + +template +batch_dim<2> compute_batch_size( + const std::vector*>& matrices) +{ + auto common_size = matrices[0]->get_size(); + for (size_type i = 1; i < matrices.size(); ++i) { + GKO_ASSERT_EQUAL_DIMENSIONS(common_size, matrices[i]->get_size()); + } + return batch_dim<2>{matrices.size(), common_size}; +} + + +} // namespace detail + + +template +std::unique_ptr> +BatchDense::create_view_for_item(size_type item_id) +{ + auto exec = this->get_executor(); + auto num_rows = this->get_common_size()[0]; + auto stride = this->get_common_size()[1]; + auto mat = unbatch_type::create( + exec, this->get_common_size(), + make_array_view(exec, num_rows * stride, + this->get_values_for_item(item_id)), + stride); + return mat; +} + + +template +std::unique_ptr> +BatchDense::create_const_view_for_item(size_type item_id) const +{ + auto exec = this->get_executor(); + auto num_rows = this->get_common_size()[0]; + auto stride = this->get_common_size()[1]; + auto mat = unbatch_type::create_const( + exec, this->get_common_size(), + make_const_array_view(exec, num_rows * stride, + this->get_const_values_for_item(item_id)), + stride); + return mat; +} + + +template +std::unique_ptr> +BatchDense::create_with_config_of(ptr_param other) +{ + // De-referencing `other` before calling the functions (instead of + // using operator `->`) is currently required to be compatible with + // CUDA 10.1. + // Otherwise, it results in a compile error. 
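+    // Note: create_with_same_config() only allocates an uninitialized
+    // BatchDense on the same executor and with the same batch dimensions as
+    // *other; it does not copy any values.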
+ return (*other).create_with_same_config(); +} + + +template +void BatchDense::set_size(const batch_dim<2>& value) noexcept +{ + batch_size_ = value; +} + + +template +std::unique_ptr> +BatchDense::create_with_same_config() const +{ + return BatchDense::create(this->get_executor(), + this->get_size()); +} + + +inline const batch_dim<2> get_col_sizes(const batch_dim<2>& sizes) +{ + return batch_dim<2>(sizes.get_num_batch_items(), + dim<2>(1, sizes.get_common_size()[1])); +} + + +template +void BatchDense::apply_impl(const MultiVector* b, + MultiVector* x) const +{ + GKO_ASSERT_EQUAL_DIMENSIONS(b->get_common_size(), x->get_common_size()); + GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); + GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); + GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); + GKO_ASSERT_CONFORMANT(this->get_common_size(), x->get_common_size()); + this->get_executor()->run(batch_dense::make_simple_apply(this, b, x)); +} + + +template +void BatchDense::apply_impl(const MultiVector* alpha, + const MultiVector* b, + const MultiVector* beta, + MultiVector* x) const +{ + GKO_ASSERT_EQUAL_DIMENSIONS(b->get_common_size(), x->get_common_size()); + GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); + GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); + GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); + GKO_ASSERT_CONFORMANT(this->get_common_size(), x->get_common_size()); + GKO_ASSERT_EQUAL_COLS(alpha->get_common_size(), gko::dim<2>(1, 1)); + GKO_ASSERT_EQUAL_COLS(beta->get_common_size(), gko::dim<2>(1, 1)); + this->get_executor()->run( + batch_dense::make_advanced_apply(alpha, this, b, beta, x)); +} + + +template +void BatchDense::convert_to( + BatchDense>* result) const +{ + result->values_ = this->values_; + result->set_size(this->get_size()); +} + + +template +void BatchDense::move_to( + BatchDense>* result) +{ + this->convert_to(result); +} + + +#define GKO_DECLARE_BATCH_DENSE_MATRIX(_type) class BatchDense<_type> +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_MATRIX); + + +} // namespace matrix +} // namespace batch +} // namespace gko diff --git a/core/matrix/batch_dense_kernels.hpp b/core/matrix/batch_dense_kernels.hpp new file mode 100644 index 00000000000..e801d7aa152 --- /dev/null +++ b/core/matrix/batch_dense_kernels.hpp @@ -0,0 +1,81 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_MATRIX_BATCH_DENSE_KERNELS_HPP_ +#define GKO_CORE_MATRIX_BATCH_DENSE_KERNELS_HPP_ + + +#include + + +#include +#include +#include + + +namespace gko { +namespace kernels { + + +#define GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL(_type) \ + void simple_apply(std::shared_ptr exec, \ + const batch::matrix::BatchDense<_type>* a, \ + const batch::MultiVector<_type>* b, \ + MultiVector<_type>* c) + +#define GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL(_type) \ + void advanced_apply(std::shared_ptr exec, \ + const batch::MultiVector<_type>* alpha, \ + const batch::matrix::BatchDense<_type>* a, \ + const batch::MultiVector<_type>* b, \ + const batch::MultiVector<_type>* beta, \ + batch::MultiVector<_type>* c) + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL(ValueType); \ + template \ + GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL(ValueType) + + +GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(batch_dense, + GKO_DECLARE_ALL_AS_TEMPLATES); + + +#undef GKO_DECLARE_ALL_AS_TEMPLATES + + +} // namespace kernels +} // namespace gko + + +#endif // GKO_CORE_MATRIX_BATCH_DENSE_KERNELS_HPP_ diff --git a/core/test/matrix/batch_dense.cpp b/core/test/matrix/batch_dense.cpp new file mode 100644 index 00000000000..7db7469baf6 --- /dev/null +++ b/core/test/matrix/batch_dense.cpp @@ -0,0 +1,520 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +#include +#include +#include + + +#include "core/test/utils.hpp" + + +namespace { + + +template +class BatchDense : public ::testing::Test { +protected: + using value_type = T; + using DenseMtx = gko::matrix::Dense; + using size_type = gko::size_type; + BatchDense() + : exec(gko::ReferenceExecutor::create()), + mtx(gko::batch_initialize>( + std::vector{4, 3}, + {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, + exec)) + {} + + + static void assert_equal_to_original_mtx( + gko::matrix::BatchDense* m) + { + ASSERT_EQ(m->get_num_batch_entries(), 2); + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); + ASSERT_EQ(m->get_stride().at(0), 4); + ASSERT_EQ(m->get_stride().at(1), 3); + ASSERT_EQ(m->get_num_stored_elements(), (2 * 4) + (2 * 3)); + ASSERT_EQ(m->get_num_stored_elements(0), 2 * 4); + ASSERT_EQ(m->get_num_stored_elements(1), 2 * 3); + EXPECT_EQ(m->at(0, 0, 0), value_type{-1.0}); + EXPECT_EQ(m->at(0, 0, 1), value_type{2.0}); + EXPECT_EQ(m->at(0, 0, 2), value_type{3.0}); + EXPECT_EQ(m->at(0, 1, 0), value_type{-1.5}); + EXPECT_EQ(m->at(0, 1, 1), value_type{2.5}); + ASSERT_EQ(m->at(0, 1, 2), value_type{3.5}); + EXPECT_EQ(m->at(1, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(1, 0, 1), value_type{2.5}); + EXPECT_EQ(m->at(1, 0, 2), value_type{3.0}); + EXPECT_EQ(m->at(1, 1, 0), value_type{1.0}); + EXPECT_EQ(m->at(1, 1, 1), value_type{2.0}); + ASSERT_EQ(m->at(1, 1, 2), value_type{3.0}); + } + + static void assert_empty(gko::matrix::BatchDense* m) + { + ASSERT_EQ(m->get_num_batch_entries(), 0); + ASSERT_EQ(m->get_num_stored_elements(), 0); + } + + std::shared_ptr exec; + std::unique_ptr> mtx; +}; + +TYPED_TEST_SUITE(BatchDense, gko::test::ValueTypes); + + +TYPED_TEST(BatchDense, CanBeEmpty) +{ + auto empty = gko::matrix::BatchDense::create(this->exec); + this->assert_empty(empty.get()); +} + + +TYPED_TEST(BatchDense, ReturnsNullValuesArrayWhenEmpty) +{ + auto empty = gko::matrix::BatchDense::create(this->exec); + ASSERT_EQ(empty->get_const_values(), nullptr); +} + + +TYPED_TEST(BatchDense, CanBeConstructedWithSize) +{ + using size_type = gko::size_type; + auto m = gko::matrix::BatchDense::create( + this->exec, + std::vector>{gko::dim<2>{2, 4}, gko::dim<2>{2, 3}}); + + ASSERT_EQ(m->get_num_batch_entries(), 2); + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 4)); + ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 3)); + EXPECT_EQ(m->get_stride().at(0), 4); + EXPECT_EQ(m->get_stride().at(1), 3); + ASSERT_EQ(m->get_num_stored_elements(), 14); + ASSERT_EQ(m->get_num_stored_elements(0), 8); + ASSERT_EQ(m->get_num_stored_elements(1), 6); +} + + +TYPED_TEST(BatchDense, CanBeConstructedWithSizeAndStride) +{ + using size_type = gko::size_type; + auto m = gko::matrix::BatchDense::create( + this->exec, std::vector>{gko::dim<2>{2, 3}}, + std::vector{4}); + + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); + EXPECT_EQ(m->get_stride().at(0), 4); + ASSERT_EQ(m->get_num_stored_elements(), 8); +} + + +TYPED_TEST(BatchDense, CanBeConstructedFromExistingData) +{ + using value_type = typename TestFixture::value_type; + using size_type = gko::size_type; + // clang-format off + value_type data[] = { + 1.0, 2.0, -1.0, + 3.0, 4.0, -1.0, + 3.0, 5.0, 1.0, + 5.0, 6.0, -3.0}; + // clang-format on + + auto m = gko::matrix::BatchDense::create( + this->exec, + std::vector>{gko::dim<2>{2, 2}, gko::dim<2>{2, 2}}, + gko::array::view(this->exec, 12, data), + 
std::vector{3, 3}); + + ASSERT_EQ(m->get_const_values(), data); + ASSERT_EQ(m->at(0, 0, 1), value_type{2.0}); + ASSERT_EQ(m->at(0, 1, 2), value_type{-1.0}); + ASSERT_EQ(m->at(1, 0, 1), value_type{5.0}); + ASSERT_EQ(m->at(1, 1, 2), value_type{-3.0}); +} + + +TYPED_TEST(BatchDense, CanBeConstructedFromExistingConstData) +{ + using value_type = typename TestFixture::value_type; + using size_type = gko::size_type; + // clang-format off + const value_type data[] = { + 1.0, 2.0, -1.0, + 3.0, 4.0, -1.0, + 3.0, 5.0, 1.0, + 5.0, 6.0, -3.0}; + // clang-format on + + auto m = gko::matrix::BatchDense::create_const( + this->exec, + std::vector>{gko::dim<2>{2, 2}, gko::dim<2>{2, 2}}, + gko::array::const_view(this->exec, 12, data), + std::vector{3, 3}); + + ASSERT_EQ(m->get_const_values(), data); + ASSERT_EQ(m->at(0, 0, 1), value_type{2.0}); + ASSERT_EQ(m->at(0, 1, 2), value_type{-1.0}); + ASSERT_EQ(m->at(1, 0, 1), value_type{5.0}); + ASSERT_EQ(m->at(1, 1, 2), value_type{-3.0}); +} + + +TYPED_TEST(BatchDense, CanBeConstructedFromBatchDenseMatrices) +{ + using value_type = typename TestFixture::value_type; + using DenseMtx = typename TestFixture::DenseMtx; + using size_type = gko::size_type; + auto mat1 = gko::initialize( + 3, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); + auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, + this->exec); + + auto m = gko::matrix::BatchDense::create( + this->exec, std::vector{mat1.get(), mat2.get()}); + auto m_ref = gko::matrix::BatchDense::create( + this->exec, std::vector{mat1.get(), mat2.get(), mat1.get(), + mat2.get(), mat1.get(), mat2.get()}); + auto m2 = + gko::matrix::BatchDense::create(this->exec, 3, m.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 1e-14); +} + + +TYPED_TEST(BatchDense, CanBeConstructedFromDenseMatricesByDuplication) +{ + using value_type = typename TestFixture::value_type; + using DenseMtx = typename TestFixture::DenseMtx; + using size_type = gko::size_type; + auto mat1 = gko::initialize( + 4, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); + auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, + this->exec); + + auto bat_m = gko::matrix::BatchDense::create( + this->exec, std::vector{mat1.get(), mat1.get(), mat1.get()}); + auto m = + gko::matrix::BatchDense::create(this->exec, 3, mat1.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(bat_m.get(), m.get(), 1e-14); +} + + +TYPED_TEST(BatchDense, CanBeConstructedFromDenseMatrices) +{ + using value_type = typename TestFixture::value_type; + using DenseMtx = typename TestFixture::DenseMtx; + using size_type = gko::size_type; + auto mat1 = gko::initialize( + 4, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); + auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, + this->exec); + + auto m = gko::matrix::BatchDense::create( + this->exec, std::vector{mat1.get(), mat2.get()}); + + this->assert_equal_to_original_mtx(m.get()); +} + + +TYPED_TEST(BatchDense, CanBeUnbatchedIntoDenseMatrices) +{ + using value_type = typename TestFixture::value_type; + using DenseMtx = typename TestFixture::DenseMtx; + using size_type = gko::size_type; + auto mat1 = gko::initialize( + 4, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); + auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, + this->exec); + + auto dense_mats = this->mtx->unbatch(); + + + GKO_ASSERT_MTX_NEAR(dense_mats[0].get(), mat1.get(), 0.); + GKO_ASSERT_MTX_NEAR(dense_mats[1].get(), mat2.get(), 0.); +} + + +TYPED_TEST(BatchDense, KnowsItsSizeAndValues) +{ + 
this->assert_equal_to_original_mtx(this->mtx.get()); +} + + +TYPED_TEST(BatchDense, CanBeListConstructed) +{ + using value_type = typename TestFixture::value_type; + auto m = gko::batch_initialize>( + {{1.0, 2.0}, {1.0, 3.0}}, this->exec); + + ASSERT_EQ(m->get_num_batch_entries(), 2); + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 1)); + ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 1)); + ASSERT_EQ(m->get_num_stored_elements(), 4); + EXPECT_EQ(m->at(0, 0), value_type{1}); + EXPECT_EQ(m->at(0, 1), value_type{2}); + EXPECT_EQ(m->at(1, 0), value_type{1}); + EXPECT_EQ(m->at(1, 1), value_type{3}); +} + + +TYPED_TEST(BatchDense, CanBeListConstructedWithstride) +{ + using value_type = typename TestFixture::value_type; + auto m = gko::batch_initialize>( + std::vector{2}, {{1.0, 2.0}}, this->exec); + ASSERT_EQ(m->get_num_batch_entries(), 1); + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 1)); + ASSERT_EQ(m->get_num_stored_elements(), 4); + EXPECT_EQ(m->at(0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 1), value_type{2.0}); +} + + +TYPED_TEST(BatchDense, CanBeListConstructedByCopies) +{ + using value_type = typename TestFixture::value_type; + auto m = gko::batch_initialize>( + 2, I({1.0, 2.0}), this->exec); + ASSERT_EQ(m->get_num_batch_entries(), 2); + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 1)); + ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 1)); + ASSERT_EQ(m->get_num_stored_elements(), 4); + EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 0, 1), value_type{2.0}); + EXPECT_EQ(m->at(1, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(1, 0, 1), value_type{2.0}); +} + + +TYPED_TEST(BatchDense, CanBeDoubleListConstructed) +{ + using value_type = typename TestFixture::value_type; + using T = value_type; + auto m = gko::batch_initialize>( + {{I{1.0, 1.0, 0.0}, I{2.0, 4.0, 3.0}, I{3.0, 6.0, 1.0}}, + {I{1.0, 2.0}, I{3.0, 4.0}, I{5.0, 6.0}}}, + this->exec); + + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(3, 3)); + ASSERT_EQ(m->get_size().at(1), gko::dim<2>(3, 2)); + ASSERT_EQ(m->get_stride().at(0), 3); + ASSERT_EQ(m->get_stride().at(1), 2); + EXPECT_EQ(m->get_num_stored_elements(), 15); + ASSERT_EQ(m->get_num_stored_elements(0), 9); + ASSERT_EQ(m->get_num_stored_elements(1), 6); + EXPECT_EQ(m->at(0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 1), value_type{1.0}); + EXPECT_EQ(m->at(0, 2), value_type{0.0}); + ASSERT_EQ(m->at(0, 3), value_type{2.0}); + EXPECT_EQ(m->at(0, 4), value_type{4.0}); + EXPECT_EQ(m->at(1, 0), value_type{1.0}); + EXPECT_EQ(m->at(1, 1), value_type{2.0}); + EXPECT_EQ(m->at(1, 2), value_type{3.0}); + ASSERT_EQ(m->at(1, 3), value_type{4.0}); + EXPECT_EQ(m->at(1, 4), value_type{5.0}); +} + + +TYPED_TEST(BatchDense, CanBeDoubleListConstructedWithstride) +{ + using value_type = typename TestFixture::value_type; + using T = value_type; + auto m = gko::batch_initialize>( + {4, 3}, + {{I{1.0, 1.0, 0.0}, I{2.0, 4.0, 3.0}, I{3.0, 6.0, 1.0}}, + {I{1.0, 2.0}, I{3.0, 4.0}, I{5.0, 6.0}}}, + this->exec); + + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(3, 3)); + ASSERT_EQ(m->get_size().at(1), gko::dim<2>(3, 2)); + ASSERT_EQ(m->get_stride().at(0), 4); + ASSERT_EQ(m->get_stride().at(1), 3); + EXPECT_EQ(m->get_num_stored_elements(), 21); + ASSERT_EQ(m->get_num_stored_elements(0), 12); + ASSERT_EQ(m->get_num_stored_elements(1), 9); + EXPECT_EQ(m->at(0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 1), value_type{1.0}); + EXPECT_EQ(m->at(0, 2), value_type{0.0}); + ASSERT_EQ(m->at(0, 3), value_type{2.0}); + EXPECT_EQ(m->at(0, 4), value_type{4.0}); + EXPECT_EQ(m->at(1, 0), value_type{1.0}); + 
EXPECT_EQ(m->at(1, 1), value_type{2.0}); + EXPECT_EQ(m->at(1, 2), value_type{3.0}); + ASSERT_EQ(m->at(1, 3), value_type{4.0}); + EXPECT_EQ(m->at(1, 4), value_type{5.0}); +} + + +TYPED_TEST(BatchDense, CanBeCopied) +{ + auto mtx_copy = gko::matrix::BatchDense::create(this->exec); + mtx_copy->copy_from(this->mtx.get()); + this->assert_equal_to_original_mtx(this->mtx.get()); + this->mtx->at(0, 0, 0) = 7; + this->mtx->at(0, 1) = 7; + this->assert_equal_to_original_mtx(mtx_copy.get()); +} + + +TYPED_TEST(BatchDense, CanBeMoved) +{ + auto mtx_copy = gko::matrix::BatchDense::create(this->exec); + mtx_copy->copy_from(std::move(this->mtx)); + this->assert_equal_to_original_mtx(mtx_copy.get()); +} + + +TYPED_TEST(BatchDense, CanBeCloned) +{ + auto mtx_clone = this->mtx->clone(); + this->assert_equal_to_original_mtx( + dynamic_castmtx.get())>(mtx_clone.get())); +} + + +TYPED_TEST(BatchDense, CanBeCleared) +{ + this->mtx->clear(); + this->assert_empty(this->mtx.get()); +} + + +TYPED_TEST(BatchDense, CanBeReadFromMatrixData) +{ + using value_type = typename TestFixture::value_type; + auto m = gko::matrix::BatchDense::create(this->exec); + // clang-format off + m->read({gko::matrix_data{{2, 3}, + {{0, 0, 1.0}, + {0, 1, 3.0}, + {0, 2, 2.0}, + {1, 0, 0.0}, + {1, 1, 5.0}, + {1, 2, 0.0}}}, + gko::matrix_data{{2, 2}, + {{0, 0, -1.0}, + {0, 1, 0.5}, + {1, 0, 0.0}, + {1, 1, 9.0}}}}); + // clang-format on + + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); + ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 2)); + ASSERT_EQ(m->get_num_stored_elements(), 10); + ASSERT_EQ(m->get_num_stored_elements(0), 6); + ASSERT_EQ(m->get_num_stored_elements(1), 4); + EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); + EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); + EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); + EXPECT_EQ(m->at(0, 0, 2), value_type{2.0}); + EXPECT_EQ(m->at(0, 1, 2), value_type{0.0}); + EXPECT_EQ(m->at(1, 0, 0), value_type{-1.0}); + EXPECT_EQ(m->at(1, 0, 1), value_type{0.5}); + EXPECT_EQ(m->at(1, 1, 0), value_type{0.0}); + EXPECT_EQ(m->at(1, 1, 1), value_type{9.0}); +} + + +TYPED_TEST(BatchDense, GeneratesCorrectMatrixData) +{ + using value_type = typename TestFixture::value_type; + using tpl = typename gko::matrix_data::nonzero_type; + std::vector> data; + + this->mtx->write(data); + + ASSERT_EQ(data[0].size, gko::dim<2>(2, 3)); + ASSERT_EQ(data[0].nonzeros.size(), 6); + EXPECT_EQ(data[0].nonzeros[0], tpl(0, 0, value_type{-1.0})); + EXPECT_EQ(data[0].nonzeros[1], tpl(0, 1, value_type{2.0})); + EXPECT_EQ(data[0].nonzeros[2], tpl(0, 2, value_type{3.0})); + EXPECT_EQ(data[0].nonzeros[3], tpl(1, 0, value_type{-1.5})); + EXPECT_EQ(data[0].nonzeros[4], tpl(1, 1, value_type{2.5})); + EXPECT_EQ(data[0].nonzeros[5], tpl(1, 2, value_type{3.5})); + ASSERT_EQ(data[1].size, gko::dim<2>(2, 3)); + ASSERT_EQ(data[1].nonzeros.size(), 6); + EXPECT_EQ(data[1].nonzeros[0], tpl(0, 0, value_type{1.0})); + EXPECT_EQ(data[1].nonzeros[1], tpl(0, 1, value_type{2.5})); + EXPECT_EQ(data[1].nonzeros[2], tpl(0, 2, value_type{3.0})); + EXPECT_EQ(data[1].nonzeros[3], tpl(1, 0, value_type{1.0})); + EXPECT_EQ(data[1].nonzeros[4], tpl(1, 1, value_type{2.0})); + EXPECT_EQ(data[1].nonzeros[5], tpl(1, 2, value_type{3.0})); +} + + +TYPED_TEST(BatchDense, CanBeReadFromMatrixAssemblyData) +{ + using value_type = typename TestFixture::value_type; + auto m = gko::matrix::BatchDense::create(this->exec); + gko::matrix_assembly_data data1(gko::dim<2>{2, 3}); + data1.set_value(0, 0, 1.0); + data1.set_value(0, 1, 3.0); + 
data1.set_value(0, 2, 2.0); + data1.set_value(1, 0, 0.0); + data1.set_value(1, 1, 5.0); + data1.set_value(1, 2, 0.0); + gko::matrix_assembly_data data2(gko::dim<2>{2, 1}); + data2.set_value(0, 0, 2.0); + data2.set_value(1, 0, 5.0); + auto data = std::vector>{data1, data2}; + + m->read(data); + + ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); + ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 1)); + ASSERT_EQ(m->get_num_stored_elements(), 8); + ASSERT_EQ(m->get_num_stored_elements(0), 6); + ASSERT_EQ(m->get_num_stored_elements(1), 2); + EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); + EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); + EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); + EXPECT_EQ(m->at(0, 0, 2), value_type{2.0}); + ASSERT_EQ(m->at(0, 1, 2), value_type{0.0}); + EXPECT_EQ(m->at(1, 0, 0), value_type{2.0}); + EXPECT_EQ(m->at(1, 1, 0), value_type{5.0}); +} + + +} // namespace diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index 4c972d2a584..dfa1b2177ee 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -38,6 +38,7 @@ target_sources(ginkgo_cuda factorization/par_ilut_select_kernel.cu factorization/par_ilut_spgeam_kernel.cu factorization/par_ilut_sweep_kernel.cu + matrix/batch_dense_kernels.cu matrix/coo_kernels.cu ${CSR_INSTANTIATE} matrix/dense_kernels.cu diff --git a/cuda/matrix/batch_dense_kernels.cu b/cuda/matrix/batch_dense_kernels.cu new file mode 100644 index 00000000000..5e53a410bf0 --- /dev/null +++ b/cuda/matrix/batch_dense_kernels.cu @@ -0,0 +1,90 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/matrix/batch_dense_kernels.hpp" + + +#include + + +#include "core/matrix/batch_struct.hpp" +#include "cuda/base/config.hpp" +#include "cuda/base/cublas_bindings.hpp" +#include "cuda/base/pointer_mode_guard.hpp" +#include "cuda/components/cooperative_groups.cuh" +#include "cuda/components/reduction.cuh" +#include "cuda/components/thread_ids.cuh" +#include "cuda/components/uninitialized_array.hpp" +#include "cuda/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace cuda { +/** + * @brief The BatchDense matrix format namespace. + * + * @ingroup batch_dense + */ +namespace batch_dense { + + +constexpr auto default_block_size = 256; +constexpr int sm_multiplier = 4; + + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::BatchDense* mat, + const batch::MultiVector* b, + MultiVector* x) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::BatchDense* a, + const batch::MultiVector* b, + const batch::MultiVector* beta, + batch::MultiVector* c) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_dense +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt index dd0d7c4cdfb..4099bb603a3 100644 --- a/dpcpp/CMakeLists.txt +++ b/dpcpp/CMakeLists.txt @@ -35,6 +35,7 @@ target_sources(ginkgo_dpcpp factorization/par_ilut_select_kernel.dp.cpp factorization/par_ilut_spgeam_kernel.dp.cpp factorization/par_ilut_sweep_kernel.dp.cpp + matrix/batch_dense_kernels.dp.cpp matrix/coo_kernels.dp.cpp matrix/csr_kernels.dp.cpp matrix/fbcsr_kernels.dp.cpp diff --git a/dpcpp/matrix/batch_dense_kernels.dp.cpp b/dpcpp/matrix/batch_dense_kernels.dp.cpp new file mode 100644 index 00000000000..100dbf7e670 --- /dev/null +++ b/dpcpp/matrix/batch_dense_kernels.dp.cpp @@ -0,0 +1,83 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/batch_dense_kernels.hpp" + + +#include + + +#include + + +#include +#include +#include + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The BatchDense matrix format namespace. + * + * @ingroup batch_dense + */ +namespace batch_dense { + + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::BatchDense* a, + const batch::MultiVector* b, + MultiVector* c) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::BatchDense* a, + const batch::MultiVector* b, + const batch::MultiVector* beta, + batch::MultiVector* c) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_dense +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index 779db13d36a..21b573b6cd0 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -35,6 +35,7 @@ set(GINKGO_HIP_SOURCES factorization/par_ilut_select_kernel.hip.cpp factorization/par_ilut_spgeam_kernel.hip.cpp factorization/par_ilut_sweep_kernel.hip.cpp + matrix/batch_dense_kernels.hip.cpp matrix/coo_kernels.hip.cpp ${CSR_INSTANTIATE} matrix/dense_kernels.hip.cpp diff --git a/hip/matrix/batch_dense_kernels.hip.cpp b/hip/matrix/batch_dense_kernels.hip.cpp new file mode 100644 index 00000000000..640f9c67b6a --- /dev/null +++ b/hip/matrix/batch_dense_kernels.hip.cpp @@ -0,0 +1,94 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/batch_dense_kernels.hpp" + + +#include + + +#include +#include + + +#include "core/matrix/batch_struct.hpp" +#include "hip/base/config.hip.hpp" +#include "hip/base/hipblas_bindings.hip.hpp" +#include "hip/base/pointer_mode_guard.hip.hpp" +#include "hip/components/cooperative_groups.hip.hpp" +#include "hip/components/reduction.hip.hpp" +#include "hip/components/thread_ids.hip.hpp" +#include "hip/components/uninitialized_array.hip.hpp" +#include "hip/matrix/batch_struct.hip.hpp" + + +namespace gko { +namespace kernels { +namespace hip { +/** + * @brief The BatchDense matrix format namespace. + * + * @ingroup batch_dense + */ +namespace batch_dense { + + +constexpr auto default_block_size = 256; +constexpr int sm_multiplier = 4; + + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::BatchDense* mat, + const batch::MultiVector* b, + MultiVector* x) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::BatchDense* a, + const batch::MultiVector* b, + const batch::MultiVector* beta, + batch::MultiVector* c) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_dense +} // namespace hip +} // namespace kernels +} // namespace gko diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp new file mode 100644 index 00000000000..60023727c8a --- /dev/null +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -0,0 +1,341 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_MATRIX_BATCH_DENSE_HPP_ +#define GKO_PUBLIC_CORE_MATRIX_BATCH_DENSE_HPP_ + + +#include +#include + + +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace gko { +namespace batch { +namespace matrix { + + +/** + * BatchDense is a batch matrix format which explicitly stores all values of the + * matrix in each of the batches. + * + * The values in each of the batches are stored in row-major format (values + * belonging to the same row appear consecutive in the memory). Optionally, rows + * can be padded for better memory access. + * + * @tparam ValueType precision of matrix elements + * + * @note While this format is not very useful for storing sparse matrices, it + * is often suitable to store vectors, and sets of vectors. + * @ingroup batch_dense + * @ingroup mat_formats + * @ingroup BatchLinOp + */ +template +class BatchDense : public EnableBatchLinOp>, + public EnableCreateMethod>, + public ConvertibleTo>> { + friend class EnableCreateMethod; + friend class EnablePolymorphicObject; + friend class BatchDense>; + friend class BatchDense>; + +public: + using EnableBatchLinOp::convert_to; + using EnableBatchLinOp::move_to; + + using value_type = ValueType; + using index_type = int32; + using transposed_type = BatchDense; + using unbatch_type = matrix::Dense; + using absolute_type = remove_complex; + using complex_type = to_complex; + + /** + * Creates a BatchDense matrix with the configuration of another BatchDense + * matrix. + * + * @param other The other matrix whose configuration needs to copied. + */ + static std::unique_ptr create_with_config_of( + const BatchDense* other) + { + // De-referencing `other` before calling the functions (instead of + // using operator `->`) is currently required to be compatible with + // CUDA 10.1. + // Otherwise, it results in a compile error. + return (*other).create_with_same_config(); + } + + void convert_to( + BatchDense>* result) const override; + + void move_to(BatchDense>* result) override; + + + /** + * Creates a mutable view (of matrix::Dense type) of one item of the Batch + * MultiVector object. Does not perform any deep copies, but + * only returns a view of the data. + * + * @param item_id The index of the batch item + * + * @return a matrix::Dense object with the data from the batch item at the + * given index. + */ + std::unique_ptr create_view_for_item(size_type item_id); + + /** + * @copydoc create_view_for_item(size_type) + */ + std::unique_ptr create_const_view_for_item( + size_type item_id) const; + + /** + * Returns the batch size. + * + * @return the batch size + */ + batch_dim<2> get_size() const { return batch_size_; } + + /** + * Returns the number of batch items. + * + * @return the number of batch items + */ + size_type get_num_batch_items() const + { + return batch_size_.get_num_batch_items(); + } + + /** + * Returns the common size of the batch items. 
+ * + * @return the common size stored + */ + dim<2> get_common_size() const { return batch_size_.get_common_size(); } + + /** + * Returns a pointer to the array of values of the multi-vector + * + * @return the pointer to the array of values + */ + value_type* get_values() noexcept { return values_.get_data(); } + + /** + * @copydoc get_values() + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const value_type* get_const_values() const noexcept + { + return values_.get_const_data(); + } + + /** + * Returns a pointer to the array of values of the multi-vector for a + * specific batch item. + * + * @param batch_id the id of the batch item. + * + * @return the pointer to the array of values + */ + value_type* get_values_for_item(size_type batch_id) noexcept + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return values_.get_data() + + this->get_size().get_cumulative_offset(batch_id); + } + + /** + * @copydoc get_values_for_item(size_type) + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const value_type* get_const_values_for_item(size_type batch_id) const + noexcept + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return values_.get_const_data() + + this->get_size().get_cumulative_offset(batch_id); + } + + /** + * Returns the number of elements explicitly stored in the batch matrix, + * cumulative across all the batch items. + * + * @return the number of elements explicitly stored in the vector, + * cumulative across all the batch items + */ + size_type get_num_stored_elements() const noexcept + { + return values_.get_num_elems(); + } + + + /** + * Creates a constant (immutable) batch dense matrix from a constant + * array. + * + * @param exec the executor to create the vector on + * @param size the dimensions of the vector + * @param values the value array of the vector + * + * @return A smart pointer to the constant multi-vector wrapping the input + * array (if it resides on the same executor as the vector) or a copy of the + * array on the correct executor. + */ + static std::unique_ptr> + create_const(std::shared_ptr exec, + const batch_dim<2>& sizes, + gko::detail::const_array_view&& values); + +private: + inline size_type compute_num_elems(const batch_dim<2>& size) + { + return size.get_cumulative_offset(size.get_num_batch_items()); + } + + + void apply(const MultiVector* b, + MultiVector* x) const + { + this->apply_impl(b, x); + } + + void apply(const MultiVector* alpha, + const MultiVector* b, + const MultiVector* beta, + MultiVector* x) const + { + this->apply_impl(alpha, b, beta, x); + } + +protected: + /** + * Sets the size of the MultiVector. + * + * @param value the new size of the operator + */ + void set_size(const batch_dim<2>& value) noexcept; + + /** + * Creates an uninitialized BatchDense matrix of the specified size. + * + * @param exec Executor associated to the matrix + * @param size size of the matrix + */ + BatchDense(std::shared_ptr exec, + const batch_dim<2>& size = batch_dim<2>{}); + + /** + * Creates a BatchDense matrix from an already allocated (and initialized) + * array. 
+ * + * @tparam ValuesArray type of array of values + * + * @param exec Executor associated to the matrix + * @param size sizes of the batch matrices in a batch_dim object + * @param values array of matrix values + * @param strides stride of the rows (i.e. offset between the first + * elements of two consecutive rows, expressed as the + * number of matrix elements) + * + * @note If `values` is not an rvalue, not an array of ValueType, or is on + * the wrong executor, an internal copy will be created, and the + * original array data will not be used in the matrix. + */ + template + BatchDense(std::shared_ptr exec, const batch_dim<2>& size, + ValuesArray&& values) + : EnableBatchLinOp(exec, size), + values_{exec, std::forward(values)} + { + // Ensure that the values array has the correct size + auto num_elems = compute_num_elems(size); + GKO_ENSURE_IN_BOUNDS(num_elems, values_.get_num_elems() + 1); + } + + /** + * Creates a BatchDense matrix with the same configuration as the callers + * matrix. + * + * @returns a BatchDense matrix with the same configuration as the caller. + */ + std::unique_ptr create_with_same_config() const; + + virtual void apply_impl(const MultiVector* b, + MultiVector* x) const; + + virtual void apply_impl(const MultiVector* alpha, + const MultiVector* b, + const MultiVector* beta, + MultiVector* x) const; + + size_type linearize_index(size_type batch, size_type row, + size_type col) const noexcept + { + return batch_size_.get_cumulative_offset(batch) + + row * batch_size_.get_common_size()[1] + col; + } + + size_type linearize_index(size_type batch, size_type idx) const noexcept + { + return linearize_index(batch, idx / this->get_common_size()[1], + idx % this->get_common_size()[1]); + } + +private: + batch_dim<2> batch_size_; + array values_; +}; + + +} // namespace matrix +} // namespace batch +} // namespace gko + + +#endif // GKO_PUBLIC_CORE_MATRIX_BATCH_DENSE_HPP_ diff --git a/omp/CMakeLists.txt b/omp/CMakeLists.txt index 47259feeac0..d87399492f5 100644 --- a/omp/CMakeLists.txt +++ b/omp/CMakeLists.txt @@ -23,6 +23,7 @@ target_sources(ginkgo_omp factorization/par_ict_kernels.cpp factorization/par_ilu_kernels.cpp factorization/par_ilut_kernels.cpp + matrix/batch_dense_kernels.cpp matrix/coo_kernels.cpp matrix/csr_kernels.cpp matrix/dense_kernels.cpp diff --git a/omp/matrix/batch_dense_kernels.cpp b/omp/matrix/batch_dense_kernels.cpp new file mode 100644 index 00000000000..ea2e84a8e83 --- /dev/null +++ b/omp/matrix/batch_dense_kernels.cpp @@ -0,0 +1,129 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*************************************************************/
+
+#include "core/matrix/batch_dense_kernels.hpp"
+
+
+#include
+
+
+#include
+#include
+
+
+#include "reference/matrix/batch_struct.hpp"
+
+
+namespace gko {
+namespace kernels {
+namespace omp {
+/**
+ * @brief The BatchDense matrix format namespace.
+ * @ref BatchDense
+ * @ingroup batch_dense
+ */
+namespace batch_dense {
+
+
+#include "reference/matrix/batch_dense_kernels.hpp.inc"
+
+
+template <typename ValueType>
+void simple_apply(std::shared_ptr<const DefaultExecutor> exec,
+                  const batch::matrix::BatchDense<ValueType>* mat,
+                  const batch::MultiVector<ValueType>* b,
+                  batch::MultiVector<ValueType>* x)
+{
+    const auto b_ub = host::get_batch_struct(b);
+    const auto x_ub = host::get_batch_struct(x);
+    const auto mat_ub = host::get_batch_struct(mat);
+#pragma omp parallel for
+    for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) {
+        const auto mat_item = batch::extract_batch_item(mat_ub, batch);
+        const auto b_item = batch::extract_batch_item(b_ub, batch);
+        const auto x_item = batch::extract_batch_item(x_ub, batch);
+        simple_apply_kernel(mat_item, b_item, x_item);
+    }
+}
+
+GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
+    GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL);
+
+
+template <typename ValueType>
+void advanced_apply(std::shared_ptr<const DefaultExecutor> exec,
+                    const batch::MultiVector<ValueType>* alpha,
+                    const batch::matrix::BatchDense<ValueType>* a,
+                    const batch::MultiVector<ValueType>* b,
+                    const batch::MultiVector<ValueType>* beta,
+                    batch::MultiVector<ValueType>* c)
+{
+    const auto b_ub = host::get_batch_struct(b);
+    const auto x_ub = host::get_batch_struct(c);
+    const auto mat_ub = host::get_batch_struct(a);
+    const auto alpha_ub = host::get_batch_struct(alpha);
+    const auto beta_ub = host::get_batch_struct(beta);
+    if (alpha->get_num_batch_items() > 1) {
+        GKO_ASSERT(alpha->get_num_batch_items() == c->get_num_batch_items());
+        GKO_ASSERT(beta->get_num_batch_items() == c->get_num_batch_items());
+#pragma omp parallel for
+        for (size_type batch = 0; batch < c->get_num_batch_items(); ++batch) {
+            const auto mat_item = batch::extract_batch_item(mat_ub, batch);
+            const auto b_item = batch::extract_batch_item(b_ub, batch);
+            const auto x_item = batch::extract_batch_item(x_ub, batch);
+            const auto alpha_item = batch::extract_batch_item(alpha_ub, batch);
+            const auto beta_item = batch::extract_batch_item(beta_ub, batch);
+            advanced_apply_kernel(alpha_item.values[0], mat_item, b_item,
+                                  beta_item.values[0], x_item);
+        }
+    } else {
+        const auto alpha_item = batch::extract_batch_item(alpha_ub, 0);
+        const auto beta_item = batch::extract_batch_item(beta_ub, 0);
+#pragma omp parallel for
+        for (size_type batch = 0; batch < c->get_num_batch_items(); ++batch) {
+            const auto mat_item = batch::extract_batch_item(mat_ub, batch);
+            const auto b_item = batch::extract_batch_item(b_ub, batch);
+            const auto x_item =
batch::extract_batch_item(x_ub, batch); + advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, + beta_item.values[0], x_item); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_dense +} // namespace omp +} // namespace kernels +} // namespace gko diff --git a/reference/CMakeLists.txt b/reference/CMakeLists.txt index dd54e3fb52f..37498588ca7 100644 --- a/reference/CMakeLists.txt +++ b/reference/CMakeLists.txt @@ -25,6 +25,7 @@ target_sources(ginkgo_reference factorization/par_ict_kernels.cpp factorization/par_ilu_kernels.cpp factorization/par_ilut_kernels.cpp + matrix/batch_dense_kernels.cpp matrix/coo_kernels.cpp matrix/csr_kernels.cpp matrix/dense_kernels.cpp diff --git a/reference/base/batch_struct.hpp b/reference/base/batch_struct.hpp index ce7c7af5605..b30fa971ed7 100644 --- a/reference/base/batch_struct.hpp +++ b/reference/base/batch_struct.hpp @@ -87,6 +87,34 @@ inline batch::multi_vector::uniform_batch get_batch_struct( } +/** + * Generates an immutable uniform batch struct from a batch of multi-vectors. + */ +template +inline batch::matrix::batch_dense::uniform_batch +get_batch_struct(const batch::matrix::BatchDense* const op) +{ + return {op->get_const_values(), op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + +/** + * Generates a uniform batch struct from a batch of multi-vectors. + */ +template +inline batch::matrix::batch_dense::uniform_batch get_batch_struct( + batch::matrix::BatchDense* const op) +{ + return {op->get_values(), op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + } // namespace host } // namespace kernels } // namespace gko diff --git a/reference/matrix/batch_dense_kernels.cpp b/reference/matrix/batch_dense_kernels.cpp new file mode 100644 index 00000000000..aa285a6b01b --- /dev/null +++ b/reference/matrix/batch_dense_kernels.cpp @@ -0,0 +1,128 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/batch_dense_kernels.hpp" + + +#include + + +#include +#include +#include + + +#include "core/matrix/batch_struct.hpp" +#include "reference/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace reference { +/** + * @brief The BatchDense matrix format namespace. + * @ref BatchDense + * @ingroup batch_dense + */ +namespace batch_dense { + + +#include "reference/matrix/batch_dense_kernels.hpp.inc" + + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::BatchDense* mat, + const batch::MultiVector* b, + MultiVector* x) +{ + const auto b_ub = host::get_batch_struct(b); + const auto x_ub = host::get_batch_struct(x); + const auto mat_ub = host::get_batch_struct(mat); + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + simple_apply_kernel(mat_item, b_item, x_item); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::BatchDense* a, + const batch::MultiVector* b, + const batch::MultiVector* beta, + batch::MultiVector* c) +{ + const auto b_ub = host::get_batch_struct(b); + const auto x_ub = host::get_batch_struct(x); + const auto mat_ub = host::get_batch_struct(mat); + const auto alpha_ub = host::get_batch_struct(alpha); + const auto beta_ub = host::get_batch_struct(beta); + if (alpha->get_num_batch_items() > 1) { + GKO_ASSERT(alpha->get_num_batch_items() == x->get_num_batch_items()); + GKO_ASSERT(beta->get_num_batch_items() == x->get_num_batch_items()); + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + const auto alpha_item = batch::extract_batch_item(alpha_ub, batch); + const auto beta_item = batch::extract_batch_item(beta_ub, batch); + advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, + beta_item.values[0], x_item); + } + } else { + const auto alpha_item = batch::extract_batch_item(alpha_ub, 0); + const auto beta_item = batch::extract_batch_item(beta_ub, 0); + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, + beta_item.values[0], x_item); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_dense +} // namespace reference +} 
// namespace kernels +} // namespace gko diff --git a/reference/matrix/batch_dense_kernels.hpp.inc b/reference/matrix/batch_dense_kernels.hpp.inc new file mode 100644 index 00000000000..ae342982de5 --- /dev/null +++ b/reference/matrix/batch_dense_kernels.hpp.inc @@ -0,0 +1,88 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +template +inline void simple_apply_kernel( + const gko::batch::batch_dense::batch_item& a, + const gko::batch::batch_multi_vector::batch_item& b, + const gko::batch::batch_multi_vector::batch_item& c) +{ + for (int row = 0; row < c.num_rows; ++row) { + for (int col = 0; col < c.num_rhs; ++col) { + c.values[row * c.stride + col] = gko::zero(); + } + } + + for (int row = 0; row < c.num_rows; ++row) { + for (int inner = 0; inner < a.num_rhs; ++inner) { + for (int col = 0; col < c.num_rhs; ++col) { + c.values[row * c.stride + col] += + a.values[row * a.stride + inner] * + b.values[inner * b.stride + col]; + } + } + } +} + + +template +inline void advanced_apply_kernel( + const ValueType alpha, + const gko::batch::batch_dense::batch_item& a, + const gko::batch::batch_multi_vector::batch_item& b, + const ValueType beta, + const gko::batch::batch_multi_vector::batch_item& c) +{ + if (beta != gko::zero()) { + for (int row = 0; row < c.num_rows; ++row) { + for (int col = 0; col < c.num_rhs; ++col) { + c.values[row * c.stride + col] *= beta; + } + } + } else { + for (int row = 0; row < c.num_rows; ++row) { + for (int col = 0; col < c.num_rhs; ++col) { + c.values[row * c.stride + col] *= gko::zero(); + } + } + } + + for (int row = 0; row < c.num_rows; ++row) { + for (int inner = 0; inner < a.num_rhs; ++inner) { + for (int col = 0; col < c.num_rhs; ++col) { + c.values[row * c.stride + col] += + alpha * a.values[row * a.stride + inner] * + b.values[inner * b.stride + col]; + } + } + } +} From e9f2aa1061c853455898c87c5139c3186957d7b0 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 2 Oct 2023 19:18:05 +0200 Subject: [PATCH 02/28] add reference kernels WIP --- core/base/batch_multi_vector_kernels.hpp | 1 - core/base/batch_struct.hpp | 76 ----------- core/device_hooks/common_kernels.inc.cpp | 1 + core/matrix/batch_dense.cpp | 10 +- core/matrix/batch_dense_kernels.hpp | 5 +- core/matrix/batch_struct.hpp | 125 +++++++++++++++++++ cuda/matrix/batch_dense_kernels.cu | 3 +- dpcpp/matrix/batch_dense_kernels.dp.cpp | 2 +- hip/matrix/batch_dense_kernels.hip.cpp | 2 +- include/ginkgo/core/matrix/batch_dense.hpp | 10 +- omp/matrix/batch_dense_kernels.cpp | 4 +- reference/base/batch_struct.hpp | 28 ----- reference/matrix/batch_dense_kernels.cpp | 16 ++- reference/matrix/batch_dense_kernels.hpp.inc | 12 +- reference/matrix/batch_struct.hpp | 95 ++++++++++++++ 15 files changed, 259 insertions(+), 131 deletions(-) create mode 100644 core/matrix/batch_struct.hpp create mode 100644 reference/matrix/batch_struct.hpp diff --git a/core/base/batch_multi_vector_kernels.hpp b/core/base/batch_multi_vector_kernels.hpp index 8603a2b9055..5a39567f470 100644 --- a/core/base/batch_multi_vector_kernels.hpp +++ b/core/base/batch_multi_vector_kernels.hpp @@ -39,7 +39,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include -#include #include "core/base/kernel_declaration.hpp" diff --git a/core/base/batch_struct.hpp b/core/base/batch_struct.hpp index 21bd5b0e8ea..caca4577cf7 100644 --- a/core/base/batch_struct.hpp +++ b/core/base/batch_struct.hpp @@ -81,46 +81,6 @@ struct uniform_batch { } // namespace multi_vector -namespace batch_dense { - - -/** - * Encapsulates one matrix from a batch of multi-vectors. - */ -template -struct batch_item { - using value_type = ValueType; - ValueType* values; - int stride; - int num_rows; - int num_rhs; -}; - - -/** - * A 'simple' structure to store a global uniform batch of multi-vectors. 
- */ -template -struct uniform_batch { - using value_type = ValueType; - using entry_type = batch_item; - - ValueType* values; - size_type num_batch_items; - int stride; - int num_rows; - int num_rhs; - - size_type get_entry_storage() const - { - return num_rows * stride * sizeof(value_type); - } -}; - - -} // namespace batch_dense - - template GKO_ATTRIBUTES GKO_INLINE multi_vector::batch_item to_const( const multi_vector::batch_item& b) @@ -137,22 +97,6 @@ GKO_ATTRIBUTES GKO_INLINE multi_vector::uniform_batch to_const( } -template -GKO_ATTRIBUTES GKO_INLINE matrix::batch_dense::batch_item -to_const(const matrix::batch_dense::batch_item& b) -{ - return {b.values, b.stride, b.num_rows, b.num_rhs}; -} - - -template -GKO_ATTRIBUTES GKO_INLINE matrix::batch_dense::uniform_batch -to_const(const matrix::batch_dense::uniform_batch& ub) -{ - return {ub.values, ub.num_batch_items, ub.stride, ub.num_rows, ub.num_rhs}; -} - - /** * Extract one object (matrix, vector etc.) from a batch of objects * @@ -182,26 +126,6 @@ extract_batch_item(ValueType* const batch_values, const int stride, } -template -GKO_ATTRIBUTES GKO_INLINE matrix::batch_dense::batch_item -extract_batch_item(const matrix::batch_dense::uniform_batch& batch, - const size_type batch_idx) -{ - return {batch.values + batch_idx * batch.stride * batch.num_rows, - batch.stride, batch.num_rows, batch.num_rhs}; -} - -template -GKO_ATTRIBUTES GKO_INLINE matrix::batch_dense::batch_item -extract_batch_item(ValueType* const batch_values, const int stride, - const int num_rows, const int num_rhs, - const size_type batch_idx) -{ - return {batch_values + batch_idx * stride * num_rows, stride, num_rows, - num_rhs}; -} - - } // namespace batch } // namespace gko diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index c22f5cd968d..87cab3dcf0b 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -57,6 +57,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/factorization/par_ict_kernels.hpp" #include "core/factorization/par_ilu_kernels.hpp" #include "core/factorization/par_ilut_kernels.hpp" +#include "core/matrix/batch_dense_kernels.hpp" #include "core/matrix/coo_kernels.hpp" #include "core/matrix/csr_kernels.hpp" #include "core/matrix/dense_kernels.hpp" diff --git a/core/matrix/batch_dense.cpp b/core/matrix/batch_dense.cpp index e6dedcf11fd..803f7a51c50 100644 --- a/core/matrix/batch_dense.cpp +++ b/core/matrix/batch_dense.cpp @@ -43,6 +43,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include +#include #include "core/matrix/batch_dense_kernels.hpp" @@ -66,7 +67,7 @@ namespace detail { template batch_dim<2> compute_batch_size( - const std::vector*>& matrices) + const std::vector*>& matrices) { auto common_size = matrices[0]->get_size(); for (size_type i = 1; i < matrices.size(); ++i) { @@ -80,7 +81,7 @@ batch_dim<2> compute_batch_size( template -std::unique_ptr> +std::unique_ptr> BatchDense::create_view_for_item(size_type item_id) { auto exec = this->get_executor(); @@ -96,7 +97,7 @@ BatchDense::create_view_for_item(size_type item_id) template -std::unique_ptr> +std::unique_ptr> BatchDense::create_const_view_for_item(size_type item_id) const { auto exec = this->get_executor(); @@ -113,7 +114,8 @@ BatchDense::create_const_view_for_item(size_type item_id) const template std::unique_ptr> -BatchDense::create_with_config_of(ptr_param other) +BatchDense::create_with_config_of( + ptr_param> other) { // De-referencing `other` before calling the functions (instead of // using operator `->`) is currently required to be compatible with diff --git a/core/matrix/batch_dense_kernels.hpp b/core/matrix/batch_dense_kernels.hpp index e801d7aa152..7f814e08b50 100644 --- a/core/matrix/batch_dense_kernels.hpp +++ b/core/matrix/batch_dense_kernels.hpp @@ -42,6 +42,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/base/kernel_declaration.hpp" + + namespace gko { namespace kernels { @@ -50,7 +53,7 @@ namespace kernels { void simple_apply(std::shared_ptr exec, \ const batch::matrix::BatchDense<_type>* a, \ const batch::MultiVector<_type>* b, \ - MultiVector<_type>* c) + batch::MultiVector<_type>* c) #define GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL(_type) \ void advanced_apply(std::shared_ptr exec, \ diff --git a/core/matrix/batch_struct.hpp b/core/matrix/batch_struct.hpp new file mode 100644 index 00000000000..b6926b0894d --- /dev/null +++ b/core/matrix/batch_struct.hpp @@ -0,0 +1,125 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_MATRIX_BATCH_STRUCT_HPP_ +#define GKO_CORE_MATRIX_BATCH_STRUCT_HPP_ + + +#include +#include +#include + + +namespace gko { +namespace batch { +namespace matrix { +namespace batch_dense { + + +/** + * Encapsulates one matrix from a batch of multi-vectors. + */ +template +struct batch_item { + using value_type = ValueType; + ValueType* values; + int stride; + int num_rows; + int num_rhs; +}; + + +/** + * A 'simple' structure to store a global uniform batch of multi-vectors. + */ +template +struct uniform_batch { + using value_type = ValueType; + using entry_type = batch_item; + + ValueType* values; + size_type num_batch_items; + int stride; + int num_rows; + int num_rhs; + + size_type get_entry_storage() const + { + return num_rows * stride * sizeof(value_type); + } +}; + + +} // namespace batch_dense + + +template +GKO_ATTRIBUTES GKO_INLINE batch_dense::batch_item to_const( + const batch_dense::batch_item& b) +{ + return {b.values, b.stride, b.num_rows, b.num_rhs}; +} + + +template +GKO_ATTRIBUTES GKO_INLINE batch_dense::uniform_batch to_const( + const batch_dense::uniform_batch& ub) +{ + return {ub.values, ub.num_batch_items, ub.stride, ub.num_rows, ub.num_rhs}; +} + + +template +GKO_ATTRIBUTES GKO_INLINE batch_dense::batch_item extract_batch_item( + const batch_dense::uniform_batch& batch, + const size_type batch_idx) +{ + return {batch.values + batch_idx * batch.stride * batch.num_rows, + batch.stride, batch.num_rows, batch.num_rhs}; +} + +template +GKO_ATTRIBUTES GKO_INLINE batch_dense::batch_item extract_batch_item( + ValueType* const batch_values, const int stride, const int num_rows, + const int num_rhs, const size_type batch_idx) +{ + return {batch_values + batch_idx * stride * num_rows, stride, num_rows, + num_rhs}; +} + + +} // namespace matrix +} // namespace batch +} // namespace gko + + +#endif // GKO_CORE_MATRIX_BATCH_STRUCT_HPP_ diff --git a/cuda/matrix/batch_dense_kernels.cu b/cuda/matrix/batch_dense_kernels.cu index 5e53a410bf0..4615af581f5 100644 --- a/cuda/matrix/batch_dense_kernels.cu +++ b/cuda/matrix/batch_dense_kernels.cu @@ -36,6 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
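The batch_item and uniform_batch structs above describe a packed, row-major storage scheme: item k of the batch starts at values + k * stride * num_rows, and entry (row, col) of that item sits at offset row * stride + col, which is exactly what extract_batch_item and get_entry_storage assume. The following standalone sketch (plain C++, helper name invented for illustration, not part of the patch) spells out that offset arithmetic:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Flat offset of entry (row, col) of batch item k in the packed layout
    // described above; each item occupies stride * num_rows values.
    std::size_t flat_index(std::size_t k, int stride, int num_rows, int row,
                           int col)
    {
        return k * static_cast<std::size_t>(stride * num_rows) +
               row * stride + col;
    }

    int main()
    {
        const int num_rows = 2;
        const int num_rhs = 3;
        const int stride = num_rhs;  // no padding in this example
        std::vector<double> values(2 * stride * num_rows, 0.0);
        // Entry (1, 2) of the second item (k = 1) lands at 6 + 5 = 11.
        values[flat_index(1, stride, num_rows, 1, 2)] = 42.0;
        assert(values[11] == 42.0);
        return 0;
    }
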
#include +#include "core/base/batch_struct.hpp" #include "core/matrix/batch_struct.hpp" #include "cuda/base/config.hpp" #include "cuda/base/cublas_bindings.hpp" @@ -66,7 +67,7 @@ template void simple_apply(std::shared_ptr exec, const batch::matrix::BatchDense* mat, const batch::MultiVector* b, - MultiVector* x) GKO_NOT_IMPLEMENTED; + batch::MultiVector* x) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); diff --git a/dpcpp/matrix/batch_dense_kernels.dp.cpp b/dpcpp/matrix/batch_dense_kernels.dp.cpp index 100dbf7e670..964bf094077 100644 --- a/dpcpp/matrix/batch_dense_kernels.dp.cpp +++ b/dpcpp/matrix/batch_dense_kernels.dp.cpp @@ -59,7 +59,7 @@ template void simple_apply(std::shared_ptr exec, const batch::matrix::BatchDense* a, const batch::MultiVector* b, - MultiVector* c) GKO_NOT_IMPLEMENTED; + batch::MultiVector* x) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); diff --git a/hip/matrix/batch_dense_kernels.hip.cpp b/hip/matrix/batch_dense_kernels.hip.cpp index 640f9c67b6a..93570388d50 100644 --- a/hip/matrix/batch_dense_kernels.hip.cpp +++ b/hip/matrix/batch_dense_kernels.hip.cpp @@ -70,7 +70,7 @@ template void simple_apply(std::shared_ptr exec, const batch::matrix::BatchDense* mat, const batch::MultiVector* b, - MultiVector* x) GKO_NOT_IMPLEMENTED; + batch::MultiVector* x) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 60023727c8a..47aff35b7e7 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include #include #include @@ -85,7 +86,7 @@ class BatchDense : public EnableBatchLinOp>, using value_type = ValueType; using index_type = int32; using transposed_type = BatchDense; - using unbatch_type = matrix::Dense; + using unbatch_type = gko::matrix::Dense; using absolute_type = remove_complex; using complex_type = to_complex; @@ -227,10 +228,9 @@ class BatchDense : public EnableBatchLinOp>, * array (if it resides on the same executor as the vector) or a copy of the * array on the correct executor. */ - static std::unique_ptr> - create_const(std::shared_ptr exec, - const batch_dim<2>& sizes, - gko::detail::const_array_view&& values); + static std::unique_ptr> create_const( + std::shared_ptr exec, const batch_dim<2>& sizes, + gko::detail::const_array_view&& values); private: inline size_type compute_num_elems(const batch_dim<2>& size) diff --git a/omp/matrix/batch_dense_kernels.cpp b/omp/matrix/batch_dense_kernels.cpp index ea2e84a8e83..fe742bee402 100644 --- a/omp/matrix/batch_dense_kernels.cpp +++ b/omp/matrix/batch_dense_kernels.cpp @@ -40,6 +40,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" #include "reference/matrix/batch_struct.hpp" @@ -61,7 +63,7 @@ template void simple_apply(std::shared_ptr exec, const batch::matrix::BatchDense* mat, const batch::MultiVector* b, - MultiVector* x) + batch::MultiVector* x) { const auto b_ub = host::get_batch_struct(b); const auto x_ub = host::get_batch_struct(x); diff --git a/reference/base/batch_struct.hpp b/reference/base/batch_struct.hpp index b30fa971ed7..ce7c7af5605 100644 --- a/reference/base/batch_struct.hpp +++ b/reference/base/batch_struct.hpp @@ -87,34 +87,6 @@ inline batch::multi_vector::uniform_batch get_batch_struct( } -/** - * Generates an immutable uniform batch struct from a batch of multi-vectors. - */ -template -inline batch::matrix::batch_dense::uniform_batch -get_batch_struct(const batch::matrix::BatchDense* const op) -{ - return {op->get_const_values(), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; -} - - -/** - * Generates a uniform batch struct from a batch of multi-vectors. - */ -template -inline batch::matrix::batch_dense::uniform_batch get_batch_struct( - batch::matrix::BatchDense* const op) -{ - return {op->get_values(), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; -} - - } // namespace host } // namespace kernels } // namespace gko diff --git a/reference/matrix/batch_dense_kernels.cpp b/reference/matrix/batch_dense_kernels.cpp index aa285a6b01b..bb5f3e18df7 100644 --- a/reference/matrix/batch_dense_kernels.cpp +++ b/reference/matrix/batch_dense_kernels.cpp @@ -41,7 +41,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include "core/base/batch_struct.hpp" #include "core/matrix/batch_struct.hpp" +#include "reference/base/batch_struct.hpp" #include "reference/matrix/batch_struct.hpp" @@ -63,13 +65,13 @@ template void simple_apply(std::shared_ptr exec, const batch::matrix::BatchDense* mat, const batch::MultiVector* b, - MultiVector* x) + batch::MultiVector* x) { const auto b_ub = host::get_batch_struct(b); const auto x_ub = host::get_batch_struct(x); const auto mat_ub = host::get_batch_struct(mat); for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { - const auto mat_item = batch::extract_batch_item(mat_ub, batch); + const auto mat_item = batch::matrix::extract_batch_item(mat_ub, batch); const auto b_item = batch::extract_batch_item(b_ub, batch); const auto x_item = batch::extract_batch_item(x_ub, batch); simple_apply_kernel(mat_item, b_item, x_item); @@ -83,10 +85,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void advanced_apply(std::shared_ptr exec, const batch::MultiVector* alpha, - const batch::matrix::BatchDense* a, + const batch::matrix::BatchDense* mat, const batch::MultiVector* b, const batch::MultiVector* beta, - batch::MultiVector* c) + batch::MultiVector* x) { const auto b_ub = host::get_batch_struct(b); const auto x_ub = host::get_batch_struct(x); @@ -97,7 +99,8 @@ void advanced_apply(std::shared_ptr exec, GKO_ASSERT(alpha->get_num_batch_items() == x->get_num_batch_items()); GKO_ASSERT(beta->get_num_batch_items() == x->get_num_batch_items()); for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { - const auto mat_item = batch::extract_batch_item(mat_ub, batch); + const auto mat_item = + batch::matrix::extract_batch_item(mat_ub, batch); const auto b_item = batch::extract_batch_item(b_ub, batch); const auto x_item = batch::extract_batch_item(x_ub, batch); const auto alpha_item = batch::extract_batch_item(alpha_ub, batch); @@ -109,7 +112,8 @@ void advanced_apply(std::shared_ptr exec, const auto alpha_item = batch::extract_batch_item(alpha_ub, 0); const auto beta_item = batch::extract_batch_item(beta_ub, 0); for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { - const auto mat_item = batch::extract_batch_item(mat_ub, batch); + const auto mat_item = + batch::matrix::extract_batch_item(mat_ub, batch); const auto b_item = batch::extract_batch_item(b_ub, batch); const auto x_item = batch::extract_batch_item(x_ub, batch); advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, diff --git a/reference/matrix/batch_dense_kernels.hpp.inc b/reference/matrix/batch_dense_kernels.hpp.inc index ae342982de5..d45183b2faa 100644 --- a/reference/matrix/batch_dense_kernels.hpp.inc +++ b/reference/matrix/batch_dense_kernels.hpp.inc @@ -32,9 +32,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
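The reference and OpenMP wrappers above dispatch on whether alpha and beta provide one value per batch item or a single value that is reused for every item; for each item they then perform the usual dense update x = alpha * A * b + beta * x. A standalone model of that per-item update for a single right-hand side (plain C++, not the Ginkgo kernel itself, and without the special handling of beta == 0 that the kernel applies) looks like this:

    #include <vector>

    // x = alpha * A * b + beta * x for one batch item, with A stored
    // row-major using leading dimension `stride`.
    void apply_one_item(double alpha, const std::vector<double>& A, int stride,
                        int num_rows, int num_cols,
                        const std::vector<double>& b, double beta,
                        std::vector<double>& x)
    {
        for (int row = 0; row < num_rows; ++row) {
            double acc = 0.0;
            for (int col = 0; col < num_cols; ++col) {
                acc += A[row * stride + col] * b[col];
            }
            x[row] = alpha * acc + beta * x[row];
        }
    }
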
template inline void simple_apply_kernel( - const gko::batch::batch_dense::batch_item& a, - const gko::batch::batch_multi_vector::batch_item& b, - const gko::batch::batch_multi_vector::batch_item& c) + const gko::batch::matrix::batch_dense::batch_item& a, + const gko::batch::multi_vector::batch_item& b, + const gko::batch::multi_vector::batch_item& c) { for (int row = 0; row < c.num_rows; ++row) { for (int col = 0; col < c.num_rhs; ++col) { @@ -57,10 +57,10 @@ inline void simple_apply_kernel( template inline void advanced_apply_kernel( const ValueType alpha, - const gko::batch::batch_dense::batch_item& a, - const gko::batch::batch_multi_vector::batch_item& b, + const gko::batch::matrix::batch_dense::batch_item& a, + const gko::batch::multi_vector::batch_item& b, const ValueType beta, - const gko::batch::batch_multi_vector::batch_item& c) + const gko::batch::multi_vector::batch_item& c) { if (beta != gko::zero()) { for (int row = 0; row < c.num_rows; ++row) { diff --git a/reference/matrix/batch_struct.hpp b/reference/matrix/batch_struct.hpp new file mode 100644 index 00000000000..1bed5a4e5c9 --- /dev/null +++ b/reference/matrix/batch_struct.hpp @@ -0,0 +1,95 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_REFERENCE_MATRIX_BATCH_STRUCT_HPP_ +#define GKO_REFERENCE_MATRIX_BATCH_STRUCT_HPP_ + + +#include +#include + + +#include "core/base/batch_struct.hpp" + + +namespace gko { +namespace kernels { +/** + * @brief A namespace for shared functionality between omp and reference + * executors. + */ +namespace host { + + +/** @file batch_struct.hpp + * + * Helper functions to generate a batch struct from a batch LinOp. + * + * A specialization is needed for every format of every kind of linear algebra + * object. These are intended to be called on the host. + */ + + +/** + * Generates an immutable uniform batch struct from a batch of multi-vectors. 
+ */ +template +inline batch::matrix::batch_dense::uniform_batch +get_batch_struct(const batch::matrix::BatchDense* const op) +{ + return {op->get_const_values(), op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + +/** + * Generates a uniform batch struct from a batch of multi-vectors. + */ +template +inline batch::matrix::batch_dense::uniform_batch get_batch_struct( + batch::matrix::BatchDense* const op) +{ + return {op->get_values(), op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + +} // namespace host +} // namespace kernels +} // namespace gko + + +#endif // GKO_REFERENCE_MATRIX_BATCH_STRUCT_HPP_ From 2f6bd776146d46017fd280efc47d8eff994daff0 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 3 Oct 2023 11:31:51 +0200 Subject: [PATCH 03/28] Generalize batch utilities --- core/base/batch_multi_vector.cpp | 28 ++++++++-- core/base/batch_utilities.hpp | 47 ++++++++-------- core/test/base/batch_multi_vector.cpp | 54 +++++++++++-------- core/test/utils/assertions.hpp | 8 ++- .../test/base/batch_multi_vector_kernels.cpp | 43 ++++++++------- 5 files changed, 109 insertions(+), 71 deletions(-) diff --git a/core/base/batch_multi_vector.cpp b/core/base/batch_multi_vector.cpp index 23591cd1ffe..f6884ef523b 100644 --- a/core/base/batch_multi_vector.cpp +++ b/core/base/batch_multi_vector.cpp @@ -44,6 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include "core/base/batch_multi_vector_kernels.hpp" @@ -72,7 +73,7 @@ namespace detail { template batch_dim<2> compute_batch_size( - const std::vector*>& matrices) + const std::vector*>& matrices) { auto common_size = matrices[0]->get_size(); for (size_type i = 1; i < matrices.size(); ++i) { @@ -86,7 +87,7 @@ batch_dim<2> compute_batch_size( template -std::unique_ptr> +std::unique_ptr> MultiVector::create_view_for_item(size_type item_id) { auto exec = this->get_executor(); @@ -102,7 +103,7 @@ MultiVector::create_view_for_item(size_type item_id) template -std::unique_ptr> +std::unique_ptr> MultiVector::create_const_view_for_item(size_type item_id) const { auto exec = this->get_executor(); @@ -290,6 +291,27 @@ void MultiVector::move_to( } +template +void MultiVector::convert_to( + matrix::BatchDense* result) const +{ + auto exec = result->get_executor() != nullptr ? result->get_executor() + : this->get_executor(); + auto tmp = gko::batch::matrix::BatchDense::create_const( + exec, this->get_size(), + make_const_array_view(exec, this->get_num_stored_elements(), + this->get_const_values())); + result->copy_from(tmp); +} + + +template +void MultiVector::move_to(matrix::BatchDense* result) +{ + this->convert_to(result); +} + + #define GKO_DECLARE_BATCH_MULTI_VECTOR(_type) class MultiVector<_type> GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR); diff --git a/core/base/batch_utilities.hpp b/core/base/batch_utilities.hpp index e5dc22faeda..d5c5bdb4aa2 100644 --- a/core/base/batch_utilities.hpp +++ b/core/base/batch_utilities.hpp @@ -51,16 +51,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
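The hunk that follows templates the batch helper functions on the concrete batch type instead of hard-coding MultiVector, so duplicate, create_from_item, unbatch, read and write can be reused for the new BatchDense class as well. A sketch of the resulting call pattern, mirroring the updated unit tests further below (the helpers live in the core-internal header core/base/batch_utilities.hpp, and exec is assumed to be an executor created elsewhere):

    using MVec = gko::batch::MultiVector<double>;
    auto item = gko::initialize<gko::matrix::Dense<double>>(
        {{1.0, 2.0}, {3.0, 4.0}}, exec);
    // Three copies of `item` packed into one batched object ...
    auto batched = gko::batch::create_from_item<MVec>(exec, 3, item.get());
    // ... the whole batch duplicated again ...
    auto repeated = gko::batch::duplicate<MVec>(exec, 2, batched.get());
    // ... and split back into individual gko::matrix::Dense items.
    auto items = gko::batch::unbatch<MVec>(batched.get());
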
namespace gko { namespace batch { -namespace multivector { -template -std::unique_ptr> duplicate( - std::shared_ptr exec, size_type num_duplications, - const batch::MultiVector* input) +template +std::unique_ptr duplicate(std::shared_ptr exec, + size_type num_duplications, + const OutputType* input) { auto num_batch_items = input->get_num_batch_items(); - auto tmp = batch::MultiVector::create( + auto tmp = OutputType::create( exec, batch_dim<2>(num_batch_items * num_duplications, input->get_common_size())); @@ -75,13 +74,13 @@ std::unique_ptr> duplicate( } -template -std::unique_ptr> create_from_dense( +template +std::unique_ptr create_from_item( std::shared_ptr exec, const size_type num_duplications, - const matrix::Dense* input) + const typename OutputType::unbatch_type* input) { auto num_batch_items = num_duplications; - auto tmp = batch::MultiVector::create( + auto tmp = OutputType::create( exec, batch_dim<2>(num_batch_items, input->get_size())); for (size_type b = 0; b < num_batch_items; ++b) { @@ -92,13 +91,13 @@ std::unique_ptr> create_from_dense( } -template -std::unique_ptr> create_from_dense( +template +std::unique_ptr create_from_item( std::shared_ptr exec, - const std::vector*>& input) + const std::vector& input) { auto num_batch_items = input.size(); - auto tmp = batch::MultiVector::create( + auto tmp = OutputType::create( exec, batch_dim<2>(num_batch_items, input[0]->get_size())); for (size_type b = 0; b < num_batch_items; ++b) { @@ -109,13 +108,12 @@ std::unique_ptr> create_from_dense( } -template -std::vector>> unbatch( - const batch::MultiVector* batch_multivec) +template +auto unbatch(const InputType* batch_multivec) { auto exec = batch_multivec->get_executor(); auto unbatched_mats = - std::vector>>{}; + std::vector>{}; for (size_type b = 0; b < batch_multivec->get_num_batch_items(); ++b) { unbatched_mats.emplace_back( batch_multivec->create_const_view_for_item(b)->clone()); @@ -124,14 +122,14 @@ std::vector>> unbatch( } -template -std::unique_ptr> read( +template +std::unique_ptr read( std::shared_ptr exec, const std::vector>& data) { auto num_batch_items = data.size(); - auto tmp = MultiVector::create( - exec, batch_dim<2>(num_batch_items, data[0].size)); + auto tmp = + OutputType::create(exec, batch_dim<2>(num_batch_items, data[0].size)); for (size_type b = 0; b < num_batch_items; ++b) { tmp->create_view_for_item(b)->read(data[b]); @@ -141,9 +139,9 @@ std::unique_ptr> read( } -template +template std::vector> write( - const MultiVector* mvec) + const OutputType* mvec) { auto data = std::vector>( mvec->get_num_batch_items()); @@ -157,7 +155,6 @@ std::vector> write( } -} // namespace multivector } // namespace batch } // namespace gko diff --git a/core/test/base/batch_multi_vector.cpp b/core/test/base/batch_multi_vector.cpp index 85168a406cc..7bdaec30b27 100644 --- a/core/test/base/batch_multi_vector.cpp +++ b/core/test/base/batch_multi_vector.cpp @@ -188,11 +188,11 @@ TYPED_TEST(MultiVector, CanBeConstructedFromExistingData) using size_type = gko::size_type; // clang-format off value_type data[] = { - 1.0, 2.0, - -1.0,3.0, + 1.0, 2.0, + -1.0, 3.0, 4.0, -1.0, - 3.0, 5.0, - 1.0, 5.0, + 3.0, 5.0, + 1.0, 5.0, 6.0, -3.0}; // clang-format on @@ -218,11 +218,11 @@ TYPED_TEST(MultiVector, CanBeConstructedFromExistingConstData) using size_type = gko::size_type; // clang-format off value_type data[] = { - 1.0, 2.0, - -1.0,3.0, + 1.0, 2.0, + -1.0, 3.0, 4.0, -1.0, - 3.0, 5.0, - 1.0, 5.0, + 3.0, 5.0, + 1.0, 5.0, 6.0, -3.0}; // clang-format on @@ -252,7 +252,7 @@ 
TYPED_TEST(MultiVector, CanBeConstructedFromDenseMatrices) auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto m = gko::batch::multivector::create_from_dense( + auto m = gko::batch::create_from_item>( this->exec, std::vector{mat1.get(), mat2.get()}); this->assert_equal_to_original_mtx(m.get()); @@ -269,10 +269,12 @@ TYPED_TEST(MultiVector, CanBeConstructedFromDenseMatricesByDuplication) auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto bat_m = gko::batch::multivector::create_from_dense( - this->exec, std::vector{mat1.get(), mat1.get(), mat1.get()}); - auto m = - gko::batch::multivector::create_from_dense(this->exec, 3, mat1.get()); + auto bat_m = + gko::batch::create_from_item>( + this->exec, + std::vector{mat1.get(), mat1.get(), mat1.get()}); + auto m = gko::batch::create_from_item>( + this->exec, 3, mat1.get()); GKO_ASSERT_BATCH_MTX_NEAR(bat_m.get(), m.get(), 1e-14); } @@ -287,14 +289,16 @@ TYPED_TEST(MultiVector, CanBeConstructedByDuplicatingMultiVectors) this->exec); auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto m = gko::batch::multivector::create_from_dense( + auto m = gko::batch::create_from_item>( this->exec, std::vector{mat1.get(), mat2.get()}); - auto m_ref = gko::batch::multivector::create_from_dense( - this->exec, std::vector{mat1.get(), mat2.get(), mat1.get(), - mat2.get(), mat1.get(), mat2.get()}); + auto m_ref = + gko::batch::create_from_item>( + this->exec, + std::vector{mat1.get(), mat2.get(), mat1.get(), + mat2.get(), mat1.get(), mat2.get()}); - auto m2 = - gko::batch::multivector::duplicate(this->exec, 3, m.get()); + auto m2 = gko::batch::duplicate>( + this->exec, 3, m.get()); GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 1e-14); } @@ -385,7 +389,8 @@ TYPED_TEST(MultiVector, CanBeUnbatchedIntoDenseMatrices) auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto dense_mats = gko::batch::multivector::unbatch(this->mtx.get()); + auto dense_mats = gko::batch::unbatch>( + this->mtx.get()); ASSERT_EQ(dense_mats.size(), 2); GKO_ASSERT_MTX_NEAR(dense_mats[0].get(), mat1.get(), 0.); @@ -404,7 +409,8 @@ TYPED_TEST(MultiVector, CanBeReadFromMatrixData) vec_data.emplace_back(gko::matrix_data( {2, 2}, {{0, 0, -1.0}, {0, 1, 0.5}, {1, 0, 0.0}, {1, 1, 9.0}})); - auto m = gko::batch::multivector::read(this->exec, + auto m = gko::batch::read>(this->exec, vec_data); EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); @@ -429,7 +435,8 @@ TYPED_TEST(MultiVector, CanBeReadFromSparseMatrixData) vec_data.emplace_back(gko::matrix_data( {2, 2}, {{0, 0, -1.0}, {0, 1, 0.5}, {1, 1, 9.0}})); - auto m = gko::batch::multivector::read(this->exec, + auto m = gko::batch::read>(this->exec, vec_data); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); @@ -451,7 +458,8 @@ TYPED_TEST(MultiVector, GeneratesCorrectMatrixData) using tpl = typename gko::matrix_data::nonzero_type; auto data = - gko::batch::multivector::write(this->mtx.get()); + gko::batch::write>(this->mtx.get()); ASSERT_EQ(data[0].size, gko::dim<2>(2, 3)); ASSERT_EQ(data[0].nonzeros.size(), 6); diff --git a/core/test/utils/assertions.hpp b/core/test/utils/assertions.hpp index d723d5a8964..63ed1e5423a 100644 --- a/core/test/utils/assertions.hpp +++ b/core/test/utils/assertions.hpp @@ -720,8 +720,12 @@ ::testing::AssertionResult batch_matrices_near( using value_type1 = typename Mat1::value_type; using value_type2 = typename Mat2::value_type; - auto first_data = gko::batch::multivector::write(first); - auto second_data = 
gko::batch::multivector::write(second); + auto first_data = + gko::batch::write>(first); + auto second_data = + gko::batch::write>(second); if (first_data.size() != second_data.size()) { return ::testing::AssertionFailure() diff --git a/reference/test/base/batch_multi_vector_kernels.cpp b/reference/test/base/batch_multi_vector_kernels.cpp index 4f922c37703..e0c7643c8d7 100644 --- a/reference/test/base/batch_multi_vector_kernels.cpp +++ b/reference/test/base/batch_multi_vector_kernels.cpp @@ -137,13 +137,14 @@ TYPED_TEST(MultiVector, ScalesData) using T = typename TestFixture::value_type; auto alpha = gko::batch::initialize( {{{2.0, -2.0, 1.5}}, {{3.0, -1.0, 0.25}}}, this->exec); - auto ualpha = gko::batch::multivector::unbatch(alpha.get()); + auto ualpha = gko::batch::unbatch>(alpha.get()); this->mtx_0->scale(alpha.get()); this->mtx_00->scale(ualpha[0].get()); this->mtx_01->scale(ualpha[1].get()); - auto res = gko::batch::multivector::unbatch(this->mtx_0.get()); + auto res = + gko::batch::unbatch>(this->mtx_0.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_00.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_01.get(), 0.); } @@ -154,13 +155,14 @@ TYPED_TEST(MultiVector, ScalesDataWithScalar) using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; auto alpha = gko::batch::initialize({{2.0}, {-2.0}}, this->exec); - auto ualpha = gko::batch::multivector::unbatch(alpha.get()); + auto ualpha = gko::batch::unbatch>(alpha.get()); this->mtx_1->scale(alpha.get()); this->mtx_10->scale(ualpha[0].get()); this->mtx_11->scale(ualpha[1].get()); - auto res = gko::batch::multivector::unbatch(this->mtx_1.get()); + auto res = + gko::batch::unbatch>(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_11.get(), 0.); } @@ -172,13 +174,14 @@ TYPED_TEST(MultiVector, ScalesDataWithMultipleScalars) using T = typename TestFixture::value_type; auto alpha = gko::batch::initialize( {{{2.0, -2.0, -1.5}}, {{2.0, -2.0, 3.0}}}, this->exec); - auto ualpha = gko::batch::multivector::unbatch(alpha.get()); + auto ualpha = gko::batch::unbatch>(alpha.get()); this->mtx_1->scale(alpha.get()); this->mtx_10->scale(ualpha[0].get()); this->mtx_11->scale(ualpha[1].get()); - auto res = gko::batch::multivector::unbatch(this->mtx_1.get()); + auto res = + gko::batch::unbatch>(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_11.get(), 0.); } @@ -190,13 +193,14 @@ TYPED_TEST(MultiVector, AddsScaled) using T = typename TestFixture::value_type; auto alpha = gko::batch::initialize( {{{2.0, -2.0, 1.5}}, {{2.0, -2.0, 3.0}}}, this->exec); - auto ualpha = gko::batch::multivector::unbatch(alpha.get()); + auto ualpha = gko::batch::unbatch>(alpha.get()); this->mtx_1->add_scaled(alpha.get(), this->mtx_0.get()); this->mtx_10->add_scaled(ualpha[0].get(), this->mtx_00.get()); this->mtx_11->add_scaled(ualpha[1].get(), this->mtx_01.get()); - auto res = gko::batch::multivector::unbatch(this->mtx_1.get()); + auto res = + gko::batch::unbatch>(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_11.get(), 0.); } @@ -207,13 +211,14 @@ TYPED_TEST(MultiVector, AddsScaledWithScalar) using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; auto alpha = gko::batch::initialize({{2.0}, {-2.0}}, this->exec); - auto ualpha = gko::batch::multivector::unbatch(alpha.get()); + auto ualpha = 
gko::batch::unbatch>(alpha.get()); this->mtx_1->add_scaled(alpha.get(), this->mtx_0.get()); this->mtx_10->add_scaled(ualpha[0].get(), this->mtx_00.get()); this->mtx_11->add_scaled(ualpha[1].get(), this->mtx_01.get()); - auto res = gko::batch::multivector::unbatch(this->mtx_1.get()); + auto res = + gko::batch::unbatch>(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->mtx_11.get(), 0.); } @@ -236,13 +241,13 @@ TYPED_TEST(MultiVector, ComputesDot) using T = typename TestFixture::value_type; auto result = Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 3})); - auto ures = gko::batch::multivector::unbatch(result.get()); + auto ures = gko::batch::unbatch>(result.get()); this->mtx_0->compute_dot(this->mtx_1.get(), result.get()); this->mtx_00->compute_dot(this->mtx_10.get(), ures[0].get()); this->mtx_01->compute_dot(this->mtx_11.get(), ures[1].get()); - auto res = gko::batch::multivector::unbatch(result.get()); + auto res = gko::batch::unbatch>(result.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), ures[0].get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), ures[1].get(), 0.); } @@ -277,13 +282,13 @@ TYPED_TEST(MultiVector, ComputesConjDot) using T = typename TestFixture::value_type; auto result = Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 3})); - auto ures = gko::batch::multivector::unbatch(result.get()); + auto ures = gko::batch::unbatch>(result.get()); this->mtx_0->compute_conj_dot(this->mtx_1.get(), result.get()); this->mtx_00->compute_conj_dot(this->mtx_10.get(), ures[0].get()); this->mtx_01->compute_conj_dot(this->mtx_11.get(), ures[1].get()); - auto res = gko::batch::multivector::unbatch(result.get()); + auto res = gko::batch::unbatch>(result.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), ures[0].get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), ures[1].get(), 0.); } @@ -359,8 +364,9 @@ TYPED_TEST(MultiVector, ConvertsToPrecision) this->mtx_1->convert_to(tmp.get()); tmp->convert_to(res.get()); - auto ures = gko::batch::multivector::unbatch(res.get()); - auto umtx = gko::batch::multivector::unbatch(this->mtx_1.get()); + auto ures = gko::batch::unbatch>(res.get()); + auto umtx = + gko::batch::unbatch>(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(umtx[0].get(), ures[0].get(), residual); GKO_ASSERT_MTX_NEAR(umtx[1].get(), ures[1].get(), residual); } @@ -382,8 +388,9 @@ TYPED_TEST(MultiVector, MovesToPrecision) this->mtx_1->move_to(tmp.get()); tmp->move_to(res.get()); - auto ures = gko::batch::multivector::unbatch(res.get()); - auto umtx = gko::batch::multivector::unbatch(this->mtx_1.get()); + auto ures = gko::batch::unbatch>(res.get()); + auto umtx = + gko::batch::unbatch>(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(umtx[0].get(), ures[0].get(), residual); GKO_ASSERT_MTX_NEAR(umtx[1].get(), ures[1].get(), residual); } From c9d5b4488f54db27ec1875bfd21aeb939baf961e Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 3 Oct 2023 11:32:30 +0200 Subject: [PATCH 04/28] MultiVector to BatchDense conversion --- core/matrix/batch_dense.cpp | 27 ++- core/test/matrix/CMakeLists.txt | 2 + core/test/matrix/batch_dense.cpp | 222 ++++++++---------- .../ginkgo/core/base/batch_multi_vector.hpp | 25 +- include/ginkgo/core/matrix/batch_dense.hpp | 11 +- 5 files changed, 154 insertions(+), 133 deletions(-) diff --git a/core/matrix/batch_dense.cpp b/core/matrix/batch_dense.cpp index 803f7a51c50..9f72a26c488 100644 --- a/core/matrix/batch_dense.cpp +++ b/core/matrix/batch_dense.cpp @@ -53,12 +53,14 @@ namespace gko { namespace batch { namespace 
matrix { namespace dense { +namespace { GKO_REGISTER_OPERATION(simple_apply, batch_dense::simple_apply); GKO_REGISTER_OPERATION(advanced_apply, batch_dense::advanced_apply); +} // namespace } // namespace dense @@ -141,6 +143,19 @@ BatchDense::create_with_same_config() const } +template +std::unique_ptr> +BatchDense::create_const( + std::shared_ptr exec, const batch_dim<2>& sizes, + gko::detail::const_array_view&& values) +{ + // cast const-ness away, but return a const object afterwards, + // so we can ensure that no modifications take place. + return std::unique_ptr(new BatchDense{ + exec, sizes, gko::detail::array_const_cast(std::move(values))}); +} + + inline const batch_dim<2> get_col_sizes(const batch_dim<2>& sizes) { return batch_dim<2>(sizes.get_num_batch_items(), @@ -148,6 +163,14 @@ inline const batch_dim<2> get_col_sizes(const batch_dim<2>& sizes) } +template +BatchDense::BatchDense(std::shared_ptr exec, + const batch_dim<2>& size) + : EnableBatchLinOp>(exec, size), + values_(exec, compute_num_elems(size)) +{} + + template void BatchDense::apply_impl(const MultiVector* b, MultiVector* x) const @@ -157,7 +180,7 @@ void BatchDense::apply_impl(const MultiVector* b, GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); GKO_ASSERT_CONFORMANT(this->get_common_size(), x->get_common_size()); - this->get_executor()->run(batch_dense::make_simple_apply(this, b, x)); + this->get_executor()->run(dense::make_simple_apply(this, b, x)); } @@ -175,7 +198,7 @@ void BatchDense::apply_impl(const MultiVector* alpha, GKO_ASSERT_EQUAL_COLS(alpha->get_common_size(), gko::dim<2>(1, 1)); GKO_ASSERT_EQUAL_COLS(beta->get_common_size(), gko::dim<2>(1, 1)); this->get_executor()->run( - batch_dense::make_advanced_apply(alpha, this, b, beta, x)); + dense::make_advanced_apply(alpha, this, b, beta, x)); } diff --git a/core/test/matrix/CMakeLists.txt b/core/test/matrix/CMakeLists.txt index 433361a054f..57c2c97e355 100644 --- a/core/test/matrix/CMakeLists.txt +++ b/core/test/matrix/CMakeLists.txt @@ -1,3 +1,5 @@ +# ginkgo_create_test(batch_dense) +# ginkgo_create_test(coo) ginkgo_create_test(coo_builder) ginkgo_create_test(csr) diff --git a/core/test/matrix/batch_dense.cpp b/core/test/matrix/batch_dense.cpp index 7db7469baf6..a1ebdb1061c 100644 --- a/core/test/matrix/batch_dense.cpp +++ b/core/test/matrix/batch_dense.cpp @@ -44,9 +44,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
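Alongside create_with_config_of, the matrix now exposes the create_const factory declared in the header: it adopts an existing, immutable value array (or copies it if the array lives on another executor) and returns a const BatchDense. A hedged usage sketch based on the declarations and tests in this series, not an official example:

    #include <ginkgo/core/base/executor.hpp>
    #include <ginkgo/core/matrix/batch_dense.hpp>

    int main()
    {
        auto exec = gko::ReferenceExecutor::create();
        // Two 2x2 items stored contiguously, row-major.
        static const double vals[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0};
        auto mat = gko::batch::matrix::BatchDense<double>::create_const(
            exec, gko::batch_dim<2>(2, gko::dim<2>(2, 2)),
            gko::array<double>::const_view(exec, 8, vals));
        return 0;
    }
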
#include "core/test/utils.hpp" -namespace { - - template class BatchDense : public ::testing::Test { protected: @@ -55,11 +52,13 @@ class BatchDense : public ::testing::Test { using size_type = gko::size_type; BatchDense() : exec(gko::ReferenceExecutor::create()), - mtx(gko::batch_initialize>( - std::vector{4, 3}, + mtx(gko::batch::initialize< + gko::batch::matrix::BatchDense>( {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, - exec)) + exec)), + dense_mtx(gko::initialize>( + {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, exec)) {} @@ -67,13 +66,8 @@ class BatchDense : public ::testing::Test { gko::matrix::BatchDense* m) { ASSERT_EQ(m->get_num_batch_entries(), 2); - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); - ASSERT_EQ(m->get_stride().at(0), 4); - ASSERT_EQ(m->get_stride().at(1), 3); - ASSERT_EQ(m->get_num_stored_elements(), (2 * 4) + (2 * 3)); - ASSERT_EQ(m->get_num_stored_elements(0), 2 * 4); - ASSERT_EQ(m->get_num_stored_elements(1), 2 * 3); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(m->get_num_stored_elements(), 2 * (2 * 4)); EXPECT_EQ(m->at(0, 0, 0), value_type{-1.0}); EXPECT_EQ(m->at(0, 0, 1), value_type{2.0}); EXPECT_EQ(m->at(0, 0, 2), value_type{3.0}); @@ -95,7 +89,7 @@ class BatchDense : public ::testing::Test { } std::shared_ptr exec; - std::unique_ptr> mtx; + std::unique_ptr> mtx; }; TYPED_TEST_SUITE(BatchDense, gko::test::ValueTypes); @@ -103,46 +97,85 @@ TYPED_TEST_SUITE(BatchDense, gko::test::ValueTypes); TYPED_TEST(BatchDense, CanBeEmpty) { - auto empty = gko::matrix::BatchDense::create(this->exec); + auto empty = gko::batch::matrix::BatchDense::create(this->exec); this->assert_empty(empty.get()); } TYPED_TEST(BatchDense, ReturnsNullValuesArrayWhenEmpty) { - auto empty = gko::matrix::BatchDense::create(this->exec); + auto empty = gko::batch::matrix::BatchDense::create(this->exec); ASSERT_EQ(empty->get_const_values(), nullptr); } -TYPED_TEST(BatchDense, CanBeConstructedWithSize) +TYPED_TEST(BatchDense, CanGetValuesForEntry) { - using size_type = gko::size_type; - auto m = gko::matrix::BatchDense::create( - this->exec, - std::vector>{gko::dim<2>{2, 4}, gko::dim<2>{2, 3}}); + using value_type = typename TestFixture::value_type; - ASSERT_EQ(m->get_num_batch_entries(), 2); - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 4)); - ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 3)); - EXPECT_EQ(m->get_stride().at(0), 4); - EXPECT_EQ(m->get_stride().at(1), 3); - ASSERT_EQ(m->get_num_stored_elements(), 14); - ASSERT_EQ(m->get_num_stored_elements(0), 8); - ASSERT_EQ(m->get_num_stored_elements(1), 6); + ASSERT_EQ(this->mtx->get_values_for_item(1)[0], value_type{1.0}); +} + + +TYPED_TEST(BatchDense, CanCreateDenseItemView) +{ + GKO_ASSERT_MTX_NEAR(this->mtx->create_view_for_item(1), this->dense_mtx, + 0.0); +} + + +TYPED_TEST(BatchDense, CanBeCopied) +{ + auto mtx_copy = + gko::batch::matrix::BatchDense::create(this->exec); + + mtx_copy->copy_from(this->mtx.get()); + + this->assert_equal_to_original_mtx(this->mtx.get()); + this->mtx->at(0, 0, 0) = 7; + this->mtx->at(0, 1) = 7; + this->assert_equal_to_original_mtx(mtx_copy.get()); +} + + +TYPED_TEST(BatchDense, CanBeMoved) +{ + auto mtx_copy = + gko::batch::matrix::BatchDense::create(this->exec); + + mtx_copy->copy_from(std::move(this->mtx)); + + this->assert_equal_to_original_mtx(mtx_copy.get()); +} + + +TYPED_TEST(BatchDense, CanBeCloned) +{ + auto mtx_clone = this->mtx->clone(); + + this->assert_equal_to_original_mtx( + 
dynamic_castmtx.get())>(mtx_clone.get())); } -TYPED_TEST(BatchDense, CanBeConstructedWithSizeAndStride) +TYPED_TEST(BatchDense, CanBeCleared) +{ + this->mtx->clear(); + + this->assert_empty(this->mtx.get()); +} + + +TYPED_TEST(BatchDense, CanBeConstructedWithSize) { using size_type = gko::size_type; - auto m = gko::matrix::BatchDense::create( - this->exec, std::vector>{gko::dim<2>{2, 3}}, - std::vector{4}); - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); - EXPECT_EQ(m->get_stride().at(0), 4); - ASSERT_EQ(m->get_num_stored_elements(), 8); + auto m = gko::batch::matrix::BatchDense::create( + this->exec, gko::batch_dim<2>> {2, gko::dim<2>{5, 3}}); + + ASSERT_EQ(m->get_num_batch_entries(), 2); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(5, 3)); + ASSERT_EQ(m->get_num_stored_elements(), 30); } @@ -152,23 +185,27 @@ TYPED_TEST(BatchDense, CanBeConstructedFromExistingData) using size_type = gko::size_type; // clang-format off value_type data[] = { - 1.0, 2.0, -1.0, - 3.0, 4.0, -1.0, - 3.0, 5.0, 1.0, - 5.0, 6.0, -3.0}; + 1.0, 2.0, + -1.0, 3.0, + 4.0, -1.0, + 3.0, 5.0, + 1.0, 5.0, + 6.0, -3.0}; // clang-format on - auto m = gko::matrix::BatchDense::create( - this->exec, - std::vector>{gko::dim<2>{2, 2}, gko::dim<2>{2, 2}}, - gko::array::view(this->exec, 12, data), - std::vector{3, 3}); + auto m = gko::batch::matrix::BatchDense::create( + this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 2)), + gko::array::view(this->exec, 8, data)); ASSERT_EQ(m->get_const_values(), data); + ASSERT_EQ(m->at(0, 0, 0), value_type{1.0}); ASSERT_EQ(m->at(0, 0, 1), value_type{2.0}); - ASSERT_EQ(m->at(0, 1, 2), value_type{-1.0}); - ASSERT_EQ(m->at(1, 0, 1), value_type{5.0}); - ASSERT_EQ(m->at(1, 1, 2), value_type{-3.0}); + ASSERT_EQ(m->at(0, 1, 0), value_type{-1.0}); + ASSERT_EQ(m->at(0, 1, 1), value_type{3.0}); + ASSERT_EQ(m->at(1, 0, 0), value_type{4.0}); + ASSERT_EQ(m->at(1, 0, 1), value_type{-1.0}); + ASSERT_EQ(m->at(1, 1, 0), value_type{3.0}); + ASSERT_EQ(m->at(1, 1, 1), value_type{5.0}); } @@ -178,23 +215,27 @@ TYPED_TEST(BatchDense, CanBeConstructedFromExistingConstData) using size_type = gko::size_type; // clang-format off const value_type data[] = { - 1.0, 2.0, -1.0, - 3.0, 4.0, -1.0, - 3.0, 5.0, 1.0, - 5.0, 6.0, -3.0}; + 1.0, 2.0, + -1.0, 3.0, + 4.0, -1.0, + 3.0, 5.0, + 1.0, 5.0, + 6.0, -3.0}; // clang-format on auto m = gko::matrix::BatchDense::create_const( - this->exec, - std::vector>{gko::dim<2>{2, 2}, gko::dim<2>{2, 2}}, - gko::array::const_view(this->exec, 12, data), - std::vector{3, 3}); + this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 2)), + gko::array::const_view(this->exec, 8, data)); ASSERT_EQ(m->get_const_values(), data); + ASSERT_EQ(m->at(0, 0, 0), value_type{1.0}); ASSERT_EQ(m->at(0, 0, 1), value_type{2.0}); - ASSERT_EQ(m->at(0, 1, 2), value_type{-1.0}); - ASSERT_EQ(m->at(1, 0, 1), value_type{5.0}); - ASSERT_EQ(m->at(1, 1, 2), value_type{-3.0}); + ASSERT_EQ(m->at(0, 1, 0), value_type{-1.0}); + ASSERT_EQ(m->at(0, 1, 1), value_type{3.0}); + ASSERT_EQ(m->at(1, 0, 0), value_type{4.0}); + ASSERT_EQ(m->at(1, 0, 1), value_type{-1.0}); + ASSERT_EQ(m->at(1, 1, 0), value_type{3.0}); + ASSERT_EQ(m->at(1, 1, 1), value_type{5.0}); } @@ -203,20 +244,15 @@ TYPED_TEST(BatchDense, CanBeConstructedFromBatchDenseMatrices) using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; using size_type = gko::size_type; - auto mat1 = gko::initialize( - 3, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); + auto mat1 = gko::initialize({{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + 
this->exec); auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto m = gko::matrix::BatchDense::create( + auto m = gko::batch::multivector::create_from_dense( this->exec, std::vector{mat1.get(), mat2.get()}); - auto m_ref = gko::matrix::BatchDense::create( - this->exec, std::vector{mat1.get(), mat2.get(), mat1.get(), - mat2.get(), mat1.get(), mat2.get()}); - auto m2 = - gko::matrix::BatchDense::create(this->exec, 3, m.get()); - GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 1e-14); + this->assert_equal_to_original_mtx(m.get()); } @@ -297,19 +333,6 @@ TYPED_TEST(BatchDense, CanBeListConstructed) } -TYPED_TEST(BatchDense, CanBeListConstructedWithstride) -{ - using value_type = typename TestFixture::value_type; - auto m = gko::batch_initialize>( - std::vector{2}, {{1.0, 2.0}}, this->exec); - ASSERT_EQ(m->get_num_batch_entries(), 1); - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 1)); - ASSERT_EQ(m->get_num_stored_elements(), 4); - EXPECT_EQ(m->at(0, 0), value_type{1.0}); - EXPECT_EQ(m->at(0, 1), value_type{2.0}); -} - - TYPED_TEST(BatchDense, CanBeListConstructedByCopies) { using value_type = typename TestFixture::value_type; @@ -385,40 +408,6 @@ TYPED_TEST(BatchDense, CanBeDoubleListConstructedWithstride) } -TYPED_TEST(BatchDense, CanBeCopied) -{ - auto mtx_copy = gko::matrix::BatchDense::create(this->exec); - mtx_copy->copy_from(this->mtx.get()); - this->assert_equal_to_original_mtx(this->mtx.get()); - this->mtx->at(0, 0, 0) = 7; - this->mtx->at(0, 1) = 7; - this->assert_equal_to_original_mtx(mtx_copy.get()); -} - - -TYPED_TEST(BatchDense, CanBeMoved) -{ - auto mtx_copy = gko::matrix::BatchDense::create(this->exec); - mtx_copy->copy_from(std::move(this->mtx)); - this->assert_equal_to_original_mtx(mtx_copy.get()); -} - - -TYPED_TEST(BatchDense, CanBeCloned) -{ - auto mtx_clone = this->mtx->clone(); - this->assert_equal_to_original_mtx( - dynamic_castmtx.get())>(mtx_clone.get())); -} - - -TYPED_TEST(BatchDense, CanBeCleared) -{ - this->mtx->clear(); - this->assert_empty(this->mtx.get()); -} - - TYPED_TEST(BatchDense, CanBeReadFromMatrixData) { using value_type = typename TestFixture::value_type; @@ -515,6 +504,3 @@ TYPED_TEST(BatchDense, CanBeReadFromMatrixAssemblyData) EXPECT_EQ(m->at(1, 0, 0), value_type{2.0}); EXPECT_EQ(m->at(1, 1, 0), value_type{5.0}); } - - -} // namespace diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index d91274526d3..43f35e55f62 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -52,6 +52,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
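The header hunk that follows declares MultiVector as ConvertibleTo<matrix::BatchDense>, matching the MultiVector::convert_to(matrix::BatchDense*) implementation introduced earlier in the series: the vector's packed values are wrapped in a const array view and copied into the matrix type, which uses the same storage layout. A hedged sketch of the call pattern, assuming the headers added by this series:

    #include <memory>

    #include <ginkgo/core/base/batch_multi_vector.hpp>
    #include <ginkgo/core/matrix/batch_dense.hpp>

    void convert_sketch(std::shared_ptr<const gko::Executor> exec)
    {
        auto vec = gko::batch::MultiVector<double>::create(
            exec, gko::batch_dim<2>(2, gko::dim<2>{3, 3}));
        auto mat = gko::batch::matrix::BatchDense<double>::create(exec);
        // mat receives a copy of vec's values with identical layout.
        vec->convert_to(mat.get());
    }
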
namespace gko { namespace batch { +namespace matrix { + + +template +class BatchDense; + + +} + /** * MultiVector stores multiple vectors in a batched fashion and is useful @@ -81,21 +90,25 @@ class MultiVector : public EnablePolymorphicObject>, public EnablePolymorphicAssignment>, public EnableCreateMethod>, - public ConvertibleTo>> { + public ConvertibleTo>>, + public ConvertibleTo> { friend class EnableCreateMethod; friend class EnablePolymorphicObject; friend class MultiVector>; friend class MultiVector>; + friend class matrix::BatchDense; public: using EnablePolymorphicAssignment::convert_to; using EnablePolymorphicAssignment::move_to; using ConvertibleTo>>::convert_to; using ConvertibleTo>>::move_to; + using ConvertibleTo>::convert_to; + using ConvertibleTo>::move_to; using value_type = ValueType; using index_type = int32; - using unbatch_type = matrix::Dense; + using unbatch_type = gko::matrix::Dense; using absolute_type = remove_complex>; using complex_type = to_complex>; @@ -113,6 +126,10 @@ class MultiVector void move_to(MultiVector>* result) override; + void convert_to(matrix::BatchDense* result) const override; + + void move_to(matrix::BatchDense* result) override; + /** * Creates a mutable view (of matrix::Dense type) of one item of the Batch * MultiVector object. Does not perform any deep copies, but only returns a @@ -196,8 +213,8 @@ class MultiVector * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type* get_const_values_for_item( - size_type batch_id) const noexcept + const value_type* get_const_values_for_item(size_type batch_id) const + noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 47aff35b7e7..1b36cd64869 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -97,14 +97,7 @@ class BatchDense : public EnableBatchLinOp>, * @param other The other matrix whose configuration needs to copied. */ static std::unique_ptr create_with_config_of( - const BatchDense* other) - { - // De-referencing `other` before calling the functions (instead of - // using operator `->`) is currently required to be compatible with - // CUDA 10.1. - // Otherwise, it results in a compile error. - return (*other).create_with_same_config(); - } + ptr_param other); void convert_to( BatchDense>* result) const override; @@ -228,7 +221,7 @@ class BatchDense : public EnableBatchLinOp>, * array (if it resides on the same executor as the vector) or a copy of the * array on the correct executor. 
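+ *
+ * A minimal usage sketch (illustrative only, mirroring the unit tests later
+ * in this series; the concrete sizes and the `data` pointer are assumptions):
+ *
+ *     auto m = BatchDense<ValueType>::create_const(
+ *         exec, gko::batch_dim<2>(2, gko::dim<2>(2, 2)),
+ *         gko::array<ValueType>::const_view(exec, 8, data));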
*/ - static std::unique_ptr> create_const( + static std::unique_ptr> create_const( std::shared_ptr exec, const batch_dim<2>& sizes, gko::detail::const_array_view&& values); From 4053a9521bea2538677738cb6530ad07c5ddc68e Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 3 Oct 2023 14:09:59 +0200 Subject: [PATCH 05/28] Add tests for BatchDense core --- core/matrix/batch_dense.cpp | 7 - core/test/base/batch_multi_vector.cpp | 2 +- core/test/matrix/CMakeLists.txt | 3 +- core/test/matrix/batch_dense.cpp | 231 +++++++++------------ core/test/utils/assertions.hpp | 8 +- include/ginkgo/core/matrix/batch_dense.hpp | 93 +++++---- 6 files changed, 159 insertions(+), 185 deletions(-) diff --git a/core/matrix/batch_dense.cpp b/core/matrix/batch_dense.cpp index 9f72a26c488..f5d255d901c 100644 --- a/core/matrix/batch_dense.cpp +++ b/core/matrix/batch_dense.cpp @@ -127,13 +127,6 @@ BatchDense::create_with_config_of( } -template -void BatchDense::set_size(const batch_dim<2>& value) noexcept -{ - batch_size_ = value; -} - - template std::unique_ptr> BatchDense::create_with_same_config() const diff --git a/core/test/base/batch_multi_vector.cpp b/core/test/base/batch_multi_vector.cpp index 7bdaec30b27..8390a6c4327 100644 --- a/core/test/base/batch_multi_vector.cpp +++ b/core/test/base/batch_multi_vector.cpp @@ -412,9 +412,9 @@ TYPED_TEST(MultiVector, CanBeReadFromMatrixData) auto m = gko::batch::read>(this->exec, vec_data); - EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); + EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); diff --git a/core/test/matrix/CMakeLists.txt b/core/test/matrix/CMakeLists.txt index 57c2c97e355..cca4b8da1c0 100644 --- a/core/test/matrix/CMakeLists.txt +++ b/core/test/matrix/CMakeLists.txt @@ -1,5 +1,4 @@ -# ginkgo_create_test(batch_dense) -# +ginkgo_create_test(batch_dense) ginkgo_create_test(coo) ginkgo_create_test(coo_builder) ginkgo_create_test(csr) diff --git a/core/test/matrix/batch_dense.cpp b/core/test/matrix/batch_dense.cpp index a1ebdb1061c..f9210550bea 100644 --- a/core/test/matrix/batch_dense.cpp +++ b/core/test/matrix/batch_dense.cpp @@ -36,12 +36,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include #include #include #include +#include "core/base/batch_utilities.hpp" #include "core/test/utils.hpp" +#include "core/test/utils/batch_helpers.hpp" template @@ -63,11 +66,11 @@ class BatchDense : public ::testing::Test { static void assert_equal_to_original_mtx( - gko::matrix::BatchDense* m) + gko::batch::matrix::BatchDense* m) { - ASSERT_EQ(m->get_num_batch_entries(), 2); + ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 3)); - ASSERT_EQ(m->get_num_stored_elements(), 2 * (2 * 4)); + ASSERT_EQ(m->get_num_stored_elements(), 2 * (2 * 3)); EXPECT_EQ(m->at(0, 0, 0), value_type{-1.0}); EXPECT_EQ(m->at(0, 0, 1), value_type{2.0}); EXPECT_EQ(m->at(0, 0, 2), value_type{3.0}); @@ -82,19 +85,26 @@ class BatchDense : public ::testing::Test { ASSERT_EQ(m->at(1, 1, 2), value_type{3.0}); } - static void assert_empty(gko::matrix::BatchDense* m) + static void assert_empty(gko::batch::matrix::BatchDense* m) { - ASSERT_EQ(m->get_num_batch_entries(), 0); + ASSERT_EQ(m->get_num_batch_items(), 0); ASSERT_EQ(m->get_num_stored_elements(), 0); } std::shared_ptr exec; std::unique_ptr> mtx; + std::unique_ptr> dense_mtx; }; TYPED_TEST_SUITE(BatchDense, gko::test::ValueTypes); +TYPED_TEST(BatchDense, KnowsItsSizeAndValues) +{ + this->assert_equal_to_original_mtx(this->mtx.get()); +} + + TYPED_TEST(BatchDense, CanBeEmpty) { auto empty = gko::batch::matrix::BatchDense::create(this->exec); @@ -171,9 +181,9 @@ TYPED_TEST(BatchDense, CanBeConstructedWithSize) using size_type = gko::size_type; auto m = gko::batch::matrix::BatchDense::create( - this->exec, gko::batch_dim<2>> {2, gko::dim<2>{5, 3}}); + this->exec, gko::batch_dim<2>(2, gko::dim<2>{5, 3})); - ASSERT_EQ(m->get_num_batch_entries(), 2); + ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(5, 3)); ASSERT_EQ(m->get_num_stored_elements(), 30); } @@ -223,7 +233,7 @@ TYPED_TEST(BatchDense, CanBeConstructedFromExistingConstData) 6.0, -3.0}; // clang-format on - auto m = gko::matrix::BatchDense::create_const( + auto m = gko::batch::matrix::BatchDense::create_const( this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 2)), gko::array::const_view(this->exec, 8, data)); @@ -239,17 +249,19 @@ TYPED_TEST(BatchDense, CanBeConstructedFromExistingConstData) } -TYPED_TEST(BatchDense, CanBeConstructedFromBatchDenseMatrices) +TYPED_TEST(BatchDense, CanBeConstructedFromDenseMatrices) { using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; using size_type = gko::size_type; + auto mat1 = gko::initialize({{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto m = gko::batch::multivector::create_from_dense( + auto m = gko::batch::create_from_item< + gko::batch::matrix::BatchDense>( this->exec, std::vector{mat1.get(), mat2.get()}); this->assert_equal_to_original_mtx(m.get()); @@ -261,34 +273,45 @@ TYPED_TEST(BatchDense, CanBeConstructedFromDenseMatricesByDuplication) using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; using size_type = gko::size_type; + auto mat1 = gko::initialize( 4, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto bat_m = gko::matrix::BatchDense::create( + auto bat_m = gko::batch::create_from_item< + gko::batch::matrix::BatchDense>( this->exec, std::vector{mat1.get(), mat1.get(), mat1.get()}); - auto m = - 
gko::matrix::BatchDense::create(this->exec, 3, mat1.get()); + auto m = gko::batch::create_from_item< + gko::batch::matrix::BatchDense>(this->exec, 3, mat1.get()); GKO_ASSERT_BATCH_MTX_NEAR(bat_m.get(), m.get(), 1e-14); } -TYPED_TEST(BatchDense, CanBeConstructedFromDenseMatrices) +TYPED_TEST(BatchDense, CanBeConstructedByDuplicatingBatchDenseMatrices) { using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; using size_type = gko::size_type; - auto mat1 = gko::initialize( - 4, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); + + auto mat1 = gko::initialize({{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + this->exec); auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto m = gko::matrix::BatchDense::create( + auto m = gko::batch::create_from_item< + gko::batch::matrix::BatchDense>( this->exec, std::vector{mat1.get(), mat2.get()}); + auto m_ref = gko::batch::create_from_item< + gko::batch::matrix::BatchDense>( + this->exec, std::vector{mat1.get(), mat2.get(), mat1.get(), + mat2.get(), mat1.get(), mat2.get()}); - this->assert_equal_to_original_mtx(m.get()); + auto m2 = gko::batch::duplicate>( + this->exec, 3, m.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 1e-14); } @@ -302,30 +325,23 @@ TYPED_TEST(BatchDense, CanBeUnbatchedIntoDenseMatrices) auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto dense_mats = this->mtx->unbatch(); - + auto dense_mats = + gko::batch::unbatch>( + this->mtx.get()); GKO_ASSERT_MTX_NEAR(dense_mats[0].get(), mat1.get(), 0.); GKO_ASSERT_MTX_NEAR(dense_mats[1].get(), mat2.get(), 0.); } -TYPED_TEST(BatchDense, KnowsItsSizeAndValues) -{ - this->assert_equal_to_original_mtx(this->mtx.get()); -} - - TYPED_TEST(BatchDense, CanBeListConstructed) { using value_type = typename TestFixture::value_type; - auto m = gko::batch_initialize>( + auto m = gko::batch::initialize>( {{1.0, 2.0}, {1.0, 3.0}}, this->exec); - ASSERT_EQ(m->get_num_batch_entries(), 2); - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 1)); - ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 1)); - ASSERT_EQ(m->get_num_stored_elements(), 4); + ASSERT_EQ(m->get_num_batch_items(), 2); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 1)); EXPECT_EQ(m->at(0, 0), value_type{1}); EXPECT_EQ(m->at(0, 1), value_type{2}); EXPECT_EQ(m->at(1, 0), value_type{1}); @@ -336,12 +352,12 @@ TYPED_TEST(BatchDense, CanBeListConstructed) TYPED_TEST(BatchDense, CanBeListConstructedByCopies) { using value_type = typename TestFixture::value_type; - auto m = gko::batch_initialize>( + + auto m = gko::batch::initialize>( 2, I({1.0, 2.0}), this->exec); - ASSERT_EQ(m->get_num_batch_entries(), 2); - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 1)); - ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 1)); - ASSERT_EQ(m->get_num_stored_elements(), 4); + + ASSERT_EQ(m->get_num_batch_items(), 2); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 1)); EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); EXPECT_EQ(m->at(0, 0, 1), value_type{2.0}); EXPECT_EQ(m->at(1, 0, 0), value_type{1.0}); @@ -353,18 +369,13 @@ TYPED_TEST(BatchDense, CanBeDoubleListConstructed) { using value_type = typename TestFixture::value_type; using T = value_type; - auto m = gko::batch_initialize>( + + auto m = gko::batch::initialize>( {{I{1.0, 1.0, 0.0}, I{2.0, 4.0, 3.0}, I{3.0, 6.0, 1.0}}, - {I{1.0, 2.0}, I{3.0, 4.0}, I{5.0, 6.0}}}, + {I{1.0, 2.0, -1.0}, I{3.0, 4.0, -2.0}, I{5.0, 6.0, -3.0}}}, this->exec); - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(3, 3)); - 
ASSERT_EQ(m->get_size().at(1), gko::dim<2>(3, 2)); - ASSERT_EQ(m->get_stride().at(0), 3); - ASSERT_EQ(m->get_stride().at(1), 2); - EXPECT_EQ(m->get_num_stored_elements(), 15); - ASSERT_EQ(m->get_num_stored_elements(0), 9); - ASSERT_EQ(m->get_num_stored_elements(1), 6); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(3, 3)); EXPECT_EQ(m->at(0, 0), value_type{1.0}); EXPECT_EQ(m->at(0, 1), value_type{1.0}); EXPECT_EQ(m->at(0, 2), value_type{0.0}); @@ -372,72 +383,58 @@ TYPED_TEST(BatchDense, CanBeDoubleListConstructed) EXPECT_EQ(m->at(0, 4), value_type{4.0}); EXPECT_EQ(m->at(1, 0), value_type{1.0}); EXPECT_EQ(m->at(1, 1), value_type{2.0}); - EXPECT_EQ(m->at(1, 2), value_type{3.0}); - ASSERT_EQ(m->at(1, 3), value_type{4.0}); - EXPECT_EQ(m->at(1, 4), value_type{5.0}); + EXPECT_EQ(m->at(1, 2), value_type{-1.0}); + ASSERT_EQ(m->at(1, 3), value_type{3.0}); + EXPECT_EQ(m->at(1, 4), value_type{4.0}); } -TYPED_TEST(BatchDense, CanBeDoubleListConstructedWithstride) +TYPED_TEST(BatchDense, CanBeReadFromMatrixData) { using value_type = typename TestFixture::value_type; - using T = value_type; - auto m = gko::batch_initialize>( - {4, 3}, - {{I{1.0, 1.0, 0.0}, I{2.0, 4.0, 3.0}, I{3.0, 6.0, 1.0}}, - {I{1.0, 2.0}, I{3.0, 4.0}, I{5.0, 6.0}}}, - this->exec); + using index_type = int; - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(3, 3)); - ASSERT_EQ(m->get_size().at(1), gko::dim<2>(3, 2)); - ASSERT_EQ(m->get_stride().at(0), 4); - ASSERT_EQ(m->get_stride().at(1), 3); - EXPECT_EQ(m->get_num_stored_elements(), 21); - ASSERT_EQ(m->get_num_stored_elements(0), 12); - ASSERT_EQ(m->get_num_stored_elements(1), 9); - EXPECT_EQ(m->at(0, 0), value_type{1.0}); - EXPECT_EQ(m->at(0, 1), value_type{1.0}); - EXPECT_EQ(m->at(0, 2), value_type{0.0}); - ASSERT_EQ(m->at(0, 3), value_type{2.0}); - EXPECT_EQ(m->at(0, 4), value_type{4.0}); - EXPECT_EQ(m->at(1, 0), value_type{1.0}); - EXPECT_EQ(m->at(1, 1), value_type{2.0}); - EXPECT_EQ(m->at(1, 2), value_type{3.0}); - ASSERT_EQ(m->at(1, 3), value_type{4.0}); - EXPECT_EQ(m->at(1, 4), value_type{5.0}); + auto vec_data = std::vector>{}; + vec_data.emplace_back(gko::matrix_data( + {2, 2}, {{0, 0, 1.0}, {0, 1, 3.0}, {1, 0, 0.0}, {1, 1, 5.0}})); + vec_data.emplace_back(gko::matrix_data( + {2, 2}, {{0, 0, -1.0}, {0, 1, 0.5}, {1, 0, 0.0}, {1, 1, 9.0}})); + + auto m = gko::batch::read>( + this->exec, vec_data); + + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); + EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); + EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); + EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); + EXPECT_EQ(m->at(1, 0, 0), value_type{-1.0}); + EXPECT_EQ(m->at(1, 0, 1), value_type{0.5}); + EXPECT_EQ(m->at(1, 1, 0), value_type{0.0}); + EXPECT_EQ(m->at(1, 1, 1), value_type{9.0}); } -TYPED_TEST(BatchDense, CanBeReadFromMatrixData) +TYPED_TEST(BatchDense, CanBeReadFromSparseMatrixData) { using value_type = typename TestFixture::value_type; - auto m = gko::matrix::BatchDense::create(this->exec); - // clang-format off - m->read({gko::matrix_data{{2, 3}, - {{0, 0, 1.0}, - {0, 1, 3.0}, - {0, 2, 2.0}, - {1, 0, 0.0}, - {1, 1, 5.0}, - {1, 2, 0.0}}}, - gko::matrix_data{{2, 2}, - {{0, 0, -1.0}, - {0, 1, 0.5}, - {1, 0, 0.0}, - {1, 1, 9.0}}}}); - // clang-format on - - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); - ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 2)); - ASSERT_EQ(m->get_num_stored_elements(), 10); - ASSERT_EQ(m->get_num_stored_elements(0), 6); - ASSERT_EQ(m->get_num_stored_elements(1), 4); + using index_type = int; + auto vec_data = 
std::vector>{}; + vec_data.emplace_back(gko::matrix_data( + {2, 2}, {{0, 0, 1.0}, {0, 1, 3.0}, {1, 1, 5.0}})); + vec_data.emplace_back(gko::matrix_data( + {2, 2}, {{0, 0, -1.0}, {0, 1, 0.5}, {1, 1, 9.0}})); + + auto m = gko::batch::read>( + this->exec, vec_data); + + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); - EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); + EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); - EXPECT_EQ(m->at(0, 0, 2), value_type{2.0}); - EXPECT_EQ(m->at(0, 1, 2), value_type{0.0}); EXPECT_EQ(m->at(1, 0, 0), value_type{-1.0}); EXPECT_EQ(m->at(1, 0, 1), value_type{0.5}); EXPECT_EQ(m->at(1, 1, 0), value_type{0.0}); @@ -448,10 +445,12 @@ TYPED_TEST(BatchDense, CanBeReadFromMatrixData) TYPED_TEST(BatchDense, GeneratesCorrectMatrixData) { using value_type = typename TestFixture::value_type; + using index_type = int; using tpl = typename gko::matrix_data::nonzero_type; - std::vector> data; - this->mtx->write(data); + auto data = gko::batch::write>( + this->mtx.get()); ASSERT_EQ(data[0].size, gko::dim<2>(2, 3)); ASSERT_EQ(data[0].nonzeros.size(), 6); @@ -470,37 +469,3 @@ TYPED_TEST(BatchDense, GeneratesCorrectMatrixData) EXPECT_EQ(data[1].nonzeros[4], tpl(1, 1, value_type{2.0})); EXPECT_EQ(data[1].nonzeros[5], tpl(1, 2, value_type{3.0})); } - - -TYPED_TEST(BatchDense, CanBeReadFromMatrixAssemblyData) -{ - using value_type = typename TestFixture::value_type; - auto m = gko::matrix::BatchDense::create(this->exec); - gko::matrix_assembly_data data1(gko::dim<2>{2, 3}); - data1.set_value(0, 0, 1.0); - data1.set_value(0, 1, 3.0); - data1.set_value(0, 2, 2.0); - data1.set_value(1, 0, 0.0); - data1.set_value(1, 1, 5.0); - data1.set_value(1, 2, 0.0); - gko::matrix_assembly_data data2(gko::dim<2>{2, 1}); - data2.set_value(0, 0, 2.0); - data2.set_value(1, 0, 5.0); - auto data = std::vector>{data1, data2}; - - m->read(data); - - ASSERT_EQ(m->get_size().at(0), gko::dim<2>(2, 3)); - ASSERT_EQ(m->get_size().at(1), gko::dim<2>(2, 1)); - ASSERT_EQ(m->get_num_stored_elements(), 8); - ASSERT_EQ(m->get_num_stored_elements(0), 6); - ASSERT_EQ(m->get_num_stored_elements(1), 2); - EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); - EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); - EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); - EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); - EXPECT_EQ(m->at(0, 0, 2), value_type{2.0}); - ASSERT_EQ(m->at(0, 1, 2), value_type{0.0}); - EXPECT_EQ(m->at(1, 0, 0), value_type{2.0}); - EXPECT_EQ(m->at(1, 1, 0), value_type{5.0}); -} diff --git a/core/test/utils/assertions.hpp b/core/test/utils/assertions.hpp index 63ed1e5423a..40034883078 100644 --- a/core/test/utils/assertions.hpp +++ b/core/test/utils/assertions.hpp @@ -720,12 +720,8 @@ ::testing::AssertionResult batch_matrices_near( using value_type1 = typename Mat1::value_type; using value_type2 = typename Mat2::value_type; - auto first_data = - gko::batch::write>(first); - auto second_data = - gko::batch::write>(second); + auto first_data = gko::batch::write(first); + auto second_data = gko::batch::write(second); if (first_data.size() != second_data.size()) { return ::testing::AssertionFailure() diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 1b36cd64869..55a1791a2a5 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -124,46 +124,75 @@ class BatchDense : public EnableBatchLinOp>, size_type 
item_id) const; /** - * Returns the batch size. + * Returns a pointer to the array of values of the multi-vector * - * @return the batch size + * @return the pointer to the array of values */ - batch_dim<2> get_size() const { return batch_size_; } + value_type* get_values() noexcept { return values_.get_data(); } /** - * Returns the number of batch items. + * @copydoc get_values() * - * @return the number of batch items + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. */ - size_type get_num_batch_items() const + const value_type* get_const_values() const noexcept { - return batch_size_.get_num_batch_items(); + return values_.get_const_data(); } /** - * Returns the common size of the batch items. + * Returns a single element for a particular batch item. + * + * @param batch_id the batch item index to be queried + * @param row the row of the requested element + * @param col the column of the requested element * - * @return the common size stored + * @note the method has to be called on the same Executor the vector is + * stored at (e.g. trying to call this method on a GPU multi-vector + * from the OMP results in a runtime error) */ - dim<2> get_common_size() const { return batch_size_.get_common_size(); } + value_type& at(size_type batch_id, size_type row, size_type col) + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return values_.get_data()[linearize_index(batch_id, row, col)]; + } /** - * Returns a pointer to the array of values of the multi-vector - * - * @return the pointer to the array of values + * @copydoc MultiVector::at(size_type, size_type, size_type) */ - value_type* get_values() noexcept { return values_.get_data(); } + value_type at(size_type batch_id, size_type row, size_type col) const + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return values_.get_const_data()[linearize_index(batch_id, row, col)]; + } /** - * @copydoc get_values() + * Returns a single element for a particular batch item. * - * @note This is the constant version of the function, which can be - * significantly more memory efficient than the non-constant version, - * so always prefer this version. + * Useful for iterating across all elements of the vector. + * However, it is less efficient than the two-parameter variant of this + * method. + * + * @param batch_id the batch item index to be queried + * @param idx a linear index of the requested element + * + * @note the method has to be called on the same Executor the vector is + * stored at (e.g. 
trying to call this method on a GPU multi-vector + * from the OMP results in a runtime error) */ - const value_type* get_const_values() const noexcept + ValueType& at(size_type batch_id, size_type idx) noexcept { - return values_.get_const_data(); + return values_.get_data()[linearize_index(batch_id, idx)]; + } + + /** + * @copydoc MultiVector::at(size_type, size_type, size_type) + */ + ValueType at(size_type batch_id, size_type idx) const noexcept + { + return values_.get_const_data()[linearize_index(batch_id, idx)]; } /** @@ -225,12 +254,6 @@ class BatchDense : public EnableBatchLinOp>, std::shared_ptr exec, const batch_dim<2>& sizes, gko::detail::const_array_view&& values); -private: - inline size_type compute_num_elems(const batch_dim<2>& size) - { - return size.get_cumulative_offset(size.get_num_batch_items()); - } - void apply(const MultiVector* b, MultiVector* x) const @@ -246,14 +269,13 @@ class BatchDense : public EnableBatchLinOp>, this->apply_impl(alpha, b, beta, x); } -protected: - /** - * Sets the size of the MultiVector. - * - * @param value the new size of the operator - */ - void set_size(const batch_dim<2>& value) noexcept; +private: + inline size_type compute_num_elems(const batch_dim<2>& size) + { + return size.get_cumulative_offset(size.get_num_batch_items()); + } +protected: /** * Creates an uninitialized BatchDense matrix of the specified size. * @@ -310,8 +332,8 @@ class BatchDense : public EnableBatchLinOp>, size_type linearize_index(size_type batch, size_type row, size_type col) const noexcept { - return batch_size_.get_cumulative_offset(batch) + - row * batch_size_.get_common_size()[1] + col; + return this->get_size().get_cumulative_offset(batch) + + row * this->get_size().get_common_size()[1] + col; } size_type linearize_index(size_type batch, size_type idx) const noexcept @@ -321,7 +343,6 @@ class BatchDense : public EnableBatchLinOp>, } private: - batch_dim<2> batch_size_; array values_; }; From e4928b2e60b8086acb351bf75f0a6efc9689f9c0 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 3 Oct 2023 16:18:47 +0200 Subject: [PATCH 06/28] Add reference kernel tests --- core/matrix/batch_dense.cpp | 18 +- reference/test/matrix/CMakeLists.txt | 1 + reference/test/matrix/batch_dense_kernels.cpp | 219 ++++++++++++++++++ 3 files changed, 230 insertions(+), 8 deletions(-) create mode 100644 reference/test/matrix/batch_dense_kernels.cpp diff --git a/core/matrix/batch_dense.cpp b/core/matrix/batch_dense.cpp index f5d255d901c..c9da010c228 100644 --- a/core/matrix/batch_dense.cpp +++ b/core/matrix/batch_dense.cpp @@ -168,11 +168,12 @@ template void BatchDense::apply_impl(const MultiVector* b, MultiVector* x) const { - GKO_ASSERT_EQUAL_DIMENSIONS(b->get_common_size(), x->get_common_size()); GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); - GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); - GKO_ASSERT_CONFORMANT(this->get_common_size(), x->get_common_size()); + + GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); + GKO_ASSERT_EQUAL_ROWS(this->get_common_size(), x->get_common_size()); + GKO_ASSERT_EQUAL_COLS(b->get_common_size(), x->get_common_size()); this->get_executor()->run(dense::make_simple_apply(this, b, x)); } @@ -183,13 +184,14 @@ void BatchDense::apply_impl(const MultiVector* alpha, const MultiVector* beta, MultiVector* x) const { - GKO_ASSERT_EQUAL_DIMENSIONS(b->get_common_size(), x->get_common_size()); 
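+    // Shape requirements (verified by the checks below): all three operands
+    // must hold the same number of batch items, and for each item a matrix
+    // A of size (m x n) requires b to be (n x k) and x to be (m x k).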
GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); - GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); - GKO_ASSERT_CONFORMANT(this->get_common_size(), x->get_common_size()); - GKO_ASSERT_EQUAL_COLS(alpha->get_common_size(), gko::dim<2>(1, 1)); - GKO_ASSERT_EQUAL_COLS(beta->get_common_size(), gko::dim<2>(1, 1)); + + GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); + GKO_ASSERT_EQUAL_ROWS(this->get_common_size(), x->get_common_size()); + GKO_ASSERT_EQUAL_COLS(b->get_common_size(), x->get_common_size()); + GKO_ASSERT_EQUAL_DIMENSIONS(alpha->get_common_size(), gko::dim<2>(1, 1)); + GKO_ASSERT_EQUAL_DIMENSIONS(beta->get_common_size(), gko::dim<2>(1, 1)); this->get_executor()->run( dense::make_advanced_apply(alpha, this, b, beta, x)); } diff --git a/reference/test/matrix/CMakeLists.txt b/reference/test/matrix/CMakeLists.txt index 9670a5df80c..18634de662d 100644 --- a/reference/test/matrix/CMakeLists.txt +++ b/reference/test/matrix/CMakeLists.txt @@ -1,3 +1,4 @@ +ginkgo_create_test(batch_dense_kernels) ginkgo_create_test(coo_kernels) ginkgo_create_test(csr_kernels) ginkgo_create_test(dense_kernels) diff --git a/reference/test/matrix/batch_dense_kernels.cpp b/reference/test/matrix/batch_dense_kernels.cpp new file mode 100644 index 00000000000..7bf11ba70f9 --- /dev/null +++ b/reference/test/matrix/batch_dense_kernels.cpp @@ -0,0 +1,219 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#include +#include + + +#include + + +#include +#include +#include +#include +#include + + +#include "core/matrix/batch_dense_kernels.hpp" +#include "core/test/utils.hpp" + + +template +class BatchDense : public ::testing::Test { +protected: + using value_type = T; + using size_type = gko::size_type; + using Mtx = gko::batch::matrix::BatchDense; + using MVec = gko::batch::MultiVector; + using DenseMtx = gko::matrix::Dense; + using ComplexMtx = gko::to_complex; + using RealMtx = gko::remove_complex; + BatchDense() + : exec(gko::ReferenceExecutor::create()), + mtx_0(gko::batch::initialize( + {{I({1.0, -1.0, 1.5}), I({-2.0, 2.0, 3.0})}, + {{1.0, -2.0, -0.5}, {1.0, -2.5, 4.0}}}, + exec)), + mtx_00(gko::initialize( + {I({1.0, -1.0, 1.5}), I({-2.0, 2.0, 3.0})}, exec)), + mtx_01(gko::initialize( + {I({1.0, -2.0, -0.5}), I({1.0, -2.5, 4.0})}, exec)), + b_0(gko::batch::initialize( + {{I({1.0, 0.0, 1.0}), I({2.0, 0.0, 1.0}), + I({1.0, 0.0, 2.0})}, + {I({-1.0, 1.0, 1.0}), I({1.0, -1.0, 1.0}), + I({1.0, 0.0, 2.0})}}, + exec)), + b_00(gko::initialize( + {I({1.0, 0.0, 1.0}), I({2.0, 0.0, 1.0}), + I({1.0, 0.0, 2.0})}, + exec)), + b_01(gko::initialize( + {I({-1.0, 1.0, 1.0}), I({1.0, -1.0, 1.0}), + I({1.0, 0.0, 2.0})}, + exec)), + x_0(gko::batch::initialize( + {{I({2.0, 0.0, 1.0}), I({2.0, 0.0, 2.0})}, + {I({-2.0, 1.0, 1.0}), I({1.0, -1.0, -1.0})}}, + exec)), + x_00(gko::initialize( + {I({2.0, 0.0, 1.0}), I({2.0, 0.0, 2.0})}, exec)), + x_01(gko::initialize( + {I({-2.0, 1.0, 1.0}), I({1.0, -1.0, -1.0})}, exec)) + {} + + std::shared_ptr exec; + std::unique_ptr mtx_0; + std::unique_ptr mtx_00; + std::unique_ptr mtx_01; + std::unique_ptr b_0; + std::unique_ptr b_00; + std::unique_ptr b_01; + std::unique_ptr x_0; + std::unique_ptr x_00; + std::unique_ptr x_01; + + std::ranlux48 rand_engine; +}; + + +TYPED_TEST_SUITE(BatchDense, gko::test::ValueTypes); + + +TYPED_TEST(BatchDense, AppliesToBatchMultiVector) +{ + using T = typename TestFixture::value_type; + + this->mtx_0->apply(this->b_0.get(), this->x_0.get()); + this->mtx_00->apply(this->b_00.get(), this->x_00.get()); + this->mtx_01->apply(this->b_01.get(), this->x_01.get()); + + auto res = gko::batch::unbatch>(this->x_0.get()); + + GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); +} + + +TYPED_TEST(BatchDense, AppliesLinearCombinationToBatchMultiVector) +{ + using Mtx = typename TestFixture::Mtx; + using MVec = typename TestFixture::MVec; + using DenseMtx = typename TestFixture::DenseMtx; + using T = typename TestFixture::value_type; + auto alpha = gko::batch::initialize({{1.5}, {-1.0}}, this->exec); + auto beta = gko::batch::initialize({{2.5}, {-4.0}}, this->exec); + auto alpha0 = gko::initialize({1.5}, this->exec); + auto alpha1 = gko::initialize({-1.0}, this->exec); + auto beta0 = gko::initialize({2.5}, this->exec); + auto beta1 = gko::initialize({-4.0}, this->exec); + + this->mtx_0->apply(alpha.get(), this->b_0.get(), beta.get(), + this->x_0.get()); + this->mtx_00->apply(alpha0.get(), this->b_00.get(), beta0.get(), + this->x_00.get()); + this->mtx_01->apply(alpha1.get(), this->b_01.get(), beta1.get(), + this->x_01.get()); + + auto res = gko::batch::unbatch>(this->x_0.get()); + + GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); +} + + +TYPED_TEST(BatchDense, ApplyFailsOnWrongNumberOfResultCols) +{ + using MVec = typename 
TestFixture::MVec; + auto res = MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2}}); + + ASSERT_THROW(this->mtx_0->apply(this->b_0.get(), res.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(BatchDense, ApplyFailsOnWrongNumberOfResultRows) +{ + using MVec = typename TestFixture::MVec; + auto res = MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3}}); + + ASSERT_THROW(this->mtx_0->apply(this->b_0.get(), res.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(BatchDense, ApplyFailsOnWrongInnerDimension) +{ + using MVec = typename TestFixture::MVec; + auto res = + MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}}); + + ASSERT_THROW(this->mtx_0->apply(res.get(), this->x_0.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(BatchDense, AdvancedApplyFailsOnWrongInnerDimension) +{ + using MVec = typename TestFixture::MVec; + auto res = + MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}}); + auto alpha = + MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + auto beta = + MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + + ASSERT_THROW( + this->mtx_0->apply(alpha.get(), res.get(), beta.get(), this->x_0.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(BatchDense, AdvancedApplyFailsOnWrongAlphaDimension) +{ + using MVec = typename TestFixture::MVec; + auto res = + MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3, 3}}); + auto alpha = + MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 1}}); + auto beta = + MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + + ASSERT_THROW( + this->mtx_0->apply(alpha.get(), res.get(), beta.get(), this->x_0.get()), + gko::DimensionMismatch); +} From ecc7e5123906e0c3fa442c34a02fd4a967bfcfad Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 3 Oct 2023 17:56:16 +0200 Subject: [PATCH 07/28] Add OMP tests and fix kernel --- core/matrix/batch_struct.hpp | 14 +- cuda/matrix/batch_dense_kernels.cu | 2 +- hip/matrix/batch_dense_kernels.hip.cpp | 2 +- omp/matrix/batch_dense_kernels.cpp | 38 ++---- reference/matrix/batch_dense_kernels.cpp | 32 ++--- reference/matrix/batch_dense_kernels.hpp.inc | 4 +- reference/test/matrix/batch_dense_kernels.cpp | 27 ++++ test/matrix/CMakeLists.txt | 1 + test/matrix/batch_dense_kernels.cpp | 129 ++++++++++++++++++ 9 files changed, 188 insertions(+), 61 deletions(-) create mode 100644 test/matrix/batch_dense_kernels.cpp diff --git a/core/matrix/batch_struct.hpp b/core/matrix/batch_struct.hpp index b6926b0894d..37c297bb6b5 100644 --- a/core/matrix/batch_struct.hpp +++ b/core/matrix/batch_struct.hpp @@ -54,7 +54,7 @@ struct batch_item { ValueType* values; int stride; int num_rows; - int num_rhs; + int num_cols; }; @@ -70,7 +70,7 @@ struct uniform_batch { size_type num_batch_items; int stride; int num_rows; - int num_rhs; + int num_cols; size_type get_entry_storage() const { @@ -86,7 +86,7 @@ template GKO_ATTRIBUTES GKO_INLINE batch_dense::batch_item to_const( const batch_dense::batch_item& b) { - return {b.values, b.stride, b.num_rows, b.num_rhs}; + return {b.values, b.stride, b.num_rows, b.num_cols}; } @@ -94,7 +94,7 @@ template GKO_ATTRIBUTES GKO_INLINE batch_dense::uniform_batch to_const( const batch_dense::uniform_batch& ub) { - return {ub.values, ub.num_batch_items, ub.stride, ub.num_rows, ub.num_rhs}; + return {ub.values, ub.num_batch_items, ub.stride, ub.num_rows, ub.num_cols}; } @@ -104,16 +104,16 @@ GKO_ATTRIBUTES GKO_INLINE batch_dense::batch_item extract_batch_item( const size_type batch_idx) { 
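+    // The batch items are stored contiguously one after another, so item
+    // batch_idx starts at offset batch_idx * stride * num_rows within the
+    // shared values array.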
return {batch.values + batch_idx * batch.stride * batch.num_rows, - batch.stride, batch.num_rows, batch.num_rhs}; + batch.stride, batch.num_rows, batch.num_cols}; } template GKO_ATTRIBUTES GKO_INLINE batch_dense::batch_item extract_batch_item( ValueType* const batch_values, const int stride, const int num_rows, - const int num_rhs, const size_type batch_idx) + const int num_cols, const size_type batch_idx) { return {batch_values + batch_idx * stride * num_rows, stride, num_rows, - num_rhs}; + num_cols}; } diff --git a/cuda/matrix/batch_dense_kernels.cu b/cuda/matrix/batch_dense_kernels.cu index 4615af581f5..c0a172fd026 100644 --- a/cuda/matrix/batch_dense_kernels.cu +++ b/cuda/matrix/batch_dense_kernels.cu @@ -45,7 +45,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "cuda/components/reduction.cuh" #include "cuda/components/thread_ids.cuh" #include "cuda/components/uninitialized_array.hpp" -#include "cuda/matrix/batch_struct.hpp" +// #include "cuda/matrix/batch_struct.hip.hpp" namespace gko { diff --git a/hip/matrix/batch_dense_kernels.hip.cpp b/hip/matrix/batch_dense_kernels.hip.cpp index 93570388d50..06f0caf81ec 100644 --- a/hip/matrix/batch_dense_kernels.hip.cpp +++ b/hip/matrix/batch_dense_kernels.hip.cpp @@ -48,7 +48,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "hip/components/reduction.hip.hpp" #include "hip/components/thread_ids.hip.hpp" #include "hip/components/uninitialized_array.hip.hpp" -#include "hip/matrix/batch_struct.hip.hpp" +// #include "hip/matrix/batch_struct.hip.hpp" namespace gko { diff --git a/omp/matrix/batch_dense_kernels.cpp b/omp/matrix/batch_dense_kernels.cpp index fe742bee402..a767215c844 100644 --- a/omp/matrix/batch_dense_kernels.cpp +++ b/omp/matrix/batch_dense_kernels.cpp @@ -42,6 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/base/batch_struct.hpp" #include "core/matrix/batch_struct.hpp" +#include "reference/base/batch_struct.hpp" #include "reference/matrix/batch_struct.hpp" @@ -70,7 +71,7 @@ void simple_apply(std::shared_ptr exec, const auto mat_ub = host::get_batch_struct(mat); #pragma omp parallel for for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { - const auto mat_item = batch::extract_batch_item(mat_ub, batch); + const auto mat_item = batch::matrix::extract_batch_item(mat_ub, batch); const auto b_item = batch::extract_batch_item(b_ub, batch); const auto x_item = batch::extract_batch_item(x_ub, batch); simple_apply_kernel(mat_item, b_item, x_item); @@ -84,40 +85,25 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void advanced_apply(std::shared_ptr exec, const batch::MultiVector* alpha, - const batch::matrix::BatchDense* a, + const batch::matrix::BatchDense* mat, const batch::MultiVector* b, const batch::MultiVector* beta, - batch::MultiVector* c) + batch::MultiVector* x) { const auto b_ub = host::get_batch_struct(b); const auto x_ub = host::get_batch_struct(x); const auto mat_ub = host::get_batch_struct(mat); const auto alpha_ub = host::get_batch_struct(alpha); const auto beta_ub = host::get_batch_struct(beta); - if (alpha->get_num_batch_items() > 1) { - GKO_ASSERT(alpha->get_num_batch_items() == x->get_num_batch_items()); - GKO_ASSERT(beta->get_num_batch_items() == x->get_num_batch_items()); #pragma omp parallel for - for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { - const auto mat_item = batch::extract_batch_item(mat_ub, batch); - const auto b_item = batch::extract_batch_item(b_ub, batch); - const auto x_item = batch::extract_batch_item(x_ub, batch); - const auto alpha_item = batch::extract_batch_item(alpha_ub, batch); - const auto beta_item = batch::extract_batch_item(beta_ub, batch); - advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, - beta_item.values[0], x_item); - } - } else { - const auto alpha_item = batch::extract_batch_item(alpha_ub, 0); - const auto beta_item = batch::extract_batch_item(beta_ub, 0); -#pragma omp parallel for - for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { - const auto mat_item = batch::extract_batch_item(mat_ub, batch); - const auto b_item = batch::extract_batch_item(b_ub, batch); - const auto x_item = batch::extract_batch_item(x_ub, batch); - advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, - beta_item.values[0], x_item); - } + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::matrix::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + const auto alpha_item = batch::extract_batch_item(alpha_ub, batch); + const auto beta_item = batch::extract_batch_item(beta_ub, batch); + advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, + beta_item.values[0], x_item); } } diff --git a/reference/matrix/batch_dense_kernels.cpp b/reference/matrix/batch_dense_kernels.cpp index bb5f3e18df7..f42d9a81d1f 100644 --- a/reference/matrix/batch_dense_kernels.cpp +++ b/reference/matrix/batch_dense_kernels.cpp @@ -95,30 +95,14 @@ void advanced_apply(std::shared_ptr exec, const auto mat_ub = host::get_batch_struct(mat); const auto alpha_ub = host::get_batch_struct(alpha); const auto beta_ub = host::get_batch_struct(beta); - if (alpha->get_num_batch_items() > 1) { - GKO_ASSERT(alpha->get_num_batch_items() == 
x->get_num_batch_items()); - GKO_ASSERT(beta->get_num_batch_items() == x->get_num_batch_items()); - for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { - const auto mat_item = - batch::matrix::extract_batch_item(mat_ub, batch); - const auto b_item = batch::extract_batch_item(b_ub, batch); - const auto x_item = batch::extract_batch_item(x_ub, batch); - const auto alpha_item = batch::extract_batch_item(alpha_ub, batch); - const auto beta_item = batch::extract_batch_item(beta_ub, batch); - advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, - beta_item.values[0], x_item); - } - } else { - const auto alpha_item = batch::extract_batch_item(alpha_ub, 0); - const auto beta_item = batch::extract_batch_item(beta_ub, 0); - for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { - const auto mat_item = - batch::matrix::extract_batch_item(mat_ub, batch); - const auto b_item = batch::extract_batch_item(b_ub, batch); - const auto x_item = batch::extract_batch_item(x_ub, batch); - advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, - beta_item.values[0], x_item); - } + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::matrix::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + const auto alpha_item = batch::extract_batch_item(alpha_ub, batch); + const auto beta_item = batch::extract_batch_item(beta_ub, batch); + advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, + beta_item.values[0], x_item); } } diff --git a/reference/matrix/batch_dense_kernels.hpp.inc b/reference/matrix/batch_dense_kernels.hpp.inc index d45183b2faa..bff9ad137cf 100644 --- a/reference/matrix/batch_dense_kernels.hpp.inc +++ b/reference/matrix/batch_dense_kernels.hpp.inc @@ -43,7 +43,7 @@ inline void simple_apply_kernel( } for (int row = 0; row < c.num_rows; ++row) { - for (int inner = 0; inner < a.num_rhs; ++inner) { + for (int inner = 0; inner < a.num_cols; ++inner) { for (int col = 0; col < c.num_rhs; ++col) { c.values[row * c.stride + col] += a.values[row * a.stride + inner] * @@ -77,7 +77,7 @@ inline void advanced_apply_kernel( } for (int row = 0; row < c.num_rows; ++row) { - for (int inner = 0; inner < a.num_rhs; ++inner) { + for (int inner = 0; inner < a.num_cols; ++inner) { for (int col = 0; col < c.num_rhs; ++col) { c.values[row * c.stride + col] += alpha * a.values[row * a.stride + inner] * diff --git a/reference/test/matrix/batch_dense_kernels.cpp b/reference/test/matrix/batch_dense_kernels.cpp index 7bf11ba70f9..8e2e522e5f4 100644 --- a/reference/test/matrix/batch_dense_kernels.cpp +++ b/reference/test/matrix/batch_dense_kernels.cpp @@ -129,6 +129,33 @@ TYPED_TEST(BatchDense, AppliesToBatchMultiVector) } +TYPED_TEST(BatchDense, AppliesLinearCombinationWithSameAlphaToBatchMultiVector) +{ + using Mtx = typename TestFixture::Mtx; + using MVec = typename TestFixture::MVec; + using DenseMtx = typename TestFixture::DenseMtx; + using T = typename TestFixture::value_type; + auto alpha = gko::batch::initialize(2, {1.5}, this->exec); + auto beta = gko::batch::initialize(2, {-4.0}, this->exec); + auto alpha0 = gko::initialize({1.5}, this->exec); + auto alpha1 = gko::initialize({1.5}, this->exec); + auto beta0 = gko::initialize({-4.0}, this->exec); + auto beta1 = gko::initialize({-4.0}, this->exec); + + this->mtx_0->apply(alpha.get(), this->b_0.get(), beta.get(), + this->x_0.get()); + 
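+    // x = alpha * A * b + beta * x is computed once through the batched
+    // interface (above) and once per item through the plain Dense apply
+    // (below); the unbatched results serve as the reference for the
+    // comparison at the end of this test.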
this->mtx_00->apply(alpha0.get(), this->b_00.get(), beta0.get(), + this->x_00.get()); + this->mtx_01->apply(alpha1.get(), this->b_01.get(), beta1.get(), + this->x_01.get()); + + auto res = gko::batch::unbatch>(this->x_0.get()); + + GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); +} + + TYPED_TEST(BatchDense, AppliesLinearCombinationToBatchMultiVector) { using Mtx = typename TestFixture::Mtx; diff --git a/test/matrix/CMakeLists.txt b/test/matrix/CMakeLists.txt index a9cf267a3c8..91987f3717f 100644 --- a/test/matrix/CMakeLists.txt +++ b/test/matrix/CMakeLists.txt @@ -1,3 +1,4 @@ +ginkgo_create_common_test(batch_dense_kernels DISABLE_EXECUTORS dpcpp hip cuda) ginkgo_create_common_device_test(csr_kernels) ginkgo_create_common_test(csr_kernels2) ginkgo_create_common_test(coo_kernels) diff --git a/test/matrix/batch_dense_kernels.cpp b/test/matrix/batch_dense_kernels.cpp new file mode 100644 index 00000000000..60ef4d61a95 --- /dev/null +++ b/test/matrix/batch_dense_kernels.cpp @@ -0,0 +1,129 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#include + + +#include + + +#include +#include +#include + + +#include "core/base/batch_utilities.hpp" +#include "core/matrix/batch_dense_kernels.hpp" +#include "core/test/utils.hpp" +#include "core/test/utils/assertions.hpp" +#include "core/test/utils/batch_helpers.hpp" +#include "test/utils/executor.hpp" + + +class BatchDense : public CommonTestFixture { +protected: + using vtype = double; + using Mtx = gko::batch::matrix::BatchDense; + using MVec = gko::batch::MultiVector; + + BatchDense() : rand_engine(15) {} + + template + std::unique_ptr gen_mtx(const gko::size_type num_batch_items, + gko::size_type num_rows, + gko::size_type num_cols) + { + return gko::test::generate_random_batch_matrix( + num_batch_items, num_rows, num_cols, + std::uniform_int_distribution<>(num_cols, num_cols), + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); + } + + void set_up_apply_data(gko::size_type num_vecs = 1) + { + const int num_rows = 252; + const int num_cols = 32; + x = gen_mtx(batch_size, num_rows, num_cols); + y = gen_mtx(batch_size, num_cols, num_vecs); + alpha = gen_mtx(batch_size, 1, 1); + beta = gen_mtx(batch_size, 1, 1); + dx = gko::clone(exec, x); + dy = gko::clone(exec, y); + dalpha = gko::clone(exec, alpha); + dbeta = gko::clone(exec, beta); + expected = MVec::create( + ref, + gko::batch_dim<2>(batch_size, gko::dim<2>{num_rows, num_vecs})); + expected->fill(gko::one()); + dresult = gko::clone(exec, expected); + } + + std::ranlux48 rand_engine; + + const size_t batch_size = 11; + std::unique_ptr x; + std::unique_ptr y; + std::unique_ptr alpha; + std::unique_ptr beta; + std::unique_ptr expected; + std::unique_ptr dresult; + std::unique_ptr dx; + std::unique_ptr dy; + std::unique_ptr dalpha; + std::unique_ptr dbeta; +}; + + +TEST_F(BatchDense, SingleVectorApplyIsEquivalentToRef) +{ + set_up_apply_data(1); + + x->apply(y.get(), expected.get()); + dx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(BatchDense, SingleVectorAdvancedApplyIsEquivalentToRef) +{ + set_up_apply_data(1); + + x->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, 1e-14); +} From ccbbb400121cb021ea76719a454d92cde54dd99f Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Wed, 4 Oct 2023 09:41:28 +0000 Subject: [PATCH 08/28] Format files Co-authored-by: Pratik Nayak --- include/ginkgo/core/base/batch_multi_vector.hpp | 4 ++-- include/ginkgo/core/matrix/batch_dense.hpp | 4 ++-- include/ginkgo/ginkgo.hpp | 1 + test/matrix/batch_dense_kernels.cpp | 4 ++-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index 43f35e55f62..6b3b207c76c 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -213,8 +213,8 @@ class MultiVector * significantly more memory efficient than the non-constant version, * so always prefer this version. 
*/ - const value_type* get_const_values_for_item(size_type batch_id) const - noexcept + const value_type* get_const_values_for_item( + size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 55a1791a2a5..0457f444c5a 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -217,8 +217,8 @@ class BatchDense : public EnableBatchLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type* get_const_values_for_item(size_type batch_id) const - noexcept + const value_type* get_const_values_for_item( + size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index aed3b5f3572..8bb29242e88 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -108,6 +108,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include #include #include diff --git a/test/matrix/batch_dense_kernels.cpp b/test/matrix/batch_dense_kernels.cpp index 60ef4d61a95..7d44f29899c 100644 --- a/test/matrix/batch_dense_kernels.cpp +++ b/test/matrix/batch_dense_kernels.cpp @@ -30,7 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include +#include "core/matrix/batch_dense_kernels.hpp" #include @@ -43,10 +43,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include "core/base/batch_utilities.hpp" -#include "core/matrix/batch_dense_kernels.hpp" #include "core/test/utils.hpp" #include "core/test/utils/assertions.hpp" #include "core/test/utils/batch_helpers.hpp" From 691201a97388abfa3ba072378a59a55e0c03bf91 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 4 Oct 2023 11:49:01 +0200 Subject: [PATCH 09/28] circ dep and typo fixes --- core/test/matrix/batch_dense.cpp | 2 +- reference/matrix/batch_struct.hpp | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/core/test/matrix/batch_dense.cpp b/core/test/matrix/batch_dense.cpp index f9210550bea..02788e14b7d 100644 --- a/core/test/matrix/batch_dense.cpp +++ b/core/test/matrix/batch_dense.cpp @@ -153,7 +153,7 @@ TYPED_TEST(BatchDense, CanBeMoved) auto mtx_copy = gko::batch::matrix::BatchDense::create(this->exec); - mtx_copy->copy_from(std::move(this->mtx)); + this->mtx->move_to(mtx_copy); this->assert_equal_to_original_mtx(mtx_copy.get()); } diff --git a/reference/matrix/batch_struct.hpp b/reference/matrix/batch_struct.hpp index 1bed5a4e5c9..dee7c71948a 100644 --- a/reference/matrix/batch_struct.hpp +++ b/reference/matrix/batch_struct.hpp @@ -36,9 +36,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include +#include #include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" namespace gko { From a4b82eaa5845275ffe29e6eba0c9f1df35966a13 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 4 Oct 2023 15:04:36 +0200 Subject: [PATCH 10/28] Add CUDA, HIP kernels and tests Co-authored-by: Aditya Kashi Co-authored-by: Isha Aggarwal --- .../batch_dense_kernel_launcher.hpp.inc | 78 ++++++++ .../matrix/batch_dense_kernels.hpp.inc | 170 ++++++++++++++++++ cuda/base/batch_multi_vector_kernels.cu | 1 + cuda/matrix/batch_dense_kernels.cu | 26 +-- cuda/matrix/batch_struct.hpp | 96 ++++++++++ hip/matrix/batch_dense_kernels.hip.cpp | 28 +-- hip/matrix/batch_struct.hip.hpp | 96 ++++++++++ test/matrix/CMakeLists.txt | 2 +- 8 files changed, 459 insertions(+), 38 deletions(-) create mode 100644 common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc create mode 100644 common/cuda_hip/matrix/batch_dense_kernels.hpp.inc create mode 100644 cuda/matrix/batch_struct.hpp create mode 100644 hip/matrix/batch_struct.hip.hpp diff --git a/common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc b/common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc new file mode 100644 index 00000000000..668b0278680 --- /dev/null +++ b/common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc @@ -0,0 +1,78 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::BatchDense* mat, + const batch::MultiVector* b, + batch::MultiVector* x) +{ + const auto num_blocks = mat->get_num_batch_items(); + const auto b_ub = get_batch_struct(b); + const auto x_ub = get_batch_struct(x); + const auto mat_ub = get_batch_struct(mat); + if (b->get_common_size()[1] > 1) { + GKO_NOT_IMPLEMENTED; + } + simple_apply_kernel<<get_stream()>>>(mat_ub, b_ub, x_ub); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::BatchDense* mat, + const batch::MultiVector* b, + const batch::MultiVector* beta, + batch::MultiVector* x) +{ + const auto num_blocks = mat->get_num_batch_items(); + const auto b_ub = get_batch_struct(b); + const auto x_ub = get_batch_struct(x); + const auto mat_ub = get_batch_struct(mat); + const auto alpha_ub = get_batch_struct(alpha); + const auto beta_ub = get_batch_struct(beta); + if (b->get_common_size()[1] > 1) { + GKO_NOT_IMPLEMENTED; + } + advanced_apply_kernel<<get_stream()>>>(alpha_ub, mat_ub, b_ub, + beta_ub, x_ub); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); diff --git a/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc b/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc new file mode 100644 index 00000000000..43046166abc --- /dev/null +++ b/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc @@ -0,0 +1,170 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + + +template +__device__ __forceinline__ void simple_apply( + const gko::batch::matrix::batch_dense::batch_item& mat, + const ValueType* const __restrict__ b, ValueType* const __restrict__ x) +{ + constexpr auto tile_size = config::warp_size; + + auto thread_block = group::this_thread_block(); + auto subwarp_grp = group::tiled_partition(thread_block); + const auto subwarp_grp_id = static_cast(threadIdx.x / tile_size); + const int num_subwarp_grps_per_block = ceildiv(blockDim.x, tile_size); + + for (int row = subwarp_grp_id; row < mat.num_rows; + row += num_subwarp_grps_per_block) { + ValueType temp = zero(); + for (int j = subwarp_grp.thread_rank(); j < mat.num_cols; + j += subwarp_grp.size()) { + const ValueType val = mat.values[row * mat.stride + j]; + temp += val * b[j]; + } + +#pragma unroll + for (int i = static_cast(tile_size) / 2; i > 0; i /= 2) { + temp += subwarp_grp.shfl_down(temp, i); + } + + if (subwarp_grp.thread_rank() == 0) { + x[row] = temp; + } + } +} + +template +__global__ __launch_bounds__( + default_block_size, + sm_oversubscription) void simple_apply_kernel(const gko::batch::matrix:: + batch_dense:: + uniform_batch< + const ValueType> + mat, + const gko::batch:: + multi_vector:: + uniform_batch< + const ValueType> + b, + const gko::batch:: + multi_vector:: + uniform_batch< + ValueType> + x) +{ + for (size_type batch_id = blockIdx.x; batch_id < mat.num_batch_items; + batch_id += gridDim.x) { + const auto mat_b = + gko::batch::matrix::extract_batch_item(mat, batch_id); + const auto b_b = gko::batch::extract_batch_item(b, batch_id); + const auto x_b = gko::batch::extract_batch_item(x, batch_id); + simple_apply(mat_b, b_b.values, x_b.values); + } +} + + +template +__device__ __forceinline__ void advanced_apply( + const ValueType alpha, + const gko::batch::matrix::batch_dense::batch_item& mat, + const ValueType* const __restrict__ b, const ValueType beta, + ValueType* const __restrict__ x) +{ + constexpr auto tile_size = config::warp_size; + + auto thread_block = group::this_thread_block(); + auto subwarp_grp = group::tiled_partition(thread_block); + const auto subwarp_grp_id = static_cast(threadIdx.x / tile_size); + const int num_subwarp_grps_per_block = ceildiv(blockDim.x, tile_size); + + for (int row = subwarp_grp_id; row < mat.num_rows; + row += num_subwarp_grps_per_block) { + ValueType temp = zero(); + for (int j = subwarp_grp.thread_rank(); j < mat.num_cols; + j += subwarp_grp.size()) { + const ValueType val = mat.values[row * mat.stride + j]; + temp += alpha * val * b[j]; + } + +#pragma unroll + for (int i = static_cast(tile_size) / 2; i > 0; i /= 2) { + temp += subwarp_grp.shfl_down(temp, i); + } + + if (subwarp_grp.thread_rank() == 0) { + x[row] = temp + beta * x[row]; + } + } +} + +template +__global__ __launch_bounds__( + default_block_size, + sm_oversubscription) void advanced_apply_kernel(const gko::batch:: + multi_vector:: + uniform_batch< + const ValueType> + alpha, + const gko::batch::matrix:: + batch_dense:: + uniform_batch< + const ValueType> + mat, + const gko::batch:: + multi_vector:: + uniform_batch< + const ValueType> + b, + const gko::batch:: + multi_vector:: + uniform_batch< + const ValueType> + beta, + const gko::batch:: + multi_vector:: + uniform_batch< + ValueType> + x) +{ + for (size_type batch_id = blockIdx.x; batch_id < mat.num_batch_items; + batch_id += gridDim.x) { + const auto mat_b = + gko::batch::matrix::extract_batch_item(mat, batch_id); + const auto b_b = 
gko::batch::extract_batch_item(b, batch_id); + const auto x_b = gko::batch::extract_batch_item(x, batch_id); + const auto alpha_b = gko::batch::extract_batch_item(alpha, batch_id); + const auto beta_b = gko::batch::extract_batch_item(beta, batch_id); + advanced_apply(alpha_b.values[0], mat_b, b_b.values, beta_b.values[0], + x_b.values); + } +} diff --git a/cuda/base/batch_multi_vector_kernels.cu b/cuda/base/batch_multi_vector_kernels.cu index 7729d006b75..5c4d1f5bdc5 100644 --- a/cuda/base/batch_multi_vector_kernels.cu +++ b/cuda/base/batch_multi_vector_kernels.cu @@ -78,6 +78,7 @@ constexpr int sm_oversubscription = 4; // clang-format on + } // namespace batch_multi_vector } // namespace cuda } // namespace kernels diff --git a/cuda/matrix/batch_dense_kernels.cu b/cuda/matrix/batch_dense_kernels.cu index c0a172fd026..9d9cfcf6c8e 100644 --- a/cuda/matrix/batch_dense_kernels.cu +++ b/cuda/matrix/batch_dense_kernels.cu @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/base/batch_struct.hpp" #include "core/matrix/batch_struct.hpp" +#include "cuda/base/batch_struct.hpp" #include "cuda/base/config.hpp" #include "cuda/base/cublas_bindings.hpp" #include "cuda/base/pointer_mode_guard.hpp" @@ -45,7 +46,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "cuda/components/reduction.cuh" #include "cuda/components/thread_ids.cuh" #include "cuda/components/uninitialized_array.hpp" -// #include "cuda/matrix/batch_struct.hip.hpp" +#include "cuda/matrix/batch_struct.hpp" namespace gko { @@ -60,29 +61,18 @@ namespace batch_dense { constexpr auto default_block_size = 256; -constexpr int sm_multiplier = 4; +constexpr int sm_oversubscription = 4; +// clang-format off -template -void simple_apply(std::shared_ptr exec, - const batch::matrix::BatchDense* mat, - const batch::MultiVector* b, - batch::MultiVector* x) GKO_NOT_IMPLEMENTED; +// NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); +#include "common/cuda_hip/matrix/batch_dense_kernels.hpp.inc" -template -void advanced_apply(std::shared_ptr exec, - const batch::MultiVector* alpha, - const batch::matrix::BatchDense* a, - const batch::MultiVector* b, - const batch::MultiVector* beta, - batch::MultiVector* c) GKO_NOT_IMPLEMENTED; +#include "common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc" -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); +// clang-format on } // namespace batch_dense diff --git a/cuda/matrix/batch_struct.hpp b/cuda/matrix/batch_struct.hpp new file mode 100644 index 00000000000..202eb91a366 --- /dev/null +++ b/cuda/matrix/batch_struct.hpp @@ -0,0 +1,96 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CUDA_MATRIX_BATCH_STRUCT_HPP_ +#define GKO_CUDA_MATRIX_BATCH_STRUCT_HPP_ + + +#include +#include +#include + + +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" +#include "cuda/base/config.hpp" +#include "cuda/base/types.hpp" + + +namespace gko { +namespace kernels { +namespace cuda { + + +/** @file batch_struct.hpp + * + * Helper functions to generate a batch struct from a batch LinOp, + * while also shallow-casting to the required CUDA scalar type. + * + * A specialization is needed for every format of every kind of linear algebra + * object. These are intended to be called on the host. + */ + + +/** + * Generates an immutable uniform batch struct from a batch of multi-vectors. + */ +template +inline batch::matrix::batch_dense::uniform_batch> +get_batch_struct(const batch::matrix::BatchDense* const op) +{ + return {as_cuda_type(op->get_const_values()), op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + +/** + * Generates a uniform batch struct from a batch of multi-vectors. + */ +template +inline batch::matrix::batch_dense::uniform_batch> +get_batch_struct(batch::matrix::BatchDense* const op) +{ + return {as_cuda_type(op->get_values()), op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + +} // namespace cuda +} // namespace kernels +} // namespace gko + + +#endif // GKO_CUDA_MATRIX_BATCH_STRUCT_HPP_ diff --git a/hip/matrix/batch_dense_kernels.hip.cpp b/hip/matrix/batch_dense_kernels.hip.cpp index 06f0caf81ec..51f2237826b 100644 --- a/hip/matrix/batch_dense_kernels.hip.cpp +++ b/hip/matrix/batch_dense_kernels.hip.cpp @@ -40,7 +40,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/base/batch_struct.hpp" #include "core/matrix/batch_struct.hpp" +#include "hip/base/batch_struct.hpp" #include "hip/base/config.hip.hpp" #include "hip/base/hipblas_bindings.hip.hpp" #include "hip/base/pointer_mode_guard.hip.hpp" @@ -48,7 +50,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "hip/components/reduction.hip.hpp" #include "hip/components/thread_ids.hip.hpp" #include "hip/components/uninitialized_array.hip.hpp" -// #include "hip/matrix/batch_struct.hip.hpp" +#include "hip/matrix/batch_struct.hip.hpp" namespace gko { @@ -63,30 +65,18 @@ namespace batch_dense { constexpr auto default_block_size = 256; -constexpr int sm_multiplier = 4; +constexpr int sm_oversubscription = 4; +// clang-format off -template -void simple_apply(std::shared_ptr exec, - const batch::matrix::BatchDense* mat, - const batch::MultiVector* b, - batch::MultiVector* x) GKO_NOT_IMPLEMENTED; +// NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); +#include "common/cuda_hip/matrix/batch_dense_kernels.hpp.inc" -template -void advanced_apply(std::shared_ptr exec, - const batch::MultiVector* alpha, - const batch::matrix::BatchDense* a, - const batch::MultiVector* b, - const batch::MultiVector* beta, - batch::MultiVector* c) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); +#include "common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc" +// clang-format on } // namespace batch_dense } // namespace hip diff --git a/hip/matrix/batch_struct.hip.hpp b/hip/matrix/batch_struct.hip.hpp new file mode 100644 index 00000000000..0d5dfb46a1b --- /dev/null +++ b/hip/matrix/batch_struct.hip.hpp @@ -0,0 +1,96 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#ifndef GKO_HIP_MATRIX_BATCH_STRUCT_HPP_ +#define GKO_HIP_MATRIX_BATCH_STRUCT_HPP_ + + +#include +#include +#include + + +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" +#include "hip/base/config.hpp" +#include "hip/base/types.hpp" + + +namespace gko { +namespace kernels { +namespace hip { + + +/** @file batch_struct.hpp + * + * Helper functions to generate a batch struct from a batch LinOp, + * while also shallow-casting to the required HIP scalar type. + * + * A specialization is needed for every format of every kind of linear algebra + * object. These are intended to be called on the host. + */ + + +/** + * Generates an immutable uniform batch struct from a batch of multi-vectors. + */ +template +inline batch::matrix::batch_dense::uniform_batch> +get_batch_struct(const batch::matrix::BatchDense* const op) +{ + return {as_hip_type(op->get_const_values()), op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + +/** + * Generates a uniform batch struct from a batch of multi-vectors. + */ +template +inline batch::matrix::batch_dense::uniform_batch> +get_batch_struct(batch::matrix::BatchDense* const op) +{ + return {as_hip_type(op->get_values()), op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + +} // namespace hip +} // namespace kernels +} // namespace gko + + +#endif // GKO_HIP_MATRIX_BATCH_STRUCT_HPP_ diff --git a/test/matrix/CMakeLists.txt b/test/matrix/CMakeLists.txt index 91987f3717f..a89abfb4ae4 100644 --- a/test/matrix/CMakeLists.txt +++ b/test/matrix/CMakeLists.txt @@ -1,4 +1,4 @@ -ginkgo_create_common_test(batch_dense_kernels DISABLE_EXECUTORS dpcpp hip cuda) +ginkgo_create_common_test(batch_dense_kernels DISABLE_EXECUTORS dpcpp) ginkgo_create_common_device_test(csr_kernels) ginkgo_create_common_test(csr_kernels2) ginkgo_create_common_test(coo_kernels) From b23dbfa37513ab138e8624306639be774a2be95f Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 4 Oct 2023 15:36:19 +0200 Subject: [PATCH 11/28] Add SYCL kernels and tests WIP Co-authored-by: Phuong Nguyen --- .../matrix/batch_dense_kernels.hpp.inc | 12 +-- dpcpp/base/batch_multi_vector_kernels.dp.cpp | 55 +++++------ dpcpp/matrix/batch_dense_kernels.dp.cpp | 99 ++++++++++++++++++- dpcpp/matrix/batch_dense_kernels.hpp.inc | 91 +++++++++++++++++ dpcpp/matrix/batch_struct.hpp | 94 ++++++++++++++++++ test/matrix/CMakeLists.txt | 2 +- 6 files changed, 311 insertions(+), 42 deletions(-) create mode 100644 dpcpp/matrix/batch_dense_kernels.hpp.inc create mode 100644 dpcpp/matrix/batch_struct.hpp diff --git a/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc b/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc index 43046166abc..6cae08eadb5 100644 --- a/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc +++ b/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc @@ -52,10 +52,8 @@ __device__ __forceinline__ void simple_apply( temp += val * b[j]; } -#pragma unroll - for (int i = static_cast(tile_size) / 2; i > 0; i /= 2) { - temp += subwarp_grp.shfl_down(temp, i); - } + // subgroup level reduction + temp = reduce(subgroup, temp, thrust::plus{}); if (subwarp_grp.thread_rank() == 0) { x[row] = temp; @@ -116,10 +114,8 @@ __device__ __forceinline__ void advanced_apply( temp += alpha * val * b[j]; } -#pragma unroll - for (int i = 
static_cast(tile_size) / 2; i > 0; i /= 2) { - temp += subwarp_grp.shfl_down(temp, i); - } + // subgroup level reduction + temp = reduce(subgroup, temp, thrust::plus{}); if (subwarp_grp.thread_rank() == 0) { x[row] = temp + beta * x[row]; diff --git a/dpcpp/base/batch_multi_vector_kernels.dp.cpp b/dpcpp/base/batch_multi_vector_kernels.dp.cpp index 10e47ba080e..12648b81e00 100644 --- a/dpcpp/base/batch_multi_vector_kernels.dp.cpp +++ b/dpcpp/base/batch_multi_vector_kernels.dp.cpp @@ -37,11 +37,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include #include -#include +#include "core/base/batch_struct.hpp" #include "core/components/prefix_sum_kernels.hpp" #include "dpcpp/base/batch_struct.hpp" #include "dpcpp/base/config.hpp" @@ -193,9 +194,9 @@ void compute_dot(std::shared_ptr exec, // TODO: Remove reqd_sub_group size and use sycl::reduce_over_group exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { + sycl_nd_range(grid, block), [= + ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); const auto x_b = batch::extract_batch_item(x_ub, group_id); @@ -231,19 +232,18 @@ void compute_conj_dot(std::shared_ptr exec, exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - const auto y_b = batch::extract_batch_item(y_ub, group_id); - const auto res_b = - batch::extract_batch_item(res_ub, group_id); - compute_gen_dot_product_kernel( - x_b, y_b, res_b, item_ct1, - [](auto val) { return conj(val); }); - }); + sycl_nd_range(grid, block), [= + ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto y_b = batch::extract_batch_item(y_ub, group_id); + const auto res_b = batch::extract_batch_item(res_ub, group_id); + compute_gen_dot_product_kernel( + x_b, y_b, res_b, item_ct1, + [](auto val) { return conj(val); }); + }); }); } @@ -268,17 +268,16 @@ void compute_norm2(std::shared_ptr exec, const dim3 grid(num_batches); exec->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto x_b = - batch::extract_batch_item(x_ub, group_id); - const auto res_b = batch::extract_batch_item( - res_ub, group_id); - compute_norm2_kernel(x_b, res_b, item_ct1); - }); + cgh.parallel_for( + sycl_nd_range(grid, block), [= + ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto res_b = batch::extract_batch_item(res_ub, group_id); + compute_norm2_kernel(x_b, res_b, item_ct1); + }); }); } diff --git a/dpcpp/matrix/batch_dense_kernels.dp.cpp b/dpcpp/matrix/batch_dense_kernels.dp.cpp index 
964bf094077..118d46d81a5 100644 --- a/dpcpp/matrix/batch_dense_kernels.dp.cpp +++ b/dpcpp/matrix/batch_dense_kernels.dp.cpp @@ -40,8 +40,24 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include -#include +#include + + +#include "core/base/batch_struct.hpp" +#include "core/components/prefix_sum_kernels.hpp" +#include "core/matrix/batch_struct.hpp" +#include "dpcpp/base/batch_struct.hpp" +#include "dpcpp/base/config.hpp" +#include "dpcpp/base/dim3.dp.hpp" +#include "dpcpp/base/dpct.hpp" +#include "dpcpp/base/helper.hpp" +#include "dpcpp/components/cooperative_groups.dp.hpp" +#include "dpcpp/components/intrinsics.dp.hpp" +#include "dpcpp/components/reduction.dp.hpp" +#include "dpcpp/components/thread_ids.dp.hpp" +#include "dpcpp/matrix/batch_struct.hpp" namespace gko { @@ -55,11 +71,46 @@ namespace dpcpp { namespace batch_dense { +#include "dpcpp/matrix/batch_dense_kernels.hpp.inc" + + template void simple_apply(std::shared_ptr exec, - const batch::matrix::BatchDense* a, + const batch::matrix::BatchDense* mat, const batch::MultiVector* b, - batch::MultiVector* x) GKO_NOT_IMPLEMENTED; + batch::MultiVector* x) +{ + const size_type num_rows = x->get_common_size()[0]; + const size_type num_cols = x->get_common_size()[1]; + + const auto num_batch_items = x->get_num_batch_items(); + auto device = exec->get_queue()->get_device(); + auto group_size = + device.get_info(); + + const dim3 block(group_size); + const dim3 grid(num_batch_items); + const auto x_ub = get_batch_struct(x); + const auto b_ub = get_batch_struct(b); + const auto mat_ub = get_batch_struct(mat); + if (b_ub.num_rhs > 1) { + GKO_NOT_IMPLEMENTED; + } + + // Launch a kernel that has nbatches blocks, each block has max group size + (exec->get_queue())->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + simple_apply_kernel(mat_b, b_b.values, x_b.values, item_ct1); + }); + }); +} GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL); @@ -68,10 +119,48 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void advanced_apply(std::shared_ptr exec, const batch::MultiVector* alpha, - const batch::matrix::BatchDense* a, + const batch::matrix::BatchDense* mat, const batch::MultiVector* b, const batch::MultiVector* beta, - batch::MultiVector* c) GKO_NOT_IMPLEMENTED; + batch::MultiVector* x) +{ + const auto mat_ub = get_batch_struct(mat); + const auto b_ub = get_batch_struct(b); + const auto x_ub = get_batch_struct(x); + const auto alpha_ub = get_batch_struct(alpha); + const auto beta_ub = get_batch_struct(beta); + + if (b_ub.num_rhs > 1) { + GKO_NOT_IMPLEMENTED; + } + + const auto num_batch_items = mat_ub.num_batch_items; + auto device = exec->get_queue()->get_device(); + auto group_size = + device.get_info(); + + const dim3 block(group_size); + const dim3 grid(num_batch_items); + + // Launch a kernel that has nbatches blocks, each block has max group size + (exec->get_queue())->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + 
batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto alpha_b = + batch::extract_batch_item(alpha_ub, group_id); + const auto beta_b = + batch::extract_batch_item(beta_ub, group_id); + advanced_apply_kernel(alpha_b.values[0], mat_b, b_b.values, + beta_b.values[0], x_b.values, item_ct1); + }); + }); +} GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); diff --git a/dpcpp/matrix/batch_dense_kernels.hpp.inc b/dpcpp/matrix/batch_dense_kernels.hpp.inc new file mode 100644 index 00000000000..ba528ac31a4 --- /dev/null +++ b/dpcpp/matrix/batch_dense_kernels.hpp.inc @@ -0,0 +1,91 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +template +__dpct_inline__ void simple_apply_kernel( + const gko::batch::matrix::batch_dense::batch_item& mat, + const ValueType* const __restrict__ b, ValueType* const __restrict__ x, + sycl::nd_item<3>& item_ct1) +{ + constexpr auto tile_size = config::warp_size; + auto subg = + group::tiled_partition(group::this_thread_block(item_ct1)); + const auto subgroup = static_cast(subg); + const int subgroup_id = subgroup.get_group_id(); + const int subgroup_size = subgroup.get_local_range().size(); + const int num_subgroup = subgroup.get_group_range().size(); + + for (int row = subgroup_id; row < mat.num_rows; row += num_subgroup) { + ValueType temp = zero(); + for (int j = subgroup.get_local_id(); j < mat.num_cols; + j += subgroup_size) { + const ValueType val = mat.values[row * mat.stride + j]; + temp += val * b[j]; + } + temp = ::gko::kernels::dpcpp::reduce( + subg, temp, [](ValueType v1, ValueType v2) { return v1 + v2; }); + if (subgroup.get_local_id() == 0) { + x[row] = temp; + } + } +} + + +template +__dpct_inline__ void advanced_apply_kernel( + const ValueType alpha, + const gko::batch::matrix::batch_dense::batch_item& mat, + const ValueType* const __restrict__ b, const ValueType beta, + ValueType* const __restrict__ x, sycl::nd_item<3>& item_ct1) +{ + constexpr auto tile_size = config::warp_size; + auto subg = + group::tiled_partition(group::this_thread_block(item_ct1)); + const auto subgroup = static_cast(subg); + const int subgroup_id = subgroup.get_group_id(); + const int subgroup_size = subgroup.get_local_range().size(); + const int num_subgroup = subgroup.get_group_range().size(); + + for (int row = subgroup_id; row < mat.num_rows; row += num_subgroup) { + ValueType temp = zero(); + for (int j = subgroup.get_local_id(); j < mat.num_cols; + j += subgroup_size) { + const ValueType val = mat.values[row * mat.stride + j]; + temp += alpha * val * b[j]; + } + temp = ::gko::kernels::dpcpp::reduce( + subg, temp, [](ValueType v1, ValueType v2) { return v1 + v2; }); + if (subgroup.get_local_id() == 0) { + x[row] = temp + beta * x[row]; + } + } +} diff --git a/dpcpp/matrix/batch_struct.hpp b/dpcpp/matrix/batch_struct.hpp new file mode 100644 index 00000000000..dd8c1bbbab6 --- /dev/null +++ b/dpcpp/matrix/batch_struct.hpp @@ -0,0 +1,94 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_DPCPP_MATRIX_BATCH_STRUCT_HPP_ +#define GKO_DPCPP_MATRIX_BATCH_STRUCT_HPP_ + + +#include +#include + + +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" +#include "dpcpp/base/config.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { + + +/** @file batch_struct.hpp + * + * Helper functions to generate a batch struct from a batch LinOp, + * while also shallow-casting to the required DPCPP scalar type. + * + * A specialization is needed for every format of every kind of linear algebra + * object. These are intended to be called on the host. + */ + + +/** + * Generates an immutable uniform batch struct from a batch of multi-vectors. + */ +template +inline batch::matrix::batch_dense::uniform_batch +get_batch_struct(const batch::matrix::BatchDense* const op) +{ + return {op->get_const_values(), op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + +/** + * Generates a uniform batch struct from a batch of multi-vectors. + */ +template +inline batch::matrix::batch_dense::uniform_batch get_batch_struct( + batch::matrix::BatchDense* const op) +{ + return {op->get_values(), op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; +} + + +} // namespace dpcpp +} // namespace kernels +} // namespace gko + + +#endif // GKO_DPCPP_MATRIX_BATCH_STRUCT_HPP_ diff --git a/test/matrix/CMakeLists.txt b/test/matrix/CMakeLists.txt index a89abfb4ae4..9f3b17cd858 100644 --- a/test/matrix/CMakeLists.txt +++ b/test/matrix/CMakeLists.txt @@ -1,4 +1,4 @@ -ginkgo_create_common_test(batch_dense_kernels DISABLE_EXECUTORS dpcpp) +ginkgo_create_common_test(batch_dense_kernels) ginkgo_create_common_device_test(csr_kernels) ginkgo_create_common_test(csr_kernels2) ginkgo_create_common_test(coo_kernels) From 07578dd3add0a76b3948265947afc8fc901e8bfc Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Thu, 5 Oct 2023 13:38:18 +0200 Subject: [PATCH 12/28] HIP and CUDA thrust fixes --- .../matrix/batch_dense_kernels.hpp.inc | 32 +++++++++---------- cuda/matrix/batch_dense_kernels.cu | 5 +++ hip/matrix/batch_dense_kernels.hip.cpp | 5 ++- 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc b/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc index 6cae08eadb5..2f876332ae7 100644 --- a/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc +++ b/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc @@ -39,15 +39,15 @@ __device__ __forceinline__ void simple_apply( constexpr auto tile_size = config::warp_size; auto thread_block = group::this_thread_block(); - auto subwarp_grp = group::tiled_partition(thread_block); - const auto subwarp_grp_id = static_cast(threadIdx.x / tile_size); - const int num_subwarp_grps_per_block = ceildiv(blockDim.x, tile_size); + 
auto subgroup = group::tiled_partition(thread_block); + const auto subgroup_id = static_cast(threadIdx.x / tile_size); + const int num_subgroups_per_block = ceildiv(blockDim.x, tile_size); - for (int row = subwarp_grp_id; row < mat.num_rows; - row += num_subwarp_grps_per_block) { + for (int row = subgroup_id; row < mat.num_rows; + row += num_subgroups_per_block) { ValueType temp = zero(); - for (int j = subwarp_grp.thread_rank(); j < mat.num_cols; - j += subwarp_grp.size()) { + for (int j = subgroup.thread_rank(); j < mat.num_cols; + j += subgroup.size()) { const ValueType val = mat.values[row * mat.stride + j]; temp += val * b[j]; } @@ -55,7 +55,7 @@ __device__ __forceinline__ void simple_apply( // subgroup level reduction temp = reduce(subgroup, temp, thrust::plus{}); - if (subwarp_grp.thread_rank() == 0) { + if (subgroup.thread_rank() == 0) { x[row] = temp; } } @@ -101,15 +101,15 @@ __device__ __forceinline__ void advanced_apply( constexpr auto tile_size = config::warp_size; auto thread_block = group::this_thread_block(); - auto subwarp_grp = group::tiled_partition(thread_block); - const auto subwarp_grp_id = static_cast(threadIdx.x / tile_size); - const int num_subwarp_grps_per_block = ceildiv(blockDim.x, tile_size); + auto subgroup = group::tiled_partition(thread_block); + const auto subgroup_id = static_cast(threadIdx.x / tile_size); + const int num_subgroups_per_block = ceildiv(blockDim.x, tile_size); - for (int row = subwarp_grp_id; row < mat.num_rows; - row += num_subwarp_grps_per_block) { + for (int row = subgroup_id; row < mat.num_rows; + row += num_subgroups_per_block) { ValueType temp = zero(); - for (int j = subwarp_grp.thread_rank(); j < mat.num_cols; - j += subwarp_grp.size()) { + for (int j = subgroup.thread_rank(); j < mat.num_cols; + j += subgroup.size()) { const ValueType val = mat.values[row * mat.stride + j]; temp += alpha * val * b[j]; } @@ -117,7 +117,7 @@ __device__ __forceinline__ void advanced_apply( // subgroup level reduction temp = reduce(subgroup, temp, thrust::plus{}); - if (subwarp_grp.thread_rank() == 0) { + if (subgroup.thread_rank() == 0) { x[row] = temp + beta * x[row]; } } diff --git a/cuda/matrix/batch_dense_kernels.cu b/cuda/matrix/batch_dense_kernels.cu index 9d9cfcf6c8e..28d61f70731 100644 --- a/cuda/matrix/batch_dense_kernels.cu +++ b/cuda/matrix/batch_dense_kernels.cu @@ -33,6 +33,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/matrix/batch_dense_kernels.hpp" +#include +#include + + #include @@ -42,6 +46,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "cuda/base/config.hpp" #include "cuda/base/cublas_bindings.hpp" #include "cuda/base/pointer_mode_guard.hpp" +#include "cuda/base/thrust.cuh" #include "cuda/components/cooperative_groups.cuh" #include "cuda/components/reduction.cuh" #include "cuda/components/thread_ids.cuh" diff --git a/hip/matrix/batch_dense_kernels.hip.cpp b/hip/matrix/batch_dense_kernels.hip.cpp index 51f2237826b..20c46736026 100644 --- a/hip/matrix/batch_dense_kernels.hip.cpp +++ b/hip/matrix/batch_dense_kernels.hip.cpp @@ -34,6 +34,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include +#include #include @@ -42,10 +44,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/base/batch_struct.hpp" #include "core/matrix/batch_struct.hpp" -#include "hip/base/batch_struct.hpp" +#include "hip/base/batch_struct.hip.hpp" #include "hip/base/config.hip.hpp" #include "hip/base/hipblas_bindings.hip.hpp" #include "hip/base/pointer_mode_guard.hip.hpp" +#include "hip/base/thrust.hip.hpp" #include "hip/components/cooperative_groups.hip.hpp" #include "hip/components/reduction.hip.hpp" #include "hip/components/thread_ids.hip.hpp" From 81bcf74cddab79ca39007ba4848e4a2f22d95f8d Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Thu, 5 Oct 2023 15:49:46 +0200 Subject: [PATCH 13/28] SYCL kernel fixes --- dpcpp/matrix/batch_dense_kernels.dp.cpp | 20 +++++++++------ dpcpp/matrix/batch_dense_kernels.hpp.inc | 31 +++++++++++++++--------- 2 files changed, 31 insertions(+), 20 deletions(-) diff --git a/dpcpp/matrix/batch_dense_kernels.dp.cpp b/dpcpp/matrix/batch_dense_kernels.dp.cpp index 118d46d81a5..7f3dca70a32 100644 --- a/dpcpp/matrix/batch_dense_kernels.dp.cpp +++ b/dpcpp/matrix/batch_dense_kernels.dp.cpp @@ -80,10 +80,10 @@ void simple_apply(std::shared_ptr exec, const batch::MultiVector* b, batch::MultiVector* x) { - const size_type num_rows = x->get_common_size()[0]; - const size_type num_cols = x->get_common_size()[1]; + const size_type num_rows = mat->get_common_size()[0]; + const size_type num_cols = mat->get_common_size()[1]; - const auto num_batch_items = x->get_num_batch_items(); + const auto num_batch_items = mat->get_num_batch_items(); auto device = exec->get_queue()->get_device(); auto group_size = device.get_info(); @@ -100,14 +100,16 @@ void simple_apply(std::shared_ptr exec, // Launch a kernel that has nbatches blocks, each block has max group size (exec->get_queue())->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [= + ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); const auto mat_b = batch::matrix::extract_batch_item(mat_ub, group_id); const auto b_b = batch::extract_batch_item(b_ub, group_id); const auto x_b = batch::extract_batch_item(x_ub, group_id); - simple_apply_kernel(mat_b, b_b.values, x_b.values, item_ct1); + simple_apply_kernel(mat_b, b_b, x_b, item_ct1); }); }); } @@ -145,7 +147,9 @@ void advanced_apply(std::shared_ptr exec, // Launch a kernel that has nbatches blocks, each block has max group size (exec->get_queue())->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [= + ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); const auto mat_b = @@ -156,8 +160,8 @@ void advanced_apply(std::shared_ptr exec, batch::extract_batch_item(alpha_ub, group_id); const auto beta_b = batch::extract_batch_item(beta_ub, group_id); - advanced_apply_kernel(alpha_b.values[0], mat_b, b_b.values, - beta_b.values[0], x_b.values, item_ct1); + advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, + item_ct1); }); }); } diff --git a/dpcpp/matrix/batch_dense_kernels.hpp.inc b/dpcpp/matrix/batch_dense_kernels.hpp.inc index ba528ac31a4..dacd31feade 100644 --- a/dpcpp/matrix/batch_dense_kernels.hpp.inc +++ b/dpcpp/matrix/batch_dense_kernels.hpp.inc @@ -33,7 +33,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
template __dpct_inline__ void simple_apply_kernel( const gko::batch::matrix::batch_dense::batch_item& mat, - const ValueType* const __restrict__ b, ValueType* const __restrict__ x, + const gko::batch::multi_vector::batch_item& b, + const gko::batch::multi_vector::batch_item& x, sycl::nd_item<3>& item_ct1) { constexpr auto tile_size = config::warp_size; @@ -42,19 +43,21 @@ __dpct_inline__ void simple_apply_kernel( const auto subgroup = static_cast(subg); const int subgroup_id = subgroup.get_group_id(); const int subgroup_size = subgroup.get_local_range().size(); - const int num_subgroup = subgroup.get_group_range().size(); + const int num_subgroups = subgroup.get_group_range().size(); - for (int row = subgroup_id; row < mat.num_rows; row += num_subgroup) { + for (int row = subgroup_id; row < mat.num_rows; row += num_subgroups) { ValueType temp = zero(); for (int j = subgroup.get_local_id(); j < mat.num_cols; j += subgroup_size) { const ValueType val = mat.values[row * mat.stride + j]; - temp += val * b[j]; + temp += val * b.values[j]; } + temp = ::gko::kernels::dpcpp::reduce( - subg, temp, [](ValueType v1, ValueType v2) { return v1 + v2; }); + subg, temp, [](ValueType a, ValueType b) { return a + b; }); + if (subgroup.get_local_id() == 0) { - x[row] = temp; + x.values[row] = temp; } } } @@ -62,10 +65,12 @@ __dpct_inline__ void simple_apply_kernel( template __dpct_inline__ void advanced_apply_kernel( - const ValueType alpha, + const gko::batch::multi_vector::batch_item& alpha, const gko::batch::matrix::batch_dense::batch_item& mat, - const ValueType* const __restrict__ b, const ValueType beta, - ValueType* const __restrict__ x, sycl::nd_item<3>& item_ct1) + const gko::batch::multi_vector::batch_item& b, + const gko::batch::multi_vector::batch_item& beta, + const gko::batch::multi_vector::batch_item& x, + sycl::nd_item<3>& item_ct1) { constexpr auto tile_size = config::warp_size; auto subg = @@ -80,12 +85,14 @@ __dpct_inline__ void advanced_apply_kernel( for (int j = subgroup.get_local_id(); j < mat.num_cols; j += subgroup_size) { const ValueType val = mat.values[row * mat.stride + j]; - temp += alpha * val * b[j]; + temp += alpha.values[0] * val * b.values[j]; } + temp = ::gko::kernels::dpcpp::reduce( - subg, temp, [](ValueType v1, ValueType v2) { return v1 + v2; }); + subg, temp, [](ValueType a, ValueType b) { return a + b; }); + if (subgroup.get_local_id() == 0) { - x[row] = temp + beta * x[row]; + x.values[row] = temp + beta.values[0] * x.values[row]; } } } From 24836679eb7b00be58f10f0f060097395d08eeaf Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Thu, 5 Oct 2023 16:00:30 +0200 Subject: [PATCH 14/28] BatchDense -> batch::Dense --- .../batch_dense_kernel_launcher.hpp.inc | 4 +- core/base/batch_multi_vector.cpp | 7 +- core/matrix/batch_dense.cpp | 48 +++---- core/matrix/batch_dense_kernels.hpp | 4 +- core/test/matrix/batch_dense.cpp | 128 +++++++++--------- cuda/matrix/batch_dense_kernels.cu | 2 +- cuda/matrix/batch_struct.hpp | 4 +- dpcpp/matrix/batch_dense_kernels.dp.cpp | 6 +- dpcpp/matrix/batch_struct.hpp | 4 +- hip/matrix/batch_dense_kernels.hip.cpp | 2 +- hip/matrix/batch_struct.hip.hpp | 8 +- .../ginkgo/core/base/batch_multi_vector.hpp | 14 +- include/ginkgo/core/matrix/batch_dense.hpp | 59 ++++---- omp/matrix/batch_dense_kernels.cpp | 8 +- reference/matrix/batch_dense_kernels.cpp | 8 +- reference/matrix/batch_struct.hpp | 4 +- reference/test/matrix/batch_dense_kernels.cpp | 24 ++-- test/matrix/batch_dense_kernels.cpp | 10 +- 18 files changed, 169 insertions(+), 175 deletions(-) 
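To make the effect of this rename concrete, the following minimal sketch shows how a caller would assemble and apply the renamed batched operator. It is not taken verbatim from the patch but assembled from the constructors and test fixtures touched in this series; in particular, the gko::batch::initialize helper for MultiVector and the public apply() entry point inherited from EnableBatchLinOp (which dispatches to the apply_impl overloads changed below) are assumed to behave as in the rest of the Ginkgo batch API.

#include <ginkgo/ginkgo.hpp>

int main()
{
    auto exec = gko::ReferenceExecutor::create();

    // Two independent 2x3 dense systems stored as one batched operator
    // (same data as the fixture in core/test/matrix/batch_dense.cpp).
    auto mat = gko::batch::initialize<gko::batch::matrix::Dense<double>>(
        {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}},
         {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}},
        exec);

    // One 3x1 right-hand side per batch item and a 2x1 result per item.
    // (assumes gko::batch::initialize is available for MultiVector as well)
    auto b = gko::batch::initialize<gko::batch::MultiVector<double>>(
        {{{1.0}, {2.0}, {3.0}}, {{1.0}, {1.0}, {1.0}}}, exec);
    auto x = gko::batch::MultiVector<double>::create(
        exec, gko::batch_dim<2>(2, gko::dim<2>{2, 1}));

    // Dispatches to the backend-specific simple_apply kernels added in
    // this series (reference, OMP, CUDA, HIP, SYCL).
    mat->apply(b, x);
}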
diff --git a/common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc b/common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc index 668b0278680..23ae8ebd5f0 100644 --- a/common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc +++ b/common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc @@ -33,7 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. template void simple_apply(std::shared_ptr exec, - const batch::matrix::BatchDense* mat, + const batch::matrix::Dense* mat, const batch::MultiVector* b, batch::MultiVector* x) { @@ -55,7 +55,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void advanced_apply(std::shared_ptr exec, const batch::MultiVector* alpha, - const batch::matrix::BatchDense* mat, + const batch::matrix::Dense* mat, const batch::MultiVector* b, const batch::MultiVector* beta, batch::MultiVector* x) diff --git a/core/base/batch_multi_vector.cpp b/core/base/batch_multi_vector.cpp index f6884ef523b..294fe45972a 100644 --- a/core/base/batch_multi_vector.cpp +++ b/core/base/batch_multi_vector.cpp @@ -292,12 +292,11 @@ void MultiVector::move_to( template -void MultiVector::convert_to( - matrix::BatchDense* result) const +void MultiVector::convert_to(matrix::Dense* result) const { auto exec = result->get_executor() != nullptr ? result->get_executor() : this->get_executor(); - auto tmp = gko::batch::matrix::BatchDense::create_const( + auto tmp = gko::batch::matrix::Dense::create_const( exec, this->get_size(), make_const_array_view(exec, this->get_num_stored_elements(), this->get_const_values())); @@ -306,7 +305,7 @@ void MultiVector::convert_to( template -void MultiVector::move_to(matrix::BatchDense* result) +void MultiVector::move_to(matrix::Dense* result) { this->convert_to(result); } diff --git a/core/matrix/batch_dense.cpp b/core/matrix/batch_dense.cpp index c9da010c228..75f29bc6b4c 100644 --- a/core/matrix/batch_dense.cpp +++ b/core/matrix/batch_dense.cpp @@ -84,7 +84,7 @@ batch_dim<2> compute_batch_size( template std::unique_ptr> -BatchDense::create_view_for_item(size_type item_id) +Dense::create_view_for_item(size_type item_id) { auto exec = this->get_executor(); auto num_rows = this->get_common_size()[0]; @@ -100,7 +100,7 @@ BatchDense::create_view_for_item(size_type item_id) template std::unique_ptr> -BatchDense::create_const_view_for_item(size_type item_id) const +Dense::create_const_view_for_item(size_type item_id) const { auto exec = this->get_executor(); auto num_rows = this->get_common_size()[0]; @@ -115,9 +115,8 @@ BatchDense::create_const_view_for_item(size_type item_id) const template -std::unique_ptr> -BatchDense::create_with_config_of( - ptr_param> other) +std::unique_ptr> Dense::create_with_config_of( + ptr_param> other) { // De-referencing `other` before calling the functions (instead of // using operator `->`) is currently required to be compatible with @@ -128,23 +127,21 @@ BatchDense::create_with_config_of( template -std::unique_ptr> -BatchDense::create_with_same_config() const +std::unique_ptr> Dense::create_with_same_config() + const { - return BatchDense::create(this->get_executor(), - this->get_size()); + return Dense::create(this->get_executor(), this->get_size()); } template -std::unique_ptr> -BatchDense::create_const( +std::unique_ptr> Dense::create_const( std::shared_ptr exec, const batch_dim<2>& sizes, gko::detail::const_array_view&& values) { // cast const-ness away, but return a const object afterwards, // so we can ensure that no modifications take place. 
- return std::unique_ptr(new BatchDense{ + return std::unique_ptr(new Dense{ exec, sizes, gko::detail::array_const_cast(std::move(values))}); } @@ -157,16 +154,16 @@ inline const batch_dim<2> get_col_sizes(const batch_dim<2>& sizes) template -BatchDense::BatchDense(std::shared_ptr exec, - const batch_dim<2>& size) - : EnableBatchLinOp>(exec, size), +Dense::Dense(std::shared_ptr exec, + const batch_dim<2>& size) + : EnableBatchLinOp>(exec, size), values_(exec, compute_num_elems(size)) {} template -void BatchDense::apply_impl(const MultiVector* b, - MultiVector* x) const +void Dense::apply_impl(const MultiVector* b, + MultiVector* x) const { GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); @@ -179,10 +176,10 @@ void BatchDense::apply_impl(const MultiVector* b, template -void BatchDense::apply_impl(const MultiVector* alpha, - const MultiVector* b, - const MultiVector* beta, - MultiVector* x) const +void Dense::apply_impl(const MultiVector* alpha, + const MultiVector* b, + const MultiVector* beta, + MultiVector* x) const { GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); @@ -198,8 +195,8 @@ void BatchDense::apply_impl(const MultiVector* alpha, template -void BatchDense::convert_to( - BatchDense>* result) const +void Dense::convert_to( + Dense>* result) const { result->values_ = this->values_; result->set_size(this->get_size()); @@ -207,14 +204,13 @@ void BatchDense::convert_to( template -void BatchDense::move_to( - BatchDense>* result) +void Dense::move_to(Dense>* result) { this->convert_to(result); } -#define GKO_DECLARE_BATCH_DENSE_MATRIX(_type) class BatchDense<_type> +#define GKO_DECLARE_BATCH_DENSE_MATRIX(_type) class Dense<_type> GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_MATRIX); diff --git a/core/matrix/batch_dense_kernels.hpp b/core/matrix/batch_dense_kernels.hpp index 7f814e08b50..cb46b7291b8 100644 --- a/core/matrix/batch_dense_kernels.hpp +++ b/core/matrix/batch_dense_kernels.hpp @@ -51,14 +51,14 @@ namespace kernels { #define GKO_DECLARE_BATCH_DENSE_SIMPLE_APPLY_KERNEL(_type) \ void simple_apply(std::shared_ptr exec, \ - const batch::matrix::BatchDense<_type>* a, \ + const batch::matrix::Dense<_type>* a, \ const batch::MultiVector<_type>* b, \ batch::MultiVector<_type>* c) #define GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL(_type) \ void advanced_apply(std::shared_ptr exec, \ const batch::MultiVector<_type>* alpha, \ - const batch::matrix::BatchDense<_type>* a, \ + const batch::matrix::Dense<_type>* a, \ const batch::MultiVector<_type>* b, \ const batch::MultiVector<_type>* beta, \ batch::MultiVector<_type>* c) diff --git a/core/test/matrix/batch_dense.cpp b/core/test/matrix/batch_dense.cpp index 02788e14b7d..6afe13a50af 100644 --- a/core/test/matrix/batch_dense.cpp +++ b/core/test/matrix/batch_dense.cpp @@ -48,15 +48,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
template -class BatchDense : public ::testing::Test { +class Dense : public ::testing::Test { protected: using value_type = T; using DenseMtx = gko::matrix::Dense; using size_type = gko::size_type; - BatchDense() + Dense() : exec(gko::ReferenceExecutor::create()), - mtx(gko::batch::initialize< - gko::batch::matrix::BatchDense>( + mtx(gko::batch::initialize>( {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, exec)), @@ -66,7 +65,7 @@ class BatchDense : public ::testing::Test { static void assert_equal_to_original_mtx( - gko::batch::matrix::BatchDense* m) + gko::batch::matrix::Dense* m) { ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 3)); @@ -85,41 +84,41 @@ class BatchDense : public ::testing::Test { ASSERT_EQ(m->at(1, 1, 2), value_type{3.0}); } - static void assert_empty(gko::batch::matrix::BatchDense* m) + static void assert_empty(gko::batch::matrix::Dense* m) { ASSERT_EQ(m->get_num_batch_items(), 0); ASSERT_EQ(m->get_num_stored_elements(), 0); } std::shared_ptr exec; - std::unique_ptr> mtx; + std::unique_ptr> mtx; std::unique_ptr> dense_mtx; }; -TYPED_TEST_SUITE(BatchDense, gko::test::ValueTypes); +TYPED_TEST_SUITE(Dense, gko::test::ValueTypes); -TYPED_TEST(BatchDense, KnowsItsSizeAndValues) +TYPED_TEST(Dense, KnowsItsSizeAndValues) { this->assert_equal_to_original_mtx(this->mtx.get()); } -TYPED_TEST(BatchDense, CanBeEmpty) +TYPED_TEST(Dense, CanBeEmpty) { - auto empty = gko::batch::matrix::BatchDense::create(this->exec); + auto empty = gko::batch::matrix::Dense::create(this->exec); this->assert_empty(empty.get()); } -TYPED_TEST(BatchDense, ReturnsNullValuesArrayWhenEmpty) +TYPED_TEST(Dense, ReturnsNullValuesArrayWhenEmpty) { - auto empty = gko::batch::matrix::BatchDense::create(this->exec); + auto empty = gko::batch::matrix::Dense::create(this->exec); ASSERT_EQ(empty->get_const_values(), nullptr); } -TYPED_TEST(BatchDense, CanGetValuesForEntry) +TYPED_TEST(Dense, CanGetValuesForEntry) { using value_type = typename TestFixture::value_type; @@ -127,17 +126,16 @@ TYPED_TEST(BatchDense, CanGetValuesForEntry) } -TYPED_TEST(BatchDense, CanCreateDenseItemView) +TYPED_TEST(Dense, CanCreateDenseItemView) { GKO_ASSERT_MTX_NEAR(this->mtx->create_view_for_item(1), this->dense_mtx, 0.0); } -TYPED_TEST(BatchDense, CanBeCopied) +TYPED_TEST(Dense, CanBeCopied) { - auto mtx_copy = - gko::batch::matrix::BatchDense::create(this->exec); + auto mtx_copy = gko::batch::matrix::Dense::create(this->exec); mtx_copy->copy_from(this->mtx.get()); @@ -148,10 +146,9 @@ TYPED_TEST(BatchDense, CanBeCopied) } -TYPED_TEST(BatchDense, CanBeMoved) +TYPED_TEST(Dense, CanBeMoved) { - auto mtx_copy = - gko::batch::matrix::BatchDense::create(this->exec); + auto mtx_copy = gko::batch::matrix::Dense::create(this->exec); this->mtx->move_to(mtx_copy); @@ -159,7 +156,7 @@ TYPED_TEST(BatchDense, CanBeMoved) } -TYPED_TEST(BatchDense, CanBeCloned) +TYPED_TEST(Dense, CanBeCloned) { auto mtx_clone = this->mtx->clone(); @@ -168,7 +165,7 @@ TYPED_TEST(BatchDense, CanBeCloned) } -TYPED_TEST(BatchDense, CanBeCleared) +TYPED_TEST(Dense, CanBeCleared) { this->mtx->clear(); @@ -176,11 +173,11 @@ TYPED_TEST(BatchDense, CanBeCleared) } -TYPED_TEST(BatchDense, CanBeConstructedWithSize) +TYPED_TEST(Dense, CanBeConstructedWithSize) { using size_type = gko::size_type; - auto m = gko::batch::matrix::BatchDense::create( + auto m = gko::batch::matrix::Dense::create( this->exec, gko::batch_dim<2>(2, gko::dim<2>{5, 3})); ASSERT_EQ(m->get_num_batch_items(), 2); @@ -189,7 +186,7 @@ 
TYPED_TEST(BatchDense, CanBeConstructedWithSize) } -TYPED_TEST(BatchDense, CanBeConstructedFromExistingData) +TYPED_TEST(Dense, CanBeConstructedFromExistingData) { using value_type = typename TestFixture::value_type; using size_type = gko::size_type; @@ -203,7 +200,7 @@ TYPED_TEST(BatchDense, CanBeConstructedFromExistingData) 6.0, -3.0}; // clang-format on - auto m = gko::batch::matrix::BatchDense::create( + auto m = gko::batch::matrix::Dense::create( this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 2)), gko::array::view(this->exec, 8, data)); @@ -219,7 +216,7 @@ TYPED_TEST(BatchDense, CanBeConstructedFromExistingData) } -TYPED_TEST(BatchDense, CanBeConstructedFromExistingConstData) +TYPED_TEST(Dense, CanBeConstructedFromExistingConstData) { using value_type = typename TestFixture::value_type; using size_type = gko::size_type; @@ -233,7 +230,7 @@ TYPED_TEST(BatchDense, CanBeConstructedFromExistingConstData) 6.0, -3.0}; // clang-format on - auto m = gko::batch::matrix::BatchDense::create_const( + auto m = gko::batch::matrix::Dense::create_const( this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 2)), gko::array::const_view(this->exec, 8, data)); @@ -249,7 +246,7 @@ TYPED_TEST(BatchDense, CanBeConstructedFromExistingConstData) } -TYPED_TEST(BatchDense, CanBeConstructedFromDenseMatrices) +TYPED_TEST(Dense, CanBeConstructedFromDenseMatrices) { using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; @@ -260,15 +257,15 @@ TYPED_TEST(BatchDense, CanBeConstructedFromDenseMatrices) auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto m = gko::batch::create_from_item< - gko::batch::matrix::BatchDense>( - this->exec, std::vector{mat1.get(), mat2.get()}); + auto m = + gko::batch::create_from_item>( + this->exec, std::vector{mat1.get(), mat2.get()}); this->assert_equal_to_original_mtx(m.get()); } -TYPED_TEST(BatchDense, CanBeConstructedFromDenseMatricesByDuplication) +TYPED_TEST(Dense, CanBeConstructedFromDenseMatricesByDuplication) { using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; @@ -279,17 +276,19 @@ TYPED_TEST(BatchDense, CanBeConstructedFromDenseMatricesByDuplication) auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto bat_m = gko::batch::create_from_item< - gko::batch::matrix::BatchDense>( - this->exec, std::vector{mat1.get(), mat1.get(), mat1.get()}); - auto m = gko::batch::create_from_item< - gko::batch::matrix::BatchDense>(this->exec, 3, mat1.get()); + auto bat_m = + gko::batch::create_from_item>( + this->exec, + std::vector{mat1.get(), mat1.get(), mat1.get()}); + auto m = + gko::batch::create_from_item>( + this->exec, 3, mat1.get()); GKO_ASSERT_BATCH_MTX_NEAR(bat_m.get(), m.get(), 1e-14); } -TYPED_TEST(BatchDense, CanBeConstructedByDuplicatingBatchDenseMatrices) +TYPED_TEST(Dense, CanBeConstructedByDuplicatingDenseMatrices) { using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; @@ -300,22 +299,23 @@ TYPED_TEST(BatchDense, CanBeConstructedByDuplicatingBatchDenseMatrices) auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto m = gko::batch::create_from_item< - gko::batch::matrix::BatchDense>( - this->exec, std::vector{mat1.get(), mat2.get()}); - auto m_ref = gko::batch::create_from_item< - gko::batch::matrix::BatchDense>( - this->exec, std::vector{mat1.get(), mat2.get(), mat1.get(), - mat2.get(), mat1.get(), mat2.get()}); + auto m = + 
gko::batch::create_from_item>( + this->exec, std::vector{mat1.get(), mat2.get()}); + auto m_ref = + gko::batch::create_from_item>( + this->exec, + std::vector{mat1.get(), mat2.get(), mat1.get(), + mat2.get(), mat1.get(), mat2.get()}); - auto m2 = gko::batch::duplicate>( + auto m2 = gko::batch::duplicate>( this->exec, 3, m.get()); GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 1e-14); } -TYPED_TEST(BatchDense, CanBeUnbatchedIntoDenseMatrices) +TYPED_TEST(Dense, CanBeUnbatchedIntoDenseMatrices) { using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; @@ -326,7 +326,7 @@ TYPED_TEST(BatchDense, CanBeUnbatchedIntoDenseMatrices) this->exec); auto dense_mats = - gko::batch::unbatch>( + gko::batch::unbatch>( this->mtx.get()); GKO_ASSERT_MTX_NEAR(dense_mats[0].get(), mat1.get(), 0.); @@ -334,10 +334,10 @@ TYPED_TEST(BatchDense, CanBeUnbatchedIntoDenseMatrices) } -TYPED_TEST(BatchDense, CanBeListConstructed) +TYPED_TEST(Dense, CanBeListConstructed) { using value_type = typename TestFixture::value_type; - auto m = gko::batch::initialize>( + auto m = gko::batch::initialize>( {{1.0, 2.0}, {1.0, 3.0}}, this->exec); ASSERT_EQ(m->get_num_batch_items(), 2); @@ -349,11 +349,11 @@ TYPED_TEST(BatchDense, CanBeListConstructed) } -TYPED_TEST(BatchDense, CanBeListConstructedByCopies) +TYPED_TEST(Dense, CanBeListConstructedByCopies) { using value_type = typename TestFixture::value_type; - auto m = gko::batch::initialize>( + auto m = gko::batch::initialize>( 2, I({1.0, 2.0}), this->exec); ASSERT_EQ(m->get_num_batch_items(), 2); @@ -365,12 +365,12 @@ TYPED_TEST(BatchDense, CanBeListConstructedByCopies) } -TYPED_TEST(BatchDense, CanBeDoubleListConstructed) +TYPED_TEST(Dense, CanBeDoubleListConstructed) { using value_type = typename TestFixture::value_type; using T = value_type; - auto m = gko::batch::initialize>( + auto m = gko::batch::initialize>( {{I{1.0, 1.0, 0.0}, I{2.0, 4.0, 3.0}, I{3.0, 6.0, 1.0}}, {I{1.0, 2.0, -1.0}, I{3.0, 4.0, -2.0}, I{5.0, 6.0, -3.0}}}, this->exec); @@ -389,7 +389,7 @@ TYPED_TEST(BatchDense, CanBeDoubleListConstructed) } -TYPED_TEST(BatchDense, CanBeReadFromMatrixData) +TYPED_TEST(Dense, CanBeReadFromMatrixData) { using value_type = typename TestFixture::value_type; using index_type = int; @@ -401,8 +401,8 @@ TYPED_TEST(BatchDense, CanBeReadFromMatrixData) {2, 2}, {{0, 0, -1.0}, {0, 1, 0.5}, {1, 0, 0.0}, {1, 1, 9.0}})); auto m = gko::batch::read>( - this->exec, vec_data); + gko::batch::matrix::Dense>(this->exec, + vec_data); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); @@ -416,7 +416,7 @@ TYPED_TEST(BatchDense, CanBeReadFromMatrixData) } -TYPED_TEST(BatchDense, CanBeReadFromSparseMatrixData) +TYPED_TEST(Dense, CanBeReadFromSparseMatrixData) { using value_type = typename TestFixture::value_type; using index_type = int; @@ -427,8 +427,8 @@ TYPED_TEST(BatchDense, CanBeReadFromSparseMatrixData) {2, 2}, {{0, 0, -1.0}, {0, 1, 0.5}, {1, 1, 9.0}})); auto m = gko::batch::read>( - this->exec, vec_data); + gko::batch::matrix::Dense>(this->exec, + vec_data); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); @@ -442,14 +442,14 @@ TYPED_TEST(BatchDense, CanBeReadFromSparseMatrixData) } -TYPED_TEST(BatchDense, GeneratesCorrectMatrixData) +TYPED_TEST(Dense, GeneratesCorrectMatrixData) { using value_type = typename TestFixture::value_type; using index_type = int; using tpl = typename gko::matrix_data::nonzero_type; auto data = gko::batch::write>( + 
gko::batch::matrix::Dense>( this->mtx.get()); ASSERT_EQ(data[0].size, gko::dim<2>(2, 3)); diff --git a/cuda/matrix/batch_dense_kernels.cu b/cuda/matrix/batch_dense_kernels.cu index 28d61f70731..4f1dbc8f4d4 100644 --- a/cuda/matrix/batch_dense_kernels.cu +++ b/cuda/matrix/batch_dense_kernels.cu @@ -58,7 +58,7 @@ namespace gko { namespace kernels { namespace cuda { /** - * @brief The BatchDense matrix format namespace. + * @brief The Dense matrix format namespace. * * @ingroup batch_dense */ diff --git a/cuda/matrix/batch_struct.hpp b/cuda/matrix/batch_struct.hpp index 202eb91a366..56af3c5ba7e 100644 --- a/cuda/matrix/batch_struct.hpp +++ b/cuda/matrix/batch_struct.hpp @@ -65,7 +65,7 @@ namespace cuda { */ template inline batch::matrix::batch_dense::uniform_batch> -get_batch_struct(const batch::matrix::BatchDense* const op) +get_batch_struct(const batch::matrix::Dense* const op) { return {as_cuda_type(op->get_const_values()), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), @@ -79,7 +79,7 @@ get_batch_struct(const batch::matrix::BatchDense* const op) */ template inline batch::matrix::batch_dense::uniform_batch> -get_batch_struct(batch::matrix::BatchDense* const op) +get_batch_struct(batch::matrix::Dense* const op) { return {as_cuda_type(op->get_values()), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), diff --git a/dpcpp/matrix/batch_dense_kernels.dp.cpp b/dpcpp/matrix/batch_dense_kernels.dp.cpp index 7f3dca70a32..4552f918c60 100644 --- a/dpcpp/matrix/batch_dense_kernels.dp.cpp +++ b/dpcpp/matrix/batch_dense_kernels.dp.cpp @@ -64,7 +64,7 @@ namespace gko { namespace kernels { namespace dpcpp { /** - * @brief The BatchDense matrix format namespace. + * @brief The Dense matrix format namespace. * * @ingroup batch_dense */ @@ -76,7 +76,7 @@ namespace batch_dense { template void simple_apply(std::shared_ptr exec, - const batch::matrix::BatchDense* mat, + const batch::matrix::Dense* mat, const batch::MultiVector* b, batch::MultiVector* x) { @@ -121,7 +121,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void advanced_apply(std::shared_ptr exec, const batch::MultiVector* alpha, - const batch::matrix::BatchDense* mat, + const batch::matrix::Dense* mat, const batch::MultiVector* b, const batch::MultiVector* beta, batch::MultiVector* x) diff --git a/dpcpp/matrix/batch_struct.hpp b/dpcpp/matrix/batch_struct.hpp index dd8c1bbbab6..e44bc394667 100644 --- a/dpcpp/matrix/batch_struct.hpp +++ b/dpcpp/matrix/batch_struct.hpp @@ -63,7 +63,7 @@ namespace dpcpp { */ template inline batch::matrix::batch_dense::uniform_batch -get_batch_struct(const batch::matrix::BatchDense* const op) +get_batch_struct(const batch::matrix::Dense* const op) { return {op->get_const_values(), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), @@ -77,7 +77,7 @@ get_batch_struct(const batch::matrix::BatchDense* const op) */ template inline batch::matrix::batch_dense::uniform_batch get_batch_struct( - batch::matrix::BatchDense* const op) + batch::matrix::Dense* const op) { return {op->get_values(), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), diff --git a/hip/matrix/batch_dense_kernels.hip.cpp b/hip/matrix/batch_dense_kernels.hip.cpp index 20c46736026..aa6d717438e 100644 --- a/hip/matrix/batch_dense_kernels.hip.cpp +++ b/hip/matrix/batch_dense_kernels.hip.cpp @@ -60,7 +60,7 @@ namespace gko { namespace kernels { namespace hip { /** - * @brief The BatchDense matrix format namespace. + * @brief The Dense matrix format namespace. 
* * @ingroup batch_dense */ diff --git a/hip/matrix/batch_struct.hip.hpp b/hip/matrix/batch_struct.hip.hpp index 0d5dfb46a1b..c75a6c7f0a3 100644 --- a/hip/matrix/batch_struct.hip.hpp +++ b/hip/matrix/batch_struct.hip.hpp @@ -41,8 +41,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/base/batch_struct.hpp" #include "core/matrix/batch_struct.hpp" -#include "hip/base/config.hpp" -#include "hip/base/types.hpp" +#include "hip/base/config.hip.hpp" +#include "hip/base/types.hip.hpp" namespace gko { @@ -65,7 +65,7 @@ namespace hip { */ template inline batch::matrix::batch_dense::uniform_batch> -get_batch_struct(const batch::matrix::BatchDense* const op) +get_batch_struct(const batch::matrix::Dense* const op) { return {as_hip_type(op->get_const_values()), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), @@ -79,7 +79,7 @@ get_batch_struct(const batch::matrix::BatchDense* const op) */ template inline batch::matrix::batch_dense::uniform_batch> -get_batch_struct(batch::matrix::BatchDense* const op) +get_batch_struct(batch::matrix::Dense* const op) { return {as_hip_type(op->get_values()), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index 6b3b207c76c..7830a4c6efb 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -56,7 +56,7 @@ namespace matrix { template -class BatchDense; +class Dense; } @@ -91,20 +91,20 @@ class MultiVector public EnablePolymorphicAssignment>, public EnableCreateMethod>, public ConvertibleTo>>, - public ConvertibleTo> { + public ConvertibleTo> { friend class EnableCreateMethod; friend class EnablePolymorphicObject; friend class MultiVector>; friend class MultiVector>; - friend class matrix::BatchDense; + friend class matrix::Dense; public: using EnablePolymorphicAssignment::convert_to; using EnablePolymorphicAssignment::move_to; using ConvertibleTo>>::convert_to; using ConvertibleTo>>::move_to; - using ConvertibleTo>::convert_to; - using ConvertibleTo>::move_to; + using ConvertibleTo>::convert_to; + using ConvertibleTo>::move_to; using value_type = ValueType; using index_type = int32; @@ -126,9 +126,9 @@ class MultiVector void move_to(MultiVector>* result) override; - void convert_to(matrix::BatchDense* result) const override; + void convert_to(matrix::Dense* result) const override; - void move_to(matrix::BatchDense* result) override; + void move_to(matrix::Dense* result) override; /** * Creates a mutable view (of matrix::Dense type) of one item of the Batch diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 0457f444c5a..86cd78eadc8 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -55,7 +55,7 @@ namespace matrix { /** - * BatchDense is a batch matrix format which explicitly stores all values of the + * Dense is a batch matrix format which explicitly stores all values of the * matrix in each of the batches. 
* * The values in each of the batches are stored in row-major format (values @@ -71,38 +71,37 @@ namespace matrix { * @ingroup BatchLinOp */ template -class BatchDense : public EnableBatchLinOp>, - public EnableCreateMethod>, - public ConvertibleTo>> { - friend class EnableCreateMethod; - friend class EnablePolymorphicObject; - friend class BatchDense>; - friend class BatchDense>; +class Dense : public EnableBatchLinOp>, + public EnableCreateMethod>, + public ConvertibleTo>> { + friend class EnableCreateMethod; + friend class EnablePolymorphicObject; + friend class Dense>; + friend class Dense>; public: - using EnableBatchLinOp::convert_to; - using EnableBatchLinOp::move_to; + using EnableBatchLinOp::convert_to; + using EnableBatchLinOp::move_to; using value_type = ValueType; using index_type = int32; - using transposed_type = BatchDense; + using transposed_type = Dense; using unbatch_type = gko::matrix::Dense; - using absolute_type = remove_complex; - using complex_type = to_complex; + using absolute_type = remove_complex; + using complex_type = to_complex; /** - * Creates a BatchDense matrix with the configuration of another BatchDense + * Creates a Dense matrix with the configuration of another Dense * matrix. * * @param other The other matrix whose configuration needs to copied. */ - static std::unique_ptr create_with_config_of( - ptr_param other); + static std::unique_ptr create_with_config_of( + ptr_param other); - void convert_to( - BatchDense>* result) const override; + void convert_to(Dense>* result) const override; - void move_to(BatchDense>* result) override; + void move_to(Dense>* result) override; /** @@ -250,7 +249,7 @@ class BatchDense : public EnableBatchLinOp>, * array (if it resides on the same executor as the vector) or a copy of the * array on the correct executor. */ - static std::unique_ptr> create_const( + static std::unique_ptr> create_const( std::shared_ptr exec, const batch_dim<2>& sizes, gko::detail::const_array_view&& values); @@ -277,16 +276,16 @@ class BatchDense : public EnableBatchLinOp>, protected: /** - * Creates an uninitialized BatchDense matrix of the specified size. + * Creates an uninitialized Dense matrix of the specified size. * * @param exec Executor associated to the matrix * @param size size of the matrix */ - BatchDense(std::shared_ptr exec, - const batch_dim<2>& size = batch_dim<2>{}); + Dense(std::shared_ptr exec, + const batch_dim<2>& size = batch_dim<2>{}); /** - * Creates a BatchDense matrix from an already allocated (and initialized) + * Creates a Dense matrix from an already allocated (and initialized) * array. * * @tparam ValuesArray type of array of values @@ -303,9 +302,9 @@ class BatchDense : public EnableBatchLinOp>, * original array data will not be used in the matrix. */ template - BatchDense(std::shared_ptr exec, const batch_dim<2>& size, - ValuesArray&& values) - : EnableBatchLinOp(exec, size), + Dense(std::shared_ptr exec, const batch_dim<2>& size, + ValuesArray&& values) + : EnableBatchLinOp(exec, size), values_{exec, std::forward(values)} { // Ensure that the values array has the correct size @@ -314,12 +313,12 @@ class BatchDense : public EnableBatchLinOp>, } /** - * Creates a BatchDense matrix with the same configuration as the callers + * Creates a Dense matrix with the same configuration as the callers * matrix. * - * @returns a BatchDense matrix with the same configuration as the caller. + * @returns a Dense matrix with the same configuration as the caller. 
*/ - std::unique_ptr create_with_same_config() const; + std::unique_ptr create_with_same_config() const; virtual void apply_impl(const MultiVector* b, MultiVector* x) const; diff --git a/omp/matrix/batch_dense_kernels.cpp b/omp/matrix/batch_dense_kernels.cpp index a767215c844..2d0b7ed4d40 100644 --- a/omp/matrix/batch_dense_kernels.cpp +++ b/omp/matrix/batch_dense_kernels.cpp @@ -50,8 +50,8 @@ namespace gko { namespace kernels { namespace omp { /** - * @brief The BatchDense matrix format namespace. - * @ref BatchDense + * @brief The Dense matrix format namespace. + * @ref Dense * @ingroup batch_dense */ namespace batch_dense { @@ -62,7 +62,7 @@ namespace batch_dense { template void simple_apply(std::shared_ptr exec, - const batch::matrix::BatchDense* mat, + const batch::matrix::Dense* mat, const batch::MultiVector* b, batch::MultiVector* x) { @@ -85,7 +85,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void advanced_apply(std::shared_ptr exec, const batch::MultiVector* alpha, - const batch::matrix::BatchDense* mat, + const batch::matrix::Dense* mat, const batch::MultiVector* b, const batch::MultiVector* beta, batch::MultiVector* x) diff --git a/reference/matrix/batch_dense_kernels.cpp b/reference/matrix/batch_dense_kernels.cpp index f42d9a81d1f..3d7ef03a3bd 100644 --- a/reference/matrix/batch_dense_kernels.cpp +++ b/reference/matrix/batch_dense_kernels.cpp @@ -51,8 +51,8 @@ namespace gko { namespace kernels { namespace reference { /** - * @brief The BatchDense matrix format namespace. - * @ref BatchDense + * @brief The Dense matrix format namespace. + * @ref Dense * @ingroup batch_dense */ namespace batch_dense { @@ -63,7 +63,7 @@ namespace batch_dense { template void simple_apply(std::shared_ptr exec, - const batch::matrix::BatchDense* mat, + const batch::matrix::Dense* mat, const batch::MultiVector* b, batch::MultiVector* x) { @@ -85,7 +85,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void advanced_apply(std::shared_ptr exec, const batch::MultiVector* alpha, - const batch::matrix::BatchDense* mat, + const batch::matrix::Dense* mat, const batch::MultiVector* b, const batch::MultiVector* beta, batch::MultiVector* x) diff --git a/reference/matrix/batch_struct.hpp b/reference/matrix/batch_struct.hpp index dee7c71948a..40e2cfc2078 100644 --- a/reference/matrix/batch_struct.hpp +++ b/reference/matrix/batch_struct.hpp @@ -66,7 +66,7 @@ namespace host { */ template inline batch::matrix::batch_dense::uniform_batch -get_batch_struct(const batch::matrix::BatchDense* const op) +get_batch_struct(const batch::matrix::Dense* const op) { return {op->get_const_values(), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), @@ -80,7 +80,7 @@ get_batch_struct(const batch::matrix::BatchDense* const op) */ template inline batch::matrix::batch_dense::uniform_batch get_batch_struct( - batch::matrix::BatchDense* const op) + batch::matrix::Dense* const op) { return {op->get_values(), op->get_num_batch_items(), static_cast(op->get_common_size()[1]), diff --git a/reference/test/matrix/batch_dense_kernels.cpp b/reference/test/matrix/batch_dense_kernels.cpp index 8e2e522e5f4..e1689352cde 100644 --- a/reference/test/matrix/batch_dense_kernels.cpp +++ b/reference/test/matrix/batch_dense_kernels.cpp @@ -53,16 +53,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
template -class BatchDense : public ::testing::Test { +class Dense : public ::testing::Test { protected: using value_type = T; using size_type = gko::size_type; - using Mtx = gko::batch::matrix::BatchDense; + using Mtx = gko::batch::matrix::Dense; using MVec = gko::batch::MultiVector; using DenseMtx = gko::matrix::Dense; using ComplexMtx = gko::to_complex; using RealMtx = gko::remove_complex; - BatchDense() + Dense() : exec(gko::ReferenceExecutor::create()), mtx_0(gko::batch::initialize( {{I({1.0, -1.0, 1.5}), I({-2.0, 2.0, 3.0})}, @@ -111,10 +111,10 @@ class BatchDense : public ::testing::Test { }; -TYPED_TEST_SUITE(BatchDense, gko::test::ValueTypes); +TYPED_TEST_SUITE(Dense, gko::test::ValueTypes); -TYPED_TEST(BatchDense, AppliesToBatchMultiVector) +TYPED_TEST(Dense, AppliesToBatchMultiVector) { using T = typename TestFixture::value_type; @@ -129,7 +129,7 @@ TYPED_TEST(BatchDense, AppliesToBatchMultiVector) } -TYPED_TEST(BatchDense, AppliesLinearCombinationWithSameAlphaToBatchMultiVector) +TYPED_TEST(Dense, AppliesLinearCombinationWithSameAlphaToBatchMultiVector) { using Mtx = typename TestFixture::Mtx; using MVec = typename TestFixture::MVec; @@ -156,7 +156,7 @@ TYPED_TEST(BatchDense, AppliesLinearCombinationWithSameAlphaToBatchMultiVector) } -TYPED_TEST(BatchDense, AppliesLinearCombinationToBatchMultiVector) +TYPED_TEST(Dense, AppliesLinearCombinationToBatchMultiVector) { using Mtx = typename TestFixture::Mtx; using MVec = typename TestFixture::MVec; @@ -183,7 +183,7 @@ TYPED_TEST(BatchDense, AppliesLinearCombinationToBatchMultiVector) } -TYPED_TEST(BatchDense, ApplyFailsOnWrongNumberOfResultCols) +TYPED_TEST(Dense, ApplyFailsOnWrongNumberOfResultCols) { using MVec = typename TestFixture::MVec; auto res = MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2}}); @@ -193,7 +193,7 @@ TYPED_TEST(BatchDense, ApplyFailsOnWrongNumberOfResultCols) } -TYPED_TEST(BatchDense, ApplyFailsOnWrongNumberOfResultRows) +TYPED_TEST(Dense, ApplyFailsOnWrongNumberOfResultRows) { using MVec = typename TestFixture::MVec; auto res = MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3}}); @@ -203,7 +203,7 @@ TYPED_TEST(BatchDense, ApplyFailsOnWrongNumberOfResultRows) } -TYPED_TEST(BatchDense, ApplyFailsOnWrongInnerDimension) +TYPED_TEST(Dense, ApplyFailsOnWrongInnerDimension) { using MVec = typename TestFixture::MVec; auto res = @@ -214,7 +214,7 @@ TYPED_TEST(BatchDense, ApplyFailsOnWrongInnerDimension) } -TYPED_TEST(BatchDense, AdvancedApplyFailsOnWrongInnerDimension) +TYPED_TEST(Dense, AdvancedApplyFailsOnWrongInnerDimension) { using MVec = typename TestFixture::MVec; auto res = @@ -230,7 +230,7 @@ TYPED_TEST(BatchDense, AdvancedApplyFailsOnWrongInnerDimension) } -TYPED_TEST(BatchDense, AdvancedApplyFailsOnWrongAlphaDimension) +TYPED_TEST(Dense, AdvancedApplyFailsOnWrongAlphaDimension) { using MVec = typename TestFixture::MVec; auto res = diff --git a/test/matrix/batch_dense_kernels.cpp b/test/matrix/batch_dense_kernels.cpp index 7d44f29899c..b32f1063377 100644 --- a/test/matrix/batch_dense_kernels.cpp +++ b/test/matrix/batch_dense_kernels.cpp @@ -53,13 +53,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "test/utils/executor.hpp" -class BatchDense : public CommonTestFixture { +class Dense : public CommonTestFixture { protected: using vtype = double; - using Mtx = gko::batch::matrix::BatchDense; + using Mtx = gko::batch::matrix::Dense; using MVec = gko::batch::MultiVector; - BatchDense() : rand_engine(15) {} + Dense() : rand_engine(15) {} template std::unique_ptr gen_mtx(const gko::size_type num_batch_items, @@ -107,7 +107,7 @@ class BatchDense : public CommonTestFixture { }; -TEST_F(BatchDense, SingleVectorApplyIsEquivalentToRef) +TEST_F(Dense, SingleVectorApplyIsEquivalentToRef) { set_up_apply_data(1); @@ -118,7 +118,7 @@ TEST_F(BatchDense, SingleVectorApplyIsEquivalentToRef) } -TEST_F(BatchDense, SingleVectorAdvancedApplyIsEquivalentToRef) +TEST_F(Dense, SingleVectorAdvancedApplyIsEquivalentToRef) { set_up_apply_data(1); From b402b94fa9dc441dd0036df5c3bfe75d7a4d0e53 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Thu, 5 Oct 2023 16:54:40 +0200 Subject: [PATCH 15/28] Doc updates and multivector view --- core/matrix/batch_dense.cpp | 32 +++++++ core/matrix/batch_struct.hpp | 4 +- core/test/matrix/batch_dense.cpp | 12 +++ cuda/matrix/batch_struct.hpp | 4 +- dpcpp/matrix/batch_struct.hpp | 4 +- hip/matrix/batch_struct.hip.hpp | 4 +- include/ginkgo/core/matrix/batch_dense.hpp | 102 +++++++++++++-------- reference/matrix/batch_struct.hpp | 4 +- 8 files changed, 120 insertions(+), 46 deletions(-) diff --git a/core/matrix/batch_dense.cpp b/core/matrix/batch_dense.cpp index 75f29bc6b4c..a864b4114c2 100644 --- a/core/matrix/batch_dense.cpp +++ b/core/matrix/batch_dense.cpp @@ -82,6 +82,38 @@ batch_dim<2> compute_batch_size( } // namespace detail +template +std::unique_ptr> +Dense::create_multi_vector_view() +{ + auto exec = this->get_executor(); + auto num_batch_items = this->get_num_batch_items(); + auto num_rows = this->get_common_size()[0]; + auto stride = this->get_common_size()[1]; + auto mvec = MultiVector::create( + exec, this->get_size(), + make_array_view(exec, num_batch_items * num_rows * stride, + this->get_values())); + return mvec; +} + + +template +std::unique_ptr> +Dense::create_const_multi_vector_view() const +{ + auto exec = this->get_executor(); + auto num_batch_items = this->get_num_batch_items(); + auto num_rows = this->get_common_size()[0]; + auto stride = this->get_common_size()[1]; + auto mvec = MultiVector::create_const( + exec, this->get_size(), + make_const_array_view(exec, num_batch_items * num_rows * stride, + this->get_const_values())); + return mvec; +} + + template std::unique_ptr> Dense::create_view_for_item(size_type item_id) diff --git a/core/matrix/batch_struct.hpp b/core/matrix/batch_struct.hpp index 37c297bb6b5..93b2b027ceb 100644 --- a/core/matrix/batch_struct.hpp +++ b/core/matrix/batch_struct.hpp @@ -46,7 +46,7 @@ namespace batch_dense { /** - * Encapsulates one matrix from a batch of multi-vectors. + * Encapsulates one matrix from a batch of dense matrices. */ template struct batch_item { @@ -59,7 +59,7 @@ struct batch_item { /** - * A 'simple' structure to store a global uniform batch of multi-vectors. + * A 'simple' structure to store a global uniform batch of dense matrices. 
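 *
 * Values are stored item after item: batch item `b` begins at
 * `values + b * stride * num_rows` and is row-major within the item, which
 * is the same offset computed by extract_batch_item() below.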
*/ template struct uniform_batch { diff --git a/core/test/matrix/batch_dense.cpp b/core/test/matrix/batch_dense.cpp index 6afe13a50af..36fc3f2ee4a 100644 --- a/core/test/matrix/batch_dense.cpp +++ b/core/test/matrix/batch_dense.cpp @@ -59,6 +59,10 @@ class Dense : public ::testing::Test { {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, exec)), + mvec(gko::batch::initialize>( + {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, + exec)), dense_mtx(gko::initialize>( {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, exec)) {} @@ -92,6 +96,7 @@ class Dense : public ::testing::Test { std::shared_ptr exec; std::unique_ptr> mtx; + std::unique_ptr> mvec; std::unique_ptr> dense_mtx; }; @@ -133,6 +138,13 @@ TYPED_TEST(Dense, CanCreateDenseItemView) } +TYPED_TEST(Dense, CanCreateMultiVectorView) +{ + GKO_ASSERT_BATCH_MTX_NEAR(this->mtx->create_multi_vector_view(), this->mvec, + 0.0); +} + + TYPED_TEST(Dense, CanBeCopied) { auto mtx_copy = gko::batch::matrix::Dense::create(this->exec); diff --git a/cuda/matrix/batch_struct.hpp b/cuda/matrix/batch_struct.hpp index 56af3c5ba7e..19b006d26cd 100644 --- a/cuda/matrix/batch_struct.hpp +++ b/cuda/matrix/batch_struct.hpp @@ -61,7 +61,7 @@ namespace cuda { /** - * Generates an immutable uniform batch struct from a batch of multi-vectors. + * Generates an immutable uniform batch struct from a batch of dense matrices. */ template inline batch::matrix::batch_dense::uniform_batch> @@ -75,7 +75,7 @@ get_batch_struct(const batch::matrix::Dense* const op) /** - * Generates a uniform batch struct from a batch of multi-vectors. + * Generates a uniform batch struct from a batch of dense matrices. */ template inline batch::matrix::batch_dense::uniform_batch> diff --git a/dpcpp/matrix/batch_struct.hpp b/dpcpp/matrix/batch_struct.hpp index e44bc394667..cd5298a4409 100644 --- a/dpcpp/matrix/batch_struct.hpp +++ b/dpcpp/matrix/batch_struct.hpp @@ -59,7 +59,7 @@ namespace dpcpp { /** - * Generates an immutable uniform batch struct from a batch of multi-vectors. + * Generates an immutable uniform batch struct from a batch of dense matrices. */ template inline batch::matrix::batch_dense::uniform_batch @@ -73,7 +73,7 @@ get_batch_struct(const batch::matrix::Dense* const op) /** - * Generates a uniform batch struct from a batch of multi-vectors. + * Generates a uniform batch struct from a batch of dense matrices. */ template inline batch::matrix::batch_dense::uniform_batch get_batch_struct( diff --git a/hip/matrix/batch_struct.hip.hpp b/hip/matrix/batch_struct.hip.hpp index c75a6c7f0a3..25c73d45abc 100644 --- a/hip/matrix/batch_struct.hip.hpp +++ b/hip/matrix/batch_struct.hip.hpp @@ -61,7 +61,7 @@ namespace hip { /** - * Generates an immutable uniform batch struct from a batch of multi-vectors. + * Generates an immutable uniform batch struct from a batch of dense matrices. */ template inline batch::matrix::batch_dense::uniform_batch> @@ -75,7 +75,7 @@ get_batch_struct(const batch::matrix::Dense* const op) /** - * Generates a uniform batch struct from a batch of multi-vectors. + * Generates a uniform batch struct from a batch of dense matrices. */ template inline batch::matrix::batch_dense::uniform_batch> diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 86cd78eadc8..d713760947e 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -59,21 +59,24 @@ namespace matrix { * matrix in each of the batches. 
* * The values in each of the batches are stored in row-major format (values - * belonging to the same row appear consecutive in the memory). Optionally, rows - * can be padded for better memory access. + * belonging to the same row appear consecutive in the memory and the values of + * each batch item are also stored consecutively in memory). + * + * @note Though the storage layout is similar to the multi-vector object, the + * class semantics and the operations it aims to provide is different. Hence it + * is recommended to create multi-vector objects if the user means to view the + * data as a set of vectors. * * @tparam ValueType precision of matrix elements * - * @note While this format is not very useful for storing sparse matrices, it - * is often suitable to store vectors, and sets of vectors. * @ingroup batch_dense * @ingroup mat_formats * @ingroup BatchLinOp */ template -class Dense : public EnableBatchLinOp>, - public EnableCreateMethod>, - public ConvertibleTo>> { +class Dense final : public EnableBatchLinOp>, + public EnableCreateMethod>, + public ConvertibleTo>> { friend class EnableCreateMethod; friend class EnablePolymorphicObject; friend class Dense>; @@ -103,16 +106,31 @@ class Dense : public EnableBatchLinOp>, void move_to(Dense>* result) override; + /** + * Creates a mutable view (of MultiVector type) of the data owned by the + * matrix::Dense object. Does not perform any deep copies, but only + * returns a view of the underlying data. + * + * @return a MultiVector object with a view of the data from the batch + * dense matrix. + */ + std::unique_ptr> create_multi_vector_view(); + + /** + * @copydoc create_const_multi_vector_view() + */ + std::unique_ptr> + create_const_multi_vector_view() const; /** - * Creates a mutable view (of matrix::Dense type) of one item of the Batch - * MultiVector object. Does not perform any deep copies, but - * only returns a view of the data. + * Creates a mutable view (of matrix::Dense type) of one item of the + * batch::matrix::Dense object. Does not perform any deep + * copies, but only returns a view of the data. * * @param item_id The index of the batch item * - * @return a matrix::Dense object with the data from the batch item at the - * given index. + * @return a batch::matrix::Dense object with the data from the batch item + * at the given index. */ std::unique_ptr create_view_for_item(size_type item_id); @@ -148,8 +166,8 @@ class Dense : public EnableBatchLinOp>, * @param row the row of the requested element * @param col the column of the requested element * - * @note the method has to be called on the same Executor the vector is - * stored at (e.g. trying to call this method on a GPU multi-vector + * @note the method has to be called on the same Executor the matrix is + * stored at (e.g. trying to call this method on a GPU Dense object * from the OMP results in a runtime error) */ value_type& at(size_type batch_id, size_type row, size_type col) @@ -159,7 +177,7 @@ class Dense : public EnableBatchLinOp>, } /** - * @copydoc MultiVector::at(size_type, size_type, size_type) + * @copydoc Dense::at(size_type, size_type, size_type) */ value_type at(size_type batch_id, size_type row, size_type col) const { @@ -170,15 +188,15 @@ class Dense : public EnableBatchLinOp>, /** * Returns a single element for a particular batch item. * - * Useful for iterating across all elements of the vector. + * Useful for iterating across all elements of the matrix. * However, it is less efficient than the two-parameter variant of this * method. 
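 *
 * For illustration only (assuming an item with 3 columns and a stride of 3),
 * `at(b, 1, 2)` and `at(b, 1 * 3 + 2)` address the same stored element.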
* * @param batch_id the batch item index to be queried * @param idx a linear index of the requested element * - * @note the method has to be called on the same Executor the vector is - * stored at (e.g. trying to call this method on a GPU multi-vector + * @note the method has to be called on the same Executor the matrix is + * stored at (e.g. trying to call this method on a GPU Dense object * from the OMP results in a runtime error) */ ValueType& at(size_type batch_id, size_type idx) noexcept @@ -187,7 +205,7 @@ class Dense : public EnableBatchLinOp>, } /** - * @copydoc MultiVector::at(size_type, size_type, size_type) + * @copydoc Dense::at(size_type, size_type, size_type) */ ValueType at(size_type batch_id, size_type idx) const noexcept { @@ -195,7 +213,7 @@ class Dense : public EnableBatchLinOp>, } /** - * Returns a pointer to the array of values of the multi-vector for a + * Returns a pointer to the array of values of the matrix for a * specific batch item. * * @param batch_id the id of the batch item. @@ -236,30 +254,45 @@ class Dense : public EnableBatchLinOp>, return values_.get_num_elems(); } - /** * Creates a constant (immutable) batch dense matrix from a constant * array. * - * @param exec the executor to create the vector on - * @param size the dimensions of the vector - * @param values the value array of the vector + * @param exec the executor to create the matrix on + * @param size the dimensions of the matrix + * @param values the value array of the matrix * - * @return A smart pointer to the constant multi-vector wrapping the input - * array (if it resides on the same executor as the vector) or a copy of the + * @return A smart pointer to the constant matrix wrapping the input + * array (if it resides on the same executor as the matrix) or a copy of the * array on the correct executor. */ static std::unique_ptr> create_const( std::shared_ptr exec, const batch_dim<2>& sizes, gko::detail::const_array_view&& values); - + /** + * Apply the matrix to a multi-vector. Represents the matrix vector + * multiplication, x = A * b, where x and b are both multi-vectors. + * + * @param b the multi-vector to be applied to + * @param x the output multi-vector + */ void apply(const MultiVector* b, MultiVector* x) const { this->apply_impl(b, x); } + /** + * Apply the matrix to a multi-vector with a linear combination of the given + * input vector. Represents the matrix vector multiplication, x = alpha* A * + * b + beta * x, where x and b are both multi-vectors. + * + * @param alpha the scalar to scale the matrix-vector product with + * @param b the multi-vector to be applied to + * @param beta the scalar to scale the x vector with + * @param x the output multi-vector + */ void apply(const MultiVector* alpha, const MultiVector* b, const MultiVector* beta, @@ -293,9 +326,6 @@ class Dense : public EnableBatchLinOp>, * @param exec Executor associated to the matrix * @param size sizes of the batch matrices in a batch_dim object * @param values array of matrix values - * @param strides stride of the rows (i.e. 
offset between the first - * elements of two consecutive rows, expressed as the - * number of matrix elements) * * @note If `values` is not an rvalue, not an array of ValueType, or is on * the wrong executor, an internal copy will be created, and the @@ -320,13 +350,13 @@ class Dense : public EnableBatchLinOp>, */ std::unique_ptr create_with_same_config() const; - virtual void apply_impl(const MultiVector* b, - MultiVector* x) const; + void apply_impl(const MultiVector* b, + MultiVector* x) const; - virtual void apply_impl(const MultiVector* alpha, - const MultiVector* b, - const MultiVector* beta, - MultiVector* x) const; + void apply_impl(const MultiVector* alpha, + const MultiVector* b, + const MultiVector* beta, + MultiVector* x) const; size_type linearize_index(size_type batch, size_type row, size_type col) const noexcept diff --git a/reference/matrix/batch_struct.hpp b/reference/matrix/batch_struct.hpp index 40e2cfc2078..1a759cec2a9 100644 --- a/reference/matrix/batch_struct.hpp +++ b/reference/matrix/batch_struct.hpp @@ -62,7 +62,7 @@ namespace host { /** - * Generates an immutable uniform batch struct from a batch of multi-vectors. + * Generates an immutable uniform batch struct from a batch of dense matrices. */ template inline batch::matrix::batch_dense::uniform_batch @@ -76,7 +76,7 @@ get_batch_struct(const batch::matrix::Dense* const op) /** - * Generates a uniform batch struct from a batch of multi-vectors. + * Generates a uniform batch struct from a batch of dense matrices. */ template inline batch::matrix::batch_dense::uniform_batch get_batch_struct( From 3ca9fb48180b5f76cc700dfe61581c6cb99b0b08 Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Fri, 6 Oct 2023 07:12:03 +0000 Subject: [PATCH 16/28] Format files Co-authored-by: Pratik Nayak --- cuda/matrix/batch_struct.hpp | 4 +- dpcpp/base/batch_multi_vector_kernels.dp.cpp | 52 ++++++++++--------- dpcpp/matrix/batch_dense_kernels.dp.cpp | 54 ++++++++++---------- dpcpp/matrix/batch_struct.hpp | 4 +- hip/matrix/batch_struct.hip.hpp | 10 ++-- reference/matrix/batch_struct.hpp | 4 +- 6 files changed, 69 insertions(+), 59 deletions(-) diff --git a/cuda/matrix/batch_struct.hpp b/cuda/matrix/batch_struct.hpp index 19b006d26cd..f191953f7b9 100644 --- a/cuda/matrix/batch_struct.hpp +++ b/cuda/matrix/batch_struct.hpp @@ -34,13 +34,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define GKO_CUDA_MATRIX_BATCH_STRUCT_HPP_ +#include "core/matrix/batch_struct.hpp" + + #include #include #include #include "core/base/batch_struct.hpp" -#include "core/matrix/batch_struct.hpp" #include "cuda/base/config.hpp" #include "cuda/base/types.hpp" diff --git a/dpcpp/base/batch_multi_vector_kernels.dp.cpp b/dpcpp/base/batch_multi_vector_kernels.dp.cpp index 12648b81e00..e0bc15fdc61 100644 --- a/dpcpp/base/batch_multi_vector_kernels.dp.cpp +++ b/dpcpp/base/batch_multi_vector_kernels.dp.cpp @@ -194,9 +194,9 @@ void compute_dot(std::shared_ptr exec, // TODO: Remove reqd_sub_group size and use sycl::reduce_over_group exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); const auto x_b = batch::extract_batch_item(x_ub, group_id); @@ -232,18 +232,19 @@ void compute_conj_dot(std::shared_ptr exec, exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - const auto y_b = batch::extract_batch_item(y_ub, group_id); - const auto res_b = batch::extract_batch_item(res_ub, group_id); - compute_gen_dot_product_kernel( - x_b, y_b, res_b, item_ct1, - [](auto val) { return conj(val); }); - }); + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto y_b = batch::extract_batch_item(y_ub, group_id); + const auto res_b = + batch::extract_batch_item(res_ub, group_id); + compute_gen_dot_product_kernel( + x_b, y_b, res_b, item_ct1, + [](auto val) { return conj(val); }); + }); }); } @@ -268,16 +269,17 @@ void compute_norm2(std::shared_ptr exec, const dim3 grid(num_batches); exec->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - const auto res_b = batch::extract_batch_item(res_ub, group_id); - compute_norm2_kernel(x_b, res_b, item_ct1); - }); + cgh.parallel_for(sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto x_b = + batch::extract_batch_item(x_ub, group_id); + const auto res_b = batch::extract_batch_item( + res_ub, group_id); + compute_norm2_kernel(x_b, res_b, item_ct1); + }); }); } diff --git a/dpcpp/matrix/batch_dense_kernels.dp.cpp b/dpcpp/matrix/batch_dense_kernels.dp.cpp index 4552f918c60..6aec3e57fc5 100644 --- a/dpcpp/matrix/batch_dense_kernels.dp.cpp +++ b/dpcpp/matrix/batch_dense_kernels.dp.cpp @@ -100,17 +100,17 @@ void simple_apply(std::shared_ptr exec, // Launch a kernel that has nbatches blocks, each block has max group size 
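    // (Descriptive note: one work-group is mapped to one batch item and each
    // work-group uses the maximum group size mentioned above; the kernel body
    // additionally requires a sub-group size of config::warp_size.)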
(exec->get_queue())->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto mat_b = - batch::matrix::extract_batch_item(mat_ub, group_id); - const auto b_b = batch::extract_batch_item(b_ub, group_id); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - simple_apply_kernel(mat_b, b_b, x_b, item_ct1); - }); + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + simple_apply_kernel(mat_b, b_b, x_b, item_ct1); + }); }); } @@ -147,22 +147,22 @@ void advanced_apply(std::shared_ptr exec, // Launch a kernel that has nbatches blocks, each block has max group size (exec->get_queue())->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto mat_b = - batch::matrix::extract_batch_item(mat_ub, group_id); - const auto b_b = batch::extract_batch_item(b_ub, group_id); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - const auto alpha_b = - batch::extract_batch_item(alpha_ub, group_id); - const auto beta_b = - batch::extract_batch_item(beta_ub, group_id); - advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, - item_ct1); - }); + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto alpha_b = + batch::extract_batch_item(alpha_ub, group_id); + const auto beta_b = + batch::extract_batch_item(beta_ub, group_id); + advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, + item_ct1); + }); }); } diff --git a/dpcpp/matrix/batch_struct.hpp b/dpcpp/matrix/batch_struct.hpp index cd5298a4409..f561bf004c7 100644 --- a/dpcpp/matrix/batch_struct.hpp +++ b/dpcpp/matrix/batch_struct.hpp @@ -34,12 +34,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_DPCPP_MATRIX_BATCH_STRUCT_HPP_ +#include "core/matrix/batch_struct.hpp" + + #include #include #include "core/base/batch_struct.hpp" -#include "core/matrix/batch_struct.hpp" #include "dpcpp/base/config.hpp" diff --git a/hip/matrix/batch_struct.hip.hpp b/hip/matrix/batch_struct.hip.hpp index 25c73d45abc..c0659420661 100644 --- a/hip/matrix/batch_struct.hip.hpp +++ b/hip/matrix/batch_struct.hip.hpp @@ -30,8 +30,11 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ -#ifndef GKO_HIP_MATRIX_BATCH_STRUCT_HPP_ -#define GKO_HIP_MATRIX_BATCH_STRUCT_HPP_ +#ifndef GKO_HIP_MATRIX_BATCH_STRUCT_HIP_HPP_ +#define GKO_HIP_MATRIX_BATCH_STRUCT_HIP_HPP_ + + +#include "core/matrix/batch_struct.hpp" #include @@ -40,7 +43,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/base/batch_struct.hpp" -#include "core/matrix/batch_struct.hpp" #include "hip/base/config.hip.hpp" #include "hip/base/types.hip.hpp" @@ -93,4 +95,4 @@ get_batch_struct(batch::matrix::Dense* const op) } // namespace gko -#endif // GKO_HIP_MATRIX_BATCH_STRUCT_HPP_ +#endif // GKO_HIP_MATRIX_BATCH_STRUCT_HIP_HPP_ diff --git a/reference/matrix/batch_struct.hpp b/reference/matrix/batch_struct.hpp index 1a759cec2a9..47d48f1e927 100644 --- a/reference/matrix/batch_struct.hpp +++ b/reference/matrix/batch_struct.hpp @@ -34,13 +34,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_REFERENCE_MATRIX_BATCH_STRUCT_HPP_ +#include "core/matrix/batch_struct.hpp" + + #include #include #include #include "core/base/batch_struct.hpp" -#include "core/matrix/batch_struct.hpp" namespace gko { From e5b88135914128e37fb51ecf0272df9a27a27b41 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 6 Oct 2023 09:11:18 +0200 Subject: [PATCH 17/28] Use CommonTestFixture value_type --- test/matrix/batch_dense_kernels.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/test/matrix/batch_dense_kernels.cpp b/test/matrix/batch_dense_kernels.cpp index b32f1063377..d6bf85a42c4 100644 --- a/test/matrix/batch_dense_kernels.cpp +++ b/test/matrix/batch_dense_kernels.cpp @@ -55,9 +55,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
class Dense : public CommonTestFixture { protected: - using vtype = double; - using Mtx = gko::batch::matrix::Dense; - using MVec = gko::batch::MultiVector; + using Mtx = gko::batch::matrix::Dense; + using MVec = gko::batch::MultiVector; Dense() : rand_engine(15) {} @@ -87,7 +86,7 @@ class Dense : public CommonTestFixture { expected = MVec::create( ref, gko::batch_dim<2>(batch_size, gko::dim<2>{num_rows, num_vecs})); - expected->fill(gko::one()); + expected->fill(gko::one()); dresult = gko::clone(exec, expected); } @@ -114,7 +113,7 @@ TEST_F(Dense, SingleVectorApplyIsEquivalentToRef) x->apply(y.get(), expected.get()); dx->apply(dy.get(), dresult.get()); - GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, 1e-14); + GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, r::value); } @@ -125,5 +124,5 @@ TEST_F(Dense, SingleVectorAdvancedApplyIsEquivalentToRef) x->apply(alpha.get(), y.get(), beta.get(), expected.get()); dx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); - GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, 1e-14); + GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, r::value); } From c00b6d9b748491de5dc28e174d1cb9ce2de8bfa4 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 9 Oct 2023 12:48:24 +0200 Subject: [PATCH 18/28] Review updates Co-authored-by: Marcel Koch --- core/base/batch_multi_vector.cpp | 16 +++++++++--- core/base/batch_utilities.hpp | 7 +++--- core/matrix/batch_dense.cpp | 25 ------------------- .../test/preconditioner/jacobi_kernels.dp.cpp | 2 +- include/ginkgo/core/matrix/batch_dense.hpp | 2 +- reference/matrix/batch_dense_kernels.hpp.inc | 2 +- reference/test/matrix/batch_dense_kernels.cpp | 2 +- test/matrix/batch_dense_kernels.cpp | 2 +- 8 files changed, 20 insertions(+), 38 deletions(-) diff --git a/core/base/batch_multi_vector.cpp b/core/base/batch_multi_vector.cpp index 294fe45972a..bd2079907a3 100644 --- a/core/base/batch_multi_vector.cpp +++ b/core/base/batch_multi_vector.cpp @@ -294,11 +294,12 @@ void MultiVector::move_to( template void MultiVector::convert_to(matrix::Dense* result) const { - auto exec = result->get_executor() != nullptr ? result->get_executor() - : this->get_executor(); + auto exec = result->get_executor() == nullptr ? this->get_executor() + : result->get_executor(); auto tmp = gko::batch::matrix::Dense::create_const( exec, this->get_size(), - make_const_array_view(exec, this->get_num_stored_elements(), + make_const_array_view(this->get_executor(), + this->get_num_stored_elements(), this->get_const_values())); result->copy_from(tmp); } @@ -307,7 +308,14 @@ void MultiVector::convert_to(matrix::Dense* result) const template void MultiVector::move_to(matrix::Dense* result) { - this->convert_to(result); + auto exec = result->get_executor() == nullptr ? 
this->get_executor() + : result->get_executor(); + auto tmp = gko::batch::matrix::Dense::create_const( + exec, this->get_size(), + make_const_array_view(this->get_executor(), + this->get_num_stored_elements(), + this->get_const_values())); + tmp->move_to(result); } diff --git a/core/base/batch_utilities.hpp b/core/base/batch_utilities.hpp index d5c5bdb4aa2..834e89c8358 100644 --- a/core/base/batch_utilities.hpp +++ b/core/base/batch_utilities.hpp @@ -109,14 +109,13 @@ std::unique_ptr create_from_item( template -auto unbatch(const InputType* batch_multivec) +auto unbatch(const InputType* batch_object) { - auto exec = batch_multivec->get_executor(); auto unbatched_mats = std::vector>{}; - for (size_type b = 0; b < batch_multivec->get_num_batch_items(); ++b) { + for (size_type b = 0; b < batch_object->get_num_batch_items(); ++b) { unbatched_mats.emplace_back( - batch_multivec->create_const_view_for_item(b)->clone()); + batch_object->create_const_view_for_item(b)->clone()); } return unbatched_mats; } diff --git a/core/matrix/batch_dense.cpp b/core/matrix/batch_dense.cpp index a864b4114c2..b948a2c3afc 100644 --- a/core/matrix/batch_dense.cpp +++ b/core/matrix/batch_dense.cpp @@ -64,24 +64,6 @@ GKO_REGISTER_OPERATION(advanced_apply, batch_dense::advanced_apply); } // namespace dense -namespace detail { - - -template -batch_dim<2> compute_batch_size( - const std::vector*>& matrices) -{ - auto common_size = matrices[0]->get_size(); - for (size_type i = 1; i < matrices.size(); ++i) { - GKO_ASSERT_EQUAL_DIMENSIONS(common_size, matrices[i]->get_size()); - } - return batch_dim<2>{matrices.size(), common_size}; -} - - -} // namespace detail - - template std::unique_ptr> Dense::create_multi_vector_view() @@ -178,13 +160,6 @@ std::unique_ptr> Dense::create_const( } -inline const batch_dim<2> get_col_sizes(const batch_dim<2>& sizes) -{ - return batch_dim<2>(sizes.get_num_batch_items(), - dim<2>(1, sizes.get_common_size()[1])); -} - - template Dense::Dense(std::shared_ptr exec, const batch_dim<2>& size) diff --git a/dpcpp/test/preconditioner/jacobi_kernels.dp.cpp b/dpcpp/test/preconditioner/jacobi_kernels.dp.cpp index aae15245357..b8082a2db32 100644 --- a/dpcpp/test/preconditioner/jacobi_kernels.dp.cpp +++ b/dpcpp/test/preconditioner/jacobi_kernels.dp.cpp @@ -90,7 +90,7 @@ class Jacobi : public ::testing::Test { gko::uint32 max_block_size, int min_nnz, int max_nnz, int num_rhs = 1, value_type accuracy = 0.1, bool skip_sorting = true) { - std::ranlux48 engine(42); + std::default_random_engine engine(42); const auto dim = *(end(block_pointers) - 1); if (condition_numbers.size() == 0) { mtx = gko::test::generate_random_matrix( diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index d713760947e..d081e5d440e 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -62,7 +62,7 @@ namespace matrix { * belonging to the same row appear consecutive in the memory and the values of * each batch item are also stored consecutively in memory). * - * @note Though the storage layout is similar to the multi-vector object, the + * @note Though the storage layout is the same as the multi-vector object, the * class semantics and the operations it aims to provide is different. Hence it * is recommended to create multi-vector objects if the user means to view the * data as a set of vectors. 
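
A minimal usage sketch of the interface documented above, assuming the batch
headers are reachable through <ginkgo/ginkgo.hpp> and that gko::batch::initialize
(used the same way as in the unit tests of this series) is available to user
code; the sizes and values are only illustrative:

#include <ginkgo/ginkgo.hpp>

int main()
{
    using Mtx = gko::batch::matrix::Dense<double>;
    using MVec = gko::batch::MultiVector<double>;
    auto exec = gko::ReferenceExecutor::create();
    // Two 2x3 batch items; each item is stored row-major and the items are
    // laid out contiguously one after the other.
    auto mat = gko::batch::initialize<Mtx>(
        {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}},
         {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}},
        exec);
    // One right-hand side per item (3 rows) and a result with 2 rows per item.
    auto b = gko::batch::initialize<MVec>(
        {{{1.0}, {2.0}, {3.0}}, {{1.0}, {2.0}, {3.0}}}, exec);
    auto x = MVec::create(exec, gko::batch_dim<2>(2, gko::dim<2>{2, 1}));
    mat->apply(b.get(), x.get());  // x = A * b for every batch item
    // The same storage viewed as a set of vectors, without a copy.
    auto vec_view = mat->create_multi_vector_view();
}
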
diff --git a/reference/matrix/batch_dense_kernels.hpp.inc b/reference/matrix/batch_dense_kernels.hpp.inc index bff9ad137cf..20e395af5b7 100644 --- a/reference/matrix/batch_dense_kernels.hpp.inc +++ b/reference/matrix/batch_dense_kernels.hpp.inc @@ -71,7 +71,7 @@ inline void advanced_apply_kernel( } else { for (int row = 0; row < c.num_rows; ++row) { for (int col = 0; col < c.num_rhs; ++col) { - c.values[row * c.stride + col] *= gko::zero(); + c.values[row * c.stride + col] = gko::zero(); } } } diff --git a/reference/test/matrix/batch_dense_kernels.cpp b/reference/test/matrix/batch_dense_kernels.cpp index e1689352cde..97dbe3e77cb 100644 --- a/reference/test/matrix/batch_dense_kernels.cpp +++ b/reference/test/matrix/batch_dense_kernels.cpp @@ -107,7 +107,7 @@ class Dense : public ::testing::Test { std::unique_ptr x_00; std::unique_ptr x_01; - std::ranlux48 rand_engine; + std::default_random_engine rand_engine; }; diff --git a/test/matrix/batch_dense_kernels.cpp b/test/matrix/batch_dense_kernels.cpp index d6bf85a42c4..a73efcd8753 100644 --- a/test/matrix/batch_dense_kernels.cpp +++ b/test/matrix/batch_dense_kernels.cpp @@ -90,7 +90,7 @@ class Dense : public CommonTestFixture { dresult = gko::clone(exec, expected); } - std::ranlux48 rand_engine; + std::default_random_engine rand_engine; const size_t batch_size = 11; std::unique_ptr x; From fe21d6560ad9be7cb93103aa826b0b7421ef8637 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 9 Oct 2023 16:01:46 +0200 Subject: [PATCH 19/28] Review updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Thomas Grützmacher --- .../matrix/batch_dense_kernels.hpp.inc | 18 ++--- core/base/batch_multi_vector.cpp | 9 +-- core/base/batch_struct.hpp | 16 ++-- core/matrix/batch_struct.hpp | 37 +++++----- cuda/base/batch_struct.hpp | 14 ++-- cuda/matrix/batch_struct.hpp | 18 ++--- dpcpp/base/batch_struct.hpp | 14 ++-- dpcpp/matrix/batch_dense_kernels.hpp.inc | 4 +- dpcpp/matrix/batch_struct.hpp | 22 +++--- hip/base/batch_struct.hip.hpp | 14 ++-- hip/matrix/batch_struct.hip.hpp | 18 ++--- include/ginkgo/core/matrix/batch_dense.hpp | 14 ++-- reference/base/batch_struct.hpp | 12 +-- reference/matrix/batch_dense_kernels.hpp.inc | 4 +- reference/matrix/batch_struct.hpp | 19 ++--- .../test/base/batch_multi_vector_kernels.cpp | 14 ++-- reference/test/matrix/batch_dense_kernels.cpp | 74 +++++++++---------- test/matrix/batch_dense_kernels.cpp | 44 +++++------ 18 files changed, 178 insertions(+), 187 deletions(-) diff --git a/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc b/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc index 2f876332ae7..7a38cfea215 100644 --- a/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc +++ b/common/cuda_hip/matrix/batch_dense_kernels.hpp.inc @@ -33,7 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
template __device__ __forceinline__ void simple_apply( - const gko::batch::matrix::batch_dense::batch_item& mat, + const gko::batch::matrix::dense::batch_item& mat, const ValueType* const __restrict__ b, ValueType* const __restrict__ x) { constexpr auto tile_size = config::warp_size; @@ -65,10 +65,9 @@ template __global__ __launch_bounds__( default_block_size, sm_oversubscription) void simple_apply_kernel(const gko::batch::matrix:: - batch_dense:: - uniform_batch< - const ValueType> - mat, + dense::uniform_batch< + const ValueType> + mat, const gko::batch:: multi_vector:: uniform_batch< @@ -94,7 +93,7 @@ __global__ __launch_bounds__( template __device__ __forceinline__ void advanced_apply( const ValueType alpha, - const gko::batch::matrix::batch_dense::batch_item& mat, + const gko::batch::matrix::dense::batch_item& mat, const ValueType* const __restrict__ b, const ValueType beta, ValueType* const __restrict__ x) { @@ -132,10 +131,9 @@ __global__ __launch_bounds__( const ValueType> alpha, const gko::batch::matrix:: - batch_dense:: - uniform_batch< - const ValueType> - mat, + dense::uniform_batch< + const ValueType> + mat, const gko::batch:: multi_vector:: uniform_batch< diff --git a/core/base/batch_multi_vector.cpp b/core/base/batch_multi_vector.cpp index bd2079907a3..6a14919bf2f 100644 --- a/core/base/batch_multi_vector.cpp +++ b/core/base/batch_multi_vector.cpp @@ -308,14 +308,7 @@ void MultiVector::convert_to(matrix::Dense* result) const template void MultiVector::move_to(matrix::Dense* result) { - auto exec = result->get_executor() == nullptr ? this->get_executor() - : result->get_executor(); - auto tmp = gko::batch::matrix::Dense::create_const( - exec, this->get_size(), - make_const_array_view(this->get_executor(), - this->get_num_stored_elements(), - this->get_const_values())); - tmp->move_to(result); + this->convert_to(result); } diff --git a/core/base/batch_struct.hpp b/core/base/batch_struct.hpp index caca4577cf7..71445550b87 100644 --- a/core/base/batch_struct.hpp +++ b/core/base/batch_struct.hpp @@ -51,9 +51,9 @@ template struct batch_item { using value_type = ValueType; ValueType* values; - int stride; - int num_rows; - int num_rhs; + int32 stride; + int32 num_rows; + int32 num_rhs; }; @@ -67,9 +67,9 @@ struct uniform_batch { ValueType* values; size_type num_batch_items; - int stride; - int num_rows; - int num_rhs; + int32 stride; + int32 num_rows; + int32 num_rhs; size_type get_entry_storage() const { @@ -117,8 +117,8 @@ extract_batch_item(const multi_vector::uniform_batch& batch, template GKO_ATTRIBUTES GKO_INLINE multi_vector::batch_item -extract_batch_item(ValueType* const batch_values, const int stride, - const int num_rows, const int num_rhs, +extract_batch_item(ValueType* const batch_values, const int32 stride, + const int32 num_rows, const int32 num_rhs, const size_type batch_idx) { return {batch_values + batch_idx * stride * num_rows, stride, num_rows, diff --git a/core/matrix/batch_struct.hpp b/core/matrix/batch_struct.hpp index 93b2b027ceb..0bbfde40cc9 100644 --- a/core/matrix/batch_struct.hpp +++ b/core/matrix/batch_struct.hpp @@ -42,7 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace gko { namespace batch { namespace matrix { -namespace batch_dense { +namespace dense { /** @@ -51,10 +51,10 @@ namespace batch_dense { template struct batch_item { using value_type = ValueType; - ValueType* values; - int stride; - int num_rows; - int num_cols; + value_type* values; + int32 stride; + int32 num_rows; + int32 num_cols; }; @@ -68,9 +68,9 @@ struct uniform_batch { ValueType* values; size_type num_batch_items; - int stride; - int num_rows; - int num_cols; + int32 stride; + int32 num_rows; + int32 num_cols; size_type get_entry_storage() const { @@ -79,38 +79,37 @@ struct uniform_batch { }; -} // namespace batch_dense +} // namespace dense template -GKO_ATTRIBUTES GKO_INLINE batch_dense::batch_item to_const( - const batch_dense::batch_item& b) +GKO_ATTRIBUTES GKO_INLINE dense::batch_item to_const( + const dense::batch_item& b) { return {b.values, b.stride, b.num_rows, b.num_cols}; } template -GKO_ATTRIBUTES GKO_INLINE batch_dense::uniform_batch to_const( - const batch_dense::uniform_batch& ub) +GKO_ATTRIBUTES GKO_INLINE dense::uniform_batch to_const( + const dense::uniform_batch& ub) { return {ub.values, ub.num_batch_items, ub.stride, ub.num_rows, ub.num_cols}; } template -GKO_ATTRIBUTES GKO_INLINE batch_dense::batch_item extract_batch_item( - const batch_dense::uniform_batch& batch, - const size_type batch_idx) +GKO_ATTRIBUTES GKO_INLINE dense::batch_item extract_batch_item( + const dense::uniform_batch& batch, const size_type batch_idx) { return {batch.values + batch_idx * batch.stride * batch.num_rows, batch.stride, batch.num_rows, batch.num_cols}; } template -GKO_ATTRIBUTES GKO_INLINE batch_dense::batch_item extract_batch_item( - ValueType* const batch_values, const int stride, const int num_rows, - const int num_cols, const size_type batch_idx) +GKO_ATTRIBUTES GKO_INLINE dense::batch_item extract_batch_item( + ValueType* const batch_values, const int32 stride, const int32 num_rows, + const int32 num_cols, const size_type batch_idx) { return {batch_values + batch_idx * stride * num_rows, stride, num_rows, num_cols}; diff --git a/cuda/base/batch_struct.hpp b/cuda/base/batch_struct.hpp index 715332418fb..12f34509275 100644 --- a/cuda/base/batch_struct.hpp +++ b/cuda/base/batch_struct.hpp @@ -54,7 +54,7 @@ namespace cuda { * while also shallow-casting to the required CUDA scalar type. * * A specialization is needed for every format of every kind of linear algebra - * object. These are intended to be called on the host. + * object. 
*/ @@ -66,9 +66,9 @@ inline batch::multi_vector::uniform_batch> get_batch_struct(const batch::MultiVector* const op) { return {as_cuda_type(op->get_const_values()), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } /** @@ -79,9 +79,9 @@ inline batch::multi_vector::uniform_batch> get_batch_struct(batch::MultiVector* const op) { return {as_cuda_type(op->get_values()), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } diff --git a/cuda/matrix/batch_struct.hpp b/cuda/matrix/batch_struct.hpp index f191953f7b9..8daf06f416c 100644 --- a/cuda/matrix/batch_struct.hpp +++ b/cuda/matrix/batch_struct.hpp @@ -58,7 +58,7 @@ namespace cuda { * while also shallow-casting to the required CUDA scalar type. * * A specialization is needed for every format of every kind of linear algebra - * object. These are intended to be called on the host. + * object. */ @@ -66,13 +66,13 @@ namespace cuda { * Generates an immutable uniform batch struct from a batch of dense matrices. */ template -inline batch::matrix::batch_dense::uniform_batch> +inline batch::matrix::dense::uniform_batch> get_batch_struct(const batch::matrix::Dense* const op) { return {as_cuda_type(op->get_const_values()), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } @@ -80,13 +80,13 @@ get_batch_struct(const batch::matrix::Dense* const op) * Generates a uniform batch struct from a batch of dense matrices. */ template -inline batch::matrix::batch_dense::uniform_batch> +inline batch::matrix::dense::uniform_batch> get_batch_struct(batch::matrix::Dense* const op) { return {as_cuda_type(op->get_values()), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } diff --git a/dpcpp/base/batch_struct.hpp b/dpcpp/base/batch_struct.hpp index 9c752a94b4f..2a0c03f552e 100644 --- a/dpcpp/base/batch_struct.hpp +++ b/dpcpp/base/batch_struct.hpp @@ -53,7 +53,7 @@ namespace dpcpp { * while also shallow-casting to the required DPCPP scalar type. * * A specialization is needed for every format of every kind of linear algebra - * object. These are intended to be called on the host. + * object. 
*/ @@ -65,9 +65,9 @@ inline batch::multi_vector::uniform_batch get_batch_struct( const batch::MultiVector* const op) { return {op->get_const_values(), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } @@ -79,9 +79,9 @@ inline batch::multi_vector::uniform_batch get_batch_struct( batch::MultiVector* const op) { return {op->get_values(), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } diff --git a/dpcpp/matrix/batch_dense_kernels.hpp.inc b/dpcpp/matrix/batch_dense_kernels.hpp.inc index dacd31feade..88ef5f54764 100644 --- a/dpcpp/matrix/batch_dense_kernels.hpp.inc +++ b/dpcpp/matrix/batch_dense_kernels.hpp.inc @@ -32,7 +32,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. template __dpct_inline__ void simple_apply_kernel( - const gko::batch::matrix::batch_dense::batch_item& mat, + const gko::batch::matrix::dense::batch_item& mat, const gko::batch::multi_vector::batch_item& b, const gko::batch::multi_vector::batch_item& x, sycl::nd_item<3>& item_ct1) @@ -66,7 +66,7 @@ __dpct_inline__ void simple_apply_kernel( template __dpct_inline__ void advanced_apply_kernel( const gko::batch::multi_vector::batch_item& alpha, - const gko::batch::matrix::batch_dense::batch_item& mat, + const gko::batch::matrix::dense::batch_item& mat, const gko::batch::multi_vector::batch_item& b, const gko::batch::multi_vector::batch_item& beta, const gko::batch::multi_vector::batch_item& x, diff --git a/dpcpp/matrix/batch_struct.hpp b/dpcpp/matrix/batch_struct.hpp index f561bf004c7..1955399d0d8 100644 --- a/dpcpp/matrix/batch_struct.hpp +++ b/dpcpp/matrix/batch_struct.hpp @@ -37,8 +37,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/matrix/batch_struct.hpp" -#include #include +#include #include "core/base/batch_struct.hpp" @@ -56,7 +56,7 @@ namespace dpcpp { * while also shallow-casting to the required DPCPP scalar type. * * A specialization is needed for every format of every kind of linear algebra - * object. These are intended to be called on the host. + * object. */ @@ -64,13 +64,13 @@ namespace dpcpp { * Generates an immutable uniform batch struct from a batch of dense matrices. */ template -inline batch::matrix::batch_dense::uniform_batch -get_batch_struct(const batch::matrix::Dense* const op) +inline batch::matrix::dense::uniform_batch get_batch_struct( + const batch::matrix::Dense* const op) { return {op->get_const_values(), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } @@ -78,13 +78,13 @@ get_batch_struct(const batch::matrix::Dense* const op) * Generates a uniform batch struct from a batch of dense matrices. 
*/ template -inline batch::matrix::batch_dense::uniform_batch get_batch_struct( +inline batch::matrix::dense::uniform_batch get_batch_struct( batch::matrix::Dense* const op) { return {op->get_values(), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } diff --git a/hip/base/batch_struct.hip.hpp b/hip/base/batch_struct.hip.hpp index 442260e50e6..732c40662aa 100644 --- a/hip/base/batch_struct.hip.hpp +++ b/hip/base/batch_struct.hip.hpp @@ -54,7 +54,7 @@ namespace hip { * while also shallow-casting to the required Hip scalar type. * * A specialization is needed for every format of every kind of linear algebra - * object. These are intended to be called on the host. + * object. */ @@ -66,9 +66,9 @@ inline batch::multi_vector::uniform_batch> get_batch_struct(const batch::MultiVector* const op) { return {as_hip_type(op->get_const_values()), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } /** @@ -79,9 +79,9 @@ inline batch::multi_vector::uniform_batch> get_batch_struct( batch::MultiVector* const op) { return {as_hip_type(op->get_values()), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } diff --git a/hip/matrix/batch_struct.hip.hpp b/hip/matrix/batch_struct.hip.hpp index c0659420661..a22797a03d4 100644 --- a/hip/matrix/batch_struct.hip.hpp +++ b/hip/matrix/batch_struct.hip.hpp @@ -58,7 +58,7 @@ namespace hip { * while also shallow-casting to the required HIP scalar type. * * A specialization is needed for every format of every kind of linear algebra - * object. These are intended to be called on the host. + * object. */ @@ -66,13 +66,13 @@ namespace hip { * Generates an immutable uniform batch struct from a batch of dense matrices. */ template -inline batch::matrix::batch_dense::uniform_batch> +inline batch::matrix::dense::uniform_batch> get_batch_struct(const batch::matrix::Dense* const op) { return {as_hip_type(op->get_const_values()), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } @@ -80,13 +80,13 @@ get_batch_struct(const batch::matrix::Dense* const op) * Generates a uniform batch struct from a batch of dense matrices. 
*/ template -inline batch::matrix::batch_dense::uniform_batch> +inline batch::matrix::dense::uniform_batch> get_batch_struct(batch::matrix::Dense* const op) { return {as_hip_type(op->get_values()), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index d081e5d440e..932c52edfc5 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -63,7 +63,7 @@ namespace matrix { * each batch item are also stored consecutively in memory). * * @note Though the storage layout is the same as the multi-vector object, the - * class semantics and the operations it aims to provide is different. Hence it + * class semantics and the operations it aims to provide are different. Hence it * is recommended to create multi-vector objects if the user means to view the * data as a set of vectors. * @@ -123,13 +123,13 @@ class Dense final : public EnableBatchLinOp>, create_const_multi_vector_view() const; /** - * Creates a mutable view (of matrix::Dense type) of one item of the + * Creates a mutable view (of gko::matrix::Dense type) of one item of the * batch::matrix::Dense object. Does not perform any deep * copies, but only returns a view of the data. * * @param item_id The index of the batch item * - * @return a batch::matrix::Dense object with the data from the batch item + * @return a gko::matrix::Dense object with the data from the batch item * at the given index. */ std::unique_ptr create_view_for_item(size_type item_id); @@ -168,7 +168,7 @@ class Dense final : public EnableBatchLinOp>, * * @note the method has to be called on the same Executor the matrix is * stored at (e.g. trying to call this method on a GPU Dense object - * from the OMP results in a runtime error) + * from the OMP may result in incorrect behaviour) */ value_type& at(size_type batch_id, size_type row, size_type col) { @@ -197,7 +197,7 @@ class Dense final : public EnableBatchLinOp>, * * @note the method has to be called on the same Executor the matrix is * stored at (e.g. trying to call this method on a GPU Dense object - * from the OMP results in a runtime error) + * from the OMP may result in incorrect behaviour) */ ValueType& at(size_type batch_id, size_type idx) noexcept { @@ -268,7 +268,7 @@ class Dense final : public EnableBatchLinOp>, */ static std::unique_ptr> create_const( std::shared_ptr exec, const batch_dim<2>& sizes, - gko::detail::const_array_view&& values); + detail::const_array_view&& values); /** * Apply the matrix to a multi-vector. Represents the matrix vector @@ -343,7 +343,7 @@ class Dense final : public EnableBatchLinOp>, } /** - * Creates a Dense matrix with the same configuration as the callers + * Creates a Dense matrix with the same configuration as the caller's * matrix. * * @returns a Dense matrix with the same configuration as the caller. 
diff --git a/reference/base/batch_struct.hpp b/reference/base/batch_struct.hpp index ce7c7af5605..0a3dbf37493 100644 --- a/reference/base/batch_struct.hpp +++ b/reference/base/batch_struct.hpp @@ -67,9 +67,9 @@ inline batch::multi_vector::uniform_batch get_batch_struct( const batch::MultiVector* const op) { return {op->get_const_values(), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } @@ -81,9 +81,9 @@ inline batch::multi_vector::uniform_batch get_batch_struct( batch::MultiVector* const op) { return {op->get_values(), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } diff --git a/reference/matrix/batch_dense_kernels.hpp.inc b/reference/matrix/batch_dense_kernels.hpp.inc index 20e395af5b7..17144267af1 100644 --- a/reference/matrix/batch_dense_kernels.hpp.inc +++ b/reference/matrix/batch_dense_kernels.hpp.inc @@ -32,7 +32,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. template inline void simple_apply_kernel( - const gko::batch::matrix::batch_dense::batch_item& a, + const gko::batch::matrix::dense::batch_item& a, const gko::batch::multi_vector::batch_item& b, const gko::batch::multi_vector::batch_item& c) { @@ -57,7 +57,7 @@ inline void simple_apply_kernel( template inline void advanced_apply_kernel( const ValueType alpha, - const gko::batch::matrix::batch_dense::batch_item& a, + const gko::batch::matrix::dense::batch_item& a, const gko::batch::multi_vector::batch_item& b, const ValueType beta, const gko::batch::multi_vector::batch_item& c) diff --git a/reference/matrix/batch_struct.hpp b/reference/matrix/batch_struct.hpp index 47d48f1e927..dcd4ce3e71e 100644 --- a/reference/matrix/batch_struct.hpp +++ b/reference/matrix/batch_struct.hpp @@ -39,6 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include @@ -67,13 +68,13 @@ namespace host { * Generates an immutable uniform batch struct from a batch of dense matrices. */ template -inline batch::matrix::batch_dense::uniform_batch -get_batch_struct(const batch::matrix::Dense* const op) +inline batch::matrix::dense::uniform_batch get_batch_struct( + const batch::matrix::Dense* const op) { return {op->get_const_values(), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } @@ -81,13 +82,13 @@ get_batch_struct(const batch::matrix::Dense* const op) * Generates a uniform batch struct from a batch of dense matrices. 
*/ template -inline batch::matrix::batch_dense::uniform_batch get_batch_struct( +inline batch::matrix::dense::uniform_batch get_batch_struct( batch::matrix::Dense* const op) { return {op->get_values(), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1])}; + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1])}; } diff --git a/reference/test/base/batch_multi_vector_kernels.cpp b/reference/test/base/batch_multi_vector_kernels.cpp index e0c7643c8d7..a49168dc24e 100644 --- a/reference/test/base/batch_multi_vector_kernels.cpp +++ b/reference/test/base/batch_multi_vector_kernels.cpp @@ -140,9 +140,9 @@ TYPED_TEST(MultiVector, ScalesData) auto ualpha = gko::batch::unbatch>(alpha.get()); this->mtx_0->scale(alpha.get()); + this->mtx_00->scale(ualpha[0].get()); this->mtx_01->scale(ualpha[1].get()); - auto res = gko::batch::unbatch>(this->mtx_0.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_00.get(), 0.); @@ -158,9 +158,9 @@ TYPED_TEST(MultiVector, ScalesDataWithScalar) auto ualpha = gko::batch::unbatch>(alpha.get()); this->mtx_1->scale(alpha.get()); + this->mtx_10->scale(ualpha[0].get()); this->mtx_11->scale(ualpha[1].get()); - auto res = gko::batch::unbatch>(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); @@ -196,9 +196,9 @@ TYPED_TEST(MultiVector, AddsScaled) auto ualpha = gko::batch::unbatch>(alpha.get()); this->mtx_1->add_scaled(alpha.get(), this->mtx_0.get()); + this->mtx_10->add_scaled(ualpha[0].get(), this->mtx_00.get()); this->mtx_11->add_scaled(ualpha[1].get(), this->mtx_01.get()); - auto res = gko::batch::unbatch>(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); @@ -214,9 +214,9 @@ TYPED_TEST(MultiVector, AddsScaledWithScalar) auto ualpha = gko::batch::unbatch>(alpha.get()); this->mtx_1->add_scaled(alpha.get(), this->mtx_0.get()); + this->mtx_10->add_scaled(ualpha[0].get(), this->mtx_00.get()); this->mtx_11->add_scaled(ualpha[1].get(), this->mtx_01.get()); - auto res = gko::batch::unbatch>(this->mtx_1.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), this->mtx_10.get(), 0.); @@ -244,9 +244,9 @@ TYPED_TEST(MultiVector, ComputesDot) auto ures = gko::batch::unbatch>(result.get()); this->mtx_0->compute_dot(this->mtx_1.get(), result.get()); + this->mtx_00->compute_dot(this->mtx_10.get(), ures[0].get()); this->mtx_01->compute_dot(this->mtx_11.get(), ures[1].get()); - auto res = gko::batch::unbatch>(result.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), ures[0].get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), ures[1].get(), 0.); @@ -256,6 +256,7 @@ TYPED_TEST(MultiVector, ComputesDot) TYPED_TEST(MultiVector, ComputeDotFailsOnWrongInputSize) { using Mtx = typename TestFixture::Mtx; + auto result = Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 3})); @@ -285,9 +286,9 @@ TYPED_TEST(MultiVector, ComputesConjDot) auto ures = gko::batch::unbatch>(result.get()); this->mtx_0->compute_conj_dot(this->mtx_1.get(), result.get()); + this->mtx_00->compute_conj_dot(this->mtx_10.get(), ures[0].get()); this->mtx_01->compute_conj_dot(this->mtx_11.get(), ures[1].get()); - auto res = gko::batch::unbatch>(result.get()); GKO_ASSERT_MTX_NEAR(res[0].get(), ures[0].get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), ures[1].get(), 0.); @@ -297,6 +298,7 @@ TYPED_TEST(MultiVector, ComputesConjDot) TYPED_TEST(MultiVector, ComputeConjDotFailsOnWrongInputSize) { using Mtx = typename TestFixture::Mtx; + auto 
result = Mtx::create(this->exec, gko::batch_dim<2>(2, gko::dim<2>{1, 3})); diff --git a/reference/test/matrix/batch_dense_kernels.cpp b/reference/test/matrix/batch_dense_kernels.cpp index 97dbe3e77cb..a85453edee8 100644 --- a/reference/test/matrix/batch_dense_kernels.cpp +++ b/reference/test/matrix/batch_dense_kernels.cpp @@ -57,14 +57,12 @@ class Dense : public ::testing::Test { protected: using value_type = T; using size_type = gko::size_type; - using Mtx = gko::batch::matrix::Dense; - using MVec = gko::batch::MultiVector; + using BMtx = gko::batch::matrix::Dense; + using BMVec = gko::batch::MultiVector; using DenseMtx = gko::matrix::Dense; - using ComplexMtx = gko::to_complex; - using RealMtx = gko::remove_complex; Dense() : exec(gko::ReferenceExecutor::create()), - mtx_0(gko::batch::initialize( + mtx_0(gko::batch::initialize( {{I({1.0, -1.0, 1.5}), I({-2.0, 2.0, 3.0})}, {{1.0, -2.0, -0.5}, {1.0, -2.5, 4.0}}}, exec)), @@ -72,7 +70,7 @@ class Dense : public ::testing::Test { {I({1.0, -1.0, 1.5}), I({-2.0, 2.0, 3.0})}, exec)), mtx_01(gko::initialize( {I({1.0, -2.0, -0.5}), I({1.0, -2.5, 4.0})}, exec)), - b_0(gko::batch::initialize( + b_0(gko::batch::initialize( {{I({1.0, 0.0, 1.0}), I({2.0, 0.0, 1.0}), I({1.0, 0.0, 2.0})}, {I({-1.0, 1.0, 1.0}), I({1.0, -1.0, 1.0}), @@ -86,7 +84,7 @@ class Dense : public ::testing::Test { {I({-1.0, 1.0, 1.0}), I({1.0, -1.0, 1.0}), I({1.0, 0.0, 2.0})}, exec)), - x_0(gko::batch::initialize( + x_0(gko::batch::initialize( {{I({2.0, 0.0, 1.0}), I({2.0, 0.0, 2.0})}, {I({-2.0, 1.0, 1.0}), I({1.0, -1.0, -1.0})}}, exec)), @@ -97,13 +95,13 @@ class Dense : public ::testing::Test { {} std::shared_ptr exec; - std::unique_ptr mtx_0; + std::unique_ptr mtx_0; std::unique_ptr mtx_00; std::unique_ptr mtx_01; - std::unique_ptr b_0; + std::unique_ptr b_0; std::unique_ptr b_00; std::unique_ptr b_01; - std::unique_ptr x_0; + std::unique_ptr x_0; std::unique_ptr x_00; std::unique_ptr x_01; @@ -119,11 +117,10 @@ TYPED_TEST(Dense, AppliesToBatchMultiVector) using T = typename TestFixture::value_type; this->mtx_0->apply(this->b_0.get(), this->x_0.get()); + this->mtx_00->apply(this->b_00.get(), this->x_00.get()); this->mtx_01->apply(this->b_01.get(), this->x_01.get()); - auto res = gko::batch::unbatch>(this->x_0.get()); - GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); } @@ -131,12 +128,12 @@ TYPED_TEST(Dense, AppliesToBatchMultiVector) TYPED_TEST(Dense, AppliesLinearCombinationWithSameAlphaToBatchMultiVector) { - using Mtx = typename TestFixture::Mtx; - using MVec = typename TestFixture::MVec; + using BMtx = typename TestFixture::BMtx; + using BMVec = typename TestFixture::BMVec; using DenseMtx = typename TestFixture::DenseMtx; using T = typename TestFixture::value_type; - auto alpha = gko::batch::initialize(2, {1.5}, this->exec); - auto beta = gko::batch::initialize(2, {-4.0}, this->exec); + auto alpha = gko::batch::initialize(2, {1.5}, this->exec); + auto beta = gko::batch::initialize(2, {-4.0}, this->exec); auto alpha0 = gko::initialize({1.5}, this->exec); auto alpha1 = gko::initialize({1.5}, this->exec); auto beta0 = gko::initialize({-4.0}, this->exec); @@ -144,13 +141,12 @@ TYPED_TEST(Dense, AppliesLinearCombinationWithSameAlphaToBatchMultiVector) this->mtx_0->apply(alpha.get(), this->b_0.get(), beta.get(), this->x_0.get()); + this->mtx_00->apply(alpha0.get(), this->b_00.get(), beta0.get(), this->x_00.get()); this->mtx_01->apply(alpha1.get(), this->b_01.get(), beta1.get(), this->x_01.get()); - auto res = 
gko::batch::unbatch>(this->x_0.get()); - GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); } @@ -158,12 +154,12 @@ TYPED_TEST(Dense, AppliesLinearCombinationWithSameAlphaToBatchMultiVector) TYPED_TEST(Dense, AppliesLinearCombinationToBatchMultiVector) { - using Mtx = typename TestFixture::Mtx; - using MVec = typename TestFixture::MVec; + using BMtx = typename TestFixture::BMtx; + using BMVec = typename TestFixture::BMVec; using DenseMtx = typename TestFixture::DenseMtx; using T = typename TestFixture::value_type; - auto alpha = gko::batch::initialize({{1.5}, {-1.0}}, this->exec); - auto beta = gko::batch::initialize({{2.5}, {-4.0}}, this->exec); + auto alpha = gko::batch::initialize({{1.5}, {-1.0}}, this->exec); + auto beta = gko::batch::initialize({{2.5}, {-4.0}}, this->exec); auto alpha0 = gko::initialize({1.5}, this->exec); auto alpha1 = gko::initialize({-1.0}, this->exec); auto beta0 = gko::initialize({2.5}, this->exec); @@ -171,13 +167,12 @@ TYPED_TEST(Dense, AppliesLinearCombinationToBatchMultiVector) this->mtx_0->apply(alpha.get(), this->b_0.get(), beta.get(), this->x_0.get()); + this->mtx_00->apply(alpha0.get(), this->b_00.get(), beta0.get(), this->x_00.get()); this->mtx_01->apply(alpha1.get(), this->b_01.get(), beta1.get(), this->x_01.get()); - auto res = gko::batch::unbatch>(this->x_0.get()); - GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); } @@ -185,8 +180,9 @@ TYPED_TEST(Dense, AppliesLinearCombinationToBatchMultiVector) TYPED_TEST(Dense, ApplyFailsOnWrongNumberOfResultCols) { - using MVec = typename TestFixture::MVec; - auto res = MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2}}); + using BMVec = typename TestFixture::BMVec; + + auto res = BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2}}); ASSERT_THROW(this->mtx_0->apply(this->b_0.get(), res.get()), gko::DimensionMismatch); @@ -195,8 +191,9 @@ TYPED_TEST(Dense, ApplyFailsOnWrongNumberOfResultCols) TYPED_TEST(Dense, ApplyFailsOnWrongNumberOfResultRows) { - using MVec = typename TestFixture::MVec; - auto res = MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3}}); + using BMVec = typename TestFixture::BMVec; + + auto res = BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3}}); ASSERT_THROW(this->mtx_0->apply(this->b_0.get(), res.get()), gko::DimensionMismatch); @@ -205,9 +202,10 @@ TYPED_TEST(Dense, ApplyFailsOnWrongNumberOfResultRows) TYPED_TEST(Dense, ApplyFailsOnWrongInnerDimension) { - using MVec = typename TestFixture::MVec; + using BMVec = typename TestFixture::BMVec; + auto res = - MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}}); + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}}); ASSERT_THROW(this->mtx_0->apply(res.get(), this->x_0.get()), gko::DimensionMismatch); @@ -216,13 +214,13 @@ TYPED_TEST(Dense, ApplyFailsOnWrongInnerDimension) TYPED_TEST(Dense, AdvancedApplyFailsOnWrongInnerDimension) { - using MVec = typename TestFixture::MVec; + using BMVec = typename TestFixture::BMVec; auto res = - MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}}); + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}}); auto alpha = - MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); auto beta = - MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + BMVec::create(this->exec, gko::batch_dim<2>{2, 
gko::dim<2>{1, 1}}); ASSERT_THROW( this->mtx_0->apply(alpha.get(), res.get(), beta.get(), this->x_0.get()), @@ -232,13 +230,13 @@ TYPED_TEST(Dense, AdvancedApplyFailsOnWrongInnerDimension) TYPED_TEST(Dense, AdvancedApplyFailsOnWrongAlphaDimension) { - using MVec = typename TestFixture::MVec; + using BMVec = typename TestFixture::BMVec; auto res = - MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3, 3}}); + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3, 3}}); auto alpha = - MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 1}}); + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 1}}); auto beta = - MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); ASSERT_THROW( this->mtx_0->apply(alpha.get(), res.get(), beta.get(), this->x_0.get()), diff --git a/test/matrix/batch_dense_kernels.cpp b/test/matrix/batch_dense_kernels.cpp index a73efcd8753..119a868be09 100644 --- a/test/matrix/batch_dense_kernels.cpp +++ b/test/matrix/batch_dense_kernels.cpp @@ -55,17 +55,17 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. class Dense : public CommonTestFixture { protected: - using Mtx = gko::batch::matrix::Dense; - using MVec = gko::batch::MultiVector; + using BMtx = gko::batch::matrix::Dense; + using BMVec = gko::batch::MultiVector; Dense() : rand_engine(15) {} - template - std::unique_ptr gen_mtx(const gko::size_type num_batch_items, - gko::size_type num_rows, - gko::size_type num_cols) + template + std::unique_ptr gen_mtx(const gko::size_type num_batch_items, + gko::size_type num_rows, + gko::size_type num_cols) { - return gko::test::generate_random_batch_matrix( + return gko::test::generate_random_batch_matrix( num_batch_items, num_rows, num_cols, std::uniform_int_distribution<>(num_cols, num_cols), std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); @@ -75,15 +75,15 @@ class Dense : public CommonTestFixture { { const int num_rows = 252; const int num_cols = 32; - x = gen_mtx(batch_size, num_rows, num_cols); - y = gen_mtx(batch_size, num_cols, num_vecs); - alpha = gen_mtx(batch_size, 1, 1); - beta = gen_mtx(batch_size, 1, 1); + x = gen_mtx(batch_size, num_rows, num_cols); + y = gen_mtx(batch_size, num_cols, num_vecs); + alpha = gen_mtx(batch_size, 1, 1); + beta = gen_mtx(batch_size, 1, 1); dx = gko::clone(exec, x); dy = gko::clone(exec, y); dalpha = gko::clone(exec, alpha); dbeta = gko::clone(exec, beta); - expected = MVec::create( + expected = BMVec::create( ref, gko::batch_dim<2>(batch_size, gko::dim<2>{num_rows, num_vecs})); expected->fill(gko::one()); @@ -93,16 +93,16 @@ class Dense : public CommonTestFixture { std::default_random_engine rand_engine; const size_t batch_size = 11; - std::unique_ptr x; - std::unique_ptr y; - std::unique_ptr alpha; - std::unique_ptr beta; - std::unique_ptr expected; - std::unique_ptr dresult; - std::unique_ptr dx; - std::unique_ptr dy; - std::unique_ptr dalpha; - std::unique_ptr dbeta; + std::unique_ptr x; + std::unique_ptr y; + std::unique_ptr alpha; + std::unique_ptr beta; + std::unique_ptr expected; + std::unique_ptr dresult; + std::unique_ptr dx; + std::unique_ptr dy; + std::unique_ptr dalpha; + std::unique_ptr dbeta; }; From f210ea939b8b8cbdeed2d69e65e54d0a6e06064d Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 9 Oct 2023 22:56:15 +0200 Subject: [PATCH 20/28] dpcpp Jacobi needs ranlux --- dpcpp/test/preconditioner/jacobi_kernels.dp.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/dpcpp/test/preconditioner/jacobi_kernels.dp.cpp b/dpcpp/test/preconditioner/jacobi_kernels.dp.cpp index b8082a2db32..aae15245357 100644 --- a/dpcpp/test/preconditioner/jacobi_kernels.dp.cpp +++ b/dpcpp/test/preconditioner/jacobi_kernels.dp.cpp @@ -90,7 +90,7 @@ class Jacobi : public ::testing::Test { gko::uint32 max_block_size, int min_nnz, int max_nnz, int num_rhs = 1, value_type accuracy = 0.1, bool skip_sorting = true) { - std::default_random_engine engine(42); + std::ranlux48 engine(42); const auto dim = *(end(block_pointers) - 1); if (condition_numbers.size() == 0) { mtx = gko::test::generate_random_matrix( From fb74b718b2ec2073d598588688b470480bd0e447 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 9 Oct 2023 23:12:59 +0200 Subject: [PATCH 21/28] Remove create_multivector_view --- core/matrix/batch_dense.cpp | 32 ---------------------- core/test/matrix/batch_dense.cpp | 7 ----- include/ginkgo/core/matrix/batch_dense.hpp | 20 ++------------ 3 files changed, 2 insertions(+), 57 deletions(-) diff --git a/core/matrix/batch_dense.cpp b/core/matrix/batch_dense.cpp index b948a2c3afc..da092a20229 100644 --- a/core/matrix/batch_dense.cpp +++ b/core/matrix/batch_dense.cpp @@ -64,38 +64,6 @@ GKO_REGISTER_OPERATION(advanced_apply, batch_dense::advanced_apply); } // namespace dense -template -std::unique_ptr> -Dense::create_multi_vector_view() -{ - auto exec = this->get_executor(); - auto num_batch_items = this->get_num_batch_items(); - auto num_rows = this->get_common_size()[0]; - auto stride = this->get_common_size()[1]; - auto mvec = MultiVector::create( - exec, this->get_size(), - make_array_view(exec, num_batch_items * num_rows * stride, - this->get_values())); - return mvec; -} - - -template -std::unique_ptr> -Dense::create_const_multi_vector_view() const -{ - auto exec = this->get_executor(); - auto num_batch_items = this->get_num_batch_items(); - auto num_rows = this->get_common_size()[0]; - auto stride = this->get_common_size()[1]; - auto mvec = MultiVector::create_const( - exec, this->get_size(), - make_const_array_view(exec, num_batch_items * num_rows * stride, - this->get_const_values())); - return mvec; -} - - template std::unique_ptr> Dense::create_view_for_item(size_type item_id) diff --git a/core/test/matrix/batch_dense.cpp b/core/test/matrix/batch_dense.cpp index 36fc3f2ee4a..316312bd68f 100644 --- a/core/test/matrix/batch_dense.cpp +++ b/core/test/matrix/batch_dense.cpp @@ -138,13 +138,6 @@ TYPED_TEST(Dense, CanCreateDenseItemView) } -TYPED_TEST(Dense, CanCreateMultiVectorView) -{ - GKO_ASSERT_BATCH_MTX_NEAR(this->mtx->create_multi_vector_view(), this->mvec, - 0.0); -} - - TYPED_TEST(Dense, CanBeCopied) { auto mtx_copy = gko::batch::matrix::Dense::create(this->exec); diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 932c52edfc5..50f8fe39727 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -106,22 +106,6 @@ class Dense final : public EnableBatchLinOp>, void move_to(Dense>* result) override; - /** - * Creates a mutable view (of MultiVector type) of the data owned by the - * matrix::Dense object. Does not perform any deep copies, but only - * returns a view of the underlying data. - * - * @return a MultiVector object with a view of the data from the batch - * dense matrix. 
- */ - std::unique_ptr> create_multi_vector_view(); - - /** - * @copydoc create_const_multi_vector_view() - */ - std::unique_ptr> - create_const_multi_vector_view() const; - /** * Creates a mutable view (of gko::matrix::Dense type) of one item of the * batch::matrix::Dense object. Does not perform any deep
@@ -234,8 +218,8 @@ class Dense final : public EnableBatchLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type* get_const_values_for_item( - size_type batch_id) const noexcept + const value_type* get_const_values_for_item(size_type batch_id) const + noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() +
From 660ec7ca1eb1b6e813ec818e547f06895195b016 Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Mon, 9 Oct 2023 21:50:28 +0000 Subject: [PATCH 22/28] Format files
Co-authored-by: Pratik Nayak --- include/ginkgo/core/matrix/batch_dense.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 50f8fe39727..2a33a0a8df3 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp
@@ -218,8 +218,8 @@ class Dense final : public EnableBatchLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type* get_const_values_for_item(size_type batch_id) const - noexcept + const value_type* get_const_values_for_item( + size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() +
From 76726e90bcab3378344021129a38075b270130a6 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 10 Oct 2023 00:07:15 +0200 Subject: [PATCH 23/28] const_array_view needs to be in gko::
MSVC compiler fails lookup in gko::detail if there exists a gko::x::detail namespace --- include/ginkgo/core/matrix/batch_dense.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 2a33a0a8df3..89f12d69f62 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp
@@ -252,7 +252,7 @@ class Dense final : public EnableBatchLinOp>, */ static std::unique_ptr> create_const( std::shared_ptr exec, const batch_dim<2>& sizes, - detail::const_array_view&& values); + gko::detail::const_array_view&& values); /** * Apply the matrix to a multi-vector.
Represents the matrix vector From 09b757448223297ce568b278dedf0de3c9029a3e Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 10 Oct 2023 16:15:32 +0200 Subject: [PATCH 24/28] Review updates Co-authored-by: Yu-Hsiang Tsai Co-authored-by: Marcel Koch --- core/matrix/batch_dense.cpp | 14 +---- core/test/base/batch_dim.cpp | 10 ---- core/test/matrix/batch_dense.cpp | 16 +++-- cuda/base/batch_struct.hpp | 2 +- cuda/matrix/batch_dense_kernels.cu | 1 + cuda/matrix/batch_struct.hpp | 2 +- dpcpp/base/batch_struct.hpp | 2 +- dpcpp/matrix/batch_dense_kernels.dp.cpp | 58 +++++++++---------- dpcpp/matrix/batch_struct.hpp | 2 +- hip/base/batch_struct.hip.hpp | 2 +- hip/matrix/batch_dense_kernels.hip.cpp | 2 + hip/matrix/batch_struct.hip.hpp | 2 +- include/ginkgo/core/base/batch_dim.hpp | 12 ---- .../ginkgo/core/base/batch_multi_vector.hpp | 24 ++++++-- include/ginkgo/core/matrix/batch_dense.hpp | 36 +++++++----- reference/test/matrix/batch_dense_kernels.cpp | 26 --------- 16 files changed, 89 insertions(+), 122 deletions(-) diff --git a/core/matrix/batch_dense.cpp b/core/matrix/batch_dense.cpp index da092a20229..7675fcdde9c 100644 --- a/core/matrix/batch_dense.cpp +++ b/core/matrix/batch_dense.cpp @@ -100,19 +100,7 @@ template std::unique_ptr> Dense::create_with_config_of( ptr_param> other) { - // De-referencing `other` before calling the functions (instead of - // using operator `->`) is currently required to be compatible with - // CUDA 10.1. - // Otherwise, it results in a compile error. - return (*other).create_with_same_config(); -} - - -template -std::unique_ptr> Dense::create_with_same_config() - const -{ - return Dense::create(this->get_executor(), this->get_size()); + return Dense::create(other->get_executor(), other->get_size()); } diff --git a/core/test/base/batch_dim.cpp b/core/test/base/batch_dim.cpp index 7914eb4d15e..e8722530fba 100644 --- a/core/test/base/batch_dim.cpp +++ b/core/test/base/batch_dim.cpp @@ -85,16 +85,6 @@ TEST(BatchDim, NotEqualWorks) } -TEST(BatchDim, CanGetCumulativeOffsets) -{ - auto d = gko::batch_dim<2>(3, gko::dim<2>(4, 2)); - - ASSERT_EQ(d.get_cumulative_offset(0), 0); - ASSERT_EQ(d.get_cumulative_offset(1), 8); - ASSERT_EQ(d.get_cumulative_offset(2), 16); -} - - TEST(BatchDim, TransposesBatchDimensions) { ASSERT_EQ(gko::transpose(gko::batch_dim<2>(2, gko::dim<2>{4, 2})), diff --git a/core/test/matrix/batch_dense.cpp b/core/test/matrix/batch_dense.cpp index 316312bd68f..7bde0c708dc 100644 --- a/core/test/matrix/batch_dense.cpp +++ b/core/test/matrix/batch_dense.cpp @@ -289,7 +289,7 @@ TYPED_TEST(Dense, CanBeConstructedFromDenseMatricesByDuplication) gko::batch::create_from_item>( this->exec, 3, mat1.get()); - GKO_ASSERT_BATCH_MTX_NEAR(bat_m.get(), m.get(), 1e-14); + GKO_ASSERT_BATCH_MTX_NEAR(bat_m.get(), m.get(), 0); } @@ -316,7 +316,7 @@ TYPED_TEST(Dense, CanBeConstructedByDuplicatingDenseMatrices) auto m2 = gko::batch::duplicate>( this->exec, 3, m.get()); - GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 1e-14); + GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 0); } @@ -384,13 +384,21 @@ TYPED_TEST(Dense, CanBeDoubleListConstructed) EXPECT_EQ(m->at(0, 0), value_type{1.0}); EXPECT_EQ(m->at(0, 1), value_type{1.0}); EXPECT_EQ(m->at(0, 2), value_type{0.0}); - ASSERT_EQ(m->at(0, 3), value_type{2.0}); + EXPECT_EQ(m->at(0, 3), value_type{2.0}); EXPECT_EQ(m->at(0, 4), value_type{4.0}); + EXPECT_EQ(m->at(0, 5), value_type{3.0}); + EXPECT_EQ(m->at(0, 6), value_type{3.0}); + EXPECT_EQ(m->at(0, 7), value_type{6.0}); + EXPECT_EQ(m->at(0, 8), value_type{1.0}); 
EXPECT_EQ(m->at(1, 0), value_type{1.0}); EXPECT_EQ(m->at(1, 1), value_type{2.0}); EXPECT_EQ(m->at(1, 2), value_type{-1.0}); - ASSERT_EQ(m->at(1, 3), value_type{3.0}); + EXPECT_EQ(m->at(1, 3), value_type{3.0}); EXPECT_EQ(m->at(1, 4), value_type{4.0}); + EXPECT_EQ(m->at(1, 5), value_type{-2.0}); + EXPECT_EQ(m->at(1, 6), value_type{5.0}); + EXPECT_EQ(m->at(1, 7), value_type{6.0}); + EXPECT_EQ(m->at(1, 8), value_type{-3.0}); } diff --git a/cuda/base/batch_struct.hpp b/cuda/base/batch_struct.hpp index 12f34509275..14b300c9204 100644 --- a/cuda/base/batch_struct.hpp +++ b/cuda/base/batch_struct.hpp @@ -54,7 +54,7 @@ namespace cuda { * while also shallow-casting to the required CUDA scalar type. * * A specialization is needed for every format of every kind of linear algebra - * object. + * object. These are intended to be called on the host. */ diff --git a/cuda/matrix/batch_dense_kernels.cu b/cuda/matrix/batch_dense_kernels.cu index 4f1dbc8f4d4..47c478864cf 100644 --- a/cuda/matrix/batch_dense_kernels.cu +++ b/cuda/matrix/batch_dense_kernels.cu @@ -77,6 +77,7 @@ constexpr int sm_oversubscription = 4; #include "common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc" + // clang-format on diff --git a/cuda/matrix/batch_struct.hpp b/cuda/matrix/batch_struct.hpp index 8daf06f416c..2ae453b6e61 100644 --- a/cuda/matrix/batch_struct.hpp +++ b/cuda/matrix/batch_struct.hpp @@ -58,7 +58,7 @@ namespace cuda { * while also shallow-casting to the required CUDA scalar type. * * A specialization is needed for every format of every kind of linear algebra - * object. + * object. These are intended to be called on the host. */ diff --git a/dpcpp/base/batch_struct.hpp b/dpcpp/base/batch_struct.hpp index 2a0c03f552e..dc8301ecb2e 100644 --- a/dpcpp/base/batch_struct.hpp +++ b/dpcpp/base/batch_struct.hpp @@ -53,7 +53,7 @@ namespace dpcpp { * while also shallow-casting to the required DPCPP scalar type. * * A specialization is needed for every format of every kind of linear algebra - * object. + * object. These are intended to be called on the host. 
*/ diff --git a/dpcpp/matrix/batch_dense_kernels.dp.cpp b/dpcpp/matrix/batch_dense_kernels.dp.cpp index 6aec3e57fc5..8fca47c27b8 100644 --- a/dpcpp/matrix/batch_dense_kernels.dp.cpp +++ b/dpcpp/matrix/batch_dense_kernels.dp.cpp @@ -98,19 +98,19 @@ void simple_apply(std::shared_ptr exec, } // Launch a kernel that has nbatches blocks, each block has max group size - (exec->get_queue())->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto mat_b = - batch::matrix::extract_batch_item(mat_ub, group_id); - const auto b_b = batch::extract_batch_item(b_ub, group_id); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - simple_apply_kernel(mat_b, b_b, x_b, item_ct1); - }); + sycl_nd_range(grid, block), [= + ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + simple_apply_kernel(mat_b, b_b, x_b, item_ct1); + }); }); } @@ -145,24 +145,24 @@ void advanced_apply(std::shared_ptr exec, const dim3 grid(num_batch_items); // Launch a kernel that has nbatches blocks, each block has max group size - (exec->get_queue())->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto mat_b = - batch::matrix::extract_batch_item(mat_ub, group_id); - const auto b_b = batch::extract_batch_item(b_ub, group_id); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - const auto alpha_b = - batch::extract_batch_item(alpha_ub, group_id); - const auto beta_b = - batch::extract_batch_item(beta_ub, group_id); - advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, - item_ct1); - }); + sycl_nd_range(grid, block), [= + ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto alpha_b = + batch::extract_batch_item(alpha_ub, group_id); + const auto beta_b = + batch::extract_batch_item(beta_ub, group_id); + advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, + item_ct1); + }); }); } diff --git a/dpcpp/matrix/batch_struct.hpp b/dpcpp/matrix/batch_struct.hpp index 1955399d0d8..d452f78644f 100644 --- a/dpcpp/matrix/batch_struct.hpp +++ b/dpcpp/matrix/batch_struct.hpp @@ -56,7 +56,7 @@ namespace dpcpp { * while also shallow-casting to the required DPCPP scalar type. * * A specialization is needed for every format of every kind of linear algebra - * object. + * object. These are intended to be called on the host. 
*/ diff --git a/hip/base/batch_struct.hip.hpp b/hip/base/batch_struct.hip.hpp index 732c40662aa..5747e202fb7 100644 --- a/hip/base/batch_struct.hip.hpp +++ b/hip/base/batch_struct.hip.hpp @@ -54,7 +54,7 @@ namespace hip { * while also shallow-casting to the required Hip scalar type. * * A specialization is needed for every format of every kind of linear algebra - * object. + * object. These are intended to be called on the host. */ diff --git a/hip/matrix/batch_dense_kernels.hip.cpp b/hip/matrix/batch_dense_kernels.hip.cpp index aa6d717438e..a0fdea446be 100644 --- a/hip/matrix/batch_dense_kernels.hip.cpp +++ b/hip/matrix/batch_dense_kernels.hip.cpp @@ -79,8 +79,10 @@ constexpr int sm_oversubscription = 4; #include "common/cuda_hip/matrix/batch_dense_kernel_launcher.hpp.inc" + // clang-format on + } // namespace batch_dense } // namespace hip } // namespace kernels diff --git a/hip/matrix/batch_struct.hip.hpp b/hip/matrix/batch_struct.hip.hpp index a22797a03d4..c1bd6441367 100644 --- a/hip/matrix/batch_struct.hip.hpp +++ b/hip/matrix/batch_struct.hip.hpp @@ -58,7 +58,7 @@ namespace hip { * while also shallow-casting to the required HIP scalar type. * * A specialization is needed for every format of every kind of linear algebra - * object. + * object. These are intended to be called on the host. */ diff --git a/include/ginkgo/core/base/batch_dim.hpp b/include/ginkgo/core/base/batch_dim.hpp index 3bda352fb9d..e0ade2c872f 100644 --- a/include/ginkgo/core/base/batch_dim.hpp +++ b/include/ginkgo/core/base/batch_dim.hpp @@ -74,18 +74,6 @@ struct batch_dim { return common_size_; } - /** - * Get the cumulative storage size offset - * - * @param batch_id the batch id - * - * @return the cumulative offset - */ - size_type get_cumulative_offset(size_type batch_id) const - { - return batch_id * common_size_[0] * common_size_[1]; - } - /** * Checks if two batch_dim objects are equal. * diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index 7830a4c6efb..61dffba3193 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -202,8 +202,7 @@ class MultiVector value_type* get_values_for_item(size_type batch_id) noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); - return values_.get_data() + - this->get_size().get_cumulative_offset(batch_id); + return values_.get_data() + this->get_cumulative_offset(batch_id); } /** @@ -217,8 +216,7 @@ class MultiVector size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); - return values_.get_const_data() + - this->get_size().get_cumulative_offset(batch_id); + return values_.get_const_data() + this->get_cumulative_offset(batch_id); } /** @@ -233,6 +231,19 @@ class MultiVector return values_.get_num_elems(); } + /** + * Get the cumulative storage size offset + * + * @param batch_id the batch id + * + * @return the cumulative offset + */ + size_type get_cumulative_offset(size_type batch_id) const + { + return batch_id * this->get_common_size()[0] * + this->get_common_size()[1]; + } + /** * Returns a single element for a particular batch item. 
* @@ -375,7 +386,8 @@ class MultiVector private: inline size_type compute_num_elems(const batch_dim<2>& size) { - return size.get_cumulative_offset(size.get_num_batch_items()); + return size.get_num_batch_items() * size.get_common_size()[0] * + size.get_common_size()[1]; } protected: @@ -434,7 +446,7 @@ class MultiVector size_type linearize_index(size_type batch, size_type row, size_type col) const noexcept { - return batch_size_.get_cumulative_offset(batch) + + return this->get_cumulative_offset(batch) + row * batch_size_.get_common_size()[1] + col; } diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 89f12d69f62..59ab92cd146 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -124,6 +124,19 @@ class Dense final : public EnableBatchLinOp>, std::unique_ptr create_const_view_for_item( size_type item_id) const; + /** + * Get the cumulative storage size offset + * + * @param batch_id the batch id + * + * @return the cumulative offset + */ + size_type get_cumulative_offset(size_type batch_id) const + { + return batch_id * this->get_common_size()[0] * + this->get_common_size()[1]; + } + /** * Returns a pointer to the array of values of the multi-vector * @@ -207,8 +220,7 @@ class Dense final : public EnableBatchLinOp>, value_type* get_values_for_item(size_type batch_id) noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); - return values_.get_data() + - this->get_size().get_cumulative_offset(batch_id); + return values_.get_data() + this->get_cumulative_offset(batch_id); } /** @@ -222,8 +234,7 @@ class Dense final : public EnableBatchLinOp>, size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); - return values_.get_const_data() + - this->get_size().get_cumulative_offset(batch_id); + return values_.get_const_data() + this->get_cumulative_offset(batch_id); } /** @@ -269,8 +280,8 @@ class Dense final : public EnableBatchLinOp>, /** * Apply the matrix to a multi-vector with a linear combination of the given - * input vector. Represents the matrix vector multiplication, x = alpha* A * - * b + beta * x, where x and b are both multi-vectors. + * input vector. Represents the matrix vector multiplication, x = alpha * A + * * b + beta * x, where x and b are both multi-vectors. * * @param alpha the scalar to scale the matrix-vector product with * @param b the multi-vector to be applied to @@ -288,7 +299,8 @@ class Dense final : public EnableBatchLinOp>, private: inline size_type compute_num_elems(const batch_dim<2>& size) { - return size.get_cumulative_offset(size.get_num_batch_items()); + return size.get_num_batch_items() * size.get_common_size()[0] * + size.get_common_size()[1]; } protected: @@ -326,14 +338,6 @@ class Dense final : public EnableBatchLinOp>, GKO_ENSURE_IN_BOUNDS(num_elems, values_.get_num_elems() + 1); } - /** - * Creates a Dense matrix with the same configuration as the caller's - * matrix. - * - * @returns a Dense matrix with the same configuration as the caller. 
- */ - std::unique_ptr create_with_same_config() const; - void apply_impl(const MultiVector* b, MultiVector* x) const; @@ -345,7 +349,7 @@ class Dense final : public EnableBatchLinOp>, size_type linearize_index(size_type batch, size_type row, size_type col) const noexcept { - return this->get_size().get_cumulative_offset(batch) + + return this->get_cumulative_offset(batch) + row * this->get_size().get_common_size()[1] + col; } diff --git a/reference/test/matrix/batch_dense_kernels.cpp b/reference/test/matrix/batch_dense_kernels.cpp index a85453edee8..6a23374f7cb 100644 --- a/reference/test/matrix/batch_dense_kernels.cpp +++ b/reference/test/matrix/batch_dense_kernels.cpp @@ -126,32 +126,6 @@ TYPED_TEST(Dense, AppliesToBatchMultiVector) } -TYPED_TEST(Dense, AppliesLinearCombinationWithSameAlphaToBatchMultiVector) -{ - using BMtx = typename TestFixture::BMtx; - using BMVec = typename TestFixture::BMVec; - using DenseMtx = typename TestFixture::DenseMtx; - using T = typename TestFixture::value_type; - auto alpha = gko::batch::initialize(2, {1.5}, this->exec); - auto beta = gko::batch::initialize(2, {-4.0}, this->exec); - auto alpha0 = gko::initialize({1.5}, this->exec); - auto alpha1 = gko::initialize({1.5}, this->exec); - auto beta0 = gko::initialize({-4.0}, this->exec); - auto beta1 = gko::initialize({-4.0}, this->exec); - - this->mtx_0->apply(alpha.get(), this->b_0.get(), beta.get(), - this->x_0.get()); - - this->mtx_00->apply(alpha0.get(), this->b_00.get(), beta0.get(), - this->x_00.get()); - this->mtx_01->apply(alpha1.get(), this->b_01.get(), beta1.get(), - this->x_01.get()); - auto res = gko::batch::unbatch>(this->x_0.get()); - GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); - GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); -} - - TYPED_TEST(Dense, AppliesLinearCombinationToBatchMultiVector) { using BMtx = typename TestFixture::BMtx; From 94452e9f06d4dc0ec1cbbb8860a088a333779acc Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 10 Oct 2023 16:35:46 +0200 Subject: [PATCH 25/28] Move apply validation to BatchLinOp --- core/matrix/batch_dense.cpp | 16 ++------- include/ginkgo/core/base/batch_lin_op.hpp | 40 +++++++++++++++++++++++ 2 files changed, 42 insertions(+), 14 deletions(-) diff --git a/core/matrix/batch_dense.cpp b/core/matrix/batch_dense.cpp index 7675fcdde9c..758635cea7f 100644 --- a/core/matrix/batch_dense.cpp +++ b/core/matrix/batch_dense.cpp @@ -128,12 +128,7 @@ template void Dense::apply_impl(const MultiVector* b, MultiVector* x) const { - GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); - GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); - - GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); - GKO_ASSERT_EQUAL_ROWS(this->get_common_size(), x->get_common_size()); - GKO_ASSERT_EQUAL_COLS(b->get_common_size(), x->get_common_size()); + this->validate_application_parameters(b, x); this->get_executor()->run(dense::make_simple_apply(this, b, x)); } @@ -144,14 +139,7 @@ void Dense::apply_impl(const MultiVector* alpha, const MultiVector* beta, MultiVector* x) const { - GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); - GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); - - GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); - GKO_ASSERT_EQUAL_ROWS(this->get_common_size(), x->get_common_size()); - GKO_ASSERT_EQUAL_COLS(b->get_common_size(), x->get_common_size()); - GKO_ASSERT_EQUAL_DIMENSIONS(alpha->get_common_size(), gko::dim<2>(1, 1)); - 
GKO_ASSERT_EQUAL_DIMENSIONS(beta->get_common_size(), gko::dim<2>(1, 1)); + this->validate_application_parameters(alpha, b, beta, x); this->get_executor()->run( dense::make_advanced_apply(alpha, this, b, beta, x)); } diff --git a/include/ginkgo/core/base/batch_lin_op.hpp b/include/ginkgo/core/base/batch_lin_op.hpp index 78ce4f4a942..a0efb2ea324 100644 --- a/include/ginkgo/core/base/batch_lin_op.hpp +++ b/include/ginkgo/core/base/batch_lin_op.hpp @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include #include #include @@ -110,6 +111,45 @@ class BatchLinOp : public EnableAbstractPolymorphicObject { */ const batch_dim<2>& get_size() const noexcept { return size_; } + /** + * Validates the sizes for the apply(b,x) operation in the + * concrete BatchLinOp. + * + */ + template + void validate_application_parameters(const MultiVector* b, + MultiVector* x) const + { + GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); + GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); + + GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); + GKO_ASSERT_EQUAL_ROWS(this->get_common_size(), x->get_common_size()); + GKO_ASSERT_EQUAL_COLS(b->get_common_size(), x->get_common_size()); + } + + /** + * Validates the sizes for the apply(alpha, b , beta, x) operation in the + * concrete BatchLinOp. + * + */ + template + void validate_application_parameters(const MultiVector* alpha, + const MultiVector* b, + const MultiVector* beta, + MultiVector* x) const + { + GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); + GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); + + GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); + GKO_ASSERT_EQUAL_ROWS(this->get_common_size(), x->get_common_size()); + GKO_ASSERT_EQUAL_COLS(b->get_common_size(), x->get_common_size()); + GKO_ASSERT_EQUAL_DIMENSIONS(alpha->get_common_size(), + gko::dim<2>(1, 1)); + GKO_ASSERT_EQUAL_DIMENSIONS(beta->get_common_size(), gko::dim<2>(1, 1)); + } + protected: /** * Sets the size of the batch operator. 
From 4a18c40e882677a2082c478f5d479c7ec6f09d5f Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 10 Oct 2023 16:38:25 +0200 Subject: [PATCH 26/28] Add to test_install --- test/test_install/test_install.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/test_install/test_install.cpp b/test/test_install/test_install.cpp index d442647a985..325773f0b75 100644 --- a/test/test_install/test_install.cpp +++ b/test/test_install/test_install.cpp @@ -219,6 +219,13 @@ int main() auto test = batch_multi_vector_type::create(exec); } + // core/base/batch_dense.hpp + { + using type1 = float; + using batch_dense_type = gko::batch::Dense; + auto test = batch_dense_type::create(exec); + } + // core/base/combination.hpp { using type1 = int; From 927e8c892153a3ab33fecc40e92047648c6c1230 Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Tue, 10 Oct 2023 14:46:12 +0000 Subject: [PATCH 27/28] Format files Co-authored-by: Pratik Nayak --- dpcpp/matrix/batch_dense_kernels.dp.cpp | 54 ++++++++++++------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/dpcpp/matrix/batch_dense_kernels.dp.cpp b/dpcpp/matrix/batch_dense_kernels.dp.cpp index 8fca47c27b8..a6fba2df8e3 100644 --- a/dpcpp/matrix/batch_dense_kernels.dp.cpp +++ b/dpcpp/matrix/batch_dense_kernels.dp.cpp @@ -100,17 +100,17 @@ void simple_apply(std::shared_ptr exec, // Launch a kernel that has nbatches blocks, each block has max group size exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto mat_b = - batch::matrix::extract_batch_item(mat_ub, group_id); - const auto b_b = batch::extract_batch_item(b_ub, group_id); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - simple_apply_kernel(mat_b, b_b, x_b, item_ct1); - }); + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + simple_apply_kernel(mat_b, b_b, x_b, item_ct1); + }); }); } @@ -147,22 +147,22 @@ void advanced_apply(std::shared_ptr exec, // Launch a kernel that has nbatches blocks, each block has max group size exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto mat_b = - batch::matrix::extract_batch_item(mat_ub, group_id); - const auto b_b = batch::extract_batch_item(b_ub, group_id); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - const auto alpha_b = - batch::extract_batch_item(alpha_ub, group_id); - const auto beta_b = - batch::extract_batch_item(beta_ub, group_id); - advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, - item_ct1); - }); + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = 
batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto alpha_b = + batch::extract_batch_item(alpha_ub, group_id); + const auto beta_b = + batch::extract_batch_item(beta_ub, group_id); + advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, + item_ct1); + }); }); } From 190a01034eac50110d364eba78927762eae5b7c7 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 10 Oct 2023 22:53:30 +0200 Subject: [PATCH 28/28] Review updates Co-authored-by: Terry Cojean --- core/matrix/batch_dense_kernels.hpp | 1 - core/test/matrix/batch_dense.cpp | 8 ++------ cuda/matrix/batch_dense_kernels.cu | 3 --- cuda/matrix/batch_struct.hpp | 3 --- dpcpp/matrix/batch_struct.hpp | 2 -- hip/matrix/batch_dense_kernels.hip.cpp | 3 --- hip/matrix/batch_struct.hip.hpp | 3 --- include/ginkgo/core/matrix/batch_dense.hpp | 3 +++ reference/matrix/batch_struct.hpp | 2 -- test/matrix/batch_dense_kernels.cpp | 16 ++++++++-------- test/test_install/test_install.cpp | 2 +- 11 files changed, 14 insertions(+), 32 deletions(-) diff --git a/core/matrix/batch_dense_kernels.hpp b/core/matrix/batch_dense_kernels.hpp index cb46b7291b8..ef59ff3e9cc 100644 --- a/core/matrix/batch_dense_kernels.hpp +++ b/core/matrix/batch_dense_kernels.hpp @@ -38,7 +38,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include #include diff --git a/core/test/matrix/batch_dense.cpp b/core/test/matrix/batch_dense.cpp index 7bde0c708dc..8e64c913a6a 100644 --- a/core/test/matrix/batch_dense.cpp +++ b/core/test/matrix/batch_dense.cpp @@ -256,7 +256,6 @@ TYPED_TEST(Dense, CanBeConstructedFromDenseMatrices) using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; using size_type = gko::size_type; - auto mat1 = gko::initialize({{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, @@ -275,16 +274,15 @@ TYPED_TEST(Dense, CanBeConstructedFromDenseMatricesByDuplication) using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; using size_type = gko::size_type; - auto mat1 = gko::initialize( 4, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto bat_m = gko::batch::create_from_item>( this->exec, std::vector{mat1.get(), mat1.get(), mat1.get()}); + auto m = gko::batch::create_from_item>( this->exec, 3, mat1.get()); @@ -298,12 +296,10 @@ TYPED_TEST(Dense, CanBeConstructedByDuplicatingDenseMatrices) using value_type = typename TestFixture::value_type; using DenseMtx = typename TestFixture::DenseMtx; using size_type = gko::size_type; - auto mat1 = gko::initialize({{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, this->exec); auto mat2 = gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); - auto m = gko::batch::create_from_item>( this->exec, std::vector{mat1.get(), mat2.get()}); @@ -342,6 +338,7 @@ TYPED_TEST(Dense, CanBeUnbatchedIntoDenseMatrices) TYPED_TEST(Dense, CanBeListConstructed) { using value_type = typename TestFixture::value_type; + auto m = gko::batch::initialize>( {{1.0, 2.0}, {1.0, 3.0}}, this->exec); @@ -406,7 +403,6 @@ TYPED_TEST(Dense, CanBeReadFromMatrixData) { using value_type = typename TestFixture::value_type; using index_type = int; - auto vec_data = std::vector>{}; vec_data.emplace_back(gko::matrix_data( {2, 2}, {{0, 0, 1.0}, {0, 1, 3.0}, {1, 0, 0.0}, {1, 1, 5.0}})); diff --git a/cuda/matrix/batch_dense_kernels.cu 
b/cuda/matrix/batch_dense_kernels.cu index 47c478864cf..dd82e15b8cc 100644 --- a/cuda/matrix/batch_dense_kernels.cu +++ b/cuda/matrix/batch_dense_kernels.cu @@ -34,7 +34,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include #include @@ -44,8 +43,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/matrix/batch_struct.hpp" #include "cuda/base/batch_struct.hpp" #include "cuda/base/config.hpp" -#include "cuda/base/cublas_bindings.hpp" -#include "cuda/base/pointer_mode_guard.hpp" #include "cuda/base/thrust.cuh" #include "cuda/components/cooperative_groups.cuh" #include "cuda/components/reduction.cuh" diff --git a/cuda/matrix/batch_struct.hpp b/cuda/matrix/batch_struct.hpp index 2ae453b6e61..73712a7b81b 100644 --- a/cuda/matrix/batch_struct.hpp +++ b/cuda/matrix/batch_struct.hpp @@ -37,13 +37,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/matrix/batch_struct.hpp" -#include -#include #include #include "core/base/batch_struct.hpp" -#include "cuda/base/config.hpp" #include "cuda/base/types.hpp" diff --git a/dpcpp/matrix/batch_struct.hpp b/dpcpp/matrix/batch_struct.hpp index d452f78644f..b0393daf55d 100644 --- a/dpcpp/matrix/batch_struct.hpp +++ b/dpcpp/matrix/batch_struct.hpp @@ -37,12 +37,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/matrix/batch_struct.hpp" -#include #include #include "core/base/batch_struct.hpp" -#include "dpcpp/base/config.hpp" namespace gko { diff --git a/hip/matrix/batch_dense_kernels.hip.cpp b/hip/matrix/batch_dense_kernels.hip.cpp index a0fdea446be..eb3da83760a 100644 --- a/hip/matrix/batch_dense_kernels.hip.cpp +++ b/hip/matrix/batch_dense_kernels.hip.cpp @@ -35,7 +35,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include -#include #include @@ -46,8 +45,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/matrix/batch_struct.hpp" #include "hip/base/batch_struct.hip.hpp" #include "hip/base/config.hip.hpp" -#include "hip/base/hipblas_bindings.hip.hpp" -#include "hip/base/pointer_mode_guard.hip.hpp" #include "hip/base/thrust.hip.hpp" #include "hip/components/cooperative_groups.hip.hpp" #include "hip/components/reduction.hip.hpp" diff --git a/hip/matrix/batch_struct.hip.hpp b/hip/matrix/batch_struct.hip.hpp index c1bd6441367..4670cf0988b 100644 --- a/hip/matrix/batch_struct.hip.hpp +++ b/hip/matrix/batch_struct.hip.hpp @@ -37,13 +37,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/matrix/batch_struct.hpp" -#include -#include #include #include "core/base/batch_struct.hpp" -#include "hip/base/config.hip.hpp" #include "hip/base/types.hip.hpp" diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 59ab92cd146..7f3ce5890e4 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -133,6 +133,7 @@ class Dense final : public EnableBatchLinOp>, */ size_type get_cumulative_offset(size_type batch_id) const { + GKO_ASSERT(batch_id < this->get_num_batch_items()); return batch_id * this->get_common_size()[0] * this->get_common_size()[1]; } @@ -198,6 +199,7 @@ class Dense final : public EnableBatchLinOp>, */ ValueType& at(size_type batch_id, size_type idx) noexcept { + GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_data()[linearize_index(batch_id, idx)]; } @@ -206,6 +208,7 @@ class Dense final : public EnableBatchLinOp>, */ ValueType at(size_type batch_id, size_type idx) const noexcept { + GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data()[linearize_index(batch_id, idx)]; } diff --git a/reference/matrix/batch_struct.hpp b/reference/matrix/batch_struct.hpp index dcd4ce3e71e..483d7717718 100644 --- a/reference/matrix/batch_struct.hpp +++ b/reference/matrix/batch_struct.hpp @@ -37,8 +37,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/matrix/batch_struct.hpp" -#include -#include #include #include diff --git a/test/matrix/batch_dense_kernels.cpp b/test/matrix/batch_dense_kernels.cpp index 119a868be09..a243d51f3c1 100644 --- a/test/matrix/batch_dense_kernels.cpp +++ b/test/matrix/batch_dense_kernels.cpp @@ -75,11 +75,11 @@ class Dense : public CommonTestFixture { { const int num_rows = 252; const int num_cols = 32; - x = gen_mtx(batch_size, num_rows, num_cols); + mat = gen_mtx(batch_size, num_rows, num_cols); y = gen_mtx(batch_size, num_cols, num_vecs); alpha = gen_mtx(batch_size, 1, 1); beta = gen_mtx(batch_size, 1, 1); - dx = gko::clone(exec, x); + dmat = gko::clone(exec, mat); dy = gko::clone(exec, y); dalpha = gko::clone(exec, alpha); dbeta = gko::clone(exec, beta); @@ -93,13 +93,13 @@ class Dense : public CommonTestFixture { std::default_random_engine rand_engine; const size_t batch_size = 11; - std::unique_ptr x; + std::unique_ptr mat; std::unique_ptr y; std::unique_ptr alpha; std::unique_ptr beta; std::unique_ptr expected; std::unique_ptr dresult; - std::unique_ptr dx; + std::unique_ptr dmat; std::unique_ptr dy; std::unique_ptr dalpha; std::unique_ptr dbeta; @@ -110,8 +110,8 @@ TEST_F(Dense, SingleVectorApplyIsEquivalentToRef) { set_up_apply_data(1); - x->apply(y.get(), expected.get()); - dx->apply(dy.get(), dresult.get()); + mat->apply(y.get(), expected.get()); + dmat->apply(dy.get(), dresult.get()); GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, r::value); } @@ -121,8 +121,8 @@ TEST_F(Dense, SingleVectorAdvancedApplyIsEquivalentToRef) { set_up_apply_data(1); - x->apply(alpha.get(), y.get(), beta.get(), expected.get()); - dx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + mat->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmat->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, r::value); } diff --git a/test/test_install/test_install.cpp b/test/test_install/test_install.cpp index 325773f0b75..7e53ea8f165 100644 --- a/test/test_install/test_install.cpp +++ 
b/test/test_install/test_install.cpp @@ -222,7 +222,7 @@ int main() // core/base/batch_dense.hpp { using type1 = float; - using batch_dense_type = gko::batch::Dense<type1>; + using batch_dense_type = gko::batch::matrix::Dense<type1>; auto test = batch_dense_type::create(exec); }
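After this final rename, the installed header exposes the batch dense matrix as gko::batch::matrix::Dense. The following is a rough end-to-end usage sketch consistent with the test_install snippet and the kernel tests above; the value type, the batch and item sizes, and the choice of ReferenceExecutor are arbitrary illustration choices, and the values are left unset for brevity.

#include <ginkgo/ginkgo.hpp>

int main()
{
    auto exec = gko::ReferenceExecutor::create();
    using Mtx = gko::batch::matrix::Dense<double>;
    using Vec = gko::batch::MultiVector<double>;
    // A batch of 10 independent 4x4 dense matrices plus matching
    // multi-vectors with a single right-hand side per item.
    auto mat = Mtx::create(exec, gko::batch_dim<2>(10, gko::dim<2>(4, 4)));
    auto b = Vec::create(exec, gko::batch_dim<2>(10, gko::dim<2>(4, 1)));
    auto x = Vec::create(exec, gko::batch_dim<2>(10, gko::dim<2>(4, 1)));
    // x = mat * b, applied item-wise across the whole batch, matching
    // mat->apply(y.get(), expected.get()) in the tests above.
    mat->apply(b.get(), x.get());
}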