diff --git a/examples/kokkos-based/CMakeLists.txt b/examples/kokkos-based/CMakeLists.txt
index 4b6f2697..fd0c3e7e 100644
--- a/examples/kokkos-based/CMakeLists.txt
+++ b/examples/kokkos-based/CMakeLists.txt
@@ -2,5 +2,6 @@
 linalg_add_example(add_kokkos)
 linalg_add_example(dot_kokkos)
 linalg_add_example(dotc_kokkos)
+linalg_add_example(idx_abs_max_kokkos)
 linalg_add_example(simple_scale_kokkos)
 linalg_add_example(matrix_vector_product_kokkos)
diff --git a/examples/kokkos-based/idx_abs_max_kokkos.cpp b/examples/kokkos-based/idx_abs_max_kokkos.cpp
new file mode 100644
index 00000000..39a04c12
--- /dev/null
+++ b/examples/kokkos-based/idx_abs_max_kokkos.cpp
@@ -0,0 +1,45 @@
+#include <experimental/linalg>
+#include <iostream>
+
+// Example: calls idx_abs_max on a 10-element vector twice, once with
+// std::execution::seq and once with the Kokkos Kernels execution policy.
+int main(int argc, char* argv[])
+{
+  std::cout << "idx_abs_max example: calling kokkos-kernels" << std::endl;
+
+  std::size_t N = 10;
+  Kokkos::initialize(argc,argv);
+  {
+    using value_type = double;
+
+    Kokkos::View<value_type*> a_view("A",N);
+    value_type* a_ptr = a_view.data();
+
+    // Requires CTAD working, GCC 11.1 works but some others are buggy
+    // std::experimental::mdspan a(a_ptr,N);
+    using extents_type = std::experimental::extents<std::experimental::dynamic_extent>;
+    std::experimental::mdspan<value_type, extents_type> a(a_ptr,N);
+    a(0) = 0.5;
+    a(1) = 0.2;
+    a(2) = 0.1;
+    a(3) = 0.4;
+    a(4) = -0.8;
+    a(5) = -1.7;
+    a(6) = -0.3;
+    a(7) = 0.5;
+    a(8) = -1.7;
+    a(9) = -0.9;
+
+    namespace stdla = std::experimental::linalg;
+
+    // This goes to the base implementation
+    const auto idx = stdla::idx_abs_max(std::execution::seq, a);
+    // Streamed instead of printf("%i"): idx is extents<>::size_type, not int.
+    std::cout << "Seq result = " << idx << '\n';
+
+    // This forwards to KokkosKernels (https://github.com/kokkos/kokkos-kernels)
+    const auto idx_kk = stdla::idx_abs_max(KokkosKernelsSTD::kokkos_exec<>(), a);
+    std::cout << "Kokkos result = " << idx_kk << '\n';
+  }
+  Kokkos::finalize();
+}
diff --git a/include/experimental/__p1673_bits/blas1_vector_idx_abs_max.hpp b/include/experimental/__p1673_bits/blas1_vector_idx_abs_max.hpp
index bd347835..5e560f5b 100644
--- a/include/experimental/__p1673_bits/blas1_vector_idx_abs_max.hpp
+++ b/include/experimental/__p1673_bits/blas1_vector_idx_abs_max.hpp
@@ -48,11 +48,36 @@ namespace experimental {
 inline namespace __p1673_version_0 {
 namespace linalg {
 
+// begin anonymous namespace
+namespace {
+
+// Detection trait. The specialization is selected when the unqualified call
+// idx_abs_max(Exec, v_t) is well-formed and returns an integral type; its
+// value is then true unless Exec is the inline execution policy.
+template <class Exec, class v_t, class = void>
+struct is_custom_idx_abs_max_avail : std::false_type {};
+
+template <class Exec, class v_t>
+struct is_custom_idx_abs_max_avail<
+  Exec, v_t,
+  std::enable_if_t<
+    std::is_integral<
+      decltype(idx_abs_max(std::declval<Exec>(),
+                           std::declval<v_t>()
+                           )
+               )
+      >::value
+    >
+  >
+{
+  static constexpr bool value = !std::is_same<Exec,std::experimental::linalg::impl::inline_exec_t>::value;
+};
+
 template<class ElementType,
          extents<>::size_type ext0,
          class Layout,
          class Accessor>
-extents<>::size_type idx_abs_max(
+extents<>::size_type idx_abs_max_default_impl(
   std::experimental::mdspan<ElementType, std::experimental::extents<ext0>, Layout, Accessor> v)
 {
   using std::abs;
@@ -73,16 +98,41 @@ extents<>::size_type idx_abs_max(
   return maxInd; // FIXME check for NaN "never less than" stuff
 }
 
+} // end anonymous namespace
+
+// Policy overload: calls idx_abs_max with the mapped policy when the trait
+// above reports a custom overload, else falls back to the default implementation.
 template<class ExecutionPolicy,
          class ElementType,
          extents<>::size_type ext0,
          class Layout,
          class Accessor>
 extents<>::size_type idx_abs_max(
-  ExecutionPolicy&& /* exec */,
+  ExecutionPolicy&& exec,
+  std::experimental::mdspan<ElementType, std::experimental::extents<ext0>, Layout, Accessor> v)
+{
+  constexpr bool use_custom = is_custom_idx_abs_max_avail<
+    decltype(execpolicy_mapper(exec)), decltype(v)
+    >::value;
+
+  if constexpr(use_custom){
+    using return_type = extents<>::size_type;
+    return return_type(idx_abs_max(execpolicy_mapper(exec), v));
+  }
+  else{
+    return idx_abs_max_default_impl(v);
+  }
+}
+
+// Overload without a policy: delegates to the policy overload with default_exec_t.
+template<class ElementType,
+         extents<>::size_type ext0,
+         class Layout,
+         class Accessor>
+extents<>::size_type idx_abs_max(
   std::experimental::mdspan<ElementType, std::experimental::extents<ext0>, Layout, Accessor> v)
 {
-  return idx_abs_max(v);
+  return idx_abs_max(std::experimental::linalg::impl::default_exec_t(), v);
 }
 
 } // end namespace linalg
diff --git a/include/experimental/__p1673_bits/linalg_execpolicy_mapper.hpp b/include/experimental/__p1673_bits/linalg_execpolicy_mapper.hpp
index ad6f8e69..7c1df018 100644
--- a/include/experimental/__p1673_bits/linalg_execpolicy_mapper.hpp
+++ b/include/experimental/__p1673_bits/linalg_execpolicy_mapper.hpp
@@ -17,7 +17,7 @@ struct default_exec_t {};
 
 }
 
-#if defined(LINALG_ENABLE_KOKKOS) || defined(LINALG_ENABLE_KOKKOS_DEFAULT)
+#if defined(LINALG_ENABLE_KOKKOS) && defined(LINALG_ENABLE_KOKKOS_DEFAULT)
 #include <experimental/linalg_kokkoskernels>
 #endif
 
diff --git a/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas1_idx_abs_max_kk.hpp b/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas1_idx_abs_max_kk.hpp
new file mode 100644
index 00000000..a191b469
--- /dev/null
+++ b/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas1_idx_abs_max_kk.hpp
@@ -0,0 +1,26 @@
+
+#ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL_P1673_BITS_KOKKOSKERNELS_IDX_ABS_MAX_HPP_
+#define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL_P1673_BITS_KOKKOSKERNELS_IDX_ABS_MAX_HPP_
+
+#include <KokkosBlas1_iamax.hpp>
+
+namespace KokkosKernelsSTD {
+
+// idx_abs_max overload for the kokkos_exec policy: passes the mdspan through
+// Impl::mdspan_to_view and returns KokkosBlas::iamax of the result, minus one.
+template<class ExeSpace,
+         class ElementType,
+         std::experimental::extents<>::size_type ext0,
+         class Layout,
+         class Accessor>
+auto idx_abs_max(kokkos_exec<ExeSpace>,
+                 std::experimental::mdspan<ElementType, std::experimental::extents<ext0>, Layout, Accessor> v)
+{
+  // note that -1 here, this is related to:
+  // https://github.com/kokkos/stdBLAS/issues/114
+
+  return KokkosBlas::iamax(Impl::mdspan_to_view(v))-1;
+}
+
+}
+#endif
diff --git a/tpl-implementations/include/experimental/linalg_kokkoskernels b/tpl-implementations/include/experimental/linalg_kokkoskernels
index 502e7dc3..298310b3 100644
--- a/tpl-implementations/include/experimental/linalg_kokkoskernels
+++ b/tpl-implementations/include/experimental/linalg_kokkoskernels
@@ -5,4 +5,5 @@
 #include "__p1673_bits/kokkos-kernels/blas1_dot_kk.hpp"
 #include "__p1673_bits/kokkos-kernels/blas1_add_kk.hpp"
 #include "__p1673_bits/kokkos-kernels/blas1_scale_kk.hpp"
+#include "__p1673_bits/kokkos-kernels/blas1_idx_abs_max_kk.hpp"
 #include "__p1673_bits/kokkos-kernels/blas2_matrix_vector_product_kk.hpp"