Skip to content

Commit

Permalink
add linear algebra stack (PETSc, Trilinos) (#170)
Browse files Browse the repository at this point in the history
  • Loading branch information
finkandreas authored Dec 3, 2024
1 parent 3c2673e commit bdf69d7
Show file tree
Hide file tree
Showing 47 changed files with 5,004 additions and 1 deletion.
24 changes: 23 additions & 1 deletion config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ clusters:
uarch: 'gh200'
partition: 'normal'
variables:
#SLURM_RESERVATION: 'icon'
SLURM_RESERVATION: 'icon'
F7T_URL: "https://api.cscs.ch/hpc/firecrest/v1"
runner: f7t
uenvs:
Expand Down Expand Up @@ -173,6 +173,28 @@ uenvs:
todi: [gh200]
santis: [gh200]
develop: False
linalg:
"24.11":
recipes:
zen2: "24.11/mc"
zen3: "24.11/mc"
gh200: "24.11/gh200"
deploy:
daint: [gh200]
eiger: [zen2]
todi: [gh200]
develop: False
linalg-complex:
"24.11":
recipes:
zen2: "24.11/mc"
zen3: "24.11/mc"
gh200: "24.11/gh200"
deploy:
daint: [gh200]
eiger: [zen2]
todi: [gh200]
develop: False
linaro-forge:
"24.1":
recipes:
Expand Down
5 changes: 5 additions & 0 deletions recipes/linalg-complex/24.11/gh200/compilers.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
bootstrap:
spec: gcc@12.3
gcc:
specs:
- gcc@13
7 changes: 7 additions & 0 deletions recipes/linalg-complex/24.11/gh200/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
name: linalg-complex
spack:
commit: releases/v0.23
repo: https://github.com/spack/spack.git
store: /user-environment
description: Linear algebra libraries, PETSc (complex), Trilinos

61 changes: 61 additions & 0 deletions recipes/linalg-complex/24.11/gh200/environments.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
gcc-env:
compiler:
- toolchain: gcc
spec: gcc
mpi:
spec: cray-mpich@8.1.30
gpu: cuda
unify: true
specs:
- arpack-ng
- boost +atomic +chrono +container +context +coroutine +date_time +filesystem +iostreams +json +log +mpi +multithreaded +program_options +python +random +regex +serialization +shared +system +test +thread +timer +url
- cmake
- eigen
- fftw
- fmt
- gsl
- hdf5 +fortran +hl
- hwloc
- kokkos +aggressive_vectorization ~alloc_async +cuda_constexpr +cuda_lambda ~cuda_relocatable_device_code ~cuda_uvm cxxstd=17 +hwloc +openmp +pic +serial +shared ~tuning +wrapper
- kokkos-kernels +blas +cublas +cusparse +cusolver +execspace_cuda +execspace_openmp +execspace_serial +lapack +memspace_cudaspace +openmp scalars=float,double,complex_float,complex_double +serial +shared +superlu
- kokkos-tools +mpi +papi
- metis
- mumps +parmetis +ptscotch
- netcdf-c
- netlib-scalapack
- lua
- libtree
- lz4
- meson
- nco
- ninja
- openblas threads=openmp
- osu-micro-benchmarks@5.9
- p4est +mpi
- papi ~cuda
- parmetis
- petsc +batch +complex +hwloc ~hypre +kokkos +libyaml +metis +mumps +openmp +ptscotch +suite-sparse
- python@3.12
- slepc
- suite-sparse ~cuda
- superlu
- superlu-dist
- swig
- trilinos@16 +adios2 ~amesos +amesos2 +belos +boost +complex cxxstd=17 +cuda_constexpr ~epetra ~epetraext +explicit_template_instantiation +hdf5 ~hypre ~ifpack +ifpack2 +intrepid2 +kokkos ~ml +mpi +muelu +mumps +nox +openmp +sacado +stk +stratimikos +suite-sparse +tpetra +zoltan2
- zlib-ng
# add GPU-specific packages here, for easier comparison with mc version
- nccl
- nccl-tests
- cuda@12.6
- aws-ofi-nccl
variants:
- +mpi
- +cuda
- cuda_arch=90
views:
default:
link: roots
uenv:
add_compilers: true
prefix_paths:
LD_LIBRARY_PATH: [lib, lib64]
23 changes: 23 additions & 0 deletions recipes/linalg-complex/24.11/gh200/modules.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
modules:
# Paths to check when creating modules for all module sets
prefix_inspections:
bin:
- PATH
lib:
- LD_LIBRARY_PATH
lib64:
- LD_LIBRARY_PATH

default:
arch_folder: false
# Where to install modules
roots:
tcl: /user-environment/modules
tcl:
all:
autoload: none
hash_length: 0
exclude_implicits: true
exclude: ['%gcc@7.5.0', 'gcc %gcc@7.5.0']
projections:
all: '{name}/{version}'
1 change: 1 addition & 0 deletions recipes/linalg-complex/24.11/gh200/repo
5 changes: 5 additions & 0 deletions recipes/linalg-complex/24.11/mc/compilers.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
bootstrap:
spec: gcc@11
gcc:
specs:
- gcc@13
7 changes: 7 additions & 0 deletions recipes/linalg-complex/24.11/mc/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
name: linalg-complex
spack:
commit: releases/v0.23
repo: https://github.com/spack/spack.git
store: /user-environment
description: Linear algebra libraries, PETSc (complex), Trilinos

54 changes: 54 additions & 0 deletions recipes/linalg-complex/24.11/mc/environments.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
gcc-env:
compiler:
- toolchain: gcc
spec: gcc@13
mpi:
spec: cray-mpich@8.1.30
gpu: Null
unify: true
specs:
- arpack-ng
- boost +atomic +chrono +container +context +coroutine +date_time +filesystem +iostreams +json +log +mpi +multithreaded +program_options +python +random +regex +serialization +shared +system +test +thread +timer +url
- cmake
- eigen
- fftw
- fmt
- gsl
- hdf5 +fortran +hl
- hwloc
- hypre +superlu-dist
- kokkos +aggressive_vectorization cxxstd=17 +hwloc +openmp +pic +serial +shared ~tuning
- kokkos-kernels +blas +execspace_openmp +execspace_serial +lapack +openmp scalars=float,double,complex_float,complex_double +serial +shared +superlu
- kokkos-tools +mpi +papi
- metis
- mumps +parmetis +ptscotch
- netcdf-c
- netlib-scalapack
- lua
- libtree
- lz4
- meson
- nco
- ninja
- openblas threads=openmp
- osu-micro-benchmarks@5.9
- p4est +mpi
- parmetis
- petsc +batch +complex +hwloc +hypre +kokkos +libyaml +metis +mumps +openmp +ptscotch +suite-sparse
- python@3.12
- slepc
- suite-sparse
- superlu
- superlu-dist
- swig
- trilinos@16 +adios2 ~amesos +amesos2 +belos +boost +complex cxxstd=17 ~epetra ~epetraext +explicit_template_instantiation +hdf5 +hypre ~ifpack +ifpack2 +intrepid2 +kokkos ~ml +mpi +muelu +mumps +nox +openmp +sacado +stk +stratimikos +suite-sparse +tpetra +zoltan2
- zlib-ng
variants:
- +mpi
views:
default:
link: roots
uenv:
add_compilers: true
prefix_paths:
LD_LIBRARY_PATH: [lib, lib64]
23 changes: 23 additions & 0 deletions recipes/linalg-complex/24.11/mc/modules.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
modules:
# Paths to check when creating modules for all module sets
prefix_inspections:
bin:
- PATH
lib:
- LD_LIBRARY_PATH
lib64:
- LD_LIBRARY_PATH

default:
arch_folder: false
# Where to install modules
roots:
tcl: /user-environment/modules
tcl:
all:
autoload: none
hash_length: 0
exclude_implicits: true
exclude: ['%gcc@7.5.0', 'gcc %gcc@7.5.0']
projections:
all: '{name}/{version}'
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
diff --git a/core/src/HPX/Kokkos_HPX_Task.hpp b/core/src/HPX/Kokkos_HPX_Task.hpp
index 7bb3ca5d0..ff50fdc5f 100644
--- a/core/src/HPX/Kokkos_HPX_Task.hpp
+++ b/core/src/HPX/Kokkos_HPX_Task.hpp
@@ -216,7 +216,7 @@ class TaskQueueSpecializationConstrained<
task_queue.scheduler = &scheduler;
Kokkos::Impl::dispatch_execute_task(&task_queue,
Kokkos::Experimental::HPX());
- Kokkos::Experimental::HPX().fence()"Kokkos::Impl::TaskQueueSpecializationConstrained::execute: fence after task execution";
+ Kokkos::Experimental::HPX().fence("Kokkos::Impl::TaskQueueSpecializationConstrained::execute: fence after task execution");
}

// Must provide task queue execution function
diff --git a/core/src/Kokkos_HPX.hpp b/core/src/Kokkos_HPX.hpp
index 236211864..3e8522e94 100644
--- a/core/src/Kokkos_HPX.hpp
+++ b/core/src/Kokkos_HPX.hpp
@@ -282,11 +282,11 @@ class HPX {
m_mode = other.m_mode;
m_independent_instance_data = other.m_independent_instance_data;
m_buffer = m_mode == instance_mode::independent
- ? m_independent_instance_data->m_buffer
- : m_global_instance_data.m_buffer;
- m_future = m_mode == instance_mode::independent
- ? m_independent_instance_data->m_future
- : m_global_instance_data.m_future;
+ ? m_independent_instance_data->m_buffer
+ : m_global_instance_data.m_buffer;
+ m_future = m_mode == instance_mode::independent
+ ? m_independent_instance_data->m_future
+ : m_global_instance_data.m_future;
return *this;
}
#else
@@ -322,25 +322,36 @@ class HPX {
"Fence");
}
void impl_fence_instance(const std::string &name) const {
- Kokkos::Tools::Experimental::Impl::profile_fence_event(name, *this, [&]() {
- if (hpx::threads::get_self_ptr() == nullptr) {
- hpx::threads::run_as_hpx_thread([this]() { impl_get_future().wait(); });
- } else {
- impl_get_future().wait();
- }
- });
+ Kokkos::Tools::Experimental::Impl::profile_fence_event<
+ Kokkos::Experimental::HPX>(
+ name,
+ Kokkos::Tools::Experimental::Impl::DirectFenceIDHandle{
+ impl_instance_id()},
+ [&]() {
+ if (hpx::threads::get_self_ptr() == nullptr) {
+ hpx::threads::run_as_hpx_thread(
+ [this]() { impl_get_future().wait(); });
+ } else {
+ impl_get_future().wait();
+ }
+ });
}

void impl_fence_all_instances() const {
- impl_fence_instance(
+ impl_fence_all_instances(
"Kokkos::Experimental::HPX::impl_fence_all_instances: Unnamed Global "
"HPX Fence");
}
- void impl_fence_all_instances(const std::string &namename) const {
- Kokkos::Tools::Experimental::Impl::profile_fence_event(name, *this, [&]() {
- hpx::util::yield_while(
- []() { return m_active_parallel_region_count.load() != 0; });
- });
+ void impl_fence_all_instances(const std::string &name) const {
+ Kokkos::Tools::Experimental::Impl::profile_fence_event<
+ Kokkos::Experimental::HPX>(
+ name,
+ Kokkos::Tools::Experimental::SpecialSynchronizationCases::
+ GlobalDeviceSynchronization,
+ [&]() {
+ hpx::util::yield_while(
+ []() { return m_active_parallel_region_count.load() != 0; });
+ });
}
#endif

Loading

0 comments on commit bdf69d7

Please sign in to comment.