Skip to content

Commit

Permalink
mch-v6 software stack (#87)
Browse files Browse the repository at this point in the history
Add mch-v6 software stack.
  • Loading branch information
bcumming authored Apr 25, 2024
1 parent 68fa217 commit 70b47c7
Show file tree
Hide file tree
Showing 21 changed files with 1,840 additions and 25 deletions.
4 changes: 2 additions & 2 deletions recipes/mch/v6/compilers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ gcc:
specs:
- [email protected]
llvm:
requires: gcc@11
requires: gcc@11.3
specs:
- nvhpc@23.3
- nvhpc@24.1
4 changes: 2 additions & 2 deletions recipes/mch/v6/config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: mch
store: /user-environment
store: /mch-environment/v6
spack:
commit: releases/v0.20
commit: releases/v0.19
repo: https://github.com/spack/spack.git
description: The compiler toolchains, libraries and tools required for the MCH production software stack.
17 changes: 8 additions & 9 deletions recipes/mch/v6/environments.yaml
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
tools:
compiler:
- toolchain: gcc
spec: gcc@11.3
spec: gcc
unify: when_possible
specs:
- ccache
- [email protected]
- [email protected]
- [email protected]
- libtree
- meson
- ninja
- tmux
- tree
- [email protected]
- [email protected]
Expand All @@ -23,12 +19,12 @@ tools:
prgenv-nvidia:
compiler:
- toolchain: gcc
spec: gcc@11.3
spec: gcc
- toolchain: llvm
spec: nvhpc
unify: when_possible
mpi:
spec: cray-mpich@8.1.25
spec: cray-mpich@8.1.26%nvhpc
gpu: cuda
packages:
- perl
Expand Down Expand Up @@ -72,17 +68,19 @@ prgenv-nvidia:
views:
prgenv-icon:
exclude: ['[email protected]']
link: roots
prgenv-gcc:
compiler:
- toolchain: gcc
spec: gcc@11.3
spec: gcc
unify: when_possible
mpi:
spec: cray-mpich@8.1.25
spec: cray-mpich@8.1.26
gpu: cuda
packages:
- perl
- curl
- libx11
specs:
- [email protected]
- [email protected]
Expand All @@ -108,3 +106,4 @@ prgenv-gcc:
- netcdf-c+mpi
- netcdf-fortran ^netcdf-c+mpi
- [email protected]
link: roots
2 changes: 1 addition & 1 deletion recipes/mch/v6/modules.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ modules:
all: '{name}/{version}-{compiler.name}'
nvhpc: '{name}/{version}'
gcc: '{name}/{version}'
cray-mpich: 'cray-mpich-binary/{version}'
cray-mpich: 'cray-mpich-{compiler.name}/{version}'
python: '{name}/{version}'
hdf5%gcc~mpi: '{name}/{version}-serial-{compiler.name}'
netcdf-c~mpi: '{name}/{version}-serial-{compiler.name}'
Expand Down
4 changes: 4 additions & 0 deletions recipes/mch/v6/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
This software stack uses spack v0.19, which requires a specially patched version of stackinator.

* Use the `mch-v6-v0.19` branch of stackinator.
* Use a separate build cache, because v0.19 build caches are not compatible with Spack v0.20 or later.
66 changes: 66 additions & 0 deletions recipes/mch/v6/repo/packages/aws-ofi-nccl/package.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

from spack.package import *


class AwsOfiNccl(AutotoolsPackage):
    """AWS OFI NCCL is a plug-in which enables EC2 developers to use
    libfabric as a network provider while running NVIDIA's NCCL based
    applications."""

    homepage = "https://github.com/aws/aws-ofi-nccl"
    git = "https://github.com/aws/aws-ofi-nccl.git"
    url = "https://github.com/aws/aws-ofi-nccl.git"

    # NOTE(review): the maintainers directive is deliberately left disabled;
    # presumably it is not available in the Spack release this recipe
    # targets -- confirm before re-enabling.
    # maintainers("bvanessen")

    version("master", branch="master")
    # The release tarball lives at its own download URL, so it overrides the
    # package-level `url`.
    version(
        "v1.7.4",
        sha256="cca529da330d3155ae1502c21365d62159254bca6bfc0482afd35e7eaa927090",
        url="https://github.com/aws/aws-ofi-nccl/releases/download/v1.7.4-aws/aws-ofi-nccl-1.7.4-aws.tar.gz",
    )

    variant("trace", default=False, description="Enable printing trace messages")
    variant("tests", default=False, description="Build tests")

    depends_on("libfabric")
    depends_on("cuda")
    depends_on("nccl")
    depends_on("mpi")
    depends_on("hwloc")
    depends_on("autoconf", type="build")
    depends_on("automake", type="build")
    depends_on("libtool", type="build")

    def setup_run_environment(self, env):
        """Append this package's ``lib`` directory to ``LD_LIBRARY_PATH``.

        NCCL can only pick the plug-in up when its library directory is on
        the loader search path.
        """
        env.append_path("LD_LIBRARY_PATH", self.spec.prefix.lib)

    def setup_dependent_run_environment(self, env, dependent_spec):
        """Append this package's ``lib`` directory to ``LD_LIBRARY_PATH``
        in the run environment of packages that depend on it.

        ``dependent_spec`` is accepted for the hook signature but not used.
        """
        env.append_path("LD_LIBRARY_PATH", self.spec["aws-ofi-nccl"].prefix.lib)

    def configure_args(self):
        """Return the list of arguments passed to ``configure``.

        Every external library is pointed at the Spack-provided dependency
        prefix, followed by the flags derived from the ``trace`` and
        ``tests`` variants.
        """
        spec = self.spec
        externals = ("libfabric", "cuda", "nccl", "mpi")

        # One --with-<dep>=<prefix> flag per Spack-provided dependency.
        flags = ["--with-{0}={1}".format(dep, spec[dep].prefix) for dep in externals]

        flags += self.enable_or_disable("trace")
        flags += self.enable_or_disable("tests")
        return flags
123 changes: 123 additions & 0 deletions recipes/mch/v6/repo/packages/cray-gtl/package.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# Copyright 2013-2023 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

import os
import platform

import spack.compilers
from spack.package import *

# Known cray-gtl releases, newest first.
# Maps release version -> {"<system>-<machine>": sha256 of that platform's tarball}.
# A release is only usable on the platforms it lists a checksum for.
_versions = {
    "8.1.29": {
        "Linux-aarch64": "321bc3bc3c17f38d199e0ccae87cc931f69ca58238385f1e6a6165a2fbe94a71",
    },
    "8.1.28": {
        "Linux-aarch64": "0bb881cba502b199dadce7875bba62e7403e1c55abc6669c76a7cba7c05fa5ad",
        "Linux-x86_64": "2e82c618648e79bdc4b8bf9394be8fd59c34ccd77f172afd11fce38beca1ecab",
    },
    "8.1.27": {
        "Linux-x86_64": "80c7e94d30b5a3573ac6b2cc5fb0373046760a0acdff44a178e723ab3c8fdfb9",
    },
    "8.1.26": {
        "Linux-x86_64": "37d9626cb5f851f63c9799c18a419354c6f21c77f90558472552156df9eef311",
    },
    "8.1.25": {
        "Linux-x86_64": "a2e2af2037e63b64ef74d870c0bab91a8109e75eef82a30250b81b0d785ff6ae",
    },
    "8.1.24": {
        "Linux-x86_64": "2fa8635f829e67844e7b30dffb092a336d257e0e769d2225030f2ccf4c1d302f",
    },
    "8.1.23": {
        "Linux-x86_64": "034667c2ea49eec76ef8f79494231bad94884b99683edabf781beed01ec681e4",
    },
    "8.1.21": {
        "Linux-x86_64": "78072edfcb6cc24cfefab06e824111b5b2b839551235ece68cd154bec7936a24",
    },
    "8.1.18": {
        "Linux-x86_64": "79c24203a27b67d3aa15ebaab6121e7e72e8a2be61622876179f694a7fb4399c",
    },
}


class CrayGtl(Package):
    """Install cray-gtl"""

    homepage = "https://www.hpe.com/us/en/compute/hpc/hpc-software.html"
    url = "https://jfrog.svc.cscs.ch/artifactory/cray-mpich/cray-gtl-8.1.26.tar.gz"
    # maintainers = ["bcumming", "simonpintarelli"]

    # Register only the releases that list a checksum for the host platform.
    # The "<system>-<machine>" key is the same for every release, so it is
    # computed once, before the loop.
    key = "{0}-{1}".format(platform.system(), platform.machine())
    for ver, packages in _versions.items():
        sha = packages.get(key)
        if sha:
            version(
                ver,
                sha256=sha,
                url=f"https://jfrog.svc.cscs.ch/artifactory/cray-mpich/cray-gtl-{ver}.{platform.machine()}.tar.gz",
            )

    variant("cuda", default=False)
    variant("rocm", default=False)
    # The two GPU back ends are mutually exclusive.
    conflicts("+cuda", when="+rocm", msg="Pick either CUDA or ROCM")

    # Fix up binaries with patchelf.
    depends_on("patchelf", type="build")

    with when("+cuda"):
        # NOTE(review): the lower bounds of these two ranges were
        # reconstructed from a garbled spec string; the CUDA 11 / CUDA 12
        # split follows the `when=` bounds -- confirm the exact ranges.
        depends_on("cuda@11.0:11", type="link", when="@:8.1.26")
        depends_on("cuda@12.0:12", type="link", when="@8.1.27:")

    with when("+rocm"):
        # libamdhip64.so.5
        depends_on("hip@5:", type="link")
        # libhsa-runtime64.so.1
        depends_on("hsa-rocr-dev", type="link")

    def get_rpaths(self):
        """Return the rpath string to write into the installed binaries.

        The value is the colon-joined, non-empty entries of the
        ``SPACK_RPATH_DIRS`` and ``SPACK_COMPILER_IMPLICIT_RPATHS``
        environment variables, in that order. It is an empty string when
        neither variable is set.
        """
        # Those rpaths are already set in the build environment, so
        # let's just retrieve them.
        pkgs = os.getenv("SPACK_RPATH_DIRS", "").split(":")
        compilers = os.getenv("SPACK_COMPILER_IMPLICIT_RPATHS", "").split(":")
        return ":".join([p for p in pkgs + compilers if p])

    def should_patch(self, file):
        """Return True if ``file`` is a regular (non-symlink) ELF file.

        The check reads the first four bytes and compares them with the ELF
        magic number. Files that cannot be opened or read are reported as
        not patchable instead of raising.
        """
        if os.path.islink(file):
            return False
        try:
            with open(file, "rb") as f:
                return f.read(4) == b"\x7fELF"
        except OSError:
            return False

    def install(self, spec, prefix):
        """Copy the unpacked binary distribution into ``prefix`` as-is."""
        install_tree(".", prefix)

    @property
    def libs(self):
        """Locate the shared GTL library matching the enabled GPU variant.

        Returns the result of ``find_libraries`` for ``libmpi_gtl_cuda``
        (``+cuda``) or ``libmpi_gtl_hsa`` (``+rocm``), searched under the
        install prefix. Returns ``None`` when neither variant is enabled.
        """
        if "+cuda" in self.spec:
            return find_libraries("libmpi_gtl_cuda", root=self.prefix, shared=True)
        if "+rocm" in self.spec:
            return find_libraries("libmpi_gtl_hsa", root=self.prefix, shared=True)
        return None

    @run_after("install")
    def fixup_binaries(self):
        """Rewrite the rpath of every installed ELF file with patchelf.

        Every patchelf call passes ``fail_on_error=False``, so a file that
        cannot be patched does not abort the installation.
        """
        patchelf = which("patchelf")
        rpath = self.get_rpaths()
        for root, _, files in os.walk(self.prefix):
            for name in files:
                f = os.path.join(root, name)
                if not self.should_patch(f):
                    continue
                patchelf("--force-rpath", "--set-rpath", rpath, f, fail_on_error=False)
                # The C compiler wrapper can fail because libmpi_gtl_cuda refers to the symbol
                # __gxx_personality_v0 but wasn't linked against libstdc++.
                if "libmpi_gtl_cuda.so" in str(f):
                    patchelf("--add-needed", "libstdc++.so", f, fail_on_error=False)
                # NOTE(review): applied to every patched ELF file of an
                # 8.1.27 +cuda install, not only to libmpi_gtl_cuda.so --
                # confirm this nesting against the intended behaviour.
                if "@8.1.27+cuda" in self.spec:
                    patchelf("--add-needed", "libcudart.so", f, fail_on_error=False)
                    patchelf("--add-needed", "libcuda.so", f, fail_on_error=False)
Loading

0 comments on commit 70b47c7

Please sign in to comment.