From face7e61a5482c17378b1e998fa113b4d23525e4 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Mon, 20 Nov 2023 15:59:47 +0000 Subject: [PATCH 1/3] python311Packages.torch: choose magma at the expression level ...instead of at the callPackage site. Addresses https://github.com/NixOS/nixpkgs/issues/268736 (cherry picked from commit 395b7cc35b0f43d03a64190423b13f623304df81) --- .../manual/release-notes/rl-2311.section.md | 3 +++ .../libraries/science/math/magma/generic.nix | 25 ++++++++++--------- .../python-modules/torch/default.nix | 13 +++++++--- pkgs/top-level/python-packages.nix | 6 ----- 4 files changed, 26 insertions(+), 21 deletions(-) diff --git a/nixos/doc/manual/release-notes/rl-2311.section.md b/nixos/doc/manual/release-notes/rl-2311.section.md index 715ae997af435..5f56b2cd5725f 100644 --- a/nixos/doc/manual/release-notes/rl-2311.section.md +++ b/nixos/doc/manual/release-notes/rl-2311.section.md @@ -605,6 +605,9 @@ The module update takes care of the new config syntax and the data itself (user - `python3.pkgs.flitBuildHook` has been removed. Use `flit-core` and `format = "pyproject"` instead. +- Now `magma` defaults to `magma-hip` instead of `magma-cuda`. It also + respects the `config.cudaSupport` and `config.rocmSupport` options. + - The `extend` function of `llvmPackages` has been removed due it coming from the `tools` attrset thus only extending the `tool` attrset. A possible replacement is to construct the set from `libraries` and `tools`, or patch nixpkgs. - The `qemu-vm.nix` module now supports disabling overriding `fileSystems` with diff --git a/pkgs/development/libraries/science/math/magma/generic.nix b/pkgs/development/libraries/science/math/magma/generic.nix index 6b8588207dc33..7052b684da936 100644 --- a/pkgs/development/libraries/science/math/magma/generic.nix +++ b/pkgs/development/libraries/science/math/magma/generic.nix @@ -8,12 +8,7 @@ { blas , cmake , cudaPackages - # FIXME: cuda being unfree means ofborg won't eval "magma". - # respecting config.cudaSupport -> false by default - # -> ofborg eval -> throws "no GPU targets specified". - # Probably should delete everything but "magma-cuda" and "magma-hip" - # from all-packages.nix -, cudaSupport ? true +, cudaSupport ? config.cudaSupport , fetchurl , gfortran , cudaCapabilities ? cudaPackages.cudaFlags.cudaCapabilities @@ -25,7 +20,9 @@ , magmaRelease , ninja , config -, rocmSupport ? config.rocmSupport + # At least one back-end has to be enabled, + # and we can't default to CUDA since it's unfree +, rocmSupport ? !cudaSupport , static ? false , stdenv , symlinkJoin @@ -133,6 +130,8 @@ stdenv.mkDerivation { cmakeFlags = [ "-DGPU_TARGET=${gpuTargetString}" + (lib.cmakeBool "MAGMA_ENABLE_CUDA" cudaSupport) + (lib.cmakeBool "MAGMA_ENABLE_HIP" rocmSupport) ] ++ lists.optionals static [ "-DBUILD_SHARED_LIBS=OFF" ] ++ lists.optionals cudaSupport [ @@ -140,11 +139,9 @@ stdenv.mkDerivation { "-DMIN_ARCH=${minArch}" # Disarms magma's asserts "-DCMAKE_C_COMPILER=${backendStdenv.cc}/bin/cc" "-DCMAKE_CXX_COMPILER=${backendStdenv.cc}/bin/c++" - "-DMAGMA_ENABLE_CUDA=ON" ] ++ lists.optionals rocmSupport [ "-DCMAKE_C_COMPILER=${rocmPackages.clr}/bin/hipcc" "-DCMAKE_CXX_COMPILER=${rocmPackages.clr}/bin/hipcc" - "-DMAGMA_ENABLE_HIP=ON" ]; buildFlags = [ @@ -155,7 +152,7 @@ stdenv.mkDerivation { doCheck = false; passthru = { - inherit cudaPackages cudaSupport; + inherit cudaPackages cudaSupport rocmSupport gpuTargets; }; meta = with lib; { @@ -164,7 +161,11 @@ stdenv.mkDerivation { homepage = "http://icl.cs.utk.edu/magma/index.html"; platforms = platforms.unix; maintainers = with maintainers; [ connorbaker ]; - # CUDA and ROCm are mutually exclusive - broken = cudaSupport && rocmSupport || cudaSupport && strings.versionOlder cudaVersion "9"; + + # Cf. https://bitbucket.org/icl/magma/src/fcfe5aa61c1a4c664b36a73ebabbdbab82765e9f/CMakeLists.txt#lines-20 + broken = + !(cudaSupport || rocmSupport) # At least one back-end enabled + || (cudaSupport && rocmSupport) # Mutually exclusive + || (cudaSupport && strings.versionOlder cudaVersion "9"); }; } diff --git a/pkgs/development/python-modules/torch/default.nix b/pkgs/development/python-modules/torch/default.nix index f89fed489f84e..bd2ce60aab861 100644 --- a/pkgs/development/python-modules/torch/default.nix +++ b/pkgs/development/python-modules/torch/default.nix @@ -1,5 +1,12 @@ { stdenv, lib, fetchFromGitHub, fetchpatch, buildPythonPackage, python, - config, cudaSupport ? config.cudaSupport, cudaPackages, magma, + config, cudaSupport ? config.cudaSupport, cudaPackages, + effectiveMagma ? + if cudaSupport then magma-cuda-static + else if rocmSupport then magma-hip + else magma, + magma, + magma-hip, + magma-cuda-static, useSystemNccl ? true, MPISupport ? false, mpi, buildDocs ? false, @@ -115,7 +122,7 @@ let "CUDA is not targeting Linux" = cudaSupport && !stdenv.isLinux; "Unsupported CUDA version" = cudaSupport && !(builtins.elem cudaPackages.cudaMajorVersion [ "11" "12" ]); "MPI cudatoolkit does not match cudaPackages.cudatoolkit" = MPISupport && cudaSupport && (mpi.cudatoolkit != cudaPackages.cudatoolkit); - "Magma cudaPackages does not match cudaPackages" = cudaSupport && (magma.cudaPackages != cudaPackages); + "Magma cudaPackages does not match cudaPackages" = cudaSupport && (effectiveMagma.cudaPackages != cudaPackages); }; in buildPythonPackage rec { pname = "torch"; @@ -359,7 +366,7 @@ in buildPythonPackage rec { cuda_profiler_api.dev # ]) ++ lib.optionals rocmSupport [ rocmPackages.llvm.openmp ] - ++ lib.optionals (cudaSupport || rocmSupport) [ magma ] + ++ lib.optionals (cudaSupport || rocmSupport) [ effectiveMagma ] ++ lib.optionals stdenv.isLinux [ numactl ] ++ lib.optionals stdenv.isDarwin [ Accelerate CoreServices libobjc ]; diff --git a/pkgs/top-level/python-packages.nix b/pkgs/top-level/python-packages.nix index 94a46ce2ae50f..861093d22a3aa 100644 --- a/pkgs/top-level/python-packages.nix +++ b/pkgs/top-level/python-packages.nix @@ -14125,10 +14125,6 @@ self: super: with self; { toposort = callPackage ../development/python-modules/toposort { }; torch = callPackage ../development/python-modules/torch { - magma = - if pkgs.config.cudaSupport - then pkgs.magma-cuda-static - else pkgs.magma; inherit (pkgs.darwin.apple_sdk.frameworks) Accelerate CoreServices; inherit (pkgs.darwin) libobjc; }; @@ -14138,7 +14134,6 @@ self: super: with self; { }; torchWithCuda = self.torch.override { - magma = pkgs.magma-cuda-static; openai-triton = self.openai-triton-cuda; cudaSupport = true; rocmSupport = false; @@ -14149,7 +14144,6 @@ self: super: with self; { }; torchWithRocm = self.torch.override { - magma = pkgs.magma-hip; openai-triton = self.openai-triton-no-cuda; rocmSupport = true; cudaSupport = false; From 5f43325925a8b90818d0e18d1df49033c232157e Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Tue, 21 Nov 2023 09:18:04 +0000 Subject: [PATCH 2/3] python311Packages.torch: fix typo in the cuda&&rocm error message (cherry picked from commit 643464269f674d3bd9062f55c2f47e193348283d) --- pkgs/development/python-modules/torch/default.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkgs/development/python-modules/torch/default.nix b/pkgs/development/python-modules/torch/default.nix index bd2ce60aab861..dae916a185259 100644 --- a/pkgs/development/python-modules/torch/default.nix +++ b/pkgs/development/python-modules/torch/default.nix @@ -118,7 +118,7 @@ let }; brokenConditions = attrsets.filterAttrs (_: cond: cond) { - "CUDA and ROCm are not mutually exclusive" = cudaSupport && rocmSupport; + "CUDA and ROCm are mutually exclusive" = cudaSupport && rocmSupport; "CUDA is not targeting Linux" = cudaSupport && !stdenv.isLinux; "Unsupported CUDA version" = cudaSupport && !(builtins.elem cudaPackages.cudaMajorVersion [ "11" "12" ]); "MPI cudatoolkit does not match cudaPackages.cudatoolkit" = MPISupport && cudaSupport && (mpi.cudatoolkit != cudaPackages.cudatoolkit); From cb1d60fa89d16228ddcc83e8c8b78d7b69c6ef4e Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Tue, 21 Nov 2023 10:47:07 +0000 Subject: [PATCH 3/3] magma: respect the global isStatic (cherry picked from commit 1e33c8819f402536bc765a6246fe8b5dfae0afb7) --- pkgs/development/libraries/science/math/magma/generic.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkgs/development/libraries/science/math/magma/generic.nix b/pkgs/development/libraries/science/math/magma/generic.nix index 7052b684da936..1aaab46e1d1d0 100644 --- a/pkgs/development/libraries/science/math/magma/generic.nix +++ b/pkgs/development/libraries/science/math/magma/generic.nix @@ -23,7 +23,7 @@ # At least one back-end has to be enabled, # and we can't default to CUDA since it's unfree , rocmSupport ? !cudaSupport -, static ? false +, static ? stdenv.hostPlatform.isStatic , stdenv , symlinkJoin }: