From 87619f9cdb5d3864bb0d290e6716dcded846d85d Mon Sep 17 00:00:00 2001 From: Shuli Shu <31480676+multiphaseCFD@users.noreply.github.com> Date: Wed, 8 Jan 2025 15:24:00 -0500 Subject: [PATCH] Automate aarch64-cuda-wheels tests (#1031) ### Before submitting Please complete the following checklist when submitting a PR: - [ ] All new features must include a unit test. If you've fixed a bug or added code that should be tested, add a test to the [`tests`](../tests) directory! - [ ] All new functions and code must be clearly commented and documented. If you do make documentation changes, make sure that the docs build and render correctly by running `make docs`. - [ ] Ensure that the test suite passes, by running `make test`. - [ ] Add a new entry to the `.github/CHANGELOG.md` file, summarizing the change, and including a link back to the PR. - [ ] Ensure that code is properly formatted by running `make format`. When all the above are checked, delete everything above the dashed line and fill in the pull request template. ------------------------------------------------------------------------------------------------------------ **Context:** [sc-81544] & [sc-81555] This PR fixes the Python CUDA dependencies bug and automates the arm64 LT and LGPU wheels tests **Description of the Change:** **Benefits:** **Possible Drawbacks:** **Related GitHub Issues:** --------- Co-authored-by: ringo-but-quantum --- .github/CHANGELOG.md | 14 +++++-- .../workflows/wheel_linux_aarch64_cuda.yml | 37 ++++++++++++++++--- CMakeLists.txt | 2 +- pennylane_lightning/core/_version.py | 2 +- .../test_measurements_class.py | 4 +- 5 files changed, 46 insertions(+), 13 deletions(-) diff --git a/.github/CHANGELOG.md b/.github/CHANGELOG.md index bfb8d3bdbc..18325cc4fc 100644 --- a/.github/CHANGELOG.md +++ b/.github/CHANGELOG.md @@ -29,17 +29,20 @@ ### Improvements +* Add CI wheels checks for `aarch64` wheels of Lightning-GPU and Lightning-Tensor. 
+ [(#1031)](https://github.com/PennyLaneAI/pennylane-lightning/pull/1031) + * Replace the `dummy_tensor_update` method with the `cutensornetStateCaptureMPS`API to ensure that further gates apply is allowed after the `cutensornetStateCompute` call. - [(#1028)](https://github.com/PennyLaneAI/pennylane-lightning/pull/1028/) + [(#1028)](https://github.com/PennyLaneAI/pennylane-lightning/pull/1028) * Add unit test for measurement with shots for Lightning Tensor with `tn` method. [(#1027)](https://github.com/PennyLaneAI/pennylane-lightning/pull/1027) * Add CUDA dependencies to Lightning GPU and Lightning Tensor Python wheels. - [(#1025)](https://github.com/PennyLaneAI/pennylane-lightning/pull/1025/) + [(#1025)](https://github.com/PennyLaneAI/pennylane-lightning/pull/1025) * Update the python layer UI of Lightning Tensor. - [(#1022)](https://github.com/PennyLaneAI/pennylane-lightning/pull/1022/) + [(#1022)](https://github.com/PennyLaneAI/pennylane-lightning/pull/1022) * Catalyst device interfaces support dynamic shots, and no longer parses the device init op's attribute dictionary for a static shots literal. [(#1017)](https://github.com/PennyLaneAI/pennylane-lightning/pull/1017) @@ -51,7 +54,7 @@ [(#1015)](https://github.com/PennyLaneAI/pennylane-lightning/pull/1015) * Add Exact Tensor Network cpp binding. - [(#1014)](https://github.com/PennyLaneAI/pennylane-lightning/pull/1014/) + [(#1014)](https://github.com/PennyLaneAI/pennylane-lightning/pull/1014) * Reverse Lightning Qubit generators vector insertion order. [(#1009)](https://github.com/PennyLaneAI/pennylane-lightning/pull/1009) @@ -104,6 +107,9 @@ ### Bug fixes +* Fix Python CUDA dependencies by adding path to `nvidia/nvjitlink/lib` to RPATH. + [(#1031)](https://github.com/PennyLaneAI/pennylane-lightning/pull/1031) + * Add `RTLD_NODELETE` flag to `dlopen` in order to mitigate the segfault issues for arm64-macos Catalyst support. 
[(#1030)](https://github.com/PennyLaneAI/pennylane-lightning/pull/1030) diff --git a/.github/workflows/wheel_linux_aarch64_cuda.yml b/.github/workflows/wheel_linux_aarch64_cuda.yml index 922dbb7116..4839dd426f 100644 --- a/.github/workflows/wheel_linux_aarch64_cuda.yml +++ b/.github/workflows/wheel_linux_aarch64_cuda.yml @@ -34,7 +34,7 @@ jobs: strategy: fail-fast: false matrix: - os: [pl-4-core-large-runner] + os: [arm-gpu] arch: [aarch64] pl_backend: ["lightning_gpu", "lightning_tensor"] cuda_version: ["12"] @@ -42,7 +42,9 @@ jobs: container_img: ["quay.io/pypa/manylinux_2_28_aarch64"] timeout-minutes: 45 name: ${{ matrix.os }}::${{ matrix.arch }} - ${{ matrix.pl_backend }} (Python ${{ fromJson('{ "cp310-*":"3.10","cp311-*":"3.11", "cp312-*":"3.12" }')[matrix.cibw_build] }}) - runs-on: ${{ matrix.os }} + runs-on: + - self-hosted + - ${{ matrix.os }} steps: - name: Checkout PennyLane-Lightning @@ -59,9 +61,6 @@ jobs: - name: Configure pyproject.toml file run: PL_BACKEND="${{ matrix.pl_backend }}" python scripts/configure_pyproject_toml.py - - uses: docker/setup-qemu-action@v3 - name: Set up QEMU - - name: Build wheels env: CIBW_ARCHS_LINUX: ${{matrix.arch}} @@ -94,6 +93,34 @@ jobs: run: python3 -m cibuildwheel --output-dir wheelhouse + - name: Determine Python version + id: pyvs + shell: bash + run: | + echo "version=$(echo ${{ matrix.cibw_build }} | tr -cd '[:digit:].' | sed 's/./&./1')" >> $GITHUB_OUTPUT + + - uses: actions/setup-python@v5 + name: Install Python + with: + python-version: ${{ steps.pyvs.outputs.version }} + + - name: Test wheels + run: | + python -m ensurepip --upgrade + python -m pip install -r requirements-tests.txt + PL_BACKEND="lightning_qubit" python scripts/configure_pyproject_toml.py + SKIP_COMPILATION=True python -m pip install . 
-vv + python -m pip install ./wheelhouse/*.whl + DEVICENAME=`echo ${{ matrix.pl_backend }} | sed "s/_/./g"` + if (${{ matrix.pl_backend == 'lightning_tensor' }}) then + PL_DEVICE=${DEVICENAME} python -m pytest tests/ + else + pl-device-test --device=${DEVICENAME} --skip-ops -x --tb=short --no-flaky-report + # MCM tests are slow and skipped. get_c_interface() API is not supported with current test setup and skipped. + PL_DEVICE=${DEVICENAME} python -m pytest tests/ -k "not test_supported_linux_platform_gpu and not test_native_mcm" + fi + + - name: Validate wheels run: | python3 -m pip install twine diff --git a/CMakeLists.txt b/CMakeLists.txt index 030de959a8..cb6ea5c93a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -153,7 +153,7 @@ if(ENABLE_PYTHON) if("${PL_BACKEND}" STREQUAL "lightning_gpu" OR "${PL_BACKEND}" STREQUAL "lightning_tensor") # Allow pip installation of cuQuantum & CUDA 12 libs to be accessible without setting LD_LIBRARY_PATH for lightning_gpu # BUILD_RPATH only works for the last call - set_target_properties("${PL_BACKEND}_ops" PROPERTIES BUILD_RPATH "$ORIGIN/../cuquantum/lib:$ORIGIN/../nvidia/cuda_runtime/lib:$ORIGIN/../nvidia/cublas/lib:$ORIGIN/../nvidia/cusparse/lib:${SCIPY_OPENBLAS32_RUNTIME_LIB_PATH}:$ORIGIN") + set_target_properties("${PL_BACKEND}_ops" PROPERTIES BUILD_RPATH "$ORIGIN/../cuquantum/lib:$ORIGIN/../nvidia/cuda_runtime/lib:$ORIGIN/../nvidia/nvjitlink/lib:$ORIGIN/../nvidia/cublas/lib:$ORIGIN/../nvidia/cusparse/lib:${SCIPY_OPENBLAS32_RUNTIME_LIB_PATH}:$ORIGIN") else() set_target_properties("${PL_BACKEND}_ops" PROPERTIES BUILD_RPATH "${SCIPY_OPENBLAS32_RUNTIME_LIB_PATH}") endif() diff --git a/pennylane_lightning/core/_version.py b/pennylane_lightning/core/_version.py index 9b9e9b997d..fb54db9a0c 100644 --- a/pennylane_lightning/core/_version.py +++ b/pennylane_lightning/core/_version.py @@ -16,4 +16,4 @@ Version number (major.minor.patch[-label]) """ -__version__ = "0.40.0-rc0" +__version__ = "0.40.0-rc1" diff --git 
a/tests/lightning_tensor/test_measurements_class.py b/tests/lightning_tensor/test_measurements_class.py index 8cd48f23b3..d3c9c96648 100644 --- a/tests/lightning_tensor/test_measurements_class.py +++ b/tests/lightning_tensor/test_measurements_class.py @@ -140,7 +140,7 @@ def test_probs_many_wires(self, method, n_qubits, n_targets, tol): pytest.skip("Number of targets cannot exceed the number of wires.") dev = qml.device(device_name, wires=n_qubits, **method) - dq = qml.device("lightning.qubit", wires=n_qubits) + dq = qml.device("default.qubit", wires=n_qubits) init_state = np.random.rand(2**n_qubits) + 1.0j * np.random.rand(2**n_qubits) init_state /= np.linalg.norm(init_state) @@ -168,7 +168,7 @@ def test_state_many_wires(self, method, n_qubits, n_targets, tol): pytest.skip("Number of targets cannot exceed the number of wires.") dev = qml.device(device_name, wires=n_qubits, **method) - dq = qml.device("lightning.qubit", wires=n_qubits) + dq = qml.device("default.qubit", wires=n_qubits) init_state = np.random.rand(2**n_qubits) + 1.0j * np.random.rand(2**n_qubits) init_state /= np.linalg.norm(init_state)