Skip to content

[dask] Fix LTR with empty partition and NCCL error. #11643

[dask] Fix LTR with empty partition and NCCL error.

[dask] Fix LTR with empty partition and NCCL error. #11643

Workflow file for this run

name: XGBoost CI (JVM packages)
on: [push, pull_request]
permissions:
contents: read # to fetch code (actions/checkout)
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
env:
BRANCH_NAME: >-
${{ github.event.pull_request.number && 'PR-' }}${{ github.event.pull_request.number || github.ref_name }}
jobs:
build-jvm-manylinux2014:
name: >-
Build libxgboost4j.so targeting glibc 2.17
(arch ${{ matrix.arch }}, runner ${{ matrix.runner }})
runs-on:
- runs-on
- runner=${{ matrix.runner }}
- run-id=${{ github.run_id }}
- tag=jvm-tests-build-jvm-manylinux2014-${{ matrix.arch }}
strategy:
fail-fast: false
matrix:
include:
- arch: aarch64
runner: linux-arm64-cpu
- arch: x86_64
runner: linux-amd64-cpu
steps:
# Restart Docker daemon so that it recognizes the ephemeral disks
- run: sudo systemctl restart docker
- uses: actions/checkout@v4
with:
submodules: "true"
- name: Log into Docker registry (AWS ECR)
run: bash ops/pipeline/login-docker-registry.sh
- run: bash ops/pipeline/build-jvm-manylinux2014.sh ${{ matrix.arch }}
build-jvm-gpu:
name: Build libxgboost4j.so with CUDA
runs-on:
- runs-on=${{ github.run_id }}
- runner=linux-amd64-cpu
- tag=jvm-tests-build-jvm-gpu
steps:
# Restart Docker daemon so that it recognizes the ephemeral disks
- run: sudo systemctl restart docker
- uses: actions/checkout@v4
with:
submodules: "true"
- name: Log into Docker registry (AWS ECR)
run: bash ops/pipeline/login-docker-registry.sh
- run: bash ops/pipeline/build-jvm-gpu.sh
- name: Stash files
run: |
python3 ops/pipeline/manage-artifacts.py upload \
--s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \
--prefix cache/${{ github.run_id }}/build-jvm-gpu \
lib/libxgboost4j.so
build-jvm-mac:
name: "Build libxgboost4j.dylib for ${{ matrix.description }}"
runs-on: ${{ matrix.runner }}
strategy:
fail-fast: false
matrix:
include:
- description: "MacOS (Apple Silicon)"
script: ops/pipeline/build-jvm-macos-apple-silicon.sh
libname: libxgboost4j_m1.dylib
runner: macos-14
- description: "MacOS (Intel)"
script: ops/pipeline/build-jvm-macos-intel.sh
libname: libxgboost4j_intel.dylib
runner: macos-13
steps:
- uses: actions/checkout@v4
with:
submodules: "true"
- run: bash ${{ matrix.script }}
- name: Upload libxgboost4j.dylib
if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
run: |
mv -v lib/libxgboost4j.dylib ${{ matrix.libname }}
python3 ops/pipeline/manage-artifacts.py upload \
--s3-bucket xgboost-nightly-builds \
--prefix ${{ env.BRANCH_NAME }}/${{ github.sha }} --make-public \
${{ matrix.libname }}
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}
build-jvm-docs:
name: Build docs for JVM packages
needs: [build-jvm-gpu]
runs-on:
- runs-on=${{ github.run_id }}
- runner=linux-amd64-cpu
- tag=jvm-tests-build-jvm-docs
steps:
# Restart Docker daemon so that it recognizes the ephemeral disks
- run: sudo systemctl restart docker
- uses: actions/checkout@v4
with:
submodules: "true"
- name: Log into Docker registry (AWS ECR)
run: bash ops/pipeline/login-docker-registry.sh
- name: Unstash files
run: |
python3 ops/pipeline/manage-artifacts.py download \
--s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \
--prefix cache/${{ github.run_id }}/build-jvm-gpu \
--dest-dir lib \
libxgboost4j.so
- run: bash ops/pipeline/build-jvm-doc.sh
- name: Upload JVM doc
run: |
python3 ops/pipeline/manage-artifacts.py upload \
--s3-bucket xgboost-docs \
--prefix ${BRANCH_NAME}/${GITHUB_SHA} --make-public \
jvm-packages/${{ env.BRANCH_NAME }}.tar.bz2
build-test-jvm-packages:
name: Build and test JVM packages (Linux, Scala ${{ matrix.scala_version }})
runs-on:
- runs-on=${{ github.run_id }}
- runner=linux-amd64-cpu
- tag=jvm-tests-build-test-jvm-packages-scala${{ matrix.scala_version }}
strategy:
fail-fast: false
matrix:
scala_version: ["2.12", "2.13"]
steps:
# Restart Docker daemon so that it recognizes the ephemeral disks
- run: sudo systemctl restart docker
- uses: actions/checkout@v4
with:
submodules: "true"
- name: Log into Docker registry (AWS ECR)
run: bash ops/pipeline/login-docker-registry.sh
- name: Build and test JVM packages (Scala ${{ matrix.scala_version }})
run: bash ops/pipeline/build-test-jvm-packages.sh
env:
SCALA_VERSION: ${{ matrix.scala_version }}
- name: Stash files
run: |
python3 ops/pipeline/manage-artifacts.py upload \
--s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \
--prefix cache/${{ github.run_id }}/build-test-jvm-packages \
lib/libxgboost4j.so
if: matrix.scala_version == '2.13'
build-test-jvm-packages-other-os:
name: Build and test JVM packages (${{ matrix.os }})
timeout-minutes: 30
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [windows-latest, macos-13]
steps:
- uses: actions/checkout@v4
with:
submodules: 'true'
- uses: actions/setup-java@v4
with:
distribution: 'temurin'
java-version: '8'
- uses: dmlc/xgboost-devops/actions/miniforge-setup@main
with:
environment-name: minimal
environment-file: ops/conda_env/minimal.yml
- name: Cache Maven packages
uses: actions/cache@v4
with:
path: ~/.m2
key: ${{ runner.os }}-m2-${{ hashFiles('./jvm-packages/pom.xml') }}
restore-keys: ${{ runner.os }}-m2-${{ hashFiles('./jvm-packages/pom.xml') }}
- name: Test XGBoost4J (Core) on macos
if: matrix.os == 'macos-13'
run: |
cd jvm-packages
mvn test -B -pl :xgboost4j_2.12 -Duse.openmp=OFF
- name: Test XGBoost4J (Core) on windows
if: matrix.os == 'windows-latest'
run: |
cd jvm-packages
mvn test -B -pl :xgboost4j_2.12
- name: Publish artifact xgboost4j.dll to S3
run: |
python ops/pipeline/manage-artifacts.py upload `
--s3-bucket xgboost-nightly-builds `
--prefix ${{ env.BRANCH_NAME }}/${{ github.sha }} --make-public `
lib/xgboost4j.dll
if: |
(github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&
matrix.os == 'windows-latest'
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}
test-jvm-packages-gpu:
name: Test JVM packages with CUDA (Scala ${{ matrix.scala_version }})
needs: [build-jvm-gpu]
runs-on:
- runs-on=${{ github.run_id }}
- runner=linux-amd64-mgpu
- tag=jvm-tests-test-jvm-packages-gpu-scala${{ matrix.scala_version }}
strategy:
fail-fast: false
matrix:
scala_version: ["2.12", "2.13"]
steps:
# Restart Docker daemon so that it recognizes the ephemeral disks
- run: sudo systemctl restart docker
- uses: actions/checkout@v4
with:
submodules: "true"
- name: Log into Docker registry (AWS ECR)
run: bash ops/pipeline/login-docker-registry.sh
- name: Unstash files
run: |
python3 ops/pipeline/manage-artifacts.py download \
--s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \
--prefix cache/${{ github.run_id }}/build-jvm-gpu \
--dest-dir lib \
libxgboost4j.so
- run: bash ops/pipeline/test-jvm-gpu.sh
env:
SCALA_VERSION: ${{ matrix.scala_version }}
deploy-jvm-packages:
name: Deploy JVM packages to S3 (${{ matrix.variant.name }})
needs: [build-jvm-gpu, build-test-jvm-packages, test-jvm-packages-gpu]
runs-on:
- runs-on
- runner=linux-amd64-cpu
- run-id=${{ github.run_id }}
- tag=jvm-tests-deploy-jvm-packages-${{ matrix.variant.name }}-scala${{ matrix.scala_version }}
strategy:
fail-fast: false
matrix:
variant:
- name: cpu
container_id: xgb-ci.jvm
artifact_from: build-test-jvm-packages
- name: gpu
container_id: xgb-ci.jvm_gpu_build
artifact_from: build-jvm-gpu
scala_version: ['2.12', '2.13']
steps:
# Restart Docker daemon so that it recognizes the ephemeral disks
- run: sudo systemctl restart docker
- uses: actions/checkout@v4
with:
submodules: "true"
- name: Log into Docker registry (AWS ECR)
run: bash ops/pipeline/login-docker-registry.sh
- name: Unstash files
run: |
python3 ops/pipeline/manage-artifacts.py download \
--s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \
--prefix cache/${{ github.run_id }}/${{ matrix.variant.artifact_from }} \
--dest-dir lib \
libxgboost4j.so
ls -lh lib/libxgboost4j.so
- name: Deploy JVM packages to S3
run: |
bash ops/pipeline/deploy-jvm-packages.sh ${{ matrix.variant.name }} \
${{ matrix.variant.container_id }} ${{ matrix.scala_version }}