-
Notifications
You must be signed in to change notification settings - Fork 2.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added CI job with TSAN and free-threading
Use bazel to run tests
- Loading branch information
Showing
5 changed files
with
238 additions
and
148 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
name: CI - Free-threading and Thread Sanitizer (nightly)

# Runs of this workflow on the same ref share one concurrency group; starting
# a new run cancels the one that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

on:
  schedule:
    - cron: "0 12 * * *"  # Daily at 12:00 UTC
  workflow_dispatch:  # allows triggering the workflow run manually
  pull_request:  # Automatically trigger on pull requests affecting this file
    # branches:
    #   - main
    paths:
      - '**/workflows/tsan.yaml'

jobs:
  # Builds a free-threaded (--disable-gil) CPython 3.13 with ThreadSanitizer,
  # builds jaxlib against it with -fsanitize=thread, then runs the CPU test
  # suite through bazel with TSAN set to halt on the first unsuppressed report.
  tsan:
    runs-on: linux-x86-n2-64
    container:
      image: index.docker.io/library/ubuntu@sha256:b359f1067efa76f37863778f7b6d0e8d911e3ee8efa807ad01fbf5dc1ef9006b # ratchet:ubuntu:24.04
    strategy:
      fail-fast: false
    defaults:
      run:
        shell: bash -l {0}
    steps:
      # git is installed in this step, before actions/checkout, because
      # otherwise checkout downloads the code with the GitHub REST API and
      # any subsequent git commands will fail.
      - name: Install clang 18
        env:
          DEBIAN_FRONTEND: noninteractive
        run: |
          apt update
          apt install -y clang-18 libstdc++-14-dev build-essential libssl-dev \
            zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev curl git \
            libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev \
            libffi-dev liblzma-dev
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          path: jax
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          repository: python/cpython
          path: cpython
          ref: "3.13"
      - name: Build CPython with TSAN enabled
        run: |
          cd cpython
          # -p keeps the step from failing if the prefix directory already exists
          mkdir -p ${GITHUB_WORKSPACE}/cpython-tsan
          CC=clang-18 CXX=clang++-18 ./configure --prefix ${GITHUB_WORKSPACE}/cpython-tsan --disable-gil --with-thread-sanitizer
          # Use as many make jobs as the runner has cores
          make -j"$(nproc)"
          make install
          # Check whether free-threading mode is enabled
          PYTHON_GIL=0 ${GITHUB_WORKSPACE}/cpython-tsan/bin/python3 -c "import sys; assert not sys._is_gil_enabled()"
          # Create archive to be used with bazel as hermetic python:
          cd ${GITHUB_WORKSPACE} && tar -czpf python-tsan.tgz cpython-tsan
      - name: Build and install JAX
        run: |
          cd jax
          # sha256sum prints "<hash>  <file>"; storing the output as an array
          # makes ${PYTHON_SHA256} expand to the hash (first element) only.
          export PYTHON_SHA256=($(sha256sum ${GITHUB_WORKSPACE}/python-tsan.tgz))
          echo "Python sha256: ${PYTHON_SHA256}"
          ${GITHUB_WORKSPACE}/cpython-tsan/bin/python3 build/build.py build --wheels=jaxlib \
            --python_version=3.13-ft \
            --bazel_options=--repo_env=HERMETIC_PYTHON_URL="file://${GITHUB_WORKSPACE}/python-tsan.tgz" \
            --bazel_options=--repo_env=HERMETIC_PYTHON_SHA256=${PYTHON_SHA256} \
            --bazel_options=--repo_env=HERMETIC_PYTHON_PREFIX="cpython-tsan/" \
            --bazel_options=--color=yes \
            --bazel_options=--copt=-fsanitize=thread \
            --bazel_options=--linkopt="-fsanitize=thread" \
            --bazel_options=--copt=-g \
            --clang_path=/usr/bin/clang-18
      - name: Run tests
        timeout-minutes: 120
        env:
          # Quoted so the values are unambiguously strings for the shell.
          JAX_NUM_GENERATED_CASES: "1"
          JAX_ENABLE_X64: "true"
          JAX_SKIP_SLOW_TESTS: "true"
          PY_COLORS: "1"
        run: |
          cd jax
          echo "JAX_NUM_GENERATED_CASES=$JAX_NUM_GENERATED_CASES"
          echo "JAX_ENABLE_X64=$JAX_ENABLE_X64"
          echo "JAX_SKIP_SLOW_TESTS=$JAX_SKIP_SLOW_TESTS"
          # jaxlib does not yet declare free-threading support, so importing
          # it produces the following warning and re-enables the GIL:
          # RuntimeWarning: The global interpreter lock (GIL) has been enabled to load module 'jaxlib.utils',
          # which has not declared that it can run safely without the GIL.
          # To avoid that we temporarily define PYTHON_GIL
          export PYTHON_GIL=0
          # Set symlink to the bazel executable.
          # NOTE(review): presumably the bazel-* binary was left in this
          # directory by the build step above; only the first name printed
          # by ls is used - confirm.
          bazel_exec=($(ls bazel-*))
          ln -s ${bazel_exec} bazel
          # Create tsan suppressions file
          cat << EOF > $PWD/.tsan_ignore
          # false-positive caused because we haven't tsan-instrumented libgcc_s. Multiple threads
          # are racing on a call to __register_frame_info(), but that function appears to be correctly locked internally.
          race:llvm::RuntimeDyldELF::registerEHFrames
          # https://github.com/python/cpython/issues/128050
          race:partial_vectorcall_fallback
          # https://github.com/python/cpython/issues/128100
          race:ensure_nonmanaged_dict
          # https://github.com/openxla/xla/issues/20686
          race:dnnl_sgemm
          # https://github.com/numpy/numpy/issues/28041
          race:get_initial_from_ufunc
          # https://github.com/numpy/numpy/issues/28042
          race:PyArray_UpdateFlags
          # https://github.com/python/cpython/issues/128130
          race_top:run_eval_code_obj
          race:dump_traceback
          # https://github.com/numpy/numpy/issues/28045 not sure about this one
          race:arraymethod_dealloc
          # https://github.com/python/cpython/issues/128133
          race:bytes_hash
          # https://github.com/python/cpython/issues/128137
          race:immortalize_interned
          # https://github.com/python/cpython/issues/128144
          race_top:PyMember_GetOne
          # https://github.com/python/cpython/issues/128657
          race:py_digest_by_name
          EOF
          # The JAX_* settings and PYTHON_GIL must reach the test processes,
          # so they are passed with --test_env (like TSAN_OPTIONS below);
          # --repo_env targets repository rules instead.
          ./bazel test \
            --python_version=3.13-ft \
            --//jax:build_jaxlib=false \
            --test_env=JAX_NUM_GENERATED_CASES=$JAX_NUM_GENERATED_CASES \
            --test_env=JAX_ENABLE_X64=$JAX_ENABLE_X64 \
            --test_env=JAX_SKIP_SLOW_TESTS=$JAX_SKIP_SLOW_TESTS \
            --test_env=PYTHON_GIL=$PYTHON_GIL \
            --test_env=TSAN_OPTIONS=halt_on_error=1,suppressions=$PWD/.tsan_ignore \
            --test_env=JAX_TEST_NUM_THREADS=8 \
            --nocache_test_results \
            --test_output=all \
            //tests:cpu_tests
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# ThreadSanitizer suppressions: each entry silences reports matching the
# named function; the comment above an entry links the tracking issue.

# I believe this is a false-positive caused because we haven't tsan-instrumented libgcc_s. Multiple threads
# are racing on a call to __register_frame_info(), but that function appears to be correctly locked internally.
race:llvm::RuntimeDyldELF::registerEHFrames

# https://github.com/python/cpython/issues/128050
race:partial_vectorcall_fallback

# https://github.com/python/cpython/issues/128100
race:ensure_nonmanaged_dict

# https://github.com/openxla/xla/issues/20686
race:dnnl_sgemm

# https://github.com/numpy/numpy/issues/28041
race:get_initial_from_ufunc

# https://github.com/numpy/numpy/issues/28042
race:PyArray_UpdateFlags

# https://github.com/python/cpython/issues/128130
race_top:run_eval_code_obj
race:dump_traceback

# https://github.com/numpy/numpy/issues/28045 not sure about this one
race:arraymethod_dealloc

# https://github.com/python/cpython/issues/128133
race:bytes_hash

# https://github.com/python/cpython/issues/128137
race:immortalize_interned

# https://github.com/python/cpython/issues/128144
race_top:PyMember_GetOne

# https://github.com/python/cpython/issues/128657
race:py_digest_by_name
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.