-
Notifications
You must be signed in to change notification settings - Fork 2.9k
153 lines (131 loc) · 5.89 KB
/
tsan.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# Nightly CI: builds a free-threaded (no-GIL) CPython 3.13 with ThreadSanitizer,
# builds jaxlib against it with TSAN instrumentation, and runs the CPU test
# suite under TSAN with a suppressions file for known upstream races.
name: CI - Free-threading and Thread Sanitizer (nightly)

# One run per workflow/ref at a time; a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

on:
  schedule:
    - cron: "0 12 * * *"  # Daily at 12:00 UTC
  workflow_dispatch:  # allows triggering the workflow run manually
  pull_request:  # Automatically trigger on pull requests affecting this file
    # branches:
    #   - main
    paths:
      - '**/workflows/tsan.yaml'

# The job only checks out sources, so the token needs read access at most.
permissions:
  contents: read

jobs:
  tsan:
    runs-on: linux-x86-n2-64
    container:
      image: index.docker.io/library/ubuntu@sha256:b359f1067efa76f37863778f7b6d0e8d911e3ee8efa807ad01fbf5dc1ef9006b  # ratchet:ubuntu:24.04
    strategy:
      fail-fast: false
    defaults:
      run:
        # Login shell. Because this is a custom shell template, GitHub runs it
        # verbatim and does NOT add `-e` / `-o pipefail`; every script below
        # therefore starts with `set -euo pipefail` so that a failing
        # intermediate command fails the step instead of being masked.
        shell: bash -l {0}
    steps:
      # Install git before actions/checkout as otherwise it will download the code with the GitHub
      # REST API and therefore any subsequent git commands will fail.
      - name: Install clang 18
        env:
          DEBIAN_FRONTEND: noninteractive
        run: |
          set -euo pipefail
          # apt-get (not apt) is the interface intended for scripts.
          apt-get update
          apt-get install -y clang-18 libstdc++-14-dev build-essential libssl-dev \
            zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev curl git \
            libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev \
            libffi-dev liblzma-dev

      # JAX sources go to ./jax, CPython 3.13 sources to ./cpython.
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
        with:
          path: jax
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
        with:
          repository: python/cpython
          path: cpython
          ref: "3.13"

      - name: Build CPython with TSAN enabled
        run: |
          set -euo pipefail
          cd cpython
          mkdir -p "${GITHUB_WORKSPACE}/cpython-tsan"
          CC=clang-18 CXX=clang++-18 ./configure \
            --prefix "${GITHUB_WORKSPACE}/cpython-tsan" \
            --disable-gil \
            --with-thread-sanitizer
          # 64 parallel jobs; presumably sized to the linux-x86-n2-64 runner.
          make -j64
          make install
          # Check whether free-threading mode is enabled
          PYTHON_GIL=0 "${GITHUB_WORKSPACE}/cpython-tsan/bin/python3" -c "import sys; assert not sys._is_gil_enabled()"
          # Create archive to be used with bazel as hermetic python:
          cd "${GITHUB_WORKSPACE}" && tar -czpf python-tsan.tgz cpython-tsan

      - name: Build and install JAX
        run: |
          set -euo pipefail
          cd jax
          # First field of sha256sum's output is the digest; it is handed to
          # Bazel explicitly through --repo_env below.
          PYTHON_SHA256="$(sha256sum "${GITHUB_WORKSPACE}/python-tsan.tgz" | cut -d' ' -f1)"
          echo "Python sha256: ${PYTHON_SHA256}"
          "${GITHUB_WORKSPACE}/cpython-tsan/bin/python3" build/build.py build --wheels=jaxlib \
            --python_version=3.13-ft \
            --bazel_options=--repo_env=HERMETIC_PYTHON_URL="file://${GITHUB_WORKSPACE}/python-tsan.tgz" \
            --bazel_options=--repo_env=HERMETIC_PYTHON_SHA256="${PYTHON_SHA256}" \
            --bazel_options=--repo_env=HERMETIC_PYTHON_PREFIX="cpython-tsan/" \
            --bazel_options=--color=yes \
            --bazel_options=--copt=-fsanitize=thread \
            --bazel_options=--linkopt="-fsanitize=thread" \
            --bazel_options=--copt=-g \
            --clang_path=/usr/bin/clang-18

      - name: Run tests
        timeout-minutes: 120
        env:
          # Quoted: environment variables are always strings.
          JAX_NUM_GENERATED_CASES: "1"
          JAX_ENABLE_X64: "true"
          JAX_SKIP_SLOW_TESTS: "true"
          PY_COLORS: "1"
        run: |
          set -euo pipefail
          cd jax
          echo "JAX_NUM_GENERATED_CASES=$JAX_NUM_GENERATED_CASES"
          echo "JAX_ENABLE_X64=$JAX_ENABLE_X64"
          echo "JAX_SKIP_SLOW_TESTS=$JAX_SKIP_SLOW_TESTS"
          # As we do not yet have free-threading support,
          # there will be the following warning:
          # RuntimeWarning: The global interpreter lock (GIL) has been enabled to load module 'jaxlib.utils',
          # which has not declared that it can run safely without the GIL.
          # To avoid that we temporarily define PYTHON_GIL
          export PYTHON_GIL=0
          # Set symlink to the bazel executable.
          # ${bazel_exec} expands to the first entry listed by `ls`
          # (presumably the Bazel binary fetched by build/build.py - confirm).
          bazel_exec=($(ls bazel-*))
          ln -sf "${bazel_exec}" bazel
          # Create tsan suppressions file (quoted delimiter: body is written verbatim)
          cat << 'EOF' > "${PWD}/.tsan_ignore"
          # false-positive caused because we haven't tsan-instrumented libgcc_s. Multiple threads
          # are racing on a call to __register_frame_info(), but that function appears to be correctly locked internally.
          race:llvm::RuntimeDyldELF::registerEHFrames
          # https://github.com/python/cpython/issues/128050
          race:partial_vectorcall_fallback
          # https://github.com/python/cpython/issues/128100
          race:ensure_nonmanaged_dict
          # https://github.com/openxla/xla/issues/20686
          race:dnnl_sgemm
          # https://github.com/numpy/numpy/issues/28041
          race:get_initial_from_ufunc
          # https://github.com/numpy/numpy/issues/28042
          race:PyArray_UpdateFlags
          # https://github.com/python/cpython/issues/128130
          race_top:run_eval_code_obj
          race:dump_traceback
          # https://github.com/numpy/numpy/issues/28045 not sure about this one
          race:arraymethod_dealloc
          # https://github.com/python/cpython/issues/128133
          race:bytes_hash
          # https://github.com/python/cpython/issues/128137
          race:immortalize_interned
          # https://github.com/python/cpython/issues/128144
          race_top:PyMember_GetOne
          # https://github.com/python/cpython/issues/128657
          race:py_digest_by_name
          EOF
          ./bazel test \
            --repo_env=HERMETIC_PYTHON_VERSION=3.13-ft \
            --//jax:build_jaxlib=false \
            --repo_env=JAX_NUM_GENERATED_CASES="${JAX_NUM_GENERATED_CASES}" \
            --repo_env=JAX_ENABLE_X64="${JAX_ENABLE_X64}" \
            --repo_env=JAX_SKIP_SLOW_TESTS="${JAX_SKIP_SLOW_TESTS}" \
            --repo_env=PYTHON_GIL="${PYTHON_GIL}" \
            --test_env=TSAN_OPTIONS="halt_on_error=1,suppressions=${PWD}/.tsan_ignore" \
            --test_env=JAX_TEST_NUM_THREADS=8 \
            --nocache_test_results \
            --test_output=all \
            //tests:cpu_tests