forked from timescale/timescaledb
-
Notifications
You must be signed in to change notification settings - Fork 0
361 lines (310 loc) · 13.5 KB
/
libfuzzer.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
name: Libfuzzer
"on":
schedule:
# run daily 1:00 on main branch
- cron: '0 1 * * *'
push:
branches:
- main
- prerelease_test
- trigger/libfuzzer
pull_request:
paths:
- .github/workflows/libfuzzer.yaml
- 'tsl/test/fuzzing/compression/**'
jobs:
build:
runs-on: ubuntu-22.04
name: Build PostgreSQL and TimescaleDB
env:
PG_SRC_DIR: pgbuild
PG_INSTALL_DIR: postgresql
steps:
- name: Install Linux Dependencies
run: |
# Don't add ddebs here because the ddebs mirror is always 503 Service Unavailable.
# If needed, install them before opening the core dump.
sudo apt-get update
sudo apt-get install 7zip clang lld llvm flex bison libipc-run-perl \
libtest-most-perl tree
- name: Checkout TimescaleDB
uses: actions/checkout@v4
- name: Read configuration
id: config
run: python -B .github/gh_config_reader.py
# We are going to rebuild Postgres daily, so that it doesn't suddenly break
# ages after the original problem.
- name: Get date for build caching
id: get-date
run: |
echo "date=$(date +"%d")" >> $GITHUB_OUTPUT
# we cache the build directory instead of the install directory here
# because extension installation will write files to install directory
# leading to a tainted cache
- name: Cache PostgreSQL
id: cache-postgresql
uses: actions/cache@v4
with:
path: ~/${{ env.PG_SRC_DIR }}
key: "postgresql-libfuzzer-${{ steps.get-date.outputs.date }}-${{ hashFiles('.github/**') }}"
- name: Build PostgreSQL
if: steps.cache-postgresql.outputs.cache-hit != 'true'
run: |
wget -q -O postgresql.tar.bz2 \
https://ftp.postgresql.org/pub/source/v${{ steps.config.outputs.PG15_LATEST }}/postgresql-${{ steps.config.outputs.PG15_LATEST }}.tar.bz2
mkdir -p ~/$PG_SRC_DIR
tar --extract --file postgresql.tar.bz2 --directory ~/$PG_SRC_DIR --strip-components 1
cd ~/$PG_SRC_DIR
CC=clang ./configure --prefix=$HOME/$PG_INSTALL_DIR --with-openssl \
--without-readline --without-zlib --without-libxml --enable-cassert \
--enable-debug CC=clang \
CFLAGS="-fuse-ld=lld -ggdb3 -O2 -fno-omit-frame-pointer"
make -j$(nproc)
- name: Install PostgreSQL
run: |
make -C ~/$PG_SRC_DIR install
make -C ~/$PG_SRC_DIR/contrib/postgres_fdw install
- name: Upload config.log
if: always()
uses: actions/upload-artifact@v4
with:
name: config.log for PostgreSQL
path: ~/${{ env.PG_SRC_DIR }}/config.log
- name: Build TimescaleDB
run: |
set -e
export LIBFUZZER_PATH=$(dirname "$(find $(llvm-config --libdir) -name libclang_rt.fuzzer_no_main-x86_64.a | head -1)")
# Some pointers for the next time we have linking/undefined symbol problems:
# http://web.archive.org/web/20200926071757/https://github.com/google/sanitizers/issues/111
# http://web.archive.org/web/20231101091231/https://github.com/cms-sw/cmssw/issues/40680
cmake -B build -S . -DASSERTIONS=ON -DLINTER=OFF -DCMAKE_VERBOSE_MAKEFILE=1 \
-DWARNINGS_AS_ERRORS=1 -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_C_COMPILER=clang \
-DCMAKE_C_FLAGS="-fsanitize=fuzzer-no-link -lstdc++ -L$LIBFUZZER_PATH -l:libclang_rt.fuzzer_no_main-x86_64.a -static-libsan" \
-DCOMPRESSION_FUZZING=1 -DPG_PATH=$HOME/$PG_INSTALL_DIR
make -C build -j$(nproc) install
# Incredibly, the upload-artifact action can't preserve executable permissions:
# https://github.com/actions/upload-artifact/issues/38
# It's also extremely slow.
- name: Compress the installation directory
run: 7z a install.7z $HOME/$PG_INSTALL_DIR
- name: Save the installation directory
uses: actions/upload-artifact@v4
with:
name: fuzzing-install-dir
path: install.7z
if-no-files-found: error
retention-days: 1
fuzz:
needs: build
strategy:
fail-fast: false
matrix:
case: [
{ algo: gorilla , pgtype: float8, bulk: false, runs: 500000000 },
{ algo: deltadelta, pgtype: int8 , bulk: false, runs: 500000000 },
{ algo: gorilla , pgtype: float8, bulk: true , runs: 1000000000 },
{ algo: deltadelta, pgtype: int8 , bulk: true , runs: 1000000000 },
# array has a peculiar recv function that recompresses all input, so
# fuzzing it is much slower. The dictionary recv also uses it.
{ algo: array , pgtype: text , bulk: false, runs: 10000000 },
{ algo: array , pgtype: text , bulk: true , runs: 10000000 },
{ algo: dictionary, pgtype: text , bulk: false, runs: 100000000 },
{ algo: dictionary, pgtype: text , bulk: true , runs: 100000000 },
]
name: Fuzz decompression ${{ matrix.case.algo }} ${{ matrix.case.pgtype }} ${{ matrix.case.bulk && 'bulk' || 'rowbyrow' }}
runs-on: ubuntu-22.04
env:
PG_SRC_DIR: pgbuild
PG_INSTALL_DIR: postgresql
steps:
- name: Install Linux dependencies
run: |
sudo apt update
sudo apt install 7zip systemd-coredump gdb
- name: Checkout TimescaleDB
uses: actions/checkout@v4
- name: Download the installation directory
uses: actions/download-artifact@v4
with:
name: fuzzing-install-dir
- name: Unpack the installation directory
run: 7z x -o$HOME install.7z
- name: initdb
run: |
# Have to do this before initializing the corpus, or initdb will complain.
set -xeu
export PGDATA=db
export PGPORT=5432
export PGDATABASE=postgres
export PATH=$HOME/$PG_INSTALL_DIR/bin:$PATH
initdb
echo "shared_preload_libraries = 'timescaledb'" >> $PGDATA/postgresql.conf
- name: Set configuration
id: config
run: |
set -x
echo "cache_prefix=${{ format('libfuzzer-corpus-2-{0}-{1}', matrix.case.algo, matrix.case.pgtype) }}" >> $GITHUB_OUTPUT
echo "name=${{ matrix.case.algo }} ${{ matrix.case.pgtype }} ${{ matrix.case.bulk && 'bulk' || 'rowbyrow' }}" >> $GITHUB_OUTPUT
- name: Restore the cached fuzzing corpus (bulk)
id: restore-corpus-cache-bulk
uses: actions/cache/restore@v4
with:
path: db/corpus-bulk
key: "${{ steps.config.outputs.cache_prefix }}-bulk"
# We save the row-by-row corpus separately from the bulk corpus, so that
# they don't overwrite each other. Now we are going to combine them.
- name: Restore the cached fuzzing corpus (rowbyrow)
id: restore-corpus-cache-rowbyrow
uses: actions/cache/restore@v4
with:
path: db/corpus-rowbyrow
key: "${{ steps.config.outputs.cache_prefix }}-rowbyrow"
- name: Initialize the fuzzing corpus
run: |
# Combine the cached corpus from rowbyrow and bulk fuzzing, and from repository.
mkdir -p db/corpus{,-rowbyrow,-bulk}
find "tsl/test/fuzzing/compression/${{ matrix.case.algo }}-${{ matrix.case.pgtype }}" -type f -exec cp -n -t db/corpus {} +
find "db/corpus-rowbyrow" -type f -exec cp -n -t db/corpus {} +
find "db/corpus-bulk" -type f -exec cp -n -t db/corpus {} +
ls db/corpus | wc -l
- name: Run libfuzzer for compression
run: |
set -xeu
export PGDATA=db
export PGPORT=5432
export PGDATABASE=postgres
export PATH=$HOME/$PG_INSTALL_DIR/bin:$PATH
pg_ctl -l postgres.log start
psql -c "create extension timescaledb;"
# Create the fuzzing functions
export MODULE_NAME=$(basename $(find $HOME/$PG_INSTALL_DIR -name "timescaledb-tsl-*.so"))
psql -a -c "create or replace function fuzz(algo cstring, pgtype regtype,
bulk bool, runs int)
returns int as '"$MODULE_NAME"', 'ts_fuzz_compression' language c;
create or replace function ts_read_compressed_data_directory(algo cstring,
pgtype regtype, path cstring, bulk bool)
returns table(path text, bytes int, rows int, sqlstate text, location text)
as '"$MODULE_NAME"', 'ts_read_compressed_data_directory' language c;
"
# Start more fuzzing processes in the background. We won't even monitor
# their progress, because the server will panic if they find an error.
for x in {2..$(nproc)}
do
psql -v ON_ERROR_STOP=1 -c "select fuzz('${{ matrix.case.algo }}',
'${{ matrix.case.pgtype }}', '${{ matrix.case.bulk }}', ${{ matrix.case.runs }});" &
done
# Start the one fuzzing process that we will monitor, in foreground.
# The LLVM fuzzing driver calls exit(), so we expect to lose the connection.
ret=0
psql -v ON_ERROR_STOP=1 -c "select fuzz('${{ matrix.case.algo }}',
'${{ matrix.case.pgtype }}', '${{ matrix.case.bulk }}', ${{ matrix.case.runs }});" || ret=$?
if ! [ $ret -eq 2 ]
then
>&2 echo "Unexpected psql exit code $ret"
exit 1
fi
ls db/corpus | wc -l
fn="ts_read_compressed_data_directory('${{ matrix.case.algo }}',
'${{ matrix.case.pgtype }}', 'corpus', '${{ matrix.case.bulk }}')"
# Show the statistics about fuzzing corpus
psql -c "select count(*), location, min(sqlstate), min(path)
from $fn
group by location order by count(*) desc
"
# Save interesting cases because the caches are not available for download from UI
mkdir -p interesting
psql -qtAX -c "select distinct on (location) 'db/' || path from $fn
order by location, bytes, path
" | xargs cp -t interesting
# Check that we don't have any internal errors
errors=$(psql -qtAX --set=ON_ERROR_STOP=1 -c "select count(*)
from $fn
where sqlstate = 'XX000'")
echo "Internal program errors: $errors"
[ $errors -eq 0 ] || exit 1
# Shouldn't have any WARNINGS in the log.
! grep -F "] WARNING: " postgres.log
# Check that the server is still alive.
psql -c "select 1"
- name: Collect the logs
if: always()
id: collectlogs
run: |
# wait in case there are in-progress coredumps
sleep 10
if coredumpctl -q list >/dev/null; then echo "coredumps=true" >>$GITHUB_OUTPUT; fi
# print OOM killer information
sudo journalctl --system -q --facility=kern --grep "Killed process" || true
- name: Save PostgreSQL log
if: always()
uses: actions/upload-artifact@v4
with:
name: PostgreSQL log for ${{ steps.config.outputs.name }}
path: postgres.log
- name: Save fuzzer-generated crash cases
if: always()
uses: actions/upload-artifact@v4
with:
name: Crash cases for ${{ steps.config.outputs.name }}
path: db/crash-*
if-no-files-found: ignore
- name: Save interesting cases
if: always()
uses: actions/upload-artifact@v4
with:
name: Interesting cases for ${{ steps.config.outputs.name }}
path: interesting/
# We use separate restore/save actions, because the default action won't
# save the updated folder after the cache hit. We also want to save the
# cache after fuzzing errors, and the default action doesn't save after
# errors.
# We can't overwrite the existing cache, so we add a unique suffix. The
# cache is matched by key prefix, not exact key, and picks the newest
# matching item, so this works.
# The caches for rowbyrow and bulk fuzzing are saved separately, otherwise
# the slower job would always overwrite the cache from the faster one. We
# want to combine corpuses from bulk and rowbyrow fuzzing for better
# coverage.
# Note that the cache action cannot be restored on a path different from the
# one it was saved from. To make our lives more interesting, it is not
# directly documented anywhere, but we can deduce it from path influencing
# the version.
- name: Change corpus path to please the 'actions/cache' GitHub Action
if: always()
run: |
rm -rf db/corpus-{bulk,rowbyrow} ||:
mv -fT db/corpus{,-${{ matrix.case.bulk && 'bulk' || 'rowbyrow' }}}
- name: Save fuzzer corpus
if: always()
uses: actions/cache/save@v4
with:
path: db/corpus-${{ matrix.case.bulk && 'bulk' || 'rowbyrow' }}
key: "${{ format('{0}-{1}-{2}-{3}',
steps.config.outputs.cache_prefix,
matrix.case.bulk && 'bulk' || 'rowbyrow',
github.run_id, github.run_attempt) }}"
- name: Stack trace
if: always() && steps.collectlogs.outputs.coredumps == 'true'
run: |
sudo coredumpctl gdb <<<"
set verbose on
set trace-commands on
show debug-file-directory
printf "'"'"query = '%s'\n\n"'"'", debug_query_string
frame function ExceptionalCondition
printf "'"'"condition = '%s'\n"'"'", conditionName
up 1
l
info args
info locals
bt full
" 2>&1 | tee stacktrace.log
./scripts/bundle_coredumps.sh
grep -C40 "was terminated by signal" postgres.log > postgres-failure.log ||:
exit 1 # Fail the job if we have core dumps.
- name: Upload core dumps
if: always() && steps.collectlogs.outputs.coredumps == 'true'
uses: actions/upload-artifact@v4
with:
name: Coredumps for ${{ steps.config.outputs.name }}
path: coredumps