Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Synthetic data models for benchmarking #111

Merged
merged 6 commits into from
Sep 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions bin/perf-benchmark-combinations.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/env bash

# Benchmarks every combination of the synthetic data + model parameters
# listed below (6 x 6 x 6 x 6 = 1296 runs) by invoking the given JAR once per
# combination.
#
# Usage: perf-benchmark-combinations.sh <jar-file> [extra-params...]
#
#   <jar-file>         JAR handed to `java -jar`.
#   [extra-params...]  Forwarded verbatim to every benchmark invocation.
#
# A failing combination does not stop the sweep; the number of failed runs is
# reported on stderr once the sweep finishes.
# NB: This can take a while, a few days at worst.

# Check that at least one argument is provided
if [ $# -lt 1 ]; then
    # Diagnostics go to stderr so they never mix with benchmark output.
    echo "Usage: $0 <jar-file> [extra-params...]" >&2
    exit 1
fi

# Extract the JAR file name from the first argument
jar_file="$1"
shift # Remove the first argument so that "$@" contains only the extra params

# Fail fast on a bad path: otherwise every one of the runs below would fail
# and the failure would be swallowed by the continue-on-error handling.
if [ ! -f "$jar_file" ]; then
    echo "Error: JAR file not found: $jar_file" >&2
    exit 1
fi

# Handler for SIGINT (Ctrl+C): stop command echoing and abort the whole sweep
# instead of moving on to the next combination.
cleanup() {
    set +x
    echo "Script interrupted. Exiting..."
    exit 1
}

# Trap SIGINT signal and call cleanup function
trap cleanup SIGINT

# Number of benchmark invocations that exited with a non-zero status
failures=0

# Enable command echoing so each benchmark command line is visible in the log
set -x

# Params to iterate over
num_rows_array=(10 50 200 500 1000 10000)
num_columns_array=(5 10 20 50 100 500)
num_views_array=(5 10 20 50 100 200)
num_clusters_per_view_array=(5 10 20 50 100 200)

for num_rows in "${num_rows_array[@]}"; do
    for num_columns in "${num_columns_array[@]}"; do
        for num_views in "${num_views_array[@]}"; do
            for num_clusters_per_view in "${num_clusters_per_view_array[@]}"; do
                # Benchmark; a failure is counted but does not stop the sweep
                if ! java -jar "$jar_file" \
                    --synthetic \
                    --num-rows "$num_rows" \
                    --num-columns "$num_columns" \
                    --num-views "$num_views" \
                    --num-clusters-per-view "$num_clusters_per_view" \
                    --no-overwrite \
                    "$@"; then
                    failures=$((failures + 1))
                fi
            done
        done
    done
done

# Report how many combinations failed; the exit status stays 0 as before.
set +x
echo "Benchmark sweep finished: $failures run(s) failed." >&2

15 changes: 15 additions & 0 deletions build.clj
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,21 @@
(defn clean
  "Deletes the \"target\" directory. The single argument is ignored."
  [_opts]
  (build/delete {:path "target"}))

(defn uber-perf
  "Builds the standalone perf uberjar under target/.

  Cleans the build output, creates a basis from deps.edn with the :perf alias,
  copies the src, perf and resources directories into `class-dir`, compiles the
  perf main namespace and packages everything into an uberjar whose main class
  is that namespace. The single argument is ignored."
  [_opts]
  (clean nil)
  (let [main-ns  'gensql.query.perf.main
        jar-path (format "target/%s-perf-%s-standalone.jar" (name lib) version)
        basis    (build/create-basis {:project "deps.edn"
                                      :aliases [:perf]})]
    ;; Sources and resources must be in class-dir before compiling/packaging.
    (build/copy-dir {:target-dir class-dir
                     :src-dirs   ["src" "perf" "resources"]})
    (build/compile-clj {:class-dir  class-dir
                        :basis      basis
                        :ns-compile [main-ns]})
    (build/uber {:main      main-ns
                 :basis     basis
                 :uber-file jar-path
                 :class-dir class-dir})))

(defn uber [_]
(clean nil)
(build/copy-dir {:src-dirs ["src" "resources"]
Expand Down
4 changes: 3 additions & 1 deletion deps.edn
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@
"--compile" "gensql.query.js"]}
:perf {:extra-paths ["perf"]
:extra-deps {com.clojure-goes-fast/clj-async-profiler {:mvn/version "1.2.0"}
criterium/criterium {:mvn/version "0.4.6"}}
criterium/criterium {:mvn/version "0.4.6"}
babashka/fs {:mvn/version "0.5.21"}
org.clojure/data.json {:mvn/version "2.5.0"}}
:jvm-opts ["-Djdk.attach.allowAttachSelf"]}

:coverage
Expand Down
Loading
Loading