Skip to content

Commit

Permalink
feat: Save CLI benchmark results to files
Browse files Browse the repository at this point in the history
  • Loading branch information
KingMob committed Sep 3, 2024
1 parent bf8ec3f commit 195f34e
Show file tree
Hide file tree
Showing 5 changed files with 221 additions and 24 deletions.
53 changes: 53 additions & 0 deletions bin/perf-benchmark-combinations.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/env bash

# This iterates over all combinations of synthetic data+model params and
# benchmarks each one.
# NB: This can take a while, a few days at worst

# Require the benchmark JAR as the first argument; everything after it is
# forwarded verbatim to each benchmark invocation.
if [ $# -lt 1 ]; then
  # Usage errors belong on stderr so they are not mixed into captured output
  echo "Usage: $0 <jar-file> [extra-params...]" >&2
  exit 1
fi

# Extract the JAR file name from the first argument
jar_file="$1"
shift # Remove the first argument so that "$@" contains only the extra params

# Fail fast on a bad path. Each java invocation below is followed by
# "|| true", so without this check a mistyped JAR name would run through
# every combination, fail every time, and still exit successfully.
if [ ! -f "$jar_file" ]; then
  echo "Error: JAR file '$jar_file' does not exist or is not a regular file." >&2
  exit 1
fi

# SIGINT (Ctrl+C) handler: turn command tracing off first so the farewell
# message is not echoed twice, report the interruption, and stop with a
# non-zero status.
cleanup() {
  set +x
  printf '%s\n' "Script interrupted. Exiting..."
  exit 1
}

# Route Ctrl+C through the handler above
trap cleanup INT

# Enable command echoing
set -x

# Params to iterate over
num_rows_array=(10 50 200 500 1000 10000)
num_columns_array=(5 10 20 50 100 500)
num_views_array=(5 10 20 50 100 200)
num_clusters_per_view_array=(5 10 20 50 100 200)

# Run one benchmark per combination of the four parameter lists.
#
# NOTE(review): the synthetic data generator (perf/gensql/query/perf/synthetic.clj)
# asserts num-views <= num-columns and num-clusters-per-view <= num-rows.
# Combinations that violate either constraint can only fail, so they are
# skipped here instead of paying a JVM start-up for each one. Keep these
# checks in sync with those assertions.
for num_rows in "${num_rows_array[@]}"; do
  for num_columns in "${num_columns_array[@]}"; do
    for num_views in "${num_views_array[@]}"; do
      # More views than columns is rejected by the generator
      if (( num_views > num_columns )); then
        continue
      fi

      for num_clusters_per_view in "${num_clusters_per_view_array[@]}"; do
        # More clusters per view than rows is rejected by the generator
        if (( num_clusters_per_view > num_rows )); then
          continue
        fi

        # Benchmark, but use "|| true" to continue on failure
        java -jar "$jar_file" \
          --synthetic \
          --num-rows "$num_rows" \
          --num-columns "$num_columns" \
          --num-views "$num_views" \
          --num-clusters-per-view "$num_clusters_per_view" \
          --no-overwrite \
          "$@" || true
      done
    done
  done
done

72 changes: 63 additions & 9 deletions perf/gensql/query/perf.clj
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
[clojure.string :as str]
[criterium.core :as crit]
[gensql.query.perf.synthetic :as synthetic]
[gensql.query.perf.util :as util]
[gensql.query.strict :as strict]
[medley.core :as medley]))

Expand Down Expand Up @@ -94,6 +95,62 @@
[arg-m]
(update-vals default-query-fns (fn [f] (f arg-m))))

(defn time+
  "Times GenSQL strict queries with a variant of time+ from
  https://clojure-goes-fast.com/kb/benchmarking/time-plus/.

  Arguments
  - db: the database the queries are run against
  - queries: either a map of name -> GenSQL strict query, or a single GenSQL
    strict query string (treated as the map {:query <string>})
  - opts: optional map with keys
    - :warmup-iterations - untimed runs of each query before timing (default: 5)
    - :duration - target timing duration in ms (default: 10000)
    - :print? - whether to print progress info (default: true)
    - :return-results? - whether the timed fn returns the realized query
      results - can lead to OOM errors if true (default: false)

  For each query, runs the warmup iterations, then repeats the query for about
  `duration` ms and reports the mean time taken, the mean bytes allocated, the
  number of iterations, and the total time taken. Returns a map with the same
  keys as `queries`, whose vals are those timing reports.

  NB: Prefer this when criterium's warmup period is unstable. Criterium waits
  for the Hotspot compiler to stabilize during warmup, which partly requires
  two executions of the benchmarked fn with no change in reported JIT status.
  Little JIT info is available to programs at runtime, so it is hard to tell
  whether a JIT change is relevant, and for very long run times (e.g., 30s+)
  the odds of a JIT change increase. The result can be extremely long warmup
  periods, on the order of 20+ minutes in some cases."
  ([db queries]
   (time+ db queries {}))
  ([db queries {:keys [duration warmup-iterations print? return-results?]
                :or {duration 10000
                     warmup-iterations 5
                     print? true
                     return-results? false}}]
   (letfn [(report [& parts]
             ;; Progress output is suppressed entirely when print? is false
             (when print?
               (apply println parts)))
           (run-query [query]
             (report "\nTiming query:" query)
             (report "Duration goal:" duration "ms")
             (report "Running" warmup-iterations "warmup iterations")
             (dotimes [_ warmup-iterations]
               (dorun (strict/q query db)))
             (report "Timing main iterations")
             (util/time+ duration
               ;; dorun/doall forces all lazy results to be realized during timing
               (if return-results?
                 (doall (strict/q query db))
                 (dorun (strict/q query db)))))]
     (let [query-map (if (string? queries)
                       {:query queries}
                       queries)]
       (update-vals query-map run-query)))))

(defn benchmark
"Benchmarks the query(ies). Prints out the summary results. Returns the
Criterium results as a map.
Expand All @@ -106,8 +163,8 @@
Options map
- quick? - whether to use quick-benchmark or benchmark (default: true)
- return-results? - whether to return all results - can lead to OOM errors if true (default: false)
- print? - whether to print out results (default: true)"
- return-results? - whether to return the results of the benchmarked fn - can lead to OOM errors if true (default: false)
- print? - whether to print out benchmark info (default: true)"
([db queries]
(benchmark db queries {}))
([db queries {:keys [return-results? print? quick?]
Expand All @@ -122,16 +179,13 @@
(when print?
(if quick?
(println "\nQuick-benchmarking query:" query)
(println "\nBenchmarking query:" query))
(when-not return-results?
(println "Not returning results.")))
(println "\nBenchmarking query:" query)))
(criterium-bench
;; doall forces all lazy results to be realized during benchmarking
(fn []
(let [results (doall (strict/q query db))]
(if return-results?
(doall (strict/q query db))
(dorun (strict/q query db)))))
(if return-results?
(doall (strict/q query db))
(dorun (strict/q query db))))
{:warmup-jit-period 0}))
bmark-results (update-vals queries bmark)]
(dorun (medley/map-kv (fn [query-k bmark-result]
Expand Down
80 changes: 65 additions & 15 deletions perf/gensql/query/perf/main.clj
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
(ns gensql.query.perf.main
(:require [clojure.string :as string]
(:require [babashka.fs :as fs]
[clojure.string :as string]
[clojure.tools.cli :as cli]
[gensql.query.cli :as query.cli]
[gensql.query.db :as db]
Expand All @@ -10,7 +11,7 @@
(:gen-class))

(def langs #{"permissive" "strict"})
(def analysis-types #{"quick-benchmark" "benchmark" "profile"})
(def analysis-types #{"quick-benchmark" "benchmark" "profile" "time"})

(def cli-options
[["-t" "--table NAME=PATH" "table CSV name and path"
Expand All @@ -33,12 +34,19 @@
:validate [langs (str "Must be one of: " (string/join ", " langs))]]
["-e" "--eval STRING" "evaluate query in STRING"
:id :query]
[nil "--type TYPE" "performance analysis type (quick-benchmark, benchmark, or profile)"
[nil "--type TYPE" "performance analysis type (quick-benchmark, benchmark, time, or profile)"
:default "quick-benchmark"
:validate [analysis-types (str "Must be one of: " (string/join ", " analysis-types))]]
[nil "--target-duration DURATION" "target total time in ms spent benchmarking for type time"
:default 10000
:parse-fn parse-long]
[nil "--dry-run" "Will set up, load, and run each query once, but will not benchmark/profile"
:id :dry-run?
:default false]
["-o" "--output-file FILE" "File to write results to."]
[nil "--[no-]overwrite" "Overwrite an existing output file"
:id :overwrite?
:default true]
["-h" "--help"]

;; Synthetic options
Expand Down Expand Up @@ -73,14 +81,17 @@
:default 4
:parse-fn parse-double]
[nil "--categorical-alpha CATEGORICAL-ALPHA" "The alpha CRP concentration parameter for categorical columns in the model."
:default 0
:parse-fn parse-double]
:default 0.01
:parse-fn parse-double
:validate [pos? "Categorical alpha must be > 0"]]
[nil "--local-alpha LOCAL-ALPHA" "The alpha CRP concentration parameter for clustering rows in the model."
:default 0
:parse-fn parse-double]
:default 0.01
:parse-fn parse-double
:validate [pos? "Local alphas must be > 0"]]
[nil "--global-alpha GLOBAL-ALPHA" "The alpha CRP concentration parameter for column grouping in the model."
:default 0
:parse-fn parse-double]])
:default 0.01
:parse-fn parse-double
:validate [pos? "Global alpha must be > 0"]]])

(defn load-db
"Read in data and models from local CSV/EDN files."
Expand Down Expand Up @@ -109,12 +120,43 @@
(reduce-kv db/with-model % models))]
db))

(defn ^:private synthetic-options-str
  "Builds a one-line, space-separated `name=value` description of a perf run
  from the CLI options map, prefixed with \"Perf run\". Options that are
  missing from the map render as an empty value."
  [options]
  (let [;; [printed label, option key], in the order they appear in the output
        labelled-keys [["benchmark-type" :type]
                       ["num-rows" :num-rows]
                       ["num-columns" :num-columns]
                       ["num-views" :num-views]
                       ["num-clusters-per-view" :num-clusters-per-view]
                       ["p-categorical" :p-categorical]
                       ["m" :m]
                       ["r" :r]
                       ["s" :s]
                       ["nu" :nu]
                       ["categorical-alpha" :categorical-alpha]
                       ["local-alpha" :local-alpha]
                       ["global-alpha" :global-alpha]]]
    (apply str
           "Perf run"
           (mapcat (fn [[label k]]
                     [" " label "=" (get options k)])
                   labelled-keys))))

(defn ^:private save-results
  "Writes `results` to disk as a printed Clojure data structure.

  - results: the value to save; nothing is written unless it is a non-empty
    seqable. When it is a map, the CLI `options` are attached under
    :cli-options before writing.
  - output-file: destination path; when nil, a name is derived from `options`
    via `synthetic-options-str` with an \".edn\" suffix.
  - options: the parsed CLI options map."
  [results output-file options]
  (if-not (and (seqable? results)
               (seq results))
    (println "No results returned. Nothing to save.")
    (let [destination (or output-file
                          (str (synthetic-options-str options) ".edn"))
          payload (cond-> results
                    (map? results) (assoc :cli-options options))]
      (spit destination (pr-str payload)))))

(defn -main
[& args]
(let [start (System/currentTimeMillis)
{:keys [options errors summary]} (cli/parse-opts args cli-options)
{:keys [db help synthetic? models tables type query dry-run?]} options]
{:keys [db help synthetic? models tables type query dry-run? output-file overwrite? target-duration]} options
output-file (or output-file
(and synthetic? (str (synthetic-options-str options) ".edn")))]

(cond (seq errors)
(doseq [error errors]
Expand All @@ -123,11 +165,14 @@
help
(println summary)

(and synthetic? (or models tables))
(and synthetic? (or (seq models) (seq tables)))
(do
(query.log/errorln "Cannot currently combine synthetic and real tables/models.")
(println summary))

(and output-file (not overwrite?) (fs/exists? output-file))
(query.log/errorln (str "Cannot overwrite '" output-file "'. File already exists and --no-overwrite was set."))

:else
(let [perf-fn (if dry-run?
(fn [db queries]
Expand All @@ -138,17 +183,22 @@
(case type
"quick-benchmark" #(perf/benchmark %1 %2 {:quick? true})
"benchmark" #(perf/benchmark %1 %2 {:quick? false})
"time" #(perf/time+ %1 %2 {:duration target-duration})
"profile" #(perf/profile %1 %2)))]
(if synthetic?
(let [test-suite-opts (select-keys options
[:num-rows :num-columns :p-categorical :num-views
:num-clusters-per-view :m :r :s :nu :categorical-alpha
:local-alpha :global-alpha])
_ (println "Synthetic suite options:" (pr-str test-suite-opts))
{:keys [db queries]} (perf/synthetic-test-suite test-suite-opts)]
(perf-fn db queries))
{:keys [db queries]} (perf/synthetic-test-suite test-suite-opts)
results (perf-fn db queries)]
(save-results results output-file options))

(let [db (load-db models tables db)
queries {:query query}]
(perf-fn db queries)))))
queries {:query query}
results (perf-fn db queries)]
(when output-file
(save-results results output-file options))))))

(println (str "\nTotal execution time: " (/ (- (System/currentTimeMillis) start) 1000.0) " seconds."))))
2 changes: 2 additions & 0 deletions perf/gensql/query/perf/synthetic.clj
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,8 @@
local-alpha
global-alpha]
:as opts}]
(assert (<= num-clusters-per-view num-rows))
(assert (<= num-views num-columns))
(let [opts (merge {:categorical-alpha *default-categorical-alpha*
:local-alpha *default-local-alpha*
:global-alpha *default-global-alpha*
Expand Down
38 changes: 38 additions & 0 deletions perf/gensql/query/perf/util.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
(ns gensql.query.perf.util)

;; time+ adapted from the version at https://clojure-goes-fast.com/kb/benchmarking/time-plus/
(let [time*
      ;; Runs `f` repeatedly until roughly `duration-in-ms` has elapsed and
      ;; returns a map of summary statistics (see the `time+` docstring).
      (fn time* [^long duration-in-ms f]
        (let [^com.sun.management.ThreadMXBean bean (java.lang.management.ManagementFactory/getThreadMXBean)
              bytes-before (.getCurrentThreadAllocatedBytes bean)
              duration (* duration-in-ms 1000000) ; ms -> ns
              start (System/nanoTime)
              first-res (f)
              ;; Clamp to 1ns: a very fast `f` can finish within a single
              ;; nanoTime tick, and a zero delta would make the `quot` below
              ;; throw a divide-by-zero ArithmeticException.
              delta (max (- (System/nanoTime) start) 1)
              deadline (+ start duration)
              ;; Batch size between clock reads: about a tenth of the calls
              ;; expected to fit in the duration, but always at least one.
              tight-iters (max (quot (quot duration delta) 10) 1)]
          ;; i starts at 1 because the first call above is already counted
          (loop [i 1]
            (let [now (System/nanoTime)]
              (if (< now deadline)
                (do (dotimes [_ tight-iters] (f))
                    (recur (+ i tight-iters)))
                (let [i' (double i)
                      bytes-after (.getCurrentThreadAllocatedBytes bean)
                      total-run-time (- now start)
                      t (/ total-run-time i')]
                  {:first-result first-res
                   :time-per-call (/ t 1e9)
                   :total-time (/ total-run-time 1e9)
                   :bytes-alloc-per-call (/ (- bytes-after bytes-before) i')
                   :num-total-iterations i}))))))]

  (defmacro time+
    "Like `time`, but runs the supplied body repeatedly for about the given
    duration in ms and returns a map with:
    - :first-result - the result of the first call, if any
    - :time-per-call - the mean time per call in s
    - :total-time - the total time taken in s
    - :bytes-alloc-per-call - the mean bytes allocated per call, as reported
      for the current thread
    - :num-total-iterations - the total number of iterations run
    Total time in milliseconds must be provided as the first argument.
    The body always runs at least once, and the last batch of iterations may
    run past the requested duration."
    [duration-in-ms & body]
    `(~time* ~duration-in-ms (fn [] ~@body))))

0 comments on commit 195f34e

Please sign in to comment.