Skip to content

Commit

Permalink
feat: Add basic benchmarking and profiling
Browse files Browse the repository at this point in the history
Adds 5 default queries
Benchmarks with criterium
Profiles with clj-async-profiler
  • Loading branch information
KingMob committed Mar 18, 2024
1 parent c154123 commit f1ca894
Show file tree
Hide file tree
Showing 2 changed files with 146 additions and 4 deletions.
8 changes: 4 additions & 4 deletions deps.edn
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
:js-build {:main-opts ["-m" "cljs.main"
"-O" "advanced"
"-c" "inferenceql.query.js"]}
:benchmark {:extra-paths ["benchmark"]
:extra-deps {com.clojure-goes-fast/clj-async-profiler {:mvn/version "1.2.0"}
criterium/criterium {:mvn/version "0.4.6"}}
:jvm-opts ["-Djdk.attach.allowAttachSelf"]}}}
:perf {:extra-paths ["perf"]
:extra-deps {com.clojure-goes-fast/clj-async-profiler {:mvn/version "1.2.0"}
criterium/criterium {:mvn/version "0.4.6"}}
:jvm-opts ["-Djdk.attach.allowAttachSelf"]}}}
142 changes: 142 additions & 0 deletions perf/inferenceql/query/perf.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
(ns inferenceql.query.perf
(:require [clj-async-profiler.core :as prof]
[clojure.java.browse :as browse]
[clojure.string :as str]
[criterium.core :as crit]
[inferenceql.query.strict :as strict]
[medley.core :as medley]))

;; Default value for the `limit` key of the GENERATE-based query builders
;; (`generate` and `generate-conditioned-by`) when the caller omits it.
;; Declared ^:dynamic so it can be rebound with `binding`.
(def ^:dynamic *default-limit* 10000)

(defn- str-listify
  "Returns `x` unchanged when it is already a string; otherwise treats `x` as
  a collection and joins its elements with \", \"."
  [x]
  (cond->> x
    (not (string? x)) (str/join ", ")))

(defn conditioned-prob-density
  "Builds a query string that selects the probability density of
  `prob-density-evt` under `model`, conditioned by `conditioned-density-evt`,
  from `table`."
  [{:keys [model table prob-density-evt conditioned-density-evt]}]
  ;; str/join stringifies each element with `str`, so a missing (nil) key
  ;; contributes an empty string.
  (str/join " "
            ["SELECT PROBABILITY DENSITY OF" prob-density-evt
             "UNDER" model
             "CONDITIONED BY" conditioned-density-evt
             "FROM" table]))

(defn generate
  "Builds a query string that selects everything from a GENERATE of
  `col-var-list` (a string or a collection of strings) under `model`, limited
  to `limit` rows. `limit` falls back to `*default-limit*` when absent."
  [{:keys [col-var-list model limit]
    :or {limit *default-limit*}}]
  (let [columns (str-listify col-var-list)]
    (str/join " "
              ["SELECT * FROM GENERATE" columns
               "UNDER" model
               "LIMIT" limit])))

(defn generate-conditioned-by
  "Builds a query string that selects everything from a GENERATE of
  `col-var-list` (a string or a collection of strings) under `model`,
  conditioned by `conditioned-density-evt` and limited to `limit` rows.
  `limit` falls back to `*default-limit*` when absent."
  [{:keys [col-var-list model conditioned-density-evt limit]
    :or {limit *default-limit*}}]
  (let [columns (str-listify col-var-list)]
    (str/join " "
              ["SELECT * FROM GENERATE" columns
               "UNDER" model
               "CONDITIONED BY" conditioned-density-evt
               "LIMIT" limit])))

(defn generative-join
  "Builds a query string that generatively joins `table` with `model`,
  conditioned by `gen-join-density-evt`."
  [{:keys [model table gen-join-density-evt]}]
  (let [join-clause      (str table " GENERATIVE JOIN " model)
        condition-clause (str " CONDITIONED BY " gen-join-density-evt)]
    (str join-clause condition-clause)))

(defn approximate-mutual-info
  "Builds a query string that selects the approximate mutual information of
  the column variables `categorical-col-1` and `categorical-col-2` under
  `model`."
  [{:keys [model categorical-col-1 categorical-col-2]}]
  ;; NB: Why approximate MI between two column variables? (2024-3-2)
  ;;
  ;; Ulli: MI between two columns is the most generally applicable form
  ;; (unlike MI involving a binary event, where an event has to be defined
  ;; first). The underlying inference machinery for mutual information between
  ;; two categorical column variables will eventually be replaced with a
  ;; deterministic computation. For now, the Monte Carlo approximation is the
  ;; only way to do that.
  (str/join " "
            ["SELECT APPROXIMATE MUTUAL INFORMATION OF VAR" categorical-col-1
             "WITH VAR" categorical-col-2
             "UNDER" model
             "FROM (dummy) VALUES (0)"]))

;; Map from query name (keyword) to the function that builds that query's
;; string. Each function takes a single argument map; `default-queries` calls
;; every one of them with the same map.
(def default-query-fns {:conditioned-prob-density conditioned-prob-density
                        :generate generate
                        :generate-conditioned-by generate-conditioned-by
                        :generative-join generative-join
                        :approximate-mutual-info approximate-mutual-info})

(defn default-queries
  "Builds every default perf query from one shared argument map and returns
  them as a map of query name -> query string.

  Keys read from the argument map:
  - model: the name of the model
  - table: the name of the table
  - col-var-list: column variables - either a string or a coll of strings
  - prob-density-evt: a probability density event (e.g., \"VAR foo = true\")
  - conditioned-density-evt: a density event to condition by
  - gen-join-density-evt: a generative join density event
    (e.g., \"VAR foo = foo\")
  - categorical-col-1: the name of the first categorical column for MI
  - categorical-col-2: the name of the second categorical column for MI

  Example usage:

    (default-queries {:model \"mod\"
                      :table \"tbl\"
                      :col-var-list [\"VAR Anticipated_Lifetime\" \"VAR Period_minutes\"]
                      :conditioned-density-evt \"VAR Power_watts = 1000\"
                      :categorical-col-1 \"Class_of_Orbit\"
                      :categorical-col-2 \"Launch_Site\"
                      :gen-join-density-evt \"VAR Purpose = Purpose\"
                      :prob-density-evt \"VAR Launch_Mass_kg = 2000\"})"
  [arg-m]
  (reduce-kv (fn [built query-name build-query]
               (assoc built query-name (build-query arg-m)))
             {}
             default-query-fns))

(defn benchmark
  "Benchmarks the query(ies) with Criterium. Prints a summary report for each
  query and returns the Criterium results as a map with the same keys as
  `queries`.

  Parameters
  - db: the database to run the queries on
  - queries: Either (1) a map of queries to run, keys are names, vals are IQL
             strict queries, or (2) a single IQL strict query in a string
             (benchmarked under the key :query)
  - opts: (optional) map of Criterium benchmark options, passed through to
          `criterium.core/benchmark` (default: {})"
  ([db queries]
   (benchmark db queries {}))
  ([db queries opts]
   (let [queries (if (string? queries) {:query queries} queries)
         bmark (fn bmark
                 [query]
                 (println "\nBenchmarking query:" query)
                 (crit/benchmark
                  ;; doall forces all lazy results to be realized during benchmarking
                  (doall (strict/q query db))
                  ;; Forward the caller's options instead of silently
                  ;; discarding them.
                  opts))
         bmark-results (update-vals queries bmark)]
     ;; Reporting is purely side-effecting, so iterate with doseq rather than
     ;; building a throwaway map.
     (doseq [[query-k bmark-result] bmark-results]
       (println ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
       ;; `get` works for any key type; calling the key as a function only
       ;; works for keywords/symbols and throws for string names.
       (println (str "Results for \"" (get queries query-k) "\":"))
       (crit/report-result bmark-result))
     bmark-results)))

(defn profile
  "Starts clj-async-profiler, then runs the query(ies) with the given
  arguments. Serves the profiler UI on port 8080 and, optionally, opens a
  browser window to see the flame graphs.

  Parameters
  - db: the database to run the queries on
  - queries: Either (1) a map of queries to run, keys are names, vals are IQL
             strict queries, or (2) a single IQL strict query in a string
  - opts: (optional) a map of options

  opts map keys:
  - num-iterations: the number of iterations to run each query (default: 100)
  - open-browser: whether to open the profiler UI in a browser when done
                  (default: true)"
  ([db queries]
   (profile db queries {}))
  ;; Defaults live in :or so that a partial opts map (e.g. only
  ;; {:open-browser false}) still gets the documented defaults instead of a
  ;; nil iteration count, which would make `dotimes` throw.
  ([db queries {:keys [num-iterations open-browser]
                :or {num-iterations 100
                     open-browser true}}]
   (let [queries (if (string? queries) {:query queries} queries)]
     (prof/profile
      ;; doseq runs eagerly, so all query executions happen inside the
      ;; profiling window.
      (doseq [query (vals queries)]
        (println "\nProfiling query:" query)
        (dotimes [_ num-iterations]
          ;; doall forces all lazy results to be realized while profiling
          (doall (strict/q query db)))))
     (prof/serve-ui 8080)
     (when open-browser
       (browse/browse-url "http://localhost:8080")))))

0 comments on commit f1ca894

Please sign in to comment.