-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Add basic benchmarking and profiling
Adds 5 default queries. Benchmarks with criterium. Profiles with clj-async-profiler.
- Loading branch information
Showing
2 changed files
with
146 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
(ns inferenceql.query.perf | ||
(:require [clj-async-profiler.core :as prof] | ||
[clojure.java.browse :as browse] | ||
[clojure.string :as str] | ||
[criterium.core :as crit] | ||
[inferenceql.query.strict :as strict] | ||
[medley.core :as medley])) | ||
|
||
(def ^:dynamic *default-limit*
  "LIMIT value used by the GENERATE query builders when the caller does not
  supply a :limit. Dynamic, so it can be rebound with `binding`."
  10000)
|
||
(defn- str-listify
  "Renders `x` as a comma-separated list. A string is returned untouched;
  any other value is treated as a collection and joined with \", \"."
  [x]
  (cond->> x
    (not (string? x)) (str/join ", ")))
|
||
(defn conditioned-prob-density
  "Builds a query string that selects the probability density of
  `prob-density-evt` under `model`, conditioned by `conditioned-density-evt`,
  over the rows of `table`."
  [{:keys [model table prob-density-evt conditioned-density-evt]}]
  ;; Joining on a single space yields the same text as concatenating the
  ;; space-padded clause keywords.
  (str/join " "
            ["SELECT PROBABILITY DENSITY OF" prob-density-evt
             "UNDER" model
             "CONDITIONED BY" conditioned-density-evt
             "FROM" table]))
|
||
(defn generate
  "Builds a query string that selects every row generated for the column
  variables in `col-var-list` (a string or a collection of strings) under
  `model`. `limit` falls back to `*default-limit*` when the key is absent."
  [{:keys [col-var-list model limit]
    :or {limit *default-limit*}}]
  (let [columns (str-listify col-var-list)]
    (str/join " "
              ["SELECT * FROM GENERATE" columns
               "UNDER" model
               "LIMIT" limit])))
|
||
(defn generate-conditioned-by
  "Builds a query string that selects every row generated for the column
  variables in `col-var-list` (a string or a collection of strings) under
  `model`, conditioned by `conditioned-density-evt`. `limit` falls back to
  `*default-limit*` when the key is absent."
  [{:keys [col-var-list model conditioned-density-evt limit]
    :or {limit *default-limit*}}]
  (let [columns (str-listify col-var-list)]
    (str/join " "
              ["SELECT * FROM GENERATE" columns
               "UNDER" model
               "CONDITIONED BY" conditioned-density-evt
               "LIMIT" limit])))
|
||
(defn generative-join
  "Builds a query string that generatively joins `table` with `model`,
  conditioned by `gen-join-density-evt`."
  [{:keys [model table gen-join-density-evt]}]
  (str/join " "
            [table
             "GENERATIVE JOIN" model
             "CONDITIONED BY" gen-join-density-evt]))
|
||
(defn approximate-mutual-info
  "Builds a query string that selects the approximate mutual information
  between the column variables `categorical-col-1` and `categorical-col-2`
  under `model`."
  [{:keys [model categorical-col-1 categorical-col-2]}]
  ;; NB: Why did we choose approximate MI between two column variables? (2024-3-2)
  ;;
  ;; Ulli: MI between two columns is most generally applicable (unlike MI that
  ;; involves a binary event, where we first have to define an event). We'll
  ;; eventually replace the underlying inference machinery to deal with mutual
  ;; information between two categorical column variables with a deterministic
  ;; computation. Currently, the Monte Carlo approximation is the only way to
  ;; do that.
  (str/join " "
            ["SELECT APPROXIMATE MUTUAL INFORMATION OF VAR" categorical-col-1
             "WITH VAR" categorical-col-2
             "UNDER" model
             "FROM (dummy) VALUES (0)"]))
|
||
(def default-query-fns
  "Map from query name (keyword) to the function that builds that query's
  string from an argument map."
  {:conditioned-prob-density conditioned-prob-density
   :generate                 generate
   :generate-conditioned-by  generate-conditioned-by
   :generative-join          generative-join
   :approximate-mutual-info  approximate-mutual-info})
|
||
(defn default-queries
  "Builds every default perf query from one shared argument map and returns
  them as a map keyed like `default-query-fns`.

  Keys read from the argument map:
    - model: the name of the model
    - table: the name of the table
    - col-var-list: column variables - either a string or a coll of strings
    - prob-density-evt: a probability density event (e.g., \"VAR foo = true\")
    - conditioned-density-evt: a density event to condition by
    - gen-join-density-evt: a generative join density event
      (e.g., \"VAR foo = foo\")
    - categorical-col-1: the name of the first categorical column for MI
    - categorical-col-2: the name of the second categorical column for MI

  Example usage:

    (default-queries {:model \"mod\"
                      :table \"tbl\"
                      :col-var-list [\"VAR Anticipated_Lifetime\" \"VAR Period_minutes\"]
                      :conditioned-density-evt \"VAR Power_watts = 1000\"
                      :categorical-col-1 \"Class_of_Orbit\"
                      :categorical-col-2 \"Launch_Site\"
                      :gen-join-density-evt \"VAR Purpose = Purpose\"
                      :prob-density-evt \"VAR Launch_Mass_kg = 2000\"})"
  [arg-m]
  ;; Each builder receives the same map and picks out the keys it needs.
  (update-vals default-query-fns #(% arg-m)))
|
||
(defn benchmark
  "Benchmarks the query(ies) with Criterium. Prints a summary report for each
  query and returns the Criterium results as a map keyed by query name.

  Parameters
    - db: the database to run the queries on
    - queries: either (1) a map of queries to run, where keys are names and
      vals are IQL strict queries, or (2) a single IQL strict query string,
      which is benchmarked under the name :query
    - opts: (optional) options map handed to `criterium.core/benchmark`;
      defaults to {}"
  ([db queries]
   (benchmark db queries {}))
  ([db queries opts]
   (let [queries (if (string? queries) {:query queries} queries)
         run-benchmark (fn run-benchmark
                         [query]
                         (println "\nBenchmarking query:" query)
                         (crit/benchmark
                          ;; doall forces all lazy results to be realized
                          ;; during benchmarking
                          (doall (strict/q query db))
                          ;; Forward the caller's options instead of
                          ;; discarding them.
                          opts))
         bmark-results (update-vals queries run-benchmark)]
     ;; Reporting is purely side-effecting, so iterate with doseq.
     (doseq [[query-name bmark-result] bmark-results]
       (println ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
       ;; `get` works for any key type; invoking the key as a function only
       ;; works for keywords and symbols.
       (println (str "Results for \"" (get queries query-name) "\":"))
       (crit/report-result bmark-result))
     bmark-results)))
|
||
(defn profile
  "Runs the query(ies) under clj-async-profiler, then serves the profiler UI
  on port 8080 so the flame graphs can be viewed in a browser.

  Parameters
    - db: the database to run the queries on
    - queries: either (1) a map of queries to run, where keys are names and
      vals are IQL strict queries, or (2) a single IQL strict query string
    - opts: a map of options

  opts map keys:
    - num-iterations: how many times to run each query (default: 100)
    - open-browser: when truthy, opens http://localhost:8080 after profiling.
      The two-argument arity passes true; when an opts map is given without
      this key, no browser is opened."
  ([db queries]
   (profile db queries {:num-iterations 100
                        :open-browser true}))
  ([db queries {:keys [num-iterations open-browser]
                ;; Without this default, an opts map lacking :num-iterations
                ;; hands nil to dotimes, which throws.
                :or {num-iterations 100}}]
   (let [queries (if (string? queries) {:query queries} queries)]
     (prof/profile
      ;; The queries are run only for their effect on the profiler, so
      ;; iterate with doseq.
      (doseq [query (vals queries)]
        (println "\nProfiling query:" query)
        (dotimes [_ num-iterations]
          ;; doall forces lazy results to be realized while profiling
          (doall (strict/q query db)))))
     (prof/serve-ui 8080)
     (when open-browser
       (browse/browse-url "http://localhost:8080")))))