From f6eb6e6fc834a9e17b4e031f1a52ccc69bd931b2 Mon Sep 17 00:00:00 2001 From: Matthew Davidson Date: Thu, 11 Apr 2024 22:56:48 +0700 Subject: [PATCH 1/4] feat: Add tap> logging support fns --- src/inferenceql/query/log.cljc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/inferenceql/query/log.cljc b/src/inferenceql/query/log.cljc index 86678ffe..804f167f 100644 --- a/src/inferenceql/query/log.cljc +++ b/src/inferenceql/query/log.cljc @@ -7,3 +7,19 @@ [x] (tap> #:spy{:x x}) x) + +(defn log-tap + "A tap that prns the value to the console. Use `(add-tap log-tap)` to add it." + [x] + (prn x) + (println)) + +(defn clear-taps + "Clears all taps. + + This is primarily for removing anonymous tap fns. If you don't have a reference + to the tap fn, you can't remove it. But if you do, prefer `remove-tap`." + [] + ;; tapset is annoyingly private (and Clojure lacks this functionality), so we + ;; have to use intern to bypass that + (intern 'clojure.core 'tapset (atom #{}))) From 88022e7e6f9c2d300c39abbf3eccb09141a76b0b Mon Sep 17 00:00:00 2001 From: Matthew Davidson Date: Thu, 11 Apr 2024 18:18:05 +0700 Subject: [PATCH 2/4] test: Add CONDITIONED BY * EXCEPT tests --- test/inferenceql/query/plan_test.cljc | 20 ++++++++++++------- .../inferenceql/query/strict/parser_test.cljc | 8 +++++++- test/inferenceql/query/strict_test.cljc | 13 ++++++++++++ 3 files changed, 33 insertions(+), 8 deletions(-) diff --git a/test/inferenceql/query/plan_test.cljc b/test/inferenceql/query/plan_test.cljc index 772a348f..5b285be8 100644 --- a/test/inferenceql/query/plan_test.cljc +++ b/test/inferenceql/query/plan_test.cljc @@ -299,6 +299,9 @@ (plan/plan) (plan/plan?)) "table GENERATIVE JOIN model" + "table GENERATIVE JOIN model CONDITIONED BY *" + "table GENERATIVE JOIN model CONDITIONED BY * EXCEPT (VAR x)" + "table GENERATIVE JOIN model CONDITIONED BY * EXCEPT VAR x, VAR y" "table GENERATIVE JOIN model CONDITIONED BY VAR x = 0" "table GENERATIVE JOIN model CONSTRAINED BY VAR x 
> 0")) @@ -311,10 +314,13 @@ "table GENERATIVE JOIN model" "table GENERATIVE JOIN model GIVEN x")) -#?(:clj (deftest generative-join - (are [query tbl expected] (let [env {"table" tbl "model" model}] - (= expected (eval query env))) - "table GENERATIVE JOIN model CONDITIONED BY VAR x = x" [{"x" "yes"}] [{"x" "yes" "y" "yes"}] - "table GENERATIVE JOIN model CONDITIONED BY VAR x = x" [{"x" "no"}] [{"x" "no" "y" "no"}] - "table GENERATIVE JOIN model CONDITIONED BY *" [{"x" "yes"}] [{"x" "yes" "y" "yes"}] - "table GENERATIVE JOIN model CONDITIONED BY *" [{"x" "no"}] [{"x" "no" "y" "no"}]))) +#?(:clj + (deftest generative-join + (are [query tbl expected] (let [env {"table" tbl "model" model}] + (= expected (eval query env))) + "table GENERATIVE JOIN model CONDITIONED BY VAR x = x" [{"x" "yes"}] [{"x" "yes" "y" "yes"}] + "table GENERATIVE JOIN model CONDITIONED BY VAR x = x" [{"x" "no"}] [{"x" "no" "y" "no"}] + "table GENERATIVE JOIN model CONDITIONED BY *" [{"x" "yes"}] [{"x" "yes" "y" "yes"}] + "table GENERATIVE JOIN model CONDITIONED BY *" [{"x" "no"}] [{"x" "no" "y" "no"}] + "table GENERATIVE JOIN model CONDITIONED BY * EXCEPT VAR x" [{"y" "yes"}] [{"x" "yes" "y" "yes"}] + "table GENERATIVE JOIN model CONDITIONED BY * EXCEPT (VAR x)" [{"y" "yes"}] [{"x" "yes" "y" "yes"}]))) diff --git a/test/inferenceql/query/strict/parser_test.cljc b/test/inferenceql/query/strict/parser_test.cljc index 7054e4cf..29f9d46c 100644 --- a/test/inferenceql/query/strict/parser_test.cljc +++ b/test/inferenceql/query/strict/parser_test.cljc @@ -40,6 +40,9 @@ (are [s] (not (insta/failure? 
(parser/parse s))) "data GENERATIVE JOIN model" "data GENERATIVE JOIN model CONDITIONED BY VAR x = 0" + "data GENERATIVE JOIN model CONDITIONED BY *" + "data GENERATIVE JOIN model CONDITIONED BY * EXCEPT VAR x, VAR y" + "data GENERATIVE JOIN model CONDITIONED BY * EXCEPT (VAR x)" "data GENERATIVE JOIN model CONSTRAINED BY VAR x > 0")) (deftest generate-valid @@ -51,4 +54,7 @@ (deftest conditioned-by-valid (are [s] (not (insta/failure? (parser/parse s))) "SELECT * FROM (GENERATE * UNDER model CONDITIONED BY VAR x = x)" - "SELECT * FROM (GENERATE * UNDER model CONDITIONED BY *)")) + "SELECT * FROM (GENERATE * UNDER model CONDITIONED BY *)" + "SELECT * FROM (GENERATE * UNDER model CONDITIONED BY * EXCEPT (VAR x))" + "SELECT * FROM (GENERATE * UNDER model CONDITIONED BY * EXCEPT VAR x, VAR y)" + "SELECT * FROM (GENERATE * UNDER model CONDITIONED BY * EXCEPT VAR x, VAR \"foo.bar\", VAR z)")) diff --git a/test/inferenceql/query/strict_test.cljc b/test/inferenceql/query/strict_test.cljc index 16b0c1f0..be983de0 100644 --- a/test/inferenceql/query/strict_test.cljc +++ b/test/inferenceql/query/strict_test.cljc @@ -254,6 +254,9 @@ "y" {"yes" 0.0 "no" 1.0}}}]]}) q1 (comp first vals first #(q %1 %2 %3))] (is (= 0.5 (q1 "SELECT (PROBABILITY DENSITY OF VAR y = 'yes' UNDER model CONDITIONED BY VAR x = x) FROM data;" + (with-meta [{}] {:iql/columns ["x" "y"]}) + {"model" model}))) + (is (= 0.5 (q1 "SELECT (PROBABILITY DENSITY OF VAR y = 'yes' UNDER (model CONDITIONED BY * EXCEPT VAR y)) FROM data;" (with-meta [{}] {:iql/columns ["x" "y"]}) {"model" model}))))) @@ -498,5 +501,15 @@ (is (= 0.75 (q "SELECT PROBABILITY DENSITY OF VAR x = 'yes' UNDER model CONDITIONED BY VAR y = y FROM data" (with-meta [{}] {:iql/columns ["x" "y"]}))))) + + (testing "* except" + (is (= 0.75 (q "SELECT PROBABILITY DENSITY OF VAR x = 'yes' UNDER model CONDITIONED BY * EXCEPT VAR x FROM data" + (with-meta [{}] + {:iql/columns ["x" "y"]})))) + + (is (= 0.75 (q "SELECT PROBABILITY DENSITY OF VAR x = 'yes' 
UNDER model CONDITIONED BY * EXCEPT (VAR x) FROM data" + (with-meta [{}] + {:iql/columns ["x" "y"]}))))) + (testing "in with" (is (= 0.0 (q "WITH model CONDITIONED BY VAR y = 'no' AS model: SELECT PROBABILITY DENSITY OF VAR x = x UNDER model FROM data" [{"x" "yes"}])))))))) From dd8e1d1774bab302ae23e414f1fd12018282014c Mon Sep 17 00:00:00 2001 From: Matthew Davidson Date: Thu, 11 Apr 2024 22:56:48 +0700 Subject: [PATCH 3/4] feat: Add CONDITIONED BY * EXCEPT capability --- resources/inferenceql/query/base.bnf | 7 +- src/inferenceql/query/scalar.cljc | 146 +++++++++++++++++---------- 2 files changed, 95 insertions(+), 58 deletions(-) diff --git a/resources/inferenceql/query/base.bnf b/resources/inferenceql/query/base.bnf index 6cf3c787..2ece598f 100644 --- a/resources/inferenceql/query/base.bnf +++ b/resources/inferenceql/query/base.bnf @@ -80,8 +80,8 @@ select-list ::= select-star-clause / selection (ws? ',' ws? selection)* / aggregation (ws? ',' ws? aggregation)* -select-star-clause ::= star (ws? select-except-clause)? star ::= '*' +select-star-clause ::= star (ws? select-except-clause)? select-except-clause ::= #'(?i)EXCEPT' ws? '(' ws? identifier-list ws? ')' selection ::= (scalar-expr | aggregation) (ws alias-clause)? @@ -225,7 +225,10 @@ density-event-and ::= density-event-1 (ws #'(?i)AND' ws density-event-1)+ density-event-group ::= '(' ws? density-event ws? ')' -conditioned-by-expr ::= model-expr ws #'(?i)CONDITIONED' ws #'(?i)BY' ws ('*' | density-event) +conditioned-by-expr ::= model-expr ws #'(?i)CONDITIONED' ws #'(?i)BY' ws (conditioned-by-star-clause | density-event) +<conditioned-by-star-clause> ::= star (ws? conditioned-by-except-clause)? +conditioned-by-except-clause ::= #'(?i)EXCEPT' (ws? <'('> ws? model-var-list ws? 
<')'> | ws model-var-list) + incorporate-expr ::= #'(?i)INCORPORATE' ws relation-expr ws #'(?i)INTO' ws model-expr diff --git a/src/inferenceql/query/scalar.cljc b/src/inferenceql/query/scalar.cljc index a3a85945..cc44cbcd 100644 --- a/src/inferenceql/query/scalar.cljc +++ b/src/inferenceql/query/scalar.cljc @@ -23,76 +23,97 @@ [node] (-> node tree/only-child-node (nth 1))) +(declare plan) + +(defn ^:private conditioned-by-plan* + "`plan` helper that generates plans for CONDITIONED BY nodes. + + NB: Exists because https://clojure.atlassian.net/browse/CLJ-1852 prevents us + from directly adding these rules into `plan`." + [node] + (match/match node + [:conditioned-by-expr model _conditioned _by [:star _]] + `(~'iql/condition-all ~(plan model)) + [:conditioned-by-expr model _conditioned _by [:star _] [:conditioned-by-except-clause & except-children]] + `(~'iql/condition-all-except ~(plan model) ~(plan (into [:conditioned-by-except-clause] except-children))) + [:conditioned-by-expr model _conditioned _by child] + `(~'iql/condition ~(plan model) ~(plan child)) + [:conditioned-by-except-clause _except model-var-list] + (plan model-var-list))) (defn plan "Given a parse tree/node, returns an execution plan." [node] - (match/match (into (empty node) - (remove tree/whitespace?) - node) - [:scalar-expr child] (plan child) - [:scalar-expr-group "(" child ")"] (plan child) + (let [ws-free-node (into (empty node) + (remove tree/whitespace?) 
+ node)] + (match/match ws-free-node + [:scalar-expr child] (plan child) + [:scalar-expr-group "(" child ")"] (plan child) + + [:expr-not _not child] `(~'not ~(plan child)) + + [:expr-disjunction left _ right] `(~'or ~(plan left) ~(plan right)) + [:expr-conjunction left _ right] `(~'and ~(plan left) ~(plan right)) + [:expr-addition left _ right] `(~'+ ~(plan left) ~(plan right)) + [:expr-addition left _ right] `(~'+ ~(plan left) ~(plan right)) + [:expr-subtraction left _ right] `(~'- ~(plan left) ~(plan right)) + [:expr-multiplication left _ right] `(~'* ~(plan left) ~(plan right)) + [:expr-division left _ right] `(~'/ ~(plan left) ~(plan right)) - [:expr-not _not child] `(~'not ~(plan child)) + [:expr-function-call-log _log child _] `(~'log ~(plan child)) - [:expr-disjunction left _ right] `(~'or ~(plan left) ~(plan right)) - [:expr-conjunction left _ right] `(~'and ~(plan left) ~(plan right)) - [:expr-addition left _ right] `(~'+ ~(plan left) ~(plan right)) - [:expr-addition left _ right] `(~'+ ~(plan left) ~(plan right)) - [:expr-subtraction left _ right] `(~'- ~(plan left) ~(plan right)) - [:expr-multiplication left _ right] `(~'* ~(plan left) ~(plan right)) - [:expr-division left _ right] `(~'/ ~(plan left) ~(plan right)) + [:expr-binop left [:binop [:is _]] right] `(~'= ~(plan left) ~(plan right)) + [:expr-binop left [:binop [:is-not & _]] right] `(~'not= ~(plan left) ~(plan right)) + ;; MUST not str-ify below, binops aren't identifiers. + [:expr-binop left [:binop s] right] `(~(symbol s) ~(plan left) ~(plan right)) - [:expr-function-call-log _log child _] `(~'log ~(plan child)) + [:distribution-event child] (plan child) - [:expr-binop left [:binop [:is _]] right] `(~'= ~(plan left) ~(plan right)) - [:expr-binop left [:binop [:is-not & _]] right] `(~'not= ~(plan left) ~(plan right)) - ;; MUST not str-ify below, binops aren't identifiers. 
- [:expr-binop left [:binop s] right] `(~(symbol s) ~(plan left) ~(plan right)) + [:distribution-event-or left _or right] [:or (plan left) (plan right)] + [:distribution-event-and left _and right] [:and (plan left) (plan right)] - [:distribution-event child] (plan child) + [:distribution-event-binop (variable :guard (tree/tag-pred :variable)) [:binop s] (scalar :guard (tree/tag-pred :scalar-expr))] [(keyword s) (plan variable) (plan scalar)] + [:distribution-event-binop (scalar :guard (tree/tag-pred :scalar-expr)) [:binop s] (variable :guard (tree/tag-pred :variable))] [(keyword s) (plan variable) (plan scalar)] - [:distribution-event-or left _or right] [:or (plan left) (plan right)] - [:distribution-event-and left _and right] [:and (plan left) (plan right)] + [:distribution-event-group "(" child ")"] (plan child) - [:distribution-event-binop (variable :guard (tree/tag-pred :variable)) [:binop s] (scalar :guard (tree/tag-pred :scalar-expr))] [(keyword s) (plan variable) (plan scalar)] - [:distribution-event-binop (scalar :guard (tree/tag-pred :scalar-expr)) [:binop s] (variable :guard (tree/tag-pred :variable))] [(keyword s) (plan variable) (plan scalar)] + [:density-event child] (plan child) + [:density-event-and & children] (into {} (comp (filter tree/branch?) (map plan)) children) - [:distribution-event-group "(" child ")"] (plan child) + [:density-event-eq (variable :guard (tree/tag-pred :variable)) _= (scalar :guard (tree/tag-pred :scalar-expr))] {(plan variable) (plan scalar)} + [:density-event-eq (scalar :guard (tree/tag-pred :scalar-expr)) _= (variable :guard (tree/tag-pred :variable))] {(plan variable) (plan scalar)} - [:density-event child] (plan child) - [:density-event-and & children] (into {} (comp (filter tree/branch?) 
(map plan)) children) + [:density-event-group "(" child ")"] (plan child) - [:density-event-eq (variable :guard (tree/tag-pred :variable)) _= (scalar :guard (tree/tag-pred :scalar-expr))] {(plan variable) (plan scalar)} - [:density-event-eq (scalar :guard (tree/tag-pred :scalar-expr)) _= (variable :guard (tree/tag-pred :variable))] {(plan variable) (plan scalar)} + [:probability-expr _prob _of event _under model] `(~'iql/prob ~(plan model) ~(plan event)) + [:density-expr _prob _density _of event _under model] `(~'iql/pdf ~(plan model) ~(plan event)) - [:density-event-group "(" child ")"] (plan child) + [:mutual-info-expr _m _i _of lhs _with rhs _under model] `(~'iql/mutual-info ~(plan model) ~(vec (plan lhs)) ~(vec (plan rhs))) + [:approx-mutual-info-expr _a _m _i _of lhs _with rhs _under model] `(~'iql/approx-mutual-info ~(plan model) ~(vec (plan lhs)) ~(vec (plan rhs))) - [:probability-expr _prob _of event _under model] `(~'iql/prob ~(plan model) ~(plan event)) - [:density-expr _prob _density _of event _under model] `(~'iql/pdf ~(plan model) ~(plan event)) + [:model-expr child] (plan child) + [:model-expr "(" child ")"] (plan child) - [:mutual-info-expr _m _i _of lhs _with rhs _under model] `(~'iql/mutual-info ~(plan model) ~(vec (plan lhs)) ~(vec (plan rhs))) - [:approx-mutual-info-expr _a _m _i _of lhs _with rhs _under model] `(~'iql/approx-mutual-info ~(plan model) ~(vec (plan lhs)) ~(vec (plan rhs))) + #?@(:clj [[:generative-table-expr _generative _table relation] + (let [query-plan (requiring-resolve 'inferenceql.query.plan/plan)] + `(~'iql/eval-relation-plan (~'quote ~(query-plan relation))))]) - [:model-expr child] (plan child) - [:model-expr "(" child ")"] (plan child) + ;; Matches either :conditioned-by-expr or :conditioned-by-except-clause + ;; and defers to conditioned-by-plan* to avoid https://clojure.atlassian.net/browse/CLJ-1852 + [(:or :conditioned-by-expr :conditioned-by-except-clause) & _] (conditioned-by-plan* ws-free-node) - #?@(:clj 
[[:generative-table-expr _generative _table relation] - (let [query-plan (requiring-resolve 'inferenceql.query.plan/plan)] - `(~'iql/eval-relation-plan (~'quote ~(query-plan relation))))]) + [:constrained-by-expr model _constrained _by event] `(~'iql/constrain ~(plan model) ~(plan event)) - [:conditioned-by-expr model _conditioned _by "*"] `(~'iql/condition-all ~(plan model)) - [:conditioned-by-expr model _conditioned _by event] `(~'iql/condition ~(plan model) ~(plan event)) - [:constrained-by-expr model _constrained _by event] `(~'iql/constrain ~(plan model) ~(plan event)) - [:value child] (literal/read child) + [:value child] (literal/read child) - [:variable _var child] (id-node->str child) - [:variable-list & variables] (map plan variables) + [:variable _var child] (id-node->str child) + [:variable-list & variables] (into [] (comp (filter tree/branch?) (map plan)) variables) ; remove commas - [:identifier child] (plan child) - [:delimited-symbol s] (list 'iql/safe-get 'iql-bindings s) - [:simple-symbol s] (list 'iql/safe-get 'iql-bindings s))) + [:identifier child] (plan child) + [:delimited-symbol s] (list 'iql/safe-get 'iql-bindings s) + [:simple-symbol s] (list 'iql/safe-get 'iql-bindings s)))) (defn inference-event [event] @@ -138,14 +159,26 @@ (gpm/condition conditions)))))) (defn condition-all - [model bindings] - (let [conditions (reduce (fn [conditions variable] - (cond-> conditions - (contains? bindings variable) - (assoc variable (get bindings variable)))) - {} - (map str (gpm/variables model)))] - (condition model conditions))) + "Retrieves all variables from the model and conditions them based on the + value found in the bindings, which includes the current tuple/row. + + The 3-arity version takes an additional coll of vars to exclude." 
+ ([model bindings] + (condition-all model #{} bindings)) + ([model exclusions bindings] + (let [exclusions (set exclusions) + condition-vars (into [] + (comp + (map name) + (filter (complement exclusions))) + (gpm/variables model)) + conditions (reduce (fn [conditions variable] + (cond-> conditions + (contains? bindings variable) + (assoc variable (get bindings variable)))) + {} + condition-vars)] + (condition model conditions)))) (defn operation? "Given an event form, returns `true` if the form is an operation." @@ -273,7 +306,8 @@ #?@(:clj ['eval-relation-plan (let [eval (requiring-resolve 'inferenceql.query.plan/eval)] #(generative-table/generative-table (eval % env bindings)))]) - #?@(:clj ['condition-all #(condition-all % bindings)]) + #?@(:clj ['condition-all #(condition-all % bindings) + 'condition-all-except #(condition-all %1 %2 bindings)]) 'condition condition 'constrain constrain 'mutual-info mutual-info From ac9865a94fa5d10d23cede4e63a4e529de37c1eb Mon Sep 17 00:00:00 2001 From: Matthew Davidson Date: Mon, 15 Apr 2024 23:57:10 +0700 Subject: [PATCH 4/4] fix: Enable condition-all for Clojurescript --- src/inferenceql/query/scalar.cljc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/inferenceql/query/scalar.cljc b/src/inferenceql/query/scalar.cljc index cc44cbcd..342d5277 100644 --- a/src/inferenceql/query/scalar.cljc +++ b/src/inferenceql/query/scalar.cljc @@ -306,8 +306,8 @@ #?@(:clj ['eval-relation-plan (let [eval (requiring-resolve 'inferenceql.query.plan/eval)] #(generative-table/generative-table (eval % env bindings)))]) - #?@(:clj ['condition-all #(condition-all % bindings) - 'condition-all-except #(condition-all %1 %2 bindings)]) + 'condition-all #(condition-all % bindings) + 'condition-all-except #(condition-all %1 %2 bindings) 'condition condition 'constrain constrain 'mutual-info mutual-info @@ -328,7 +328,7 @@ ;; NB: never actually passes in more than one tuple [sexpr env bindings & tuples] (tap> 
#:scalar.eval{:in-env env :in-bindings bindings - :sexpr (pr-str sexpr) :tuple-sample (take 3 tuples)}) + :sexpr (pr-str sexpr) :tuple-sample (take 3 tuples)}) (let [env' (merge env bindings) tuple-map (fn tuple-map [tuple] (merge (zipmap (tuple/attributes tuple)