Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CONDITIONED BY * EXCEPT support #101

Merged
merged 4 commits into from
Apr 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions resources/inferenceql/query/base.bnf
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ select-list ::= select-star-clause
/ selection (ws? ',' ws? selection)*
/ aggregation (ws? ',' ws? aggregation)*

select-star-clause ::= star (ws? select-except-clause)?
star ::= '*'
select-star-clause ::= star (ws? select-except-clause)?
select-except-clause ::= #'(?i)EXCEPT' ws? '(' ws? identifier-list ws? ')'

selection ::= (scalar-expr | aggregation) (ws alias-clause)?
Expand Down Expand Up @@ -225,7 +225,10 @@ density-event-and ::= density-event-1 (ws #'(?i)AND' ws density-event-1)+

density-event-group ::= '(' ws? density-event ws? ')'

conditioned-by-expr ::= model-expr ws #'(?i)CONDITIONED' ws #'(?i)BY' ws ('*' | density-event)
conditioned-by-expr ::= model-expr ws #'(?i)CONDITIONED' ws #'(?i)BY' ws (conditioned-by-star-clause | density-event)
<conditioned-by-star-clause> ::= star (ws? conditioned-by-except-clause)?
conditioned-by-except-clause ::= #'(?i)EXCEPT' (ws? <'('> ws? model-var-list ws? <')'> | ws model-var-list)


incorporate-expr ::= #'(?i)INCORPORATE' ws relation-expr ws #'(?i)INTO' ws model-expr

Expand Down
16 changes: 16 additions & 0 deletions src/inferenceql/query/log.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,19 @@
[x]
(tap> #:spy{:x x})
x)

(defn log-tap
  "Tap fn for console debugging: writes the readable (`pr`) form of `x` to
  `*out*`, followed by an empty line so consecutive tapped values are visually
  separated. Returns nil.

  Register it with `(add-tap log-tap)`."
  [x]
  ;; Render first, then emit: same text `prn` would have written.
  (let [rendered (pr-str x)]
    (println rendered)
    ;; Blank separator line between tapped values.
    (println)))

Check warning on line 15 in src/inferenceql/query/log.cljc

View check run for this annotation

Codecov / codecov/patch

src/inferenceql/query/log.cljc#L14-L15

Added lines #L14 - L15 were not covered by tests

(defn clear-taps
"Clears all taps.

This is primarily for removing anonymous tap fns. If you don't have a reference
to the tap fn, you can't remove it. But if you do, prefer `remove-tap`."
[]
;; tapset is annoyingly private (and Clojure lacks this functionality), so we
;; have to use intern to bypass that.
;;
;; The call below re-interns `clojure.core/tapset` with a brand-new atom
;; holding an empty set (it does not empty the existing atom in place). The
;; value of the `intern` call is what this fn returns.
;;
;; NOTE(review): `intern` and the `clojure.core` namespace are JVM-Clojure
;; facilities, but this file is .cljc — presumably this fn is only ever called
;; from Clojure; confirm, or guard it with a reader conditional.
(intern 'clojure.core 'tapset (atom #{})))

Check warning on line 25 in src/inferenceql/query/log.cljc

View check run for this annotation

Codecov / codecov/patch

src/inferenceql/query/log.cljc#L25

Added line #L25 was not covered by tests
148 changes: 91 additions & 57 deletions src/inferenceql/query/scalar.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -23,76 +23,97 @@
[node]
(-> node tree/only-child-node (nth 1)))

(declare plan)

(defn ^:private conditioned-by-plan*
  "Planning helper for the CONDITIONED BY family of nodes; called from `plan`
  with a node whose top-level whitespace children have already been removed.

  Handles four shapes:
  - `model CONDITIONED BY * EXCEPT ...` -> `(iql/condition-all-except model vars)`
  - `model CONDITIONED BY *`            -> `(iql/condition-all model)`
  - `model CONDITIONED BY <event>`      -> `(iql/condition model event)`
  - a bare `:conditioned-by-except-clause` -> the plan of its variable list

  NB: These rules live here rather than inside `plan` because of
  https://clojure.atlassian.net/browse/CLJ-1852."
  [node]
  (match/match node
    ;; Star with an EXCEPT clause (six elements, so it cannot be confused with
    ;; the five-element shapes below). The clause is rebuilt as a node and sent
    ;; back through `plan`, which routes it to the last rule of this fn.
    [:conditioned-by-expr model-node _ _ [:star _] [:conditioned-by-except-clause & clause-body]]
    (let [except-node (into [:conditioned-by-except-clause] clause-body)]
      `(~'iql/condition-all-except ~(plan model-node) ~(plan except-node)))

    ;; Bare star: condition on everything.
    [:conditioned-by-expr model-node _ _ [:star _]]
    `(~'iql/condition-all ~(plan model-node))

    ;; Anything else in the event position is planned as an ordinary event.
    [:conditioned-by-expr model-node _ _ event-node]
    `(~'iql/condition ~(plan model-node) ~(plan event-node))

    ;; The EXCEPT clause itself plans to whatever its variable list plans to.
    [:conditioned-by-except-clause _ excluded-vars]
    (plan excluded-vars)))

(defn plan
"Given a parse tree/node, returns an execution plan."
[node]
(match/match (into (empty node)
(remove tree/whitespace?)
node)
[:scalar-expr child] (plan child)
[:scalar-expr-group "(" child ")"] (plan child)
(let [ws-free-node (into (empty node)
(remove tree/whitespace?)
node)]
(match/match ws-free-node
[:scalar-expr child] (plan child)
[:scalar-expr-group "(" child ")"] (plan child)

[:expr-not _not child] `(~'not ~(plan child))

[:expr-disjunction left _ right] `(~'or ~(plan left) ~(plan right))
[:expr-conjunction left _ right] `(~'and ~(plan left) ~(plan right))
[:expr-addition left _ right] `(~'+ ~(plan left) ~(plan right))
[:expr-addition left _ right] `(~'+ ~(plan left) ~(plan right))
[:expr-subtraction left _ right] `(~'- ~(plan left) ~(plan right))
[:expr-multiplication left _ right] `(~'* ~(plan left) ~(plan right))
[:expr-division left _ right] `(~'/ ~(plan left) ~(plan right))

[:expr-not _not child] `(~'not ~(plan child))
[:expr-function-call-log _log child _] `(~'log ~(plan child))

[:expr-disjunction left _ right] `(~'or ~(plan left) ~(plan right))
[:expr-conjunction left _ right] `(~'and ~(plan left) ~(plan right))
[:expr-addition left _ right] `(~'+ ~(plan left) ~(plan right))
[:expr-addition left _ right] `(~'+ ~(plan left) ~(plan right))
[:expr-subtraction left _ right] `(~'- ~(plan left) ~(plan right))
[:expr-multiplication left _ right] `(~'* ~(plan left) ~(plan right))
[:expr-division left _ right] `(~'/ ~(plan left) ~(plan right))
[:expr-binop left [:binop [:is _]] right] `(~'= ~(plan left) ~(plan right))
[:expr-binop left [:binop [:is-not & _]] right] `(~'not= ~(plan left) ~(plan right))
;; MUST not str-ify below, binops aren't identifiers.
[:expr-binop left [:binop s] right] `(~(symbol s) ~(plan left) ~(plan right))

[:expr-function-call-log _log child _] `(~'log ~(plan child))
[:distribution-event child] (plan child)

[:expr-binop left [:binop [:is _]] right] `(~'= ~(plan left) ~(plan right))
[:expr-binop left [:binop [:is-not & _]] right] `(~'not= ~(plan left) ~(plan right))
;; MUST not str-ify below, binops aren't identifiers.
[:expr-binop left [:binop s] right] `(~(symbol s) ~(plan left) ~(plan right))
[:distribution-event-or left _or right] [:or (plan left) (plan right)]
[:distribution-event-and left _and right] [:and (plan left) (plan right)]

Check warning on line 74 in src/inferenceql/query/scalar.cljc

View check run for this annotation

Codecov / codecov/patch

src/inferenceql/query/scalar.cljc#L74

Added line #L74 was not covered by tests

[:distribution-event child] (plan child)
[:distribution-event-binop (variable :guard (tree/tag-pred :variable)) [:binop s] (scalar :guard (tree/tag-pred :scalar-expr))] [(keyword s) (plan variable) (plan scalar)]
[:distribution-event-binop (scalar :guard (tree/tag-pred :scalar-expr)) [:binop s] (variable :guard (tree/tag-pred :variable))] [(keyword s) (plan variable) (plan scalar)]

Check warning on line 77 in src/inferenceql/query/scalar.cljc

View check run for this annotation

Codecov / codecov/patch

src/inferenceql/query/scalar.cljc#L77

Added line #L77 was not covered by tests

[:distribution-event-or left _or right] [:or (plan left) (plan right)]
[:distribution-event-and left _and right] [:and (plan left) (plan right)]
[:distribution-event-group "(" child ")"] (plan child)

[:distribution-event-binop (variable :guard (tree/tag-pred :variable)) [:binop s] (scalar :guard (tree/tag-pred :scalar-expr))] [(keyword s) (plan variable) (plan scalar)]
[:distribution-event-binop (scalar :guard (tree/tag-pred :scalar-expr)) [:binop s] (variable :guard (tree/tag-pred :variable))] [(keyword s) (plan variable) (plan scalar)]
[:density-event child] (plan child)
[:density-event-and & children] (into {} (comp (filter tree/branch?) (map plan)) children)

[:distribution-event-group "(" child ")"] (plan child)
[:density-event-eq (variable :guard (tree/tag-pred :variable)) _= (scalar :guard (tree/tag-pred :scalar-expr))] {(plan variable) (plan scalar)}
[:density-event-eq (scalar :guard (tree/tag-pred :scalar-expr)) _= (variable :guard (tree/tag-pred :variable))] {(plan variable) (plan scalar)}

Check warning on line 85 in src/inferenceql/query/scalar.cljc

View check run for this annotation

Codecov / codecov/patch

src/inferenceql/query/scalar.cljc#L85

Added line #L85 was not covered by tests

[:density-event child] (plan child)
[:density-event-and & children] (into {} (comp (filter tree/branch?) (map plan)) children)
[:density-event-group "(" child ")"] (plan child)

[:density-event-eq (variable :guard (tree/tag-pred :variable)) _= (scalar :guard (tree/tag-pred :scalar-expr))] {(plan variable) (plan scalar)}
[:density-event-eq (scalar :guard (tree/tag-pred :scalar-expr)) _= (variable :guard (tree/tag-pred :variable))] {(plan variable) (plan scalar)}
[:probability-expr _prob _of event _under model] `(~'iql/prob ~(plan model) ~(plan event))
[:density-expr _prob _density _of event _under model] `(~'iql/pdf ~(plan model) ~(plan event))

[:density-event-group "(" child ")"] (plan child)
[:mutual-info-expr _m _i _of lhs _with rhs _under model] `(~'iql/mutual-info ~(plan model) ~(vec (plan lhs)) ~(vec (plan rhs)))
[:approx-mutual-info-expr _a _m _i _of lhs _with rhs _under model] `(~'iql/approx-mutual-info ~(plan model) ~(vec (plan lhs)) ~(vec (plan rhs)))

[:probability-expr _prob _of event _under model] `(~'iql/prob ~(plan model) ~(plan event))
[:density-expr _prob _density _of event _under model] `(~'iql/pdf ~(plan model) ~(plan event))
[:model-expr child] (plan child)
[:model-expr "(" child ")"] (plan child)

[:mutual-info-expr _m _i _of lhs _with rhs _under model] `(~'iql/mutual-info ~(plan model) ~(vec (plan lhs)) ~(vec (plan rhs)))
[:approx-mutual-info-expr _a _m _i _of lhs _with rhs _under model] `(~'iql/approx-mutual-info ~(plan model) ~(vec (plan lhs)) ~(vec (plan rhs)))
#?@(:clj [[:generative-table-expr _generative _table relation]
(let [query-plan (requiring-resolve 'inferenceql.query.plan/plan)]
`(~'iql/eval-relation-plan (~'quote ~(query-plan relation))))])

Check warning on line 100 in src/inferenceql/query/scalar.cljc

View check run for this annotation

Codecov / codecov/patch

src/inferenceql/query/scalar.cljc#L99-L100

Added lines #L99 - L100 were not covered by tests

[:model-expr child] (plan child)
[:model-expr "(" child ")"] (plan child)
;; Matches either :conditioned-by-expr or :conditioned-by-except-clause
;; and defers to conditioned-by-plan* to avoid https://clojure.atlassian.net/browse/CLJ-1852
[(:or :conditioned-by-expr :conditioned-by-except-clause) & _] (conditioned-by-plan* ws-free-node)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure what's going on here. Why is this phrase starting with an :or? Maybe add a comment.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll add a comment.

In core.match, (:or :alternative1 :alternative2) means it could match either of two alternatives in that spot.

I did that to match either subpart, and delegate further matching/planning to conditioned-by-plan*.

And I started delegating to submatching fns because once the core.match patterns got too big/nested, the Clojure compiler started generating invalid Java class names. See https://clojure.atlassian.net/browse/CLJ-1852 and https://ask.clojure.org/index.php/3824/clojure-generated-class-names-length-exceed-file-system-limit?show=3824#q3824 for more.


#?@(:clj [[:generative-table-expr _generative _table relation]
(let [query-plan (requiring-resolve 'inferenceql.query.plan/plan)]
`(~'iql/eval-relation-plan (~'quote ~(query-plan relation))))])
[:constrained-by-expr model _constrained _by event] `(~'iql/constrain ~(plan model) ~(plan event))

[:conditioned-by-expr model _conditioned _by "*"] `(~'iql/condition-all ~(plan model))
[:conditioned-by-expr model _conditioned _by event] `(~'iql/condition ~(plan model) ~(plan event))
[:constrained-by-expr model _constrained _by event] `(~'iql/constrain ~(plan model) ~(plan event))

[:value child] (literal/read child)
[:value child] (literal/read child)

[:variable _var child] (id-node->str child)
[:variable-list & variables] (map plan variables)
[:variable _var child] (id-node->str child)
[:variable-list & variables] (into [] (comp (filter tree/branch?) (map plan)) variables) ; remove commas

[:identifier child] (plan child)
[:delimited-symbol s] (list 'iql/safe-get 'iql-bindings s)
[:simple-symbol s] (list 'iql/safe-get 'iql-bindings s)))
[:identifier child] (plan child)
[:delimited-symbol s] (list 'iql/safe-get 'iql-bindings s)
[:simple-symbol s] (list 'iql/safe-get 'iql-bindings s))))

(defn inference-event
[event]
Expand Down Expand Up @@ -138,14 +159,26 @@
(gpm/condition conditions))))))

(defn condition-all
[model bindings]
(let [conditions (reduce (fn [conditions variable]
(cond-> conditions
(contains? bindings variable)
(assoc variable (get bindings variable))))
{}
(map str (gpm/variables model)))]
(condition model conditions)))
"Retrieves all variables from the model and conditions them based on the
value found in the bindings, which includes the current tuple/row.

The 3-arity version takes an additional coll of vars to exclude."
([model bindings]
(condition-all model #{} bindings))
([model exclusions bindings]
(let [exclusions (set exclusions)
condition-vars (into []
(comp
(map name)
(filter (complement exclusions)))
(gpm/variables model))
conditions (reduce (fn [conditions variable]
(cond-> conditions
(contains? bindings variable)
(assoc variable (get bindings variable))))
{}
condition-vars)]
(condition model conditions))))

(defn operation?
"Given an event form, returns `true` if the form is an operation."
Expand Down Expand Up @@ -273,7 +306,8 @@
#?@(:clj ['eval-relation-plan
(let [eval (requiring-resolve 'inferenceql.query.plan/eval)]
#(generative-table/generative-table (eval % env bindings)))])
#?@(:clj ['condition-all #(condition-all % bindings)])
'condition-all #(condition-all % bindings)
'condition-all-except #(condition-all %1 %2 bindings)
'condition condition
'constrain constrain
'mutual-info mutual-info
Expand All @@ -294,7 +328,7 @@
;; NB: never actually passes in more than one tuple
[sexpr env bindings & tuples]
(tap> #:scalar.eval{:in-env env :in-bindings bindings
:sexpr (pr-str sexpr) :tuple-sample (take 3 tuples)})
:sexpr (pr-str sexpr) :tuple-sample (take 3 tuples)})
(let [env' (merge env bindings)
tuple-map (fn tuple-map [tuple]
(merge (zipmap (tuple/attributes tuple)
Expand Down
20 changes: 13 additions & 7 deletions test/inferenceql/query/plan_test.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,9 @@
(plan/plan)
(plan/plan?))
"table GENERATIVE JOIN model"
"table GENERATIVE JOIN model CONDITIONED BY *"
"table GENERATIVE JOIN model CONDITIONED BY * EXCEPT (VAR x)"
"table GENERATIVE JOIN model CONDITIONED BY * EXCEPT VAR x, VAR y"
"table GENERATIVE JOIN model CONDITIONED BY VAR x = 0"
"table GENERATIVE JOIN model CONSTRAINED BY VAR x > 0"))

Expand All @@ -311,10 +314,13 @@
"table GENERATIVE JOIN model"
"table GENERATIVE JOIN model GIVEN x"))

#?(:clj (deftest generative-join
(are [query tbl expected] (let [env {"table" tbl "model" model}]
(= expected (eval query env)))
"table GENERATIVE JOIN model CONDITIONED BY VAR x = x" [{"x" "yes"}] [{"x" "yes" "y" "yes"}]
"table GENERATIVE JOIN model CONDITIONED BY VAR x = x" [{"x" "no"}] [{"x" "no" "y" "no"}]
"table GENERATIVE JOIN model CONDITIONED BY *" [{"x" "yes"}] [{"x" "yes" "y" "yes"}]
"table GENERATIVE JOIN model CONDITIONED BY *" [{"x" "no"}] [{"x" "no" "y" "no"}])))
;; Evaluation tests for GENERATIVE JOIN; only defined on the :clj platform.
#?(:clj
(deftest generative-join
;; Each row below is: query string, input rows bound to "table", and the rows
;; the query is expected to evaluate to. `model` comes from outside this form.
(are [query tbl expected] (let [env {"table" tbl "model" model}]
(= expected (eval query env)))
;; Explicit condition on a single variable.
"table GENERATIVE JOIN model CONDITIONED BY VAR x = x" [{"x" "yes"}] [{"x" "yes" "y" "yes"}]
"table GENERATIVE JOIN model CONDITIONED BY VAR x = x" [{"x" "no"}] [{"x" "no" "y" "no"}]
;; Star form.
"table GENERATIVE JOIN model CONDITIONED BY *" [{"x" "yes"}] [{"x" "yes" "y" "yes"}]
"table GENERATIVE JOIN model CONDITIONED BY *" [{"x" "no"}] [{"x" "no" "y" "no"}]
;; Star form with EXCEPT, written both without and with parentheses.
"table GENERATIVE JOIN model CONDITIONED BY * EXCEPT VAR x" [{"y" "yes"}] [{"x" "yes" "y" "yes"}]
"table GENERATIVE JOIN model CONDITIONED BY * EXCEPT (VAR x)" [{"y" "yes"}] [{"x" "yes" "y" "yes"}])))
8 changes: 7 additions & 1 deletion test/inferenceql/query/strict/parser_test.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@
(are [s] (not (insta/failure? (parser/parse s)))
"data GENERATIVE JOIN model"
"data GENERATIVE JOIN model CONDITIONED BY VAR x = 0"
"data GENERATIVE JOIN model CONDITIONED BY *"
"data GENERATIVE JOIN model CONDITIONED BY * EXCEPT VAR x, VAR y"
"data GENERATIVE JOIN model CONDITIONED BY * EXCEPT (VAR x)"
"data GENERATIVE JOIN model CONSTRAINED BY VAR x > 0"))

(deftest generate-valid
Expand All @@ -51,4 +54,7 @@
(deftest conditioned-by-valid
(are [s] (not (insta/failure? (parser/parse s)))
"SELECT * FROM (GENERATE * UNDER model CONDITIONED BY VAR x = x)"
"SELECT * FROM (GENERATE * UNDER model CONDITIONED BY *)"))
"SELECT * FROM (GENERATE * UNDER model CONDITIONED BY *)"
"SELECT * FROM (GENERATE * UNDER model CONDITIONED BY * EXCEPT (VAR x))"
"SELECT * FROM (GENERATE * UNDER model CONDITIONED BY * EXCEPT VAR x, VAR y)"
"SELECT * FROM (GENERATE * UNDER model CONDITIONED BY * EXCEPT VAR x, VAR \"foo.bar\", VAR z)"))
13 changes: 13 additions & 0 deletions test/inferenceql/query/strict_test.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,9 @@
"y" {"yes" 0.0 "no" 1.0}}}]]})
q1 (comp first vals first #(q %1 %2 %3))]
(is (= 0.5 (q1 "SELECT (PROBABILITY DENSITY OF VAR y = 'yes' UNDER model CONDITIONED BY VAR x = x) FROM data;"
(with-meta [{}] {:iql/columns ["x" "y"]})
{"model" model})))
(is (= 0.5 (q1 "SELECT (PROBABILITY DENSITY OF VAR y = 'yes' UNDER (model CONDITIONED BY * EXCEPT VAR y)) FROM data;"
(with-meta [{}] {:iql/columns ["x" "y"]})
{"model" model})))))

Expand Down Expand Up @@ -498,5 +501,15 @@
(is (= 0.75 (q "SELECT PROBABILITY DENSITY OF VAR x = 'yes' UNDER model CONDITIONED BY VAR y = y FROM data"
(with-meta [{}]
{:iql/columns ["x" "y"]})))))

(testing "* except"
(is (= 0.75 (q "SELECT PROBABILITY DENSITY OF VAR x = 'yes' UNDER model CONDITIONED BY * EXCEPT VAR x FROM data"
(with-meta [{}]
{:iql/columns ["x" "y"]}))))

(is (= 0.75 (q "SELECT PROBABILITY DENSITY OF VAR x = 'yes' UNDER model CONDITIONED BY * EXCEPT (VAR x) FROM data"
(with-meta [{}]
{:iql/columns ["x" "y"]})))))

(testing "in with"
(is (= 0.0 (q "WITH model CONDITIONED BY VAR y = 'no' AS model: SELECT PROBABILITY DENSITY OF VAR x = x UNDER model FROM data" [{"x" "yes"}]))))))))
Loading