From 5f795811cdf68d2b8bf92f5fcc5e743ce4f1729e Mon Sep 17 00:00:00 2001 From: Ross Viljoen Date: Sun, 27 Feb 2022 01:09:31 +0000 Subject: [PATCH 01/15] Pathwise sampling implementation --- Project.toml | 1 + src/ApproximateGPs.jl | 13 ++++++++++- src/pathwise_sampling.jl | 46 +++++++++++++++++++++++++++++++++++++++ src/sparse_variational.jl | 4 +++- 4 files changed, 62 insertions(+), 2 deletions(-) create mode 100644 src/pathwise_sampling.jl diff --git a/Project.toml b/Project.toml index 8b13a35f..774221f3 100644 --- a/Project.toml +++ b/Project.toml @@ -14,6 +14,7 @@ GPLikelihoods = "6031954c-0455-49d7-b3b9-3e1c99afaf40" IrrationalConstants = "92d709cd-6900-40b7-9082-c6be49f344b6" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" diff --git a/src/ApproximateGPs.jl b/src/ApproximateGPs.jl index 20f46acf..65cd192b 100644 --- a/src/ApproximateGPs.jl +++ b/src/ApproximateGPs.jl @@ -5,6 +5,7 @@ using Reexport @reexport using GPLikelihoods using Distributions using LinearAlgebra +using Random using Statistics using StatsBase using FastGaussQuadrature @@ -14,7 +15,16 @@ using FillArrays using PDMats: chol_lower using IrrationalConstants: sqrt2, invsqrtπ -using AbstractGPs: AbstractGP, FiniteGP, LatentFiniteGP, ApproxPosteriorGP, At_A, diag_At_A +using AbstractGPs: + AbstractGP, + FiniteGP, + LatentFiniteGP, + ApproxPosteriorGP, + inducing_points, + At_A, + diag_At_A, + Xt_A_X, + Xt_invA_X include("utils.jl") @@ -23,6 +33,7 @@ include("expected_loglik.jl") export SparseVariationalApproximation, Centered, NonCentered include("sparse_variational.jl") +include("pathwise_sampling.jl") using ForwardDiff diff --git a/src/pathwise_sampling.jl b/src/pathwise_sampling.jl new file mode 100644 index 00000000..b221b0d7 --- /dev/null +++ b/src/pathwise_sampling.jl @@ -0,0 +1,46 @@ +function pathwise_sample(rng::Random.AbstractRNG, f::ApproxPosteriorGP, weight_space_approx) # TODO: num_samples + prior_approx = weight_space_approx(f.prior) + prior_sample = rand(rng, prior_approx) + + z = inducing_points(f) + q_u = _get_q_u(f) + + u = rand(rng, q_u) + v = cov(f, z) \ (u - prior_sample(z)) + + posterior_sample(x) = prior_sample(x) + cov(f, x, z) * v + + return posterior_sample +end + +# Methods to get the explicit variational distribution over inducing points q(u) +function _get_q_u(f::ApproxPosteriorGP{<:SparseVariationalApproximation{NonCentered}}) + # u = Lε + μ where LLᵀ = cov(fz) and μ = mean(fz) + # q(ε) = N(m, S) + # => q(u) = N(Lm + μ, LSLᵀ) + L, μ = chol_lower(_chol_cov(f.approx.fz)), mean(f.approx.fz) + m, S = mean(f.approx.q), _chol_cov(f.approx.q) + return MvNormal(L * m + μ, Xt_A_X(S, L')) +end +_get_q_u(f::ApproxPosteriorGP{<:SparseVariationalApproximation{Centered}}) = f.approx.q + +function _get_q_u(f::ApproxPosteriorGP{<:VFE}) + # q(u) = N(m, S) + # q(f_k) = N(μ_k, Σ_k) (the predictive distribution at test inputs k) + # μ_k = mean(k) + K_kz * K_zz⁻¹ * m + # where: K_kz = cov(f.prior, k, z) + # implemented as: μ_k = mean(k) + K_kz * α + # => m = K_zz * α + # Σ_k = K_kk - (K_kz * K_zz⁻¹ * K_zk) + (K_kz * K_zz⁻¹ * S * K_zz⁻¹ * K_zk) + # interested in the last term to get S + # implemented as: Aᵀ * Λ_ε⁻¹ * A + # where: A = U⁻ᵀ * K_zk + # UᵀU = K_zz + # so, Λ_ε⁻¹ = U⁻ᵀ * S * U + # => S = Uᵀ * Λ_ε⁻¹ * U + # see https://krasserm.github.io/2020/12/12/gaussian-processes-sparse/ eqns (8) & (9) + U = f.data.U + m = U'U * f.data.α + S = Xt_invA_X(f.data.Λ_ε, U) + return MvNormal(m, S) +end diff --git a/src/sparse_variational.jl b/src/sparse_variational.jl index 531c3c02..937f5b91 100644 --- a/src/sparse_variational.jl +++ b/src/sparse_variational.jl @@ -233,7 +233,9 @@ end # Misc utility. # -inducing_points(f::ApproxPosteriorGP{<:SparseVariationalApproximation}) = f.approx.fz.x +function AbstractGPs.inducing_points(f::ApproxPosteriorGP{<:SparseVariationalApproximation}) + return f.approx.fz.x +end # # elbo From 58dcf61f23eb7910dfef5085650d172566bfdb84 Mon Sep 17 00:00:00 2001 From: Ross Viljoen Date: Sun, 27 Feb 2022 01:28:44 +0000 Subject: [PATCH 02/15] Multisample version --- src/pathwise_sampling.jl | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/src/pathwise_sampling.jl b/src/pathwise_sampling.jl index b221b0d7..acc9bedf 100644 --- a/src/pathwise_sampling.jl +++ b/src/pathwise_sampling.jl @@ -1,4 +1,4 @@ -function pathwise_sample(rng::Random.AbstractRNG, f::ApproxPosteriorGP, weight_space_approx) # TODO: num_samples +function pathwise_sample(rng::Random.AbstractRNG, f::ApproxPosteriorGP, weight_space_approx) prior_approx = weight_space_approx(f.prior) prior_sample = rand(rng, prior_approx) @@ -12,6 +12,35 @@ function pathwise_sample(rng::Random.AbstractRNG, f::ApproxPosteriorGP, weight_s return posterior_sample end +pathwise_sample(f::ApproxPosteriorGP, wsa) = pathwise_sample(Random.GLOBAL_RNG, f, wsa) + +function pathwise_sample( + rng::Random.AbstractRNG, f::ApproxPosteriorGP, weight_space_approx, num_samples::Integer +) + prior_approx = weight_space_approx(f.prior) + prior_samples = rand(rng, prior_approx, num_samples) + + z = AbstractGPs.inducing_points(f) + q_u = ApproximateGPs._get_q_u(f) + + us = rand(rng, q_u, num_samples) + + vs = cov(f, z) \ (us - reduce(hcat, map((s) -> s(z), prior_samples))) + + function create_posterior_sample_fn(prior_sample, v) + function posterior_sample(x) + return prior_sample(x) + cov(f, x, z) * v + end + end + + posterior_samples = [ + create_posterior_sample_fn(s, v) for (s, v) in zip(prior_samples, eachrow(vs)) + ] + return posterior_samples +end +function pathwise_sample(f::ApproxPosteriorGP, wsa, n::Integer) + return pathwise_sample(Random.GLOBAL_RNG, f, wsa, n) +end # Methods to get the explicit variational distribution over inducing points q(u) function _get_q_u(f::ApproxPosteriorGP{<:SparseVariationalApproximation{NonCentered}}) From c1a07343001c009f634efd6a80094795736deabc Mon Sep 17 00:00:00 2001 From: Ross Viljoen Date: Sun, 27 Feb 2022 01:37:50 +0000 Subject: [PATCH 03/15] Add example --- examples/d-pathwise-sampling/Project.toml | 8 +++ examples/d-pathwise-sampling/script.jl | 73 +++++++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 examples/d-pathwise-sampling/Project.toml create mode 100644 examples/d-pathwise-sampling/script.jl diff --git a/examples/d-pathwise-sampling/Project.toml b/examples/d-pathwise-sampling/Project.toml new file mode 100644 index 00000000..97a47f1c --- /dev/null +++ b/examples/d-pathwise-sampling/Project.toml @@ -0,0 +1,8 @@ +[deps] +AbstractGPs = "99985d1d-32ba-4be9-9821-2ec096f28918" +ApproximateGPs = "298c2ebc-0411-48ad-af38-99e88101b606" +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +RandomFourierFeatures = "46bd3b77-1c99-43cd-804b-1e14830792b5" diff --git a/examples/d-pathwise-sampling/script.jl b/examples/d-pathwise-sampling/script.jl new file mode 100644 index 00000000..53d8a384 --- /dev/null +++ b/examples/d-pathwise-sampling/script.jl @@ -0,0 +1,73 @@ +# ## Setup + +using RandomFourierFeatures +using ApproximateGPs +using AbstractGPs +using Distributions +using LinearAlgebra +using Random + +rng = MersenneTwister(1234) + +# Find the exact Titsias posterior (avoid optimisation) +# TODO: this is already (and better) solved by `_get_q_u(f::ApproxPosteriorGP{<:VFE})` +# Replace this (and the version in `test_utils`?) +function optimal_variational_posterior(::Centered, fz, fx, y) + σ² = fx.Σy[1] + Kuf = cov(fz, fx) + Kuu = Symmetric(cov(fz)) + Σ = (Symmetric(cov(fz) + (1 / σ²) * Kuf * Kuf')) + m = ((1 / σ²) * Kuu * (Σ \ Kuf)) * y + S = Symmetric(Kuu * (Σ \ Kuu)) + return MvNormal(m, S) +end + +function optimal_variational_posterior(::NonCentered, fz, fx, y) + q = optimal_variational_posterior(Centered(), fz, fx, y) + Cuu = cholesky(Symmetric(cov(fz))) + return MvNormal(Cuu.L \ (mean(q) - mean(fz)), Symmetric((Cuu.L \ cov(q)) / Cuu.U)) +end + +# Define a GP and generate some data + +k = 3 * (SqExponentialKernel() ∘ ScaleTransform(10)) +gp = GP(k) + +input_dims = 1 + +x = ColVecs(rand(input_dims, 20)) +fx = gp(x, 0.01) +y = rand(fx) + +z = x[1:8] +fz = gp(z) + +# Any of the following will work: + +# q = optimal_variational_posterior(NonCentered(), fz, fx, y) +# ap = posterior(SparseVariationalApproximation(NonCentered(), fz, q)) + +# q = optimal_variational_posterior(Centered(), fz, fx, y) +# ap = posterior(SparseVariationalApproximation(NonCentered(), fz, q)) + +ap = posterior(VFE(fz), fx, y) + +x_test = ColVecs(sort(rand(input_dims, 500); dims=2)) + +num_features = 1000 +rff_wsa = build_rff_weight_space_approx(rng, input_dims, num_features) + +function_samples = [ApproximateGPs.pathwise_sample(rng, ap, rff_wsa) for _ in 1:100] + +y_samples = reduce(hcat, map((f) -> f(x_test), function_samples)) # size(y_samples): (length(x_plot), n_samples) + +# Plot sampled functions against the exact posterior + +using Plots + +x_plot = x_test.X' + +plot(x_plot, y_samples; label="", color=:red, linealpha=0.2) +plot!(vec(x_plot), ap; color=:blue, label="True posterior") +scatter!(x.X', y; label="data") +vline!(vec(z.X'); label="inducing points", color=:orange) From c819fc2a2aa1607a5cd667cb0ea2d7449c2a2e8a Mon Sep 17 00:00:00 2001 From: Ross Viljoen Date: Sun, 27 Feb 2022 02:03:20 +0000 Subject: [PATCH 04/15] Move `_optimal_variational_posterior` to `src` --- src/sparse_variational.jl | 15 +++++++++++++++ test/runtests.jl | 5 +++++ test/sparse_variational.jl | 17 ++++++----------- test/test_utils.jl | 15 --------------- 4 files changed, 26 insertions(+), 26 deletions(-) diff --git a/src/sparse_variational.jl b/src/sparse_variational.jl index 937f5b91..dc95c365 100644 --- a/src/sparse_variational.jl +++ b/src/sparse_variational.jl @@ -343,3 +343,18 @@ function _prior_kl(sva::SparseVariationalApproximation{NonCentered}) return (trace_term + m_ε'm_ε - length(m_ε) - logdet(C_ε)) / 2 end + +# Get the optimal closed form solution for the centered variational posterior q(u) +function _optimal_variational_posterior(::Centered, fz, fx, y) + fz.f.mean isa AbstractGPs.ZeroMean || + error("The exact posterior requires a GP with ZeroMean.") + post = posterior(VFE(fz), fx, y) + return _get_q_u(post) +end + +# Get the optimal closed form solution for the non-centered variational posterior q(ε) +function _optimal_variational_posterior(::NonCentered, fz, fx, y) + q_u = _optimal_variational_posterior(Centered(), fz, fx, y) + Cuu = cholesky(Symmetric(cov(fz))) + return MvNormal(Cuu.L \ (mean(q_u) - mean(fz)), Symmetric((Cuu.L \ cov(q_u)) / Cuu.U)) +end diff --git a/test/runtests.jl b/test/runtests.jl index 79861ef3..c7f3b214 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,7 @@ using Random using Test using ApproximateGPs +using ApproximateGPs: _optimal_variational_posterior using Flux using IterTools using AbstractGPs @@ -57,6 +58,10 @@ include("test_utils.jl") println(" ") @info "Ran svgp tests" + include("pathwise_sampling.jl") + println(" ") + @info "Ran pathwise sampling tests" + include("laplace.jl") println(" ") @info "Ran laplace tests" diff --git a/test/sparse_variational.jl b/test/sparse_variational.jl index 43cc5898..c57f44f3 100644 --- a/test/sparse_variational.jl +++ b/test/sparse_variational.jl @@ -19,7 +19,7 @@ fz = f(z, 1e-6) # Construct approximate posterior. - q_Centered = optimal_variational_posterior(fz, fx, y) + q_Centered = _optimal_variational_posterior(Centered(), fz, fx, y) approx_Centered = SparseVariationalApproximation(Centered(), fz, q_Centered) f_approx_post_Centered = posterior(approx_Centered) @@ -32,17 +32,12 @@ end @testset "NonCentered" begin - # Construct optimal approximate posterior. - q = optimal_variational_posterior(fz, fx, y) - Cuu = cholesky(Symmetric(cov(fz))) - q_ε = MvNormal( - Cuu.L \ (mean(q) - mean(fz)), Symmetric((Cuu.L \ cov(q)) / Cuu.U) - ) + q_ε = _optimal_variational_posterior(NonCentered(), fz, fx, y) @testset "Check that q_ε has been properly constructed" begin - @test mean(q) ≈ mean(fz) + Cuu.L * mean(q_ε) - @test cov(q) ≈ Cuu.L * cov(q_ε) * Cuu.U + @test mean(q_Centered) ≈ mean(fz) + Cuu.L * mean(q_ε) + @test cov(q_Centered) ≈ Cuu.L * cov(q_ε) * Cuu.U end # Construct equivalent approximate posteriors. @@ -79,7 +74,7 @@ f = GP(kernel) fx = f(x, 0.1) fz = f(z) - q_ex = optimal_variational_posterior(fz, fx, y) + q_ex = _optimal_variational_posterior(Centered(), fz, fx, y) sva = SparseVariationalApproximation(fz, q_ex) @test elbo(sva, fx, y) isa Real @@ -115,7 +110,7 @@ f = GP(kernel) fx = f(x, lik_noise) fz = f(z) - q_ex = optimal_variational_posterior(fz, fx, y) + q_ex = _optimal_variational_posterior(Centered(), fz, fx, y) gpr_post = posterior(fx, y) # Exact GP regression vfe_post = posterior(VFE(fz), fx, y) # Titsias posterior diff --git a/test/test_utils.jl b/test/test_utils.jl index 58bd666c..dae04a51 100644 --- a/test/test_utils.jl +++ b/test/test_utils.jl @@ -1,17 +1,2 @@ # Create a default kernel from two parameters k[1] and k[2] make_kernel(k) = softplus(k[1]) * (SqExponentialKernel() ∘ ScaleTransform(softplus(k[2]))) - -# Computes the optimal closed form solution for the variational posterior -# q(u) (e.g. # https://krasserm.github.io/2020/12/12/gaussian-processes-sparse/ -# equations (11) & (12)). Assumes a ZeroMean function. -function optimal_variational_posterior(fu, fx, y) - fu.f.mean isa AbstractGPs.ZeroMean || - error("The exact posterior requires a GP with ZeroMean.") - σ² = fx.Σy[1] - Kuf = cov(fu, fx) - Kuu = Symmetric(cov(fu)) - Σ = (Symmetric(cov(fu) + (1 / σ²) * Kuf * Kuf')) - m = ((1 / σ²) * Kuu * (Σ \ Kuf)) * y - S = Symmetric(Kuu * (Σ \ Kuu)) - return MvNormal(m, S) -end From 184fe61e2a7786bc3a69ff88aa32f502100b3514 Mon Sep 17 00:00:00 2001 From: Ross Viljoen Date: Sun, 27 Feb 2022 02:04:00 +0000 Subject: [PATCH 05/15] Update example --- examples/d-pathwise-sampling/script.jl | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/examples/d-pathwise-sampling/script.jl b/examples/d-pathwise-sampling/script.jl index 53d8a384..561f64eb 100644 --- a/examples/d-pathwise-sampling/script.jl +++ b/examples/d-pathwise-sampling/script.jl @@ -9,25 +9,6 @@ using Random rng = MersenneTwister(1234) -# Find the exact Titsias posterior (avoid optimisation) -# TODO: this is already (and better) solved by `_get_q_u(f::ApproxPosteriorGP{<:VFE})` -# Replace this (and the version in `test_utils`?) -function optimal_variational_posterior(::Centered, fz, fx, y) - σ² = fx.Σy[1] - Kuf = cov(fz, fx) - Kuu = Symmetric(cov(fz)) - Σ = (Symmetric(cov(fz) + (1 / σ²) * Kuf * Kuf')) - m = ((1 / σ²) * Kuu * (Σ \ Kuf)) * y - S = Symmetric(Kuu * (Σ \ Kuu)) - return MvNormal(m, S) -end - -function optimal_variational_posterior(::NonCentered, fz, fx, y) - q = optimal_variational_posterior(Centered(), fz, fx, y) - Cuu = cholesky(Symmetric(cov(fz))) - return MvNormal(Cuu.L \ (mean(q) - mean(fz)), Symmetric((Cuu.L \ cov(q)) / Cuu.U)) -end - # Define a GP and generate some data k = 3 * (SqExponentialKernel() ∘ ScaleTransform(10)) @@ -44,10 +25,10 @@ fz = gp(z) # Any of the following will work: -# q = optimal_variational_posterior(NonCentered(), fz, fx, y) +# q = ApproximateGPs._optimal_variational_posterior(NonCentered(), fz, fx, y) # ap = posterior(SparseVariationalApproximation(NonCentered(), fz, q)) -# q = optimal_variational_posterior(Centered(), fz, fx, y) +# q = ApproximateGPs._optimal_variational_posterior(Centered(), fz, fx, y) # ap = posterior(SparseVariationalApproximation(NonCentered(), fz, q)) ap = posterior(VFE(fz), fx, y) From 3827a21da5f2f19064115dd08f92b993d360fd9c Mon Sep 17 00:00:00 2001 From: Ross Viljoen Date: Sun, 27 Feb 2022 02:09:13 +0000 Subject: [PATCH 06/15] Export pathwise_sample --- src/ApproximateGPs.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ApproximateGPs.jl b/src/ApproximateGPs.jl index 65cd192b..51ce19c2 100644 --- a/src/ApproximateGPs.jl +++ b/src/ApproximateGPs.jl @@ -33,6 +33,8 @@ include("expected_loglik.jl") export SparseVariationalApproximation, Centered, NonCentered include("sparse_variational.jl") + +export pathwise_sample include("pathwise_sampling.jl") using ForwardDiff From 4b77b52b48ac0938f1b4de40e913fa3f8ec38b99 Mon Sep 17 00:00:00 2001 From: Ross Viljoen Date: Sun, 27 Feb 2022 03:13:10 +0000 Subject: [PATCH 07/15] Fix multisample --- examples/d-pathwise-sampling/script.jl | 2 +- src/pathwise_sampling.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/d-pathwise-sampling/script.jl b/examples/d-pathwise-sampling/script.jl index 561f64eb..bf050494 100644 --- a/examples/d-pathwise-sampling/script.jl +++ b/examples/d-pathwise-sampling/script.jl @@ -38,7 +38,7 @@ x_test = ColVecs(sort(rand(input_dims, 500); dims=2)) num_features = 1000 rff_wsa = build_rff_weight_space_approx(rng, input_dims, num_features) -function_samples = [ApproximateGPs.pathwise_sample(rng, ap, rff_wsa) for _ in 1:100] +function_samples = ApproximateGPs.pathwise_sample(rng, ap, rff_wsa, 100) y_samples = reduce(hcat, map((f) -> f(x_test), function_samples)) # size(y_samples): (length(x_plot), n_samples) diff --git a/src/pathwise_sampling.jl b/src/pathwise_sampling.jl index acc9bedf..7c3ecdae 100644 --- a/src/pathwise_sampling.jl +++ b/src/pathwise_sampling.jl @@ -34,7 +34,7 @@ function pathwise_sample( end posterior_samples = [ - create_posterior_sample_fn(s, v) for (s, v) in zip(prior_samples, eachrow(vs)) + create_posterior_sample_fn(s, v) for (s, v) in zip(prior_samples, eachcol(vs)) ] return posterior_samples end From af18236fdfcb62530e4336489a171e6b98c59096 Mon Sep 17 00:00:00 2001 From: Ross Viljoen Date: Sun, 27 Feb 2022 03:13:35 +0000 Subject: [PATCH 08/15] Add tests --- test/Project.toml | 1 + test/pathwise_sampling.jl | 62 +++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 1 + 3 files changed, 64 insertions(+) create mode 100644 test/pathwise_sampling.jl diff --git a/test/Project.toml b/test/Project.toml index 9326a1dc..bf1f4a5b 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -12,6 +12,7 @@ LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688" Optim = "429524aa-4258-5aef-a3af-852621145aeb" PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +RandomFourierFeatures = "46bd3b77-1c99-43cd-804b-1e14830792b5" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" diff --git a/test/pathwise_sampling.jl b/test/pathwise_sampling.jl new file mode 100644 index 00000000..88808bff --- /dev/null +++ b/test/pathwise_sampling.jl @@ -0,0 +1,62 @@ +@testset "pathwise_sampling" begin + rng = MersenneTwister(1453) + + kernel = 0.1 * (SqExponentialKernel() ∘ ScaleTransform(0.2)) + Σy = 1e-6 + f = GP(kernel) + + input_dims = 2 + X = ColVecs(rand(rng, input_dims, 8)) + + fx = f(X, Σy) + y = rand(fx) + + Z = X[1:4] + fz = f(Z) + + X_test = ColVecs(rand(rng, input_dims, 3)) + + num_features = 10000 + rff_wsa = build_rff_weight_space_approx(rng, input_dims, num_features) + + num_samples = 1000 + + function test_single_sample_stats(ap, num_samples) + return test_stats(ap, [pathwise_sample(rng, ap, rff_wsa) for _ in 1:num_samples]) + end + + function test_multi_sample_stats(ap, num_samples) + return test_stats(ap, pathwise_sample(rng, ap, rff_wsa, num_samples)) + end + + function test_stats(ap, function_samples) + y_samples = reduce(hcat, map((f) -> f(X_test), function_samples)) + m_empirical = mean(y_samples; dims=2) + Σ_empirical = + (y_samples .- m_empirical) * (y_samples .- m_empirical)' ./ num_samples + + @test mean(ap(X_test)) ≈ m_empirical atol = 1e-3 rtol = 1e-3 + @test cov(ap(X_test)) ≈ Σ_empirical atol = 1e-3 rtol = 1e-3 + end + + @testset "Centered SVA" begin + q = ApproximateGPs._optimal_variational_posterior(Centered(), fz, fx, y) + ap = posterior(SparseVariationalApproximation(Centered(), fz, q)) + + test_single_sample_stats(ap, num_samples) + test_multi_sample_stats(ap, num_samples) + end + @testset "NonCentered SVA" begin + q = ApproximateGPs._optimal_variational_posterior(NonCentered(), fz, fx, y) + ap = posterior(SparseVariationalApproximation(NonCentered(), fz, q)) + + test_single_sample_stats(ap, num_samples) + test_multi_sample_stats(ap, num_samples) + end + @testset "VFE" begin + ap = posterior(VFE(fz), fx, y) + + test_single_sample_stats(ap, num_samples) + test_multi_sample_stats(ap, num_samples) + end +end diff --git a/test/runtests.jl b/test/runtests.jl index c7f3b214..929ea52b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,6 +2,7 @@ using Random using Test using ApproximateGPs using ApproximateGPs: _optimal_variational_posterior +using RandomFourierFeatures using Flux using IterTools using AbstractGPs From 58df984232f5dc903f35581c6bc4781bf76c141a Mon Sep 17 00:00:00 2001 From: Ross Viljoen Date: Sun, 27 Feb 2022 03:26:41 +0000 Subject: [PATCH 09/15] Add docstring --- src/pathwise_sampling.jl | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/pathwise_sampling.jl b/src/pathwise_sampling.jl index 7c3ecdae..743b5b84 100644 --- a/src/pathwise_sampling.jl +++ b/src/pathwise_sampling.jl @@ -1,3 +1,20 @@ +@doc raw""" + pathwise_sample(rng::Random.AbstractRNG, f::ApproxPosteriorGP, weight_space_approx[, num_samples::Integer]) + +Efficiently samples a function from a sparse approximate posterior GP `f`. +Returns a function which can be evaluated at any input locations `X`. +`weight_space_approx` must be a function which takes a prior `AbstractGP` as an +argument and returns a `BayesianLinearRegressors.BasisFunctionRegressor`, +representing a weight space approximation to the prior of `f`. An example of +such a function can be constructed with +`RandomFourierFeatures.build_rff_weight_space_approx`. If `num_samples` is +supplied as an argument, returns a Vector of function samples. + +Details of the method can be found in [1]. + +[1] - Wilson, James, et al. "Efficiently sampling functions from Gaussian +process posteriors." International Conference on Machine Learning. PMLR, 2020. +""" function pathwise_sample(rng::Random.AbstractRNG, f::ApproxPosteriorGP, weight_space_approx) prior_approx = weight_space_approx(f.prior) prior_sample = rand(rng, prior_approx) @@ -38,8 +55,8 @@ function pathwise_sample( ] return posterior_samples end -function pathwise_sample(f::ApproxPosteriorGP, wsa, n::Integer) - return pathwise_sample(Random.GLOBAL_RNG, f, wsa, n) +function pathwise_sample(f::ApproxPosteriorGP, wsa, num_samples::Integer) + return pathwise_sample(Random.GLOBAL_RNG, f, wsa, num_samples) end # Methods to get the explicit variational distribution over inducing points q(u) From 8ee4ce76fc9a411f1f0bc0458cf6f24ebb31e194 Mon Sep 17 00:00:00 2001 From: Ross Viljoen Date: Sun, 27 Feb 2022 03:28:07 +0000 Subject: [PATCH 10/15] Formatting --- src/pathwise_sampling.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/pathwise_sampling.jl b/src/pathwise_sampling.jl index 743b5b84..50b1276b 100644 --- a/src/pathwise_sampling.jl +++ b/src/pathwise_sampling.jl @@ -7,8 +7,10 @@ Returns a function which can be evaluated at any input locations `X`. argument and returns a `BayesianLinearRegressors.BasisFunctionRegressor`, representing a weight space approximation to the prior of `f`. An example of such a function can be constructed with -`RandomFourierFeatures.build_rff_weight_space_approx`. If `num_samples` is -supplied as an argument, returns a Vector of function samples. +`RandomFourierFeatures.build_rff_weight_space_approx`. + +If `num_samples` is supplied as an argument, returns a Vector of function +samples. Details of the method can be found in [1]. From a1adca3066106dbc5ae0a13847d6a592c5df2c0e Mon Sep 17 00:00:00 2001 From: Ross Viljoen Date: Sun, 27 Feb 2022 03:45:27 +0000 Subject: [PATCH 11/15] Fix test --- test/sparse_variational.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/sparse_variational.jl b/test/sparse_variational.jl index c57f44f3..cf4de134 100644 --- a/test/sparse_variational.jl +++ b/test/sparse_variational.jl @@ -36,6 +36,7 @@ q_ε = _optimal_variational_posterior(NonCentered(), fz, fx, y) @testset "Check that q_ε has been properly constructed" begin + Cuu = cholesky(Symmetric(cov(fz))) @test mean(q_Centered) ≈ mean(fz) + Cuu.L * mean(q_ε) @test cov(q_Centered) ≈ Cuu.L * cov(q_ε) * Cuu.U end From 84657cc9c869f3cf249ea38f18f6b74d925744b1 Mon Sep 17 00:00:00 2001 From: Ross Viljoen Date: Fri, 11 Mar 2022 20:17:40 +0000 Subject: [PATCH 12/15] whitespace --- src/ApproximateGPs.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ApproximateGPs.jl b/src/ApproximateGPs.jl index daf9b532..8797857f 100644 --- a/src/ApproximateGPs.jl +++ b/src/ApproximateGPs.jl @@ -4,6 +4,7 @@ using Reexport @reexport using AbstractGPs @reexport using GPLikelihoods + include("API.jl") @reexport using .API: approx_lml From 920ef7dcf782aaed382922066dd327a283c66615 Mon Sep 17 00:00:00 2001 From: Ross Viljoen Date: Fri, 11 Mar 2022 21:12:37 +0000 Subject: [PATCH 13/15] Refactor to submodule --- docs/src/api/pathwisesampling.md | 6 +++ src/ApproximateGPs.jl | 3 ++ ..._sampling.jl => PathwiseSamplingModule.jl} | 49 +++++++------------ src/SparseVariationalApproximationModule.jl | 40 ++++++++++++++- ..._sampling.jl => PathwiseSamplingModule.jl} | 6 +-- test/runtests.jl | 12 +++-- 6 files changed, 75 insertions(+), 41 deletions(-) create mode 100644 docs/src/api/pathwisesampling.md rename src/{pathwise_sampling.jl => PathwiseSamplingModule.jl} (60%) rename test/{pathwise_sampling.jl => PathwiseSamplingModule.jl} (89%) diff --git a/docs/src/api/pathwisesampling.md b/docs/src/api/pathwisesampling.md new file mode 100644 index 00000000..93363656 --- /dev/null +++ b/docs/src/api/pathwisesampling.md @@ -0,0 +1,6 @@ +# Pathwise Posterior Sampling + +```@autodocs +Modules = [ApproximateGPs.PathwiseSamplingModule] +Private = false +``` diff --git a/src/ApproximateGPs.jl b/src/ApproximateGPs.jl index 8797857f..3df44631 100644 --- a/src/ApproximateGPs.jl +++ b/src/ApproximateGPs.jl @@ -21,6 +21,9 @@ include("LaplaceApproximationModule.jl") @reexport using .LaplaceApproximationModule: build_laplace_objective, build_laplace_objective! +include("PathwiseSamplingModule.jl") +@reexport using .PathwiseSamplingModule: pathwise_sample + include("deprecations.jl") end diff --git a/src/pathwise_sampling.jl b/src/PathwiseSamplingModule.jl similarity index 60% rename from src/pathwise_sampling.jl rename to src/PathwiseSamplingModule.jl index 50b1276b..fa3515c2 100644 --- a/src/pathwise_sampling.jl +++ b/src/PathwiseSamplingModule.jl @@ -1,3 +1,18 @@ +module PathwiseSamplingModule + +export pathwise_sample + +using Random + +using ..ApproximateGPs: _chol_cov +using ..SparseVariationalApproximationModule: + SparseVariationalApproximation, Centered, NonCentered, _get_q_u + +using AbstractGPs: + ApproxPosteriorGP, VFE, inducing_points, Xt_invA_X, Xt_A_X, inducing_points +using PDMats: chol_lower +using Distributions + @doc raw""" pathwise_sample(rng::Random.AbstractRNG, f::ApproxPosteriorGP, weight_space_approx[, num_samples::Integer]) @@ -39,8 +54,8 @@ function pathwise_sample( prior_approx = weight_space_approx(f.prior) prior_samples = rand(rng, prior_approx, num_samples) - z = AbstractGPs.inducing_points(f) - q_u = ApproximateGPs._get_q_u(f) + z = inducing_points(f) + q_u = _get_q_u(f) us = rand(rng, q_u, num_samples) @@ -61,34 +76,4 @@ function pathwise_sample(f::ApproxPosteriorGP, wsa, num_samples::Integer) return pathwise_sample(Random.GLOBAL_RNG, f, wsa, num_samples) end -# Methods to get the explicit variational distribution over inducing points q(u) -function _get_q_u(f::ApproxPosteriorGP{<:SparseVariationalApproximation{NonCentered}}) - # u = Lε + μ where LLᵀ = cov(fz) and μ = mean(fz) - # q(ε) = N(m, S) - # => q(u) = N(Lm + μ, LSLᵀ) - L, μ = chol_lower(_chol_cov(f.approx.fz)), mean(f.approx.fz) - m, S = mean(f.approx.q), _chol_cov(f.approx.q) - return MvNormal(L * m + μ, Xt_A_X(S, L')) -end -_get_q_u(f::ApproxPosteriorGP{<:SparseVariationalApproximation{Centered}}) = f.approx.q - -function _get_q_u(f::ApproxPosteriorGP{<:VFE}) - # q(u) = N(m, S) - # q(f_k) = N(μ_k, Σ_k) (the predictive distribution at test inputs k) - # μ_k = mean(k) + K_kz * K_zz⁻¹ * m - # where: K_kz = cov(f.prior, k, z) - # implemented as: μ_k = mean(k) + K_kz * α - # => m = K_zz * α - # Σ_k = K_kk - (K_kz * K_zz⁻¹ * K_zk) + (K_kz * K_zz⁻¹ * S * K_zz⁻¹ * K_zk) - # interested in the last term to get S - # implemented as: Aᵀ * Λ_ε⁻¹ * A - # where: A = U⁻ᵀ * K_zk - # UᵀU = K_zz - # so, Λ_ε⁻¹ = U⁻ᵀ * S * U - # => S = Uᵀ * Λ_ε⁻¹ * U - # see https://krasserm.github.io/2020/12/12/gaussian-processes-sparse/ eqns (8) & (9) - U = f.data.U - m = U'U * f.data.α - S = Xt_invA_X(f.data.Λ_ε, U) - return MvNormal(m, S) end diff --git a/src/SparseVariationalApproximationModule.jl b/src/SparseVariationalApproximationModule.jl index 203dafeb..613211df 100644 --- a/src/SparseVariationalApproximationModule.jl +++ b/src/SparseVariationalApproximationModule.jl @@ -19,10 +19,14 @@ using AbstractGPs: FiniteGP, LatentFiniteGP, ApproxPosteriorGP, + VFE, posterior, marginals, At_A, - diag_At_A + diag_At_A, + Xt_A_X, + Xt_invA_X, + inducing_points using GPLikelihoods: GaussianLikelihood export DefaultQuadrature, Analytic, GaussHermite, MonteCarlo @@ -374,6 +378,38 @@ function _prior_kl(sva::SparseVariationalApproximation{NonCentered}) return (trace_term + m_ε'm_ε - length(m_ε) - logdet(C_ε)) / 2 end +# Methods to get the explicit variational distribution over inducing points q(u) +function _get_q_u(f::ApproxPosteriorGP{<:SparseVariationalApproximation{NonCentered}}) + # u = Lε + μ where LLᵀ = cov(fz) and μ = mean(fz) + # q(ε) = N(m, S) + # => q(u) = N(Lm + μ, LSLᵀ) + L, μ = chol_lower(_chol_cov(f.approx.fz)), mean(f.approx.fz) + m, S = mean(f.approx.q), _chol_cov(f.approx.q) + return MvNormal(L * m + μ, Xt_A_X(S, L')) +end +_get_q_u(f::ApproxPosteriorGP{<:SparseVariationalApproximation{Centered}}) = f.approx.q + +function _get_q_u(f::ApproxPosteriorGP{<:AbstractGPs.VFE}) + # q(u) = N(m, S) + # q(f_k) = N(μ_k, Σ_k) (the predictive distribution at test inputs k) + # μ_k = mean(k) + K_kz * K_zz⁻¹ * m + # where: K_kz = cov(f.prior, k, z) + # implemented as: μ_k = mean(k) + K_kz * α + # => m = K_zz * α + # Σ_k = K_kk - (K_kz * K_zz⁻¹ * K_zk) + (K_kz * K_zz⁻¹ * S * K_zz⁻¹ * K_zk) + # interested in the last term to get S + # implemented as: Aᵀ * Λ_ε⁻¹ * A + # where: A = U⁻ᵀ * K_zk + # UᵀU = K_zz + # so, Λ_ε⁻¹ = U⁻ᵀ * S * U + # => S = Uᵀ * Λ_ε⁻¹ * U + # see https://krasserm.github.io/2020/12/12/gaussian-processes-sparse/ eqns (8) & (9) + U = f.data.U + m = U'U * f.data.α + S = Xt_invA_X(f.data.Λ_ε, U) + return MvNormal(m, S) +end + # Get the optimal closed form solution for the centered variational posterior q(u) function _optimal_variational_posterior(::Centered, fz, fx, y) fz.f.mean isa AbstractGPs.ZeroMean || @@ -384,6 +420,8 @@ end # Get the optimal closed form solution for the non-centered variational posterior q(ε) function _optimal_variational_posterior(::NonCentered, fz, fx, y) + fz.f.mean isa AbstractGPs.ZeroMean || + error("The exact posterior requires a GP with ZeroMean.") q_u = _optimal_variational_posterior(Centered(), fz, fx, y) Cuu = cholesky(Symmetric(cov(fz))) return MvNormal(Cuu.L \ (mean(q_u) - mean(fz)), Symmetric((Cuu.L \ cov(q_u)) / Cuu.U)) diff --git a/test/pathwise_sampling.jl b/test/PathwiseSamplingModule.jl similarity index 89% rename from test/pathwise_sampling.jl rename to test/PathwiseSamplingModule.jl index 88808bff..c751efb9 100644 --- a/test/pathwise_sampling.jl +++ b/test/PathwiseSamplingModule.jl @@ -40,21 +40,21 @@ end @testset "Centered SVA" begin - q = ApproximateGPs._optimal_variational_posterior(Centered(), fz, fx, y) + q = _optimal_variational_posterior(Centered(), fz, fx, y) ap = posterior(SparseVariationalApproximation(Centered(), fz, q)) test_single_sample_stats(ap, num_samples) test_multi_sample_stats(ap, num_samples) end @testset "NonCentered SVA" begin - q = ApproximateGPs._optimal_variational_posterior(NonCentered(), fz, fx, y) + q = _optimal_variational_posterior(NonCentered(), fz, fx, y) ap = posterior(SparseVariationalApproximation(NonCentered(), fz, q)) test_single_sample_stats(ap, num_samples) test_multi_sample_stats(ap, num_samples) end @testset "VFE" begin - ap = posterior(VFE(fz), fx, y) + ap = posterior(AbstractGPs.VFE(fz), fx, y) test_single_sample_stats(ap, num_samples) test_multi_sample_stats(ap, num_samples) diff --git a/test/runtests.jl b/test/runtests.jl index f9edd832..e03fc551 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,7 +1,5 @@ using Random using Test -using ApproximateGPs -using ApproximateGPs: _optimal_variational_posterior using RandomFourierFeatures using Flux using IterTools @@ -16,7 +14,11 @@ using Zygote using ChainRulesCore using ChainRulesTestUtils using FiniteDifferences -using ApproximateGPs: SparseVariationalApproximationModule, LaplaceApproximationModule +using ApproximateGPs +using ApproximateGPs: + SparseVariationalApproximationModule, + SparseVariationalApproximationModule._optimal_variational_posterior, + LaplaceApproximationModule # Writing tests: # 1. The file structure of the test should match precisely the file structure of src. @@ -58,9 +60,9 @@ include("test_utils.jl") include("sparse_variational.jl") println(" ") - @info "Ran svgp tests" + @info "Ran sparse variational tests" - include("pathwise_sampling.jl") + include("PathwiseSamplingModule.jl") println(" ") @info "Ran pathwise sampling tests" From 385421a0347b7657e2b16c92a7c07058b69d8ee7 Mon Sep 17 00:00:00 2001 From: Ross Viljoen Date: Fri, 11 Mar 2022 21:29:56 +0000 Subject: [PATCH 14/15] Closure -> struct --- src/PathwiseSamplingModule.jl | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/PathwiseSamplingModule.jl b/src/PathwiseSamplingModule.jl index fa3515c2..bde2592b 100644 --- a/src/PathwiseSamplingModule.jl +++ b/src/PathwiseSamplingModule.jl @@ -13,6 +13,15 @@ using AbstractGPs: using PDMats: chol_lower using Distributions +struct PosteriorSample{Tapprox<:ApproxPosteriorGP,Tprior,Tv} + approx_post::Tapprox + prior_sample::Tprior + v::Tv +end +function (s::PosteriorSample)(x::AbstractVector) + return s.prior_sample(x) + cov(s.approx_post, x, inducing_points(s.approx_post)) * s.v +end + @doc raw""" pathwise_sample(rng::Random.AbstractRNG, f::ApproxPosteriorGP, weight_space_approx[, num_samples::Integer]) @@ -42,9 +51,7 @@ function pathwise_sample(rng::Random.AbstractRNG, f::ApproxPosteriorGP, weight_s u = rand(rng, q_u) v = cov(f, z) \ (u - prior_sample(z)) - posterior_sample(x) = prior_sample(x) + cov(f, x, z) * v - - return posterior_sample + return PosteriorSample(f, prior_sample, v) end pathwise_sample(f::ApproxPosteriorGP, wsa) = pathwise_sample(Random.GLOBAL_RNG, f, wsa) @@ -61,14 +68,8 @@ function pathwise_sample( vs = cov(f, z) \ (us - reduce(hcat, map((s) -> s(z), prior_samples))) - function create_posterior_sample_fn(prior_sample, v) - function posterior_sample(x) - return prior_sample(x) + cov(f, x, z) * v - end - end - posterior_samples = [ - create_posterior_sample_fn(s, v) for (s, v) in zip(prior_samples, eachcol(vs)) + PosteriorSample(f, s, v) for (s, v) in zip(prior_samples, eachcol(vs)) ] return posterior_samples end From 41eb95eb8da2875751125c21c6acc002f4d0ccbe Mon Sep 17 00:00:00 2001 From: Ross Viljoen Date: Fri, 11 Mar 2022 21:39:56 +0000 Subject: [PATCH 15/15] Add comments --- src/PathwiseSamplingModule.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/PathwiseSamplingModule.jl b/src/PathwiseSamplingModule.jl index bde2592b..35562bf9 100644 --- a/src/PathwiseSamplingModule.jl +++ b/src/PathwiseSamplingModule.jl @@ -14,9 +14,9 @@ using PDMats: chol_lower using Distributions struct PosteriorSample{Tapprox<:ApproxPosteriorGP,Tprior,Tv} - approx_post::Tapprox - prior_sample::Tprior - v::Tv + approx_post::Tapprox # The approximate posterior GP from which this sample is taken + prior_sample::Tprior # A function sampled from the prior of `approx_post` + v::Tv # The term needed to compute the pathwise update to the prior sample end function (s::PosteriorSample)(x::AbstractVector) return s.prior_sample(x) + cov(s.approx_post, x, inducing_points(s.approx_post)) * s.v