From 5f795811cdf68d2b8bf92f5fcc5e743ce4f1729e Mon Sep 17 00:00:00 2001
From: Ross Viljoen <ross@viljoen.co.uk>
Date: Sun, 27 Feb 2022 01:09:31 +0000
Subject: [PATCH 01/15] Pathwise sampling implementation

---
 Project.toml              |  1 +
 src/ApproximateGPs.jl     | 13 ++++++++++-
 src/pathwise_sampling.jl  | 46 +++++++++++++++++++++++++++++++++++++++
 src/sparse_variational.jl |  4 +++-
 4 files changed, 62 insertions(+), 2 deletions(-)
 create mode 100644 src/pathwise_sampling.jl

diff --git a/Project.toml b/Project.toml
index 8b13a35f..774221f3 100644
--- a/Project.toml
+++ b/Project.toml
@@ -14,6 +14,7 @@ GPLikelihoods = "6031954c-0455-49d7-b3b9-3e1c99afaf40"
 IrrationalConstants = "92d709cd-6900-40b7-9082-c6be49f344b6"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
diff --git a/src/ApproximateGPs.jl b/src/ApproximateGPs.jl
index 20f46acf..65cd192b 100644
--- a/src/ApproximateGPs.jl
+++ b/src/ApproximateGPs.jl
@@ -5,6 +5,7 @@ using Reexport
 @reexport using GPLikelihoods
 using Distributions
 using LinearAlgebra
+using Random
 using Statistics
 using StatsBase
 using FastGaussQuadrature
@@ -14,7 +15,16 @@ using FillArrays
 using PDMats: chol_lower
 using IrrationalConstants: sqrt2, invsqrtπ
 
-using AbstractGPs: AbstractGP, FiniteGP, LatentFiniteGP, ApproxPosteriorGP, At_A, diag_At_A
+using AbstractGPs:
+    AbstractGP,
+    FiniteGP,
+    LatentFiniteGP,
+    ApproxPosteriorGP,
+    inducing_points,
+    At_A,
+    diag_At_A,
+    Xt_A_X,
+    Xt_invA_X
 
 include("utils.jl")
 
@@ -23,6 +33,7 @@ include("expected_loglik.jl")
 
 export SparseVariationalApproximation, Centered, NonCentered
 include("sparse_variational.jl")
+include("pathwise_sampling.jl")
 
 using ForwardDiff
 
diff --git a/src/pathwise_sampling.jl b/src/pathwise_sampling.jl
new file mode 100644
index 00000000..b221b0d7
--- /dev/null
+++ b/src/pathwise_sampling.jl
@@ -0,0 +1,46 @@
+function pathwise_sample(rng::Random.AbstractRNG, f::ApproxPosteriorGP, weight_space_approx) # TODO: num_samples
+    prior_approx = weight_space_approx(f.prior)
+    prior_sample = rand(rng, prior_approx)
+
+    z = inducing_points(f)
+    q_u = _get_q_u(f)
+
+    u = rand(rng, q_u)
+    v = cov(f, z) \ (u - prior_sample(z))
+
+    posterior_sample(x) = prior_sample(x) + cov(f, x, z) * v
+
+    return posterior_sample
+end
+
+# Methods to get the explicit variational distribution over inducing points q(u)
+function _get_q_u(f::ApproxPosteriorGP{<:SparseVariationalApproximation{NonCentered}})
+    # u = Lε + μ where LLᵀ = cov(fz) and μ = mean(fz)
+    # q(ε) = N(m, S)
+    # => q(u) = N(Lm + μ, LSLᵀ)
+    L, μ = chol_lower(_chol_cov(f.approx.fz)), mean(f.approx.fz)
+    m, S = mean(f.approx.q), _chol_cov(f.approx.q)
+    return MvNormal(L * m + μ, Xt_A_X(S, L'))
+end
+_get_q_u(f::ApproxPosteriorGP{<:SparseVariationalApproximation{Centered}}) = f.approx.q
+
+function _get_q_u(f::ApproxPosteriorGP{<:VFE})
+    # q(u) = N(m, S)
+    # q(f_k) = N(μ_k, Σ_k)  (the predictive distribution at test inputs k)
+    # μ_k = mean(k) + K_kz * K_zz⁻¹ * m
+    # where: K_kz = cov(f.prior, k, z)
+    # implemented as: μ_k = mean(k) + K_kz * α
+    # => m = K_zz * α
+    # Σ_k = K_kk - (K_kz * K_zz⁻¹ * K_zk) + (K_kz * K_zz⁻¹ * S * K_zz⁻¹ * K_zk)
+    # interested in the last term to get S
+    # implemented as: Aᵀ * Λ_ε⁻¹ * A
+    # where: A = U⁻ᵀ * K_zk
+    # UᵀU = K_zz
+    # so, Λ_ε⁻¹ = U⁻ᵀ * S * U
+    # => S = Uᵀ * Λ_ε⁻¹ * U
+    # see https://krasserm.github.io/2020/12/12/gaussian-processes-sparse/ eqns (8) & (9)
+    U = f.data.U
+    m = U'U * f.data.α
+    S = Xt_invA_X(f.data.Λ_ε, U)
+    return MvNormal(m, S)
+end
diff --git a/src/sparse_variational.jl b/src/sparse_variational.jl
index 531c3c02..937f5b91 100644
--- a/src/sparse_variational.jl
+++ b/src/sparse_variational.jl
@@ -233,7 +233,9 @@ end
 # Misc utility.
 #
 
-inducing_points(f::ApproxPosteriorGP{<:SparseVariationalApproximation}) = f.approx.fz.x
+function AbstractGPs.inducing_points(f::ApproxPosteriorGP{<:SparseVariationalApproximation})
+    return f.approx.fz.x
+end
 
 #
 # elbo

From 58dcf61f23eb7910dfef5085650d172566bfdb84 Mon Sep 17 00:00:00 2001
From: Ross Viljoen <ross@viljoen.co.uk>
Date: Sun, 27 Feb 2022 01:28:44 +0000
Subject: [PATCH 02/15] Multisample version

---
 src/pathwise_sampling.jl | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/src/pathwise_sampling.jl b/src/pathwise_sampling.jl
index b221b0d7..acc9bedf 100644
--- a/src/pathwise_sampling.jl
+++ b/src/pathwise_sampling.jl
@@ -1,4 +1,4 @@
-function pathwise_sample(rng::Random.AbstractRNG, f::ApproxPosteriorGP, weight_space_approx) # TODO: num_samples
+function pathwise_sample(rng::Random.AbstractRNG, f::ApproxPosteriorGP, weight_space_approx)
     prior_approx = weight_space_approx(f.prior)
     prior_sample = rand(rng, prior_approx)
 
@@ -12,6 +12,35 @@ function pathwise_sample(rng::Random.AbstractRNG, f::ApproxPosteriorGP, weight_s
 
     return posterior_sample
 end
+pathwise_sample(f::ApproxPosteriorGP, wsa) = pathwise_sample(Random.GLOBAL_RNG, f, wsa)
+
+function pathwise_sample(
+    rng::Random.AbstractRNG, f::ApproxPosteriorGP, weight_space_approx, num_samples::Integer
+)
+    prior_approx = weight_space_approx(f.prior)
+    prior_samples = rand(rng, prior_approx, num_samples)
+
+    z = AbstractGPs.inducing_points(f)
+    q_u = ApproximateGPs._get_q_u(f)
+
+    us = rand(rng, q_u, num_samples)
+
+    vs = cov(f, z) \ (us - reduce(hcat, map((s) -> s(z), prior_samples)))
+
+    function create_posterior_sample_fn(prior_sample, v)
+        function posterior_sample(x)
+            return prior_sample(x) + cov(f, x, z) * v
+        end
+    end
+
+    posterior_samples = [
+        create_posterior_sample_fn(s, v) for (s, v) in zip(prior_samples, eachrow(vs))
+    ]
+    return posterior_samples
+end
+function pathwise_sample(f::ApproxPosteriorGP, wsa, n::Integer)
+    return pathwise_sample(Random.GLOBAL_RNG, f, wsa, n)
+end
 
 # Methods to get the explicit variational distribution over inducing points q(u)
 function _get_q_u(f::ApproxPosteriorGP{<:SparseVariationalApproximation{NonCentered}})

From c1a07343001c009f634efd6a80094795736deabc Mon Sep 17 00:00:00 2001
From: Ross Viljoen <ross@viljoen.co.uk>
Date: Sun, 27 Feb 2022 01:37:50 +0000
Subject: [PATCH 03/15] Add example

---
 examples/d-pathwise-sampling/Project.toml |  8 +++
 examples/d-pathwise-sampling/script.jl    | 73 +++++++++++++++++++++++
 2 files changed, 81 insertions(+)
 create mode 100644 examples/d-pathwise-sampling/Project.toml
 create mode 100644 examples/d-pathwise-sampling/script.jl

diff --git a/examples/d-pathwise-sampling/Project.toml b/examples/d-pathwise-sampling/Project.toml
new file mode 100644
index 00000000..97a47f1c
--- /dev/null
+++ b/examples/d-pathwise-sampling/Project.toml
@@ -0,0 +1,8 @@
+[deps]
+AbstractGPs = "99985d1d-32ba-4be9-9821-2ec096f28918"
+ApproximateGPs = "298c2ebc-0411-48ad-af38-99e88101b606"
+Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+RandomFourierFeatures = "46bd3b77-1c99-43cd-804b-1e14830792b5"
diff --git a/examples/d-pathwise-sampling/script.jl b/examples/d-pathwise-sampling/script.jl
new file mode 100644
index 00000000..53d8a384
--- /dev/null
+++ b/examples/d-pathwise-sampling/script.jl
@@ -0,0 +1,73 @@
+# ## Setup
+
+using RandomFourierFeatures
+using ApproximateGPs
+using AbstractGPs
+using Distributions
+using LinearAlgebra
+using Random
+
+rng = MersenneTwister(1234)
+
+# Find the exact Titsias posterior (avoid optimisation)
+# TODO: this is already (and better) solved by `_get_q_u(f::ApproxPosteriorGP{<:VFE})`
+# Replace this (and the version in `test_utils`?)
+function optimal_variational_posterior(::Centered, fz, fx, y)
+    σ² = fx.Σy[1]
+    Kuf = cov(fz, fx)
+    Kuu = Symmetric(cov(fz))
+    Σ = (Symmetric(cov(fz) + (1 / σ²) * Kuf * Kuf'))
+    m = ((1 / σ²) * Kuu * (Σ \ Kuf)) * y
+    S = Symmetric(Kuu * (Σ \ Kuu))
+    return MvNormal(m, S)
+end
+
+function optimal_variational_posterior(::NonCentered, fz, fx, y)
+    q = optimal_variational_posterior(Centered(), fz, fx, y)
+    Cuu = cholesky(Symmetric(cov(fz)))
+    return MvNormal(Cuu.L \ (mean(q) - mean(fz)), Symmetric((Cuu.L \ cov(q)) / Cuu.U))
+end
+
+# Define a GP and generate some data
+
+k = 3 * (SqExponentialKernel() ∘ ScaleTransform(10))
+gp = GP(k)
+
+input_dims = 1
+
+x = ColVecs(rand(input_dims, 20))
+fx = gp(x, 0.01)
+y = rand(fx)
+
+z = x[1:8]
+fz = gp(z)
+
+# Any of the following will work:
+
+# q = optimal_variational_posterior(NonCentered(), fz, fx, y)
+# ap = posterior(SparseVariationalApproximation(NonCentered(), fz, q))
+
+# q = optimal_variational_posterior(Centered(), fz, fx, y)
+# ap = posterior(SparseVariationalApproximation(NonCentered(), fz, q))
+
+ap = posterior(VFE(fz), fx, y)
+
+x_test = ColVecs(sort(rand(input_dims, 500); dims=2))
+
+num_features = 1000
+rff_wsa = build_rff_weight_space_approx(rng, input_dims, num_features)
+
+function_samples = [ApproximateGPs.pathwise_sample(rng, ap, rff_wsa) for _ in 1:100]
+
+y_samples = reduce(hcat, map((f) -> f(x_test), function_samples))  # size(y_samples): (length(x_plot), n_samples)
+
+# Plot sampled functions against the exact posterior
+
+using Plots
+
+x_plot = x_test.X'
+
+plot(x_plot, y_samples; label="", color=:red, linealpha=0.2)
+plot!(vec(x_plot), ap; color=:blue, label="True posterior")
+scatter!(x.X', y; label="data")
+vline!(vec(z.X'); label="inducing points", color=:orange)

From c819fc2a2aa1607a5cd667cb0ea2d7449c2a2e8a Mon Sep 17 00:00:00 2001
From: Ross Viljoen <ross@viljoen.co.uk>
Date: Sun, 27 Feb 2022 02:03:20 +0000
Subject: [PATCH 04/15] Move `_optimal_variational_posterior` to `src`

---
 src/sparse_variational.jl  | 15 +++++++++++++++
 test/runtests.jl           |  5 +++++
 test/sparse_variational.jl | 17 ++++++-----------
 test/test_utils.jl         | 15 ---------------
 4 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/src/sparse_variational.jl b/src/sparse_variational.jl
index 937f5b91..dc95c365 100644
--- a/src/sparse_variational.jl
+++ b/src/sparse_variational.jl
@@ -343,3 +343,18 @@ function _prior_kl(sva::SparseVariationalApproximation{NonCentered})
 
     return (trace_term + m_ε'm_ε - length(m_ε) - logdet(C_ε)) / 2
 end
+
+# Get the optimal closed form solution for the centered variational posterior q(u)
+function _optimal_variational_posterior(::Centered, fz, fx, y)
+    fz.f.mean isa AbstractGPs.ZeroMean ||
+        error("The exact posterior requires a GP with ZeroMean.")
+    post = posterior(VFE(fz), fx, y)
+    return _get_q_u(post)
+end
+
+# Get the optimal closed form solution for the non-centered variational posterior q(ε)
+function _optimal_variational_posterior(::NonCentered, fz, fx, y)
+    q_u = _optimal_variational_posterior(Centered(), fz, fx, y)
+    Cuu = cholesky(Symmetric(cov(fz)))
+    return MvNormal(Cuu.L \ (mean(q_u) - mean(fz)), Symmetric((Cuu.L \ cov(q_u)) / Cuu.U))
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 79861ef3..c7f3b214 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,6 +1,7 @@
 using Random
 using Test
 using ApproximateGPs
+using ApproximateGPs: _optimal_variational_posterior
 using Flux
 using IterTools
 using AbstractGPs
@@ -57,6 +58,10 @@ include("test_utils.jl")
     println(" ")
     @info "Ran svgp tests"
 
+    include("pathwise_sampling.jl")
+    println(" ")
+    @info "Ran pathwise sampling tests"
+
     include("laplace.jl")
     println(" ")
     @info "Ran laplace tests"
diff --git a/test/sparse_variational.jl b/test/sparse_variational.jl
index 43cc5898..c57f44f3 100644
--- a/test/sparse_variational.jl
+++ b/test/sparse_variational.jl
@@ -19,7 +19,7 @@
         fz = f(z, 1e-6)
 
         # Construct approximate posterior.
-        q_Centered = optimal_variational_posterior(fz, fx, y)
+        q_Centered = _optimal_variational_posterior(Centered(), fz, fx, y)
         approx_Centered = SparseVariationalApproximation(Centered(), fz, q_Centered)
         f_approx_post_Centered = posterior(approx_Centered)
 
@@ -32,17 +32,12 @@
         end
 
         @testset "NonCentered" begin
-
             # Construct optimal approximate posterior.
-            q = optimal_variational_posterior(fz, fx, y)
-            Cuu = cholesky(Symmetric(cov(fz)))
-            q_ε = MvNormal(
-                Cuu.L \ (mean(q) - mean(fz)), Symmetric((Cuu.L \ cov(q)) / Cuu.U)
-            )
+            q_ε = _optimal_variational_posterior(NonCentered(), fz, fx, y)
 
             @testset "Check that q_ε has been properly constructed" begin
-                @test mean(q) ≈ mean(fz) + Cuu.L * mean(q_ε)
-                @test cov(q) ≈ Cuu.L * cov(q_ε) * Cuu.U
+                @test mean(q_Centered) ≈ mean(fz) + Cuu.L * mean(q_ε)
+                @test cov(q_Centered) ≈ Cuu.L * cov(q_ε) * Cuu.U
             end
 
             # Construct equivalent approximate posteriors.
@@ -79,7 +74,7 @@
         f = GP(kernel)
         fx = f(x, 0.1)
         fz = f(z)
-        q_ex = optimal_variational_posterior(fz, fx, y)
+        q_ex = _optimal_variational_posterior(Centered(), fz, fx, y)
 
         sva = SparseVariationalApproximation(fz, q_ex)
         @test elbo(sva, fx, y) isa Real
@@ -115,7 +110,7 @@
             f = GP(kernel)
             fx = f(x, lik_noise)
             fz = f(z)
-            q_ex = optimal_variational_posterior(fz, fx, y)
+            q_ex = _optimal_variational_posterior(Centered(), fz, fx, y)
 
             gpr_post = posterior(fx, y) # Exact GP regression
             vfe_post = posterior(VFE(fz), fx, y) # Titsias posterior
diff --git a/test/test_utils.jl b/test/test_utils.jl
index 58bd666c..dae04a51 100644
--- a/test/test_utils.jl
+++ b/test/test_utils.jl
@@ -1,17 +1,2 @@
 # Create a default kernel from two parameters k[1] and k[2]
 make_kernel(k) = softplus(k[1]) * (SqExponentialKernel() ∘ ScaleTransform(softplus(k[2])))
-
-# Computes the optimal closed form solution for the variational posterior
-# q(u) (e.g. # https://krasserm.github.io/2020/12/12/gaussian-processes-sparse/
-# equations (11) & (12)). Assumes a ZeroMean function.
-function optimal_variational_posterior(fu, fx, y)
-    fu.f.mean isa AbstractGPs.ZeroMean ||
-        error("The exact posterior requires a GP with ZeroMean.")
-    σ² = fx.Σy[1]
-    Kuf = cov(fu, fx)
-    Kuu = Symmetric(cov(fu))
-    Σ = (Symmetric(cov(fu) + (1 / σ²) * Kuf * Kuf'))
-    m = ((1 / σ²) * Kuu * (Σ \ Kuf)) * y
-    S = Symmetric(Kuu * (Σ \ Kuu))
-    return MvNormal(m, S)
-end

From 184fe61e2a7786bc3a69ff88aa32f502100b3514 Mon Sep 17 00:00:00 2001
From: Ross Viljoen <ross@viljoen.co.uk>
Date: Sun, 27 Feb 2022 02:04:00 +0000
Subject: [PATCH 05/15] Update example

---
 examples/d-pathwise-sampling/script.jl | 23 ++---------------------
 1 file changed, 2 insertions(+), 21 deletions(-)

diff --git a/examples/d-pathwise-sampling/script.jl b/examples/d-pathwise-sampling/script.jl
index 53d8a384..561f64eb 100644
--- a/examples/d-pathwise-sampling/script.jl
+++ b/examples/d-pathwise-sampling/script.jl
@@ -9,25 +9,6 @@ using Random
 
 rng = MersenneTwister(1234)
 
-# Find the exact Titsias posterior (avoid optimisation)
-# TODO: this is already (and better) solved by `_get_q_u(f::ApproxPosteriorGP{<:VFE})`
-# Replace this (and the version in `test_utils`?)
-function optimal_variational_posterior(::Centered, fz, fx, y)
-    σ² = fx.Σy[1]
-    Kuf = cov(fz, fx)
-    Kuu = Symmetric(cov(fz))
-    Σ = (Symmetric(cov(fz) + (1 / σ²) * Kuf * Kuf'))
-    m = ((1 / σ²) * Kuu * (Σ \ Kuf)) * y
-    S = Symmetric(Kuu * (Σ \ Kuu))
-    return MvNormal(m, S)
-end
-
-function optimal_variational_posterior(::NonCentered, fz, fx, y)
-    q = optimal_variational_posterior(Centered(), fz, fx, y)
-    Cuu = cholesky(Symmetric(cov(fz)))
-    return MvNormal(Cuu.L \ (mean(q) - mean(fz)), Symmetric((Cuu.L \ cov(q)) / Cuu.U))
-end
-
 # Define a GP and generate some data
 
 k = 3 * (SqExponentialKernel() ∘ ScaleTransform(10))
@@ -44,10 +25,10 @@ fz = gp(z)
 
 # Any of the following will work:
 
-# q = optimal_variational_posterior(NonCentered(), fz, fx, y)
+# q = ApproximateGPs._optimal_variational_posterior(NonCentered(), fz, fx, y)
 # ap = posterior(SparseVariationalApproximation(NonCentered(), fz, q))
 
-# q = optimal_variational_posterior(Centered(), fz, fx, y)
+# q = ApproximateGPs._optimal_variational_posterior(Centered(), fz, fx, y)
 # ap = posterior(SparseVariationalApproximation(NonCentered(), fz, q))
 
 ap = posterior(VFE(fz), fx, y)

From 3827a21da5f2f19064115dd08f92b993d360fd9c Mon Sep 17 00:00:00 2001
From: Ross Viljoen <ross@viljoen.co.uk>
Date: Sun, 27 Feb 2022 02:09:13 +0000
Subject: [PATCH 06/15] Export pathwise_sample

---
 src/ApproximateGPs.jl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/ApproximateGPs.jl b/src/ApproximateGPs.jl
index 65cd192b..51ce19c2 100644
--- a/src/ApproximateGPs.jl
+++ b/src/ApproximateGPs.jl
@@ -33,6 +33,8 @@ include("expected_loglik.jl")
 
 export SparseVariationalApproximation, Centered, NonCentered
 include("sparse_variational.jl")
+
+export pathwise_sample
 include("pathwise_sampling.jl")
 
 using ForwardDiff

From 4b77b52b48ac0938f1b4de40e913fa3f8ec38b99 Mon Sep 17 00:00:00 2001
From: Ross Viljoen <ross@viljoen.co.uk>
Date: Sun, 27 Feb 2022 03:13:10 +0000
Subject: [PATCH 07/15] Fix multisample

---
 examples/d-pathwise-sampling/script.jl | 2 +-
 src/pathwise_sampling.jl               | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/d-pathwise-sampling/script.jl b/examples/d-pathwise-sampling/script.jl
index 561f64eb..bf050494 100644
--- a/examples/d-pathwise-sampling/script.jl
+++ b/examples/d-pathwise-sampling/script.jl
@@ -38,7 +38,7 @@ x_test = ColVecs(sort(rand(input_dims, 500); dims=2))
 num_features = 1000
 rff_wsa = build_rff_weight_space_approx(rng, input_dims, num_features)
 
-function_samples = [ApproximateGPs.pathwise_sample(rng, ap, rff_wsa) for _ in 1:100]
+function_samples = ApproximateGPs.pathwise_sample(rng, ap, rff_wsa, 100)
 
 y_samples = reduce(hcat, map((f) -> f(x_test), function_samples))  # size(y_samples): (length(x_plot), n_samples)
 
diff --git a/src/pathwise_sampling.jl b/src/pathwise_sampling.jl
index acc9bedf..7c3ecdae 100644
--- a/src/pathwise_sampling.jl
+++ b/src/pathwise_sampling.jl
@@ -34,7 +34,7 @@ function pathwise_sample(
     end
 
     posterior_samples = [
-        create_posterior_sample_fn(s, v) for (s, v) in zip(prior_samples, eachrow(vs))
+        create_posterior_sample_fn(s, v) for (s, v) in zip(prior_samples, eachcol(vs))
     ]
     return posterior_samples
 end

From af18236fdfcb62530e4336489a171e6b98c59096 Mon Sep 17 00:00:00 2001
From: Ross Viljoen <ross@viljoen.co.uk>
Date: Sun, 27 Feb 2022 03:13:35 +0000
Subject: [PATCH 08/15] Add tests

---
 test/Project.toml         |  1 +
 test/pathwise_sampling.jl | 62 +++++++++++++++++++++++++++++++++++++++
 test/runtests.jl          |  1 +
 3 files changed, 64 insertions(+)
 create mode 100644 test/pathwise_sampling.jl

diff --git a/test/Project.toml b/test/Project.toml
index 9326a1dc..bf1f4a5b 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -12,6 +12,7 @@ LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
 Optim = "429524aa-4258-5aef-a3af-852621145aeb"
 PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+RandomFourierFeatures = "46bd3b77-1c99-43cd-804b-1e14830792b5"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
diff --git a/test/pathwise_sampling.jl b/test/pathwise_sampling.jl
new file mode 100644
index 00000000..88808bff
--- /dev/null
+++ b/test/pathwise_sampling.jl
@@ -0,0 +1,62 @@
+@testset "pathwise_sampling" begin
+    rng = MersenneTwister(1453)
+
+    kernel = 0.1 * (SqExponentialKernel() ∘ ScaleTransform(0.2))
+    Σy = 1e-6
+    f = GP(kernel)
+
+    input_dims = 2
+    X = ColVecs(rand(rng, input_dims, 8))
+
+    fx = f(X, Σy)
+    y = rand(fx)
+
+    Z = X[1:4]
+    fz = f(Z)
+
+    X_test = ColVecs(rand(rng, input_dims, 3))
+
+    num_features = 10000
+    rff_wsa = build_rff_weight_space_approx(rng, input_dims, num_features)
+
+    num_samples = 1000
+
+    function test_single_sample_stats(ap, num_samples)
+        return test_stats(ap, [pathwise_sample(rng, ap, rff_wsa) for _ in 1:num_samples])
+    end
+
+    function test_multi_sample_stats(ap, num_samples)
+        return test_stats(ap, pathwise_sample(rng, ap, rff_wsa, num_samples))
+    end
+
+    function test_stats(ap, function_samples)
+        y_samples = reduce(hcat, map((f) -> f(X_test), function_samples))
+        m_empirical = mean(y_samples; dims=2)
+        Σ_empirical =
+            (y_samples .- m_empirical) * (y_samples .- m_empirical)' ./ num_samples
+
+        @test mean(ap(X_test)) ≈ m_empirical atol = 1e-3 rtol = 1e-3
+        @test cov(ap(X_test)) ≈ Σ_empirical atol = 1e-3 rtol = 1e-3
+    end
+
+    @testset "Centered SVA" begin
+        q = ApproximateGPs._optimal_variational_posterior(Centered(), fz, fx, y)
+        ap = posterior(SparseVariationalApproximation(Centered(), fz, q))
+
+        test_single_sample_stats(ap, num_samples)
+        test_multi_sample_stats(ap, num_samples)
+    end
+    @testset "NonCentered SVA" begin
+        q = ApproximateGPs._optimal_variational_posterior(NonCentered(), fz, fx, y)
+        ap = posterior(SparseVariationalApproximation(NonCentered(), fz, q))
+
+        test_single_sample_stats(ap, num_samples)
+        test_multi_sample_stats(ap, num_samples)
+    end
+    @testset "VFE" begin
+        ap = posterior(VFE(fz), fx, y)
+
+        test_single_sample_stats(ap, num_samples)
+        test_multi_sample_stats(ap, num_samples)
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index c7f3b214..929ea52b 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -2,6 +2,7 @@ using Random
 using Test
 using ApproximateGPs
 using ApproximateGPs: _optimal_variational_posterior
+using RandomFourierFeatures
 using Flux
 using IterTools
 using AbstractGPs

From 58df984232f5dc903f35581c6bc4781bf76c141a Mon Sep 17 00:00:00 2001
From: Ross Viljoen <ross@viljoen.co.uk>
Date: Sun, 27 Feb 2022 03:26:41 +0000
Subject: [PATCH 09/15] Add docstring

---
 src/pathwise_sampling.jl | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/pathwise_sampling.jl b/src/pathwise_sampling.jl
index 7c3ecdae..743b5b84 100644
--- a/src/pathwise_sampling.jl
+++ b/src/pathwise_sampling.jl
@@ -1,3 +1,20 @@
+@doc raw"""
+    pathwise_sample(rng::Random.AbstractRNG, f::ApproxPosteriorGP, weight_space_approx[, num_samples::Integer])
+
+Efficiently samples a function from a sparse approximate posterior GP `f`.
+Returns a function which can be evaluated at any input locations `X`.
+`weight_space_approx` must be a function which takes a prior `AbstractGP` as an
+argument and returns a `BayesianLinearRegressors.BasisFunctionRegressor`,
+representing a weight space approximation to the prior of `f`. An example of
+such a function can be constructed with
+`RandomFourierFeatures.build_rff_weight_space_approx`. If `num_samples` is
+supplied as an argument, returns a Vector of function samples.
+
+Details of the method can be found in [1].
+
+[1] - Wilson, James, et al. "Efficiently sampling functions from Gaussian
+process posteriors." International Conference on Machine Learning. PMLR, 2020.
+"""
 function pathwise_sample(rng::Random.AbstractRNG, f::ApproxPosteriorGP, weight_space_approx)
     prior_approx = weight_space_approx(f.prior)
     prior_sample = rand(rng, prior_approx)
@@ -38,8 +55,8 @@ function pathwise_sample(
     ]
     return posterior_samples
 end
-function pathwise_sample(f::ApproxPosteriorGP, wsa, n::Integer)
-    return pathwise_sample(Random.GLOBAL_RNG, f, wsa, n)
+function pathwise_sample(f::ApproxPosteriorGP, wsa, num_samples::Integer)
+    return pathwise_sample(Random.GLOBAL_RNG, f, wsa, num_samples)
 end
 
 # Methods to get the explicit variational distribution over inducing points q(u)

From 8ee4ce76fc9a411f1f0bc0458cf6f24ebb31e194 Mon Sep 17 00:00:00 2001
From: Ross Viljoen <ross@viljoen.co.uk>
Date: Sun, 27 Feb 2022 03:28:07 +0000
Subject: [PATCH 10/15] Formatting

---
 src/pathwise_sampling.jl | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/pathwise_sampling.jl b/src/pathwise_sampling.jl
index 743b5b84..50b1276b 100644
--- a/src/pathwise_sampling.jl
+++ b/src/pathwise_sampling.jl
@@ -7,8 +7,10 @@ Returns a function which can be evaluated at any input locations `X`.
 argument and returns a `BayesianLinearRegressors.BasisFunctionRegressor`,
 representing a weight space approximation to the prior of `f`. An example of
 such a function can be constructed with
-`RandomFourierFeatures.build_rff_weight_space_approx`. If `num_samples` is
-supplied as an argument, returns a Vector of function samples.
+`RandomFourierFeatures.build_rff_weight_space_approx`.
+
+If `num_samples` is supplied as an argument, returns a Vector of function
+samples.
 
 Details of the method can be found in [1].
 

From a1adca3066106dbc5ae0a13847d6a592c5df2c0e Mon Sep 17 00:00:00 2001
From: Ross Viljoen <ross@viljoen.co.uk>
Date: Sun, 27 Feb 2022 03:45:27 +0000
Subject: [PATCH 11/15] Fix test

---
 test/sparse_variational.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/sparse_variational.jl b/test/sparse_variational.jl
index c57f44f3..cf4de134 100644
--- a/test/sparse_variational.jl
+++ b/test/sparse_variational.jl
@@ -36,6 +36,7 @@
             q_ε = _optimal_variational_posterior(NonCentered(), fz, fx, y)
 
             @testset "Check that q_ε has been properly constructed" begin
+                Cuu = cholesky(Symmetric(cov(fz)))
                 @test mean(q_Centered) ≈ mean(fz) + Cuu.L * mean(q_ε)
                 @test cov(q_Centered) ≈ Cuu.L * cov(q_ε) * Cuu.U
             end

From 84657cc9c869f3cf249ea38f18f6b74d925744b1 Mon Sep 17 00:00:00 2001
From: Ross Viljoen <ross@viljoen.co.uk>
Date: Fri, 11 Mar 2022 20:17:40 +0000
Subject: [PATCH 12/15] whitespace

---
 src/ApproximateGPs.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/ApproximateGPs.jl b/src/ApproximateGPs.jl
index daf9b532..8797857f 100644
--- a/src/ApproximateGPs.jl
+++ b/src/ApproximateGPs.jl
@@ -4,6 +4,7 @@ using Reexport
 
 @reexport using AbstractGPs
 @reexport using GPLikelihoods
+
 include("API.jl")
 @reexport using .API: approx_lml
 

From 920ef7dcf782aaed382922066dd327a283c66615 Mon Sep 17 00:00:00 2001
From: Ross Viljoen <ross@viljoen.co.uk>
Date: Fri, 11 Mar 2022 21:12:37 +0000
Subject: [PATCH 13/15] Refactor to submodule

---
 docs/src/api/pathwisesampling.md              |  6 +++
 src/ApproximateGPs.jl                         |  3 ++
 ..._sampling.jl => PathwiseSamplingModule.jl} | 49 +++++++------------
 src/SparseVariationalApproximationModule.jl   | 40 ++++++++++++++-
 ..._sampling.jl => PathwiseSamplingModule.jl} |  6 +--
 test/runtests.jl                              | 12 +++--
 6 files changed, 75 insertions(+), 41 deletions(-)
 create mode 100644 docs/src/api/pathwisesampling.md
 rename src/{pathwise_sampling.jl => PathwiseSamplingModule.jl} (60%)
 rename test/{pathwise_sampling.jl => PathwiseSamplingModule.jl} (89%)

diff --git a/docs/src/api/pathwisesampling.md b/docs/src/api/pathwisesampling.md
new file mode 100644
index 00000000..93363656
--- /dev/null
+++ b/docs/src/api/pathwisesampling.md
@@ -0,0 +1,6 @@
+# Pathwise Posterior Sampling
+
+```@autodocs
+Modules = [ApproximateGPs.PathwiseSamplingModule]
+Private = false
+```
diff --git a/src/ApproximateGPs.jl b/src/ApproximateGPs.jl
index 8797857f..3df44631 100644
--- a/src/ApproximateGPs.jl
+++ b/src/ApproximateGPs.jl
@@ -21,6 +21,9 @@ include("LaplaceApproximationModule.jl")
 @reexport using .LaplaceApproximationModule:
     build_laplace_objective, build_laplace_objective!
 
+include("PathwiseSamplingModule.jl")
+@reexport using .PathwiseSamplingModule: pathwise_sample
+
 include("deprecations.jl")
 
 end
diff --git a/src/pathwise_sampling.jl b/src/PathwiseSamplingModule.jl
similarity index 60%
rename from src/pathwise_sampling.jl
rename to src/PathwiseSamplingModule.jl
index 50b1276b..fa3515c2 100644
--- a/src/pathwise_sampling.jl
+++ b/src/PathwiseSamplingModule.jl
@@ -1,3 +1,18 @@
+module PathwiseSamplingModule
+
+export pathwise_sample
+
+using Random
+
+using ..ApproximateGPs: _chol_cov
+using ..SparseVariationalApproximationModule:
+    SparseVariationalApproximation, Centered, NonCentered, _get_q_u
+
+using AbstractGPs:
+    ApproxPosteriorGP, VFE, inducing_points, Xt_invA_X, Xt_A_X, inducing_points
+using PDMats: chol_lower
+using Distributions
+
 @doc raw"""
     pathwise_sample(rng::Random.AbstractRNG, f::ApproxPosteriorGP, weight_space_approx[, num_samples::Integer])
 
@@ -39,8 +54,8 @@ function pathwise_sample(
     prior_approx = weight_space_approx(f.prior)
     prior_samples = rand(rng, prior_approx, num_samples)
 
-    z = AbstractGPs.inducing_points(f)
-    q_u = ApproximateGPs._get_q_u(f)
+    z = inducing_points(f)
+    q_u = _get_q_u(f)
 
     us = rand(rng, q_u, num_samples)
 
@@ -61,34 +76,4 @@ function pathwise_sample(f::ApproxPosteriorGP, wsa, num_samples::Integer)
     return pathwise_sample(Random.GLOBAL_RNG, f, wsa, num_samples)
 end
 
-# Methods to get the explicit variational distribution over inducing points q(u)
-function _get_q_u(f::ApproxPosteriorGP{<:SparseVariationalApproximation{NonCentered}})
-    # u = Lε + μ where LLᵀ = cov(fz) and μ = mean(fz)
-    # q(ε) = N(m, S)
-    # => q(u) = N(Lm + μ, LSLᵀ)
-    L, μ = chol_lower(_chol_cov(f.approx.fz)), mean(f.approx.fz)
-    m, S = mean(f.approx.q), _chol_cov(f.approx.q)
-    return MvNormal(L * m + μ, Xt_A_X(S, L'))
-end
-_get_q_u(f::ApproxPosteriorGP{<:SparseVariationalApproximation{Centered}}) = f.approx.q
-
-function _get_q_u(f::ApproxPosteriorGP{<:VFE})
-    # q(u) = N(m, S)
-    # q(f_k) = N(μ_k, Σ_k)  (the predictive distribution at test inputs k)
-    # μ_k = mean(k) + K_kz * K_zz⁻¹ * m
-    # where: K_kz = cov(f.prior, k, z)
-    # implemented as: μ_k = mean(k) + K_kz * α
-    # => m = K_zz * α
-    # Σ_k = K_kk - (K_kz * K_zz⁻¹ * K_zk) + (K_kz * K_zz⁻¹ * S * K_zz⁻¹ * K_zk)
-    # interested in the last term to get S
-    # implemented as: Aᵀ * Λ_ε⁻¹ * A
-    # where: A = U⁻ᵀ * K_zk
-    # UᵀU = K_zz
-    # so, Λ_ε⁻¹ = U⁻ᵀ * S * U
-    # => S = Uᵀ * Λ_ε⁻¹ * U
-    # see https://krasserm.github.io/2020/12/12/gaussian-processes-sparse/ eqns (8) & (9)
-    U = f.data.U
-    m = U'U * f.data.α
-    S = Xt_invA_X(f.data.Λ_ε, U)
-    return MvNormal(m, S)
 end
diff --git a/src/SparseVariationalApproximationModule.jl b/src/SparseVariationalApproximationModule.jl
index 203dafeb..613211df 100644
--- a/src/SparseVariationalApproximationModule.jl
+++ b/src/SparseVariationalApproximationModule.jl
@@ -19,10 +19,14 @@ using AbstractGPs:
     FiniteGP,
     LatentFiniteGP,
     ApproxPosteriorGP,
+    VFE,
     posterior,
     marginals,
     At_A,
-    diag_At_A
+    diag_At_A,
+    Xt_A_X,
+    Xt_invA_X,
+    inducing_points
 using GPLikelihoods: GaussianLikelihood
 
 export DefaultQuadrature, Analytic, GaussHermite, MonteCarlo
@@ -374,6 +378,38 @@ function _prior_kl(sva::SparseVariationalApproximation{NonCentered})
     return (trace_term + m_ε'm_ε - length(m_ε) - logdet(C_ε)) / 2
 end
 
+# Methods to get the explicit variational distribution over inducing points q(u)
+function _get_q_u(f::ApproxPosteriorGP{<:SparseVariationalApproximation{NonCentered}})
+    # u = Lε + μ where LLᵀ = cov(fz) and μ = mean(fz)
+    # q(ε) = N(m, S)
+    # => q(u) = N(Lm + μ, LSLᵀ)
+    L, μ = chol_lower(_chol_cov(f.approx.fz)), mean(f.approx.fz)
+    m, S = mean(f.approx.q), _chol_cov(f.approx.q)
+    return MvNormal(L * m + μ, Xt_A_X(S, L'))
+end
+_get_q_u(f::ApproxPosteriorGP{<:SparseVariationalApproximation{Centered}}) = f.approx.q
+
+function _get_q_u(f::ApproxPosteriorGP{<:AbstractGPs.VFE})
+    # q(u) = N(m, S)
+    # q(f_k) = N(μ_k, Σ_k)  (the predictive distribution at test inputs k)
+    # μ_k = mean(k) + K_kz * K_zz⁻¹ * m
+    # where: K_kz = cov(f.prior, k, z)
+    # implemented as: μ_k = mean(k) + K_kz * α
+    # => m = K_zz * α
+    # Σ_k = K_kk - (K_kz * K_zz⁻¹ * K_zk) + (K_kz * K_zz⁻¹ * S * K_zz⁻¹ * K_zk)
+    # interested in the last term to get S
+    # implemented as: Aᵀ * Λ_ε⁻¹ * A
+    # where: A = U⁻ᵀ * K_zk
+    # UᵀU = K_zz
+    # so, Λ_ε⁻¹ = U⁻ᵀ * S * U
+    # => S = Uᵀ * Λ_ε⁻¹ * U
+    # see https://krasserm.github.io/2020/12/12/gaussian-processes-sparse/ eqns (8) & (9)
+    U = f.data.U
+    m = U'U * f.data.α
+    S = Xt_invA_X(f.data.Λ_ε, U)
+    return MvNormal(m, S)
+end
+
 # Get the optimal closed form solution for the centered variational posterior q(u)
 function _optimal_variational_posterior(::Centered, fz, fx, y)
     fz.f.mean isa AbstractGPs.ZeroMean ||
@@ -384,6 +420,8 @@ end
 
 # Get the optimal closed form solution for the non-centered variational posterior q(ε)
 function _optimal_variational_posterior(::NonCentered, fz, fx, y)
+    fz.f.mean isa AbstractGPs.ZeroMean ||
+        error("The exact posterior requires a GP with ZeroMean.")
     q_u = _optimal_variational_posterior(Centered(), fz, fx, y)
     Cuu = cholesky(Symmetric(cov(fz)))
     return MvNormal(Cuu.L \ (mean(q_u) - mean(fz)), Symmetric((Cuu.L \ cov(q_u)) / Cuu.U))
diff --git a/test/pathwise_sampling.jl b/test/PathwiseSamplingModule.jl
similarity index 89%
rename from test/pathwise_sampling.jl
rename to test/PathwiseSamplingModule.jl
index 88808bff..c751efb9 100644
--- a/test/pathwise_sampling.jl
+++ b/test/PathwiseSamplingModule.jl
@@ -40,21 +40,21 @@
     end
 
     @testset "Centered SVA" begin
-        q = ApproximateGPs._optimal_variational_posterior(Centered(), fz, fx, y)
+        q = _optimal_variational_posterior(Centered(), fz, fx, y)
         ap = posterior(SparseVariationalApproximation(Centered(), fz, q))
 
         test_single_sample_stats(ap, num_samples)
         test_multi_sample_stats(ap, num_samples)
     end
     @testset "NonCentered SVA" begin
-        q = ApproximateGPs._optimal_variational_posterior(NonCentered(), fz, fx, y)
+        q = _optimal_variational_posterior(NonCentered(), fz, fx, y)
         ap = posterior(SparseVariationalApproximation(NonCentered(), fz, q))
 
         test_single_sample_stats(ap, num_samples)
         test_multi_sample_stats(ap, num_samples)
     end
     @testset "VFE" begin
-        ap = posterior(VFE(fz), fx, y)
+        ap = posterior(AbstractGPs.VFE(fz), fx, y)
 
         test_single_sample_stats(ap, num_samples)
         test_multi_sample_stats(ap, num_samples)
diff --git a/test/runtests.jl b/test/runtests.jl
index f9edd832..e03fc551 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,7 +1,5 @@
 using Random
 using Test
-using ApproximateGPs
-using ApproximateGPs: _optimal_variational_posterior
 using RandomFourierFeatures
 using Flux
 using IterTools
@@ -16,7 +14,11 @@ using Zygote
 using ChainRulesCore
 using ChainRulesTestUtils
 using FiniteDifferences
-using ApproximateGPs: SparseVariationalApproximationModule, LaplaceApproximationModule
+using ApproximateGPs
+using ApproximateGPs:
+    SparseVariationalApproximationModule,
+    SparseVariationalApproximationModule._optimal_variational_posterior,
+    LaplaceApproximationModule
 
 # Writing tests:
 # 1. The file structure of the test should match precisely the file structure of src.
@@ -58,9 +60,9 @@ include("test_utils.jl")
 
     include("sparse_variational.jl")
     println(" ")
-    @info "Ran svgp tests"
+    @info "Ran sparse variational tests"
 
-    include("pathwise_sampling.jl")
+    include("PathwiseSamplingModule.jl")
     println(" ")
     @info "Ran pathwise sampling tests"
 

From 385421a0347b7657e2b16c92a7c07058b69d8ee7 Mon Sep 17 00:00:00 2001
From: Ross Viljoen <ross@viljoen.co.uk>
Date: Fri, 11 Mar 2022 21:29:56 +0000
Subject: [PATCH 14/15] Closure -> struct

---
 src/PathwiseSamplingModule.jl | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/src/PathwiseSamplingModule.jl b/src/PathwiseSamplingModule.jl
index fa3515c2..bde2592b 100644
--- a/src/PathwiseSamplingModule.jl
+++ b/src/PathwiseSamplingModule.jl
@@ -13,6 +13,15 @@ using AbstractGPs:
 using PDMats: chol_lower
 using Distributions
 
+struct PosteriorSample{Tapprox<:ApproxPosteriorGP,Tprior,Tv}
+    approx_post::Tapprox
+    prior_sample::Tprior
+    v::Tv
+end
+function (s::PosteriorSample)(x::AbstractVector)
+    return s.prior_sample(x) + cov(s.approx_post, x, inducing_points(s.approx_post)) * s.v
+end
+
 @doc raw"""
     pathwise_sample(rng::Random.AbstractRNG, f::ApproxPosteriorGP, weight_space_approx[, num_samples::Integer])
 
@@ -42,9 +51,7 @@ function pathwise_sample(rng::Random.AbstractRNG, f::ApproxPosteriorGP, weight_s
     u = rand(rng, q_u)
     v = cov(f, z) \ (u - prior_sample(z))
 
-    posterior_sample(x) = prior_sample(x) + cov(f, x, z) * v
-
-    return posterior_sample
+    return PosteriorSample(f, prior_sample, v)
 end
 pathwise_sample(f::ApproxPosteriorGP, wsa) = pathwise_sample(Random.GLOBAL_RNG, f, wsa)
 
@@ -61,14 +68,8 @@ function pathwise_sample(
 
     vs = cov(f, z) \ (us - reduce(hcat, map((s) -> s(z), prior_samples)))
 
-    function create_posterior_sample_fn(prior_sample, v)
-        function posterior_sample(x)
-            return prior_sample(x) + cov(f, x, z) * v
-        end
-    end
-
     posterior_samples = [
-        create_posterior_sample_fn(s, v) for (s, v) in zip(prior_samples, eachcol(vs))
+        PosteriorSample(f, s, v) for (s, v) in zip(prior_samples, eachcol(vs))
     ]
     return posterior_samples
 end

From 41eb95eb8da2875751125c21c6acc002f4d0ccbe Mon Sep 17 00:00:00 2001
From: Ross Viljoen <ross@viljoen.co.uk>
Date: Fri, 11 Mar 2022 21:39:56 +0000
Subject: [PATCH 15/15] Add comments

---
 src/PathwiseSamplingModule.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/PathwiseSamplingModule.jl b/src/PathwiseSamplingModule.jl
index bde2592b..35562bf9 100644
--- a/src/PathwiseSamplingModule.jl
+++ b/src/PathwiseSamplingModule.jl
@@ -14,9 +14,9 @@ using PDMats: chol_lower
 using Distributions
 
 struct PosteriorSample{Tapprox<:ApproxPosteriorGP,Tprior,Tv}
-    approx_post::Tapprox
-    prior_sample::Tprior
-    v::Tv
+    approx_post::Tapprox  # The approximate posterior GP from which this sample is taken
+    prior_sample::Tprior  # A function sampled from the prior of `approx_post`
+    v::Tv  # The term needed to compute the pathwise update to the prior sample
 end
 function (s::PosteriorSample)(x::AbstractVector)
     return s.prior_sample(x) + cov(s.approx_post, x, inducing_points(s.approx_post)) * s.v