diff --git a/Project.toml b/Project.toml
index a3d13f6..d761f55 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "SliceSampling"
 uuid = "43f4d3e8-9711-4a8c-bd1b-03ac73a255cf"
-version = "0.3.0"
+version = "0.4.0"
 
 [deps]
 AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001"
diff --git a/README.md b/README.md
index 8103694..0e97825 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,14 @@ This package implements slice sampling algorithms accessible through the `Abstra
 For general usage, please refer to [here](https://turinglang.org/SliceSampling.jl/dev/general/).
 
 ## Implemented Algorithms
+### Univariate Slice Sampling Algorithms
 - Univariate slice sampling ([Slice](https://turinglang.org/SliceSampling.jl/dev/univariate_slice/)) algorithms with coordinate-wise Gibbs sampling by R. Neal [^N2003].
+
+### Univariate-to-Multivariate Strategies
+- Random permutation coordinate-wise Gibbs sampling
+- Hit-and-run sampling
+
+### Multivariate Slice Sampling Algorithms
 - Latent slice sampling ([LSS](https://turinglang.org/SliceSampling.jl/dev/latent_slice/)) by Li and Walker[^LW2023]
 - Gibbsian polar slice sampling ([GPSS](https://turinglang.org/SliceSampling.jl/dev/gibbs_polar/)) by P. Schär, M. Habeck, and D. Rudolf[^SHR2023].
 
@@ -26,7 +33,7 @@ using SliceSampling
     m ~ Normal(0, sqrt(s))
 end
 
-sampler = LatentSlice(2)
+sampler = RandPermGibbs(SliceSteppingOut(2.))
 n_samples = 10000
 model = demo()
 sample(model, externalsampler(sampler), n_samples; initial_params=[1.0, 0.0])
diff --git a/docs/src/general.md b/docs/src/general.md
index 89ff158..60aca2a 100644
--- a/docs/src/general.md
+++ b/docs/src/general.md
@@ -6,7 +6,7 @@ This package implements the `AbstractMCMC` [interface](https://github.com/Turing
 
 ## Drawing Samples From `Turing` Models
 `SliceSampling.jl` can be used to sample from [Turing](https://github.com/TuringLang/Turing.jl) models through `Turing`'s `externalsampler` interface.
-See the following example of using the [latent slice sampler](@ref latent):
+See the following example:
 
 ```@example turing
 using Distributions
@@ -18,7 +18,7 @@ using SliceSampling
     m ~ Normal(0, sqrt(s))
 end
 
-sampler = LatentSlice(2)
+sampler = RandPermGibbs(SliceSteppingOut(2.))
 n_samples = 10000
 model = demo()
 sample(model, externalsampler(sampler), n_samples; initial_params=[1.0, 0.0])
diff --git a/docs/src/univariate_slice.md b/docs/src/univariate_slice.md
index 9a9f824..8aa298a 100644
--- a/docs/src/univariate_slice.md
+++ b/docs/src/univariate_slice.md
@@ -10,7 +10,6 @@ Since these algorithms are univariate, they are applied to each coordinate of th
 Furthermore, their computational efficiency drops as the number of dimensions increases.
 As such, univariate slice sampling algorithms are best applied to low-dimensional problems with a few tens of dimensions.
 
-
 ## Fixed Initial Interval Slice Sampling
 This is the most basic form of univariate slice sampling, where the proposals are generated within a fixed interval formed by the `window`.
 
@@ -36,4 +35,53 @@ SliceSteppingOut
 SliceDoublingOut
 ```
 
-[^N2003]: Neal, R. M. (2003). Slice sampling. The annals of statistics, 31(3), 705-767.
+## Combining Univariate Samplers for Multivariate Targets
+To use univariate slice sampling strategies on targets with more than one dimension, one has to embed them into a multivariate sampling scheme that relies on univariate sampling elements.
+The two most popular approaches for this are Gibbs sampling[^GG1984] and hit-and-run[^BRS1993].
+
+### Random Permutation Gibbs Strategy
+Gibbs sampling[^GG1984] is a simple strategy where we sample one coordinate at a time, conditioned on the values of all other coordinates.
+That is, instead of sampling directly from the target distribution $$\pi$$ over its coordinates $$x_1, \ldots, x_d$$, we approximate it with the sampling process
+```math
+\begin{aligned}
+x_1 &\sim \pi(x_1 \mid x_2, \ldots, x_d ) \\
+&\vdots \\
+x_i &\sim \pi(x_i \mid x_1, \ldots, x_{i-1}, x_{i+1}, \ldots, x_d ) \\
+&\vdots \\
+x_d &\sim \pi(x_d \mid x_1, \ldots, x_{d-1} ).
+\end{aligned}
+```
+In practice, one can visit the coordinates in any order, as long as that order does not depend on the state of the chain.
+It is generally hard to know a priori which "scan order" is best, but picking the coordinates in a random order tends to work well in general:
+
+```@docs
+RandPermGibbs
+```
+
+### Hit-and-Run Strategy
+Hit-and-run is a simple "meta"-algorithm, where we sample over a random 1-dimensional projection of the space.
+That is, at each iteration, we sample a random direction
+```math
+    \theta \sim \operatorname{Uniform}(\mathbb{S}^{d-1}),
+```
+and sample along the 1-dimensional subspace
+```math
+\begin{aligned}
+    \lambda &\sim p\left(\lambda \mid x_{n-1}, \theta \right) \propto \pi\left( x_{n-1} + \lambda \theta \right) \\
+    x_{n} &= x_{n-1} + \lambda \theta.
+\end{aligned}
+```
+Applying slice sampling to the 1-dimensional subproblem was popularized by David MacKay[^M2003].
+Unlike Gibbs sampling, which only makes axis-aligned moves, hit-and-run can move along arbitrary directions, which can be helpful in some cases.
+
+```@docs
+HitAndRun
+```
+
+Unlike `RandPermGibbs`, `HitAndRun` does not provide the option of using a separate `unislice` object for each coordinate.
+This is a natural limitation of the hit-and-run sampler: it does not operate on individual coordinates.
+A minimal usage example combining both strategies with a univariate slice sampler is given at the end of this page.
+
+[^N2003]: Neal, R. M. (2003). Slice sampling. The Annals of Statistics, 31(3), 705-767.
+[^GG1984]: Geman, S., & Geman, D. (1984). Stochastic relaxation, Gibbs distributions, and the Bayesian restoration of images. IEEE Transactions on Pattern Analysis and Machine Intelligence, 6(6), 721-741.
+[^BRS1993]: Bélisle, C. J., Romeijn, H. E., & Smith, R. L. (1993). Hit-and-run algorithms for generating multivariate distributions. Mathematics of Operations Research, 18(2), 255-266.
+[^M2003]: MacKay, D. J. (2003). Information theory, inference and learning algorithms. Cambridge University Press.
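+
+### Usage Example
+The following is a minimal sketch of how both strategies wrap a univariate slice sampler and are used through the `AbstractMCMC` interface, following the same pattern as the general usage docs.
+The `IsoGauss` target below is a hypothetical toy example defined only for illustration.
+
+```julia
+using AbstractMCMC, LogDensityProblems, SliceSampling
+
+# Hypothetical toy target: a d-dimensional standard Gaussian, defined
+# through the LogDensityProblems interface.
+struct IsoGauss
+    d::Int
+end
+LogDensityProblems.logdensity(::IsoGauss, x)      = -sum(abs2, x)/2
+LogDensityProblems.dimension(p::IsoGauss)         = p.d
+LogDensityProblems.capabilities(::Type{IsoGauss}) = LogDensityProblems.LogDensityOrder{0}()
+
+model = AbstractMCMC.LogDensityModel(IsoGauss(10))
+
+# Gibbs strategy: update one coordinate at a time, in a random order,
+# each with stepping-out slice sampling.
+gibbs = RandPermGibbs(SliceSteppingOut(2.))
+
+# Hit-and-run strategy: update along a random direction with the same univariate sampler.
+hnr = HitAndRun(SliceSteppingOut(2.))
+
+samples_gibbs = sample(model, gibbs, 10_000; initial_params=randn(10))
+samples_hnr   = sample(model, hnr,   10_000; initial_params=randn(10))
+```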
diff --git a/src/SliceSampling.jl b/src/SliceSampling.jl index 91707a4..2cda31e 100644 --- a/src/SliceSampling.jl +++ b/src/SliceSampling.jl @@ -56,20 +56,30 @@ function exceeded_max_prop(max_prop::Int) "- There might be a bug in the sampler.") end -# Univariate Slice Sampling Algorithms +## Univariate Slice Sampling Algorithms export Slice, SliceSteppingOut, SliceDoublingOut -abstract type AbstractGibbsSliceSampling <: AbstractSliceSampling end +abstract type AbstractUnivariateSliceSampling <: AbstractSliceSampling end function accept_slice_proposal end function find_interval end -include("gibbs.jl") include("univariate.jl") include("steppingout.jl") include("doublingout.jl") + +## Multivariate slice sampling algorithms +abstract type AbstractMultivariateSliceSampling <: AbstractSliceSampling end + +# Univariate-to-Multivariate Strategies +export RandPermGibbs, HitAndRun + +include("gibbsgeneral.jl") +include("randpermgibbs.jl") +include("hitandrun.jl") + # Latent Slice Sampling export LatentSlice include("latent.jl") diff --git a/src/doublingout.jl b/src/doublingout.jl index 2ad55e8..1e6a8fc 100644 --- a/src/doublingout.jl +++ b/src/doublingout.jl @@ -5,23 +5,24 @@ Univariate slice sampling by automatically adapting the initial interval through the "doubling-out" procedure (Scheme 4 by Neal[^N2003]) # Arguments -- `window::Union{<:Real, <:AbstractVector}`: Proposal window. +- `window::Real`: Proposal window. # Keyword Arguments - `max_doubling_out`: Maximum number of "doubling outs" (default: 8). - `max_proposals::Int`: Maximum number of proposals allowed until throwing an error (default: `typemax(Int)`). """ -struct SliceDoublingOut{W <: Union{<:AbstractVector, <:Real}} <: AbstractGibbsSliceSampling +struct SliceDoublingOut{W <: Real} <: AbstractUnivariateSliceSampling window ::W max_doubling_out::Int max_proposals ::Int end function SliceDoublingOut( - window ::Union{<:AbstractVector, <:Real}; + window ::Real; max_doubling_out::Int = 8, max_proposals ::Int = typemax(Int), ) + @assert window > 0 SliceDoublingOut(window, max_doubling_out, max_proposals) end diff --git a/src/gibbs.jl b/src/gibbs.jl deleted file mode 100644 index a0d8edd..0000000 --- a/src/gibbs.jl +++ /dev/null @@ -1,63 +0,0 @@ - -struct GibbsSliceState{T <: Transition} - "Current [`Transition`](@ref)." - transition::T -end - -struct GibbsObjective{Model, Idx <: Integer, Vec <: AbstractVector} - model::Model - idx ::Idx - θ ::Vec -end - -function LogDensityProblems.logdensity(gibbs::GibbsObjective, θi) - @unpack model, idx, θ = gibbs - LogDensityProblems.logdensity(model, (@set θ[idx] = θi)) -end - -function AbstractMCMC.step(rng ::Random.AbstractRNG, - model ::AbstractMCMC.LogDensityModel, - sampler::AbstractGibbsSliceSampling; - initial_params = nothing, - kwargs...) - logdensitymodel = model.logdensity - d = LogDensityProblems.dimension(logdensitymodel) - if sampler.window isa AbstractVector - @assert length(sampler.window) == d - end - θ = isnothing(initial_params) ? 
initial_sample(rng, logdensitymodel) : initial_params
-    lp = LogDensityProblems.logdensity(logdensitymodel, θ)
-    t  = Transition(θ, lp, NamedTuple())
-    return t, GibbsSliceState(t)
-end
-
-function AbstractMCMC.step(
-    rng    ::Random.AbstractRNG,
-    model  ::AbstractMCMC.LogDensityModel,
-    sampler::AbstractGibbsSliceSampling,
-    state  ::GibbsSliceState;
-    kwargs...,
-)
-    max_prop = sampler.max_proposals
-    logdensitymodel = model.logdensity
-    w = if sampler.window isa Real
-        Fill(sampler.window, LogDensityProblems.dimension(logdensitymodel))
-    else
-        sampler.window
-    end
-    ℓp = state.transition.lp
-    θ  = copy(state.transition.params)
-    @assert length(w) == length(θ) "window size does not match parameter size"
-
-    n_props = zeros(Int, length(θ))
-    for idx in shuffle(rng, 1:length(θ))
-        model_gibbs = GibbsObjective(logdensitymodel, idx, θ)
-        θ′idx, ℓp, props = slice_sampling_univariate(
-            rng, sampler, model_gibbs, w[idx], ℓp, θ[idx], max_prop,
-        )
-        n_props[idx] = props
-        θ[idx]       = θ′idx
-    end
-    t = Transition(θ, ℓp, (num_proposals=n_props,))
-    t, GibbsSliceState(t)
-end
diff --git a/src/gibbsgeneral.jl b/src/gibbsgeneral.jl
new file mode 100644
index 0000000..6efdbac
--- /dev/null
+++ b/src/gibbsgeneral.jl
@@ -0,0 +1,16 @@
+
+struct GibbsState{T <: Transition}
+    "Current [`Transition`](@ref)."
+    transition::T
+end
+
+struct GibbsTarget{Model, Idx <: Integer, Vec <: AbstractVector}
+    model::Model
+    idx  ::Idx
+    θ    ::Vec
+end
+
+function LogDensityProblems.logdensity(gibbs::GibbsTarget, θi)
+    @unpack model, idx, θ = gibbs
+    LogDensityProblems.logdensity(model, (@set θ[idx] = θi))
+end
diff --git a/src/hitandrun.jl b/src/hitandrun.jl
new file mode 100644
index 0000000..8cc3584
--- /dev/null
+++ b/src/hitandrun.jl
@@ -0,0 +1,76 @@
+
+"""
+    HitAndRun(unislice)
+
+Hit-and-run sampling strategy[^BRS1993].
+This applies `unislice` along a random direction uniformly sampled from the unit sphere.
+
+# Arguments
+- `unislice`: Univariate slice sampling algorithm.
+
+Unlike `RandPermGibbs`, `unislice` cannot be a vector of univariate slice samplers, since hit-and-run does not operate on individual coordinates.
+"""
+struct HitAndRun{
+    S <: AbstractUnivariateSliceSampling
+} <: AbstractMultivariateSliceSampling
+    unislice::S
+end
+
+struct HitAndRunState{T <: Transition}
+    "Current [`Transition`](@ref)."
+    transition::T
+end
+
+struct HitAndRunTarget{Model, Vec <: AbstractVector}
+    model    ::Model
+    direction::Vec
+    reference::Vec
+end
+
+function LogDensityProblems.logdensity(target::HitAndRunTarget, λ)
+    @unpack model, reference, direction = target
+    LogDensityProblems.logdensity(model, reference + λ*direction)
+end
+
+function AbstractMCMC.step(rng    ::Random.AbstractRNG,
+                           model  ::AbstractMCMC.LogDensityModel,
+                           sampler::HitAndRun;
+                           initial_params = nothing,
+                           kwargs...)
+    logdensitymodel = model.logdensity
+    θ = isnothing(initial_params) ? initial_sample(rng, logdensitymodel) : initial_params
+    d = length(θ)
+    @assert d ≥ 2 "Hit-and-Run works reliably only in dimension ≥2"
+    lp = LogDensityProblems.logdensity(logdensitymodel, θ)
+    t  = Transition(θ, lp, NamedTuple())
+    return t, HitAndRunState(t)
+end
+
+function rand_uniform_unit_sphere(rng::Random.AbstractRNG, type::Type, d::Int)
+    x = randn(rng, type, d)
+    x / norm(x)
+end
+
+function AbstractMCMC.step(
+    rng    ::Random.AbstractRNG,
+    model  ::AbstractMCMC.LogDensityModel,
+    sampler::HitAndRun,
+    state  ::HitAndRunState;
+    kwargs...,
+)
+    logdensitymodel = model.logdensity
+    ℓp = state.transition.lp
+    θ  = copy(state.transition.params)
+    d  = length(θ)
+    unislice = sampler.unislice
+
+    direction = rand_uniform_unit_sphere(rng, eltype(θ), d)
+    hnrtarget = HitAndRunTarget(logdensitymodel, direction, θ)
+    λ = zero(eltype(θ))
+    λ, ℓp, props = slice_sampling_univariate(
+        rng, unislice, hnrtarget, ℓp, λ
+    )
+    θ′ = θ + direction*λ
+    t  = Transition(θ′, ℓp, (num_proposals=props,))
+    t, HitAndRunState(t)
+end
diff --git a/src/randpermgibbs.jl b/src/randpermgibbs.jl
new file mode 100644
index 0000000..d9b629a
--- /dev/null
+++ b/src/randpermgibbs.jl
@@ -0,0 +1,67 @@
+
+"""
+    RandPermGibbs(unislice)
+
+Random permutation coordinate-wise Gibbs sampling strategy.
+This applies `unislice` coordinate-wise in a random order.
+
+# Arguments
+- `unislice`: Univariate slice sampling algorithm.
+
+`unislice` can also be a vector of univariate slice samplers, where each slice sampler is applied to the corresponding coordinate of the target posterior.
+"""
+struct RandPermGibbs{
+    S <: Union{
+        <: AbstractUnivariateSliceSampling,
+        <: AbstractVector{<: AbstractUnivariateSliceSampling}
+    }
+} <: AbstractMultivariateSliceSampling
+    unislice::S
+end
+
+function AbstractMCMC.step(rng    ::Random.AbstractRNG,
+                           model  ::AbstractMCMC.LogDensityModel,
+                           sampler::RandPermGibbs;
+                           initial_params = nothing,
+                           kwargs...)
+    logdensitymodel = model.logdensity
+    θ = isnothing(initial_params) ? initial_sample(rng, logdensitymodel) : initial_params
+    d = length(θ)
+    if sampler.unislice isa AbstractVector
+        @assert length(sampler.unislice) == d "Number of slice samplers does not match the dimensionality of the initial parameter."
+ end + lp = LogDensityProblems.logdensity(logdensitymodel, θ) + t = Transition(θ, lp, NamedTuple()) + return t, GibbsState(t) +end + +function AbstractMCMC.step( + rng ::Random.AbstractRNG, + model ::AbstractMCMC.LogDensityModel, + sampler::RandPermGibbs, + state ::GibbsState; + kwargs..., +) + logdensitymodel = model.logdensity + ℓp = state.transition.lp + θ = copy(state.transition.params) + d = length(θ) + unislices = if sampler.unislice isa AbstractVector + sampler.unislice + else + Fill(sampler.unislice, d) + end + + props = zeros(Int, d) + for i in shuffle(rng, 1:d) + model_gibbs = GibbsTarget(logdensitymodel, i, θ) + unislice = unislices[i] + θ′_coord, ℓp, props_coord = slice_sampling_univariate( + rng, unislice, model_gibbs, ℓp, θ[i] + ) + props[i] = props_coord + θ[i] = θ′_coord + end + t = Transition(θ, ℓp, (num_proposals=props,)) + t, GibbsState(t) +end diff --git a/src/steppingout.jl b/src/steppingout.jl index fe4fd4a..0832df5 100644 --- a/src/steppingout.jl +++ b/src/steppingout.jl @@ -1,5 +1,4 @@ - """ SliceSteppingOut(window) SliceSteppingOut(window; max_stepping_out, max_proposals) @@ -7,23 +6,24 @@ Univariate slice sampling by automatically adapting the initial interval through the "stepping-out" procedure (Scheme 3 by Neal[^N2003]) # Arguments -- `window::Union{<:Real, <:AbstractVector}`: Proposal window. +- `window::Real`: Proposal window. # Keyword Arguments - `max_stepping_out::Int`: Maximum number of "stepping outs" (default: 32). - `max_proposals::Int`: Maximum number of proposals allowed until throwing an error (default: `typemax(Int)`). """ -struct SliceSteppingOut{W <: Union{<:AbstractVector, <:Real}} <: AbstractGibbsSliceSampling +struct SliceSteppingOut{W <: Real} <: AbstractUnivariateSliceSampling window ::W max_stepping_out::Int max_proposals ::Int end function SliceSteppingOut( - window ::Union{<:AbstractVector, <:Real}; + window ::Real; max_stepping_out::Int = 32, max_proposals ::Int = typemax(Int), ) + @assert window > 0 SliceSteppingOut(window, max_stepping_out, max_proposals) end diff --git a/src/univariate.jl b/src/univariate.jl index e89a4f1..f909dda 100644 --- a/src/univariate.jl +++ b/src/univariate.jl @@ -5,20 +5,21 @@ Univariate slice sampling with a fixed initial interval (Scheme 2 by Neal[^N2003]) # Arguments -- `window::Union{<:Real, <:AbstractVector}`: Proposal window. +- `window::Real`: Proposal window. # Keyword Arguments - `max_proposals::Int`: Maximum number of proposals allowed until throwing an error (default: `typemax(Int)`). 
""" -struct Slice{W <: Union{<:AbstractVector, <:Real}} <: AbstractGibbsSliceSampling +struct Slice{W <: Real} <: AbstractUnivariateSliceSampling window ::W max_proposals::Int end function Slice( - window ::Union{<:AbstractVector, <:Real}; + window ::Real; max_proposals::Int = typemax(Int), ) + @assert window > 0 Slice(window, max_proposals) end @@ -48,14 +49,13 @@ accept_slice_proposal( ) = true function slice_sampling_univariate( - rng ::Random.AbstractRNG, - alg ::AbstractSliceSampling, - model, - w ::Real, - ℓπ ::Real, - θ ::F, - max_prop::Int, + rng ::Random.AbstractRNG, + alg ::AbstractSliceSampling, + model, + ℓπ ::Real, + θ ::F, ) where {F <: Real} + w, max_prop = alg.window, alg.max_proposals ℓy = ℓπ - Random.randexp(rng, F) L, R, props = find_interval(rng, alg, model, w, ℓy, θ) diff --git a/test/runtests.jl b/test/runtests.jl index da93ecd..df71d07 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -73,14 +73,18 @@ end model = Model(1., 1., [0.]) @testset for sampler in [ # Vector-valued windows - Slice(fill(1, LogDensityProblems.dimension(model))), - SliceSteppingOut(fill(1, LogDensityProblems.dimension(model))), - SliceDoublingOut(fill(1, LogDensityProblems.dimension(model))), + RandPermGibbs(Slice.(fill(1, LogDensityProblems.dimension(model)))), + RandPermGibbs(SliceSteppingOut.(fill(1, LogDensityProblems.dimension(model)))), + RandPermGibbs(SliceDoublingOut.(fill(1, LogDensityProblems.dimension(model)))), # Scalar-valued windows - Slice(1), - SliceSteppingOut(1), - SliceDoublingOut(1), + RandPermGibbs(Slice(1)), + RandPermGibbs(SliceSteppingOut(1)), + RandPermGibbs(SliceDoublingOut(1)), + + HitAndRun(Slice(1)), + HitAndRun(SliceSteppingOut(1)), + HitAndRun(SliceDoublingOut(1)), # Latent slice sampling LatentSlice(5), @@ -151,9 +155,13 @@ end model = AbstractMCMC.LogDensityModel(WrongModel()) @testset for sampler in [ # Univariate slice samplers - Slice(1; max_proposals=32), - SliceSteppingOut(1; max_proposals=32), - SliceDoublingOut(1; max_proposals=32), + RandPermGibbs(Slice(1; max_proposals=32)), + RandPermGibbs(SliceSteppingOut(1; max_proposals=32)), + RandPermGibbs(SliceDoublingOut(1; max_proposals=32)), + + HitAndRun(Slice(1; max_proposals=32)), + HitAndRun(SliceSteppingOut(1; max_proposals=32)), + HitAndRun(SliceDoublingOut(1; max_proposals=32)), # Latent slice sampling LatentSlice(5; max_proposals=32), @@ -185,9 +193,12 @@ end model = demo() @testset for sampler in [ - Slice(1), - SliceSteppingOut(1), - SliceDoublingOut(1), + RandPermGibbs(Slice(1)), + RandPermGibbs(SliceSteppingOut(1)), + RandPermGibbs(SliceDoublingOut(1)), + HitAndRun(Slice(1)), + HitAndRun(SliceSteppingOut(1)), + HitAndRun(SliceDoublingOut(1)), LatentSlice(5), GibbsPolarSlice(5), ]