From 06364436f751a939c4f031eb80d07f23734c768b Mon Sep 17 00:00:00 2001 From: Ray Kim Date: Sun, 26 May 2024 02:37:49 +0100 Subject: [PATCH 01/12] add limit on maximum number of proposals --- src/SliceSampling.jl | 9 ++++++++- src/doublingout.jl | 21 +++++++++++++++------ src/gibbs.jl | 11 ++++++----- src/latent.jl | 17 ++++++++++++++--- src/steppingout.jl | 20 +++++++++++++++----- src/univariate.jl | 37 ++++++++++++++++++++++--------------- test/runtests.jl | 37 ++++++++++++++++++++++++++++++++++++- 7 files changed, 116 insertions(+), 36 deletions(-) diff --git a/src/SliceSampling.jl b/src/SliceSampling.jl index 2ce4ecd..a960bbd 100644 --- a/src/SliceSampling.jl +++ b/src/SliceSampling.jl @@ -5,6 +5,7 @@ using AbstractMCMC using Accessors using Distributions using FillArrays +using LinearAlgebra using LogDensityProblems using SimpleUnPack using Random @@ -48,6 +49,13 @@ Return the initial sample for the `model` using the random number generator `rng """ function initial_sample end +function exceeded_max_prop(max_prop::Int) + error("Exceeded maximum number of proposal $(max_prop).\n", + "Here are possible causes:\n", + "- The model might be broken or pathologic.", + "- There might be a bug in the sampler.") +end + # Univariate Slice Sampling Algorithms export Slice, SliceSteppingOut, SliceDoublingOut @@ -64,7 +72,6 @@ include("doublingout.jl") # Latent Slice Sampling export LatentSlice - include("latent.jl") # Turing Compatibility diff --git a/src/doublingout.jl b/src/doublingout.jl index 5243a99..2ad55e8 100644 --- a/src/doublingout.jl +++ b/src/doublingout.jl @@ -1,20 +1,29 @@ """ - SliceDoublingOut(max_doubling_out, window) - SliceDoublingOut(window) + SliceDoublingOut(window; max_doubling_out, max_proposals) Univariate slice sampling by automatically adapting the initial interval through the "doubling-out" procedure (Scheme 4 by Neal[^N2003]) -# Fields -- `max_doubling_out`: Maximum number of "doubling outs" (default: 8). +# Arguments - `window::Union{<:Real, <:AbstractVector}`: Proposal window. + +# Keyword Arguments +- `max_doubling_out`: Maximum number of "doubling outs" (default: 8). +- `max_proposals::Int`: Maximum number of proposals allowed until throwing an error (default: `typemax(Int)`). 
""" struct SliceDoublingOut{W <: Union{<:AbstractVector, <:Real}} <: AbstractGibbsSliceSampling - max_doubling_out::Int window ::W + max_doubling_out::Int + max_proposals ::Int end -SliceDoublingOut(window::Union{<:AbstractVector, <:Real}) = SliceDoublingOut(8, window) +function SliceDoublingOut( + window ::Union{<:AbstractVector, <:Real}; + max_doubling_out::Int = 8, + max_proposals ::Int = typemax(Int), +) + SliceDoublingOut(window, max_doubling_out, max_proposals) +end function find_interval( rng ::Random.AbstractRNG, diff --git a/src/gibbs.jl b/src/gibbs.jl index f142767..a0d8edd 100644 --- a/src/gibbs.jl +++ b/src/gibbs.jl @@ -38,6 +38,7 @@ function AbstractMCMC.step( state ::GibbsSliceState; kwargs..., ) + max_prop = sampler.max_proposals logdensitymodel = model.logdensity w = if sampler.window isa Real Fill(sampler.window, LogDensityProblems.dimension(logdensitymodel)) @@ -48,15 +49,15 @@ function AbstractMCMC.step( θ = copy(state.transition.params) @assert length(w) == length(θ) "window size does not match parameter size" - total_props = 0 + n_props = zeros(Int, length(θ)) for idx in shuffle(rng, 1:length(θ)) model_gibbs = GibbsObjective(logdensitymodel, idx, θ) θ′idx, ℓp, props = slice_sampling_univariate( - rng, sampler, model_gibbs, w[idx], ℓp, θ[idx] + rng, sampler, model_gibbs, w[idx], ℓp, θ[idx], max_prop, ) - total_props += props - θ[idx] = θ′idx + n_props[idx] = props + θ[idx] = θ′idx end - t = Transition(θ, ℓp, (num_proposals=total_props,)) + t = Transition(θ, ℓp, (num_proposals=n_props,)) t, GibbsSliceState(t) end diff --git a/src/latent.jl b/src/latent.jl index e488020..8725354 100644 --- a/src/latent.jl +++ b/src/latent.jl @@ -4,13 +4,19 @@ Latent slice sampling algorithm by Li and Walker[^LW2023]. -# Fields +# Arguments - `beta::Real`: Beta parameter of the Gamma distribution of the auxiliary variables. + +# Keyword Arguments +- `max_proposals::Int`: Maximum number of proposals allowed until throwing an error (default: `typemax(Int)`). """ struct LatentSlice{B <: Real} <: AbstractSliceSampling - beta::B + beta ::B + max_proposals::Int end +LatentSlice(beta::Real; max_proposals::Int = typemax(Int)) = LatentSlice(beta, max_proposals) + struct LatentSliceState{T <: Transition, S <: AbstractVector} "Current [`Transition`](@ref)." transition ::T @@ -42,6 +48,7 @@ function AbstractMCMC.step( kwargs..., ) logdensitymodel = model.logdensity + max_proposals = sampler.max_proposals β = sampler.beta ℓp = state.transition.lp @@ -69,6 +76,10 @@ function AbstractMCMC.step( break end + if props > max_proposals + exceeded_max_prop(max_proposals) + end + @inbounds for i = 1:d if ystar[i] < y[i] a[i] = ystar[i] @@ -78,6 +89,6 @@ function AbstractMCMC.step( end end s = β*randexp(rng, eltype(y), d) + 2*abs.(l - y) - t = Transition(y, ℓp, NamedTuple()) + t = Transition(y, ℓp, (num_proposals = props,)) t, LatentSliceState(t, s) end diff --git a/src/steppingout.jl b/src/steppingout.jl index 35d882b..fe4fd4a 100644 --- a/src/steppingout.jl +++ b/src/steppingout.jl @@ -1,21 +1,31 @@ """ - SliceSteppingOut(max_stepping_out, window) SliceSteppingOut(window) + SliceSteppingOut(window; max_stepping_out, max_proposals) Univariate slice sampling by automatically adapting the initial interval through the "stepping-out" procedure (Scheme 3 by Neal[^N2003]) -# Fields -- `max_stepping_out`: Maximum number of "stepping outs" (default: 32). +# Arguments - `window::Union{<:Real, <:AbstractVector}`: Proposal window. 
+ +# Keyword Arguments +- `max_stepping_out::Int`: Maximum number of "stepping outs" (default: 32). +- `max_proposals::Int`: Maximum number of proposals allowed until throwing an error (default: `typemax(Int)`). """ struct SliceSteppingOut{W <: Union{<:AbstractVector, <:Real}} <: AbstractGibbsSliceSampling - max_stepping_out::Int window ::W + max_stepping_out::Int + max_proposals ::Int end -SliceSteppingOut(window::Union{<:AbstractVector, <:Real}) = SliceSteppingOut(32, window) +function SliceSteppingOut( + window ::Union{<:AbstractVector, <:Real}; + max_stepping_out::Int = 32, + max_proposals ::Int = typemax(Int), +) + SliceSteppingOut(window, max_stepping_out, max_proposals) +end function find_interval( rng ::Random.AbstractRNG, diff --git a/src/univariate.jl b/src/univariate.jl index c1547fc..e89a4f1 100644 --- a/src/univariate.jl +++ b/src/univariate.jl @@ -1,14 +1,25 @@ """ - Slice(window) + Slice(window; max_proposals) Univariate slice sampling with a fixed initial interval (Scheme 2 by Neal[^N2003]) -# Fields +# Arguments - `window::Union{<:Real, <:AbstractVector}`: Proposal window. + +# Keyword Arguments +- `max_proposals::Int`: Maximum number of proposals allowed until throwing an error (default: `typemax(Int)`). """ struct Slice{W <: Union{<:AbstractVector, <:Real}} <: AbstractGibbsSliceSampling - window::W + window ::W + max_proposals::Int +end + +function Slice( + window ::Union{<:AbstractVector, <:Real}; + max_proposals::Int = typemax(Int), +) + Slice(window, max_proposals) end function find_interval( @@ -37,23 +48,18 @@ accept_slice_proposal( ) = true function slice_sampling_univariate( - rng ::Random.AbstractRNG, - alg ::AbstractSliceSampling, + rng ::Random.AbstractRNG, + alg ::AbstractSliceSampling, model, - w ::Real, - ℓπ ::Real, - θ ::F, + w ::Real, + ℓπ ::Real, + θ ::F, + max_prop::Int, ) where {F <: Real} ℓy = ℓπ - Random.randexp(rng, F) L, R, props = find_interval(rng, alg, model, w, ℓy, θ) - # if eltype(θ) == Float32 - # @assert eltype(L) == Float32 - # @assert eltype(R) == Float32 - # @assert typeof(ℓy) == Float32 - # end - - while true + for _ in 1:max_prop U = rand(rng, F) θ′ = L + U*(R - L) ℓπ′ = LogDensityProblems.logdensity(model, θ′) @@ -68,5 +74,6 @@ function slice_sampling_univariate( R = θ′ end end + exceeded_max_prop(max_prop) end diff --git a/test/runtests.jl b/test/runtests.jl index e790467..2778d7c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -69,7 +69,7 @@ function LogDensityProblems.dimension(model::Model) 2 end -@testset "slice sampling" begin +@testset "sampling" begin model = Model(1., 1., [0.]) @testset for sampler in [ # Vector-valued windows @@ -132,6 +132,41 @@ end end end +struct WrongModel end + +LogDensityProblems.logdensity(::WrongModel, θ) = -Inf + +function LogDensityProblems.capabilities(::Type{<:WrongModel}) + LogDensityProblems.LogDensityOrder{0}() +end + +function LogDensityProblems.dimension(::WrongModel) + 1 +end + +@testset "error handling" begin + model = AbstractMCMC.LogDensityModel(WrongModel()) + @testset for sampler in [ + # Univariate slice samplers + Slice(1; max_proposals=32), + SliceSteppingOut(1; max_proposals=32), + SliceDoublingOut(1; max_proposals=32), + + # Latent slice sampling + LatentSlice(5; max_proposals=32), + ] + @testset "max proposal error" begin + rng = Random.default_rng() + θ = [1.,] + _, init_state = AbstractMCMC.step(rng, model, sampler; initial_params=copy(θ)) + + @test_throws "maximum number" begin + _, _ = AbstractMCMC.step(rng, model, sampler, init_state) + end + end + end +end + @testset 
"turing compatibility" begin @model function demo() s ~ InverseGamma(2, 3) From 7009745c16eba6f1fe4060cd7c51f827719a5bce Mon Sep 17 00:00:00 2001 From: Ray Kim Date: Sun, 26 May 2024 03:21:00 +0100 Subject: [PATCH 02/12] add Gibbsian polar slice sampling --- Project.toml | 2 + src/SliceSampling.jl | 4 + src/gibbspolar.jl | 181 +++++++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 6 ++ 4 files changed, 193 insertions(+) create mode 100644 src/gibbspolar.jl diff --git a/Project.toml b/Project.toml index f2e8391..0cafd25 100644 --- a/Project.toml +++ b/Project.toml @@ -7,6 +7,7 @@ AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001" Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Requires = "ae029012-a4dd-5104-9daa-d747884805df" @@ -23,6 +24,7 @@ AbstractMCMC = "4, 5" Accessors = "0.1" Distributions = "0.25" FillArrays = "1" +LinearAlgebra = "1" LogDensityProblems = "2" Random = "1" Requires = "1" diff --git a/src/SliceSampling.jl b/src/SliceSampling.jl index a960bbd..8dd60f6 100644 --- a/src/SliceSampling.jl +++ b/src/SliceSampling.jl @@ -74,6 +74,10 @@ include("doublingout.jl") export LatentSlice include("latent.jl") +# Gibbsian Polar Slice Sampling +export GibbsPolarSlice +include("gibbspolar.jl") + # Turing Compatibility if !isdefined(Base, :get_extension) diff --git a/src/gibbspolar.jl b/src/gibbspolar.jl new file mode 100644 index 0000000..cdbcb9c --- /dev/null +++ b/src/gibbspolar.jl @@ -0,0 +1,181 @@ + +""" + GibbsPolarSlice(w; max_proposals) + +Gibbsian polar slice sampling algorithm by P. Schär, M. Habeck, and D. Rudolf [^SHR2023]. + +# Arguments +- `w::Real`: Initial window size for the radius shrinkage procedure. + +# Keyword Arguments +- `w::Real`: Initial window size for the radius shrinkage procedure +- `max_proposals::Int`: Maximum number of proposals allowed until throwing an error (default: `typemax(Int)`). +""" +struct GibbsPolarSlice{W <: Real} <: AbstractSliceSampling + w::W + max_proposals::Int +end + +GibbsPolarSlice(w::Real; max_proposals::Int = typemax(Int)) = GibbsPolarSlice(w, max_proposals) + +struct GibbsPolarSliceState{T <: Transition, R <: Real, D <: AbstractVector} + "Current [`Transition`](@ref)." + transition ::T + + "direction (\$\\theta\$ in the original paper[^SHR2023])" + direction::D + + "radius (\$r\$ in the original paper[^SHR2023])" + radius::R +end + +struct GibbsPolarSliceTarget{M} + model::M +end + +function logdensity(target::GibbsPolarSliceTarget, x) + d = length(x) + (d-1)*log(norm(x)) + LogDensityProblems.logdensity(target.model, x) +end + +function AbstractMCMC.step(rng ::Random.AbstractRNG, + model ::AbstractMCMC.LogDensityModel, + sampler::GibbsPolarSlice; + initial_params = nothing, + kwargs...) + logdensitymodel = model.logdensity + x = initial_params === nothing ? 
initial_sample(rng, model) : initial_params + r = norm(x) + θ = x / r + ℓp = LogDensityProblems.logdensity(logdensitymodel, x) + t = Transition(x, ℓp, NamedTuple()) + return t, GibbsPolarSliceState(t, θ, r) +end + +function rand_subsphere(rng::Random.AbstractRNG, θ::AbstractVector) + d = length(θ) + V1 = randn(rng, eltype(θ), d) + V2 = V1 - dot(θ, V1)*θ + V2 / norm(V2) +end + +function geodesic_shrinkage( + rng ::Random.AbstractRNG, + ϱ1 ::GibbsPolarSliceTarget, + ℓT ::F, + θ ::AbstractVector{F}, + r ::F, + max_prop::Int +) where {F <: Real} + y = rand_subsphere(rng, θ) + ω_max = convert(F, 2π)*rand(rng, F) + ω_min = ω_max - convert(F, 2π) + + for n_props in 1:max_prop + # `Uniform` had a type instability issue: + # https://github.com/JuliaStats/Distributions.jl/pull/1860 + # ω = rand(rng, Uniform(ω_min, ω_max)) + ω = ω_min + (ω_max - ω_min)*rand(rng, F) + θ′ = θ*cos(ω) + y*sin(ω) + + if logdensity(ϱ1, r*θ′) > ℓT + return θ′, n_props + end + + if ω < 0 + ω_min = ω + else + ω_max = ω + end + end + exceeded_max_prop(max_prop) +end + +function radius_shrinkage( + rng ::Random.AbstractRNG, + ϱ1 ::GibbsPolarSliceTarget, + ℓT ::F, + θ ::AbstractVector{F}, + r ::F, + w ::Real, + max_prop::Int +) where {F <: Real} + u = rand(rng, F) + w = convert(F, w) + r_min = max(r - u*w, 0) + r_max = r + (1 - u)*w + n_props_total = 0 + + n_props = 0 + while (r_min > 0) && logdensity(ϱ1, r_min*θ) > ℓT + r_min = max(r_min - w, 0) + + n_props += 1 + if n_props > max_prop + exceeded_max_prop(max_prop) + end + end + n_props_total += n_props + + n_props = 0 + while logdensity(ϱ1, r_max*θ) > ℓT + r_max = r_max + w + + n_props += 1 + if n_props > max_prop + exceeded_max_prop(max_prop) + end + end + n_props_total += n_props + + for n_props in 1:max_prop + # `Uniform` had a type instability issue: + # https://github.com/JuliaStats/Distributions.jl/pull/1860 + #r′ = rand(rng, Uniform{F}(r_min, r_max)) + r′ = r_min + (r_max - r_min)*rand(rng, F) + + if logdensity(ϱ1, r′*θ) > ℓT + n_props_total += n_props + return r′, n_props_total + end + + if r′ < r + r_min = r′ + else + r_max = r′ + end + end + exceeded_max_prop(max_prop) +end + +function AbstractMCMC.step( + rng ::Random.AbstractRNG, + model ::AbstractMCMC.LogDensityModel, + sampler::GibbsPolarSlice, + state ::GibbsPolarSliceState; + kwargs..., +) + logdensitymodel = model.logdensity + max_prop = sampler.max_proposals + + x = state.transition.params + ℓp = state.transition.lp + w = sampler.w + r = state.radius + θ = state.direction + ϱ1 = GibbsPolarSliceTarget(logdensitymodel) + + d = length(x) + ℓT = ((d-1)*log(norm(x)) + ℓp) - Random.randexp(rng, eltype(ℓp)) + + θ, n_props_θ = geodesic_shrinkage(rng, ϱ1, ℓT, θ, r, max_prop) + r, n_props_r = radius_shrinkage( rng, ϱ1, ℓT, θ, r, w, max_prop) + x = θ*r + + ℓp = LogDensityProblems.logdensity(logdensitymodel, x) + t = Transition(x, ℓp, ( + num_radius_proposals = n_props_r, + num_direction_proposals = n_props_θ, + )) + t, GibbsPolarSliceState(t, θ, r) +end diff --git a/test/runtests.jl b/test/runtests.jl index 2778d7c..8abfe17 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -84,6 +84,9 @@ end # Latent slice sampling LatentSlice(5), + + # Gibbsian polar slice sampling + GibbsPolarSlice(10), ] @testset "determinism" begin model = Model(1.0, 1.0, [0.0]) @@ -154,6 +157,9 @@ end # Latent slice sampling LatentSlice(5; max_proposals=32), + + # Gibbs polar slice sampling + GibbsPolarSlice(5; max_proposals=32), ] @testset "max proposal error" begin rng = Random.default_rng() From 19201d5bba05b8434d36b720ef678a819a57af88 Mon 
Sep 17 00:00:00 2001 From: Ray Kim Date: Sun, 26 May 2024 04:13:14 +0100 Subject: [PATCH 03/12] fix error message --- Project.toml | 2 +- src/SliceSampling.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index 0cafd25..a3d13f6 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "SliceSampling" uuid = "43f4d3e8-9711-4a8c-bd1b-03ac73a255cf" -version = "0.2.1" +version = "0.3.0" [deps] AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001" diff --git a/src/SliceSampling.jl b/src/SliceSampling.jl index 8dd60f6..91707a4 100644 --- a/src/SliceSampling.jl +++ b/src/SliceSampling.jl @@ -52,7 +52,7 @@ function initial_sample end function exceeded_max_prop(max_prop::Int) error("Exceeded maximum number of proposal $(max_prop).\n", "Here are possible causes:\n", - "- The model might be broken or pathologic.", + "- The model might be broken or pathologic.\n", "- There might be a bug in the sampler.") end From b5f2f6caa65bfb92ef7d71f2a541c76400b4613c Mon Sep 17 00:00:00 2001 From: Ray Kim Date: Sun, 26 May 2024 05:17:13 +0100 Subject: [PATCH 04/12] add docs for gibbsian polar sampling --- README.md | 30 ++++++++++++++++ docs/make.jl | 9 ++--- docs/src/gibbs_polar.md | 76 ++++++++++++++++++++++++++++++++++++++++ docs/src/latent_slice.md | 6 ++-- src/gibbspolar.jl | 8 +++-- 5 files changed, 120 insertions(+), 9 deletions(-) create mode 100644 docs/src/gibbs_polar.md diff --git a/README.md b/README.md index dbf3a22..921f109 100644 --- a/README.md +++ b/README.md @@ -4,3 +4,33 @@ [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://Red-Portal.github.io/SliceSampling.jl/dev/) [![Build Status](https://github.com/Red-Portal/SliceSampling.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/Red-Portal/SliceSampling.jl/actions/workflows/CI.yml?query=branch%3Amain) [![Coverage](https://codecov.io/gh/Red-Portal/SliceSampling.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/Red-Portal/SliceSampling.jl) + +This package implements slice sampling algorithms accessible through the the `AbstractMCMC` [interface](https://github.com/TuringLang/AbstractMCMC.jl). + +## Implemented Algorithms +- Univariate slice sampling algorithms with coordinate-wise Gibbs sampling by R. Neal [^N2003]. +- Latent slice sampling by Li and Walker[^LW2023] +- Gibbsian polar slice sampling by P. Schär, M. Habeck, and D. Rudolf[^SHR2023]. + +## Example with Turing Models +This package supports the [Turing](https://github.com/TuringLang/Turing.jl) probabilistic programming framework: + +```@example turing +using Distributions +using Turing +using SliceSampling + +@model function demo() + s ~ InverseGamma(3, 3) + m ~ Normal(0, sqrt(s)) +end + +sampler = LatentSlice(2) +n_samples = 10000 +model = demo() +sample(model, externalsampler(sampler), n_samples; initial_params=[1.0, 0.0]) +``` + +[^N2003]: Neal, R. M. (2003). Slice sampling. The annals of statistics, 31(3), 705-767. +[^LW2023]: Li, Y., & Walker, S. G. (2023). A latent slice sampling algorithm. Computational Statistics & Data Analysis, 179, 107652. +[^SHR2023]: Schär, P., Habeck, M., & Rudolf, D. (2023, July). Gibbsian polar slice sampling. In International Conference on Machine Learning. 
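
The README above drives the samplers through Turing's `externalsampler`, but they can also be stepped directly through the `AbstractMCMC` interface against any `LogDensityProblems` target, which is how the test suite in this series exercises them. Below is a minimal sketch of that usage; the `IsoGauss` target, the window size, the chain length, and the `draw_samples` helper are illustrative assumptions rather than anything defined in this patch series.

```julia
using AbstractMCMC
using LogDensityProblems
using Random
using SliceSampling

# Illustrative standard-Gaussian target implementing the LogDensityProblems
# interface, mirroring the test models added in this patch series.
struct IsoGauss end
LogDensityProblems.logdensity(::IsoGauss, θ) = -sum(abs2, θ)/2
LogDensityProblems.dimension(::IsoGauss) = 2
function LogDensityProblems.capabilities(::Type{<:IsoGauss})
    LogDensityProblems.LogDensityOrder{0}()
end

# Drive a slice sampler directly through `AbstractMCMC.step`, as the tests do.
function draw_samples(rng, model, sampler, n_samples; initial_params)
    transition, state = AbstractMCMC.step(rng, model, sampler; initial_params)
    samples = [copy(transition.params)]
    for _ in 2:n_samples
        transition, state = AbstractMCMC.step(rng, model, sampler, state)
        push!(samples, copy(transition.params))
    end
    samples
end

rng     = Random.default_rng()
model   = AbstractMCMC.LogDensityModel(IsoGauss())
sampler = SliceSteppingOut(2.0)   # stepping-out slice sampler with window size 2
samples = draw_samples(rng, model, sampler, 1_000; initial_params=[1.0, 1.0])
```

The same pattern applies to any of the exported samplers, including `LatentSlice` and `GibbsPolarSlice` (the latter only for targets of dimension at least two).
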
diff --git a/docs/make.jl b/docs/make.jl
index 5d396db..d18e1bd 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -15,10 +15,11 @@ makedocs(;
         assets=String[],
     ),
     pages=[
-        "Home"                      => "index.md",
-        "General Usage"             => "general.md",
-        "Univariate Slice Sampling" => "univariate_slice.md",
-        "Latent Slice Sampling"     => "latent_slice.md"
+        "Home"                          => "index.md",
+        "General Usage"                 => "general.md",
+        "Univariate Slice Sampling"     => "univariate_slice.md",
+        "Latent Slice Sampling"         => "latent_slice.md",
+        "Gibbsian Polar Slice Sampling" => "gibbs_polar.md"
     ],
 )
 
diff --git a/docs/src/gibbs_polar.md b/docs/src/gibbs_polar.md
new file mode 100644
index 0000000..a4955e7
--- /dev/null
+++ b/docs/src/gibbs_polar.md
@@ -0,0 +1,76 @@
+
+# [Gibbsian Polar Slice Sampling](@id polar)
+
+## Introduction
+Gibbsian polar slice sampling (GPSS) is a recent vector-valued slice sampling algorithm proposed by P. Schär, M. Habeck, and D. Rudolf[^SHR2023].
+It is a computationally efficient variant of the polar slice sampler previously proposed by Roberts and Rosenthal[^RR2002].
+Unlike other slice sampling algorithms, it operates a Gibbs sampler over polar coordinates, reminiscent of the elliptical slice sampler (ESS).
+Due to the use of polar coordinates, GPSS works reliably only in more than one dimension.
+However, unlike ESS, GPSS is applicable to any target distribution.
+
+
+## Description
+For a $$d$$-dimensional target distribution, GPSS utilizes the following augmented target distribution:
+```math
+\begin{aligned}
+p(x, T) &= \varrho_{\pi}^{(0)}(x) \varrho_{\pi}^{(1)}(x) \, \operatorname{Uniform}\left(T; 0, \varrho^1(x)\right) \\
+\varrho_{\pi}^{(0)}(x) &= {\lVert x \rVert}^{1 - d} \\
+\varrho_{\pi}^{(1)}(x) &= {\lVert x \rVert}^{d-1} \pi\left(x\right)
+\end{aligned}
+```
+As described in Appendix A of the GPSS paper, sampling from $$\varrho^{(1)}(x)$$ in polar coordinates magically targets the augmented target distribution.
+
+In a high-level view, GPSS operates a Gibbs sampler in the following fashion:
+```math
+\begin{aligned}
+T_n &\sim \operatorname{Uniform}\left(0, \varrho^{(1)}\left(x_{n-1}\right)\right) \\
+\theta_n &\sim \operatorname{Uniform}\left\{ \theta \in \mathbb{S}^{d-1} \mid \varrho^{(1)}\left(r_{n-1} \theta\right) > T_n \right\} \\
+r_n &\sim \operatorname{Uniform}\left\{ r \in \mathbb{R}_{\geq 0} \mid \varrho^{(1)}\left(r \theta_n\right) > T_n \right\} \\
+x &= \theta r,
+\end{aligned}
+```
+where $$T_n$$ is the usual acceptance threshold auxiliary variable, while $$\theta$$ and $$r$$ are the sampler states in polar coordinates.
+The Gibbs steps on $$\theta$$ and $$r$$ are implemented through specialized shrinkage procedures.
+
+The only tunable parameter of the algorithm is the size of the search interval (window) of the shrinkage sampler for the radius variable $$r$$.
+
+!!! info
+    Since the direction and radius variables are states of the Markov chain, this sampler is **not reversible** with respect to the samples of the log-target $$x$$.
+
+## Interface
+
+!!! warning
+    By the nature of polar coordinates, GPSS only works reliably for targets with dimension at least $$d \geq 2$$.
+
+!!! warning
+    When initializing the chain (*e.g.* through the `initial_params` keyword argument of `AbstractMCMC.sample`), it is necessary to initialize from a point $$x_0$$ with a sensible norm $$\lVert x_0 \rVert > 0$$; otherwise, the chain will start from a pathological point in polar coordinates. If the norm is smaller than `1e-5`, the current implementation automatically sets the initial radius to `1e-5`.
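
To make the polar bookkeeping behind these warnings concrete, the following sketch shows the decomposition the sampler maintains, including the `1e-5` radius floor applied at initialization; the 10-dimensional random starting point is an illustrative assumption rather than part of the documented interface.

```julia
using LinearAlgebra

x = randn(10)              # an initial point whose norm is positive almost surely
r = max(norm(x), 1e-5)     # radius, floored away from zero as described above
θ = x / r                  # direction; a unit vector whenever the floor is inactive
@assert x ≈ r * θ          # GPSS runs its Gibbs updates on (r, θ) rather than on x directly
```
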
+ + +```@docs +GibbsPolarSlice +``` + +## Demonstration +As illustrated in the original paper, GPSS shows good performance on heavy-tailed targets despite being a multivariate slice sampler: +```@example gpss +using Distributions +using Turing +using SliceSampling +using LinearAlgebra +using Plots + +@model function demo() + x ~ MvTDist(1, zeros(10), Matrix(I,10,10)) +end +model = demo() + +n_samples = 10000 +chain = sample(model, externalsampler(GibbsPolarSlice(10)), n_samples; initial_params=ones(10)) +histogram(chain[:,1,:], xlims=[-10,10]) +savefig("cauchy_gpss.svg") +``` +![](cauchy_gpss.svg) + + +[^SHR2023]: Schär, P., Habeck, M., & Rudolf, D. (2023, July). Gibbsian polar slice sampling. In International Conference on Machine Learning. +[^RR2002]: Roberts, G. O., & Rosenthal, J. S. (2002). The polar slice sampler. Stochastic Models, 18(2), 257-280. diff --git a/docs/src/latent_slice.md b/docs/src/latent_slice.md index ce48ba6..182c1a1 100644 --- a/docs/src/latent_slice.md +++ b/docs/src/latent_slice.md @@ -15,9 +15,9 @@ where $$y$$ is the parameters of the log-target $$\pi$$, $$s$$ is the width of t Naturally, the sampler operates as a blocked-Gibbs sampler ```math \begin{aligned} -l &\sim \operatorname{Uniform}\left(l; \; y - s/2,\, y + s/2\right) \\ -s &\sim p(s \mid y, l) \\ -y &\sim \operatorname{slice-sampler}\left(y \mid s, l\right), +l_n &\sim \operatorname{Uniform}\left(l; \; y_{n-1} - s_{n-1}/2,\, y_{n-1} + s_{n-1}/2\right) \\ +s_n &\sim p(s \mid y_{n-1}, l_{n}) \\ +y_n &\sim \operatorname{shrinkage}\left(y \mid s_n, l_n\right), \end{aligned} ``` where $$y$$ is updated using the shrinkage procedure by Neal[^N2003] using the initial interval formed by $$(s, l)$$. diff --git a/src/gibbspolar.jl b/src/gibbspolar.jl index cdbcb9c..bf55b19 100644 --- a/src/gibbspolar.jl +++ b/src/gibbspolar.jl @@ -45,7 +45,10 @@ function AbstractMCMC.step(rng ::Random.AbstractRNG, kwargs...) logdensitymodel = model.logdensity x = initial_params === nothing ? 
initial_sample(rng, model) : initial_params - r = norm(x) + d = length(x) + @assert d ≥ 2 "Gibbsian polar slice sampling works reliably only in dimension ≥2" + + r = max(norm(x), convert(eltype(x), 1e-5)) θ = x / r ℓp = LogDensityProblems.logdensity(logdensitymodel, x) t = Transition(x, ℓp, NamedTuple()) @@ -56,7 +59,7 @@ function rand_subsphere(rng::Random.AbstractRNG, θ::AbstractVector) d = length(θ) V1 = randn(rng, eltype(θ), d) V2 = V1 - dot(θ, V1)*θ - V2 / norm(V2) + V2 / max(norm(V2), eps(eltype(θ))) end function geodesic_shrinkage( @@ -123,6 +126,7 @@ function radius_shrinkage( n_props += 1 if n_props > max_prop + @info("", r_min, r_max, θ) exceeded_max_prop(max_prop) end end From 62f95809193b6f33c4371749a08925e06140650a Mon Sep 17 00:00:00 2001 From: Ray Kim Date: Sun, 26 May 2024 05:17:24 +0100 Subject: [PATCH 05/12] add Turing compatibility test for `GibbsPolarSlice` --- test/runtests.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 8abfe17..ab31b0f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -189,12 +189,13 @@ end SliceSteppingOut(1), SliceDoublingOut(1), LatentSlice(5), + GibbsPolarSlice(5), ] chain = sample( model, externalsampler(sampler), n_samples; - initial_params=[1.0, 0.0], + initial_params=[1.0, 0.1], progress=false, ) end From cacceb957eb9c4a4401b0cdc9ab5cf8e9675eaa9 Mon Sep 17 00:00:00 2001 From: Ray Kim Date: Sun, 26 May 2024 21:10:11 +0100 Subject: [PATCH 06/12] fix README --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index b41da3e..458e0c0 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ [![Coverage](https://codecov.io/gh/Red-Portal/SliceSampling.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/Red-Portal/SliceSampling.jl) This package implements slice sampling algorithms accessible through the the `AbstractMCMC` [interface](https://github.com/TuringLang/AbstractMCMC.jl). +For general usage, please refer to [here](https://turinglang.org/SliceSampling.jl/dev/general/). ## Implemented Algorithms - Univariate slice sampling algorithms with coordinate-wise Gibbs sampling by R. Neal [^N2003]. @@ -40,5 +41,3 @@ sample(model, externalsampler(sampler), n_samples; initial_params=[1.0, 0.0]) [![Build Status](https://github.com/TuringLang/SliceSampling.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/Red-Portal/SliceSampling.jl/actions/workflows/CI.yml?query=branch%3Amain) [![Coverage](https://codecov.io/gh/TuringLang/SliceSampling.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/Red-Portal/SliceSampling.jl) - -For a working example, please see [here](https://turinglang.org/SliceSampling.jl/dev/general/). 
From bdc7b78f91ddbf5821ad934163ae5e03034ed48b Mon Sep 17 00:00:00 2001 From: Ray Kim Date: Sun, 26 May 2024 21:12:00 +0100 Subject: [PATCH 07/12] fix gibbs polar test bug --- src/gibbspolar.jl | 1 - test/runtests.jl | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/gibbspolar.jl b/src/gibbspolar.jl index bf55b19..d7981f3 100644 --- a/src/gibbspolar.jl +++ b/src/gibbspolar.jl @@ -126,7 +126,6 @@ function radius_shrinkage( n_props += 1 if n_props > max_prop - @info("", r_min, r_max, θ) exceeded_max_prop(max_prop) end end diff --git a/test/runtests.jl b/test/runtests.jl index ab31b0f..fc088b7 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -144,7 +144,7 @@ function LogDensityProblems.capabilities(::Type{<:WrongModel}) end function LogDensityProblems.dimension(::WrongModel) - 1 + 2 end @testset "error handling" begin @@ -163,7 +163,7 @@ end ] @testset "max proposal error" begin rng = Random.default_rng() - θ = [1.,] + θ = [1., 1.] _, init_state = AbstractMCMC.step(rng, model, sampler; initial_params=copy(θ)) @test_throws "maximum number" begin From 153c4ddd66062bd984f72afc9aea7dd6be5b2c0a Mon Sep 17 00:00:00 2001 From: Ray Kim Date: Sun, 26 May 2024 21:16:10 +0100 Subject: [PATCH 08/12] fix test to work on Julia 1.7 (error message tests are Julia +1.8) --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index fc088b7..da93ecd 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -166,7 +166,7 @@ end θ = [1., 1.] _, init_state = AbstractMCMC.step(rng, model, sampler; initial_params=copy(θ)) - @test_throws "maximum number" begin + @test_throws ErrorException begin _, _ = AbstractMCMC.step(rng, model, sampler, init_state) end end From 797508b9d84889e7873cf25bd624f04a454c50db Mon Sep 17 00:00:00 2001 From: Ray Kim Date: Sun, 26 May 2024 21:27:36 +0100 Subject: [PATCH 09/12] add missing Plots dependency for docs --- docs/Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/Project.toml b/docs/Project.toml index 507abc0..e14d32c 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -6,6 +6,7 @@ FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" MCMCDiagnosticTools = "be115224-59cd-429b-ad48-344e309966f0" PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SliceSampling = "43f4d3e8-9711-4a8c-bd1b-03ac73a255cf" From 953bf981dee35b05e8907b2d6e95cfef9190be81 Mon Sep 17 00:00:00 2001 From: Ray Kim Date: Sun, 26 May 2024 21:41:31 +0100 Subject: [PATCH 10/12] fix README --- README.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 1586468..8a4a533 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ -# Implementation of slice sampling algorithms +# Slice Sampling Algorithms in Julia -[![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://Red-Portal.github.io/SliceSampling.jl/stable/) -[![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://Red-Portal.github.io/SliceSampling.jl/dev/) -[![Build Status](https://github.com/Red-Portal/SliceSampling.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/Red-Portal/SliceSampling.jl/actions/workflows/CI.yml?query=branch%3Amain) 
-[![Coverage](https://codecov.io/gh/Red-Portal/SliceSampling.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/Red-Portal/SliceSampling.jl) +[![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://TuringLang.org/SliceSampling.jl/stable/) +[![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://TuringLang.org/SliceSampling.jl/dev/) +[![Build Status](https://github.com/TuringLang/SliceSampling.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/Red-Portal/SliceSampling.jl/actions/workflows/CI.yml?query=branch%3Amain) +[![Coverage](https://codecov.io/gh/TuringLang/SliceSampling.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/Red-Portal/SliceSampling.jl) This package implements slice sampling algorithms accessible through the `AbstractMCMC` [interface](https://github.com/TuringLang/AbstractMCMC.jl). For general usage, please refer to [here](https://turinglang.org/SliceSampling.jl/dev/general/). @@ -35,3 +35,5 @@ sample(model, externalsampler(sampler), n_samples; initial_params=[1.0, 0.0]) [^N2003]: Neal, R. M. (2003). Slice sampling. The annals of statistics, 31(3), 705-767. [^LW2023]: Li, Y., & Walker, S. G. (2023). A latent slice sampling algorithm. Computational Statistics & Data Analysis, 179, 107652. [^SHR2023]: Schär, P., Habeck, M., & Rudolf, D. (2023, July). Gibbsian polar slice sampling. In International Conference on Machine Learning. + + From 840dbcecc787db2cf17904c348c82a362a8b5ece Mon Sep 17 00:00:00 2001 From: Ray Kim Date: Sun, 26 May 2024 22:18:06 +0100 Subject: [PATCH 11/12] fix docs for GPSS --- docs/src/gibbs_polar.md | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/docs/src/gibbs_polar.md b/docs/src/gibbs_polar.md index a4955e7..cc0d1c6 100644 --- a/docs/src/gibbs_polar.md +++ b/docs/src/gibbs_polar.md @@ -10,7 +10,7 @@ However, unlike ESS, GPSS is applicable to any target distribution. ## Description -For a $$d$$-dimensional target distribution, GPSS utilizes the following augmented target distribution: +For a $$d$$-dimensional target distribution $$\pi$$, GPSS utilizes the following augmented target distribution: ```math \begin{aligned} p(x, T) &= \varrho_{\pi}^{(0)}(x) \varrho_{\pi}^{(1)}(x) \, \operatorname{Uniform}\left(T; 0, \varrho^1(x)\right) \\ @@ -26,7 +26,7 @@ In a high-level view, GPSS operates a Gibbs sampler in the following fashion: T_n &\sim \operatorname{Uniform}\left(0, \varrho^{(1)}\left(x_{n-1}\right)\right) \\ \theta_n &\sim \operatorname{Uniform}\left\{ \theta \in \mathbb{S}^{d-1} \mid \varrho^{(1)}\left(r_{n-1} \theta\right) > T_n \right\} \\ r_n &\sim \operatorname{Uniform}\left\{ r \in \mathbb{R}_{\geq 0} \mid \varrho^{(1)}\left(r \theta_n\right) > T_n \right\} \\ -x &= \theta r, +x_n &= \theta_n r_n, \end{aligned} ``` where $$T_n$$ is the usual acceptance threshold auxiliary variable, while $$\theta$$ and $$r$$ are the sampler states in polar coordinates. @@ -51,7 +51,9 @@ GibbsPolarSlice ``` ## Demonstration -As illustrated in the original paper, GPSS shows good performance on heavy-tailed targets despite being a multivariate slice sampler: +As illustrated in the original paper, GPSS shows good performance on heavy-tailed targets despite being a multivariate slice sampler. 
+Consider a 10-dimensional Student-$$t$$ target with 1-degree of freedom (this corresponds to a multivariate Cauchy): + ```@example gpss using Distributions using Turing @@ -64,12 +66,17 @@ using Plots end model = demo() -n_samples = 10000 -chain = sample(model, externalsampler(GibbsPolarSlice(10)), n_samples; initial_params=ones(10)) -histogram(chain[:,1,:], xlims=[-10,10]) -savefig("cauchy_gpss.svg") +n_samples = 1000 +latent_chain = sample(model, externalsampler(LatentSlice(10)), n_samples; initial_params=ones(10)) +polar_chain = sample(model, externalsampler(GibbsPolarSlice(10)), n_samples; initial_params=ones(10)) +stephist( rand(TDist(1), 10000), bins=-10:1:10, normed=true, label="true", linewidth=3) +stephist!(latent_chain[:,1,:], bins=-10:1:10, fill=true, alpha=0.5, normed=true, label="LSS") +stephist!(polar_chain[:,1,:], bins=-10:1:10, fill=true, alpha=0.5, normed=true, label="GPSS") +savefig("student_latent_gpss.svg") ``` -![](cauchy_gpss.svg) +![](student_latent_gpss.svg) + +Clearly, for 1000 samples, GPSS is mixing much quicker than the [latent slice sampler](@ref latent) (LSS) at a similar per-iteration cost. [^SHR2023]: Schär, P., Habeck, M., & Rudolf, D. (2023, July). Gibbsian polar slice sampling. In International Conference on Machine Learning. From fad166c8433fa1b0cd0fbc2b036d5dfa5e94808e Mon Sep 17 00:00:00 2001 From: Ray Kim Date: Sun, 26 May 2024 22:19:06 +0100 Subject: [PATCH 12/12] update README --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index e5ad498..8103694 100644 --- a/README.md +++ b/README.md @@ -9,9 +9,9 @@ This package implements slice sampling algorithms accessible through the `Abstra For general usage, please refer to [here](https://turinglang.org/SliceSampling.jl/dev/general/). ## Implemented Algorithms -- [Univariate slice sampling](https://turinglang.org/SliceSampling.jl/dev/univariate_slice/) algorithms with coordinate-wise Gibbs sampling by R. Neal [^N2003]. -- [Latent slice sampling](https://turinglang.org/SliceSampling.jl/dev/latent_slice/) by Li and Walker[^LW2023] -- [Gibbsian polar slice sampling](https://turinglang.org/SliceSampling.jl/dev/gibbs_polar/) by P. Schär, M. Habeck, and D. Rudolf[^SHR2023]. +- Univariate slice sampling ([Slice](https://turinglang.org/SliceSampling.jl/dev/univariate_slice/)) algorithms with coordinate-wise Gibbs sampling by R. Neal [^N2003]. +- Latent slice sampling ([LSS](https://turinglang.org/SliceSampling.jl/dev/latent_slice/)) by Li and Walker[^LW2023] +- Gibbsian polar slice sampling ([GPSS](https://turinglang.org/SliceSampling.jl/dev/gibbs_polar/)) by P. Schär, M. Habeck, and D. Rudolf[^SHR2023]. ## Example with Turing Models This package supports the [Turing](https://github.com/TuringLang/Turing.jl) probabilistic programming framework: