From 9923f235481e9313f1f857f3c01f67e6167ee053 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Tue, 2 Jun 2020 12:59:13 +0200 Subject: [PATCH] Use`include` with the correct order via `@require` in `__init__` (#204) * Do not use `include` in `__init__` * Fix error on Julia 1.0 * Fix a seed * Discard initial samples * Increase number of samples * Move ad.jl to contrib subfolder * Update test/sampler.jl * Update sampler-vec.jl * Decrease leapfrog stepsize for some samplers. * Decrease leapfrog stepsize for some samplers. * Slightly relax test error bounds. Co-authored-by: Hong Ge --- src/AdvancedHMC.jl | 16 ++++++- src/contrib/ad.jl | 86 +------------------------------------- src/contrib/diffeq.jl | 6 --- src/contrib/forwarddiff.jl | 50 ++++++++++++++++++++++ src/contrib/zygote.jl | 18 ++++++++ test/common.jl | 2 +- test/sampler-vec.jl | 12 ++++-- test/sampler.jl | 8 ++-- 8 files changed, 97 insertions(+), 101 deletions(-) create mode 100644 src/contrib/forwarddiff.jl create mode 100644 src/contrib/zygote.jl diff --git a/src/AdvancedHMC.jl b/src/AdvancedHMC.jl index a78123f4..97a2f0c3 100644 --- a/src/AdvancedHMC.jl +++ b/src/AdvancedHMC.jl @@ -83,13 +83,25 @@ include("diagnosis.jl") include("sampler.jl") export sample +include("contrib/ad.jl") + ### Init using Requires function __init__() - include(joinpath(@__DIR__, "contrib", "diffeq.jl")) - include(joinpath(@__DIR__, "contrib", "ad.jl")) + @require OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" begin + export DiffEqIntegrator + include("contrib/diffeq.jl") + end + + @require ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" begin + include("contrib/forwarddiff.jl") + end + + @require Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" begin + include("contrib/zygote.jl") + end end end # module diff --git a/src/contrib/ad.jl b/src/contrib/ad.jl index 02d817a2..015f305b 100644 --- a/src/contrib/ad.jl +++ b/src/contrib/ad.jl @@ -1,7 +1,7 @@ const ADSUPPORT = (:ForwardDiff, :Zygote) const ADAVAILABLE = Dict{Module, Function}() -Hamiltonian(metric::AbstractMetric, ℓπ, m::Module) = ADAVAILABLE[m](metric, ℓπ) +Hamiltonian(metric::M, ℓπ::T, m::Module) where {M<:AbstractMetric,T} = ADAVAILABLE[m](metric, ℓπ) function Hamiltonian(metric::AbstractMetric, ℓπ) available = collect(keys(ADAVAILABLE)) @@ -16,87 +16,3 @@ function Hamiltonian(metric::AbstractMetric, ℓπ) return Hamiltonian(metric, ℓπ, first(available)) end end - -### ForwardDiff - -@require ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" begin - -import .ForwardDiff, .ForwardDiff.DiffResults - -function ∂ℓπ∂θ_forwarddiff(ℓπ, θ::AbstractVector) - res = DiffResults.GradientResult(θ) - ForwardDiff.gradient!(res, ℓπ, θ) - return DiffResults.value(res), DiffResults.gradient(res) -end - -# Implementation 1 -function ∂ℓπ∂θ_forwarddiff(ℓπ, θ::AbstractMatrix) - jacob = similar(θ) - res = DiffResults.JacobianResult(similar(θ, size(θ, 2)), jacob) - ForwardDiff.jacobian!(res, ℓπ, θ) - jacob_full = DiffResults.jacobian(res) - - d, n = size(jacob) - for i in 1:n - jacob[:,i] = jacob_full[i,1+(i-1)*d:i*d] - end - return DiffResults.value(res), jacob -end - -# Implementation 2 -# function ∂ℓπ∂θ_forwarddiff(ℓπ, θ::AbstractMatrix) -# local densities -# f(x) = (densities = ℓπ(x); sum(densities)) -# res = DiffResults.GradientResult(θ) -# ForwardDiff.gradient!(res, f, θ) -# return ForwardDiff.value.(densities), DiffResults.gradient(res) -# end - -# Implementation 3 -# function ∂ℓπ∂θ_forwarddiff(ℓπ, θ::AbstractMatrix) -# v = similar(θ, size(θ, 2)) -# g = similar(θ) -# for i in 1:size(θ, 2) -# res = GradientResult(θ[:,i]) -# gradient!(res, ℓπ, θ[:,i]) -# v[i] = value(res) -# g[:,i] = gradient(res) -# end -# return v, g -# end - -function ForwardDiffHamiltonian(metric::AbstractMetric, ℓπ) - ∂ℓπ∂θ(θ::AbstractVecOrMat) = ∂ℓπ∂θ_forwarddiff(ℓπ, θ) - return Hamiltonian(metric, ℓπ, ∂ℓπ∂θ) -end - -ADAVAILABLE[ForwardDiff] = ForwardDiffHamiltonian - -end # @require - -### Zygote - -@require Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" begin - -import .Zygote - -function ∂ℓπ∂θ_zygote(ℓπ, θ::AbstractVector) - res, back = Zygote.pullback(ℓπ, θ) - return res, first(back(Zygote.sensitivity(res))) -end - -function ∂ℓπ∂θ_zygote(ℓπ, θ::AbstractMatrix) - res, back = Zygote.pullback(ℓπ, θ) - return res, first(back(ones(eltype(res), size(res)))) -end - -function ZygoteADHamiltonian(metric::AbstractMetric, ℓπ) - ∂ℓπ∂θ(θ::AbstractVecOrMat) = ∂ℓπ∂θ_zygote(ℓπ, θ) - return Hamiltonian(metric, ℓπ, ∂ℓπ∂θ) -end - -ADAVAILABLE[Zygote] = ZygoteADHamiltonian - -# Zygote.@adjoint - -end # @require \ No newline at end of file diff --git a/src/contrib/diffeq.jl b/src/contrib/diffeq.jl index 66a0aa34..ef99ee7b 100644 --- a/src/contrib/diffeq.jl +++ b/src/contrib/diffeq.jl @@ -1,5 +1,3 @@ -@require OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" begin - import .OrdinaryDiffEq struct DiffEqIntegrator{T<:AbstractScalarOrVec{<:AbstractFloat}, DiffEqSolver} <: AbstractLeapfrog{T} @@ -43,7 +41,3 @@ function step( end return res end - -export DiffEqIntegrator - -end # @require diff --git a/src/contrib/forwarddiff.jl b/src/contrib/forwarddiff.jl new file mode 100644 index 00000000..122dbd7a --- /dev/null +++ b/src/contrib/forwarddiff.jl @@ -0,0 +1,50 @@ +import .ForwardDiff, .ForwardDiff.DiffResults + +function ∂ℓπ∂θ_forwarddiff(ℓπ, θ::AbstractVector) + res = DiffResults.GradientResult(θ) + ForwardDiff.gradient!(res, ℓπ, θ) + return DiffResults.value(res), DiffResults.gradient(res) +end + +# Implementation 1 +function ∂ℓπ∂θ_forwarddiff(ℓπ, θ::AbstractMatrix) + jacob = similar(θ) + res = DiffResults.JacobianResult(similar(θ, size(θ, 2)), jacob) + ForwardDiff.jacobian!(res, ℓπ, θ) + jacob_full = DiffResults.jacobian(res) + + d, n = size(jacob) + for i in 1:n + jacob[:,i] = jacob_full[i,1+(i-1)*d:i*d] + end + return DiffResults.value(res), jacob +end + +# Implementation 2 +# function ∂ℓπ∂θ_forwarddiff(ℓπ, θ::AbstractMatrix) +# local densities +# f(x) = (densities = ℓπ(x); sum(densities)) +# res = DiffResults.GradientResult(θ) +# ForwardDiff.gradient!(res, f, θ) +# return ForwardDiff.value.(densities), DiffResults.gradient(res) +# end + +# Implementation 3 +# function ∂ℓπ∂θ_forwarddiff(ℓπ, θ::AbstractMatrix) +# v = similar(θ, size(θ, 2)) +# g = similar(θ) +# for i in 1:size(θ, 2) +# res = GradientResult(θ[:,i]) +# gradient!(res, ℓπ, θ[:,i]) +# v[i] = value(res) +# g[:,i] = gradient(res) +# end +# return v, g +# end + +function ForwardDiffHamiltonian(metric::AbstractMetric, ℓπ) + ∂ℓπ∂θ(θ::AbstractVecOrMat) = ∂ℓπ∂θ_forwarddiff(ℓπ, θ) + return Hamiltonian(metric, ℓπ, ∂ℓπ∂θ) +end + +ADAVAILABLE[ForwardDiff] = ForwardDiffHamiltonian diff --git a/src/contrib/zygote.jl b/src/contrib/zygote.jl new file mode 100644 index 00000000..d89511d3 --- /dev/null +++ b/src/contrib/zygote.jl @@ -0,0 +1,18 @@ +import .Zygote + +function ∂ℓπ∂θ_zygote(ℓπ, θ::AbstractVector) + res, back = Zygote.pullback(ℓπ, θ) + return res, first(back(Zygote.sensitivity(res))) +end + +function ∂ℓπ∂θ_zygote(ℓπ, θ::AbstractMatrix) + res, back = Zygote.pullback(ℓπ, θ) + return res, first(back(ones(eltype(res), size(res)))) +end + +function ZygoteADHamiltonian(metric::AbstractMetric, ℓπ) + ∂ℓπ∂θ(θ::AbstractVecOrMat) = ∂ℓπ∂θ_zygote(ℓπ, θ) + return Hamiltonian(metric, ℓπ, ∂ℓπ∂θ) +end + +ADAVAILABLE[Zygote] = ZygoteADHamiltonian diff --git a/test/common.jl b/test/common.jl index 9a01f87b..b71a9656 100644 --- a/test/common.jl +++ b/test/common.jl @@ -7,7 +7,7 @@ const TRATIO = Int == Int64 ? 1 : 2 # Deterministic tolerance const DETATOL = 1e-3 * D * TRATIO # Random tolerance -const RNDATOL = 5e-2 * D * TRATIO +const RNDATOL = 5e-2 * D * TRATIO * 2 # Hand-coded multivaite Gaussain diff --git a/test/sampler-vec.jl b/test/sampler-vec.jl index 7b167dc1..20fa5cfd 100644 --- a/test/sampler-vec.jl +++ b/test/sampler-vec.jl @@ -6,14 +6,14 @@ include("common.jl") n_chains_max = 20 n_chains_list = collect(1:n_chains_max) θ_init_list = [rand(D, n_chains) for n_chains in n_chains_list] - ϵ = 0.2 + ϵ = 0.1 lf = Leapfrog(ϵ) i_test = 5 lfi = Leapfrog(fill(ϵ, i_test)) lfi_jittered = JitteredLeapfrog(fill(ϵ, i_test), 1.0) n_steps = 10 - n_samples = 12_000 - n_adapts = 2_000 + n_samples = 20_000 + n_adapts = 4_000 for metricT in [ UnitEuclideanMetric, @@ -33,9 +33,12 @@ include("common.jl") @test show(metric) == nothing @test show(h) == nothing @test show(τ) == nothing + # NoAdaptation + Random.seed!(100) samples, stats = sample(h, τ, θ_init_list[i_test], n_samples; verbose=false) @test mean(samples) ≈ zeros(D, n_chains) atol=RNDATOL * n_chains + # Adaptation for adaptor in [ MassMatrixAdaptor(metric), @@ -51,9 +54,12 @@ include("common.jl") ] τ isa HMCDA && continue @test show(adaptor) == nothing + + Random.seed!(100) samples, stats = sample(h, τ, θ_init_list[i_test], n_samples, adaptor, n_adapts; verbose=false, progress=false) @test mean(samples) ≈ zeros(D, n_chains) atol=RNDATOL * n_chains end + # Passing a vector of same RNGs rng = [MersenneTwister(1) for _ in 1:n_chains] h = Hamiltonian(metricT((D, n_chains)), ℓπ, ∂ℓπ∂θ) diff --git a/test/sampler.jl b/test/sampler.jl index bc87d9de..8da91b08 100644 --- a/test/sampler.jl +++ b/test/sampler.jl @@ -7,10 +7,10 @@ using Statistics: mean, var, cov include("common.jl") θ_init = rand(MersenneTwister(1), D) -ϵ = 0.2 +ϵ = 0.1 n_steps = 10 -n_samples = 12_000 -n_adapts = 2_000 +n_samples = 22_000 +n_adapts = 4_000 function test_stats(::Union{StaticTrajectory,HMCDA}, stats, n_adapts) for name in (:step_size, :nom_step_size, :n_steps, :is_accept, :acceptance_rate, :log_density, :hamiltonian_energy, :hamiltonian_energy_error, :is_adapt) @@ -85,7 +85,7 @@ end # For other adapatation methods that are able to adpat the step size, we use `find_good_stepsize`. τ_used = adaptorsym == :MassMatrixAdaptorOnly ? τ : reconstruct(τ, integrator=reconstruct(lf, ϵ=find_good_stepsize(h, θ_init))) samples, stats = sample(h, τ_used , θ_init, n_samples, adaptor, n_adapts; verbose=false, progress=PROGRESS) - @test mean(samples) ≈ zeros(D) atol=RNDATOL + @test mean(samples[(n_adapts+1):end]) ≈ zeros(D) atol=RNDATOL test_stats(τ_used, stats, n_adapts) end end