From 9923f235481e9313f1f857f3c01f67e6167ee053 Mon Sep 17 00:00:00 2001
From: David Widmann <devmotion@users.noreply.github.com>
Date: Tue, 2 Jun 2020 12:59:13 +0200
Subject: [PATCH] Use`include` with the correct order via `@require` in
 `__init__` (#204)

* Do not use `include` in `__init__`

* Fix error on Julia 1.0

* Fix a seed

* Discard initial samples

* Increase number of samples

* Move ad.jl to contrib subfolder

* Update test/sampler.jl

* Update sampler-vec.jl

* Decrease leapfrog stepsize for some samplers.

* Decrease leapfrog stepsize for some samplers.

* Slightly relax test error bounds.

Co-authored-by: Hong Ge <hg344@cam.ac.uk>
---
 src/AdvancedHMC.jl         | 16 ++++++-
 src/contrib/ad.jl          | 86 +-------------------------------------
 src/contrib/diffeq.jl      |  6 ---
 src/contrib/forwarddiff.jl | 50 ++++++++++++++++++++++
 src/contrib/zygote.jl      | 18 ++++++++
 test/common.jl             |  2 +-
 test/sampler-vec.jl        | 12 ++++--
 test/sampler.jl            |  8 ++--
 8 files changed, 97 insertions(+), 101 deletions(-)
 create mode 100644 src/contrib/forwarddiff.jl
 create mode 100644 src/contrib/zygote.jl

diff --git a/src/AdvancedHMC.jl b/src/AdvancedHMC.jl
index a78123f4..97a2f0c3 100644
--- a/src/AdvancedHMC.jl
+++ b/src/AdvancedHMC.jl
@@ -83,13 +83,25 @@ include("diagnosis.jl")
 include("sampler.jl")
 export sample
 
+include("contrib/ad.jl")
+
 ### Init
 
 using Requires
 
 function __init__()
-    include(joinpath(@__DIR__, "contrib", "diffeq.jl"))
-    include(joinpath(@__DIR__, "contrib", "ad.jl"))
+    @require OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" begin
+        export DiffEqIntegrator
+        include("contrib/diffeq.jl")
+    end
+
+    @require ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" begin
+        include("contrib/forwarddiff.jl")
+    end
+
+    @require Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" begin
+        include("contrib/zygote.jl")
+    end
 end
 
 end # module
diff --git a/src/contrib/ad.jl b/src/contrib/ad.jl
index 02d817a2..015f305b 100644
--- a/src/contrib/ad.jl
+++ b/src/contrib/ad.jl
@@ -1,7 +1,7 @@
 const ADSUPPORT = (:ForwardDiff, :Zygote)
 const ADAVAILABLE = Dict{Module, Function}()
 
-Hamiltonian(metric::AbstractMetric, ℓπ, m::Module) = ADAVAILABLE[m](metric, ℓπ)
+Hamiltonian(metric::M, ℓπ::T, m::Module) where {M<:AbstractMetric,T} = ADAVAILABLE[m](metric, ℓπ)
 
 function Hamiltonian(metric::AbstractMetric, ℓπ)
     available = collect(keys(ADAVAILABLE))
@@ -16,87 +16,3 @@ function Hamiltonian(metric::AbstractMetric, ℓπ)
         return Hamiltonian(metric, ℓπ, first(available))
     end
 end
-
-### ForwardDiff
-
-@require ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" begin
-
-import .ForwardDiff, .ForwardDiff.DiffResults
-
-function ∂ℓπ∂θ_forwarddiff(ℓπ, θ::AbstractVector)
-    res = DiffResults.GradientResult(θ)
-    ForwardDiff.gradient!(res, ℓπ, θ)
-    return DiffResults.value(res), DiffResults.gradient(res)
-end
-
-# Implementation 1
-function ∂ℓπ∂θ_forwarddiff(ℓπ, θ::AbstractMatrix)
-    jacob = similar(θ)
-    res = DiffResults.JacobianResult(similar(θ, size(θ, 2)), jacob)
-    ForwardDiff.jacobian!(res, ℓπ, θ)
-    jacob_full = DiffResults.jacobian(res)
-    
-    d, n = size(jacob)
-    for i in 1:n
-        jacob[:,i] = jacob_full[i,1+(i-1)*d:i*d]
-    end
-    return DiffResults.value(res), jacob
-end
-
-# Implementation 2
-# function ∂ℓπ∂θ_forwarddiff(ℓπ, θ::AbstractMatrix)
-#     local densities
-#     f(x) = (densities = ℓπ(x); sum(densities))
-#     res = DiffResults.GradientResult(θ)
-#     ForwardDiff.gradient!(res, f, θ)
-#     return ForwardDiff.value.(densities), DiffResults.gradient(res)
-# end
-
-# Implementation 3
-# function ∂ℓπ∂θ_forwarddiff(ℓπ, θ::AbstractMatrix)
-#     v = similar(θ, size(θ, 2))
-#     g = similar(θ)
-#     for i in 1:size(θ, 2)
-#         res = GradientResult(θ[:,i])
-#         gradient!(res, ℓπ, θ[:,i])
-#         v[i] = value(res)
-#         g[:,i] = gradient(res)
-#     end
-#     return v, g
-# end
-
-function ForwardDiffHamiltonian(metric::AbstractMetric, ℓπ)
-    ∂ℓπ∂θ(θ::AbstractVecOrMat) = ∂ℓπ∂θ_forwarddiff(ℓπ, θ)
-    return Hamiltonian(metric, ℓπ, ∂ℓπ∂θ)
-end
-
-ADAVAILABLE[ForwardDiff] = ForwardDiffHamiltonian
-
-end # @require
-
-### Zygote
-
-@require Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" begin
-
-import .Zygote
-
-function ∂ℓπ∂θ_zygote(ℓπ, θ::AbstractVector)
-    res, back = Zygote.pullback(ℓπ, θ)
-    return res, first(back(Zygote.sensitivity(res)))
-end
-
-function ∂ℓπ∂θ_zygote(ℓπ, θ::AbstractMatrix)
-    res, back = Zygote.pullback(ℓπ, θ)
-    return res, first(back(ones(eltype(res), size(res))))
-end
-
-function ZygoteADHamiltonian(metric::AbstractMetric, ℓπ)
-    ∂ℓπ∂θ(θ::AbstractVecOrMat) = ∂ℓπ∂θ_zygote(ℓπ, θ)
-    return Hamiltonian(metric, ℓπ, ∂ℓπ∂θ)
-end
-
-ADAVAILABLE[Zygote] = ZygoteADHamiltonian
-
-# Zygote.@adjoint
-
-end # @require
\ No newline at end of file
diff --git a/src/contrib/diffeq.jl b/src/contrib/diffeq.jl
index 66a0aa34..ef99ee7b 100644
--- a/src/contrib/diffeq.jl
+++ b/src/contrib/diffeq.jl
@@ -1,5 +1,3 @@
-@require OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" begin
-
 import .OrdinaryDiffEq
 
 struct DiffEqIntegrator{T<:AbstractScalarOrVec{<:AbstractFloat}, DiffEqSolver} <: AbstractLeapfrog{T}
@@ -43,7 +41,3 @@ function step(
     end
     return res
 end
-
-export DiffEqIntegrator 
-
-end # @require
diff --git a/src/contrib/forwarddiff.jl b/src/contrib/forwarddiff.jl
new file mode 100644
index 00000000..122dbd7a
--- /dev/null
+++ b/src/contrib/forwarddiff.jl
@@ -0,0 +1,50 @@
+import .ForwardDiff, .ForwardDiff.DiffResults
+
+function ∂ℓπ∂θ_forwarddiff(ℓπ, θ::AbstractVector)
+    res = DiffResults.GradientResult(θ)
+    ForwardDiff.gradient!(res, ℓπ, θ)
+    return DiffResults.value(res), DiffResults.gradient(res)
+end
+
+# Implementation 1
+function ∂ℓπ∂θ_forwarddiff(ℓπ, θ::AbstractMatrix)
+    jacob = similar(θ)
+    res = DiffResults.JacobianResult(similar(θ, size(θ, 2)), jacob)
+    ForwardDiff.jacobian!(res, ℓπ, θ)
+    jacob_full = DiffResults.jacobian(res)
+
+    d, n = size(jacob)
+    for i in 1:n
+        jacob[:,i] = jacob_full[i,1+(i-1)*d:i*d]
+    end
+    return DiffResults.value(res), jacob
+end
+
+# Implementation 2
+# function ∂ℓπ∂θ_forwarddiff(ℓπ, θ::AbstractMatrix)
+#     local densities
+#     f(x) = (densities = ℓπ(x); sum(densities))
+#     res = DiffResults.GradientResult(θ)
+#     ForwardDiff.gradient!(res, f, θ)
+#     return ForwardDiff.value.(densities), DiffResults.gradient(res)
+# end
+
+# Implementation 3
+# function ∂ℓπ∂θ_forwarddiff(ℓπ, θ::AbstractMatrix)
+#     v = similar(θ, size(θ, 2))
+#     g = similar(θ)
+#     for i in 1:size(θ, 2)
+#         res = GradientResult(θ[:,i])
+#         gradient!(res, ℓπ, θ[:,i])
+#         v[i] = value(res)
+#         g[:,i] = gradient(res)
+#     end
+#     return v, g
+# end
+
+function ForwardDiffHamiltonian(metric::AbstractMetric, ℓπ)
+    ∂ℓπ∂θ(θ::AbstractVecOrMat) = ∂ℓπ∂θ_forwarddiff(ℓπ, θ)
+    return Hamiltonian(metric, ℓπ, ∂ℓπ∂θ)
+end
+
+ADAVAILABLE[ForwardDiff] = ForwardDiffHamiltonian
diff --git a/src/contrib/zygote.jl b/src/contrib/zygote.jl
new file mode 100644
index 00000000..d89511d3
--- /dev/null
+++ b/src/contrib/zygote.jl
@@ -0,0 +1,18 @@
+import .Zygote
+
+function ∂ℓπ∂θ_zygote(ℓπ, θ::AbstractVector)
+    res, back = Zygote.pullback(ℓπ, θ)
+    return res, first(back(Zygote.sensitivity(res)))
+end
+
+function ∂ℓπ∂θ_zygote(ℓπ, θ::AbstractMatrix)
+    res, back = Zygote.pullback(ℓπ, θ)
+    return res, first(back(ones(eltype(res), size(res))))
+end
+
+function ZygoteADHamiltonian(metric::AbstractMetric, ℓπ)
+    ∂ℓπ∂θ(θ::AbstractVecOrMat) = ∂ℓπ∂θ_zygote(ℓπ, θ)
+    return Hamiltonian(metric, ℓπ, ∂ℓπ∂θ)
+end
+
+ADAVAILABLE[Zygote] = ZygoteADHamiltonian
diff --git a/test/common.jl b/test/common.jl
index 9a01f87b..b71a9656 100644
--- a/test/common.jl
+++ b/test/common.jl
@@ -7,7 +7,7 @@ const TRATIO = Int == Int64 ? 1 : 2
 # Deterministic tolerance
 const DETATOL = 1e-3 * D * TRATIO
 # Random tolerance
-const RNDATOL = 5e-2 * D * TRATIO
+const RNDATOL = 5e-2 * D * TRATIO * 2
 
 # Hand-coded multivaite Gaussain
 
diff --git a/test/sampler-vec.jl b/test/sampler-vec.jl
index 7b167dc1..20fa5cfd 100644
--- a/test/sampler-vec.jl
+++ b/test/sampler-vec.jl
@@ -6,14 +6,14 @@ include("common.jl")
     n_chains_max = 20
     n_chains_list = collect(1:n_chains_max)
     θ_init_list = [rand(D, n_chains) for n_chains in n_chains_list]
-    ϵ = 0.2
+    ϵ = 0.1
     lf = Leapfrog(ϵ)
     i_test = 5
     lfi = Leapfrog(fill(ϵ, i_test))
     lfi_jittered = JitteredLeapfrog(fill(ϵ, i_test), 1.0)
     n_steps = 10
-    n_samples = 12_000
-    n_adapts = 2_000
+    n_samples = 20_000
+    n_adapts = 4_000
 
     for metricT in [
         UnitEuclideanMetric,
@@ -33,9 +33,12 @@ include("common.jl")
         @test show(metric) == nothing
         @test show(h) == nothing
         @test show(τ) == nothing
+
         # NoAdaptation
+        Random.seed!(100)
         samples, stats = sample(h, τ, θ_init_list[i_test], n_samples; verbose=false)
         @test mean(samples) ≈ zeros(D, n_chains) atol=RNDATOL * n_chains
+
         # Adaptation
         for adaptor in [
             MassMatrixAdaptor(metric),
@@ -51,9 +54,12 @@ include("common.jl")
         ]
             τ isa HMCDA && continue
             @test show(adaptor) == nothing
+
+            Random.seed!(100)
             samples, stats = sample(h, τ, θ_init_list[i_test], n_samples, adaptor, n_adapts; verbose=false, progress=false)
             @test mean(samples) ≈ zeros(D, n_chains) atol=RNDATOL * n_chains
         end
+
         # Passing a vector of same RNGs
         rng = [MersenneTwister(1) for _ in 1:n_chains]
         h = Hamiltonian(metricT((D, n_chains)), ℓπ, ∂ℓπ∂θ)
diff --git a/test/sampler.jl b/test/sampler.jl
index bc87d9de..8da91b08 100644
--- a/test/sampler.jl
+++ b/test/sampler.jl
@@ -7,10 +7,10 @@ using Statistics: mean, var, cov
 include("common.jl")
 
 θ_init = rand(MersenneTwister(1), D)
-ϵ = 0.2
+ϵ = 0.1
 n_steps = 10
-n_samples = 12_000
-n_adapts = 2_000
+n_samples = 22_000
+n_adapts = 4_000
 
 function test_stats(::Union{StaticTrajectory,HMCDA}, stats, n_adapts)
     for name in (:step_size, :nom_step_size, :n_steps, :is_accept, :acceptance_rate, :log_density, :hamiltonian_energy, :hamiltonian_energy_error, :is_adapt)
@@ -85,7 +85,7 @@ end
                     # For other adapatation methods that are able to adpat the step size, we use `find_good_stepsize`.
                     τ_used = adaptorsym == :MassMatrixAdaptorOnly ? τ : reconstruct(τ, integrator=reconstruct(lf, ϵ=find_good_stepsize(h, θ_init)))
                     samples, stats = sample(h, τ_used , θ_init, n_samples, adaptor, n_adapts; verbose=false, progress=PROGRESS)
-                    @test mean(samples) ≈ zeros(D) atol=RNDATOL
+                    @test mean(samples[(n_adapts+1):end]) ≈ zeros(D) atol=RNDATOL
                     test_stats(τ_used, stats, n_adapts)
                 end
             end