From ab72fc806177f98f197639548c99512e5cb6bbb2 Mon Sep 17 00:00:00 2001 From: jaimerzp Date: Mon, 15 Jan 2024 13:59:05 +0000 Subject: [PATCH 1/7] first commit --- src/targets.jl | 308 +++++++++++++++++++++++-------------------------- 1 file changed, 145 insertions(+), 163 deletions(-) diff --git a/src/targets.jl b/src/targets.jl index b0f2e38..c4bb7f6 100644 --- a/src/targets.jl +++ b/src/targets.jl @@ -1,163 +1,145 @@ -NoTransform(x) = x - -#= -mutable struct TuringTarget <: Target - model::DynamicPPL.Model - d::Int - vsyms::Any - dists::Any - h::Hamiltonian - transform::Function - inv_transform::Function - prior_draw::Function -end - -function _get_dists(vi) - mds = values(vi.metadata) - return [md.dists[1] for md in mds] -end - -function _name_variables(vi, dist_lengths) - vsyms = keys(vi) - names = [] - for (vsym, dist_length) in zip(vsyms, dist_lengths) - if dist_length == 1 - name = [vsym] - append!(names, name) - else - name = [DynamicPPL.VarName(Symbol(vsym, i)) for i = 1:dist_length] - append!(names, name) - end - end - return names -end - -TuringTarget(model; kwargs...) = begin - ctxt = model.context - vi = DynamicPPL.VarInfo(model, ctxt) - vi_t = Turing.link!!(vi, model) - dists = _get_dists(vi) - dist_lengths = [length(dist) for dist in dists] - vsyms = _name_variables(vi, dist_lengths) - d = length(vsyms) - - ℓ = LogDensityProblemsAD.ADgradient(DynamicPPL.LogDensityFunction(vi_t, model, ctxt)) - ℓπ(x) = LogDensityProblems.logdensity(ℓ, x) - ∂lπ∂θ(x) = LogDensityProblems.logdensity_and_gradient(ℓ, x) - hamiltonian = Hamiltonian(ℓπ, ∂lπ∂θ) - - function _reshape_params(x::AbstractVector) - xx = [] - idx = 0 - for dist_length in dist_lengths - append!(xx, [x[idx+1:idx+dist_length]]) - idx += dist_length - end - return xx - end - - function transform(x) - x = _reshape_params(x) - xt = [Bijectors.link(dist, par) for (dist, par) in zip(dists, x)] - return vcat(xt...) - end - - function inv_transform(xt) - xt = _reshape_params(xt) - x = [Bijectors.invlink(dist, par) for (dist, par) in zip(dists, xt)] - return vcat(x...) - end - - function prior_draw() - ctxt = model.context - vi = DynamicPPL.VarInfo(model, ctxt) - vi_t = Turing.link!!(vi, model) - return vi_t[DynamicPPL.SampleFromPrior()] - end - - TuringTarget(model, d, vsyms, dists, hamiltonian, transform, inv_transform, prior_draw) -end -=# - -mutable struct CustomTarget <: Target - d::Int - vsyms::Any - h::Hamiltonian - transform::Function - inv_transform::Function - prior_draw::Function -end - -CustomTarget(nlogp, grad_nlogp, priors; kwargs...) = begin - d = length(priors) - vsyms = [DynamicPPL.VarName(Symbol("d_", i)) for i = 1:d] - - function prior_draw() - x = [rand(dist) for dist in priors] - xt = transform(x) - return xt - end - hamiltonian = Hamiltonian(nlogp, grad_nlogp) - CustomTarget(d, hamiltonian, NoTransform, NoTransform, prior_draw) -end - -mutable struct GaussianTarget <: Target - d::Int - vsyms::Any - h::Hamiltonian - transform::Function - inv_transform::Function - prior_draw::Function -end - -GaussianTarget(_mean::AbstractVector, _cov::AbstractMatrix) = begin - d = length(_mean) - vsyms = [DynamicPPL.VarName(Symbol("d_", i)) for i = 1:d] - - _gaussian = MvNormal(_mean, _cov) - ℓπ(θ::AbstractVector) = logpdf(_gaussian, θ) - ∂lπ∂θ(θ::AbstractVector) = (logpdf(_gaussian, θ), gradlogpdf(_gaussian, θ)) - hamiltonian = Hamiltonian(ℓπ, ∂lπ∂θ) - - function prior_draw() - xt = rand(MvNormal(zeros(d), ones(d))) - return xt - end - - GaussianTarget(d, vsyms, hamiltonian, NoTransform, NoTransform, prior_draw) -end - -mutable struct RosenbrockTarget <: Target - d::Int - vsyms::Any - h::Hamiltonian - transform::Function - inv_transform::Function - prior_draw::Function -end - -RosenbrockTarget(a, b; kwargs...) = begin - kwargs = Dict(kwargs) - d = kwargs[:d] - vsyms = [DynamicPPL.VarName(Symbol("d_", i)) for i = 1:d] - - function ℓπ(x; a = a, b = b) - x1 = x[1:Int(d / 2)] - x2 = x[Int(d / 2)+1:end] - m = @.((a - x1)^2 + b * (x2 - x1^2)^2) - return -0.5 * sum(m) - end - - function ∂lπ∂θ(x) - return ℓπ(x), ForwardDiff.gradient(ℓπ, x) - end - - hamiltonian = Hamiltonian(ℓπ, ∂lπ∂θ) - - function prior_draw() - x = rand(MvNormal(zeros(d), ones(d))) - return x - end - - RosenbrockTarget(d, vsyms, hamiltonian, NoTransform, NoTransform, prior_draw) -end +NoTransform(x) = x + +mutable struct Target + d::Int + vsyms::Any + h::Hamiltonian + transform::Function + inv_transform::Function + prior_draw::Function +end + +#= +mutable struct TuringTarget <: Target + model::DynamicPPL.Model + d::Int + vsyms::Any + dists::Any + h::Hamiltonian + transform::Function + inv_transform::Function + prior_draw::Function +end + +function _get_dists(vi) + mds = values(vi.metadata) + return [md.dists[1] for md in mds] +end + +function _name_variables(vi, dist_lengths) + vsyms = keys(vi) + names = [] + for (vsym, dist_length) in zip(vsyms, dist_lengths) + if dist_length == 1 + name = [vsym] + append!(names, name) + else + name = [DynamicPPL.VarName(Symbol(vsym, i)) for i = 1:dist_length] + append!(names, name) + end + end + return names +end + +TuringTarget(model; kwargs...) = begin + ctxt = model.context + vi = DynamicPPL.VarInfo(model, ctxt) + vi_t = Turing.link!!(vi, model) + dists = _get_dists(vi) + dist_lengths = [length(dist) for dist in dists] + vsyms = _name_variables(vi, dist_lengths) + d = length(vsyms) + + ℓ = LogDensityProblemsAD.ADgradient(DynamicPPL.LogDensityFunction(vi_t, model, ctxt)) + ℓπ(x) = LogDensityProblems.logdensity(ℓ, x) + ∂lπ∂θ(x) = LogDensityProblems.logdensity_and_gradient(ℓ, x) + hamiltonian = Hamiltonian(ℓπ, ∂lπ∂θ) + + function _reshape_params(x::AbstractVector) + xx = [] + idx = 0 + for dist_length in dist_lengths + append!(xx, [x[idx+1:idx+dist_length]]) + idx += dist_length + end + return xx + end + + function transform(x) + x = _reshape_params(x) + xt = [Bijectors.link(dist, par) for (dist, par) in zip(dists, x)] + return vcat(xt...) + end + + function inv_transform(xt) + xt = _reshape_params(xt) + x = [Bijectors.invlink(dist, par) for (dist, par) in zip(dists, xt)] + return vcat(x...) + end + + function prior_draw() + ctxt = model.context + vi = DynamicPPL.VarInfo(model, ctxt) + vi_t = Turing.link!!(vi, model) + return vi_t[DynamicPPL.SampleFromPrior()] + end + + TuringTarget(model, d, vsyms, dists, hamiltonian, transform, inv_transform, prior_draw) +end +=# + +CustomTarget(nlogp, grad_nlogp, priors; kwargs...) = begin + d = length(priors) + vsyms = [DynamicPPL.VarName(Symbol("d_", i)) for i = 1:d] + + function prior_draw() + x = [rand(dist) for dist in priors] + xt = transform(x) + return xt + end + hamiltonian = Hamiltonian(nlogp, grad_nlogp) + Target(d, hamiltonian, NoTransform, NoTransform, prior_draw) +end + +GaussianTarget(_mean::AbstractVector, _cov::AbstractMatrix) = begin + d = length(_mean) + vsyms = [DynamicPPL.VarName(Symbol("d_", i)) for i = 1:d] + + _gaussian = MvNormal(_mean, _cov) + ℓπ(θ::AbstractVector) = logpdf(_gaussian, θ) + ∂lπ∂θ(θ::AbstractVector) = (logpdf(_gaussian, θ), gradlogpdf(_gaussian, θ)) + hamiltonian = Hamiltonian(ℓπ, ∂lπ∂θ) + + function prior_draw() + xt = rand(MvNormal(zeros(d), ones(d))) + return xt + end + + Target(d, vsyms, hamiltonian, NoTransform, NoTransform, prior_draw) +end + +RosenbrockTarget(a, b; kwargs...) = begin + kwargs = Dict(kwargs) + d = kwargs[:d] + vsyms = [DynamicPPL.VarName(Symbol("d_", i)) for i = 1:d] + + function ℓπ(x; a = a, b = b) + x1 = x[1:Int(d / 2)] + x2 = x[Int(d / 2)+1:end] + m = @.((a - x1)^2 + b * (x2 - x1^2)^2) + return -0.5 * sum(m) + end + + function ∂lπ∂θ(x) + return ℓπ(x), ForwardDiff.gradient(ℓπ, x) + end + + hamiltonian = Hamiltonian(ℓπ, ∂lπ∂θ) + + function prior_draw() + x = rand(MvNormal(zeros(d), ones(d))) + return x + end + + Target(d, vsyms, hamiltonian, NoTransform, NoTransform, prior_draw) +end From 870d9a7388f86d7f1f550818c58f910a92fa262b Mon Sep 17 00:00:00 2001 From: jaimerzp Date: Mon, 15 Jan 2024 14:31:08 +0000 Subject: [PATCH 2/7] targets rework --- src/targets.jl | 68 ++++++++++++++++++-------------------------------- 1 file changed, 24 insertions(+), 44 deletions(-) diff --git a/src/targets.jl b/src/targets.jl index c4bb7f6..b98b7b0 100644 --- a/src/targets.jl +++ b/src/targets.jl @@ -1,14 +1,3 @@ -NoTransform(x) = x - -mutable struct Target - d::Int - vsyms::Any - h::Hamiltonian - transform::Function - inv_transform::Function - prior_draw::Function -end - #= mutable struct TuringTarget <: Target model::DynamicPPL.Model @@ -88,58 +77,49 @@ TuringTarget(model; kwargs...) = begin end =# -CustomTarget(nlogp, grad_nlogp, priors; kwargs...) = begin - d = length(priors) - vsyms = [DynamicPPL.VarName(Symbol("d_", i)) for i = 1:d] +NoTransform(x) = x - function prior_draw() - x = [rand(dist) for dist in priors] - xt = transform(x) - return xt +mutable struct Target + d::Int + h::Hamiltonian + transform::Function + inv_transform::Function + θ_start::Vector{Float64} + θ_names::Vector{String} +end + +function CustomTarget(nlogp, grad_nlogp, θ_start::Vector{Float64}; + θ_names=nothing, + transform=NoTransform, + inv_transform=NoTransform) + d = length(θ_start) + if θ_names==nothing + θ_names = [String("θ_", i) for i=1:d] end - hamiltonian = Hamiltonian(nlogp, grad_nlogp) - Target(d, hamiltonian, NoTransform, NoTransform, prior_draw) + return Target(d, Hamiltonian(nlogp, grad_nlogp), transform, inv_transform, θ_start, θ_names) end -GaussianTarget(_mean::AbstractVector, _cov::AbstractMatrix) = begin +function GaussianTarget(_mean::AbstractVector, _cov::AbstractMatrix) d = length(_mean) - vsyms = [DynamicPPL.VarName(Symbol("d_", i)) for i = 1:d] - _gaussian = MvNormal(_mean, _cov) ℓπ(θ::AbstractVector) = logpdf(_gaussian, θ) ∂lπ∂θ(θ::AbstractVector) = (logpdf(_gaussian, θ), gradlogpdf(_gaussian, θ)) - hamiltonian = Hamiltonian(ℓπ, ∂lπ∂θ) - - function prior_draw() - xt = rand(MvNormal(zeros(d), ones(d))) - return xt - end - - Target(d, vsyms, hamiltonian, NoTransform, NoTransform, prior_draw) + θ_start = rand(MvNormal(zeros(d), ones(d))) + return CustomTarget(ℓπ, ∂lπ∂θ, θ_start) end -RosenbrockTarget(a, b; kwargs...) = begin +function RosenbrockTarget(a, b; kwargs...) kwargs = Dict(kwargs) d = kwargs[:d] - vsyms = [DynamicPPL.VarName(Symbol("d_", i)) for i = 1:d] - function ℓπ(x; a = a, b = b) x1 = x[1:Int(d / 2)] x2 = x[Int(d / 2)+1:end] m = @.((a - x1)^2 + b * (x2 - x1^2)^2) return -0.5 * sum(m) end - function ∂lπ∂θ(x) return ℓπ(x), ForwardDiff.gradient(ℓπ, x) end - - hamiltonian = Hamiltonian(ℓπ, ∂lπ∂θ) - - function prior_draw() - x = rand(MvNormal(zeros(d), ones(d))) - return x - end - - Target(d, vsyms, hamiltonian, NoTransform, NoTransform, prior_draw) + θ_start = rand(MvNormal(zeros(d), ones(d))) + return CustomTarget(ℓπ, ∂lπ∂θ, θ_start) end From 49ca4e712c15ef43f1350b322786915af75f051f Mon Sep 17 00:00:00 2001 From: jaimerzp Date: Mon, 15 Jan 2024 14:31:55 +0000 Subject: [PATCH 3/7] target redifinition --- src/MicroCanonicalHMC.jl | 54 +++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/src/MicroCanonicalHMC.jl b/src/MicroCanonicalHMC.jl index 922f515..3067aee 100644 --- a/src/MicroCanonicalHMC.jl +++ b/src/MicroCanonicalHMC.jl @@ -1,28 +1,26 @@ -module MicroCanonicalHMC - -export Settings, MCHMC, Sample -export Summarize -export TuringTarget, GaussianTarget, RosenbrockTarget, CustomTarget -export ParallelTarget - -using LinearAlgebra, Statistics, Random, DataFrames -using DynamicPPL, LogDensityProblemsAD, LogDensityProblems, ForwardDiff -using AbstractMCMC, MCMCChains, MCMCDiagnosticTools, Distributed -using Distributions, DistributionsAD, ProgressMeter - -abstract type Target <: AbstractMCMC.AbstractModel end - -include("hamiltonian.jl") -include("targets.jl") -include("sampler.jl") -include("integrators.jl") -include("tuning.jl") -include("abstractmcmc.jl") - -include("ensemble/targets.jl") -include("ensemble/sampler.jl") -include("ensemble/integrators.jl") -include("ensemble/tuning.jl") -include("ensemble/abstractmcmc.jl") - -end +module MicroCanonicalHMC + +export Settings, MCHMC, Sample +export Summarize +export TuringTarget, GaussianTarget, RosenbrockTarget, CustomTarget +export ParallelTarget + +using LinearAlgebra, Statistics, Random, DataFrames +using DynamicPPL, LogDensityProblemsAD, LogDensityProblems, ForwardDiff +using AbstractMCMC, MCMCChains, MCMCDiagnosticTools, Distributed +using Distributions, DistributionsAD, ProgressMeter + +include("hamiltonian.jl") +include("targets.jl") +include("sampler.jl") +include("integrators.jl") +include("tuning.jl") +include("abstractmcmc.jl") + +include("ensemble/targets.jl") +include("ensemble/sampler.jl") +include("ensemble/integrators.jl") +include("ensemble/tuning.jl") +include("ensemble/abstractmcmc.jl") + +end From 848526b6626a670bf04bd34cf158ab129c6cf34f Mon Sep 17 00:00:00 2001 From: jaimerzp Date: Mon, 15 Jan 2024 14:56:47 +0000 Subject: [PATCH 4/7] should work --- src/sampler.jl | 605 ++++++++++++++++++++++++------------------------- 1 file changed, 300 insertions(+), 305 deletions(-) diff --git a/src/sampler.jl b/src/sampler.jl index 13e5799..e55a3c8 100644 --- a/src/sampler.jl +++ b/src/sampler.jl @@ -1,305 +1,300 @@ -mutable struct Hyperparameters{T} - eps::T - L::T - nu::T - lambda_c::T - sigma::AbstractVector{T} - gamma::T - sigma_xi::T -end - -Hyperparameters(; kwargs...) = begin - eps = get(kwargs, :eps, 0.0) - L = get(kwargs, :L, 0.0) - nu = get(kwargs, :nu, 0.0) - sigma = get(kwargs, :sigma, [0.0]) - lambda_c = get(kwargs, :lambda_c, 0.1931833275037836) - gamma = get(kwargs, :gamma, (50 - 1) / (50 + 1)) #(neff-1)/(neff+1) - sigma_xi = get(kwargs, :sigma_xi, 1.5) - Hyperparameters(eps, L, nu, lambda_c, sigma, gamma, sigma_xi) -end - -mutable struct Settings{T} - nadapt::Int - TEV::T - nchains::Int - adaptive::Bool - integrator::String - init_eps::Union{Nothing,T} - init_L::Union{Nothing,T} - init_sigma::Union{Nothing,AbstractVector{T}} -end - -Settings(; kwargs...) = begin - kwargs = Dict(kwargs) - nadapt = get(kwargs, :nadapt, 1000) - TEV = get(kwargs, :TEV, 0.001) - adaptive = get(kwargs, :adaptive, false) - nchains = get(kwargs, :nchains, 1) - integrator = get(kwargs, :integrator, "LF") - init_eps = get(kwargs, :init_eps, nothing) - init_L = get(kwargs, :init_L, nothing) - init_sigma = get(kwargs, :init_sigma, nothing) - Settings(nadapt, TEV, nchains, adaptive, integrator, init_eps, init_L, init_sigma) -end - -struct MCHMCSampler <: AbstractMCMC.AbstractSampler - settings::Settings - hyperparameters::Hyperparameters - hamiltonian_dynamics::Function -end - - -""" - MCHMC( - nadapt::Int, - TEV::Real; - kwargs... - ) - -Constructor for the MicroCanonical HMC sampler -""" -function MCHMC(nadapt::Int, TEV::Real; kwargs...) - """the MCHMC (q = 0 Hamiltonian) sampler""" - sett = Settings(; nadapt = nadapt, TEV = TEV, kwargs...) - hyperparameters = Hyperparameters(; kwargs...) - - ### integrator ### - if sett.integrator == "LF" # leapfrog - hamiltonian_dynamics = Leapfrog - grad_evals_per_step = 1.0 - elseif sett.integrator == "MN" # minimal norm - hamiltonian_dynamics = Minimal_norm - grad_evals_per_step = 2.0 - else - println(string("integrator = ", integrator, "is not a valid option.")) - end - - return MCHMCSampler(sett, hyperparameters, hamiltonian_dynamics) -end - -function Random_unit_vector(rng::AbstractRNG, d::Int; _normalize = true) - """Generates a random (isotropic) unit vector.""" - u = randn(rng, d) - if _normalize - u = normalize(u) - end - return u -end - -function Partially_refresh_momentum(rng::AbstractRNG, nu::Float64, u::AbstractVector) - d = length(u) - z = nu .* Random_unit_vector(rng, d; _normalize = false) - uu = u .+ z - return normalize(uu) -end - -function Update_momentum(d::Number, eff_eps::Number, g::AbstractVector, u::AbstractVector) - """The momentum updating map of the esh dynamics (see https://arxiv.org/pdf/2111.02434.pdf) - similar to the implementation: https://github.com/gregversteeg/esh_dynamics - There are no exponentials e^delta, which prevents overflows when the gradient norm is large.""" - g_norm = norm(g) - e = -g ./ g_norm - ue = dot(u, e) - delta = eff_eps * g_norm / (d - 1) - zeta = exp(-delta) - uu = e .* ((1 - zeta) * (1 + zeta + ue * (1 - zeta))) + (2 * zeta) .* u - delta_r = delta - log(2) + log(1 + ue + (1 - ue) * zeta^2) - return normalize(uu), delta_r -end - -struct MCHMCState{T} - rng::AbstractRNG - i::Int - x::Vector{T} - u::Vector{T} - l::T - g::Vector{T} - dE::T - Feps::T - Weps::T - h::Hamiltonian -end - -struct Transition{T} - θ::Vector{T} - ϵ::T - δE::T - ℓ::T -end - -function Transition(state::MCHMCState, bijector) - eps = (state.Feps / state.Weps)^(-1 / 6) - sample = bijector(state.x)[:] - return Transition(sample, eps, state.dE, -state.l) -end - -function Step( - rng::AbstractRNG, - sampler::MCHMCSampler, - h::Hamiltonian; - bijector = NoTransform, - trans_init_params = nothing, - kwargs..., -) - sett = sampler.settings - kwargs = Dict(kwargs) - d = length(trans_init_params) - l, g = -1 .* h.∂lπ∂θ(trans_init_params) - u = Random_unit_vector(rng, d) - Weps = 1e-5 - Feps = Weps * sampler.hyperparameters.eps^(1 / 6) - state = MCHMCState(rng, 0, trans_init_params, u, l, g, 0.0, Feps, Weps, h) - state = tune_hyperparameters(rng, sampler, state; kwargs...) - transition = Transition(state, bijector) - return transition, state -end - -function Step( - rng::AbstractRNG, - sampler::MCHMCSampler, - state::MCHMCState; - bijector = NoTransform, - kwargs..., -) - """One step of the Langevin-like dynamics.""" - dialog = get(kwargs, :dialog, false) - - eps = sampler.hyperparameters.eps - nu = sampler.hyperparameters.nu - sigma_xi = sampler.hyperparameters.sigma_xi - gamma = get(kwargs, :gamma, sampler.hyperparameters.gamma) - - TEV = sampler.settings.TEV - adaptive = get(kwargs, :adaptive, sampler.settings.adaptive) - - # Hamiltonian step - xx, uu, ll, gg, kinetic_change = sampler.hamiltonian_dynamics(sampler, state) - # Langevin-like noise - uuu = Partially_refresh_momentum(rng, nu, uu) - dEE = kinetic_change + ll - state.l - - if adaptive - d = length(xx) - varE = dEE^2 / d - # 1e-8 is added to avoid divergences in log xi - xi = varE / TEV + 1e-8 - # the weight which reduces the impact of stepsizes which - # are much larger on much smaller than the desired one. - w = exp(-0.5 * (log(xi) / (6.0 * sigma_xi))^2) - # Kalman update the linear combinations - Feps = gamma * state.Feps + w * (xi / eps^6) - Weps = gamma * state.Weps + w - new_eps = (Feps / Weps)^(-1 / 6) - - sampler.hyperparameters.eps = new_eps - tune_nu!(sampler, d) - else - Feps = state.Feps - Weps = state.Weps - end - - state = MCHMCState(rng, state.i + 1, xx, uuu, ll, gg, dEE, Feps, Weps, state.h) - transition = Transition(state, bijector) - return transition, state -end - -function Sample( - sampler::MCHMCSampler, - target::Target, - n::Int; - fol_name = ".", - file_name = "samples", - progress = true, - kwargs..., -) - return Sample( - Random.GLOBAL_RNG, - sampler, - target, - n; - fol_name = fol_name, - file_name = file_name, - kwargs..., - ) -end - -""" - sample( - rng::AbstractRNG, - sampler::MCHMCSampler, - target::Target, - n::Int; - init_params = nothing, - fol_name = ".", - file_name = "samples", - progress = true, - kwargs... - ) -Sampling routine -""" -function Sample( - rng::AbstractRNG, - sampler::MCHMCSampler, - target::Target, - n::Int; - init_params = nothing, - fol_name = ".", - file_name = "samples", - progress = true, - kwargs..., -) - io = open(joinpath(fol_name, "VarNames.txt"), "w") do io - println(io, string(target.vsyms)) - end - - chain = [] - ### initial conditions ### - if init_params != nothing - @info "using provided init params" - trans_init_params = target.inv_transform(init_params) - else - @info "sampling from prior" - trans_init_params = target.prior_draw() - end - transition, state = Step( - rng, - sampler, - target.h; - bijector = target.transform, - trans_init_params = trans_init_params, - kwargs..., - ) - push!(chain, [transition.θ; transition.ϵ; transition.δE; transition.ℓ]) - - io = open(joinpath(fol_name, string(file_name, ".txt")), "w") do io - println(io, [transition.θ; transition.ϵ; transition.δE; transition.ℓ]) - @showprogress "MCHMC: " (progress ? 1 : Inf) for i = 1:n-1 - try - transition, state = Step( - rng, - sampler, - state; - bijector = target.transform, - kwargs..., - ) - push!(chain, [transition.θ; transition.ϵ; transition.δE; transition.ℓ]) - println(io, [transition.θ; transition.ϵ; transition.δE; transition.ℓ]) - catch err - if err isa InterruptException - rethrow(err) - else - @warn "Divergence encountered after tuning" - end - end - end - end - - io = open(joinpath(fol_name, string(file_name, "_summary.txt")), "w") do io - ess, rhat = Summarize(chain) - println(io, ess) - println(io, rhat) - end - - return chain -end +mutable struct Hyperparameters{T} + eps::T + L::T + nu::T + lambda_c::T + sigma::AbstractVector{T} + gamma::T + sigma_xi::T +end + +Hyperparameters(; kwargs...) = begin + eps = get(kwargs, :eps, 0.0) + L = get(kwargs, :L, 0.0) + nu = get(kwargs, :nu, 0.0) + sigma = get(kwargs, :sigma, [0.0]) + lambda_c = get(kwargs, :lambda_c, 0.1931833275037836) + gamma = get(kwargs, :gamma, (50 - 1) / (50 + 1)) #(neff-1)/(neff+1) + sigma_xi = get(kwargs, :sigma_xi, 1.5) + Hyperparameters(eps, L, nu, lambda_c, sigma, gamma, sigma_xi) +end + +mutable struct Settings{T} + nadapt::Int + TEV::T + nchains::Int + adaptive::Bool + integrator::String + init_eps::Union{Nothing,T} + init_L::Union{Nothing,T} + init_sigma::Union{Nothing,AbstractVector{T}} +end + +Settings(; kwargs...) = begin + kwargs = Dict(kwargs) + nadapt = get(kwargs, :nadapt, 1000) + TEV = get(kwargs, :TEV, 0.001) + adaptive = get(kwargs, :adaptive, false) + nchains = get(kwargs, :nchains, 1) + integrator = get(kwargs, :integrator, "LF") + init_eps = get(kwargs, :init_eps, nothing) + init_L = get(kwargs, :init_L, nothing) + init_sigma = get(kwargs, :init_sigma, nothing) + Settings(nadapt, TEV, nchains, adaptive, integrator, init_eps, init_L, init_sigma) +end + +struct MCHMCSampler <: AbstractMCMC.AbstractSampler + settings::Settings + hyperparameters::Hyperparameters + hamiltonian_dynamics::Function +end + + +""" + MCHMC( + nadapt::Int, + TEV::Real; + kwargs... + ) + +Constructor for the MicroCanonical HMC sampler +""" +function MCHMC(nadapt::Int, TEV::Real; kwargs...) + """the MCHMC (q = 0 Hamiltonian) sampler""" + sett = Settings(; nadapt = nadapt, TEV = TEV, kwargs...) + hyperparameters = Hyperparameters(; kwargs...) + + ### integrator ### + if sett.integrator == "LF" # leapfrog + hamiltonian_dynamics = Leapfrog + grad_evals_per_step = 1.0 + elseif sett.integrator == "MN" # minimal norm + hamiltonian_dynamics = Minimal_norm + grad_evals_per_step = 2.0 + else + println(string("integrator = ", integrator, "is not a valid option.")) + end + + return MCHMCSampler(sett, hyperparameters, hamiltonian_dynamics) +end + +function Random_unit_vector(rng::AbstractRNG, d::Int; _normalize = true) + """Generates a random (isotropic) unit vector.""" + u = randn(rng, d) + if _normalize + u = normalize(u) + end + return u +end + +function Partially_refresh_momentum(rng::AbstractRNG, nu::Float64, u::AbstractVector) + d = length(u) + z = nu .* Random_unit_vector(rng, d; _normalize = false) + uu = u .+ z + return normalize(uu) +end + +function Update_momentum(d::Number, eff_eps::Number, g::AbstractVector, u::AbstractVector) + """The momentum updating map of the esh dynamics (see https://arxiv.org/pdf/2111.02434.pdf) + similar to the implementation: https://github.com/gregversteeg/esh_dynamics + There are no exponentials e^delta, which prevents overflows when the gradient norm is large.""" + g_norm = norm(g) + e = -g ./ g_norm + ue = dot(u, e) + delta = eff_eps * g_norm / (d - 1) + zeta = exp(-delta) + uu = e .* ((1 - zeta) * (1 + zeta + ue * (1 - zeta))) + (2 * zeta) .* u + delta_r = delta - log(2) + log(1 + ue + (1 - ue) * zeta^2) + return normalize(uu), delta_r +end + +struct MCHMCState{T} + rng::AbstractRNG + i::Int + x::Vector{T} + u::Vector{T} + l::T + g::Vector{T} + dE::T + Feps::T + Weps::T + h::Hamiltonian +end + +struct Transition{T} + θ::Vector{T} + ϵ::T + δE::T + ℓ::T +end + +function Transition(state::MCHMCState, bijector) + eps = (state.Feps / state.Weps)^(-1 / 6) + sample = bijector(state.x)[:] + return Transition(sample, eps, state.dE, -state.l) +end + +function Step( + rng::AbstractRNG, + sampler::MCHMCSampler, + h::Hamiltonian; + bijector = NoTransform, + trans_init_params = nothing, + kwargs..., +) + sett = sampler.settings + kwargs = Dict(kwargs) + d = length(trans_init_params) + l, g = -1 .* h.∂lπ∂θ(trans_init_params) + u = Random_unit_vector(rng, d) + Weps = 1e-5 + Feps = Weps * sampler.hyperparameters.eps^(1 / 6) + state = MCHMCState(rng, 0, trans_init_params, u, l, g, 0.0, Feps, Weps, h) + state = tune_hyperparameters(rng, sampler, state; kwargs...) + transition = Transition(state, bijector) + return transition, state +end + +function Step( + rng::AbstractRNG, + sampler::MCHMCSampler, + state::MCHMCState; + bijector = NoTransform, + kwargs..., +) + """One step of the Langevin-like dynamics.""" + dialog = get(kwargs, :dialog, false) + + eps = sampler.hyperparameters.eps + nu = sampler.hyperparameters.nu + sigma_xi = sampler.hyperparameters.sigma_xi + gamma = get(kwargs, :gamma, sampler.hyperparameters.gamma) + + TEV = sampler.settings.TEV + adaptive = get(kwargs, :adaptive, sampler.settings.adaptive) + + # Hamiltonian step + xx, uu, ll, gg, kinetic_change = sampler.hamiltonian_dynamics(sampler, state) + # Langevin-like noise + uuu = Partially_refresh_momentum(rng, nu, uu) + dEE = kinetic_change + ll - state.l + + if adaptive + d = length(xx) + varE = dEE^2 / d + # 1e-8 is added to avoid divergences in log xi + xi = varE / TEV + 1e-8 + # the weight which reduces the impact of stepsizes which + # are much larger on much smaller than the desired one. + w = exp(-0.5 * (log(xi) / (6.0 * sigma_xi))^2) + # Kalman update the linear combinations + Feps = gamma * state.Feps + w * (xi / eps^6) + Weps = gamma * state.Weps + w + new_eps = (Feps / Weps)^(-1 / 6) + + sampler.hyperparameters.eps = new_eps + tune_nu!(sampler, d) + else + Feps = state.Feps + Weps = state.Weps + end + + state = MCHMCState(rng, state.i + 1, xx, uuu, ll, gg, dEE, Feps, Weps, state.h) + transition = Transition(state, bijector) + return transition, state +end + +function Sample( + sampler::MCHMCSampler, + target::Target, + n::Int; + fol_name = ".", + file_name = "samples", + progress = true, + kwargs..., +) + return Sample( + Random.GLOBAL_RNG, + sampler, + target, + n; + fol_name = fol_name, + file_name = file_name, + kwargs..., + ) +end + +""" + sample( + rng::AbstractRNG, + sampler::MCHMCSampler, + target::Target, + n::Int; + init_params = nothing, + fol_name = ".", + file_name = "samples", + progress = true, + kwargs... + ) +Sampling routine +""" +function Sample( + rng::AbstractRNG, + sampler::MCHMCSampler, + target::Target, + n::Int; + init_params = nothing, + fol_name = ".", + file_name = "samples", + progress = true, + kwargs..., +) + io = open(joinpath(fol_name, "VarNames.txt"), "w") do io + println(io, string(target.θ_names)) + end + + chain = [] + ### initial conditions ### + trans_init_params = target.transform(target.θ_start) + + transition, state = Step( + rng, + sampler, + target.h; + bijector = target.inv_transform, + trans_init_params = trans_init_params, + kwargs..., + ) + push!(chain, [transition.θ; transition.ϵ; transition.δE; transition.ℓ]) + + io = open(joinpath(fol_name, string(file_name, ".txt")), "w") do io + println(io, [transition.θ; transition.ϵ; transition.δE; transition.ℓ]) + @showprogress "MCHMC: " (progress ? 1 : Inf) for i = 1:n-1 + try + transition, state = Step( + rng, + sampler, + state; + bijector = target.transform, + kwargs..., + ) + push!(chain, [transition.θ; transition.ϵ; transition.δE; transition.ℓ]) + println(io, [transition.θ; transition.ϵ; transition.δE; transition.ℓ]) + catch err + if err isa InterruptException + rethrow(err) + else + @warn "Divergence encountered after tuning" + end + end + end + end + + io = open(joinpath(fol_name, string(file_name, "_summary.txt")), "w") do io + ess, rhat = Summarize(chain) + println(io, ess) + println(io, rhat) + end + + return chain +end From 380fcf26fdacde5a576e4d808f1f7b1520f3a445 Mon Sep 17 00:00:00 2001 From: jaimerzp Date: Mon, 15 Jan 2024 14:57:23 +0000 Subject: [PATCH 5/7] gg --- src/targets.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/targets.jl b/src/targets.jl index b98b7b0..846e46b 100644 --- a/src/targets.jl +++ b/src/targets.jl @@ -94,7 +94,7 @@ function CustomTarget(nlogp, grad_nlogp, θ_start::Vector{Float64}; inv_transform=NoTransform) d = length(θ_start) if θ_names==nothing - θ_names = [String("θ_", i) for i=1:d] + θ_names = [string("θ_", i) for i=1:d] end return Target(d, Hamiltonian(nlogp, grad_nlogp), transform, inv_transform, θ_start, θ_names) end From 4ebbcbf275830dbd694c84cc031aa4fb7d97ca40 Mon Sep 17 00:00:00 2001 From: jaimerzp Date: Mon, 15 Jan 2024 15:00:22 +0000 Subject: [PATCH 6/7] no ensemble --- src/MicroCanonicalHMC.jl | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/MicroCanonicalHMC.jl b/src/MicroCanonicalHMC.jl index 3067aee..7cef6fc 100644 --- a/src/MicroCanonicalHMC.jl +++ b/src/MicroCanonicalHMC.jl @@ -17,10 +17,4 @@ include("integrators.jl") include("tuning.jl") include("abstractmcmc.jl") -include("ensemble/targets.jl") -include("ensemble/sampler.jl") -include("ensemble/integrators.jl") -include("ensemble/tuning.jl") -include("ensemble/abstractmcmc.jl") - end From af2a4a812f13732adb3f5cb3a05afc50cd6ecd60 Mon Sep 17 00:00:00 2001 From: jaimerzp Date: Mon, 15 Jan 2024 15:03:20 +0000 Subject: [PATCH 7/7] tests --- test/base.jl | 166 +++++++++++++++++++++++++-------------------------- 1 file changed, 83 insertions(+), 83 deletions(-) diff --git a/test/base.jl b/test/base.jl index d4f68fc..590c20f 100644 --- a/test/base.jl +++ b/test/base.jl @@ -1,83 +1,83 @@ -@testset "Settings" begin - spl = MCHMC( - 10_000, - 0.1; - nchains = 10, - integrator = "MN", - eps = 0.1, - L = 0.1, - sigma = [1.0], - gamma = 2.0, - sigma_xi = 2.0, - init_eps = 1.0, - init_L = 1.0, - init_sigma = [1.0], - ) - - sett = spl.settings - hp = spl.hyperparameters - dy = spl.hamiltonian_dynamics - - @test sett.nchains == 10 - @test sett.integrator == "MN" - @test sett.TEV == 0.1 - @test sett.nadapt == 10_000 - @test sett.init_eps == 1.0 - @test sett.init_L == 1.0 - @test sett.init_sigma == [1.0] - - @test hp.eps == 0.1 - @test hp.L == 0.1 - @test hp.sigma == [1.0] - @test hp.gamma == 2.0 - @test hp.sigma_xi == 2.0 - - @test dy == MicroCanonicalHMC.Minimal_norm -end - -@testset "Partially_refresh_momentum" begin - d = 10 - rng = MersenneTwister(0) - u = MicroCanonicalHMC.Random_unit_vector(rng, d) - @test length(u) == d - @test isapprox(norm(u), 1.0, rtol = 0.0000001) - - p = MicroCanonicalHMC.Partially_refresh_momentum(rng, 0.1, u) - @test length(p) == d - @test isapprox(norm(p), 1.0, rtol = 0.0000001) -end - -@testset "Init" begin - d = 10 - m = zeros(d) - s = Diagonal(ones(d)) - rng = MersenneTwister(1234) - target = GaussianTarget(m, s) - spl = MCHMC(0, 0.001) - _, init = MicroCanonicalHMC.Step(rng, spl, target.h; - trans_init_params = m) - @test init.x == m - @test init.g == m - @test init.dE == init.Feps == 0.0 - @test init.Weps == 1.0e-5 -end - -@testset "Step" begin - d = 10 - m = zeros(d) - s = Diagonal(ones(d)) - rng = MersenneTwister(1234) - target = GaussianTarget(m, s) - spl = MCHMC(0, 0.001; eps = 0.01, L = 0.1, sigma = ones(d)) - aspl = MCHMC(0, 0.001; eps = 0.01, L = 0.1, sigma = ones(d), adaptive = true) - _, init = MicroCanonicalHMC.Step(rng, spl, target.h; - trans_init_params = target.prior_draw()) - tune_sigma, tune_eps, tune_L = MicroCanonicalHMC.tune_what(spl, d) - tune_sigma, tune_eps, tune_L = MicroCanonicalHMC.tune_what(aspl, d) - @test tune_sigma == tune_eps == tune_L == false - _, step = MicroCanonicalHMC.Step(rng, spl, init) - _, astep = MicroCanonicalHMC.Step(rng, aspl, init) - @test spl.hyperparameters.eps == 0.01 - @test aspl.hyperparameters.eps != 0.01 - @test step.x == astep.x -end +@testset "Settings" begin + spl = MCHMC( + 10_000, + 0.1; + nchains = 10, + integrator = "MN", + eps = 0.1, + L = 0.1, + sigma = [1.0], + gamma = 2.0, + sigma_xi = 2.0, + init_eps = 1.0, + init_L = 1.0, + init_sigma = [1.0], + ) + + sett = spl.settings + hp = spl.hyperparameters + dy = spl.hamiltonian_dynamics + + @test sett.nchains == 10 + @test sett.integrator == "MN" + @test sett.TEV == 0.1 + @test sett.nadapt == 10_000 + @test sett.init_eps == 1.0 + @test sett.init_L == 1.0 + @test sett.init_sigma == [1.0] + + @test hp.eps == 0.1 + @test hp.L == 0.1 + @test hp.sigma == [1.0] + @test hp.gamma == 2.0 + @test hp.sigma_xi == 2.0 + + @test dy == MicroCanonicalHMC.Minimal_norm +end + +@testset "Partially_refresh_momentum" begin + d = 10 + rng = MersenneTwister(0) + u = MicroCanonicalHMC.Random_unit_vector(rng, d) + @test length(u) == d + @test isapprox(norm(u), 1.0, rtol = 0.0000001) + + p = MicroCanonicalHMC.Partially_refresh_momentum(rng, 0.1, u) + @test length(p) == d + @test isapprox(norm(p), 1.0, rtol = 0.0000001) +end + +@testset "Init" begin + d = 10 + m = zeros(d) + s = Diagonal(ones(d)) + rng = MersenneTwister(1234) + target = GaussianTarget(m, s) + spl = MCHMC(0, 0.001) + _, init = MicroCanonicalHMC.Step(rng, spl, target.h; + trans_init_params = m) + @test init.x == m + @test init.g == m + @test init.dE == init.Feps == 0.0 + @test init.Weps == 1.0e-5 +end + +@testset "Step" begin + d = 10 + m = zeros(d) + s = Diagonal(ones(d)) + rng = MersenneTwister(1234) + target = GaussianTarget(m, s) + spl = MCHMC(0, 0.001; eps = 0.01, L = 0.1, sigma = ones(d)) + aspl = MCHMC(0, 0.001; eps = 0.01, L = 0.1, sigma = ones(d), adaptive = true) + _, init = MicroCanonicalHMC.Step(rng, spl, target.h; + trans_init_params = target.θ_start) + tune_sigma, tune_eps, tune_L = MicroCanonicalHMC.tune_what(spl, d) + tune_sigma, tune_eps, tune_L = MicroCanonicalHMC.tune_what(aspl, d) + @test tune_sigma == tune_eps == tune_L == false + _, step = MicroCanonicalHMC.Step(rng, spl, init) + _, astep = MicroCanonicalHMC.Step(rng, aspl, init) + @test spl.hyperparameters.eps == 0.01 + @test aspl.hyperparameters.eps != 0.01 + @test step.x == astep.x +end