diff --git a/Project.toml b/Project.toml
index e484ab75c..7db077da5 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "Optimization"
 uuid = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
-version = "4.0.3"
+version = "4.0.4"
 
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
diff --git a/lib/OptimizationMOI/Project.toml b/lib/OptimizationMOI/Project.toml
index 5df31b4a0..6069c73a1 100644
--- a/lib/OptimizationMOI/Project.toml
+++ b/lib/OptimizationMOI/Project.toml
@@ -1,7 +1,7 @@
 name = "OptimizationMOI"
 uuid = "fd9f6733-72f4-499f-8506-86b2bdd0dea1"
 authors = ["Vaibhav Dixit and contributors"]
-version = "0.5.0"
+version = "0.5.1"
 
 [deps]
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
diff --git a/lib/OptimizationMOI/src/nlp.jl b/lib/OptimizationMOI/src/nlp.jl
index dbfb80089..b745c0aa0 100644
--- a/lib/OptimizationMOI/src/nlp.jl
+++ b/lib/OptimizationMOI/src/nlp.jl
@@ -375,7 +375,7 @@ function MOI.eval_hessian_lagrangian(evaluator::MOIOptimizationNLPEvaluator{T},
         σ,
         μ) where {T}
     if evaluator.f.lag_h !== nothing
-        evaluator.f.lag_h(h, x, σ, μ)
+        evaluator.f.lag_h(h, x, σ, Vector(μ))
         return
     end
     if evaluator.f.hess === nothing
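Note on the nlp.jl change above: MOI may pass the constraint multipliers μ to eval_hessian_lagrangian as a non-Vector AbstractVector (e.g. a SubArray view into a solver buffer), while user-supplied lag_h callbacks are often written against a plain Vector; materializing with Vector(μ) avoids a MethodError in that case. A minimal sketch of the failure mode, with an illustrative lag_h signature that is not the package's API:

    # Hypothetical strictly typed Lagrangian-Hessian callback:
    lag_h(h, x, σ, μ::Vector) = (h .= σ .+ sum(μ))

    h = zeros(3); x = ones(3); σ = 1.0
    μ_view = @view ones(2)[1:2]       # a SubArray, not a Vector
    # lag_h(h, x, σ, μ_view)          # MethodError: no method matching lag_h(..., ::SubArray)
    lag_h(h, x, σ, Vector(μ_view))    # succeeds once μ is copied into a Vector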
diff --git a/lib/OptimizationOptimisers/Project.toml b/lib/OptimizationOptimisers/Project.toml
index b0e763c2f..f2070226f 100644
--- a/lib/OptimizationOptimisers/Project.toml
+++ b/lib/OptimizationOptimisers/Project.toml
@@ -1,7 +1,7 @@
 name = "OptimizationOptimisers"
 uuid = "42dfb2eb-d2b4-4451-abcd-913932933ac1"
 authors = ["Vaibhav Dixit and contributors"]
-version = "0.3.3"
+version = "0.3.4"
 
 [deps]
 Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
diff --git a/lib/OptimizationOptimisers/src/OptimizationOptimisers.jl b/lib/OptimizationOptimisers/src/OptimizationOptimisers.jl
index 99743d24d..da2299e01 100644
--- a/lib/OptimizationOptimisers/src/OptimizationOptimisers.jl
+++ b/lib/OptimizationOptimisers/src/OptimizationOptimisers.jl
@@ -42,27 +42,27 @@ function SciMLBase.__solve(cache::OptimizationCache{
         P,
         C
 }
-    maxiters = if cache.solver_args.epochs === nothing
+    if OptimizationBase.isa_dataiterator(cache.p)
+        data = cache.p
+        dataiterate = true
+    else
+        data = [cache.p]
+        dataiterate = false
+    end
+
+    epochs = if cache.solver_args.epochs === nothing
         if cache.solver_args.maxiters === nothing
-            throw(ArgumentError("The number of epochs must be specified with either the epochs or maxiters kwarg."))
+            throw(ArgumentError("The number of iterations must be specified with either the epochs or maxiters kwarg. Where maxiters = epochs*length(data)."))
         else
-            cache.solver_args.maxiters
+            cache.solver_args.maxiters / length(data)
         end
     else
         cache.solver_args.epochs
     end
 
-    maxiters = Optimization._check_and_convert_maxiters(maxiters)
-    if maxiters === nothing
-        throw(ArgumentError("The number of epochs must be specified as the epochs or maxiters kwarg."))
-    end
-
-    if OptimizationBase.isa_dataiterator(cache.p)
-        data = cache.p
-        dataiterate = true
-    else
-        data = [cache.p]
-        dataiterate = false
+    epochs = Optimization._check_and_convert_maxiters(epochs)
+    if epochs === nothing
+        throw(ArgumentError("The number of iterations must be specified with either the epochs or maxiters kwarg. Where maxiters = epochs*length(data)."))
     end
 
     opt = cache.opt
@@ -75,21 +75,39 @@ function SciMLBase.__solve(cache::OptimizationCache{
     min_θ = cache.u0
 
     state = Optimisers.setup(opt, θ)
-
+    iterations = 0
+    fevals = 0
+    gevals = 0
     t0 = time()
+    breakall = false
     Optimization.@withprogress cache.progress name="Training" begin
-        for epoch in 1:maxiters
+        for epoch in 1:epochs
+            if breakall
+                break
+            end
             for (i, d) in enumerate(data)
                 if cache.f.fg !== nothing && dataiterate
                     x = cache.f.fg(G, θ, d)
+                    iterations += 1
+                    fevals += 1
+                    gevals += 1
                 elseif dataiterate
                     cache.f.grad(G, θ, d)
                     x = cache.f(θ, d)
+                    iterations += 1
+                    fevals += 2
+                    gevals += 1
                 elseif cache.f.fg !== nothing
                     x = cache.f.fg(G, θ)
+                    iterations += 1
+                    fevals += 1
+                    gevals += 1
                 else
                     cache.f.grad(G, θ)
                     x = cache.f(θ)
+                    iterations += 1
+                    fevals += 2
+                    gevals += 1
                 end
                 opt_state = Optimization.OptimizationState(
                     iter = i + (epoch - 1) * length(data),
@@ -97,10 +115,10 @@ function SciMLBase.__solve(cache::OptimizationCache{
                     objective = x[1],
                     grad = G,
                     original = state)
-                cb_call = cache.callback(opt_state, x...)
-                if !(cb_call isa Bool)
+                breakall = cache.callback(opt_state, x...)
+                if !(breakall isa Bool)
                     error("The callback should return a boolean `halt` for whether to stop the optimization process. Please see the `solve` documentation for information.")
-                elseif cb_call
+                elseif breakall
                     break
                 end
                 msg = @sprintf("loss: %.3g", first(x)[1])
@@ -112,7 +130,7 @@ function SciMLBase.__solve(cache::OptimizationCache{
                     min_err = x
                     min_θ = copy(θ)
                 end
-                if i == maxiters #Last iter, revert to best.
+                if i == length(data)*epochs #Last iter, revert to best.
                     opt = min_opt
                     x = min_err
                     θ = min_θ
@@ -122,7 +140,7 @@ function SciMLBase.__solve(cache::OptimizationCache{
                         objective = x[1],
                         grad = G,
                         original = state)
-                    cache.callback(opt_state, x...)
+                    breakall = cache.callback(opt_state, x...)
                     break
                 end
             end
@@ -132,10 +150,9 @@ function SciMLBase.__solve(cache::OptimizationCache{
     end
 
     t1 = time()
-    stats = Optimization.OptimizationStats(; iterations = maxiters,
-        time = t1 - t0, fevals = maxiters, gevals = maxiters)
+    stats = Optimization.OptimizationStats(; iterations,
+        time = t1 - t0, fevals, gevals)
     SciMLBase.build_solution(cache, cache.opt, θ, first(x)[1], stats = stats)
-    # here should be build_solution to create the output message
 end
 
 end
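The OptimizationOptimisers rewrite above makes epochs the primary loop count (deriving epochs = maxiters / length(data) when only maxiters is given), lets a halting callback (breakall) exit the epoch loop as well as the batch loop, and reports the actually performed iterations/fevals/gevals instead of echoing maxiters for all three. Assuming prob wraps a data iterator with 100 batches, the two calls below are intended to be equivalent (a usage sketch, not code from this diff):

    using Optimization, OptimizationOptimisers

    res = solve(prob, Optimisers.Adam(), epochs = 50)
    res = solve(prob, Optimisers.Adam(), maxiters = 50 * 100)  # maxiters = epochs * length(data)

    res.stats.iterations  # now 50 * length(data); fevals/gevals likewise count real calls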
diff --git a/lib/OptimizationOptimisers/test/runtests.jl b/lib/OptimizationOptimisers/test/runtests.jl
index 12b6f2754..953db8960 100644
--- a/lib/OptimizationOptimisers/test/runtests.jl
+++ b/lib/OptimizationOptimisers/test/runtests.jl
@@ -27,6 +27,9 @@ using Zygote
 
     sol = solve(prob, Optimisers.Adam(), maxiters = 1000)
     @test 10 * sol.objective < l1
+    @test sol.stats.iterations == 1000
+    @test sol.stats.fevals == 1000
+    @test sol.stats.gevals == 1000
 
     @testset "cache" begin
         objective(x, p) = (p[1] - x[1])^2
@@ -73,7 +76,7 @@ end
     using Optimization, OptimizationOptimisers, Lux, Zygote, MLUtils, Random,
           ComponentArrays
 
-    x = rand(10000)
+    x = rand(Float32, 10000)
     y = sin.(x)
     data = MLUtils.DataLoader((x, y), batchsize = 100)
 
@@ -96,7 +99,14 @@ end
 
     optf = OptimizationFunction(loss, AutoZygote())
    prob = OptimizationProblem(optf, ps_ca, data)
-    res = Optimization.solve(prob, Optimisers.Adam(), callback = callback, epochs = 10000)
+    res = Optimization.solve(prob, Optimisers.Adam(), epochs = 50)
+
+    @test res.objective < 1e-4
+    @test res.stats.iterations == 50*length(data)
+    @test res.stats.fevals == 50*length(data)
+    @test res.stats.gevals == 50*length(data)
+
+    res = Optimization.solve(prob, Optimisers.Adam(), callback = callback, epochs = 100)
 
     @test res.objective < 1e-4
diff --git a/src/sophia.jl b/src/sophia.jl
index b63f0c099..9f4d973e9 100644
--- a/src/sophia.jl
+++ b/src/sophia.jl
@@ -88,7 +88,8 @@ function SciMLBase.__solve(cache::OptimizationCache{
             cache.f.grad(gₜ, θ)
             x = cache.f(θ)
         end
-        opt_state = Optimization.OptimizationState(; iter = i + (epoch - 1) * length(data),
+        opt_state = Optimization.OptimizationState(;
+            iter = i + (epoch - 1) * length(data),
             u = θ,
             objective = first(x),
             grad = gₜ,
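One behavioral note tied to the breakall rename: the callback contract is unchanged. It must return a Bool halt flag, and returning true now stops both the batch loop and the epoch loop. A minimal conforming callback (names illustrative):

    callback = function (state, loss)
        # state.iter counts across epochs as i + (epoch - 1) * length(data)
        println("iteration $(state.iter): loss = $loss")
        return loss < 1e-4   # `true` halts the optimization
    end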