
Comments (8)

longemen3000 commented on August 24, 2024

Also, on the ActiveBox algorithm, I made a change by scaling d. I have yet to check the Bertsekas paper; if I'm grossly misunderstanding something fundamental, please let me know, because if optimizing were so easy, everybody would be hand-rolling their own optimization algorithms.

function asolve(prob::OptimizationProblem, x0, scheme::ActiveBox, options::OptimizationOptions)
    t0 = time()

    x0, B0 = x0, (false * x0 * x0' + I)  # NOTE: hardcoded initial B0 (see discussion below)
    lower, upper = NLSolvers.bounds(prob)
    ϵbounds = mapreduce(b->(b[2] - b[1])/2, min, zip(lower, upper)) # [1, pp. 100: 5.41]

    all(clamp.(x0, lower, upper) .== x0) || error("Initial guess not in the feasible region")

    linesearch = NLSolvers.ArmijoBertsekas()
    mstyle = NLSolvers.OutOfPlace()
    objvars = NLSolvers.prepare_variables(prob, scheme, x0, copy(x0), B0)
    f0, ∇f0 = objvars.fz, norm(objvars.∇fz, Inf) # use user norm
    fz, ∇fz = objvars.fz, objvars.∇fz # use user norm
    fx, ∇fx = fz, copy(∇fz)
    B = B0
    x, z = copy(x0), copy(x0)
    Tf = typeof(fz)
    is_first=false
    Ix = Diagonal(z.*0 .+ 1)
    for iter = 1:options.maxiter
        x = copy(z)
        fx = copy(fz)
        ∇fx = copy(∇fz)
        
        ϵ = min(norm(clamp.(x.-∇fx, lower, upper).-x), ϵbounds) # Kelley 5.41 and just after (83) in [1]
        activeset = NLSolvers.is_ϵ_active.(x, lower, upper, ∇fx, ϵ)
        Hhat = NLSolvers.diagrestrict.(B, activeset, activeset', Ix)
        # Update current gradient and calculate the search direction
        unconstrained_d = -(Hhat \ ∇fx)  # solve Hhat*d = -∇fx; could use find_direction here
        d = clamp.(x .+ unconstrained_d, lower, upper) .- x  # truncate the unconstrained step onto the box
        d_scale = minimum(d ./ unconstrained_d)
        # rescale so the step keeps the original (unconstrained) direction
        !iszero(d_scale) && (d = unconstrained_d .* d_scale)
        # if the scale is zero we are already on a boundary and can only move along the clamped direction
        
        φ = NLSolvers._lineobjective(mstyle, prob, ∇fz, z, x, d, fz, dot(∇fz, d))
        
        # Perform line search along d
        # Also returns final step vector and update the state
        α, f_α, ls_success = NLSolvers.find_steplength(mstyle, linesearch, φ, Tf(1), ∇fz, activeset, lower, upper, x, d, ∇fx, activeset)

        # # Calculate final step vector and update the state
        if !ls_success
            return NLSolvers.ConvergenceInfo(scheme, (prob=prob, B=B, ρs=norm(x.-z), ρx=norm(x), minimizer=z, fx=fx, minimum=fz, ∇fz=∇fz, f0=f0, ∇f0=∇f0, iter=iter, time=time()-t0), options)
        end
        s = @. α * d
        z = @. x + s
        s = clamp.(z, lower, upper) - x  # re-project to guard against steps leaving the box
        z = x + s
        # Update approximation
        fz, ∇fz, B, s, y = NLSolvers.update_obj(prob.objective, s, ∇fx, z, ∇fz, B, Newton(), is_first)
        normx = norm(x.-clamp.(x.-∇fz, lower, upper), Inf)
        if normx < options.g_abstol
            return NLSolvers.ConvergenceInfo(scheme, (prob=prob, B=B, ρs=norm(x.-z), ρx=norm(x), minimizer=z, fx=fx, minimum=fz, ∇fz=∇fz, f0=f0, ∇f0=∇f0, iter=iter, time=time()-t0), options)
        end
    end
    iter = options.maxiter
    return NLSolvers.ConvergenceInfo(scheme, (prob=prob, B=B, ρs=norm(x.-z), ρx=norm(x), minimizer=z, fx=fx, minimum=fz, ∇fz=∇fz, f0=f0, ∇f0=∇f0, iter=iter, time=time()-t0), options)
end
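
For reference, the stopping test at the end is the projected-gradient optimality measure |x - P(x - ∇f(x))|, where P clamps to the box; at a constrained stationary point it vanishes even when the gradient itself does not. A standalone sketch with illustrative names (the numbers are taken from the run shown further down):

using LinearAlgebra

proj(x, lower, upper) = clamp.(x, lower, upper)
pg_norm(x, g, lower, upper) = norm(x .- proj(x .- g, lower, upper), Inf)

# At the boundary solution of the example below, the gradient pushes x against
# the lower bound, so the projected measure is zero while |g| itself is 0.5:
x, g = [1.25, 1.5625], [0.5, 0.0]
pg_norm(x, g, [1.25, -2.1], [Inf, Inf])  # -> 0.0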

With that change, the Fminbox example from Optim converges in 4 iterations. Full code to reproduce:

using ForwardDiff, DiffResults, NLSolvers, Optim
function box_optimize(f,x0,lb,ub,options=OptimizationOptions();tolbounds = 1e-12)
    Hres = DiffResults.HessianResult(x0)
    function g(df,x)
        ForwardDiff.gradient!(Hres,f,x)
        df .= DiffResults.gradient(Hres)
        df
    end
    function fg(df,x)
        ForwardDiff.gradient!(Hres,f,x)
        df .= DiffResults.gradient(Hres)
        fx = DiffResults.value(Hres)
        return fx,df
    end

    function fgh(df,d2f,x)
        ForwardDiff.hessian!(Hres,f,x)
        d2f .= DiffResults.hessian(Hres)
        df .= DiffResults.gradient(Hres)
        fx = DiffResults.value(Hres)
        return fx,df,d2f
    end

    function h(d2f,x)
        ForwardDiff.hessian!(Hres,f,x)
        d2f .= DiffResults.hessian(Hres)
        d2f
    end

    scalarobj = ScalarObjective(f=f,
    g=g,
    fg=fg,
    fgh=fgh,
    h=h)

    prob = OptimizationProblem(obj=scalarobj, bounds=(lb,ub); inplace=true)
    asolve(prob, x0, ActiveBox(tolbounds), options)
end

f(x) = (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2   
function g!(G, x)
    G[1] = -2.0 * (1.0 - x[1]) - 400.0 * (x[2] - x[1]^2) * x[1]
    G[2] = 200.0 * (x[2] - x[1]^2)
end
lower = [1.25, -2.1]
upper = [Inf, Inf]
initial_x = [2.0, 2.0]
inner_optimizer = Optim.GradientDescent()
  • With Optim:
julia> results = optimize(f, g!, lower, upper, initial_x, Fminbox(inner_optimizer))
 * Status: success

 * Candidate solution
    Final objective value:     6.250240e-02

 * Found with
    Algorithm:     Fminbox with Gradient Descent

 * Convergence measures
    |x - x'|               = 0.00e+00  0.0e+00
    |x - x'|/|x'|          = 0.00e+00  0.0e+00
    |f(x) - f(x')|         = 0.00e+00  0.0e+00
    |f(x) - f(x')|/|f(x')| = 0.00e+00  0.0e+00
    |g(x)|                 = 3.10e-02  1.0e-08

 * Work counters
    Seconds run:   0  (vs limit Inf)
    Iterations:    12
    f(x) calls:    258719
    ∇f(x) calls:   258719


julia> Optim.minimizer(results)
2-element Vector{Float64}:
 1.2500000000000002
 1.5626548683420372

julia> Optim.minimum(results)
0.06250239842033663
  • With NLSolvers (modified ActiveBox):
julia> res = Sol.box_optimize(f, initial_x, lower, upper)
Results of minimization

* Algorithm:
  ActiveBox

* Candidate solution:
  Final objective value:    6.25e-02
  Final gradient norm:      5.00e-01
  Final projected gradient norm:  0.00e+00

  Initial objective value:  4.01e+02
  Initial gradient norm:    1.60e+03

* Convergence measures
  |x - x'|              = 7.42e-02 <= 0.00e+00 (false)     
  |x - x'|/|x|          = 3.82e-02 <= 0.00e+00 (false)     
  |f(x) - f(x')|        = 5.50e-01 <= 0.00e+00 (false)     
  |f(x) - f(x')|/|f(x)| = 8.98e-01 <= 0.00e+00 (false)     
  |x - P(x - g(x))|     = 0.00e+00 <= 1.00e-08 (true)      
  |g(x)|                = 5.00e-01 <= 1.00e-08 (false)     
  |g(x)|/|g(x₀)|        = 3.12e-04 <= 0.00e+00 (false)     

  !!! Solution is at the boundary !!!

* Work counters
  Seconds run:   0.00e+00
  Iterations:    4


julia> res.info.minimizer
2-element Vector{Float64}:
 1.25
 1.5625

julia> res.info.minimum
0.0625


pkofod commented on August 24, 2024

> also there is a hardcoded B0 at the start of the solve

Yes, I saw that the other day :)


pkofod commented on August 24, 2024

I don't understand the scaling...


pkofod commented on August 24, 2024

The reason it doesn't converge is that it ends up in a place where the Hessian has a negative eigenvalue, so we'd need to use PositiveFactorizations.jl or a modified Cholesky here. This currently only works for convex functions.


pkofod commented on August 24, 2024

For example, something like this works (converges):

        Hhat = diagrestrict.(B, activeset, activeset', Ix)
        # Update current gradient and calculate the search direction
        HhatChol = cholesky(Positive, Hhat)  # requires: using PositiveFactorizations
        d = clamp.(x .- HhatChol \ ∇fx, lower, upper) .- x  # solve Bd = -∇fx

Though it did take 444 iterations, which seems excessive. It takes a long time to find the active set at the solution; once it's there, it converges in 3 iterations.
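
For reference, cholesky(Positive, A) from PositiveFactorizations.jl computes the Cholesky factorization of a positive-definite modification of A, so the resulting Newton-like step is guaranteed to be a descent direction. A minimal sketch (the matrix and gradient here are made up for illustration):

using LinearAlgebra, PositiveFactorizations

A = [1.0 2.0; 2.0 1.0]     # indefinite: eigenvalues are 3 and -1
g = [1.0, -1.0]

F = cholesky(Positive, A)  # factorization of a positive-definite modification of A
d = -(F \ g)               # Newton-like step
dot(d, g) < 0              # true: d is a descent direction for gradient g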


longemen3000 commented on August 24, 2024

This is the geometric intuition that led me to use that scale:

[figure: sketch of the clamped step versus the scaled unconstrained direction inside the box]

You can also formulate the scale as d = (1-scale)*d + scale*unconstrained_d, but it worsened the Newton iteration (without applying the Cholesky factorization yet).
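
In other words, the clamp truncates the unconstrained step component-wise, and the minimum ratio recovers the largest scalar multiple of the original direction that still stays inside the box. A standalone sketch of that computation (scaled_step is a hypothetical helper, not NLSolvers API; the 0/0 guard for zero step components is my addition):

function scaled_step(x, d_u, lower, upper)
    d = clamp.(x .+ d_u, lower, upper) .- x  # component-wise truncated step
    # per-coordinate ratio of allowed to desired movement; guard 0/0 by
    # treating a zero desired movement as unconstrained (ratio 1)
    ratios = ifelse.(iszero.(d_u), one.(d_u), d ./ d_u)
    s = minimum(ratios)
    return iszero(s) ? d : s .* d_u  # on the boundary, fall back to the clamped step
end

x  = [2.0, 2.0]
du = [-1.0, -0.5]
scaled_step(x, du, [1.25, -2.1], [Inf, Inf])  # -> [-0.75, -0.375]: same direction, shorter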


longemen3000 commented on August 24, 2024

> though it did take 444 iterations which seems excessive. It takes a long time to find the active set at the solution. Once it's there it converges in 3 iterations

I tried three strategies:

  • Cholesky with no modification on d: 444 iterations
  • Cholesky, d = (1-scale)*d + scale*unconstrained_d: 96 iterations
  • Cholesky, !iszero(d_scale) && (d .= unconstrained_d .* d_scale): 4 iterations, same result.
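
Putting the two together (Positive-Cholesky factorization plus the third scaling strategy), the direction computation inside the loop would look roughly like this; a sketch reusing the variable names from the asolve code above:

        Hhat = NLSolvers.diagrestrict.(B, activeset, activeset', Ix)
        HhatChol = cholesky(Positive, Hhat)                   # guard against indefinite Hessians
        unconstrained_d = -(HhatChol \ ∇fx)                   # Newton-like step
        d = clamp.(x .+ unconstrained_d, lower, upper) .- x   # truncate onto the box
        d_scale = minimum(d ./ unconstrained_d)
        !iszero(d_scale) && (d = unconstrained_d .* d_scale)  # keep the unconstrained direction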


pkofod commented on August 24, 2024

Last night I realized that there's a bug in the line search, which is why it went to 444 iterations. I'm fixing it after work today.
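
For context, a bare-bones projected Armijo backtracking loop in the spirit of Bertsekas looks like the sketch below; this is a generic illustration with hypothetical names, not NLSolvers' ArmijoBertsekas implementation:

using LinearAlgebra: dot

# Accept the first step length whose projected trial point gives sufficient
# decrease, measured against the actual projected step z - x rather than α*d.
function projected_armijo(f, x, d, fx, gx, lower, upper; c=1e-4, ρ=0.5, maxit=50)
    α = 1.0
    for _ in 1:maxit
        z = clamp.(x .+ α .* d, lower, upper)  # projected trial point
        f(z) <= fx + c * dot(gx, z .- x) && return α, z, true
        α *= ρ  # backtrack
    end
    return α, clamp.(x .+ α .* d, lower, upper), false
end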


