In [1]:
import numpy as np
In [2]:
def newton_gll_step( f,
                     df,
                     ddf,
                     x,
                     l,
                     beta = 0.1,
                     sigma = 0.1,
                     eps = 1e-10,
                     return_niter = False,
                     maxiter = 100000,
                     kappa = 1e-2,
                     gamma = 1e-2,
                     M = 0
                    ):
    
    grad = df( x )
    f_x = f( x )
    
    val_list = [ f_x ]
    val_list = val_list + [ float( "-Inf" ) ] * M
    val_list_idx = 0
    
    grad_sq = np.sum( grad ** 2.0 )
    niter = 0
    nfeval = 1
    while ( grad_sq ** 0.5 > eps ) and ( niter < maxiter ):
        
        l_k = l
        try:
            d_k = -np.linalg.solve( ddf( x ), df( x ) )
        except np.linalg.LinAlgError:
            d_k = -grad
        nrm_d_k = np.linalg.norm( d_k )
        if d_k @ grad >= -gamma * ( grad_sq ** 0.5 ) * nrm_d_k:
            d_k = -grad
        if nrm_d_k <= kappa * ( grad_sq ** 0.5 ):
            d_k = -grad
        
        desc = sigma * np.sum( d_k * grad )
        
        x_next = x + l_k * d_k
        f_x_next = f( x_next )
        nfeval = nfeval + 1
        mx = max( val_list )
        while f_x_next >= mx + l_k * desc:
            l_k = l_k * beta
            x_next = x + l_k * d_k
            f_x_next = f( x_next )
            nfeval = nfeval + 1
        
        niter = niter + 1

        x = x_next
        grad = df( x )
        grad_sq = np.sum( grad ** 2.0 )
        f_x = f_x_next
        
        val_list_idx = ( val_list_idx + 1 ) % ( M + 1 )
        val_list[ val_list_idx ] = f_x
    
    if return_niter :
        return ( x, niter, nfeval )
    else:
        return x
In [3]:
l = [ 1, 2, 3, 4, 5 ]
l_idx = 0
val = 6
In [4]:
l[ l_idx ] = val
l_idx = ( l_idx + 1 ) % 5
val = val + 1
print( l )
[6, 2, 3, 4, 5]
In [5]:
val_list = [ 11.1 ] + [ float( "-Inf" ) ] * 0
print( val_list )
print( max( val_list ) )
[11.1]
11.1
In [6]:
def rosenbrock( x, a = 1, b = 100 ):
    
    return ( a - x[ 0 ] ) ** 2.0 + b * ( x[ 1 ] - x[ 0 ] ** 2.0 ) ** 2.0

def rosenbrock_grad( x, a = 1, b = 100 ):
    
    retval = np.empty( 2 )
    
    retval[ 0 ] = 2.0 * ( x[ 0 ] - a ) - 4.0 * x[ 0 ] * b * ( x[ 1 ] - x[ 0 ] ** 2.0 )
    retval[ 1 ] = 2.0 * b * ( x[ 1 ] - x[ 0 ] ** 2.0 )
    
    return retval

def rosenbrock_hess( x, a = 1, b = 100 ):
    
    retval = np.empty( ( 2, 2 ) )
    
    retval[ 0, 0 ] = 2.0 * x[ 0 ] - 4.0 * b * ( x[ 1 ] - x[ 0 ] ** 2.0 ) + 8.0 * b * x[ 0 ]
    retval[ 0, 1 ] = -4.0 * x[ 0 ] * b
    retval[ 1, 0 ] = -4.0 * b * x[ 0 ]
    retval[ 1, 1 ] = 2.0 * b
    
    return retval
In [7]:
x_0 = np.array( [ 1.0, 1.0 ] ) + np.random.randn( 2 ) * 3.0
print( x_0 )
x, niter, nfeval = newton_gll_step( 
                                    rosenbrock,
                                    rosenbrock_grad,
                                    rosenbrock_hess,
                                    x_0,
                                    1.0,
                                    return_niter = True,
                                    sigma = 1e-5,
                                    beta = 0.5,
                                    kappa = 1e-6,
                                    gamma = 1e-5
                                   )

print( niter + nfeval )
print( niter )
print( nfeval / niter )
print( x )
[0.69203435 6.17892732]
229
114
1.0087719298245614
[1. 1.]
In [8]:
print( x_0 )
x, niter, nfeval = newton_gll_step( 
                                    rosenbrock,
                                    rosenbrock_grad,
                                    rosenbrock_hess,
                                    x_0,
                                    1.0,
                                    return_niter = True,
                                    sigma = 1e-5,
                                    beta = 0.5,
                                    kappa = 1e-6,
                                    gamma = 1e-5,
                                    M = 10
                                   )

print( niter + nfeval )
print( niter )
print( nfeval / niter )
print( x )
[0.69203435 6.17892732]
229
114
1.0087719298245614
[1. 1.]
In [9]:
def newton_zh_step( f,
                    df,
                    ddf,
                    x,
                    l,
                    beta = 0.1,
                    sigma = 0.1,
                    eps = 1e-10,
                    return_niter = False,
                    maxiter = 100000,
                    kappa = 1e-2,
                    gamma = 1e-2,
                    eta = 0
                   ):
    
    grad = df( x )
    f_x = f( x )

    C = f_x
    Q = 1
    
    grad_sq = np.sum( grad ** 2.0 )
    niter = 0
    nfeval = 1
    while ( grad_sq ** 0.5 > eps ) and ( niter < maxiter ):
        
        l_k = l
        try:
            d_k = -np.linalg.solve( ddf( x ), df( x ) )
        except np.linalg.LinAlgError:
            d_k = -grad
        nrm_d_k = np.linalg.norm( d_k )
        if d_k @ grad >= -gamma * ( grad_sq ** 0.5 ) * nrm_d_k:
            d_k = -grad
        if nrm_d_k <= kappa * ( grad_sq ** 0.5 ):
            d_k = -grad
        
        desc = sigma * np.sum( d_k * grad )
        
        x_next = x + l_k * d_k
        f_x_next = f( x_next )
        nfeval = nfeval + 1

        while f_x_next >= C + l_k * desc:
            l_k = l_k * beta
            x_next = x + l_k * d_k
            f_x_next = f( x_next )
            nfeval = nfeval + 1
        
        niter = niter + 1

        x = x_next
        grad = df( x )
        grad_sq = np.sum( grad ** 2.0 )
        f_x = f_x_next

        Q_next = eta * Q + 1
        C = ( eta * Q * C + f_x ) / Q_next
        Q = Q_next
    
    if return_niter :
        return ( x, niter, nfeval )
    else:
        return x
In [10]:
print( x_0 )
x, niter, nfeval = newton_zh_step( 
                                   rosenbrock,
                                   rosenbrock_grad,
                                   rosenbrock_hess,
                                   x_0,
                                   1.0,
                                   return_niter = True,
                                   sigma = 1e-4,
                                   beta = 0.5,
                                   kappa = 1e-6,
                                   gamma = 1e-5,
                                   eta = 0.85
                                  )

print( niter + nfeval )
print( niter )
print( nfeval / niter )
print( x )
[0.69203435 6.17892732]
229
114
1.0087719298245614
[1. 1.]