Incremental First-Order Algorithms

Suppose our minimization problem has the form $\def\vect{\boldsymbol}$ $$ \min \quad \sum_{i = 1}^m f_i( \vect x ) =: f( \vect x ). $$

  1. For $k = 0, 1, \dots$

    1. $\vect x^{k, 0} \leftarrow \vect x^{k}$

    2. For $i = 1, 2, \dots, m$

      1. $\vect x^{k, i} \leftarrow \vect x^{k, i - 1} - \lambda_k \nabla f_i( \vect x^{k, i - 1} )$

    3. $\vect x^{k + 1} \leftarrow \vect x^{k, m}$

Basically, under some technical hypotheses, we can expect convergence if $$ \lambda_k \to 0^+\quad\text{and}\quad \sum_{k = 0}^\infty \lambda_k = \infty. $$

In the least-squares case, for example, the objective splits naturally into blocks of residuals: $$ \min \| A\vect x -\vect b \|^2 = \sum_{i = 1}^m\bigl( (A\vect x)_i - b_i \bigr)^2 = \sum_{i = 1}^M \sum_{j \in \mathcal S_i} \bigl( (A\vect x)_j - b_j \bigr)^2, $$ where $\mathcal S_i \cap \mathcal S_j = \emptyset$ if $i \neq j$ and $\bigcup_{i = 1}^M\mathcal S_i = \{1, 2, \dots, m\}$.

That is, $$ f( \vect x ) = \sum_{i = 1}^M f_i( \vect x ) $$ where $$ f( \vect x ) = \| A\vect x -\vect b \|^2\quad \text{and} \quad f_i( \vect x ) = \sum_{j \in \mathcal S_i} \bigl( (A\vect x)_j - b_j \bigr)^2. $$
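
Each block gradient then has the closed form $$ \nabla f_i( \vect x ) = 2 A_{\mathcal S_i}^\top \bigl( A_{\mathcal S_i} \vect x - \vect b_{\mathcal S_i} \bigr), $$ where $A_{\mathcal S_i}$ and $\vect b_{\mathcal S_i}$ denote the rows of $A$ and the entries of $\vect b$ indexed by $\mathcal S_i$; this is exactly the expression implemented in the code below.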

In [1]:
import numpy as np
In [2]:
# Random overdetermined least-squares instance:
m = 10000
n = int( m / 2 )

A = np.random.normal( size = ( m, n ) )
b = np.random.normal( size = ( m, ) )

# Solution via the normal equations A^T A x = A^T b:
x_ls = np.linalg.solve( A.T @ A, A.T @ b )
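
Forming $A^\top A$ squares the condition number of $A$, so as a sanity check one might compare against NumPy's dedicated least-squares solver. A minimal sketch (the rest of the notebook does not depend on it, and `x_lstsq` is a name introduced here for illustration):

# SVD-based solver, avoids forming A^T A explicitly:
x_lstsq, *_ = np.linalg.lstsq( A, b, rcond = None )
print( np.linalg.norm( x_ls - x_lstsq ) )  # should be tiny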
In [3]:
print( np.sum( ( A @ x_ls - b ) ** 2 ) )
5161.0483820224845
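The value is close to $m - n = 5000$, as expected: the least-squares residual is the orthogonal projection of the Gaussian vector $\vect b$ onto the complement of the column space of $A$, a subspace of dimension $m - n$, so its expected squared norm is $m - n$.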
In [4]:
M = 10

# Create the index sets S_i:
Ss10 = []
curr_i = 0

for i in range( M ):
    
    S = []
    n_idx = int( m / M ) + ( i < ( m % M ) )
    
    for j in range( n_idx ):
        S.append( curr_i + j )
    
    curr_i += n_idx
    
    Ss10.append( S )
In [5]:
M = 100

# Create the index sets S_i:
Ss20 = []
curr_i = 0

for i in range( M ):
    
    S = []
    n_idx = int( m / M ) + ( i < ( m % M ) )
    
    for j in range( n_idx ):
        S.append( curr_i + j )
    
    curr_i += n_idx
    
    Ss20.append( S )
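
The same balanced contiguous partition can be built in one line with NumPy; a sketch (np.array_split also hands the remainder to the first m % M blocks, just like the loop above):

Ss = [ list( s ) for s in np.array_split( np.arange( m ), M ) ]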
In [6]:
def incremental( Ss, x, step, niter ):
    
    iters = [ x.copy() ]
    
    for k in range( niter ):
                
        x_new = iters[ -1 ].copy()
        for S in Ss:
            
            # Block gradient 2 A_S^T ( A_S x - b_S ), using only the rows in S:
            grad = 2.0 * A[ S, : ].T @ ( A[ S, : ] @ x_new - b[ S ] )
            x_new -= step( k ) * grad
            
        iters.append( x_new.copy() )
        
    return iters

$$ \Large \lambda_k = \frac{\lambda_0}{( k + 1 )^{0.6}} $$
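
This schedule satisfies the conditions above: $\lambda_k \to 0^+$ because the exponent is positive, and $\sum_{k = 0}^\infty \lambda_k = \lambda_0 \sum_{k = 0}^\infty ( k + 1 )^{-0.6} = \infty$ because a $p$-series with $p \leq 1$ diverges.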

In [7]:
def step10( k ):
    
    return 5.0e-5 / ( ( k + 1 ) ** 0.6 )

def step20( k ):
    
    return 7.5e-5 / ( ( k + 1 ) ** 0.6 )
    
#iters10 = incremental( Ss10, np.zeros( ( n, ) ), step10, 20 )
iters20 = incremental( Ss20, np.zeros( ( n, ) ), step20, 20 )
In [8]:
import matplotlib.pyplot as pp
In [9]:
def F( x ):
    return np.sum( ( A @ x - b )  ** 2.0 )
In [10]:
# vals10 = []
# for i in range( len( iters10 ) ):
#     vals10.append( F( iters10[ i ] ) )
vals20 = []
for i in range( len( iters20 ) ):
    vals20.append( F( iters20[ i ] ) )
In [11]:
# pp.plot( vals10 )
pp.plot( vals20 )
pp.show()
In [12]:
def step10( k ):
    
    return 5.0e-5 / ( ( k + 1 ) ** 0.6 )

def step20( k ):
    
    return 5.0e-5 / ( ( k + 1 ) ** 0.6 )
    
# A single incremental pass with a constant step already reduces the objective:
iters = incremental( Ss20, np.zeros( ( n, ) ), lambda k : 7.5e-5, 1 )
print( F( np.zeros( ( n, ) ) ) )
print( F( iters[ -1 ] ) )
10300.396820001311
8089.015379581
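
We now apply the same idea to tomographic image reconstruction. The pair returned by make_radon_transp below consists of a discrete Radon transform radon (forward projection) and its transpose radon_t (backprojection), so reconstruction is again a large least-squares problem.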
In [13]:
import ray_tracing_cuda_float
In [14]:
radon, radon_t = ray_tracing_cuda_float.make_radon_transp( ( 1024, 1024 ) )
In [15]:
img = np.zeros( ( 1024, 1024 ) )
img[ 312 : 763, 456 : 975 ] += 1.0
img[ 120 : 450, 253 : 800 ] += np.pi

pp.imshow( img )
pp.show()
In [16]:
dado = radon( img )

pp.imshow( dado )
pp.show()
In [17]:
teste = radon_t( dado )

pp.imshow( teste )
pp.show()
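
If radon_t is indeed the transpose of radon, the inner products $\langle \mathrm{radon}(u), v\rangle$ and $\langle u, \mathrm{radon\_t}(v)\rangle$ must match. A quick sanity check (a hypothetical snippet, not part of the original experiment; u and v are arbitrary test arrays):

u = np.random.normal( size = ( 1024, 1024 ) )
v = np.random.normal( size = dado.shape )
print( np.sum( radon( u ) * v ) )    # <radon(u), v>
print( np.sum( u * radon_t( v ) ) )  # <u, radon_t(v)>

The two printed values should agree up to floating-point error.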

$$\Large \min\quad \| A\vect x - \vect b \|^2 = \sum_{i = 1}^m \bigl( ( A\vect x )_i - b_i \bigr)^2 $$

In [18]:
def fobj( x ):
    
    # f( x ) = 0.5 || radon( x ) - dado ||^2
    return 0.5 * np.sum( ( radon( x ) - dado ) ** 2.0 )

def grad( x ):
    
    # Gradient of fobj: radon^T ( radon( x ) - dado )
    return radon_t( radon( x ) - dado )
In [19]:
def incremental_img( grads, x, step, niter ):
    
    # Same incremental scheme, now iterating over a list of gradient functions:
    iters = [ x.copy() ]
    
    for k in range( niter ):
                
        x_new = iters[ -1 ].copy()
        for grad in grads:
            
            x_new -= step( k ) * grad( x_new )
            
        iters.append( x_new.copy() )
        
    return iters
In [20]:
iters = incremental_img( 
        [ grad ],
        np.zeros( ( 1024, 1024 ) ),
        lambda k : 5.0e-1,
        500
    )
In [21]:
pp.figure( figsize = ( 10, 10 ) )
pp.imshow(
    iters[ -1 ]
)
pp.colorbar()
pp.show()
In [22]:
def incremental_img( grads, M, x, step, niter ):
    
    # Variant where the block gradients come from a single function
    # grads( x, i ), indexed by the block number i = 0, ..., M - 1:
    iters = [ x.copy() ]
    
    for k in range( niter ):
                
        x_new = iters[ -1 ].copy()
        for i in range( M ):
            
            x_new -= step( k ) * grads( x_new, i )
            
        iters.append( x_new.copy() )
        
    return iters
In [23]:
M = 20

# Split the sinogram into M contiguous angular blocks and build a
# forward/backprojection pair restricted to each block:
data_subs = []
data_transforms = []
curr_idx = 0

for i in range( M ):

    n_idx = int( dado.shape[ 1 ] / M ) + ( dado.shape[ 1 ] % M > i )
    curr_sub = dado[ :, curr_idx : curr_idx + n_idx ]
    
    data_subs.append( curr_sub )

    data_transforms.append(
        ray_tracing_cuda_float.make_radon_transp(
            ( 1024, n_idx ),
            sino_top_left = ( 
                curr_idx * np.pi / ( 1024.0 - 1.0 ),
                1.0 - 1.0 / 1024.0
            ),
            sino_bottom_right = ( 
                ( curr_idx + n_idx - 1 ) * np.pi / ( 1024.0 - 1.0 ),
                -1.0 + 1.0 / 1024.0
            )
        )   
    )
    
    curr_idx += n_idx

# Gradient of the i-th block: R_i^T ( R_i x - b_i ), where R_i is the forward
# projection restricted to block i and b_i the corresponding data block:
def grads( x, i ):
    tmp = data_transforms[ i ][ 0 ]( x ) - data_subs[ i ]
    return data_transforms[ i ][ 1 ]( tmp )
In [24]:
iters = incremental_img( 
        grads,
        M,
        np.zeros( ( 1024, 1024 ) ),
        lambda k : 2.0e-0,
        1
    )
In [25]:
pp.figure( figsize = ( 10, 10 ) )
pp.imshow(
    iters[ -1 ]
)
pp.colorbar()
pp.show()
In [26]:
def reconst( niter, M, step, plot = False ):
    
    data_subs = []
    data_transforms = []
    curr_idx = 0

    for i in range( M ):

        n_idx = int( dado.shape[ 1 ] / M ) + ( dado.shape[ 1 ] % M > i )
        curr_sub = dado[ :, curr_idx : curr_idx + n_idx ]

        data_subs.append( curr_sub )

        data_transforms.append(
            ray_tracing_cuda_float.make_radon_transp(
                ( 1024, n_idx ),
                sino_top_left = ( 
                    curr_idx * np.pi / ( 1024.0 - 1.0 ),
                    1.0 - 1.0 / 1024.0
                ),
                sino_bottom_right = ( 
                    ( curr_idx + n_idx - 1 ) * np.pi / ( 1024.0 - 1.0 ),
                    -1.0 + 1.0 / 1024.0
                )
            )   
        )

        curr_idx += n_idx

    def grads( x, i ):
        tmp = data_transforms[ i ][ 0 ]( x ) - data_subs[ i ]
        return data_transforms[ i ][ 1 ]( tmp )
    
    iters = incremental_img( 
        grads,
        M,
        np.zeros( ( 1024, 1024 ) ),
        lambda k : step,
        niter
    )
    
    
    if plot:
        pp.figure( figsize = ( 10, 10 ) )
        pp.imshow(
            iters[ -1 ]
        )
        pp.colorbar()
        pp.show()
    
    return iters
In [27]:
print( np.linalg.norm( reconst( 5, 1, 0.25e-0, plot = True )[ -1 ] - img ) )
555.227306970999
In [28]:
print( np.linalg.norm( reconst( 5, 100, 10e-0, plot = True )[ -1 ] - img ) )
211.33407506183818
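
With the same five passes over the data, the incremental method with $M = 100$ blocks (and a correspondingly larger step) reaches a reconstruction error roughly 2.6 times smaller than plain gradient descent ($M = 1$).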