% Two-layer neural network (sigmoid hidden layer, softmax output) trained
% with backpropagation on a tiny one-hot classification task.
clear
close all
clc

rng(3);  % fixed seed for reproducible weight initialization

%% Data
% Input set X: each ROW is one sample of 4 binary features.
% 0: does not like vacation period; 1: likes vacation period.
X = [1 0 1 0;
     0 1 0 1;
     0 1 1 1;
     1 0 0 0];

% Desired outputs, one-hot (1-of-N) encoded: row k is the target for sample k.
D = [1 0 0 0;   % S1
     0 1 0 0;   % S2
     0 0 1 0;   % S3
     0 0 0 1];  % S4

% Weight initialization, uniform in [-1, 1):
W1 = 2*rand(20, 4) - 1;   % (hidden neurons) x (inputs)
W2 = 2*rand( 4, 20) - 1;  % (outputs) x (hidden neurons)

%% Training
max_epoch = 1000;
eqm = zeros(1, max_epoch);  % mean squared error per epoch
epsilon = 1e-6;             % convergence precision (used by the optional
                            % stopping criterion below)
alpha = 0.5;                % learning rate (%0.6;) — loop-invariant, hoisted
N = size(X, 1);             % number of samples = number of ROWS of X
                            % (was size(X,2), which only worked because X
                            % happens to be square)

for epoch = 1:max_epoch
    sum_eq = 0;  % accumulated squared error over this epoch

    % One pass (epoch) of per-sample stochastic gradient updates:
    for k = 1:N
        x = X(k, :)';   % k-th sample as a column vector (4x1 here;
                        % was reshape(X(k,:),4,1) with a stale "25x1" comment)
        d = D(k, :)';   % desired one-hot output

        % Forward pass:
        v1 = W1*x;
        y1 = Sigmoid(v1);   % hidden-layer activation
        v  = W2*y1;
        y  = Softmax(v);    % output probabilities

        % Output-layer error; with softmax output the delta used here is
        % simply (d - y), as in the original.
        e     = d - y;
        delta = e;
        sum_eq = sum_eq + sum(e.^2);

        % Backpropagate to the hidden layer (sigmoid derivative y1.*(1-y1)):
        e1     = W2'*delta;
        delta1 = y1.*(1 - y1).*e1;

        % Weight updates:
        W1 = W1 + alpha*delta1*x';
        W2 = W2 + alpha*delta*y1';
    end

    eqm(epoch) = sum_eq/N;

    % Stopping criterion: training convergence (kept disabled, as original):
    % if epoch > 1 && abs(eqm(epoch) - eqm(epoch-1)) < epsilon
    %     break;
    % end
end

disp(['Número de épocas: ', num2str(epoch), '.']);

%% Inference
% Run every sample through the trained network; each output row is the
% softmax probability vector for the corresponding input row.
y = zeros(N, size(D, 2));
for k = 1:N
    x = X(k, :)';
    y1 = Sigmoid(W1*x);
    y(k, :) = Softmax(W2*y1);
end

%% Results
disp('Results:');
disp(' [desired]:');
disp(D);
disp(' [network_output]:');
disp(y)
disp(' [weights_input_to_hidden_layer]:');
disp(W1)
disp(' [weights_hidden_layer_to_output]:');
disp(W2)

figure
plot(1:epoch, eqm(1:epoch), 'r', 'LineWidth', 1.5)
xlabel('Epoch')
ylabel('Average of Training error')
legend('Sum of Squared Error')