function [W1, W5, Wo] = MnistConv(W1, W5, Wo, X, D)
% MnistConv  One training epoch of minibatch SGD with momentum for a small
% convolutional network, using back-propagation.
%
% Inputs:
%   W1 - convolution filter bank (3-D array; size(W1,3) = number of filters)
%   W5 - pooling-to-hidden layer weight matrix
%   Wo - hidden-to-output layer weight matrix
%   X  - training inputs; X(:, :, k) is the k-th image
%   D  - correct class labels, integers in 1..size(Wo,1)
%
% Outputs:
%   Updated W1, W5, Wo after one pass over the data.
%
% Relies on sibling helpers Conv, ReLU, Pool, Softmax defined elsewhere
% in this project.

alpha = 0.01;                 % learning rate
beta  = 0.95;                 % momentum coefficient

momentum1 = zeros(size(W1));
momentum5 = zeros(size(W5));
momentumo = zeros(size(Wo));

N     = length(D);
bsize = 5;  %100              % minibatch size; weights adjusted N/bsize times per epoch
blist = 1:bsize:(N-bsize+1);  % index of the first sample of each minibatch

numFilters = size(W1, 3);     % generalized: was hard-coded loop bound (2, previously 20)
numClasses = size(Wo, 1);     % generalized: was hard-coded 10 in the one-hot vector

% One epoch: loop over minibatches.
for batch = 1:length(blist)
    dW1 = zeros(size(W1));
    dW5 = zeros(size(W5));
    dWo = zeros(size(Wo));

    % Minibatch loop: accumulate gradients over bsize consecutive samples.
    begin = blist(batch);
    for k = begin:begin+bsize-1
        % Forward pass (inference):
        x  = X(:, :, k);          % input image
        y1 = Conv(x, W1);         % convolution layer
        y2 = ReLU(y1);
        y3 = Pool(y2);            % 2x2 mean pooling
        y4 = reshape(y3, [], 1);  % flatten pooled maps into a column vector
        v5 = W5*y4;
        y5 = ReLU(v5);            % hidden (fully connected) layer
        v  = Wo*y5;
        y  = Softmax(v);          % output layer (class probabilities)

        % One-hot encode the correct output. BUG FIX: the original code had
        % this commented out and used the raw scalar label d = D(k); since
        % y is a numClasses-by-1 vector, e = d - y then broadcast the scalar
        % and produced a wrong error signal for every output unit.
        d = zeros(numClasses, 1);
        d(D(k)) = 1;

        % Backpropagation:
        e     = d - y;            % output-layer error
        delta = e;                % softmax + cross-entropy: delta equals e

        e5     = Wo' * delta;     % propagate into hidden (ReLU) layer
        delta5 = (y5 > 0) .* e5;

        e4 = W5' * delta5;        % propagate into pooling layer
        e3 = reshape(e4, size(y3));

        % Upsample through the 2x2 mean pool: each pooled error spreads
        % uniformly over its 2x2 window with weight 1/(2*2).
        e2 = zeros(size(y2));
        W3 = ones(size(y2)) / (2*2);
        for c = 1:numFilters
            e2(:, :, c) = kron(e3(:, :, c), ones([2 2])) .* W3(:, :, c);
        end
        delta2 = (y2 > 0) .* e2;  % back through the ReLU

        % Convolution-layer gradient: valid correlation of the input with
        % each delta map (rot90 twice turns conv2 into correlation).
        delta1_x = zeros(size(W1));
        for c = 1:numFilters
            delta1_x(:, :, c) = conv2(x(:, :), rot90(delta2(:, :, c), 2), 'valid');
        end

        dW1 = dW1 + delta1_x;
        dW5 = dW5 + delta5*y4';
        dWo = dWo + delta *y5';
    end

    % Update weights: average the bsize accumulated gradients, then apply
    % the momentum update. This repeats N/bsize times each epoch.
    dW1 = dW1 / bsize;
    dW5 = dW5 / bsize;
    dWo = dWo / bsize;

    momentum1 = alpha*dW1 + beta*momentum1;
    W1 = W1 + momentum1;

    momentum5 = alpha*dW5 + beta*momentum5;
    W5 = W5 + momentum5;

    momentumo = alpha*dWo + beta*momentumo;
    Wo = Wo + momentumo;
end
end