Going Back-to-Basics – Convolutional Neural Network from Scratch



import numpy as np
import h5py
import time
import copy
from random import randint
import argparse
#load MNIST data
MNIST_data = h5py.File('MNISTdata.hdf5', 'r')
x_train = np.float32(MNIST_data['x_train'][:] )
y_train = np.int32(np.array(MNIST_data['y_train'][:,0]))
x_test = np.float32( MNIST_data['x_test'][:] )
y_test = np.int32( np.array( MNIST_data['y_test'][:,0] ) )
#close the file handle once the arrays are loaded into memory
MNIST_data.close()
#Implementation of the stochastic gradient descent algorithm
#example flags: --lr=0.01 --epochs=20 --k_num=30 --k_size=7 --ps 3500
LR = 0.01
num_epochs = 20
#number of inputs
num_inputs = 28*28
#number of outputs
num_outputs = 10
#model parameters
model = {}
#input image width/height
image_lw = 28
#number of channels
NUM_CHANNELS = 5
#kernel size (assume a square kernel)
kx, ky = 5,5
#spatial size of the valid-convolution output
out_dim_size = image_lw - kx + 1
#train on a partial subset of the data
ps = 3500
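
#a minimal sketch (an assumption, not necessarily the original script's CLI
#handling) of how the flags in the comment above could be wired up with the
#imported argparse module; defaults mirror the hard-coded values here
parser = argparse.ArgumentParser()
parser.add_argument("--lr", type=float, default=0.01)
parser.add_argument("--epochs", type=int, default=20)
parser.add_argument("--k_num", type=int, default=5)
parser.add_argument("--k_size", type=int, default=5)
parser.add_argument("--ps", type=int, default=3500)
args = parser.parse_args([])   # pass [] so this also runs inside a notebook
#e.g. LR, num_epochs, ps = args.lr, args.epochs, args.ps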
#Xavier initialization of the weights, convolutional kernel stack, and bias
model['W'] = np.random.randn(num_outputs,out_dim_size,out_dim_size,NUM_CHANNELS)/ np.sqrt(image_lw**2)
model['K'] =  np.random.randn(kx,ky,NUM_CHANNELS) / np.sqrt(kx*ky)
model['b1'] = np.random.randn(num_outputs) / np.sqrt(num_outputs)
#model gradients
model_grads = copy.deepcopy(model) 
#softmax function
def softmax_function(z):
    #subtract the max before exponentiating for numerical stability
    ZZ = np.exp(z - np.max(z))/np.sum(np.exp(z - np.max(z)))
    return ZZ
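
#illustrative check (made-up logits): the output is non-negative and sums to ~1.0
z_demo = np.array([2.0, 1.0, 0.1])
print(softmax_function(z_demo), softmax_function(z_demo).sum())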

#ReLU function
def relu(z):
    return np.maximum(z,0)

#ReLU derivative
def relu_p(z):
    #return a new array instead of mutating the input in place
    return np.where(z >= 0, 1.0, 0.0)
def im2col(x,outdx,outdy,kern_x, kern_y):
    """
    input: x, a 28x28 MNIST image (or any 2D array)
    output: (kern_x*kern_y, outdx*outdy) matrix, one flattened patch per column
    """
    x_out = np.zeros((kern_x*kern_y,outdx*outdy))
    x_out_col = 0
    for i in range(outdx):
        for j in range(outdy):
            x_out[:,x_out_col] = x[i:i+kern_x,j:j+kern_y].flatten()
            x_out_col += 1
    return x_out
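
#illustrative shape check for im2col, using a random array in place of an MNIST image
x_demo = np.random.randn(28, 28)
cols = im2col(x_demo, 28 - 5 + 1, 28 - 5 + 1, 5, 5)
print(cols.shape)   # (25, 576): one flattened 5x5 patch per column
#the first column is the top-left patch, flattened row-major
assert np.allclose(cols[:, 0], x_demo[0:5, 0:5].flatten())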

def kernal2mat(kern,kern_x, kern_y, kern_d):
    """
    input: kernel stack of shape (kern_x, kern_y, kern_d)
    output: (kern_d, kern_x*kern_y) matrix, one flattened (transposed) kernel per row
    """
    kern_out = np.zeros((kern_d, kern_x*kern_y))
    for i in range(kern_d):
        kern_out[i] = kern[:,:,i].T.flatten()
    return kern_out
#Convolution operation
def convolve(x,kern):
    """
    input: x, the input image; kern, the kernel stack of shape (kern_x, kern_y, kern_d)
    output: (outdx, outdy, kern_d) feature maps from a valid convolution
    """
    kern_x, kern_y, kern_d = kern.shape
    outdx, outdy = x.shape[0] - kern_x +1, x.shape[1] - kern_y +1
    x_mat = im2col(x,outdx, outdy, kern_x, kern_y)
    k_matrix = kernal2mat(kern,kern_x, kern_y,kern_d)
    z = np.dot(k_matrix,x_mat)
    return np.reshape(z.T,(outdx, outdy, kern_d))
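
#illustrative sanity check: the im2col-based convolve should match naive loops
#(kernal2mat flattens the transposed kernel, so each output pixel is the dot
# product of an image patch with the transposed kernel slice; because the
# kernel is learned, this orientation choice does not affect training)
x_demo = np.random.randn(28, 28)
k_demo = np.random.randn(kx, ky, NUM_CHANNELS)
out_demo = convolve(x_demo, k_demo)
naive = np.zeros_like(out_demo)
for c in range(NUM_CHANNELS):
    for i in range(out_demo.shape[0]):
        for j in range(out_demo.shape[1]):
            naive[i, j, c] = np.sum(x_demo[i:i+kx, j:j+ky] * k_demo[:, :, c].T)
assert np.allclose(out_demo, naive)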
#forward pass
def forward(x,y, model):
    #im2col-based convolution
    Z = convolve(x,model["K"])
    H = relu(Z)
    #fully connected layer: one weight map per (class, channel) pair
    U = np.zeros(num_outputs)
    for i in range(NUM_CHANNELS):
        U += np.sum(
            np.multiply(model["W"][:,:,:,i],H[:,:,i]), axis=(1,2))
    #add the bias once, matching drdb in the backward pass
    U += model["b1"]
    F = softmax_function(U)
    return F, H, U, Z
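
#illustrative shape walk-through of a single forward pass on one training image
x_demo = np.reshape(x_train[0], (-1, 28))
F_demo, H_demo, U_demo, Z_demo = forward(x_demo, y_train[0], model)
print(Z_demo.shape)   # (24, 24, 5): valid convolution, one feature map per channel
print(H_demo.shape)   # (24, 24, 5): elementwise ReLU of Z
print(U_demo.shape)   # (10,): one logit per class
print(F_demo.sum())   # ~1.0: softmax output is a probability distribution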
#backward pass
def backward(x,y, model, F, H, U, Z):
    #one-hot indicator of the true class
    indicator = np.zeros(num_outputs)
    indicator[y] = 1
    #gradient of the cross-entropy loss w.r.t. the logits U
    drdu = - (indicator - F)
    drdb = drdu

    drdw = np.zeros((num_outputs,out_dim_size,out_dim_size,NUM_CHANNELS))
    delta = np.zeros((out_dim_size,out_dim_size,NUM_CHANNELS))
    for i in range(num_outputs):
        drdw[i,:,:,:] = drdu[i]*H
        delta += drdu[i]*model["W"][i]

    #the kernel gradient is the convolution of the input with the backpropagated delta
    drdk = convolve(x,np.multiply(relu_p(Z),delta))
    model_grads["W"]  = drdw
    model_grads["b1"] = drdb
    model_grads["K"]  = drdk
    return model_grads
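
#illustrative numerical gradient check on the bias, assuming the cross-entropy
#loss -log(F[y]) that the backward pass differentiates
def nll_loss(x, y, model):
    F, _, _, _ = forward(x, y, model)
    return -np.log(F[y])

x_chk = np.reshape(x_train[0], (-1, 28))
y_chk = y_train[0]
F_chk, H_chk, U_chk, Z_chk = forward(x_chk, y_chk, model)
grads_chk = backward(x_chk, y_chk, model, F_chk, H_chk, U_chk, Z_chk)
eps = 1e-5
num_grad = np.zeros(num_outputs)
for i in range(num_outputs):
    model["b1"][i] += eps
    loss_plus = nll_loss(x_chk, y_chk, model)
    model["b1"][i] -= 2*eps
    loss_minus = nll_loss(x_chk, y_chk, model)
    model["b1"][i] += eps
    num_grad[i] = (loss_plus - loss_minus)/(2*eps)
#the difference should be very small if the analytic gradient matches
print(np.max(np.abs(num_grad - grads_chk["b1"])))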

#Training Loop
print("!"*40)
print("!"*15 + " TRAINING " + "!"*15)
print("!"*40)

time1 = time.time()

x_train_p = x_train[0:ps]
y_train_p = y_train[0:ps]

for epochs in range(num_epochs):
    total_correct = 0
    losses = np.zeros(len(x_train_p))
    for n in range(len(x_train_p)):
        n_random = randint(0,len(x_train_p)-1)
        y = y_train_p[n_random]
        x = x_train_p[n_random][:]
        x = np.reshape(x,(-1,28))
        #forward
        F, H, U, Z = forward(x, y, model)
        #backwards
        model_grads = backward(x,y, model, F, H, U, Z)
        #update grads
        model["K"] -= LR*model_grads["K"]
        model["W"] -= LR*model_grads["W"]
        model["b1"] -= LR*model_grads["b1"]
        
        prediction = np.argmax(F)
        if (prediction == y):
            total_correct += 1
            
            
    print("Epoch: ", epochs, "Training:", total_correct/np.float(len(x_train_p)) )
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!!!!!!!!!!!!! TRAINING !!!!!!!!!!!!!!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
Epoch:  0 Training: 0.8322857142857143
Epoch:  1 Training: 0.908
Epoch:  2 Training: 0.8925714285714286
Epoch:  3 Training: 0.8271428571428572
Epoch:  4 Training: 0.8777142857142857
Epoch:  5 Training: 0.904
Epoch:  6 Training: 0.9274285714285714
Epoch:  7 Training: 0.926
Epoch:  8 Training: 0.9448571428571428
Epoch:  9 Training: 0.9417142857142857
Epoch:  10 Training: 0.95
Epoch:  11 Training: 0.956
Epoch:  12 Training: 0.9625714285714285
Epoch:  13 Training: 0.9634285714285714
Epoch:  14 Training: 0.9548571428571428
Epoch:  15 Training: 0.9562857142857143
Epoch:  16 Training: 0.964
Epoch:  17 Training: 0.9711428571428572
Epoch:  18 Training: 0.9737142857142858
Epoch:  19 Training: 0.9562857142857143
Epoch:  20 Training: 0.97
Epoch:  21 Training: 0.9702857142857143
Epoch:  22 Training: 0.9728571428571429
Epoch:  23 Training: 0.9814285714285714
Epoch:  24 Training: 0.9777142857142858
Epoch:  25 Training: 0.9691428571428572
Epoch:  26 Training: 0.9525714285714286
Epoch:  27 Training: 0.9634285714285714
Epoch:  28 Training: 0.9582857142857143
Epoch:  29 Training: 0.9842857142857143
Epoch:  30 Training: 0.9911428571428571
Epoch:  31 Training: 0.9885714285714285
Epoch:  32 Training: 0.9954285714285714
Epoch:  33 Training: 0.9954285714285714
Epoch:  34 Training: 0.9985714285714286
Epoch:  35 Training: 0.9988571428571429
Epoch:  36 Training: 0.9937142857142857
Epoch:  37 Training: 0.986
Epoch:  38 Training: 0.9934285714285714
Epoch:  39 Training: 0.992
Epoch:  40 Training: 0.9908571428571429
Epoch:  41 Training: 0.9834285714285714
Epoch:  42 Training: 0.9488571428571428
Epoch:  43 Training: 0.9691428571428572
Epoch:  44 Training: 0.9517142857142857
Epoch:  45 Training: 0.9714285714285714
Epoch:  46 Training: 0.9734285714285714
Epoch:  47 Training: 0.972
Epoch:  48 Training: 0.9605714285714285
Epoch:  49 Training: 0.9662857142857143
#Testing the overfit model
#~84.6% test accuracy
print("~"*40)
print("~"*16 + " TESTING " + "~"*16)
print("~"*40)



total_correct = 0
for n in range(len(x_test)):
    y = y_test[n]
    x = x_test[n][:]
    x = np.reshape(x,(-1,28))
    p, _, _, _ = forward(x, y, model)
    prediction = np.argmax(p)
    if (prediction == y):
        total_correct += 1
print("Testing:", total_correct/np.float(len(x_test) ) )

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~ TESTING ~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Testing: 0.8459
