Going Back-to-Basics – Logistic Regression solved using SGD from Scratch
import numpy as np
import h5py
import time
import copy
from random import randint
import argparse
#load MNIST data
MNIST_data = h5py.File('MNISTdata.hdf5', 'r')
x_train = np.float32(MNIST_data['x_train'][:] )
y_train = np.int32(np.array(MNIST_data['y_train'][:,0]))
x_test = np.float32( MNIST_data['x_test'][:] )
y_test = np.int32( np.array( MNIST_data['y_test'][:,0] ) )
#done loading; close the file
MNIST_data.close()
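A quick sanity check on the loaded arrays helps confirm the layout (assuming the HDF5 file stores each image already flattened to 784 values; standard MNIST has 60,000 training and 10,000 test examples):
#optional sanity check of the loaded data (assumes images are stored flattened)
print(x_train.shape, y_train.shape)   # expected: (60000, 784) (60000,)
print(x_test.shape, y_test.shape)     # expected: (10000, 784) (10000,)
print(x_train.min(), x_train.max())   # pixel values, typically scaled to [0, 1]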
#Implementation of the stochastic gradient descent algorithm
#number of inputs
num_inputs = 28*28
#number of outputs
num_outputs = 10
model = {}
#Xavier initialization of the weight matrix W1 (no bias term in this model)
model['W1'] = np.random.randn(num_outputs,num_inputs) / np.sqrt(num_inputs)
#model gradients
model_grads = copy.deepcopy(model)
#softmax function
def softmax_function(z):
    ZZ = np.exp(z)/np.sum(np.exp(z))
    return ZZ
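One caveat with the plain softmax above: np.exp can overflow for large logits. A common, numerically stable variant subtracts the maximum logit first, which leaves the output unchanged; a minimal sketch (the name softmax_stable is just illustrative, not part of the script above):
#numerically stable softmax: subtracting max(z) does not change the result,
#since exp(z - c)/sum(exp(z - c)) == exp(z)/sum(exp(z)) for any constant c
def softmax_stable(z):
    z_shift = z - np.max(z)
    expz = np.exp(z_shift)
    return expz / np.sum(expz)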
#Forward Pass
def forward(x, y, model):
    Z = np.dot(model['W1'], x)
    p = softmax_function(Z)
    return p
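Since forward just applies W1 followed by the softmax, its output is a length-10 vector of class probabilities that sums to 1. A quick check on a single training image (illustrative only, run against the randomly initialized weights):
#single-example check: p has one probability per digit class
p_example = forward(x_train[0], y_train[0], model)
print(p_example.shape, np.sum(p_example))   # expected: (10,) ~1.0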
#Backwards pass
#note: the derivative of the loss function is taken ahead of time (see the note and sketch after this function)
def backward(x, y, p, model, model_grads):
    dZ = -1.0*p
    dZ[y] = dZ[y] + 1.0
    for i in range(num_outputs):
        model_grads['W1'][i,:] = dZ[i]*x
    return model_grads
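For the cross-entropy loss L = -log p[y], the derivative with respect to the logits is p - e_y, where e_y is the one-hot vector for the true label. backward stores the negative of that, e_y - p, so the training loop below can simply add LR times the stored gradient (ascent on the log-likelihood). The per-row loop can also be written as a single outer product; an equivalent vectorized sketch (the name backward_vectorized is just illustrative):
#equivalent vectorized gradient: (e_y - p) x^T as an outer product
def backward_vectorized(x, y, p, model, model_grads):
    dZ = -1.0*p
    dZ[y] += 1.0
    model_grads['W1'] = np.outer(dZ, x)   # shape (num_outputs, num_inputs)
    return model_grads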
#Training Loop
print("!"*40)
print("!"*15 + " TRAINING " + "!"*15)
print("!"*40)
time1 = time.time()
LR = .01
num_epochs = 20
for epochs in range(num_epochs):
    #step-decay learning rate schedule (see the lr_schedule sketch below)
    if (epochs > 5):
        LR = 0.001
    if (epochs > 10):
        LR = 0.0001
    if (epochs > 15):
        LR = 0.00001
    total_correct = 0
    for n in range(len(x_train)):
        #pick a random training example (stochastic gradient descent)
        n_random = randint(0, len(x_train)-1)
        y = y_train[n_random]
        x = x_train[n_random][:]
        p = forward(x, y, model)
        prediction = np.argmax(p)
        if (prediction == y):
            total_correct += 1
        model_grads = backward(x, y, p, model, model_grads)
        #ascent step on the log-likelihood: add LR times (e_y - p) x^T
        model['W1'] = model['W1'] + LR*model_grads['W1']
    #training accuracy for this epoch
    print(total_correct/float(len(x_train)))
time2 = time.time()
print(time2-time1, "seconds")
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!!!!!!!!!!!!! TRAINING !!!!!!!!!!!!!!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
0.8959333333333334
0.9126833333333333
0.9195166666666666
0.9216
0.9215
0.9220666666666667
0.9278333333333333
0.9292166666666667
0.9293
0.9288
0.93085
0.9315666666666667
0.9313333333333333
0.93
0.9316833333333333
0.9301333333333334
0.93095
0.9329
0.9314333333333333
0.9308166666666666
84.81015992164612 seconds
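The step decay hard-coded inside the training loop can also be expressed as a small standalone helper, which makes the schedule easier to read and tweak; a minimal sketch reproducing the same values (the name lr_schedule is just illustrative):
#step-decay schedule matching the if-statements in the training loop:
#epochs 0-5 -> 0.01, 6-10 -> 0.001, 11-15 -> 0.0001, 16+ -> 0.00001
def lr_schedule(epoch):
    if epoch > 15:
        return 0.00001
    if epoch > 10:
        return 0.0001
    if epoch > 5:
        return 0.001
    return 0.01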
#Testing the trained model on held-out data
#~92% accuracy on the test set, in line with the training accuracy
######################################################
#test data
total_correct = 0
for n in range(len(x_test)):
    y = y_test[n]
    x = x_test[n][:]
    p = forward(x, y, model)
    prediction = np.argmax(p)
    if (prediction == y):
        total_correct += 1
print("{} accuracy on test ".format(total_correct/float(len(x_test))))
0.9242 accuracy on test