Neural network from scratch: only predicts training inputs correctly

Question

Here is a neural network I've been working on. It takes in an array of four zeros or ones and predicts whether that pattern of zeros and ones is a backslash.

import numpy as np
class NeuralNetwork():
    """Three-layer (input, hidden, output) sigmoid network trained by batch gradient descent.

    Gradients are accumulated over every sample seen in one ``train`` call and
    applied once, in ``change``, at the end of that call.

    Class attributes:
        correct: running count of confident, correct training predictions;
            reset to 0 at the end of each ``train`` call.
        num_predictions: number of interactive predictions the driver asks for.
        epochs: number of ``train`` calls the driver performs.
        learningRate: step size applied to the accumulated gradients.
    """

    correct = 0
    num_predictions = 10
    epochs = 5000
    learningRate = 0.1

    def __init__(self, sizes, sizeOfEpoch):
        """Allocate activations, parameters and gradient accumulators.

        Args:
            sizes: ``[n_inputs, n_hidden, n_outputs]``.
            sizeOfEpoch: number of training samples consumed per ``train`` call.
        """
        self.sizeOfEpoch = sizeOfEpoch
        self.dimensions = sizes

        # Activations of the hidden and output layers (filled by forwardProp).
        self.secondLayerNeurons = np.empty(sizes[1])
        self.outputNeurons = np.empty(sizes[2])

        # Parameters are drawn uniformly from [0, 1); the draw order is kept
        # stable so a fixed NumPy seed reproduces the same network.
        self.firstLayerWeights = np.random.rand(sizes[1], sizes[0])
        self.secondLayerWeights = np.random.rand(sizes[2], sizes[1])
        self.firstLayerBiases = np.random.rand(sizes[1])
        self.secondLayerBiases = np.random.rand(sizes[2])

        self._resetSummations()

        # Per-sample error terms (filled by backProp).
        self.hiddenLayerErrors = np.empty(sizes[1])
        self.outputLayerErrors = np.empty(sizes[2])

    def _resetSummations(self):
        """Zero all four gradient accumulators."""
        nInputs, nHidden, nOutputs = self.dimensions[:3]
        self.firstLayerWeightsSummations = np.zeros([nHidden, nInputs])
        self.secondLayerWeightsSummations = np.zeros([nOutputs, nHidden])
        self.firstLayerBiasesSummations = np.zeros(nHidden)
        self.secondLayerBiasesSummations = np.zeros(nOutputs)

    def sigmoid(self, x):
        """Return the logistic function 1 / (1 + e^-x), element-wise."""
        return 1/(1+np.exp(-x))

    def sigmoidDerivative(self, x):
        """Return x * (1 - x).

        This is the sigmoid derivative expressed in terms of the sigmoid's
        *output*, so ``x`` must already be an activation, not a pre-activation.
        """
        return np.multiply(x, (1-x))

    def _feedForward(self, inputs):
        """Return ``(hidden, output)`` activations for ``inputs`` without storing them."""
        hidden = self.sigmoid(np.dot(self.firstLayerWeights, inputs) + self.firstLayerBiases)
        output = self.sigmoid(np.dot(self.secondLayerWeights, hidden) + self.secondLayerBiases)
        return hidden, output

    def forwardProp(self, inputs):
        """Run a forward pass and store the activations on the instance."""
        self.secondLayerNeurons, self.outputNeurons = self._feedForward(inputs)

    def backProp(self, inputs, correct_output):
        """Accumulate the gradients for one sample; call after ``forwardProp``.

        The output-layer error is ``output - target`` with no sigmoid-derivative
        factor (the form that arises from a cross-entropy loss on a sigmoid
        output).  The hidden-layer error is that error propagated back through
        the second-layer weights and scaled by the hidden sigmoid derivative.

        Args:
            inputs: the sample that was passed to ``forwardProp``.
            correct_output: target value(s) for the output layer.
        """
        self.outputLayerErrors = np.subtract(self.outputNeurons, correct_output)
        self.hiddenLayerErrors = np.multiply(
            np.dot(self.secondLayerWeights.T, self.outputLayerErrors),
            self.sigmoidDerivative(self.secondLayerNeurons),
        )

        # Weight gradient of a layer = outer(error of that layer, its input).
        self.secondLayerBiasesSummations += self.outputLayerErrors
        self.secondLayerWeightsSummations += np.outer(self.outputLayerErrors, self.secondLayerNeurons)
        self.firstLayerBiasesSummations += self.hiddenLayerErrors
        self.firstLayerWeightsSummations += np.outer(self.hiddenLayerErrors, inputs)

    def train(self, trainImages, trainLabels):
        """Run one epoch: accumulate gradients over ``sizeOfEpoch`` samples, then update.

        A sample counts as correct only when the output is confident:
        above 0.90 for label 1, or below 0.1 for label 0.

        Args:
            trainImages: array whose first ``sizeOfEpoch`` rows are the samples
                (each row is flattened before use).
            trainLabels: matching labels.
        """
        size = str(self.sizeOfEpoch)
        accuracy = '0%'  # keeps the summary line valid when sizeOfEpoch is 0

        for m in range(self.sizeOfEpoch):
            correct_output = trainLabels[m]
            sample = trainImages[m].flatten()

            self.forwardProp(sample)
            self.backProp(sample, correct_output)

            confidentHit = (
                (self.outputNeurons > 0.90 and correct_output == 1)
                or (self.outputNeurons < 0.1 and correct_output == 0)
            )
            if confidentHit:
                self.correct += 1
            accuracy = str(int((self.correct/(m+1))*100)) + '%'
            percent = str(int((m/self.sizeOfEpoch)*100)) + '%'
            print(f"Progress: {percent} -- Accuracy: {accuracy}", end="\r")
        self.change()
        self.correct = 0

        print(size+'/'+size+" -- Accuracy: "+accuracy+" -- Error: "+str(np.amax(np.absolute(self.outputLayerErrors))), end="\r")

    def change(self):
        """Apply the accumulated gradients and clear every accumulator.

        All four accumulators must be cleared here; otherwise the gradients of
        earlier epochs keep being re-applied on every later update.
        """
        self.secondLayerBiases -= self.learningRate*self.secondLayerBiasesSummations
        self.firstLayerBiases -= self.learningRate*self.firstLayerBiasesSummations
        self.secondLayerWeights -= self.learningRate*self.secondLayerWeightsSummations
        self.firstLayerWeights -= self.learningRate*self.firstLayerWeightsSummations
        self._resetSummations()

    def predict(self, testImage):
        """Return the output-layer activations for ``testImage``.

        Does not modify the stored activations.  Raises ``ValueError`` (from
        ``np.dot``) when the length of ``testImage`` does not match the input layer.
        """
        _, output = self._feedForward(testImage)
        return output


def main():
    """Train on the five example patterns, then classify patterns typed by the user.

    Each pattern is four 0/1 values; only [1, 0, 0, 1] is labelled as a backslash.
    """
    train_images = np.array([[1,0,0,1],[1,0,1,0],[0,1,0,1],[1,1,1,1],[0,0,0,0]])
    train_labels = np.array([1, 0, 0, 0, 0])
    neural_network = NeuralNetwork([4, 2, 1], train_images.shape[0])

    for i in range(neural_network.epochs):
        print("\nEpoch", str(i+1) + "/" + str(neural_network.epochs))
        neural_network.train(train_images, train_labels)

    for i in range(neural_network.num_predictions):
        print("\n\n\nNew Situations: " + str(i+1) + "/" + str(neural_network.num_predictions))

        # Parsing sits inside the try so that non-numeric input is reported
        # with the same retry message as a wrong number of values.
        try:
            A = list(map(int, input("Enter the numbers : ").strip().split()))[:4]
            result = neural_network.predict(A)
        except ValueError:
            print("\nValueError, try again")
            continue

        print("\nOutput Data:", result[0])
        if result[0] > 0.95:
            print("Result: Back Slash")
        else:
            print("Result: Not Back Slash")


if __name__ == "__main__":
    main()

This program only predicts the training examples correctly. When I give it [0,0,0,1] or [1,0,0,0] it predicts that it is a backslash. Also, I have run the classic xor problem with this exact code (just change the dimensions of the NN and also the training I/O) and it works perfectly. I have also made a logistic regression program almost identical to this one except it doesn't have a hidden layer. For the logistic regression program, all inputs are predicted correctly.

How do I fix/change this NN so that it can correctly predict inputs it hasn't seen before?

UPDATE: It works now. I just had to add a regularization term to the weight updates (and average the gradients over the epoch). Previously my lambda was 0.01; it worked once I tried a larger lambda.

New change code:

# Averaged-gradient update with L2 weight decay, written as whole-array
# operations.  The accumulated gradients are divided by the number of samples
# in the epoch; only the weights are decayed, the biases are not.
# NOTE(review): self.Lambda is not defined in the class shown above - confirm
# it is set elsewhere before this code runs.
samplesPerEpoch = self.sizeOfEpoch
stepSize = self.learningRate
weightDecay = self.Lambda

self.secondLayerBiases -= stepSize*(self.secondLayerBiasesSummations/samplesPerEpoch)
self.secondLayerWeights -= stepSize*(self.secondLayerWeightsSummations/samplesPerEpoch+weightDecay*self.secondLayerWeights)

self.firstLayerBiases -= stepSize*(self.firstLayerBiasesSummations/samplesPerEpoch)
self.firstLayerWeights -= stepSize*(self.firstLayerWeightsSummations/samplesPerEpoch+weightDecay*self.firstLayerWeights)

Hi again. You can probably plot the decision boundaries for your model by evaluating [x, 0, 0, y] for x=y=np.linspace(0, 1, 100) in a 3D plot. This might clear up how this can happen, if not why. See: https://matplotlib.org/3.1.0/gallery/mplot3d/surface3d.html — Multihunter, Aug 06 '20 at 03:59
@Multihunter I'm not very familiar with making graphs but could you please explain how to do this in python and why it would help? Thanks a lot! — Joey, Aug 06 '20 at 04:07
You are probably encountering overfitting, i.e. good performance on the training data and bad performance on unseen data. — Michael M, Aug 06 '20 at 06:03

Neural network from scratch: only predicts training inputs correctly

0 Answers