I made a simple neural net in Python. My activation function is a (leaky) ReLU, i.e. max(-0.00001x, x). When I train it on XOR, it fails to fit the data in any reasonable way. I'm not sure whether I made a mistake in the backpropagation, but I've sanity-checked it against other online resources and don't see an obvious mistake.
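For clarity, here is the activation on its own (a standalone sketch; leaky and leaky_slope are just illustrative names — the listing below computes the same thing with a slope mask):

import numpy as np

def leaky(x):
    # max(-1e-5 * x, x): identity for x > 0, a small slope of -1e-5 otherwise
    return np.maximum(-1e-5 * x, x)

def leaky_slope(x):
    # piecewise derivative used in backprop: 1 where x > 0, -1e-5 where x <= 0
    return np.where(x > 0, 1.0, -1e-5)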
import numpy as np

class Layer:
    def __init__(self, nX, nY):
        self.nX = nX  # number of layer inputs
        self.nY = nY  # number of layer outputs
        self.W = np.random.rand(nX, nY)  # initialize layer weights uniformly in [0, 1)

    def forward_propagate(self, X):
        self.X = X
        self.Y_linear = np.dot(X, self.W)
        # slope of the activation: -1e-5 where the pre-activation is <= 0, 1 otherwise
        self.s_active = -1e-5 * (self.Y_linear <= 0) + 1.0 * (self.Y_linear > 0)
        self.Y_active = self.s_active * self.Y_linear
        return self.Y_active

    def backward_propagate(self, dEdY, alpha):
        dEdX = np.dot(dEdY * self.s_active, self.W.T)  # gradient w.r.t. the layer inputs
        self.W = self.W - alpha * np.dot(self.X.T, dEdY * self.s_active)  # gradient-descent step
        return dEdX

class Net:
    def __init__(self, nX, nY, layers):
        self.nX = nX
        self.nY = nY
        # chain the sizes nX -> hidden sizes -> nY into consecutive layers
        self.layers = [Layer(a, b) for a, b in zip([nX] + list(layers), list(layers) + [nY])]

    def predict(self, X):
        for layer in self.layers:
            X = layer.forward_propagate(X)
        return X

    def loss(self, X, Y):
        # RMS error: Frobenius norm of the residual, scaled by sqrt(batch size)
        return np.linalg.norm(Y - self.predict(X), ord='fro') / (Y.shape[0] ** 0.5)

    def fit(self, X, Y, alpha, niter):
        l0 = self.loss(X, Y)
        for i in range(niter):
            Yf = self.predict(X)
            dE = 2 / Y.shape[0] * (Yf - Y)  # gradient of the mean squared error
            for layer in self.layers[::-1]:
                dE = layer.backward_propagate(dE, alpha)
        lf = self.loss(X, Y)
        return l0, lf

# XOR of the first two columns; the third column is a constant 1 acting as a bias input
X = np.array([[1, 1, 1],
              [1, 0, 1],
              [0, 1, 1],
              [0, 0, 1]])
Y = np.array([[0],
              [1],
              [1],
              [0]])

net = Net(3, 1, [10, 10])
net.predict(X)
l0, lf = net.fit(X, Y, alpha=1e-5, niter=100000)
print(l0, lf)
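If it helps to verify the backprop independently, a finite-difference check along these lines can be run against it (grad_check is a throwaway helper sketched for illustration; it compares one weight's analytic gradient with a central difference of the mean squared error that fit descends):

import numpy as np

def grad_check(net, X, Y, layer_idx=0, i=0, j=0, eps=1e-6):
    # analytic gradient: one forward pass, then accumulate dE/dW per layer (alpha=0, so no update)
    Yf = net.predict(X)
    dE = 2 / Y.shape[0] * (Yf - Y)  # same error gradient as in fit()
    grads = []
    for layer in net.layers[::-1]:
        grads.append(np.dot(layer.X.T, dE * layer.s_active))  # dE/dW for this layer
        dE = layer.backward_propagate(dE, alpha=0.0)
    analytic = grads[::-1][layer_idx][i, j]

    # numeric gradient of the same loss, E = sum((Yf - Y)**2) / n
    def mse():
        return np.sum((Y - net.predict(X)) ** 2) / Y.shape[0]
    W = net.layers[layer_idx].W  # fetch after backprop: backward_propagate rebinds self.W
    W[i, j] += eps
    lp = mse()
    W[i, j] -= 2 * eps
    lm = mse()
    W[i, j] += eps  # restore the original weight
    return analytic, (lp - lm) / (2 * eps)

For example, grad_check(net, X, Y, layer_idx=1, i=2, j=3) should return two numbers that agree closely, unless eps pushes a pre-activation across zero (the activation has a kink there).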
For some reason, it is unable to learn the simple XOR function. I would greatly appreciate advice on what I am doing wrong.