## Gates
## XOR - Backpropagation (BP)
## ###################################################
import numpy as np
## X can be whatever you want it to be.
## For XOR, use the truth table below.
print("STARTING\n.......")
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
print("X is:\n", X)
print("The shape of X is\n", X.shape)
## Set y to match your choice of X.
## These are just examples I was playing with.
y = np.array([[0], [1], [1], [0]])
print("y is:\n", y)
print("The shape of Y is\n", y.shape)
##-----------------------------------------
## NOTE ##
## You can set the parameters randomly or by hand.
## Both options are below.
##---------------------------------------------
class NeuralNetwork(object):
    def __init__(self):
        self.InputNumColumns = 2  ## columns, c
        self.OutputSize = 1
        self.HiddenUnits = 2  ## one hidden layer with h units
        self.n = 4  ## number of training examples, n
        ## Random W1
        self.W1 = np.random.randn(self.InputNumColumns, self.HiddenUnits)  # c by h
        ##self.W1 = [[1, 1], [1, 1]]  # If YOU want to control these values
        print("W1 is\n", self.W1)
        self.W2 = np.random.randn(self.HiddenUnits, self.OutputSize)  # h by o
        #self.W2 = np.array([[1], [1]])  # If YOU want to control these values
        print("W2 is:\n", self.W2)
        ## biases for layer 1, shape 1 by h
        self.b = np.random.randn(1, self.HiddenUnits)
        #self.b = [[1, 1]]  # If YOU want to control these values
        print("The b's are:\n", self.b)
        ## bias for the last layer
        self.c = np.random.randn(1, self.OutputSize)
        #self.c = 1  # If YOU want to control this value
        print("The c is\n", self.c)
        ## Set GA to True to use the average gradient over all
        ## examples rather than the sum.
        self.GA = False
    def FeedForward(self, X):
        print("FeedForward\n\n")
        ## X is n by c, W1 is c by h --> z is n by h
        self.z = np.dot(X, self.W1) + self.b
        print("Z1 is:\n", self.z)
        self.h = self.Sigmoid(self.z)  ## activation function, shape: n by h
        print("H is:\n", self.h)
        print("The c is\n", self.c)
        ## h is n by h, W2 is h by o --> z2 is n by o
        self.z2 = np.dot(self.h, self.W2) + self.c
        print("Z2 is:\n", self.z2)
        output = self.Sigmoid(self.z2)
        print("Y^ - the output is:\n", output)
        return output
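    ## In matrix form, the forward pass above is:
    ## Z1 = X W1 + b,  H = Sigmoid(Z1),  Z2 = H W2 + c,  Y^ = Sigmoid(Z2)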
    def Sigmoid(self, s, deriv=False):
        ## NOTE: when deriv=True, s is expected to already be a sigmoid
        ## output (an activation), so the derivative is s * (1 - s).
        if deriv:
            return s * (1 - s)
        return 1 / (1 + np.exp(-s))
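    ## Sanity check: Sigmoid(0) = 0.5, and for an activation a = 0.5 the
    ## derivative form gives a * (1 - a) = 0.25, the sigmoid's maximum slope.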
    def BackProp(self, X, y, output):
        print("\n\nBackProp\n")
        print("input X is\n", X)
        print("input y is \n", y)
        self.LR = 1  ## learning rate
        ## Y^ - Y
        self.output_error = output - y
        #print("Y^ - Y\n", self.output_error)
        #print("SIG Y^\n", self.Sigmoid(output, deriv=True))
        print("Y is\n", y)
        print("Y^ is\n", output)
        ## (Y^ - Y)(Y^)(1 - Y^)
        self.output_delta = self.output_error * self.Sigmoid(output, deriv=True)
        print("D_Error (Y^)(1-Y^)(Y^-Y) is:\n", self.output_delta)
        ## (Y^ - Y)(Y^)(1 - Y^)(W2)
        self.D_Error_W2 = self.output_delta.dot(self.W2.T)  # D_Error times W2
        print("W2.T is\n", self.W2.T)
        print("D_Error times W2.T\n", self.D_Error_W2)
        ## (H)(1 - H)(Y^ - Y)(Y^)(1 - Y^)(W2)
        ## Note that * multiplies respective values in the two matrices
        self.H_D_Error_W2 = self.D_Error_W2 * self.Sigmoid(self.h, deriv=True)
        #print("Derivative sig H is:\n", self.Sigmoid(self.h, deriv=True))
        print("self.H_D_Error_W2 is\n", self.H_D_Error_W2)
        ## mean over the n examples: this is the gradient for the b biases
        self.H_D_Error_W2_mean = np.mean(self.H_D_Error_W2, axis=0)
        #print("self.H_D_Error_W2 mean is\n", self.H_D_Error_W2_mean)
        ## X.T (H)(1 - H)(Y^ - Y)(Y^)(1 - Y^)(W2)
        print("X transpose is\n", X.T)
        self.X_H_D_Error_W2 = X.T @ self.H_D_Error_W2  ## this is dW1
        print("X_H_D_Error_W2 is\n", self.X_H_D_Error_W2)
        ## (H).T (Y^ - Y)(Y^)(1 - Y^)
        self.h_output_delta = self.h.T.dot(self.output_delta)  ## this is dW2
        if self.GA:
            print("Using average gradient........\n")
            ## average the gradients over the n examples
            self.W1 = self.W1 - self.LR * (self.X_H_D_Error_W2 / self.n)
            self.W2 = self.W2 - self.LR * (self.h_output_delta / self.n)
        else:
            print("Using sum gradient........\n")
            print("W1 was :\n", self.W1)
            ## c by h: adjusting first set (input -> hidden) weights
            self.W1 = self.W1 - self.LR * self.X_H_D_Error_W2
            print("Updated W1 is: \n", self.W1)
            print("W2 was :\n", self.W2)
            ## adjusting second set (hidden -> output) weights
            self.W2 = self.W2 - self.LR * self.h_output_delta
            print("Updated W2 is: \n", self.W2)
        print("The mean of the biases b gradient is:\n", self.H_D_Error_W2_mean)
        print("The b biases before the update are:\n", self.b)
        self.b = self.b - self.LR * self.H_D_Error_W2_mean
        print("The new updated bs are:\n", self.b)
        print("The c gradient (the output delta) is: \n", self.output_delta)
        print("c bias before:", self.c)
        self.c = self.c - self.LR * np.mean(self.output_delta)
        print("The c bias after the update:", self.c)
################################################################
    def TrainNetwork(self, X, y):
        output = self.FeedForward(X)
        self.BackProp(X, y, output)
        return output
#-------------------------------------------------------------------
MyNN = NeuralNetwork()
TotalLoss = []
AvgLoss = []
Epochs = 800
for i in range(Epochs):
    print("\nRUN:\n ", i)
    output = MyNN.TrainNetwork(X, y)
    #print("The y is ...\n", y)
    print("The output is: \n", output)
    ## Total (sum of squared errors) and average loss, each times 1/2
    print("Total Loss:", .5 * (np.sum(np.square(output - y))))
    TotalLoss.append(.5 * (np.sum(np.square(output - y))))
    print("Average Loss:", .5 * (np.mean(np.square(output - y))))
    AvgLoss.append(.5 * (np.mean(np.square(output - y))))
## Apply a 0.5 threshold filter to the output (note: modifies V in place)
def FormatOutput(V):
    V[V >= .5] = 1
    V[V < .5] = 0
    return V
print("The final prediction is\n", FormatOutput(output))
##------------------- output and vis -------------------
#print("Total Loss List:", TotalLoss)
import matplotlib.pyplot as plt
x = np.linspace(0, Epochs, Epochs)  ## one x value per epoch
fig1 = plt.figure()
ax = plt.axes()
plt.title("Total Loss")
ax.plot(x, TotalLoss)
fig2 = plt.figure()
ax = plt.axes()
plt.title("Average Loss")
ax.plot(x, AvgLoss)
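## Render both figures (needed when running this as a plain script
## rather than in an interactive environment).
plt.show()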
###############################
##
## This is just to show examples
## for how matrix mult works
##
################################
# M1 = np.array([[1, 2], [0, 1]])
# print(np.mean(M1, axis=0))
# print(np.mean(M1, axis=1))
# M2 = np.array([[3, 4], [-1, -1]])
# V1 = np.array([[3, 4]])
# N1 = 5
# print("M1 * M2 = \n", M1*M2)
# print("M1 @ M2 = \n", M1@M2)
# print("M1 * V1 = \n", M1*V1)
# print("M1 * N1 = \n", M1*N1)