## Gates
## XOR - Backpropagation (BP)
## ###################################################
import numpy as np
## X can be whatever you want it to be.
## For XOR, use the truth table below.
print("STARTING\n.......")
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
print("X is:\n", X)
print("The shape of X is\n", X.shape)
## Set y to match your choice of X.
## These are just examples I was playing with.
y = np.array([[0], [1], [1], [0]])
print("y is:\n", y)
print("The shape of Y is\n", y.shape)
##-----------------------------------------
## NOTE ##
## You can set the parameters randomly or by hand.
## Both options are below.
##---------------------------------------------
class NeuralNetwork(object):
    def __init__(self):
        self.InputNumColumns = 2  ## columns, c
        self.OutputSize = 1
        self.HiddenUnits = 2  ## one hidden layer with h units
        self.n = 4  ## number of training examples, n
        ## Random W1
        self.W1 = np.random.randn(self.InputNumColumns, self.HiddenUnits)  # c by h
        ##self.W1 = [[1, 1], [1, 1]]  # If YOU want to control these values
        print("W1 is\n", self.W1)
        self.W2 = np.random.randn(self.HiddenUnits, self.OutputSize)  # h by o
        #self.W2 = np.array([[1], [1]])  # If YOU want to control these values
        print("W2 is:\n", self.W2)
        ## biases for layer 1, shape 1 by h
        self.b = np.random.randn(1, self.HiddenUnits)
        #self.b = [[1, 1]]  # If YOU want to control these values
        print("The b's are:\n", self.b)
        ## bias for the last layer
        self.c = np.random.randn(1, self.OutputSize)
        #self.c = 1  # If YOU want to control this value
        print("The c is\n", self.c)
        ## Set GA to True to use the average gradient over all
        ## examples rather than the sum.
        self.GA = False
    def FeedForward(self, X):
        print("FeedForward\n\n")
        ## X is n by c, W1 is c by h --> z is n by h
        self.z = np.dot(X, self.W1) + self.b
        print("Z1 is:\n", self.z)
        self.h = self.Sigmoid(self.z)  ## activation function, shape: n by h
        print("H is:\n", self.h)
        print("The c is\n", self.c)
        ## h is n by h, W2 is h by o --> z2 is n by o
        self.z2 = np.dot(self.h, self.W2) + self.c
        print("Z2 is:\n", self.z2)
        output = self.Sigmoid(self.z2)
        print("Y^ - the output is:\n", output)
        return output
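    ## In matrix form, the forward pass above is:
    ## Z1 = X W1 + b,  H = Sigmoid(Z1),  Z2 = H W2 + c,  Y^ = Sigmoid(Z2)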
    def Sigmoid(self, s, deriv=False):
        ## NOTE: when deriv=True, s is expected to already be a sigmoid
        ## output (an activation), so the derivative is s * (1 - s).
        if deriv:
            return s * (1 - s)
        return 1 / (1 + np.exp(-s))
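    ## Sanity check: Sigmoid(0) = 0.5, and for an activation a = 0.5 the
    ## derivative form gives a * (1 - a) = 0.25, the sigmoid's maximum slope.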
    def BackProp(self, X, y, output):
        print("\n\nBackProp\n")
        print("input X is\n", X)
        print("input y is \n", y)
        self.LR = 1  ## learning rate
        ## Y^ - Y
        self.output_error = output - y
        #print("Y^ - Y\n", self.output_error)
        #print("SIG Y^\n", self.Sigmoid(output, deriv=True))
        print("Y is\n", y)
        print("Y^ is\n", output)
        ## (Y^ - Y)(Y^)(1 - Y^)
        self.output_delta = self.output_error * self.Sigmoid(output, deriv=True)
        print("D_Error (Y^)(1-Y^)(Y^-Y) is:\n", self.output_delta)
        ## (Y^ - Y)(Y^)(1 - Y^)(W2)
        self.D_Error_W2 = self.output_delta.dot(self.W2.T)  # D_Error times W2
        print("W2.T is\n", self.W2.T)
        print("D_Error times W2.T\n", self.D_Error_W2)
        ## (H)(1 - H)(Y^ - Y)(Y^)(1 - Y^)(W2)
        ## Note that * multiplies respective values in the two matrices
        self.H_D_Error_W2 = self.D_Error_W2 * self.Sigmoid(self.h, deriv=True)
        #print("Derivative sig H is:\n", self.Sigmoid(self.h, deriv=True))
        print("self.H_D_Error_W2 is\n", self.H_D_Error_W2)
        ## mean over the n examples: this is the gradient for the b biases
        self.H_D_Error_W2_mean = np.mean(self.H_D_Error_W2, axis=0)
        #print("self.H_D_Error_W2 mean is\n", self.H_D_Error_W2_mean)
        ## X.T (H)(1 - H)(Y^ - Y)(Y^)(1 - Y^)(W2)
        print("X transpose is\n", X.T)
        self.X_H_D_Error_W2 = X.T @ self.H_D_Error_W2  ## this is dW1
        print("X_H_D_Error_W2 is\n", self.X_H_D_Error_W2)
        ## (H).T (Y^ - Y)(Y^)(1 - Y^)
        self.h_output_delta = self.h.T.dot(self.output_delta)  ## this is dW2
        if self.GA:
            print("Using average gradient........\n")
            ## average the gradients over the n examples
            self.W1 = self.W1 - self.LR * (self.X_H_D_Error_W2 / self.n)
            self.W2 = self.W2 - self.LR * (self.h_output_delta / self.n)
        else:
            print("Using sum gradient........\n")
            print("W1 was :\n", self.W1)
            ## c by h: adjusting first set (input -> hidden) weights
            self.W1 = self.W1 - self.LR * self.X_H_D_Error_W2
            print("Updated W1 is: \n", self.W1)
            print("W2 was :\n", self.W2)
            ## adjusting second set (hidden -> output) weights
            self.W2 = self.W2 - self.LR * self.h_output_delta
            print("Updated W2 is: \n", self.W2)
        print("The mean of the biases b gradient is:\n", self.H_D_Error_W2_mean)
        print("The b biases before the update are:\n", self.b)
        self.b = self.b - self.LR * self.H_D_Error_W2_mean
        print("The new updated bs are:\n", self.b)
        print("The c gradient (the output delta) is: \n", self.output_delta)
        print("c bias before:", self.c)
        self.c = self.c - self.LR * np.mean(self.output_delta)
        print("The c bias after the update:", self.c)
################################################################
    def TrainNetwork(self, X, y):
        output = self.FeedForward(X)
        self.BackProp(X, y, output)
        return output
#-------------------------------------------------------------------
MyNN = NeuralNetwork()
TotalLoss = []
AvgLoss = []
Epochs = 800
for i in range(Epochs):
    print("\nRUN:\n ", i)
    output = MyNN.TrainNetwork(X, y)
    #print("The y is ...\n", y)
    print("The output is: \n", output)
    ## Total (sum of squared errors) and average loss, each times 1/2
    print("Total Loss:", .5 * (np.sum(np.square(output - y))))
    TotalLoss.append(.5 * (np.sum(np.square(output - y))))
    print("Average Loss:", .5 * (np.mean(np.square(output - y))))
    AvgLoss.append(.5 * (np.mean(np.square(output - y))))
## Apply a 0.5 threshold filter to the output (note: modifies V in place)
def FormatOutput(V):
    V[V >= .5] = 1
    V[V < .5] = 0
    return V
print("The final prediction is\n", FormatOutput(output))
##------------------- output and vis -------------------
#print("Total Loss List:", TotalLoss)
import matplotlib.pyplot as plt
x = np.linspace(0, Epochs, Epochs)  ## one x value per epoch
fig1 = plt.figure()
ax = plt.axes()
plt.title("Total Loss")
ax.plot(x, TotalLoss)
fig2 = plt.figure()
ax = plt.axes()
plt.title("Average Loss")
ax.plot(x, AvgLoss)
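## Render both figures (needed when running this as a plain script
## rather than in an interactive environment).
plt.show()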
###############################
##
## This is just to show examples
## for how matrix mult works
##
################################
# M1 = np.array([[1, 2], [0, 1]])
# print(np.mean(M1, axis=0))
# print(np.mean(M1, axis=1))
# M2 = np.array([[3, 4], [-1, -1]])
# V1 = np.array([[3, 4]])
# N1 = 5
# print("M1 * M2 = \n", M1*M2)
# print("M1 @ M2 = \n", M1@M2)
# print("M1 * V1 = \n", M1*V1)
# print("M1 * N1 = \n", M1*N1)