## NN - Feed Forward (FF) and Back Propagation (BP)
## Gates
## Multiple outputs - 3 labels
## 4D data
## One-hot Encoding
## Categorical Cross Entropy
## Softmax
## ###################################################
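## Key formulas used below (for reference):
##   Softmax:               y^_k = exp(z_k) / sum_j exp(z_j)
##   Categorical CE loss:   L = - sum_k y_k * log(y^_k)     (y is one-hot)
##   Combined gradient:     dL/dz2 = y^ - y
## The last line is why BackProp below uses (Y^ - Y) directly, with no extra
## activation-derivative factor at the output layer.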
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
############## Using a dataset with THREE label categories, 1, 2, and 3---------------
## DATAset
# https://drive.google.com/file/d/1bbYwSUBXufbupPoAfqbBzhdMMdZfnLcI/view?usp=sharing
## Use YOUR path !!...
filename="StudentSummerProgramData_Numeric_3NumLabeled_3D.csv"
DF = pd.read_csv("C:/Users/profa/Desktop/UCB/NNCSCI5922/Code/"+str(filename))
#print(DF)
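## The CSV is expected to have the numeric label (1, 2, or 3) in its first
## column and four numeric feature columns after it; those five columns are
## the ones used below.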
## Take the label (first column) off of the data and make it a numpy array
y = np.array(DF.iloc[:,0]).T
y = np.array([y]).T   ## reshape to an (n, 1) column vector
#print("y is\n", y)
original_y_values=y # save a copy
## Normalize the data (not the label!) using z-score standardization,
## or use min/max: normalized_df=(df-df.min())/(df.max()-df.min())
DF=DF.iloc[:, [1, 2, 3, 4]]   ## keep only the four feature columns
DF=(DF-DF.mean())/DF.std()
#print(DF)
X = np.array(DF)
#print("X is\n", X)
InputColumns = 4
NumberOfLabels = 3
n = len(DF) ## number of rows of entire X
## Learning rates for the weights (LR) and for the biases (LRB)
LR = .01
LRB = .01
#................................................
###################### Creating one hot labels for y ------------------
temp = y
#print(temp)
one_hot_labels = np.zeros((n, NumberOfLabels))
#print(one_hot_labels)
for i in range(n):
    ## Labels are 1, 2, 3 (per the note above), so shift to 0-based column indices
    one_hot_labels[i, int(temp[i, 0]) - 1] = 1
#print(one_hot_labels)
y = one_hot_labels
#print(" Y is\n", y)
##################------------------------------------
class NeuralNetwork(object):
    def __init__(self):
        self.InputNumColumns = InputColumns ## columns
        self.OutputSize = 3 ## Categories
        self.HiddenUnits = 2 ## one layer with h units
        self.n = n ## number of training examples, n
        print("Initialize NN\n")
        # Random W1
        self.W1 = np.random.randn(self.InputNumColumns, self.HiddenUnits) # c by h
        print("INIT W1 is\n", self.W1)
        ##-----------------------------------------
        ## NOTE ##
        ##
        ## The following are all random. However, you can comment this out
        ## and set any weights and biases by hand, etc.
        ##---------------------------------------------
        self.W2 = np.random.randn(self.HiddenUnits, self.OutputSize) # h by o
        print("W2 is:\n", self.W2)
        ## biases for layer 1
        self.b = np.random.randn(1, self.HiddenUnits)
        print("The b's are:\n", self.b)
        ## bias for the last layer
        self.c = np.random.randn(1, self.OutputSize)
        print("The c is\n", self.c)
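        ## Shape summary for this configuration (4 inputs, 2 hidden units, 3 outputs):
        ##   W1: (4, 2)   b: (1, 2)   W2: (2, 3)   c: (1, 3)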
    def FeedForward(self, X):
        print("FeedForward\n\n")
        ## X is n by c, W1 is c by h --> Z1 is n by h
        self.z = (np.dot(X, self.W1)) + self.b
        print("Z1 is:\n", self.z)
        self.h = self.Sigmoid(self.z) ## activation function; shape: n by h
        print("H is:\n", self.h)
        self.z2 = (np.dot(self.h, self.W2)) + self.c # n by h @ h by o --> n by o
        print("Z2 is:\n", self.z2)
        ## Using Softmax for the output activation
        output = self.Softmax(self.z2)
        print("output Y^ (SM of Z2) is:\n", output)
        return output
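        ## Forward pass, in summary:
        ##   Z1 = X @ W1 + b          (n by h)
        ##   H  = sigmoid(Z1)         (n by h)
        ##   Z2 = H @ W2 + c          (n by o)
        ##   Y^ = softmax(Z2)         (n by o)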
    def Sigmoid(self, s, deriv=False):
        ## With deriv=True, s is assumed to already be a sigmoid output
        ## (as with self.h), so the derivative is s * (1 - s)
        if (deriv == True):
            return s * (1 - s)
        return 1/(1 + np.exp(-s))
    def Softmax(self, M):
        #print("M is\n", M)
        expM = np.exp(M)
        #print("expM is\n", expM)
        ## divide each row by its sum so that each row of SM sums to 1
        SM=expM/np.sum(expM, axis=1)[:,None]
        #print("SM is\n",SM )
        return SM
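        ## Note: np.exp can overflow for large Z2 values. A common numerically
        ## stable variant (a sketch, not what is used above) subtracts the row
        ## max first, which does not change the softmax result:
        ##   expM = np.exp(M - np.max(M, axis=1, keepdims=True))
        ##   SM = expM / np.sum(expM, axis=1, keepdims=True)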
    def BackProp(self, X, y, output):
        print("\n\nBackProp\n")
        self.LR = LR
        self.LRB = LRB ## learning rate for the biases
        # Y^ - Y
        self.output_error = output - y
        #print("Y^ - Y\n", self.output_error)
        ## NOTE TO READER........................
        ## Here we do NOT multiply by the derivative of the sigmoid for Y^ because
        ## we are using softmax with categorical cross entropy; their combined
        ## derivative with respect to Z2 is simply (Y^ - Y).
        self.output_delta = self.output_error
        ## (Y^ - Y)(W2.T)
        self.D_Error_W2 = self.output_delta.dot(self.W2.T) # D_Error times W2
        print("(Y^ - Y) is\n", self.output_delta)
        print("W2.T is\n", self.W2.T)
        print(" (Y^ - Y) @ W2.T\n", self.D_Error_W2)
        ## (H)(1 - H) * (Y^ - Y)(W2.T)
        ## We still use the sigmoid derivative on H
        self.H_D_Error_W2 = self.D_Error_W2 * self.Sigmoid(self.h, deriv=True)
        ## Note that * multiplies the two matrices element-wise
        #print("Derivative sig H is:\n", self.Sigmoid(self.h, deriv=True))
        #print("self.H_D_Error_W2 is\n", self.H_D_Error_W2)
        ################------ UPDATE weights and biases ------------------
        print("Old W1: \n", self.W1)
        #print("Old W2 is:\n", self.W2)
        #print("X transpose is\n", X.T)
        ## X.T @ [(H)(1 - H) * (Y^ - Y)(W2.T)]
        print("Using sum gradient........\n")
        ## The sum over the n examples occurs implicitly because we multiply by X.T
        ## dW1 ==> (X.T) @ [(H)(1 - H) * (Y^ - Y)(W2.T)]
        self.X_H_D_Error_W2 = X.T.dot(self.H_D_Error_W2) ## this is dW1
        ## dW2 ==> (H.T) @ (Y^ - Y)
        self.h_output_delta = self.h.T.dot(self.output_delta) ## this is dW2
        #print("the gradient:\n", self.X_H_D_Error_W2)
        #print("the gradient average:\n", self.X_H_D_Error_W2/self.n)
        self.W1 = self.W1 - self.LR*(self.X_H_D_Error_W2) # c by h; adjusts the input -> hidden weights
        self.W2 = self.W2 - self.LR*(self.h_output_delta)
        print("The mean of the b update is\n", np.mean(self.H_D_Error_W2, axis=0))
        print("The b biases before the update are:\n", self.b)
        self.b = self.b - self.LRB*np.mean(self.H_D_Error_W2, axis=0)
        #print("The H_D_Error_W2 is...\n", self.H_D_Error_W2)
        print("Updated b's are:\n", self.b)
        self.c = self.c - self.LRB*np.mean(self.output_delta, axis=0) ## use the bias learning rate here as well
        print("Updated c's are:\n", self.c)
        print("The W1 is: \n", self.W1)
        print("The W1 gradient is: \n", self.X_H_D_Error_W2)
        #print("The W1 gradient average is: \n", self.X_H_D_Error_W2/self.n)
        print("The W2 gradient is: \n", self.h_output_delta)
        #print("The W2 gradient average is: \n", self.h_output_delta/self.n)
        print("The mean bias b gradient is:\n", np.mean(self.H_D_Error_W2, axis=0))
        print("The mean bias c gradient is: \n", np.mean(self.output_delta, axis=0))
    ################################################################
    def TrainNetwork(self, X, y):
        output = self.FeedForward(X)
        #print("Output in TNN\n", output)
        self.BackProp(X, y, output)
        return output
#-------------------------------------------------------------------
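## Each call to TrainNetwork performs one full-batch epoch: a forward pass over
## all n rows followed by one gradient update. The loop below repeats this
## Epochs times and records the average loss after each epoch.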
MyNN = NeuralNetwork()
AvgLoss=[]
Epochs=500
for i in range(Epochs):
    print("\nRUN:\n ", i)
    output = MyNN.TrainNetwork(X, y)
    ## LOSS - categorical cross entropy; the one-hot y places the "1" so that
    ## only the log of the predicted probability for the true class is kept.
    ## np.mean averages over all n*3 entries, which rescales the loss but does
    ## not change the shape of the loss curve.
    loss = np.mean(-y * np.log(output))
    print("The current average loss is\n", loss)
    AvgLoss.append(loss)
## OUTPUT (from the final epoch)
#print("The raw output is: \n", output)
#print("Original y values:\n", original_y_values)
numeric_output = np.argmax(output, axis=1)  ## 0-based index of the predicted class
#print('Prediction y^ is', numeric_output)
#print("Original Labels y are\n", original_y_values)
## Using Categorical Cross Entropy...........
###################-output and vis----------------------
import matplotlib.pyplot as plt
fig1 = plt.figure()
ax = plt.axes()
x = np.linspace(0, Epochs, Epochs)
ax.plot(x, AvgLoss)
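## Optional: label the loss curve so the plot is self-explanatory
ax.set_xlabel("Epoch")
ax.set_ylabel("Average loss")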
## FIX THE SHAPES FIRST!!
print(numeric_output.shape)
numeric_output2 = np.array([numeric_output])
numeric_output2 = numeric_output2.T
print(numeric_output2.shape)
print(original_y_values.shape)
## argmax returns 0-based class indices; shift back to the original 1, 2, 3 labels
## so the predictions line up with original_y_values.
print("The confusion matrix (rows = true labels, columns = predictions) is:\n")
print(confusion_matrix(original_y_values, numeric_output2 + 1))
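## A small optional addition: overall accuracy (assuming the 1, 2, 3 labels above)
## and an explicit show() so the loss figure appears when run as a plain script.
print("Accuracy:", np.mean((numeric_output2 + 1) == original_y_values))
plt.show()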