This page introduces logistic regression with a PowerPoint guide and Python code. The small dataset used is included directly in the code. Below are the tutorial and the code.
PowerPoint Tutorial:
###################################################
## Gates
## Logistic Regression
##
## Topics:
## Using gradient descent
## Updating w and b with GD and the LR
## Using the Loss Function LCE: Cross Entropy
## Using the Sigmoid as the logistic activation
## Visualization
###################################################
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
from mpl_toolkits import mplot3d
## Confusion matrix (fancy)
import seaborn as sns
from sklearn.metrics import confusion_matrix
plt.rcParams["figure.figsize"] = (12, 9)
datafile="C:/Users/profa/Documents/Python Scripts/NeuralNetworks/SimpleLogRegDataSetBB_Players.csv"
## Here is the tiny dataset:
## Once you get this code to work on this tiny, easy, and already
## normalized dataset, try it on something more complex ;)
## Weight Height BB_Player
# 1.0 1.00 1
# 0.5 0.75 1
# 0.0 0.00 0
# 0.1 0.10 0
# 0.6 0.75 1
## In NumPy, np.log is base e (the natural log), np.log2 is base 2, etc.
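## For example, np.log(np.e) = 1.0 and np.log2(8) = 3.0.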
## !! Update the datafile path above to YOUR path
DF = pd.read_csv(datafile)
print(DF)
## Set y to the label. Check the shape!
y = np.array(DF.iloc[:,2]).T
#y = np.array([y]).T
print("y is\n", y)
print("The shape of y is\n", y.shape)
## NOTE: -------------------------------
## Normalize the data (and not the label!)
## Min-Max
## DF=(DF-DF.min())/(DF.max()-DF.min())
## Or z norm....
## DF=(DF-DF.mean())/DF.std() ## z-score standardization
## If your dataset is already normalized, skip the above.
## --------------------------------------------
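## A minimal optional sketch: min-max scale only the two feature columns,
## leaving the BB_Player label column alone. (On this tiny dataset the
## values are already in [0, 1], so nothing would change.)
#features = DF.iloc[:, [0, 1]]
#print((features - features.min())/(features.max() - features.min()))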
##Place the data (and not the labels) in DF
DF=DF.iloc[:, [0, 1]]
print(DF)
X = np.array(DF)
print("X is\n", X)
print("The shape of X is\n", X.shape)
##InputColumns = 2
##NumberOfLabels = 2
n = len(DF) ## number of rows of entire X
## Take the label off of X and make it a numpy array
#Learning Rate
LR=1
## Look at the dataset..........
fig = plt.figure()
ax = plt.axes(projection='3d')
x1 = X[:,0]
x2 = X[:,1]
print(y)
print(x1)
# plotting
ax.scatter(x1, x2, y, s=50)
ax.set_title('Dataset')
plt.show()
##----------------------------------
## Set up initial weights and biases
## -------------------------------------
w = np.array([[1,1]])
b = 0
print(w)
print(w.shape)
##------------------------------------------------
## Before we move forward, we know that at
## some point we will need to properly multiply X and w.
## Let's try to do this now. Recall that our linear equation
## looks like
## w1x1 + w2x2 + b
##------------------------------------------------
print("X is\n",X)
print("The shape of X is\n", X.shape)
print("w is\n",w)
print("w tranpose is\n", w.T)
print("The shape of w transpose is\n", w.T.shape)
z = (X @ w.T) + b
print("z = (X @ w.T) + b\n", z)
print("The shape of z is\n", z.shape)
## OK! Do this by hand as well and compare.
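## For example, the first row has Weight = 1.0 and Height = 1.00, so
## z[0] = w1*x1 + w2*x2 + b = 1*1.0 + 1*1.00 + 0 = 2.0, which should
## match the first entry of z printed above.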
## Next, we need to apply the sigmoid function to all the z value results.
## Let's create a function for sig
## Sigmoid activation. When deriv=True, it returns s*(1 - s), the sigmoid
## derivative, where s is assumed to already be a sigmoid output.
## (The deriv branch is not used below; the gradient is computed directly.)
def Sigmoid(s, deriv=False):
    if (deriv == True):
        return s * (1 - s)
    return 1/(1 + np.exp(-s))
## TEST YOUR FUNCTION!
print(Sigmoid(2)) ## answer should be .88079
## OK - it works and so now we can create S_z by applying
## the sigmoid to all the values in z
S_z = Sigmoid(z)
print("S(z) is\n", S_z)
##Note that S_z here is the same as y^
## It is the output of the logistic regression
y_hat = S_z
## Do and check this by hand.
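## By hand for the first row: S(z[0]) = S(2.0) = 1/(1 + e^(-2)) ≈ 0.8808,
## and that row's label is 1, so the initial prediction is already close.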
##-------------------------------------------
## What is our Loss function?
## How do we calculate the error
##
## Recall that our Loss function is
## the LCE - the cross entropy loss function
## for binary (0 and 1) labels.
## LCE = -(1/n) * SUM[ y*log(y^) + (1 - y)*log(1 - y^) ],
## where y^ is the predicted value
## and the log is log base e. The "y" is the label, which here is 0 or 1.
## The "n" is the number of rows in the dataset.
##---------------------------------------------------
## Now - think about what we are doing
## We want to minimize the LCE by updating w and b using gradient descent
## Our LR (learning rate) was set above and can be tuned.
## Shapes matter when using vectors and matrices.
## Here, we will need to transpose y
print("y is\n",y)
print("y_hat is\n", y_hat)
## Here, y is not the right shape for our goal - we need it as a column
## vector (the transpose). Always check shapes like this.
#print(type(y))
#print(np.transpose([y])) ## you need the []
y = np.transpose([y])
print("Updated y is\n",y)
print("y_hat is\n",y_hat)
##-------------------------
## Keep each LCE value
AllError_LCE=[]
##----------------------------------
## The epochs are the number of iterations we want to go through
## to recalculate w and b with the goal of optimization (minimization of LCE)
epochs=200
for i in range(epochs):
    print("Epoch \n", i)
    z = (X @ w.T) + b
    print("The z here is\n", z)
    y_hat = Sigmoid(z)
    print("The y_hat here is\n", y_hat)
    ## Get the LCE....
    ## Step 1 to get LCE
    Z1 = (y*np.log(y_hat)) + ((1 - y)*np.log(1 - y_hat))
    print("Z1 is\n", Z1)
    ## Step 2 to get LCE
    ## Next, we need to sum the values in Z1 and then divide by n
    LCE = -(np.sum(Z1))/n ## the "-" is in front because we multiply by -1/n
    print("The LCE for epoch ", i, "is\n", LCE)
    ## Keep each LCE value - each error
    AllError_LCE.append(LCE)
    ## Now we need to get the derivatives so we can update w and b
    ## Recall that dL/dw = dL/dy^ * dy^/dz * dz/dw --> (1/n) (y^ - y)^T X
    ## and dL/db = dL/dy^ * dy^/dz * dz/db --> (1/n) SUM(y^ - y)
    ## (An optional numerical check of these gradients appears right after this loop.)
    ## Let's get y^ - y first and let's call this "error"
    error = y_hat - y
    print("The error y^ - y is\n", error)
    ## Next, let's multiply the (y^ - y) by X so that we
    ## get the shape of w. Recall that w is 1 row by 2 columns.
    ## Let's print this to make sure ...
    print(w)
    print(w.shape)
    dL_dw = (1/n) * np.transpose(error) @ X
    print("The dL_dw is\n", dL_dw, "\n")
    ## Now let's get dL/db
    ## For b, dL/db = (1/n) * SUM(y^ - y), which is just the average of the
    ## error values (np.average already divides by n).
    dL_db = np.average(error)
    print("The dL_db is\n", dL_db)
    ## OK - let's look at the gradients for w and b
    print("The gradient for w is\n", dL_dw)
    print("The gradient for b is\n", dL_db)
    ## Use the gradients to update w and b
    w = w - (LR * dL_dw)
    b = b - (LR * dL_db)
    print("The new w value is\n", w)
############################end of for loop-----
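## ------------------------------------------------------------------
## Optional sanity check (not part of the original walk-through):
## numerically verify the analytic gradient dL/dw = (1/n)(y^ - y)^T X
## with a central finite difference. The helper name LCE_value below
## is made up for this sketch.
def LCE_value(w_check, b_check):
    y_hat_check = Sigmoid((X @ w_check.T) + b_check)
    return -np.sum(y*np.log(y_hat_check) + (1 - y)*np.log(1 - y_hat_check))/n

analytic_grad = (1/n) * np.transpose(Sigmoid((X @ w.T) + b) - y) @ X
eps = 1e-6
for j in range(w.shape[1]):
    w_plus = w.copy()
    w_minus = w.copy()
    w_plus[0, j] += eps
    w_minus[0, j] -= eps
    numeric_grad = (LCE_value(w_plus, b) - LCE_value(w_minus, b))/(2*eps)
    print("dL/dw[", j, "]  analytic:", analytic_grad[0, j], "  numeric:", numeric_grad)
## ------------------------------------------------------------------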
## Plot and print results
#-----------------------------------------
#print(len(AllError_LCE))
##Plot the change in Loss over epochs
fig1 = plt.figure()
plt.title("Loss Reduction Over Epochs")
plt.xlabel("Epochs")
plt.ylabel("Error")
ax = plt.axes()
x = np.linspace(0, epochs, epochs) #start, stop, how many
#print(x.shape)
ax.plot(x, AllError_LCE)
print("The predicted w is \n", w)
print("The predicted b is\n",b)
################################################
##
## Use the model from above to make
## predictions.
##
#################################################
## Read in test data
## !!! Remember that this model from above
## is ONLY for Height, Weight data to predict
## BB_Player as 0 or 1.
##
## NOTE: You can adapt this code
## to model other datasets as well.
## Note also that this code assumes that the data
## is min-max normalized.
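## If your test data were still in raw units, you would min-max scale it
## using the TRAINING set's min and max before predicting. A small sketch -
## the raw values and raw training min/max below are made up for illustration:
#raw_test = np.array([[180.0, 72.0]])    ## hypothetical raw weight, height
#train_min = np.array([100.0, 60.0])     ## hypothetical raw training minimums
#train_max = np.array([250.0, 80.0])     ## hypothetical raw training maximums
#print((raw_test - train_min)/(train_max - train_min))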
## Define X_test as:
X_test=np.array([ [.9,.6],[.2,.15],[.85,.78],[.13,.15],[.63,.65]])
print(X_test)
print(X_test.shape)
labels=np.array([[1],[0],[1],[0],[1]])
print(labels)
Prediction=Sigmoid((X_test @ w.T) + b)
## Update prediction using threshold >=.5 --> 1, else 0
Prediction[Prediction >= .5] = 1
Prediction[Prediction < .5] = 0
print(Prediction)
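## Quick accuracy check against the test labels defined above
## (an extra step, handy alongside the confusion matrix below):
print("Test accuracy:", np.mean(Prediction == labels))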
## Confusion matrix (fancy)
#import seaborn as sns
#from sklearn.metrics import confusion_matrix
fig2 = plt.figure()
plt.title("Confusion Matrix For Predictions vs. Actual Values")
cm = confusion_matrix(labels, Prediction)
print(cm)
ax= plt.subplot()
sns.heatmap(cm, annot=True, fmt='g', ax=ax, cmap='Blues')
#annot=True to annotate cells, fmt='g' to disable scientific notation
# labels, title and ticks
ax.set_xlabel("Predicted labels")
ax.set_ylabel("True labels")
ax.set_title("Confusion Matrix")
ax.xaxis.set_ticklabels(["Not BB Player", "BB Player"])
ax.yaxis.set_ticklabels(["Not BB Player", "BB Player"])
plt.show()