# -*- coding: utf-8 -*-
"""
@author: profa
"""
##############################################
##
## This code uses a simple record dataset
## with numeric data.
## There is a link to the data below.
## The dataset has 3 columns of data (3 features)
## The dataset has labels of 0 and 1
##
## This code uses Keras to build a simple NN
## to predict the label (called "Decision")
## for this dataset.
##
############################################
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
## Link to dataset
## https://drive.google.com/file/d/1JjkJ4q0MMGJP8jht2ZI9qDEvV5Jjfu0r/view?usp=drive_link
## Path to dataset on my computer
## YOU will update this path for YOUR computer
filepath="C:/Users/profa/Desktop/UCB/NNCSCI5922/Code/StudentSummerProgramData_ForMod3_Slides_Example.csv"
DF=pd.read_csv(filepath)
print(DF)
## Prepare the data
## This means you need to:
##  - Normalize the data - but NOT the label!
##  - Separate the data into a Training set and a Testing set
##  - Remove and retain the labels for both the Training
##    and Testing datasets.
## Step 1 - Use min-max to normalize the data (but NOT the label!)
## What are the column names? What column is the label?
print(DF.columns)
## "Decision" is the label of this dataset.
for col in DF.columns:
    #print(col)
    if col != "Decision":
        DF[col]=(DF[col]-DF[col].min())/(DF[col].max()-DF[col].min())
        print(DF[col])
print(DF)
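## An equivalent sketch using sklearn's MinMaxScaler (it assumes the same
## feature columns; left commented out so the data is not re-scaled here):
## from sklearn.preprocessing import MinMaxScaler
## feature_cols = [c for c in DF.columns if c != "Decision"]
## DF[feature_cols] = MinMaxScaler().fit_transform(DF[feature_cols])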
## Split DF into Training and Testing sets
# Random sampling *without replacement*
TrainDF, TestDF = train_test_split(DF, test_size=0.3)
print(TrainDF.shape)
print(TestDF.shape)
print(TestDF)
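## For a reproducible, label-balanced split, train_test_split also accepts
## random_state and stratify (the seed 42 below is an arbitrary example):
## TrainDF, TestDF = train_test_split(DF, test_size=0.3, random_state=42, stratify=DF["Decision"])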
## Drop and SAVE the labels from the TrainDF and TestDF
TrainLabels=TrainDF["Decision"]
print(TrainLabels)
TestLabels=TestDF["Decision"]
print(TestLabels)
TrainDF = TrainDF.drop(columns="Decision", axis=1)
print(TrainDF)
print(TrainDF.shape)
TestDF = TestDF.drop(columns="Decision", axis=1)
print(TestDF)
##########################################
## Using Keras
#################################################
## Step 1
## Create a TF - Keras NN Model
## https://keras.io/guides/sequential_model/
My_NN_Model = tf.keras.models.Sequential([
    ## First hidden layer - 4 hidden units.
    ## Our data is flat, with 3 features per row,
    ## so the input shape is (3,).
    tf.keras.layers.Dense(4, input_shape=(3,), activation='relu'),
    ## Dense implements the operation:
    ## output = activation(dot(input, kernel) + bias)
    ## The "kernel" is the weights matrix.
    ## In Keras, the input "layer" itself is not a layer
    ## but a tensor - the starting tensor you send
    ## to the first hidden layer. This tensor
    ## must have the same shape as one row of your training data.
    ## Second hidden layer - 2 units.
    tf.keras.layers.Dense(2, activation='relu'),
    ## https://keras.io/api/layers/core_layers/dense/
    ## The first argument, 2 here, is the number of units in the hidden layer.
    ## https://www.tutorialspoint.com/keras/keras_dense_layer.htm
    ##tf.keras.layers.Dropout(0.2), ## We do not need Dropout here.
    ## Output layer - one sigmoid unit for a 0 or 1 label.
    tf.keras.layers.Dense(1, activation='sigmoid')
])
My_NN_Model.summary()
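## Parameter count check, by hand:
##   Dense(4): 3*4 weights + 4 biases = 16
##   Dense(2): 4*2 weights + 2 biases = 10
##   Dense(1): 2*1 weights + 1 bias   =  3
## Total: 29 trainable parameters, which should match the summary above.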
## Print the (initial, randomly initialized) weights
first_layer_weights = My_NN_Model.layers[0].get_weights()[0]
#first_layer_biases = My_NN_Model.layers[0].get_weights()[1]
second_layer_weights = My_NN_Model.layers[1].get_weights()[0]
third_layer_weights = My_NN_Model.layers[2].get_weights()[0]
print("The first layer weights are: \n", first_layer_weights)
print("The first layer weights shape is\n",first_layer_weights.shape )
print("The second layer weights are: \n", second_layer_weights)
print("The third layer weights are: \n", third_layer_weights)
## Compile the Model
loss_function = keras.losses.BinaryCrossentropy(from_logits=False)
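## Binary cross-entropy for one example is -[y*log(p) + (1-y)*log(1-p)],
## averaged over the batch. A quick numeric sketch with made-up values:
y_demo = np.array([1.0, 0.0])
p_demo = np.array([0.9, 0.2])
print(-np.mean(y_demo*np.log(p_demo) + (1-y_demo)*np.log(1-p_demo)))  ## about 0.164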
My_NN_Model.compile(
    loss=loss_function,
    metrics=["accuracy"],
    optimizer='adam'
)
## Fit the Model to the data (train the model)
## Training for more epochs can improve accuracy, though
## too many epochs will eventually overfit the training data.
Hist=My_NN_Model.fit(TrainDF,TrainLabels, epochs=300, validation_data=(TestDF, TestLabels))
## batch_size= is also an option here for batch training
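## A hypothetical example with an explicit batch size (16 is an arbitrary choice):
## Hist=My_NN_Model.fit(TrainDF, TrainLabels, epochs=300, batch_size=16, validation_data=(TestDF, TestLabels))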
plt.plot(Hist.history['accuracy'], label='accuracy')
plt.plot(Hist.history['val_accuracy'], label='val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0.5, 1])
plt.legend(loc='lower right')
plt.show()
## Test the Model
Test_Loss, Test_Accuracy = My_NN_Model.evaluate(TestDF, TestLabels)
## Save the Model
My_NN_Model.save("Example2_NN_Model")
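## To reload the saved model later (a sketch; the path matches the save call above):
## Reloaded_Model = tf.keras.models.load_model("Example2_NN_Model")
## Reloaded_Model.summary()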
## Predictions
predictions=My_NN_Model.predict(TestDF)
## For predictions >=.5 --> 1 else 0
print(predictions)
print(type(predictions))
predictions[predictions >= .5] = 1
predictions[predictions < .5] = 0
print(predictions)
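## An equivalent vectorized threshold (same result in one line):
## predictions = np.where(predictions >= .5, 1, 0)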
from sklearn.metrics import confusion_matrix
print("The prediction accuracy via confusion matrix is:\n")
print(confusion_matrix(predictions, TestLabels))
## Pretty Confusion Matrix
labels = [0, 1]
cm = confusion_matrix(predictions, TestLabels, labels=labels)
print(cm)
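## Beyond raw counts, sklearn's classification_report gives per-class
## precision, recall, and F1 (note the y_true, y_pred argument order):
from sklearn.metrics import classification_report
print(classification_report(TestLabels, predictions))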
import seaborn as sns
fig, ax = plt.subplots(figsize=(13,13))
#ax= plt.subplot()
#sns.set(font_scale=3)
#sns.set (rc = {'figure.figsize':(40, 40)})
sns.heatmap(cm, annot=True, fmt='g', ax=ax, annot_kws={'size': 18})
# annot=True annotates the cells; fmt='g' disables scientific notation
# annot_kws sets the font size inside the heatmap
# labels, title and ticks
ax.set_xlabel('True labels')
ax.set_ylabel('Predicted labels')
ax.set_title('Confusion Matrix: NN')
ax.xaxis.set_ticklabels(["0:Admit","1:Decline"],rotation=90, fontsize = 18)
ax.yaxis.set_ticklabels(["0:Admit","1:Decline"],rotation=0, fontsize = 18)
plt.show()