[Neural Networks and Deep Learning] Practice : Cat/Non-Cat Classifier with Logistic Regression
2022, Apr 21
Load Dataset
import numpy as np
import matplotlib.pyplot as plt
import h5py # for reading datasets stored in HDF5 (.h5) files
import scipy # for testing with your own image
from PIL import Image
from scipy import ndimage
%matplotlib inline
def load_dataset():
    with h5py.File('C:/Users/DNI_180902/Desktop/Data/DL-WK2/train_catvnoncat.h5', 'r') as train_dataset:
        train_set_x_orig = np.array(train_dataset["train_set_x"][:])
        train_set_y_orig = np.array(train_dataset["train_set_y"][:])
    with h5py.File('C:/Users/DNI_180902/Desktop/Data/DL-WK2/test_catvnoncat.h5', 'r') as test_dataset:
        test_set_x_orig = np.array(test_dataset["test_set_x"][:])
        test_set_y_orig = np.array(test_dataset["test_set_y"][:])
        classes = np.array(test_dataset["list_classes"][:])
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes
train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()
print("Training_Set_X : {0}".format(train_set_x_orig.shape))
print("Test_Set_X : {0}".format(test_set_x_orig.shape))
# shape : (number of examples, height in px, width in px, channels (3 for RGB))
# Training_Set : 209 examples of (64, 64, 3) images
# Test_Set : 50 examples of (64, 64, 3) images
print("Training_Set_Y : {0}".format(train_set_y.shape))
print("Test_Set_Y : {0}".format(test_set_y.shape))
# result : 1 (Cat) or 0 (Non-Cat)
# 209 training examples
# 50 test examples
Reshape X Dataset
- Flatten each 64x64x3 image into a single column so that the 209 training examples form a matrix of shape (64x64x3, 209)
train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
test_set_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T
print(train_set_x_flatten.shape)
print(test_set_x_flatten.shape)
print(train_set_x_flatten[0:5,0]) # sample check
# Normalization
# pixel values range from 0 to 255
# scale every value of the matrix to the range 0 ~ 1
train_set_x_flatten = train_set_x_flatten/255
test_set_x_flatten = test_set_x_flatten/255
print(train_set_x_flatten[0:5,0])
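To make the reshape trick above concrete, here is a quick toy check (my addition, not part of the original notebook): reshape(m, -1).T turns each example into one column.
toy = np.arange(24).reshape(2, 2, 2, 3)  # 2 toy "images" of shape (2, 2, 3)
toy_flat = toy.reshape(toy.shape[0], -1).T
print(toy_flat.shape)    # (12, 2) : one column per example
print(toy_flat[:3, 0])   # first three values of example 0 : [0 1 2]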
Building the Learning Algorithm
General Architecture of the learning algorithm
- features : 12288 (64x64x3)
- activation function : sigmoid (logistic regression)
- classifier : binary classifier (1 for cat, 0 for non-cat)
- decision threshold : 0.5
- Mathematical expression of the algorithm, for one example $x^{(i)}$:
  - $z^{(i)} = w^T x^{(i)} + b$
  - $\hat{y}^{(i)} = a^{(i)} = \sigma(z^{(i)})$
  - $\mathcal{L}(a^{(i)}, y^{(i)}) = -y^{(i)}\log(a^{(i)}) - (1 - y^{(i)})\log(1 - a^{(i)})$
- The cost is then computed by summing over all training examples:
  - $J = \frac{1}{m}\sum_{i=1}^{m}\mathcal{L}(a^{(i)}, y^{(i)})$
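For intuition, a worked example (added here, not from the original notes): a confident correct prediction is cheap, $\mathcal{L}(0.9, 1) = -\log(0.9) \approx 0.105$, while a confident wrong one is expensive, $\mathcal{L}(0.1, 1) = -\log(0.1) \approx 2.303$.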
STEPS
- Define Model Structure
- Initialize Parameters
- Repeat Below
- Calculate current loss : Forward Propagation
- Calculate current gradient : Backward Propagation
- Update parameters by gradient descent
Helper functions
sigmoid function
def sigmoid(z):
    return 1/(1 + np.exp(-z))
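A quick sanity check, added here for illustration: sigmoid(0) should be exactly 0.5, and large-magnitude inputs should saturate toward 0 or 1.
print(sigmoid(np.array([-10, 0, 2, 10])))
# approximately [4.54e-05  0.5  0.881  0.99995]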
initialize parameters
# dim : 64x64x3
def init_params(dim):
    w = np.zeros((dim, 1))
    b = 0
    assert(w.shape == (dim, 1))
    assert(isinstance(b, float) or isinstance(b, int))
    return w, b
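As a minimal check (my addition): the weights come back as a zero column vector and the bias as a scalar zero.
w_chk, b_chk = init_params(3)
print(w_chk.ravel(), b_chk)  # [0. 0. 0.] 0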
Forward & Backward Propagation
def propagate(w, b, X, Y):
    """
    Implement the cost function and its gradient for the propagation explained above
    Arguments:
    w -- weights, a numpy array of size (num_px * num_px * 3, 1)
    b -- bias, a scalar
    X -- data of size (num_px * num_px * 3, m)
    Y -- true "label" vector (0 non-cat, 1 cat) of size (1, m)
    Return:
    cost -- negative log-likelihood cost for logistic regression
    dw -- gradient of the loss with respect to w, thus same shape as w
    db -- gradient of the loss with respect to b, thus same shape as b
    """
    m = X.shape[1]
    # forward
    A = sigmoid(np.dot(w.T, X) + b)
    cost = (-1/m)*(np.sum(Y*np.log(A)) + np.sum((1 - Y)*np.log(1 - A)))
    # backward
    dw = (1/m)*np.dot(X, (A - Y).T)
    db = (1/m)*np.sum(A - Y)
    assert(dw.shape == w.shape)
    assert(db.dtype == float)
    cost = np.squeeze(cost)
    grads = {"dw" : dw,
             "db" : db}
    return grads, cost
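Before wiring propagate into the optimizer, a toy run with made-up values (my addition) confirms the output shapes; the exact numbers are not the point.
w_t, b_t = np.array([[1.], [2.]]), 2.
X_t = np.array([[1., 2., -1.], [3., 4., -3.2]])
Y_t = np.array([[1, 0, 1]])
grads_t, cost_t = propagate(w_t, b_t, X_t, Y_t)
print(grads_t["dw"].shape, cost_t)  # (2, 1) and a single scalar cost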
Optimization
# Find w and b that minimize the cost by gradient descent
def optimize(w, b, X, Y, num_iter, lr, print_cost = False):
    """
    This function optimizes w and b by running a gradient descent algorithm
    Arguments:
    w, b, X, Y -- same as above
    num_iter -- number of iterations of the optimization loop
    lr -- learning rate of the gradient descent update rule
    print_cost -- True to print the loss every 100 steps
    Returns:
    (best_iter, cost, costs, best_w, best_b, w, b) -- the iteration with the
    lowest cost, that lowest cost, the costs recorded every 100 iterations
    (used to plot the learning curve), the parameters at the lowest cost,
    and the final parameters
    """
    costs = []
    cost = float('inf')
    flag = 0  # counts consecutive iterations without improvement
    best_w, best_b, best_iter = w.copy(), b, 0
    for k in range(num_iter):
        grads, cost_new = propagate(w, b, X, Y)
        dw = grads["dw"]
        db = grads["db"]
        w -= lr*dw
        b -= lr*db
        if not np.isnan(cost_new) and cost_new < cost:
            flag = 0
            best_w = w.copy()  # copy, since w keeps being updated in place
            best_b = b
            cost = cost_new
            best_iter = k
        else:
            flag += 1
        if k % 100 == 0:
            costs.append(cost_new)
            if print_cost:
                print("Cost after {0}th iteration : {1}".format(k, cost_new))
        if flag >= 2000:  # early stop after 2000 iterations without improvement
            break
    return (best_iter, cost, costs, best_w, best_b, w, b)
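A short toy run of optimize (reusing the made-up X_t, Y_t from the propagate check above; again my addition) shows what the returned tuple looks like.
w_t, b_t = np.zeros((2, 1)), 0
best_it, best_cost, costs_t, bw_t, bb_t, w_t, b_t = optimize(w_t, b_t, X_t, Y_t, num_iter=100, lr=0.009)
print(best_it, best_cost)  # iteration index of the lowest cost, and that cost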
Predict Cat (1) or Non-Cat (0)
def predict(best_w, best_b, X):
    '''
    Predict whether the label is 0 or 1 using optimized lr parameters (w, b)
    Returns:
    Y_prediction -- a numpy array (vector) containing all predictions (0/1) for the examples in X
    '''
    A = sigmoid(np.dot(best_w.T, X) + best_b)
    m = A.shape[1]
    pred = np.zeros((1, m))
    for i in range(m):
        # apply the 0.5 decision threshold
        if A[0, i] >= 0.5:
            pred[0, i] = 1
        else:
            pred[0, i] = 0
    assert(pred.shape == (1, m))
    return pred
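A toy check with hand-picked parameters (my addition): predict thresholds each activation at 0.5 and returns a (1, m) array of 0/1 labels.
w_p, b_p = np.array([[0.3], [-0.2]]), -0.1
X_p = np.array([[1., -1.], [2., 3.]])
print(predict(w_p, b_p, X_p))  # a (1, 2) array of 0s and 1s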
Merge all functions into Final Model
def model(X_train, X_test, Y_train, Y_test, num_iter = 2000, lr = 0.5, print_cost = False):
    """
    Builds the logistic regression model by calling the functions implemented above
    Arguments:
    X_train -- training set represented by a numpy array of shape (64*64*3, m_train)
    X_test -- test set represented by a numpy array of shape (64*64*3, m_test)
    Y_train -- training labels represented by a numpy array (vector) of shape (1, m_train)
    Y_test -- test labels represented by a numpy array (vector) of shape (1, m_test)
    Returns:
    d -- dictionary containing information about the model.
    """
    n_features = X_train.shape[0]  # 64*64*3 = 12288
    w, b = init_params(n_features)
    print("< Train Dataset >")
    best_iter, cost, costs, best_w, best_b, w, b = optimize(w, b, X_train, Y_train, num_iter, lr, print_cost)
    # fit a separate model on the test set only to obtain its learning curve for comparison
    w_test, b_test = init_params(n_features)
    print("< Test Dataset >")
    _, _, costs_test, _, _, _, _ = optimize(w_test, b_test, X_test, Y_test, num_iter, lr, print_cost)
    pred_train = predict(best_w, best_b, X_train)
    pred_test = predict(best_w, best_b, X_test)
    print("Train Accuracy : {0}".format((1 - np.mean(np.abs(Y_train - pred_train)))*100))
    print("Test Accuracy : {0}".format((1 - np.mean(np.abs(Y_test - pred_test)))*100))
    d = {"cost" : cost,
         "costs_test" : costs_test,
         "costs" : costs,
         "w" : w,
         "b" : b,
         "best_w" : best_w,
         "best_b" : best_b,
         "learning_rate" : lr,
         "num_iter" : num_iter,
         "best_iter" : best_iter}
    return d
Check Model Performance
d = model(train_set_x_flatten, test_set_x_flatten, train_set_y, test_set_y, 2000, 0.005, False)
# make sure that you set the appropriate learning rate
fig, ax = plt.subplots(figsize=(8, 5))
plt.plot(d['costs_test'], color='b', label="Test")
plt.plot(d['costs'], color='g', label="Train")
plt.legend(fontsize=15)
plt.xlabel("Iterations (per 100)", fontsize=15)
plt.ylabel("Cost", fontsize=15)
plt.title("Learning Rate : 0.005", fontsize=15)
plt.show()
How the Learning Curve Differs by Learning Rate
lrs = [0.01, 0.005, 0.001, 0.0005]
res_by_lr = {}
for lr in lrs:
    d = model(train_set_x_flatten, test_set_x_flatten, train_set_y, test_set_y, 2000, lr, False)
    res_by_lr[str(lr)] = d["costs"]
fig, ax = plt.subplots(figsize=(8, 5))
colors = ['r', 'g', 'b', 'violet']
for i, (lr, costs) in enumerate(res_by_lr.items()):
    plt.plot(costs, color=colors[i], label=lr)
plt.legend(fontsize=12)
plt.xlabel("Iterations (per 100)", fontsize=15)
plt.ylabel("Cost", fontsize=15)
plt.title("Leaning Curves by Different Learning Rate", fontsize=15)
ax.set_facecolor('w')
plt.show()
# among these, 0.01 gives the fastest-converging learning curve
Test With Your Own Image : Cat or Not Cat?
from urllib.request import urlopen
def get_cat_img(url):
    my_cat = Image.open(urlopen(url)).convert('RGB')  # drop the alpha channel that PNGs may carry
    n_px = train_set_x_orig.shape[1]  # 64
    cat = np.array(my_cat.resize(size=(n_px, n_px))).reshape((1, -1)).T
    cat = cat/255  # apply the same 0 ~ 1 scaling used for the training data
    return cat
# final model : trained with lr = 0.01, the best rate found above
d = model(train_set_x_flatten, test_set_x_flatten, train_set_y, test_set_y, 2000, 0.01, False)
best_w, best_b = d["best_w"], d["best_b"]
cats = ["https://user-images.githubusercontent.com/92680829/161882766-dfe0cd10-0ed3-4b87-9659-20c81c61b8b5.png",
"https://user-images.githubusercontent.com/92680829/161882868-1753eeba-42fc-479c-94ab-f9c3d87529e7.png",
"https://user-images.githubusercontent.com/92680829/161882894-904d0a81-1518-4b7b-82af-5329584b618b.png",
"https://user-images.githubusercontent.com/92680829/161882954-f852ced5-5596-4286-bc23-df45797a4142.png",
"https://user-images.githubusercontent.com/92680829/161883046-ab99cec6-2ce3-4d47-8e36-76085d929924.png",
"https://user-images.githubusercontent.com/92680829/161892955-84c22938-2008-43f6-8f8f-8930508ec0ec.png",
"https://user-images.githubusercontent.com/92680829/161892992-a5dca781-cfc0-48c3-89b6-2c5e90d322ef.png",
"https://user-images.githubusercontent.com/92680829/161893118-f505be6f-6dee-43a2-8fae-743195d4e2b5.png",
"https://user-images.githubusercontent.com/92680829/161893303-90588cc4-2e54-47e6-80ed-ed7360c4ec5f.png",
"https://user-images.githubusercontent.com/92680829/161893353-6ec427d8-8789-46c0-b9ac-3627aafc95ab.png",
"https://user-images.githubusercontent.com/92680829/161893642-c49cc4ee-7eb0-4321-8e93-2e7a253a9fca.png",
"https://user-images.githubusercontent.com/92680829/161893546-f969f22a-1003-495e-a0d3-30bbb9023ebd.png"]
Y = np.array([1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1]).reshape((1, -1))
m = Y.shape[1]
pred = np.zeros((1, m))
for i in range(m):
    cat = get_cat_img(cats[i])
    pred[0, i] = predict(best_w, best_b, cat)
accuracy = (1 - np.mean(np.abs(Y - pred)))*100
print("Labels : {0} / Predictions : {1}\nAccuracy : {2}".format(Y, pred, accuracy))
- Logistic regression is not the best algorithm for image classification, so the model's performance is not that great.
- Later posts will cover algorithms that perform better at distinguishing images.