[Neural Networks and Deep Learning] Practice : Cat/Non-Cat Classifier with Logistic Regression
2022, Apr 21
Load Dataset
import numpy as np
import matplotlib.pyplot as plt
import h5py # for reading datasets stored in HDF5 (.h5) files
import scipy # for testing with your own image
from PIL import Image
from scipy import ndimage
%matplotlib inline
def load_dataset():
    with h5py.File('C:/Users/DNI_180902/Desktop/Data/DL-WK2/train_catvnoncat.h5', 'r') as train_dataset:
        train_set_x_orig = np.array(train_dataset["train_set_x"][:])
        train_set_y_orig = np.array(train_dataset["train_set_y"][:])
    with h5py.File('C:/Users/DNI_180902/Desktop/Data/DL-WK2/test_catvnoncat.h5', 'r') as test_dataset:
        test_set_x_orig = np.array(test_dataset["test_set_x"][:])
        test_set_y_orig = np.array(test_dataset["test_set_y"][:])
        classes = np.array(test_dataset["list_classes"][:])
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes
train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()
print("Training_Set_X : {0}".format(train_set_x_orig.shape))
print("Test_Set_X : {0}".format(test_set_x_orig.shape))
# shape : (number of examples, height in px, width in px, channels (3 for RGB))
# Training_Set : 209 examples of (64, 64, 3) images
# Test_Set : 50 examples of (64, 64, 3) images
print("Training_Set_Y : {0}".format(train_set_y.shape))
print("Test_Set_Y : {0}".format(test_set_y.shape))
# result : 1 (Cat) or 0 (Non-Cat)
# 209 training examples
# 50 test examples
Reshape X Dataset
- Flatten each 64x64x3 image into a single column so that the 209 training examples form a matrix of shape (64x64x3, 209)
train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
test_set_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T
print(train_set_x_flatten.shape)
print(test_set_x_flatten.shape)
print(train_set_x_flatten[0:5,0]) # sample check
# Normalization
# pixel values range from 0 to 255
# scale every value of the matrix to the range 0 ~ 1
train_set_x_flatten = train_set_x_flatten/255
test_set_x_flatten = test_set_x_flatten/255
print(train_set_x_flatten[0:5,0])
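To make the reshape trick above concrete, here is a quick toy check (my addition, not part of the original notebook): reshape(m, -1).T turns each example into one column.
toy = np.arange(24).reshape(2, 2, 2, 3)  # 2 toy "images" of shape (2, 2, 3)
toy_flat = toy.reshape(toy.shape[0], -1).T
print(toy_flat.shape)    # (12, 2) : one column per example
print(toy_flat[:3, 0])   # first three values of example 0 : [0 1 2]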
Building the Learning Algorithm
General Architecture of the learning algorithm
- features : 12288 (64x64x3)
- activation function : sigmoid (logistic regression)
- classifier : binary classifier (1 for cat, 0 for non-cat)
- decision threshold : 0.5
- Mathematical expression of the algorithm, for one example $x^{(i)}$:
  - $z^{(i)} = w^T x^{(i)} + b$
  - $\hat{y}^{(i)} = a^{(i)} = \sigma(z^{(i)})$
  - $\mathcal{L}(a^{(i)}, y^{(i)}) = -y^{(i)}\log(a^{(i)}) - (1 - y^{(i)})\log(1 - a^{(i)})$
- The cost is then computed by summing over all training examples:
  - $J = \frac{1}{m}\sum_{i=1}^{m}\mathcal{L}(a^{(i)}, y^{(i)})$
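For intuition, a worked example (added here, not from the original notes): a confident correct prediction is cheap, $\mathcal{L}(0.9, 1) = -\log(0.9) \approx 0.105$, while a confident wrong one is expensive, $\mathcal{L}(0.1, 1) = -\log(0.1) \approx 2.303$.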
STEPS
- Define Model Structure
- Initialize Parameters
- Repeat Below
- Calculate current loss : Forward Propagation
- Calculate current gradient : Backward Propagation
- Update parameters by gradient descent
Helper functions
sigmoid function
def sigmoid(z):
    return 1/(1 + np.exp(-z))
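A quick sanity check, added here for illustration: sigmoid(0) should be exactly 0.5, and large-magnitude inputs should saturate toward 0 or 1.
print(sigmoid(np.array([-10, 0, 2, 10])))
# approximately [4.54e-05  0.5  0.881  0.99995]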
initialize parameters
# dim : 64x64x3
def init_params(dim):
    w = np.zeros((dim, 1))
    b = 0
    assert(w.shape == (dim, 1))
    assert(isinstance(b, float) or isinstance(b, int))
    return w, b
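As a minimal check (my addition): the weights come back as a zero column vector and the bias as a scalar zero.
w_chk, b_chk = init_params(3)
print(w_chk.ravel(), b_chk)  # [0. 0. 0.] 0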
Forward & Backward Propagation
def propagate(w, b, X, Y):
    """
    Implement the cost function and its gradient for the propagation explained above
    Arguments:
    w -- weights, a numpy array of size (num_px * num_px * 3, 1)
    b -- bias, a scalar
    X -- data of size (num_px * num_px * 3, m)
    Y -- true "label" vector (0 non-cat, 1 cat) of size (1, m)
    Return:
    cost -- negative log-likelihood cost for logistic regression
    dw -- gradient of the loss with respect to w, thus same shape as w
    db -- gradient of the loss with respect to b, thus same shape as b
    """
    m = X.shape[1]
    # forward
    A = sigmoid(np.dot(w.T, X) + b)
    cost = (-1/m)*(np.sum(Y*np.log(A)) + np.sum((1 - Y)*np.log(1 - A)))
    # backward
    dw = (1/m)*np.dot(X, (A - Y).T)
    db = (1/m)*np.sum(A - Y)
    assert(dw.shape == w.shape)
    assert(db.dtype == float)
    cost = np.squeeze(cost)
    grads = {"dw" : dw,
             "db" : db}
    return grads, cost
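Before wiring propagate into the optimizer, a toy run with made-up values (my addition) confirms the output shapes; the exact numbers are not the point.
w_t, b_t = np.array([[1.], [2.]]), 2.
X_t = np.array([[1., 2., -1.], [3., 4., -3.2]])
Y_t = np.array([[1, 0, 1]])
grads_t, cost_t = propagate(w_t, b_t, X_t, Y_t)
print(grads_t["dw"].shape, cost_t)  # (2, 1) and a single scalar cost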
Optimization
# Find w and b that minimize the cost by gradient descent
def optimize(w, b, X, Y, num_iter, lr, print_cost = False):
    """
    This function optimizes w and b by running a gradient descent algorithm
    Arguments:
    w, b, X, Y -- same as above
    num_iter -- number of iterations of the optimization loop
    lr -- learning rate of the gradient descent update rule
    print_cost -- True to print the loss every 100 steps
    Returns:
    (best_iter, cost, costs, best_w, best_b, w, b) -- the iteration with the
    lowest cost, that lowest cost, the costs recorded every 100 iterations
    (used to plot the learning curve), the parameters at the lowest cost,
    and the final parameters
    """
    costs = []
    cost = float('inf')
    flag = 0  # counts consecutive iterations without improvement
    best_w, best_b, best_iter = w.copy(), b, 0
    for k in range(num_iter):
        grads, cost_new = propagate(w, b, X, Y)
        dw = grads["dw"]
        db = grads["db"]
        w -= lr*dw
        b -= lr*db
        if not np.isnan(cost_new) and cost_new < cost:
            flag = 0
            best_w = w.copy()  # copy, since w keeps being updated in place
            best_b = b
            cost = cost_new
            best_iter = k
        else:
            flag += 1
        if k % 100 == 0:
            costs.append(cost_new)
            if print_cost:
                print("Cost after {0}th iteration : {1}".format(k, cost_new))
        if flag >= 2000:  # early stop after 2000 iterations without improvement
            break
    return (best_iter, cost, costs, best_w, best_b, w, b)
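A short toy run of optimize (reusing the made-up X_t, Y_t from the propagate check above; again my addition) shows what the returned tuple looks like.
w_t, b_t = np.zeros((2, 1)), 0
best_it, best_cost, costs_t, bw_t, bb_t, w_t, b_t = optimize(w_t, b_t, X_t, Y_t, num_iter=100, lr=0.009)
print(best_it, best_cost)  # iteration index of the lowest cost, and that cost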
Predict Cat (1) or Non-Cat (0)
def predict(best_w, best_b, X):
    '''
    Predict whether the label is 0 or 1 using optimized lr parameters (w, b)
    Returns:
    Y_prediction -- a numpy array (vector) containing all predictions (0/1) for the examples in X
    '''
    A = sigmoid(np.dot(best_w.T, X) + best_b)
    m = A.shape[1]
    pred = np.zeros((1, m))
    for i in range(m):
        # apply the 0.5 decision threshold
        if A[0, i] >= 0.5:
            pred[0, i] = 1
        else:
            pred[0, i] = 0
    assert(pred.shape == (1, m))
    return pred
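A toy check with hand-picked parameters (my addition): predict thresholds each activation at 0.5 and returns a (1, m) array of 0/1 labels.
w_p, b_p = np.array([[0.3], [-0.2]]), -0.1
X_p = np.array([[1., -1.], [2., 3.]])
print(predict(w_p, b_p, X_p))  # a (1, 2) array of 0s and 1s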
Merge all functions into Final Model
def model(X_train, X_test, Y_train, Y_test, num_iter = 2000, lr = 0.5, print_cost = False):
    """
    Builds the logistic regression model by calling the functions implemented above
    Arguments:
    X_train -- training set represented by a numpy array of shape (64*64*3, m_train)
    X_test -- test set represented by a numpy array of shape (64*64*3, m_test)
    Y_train -- training labels represented by a numpy array (vector) of shape (1, m_train)
    Y_test -- test labels represented by a numpy array (vector) of shape (1, m_test)
    Returns:
    d -- dictionary containing information about the model.
    """
    n_features = X_train.shape[0]  # 64*64*3 = 12288
    w, b = init_params(n_features)
    print("< Train Dataset >")
    best_iter, cost, costs, best_w, best_b, w, b = optimize(w, b, X_train, Y_train, num_iter, lr, print_cost)
    # fit a separate model on the test set only to obtain its learning curve for comparison
    w_test, b_test = init_params(n_features)
    print("< Test Dataset >")
    _, _, costs_test, _, _, _, _ = optimize(w_test, b_test, X_test, Y_test, num_iter, lr, print_cost)
    pred_train = predict(best_w, best_b, X_train)
    pred_test = predict(best_w, best_b, X_test)
    print("Train Accuracy : {0}".format((1 - np.mean(np.abs(Y_train - pred_train)))*100))
    print("Test Accuracy : {0}".format((1 - np.mean(np.abs(Y_test - pred_test)))*100))
    d = {"cost" : cost,
         "costs_test" : costs_test,
         "costs" : costs,
         "w" : w,
         "b" : b,
         "best_w" : best_w,
         "best_b" : best_b,
         "learning_rate" : lr,
         "num_iter" : num_iter,
         "best_iter" : best_iter}
    return d
Check Model Performance
d = model(train_set_x_flatten, test_set_x_flatten, train_set_y, test_set_y, 2000, 0.005, False)
# make sure that you set the appropriate learning rate
fig, ax = plt.subplots(figsize=(8, 5))
plt.plot(d['costs_test'], color='b', label="Test")
plt.plot(d['costs'], color='g', label="Train")
plt.legend(fontsize=15)
plt.xlabel("Iterations (per 100)", fontsize=15)
plt.ylabel("Cost", fontsize=15)
plt.title("Learning Rate : 0.005", fontsize=15)
plt.show()
How the Learning Curve Differs by Learning Rate
lrs = [0.01, 0.005, 0.001, 0.0005]
res_by_lr = {}
for lr in lrs:
    d = model(train_set_x_flatten, test_set_x_flatten, train_set_y, test_set_y, 2000, lr, False)
    res_by_lr[str(lr)] = d["costs"]
fig, ax = plt.subplots(figsize=(8, 5))
colors = ['r', 'g', 'b', 'violet']
for i, (lr, costs) in enumerate(res_by_lr.items()):
    plt.plot(costs, color=colors[i], label=lr)
plt.legend(fontsize=12)
plt.xlabel("Iterations (per 100)", fontsize=15)
plt.ylabel("Cost", fontsize=15)
plt.title("Leaning Curves by Different Learning Rate", fontsize=15)
ax.set_facecolor('w')
plt.show()
# among these, 0.01 gives the fastest-converging learning curve
Test With Your Own Image : Cat or Not Cat?
from urllib.request import urlopen
def get_cat_img(url):
    my_cat = Image.open(urlopen(url)).convert('RGB')  # drop the alpha channel that PNGs may carry
    n_px = train_set_x_orig.shape[1]  # 64
    cat = np.array(my_cat.resize(size=(n_px, n_px))).reshape((1, -1)).T
    cat = cat/255  # apply the same 0 ~ 1 scaling used for the training data
    return cat
# final model : trained with lr = 0.01, the best rate found above
d = model(train_set_x_flatten, test_set_x_flatten, train_set_y, test_set_y, 2000, 0.01, False)
best_w, best_b = d["best_w"], d["best_b"]
cats = ["https://user-images.githubusercontent.com/92680829/161882766-dfe0cd10-0ed3-4b87-9659-20c81c61b8b5.png",
"https://user-images.githubusercontent.com/92680829/161882868-1753eeba-42fc-479c-94ab-f9c3d87529e7.png",
"https://user-images.githubusercontent.com/92680829/161882894-904d0a81-1518-4b7b-82af-5329584b618b.png",
"https://user-images.githubusercontent.com/92680829/161882954-f852ced5-5596-4286-bc23-df45797a4142.png",
"https://user-images.githubusercontent.com/92680829/161883046-ab99cec6-2ce3-4d47-8e36-76085d929924.png",
"https://user-images.githubusercontent.com/92680829/161892955-84c22938-2008-43f6-8f8f-8930508ec0ec.png",
"https://user-images.githubusercontent.com/92680829/161892992-a5dca781-cfc0-48c3-89b6-2c5e90d322ef.png",
"https://user-images.githubusercontent.com/92680829/161893118-f505be6f-6dee-43a2-8fae-743195d4e2b5.png",
"https://user-images.githubusercontent.com/92680829/161893303-90588cc4-2e54-47e6-80ed-ed7360c4ec5f.png",
"https://user-images.githubusercontent.com/92680829/161893353-6ec427d8-8789-46c0-b9ac-3627aafc95ab.png",
"https://user-images.githubusercontent.com/92680829/161893642-c49cc4ee-7eb0-4321-8e93-2e7a253a9fca.png",
"https://user-images.githubusercontent.com/92680829/161893546-f969f22a-1003-495e-a0d3-30bbb9023ebd.png"]
Y = np.array([1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1]).reshape((1, -1))
m = Y.shape[1]
pred = np.zeros((1, m))
for i in range(m):
    cat = get_cat_img(cats[i])
    pred[0, i] = predict(best_w, best_b, cat)
accuracy = (1 - np.mean(np.abs(Y - pred)))*100
print("Labels : {0} / Predictions : {1}\nAccuracy : {2}".format(Y, pred, accuracy))
- Logistic regression is not the best algorithm for image classification, so the model's performance is not that great.
- Later posts will cover algorithms that perform better at distinguishing images.