Deep learning & applications

Practice#1-3

  • Task: Binary classification using logistic regression (loss = binary cross-entropy)
    Input: 2-dim vector, x = {x1, x2}
    Output: label of the input, y ∈ {0, 1}
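
For reference, the two pieces named above can be written directly in NumPy. This is a minimal sketch: sigmoid matches the helper used in the code below, while bce_loss is a name introduced here purely for illustration:

import numpy as np

def sigmoid(z):
    # squash any real value into the open interval (0, 1)
    return 1 / (1 + np.exp(-z))

def bce_loss(y_true, y_prob):
    # binary cross-entropy averaged over the batch
    return -np.mean(y_true * np.log(y_prob) + (1 - y_true) * np.log(1 - y_prob))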

STEP 1: Generate 10000(=m) train samples

m is the number of training samples. The for loop generates m 2-dim vectors.
x_train has size 10000(=m) x 2; after the loop, the lists are stacked into this shape for vectorization.

y_train likewise starts as a list of length 10000(=m) and is reshaped to (10000, 1).
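
As a tiny illustration of that reshape (a sketch; the numbers are arbitrary):

labels = [0, 1, 1]                   # a plain Python list of labels
y = np.array(labels).reshape(-1, 1)  # -> column vector of shape (3, 1)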

m = 10000

x1_train, x2_train, y_train = [], [], []
for i in range(m):
    x1_train.append(random.uniform(-10, 10))
    x2_train.append(random.uniform(-10, 10))
    if x1_train[-1] < -5 or x1_train[-1] > 5: # label rule: y = 1 when |x1| > 5
        y_train.append(1)
    else:
        y_train.append(0)

x_train = np.array([x1_train,x2_train]).T # same as np.vstack((x1_train, x2_train)).T
y_train = np.array(y_train).reshape(-1, 1)
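
The same data can also be generated without the Python loop (a sketch under the same label rule; an alternative only, not the code the results below came from):

x_train = np.random.uniform(-10, 10, size=(m, 2))                 # m rows, 2 features
y_train = (np.abs(x_train[:, 0]) > 5).astype(int).reshape(-1, 1)  # y = 1 when |x1| > 5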

STEP 2: W, b initialize

W1 = first layer, shape (2, 2); W2 = second layer, shape (2, 1)

W1 = np.array([[2,-2],[1,1]])       # np.random.randn(2, 2)
b1 = np.array([[1,1]])              # np.ones((1,2))     # np.zeros((1, 2))
W2 = np.array([[10],[-10]])         # np.random.randn(2, 1)
b2 = np.array([1])                  # np.ones((1,1))     # np.zeros((1, 1))
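
A quick shape check can catch broadcasting mistakes early (a minimal sketch; the assertions just restate the shapes above, and b2 is omitted because its (1,) shape still broadcasts correctly):

assert W1.shape == (2, 2) and b1.shape == (1, 2)  # hidden layer: 2 inputs -> 2 units
assert W2.shape == (2, 1)                         # output layer: 2 units -> 1 logit
assert x_train.shape == (m, 2) and y_train.shape == (m, 1)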

STEP 3: Set learning rate (start with 0.01; the value 1 used below came out of the tuning at the end of this section)

learning_rate = 1
epochs = 5000 

STEP 4: Update W & b with the m samples for 5000 (=K) iterations:

for epoch in range(epochs):
    Z1 = np.dot(x_train, W1) + b1
    A1 = sigmoid(Z1)
    Z2 = np.dot(A1, W2) + b2
    y_pred = sigmoid(Z2)

    cost = -(1 / y_train.shape[0]) * np.sum((y_train * np.log(y_pred)) + ((1 - y_train) * np.log(1 - y_pred)))

    # Backward
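    # For reference, the chain-rule steps behind the four gradient lines below
    # (using sigmoid'(z) = sigmoid(z)(1 - sigmoid(z)) = A(1 - A)):
    #   dZ2 = y_pred - y_train             (BCE derivative and sigmoid' cancel)
    #   dW2 = A1.T @ dZ2 / m,  db2 = column-mean of dZ2
    #   dZ1 = (dZ2 @ W2.T) * A1 * (1 - A1)
    #   dW1 = x_train.T @ dZ1 / m, db1 = column-mean of dZ1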
    dZ2 = y_pred - y_train
    dW2 = (1 / m) * np.dot(A1.T, dZ2)
    db2 = (1 / m) * np.sum(dZ2, axis=0, keepdims=True)
    dZ1 = np.dot(dZ2, W2.T) * A1 * (1 - A1)
    dW1 = (1 / m) * np.dot(x_train.T, dZ1)
    db1 = (1 / m) * np.sum(dZ1, axis=0, keepdims=True)

    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1

    if epoch % 500 == 499: # every 500 epochs
        # NOTE: 100 - cost*100 is a cost-based score, not a true classification
        # accuracy, and epoch+1 is the epoch index, not a sample count
        print(f"Accuracy with the {epoch+1} train samples: ", 100 - cost * 100)

        print(f"weight: {round(W1[0][0],2):5}, {round(W1[0][1],2):5}, {round(W1[1][0],2):5}, {round(W1[1][1],2):5}, {round(W2[0][0],2):5}, {round(W2[1][0],2):5} ")
        # print(f" +-   : {round(W1[0][0]-p_W1[0][0], 2):5}, {round(W1[0][1]-p_W1[0][1], 2):5}, {round(W1[1][0]--p_W1[1][0], 2):5}, {round(W1[1][1]-p_W1[1][1], 2):5}, {round(W2[0][0]-p_W2[0][0], 2):5}, {round(W2[1][0]-p_W2[1][0], 2):5} ")

        print(f"bias  : {round(b1[0][0],2):5}, {round(b1[0][1],2):5}, {round(b2[0][0],2):5}")
        # print(f" +-   : {round(b1[0][0]-p_b1[0][0], 2):5}, {round(b1[0][1]-p_b1[0][1], 2):5}, {round(b2[0][0]-p_b2[0][0], 2):5}")
        # print(b1, ", ", b2) # prints in this form    ->     [[-0.00083603 -0.00023414]] ,  [[0.03851352]]

        # keep previous values, only used to inspect how much each update changed
        p_W2 = W2
        p_b2 = b2
        p_W1 = W1
        p_b1 = b1
        p_cost = cost
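
One caveat about the cost line above: if y_pred saturates to exactly 0 or 1, np.log returns -inf and the cost becomes NaN. A common guard is to clip the probabilities first (a minimal sketch; eps and y_pred_safe are names introduced here, not part of the original code):

eps = 1e-12                                  # tiny constant to keep log() finite
y_pred_safe = np.clip(y_pred, eps, 1 - eps)  # probabilities strictly inside (0, 1)
cost = -np.mean(y_train * np.log(y_pred_safe)
                + (1 - y_train) * np.log(1 - y_pred_safe))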


STEP 5: Test with test samples (!= train samples)

n = 1000
x1_test, x2_test, y_test = [], [], []
for i in range(n):
    x1_test.append(random.uniform(-10, 10))
    x2_test.append(random.uniform(-10, 10))
    if x1_test[-1] < -5 or x1_test[-1] > 5:
        y_test.append(1)
    else:
        y_test.append(0)

x_test = np.array([x1_test,x2_test]).T # same as np.vstack((x1_test, x2_test)).T
y_test = np.array(y_test).reshape(-1, 1)

# Forward
Z1 = np.dot(x_test, W1) + b1
A1 = sigmoid(Z1)
Z2 = np.dot(A1, W2) + b2
y_pred = sigmoid(Z2)

# Calculate cost
cost = -(1 / y_test.shape[0]) * np.sum((y_test * np.log(y_pred)) + ((1 - y_test) * np.log(1 - y_pred)))

print(f"accuracy with the {n} test samples: ", cost*100)

Full code

import numpy as np
import random

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# === STEP 1 ===
# Generate 10000(=m) train samples
m = 10000
n = 1000  # test-sample count, defined up front so the run-summary print in STEP 3 works

x1_train, x2_train, y_train = [], [], []
for i in range(m):
    x1_train.append(random.uniform(-10, 10))
    x2_train.append(random.uniform(-10, 10))
    if x1_train[-1] < -5 or x1_train[-1] > 5: # label rule: y = 1 when |x1| > 5
        y_train.append(1)
    else:
        y_train.append(0)

# print(x_train) # size : 10000(=m) x 2
x_train = np.array([x1_train,x2_train]).T # same as np.vstack((x1_train, x2_train)).T
# print(len(y_train)) # 10000(=m)
y_train = np.array(y_train).reshape(-1, 1) # list of length m -> shape (m, 1)

# === STEP 2 ===
# W, b initialize
W1 = np.array([[2,-2],[1,1]]) # np.random.randn(2, 2) # first layer, shape (2, 2)
b1 = np.array([[1,1]]) # np.ones((1,2)) # np.zeros((1, 2))
W2 = np.array([[10],[-10]]) # np.random.randn(2, 1) # second layer, shape (2, 1)
b2 = np.array([1]) # np.ones((1,1)) # np.zeros((1, 1))

# === STEP 3 ===
# set learning rate (start with 0.01)
learning_rate = 1
epochs = 5000

print(f"[{m=}, {n=}, {epochs=}]") # run summary; m, n, and epochs are all defined by this point

# === STEP 4 ===
# Update W & b with the m samples for 5000 (=K) iterations:
for epoch in range(epochs):
    Z1 = np.dot(x_train, W1) + b1
    A1 = sigmoid(Z1)
    Z2 = np.dot(A1, W2) + b2
    y_pred = sigmoid(Z2)

    # print(y_train.shape) # (10000, 1)
    cost = -(1 / y_train.shape[0]) * np.sum((y_train * np.log(y_pred)) + ((1 - y_train) * np.log(1 - y_pred)))

    # Backward
    dZ2 = y_pred - y_train
    dW2 = (1 / m) * np.dot(A1.T, dZ2)
    db2 = (1 / m) * np.sum(dZ2, axis=0, keepdims=True)
    dZ1 = np.dot(dZ2, W2.T) * A1 * (1 - A1)
    dW1 = (1 / m) * np.dot(x_train.T, dZ1)
    db1 = (1 / m) * np.sum(dZ1, axis=0, keepdims=True)

    if epoch == 0:
        p_cost = cost
    #     # temporary bookkeeping for the epoch-to-epoch comparison prints below;
    #     # the very first values are needed separately
    #     p_W2 = W2
    #     p_b2 = b2
    #     p_W1 = W1
    #     p_b1 = b1

    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1

    # print("accuracy with the 'm' train samples: ", cost * 100)

    if epoch % 500 == 499: # every 500 epochs
        # NOTE: 100 - cost*100 is a cost-based score, not a true classification
        # accuracy, and epoch+1 is the epoch index, not a sample count
        print(f"Accuracy with the {epoch+1} train samples: ", 100 - cost * 100)

        # print(f"[[epoch{epoch:4}]] Cost: {cost:20} ({round(cost-p_cost,5):9})")
        # print("weight1:")
        # print(W1) #2x2 형태
        # print("weight2:")
        # print(W2) #2x1 형태

        print(f"weight: {round(W1[0][0],2):5}, {round(W1[0][1],2):5}, {round(W1[1][0],2):5}, {round(W1[1][1],2):5}, {round(W2[0][0],2):5}, {round(W2[1][0],2):5} ")
        # print(f" +-   : {round(W1[0][0]-p_W1[0][0], 2):5}, {round(W1[0][1]-p_W1[0][1], 2):5}, {round(W1[1][0]--p_W1[1][0], 2):5}, {round(W1[1][1]-p_W1[1][1], 2):5}, {round(W2[0][0]-p_W2[0][0], 2):5}, {round(W2[1][0]-p_W2[1][0], 2):5} ")

        print(f"bias  : {round(b1[0][0],2):5}, {round(b1[0][1],2):5}, {round(b2[0][0],2):5}")
        # print(f" +-   : {round(b1[0][0]-p_b1[0][0], 2):5}, {round(b1[0][1]-p_b1[0][1], 2):5}, {round(b2[0][0]-p_b2[0][0], 2):5}")
        # print(b1, ", ", b2) # prints in this form    ->     [[-0.00083603 -0.00023414]] ,  [[0.03851352]]

        # keep previous values, only used to inspect how much each update changed
        p_W2 = W2
        p_b2 = b2
        p_W1 = W1
        p_b1 = b1
        p_cost = cost



# print("---------------------------------------------")
# TEST SAMPLES (n was set in STEP 1)
x1_test, x2_test, y_test = [], [], []
for i in range(n):
    x1_test.append(random.uniform(-10, 10))
    x2_test.append(random.uniform(-10, 10))
    if x1_test[-1] < -5 or x1_test[-1] > 5:
        y_test.append(1)
    else:
        y_test.append(0)

x_test = np.array([x1_test,x2_test]).T
y_test = np.array(y_test).reshape(-1, 1)

# Forward
Z1 = np.dot(x_test, W1) + b1
A1 = sigmoid(Z1)
Z2 = np.dot(A1, W2) + b2
y_pred = sigmoid(Z2)

# Calculate cost
cost = -(1 / y_test.shape[0]) * np.sum((y_test * np.log(y_pred)) + ((1 - y_test) * np.log(1 - y_pred)))

print(f"accuracy with the {n} test samples: ", cost*100)

How the hyperparameter values were chosen: the process of testing and tweaking

1. With the weights and learning rate held fixed, vary only the bias and check the results.
    Tried values from 0 through 1, 0.5, 2, and so on; sometimes all three biases got the
    same value, sometimes different values within one test, e.g. 1, 1.2, 1.

    The numbers in square brackets are the bias values tested.
    The numbers to their right are the "accuracies" (the cost-based scores printed by the
    code above) from single runs with the train/test counts given above; where a test was
    run two or more times, the individual results are listed with their average written to
    the right of the =>. (A sketch automating this kind of sweep appears at the end of this
    section.)
    

    -------    [[   bias value test    ]]    -------
m=10000, n=1000, K=5000
W = [7,-1],[8,0.2],[[2],[15]]
learning_rate = 1
-> held fixed

    [              b = 0 0 0              ]                   2.3
Accuracy with the 4500 train samples:  97.3479143126963
weight:  2.64, -2.98,  0.07,  0.04, 12.92, 14.22 
bias  : -13.24, -14.95,  -6.8
accuracy with the 'n' test samples:  2.3972223143334195

    [              b = 1 1 1              ]                   2.7, 2.5        = > 2.6         ++++++++++++++++++
Accuracy with the 4500 train samples:  96.88883967188463
weight:  2.65, -3.14, -0.08, -0.05, 12.18, 13.54 
bias  : -13.02, -15.73, -6.45
accuracy with the 'n' test samples:  2.732941926772509

    [              b = 0.5 0.5 0.5              ]                   2.4, 2.6        = > 2.5
Accuracy with the 4500 train samples:  97.34066933769462
weight:  2.58, -2.93,  0.07, -0.04, 12.72, 13.96 
bias  : -12.7, -14.66, -6.67
accuracy with the 'n' test samples:  2.4993683983322668

    [              b = 2 2 2              ]                   2.5, 2.1, 2.6        = > 2.4
Accuracy with the 4500 train samples:  97.34773155976951
weight:  2.52, -2.86,  0.06,  -0.0, 12.43, 13.86 
bias  : -12.51, -14.41, -6.56
accuracy with the 'n' test samples:  2.515164605154023

    [              b = 1.5 1.5 1.5              ]                   2.4, 2.4        = > 2.4
Accuracy with the 4500 train samples:  97.28407463481226
weight:  2.66,  3.11, -0.05,  0.06, -13.04, 13.57 
bias  : 12.98, -15.3,  5.93
accuracy with the 'n' test samples:  2.4007455595020257

    [              b = 1.2 1.2 1.2              ]                   2.7, 2.6, 2.4, 2.6        = > 2.57x
Accuracy with the 4500 train samples:  97.07391249083688
weight:  2.64, -2.95,  0.08, -0.03, 12.41,  14.1 
bias  : -12.87, -14.78, -6.63
accuracy with the 'n' test samples:  2.721650293607963

    [              b = 1.3 1.3 1.3              ]                   2.2
Accuracy with the 4500 train samples:  97.17244148174133
weight:  2.69, -3.13,  0.08,  0.04, 12.37, 13.76 
bias  : -13.32, -15.6, -6.54
accuracy with the 'n' test samples:  2.2731789250549714

    [              b = 1 1.2 1              ]                   2.1, 3, 2.1        = > 2.4
Accuracy with the 4500 train samples:  97.82039034613317
weight:  2.57, -2.77, -0.05,  -0.0, 12.84, 14.12 
bias  : -12.73, -13.93, -6.67
accuracy with the 'n' test samples:  2.1830696116442008


2. With the best-performing bias and the learning rate held fixed, vary only the weights and check the results.
    As before, the numbers in square brackets are the weight values tested,
    and the numbers to their right are the accuracies from single runs with the train/test counts given above.

    -------    [[   weight value test    ]]    -------
m=10000, n=1000, K=5000
b = 1 1 1
learning_rate = 1
    [        W = [7,-1],[8,0.2] [2],[15]       ]                   2.7, 2.5        = > 2.6
Accuracy with the 4500 train samples:  96.88883967188463
weight:  2.65, -3.14, -0.08, -0.05, 12.18, 13.54 
bias  : -13.02, -15.73, -6.45
accuracy with the 'n' test samples:  2.732941926772509
    [        W = [2,2],[1,1] [10],[10]       ]                   50.0, 48, 49.9        = > 49.3
Accuracy with the 4500 train samples:  50.07167770168171
weight: -2.21, -2.21,  -0.0,  -0.0,  4.72,  4.72 
bias  : -12.96, -12.96, -0.71
accuracy with the 'n' test samples:  50.191414687708516
    [        W = [2,-2],[1,1] [10],[10]       ]                   2.0, 2.2        = > 2.1
Accuracy with the 4500 train samples:  98.03402359062845
weight:  2.88, -2.64, -0.05,  -0.0, 13.84, 14.38 
bias  : -14.33, -13.18, -7.08
accuracy with the 'n' test samples:  2.001148041012408
    [        W = [2,-2],[1,1] [10],[-10]       ]                   69.3, 69.4, 69.3        = > 69.33         ++++++++++++++++++
Accuracy with the 4500 train samples:  30.70474952584206
weight:  0.43,  0.42,  4.44,  4.32,  8.35, -8.36 
bias  :  1.06,  1.13,  0.04
accuracy with the 'n' test samples:  69.33324802767214
    [        W = [2,-2],[0.1,0.1] [10],[-10]       ]                   69.3, 69.3, 69.4        = > 69.33
Accuracy with the 4500 train samples:  30.726603575221503
weight:  0.03,  0.03,  4.03,  3.79,  7.88, -7.89 
bias  :  1.29,  1.35,  0.02
accuracy with the 'n' test samples:  69.33830741722619
    [        W = [2,-2],[0.5,0.5] [10],[-10]       ]                   69.37, 69.32, 69.4        = > 69.33
Accuracy with the 4500 train samples:  30.716304241514052
weight: -0.06, -0.06,  4.17,  4.65,  8.07, -7.99 
bias  :  1.04,  1.15, -0.06
accuracy with the 'n' test samples:  69.37366500609663

3. With the weights and bias from the previous steps held fixed, vary only the learning rate and check the results.

    -------    [[   learning_rate value test    ]]    -------
m=10000, n=1000, K=5000
W = [[2,-2],[1,1]] [[10],[-10]]
b = 1 1 1
    [              learning_rate = 1              ]                   69.3, 69.4, 69.3        = > 69.33         ++++++++++++++++++
Accuracy with the 4500 train samples:  30.70474952584206
weight:  0.43,  0.42,  4.44,  4.32,  8.35, -8.36 
bias  :  1.06,  1.13,  0.04
accuracy with the 'n' test samples:  69.33324802767214
    [              learning_rate = 0.1              ]                   69.3, 69.3, 69.3        = > 69.3
Accuracy with the 4500 train samples:  30.729235611665345
weight: -0.01,  -0.0,  2.18,  2.21,  9.64, -9.72 
bias  :  0.98,  1.02,  0.07
accuracy with the 'n' test samples:  69.36540182058226
    [              learning_rate = 0.5              ]                   69.3, 69.4        = > 69.35
Accuracy with the 4500 train samples:  30.647608471484176
weight:  0.22,  0.25,  3.01,   3.0,  9.22, -9.24 
bias  :  1.11,  1.06,  0.01
accuracy with the 'n' test samples:  69.33170362384192
    [              learning_rate = 7              ]                   4.7, 5.4        = > 5.0
Accuracy with the 4500 train samples:  94.4415213248751
weight: -9.36,  -8.7, -0.37,   0.4,  9.86, -9.62 
bias  : -46.52, 43.54,  4.44
accuracy with the 'n' test samples:  4.73286587288952
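
The three manual sweeps above can also be automated as a small grid search. Below is a minimal sketch of step 1 (the bias sweep); train_and_eval is a helper introduced here that re-runs the same training loop, and it relies on sigmoid, x_train/y_train, x_test/y_test, and m from the full code above:

def train_and_eval(W1, b1, W2, b2, lr, epochs=5000):
    # re-run training for one hyperparameter setting and return the
    # cross-entropy cost on the test set (lower is better)
    for _ in range(epochs):
        A1 = sigmoid(np.dot(x_train, W1) + b1)
        y_hat = sigmoid(np.dot(A1, W2) + b2)
        dZ2 = y_hat - y_train
        dW2 = np.dot(A1.T, dZ2) / m
        db2 = np.sum(dZ2, axis=0, keepdims=True) / m
        dZ1 = np.dot(dZ2, W2.T) * A1 * (1 - A1)
        dW1 = np.dot(x_train.T, dZ1) / m
        db1 = np.sum(dZ1, axis=0, keepdims=True) / m
        W2, b2 = W2 - lr * dW2, b2 - lr * db2
        W1, b1 = W1 - lr * dW1, b1 - lr * db1
    A1 = sigmoid(np.dot(x_test, W1) + b1)
    p = sigmoid(np.dot(A1, W2) + b2)
    return -np.mean(y_test * np.log(p) + (1 - y_test) * np.log(1 - p))

# step 1: bias sweep with W and learning rate held fixed, as in the table above
for b_val in [0, 0.5, 1, 1.2, 1.3, 1.5, 2]:
    c = train_and_eval(np.array([[7., -1.], [8., 0.2]]), np.full((1, 2), float(b_val)),
                       np.array([[2.], [15.]]), np.full((1, 1), float(b_val)), lr=1)
    print(f"b = {b_val}: test cost*100 = {c * 100:.2f}")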