Practice#1-3: Binary classification using logistic regression
ITE4053 Deep Learning & Applications - practice notes
- Task: Binary classification using logistic regression (loss: binary cross entropy)
Input: 2-dim vector, x={x1,x2}
Output: label of the input, y∈{0,1}
STEP 1: Generate 10000 (=m) train samples
m is the number of training samples; the for loop below generates m 2-dim vectors.
x_train has shape m x 2 (i.e., 10000 x 2).
The per-feature lists are converted to an array after the loop so that the training step can be vectorized.
y_train likewise starts as a length-m list and is reshaped to (m, 1).
m = 10000
x1_train, x2_train, y_train = [], [], []
for i in range(m):
    x1_train.append(random.uniform(-10, 10))
    x2_train.append(random.uniform(-10, 10))
    # label rule: y = 1 if |x1| > 5, else y = 0
    if x1_train[-1] < -5 or x1_train[-1] > 5:
        y_train.append(1)
    else:
        y_train.append(0)
x_train = np.array([x1_train, x2_train]).T  # same as np.vstack((x1_train, x2_train)).T
y_train = np.array(y_train).reshape(-1, 1)
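The same data can also be generated without a Python loop. A minimal vectorized sketch (same labeling rule, y = 1 iff |x1| > 5; assumes numpy is imported as np, as in the full code below):

# vectorized alternative (sketch): one uniform draw covers all m samples
x_train = np.random.uniform(-10, 10, size=(m, 2))
y_train = (np.abs(x_train[:, 0]) > 5).astype(np.float64).reshape(-1, 1)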
STEP 2: W, b initialize
W1 holds the first (hidden) layer's weights, shape (2,2); W2 the second (output) layer's, shape (2,1).
W1 = np.array([[2, -2], [1, 1]])  # np.random.randn(2, 2)
b1 = np.array([[1, 1]])           # np.ones((1, 2)) # np.zeros((1, 2))
W2 = np.array([[10], [-10]])      # np.random.randn(2, 1)
b2 = np.array([[1]])              # np.ones((1, 1)) # np.zeros((1, 1)) -- made (1,1) to match db2's shape
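A quick sanity check of how the shapes flow through the two layers (a sketch; assumes x_train and m from STEP 1 are in scope; the sigmoid activations are omitted since they do not change shapes):

# shape check (sketch): x_train (m,2) -> hidden (m,2) -> output (m,1)
h = np.dot(x_train, W1) + b1   # (m,2) @ (2,2) + (1,2) -> (m,2)
o = np.dot(h, W2) + b2         # (m,2) @ (2,1) + (1,1) -> (m,1)
assert h.shape == (m, 2) and o.shape == (m, 1)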
STEP 3: Set learning rate (start with 0.01)
learning_rate = 1
epochs = 5000
STEP 4: Update W & b with m samples for 5000 (=K) iterations:
for epoch in range(epochs):
    # Forward
    Z1 = np.dot(x_train, W1) + b1
    A1 = sigmoid(Z1)
    Z2 = np.dot(A1, W2) + b2
    y_pred = sigmoid(Z2)
    cost = -(1 / y_train.shape[0]) * np.sum((y_train * np.log(y_pred)) + ((1 - y_train) * np.log(1 - y_pred)))
    # Backward
    dZ2 = y_pred - y_train
    dW2 = (1 / m) * np.dot(A1.T, dZ2)
    db2 = (1 / m) * np.sum(dZ2, axis=0, keepdims=True)
    dZ1 = np.dot(dZ2, W2.T) * A1 * (1 - A1)
    dW1 = (1 / m) * np.dot(x_train.T, dZ1)
    db1 = (1 / m) * np.sum(dZ1, axis=0, keepdims=True)
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    if epoch % 500 == 499:  # every 500 epochs
        # note: {epoch+1} is the epoch index, and 100 - cost*100 is a cost-based
        # proxy for accuracy; both kept as-is to match the logged outputs below
        print(f"Accuracy with the {epoch+1} train samples: ", 100 - cost * 100)
        print(f"weight: {round(W1[0][0],2):5}, {round(W1[0][1],2):5}, {round(W1[1][0],2):5}, {round(W1[1][1],2):5}, {round(W2[0][0],2):5}, {round(W2[1][0],2):5} ")
        # print(f" +- : {round(W1[0][0]-p_W1[0][0], 2):5}, {round(W1[0][1]-p_W1[0][1], 2):5}, {round(W1[1][0]-p_W1[1][0], 2):5}, {round(W1[1][1]-p_W1[1][1], 2):5}, {round(W2[0][0]-p_W2[0][0], 2):5}, {round(W2[1][0]-p_W2[1][0], 2):5} ")
        print(f"bias : {round(b1[0][0],2):5}, {round(b1[0][1],2):5}, {round(b2[0][0],2):5}")
        # print(f" +- : {round(b1[0][0]-p_b1[0][0], 2):5}, {round(b1[0][1]-p_b1[0][1], 2):5}, {round(b2[0][0]-p_b2[0][0], 2):5}")
        # print(b1, ", ", b2)  # sample output: [[-0.00083603 -0.00023414]] , [[0.03851352]]
        # keep the previous values, only to see how much each update changed them
        p_W2 = W2
        p_b2 = b2
        p_W1 = W1
        p_b1 = b1
        p_cost = cost
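For reference, the backward-pass lines follow from differentiating the binary cross entropy loss through the sigmoid, using sigma'(z) = sigma(z)(1 - sigma(z)); with A2 = y_pred and X = x_train:

\[
L = -\frac{1}{m}\sum_{i=1}^{m}\Big[\,y\log A_2 + (1-y)\log(1-A_2)\,\Big]
\]
\[
dZ_2 = A_2 - y,\qquad dW_2 = \tfrac{1}{m}\,A_1^{\top} dZ_2,\qquad
dZ_1 = \big(dZ_2\,W_2^{\top}\big)\odot A_1\odot(1-A_1),\qquad dW_1 = \tfrac{1}{m}\,X^{\top} dZ_1
\]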
STEP 5: Test with test samples (!= train samples)
n = 1000
x1_test, x2_test, y_test = [], [], []
for i in range(n):  # was range(m): the test set should have n samples, as printed below
    x1_test.append(random.uniform(-10, 10))
    x2_test.append(random.uniform(-10, 10))
    if x1_test[-1] < -5 or x1_test[-1] > 5:
        y_test.append(1)
    else:
        y_test.append(0)
x_test = np.array([x1_test, x2_test]).T  # same as np.vstack((x1_test, x2_test)).T
y_test = np.array(y_test).reshape(-1, 1)
# Forward
Z1 = np.dot(x_test, W1) + b1
A1 = sigmoid(Z1)
Z2 = np.dot(A1, W2) + b2
y_pred = sigmoid(Z2)
# Calculate cost
cost = -(1 / y_test.shape[0]) * np.sum((y_test * np.log(y_pred)) + ((1 - y_test) * np.log(1 - y_pred)))
print(f"accuracy with the {n} test samples: ", cost*100)
Full code
import numpy as np
import random
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
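# Optional (a sketch, not part of the original code): np.exp(-z) overflows for
# very negative z once |Z| grows during training, which can push y_pred to
# exactly 0 or 1 and make np.log() return -inf in the cost. An overflow-safe
# variant, should that warning appear:
def stable_sigmoid(z):
    out = np.empty_like(z, dtype=np.float64)
    pos = z >= 0
    out[pos] = 1 / (1 + np.exp(-z[pos]))  # safe: -z[pos] <= 0
    ez = np.exp(z[~pos])                  # safe: z[~pos] < 0
    out[~pos] = ez / (1 + ez)
    return out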
# === STEP 1 ===
# Generate 10000(=m) train samples
m = 10000
n = 1000  # number of test samples; defined up front so the summary print below works
x1_train, x2_train, y_train = [], [], []
for i in range(m):
    x1_train.append(random.uniform(-10, 10))
    x2_train.append(random.uniform(-10, 10))
    # label rule: y = 1 if |x1| > 5, else y = 0
    if x1_train[-1] < -5 or x1_train[-1] > 5:
        y_train.append(1)
    else:
        y_train.append(0)
# print(x_train)  # shape: 10000 (=m) x 2
x_train = np.array([x1_train, x2_train]).T  # same as np.vstack((x1_train, x2_train)).T
# print(len(y_train))  # 10000 (=m)
y_train = np.array(y_train).reshape(-1, 1)  # length-m list reshaped to (m, 1)
# === STEP 2 ===
# W, b initialize
W1 = np.array([[2, -2], [1, 1]])  # np.random.randn(2, 2) # first layer, shape (2,2)
b1 = np.array([[1, 1]])           # np.ones((1, 2)) # np.zeros((1, 2))
W2 = np.array([[10], [-10]])      # np.random.randn(2, 1) # second layer, shape (2,1)
b2 = np.array([[1]])              # np.ones((1, 1)) # np.zeros((1, 1)) -- (1,1) to match db2's shape
# === STEP 3 ===
# Set learning rate (start with 0.01)
learning_rate = 1
epochs = 5000
print(f"[{m=}, {n=}, {epochs=}]")  # moved here: n and epochs must be defined before this print
# === STEP 4 ===
# Update W&b with ‘m’ samples for 5000 (=K) iterations:
for epoch in range(epochs):
Z1 = np.dot(x_train, W1) + b1
A1 = sigmoid(Z1)
Z2 = np.dot(A1, W2) + b2
y_pred = sigmoid(Z2)
# print(y_Train.shape) #(1000, 1)
cost = -(1 / y_train.shape[0]) * np.sum((y_train * np.log(y_pred)) + ((1 - y_train) * np.log(1 - y_pred)))
# Backward
dZ2 = y_pred - y_train
dW2 = (1 / m) * np.dot(A1.T, dZ2)
db2 = (1 / m) * np.sum(dZ2, axis=0, keepdims=True)
dZ1 = np.dot(dZ2, W2.T) * A1 * (1 - A1)
dW1 = (1 / m) * np.dot(x_train.T, dZ1)
db1 = (1 / m) * np.sum(dZ1, axis=0, keepdims=True)
if epoch == 0:
p_cost = cost
# # 맨 밑에 print 하는 데에서 에포크 단위로 비교용 임시추가 내용인데 처음 값은 따로 필요해서
# p_W2 = W2
# p_b2 = b2
# p_W1 = W1
# p_b1 = b1
W2 = W2 - learning_rate * dW2
b2 = b2 - learning_rate * db2
W1 = W1 - learning_rate * dW1
b1 = b1 - learning_rate * db1
# print("accuracy with the 'm' train samples: ", cost * 100)
if epoch % 500 == 499: #500 마다
print(f"Accuracy with the {epoch+1} train samples: ", 100 - cost * 100)
# print(f"[[epoch{epoch:4}]] Cost: {cost:20} ({round(cost-p_cost,5):9})")
# print("weight1:")
# print(W1) #2x2 형태
# print("weight2:")
# print(W2) #2x1 형태
print(f"weight: {round(W1[0][0],2):5}, {round(W1[0][1],2):5}, {round(W1[1][0],2):5}, {round(W1[1][1],2):5}, {round(W2[0][0],2):5}, {round(W2[1][0],2):5} ")
# print(f" +- : {round(W1[0][0]-p_W1[0][0], 2):5}, {round(W1[0][1]-p_W1[0][1], 2):5}, {round(W1[1][0]--p_W1[1][0], 2):5}, {round(W1[1][1]-p_W1[1][1], 2):5}, {round(W2[0][0]-p_W2[0][0], 2):5}, {round(W2[1][0]-p_W2[1][0], 2):5} ")
print(f"bias : {round(b1[0][0],2):5}, {round(b1[0][1],2):5}, {round(b2[0][0],2):5}")
# print(f" +- : {round(b1[0][0]-p_b1[0][0], 2):5}, {round(b1[0][1]-p_b1[0][1], 2):5}, {round(b2[0][0]-p_b2[0][0], 2):5}")
# print(b1, ", ", b2) # 출력하면 이런 형태 -> [[-0.00083603 -0.00023414]] , [[0.03851352]]
# for comparing update --> only to see changes (previous values)
p_W2 = W2
p_b2 = b2
p_W1 = W1
p_b1 = b1
p_cost = cost
# print("---------------------------------------------")
# TEST SAMPLES
n = 1000  # already defined at the top; repeated here for readability
x1_test, x2_test, y_test = [], [], []
for i in range(n):  # was range(m): the test set should have n samples, as printed below
    x1_test.append(random.uniform(-10, 10))
    x2_test.append(random.uniform(-10, 10))
    if x1_test[-1] < -5 or x1_test[-1] > 5:
        y_test.append(1)
    else:
        y_test.append(0)
x_test = np.array([x1_test, x2_test]).T
y_test = np.array(y_test).reshape(-1, 1)
# Forward
Z1 = np.dot(x_test, W1) + b1
A1 = sigmoid(Z1)
Z2 = np.dot(A1, W2) + b2
y_pred = sigmoid(Z2)
# Calculate cost
cost = -(1 / y_test.shape[0]) * np.sum((y_test * np.log(y_pred)) + ((1 - y_test) * np.log(1 - y_pred)))
print(f"accuracy with the {n} test samples: ", cost*100)
How the hyperparameter values were chosen (tested and adjusted step by step)
1. With the weights and the learning rate held fixed, only the biases were changed and the results compared.
Values such as 0, 1, 0.5, and 2 were tried; sometimes all three biases were given the same value,
and sometimes different values within one test, e.g. 1, 1.2, 1.
The numbers in square brackets are the tested bias values.
The numbers to their right are the accuracies from single runs with the train/test sizes above; for settings run two or more times, the average is written after the "=>". The rows marked ++++++ are the values kept in the code above. A sketch of automating such a sweep follows this note.
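All of the sweeps below were run by hand. A minimal sketch of how the bias sweep could be automated (train_and_eval is a hypothetical helper written here for illustration; it compresses STEPs 1-5 above into one function and returns test cost * 100, the same number the logs below label "accuracy"):

import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def train_and_eval(b_init, m=10000, n=1000, lr=1.0, epochs=5000):
    # hypothetical helper: STEPs 1-5 compressed; fresh data per call
    x_tr = np.random.uniform(-10, 10, size=(m, 2))
    y_tr = (np.abs(x_tr[:, 0]) > 5).astype(np.float64).reshape(-1, 1)
    W1 = np.array([[2., -2.], [1., 1.]])
    b1 = np.full((1, 2), float(b_init))
    W2 = np.array([[10.], [-10.]])
    b2 = np.full((1, 1), float(b_init))
    for _ in range(epochs):
        A1 = sigmoid(x_tr @ W1 + b1)
        y_pred = sigmoid(A1 @ W2 + b2)
        dZ2 = y_pred - y_tr
        dW2 = A1.T @ dZ2 / m
        db2 = dZ2.sum(axis=0, keepdims=True) / m
        dZ1 = (dZ2 @ W2.T) * A1 * (1 - A1)
        dW1 = x_tr.T @ dZ1 / m
        db1 = dZ1.sum(axis=0, keepdims=True) / m
        W2 -= lr * dW2; b2 -= lr * db2
        W1 -= lr * dW1; b1 -= lr * db1
    # evaluate on a fresh test set; report test cost * 100
    x_te = np.random.uniform(-10, 10, size=(n, 2))
    y_te = (np.abs(x_te[:, 0]) > 5).astype(np.float64).reshape(-1, 1)
    y_pred = sigmoid(sigmoid(x_te @ W1 + b1) @ W2 + b2)
    return -np.mean(y_te * np.log(y_pred) + (1 - y_te) * np.log(1 - y_pred)) * 100

for b in [0, 0.5, 1, 1.2, 1.5, 2]:
    print(f"b = {b}: test cost*100 = {train_and_eval(b):.2f}")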
------- [[ bias value test ]] -------
m=10000, n=1000, K=5000
W1 = [[7,-1],[8,0.2]], W2 = [[2],[15]]
learning_rate = 1
-> held fixed
[ b = 0 0 0 ] 2.3
Accuracy with the 4500 train samples: 97.3479143126963
weight: 2.64, -2.98, 0.07, 0.04, 12.92, 14.22
bias : -13.24, -14.95, -6.8
accuracy with the 'n' test samples: 2.3972223143334195
[ b = 1 1 1 ] 2.7, 2.5 => 2.6 ++++++++++++++++++
Accuracy with the 4500 train samples: 96.88883967188463
weight: 2.65, -3.14, -0.08, -0.05, 12.18, 13.54
bias : -13.02, -15.73, -6.45
accuracy with the 'n' test samples: 2.732941926772509
[ b = 0.5 0.5 0.5 ] 2.4, 2.6 => 2.5
Accuracy with the 4500 train samples: 97.34066933769462
weight: 2.58, -2.93, 0.07, -0.04, 12.72, 13.96
bias : -12.7, -14.66, -6.67
accuracy with the 'n' test samples: 2.4993683983322668
[ b = 2 2 2 ] 2.5, 2.1, 2.6 => 2.4
Accuracy with the 4500 train samples: 97.34773155976951
weight: 2.52, -2.86, 0.06, -0.0, 12.43, 13.86
bias : -12.51, -14.41, -6.56
accuracy with the 'n' test samples: 2.515164605154023
[ b = 1.5 1.5 1.5 ] 2.4, 2.4 => 2.4
Accuracy with the 4500 train samples: 97.28407463481226
weight: 2.66, 3.11, -0.05, 0.06, -13.04, 13.57
bias : 12.98, -15.3, 5.93
accuracy with the 'n' test samples: 2.4007455595020257
[ b = 1.2 1.2 1.2 ] 2.7, 2.6, 2.4, 2.6 => 2.575
Accuracy with the 4500 train samples: 97.07391249083688
weight: 2.64, -2.95, 0.08, -0.03, 12.41, 14.1
bias : -12.87, -14.78, -6.63
accuracy with the 'n' test samples: 2.721650293607963
[ b = 1.3 1.3 1.3 ] 2.2
Accuracy with the 4500 train samples: 97.17244148174133
weight: 2.69, -3.13, 0.08, 0.04, 12.37, 13.76
bias : -13.32, -15.6, -6.54
accuracy with the 'n' test samples: 2.2731789250549714
[ b = 1 1.2 1 ] 2.1, 3, 2.1 => 2.4
Accuracy with the 4500 train samples: 97.82039034613317
weight: 2.57, -2.77, -0.05, -0.0, 12.84, 14.12
bias : -12.73, -13.93, -6.67
accuracy with the 'n' test samples: 2.1830696116442008
2. With the best-performing bias values and the learning rate held fixed, only the weights were changed and the results compared.
As before, the numbers in square brackets are the tested weight values,
and the numbers to their right are the accuracies from single runs with the train/test sizes above.
------- [[ weight value test ]] -------
m=10000, n=1000, K=5000
b = 1 1 1
learning_rate = 1
[ W1 = [[7,-1],[8,0.2]], W2 = [[2],[15]] ] 2.7, 2.5 => 2.6
Accuracy with the 4500 train samples: 96.88883967188463
weight: 2.65, -3.14, -0.08, -0.05, 12.18, 13.54
bias : -13.02, -15.73, -6.45
accuracy with the 'n' test samples: 2.732941926772509
[ W1 = [[2,2],[1,1]], W2 = [[10],[10]] ] 50.0, 48, 49.9 => 49.3
Accuracy with the 4500 train samples: 50.07167770168171
weight: -2.21, -2.21, -0.0, -0.0, 4.72, 4.72
bias : -12.96, -12.96, -0.71
accuracy with the 'n' test samples: 50.191414687708516
[ W1 = [[2,-2],[1,1]], W2 = [[10],[10]] ] 2.0, 2.2 => 2.1
Accuracy with the 4500 train samples: 98.03402359062845
weight: 2.88, -2.64, -0.05, -0.0, 13.84, 14.38
bias : -14.33, -13.18, -7.08
accuracy with the 'n' test samples: 2.001148041012408
[ W1 = [[2,-2],[1,1]], W2 = [[10],[-10]] ] 69.3, 69.4, 69.3 => 69.33 ++++++++++++++++++
Accuracy with the 4500 train samples: 30.70474952584206
weight: 0.43, 0.42, 4.44, 4.32, 8.35, -8.36
bias : 1.06, 1.13, 0.04
accuracy with the 'n' test samples: 69.33324802767214
[ W1 = [[2,-2],[0.1,0.1]], W2 = [[10],[-10]] ] 69.3, 69.3, 69.4 => 69.33
Accuracy with the 4500 train samples: 30.726603575221503
weight: 0.03, 0.03, 4.03, 3.79, 7.88, -7.89
bias : 1.29, 1.35, 0.02
accuracy with the 'n' test samples: 69.33830741722619
[ W1 = [[2,-2],[0.5,0.5]], W2 = [[10],[-10]] ] 69.37, 69.32, 69.4 => 69.36
Accuracy with the 4500 train samples: 30.716304241514052
weight: -0.06, -0.06, 4.17, 4.65, 8.07, -7.99
bias : 1.04, 1.15, -0.06
accuracy with the 'n' test samples: 69.37366500609663
3. With the weight and bias values chosen above held fixed, only the learning rate was changed and the results compared.
------- [[ learning_rate value test ]] -------
m=10000, n=1000, K=5000
W1 = [[2,-2],[1,1]], W2 = [[10],[-10]]
b = 1 1 1
[ learning_rate = 1 ] 69.3, 69.4, 69.3 => 69.33 ++++++++++++++++++
Accuracy with the 4500 train samples: 30.70474952584206
weight: 0.43, 0.42, 4.44, 4.32, 8.35, -8.36
bias : 1.06, 1.13, 0.04
accuracy with the 'n' test samples: 69.33324802767214
[ learning_rate = 0.1 ] 69.3, 69.3, 69.3 => 69.3
Accuracy with the 4500 train samples: 30.729235611665345
weight: -0.01, -0.0, 2.18, 2.21, 9.64, -9.72
bias : 0.98, 1.02, 0.07
accuracy with the 'n' test samples: 69.36540182058226
[ learning_rate = 0.5 ] 69.3, 69.4 => 69.35
Accuracy with the 4500 train samples: 30.647608471484176
weight: 0.22, 0.25, 3.01, 3.0, 9.22, -9.24
bias : 1.11, 1.06, 0.01
accuracy with the 'n' test samples: 69.33170362384192
[ learning_rate = 7 ] 4.7, 5.4 => 5.0
Accuracy with the 4500 train samples: 94.4415213248751
weight: -9.36, -8.7, -0.37, 0.4, 9.86, -9.62
bias : -46.52, 43.54, 4.44
accuracy with the 'n' test samples: 4.73286587288952