A Conceptual Study of Deep Learning

Deep Learning
In [2]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt

# Load the MNIST dataset
mnist = keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Prepare the data for the model: scale to [0, 1] and flatten each 28x28 image into a 784-dim vector
x_train_norm, x_test_norm = x_train / 255.0, x_test / 255.0
x_train_reshaped = x_train_norm.reshape(-1, x_train_norm.shape[1]*x_train_norm.shape[2])
x_test_reshaped = x_test_norm.reshape(-1, x_test_norm.shape[1]*x_test_norm.shape[2])

# Build the deep learning model - a 2-layer perceptron
model = keras.models.Sequential()
model.add(keras.layers.Dense(50, activation='sigmoid', input_shape=(784,)))  # input dim d=784, hidden layer size H=50
model.add(keras.layers.Dense(10, activation='softmax'))  # output layer size K=10
model.summary()

# Compile and train the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(x_train_reshaped, y_train, epochs=10)

# Evaluate on the test set
test_loss, test_accuracy = model.evaluate(x_test_reshaped, y_test, verbose=2)
print("test_loss: {} ".format(test_loss))
print("test_accuracy: {}".format(test_accuracy))
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= dense (Dense) (None, 50) 39250 _________________________________________________________________ dense_1 (Dense) (None, 10) 510 ================================================================= Total params: 39,760 Trainable params: 39,760 Non-trainable params: 0 _________________________________________________________________ Epoch 1/10 1875/1875 [==============================] - 2s 963us/step - loss: 0.4921 - accuracy: 0.8825 Epoch 2/10 1875/1875 [==============================] - 2s 984us/step - loss: 0.2319 - accuracy: 0.9340 Epoch 3/10 1875/1875 [==============================] - 2s 892us/step - loss: 0.1829 - accuracy: 0.9477 Epoch 4/10 1875/1875 [==============================] - 2s 968us/step - loss: 0.1526 - accuracy: 0.9566 Epoch 5/10 1875/1875 [==============================] - 2s 1ms/step - loss: 0.1307 - accuracy: 0.9624 Epoch 6/10 1875/1875 [==============================] - 2s 1ms/step - loss: 0.1149 - accuracy: 0.9672 Epoch 7/10 1875/1875 [==============================] - 2s 997us/step - loss: 0.1023 - accuracy: 0.9711 Epoch 8/10 1875/1875 [==============================] - 2s 962us/step - loss: 0.0917 - accuracy: 0.9741 Epoch 9/10 1875/1875 [==============================] - 2s 917us/step - loss: 0.0834 - accuracy: 0.9761 Epoch 10/10 1875/1875 [==============================] - 2s 920us/step - loss: 0.0762 - accuracy: 0.9789 313/313 - 0s - loss: 0.1030 - accuracy: 0.9696 test_loss: 0.10297449678182602 test_accuracy: 0.9696000218391418
Examining the MLP-Based Deep Learning Model
In [3]:
# Shape of the input data
print(x_train_reshaped.shape)

X = x_train_reshaped[:5]
print(X.shape)
(60000, 784)
(5, 784)
In [4]:
weight_init_std = 0.1
input_size = 784
hidden_size = 50

# Create the weight matrix W1 connecting the two adjacent layers and initialize it with small random values
W1 = weight_init_std * np.random.randn(input_size, hidden_size)
# Create the bias parameter b1 and initialize it with zeros
b1 = np.zeros(hidden_size)

a1 = np.dot(X, W1) + b1  # hidden layer pre-activation (before the sigmoid)

print(W1.shape)
print(b1.shape)
print(a1.shape)
(784, 50)
(50,)
(5, 50)
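A small detail worth noting (my addition): np.dot(X, W1) has shape (5, 50) while b1 has shape (50,), so NumPy broadcasting adds the same bias vector to every row of the batch.

# Broadcasting illustration: a (5, 50) matrix plus a (50,) vector stays (5, 50)
print((np.zeros((5, 50)) + np.ones(50)).shape)  # (5, 50)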
In [5]:
# Hidden-layer pre-activation (a1) for the first sample
a1[0]
Out[5]:
array([ 0.83304331, -0.33484131, -0.39018221, -0.21981622, 0.78830347, 0.52874596, 0.13625722, -0.78178909, -1.17307584, 0.03163056, 1.78907726, 0.7781907 , -1.0231879 , -0.92150396, 1.70690485, 0.30238039, -1.20816855, 0.82153135, -1.55829761, -0.23215501, -0.99573824, 1.53021968, 0.13433009, -0.4211904 , -0.56089689, 0.48230049, 0.11804121, -0.62485705, 0.08126223, -0.98865914, 1.38484073, -0.73179198, 0.11088799, -0.6466737 , -0.01272792, 0.36388412, 0.83044724, -0.35041306, -1.30759039, 1.30968908, -0.72670723, 1.44572124, 0.08968852, 0.7248693 , 1.45083707, -1.29125857, -1.57429086, -1.55545271, -0.86314572, 0.79103105])
In [6]:
# Implement the sigmoid function used above
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

z1 = sigmoid(a1)
print(z1[0])  # every element of the sigmoid output lies between 0 and 1
[0.69699804 0.41706313 0.40367344 0.44526616 0.68746694 0.62919058 0.5340117 0.31393442 0.23629946 0.50790698 0.85681411 0.68529004 0.2644069 0.28465155 0.8464344 0.57502432 0.23002527 0.69456131 0.17389106 0.44222052 0.26978016 0.82203845 0.53353212 0.39623193 0.36333996 0.61829095 0.52947608 0.3486776 0.52030438 0.271177 0.79976732 0.32480161 0.52769363 0.3437395 0.49681806 0.58998034 0.69644949 0.41328226 0.21289034 0.78746112 0.32591772 0.80933906 0.52240711 0.67367837 0.81012723 0.21563986 0.17160556 0.17430012 0.29668253 0.68805267]
In [7]:
# A single affine (fully connected) layer: forward pass
def affine_layer_forward(X, W, b):
    y = np.dot(X, W) + b
    cache = (X, W, b)  # keep the inputs for the backward pass
    return y, cache

print('go~')
go~
In [8]:
input_size = 784
hidden_size = 50
output_size = 10

W1 = weight_init_std * np.random.randn(input_size, hidden_size)
b1 = np.zeros(hidden_size)
W2 = weight_init_std * np.random.randn(hidden_size, output_size)
b2 = np.zeros(output_size)

a1, cache1 = affine_layer_forward(X, W1, b1)
z1 = sigmoid(a1)
a2, cache2 = affine_layer_forward(z1, W2, b2)  # z1 becomes the input to the second layer

print(a2[0])  # the final output is now a vector of length output_size
[ 0.4259652 -0.09597473 0.18571308 -0.33984938 0.67078273 -0.26367004 0.29699598 0.71809307 -0.45386292 -0.10189889]
In [9]:
def softmax(x):
    if x.ndim == 2:
        x = x.T
        x = x - np.max(x, axis=0)
        y = np.exp(x) / np.sum(np.exp(x), axis=0)
        return y.T

    x = x - np.max(x)  # guard against overflow
    return np.exp(x) / np.sum(np.exp(x))
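The max subtraction matters for numerical stability: np.exp overflows for large inputs, but softmax is unchanged by shifting its argument. A quick illustration (not in the original post):

print(softmax(np.array([1000.0, 1000.0])))  # [0.5 0.5] - stable despite the huge inputs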
In [10]:
y_hat = softmax(a2)
y_hat[0]  # now a probability distribution over the 10 digit classes
Out[10]:
array([0.12743897, 0.07561825, 0.10022177, 0.05925339, 0.16278904, 0.06394366, 0.11201899, 0.17067573, 0.05286859, 0.0751716 ])
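A quick check (my addition) confirms that softmax turned each row of a2 into a proper probability distribution:

print(np.sum(y_hat, axis=1))  # every row sums to 1.0 (up to floating-point error)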
3. Loss Functions
- Mean Squared Error (MSE)
- Cross Entropy
- Both measure how far the model's predictions are from the targets (a minimal MSE sketch follows below; cross entropy is implemented in the cells that follow)
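Only cross entropy is used in this notebook; for comparison, a minimal MSE implementation for one-hot targets (my sketch, not from the original post) could look like this:

def mean_squared_error(y, t):
    # y: predicted probabilities, t: one-hot targets; averaged over the batch
    return 0.5 * np.sum((y - t) ** 2) / y.shape[0]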
In [11]:
# A function that one-hot encodes the ground-truth labels
def _change_ont_hot_label(X, num_category):
    T = np.zeros((X.size, num_category))
    for idx, row in enumerate(T):
        row[X[idx]] = 1
    return T

Y_digit = y_train[:5]
t = _change_ont_hot_label(Y_digit, 10)
t  # one-hot encoded ground-truth labels
Out[11]:
array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])
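For reference (an aside, not used in this notebook), Keras provides an equivalent helper for one-hot encoding:

t_keras = keras.utils.to_categorical(Y_digit, num_classes=10)
print(np.array_equal(t, t_keras))  # True - same encoding as _change_ont_hot_label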
In [12]:
print(y_hat[0])
print(t[0])
[0.12743897 0.07561825 0.10022177 0.05925339 0.16278904 0.06394366 0.11201899 0.17067573 0.05286859 0.0751716 ]
[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
In [13]:
def cross_entropy_error(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # If the labels are one-hot vectors, convert them to class indices
    if t.size == y.size:
        t = t.argmax(axis=1)

    batch_size = y.shape[0]
    return -np.sum(np.log(y[np.arange(batch_size), t])) / batch_size

Loss = cross_entropy_error(y_hat, t)
Loss
Out[13]:
2.353749199389715
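This value is about what we should expect before any training (my note): a network whose outputs are close to uniform over the 10 classes gives a cross entropy of roughly ln(10).

print(np.log(10))  # ≈ 2.302, close to the loss of 2.35 above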
In [14]:
# To see how the loss L changes as the parameters change, start from the gradient at the output
batch_num = y_hat.shape[0]
dy = (y_hat - t) / batch_num
dy  # combined softmax + cross-entropy gradient, i.e. dL/da2
Out[14]:
array([[ 0.02548779, 0.01512365, 0.02004435, 0.01185068, 0.03255781, -0.18721127, 0.0224038 , 0.03413515, 0.01057372, 0.01503432], [-0.17139518, 0.01589273, 0.01898284, 0.01164563, 0.0373128 , 0.01339288, 0.01711081, 0.03056354, 0.01069386, 0.01580009], [ 0.02825251, 0.01651377, 0.02774657, 0.01486045, -0.16930969, 0.0112018 , 0.0193426 , 0.02595432, 0.00926685, 0.01617081], [ 0.02836764, -0.1837924 , 0.02932118, 0.01080564, 0.028996 , 0.0132958 , 0.01823187, 0.03075256, 0.00924845, 0.01477328], [ 0.03132472, 0.01730697, 0.02524947, 0.01238103, 0.02892122, 0.01126267, 0.0184168 , 0.03182866, 0.00969207, -0.18638361]])
In [15]:
dW2 = np.dot(z1.T, dy)
dW2
Out[15]:
array([[-0.04405911, -0.02942345, 0.04741396, 0.02472017, -0.00193228, -0.07130888, 0.0387122 , 0.06245756, 0.02022765, -0.04680781], [-0.02303975, -0.07704984, 0.07489135, 0.03767795, -0.05041958, -0.0633212 , 0.05768242, 0.09199641, 0.02978419, -0.07820195], [-0.01551123, -0.07817405, 0.07076863, 0.03542435, -0.03348633, -0.06755538, 0.05468906, 0.0875514 , 0.02819673, -0.08190318], [ 0.01056445, -0.09592198, 0.05674196, 0.02739568, -0.02045996, -0.04662121, 0.04265934, 0.06843645, 0.02181038, -0.06460511], [-0.06663682, -0.05109896, 0.04974773, 0.02556815, 0.04396648, -0.07677165, 0.04095063, 0.06773088, 0.02172973, -0.05518616], [-0.01370582, -0.08590786, 0.06774397, 0.03365627, -0.01320068, -0.07176213, 0.0525288 , 0.08460058, 0.02709787, -0.08105101], [-0.02567985, -0.09202596, 0.06969696, 0.03474414, -0.05034865, -0.05650327, 0.05331649, 0.08486485, 0.02752099, -0.04558569], [-0.05465115, -0.07644615, 0.05702957, 0.0284699 , -0.04426831, -0.01041075, 0.04275734, 0.0687734 , 0.02254747, -0.03380132], [-0.04194882, -0.09956559, 0.0884548 , 0.04456262, -0.00431877, -0.10950656, 0.06988812, 0.11286402, 0.03624493, -0.09667477], [-0.01809662, -0.06639431, 0.04553685, 0.02266863, 0.01993099, -0.07341571, 0.03645877, 0.05927362, 0.01884958, -0.04481182], [-0.04068065, -0.04090025, 0.06507062, 0.03315713, -0.06352742, -0.01424697, 0.04918404, 0.07872216, 0.02577267, -0.09255132], [-0.03192014, -0.01543423, 0.05541616, 0.02894449, 0.01542649, -0.08896432, 0.04552393, 0.0740204 , 0.02371322, -0.10672601], [-0.00079428, -0.05990623, 0.03215811, 0.0154759 , 0.02148247, -0.04209253, 0.02498701, 0.04100939, 0.01288715, -0.045207 ], [-0.03726336, -0.11225564, 0.06727157, 0.03311352, 0.04965364, -0.09633577, 0.05343797, 0.08786171, 0.0278555 , -0.07333913], [-0.03356125, -0.05062263, 0.06296269, 0.03225101, -0.05721622, -0.05813082, 0.04908238, 0.07774884, 0.02538995, -0.04790396], [-0.00521945, -0.09535294, 0.06212535, 0.03033525, 0.01537451, -0.06847445, 0.048024 , 0.07821864, 0.0247939 , -0.08982481], [-0.03959003, -0.08326267, 0.07742889, 0.03907862, -0.00411108, -0.09227943, 0.06114537, 0.09883518, 0.03176269, -0.08900752], [ 0.03534535, -0.01556968, 0.03675891, 0.01871921, -0.05162197, -0.05411327, 0.02864232, 0.04414768, 0.01418272, -0.05649128], [ 0.011177 , -0.05229366, 0.0328449 , 0.01562001, 0.01081033, -0.01776854, 0.02440879, 0.04019599, 0.01260661, -0.07760142], [-0.05586135, -0.08429997, 0.08626369, 0.04363501, -0.03527093, -0.06596147, 0.06685389, 0.10775469, 0.03492242, -0.09803599], [-0.03888991, -0.07228484, 0.07705942, 0.03923658, 0.01116327, -0.11569482, 0.06202983, 0.10039072, 0.03216517, -0.09517543], [ 0.02269694, -0.09965655, 0.05316519, 0.02501592, -0.03956919, -0.00195104, 0.03784949, 0.06070594, 0.01936868, -0.07762538], [-0.0283477 , -0.06544274, 0.05713148, 0.02886554, 0.01617911, -0.08998548, 0.04599618, 0.07459303, 0.02383474, -0.06282415], [-0.02907268, -0.00123515, 0.04913674, 0.02573067, -0.05068228, -0.02590918, 0.03816929, 0.06082697, 0.01993662, -0.086901 ], [-0.07593545, -0.07743484, 0.07036381, 0.03575266, -0.00820221, -0.05977452, 0.05525055, 0.08976576, 0.02915209, -0.05893785], [-0.02322762, -0.04256814, 0.0634469 , 0.0331934 , -0.01213862, -0.14868891, 0.05319003, 0.08421794, 0.02711512, -0.0345401 ], [-0.07845744, -0.06634357, 0.06041476, 0.03107719, 0.03946313, -0.09811027, 0.04974285, 0.08161874, 0.02626978, -0.04567516], [-0.04335078, -0.05779747, 0.06158621, 0.03127795, -0.05135098, -0.03581794, 0.04725147, 0.07543114, 0.02466228, -0.05189187], [ 
0.03439556, -0.08537352, 0.07417752, 0.03655782, -0.05520276, -0.07079851, 0.05630087, 0.08917374, 0.02850522, -0.10773593], [-0.05429937, -0.09392963, 0.07691725, 0.03842175, -0.00226747, -0.05638672, 0.0594378 , 0.09687411, 0.03120166, -0.09596937], [-0.05777349, -0.05345405, 0.05088205, 0.02565906, 0.00239637, -0.02170071, 0.03923698, 0.0645367 , 0.02090937, -0.07069228], [-0.05676609, -0.06219187, 0.06987058, 0.03596977, 0.01463237, -0.11363685, 0.05703107, 0.09237515, 0.02972492, -0.06700906], [-0.02399746, -0.0452936 , 0.0288958 , 0.0141831 , 0.00027398, -0.01095111, 0.02183351, 0.03581146, 0.01154174, -0.03229743], [-0.05441575, -0.10327903, 0.06652316, 0.03292566, 0.0634262 , -0.0864808 , 0.05299343, 0.08794353, 0.02790565, -0.08754206], [-0.03526508, -0.06363533, 0.07394408, 0.03736402, -0.0319309 , -0.05191741, 0.05703775, 0.09196274, 0.02972977, -0.10728965], [-0.02099984, -0.02441642, 0.06146249, 0.03161328, -0.05324182, -0.04150567, 0.04752443, 0.07576262, 0.02464344, -0.10084252], [-0.00026676, -0.07374754, 0.07462398, 0.03769467, -0.07759834, -0.0834962 , 0.05780304, 0.09068123, 0.02941154, -0.05510563], [ 0.02228392, -0.08183186, 0.05711986, 0.02814529, -0.01574035, -0.09206722, 0.04478135, 0.07113776, 0.02258989, -0.05641863], [-0.06475535, -0.07899738, 0.06076267, 0.03067975, 0.02595418, -0.07393563, 0.04854797, 0.07958653, 0.02559733, -0.05344008], [-0.06116328, -0.07227112, 0.06300767, 0.03182728, 0.03586096, -0.07358729, 0.05031706, 0.08291331, 0.02656221, -0.0834668 ], [-0.0756882 , -0.02817751, 0.06462652, 0.03370535, 0.03404401, -0.08014857, 0.0527364 , 0.08692628, 0.02798756, -0.11601185], [-0.08438394, -0.06596898, 0.05110598, 0.02582386, 0.00021228, -0.01539536, 0.03940112, 0.06488785, 0.02121747, -0.03690028], [-0.07731076, -0.0445813 , 0.0709998 , 0.03735796, -0.0037647 , -0.12615892, 0.05891363, 0.09465299, 0.03074893, -0.04085765], [-0.01269397, -0.03032252, 0.07436693, 0.0385365 , -0.07703363, -0.08660162, 0.05864194, 0.09223416, 0.0300195 , -0.0871473 ], [-0.01770699, -0.06419533, 0.04007467, 0.01968058, -0.00280464, -0.03520843, 0.03076415, 0.04985307, 0.01598922, -0.0364463 ], [-0.08119112, -0.06097204, 0.04746919, 0.02416101, 0.00186554, -0.02802499, 0.03719903, 0.06103121, 0.01996738, -0.02150523], [-0.0408669 , -0.00098092, 0.03941666, 0.02104095, -0.03428059, -0.04167877, 0.03177377, 0.05052557, 0.01663869, -0.04158847], [ 0.00467816, -0.09353664, 0.04470106, 0.02125076, -0.02139366, -0.02885243, 0.03304529, 0.05289863, 0.01690731, -0.02969848], [-0.05706611, -0.03161152, 0.05225415, 0.02715286, -0.01732355, -0.04974803, 0.04161091, 0.06719218, 0.02192218, -0.05438306], [-0.08765079, -0.04181378, 0.05434736, 0.02843106, -0.01183802, -0.05525137, 0.04378815, 0.07092079, 0.02328758, -0.02422099]])
In [16]:
dW2 = np.dot(z1.T, dy)
db2 = np.sum(dy, axis=0)
In [17]:
# Gradient of the sigmoid activation: sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))
def sigmoid_grad(x):
    return (1.0 - sigmoid(x)) * sigmoid(x)
In [18]:
dz1 = np.dot(dy, W2.T)
da1 = sigmoid_grad(a1) * dz1
dW1 = np.dot(X.T, da1)
db1 = np.sum(da1, axis=0)  # bias gradient comes from da1, the gradient at the first affine layer's output
In [20]:
learning_rate = 0.1

# Vanilla SGD update (the Keras model above used Adam instead)
def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate):
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
    return W1, b1, W2, b2
5. Backpropagation
- Compute the difference between the output layer's result and the target, propagate that error backward through the layers, and update the parameters held by each node along the way.
In [21]:
# Backward pass of a single affine layer
def affine_layer_backward(dy, cache):
    X, W, b = cache
    dX = np.dot(dy, W.T)
    dW = np.dot(X.T, dy)
    db = np.sum(dy, axis=0)
    return dX, dW, db
In [22]:
# Initialize the parameters
W1 = weight_init_std * np.random.randn(input_size, hidden_size)
b1 = np.zeros(hidden_size)
W2 = weight_init_std * np.random.randn(hidden_size, output_size)
b2 = np.zeros(output_size)

# Forward propagation
a1, cache1 = affine_layer_forward(X, W1, b1)
z1 = sigmoid(a1)
a2, cache2 = affine_layer_forward(z1, W2, b2)

# Inference and loss computation
y_hat = softmax(a2)
t = _change_ont_hot_label(Y_digit, 10)  # one-hot encode the ground-truth labels
Loss = cross_entropy_error(y_hat, t)

print(y_hat)
print(t)
print('Loss: ', Loss)

# Backward propagation
dy = (y_hat - t) / X.shape[0]
dz1, dW2, db2 = affine_layer_backward(dy, cache2)
da1 = sigmoid_grad(a1) * dz1
dX, dW1, db1 = affine_layer_backward(da1, cache1)

# Update the parameters via gradient descent
learning_rate = 0.1
W1, b1, W2, b2 = update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate)
[[0.12930815 0.09829531 0.11104748 0.09021649 0.07795578 0.10833469 0.08864689 0.11143967 0.07095788 0.11379767]
 [0.11791621 0.10896692 0.10385182 0.09150588 0.06490006 0.11537603 0.10121495 0.09938909 0.06365394 0.13322511]
 [0.14120447 0.08546109 0.11004187 0.0861622  0.08605174 0.09880557 0.09491945 0.10839974 0.06509522 0.12385864]
 [0.12565919 0.09741781 0.13289147 0.09909126 0.08058691 0.12668914 0.08574112 0.08283188 0.05674557 0.11234565]
 [0.13910767 0.08830144 0.1253309  0.09276794 0.06393984 0.09389943 0.09809313 0.1092199  0.07275356 0.11658619]]
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]
Loss:  2.258197606990804
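To gain confidence that affine_layer_backward and the dy formula produce correct gradients, a numerical gradient check can compare the analytic gradient against finite differences. This is my own sketch (not in the original post); it reuses the functions defined above and checks a single entry of W2.

# Numerical gradient check for one entry of W2
def forward_loss(W1, b1, W2, b2, X, t):
    a1, _ = affine_layer_forward(X, W1, b1)
    z1 = sigmoid(a1)
    a2, _ = affine_layer_forward(z1, W2, b2)
    return cross_entropy_error(softmax(a2), t)

# Analytic gradient at the current parameters
a1, cache1 = affine_layer_forward(X, W1, b1)
z1 = sigmoid(a1)
a2, cache2 = affine_layer_forward(z1, W2, b2)
dy = (softmax(a2) - t) / X.shape[0]
_, dW2_analytic, _ = affine_layer_backward(dy, cache2)

# Numerical gradient via central differences
eps = 1e-5
i, j = 0, 0
W2_plus, W2_minus = W2.copy(), W2.copy()
W2_plus[i, j] += eps
W2_minus[i, j] -= eps
num_grad = (forward_loss(W1, b1, W2_plus, b2, X, t)
            - forward_loss(W1, b1, W2_minus, b2, X, t)) / (2 * eps)

print(num_grad, dW2_analytic[i, j])  # the two values should agree to several decimal places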
6. Model Training
In [23]:
W1 = weight_init_std * np.random.randn(input_size, hidden_size)
b1 = np.zeros(hidden_size)
W2 = weight_init_std * np.random.randn(hidden_size, output_size)
b2 = np.zeros(output_size)

# One training step: forward pass, loss, backward pass, parameter update
def train_step(X, Y, W1, b1, W2, b2, learning_rate=0.1, verbose=False):
    a1, cache1 = affine_layer_forward(X, W1, b1)
    z1 = sigmoid(a1)
    a2, cache2 = affine_layer_forward(z1, W2, b2)
    y_hat = softmax(a2)
    t = _change_ont_hot_label(Y, 10)
    Loss = cross_entropy_error(y_hat, t)

    if verbose:
        print('---------')
        print(y_hat)
        print(t)
        print('Loss: ', Loss)

    dy = (y_hat - t) / X.shape[0]
    dz1, dW2, db2 = affine_layer_backward(dy, cache2)
    da1 = sigmoid_grad(a1) * dz1
    dX, dW1, db1 = affine_layer_backward(da1, cache1)

    W1, b1, W2, b2 = update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate)
    return W1, b1, W2, b2, Loss
In [24]:
X = x_train_reshaped[:5]
Y = y_train[:5]

# Repeat train_step five times
for i in range(5):
    W1, b1, W2, b2, _ = train_step(X, Y, W1, b1, W2, b2, learning_rate=0.1, verbose=True)
--------- [[0.0769323 0.06786783 0.07710978 0.0787273 0.06839531 0.08483743 0.09138769 0.1141673 0.25698985 0.08358521] [0.06871725 0.06516246 0.07231807 0.09975728 0.06817097 0.09006803 0.08404206 0.12252178 0.2342073 0.09503481] [0.07314624 0.06901302 0.08859535 0.10106858 0.06745253 0.10191554 0.08955639 0.1022239 0.2111489 0.09587955] [0.07163193 0.06681487 0.07058345 0.10056004 0.0759144 0.09374417 0.08853415 0.11541788 0.21940534 0.09739378] [0.05768925 0.05818792 0.08077219 0.10604086 0.0644009 0.09057519 0.09867116 0.11541322 0.23797793 0.0902714 ]] [[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.] [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.] [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]] Loss: 2.59037379084016 --------- [[0.09982387 0.08524669 0.07137648 0.07171177 0.08455972 0.10838618 0.08273979 0.10090578 0.19178545 0.10346426] [0.09104759 0.0810534 0.06614932 0.09081166 0.08357703 0.11159395 0.07571677 0.10797339 0.17550037 0.11657652] [0.08968297 0.08484366 0.08134144 0.09129991 0.08598761 0.12109592 0.0809705 0.08979929 0.15844348 0.11653521] [0.08871429 0.08556327 0.06466982 0.09141659 0.09275914 0.1123344 0.08074819 0.10119982 0.16379908 0.11879539] [0.07411776 0.0745085 0.07444705 0.09726712 0.08175143 0.11205036 0.09005344 0.1019109 0.17452911 0.11936434]] [[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.] [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.] [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]] Loss: 2.331210731810014 --------- [[0.11978457 0.09960087 0.06379208 0.06329988 0.09736983 0.12821171 0.07265296 0.08719163 0.15005846 0.11803801] [0.11195872 0.0941293 0.05870457 0.0804016 0.09580716 0.12862908 0.0664393 0.09339916 0.13821601 0.13231511] [0.10307867 0.09825538 0.07283883 0.08069095 0.10335915 0.13484212 0.07163667 0.07776333 0.12520362 0.13233127] [0.10273831 0.102994 0.057658 0.08114329 0.10664108 0.12586465 0.071947 0.08732741 0.12863492 0.13505133] [0.08785387 0.08850809 0.06618458 0.0862777 0.09640938 0.12789642 0.07953028 0.08782622 0.13447067 0.14504278]] [[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.] [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.] [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]] Loss: 2.1434108018471085 --------- [[0.13676185 0.11130791 0.05639839 0.05539684 0.10737242 0.14480945 0.063295 0.07519158 0.12135157 0.12811499] [0.13119069 0.10474272 0.0516617 0.07068009 0.10535933 0.1419242 0.05794491 0.08076479 0.11259622 0.14313535] [0.11355734 0.10954352 0.06482709 0.07104003 0.11970754 0.14426883 0.06313541 0.06744861 0.10247638 0.14399526] [0.11390004 0.11923843 0.05103699 0.07167295 0.11802658 0.13535777 0.06380911 0.07542348 0.10457429 0.14696037] [0.09882734 0.10030369 0.05815476 0.07577233 0.10858931 0.13896606 0.0695727 0.07543814 0.10716178 0.16721389]] [[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.] [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.] [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0. 0. 0. 0. 
1.]] Loss: 2.0002510638391406 --------- [[0.15099064 0.12079059 0.04984367 0.04853024 0.1151134 0.15891421 0.05523114 0.06521714 0.10064015 0.13472881] [0.14873706 0.11327603 0.04550561 0.06223028 0.11272532 0.15236652 0.05065288 0.07028285 0.09406957 0.15015389] [0.12147272 0.11900458 0.05780159 0.06275681 0.13520771 0.1504851 0.05582911 0.05892297 0.08609198 0.15242742] [0.12253596 0.13446358 0.04523439 0.06348773 0.12735283 0.14184494 0.05675968 0.06559176 0.0872375 0.15549164] [0.10725346 0.11014528 0.05105279 0.06656302 0.11862939 0.14636172 0.06089633 0.0651109 0.0876069 0.18638021]] [[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.] [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.] [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]] Loss: 1.8864675156190913
As the model's predicted probabilities y_hat approach the targets t, the loss decreases.
7. Implementing Inference and Computing Accuracy
In [25]:
# Inference: a forward pass that returns the softmax probabilities
def predict(W1, b1, W2, b2, X):
    a1 = np.dot(X, W1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    y = softmax(a2)
    return y
In [26]:
# Run inference on X = x_train_reshaped[:100]
X = x_train_reshaped[:100]
Y = y_test[:100]  # note: these labels come from the test set, while the inputs above come from the training set
result = predict(W1, b1, W2, b2, X)
result[0]
Out[26]:
array([0.16282846, 0.1284271 , 0.04423717, 0.04273124, 0.12105069, 0.17123141, 0.04846323, 0.05704807, 0.08516187, 0.13882077])
In [27]:
def accuracy(W1, b1, W2, b2, x, y):
    y_hat = predict(W1, b1, W2, b2, x)
    y_hat = np.argmax(y_hat, axis=1)
    # t = np.argmax(t, axis=1)
    accuracy = np.sum(y_hat == y) / float(x.shape[0])
    return accuracy
In [28]:
acc = accuracy(W1, b1, W2, b2, X, Y)
t = _change_ont_hot_label(Y, 10)

print(result[0])
print(t[0])
print(acc)  # near chance level (~0.1): the model has only taken five gradient steps on five samples
[0.16282846 0.1284271  0.04423717 0.04273124 0.12105069 0.17123141 0.04846323 0.05704807 0.08516187 0.13882077]
[0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
0.12
Running the Full Training Cycle
In [29]:
def init_params(input_size, hidden_size, output_size, weight_init_std=0.01):
    W1 = weight_init_std * np.random.randn(input_size, hidden_size)
    b1 = np.zeros(hidden_size)
    W2 = weight_init_std * np.random.randn(hidden_size, output_size)
    b2 = np.zeros(output_size)

    print(W1.shape)
    print(b1.shape)
    print(W2.shape)
    print(b2.shape)

    return W1, b1, W2, b2
In [30]:
# Hyperparameters
iters_num = 50000        # number of training iterations
train_size = x_train.shape[0]
batch_size = 100         # mini-batch size
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

# Number of iterations per epoch
iter_per_epoch = max(train_size / batch_size, 1)

W1, b1, W2, b2 = init_params(784, 50, 10)

for i in range(iters_num):
    # Sample a mini-batch
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train_reshaped[batch_mask]
    y_batch = y_train[batch_mask]

    W1, b1, W2, b2, Loss = train_step(x_batch, y_batch, W1, b1, W2, b2, learning_rate=0.1, verbose=False)

    # Record training progress
    train_loss_list.append(Loss)

    # Compute accuracy once per epoch
    if i % iter_per_epoch == 0:
        print('Loss: ', Loss)
        train_acc = accuracy(W1, b1, W2, b2, x_train_reshaped, y_train)
        test_acc = accuracy(W1, b1, W2, b2, x_test_reshaped, y_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))
(784, 50) (50,) (50, 10) (10,) Loss: 2.3066670903282596 train acc, test acc | 0.0993, 0.1032 Loss: 0.8416695553955978 train acc, test acc | 0.79135, 0.7955 Loss: 0.5435052868438864 train acc, test acc | 0.8727833333333334, 0.8757 Loss: 0.48855941880149645 train acc, test acc | 0.89655, 0.898 Loss: 0.3573124882644306 train acc, test acc | 0.9067833333333334, 0.9095 Loss: 0.37726690939106944 train acc, test acc | 0.91375, 0.9157 Loss: 0.18625206454133664 train acc, test acc | 0.91915, 0.922 Loss: 0.19877949051592408 train acc, test acc | 0.9225, 0.9235 Loss: 0.3397377735735032 train acc, test acc | 0.92625, 0.9276 Loss: 0.26860296716825277 train acc, test acc | 0.9295833333333333, 0.9309 Loss: 0.20192135784466964 train acc, test acc | 0.9330666666666667, 0.9338 Loss: 0.290427318542916 train acc, test acc | 0.9356166666666667, 0.9363 Loss: 0.2494855277797229 train acc, test acc | 0.93815, 0.9378 Loss: 0.34551740177514084 train acc, test acc | 0.94055, 0.9405 Loss: 0.10435688613536928 train acc, test acc | 0.94225, 0.9417 Loss: 0.1480011273572673 train acc, test acc | 0.9445166666666667, 0.9435 Loss: 0.12326440003719803 train acc, test acc | 0.94635, 0.9454 Loss: 0.3820391460025334 train acc, test acc | 0.9479166666666666, 0.9472 Loss: 0.21254089841550133 train acc, test acc | 0.94985, 0.9484 Loss: 0.20127980683651722 train acc, test acc | 0.9510833333333333, 0.9491 Loss: 0.20224285123904423 train acc, test acc | 0.9527666666666667, 0.9513 Loss: 0.11455347204418721 train acc, test acc | 0.9533333333333334, 0.9515 Loss: 0.2506718850005177 train acc, test acc | 0.9543666666666667, 0.9522 Loss: 0.09938499419366245 train acc, test acc | 0.9557333333333333, 0.9527 Loss: 0.2291181164032756 train acc, test acc | 0.9563833333333334, 0.9542 Loss: 0.1186453469856623 train acc, test acc | 0.9574333333333334, 0.9543 Loss: 0.08262024155306627 train acc, test acc | 0.9583166666666667, 0.9552 Loss: 0.050135343241131294 train acc, test acc | 0.9593666666666667, 0.9565 Loss: 0.15038236640601393 train acc, test acc | 0.9603833333333334, 0.9555 Loss: 0.10450474835863643 train acc, test acc | 0.9605333333333334, 0.9569 Loss: 0.23561056352612103 train acc, test acc | 0.9620333333333333, 0.9572 Loss: 0.1454099485809948 train acc, test acc | 0.9628166666666667, 0.958 Loss: 0.11354032687547083 train acc, test acc | 0.9635166666666667, 0.9585 Loss: 0.13348770735995633 train acc, test acc | 0.9635333333333334, 0.9597 Loss: 0.14717382217684388 train acc, test acc | 0.9649666666666666, 0.9595 Loss: 0.13219257303756288 train acc, test acc | 0.9657, 0.9604 Loss: 0.08197946109647873 train acc, test acc | 0.9657166666666667, 0.9602 Loss: 0.10502514912254501 train acc, test acc | 0.9669, 0.9615 Loss: 0.1059423253179013 train acc, test acc | 0.9671666666666666, 0.9614 Loss: 0.14830174946694832 train acc, test acc | 0.9677166666666667, 0.9619 Loss: 0.11368245523271087 train acc, test acc | 0.9685666666666667, 0.9621 Loss: 0.09971397181658903 train acc, test acc | 0.9688333333333333, 0.9625 Loss: 0.16311992098339193 train acc, test acc | 0.9694833333333334, 0.9625 Loss: 0.18370487114841172 train acc, test acc | 0.9703333333333334, 0.9626 Loss: 0.15534032325693167 train acc, test acc | 0.9705833333333334, 0.9627 Loss: 0.08411783763909025 train acc, test acc | 0.9711, 0.9634 Loss: 0.047037161407086865 train acc, test acc | 0.9715666666666667, 0.964 Loss: 0.08175359746437703 train acc, test acc | 0.9723666666666667, 0.9643 Loss: 0.07758104813823546 train acc, test acc | 0.9724333333333334, 0.965 Loss: 0.1394998034809735 train acc, 
test acc | 0.9729666666666666, 0.9649 Loss: 0.16221345512068872 train acc, test acc | 0.9736333333333334, 0.966 Loss: 0.05399982368650055 train acc, test acc | 0.9740833333333333, 0.9657 Loss: 0.08400302092533224 train acc, test acc | 0.97425, 0.9659 Loss: 0.04184568397510707 train acc, test acc | 0.9746333333333334, 0.9656 Loss: 0.0796015444449166 train acc, test acc | 0.9748333333333333, 0.9662 Loss: 0.10548349956116032 train acc, test acc | 0.97495, 0.9665 Loss: 0.07006835670379002 train acc, test acc | 0.9757, 0.9664 Loss: 0.11437173450658517 train acc, test acc | 0.9759166666666667, 0.9673 Loss: 0.17147499673912153 train acc, test acc | 0.9761166666666666, 0.9667 Loss: 0.06547704750865913 train acc, test acc | 0.9763166666666667, 0.968 Loss: 0.08277940817953208 train acc, test acc | 0.9772666666666666, 0.9678 Loss: 0.04820306652788286 train acc, test acc | 0.9771833333333333, 0.9676 Loss: 0.06742543629329492 train acc, test acc | 0.9774166666666667, 0.9673 Loss: 0.07021435466668258 train acc, test acc | 0.9775666666666667, 0.9673 Loss: 0.036857597369214255 train acc, test acc | 0.9779833333333333, 0.9678 Loss: 0.042083505747489466 train acc, test acc | 0.97835, 0.968 Loss: 0.17659515882914342 train acc, test acc | 0.9785833333333334, 0.969 Loss: 0.025761650547214684 train acc, test acc | 0.9785833333333334, 0.9685 Loss: 0.08848218007645862 train acc, test acc | 0.9787333333333333, 0.969 Loss: 0.04797004467357079 train acc, test acc | 0.9793, 0.9691 Loss: 0.050332983492817035 train acc, test acc | 0.9794166666666667, 0.9691 Loss: 0.051343759189660935 train acc, test acc | 0.9795833333333334, 0.9692 Loss: 0.1638523335830362 train acc, test acc | 0.97975, 0.9685 Loss: 0.08039259939222396 train acc, test acc | 0.9799333333333333, 0.9689 Loss: 0.04164436955883234 train acc, test acc | 0.9802, 0.9702 Loss: 0.04913124686784486 train acc, test acc | 0.9805666666666667, 0.9697 Loss: 0.06965157753524727 train acc, test acc | 0.98045, 0.9698 Loss: 0.044795401300464145 train acc, test acc | 0.9806333333333334, 0.9698 Loss: 0.043548043972071834 train acc, test acc | 0.9811333333333333, 0.9701 Loss: 0.05851365987287055 train acc, test acc | 0.9813166666666666, 0.9706 Loss: 0.061185956707979064 train acc, test acc | 0.98125, 0.9708 Loss: 0.04734962423110804 train acc, test acc | 0.9814833333333334, 0.9703 Loss: 0.06922657485717254 train acc, test acc | 0.9817333333333333, 0.9698 Loss: 0.029274401539607715 train acc, test acc | 0.9819, 0.9707
In [31]:
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 12, 6

# Plot the accuracy curves
markers = {'train': 'o', 'test': 's'}
x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, label='train acc')
plt.plot(x, test_acc_list, label='test acc', linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()
In [32]:
# Plot the training loss (recorded once per iteration, not per epoch)
x = np.arange(len(train_loss_list))
plt.plot(x, train_loss_list, label='train loss')
plt.xlabel("iterations")
plt.ylabel("Loss")
plt.ylim(0, 3.0)
plt.legend(loc='best')
plt.show()
'파이썬 & AI 학습' 카테고리의 다른 글
TensorFlow v2 다뤄보기 (0) | 2021.02.26 |
---|---|
파이썬으로 DB 다루기 (0) | 2021.02.24 |
이상치 탐색 (0) | 2021.02.19 |
데이터 가져오기 (0) | 2021.02.15 |
선형회귀와 로지스틱회귀 (0) | 2021.02.06 |