A.I
Explolation14 의료영상 진단 본문
폐렴 구별¶
1. 데이터 준비¶
In [1]:
import re # 정규표현식 관련된 작업에 필요한 패키지
import os # I/O 관련된 작업에 필요한 패키지
import pandas as pd # 데이터 전처리 관련된 작업에 필요한 패키지
import numpy as np # 데이터 array 작업에 필요한 패키지
import tensorflow as tf # 딥러닝 관련된 작업에 필요한 패키지
import matplotlib.pyplot as plt # 데이터 시각화에 관련된 작업에 필요한 패키지
from sklearn.model_selection import train_test_split # 데이터 전처리에 필요한 패키지
In [2]:
# tf.data pipeline constant: lets TensorFlow choose the degree of
# parallelism/prefetching at runtime.
AUTOTUNE = tf.data.experimental.AUTOTUNE
# Root directory that contains the chest_xray dataset.
ROOT_PATH = os.path.join(os.getenv('HOME'), 'aiffel')
# Number of images per training batch.
BATCH_SIZE = 16
# Target (height, width) every X-ray image is resized to.
IMAGE_SIZE = [180, 180]
# Number of training epochs.
EPOCHS = 25
print(ROOT_PATH)
/home/ssac24/aiffel
In [3]:
# Collect every image path per split; the glob pattern matches
# <split>/<class>/<file>.  ROOT_PATH + '...' is already a str, so the
# previous str(...) wrappers were redundant and have been removed.
train_filenames = tf.io.gfile.glob(ROOT_PATH + '/chest_xray/train/*/*')
test_filenames = tf.io.gfile.glob(ROOT_PATH + '/chest_xray/test/*/*')
val_filenames = tf.io.gfile.glob(ROOT_PATH + '/chest_xray/val/*/*')
print(len(train_filenames))
print(len(test_filenames))
print(len(val_filenames))
5216 624 16
In [4]:
# The provided validation split holds only 16 images, so merge it back
# into train and re-split the whole pool 80/20.
filenames = (tf.io.gfile.glob(str(ROOT_PATH + '/chest_xray/train/*/*'))
             + tf.io.gfile.glob(str(ROOT_PATH + '/chest_xray/val/*/*')))
# test_size=0.2 -> 20% of the pool becomes the validation set.
train_filenames, val_filenames = train_test_split(filenames, test_size=0.2)
print(len(train_filenames))
print(len(val_filenames))
4185 1047
In [5]:
# Count normal vs. pneumonia images in the training split; the class
# name is embedded in every file path.
COUNT_NORMAL = sum(1 for name in train_filenames if "NORMAL" in name)
print("Normal images count in training set: " + str(COUNT_NORMAL))
COUNT_PNEUMONIA = sum(1 for name in train_filenames if "PNEUMONIA" in name)
print("Pneumonia images count in training set: " + str(COUNT_PNEUMONIA))
Normal images count in training set: 1071 Pneumonia images count in training set: 3114
In [6]:
# Wrap the filename lists in tf.data Datasets so the pipeline below can
# map/shuffle/batch/prefetch them efficiently.
train_list_ds = tf.data.Dataset.from_tensor_slices(train_filenames)
val_list_ds = tf.data.Dataset.from_tensor_slices(val_filenames)
In [7]:
# Sanity-check the element counts of both datasets.
TRAIN_IMG_COUNT = tf.data.experimental.cardinality(train_list_ds).numpy()
print(f"Training images count: {TRAIN_IMG_COUNT}")
VAL_IMG_COUNT = tf.data.experimental.cardinality(val_list_ds).numpy()
print(f"Validating images count: {VAL_IMG_COUNT}")
Training images count: 4185 Validating images count: 1047
In [8]:
# Derive the class names from the training sub-directory names.
# tf.io.gfile.glob returns plain Python strings, so os.path.basename is
# both simpler and clearer than the old str(bytes)[2:-1] round-trip
# through tf.strings.split(...).numpy().
CLASS_NAMES = np.array([os.path.basename(path)
                        for path in tf.io.gfile.glob(str(ROOT_PATH + "/chest_xray/train/*"))])
print(CLASS_NAMES)
['NORMAL' 'PNEUMONIA']
In [9]:
def get_label(file_path):
    """Label one file path: True for PNEUMONIA, False for NORMAL.

    The class name is the second-to-last path component
    (.../<split>/<CLASS>/<file>).
    """
    return tf.strings.split(file_path, os.path.sep)[-2] == "PNEUMONIA"
In [10]:
def decode_img(img):
    """Decode raw JPEG bytes into a float32 image resized to IMAGE_SIZE."""
    decoded = tf.image.decode_jpeg(img, channels=3)             # uint8 tensor
    scaled = tf.image.convert_image_dtype(decoded, tf.float32)  # values in [0, 1]
    return tf.image.resize(scaled, IMAGE_SIZE)
def process_path(file_path):
    """Read one image file and return an (image, label) pair."""
    raw = tf.io.read_file(file_path)
    return decode_img(raw), get_label(file_path)
In [11]:
# Map filename datasets to (image, label) pairs; num_parallel_calls lets
# tf.data decode several images in parallel for faster preprocessing.
train_ds = train_list_ds.map(process_path, num_parallel_calls=AUTOTUNE)
val_ds = val_list_ds.map(process_path, num_parallel_calls=AUTOTUNE)
In [12]:
# Peek at one element to confirm the (180, 180, 3) shape and boolean label.
for image, label in train_ds.take(1):
    print("Image shape: ", image.numpy().shape)
    print("Label: ", label.numpy())
Image shape: (180, 180, 3) Label: True
In [13]:
# Build the test pipeline: list files, map to (image, label), batch.
test_list_ds = tf.data.Dataset.list_files(str(ROOT_PATH + '/chest_xray/test/*/*'))
TEST_IMAGE_COUNT = tf.data.experimental.cardinality(test_list_ds).numpy()
test_ds = test_list_ds.map(process_path, num_parallel_calls=AUTOTUNE).batch(BATCH_SIZE)
print(TEST_IMAGE_COUNT)
624
In [14]:
def prepare_for_training(ds, shuffle_buffer_size=1000):
    """Shuffle, repeat, batch and prefetch a dataset for model.fit.

    shuffle: keeps a fixed-size buffer and draws the next element
    uniformly from it; repeat: lets the dataset be iterated across many
    epochs; prefetch: overlaps preprocessing with training to reduce
    idle time.
    """
    return (ds.shuffle(buffer_size=shuffle_buffer_size)
              .repeat()
              .batch(BATCH_SIZE)
              .prefetch(buffer_size=AUTOTUNE))

train_ds = prepare_for_training(train_ds)
val_ds = prepare_for_training(val_ds)
2. 데이터 시각화¶
In [15]:
# Pull one batch and display its first 16 X-rays with their labels.
image_batch, label_batch = next(iter(train_ds))

def show_batch(image_batch, label_batch):
    """Plot the first 16 images of a batch, titled by class."""
    plt.figure(figsize=(10, 10))
    for idx in range(16):
        plt.subplot(5, 5, idx + 1)
        plt.imshow(image_batch[idx])
        plt.title("PNEUMONIA" if label_batch[idx] else "NORMAL")
        plt.axis("off")

show_batch(image_batch.numpy(), label_batch.numpy())
3. CNN 모델링¶
In [16]:
def conv_block(filters):
    """Two separable 3x3 convs -> batch norm -> 2x2 max-pool."""
    return tf.keras.Sequential([
        tf.keras.layers.SeparableConv2D(filters, 3, activation='relu', padding='same'),
        tf.keras.layers.SeparableConv2D(filters, 3, activation='relu', padding='same'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPool2D(),
    ])
In [17]:
def dense_block(units, dropout_rate):
    """Dense(relu) -> batch norm -> dropout, as a reusable sub-model."""
    return tf.keras.Sequential([
        tf.keras.layers.Dense(units, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(dropout_rate),
    ])
BatchNormalization과 Dropout을 보통 같이 쓰지는 않는다¶
In [18]:
def build_model():
    """CNN for binary pneumonia classification on 180x180x3 inputs."""
    layers = [
        tf.keras.Input(shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3)),
        tf.keras.layers.Conv2D(16, 3, activation='relu', padding='same'),
        tf.keras.layers.Conv2D(16, 3, activation='relu', padding='same'),
        tf.keras.layers.MaxPool2D(),
        conv_block(32),
        conv_block(64),
        conv_block(128),
        tf.keras.layers.Dropout(0.2),
        conv_block(256),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Flatten(),
        dense_block(512, 0.7),
        dense_block(128, 0.5),
        dense_block(64, 0.3),
        # Single sigmoid unit: outputs P(pneumonia) for binary classification.
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ]
    return tf.keras.Sequential(layers)
4. 데이터 imbalance 처리¶
- 클래스 불균형 처리 방법들
- Weight balancing 은 training set 의 각 데이터에서 loss 를 계산할 때 특정 클래스의 데이터에 더 큰 loss 값을 갖도록 가중치를 부여하는 방법
In [19]:
# Weight each class inversely to its frequency so the minority class
# (NORMAL) contributes as much total loss as the majority (PNEUMONIA).
weight_for_0 = (1 / COUNT_NORMAL) * TRAIN_IMG_COUNT / 2.0
weight_for_1 = (1 / COUNT_PNEUMONIA) * TRAIN_IMG_COUNT / 2.0
class_weight = {0: weight_for_0, 1: weight_for_1}
print(f'Weight for class 0: {weight_for_0:.2f}')
print(f'Weight for class 1: {weight_for_1:.2f}')
Weight for class 0: 1.95 Weight for class 1: 0.67
5. 모델 훈련¶
In [20]:
with tf.device('/GPU:0'):
    model = build_model()

    # Track precision/recall alongside accuracy: with imbalanced classes,
    # accuracy alone is misleading.
    METRICS = [
        'accuracy',
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall'),
    ]

    # binary_crossentropy: single sigmoid output over two classes.
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=METRICS)
In [21]:
with tf.device('/GPU:0'):
    # Train with class weights so both classes contribute balanced loss.
    history = model.fit(
        train_ds,
        steps_per_epoch=TRAIN_IMG_COUNT // BATCH_SIZE,  # dataset repeats forever, so cap steps per epoch
        epochs=EPOCHS,
        validation_data=val_ds,
        validation_steps=VAL_IMG_COUNT // BATCH_SIZE,
        class_weight=class_weight,
    )
Epoch 1/25 261/261 [==============================] - 26s 99ms/step - loss: 0.4023 - accuracy: 0.8070 - precision: 0.9575 - recall: 0.7752 - val_loss: 0.6681 - val_accuracy: 0.7365 - val_precision: 0.7365 - val_recall: 1.0000 Epoch 2/25 261/261 [==============================] - 22s 86ms/step - loss: 0.2234 - accuracy: 0.9083 - precision: 0.9745 - recall: 0.9001 - val_loss: 1.3031 - val_accuracy: 0.7346 - val_precision: 0.7346 - val_recall: 1.0000 Epoch 3/25 261/261 [==============================] - 22s 85ms/step - loss: 0.1814 - accuracy: 0.9308 - precision: 0.9786 - recall: 0.9273 - val_loss: 0.8993 - val_accuracy: 0.7337 - val_precision: 0.7337 - val_recall: 1.0000 Epoch 4/25 261/261 [==============================] - 22s 83ms/step - loss: 0.1846 - accuracy: 0.9301 - precision: 0.9796 - recall: 0.9253 - val_loss: 0.9481 - val_accuracy: 0.6990 - val_precision: 1.0000 - val_recall: 0.5903 Epoch 5/25 261/261 [==============================] - 22s 84ms/step - loss: 0.1448 - accuracy: 0.9488 - precision: 0.9856 - recall: 0.9449 - val_loss: 0.1570 - val_accuracy: 0.9490 - val_precision: 0.9972 - val_recall: 0.9331 Epoch 6/25 261/261 [==============================] - 23s 87ms/step - loss: 0.1579 - accuracy: 0.9406 - precision: 0.9818 - recall: 0.9376 - val_loss: 0.7112 - val_accuracy: 0.7837 - val_precision: 1.0000 - val_recall: 0.7059 Epoch 7/25 261/261 [==============================] - 22s 84ms/step - loss: 0.1417 - accuracy: 0.9483 - precision: 0.9872 - recall: 0.9427 - val_loss: 0.1271 - val_accuracy: 0.9558 - val_precision: 0.9986 - val_recall: 0.9412 Epoch 8/25 261/261 [==============================] - 22s 84ms/step - loss: 0.1183 - accuracy: 0.9586 - precision: 0.9906 - recall: 0.9533 - val_loss: 0.8911 - val_accuracy: 0.7356 - val_precision: 0.7353 - val_recall: 1.0000 Epoch 9/25 261/261 [==============================] - 22s 84ms/step - loss: 0.1364 - accuracy: 0.9521 - precision: 0.9860 - recall: 0.9491 - val_loss: 0.1197 - val_accuracy: 0.9500 - 
val_precision: 0.9972 - val_recall: 0.9344 Epoch 10/25 261/261 [==============================] - 22s 84ms/step - loss: 0.1198 - accuracy: 0.9509 - precision: 0.9869 - recall: 0.9466 - val_loss: 0.5367 - val_accuracy: 0.8048 - val_precision: 1.0000 - val_recall: 0.7339 Epoch 11/25 261/261 [==============================] - 22s 83ms/step - loss: 0.1251 - accuracy: 0.9523 - precision: 0.9873 - recall: 0.9481 - val_loss: 0.1013 - val_accuracy: 0.9635 - val_precision: 0.9608 - val_recall: 0.9909 Epoch 12/25 261/261 [==============================] - 22s 84ms/step - loss: 0.1111 - accuracy: 0.9557 - precision: 0.9873 - recall: 0.9527 - val_loss: 0.3150 - val_accuracy: 0.8962 - val_precision: 1.0000 - val_recall: 0.8586 Epoch 13/25 261/261 [==============================] - 22s 85ms/step - loss: 0.1060 - accuracy: 0.9591 - precision: 0.9893 - recall: 0.9553 - val_loss: 0.1183 - val_accuracy: 0.9615 - val_precision: 0.9959 - val_recall: 0.9514 Epoch 14/25 261/261 [==============================] - 22s 83ms/step - loss: 0.0947 - accuracy: 0.9653 - precision: 0.9894 - recall: 0.9636 - val_loss: 0.1167 - val_accuracy: 0.9577 - val_precision: 0.9972 - val_recall: 0.9450 Epoch 15/25 261/261 [==============================] - 22s 83ms/step - loss: 0.1214 - accuracy: 0.9552 - precision: 0.9870 - recall: 0.9525 - val_loss: 0.3185 - val_accuracy: 0.8731 - val_precision: 1.0000 - val_recall: 0.8268 Epoch 16/25 261/261 [==============================] - 22s 85ms/step - loss: 0.1064 - accuracy: 0.9610 - precision: 0.9907 - recall: 0.9565 - val_loss: 0.1275 - val_accuracy: 0.9529 - val_precision: 0.9986 - val_recall: 0.9372 Epoch 17/25 261/261 [==============================] - 22s 84ms/step - loss: 0.1016 - accuracy: 0.9658 - precision: 0.9917 - recall: 0.9621 - val_loss: 0.1827 - val_accuracy: 0.9154 - val_precision: 0.9956 - val_recall: 0.8889 Epoch 18/25 261/261 [==============================] - 22s 83ms/step - loss: 0.0939 - accuracy: 0.9660 - precision: 0.9927 - recall: 0.9612 
- val_loss: 0.3689 - val_accuracy: 0.8683 - val_precision: 0.9860 - val_recall: 0.8322 Epoch 19/25 261/261 [==============================] - 22s 84ms/step - loss: 0.0810 - accuracy: 0.9660 - precision: 0.9933 - recall: 0.9607 - val_loss: 0.0795 - val_accuracy: 0.9692 - val_precision: 0.9959 - val_recall: 0.9620 Epoch 20/25 261/261 [==============================] - 22s 84ms/step - loss: 0.0829 - accuracy: 0.9703 - precision: 0.9940 - recall: 0.9658 - val_loss: 0.2210 - val_accuracy: 0.9260 - val_precision: 1.0000 - val_recall: 0.8996 Epoch 21/25 261/261 [==============================] - 22s 84ms/step - loss: 0.0813 - accuracy: 0.9691 - precision: 0.9931 - recall: 0.9655 - val_loss: 0.0903 - val_accuracy: 0.9654 - val_precision: 0.9906 - val_recall: 0.9621 Epoch 22/25 261/261 [==============================] - 21s 82ms/step - loss: 0.0661 - accuracy: 0.9782 - precision: 0.9938 - recall: 0.9768 - val_loss: 0.0861 - val_accuracy: 0.9702 - val_precision: 0.9906 - val_recall: 0.9686 Epoch 23/25 261/261 [==============================] - 22s 85ms/step - loss: 0.0860 - accuracy: 0.9713 - precision: 0.9950 - recall: 0.9661 - val_loss: 0.0838 - val_accuracy: 0.9692 - val_precision: 0.9920 - val_recall: 0.9661 Epoch 24/25 261/261 [==============================] - 22s 85ms/step - loss: 0.0722 - accuracy: 0.9756 - precision: 0.9954 - recall: 0.9717 - val_loss: 0.0863 - val_accuracy: 0.9740 - val_precision: 0.9731 - val_recall: 0.9922 Epoch 25/25 261/261 [==============================] - 22s 83ms/step - loss: 0.0597 - accuracy: 0.9787 - precision: 0.9957 - recall: 0.9755 - val_loss: 0.1233 - val_accuracy: 0.9548 - val_precision: 0.9972 - val_recall: 0.9409
In [22]:
# Plot train vs. validation curves for each tracked metric.
fig, ax = plt.subplots(1, 4, figsize=(20, 3))
ax = ax.ravel()
for panel, met in zip(ax, ['precision', 'recall', 'accuracy', 'loss']):
    panel.plot(history.history[met])
    panel.plot(history.history['val_' + met])
    panel.set_title('Model {}'.format(met))
    panel.set_xlabel('epochs')
    panel.set_ylabel(met)
    panel.legend(['train', 'val'])
In [23]:
loss, acc, prec, rec = model.evaluate(test_ds)
39/39 [==============================] - 2s 60ms/step - loss: 0.9148 - accuracy: 0.8157 - precision: 0.7847 - recall: 0.9718
프로젝트 : 폐렴 진단기 성능 개선¶
In [196]:
import re # 정규표현식 관련된 작업에 필요한 패키지
import os # I/O 관련된 작업에 필요한 패키지
import pandas as pd # 데이터 전처리 관련된 작업에 필요한 패키지
import numpy as np # 데이터 array 작업에 필요한 패키지
import tensorflow as tf # 딥러닝 관련된 작업에 필요한 패키지
import matplotlib.pyplot as plt # 데이터 시각화에 관련된 작업에 필요한 패키지
from sklearn.model_selection import train_test_split # 데이터 전처리에 필요한 패키지
In [197]:
# tf.data pipeline constant: lets TensorFlow choose the degree of
# parallelism/prefetching at runtime.
AUTOTUNE = tf.data.experimental.AUTOTUNE
# Root directory that contains the chest_xray dataset.
ROOT_PATH = os.path.join(os.getenv('HOME'), 'aiffel')
# Number of images per training batch.
BATCH_SIZE = 16
# Target (height, width) every X-ray image is resized to.
IMAGE_SIZE = [180, 180]
# Number of training epochs (reduced to 20 for this improved run).
EPOCHS = 20
print(ROOT_PATH)
/home/ssac24/aiffel
In [198]:
# Collect every image path per split; the glob pattern matches
# <split>/<class>/<file>.  The concatenated path is already a str, so
# the previous str(...) wrappers were redundant and have been removed.
train_filenames = tf.io.gfile.glob(ROOT_PATH + '/chest_xray/train/*/*')
test_filenames = tf.io.gfile.glob(ROOT_PATH + '/chest_xray/test/*/*')
val_filenames = tf.io.gfile.glob(ROOT_PATH + '/chest_xray/val/*/*')
print(len(train_filenames))
print(len(test_filenames))
print(len(val_filenames))
5216 624 16
In [199]:
# The given validation split holds only 16 images, so merge it back into
# train and re-split the pool 80/20.
filenames = (tf.io.gfile.glob(str(ROOT_PATH + '/chest_xray/train/*/*'))
             + tf.io.gfile.glob(str(ROOT_PATH + '/chest_xray/val/*/*')))
# test_size=0.2 -> 20% of the pool becomes validation data.
train_filenames, val_filenames = train_test_split(filenames, test_size=0.2)
print(len(train_filenames))
print(len(val_filenames))
4185 1047
In [200]:
# Count normal vs. pneumonia images in the training split; the class
# name is embedded in every file path.
COUNT_NORMAL = sum(1 for name in train_filenames if "NORMAL" in name)
print("Normal images count in training set: " + str(COUNT_NORMAL))
COUNT_PNEUMONIA = sum(1 for name in train_filenames if "PNEUMONIA" in name)
print("Pneumonia images count in training set: " + str(COUNT_PNEUMONIA))
Normal images count in training set: 1086 Pneumonia images count in training set: 3099
In [201]:
# Wrap the filename lists in tf.data Datasets so the pipeline below can
# map/shuffle/batch/prefetch them efficiently.
train_list_ds = tf.data.Dataset.from_tensor_slices(train_filenames)
val_list_ds = tf.data.Dataset.from_tensor_slices(val_filenames)
In [202]:
# Sanity-check the element counts of both datasets.
TRAIN_IMG_COUNT = tf.data.experimental.cardinality(train_list_ds).numpy()
print(f"Training images count: {TRAIN_IMG_COUNT}")
VAL_IMG_COUNT = tf.data.experimental.cardinality(val_list_ds).numpy()
print(f"Validating images count: {VAL_IMG_COUNT}")
Training images count: 4185 Validating images count: 1047
In [203]:
# Derive the class names from the training sub-directory names.
# tf.io.gfile.glob returns plain Python strings, so os.path.basename is
# both simpler and clearer than the old str(bytes)[2:-1] round-trip
# through tf.strings.split(...).numpy().
CLASS_NAMES = np.array([os.path.basename(path)
                        for path in tf.io.gfile.glob(str(ROOT_PATH + "/chest_xray/train/*"))])
print(CLASS_NAMES)
['NORMAL' 'PNEUMONIA']
In [204]:
def decode_img(img):
    """Decode raw JPEG bytes into a float32 image resized to IMAGE_SIZE."""
    decoded = tf.image.decode_jpeg(img, channels=3)             # uint8 tensor
    scaled = tf.image.convert_image_dtype(decoded, tf.float32)  # values in [0, 1]
    return tf.image.resize(scaled, IMAGE_SIZE)
def process_path(file_path):
    """Read one image file and return an (image, label) pair.

    NOTE(review): get_label is not redefined in this rerun section; it
    relies on the definition from the earlier notebook cell (In [9]).
    """
    raw = tf.io.read_file(file_path)
    return decode_img(raw), get_label(file_path)
In [205]:
# Map filename datasets to (image, label) pairs; num_parallel_calls lets
# tf.data decode several images in parallel for faster preprocessing.
train_ds = train_list_ds.map(process_path, num_parallel_calls=AUTOTUNE)
val_ds = val_list_ds.map(process_path, num_parallel_calls=AUTOTUNE)
In [206]:
# Peek at one element to confirm the (180, 180, 3) shape and boolean label.
for image, label in train_ds.take(1):
    print("Image shape: ", image.numpy().shape)
    print("Label: ", label.numpy())
Image shape: (180, 180, 3) Label: True
In [207]:
# Build the test pipeline: list files, map to (image, label), batch.
test_list_ds = tf.data.Dataset.list_files(str(ROOT_PATH + '/chest_xray/test/*/*'))
TEST_IMAGE_COUNT = tf.data.experimental.cardinality(test_list_ds).numpy()
test_ds = test_list_ds.map(process_path, num_parallel_calls=AUTOTUNE).batch(BATCH_SIZE)
print(TEST_IMAGE_COUNT)
624
In [208]:
def augment(image, label):
    """Data augmentation: randomly flip the image left/right (label unchanged)."""
    image = tf.image.random_flip_left_right(image)
    return image, label

def prepare_for_training(ds, shuffle_buffer_size=1000, apply_augment=True):
    """Build the input pipeline: (optionally) augment, shuffle, repeat, batch, prefetch.

    apply_augment defaults to True for backward compatibility; pass
    False for evaluation-style datasets.
    """
    if apply_augment:
        ds = ds.map(
            augment,  # apply random left/right flips
            num_parallel_calls=2
        )
    # Keep a fixed-size buffer and draw the next element uniformly from it.
    ds = ds.shuffle(buffer_size=shuffle_buffer_size)
    # Let the dataset be iterated across many epochs.
    ds = ds.repeat()
    ds = ds.batch(BATCH_SIZE)
    # Overlap preprocessing with training to reduce idle time.
    ds = ds.prefetch(buffer_size=AUTOTUNE)
    return ds

train_ds = prepare_for_training(train_ds)
# Bug fix: the original pipeline also augmented the validation set, which
# distorts validation metrics; augmentation must apply to training data only.
val_ds = prepare_for_training(val_ds, apply_augment=False)
In [209]:
# Pull one batch to visually confirm the random left/right flips.
image_batch, label_batch = next(iter(train_ds))

def show_batch(image_batch, label_batch):
    """Plot the first 16 images of a batch, titled by class."""
    plt.figure(figsize=(10, 10))
    for idx in range(16):
        plt.subplot(5, 5, idx + 1)
        plt.imshow(image_batch[idx])
        plt.title("PNEUMONIA" if label_batch[idx] else "NORMAL")
        plt.axis("off")

show_batch(image_batch.numpy(), label_batch.numpy())
In [210]:
def conv_block(filters):
    """Two separable 3x3 convs -> batch norm -> 2x2 max-pool."""
    return tf.keras.Sequential([
        tf.keras.layers.SeparableConv2D(filters, 3, activation='relu', padding='same'),
        tf.keras.layers.SeparableConv2D(filters, 3, activation='relu', padding='same'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPool2D(),
    ])
In [211]:
def dense_block(units, dropout_rate):
    """Dense(relu) -> batch norm -> dropout, as a reusable sub-model."""
    return tf.keras.Sequential([
        tf.keras.layers.Dense(units, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(dropout_rate),
    ])
In [212]:
def build_model():
    """CNN for binary pneumonia classification on 180x180x3 inputs."""
    layers = [
        tf.keras.Input(shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3)),
        tf.keras.layers.Conv2D(16, 3, activation='relu', padding='same'),
        tf.keras.layers.Conv2D(16, 3, activation='relu', padding='same'),
        tf.keras.layers.MaxPool2D(),
        conv_block(32),
        conv_block(64),
        conv_block(128),
        tf.keras.layers.Dropout(0.2),
        conv_block(256),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Flatten(),
        dense_block(512, 0.7),
        dense_block(128, 0.5),
        dense_block(64, 0.3),
        # Single sigmoid unit: outputs P(pneumonia) for binary classification.
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ]
    return tf.keras.Sequential(layers)
In [213]:
# Weight each class inversely to its frequency so the minority class
# (NORMAL) contributes as much total loss as the majority (PNEUMONIA).
weight_for_0 = (1 / COUNT_NORMAL) * TRAIN_IMG_COUNT / 2.0
weight_for_1 = (1 / COUNT_PNEUMONIA) * TRAIN_IMG_COUNT / 2.0
class_weight = {0: weight_for_0, 1: weight_for_1}
print(f'Weight for class 0: {weight_for_0:.2f}')
print(f'Weight for class 1: {weight_for_1:.2f}')
Weight for class 0: 1.93 Weight for class 1: 0.68
In [214]:
with tf.device('/GPU:0'):
    model = build_model()

    # Track precision/recall alongside accuracy: with imbalanced classes,
    # accuracy alone is misleading.
    METRICS = [
        'accuracy',
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall'),
    ]

    # binary_crossentropy: single sigmoid output over two classes.
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=METRICS)
In [215]:
with tf.device('/GPU:0'):
    # Train with class weights so both classes contribute balanced loss.
    history = model.fit(
        train_ds,
        steps_per_epoch=TRAIN_IMG_COUNT // BATCH_SIZE,  # dataset repeats forever, so cap steps per epoch
        epochs=EPOCHS,
        validation_data=val_ds,
        validation_steps=VAL_IMG_COUNT // BATCH_SIZE,
        class_weight=class_weight,
    )
Epoch 1/20 261/261 [==============================] - 23s 89ms/step - loss: 0.4058 - accuracy: 0.8027 - precision: 0.9479 - recall: 0.7762 - val_loss: 0.6233 - val_accuracy: 0.7481 - val_precision: 0.7481 - val_recall: 1.0000 Epoch 2/20 261/261 [==============================] - 23s 87ms/step - loss: 0.2234 - accuracy: 0.9140 - precision: 0.9777 - recall: 0.9047 - val_loss: 1.2875 - val_accuracy: 0.7500 - val_precision: 0.7500 - val_recall: 1.0000 Epoch 3/20 261/261 [==============================] - 22s 86ms/step - loss: 0.1941 - accuracy: 0.9243 - precision: 0.9770 - recall: 0.9194 - val_loss: 0.2350 - val_accuracy: 0.9077 - val_precision: 0.8909 - val_recall: 0.9987 Epoch 4/20 261/261 [==============================] - 22s 85ms/step - loss: 0.2030 - accuracy: 0.9179 - precision: 0.9728 - recall: 0.9146 - val_loss: 0.1018 - val_accuracy: 0.9644 - val_precision: 0.9673 - val_recall: 0.9859 Epoch 5/20 261/261 [==============================] - 22s 85ms/step - loss: 0.1626 - accuracy: 0.9346 - precision: 0.9809 - recall: 0.9298 - val_loss: 0.1427 - val_accuracy: 0.9490 - val_precision: 0.9853 - val_recall: 0.9460 Epoch 6/20 261/261 [==============================] - 22s 84ms/step - loss: 0.1714 - accuracy: 0.9358 - precision: 0.9816 - recall: 0.9307 - val_loss: 0.0826 - val_accuracy: 0.9683 - val_precision: 0.9819 - val_recall: 0.9756 Epoch 7/20 261/261 [==============================] - 22s 83ms/step - loss: 0.1609 - accuracy: 0.9377 - precision: 0.9826 - recall: 0.9324 - val_loss: 0.2518 - val_accuracy: 0.8846 - val_precision: 0.8673 - val_recall: 0.9987 Epoch 8/20 261/261 [==============================] - 22s 83ms/step - loss: 0.1476 - accuracy: 0.9406 - precision: 0.9817 - recall: 0.9372 - val_loss: 0.1620 - val_accuracy: 0.9394 - val_precision: 0.9851 - val_recall: 0.9333 Epoch 9/20 261/261 [==============================] - 22s 85ms/step - loss: 0.1479 - accuracy: 0.9466 - precision: 0.9858 - recall: 0.9415 - val_loss: 0.2177 - val_accuracy: 0.9135 - 
val_precision: 0.9943 - val_recall: 0.8895 Epoch 10/20 261/261 [==============================] - 22s 84ms/step - loss: 0.1451 - accuracy: 0.9406 - precision: 0.9860 - recall: 0.9331 - val_loss: 0.1867 - val_accuracy: 0.9250 - val_precision: 0.9972 - val_recall: 0.9026 Epoch 11/20 261/261 [==============================] - 22s 86ms/step - loss: 0.1480 - accuracy: 0.9485 - precision: 0.9845 - recall: 0.9454 - val_loss: 0.1627 - val_accuracy: 0.9298 - val_precision: 0.9221 - val_recall: 0.9897 Epoch 12/20 261/261 [==============================] - 22s 86ms/step - loss: 0.1354 - accuracy: 0.9490 - precision: 0.9872 - recall: 0.9435 - val_loss: 0.3158 - val_accuracy: 0.8692 - val_precision: 0.9984 - val_recall: 0.8265 Epoch 13/20 261/261 [==============================] - 22s 85ms/step - loss: 0.1234 - accuracy: 0.9519 - precision: 0.9855 - recall: 0.9488 - val_loss: 0.1130 - val_accuracy: 0.9596 - val_precision: 0.9706 - val_recall: 0.9756 Epoch 14/20 261/261 [==============================] - 22s 85ms/step - loss: 0.1456 - accuracy: 0.9418 - precision: 0.9867 - recall: 0.9341 - val_loss: 0.5342 - val_accuracy: 0.8163 - val_precision: 0.8033 - val_recall: 0.9987 Epoch 15/20 261/261 [==============================] - 22s 84ms/step - loss: 0.1116 - accuracy: 0.9593 - precision: 0.9893 - recall: 0.9554 - val_loss: 0.1548 - val_accuracy: 0.9404 - val_precision: 0.9972 - val_recall: 0.9230 Epoch 16/20 261/261 [==============================] - 22s 85ms/step - loss: 0.1311 - accuracy: 0.9497 - precision: 0.9855 - recall: 0.9459 - val_loss: 0.2019 - val_accuracy: 0.9288 - val_precision: 0.9232 - val_recall: 0.9872 Epoch 17/20 261/261 [==============================] - 22s 86ms/step - loss: 0.1167 - accuracy: 0.9511 - precision: 0.9892 - recall: 0.9444 - val_loss: 0.0738 - val_accuracy: 0.9750 - val_precision: 0.9821 - val_recall: 0.9846 Epoch 18/20 261/261 [==============================] - 22s 84ms/step - loss: 0.1269 - accuracy: 0.9519 - precision: 0.9869 - recall: 0.9477 
- val_loss: 0.2064 - val_accuracy: 0.9163 - val_precision: 1.0000 - val_recall: 0.8885 Epoch 19/20 261/261 [==============================] - 22s 84ms/step - loss: 0.1138 - accuracy: 0.9571 - precision: 0.9896 - recall: 0.9521 - val_loss: 0.0983 - val_accuracy: 0.9625 - val_precision: 0.9973 - val_recall: 0.9526 Epoch 20/20 261/261 [==============================] - 22s 84ms/step - loss: 0.1037 - accuracy: 0.9579 - precision: 0.9896 - recall: 0.9530 - val_loss: 0.1657 - val_accuracy: 0.9317 - val_precision: 1.0000 - val_recall: 0.9087
In [216]:
# Plot train vs. validation curves for each tracked metric.
fig, ax = plt.subplots(1, 4, figsize=(20, 3))
ax = ax.ravel()
for panel, met in zip(ax, ['precision', 'recall', 'accuracy', 'loss']):
    panel.plot(history.history[met])
    panel.plot(history.history['val_' + met])
    panel.set_title('Model {}'.format(met))
    panel.set_xlabel('epochs')
    panel.set_ylabel(met)
    panel.legend(['train', 'val'])
In [217]:
loss, acc, prec, rec = model.evaluate(test_ds)
39/39 [==============================] - 2s 58ms/step - loss: 0.4141 - accuracy: 0.8782 - precision: 0.8685 - recall: 0.9487
정리¶
- Balancing을 하지 않은 채 불균형 데이터셋의 모델학습을 했더니 loss 값이 5.7이나 나왔다. 가중치를 주어 균형을 맞추는 작업이 필요함을 느꼈다.
- Dense에서 softmax함수를 써보았는데 recall값이 one-hot encoding이 되어서 1이 되어버렸다. 이진분류에서는 사용하지않는 것이 좋을 듯하다.
- 좌우반전 처리를 하니 loss가 낮아지게 되었다. augmentation은 적절하게 써주는 것이 도움이 될 수도 있다.
- DropOut 비율을 0.4로 늘리니 정확도가 86퍼센트까지 올랐다. loss값도 더 줄어들었다.
- BATCH Size를 2배로 늘렸더니 정확도에 큰 변화가 없었다.
- filter를 반으로 줄여보았는데 정확도가 79프로까지 떨어졌다. 많게도 늘려보았으나 초기값이 제일 적절한 필터였다.
바꿔볼만한 파라미터¶
- 전처리 과정에서의 Batch size, Epoch 등
- 데이터 준비과정에서의 augmentation
- CNN 모델링에서의 Convolution filter, 채널 개수, activation, 모델구조 등
- BatchNormalization과 Dropout의 유무
- 모델 훈련 과정에서의 optimizer나 learning rate 등의 변화
'AIFFEL' 카테고리의 다른 글
Explolation18 OCR로 글씨 인식 (0) | 2021.03.11 |
---|---|
Explolation15 챗봇 만들기 (0) | 2021.03.03 |
Explolation11 텍스트 요약 (1) | 2021.02.23 |
Explolation13 주식 가격 예측 (0) | 2021.02.23 |
Explolation12 생성자 모델링 (0) | 2021.02.23 |