A.I
Exploration12 생성자 모델링 본문
생성 모델링¶
1. Pix2Pix¶
- 간단한 이미지를 입력 시 실제 사진처럼 보이도록 바꿔줄 때 많이 사용되는 모델
- 원리
- 단순화된 이미지(Input Image)와 실제 이미지(Ground Truth)를 쌍을 이루는 데이터셋으로 학습을 진행
- 왼쪽의 Input Image를 입력받으면, 내부 연산을 통해 실제 사진같은 형상으로 변환된 Predicted Image를 출력
2. Neural Style Transfer¶
- 전체 이미지의 구성을 유지하고 싶은 Base Image와 입히고 싶은 스타일이 담긴 Style Image 두 장을 활용해 새로운 이미지를 만들어 내는 것
Fashion MNIST¶
- pip install imageio
pip install Pillow
mkdir -p ~/aiffel/dcgan_newimage/fashion/generated_samples
- mkdir -p ~/aiffel/dcgan_newimage/fashion/training_checkpoints
- mkdir -p ~/aiffel/dcgan_newimage/fashion/training_history
import os
import glob
import time
import PIL
import imageio
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from IPython import display
import matplotlib.pyplot as plt
%matplotlib inline
print("tensorflow", tf.__version__)
tensorflow 2.2.0
fashion_mnist = tf.keras.datasets.fashion_mnist
(train_x, _), (test_x, _) = fashion_mnist.load_data()
print("max pixel:", train_x.max())
print("min pixel:", train_x.min())
max pixel: 255 min pixel: 0
train_x = (train_x - 127.5) / 127.5 # 이미지를 [-1, 1]로 정규화합니다.
print("max pixel:", train_x.max())
print("min pixel:", train_x.min())
max pixel: 1.0 min pixel: -1.0
train_x.shape
(60000, 28, 28)
train_x = train_x.reshape(train_x.shape[0], 28, 28, 1).astype('float32')
train_x.shape
(60000, 28, 28, 1)
plt.imshow(train_x[0].reshape(28, 28), cmap='gray')
plt.colorbar()
plt.show()
plt.figure(figsize=(10, 5))
for i in range(10):
plt.subplot(2, 5, i+1)
plt.imshow(train_x[i].reshape(28, 28), cmap='gray')
plt.title(f'index: {i}')
plt.axis('off')
plt.show()
plt.figure(figsize=(10, 12))
for i in range(25):
plt.subplot(5, 5, i+1)
random_index = np.random.randint(1, 60000)
plt.imshow(train_x[random_index].reshape(28, 28), cmap='gray')
plt.title(f'index: {random_index}')
plt.axis('off')
plt.show()
# 데이터가 잘 섞이려면 버퍼사이즈는 총 데이터사이즈와 같거나 큰것이 좋다
BUFFER_SIZE = 60000
BATCH_SIZE = 256
# 효율적 학습을 위해 배치사이즈를 잘라 한번에 학습할 양을 구분짓는다
train_dataset = tf.data.Dataset.from_tensor_slices(train_x).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
DCGAN(Deep Convolutional GAN)¶
생성자 모델¶
- (7, 7, 256) → (14, 14, 64) → (28, 28, 1) 형태로 이미지를 키워나감
def make_generator_model(base_size=7, channels=1):
    """Build the DCGAN generator.

    Upsamples a 100-dim noise vector into an image of shape
    (base_size*4, base_size*4, channels); with the defaults this is the
    (7, 7, 256) -> (14, 14, 64) -> (28, 28, 1) pipeline for Fashion MNIST.

    Args:
        base_size: side length of the initial square feature map (default 7).
        channels: number of channels of the generated image (default 1).

    Returns:
        A tf.keras.Sequential model mapping (None, 100) noise to images with
        pixel values in [-1, 1] (tanh output).
    """
    model = tf.keras.Sequential()

    # Project the noise vector into a flat tensor large enough to reshape
    # into the initial (base_size, base_size, 256) feature map.
    model.add(layers.Dense(base_size * base_size * 256, use_bias=False,
                           input_shape=(100,)))
    model.add(layers.BatchNormalization())  # normalize activations for stable training
    model.add(layers.LeakyReLU())

    model.add(layers.Reshape((base_size, base_size, 256)))

    # Conv2DTranspose widens the spatial size -- the reverse of Conv2D.
    # stride 1: keeps (base_size, base_size), reduces depth to 128.
    model.add(layers.Conv2DTranspose(128, kernel_size=(5, 5), strides=(1, 1),
                                     padding='same', use_bias=False))
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    # stride 2: doubles the spatial size.
    model.add(layers.Conv2DTranspose(64, kernel_size=(5, 5), strides=(2, 2),
                                     padding='same', use_bias=False))
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    # tanh keeps the output in [-1, 1], matching the dataset normalization.
    model.add(layers.Conv2DTranspose(channels, kernel_size=(5, 5),
                                     strides=(2, 2), padding='same',
                                     use_bias=False, activation='tanh'))
    return model
generator = make_generator_model()
generator.summary()
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= dense (Dense) (None, 12544) 1254400 _________________________________________________________________ batch_normalization (BatchNo (None, 12544) 50176 _________________________________________________________________ leaky_re_lu (LeakyReLU) (None, 12544) 0 _________________________________________________________________ reshape (Reshape) (None, 7, 7, 256) 0 _________________________________________________________________ conv2d_transpose (Conv2DTran (None, 7, 7, 128) 819200 _________________________________________________________________ batch_normalization_1 (Batch (None, 7, 7, 128) 512 _________________________________________________________________ leaky_re_lu_1 (LeakyReLU) (None, 7, 7, 128) 0 _________________________________________________________________ conv2d_transpose_1 (Conv2DTr (None, 14, 14, 64) 204800 _________________________________________________________________ batch_normalization_2 (Batch (None, 14, 14, 64) 256 _________________________________________________________________ leaky_re_lu_2 (LeakyReLU) (None, 14, 14, 64) 0 _________________________________________________________________ conv2d_transpose_2 (Conv2DTr (None, 28, 28, 1) 1600 ================================================================= Total params: 2,330,944 Trainable params: 2,305,472 Non-trainable params: 25,472 _________________________________________________________________
# shape=(1, 100)의 형상을 가지는 랜덤 노이즈 벡터를 생성
noise = tf.random.normal([1, 100])
# 학습이 아니기때문에 False값을 주었다
generated_image = generator(noise, training=False)
generated_image.shape
TensorShape([1, 28, 28, 1])
# matplotlib 라이브러리는 2차원 이미지만 출력가능하므로 0번째와 3번째 축의 인덱스를 0으로 설정
plt.imshow(generated_image[0, :, :, 0], cmap='gray')
plt.colorbar()
plt.show()
판별자 모델¶
- 첫 번째 Conv2D 층에서 입력된 [28, 28, 1] 사이즈의 이미지를 (28, 28, 1) → (14, 14, 64) → (7, 7, 128)의 형태로 줄임
- Flatten 층을 사용해 3차원 이미지를 1차원으로 쭉 펴서 7x7x128=6272개의 (1, 6272) 형상의 벡터로 변환
def make_discriminator_model(input_shape=(28, 28, 1)):
    """Build the DCGAN discriminator.

    Shrinks the image (28, 28, 1) -> (14, 14, 64) -> (7, 7, 128), flattens,
    and emits one unbounded real/fake logit (no sigmoid; the loss is built
    with from_logits=True).

    Args:
        input_shape: shape of the input images (default Fashion MNIST).

    Returns:
        A tf.keras.Sequential model mapping images to a (None, 1) logit.
    """
    model = tf.keras.Sequential()

    # stride 2 halves the spatial size at each conv layer.
    model.add(layers.Conv2D(64, (5, 5), strides=(2, 2), padding='same',
                            input_shape=list(input_shape)))
    model.add(layers.LeakyReLU())
    model.add(layers.Dropout(0.3))  # regularization so D does not overpower G

    model.add(layers.Conv2D(128, (5, 5), strides=(2, 2), padding='same'))
    model.add(layers.LeakyReLU())
    model.add(layers.Dropout(0.3))

    # Flatten the (7, 7, 128) feature map into a 6272-dim vector.
    model.add(layers.Flatten())

    # Single logit: real vs. fake.
    model.add(layers.Dense(1))
    return model
discriminator = make_discriminator_model()
discriminator.summary()
Model: "sequential_1" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv2d (Conv2D) (None, 14, 14, 64) 1664 _________________________________________________________________ leaky_re_lu_3 (LeakyReLU) (None, 14, 14, 64) 0 _________________________________________________________________ dropout (Dropout) (None, 14, 14, 64) 0 _________________________________________________________________ conv2d_1 (Conv2D) (None, 7, 7, 128) 204928 _________________________________________________________________ leaky_re_lu_4 (LeakyReLU) (None, 7, 7, 128) 0 _________________________________________________________________ dropout_1 (Dropout) (None, 7, 7, 128) 0 _________________________________________________________________ flatten (Flatten) (None, 6272) 0 _________________________________________________________________ dense_1 (Dense) (None, 1) 6273 ================================================================= Total params: 212,865 Trainable params: 212,865 Non-trainable params: 0 _________________________________________________________________
decision = discriminator(generated_image, training=False)
decision
<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[-0.00138379]], dtype=float32)>
손실함수 : 교차 엔트로피¶
- 생성자 : 판별자가 Fake Image에 대해 판별한 값, 즉 D(fake_image) 값이 1에 가까워지는 것
- 판별자 : Real Image 판별값, 즉 D(real_image)는 1에, Fake Image 판별값, 즉 D(fake_image)는 0에 가까워지는 것
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)
# 생성자 손실함수
# ones_like는 특정 벡터와 동일한 크기이면서 값은 1으로 가득 채워진 벡터를 만들고 싶을 때 사용
def generator_loss(fake_output):
    """Generator loss: cross-entropy of D's fake scores against all-ones.

    The generator succeeds when the discriminator scores its fakes as real,
    so the target for every fake logit is 1 (tf.ones_like builds a same-shape
    tensor filled with ones).
    """
    return cross_entropy(tf.ones_like(fake_output), fake_output)
# 판별자 손실함수
# zeros_like는 특정 벡터와 동일한 크기이면서 값은 0으로 가득 채워진 벡터를 만들고 싶을 때 사용
def discriminator_loss(real_output, fake_output):
    """Discriminator loss: push D(real) toward 1 and D(fake) toward 0.

    Targets are built with ones_like / zeros_like so they match the logits'
    shape; the two cross-entropy terms are summed.
    """
    loss_on_real = cross_entropy(tf.ones_like(real_output), real_output)
    loss_on_fake = cross_entropy(tf.zeros_like(fake_output), fake_output)
    return loss_on_real + loss_on_fake
# (1) tf.math.greater_equal(real_output, tf.constant([0.5]) : real_output의 각 원소가 0.5 이상인지 True, False로 판별
# >> tf.Tensor([False, False, True, True])
# (2) tf.cast( (1), tf.float32) : (1)의 결과가 True이면 1.0, False이면 0.0으로 변환
# >> tf.Tensor([0.0, 0.0, 1.0, 1.0])
# (3) tf.reduce_mean( (2)) : (2)의 결과를 평균내어 이번 배치의 정확도(accuracy)를 계산
# >> 0.5
def discriminator_accuracy(real_output, fake_output):
    """Batch accuracy of the discriminator at a 0.5 decision threshold.

    Real samples count as correct when their score is >= 0.5, fake samples
    when their score is < 0.5; each boolean mask is cast to float and
    averaged over the batch.

    Returns:
        (real_accuracy, fake_accuracy) as scalar tensors.
    """
    threshold = tf.constant([0.5])
    correct_real = tf.cast(tf.math.greater_equal(real_output, threshold), tf.float32)
    correct_fake = tf.cast(tf.math.less(fake_output, threshold), tf.float32)
    return tf.reduce_mean(correct_real), tf.reduce_mean(correct_fake)
최적화 함수(optimizer)¶
# 생성자와 판별자는 따로 학습을 진행하기때문에 따로 만들어주어야한다
generator_optimizer = tf.keras.optimizers.Adam(1e-4)
discriminator_optimizer = tf.keras.optimizers.Adam(1e-4)
noise_dim = 100
num_examples_to_generate = 16
seed = tf.random.normal([num_examples_to_generate, noise_dim])
seed.shape
TensorShape([16, 100])
훈련과정 설계¶
@tf.function  # compiles the step into a TensorFlow graph for speed
def train_step(images):
    """One adversarial training step on a single batch of real images.

    Returns:
        (gen_loss, disc_loss, real_accuracy, fake_accuracy) for logging.
    """
    # Fresh noise, one 100-dim vector per slot in the batch.
    noise = tf.random.normal([BATCH_SIZE, noise_dim])

    # Two tapes: generator and discriminator are updated independently.
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        generated_images = generator(noise, training=True)

        # Score the real and the generated batch with the discriminator.
        real_output = discriminator(images, training=True)
        fake_output = discriminator(generated_images, training=True)

        # Adversarial losses.
        gen_loss = generator_loss(fake_output)
        disc_loss = discriminator_loss(real_output, fake_output)

        # Batch accuracies -- monitoring only, not differentiated.
        real_accuracy, fake_accuracy = discriminator_accuracy(real_output, fake_output)

    # Gradients of each loss w.r.t. its own network's weights.
    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

    # The two optimizers apply their updates separately.
    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))

    return gen_loss, disc_loss, real_accuracy, fake_accuracy
train_step함수에 대한 설명¶
(1) 입력데이터: Real Image 역할을 할 images 한 세트를 입력으로 받음
(2) 생성자 입력 노이즈 : generator가 FAKE IMAGE를 생성하기 위한 noise를 images 한 세트와 같은 크기인 BATCH_SIZE 만큼 생성함
(3) tf.GradientTape()는 가중치 갱신을 위한 Gradient를 자동 미분으로 계산하기 위해 with 구문 열기
(4) generated_images 생성 : generator가 noise를 입력받은 후 generated_images 생성
(5) discriminator 판별 : discriminator가 Real Image인 images와 Fake Image인 generated_images를 각각 입력받은 후 real_output, fake_output 출력
(6) loss 계산 : fake_output, real_output으로 generator와 discriminator 각각의 loss 계산
(7) accuracy 계산 : fake_output, real_output으로 discriminator가
(8) gradient 계산 : gen_tape와 disc_tape를 활용해 gradient를 자동으로 계산
(9) 모델 학습 : 계산된 gradient를 optimizer에 입력해 가중치 갱신
(10) 리턴값 : 이번 스텝에 계산된 loss와 accuracy를 리턴
def generate_and_save_images(model, epoch, it, sample_seeds):
    """Generate a 4x4 grid of samples from `model`, save it as a PNG, show it.

    Args:
        model: the generator; called with training=False so BatchNorm uses
            its moving statistics instead of batch statistics.
        epoch: 1-based epoch number, used in the output file name.
        it: 1-based iteration number, used in the output file name.
        sample_seeds: fixed noise batch (assumed 16 rows for the 4x4 grid)
            so successive images are comparable across training.
    """
    predictions = model(sample_seeds, training=False)

    # Note: the original assigned plt.figure(...) to an unused `fig` local;
    # the assignment is dropped.
    plt.figure(figsize=(4, 4))
    for i in range(predictions.shape[0]):
        plt.subplot(4, 4, i + 1)
        plt.imshow(predictions[i, :, :, 0], cmap='gray')
        plt.axis('off')

    plt.savefig('{}/aiffel/dcgan_newimage/fashion/generated_samples/sample_epoch_{:04d}_iter_{:03d}.png'
                .format(os.getenv('HOME'), epoch, it))
    plt.show()
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 15, 6 # matlab 차트의 기본 크기를 15,6으로 지정해 줍니다.
def draw_train_history(history, epoch):
    """Plot loss (top panel) and discriminator accuracy (bottom panel)
    curves, save the figure for this epoch, and display it."""
    # Top panel: generator vs. discriminator loss per batch iteration.
    plt.subplot(211)
    for series in ('gen_loss', 'disc_loss'):
        plt.plot(history[series])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('batch iters')
    plt.legend(['gen_loss', 'disc_loss'], loc='upper left')

    # Bottom panel: discriminator accuracy on fake vs. real batches.
    plt.subplot(212)
    for series in ('fake_accuracy', 'real_accuracy'):
        plt.plot(history[series])
    plt.title('discriminator accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('batch iters')
    plt.legend(['fake_accuracy', 'real_accuracy'], loc='upper left')

    # One history image per epoch goes into training_history/.
    plt.savefig('{}/aiffel/dcgan_newimage/fashion/training_history/train_history_{:04d}.png'
                .format(os.getenv('HOME'), epoch))
    plt.show()
# 중간 체크포인트
checkpoint_dir = os.getenv('HOME')+'/aiffel/dcgan_newimage/fashion/training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(generator_optimizer=generator_optimizer,
discriminator_optimizer=discriminator_optimizer,
generator=generator,
discriminator=discriminator)
학습¶
def train(dataset, epochs, save_every):
    """Full DCGAN training loop.

    Args:
        dataset: batched tf.data.Dataset of real images.
        epochs: number of passes over the dataset.
        save_every: checkpoint interval, in epochs.
    """
    start = time.time()
    history = {'gen_loss':[], 'disc_loss':[], 'real_accuracy':[], 'fake_accuracy':[]}

    for epoch in range(epochs):
        epoch_start = time.time()
        for it, image_batch in enumerate(dataset):
            gen_loss, disc_loss, real_accuracy, fake_accuracy = train_step(image_batch)
            # Record per-iteration metrics for draw_train_history().
            history['gen_loss'].append(gen_loss)
            history['disc_loss'].append(disc_loss)
            history['real_accuracy'].append(real_accuracy)
            history['fake_accuracy'].append(fake_accuracy)

            if it % 50 == 0:
                # Periodic progress report with a fresh sample grid from the
                # fixed `seed` noise, so images are comparable over time.
                display.clear_output(wait=True)
                generate_and_save_images(generator, epoch+1, it+1, seed)
                print('Epoch {} | iter {}'.format(epoch+1, it+1))
                print('Time for epoch {} : {} sec'.format(epoch+1, int(time.time()-epoch_start)))

        # Persist weights + optimizer state every `save_every` epochs.
        if (epoch + 1) % save_every == 0:
            checkpoint.save(file_prefix=checkpoint_prefix)

    # Final sample grid; `it` here is the index of the last batch seen.
    display.clear_output(wait=True)
    generate_and_save_images(generator, epochs, it, seed)
    print('Time for training : {} sec'.format(int(time.time()-start)))

    draw_train_history(history, epoch)
save_every = 5
EPOCHS = 50
# 사용가능한 GPU 디바이스 확인
tf.config.list_physical_devices("GPU")
[]
%%time
train(train_dataset, EPOCHS, save_every)
Time for training : 5351 sec
CPU times: user 15h 52min 2s, sys: 4min 11s, total: 15h 56min 14s Wall time: 1h 29min 11s
anim_file = os.getenv('HOME')+'/aiffel/dcgan_newimage/fashion/fashion_mnist_dcgan.gif'

with imageio.get_writer(anim_file, mode='I') as writer:
    # Collect every saved sample image in chronological (sorted) order.
    filenames = glob.glob('{}/aiffel/dcgan_newimage/fashion/generated_samples/sample*.png'.format(os.getenv('HOME')))
    filenames = sorted(filenames)

    last = -1
    for i, filename in enumerate(filenames):
        # Sub-sample frames on a square-root schedule so the fast-changing
        # early iterations get more frames than the later ones.
        frame = 2*(i**0.5)
        if round(frame) <= round(last):
            continue
        last = frame
        writer.append_data(imageio.imread(filename))

    # Repeat the final frame so the GIF lingers on the end state.
    writer.append_data(imageio.imread(filename))
!ls -l ~/aiffel/dcgan_newimage/fashion/fashion_mnist_dcgan.gif
-rw-rw-r-- 1 ssac24 ssac24 1288212 2월 23 00:24 /home/ssac24/aiffel/dcgan_newimage/fashion/fashion_mnist_dcgan.gif
CIFAR-10 이미지 생성하기¶
- mkdir -p ~/aiffel/dcgan_newimage/cifar10/generated_samples
- mkdir -p ~/aiffel/dcgan_newimage/cifar10/training_checkpoints
- mkdir -p ~/aiffel/dcgan_newimage/cifar10/training_history
cifar10 = tf.keras.datasets.cifar10
(train_x, _), (test_x, _) = cifar10.load_data()
train_x.shape
(50000, 32, 32, 3)
# 정규화
train_x = (train_x - 127.5) / 127.5 # 이미지를 [-1, 1]로 정규화합니다.
print("max pixel:", train_x.max())
print("min pixel:", train_x.min())
max pixel: 1.0 min pixel: -1.0
train_x = train_x.reshape(train_x.shape[0], 32, 32, 3).astype('float32')
train_x.shape
(50000, 32, 32, 3)
plt.figure(figsize=(10, 5))
for i in range(10):
plt.subplot(2, 5, i+1)
plt.imshow(train_x[i].reshape(32, 32, 3))
plt.title(f'index: {i}')
plt.axis('off')
plt.show()
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
# 데이터가 잘 섞이려면 버퍼사이즈는 총 데이터사이즈와 같거나 큰것이 좋다
BUFFER_SIZE = 50000
BATCH_SIZE = 256
# 효율적 학습을 위해 배치사이즈를 잘라 한번에 학습할 양을 구분짓는다
train_dataset = tf.data.Dataset.from_tensor_slices(train_x).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
def make_generator_model(base_size=8, channels=3):
    """Build the DCGAN generator for CIFAR-10.

    Upsamples a 100-dim noise vector into an image of shape
    (base_size*4, base_size*4, channels); with the defaults this is the
    (8, 8, 256) -> (16, 16, 64) -> (32, 32, 3) pipeline.

    Args:
        base_size: side length of the initial square feature map (default 8).
        channels: number of channels of the generated image (default 3, RGB).

    Returns:
        A tf.keras.Sequential model mapping (None, 100) noise to images with
        pixel values in [-1, 1] (tanh output).
    """
    model = tf.keras.Sequential()

    # Project the noise vector into a flat tensor large enough to reshape
    # into the initial (base_size, base_size, 256) feature map.
    model.add(layers.Dense(base_size * base_size * 256, use_bias=False,
                           input_shape=(100,)))
    model.add(layers.BatchNormalization())  # normalize activations for stable training
    model.add(layers.LeakyReLU())

    model.add(layers.Reshape((base_size, base_size, 256)))

    # Conv2DTranspose widens the spatial size -- the reverse of Conv2D.
    # stride 1: keeps (base_size, base_size), reduces depth to 128.
    model.add(layers.Conv2DTranspose(128, kernel_size=(5, 5), strides=(1, 1),
                                     padding='same', use_bias=False))
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    # stride 2: doubles the spatial size.
    model.add(layers.Conv2DTranspose(64, kernel_size=(5, 5), strides=(2, 2),
                                     padding='same', use_bias=False))
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    # tanh keeps the output in [-1, 1], matching the dataset normalization.
    model.add(layers.Conv2DTranspose(channels, kernel_size=(5, 5),
                                     strides=(2, 2), padding='same',
                                     use_bias=False, activation='tanh'))
    return model
generator = make_generator_model()
generator.summary()
Model: "sequential_2" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= dense_2 (Dense) (None, 16384) 1638400 _________________________________________________________________ batch_normalization_3 (Batch (None, 16384) 65536 _________________________________________________________________ leaky_re_lu_5 (LeakyReLU) (None, 16384) 0 _________________________________________________________________ reshape_1 (Reshape) (None, 8, 8, 256) 0 _________________________________________________________________ conv2d_transpose_3 (Conv2DTr (None, 8, 8, 128) 819200 _________________________________________________________________ batch_normalization_4 (Batch (None, 8, 8, 128) 512 _________________________________________________________________ leaky_re_lu_6 (LeakyReLU) (None, 8, 8, 128) 0 _________________________________________________________________ conv2d_transpose_4 (Conv2DTr (None, 16, 16, 64) 204800 _________________________________________________________________ batch_normalization_5 (Batch (None, 16, 16, 64) 256 _________________________________________________________________ leaky_re_lu_7 (LeakyReLU) (None, 16, 16, 64) 0 _________________________________________________________________ conv2d_transpose_5 (Conv2DTr (None, 32, 32, 3) 4800 ================================================================= Total params: 2,733,504 Trainable params: 2,700,352 Non-trainable params: 33,152 _________________________________________________________________
# shape=(1, 100)의 형상을 가지는 랜덤 노이즈 벡터를 생성
noise = tf.random.normal([1, 100])
# 학습이 아니기때문에 False값을 주었다
generated_image = generator(noise, training=False)
generated_image.shape
TensorShape([1, 32, 32, 3])
# matplotlib 라이브러리는 2차원 이미지만 출력가능하므로 0번째와 3번째 축의 인덱스를 0으로 설정
plt.imshow(generated_image[0, :, :, 0])
plt.colorbar()
plt.show()
def make_discriminator_model(input_shape=(32, 32, 3)):
    """Build the DCGAN discriminator for CIFAR-10.

    Shrinks the image (32, 32, 3) -> (16, 16, 64) -> (8, 8, 128), flattens,
    and emits one unbounded real/fake logit (no sigmoid; the loss is built
    with from_logits=True).

    Args:
        input_shape: shape of the input images (default CIFAR-10 RGB).

    Returns:
        A tf.keras.Sequential model mapping images to a (None, 1) logit.
    """
    model = tf.keras.Sequential()

    # stride 2 halves the spatial size at each conv layer.
    model.add(layers.Conv2D(64, (5, 5), strides=(2, 2), padding='same',
                            input_shape=list(input_shape)))
    model.add(layers.LeakyReLU())
    model.add(layers.Dropout(0.3))  # regularization so D does not overpower G

    model.add(layers.Conv2D(128, (5, 5), strides=(2, 2), padding='same'))
    model.add(layers.LeakyReLU())
    model.add(layers.Dropout(0.3))

    model.add(layers.Flatten())

    # BUG FIX: the original ended with Dense(3), apparently copied from the
    # 3 color channels. A GAN discriminator must output a single real/fake
    # logit; with 3 outputs, BinaryCrossentropy silently averages three
    # independent classifiers and discriminator_accuracy thresholds all
    # three, which is not the intended design.
    model.add(layers.Dense(1))
    return model
discriminator = make_discriminator_model()
discriminator.summary()
Model: "sequential_3" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv2d_2 (Conv2D) (None, 16, 16, 64) 4864 _________________________________________________________________ leaky_re_lu_8 (LeakyReLU) (None, 16, 16, 64) 0 _________________________________________________________________ dropout_2 (Dropout) (None, 16, 16, 64) 0 _________________________________________________________________ conv2d_3 (Conv2D) (None, 8, 8, 128) 204928 _________________________________________________________________ leaky_re_lu_9 (LeakyReLU) (None, 8, 8, 128) 0 _________________________________________________________________ dropout_3 (Dropout) (None, 8, 8, 128) 0 _________________________________________________________________ flatten_1 (Flatten) (None, 8192) 0 _________________________________________________________________ dense_3 (Dense) (None, 3) 24579 ================================================================= Total params: 234,371 Trainable params: 234,371 Non-trainable params: 0 _________________________________________________________________
decision = discriminator(generated_image, training=False)
decision
<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[-0.00279044, 0.00230897, 0.00076817]], dtype=float32)>
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)
# 생성자 손실함수
# ones_like는 특정 벡터와 동일한 크기이면서 값은 1으로 가득 채워진 벡터를 만들고 싶을 때 사용
def generator_loss(fake_output):
    """Generator loss: cross-entropy of D's fake scores against all-ones.

    The generator succeeds when the discriminator scores its fakes as real,
    so the target for every fake logit is 1 (tf.ones_like builds a same-shape
    tensor filled with ones).
    """
    return cross_entropy(tf.ones_like(fake_output), fake_output)
# 판별자 손실함수
# zeros_like는 특정 벡터와 동일한 크기이면서 값은 0으로 가득 채워진 벡터를 만들고 싶을 때 사용
def discriminator_loss(real_output, fake_output):
    """Discriminator loss: push D(real) toward 1 and D(fake) toward 0.

    Sum of the cross-entropy of the real scores against all-ones and the
    fake scores against all-zeros.
    """
    return (cross_entropy(tf.ones_like(real_output), real_output)
            + cross_entropy(tf.zeros_like(fake_output), fake_output))
def discriminator_accuracy(real_output, fake_output):
    """Batch accuracy of the discriminator at a 0.5 decision threshold.

    Returns:
        (real_accuracy, fake_accuracy): mean fraction of real scores >= 0.5
        and fake scores < 0.5, each as a scalar tensor.
    """
    real_hits = tf.math.greater_equal(real_output, tf.constant([0.5]))
    fake_hits = tf.math.less(fake_output, tf.constant([0.5]))
    return (tf.reduce_mean(tf.cast(real_hits, tf.float32)),
            tf.reduce_mean(tf.cast(fake_hits, tf.float32)))
# 생성자와 판별자는 따로 학습을 진행하기때문에 따로 만들어주어야한다
generator_optimizer = tf.keras.optimizers.Adam(1e-4)
discriminator_optimizer = tf.keras.optimizers.Adam(1e-4)
noise_dim = 100
num_examples_to_generate = 16
seed = tf.random.normal([num_examples_to_generate, noise_dim])
seed.shape
TensorShape([16, 100])
@tf.function  # compiles the step into a TensorFlow graph for speed
def train_step(images):
    """One adversarial training step on a single batch of real images.

    Returns:
        (gen_loss, disc_loss, real_accuracy, fake_accuracy) for logging.
    """
    # Fresh noise, one 100-dim vector per slot in the batch.
    noise = tf.random.normal([BATCH_SIZE, noise_dim])

    # Two tapes: generator and discriminator are updated independently.
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        generated_images = generator(noise, training=True)

        # Score the real and the generated batch with the discriminator.
        real_output = discriminator(images, training=True)
        fake_output = discriminator(generated_images, training=True)

        # Adversarial losses.
        gen_loss = generator_loss(fake_output)
        disc_loss = discriminator_loss(real_output, fake_output)

        # Batch accuracies -- monitoring only, not differentiated.
        real_accuracy, fake_accuracy = discriminator_accuracy(real_output, fake_output)

    # Gradients of each loss w.r.t. its own network's weights.
    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

    # The two optimizers apply their updates separately.
    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))

    return gen_loss, disc_loss, real_accuracy, fake_accuracy
def generate_and_save_images(model, epoch, it, sample_seeds):
    """Generate a 4x4 grid of CIFAR-10 samples, save it as a PNG, show it.

    Fixes two issues in the original: the unused `fig` local is dropped, and
    the full RGB image is displayed instead of only channel 0 (the original
    `predictions[i, :, :, 0]` showed just the red channel through matplotlib's
    default colormap). The tanh output in [-1, 1] is rescaled to [0, 1] so
    imshow does not clip RGB float data.

    Args:
        model: the generator; called with training=False so BatchNorm uses
            its moving statistics instead of batch statistics.
        epoch: 1-based epoch number, used in the output file name.
        it: 1-based iteration number, used in the output file name.
        sample_seeds: fixed noise batch (assumed 16 rows for the 4x4 grid).
    """
    predictions = model(sample_seeds, training=False)

    plt.figure(figsize=(4, 4))
    for i in range(predictions.shape[0]):
        plt.subplot(4, 4, i + 1)
        # Rescale from [-1, 1] (tanh) to [0, 1] for RGB display.
        plt.imshow((predictions[i] + 1.0) / 2.0)
        plt.axis('off')

    plt.savefig('{}/aiffel/dcgan_newimage/cifar10/generated_samples/sample_epoch_{:04d}_iter_{:03d}.png'
                .format(os.getenv('HOME'), epoch, it))
    plt.show()
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 15, 6 # matlab 차트의 기본 크기를 15,6으로 지정해 줍니다.
def draw_train_history(history, epoch):
    """Plot loss (top panel) and discriminator accuracy (bottom panel)
    curves, save the figure for this epoch, and display it."""
    # summarize history for loss
    plt.subplot(211)
    plt.plot(history['gen_loss'])
    plt.plot(history['disc_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('batch iters')
    plt.legend(['gen_loss', 'disc_loss'], loc='upper left')

    # summarize history for accuracy
    plt.subplot(212)
    plt.plot(history['fake_accuracy'])
    plt.plot(history['real_accuracy'])
    plt.title('discriminator accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('batch iters')
    plt.legend(['fake_accuracy', 'real_accuracy'], loc='upper left')

    # Save one history image per epoch under training_history/.
    plt.savefig('{}/aiffel/dcgan_newimage/cifar10/training_history/train_history_{:04d}.png'
                .format(os.getenv('HOME'), epoch))
    plt.show()
# 중간 체크포인트
checkpoint_dir = os.getenv('HOME')+'/aiffel/dcgan_newimage/cifar10/training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(generator_optimizer=generator_optimizer,
discriminator_optimizer=discriminator_optimizer,
generator=generator,
discriminator=discriminator)
def train(dataset, epochs, save_every):
    """Full DCGAN training loop (CIFAR-10 run).

    Args:
        dataset: batched tf.data.Dataset of real images.
        epochs: number of passes over the dataset.
        save_every: checkpoint interval, in epochs.
    """
    start = time.time()
    history = {'gen_loss':[], 'disc_loss':[], 'real_accuracy':[], 'fake_accuracy':[]}

    for epoch in range(epochs):
        epoch_start = time.time()
        for it, image_batch in enumerate(dataset):
            gen_loss, disc_loss, real_accuracy, fake_accuracy = train_step(image_batch)
            # Record per-iteration metrics for draw_train_history().
            history['gen_loss'].append(gen_loss)
            history['disc_loss'].append(disc_loss)
            history['real_accuracy'].append(real_accuracy)
            history['fake_accuracy'].append(fake_accuracy)

            if it % 50 == 0:
                # Periodic progress report with a fresh sample grid from the
                # fixed `seed` noise, so images are comparable over time.
                display.clear_output(wait=True)
                generate_and_save_images(generator, epoch+1, it+1, seed)
                print('Epoch {} | iter {}'.format(epoch+1, it+1))
                print('Time for epoch {} : {} sec'.format(epoch+1, int(time.time()-epoch_start)))

        # Persist weights + optimizer state every `save_every` epochs.
        if (epoch + 1) % save_every == 0:
            checkpoint.save(file_prefix=checkpoint_prefix)

    # Final sample grid; `it` here is the index of the last batch seen.
    display.clear_output(wait=True)
    generate_and_save_images(generator, epochs, it, seed)
    print('Time for training : {} sec'.format(int(time.time()-start)))

    draw_train_history(history, epoch)
save_every = 5
EPOCHS = 50
# 사용가능한 GPU 디바이스 확인
tf.config.list_physical_devices("GPU")
[]
%%time
train(train_dataset, EPOCHS, save_every)
Time for training : 5740 sec
CPU times: user 17h 15min 5s, sys: 4min 5s, total: 17h 19min 11s Wall time: 1h 35min 41s
anim_file = os.getenv('HOME')+'/aiffel/dcgan_newimage/cifar10/cifar10_mnist_dcgan.gif'

with imageio.get_writer(anim_file, mode='I') as writer:
    # Collect every saved sample image in chronological (sorted) order.
    filenames = glob.glob('{}/aiffel/dcgan_newimage/cifar10/generated_samples/sample*.png'.format(os.getenv('HOME')))
    filenames = sorted(filenames)

    last = -1
    for i, filename in enumerate(filenames):
        # Sub-sample frames on a square-root schedule so the fast-changing
        # early iterations get more frames than the later ones.
        frame = 2*(i**0.5)
        if round(frame) <= round(last):
            continue
        last = frame
        writer.append_data(imageio.imread(filename))

    # Repeat the final frame so the GIF lingers on the end state.
    writer.append_data(imageio.imread(filename))
!ls -l ~/aiffel/dcgan_newimage/cifar10/cifar10_mnist_dcgan.gif
-rw-rw-r-- 1 ssac24 ssac24 1333854 2월 23 02:51 /home/ssac24/aiffel/dcgan_newimage/cifar10/cifar10_mnist_dcgan.gif
# Restore the latest training checkpoint for the CIFAR-10 run.
checkpoint_dir = os.getenv('HOME')+'/aiffel/dcgan_newimage/cifar10/training_checkpoints'
latest = tf.train.latest_checkpoint(checkpoint_dir)
checkpoint.restore(latest)

# Pull the restored models back out of the checkpoint object.
generator = checkpoint.generator
discriminator = checkpoint.discriminator

# Sanity check that the loaded model generates images as expected.
noise = tf.random.normal([1, 100])
generated_image = generator(noise, training=False)
np_generated = generated_image.numpy()
np_generated = (np_generated * 127.5) + 127.5  # reverse of the [-1, 1] normalization
np_generated = np_generated.astype(int)
plt.imshow(np_generated[0])
plt.show()  # if the model loaded correctly, a CIFAR-10-like image (not random noise) appears
정리¶
- 생성자와 판별자는 모델도 옵티마이저도 따로 만들어야 한다.
- cifar10 같은 RGB 3채널의 이미지는 1채널의 gray보다 고려할 점들이 있었다.
- 손실함수에서 생성자는 D(fake_image)가 1에 가까워지도록, 판별자는 D(real_image)는 1에, D(fake_image)는 0에 가까워지도록 학습한다. 이를 위해 입력 데이터를 [-1, 1] 범위로 정규화하는 과정을 거쳐야 한다.
- 이번 모델은 CPU로 학습을 해서인지 시간소요가 굉장히 많았다(GPU로 학습해보라고하는데 어떤 점을 고쳐야하는지 찾지못함)
- 기타 코드에 대한 이해는 중간중간 정리되어있어 따로 적지않았다.
'AIFFEL' 카테고리의 다른 글
Explolation11 텍스트 요약 (1) | 2021.02.23 |
---|---|
Explolation13 주식 가격 예측 (0) | 2021.02.23 |
Explolation10 인물사진 배경바꿔보기 (0) | 2021.02.04 |
Explolation9 캐글 따라해보기 - 주택가격 예측 (0) | 2021.02.02 |
Explolation 8 영화 추천 시스템 만들기 (0) | 2021.01.29 |