
[Python] Trying DCGAN with Keras

I finally got around to generating MNIST handwritten digit images with a DCGAN. The book "Deep Learning with Python" has an example that generates CIFAR10 frog images with a DCGAN, and I tried it first, but with 32×32 images I couldn't really tell how good the results were, so I decided to try it on simple handwritten digits instead.

www.manning.com

However, when I tried to generate MNIST images with the book's code, only the discriminator's loss dropped sharply and training did not go well (I tried raising the discriminator's dropout rate and lowering the learning rate, but it still failed). In the end, training worked once I borrowed the following implementation.

github.com

--- Training environment ---
Windows10 Home
Python 3.5.2
Keras 2.1.2
tensorflow-gpu 1.2.0

First, write the following in dcgan.py.

import numpy as np
import keras
from keras.layers import Input, Dense, Activation, BatchNormalization, Reshape, UpSampling2D, Conv2D, MaxPool2D, Flatten

class Generator(object):
    def __init__(self, latent_dim):
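        # Maps a latent vector to a 28x28x1 image: Dense layers, reshape to 7x7x128,
        # then two UpSampling2D + Conv2D stages (7x7 -> 14x14 -> 28x28), tanh output.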
        generator_input = keras.Input(shape=(latent_dim,))
        x = Dense(1024)(generator_input)
        x = Activation('tanh')(x)
        x = Dense(128*7*7)(x)
        x = BatchNormalization()(x)
        x = Activation('tanh')(x)
        x = Reshape((7, 7, 128))(x)
        x = UpSampling2D(size=(2, 2))(x)
        x = Conv2D(64, 5, padding='same')(x)
        x = Activation('tanh')(x)
        x = UpSampling2D(size=(2, 2))(x)
        x = Conv2D(1, 5, padding='same')(x)
        x = Activation('tanh')(x)
        self.generator = keras.models.Model(generator_input, x)

    def get_model(self):
        return self.generator


class Discriminator(object):
    def __init__(self, height, width, channels):
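        # Scores a height x width x channels image with two Conv2D + MaxPool2D stages
        # followed by Dense layers; the sigmoid output is trained with label 1 for
        # generated images and 0 for real ones (see DCGAN.train).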
        discriminator_input = Input(shape=(height, width, channels))
        x = Conv2D(64, 5, padding='same')(discriminator_input)
        x = Activation('tanh')(x)
        x = MaxPool2D()(x)
        x = Conv2D(128, 5)(x)
        x = Activation('tanh')(x)
        x = MaxPool2D()(x)
        x = Flatten()(x)
        x = Dense(1024)(x)
        x = Activation('tanh')(x)
        x = Dense(1, activation='sigmoid')(x)
        self.discriminator = keras.models.Model(discriminator_input, x)

    def get_model(self):
        return self.discriminator


class DCGAN(object):
    def __init__(self, latent_dim, height, width, channels):
        # set generator
        self._latent_dim = latent_dim
        g = Generator(latent_dim)
        self._generator = g.get_model()
        # set discriminator
        d = Discriminator(height, width, channels)
        self._discriminator = d.get_model()
        # compile discriminator
        discriminator_optimizer = keras.optimizers.SGD(lr=0.0005, momentum=0.9, nesterov=True)
        self._discriminator.compile(optimizer=discriminator_optimizer, loss='binary_crossentropy')
        # disable training when combined with generator
        self._discriminator.trainable = False
        # set DCGAN
        dcgan_input = keras.Input(shape=(latent_dim,))
        dcgan_output = self._discriminator(self._generator(dcgan_input))
        self._dcgan = keras.models.Model(dcgan_input, dcgan_output)
        # compile DCGAN
        dcgan_optimizer = keras.optimizers.SGD(lr=0.0005, momentum=0.9, nesterov=True)
        self._dcgan.compile(optimizer=dcgan_optimizer, loss='binary_crossentropy')

    def train(self, real_images, batch_size):
        # Train so discriminator can detect fake
        random_latent_vectors = np.random.normal(size=(batch_size, self._latent_dim))
        generated_images = self._generator.predict(random_latent_vectors)
        labels = np.ones((batch_size, 1))
        labels += 0.05 * np.random.random(labels.shape)
        d_loss1 = self._discriminator.train_on_batch(generated_images, labels)
        # Train so discriminator can detect real
        labels = np.zeros((batch_size, 1))
        labels += 0.05 * np.random.random(labels.shape)
        d_loss2 = self._discriminator.train_on_batch(real_images, labels)
        d_loss = (d_loss1 + d_loss2)/2.0
        # Train so generator can fool discriminator
        random_latent_vectors = np.random.normal(size=(batch_size, self._latent_dim))
        misleading_targets = np.zeros((batch_size, 1))
        g_loss = self._dcgan.train_on_batch(random_latent_vectors, misleading_targets)
        return d_loss, g_loss

    def predict(self, latent_vector):
        return self._generator.predict(latent_vector)

    def load_weights(self, file_path, by_name=False):
        self._dcgan.load_weights(file_path, by_name)

    def save_weights(self, file_path, overwrite=True):
        self._dcgan.save_weights(file_path, overwrite)
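
Before moving on to the training script, it can help to confirm that the generator and discriminator wire together as intended. The following is a minimal shape check I added for illustration; it is not part of the original code.

import numpy as np
from dcgan import DCGAN

# Build the combined model and generate two images from random latent vectors
dcgan = DCGAN(latent_dim=100, height=28, width=28, channels=1)
z = np.random.normal(size=(2, 100))
images = dcgan.predict(z)
print(images.shape)  # expected: (2, 28, 28, 1)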


Next, write the following in main.py (the predict function saves images into a folder named generated, so create a generated folder in the same directory as main.py beforehand).
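
Alternatively, you can have the script create the folder itself. Since main.py already imports os, a single line at the start of predict is enough (my addition, not in the original code):

    os.makedirs('generated', exist_ok=True)  # create the output folder if it does not already exist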

import os
import numpy as np
import keras
from dcgan import DCGAN
from keras.preprocessing import image

# Normalize image from 0 - 255 to -1 - 1
def normalize(X):
    return (X - 127.5)/127.5

# Denormalize from -1 - 1 to 0 - 255
def denormalize(X):
    return (X + 1.0)*127.5

def train(latent_dim, height, width, channels):
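    # Train the DCGAN on MNIST for 20 epochs, logging losses to loss.txt and
    # saving the combined weights after every epoch.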
    (X_train, Y_train), (_, _) = keras.datasets.mnist.load_data()
    X_train = X_train.reshape((X_train.shape[0],) + (height, width, channels)).astype('float32')
    X_train = normalize(X_train)
    epochs = 20
    batch_size = 128
    iterations = X_train.shape[0]//batch_size
    dcgan = DCGAN(latent_dim, height, width, channels)
    for epoch in range(epochs):
        for iteration in range(iterations):
            real_images = X_train[iteration*batch_size:(iteration+1)*batch_size]
            d_loss, g_loss = dcgan.train(real_images, batch_size)
            if (iteration + 1)%10 == 0:
                print('discriminator loss:', d_loss)
                print('generator loss:', g_loss)
                print()
                with open('loss.txt', 'a') as f:
                    f.write(str(d_loss) + ',' + str(g_loss) + '\r')
        dcgan.save_weights('gan' + '_epoch' + str(epoch + 1) + '.h5')
        print('epoch' + str(epoch + 1) + ' end')
        print()

def predict(latent_dim, height, width, channels):
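    # Generate 100 digit images with the trained weights and save them as PNGs
    # in the 'generated' folder.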
    random_latent_vectors = np.random.normal(size=(100, latent_dim))
    dcgan = DCGAN(latent_dim, height, width, channels)
    dcgan.load_weights('gan_epoch20.h5')
    generated_images = dcgan.predict(random_latent_vectors)
    for i, generated_image in enumerate(generated_images):
        img = image.array_to_img(denormalize(generated_image), scale=False)
        img.save(os.path.join('generated', str(i) + '.png'))

if __name__ == '__main__':
    latent_dim = 100
    height = 28
    width = 28
    channels = 1
    train(latent_dim, height, width, channels)
    predict(latent_dim, height, width, channels)


The loss values evolved as shown below. Looking closely, from the middle of training onward the generator's loss trends upward and the discriminator's loss trends downward, so training any longer might let the generator lose out and start producing strange images.
[Figure: discriminator and generator loss curves over training]
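
For reference, the values logged to loss.txt can be plotted with a short script like the one below. This is a minimal sketch I added (it assumes matplotlib is installed) and is not part of the original post.

import matplotlib.pyplot as plt

d_losses, g_losses = [], []
with open('loss.txt') as f:
    for line in f:  # universal newlines also split on the '\r' written by train()
        line = line.strip()
        if not line:
            continue
        d, g = line.split(',')
        d_losses.append(float(d))
        g_losses.append(float(g))

plt.plot(d_losses, label='discriminator loss')
plt.plot(g_losses, label='generator loss')
plt.xlabel('logged step (every 10 iterations)')
plt.ylabel('loss')
plt.legend()
plt.show()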
Finally, these are the images generated after the 20 epochs of training finished.
[Figure: handwritten digit images generated after 20 epochs of training]