【Python】 KerasでDCGANを試す
DCGANでMNISTの手書き数字画像を生成する、ということを今更ながらやりました。元々は"Deep Learning with Python"という書籍にDCGANでCIFAR10のカエル画像を生成させる例があり、それを試してみたのですが、32×32の画像を見ても結果が良く分からなかったので、単純な手書き数字で試してみるかと思ったわけです。
ただ、書籍のコードを使ってMNISTの画像を生成させようとしたのですが、Discriminatorのロス値だけ急激に下がり上手く学習できませんでした(Discriminatorのドロップアウト率を上げたり学習係数を下げたりしたのですが、失敗しました)。結局以下の実装を引用したら上手く学習できました。
--- 学習環境 --- Windows10 Home Python 3.5.2 Keras 2.1.2 tensorflow-gpu 1.2.0
まず以下をdcgan.pyに記述します。
import numpy as np
import keras
from keras.layers import Input, Dense, Activation, BatchNormalization, Reshape, UpSampling2D, Conv2D, MaxPool2D, Flatten


class Generator(object):
    """Generator network: maps a latent vector to a 28x28x1 image in [-1, 1].

    Architecture: Dense -> Dense(128*7*7) -> reshape to 7x7x128, then two
    upsample+conv stages (7x7 -> 14x14 -> 28x28), tanh activations throughout.
    """

    def __init__(self, latent_dim):
        z = keras.Input(shape=(latent_dim,))
        h = Dense(1024)(z)
        h = Activation('tanh')(h)
        h = Dense(128 * 7 * 7)(h)
        h = BatchNormalization()(h)
        h = Activation('tanh')(h)
        h = Reshape((7, 7, 128))(h)
        h = UpSampling2D(size=(2, 2))(h)           # 7x7 -> 14x14
        h = Conv2D(64, 5, padding='same')(h)
        h = Activation('tanh')(h)
        h = UpSampling2D(size=(2, 2))(h)           # 14x14 -> 28x28
        h = Conv2D(1, 5, padding='same')(h)
        h = Activation('tanh')(h)                  # output in [-1, 1], matches normalized MNIST
        self.generator = keras.models.Model(z, h)

    def get_model(self):
        """Return the underlying Keras Model."""
        return self.generator


class Discriminator(object):
    """Discriminator network: classifies an image as fake (1) or real (0).

    NOTE: this code base uses flipped labels — 1 means "generated", 0 means
    "real" (see DCGAN.train).
    """

    def __init__(self, height, width, channels):
        img = Input(shape=(height, width, channels))
        h = Conv2D(64, 5, padding='same')(img)
        h = Activation('tanh')(h)
        h = MaxPool2D()(h)
        h = Conv2D(128, 5)(h)
        h = Activation('tanh')(h)
        h = MaxPool2D()(h)
        h = Flatten()(h)
        h = Dense(1024)(h)
        h = Activation('tanh')(h)
        h = Dense(1, activation='sigmoid')(h)      # single fake/real probability
        self.discriminator = keras.models.Model(img, h)

    def get_model(self):
        """Return the underlying Keras Model."""
        return self.discriminator


class DCGAN(object):
    """Combined DCGAN: owns the generator, the discriminator, and the
    stacked generator->discriminator model used to train the generator."""

    def __init__(self, latent_dim, height, width, channels):
        self._latent_dim = latent_dim

        # Build the two sub-networks.
        self._generator = Generator(latent_dim).get_model()
        self._discriminator = Discriminator(height, width, channels).get_model()

        # Compile the discriminator first, while it is still trainable.
        d_opt = keras.optimizers.SGD(lr=0.0005, momentum=0.9, nesterov=True)
        self._discriminator.compile(optimizer=d_opt, loss='binary_crossentropy')

        # Freeze the discriminator inside the combined model so that training
        # the stack only updates the generator's weights.
        self._discriminator.trainable = False

        # Stack: latent vector -> generator -> discriminator.
        latent_input = keras.Input(shape=(latent_dim,))
        stacked_output = self._discriminator(self._generator(latent_input))
        self._dcgan = keras.models.Model(latent_input, stacked_output)

        g_opt = keras.optimizers.SGD(lr=0.0005, momentum=0.9, nesterov=True)
        self._dcgan.compile(optimizer=g_opt, loss='binary_crossentropy')

    def train(self, real_images, batch_size):
        """Run one adversarial training step.

        Labels are flipped relative to the usual convention: generated images
        are labelled ~1, real images ~0, with small uniform noise added
        (label smoothing). Returns (discriminator_loss, generator_loss).
        """
        # 1) Teach the discriminator to flag generated images (label ~1).
        noise = np.random.normal(size=(batch_size, self._latent_dim))
        fakes = self._generator.predict(noise)
        fake_labels = np.ones((batch_size, 1))
        fake_labels += 0.05 * np.random.random(fake_labels.shape)
        d_loss_fake = self._discriminator.train_on_batch(fakes, fake_labels)

        # 2) Teach the discriminator to pass real images (label ~0).
        real_labels = np.zeros((batch_size, 1))
        real_labels += 0.05 * np.random.random(real_labels.shape)
        d_loss_real = self._discriminator.train_on_batch(real_images, real_labels)

        d_loss = (d_loss_fake + d_loss_real) / 2.0

        # 3) Teach the generator to fool the frozen discriminator: ask the
        # stack to classify fresh fakes as "real" (0 under the flipped scheme).
        noise = np.random.normal(size=(batch_size, self._latent_dim))
        misleading_targets = np.zeros((batch_size, 1))
        g_loss = self._dcgan.train_on_batch(noise, misleading_targets)

        return d_loss, g_loss

    def predict(self, latent_vector):
        """Generate images from latent vectors with the current generator."""
        return self._generator.predict(latent_vector)

    def load_weights(self, file_path, by_name=False):
        """Load weights for the whole stack (generator + discriminator)."""
        self._dcgan.load_weights(file_path, by_name)

    def save_weights(self, file_path, overwrite=True):
        """Save weights for the whole stack (generator + discriminator)."""
        self._dcgan.save_weights(file_path, overwrite)
次に以下をmain.pyに書きます(predict関数で画像をgeneratedというフォルダに作成するので、予めmain.pyと同じフォルダにgeneratedというフォルダを作成してください)。
import os import numpy as np import keras from dcgan import DCGAN from keras.preprocessing import image # Normalize image from 0 - 255 to -1 - 1 def normalize(X): return (X - 127.5)/127.5 # Denormalize from -1 - 1 to 0 - 255 def denormalize(X): return (X + 1.0)*127.5 def train(latent_dim, height, width, channels): (X_train, Y_train), (_, _) = keras.datasets.mnist.load_data() X_train = X_train.reshape((X_train.shape[0],) + (height, width, channels)).astype('float32') X_train = normalize(X_train) epochs = 20 batch_size = 128 iterations = X_train.shape[0]//batch_size dcgan = DCGAN(latent_dim, height, width, channels) for epoch in range(epochs): for iteration in range(iterations): real_images = X_train[iteration*batch_size:(iteration+1)*batch_size] d_loss, g_loss = dcgan.train(real_images, batch_size) if (iteration + 1)%10 == 0: print('discriminator loss:', d_loss) print('generator loss:', g_loss) print() with open('loss.txt', 'a') as f: f.write(str(d_loss) + ',' + str(g_loss) + '\r') dcgan.save_weights('gan' + '_epoch' + str(epoch + 1) + '.h5') print('epoch' + str(epoch) + ' end') print() def predict(latent_dim, height, width, channels): random_latent_vectors = np.random.normal(size=(100, latent_dim)) dcgan = DCGAN(latent_dim, height, width, channels) dcgan.load_weights('gan_epoch20.h5') generated_images = dcgan.predict(random_latent_vectors) for i, generated_image in enumerate(generated_images): img = image.array_to_img(denormalize(generated_image), scale=False) img.save(os.path.join('generated', str(i) + '.png')) if __name__ == '__main__': latent_dim = 100 height = 28 width = 28 channels = 1 train(latent_dim, height, width, channels) predict(latent_dim, height, width, channels)
ロス値の推移は以下のようになりました。良く見ると中盤以降Generatorのロス値が上昇傾向、Discriminatorのロス値が下降傾向に見えるため、これ以上学習させるとGeneratorが負けておかしな画像が生成され始めるかもしれません。 最後に20エポック学習終わって生成された画像は以下のようになりました。