【Python】 KerasでConditional DCGANを実装する
前回DCGANを実装しましたが、今回はConditional DCGAN([1411.1784] Conditional Generative Adversarial Nets)を実装します。
DCGANの例は入力からどのような数字が生成されるかコントロールできませんでしたが、Conditional DCGANは付加情報を足すことで生成する数字をコントロールできるようになります(下図のyが付加情報)。
実装にあたりzとyはベクトルなのでGenerator側の実装はイメージが付くのですが、xは画像なのでDiscriminator側の実装が分かりませんでした。そんな時に参考になったのが下記サイトでした。
--- 学習環境 --- Windows10 Home Python 3.5.2 Keras 2.1.2 tensorflow-gpu 1.2.0
実装に移りますが、以下をconditional_dcgan.pyに記述します。
import numpy as np import keras from keras.layers import Input, Dense, Activation, BatchNormalization, Reshape, UpSampling2D, Conv2D, MaxPool2D, Flatten, concatenate, multiply from keras.models import Model class Generator(object): def __init__(self, latent_dim, condition_dim): # latent vector input generator_input1 = Input(shape=(latent_dim,)) # condition input generator_input2 = Input(shape=(condition_dim,)) # concat 2 inputs generator_input = concatenate([generator_input1, generator_input2]) x = Dense(1024)(generator_input) x = Activation('tanh')(x) x = Dense(128*7*7)(x) x = BatchNormalization()(x) x = Activation('tanh')(x) x = Reshape((7, 7, 128))(x) x = UpSampling2D(size=(2, 2))(x) x = Conv2D(64, 5, padding='same')(x) x = Activation('tanh')(x) x = UpSampling2D(size=(2, 2))(x) x = Conv2D(1, 5, padding='same')(x) x = Activation('tanh')(x) # pass condition input to output so we can give it to discriminator self.generator = Model(inputs=[generator_input1, generator_input2], outputs=[x, generator_input2]) def get_model(self): return self.generator class Discriminator(object): def __init__(self, height, width, channels, condition_dim): # real or fake image discriminator_input1 = Input(shape=(height, width, channels)) # condition input from generator discriminator_input2 = Input(shape=(condition_dim,)) # expand dimension from (batch, channel) to (batch, height, width, channel) di2 = Reshape((1, 1, condition_dim))(discriminator_input2) # expand height and width from (1, 1) to (height, width) di2 = UpSampling2D((height, width))(di2) # concat 2 inputs discriminator_input = concatenate([discriminator_input1, di2]) x = Conv2D(64, 5, padding='same')(discriminator_input) x = Activation('tanh')(x) x = MaxPool2D()(x) x = Conv2D(128, 5)(x) x = Activation('tanh')(x) x = MaxPool2D()(x) x = Flatten()(x) x = Dense(1024)(x) x = Activation('tanh')(x) x = Dense(1, activation='sigmoid')(x) self.discriminator = Model(inputs=[discriminator_input1, discriminator_input2], outputs=x) def get_model(self): return self.discriminator class ConditionalDCGAN(object): def __init__(self, latent_dim, height, width, channels, condition_dim): # set generator self._latent_dim = latent_dim g = Generator(latent_dim, condition_dim) self._generator = g.get_model() # set discriminator d = Discriminator(height, width, channels, condition_dim) self._discriminator = d.get_model() # compile discriminator discriminator_optimizer = keras.optimizers.SGD(lr=0.0005, momentum=0.9, nesterov=True) self._discriminator.compile(optimizer=discriminator_optimizer, loss='binary_crossentropy') # disable training when combined with generator self._discriminator.trainable = False # set DCGAN dcgan_input1 = Input(shape=(latent_dim,)) dcgan_input2 = Input(shape=(condition_dim,)) dcgan_output = self._discriminator(self._generator([dcgan_input1, dcgan_input2])) self._dcgan = Model([dcgan_input1, dcgan_input2], dcgan_output) # compile DCGAN dcgan_optimizer = keras.optimizers.SGD(lr=0.0005, momentum=0.9, nesterov=True) self._dcgan.compile(optimizer=dcgan_optimizer, loss='binary_crossentropy') def train(self, real_images, conditions, batch_size): # Train discriminator so it can detect fake random_latent_vectors = np.random.normal(size=(batch_size, self._latent_dim)) generated_images = self._generator.predict([random_latent_vectors, conditions]) labels = np.ones((batch_size, 1)) labels += 0.05 * np.random.random(labels.shape) d_loss1 = self._discriminator.train_on_batch(generated_images, labels) # Train discriminator so it can detect real labels = np.zeros((batch_size, 1)) labels += 0.05 * np.random.random(labels.shape) d_loss2 = self._discriminator.train_on_batch([real_images, conditions], labels) d_loss = (d_loss1 + d_loss2)/2.0 # Train generator so it can fool discriminator random_latent_vectors = np.random.normal(size=(batch_size, self._latent_dim)) misleading_targets = np.zeros((batch_size, 1)) g_loss = self._dcgan.train_on_batch([random_latent_vectors, conditions], misleading_targets) return d_loss, g_loss def predict(self, latent_vector, condition): # return only image (remember generator returns condition too) return self._generator.predict([latent_vector, condition])[0] def load_weights(self, file_path, by_name=False): self._dcgan.load_weights(file_path, by_name) def save_weights(self, file_path, overwrite=True): self._dcgan.save_weights(file_path, overwrite)
次に以下をmain.pyに記述します(予めmain.pyと同じフォルダにgeneratedというフォルダを作成してください)。
import os import numpy as np import keras from conditional_dcgan import ConditionalDCGAN from keras.preprocessing import image from keras.utils.np_utils import to_categorical def normalize(X): return (X - 127.5)/127.5 def denormalize(X): return (X + 1.0)*127.5 def train(latent_dim, height, width, channels, num_class): (X_train, Y_train), (_, _) = keras.datasets.mnist.load_data() Y_train = to_categorical(Y_train, num_class) X_train = X_train.reshape((X_train.shape[0],) + (height, width, channels)).astype('float32') X_train = normalize(X_train) epochs = 50 batch_size = 128 iterations = X_train.shape[0]//batch_size dcgan = ConditionalDCGAN(latent_dim, height, width, channels, num_class) for epoch in range(epochs): for iteration in range(iterations): real_images = X_train[iteration*batch_size:(iteration+1)*batch_size] conditions = Y_train[iteration*batch_size:(iteration+1)*batch_size] d_loss, g_loss = dcgan.train(real_images, conditions, batch_size) if (iteration + 1)%10 == 0: print('discriminator loss:', d_loss) print('generator loss:', g_loss) print() with open('loss.txt', 'a') as f: f.write(str(d_loss) + ',' + str(g_loss) + '\r') if (epoch + 1)%5 == 0: dcgan.save_weights('gan' + '_epoch' + str(epoch + 1) + '.h5') random_latent_vectors = np.random.normal(size=(batch_size, latent_dim)) generated_images = dcgan.predict(random_latent_vectors, conditions) for i, generated_image in enumerate(generated_images): img = denormalize(generated_image) img = image.array_to_img(img, scale=False) condition = np.argmax(conditions[i]) img.save(os.path.join('generated', str(epoch) + '_' + str(condition) + '.png')) print('epoch' + str(epoch) + ' end') print() def predict(latent_dim, height, width, channels, num_class): dcgan = ConditionalDCGAN(latent_dim, height, width, channels, num_class) dcgan.load_weights('gan_epoch50.h5') for num in range(num_class): for id in range(10): random_latent_vectors = np.random.normal(size=(1, latent_dim)) condition = np.zeros((1, num_class), dtype=np.float32) condition[0, num] = 1 generated_images = dcgan.predict(random_latent_vectors, condition) img = image.array_to_img(denormalize(generated_images[0]), scale=False) img.save(os.path.join('generated', str(num) + '_' + str(id) + '.png')) if __name__ == '__main__': latent_dim = 100 height = 28 width = 28 channels = 1 num_class = 10 train(latent_dim, height, width, channels, num_class) predict(latent_dim, height, width, channels, num_class)
生成された画像は以下のようになります。conditionを付加したことで生成する文字をコントロールできています。 最後に学習時のDiscriminatorとGeneratorのロス値の推移も貼っておきます。
【Python】 KerasでDCGANを試す
DCGANでMNISTの手書き数字画像を生成する、ということを今更ながらやりました。元々は"Deep Learning with Python"という書籍にDCGANでCIFAR10のカエル画像を生成させる例があり、それを試してみたのですが、32×32の画像を見ても結果が良く分からなかったので、単純な手書き数字で試してみるかと思ったわけです。
ただ、書籍のコードを使ってMNISTの画像を生成させようとしたのですが、Discriminatorのロス値だけ急激に下がり上手く学習できませんでした(Discriminatorのドロップアウト率を上げたり学習係数を下げたりしたのですが、失敗しました)。結局以下の実装を引用したら上手く学習できました。
--- 学習環境 --- Windows10 Home Python 3.5.2 Keras 2.1.2 tensorflow-gpu 1.2.0
まず以下をdcgan.pyに記述します。
import numpy as np import keras from keras.layers import Input, Dense, Activation, BatchNormalization, Reshape, UpSampling2D, Conv2D, MaxPool2D, Flatten class Generator(object): def __init__(self, latent_dim): generator_input = keras.Input(shape=(latent_dim,)) x = Dense(1024)(generator_input) x = Activation('tanh')(x) x = Dense(128*7*7)(x) x = BatchNormalization()(x) x = Activation('tanh')(x) x = Reshape((7, 7, 128))(x) x = UpSampling2D(size=(2, 2))(x) x = Conv2D(64, 5, padding='same')(x) x = Activation('tanh')(x) x = UpSampling2D(size=(2, 2))(x) x = Conv2D(1, 5, padding='same')(x) x = Activation('tanh')(x) self.generator = keras.models.Model(generator_input, x) def get_model(self): return self.generator class Discriminator(object): def __init__(self, height, width, channels): discriminator_input = Input(shape=(height, width, channels)) x = Conv2D(64, 5, padding='same')(discriminator_input) x = Activation('tanh')(x) x = MaxPool2D()(x) x = Conv2D(128, 5)(x) x = Activation('tanh')(x) x = MaxPool2D()(x) x = Flatten()(x) x = Dense(1024)(x) x = Activation('tanh')(x) x = Dense(1, activation='sigmoid')(x) self.discriminator = keras.models.Model(discriminator_input, x) def get_model(self): return self.discriminator class DCGAN(object): def __init__(self, latent_dim, height, width, channels): # set generator self._latent_dim = latent_dim g = Generator(latent_dim) self._generator = g.get_model() # set discriminator d = Discriminator(height, width, channels) self._discriminator = d.get_model() # compile discriminator discriminator_optimizer = keras.optimizers.SGD(lr=0.0005, momentum=0.9, nesterov=True) self._discriminator.compile(optimizer=discriminator_optimizer, loss='binary_crossentropy') # disable training when combined with generator self._discriminator.trainable = False # set DCGAN dcgan_input = keras.Input(shape=(latent_dim,)) dcgan_output = self._discriminator(self._generator(dcgan_input)) self._dcgan = keras.models.Model(dcgan_input, dcgan_output) # compile DCGAN dcgan_optimizer = keras.optimizers.SGD(lr=0.0005, momentum=0.9, nesterov=True) self._dcgan.compile(optimizer=dcgan_optimizer, loss='binary_crossentropy') def train(self, real_images, batch_size): # Train so discriminator can detect fake random_latent_vectors = np.random.normal(size=(batch_size, self._latent_dim)) generated_images = self._generator.predict(random_latent_vectors) labels = np.ones((batch_size, 1)) labels += 0.05 * np.random.random(labels.shape) d_loss1 = self._discriminator.train_on_batch(generated_images, labels) # Train so discriminator can detect real labels = np.zeros((batch_size, 1)) labels += 0.05 * np.random.random(labels.shape) d_loss2 = self._discriminator.train_on_batch(real_images, labels) d_loss = (d_loss1 + d_loss2)/2.0 # Train so generator can fool discriminator random_latent_vectors = np.random.normal(size=(batch_size, self._latent_dim)) misleading_targets = np.zeros((batch_size, 1)) g_loss = self._dcgan.train_on_batch(random_latent_vectors, misleading_targets) return d_loss, g_loss def predict(self, latent_vector): return self._generator.predict(latent_vector) def load_weights(self, file_path, by_name=False): self._dcgan.load_weights(file_path, by_name) def save_weights(self, file_path, overwrite=True): self._dcgan.save_weights(file_path, overwrite)
次に以下をmain.pyに書きます(predict関数で画像をgeneratedというフォルダに作成するので、予めmain.pyと同じフォルダにgeneratedというフォルダを作成してください)。
import os import numpy as np import keras from dcgan import DCGAN from keras.preprocessing import image # Normalize image from 0 - 255 to -1 - 1 def normalize(X): return (X - 127.5)/127.5 # Denormalize from -1 - 1 to 0 - 255 def denormalize(X): return (X + 1.0)*127.5 def train(latent_dim, height, width, channels): (X_train, Y_train), (_, _) = keras.datasets.mnist.load_data() X_train = X_train.reshape((X_train.shape[0],) + (height, width, channels)).astype('float32') X_train = normalize(X_train) epochs = 20 batch_size = 128 iterations = X_train.shape[0]//batch_size dcgan = DCGAN(latent_dim, height, width, channels) for epoch in range(epochs): for iteration in range(iterations): real_images = X_train[iteration*batch_size:(iteration+1)*batch_size] d_loss, g_loss = dcgan.train(real_images, batch_size) if (iteration + 1)%10 == 0: print('discriminator loss:', d_loss) print('generator loss:', g_loss) print() with open('loss.txt', 'a') as f: f.write(str(d_loss) + ',' + str(g_loss) + '\r') dcgan.save_weights('gan' + '_epoch' + str(epoch + 1) + '.h5') print('epoch' + str(epoch) + ' end') print() def predict(latent_dim, height, width, channels): random_latent_vectors = np.random.normal(size=(100, latent_dim)) dcgan = DCGAN(latent_dim, height, width, channels) dcgan.load_weights('gan_epoch20.h5') generated_images = dcgan.predict(random_latent_vectors) for i, generated_image in enumerate(generated_images): img = image.array_to_img(denormalize(generated_image), scale=False) img.save(os.path.join('generated', str(i) + '.png')) if __name__ == '__main__': latent_dim = 100 height = 28 width = 28 channels = 1 train(latent_dim, height, width, channels) predict(latent_dim, height, width, channels)
ロス値の推移は以下のようになりました。良く見ると中盤以降Generatorのロス値が上昇傾向、Discriminatorのロス値が下降傾向に見えるため、これ以上学習させるとGeneratorが負けておかしな画像が生成され始めるかもしれません。 最後に20エポック学習終わって生成された画像は以下のようになりました。
【WPF】 矢印を描きたい
UIに矢印を描いて欲しいと言われたので、矢印を絵にしてImageに表示させようと思ったのですが、直接描くという方法もあるようです(以下リンク先参照)。
【C#】 レジストリへの書き込み処理でUnauthorizedAccessExceptionが発生する
レジストリにSetValueメソッドで書き込みを行おうとするとレジストリ自体は書き込み可能な設定なのにUnauthorizedAccessExceptionが発生しました。原因は以下の記事にあるように、レジストリを開く時に書き込み可能という事を明示しないといけなかったようです。
using Microsoft.Win32; // こうじゃなくて //var registry = Registry.LocalMachine.OpenSubKey(name); // こうしないとレジストリに書き込みできない var registry = Registry.LocalMachine.OpenSubKey(name, true);
ちなみにC#でレジストリを触る方法は以下にあります。 レジストリへの書き込み、読み込み、削除を行う: .NET Tips: C#, VB.NET
【Python】 Keras開発者の著書
Keras開発者による著書「Deep Learning with Python」が発売されています。今はクーポンコード"CTWNIPS17"で40%オフなのでお得です。
動画と音声を分離/結合する方法のメモ
動画と音声を分離/結合する方法についてメモします。ffmpegというフリーソフトで可能なようです。
動画と音声を分離する
[ffmpeg] 動画から音声を抜き出すには – 端くれプログラマの備忘録