【Python】 実行スクリプトからの相対パスでファイルにアクセスする
以下のサイトに方法が書いてありました。
【Python】 PyTorchで自前のロス関数を定義する
Kerasと違ってPyTorchで自前のロス関数を定義するのは大変かなと思ったのですが、Kerasとほぼ同じやり方で出来ました。
import torch  # fix: the original snippet called torch.sum without importing torch


# 1. Define the loss function
def dice_coef_loss(input, target):
    """Return 1 - Dice coefficient between two tensors (soft Dice loss).

    Both tensors are flattened, so any shape works as long as the two
    shapes match element-for-element.

    Args:
        input: predicted tensor (typically sigmoid outputs in [0, 1]).
        target: ground-truth tensor of the same number of elements.

    Returns:
        Scalar tensor: 1 - (2*|A∩B| + eps) / (|A| + |B| + eps).
    """
    # Smoothing term: avoids 0/0 when both tensors are all zeros.
    small_value = 1e-4
    input_flattened = input.view(-1)
    target_flattened = target.view(-1)
    intersection = torch.sum(input_flattened * target_flattened)
    dice_coef = (2.0*intersection + small_value)/(torch.sum(input_flattened) + torch.sum(target_flattened) + small_value)
    return 1.0 - dice_coef


# 2. Just call backward (example usage; `model`, `X` and `labels` come from
#    the surrounding training script and are not defined here).
#    Guarded so that importing this module does not raise NameError — fix for
#    the original, which ran these lines unconditionally at module level.
if __name__ == '__main__':
    outputs = model(X)
    loss = dice_coef_loss(outputs, labels)
    loss.backward()
【Python】 Perceptual Hashを使って画像の類似度を調べる
一年くらい前にヒストグラムを使って画像の類似度を調べる方法をメモしていたのですが、今回はそれとは別の方法を見つけたのでその記事をメモしておきます。 ni4muraano.hatenablog.com
上記の記事で取り上げられているimagehashというライブラリは以下に使い方が書いてあり、インストールは普通に"pip install imagehash"で出来ました。 github.com
【Python】 VAE(Variational Auto Encoder)の写経
書籍「Deep Learning with Python」にMNISTを用いたVAEの実装があったので写経します(書籍では一つのファイルに全部書くスタイルだったので、VAEクラスを作ったりしました)。
VAEの解説は以下が詳しいです。 qiita.com
実装ですが、まずは以下をvae.pyに書きます。
import numpy as np
from keras import Input
from keras.layers import Conv2D, Flatten, Dense, Lambda, Reshape, Conv2DTranspose, Layer
from keras.models import Model
from keras.metrics import binary_crossentropy
import keras.backend as K


class CustomVariationalLayer(Layer):
    """Custom Keras layer that attaches the VAE loss via self.add_loss.

    The model is compiled with loss=None; this layer injects
    reconstruction loss + KL regularization into the graph instead.
    z_mean / z_log_var must be set with the setters before the layer
    is called on [input_image, decoded_image].
    """

    def set_z_mean(self, z_mean):
        # Latent mean tensor produced by the encoder.
        self._z_mean = z_mean

    def set_z_log_var(self, z_log_var):
        # Latent log-variance tensor produced by the encoder.
        self._z_log_var = z_log_var

    def _vae_loss(self, x, z_decoded):
        """Per-batch VAE loss: binary cross-entropy reconstruction term
        plus a small, negatively weighted KL divergence term."""
        x = K.flatten(x)
        z_decoded = K.flatten(z_decoded)
        reconstruction_loss = binary_crossentropy(x, z_decoded)
        # -5e-4 weight: KL term below is the *negative* KL (up to factor),
        # so the minus sign makes the regularizer push toward the prior.
        regularization_parameter = -5e-4 * self._compute_KL_divergence(self._z_mean, self._z_log_var)
        return K.mean(reconstruction_loss + regularization_parameter)

    def _compute_KL_divergence(self, z_mean, z_log_var):
        # Standard VAE closed-form term (without the usual -0.5 factor,
        # which is folded into the -5e-4 weight above).
        return K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)

    def call(self, inputs):
        # inputs = [original image, decoded image]; the layer is an
        # identity on the image, existing only to register the loss.
        x = inputs[0]
        z_decoded = inputs[1]
        loss = self._vae_loss(x, z_decoded)
        self.add_loss(loss, inputs=inputs)
        return x


class VAE(object):
    """Convolutional Variational Auto-Encoder for images.

    Builds an encoder producing (z_mean, z_log_var), a reparameterized
    sampling layer, and a deconvolutional decoder; the full model's loss
    is supplied by CustomVariationalLayer (compile with loss=None).

    Args:
        image_shape: input image shape, e.g. (28, 28, 1) for MNIST.
        latent_dim: dimensionality of the latent space.
    """

    def __init__(self, image_shape, latent_dim):
        self._latent_dim = latent_dim
        # Encoding
        input_img = Input(shape=image_shape)
        x = Conv2D(32, 3, padding='same', activation='relu')(input_img)
        x = Conv2D(64, 3, padding='same', activation='relu', strides=(2, 2))(x)
        x = Conv2D(64, 3, padding='same', activation='relu')(x)
        x = Conv2D(64, 3, padding='same', activation='relu')(x)
        # Remember the conv feature-map shape so the decoder can mirror it.
        shape_before_flattening = K.int_shape(x)
        x = Flatten()(x)
        x = Dense(32, activation='relu')(x)
        z_mean = Dense(latent_dim)(x)
        z_log_var = Dense(latent_dim)(x)
        # Sampling (reparameterization trick, see _sampling)
        z = Lambda(self._sampling)([z_mean, z_log_var])
        # Decoding — built as a standalone Model so it can also be used
        # on its own to generate images from latent vectors.
        decoder_input = Input(K.int_shape(z)[1:])
        x = Dense(np.prod(shape_before_flattening[1:]), activation='relu')(decoder_input)
        x = Reshape(shape_before_flattening[1:])(x)
        x = Conv2DTranspose(32, 3, padding='same', activation='relu', strides=(2, 2))(x)
        x = Conv2D(1, 3, padding='same', activation='sigmoid')(x)
        self._decoder = Model(inputs=decoder_input, outputs=x)
        z_decoded = self._decoder(z)
        # Loss-injecting identity layer; needs z_mean / z_log_var for the KL term.
        l = CustomVariationalLayer()
        l.set_z_mean(z_mean)
        l.set_z_log_var(z_log_var)
        y = l([input_img, z_decoded])
        self._vae = Model(input_img, y)

    def _sampling(self, args):
        """Reparameterization: z = z_mean + exp(z_log_var) * epsilon.

        NOTE(review): the conventional form uses exp(0.5 * z_log_var)
        (standard deviation); here exp(z_log_var) is used as-is — confirm
        this is intended.
        """
        z_mean, z_log_var = args
        epsilon = K.random_normal(shape=(K.shape(z_mean)[0], self._latent_dim), mean=0.0, stddev=1.0)
        return z_mean + K.exp(z_log_var)*epsilon

    def get_model(self):
        # Full VAE model (compile with loss=None; loss comes from add_loss).
        return self._vae

    def get_decoder(self):
        # Decoder-only model: latent vector -> image.
        return self._decoder
後は以下をmain.pyに書けばVAEに文字を生成させることができます。
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
from keras.optimizers import RMSprop
from keras.datasets import mnist
from vae import VAE

# Training / model configuration.
img_shape = (28, 28, 1)
batch_size = 32
latent_dim = 2

# Load MNIST, scale pixels into [0, 1], and append a channel axis.
(x_train, _), (x_test, y_test) = mnist.load_data()
x_train = x_train.astype('float32')/255.0
x_train = x_train.reshape(x_train.shape + (1,))
x_test = x_test.astype('float32')/255.0
x_test = x_test.reshape(x_test.shape + (1,))

# Build the VAE; the loss is injected via add_loss, hence loss=None.
vae = VAE(img_shape, latent_dim)
decoder = vae.get_decoder()
vae = vae.get_model()
vae.compile(optimizer=RMSprop(), loss=None)
history = vae.fit(x=x_train, y=None, shuffle=True, epochs=10, batch_size=batch_size)

# Append the per-epoch loss values to a log file.
with open('loss.txt', 'a') as f:
    for loss in history.history['loss']:
        f.write(str(loss) + '\r')

# Decode a 15x15 grid of latent points into one large mosaic image.
n = 15
digit_size = 28
figure = np.zeros((digit_size*n, digit_size*n))
# Sample latent coordinates through the inverse Gaussian CDF so the grid
# covers probable regions of the prior.
grid_x = norm.ppf(np.linspace(0.05, 0.95, n))
grid_y = norm.ppf(np.linspace(0.05, 0.95, n))
for row, y_coord in enumerate(grid_y):
    for col, x_coord in enumerate(grid_x):
        # Repeat one latent point across the whole batch; only the first
        # decoded image is kept.
        z_sample = np.tile(np.array([[x_coord, y_coord]]), batch_size).reshape(batch_size, 2)
        x_decoded = decoder.predict(z_sample, batch_size)
        digit = x_decoded[0].reshape(digit_size, digit_size)
        top, left = row*digit_size, col*digit_size
        figure[top:top + digit_size, left:left + digit_size] = digit

plt.figure(figsize=(10, 10))
plt.imshow(figure, cmap='Greys_r')
plt.show()
plt.show()を実行したところで以下のような図が描画されます。
【WPF】 バインドした数値の表示する桁数を指定したい
TextBoxに表示する数値データを小数点一桁目までしか表示させたくなかったのですが、以下のようにStringFormatを利用することで指定可能でした(ここを参考)。
<!-- Format the bound numeric Value to one decimal place via StringFormat
     ({0:N1}); the leading {} escapes the brace so the format string parses. -->
<TextBox Text="{Binding Value, StringFormat={}{0:N1}}"/>
【Python】 KerasでConditional DCGANを実装する
前回DCGANを実装しましたが、今回はConditional DCGAN([1411.1784] Conditional Generative Adversarial Nets)を実装します。
DCGANの例は入力からどのような数字が生成されるかコントロールできませんでしたが、Conditional DCGANは付加情報を足すことで生成する数字をコントロールできるようになります(下図のyが付加情報)。
実装にあたりzとyはベクトルなのでGenerator側の実装はイメージが付くのですが、xは画像なのでDiscriminator側の実装が分かりませんでした。そんな時に参考になったのが下記サイトでした。
--- 学習環境 --- Windows10 Home Python 3.5.2 Keras 2.1.2 tensorflow-gpu 1.2.0
実装に移りますが、以下をconditional_dcgan.pyに記述します。
import numpy as np
import keras
from keras.layers import Input, Dense, Activation, BatchNormalization, Reshape, UpSampling2D, Conv2D, MaxPool2D, Flatten, concatenate, multiply
from keras.models import Model

class Generator(object):
    """Conditional generator: maps (latent vector, one-hot condition) to a
    28x28x1 image in [-1, 1] (tanh output).

    Args:
        latent_dim: length of the random latent input vector.
        condition_dim: length of the condition (class label) vector.
    """
    def __init__(self, latent_dim, condition_dim):
        # latent vector input
        generator_input1 = Input(shape=(latent_dim,))
        # condition input
        generator_input2 = Input(shape=(condition_dim,))
        # concat 2 inputs
        generator_input = concatenate([generator_input1, generator_input2])
        x = Dense(1024)(generator_input)
        x = Activation('tanh')(x)
        x = Dense(128*7*7)(x)
        x = BatchNormalization()(x)
        x = Activation('tanh')(x)
        # 7x7 feature maps upsampled twice -> 28x28 output.
        x = Reshape((7, 7, 128))(x)
        x = UpSampling2D(size=(2, 2))(x)
        x = Conv2D(64, 5, padding='same')(x)
        x = Activation('tanh')(x)
        x = UpSampling2D(size=(2, 2))(x)
        x = Conv2D(1, 5, padding='same')(x)
        x = Activation('tanh')(x)
        # pass condition input to output so we can give it to discriminator
        self.generator = Model(inputs=[generator_input1, generator_input2], outputs=[x, generator_input2])

    def get_model(self):
        # Keras Model with outputs [image, condition].
        return self.generator

class Discriminator(object):
    """Conditional discriminator: classifies (image, condition) pairs as
    real or fake (sigmoid output).

    The condition vector is broadcast spatially and concatenated to the
    image along the channel axis before the conv stack.
    """
    def __init__(self, height, width, channels, condition_dim):
        # real or fake image
        discriminator_input1 = Input(shape=(height, width, channels))
        # condition input from generator
        discriminator_input2 = Input(shape=(condition_dim,))
        # expand dimension from (batch, channel) to (batch, height, width, channel)
        di2 = Reshape((1, 1, condition_dim))(discriminator_input2)
        # expand height and width from (1, 1) to (height, width)
        di2 = UpSampling2D((height, width))(di2)
        # concat 2 inputs
        discriminator_input = concatenate([discriminator_input1, di2])
        x = Conv2D(64, 5, padding='same')(discriminator_input)
        x = Activation('tanh')(x)
        x = MaxPool2D()(x)
        x = Conv2D(128, 5)(x)
        x = Activation('tanh')(x)
        x = MaxPool2D()(x)
        x = Flatten()(x)
        x = Dense(1024)(x)
        x = Activation('tanh')(x)
        x = Dense(1, activation='sigmoid')(x)
        self.discriminator = Model(inputs=[discriminator_input1, discriminator_input2], outputs=x)

    def get_model(self):
        # Keras Model: [image, condition] -> real/fake probability.
        return self.discriminator

class ConditionalDCGAN(object):
    """Conditional DCGAN wrapper that owns generator, discriminator and the
    stacked generator->discriminator model used to train the generator.

    Label convention in train(): fake images are labeled ~1 and real images
    ~0 (with 0.05 uniform noise), and the generator is trained toward 0 —
    consistent, though inverted relative to the common convention.
    """
    def __init__(self, latent_dim, height, width, channels, condition_dim):
        # set generator
        self._latent_dim = latent_dim
        g = Generator(latent_dim, condition_dim)
        self._generator = g.get_model()
        # set discriminator
        d = Discriminator(height, width, channels, condition_dim)
        self._discriminator = d.get_model()
        # compile discriminator
        discriminator_optimizer = keras.optimizers.SGD(lr=0.0005, momentum=0.9, nesterov=True)
        self._discriminator.compile(optimizer=discriminator_optimizer, loss='binary_crossentropy')
        # disable training when combined with generator (must happen after
        # the discriminator's own compile so it still trains standalone)
        self._discriminator.trainable = False
        # set DCGAN (stacked model: latent+condition -> image -> probability)
        dcgan_input1 = Input(shape=(latent_dim,))
        dcgan_input2 = Input(shape=(condition_dim,))
        dcgan_output = self._discriminator(self._generator([dcgan_input1, dcgan_input2]))
        self._dcgan = Model([dcgan_input1, dcgan_input2], dcgan_output)
        # compile DCGAN
        dcgan_optimizer = keras.optimizers.SGD(lr=0.0005, momentum=0.9, nesterov=True)
        self._dcgan.compile(optimizer=dcgan_optimizer, loss='binary_crossentropy')

    def train(self, real_images, conditions, batch_size):
        """Run one adversarial training step; returns (d_loss, g_loss)."""
        # Train discriminator so it can detect fake.
        # Note: generator output is already the [image, condition] pair the
        # discriminator expects as its two inputs.
        random_latent_vectors = np.random.normal(size=(batch_size, self._latent_dim))
        generated_images = self._generator.predict([random_latent_vectors, conditions])
        labels = np.ones((batch_size, 1))
        # Label smoothing: add small noise to the targets.
        labels += 0.05 * np.random.random(labels.shape)
        d_loss1 = self._discriminator.train_on_batch(generated_images, labels)
        # Train discriminator so it can detect real
        labels = np.zeros((batch_size, 1))
        labels += 0.05 * np.random.random(labels.shape)
        d_loss2 = self._discriminator.train_on_batch([real_images, conditions], labels)
        d_loss = (d_loss1 + d_loss2)/2.0
        # Train generator so it can fool discriminator (targets = "real" = 0
        # under this file's inverted label convention)
        random_latent_vectors = np.random.normal(size=(batch_size, self._latent_dim))
        misleading_targets = np.zeros((batch_size, 1))
        g_loss = self._dcgan.train_on_batch([random_latent_vectors, conditions], misleading_targets)
        return d_loss, g_loss

    def predict(self, latent_vector, condition):
        # return only image (remember generator returns condition too)
        return self._generator.predict([latent_vector, condition])[0]

    def load_weights(self, file_path, by_name=False):
        # Loading via the stacked model restores generator + discriminator.
        self._dcgan.load_weights(file_path, by_name)

    def save_weights(self, file_path, overwrite=True):
        # Saving via the stacked model covers generator + discriminator.
        self._dcgan.save_weights(file_path, overwrite)
次に以下をmain.pyに記述します(予めmain.pyと同じフォルダにgeneratedというフォルダを作成してください)。
import os
import numpy as np
import keras
from conditional_dcgan import ConditionalDCGAN
from keras.preprocessing import image
from keras.utils.np_utils import to_categorical

def normalize(X):
    """Scale pixel values from [0, 255] to [-1, 1] (matches tanh output)."""
    return (X - 127.5)/127.5

def denormalize(X):
    """Scale pixel values from [-1, 1] back to [0, 255]."""
    return (X + 1.0)*127.5

def train(latent_dim, height, width, channels, num_class):
    """Train the conditional DCGAN on MNIST.

    Side effects: appends losses to loss.txt, saves weights every 5 epochs
    as gan_epoch<N>.h5, and writes sample images into ./generated
    (the folder must already exist).
    """
    (X_train, Y_train), (_, _) = keras.datasets.mnist.load_data()
    # Labels become one-hot condition vectors.
    Y_train = to_categorical(Y_train, num_class)
    X_train = X_train.reshape((X_train.shape[0],) + (height, width, channels)).astype('float32')
    X_train = normalize(X_train)
    epochs = 50
    batch_size = 128
    iterations = X_train.shape[0]//batch_size
    dcgan = ConditionalDCGAN(latent_dim, height, width, channels, num_class)
    for epoch in range(epochs):
        for iteration in range(iterations):
            # Sequential (non-shuffled) mini-batches of images + conditions.
            real_images = X_train[iteration*batch_size:(iteration+1)*batch_size]
            conditions = Y_train[iteration*batch_size:(iteration+1)*batch_size]
            d_loss, g_loss = dcgan.train(real_images, conditions, batch_size)
            # Log every 10th iteration.
            if (iteration + 1)%10 == 0:
                print('discriminator loss:', d_loss)
                print('generator loss:', g_loss)
                print()
                with open('loss.txt', 'a') as f:
                    f.write(str(d_loss) + ',' + str(g_loss) + '\r')
        # Checkpoint weights every 5 epochs.
        if (epoch + 1)%5 == 0:
            dcgan.save_weights('gan' + '_epoch' + str(epoch + 1) + '.h5')
        # Generate sample images from the last batch's conditions.
        random_latent_vectors = np.random.normal(size=(batch_size, latent_dim))
        generated_images = dcgan.predict(random_latent_vectors, conditions)
        for i, generated_image in enumerate(generated_images):
            img = denormalize(generated_image)
            img = image.array_to_img(img, scale=False)
            # File name encodes epoch and the digit the image was conditioned on.
            condition = np.argmax(conditions[i])
            img.save(os.path.join('generated', str(epoch) + '_' + str(condition) + '.png'))
        print('epoch' + str(epoch) + ' end')
        print()

def predict(latent_dim, height, width, channels, num_class):
    """Generate 10 images per class from the final (epoch-50) weights."""
    dcgan = ConditionalDCGAN(latent_dim, height, width, channels, num_class)
    dcgan.load_weights('gan_epoch50.h5')
    for num in range(num_class):
        # NOTE(review): loop variable `id` shadows the builtin of the same name.
        for id in range(10):
            random_latent_vectors = np.random.normal(size=(1, latent_dim))
            # One-hot condition selecting the digit to generate.
            condition = np.zeros((1, num_class), dtype=np.float32)
            condition[0, num] = 1
            generated_images = dcgan.predict(random_latent_vectors, condition)
            img = image.array_to_img(denormalize(generated_images[0]), scale=False)
            img.save(os.path.join('generated', str(num) + '_' + str(id) + '.png'))

if __name__ == '__main__':
    latent_dim = 100
    height = 28
    width = 28
    channels = 1
    num_class = 10
    train(latent_dim, height, width, channels, num_class)
    predict(latent_dim, height, width, channels, num_class)
生成された画像は以下のようになります。conditionを付加したことで生成する文字をコントロールできています。 最後に学習時のDiscriminatorとGeneratorのロス値の推移も貼っておきます。
【Python】 KerasでDCGANを試す
DCGANでMNISTの手書き数字画像を生成する、ということを今更ながらやりました。元々は"Deep Learning with Python"という書籍にDCGANでCIFAR10のカエル画像を生成させる例があり、それを試してみたのですが、32×32の画像を見ても結果が良く分からなかったので、単純な手書き数字で試してみるかと思ったわけです。
ただ、書籍のコードを使ってMNISTの画像を生成させようとしたのですが、Discriminatorのロス値だけ急激に下がり上手く学習できませんでした(Discriminatorのドロップアウト率を上げたり学習係数を下げたりしたのですが、失敗しました)。結局以下の実装を引用したら上手く学習できました。
--- 学習環境 --- Windows10 Home Python 3.5.2 Keras 2.1.2 tensorflow-gpu 1.2.0
まず以下をdcgan.pyに記述します。
import numpy as np
import keras
from keras.layers import Input, Dense, Activation, BatchNormalization, Reshape, UpSampling2D, Conv2D, MaxPool2D, Flatten

class Generator(object):
    """Generator: maps a latent vector to a 28x28x1 image in [-1, 1]."""
    def __init__(self, latent_dim):
        generator_input = keras.Input(shape=(latent_dim,))
        x = Dense(1024)(generator_input)
        x = Activation('tanh')(x)
        x = Dense(128*7*7)(x)
        x = BatchNormalization()(x)
        x = Activation('tanh')(x)
        # 7x7 feature maps upsampled twice -> 28x28 output.
        x = Reshape((7, 7, 128))(x)
        x = UpSampling2D(size=(2, 2))(x)
        x = Conv2D(64, 5, padding='same')(x)
        x = Activation('tanh')(x)
        x = UpSampling2D(size=(2, 2))(x)
        x = Conv2D(1, 5, padding='same')(x)
        x = Activation('tanh')(x)
        self.generator = keras.models.Model(generator_input, x)

    def get_model(self):
        # Keras Model: latent vector -> image.
        return self.generator

class Discriminator(object):
    """Discriminator: classifies an image as real or fake (sigmoid output)."""
    def __init__(self, height, width, channels):
        discriminator_input = Input(shape=(height, width, channels))
        x = Conv2D(64, 5, padding='same')(discriminator_input)
        x = Activation('tanh')(x)
        x = MaxPool2D()(x)
        x = Conv2D(128, 5)(x)
        x = Activation('tanh')(x)
        x = MaxPool2D()(x)
        x = Flatten()(x)
        x = Dense(1024)(x)
        x = Activation('tanh')(x)
        x = Dense(1, activation='sigmoid')(x)
        self.discriminator = keras.models.Model(discriminator_input, x)

    def get_model(self):
        # Keras Model: image -> real/fake probability.
        return self.discriminator

class DCGAN(object):
    """DCGAN wrapper that owns generator, discriminator and the stacked
    generator->discriminator model used to train the generator.

    Label convention in train(): fake images are labeled ~1 and real images
    ~0 (with 0.05 uniform noise), and the generator is trained toward 0 —
    consistent, though inverted relative to the common convention.
    """
    def __init__(self, latent_dim, height, width, channels):
        # set generator
        self._latent_dim = latent_dim
        g = Generator(latent_dim)
        self._generator = g.get_model()
        # set discriminator
        d = Discriminator(height, width, channels)
        self._discriminator = d.get_model()
        # compile discriminator
        discriminator_optimizer = keras.optimizers.SGD(lr=0.0005, momentum=0.9, nesterov=True)
        self._discriminator.compile(optimizer=discriminator_optimizer, loss='binary_crossentropy')
        # disable training when combined with generator (must happen after
        # the discriminator's own compile so it still trains standalone)
        self._discriminator.trainable = False
        # set DCGAN (stacked model: latent -> image -> probability)
        dcgan_input = keras.Input(shape=(latent_dim,))
        dcgan_output = self._discriminator(self._generator(dcgan_input))
        self._dcgan = keras.models.Model(dcgan_input, dcgan_output)
        # compile DCGAN
        dcgan_optimizer = keras.optimizers.SGD(lr=0.0005, momentum=0.9, nesterov=True)
        self._dcgan.compile(optimizer=dcgan_optimizer, loss='binary_crossentropy')

    def train(self, real_images, batch_size):
        """Run one adversarial training step; returns (d_loss, g_loss)."""
        # Train so discriminator can detect fake
        random_latent_vectors = np.random.normal(size=(batch_size, self._latent_dim))
        generated_images = self._generator.predict(random_latent_vectors)
        labels = np.ones((batch_size, 1))
        # Label smoothing: add small noise to the targets.
        labels += 0.05 * np.random.random(labels.shape)
        d_loss1 = self._discriminator.train_on_batch(generated_images, labels)
        # Train so discriminator can detect real
        labels = np.zeros((batch_size, 1))
        labels += 0.05 * np.random.random(labels.shape)
        d_loss2 = self._discriminator.train_on_batch(real_images, labels)
        d_loss = (d_loss1 + d_loss2)/2.0
        # Train so generator can fool discriminator (targets = "real" = 0
        # under this file's inverted label convention)
        random_latent_vectors = np.random.normal(size=(batch_size, self._latent_dim))
        misleading_targets = np.zeros((batch_size, 1))
        g_loss = self._dcgan.train_on_batch(random_latent_vectors, misleading_targets)
        return d_loss, g_loss

    def predict(self, latent_vector):
        """Generate images for a batch of latent vectors."""
        return self._generator.predict(latent_vector)

    def load_weights(self, file_path, by_name=False):
        # Loading via the stacked model restores generator + discriminator.
        self._dcgan.load_weights(file_path, by_name)

    def save_weights(self, file_path, overwrite=True):
        # Saving via the stacked model covers generator + discriminator.
        self._dcgan.save_weights(file_path, overwrite)
次に以下をmain.pyに書きます(predict関数で画像をgeneratedというフォルダに作成するので、予めmain.pyと同じフォルダにgeneratedというフォルダを作成してください)。
import os
import numpy as np
import keras
from dcgan import DCGAN
from keras.preprocessing import image


def normalize(X):
    """Map pixel values from [0, 255] into [-1, 1] (matches tanh output)."""
    return (X - 127.5)/127.5


def denormalize(X):
    """Map pixel values from [-1, 1] back into [0, 255]."""
    return (X + 1.0)*127.5


def train(latent_dim, height, width, channels):
    """Train the DCGAN on MNIST for 20 epochs.

    Side effects: appends losses to loss.txt and saves weights after each
    epoch as gan_epoch<N>.h5.
    """
    (X_train, Y_train), (_, _) = keras.datasets.mnist.load_data()
    X_train = X_train.reshape((X_train.shape[0],) + (height, width, channels)).astype('float32')
    X_train = normalize(X_train)

    epochs, batch_size = 20, 128
    steps_per_epoch = X_train.shape[0]//batch_size
    dcgan = DCGAN(latent_dim, height, width, channels)

    for epoch in range(epochs):
        for step in range(steps_per_epoch):
            # Sequential (non-shuffled) mini-batch of real images.
            batch_start = step*batch_size
            real_images = X_train[batch_start:batch_start + batch_size]
            d_loss, g_loss = dcgan.train(real_images, batch_size)
            # Log every 10th step to stdout and to the loss file.
            if (step + 1)%10 == 0:
                print('discriminator loss:', d_loss)
                print('generator loss:', g_loss)
                print()
                with open('loss.txt', 'a') as f:
                    f.write(str(d_loss) + ',' + str(g_loss) + '\r')
        # Checkpoint weights once per epoch.
        dcgan.save_weights('gan' + '_epoch' + str(epoch + 1) + '.h5')
        print('epoch' + str(epoch) + ' end')
        print()


def predict(latent_dim, height, width, channels):
    """Load the final weights and write 100 generated digits to ./generated."""
    random_latent_vectors = np.random.normal(size=(100, latent_dim))
    dcgan = DCGAN(latent_dim, height, width, channels)
    dcgan.load_weights('gan_epoch20.h5')
    generated_images = dcgan.predict(random_latent_vectors)
    for index, generated_image in enumerate(generated_images):
        img = image.array_to_img(denormalize(generated_image), scale=False)
        img.save(os.path.join('generated', str(index) + '.png'))


if __name__ == '__main__':
    latent_dim = 100
    height = 28
    width = 28
    channels = 1
    train(latent_dim, height, width, channels)
    predict(latent_dim, height, width, channels)
ロス値の推移は以下のようになりました。良く見ると中盤以降Generatorのロス値が上昇傾向、Discriminatorのロス値が下降傾向に見えるため、これ以上学習させるとGeneratorが負けておかしな画像が生成され始めるかもしれません。 最後に20エポック学習終わって生成された画像は以下のようになりました。