![]() |
ノート/Generationとdeconvolution (2016-08-14)http://pepper.is.sci.toho-u.ac.jp/pepper/index.php?%A5%CE%A1%BC%A5%C8%2FGeneration%A4%C8deconvolution%20%282016-08-14%29 |
![]() |
訪問者数 2042 最終更新 2016-08-15 (月) 16:10:58
出典: UNSUPERVISED REPRESENTATION LEARNING WITH DEEP CONVOLUTIONAL GENERATIVE ADVERSARIAL NETWORKS
すぎゃーん(id:sugyan)氏: TensorFlowによるDCGANでアイドルの顔画像生成
分類器などで使っている畳み込みの逆方向の操作で、最初は小さな多数のfeature mapにreshapeして、これを徐々に少数の大きなものにしていく。"deconvolution"と呼んだり呼ばなかったり、なのかな。TensorFlowではこの操作はtf.nn.conv2d_transposeという関数で実現するようだ。
Tensorflowのマニュアルでは conv2d_transpose
本件、deconvolutionは別の意味 (この論文参照:Deconvolutional Networks) でも使われるので、要注意。
それと区別するために、Tensorflowでは、"Rename deconv2d to conv2d_transpose and expose publicly" などと
言っているようだ。
それで、彼のプログラムだと、dcgan.py では、
def __generator(self, depth1, depth2, depth3, depth4):
    """Return a callable that builds the generator graph for a batch of inputs.

    The returned ``model(inputs)`` projects ``inputs`` through a fully
    connected layer, reshapes the result to
    ``[-1, f_size, f_size, depth1]`` and applies four stride-2 transposed
    convolutions whose output channel counts are ``depth2``, ``depth3``,
    ``depth4`` and ``3``; the result is passed through ``tanh``.

    Variables live under the ``'g'`` scope.  The first call creates them;
    every later call of the same closure passes ``reuse=True`` to that scope.
    """
    built_once = False

    def model(inputs):
        nonlocal built_once
        channels = [depth1, depth2, depth3, depth4, 3]
        first_depth = channels[0]
        target_depths = channels[1:5]
        with tf.variable_scope('g', reuse=built_once):
            # Project the inputs and fold them into the initial feature maps.
            with tf.variable_scope('reshape'):
                projected = tf.contrib.layers.fully_connected(
                    inputs,
                    first_depth * self.f_size * self.f_size,
                    normalizer_fn=tf.contrib.layers.batch_norm)
                out = tf.reshape(
                    projected,
                    [-1, self.f_size, self.f_size, first_depth])
            # Transposed-convolution ("deconvolution") stack.
            for index, out_channels in enumerate(target_depths):
                # Only the first three layers get ReLU and batch norm;
                # the last one is left linear for the tanh below.
                is_hidden = index < 3
                with tf.variable_scope('conv%d' % (index + 1)):
                    out = tf.contrib.layers.conv2d_transpose(
                        out,
                        out_channels,
                        [5, 5],
                        stride=2,
                        activation_fn=tf.nn.relu if is_hidden else None,
                        normalizer_fn=(tf.contrib.layers.batch_norm
                                       if is_hidden else None))
        built_once = True
        return tf.nn.tanh(out)

    return model
てな感じになっている。
もう1つ、Taehoon Kim氏によるTensorflowでのインプリだと、(改名前のdeconvの名前で使っている)
model.py
Chainer
def discriminator(self, image, y=None, reuse=False):
    """Build the discriminator and return ``(sigmoid(logits), logits)``.

    Args:
        image: input image batch.
        y: conditioning tensor; only used when ``self.y_dim`` is truthy.
        reuse: when true, the current variable scope is switched to reuse
            mode before any layer is created.

    Returns:
        A 2-tuple of the sigmoid of the final linear output and that raw
        linear output.
    """
    if reuse:
        tf.get_variable_scope().reuse_variables()

    if not self.y_dim:
        # Unconditional branch: four convolutions followed by one linear unit.
        h0 = lrelu(conv2d(image, self.df_dim, name='d_h0_conv'))
        h1 = lrelu(self.d_bn1(conv2d(h0, self.df_dim*2, name='d_h1_conv')))
        h2 = lrelu(self.d_bn2(conv2d(h1, self.df_dim*4, name='d_h2_conv')))
        h3 = lrelu(self.d_bn3(conv2d(h2, self.df_dim*8, name='d_h3_conv')))
        # NOTE: the scope name 'd_h3_lin' is kept as-is; renaming it would
        # change the variable names this layer creates.
        h4 = linear(tf.reshape(h3, [self.batch_size, -1]), 1, 'd_h3_lin')

        return tf.nn.sigmoid(h4), h4
    else:
        # Conditional branch: y is concatenated back in after every stage.
        yb = tf.reshape(y, [self.batch_size, 1, 1, self.y_dim])
        x = conv_cond_concat(image, yb)

        h0 = lrelu(conv2d(x, self.c_dim + self.y_dim, name='d_h0_conv'))
        h0 = conv_cond_concat(h0, yb)

        h1 = lrelu(self.d_bn1(conv2d(h0, self.df_dim + self.y_dim, name='d_h1_conv')))
        h1 = tf.reshape(h1, [self.batch_size, -1])
        h1 = tf.concat(1, [h1, y])

        h2 = lrelu(self.d_bn2(linear(h1, self.dfc_dim, 'd_h2_lin')))
        h2 = tf.concat(1, [h2, y])

        h3 = linear(h2, 1, 'd_h3_lin')

        return tf.nn.sigmoid(h3), h3


def generator(self, z, y=None):
    """Build the generator and return the generated image tensor.

    Args:
        z: latent input batch.
        y: conditioning tensor; only used when ``self.y_dim`` is truthy.

    Returns:
        ``tanh`` of the last transposed convolution in the unconditional
        branch, ``sigmoid`` of it in the conditional branch.

    Side effects:
        The unconditional branch stores intermediate tensors and layer
        weights on ``self`` (``z_``, ``h0``, ``h1`` and the ``h*_w`` /
        ``h*_b`` attributes).
    """
    if not self.y_dim:
        s = self.output_size
        s2, s4, s8, s16 = int(s/2), int(s/4), int(s/8), int(s/16)

        # project `z` and reshape
        self.z_, self.h0_w, self.h0_b = linear(z, self.gf_dim*8*s16*s16, 'g_h0_lin', with_w=True)

        self.h0 = tf.reshape(self.z_, [-1, s16, s16, self.gf_dim * 8])
        h0 = tf.nn.relu(self.g_bn0(self.h0))

        self.h1, self.h1_w, self.h1_b = deconv2d(h0,
            [self.batch_size, s8, s8, self.gf_dim*4], name='g_h1', with_w=True)
        h1 = tf.nn.relu(self.g_bn1(self.h1))

        h2, self.h2_w, self.h2_b = deconv2d(h1,
            [self.batch_size, s4, s4, self.gf_dim*2], name='g_h2', with_w=True)
        h2 = tf.nn.relu(self.g_bn2(h2))

        h3, self.h3_w, self.h3_b = deconv2d(h2,
            [self.batch_size, s2, s2, self.gf_dim*1], name='g_h3', with_w=True)
        h3 = tf.nn.relu(self.g_bn3(h3))

        h4, self.h4_w, self.h4_b = deconv2d(h3,
            [self.batch_size, s, s, self.c_dim], name='g_h4', with_w=True)

        return tf.nn.tanh(h4)
    else:
        s = self.output_size
        s2, s4 = int(s/2), int(s/4)

        yb = tf.reshape(y, [self.batch_size, 1, 1, self.y_dim])
        z = tf.concat(1, [z, y])

        h0 = tf.nn.relu(self.g_bn0(linear(z, self.gfc_dim, 'g_h0_lin')))
        h0 = tf.concat(1, [h0, y])

        # Feed h0 (not the raw z) into the second projection.  The quoted
        # code passed z here, so the h0 computed above was never used and
        # the 'g_h0_lin' layer had no effect on the output.
        h1 = tf.nn.relu(self.g_bn1(linear(h0, self.gf_dim*2*s4*s4, 'g_h1_lin')))
        h1 = tf.reshape(h1, [self.batch_size, s4, s4, self.gf_dim * 2])

        h1 = conv_cond_concat(h1, yb)

        h2 = tf.nn.relu(self.g_bn2(deconv2d(h1, [self.batch_size, s2, s2, self.gf_dim * 2], name='g_h2')))
        h2 = conv_cond_concat(h2, yb)

        return tf.nn.sigmoid(deconv2d(h2, [self.batch_size, s, s, self.c_dim], name='g_h3'))
Rezoolab氏によるChainerのインプリ 出典は Chainerを使ってコンピュータにイラストを描かせる の下の方。
class Generator(chainer.Chain):
    """Chainer generator: a linear projection followed by four Deconvolution2D links.

    The latent vector is projected to ``512 * 4 * 4`` values, reshaped to
    ``(n_sample, 512, 4, 4)`` and passed through four
    ``ksize=4, stride=2, pad=1`` deconvolutions ending in 3 channels with a
    ``tanh`` output.
    """

    # Width of the latent vector; used both for the first linear layer and
    # for sampling in generate_hidden_variables().
    n_hidden = 100
    # Passed to init_normal() for the weight initialisation
    # (presumably a standard deviation; init_normal is defined elsewhere).
    sigma = 0.01

    def __init__(self):
        super(Generator, self).__init__(
            # Use n_hidden instead of a second hard-coded 100 so the layer
            # width cannot drift from the sampled latent size.
            fc5=L.Linear(self.n_hidden, 512 * 4 * 4),
            norm5=L.BatchNormalization(512 * 4 * 4),
            conv4=L.Deconvolution2D(512, 256, ksize=4, stride=2, pad=1),
            norm4=L.BatchNormalization(256),
            conv3=L.Deconvolution2D(256, 128, ksize=4, stride=2, pad=1),
            norm3=L.BatchNormalization(128),
            conv2=L.Deconvolution2D(128, 64, ksize=4, stride=2, pad=1),
            norm2=L.BatchNormalization(64),
            conv1=L.Deconvolution2D(64, 3, ksize=4, stride=2, pad=1))
        init_normal(
            [self.conv1, self.conv2, self.conv3, self.conv4, self.fc5],
            self.sigma)

    def __call__(self, z, train=True):
        """Map latent batch ``z`` to an image batch.

        ``train`` is inverted into the ``test`` flag handed to every batch
        normalisation link.
        """
        n_sample = z.data.shape[0]
        test = not train
        h = F.relu(self.norm5(self.fc5(z), test=test))
        h = F.reshape(h, (n_sample, 512, 4, 4))
        h = F.relu(self.norm4(self.conv4(h), test=test))
        h = F.relu(self.norm3(self.conv3(h), test=test))
        h = F.relu(self.norm2(self.conv2(h), test=test))
        # The last layer has no batch normalisation; tanh is applied directly.
        x = F.tanh(self.conv1(h))
        return x

    def make_optimizer(self):
        """Return a new Adam optimizer with ``alpha=1e-4`` and ``beta1=0.5``."""
        return chainer.optimizers.Adam(alpha=1e-4, beta1=0.5)

    def generate_hidden_variables(self, n):
        """Return ``n`` latent vectors as a float32 array of shape ``(n, n_hidden)``.

        Values are drawn with ``np.random.uniform(low=-1.0, high=1.0)``.
        """
        return np.asarray(
            np.random.uniform(
                low=-1.0, high=1.0, size=(n, self.n_hidden)),
            dtype=np.float32)
Maccha氏によるChainerのインプリ Chainerで顔イラストの自動生成
コードの出典は GDCGAN.py
class Generator(chainer.Chain):
    """Chainer generator: linear projection to 512x6x6, then four deconvolutions.

    ``nz`` (the latent width) is read from module scope.  Every weight scale
    is ``0.02 * sqrt(fan_in)``, where the fan-in is ``nz`` for the linear
    layer and ``4 * 4 * in_channels`` for the deconvolutions.
    """

    def __init__(self):
        def scale(fan_in):
            # Shared weight-scale rule for every link below.
            return 0.02 * math.sqrt(fan_in)

        super(Generator, self).__init__(
            l0z=L.Linear(nz, 6*6*512, wscale=scale(nz)),
            dc1=L.Deconvolution2D(512, 256, 4, stride=2, pad=1,
                                  wscale=scale(4*4*512)),
            dc2=L.Deconvolution2D(256, 128, 4, stride=2, pad=1,
                                  wscale=scale(4*4*256)),
            dc3=L.Deconvolution2D(128, 64, 4, stride=2, pad=1,
                                  wscale=scale(4*4*128)),
            dc4=L.Deconvolution2D(64, 3, 4, stride=2, pad=1,
                                  wscale=scale(4*4*64)),
            bn0l=L.BatchNormalization(6*6*512),
            # bn0 is registered but not used in __call__; it is kept so the
            # set of registered links stays the same.
            bn0=L.BatchNormalization(512),
            bn1=L.BatchNormalization(256),
            bn2=L.BatchNormalization(128),
            bn3=L.BatchNormalization(64),
        )

    def __call__(self, z, test=False):
        """Map latent batch ``z`` to the raw output of the last deconvolution.

        ``test`` is forwarded to every batch normalisation link.  No output
        non-linearity is applied here.
        """
        projected = F.relu(self.bn0l(self.l0z(z), test=test))
        batch = z.data.shape[0]
        h = F.reshape(projected, (batch, 512, 6, 6))
        stages = (
            (self.dc1, self.bn1),
            (self.dc2, self.bn2),
            (self.dc3, self.bn3),
        )
        for deconv, norm in stages:
            h = F.relu(norm(deconv(h), test=test))
        return self.dc4(h)
Chainerマニュアルでは、
This link wraps the deconvolution_2d() function and holds the filter weight and bias vector as parameters.
This is an implementation of two-dimensional deconvolution.
def forward_cpu(self, inputs):
    """Run the CPU forward pass of the 2-D deconvolution.

    ``inputs`` is ``(x, W)`` or ``(x, W, b)``.  ``W`` is contracted with
    ``x`` (axis 0 of ``W`` against axis 1 of ``x``), the resulting column
    tensor is folded back into an image by ``conv.col2im_cpu`` and the bias,
    if given, is added per output channel.

    ``self.outh`` / ``self.outw`` are filled in from
    ``conv.get_deconv_outsize`` when they are still ``None``.

    Returns:
        A 1-tuple holding the output array.
    """
    x, W = inputs[:2]
    if len(inputs) == 3:
        bias = inputs[2]
    else:
        bias = None

    kernel_h, kernel_w = W.shape[2:]
    in_h, in_w = x.shape[2:4] if False else x.shape[2:]

    cols = numpy.tensordot(W, x, (0, 1))
    cols = cols.astype(x.dtype, copy=False)
    # Axis legend (from the original source):
    # - k, m, n: shape of out_channel
    # - b: number of inputs
    # - h, w: height and width of kernels
    # k, m, n, b, h, w -> b, k, m, n, h, w
    cols = numpy.rollaxis(cols, 3)

    # Derive any output size the caller did not fix in advance.
    if self.outh is None:
        self.outh = conv.get_deconv_outsize(in_h, kernel_h, self.sy, self.ph)
    if self.outw is None:
        self.outw = conv.get_deconv_outsize(in_w, kernel_w, self.sx, self.pw)

    # Result layout: b, k, h, w
    y = conv.col2im_cpu(
        cols, self.sy, self.sx, self.ph, self.pw, self.outh, self.outw)
    if bias is not None:
        # Broadcast the bias over batch and spatial axes.
        y += bias.reshape(1, bias.size, 1, 1)
    return (y,)
def get_conv_outsize(size, k, s, p, cover_all=False):
    """Return ``(size + 2*p - k) // s + 1``.

    With ``cover_all`` true, ``s - 1`` is added to the numerator before the
    floor division.  ``k``, ``s`` and ``p`` are presumably kernel size,
    stride and padding along one axis.
    """
    span = size + p * 2 - k
    if cover_all:
        span += s - 1
    return span // s + 1


def get_deconv_outsize(size, k, s, p, cover_all=False):
    """Return ``s * (size - 1) + k - 2*p``.

    With ``cover_all`` true, the result is reduced by ``s - 1``.  ``k``,
    ``s`` and ``p`` are presumably kernel size, stride and padding along
    one axis.
    """
    full = s * (size - 1) + k - 2 * p
    return full - s + 1 if cover_all else full

# (End of the quoted excerpt: this is how the source defines these sizes.)