如何在StyleGAN2中获取投影过程中用于生成图像的潜在向量(latent)



问题:如何获取StyleGAN2投影过程中用于生成图像的潜在向量(latent)

你好!我在玩这个StyleGAN2 colab笔记本https://colab.research.google.com/drive/1ShgW6wohEFQtqs_znMna3dzrcVoABKIH。它可以生成1024x1024的高分辨率人脸图像等。我一直在尝试找到一张与克里斯蒂亚诺·罗纳尔多非常相似的可生成人脸。运行他们的代码,一切正常:

生成Christiano Ronaldo

然后我修改了投影Ronaldo图像的方法,让它把Projector对象返回给我,再次运行,并把该对象保存在一个变量中。

Projector类:

# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
import numpy as np
import tensorflow as tf
import dnnlib
import dnnlib.tflib as tflib
from training import misc
#----------------------------------------------------------------------------
class Projector:
    """Optimize a dlatent and the synthesis noise inputs to match a target image.

    Typical use: call `set_network(Gs)` once to build the TensorFlow graph,
    then `run(target_images)` (or `start()` followed by repeated `step()`
    calls) and read the result through `get_dlatents()`, `get_noises()` and
    `get_images()`.

    The loss is the sum of the perceptual distance returned by the loaded
    `vgg16_zhang_perceptual` network plus a noise regularization term scaled
    by `regularize_noise_weight`.
    """

    def __init__(self):
        """Set the default hyperparameters and clear all graph/state handles."""
        # Public, user-tunable settings.
        self.num_steps                  = 1000
        self.dlatent_avg_samples        = 10000
        self.initial_learning_rate      = 0.1
        self.initial_noise_factor       = 0.05
        self.lr_rampdown_length         = 0.25
        self.lr_rampup_length           = 0.05
        self.noise_ramp_length          = 0.75
        self.regularize_noise_weight    = 1e5
        self.verbose                    = False
        self.clone_net                  = True

        # Internal handles; populated by set_network() and start().
        self._Gs                    = None
        self._minibatch_size        = None
        self._dlatent_avg           = None
        self._dlatent_std           = None
        self._noise_vars            = None
        self._noise_init_op         = None
        self._noise_normalize_op    = None
        self._dlatents_var          = None
        self._noise_in              = None
        self._dlatents_expr         = None
        self._images_expr           = None
        self._target_images_var     = None
        self._lpips                 = None
        self._dist                  = None
        self._loss                  = None
        self._reg_sizes             = None
        self._lrate_in              = None
        self._opt                   = None
        self._opt_step              = None
        self._cur_step              = None

    def _info(self, *args):
        """Print a 'Projector:'-prefixed status message when `verbose` is set."""
        if self.verbose:
            print('Projector:', *args)

    def set_network(self, Gs, minibatch_size=1):
        """Attach a generator network and build the optimization graph.

        Args:
            Gs: Generator network object exposing `clone()`, `vars`,
                `input_shapes` and `components.mapping` / `components.synthesis`.
                Passing None only records the value and returns.
            minibatch_size: Must be 1 (enforced by an assert).
        """
        assert minibatch_size == 1
        self._Gs = Gs
        self._minibatch_size = minibatch_size
        if self._Gs is None:
            return
        if self.clone_net:
            # Work on a copy so the noise variables of the caller's network
            # are not overwritten by the optimization.
            self._Gs = self._Gs.clone()

        # Find dlatent stats.
        self._info('Finding W midpoint and stddev using %d samples...' % self.dlatent_avg_samples)
        latent_samples = np.random.RandomState(123).randn(self.dlatent_avg_samples, *self._Gs.input_shapes[0][1:])
        dlatent_samples = self._Gs.components.mapping.run(latent_samples, None)[:, :1, :] # [N, 1, 512]
        self._dlatent_avg = np.mean(dlatent_samples, axis=0, keepdims=True) # [1, 1, 512]
        self._dlatent_std = (np.sum((dlatent_samples - self._dlatent_avg) ** 2) / self.dlatent_avg_samples) ** 0.5
        self._info('std = %g' % self._dlatent_std)

        # Find noise inputs: collect 'G_synthesis/noise0', 'noise1', ... until
        # the first index that does not exist in the network's variables.
        self._info('Setting up noise inputs...')
        self._noise_vars = []
        noise_init_ops = []
        noise_normalize_ops = []
        while True:
            n = 'G_synthesis/noise%d' % len(self._noise_vars)
            if n not in self._Gs.vars:
                break
            v = self._Gs.vars[n]
            self._noise_vars.append(v)
            noise_init_ops.append(tf.assign(v, tf.random_normal(tf.shape(v), dtype=tf.float32)))
            noise_mean = tf.reduce_mean(v)
            noise_std = tf.reduce_mean((v - noise_mean)**2)**0.5
            noise_normalize_ops.append(tf.assign(v, (v - noise_mean) / noise_std))
            self._info(n, v)
        self._noise_init_op = tf.group(*noise_init_ops)
        self._noise_normalize_op = tf.group(*noise_normalize_ops)

        # Image output graph.
        self._info('Building image output graph...')
        self._dlatents_var = tf.Variable(tf.zeros([self._minibatch_size] + list(self._dlatent_avg.shape[1:])), name='dlatents_var')
        self._noise_in = tf.placeholder(tf.float32, [], name='noise_in')
        dlatents_noise = tf.random.normal(shape=self._dlatents_var.shape) * self._noise_in
        # The single optimized dlatent is tiled across every synthesis layer.
        self._dlatents_expr = tf.tile(self._dlatents_var + dlatents_noise, [1, self._Gs.components.synthesis.input_shape[1], 1])
        self._images_expr = self._Gs.components.synthesis.get_output_for(self._dlatents_expr, randomize_noise=False)

        # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images.
        proc_images_expr = (self._images_expr + 1) * (255 / 2)
        sh = proc_images_expr.shape.as_list()
        if sh[2] > 256:
            factor = sh[2] // 256
            # Use sh[3] for the last spatial axis, matching the NumPy
            # downsampling in start(); identical to sh[2] for square images.
            proc_images_expr = tf.reduce_mean(tf.reshape(proc_images_expr, [-1, sh[1], sh[2] // factor, factor, sh[3] // factor, factor]), axis=[3,5])

        # Loss graph.
        self._info('Building loss graph...')
        self._target_images_var = tf.Variable(tf.zeros(proc_images_expr.shape), name='target_images_var')
        if self._lpips is None:
            self._lpips = misc.load_pkl('http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/vgg16_zhang_perceptual.pkl')
        self._dist = self._lpips.get_output_for(proc_images_expr, self._target_images_var)
        self._loss = tf.reduce_sum(self._dist)

        # Noise regularization graph: penalize correlation between each noise
        # map and its one-pixel shifts, at successively halved resolutions.
        self._info('Building noise regularization graph...')
        reg_loss = 0.0
        for v in self._noise_vars:
            sz = v.shape[2]
            while True:
                reg_loss += tf.reduce_mean(v * tf.roll(v, shift=1, axis=3))**2 + tf.reduce_mean(v * tf.roll(v, shift=1, axis=2))**2
                if sz <= 8:
                    break # Small enough already
                v = tf.reshape(v, [1, 1, sz//2, 2, sz//2, 2]) # Downscale
                v = tf.reduce_mean(v, axis=[3, 5])
                sz = sz // 2
        self._loss += reg_loss * self.regularize_noise_weight

        # Optimizer.
        self._info('Setting up optimizer...')
        self._lrate_in = tf.placeholder(tf.float32, [], name='lrate_in')
        self._opt = dnnlib.tflib.Optimizer(learning_rate=self._lrate_in)
        self._opt.register_gradients(self._loss, [self._dlatents_var] + self._noise_vars)
        self._opt_step = self._opt.apply_updates()

    def run(self, target_images):
        """Project `target_images` for `num_steps` steps and return the results.

        Returns:
            A `dnnlib.EasyDict` with `dlatents`, `noises` and `images` fields,
            filled from `get_dlatents()`, `get_noises()` and `get_images()`.
        """
        # Run to completion.
        self.start(target_images)
        while self._cur_step < self.num_steps:
            self.step()

        # Collect results.
        pres = dnnlib.EasyDict()
        pres.dlatents = self.get_dlatents()
        pres.noises = self.get_noises()
        pres.images = self.get_images()
        return pres

    def start(self, target_images):
        """Load the target images and reset the optimization state.

        Args:
            target_images: Array-like converted to float32; its first
                dimension must equal the minibatch size. Values are rescaled
                with `(x + 1) * (255 / 2)`, i.e. presumably expected in
                [-1, 1] -- confirm against the caller.
        """
        assert self._Gs is not None

        # Prepare target images.
        self._info('Preparing target images...')
        target_images = np.asarray(target_images, dtype='float32')
        target_images = (target_images + 1) * (255 / 2)
        sh = target_images.shape
        assert sh[0] == self._minibatch_size
        if sh[2] > self._target_images_var.shape[2]:
            # Average-pool down to the resolution used by the loss graph.
            factor = sh[2] // self._target_images_var.shape[2]
            target_images = np.reshape(target_images, [-1, sh[1], sh[2] // factor, factor, sh[3] // factor, factor]).mean((3, 5))

        # Initialize optimization state: start from the average dlatent.
        self._info('Initializing optimization state...')
        tflib.set_vars({self._target_images_var: target_images, self._dlatents_var: np.tile(self._dlatent_avg, [self._minibatch_size, 1, 1])})
        tflib.run(self._noise_init_op)
        self._opt.reset_optimizer_state()
        self._cur_step = 0

    def step(self):
        """Run one optimization step; does nothing once `num_steps` is reached."""
        assert self._cur_step is not None
        if self._cur_step >= self.num_steps:
            return
        if self._cur_step == 0:
            self._info('Running...')

        # Hyperparameters: dlatent noise decays quadratically to zero over
        # `noise_ramp_length`; the learning rate ramps up linearly and ramps
        # down along a cosine.
        t = self._cur_step / self.num_steps
        noise_strength = self._dlatent_std * self.initial_noise_factor * max(0.0, 1.0 - t / self.noise_ramp_length) ** 2
        lr_ramp = min(1.0, (1.0 - t) / self.lr_rampdown_length)
        lr_ramp = 0.5 - 0.5 * np.cos(lr_ramp * np.pi)
        lr_ramp = lr_ramp * min(1.0, t / self.lr_rampup_length)
        learning_rate = self.initial_learning_rate * lr_ramp

        # Train.
        feed_dict = {self._noise_in: noise_strength, self._lrate_in: learning_rate}
        _, dist_value, loss_value = tflib.run([self._opt_step, self._dist, self._loss], feed_dict)
        # Re-normalize every noise map to zero mean / unit std after the update.
        tflib.run(self._noise_normalize_op)

        # Print status.
        self._cur_step += 1
        if self._cur_step == self.num_steps or self._cur_step % 10 == 0:
            self._info('%-8d%-12g%-12g' % (self._cur_step, dist_value, loss_value))
        if self._cur_step == self.num_steps:
            self._info('Done.')

    def get_cur_step(self):
        """Return the current step index (None before `start()` is called)."""
        return self._cur_step

    def get_dlatents(self):
        """Return the current dlatents, evaluated with the added noise set to 0.

        This is the same tiled expression that is fed to
        `components.synthesis`, i.e. an input of the synthesis network rather
        than an input of the mapping network.
        """
        return tflib.run(self._dlatents_expr, {self._noise_in: 0})

    def get_noises(self):
        """Return the current values of all synthesis noise variables."""
        return tflib.run(self._noise_vars)

    def get_images(self):
        """Return the images synthesized from the current dlatents (noise 0)."""
        return tflib.run(self._images_expr, {self._noise_in: 0})
#----------------------------------------------

我拿到这个对象后调用了get_dlatents方法,以为它返回的就是生成Christiano图像的输入latent。

用这个latent生成图像,结果显然不像罗纳尔多("proji"是Projector对象):

# Fetch the projected dlatents from the Projector instance (`proji`).
latents = proji.get_dlatents()
# Take row 17 of the first sample in the returned array.
latent = latents[0][17]
# Reshape that row into a single (1, 512) vector.
latent = np.reshape(latent, (1,512))
# NOTE(review): get_dlatents() returns the tensor that Projector feeds to the
# synthesis network. `generate_images` is defined elsewhere in the notebook;
# if it expects mapping-network inputs instead, the result will not match the
# projected image -- confirm which latent space it takes.
img = generate_images([latent],1.0)[0]
imshow(img)

结果:这应该是罗纳尔多

我不知道我是犯了思路上的错误还是编码上的错误,我只想知道:我如何获取投影过程中用于生成图像的潜在向量(latent)?为了理解这个问题,你可能需要自己查看colab笔记本,我不想把所有内容都贴在这里。

谢谢你花时间看这个。

我不知道你的问题是否仍然相关,但你要找的是"将图像投影到潜在空间"(projecting an image to the latent space)。这个github页面干净而精确。设置好环境后,通过两个步骤即可获得您的潜在向量(latent)。

从图像中提取并对齐人脸:python align_images.py raw_images/ aligned_images/,然后使用python encode_images.py aligned_images/ generated_images/ latent_representations/找到对齐图像的潜在表示。在latent_representations文件夹下,您会得到对应的潜在向量(latent)。现在,您可以用这些潜在向量生成您想要的人脸。祝你好运

最新更新