How to do it...

Let us start with the recipe:

  1. Import a few modules such as numpy, scipy, tensorflow, and matplotlib. Then import PIL to manipulate images. Note that because this code runs in a Jupyter notebook, which you can download online, the fragment %matplotlib inline has been added:
from __future__ import division, print_function
import os
import sys
import numpy as np
import scipy.io
import scipy.misc
import tensorflow as tf
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from PIL import Image
%matplotlib inline
  2. Then, set the input paths: one for the image used to learn the style and one for the content image to be repainted according to that style:
OUTPUT_DIR = 'output/' 
# Style image
STYLE_IMAGE = 'data/StarryNight.jpg'
# Content image to be repainted
CONTENT_IMAGE = 'data/Marilyn_Monroe_in_1952.jpg'
  3. Then we set up the noise ratio used during the image generation and the emphasis we would like to put on content loss and on style loss when repainting the content image. In addition to that, we store the path to the pretrained VGG model and the mean computed during the VGG pretraining. This mean is already known and it is subtracted from the input to the VGG model:
# how much noise is in the image 
NOISE_RATIO = 0.6
# How much emphasis on content loss.
BETA = 5
# How much emphasis on style loss.
ALPHA = 100
# the VGG 19-layer pre-trained model
VGG_MODEL = 'data/imagenet-vgg-verydeep-19.mat'
# The mean used when the VGG was trained
# It is subtracted from the input to the VGG model.
MEAN_VALUES = np.array([123.68, 116.779, 103.939]).reshape((1, 1, 1, 3))
  4. Show the content image just to see what it looks like:
content_image = scipy.misc.imread(CONTENT_IMAGE)
imshow(content_image)

Here is the output of the preceding code (note that this image is from https://commons.wikimedia.org/wiki/File:Marilyn_Monroe_in_1952.jpg):

  5. Resize the style image and show it just to see what it looks like. Note that the content image and the style image now have the same size and the same number of color channels:
style_image = scipy.misc.imread(STYLE_IMAGE) 
# Get shape of target and make the style image the same
target_shape = content_image.shape
print "target_shape=", target_shape
print "style_shape=", style_image.shape
#ratio = target_shape[1] / style_image.shape[1]
#print "resize ratio=", ratio
style_image = scipy.misc.imresize(style_image, target_shape)
scipy.misc.imsave(STYLE_IMAGE, style_image)
imshow(style_image)

Here is the output of the preceding code:

An example of a Vincent van Gogh painting, as seen at https://commons.wikimedia.org/wiki/File:VanGogh-starry_night_ballance1.jpg
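A portability note: the scipy.misc image helpers (imread, imresize, and imsave) used throughout this recipe were removed in SciPy 1.3 and later. On a recent installation, an equivalent sketch of the resize step above, assuming the imageio and Pillow packages are installed, would be:

import imageio
from PIL import Image

style_image = imageio.imread(STYLE_IMAGE)
# PIL's resize takes (width, height), while NumPy shapes are (height, width).
style_image = np.array(Image.fromarray(style_image).resize(
    (target_shape[1], target_shape[0])))
imageio.imwrite(STYLE_IMAGE, style_image)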
  6. The next step is to define the VGG model as described in the original paper. Note that the deep learning network is rather complex, as it combines multiple ConvNet layers with ReLU activation functions and max pooling. An additional note is that in the original style transfer paper (A Neural Algorithm of Artistic Style by Leon A. Gatys, Alexander S. Ecker, and Matthias Bethge), many experiments show that average pooling actually outperforms max pooling. So, we will use average pooling instead (a sketch of the max pooling variant appears after the model definition below):
def load_vgg_model(path, image_height, image_width, color_channels):
    """
    Returns the VGG model as defined in the paper
    0 is conv1_1 (3, 3, 3, 64)
    1 is relu
    2 is conv1_2 (3, 3, 64, 64)
    3 is relu
    4 is maxpool
    5 is conv2_1 (3, 3, 64, 128)
    6 is relu
    7 is conv2_2 (3, 3, 128, 128)
    8 is relu
    9 is maxpool
    10 is conv3_1 (3, 3, 128, 256)
    11 is relu
    12 is conv3_2 (3, 3, 256, 256)
    13 is relu
    14 is conv3_3 (3, 3, 256, 256)
    15 is relu
    16 is conv3_4 (3, 3, 256, 256)
    17 is relu
    18 is maxpool
    19 is conv4_1 (3, 3, 256, 512)
    20 is relu
    21 is conv4_2 (3, 3, 512, 512)
    22 is relu
    23 is conv4_3 (3, 3, 512, 512)
    24 is relu
    25 is conv4_4 (3, 3, 512, 512)
    26 is relu
    27 is maxpool
    28 is conv5_1 (3, 3, 512, 512)
    29 is relu
    30 is conv5_2 (3, 3, 512, 512)
    31 is relu
    32 is conv5_3 (3, 3, 512, 512)
    33 is relu
    34 is conv5_4 (3, 3, 512, 512)
    35 is relu
    36 is maxpool
    37 is fullyconnected (7, 7, 512, 4096)
    38 is relu
    39 is fullyconnected (1, 1, 4096, 4096)
    40 is relu
    41 is fullyconnected (1, 1, 4096, 1000)
    42 is softmax
    """
    vgg = scipy.io.loadmat(path)
    vgg_layers = vgg['layers']

    def _weights(layer, expected_layer_name):
        """
        Return the weights and bias from the VGG model for a given layer.
        """
        W = vgg_layers[0][layer][0][0][0][0][0]
        b = vgg_layers[0][layer][0][0][0][0][1]
        layer_name = vgg_layers[0][layer][0][0][-2]
        assert layer_name == expected_layer_name
        return W, b

    def _relu(conv2d_layer):
        """
        Return the RELU function wrapped over a TensorFlow layer. Expects a
        Conv2d layer input.
        """
        return tf.nn.relu(conv2d_layer)

    def _conv2d(prev_layer, layer, layer_name):
        """
        Return the Conv2D layer using the weights, biases from the VGG
        model at 'layer'.
        """
        W, b = _weights(layer, layer_name)
        W = tf.constant(W)
        b = tf.constant(np.reshape(b, (b.size)))
        return tf.nn.conv2d(
            prev_layer, filter=W, strides=[1, 1, 1, 1], padding='SAME') + b

    def _conv2d_relu(prev_layer, layer, layer_name):
        """
        Return the Conv2D + RELU layer using the weights, biases from the VGG
        model at 'layer'.
        """
        return _relu(_conv2d(prev_layer, layer, layer_name))

    def _avgpool(prev_layer):
        """
        Return the AveragePooling layer.
        """
        return tf.nn.avg_pool(prev_layer, ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1], padding='SAME')

    # Constructs the graph model.
    graph = {}
    graph['input'] = tf.Variable(np.zeros((1, image_height, image_width,
                                           color_channels)), dtype='float32')
    graph['conv1_1'] = _conv2d_relu(graph['input'], 0, 'conv1_1')
    graph['conv1_2'] = _conv2d_relu(graph['conv1_1'], 2, 'conv1_2')
    graph['avgpool1'] = _avgpool(graph['conv1_2'])
    graph['conv2_1'] = _conv2d_relu(graph['avgpool1'], 5, 'conv2_1')
    graph['conv2_2'] = _conv2d_relu(graph['conv2_1'], 7, 'conv2_2')
    graph['avgpool2'] = _avgpool(graph['conv2_2'])
    graph['conv3_1'] = _conv2d_relu(graph['avgpool2'], 10, 'conv3_1')
    graph['conv3_2'] = _conv2d_relu(graph['conv3_1'], 12, 'conv3_2')
    graph['conv3_3'] = _conv2d_relu(graph['conv3_2'], 14, 'conv3_3')
    graph['conv3_4'] = _conv2d_relu(graph['conv3_3'], 16, 'conv3_4')
    graph['avgpool3'] = _avgpool(graph['conv3_4'])
    graph['conv4_1'] = _conv2d_relu(graph['avgpool3'], 19, 'conv4_1')
    graph['conv4_2'] = _conv2d_relu(graph['conv4_1'], 21, 'conv4_2')
    graph['conv4_3'] = _conv2d_relu(graph['conv4_2'], 23, 'conv4_3')
    graph['conv4_4'] = _conv2d_relu(graph['conv4_3'], 25, 'conv4_4')
    graph['avgpool4'] = _avgpool(graph['conv4_4'])
    graph['conv5_1'] = _conv2d_relu(graph['avgpool4'], 28, 'conv5_1')
    graph['conv5_2'] = _conv2d_relu(graph['conv5_1'], 30, 'conv5_2')
    graph['conv5_3'] = _conv2d_relu(graph['conv5_2'], 32, 'conv5_3')
    graph['conv5_4'] = _conv2d_relu(graph['conv5_3'], 34, 'conv5_4')
    graph['avgpool5'] = _avgpool(graph['conv5_4'])
    return graph
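If you want to experiment with the original VGG max pooling behaviour discussed in this step, a minimal sketch of the variant helper follows (_maxpool is our name for it; it is a drop-in replacement for _avgpool inside load_vgg_model):

def _maxpool(prev_layer):
    """
    Return the MaxPooling layer (the pooling used by the original VGG).
    """
    return tf.nn.max_pool(prev_layer, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')

You would then build the graph with _maxpool wherever _avgpool is called.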
  7. Define the content loss function as described in the original paper:
def content_loss_func(sess, model):
    """
    Content loss function as defined in the paper.
    """
    def _content_loss(p, x):
        # N is the number of filters (at layer l).
        N = p.shape[3]
        # M is the height times the width of the feature map (at layer l).
        M = p.shape[1] * p.shape[2]
        return (1 / (4 * N * M)) * tf.reduce_sum(tf.pow(x - p, 2))
    return _content_loss(sess.run(model['conv4_2']), model['conv4_2'])
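For reference, the content loss in the Gatys et al. paper compares the feature maps $P^l$ of the content image and $F^l$ of the generated image at a chosen layer $l$ (conv4_2 here):

\mathcal{L}_{content}(\vec{p}, \vec{x}, l) = \frac{1}{2} \sum_{i,j} \left( F^{l}_{ij} - P^{l}_{ij} \right)^{2}

The $\frac{1}{4NM}$ factor in the code is a common implementation variant of the paper's $\frac{1}{2}$ normalization; it only rescales the loss relative to the BETA weight.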
  8. Define which VGG layers we are going to reuse. If we want softer features, we need to increase the weights of the higher layers (conv5_1) and decrease the weights of the lower layers (conv1_1). If we want harder features, we need to do the opposite:
STYLE_LAYERS = [ 
('conv1_1', 0.5),
('conv2_1', 1.0),
('conv3_1', 1.5),
('conv4_1', 3.0),
('conv5_1', 4.0),
]
  9. Define the style loss function as described in the original paper:
def style_loss_func(sess, model):
    """
    Style loss function as defined in the paper.
    """
    def _gram_matrix(F, N, M):
        """
        The gram matrix G.
        """
        Ft = tf.reshape(F, (M, N))
        return tf.matmul(tf.transpose(Ft), Ft)

    def _style_loss(a, x):
        """
        The style loss calculation.
        """
        # N is the number of filters (at layer l).
        N = a.shape[3]
        # M is the height times the width of the feature map (at layer l).
        M = a.shape[1] * a.shape[2]
        # A is the style representation of the original image (at layer l).
        A = _gram_matrix(a, N, M)
        # G is the style representation of the generated image (at layer l).
        G = _gram_matrix(x, N, M)
        result = (1 / (4 * N**2 * M**2)) * tf.reduce_sum(tf.pow(G - A, 2))
        return result

    E = [_style_loss(sess.run(model[layer_name]), model[layer_name])
         for layer_name, _ in STYLE_LAYERS]
    W = [w for _, w in STYLE_LAYERS]
    loss = sum([W[l] * E[l] for l in range(len(STYLE_LAYERS))])
    return loss
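For reference, in the paper the style representation at layer $l$ is the Gram matrix of the feature maps, $G^{l}_{ij} = \sum_{k} F^{l}_{ik} F^{l}_{jk}$, and the style loss is a weighted sum of per-layer losses:

E_{l} = \frac{1}{4 N_{l}^{2} M_{l}^{2}} \sum_{i,j} \left( G^{l}_{ij} - A^{l}_{ij} \right)^{2}, \qquad \mathcal{L}_{style}(\vec{a}, \vec{x}) = \sum_{l} w_{l} E_{l}

where $N_l$ is the number of filters, $M_l$ the feature map size, $A^l$ the Gram matrix of the style image, and $w_l$ the weights listed in STYLE_LAYERS.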
  10. Define a function to generate a noise image and intermix it with the content image at a given ratio. Also, define two auxiliary methods to preprocess and save images:
def generate_noise_image(content_image, noise_ratio=NOISE_RATIO):
    """
    Returns a noise image intermixed with the content image at a certain ratio.
    """
    noise_image = np.random.uniform(
        -20, 20,
        (1,
         content_image[0].shape[0],
         content_image[0].shape[1],
         content_image[0].shape[2])).astype('float32')
    # White noise image from the content representation. Take a weighted
    # average of the values.
    input_image = noise_image * noise_ratio + content_image * (1 - noise_ratio)
    return input_image

def process_image(image):
    # Resize the image for convnet input. There is no change, we just
    # add an extra dimension.
    image = np.reshape(image, ((1,) + image.shape))
    # Input to the VGG model expects the mean to be subtracted.
    image = image - MEAN_VALUES
    return image

def save_image(path, image):
    # Output should add back the mean.
    image = image + MEAN_VALUES
    # Get rid of the first useless dimension; what remains is the image.
    image = image[0]
    image = np.clip(image, 0, 255).astype('uint8')
    scipy.misc.imsave(path, image)
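As a quick, optional sanity check (a sketch we add here; check.png is just an example filename), process_image and save_image should round-trip an image, up to the final uint8 clipping:

img = scipy.misc.imread(CONTENT_IMAGE)
x = process_image(img)
assert x.shape == (1,) + img.shape  # the batch dimension was added
if not os.path.exists(OUTPUT_DIR):
    os.mkdir(OUTPUT_DIR)
save_image(OUTPUT_DIR + 'check.png', x)  # should look like the original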
  11. Start a TensorFlow interactive session:
sess = tf.InteractiveSession()
  12. Load the processed content image and show it:
content_image = process_image(scipy.misc.imread(CONTENT_IMAGE))
imshow(content_image[0])

We get the output of the preceding code as follows (note that we used an image from https://commons.wikimedia.org/wiki/File:Marilyn_Monroe_in_1952.jpg):

  13. Load the processed style image and show it:
style_image = process_image(scipy.misc.imread(STYLE_IMAGE))
imshow(style_image[0])

The output is as follows:

  14. Load the model and show it:
model = load_vgg_model(VGG_MODEL,
                       style_image[0].shape[0],
                       style_image[0].shape[1],
                       style_image[0].shape[2])
print(model)
  15. Generate a random noise image that is used to bootstrap the repainting:
input_image = generate_noise_image(content_image)
imshow(input_image[0])
  16. Initialize the TensorFlow variables:
sess.run(tf.global_variables_initializer())
  17. Construct content_loss and style_loss with the respective images:
# Construct content_loss using content_image.
sess.run(model['input'].assign(content_image))
content_loss = content_loss_func(sess, model)
# Construct style_loss using style_image.
sess.run(model['input'].assign(style_image))
style_loss = style_loss_func(sess, model)
  18. Construct total_loss as a weighted combination of content_loss and style_loss:
# Construct total_loss as a weighted combination of content_loss and style_loss.
total_loss = BETA * content_loss + ALPHA * style_loss
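For reference, the paper defines the total loss as

\mathcal{L}_{total}(\vec{p}, \vec{a}, \vec{x}) = \alpha \, \mathcal{L}_{content}(\vec{p}, \vec{x}) + \beta \, \mathcal{L}_{style}(\vec{a}, \vec{x})

where $\alpha$ weights the content term and $\beta$ the style term. Note that this code swaps the names: here ALPHA (100) multiplies the style loss and BETA (5) multiplies the content loss, which is consistent with the comments in step 3.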
  19. Build an optimizer to minimize the total loss. In this case, we adopt the Adam optimizer:
# The content is built from one layer, while the style is from five 
# layers. Then we minimize the total_loss
optimizer = tf.train.AdamOptimizer(2.0)
train_step = optimizer.minimize(total_loss)
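Note that the original paper optimized with L-BFGS rather than Adam. As an optional alternative, here is a minimal sketch using the SciPy wrapper available in TensorFlow 1.x contrib (assuming SciPy is installed; maxiter is an arbitrary example value):

# Alternative: full-batch L-BFGS-B via the TF 1.x contrib wrapper.
lbfgs_optimizer = tf.contrib.opt.ScipyOptimizerInterface(
    total_loss, method='L-BFGS-B', options={'maxiter': 500})
# A single call runs the whole optimization (no train_step loop needed):
# lbfgs_optimizer.minimize(sess)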
  20. Bootstrap the network with the input image:
sess.run(tf.global_variables_initializer())
sess.run(model['input'].assign(input_image))
  21. Run the model for a fixed number of iterations and produce intermediate repainted images:
ITERATIONS = 1000  # number of optimization steps; the images below use up to 600
sess.run(tf.global_variables_initializer())
sess.run(model['input'].assign(input_image))
print("started iteration")
for it in range(ITERATIONS):
    sess.run(train_step)
    print(it, " ")
    if it % 100 == 0:
        # Print every 100 iterations.
        mixed_image = sess.run(model['input'])
        print('Iteration %d' % (it))
        print('sum : ', sess.run(tf.reduce_sum(mixed_image)))
        print('cost: ', sess.run(total_loss))
        if not os.path.exists(OUTPUT_DIR):
            os.mkdir(OUTPUT_DIR)
        filename = 'output/%d.png' % (it)
        save_image(filename, mixed_image)
  22. In the following images, we show how the content image has been repainted after 200, 400, and 600 iterations:
An example of style transfer