How to do it...

Let us start with the recipe:

  1. Import a few modules such as numpy, scipy, tensorflow, and matplotlib. Then import PIL to manipulate images. Note that because this code runs in a Jupyter notebook, which you can download online, the fragment %matplotlib inline has been added:
from __future__ import division, print_function
import os
import sys
import numpy as np
import scipy.io
import scipy.misc
import tensorflow as tf
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from PIL import Image
%matplotlib inline
  2. Then, set the input paths: one for the image used to learn the style and one for the content image to be repainted according to that style:
OUTPUT_DIR = 'output/' 
# Style image
STYLE_IMAGE = 'data/StarryNight.jpg'
# Content image to be repainted
CONTENT_IMAGE = 'data/Marilyn_Monroe_in_1952.jpg'
  3. Then we set up the noise ratio used during the image generation and the emphasis we would like to put on content loss and on style loss when repainting the content image. In addition to that, we store the path to the pretrained VGG model and the mean computed during the VGG pretraining. This mean is already known and it is subtracted from the input to the VGG model:
# how much noise is in the image 
NOISE_RATIO = 0.6
# How much emphasis on content loss.
BETA = 5
# How much emphasis on style loss.
ALPHA = 100
# the VGG 19-layer pre-trained model
VGG_MODEL = 'data/imagenet-vgg-verydeep-19.mat'
# The mean used when the VGG was trained
# It is subtracted from the input to the VGG model.
MEAN_VALUES = np.array([123.68, 116.779, 103.939]).reshape((1, 1, 1, 3))
  4. Show the content image just to see what it looks like:
content_image = scipy.misc.imread(CONTENT_IMAGE)
imshow(content_image)

Here is the output of the preceding code (note that this image is from https://commons.wikimedia.org/wiki/File:Marilyn_Monroe_in_1952.jpg):

  5. Resize the style image and show it just to see what it looks like. Note that the content image and the style image now have the same size and the same number of color channels:
style_image = scipy.misc.imread(STYLE_IMAGE) 
# Get shape of target and make the style image the same
target_shape = content_image.shape
print "target_shape=", target_shape
print "style_shape=", style_image.shape
#ratio = target_shape[1] / style_image.shape[1]
#print "resize ratio=", ratio
style_image = scipy.misc.imresize(style_image, target_shape)
scipy.misc.imsave(STYLE_IMAGE, style_image)
imshow(style_image)

Here is the output of the preceding code:

An example of a Vincent van Gogh painting, as seen at https://commons.wikimedia.org/wiki/File:VanGogh-starry_night_ballance1.jpg
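A portability note: the scipy.misc image helpers (imread, imresize, and imsave) used throughout this recipe were removed in SciPy 1.3 and later. On a recent installation, an equivalent sketch of the resize step above, assuming the imageio and Pillow packages are installed, would be:

import imageio
from PIL import Image

style_image = imageio.imread(STYLE_IMAGE)
# PIL's resize takes (width, height), while NumPy shapes are (height, width).
style_image = np.array(Image.fromarray(style_image).resize(
    (target_shape[1], target_shape[0])))
imageio.imwrite(STYLE_IMAGE, style_image)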
  6. The next step is to define the VGG model as described in the original paper. Note that the deep learning network is rather complex, as it combines multiple ConvNet layers with ReLU activation functions and max pooling. An additional note is that in the original style transfer paper (A Neural Algorithm of Artistic Style by Leon A. Gatys, Alexander S. Ecker, and Matthias Bethge), many experiments show that average pooling actually outperforms max pooling. So, we will use average pooling instead (a sketch of the max pooling variant appears after the model definition below):
def load_vgg_model(path, image_height, image_width, color_channels):
    """
    Returns the VGG model as defined in the paper
    0 is conv1_1 (3, 3, 3, 64)
    1 is relu
    2 is conv1_2 (3, 3, 64, 64)
    3 is relu
    4 is maxpool
    5 is conv2_1 (3, 3, 64, 128)
    6 is relu
    7 is conv2_2 (3, 3, 128, 128)
    8 is relu
    9 is maxpool
    10 is conv3_1 (3, 3, 128, 256)
    11 is relu
    12 is conv3_2 (3, 3, 256, 256)
    13 is relu
    14 is conv3_3 (3, 3, 256, 256)
    15 is relu
    16 is conv3_4 (3, 3, 256, 256)
    17 is relu
    18 is maxpool
    19 is conv4_1 (3, 3, 256, 512)
    20 is relu
    21 is conv4_2 (3, 3, 512, 512)
    22 is relu
    23 is conv4_3 (3, 3, 512, 512)
    24 is relu
    25 is conv4_4 (3, 3, 512, 512)
    26 is relu
    27 is maxpool
    28 is conv5_1 (3, 3, 512, 512)
    29 is relu
    30 is conv5_2 (3, 3, 512, 512)
    31 is relu
    32 is conv5_3 (3, 3, 512, 512)
    33 is relu
    34 is conv5_4 (3, 3, 512, 512)
    35 is relu
    36 is maxpool
    37 is fullyconnected (7, 7, 512, 4096)
    38 is relu
    39 is fullyconnected (1, 1, 4096, 4096)
    40 is relu
    41 is fullyconnected (1, 1, 4096, 1000)
    42 is softmax
    """
    vgg = scipy.io.loadmat(path)
    vgg_layers = vgg['layers']

    def _weights(layer, expected_layer_name):
        """
        Return the weights and bias from the VGG model for a given layer.
        """
        W = vgg_layers[0][layer][0][0][0][0][0]
        b = vgg_layers[0][layer][0][0][0][0][1]
        layer_name = vgg_layers[0][layer][0][0][-2]
        assert layer_name == expected_layer_name
        return W, b

    def _relu(conv2d_layer):
        """
        Return the RELU function wrapped over a TensorFlow layer. Expects a
        Conv2d layer input.
        """
        return tf.nn.relu(conv2d_layer)

    def _conv2d(prev_layer, layer, layer_name):
        """
        Return the Conv2D layer using the weights, biases from the VGG
        model at 'layer'.
        """
        W, b = _weights(layer, layer_name)
        W = tf.constant(W)
        b = tf.constant(np.reshape(b, (b.size)))
        return tf.nn.conv2d(
            prev_layer, filter=W, strides=[1, 1, 1, 1], padding='SAME') + b

    def _conv2d_relu(prev_layer, layer, layer_name):
        """
        Return the Conv2D + RELU layer using the weights, biases from the VGG
        model at 'layer'.
        """
        return _relu(_conv2d(prev_layer, layer, layer_name))

    def _avgpool(prev_layer):
        """
        Return the AveragePooling layer.
        """
        return tf.nn.avg_pool(prev_layer, ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1], padding='SAME')

    # Constructs the graph model.
    graph = {}
    graph['input'] = tf.Variable(np.zeros((1, image_height, image_width,
                                           color_channels)), dtype='float32')
    graph['conv1_1'] = _conv2d_relu(graph['input'], 0, 'conv1_1')
    graph['conv1_2'] = _conv2d_relu(graph['conv1_1'], 2, 'conv1_2')
    graph['avgpool1'] = _avgpool(graph['conv1_2'])
    graph['conv2_1'] = _conv2d_relu(graph['avgpool1'], 5, 'conv2_1')
    graph['conv2_2'] = _conv2d_relu(graph['conv2_1'], 7, 'conv2_2')
    graph['avgpool2'] = _avgpool(graph['conv2_2'])
    graph['conv3_1'] = _conv2d_relu(graph['avgpool2'], 10, 'conv3_1')
    graph['conv3_2'] = _conv2d_relu(graph['conv3_1'], 12, 'conv3_2')
    graph['conv3_3'] = _conv2d_relu(graph['conv3_2'], 14, 'conv3_3')
    graph['conv3_4'] = _conv2d_relu(graph['conv3_3'], 16, 'conv3_4')
    graph['avgpool3'] = _avgpool(graph['conv3_4'])
    graph['conv4_1'] = _conv2d_relu(graph['avgpool3'], 19, 'conv4_1')
    graph['conv4_2'] = _conv2d_relu(graph['conv4_1'], 21, 'conv4_2')
    graph['conv4_3'] = _conv2d_relu(graph['conv4_2'], 23, 'conv4_3')
    graph['conv4_4'] = _conv2d_relu(graph['conv4_3'], 25, 'conv4_4')
    graph['avgpool4'] = _avgpool(graph['conv4_4'])
    graph['conv5_1'] = _conv2d_relu(graph['avgpool4'], 28, 'conv5_1')
    graph['conv5_2'] = _conv2d_relu(graph['conv5_1'], 30, 'conv5_2')
    graph['conv5_3'] = _conv2d_relu(graph['conv5_2'], 32, 'conv5_3')
    graph['conv5_4'] = _conv2d_relu(graph['conv5_3'], 34, 'conv5_4')
    graph['avgpool5'] = _avgpool(graph['conv5_4'])
    return graph
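If you want to experiment with the original VGG max pooling behaviour discussed in this step, a minimal sketch of the variant helper follows (_maxpool is our name for it; it is a drop-in replacement for _avgpool inside load_vgg_model):

def _maxpool(prev_layer):
    """
    Return the MaxPooling layer (the pooling used by the original VGG).
    """
    return tf.nn.max_pool(prev_layer, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')

You would then build the graph with _maxpool wherever _avgpool is called.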
  7. Define the content loss function as described in the original paper:
def content_loss_func(sess, model):
    """
    Content loss function as defined in the paper.
    """
    def _content_loss(p, x):
        # N is the number of filters (at layer l).
        N = p.shape[3]
        # M is the height times the width of the feature map (at layer l).
        M = p.shape[1] * p.shape[2]
        return (1 / (4 * N * M)) * tf.reduce_sum(tf.pow(x - p, 2))
    return _content_loss(sess.run(model['conv4_2']), model['conv4_2'])
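For reference, the content loss in the Gatys et al. paper compares the feature maps $P^l$ of the content image and $F^l$ of the generated image at a chosen layer $l$ (conv4_2 here):

\mathcal{L}_{content}(\vec{p}, \vec{x}, l) = \frac{1}{2} \sum_{i,j} \left( F^{l}_{ij} - P^{l}_{ij} \right)^{2}

The $\frac{1}{4NM}$ factor in the code is a common implementation variant of the paper's $\frac{1}{2}$ normalization; it only rescales the loss relative to the BETA weight.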
  8. Define which VGG layers we are going to reuse. If we want softer features, we need to increase the weights of the higher layers (conv5_1) and decrease the weights of the lower layers (conv1_1). If we want harder features, we need to do the opposite:
STYLE_LAYERS = [ 
('conv1_1', 0.5),
('conv2_1', 1.0),
('conv3_1', 1.5),
('conv4_1', 3.0),
('conv5_1', 4.0),
]
  9. Define the style loss function as described in the original paper:
def style_loss_func(sess, model):
    """
    Style loss function as defined in the paper.
    """
    def _gram_matrix(F, N, M):
        """
        The gram matrix G.
        """
        Ft = tf.reshape(F, (M, N))
        return tf.matmul(tf.transpose(Ft), Ft)

    def _style_loss(a, x):
        """
        The style loss calculation.
        """
        # N is the number of filters (at layer l).
        N = a.shape[3]
        # M is the height times the width of the feature map (at layer l).
        M = a.shape[1] * a.shape[2]
        # A is the style representation of the original image (at layer l).
        A = _gram_matrix(a, N, M)
        # G is the style representation of the generated image (at layer l).
        G = _gram_matrix(x, N, M)
        result = (1 / (4 * N**2 * M**2)) * tf.reduce_sum(tf.pow(G - A, 2))
        return result

    E = [_style_loss(sess.run(model[layer_name]), model[layer_name])
         for layer_name, _ in STYLE_LAYERS]
    W = [w for _, w in STYLE_LAYERS]
    loss = sum([W[l] * E[l] for l in range(len(STYLE_LAYERS))])
    return loss
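For reference, in the paper the style representation at layer $l$ is the Gram matrix of the feature maps, $G^{l}_{ij} = \sum_{k} F^{l}_{ik} F^{l}_{jk}$, and the style loss is a weighted sum of per-layer losses:

E_{l} = \frac{1}{4 N_{l}^{2} M_{l}^{2}} \sum_{i,j} \left( G^{l}_{ij} - A^{l}_{ij} \right)^{2}, \qquad \mathcal{L}_{style}(\vec{a}, \vec{x}) = \sum_{l} w_{l} E_{l}

where $N_l$ is the number of filters, $M_l$ the feature map size, $A^l$ the Gram matrix of the style image, and $w_l$ the weights listed in STYLE_LAYERS.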
  10. Define a function to generate a noise image and intermix it with the content image at a given ratio. Also, define two auxiliary methods to preprocess and save images:
def generate_noise_image(content_image, noise_ratio=NOISE_RATIO):
    """
    Returns a noise image intermixed with the content image at a certain ratio.
    """
    noise_image = np.random.uniform(
        -20, 20,
        (1,
         content_image[0].shape[0],
         content_image[0].shape[1],
         content_image[0].shape[2])).astype('float32')
    # White noise image from the content representation. Take a weighted
    # average of the values.
    input_image = noise_image * noise_ratio + content_image * (1 - noise_ratio)
    return input_image

def process_image(image):
    # Resize the image for convnet input. There is no change, we just
    # add an extra dimension.
    image = np.reshape(image, ((1,) + image.shape))
    # Input to the VGG model expects the mean to be subtracted.
    image = image - MEAN_VALUES
    return image

def save_image(path, image):
    # Output should add back the mean.
    image = image + MEAN_VALUES
    # Get rid of the first useless dimension; what remains is the image.
    image = image[0]
    image = np.clip(image, 0, 255).astype('uint8')
    scipy.misc.imsave(path, image)
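As a quick, optional sanity check (a sketch we add here; check.png is just an example filename), process_image and save_image should round-trip an image, up to the final uint8 clipping:

img = scipy.misc.imread(CONTENT_IMAGE)
x = process_image(img)
assert x.shape == (1,) + img.shape  # the batch dimension was added
if not os.path.exists(OUTPUT_DIR):
    os.mkdir(OUTPUT_DIR)
save_image(OUTPUT_DIR + 'check.png', x)  # should look like the original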
  11. Start a TensorFlow interactive session:
sess = tf.InteractiveSession()
  12. Load the processed content image and show it:
content_image = process_image(scipy.misc.imread(CONTENT_IMAGE))
imshow(content_image[0])

We get the output of the preceding code as follows (note that we used an image from https://commons.wikimedia.org/wiki/File:Marilyn_Monroe_in_1952.jpg):

  13. Load the processed style image and show it:
style_image = process_image(scipy.misc.imread(STYLE_IMAGE))
imshow(style_image[0])

The output is as follows:

  14. Load the model and show it:
model = load_vgg_model(VGG_MODEL,
                       style_image[0].shape[0],
                       style_image[0].shape[1],
                       style_image[0].shape[2])
print(model)
  15. Generate a random noise image that is used to bootstrap the repainting:
input_image = generate_noise_image(content_image)
imshow(input_image[0])
  16. Initialize the TensorFlow variables:
sess.run(tf.global_variables_initializer())
  17. Construct content_loss and style_loss with the respective images:
# Construct content_loss using content_image.
sess.run(model['input'].assign(content_image))
content_loss = content_loss_func(sess, model)
# Construct style_loss using style_image.
sess.run(model['input'].assign(style_image))
style_loss = style_loss_func(sess, model)
  18. Construct total_loss as a weighted combination of content_loss and style_loss:
# Construct total_loss as a weighted combination of content_loss and style_loss.
total_loss = BETA * content_loss + ALPHA * style_loss
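For reference, the paper defines the total loss as

\mathcal{L}_{total}(\vec{p}, \vec{a}, \vec{x}) = \alpha \, \mathcal{L}_{content}(\vec{p}, \vec{x}) + \beta \, \mathcal{L}_{style}(\vec{a}, \vec{x})

where $\alpha$ weights the content term and $\beta$ the style term. Note that this code swaps the names: here ALPHA (100) multiplies the style loss and BETA (5) multiplies the content loss, which is consistent with the comments in step 3.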
  19. Build an optimizer to minimize the total loss. In this case, we adopt the Adam optimizer:
# The content is built from one layer, while the style is from five 
# layers. Then we minimize the total_loss
optimizer = tf.train.AdamOptimizer(2.0)
train_step = optimizer.minimize(total_loss)
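Note that the original paper optimized with L-BFGS rather than Adam. As an optional alternative, here is a minimal sketch using the SciPy wrapper available in TensorFlow 1.x contrib (assuming SciPy is installed; maxiter is an arbitrary example value):

# Alternative: full-batch L-BFGS-B via the TF 1.x contrib wrapper.
lbfgs_optimizer = tf.contrib.opt.ScipyOptimizerInterface(
    total_loss, method='L-BFGS-B', options={'maxiter': 500})
# A single call runs the whole optimization (no train_step loop needed):
# lbfgs_optimizer.minimize(sess)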
  20. Bootstrap the network with the input image:
sess.run(tf.global_variables_initializer())
sess.run(model['input'].assign(input_image))
  21. Run the model for a fixed number of iterations and produce intermediate repainted images:
ITERATIONS = 1000  # number of optimization steps; the images below use up to 600
sess.run(tf.global_variables_initializer())
sess.run(model['input'].assign(input_image))
print("started iteration")
for it in range(ITERATIONS):
    sess.run(train_step)
    print(it, " ")
    if it % 100 == 0:
        # Print every 100 iterations.
        mixed_image = sess.run(model['input'])
        print('Iteration %d' % (it))
        print('sum : ', sess.run(tf.reduce_sum(mixed_image)))
        print('cost: ', sess.run(total_loss))
        if not os.path.exists(OUTPUT_DIR):
            os.mkdir(OUTPUT_DIR)
        filename = 'output/%d.png' % (it)
        save_image(filename, mixed_image)
  22. In the following images, we show how the content image has been repainted after 200, 400, and 600 iterations:
An example of style transfer