'''Neural style transfer with Keras.

Run the script with:
```
python neural_style_transfer.py path_to_your_base_image.jpg \
    path_to_your_reference.jpg prefix_for_results
```
e.g.:
```
python neural_style_transfer.py img/tuebingen.jpg \
    img/starry_night.jpg results/my_result
```
Optional parameters:
```
--iter, The number of iterations to run the style transfer for (Default is 10)
--content_weight, The weight given to the content loss (Default is 0.025)
--style_weight, The weight given to the style loss (Default is 1.0)
--tv_weight, The weight given to the total variation loss (Default is 1.0)
```

It is preferable to run this script on a GPU, for speed.

Example result: https://twitter.com/fchollet/status/686631033085677568

# Details

Style transfer consists of generating an image
with the same "content" as a base image, but with the
"style" of a different picture (typically artistic).

This is achieved through the optimization of a loss function
that has 3 components: "style loss", "content loss",
and "total variation loss" (a short NumPy sketch of all three
terms follows this list):

- The total variation loss imposes local spatial continuity between
the pixels of the combination image, giving it visual coherence.

- The style loss is where the deep learning kicks in --that one is defined
using a deep convolutional neural network. Precisely, it consists of a sum of
L2 distances between the Gram matrices of the representations of
the style reference image and the combination image, extracted from
different layers of a convnet (trained on ImageNet). The general idea
is to capture color/texture information at different spatial
scales (fairly large scales --defined by the depth of the layer considered).

- The content loss is an L2 distance between the features of the base
image (extracted from a deep layer) and the features of the combination image,
keeping the generated image close enough to the original one.
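
Concretely, the three terms look roughly as follows in plain NumPy (a sketch
only: the array names and shapes here are made up, and the normalization
factors used by the script are omitted; the real symbolic versions are
defined further down in this script):
```
import numpy as np

def gram(feats):  # feats: (rows, cols, channels)
    f = feats.reshape(-1, feats.shape[-1])
    return f.T @ f  # channel-by-channel feature correlations

base, style, combo = (np.random.rand(4, 4, 8) for _ in range(3))

content = np.sum((combo - base) ** 2)               # content loss
style_l = np.sum((gram(style) - gram(combo)) ** 2)  # style loss (one layer)

img = np.random.rand(1, 10, 10, 3)                   # stand-in image batch
a = (img[:, :-1, :-1, :] - img[:, 1:, :-1, :]) ** 2  # vertical neighbor diffs
b = (img[:, :-1, :-1, :] - img[:, :-1, 1:, :]) ** 2  # horizontal neighbor diffs
tv = np.sum((a + b) ** 1.25)                         # total variation loss
```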

# References
    - [A Neural Algorithm of Artistic Style](http://arxiv.org/abs/1508.06576)
'''

from __future__ import print_function
from keras.preprocessing.image import load_img, save_img, img_to_array
import numpy as np
from scipy.optimize import fmin_l_bfgs_b
import time
import argparse

from keras.applications import vgg19
from keras import backend as K

parser = argparse.ArgumentParser(description='Neural style transfer with Keras.')
parser.add_argument('base_image_path', metavar='base', type=str,
                    help='Path to the image to transform.')
parser.add_argument('style_reference_image_path', metavar='ref', type=str,
                    help='Path to the style reference image.')
parser.add_argument('result_prefix', metavar='res_prefix', type=str,
                    help='Prefix for the saved results.')
parser.add_argument('--iter', type=int, default=10, required=False,
                    help='Number of iterations to run.')
parser.add_argument('--content_weight', type=float, default=0.025, required=False,
                    help='Content weight.')
parser.add_argument('--style_weight', type=float, default=1.0, required=False,
                    help='Style weight.')
parser.add_argument('--tv_weight', type=float, default=1.0, required=False,
                    help='Total Variation weight.')

args = parser.parse_args()
base_image_path = args.base_image_path
style_reference_image_path = args.style_reference_image_path
result_prefix = args.result_prefix
iterations = args.iter

# these are the weights of the different loss components
total_variation_weight = args.tv_weight
style_weight = args.style_weight
content_weight = args.content_weight

# dimensions of the generated picture.
width, height = load_img(base_image_path).size
img_nrows = 400
img_ncols = int(width * img_nrows / height)

# util function to open, resize and format pictures into appropriate tensors

def preprocess_image(image_path):
    img = load_img(image_path, target_size=(img_nrows, img_ncols))
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
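    # vgg19.preprocess_input converts RGB to BGR and zero-centers each
    # channel by the ImageNet mean (reversed in deprocess_image below)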
    img = vgg19.preprocess_input(img)
    return img

# util function to convert a tensor into a valid image


def deprocess_image(x):
    if K.image_data_format() == 'channels_first':
        x = x.reshape((3, img_nrows, img_ncols))
        x = x.transpose((1, 2, 0))
    else:
        x = x.reshape((img_nrows, img_ncols, 3))
    # Remove zero-center by mean pixel
    x[:, :, 0] += 103.939
    x[:, :, 1] += 116.779
    x[:, :, 2] += 123.68
    # 'BGR'->'RGB'
    x = x[:, :, ::-1]
    x = np.clip(x, 0, 255).astype('uint8')
    return x

# get tensor representations of our images
base_image = K.variable(preprocess_image(base_image_path))
style_reference_image = K.variable(preprocess_image(style_reference_image_path))

# this will contain our generated image
if K.image_data_format() == 'channels_first':
    combination_image = K.placeholder((1, 3, img_nrows, img_ncols))
else:
    combination_image = K.placeholder((1, img_nrows, img_ncols, 3))

# combine the 3 images into a single Keras tensor
input_tensor = K.concatenate([base_image,
                              style_reference_image,
                              combination_image], axis=0)

# build the VGG19 network with our 3 images as input
# the model will be loaded with pre-trained ImageNet weights
model = vgg19.VGG19(input_tensor=input_tensor,
                    weights='imagenet', include_top=False)
print('Model loaded.')

# get the symbolic outputs of each "key" layer (we gave them unique names).
outputs_dict = dict([(layer.name, layer.output) for layer in model.layers])
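# in each layer output, batch index 0 is the base image, 1 the style
# reference and 2 the combination image (the concatenation order above)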

# compute the neural style loss
# first we need to define 4 util functions

# the gram matrix of an image tensor (feature-wise outer product)


def gram_matrix(x):
    assert K.ndim(x) == 3
    if K.image_data_format() == 'channels_first':
        features = K.batch_flatten(x)
    else:
        features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    gram = K.dot(features, K.transpose(features))
    return gram

# the "style loss" is designed to maintain
# the style of the reference image in the generated image.
# It is based on the gram matrices (which capture style) of
# feature maps from the style reference image
# and from the generated image


def style_loss(style, combination):
    assert K.ndim(style) == 3
    assert K.ndim(combination) == 3
    S = gram_matrix(style)
    C = gram_matrix(combination)
    # note: a fixed normalization constant is used here rather than the
    # layer's actual channel count (cf. Gatys et al.)
    channels = 3
    size = img_nrows * img_ncols
    return K.sum(K.square(S - C)) / (4.0 * (channels ** 2) * (size ** 2))

# an auxiliary loss function
# designed to maintain the "content" of the
# base image in the generated image


def content_loss(base, combination):
    return K.sum(K.square(combination - base))

# the 3rd loss function, total variation loss,
# designed to keep the generated image locally coherent


def total_variation_loss(x):
    assert K.ndim(x) == 4
    if K.image_data_format() == 'channels_first':
        a = K.square(
            x[:, :, :img_nrows - 1, :img_ncols - 1] - x[:, :, 1:, :img_ncols - 1])
        b = K.square(
            x[:, :, :img_nrows - 1, :img_ncols - 1] - x[:, :, :img_nrows - 1, 1:])
    else:
        a = K.square(
            x[:, :img_nrows - 1, :img_ncols - 1, :] - x[:, 1:, :img_ncols - 1, :])
        b = K.square(
            x[:, :img_nrows - 1, :img_ncols - 1, :] - x[:, :img_nrows - 1, 1:, :])
    return K.sum(K.pow(a + b, 1.25))


# combine these loss functions into a single scalar
loss = K.variable(0.0)
layer_features = outputs_dict['block5_conv2']
base_image_features = layer_features[0, :, :, :]
combination_features = layer_features[2, :, :, :]
loss = loss + content_weight * content_loss(base_image_features,
                                            combination_features)

feature_layers = ['block1_conv1', 'block2_conv1',
                  'block3_conv1', 'block4_conv1',
                  'block5_conv1']
for layer_name in feature_layers:
    layer_features = outputs_dict[layer_name]
    style_reference_features = layer_features[1, :, :, :]
    combination_features = layer_features[2, :, :, :]
    sl = style_loss(style_reference_features, combination_features)
    loss = loss + (style_weight / len(feature_layers)) * sl
loss = loss + total_variation_weight * total_variation_loss(combination_image)

# get the gradients of the loss wrt the generated image
grads = K.gradients(loss, combination_image)

outputs = [loss]
if isinstance(grads, (list, tuple)):
    outputs += grads
else:
    outputs.append(grads)

f_outputs = K.function([combination_image], outputs)
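# f_outputs evaluates the graph once, returning the loss and its gradients
# with respect to the combination image in a single pass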


def eval_loss_and_grads(x):
    if K.image_data_format() == 'channels_first':
        x = x.reshape((1, 3, img_nrows, img_ncols))
    else:
        x = x.reshape((1, img_nrows, img_ncols, 3))
    outs = f_outputs([x])
    loss_value = outs[0]
    if len(outs[1:]) == 1:
        grad_values = outs[1].flatten().astype('float64')
    else:
        grad_values = np.array(outs[1:]).flatten().astype('float64')
    return loss_value, grad_values

# this Evaluator class makes it possible
# to compute loss and gradients in one pass
# while retrieving them via two separate functions,
# "loss" and "grads". This is done because scipy.optimize
# requires separate functions for loss and gradients,
# but computing them separately would be inefficient.


class Evaluator(object):

    def __init__(self):
        self.loss_value = None
        self.grad_values = None

    def loss(self, x):
        assert self.loss_value is None
        loss_value, grad_values = eval_loss_and_grads(x)
        self.loss_value = loss_value
        self.grad_values = grad_values
        return self.loss_value

    def grads(self, x):
        assert self.loss_value is not None
        grad_values = np.copy(self.grad_values)
        self.loss_value = None
        self.grad_values = None
        return grad_values


evaluator = Evaluator()

# run scipy-based optimization (L-BFGS) over the pixels of the generated image
# so as to minimize the neural style loss
x = preprocess_image(base_image_path)

for i in range(iterations):
    print('Start of iteration', i)
    start_time = time.time()
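    # maxfun=20 caps each outer iteration at 20 L-BFGS function evaluations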
    x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(),
                                     fprime=evaluator.grads, maxfun=20)
    print('Current loss value:', min_val)
    # save current generated image
    img = deprocess_image(x.copy())
    fname = result_prefix + '_at_iteration_%d.png' % i
    save_img(fname, img)
    end_time = time.time()
    print('Image saved as', fname)
    print('Iteration %d completed in %ds' % (i, end_time - start_time))