'''Neural style transfer with Keras.

Run the script with:
```
python neural_style_transfer.py path_to_your_base_image.jpg \
    path_to_your_reference.jpg prefix_for_results
```
e.g.:
```
python neural_style_transfer.py img/tuebingen.jpg \
    img/starry_night.jpg results/my_result
```
Optional parameters:
```
--iter, The number of iterations to run the style transfer for (Default is 10)
--content_weight, The weight given to the content loss (Default is 0.025)
--style_weight, The weight given to the style loss (Default is 1.0)
--tv_weight, The weight given to the total variation loss (Default is 1.0)
```

It is preferable to run this script on GPU, for speed.

Example result: https://twitter.com/fchollet/status/686631033085677568

# Details

Style transfer consists of generating an image
with the same "content" as a base image, but with the
"style" of a different picture (typically artistic).

This is achieved through the optimization of a loss function
that has 3 components: "style loss", "content loss",
and "total variation loss":

- The total variation loss imposes local spatial continuity between
the pixels of the combination image, giving it visual coherence.

- The style loss is where the deep learning kicks in: it is defined
using a deep convolutional neural network. More precisely, it consists of a sum
of L2 distances between the Gram matrices of the representations of
the style reference image and the combination image, extracted from
different layers of a convnet (trained on ImageNet). The general idea
is to capture color/texture information at different spatial
scales (fairly large scales, determined by the depth of the layer considered).

- The content loss is an L2 distance between the features of the base
image (extracted from a deep layer) and the features of the combination image,
keeping the generated image close enough to the original one.
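
Schematically, the script minimizes the weighted sum of these three terms;
in pseudo-code, using the weight variables and helper functions defined
further down in this file:
```
loss = content_weight * content_loss(base, combination)    # 'block5_conv2' features
     + (style_weight / 5) * sum(style_loss(style_reference, combination)
                                over the 5 'blockN_conv1' layers)
     + total_variation_weight * total_variation_loss(combination)
```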

# References
- [A Neural Algorithm of Artistic Style](http://arxiv.org/abs/1508.06576)
'''

from __future__ import print_function
from keras.preprocessing.image import load_img, save_img, img_to_array
import numpy as np
from scipy.optimize import fmin_l_bfgs_b
import time
import argparse

from keras.applications import vgg19
from keras import backend as K

parser = argparse.ArgumentParser(description='Neural style transfer with Keras.')
parser.add_argument('base_image_path', metavar='base', type=str,
                    help='Path to the image to transform.')
parser.add_argument('style_reference_image_path', metavar='ref', type=str,
                    help='Path to the style reference image.')
parser.add_argument('result_prefix', metavar='res_prefix', type=str,
                    help='Prefix for the saved results.')
parser.add_argument('--iter', type=int, default=10, required=False,
                    help='Number of iterations to run.')
parser.add_argument('--content_weight', type=float, default=0.025, required=False,
                    help='Content weight.')
parser.add_argument('--style_weight', type=float, default=1.0, required=False,
                    help='Style weight.')
parser.add_argument('--tv_weight', type=float, default=1.0, required=False,
                    help='Total Variation weight.')

args = parser.parse_args()
base_image_path = args.base_image_path
style_reference_image_path = args.style_reference_image_path
result_prefix = args.result_prefix
iterations = args.iter

# these are the weights of the different loss components
total_variation_weight = args.tv_weight
style_weight = args.style_weight
content_weight = args.content_weight

# dimensions of the generated picture.
width, height = load_img(base_image_path).size
img_nrows = 400
img_ncols = int(width * img_nrows / height)

# util function to open, resize and format pictures into appropriate tensors


def preprocess_image(image_path):
    img = load_img(image_path, target_size=(img_nrows, img_ncols))
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = vgg19.preprocess_input(img)
    return img

# util function to convert a tensor into a valid image


def deprocess_image(x):
    if K.image_data_format() == 'channels_first':
        x = x.reshape((3, img_nrows, img_ncols))
        x = x.transpose((1, 2, 0))
    else:
        x = x.reshape((img_nrows, img_ncols, 3))
    # Remove zero-center by mean pixel
    x[:, :, 0] += 103.939
    x[:, :, 1] += 116.779
    x[:, :, 2] += 123.68
    # 'BGR'->'RGB'
    x = x[:, :, ::-1]
    x = np.clip(x, 0, 255).astype('uint8')
    return x

# get tensor representations of our images
base_image = K.variable(preprocess_image(base_image_path))
style_reference_image = K.variable(preprocess_image(style_reference_image_path))

# this will contain our generated image
if K.image_data_format() == 'channels_first':
    combination_image = K.placeholder((1, 3, img_nrows, img_ncols))
else:
    combination_image = K.placeholder((1, img_nrows, img_ncols, 3))

# combine the 3 images into a single Keras tensor
input_tensor = K.concatenate([base_image,
                              style_reference_image,
                              combination_image], axis=0)

# build the VGG19 network with our 3 images as input
# the model will be loaded with pre-trained ImageNet weights
model = vgg19.VGG19(input_tensor=input_tensor,
                    weights='imagenet', include_top=False)
print('Model loaded.')

# get the symbolic outputs of each "key" layer (we gave them unique names).
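# (e.g. outputs_dict['block5_conv2'] is the output tensor of that layer, computed
# on the batch of 3 stacked images: index 0 is the base image, index 1 the style
# reference image, index 2 the combination image)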
outputs_dict = dict([(layer.name, layer.output) for layer in model.layers])

# compute the neural style loss
# first we need to define 4 util functions

# the gram matrix of an image tensor (feature-wise outer product)


def gram_matrix(x):
    assert K.ndim(x) == 3
    if K.image_data_format() == 'channels_first':
        features = K.batch_flatten(x)
    else:
        features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    gram = K.dot(features, K.transpose(features))
    return gram

# the "style loss" is designed to maintain
# the style of the reference image in the generated image.
# It is based on the gram matrices (which capture style) of
# feature maps from the style reference image
# and from the generated image


def style_loss(style, combination):
    assert K.ndim(style) == 3
    assert K.ndim(combination) == 3
    S = gram_matrix(style)
    C = gram_matrix(combination)
    channels = 3
    size = img_nrows * img_ncols
    return K.sum(K.square(S - C)) / (4.0 * (channels ** 2) * (size ** 2))

# an auxiliary loss function
# designed to maintain the "content" of the
# base image in the generated image


def content_loss(base, combination):
    return K.sum(K.square(combination - base))

# the 3rd loss function, total variation loss,
# designed to keep the generated image locally coherent


def total_variation_loss(x):
    assert K.ndim(x) == 4
    if K.image_data_format() == 'channels_first':
        a = K.square(
            x[:, :, :img_nrows - 1, :img_ncols - 1] - x[:, :, 1:, :img_ncols - 1])
        b = K.square(
            x[:, :, :img_nrows - 1, :img_ncols - 1] - x[:, :, :img_nrows - 1, 1:])
    else:
        a = K.square(
            x[:, :img_nrows - 1, :img_ncols - 1, :] - x[:, 1:, :img_ncols - 1, :])
        b = K.square(
            x[:, :img_nrows - 1, :img_ncols - 1, :] - x[:, :img_nrows - 1, 1:, :])
    return K.sum(K.pow(a + b, 1.25))


# combine these loss functions into a single scalar
loss = K.variable(0.0)
layer_features = outputs_dict['block5_conv2']
base_image_features = layer_features[0, :, :, :]
combination_features = layer_features[2, :, :, :]
loss = loss + content_weight * content_loss(base_image_features,
                                            combination_features)

feature_layers = ['block1_conv1', 'block2_conv1',
                  'block3_conv1', 'block4_conv1',
                  'block5_conv1']
for layer_name in feature_layers:
    layer_features = outputs_dict[layer_name]
    style_reference_features = layer_features[1, :, :, :]
    combination_features = layer_features[2, :, :, :]
    sl = style_loss(style_reference_features, combination_features)
    loss = loss + (style_weight / len(feature_layers)) * sl
loss = loss + total_variation_weight * total_variation_loss(combination_image)

# get the gradients of the generated image wrt the loss
grads = K.gradients(loss, combination_image)

outputs = [loss]
if isinstance(grads, (list, tuple)):
    outputs += grads
else:
    outputs.append(grads)

f_outputs = K.function([combination_image], outputs)


def eval_loss_and_grads(x):
    if K.image_data_format() == 'channels_first':
        x = x.reshape((1, 3, img_nrows, img_ncols))
    else:
        x = x.reshape((1, img_nrows, img_ncols, 3))
    outs = f_outputs([x])
    loss_value = outs[0]
    if len(outs[1:]) == 1:
        grad_values = outs[1].flatten().astype('float64')
    else:
        grad_values = np.array(outs[1:]).flatten().astype('float64')
    return loss_value, grad_values

# this Evaluator class makes it possible
# to compute loss and gradients in one pass
# while retrieving them via two separate functions,
# "loss" and "grads". This is done because scipy.optimize
# requires separate functions for loss and gradients,
# but computing them separately would be inefficient.


class Evaluator(object):

    def __init__(self):
        self.loss_value = None
        self.grad_values = None

    def loss(self, x):
        assert self.loss_value is None
        loss_value, grad_values = eval_loss_and_grads(x)
        self.loss_value = loss_value
        self.grad_values = grad_values
        return self.loss_value

    def grads(self, x):
        assert self.loss_value is not None
        grad_values = np.copy(self.grad_values)
        self.loss_value = None
        self.grad_values = None
        return grad_values


evaluator = Evaluator()

# run scipy-based optimization (L-BFGS) over the pixels of the generated image
# so as to minimize the neural style loss
x = preprocess_image(base_image_path)

for i in range(iterations):
    print('Start of iteration', i)
    start_time = time.time()
    x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(),
                                     fprime=evaluator.grads, maxfun=20)
    print('Current loss value:', min_val)
    # save current generated image
    img = deprocess_image(x.copy())
    fname = result_prefix + '_at_iteration_%d.png' % i
    save_img(fname, img)
    end_time = time.time()
    print('Image saved as', fname)
    print('Iteration %d completed in %ds' % (i, end_time - start_time))