"""Extended image transformations to `mxnet.image`."""
from __future__ import division
import random
import numpy as np
import mxnet as mx
from mxnet import nd
from mxnet.base import numeric_types

__all__ = ['imresize', 'resize_long', 'resize_short_within',
           'random_pca_lighting', 'random_expand', 'random_flip',
           'resize_contain', 'ten_crop']


def _make_canvas(src, height, width, fill):
    """Allocate a (height, width, C) canvas filled with `fill`.

    The canvas takes its channel count, dtype and context from `src` so that
    a later slice-assignment of `src` into it needs no conversion.

    Parameters
    ----------
    src : mxnet.nd.NDArray
        Reference image with HWC format.
    height : int
        Canvas height.
    width : int
        Canvas width.
    fill : int or float or array-like
        Single value for every channel, or one value per channel.

    Returns
    -------
    mxnet.nd.NDArray
        The filled canvas.

    Raises
    ------
    ValueError
        If `fill` is array-like and its size differs from the channel count.

    """
    channels = src.shape[2]
    if isinstance(fill, numeric_types):
        return nd.full(shape=(height, width, channels), val=fill,
                       ctx=src.context, dtype=src.dtype)

    fill = nd.array(fill, dtype=src.dtype, ctx=src.context)
    if not channels == fill.size:
        raise ValueError("Channel and fill size mismatch, {} vs {}".format(channels, fill.size))
    # Tile the (1, C) row once per pixel so every pixel reads [f0, f1, ...].
    # A flat `repeat` would give f0,f0,...,f1,f1,... and scramble channels.
    pixels = nd.tile(fill.reshape((1, channels)), reps=(height * width, 1))
    return pixels.reshape((height, width, channels))


def imresize(src, w, h, interp=1):
    """Resize image with OpenCV.

    This is a duplicate of mxnet.image.imresize for name space consistency.

    Parameters
    ----------
    src : mxnet.nd.NDArray
        source image
    w : int, required
        Width of resized image.
    h : int, required
        Height of resized image.
    interp : int, optional, default='1'
        Interpolation method (default=cv2.INTER_LINEAR).

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.

    Examples
    --------
    >>> import mxnet as mx
    >>> from gluoncv import data as gdata
    >>> img = mx.random.uniform(0, 255, (300, 300, 3)).astype('uint8')
    >>> print(img.shape)
    (300, 300, 3)
    >>> img = gdata.transforms.image.imresize(img, 200, 200)
    >>> print(img.shape)
    (200, 200, 3)
    """
    from mxnet.image.image import _get_interp_method as get_interp
    oh, ow, _ = src.shape
    # Resolve the interpolation id against (old_h, old_w, new_h, new_w).
    return mx.image.imresize(src, w, h, interp=get_interp(interp, (oh, ow, h, w)))


def resize_long(src, size, interp=2):
    """Resizes longer edge to size.

    Note: `resize_long` uses OpenCV (not the CV2 Python library).
    MXNet must have been built with OpenCV for `resize_long` to work.
    Resizes the original image by setting the longer edge to size
    and setting the shorter edge accordingly. This will ensure the new image will
    fit into the `size` specified.
    Resizing function is called from OpenCV.

    Parameters
    ----------
    src : NDArray
        The original image.
    size : int
        The length to be set for the longer edge.
    interp : int, optional, default=2
        Interpolation method used for resizing the image.
        Possible values:
        0: Nearest Neighbors Interpolation.
        1: Bilinear interpolation.
        2: Area-based (resampling using pixel area relation). It may be a
        preferred method for image decimation, as it gives moire-free
        results. But when the image is zoomed, it is similar to the Nearest
        Neighbors method. (used by default).
        3: Bicubic interpolation over 4x4 pixel neighborhood.
        4: Lanczos interpolation over 8x8 pixel neighborhood.
        9: Cubic for enlarge, area for shrink, bilinear for others
        10: Random select from interpolation method mentioned above.
        Note:
        When shrinking an image, it will generally look best with AREA-based
        interpolation, whereas, when enlarging an image, it will generally look best
        with Bicubic (slow) or Bilinear (faster but still looks OK).
        More details can be found in the documentation of OpenCV, please refer to
        http://docs.opencv.org/master/da/d54/group__imgproc__transform.html.

    Returns
    -------
    NDArray
        An 'NDArray' containing the resized image.

    Example
    -------
    >>> with open("flower.jpeg", 'rb') as fp:
    ...     str_image = fp.read()
    ...
    >>> image = mx.img.imdecode(str_image)
    >>> image
    <NDArray 2321x3482x3 @cpu(0)>
    >>> size = 640
    >>> new_image = resize_long(image, size)
    >>> new_image
    <NDArray 386x640x3 @cpu(0)>
    """
    h, w, _ = src.shape
    if h > w:
        new_h, new_w = size, size * w // h
    else:
        new_h, new_w = size * h // w, size
    # `imresize` resolves `interp` against the same (h, w, new_h, new_w)
    # sizes, so it is passed through untouched instead of resolved twice.
    return imresize(src, new_w, new_h, interp=interp)


def resize_short_within(src, short, max_size, mult_base=1, interp=2):
    """Resizes shorter edge to size but make sure it's capped at maximum size.

    Note: `resize_short_within` uses OpenCV (not the CV2 Python library).
    MXNet must have been built with OpenCV for `resize_short_within` to work.
    Resizes the original image by setting the shorter edge to size
    and setting the longer edge accordingly. Also this function will ensure
    the new image will not exceed ``max_size`` even at the longer side.
    Resizing function is called from OpenCV.

    Parameters
    ----------
    src : NDArray
        The original image.
    short : int
        Resize shorter side to ``short``.
    max_size : int
        Make sure the longer side of new image does not exceed ``max_size``.
    mult_base : int, default is 1
        Width and height are rounded to multiples of `mult_base`.
    interp : int, optional, default=2
        Interpolation method used for resizing the image.
        Possible values:
        0: Nearest Neighbors Interpolation.
        1: Bilinear interpolation.
        2: Area-based (resampling using pixel area relation). It may be a
        preferred method for image decimation, as it gives moire-free
        results. But when the image is zoomed, it is similar to the Nearest
        Neighbors method. (used by default).
        3: Bicubic interpolation over 4x4 pixel neighborhood.
        4: Lanczos interpolation over 8x8 pixel neighborhood.
        9: Cubic for enlarge, area for shrink, bilinear for others
        10: Random select from interpolation method mentioned above.
        Note:
        When shrinking an image, it will generally look best with AREA-based
        interpolation, whereas, when enlarging an image, it will generally look best
        with Bicubic (slow) or Bilinear (faster but still looks OK).
        More details can be found in the documentation of OpenCV, please refer to
        http://docs.opencv.org/master/da/d54/group__imgproc__transform.html.

    Returns
    -------
    NDArray
        An 'NDArray' containing the resized image.

    Example
    -------
    >>> with open("flower.jpeg", 'rb') as fp:
    ...     str_image = fp.read()
    ...
    >>> image = mx.img.imdecode(str_image)
    >>> image
    <NDArray 2321x3482x3 @cpu(0)>
    >>> new_image = resize_short_within(image, short=800, max_size=1000)
    >>> new_image
    <NDArray 667x1000x3 @cpu(0)>
    >>> new_image = resize_short_within(image, short=800, max_size=1200)
    >>> new_image
    <NDArray 800x1200x3 @cpu(0)>
    >>> new_image = resize_short_within(image, short=800, max_size=1200, mult_base=32)
    >>> new_image
    <NDArray 800x1184x3 @cpu(0)>
    """
    h, w, _ = src.shape
    im_size_min, im_size_max = (h, w) if w > h else (w, h)
    scale = float(short) / float(im_size_min)
    if np.round(scale * im_size_max / mult_base) * mult_base > max_size:
        # The longer side would overshoot: rescale so that it lands on the
        # largest multiple of `mult_base` that still fits in `max_size`.
        scale = float(np.floor(max_size / mult_base) * mult_base) / float(im_size_max)
    new_w = int(np.round(w * scale / mult_base) * mult_base)
    new_h = int(np.round(h * scale / mult_base) * mult_base)
    # `imresize` resolves `interp` against the same (h, w, new_h, new_w)
    # sizes, so it is passed through untouched instead of resolved twice.
    return imresize(src, new_w, new_h, interp=interp)


def random_pca_lighting(src, alphastd, eigval=None, eigvec=None):
    """Apply random pca lighting noise to input image.

    Note that the noise is added to `src` in place; the returned array is
    the same object as the input.

    Parameters
    ----------
    src : mxnet.nd.NDArray
        Input image with HWC format.
    alphastd : float
        Noise level [0, 1) for image with range [0, 255].
        Values ``<= 0`` disable the augmentation.
    eigval : list of floats.
        Eigen values, defaults to [55.46, 4.794, 1.148].
    eigvec : nested lists of floats
        Eigen vectors with shape (3, 3), defaults to
        [[-0.5675, 0.7192, 0.4009],
        [-0.5808, -0.0045, -0.8140],
        [-0.5836, -0.6948, 0.4203]].

    Returns
    -------
    mxnet.nd.NDArray
        Augmented image.

    """
    if alphastd <= 0:
        return src

    if eigval is None:
        eigval = [55.46, 4.794, 1.148]
    if eigvec is None:
        eigvec = [[-0.5675, 0.7192, 0.4009],
                  [-0.5808, -0.0045, -0.8140],
                  [-0.5836, -0.6948, 0.4203]]
    # Accept plain (nested) lists as documented.
    eigval = np.asarray(eigval)
    eigvec = np.asarray(eigvec)

    alpha = np.random.normal(0, alphastd, size=(3,))
    # One offset per channel: sum_j eigvec[i, j] * alpha[j] * eigval[j].
    rgb = np.dot(eigvec * alpha, eigval)
    src += nd.array(rgb, ctx=src.context)
    return src


def random_expand(src, max_ratio=4, fill=0, keep_ratio=True):
    """Random expand original image with borders, this is identical to placing
    the original image on a larger canvas.

    Parameters
    ----------
    src : mxnet.nd.NDArray
        The original image with HWC format.
    max_ratio : int or float
        Maximum ratio of the output image on both direction(vertical and horizontal)
    fill : int or float or array-like
        The value(s) for padded borders. If `fill` is numerical type, RGB channels
        will be padded with single value. Otherwise `fill` must have same length
        as image channels, which resulted in padding with per-channel values.
    keep_ratio : bool
        If `True`, will keep output image the same aspect ratio as input.

    Returns
    -------
    mxnet.nd.NDArray
        Augmented image. When ``max_ratio <= 1`` the input is returned as is.
    tuple
        Tuple of (offset_x, offset_y, new_width, new_height)

    Raises
    ------
    ValueError
        If `fill` is array-like and its size differs from the channel count.

    """
    if max_ratio <= 1:
        return src, (0, 0, src.shape[1], src.shape[0])

    h, w, _ = src.shape
    ratio_x = random.uniform(1, max_ratio)
    if keep_ratio:
        ratio_y = ratio_x
    else:
        ratio_y = random.uniform(1, max_ratio)

    oh, ow = int(h * ratio_y), int(w * ratio_x)
    # Top-left corner of the original image inside the enlarged canvas.
    off_y = random.randint(0, oh - h)
    off_x = random.randint(0, ow - w)

    dst = _make_canvas(src, oh, ow, fill)
    dst[off_y:off_y+h, off_x:off_x+w, :] = src
    return dst, (off_x, off_y, ow, oh)


def random_flip(src, px=0, py=0, copy=False):
    """Randomly flip image along horizontal and vertical with probabilities.

    Parameters
    ----------
    src : mxnet.nd.NDArray
        Input image with HWC format.
    px : float
        Horizontal flip probability [0, 1].
    py : float
        Vertical flip probability [0, 1].
    copy : bool
        If `True`, return a copy of input

    Returns
    -------
    mxnet.nd.NDArray
        Augmented image.
    tuple
        Tuple of (flip_x, flip_y), records of whether flips are applied.

    """
    # The vertical decision is drawn first; keep this order so that seeded
    # runs consume the random stream the same way.
    flip_y = np.random.choice([False, True], p=[1-py, py])
    flip_x = np.random.choice([False, True], p=[1-px, px])
    if flip_y:
        src = nd.flip(src, axis=0)
    if flip_x:
        src = nd.flip(src, axis=1)
    if copy:
        src = src.copy()
    return src, (flip_x, flip_y)


def resize_contain(src, size, fill=0):
    """Resize the image to fit in the given area while keeping aspect ratio.

    If both the height and the width in `size` are larger than
    the height and the width of input image, the image is placed on
    the center with an appropriate padding to match `size`.
    Otherwise, the input image is scaled to fit in a canvas whose size
    is `size` while preserving aspect ratio.

    Parameters
    ----------
    src : mxnet.nd.NDArray
        The original image with HWC format.
    size : tuple
        Tuple of length 2 as (width, height).
    fill : int or float or array-like
        The value(s) for padded borders. If `fill` is numerical type, RGB channels
        will be padded with single value. Otherwise `fill` must have same length
        as image channels, which resulted in padding with per-channel values.

    Returns
    -------
    mxnet.nd.NDArray
        Augmented image.
    tuple
        Tuple of (offset_x, offset_y, scaled_x, scaled_y)

    Raises
    ------
    ValueError
        If `fill` is array-like and its size differs from the channel count.

    """
    h, w, _ = src.shape
    ow, oh = size

    # Integer arithmetic keeps the binding side exactly equal to the target;
    # a float product such as `int(w * (ow / w))` can come out one pixel
    # short because of rounding.
    if ow >= w and oh >= h:
        # Image already fits: never upscale.
        shrink = False
        scaled_x, scaled_y = w, h
    elif ow * h <= oh * w:
        # Width is the binding constraint (ow / w <= oh / h).
        shrink = True
        scaled_x, scaled_y = ow, ow * h // w
    else:
        # Height is the binding constraint.
        shrink = True
        scaled_x, scaled_y = oh * w // h, oh
    if shrink:
        src = mx.image.imresize(src, scaled_x, scaled_y)

    # Center the (possibly resized) image on the canvas.
    off_y = (oh - scaled_y) // 2
    off_x = (ow - scaled_x) // 2

    dst = _make_canvas(src, oh, ow, fill)
    dst[off_y:off_y+scaled_y, off_x:off_x+scaled_x, :] = src
    return dst, (off_x, off_y, scaled_x, scaled_y)


def ten_crop(src, size):
    """Crop 10 regions from an array.

    This is performed same as:
    http://chainercv.readthedocs.io/en/stable/reference/transforms.html#ten-crop

    This method crops 10 regions. All regions will be in shape
    :obj:`size`. These regions consist of 1 center crop and 4 corner
    crops and horizontal flips of them.
    The crops are ordered in this order.

    * center crop
    * top-left crop
    * bottom-left crop
    * top-right crop
    * bottom-right crop
    * center crop (flipped horizontally)
    * top-left crop (flipped horizontally)
    * bottom-left crop (flipped horizontally)
    * top-right crop (flipped horizontally)
    * bottom-right crop (flipped horizontally)

    Parameters
    ----------
    src : mxnet.nd.NDArray
        Input image.
    size : tuple
        Tuple of length 2, as (width, height) of the cropped areas.

    Returns
    -------
    mxnet.nd.NDArray
        The cropped images with shape (10, size[1], size[0], C)

    Raises
    ------
    ValueError
        If the requested crop is larger than the image in either dimension.

    """
    h, w, _ = src.shape
    ow, oh = size

    if h < oh or w < ow:
        raise ValueError(
            "Cannot crop area {} from image with size ({}, {})".format(str(size), h, w))

    center = src[(h - oh) // 2:(h + oh) // 2, (w - ow) // 2:(w + ow) // 2, :]
    tl = src[0:oh, 0:ow, :]
    bl = src[h - oh:h, 0:ow, :]
    tr = src[0:oh, w - ow:w, :]
    br = src[h - oh:h, w - ow:w, :]
    crops = nd.stack(*[center, tl, bl, tr, br], axis=0)
    # Stacked layout is (N, H, W, C), so axis 2 is the horizontal direction.
    crops = nd.concat(*[crops, nd.flip(crops, axis=2)], dim=0)
    return crops