1"""Extended image transformations to `mxnet.image`."""
2from __future__ import division
3import random
4import numpy as np
5import mxnet as mx
6from mxnet import nd
7from mxnet.base import numeric_types
8
# Names exported by a star-import of this module; one entry per public
# transform defined below.
__all__ = ['imresize', 'resize_long', 'resize_short_within',
           'random_pca_lighting', 'random_expand', 'random_flip',
           'resize_contain', 'ten_crop']
12
def imresize(src, w, h, interp=1):
    """Resize an image to ``w`` x ``h`` by delegating to `mxnet.image.imresize`.

    This wrapper exists for name space consistency with the other transforms
    in this module. The interpolation code is first passed through MXNet's
    ``_get_interp_method`` together with the source and target sizes, and the
    resolved value is what gets handed to `mxnet.image.imresize`.

    Parameters
    ----------
    src : mxnet.nd.NDArray
        Source image. Its shape must unpack into exactly three values; the
        first two are used as (height, width).
    w : int, required
        Width of resized image.
    h : int, required
        Height of resized image.
    interp : int, optional, default=1
        Interpolation method (default=cv2.INTER_LINEAR).

    Returns
    -------
    out : NDArray
        The result returned by `mxnet.image.imresize`.

    Examples
    --------
    >>> import mxnet as mx
    >>> from gluoncv import data as gdata
    >>> img = mx.random.uniform(0, 255, (300, 300, 3)).astype('uint8')
    >>> print(img.shape)
    (300, 300, 3)
    >>> img = gdata.transforms.image.imresize(img, 200, 200)
    >>> print(img.shape)
    (200, 200, 3)
    """
    from mxnet.image.image import _get_interp_method as resolve_interp

    src_h, src_w, _ = src.shape
    # The resolver receives (old_h, old_w, new_h, new_w).
    method = resolve_interp(interp, (src_h, src_w, h, w))
    return mx.image.imresize(src, w, h, interp=method)
51
def resize_long(src, size, interp=2):
    """Resize an image so that its longer edge equals ``size``.

    The shorter edge is scaled by the same factor using integer floor
    division, so the result always fits inside a ``size`` x ``size`` square.
    When height and width are equal, both become ``size``.

    Note: the resize itself is carried out by `imresize`, which relies on
    MXNet's OpenCV-backed implementation (not the CV2 Python library). MXNet
    must have been built with OpenCV for `resize_long` to work.

    Parameters
    ----------
    src : NDArray
        The original image. Its shape must unpack into exactly three values;
        the first two are used as (height, width).
    size : int
        The length to be set for the longer edge.
    interp : int, optional, default=2
        Interpolation method used for resizing the image.
        Possible values:
        0: Nearest Neighbors Interpolation.
        1: Bilinear interpolation.
        2: Area-based (resampling using pixel area relation). It may be a
        preferred method for image decimation, as it gives moire-free
        results. But when the image is zoomed, it is similar to the Nearest
        Neighbors method. (used by default).
        3: Bicubic interpolation over 4x4 pixel neighborhood.
        4: Lanczos interpolation over 8x8 pixel neighborhood.
        9: Cubic for enlarge, area for shrink, bilinear for others
        10: Random select from interpolation method mentioned above.
        Note:
        When shrinking an image, it will generally look best with AREA-based
        interpolation, whereas, when enlarging an image, it will generally look
        best with Bicubic (slow) or Bilinear (faster but still looks OK).
        More details can be found in the documentation of OpenCV, please refer
        to http://docs.opencv.org/master/da/d54/group__imgproc__transform.html.

    Returns
    -------
    NDArray
        An 'NDArray' containing the resized image.

    Example
    -------
    >>> with open("flower.jpeg", 'rb') as fp:
    ...     str_image = fp.read()
    ...
    >>> image = mx.img.imdecode(str_image)
    >>> image
    <NDArray 2321x3482x3 @cpu(0)>
    >>> size = 640
    >>> new_image = resize_long(image, size)
    >>> new_image
    <NDArray 426x640x3 @cpu(0)>
    """
    from mxnet.image.image import _get_interp_method as resolve_interp

    src_h, src_w, _ = src.shape
    if src_h > src_w:
        # Height is the long edge: pin it to `size`, shrink width to match.
        dst_h = size
        dst_w = size * src_w // src_h
    else:
        # Width is the long edge (or the image is square).
        dst_w = size
        dst_h = size * src_h // src_w
    method = resolve_interp(interp, (src_h, src_w, dst_h, dst_w))
    return imresize(src, dst_w, dst_h, interp=method)
110
def resize_short_within(src, short, max_size, mult_base=1, interp=2):
    """Resize the shorter edge to ``short`` while capping the longer edge.

    The scale factor is first chosen so the shorter edge becomes ``short``.
    If the longer edge, rounded to a multiple of ``mult_base``, would then
    exceed ``max_size``, the scale is replaced by one that maps the longer
    edge to the largest multiple of ``mult_base`` not above ``max_size``; in
    that case the shorter edge ends up smaller than ``short``. Both output
    dimensions are finally rounded to multiples of ``mult_base``.

    Note: the resize itself is carried out by `imresize`, which relies on
    MXNet's OpenCV-backed implementation (not the CV2 Python library). MXNet
    must have been built with OpenCV for `resize_short_within` to work.

    Parameters
    ----------
    src : NDArray
        The original image. Its shape must unpack into exactly three values;
        the first two are used as (height, width).
    short : int
        Resize shorter side to ``short``.
    max_size : int
        Upper bound for the longer side of the new image.
    mult_base : int, default is 1
        Width and height are rounded to multiples of `mult_base`.
    interp : int, optional, default=2
        Interpolation method used for resizing the image.
        Possible values:
        0: Nearest Neighbors Interpolation.
        1: Bilinear interpolation.
        2: Area-based (resampling using pixel area relation). It may be a
        preferred method for image decimation, as it gives moire-free
        results. But when the image is zoomed, it is similar to the Nearest
        Neighbors method. (used by default).
        3: Bicubic interpolation over 4x4 pixel neighborhood.
        4: Lanczos interpolation over 8x8 pixel neighborhood.
        9: Cubic for enlarge, area for shrink, bilinear for others
        10: Random select from interpolation method mentioned above.
        Note:
        When shrinking an image, it will generally look best with AREA-based
        interpolation, whereas, when enlarging an image, it will generally look
        best with Bicubic (slow) or Bilinear (faster but still looks OK).
        More details can be found in the documentation of OpenCV, please refer
        to http://docs.opencv.org/master/da/d54/group__imgproc__transform.html.

    Returns
    -------
    NDArray
        An 'NDArray' containing the resized image.

    Example
    -------
    >>> with open("flower.jpeg", 'rb') as fp:
    ...     str_image = fp.read()
    ...
    >>> image = mx.img.imdecode(str_image)
    >>> image
    <NDArray 2321x3482x3 @cpu(0)>
    >>> new_image = resize_short_within(image, short=800, max_size=1000)
    >>> new_image
    <NDArray 667x1000x3 @cpu(0)>
    >>> new_image = resize_short_within(image, short=800, max_size=1200)
    >>> new_image
    <NDArray 800x1200x3 @cpu(0)>
    >>> new_image = resize_short_within(image, short=800, max_size=1200, mult_base=32)
    >>> new_image
    <NDArray 800x1184x3 @cpu(0)>
    """
    from mxnet.image.image import _get_interp_method as resolve_interp

    src_h, src_w, _ = src.shape
    short_side = min(src_h, src_w)
    long_side = max(src_h, src_w)

    scale = float(short) / float(short_side)
    projected_long = np.round(scale * long_side / mult_base) * mult_base
    if projected_long > max_size:
        # Long edge would overshoot: rescale so it lands on the largest
        # multiple of mult_base that still fits in max_size.
        largest_fit = np.floor(max_size / mult_base) * mult_base
        scale = float(largest_fit) / float(long_side)

    def _snap(length):
        # Scale a source length and round it to a multiple of mult_base.
        return int(np.round(length * scale / mult_base) * mult_base)

    dst_w = _snap(src_w)
    dst_h = _snap(src_h)
    method = resolve_interp(interp, (src_h, src_w, dst_h, dst_w))
    return imresize(src, dst_w, dst_h, interp=method)
181
def random_pca_lighting(src, alphastd, eigval=None, eigvec=None):
    """Add a random PCA-based lighting offset to an image.

    Three weights are drawn from a zero-mean normal distribution with
    standard deviation ``alphastd``. Each eigenvector column is scaled by its
    weight and eigenvalue, and the resulting 3-element offset is added to
    ``src`` with ``+=`` before ``src`` is returned.

    Parameters
    ----------
    src : mxnet.nd.NDArray
        Input image with HWC format.
    alphastd : float
        Noise level [0, 1) for image with range [0, 255]. When it is not
        positive, ``src`` is returned untouched and no random numbers are
        drawn.
    eigval : list of floats.
        Eigen values, defaults to [55.46, 4.794, 1.148].
    eigvec : nested lists of floats
        Eigen vectors with shape (3, 3), defaults to
        [[-0.5675, 0.7192, 0.4009],
         [-0.5808, -0.0045, -0.8140],
         [-0.5836, -0.6948, 0.4203]].

    Returns
    -------
    mxnet.nd.NDArray
        Augmented image (the same name ``src`` after the ``+=`` update).

    """
    if alphastd <= 0:
        return src

    eigval = np.array([55.46, 4.794, 1.148]) if eigval is None else eigval
    eigvec = (np.array([[-0.5675, 0.7192, 0.4009],
                        [-0.5808, -0.0045, -0.8140],
                        [-0.5836, -0.6948, 0.4203]])
              if eigvec is None else eigvec)

    weights = np.random.normal(0, alphastd, size=(3,))
    # Column j of eigvec is scaled by weights[j]; the dot with eigval then
    # collapses the columns into one offset per row.
    weighted_vectors = eigvec * weights
    offset = np.dot(weighted_vectors, eigval)
    src += nd.array(offset, ctx=src.context)
    return src
219
def random_expand(src, max_ratio=4, fill=0, keep_ratio=True):
    """Place the image at a random position on a randomly enlarged canvas.

    A horizontal expansion ratio is drawn uniformly from ``[1, max_ratio]``;
    the vertical ratio is either the same value (``keep_ratio=True``) or an
    independent draw. A canvas of that size is filled with ``fill`` and the
    original image is copied into it at a random offset.

    Parameters
    ----------
    src : mxnet.nd.NDArray
        The original image with HWC format.
    max_ratio : int or float
        Maximum ratio of the output image on both direction(vertical and
        horizontal). If it is not greater than 1, ``src`` is returned as is.
    fill : int or float or array-like
        The value(s) for padded borders. If `fill` is numerical type, RGB
        channels will be padded with single value. Otherwise `fill` must have
        same length as image channels, which resulted in padding with
        per-channel values.
    keep_ratio : bool
        If `True`, will keep output image the same aspect ratio as input.

    Returns
    -------
    mxnet.nd.NDArray
        Augmented image.
    tuple
        Tuple of (offset_x, offset_y, new_width, new_height)

    Raises
    ------
    ValueError
        If an array-like ``fill`` does not have one entry per channel.

    """
    if max_ratio <= 1:
        # Nothing to expand: report a zero offset and the original size.
        return src, (0, 0, src.shape[1], src.shape[0])

    h, w, c = src.shape
    ratio_x = random.uniform(1, max_ratio)
    ratio_y = ratio_x if keep_ratio else random.uniform(1, max_ratio)

    canvas_h = int(h * ratio_y)
    canvas_w = int(w * ratio_x)
    top = random.randint(0, canvas_h - h)
    left = random.randint(0, canvas_w - w)

    # Build the background canvas.
    if not isinstance(fill, numeric_types):
        fill = nd.array(fill, dtype=src.dtype, ctx=src.context)
        if fill.size != c:
            raise ValueError("Channel and fill size mismatch, {} vs {}".format(c, fill.size))
        # One row per pixel, each row being the per-channel fill vector.
        per_pixel = nd.tile(fill.reshape((1, c)), reps=(canvas_h * canvas_w, 1))
        canvas = per_pixel.reshape((canvas_h, canvas_w, c))
    else:
        canvas = nd.full(shape=(canvas_h, canvas_w, c), val=fill, dtype=src.dtype)

    canvas[top:top + h, left:left + w, :] = src
    return canvas, (left, top, canvas_w, canvas_h)
270
def random_flip(src, px=0, py=0, copy=False):
    """Flip an image horizontally and/or vertically at random.

    The vertical decision is drawn first and the horizontal one second, each
    via ``np.random.choice`` with the given probability. A vertical flip
    reverses axis 0; a horizontal flip reverses axis 1.

    Parameters
    ----------
    src : mxnet.nd.NDArray
        Input image with HWC format.
    px : float
        Horizontal flip probability [0, 1].
    py : float
        Vertical flip probability [0, 1].
    copy : bool
        If `True`, ``.copy()`` is called on the result before returning it,
        so the returned array is a copy even when no flip was applied.

    Returns
    -------
    mxnet.nd.NDArray
        Augmented image.
    tuple
        Tuple of (flip_x, flip_y), records of whether flips are applied.

    """
    outcomes = [False, True]
    # Draw order (y, then x) is kept so seeded runs reproduce the same flips.
    flip_y = np.random.choice(outcomes, p=[1 - py, py])
    flip_x = np.random.choice(outcomes, p=[1 - px, px])

    out = src
    if flip_y:
        out = nd.flip(out, axis=0)
    if flip_x:
        out = nd.flip(out, axis=1)
    return (out.copy() if copy else out), (flip_x, flip_y)
302
def resize_contain(src, size, fill=0):
    """Resize the image to fit in the given area while keeping aspect ratio.

    If both the height and the width in `size` are larger than
    the height and the width of input image, the image is placed on
    the center with an appropriate padding to match `size`.
    Otherwise, the input image is scaled to fit in a canvas whose size
    is `size` while preserving aspect ratio.

    Parameters
    ----------
    src : mxnet.nd.NDArray
        The original image with HWC format.
    size : tuple
        Tuple of length 2 as (width, height).
    fill : int or float or array-like
        The value(s) for padded borders. If `fill` is numerical type, RGB channels
        will be padded with single value. Otherwise `fill` must have same length
        as image channels, which resulted in padding with per-channel values.

    Returns
    -------
    mxnet.nd.NDArray
        Augmented image, with shape (size[1], size[0], C) and the dtype of
        `src`.
    tuple
        Tuple of (offset_x, offset_y, scaled_x, scaled_y)

    Raises
    ------
    ValueError
        If an array-like `fill` does not have one entry per channel.

    """
    h, w, c = src.shape
    ow, oh = size
    # Never upscale: the factor is capped at 1.
    scale = min(oh / h, ow / w, 1)
    scaled_x = int(w * scale)
    scaled_y = int(h * scale)
    if scale < 1:
        src = mx.image.imresize(src, scaled_x, scaled_y)

    # Center the (possibly shrunk) image on the canvas.
    off_y = (oh - scaled_y) // 2 if scaled_y < oh else 0
    off_x = (ow - scaled_x) // 2 if scaled_x < ow else 0

    # make canvas
    if isinstance(fill, numeric_types):
        dst = nd.full(shape=(oh, ow, c), val=fill, dtype=src.dtype)
    else:
        # Use the image dtype so both fill branches return the same dtype.
        fill = nd.array(fill, dtype=src.dtype, ctx=src.context)
        if not c == fill.size:
            raise ValueError("Channel and fill size mismatch, {} vs {}".format(c, fill.size))
        # Tile the per-channel vector once per pixel. `nd.repeat` without an
        # axis would instead emit each channel value oh*ow times in a row,
        # which reshapes into channel-blocked bands rather than a uniform
        # per-pixel fill.
        dst = nd.tile(fill.reshape((1, c)), reps=(oh * ow, 1)).reshape((oh, ow, c))

    dst[off_y:off_y+scaled_y, off_x:off_x+scaled_x, :] = src
    return dst, (off_x, off_y, scaled_x, scaled_y)
355
def ten_crop(src, size):
    """Crop 10 regions from an array.

    This is performed same as:
    http://chainercv.readthedocs.io/en/stable/reference/transforms.html#ten-crop

    Five crops of shape :obj:`size` are taken (center plus the four corners),
    stacked along a new leading axis, and then concatenated with their
    horizontally flipped versions. The resulting order is:

    * center crop
    * top-left crop
    * bottom-left crop
    * top-right crop
    * bottom-right crop
    * center crop (flipped horizontally)
    * top-left crop (flipped horizontally)
    * bottom-left crop (flipped horizontally)
    * top-right crop (flipped horizontally)
    * bottom-right crop (flipped horizontally)

    Parameters
    ----------
    src : mxnet.nd.NDArray
        Input image. Its shape must unpack into exactly three values; the
        first two are used as (height, width).
    size : tuple
        Tuple of length 2, as (width, height) of the cropped areas.

    Returns
    -------
    mxnet.nd.NDArray
        The cropped images with shape (10, size[1], size[0], C)

    Raises
    ------
    ValueError
        If the requested crop is taller or wider than the image.

    """
    h, w, _ = src.shape
    ow, oh = size

    if oh > h or ow > w:
        raise ValueError(
            "Cannot crop area {} from image with size ({}, {})".format(str(size), h, w))

    # Row / column bounds shared by the corner crops.
    top_rows = slice(0, oh)
    bottom_rows = slice(h - oh, h)
    left_cols = slice(0, ow)
    right_cols = slice(w - ow, w)
    # The center window starts at half of the leftover margin.
    mid_rows = slice((h - oh) // 2, (h + oh) // 2)
    mid_cols = slice((w - ow) // 2, (w + ow) // 2)

    center = src[mid_rows, mid_cols, :]
    top_left = src[top_rows, left_cols, :]
    bottom_left = src[bottom_rows, left_cols, :]
    top_right = src[top_rows, right_cols, :]
    bottom_right = src[bottom_rows, right_cols, :]

    plain = nd.stack(center, top_left, bottom_left, top_right, bottom_right, axis=0)
    # After stacking, axis 2 is the image width, so this is a horizontal flip.
    mirrored = nd.flip(plain, axis=2)
    return nd.concat(plain, mirrored, dim=0)
404