1import numpy as np
2
3from . import _hoghistogram
4from .._shared import utils
5
6
7def _hog_normalize_block(block, method, eps=1e-5):
8    if method == 'L1':
9        out = block / (np.sum(np.abs(block)) + eps)
10    elif method == 'L1-sqrt':
11        out = np.sqrt(block / (np.sum(np.abs(block)) + eps))
12    elif method == 'L2':
13        out = block / np.sqrt(np.sum(block ** 2) + eps ** 2)
14    elif method == 'L2-Hys':
15        out = block / np.sqrt(np.sum(block ** 2) + eps ** 2)
16        out = np.minimum(out, 0.2)
17        out = out / np.sqrt(np.sum(out ** 2) + eps ** 2)
18    else:
19        raise ValueError('Selected block normalization method is invalid.')
20
21    return out
22
23
24def _hog_channel_gradient(channel):
25    """Compute unnormalized gradient image along `row` and `col` axes.
26
27    Parameters
28    ----------
29    channel : (M, N) ndarray
30        Grayscale image or one of image channel.
31
32    Returns
33    -------
34    g_row, g_col : channel gradient along `row` and `col` axes correspondingly.
35    """
36    g_row = np.empty(channel.shape, dtype=channel.dtype)
37    g_row[0, :] = 0
38    g_row[-1, :] = 0
39    g_row[1:-1, :] = channel[2:, :] - channel[:-2, :]
40    g_col = np.empty(channel.shape, dtype=channel.dtype)
41    g_col[:, 0] = 0
42    g_col[:, -1] = 0
43    g_col[:, 1:-1] = channel[:, 2:] - channel[:, :-2]
44
45    return g_row, g_col
46
47
48@utils.channel_as_last_axis(multichannel_output=False)
49@utils.deprecate_multichannel_kwarg(multichannel_position=8)
50def hog(image, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(3, 3),
51        block_norm='L2-Hys', visualize=False, transform_sqrt=False,
52        feature_vector=True, multichannel=None, *, channel_axis=None):
53    """Extract Histogram of Oriented Gradients (HOG) for a given image.
54
55    Compute a Histogram of Oriented Gradients (HOG) by
56
57        1. (optional) global image normalization
58        2. computing the gradient image in `row` and `col`
59        3. computing gradient histograms
60        4. normalizing across blocks
61        5. flattening into a feature vector
62
63    Parameters
64    ----------
65    image : (M, N[, C]) ndarray
66        Input image.
67    orientations : int, optional
68        Number of orientation bins.
69    pixels_per_cell : 2-tuple (int, int), optional
70        Size (in pixels) of a cell.
71    cells_per_block : 2-tuple (int, int), optional
72        Number of cells in each block.
73    block_norm : str {'L1', 'L1-sqrt', 'L2', 'L2-Hys'}, optional
74        Block normalization method:
75
76        ``L1``
77           Normalization using L1-norm.
78        ``L1-sqrt``
79           Normalization using L1-norm, followed by square root.
80        ``L2``
81           Normalization using L2-norm.
82        ``L2-Hys``
83           Normalization using L2-norm, followed by limiting the
84           maximum values to 0.2 (`Hys` stands for `hysteresis`) and
85           renormalization using L2-norm. (default)
86           For details, see [3]_, [4]_.
87
88    visualize : bool, optional
89        Also return an image of the HOG.  For each cell and orientation bin,
90        the image contains a line segment that is centered at the cell center,
91        is perpendicular to the midpoint of the range of angles spanned by the
92        orientation bin, and has intensity proportional to the corresponding
93        histogram value.
94    transform_sqrt : bool, optional
95        Apply power law compression to normalize the image before
96        processing. DO NOT use this if the image contains negative
97        values. Also see `notes` section below.
98    feature_vector : bool, optional
99        Return the data as a feature vector by calling .ravel() on the result
100        just before returning.
101    multichannel : boolean, optional
102        If True, the last `image` dimension is considered as a color channel,
103        otherwise as spatial. This argument is deprecated: specify
104        `channel_axis` instead.
105    channel_axis : int or None, optional
106        If None, the image is assumed to be a grayscale (single channel) image.
107        Otherwise, this parameter indicates which axis of the array corresponds
108        to channels.
109
110        .. versionadded:: 0.19
111           `channel_axis` was added in 0.19.
112
113    Returns
114    -------
115    out : (n_blocks_row, n_blocks_col, n_cells_row, n_cells_col, n_orient) ndarray
116        HOG descriptor for the image. If `feature_vector` is True, a 1D
117        (flattened) array is returned.
118    hog_image : (M, N) ndarray, optional
119        A visualisation of the HOG image. Only provided if `visualize` is True.
120
121    References
122    ----------
123    .. [1] https://en.wikipedia.org/wiki/Histogram_of_oriented_gradients
124
125    .. [2] Dalal, N and Triggs, B, Histograms of Oriented Gradients for
126           Human Detection, IEEE Computer Society Conference on Computer
127           Vision and Pattern Recognition 2005 San Diego, CA, USA,
128           https://lear.inrialpes.fr/people/triggs/pubs/Dalal-cvpr05.pdf,
129           :DOI:`10.1109/CVPR.2005.177`
130
131    .. [3] Lowe, D.G., Distinctive image features from scale-invatiant
132           keypoints, International Journal of Computer Vision (2004) 60: 91,
133           http://www.cs.ubc.ca/~lowe/papers/ijcv04.pdf,
134           :DOI:`10.1023/B:VISI.0000029664.99615.94`
135
136    .. [4] Dalal, N, Finding People in Images and Videos,
137           Human-Computer Interaction [cs.HC], Institut National Polytechnique
138           de Grenoble - INPG, 2006,
139           https://tel.archives-ouvertes.fr/tel-00390303/file/NavneetDalalThesis.pdf
140
141    Notes
142    -----
143    The presented code implements the HOG extraction method from [2]_ with
144    the following changes: (I) blocks of (3, 3) cells are used ((2, 2) in the
145    paper); (II) no smoothing within cells (Gaussian spatial window with sigma=8pix
146    in the paper); (III) L1 block normalization is used (L2-Hys in the paper).
147
148    Power law compression, also known as Gamma correction, is used to reduce
149    the effects of shadowing and illumination variations. The compression makes
150    the dark regions lighter. When the kwarg `transform_sqrt` is set to
151    ``True``, the function computes the square root of each color channel
152    and then applies the hog algorithm to the image.
153    """
154    image = np.atleast_2d(image)
155    float_dtype = utils._supported_float_type(image.dtype)
156    image = image.astype(float_dtype, copy=False)
157
158    multichannel = channel_axis is not None
159    ndim_spatial = image.ndim - 1 if multichannel else image.ndim
160    if ndim_spatial != 2:
161        raise ValueError('Only images with two spatial dimensions are '
162                         'supported. If using with color/multichannel '
163                         'images, specify `channel_axis`.')
164
165    """
166    The first stage applies an optional global image normalization
167    equalisation that is designed to reduce the influence of illumination
168    effects. In practice we use gamma (power law) compression, either
169    computing the square root or the log of each color channel.
170    Image texture strength is typically proportional to the local surface
171    illumination so this compression helps to reduce the effects of local
172    shadowing and illumination variations.
173    """
174
175    if transform_sqrt:
176        image = np.sqrt(image)
177
178    """
179    The second stage computes first order image gradients. These capture
180    contour, silhouette and some texture information, while providing
181    further resistance to illumination variations. The locally dominant
182    color channel is used, which provides color invariance to a large
183    extent. Variant methods may also include second order image derivatives,
184    which act as primitive bar detectors - a useful feature for capturing,
185    e.g. bar like structures in bicycles and limbs in humans.
186    """
187
188    if multichannel:
189        g_row_by_ch = np.empty_like(image, dtype=float_dtype)
190        g_col_by_ch = np.empty_like(image, dtype=float_dtype)
191        g_magn = np.empty_like(image, dtype=float_dtype)
192
193        for idx_ch in range(image.shape[2]):
194            g_row_by_ch[:, :, idx_ch], g_col_by_ch[:, :, idx_ch] = \
195                _hog_channel_gradient(image[:, :, idx_ch])
196            g_magn[:, :, idx_ch] = np.hypot(g_row_by_ch[:, :, idx_ch],
197                                            g_col_by_ch[:, :, idx_ch])
198
199        # For each pixel select the channel with the highest gradient magnitude
200        idcs_max = g_magn.argmax(axis=2)
201        rr, cc = np.meshgrid(np.arange(image.shape[0]),
202                             np.arange(image.shape[1]),
203                             indexing='ij',
204                             sparse=True)
205        g_row = g_row_by_ch[rr, cc, idcs_max]
206        g_col = g_col_by_ch[rr, cc, idcs_max]
207    else:
208        g_row, g_col = _hog_channel_gradient(image)
209
210    """
211    The third stage aims to produce an encoding that is sensitive to
212    local image content while remaining resistant to small changes in
213    pose or appearance. The adopted method pools gradient orientation
214    information locally in the same way as the SIFT [Lowe 2004]
215    feature. The image window is divided into small spatial regions,
216    called "cells". For each cell we accumulate a local 1-D histogram
217    of gradient or edge orientations over all the pixels in the
218    cell. This combined cell-level 1-D histogram forms the basic
219    "orientation histogram" representation. Each orientation histogram
220    divides the gradient angle range into a fixed number of
221    predetermined bins. The gradient magnitudes of the pixels in the
222    cell are used to vote into the orientation histogram.
223    """
224
225    s_row, s_col = image.shape[:2]
226    c_row, c_col = pixels_per_cell
227    b_row, b_col = cells_per_block
228
229    n_cells_row = int(s_row // c_row)  # number of cells along row-axis
230    n_cells_col = int(s_col // c_col)  # number of cells along col-axis
231
232    # compute orientations integral images
233    orientation_histogram = np.zeros((n_cells_row, n_cells_col, orientations),
234                                     dtype=float)
235    g_row = g_row.astype(float, copy=False)
236    g_col = g_col.astype(float, copy=False)
237
238    _hoghistogram.hog_histograms(g_col, g_row, c_col, c_row, s_col, s_row,
239                                 n_cells_col, n_cells_row,
240                                 orientations, orientation_histogram)
241
242    # now compute the histogram for each cell
243    hog_image = None
244
245    if visualize:
246        from .. import draw
247
248        radius = min(c_row, c_col) // 2 - 1
249        orientations_arr = np.arange(orientations)
250        # set dr_arr, dc_arr to correspond to midpoints of orientation bins
251        orientation_bin_midpoints = (
252            np.pi * (orientations_arr + .5) / orientations)
253        dr_arr = radius * np.sin(orientation_bin_midpoints)
254        dc_arr = radius * np.cos(orientation_bin_midpoints)
255        hog_image = np.zeros((s_row, s_col), dtype=float_dtype)
256        for r in range(n_cells_row):
257            for c in range(n_cells_col):
258                for o, dr, dc in zip(orientations_arr, dr_arr, dc_arr):
259                    centre = tuple([r * c_row + c_row // 2,
260                                    c * c_col + c_col // 2])
261                    rr, cc = draw.line(int(centre[0] - dc),
262                                       int(centre[1] + dr),
263                                       int(centre[0] + dc),
264                                       int(centre[1] - dr))
265                    hog_image[rr, cc] += orientation_histogram[r, c, o]
266
267    """
268    The fourth stage computes normalization, which takes local groups of
269    cells and contrast normalizes their overall responses before passing
270    to next stage. Normalization introduces better invariance to illumination,
271    shadowing, and edge contrast. It is performed by accumulating a measure
272    of local histogram "energy" over local groups of cells that we call
273    "blocks". The result is used to normalize each cell in the block.
274    Typically each individual cell is shared between several blocks, but
275    its normalizations are block dependent and thus different. The cell
276    thus appears several times in the final output vector with different
277    normalizations. This may seem redundant but it improves the performance.
278    We refer to the normalized block descriptors as Histogram of Oriented
279    Gradient (HOG) descriptors.
280    """
281
282    n_blocks_row = (n_cells_row - b_row) + 1
283    n_blocks_col = (n_cells_col - b_col) + 1
284    normalized_blocks = np.zeros(
285        (n_blocks_row, n_blocks_col, b_row, b_col, orientations),
286        dtype=float_dtype
287    )
288
289    for r in range(n_blocks_row):
290        for c in range(n_blocks_col):
291            block = orientation_histogram[r:r + b_row, c:c + b_col, :]
292            normalized_blocks[r, c, :] = \
293                _hog_normalize_block(block, method=block_norm)
294
295    """
296    The final step collects the HOG descriptors from all blocks of a dense
297    overlapping grid of blocks covering the detection window into a combined
298    feature vector for use in the window classifier.
299    """
300
301    if feature_vector:
302        normalized_blocks = normalized_blocks.ravel()
303
304    if visualize:
305        return normalized_blocks, hog_image
306    else:
307        return normalized_blocks
308