1import numpy as np 2 3from . import _hoghistogram 4from .._shared import utils 5 6 7def _hog_normalize_block(block, method, eps=1e-5): 8 if method == 'L1': 9 out = block / (np.sum(np.abs(block)) + eps) 10 elif method == 'L1-sqrt': 11 out = np.sqrt(block / (np.sum(np.abs(block)) + eps)) 12 elif method == 'L2': 13 out = block / np.sqrt(np.sum(block ** 2) + eps ** 2) 14 elif method == 'L2-Hys': 15 out = block / np.sqrt(np.sum(block ** 2) + eps ** 2) 16 out = np.minimum(out, 0.2) 17 out = out / np.sqrt(np.sum(out ** 2) + eps ** 2) 18 else: 19 raise ValueError('Selected block normalization method is invalid.') 20 21 return out 22 23 24def _hog_channel_gradient(channel): 25 """Compute unnormalized gradient image along `row` and `col` axes. 26 27 Parameters 28 ---------- 29 channel : (M, N) ndarray 30 Grayscale image or one of image channel. 31 32 Returns 33 ------- 34 g_row, g_col : channel gradient along `row` and `col` axes correspondingly. 35 """ 36 g_row = np.empty(channel.shape, dtype=channel.dtype) 37 g_row[0, :] = 0 38 g_row[-1, :] = 0 39 g_row[1:-1, :] = channel[2:, :] - channel[:-2, :] 40 g_col = np.empty(channel.shape, dtype=channel.dtype) 41 g_col[:, 0] = 0 42 g_col[:, -1] = 0 43 g_col[:, 1:-1] = channel[:, 2:] - channel[:, :-2] 44 45 return g_row, g_col 46 47 48@utils.channel_as_last_axis(multichannel_output=False) 49@utils.deprecate_multichannel_kwarg(multichannel_position=8) 50def hog(image, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(3, 3), 51 block_norm='L2-Hys', visualize=False, transform_sqrt=False, 52 feature_vector=True, multichannel=None, *, channel_axis=None): 53 """Extract Histogram of Oriented Gradients (HOG) for a given image. 54 55 Compute a Histogram of Oriented Gradients (HOG) by 56 57 1. (optional) global image normalization 58 2. computing the gradient image in `row` and `col` 59 3. computing gradient histograms 60 4. normalizing across blocks 61 5. flattening into a feature vector 62 63 Parameters 64 ---------- 65 image : (M, N[, C]) ndarray 66 Input image. 67 orientations : int, optional 68 Number of orientation bins. 69 pixels_per_cell : 2-tuple (int, int), optional 70 Size (in pixels) of a cell. 71 cells_per_block : 2-tuple (int, int), optional 72 Number of cells in each block. 73 block_norm : str {'L1', 'L1-sqrt', 'L2', 'L2-Hys'}, optional 74 Block normalization method: 75 76 ``L1`` 77 Normalization using L1-norm. 78 ``L1-sqrt`` 79 Normalization using L1-norm, followed by square root. 80 ``L2`` 81 Normalization using L2-norm. 82 ``L2-Hys`` 83 Normalization using L2-norm, followed by limiting the 84 maximum values to 0.2 (`Hys` stands for `hysteresis`) and 85 renormalization using L2-norm. (default) 86 For details, see [3]_, [4]_. 87 88 visualize : bool, optional 89 Also return an image of the HOG. For each cell and orientation bin, 90 the image contains a line segment that is centered at the cell center, 91 is perpendicular to the midpoint of the range of angles spanned by the 92 orientation bin, and has intensity proportional to the corresponding 93 histogram value. 94 transform_sqrt : bool, optional 95 Apply power law compression to normalize the image before 96 processing. DO NOT use this if the image contains negative 97 values. Also see `notes` section below. 98 feature_vector : bool, optional 99 Return the data as a feature vector by calling .ravel() on the result 100 just before returning. 101 multichannel : boolean, optional 102 If True, the last `image` dimension is considered as a color channel, 103 otherwise as spatial. This argument is deprecated: specify 104 `channel_axis` instead. 105 channel_axis : int or None, optional 106 If None, the image is assumed to be a grayscale (single channel) image. 107 Otherwise, this parameter indicates which axis of the array corresponds 108 to channels. 109 110 .. versionadded:: 0.19 111 `channel_axis` was added in 0.19. 112 113 Returns 114 ------- 115 out : (n_blocks_row, n_blocks_col, n_cells_row, n_cells_col, n_orient) ndarray 116 HOG descriptor for the image. If `feature_vector` is True, a 1D 117 (flattened) array is returned. 118 hog_image : (M, N) ndarray, optional 119 A visualisation of the HOG image. Only provided if `visualize` is True. 120 121 References 122 ---------- 123 .. [1] https://en.wikipedia.org/wiki/Histogram_of_oriented_gradients 124 125 .. [2] Dalal, N and Triggs, B, Histograms of Oriented Gradients for 126 Human Detection, IEEE Computer Society Conference on Computer 127 Vision and Pattern Recognition 2005 San Diego, CA, USA, 128 https://lear.inrialpes.fr/people/triggs/pubs/Dalal-cvpr05.pdf, 129 :DOI:`10.1109/CVPR.2005.177` 130 131 .. [3] Lowe, D.G., Distinctive image features from scale-invatiant 132 keypoints, International Journal of Computer Vision (2004) 60: 91, 133 http://www.cs.ubc.ca/~lowe/papers/ijcv04.pdf, 134 :DOI:`10.1023/B:VISI.0000029664.99615.94` 135 136 .. [4] Dalal, N, Finding People in Images and Videos, 137 Human-Computer Interaction [cs.HC], Institut National Polytechnique 138 de Grenoble - INPG, 2006, 139 https://tel.archives-ouvertes.fr/tel-00390303/file/NavneetDalalThesis.pdf 140 141 Notes 142 ----- 143 The presented code implements the HOG extraction method from [2]_ with 144 the following changes: (I) blocks of (3, 3) cells are used ((2, 2) in the 145 paper); (II) no smoothing within cells (Gaussian spatial window with sigma=8pix 146 in the paper); (III) L1 block normalization is used (L2-Hys in the paper). 147 148 Power law compression, also known as Gamma correction, is used to reduce 149 the effects of shadowing and illumination variations. The compression makes 150 the dark regions lighter. When the kwarg `transform_sqrt` is set to 151 ``True``, the function computes the square root of each color channel 152 and then applies the hog algorithm to the image. 153 """ 154 image = np.atleast_2d(image) 155 float_dtype = utils._supported_float_type(image.dtype) 156 image = image.astype(float_dtype, copy=False) 157 158 multichannel = channel_axis is not None 159 ndim_spatial = image.ndim - 1 if multichannel else image.ndim 160 if ndim_spatial != 2: 161 raise ValueError('Only images with two spatial dimensions are ' 162 'supported. If using with color/multichannel ' 163 'images, specify `channel_axis`.') 164 165 """ 166 The first stage applies an optional global image normalization 167 equalisation that is designed to reduce the influence of illumination 168 effects. In practice we use gamma (power law) compression, either 169 computing the square root or the log of each color channel. 170 Image texture strength is typically proportional to the local surface 171 illumination so this compression helps to reduce the effects of local 172 shadowing and illumination variations. 173 """ 174 175 if transform_sqrt: 176 image = np.sqrt(image) 177 178 """ 179 The second stage computes first order image gradients. These capture 180 contour, silhouette and some texture information, while providing 181 further resistance to illumination variations. The locally dominant 182 color channel is used, which provides color invariance to a large 183 extent. Variant methods may also include second order image derivatives, 184 which act as primitive bar detectors - a useful feature for capturing, 185 e.g. bar like structures in bicycles and limbs in humans. 186 """ 187 188 if multichannel: 189 g_row_by_ch = np.empty_like(image, dtype=float_dtype) 190 g_col_by_ch = np.empty_like(image, dtype=float_dtype) 191 g_magn = np.empty_like(image, dtype=float_dtype) 192 193 for idx_ch in range(image.shape[2]): 194 g_row_by_ch[:, :, idx_ch], g_col_by_ch[:, :, idx_ch] = \ 195 _hog_channel_gradient(image[:, :, idx_ch]) 196 g_magn[:, :, idx_ch] = np.hypot(g_row_by_ch[:, :, idx_ch], 197 g_col_by_ch[:, :, idx_ch]) 198 199 # For each pixel select the channel with the highest gradient magnitude 200 idcs_max = g_magn.argmax(axis=2) 201 rr, cc = np.meshgrid(np.arange(image.shape[0]), 202 np.arange(image.shape[1]), 203 indexing='ij', 204 sparse=True) 205 g_row = g_row_by_ch[rr, cc, idcs_max] 206 g_col = g_col_by_ch[rr, cc, idcs_max] 207 else: 208 g_row, g_col = _hog_channel_gradient(image) 209 210 """ 211 The third stage aims to produce an encoding that is sensitive to 212 local image content while remaining resistant to small changes in 213 pose or appearance. The adopted method pools gradient orientation 214 information locally in the same way as the SIFT [Lowe 2004] 215 feature. The image window is divided into small spatial regions, 216 called "cells". For each cell we accumulate a local 1-D histogram 217 of gradient or edge orientations over all the pixels in the 218 cell. This combined cell-level 1-D histogram forms the basic 219 "orientation histogram" representation. Each orientation histogram 220 divides the gradient angle range into a fixed number of 221 predetermined bins. The gradient magnitudes of the pixels in the 222 cell are used to vote into the orientation histogram. 223 """ 224 225 s_row, s_col = image.shape[:2] 226 c_row, c_col = pixels_per_cell 227 b_row, b_col = cells_per_block 228 229 n_cells_row = int(s_row // c_row) # number of cells along row-axis 230 n_cells_col = int(s_col // c_col) # number of cells along col-axis 231 232 # compute orientations integral images 233 orientation_histogram = np.zeros((n_cells_row, n_cells_col, orientations), 234 dtype=float) 235 g_row = g_row.astype(float, copy=False) 236 g_col = g_col.astype(float, copy=False) 237 238 _hoghistogram.hog_histograms(g_col, g_row, c_col, c_row, s_col, s_row, 239 n_cells_col, n_cells_row, 240 orientations, orientation_histogram) 241 242 # now compute the histogram for each cell 243 hog_image = None 244 245 if visualize: 246 from .. import draw 247 248 radius = min(c_row, c_col) // 2 - 1 249 orientations_arr = np.arange(orientations) 250 # set dr_arr, dc_arr to correspond to midpoints of orientation bins 251 orientation_bin_midpoints = ( 252 np.pi * (orientations_arr + .5) / orientations) 253 dr_arr = radius * np.sin(orientation_bin_midpoints) 254 dc_arr = radius * np.cos(orientation_bin_midpoints) 255 hog_image = np.zeros((s_row, s_col), dtype=float_dtype) 256 for r in range(n_cells_row): 257 for c in range(n_cells_col): 258 for o, dr, dc in zip(orientations_arr, dr_arr, dc_arr): 259 centre = tuple([r * c_row + c_row // 2, 260 c * c_col + c_col // 2]) 261 rr, cc = draw.line(int(centre[0] - dc), 262 int(centre[1] + dr), 263 int(centre[0] + dc), 264 int(centre[1] - dr)) 265 hog_image[rr, cc] += orientation_histogram[r, c, o] 266 267 """ 268 The fourth stage computes normalization, which takes local groups of 269 cells and contrast normalizes their overall responses before passing 270 to next stage. Normalization introduces better invariance to illumination, 271 shadowing, and edge contrast. It is performed by accumulating a measure 272 of local histogram "energy" over local groups of cells that we call 273 "blocks". The result is used to normalize each cell in the block. 274 Typically each individual cell is shared between several blocks, but 275 its normalizations are block dependent and thus different. The cell 276 thus appears several times in the final output vector with different 277 normalizations. This may seem redundant but it improves the performance. 278 We refer to the normalized block descriptors as Histogram of Oriented 279 Gradient (HOG) descriptors. 280 """ 281 282 n_blocks_row = (n_cells_row - b_row) + 1 283 n_blocks_col = (n_cells_col - b_col) + 1 284 normalized_blocks = np.zeros( 285 (n_blocks_row, n_blocks_col, b_row, b_col, orientations), 286 dtype=float_dtype 287 ) 288 289 for r in range(n_blocks_row): 290 for c in range(n_blocks_col): 291 block = orientation_histogram[r:r + b_row, c:c + b_col, :] 292 normalized_blocks[r, c, :] = \ 293 _hog_normalize_block(block, method=block_norm) 294 295 """ 296 The final step collects the HOG descriptors from all blocks of a dense 297 overlapping grid of blocks covering the detection window into a combined 298 feature vector for use in the window classifier. 299 """ 300 301 if feature_vector: 302 normalized_blocks = normalized_blocks.ravel() 303 304 if visualize: 305 return normalized_blocks, hog_image 306 else: 307 return normalized_blocks 308