1 /** @file hog.c
2 ** @brief Histogram of Oriented Gradients (HOG) - Definition
3 ** @author Andrea Vedaldi
4 **/
5
6 /*
7 Copyright (C) 2007-12 Andrea Vedaldi and Brian Fulkerson.
8 All rights reserved.
9
10 This file is part of the VLFeat library and is made available under
11 the terms of the BSD license (see the COPYING file).
12 */
13
14 #include "hog.h"
15 #include "mathop.h"
16 #include <string.h>
17
18 /**
19
20 <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
21 @page hog Histogram of Oriented Gradients (HOG) features
22 @author Andrea Vedaldi
23 <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
24
25 @ref hog.h implements the Histogram of Oriented Gradients (HOG) features
26 in the variants of Dalal Triggs @cite{dalal05histograms} and of UOCTTI
27 @cite{felzenszwalb09object}. Applications include object detection
28 and deformable object detection.
29
30 - @ref hog-overview
31 - @ref hog-tech
32
33 <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
34 @section hog-overview Overview
35 <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
36
37 HOG is a standard image feature used, among others, in object detection
38 and deformable object detection. It decomposes the image into square cells
39 of a given size (typically eight pixels), compute a histogram of oriented
40 gradient in each cell (similar to @ref sift), and then renormalizes
41 the cells by looking into adjacent blocks.
42
43 VLFeat implements two HOG variants: the original one of Dalal-Triggs
44 @cite{dalal05histograms} and the one proposed in Felzenszwalb et al.
45 @cite{felzenszwalb09object}.
46
47 In order to use HOG, start by creating a new HOG object, set the desired
48 parameters, pass a (color or grayscale) image, and read off the results.
49
50 @code
51 VlHog * hog = vl_hog_new(VlHogVariantDalalTriggs, numOrientations, VL_FALSE) ;
52 vl_hog_put_image(hog, image, height, width, numChannels, cellSize) ;
53 hogWidth = vl_hog_get_width(hog) ;
54 hogHeight = vl_hog_get_height(hog) ;
55 hogDimenison = vl_hog_get_dimension(hog) ;
56 hogArray = vl_malloc(hogWidth*hogHeight*hogDimension*sizeof(float)) ;
57 vl_hog_extract(hog, hogArray) ;
58 vl_hog_delete(hog) ;
59 @endcode
60
61 HOG is a feature array of the dimension returned by ::vl_hog_get_width,
62 ::vl_hog_get_height, with each feature (histogram) having
63 dimension ::vl_hog_get_dimension. The array is stored in row major order,
64 with the slowest varying dimension beying the dimension indexing the histogram
65 elements.
66
67 The number of entreis in the histogram as well as their meaning depends
68 on the HOG variant and is detailed later. However, it is usually
69 unnecessary to know such details. @ref hog.h provides support for
70 creating an inconic representation of a HOG feature array:
71
72 @code
73 glyphSize = vl_hog_get_glyph_size(hog) ;
74 imageHeight = glyphSize * hogArrayHeight ;
75 imageWidth = glyphSize * hogArrayWidth ;
76 image = vl_malloc(sizeof(float)*imageWidth*imageHeight) ;
77 vl_hog_render(hog, image, hogArray) ;
78 @endcode
79
80 It is often convenient to mirror HOG features from left to right. This
81 can be obtained by mirroring an array of HOG cells, but the content
82 of each cell must also be rearranged. This can be done by
83 the permutation obtaiend by ::vl_hog_get_permutation.
84
85 Furthermore, @ref hog.h suppots computing HOG features not from
86 images but from vector fields.
87
88 <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
89 @section hog-tech Technical details
90 <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
91
92 HOG divdes the input image into square cells of size @c cellSize,
93 fitting as many cells as possible, filling the image domain from
94 the upper-left corner down to the right one. For each row and column,
95 the last cell is at least half contained in the image.
96 More precisely, the number of cells obtained in this manner is:
97
98 @code
99 hogWidth = (width + cellSize/2) / cellSize ;
100 hogHeight = (height + cellSize/2) / cellSize ;
101 @endcode
102
103 Then the image gradient @f$ \nabla \ell(x,y) @f$
104 is computed by using central difference (for colour image
105 the channel with the largest gradient at that pixel is used).
106 The gradient @f$ \nabla \ell(x,y) @f$ is assigned to one of @c 2*numOrientations orientation in the
107 range @f$ [0,2\pi) @f$ (see @ref hog-conventions for details).
108 Contributions are then accumulated by using bilinear interpolation
109 to four neigbhour cells, as in @ref sift.
110 This results in an histogram @f$h_d@f$ of dimension
111 2*numOrientations, called of @e directed orientations
112 since it accounts for the direction as well as the orientation
113 of the gradient. A second histogram @f$h_u@f$ of undirected orientations
114 of half the size is obtained by folding @f$ h_d @f$ into two.
115
116 Let a block of cell be a @f$ 2\times 2 @f$ sub-array of cells.
117 Let the norm of a block be the @f$ l^2 @f$ norm of the stacking of the
118 respective unoriented histogram. Given a HOG cell, four normalisation
119 factors are then obtained as the inverse of the norm of the four
120 blocks that contain the cell.
121
122 For the Dalal-Triggs variant, each histogram @f$ h_d @f$ is copied
123 four times, normalised using the four different normalisation factors,
124 the four vectors are stacked, saturated at 0.2, and finally stored as the descriptor
125 of the cell. This results in a @c numOrientations * 4 dimensional
126 cell descriptor. Blocks are visited from left to right and top to bottom
127 when forming the final descriptor.
128
129 For the UOCCTI descriptor, the same is done for both the undirected
130 as well as the directed orientation histograms. This would yield
131 a dimension of @c 4*(2+1)*numOrientations elements, but the resulting
132 vector is projected down to @c (2+1)*numOrientations elements
133 by averaging corresponding histogram dimensions. This was shown to
134 be an algebraic approximation of PCA for descriptors computed on natural
135 images.
136
137 In addition, for the UOCTTI variant the l1 norm of each of the
138 four l2 normalised undirected histograms is computed and stored
139 as additional four dimensions, for a total of
140 @c 4+3*numOrientations dimensions.
141
142 <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
143 @subsection hog-conventions Conventions
144 <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
145
146 The orientation of a gradient is expressed as the angle it forms with the
147 horizontal axis of the image. Angles are measured clock-wise (as the vertical
148 image axis points downards), and the null angle corresponds to
149 an horizontal vector pointing right. The quantized directed
150 orientations are @f$ \mathrm{k} \pi / \mathrm{numOrientations} @f$, where
151 @c k is an index that varies in the ingeger
152 range @f$ \{0, \dots, 2\mathrm{numOrientations} - 1\} @f$.
153
154 Note that the orientations capture the orientation of the gradeint;
155 image edges would be oriented at 90 degrees from these.
156
157 **/
158
159 /* ---------------------------------------------------------------- */
160 /** @brief Create a new HOG object
161 ** @param variant HOG descriptor variant.
162 ** @param numOrientations number of distinguished orientations.
163 ** @param transposed wether images are transposed (column major).
164 ** @return the new HOG object.
165 **
166 ** The function creates a new HOG object to extract descriptors of
167 ** the prescribed @c variant. The angular resolution is set by
168 ** @a numOrientations, which specifies the number of <em>undirected</em>
169 ** orientations. The object can work with column major images
170 ** by setting @a transposed to true.
171 **/
172
173 VlHog *
vl_hog_new(VlHogVariant variant,vl_size numOrientations,vl_bool transposed)174 vl_hog_new (VlHogVariant variant, vl_size numOrientations, vl_bool transposed)
175 {
176 vl_index o, k ;
177 VlHog * self = vl_calloc(1, sizeof(VlHog)) ;
178
179 assert(numOrientations >= 1) ;
180
181 self->variant = variant ;
182 self->numOrientations = numOrientations ;
183 self->glyphSize = 21 ;
184 self->transposed = transposed ;
185 self->useBilinearOrientationAssigment = VL_FALSE ;
186 self->orientationX = vl_malloc(sizeof(float) * self->numOrientations) ;
187 self->orientationY = vl_malloc(sizeof(float) * self->numOrientations) ;
188
189 /*
190 Create a vector along the center of each orientation bin. These
191 are used to map gradients to bins. If the image is transposed,
192 then this can be adjusted here by swapping X and Y in these
193 vectors.
194 */
195 for(o = 0 ; o < (signed)self->numOrientations ; ++o) {
196 double angle = o * VL_PI / self->numOrientations ;
197 if (!self->transposed) {
198 self->orientationX[o] = (float) cos(angle) ;
199 self->orientationY[o] = (float) sin(angle) ;
200 } else {
201 self->orientationX[o] = (float) sin(angle) ;
202 self->orientationY[o] = (float) cos(angle) ;
203 }
204 }
205
206 /*
207 If the number of orientation is equal to 9, one gets:
208
209 Uoccti:: 18 directed orientations + 9 undirected orientations + 4 texture
210 DalalTriggs:: 9 undirected orientations x 4 blocks.
211 */
212 switch (self->variant) {
213 case VlHogVariantUoctti:
214 self->dimension = 3*self->numOrientations + 4 ;
215 break ;
216
217 case VlHogVariantDalalTriggs:
218 self->dimension = 4*self->numOrientations ;
219 break ;
220
221 default:
222 assert(0) ;
223 }
224
225 /*
226 A permutation specifies how to permute elements in a HOG
227 descriptor to flip it horizontally. Since the first orientation
228 of index 0 points to the right, this must be swapped with orientation
229 self->numOrientation that points to the left (for the directed case,
230 and to itself for the undirected one).
231 */
232
233 self->permutation = vl_malloc(self->dimension * sizeof(vl_index)) ;
234 switch (self->variant) {
235 case VlHogVariantUoctti:
236 for(o = 0 ; o < (signed)self->numOrientations ; ++o) {
237 vl_index op = self->numOrientations - o ;
238 self->permutation[o] = op ;
239 self->permutation[o + self->numOrientations] = (op + self->numOrientations) % (2*self->numOrientations) ;
240 self->permutation[o + 2*self->numOrientations] = (op % self->numOrientations) + 2*self->numOrientations ;
241 }
242 for (k = 0 ; k < 4 ; ++k) {
243 /* The texture features correspond to four displaced block around
244 a cell. These permute with a lr flip as for DalalTriggs. */
245 vl_index blockx = k % 2 ;
246 vl_index blocky = k / 2 ;
247 vl_index q = (1 - blockx) + blocky * 2 ;
248 self->permutation[k + self->numOrientations * 3] = q + self->numOrientations * 3 ;
249 }
250 break ;
251
252 case VlHogVariantDalalTriggs:
253 for(k = 0 ; k < 4 ; ++k) {
254 /* Find the corresponding block. Blocks are listed in order 1,2,3,4,...
255 from left to right and top to bottom */
256 vl_index blockx = k % 2 ;
257 vl_index blocky = k / 2 ;
258 vl_index q = (1 - blockx) + blocky * 2 ;
259 for(o = 0 ; o < (signed)self->numOrientations ; ++o) {
260 vl_index op = self->numOrientations - o ;
261 self->permutation[o + k*self->numOrientations] = (op % self->numOrientations) + q*self->numOrientations ;
262 }
263 }
264 break ;
265
266 default:
267 assert(0) ;
268 }
269
270 /*
271 Create glyphs for representing the HOG features/ filters. The glyphs
272 are simple bars, oriented orthogonally to the gradients to represent
273 image edges. If the object is configured to work on transposed image,
274 the glyphs images are also stored in column-major.
275 */
276 self->glyphs = vl_calloc(self->glyphSize * self->glyphSize * self->numOrientations, sizeof(float)) ;
277 #define atglyph(x,y,k) self->glyphs[(x) + self->glyphSize * (y) + self->glyphSize * self->glyphSize * (k)]
278 for (o = 0 ; o < (signed)self->numOrientations ; ++o) {
279 double angle = fmod(o * VL_PI / self->numOrientations + VL_PI/2, VL_PI) ;
280 double x2 = self->glyphSize * cos(angle) / 2 ;
281 double y2 = self->glyphSize * sin(angle) / 2 ;
282
283 if (angle <= VL_PI / 4 || angle >= VL_PI * 3 / 4) {
284 /* along horizontal direction */
285 double slope = y2 / x2 ;
286 double offset = (1 - slope) * (self->glyphSize - 1) / 2 ;
287 vl_index skip = (1 - fabs(cos(angle))) / 2 * self->glyphSize ;
288 vl_index i, j ;
289 for (i = skip ; i < (signed)self->glyphSize - skip ; ++i) {
290 j = vl_round_d(slope * i + offset) ;
291 if (! self->transposed) {
292 atglyph(i,j,o) = 1 ;
293 } else {
294 atglyph(j,i,o) = 1 ;
295 }
296 }
297 } else {
298 /* along vertical direction */
299 double slope = x2 / y2 ;
300 double offset = (1 - slope) * (self->glyphSize - 1) / 2 ;
301 vl_index skip = (1 - sin(angle)) / 2 * self->glyphSize ;
302 vl_index i, j ;
303 for (j = skip ; j < (signed)self->glyphSize - skip; ++j) {
304 i = vl_round_d(slope * j + offset) ;
305 if (! self->transposed) {
306 atglyph(i,j,o) = 1 ;
307 } else {
308 atglyph(j,i,o) = 1 ;
309 }
310 }
311 }
312 }
313 return self ;
314 }
315
316 /* ---------------------------------------------------------------- */
317 /** @brief Delete a HOG object
318 ** @param self HOG object to delete.
319 **/
320
321 void
vl_hog_delete(VlHog * self)322 vl_hog_delete (VlHog * self)
323 {
324 if (self->orientationX) {
325 vl_free(self->orientationX) ;
326 self->orientationX = NULL ;
327 }
328
329 if (self->orientationY) {
330 vl_free(self->orientationY) ;
331 self->orientationY = NULL ;
332 }
333
334 if (self->glyphs) {
335 vl_free(self->glyphs) ;
336 self->glyphs = NULL ;
337 }
338
339 if (self->permutation) {
340 vl_free(self->permutation) ;
341 self->permutation = NULL ;
342 }
343
344 if (self->hog) {
345 vl_free(self->hog) ;
346 self->hog = NULL ;
347 }
348
349 if (self->hogNorm) {
350 vl_free(self->hogNorm) ;
351 self->hogNorm = NULL ;
352 }
353
354 vl_free(self) ;
355 }
356
357
358 /* ---------------------------------------------------------------- */
359 /** @brief Get HOG glyph size
360 ** @param self HOG object.
361 ** @return size (height and width) of a glyph.
362 **/
363
364 vl_size
vl_hog_get_glyph_size(VlHog const * self)365 vl_hog_get_glyph_size (VlHog const * self)
366 {
367 return self->glyphSize ;
368 }
369
370 /* ---------------------------------------------------------------- */
371 /** @brief Get HOG left-right flip permutation
372 ** @param self HOG object.
373 ** @return left-right permutation.
374 **
375 ** The function returns a pointer to an array @c permutation of ::vl_hog_get_dimension
376 ** elements. Given a HOG descriptor (for a cell) @c hog, which is also
377 ** a vector of ::vl_hog_get_dimension elements, the
378 ** descriptor obtained for the same image flipped horizotnally is
379 ** given by <code>flippedHog[i] = hog[permutation[i]]</code>.
380 **/
381
382 vl_index const *
vl_hog_get_permutation(VlHog const * self)383 vl_hog_get_permutation (VlHog const * self)
384 {
385 return self->permutation ;
386 }
387
388 /* ---------------------------------------------------------------- */
389 /** @brief Turn bilinear interpolation of assignments on or off
390 ** @param self HOG object.
391 ** @param x @c true if orientations should be assigned with bilinear interpolation.
392 **/
393
394 void
vl_hog_set_use_bilinear_orientation_assignments(VlHog * self,vl_bool x)395 vl_hog_set_use_bilinear_orientation_assignments (VlHog * self, vl_bool x) {
396 self->useBilinearOrientationAssigment = x ;
397 }
398
399 /** @brief Tell whether assignments use bilinear interpolation or not
400 ** @param self HOG object.
401 ** @return @c true if orientations are be assigned with bilinear interpolation.
402 **/
403
404 vl_bool
vl_hog_get_use_bilinear_orientation_assignments(VlHog const * self)405 vl_hog_get_use_bilinear_orientation_assignments (VlHog const * self) {
406 return self->useBilinearOrientationAssigment ;
407 }
408
409 /* ---------------------------------------------------------------- */
410 /** @brief Render a HOG descriptor to a glyph image
411 ** @param self HOG object.
412 ** @param image glyph image (output).
413 ** @param descriptor HOG descriptor.
414 ** @param width HOG descriptor width.
415 ** @param height HOG descriptor height.
416 **
417 ** The function renders the HOG descriptor or filter
418 ** @a descriptor as an image (for visualization) and stores the result in
419 ** the buffer @a image. This buffer
420 ** must be an array of dimensions @c width*glyphSize
421 ** by @c height*glyphSize elements, where @c glyphSize is
422 ** obtained from ::vl_hog_get_glyph_size and is the size in pixels
423 ** of the image element used to represent the descriptor of one
424 ** HOG cell.
425 **/
426
427 void
vl_hog_render(VlHog const * self,float * image,float const * descriptor,vl_size width,vl_size height)428 vl_hog_render (VlHog const * self,
429 float * image,
430 float const * descriptor,
431 vl_size width,
432 vl_size height)
433 {
434 vl_index x, y, k, cx, cy ;
435 vl_size hogStride = width * height ;
436
437 assert(self) ;
438 assert(image) ;
439 assert(descriptor) ;
440 assert(width > 0) ;
441 assert(height > 0) ;
442
443 for (y = 0 ; y < (signed)height ; ++y) {
444 for (x = 0 ; x < (signed)width ; ++x) {
445 float minWeight = 0 ;
446 float maxWeight = 0 ;
447
448 for (k = 0 ; k < (signed)self->numOrientations ; ++k) {
449 float weight ;
450 float const * glyph = self->glyphs + k * (self->glyphSize*self->glyphSize) ;
451 float * glyphImage = image + self->glyphSize * x + y * width * (self->glyphSize*self->glyphSize) ;
452
453 switch (self->variant) {
454 case VlHogVariantUoctti:
455 weight =
456 descriptor[k * hogStride] +
457 descriptor[(k + self->numOrientations) * hogStride] +
458 descriptor[(k + 2 * self->numOrientations) * hogStride] ;
459 break ;
460 case VlHogVariantDalalTriggs:
461 weight =
462 descriptor[k * hogStride] +
463 descriptor[(k + self->numOrientations) * hogStride] +
464 descriptor[(k + 2 * self->numOrientations) * hogStride] +
465 descriptor[(k + 3 * self->numOrientations) * hogStride] ;
466 break ;
467 default:
468 abort() ;
469 }
470 maxWeight = VL_MAX(weight, maxWeight) ;
471 minWeight = VL_MIN(weight, minWeight);
472
473 for (cy = 0 ; cy < (signed)self->glyphSize ; ++cy) {
474 for (cx = 0 ; cx < (signed)self->glyphSize ; ++cx) {
475 *glyphImage++ += weight * (*glyph++) ;
476 }
477 glyphImage += (width - 1) * self->glyphSize ;
478 }
479 } /* next orientation */
480
481 {
482 float * glyphImage = image + self->glyphSize * x + y * width * (self->glyphSize*self->glyphSize) ;
483 for (cy = 0 ; cy < (signed)self->glyphSize ; ++cy) {
484 for (cx = 0 ; cx < (signed)self->glyphSize ; ++cx) {
485 float value = *glyphImage ;
486 *glyphImage++ = VL_MAX(minWeight, VL_MIN(maxWeight, value)) ;
487 }
488 glyphImage += (width - 1) * self->glyphSize ;
489 }
490 }
491
492 ++ descriptor ;
493 } /* next column of cells (x) */
494 } /* next row of cells (y) */
495 }
496
497 /* ---------------------------------------------------------------- */
498 /** @brief Get the dimension of the HOG features
499 ** @param self HOG object.
500 ** @return imension of a HOG cell descriptors.
501 **/
502
503 vl_size
vl_hog_get_dimension(VlHog const * self)504 vl_hog_get_dimension (VlHog const * self)
505 {
506 return self->dimension ;
507 }
508
509 /** @brief Get the width of the HOG cell array
510 ** @param self HOG object.
511 ** @return number of HOG cells in the horizontal direction.
512 **/
513
514 vl_size
vl_hog_get_width(VlHog * self)515 vl_hog_get_width (VlHog * self)
516 {
517 return self->hogWidth ;
518 }
519
520 /** @brief Get the height of the HOG cell array
521 ** @param self HOG object.
522 ** @return number of HOG cells in the vertical direction.
523 **/
524
525 vl_size
vl_hog_get_height(VlHog * self)526 vl_hog_get_height (VlHog * self)
527 {
528 return self->hogHeight ;
529 }
530
531 /* ---------------------------------------------------------------- */
532 /** @internal @brief Prepare internal buffers
533 ** @param self HOG object.
534 ** @param width image width.
535 ** @param height image height.
536 ** @param cellSize size of a HOG cell.
537 **/
538
539 static void
vl_hog_prepare_buffers(VlHog * self,vl_size width,vl_size height,vl_size cellSize)540 vl_hog_prepare_buffers (VlHog * self, vl_size width, vl_size height, vl_size cellSize)
541 {
542 vl_size hogWidth = (width + cellSize/2) / cellSize ;
543 vl_size hogHeight = (height + cellSize/2) / cellSize ;
544
545 assert(width > 3) ;
546 assert(height > 3) ;
547 assert(hogWidth > 0) ;
548 assert(hogHeight > 0) ;
549
550 if (self->hog &&
551 self->hogWidth == hogWidth &&
552 self->hogHeight == hogHeight) {
553 /* a suitable buffer is already allocated */
554 memset(self->hog, 0, sizeof(float) * hogWidth * hogHeight * self->numOrientations * 2) ;
555 memset(self->hogNorm, 0, sizeof(float) * hogWidth * hogHeight) ;
556 return ;
557 }
558
559 if (self->hog) {
560 vl_free(self->hog) ;
561 self->hog = NULL ;
562 }
563
564 if (self->hogNorm) {
565 vl_free(self->hogNorm) ;
566 self->hogNorm = NULL ;
567 }
568
569 self->hog = vl_calloc(hogWidth * hogHeight * self->numOrientations * 2, sizeof(float)) ;
570 self->hogNorm = vl_calloc(hogWidth * hogHeight, sizeof(float)) ;
571 self->hogWidth = hogWidth ;
572 self->hogHeight = hogHeight ;
573 }
574
575 /* ---------------------------------------------------------------- */
576 /** @brief Process features starting from an image
577 ** @param self HOG object.
578 ** @param image image to process.
579 ** @param width image width.
580 ** @param height image height.
581 ** @param numChannels number of image channles.
582 ** @param cellSize size of a HOG cell.
583 **
584 ** The buffer @c hog must be a three-dimensional array.
585 ** The first two dimensions are @c (width + cellSize/2)/cellSize and
586 ** @c (height + cellSize/2)/cellSize, where divisions are integer.
587 ** This is approximately @c width/cellSize and @c height/cellSize,
588 ** adjusted so that the last cell is at least half contained in the
589 ** image.
590 **
591 ** The image @c width and @c height must be not smaller than three
592 ** pixels and not smaller than @c cellSize.
593 **/
594
595 void
vl_hog_put_image(VlHog * self,float const * image,vl_size width,vl_size height,vl_size numChannels,vl_size cellSize)596 vl_hog_put_image (VlHog * self,
597 float const * image,
598 vl_size width, vl_size height, vl_size numChannels,
599 vl_size cellSize)
600 {
601 vl_size hogStride ;
602 vl_size channelStride = width * height ;
603 vl_index x, y ;
604 vl_uindex k ;
605
606 assert(self) ;
607 assert(image) ;
608
609 /* clear features */
610 vl_hog_prepare_buffers(self, width, height, cellSize) ;
611 hogStride = self->hogWidth * self->hogHeight ;
612
613 #define at(x,y,k) (self->hog[(x) + (y) * self->hogWidth + (k) * hogStride])
614
615 /* compute gradients and map the to HOG cells by bilinear interpolation */
616 for (y = 1 ; y < (signed)height - 1 ; ++y) {
617 for (x = 1 ; x < (signed)width - 1 ; ++x) {
618 float gradx = 0 ;
619 float grady = 0 ;
620 float gradNorm ;
621 float orientationWeights [2] = {-1, -1} ;
622 vl_index orientationBins [2] = {-1, -1} ;
623 vl_index orientation = 0 ;
624 float hx, hy, wx1, wx2, wy1, wy2 ;
625 vl_index binx, biny, o ;
626
627 /*
628 Compute the gradient at (x,y). The image channel with
629 the maximum gradient at each location is selected.
630 */
631 {
632 float const * iter = image + y * width + x ;
633 float gradNorm2 = 0 ;
634 for (k = 0 ; k < numChannels ; ++k) {
635 float gradx_ = *(iter + 1) - *(iter - 1) ;
636 float grady_ = *(iter + width) - *(iter - width) ;
637 float gradNorm2_ = gradx_ * gradx_ + grady_ * grady_ ;
638 if (gradNorm2_ > gradNorm2) {
639 gradx = gradx_ ;
640 grady = grady_ ;
641 gradNorm2 = gradNorm2_ ;
642 }
643 iter += channelStride ;
644 }
645 gradNorm = sqrtf(gradNorm2) ;
646 }
647
648 /*
649 Map the gradient to the closest and second closets orientation bins.
650 There are numOrientations orientation in the interval [0,pi).
651 The next numOriantations are the symmetric ones, for a total
652 of 2*numOrientation directed orientations.
653 */
654 for (k = 0 ; k < self->numOrientations ; ++k) {
655 float orientationScore_ = gradx * self->orientationX[k] + grady * self->orientationY[k] ;
656 vl_index orientationBin_ = k ;
657 if (orientationScore_ < 0) {
658 orientationScore_ = - orientationScore_ ;
659 orientationBin_ += self->numOrientations ;
660 }
661 if (orientationScore_ > orientationWeights[0]) {
662 orientationBins[1] = orientationBins[0] ;
663 orientationWeights[1] = orientationWeights[0] ;
664 orientationBins[0] = orientationBin_ ; ;
665 orientationWeights[0] = orientationScore_ ;
666 } else if (orientationScore_ > orientationWeights[1]) {
667 orientationBins[1] = orientationBin_ ;
668 orientationWeights[1] = orientationScore_ ;
669 }
670 }
671
672 if (self->useBilinearOrientationAssigment) {
673 /* min(1.0,...) guards against small overflows causing NaNs */
674 float angle0 = acosf(VL_MIN(orientationWeights[0] / VL_MAX(gradNorm, 1e-10),1.0)) ;
675 orientationWeights[1] = angle0 / (VL_PI / self->numOrientations) ;
676 orientationWeights[0] = 1 - orientationWeights[1] ;
677 } else {
678 orientationWeights[0] = 1 ;
679 orientationBins[1] = -1 ;
680 }
681
682 for (o = 0 ; o < 2 ; ++o) {
683 float ow ;
684 /*
685 Accumulate the gradient. hx is the distance of the
686 pixel x to the cell center at its left, in units of cellSize.
687 With this parametrixation, a pixel on the cell center
688 has hx = 0, which gradually increases to 1 moving to the next
689 center.
690 */
691
692 orientation = orientationBins[o] ;
693 if (orientation < 0) continue ;
694
695 /* (x - (w-1)/2) / w = (x + 0.5)/w - 0.5 */
696 hx = (x + 0.5) / cellSize - 0.5 ;
697 hy = (y + 0.5) / cellSize - 0.5 ;
698 binx = vl_floor_f(hx) ;
699 biny = vl_floor_f(hy) ;
700 wx2 = hx - binx ;
701 wy2 = hy - biny ;
702 wx1 = 1.0 - wx2 ;
703 wy1 = 1.0 - wy2 ;
704
705 ow = orientationWeights[o] ;
706
707 /*VL_PRINTF("%d %d - %d %d %f %f - %f %f %f %f - %d \n ",x,y,binx,biny,hx,hy,wx1,wx2,wy1,wy2,o);*/
708
709 if (binx >= 0 && biny >=0) {
710 at(binx,biny,orientation) += gradNorm * ow * wx1 * wy1 ;
711 }
712 if (binx < (signed)self->hogWidth - 1 && biny >=0) {
713 at(binx+1,biny,orientation) += gradNorm * ow * wx2 * wy1 ;
714 }
715 if (binx < (signed)self->hogWidth - 1 && biny < (signed)self->hogHeight - 1) {
716 at(binx+1,biny+1,orientation) += gradNorm * ow * wx2 * wy2 ;
717 }
718 if (binx >= 0 && biny < (signed)self->hogHeight - 1) {
719 at(binx,biny+1,orientation) += gradNorm * ow * wx1 * wy2 ;
720 }
721 } /* next o */
722 } /* next x */
723 } /* next y */
724 }
725
726 /* ---------------------------------------------------------------- */
727 /** @brief Process features starting from a field in polar notation
728 ** @param self HOG object.
729 ** @param modulus image gradient modulus.
730 ** @param angle image gradient angle.
731 ** @param directed wrap the gradient angles at 2pi (directed) or pi (undirected).
732 ** @param width image width.
733 ** @param height image height.
734 ** @param cellSize size of a HOG cell.
735 **
736 ** The function behaves like ::vl_hog_put_image, but foregoes the internal
737 ** computation of the gradient field, allowing the user to specify
738 ** their own. Angles are measure clockwise, the y axis pointing downwards,
739 ** starting from the x axis (pointing to the right).
740 **/
741
vl_hog_put_polar_field(VlHog * self,float const * modulus,float const * angle,vl_bool directed,vl_size width,vl_size height,vl_size cellSize)742 void vl_hog_put_polar_field (VlHog * self,
743 float const * modulus,
744 float const * angle,
745 vl_bool directed,
746 vl_size width, vl_size height,
747 vl_size cellSize)
748 {
749 vl_size hogStride ;
750 vl_index x, y, o ;
751 vl_index period = self->numOrientations * (directed ? 2 : 1) ;
752 double angleStep = VL_PI / self->numOrientations ;
753
754 assert(self) ;
755 assert(modulus) ;
756 assert(angle) ;
757
758 /* clear features */
759 vl_hog_prepare_buffers(self, width, height, cellSize) ;
760 hogStride = self->hogWidth * self->hogHeight ;
761
762 #define at(x,y,k) (self->hog[(x) + (y) * self->hogWidth + (k) * hogStride])
763 #define atNorm(x,y) (self->hogNorm[(x) + (y) * self->hogWidth])
764
765 /* fill HOG cells from gradient field */
766 for (y = 0 ; y < (signed)height ; ++y) {
767 for (x = 0 ; x < (signed)width ; ++x) {
768 float ho, hx, hy, wo1, wo2, wx1, wx2, wy1, wy2 ;
769 vl_index bino, binx, biny ;
770 float orientationWeights [2] = {0,0} ;
771 vl_index orientationBins [2] = {-1,-1} ;
772 vl_index orientation = 0 ;
773 float thisAngle = *angle++ ;
774 float thisModulus = *modulus++ ;
775
776 if (thisModulus <= 0.0f) continue ;
777
778 /* (x - (w-1)/2) / w = (x + 0.5)/w - 0.5 */
779
780 ho = (float)thisAngle / angleStep ;
781 bino = vl_floor_f(ho) ;
782 wo2 = ho - bino ;
783 wo1 = 1.0f - wo2 ;
784
785 while (bino < 0) { bino += self->numOrientations * 2 ; }
786
787 if (self->useBilinearOrientationAssigment) {
788 orientationBins[0] = bino % period ;
789 orientationBins[1] = (bino + 1) % period ;
790 orientationWeights[0] = wo1 ;
791 orientationWeights[1] = wo2 ;
792 } else {
793 orientationBins[0] = (bino + ((wo1 > wo2) ? 0 : 1)) % period ;
794 orientationWeights[0] = 1 ;
795 orientationBins[1] = -1 ;
796 }
797
798 for (o = 0 ; o < 2 ; ++o) {
799 /*
800 Accumulate the gradient. hx is the distance of the
801 pixel x to the cell center at its left, in units of cellSize.
802 With this parametrixation, a pixel on the cell center
803 has hx = 0, which gradually increases to 1 moving to the next
804 center.
805 */
806
807 orientation = orientationBins[o] ;
808 if (orientation < 0) continue ;
809
810 hx = (x + 0.5) / cellSize - 0.5 ;
811 hy = (y + 0.5) / cellSize - 0.5 ;
812 binx = vl_floor_f(hx) ;
813 biny = vl_floor_f(hy) ;
814 wx2 = hx - binx ;
815 wy2 = hy - biny ;
816 wx1 = 1.0 - wx2 ;
817 wy1 = 1.0 - wy2 ;
818
819 wx1 *= orientationWeights[o] ;
820 wx2 *= orientationWeights[o] ;
821 wy1 *= orientationWeights[o] ;
822 wy2 *= orientationWeights[o] ;
823
824 /*VL_PRINTF("%d %d - %d %d %f %f - %f %f %f %f - %d \n ",x,y,binx,biny,hx,hy,wx1,wx2,wy1,wy2,o);*/
825
826 if (binx >= 0 && biny >=0) {
827 at(binx,biny,orientation) += thisModulus * wx1 * wy1 ;
828 }
829 if (binx < (signed)self->hogWidth - 1 && biny >=0) {
830 at(binx+1,biny,orientation) += thisModulus * wx2 * wy1 ;
831 }
832 if (binx < (signed)self->hogWidth - 1 && biny < (signed)self->hogHeight - 1) {
833 at(binx+1,biny+1,orientation) += thisModulus * wx2 * wy2 ;
834 }
835 if (binx >= 0 && biny < (signed)self->hogHeight - 1) {
836 at(binx,biny+1,orientation) += thisModulus * wx1 * wy2 ;
837 }
838 } /* next o */
839 } /* next x */
840 } /* next y */
841 }
842
843 /* ---------------------------------------------------------------- */
844 /** @brief Extract HOG features
845 ** @param self HOG object.
846 ** @param features HOG features (output).
847 **
848 ** This method is called after ::vl_hog_put_image or ::vl_hog_put_polar_field
849 ** in order to retrieve the computed HOG features. The buffer @c features must have the dimensions returned by
850 ** ::vl_hog_get_width, ::vl_hog_get_height, and ::vl_hog_get_dimension.
851 **/
852
853 void
vl_hog_extract(VlHog * self,float * features)854 vl_hog_extract (VlHog * self, float * features)
855 {
856 vl_index x, y ;
857 vl_uindex k ;
858 vl_size hogStride = self->hogWidth * self->hogHeight ;
859
860 assert(features) ;
861
862 #define at(x,y,k) (self->hog[(x) + (y) * self->hogWidth + (k) * hogStride])
863 #define atNorm(x,y) (self->hogNorm[(x) + (y) * self->hogWidth])
864
865 /*
866 Compute the squared L2 norm of the unoriented version of each HOG
867 cell histogram. The unoriented version is obtained by folding
868 the 2*numOrientations compotnent into numOrientations only.
869 */
870 {
871 float const * iter = self->hog ;
872 for (k = 0 ; k < self->numOrientations ; ++k) {
873 float * niter = self->hogNorm ;
874 float * niterEnd = self->hogNorm + self->hogWidth * self->hogHeight ;
875 vl_size stride = self->hogWidth*self->hogHeight*self->numOrientations ;
876 while (niter != niterEnd) {
877 float h1 = *iter ;
878 float h2 = *(iter + stride) ;
879 float h = h1 + h2 ;
880 *niter += h * h ;
881 niter++ ;
882 iter++ ;
883 }
884 }
885 }
886
887 /*
888 HOG block-normalisation.
889
890 The Dalal-Triggs implementation computes a normalized descriptor for
891 each block of 2x2 cells, by stacking the histograms of each cell
892 into a vector and L2-normalizing and truncating the result.
893
894 Each block-level descriptor is then decomposed back into cells
895 and corresponding parts are stacked into cell-level descritpors.
896 Each HOG cell is contained in exactly
897 four 2x2 cell blocks. For example, the cell number 5 in the following
898 figure is contained in blocks 1245, 2356, 4578, 5689:
899
900 +---+---+---+
901 | 1 | 2 | 3 |
902 +---+---+---+
903 | 4 | 5 | 6 |
904 +---+---+---+
905 | 7 | 8 | 9 |
906 +---+---+---+
907
908 Hence, when block-level descriptors are decomposed back
909 into cells, each cell receives contributions from four blocks. So,
910 if each cell started with a D-dimensional histogram, it
911 ends up with a 4D dimesional descriptor vector.
912
913 Note however that this is just a convenient way of rewriting the
914 blocks as per-cell contributions, but the block information
915 is unchanged. In particular, barring boundary effects,
916 in an array of H x W cells there are approximately HW blocks;
917 hence the L2 norm of all the blocks stacked is approximately HW
918 (because individual blocks are L2-normalized). Since this does
919 not change in the final HOG descriptor,
920 the L2 norm of the HOG descriptor of an image should be approximately
921 the same as the area of the image divided by the
922 area of a HOG cell. This can be used as a sanity check.
923
924 The UoCTTI variant differs in some non-negligible ways. First,
925 it includes both oriented and unoriented histograms, as well
926 as four components capturing texture. Second, and most importantly,
927 it merges the four chunks of block-level descirptors landing in
928 each cell into one by taking their average. This makes sense
929 because, ultimately, these four sub-descriptors are identical
930 to the original cell histogram, just with four different normalisations
931 applied.
932 */
933 {
934 float const * iter = self->hog ;
935 for (y = 0 ; y < (signed)self->hogHeight ; ++y) {
936 for (x = 0 ; x < (signed)self->hogWidth ; ++x) {
937
938 /* norm of upper-left, upper-right, ... cells */
939 vl_index xm = VL_MAX(x - 1, 0) ;
940 vl_index xp = VL_MIN(x + 1, (signed)self->hogWidth - 1) ;
941 vl_index ym = VL_MAX(y - 1, 0) ;
942 vl_index yp = VL_MIN(y + 1, (signed)self->hogHeight - 1) ;
943
944 double norm1 = atNorm(xm,ym) ;
945 double norm2 = atNorm(x,ym) ;
946 double norm3 = atNorm(xp,ym) ;
947 double norm4 = atNorm(xm,y) ;
948 double norm5 = atNorm(x,y) ;
949 double norm6 = atNorm(xp,y) ;
950 double norm7 = atNorm(xm,yp) ;
951 double norm8 = atNorm(x,yp) ;
952 double norm9 = atNorm(xp,yp) ;
953
954 double factor1, factor2, factor3, factor4 ;
955
956 double t1 = 0 ;
957 double t2 = 0 ;
958 double t3 = 0 ;
959 double t4 = 0 ;
960
961 float * oiter = features + x + self->hogWidth * y ;
962
963 /* each factor is the inverse of the l2 norm of one of the 2x2 blocks surrounding
964 cell x,y */
965 #if 0
966 if (self->transposed) {
967 /* if the image is transposed, y and x are swapped */
968 factor1 = 1.0 / VL_MAX(sqrt(norm1 + norm2 + norm4 + norm5), 1e-10) ;
969 factor3 = 1.0 / VL_MAX(sqrt(norm2 + norm3 + norm5 + norm6), 1e-10) ;
970 factor2 = 1.0 / VL_MAX(sqrt(norm4 + norm5 + norm7 + norm8), 1e-10) ;
971 factor4 = 1.0 / VL_MAX(sqrt(norm5 + norm6 + norm8 + norm9), 1e-10) ;
972 } else {
973 factor1 = 1.0 / VL_MAX(sqrt(norm1 + norm2 + norm4 + norm5), 1e-10) ;
974 factor2 = 1.0 / VL_MAX(sqrt(norm2 + norm3 + norm5 + norm6), 1e-10) ;
975 factor3 = 1.0 / VL_MAX(sqrt(norm4 + norm5 + norm7 + norm8), 1e-10) ;
976 factor4 = 1.0 / VL_MAX(sqrt(norm5 + norm6 + norm8 + norm9), 1e-10) ;
977 }
978 #else
979 /* as implemented in UOCTTI code */
980 if (self->transposed) {
981 /* if the image is transposed, y and x are swapped */
982 factor1 = 1.0 / sqrt(norm1 + norm2 + norm4 + norm5 + 1e-4) ;
983 factor3 = 1.0 / sqrt(norm2 + norm3 + norm5 + norm6 + 1e-4) ;
984 factor2 = 1.0 / sqrt(norm4 + norm5 + norm7 + norm8 + 1e-4) ;
985 factor4 = 1.0 / sqrt(norm5 + norm6 + norm8 + norm9 + 1e-4) ;
986 } else {
987 factor1 = 1.0 / sqrt(norm1 + norm2 + norm4 + norm5 + 1e-4) ;
988 factor2 = 1.0 / sqrt(norm2 + norm3 + norm5 + norm6 + 1e-4) ;
989 factor3 = 1.0 / sqrt(norm4 + norm5 + norm7 + norm8 + 1e-4) ;
990 factor4 = 1.0 / sqrt(norm5 + norm6 + norm8 + norm9 + 1e-4) ;
991 }
992 #endif
993
994 for (k = 0 ; k < self->numOrientations ; ++k) {
995 double ha = iter[hogStride * k] ;
996 double hb = iter[hogStride * (k + self->numOrientations)] ;
997 double hc ;
998
999 double ha1 = factor1 * ha ;
1000 double ha2 = factor2 * ha ;
1001 double ha3 = factor3 * ha ;
1002 double ha4 = factor4 * ha ;
1003
1004 double hb1 = factor1 * hb ;
1005 double hb2 = factor2 * hb ;
1006 double hb3 = factor3 * hb ;
1007 double hb4 = factor4 * hb ;
1008
1009 double hc1 = ha1 + hb1 ;
1010 double hc2 = ha2 + hb2 ;
1011 double hc3 = ha3 + hb3 ;
1012 double hc4 = ha4 + hb4 ;
1013
1014 ha1 = VL_MIN(0.2, ha1) ;
1015 ha2 = VL_MIN(0.2, ha2) ;
1016 ha3 = VL_MIN(0.2, ha3) ;
1017 ha4 = VL_MIN(0.2, ha4) ;
1018
1019 hb1 = VL_MIN(0.2, hb1) ;
1020 hb2 = VL_MIN(0.2, hb2) ;
1021 hb3 = VL_MIN(0.2, hb3) ;
1022 hb4 = VL_MIN(0.2, hb4) ;
1023
1024 hc1 = VL_MIN(0.2, hc1) ;
1025 hc2 = VL_MIN(0.2, hc2) ;
1026 hc3 = VL_MIN(0.2, hc3) ;
1027 hc4 = VL_MIN(0.2, hc4) ;
1028
1029 t1 += hc1 ;
1030 t2 += hc2 ;
1031 t3 += hc3 ;
1032 t4 += hc4 ;
1033
1034 switch (self->variant) {
1035 case VlHogVariantUoctti :
1036 ha = 0.5 * (ha1 + ha2 + ha3 + ha4) ;
1037 hb = 0.5 * (hb1 + hb2 + hb3 + hb4) ;
1038 hc = 0.5 * (hc1 + hc2 + hc3 + hc4) ;
1039 *oiter = ha ;
1040 *(oiter + hogStride * self->numOrientations) = hb ;
1041 *(oiter + 2 * hogStride * self->numOrientations) = hc ;
1042 break ;
1043
1044 case VlHogVariantDalalTriggs :
1045 *oiter = hc1 ;
1046 *(oiter + hogStride * self->numOrientations) = hc2 ;
1047 *(oiter + 2 * hogStride * self->numOrientations) = hc3 ;
1048 *(oiter + 3 * hogStride * self->numOrientations) = hc4 ;
1049 break ;
1050 }
1051 oiter += hogStride ;
1052
1053 } /* next orientation */
1054
1055 switch (self->variant) {
1056 case VlHogVariantUoctti :
1057 oiter += 2 * hogStride * self->numOrientations ;
1058 *oiter = (1.0f/sqrtf(18.0f)) * t1 ; oiter += hogStride ;
1059 *oiter = (1.0f/sqrtf(18.0f)) * t2 ; oiter += hogStride ;
1060 *oiter = (1.0f/sqrtf(18.0f)) * t3 ; oiter += hogStride ;
1061 *oiter = (1.0f/sqrtf(18.0f)) * t4 ; oiter += hogStride ;
1062 break ;
1063
1064 case VlHogVariantDalalTriggs :
1065 break ;
1066 }
1067 ++iter ;
1068 } /* next x */
1069 } /* next y */
1070 } /* block normalization */
1071 }
1072
1073