1 /** @file hog.c
2  ** @brief Histogram of Oriented Gradients (HOG) - Definition
3  ** @author Andrea Vedaldi
4  **/
5 
6 /*
7  Copyright (C) 2007-12 Andrea Vedaldi and Brian Fulkerson.
8  All rights reserved.
9 
10  This file is part of the VLFeat library and is made available under
11  the terms of the BSD license (see the COPYING file).
12 */
13 
14 #include "hog.h"
15 #include "mathop.h"
16 #include <string.h>
17 
18 /**
19 
20 <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~  -->
21 @page hog Histogram of Oriented Gradients (HOG) features
22 @author Andrea Vedaldi
23 <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~  -->
24 
25 @ref hog.h implements the Histogram of Oriented Gradients (HOG) features
26 in the variants of Dalal Triggs @cite{dalal05histograms} and of UOCTTI
27 @cite{felzenszwalb09object}. Applications include object detection
28 and deformable object detection.
29 
30 - @ref hog-overview
31 - @ref hog-tech
32 
33 <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~  -->
34 @section hog-overview Overview
35 <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~  -->
36 
37 HOG is a standard image feature used, among others, in object detection
38 and deformable object detection. It decomposes the image into square cells
39 of a given size (typically eight pixels), compute a histogram of oriented
40 gradient in each cell (similar to @ref sift), and then renormalizes
41 the cells by looking into adjacent blocks.
42 
43 VLFeat implements two HOG variants: the original one of Dalal-Triggs
44 @cite{dalal05histograms} and the one proposed in Felzenszwalb et al.
45 @cite{felzenszwalb09object}.
46 
47 In order to use HOG, start by creating a new HOG object, set the desired
48 parameters, pass a (color or grayscale) image, and read off the results.
49 
50 @code
51 VlHog * hog = vl_hog_new(VlHogVariantDalalTriggs, numOrientations, VL_FALSE) ;
52 vl_hog_put_image(hog, image, height, width, numChannels, cellSize) ;
53 hogWidth = vl_hog_get_width(hog) ;
54 hogHeight = vl_hog_get_height(hog) ;
55 hogDimenison = vl_hog_get_dimension(hog) ;
56 hogArray = vl_malloc(hogWidth*hogHeight*hogDimension*sizeof(float)) ;
57 vl_hog_extract(hog, hogArray) ;
58 vl_hog_delete(hog) ;
59 @endcode
60 
61 HOG is a feature array of the dimension returned by ::vl_hog_get_width,
62 ::vl_hog_get_height, with each feature (histogram) having
63 dimension ::vl_hog_get_dimension. The array is stored in row major order,
64 with the slowest varying dimension beying the dimension indexing the histogram
65 elements.
66 
67 The number of entreis in the histogram as well as their meaning depends
68 on the HOG variant and is detailed later. However, it is usually
69 unnecessary to know such details. @ref hog.h provides support for
70 creating an inconic representation of a HOG feature array:
71 
72 @code
73 glyphSize = vl_hog_get_glyph_size(hog) ;
74 imageHeight = glyphSize * hogArrayHeight ;
75 imageWidth = glyphSize * hogArrayWidth ;
76 image = vl_malloc(sizeof(float)*imageWidth*imageHeight) ;
77 vl_hog_render(hog, image, hogArray) ;
78 @endcode
79 
80 It is often convenient to mirror HOG features from left to right. This
81 can be obtained by mirroring an array of HOG cells, but the content
82 of each cell must also be rearranged. This can be done by
83 the permutation obtaiend by ::vl_hog_get_permutation.
84 
85 Furthermore, @ref hog.h suppots computing HOG features not from
86 images but from vector fields.
87 
88 <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~  -->
89 @section hog-tech Technical details
90 <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~  -->
91 
92 HOG divdes the input image into square cells of size @c cellSize,
93 fitting as many cells as possible, filling the image domain from
94 the upper-left corner down to the right one. For each row and column,
95 the last cell is at least half contained in the image.
96 More precisely, the number of cells obtained in this manner is:
97 
98 @code
99 hogWidth = (width + cellSize/2) / cellSize ;
100 hogHeight = (height + cellSize/2) / cellSize ;
101 @endcode
102 
103 Then the image gradient @f$ \nabla \ell(x,y) @f$
104 is computed by using central difference (for colour image
105 the channel with the largest gradient at that pixel is used).
106 The gradient @f$ \nabla \ell(x,y) @f$ is assigned to one of @c 2*numOrientations orientation in the
107 range @f$ [0,2\pi) @f$ (see @ref hog-conventions for details).
108 Contributions are then accumulated by using bilinear interpolation
109 to four neigbhour cells, as in @ref sift.
110 This results in an histogram  @f$h_d@f$ of dimension
111 2*numOrientations, called of @e directed orientations
112 since it accounts for the direction as well as the orientation
113 of the gradient. A second histogram @f$h_u@f$ of undirected orientations
114 of half the size is obtained by folding @f$ h_d @f$ into two.
115 
116 Let a block of cell be a @f$ 2\times 2 @f$ sub-array of cells.
117 Let the norm of a block be the @f$ l^2 @f$ norm of the stacking of the
118 respective unoriented histogram. Given a HOG cell, four normalisation
119 factors are then obtained as the inverse of the norm of the four
120 blocks that contain the cell.
121 
122 For the Dalal-Triggs variant, each histogram @f$ h_d @f$ is copied
123 four times, normalised using the four different normalisation factors,
124 the four vectors are stacked, saturated at 0.2, and finally stored as the descriptor
125 of the cell. This results in a @c numOrientations * 4 dimensional
126 cell descriptor. Blocks are visited from left to right and top to bottom
127 when forming the final descriptor.
128 
129 For the UOCCTI descriptor, the same is done for both the undirected
130 as well as the directed orientation histograms. This would yield
131 a dimension of @c 4*(2+1)*numOrientations elements, but the resulting
132 vector is projected down to @c (2+1)*numOrientations elements
133 by averaging corresponding histogram dimensions. This was shown to
134 be an algebraic approximation of PCA for descriptors computed on natural
135 images.
136 
137 In addition, for the UOCTTI variant the l1 norm of each of the
138 four l2 normalised undirected histograms is computed and stored
139 as additional four dimensions, for a total of
140 @c 4+3*numOrientations dimensions.
141 
142 <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~  -->
143 @subsection hog-conventions Conventions
144 <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~  -->
145 
146 The orientation of a gradient is expressed as the angle it forms with the
147 horizontal axis of the image. Angles are measured clock-wise (as the vertical
148 image axis points downards), and the null angle corresponds to
149 an horizontal vector pointing right. The quantized directed
150 orientations are @f$ \mathrm{k} \pi / \mathrm{numOrientations} @f$, where
151 @c k is an index that varies in the ingeger
152 range @f$ \{0, \dots, 2\mathrm{numOrientations} - 1\} @f$.
153 
154 Note that the orientations capture the orientation of the gradeint;
155 image edges would be oriented at 90 degrees from these.
156 
157 **/
158 
159 /* ---------------------------------------------------------------- */
160 /** @brief Create a new HOG object
161  ** @param variant HOG descriptor variant.
162  ** @param numOrientations number of distinguished orientations.
163  ** @param transposed wether images are transposed (column major).
164  ** @return the new HOG object.
165  **
166  ** The function creates a new HOG object to extract descriptors of
167  ** the prescribed @c variant. The angular resolution is set by
168  ** @a numOrientations, which specifies the number of <em>undirected</em>
169  ** orientations. The object can work with column major images
170  ** by setting @a transposed to true.
171  **/
172 
173 VlHog *
vl_hog_new(VlHogVariant variant,vl_size numOrientations,vl_bool transposed)174 vl_hog_new (VlHogVariant variant, vl_size numOrientations, vl_bool transposed)
175 {
176   vl_index o, k ;
177   VlHog * self = vl_calloc(1, sizeof(VlHog)) ;
178 
179   assert(numOrientations >= 1) ;
180 
181   self->variant = variant ;
182   self->numOrientations = numOrientations ;
183   self->glyphSize = 21 ;
184   self->transposed = transposed ;
185   self->useBilinearOrientationAssigment = VL_FALSE ;
186   self->orientationX = vl_malloc(sizeof(float) * self->numOrientations) ;
187   self->orientationY = vl_malloc(sizeof(float) * self->numOrientations) ;
188 
189   /*
190    Create a vector along the center of each orientation bin. These
191    are used to map gradients to bins. If the image is transposed,
192    then this can be adjusted here by swapping X and Y in these
193    vectors.
194    */
195   for(o = 0 ; o < (signed)self->numOrientations ; ++o) {
196     double angle = o * VL_PI / self->numOrientations ;
197     if (!self->transposed) {
198       self->orientationX[o] = (float) cos(angle) ;
199       self->orientationY[o] = (float) sin(angle) ;
200     } else {
201       self->orientationX[o] = (float) sin(angle) ;
202       self->orientationY[o] = (float) cos(angle) ;
203     }
204   }
205 
206   /*
207    If the number of orientation is equal to 9, one gets:
208 
209    Uoccti:: 18 directed orientations + 9 undirected orientations + 4 texture
210    DalalTriggs:: 9 undirected orientations x 4 blocks.
211    */
212   switch (self->variant) {
213     case VlHogVariantUoctti:
214       self->dimension = 3*self->numOrientations + 4 ;
215       break ;
216 
217     case VlHogVariantDalalTriggs:
218       self->dimension = 4*self->numOrientations ;
219       break ;
220 
221     default:
222       assert(0) ;
223   }
224 
225   /*
226    A permutation specifies how to permute elements in a HOG
227    descriptor to flip it horizontally. Since the first orientation
228    of index 0 points to the right, this must be swapped with orientation
229    self->numOrientation that points to the left (for the directed case,
230    and to itself for the undirected one).
231    */
232 
233   self->permutation = vl_malloc(self->dimension * sizeof(vl_index)) ;
234   switch (self->variant) {
235     case VlHogVariantUoctti:
236       for(o = 0 ; o < (signed)self->numOrientations ; ++o) {
237         vl_index op = self->numOrientations - o ;
238         self->permutation[o] = op ;
239         self->permutation[o + self->numOrientations] = (op + self->numOrientations) % (2*self->numOrientations) ;
240         self->permutation[o + 2*self->numOrientations] = (op % self->numOrientations) + 2*self->numOrientations ;
241       }
242       for (k = 0 ; k < 4 ; ++k) {
243         /* The texture features correspond to four displaced block around
244          a cell. These permute with a lr flip as for DalalTriggs. */
245         vl_index blockx = k % 2 ;
246         vl_index blocky = k / 2 ;
247         vl_index q = (1 - blockx) + blocky * 2 ;
248         self->permutation[k + self->numOrientations * 3] = q + self->numOrientations * 3 ;
249       }
250       break ;
251 
252     case VlHogVariantDalalTriggs:
253       for(k = 0 ; k < 4 ; ++k) {
254         /* Find the corresponding block. Blocks are listed in order 1,2,3,4,...
255            from left to right and top to bottom */
256         vl_index blockx = k % 2 ;
257         vl_index blocky = k / 2 ;
258         vl_index q = (1 - blockx) + blocky * 2 ;
259         for(o = 0 ; o < (signed)self->numOrientations ; ++o) {
260           vl_index op = self->numOrientations - o ;
261           self->permutation[o + k*self->numOrientations] = (op % self->numOrientations) + q*self->numOrientations ;
262         }
263       }
264       break ;
265 
266     default:
267       assert(0) ;
268   }
269 
270   /*
271    Create glyphs for representing the HOG features/ filters. The glyphs
272    are simple bars, oriented orthogonally to the gradients to represent
273    image edges. If the object is configured to work on transposed image,
274    the glyphs images are also stored in column-major.
275    */
276   self->glyphs = vl_calloc(self->glyphSize * self->glyphSize * self->numOrientations, sizeof(float)) ;
277 #define atglyph(x,y,k) self->glyphs[(x) + self->glyphSize * (y) + self->glyphSize * self->glyphSize * (k)]
278   for (o = 0 ; o < (signed)self->numOrientations ; ++o) {
279     double angle = fmod(o * VL_PI / self->numOrientations + VL_PI/2, VL_PI) ;
280     double x2 = self->glyphSize * cos(angle) / 2 ;
281     double y2 = self->glyphSize * sin(angle) / 2 ;
282 
283     if (angle <= VL_PI / 4 || angle >= VL_PI * 3 / 4) {
284       /* along horizontal direction */
285       double slope = y2 / x2 ;
286       double offset = (1 - slope) * (self->glyphSize - 1) / 2 ;
287       vl_index skip = (1 - fabs(cos(angle))) / 2 * self->glyphSize ;
288       vl_index i, j ;
289       for (i = skip ; i < (signed)self->glyphSize - skip ; ++i) {
290         j = vl_round_d(slope * i + offset) ;
291         if (! self->transposed) {
292           atglyph(i,j,o) = 1 ;
293         } else {
294           atglyph(j,i,o) = 1 ;
295         }
296       }
297     } else {
298       /* along vertical direction */
299       double slope = x2 / y2 ;
300       double offset = (1 - slope) * (self->glyphSize - 1) / 2 ;
301       vl_index skip = (1 - sin(angle)) / 2 * self->glyphSize ;
302       vl_index i, j ;
303       for (j = skip ; j < (signed)self->glyphSize - skip; ++j) {
304         i = vl_round_d(slope * j + offset) ;
305         if (! self->transposed) {
306           atglyph(i,j,o) = 1 ;
307         } else {
308           atglyph(j,i,o) = 1 ;
309         }
310       }
311     }
312   }
313   return self ;
314 }
315 
316 /* ---------------------------------------------------------------- */
317 /** @brief Delete a HOG object
318  ** @param self HOG object to delete.
319  **/
320 
321 void
vl_hog_delete(VlHog * self)322 vl_hog_delete (VlHog * self)
323 {
324   if (self->orientationX) {
325     vl_free(self->orientationX) ;
326     self->orientationX = NULL ;
327   }
328 
329   if (self->orientationY) {
330     vl_free(self->orientationY) ;
331     self->orientationY = NULL ;
332   }
333 
334   if (self->glyphs) {
335     vl_free(self->glyphs) ;
336     self->glyphs = NULL ;
337   }
338 
339   if (self->permutation) {
340     vl_free(self->permutation) ;
341     self->permutation = NULL ;
342   }
343 
344   if (self->hog) {
345     vl_free(self->hog) ;
346     self->hog = NULL ;
347   }
348 
349   if (self->hogNorm) {
350     vl_free(self->hogNorm) ;
351     self->hogNorm = NULL ;
352   }
353 
354   vl_free(self) ;
355 }
356 
357 
358 /* ---------------------------------------------------------------- */
359 /** @brief Get HOG glyph size
360  ** @param self HOG object.
361  ** @return size (height and width) of a glyph.
362  **/
363 
364 vl_size
vl_hog_get_glyph_size(VlHog const * self)365 vl_hog_get_glyph_size (VlHog const * self)
366 {
367   return self->glyphSize ;
368 }
369 
370 /* ---------------------------------------------------------------- */
371 /** @brief Get HOG left-right flip permutation
372  ** @param self HOG object.
373  ** @return left-right permutation.
374  **
375  ** The function returns a pointer to an array @c permutation of ::vl_hog_get_dimension
376  ** elements. Given a HOG descriptor (for a cell) @c hog, which is also
377  ** a vector of ::vl_hog_get_dimension elements, the
378  ** descriptor obtained for the same image flipped horizotnally is
379  ** given by <code>flippedHog[i] = hog[permutation[i]]</code>.
380  **/
381 
382 vl_index const *
vl_hog_get_permutation(VlHog const * self)383 vl_hog_get_permutation (VlHog const * self)
384 {
385   return self->permutation ;
386 }
387 
388 /* ---------------------------------------------------------------- */
389 /** @brief Turn bilinear interpolation of assignments on or off
390  ** @param self HOG object.
391  ** @param x @c true if orientations should be assigned with bilinear interpolation.
392  **/
393 
394 void
vl_hog_set_use_bilinear_orientation_assignments(VlHog * self,vl_bool x)395 vl_hog_set_use_bilinear_orientation_assignments (VlHog * self, vl_bool x) {
396   self->useBilinearOrientationAssigment = x ;
397 }
398 
399 /** @brief Tell whether assignments use bilinear interpolation or not
400  ** @param self HOG object.
401  ** @return @c true if orientations are be assigned with bilinear interpolation.
402  **/
403 
404 vl_bool
vl_hog_get_use_bilinear_orientation_assignments(VlHog const * self)405 vl_hog_get_use_bilinear_orientation_assignments (VlHog const * self) {
406   return self->useBilinearOrientationAssigment ;
407 }
408 
409 /* ---------------------------------------------------------------- */
410 /** @brief Render a HOG descriptor to a glyph image
411  ** @param self HOG object.
412  ** @param image glyph image (output).
413  ** @param descriptor HOG descriptor.
414  ** @param width HOG descriptor width.
415  ** @param height HOG descriptor height.
416  **
417  ** The function renders the HOG descriptor or filter
418  ** @a descriptor as an image (for visualization) and stores the result in
419  ** the buffer @a image. This buffer
420  ** must be an array of dimensions @c width*glyphSize
421  ** by @c height*glyphSize elements, where @c glyphSize is
422  ** obtained from ::vl_hog_get_glyph_size and is the size in pixels
423  ** of the image element used to represent the descriptor of one
424  ** HOG cell.
425  **/
426 
427 void
vl_hog_render(VlHog const * self,float * image,float const * descriptor,vl_size width,vl_size height)428 vl_hog_render (VlHog const * self,
429                float * image,
430                float const * descriptor,
431                vl_size width,
432                vl_size height)
433 {
434   vl_index x, y, k, cx, cy ;
435   vl_size hogStride = width * height ;
436 
437   assert(self) ;
438   assert(image) ;
439   assert(descriptor) ;
440   assert(width > 0) ;
441   assert(height > 0) ;
442 
443   for (y = 0 ; y < (signed)height ; ++y) {
444     for (x = 0 ; x < (signed)width ; ++x) {
445       float minWeight = 0 ;
446       float maxWeight = 0 ;
447 
448       for (k = 0 ; k < (signed)self->numOrientations ; ++k) {
449         float weight ;
450         float const * glyph = self->glyphs + k * (self->glyphSize*self->glyphSize) ;
451         float * glyphImage = image + self->glyphSize * x + y * width * (self->glyphSize*self->glyphSize) ;
452 
453         switch (self->variant) {
454           case VlHogVariantUoctti:
455             weight =
456             descriptor[k * hogStride] +
457             descriptor[(k + self->numOrientations) * hogStride] +
458             descriptor[(k + 2 * self->numOrientations) * hogStride] ;
459             break ;
460           case VlHogVariantDalalTriggs:
461             weight =
462             descriptor[k * hogStride] +
463             descriptor[(k + self->numOrientations) * hogStride] +
464             descriptor[(k + 2 * self->numOrientations) * hogStride] +
465             descriptor[(k + 3 * self->numOrientations) * hogStride] ;
466             break ;
467           default:
468             abort() ;
469         }
470         maxWeight = VL_MAX(weight, maxWeight) ;
471         minWeight = VL_MIN(weight, minWeight);
472 
473         for (cy = 0 ; cy < (signed)self->glyphSize ; ++cy) {
474           for (cx = 0 ; cx < (signed)self->glyphSize ; ++cx) {
475             *glyphImage++ += weight * (*glyph++) ;
476           }
477           glyphImage += (width - 1) * self->glyphSize  ;
478         }
479       } /* next orientation */
480 
481       {
482         float * glyphImage = image + self->glyphSize * x + y * width * (self->glyphSize*self->glyphSize) ;
483         for (cy = 0 ; cy < (signed)self->glyphSize ; ++cy) {
484           for (cx = 0 ; cx < (signed)self->glyphSize ; ++cx) {
485             float value = *glyphImage ;
486             *glyphImage++ = VL_MAX(minWeight, VL_MIN(maxWeight, value)) ;
487           }
488           glyphImage += (width - 1) * self->glyphSize  ;
489         }
490       }
491 
492       ++ descriptor ;
493     } /* next column of cells (x) */
494   } /* next row of cells (y) */
495 }
496 
497 /* ---------------------------------------------------------------- */
498 /** @brief Get the dimension of the HOG features
499  ** @param self HOG object.
500  ** @return imension of a HOG cell descriptors.
501  **/
502 
503 vl_size
vl_hog_get_dimension(VlHog const * self)504 vl_hog_get_dimension (VlHog const * self)
505 {
506   return self->dimension ;
507 }
508 
509 /** @brief Get the width of the HOG cell array
510  ** @param self HOG object.
511  ** @return number of HOG cells in the horizontal direction.
512  **/
513 
514 vl_size
vl_hog_get_width(VlHog * self)515 vl_hog_get_width (VlHog * self)
516 {
517   return self->hogWidth ;
518 }
519 
520 /** @brief Get the height of the HOG cell array
521  ** @param self HOG object.
522  ** @return number of HOG cells in the vertical direction.
523  **/
524 
525 vl_size
vl_hog_get_height(VlHog * self)526 vl_hog_get_height (VlHog * self)
527 {
528   return self->hogHeight ;
529 }
530 
531 /* ---------------------------------------------------------------- */
532 /** @internal @brief Prepare internal buffers
533  ** @param self HOG object.
534  ** @param width image width.
535  ** @param height image height.
536  ** @param cellSize size of a HOG cell.
537  **/
538 
539 static void
vl_hog_prepare_buffers(VlHog * self,vl_size width,vl_size height,vl_size cellSize)540 vl_hog_prepare_buffers (VlHog * self, vl_size width, vl_size height, vl_size cellSize)
541 {
542   vl_size hogWidth = (width + cellSize/2) / cellSize ;
543   vl_size hogHeight = (height + cellSize/2) / cellSize ;
544 
545   assert(width > 3) ;
546   assert(height > 3) ;
547   assert(hogWidth > 0) ;
548   assert(hogHeight > 0) ;
549 
550   if (self->hog &&
551       self->hogWidth == hogWidth &&
552       self->hogHeight == hogHeight) {
553     /* a suitable buffer is already allocated */
554     memset(self->hog, 0, sizeof(float) * hogWidth * hogHeight * self->numOrientations * 2) ;
555     memset(self->hogNorm, 0, sizeof(float) * hogWidth * hogHeight) ;
556     return ;
557   }
558 
559   if (self->hog) {
560     vl_free(self->hog) ;
561     self->hog = NULL ;
562   }
563 
564   if (self->hogNorm) {
565     vl_free(self->hogNorm) ;
566     self->hogNorm = NULL ;
567   }
568 
569   self->hog = vl_calloc(hogWidth * hogHeight * self->numOrientations * 2, sizeof(float)) ;
570   self->hogNorm = vl_calloc(hogWidth * hogHeight, sizeof(float)) ;
571   self->hogWidth = hogWidth ;
572   self->hogHeight = hogHeight ;
573 }
574 
575 /* ---------------------------------------------------------------- */
576 /** @brief Process features starting from an image
577  ** @param self HOG object.
578  ** @param image image to process.
579  ** @param width image width.
580  ** @param height image height.
581  ** @param numChannels number of image channles.
582  ** @param cellSize size of a HOG cell.
583  **
584  ** The buffer @c hog must be a three-dimensional array.
585  ** The first two dimensions are @c (width + cellSize/2)/cellSize and
586  ** @c (height + cellSize/2)/cellSize, where divisions are integer.
587  ** This is approximately @c width/cellSize and @c height/cellSize,
588  ** adjusted so that the last cell is at least half contained in the
589  ** image.
590  **
591  ** The image @c width and @c height must be not smaller than three
592  ** pixels and not smaller than @c cellSize.
593  **/
594 
595 void
vl_hog_put_image(VlHog * self,float const * image,vl_size width,vl_size height,vl_size numChannels,vl_size cellSize)596 vl_hog_put_image (VlHog * self,
597                   float const * image,
598                   vl_size width, vl_size height, vl_size numChannels,
599                   vl_size cellSize)
600 {
601   vl_size hogStride ;
602   vl_size channelStride = width * height ;
603   vl_index x, y ;
604   vl_uindex k ;
605 
606   assert(self) ;
607   assert(image) ;
608 
609   /* clear features */
610   vl_hog_prepare_buffers(self, width, height, cellSize) ;
611   hogStride = self->hogWidth * self->hogHeight ;
612 
613 #define at(x,y,k) (self->hog[(x) + (y) * self->hogWidth + (k) * hogStride])
614 
615   /* compute gradients and map the to HOG cells by bilinear interpolation */
616   for (y = 1 ; y < (signed)height - 1 ; ++y) {
617     for (x = 1 ; x < (signed)width - 1 ; ++x) {
618       float gradx = 0 ;
619       float grady = 0 ;
620       float gradNorm ;
621       float orientationWeights [2] = {-1, -1} ;
622       vl_index orientationBins [2] = {-1, -1} ;
623       vl_index orientation = 0 ;
624       float hx, hy, wx1, wx2, wy1, wy2 ;
625       vl_index binx, biny, o ;
626 
627       /*
628        Compute the gradient at (x,y). The image channel with
629        the maximum gradient at each location is selected.
630        */
631       {
632         float const * iter = image + y * width + x ;
633         float gradNorm2 = 0 ;
634         for (k = 0 ; k < numChannels ; ++k) {
635           float gradx_ = *(iter + 1) - *(iter - 1) ;
636           float grady_ = *(iter + width)  - *(iter - width) ;
637           float gradNorm2_ = gradx_ * gradx_ + grady_ * grady_ ;
638           if (gradNorm2_ > gradNorm2) {
639             gradx = gradx_ ;
640             grady = grady_ ;
641             gradNorm2 = gradNorm2_ ;
642           }
643           iter += channelStride ;
644         }
645         gradNorm = sqrtf(gradNorm2) ;
646       }
647 
648       /*
649        Map the gradient to the closest and second closets orientation bins.
650        There are numOrientations orientation in the interval [0,pi).
651        The next numOriantations are the symmetric ones, for a total
652        of 2*numOrientation directed orientations.
653        */
654       for (k = 0 ; k < self->numOrientations ; ++k) {
655         float orientationScore_ = gradx * self->orientationX[k] +  grady * self->orientationY[k] ;
656         vl_index orientationBin_ = k ;
657         if (orientationScore_ < 0) {
658           orientationScore_ = - orientationScore_ ;
659           orientationBin_ += self->numOrientations ;
660         }
661         if (orientationScore_ > orientationWeights[0]) {
662           orientationBins[1] = orientationBins[0] ;
663           orientationWeights[1] = orientationWeights[0] ;
664           orientationBins[0] = orientationBin_ ; ;
665           orientationWeights[0] = orientationScore_ ;
666         } else if (orientationScore_ > orientationWeights[1]) {
667           orientationBins[1] = orientationBin_ ;
668           orientationWeights[1] = orientationScore_ ;
669         }
670       }
671 
672       if (self->useBilinearOrientationAssigment) {
673         /* min(1.0,...) guards against small overflows causing NaNs */
674         float angle0 = acosf(VL_MIN(orientationWeights[0] / VL_MAX(gradNorm, 1e-10),1.0)) ;
675         orientationWeights[1] = angle0 / (VL_PI / self->numOrientations) ;
676         orientationWeights[0] = 1 - orientationWeights[1] ;
677       } else {
678         orientationWeights[0] = 1 ;
679         orientationBins[1] = -1 ;
680       }
681 
682       for (o = 0 ; o < 2 ; ++o) {
683         float ow ;
684         /*
685          Accumulate the gradient. hx is the distance of the
686          pixel x to the cell center at its left, in units of cellSize.
687          With this parametrixation, a pixel on the cell center
688          has hx = 0, which gradually increases to 1 moving to the next
689          center.
690          */
691 
692         orientation = orientationBins[o] ;
693         if (orientation < 0) continue ;
694 
695         /*  (x - (w-1)/2) / w = (x + 0.5)/w - 0.5 */
696         hx = (x + 0.5) / cellSize - 0.5 ;
697         hy = (y + 0.5) / cellSize - 0.5 ;
698         binx = vl_floor_f(hx) ;
699         biny = vl_floor_f(hy) ;
700         wx2 = hx - binx ;
701         wy2 = hy - biny ;
702         wx1 = 1.0 - wx2 ;
703         wy1 = 1.0 - wy2 ;
704 
705         ow = orientationWeights[o] ;
706 
707         /*VL_PRINTF("%d %d - %d %d %f %f - %f %f %f %f - %d \n ",x,y,binx,biny,hx,hy,wx1,wx2,wy1,wy2,o);*/
708 
709         if (binx >= 0 && biny >=0) {
710           at(binx,biny,orientation) += gradNorm * ow * wx1 * wy1 ;
711         }
712         if (binx < (signed)self->hogWidth - 1 && biny >=0) {
713           at(binx+1,biny,orientation) += gradNorm * ow * wx2 * wy1 ;
714         }
715         if (binx < (signed)self->hogWidth - 1 && biny < (signed)self->hogHeight - 1) {
716           at(binx+1,biny+1,orientation) += gradNorm * ow * wx2 * wy2 ;
717         }
718         if (binx >= 0 && biny < (signed)self->hogHeight - 1) {
719           at(binx,biny+1,orientation) += gradNorm * ow * wx1 * wy2 ;
720         }
721       } /* next o */
722     } /* next x */
723   } /* next y */
724 }
725 
726 /* ---------------------------------------------------------------- */
727 /** @brief Process features starting from a field in polar notation
728  ** @param self HOG object.
729  ** @param modulus image gradient modulus.
730  ** @param angle image gradient angle.
731  ** @param directed wrap the gradient angles at 2pi (directed) or pi (undirected).
732  ** @param width image width.
733  ** @param height image height.
734  ** @param cellSize size of a HOG cell.
735  **
736  ** The function behaves like ::vl_hog_put_image, but foregoes the internal
737  ** computation of the gradient field, allowing the user to specify
738  ** their own. Angles are measure clockwise, the y axis pointing downwards,
739  ** starting from the x axis (pointing to the right).
740  **/
741 
vl_hog_put_polar_field(VlHog * self,float const * modulus,float const * angle,vl_bool directed,vl_size width,vl_size height,vl_size cellSize)742 void vl_hog_put_polar_field (VlHog * self,
743                              float const * modulus,
744                              float const * angle,
745                              vl_bool directed,
746                              vl_size width, vl_size height,
747                              vl_size cellSize)
748 {
749   vl_size hogStride ;
750   vl_index x, y, o ;
751   vl_index period = self->numOrientations * (directed ? 2 : 1) ;
752   double angleStep = VL_PI / self->numOrientations ;
753 
754   assert(self) ;
755   assert(modulus) ;
756   assert(angle) ;
757 
758   /* clear features */
759   vl_hog_prepare_buffers(self, width, height, cellSize) ;
760   hogStride = self->hogWidth * self->hogHeight ;
761 
762 #define at(x,y,k) (self->hog[(x) + (y) * self->hogWidth + (k) * hogStride])
763 #define atNorm(x,y) (self->hogNorm[(x) + (y) * self->hogWidth])
764 
765   /* fill HOG cells from gradient field */
766   for (y = 0 ; y < (signed)height ; ++y) {
767     for (x = 0 ; x < (signed)width ; ++x) {
768       float ho, hx, hy, wo1, wo2, wx1, wx2, wy1, wy2 ;
769       vl_index bino, binx, biny ;
770       float orientationWeights [2] = {0,0} ;
771       vl_index orientationBins [2] = {-1,-1} ;
772       vl_index orientation = 0 ;
773       float thisAngle = *angle++ ;
774       float thisModulus = *modulus++ ;
775 
776       if (thisModulus <= 0.0f) continue ;
777 
778       /*  (x - (w-1)/2) / w = (x + 0.5)/w - 0.5 */
779 
780       ho = (float)thisAngle / angleStep ;
781       bino = vl_floor_f(ho) ;
782       wo2 = ho - bino ;
783       wo1 = 1.0f - wo2 ;
784 
785       while (bino < 0) { bino += self->numOrientations * 2 ; }
786 
787       if (self->useBilinearOrientationAssigment) {
788         orientationBins[0] = bino % period ;
789         orientationBins[1] = (bino + 1) % period ;
790         orientationWeights[0] = wo1 ;
791         orientationWeights[1] = wo2 ;
792       } else {
793         orientationBins[0] = (bino + ((wo1 > wo2) ? 0 : 1)) % period ;
794         orientationWeights[0] = 1 ;
795         orientationBins[1] = -1 ;
796       }
797 
798       for (o = 0 ; o < 2 ; ++o) {
799         /*
800          Accumulate the gradient. hx is the distance of the
801          pixel x to the cell center at its left, in units of cellSize.
802          With this parametrixation, a pixel on the cell center
803          has hx = 0, which gradually increases to 1 moving to the next
804          center.
805          */
806 
807         orientation = orientationBins[o] ;
808         if (orientation < 0) continue ;
809 
810         hx = (x + 0.5) / cellSize - 0.5 ;
811         hy = (y + 0.5) / cellSize - 0.5 ;
812         binx = vl_floor_f(hx) ;
813         biny = vl_floor_f(hy) ;
814         wx2 = hx - binx ;
815         wy2 = hy - biny ;
816         wx1 = 1.0 - wx2 ;
817         wy1 = 1.0 - wy2 ;
818 
819         wx1 *= orientationWeights[o] ;
820         wx2 *= orientationWeights[o] ;
821         wy1 *= orientationWeights[o] ;
822         wy2 *= orientationWeights[o] ;
823 
824         /*VL_PRINTF("%d %d - %d %d %f %f - %f %f %f %f - %d \n ",x,y,binx,biny,hx,hy,wx1,wx2,wy1,wy2,o);*/
825 
826         if (binx >= 0 && biny >=0) {
827           at(binx,biny,orientation) += thisModulus * wx1 * wy1 ;
828         }
829         if (binx < (signed)self->hogWidth - 1 && biny >=0) {
830           at(binx+1,biny,orientation) += thisModulus * wx2 * wy1 ;
831         }
832         if (binx < (signed)self->hogWidth - 1 && biny < (signed)self->hogHeight - 1) {
833           at(binx+1,biny+1,orientation) += thisModulus * wx2 * wy2 ;
834         }
835         if (binx >= 0 && biny < (signed)self->hogHeight - 1) {
836           at(binx,biny+1,orientation) += thisModulus * wx1 * wy2 ;
837         }
838       } /* next o */
839     } /* next x */
840   } /* next y */
841 }
842 
843 /* ---------------------------------------------------------------- */
844 /** @brief Extract HOG features
845  ** @param self HOG object.
846  ** @param features HOG features (output).
847  **
848  ** This method is called after ::vl_hog_put_image or ::vl_hog_put_polar_field
849  ** in order to retrieve the computed HOG features. The buffer @c features must have the dimensions returned by
850  ** ::vl_hog_get_width, ::vl_hog_get_height, and ::vl_hog_get_dimension.
851  **/
852 
853 void
vl_hog_extract(VlHog * self,float * features)854 vl_hog_extract (VlHog * self, float * features)
855 {
856   vl_index x, y ;
857   vl_uindex k ;
858   vl_size hogStride = self->hogWidth * self->hogHeight ;
859 
860   assert(features) ;
861 
862 #define at(x,y,k) (self->hog[(x) + (y) * self->hogWidth + (k) * hogStride])
863 #define atNorm(x,y) (self->hogNorm[(x) + (y) * self->hogWidth])
864 
865   /*
866    Compute the squared L2 norm of the unoriented version of each HOG
867    cell histogram. The unoriented version is obtained by folding
868    the 2*numOrientations compotnent into numOrientations only.
869    */
870   {
871     float const * iter = self->hog ;
872     for (k = 0 ; k < self->numOrientations ; ++k) {
873       float * niter = self->hogNorm ;
874       float * niterEnd = self->hogNorm + self->hogWidth * self->hogHeight ;
875       vl_size stride = self->hogWidth*self->hogHeight*self->numOrientations ;
876       while (niter != niterEnd) {
877         float h1 = *iter ;
878         float h2 = *(iter + stride) ;
879         float h = h1 + h2 ;
880         *niter += h * h ;
881         niter++ ;
882         iter++ ;
883       }
884     }
885   }
886 
887   /*
888    HOG block-normalisation.
889 
890    The Dalal-Triggs implementation computes a normalized descriptor for
891    each block of 2x2 cells, by stacking the histograms of each cell
892    into a vector and L2-normalizing and truncating the result.
893 
894    Each block-level descriptor is then decomposed back into cells
895    and corresponding parts are stacked into cell-level descritpors.
896    Each HOG cell is contained in exactly
897    four 2x2 cell blocks. For example, the cell number 5 in the following
898    figure is contained in blocks 1245, 2356, 4578, 5689:
899 
900    +---+---+---+
901    | 1 | 2 | 3 |
902    +---+---+---+
903    | 4 | 5 | 6 |
904    +---+---+---+
905    | 7 | 8 | 9 |
906    +---+---+---+
907 
908    Hence, when block-level descriptors are decomposed back
909    into cells, each cell receives contributions from four blocks. So,
910    if each cell started with a D-dimensional histogram, it
911    ends up with a 4D dimesional descriptor vector.
912 
913    Note however that this is just a convenient way of rewriting the
914    blocks as per-cell contributions, but the block information
915    is unchanged. In particular, barring boundary effects,
916    in an array of H x W cells there are approximately HW blocks;
917    hence the L2 norm of all the blocks stacked is approximately HW
918    (because individual blocks are L2-normalized). Since this does
919    not change in the final HOG descriptor,
920    the L2 norm of the HOG descriptor of an image should be approximately
921    the same as the area of the image divided by the
922    area of a HOG cell. This can be used as a sanity check.
923 
924    The UoCTTI variant differs in some non-negligible ways. First,
925    it includes both oriented and unoriented histograms, as well
926    as four components capturing texture. Second, and most importantly,
927    it merges the four chunks of block-level descirptors landing in
928    each cell into one by taking their average. This makes sense
929    because, ultimately, these four sub-descriptors are identical
930    to the original cell histogram, just with four different normalisations
931    applied.
932    */
933   {
934     float const * iter = self->hog ;
935     for (y = 0 ; y < (signed)self->hogHeight ; ++y) {
936       for (x = 0 ; x < (signed)self->hogWidth ; ++x) {
937 
938         /* norm of upper-left, upper-right, ... cells */
939         vl_index xm = VL_MAX(x - 1, 0) ;
940         vl_index xp = VL_MIN(x + 1, (signed)self->hogWidth - 1) ;
941         vl_index ym = VL_MAX(y - 1, 0) ;
942         vl_index yp = VL_MIN(y + 1, (signed)self->hogHeight - 1) ;
943 
944         double norm1 = atNorm(xm,ym) ;
945         double norm2 = atNorm(x,ym) ;
946         double norm3 = atNorm(xp,ym) ;
947         double norm4 = atNorm(xm,y) ;
948         double norm5 = atNorm(x,y) ;
949         double norm6 = atNorm(xp,y) ;
950         double norm7 = atNorm(xm,yp) ;
951         double norm8 = atNorm(x,yp) ;
952         double norm9 = atNorm(xp,yp) ;
953 
954         double factor1, factor2, factor3, factor4 ;
955 
956         double t1 = 0 ;
957         double t2 = 0 ;
958         double t3 = 0 ;
959         double t4 = 0 ;
960 
961         float * oiter = features + x + self->hogWidth * y ;
962 
963         /* each factor is the inverse of the l2 norm of one of the 2x2 blocks surrounding
964            cell x,y */
965 #if 0
966         if (self->transposed) {
967           /* if the image is transposed, y and x are swapped */
968           factor1 = 1.0 / VL_MAX(sqrt(norm1 + norm2 + norm4 + norm5), 1e-10) ;
969           factor3 = 1.0 / VL_MAX(sqrt(norm2 + norm3 + norm5 + norm6), 1e-10) ;
970           factor2 = 1.0 / VL_MAX(sqrt(norm4 + norm5 + norm7 + norm8), 1e-10) ;
971           factor4 = 1.0 / VL_MAX(sqrt(norm5 + norm6 + norm8 + norm9), 1e-10) ;
972         } else {
973           factor1 = 1.0 / VL_MAX(sqrt(norm1 + norm2 + norm4 + norm5), 1e-10) ;
974           factor2 = 1.0 / VL_MAX(sqrt(norm2 + norm3 + norm5 + norm6), 1e-10) ;
975           factor3 = 1.0 / VL_MAX(sqrt(norm4 + norm5 + norm7 + norm8), 1e-10) ;
976           factor4 = 1.0 / VL_MAX(sqrt(norm5 + norm6 + norm8 + norm9), 1e-10) ;
977         }
978 #else
979         /* as implemented in UOCTTI code */
980         if (self->transposed) {
981           /* if the image is transposed, y and x are swapped */
982           factor1 = 1.0 / sqrt(norm1 + norm2 + norm4 + norm5 + 1e-4) ;
983           factor3 = 1.0 / sqrt(norm2 + norm3 + norm5 + norm6 + 1e-4) ;
984           factor2 = 1.0 / sqrt(norm4 + norm5 + norm7 + norm8 + 1e-4) ;
985           factor4 = 1.0 / sqrt(norm5 + norm6 + norm8 + norm9 + 1e-4) ;
986         } else {
987           factor1 = 1.0 / sqrt(norm1 + norm2 + norm4 + norm5 + 1e-4) ;
988           factor2 = 1.0 / sqrt(norm2 + norm3 + norm5 + norm6 + 1e-4) ;
989           factor3 = 1.0 / sqrt(norm4 + norm5 + norm7 + norm8 + 1e-4) ;
990           factor4 = 1.0 / sqrt(norm5 + norm6 + norm8 + norm9 + 1e-4) ;
991         }
992 #endif
993 
994         for (k = 0 ; k < self->numOrientations ; ++k) {
995           double ha = iter[hogStride * k] ;
996           double hb = iter[hogStride * (k + self->numOrientations)] ;
997           double hc ;
998 
999           double ha1 = factor1 * ha ;
1000           double ha2 = factor2 * ha ;
1001           double ha3 = factor3 * ha ;
1002           double ha4 = factor4 * ha ;
1003 
1004           double hb1 = factor1 * hb ;
1005           double hb2 = factor2 * hb ;
1006           double hb3 = factor3 * hb ;
1007           double hb4 = factor4 * hb ;
1008 
1009           double hc1 = ha1 + hb1 ;
1010           double hc2 = ha2 + hb2 ;
1011           double hc3 = ha3 + hb3 ;
1012           double hc4 = ha4 + hb4 ;
1013 
1014           ha1 = VL_MIN(0.2, ha1) ;
1015           ha2 = VL_MIN(0.2, ha2) ;
1016           ha3 = VL_MIN(0.2, ha3) ;
1017           ha4 = VL_MIN(0.2, ha4) ;
1018 
1019           hb1 = VL_MIN(0.2, hb1) ;
1020           hb2 = VL_MIN(0.2, hb2) ;
1021           hb3 = VL_MIN(0.2, hb3) ;
1022           hb4 = VL_MIN(0.2, hb4) ;
1023 
1024           hc1 = VL_MIN(0.2, hc1) ;
1025           hc2 = VL_MIN(0.2, hc2) ;
1026           hc3 = VL_MIN(0.2, hc3) ;
1027           hc4 = VL_MIN(0.2, hc4) ;
1028 
1029           t1 += hc1 ;
1030           t2 += hc2 ;
1031           t3 += hc3 ;
1032           t4 += hc4 ;
1033 
1034           switch (self->variant) {
1035             case VlHogVariantUoctti :
1036               ha = 0.5 * (ha1 + ha2 + ha3 + ha4) ;
1037               hb = 0.5 * (hb1 + hb2 + hb3 + hb4) ;
1038               hc = 0.5 * (hc1 + hc2 + hc3 + hc4) ;
1039               *oiter = ha ;
1040               *(oiter + hogStride * self->numOrientations) = hb ;
1041               *(oiter + 2 * hogStride * self->numOrientations) = hc ;
1042               break ;
1043 
1044             case VlHogVariantDalalTriggs :
1045               *oiter = hc1 ;
1046               *(oiter + hogStride * self->numOrientations) = hc2 ;
1047               *(oiter + 2 * hogStride * self->numOrientations) = hc3 ;
1048               *(oiter + 3 * hogStride * self->numOrientations) = hc4 ;
1049               break ;
1050           }
1051           oiter += hogStride ;
1052 
1053         } /* next orientation */
1054 
1055         switch (self->variant) {
1056           case VlHogVariantUoctti :
1057             oiter += 2 * hogStride * self->numOrientations ;
1058             *oiter = (1.0f/sqrtf(18.0f)) * t1 ; oiter += hogStride ;
1059             *oiter = (1.0f/sqrtf(18.0f)) * t2 ; oiter += hogStride ;
1060             *oiter = (1.0f/sqrtf(18.0f)) * t3 ; oiter += hogStride ;
1061             *oiter = (1.0f/sqrtf(18.0f)) * t4 ; oiter += hogStride ;
1062             break ;
1063 
1064           case VlHogVariantDalalTriggs :
1065             break ;
1066         }
1067         ++iter ;
1068       } /* next x */
1069     } /* next y */
1070   } /* block normalization */
1071 }
1072 
1073