1 // Copyright (C) 2016  Davis E. King (davis@dlib.net)
2 // License: Boost Software License   See LICENSE.txt for the full license.
3 #ifndef DLIB_RaNDOM_CROPPER_H_
4 #define DLIB_RaNDOM_CROPPER_H_
5 
6 #include "random_cropper_abstract.h"
7 #include "../threads.h"
8 #include <mutex>
9 #include <vector>
10 #include "interpolation.h"
11 #include "../image_processing/full_object_detection.h"
12 #include "../rand.h"
13 
14 namespace dlib
15 {
16     class random_cropper
17     {
18         chip_dims dims = chip_dims(300,300);
19         bool randomly_flip = true;
20         double max_rotation_degrees = 30;
21         long min_object_length_long_dim = 75; // cropped object will be at least this many pixels along its longest edge.
22         long min_object_length_short_dim = 30; // cropped object will be at least this many pixels along its shortest edge.
23         double max_object_size = 0.7; // cropped object will be at most this fraction of the size of the image.
24         double background_crops_fraction = 0.5;
25         double translate_amount = 0.10;
26 
27         std::mutex rnd_mutex;
28         dlib::rand rnd;
29     public:
30 
set_seed(time_t seed)31         void set_seed (
32             time_t seed
33         ) { rnd = dlib::rand(seed); }
34 
get_translate_amount()35         double get_translate_amount (
36         ) const { return translate_amount; }
37 
set_translate_amount(double value)38         void set_translate_amount (
39             double value
40         )
41         {
42             DLIB_CASSERT(0 <= value);
43             translate_amount = value;
44         }
45 
get_background_crops_fraction()46         double get_background_crops_fraction (
47         ) const { return background_crops_fraction; }
48 
set_background_crops_fraction(double value)49         void set_background_crops_fraction (
50             double value
51         )
52         {
53             DLIB_CASSERT(0 <= value && value <= 1);
54             background_crops_fraction = value;
55         }
56 
get_chip_dims()57         const chip_dims& get_chip_dims(
58         ) const { return dims; }
59 
set_chip_dims(const chip_dims & dims_)60         void set_chip_dims (
61             const chip_dims& dims_
62         ) { dims = dims_; }
63 
set_chip_dims(unsigned long rows,unsigned long cols)64         void set_chip_dims (
65             unsigned long rows,
66             unsigned long cols
67         ) { set_chip_dims(chip_dims(rows,cols)); }
68 
get_randomly_flip()69         bool get_randomly_flip (
70         ) const { return randomly_flip; }
71 
set_randomly_flip(bool value)72         void set_randomly_flip (
73             bool value
74         ) { randomly_flip = value; }
75 
get_max_rotation_degrees()76         double get_max_rotation_degrees (
77         ) const { return max_rotation_degrees; }
set_max_rotation_degrees(double value)78         void set_max_rotation_degrees (
79             double value
80         ) { max_rotation_degrees = std::abs(value); }
81 
get_min_object_length_long_dim()82         long get_min_object_length_long_dim (
83         ) const { return min_object_length_long_dim; }
get_min_object_length_short_dim()84         long get_min_object_length_short_dim (
85         ) const { return min_object_length_short_dim; }
86 
set_min_object_size(long long_dim,long short_dim)87         void set_min_object_size (
88             long long_dim,
89             long short_dim
90         )
91         {
92             DLIB_CASSERT(0 < short_dim && short_dim <= long_dim);
93             min_object_length_long_dim = long_dim;
94             min_object_length_short_dim = short_dim;
95         }
96 
get_max_object_size()97         double get_max_object_size (
98         ) const { return max_object_size; }
set_max_object_size(double value)99         void set_max_object_size (
100             double value
101         )
102         {
103             DLIB_CASSERT(0 < value);
104             max_object_size = value;
105         }
106 
107         template <
108             typename array_type
109             >
operator()110         void operator() (
111             size_t num_crops,
112             const array_type& images,
113             const std::vector<std::vector<mmod_rect>>& rects,
114             array_type& crops,
115             std::vector<std::vector<mmod_rect>>& crop_rects
116         )
117         {
118             DLIB_CASSERT(images.size() == rects.size());
119             crops.clear();
120             crop_rects.clear();
121             append(num_crops, images, rects, crops, crop_rects);
122         }
123 
124         template <
125             typename array_type
126             >
append(size_t num_crops,const array_type & images,const std::vector<std::vector<mmod_rect>> & rects,array_type & crops,std::vector<std::vector<mmod_rect>> & crop_rects)127         void append (
128             size_t num_crops,
129             const array_type& images,
130             const std::vector<std::vector<mmod_rect>>& rects,
131             array_type& crops,
132             std::vector<std::vector<mmod_rect>>& crop_rects
133         )
134         {
135             DLIB_CASSERT(images.size() == rects.size());
136             DLIB_CASSERT(crops.size() == crop_rects.size());
137             auto original_size = crops.size();
138             crops.resize(crops.size()+num_crops);
139             crop_rects.resize(crop_rects.size()+num_crops);
140             parallel_for(original_size, original_size+num_crops, [&](long i) {
141                 (*this)(images, rects, crops[i], crop_rects[i]);
142             });
143         }
144 
145 
146         template <
147             typename array_type,
148             typename image_type
149             >
operator()150         void operator() (
151             const array_type& images,
152             const std::vector<std::vector<mmod_rect>>& rects,
153             image_type& crop,
154             std::vector<mmod_rect>& crop_rects
155         )
156         {
157             DLIB_CASSERT(images.size() == rects.size());
158             size_t idx;
159             { std::lock_guard<std::mutex> lock(rnd_mutex);
160                 idx = rnd.get_integer(images.size());
161             }
162             (*this)(images[idx], rects[idx], crop, crop_rects);
163         }
164 
165         template <
166             typename image_type1
167             >
operator()168         image_type1 operator() (
169             const image_type1& img
170         )
171         {
172             image_type1 crop;
173             std::vector<mmod_rect> junk1, junk2;
174             (*this)(img, junk1, crop, junk2);
175             return crop;
176         }
177 
178         template <
179             typename image_type1,
180             typename image_type2
181             >
operator()182         void operator() (
183             const image_type1& img,
184             const std::vector<mmod_rect>& rects,
185             image_type2& crop,
186             std::vector<mmod_rect>& crop_rects
187         )
188         {
189             DLIB_CASSERT(num_rows(img)*num_columns(img) != 0);
190             chip_details crop_plan;
191             bool should_flip_crop;
192             make_crop_plan(img, rects, crop_plan, should_flip_crop);
193 
194             extract_image_chip(img, crop_plan, crop);
195             const rectangle_transform tform = get_mapping_to_chip(crop_plan);
196 
197             // copy rects into crop_rects and set ones that are outside the crop to ignore or
198             // drop entirely as appropriate.
199             crop_rects.clear();
200             for (auto rect : rects)
201             {
202                 // map to crop
203                 rect.rect = tform(rect.rect);
204 
205                 // if the rect is at least partly in the crop
206                 if (get_rect(crop).intersect(rect.rect).area() != 0)
207                 {
208                     // set to ignore if not totally in the crop or if too small.
209                     if (!get_rect(crop).contains(rect.rect) ||
210                         ((long)rect.rect.height() < min_object_length_long_dim  && (long)rect.rect.width() < min_object_length_long_dim) ||
211                         ((long)rect.rect.height() < min_object_length_short_dim || (long)rect.rect.width() < min_object_length_short_dim))
212                     {
213                         rect.ignore = true;
214                     }
215 
216                     crop_rects.push_back(rect);
217                 }
218             }
219 
220             // Also randomly flip the image
221             if (should_flip_crop)
222             {
223                 image_type2 temp;
224                 flip_image_left_right(crop, temp);
225                 swap(crop,temp);
226                 for (auto&& rect : crop_rects)
227                     rect.rect = impl::flip_rect_left_right(rect.rect, get_rect(crop));
228             }
229         }
230 
231     private:
232 
233         template <typename image_type1>
make_crop_plan(const image_type1 & img,const std::vector<mmod_rect> & rects,chip_details & crop_plan,bool & should_flip_crop)234         void make_crop_plan (
235             const image_type1& img,
236             const std::vector<mmod_rect>& rects,
237             chip_details& crop_plan,
238             bool& should_flip_crop
239         )
240         {
241             std::lock_guard<std::mutex> lock(rnd_mutex);
242             rectangle crop_rect;
243             if (has_non_ignored_box(rects) && rnd.get_random_double() >= background_crops_fraction)
244             {
245                 auto rect = rects[randomly_pick_rect(rects)].rect;
246 
247                 // perturb the location of the crop by a small fraction of the object's size.
248                 const point rand_translate = dpoint(rnd.get_double_in_range(-translate_amount,translate_amount)*std::max(rect.height(),rect.width()),
249                                                     rnd.get_double_in_range(-translate_amount,translate_amount)*std::max(rect.height(),rect.width()));
250 
251                 // We are going to grow rect into the cropping rect.  First, we grow it a
252                 // little so that it has the desired minimum border around it.
253                 drectangle drect = centered_drect(center(rect)+rand_translate, rect.width()/max_object_size, rect.height()/max_object_size);
254 
255                 // Now make rect have the same aspect ratio as dims so that there won't be
256                 // any funny stretching when we crop it.  We do this by growing it along
257                 // whichever dimension is too short.
258                 const double target_aspect = dims.cols/(double)dims.rows;
259                 if (drect.width()/drect.height() < target_aspect)
260                     drect = centered_drect(drect, target_aspect*drect.height(), drect.height());
261                 else
262                     drect = centered_drect(drect, drect.width(), drect.width()/target_aspect);
263 
264                 // Now perturb the scale of the crop.  We do this by shrinking it, but not
265                 // so much that it gets smaller than the min object sizes require.
266                 double current_width = dims.cols*rect.width()/drect.width();
267                 double current_height = dims.rows*rect.height()/drect.height();
268 
269                 // never make any dimension smaller than the short dim.
270                 double min_scale1 = std::max(min_object_length_short_dim/current_width, min_object_length_short_dim/current_height);
271                 // at least one dimension needs to be longer than the long dim.
272                 double min_scale2 = std::min(min_object_length_long_dim/current_width, min_object_length_long_dim/current_height);
273                 double min_scale = std::max(min_scale1, min_scale2);
274 
275                 const double rand_scale_perturb = 1.0/rnd.get_double_in_range(min_scale, 1);
276                 crop_rect = centered_drect(drect, drect.width()*rand_scale_perturb, drect.height()*rand_scale_perturb);
277 
278             }
279             else
280             {
281                 crop_rect = make_random_cropping_rect(img);
282             }
283             should_flip_crop = randomly_flip && rnd.get_random_double() > 0.5;
284             const double angle = rnd.get_double_in_range(-max_rotation_degrees, max_rotation_degrees)*pi/180;
285             crop_plan = chip_details(crop_rect, dims, angle);
286         }
287 
has_non_ignored_box(const std::vector<mmod_rect> & rects)288         bool has_non_ignored_box (
289             const std::vector<mmod_rect>& rects
290         ) const
291         {
292             for (auto&& b : rects)
293             {
294                 if (!b.ignore)
295                     return true;
296             }
297             return false;
298         }
299 
randomly_pick_rect(const std::vector<mmod_rect> & rects)300         size_t randomly_pick_rect (
301             const std::vector<mmod_rect>& rects
302         )
303         {
304             DLIB_CASSERT(has_non_ignored_box(rects));
305             size_t idx = rnd.get_integer(rects.size());
306             while(rects[idx].ignore)
307                 idx = rnd.get_integer(rects.size());
308             return idx;
309         }
310 
311         template <typename image_type>
make_random_cropping_rect(const image_type & img_)312         rectangle make_random_cropping_rect(
313             const image_type& img_
314         )
315         {
316             const_image_view<image_type> img(img_);
317             // Figure out what rectangle we want to crop from the image.  We are going to
318             // crop out an image of size this->dims, so we pick a random scale factor that
319             // lets this random box be either as big as it can be while still fitting in
320             // the image or as small as a 3x zoomed in box randomly somewhere in the image.
321             double mins = 1.0/3.0, maxs = std::min(img.nr()/(double)dims.rows, img.nc()/(double)dims.cols);
322             mins = std::min(mins, maxs);
323             auto scale = rnd.get_double_in_range(mins, maxs);
324             rectangle rect(scale*dims.cols, scale*dims.rows);
325             // randomly shift the box around
326             point offset(rnd.get_integer(1+img.nc()-rect.width()),
327                          rnd.get_integer(1+img.nr()-rect.height()));
328             return move_rect(rect, offset);
329         }
330 
331 
332 
333     };
334 
335 // ----------------------------------------------------------------------------------------
336 
337     inline std::ostream& operator<< (
338         std::ostream& out,
339         const random_cropper& item
340     )
341     {
342         using std::endl;
343         out << "random_cropper details: " << endl;
344         out << "  chip_dims.rows:              " << item.get_chip_dims().rows << endl;
345         out << "  chip_dims.cols:              " << item.get_chip_dims().cols << endl;
346         out << "  randomly_flip:               " << std::boolalpha << item.get_randomly_flip() << endl;
347         out << "  max_rotation_degrees:        " << item.get_max_rotation_degrees() << endl;
348         out << "  min_object_length_long_dim:  " << item.get_min_object_length_long_dim() << endl;
349         out << "  min_object_length_short_dim: " << item.get_min_object_length_short_dim() << endl;
350         out << "  max_object_size:             " << item.get_max_object_size() << endl;
351         out << "  background_crops_fraction:   " << item.get_background_crops_fraction() << endl;
352         out << "  translate_amount:            " << item.get_translate_amount() << endl;
353         return out;
354     }
355 
356 // ----------------------------------------------------------------------------------------
357 
358 }
359 
360 #endif // DLIB_RaNDOM_CROPPER_H_
361 
362