1 // Copyright (C) 2016 Davis E. King (davis@dlib.net) 2 // License: Boost Software License See LICENSE.txt for the full license. 3 #ifndef DLIB_RaNDOM_CROPPER_H_ 4 #define DLIB_RaNDOM_CROPPER_H_ 5 6 #include "random_cropper_abstract.h" 7 #include "../threads.h" 8 #include <mutex> 9 #include <vector> 10 #include "interpolation.h" 11 #include "../image_processing/full_object_detection.h" 12 #include "../rand.h" 13 14 namespace dlib 15 { 16 class random_cropper 17 { 18 chip_dims dims = chip_dims(300,300); 19 bool randomly_flip = true; 20 double max_rotation_degrees = 30; 21 long min_object_length_long_dim = 75; // cropped object will be at least this many pixels along its longest edge. 22 long min_object_length_short_dim = 30; // cropped object will be at least this many pixels along its shortest edge. 23 double max_object_size = 0.7; // cropped object will be at most this fraction of the size of the image. 24 double background_crops_fraction = 0.5; 25 double translate_amount = 0.10; 26 27 std::mutex rnd_mutex; 28 dlib::rand rnd; 29 public: 30 set_seed(time_t seed)31 void set_seed ( 32 time_t seed 33 ) { rnd = dlib::rand(seed); } 34 get_translate_amount()35 double get_translate_amount ( 36 ) const { return translate_amount; } 37 set_translate_amount(double value)38 void set_translate_amount ( 39 double value 40 ) 41 { 42 DLIB_CASSERT(0 <= value); 43 translate_amount = value; 44 } 45 get_background_crops_fraction()46 double get_background_crops_fraction ( 47 ) const { return background_crops_fraction; } 48 set_background_crops_fraction(double value)49 void set_background_crops_fraction ( 50 double value 51 ) 52 { 53 DLIB_CASSERT(0 <= value && value <= 1); 54 background_crops_fraction = value; 55 } 56 get_chip_dims()57 const chip_dims& get_chip_dims( 58 ) const { return dims; } 59 set_chip_dims(const chip_dims & dims_)60 void set_chip_dims ( 61 const chip_dims& dims_ 62 ) { dims = dims_; } 63 set_chip_dims(unsigned long rows,unsigned long cols)64 void set_chip_dims ( 65 unsigned long rows, 66 unsigned long cols 67 ) { set_chip_dims(chip_dims(rows,cols)); } 68 get_randomly_flip()69 bool get_randomly_flip ( 70 ) const { return randomly_flip; } 71 set_randomly_flip(bool value)72 void set_randomly_flip ( 73 bool value 74 ) { randomly_flip = value; } 75 get_max_rotation_degrees()76 double get_max_rotation_degrees ( 77 ) const { return max_rotation_degrees; } set_max_rotation_degrees(double value)78 void set_max_rotation_degrees ( 79 double value 80 ) { max_rotation_degrees = std::abs(value); } 81 get_min_object_length_long_dim()82 long get_min_object_length_long_dim ( 83 ) const { return min_object_length_long_dim; } get_min_object_length_short_dim()84 long get_min_object_length_short_dim ( 85 ) const { return min_object_length_short_dim; } 86 set_min_object_size(long long_dim,long short_dim)87 void set_min_object_size ( 88 long long_dim, 89 long short_dim 90 ) 91 { 92 DLIB_CASSERT(0 < short_dim && short_dim <= long_dim); 93 min_object_length_long_dim = long_dim; 94 min_object_length_short_dim = short_dim; 95 } 96 get_max_object_size()97 double get_max_object_size ( 98 ) const { return max_object_size; } set_max_object_size(double value)99 void set_max_object_size ( 100 double value 101 ) 102 { 103 DLIB_CASSERT(0 < value); 104 max_object_size = value; 105 } 106 107 template < 108 typename array_type 109 > operator()110 void operator() ( 111 size_t num_crops, 112 const array_type& images, 113 const std::vector<std::vector<mmod_rect>>& rects, 114 array_type& crops, 115 std::vector<std::vector<mmod_rect>>& crop_rects 116 ) 117 { 118 DLIB_CASSERT(images.size() == rects.size()); 119 crops.clear(); 120 crop_rects.clear(); 121 append(num_crops, images, rects, crops, crop_rects); 122 } 123 124 template < 125 typename array_type 126 > append(size_t num_crops,const array_type & images,const std::vector<std::vector<mmod_rect>> & rects,array_type & crops,std::vector<std::vector<mmod_rect>> & crop_rects)127 void append ( 128 size_t num_crops, 129 const array_type& images, 130 const std::vector<std::vector<mmod_rect>>& rects, 131 array_type& crops, 132 std::vector<std::vector<mmod_rect>>& crop_rects 133 ) 134 { 135 DLIB_CASSERT(images.size() == rects.size()); 136 DLIB_CASSERT(crops.size() == crop_rects.size()); 137 auto original_size = crops.size(); 138 crops.resize(crops.size()+num_crops); 139 crop_rects.resize(crop_rects.size()+num_crops); 140 parallel_for(original_size, original_size+num_crops, [&](long i) { 141 (*this)(images, rects, crops[i], crop_rects[i]); 142 }); 143 } 144 145 146 template < 147 typename array_type, 148 typename image_type 149 > operator()150 void operator() ( 151 const array_type& images, 152 const std::vector<std::vector<mmod_rect>>& rects, 153 image_type& crop, 154 std::vector<mmod_rect>& crop_rects 155 ) 156 { 157 DLIB_CASSERT(images.size() == rects.size()); 158 size_t idx; 159 { std::lock_guard<std::mutex> lock(rnd_mutex); 160 idx = rnd.get_integer(images.size()); 161 } 162 (*this)(images[idx], rects[idx], crop, crop_rects); 163 } 164 165 template < 166 typename image_type1 167 > operator()168 image_type1 operator() ( 169 const image_type1& img 170 ) 171 { 172 image_type1 crop; 173 std::vector<mmod_rect> junk1, junk2; 174 (*this)(img, junk1, crop, junk2); 175 return crop; 176 } 177 178 template < 179 typename image_type1, 180 typename image_type2 181 > operator()182 void operator() ( 183 const image_type1& img, 184 const std::vector<mmod_rect>& rects, 185 image_type2& crop, 186 std::vector<mmod_rect>& crop_rects 187 ) 188 { 189 DLIB_CASSERT(num_rows(img)*num_columns(img) != 0); 190 chip_details crop_plan; 191 bool should_flip_crop; 192 make_crop_plan(img, rects, crop_plan, should_flip_crop); 193 194 extract_image_chip(img, crop_plan, crop); 195 const rectangle_transform tform = get_mapping_to_chip(crop_plan); 196 197 // copy rects into crop_rects and set ones that are outside the crop to ignore or 198 // drop entirely as appropriate. 199 crop_rects.clear(); 200 for (auto rect : rects) 201 { 202 // map to crop 203 rect.rect = tform(rect.rect); 204 205 // if the rect is at least partly in the crop 206 if (get_rect(crop).intersect(rect.rect).area() != 0) 207 { 208 // set to ignore if not totally in the crop or if too small. 209 if (!get_rect(crop).contains(rect.rect) || 210 ((long)rect.rect.height() < min_object_length_long_dim && (long)rect.rect.width() < min_object_length_long_dim) || 211 ((long)rect.rect.height() < min_object_length_short_dim || (long)rect.rect.width() < min_object_length_short_dim)) 212 { 213 rect.ignore = true; 214 } 215 216 crop_rects.push_back(rect); 217 } 218 } 219 220 // Also randomly flip the image 221 if (should_flip_crop) 222 { 223 image_type2 temp; 224 flip_image_left_right(crop, temp); 225 swap(crop,temp); 226 for (auto&& rect : crop_rects) 227 rect.rect = impl::flip_rect_left_right(rect.rect, get_rect(crop)); 228 } 229 } 230 231 private: 232 233 template <typename image_type1> make_crop_plan(const image_type1 & img,const std::vector<mmod_rect> & rects,chip_details & crop_plan,bool & should_flip_crop)234 void make_crop_plan ( 235 const image_type1& img, 236 const std::vector<mmod_rect>& rects, 237 chip_details& crop_plan, 238 bool& should_flip_crop 239 ) 240 { 241 std::lock_guard<std::mutex> lock(rnd_mutex); 242 rectangle crop_rect; 243 if (has_non_ignored_box(rects) && rnd.get_random_double() >= background_crops_fraction) 244 { 245 auto rect = rects[randomly_pick_rect(rects)].rect; 246 247 // perturb the location of the crop by a small fraction of the object's size. 248 const point rand_translate = dpoint(rnd.get_double_in_range(-translate_amount,translate_amount)*std::max(rect.height(),rect.width()), 249 rnd.get_double_in_range(-translate_amount,translate_amount)*std::max(rect.height(),rect.width())); 250 251 // We are going to grow rect into the cropping rect. First, we grow it a 252 // little so that it has the desired minimum border around it. 253 drectangle drect = centered_drect(center(rect)+rand_translate, rect.width()/max_object_size, rect.height()/max_object_size); 254 255 // Now make rect have the same aspect ratio as dims so that there won't be 256 // any funny stretching when we crop it. We do this by growing it along 257 // whichever dimension is too short. 258 const double target_aspect = dims.cols/(double)dims.rows; 259 if (drect.width()/drect.height() < target_aspect) 260 drect = centered_drect(drect, target_aspect*drect.height(), drect.height()); 261 else 262 drect = centered_drect(drect, drect.width(), drect.width()/target_aspect); 263 264 // Now perturb the scale of the crop. We do this by shrinking it, but not 265 // so much that it gets smaller than the min object sizes require. 266 double current_width = dims.cols*rect.width()/drect.width(); 267 double current_height = dims.rows*rect.height()/drect.height(); 268 269 // never make any dimension smaller than the short dim. 270 double min_scale1 = std::max(min_object_length_short_dim/current_width, min_object_length_short_dim/current_height); 271 // at least one dimension needs to be longer than the long dim. 272 double min_scale2 = std::min(min_object_length_long_dim/current_width, min_object_length_long_dim/current_height); 273 double min_scale = std::max(min_scale1, min_scale2); 274 275 const double rand_scale_perturb = 1.0/rnd.get_double_in_range(min_scale, 1); 276 crop_rect = centered_drect(drect, drect.width()*rand_scale_perturb, drect.height()*rand_scale_perturb); 277 278 } 279 else 280 { 281 crop_rect = make_random_cropping_rect(img); 282 } 283 should_flip_crop = randomly_flip && rnd.get_random_double() > 0.5; 284 const double angle = rnd.get_double_in_range(-max_rotation_degrees, max_rotation_degrees)*pi/180; 285 crop_plan = chip_details(crop_rect, dims, angle); 286 } 287 has_non_ignored_box(const std::vector<mmod_rect> & rects)288 bool has_non_ignored_box ( 289 const std::vector<mmod_rect>& rects 290 ) const 291 { 292 for (auto&& b : rects) 293 { 294 if (!b.ignore) 295 return true; 296 } 297 return false; 298 } 299 randomly_pick_rect(const std::vector<mmod_rect> & rects)300 size_t randomly_pick_rect ( 301 const std::vector<mmod_rect>& rects 302 ) 303 { 304 DLIB_CASSERT(has_non_ignored_box(rects)); 305 size_t idx = rnd.get_integer(rects.size()); 306 while(rects[idx].ignore) 307 idx = rnd.get_integer(rects.size()); 308 return idx; 309 } 310 311 template <typename image_type> make_random_cropping_rect(const image_type & img_)312 rectangle make_random_cropping_rect( 313 const image_type& img_ 314 ) 315 { 316 const_image_view<image_type> img(img_); 317 // Figure out what rectangle we want to crop from the image. We are going to 318 // crop out an image of size this->dims, so we pick a random scale factor that 319 // lets this random box be either as big as it can be while still fitting in 320 // the image or as small as a 3x zoomed in box randomly somewhere in the image. 321 double mins = 1.0/3.0, maxs = std::min(img.nr()/(double)dims.rows, img.nc()/(double)dims.cols); 322 mins = std::min(mins, maxs); 323 auto scale = rnd.get_double_in_range(mins, maxs); 324 rectangle rect(scale*dims.cols, scale*dims.rows); 325 // randomly shift the box around 326 point offset(rnd.get_integer(1+img.nc()-rect.width()), 327 rnd.get_integer(1+img.nr()-rect.height())); 328 return move_rect(rect, offset); 329 } 330 331 332 333 }; 334 335 // ---------------------------------------------------------------------------------------- 336 337 inline std::ostream& operator<< ( 338 std::ostream& out, 339 const random_cropper& item 340 ) 341 { 342 using std::endl; 343 out << "random_cropper details: " << endl; 344 out << " chip_dims.rows: " << item.get_chip_dims().rows << endl; 345 out << " chip_dims.cols: " << item.get_chip_dims().cols << endl; 346 out << " randomly_flip: " << std::boolalpha << item.get_randomly_flip() << endl; 347 out << " max_rotation_degrees: " << item.get_max_rotation_degrees() << endl; 348 out << " min_object_length_long_dim: " << item.get_min_object_length_long_dim() << endl; 349 out << " min_object_length_short_dim: " << item.get_min_object_length_short_dim() << endl; 350 out << " max_object_size: " << item.get_max_object_size() << endl; 351 out << " background_crops_fraction: " << item.get_background_crops_fraction() << endl; 352 out << " translate_amount: " << item.get_translate_amount() << endl; 353 return out; 354 } 355 356 // ---------------------------------------------------------------------------------------- 357 358 } 359 360 #endif // DLIB_RaNDOM_CROPPER_H_ 361 362