1 
2 #include "dlib/data_io.h"
3 #include "dlib/string.h"
4 #include "metadata_editor.h"
5 #include "convert_pascal_xml.h"
6 #include "convert_pascal_v1.h"
7 #include "convert_idl.h"
8 #include "cluster.h"
9 #include "flip_dataset.h"
10 #include <dlib/cmd_line_parser.h>
11 #include <dlib/image_transforms.h>
12 #include <dlib/svm.h>
13 #include <dlib/console_progress_indicator.h>
14 #include <dlib/md5.h>
15 
16 #include <iostream>
17 #include <fstream>
18 #include <string>
19 #include <set>
20 
21 #include <dlib/dir_nav.h>
22 
23 
// Version string for this tool.
// NOTE(review): presumably this is what the -v ("Display version.") option prints -- confirm.
const char* VERSION = "1.17";
25 
26 
27 
28 using namespace std;
29 using namespace dlib;
30 
31 // ----------------------------------------------------------------------------------------
32 
create_new_dataset(const command_line_parser & parser)33 void create_new_dataset (
34     const command_line_parser& parser
35 )
36 {
37     using namespace dlib::image_dataset_metadata;
38 
39     const std::string filename = parser.option("c").argument();
40     // make sure the file exists so we can use the get_parent_directory() command to
41     // figure out it's parent directory.
42     make_empty_file(filename);
43     const std::string parent_dir = get_parent_directory(file(filename));
44 
45     unsigned long depth = 0;
46     if (parser.option("r"))
47         depth = 30;
48 
49     dataset meta;
50     meta.name = "imglab dataset";
51     meta.comment = "Created by imglab tool.";
52     for (unsigned long i = 0; i < parser.number_of_arguments(); ++i)
53     {
54         try
55         {
56             const string temp = strip_path(file(parser[i]), parent_dir);
57             meta.images.push_back(image(temp));
58         }
59         catch (dlib::file::file_not_found&)
60         {
61             // then parser[i] should be a directory
62 
63             std::vector<file> files = get_files_in_directory_tree(parser[i],
64                                                                   match_endings(".png .PNG .jpeg .JPEG .jpg .JPG .bmp .BMP .dng .DNG .gif .GIF"),
65                                                                   depth);
66             sort(files.begin(), files.end());
67 
68             for (unsigned long j = 0; j < files.size(); ++j)
69             {
70                 meta.images.push_back(image(strip_path(files[j], parent_dir)));
71             }
72         }
73     }
74 
75     save_image_dataset_metadata(meta, filename);
76 }
77 
78 // ----------------------------------------------------------------------------------------
79 
split_dataset(const command_line_parser & parser)80 int split_dataset (
81     const command_line_parser& parser
82 )
83 {
84     if (parser.number_of_arguments() != 1)
85     {
86         cerr << "The --split option requires you to give one XML file on the command line." << endl;
87         return EXIT_FAILURE;
88     }
89 
90     const std::string label = parser.option("split").argument();
91 
92     dlib::image_dataset_metadata::dataset data, data_with, data_without;
93     load_image_dataset_metadata(data, parser[0]);
94 
95     data_with.name = data.name;
96     data_with.comment = data.comment;
97     data_without.name = data.name;
98     data_without.comment = data.comment;
99 
100     for (unsigned long i = 0; i < data.images.size(); ++i)
101     {
102         auto&& temp = data.images[i];
103 
104         bool has_the_label = false;
105         // check for the label we are looking for
106         for (unsigned long j = 0; j < temp.boxes.size(); ++j)
107         {
108             if (temp.boxes[j].label == label)
109             {
110                 has_the_label = true;
111                 break;
112             }
113         }
114 
115         if (has_the_label)
116             data_with.images.push_back(temp);
117         else
118             data_without.images.push_back(temp);
119     }
120 
121 
122     save_image_dataset_metadata(data_with, left_substr(parser[0],".") + "_with_"+label + ".xml");
123     save_image_dataset_metadata(data_without, left_substr(parser[0],".") + "_without_"+label + ".xml");
124 
125     return EXIT_SUCCESS;
126 }
127 
128 // ----------------------------------------------------------------------------------------
129 
make_train_test_splits(const command_line_parser & parser)130 int make_train_test_splits (
131     const command_line_parser& parser
132 )
133 {
134     if (parser.number_of_arguments() != 1)
135     {
136         cerr << "The --split-train-test option requires you to give one XML file on the command line." << endl;
137         return EXIT_FAILURE;
138     }
139 
140     const double train_frac = get_option(parser, "split-train-test", 0.5);
141 
142     dlib::image_dataset_metadata::dataset data, data_train, data_test;
143     load_image_dataset_metadata(data, parser[0]);
144 
145     data_train.name = data.name;
146     data_train.comment = data.comment;
147     data_test.name = data.name;
148     data_test.comment = data.comment;
149 
150     const unsigned long num_train_images = static_cast<unsigned long>(std::round(train_frac*data.images.size()));
151 
152     for (unsigned long i = 0; i < data.images.size(); ++i)
153     {
154         if (i < num_train_images)
155             data_train.images.push_back(data.images[i]);
156         else
157             data_test.images.push_back(data.images[i]);
158     }
159 
160     save_image_dataset_metadata(data_train, left_substr(parser[0],".") + "_train.xml");
161     save_image_dataset_metadata(data_test, left_substr(parser[0],".") + "_test.xml");
162 
163     return EXIT_SUCCESS;
164 }
165 
166 // ----------------------------------------------------------------------------------------
167 
print_all_labels(const dlib::image_dataset_metadata::dataset & data)168 void print_all_labels (
169     const dlib::image_dataset_metadata::dataset& data
170 )
171 {
172     std::set<std::string> labels;
173     for (unsigned long i = 0; i < data.images.size(); ++i)
174     {
175         for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
176         {
177             labels.insert(data.images[i].boxes[j].label);
178         }
179     }
180 
181     for (std::set<std::string>::iterator i = labels.begin(); i != labels.end(); ++i)
182     {
183         if (i->size() != 0)
184         {
185             cout << *i << endl;
186         }
187     }
188 }
189 
190 // ----------------------------------------------------------------------------------------
191 
print_all_label_stats(const dlib::image_dataset_metadata::dataset & data)192 void print_all_label_stats (
193     const dlib::image_dataset_metadata::dataset& data
194 )
195 {
196     std::map<std::string, running_stats<double> > area_stats, aspect_ratio;
197     std::map<std::string, int> image_hits;
198     std::set<std::string> labels;
199     unsigned long num_unignored_boxes = 0;
200     for (unsigned long i = 0; i < data.images.size(); ++i)
201     {
202         std::set<std::string> temp;
203         for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
204         {
205             labels.insert(data.images[i].boxes[j].label);
206             temp.insert(data.images[i].boxes[j].label);
207 
208             area_stats[data.images[i].boxes[j].label].add(data.images[i].boxes[j].rect.area());
209             aspect_ratio[data.images[i].boxes[j].label].add(data.images[i].boxes[j].rect.width()/
210                                                     (double)data.images[i].boxes[j].rect.height());
211 
212             if (!data.images[i].boxes[j].ignore)
213                 ++num_unignored_boxes;
214         }
215 
216         // count the number of images for each label
217         for (std::set<std::string>::iterator i = temp.begin(); i != temp.end(); ++i)
218             image_hits[*i] += 1;
219     }
220 
221     cout << "Number of images: "<< data.images.size() << endl;
222     cout << "Number of different labels: "<< labels.size() << endl;
223     cout << "Number of non-ignored boxes: " << num_unignored_boxes << endl << endl;
224 
225     for (std::set<std::string>::iterator i = labels.begin(); i != labels.end(); ++i)
226     {
227         if (i->size() == 0)
228             cout << "Unlabeled Boxes:" << endl;
229         else
230             cout << "Label: "<< *i << endl;
231         cout << "   number of images:      " << image_hits[*i] << endl;
232         cout << "   number of occurrences: " << area_stats[*i].current_n() << endl;
233         cout << "   min box area:    " << area_stats[*i].min() << endl;
234         cout << "   max box area:    " << area_stats[*i].max() << endl;
235         cout << "   mean box area:   " << area_stats[*i].mean() << endl;
236         cout << "   stddev box area: " << area_stats[*i].stddev() << endl;
237         cout << "   mean width/height ratio:   " << aspect_ratio[*i].mean() << endl;
238         cout << "   stddev width/height ratio: " << aspect_ratio[*i].stddev() << endl;
239         cout << endl;
240     }
241 }
242 
243 // ----------------------------------------------------------------------------------------
244 
rename_labels(dlib::image_dataset_metadata::dataset & data,const std::string & from,const std::string & to)245 void rename_labels (
246     dlib::image_dataset_metadata::dataset& data,
247     const std::string& from,
248     const std::string& to
249 )
250 {
251     for (unsigned long i = 0; i < data.images.size(); ++i)
252     {
253         for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
254         {
255             if (data.images[i].boxes[j].label == from)
256                 data.images[i].boxes[j].label = to;
257         }
258     }
259 
260 }
261 
262 // ----------------------------------------------------------------------------------------
263 
ignore_labels(dlib::image_dataset_metadata::dataset & data,const std::string & label)264 void ignore_labels (
265     dlib::image_dataset_metadata::dataset& data,
266     const std::string& label
267 )
268 {
269     for (unsigned long i = 0; i < data.images.size(); ++i)
270     {
271         for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
272         {
273             if (data.images[i].boxes[j].label == label)
274                 data.images[i].boxes[j].ignore = true;
275         }
276     }
277 }
278 
279 // ----------------------------------------------------------------------------------------
280 
merge_metadata_files(const command_line_parser & parser)281 void merge_metadata_files (
282     const command_line_parser& parser
283 )
284 {
285     image_dataset_metadata::dataset src, dest;
286     load_image_dataset_metadata(src, parser.option("add").argument(0));
287     load_image_dataset_metadata(dest, parser.option("add").argument(1));
288 
289     std::map<string,image_dataset_metadata::image> merged_data;
290     for (unsigned long i = 0; i < dest.images.size(); ++i)
291         merged_data[dest.images[i].filename] = dest.images[i];
292     // now add in the src data and overwrite anything if there are duplicate entries.
293     for (unsigned long i = 0; i < src.images.size(); ++i)
294         merged_data[src.images[i].filename] = src.images[i];
295 
296     // copy merged data into dest
297     dest.images.clear();
298     for (std::map<string,image_dataset_metadata::image>::const_iterator i = merged_data.begin();
299         i != merged_data.end(); ++i)
300     {
301         dest.images.push_back(i->second);
302     }
303 
304     save_image_dataset_metadata(dest, "merged.xml");
305 }
306 
307 // ----------------------------------------------------------------------------------------
308 
rotate_dataset(const command_line_parser & parser)309 void rotate_dataset(const command_line_parser& parser)
310 {
311     image_dataset_metadata::dataset metadata;
312     const string datasource = parser[0];
313     load_image_dataset_metadata(metadata,datasource);
314 
315     double angle = get_option(parser, "rotate", 0);
316 
317     // Set the current directory to be the one that contains the
318     // metadata file. We do this because the file might contain
319     // file paths which are relative to this folder.
320     set_current_dir(get_parent_directory(file(datasource)));
321 
322     const string file_prefix = "rotated_"+ cast_to_string(angle) + "_";
323     const string metadata_filename = get_parent_directory(file(datasource)).full_name() +
324         directory::get_separator() + file_prefix + file(datasource).name();
325 
326 
327     array2d<rgb_pixel> img, temp;
328     for (unsigned long i = 0; i < metadata.images.size(); ++i)
329     {
330         file f(metadata.images[i].filename);
331         string filename = get_parent_directory(f).full_name() + directory::get_separator() + file_prefix + to_png_name(f.name());
332 
333         load_image(img, metadata.images[i].filename);
334         const point_transform_affine tran = rotate_image(img, temp, angle*pi/180);
335         if (parser.option("jpg"))
336         {
337             filename = to_jpg_name(filename);
338             save_jpeg(temp, filename,JPEG_QUALITY);
339         }
340         else
341         {
342             save_png(temp, filename);
343         }
344 
345         rectangle_transform rtran = tran;
346         for (unsigned long j = 0; j < metadata.images[i].boxes.size(); ++j)
347         {
348             metadata.images[i].boxes[j].rect = rtran(metadata.images[i].boxes[j].rect);
349 
350             for (auto& p : metadata.images[i].boxes[j].parts)
351                 p.second = tran(p.second);
352         }
353 
354         metadata.images[i].filename = filename;
355     }
356 
357     save_image_dataset_metadata(metadata, metadata_filename);
358 }
359 
360 // ----------------------------------------------------------------------------------------
361 
resample_dataset(const command_line_parser & parser)362 int resample_dataset(const command_line_parser& parser)
363 {
364     if (parser.number_of_arguments() != 1)
365     {
366         cerr << "The --resample option requires you to give one XML file on the command line." << endl;
367         return EXIT_FAILURE;
368     }
369 
370     const size_t obj_size = get_option(parser,"cropped-object-size",100*100);
371     const double margin_scale =  get_option(parser,"crop-size",2.5); // cropped image will be this times wider than the object.
372     const unsigned long min_object_size = get_option(parser,"min-object-size",1);
373     const bool one_object_per_image = parser.option("one-object-per-image");
374 
375     dlib::image_dataset_metadata::dataset data, resampled_data;
376     std::ostringstream sout;
377     sout << "\nThe --resample parameters which generated this dataset were:" << endl;
378     sout << "   cropped-object-size: "<< obj_size << endl;
379     sout << "   crop-size: "<< margin_scale << endl;
380     sout << "   min-object-size: "<< min_object_size << endl;
381     if (one_object_per_image)
382         sout << "   one_object_per_image: true" << endl;
383     resampled_data.comment = data.comment + sout.str();
384     resampled_data.name = data.name + " RESAMPLED";
385 
386     load_image_dataset_metadata(data, parser[0]);
387     locally_change_current_dir chdir(get_parent_directory(file(parser[0])));
388     dlib::rand rnd;
389 
390     const size_t image_size = std::round(std::sqrt(obj_size*margin_scale*margin_scale));
391     const chip_dims cdims(image_size, image_size);
392 
393     console_progress_indicator pbar(data.images.size());
394     for (unsigned long i = 0; i < data.images.size(); ++i)
395     {
396         // don't even bother loading images that don't have objects.
397         if (data.images[i].boxes.size() == 0)
398             continue;
399 
400         pbar.print_status(i);
401         array2d<rgb_pixel> img, chip;
402         load_image(img, data.images[i].filename);
403 
404 
405         // figure out what chips we want to take from this image
406         for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
407         {
408             const rectangle rect = data.images[i].boxes[j].rect;
409             if (data.images[i].boxes[j].ignore || rect.area() < min_object_size)
410                 continue;
411 
412             const auto max_dim = std::max(rect.width(), rect.height());
413 
414             const double rand_scale_perturb = 1 - 0.3*(rnd.get_random_double()-0.5);
415             const rectangle crop_rect = centered_rect(rect, max_dim*margin_scale*rand_scale_perturb, max_dim*margin_scale*rand_scale_perturb);
416 
417             const rectangle_transform tform = get_mapping_to_chip(chip_details(crop_rect, cdims));
418             extract_image_chip(img, chip_details(crop_rect, cdims), chip);
419 
420             image_dataset_metadata::image dimg;
421             // Now transform the boxes to the crop and also mark them as ignored if they
422             // have already been cropped out or are outside the crop.
423             for (size_t k = 0; k < data.images[i].boxes.size(); ++k)
424             {
425                 image_dataset_metadata::box box = data.images[i].boxes[k];
426                 // ignore boxes outside the cropped image
427                 if (crop_rect.intersect(box.rect).area() == 0)
428                     continue;
429 
430                 // mark boxes we include in the crop as ignored.  Also mark boxes that
431                 // aren't totally within the crop as ignored.
432                 if (crop_rect.contains(grow_rect(box.rect,10)) && (!one_object_per_image || k==j))
433                     data.images[i].boxes[k].ignore = true;
434                 else
435                     box.ignore = true;
436 
437                 if (box.rect.area() < min_object_size)
438                     box.ignore = true;
439 
440                 box.rect = tform(box.rect);
441                 for (auto&& p : box.parts)
442                     p.second = tform.get_tform()(p.second);
443                 dimg.boxes.push_back(box);
444             }
445             // Put a 64bit hash of the image data into the name to make sure there are no
446             // file name conflicts.
447             std::ostringstream sout;
448             sout << hex << murmur_hash3_128bit(&chip[0][0], chip.size()*sizeof(chip[0][0])).second;
449             dimg.filename = data.images[i].filename + "_RESAMPLED_"+sout.str()+".png";
450 
451             if (parser.option("jpg"))
452             {
453                 dimg.filename = to_jpg_name(dimg.filename);
454                 save_jpeg(chip,dimg.filename, JPEG_QUALITY);
455             }
456             else
457             {
458                 save_png(chip,dimg.filename);
459             }
460             resampled_data.images.push_back(dimg);
461         }
462     }
463 
464     save_image_dataset_metadata(resampled_data, parser[0] + ".RESAMPLED.xml");
465 
466     return EXIT_SUCCESS;
467 }
468 
469 // ----------------------------------------------------------------------------------------
470 
tile_dataset(const command_line_parser & parser)471 int tile_dataset(const command_line_parser& parser)
472 {
473     if (parser.number_of_arguments() != 1)
474     {
475         cerr << "The --tile option requires you to give one XML file on the command line." << endl;
476         return EXIT_FAILURE;
477     }
478 
479     string out_image = parser.option("tile").argument();
480     string ext = right_substr(out_image,".");
481     if (ext != "png" && ext != "jpg")
482     {
483         cerr << "The output image file must have either .png or .jpg extension." << endl;
484         return EXIT_FAILURE;
485     }
486 
487     const unsigned long chip_size = get_option(parser, "size", 8000);
488 
489     dlib::image_dataset_metadata::dataset data;
490     load_image_dataset_metadata(data, parser[0]);
491     locally_change_current_dir chdir(get_parent_directory(file(parser[0])));
492     dlib::array<array2d<rgb_pixel> > images;
493     console_progress_indicator pbar(data.images.size());
494     for (unsigned long i = 0; i < data.images.size(); ++i)
495     {
496         // don't even bother loading images that don't have objects.
497         if (data.images[i].boxes.size() == 0)
498             continue;
499 
500         pbar.print_status(i);
501         array2d<rgb_pixel> img;
502         load_image(img, data.images[i].filename);
503 
504         // figure out what chips we want to take from this image
505         std::vector<chip_details> dets;
506         for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
507         {
508             if (data.images[i].boxes[j].ignore)
509                 continue;
510 
511             rectangle rect = data.images[i].boxes[j].rect;
512             dets.push_back(chip_details(rect, chip_size));
513         }
514         // Now grab all those chips at once.
515         dlib::array<array2d<rgb_pixel> > chips;
516         extract_image_chips(img, dets, chips);
517         // and put the chips into the output.
518         for (unsigned long j = 0; j < chips.size(); ++j)
519             images.push_back(chips[j]);
520     }
521 
522     chdir.revert();
523 
524     if (ext == "png")
525         save_png(tile_images(images), out_image);
526     else
527         save_jpeg(tile_images(images), out_image);
528 
529     return EXIT_SUCCESS;
530 }
531 
532 
533 // ----------------------------------------------------------------------------------------
534 
main(int argc,char ** argv)535 int main(int argc, char** argv)
536 {
537     try
538     {
539 
540         command_line_parser parser;
541 
542         parser.add_option("h","Displays this information.");
543         parser.add_option("v","Display version.");
544 
545         parser.set_group_name("Creating XML files");
546         parser.add_option("c","Create an XML file named <arg> listing a set of images.",1);
547         parser.add_option("r","Search directories recursively for images.");
548         parser.add_option("convert","Convert foreign image Annotations from <arg> format to the imglab format. "
549                           "Supported formats: pascal-xml, pascal-v1, idl.",1);
550 
551         parser.set_group_name("Viewing XML files");
552         parser.add_option("tile","Chip out all the objects and save them as one big image called <arg>.",1);
553         parser.add_option("size","When using --tile or --cluster, make each extracted object contain "
554                                  "about <arg> pixels (default 8000).",1);
555         parser.add_option("l","List all the labels in the given XML file.");
556         parser.add_option("stats","List detailed statistics on the object labels in the given XML file.");
557         parser.add_option("files","List all the files in the given XML file.");
558 
559         parser.set_group_name("Editing/Transforming XML datasets");
560         parser.add_option("rename", "Rename all labels of <arg1> to <arg2>.",2);
561         parser.add_option("parts","The display will allow image parts to be labeled.  The set of allowable parts "
562                           "is defined by <arg> which should be a space separated list of parts.",1);
563         parser.add_option("rmempty","Remove all images that don't contain non-ignored annotations and save the results to a new XML file.");
564         parser.add_option("rmdupes","Remove duplicate images from the dataset.  This is done by comparing "
565                                     "the md5 hash of each image file and removing duplicate images. " );
566         parser.add_option("rmdiff","Set the ignored flag to true for boxes marked as difficult.");
567         parser.add_option("rmtrunc","Set the ignored flag to true for boxes that are partially outside the image.");
568         parser.add_option("box-images","Add a box to each image that contains the entire image.");
569         parser.add_option("sort-num-objects","Sort the images listed an XML file so images with many objects are listed first.");
570         parser.add_option("sort","Alphabetically sort the images in an XML file.");
571         parser.add_option("shuffle","Randomly shuffle the order of the images listed in an XML file.");
572         parser.add_option("seed", "When using --shuffle, set the random seed to the string <arg>.",1);
573         parser.add_option("split", "Split the contents of an XML file into two separate files.  One containing the "
574             "images with objects labeled <arg> and another file with all the other images. ",1);
575         parser.add_option("split-train-test", "Split the contents of an XML file into two separate files.  A training "
576             "file containing <arg> fraction of the images and a testing file containing the remaining (1-<arg>) images. "
577             "The partitioning is done deterministically by putting the first images in the input xml file into the training split "
578             "and the later images into the test split.",1);
579         parser.add_option("add", "Add the image metadata from <arg1> into <arg2>.  If any of the image "
580                                  "tags are in both files then the ones in <arg2> are deleted and replaced with the "
581                                  "image tags from <arg1>.  The results are saved into merged.xml and neither <arg1> or "
582                                  "<arg2> files are modified.",2);
583         parser.add_option("flip", "Read an XML image dataset from the <arg> XML file and output a left-right flipped "
584                                   "version of the dataset and an accompanying flipped XML file named flipped_<arg>. "
585                                   "We also adjust object part labels after flipping so that the new flipped dataset "
586                                   "has the same average part layout as the source dataset." ,1);
587         parser.add_option("flip-basic", "This option is just like --flip, except we don't adjust any object part labels after flipping. "
588                                         "The parts are instead simply mirrored to the flipped dataset.", 1);
589         parser.add_option("rotate", "Read an XML image dataset and output a copy that is rotated counter clockwise by <arg> degrees. "
590                                   "The output is saved to an XML file prefixed with rotated_<arg>.",1);
591         parser.add_option("cluster", "Cluster all the objects in an XML file into <arg> different clusters (pass 0 to find automatically) and save "
592                                      "the results as cluster_###.xml and cluster_###.jpg files.",1);
593         parser.add_option("ignore", "Mark boxes labeled as <arg> as ignored.  The resulting XML file is output as a separate file and the original is not modified.",1);
594         parser.add_option("rmlabel","Remove all boxes labeled <arg> and save the results to a new XML file.",1);
595         parser.add_option("rm-other-labels","Remove all boxes not labeled <arg> and save the results to a new XML file.",1);
596         parser.add_option("rmignore","Remove all boxes marked ignore and save the results to a new XML file.");
597         parser.add_option("rm-if-overlaps","Remove all boxes labeled <arg> if they overlap any box not labeled <arg> and save the results to a new XML file.",1);
598         parser.add_option("jpg", "When saving images to disk, write them as jpg files instead of png.");
599 
600         parser.set_group_name("Cropping sub images");
601         parser.add_option("resample", "Crop out images that are centered on each object in the dataset. "
602                                       "The output is a new XML dataset.");
603         parser.add_option("cropped-object-size", "When doing --resample, make the cropped objects contain about <arg> pixels (default 10000).",1);
604         parser.add_option("min-object-size", "When doing --resample, skip objects that have fewer than <arg> pixels in them (default 1).",1);
605         parser.add_option("crop-size", "When doing --resample, the entire cropped image will be <arg> times wider than the object (default 2.5).",1);
606         parser.add_option("one-object-per-image", "When doing --resample, only include one non-ignored object per image (i.e. the central object).");
607 
608 
609 
610         parser.parse(argc, argv);
611 
612         const char* singles[] = {"h","c","r","l","files","convert","parts","rmdiff", "rmtrunc", "rmdupes", "seed", "shuffle", "split", "add",
613                                  "flip-basic", "flip", "rotate", "tile", "size", "cluster", "resample", "min-object-size", "rmempty",
614                                  "crop-size", "cropped-object-size", "rmlabel", "rm-other-labels", "rm-if-overlaps", "sort-num-objects",
615                                  "one-object-per-image", "jpg", "rmignore", "sort", "split-train-test", "box-images"};
616         parser.check_one_time_options(singles);
617         const char* c_sub_ops[] = {"r", "convert"};
618         parser.check_sub_options("c", c_sub_ops);
619         parser.check_sub_option("shuffle", "seed");
620         const char* resample_sub_ops[] = {"min-object-size", "crop-size", "cropped-object-size", "one-object-per-image"};
621         parser.check_sub_options("resample", resample_sub_ops);
622         const char* size_parent_ops[] = {"tile", "cluster"};
623         parser.check_sub_options(size_parent_ops, "size");
624         parser.check_incompatible_options("c", "l");
625         parser.check_incompatible_options("c", "files");
626         parser.check_incompatible_options("c", "rmdiff");
627         parser.check_incompatible_options("c", "rmempty");
628         parser.check_incompatible_options("c", "rmlabel");
629         parser.check_incompatible_options("c", "rm-other-labels");
630         parser.check_incompatible_options("c", "rmignore");
631         parser.check_incompatible_options("c", "rm-if-overlaps");
632         parser.check_incompatible_options("c", "rmdupes");
633         parser.check_incompatible_options("c", "rmtrunc");
634         parser.check_incompatible_options("c", "box-images");
635         parser.check_incompatible_options("c", "add");
636         parser.check_incompatible_options("c", "flip");
637         parser.check_incompatible_options("c", "flip-basic");
638         parser.check_incompatible_options("flip", "flip-basic");
639         parser.check_incompatible_options("c", "rotate");
640         parser.check_incompatible_options("c", "rename");
641         parser.check_incompatible_options("c", "ignore");
642         parser.check_incompatible_options("c", "parts");
643         parser.check_incompatible_options("c", "tile");
644         parser.check_incompatible_options("c", "cluster");
645         parser.check_incompatible_options("c", "resample");
646         parser.check_incompatible_options("l", "rename");
647         parser.check_incompatible_options("l", "ignore");
648         parser.check_incompatible_options("l", "add");
649         parser.check_incompatible_options("l", "parts");
650         parser.check_incompatible_options("l", "flip");
651         parser.check_incompatible_options("l", "flip-basic");
652         parser.check_incompatible_options("l", "rotate");
653         parser.check_incompatible_options("files", "rename");
654         parser.check_incompatible_options("files", "ignore");
655         parser.check_incompatible_options("files", "add");
656         parser.check_incompatible_options("files", "parts");
657         parser.check_incompatible_options("files", "flip");
658         parser.check_incompatible_options("files", "flip-basic");
659         parser.check_incompatible_options("files", "rotate");
660         parser.check_incompatible_options("add", "flip");
661         parser.check_incompatible_options("add", "flip-basic");
662         parser.check_incompatible_options("add", "rotate");
663         parser.check_incompatible_options("add", "tile");
664         parser.check_incompatible_options("flip", "tile");
665         parser.check_incompatible_options("flip-basic", "tile");
666         parser.check_incompatible_options("rotate", "tile");
667         parser.check_incompatible_options("cluster", "tile");
668         parser.check_incompatible_options("resample", "tile");
669         parser.check_incompatible_options("flip", "cluster");
670         parser.check_incompatible_options("flip-basic", "cluster");
671         parser.check_incompatible_options("rotate", "cluster");
672         parser.check_incompatible_options("add", "cluster");
673         parser.check_incompatible_options("flip", "resample");
674         parser.check_incompatible_options("flip-basic", "resample");
675         parser.check_incompatible_options("rotate", "resample");
676         parser.check_incompatible_options("add", "resample");
677         parser.check_incompatible_options("shuffle", "tile");
678         parser.check_incompatible_options("sort-num-objects", "tile");
679         parser.check_incompatible_options("sort", "tile");
680         parser.check_incompatible_options("convert", "l");
681         parser.check_incompatible_options("convert", "files");
682         parser.check_incompatible_options("convert", "rename");
683         parser.check_incompatible_options("convert", "ignore");
684         parser.check_incompatible_options("convert", "parts");
685         parser.check_incompatible_options("convert", "cluster");
686         parser.check_incompatible_options("convert", "resample");
687         parser.check_incompatible_options("rmdiff", "rename");
688         parser.check_incompatible_options("rmdiff", "ignore");
689         parser.check_incompatible_options("rmempty", "ignore");
690         parser.check_incompatible_options("rmempty", "rename");
691         parser.check_incompatible_options("rmlabel", "ignore");
692         parser.check_incompatible_options("rmlabel", "rename");
693         parser.check_incompatible_options("rm-other-labels", "ignore");
694         parser.check_incompatible_options("rm-other-labels", "rename");
695         parser.check_incompatible_options("rmignore", "ignore");
696         parser.check_incompatible_options("rmignore", "rename");
697         parser.check_incompatible_options("rm-if-overlaps", "ignore");
698         parser.check_incompatible_options("rm-if-overlaps", "rename");
699         parser.check_incompatible_options("rmdupes", "rename");
700         parser.check_incompatible_options("rmdupes", "ignore");
701         parser.check_incompatible_options("rmtrunc", "rename");
702         parser.check_incompatible_options("rmtrunc", "ignore");
703         parser.check_incompatible_options("box-images", "rename");
704         parser.check_incompatible_options("box-images", "ignore");
705         const char* convert_args[] = {"pascal-xml","pascal-v1","idl"};
706         parser.check_option_arg_range("convert", convert_args);
707         parser.check_option_arg_range("cluster", 0, 999);
708         parser.check_option_arg_range("rotate", -360, 360);
709         parser.check_option_arg_range("size", 10*10, 1000*1000);
710         parser.check_option_arg_range("min-object-size", 1, 10000*10000);
711         parser.check_option_arg_range("cropped-object-size", 4, 10000*10000);
712         parser.check_option_arg_range("crop-size", 1.0, 100.0);
713         parser.check_option_arg_range("split-train-test", 0.0, 1.0);
714 
715         if (parser.option("h"))
716         {
717             cout << "Usage: imglab [options] <image files/directories or XML file>\n";
718             parser.print_options(cout);
719             cout << endl << endl;
720             return EXIT_SUCCESS;
721         }
722 
723         if (parser.option("add"))
724         {
725             merge_metadata_files(parser);
726             return EXIT_SUCCESS;
727         }
728 
729         if (parser.option("flip") || parser.option("flip-basic"))
730         {
731             flip_dataset(parser);
732             return EXIT_SUCCESS;
733         }
734 
735         if (parser.option("rotate"))
736         {
737             rotate_dataset(parser);
738             return EXIT_SUCCESS;
739         }
740 
741         if (parser.option("v"))
742         {
743             cout << "imglab v" << VERSION
744                  << "\nCompiled: " << __TIME__ << " " << __DATE__
745                  << "\nWritten by Davis King\n";
746             cout << "Check for updates at http://dlib.net\n\n";
747             return EXIT_SUCCESS;
748         }
749 
750         if (parser.option("tile"))
751         {
752             return tile_dataset(parser);
753         }
754 
755         if (parser.option("cluster"))
756         {
757             return cluster_dataset(parser);
758         }
759 
760         if (parser.option("resample"))
761         {
762             return resample_dataset(parser);
763         }
764 
765         if (parser.option("c"))
766         {
767             if (parser.option("convert"))
768             {
769                 if (parser.option("convert").argument() == "pascal-xml")
770                     convert_pascal_xml(parser);
771                 else if (parser.option("convert").argument() == "pascal-v1")
772                     convert_pascal_v1(parser);
773                 else if (parser.option("convert").argument() == "idl")
774                     convert_idl(parser);
775             }
776             else
777             {
778                 create_new_dataset(parser);
779             }
780             return EXIT_SUCCESS;
781         }
782 
783         if (parser.option("rmdiff"))
784         {
785             if (parser.number_of_arguments() != 1)
786             {
787                 cerr << "The --rmdiff option requires you to give one XML file on the command line." << endl;
788                 return EXIT_FAILURE;
789             }
790 
791             dlib::image_dataset_metadata::dataset data;
792             load_image_dataset_metadata(data, parser[0]);
793             for (unsigned long i = 0; i < data.images.size(); ++i)
794             {
795                 for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
796                 {
797                     if (data.images[i].boxes[j].difficult)
798                         data.images[i].boxes[j].ignore = true;
799                 }
800             }
801             save_image_dataset_metadata(data, parser[0]);
802             return EXIT_SUCCESS;
803         }
804 
805         if (parser.option("rmempty"))
806         {
807             if (parser.number_of_arguments() != 1)
808             {
809                 cerr << "The --rmempty option requires you to give one XML file on the command line." << endl;
810                 return EXIT_FAILURE;
811             }
812 
813             dlib::image_dataset_metadata::dataset data, data2;
814             load_image_dataset_metadata(data, parser[0]);
815 
816             data2 = data;
817             data2.images.clear();
818             for (unsigned long i = 0; i < data.images.size(); ++i)
819             {
820                 bool has_label = false;
821                 for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
822                 {
823                     if (!data.images[i].boxes[j].ignore)
824                         has_label = true;
825                 }
826                 if (has_label)
827                     data2.images.push_back(data.images[i]);
828             }
829             save_image_dataset_metadata(data2, parser[0] + ".rmempty.xml");
830             return EXIT_SUCCESS;
831         }
832 
833         if (parser.option("rmlabel"))
834         {
835             if (parser.number_of_arguments() != 1)
836             {
837                 cerr << "The --rmlabel option requires you to give one XML file on the command line." << endl;
838                 return EXIT_FAILURE;
839             }
840 
841             dlib::image_dataset_metadata::dataset data;
842             load_image_dataset_metadata(data, parser[0]);
843 
844             const auto label = parser.option("rmlabel").argument();
845 
846             for (auto&& img : data.images)
847             {
848                 std::vector<dlib::image_dataset_metadata::box> boxes;
849                 for (auto&& b : img.boxes)
850                 {
851                     if (b.label != label)
852                         boxes.push_back(b);
853                 }
854                 img.boxes = boxes;
855             }
856 
857             save_image_dataset_metadata(data, parser[0] + ".rmlabel-"+label+".xml");
858             return EXIT_SUCCESS;
859         }
860 
861         if (parser.option("rm-other-labels"))
862         {
863             if (parser.number_of_arguments() != 1)
864             {
865                 cerr << "The --rm-other-labels option requires you to give one XML file on the command line." << endl;
866                 return EXIT_FAILURE;
867             }
868 
869             dlib::image_dataset_metadata::dataset data;
870             load_image_dataset_metadata(data, parser[0]);
871 
872             const auto labels = parser.option("rm-other-labels").argument();
873             // replace comma by dash to form the file name
874             std::string strlabels = labels;
875             std::replace(strlabels.begin(), strlabels.end(), ',', '-');
876             std::vector<string> all_labels = split(labels, ",");
877             for (auto&& img : data.images)
878             {
879                 std::vector<dlib::image_dataset_metadata::box> boxes;
880                 for (auto&& b : img.boxes)
881                 {
882                     if (std::find(all_labels.begin(), all_labels.end(), b.label) != all_labels.end())
883                         boxes.push_back(b);
884                 }
885                 img.boxes = boxes;
886             }
887 
888             save_image_dataset_metadata(data, parser[0] + ".rm-other-labels-"+ strlabels +".xml");
889             return EXIT_SUCCESS;
890         }
891 
892         if (parser.option("rmignore"))
893         {
894             if (parser.number_of_arguments() != 1)
895             {
896                 cerr << "The --rmignore option requires you to give one XML file on the command line." << endl;
897                 return EXIT_FAILURE;
898             }
899 
900             dlib::image_dataset_metadata::dataset data;
901             load_image_dataset_metadata(data, parser[0]);
902 
903             for (auto&& img : data.images)
904             {
905                 std::vector<dlib::image_dataset_metadata::box> boxes;
906                 for (auto&& b : img.boxes)
907                 {
908                     if (!b.ignore)
909                         boxes.push_back(b);
910                 }
911                 img.boxes = boxes;
912             }
913 
914             save_image_dataset_metadata(data, parser[0] + ".rmignore.xml");
915             return EXIT_SUCCESS;
916         }
917 
918         if (parser.option("rm-if-overlaps"))
919         {
920             if (parser.number_of_arguments() != 1)
921             {
922                 cerr << "The --rm-if-overlaps option requires you to give one XML file on the command line." << endl;
923                 return EXIT_FAILURE;
924             }
925 
926             dlib::image_dataset_metadata::dataset data;
927             load_image_dataset_metadata(data, parser[0]);
928 
929             const auto label = parser.option("rm-if-overlaps").argument();
930 
931             test_box_overlap overlaps(0.5);
932 
933             for (auto&& img : data.images)
934             {
935                 std::vector<dlib::image_dataset_metadata::box> boxes;
936                 for (auto&& b : img.boxes)
937                 {
938                     if (b.label != label)
939                     {
940                         boxes.push_back(b);
941                     }
942                     else
943                     {
944                         bool has_overlap = false;
945                         for (auto&& b2 : img.boxes)
946                         {
947                             if (b2.label != label && overlaps(b2.rect, b.rect))
948                             {
949                                 has_overlap = true;
950                                 break;
951                             }
952                         }
953                         if (!has_overlap)
954                             boxes.push_back(b);
955                     }
956                 }
957                 img.boxes = boxes;
958             }
959 
960             save_image_dataset_metadata(data, parser[0] + ".rm-if-overlaps-"+label+".xml");
961             return EXIT_SUCCESS;
962         }
963 
964         if (parser.option("rmdupes"))
965         {
966             if (parser.number_of_arguments() != 1)
967             {
968                 cerr << "The --rmdupes option requires you to give one XML file on the command line." << endl;
969                 return EXIT_FAILURE;
970             }
971 
972             dlib::image_dataset_metadata::dataset data, data_out;
973             std::set<std::string> hashes;
974             load_image_dataset_metadata(data, parser[0]);
975             data_out = data;
976             data_out.images.clear();
977 
978             for (unsigned long i = 0; i < data.images.size(); ++i)
979             {
980                 ifstream fin(data.images[i].filename.c_str(), ios::binary);
981                 string hash = md5(fin);
982                 if (hashes.count(hash) == 0)
983                 {
984                     hashes.insert(hash);
985                     data_out.images.push_back(data.images[i]);
986                 }
987             }
988             save_image_dataset_metadata(data_out, parser[0]);
989             return EXIT_SUCCESS;
990         }
991 
992         if (parser.option("box-images"))
993         {
994             if (parser.number_of_arguments() != 1)
995             {
996                 cerr << "The --box-images option requires you to give one XML file on the command line." << endl;
997                 return EXIT_FAILURE;
998             }
999 
1000             dlib::image_dataset_metadata::dataset   data;
1001             load_image_dataset_metadata(data, parser[0]);
1002             {
1003                 locally_change_current_dir chdir(get_parent_directory(file(parser[0])));
1004                 parallel_for(0, data.images.size(), [&](long i)
1005                 {
1006                     array2d<unsigned char> img;
1007                     load_image(img, data.images[i].filename);
1008                     data.images[i].boxes.emplace_back(get_rect(img));
1009                 });
1010             }
1011             save_image_dataset_metadata(data, parser[0]+".boxed.xml");
1012             return EXIT_SUCCESS;
1013         }
1014 
1015         if (parser.option("rmtrunc"))
1016         {
1017             if (parser.number_of_arguments() != 1)
1018             {
1019                 cerr << "The --rmtrunc option requires you to give one XML file on the command line." << endl;
1020                 return EXIT_FAILURE;
1021             }
1022 
1023             dlib::image_dataset_metadata::dataset data;
1024             load_image_dataset_metadata(data, parser[0]);
1025             {
1026                 locally_change_current_dir chdir(get_parent_directory(file(parser[0])));
1027                 for (unsigned long i = 0; i < data.images.size(); ++i)
1028                 {
1029                     array2d<unsigned char> img;
1030                     load_image(img, data.images[i].filename);
1031                     const rectangle area = get_rect(img);
1032                     for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
1033                     {
1034                         if (!area.contains(data.images[i].boxes[j].rect))
1035                             data.images[i].boxes[j].ignore = true;
1036                     }
1037                 }
1038             }
1039             save_image_dataset_metadata(data, parser[0]);
1040             return EXIT_SUCCESS;
1041         }
1042 
1043         if (parser.option("l"))
1044         {
1045             if (parser.number_of_arguments() != 1)
1046             {
1047                 cerr << "The -l option requires you to give one XML file on the command line." << endl;
1048                 return EXIT_FAILURE;
1049             }
1050 
1051             dlib::image_dataset_metadata::dataset data;
1052             load_image_dataset_metadata(data, parser[0]);
1053             print_all_labels(data);
1054             return EXIT_SUCCESS;
1055         }
1056 
1057         if (parser.option("files"))
1058         {
1059             if (parser.number_of_arguments() != 1)
1060             {
1061                 cerr << "The --files option requires you to give one XML file on the command line." << endl;
1062                 return EXIT_FAILURE;
1063             }
1064 
1065             dlib::image_dataset_metadata::dataset data;
1066             load_image_dataset_metadata(data, parser[0]);
1067             for (size_t i = 0; i < data.images.size(); ++i)
1068                 cout << data.images[i].filename << "\n";
1069             return EXIT_SUCCESS;
1070         }
1071 
1072         if (parser.option("split"))
1073         {
1074             return split_dataset(parser);
1075         }
1076 
1077         if (parser.option("split-train-test"))
1078         {
1079             return make_train_test_splits(parser);
1080         }
1081 
1082         if (parser.option("shuffle"))
1083         {
1084             if (parser.number_of_arguments() != 1)
1085             {
1086                 cerr << "The --shuffle option requires you to give one XML file on the command line." << endl;
1087                 return EXIT_FAILURE;
1088             }
1089 
1090             dlib::image_dataset_metadata::dataset data;
1091             load_image_dataset_metadata(data, parser[0]);
1092             const string default_seed = cast_to_string(time(0));
1093             const string seed = get_option(parser, "seed", default_seed);
1094             dlib::rand rnd(seed);
1095             randomize_samples(data.images, rnd);
1096             save_image_dataset_metadata(data, parser[0]);
1097             return EXIT_SUCCESS;
1098         }
1099 
1100         if (parser.option("sort-num-objects"))
1101         {
1102             if (parser.number_of_arguments() != 1)
1103             {
1104                 cerr << "The --sort-num-objects option requires you to give one XML file on the command line." << endl;
1105                 return EXIT_FAILURE;
1106             }
1107 
1108             dlib::image_dataset_metadata::dataset data;
1109             load_image_dataset_metadata(data, parser[0]);
1110             std::sort(data.images.rbegin(),  data.images.rend(),
1111                 [](const image_dataset_metadata::image& a, const image_dataset_metadata::image& b) { return a.boxes.size() < b.boxes.size(); });
1112             save_image_dataset_metadata(data, parser[0]);
1113             return EXIT_SUCCESS;
1114         }
1115 
1116         if (parser.option("sort"))
1117         {
1118             if (parser.number_of_arguments() != 1)
1119             {
1120                 cerr << "The --sort option requires you to give one XML file on the command line." << endl;
1121                 return EXIT_FAILURE;
1122             }
1123 
1124             dlib::image_dataset_metadata::dataset data;
1125             load_image_dataset_metadata(data, parser[0]);
1126             std::sort(data.images.begin(),  data.images.end(),
1127                 [](const image_dataset_metadata::image& a, const image_dataset_metadata::image& b) { return a.filename < b.filename; });
1128             save_image_dataset_metadata(data, parser[0]);
1129             return EXIT_SUCCESS;
1130         }
1131 
1132         if (parser.option("stats"))
1133         {
1134             if (parser.number_of_arguments() != 1)
1135             {
1136                 cerr << "The --stats option requires you to give one XML file on the command line." << endl;
1137                 return EXIT_FAILURE;
1138             }
1139 
1140             dlib::image_dataset_metadata::dataset data;
1141             load_image_dataset_metadata(data, parser[0]);
1142             print_all_label_stats(data);
1143             return EXIT_SUCCESS;
1144         }
1145 
1146         if (parser.option("rename"))
1147         {
1148             if (parser.number_of_arguments() != 1)
1149             {
1150                 cerr << "The --rename option requires you to give one XML file on the command line." << endl;
1151                 return EXIT_FAILURE;
1152             }
1153 
1154             dlib::image_dataset_metadata::dataset data;
1155             load_image_dataset_metadata(data, parser[0]);
1156             for (unsigned long i = 0; i < parser.option("rename").count(); ++i)
1157             {
1158                 rename_labels(data, parser.option("rename").argument(0,i), parser.option("rename").argument(1,i));
1159             }
1160             save_image_dataset_metadata(data, parser[0]);
1161             return EXIT_SUCCESS;
1162         }
1163 
1164         if (parser.option("ignore"))
1165         {
1166             if (parser.number_of_arguments() != 1)
1167             {
1168                 cerr << "The --ignore option requires you to give one XML file on the command line." << endl;
1169                 return EXIT_FAILURE;
1170             }
1171 
1172             dlib::image_dataset_metadata::dataset data;
1173             load_image_dataset_metadata(data, parser[0]);
1174             for (unsigned long i = 0; i < parser.option("ignore").count(); ++i)
1175             {
1176                 ignore_labels(data, parser.option("ignore").argument());
1177             }
1178             save_image_dataset_metadata(data, parser[0]+".ignored.xml");
1179             return EXIT_SUCCESS;
1180         }
1181 
1182         if (parser.number_of_arguments() == 1)
1183         {
1184             metadata_editor editor(parser[0]);
1185             if (parser.option("parts"))
1186             {
1187                 std::vector<string> parts = split(parser.option("parts").argument());
1188                 for (unsigned long i = 0; i < parts.size(); ++i)
1189                 {
1190                     editor.add_labelable_part_name(parts[i]);
1191                 }
1192             }
1193             editor.wait_until_closed();
1194             return EXIT_SUCCESS;
1195         }
1196 
1197         cout << "Invalid command, give -h to see options." << endl;
1198         return EXIT_FAILURE;
1199     }
1200     catch (exception& e)
1201     {
1202         cerr << e.what() << endl;
1203         return EXIT_FAILURE;
1204     }
1205 }
1206 
1207 // ----------------------------------------------------------------------------------------
1208 
1209