1
2 #include "dlib/data_io.h"
3 #include "dlib/string.h"
4 #include "metadata_editor.h"
5 #include "convert_pascal_xml.h"
6 #include "convert_pascal_v1.h"
7 #include "convert_idl.h"
8 #include "cluster.h"
9 #include "flip_dataset.h"
10 #include <dlib/cmd_line_parser.h>
11 #include <dlib/image_transforms.h>
12 #include <dlib/svm.h>
13 #include <dlib/console_progress_indicator.h>
14 #include <dlib/md5.h>
15
16 #include <iostream>
17 #include <fstream>
18 #include <string>
19 #include <set>
20
21 #include <dlib/dir_nav.h>
22
23
// Version string of the imglab tool; printed by main() when the -v option is given.
const char* VERSION = "1.17";
25
26
27
28 using namespace std;
29 using namespace dlib;
30
31 // ----------------------------------------------------------------------------------------
32
create_new_dataset(const command_line_parser & parser)33 void create_new_dataset (
34 const command_line_parser& parser
35 )
36 {
37 using namespace dlib::image_dataset_metadata;
38
39 const std::string filename = parser.option("c").argument();
40 // make sure the file exists so we can use the get_parent_directory() command to
41 // figure out it's parent directory.
42 make_empty_file(filename);
43 const std::string parent_dir = get_parent_directory(file(filename));
44
45 unsigned long depth = 0;
46 if (parser.option("r"))
47 depth = 30;
48
49 dataset meta;
50 meta.name = "imglab dataset";
51 meta.comment = "Created by imglab tool.";
52 for (unsigned long i = 0; i < parser.number_of_arguments(); ++i)
53 {
54 try
55 {
56 const string temp = strip_path(file(parser[i]), parent_dir);
57 meta.images.push_back(image(temp));
58 }
59 catch (dlib::file::file_not_found&)
60 {
61 // then parser[i] should be a directory
62
63 std::vector<file> files = get_files_in_directory_tree(parser[i],
64 match_endings(".png .PNG .jpeg .JPEG .jpg .JPG .bmp .BMP .dng .DNG .gif .GIF"),
65 depth);
66 sort(files.begin(), files.end());
67
68 for (unsigned long j = 0; j < files.size(); ++j)
69 {
70 meta.images.push_back(image(strip_path(files[j], parent_dir)));
71 }
72 }
73 }
74
75 save_image_dataset_metadata(meta, filename);
76 }
77
78 // ----------------------------------------------------------------------------------------
79
split_dataset(const command_line_parser & parser)80 int split_dataset (
81 const command_line_parser& parser
82 )
83 {
84 if (parser.number_of_arguments() != 1)
85 {
86 cerr << "The --split option requires you to give one XML file on the command line." << endl;
87 return EXIT_FAILURE;
88 }
89
90 const std::string label = parser.option("split").argument();
91
92 dlib::image_dataset_metadata::dataset data, data_with, data_without;
93 load_image_dataset_metadata(data, parser[0]);
94
95 data_with.name = data.name;
96 data_with.comment = data.comment;
97 data_without.name = data.name;
98 data_without.comment = data.comment;
99
100 for (unsigned long i = 0; i < data.images.size(); ++i)
101 {
102 auto&& temp = data.images[i];
103
104 bool has_the_label = false;
105 // check for the label we are looking for
106 for (unsigned long j = 0; j < temp.boxes.size(); ++j)
107 {
108 if (temp.boxes[j].label == label)
109 {
110 has_the_label = true;
111 break;
112 }
113 }
114
115 if (has_the_label)
116 data_with.images.push_back(temp);
117 else
118 data_without.images.push_back(temp);
119 }
120
121
122 save_image_dataset_metadata(data_with, left_substr(parser[0],".") + "_with_"+label + ".xml");
123 save_image_dataset_metadata(data_without, left_substr(parser[0],".") + "_without_"+label + ".xml");
124
125 return EXIT_SUCCESS;
126 }
127
128 // ----------------------------------------------------------------------------------------
129
make_train_test_splits(const command_line_parser & parser)130 int make_train_test_splits (
131 const command_line_parser& parser
132 )
133 {
134 if (parser.number_of_arguments() != 1)
135 {
136 cerr << "The --split-train-test option requires you to give one XML file on the command line." << endl;
137 return EXIT_FAILURE;
138 }
139
140 const double train_frac = get_option(parser, "split-train-test", 0.5);
141
142 dlib::image_dataset_metadata::dataset data, data_train, data_test;
143 load_image_dataset_metadata(data, parser[0]);
144
145 data_train.name = data.name;
146 data_train.comment = data.comment;
147 data_test.name = data.name;
148 data_test.comment = data.comment;
149
150 const unsigned long num_train_images = static_cast<unsigned long>(std::round(train_frac*data.images.size()));
151
152 for (unsigned long i = 0; i < data.images.size(); ++i)
153 {
154 if (i < num_train_images)
155 data_train.images.push_back(data.images[i]);
156 else
157 data_test.images.push_back(data.images[i]);
158 }
159
160 save_image_dataset_metadata(data_train, left_substr(parser[0],".") + "_train.xml");
161 save_image_dataset_metadata(data_test, left_substr(parser[0],".") + "_test.xml");
162
163 return EXIT_SUCCESS;
164 }
165
166 // ----------------------------------------------------------------------------------------
167
print_all_labels(const dlib::image_dataset_metadata::dataset & data)168 void print_all_labels (
169 const dlib::image_dataset_metadata::dataset& data
170 )
171 {
172 std::set<std::string> labels;
173 for (unsigned long i = 0; i < data.images.size(); ++i)
174 {
175 for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
176 {
177 labels.insert(data.images[i].boxes[j].label);
178 }
179 }
180
181 for (std::set<std::string>::iterator i = labels.begin(); i != labels.end(); ++i)
182 {
183 if (i->size() != 0)
184 {
185 cout << *i << endl;
186 }
187 }
188 }
189
190 // ----------------------------------------------------------------------------------------
191
print_all_label_stats(const dlib::image_dataset_metadata::dataset & data)192 void print_all_label_stats (
193 const dlib::image_dataset_metadata::dataset& data
194 )
195 {
196 std::map<std::string, running_stats<double> > area_stats, aspect_ratio;
197 std::map<std::string, int> image_hits;
198 std::set<std::string> labels;
199 unsigned long num_unignored_boxes = 0;
200 for (unsigned long i = 0; i < data.images.size(); ++i)
201 {
202 std::set<std::string> temp;
203 for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
204 {
205 labels.insert(data.images[i].boxes[j].label);
206 temp.insert(data.images[i].boxes[j].label);
207
208 area_stats[data.images[i].boxes[j].label].add(data.images[i].boxes[j].rect.area());
209 aspect_ratio[data.images[i].boxes[j].label].add(data.images[i].boxes[j].rect.width()/
210 (double)data.images[i].boxes[j].rect.height());
211
212 if (!data.images[i].boxes[j].ignore)
213 ++num_unignored_boxes;
214 }
215
216 // count the number of images for each label
217 for (std::set<std::string>::iterator i = temp.begin(); i != temp.end(); ++i)
218 image_hits[*i] += 1;
219 }
220
221 cout << "Number of images: "<< data.images.size() << endl;
222 cout << "Number of different labels: "<< labels.size() << endl;
223 cout << "Number of non-ignored boxes: " << num_unignored_boxes << endl << endl;
224
225 for (std::set<std::string>::iterator i = labels.begin(); i != labels.end(); ++i)
226 {
227 if (i->size() == 0)
228 cout << "Unlabeled Boxes:" << endl;
229 else
230 cout << "Label: "<< *i << endl;
231 cout << " number of images: " << image_hits[*i] << endl;
232 cout << " number of occurrences: " << area_stats[*i].current_n() << endl;
233 cout << " min box area: " << area_stats[*i].min() << endl;
234 cout << " max box area: " << area_stats[*i].max() << endl;
235 cout << " mean box area: " << area_stats[*i].mean() << endl;
236 cout << " stddev box area: " << area_stats[*i].stddev() << endl;
237 cout << " mean width/height ratio: " << aspect_ratio[*i].mean() << endl;
238 cout << " stddev width/height ratio: " << aspect_ratio[*i].stddev() << endl;
239 cout << endl;
240 }
241 }
242
243 // ----------------------------------------------------------------------------------------
244
rename_labels(dlib::image_dataset_metadata::dataset & data,const std::string & from,const std::string & to)245 void rename_labels (
246 dlib::image_dataset_metadata::dataset& data,
247 const std::string& from,
248 const std::string& to
249 )
250 {
251 for (unsigned long i = 0; i < data.images.size(); ++i)
252 {
253 for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
254 {
255 if (data.images[i].boxes[j].label == from)
256 data.images[i].boxes[j].label = to;
257 }
258 }
259
260 }
261
262 // ----------------------------------------------------------------------------------------
263
ignore_labels(dlib::image_dataset_metadata::dataset & data,const std::string & label)264 void ignore_labels (
265 dlib::image_dataset_metadata::dataset& data,
266 const std::string& label
267 )
268 {
269 for (unsigned long i = 0; i < data.images.size(); ++i)
270 {
271 for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
272 {
273 if (data.images[i].boxes[j].label == label)
274 data.images[i].boxes[j].ignore = true;
275 }
276 }
277 }
278
279 // ----------------------------------------------------------------------------------------
280
merge_metadata_files(const command_line_parser & parser)281 void merge_metadata_files (
282 const command_line_parser& parser
283 )
284 {
285 image_dataset_metadata::dataset src, dest;
286 load_image_dataset_metadata(src, parser.option("add").argument(0));
287 load_image_dataset_metadata(dest, parser.option("add").argument(1));
288
289 std::map<string,image_dataset_metadata::image> merged_data;
290 for (unsigned long i = 0; i < dest.images.size(); ++i)
291 merged_data[dest.images[i].filename] = dest.images[i];
292 // now add in the src data and overwrite anything if there are duplicate entries.
293 for (unsigned long i = 0; i < src.images.size(); ++i)
294 merged_data[src.images[i].filename] = src.images[i];
295
296 // copy merged data into dest
297 dest.images.clear();
298 for (std::map<string,image_dataset_metadata::image>::const_iterator i = merged_data.begin();
299 i != merged_data.end(); ++i)
300 {
301 dest.images.push_back(i->second);
302 }
303
304 save_image_dataset_metadata(dest, "merged.xml");
305 }
306
307 // ----------------------------------------------------------------------------------------
308
rotate_dataset(const command_line_parser & parser)309 void rotate_dataset(const command_line_parser& parser)
310 {
311 image_dataset_metadata::dataset metadata;
312 const string datasource = parser[0];
313 load_image_dataset_metadata(metadata,datasource);
314
315 double angle = get_option(parser, "rotate", 0);
316
317 // Set the current directory to be the one that contains the
318 // metadata file. We do this because the file might contain
319 // file paths which are relative to this folder.
320 set_current_dir(get_parent_directory(file(datasource)));
321
322 const string file_prefix = "rotated_"+ cast_to_string(angle) + "_";
323 const string metadata_filename = get_parent_directory(file(datasource)).full_name() +
324 directory::get_separator() + file_prefix + file(datasource).name();
325
326
327 array2d<rgb_pixel> img, temp;
328 for (unsigned long i = 0; i < metadata.images.size(); ++i)
329 {
330 file f(metadata.images[i].filename);
331 string filename = get_parent_directory(f).full_name() + directory::get_separator() + file_prefix + to_png_name(f.name());
332
333 load_image(img, metadata.images[i].filename);
334 const point_transform_affine tran = rotate_image(img, temp, angle*pi/180);
335 if (parser.option("jpg"))
336 {
337 filename = to_jpg_name(filename);
338 save_jpeg(temp, filename,JPEG_QUALITY);
339 }
340 else
341 {
342 save_png(temp, filename);
343 }
344
345 rectangle_transform rtran = tran;
346 for (unsigned long j = 0; j < metadata.images[i].boxes.size(); ++j)
347 {
348 metadata.images[i].boxes[j].rect = rtran(metadata.images[i].boxes[j].rect);
349
350 for (auto& p : metadata.images[i].boxes[j].parts)
351 p.second = tran(p.second);
352 }
353
354 metadata.images[i].filename = filename;
355 }
356
357 save_image_dataset_metadata(metadata, metadata_filename);
358 }
359
360 // ----------------------------------------------------------------------------------------
361
resample_dataset(const command_line_parser & parser)362 int resample_dataset(const command_line_parser& parser)
363 {
364 if (parser.number_of_arguments() != 1)
365 {
366 cerr << "The --resample option requires you to give one XML file on the command line." << endl;
367 return EXIT_FAILURE;
368 }
369
370 const size_t obj_size = get_option(parser,"cropped-object-size",100*100);
371 const double margin_scale = get_option(parser,"crop-size",2.5); // cropped image will be this times wider than the object.
372 const unsigned long min_object_size = get_option(parser,"min-object-size",1);
373 const bool one_object_per_image = parser.option("one-object-per-image");
374
375 dlib::image_dataset_metadata::dataset data, resampled_data;
376 std::ostringstream sout;
377 sout << "\nThe --resample parameters which generated this dataset were:" << endl;
378 sout << " cropped-object-size: "<< obj_size << endl;
379 sout << " crop-size: "<< margin_scale << endl;
380 sout << " min-object-size: "<< min_object_size << endl;
381 if (one_object_per_image)
382 sout << " one_object_per_image: true" << endl;
383 resampled_data.comment = data.comment + sout.str();
384 resampled_data.name = data.name + " RESAMPLED";
385
386 load_image_dataset_metadata(data, parser[0]);
387 locally_change_current_dir chdir(get_parent_directory(file(parser[0])));
388 dlib::rand rnd;
389
390 const size_t image_size = std::round(std::sqrt(obj_size*margin_scale*margin_scale));
391 const chip_dims cdims(image_size, image_size);
392
393 console_progress_indicator pbar(data.images.size());
394 for (unsigned long i = 0; i < data.images.size(); ++i)
395 {
396 // don't even bother loading images that don't have objects.
397 if (data.images[i].boxes.size() == 0)
398 continue;
399
400 pbar.print_status(i);
401 array2d<rgb_pixel> img, chip;
402 load_image(img, data.images[i].filename);
403
404
405 // figure out what chips we want to take from this image
406 for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
407 {
408 const rectangle rect = data.images[i].boxes[j].rect;
409 if (data.images[i].boxes[j].ignore || rect.area() < min_object_size)
410 continue;
411
412 const auto max_dim = std::max(rect.width(), rect.height());
413
414 const double rand_scale_perturb = 1 - 0.3*(rnd.get_random_double()-0.5);
415 const rectangle crop_rect = centered_rect(rect, max_dim*margin_scale*rand_scale_perturb, max_dim*margin_scale*rand_scale_perturb);
416
417 const rectangle_transform tform = get_mapping_to_chip(chip_details(crop_rect, cdims));
418 extract_image_chip(img, chip_details(crop_rect, cdims), chip);
419
420 image_dataset_metadata::image dimg;
421 // Now transform the boxes to the crop and also mark them as ignored if they
422 // have already been cropped out or are outside the crop.
423 for (size_t k = 0; k < data.images[i].boxes.size(); ++k)
424 {
425 image_dataset_metadata::box box = data.images[i].boxes[k];
426 // ignore boxes outside the cropped image
427 if (crop_rect.intersect(box.rect).area() == 0)
428 continue;
429
430 // mark boxes we include in the crop as ignored. Also mark boxes that
431 // aren't totally within the crop as ignored.
432 if (crop_rect.contains(grow_rect(box.rect,10)) && (!one_object_per_image || k==j))
433 data.images[i].boxes[k].ignore = true;
434 else
435 box.ignore = true;
436
437 if (box.rect.area() < min_object_size)
438 box.ignore = true;
439
440 box.rect = tform(box.rect);
441 for (auto&& p : box.parts)
442 p.second = tform.get_tform()(p.second);
443 dimg.boxes.push_back(box);
444 }
445 // Put a 64bit hash of the image data into the name to make sure there are no
446 // file name conflicts.
447 std::ostringstream sout;
448 sout << hex << murmur_hash3_128bit(&chip[0][0], chip.size()*sizeof(chip[0][0])).second;
449 dimg.filename = data.images[i].filename + "_RESAMPLED_"+sout.str()+".png";
450
451 if (parser.option("jpg"))
452 {
453 dimg.filename = to_jpg_name(dimg.filename);
454 save_jpeg(chip,dimg.filename, JPEG_QUALITY);
455 }
456 else
457 {
458 save_png(chip,dimg.filename);
459 }
460 resampled_data.images.push_back(dimg);
461 }
462 }
463
464 save_image_dataset_metadata(resampled_data, parser[0] + ".RESAMPLED.xml");
465
466 return EXIT_SUCCESS;
467 }
468
469 // ----------------------------------------------------------------------------------------
470
tile_dataset(const command_line_parser & parser)471 int tile_dataset(const command_line_parser& parser)
472 {
473 if (parser.number_of_arguments() != 1)
474 {
475 cerr << "The --tile option requires you to give one XML file on the command line." << endl;
476 return EXIT_FAILURE;
477 }
478
479 string out_image = parser.option("tile").argument();
480 string ext = right_substr(out_image,".");
481 if (ext != "png" && ext != "jpg")
482 {
483 cerr << "The output image file must have either .png or .jpg extension." << endl;
484 return EXIT_FAILURE;
485 }
486
487 const unsigned long chip_size = get_option(parser, "size", 8000);
488
489 dlib::image_dataset_metadata::dataset data;
490 load_image_dataset_metadata(data, parser[0]);
491 locally_change_current_dir chdir(get_parent_directory(file(parser[0])));
492 dlib::array<array2d<rgb_pixel> > images;
493 console_progress_indicator pbar(data.images.size());
494 for (unsigned long i = 0; i < data.images.size(); ++i)
495 {
496 // don't even bother loading images that don't have objects.
497 if (data.images[i].boxes.size() == 0)
498 continue;
499
500 pbar.print_status(i);
501 array2d<rgb_pixel> img;
502 load_image(img, data.images[i].filename);
503
504 // figure out what chips we want to take from this image
505 std::vector<chip_details> dets;
506 for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
507 {
508 if (data.images[i].boxes[j].ignore)
509 continue;
510
511 rectangle rect = data.images[i].boxes[j].rect;
512 dets.push_back(chip_details(rect, chip_size));
513 }
514 // Now grab all those chips at once.
515 dlib::array<array2d<rgb_pixel> > chips;
516 extract_image_chips(img, dets, chips);
517 // and put the chips into the output.
518 for (unsigned long j = 0; j < chips.size(); ++j)
519 images.push_back(chips[j]);
520 }
521
522 chdir.revert();
523
524 if (ext == "png")
525 save_png(tile_images(images), out_image);
526 else
527 save_jpeg(tile_images(images), out_image);
528
529 return EXIT_SUCCESS;
530 }
531
532
533 // ----------------------------------------------------------------------------------------
534
main(int argc,char ** argv)535 int main(int argc, char** argv)
536 {
537 try
538 {
539
540 command_line_parser parser;
541
542 parser.add_option("h","Displays this information.");
543 parser.add_option("v","Display version.");
544
545 parser.set_group_name("Creating XML files");
546 parser.add_option("c","Create an XML file named <arg> listing a set of images.",1);
547 parser.add_option("r","Search directories recursively for images.");
548 parser.add_option("convert","Convert foreign image Annotations from <arg> format to the imglab format. "
549 "Supported formats: pascal-xml, pascal-v1, idl.",1);
550
551 parser.set_group_name("Viewing XML files");
552 parser.add_option("tile","Chip out all the objects and save them as one big image called <arg>.",1);
553 parser.add_option("size","When using --tile or --cluster, make each extracted object contain "
554 "about <arg> pixels (default 8000).",1);
555 parser.add_option("l","List all the labels in the given XML file.");
556 parser.add_option("stats","List detailed statistics on the object labels in the given XML file.");
557 parser.add_option("files","List all the files in the given XML file.");
558
559 parser.set_group_name("Editing/Transforming XML datasets");
560 parser.add_option("rename", "Rename all labels of <arg1> to <arg2>.",2);
561 parser.add_option("parts","The display will allow image parts to be labeled. The set of allowable parts "
562 "is defined by <arg> which should be a space separated list of parts.",1);
563 parser.add_option("rmempty","Remove all images that don't contain non-ignored annotations and save the results to a new XML file.");
564 parser.add_option("rmdupes","Remove duplicate images from the dataset. This is done by comparing "
565 "the md5 hash of each image file and removing duplicate images. " );
566 parser.add_option("rmdiff","Set the ignored flag to true for boxes marked as difficult.");
567 parser.add_option("rmtrunc","Set the ignored flag to true for boxes that are partially outside the image.");
568 parser.add_option("box-images","Add a box to each image that contains the entire image.");
569 parser.add_option("sort-num-objects","Sort the images listed an XML file so images with many objects are listed first.");
570 parser.add_option("sort","Alphabetically sort the images in an XML file.");
571 parser.add_option("shuffle","Randomly shuffle the order of the images listed in an XML file.");
572 parser.add_option("seed", "When using --shuffle, set the random seed to the string <arg>.",1);
573 parser.add_option("split", "Split the contents of an XML file into two separate files. One containing the "
574 "images with objects labeled <arg> and another file with all the other images. ",1);
575 parser.add_option("split-train-test", "Split the contents of an XML file into two separate files. A training "
576 "file containing <arg> fraction of the images and a testing file containing the remaining (1-<arg>) images. "
577 "The partitioning is done deterministically by putting the first images in the input xml file into the training split "
578 "and the later images into the test split.",1);
579 parser.add_option("add", "Add the image metadata from <arg1> into <arg2>. If any of the image "
580 "tags are in both files then the ones in <arg2> are deleted and replaced with the "
581 "image tags from <arg1>. The results are saved into merged.xml and neither <arg1> or "
582 "<arg2> files are modified.",2);
583 parser.add_option("flip", "Read an XML image dataset from the <arg> XML file and output a left-right flipped "
584 "version of the dataset and an accompanying flipped XML file named flipped_<arg>. "
585 "We also adjust object part labels after flipping so that the new flipped dataset "
586 "has the same average part layout as the source dataset." ,1);
587 parser.add_option("flip-basic", "This option is just like --flip, except we don't adjust any object part labels after flipping. "
588 "The parts are instead simply mirrored to the flipped dataset.", 1);
589 parser.add_option("rotate", "Read an XML image dataset and output a copy that is rotated counter clockwise by <arg> degrees. "
590 "The output is saved to an XML file prefixed with rotated_<arg>.",1);
591 parser.add_option("cluster", "Cluster all the objects in an XML file into <arg> different clusters (pass 0 to find automatically) and save "
592 "the results as cluster_###.xml and cluster_###.jpg files.",1);
593 parser.add_option("ignore", "Mark boxes labeled as <arg> as ignored. The resulting XML file is output as a separate file and the original is not modified.",1);
594 parser.add_option("rmlabel","Remove all boxes labeled <arg> and save the results to a new XML file.",1);
595 parser.add_option("rm-other-labels","Remove all boxes not labeled <arg> and save the results to a new XML file.",1);
596 parser.add_option("rmignore","Remove all boxes marked ignore and save the results to a new XML file.");
597 parser.add_option("rm-if-overlaps","Remove all boxes labeled <arg> if they overlap any box not labeled <arg> and save the results to a new XML file.",1);
598 parser.add_option("jpg", "When saving images to disk, write them as jpg files instead of png.");
599
600 parser.set_group_name("Cropping sub images");
601 parser.add_option("resample", "Crop out images that are centered on each object in the dataset. "
602 "The output is a new XML dataset.");
603 parser.add_option("cropped-object-size", "When doing --resample, make the cropped objects contain about <arg> pixels (default 10000).",1);
604 parser.add_option("min-object-size", "When doing --resample, skip objects that have fewer than <arg> pixels in them (default 1).",1);
605 parser.add_option("crop-size", "When doing --resample, the entire cropped image will be <arg> times wider than the object (default 2.5).",1);
606 parser.add_option("one-object-per-image", "When doing --resample, only include one non-ignored object per image (i.e. the central object).");
607
608
609
610 parser.parse(argc, argv);
611
612 const char* singles[] = {"h","c","r","l","files","convert","parts","rmdiff", "rmtrunc", "rmdupes", "seed", "shuffle", "split", "add",
613 "flip-basic", "flip", "rotate", "tile", "size", "cluster", "resample", "min-object-size", "rmempty",
614 "crop-size", "cropped-object-size", "rmlabel", "rm-other-labels", "rm-if-overlaps", "sort-num-objects",
615 "one-object-per-image", "jpg", "rmignore", "sort", "split-train-test", "box-images"};
616 parser.check_one_time_options(singles);
617 const char* c_sub_ops[] = {"r", "convert"};
618 parser.check_sub_options("c", c_sub_ops);
619 parser.check_sub_option("shuffle", "seed");
620 const char* resample_sub_ops[] = {"min-object-size", "crop-size", "cropped-object-size", "one-object-per-image"};
621 parser.check_sub_options("resample", resample_sub_ops);
622 const char* size_parent_ops[] = {"tile", "cluster"};
623 parser.check_sub_options(size_parent_ops, "size");
624 parser.check_incompatible_options("c", "l");
625 parser.check_incompatible_options("c", "files");
626 parser.check_incompatible_options("c", "rmdiff");
627 parser.check_incompatible_options("c", "rmempty");
628 parser.check_incompatible_options("c", "rmlabel");
629 parser.check_incompatible_options("c", "rm-other-labels");
630 parser.check_incompatible_options("c", "rmignore");
631 parser.check_incompatible_options("c", "rm-if-overlaps");
632 parser.check_incompatible_options("c", "rmdupes");
633 parser.check_incompatible_options("c", "rmtrunc");
634 parser.check_incompatible_options("c", "box-images");
635 parser.check_incompatible_options("c", "add");
636 parser.check_incompatible_options("c", "flip");
637 parser.check_incompatible_options("c", "flip-basic");
638 parser.check_incompatible_options("flip", "flip-basic");
639 parser.check_incompatible_options("c", "rotate");
640 parser.check_incompatible_options("c", "rename");
641 parser.check_incompatible_options("c", "ignore");
642 parser.check_incompatible_options("c", "parts");
643 parser.check_incompatible_options("c", "tile");
644 parser.check_incompatible_options("c", "cluster");
645 parser.check_incompatible_options("c", "resample");
646 parser.check_incompatible_options("l", "rename");
647 parser.check_incompatible_options("l", "ignore");
648 parser.check_incompatible_options("l", "add");
649 parser.check_incompatible_options("l", "parts");
650 parser.check_incompatible_options("l", "flip");
651 parser.check_incompatible_options("l", "flip-basic");
652 parser.check_incompatible_options("l", "rotate");
653 parser.check_incompatible_options("files", "rename");
654 parser.check_incompatible_options("files", "ignore");
655 parser.check_incompatible_options("files", "add");
656 parser.check_incompatible_options("files", "parts");
657 parser.check_incompatible_options("files", "flip");
658 parser.check_incompatible_options("files", "flip-basic");
659 parser.check_incompatible_options("files", "rotate");
660 parser.check_incompatible_options("add", "flip");
661 parser.check_incompatible_options("add", "flip-basic");
662 parser.check_incompatible_options("add", "rotate");
663 parser.check_incompatible_options("add", "tile");
664 parser.check_incompatible_options("flip", "tile");
665 parser.check_incompatible_options("flip-basic", "tile");
666 parser.check_incompatible_options("rotate", "tile");
667 parser.check_incompatible_options("cluster", "tile");
668 parser.check_incompatible_options("resample", "tile");
669 parser.check_incompatible_options("flip", "cluster");
670 parser.check_incompatible_options("flip-basic", "cluster");
671 parser.check_incompatible_options("rotate", "cluster");
672 parser.check_incompatible_options("add", "cluster");
673 parser.check_incompatible_options("flip", "resample");
674 parser.check_incompatible_options("flip-basic", "resample");
675 parser.check_incompatible_options("rotate", "resample");
676 parser.check_incompatible_options("add", "resample");
677 parser.check_incompatible_options("shuffle", "tile");
678 parser.check_incompatible_options("sort-num-objects", "tile");
679 parser.check_incompatible_options("sort", "tile");
680 parser.check_incompatible_options("convert", "l");
681 parser.check_incompatible_options("convert", "files");
682 parser.check_incompatible_options("convert", "rename");
683 parser.check_incompatible_options("convert", "ignore");
684 parser.check_incompatible_options("convert", "parts");
685 parser.check_incompatible_options("convert", "cluster");
686 parser.check_incompatible_options("convert", "resample");
687 parser.check_incompatible_options("rmdiff", "rename");
688 parser.check_incompatible_options("rmdiff", "ignore");
689 parser.check_incompatible_options("rmempty", "ignore");
690 parser.check_incompatible_options("rmempty", "rename");
691 parser.check_incompatible_options("rmlabel", "ignore");
692 parser.check_incompatible_options("rmlabel", "rename");
693 parser.check_incompatible_options("rm-other-labels", "ignore");
694 parser.check_incompatible_options("rm-other-labels", "rename");
695 parser.check_incompatible_options("rmignore", "ignore");
696 parser.check_incompatible_options("rmignore", "rename");
697 parser.check_incompatible_options("rm-if-overlaps", "ignore");
698 parser.check_incompatible_options("rm-if-overlaps", "rename");
699 parser.check_incompatible_options("rmdupes", "rename");
700 parser.check_incompatible_options("rmdupes", "ignore");
701 parser.check_incompatible_options("rmtrunc", "rename");
702 parser.check_incompatible_options("rmtrunc", "ignore");
703 parser.check_incompatible_options("box-images", "rename");
704 parser.check_incompatible_options("box-images", "ignore");
705 const char* convert_args[] = {"pascal-xml","pascal-v1","idl"};
706 parser.check_option_arg_range("convert", convert_args);
707 parser.check_option_arg_range("cluster", 0, 999);
708 parser.check_option_arg_range("rotate", -360, 360);
709 parser.check_option_arg_range("size", 10*10, 1000*1000);
710 parser.check_option_arg_range("min-object-size", 1, 10000*10000);
711 parser.check_option_arg_range("cropped-object-size", 4, 10000*10000);
712 parser.check_option_arg_range("crop-size", 1.0, 100.0);
713 parser.check_option_arg_range("split-train-test", 0.0, 1.0);
714
715 if (parser.option("h"))
716 {
717 cout << "Usage: imglab [options] <image files/directories or XML file>\n";
718 parser.print_options(cout);
719 cout << endl << endl;
720 return EXIT_SUCCESS;
721 }
722
723 if (parser.option("add"))
724 {
725 merge_metadata_files(parser);
726 return EXIT_SUCCESS;
727 }
728
729 if (parser.option("flip") || parser.option("flip-basic"))
730 {
731 flip_dataset(parser);
732 return EXIT_SUCCESS;
733 }
734
735 if (parser.option("rotate"))
736 {
737 rotate_dataset(parser);
738 return EXIT_SUCCESS;
739 }
740
741 if (parser.option("v"))
742 {
743 cout << "imglab v" << VERSION
744 << "\nCompiled: " << __TIME__ << " " << __DATE__
745 << "\nWritten by Davis King\n";
746 cout << "Check for updates at http://dlib.net\n\n";
747 return EXIT_SUCCESS;
748 }
749
750 if (parser.option("tile"))
751 {
752 return tile_dataset(parser);
753 }
754
755 if (parser.option("cluster"))
756 {
757 return cluster_dataset(parser);
758 }
759
760 if (parser.option("resample"))
761 {
762 return resample_dataset(parser);
763 }
764
765 if (parser.option("c"))
766 {
767 if (parser.option("convert"))
768 {
769 if (parser.option("convert").argument() == "pascal-xml")
770 convert_pascal_xml(parser);
771 else if (parser.option("convert").argument() == "pascal-v1")
772 convert_pascal_v1(parser);
773 else if (parser.option("convert").argument() == "idl")
774 convert_idl(parser);
775 }
776 else
777 {
778 create_new_dataset(parser);
779 }
780 return EXIT_SUCCESS;
781 }
782
783 if (parser.option("rmdiff"))
784 {
785 if (parser.number_of_arguments() != 1)
786 {
787 cerr << "The --rmdiff option requires you to give one XML file on the command line." << endl;
788 return EXIT_FAILURE;
789 }
790
791 dlib::image_dataset_metadata::dataset data;
792 load_image_dataset_metadata(data, parser[0]);
793 for (unsigned long i = 0; i < data.images.size(); ++i)
794 {
795 for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
796 {
797 if (data.images[i].boxes[j].difficult)
798 data.images[i].boxes[j].ignore = true;
799 }
800 }
801 save_image_dataset_metadata(data, parser[0]);
802 return EXIT_SUCCESS;
803 }
804
805 if (parser.option("rmempty"))
806 {
807 if (parser.number_of_arguments() != 1)
808 {
809 cerr << "The --rmempty option requires you to give one XML file on the command line." << endl;
810 return EXIT_FAILURE;
811 }
812
813 dlib::image_dataset_metadata::dataset data, data2;
814 load_image_dataset_metadata(data, parser[0]);
815
816 data2 = data;
817 data2.images.clear();
818 for (unsigned long i = 0; i < data.images.size(); ++i)
819 {
820 bool has_label = false;
821 for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
822 {
823 if (!data.images[i].boxes[j].ignore)
824 has_label = true;
825 }
826 if (has_label)
827 data2.images.push_back(data.images[i]);
828 }
829 save_image_dataset_metadata(data2, parser[0] + ".rmempty.xml");
830 return EXIT_SUCCESS;
831 }
832
833 if (parser.option("rmlabel"))
834 {
835 if (parser.number_of_arguments() != 1)
836 {
837 cerr << "The --rmlabel option requires you to give one XML file on the command line." << endl;
838 return EXIT_FAILURE;
839 }
840
841 dlib::image_dataset_metadata::dataset data;
842 load_image_dataset_metadata(data, parser[0]);
843
844 const auto label = parser.option("rmlabel").argument();
845
846 for (auto&& img : data.images)
847 {
848 std::vector<dlib::image_dataset_metadata::box> boxes;
849 for (auto&& b : img.boxes)
850 {
851 if (b.label != label)
852 boxes.push_back(b);
853 }
854 img.boxes = boxes;
855 }
856
857 save_image_dataset_metadata(data, parser[0] + ".rmlabel-"+label+".xml");
858 return EXIT_SUCCESS;
859 }
860
861 if (parser.option("rm-other-labels"))
862 {
863 if (parser.number_of_arguments() != 1)
864 {
865 cerr << "The --rm-other-labels option requires you to give one XML file on the command line." << endl;
866 return EXIT_FAILURE;
867 }
868
869 dlib::image_dataset_metadata::dataset data;
870 load_image_dataset_metadata(data, parser[0]);
871
872 const auto labels = parser.option("rm-other-labels").argument();
873 // replace comma by dash to form the file name
874 std::string strlabels = labels;
875 std::replace(strlabels.begin(), strlabels.end(), ',', '-');
876 std::vector<string> all_labels = split(labels, ",");
877 for (auto&& img : data.images)
878 {
879 std::vector<dlib::image_dataset_metadata::box> boxes;
880 for (auto&& b : img.boxes)
881 {
882 if (std::find(all_labels.begin(), all_labels.end(), b.label) != all_labels.end())
883 boxes.push_back(b);
884 }
885 img.boxes = boxes;
886 }
887
888 save_image_dataset_metadata(data, parser[0] + ".rm-other-labels-"+ strlabels +".xml");
889 return EXIT_SUCCESS;
890 }
891
892 if (parser.option("rmignore"))
893 {
894 if (parser.number_of_arguments() != 1)
895 {
896 cerr << "The --rmignore option requires you to give one XML file on the command line." << endl;
897 return EXIT_FAILURE;
898 }
899
900 dlib::image_dataset_metadata::dataset data;
901 load_image_dataset_metadata(data, parser[0]);
902
903 for (auto&& img : data.images)
904 {
905 std::vector<dlib::image_dataset_metadata::box> boxes;
906 for (auto&& b : img.boxes)
907 {
908 if (!b.ignore)
909 boxes.push_back(b);
910 }
911 img.boxes = boxes;
912 }
913
914 save_image_dataset_metadata(data, parser[0] + ".rmignore.xml");
915 return EXIT_SUCCESS;
916 }
917
918 if (parser.option("rm-if-overlaps"))
919 {
920 if (parser.number_of_arguments() != 1)
921 {
922 cerr << "The --rm-if-overlaps option requires you to give one XML file on the command line." << endl;
923 return EXIT_FAILURE;
924 }
925
926 dlib::image_dataset_metadata::dataset data;
927 load_image_dataset_metadata(data, parser[0]);
928
929 const auto label = parser.option("rm-if-overlaps").argument();
930
931 test_box_overlap overlaps(0.5);
932
933 for (auto&& img : data.images)
934 {
935 std::vector<dlib::image_dataset_metadata::box> boxes;
936 for (auto&& b : img.boxes)
937 {
938 if (b.label != label)
939 {
940 boxes.push_back(b);
941 }
942 else
943 {
944 bool has_overlap = false;
945 for (auto&& b2 : img.boxes)
946 {
947 if (b2.label != label && overlaps(b2.rect, b.rect))
948 {
949 has_overlap = true;
950 break;
951 }
952 }
953 if (!has_overlap)
954 boxes.push_back(b);
955 }
956 }
957 img.boxes = boxes;
958 }
959
960 save_image_dataset_metadata(data, parser[0] + ".rm-if-overlaps-"+label+".xml");
961 return EXIT_SUCCESS;
962 }
963
964 if (parser.option("rmdupes"))
965 {
966 if (parser.number_of_arguments() != 1)
967 {
968 cerr << "The --rmdupes option requires you to give one XML file on the command line." << endl;
969 return EXIT_FAILURE;
970 }
971
972 dlib::image_dataset_metadata::dataset data, data_out;
973 std::set<std::string> hashes;
974 load_image_dataset_metadata(data, parser[0]);
975 data_out = data;
976 data_out.images.clear();
977
978 for (unsigned long i = 0; i < data.images.size(); ++i)
979 {
980 ifstream fin(data.images[i].filename.c_str(), ios::binary);
981 string hash = md5(fin);
982 if (hashes.count(hash) == 0)
983 {
984 hashes.insert(hash);
985 data_out.images.push_back(data.images[i]);
986 }
987 }
988 save_image_dataset_metadata(data_out, parser[0]);
989 return EXIT_SUCCESS;
990 }
991
992 if (parser.option("box-images"))
993 {
994 if (parser.number_of_arguments() != 1)
995 {
996 cerr << "The --box-images option requires you to give one XML file on the command line." << endl;
997 return EXIT_FAILURE;
998 }
999
1000 dlib::image_dataset_metadata::dataset data;
1001 load_image_dataset_metadata(data, parser[0]);
1002 {
1003 locally_change_current_dir chdir(get_parent_directory(file(parser[0])));
1004 parallel_for(0, data.images.size(), [&](long i)
1005 {
1006 array2d<unsigned char> img;
1007 load_image(img, data.images[i].filename);
1008 data.images[i].boxes.emplace_back(get_rect(img));
1009 });
1010 }
1011 save_image_dataset_metadata(data, parser[0]+".boxed.xml");
1012 return EXIT_SUCCESS;
1013 }
1014
1015 if (parser.option("rmtrunc"))
1016 {
1017 if (parser.number_of_arguments() != 1)
1018 {
1019 cerr << "The --rmtrunc option requires you to give one XML file on the command line." << endl;
1020 return EXIT_FAILURE;
1021 }
1022
1023 dlib::image_dataset_metadata::dataset data;
1024 load_image_dataset_metadata(data, parser[0]);
1025 {
1026 locally_change_current_dir chdir(get_parent_directory(file(parser[0])));
1027 for (unsigned long i = 0; i < data.images.size(); ++i)
1028 {
1029 array2d<unsigned char> img;
1030 load_image(img, data.images[i].filename);
1031 const rectangle area = get_rect(img);
1032 for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
1033 {
1034 if (!area.contains(data.images[i].boxes[j].rect))
1035 data.images[i].boxes[j].ignore = true;
1036 }
1037 }
1038 }
1039 save_image_dataset_metadata(data, parser[0]);
1040 return EXIT_SUCCESS;
1041 }
1042
1043 if (parser.option("l"))
1044 {
1045 if (parser.number_of_arguments() != 1)
1046 {
1047 cerr << "The -l option requires you to give one XML file on the command line." << endl;
1048 return EXIT_FAILURE;
1049 }
1050
1051 dlib::image_dataset_metadata::dataset data;
1052 load_image_dataset_metadata(data, parser[0]);
1053 print_all_labels(data);
1054 return EXIT_SUCCESS;
1055 }
1056
1057 if (parser.option("files"))
1058 {
1059 if (parser.number_of_arguments() != 1)
1060 {
1061 cerr << "The --files option requires you to give one XML file on the command line." << endl;
1062 return EXIT_FAILURE;
1063 }
1064
1065 dlib::image_dataset_metadata::dataset data;
1066 load_image_dataset_metadata(data, parser[0]);
1067 for (size_t i = 0; i < data.images.size(); ++i)
1068 cout << data.images[i].filename << "\n";
1069 return EXIT_SUCCESS;
1070 }
1071
1072 if (parser.option("split"))
1073 {
1074 return split_dataset(parser);
1075 }
1076
1077 if (parser.option("split-train-test"))
1078 {
1079 return make_train_test_splits(parser);
1080 }
1081
1082 if (parser.option("shuffle"))
1083 {
1084 if (parser.number_of_arguments() != 1)
1085 {
1086 cerr << "The --shuffle option requires you to give one XML file on the command line." << endl;
1087 return EXIT_FAILURE;
1088 }
1089
1090 dlib::image_dataset_metadata::dataset data;
1091 load_image_dataset_metadata(data, parser[0]);
1092 const string default_seed = cast_to_string(time(0));
1093 const string seed = get_option(parser, "seed", default_seed);
1094 dlib::rand rnd(seed);
1095 randomize_samples(data.images, rnd);
1096 save_image_dataset_metadata(data, parser[0]);
1097 return EXIT_SUCCESS;
1098 }
1099
1100 if (parser.option("sort-num-objects"))
1101 {
1102 if (parser.number_of_arguments() != 1)
1103 {
1104 cerr << "The --sort-num-objects option requires you to give one XML file on the command line." << endl;
1105 return EXIT_FAILURE;
1106 }
1107
1108 dlib::image_dataset_metadata::dataset data;
1109 load_image_dataset_metadata(data, parser[0]);
1110 std::sort(data.images.rbegin(), data.images.rend(),
1111 [](const image_dataset_metadata::image& a, const image_dataset_metadata::image& b) { return a.boxes.size() < b.boxes.size(); });
1112 save_image_dataset_metadata(data, parser[0]);
1113 return EXIT_SUCCESS;
1114 }
1115
1116 if (parser.option("sort"))
1117 {
1118 if (parser.number_of_arguments() != 1)
1119 {
1120 cerr << "The --sort option requires you to give one XML file on the command line." << endl;
1121 return EXIT_FAILURE;
1122 }
1123
1124 dlib::image_dataset_metadata::dataset data;
1125 load_image_dataset_metadata(data, parser[0]);
1126 std::sort(data.images.begin(), data.images.end(),
1127 [](const image_dataset_metadata::image& a, const image_dataset_metadata::image& b) { return a.filename < b.filename; });
1128 save_image_dataset_metadata(data, parser[0]);
1129 return EXIT_SUCCESS;
1130 }
1131
1132 if (parser.option("stats"))
1133 {
1134 if (parser.number_of_arguments() != 1)
1135 {
1136 cerr << "The --stats option requires you to give one XML file on the command line." << endl;
1137 return EXIT_FAILURE;
1138 }
1139
1140 dlib::image_dataset_metadata::dataset data;
1141 load_image_dataset_metadata(data, parser[0]);
1142 print_all_label_stats(data);
1143 return EXIT_SUCCESS;
1144 }
1145
1146 if (parser.option("rename"))
1147 {
1148 if (parser.number_of_arguments() != 1)
1149 {
1150 cerr << "The --rename option requires you to give one XML file on the command line." << endl;
1151 return EXIT_FAILURE;
1152 }
1153
1154 dlib::image_dataset_metadata::dataset data;
1155 load_image_dataset_metadata(data, parser[0]);
1156 for (unsigned long i = 0; i < parser.option("rename").count(); ++i)
1157 {
1158 rename_labels(data, parser.option("rename").argument(0,i), parser.option("rename").argument(1,i));
1159 }
1160 save_image_dataset_metadata(data, parser[0]);
1161 return EXIT_SUCCESS;
1162 }
1163
1164 if (parser.option("ignore"))
1165 {
1166 if (parser.number_of_arguments() != 1)
1167 {
1168 cerr << "The --ignore option requires you to give one XML file on the command line." << endl;
1169 return EXIT_FAILURE;
1170 }
1171
1172 dlib::image_dataset_metadata::dataset data;
1173 load_image_dataset_metadata(data, parser[0]);
1174 for (unsigned long i = 0; i < parser.option("ignore").count(); ++i)
1175 {
1176 ignore_labels(data, parser.option("ignore").argument());
1177 }
1178 save_image_dataset_metadata(data, parser[0]+".ignored.xml");
1179 return EXIT_SUCCESS;
1180 }
1181
1182 if (parser.number_of_arguments() == 1)
1183 {
1184 metadata_editor editor(parser[0]);
1185 if (parser.option("parts"))
1186 {
1187 std::vector<string> parts = split(parser.option("parts").argument());
1188 for (unsigned long i = 0; i < parts.size(); ++i)
1189 {
1190 editor.add_labelable_part_name(parts[i]);
1191 }
1192 }
1193 editor.wait_until_closed();
1194 return EXIT_SUCCESS;
1195 }
1196
1197 cout << "Invalid command, give -h to see options." << endl;
1198 return EXIT_FAILURE;
1199 }
1200 catch (exception& e)
1201 {
1202 cerr << e.what() << endl;
1203 return EXIT_FAILURE;
1204 }
1205 }
1206
1207 // ----------------------------------------------------------------------------------------
1208
1209