1#!/usr/bin/env python
2
3from __future__ import print_function
4
# Tool description; passed to argparse.ArgumentParser(description=...) in the
# __main__ section below, so it is the text shown at the top of --help output.
desc = '''Generate the difference of two YAML files into a new YAML file (works on
pair of directories too).  A new attribute 'Added' is set to True or False
depending whether the entry is added or removed from the first input to the
next.

The tools requires PyYAML.'''
11
12import yaml
13# Try to use the C parser.
14try:
15    from yaml import CLoader as Loader
16except ImportError:
17    from yaml import Loader
18
19import optrecord
20import argparse
21from collections import defaultdict
22
def split_into_chunks(items, max_size):
    """Yield ``(index, chunk)`` pairs covering *items* in order.

    *index* is a consecutive integer starting at 0 and *chunk* is a slice of
    at most *max_size* elements.  The index is always an ``int`` so it can be
    substituted directly into an output file name (true division would give
    ``0.0``, ``1.0``, ... on Python 3).

    Raises ValueError if *max_size* is not a positive integer.
    """
    if max_size <= 0:
        raise ValueError('max_size must be a positive integer')
    for index, start in enumerate(range(0, len(items), max_size)):
        yield index, items[start:start + max_size]


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument(
        'yaml_dir_or_file_1',
        help='An optimization record file or a directory searched for optimization '
             'record files that are used as the old version for the comparison')
    parser.add_argument(
        'yaml_dir_or_file_2',
        help='An optimization record file or a directory searched for optimization '
             'record files that are used as the new version for the comparison')
    parser.add_argument(
        '--jobs',
        '-j',
        default=None,
        type=int,
        help='Max job count (defaults to %(default)s, the current CPU count)')
    parser.add_argument(
        '--max-size',
        '-m',
        default=100000,
        type=int,
        help='Maximum number of remarks stored in an output file')
    parser.add_argument(
        '--no-progress-indicator',
        '-n',
        action='store_true',
        default=False,
        help='Do not display any indicator of how many YAML files were read.')
    parser.add_argument(
        '--output',
        '-o',
        default='diff{}.opt.yaml',
        help='Output file name pattern; "{}" is replaced by the index of the '
             'output chunk (default: %(default)s)')
    args = parser.parse_args()

    # A zero step would make range() raise and a negative one would silently
    # produce no output, so reject both up front with a proper usage error.
    if args.max_size <= 0:
        parser.error('--max-size must be a positive integer')

    files1 = optrecord.find_opt_files(args.yaml_dir_or_file_1)
    files2 = optrecord.find_opt_files(args.yaml_dir_or_file_2)

    print_progress = not args.no_progress_indicator
    # Only the first element of each gather_results() triple (a mapping whose
    # values are the remarks) is needed here.
    all_remarks1, _, _ = optrecord.gather_results(files1, args.jobs, print_progress)
    all_remarks2, _, _ = optrecord.gather_results(files2, args.jobs, print_progress)

    # Build each set once; the difference is taken in both directions.
    remarks1 = set(all_remarks1.values())
    remarks2 = set(all_remarks2.values())
    added = remarks2 - remarks1
    removed = remarks1 - remarks2

    # Tag every remark with the direction of the change.
    for r in added:
        r.Added = True
    for r in removed:
        r.Added = False

    result = list(added | removed)
    for r in result:
        # NOTE(review): presumably restores the remark to the layout the YAML
        # dumper expects -- confirm against optrecord.
        r.recover_yaml_structure()

    # Write at most --max-size remarks per file.  The chunk index is an
    # integer, so the default pattern yields diff0.opt.yaml, diff1.opt.yaml...
    for index, chunk in split_into_chunks(result, args.max_size):
        with open(args.output.format(index), 'w') as stream:
            yaml.dump_all(chunk, stream)
76