1#!/usr/bin/env python
2"""Sort a simple YAML file, keeping blocks of comments and definitions
3together.
4
5We assume a strict subset of YAML that looks like:
6
7    # block of header comments
8    # here that should always
9    # be at the top of the file
10
11    # optional comments
12    # can go here
13    key: value
14    key: value
15
16    key: value
17
18In other words, we don't sort deeper than the top layer, and might corrupt
19complicated YAML files.
20"""
21import argparse
22from typing import List
23from typing import Optional
24from typing import Sequence
25
26
# Quote characters that may open a YAML key; `first_key` drops one of these
# from the start of a line when building a block's sort key.
QUOTES = ["'", '"']
28
29
def sort(lines: List[str]) -> List[str]:
    """Return `lines` with the blocks after the header ordered by sort key.

    The leading header comment block stays at the top; every following block
    is ordered by `first_key` and blocks are separated by one empty line.
    The input list itself is left untouched.

    :param lines: array of strings (without newlines)
    :return: sorted array of strings
    """
    # The parsers consume their input, so hand them a private copy.
    remaining = list(lines)

    result = parse_block(remaining, header=True)

    blocks = parse_blocks(remaining)
    blocks.sort(key=first_key)

    for block in blocks:
        # Separate from whatever was emitted before (header or prior block).
        if result:
            result.append('')
        result.extend(block)

    return result
46
47
def parse_block(lines: List[str], header: bool = False) -> List[str]:
    """Parse and return a single block, popping off the start of `lines`.

    A block ends at the first empty line. When parsing a header block it
    also ends at the first line that is not a comment. The terminating line
    is left in `lines`.

    :param lines: list of lines; the returned block is removed from its front
    :param header: whether we are parsing a header block
    :return: list of lines that form the single block (possibly empty)
    """
    # Find where the block ends first, then remove it in one slice instead of
    # calling `pop(0)` per line (each `pop(0)` shifts the whole list).
    end = 0
    for line in lines:
        if not line or (header and not line.startswith('#')):
            break
        end += 1

    block_lines = lines[:end]
    del lines[:end]
    return block_lines
62
63
def parse_blocks(lines: List[str]) -> List[List[str]]:
    """Parse and return all possible blocks, popping off the start of `lines`.

    Blocks are maximal runs of non-empty lines; empty lines only act as
    separators and are discarded. Every line is consumed, so `lines` is empty
    when this returns.

    :param lines: list of lines; emptied as a side effect
    :return: list of blocks, where each block is a list of lines
    """
    blocks: List[List[str]] = []
    current: List[str] = []

    # Single pass over the input instead of repeatedly popping the first
    # element, which costs a full list shift per line.
    for line in lines:
        if line:
            current.append(line)
        elif current:
            blocks.append(current)
            current = []
    if current:
        blocks.append(current)

    # Keep the documented contract: the input has been fully consumed.
    lines.clear()
    return blocks
79
80
def first_key(lines: List[str]) -> str:
    """Return the string used to order a block when sorting.

    The key is the first line of the block that is not a comment, taken in
    full (key and value). If that line starts with a quote character, that
    one leading character is dropped.

    >>> first_key(['# some comment', "'foo': true"])
    "foo': true"

    :param lines: the lines of one block
    :return: the sort key, or ``''`` if the block holds only comments
    """
    for line in lines:
        if line.startswith('#'):
            continue
        # str.startswith accepts a tuple of prefixes, so one call covers
        # every quote character.
        if line.startswith(tuple(QUOTES)):
            return line[1:]
        return line
    # Reached when the block is empty or consists solely of comment lines.
    return ''
101
102
def main(argv: Optional[Sequence[str]] = None) -> int:
    """Sort each given YAML file in place, rewriting only files that change.

    :param argv: command-line arguments (the filenames to fix); passed
        straight to `ArgumentParser.parse_args`
    :return: 1 if at least one file was rewritten, otherwise 0
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
    args = parser.parse_args(argv)

    retval = 0

    for filename in args.filenames:
        # Decode explicitly as UTF-8 instead of the locale's default encoding
        # so non-ASCII content is read and written back the same everywhere.
        with open(filename, 'r+', encoding='utf-8') as f:
            lines = [line.rstrip() for line in f]
            new_lines = sort(lines)

            if lines != new_lines:
                print(f'Fixing file `{filename}`')
                # Overwrite from the start, then drop any leftover tail in
                # case the new content is shorter than the old.
                f.seek(0)
                f.write('\n'.join(new_lines) + '\n')
                f.truncate()
                retval = 1

    return retval
123
124
if __name__ == '__main__':
    # `exit` is only injected by the `site` module; raising SystemExit works
    # in every interpreter configuration and still sets the exit status.
    raise SystemExit(main())
127