#!/usr/bin/env python
"""Sort a simple YAML file, keeping blocks of comments and definitions
together.

We assume a strict subset of YAML that looks like:

    # block of header comments
    # here that should always
    # be at the top of the file

    # optional comments
    # can go here
    key: value
    key: value

    key: value

In other words, we don't sort deeper than the top layer, and might corrupt
complicated YAML files.
"""
import argparse
from typing import List
from typing import Optional
from typing import Sequence


# Quote characters that may wrap a top-level key; a single leading one is
# ignored when computing a block's sort key.
QUOTES = ["'", '"']


def sort(lines: List[str]) -> List[str]:
    """Sort a YAML file in alphabetical order, keeping blocks together.

    The leading run of comment lines (if any) is treated as the file header
    and stays at the top, even when no blank line separates it from the
    first key. The remaining blocks are ordered by `first_key` and joined
    with exactly one empty line between them.

    :param lines: array of strings (without newlines); not modified
    :return: sorted array of strings
    """
    # Work on a copy: the parsing helpers consume the list they are given.
    remaining = list(lines)
    new_lines = parse_block(remaining, header=True)

    for block in sorted(parse_blocks(remaining), key=first_key):
        # Separate from whatever precedes (header or previous block), but
        # never start the output with an empty line.
        if new_lines:
            new_lines.append('')
        new_lines.extend(block)

    return new_lines


def parse_block(lines: List[str], header: bool = False) -> List[str]:
    """Parse and return a single block, popping off the start of `lines`.

    If parsing a header block, we stop after we reach a line that is not a
    comment. Otherwise, we stop after reaching an empty line. The
    terminating line itself is left in `lines`.

    :param lines: list of lines; the returned block is removed from its front
    :param header: whether we are parsing a header block
    :return: list of lines that form the single block (possibly empty)
    """
    end = 0
    while (
            end < len(lines) and
            lines[end] and
            (not header or lines[end].startswith('#'))
    ):
        end += 1

    # Remove the whole block with one slice delete instead of one
    # `pop(0)` per line, which would shift the list on every line.
    block_lines = lines[:end]
    del lines[:end]
    return block_lines


def parse_blocks(lines: List[str]) -> List[List[str]]:
    """Parse and return all possible blocks, popping off the start of `lines`.

    Blocks are maximal runs of non-empty lines; empty lines only act as
    separators and are discarded. `lines` is left empty afterwards.

    :param lines: list of lines; fully consumed by this call
    :return: list of blocks, where each block is a list of lines
    """
    blocks = []
    current: List[str] = []

    # Single linear pass; the list is cleared once at the end.
    for line in lines:
        if line:
            current.append(line)
        elif current:
            blocks.append(current)
            current = []
    if current:
        blocks.append(current)

    lines.clear()
    return blocks


def first_key(lines: List[str]) -> str:
    """Return a string representing the sort key of a block.

    The sort key is the first line of the block that is not a comment, with
    one leading quote character (see `QUOTES`) stripped. The rest of the
    line, including any closing quote and the value, is part of the key.

    >>> first_key(['# some comment', "'foo': true"])
    "foo': true"

    :param lines: the lines of a single block
    :return: the sort key, or '' if the block contains only comments
    """
    quote_prefixes = tuple(QUOTES)
    for line in lines:
        if line.startswith('#'):
            continue
        if line.startswith(quote_prefixes):
            return line[1:]
        return line
    # Comment-only (or empty) block: sorts before every keyed block.
    return ''


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Sort each given file in place.

    :param argv: command-line arguments; argparse falls back to
        `sys.argv[1:]` when this is None
    :return: 1 if any file was rewritten, 0 otherwise
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
    args = parser.parse_args(argv)

    retval = 0

    for filename in args.filenames:
        with open(filename, 'r+') as f:
            # Trailing whitespace (including the newline) is dropped before
            # comparing, so it alone never triggers a rewrite.
            lines = [line.rstrip() for line in f]
            new_lines = sort(lines)

            if lines != new_lines:
                print(f'Fixing file `{filename}`')
                f.seek(0)
                f.write('\n'.join(new_lines) + '\n')
                # The new content may be shorter than the old content.
                f.truncate()
                retval = 1

    return retval


if __name__ == '__main__':
    # `exit()` is injected by the `site` module and is not guaranteed to
    # exist (e.g. under `python -S`); SystemExit always is.
    raise SystemExit(main())