blob: 116b5c19797805839dd3ad3c596074ec463b6e4a [file] [log] [blame]
"""Sort a simple YAML file, keeping blocks of comments and definitions
together.

We assume a strict subset of YAML that looks like:

    # block of header comments
    # here that should always
    # be at the top of the file

    # optional comments
    # can go here
    key: value
    key: value

    key: value

In other words, we don't sort deeper than the top layer, and might corrupt
complicated YAML files.
"""
Anthony Sottile8f615292022-01-15 19:24:05 -050020from __future__ import annotations
21
Daniel Gallagherb6eff3d2017-06-23 16:26:00 -070022import argparse
Anthony Sottile030bfac2019-01-31 19:19:10 -080023from typing import Sequence
Daniel Gallagherb6eff3d2017-06-23 16:26:00 -070024
25
# Quote characters that may open a YAML key; first_key() drops one of these
# from the start of a line so quoted and unquoted keys sort together.
QUOTES = ["'", '"']
27
28
def sort(lines: list[str]) -> list[str]:
    """Return the lines of a simple YAML file with its blocks sorted.

    The leading run of comment lines (the header) stays at the top. The
    remaining blocks are ordered by `first_key` and separated from each
    other, and from a non-empty header, by a single empty line.

    :param lines: list of lines (without newlines); it is not modified
    :return: new list of lines in sorted order
    """
    # the parsing helpers consume their input, so work on a private copy
    remaining = list(lines)

    result = parse_block(remaining, header=True)

    blocks = parse_blocks(remaining)
    blocks.sort(key=first_key)

    for block in blocks:
        # only emit a separator when something has already been written
        if result:
            result.append('')
        result.extend(block)

    return result
45
46
def parse_block(lines: list[str], header: bool = False) -> list[str]:
    """Parse and return a single block, popping off the start of `lines`.

    If parsing a header block, we stop after we reach a line that is not a
    comment. Otherwise, we stop after reaching an empty line. In both cases
    an empty line (or the end of `lines`) ends the block.

    The lines that make up the block are removed from `lines` in place; the
    line that terminated the block is left at the front of `lines`.

    :param lines: list of lines
    :param header: whether we are parsing a header block
    :return: list of lines that form the single block
    """
    # Find where the block ends first, then remove it with one slice
    # deletion; popping from the front line by line shifts the whole list
    # on every call.
    end = 0
    while (
            end < len(lines) and
            lines[end] and
            (not header or lines[end].startswith('#'))
    ):
        end += 1

    block_lines = lines[:end]
    del lines[:end]
    return block_lines
61
62
def parse_blocks(lines: list[str]) -> list[list[str]]:
    """Parse and return all possible blocks, popping off the start of `lines`.

    A block is a maximal run of consecutive non-empty lines; empty lines
    only act as separators and are discarded. Every line is consumed, so
    `lines` is empty when this function returns.

    :param lines: list of lines
    :return: list of blocks, where each block is a list of lines
    """
    blocks: list[list[str]] = []
    current: list[str] = []

    # Single pass over the input instead of repeatedly popping the first
    # element, which would shift the remaining list each time.
    for line in lines:
        if line:
            current.append(line)
        elif current:
            blocks.append(current)
            current = []
    if current:
        blocks.append(current)

    # keep the in-place contract: all lines have been consumed
    del lines[:]

    return blocks
78
79
def first_key(lines: list[str]) -> str:
    """Return the string used to order a block when sorting.

    The sort key is the first line of the block that is not a comment. If
    that line begins with a quote character from `QUOTES`, that single
    leading character is dropped. The rest of the line, including the
    value, is kept as part of the key.

    >>> first_key(['# some comment', "'foo': true"])
    "foo': true"

    :param lines: the lines of one block
    :return: the sort key, or '' when the block has no non-comment line
    """
    key_line = next(
        (line for line in lines if not line.startswith('#')),
        None,
    )
    if key_line is None:
        return ''  # not actually reached in reality
    if key_line.startswith(tuple(QUOTES)):
        return key_line[1:]
    return key_line
Daniel Gallagherb6eff3d2017-06-23 16:26:00 -0700100
101
def main(argv: Sequence[str] | None = None) -> int:
    """Sort each given file in place and report whether anything changed.

    Each file is read with trailing whitespace stripped from every line and
    passed through `sort`. When the result differs from what was read, the
    file is overwritten with the sorted lines joined by newlines, plus a
    final newline.

    :param argv: command-line arguments; argparse falls back to the
        process arguments when this is None
    :return: 1 if at least one file was rewritten, otherwise 0
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
    filenames = parser.parse_args(argv).filenames

    exit_code = 0

    for filename in filenames:
        with open(filename, 'r+') as handle:
            original = [raw.rstrip() for raw in handle.readlines()]
            reordered = sort(original)

            if reordered == original:
                continue

            print(f'Fixing file `{filename}`')
            # rewrite from the start and cut off any leftover old content
            handle.seek(0)
            handle.write('\n'.join(reordered) + '\n')
            handle.truncate()
            exit_code = 1

    return exit_code
122
123
if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())