blob: 39f683e4844720f536c092a6913829b63be96aa6 [file] [log] [blame]
"""Sort a simple YAML file, keeping blocks of comments and definitions
together.
We assume a strict subset of YAML that looks like:
# block of header comments
# here that should always
# be at the top of the file
# optional comments
# can go here
key: value
key: value
key: value
In other words, we don't sort deeper than the top layer, and might corrupt
complicated YAML files.
"""
import argparse
from typing import List
from typing import Optional
from typing import Sequence
QUOTES = ["'", '"']
def sort(lines: List[str]) -> List[str]:
"""Sort a YAML file in alphabetical order, keeping blocks together.
:param lines: array of strings (without newlines)
:return: sorted array of strings
"""
# make a copy of lines since we will clobber it
lines = list(lines)
new_lines = parse_block(lines, header=True)
for block in sorted(parse_blocks(lines), key=first_key):
if new_lines:
new_lines.append('')
new_lines.extend(block)
return new_lines
def parse_block(lines: List[str], header: bool = False) -> List[str]:
"""Parse and return a single block, popping off the start of `lines`.
If parsing a header block, we stop after we reach a line that is not a
comment. Otherwise, we stop after reaching an empty line.
:param lines: list of lines
:param header: whether we are parsing a header block
:return: list of lines that form the single block
"""
block_lines = []
while lines and lines[0] and (not header or lines[0].startswith('#')):
block_lines.append(lines.pop(0))
return block_lines
def parse_blocks(lines: List[str]) -> List[List[str]]:
"""Parse and return all possible blocks, popping off the start of `lines`.
:param lines: list of lines
:return: list of blocks, where each block is a list of lines
"""
blocks = []
while lines:
if lines[0] == '':
lines.pop(0)
else:
blocks.append(parse_block(lines))
return blocks
def first_key(lines: List[str]) -> str:
"""Returns a string representing the sort key of a block.
The sort key is the first YAML key we encounter, ignoring comments, and
stripping leading quotes.
>>> print(test)
# some comment
'foo': true
>>> first_key(test)
'foo'
"""
for line in lines:
if line.startswith('#'):
continue
if any(line.startswith(quote) for quote in QUOTES):
return line[1:]
return line
else:
return '' # not actually reached in reality
def main(argv: Optional[Sequence[str]] = None) -> int:
parser = argparse.ArgumentParser()
parser.add_argument('filenames', nargs='*', help='Filenames to fix')
args = parser.parse_args(argv)
retval = 0
for filename in args.filenames:
with open(filename, 'r+') as f:
lines = [line.rstrip() for line in f.readlines()]
new_lines = sort(lines)
if lines != new_lines:
print(f'Fixing file `{filename}`')
f.seek(0)
f.write('\n'.join(new_lines) + '\n')
f.truncate()
retval = 1
return retval
if __name__ == '__main__':
raise SystemExit(main())