Anthony Sottile | 8f61529 | 2022-01-15 19:24:05 -0500 | [diff] [blame] | 1 | from __future__ import annotations |
| 2 | |
Anthony Sottile | 53f1dc0 | 2015-01-04 13:06:21 -0800 | [diff] [blame] | 3 | import argparse |
| 4 | import io |
| 5 | import tokenize |
Anthony Sottile | f5c42a0 | 2020-02-05 11:10:42 -0800 | [diff] [blame] | 6 | from tokenize import tokenize as tokenize_tokenize |
Anthony Sottile | 030bfac | 2019-01-31 19:19:10 -0800 | [diff] [blame] | 7 | from typing import Sequence |
Anthony Sottile | 53f1dc0 | 2015-01-04 13:06:21 -0800 | [diff] [blame] | 8 | |
# Token types that are skipped when looking for the first line of real code:
# comments, logical/physical line breaks, the encoding marker and end-of-file.
NON_CODE_TOKENS = frozenset({
    tokenize.COMMENT,
    tokenize.ENCODING,
    tokenize.ENDMARKER,
    tokenize.NEWLINE,
    tokenize.NL,
})
Anthony Sottile | 53f1dc0 | 2015-01-04 13:06:21 -0800 | [diff] [blame] | 13 | |
| 14 | |
def check_docstring_first(src: bytes, filename: str = '<unknown>') -> int:
    """Check that a docstring-like string is the first thing in the source.

    Any STRING token that starts at column 0 is treated as a module
    docstring.  The source is tokenized and scanned in order; the scan stops
    at the first violation, which is reported on stdout:

    * a second column-0 string after an earlier one, or
    * a column-0 string that appears after a code token.

    Tokens listed in NON_CODE_TOKENS do not count as code.  Exceptions raised
    while tokenizing are not caught here.

    :param src: raw bytes of the source to check.
    :param filename: name used as the prefix of any printed message.
    :returns: 1 if a violation was printed, otherwise 0.
    """
    docstring_lineno = None
    first_code_lineno = None

    for tok in tokenize_tokenize(io.BytesIO(src).readline):
        lineno, col = tok.start
        looks_like_docstring = tok.type == tokenize.STRING and col == 0

        if not looks_like_docstring:
            # Only the first code token matters; later ones are ignored.
            if first_code_lineno is None and tok.type not in NON_CODE_TOKENS:
                first_code_lineno = lineno
            continue

        if docstring_lineno is not None:
            print(
                f'{filename}:{lineno}: Multiple module docstrings '
                f'(first docstring on line {docstring_lineno}).',
            )
            return 1

        if first_code_lineno is not None:
            print(
                f'{filename}:{lineno}: Module docstring appears after code '
                f'(code seen on line {first_code_lineno}).',
            )
            return 1

        docstring_lineno = lineno

    return 0
| 47 | |
| 48 | |
def main(argv: Sequence[str] | None = None) -> int:
    """Run the docstring-first check over every file named on the command line.

    :param argv: argument list handed to argparse; ``None`` lets argparse
        use its default.
    :returns: the bitwise OR of the per-file results of
        check_docstring_first, or 0 when no filenames are given.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('filenames', nargs='*')
    paths = arg_parser.parse_args(argv).filenames

    exit_code = 0
    for path in paths:
        # Read as bytes: check_docstring_first tokenizes raw bytes.
        with open(path, 'rb') as source_file:
            raw_source = source_file.read()
        # Every file is checked even after an earlier one has failed.
        exit_code |= check_docstring_first(raw_source, filename=path)

    return exit_code