pre_commit_hooks/check_docstring_first.py - pre-commit-hooks - Git at Google

 from __future__ import annotations

 import argparse
 import io
 import tokenize
 from tokenize import tokenize as tokenize_tokenize
 from typing import Sequence

 NON_CODE_TOKENS = frozenset((
     tokenize.COMMENT, tokenize.ENDMARKER, tokenize.NEWLINE, tokenize.NL,
     tokenize.ENCODING,
 ))


 def check_docstring_first(src: bytes, filename: str = '<unknown>') -> int:
     """Returns nonzero if the source has what looks like a docstring that is
     not at the beginning of the source.

     A string will be considered a docstring if it is a STRING token with a
     col offset of 0.
     """
     found_docstring_line = None
     found_code_line = None

     tok_gen = tokenize_tokenize(io.BytesIO(src).readline)
     for tok_type, _, (sline, scol), _, _ in tok_gen:
         # Looks like a docstring!
         if tok_type == tokenize.STRING and scol == 0:
             if found_docstring_line is not None:
                 print(
                     f'{filename}:{sline}: Multiple module docstrings '
                     f'(first docstring on line {found_docstring_line}).',
                 )
                 return 1
             elif found_code_line is not None:
                 print(
                     f'{filename}:{sline}: Module docstring appears after code '
                     f'(code seen on line {found_code_line}).',
                 )
                 return 1
             else:
                 found_docstring_line = sline
         elif tok_type not in NON_CODE_TOKENS and found_code_line is None:
             found_code_line = sline

     return 0


 def main(argv: Sequence[str] | None = None) -> int:
     parser = argparse.ArgumentParser()
     parser.add_argument('filenames', nargs='*')
     args = parser.parse_args(argv)

     retv = 0

     for filename in args.filenames:
         with open(filename, 'rb') as f:
             contents = f.read()
         retv |= check_docstring_first(contents, filename=filename)

     return retv
	from __future__ import annotations

	import argparse
	import io
	import tokenize
	from tokenize import tokenize as tokenize_tokenize
	from typing import Sequence

	NON_CODE_TOKENS = frozenset((
	tokenize.COMMENT, tokenize.ENDMARKER, tokenize.NEWLINE, tokenize.NL,
	tokenize.ENCODING,
	))


	def check_docstring_first(src: bytes, filename: str = '<unknown>') -> int:
	"""Returns nonzero if the source has what looks like a docstring that is
	not at the beginning of the source.

	A string will be considered a docstring if it is a STRING token with a
	col offset of 0.
	"""
	found_docstring_line = None
	found_code_line = None

	tok_gen = tokenize_tokenize(io.BytesIO(src).readline)
	for tok_type, _, (sline, scol), _, _ in tok_gen:
	# Looks like a docstring!
	if tok_type == tokenize.STRING and scol == 0:
	if found_docstring_line is not None:
	print(
	f'{filename}:{sline}: Multiple module docstrings '
	f'(first docstring on line {found_docstring_line}).',
	)
	return 1
	elif found_code_line is not None:
	print(
	f'{filename}:{sline}: Module docstring appears after code '
	f'(code seen on line {found_code_line}).',
	)
	return 1
	else:
	found_docstring_line = sline
	elif tok_type not in NON_CODE_TOKENS and found_code_line is None:
	found_code_line = sline

	return 0


	def main(argv: Sequence[str] \| None = None) -> int:
	parser = argparse.ArgumentParser()
	parser.add_argument('filenames', nargs='*')
	args = parser.parse_args(argv)

	retv = 0

	for filename in args.filenames:
	with open(filename, 'rb') as f:
	contents = f.read()
	retv \|= check_docstring_first(contents, filename=filename)

	return retv