codespell_lib/_codespell.py - codespell - Git at Google

 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; version 2 of the License.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, see
 # https://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
 """
 Copyright (C) 2010-2011  Lucas De Marchi <lucas.de.marchi@gmail.com>
 Copyright (C) 2011  ProFUSION embedded systems
 """

 import argparse
 import configparser
 import ctypes
 import fnmatch
 import itertools
 import os
 import re
 import sys
 import textwrap
 from typing import (
     Any,
     Dict,
     Iterable,
     List,
     Match,
     Optional,
     Pattern,
     Sequence,
     Set,
     TextIO,
     Tuple,
 )

 # Windows-only names; they are used by _supports_ansi_colors() to query the
 # console mode of the standard output handle.
 if sys.platform == "win32":
     from ctypes import wintypes

     # Console-mode bit that is tested to decide whether ANSI colors work.
     ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004
     # Value passed to GetStdHandle() to obtain the standard output handle.
     STD_OUTPUT_HANDLE = wintypes.HANDLE(-11)

 from ._spellchecker import Misspelling, build_dict
 from ._text_util import fix_case

 # autogenerated by setuptools_scm
 from ._version import (  # type: ignore[import-not-found]
     __version__ as VERSION,  # noqa: N812
 )

 # Default pattern for a "word": one or more word characters, hyphens,
 # straight apostrophes or typographic apostrophes.
 word_regex_def = r"[\w\-'’]+"  # noqa: RUF001
 # While we want to treat characters like ( or " as okay for a starting break,
 # these may occur unescaped in URIs, and so we are more restrictive on the
 # endpoint.  Emails are more restrictive, so the endpoint remains flexible.
 uri_regex_def = (
     "(\\b(?:https?|[ts]?ftp|file|git|smb)://[^\\s]+(?=$|\\s)|\\b[\\w.%+-]+@[\\w.-]+\\b)"
 )
 # Matches an inline "codespell:ignore" marker that follows a non-word,
 # non-space character (optionally separated by one whitespace character).
 # Word characters and commas after the marker are captured in the "words"
 # group.
 inline_ignore_regex = re.compile(r"[^\w\s]\s?codespell:ignore\b(\s+(?P<words>[\w,]*))?")
 # NOTE(review): not referenced in the visible part of this file; confirm it
 # is still needed.
 USAGE = """
 \t%prog [OPTIONS] [file1 file2 ... fileN]
 """

 # Language codes referenced by the built-in dictionary table.
 supported_languages_en = ("en", "en_GB", "en_US", "en_CA", "en_AU")
 supported_languages = supported_languages_en

 # Users might want to link this file into /usr/local/bin, so we resolve the
 # symbolic link path to the real path if necessary.
 _data_root = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data")
 _builtin_dictionaries = (
     # Fields of each entry, in order: name, description, a second name that
     # looks like a file-name suffix (e.g. "_rare"), err in aspell,
     # correction in aspell, err dictionary array, rep dictionary array.
     # The arrays must contain the names of aspell dictionaries.
     # The aspell tests here aren't the ideal state, but the Nones are
     # realistic for obscure words.
     ("clear", "for unambiguous errors", "", False, None, supported_languages_en, None),
     (
         "rare",
         "for rare (but valid) words that are likely to be errors",
         "_rare",
         None,
         None,
         None,
         None,
     ),
     (
         "informal",
         "for making informal words more formal",
         "_informal",
         True,
         True,
         supported_languages_en,
         supported_languages_en,
     ),
     (
         "usage",
         "for replacing phrasing with recommended terms",
         "_usage",
         None,
         None,
         None,
         None,
     ),
     (
         "code",
         "for words from code and/or mathematics that are likely to be typos in other contexts (such as uint)",  # noqa: E501
         "_code",
         None,
         None,
         None,
         None,
     ),
     (
         "names",
         "for valid proper names that might be typos",
         "_names",
         None,
         None,
         None,
         None,
     ),
     (
         "en-GB_to_en-US",
         "for corrections from en-GB to en-US",
         "_en-GB_to_en-US",
         True,
         True,
         ("en_GB",),
         ("en_US",),
     ),
 )
 _builtin_default = "clear,rare"

 # docs say os.EX_USAGE et al. are only available on Unix systems, so to be safe
 # we protect and just use the values they are on macOS and Linux
 EX_OK = 0
 EX_USAGE = 64
 EX_DATAERR = 65
 EX_CONFIG = 78

 # OPTIONS:
 #
 # ARGUMENTS:
 #    dict_filename       The file containing the dictionary of misspellings.
 #                        If set to '-', it will be read from stdin
 #    file1 .. fileN      Files to check spelling


 class QuietLevels:
     """Bit flags tested against the ``--quiet-level`` mask with ``&``."""

     NONE = 0
     ENCODING = 1 << 0
     BINARY_FILE = 1 << 1
     DISABLED_FIXES = 1 << 2
     NON_AUTOMATIC_FIXES = 1 << 3
     FIXES = 1 << 4
     CONFIG_FILES = 1 << 5


 class GlobMatch:
     """Test file names against a list of ``fnmatch`` glob patterns."""

     def __init__(self, pattern: List[str]) -> None:
         # The list is stored by reference, not copied.
         self.pattern_list: List[str] = pattern

     def match(self, filename: str) -> bool:
         """Return True when *filename* matches at least one stored pattern."""
         for glob in self.pattern_list:
             if fnmatch.fnmatch(filename, glob):
                 return True
         return False


 class TermColors:
     """Escape sequences used to colorize file names and words in output."""

     def __init__(self) -> None:
         # FILE, wrong word, fixed word, and the sequence that ends coloring.
         self.FILE, self.WWORD, self.FWORD, self.DISABLE = (
             "\033[33m",
             "\033[31m",
             "\033[32m",
             "\033[0m",
         )

     def disable(self) -> None:
         """Blank every sequence so formatted output carries no escapes."""
         self.FILE = self.WWORD = self.FWORD = self.DISABLE = ""


 class Summary:
     """Tally how many times each misspelled word was recorded."""

     def __init__(self) -> None:
         # Maps a word to the number of times update() was called with it.
         self.summary: Dict[str, int] = {}

     def update(self, wrongword: str) -> None:
         """Increment the counter kept for *wrongword*."""
         self.summary[wrongword] = self.summary.get(wrongword, 0) + 1

     def __str__(self) -> str:
         """Return one ``word count`` row per word, sorted by word.

         Rows are 15 columns wide with the count right-aligned. When the word
         is too long for that, the row grows so that a single space still
         separates the word from its count.
         """
         rows = []
         for key in sorted(self.summary):
             count = self.summary[key]
             # A width of ``15 - len(key)`` alone turns zero or negative for
             # long words; the format spec then reads it as a flag instead of
             # a width and the count ends up glued to the word.
             width = max(15 - len(key), len(str(count)) + 1)
             rows.append(f"{key}{count:{width}}")
         return "\n".join(rows)


 class FileOpener:
     """Read a text file into a list of lines, working out its encoding.

     With ``use_chardet`` the encoding is detected by the third-party
     ``chardet`` package; otherwise UTF-8 is tried first and ISO-8859-1 second.
     """

     def __init__(
         self,
         use_chardet: bool,
         quiet_level: int,
         ignore_multiline_regex: Optional[Pattern[str]],
     ) -> None:
         self.use_chardet = use_chardet
         if use_chardet:
             self.init_chardet()
         self.quiet_level = quiet_level
         self.ignore_multiline_regex = ignore_multiline_regex

     def init_chardet(self) -> None:
         """Create the chardet detector.

         Raises:
             ImportError: if ``chardet`` cannot be imported.
         """
         try:
             from chardet.universaldetector import UniversalDetector
         except ImportError as e:
             msg = (
                 "There's no chardet installed to import from. "
                 "Please, install it and check your PYTHONPATH "
                 "environment variable"
             )
             raise ImportError(msg) from e

         self.encdetector = UniversalDetector()

     def open(self, filename: str) -> Tuple[List[str], str]:
         """Return ``(lines, encoding)`` for *filename* using the chosen strategy."""
         if self.use_chardet:
             return self.open_with_chardet(filename)
         return self.open_with_internal(filename)

     def open_with_chardet(self, filename: str) -> Tuple[List[str], str]:
         """Detect the encoding with chardet, then read the file with it.

         Raises:
             UnicodeDecodeError: the file cannot be decoded with the detected
                 encoding (an error line is printed to stderr first).
             LookupError: the detected encoding is unknown to Python (an error
                 line is printed to stderr first).
         """
         self.encdetector.reset()
         with open(filename, "rb") as fb:
             for line in fb:
                 self.encdetector.feed(line)
                 if self.encdetector.done:
                     break
         self.encdetector.close()
         encoding = self.encdetector.result["encoding"]

         try:
             # Decoding happens while reading, not while opening, so the read
             # has to sit inside the ``try`` for the UnicodeDecodeError handler
             # to be reachable. ``with`` closes the file on every path.
             with open(filename, encoding=encoding, newline="") as f:
                 lines = self.get_lines(f)
                 used_encoding = f.encoding
         except UnicodeDecodeError:
             print(f"ERROR: Could not detect encoding: {filename}", file=sys.stderr)
             raise
         except LookupError:
             print(
                 f"ERROR: Don't know how to handle encoding {encoding}: {filename}",
                 file=sys.stderr,
             )
             raise

         return lines, used_encoding

     def open_with_internal(self, filename: str) -> Tuple[List[str], str]:
         """Read the file as UTF-8, falling back to ISO-8859-1.

         Warnings about the fallback go to stderr unless the ENCODING bit is
         set in ``quiet_level``.
         """
         for attempt, encoding in enumerate(("utf-8", "iso-8859-1")):
             if attempt and not self.quiet_level & QuietLevels.ENCODING:
                 print(f'WARNING: Trying next encoding "{encoding}"', file=sys.stderr)
             with open(filename, encoding=encoding, newline="") as f:
                 try:
                     lines = self.get_lines(f)
                 except UnicodeDecodeError:
                     if not self.quiet_level & QuietLevels.ENCODING:
                         print(
                             f'WARNING: Cannot decode file using encoding "{encoding}": '
                             f"{filename}",
                             file=sys.stderr,
                         )
                 else:
                     break
         else:
             # reading with encoding "iso-8859-1" cannot fail with UnicodeDecodeError
             msg = "Unknown encoding"
             raise RuntimeError(msg)  # pragma: no cover

         return lines, encoding

     def get_lines(self, f: TextIO) -> List[str]:
         """Return the lines of *f*, blanking regions matched by the multiline regex.

         NOTE: without a regex the lines come from ``readlines()`` and keep
         their line endings; with a regex the text is split on ``"\\n"`` and the
         resulting lines carry no trailing newline.
         """
         if not self.ignore_multiline_regex:
             return f.readlines()

         text = f.read()
         pieces: List[str] = []
         pos = 0
         for m in re.finditer(self.ignore_multiline_regex, text):
             pieces.append(text[pos : m.start()])
             # Replace with blank lines so line numbers are unchanged.
             pieces.append("\n" * m.group().count("\n"))
             pos = m.end()
         pieces.append(text[pos:])
         return "".join(pieces).split("\n")


 # -.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-


 # If someday this breaks, we can just switch to using RawTextHelpFormatter,
 # but it has the disadvantage of not wrapping our long lines.


 class NewlineHelpFormatter(argparse.HelpFormatter):
     """Help formatter that preserves newlines and deals with lists."""

     def _split_lines(self, text: str, width: int) -> List[str]:
         """Wrap *text* to *width*, treating each input line separately.

         A line starting with ``"- "`` is wrapped as a list item: its first
         output line keeps the marker and continuation lines are indented to
         align with the item text. Lines that are blank after whitespace
         normalization produce no output.
         """
         # Eventually we could allow others...
         indent_start = "- "
         out: List[str] = []
         for part in text.split("\n"):
             offset = len(indent_start) if part.startswith(indent_start) else 0
             body = self._whitespace_matcher.sub(" ", part[offset:]).strip()
             wrapped = [" " * offset + p for p in textwrap.wrap(body, width - offset)]
             # textwrap.wrap() returns [] for blank input, so only restore the
             # marker when there is a first line to put it on.
             if offset and wrapped:
                 wrapped[0] = indent_start + wrapped[0][offset:]
             out.extend(wrapped)
         return out


 def _toml_to_parseconfig(toml_dict: Dict[str, Any]) -> Dict[str, Any]:
     """Convert a dict read from a TOML file to the parseconfig.read_dict() format."""
     return {
         k: "" if v is True else ",".join(v) if isinstance(v, list) else v
         for k, v in toml_dict.items()
         if v is not False
     }


 def _supports_ansi_colors() -> bool:
     if sys.platform == "win32":
         # Windows Terminal enables ANSI escape codes by default. In other cases
         # it is disabled.
         # See https://ss64.com/nt/syntax-ansi.html for more information.
         kernel32 = ctypes.WinDLL("kernel32")

         # fmt: off
         kernel32.GetConsoleMode.argtypes = (
             wintypes.HANDLE,   # _In_  hConsoleHandle
             wintypes.LPDWORD,  # _Out_ lpMode
         )
         # fmt: on
         kernel32.GetConsoleMode.restype = wintypes.BOOL

         mode = wintypes.DWORD()
         handle = kernel32.GetStdHandle(STD_OUTPUT_HANDLE)
         if not kernel32.GetConsoleMode(handle, ctypes.byref(mode)):
             # TODO: print a warning with the error message on stderr?
             return False

         return (mode.value & ENABLE_VIRTUAL_TERMINAL_PROCESSING) != 0
     elif sys.platform == "wasi":
         # WASI disables ANSI escape codes for security reasons.
         # See https://github.com/WebAssembly/WASI/issues/162.
         return False
     elif sys.stdout.isatty():
         return True

     return False


 def parse_options(
     args: Sequence[str],
 ) -> Tuple[argparse.Namespace, argparse.ArgumentParser, List[str]]:
     """Build the argument parser and resolve options from CLI and config files.

     ``args`` is parsed first so that ``--config`` and ``--toml`` are known.
     Then ``pyproject.toml`` (if present in the current directory) and the
     ``--toml`` file are loaded, followed by ``setup.cfg``, ``.codespellrc`` and
     the ``--config`` file. When a ``codespell`` section results, its keys are
     turned into ``--key [value]`` arguments and parsed, and ``args`` is parsed
     again on top of that namespace so command-line values override the config.

     Returns:
         ``(options, parser, used_cfg_files)``. ``used_cfg_files`` lists the
         INI-style config files that contain a ``codespell`` section; TOML
         files are not included. ``options.files`` is ``["."]`` when no file
         was given.

     Raises:
         ImportError: if ``--toml`` was given but neither ``tomllib`` nor
             ``tomli`` can be imported.
     """
     parser = argparse.ArgumentParser(formatter_class=NewlineHelpFormatter)

     # Colors default to on only when the output supports them; -d/-c below
     # both write to the same ``colors`` destination to override this.
     parser.set_defaults(colors=_supports_ansi_colors())
     parser.add_argument("--version", action="version", version=VERSION)

     parser.add_argument(
         "-d",
         "--disable-colors",
         action="store_false",
         dest="colors",
         help="disable colors, even when printing to terminal",
     )
     parser.add_argument(
         "-c",
         "--enable-colors",
         action="store_true",
         dest="colors",
         help="enable colors, even when not printing to terminal",
     )

     parser.add_argument(
         "-w",
         "--write-changes",
         action="store_true",
         default=False,
         help="write changes in place if possible",
     )

     parser.add_argument(
         "-D",
         "--dictionary",
         action="append",
         help="comma-separated list of custom dictionary files that "
         "contain spelling corrections. If this flag is not specified "
         'or equals "-" then the default dictionary is used.',
     )
     # One "- 'name' description" bullet per built-in dictionary; the leading
     # empty element makes the joined text start with a newline and a bullet.
     builtin_opts = "\n- ".join(
         [""] + [f"{d[0]!r} {d[1]}" for d in _builtin_dictionaries]
     )
     parser.add_argument(
         "--builtin",
         dest="builtin",
         default=_builtin_default,
         metavar="BUILTIN-LIST",
         help="comma-separated list of builtin dictionaries "
         'to include (when "-D -" or no "-D" is passed). '
         "Current options are:" + builtin_opts + "\n"
         "The default is %(default)r.",
     )
     parser.add_argument(
         "--ignore-regex",
         action="store",
         type=str,
         help="regular expression that is used to find "
         "patterns to ignore by treating as whitespace. "
         "When writing regular expressions, consider "
         "ensuring there are boundary non-word chars, "
         'e.g., "\\bmatch\\b". Defaults to '
         "empty/disabled.",
     )
     parser.add_argument(
         "--ignore-multiline-regex",
         action="store",
         type=str,
         help="regular expression that is used to ignore "
         "text that may span multi-line regions. "
         "The regex is run with re.DOTALL. For example to "
         "allow skipping of regions of Python code using "
         "begin/end comments one could use: "
         "--ignore-multiline-regex "
         "'# codespell:ignore-begin *\\n.*# codespell:ignore-end *\\n'. "
         "Defaults to empty/disabled.",
     )
     parser.add_argument(
         "-I",
         "--ignore-words",
         action="append",
         metavar="FILES",
         help="comma-separated list of files that contain "
         "words to be ignored by codespell. Files must contain "
         "1 word per line. Words are case sensitive based on "
         "how they are written in the dictionary file.",
     )
     parser.add_argument(
         "-L",
         "--ignore-words-list",
         action="append",
         metavar="WORDS",
         help="comma-separated list of words to be ignored "
         "by codespell. Words are case sensitive based on "
         "how they are written in the dictionary file.",
     )
     parser.add_argument(
         "--uri-ignore-words-list",
         action="append",
         metavar="WORDS",
         help="comma-separated list of words to be ignored "
         "by codespell in URIs and emails only. Words are "
         "case sensitive based on how they are written in "
         'the dictionary file. If set to "*", all '
         "misspelling in URIs and emails will be ignored.",
     )
     parser.add_argument(
         "-r",
         "--regex",
         action="store",
         type=str,
         help="regular expression that is used to find words. "
         "By default any alphanumeric character, the "
         "underscore, the hyphen, and the apostrophe are "
         "used to build words. This option cannot be "
         "specified together with --write-changes.",
     )
     parser.add_argument(
         "--uri-regex",
         action="store",
         type=str,
         help="regular expression that is used to find URIs "
         "and emails. A default expression is provided.",
     )
     parser.add_argument(
         "-s",
         "--summary",
         action="store_true",
         default=False,
         help="print summary of fixes",
     )

     parser.add_argument(
         "--count",
         action="store_true",
         default=False,
         help="print the number of errors as the last line of stderr",
     )

     parser.add_argument(
         "-S",
         "--skip",
         action="append",
         help="comma-separated list of files to skip. It "
         "accepts globs as well. E.g.: if you want "
         "codespell to skip .eps and .txt files, "
         'you\'d give "*.eps,*.txt" to this option.',
     )

     parser.add_argument(
         "-x",
         "--exclude-file",
         action="append",
         type=str,
         metavar="FILES",
         help="ignore whole lines that match those in "
         "the comma-separated list of files EXCLUDE. "
         "The lines in these files should match the "
         "to-be-excluded lines exactly",
     )

     parser.add_argument(
         "-i",
         "--interactive",
         action="store",
         type=int,
         default=0,
         choices=range(0, 4),
         help="set interactive mode when writing changes:\n"
         "- 0: no interactivity.\n"
         "- 1: ask for confirmation.\n"
         "- 2: ask user to choose one fix when more than one is available.\n"
         "- 3: both 1 and 2",
         metavar="MODE",
     )

     # The default of 34 is BINARY_FILE (2) + CONFIG_FILES (32) from QuietLevels.
     parser.add_argument(
         "-q",
         "--quiet-level",
         action="store",
         type=int,
         default=34,
         choices=range(0, 64),
         help="bitmask that allows suppressing messages:\n"
         "- 0: print all messages.\n"
         "- 1: disable warnings about wrong encoding.\n"
         "- 2: disable warnings about binary files.\n"
         "- 4: omit warnings about automatic fixes that were disabled in the dictionary.\n"  # noqa: E501
         "- 8: don't print anything for non-automatic fixes.\n"
         "- 16: don't print the list of fixed files.\n"
         "- 32: don't print configuration files.\n"
         "As usual with bitmasks, these levels can be "
         "combined; e.g. use 3 for levels 1+2, 7 for "
         "1+2+4, 23 for 1+2+4+16, etc. "
         "The default mask is %(default)s.",
         metavar="LEVEL",
     )

     parser.add_argument(
         "-e",
         "--hard-encoding-detection",
         action="store_true",
         default=False,
         help="use chardet to detect the encoding of each "
         "file. This can slow down codespell, but is more "
         "reliable in detecting encodings other than "
         "utf-8, iso8859-1, and ascii.",
     )

     parser.add_argument(
         "-f",
         "--check-filenames",
         action="store_true",
         default=False,
         help="check file names as well",
     )

     parser.add_argument(
         "-H",
         "--check-hidden",
         action="store_true",
         default=False,
         help='check hidden files and directories (those starting with ".") as well.',
     )
     parser.add_argument(
         "-A",
         "--after-context",
         type=int,
         metavar="LINES",
         help="print LINES of trailing context",
     )
     parser.add_argument(
         "-B",
         "--before-context",
         type=int,
         metavar="LINES",
         help="print LINES of leading context",
     )
     parser.add_argument(
         "-C",
         "--context",
         type=int,
         metavar="LINES",
         help="print LINES of surrounding context",
     )
     parser.add_argument(
         "--stdin-single-line",
         action="store_true",
         help="output just a single line for each misspelling in stdin mode",
     )
     parser.add_argument("--config", type=str, help="path to config file.")
     parser.add_argument("--toml", type=str, help="path to a pyproject.toml file.")
     parser.add_argument("files", nargs="*", help="files or directories to check")

     # Parse command line options.
     options = parser.parse_args(list(args))

     # Load config files and look for ``codespell`` options.
     cfg_files = ["setup.cfg", ".codespellrc"]
     if options.config:
         cfg_files.append(options.config)
     config = configparser.ConfigParser(interpolation=None)

     # Read toml before other config files.
     toml_files = []
     # A missing TOML parser is an error only when --toml was given explicitly;
     # an implicit pyproject.toml is skipped silently in that case.
     tomllib_raise_error = False
     if os.path.isfile("pyproject.toml"):
         toml_files.append("pyproject.toml")
     if options.toml:
         toml_files.append(options.toml)
         tomllib_raise_error = True
     if toml_files:
         if sys.version_info >= (3, 11):
             import tomllib
         else:
             try:
                 import tomli as tomllib  # type: ignore[no-redef]
             except ImportError as e:
                 if tomllib_raise_error:
                     msg = (
                         f"tomllib or tomli are required to read pyproject.toml "
                         f"but could not be imported, got: {e}"
                     )
                     raise ImportError(msg) from None
                 tomllib = None  # type: ignore[assignment]
         if tomllib is not None:
             for toml_file in toml_files:
                 with open(toml_file, "rb") as f:
                     # Only the [tool] table is considered.
                     data = tomllib.load(f).get("tool", {})
                 if "codespell" in data:
                     data["codespell"] = _toml_to_parseconfig(data["codespell"])
                 config.read_dict(data)

     # Collect which config files are going to be used
     used_cfg_files = []
     for cfg_file in cfg_files:
         # Each file is probed with a throwaway parser so that only files with
         # a [codespell] section are reported and read below.
         _cfg = configparser.ConfigParser()
         _cfg.read(cfg_file)
         if _cfg.has_section("codespell"):
             used_cfg_files.append(cfg_file)

     # Use config files
     config.read(used_cfg_files)
     if config.has_section("codespell"):
         # Build a "fake" argv list using option name and value.
         cfg_args = []
         for key in config["codespell"]:
             # Add option as arg.
             cfg_args.append(f"--{key}")
             # If value is blank, skip.
             val = config["codespell"][key]
             if val:
                 cfg_args.append(val)

         # Parse config file options.
         options = parser.parse_args(cfg_args)

         # Re-parse command line options to override config.
         options = parser.parse_args(list(args), namespace=options)

     # With no positional arguments, check the current directory.
     if not options.files:
         options.files.append(".")

     return options, parser, used_cfg_files


 def process_ignore_words(
     words: Iterable[str], ignore_words: Set[str], ignore_words_cased: Set[str]
 ) -> None:
     """Sort *words* into the two ignore sets, mutating them in place.

     Each word is stripped of surrounding whitespace. Words that equal their
     own lowercase form go to ``ignore_words``; all others go to
     ``ignore_words_cased``.
     """
     for raw in words:
         stripped = raw.strip()
         target = ignore_words if stripped == stripped.lower() else ignore_words_cased
         target.add(stripped)


 def parse_ignore_words_option(
     ignore_words_option: List[str],
 ) -> Tuple[Set[str], Set[str]]:
     """Split comma-separated ignore lists into lowercase and cased word sets.

     Returns ``(ignore_words, ignore_words_cased)``; both are empty when the
     option is empty or ``None``.
     """
     lowercase_words: Set[str] = set()
     cased_words: Set[str] = set()
     for entry in ignore_words_option or ():
         tokens = [token.strip() for token in entry.split(",")]
         process_ignore_words(tokens, lowercase_words, cased_words)
     return lowercase_words, cased_words


 def build_exclude_hashes(filename: str, exclude_lines: Set[str]) -> None:
     """Add every line of *filename* to *exclude_lines*, right-stripped.

     The file is read as UTF-8 and the set is updated in place.
     """
     with open(filename, encoding="utf-8") as handle:
         for raw_line in handle:
             exclude_lines.add(raw_line.rstrip())


 def build_ignore_words(
     filename: str, ignore_words: Set[str], ignore_words_cased: Set[str]
 ) -> None:
     """Feed each stripped line of *filename* to ``process_ignore_words``.

     The file is read as UTF-8; both sets are updated in place.
     """
     with open(filename, encoding="utf-8") as handle:
         stripped_lines = (raw_line.strip() for raw_line in handle)
         process_ignore_words(stripped_lines, ignore_words, ignore_words_cased)


 def is_hidden(filename: str, check_hidden: bool) -> bool:
     """Return True when *filename* is a hidden entry that should be skipped.

     Nothing counts as hidden when ``check_hidden`` is set. Otherwise an entry
     is hidden when its base name starts with ``"."``, except for the empty
     base name, ``"."`` and ``".."``.
     """
     bfilename = os.path.basename(filename)
     if check_hidden or bfilename in ("", ".", ".."):
         return False
     return bfilename[0] == "."


 def is_text_file(filename: str) -> bool:
     """Return True when the first 1024 bytes of *filename* contain no NUL byte."""
     with open(filename, mode="rb") as stream:
         head = stream.read(1024)
     return head.find(b"\x00") == -1


 def ask_for_word_fix(
     line: str,
     match: Match[str],
     misspelling: Misspelling,
     interactivity: int,
     colors: TermColors,
 ) -> Tuple[bool, str]:
     """Optionally ask the user how to fix the word matched in *line*.

     With ``interactivity <= 0`` nothing is asked. Bit 1 asks for a yes/no
     confirmation when the misspelling is marked fixable; otherwise bit 2 lets
     the user pick one of the comma-separated candidates when the misspelling
     has no ``reason``. Answers are read from stdin and are stored on
     *misspelling* (``fix`` and ``data``), which is mutated in place.

     Returns:
         ``(fix, fixword)``: whether to apply the fix, and the replacement
         produced by ``fix_case`` for the matched word.
     """
     wrongword = match.group()
     if interactivity <= 0:
         return misspelling.fix, fix_case(wrongword, misspelling.data)

     # The line with the offending word wrapped in the "wrong word" color.
     line_ui = (
         f"{line[: match.start()]}{colors.WWORD}{wrongword}{colors.DISABLE}"
         f"{line[match.end() :]}"
     )

     if misspelling.fix and interactivity & 1:
         fixword = fix_case(wrongword, misspelling.data)
         while True:
             print(f"{line_ui}\t{wrongword} ==> {fixword} (Y/n) ", end="", flush=True)
             # An empty answer counts as "yes".
             answer = sys.stdin.readline().strip().upper() or "Y"
             if answer in ("Y", "N"):
                 break
             print("Say 'y' or 'n'")

         if answer == "N":
             misspelling.fix = False

     elif (interactivity & 2) and not misspelling.reason:
         # if it is not disabled, i.e. it just has more than one possible fix,
         # we ask the user which word to use

         candidates = [w.strip() for w in misspelling.data.split(",")]
         chosen = ""
         while not chosen:
             print(f"{line_ui} Choose an option (blank for none): ", end="")
             for number, candidate in enumerate(candidates):
                 print(f" {number}) {fix_case(wrongword, candidate)}", end="")
             print(": ", end="", flush=True)

             reply = sys.stdin.readline().strip()
             if not reply:
                 # Blank input leaves the misspelling untouched.
                 break

             try:
                 chosen = candidates[int(reply)]
             except (ValueError, IndexError):
                 print("Not a valid option\n")

         if chosen:
             misspelling.fix = True
             misspelling.data = chosen

     return misspelling.fix, fix_case(wrongword, misspelling.data)


 def print_context(
     lines: List[str],
     index: int,
     context: Tuple[int, int],
 ) -> None:
     """Print ``lines[index]`` with its surrounding context lines.

     ``context`` is ``(context_before, context_after)``. The line at *index*
     is prefixed with ``">"`` and the others with ``":"``; positions outside
     the list are skipped and trailing whitespace is removed.
     """
     first = max(index - context[0], 0)
     stop = min(index + context[1] + 1, len(lines))
     for position in range(first, stop):
         marker = ">" if position == index else ":"
         print(f"{marker} {lines[position].rstrip()}")


 def _ignore_word_sub(
     text: str,
     ignore_word_regex: Optional[Pattern[str]],
 ) -> str:
     if ignore_word_regex:
         text = ignore_word_regex.sub(" ", text)
     return text


 def extract_words(
     text: str,
     word_regex: Pattern[str],
     ignore_word_regex: Optional[Pattern[str]],
 ) -> List[str]:
     """Return what ``word_regex.findall`` finds in *text* after ignores are blanked."""
     cleaned = _ignore_word_sub(text, ignore_word_regex)
     return word_regex.findall(cleaned)


 def extract_words_iter(
     text: str,
     word_regex: Pattern[str],
     ignore_word_regex: Optional[Pattern[str]],
 ) -> List[Match[str]]:
     """Return the match objects of *word_regex* in *text* after ignores are blanked.

     Match positions refer to the blanked text, not to the original *text*.
     """
     cleaned = _ignore_word_sub(text, ignore_word_regex)
     found = word_regex.finditer(cleaned)
     return list(found)


 def apply_uri_ignore_words(
     check_matches: List[Match[str]],
     line: str,
     word_regex: Pattern[str],
     ignore_word_regex: Optional[Pattern[str]],
     uri_regex: Pattern[str],
     uri_ignore_words: Set[str],
 ) -> List[Match[str]]:
     """Drop matches for ignorable words that occur inside URIs on *line*.

     For every word that is found in a URI and listed in *uri_ignore_words*,
     the first remaining match with the same text is removed. The input list
     is not mutated; it is returned as is when there is nothing to remove.
     """
     if not uri_ignore_words:
         return check_matches

     for uri in uri_regex.findall(line):
         ignorable = (
             candidate
             for candidate in extract_words(uri, word_regex, ignore_word_regex)
             if candidate in uri_ignore_words
         )
         for uri_word in ignorable:
             # determine/remove only the first among matches
             position = next(
                 (
                     idx
                     for idx, found in enumerate(check_matches)
                     if found.group() == uri_word
                 ),
                 None,
             )
             if position is not None:
                 check_matches = (
                     check_matches[:position] + check_matches[position + 1 :]
                 )
     return check_matches


 def parse_file(
     filename: str,
     colors: TermColors,
     summary: Optional[Summary],
     misspellings: Dict[str, Misspelling],
     ignore_words_cased: Set[str],
     exclude_lines: Set[str],
     file_opener: FileOpener,
     word_regex: Pattern[str],
     ignore_word_regex: Optional[Pattern[str]],
     uri_regex: Pattern[str],
     uri_ignore_words: Set[str],
     context: Optional[Tuple[int, int]],
     options: argparse.Namespace,
 ) -> int:
     bad_count = 0
     lines = None
     changed = False

     if filename == "-":
         f = sys.stdin
         encoding = "utf-8"
         lines = f.readlines()
     else:
         if options.check_filenames:
             for word in extract_words(filename, word_regex, ignore_word_regex):
                 if word in ignore_words_cased:
                     continue
                 lword = word.lower()
                 if lword not in misspellings:
                     continue
                 fix = misspellings[lword].fix
                 fixword = fix_case(word, misspellings[lword].data)

                 if summary and fix:
                     summary.update(lword)

                 cfilename = f"{colors.FILE}{filename}{colors.DISABLE}"
                 cwrongword = f"{colors.WWORD}{word}{colors.DISABLE}"
                 crightword = f"{colors.FWORD}{fixword}{colors.DISABLE}"

                 reason = misspellings[lword].reason
                 if reason:
                     if options.quiet_level & QuietLevels.DISABLED_FIXES:
                         continue
                     creason = f"  | {colors.FILE}{reason}{colors.DISABLE}"
                 else:
                     if options.quiet_level & QuietLevels.NON_AUTOMATIC_FIXES:
                         continue
                     creason = ""

                 bad_count += 1

                 print(f"{cfilename}: {cwrongword} ==> {crightword}{creason}")

         # ignore irregular files
         if not os.path.isfile(filename):
             return bad_count

         try:
             text = is_text_file(filename)
         except PermissionError as e:
             print(f"WARNING: {e.strerror}: {filename}", file=sys.stderr)
             return bad_count
         except OSError:
             return bad_count

         if not text:
             if not options.quiet_level & QuietLevels.BINARY_FILE:
                 print(f"WARNING: Binary file: {filename}", file=sys.stderr)
             return bad_count
         try:
             lines, encoding = file_opener.open(filename)
         except OSError:
             return bad_count

     for i, line in enumerate(lines):
         if line.rstrip() in exclude_lines:
             continue

         extra_words_to_ignore = set()
         match = inline_ignore_regex.search(line)
         if match:
             extra_words_to_ignore = set(
                 filter(None, (match.group("words") or "").split(","))
             )
             if not extra_words_to_ignore:
                 continue

         fixed_words = set()
         asked_for = set()

         # If all URI spelling errors will be ignored, erase any URI before
         # extracting words. Otherwise, apply ignores after extracting words.
         # This ensures that if a URI ignore word occurs both inside a URI and
         # outside, it will still be a spelling error.
         if "*" in uri_ignore_words:
             line = uri_regex.sub(" ", line)
         check_matches = extract_words_iter(line, word_regex, ignore_word_regex)
         if "*" not in uri_ignore_words:
             check_matches = apply_uri_ignore_words(
                 check_matches,
                 line,
                 word_regex,
                 ignore_word_regex,
                 uri_regex,
                 uri_ignore_words,
             )
         for match in check_matches:
             word = match.group()
             if word in ignore_words_cased:
                 continue
             lword = word.lower()
             if lword in misspellings and lword not in extra_words_to_ignore:
                 # Sometimes we find a 'misspelling' which is actually a valid word
                 # preceded by a string escape sequence.  Ignore such cases as
                 # they're usually false alarms; see issue #17 among others.
                 char_before_idx = match.start() - 1
                 if (
                     char_before_idx >= 0
                     and line[char_before_idx] == "\\"
                     # bell, backspace, formfeed, newline, carriage-return, tab, vtab.
                     and word.startswith(("a", "b", "f", "n", "r", "t", "v"))
                     and lword[1:] not in misspellings
                 ):
                     continue

                 context_shown = False
                 fix = misspellings[lword].fix
                 fixword = fix_case(word, misspellings[lword].data)

                 if options.interactive and lword not in asked_for:
                     if context is not None:
                         context_shown = True
                         print_context(lines, i, context)
                     fix, fixword = ask_for_word_fix(
                         lines[i],
                         match,
                         misspellings[lword],
                         options.interactive,
                         colors=colors,
                     )
                     asked_for.add(lword)

                 if summary and fix:
                     summary.update(lword)

                 if word in fixed_words:  # can skip because of re.sub below
                     continue

                 if options.write_changes and fix:
                     changed = True
                     lines[i] = re.sub(rf"\b{word}\b", fixword, lines[i])
                     fixed_words.add(word)
                     continue

                 # otherwise warning was explicitly set by interactive mode
                 if (
                     options.interactive & 2
                     and not fix
                     and not misspellings[lword].reason
                 ):
                     continue

                 cfilename = f"{colors.FILE}{filename}{colors.DISABLE}"
                 cline = f"{colors.FILE}{i + 1}{colors.DISABLE}"
                 cwrongword = f"{colors.WWORD}{word}{colors.DISABLE}"
                 crightword = f"{colors.FWORD}{fixword}{colors.DISABLE}"

                 reason = misspellings[lword].reason
                 if reason:
                     if options.quiet_level & QuietLevels.DISABLED_FIXES:
                         continue
                     creason = f"  | {colors.FILE}{reason}{colors.DISABLE}"
                 else:
                     if options.quiet_level & QuietLevels.NON_AUTOMATIC_FIXES:
                         continue
                     creason = ""

                 # If we get to this point (uncorrected error) we should change
                 # our bad_count and thus return value
                 bad_count += 1

                 if (not context_shown) and (context is not None):
                     print_context(lines, i, context)
                 if filename != "-":
                     print(
                         f"{cfilename}:{cline}: {cwrongword} ==> {crightword}{creason}"
                     )
                 elif options.stdin_single_line:
                     print(f"{cline}: {cwrongword} ==> {crightword}{creason}")
                 else:
                     print(
                         f"{cline}: {line.strip()}\n\t{cwrongword} "
                         f"==> {crightword}{creason}"
                     )

     if changed:
         if filename == "-":
             print("---")
             for line in lines:
                 print(line, end="")
         else:
             if not options.quiet_level & QuietLevels.FIXES:
                 print(
                     f"{colors.FWORD}FIXED:{colors.DISABLE} {filename}",
                     file=sys.stderr,
                 )
             with open(filename, "w", encoding=encoding, newline="") as f:
                 f.writelines(lines)
     return bad_count


 def flatten_clean_comma_separated_arguments(
     arguments: Iterable[str],
 ) -> List[str]:
     r"""Split comma-separated option values into one flat list of clean items.

     Every string in ``arguments`` is split on commas and each piece is
     stripped of surrounding whitespace.  Pieces that are empty once stripped
     (trailing commas, ``"a, ,b"``, whitespace-only arguments) are dropped,
     so the result never contains empty strings.

     >>> flatten_clean_comma_separated_arguments(["a, b ,\n c, d,", "e"])
     ['a', 'b', 'c', 'd', 'e']
     >>> flatten_clean_comma_separated_arguments(["a, ,b", " "])
     ['a', 'b']
     >>> flatten_clean_comma_separated_arguments([])
     []
     """
     # Strip first, filter second: filtering the raw pieces would let
     # whitespace-only pieces through as empty strings.
     stripped_items = (
         item.strip() for argument in arguments for item in argument.split(",")
     )
     return [item for item in stripped_items if item]


 def _script_main() -> int:
     """Entry-point wrapper around main() for setuptools.

     Forwards the command-line arguments (everything after the program name)
     to main() and returns its exit status, or 130 if the run is interrupted
     with CTRL+C.
     """
     try:
         cli_args = sys.argv[1:]
         exit_status = main(*cli_args)
     except KeyboardInterrupt:
         # The user pressed CTRL+C: terminate the current output line and
         # report the interruption through the exit status.
         sys.stdout.write("\n")
         exit_status = 130
     return exit_status


 def _usage_error(parser: argparse.ArgumentParser, message: str) -> int:
     """Report a command-line usage problem and return the usage exit code.

     Prints the parser's usage summary through ``parser.print_usage()``,
     writes ``message`` to standard error, and returns ``EX_USAGE`` so the
     caller can hand it back as the process exit status.
     """
     parser.print_usage()
     print(message, file=sys.stderr)
     exit_status = EX_USAGE
     return exit_status


 def main(*args: str) -> int:
     """Run the spell checker for the given command-line arguments.

     Parses the options, validates them (regexes, ignore-word files,
     dictionaries, context settings, skip globs), loads the misspelling
     dictionaries, and then checks every requested file, walking directories
     recursively while honouring the hidden-file and skip rules.

     Args:
         *args: Command-line arguments, without the program name.

     Returns:
         ``EX_CONFIG`` if a config file is ill-formed, the value returned by
         ``_usage_error`` if an option is invalid, ``EX_DATAERR`` if the
         accumulated ``parse_file`` count is non-zero, and ``EX_OK``
         otherwise.
     """
     try:
         options, parser, used_cfg_files = parse_options(args)
     except configparser.Error as e:
         print(
             f"ERROR: ill-formed config file: {e.message}",
             file=sys.stderr,
         )
         return EX_CONFIG

     # Report used config files
     if not options.quiet_level & QuietLevels.CONFIG_FILES:
         if used_cfg_files:
             print("Used config files:")
             for ifile, cfg_file in enumerate(used_cfg_files, start=1):
                 print(f"    {ifile}: {cfg_file}")

     # Interactive mode implies writing the accepted fixes back to the files.
     if options.interactive > 0:
         options.write_changes = True

     if options.regex and options.write_changes:
         return _usage_error(
             parser,
             "ERROR: --write-changes cannot be used together with --regex",
         )
     word_regex = options.regex or word_regex_def
     try:
         word_regex = re.compile(word_regex)
     except re.error as e:
         # word_regex is still the uncompiled pattern string here.
         return _usage_error(
             parser,
             f'ERROR: invalid --regex "{word_regex}" ({e})',
         )

     if options.ignore_regex:
         try:
             ignore_word_regex = re.compile(options.ignore_regex)
         except re.error as e:
             return _usage_error(
                 parser,
                 f'ERROR: invalid --ignore-regex "{options.ignore_regex}" ({e})',
             )
     else:
         ignore_word_regex = None

     if options.ignore_multiline_regex:
         try:
             # DOTALL lets "." span newlines so the pattern can cover
             # several lines.
             ignore_multiline_regex = re.compile(
                 options.ignore_multiline_regex, re.DOTALL
             )
         except re.error as e:
             return _usage_error(
                 parser,
                 f"ERROR: invalid --ignore-multiline-regex "
                 f'"{options.ignore_multiline_regex}" ({e})',
             )
     else:
         ignore_multiline_regex = None

     ignore_words, ignore_words_cased = parse_ignore_words_option(
         options.ignore_words_list
     )
     if options.ignore_words:
         ignore_words_files = flatten_clean_comma_separated_arguments(
             options.ignore_words
         )
         for ignore_words_file in ignore_words_files:
             if not os.path.isfile(ignore_words_file):
                 return _usage_error(
                     parser,
                     f"ERROR: cannot find ignore-words file: {ignore_words_file}",
                 )
             build_ignore_words(ignore_words_file, ignore_words, ignore_words_cased)

     uri_regex = options.uri_regex or uri_regex_def
     try:
         uri_regex = re.compile(uri_regex)
     except re.error as e:
         # uri_regex is still the uncompiled pattern string here.
         return _usage_error(
             parser,
             f'ERROR: invalid --uri-regex "{uri_regex}" ({e})',
         )

     # Merge both collections returned by parse_ignore_words_option into a
     # single set.
     uri_ignore_words = set(
         itertools.chain(*parse_ignore_words_option(options.uri_ignore_words_list))
     )

     # "-" is the placeholder for the builtin dictionaries; it is also the
     # default when no dictionary option was given.
     dictionaries = flatten_clean_comma_separated_arguments(options.dictionary or ["-"])

     use_dictionaries = []
     for dictionary in dictionaries:
         if dictionary == "-":
             # figure out which builtin dictionaries to use
             use = sorted(set(options.builtin.split(",")))
             for u in use:
                 for builtin in _builtin_dictionaries:
                     if builtin[0] == u:
                         use_dictionaries.append(
                             os.path.join(_data_root, f"dictionary{builtin[2]}.txt")
                         )
                         break
                 else:
                     # No builtin entry matched the requested name.
                     return _usage_error(
                         parser,
                         f"ERROR: Unknown builtin dictionary: {u}",
                     )
         else:
             if not os.path.isfile(dictionary):
                 return _usage_error(
                     parser,
                     f"ERROR: cannot find dictionary file: {dictionary}",
                 )
             use_dictionaries.append(dictionary)
     misspellings: Dict[str, Misspelling] = {}
     for dictionary in use_dictionaries:
         build_dict(dictionary, misspellings, ignore_words)
     colors = TermColors()
     if not options.colors:
         colors.disable()

     summary = Summary() if options.summary else None

     # context is None (no context lines) or a (before, after) pair of
     # non-negative line counts.
     context = None
     if options.context is not None:
         if (options.before_context is not None) or (options.after_context is not None):
             # NOTE(review): the option attributes are before_context and
             # after_context; confirm that the long option names quoted in
             # this message match the ones parse_options actually defines.
             return _usage_error(
                 parser,
                 "ERROR: --context/-C cannot be used together with "
                 "--context-before/-B or --context-after/-A",
             )
         context_both = max(0, options.context)
         context = (context_both, context_both)
     elif (options.before_context is not None) or (options.after_context is not None):
         context_before = 0
         context_after = 0
         if options.before_context is not None:
             context_before = max(0, options.before_context)
         if options.after_context is not None:
             context_after = max(0, options.after_context)
         context = (context_before, context_after)

     exclude_lines: Set[str] = set()
     if options.exclude_file:
         exclude_files = flatten_clean_comma_separated_arguments(options.exclude_file)
         for exclude_file in exclude_files:
             build_exclude_hashes(exclude_file, exclude_lines)

     file_opener = FileOpener(
         options.hard_encoding_detection,
         options.quiet_level,
         ignore_multiline_regex,
     )

     glob_match = GlobMatch(
         flatten_clean_comma_separated_arguments(options.skip) if options.skip else []
     )
     try:
         # Probe the matcher once so an invalid glob is reported as a usage
         # error up front instead of failing during the file walk.
         glob_match.match("/random/path")  # does not need a real path
     except re.error:
         return _usage_error(
             parser,
             "ERROR: --skip/-S has been fed an invalid glob, "
             "try escaping special characters",
         )

     def check_file(path: str) -> int:
         """Check one file with the settings resolved above.

         Single place for the parse_file call so the directory-walk and
         single-file paths cannot drift apart.
         """
         return parse_file(
             path,
             colors,
             summary,
             misspellings,
             ignore_words_cased,
             exclude_lines,
             file_opener,
             word_regex,
             ignore_word_regex,
             uri_regex,
             uri_ignore_words,
             context,
             options,
         )

     bad_count = 0
     for filename in sorted(options.files):
         # ignore hidden files
         if is_hidden(filename, options.check_hidden):
             continue

         if os.path.isdir(filename):
             for root, dirs, files in os.walk(filename):
                 if glob_match.match(root):  # skip (absolute) directories
                     # Emptying dirs in place stops os.walk from descending.
                     dirs.clear()
                     continue
                 if is_hidden(root, options.check_hidden):  # dir itself hidden
                     continue
                 for file_ in sorted(files):
                     # ignore hidden files in directories
                     if is_hidden(file_, options.check_hidden):
                         continue
                     if glob_match.match(file_):  # skip files
                         continue
                     fname = os.path.join(root, file_)
                     if glob_match.match(fname):  # skip paths
                         continue
                     bad_count += check_file(fname)

                 # skip (relative) directories
                 # Slice assignment mutates the list os.walk iterates over,
                 # which is what prunes the traversal.
                 dirs[:] = [
                     dir_
                     for dir_ in dirs
                     if not glob_match.match(dir_)
                     and not is_hidden(dir_, options.check_hidden)
                 ]

         elif not glob_match.match(filename):  # skip files
             bad_count += check_file(filename)

     if summary:
         print("\n-------8<-------\nSUMMARY:")
         print(summary)
     if options.count:
         print(bad_count, file=sys.stderr)
     return EX_DATAERR if bad_count else EX_OK