Merge pull request #597 from pre-commit/remove-deprecated

remove deprecated autopep8-wrapper, flake8, pyflakes
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
new file mode 100644
index 0000000..9408e44
--- /dev/null
+++ b/.github/FUNDING.yml
@@ -0,0 +1,2 @@
+github: asottile
+open_collective: pre-commit
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index d7885c6..93d9470 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v3.3.0
+    rev: v3.4.0
     hooks:
     -   id: trailing-whitespace
     -   id: end-of-file-fixer
@@ -12,39 +12,35 @@
     -   id: name-tests-test
     -   id: double-quote-string-fixer
     -   id: requirements-txt-fixer
--   repo: https://gitlab.com/pycqa/flake8
-    rev: 3.8.0
+-   repo: https://github.com/PyCQA/flake8
+    rev: 3.9.2
     hooks:
     -   id: flake8
         additional_dependencies: [flake8-typing-imports==1.7.0]
 -   repo: https://github.com/pre-commit/mirrors-autopep8
-    rev: v1.5.2
+    rev: v1.5.7
     hooks:
     -   id: autopep8
--   repo: https://github.com/pre-commit/pre-commit
-    rev: v2.4.0
-    hooks:
-    -   id: validate_manifest
 -   repo: https://github.com/asottile/reorder_python_imports
-    rev: v2.3.0
+    rev: v2.5.0
     hooks:
     -   id: reorder-python-imports
         args: [--py3-plus]
 -   repo: https://github.com/asottile/pyupgrade
-    rev: v2.4.1
+    rev: v2.15.0
     hooks:
     -   id: pyupgrade
         args: [--py36-plus]
 -   repo: https://github.com/asottile/add-trailing-comma
-    rev: v2.0.1
+    rev: v2.1.0
     hooks:
     -   id: add-trailing-comma
         args: [--py36-plus]
 -   repo: https://github.com/asottile/setup-cfg-fmt
-    rev: v1.9.0
+    rev: v1.17.0
     hooks:
     -   id: setup-cfg-fmt
 -   repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v0.770
+    rev: v0.812
     hooks:
     -   id: mypy
diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml
index a241d57..91dbdf0 100644
--- a/.pre-commit-hooks.yaml
+++ b/.pre-commit-hooks.yaml
@@ -45,6 +45,13 @@
     entry: check-json
     language: python
     types: [json]
+-   id: check-shebang-scripts-are-executable
+    name: Check that scripts with shebangs are executable
+    description: Ensures that (non-binary) files with a shebang are executable.
+    entry: check-shebang-scripts-are-executable
+    language: python
+    types: [text]
+    stages: [commit, push, manual]
 -   id: pretty-format-json
     name: Pretty format JSON
     description: This hook sets a standard for formatting JSON files.
@@ -93,6 +100,12 @@
     entry: debug-statement-hook
     language: python
     types: [python]
+-   id: destroyed-symlinks
+    name: Detect Destroyed Symlinks
+    description: Detects symlinks which are changed to regular files with a content of a path which that symlink was pointing to.
+    entry: destroyed-symlinks
+    language: python
+    types: [file]
 -   id: detect-aws-credentials
     name: Detect AWS Credentials
     description: Detects *your* aws credentials from the aws cli credentials file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 08966ec..5de3576 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,15 @@
+3.4.0 - 2020-12-15
+==================
+
+### Features
+- `file-contents-sorter`: Add `--unique` argument
+    - #524 PR by @danielhoherd.
+- `check-vcs-permalinks`: Add `--additional-github-domain` option
+    - #530 PR by @youngminz.
+- New hook: `destroyed-symlinks` to detect unintentional symlink-breakages on
+  Windows.
+    - #511 PR by @m-khvoinitsky.
+
 3.3.0 - 2020-10-20
 ==================
 
diff --git a/README.md b/README.md
index 7cf5c65..5a6ddfe 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@
 
 ```yaml
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v3.3.0  # Use the ref you want to point at
+    rev: v3.4.0  # Use the ref you want to point at
     hooks:
     -   id: trailing-whitespace
     # -   id: ...
@@ -58,6 +58,9 @@
 #### `check-merge-conflict`
 Check for files that contain merge conflict strings.
 
+#### `check-shebang-scripts-are-executable`
+Checks that scripts with shebangs are executable.
+
 #### `check-symlinks`
 Checks for symlinks which do not point to anything.
 
@@ -66,6 +69,10 @@
 
 #### `check-vcs-permalinks`
 Ensures that links to vcs websites are permalinks.
+  - `--additional-github-domain DOMAIN` - Add check for specified domain.
+    Can be repeated multiple times.  For example, if your company uses
+    GitHub Enterprise you may use something like
+    `--additional-github-domain github.example.com`
 
 #### `check-xml`
 Attempts to load all xml files to verify syntax.
@@ -83,6 +90,12 @@
 #### `debug-statements`
 Check for debugger imports and py37+ `breakpoint()` calls in python source.
 
+#### `destroyed-symlinks`
+Detects symlinks which are changed to regular files with a content of a path
+which that symlink was pointing to.
+This usually happens on Windows when a user clones a repository that has
+symlinks but they do not have the permission to create symlinks.
+
 #### `detect-aws-credentials`
 Checks for the existence of AWS secrets that you have set up with the AWS CLI.
 The following arguments are available:
@@ -130,7 +143,7 @@
 #### `no-commit-to-branch`
 Protect specific branches from direct checkins.
   - Use `args: [--branch, staging, --branch, master]` to set the branch.
-    `master` is the default if no branch argument is set.
+    Both `master` and `main` are protected by default if no branch argument is set.
   - `-b` / `--branch` may be specified multiple times to protect multiple
     branches.
   - `-p` / `--pattern` can be used to protect branches that match a supplied regex
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index c1ef4f4..58dc61d 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -10,7 +10,7 @@
       type: github
       endpoint: github
       name: asottile/azure-pipeline-templates
-      ref: refs/tags/v1.0.0
+      ref: refs/tags/v2.1.0
 
 jobs:
 - template: job--python-tox.yml@asottile
diff --git a/pre_commit_hooks/check_case_conflict.py b/pre_commit_hooks/check_case_conflict.py
index 6b8ba82..024c1c3 100644
--- a/pre_commit_hooks/check_case_conflict.py
+++ b/pre_commit_hooks/check_case_conflict.py
@@ -1,5 +1,7 @@
 import argparse
+import os.path
 from typing import Iterable
+from typing import Iterator
 from typing import Optional
 from typing import Sequence
 from typing import Set
@@ -12,9 +14,22 @@
     return {x.lower() for x in iterable}
 
 
+def parents(file: str) -> Iterator[str]:
+    file = os.path.dirname(file)
+    while file:
+        yield file
+        file = os.path.dirname(file)
+
+
+def directories_for(files: Set[str]) -> Set[str]:
+    return {parent for file in files for parent in parents(file)}
+
+
 def find_conflicting_filenames(filenames: Sequence[str]) -> int:
     repo_files = set(cmd_output('git', 'ls-files').splitlines())
+    repo_files |= directories_for(repo_files)
     relevant_files = set(filenames) | added_files()
+    relevant_files |= directories_for(relevant_files)
     repo_files -= relevant_files
     retv = 0
 
diff --git a/pre_commit_hooks/check_executables_have_shebangs.py b/pre_commit_hooks/check_executables_have_shebangs.py
index a02d2a9..e271c66 100644
--- a/pre_commit_hooks/check_executables_have_shebangs.py
+++ b/pre_commit_hooks/check_executables_have_shebangs.py
@@ -2,54 +2,57 @@
 import argparse
 import shlex
 import sys
+from typing import Generator
 from typing import List
+from typing import NamedTuple
 from typing import Optional
 from typing import Sequence
 from typing import Set
 
 from pre_commit_hooks.util import cmd_output
+from pre_commit_hooks.util import zsplit
 
 EXECUTABLE_VALUES = frozenset(('1', '3', '5', '7'))
 
 
-def zsplit(s: str) -> List[str]:
-    s = s.strip('\0')
-    if s:
-        return s.split('\0')
-    else:
-        return []
-
-
 def check_executables(paths: List[str]) -> int:
     if sys.platform == 'win32':  # pragma: win32 cover
         return _check_git_filemode(paths)
     else:  # pragma: win32 no cover
         retv = 0
         for path in paths:
-            if not _check_has_shebang(path):
+            if not has_shebang(path):
                 _message(path)
                 retv = 1
 
         return retv
 
 
-def _check_git_filemode(paths: Sequence[str]) -> int:
-    outs = cmd_output('git', 'ls-files', '-z', '--stage', '--', *paths)
-    seen: Set[str] = set()
-    for out in zsplit(outs):
-        metadata, path = out.split('\t')
-        tagmode = metadata.split(' ', 1)[0]
+class GitLsFile(NamedTuple):
+    mode: str
+    filename: str
 
-        is_executable = any(b in EXECUTABLE_VALUES for b in tagmode[-3:])
-        has_shebang = _check_has_shebang(path)
-        if is_executable and not has_shebang:
-            _message(path)
-            seen.add(path)
+
+def git_ls_files(paths: Sequence[str]) -> Generator[GitLsFile, None, None]:
+    outs = cmd_output('git', 'ls-files', '-z', '--stage', '--', *paths)
+    for out in zsplit(outs):
+        metadata, filename = out.split('\t')
+        mode, _, _ = metadata.split()
+        yield GitLsFile(mode, filename)
+
+
+def _check_git_filemode(paths: Sequence[str]) -> int:
+    seen: Set[str] = set()
+    for ls_file in git_ls_files(paths):
+        is_executable = any(b in EXECUTABLE_VALUES for b in ls_file.mode[-3:])
+        if is_executable and not has_shebang(ls_file.filename):
+            _message(ls_file.filename)
+            seen.add(ls_file.filename)
 
     return int(bool(seen))
 
 
-def _check_has_shebang(path: str) -> int:
+def has_shebang(path: str) -> int:
     with open(path, 'rb') as f:
         first_bytes = f.read(2)
 
diff --git a/pre_commit_hooks/check_json.py b/pre_commit_hooks/check_json.py
index 6026270..db589d0 100644
--- a/pre_commit_hooks/check_json.py
+++ b/pre_commit_hooks/check_json.py
@@ -1,7 +1,23 @@
 import argparse
 import json
+from typing import Any
+from typing import Dict
+from typing import List
 from typing import Optional
 from typing import Sequence
+from typing import Tuple
+
+
+def raise_duplicate_keys(
+        ordered_pairs: List[Tuple[str, Any]],
+) -> Dict[str, Any]:
+    d = {}
+    for key, val in ordered_pairs:
+        if key in d:
+            raise ValueError(f'Duplicate key: {key}')
+        else:
+            d[key] = val
+    return d
 
 
 def main(argv: Optional[Sequence[str]] = None) -> int:
@@ -13,7 +29,7 @@
     for filename in args.filenames:
         with open(filename, 'rb') as f:
             try:
-                json.load(f)
+                json.load(f, object_pairs_hook=raise_duplicate_keys)
             except ValueError as exc:
                 print(f'{filename}: Failed to json decode ({exc})')
                 retval = 1
diff --git a/pre_commit_hooks/check_shebang_scripts_are_executable.py b/pre_commit_hooks/check_shebang_scripts_are_executable.py
new file mode 100644
index 0000000..dce8c59
--- /dev/null
+++ b/pre_commit_hooks/check_shebang_scripts_are_executable.py
@@ -0,0 +1,53 @@
+"""Check that text files with a shebang are executable."""
+import argparse
+import shlex
+import sys
+from typing import List
+from typing import Optional
+from typing import Sequence
+from typing import Set
+
+from pre_commit_hooks.check_executables_have_shebangs import EXECUTABLE_VALUES
+from pre_commit_hooks.check_executables_have_shebangs import git_ls_files
+from pre_commit_hooks.check_executables_have_shebangs import has_shebang
+
+
+def check_shebangs(paths: List[str]) -> int:
+    # Cannot optimize on non-executability here if we intend this check to
+    # work on win32 -- and that's where problems caused by non-executability
+    # (elsewhere) are most likely to arise from.
+    return _check_git_filemode(paths)
+
+
+def _check_git_filemode(paths: Sequence[str]) -> int:
+    seen: Set[str] = set()
+    for ls_file in git_ls_files(paths):
+        is_executable = any(b in EXECUTABLE_VALUES for b in ls_file.mode[-3:])
+        if not is_executable and has_shebang(ls_file.filename):
+            _message(ls_file.filename)
+            seen.add(ls_file.filename)
+
+    return int(bool(seen))
+
+
+def _message(path: str) -> None:
+    print(
+        f'{path}: has a shebang but is not marked executable!\n'
+        f'  If it is supposed to be executable, try: '
+        f'`chmod +x {shlex.quote(path)}`\n'
+        f'  If it is not supposed to be executable, double-check its shebang '
+        f'is wanted.\n',
+        file=sys.stderr,
+    )
+
+
+def main(argv: Optional[Sequence[str]] = None) -> int:
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument('filenames', nargs='*')
+    args = parser.parse_args(argv)
+
+    return check_shebangs(args.filenames)
+
+
+if __name__ == '__main__':
+    exit(main())
diff --git a/pre_commit_hooks/check_vcs_permalinks.py b/pre_commit_hooks/check_vcs_permalinks.py
index bf698e1..5231d7a 100644
--- a/pre_commit_hooks/check_vcs_permalinks.py
+++ b/pre_commit_hooks/check_vcs_permalinks.py
@@ -1,35 +1,53 @@
 import argparse
 import re
 import sys
+from typing import List
 from typing import Optional
+from typing import Pattern
 from typing import Sequence
 
 
-GITHUB_NON_PERMALINK = re.compile(
-    br'https://github.com/[^/ ]+/[^/ ]+/blob/master/[^# ]+#L\d+',
-)
+def _get_pattern(domain: str) -> Pattern[bytes]:
+    regex = (
+        rf'https://{re.escape(domain)}/[^/ ]+/[^/ ]+/blob/'
+        r'(?![a-fA-F0-9]{4,64}/)([^/. ]+)/[^# ]+#L\d+'
+    )
+    return re.compile(regex.encode())
 
 
-def _check_filename(filename: str) -> int:
+def _check_filename(filename: str, patterns: List[Pattern[bytes]]) -> int:
     retv = 0
     with open(filename, 'rb') as f:
         for i, line in enumerate(f, 1):
-            if GITHUB_NON_PERMALINK.search(line):
-                sys.stdout.write(f'{filename}:{i}:')
-                sys.stdout.flush()
-                sys.stdout.buffer.write(line)
-                retv = 1
+            for pattern in patterns:
+                if pattern.search(line):
+                    sys.stdout.write(f'{filename}:{i}:')
+                    sys.stdout.flush()
+                    sys.stdout.buffer.write(line)
+                    retv = 1
     return retv
 
 
 def main(argv: Optional[Sequence[str]] = None) -> int:
     parser = argparse.ArgumentParser()
     parser.add_argument('filenames', nargs='*')
+    parser.add_argument(
+        '--additional-github-domain',
+        dest='additional_github_domains',
+        action='append',
+        default=['github.com'],
+    )
     args = parser.parse_args(argv)
 
+    patterns = [
+        _get_pattern(domain)
+        for domain in args.additional_github_domains
+    ]
+
     retv = 0
+
     for filename in args.filenames:
-        retv |= _check_filename(filename)
+        retv |= _check_filename(filename, patterns)
 
     if retv:
         print()
diff --git a/pre_commit_hooks/destroyed_symlinks.py b/pre_commit_hooks/destroyed_symlinks.py
new file mode 100755
index 0000000..cfaf4e5
--- /dev/null
+++ b/pre_commit_hooks/destroyed_symlinks.py
@@ -0,0 +1,96 @@
+import argparse
+import shlex
+import subprocess
+from typing import List
+from typing import Optional
+from typing import Sequence
+
+from pre_commit_hooks.util import cmd_output
+from pre_commit_hooks.util import zsplit
+
+ORDINARY_CHANGED_ENTRIES_MARKER = '1'
+PERMS_LINK = '120000'
+PERMS_NONEXIST = '000000'
+
+
+def find_destroyed_symlinks(files: Sequence[str]) -> List[str]:
+    destroyed_links: List[str] = []
+    if not files:
+        return destroyed_links
+    for line in zsplit(
+        cmd_output('git', 'status', '--porcelain=v2', '-z', '--', *files),
+    ):
+        splitted = line.split(' ')
+        if splitted and splitted[0] == ORDINARY_CHANGED_ENTRIES_MARKER:
+            # https://git-scm.com/docs/git-status#_changed_tracked_entries
+            (
+                _, _, _,
+                mode_HEAD,
+                mode_index,
+                _,
+                hash_HEAD,
+                hash_index,
+                *path_splitted,
+            ) = splitted
+            path = ' '.join(path_splitted)
+            if (
+                    mode_HEAD == PERMS_LINK and
+                    mode_index != PERMS_LINK and
+                    mode_index != PERMS_NONEXIST
+            ):
+                if hash_HEAD == hash_index:
+                    # if old and new hashes are equal, it's not needed to check
+                    # anything more, we've found a destroyed symlink for sure
+                    destroyed_links.append(path)
+                else:
+                    # if old and new hashes are *not* equal, it doesn't mean
+                    # that everything is OK - new file may be altered
+                    # by something like trailing-whitespace and/or
+                    # mixed-line-ending hooks so we need to go deeper
+                    SIZE_CMD = ('git', 'cat-file', '-s')
+                    size_index = int(cmd_output(*SIZE_CMD, hash_index).strip())
+                    size_HEAD = int(cmd_output(*SIZE_CMD, hash_HEAD).strip())
+
+                    # in the worst case new file may have CRLF added
+                    # so check content only if new file is bigger
+                    # not more than 2 bytes compared to the old one
+                    if size_index <= size_HEAD + 2:
+                        head_content = subprocess.check_output(
+                            ('git', 'cat-file', '-p', hash_HEAD),
+                        ).rstrip()
+                        index_content = subprocess.check_output(
+                            ('git', 'cat-file', '-p', hash_index),
+                        ).rstrip()
+                        if head_content == index_content:
+                            destroyed_links.append(path)
+    return destroyed_links
+
+
+def main(argv: Optional[Sequence[str]] = None) -> int:
+    parser = argparse.ArgumentParser()
+    parser.add_argument('filenames', nargs='*', help='Filenames to check.')
+    args = parser.parse_args(argv)
+    destroyed_links = find_destroyed_symlinks(files=args.filenames)
+    if destroyed_links:
+        print('Destroyed symlinks:')
+        for destroyed_link in destroyed_links:
+            print(f'- {destroyed_link}')
+        print('You should unstage affected files:')
+        print(
+            '\tgit reset HEAD -- {}'.format(
+                ' '.join(shlex.quote(link) for link in destroyed_links),
+            ),
+        )
+        print(
+            'And retry commit. As a long term solution '
+            'you may try to explicitly tell git that your '
+            'environment does not support symlinks:',
+        )
+        print('\tgit config core.symlinks false')
+        return 1
+    else:
+        return 0
+
+
+if __name__ == '__main__':
+    exit(main())
diff --git a/pre_commit_hooks/no_commit_to_branch.py b/pre_commit_hooks/no_commit_to_branch.py
index fb1506f..49ffecf 100644
--- a/pre_commit_hooks/no_commit_to_branch.py
+++ b/pre_commit_hooks/no_commit_to_branch.py
@@ -38,7 +38,7 @@
     )
     args = parser.parse_args(argv)
 
-    protected = frozenset(args.branch or ('master',))
+    protected = frozenset(args.branch or ('master', 'main'))
     patterns = frozenset(args.pattern or ())
     return int(is_on_branch(protected, patterns))
 
diff --git a/pre_commit_hooks/pretty_format_json.py b/pre_commit_hooks/pretty_format_json.py
index 25827dc..61b0169 100644
--- a/pre_commit_hooks/pretty_format_json.py
+++ b/pre_commit_hooks/pretty_format_json.py
@@ -1,5 +1,6 @@
 import argparse
 import json
+import sys
 from difflib import unified_diff
 from typing import List
 from typing import Mapping
@@ -111,17 +112,6 @@
                 contents, args.indent, ensure_ascii=not args.no_ensure_ascii,
                 sort_keys=not args.no_sort_keys, top_keys=args.top_keys,
             )
-
-            if contents != pretty_contents:
-                if args.autofix:
-                    _autofix(json_file, pretty_contents)
-                else:
-                    print(
-                        get_diff(contents, pretty_contents, json_file),
-                        end='',
-                    )
-
-                status = 1
         except ValueError:
             print(
                 f'Input File {json_file} is not a valid JSON, consider using '
@@ -129,6 +119,15 @@
             )
             return 1
 
+        if contents != pretty_contents:
+            if args.autofix:
+                _autofix(json_file, pretty_contents)
+            else:
+                diff_output = get_diff(contents, pretty_contents, json_file)
+                sys.stdout.buffer.write(diff_output.encode())
+
+            status = 1
+
     return status
 
 
diff --git a/pre_commit_hooks/requirements_txt_fixer.py b/pre_commit_hooks/requirements_txt_fixer.py
index 78103a1..351e5b1 100644
--- a/pre_commit_hooks/requirements_txt_fixer.py
+++ b/pre_commit_hooks/requirements_txt_fixer.py
@@ -36,7 +36,7 @@
 
         return name[:m.start()]
 
-    def __lt__(self, requirement: 'Requirement') -> int:
+    def __lt__(self, requirement: 'Requirement') -> bool:
         # \n means top of file comment, so always return True,
         # otherwise just do a string comparison with value.
         assert self.value is not None, self.value
@@ -95,7 +95,7 @@
                 requirement.value = b'\n'
             else:
                 requirement.comments.append(line)
-        elif line.startswith(b'#') or line.strip() == b'':
+        elif line.lstrip().startswith(b'#') or line.strip() == b'':
             requirement.comments.append(line)
         else:
             requirement.append_value(line)
diff --git a/pre_commit_hooks/util.py b/pre_commit_hooks/util.py
index e04b015..402e33e 100644
--- a/pre_commit_hooks/util.py
+++ b/pre_commit_hooks/util.py
@@ -1,5 +1,6 @@
 import subprocess
 from typing import Any
+from typing import List
 from typing import Optional
 from typing import Set
 
@@ -22,3 +23,11 @@
     if retcode is not None and proc.returncode != retcode:
         raise CalledProcessError(cmd, retcode, proc.returncode, stdout, stderr)
     return stdout
+
+
+def zsplit(s: str) -> List[str]:
+    s = s.strip('\0')
+    if s:
+        return s.split('\0')
+    else:
+        return []
diff --git a/setup.cfg b/setup.cfg
index ab80bd6..dbe151b 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = pre_commit_hooks
-version = 3.3.0
+version = 3.4.0
 description = Some out-of-the-box hooks for pre-commit.
 long_description = file: README.md
 long_description_content_type = text/markdown
@@ -16,6 +16,7 @@
     Programming Language :: Python :: 3.6
     Programming Language :: Python :: 3.7
     Programming Language :: Python :: 3.8
+    Programming Language :: Python :: 3.9
     Programming Language :: Python :: Implementation :: CPython
     Programming Language :: Python :: Implementation :: PyPy
 
@@ -26,6 +27,11 @@
     toml
 python_requires = >=3.6.1
 
+[options.packages.find]
+exclude =
+    tests*
+    testing*
+
 [options.entry_points]
 console_scripts =
     check-added-large-files = pre_commit_hooks.check_added_large_files:main
@@ -37,12 +43,14 @@
     check-executables-have-shebangs = pre_commit_hooks.check_executables_have_shebangs:main
     check-json = pre_commit_hooks.check_json:main
     check-merge-conflict = pre_commit_hooks.check_merge_conflict:main
+    check-shebang-scripts-are-executable = pre_commit_hooks.check_shebang_scripts_are_executable:main
     check-symlinks = pre_commit_hooks.check_symlinks:main
     check-toml = pre_commit_hooks.check_toml:main
     check-vcs-permalinks = pre_commit_hooks.check_vcs_permalinks:main
     check-xml = pre_commit_hooks.check_xml:main
     check-yaml = pre_commit_hooks.check_yaml:main
     debug-statement-hook = pre_commit_hooks.debug_statement_hook:main
+    destroyed-symlinks = pre_commit_hooks.destroyed_symlinks:main
     detect-aws-credentials = pre_commit_hooks.detect_aws_credentials:main
     detect-private-key = pre_commit_hooks.detect_private_key:main
     double-quote-string-fixer = pre_commit_hooks.string_fixer:main
@@ -60,11 +68,6 @@
     sort-simple-yaml = pre_commit_hooks.sort_simple_yaml:main
     trailing-whitespace-fixer = pre_commit_hooks.trailing_whitespace_fixer:main
 
-[options.packages.find]
-exclude =
-    tests*
-    testing*
-
 [bdist_wheel]
 universal = True
 
diff --git a/testing/resources/duplicate_key_json.json b/testing/resources/duplicate_key_json.json
new file mode 100644
index 0000000..8a43262
--- /dev/null
+++ b/testing/resources/duplicate_key_json.json
@@ -0,0 +1,4 @@
+{
+    "hello": "world",
+    "hello": "planet"
+}
diff --git a/tests/check_case_conflict_test.py b/tests/check_case_conflict_test.py
index 53de852..c8c9d12 100644
--- a/tests/check_case_conflict_test.py
+++ b/tests/check_case_conflict_test.py
@@ -1,7 +1,24 @@
+import sys
+
+import pytest
+
 from pre_commit_hooks.check_case_conflict import find_conflicting_filenames
 from pre_commit_hooks.check_case_conflict import main
+from pre_commit_hooks.check_case_conflict import parents
 from pre_commit_hooks.util import cmd_output
 
+skip_win32 = pytest.mark.skipif(
+    sys.platform == 'win32',
+    reason='case conflicts between directories and files',
+)
+
+
+def test_parents():
+    assert set(parents('a')) == set()
+    assert set(parents('a/b')) == {'a'}
+    assert set(parents('a/b/c')) == {'a/b', 'a'}
+    assert set(parents('a/b/c/d')) == {'a/b/c', 'a/b', 'a'}
+
 
 def test_nothing_added(temp_git_dir):
     with temp_git_dir.as_cwd():
@@ -26,6 +43,36 @@
         assert find_conflicting_filenames(['f.py', 'F.py']) == 1
 
 
+@skip_win32  # pragma: win32 no cover
+def test_adding_files_with_conflicting_directories(temp_git_dir):
+    with temp_git_dir.as_cwd():
+        temp_git_dir.mkdir('dir').join('x').write('foo')
+        temp_git_dir.mkdir('DIR').join('y').write('foo')
+        cmd_output('git', 'add', '-A')
+
+        assert find_conflicting_filenames([]) == 1
+
+
+@skip_win32  # pragma: win32 no cover
+def test_adding_files_with_conflicting_deep_directories(temp_git_dir):
+    with temp_git_dir.as_cwd():
+        temp_git_dir.mkdir('x').mkdir('y').join('z').write('foo')
+        temp_git_dir.join('X').write('foo')
+        cmd_output('git', 'add', '-A')
+
+        assert find_conflicting_filenames([]) == 1
+
+
+@skip_win32  # pragma: win32 no cover
+def test_adding_file_with_conflicting_directory(temp_git_dir):
+    with temp_git_dir.as_cwd():
+        temp_git_dir.mkdir('dir').join('x').write('foo')
+        temp_git_dir.join('DIR').write('foo')
+        cmd_output('git', 'add', '-A')
+
+        assert find_conflicting_filenames([]) == 1
+
+
 def test_added_file_not_in_pre_commits_list(temp_git_dir):
     with temp_git_dir.as_cwd():
         temp_git_dir.join('f.py').write("print('hello world')")
@@ -46,6 +93,19 @@
         assert find_conflicting_filenames(['F.py']) == 1
 
 
+@skip_win32  # pragma: win32 no cover
+def test_file_conflicts_with_committed_dir(temp_git_dir):
+    with temp_git_dir.as_cwd():
+        temp_git_dir.mkdir('dir').join('x').write('foo')
+        cmd_output('git', 'add', '-A')
+        cmd_output('git', 'commit', '--no-gpg-sign', '-n', '-m', 'Add f.py')
+
+        temp_git_dir.join('DIR').write('foo')
+        cmd_output('git', 'add', '-A')
+
+        assert find_conflicting_filenames([]) == 1
+
+
 def test_integration(temp_git_dir):
     with temp_git_dir.as_cwd():
         assert main(argv=[]) == 0
diff --git a/tests/check_executables_have_shebangs_test.py b/tests/check_executables_have_shebangs_test.py
index 7046081..5703ede 100644
--- a/tests/check_executables_have_shebangs_test.py
+++ b/tests/check_executables_have_shebangs_test.py
@@ -102,16 +102,6 @@
         assert check_executables_have_shebangs._check_git_filemode(files) == 1
 
 
-@pytest.mark.parametrize('out', ('\0f1\0f2\0', '\0f1\0f2', 'f1\0f2\0'))
-def test_check_zsplits_correctly(out):
-    assert check_executables_have_shebangs.zsplit(out) == ['f1', 'f2']
-
-
-@pytest.mark.parametrize('out', ('\0\0', '\0', ''))
-def test_check_zsplit_returns_empty(out):
-    assert check_executables_have_shebangs.zsplit(out) == []
-
-
 @pytest.mark.parametrize(
     ('content', 'mode', 'expected'),
     (
diff --git a/tests/check_json_test.py b/tests/check_json_test.py
index c63dc4c..e010faa 100644
--- a/tests/check_json_test.py
+++ b/tests/check_json_test.py
@@ -9,6 +9,7 @@
         ('bad_json.notjson', 1),
         ('bad_json_latin1.nonjson', 1),
         ('ok_json.json', 0),
+        ('duplicate_key_json.json', 1),
     ),
 )
 def test_main(capsys, filename, expected_retval):
diff --git a/tests/check_shebang_scripts_are_executable_test.py b/tests/check_shebang_scripts_are_executable_test.py
new file mode 100644
index 0000000..9e78b06
--- /dev/null
+++ b/tests/check_shebang_scripts_are_executable_test.py
@@ -0,0 +1,87 @@
+import os
+
+import pytest
+
+from pre_commit_hooks.check_shebang_scripts_are_executable import \
+    _check_git_filemode
+from pre_commit_hooks.check_shebang_scripts_are_executable import main
+from pre_commit_hooks.util import cmd_output
+
+
+def test_check_git_filemode_passing(tmpdir):
+    with tmpdir.as_cwd():
+        cmd_output('git', 'init', '.')
+
+        f = tmpdir.join('f')
+        f.write('#!/usr/bin/env bash')
+        f_path = str(f)
+        cmd_output('chmod', '+x', f_path)
+        cmd_output('git', 'add', f_path)
+        cmd_output('git', 'update-index', '--chmod=+x', f_path)
+
+        g = tmpdir.join('g').ensure()
+        g_path = str(g)
+        cmd_output('git', 'add', g_path)
+
+        files = [f_path, g_path]
+        assert _check_git_filemode(files) == 0
+
+        # this is the one we should trigger on
+        h = tmpdir.join('h')
+        h.write('#!/usr/bin/env bash')
+        h_path = str(h)
+        cmd_output('git', 'add', h_path)
+
+        files = [h_path]
+        assert _check_git_filemode(files) == 1
+
+
+def test_check_git_filemode_passing_unusual_characters(tmpdir):
+    with tmpdir.as_cwd():
+        cmd_output('git', 'init', '.')
+
+        f = tmpdir.join('mañana.txt')
+        f.write('#!/usr/bin/env bash')
+        f_path = str(f)
+        cmd_output('chmod', '+x', f_path)
+        cmd_output('git', 'add', f_path)
+        cmd_output('git', 'update-index', '--chmod=+x', f_path)
+
+        files = (f_path,)
+        assert _check_git_filemode(files) == 0
+
+
+def test_check_git_filemode_failing(tmpdir):
+    with tmpdir.as_cwd():
+        cmd_output('git', 'init', '.')
+
+        f = tmpdir.join('f').ensure()
+        f.write('#!/usr/bin/env bash')
+        f_path = str(f)
+        cmd_output('git', 'add', f_path)
+
+        files = (f_path,)
+        assert _check_git_filemode(files) == 1
+
+
+@pytest.mark.parametrize(
+    ('content', 'mode', 'expected'),
+    (
+        pytest.param('#!python', '+x', 0, id='shebang with executable'),
+        pytest.param('#!python', '-x', 1, id='shebang without executable'),
+        pytest.param('', '+x', 0, id='no shebang with executable'),
+        pytest.param('', '-x', 0, id='no shebang without executable'),
+    ),
+)
+def test_git_executable_shebang(temp_git_dir, content, mode, expected):
+    with temp_git_dir.as_cwd():
+        path = temp_git_dir.join('path')
+        path.write(content)
+        cmd_output('git', 'add', str(path))
+        cmd_output('chmod', mode, str(path))
+        cmd_output('git', 'update-index', f'--chmod={mode}', str(path))
+
+        # simulate how identify chooses that something is executable
+        filenames = [path for path in [str(path)] if os.access(path, os.X_OK)]
+
+        assert main(filenames) == expected
diff --git a/tests/check_vcs_permalinks_test.py b/tests/check_vcs_permalinks_test.py
index 19b1c35..ad59151 100644
--- a/tests/check_vcs_permalinks_test.py
+++ b/tests/check_vcs_permalinks_test.py
@@ -11,6 +11,8 @@
     f.write_binary(
         # permalinks are ok
         b'https://github.com/asottile/test/blob/649e6/foo%20bar#L1\n'
+        # tags are ok
+        b'https://github.com/asottile/test/blob/1.0.0/foo%20bar#L1\n'
         # links to files but not line numbers are ok
         b'https://github.com/asottile/test/blob/master/foo%20bar\n'
         # regression test for overly-greedy regex
@@ -22,13 +24,17 @@
 def test_failing(tmpdir, capsys):
     with tmpdir.as_cwd():
         tmpdir.join('f.txt').write_binary(
-            b'https://github.com/asottile/test/blob/master/foo#L1\n',
+            b'https://github.com/asottile/test/blob/master/foo#L1\n'
+            b'https://example.com/asottile/test/blob/master/foo#L1\n'
+            b'https://example.com/asottile/test/blob/main/foo#L1\n',
         )
 
-        assert main(('f.txt',))
+        assert main(('f.txt', '--additional-github-domain', 'example.com'))
         out, _ = capsys.readouterr()
         assert out == (
             'f.txt:1:https://github.com/asottile/test/blob/master/foo#L1\n'
+            'f.txt:2:https://example.com/asottile/test/blob/master/foo#L1\n'
+            'f.txt:3:https://example.com/asottile/test/blob/main/foo#L1\n'
             '\n'
             'Non-permanent github link detected.\n'
             'On any page on github press [y] to load a permalink.\n'
diff --git a/tests/destroyed_symlinks_test.py b/tests/destroyed_symlinks_test.py
new file mode 100644
index 0000000..d2c9031
--- /dev/null
+++ b/tests/destroyed_symlinks_test.py
@@ -0,0 +1,74 @@
+import os
+import subprocess
+
+import pytest
+
+from pre_commit_hooks.destroyed_symlinks import find_destroyed_symlinks
+from pre_commit_hooks.destroyed_symlinks import main
+
+TEST_SYMLINK = 'test_symlink'  # path of the symlink committed by the fixture
+TEST_SYMLINK_TARGET = '/doesnt/really/matters'  # arbitrary link target
+TEST_FILE = 'test_file'  # regular file committed next to the symlink
+TEST_FILE_RENAMED = f'{TEST_FILE}_renamed'  # its name after the `git mv`
+
+
+@pytest.fixture
+def repo_with_destroyed_symlink(tmpdir):
+    """Yield a clone where the committed symlink is not a symlink on disk."""
+    source_repo, test_repo = tmpdir.join('src'), tmpdir.join('test')
+    os.makedirs(source_repo, exist_ok=True)
+    with source_repo.as_cwd():
+        subprocess.check_call(('git', 'init'))
+        os.symlink(TEST_SYMLINK_TARGET, TEST_SYMLINK)
+        with open(TEST_FILE, 'w') as f:
+            print('some random content', file=f)
+        subprocess.check_call(('git', 'add', '.'))
+        subprocess.check_call(
+            ('git', 'commit', '--no-gpg-sign', '-m', 'initial'),
+        )
+        assert b'120000 ' in subprocess.check_output(
+            ('git', 'cat-file', '-p', 'HEAD^{tree}'),
+        )
+    # str(): subprocess rejects path-like args on Windows before python 3.8
+    clone = ('git', '-c', 'core.symlinks=false', 'clone')
+    subprocess.check_call(clone + (str(source_repo), str(test_repo)))
+    with test_repo.as_cwd():
+        subprocess.check_call(
+            ('git', 'config', '--local', 'core.symlinks', 'true'),
+        )
+        subprocess.check_call(('git', 'mv', TEST_FILE, TEST_FILE_RENAMED))
+    assert not os.path.islink(test_repo.join(TEST_SYMLINK))
+    yield test_repo
+
+
+def test_find_destroyed_symlinks(repo_with_destroyed_symlink):
+    with repo_with_destroyed_symlink.as_cwd():  # run git inside the clone
+        assert find_destroyed_symlinks([]) == []
+        assert main([]) == 0
+
+        subprocess.check_call(('git', 'add', TEST_SYMLINK))
+        assert find_destroyed_symlinks([TEST_SYMLINK]) == [TEST_SYMLINK]
+        assert find_destroyed_symlinks([]) == []
+        assert main([]) == 0
+        assert find_destroyed_symlinks([TEST_FILE_RENAMED, TEST_FILE]) == []
+        ALL_STAGED = [TEST_SYMLINK, TEST_FILE_RENAMED]  # both staged paths
+        assert find_destroyed_symlinks(ALL_STAGED) == [TEST_SYMLINK]
+        assert main(ALL_STAGED) != 0
+
+        with open(TEST_SYMLINK, 'a') as f:  # append: keep current content
+            print(file=f)  # add trailing newline
+        subprocess.check_call(['git', 'add', TEST_SYMLINK])
+        assert find_destroyed_symlinks(ALL_STAGED) == [TEST_SYMLINK]
+        assert main(ALL_STAGED) != 0
+
+        with open(TEST_SYMLINK, 'w') as f:  # zeros, as long as the target
+            print('0' * len(TEST_SYMLINK_TARGET), file=f)
+        subprocess.check_call(('git', 'add', TEST_SYMLINK))
+        assert find_destroyed_symlinks(ALL_STAGED) == []
+        assert main(ALL_STAGED) == 0
+
+        with open(TEST_SYMLINK, 'w') as f:  # zeros, 3 longer than the target
+            print('0' * (len(TEST_SYMLINK_TARGET) + 3), file=f)
+        subprocess.check_call(('git', 'add', TEST_SYMLINK))
+        assert find_destroyed_symlinks(ALL_STAGED) == []
+        assert main(ALL_STAGED) == 0
diff --git a/tests/no_commit_to_branch_test.py b/tests/no_commit_to_branch_test.py
index 72b32e6..610e660 100644
--- a/tests/no_commit_to_branch_test.py
+++ b/tests/no_commit_to_branch_test.py
@@ -67,3 +67,10 @@
         cmd_output('git', 'checkout', head)
         # we're not on a branch!
         assert main(()) == 0
+
+
+@pytest.mark.parametrize('branch_name', ('master', 'main'))
+def test_default_branch_names(temp_git_dir, branch_name):
+    with temp_git_dir.as_cwd():
+        cmd_output('git', 'checkout', '-b', branch_name)
+        assert main(()) == 1  # no arguments given, yet both names return 1
diff --git a/tests/requirements_txt_fixer_test.py b/tests/requirements_txt_fixer_test.py
index f4f679d..e3c6ed5 100644
--- a/tests/requirements_txt_fixer_test.py
+++ b/tests/requirements_txt_fixer_test.py
@@ -30,6 +30,16 @@
         ),
         (b'#comment\n\nfoo\nbar\n', FAIL, b'#comment\n\nbar\nfoo\n'),
         (b'#comment\n\nbar\nfoo\n', PASS, b'#comment\n\nbar\nfoo\n'),
+        (
+            b'foo\n\t#comment with indent\nbar\n',
+            FAIL,
+            b'\t#comment with indent\nbar\nfoo\n',
+        ),
+        (
+            b'bar\n\t#comment with indent\nfoo\n',
+            PASS,
+            b'bar\n\t#comment with indent\nfoo\n',
+        ),
         (b'\nfoo\nbar\n', FAIL, b'bar\n\nfoo\n'),
         (b'\nbar\nfoo\n', PASS, b'\nbar\nfoo\n'),
         (
diff --git a/tests/util_test.py b/tests/util_test.py
index b42ee6f..7f48816 100644
--- a/tests/util_test.py
+++ b/tests/util_test.py
@@ -2,6 +2,7 @@
 
 from pre_commit_hooks.util import CalledProcessError
 from pre_commit_hooks.util import cmd_output
+from pre_commit_hooks.util import zsplit
 
 
 def test_raises_on_error():
@@ -12,3 +13,13 @@
 def test_output():
     ret = cmd_output('sh', '-c', 'echo hi')
     assert ret == 'hi\n'
+
+
+@pytest.mark.parametrize('out', ('\0f1\0f2\0', '\0f1\0f2', 'f1\0f2\0'))
+def test_check_zsplits_str_correctly(out):
+    assert zsplit(out) == ['f1', 'f2']  # leading/trailing NULs add no entries
+
+
+@pytest.mark.parametrize('out', ('\0\0', '\0', ''))
+def test_check_zsplit_returns_empty(out):
+    assert zsplit(out) == []  # NUL-only or empty output has no entries