# blob: 329d3f19441f22ee021ebef26271be81b324cff0 [file] [log] [blame]
# -*- coding: utf-8 -*-
from __future__ import print_function
import contextlib
import inspect
import os
import os.path as op
import re
from shutil import copyfile
import subprocess
import sys
import pytest
import codespell_lib as cs_
from codespell_lib._codespell import uri_regex_def, EX_USAGE, EX_OK, EX_DATAERR
def test_constants():
    """Verify the numeric values of the EX_* exit-status constants."""
    observed = (EX_OK, EX_USAGE, EX_DATAERR)
    assert observed == (0, 64, 65)
class MainWrapper(object):
    """Adapter that makes ``cs_.main`` report a misspelling count again."""

    def main(self, *args, count=True, std=False, **kwargs):
        """Run ``cs_.main`` and turn its exit status into a count.

        Parameters
        ----------
        *args
            Positional arguments forwarded to ``cs_.main``.
        count : bool
            When true, ``'--count'`` is prepended to ``args``.
        std : bool
            When true, return ``(code, stdout, stderr)`` instead of ``code``.
        **kwargs
            Keyword arguments forwarded to ``cs_.main``.

        Returns
        -------
        The count parsed from stderr when the status is ``EX_DATAERR`` (or
        ``EX_OK`` with ``count``), otherwise the exit status itself;
        wrapped in a tuple with the captured output when ``std`` is true.
        """
        cli_args = ('--count',) + args if count else args
        status = cs_.main(*cli_args, **kwargs)
        # The captured output is fetched from the *caller's* local variable
        # named ``capsys``, so the calling function must hold one.
        caller_locals = inspect.currentframe().f_back.f_locals
        stdout, stderr = caller_locals['capsys'].readouterr()
        assert status in (EX_OK, EX_USAGE, EX_DATAERR)
        if status == EX_DATAERR or (status == EX_OK and count):
            # The count is the second-to-last '\n'-separated piece of stderr.
            reported = int(stderr.split('\n')[-2])
            if status == EX_OK:
                assert reported == 0
            status = reported
        return (status, stdout, stderr) if std else status


cs = MainWrapper()
def run_codespell(args=(), cwd=None):
    """Run the ``codespell`` executable and return the count it reports.

    Parameters
    ----------
    args : iterable of str
        Extra command-line arguments; ``--count`` is always passed first.
        Any iterable is accepted (tuple, list, ...).
    cwd : str | None
        Working directory for the child process.

    Returns
    -------
    int
        The integer parsed from the second-to-last ``'\\n'``-separated piece
        of the child's stderr.
    """
    # list(args) lets callers pass a list as well as the default tuple.
    command = ['codespell', '--count'] + list(args)
    proc = subprocess.Popen(
        command, cwd=cwd,
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stderr = proc.communicate()[1].decode('utf-8')
    return int(stderr.split('\n')[-2])
def test_command(tmpdir):
"""Test running the codespell executable."""
# With no arguments does "."
# (run_codespell only adds --count and runs with cwd set to the temp dir).
d = str(tmpdir)
assert run_codespell(cwd=d) == 0
# NOTE(review): no body is visible for this with-block; presumably it writes
# misspelled text into bad.txt so that the next run counts 4 -- confirm
# against the original file.
with open(op.join(d, 'bad.txt'), 'w') as f:
assert run_codespell(cwd=d) == 4
def test_basic(tmpdir, capsys):
"""Test some basic functionality."""
# A path that does not exist is expected to give a count of 0.
assert cs.main('_does_not_exist_') == 0
fname = op.join(str(tmpdir), 'tmp')
# NOTE(review): the body of this with-block and the third positional argument
# of the call below are not visible here -- restore from the original file.
with open(fname, 'w') as f:
code, _, stderr = cs.main('-D', 'foo',, std=True)
assert code == EX_USAGE, 'missing dictionary'
assert 'cannot find dictionary' in stderr
assert cs.main(fname) == 0, 'empty file'
with open(fname, 'a') as f:
f.write('this is a test file\n')
assert cs.main(fname) == 0, 'good'
# NOTE(review): the next three append blocks show no body here; presumably
# each appends text that produces the counts asserted after it -- confirm.
with open(fname, 'a') as f:
assert cs.main(fname) == 1, 'bad'
with open(fname, 'a') as f:
assert cs.main(fname) == 2, 'worse'
with open(fname, 'a') as f:
assert cs.main(fname) == 2, 'with a name'
# Enabling these builtin dictionaries raises the count for the same file.
assert cs.main('--builtin', 'clear,rare,names,informal', fname) == 4
code, _, stderr = cs.main(fname, '--builtin', 'foo', std=True)
assert code == EX_USAGE # bad type
assert 'Unknown builtin dictionary' in stderr
d = str(tmpdir)
code, _, stderr = cs.main(fname, '-D', op.join(d, 'foo'), std=True)
assert code == EX_USAGE # bad dict
assert 'cannot find dictionary' in stderr
# NOTE(review): no body is visible for this with-block -- confirm.
with open(op.join(d, 'bad.txt'), 'w') as f:
assert cs.main(d) == 4
# -w is expected to report 'FIXED:' on stderr and leave a count of 0.
code, _, stderr = cs.main('-w', d, std=True)
assert code == 0
assert 'FIXED:' in stderr
with open(op.join(d, 'bad.txt')) as f:
# NOTE(review): the right-hand side of this assignment is not visible here;
# presumably the file content is read back -- confirm.
new_content =
assert cs.main(d) == 0
assert new_content == 'abandoned\nAbandoned\nABANDONED\nabandoned'
with open(op.join(d, 'bad.txt'), 'w') as f:
f.write('abandonned abandonned\n')
assert cs.main(d) == 2
# With '-q 16' and without --count, the fixing run is expected to print
# nothing on either stream.
code, stdout, stderr = cs.main(
'-q', '16', '-w', d, count=False, std=True)
assert code == 0
assert stdout == stderr == ''
assert cs.main(d) == 0
# empty directory
os.mkdir(op.join(d, 'test'))
assert cs.main(d) == 0
def test_interactivity(tmpdir, capsys):
"""Test interaction"""
# NOTE(review): many call arguments and with-block bodies in this test are not
# visible here (e.g. `cs.main( == 0`, `open(, 'w')`); the code is left exactly
# as found -- restore the missing pieces from the original file.
# Windows can't read a currently-opened file, so here we use
# NamedTemporaryFile just to get a good name
# NOTE(review): the visible code calls open(), not NamedTemporaryFile; the
# comment above looks stale -- confirm.
with open(op.join(str(tmpdir), 'tmp'), 'w') as f:
assert cs.main( == 0, 'empty file'
with open(, 'w') as f:
assert cs.main('-i', '-1', == 1, 'bad'
# Each FakeStdin block below supplies the text that is read from sys.stdin
# while cs.main runs in interactive mode.
with FakeStdin('y\n'):
assert cs.main('-i', '3', == 1
with FakeStdin('n\n'):
code, stdout, _ = cs.main('-w', '-i', '3',, std=True)
assert code == 0
assert '==>' in stdout
with FakeStdin('x\ny\n'):
assert cs.main('-w', '-i', '3', == 0
assert cs.main( == 0
# New example
with open(op.join(str(tmpdir), 'tmp2'), 'w') as f:
with open(, 'w') as f:
assert cs.main( == 1
with FakeStdin(' '): # blank input -> Y
assert cs.main('-w', '-i', '3', == 0
assert cs.main( == 0
# multiple options
with open(op.join(str(tmpdir), 'tmp3'), 'w') as f:
with open(, 'w') as f:
assert cs.main( == 1
with FakeStdin(' \n'): # blank input -> nothing
assert cs.main('-w', '-i', '3', == 0
assert cs.main( == 1
with FakeStdin('0\n'): # input '0' (presumably picks option 0 -- confirm)
assert cs.main('-w', '-i', '3', == 0
assert cs.main( == 0
with open(, 'r') as f_read:
assert == 'awkward\n'
with open(, 'w') as f:
assert cs.main( == 1
with FakeStdin('x\n1\n'): # input 'x' then '1' (presumably rejected, then option 1 -- confirm)
code, stdout, _ = cs.main('-w', '-i', '3',, std=True)
assert code == 0
assert 'a valid option' in stdout
assert cs.main( == 0
with open(, 'r') as f:
assert == 'backward\n'
def test_summary(tmpdir, capsys):
"""Test summary functionality."""
# NOTE(review): with-block bodies and the first argument of several cs.main
# calls are not visible here; the code is left exactly as found -- restore
# from the original file.
with open(op.join(str(tmpdir), 'tmp'), 'w') as f:
code, stdout, stderr = cs.main(, std=True, count=False)
assert code == 0
assert stdout == stderr == '', 'no output'
# With --summary (and the wrapper's default --count) stderr carries only the
# count while stdout carries the summary text.
code, stdout, stderr = cs.main(, '--summary', std=True)
assert code == 0
assert stderr == '0\n'
assert 'SUMMARY' in stdout
assert len(stdout.split('\n')) == 5
with open(, 'w') as f:
assert code == 0
code, stdout, stderr = cs.main(, '--summary', std=True)
assert stderr == '2\n'
assert 'SUMMARY' in stdout
assert len(stdout.split('\n')) == 7
assert 'abandonned' in stdout.split()[-2]
def test_ignore_dictionary(tmpdir, capsys):
"""Test ignore dictionary functionality."""
d = str(tmpdir)
with open(op.join(d, 'bad.txt'), 'w') as f:
f.write('1 abandonned 1\n2 abandonned 2\nabondon\n')
# NOTE(review): the right-hand side of this assignment is not visible here;
# presumably it is the path of bad.txt -- confirm.
bad_name =
assert cs.main(bad_name) == 3
# NOTE(review): the body of this with-block and the value passed to -I are not
# visible here; presumably ignore.txt lists the words to ignore -- confirm.
with open(op.join(d, 'ignore.txt'), 'w') as f:
assert cs.main('-I',, bad_name) == 1
def test_ignore_word_list(tmpdir, capsys):
"""Test ignore word list functionality."""
d = str(tmpdir)
# NOTE(review): no body is visible for this with-block; presumably it writes
# three misspellings into bad.txt -- confirm.
with open(op.join(d, 'bad.txt'), 'w') as f:
assert cs.main(d) == 3
# -L is given twice (one comma-separated list and one single word); one
# misspelling is expected to remain.
assert cs.main('-Labandonned,someword', '-Labilty', d) == 1
def test_custom_regex(tmpdir, capsys):
"""Test custom word regex."""
d = str(tmpdir)
# NOTE(review): no body is visible for this with-block; presumably it writes
# text that only counts as misspelled under the custom regex -- confirm.
with open(op.join(d, 'bad.txt'), 'w') as f:
assert cs.main(d) == 0
assert cs.main('-r', "[a-z]+", d) == 2
# Combining -r with --write-changes is expected to be a usage error that is
# reported on stdout.
code, stdout, _ = cs.main('-r', '[a-z]+', '--write-changes', d, std=True)
assert code == EX_USAGE
assert 'ERROR:' in stdout
def test_exclude_file(tmpdir, capsys):
"""Test exclude file functionality."""
d = str(tmpdir)
with open(op.join(d, 'bad.txt'), 'wb') as f:
f.write('1 abandonned 1\n2 abandonned 2\n'.encode('utf-8'))
# NOTE(review): the right-hand side of this assignment is not visible here;
# presumably it is the path of bad.txt -- confirm.
bad_name =
assert cs.main(bad_name) == 2
# tmp.txt holds one of the two lines of bad.txt.
with open(op.join(d, 'tmp.txt'), 'wb') as f:
f.write('1 abandonned 1\n'.encode('utf-8'))
assert cs.main(bad_name) == 2
# NOTE(review): the value passed to -x is not visible here; presumably it is
# the path of tmp.txt -- confirm.
assert cs.main('-x',, bad_name) == 1
def test_encoding(tmpdir, capsys):
"""Test encoding handling."""
# NOTE(review): file-name arguments and with-block bodies in this test are not
# visible here; the code is left exactly as found -- restore from the
# original file.
# Some simple Unicode things
with open(op.join(str(tmpdir), 'tmp'), 'w') as f:
# with CaptureStdout() as sio:
assert cs.main( == 0
with open(, 'wb') as f:
assert cs.main( == 0
assert cs.main('-e', == 0
with open(, 'ab') as f:
assert cs.main( == 1
# Binary file warning
# (silent by default; the warning is expected on stderr only with '-q 0')
with open(, 'wb') as f:
code, stdout, stderr = cs.main(, std=True, count=False)
assert code == 0
assert stdout == stderr == ''
code, stdout, stderr = cs.main('-q', '0',, std=True, count=False)
assert code == 0
assert stdout == ''
assert 'WARNING: Binary file' in stderr
def test_ignore(tmpdir, capsys):
"""Test ignoring of files and directories."""
d = str(tmpdir)
with open(op.join(d, 'good.txt'), 'w') as f:
f.write('this file is okay')
assert cs.main(d) == 0
# NOTE(review): no body is visible for this with-block; presumably it writes
# one misspelling into bad.txt -- confirm.
with open(op.join(d, 'bad.txt'), 'w') as f:
assert cs.main(d) == 1
# --skip is exercised with a glob and with an exact file name.
assert cs.main('--skip=bad*', d) == 0
assert cs.main('--skip=bad.txt', d) == 0
subdir = op.join(d, 'ignoredir')
# NOTE(review): no statement creating `subdir` and no body for the with-block
# below are visible here -- confirm against the original file.
with open(op.join(subdir, 'bad.txt'), 'w') as f:
assert cs.main(d) == 2
assert cs.main('--skip=bad*', d) == 0
# Skipping the sub-directory (by glob, by name, or by a path glob) is expected
# to leave only the top-level bad.txt counted.
assert cs.main('--skip=*ignoredir*', d) == 1
assert cs.main('--skip=ignoredir', d) == 1
assert cs.main('--skip=*ignoredir/bad*', d) == 1
def test_check_filename(tmpdir, capsys):
"""Test filename check."""
d = str(tmpdir)
# NOTE(review): none of the three with-blocks below shows a body here;
# presumably they leave the file empty, write text, and write binary data
# respectively (see the comments) -- confirm against the original file.
# In every case '-f' is expected to report exactly one error for the
# directory containing 'abandonned.txt'.
# Empty file
with open(op.join(d, 'abandonned.txt'), 'w') as f:
assert cs.main('-f', d) == 1
# Normal file with contents
with open(op.join(d, 'abandonned.txt'), 'w') as f:
assert cs.main('-f', d) == 1
# Normal file with binary contents
with open(op.join(d, 'abandonned.txt'), 'wb') as f:
assert cs.main('-f', d) == 1
@pytest.mark.skipif(not callable(getattr(os, 'mkfifo', None)),
                    reason='requires os.mkfifo')
def test_check_filename_irregular_file(tmpdir, capsys):
    """Test irregular file filename check.

    A FIFO whose name is a misspelling is created in the temporary
    directory; with ``-f`` exactly one error is expected.  The test is
    skipped when ``os.mkfifo`` is missing or not callable.
    """
    # Irregular file (!isfile())
    d = str(tmpdir)
    os.mkfifo(op.join(d, 'abandonned'))
    assert cs.main('-f', d) == 1
def test_check_hidden(tmpdir, capsys):
"""Test ignoring of hidden files."""
d = str(tmpdir)
# visible file
# NOTE(review): no body is visible for this with-block; presumably it writes
# one misspelling into test.txt -- confirm.
with open(op.join(d, 'test.txt'), 'w') as f:
assert cs.main(op.join(d, 'test.txt')) == 1
assert cs.main(d) == 1
# hidden file
# (a leading dot makes the file count 0 unless --check-hidden is given)
os.rename(op.join(d, 'test.txt'), op.join(d, '.test.txt'))
assert cs.main(op.join(d, '.test.txt')) == 0
assert cs.main(d) == 0
assert cs.main('--check-hidden', op.join(d, '.test.txt')) == 1
assert cs.main('--check-hidden', d) == 1
# hidden file with typo in name
# (the name itself is only counted when --check-filenames is also given)
os.rename(op.join(d, '.test.txt'), op.join(d, '.abandonned.txt'))
assert cs.main(op.join(d, '.abandonned.txt')) == 0
assert cs.main(d) == 0
assert cs.main('--check-hidden', op.join(d, '.abandonned.txt')) == 1
assert cs.main('--check-hidden', d) == 1
assert cs.main('--check-hidden', '--check-filenames',
op.join(d, '.abandonned.txt')) == 2
assert cs.main('--check-hidden', '--check-filenames', d) == 2
# hidden directory
assert cs.main(d) == 0
assert cs.main('--check-hidden', d) == 1
assert cs.main('--check-hidden', '--check-filenames', d) == 2
# A hidden directory holding a copy of the file under a misspelled name.
os.mkdir(op.join(d, '.abandonned'))
copyfile(op.join(d, '.abandonned.txt'),
op.join(d, '.abandonned', 'abandonned.txt'))
assert cs.main(d) == 0
assert cs.main('--check-hidden', d) == 2
assert cs.main('--check-hidden', '--check-filenames', d) == 5
def test_case_handling(tmpdir, capsys):
"""Test that capitalized entries get detected properly."""
# NOTE(review): file-name arguments, a with-block body and the read-back
# expression are not visible here; the code is left exactly as found --
# restore from the original file.
# NOTE(review): the "Unicode" comment below looks copied from test_encoding;
# this test writes 'ACII' and expects 'ASCII' to be suggested and applied.
# Some simple Unicode things
with open(op.join(str(tmpdir), 'tmp'), 'w') as f:
# with CaptureStdout() as sio:
assert cs.main( == 0
with open(, 'wb') as f:
f.write('this has an ACII error'.encode('utf-8'))
code, stdout, _ = cs.main(, std=True)
assert code == 1
assert 'ASCII' in stdout
code, _, stderr = cs.main('-w',, std=True)
assert code == 0
assert 'FIXED' in stderr
with open(, 'rb') as f:
assert'utf-8') == 'this has an ASCII error'
def test_context(tmpdir, capsys):
    """Exercise the -A / -B / -C context-line options."""
    d = str(tmpdir)
    with open(op.join(d, 'context.txt'), 'w') as f:
        f.write('line 1\nline 2\nline 3 abandonned\nline 4\nline 5')

    flagged = '> line 3 abandonned'
    # (options, expected number of '\n'-separated stdout pieces,
    #  expected leading pieces)
    valid_cases = (
        # symmetric context, fully within file
        (('-C', '1'), 5, [': line 2', flagged, ': line 4']),
        # requested context is bigger than the file
        (('-C', '10'), 7,
         [': line 1', ': line 2', flagged, ': line 4', ': line 5']),
        # only before context
        (('-B', '2'), 5, [': line 1', ': line 2', flagged]),
        # only after context
        (('-A', '1'), 4, [flagged, ': line 4']),
        # asymmetric context
        (('-B', '2', '-A', '1'), 6,
         [': line 1', ': line 2', flagged, ': line 4']),
    )
    # cs.main is called straight from this function (not from a helper)
    # because the wrapper looks up ``capsys`` in its caller's locals.
    for options, n_pieces, leading in valid_cases:
        code, stdout, _ = cs.main(*(options + (d,)), std=True)
        assert code == 1
        lines = stdout.split('\n')
        assert len(lines) == n_pieces
        assert lines[:len(leading)] == leading

    # '-C' combined with '-A' or with '-B' on the command line
    for options in (('-C', '2', '-A', '1'), ('-C', '2', '-B', '1')):
        code, stdout, _ = cs.main(*(options + (d,)), std=True)
        assert code == EX_USAGE
        assert 'ERROR' in stdout.split('\n')[0]
def test_ignore_regex_option(tmpdir, capsys):
"""Test ignore regex option functionality."""
# NOTE(review): the first argument of most cs.main calls (and possibly part
# of the first written line) is not visible here; the code is left exactly
# as found -- restore from the original file.
d = str(tmpdir)
# Invalid regex.
code, stdout, _ = cs.main('--ignore-regex=(', std=True)
assert code == EX_USAGE
assert 'usage:' in stdout
with open(op.join(d, 'flag.txt'), 'w') as f:
f.write('# Please see for info\n')
# Test file has 1 invalid entry, and it's not ignored by default.
assert cs.main( == 1
# An empty regex is the default value, and nothing is ignored.
assert cs.main(, '--ignore-regex=') == 1
assert cs.main(, '--ignore-regex=""') == 1
# Non-matching regex results in nothing being ignored.
assert cs.main(, '--ignore-regex=^$') == 1
# A word can be ignored.
assert cs.main(, '--ignore-regex=abandonned') == 0
# Ignoring part of the word can result in odd behavior.
assert cs.main(, '--ignore-regex=nn') == 0
with open(op.join(d, 'flag.txt'), 'w') as f:
f.write('abandonned donn\n')
# Test file has 2 invalid entries.
assert cs.main( == 2
# Ignoring donn breaks them both.
assert cs.main(, '--ignore-regex=donn') == 0
# Adding word breaks causes only one to be ignored.
assert cs.main(, r'--ignore-regex=\bdonn\b') == 1
def test_uri_regex_option(tmpdir, capsys):
"""Test --uri-regex option functionality."""
# NOTE(review): the first argument of most cs.main calls, part of the first
# written line and one with-block body are not visible here; the code is
# left exactly as found -- restore from the original file.
d = str(tmpdir)
# Invalid regex.
code, stdout, _ = cs.main('--uri-regex=(', std=True)
assert code == EX_USAGE
assert 'usage:' in stdout
with open(op.join(d, 'flag.txt'), 'w') as f:
f.write('# Please see for info\n')
# By default, the standard regex is used.
assert cs.main( == 1
assert cs.main(, '--uri-ignore-words-list=abandonned') == 0
# If empty, nothing matches.
assert cs.main(, '--uri-regex=',
'--uri-ignore-words-list=abandonned') == 0
# Can manually match urls.
assert cs.main(, '--uri-regex=\\bhttp.*\\b',
'--uri-ignore-words-list=abandonned') == 0
# Can also match arbitrary content.
with open(op.join(d, 'flag.txt'), 'w') as f:
assert cs.main( == 1
assert cs.main(, '--uri-ignore-words-list=abandonned') == 1
assert cs.main(, '--uri-regex=.*') == 1
assert cs.main(, '--uri-regex=.*',
'--uri-ignore-words-list=abandonned') == 0
def test_uri_ignore_words_list_option_uri(tmpdir, capsys):
"""Test ignore regex option functionality."""
# NOTE(review): the docstring above looks copied from the --ignore-regex
# test; every call below exercises --uri-ignore-words-list, with the
# variations written as URIs.
# NOTE(review): the first argument of most cs.main calls, several with-block
# bodies and parts of some literals (apparently the URIs) are not visible
# here; the code is left exactly as found -- restore from the original file.
d = str(tmpdir)
with open(op.join(d, 'flag.txt'), 'w') as f:
f.write('# Please see for info\n')
# Test file has 1 invalid entry, and it's not ignored by default.
assert cs.main( == 1
# An empty list is the default value, and nothing is ignored.
assert cs.main(, '--uri-ignore-words-list=') == 1
# Non-matching regex results in nothing being ignored.
assert cs.main(, '--uri-ignore-words-list=foo,example') == 1
# A word can be ignored.
assert cs.main(, '--uri-ignore-words-list=abandonned') == 0
assert cs.main(, '--uri-ignore-words-list=foo,abandonned,bar') == 0
assert cs.main(, '--uri-ignore-words-list=*') == 0
# The match must be for the complete word.
assert cs.main(, '--uri-ignore-words-list=abandonn') == 1
with open(op.join(d, 'flag.txt'), 'w') as f:
# Test file has 2 invalid entries.
assert cs.main( == 2
# Ignoring the value in the URI won't ignore the word completely.
assert cs.main(, '--uri-ignore-words-list=abandonned') == 1
assert cs.main(, '--uri-ignore-words-list=*') == 1
# The regular --ignore-words-list will ignore both.
assert cs.main(, '--ignore-words-list=abandonned') == 0
variation_option = '--uri-ignore-words-list=abandonned'
# Variations where an error is ignored.
for variation in ('# Please see http://abandonned for info\n',
'# Please see "http://abandonned" for info\n',
# This variation could be un-ignored, but it'd require a
# more complex regex as " is valid in parts of URIs.
'# Please see "http://foo"abandonned for info\n',
'# Please see https://abandonned for info\n',
'# Please see ftp://abandonned for info\n',
'# Please see http://example/abandonned for info\n',
'# Please see for info\n',
'# Please see for info\n',
'# Please see for info\n',
'# Please see for info\n',
'# Please see http://[2001:0db8:85a3:0000:0000:8a2e:0370'
':7334]/abandonned for info\n'):
with open(op.join(d, 'flag.txt'), 'w') as f:
assert cs.main( == 1, variation
assert cs.main(, variation_option) == 0, variation
# Variations where no error is ignored.
for variation in ('# Please see abandonned/ for info\n',
'# Please see http:abandonned for info\n',
'# Please see foo/abandonned for info\n',
'# Please see http://foo abandonned for info\n'):
with open(op.join(d, 'flag.txt'), 'w') as f:
assert cs.main( == 1, variation
assert cs.main(, variation_option) == 1, variation
def test_uri_ignore_words_list_option_email(tmpdir, capsys):
"""Test ignore regex option functionality."""
# NOTE(review): the docstring above looks copied from the --ignore-regex
# test; every call below exercises --uri-ignore-words-list, with the
# variations written as e-mail-like addresses.
# NOTE(review): the first argument of most cs.main calls, several with-block
# bodies and parts of some literals (apparently the addresses) are not
# visible here; the code is left exactly as found -- restore from the
# original file.
d = str(tmpdir)
with open(op.join(d, 'flag.txt'), 'w') as f:
f.write('# Please see for info\n')
# Test file has 1 invalid entry, and it's not ignored by default.
assert cs.main( == 1
# An empty list is the default value, and nothing is ignored.
assert cs.main(, '--uri-ignore-words-list=') == 1
# Non-matching regex results in nothing being ignored.
assert cs.main(, '--uri-ignore-words-list=foo,example') == 1
# A word can be ignored.
assert cs.main(, '--uri-ignore-words-list=abandonned') == 0
assert cs.main(, '--uri-ignore-words-list=foo,abandonned,bar') == 0
assert cs.main(, '--uri-ignore-words-list=*') == 0
# The match must be for the complete word.
assert cs.main(, '--uri-ignore-words-list=abandonn') == 1
with open(op.join(d, 'flag.txt'), 'w') as f:
# Test file has 2 invalid entries.
assert cs.main( == 2
# Ignoring the value in the URI won't ignore the word completely.
assert cs.main(, '--uri-ignore-words-list=abandonned') == 1
assert cs.main(, '--uri-ignore-words-list=*') == 1
# The regular --ignore-words-list will ignore both.
assert cs.main(, '--ignore-words-list=abandonned') == 0
variation_option = '--uri-ignore-words-list=abandonned'
# Variations where an error is ignored.
for variation in ('# Please see example@abandonned for info\n',
'# Please see abandonned@example for info\n',
'# Please see for info\n',
'# Please see'
' for info\n'):
with open(op.join(d, 'flag.txt'), 'w') as f:
assert cs.main( == 1, variation
assert cs.main(, variation_option) == 0, variation
# Variations where no error is ignored.
for variation in ('# Please see example @ abandonned for info\n',
'# Please see abandonned@ example for info\n',
'# Please see'
' abandonned for info\n'):
with open(op.join(d, 'flag.txt'), 'w') as f:
assert cs.main( == 1, variation
assert cs.main(, variation_option) == 1, variation
def test_uri_regex_def():
# Checks the compiled uri_regex_def pattern against three groups of sample
# strings, both bare and embedded in surrounding text.
uri_regex = re.compile(uri_regex_def)
# Tests based on
# NOTE(review): the end of the comment above and the contents (and closing
# parentheses) of the three tuples below are not visible here; the code is
# left exactly as found -- restore from the original file.
true_positives = (
true_negatives = (
false_positives = (
boilerplate = 'Surrounding text %s more text'
# Both true and false positives must be found as exactly one whole match.
for uri in true_positives + false_positives:
assert uri_regex.findall(uri) == [uri], uri
assert uri_regex.findall(boilerplate % uri) == [uri], uri
# True negatives must produce no match at all.
for uri in true_negatives:
assert not uri_regex.findall(uri), uri
assert not uri_regex.findall(boilerplate % uri), uri
def test_config(tmpdir, capsys):
# NOTE(review): the next line reads like docstring text whose quotes are not
# visible here; it is left exactly as found -- restore from the original file.
Tests loading options from a config file.
d = str(tmpdir)
# Create sample files.
with open(op.join(d, 'bad.txt'), 'w') as f:
f.write('abandonned donn\n')
# NOTE(review): no body is visible for this with-block -- confirm.
with open(op.join(d, 'good.txt'), 'w') as f:
# Create a config file.
conffile = op.join(d, 'config.cfg')
# NOTE(review): the call that writes the two string pieces below (and possibly
# further pieces) is not visible here -- confirm against the original file.
with open(conffile, 'w') as f:
'skip = bad.txt\n'
'count = \n'
# Should fail when checking both.
code, stdout, _ = cs.main(d, count=True, std=True)
# Code in this case is not exit code, but count of misspellings.
assert code == 2
assert 'bad.txt' in stdout
# Should pass when skipping bad.txt
code, stdout, _ = cs.main('--config', conffile, d, count=True, std=True)
assert code == 0
assert 'bad.txt' not in stdout
@contextlib.contextmanager
def FakeStdin(text):
    """Temporarily replace ``sys.stdin`` with a stream containing *text*.

    Intended for use as ``with FakeStdin('y\\n'):``.  Inside the block,
    reads from ``sys.stdin`` consume *text*; on exit the previous
    ``sys.stdin`` object is put back, even when the body raises.

    Parameters
    ----------
    text : str
        The content that reads from ``sys.stdin`` will return.
    """
    # Pick the StringIO implementation that matches the running interpreter.
    if sys.version_info[0] == 2:
        from StringIO import StringIO
    else:
        from io import StringIO
    oldin = sys.stdin
    sys.stdin = StringIO(text)
    try:
        yield
    finally:
        # Always restore, so a failing assertion inside the block cannot
        # leave the fake stream installed for later tests.
        sys.stdin = oldin