codespell_lib/tests/test_dictionary.py - codespell - Git at Google

 # -*- coding: utf-8 -*-

 import os.path as op
 import re


 def test_dictionary_formatting():
     """Test that all dictionary entries are valid."""
     err_dict = dict()
     ws = re.compile(r'.*\s.*')  # whitespace
     comma = re.compile(r'.*,.*')  # comma
     with open(op.join(op.dirname(__file__), '..', 'data',
                       'dictionary.txt'), 'rb') as fid:
         for line in fid:
             err, rep = line.decode('utf-8').split('->')
             err = err.lower()
             assert err != rep, 'error %r corrects to itself' % err
             assert err not in err_dict, 'error %r already exists' % err
             assert ws.match(err) is None, 'error %r has whitespace' % err
             assert comma.match(err) is None, 'error %r has a comma' % err
             rep = rep.rstrip('\n')
             assert len(rep) > 0, ('error %s: correction %r must be non-empty'
                                   % (err, rep))
             assert not re.match(r'^\s.*', rep), ('error %s: correction %r '
                                                  'cannot start with whitespace'
                                                  % (err, rep))
             for (r, msg) in [
                 (r'^,', 'error %s: correction %r starts with a comma'),
                 (r'\s,', 'error %s: correction %r contains a whitespace '
                          'character followed by a comma'),
                 (r',\s\s', 'error %s: correction %r contains a comma followed '
                            'by multiple whitespace characters'),
                 (r',[^ ]', 'error %s: correction %r contains a comma *not* '
                            'followed by a space')
             ]:
                 assert not re.search(r, rep), (msg % (err, rep))
             if rep.count(','):
                 if not rep.endswith(','):
                     assert 'disabled' in rep.split(',')[-1], \
                         ('currently corrections must end with trailing "," (if'
                          ' multiple corrections are available) or '
                          'have "disabled" in the comment')
             reps = [r.strip() for r in rep.lower().split(',')]
             reps = [r for r in reps if len(r)]
             err_dict[err] = reps
             unique = list()
             for r in reps:
                 if r not in unique:
                     unique.append(r)
             assert reps == unique, 'entries are not (lower-case) unique'
     # check for corrections that are errors (but not self replacements)
     for err in err_dict:
         for r in err_dict[err]:
             assert (r not in err_dict) or (r in err_dict[r]), \
                 ('error %s: correction %s is an error itself' % (err, r))
	# -- coding: utf-8 --

	import os.path as op
	import re


	def test_dictionary_formatting():
	"""Test that all dictionary entries are valid."""
	err_dict = dict()
	ws = re.compile(r'.\s.') # whitespace
	comma = re.compile(r'.,.') # comma
	with open(op.join(op.dirname(__file__), '..', 'data',
	'dictionary.txt'), 'rb') as fid:
	for line in fid:
	err, rep = line.decode('utf-8').split('->')
	err = err.lower()
	assert err != rep, 'error %r corrects to itself' % err
	assert err not in err_dict, 'error %r already exists' % err
	assert ws.match(err) is None, 'error %r has whitespace' % err
	assert comma.match(err) is None, 'error %r has a comma' % err
	rep = rep.rstrip('\n')
	assert len(rep) > 0, ('error %s: correction %r must be non-empty'
	% (err, rep))
	assert not re.match(r'^\s.*', rep), ('error %s: correction %r '
	'cannot start with whitespace'
	% (err, rep))
	for (r, msg) in [
	(r'^,', 'error %s: correction %r starts with a comma'),
	(r'\s,', 'error %s: correction %r contains a whitespace '
	'character followed by a comma'),
	(r',\s\s', 'error %s: correction %r contains a comma followed '
	'by multiple whitespace characters'),
	(r',[^ ]', 'error %s: correction %r contains a comma not '
	'followed by a space')
	]:
	assert not re.search(r, rep), (msg % (err, rep))
	if rep.count(','):
	if not rep.endswith(','):
	assert 'disabled' in rep.split(',')[-1], \
	('currently corrections must end with trailing "," (if'
	' multiple corrections are available) or '
	'have "disabled" in the comment')
	reps = [r.strip() for r in rep.lower().split(',')]
	reps = [r for r in reps if len(r)]
	err_dict[err] = reps
	unique = list()
	for r in reps:
	if r not in unique:
	unique.append(r)
	assert reps == unique, 'entries are not (lower-case) unique'
	# check for corrections that are errors (but not self replacements)
	for err in err_dict:
	for r in err_dict[err]:
	assert (r not in err_dict) or (r in err_dict[r]), \
	('error %s: correction %s is an error itself' % (err, r))