Check errors don't exist as valid words in the aspell dictionary (#1142)
* Check errors don't exist as valid words in the aspell dictionary
* Install aspell on Travis
* Add some missing packages
* Remove a virtual package
* Just install the version of aspell-python we need
* Keep flake8 happy
* Switch to warnings and count them, so we can see all the aspell errors in one go
* Handle different encoding of the word and aspell
* Try and fix the encoding conversion
* Find out the encoding type
* Don't assert on number of warnings
* Don't record warnings for now
* Warn on all the encoding options
* pprint the encoding
* More warning work
* Use the actual encoding type
* Correct the logic
* ENH: Multi dict support
* FIX: Fixes after merge
* FIX: Better error check
* FIX: More thorough testing, locations
* FIX: Try newer aspell
* FIX: Move to new dict
* FIX: Move
* FIX: Restore removals from #1181
* FIX: One from #1362
* Add rare chack->check, cheque,
* Minor tidy of some dictionary check code
* Add some more suggestions.
* Fix the whitespace
* Really fix the whitespace
* FIX: Refactor requirement
* Log an error when aspell not found and not required
* Fix the error logging
* Test all variants of present and missing from aspell
* Undo some tuple tidying
* Fix the true/false values used
* Skip some flake8 tests
* Fix the test cases
* Correct the not in aspell test and fix some test cases
* Remove a duplicate test
* Use a test word that isn't a typo
* Set the ideal aspell detection logic for each dictionary
I suspect we'll have to relax this as more obscure words won't be in the aspell dictionary
* Be more realistic given the size of the dictionary
* Fix a flake8 error
* Fix another line length error
* FIX: Move
* FIX: Make visible, simplify
Co-authored-by: Eric Larson <larson.eric.d@gmail.com>
diff --git a/.travis.yml b/.travis.yml
index 0a1c770..535d8c0 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,6 +4,7 @@
# for it to be on multiple physical lines, so long as you remember: - There
# can't be any leading "-"s - All newlines will be removed, so use ";"s
+dist: bionic
language: python
cache: pip
python:
@@ -12,6 +13,14 @@
- 3.6
- 3.7
- 3.8
+env:
+ REQUIRE_ASPELL=true
+
+addons:
+ apt:
+ packages:
+ - libaspell-dev
+ - aspell-en
before_install:
- source tools/travis_tools.sh
@@ -22,6 +31,8 @@
- python --version # just to check
- pip install -U pip wheel # upgrade to latest pip find 3.5 wheels; wheel to avoid errors
- retry pip install pytest pytest-cov flake8 coverage codecov chardet setuptools docutils
+ - if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then retry pip install aspell-python-py2; fi
+ - if [ ${TRAVIS_PYTHON_VERSION:0:1} == "3" ]; then retry pip install aspell-python-py3; fi
- cd $SRC_DIR
install:
diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py
index 98f7ea7..ca45d6f 100755
--- a/codespell_lib/_codespell.py
+++ b/codespell_lib/_codespell.py
@@ -35,8 +35,18 @@
# Users might want to link this file into /usr/local/bin, so we resolve the
# symbolic link path to the real path if necessary.
-default_dictionary = os.path.join(os.path.dirname(os.path.realpath(__file__)),
- 'data', 'dictionary.txt')
+_data_root = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data')
+_builtin_dictionaries = ( # name, desc, file suffix, err in aspell, correction in aspell # noqa: E501
+# The aspell tests here aren't the ideal state, but the None values are
+# realistic for obscure words
+ ('clear', 'for unambiguous errors', '', False, None),
+ ('rare', 'for rare but valid words', '_rare', None, None),
+ ('informal', 'for informal words', '_informal', True, True),
+ ('code', 'for words common to code and/or mathematics', '_code', None, None), # noqa: E501
+ ('names', 'for valid proper names that might be typos', '_names', None, None), # noqa: E501
+ ('en-GB_to_en-US', 'for corrections from en-GB to en-US', '_en-GB_to_en-US', True, True), # noqa: E501
+)
+_builtin_default = 'clear,rare'
# OPTIONS:
#
@@ -216,11 +226,21 @@
help='write changes in place if possible')
parser.add_argument('-D', '--dictionary',
- action='append', metavar='FILE',
+ action='append',
help='Custom dictionary file that contains spelling '
'corrections. If this flag is not specified or '
'equals "-" then the default dictionary is used. '
'This option can be specified multiple times.')
+ builtin_opts = ', '.join(
+ '%r %s' % (d[0], d[1]) for d in _builtin_dictionaries)
+ parser.add_argument('--builtin',
+ dest='builtin', default=_builtin_default,
+ metavar='BUILTIN-LIST',
+ help='Comma-separated list of builtin dictionaries '
+ 'to include (when "-D -" or no "-D" is passed). '
+ 'Current options are:\n%s. The default is '
+ '"--builtin %s".'
+ % (builtin_opts, _builtin_default))
parser.add_argument('-I', '--ignore-words',
action='append', metavar='FILE',
help='File that contains words which will be ignored '
@@ -603,7 +623,7 @@
ignore_words_files = options.ignore_words or []
ignore_words = set()
for ignore_words_file in ignore_words_files:
- if not os.path.exists(ignore_words_file):
+ if not os.path.isfile(ignore_words_file):
print('ERROR: cannot find ignore-words file: %s' %
ignore_words_file, file=sys.stderr)
parser.print_help()
@@ -615,16 +635,36 @@
for word in comma_separated_words.split(','):
ignore_words.add(word.strip())
- dictionaries = options.dictionary or [default_dictionary]
- misspellings = dict()
+ if options.dictionary:
+ dictionaries = options.dictionary
+ else:
+ dictionaries = ['-']
+ use_dictionaries = list()
for dictionary in dictionaries:
if dictionary == "-":
- dictionary = default_dictionary
- if not os.path.exists(dictionary):
- print('ERROR: cannot find dictionary file: %s' % dictionary,
- file=sys.stderr)
- parser.print_help()
- return 1
+ # figure out which builtin dictionaries to use
+ use = sorted(set(options.builtin.split(',')))
+ for u in use:
+ for builtin in _builtin_dictionaries:
+ if builtin[0] == u:
+ use_dictionaries.append(
+ os.path.join(_data_root, 'dictionary%s.txt'
+ % (builtin[2],)))
+ break
+ else:
+ print('ERROR: Unknown builtin dictionary: %s' % (u,),
+ file=sys.stderr)
+ parser.print_help()
+ return 1
+ else:
+ if not os.path.isfile(dictionary):
+ print('ERROR: cannot find dictionary file: %s' % dictionary,
+ file=sys.stderr)
+ parser.print_help()
+ return 1
+ use_dictionaries.append(dictionary)
+ misspellings = dict()
+ for dictionary in use_dictionaries:
build_dict(dictionary, misspellings, ignore_words)
colors = TermColors()
if not options.colors or sys.platform == 'win32':
diff --git a/codespell_lib/data/dictionary.txt b/codespell_lib/data/dictionary.txt
index 62004a9..bae964a 100644
--- a/codespell_lib/data/dictionary.txt
+++ b/codespell_lib/data/dictionary.txt
@@ -1441,7 +1441,6 @@
amgle->angle
amgles->angles
amiguous->ambiguous
-amin->main, disabled because amin might be a var name
amke->make
amking->making
ammend->amend
@@ -1624,7 +1623,6 @@
anoying->annoying
anoymous->anonymous
anroid->android
-ans->and
ansalisation->nasalisation
ansalization->nasalization
ansestors->ancestors
@@ -2091,7 +2089,6 @@
arithmentic->arithmetic
arithmetc->arithmetic
arithmethic->arithmetic
-arithmetics->arithmetic, arithmetics,
arithmitic->arithmetic
aritmetic->arithmetic
aritrary->arbitrary
@@ -2454,7 +2451,6 @@
atrtribute->attribute
atrtributes->attributes
attachd->attached
-attache->attaché, attached, attach,
attachement->attachment
attachements->attachments
attachen->attach
@@ -2698,9 +2694,6 @@
automaticalyy->automatically
automaticlly->automatically
automaticly->automatically
-automatize->automate
-automatized->automated
-automatizes->automates
autometic->automatic
autometically->automatically
automibile->automobile
@@ -2935,8 +2928,6 @@
backwardss->backwards
backware->backward
backwark->backward
-backword->backward, backword,
-backwords->backwards, backwords,
backwrad->backward
bactracking->backtracking
bacup->backup
@@ -3256,12 +3247,10 @@
Blitzkreig->Blitzkrieg
bload->bloat
bloaded->bloated
-bloc->block, bloc,
blocack->blockack
bloccks->blocks
blocekd->blocked
blockin->blocking
-blocs->blocks, blocs,
bloddy->bloody
blodk->block
blohted->bloated
@@ -3279,7 +3268,6 @@
bobard->board, bombard,
bocome->become
boddy->body
-bodgy->body, disabled because one might want to allow informal spelling
bodydbuilder->bodybuilder
boffer->buffer
bofore->before
@@ -3539,8 +3527,6 @@
busines->business
busineses->business, businesses,
busness->business
-buss->bus
-busses->buses
bussiness->business
bussy->busy
buton->button
@@ -3618,7 +3604,6 @@
cahdidates->candidates
cahe->cache
cahes->caches
-cahgne->change
cahgned->changed
cahgnes->changes
cahgning->changing
@@ -3661,7 +3646,6 @@
calcuate->calculate
calcuations->calculations
calculaion->calculation
-calculatable->calculatable, calculable,
calculatble->calculatable, calculable,
calculater->calculator
calculatted->calculated
@@ -3786,7 +3770,6 @@
canonival->canonical
canot->cannot
cant'->can't
-cant->can't, cant,
cant;->can't
canvase->canvas
caost->coast
@@ -3874,7 +3857,6 @@
cartilidge->cartilage
cartrige->cartridge
caryy->carry
-cas->case, disabled because of common abbreviations
cascace->cascade
case-insensitivy->case-insensitivity
case-insenstive->case-insensitive
@@ -4097,10 +4079,8 @@
challanges->challenges
challege->challenge
Champange->Champagne
-chancel->cancel
chanceled->canceled
chanceling->canceling
-chancels->cancels
chanched->changed
chancnel->channel, cancel,
chane->change, chain,
@@ -4362,7 +4342,6 @@
circomvents->circumvents
circual->circular
circuitery->circuitry
-circularly->circular, circularly,
circulaton->circulation
circumferance->circumference
circumferencial->circumferential
@@ -4424,7 +4403,6 @@
clared->cleared
clarety->clarity
claring->clearing
-clas->class, disabled because of name clash in c++
clasic->classic
clasical->classical
clasically->classically
@@ -4530,7 +4508,6 @@
clustred->clustered
cmak->cmake
cmmands->commands
-cmo->com, disabled due to lots of false positives
cmobination->combination
cmoputer->computer
cmoputers->computers
@@ -4877,8 +4854,6 @@
commemmorate->commemorate
commemmorating->commemorating
commen->commend, comment, common,
-commend->commend, comment, command,
-commends->commends, comments, commands,
commenet->comment
commenetd->commented
commeneted->commented
@@ -5802,7 +5777,6 @@
consept->concept
consepts->concepts
consequentely->consequently
-consequentially->consequently
consequentually->consequently
consequeseces->consequences
consequetive->consecutive
@@ -6295,7 +6269,6 @@
convovling->convolving
convserion->conversion
conyak->cognac
-coo->coup, coo,
coodinate->coordinate
coodinates->coordinates
coodrinate->coordinate
@@ -6379,7 +6352,6 @@
copoying->copying
coppermines->coppermine
coppied->copied
-copping->coping, copying, cropping, disabled due to being a legit word
coppy->copy, choppy,
copright->copyright
coprighted->copyrighted
@@ -6393,7 +6365,6 @@
copurights->copyrights
coputer->computer
copver->cover
-copyable->copyable, copiable, disabled because of name clash in C++
copyed->copied
copyeight->copyright
copyeighted->copyrighted
@@ -6676,11 +6647,8 @@
crashaes->crashes
crasheed->crashed
crashees->crashes
-crasher->crash, disabled because it denotes something that crashes
-crashers->crashes, disabled because it denotes things that crash
crashess->crashes
crashs->crashes
-crated->created, crated,
creaate->create
creaed->created
creaeted->created
@@ -6699,7 +6667,6 @@
creatre->create
creatred->created
creats->creates
-creche->crèche
credate->created
credintial->credential
credintials->credentials
@@ -6718,7 +6685,6 @@
crewsant->croissant
cricital->critical
crirical->critical
-cristal->crystal, cristal,
critcial->critical
criteak->critique
critera->criteria
@@ -6774,7 +6740,6 @@
crticised->criticised
crucialy->crucially
crucifiction->crucifixion
-crufts->cruft
cruncing->crunching
crurrent->current
crusies->cruises
@@ -7014,7 +6979,6 @@
deaktivate->deactivate
deaktivated->deactivated
dealed->dealt
-dealign->dealing, dealign,
dealilng->dealing
dealloacte->deallocate
deallocaed->deallocated
@@ -7337,7 +7301,6 @@
defering->deferring
deferreal->deferral
deffensively->defensively
-deffer->differ, defer,
deffered->differed, deferred,
defference->difference, deference,
defferent->different, deferent,
@@ -7366,7 +7329,6 @@
definatly->definitely
defind->defined, defund,
definded->defined, defunded,
-define'd->defined, disabled due to #define
defineas->defines
defineed->defined
definend->defined
@@ -7428,8 +7390,6 @@
degnerated->degenerated
degnerates->degenerates
degrads->degrades
-degrate->degrate, degrade,
-degrates->degrates, degrades,
degreee->degree
degreeee->degree
degreeees->degrees
@@ -7600,7 +7560,6 @@
dependancy->dependency
dependancys->dependencies
dependand->dependent
-dependant->dependent
dependcies->dependencies
dependcy->dependency
dependecies->dependencies
@@ -8586,7 +8545,6 @@
disconneting->disconnecting
disconnets->disconnects
disconnnect->disconnect
-discontentment->discontent
discontigious->discontiguous
discontigous->discontiguous
discontiguities->discontinuities
@@ -9029,7 +8987,6 @@
doesnt;->doesn't
doess->does
doestn't->doesn't
-dof->of, doff, disabled because it's a common abbreviation
doign->doing
doiing->doing
doiuble->double
@@ -9055,7 +9012,6 @@
donnot->do not
dont'->don't
dont't->don't
-dont->don't, disabled because of var names
donwload->download
donwloaded->downloaded
donwloading->downloading
@@ -11230,9 +11186,6 @@
fallabck->fallback
fallbck->fallback
fallhrough->fallthrough
-fallowed->followed, fallowed,
-fallowing->following, fallowing,
-fallows->follows, fallows,
fallthruogh->fallthrough
falltrough->fallthrough
falt->fault
@@ -11272,7 +11225,6 @@
faught->fought
fauilures->failures
faund->found
-fave->save
favoutrable->favourable
faymus->famous
fcound->found
@@ -11499,11 +11451,11 @@
flie->file
floading->floating, flooding,
floading-add->floating-add
-florescent->fluorescent
-floresent->fluorescent
+floatation->flotation
+floresent->fluorescent, florescent,
floride->fluoride
floting->floating
-flourescent->fluorescent
+flourescent->fluorescent, florescent,
flouride->fluoride
flourine->fluorine
flourishment->flourishing
@@ -11542,7 +11494,6 @@
followign->following
followin->following
followind->following
-followings->followings, following,
followng->following
follwing->following
follwo->follow
@@ -11592,7 +11543,6 @@
foreing->foreign
forementionned->aforementioned
foreward->foreword, forward,
-forewarded->forewarded, forwarded,
forfiet->forfeit
forgeround->foreground
forgoten->forgotten
@@ -11623,8 +11573,6 @@
formost->foremost
formt->format
formua->formula
-formule->formula, formulas, formule,
-formules->formulas
forr->for
forsaw->foresaw
forse->force
@@ -11675,7 +11623,6 @@
foundaries->foundries
foundary->foundry
Foundland->Newfoundland
-fount->fount, found,
fourties->forties
fourty->forty
fouth->fourth
@@ -11739,7 +11686,6 @@
frist->first
frmat->format
frmo->from
-fro->for, from, fro,
froce->force
frok->from
fromal->formal
@@ -11755,8 +11701,6 @@
frop->drop
fropm->from
frops->drops
-froward->forward
-frowarded->forwarded
frozee->frozen
fschk->fsck
ftrunacate->ftruncate
@@ -11772,7 +11716,6 @@
fufilled->fulfilled
fule->file
fulfiled->fulfilled
-fulfilment->fulfillment
fullfiled->fulfilled
fullfiling->fulfilling
fullfill->fulfill
@@ -12094,8 +12037,6 @@
gingam->gingham
gioen->given
gir->git
-gird->grid, gird,
-girds->grids, girds,
giser->geyser
gisers->geysers
gitar->guitar
@@ -12138,7 +12079,6 @@
golbally->globally
golbaly->globally
gonig->going
-gonna->going to, disabled because one might want to allow informal spelling
gool->ghoul
gord->gourd
gormay->gourmet
@@ -12223,7 +12163,6 @@
guaranteey->guaranty
guarantes->guarantees
guarantie->guarantee
-guarantied->guaranteed
guarbage->garbage
guared->guard, guarded,
guareded->guarded
@@ -12357,8 +12296,6 @@
Guatamalan->Guatemalan
gud->good
gude->guide, good,
-guerilla->guerrilla
-guerillas->guerrillas
guerrila->guerrilla
guerrilas->guerrillas
gueswork->guesswork
@@ -12490,7 +12427,6 @@
harrassing->harassing
harrassment->harassment
harrassments->harassments
-hart->heart, harm,
harth->hearth
harware->hardware
has'nt->hasn't
@@ -12592,7 +12528,6 @@
hidded->hidden
hiddin->hidden, hiding,
hidding->hiding, hidden,
-hided->hidden, hid,
hiden->hidden
hiearchies->hierarchies
hiearchy->hierarchy
@@ -12662,7 +12597,6 @@
hirearcy->hierarchy
hismelf->himself
hisory->history
-hist->heist, his,
histgram->histogram
histocompatability->histocompatibility
histori->history, historic,
@@ -12746,7 +12680,6 @@
housand->thousand
houskeeping->housekeeping
housr->hours, house,
-hove->hove, have, hover, love,
hovever->however
hovewer->however
howerver->however
@@ -12898,7 +12831,6 @@
idividually->individually
idividuals->individuals
iechart->piechart
-iff->if, disabled due to valid mathematical concept
ifself->itself
ifset->if set
ignoded->ignored
@@ -13206,7 +13138,6 @@
implicite->implicit, implicitly,
implicitely->implicitly
implicitley->implicitly
-implicity->implicitly, disabled due to common misspelling
implict->implicit
implictly->implicitly
impliment->implement
@@ -13303,7 +13234,6 @@
inaccessable->inaccessible
inaccuraccies->inaccuracies
inaccuraccy->inaccuracy
-inactivate->inactivate, deactivate,
inacurate->inaccurate
inacurracies->inaccuracies
inacurrate->inaccurate
@@ -13358,7 +13288,6 @@
incluging->including
incluide->include
incluing->including
-incluse->include, incluse,
inclused->included
inclusinve->inclusive
incmrement->increment
@@ -13532,7 +13461,6 @@
indentifies->identifies
indentify->identify
indentifying->identifying
-indention->indentation
indentit->identity
indentity->identity
indenx->index
@@ -13598,7 +13526,6 @@
indisputible->indisputable
indisputibly->indisputably
indistiguishable->indistinguishable
-indite->indict
indivdual->individual
indivdually->individually
indivdualy->individually
@@ -13904,7 +13831,6 @@
inludung->including
inluence->influence
inlusive->inclusive
-inly->only, inly,
inmediate->immediate
inmediatelly->immediately
inmediately->immediately
@@ -14565,7 +14491,6 @@
iritable->irritable
iritated->irritated
ironicly->ironically
-irregardless->regardless
irrelavent->irrelevant
irrelevent->irrelevant
irrelvant->irrelevant
@@ -14622,7 +14547,6 @@
itertation->iteration
iteself->itself
itesm->items
-ith->with, disabled because of ordinal form of i like nth
itialise->initialise
itialised->initialised
itialises->initialises
@@ -14752,7 +14676,6 @@
klick->click
klicked->clicked
klicks->clicks
-knifes->knives
knive->knife
kno->know
knowlage->knowledge
@@ -14881,7 +14804,6 @@
leaast->least
leace->leave
leack->leak
-leaded->led, lead, leaded,
leagacy->legacy
leagal->legal
leagalise->legalise
@@ -14902,7 +14824,6 @@
leapyear->leap year
leapyears->leap years
leary->leery
-leas->least, lease,
leaset->least
leat->lead, leak, least, leaf,
leathal->lethal
@@ -15040,8 +14961,6 @@
ligh->light, lie, lye,
ligher->lighter, liar, liger,
lighers->lighters, liars, ligers,
-lightening->lightening, lightning, lighting,
-lightsensor->light sensor
lightweigh->lightweight
lightwight->lightweight
lightyear->light year
@@ -15111,7 +15030,6 @@
listernes->listeners
listner->listener
listners->listeners
-liszt->list, liszt,
litature->literature
liteautrue->literature
literaly->literally
@@ -15146,7 +15064,6 @@
loadig->loading
loadin->loading
loadning->loading
-loafing->loading, loafing,
locahost->localhost
localation->location
localed->located
@@ -15195,8 +15112,6 @@
looknig->looking
looop->loop
loopup->lookup
-loos->loose, lose,
-loosing->losing
loosley->loosely
loosly->loosely
loosy->lossy
@@ -15206,7 +15121,6 @@
losted->lost
lotation->rotation
lotharingen->lothringen
-lousily->lousily, loosely,
lowd->load
lpatform->platform
lsat->last
@@ -15329,7 +15243,6 @@
mangager->manager
mangement->management
mangementt->management
-manger->manager
manifacture->manufacture
manifacturer->manufacturer
manifacturers->manufacturers
@@ -15400,7 +15313,6 @@
mappeds->mapped
mappping->mapping
mapppings->mappings
-marge->merge
marger->merger, marker,
margers->mergers, markers,
marging->margin, merging,
@@ -15457,7 +15369,6 @@
mateiral->material
mateirals->materials
matemathical->mathematical
-mater->matter, master, mother, mater,
materaial->material
materaials->materials
materail->material
@@ -15568,7 +15479,6 @@
medevial->medieval
medhod->method
medhods->methods
-medias->media, mediums,
mediciney->mediciny
medievel->medieval
mediterainnean->mediterranean
@@ -15736,7 +15646,6 @@
Micrsft->Microsoft
Micrsoft->Microsoft
midified->modified
-midwifes->midwives
migrateable->migratable
migt->might, midget,
migth->might
@@ -15916,7 +15825,6 @@
mistery->mystery
misteryous->mysterious
mistmatches->mismatches
-mitre->miter
mittigate->mitigate
miximum->maximum
mixure->mixture
@@ -16012,7 +15920,6 @@
mofifies->modifies
mofify->modify
mohammedans->muslims
-moil->soil, mohel,
moint->mount
moleclues->molecules
momement->moment
@@ -16082,7 +15989,6 @@
mostlky->mostly
mosture->moisture
mosty->mostly
-mot->not
motation->notation, rotation, motivation,
mothing->nothing
motiviated->motivated
@@ -16091,7 +15997,6 @@
motoroloa->motorola
moudle->module
moudule->module
-moue->mouse
mounth->month, mouth,
mountian->mountain
mountpiont->mountpoint
@@ -16105,7 +16010,6 @@
moutns->mounts
movebackwrd->movebackward
moveble->movable
-movei->movie, disabled due to assembly code
movemement->movement
movemements->movements
movememnt->movement
@@ -16200,7 +16104,6 @@
musn't->mustn't
mustator->mutator
muste->must
-mut->must, mutt, moot, disabled because of Rust keyword
mutablity->mutability
mutbale->mutable
mutch->much
@@ -16793,7 +16696,6 @@
newslines->newlines
newtork->network
Newyorker->New Yorker
-nickle->nickel
nighbor->neighbor
nighborhood->neighborhood
nighboring->neighboring
@@ -16979,7 +16881,6 @@
nowdays->nowadays
nowe->now
ntification->notification
-nto->not, disabled due to \n
nuber->number
nubering->numbering
nubmer->number
@@ -17844,7 +17745,6 @@
panicing->panicking
pannel->panel
pannels->panels
-panting->panting, painting,
pantomine->pantomime
paoition->position
paor->pair
@@ -18067,7 +17967,6 @@
pattren->pattern, patron,
pattrens->patterns, patrons,
pavillion->pavilion
-payed->paid
paínt->paint
pblisher->publisher
pbulisher->publisher
@@ -18375,7 +18274,6 @@
plagarism->plagiarism
plalform->platform
planation->plantation
-planed->planned
plantext->plaintext
plantiff->plaintiff
plase->please
@@ -18417,7 +18315,6 @@
pleae->please
pleaee->please
pleaes->please
-pleas->please
pleasd->pleased
pleasent->pleasant
pleasently->pleasantly
@@ -18882,10 +18779,7 @@
prefferably->preferably
preffered->preferred
prefices->prefixes
-preform->perform
preformance->performance
-preformed->performed
-preforms->performs
pregancies->pregnancies
prehaps->perhaps
preiod->period
@@ -18942,7 +18836,6 @@
preriod->period
preriodic->periodic
prersistent->persistent
-pres->press
presance->presence
prescrition->prescription
prescritions->prescriptions
@@ -19416,7 +19309,6 @@
proseletyzing->proselytizing
prosess->process
prosessor->processor
-prosses->process, processes, possess, prosses,
prosseses->processes, possesses,
protable->portable
protaganist->protagonist
@@ -19579,7 +19471,6 @@
purcahse->purchase
purgest->purges
puritannical->puritanical
-purportive->supportive, purportive,
purposedly->purposely
purpotedly->purportedly
purpse->purpose
@@ -19969,8 +19860,6 @@
readapted->re-adapted
readble->readable
readby->read, read by,
-readd->readd, re-add, read,
-readded->readded, read,
readeable->readable
readed->read, readd, readded,
reademe->README
@@ -20005,7 +19894,6 @@
realtive->relative, reactive,
realy->really
realyl->really
-ream->ream, stream,
reamde->README
reamins->remains
reampping->remapping, revamping,
@@ -20408,7 +20296,6 @@
referenses->references
referenz->reference
referenzes->references
-referer->referrer, disabled as in http 1.0 spec
refererd->referred
refererence->reference
referers->referrer, referrers,
@@ -20595,7 +20482,6 @@
rekursed->recursed
rekursion->recursion
rekursive->recursive
-rela->real, disabled due to lots of false positives
relaative->relative
relaease->release
relaese->release
@@ -21496,7 +21382,6 @@
retsart->restart
retsarts->restarts
retun->return
-retuned->retuned, returned,
retunr->return, retune,
retunrned->returned
retunrs->returns
@@ -21518,7 +21403,6 @@
returs->returns
retursn->returns
retutning->returning
-retying->retrying
reudce->reduce
reudced->reduced
reudces->reduces
@@ -21572,7 +21456,6 @@
reverced->reversed
reverece->reference, reverence,
revereces->references
-revered->revered, reversed,
reverese->reverse
reveresed->reversed
reveret->revert
@@ -21622,7 +21505,6 @@
rigth->right
rigths->rights
rigurous->rigorous
-rime->rhyme, rime,
riminder->reminder
riminders->reminders
riminding->reminding
@@ -21670,7 +21552,6 @@
rotatios->rotations
rotats->rotates
rouding->rounding
-rouge->rogue, rouge,
roughtly->roughly
rougly->roughly
rouine->routine
@@ -21803,7 +21684,6 @@
sasy->says, sassy,
satandard->standard
satandards->standards
-sate->state, sate,
satelite->satellite
satelites->satellites
satelitte->satellite
@@ -21832,7 +21712,6 @@
satuadays->Saturdays
saught->sought
sav->save
-savable->saveable
savees->saves
saveing->saving
savelt->svelte, save it,
@@ -21986,8 +21865,6 @@
searchin->searching
searchs->searches
seatch->search
-secant->second, disabled due to valid mathematical concept
-secants->seconds, disabled due to valid mathematical concept
secceeded->seceded, succeeded,
seccond->second
secconds->seconds
@@ -22037,7 +21914,6 @@
sedereal->sidereal
seeem->seem
seeen->seen
-seeked->sought, disabled because of JS event name
seelect->select
seemes->seems
seemless->seamless
@@ -22374,7 +22250,6 @@
settins->settings
settlment->settlement
settng->setting
-setts->sets
settter->setter
settters->setters
settting->setting
@@ -22543,7 +22418,6 @@
siganture->signature
sigantures->signatures
sigen->sign
-sightly->slightly
sigificance->significance
siginificant->significant
siginificantly->significantly
@@ -22637,15 +22511,11 @@
simultanously->simultaneously
simutaneously->simultaneously
sinature->signature
-sinc->sinc, synch, sync, sink, since, disabled due to valid mathematical concept
sincerley->sincerely
sincerly->sincerely
-sincs->sincs, syncs, sinks, since,
singal->signal, single,
singaled->signaled
singals->signals
-singe->singe, single,
-singed->signed, singled, singed,
singel->single, signal,
singelar->singular
singelarity->singularity
@@ -22758,7 +22628,6 @@
sligthly->slightly
sligtly->slightly
sliped->slipped
-slippy->slippery
sliseshow->slideshow
slowy->slowly
sluggify->slugify
@@ -22977,14 +22846,12 @@
specialiced->specialised, specialized,
specialitzed->specialised, specialized,
speciallized->specialised, specialized,
-specialties->specialities
specialy->specially
specic->specific
specication->specification
specidic->specific
specied->specified
speciefied->specified
-specif->specific, specify,
specifactions->specifications
specifc->specific
specifcation->specification
@@ -23281,7 +23148,6 @@
stocastic->stochastic
stoer->store
stoers->stores
-stoll->still, disabled because of name clash in C++
stomache->stomach
stompted->stomped
stong->strong
@@ -23307,7 +23173,6 @@
stragegy->strategy
straigh-forward->straightforward
straighforward->straightforward
-straightaway->straight away
straightfoward->straightforward
straigt->straight
straigth->straight
@@ -23865,7 +23730,6 @@
suppporting->supporting
suppports->supports
suppres->suppress
-suppressable->suppressable, suppressible,
suppressingd->suppressing
supprt->support
supprted->supported
@@ -24513,7 +24377,6 @@
thats;->that's
thay->they
thck->thick
-thead->thread, disabled due to the HTML tag
theard->thread
thearding->threading
theards->threads
@@ -24547,8 +24410,6 @@
therapudic->therapeutic
therby->thereby
thereads->threads
-therefor->therefore, therefor,
-therefrom->there from
therem->there, theorem,
thereom->theorem
thererin->therein
@@ -24559,7 +24420,6 @@
therough->through, thorough,
therstat->thermostat
thes->this, these,
-theses->these, thesis, theses,
theshold->threshold
thesholds->thresholds
thess->this, these,
@@ -24613,8 +24473,6 @@
thne->then
thnig->thing
thnigs->things
-tho->though, to, thou, tho,
-thoe->those, though,
thonic->chthonic
thorugh->through, thorough,
thoruoghly->thoroughly
@@ -24675,7 +24533,6 @@
throuth->through
throwed->threw, thrown,
throwgh->through
-thru->through, thru,
thrue->through
thruogh->through
thruoghout->throughout
@@ -24722,7 +24579,6 @@
tigthly->tightly
tihkn->think
tihs->this
-tim->time, Tim, disabled due to being a person's name
timedlta->timedelta
timeing->timing
timeot->timeout
@@ -24765,7 +24621,6 @@
tobot->robot
toches->touches
tocksen->toxin
-todays->today's, disabled because of var names
todya->today
toekn->token
togehter->together
@@ -24777,7 +24632,6 @@
toglled->toggled
togther->together
toi->to, toy,
-toke->took
tolarable->tolerable
tolelerance->tolerance
tolen->token
@@ -25291,7 +25145,6 @@
uggly->ugly
ugglyness->ugliness
uglyness->ugliness
-uint->unit, disabled due to being a data type
uique->unique
uise->use
uite->suite
@@ -25666,8 +25519,6 @@
unknonw->unknown
unknonwn->unknown
unknonws->unknowns
-unknow->unknown, unknow,
-unknows->unknowns, unknows,
unknwoing->unknowing
unknwoingly->unknowingly
unknwon->unknown
@@ -25929,7 +25780,6 @@
untranslateable->untranslatable
untrasposed->untransposed
untrustworty->untrustworthy
-untypically->atypically
unued->unused
ununsed->unused
ununsual->unusual
@@ -26326,7 +26176,6 @@
verson->version
versoned->versioned
versons->versions
-vertexes->vertices
vertextes->vertices
vertexts->vertices
vertial->vertical
@@ -26362,7 +26211,6 @@
viatnamese->vietnamese
vicefersa->vice-versa
videostreamming->videostreaming
-vie->via
vieport->viewport
vieports->viewports
vietnamesea->Vietnamese
@@ -26413,7 +26261,6 @@
visble->visible
visblie->visible
visbly->visibly
-vise->vice, vise,
visiable->visible
visiably->visibly
visibale->visible
@@ -26557,10 +26404,7 @@
wakup->wakeup
wallthickness->wall thickness
wan't->want, wasn't,
-wan->want
wan;t->want, wasn't,
-wanna->want to, disabled because one might want to allow informal spelling
-want's->wants
want;s->wants
wantto->want to
wappers->wrappers
@@ -26605,7 +26449,6 @@
webiste->website
wedensday->Wednesday
wednesdaay->Wednesday
-wee->we
wege->wedge
wehere->where
wehn->when
@@ -26620,7 +26463,6 @@
weitght->weight
well-reknown->well-renowned, well renown,
well-reknowned->well-renowned, well renowned,
-wen->we, when,
wendesday->Wednesday
wendsay->Wednesday
wensday->Wednesday
@@ -26641,7 +26483,6 @@
whatepsace->whitespace
whatepsaces->whitespaces
whather->whether, weather,
-whats->what's
whch->which
whcih->which
whe->when, we,
@@ -26657,7 +26498,6 @@
whereever->wherever
wherether->whether
whery->where
-whet->when, what, wet, whet,
wheteher->whether
whetehr->whether
wheter->whether
@@ -26672,7 +26512,6 @@
whihc->which
whihch->which
whilest->whilst
-whiling->while
whilw->while
whioch->which
whiped->wiped
@@ -26726,9 +26565,7 @@
wigdet->widget
wigdets->widgets
wighed->weighed, wicked,
-wight->weight, white, right, write, wight,
wighted->weighted, weighed,
-wights->weights, whites, rights, wights,
wih->with
wihch->which
wihich->which
@@ -26766,17 +26603,14 @@
wirting->writing
wirtten->written
wirtual->virtual
-wit->wit, with,
witable->writeable
witdh->width
wite->write, white,
witha->with a, with,
withdrawl->withdrawal, withdraw,
-withe->with
witheld->withheld
withh->with
withih->within
-withing->within
withinn->within
withion->within
witho->with
@@ -26837,7 +26671,6 @@
wonderfull->wonderful
wonderig->wondering
wont't->won't
-wont->won't, wont,
woraround->workaround
worarounds->workarounds
worbench->workbench
@@ -26897,7 +26730,6 @@
wouldnt'->wouldn't
wouldnt->wouldn't
wouldnt;->wouldn't
-wounder->wonder, wounder,
wounderful->wonderful
wouold->would
wouuld->would
@@ -27002,7 +26834,6 @@
zlot->slot
zombe->zombie
zomebie->zombie
-zoon->zoom, zoon,
zuser->user
__cpluspus->__cplusplus
__cpusplus->__cplusplus
diff --git a/codespell_lib/data/dictionary_code.txt b/codespell_lib/data/dictionary_code.txt
new file mode 100644
index 0000000..ff1118c
--- /dev/null
+++ b/codespell_lib/data/dictionary_code.txt
@@ -0,0 +1,23 @@
+amin->main
+cas->case
+clas->class
+cmo->com
+define'd->defined
+dof->of, doff,
+dont->don't
+iff->if
+ith->with
+movei->movie
+mut->must, mutt, moot,
+nto->not
+referer->referrer
+rela->real
+secant->second
+secants->seconds
+seeked->sought
+sinc->sync, sink, since,
+sincs->syncs, sinks, since,
+stoll->still
+thead->thread
+todays->today's
+uint->unit
diff --git a/codespell_lib/data/dictionary_en-GB_to_en-US.txt b/codespell_lib/data/dictionary_en-GB_to_en-US.txt
new file mode 100644
index 0000000..3b21d04
--- /dev/null
+++ b/codespell_lib/data/dictionary_en-GB_to_en-US.txt
@@ -0,0 +1,3 @@
+minimise->minimize
+mitre->miter
+mould->mold
diff --git a/codespell_lib/data/dictionary_informal.txt b/codespell_lib/data/dictionary_informal.txt
new file mode 100644
index 0000000..8a4aab6
--- /dev/null
+++ b/codespell_lib/data/dictionary_informal.txt
@@ -0,0 +1,4 @@
+gonna->going to
+wanna->want to
+tho->though, to, thou,
+thru->through
diff --git a/codespell_lib/data/dictionary_names.txt b/codespell_lib/data/dictionary_names.txt
new file mode 100644
index 0000000..61f7b93
--- /dev/null
+++ b/codespell_lib/data/dictionary_names.txt
@@ -0,0 +1,3 @@
+tim->time
+liszt->list
+wight->weight, white, right, write,
diff --git a/codespell_lib/data/dictionary_rare.txt b/codespell_lib/data/dictionary_rare.txt
new file mode 100644
index 0000000..c647031
--- /dev/null
+++ b/codespell_lib/data/dictionary_rare.txt
@@ -0,0 +1,146 @@
+ans->and
+arithmetics->arithmetic
+attache->attaché, attached, attach,
+automatize->automate
+automatized->automated
+automatizes->automates
+backword->backward
+backwords->backwards
+bloc->block
+blocs->blocks
+bodgy->body
+buss->bus
+busses->buses
+calculatable->calculable
+cant->can't
+catalogue->catalog
+chack->check, cheque,
+chancel->cancel
+chancels->cancels
+circularly->circular
+commend->comment, command,
+commends->comments, commands,
+consequentially->consequently
+coo->coup
+copping->coping, copying, cropping,
+copyable->copiable
+crasher->crash
+crashers->crashes
+crated->created
+creche->crèche
+cristal->crystal
+crufts->cruft
+dealign->dealing
+degrate->degrade
+degrates->degrades
+deffer->differ, defer,
+dependant->dependent
+derails->details
+discontentment->discontent
+fallow->follow
+fallowed->followed
+fallowing->following
+fallows->follows
+fave->save
+florescent->fluorescent
+followings->following
+forewarded->forwarded
+formule->formula, formulas,
+formules->formulas
+fount->found
+fro->for, from,
+froward->forward
+fulfilment->fulfillment
+gird->grid
+girds->grids
+guarantied->guaranteed
+guerilla->guerrilla
+guerillas->guerrillas
+hart->heart, harm,
+hided->hidden, hid,
+hist->heist, his,
+hove->have, hover, love,
+implicity->implicitly
+inactivate->deactivate
+incluse->include
+indention->indentation
+indite->indict
+inly->only
+irregardless->regardless
+knifes->knives
+leaded->led, lead,
+leas->least, lease,
+lightening->lightning, lighting,
+loafing->loading
+loos->loose, lose,
+loosing->losing
+lousily->loosely
+manger->manager
+marge->merge
+mater->matter, master, mother,
+medias->media, mediums,
+memorise->memorize
+midwifes->midwives
+moil->soil, mohel,
+mot->not
+moue->mouse
+nickle->nickel
+panting->painting
+payed->paid
+planed->planned
+pleas->please
+preform->perform
+preformed->performed
+preforms->performs
+pres->press
+prosses->process, processes, possess,
+purportive->supportive
+readd->re-add, read,
+readded->read
+ream->stream
+retuned->returned
+retying->retrying
+revered->reversed
+rime->rhyme
+rouge->rogue
+sate->state
+savable->saveable
+setts->sets
+sightly->slightly
+singe->single
+singed->signed, singled,
+slippy->slippery
+specialties->specialities
+specif->specific, specify,
+steams->streams
+sting->string
+stings->strings
+straightaway->straight away
+suppressable->suppressible
+therefor->therefore
+therefrom->there from
+theses->these, thesis,
+toke->took
+tread->thread, treat,
+unknow->unknown
+unknows->unknowns
+untypically->atypically
+vertexes->vertices
+vie->via
+vise->vice
+wan->want
+want's->wants
+wee->we
+wen->we, when,
+whats->what's
+whet->when, what, wet,
+whiling->while
+wight->weight, white, right,
+wights->weights, whites, rights,
+wit->with
+withe->with
+wither->either, whether, weather,
+withing->within
+wont->won't
+wounder->wonder
+zoon->zoom
diff --git a/codespell_lib/tests/test_basic.py b/codespell_lib/tests/test_basic.py
index e0567bd..c1b51fd 100644
--- a/codespell_lib/tests/test_basic.py
+++ b/codespell_lib/tests/test_basic.py
@@ -31,24 +31,35 @@
def test_basic(tmpdir, capsys):
"""Test some basic functionality"""
assert cs.main('_does_not_exist_') == 0
- with open(op.join(str(tmpdir), 'tmp'), 'w') as f:
+ fname = op.join(str(tmpdir), 'tmp')
+ with open(fname, 'w') as f:
pass
assert cs.main('-D', 'foo', f.name) == 1, 'missing dictionary'
- try:
- assert 'cannot find dictionary' in capsys.readouterr()[1]
- assert cs.main(f.name) == 0, 'empty file'
- with open(f.name, 'a') as f:
- f.write('this is a test file\n')
- assert cs.main(f.name) == 0, 'good'
- with open(f.name, 'a') as f:
- f.write('abandonned\n')
- assert cs.main(f.name) == 1, 'bad'
- with open(f.name, 'a') as f:
- f.write('abandonned\n')
- assert cs.main(f.name) == 2, 'worse'
- finally:
- os.remove(f.name)
+ assert 'cannot find dictionary' in capsys.readouterr()[1]
+ assert cs.main(fname) == 0, 'empty file'
+ with open(fname, 'a') as f:
+ f.write('this is a test file\n')
+ assert cs.main(fname) == 0, 'good'
+ with open(fname, 'a') as f:
+ f.write('abandonned\n')
+ assert cs.main(fname) == 1, 'bad'
+ with open(fname, 'a') as f:
+ f.write('abandonned\n')
+ assert cs.main(fname) == 2, 'worse'
+ with open(fname, 'a') as f:
+ f.write('tim\ngonna\n')
+ assert cs.main(fname) == 2, 'with a name'
+ assert cs.main('--builtin', 'clear,rare,names,informal', fname) == 4
+ capsys.readouterr()
+ assert cs.main(fname, '--builtin', 'foo') == 1 # bad type sys.exit(1)
+ stdout = capsys.readouterr()[1]
+ assert 'Unknown builtin dictionary' in stdout
d = str(tmpdir)
+ assert cs.main(fname, '-D', op.join(d, 'foo')) == 1 # bad dict
+ stdout = capsys.readouterr()[1]
+ assert 'cannot find dictionary' in stdout
+ os.remove(fname)
+
with open(op.join(d, 'bad.txt'), 'w') as f:
f.write('abandonned\nAbandonned\nABANDONNED\nAbAnDoNnEd')
assert cs.main(d) == 4
diff --git a/codespell_lib/tests/test_dictionary.py b/codespell_lib/tests/test_dictionary.py
index 654ecdd..54e0e43 100644
--- a/codespell_lib/tests/test_dictionary.py
+++ b/codespell_lib/tests/test_dictionary.py
@@ -1,61 +1,174 @@
# -*- coding: utf-8 -*-
+import glob
import os.path as op
+import os
import re
+import warnings
+
+import pytest
+
+from codespell_lib._codespell import _builtin_dictionaries
+
+try:
+ import aspell
+ speller = aspell.Speller('lang', 'en')
+except Exception as exp: # probably ImportError, but maybe also language
+ speller = None
+ if os.getenv('REQUIRE_ASPELL', 'false').lower() == 'true':
+ raise RuntimeError(
+ 'Cannot run complete tests without aspell when '
+ 'REQUIRE_ASPELL=true. Got error during import:\n%s'
+ % (exp,))
+ else:
+ warnings.warn(
+ 'aspell not found, but not required, skipping aspell tests. Got '
+ 'error during import:\n%s' % (exp,))
+
+ws = re.compile(r'.*\s.*') # whitespace
+comma = re.compile(r'.*,.*') # comma
-def test_dictionary_formatting():
+# Filename, and whether its (errors, corrections) should be valid aspell words
+_data_dir = op.join(op.dirname(__file__), '..', 'data')
+_fnames_in_aspell = [
+ (op.join(_data_dir, 'dictionary%s.txt' % d[2]), d[3:5])
+ for d in _builtin_dictionaries]
+fname_params = pytest.mark.parametrize('fname, in_aspell', _fnames_in_aspell)
+
+
+def test_dictionaries_exist():
+ """Test consistency of dictionaries."""
+ doc_fnames = set(op.basename(f[0]) for f in _fnames_in_aspell)
+ got_fnames = set(op.basename(f)
+ for f in glob.glob(op.join(_data_dir, '*.txt')))
+ assert doc_fnames == got_fnames
+
+
+@fname_params
+def test_dictionary_formatting(fname, in_aspell):
"""Test that all dictionary entries are valid."""
- err_dict = dict()
- ws = re.compile(r'.*\s.*') # whitespace
- comma = re.compile(r'.*,.*') # comma
- with open(op.join(op.dirname(__file__), '..', 'data',
- 'dictionary.txt'), 'rb') as fid:
+ errors = list()
+ with open(fname, 'rb') as fid:
for line in fid:
err, rep = line.decode('utf-8').split('->')
err = err.lower()
rep = rep.rstrip('\n')
- assert err != rep.lower(), 'error %r corrects to itself' % err
+ try:
+ _check_err_rep(err, rep, in_aspell, fname)
+ except AssertionError as exp:
+ errors.append(str(exp).split('\n')[0])
+ if len(errors):
+ raise AssertionError('\n' + '\n'.join(errors))
+
+
+def _check_aspell(word, msg, in_aspell, fname):
+ if speller is None:
+ return # cannot check
+ if in_aspell is None:
+ return # don't check
+ if ' ' in word:
+ return # can't check (easily)
+ this_in_aspell = speller.check(
+ word.encode(speller.ConfigKeys()['encoding'][1]))
+ end = 'be in aspell for dictionary %s' % (fname,)
+ if in_aspell: # should be a valid word in aspell
+ assert this_in_aspell, '%s should %s' % (msg, end)
+ else: # shouldn't be
+ assert not this_in_aspell, '%s should not %s' % (msg, end)
+
+
+def _check_err_rep(err, rep, in_aspell, fname):
+ assert ws.match(err) is None, 'error %r has whitespace' % err
+ assert comma.match(err) is None, 'error %r has a comma' % err
+ assert len(rep) > 0, ('error %s: correction %r must be non-empty'
+ % (err, rep))
+ assert not re.match(r'^\s.*', rep), ('error %s: correction %r '
+ 'cannot start with whitespace'
+ % (err, rep))
+ _check_aspell(err, 'error %r' % (err,), in_aspell[0], fname)
+ prefix = 'error %s: correction %r' % (err, rep)
+ for (r, msg) in [
+ (r'^,',
+ '%s starts with a comma'),
+ (r'\s,',
+ '%s contains a whitespace character followed by a comma'),
+ (r',\s\s',
+ '%s contains a comma followed by multiple whitespace characters'),
+ (r',[^ ]',
+ '%s contains a comma *not* followed by a space'),
+ (r'\s+$',
+ '%s has a trailing space'),
+ (r'^[^,]*,\s*$',
+ '%s has a single entry but contains a trailing comma')]:
+ assert not re.search(r, rep), (msg % (prefix,))
+ del msg
+ if rep.count(','):
+ assert rep.endswith(','), ('error %s: multiple corrections must end '
+ 'with trailing ","' % (err,))
+ reps = [r.strip() for r in rep.lower().split(',')]
+ reps = [r for r in reps if len(r)]
+ for r in reps:
+ assert err != r.lower(), ('error %r corrects to itself amongst others'
+ % (err,))
+ _check_aspell(
+ r, 'error %s: correction %r' % (err, r), in_aspell[1], fname)
+ assert len(set(reps)) == len(reps), 'entries are not (lower-case) unique'
+
+
+@pytest.mark.parametrize('err, rep, match', [
+ ('a a', 'bar', 'has whitespace'),
+ ('a,a', 'bar', 'has a comma'),
+ ('a', '', 'non-empty'),
+ ('a', ' bar', 'start with whitespace'),
+ ('a', ',bar', 'starts with a comma'),
+ ('a', 'bar,bat', '.*not.*followed by a space'),
+ ('a', 'bar ', 'trailing space'),
+ ('a', 'b ,ar', 'contains a whitespace.*followed by a comma'),
+ ('a', 'bar,', 'single entry.*comma'),
+ ('a', 'bar, bat', 'must end with trailing ","'),
+ ('a', 'a, bar,', 'corrects to itself amongst others'),
+ ('a', 'a', 'corrects to itself'),
+ ('a', 'bar, bar,', 'unique'),
+])
+def test_error_checking(err, rep, match):
+ """Test that our error checking works."""
+ with pytest.raises(AssertionError, match=match):
+ _check_err_rep(err, rep, (None, None), 'dummy')
+
+
+@pytest.mark.skipif(speller is None, reason='requires aspell')
+@pytest.mark.parametrize('err, rep, err_aspell, rep_aspell, match', [
+ # This doesn't raise any exceptions, so skip for now:
+ # pytest.param('a', 'uvw, bar,', None, None, 'should be in aspell'),
+ ('abc', 'uvw, bar,', True, None, 'should be in aspell'),
+ ('a', 'uvw, bar,', False, None, 'should not be in aspell'),
+ ('a', 'abc, uvw,', None, True, 'should be in aspell'),
+ ('abc', 'uvw, bar,', True, True, 'should be in aspell'),
+ ('abc', 'uvw, bar,', False, True, 'should be in aspell'),
+ ('a', 'bar, back,', None, False, 'should not be in aspell'),
+ ('abc', 'uvw, xyz,', True, False, 'should be in aspell'),
+ ('abc', 'uvw, bar,', False, False, 'should not be in aspell'),
+])
+def test_error_checking_in_aspell(err, rep, err_aspell, rep_aspell, match):
+ """Test that our error checking works with aspell."""
+ with pytest.raises(AssertionError, match=match):
+ _check_err_rep(err, rep, (err_aspell, rep_aspell), 'dummy')
+
+
+@fname_params
+def test_dictionary_looping(fname, in_aspell):
+ """Test that errors are unique and no correction is itself an error."""
+ err_dict = dict()
+ with open(fname, 'rb') as fid:
+ for line in fid:
+ err, rep = line.decode('utf-8').split('->')
+ err = err.lower()
assert err not in err_dict, 'error %r already exists' % err
- assert ws.match(err) is None, 'error %r has whitespace' % err
- assert comma.match(err) is None, 'error %r has a comma' % err
- assert len(rep) > 0, ('error %s: correction %r must be non-empty'
- % (err, rep))
- assert not re.match(r'^\s.*', rep), ('error %s: correction %r '
- 'cannot start with whitespace'
- % (err, rep))
- prefix = 'error %s: correction %r' % (err, rep)
- for (r, msg) in [
- (r'^,',
- '%s starts with a comma'),
- (r'\s,',
- '%s contains a whitespace character followed by a comma'),
- (r',\s\s',
- '%s contains a comma followed by multiple whitespace '
- 'characters'),
- (r',[^ ]',
- '%s contains a comma *not* followed by a space'),
- (r'\s+$',
- '%s has a trailing space'),
- (r'^[^,]*,\s*$',
- '%s has a single entry but contains a trailing comma'),
- ]:
- assert not re.search(r, rep), (msg % (prefix,))
- del msg
- rep_count = rep.count(',')
- if rep_count and not rep.endswith(','):
- assert 'disabled' in rep.split(',')[-1], \
- ('currently corrections must end with trailing "," (if '
- ' multiple corrections are available) or have "disabled" '
- 'in the comment')
+ rep = rep.rstrip('\n')
reps = [r.strip() for r in rep.lower().split(',')]
reps = [r for r in reps if len(r)]
err_dict[err] = reps
- unique = list()
- for r in reps:
- if r not in unique:
- unique.append(r)
- assert reps == unique, 'entries are not (lower-case) unique'
# check for corrections that are errors (but not self replacements)
for err in err_dict:
for r in err_dict[err]:
diff --git a/setup.py b/setup.py
index f9352ba..4a6a23e 100755
--- a/setup.py
+++ b/setup.py
@@ -48,7 +48,7 @@
'codespell_lib.data',
],
package_data={'codespell_lib': [
- op.join('data', 'dictionary.txt'),
+ op.join('data', 'dictionary*.txt'),
op.join('data', 'linux-kernel.exclude'),
]},
entry_points={