Check errors don't exist as valid words in the aspell dictionary (#1142)

* Check errors don't exist as valid words in the aspell dictionary

* Install aspell on Travis

* Add some missing packages

* Remove a virtual package

* Just install the version of aspell-python we need

* Keep flake8 happy

* Switch to warnings and count them, so we can see all the aspell errors in one go

* Handle different encoding of the word and aspell

* Try and fix the encoding conversion

* Find out the encoding type

* Don't assert on number of warnings

* Don't record warnings for now

* Warn on all the encoding options

* pprint the encoding

* More warning work

* Use the actual encoding type

* Correct the logic

* ENH: Multi dict support

* FIX: Fixes after merge

* FIX: Better error check

* FIX: More thorough testing, locations

* FIX: Try newer aspell

* FIX: Move to new dict

* FIX: Move

* FIX: Restore removals from #1181

* FIX: One from #1362

* Add rare chack->check, cheque,

* Minor tidy of some dictionary check code

* Add some more suggestions.

* Fix the whitespace

* Really fix the whitespace

* FIX: Refactor requirement

* Log an error when aspell not found and not required

* Fix the error logging

* Test all variants of present and missing from aspell

* Undo some tuple tidying

* Fix the true/false values used

* Skip some flake8 tests

* Fix the test cases

* Correct the not in aspell test and fix some test cases

* Remove a duplicate test

* Use a test word that isn't a typo

* Set the ideal aspell detection logic for each dictionary

I suspect we'll have to relax this, as more obscure words won't be in the aspell dictionary.

* Be more realistic given the size of the dictionary

* Fix a flake8 error

* Fix another line length error

* FIX: Move

* FIX: Make visible, simplify

Co-authored-by: Eric Larson <larson.eric.d@gmail.com>
diff --git a/.travis.yml b/.travis.yml
index 0a1c770..535d8c0 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,6 +4,7 @@
 # for it to be on multiple physical lines, so long as you remember: - There
 # can't be any leading "-"s - All newlines will be removed, so use ";"s
 
+dist: bionic
 language: python
 cache: pip
 python:
@@ -12,6 +13,14 @@
   - 3.6
   - 3.7
   - 3.8
+env:
+  REQUIRE_ASPELL=true
+
+addons:
+  apt:
+    packages:
+      - libaspell-dev
+      - aspell-en
 
 before_install:
     - source tools/travis_tools.sh
@@ -22,6 +31,8 @@
     - python --version  # just to check
     - pip install -U pip wheel  # upgrade to latest pip find 3.5 wheels; wheel to avoid errors
     - retry pip install pytest pytest-cov flake8 coverage codecov chardet setuptools docutils
+    - if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then retry pip install aspell-python-py2; fi
+    - if [ ${TRAVIS_PYTHON_VERSION:0:1} == "3" ]; then retry pip install aspell-python-py3; fi
     - cd $SRC_DIR
 
 install:
diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py
index 98f7ea7..ca45d6f 100755
--- a/codespell_lib/_codespell.py
+++ b/codespell_lib/_codespell.py
@@ -35,8 +35,18 @@
 
 # Users might want to link this file into /usr/local/bin, so we resolve the
 # symbolic link path to the real path if necessary.
-default_dictionary = os.path.join(os.path.dirname(os.path.realpath(__file__)),
-                                  'data', 'dictionary.txt')
+_data_root = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data')
+_builtin_dictionaries = (  # name, desc, file suffix, err in aspell, correction in aspell  # noqa: E501
+# The aspell tests here aren't the ideal state, but the None values are
+# realistic for obscure words
+    ('clear', 'for unambiguous errors', '', False, None),
+    ('rare', 'for rare but valid words', '_rare', None, None),
+    ('informal', 'for informal words', '_informal', True, True),
+    ('code', 'for words common to code and/or mathematics', '_code', None, None),  # noqa: E501
+    ('names', 'for valid proper names that might be typos', '_names', None, None),  # noqa: E501
+    ('en-GB_to_en-US', 'for corrections from en-GB to en-US', '_en-GB_to_en-US', True, True),  # noqa: E501
+)
+_builtin_default = 'clear,rare'
 
 # OPTIONS:
 #
@@ -216,11 +226,21 @@
                         help='write changes in place if possible')
 
     parser.add_argument('-D', '--dictionary',
-                        action='append', metavar='FILE',
+                        action='append',
                         help='Custom dictionary file that contains spelling '
                              'corrections. If this flag is not specified or '
                              'equals "-" then the default dictionary is used. '
                              'This option can be specified multiple times.')
+    builtin_opts = ', '.join(
+        '%r %s' % (d[0], d[1]) for d in _builtin_dictionaries)
+    parser.add_argument('--builtin',
+                        dest='builtin', default=_builtin_default,
+                        metavar='BUILTIN-LIST',
+                        help='Comma-separated list of builtin dictionaries '
+                        'to include (when "-D -" or no "-D" is passed). '
+                        'Current options are:\n%s. The default is '
+                        '"--builtin %s".'
+                        % (builtin_opts, _builtin_default))
     parser.add_argument('-I', '--ignore-words',
                         action='append', metavar='FILE',
                         help='File that contains words which will be ignored '
@@ -603,7 +623,7 @@
     ignore_words_files = options.ignore_words or []
     ignore_words = set()
     for ignore_words_file in ignore_words_files:
-        if not os.path.exists(ignore_words_file):
+        if not os.path.isfile(ignore_words_file):
             print('ERROR: cannot find ignore-words file: %s' %
                   ignore_words_file, file=sys.stderr)
             parser.print_help()
@@ -615,16 +635,36 @@
         for word in comma_separated_words.split(','):
             ignore_words.add(word.strip())
 
-    dictionaries = options.dictionary or [default_dictionary]
-    misspellings = dict()
+    if options.dictionary:
+        dictionaries = options.dictionary
+    else:
+        dictionaries = ['-']
+    use_dictionaries = list()
     for dictionary in dictionaries:
         if dictionary == "-":
-            dictionary = default_dictionary
-        if not os.path.exists(dictionary):
-            print('ERROR: cannot find dictionary file: %s' % dictionary,
-                  file=sys.stderr)
-            parser.print_help()
-            return 1
+            # figure out which builtin dictionaries to use
+            use = sorted(set(options.builtin.split(',')))
+            for u in use:
+                for builtin in _builtin_dictionaries:
+                    if builtin[0] == u:
+                        use_dictionaries.append(
+                            os.path.join(_data_root, 'dictionary%s.txt'
+                                         % (builtin[2],)))
+                        break
+                else:
+                    print('ERROR: Unknown builtin dictionary: %s' % (u,),
+                          file=sys.stderr)
+                    parser.print_help()
+                    return 1
+        else:
+            if not os.path.isfile(dictionary):
+                print('ERROR: cannot find dictionary file: %s' % dictionary,
+                      file=sys.stderr)
+                parser.print_help()
+                return 1
+            use_dictionaries.append(dictionary)
+    misspellings = dict()
+    for dictionary in use_dictionaries:
         build_dict(dictionary, misspellings, ignore_words)
     colors = TermColors()
     if not options.colors or sys.platform == 'win32':
diff --git a/codespell_lib/data/dictionary.txt b/codespell_lib/data/dictionary.txt
index 62004a9..bae964a 100644
--- a/codespell_lib/data/dictionary.txt
+++ b/codespell_lib/data/dictionary.txt
@@ -1441,7 +1441,6 @@
 amgle->angle
 amgles->angles
 amiguous->ambiguous
-amin->main, disabled because amin might be a var name
 amke->make
 amking->making
 ammend->amend
@@ -1624,7 +1623,6 @@
 anoying->annoying
 anoymous->anonymous
 anroid->android
-ans->and
 ansalisation->nasalisation
 ansalization->nasalization
 ansestors->ancestors
@@ -2091,7 +2089,6 @@
 arithmentic->arithmetic
 arithmetc->arithmetic
 arithmethic->arithmetic
-arithmetics->arithmetic, arithmetics,
 arithmitic->arithmetic
 aritmetic->arithmetic
 aritrary->arbitrary
@@ -2454,7 +2451,6 @@
 atrtribute->attribute
 atrtributes->attributes
 attachd->attached
-attache->attaché, attached, attach,
 attachement->attachment
 attachements->attachments
 attachen->attach
@@ -2698,9 +2694,6 @@
 automaticalyy->automatically
 automaticlly->automatically
 automaticly->automatically
-automatize->automate
-automatized->automated
-automatizes->automates
 autometic->automatic
 autometically->automatically
 automibile->automobile
@@ -2935,8 +2928,6 @@
 backwardss->backwards
 backware->backward
 backwark->backward
-backword->backward, backword,
-backwords->backwards, backwords,
 backwrad->backward
 bactracking->backtracking
 bacup->backup
@@ -3256,12 +3247,10 @@
 Blitzkreig->Blitzkrieg
 bload->bloat
 bloaded->bloated
-bloc->block, bloc,
 blocack->blockack
 bloccks->blocks
 blocekd->blocked
 blockin->blocking
-blocs->blocks, blocs,
 bloddy->bloody
 blodk->block
 blohted->bloated
@@ -3279,7 +3268,6 @@
 bobard->board, bombard,
 bocome->become
 boddy->body
-bodgy->body, disabled because one might want to allow informal spelling
 bodydbuilder->bodybuilder
 boffer->buffer
 bofore->before
@@ -3539,8 +3527,6 @@
 busines->business
 busineses->business, businesses,
 busness->business
-buss->bus
-busses->buses
 bussiness->business
 bussy->busy
 buton->button
@@ -3618,7 +3604,6 @@
 cahdidates->candidates
 cahe->cache
 cahes->caches
-cahgne->change
 cahgned->changed
 cahgnes->changes
 cahgning->changing
@@ -3661,7 +3646,6 @@
 calcuate->calculate
 calcuations->calculations
 calculaion->calculation
-calculatable->calculatable, calculable,
 calculatble->calculatable, calculable,
 calculater->calculator
 calculatted->calculated
@@ -3786,7 +3770,6 @@
 canonival->canonical
 canot->cannot
 cant'->can't
-cant->can't, cant,
 cant;->can't
 canvase->canvas
 caost->coast
@@ -3874,7 +3857,6 @@
 cartilidge->cartilage
 cartrige->cartridge
 caryy->carry
-cas->case, disabled because of common abbreviations
 cascace->cascade
 case-insensitivy->case-insensitivity
 case-insenstive->case-insensitive
@@ -4097,10 +4079,8 @@
 challanges->challenges
 challege->challenge
 Champange->Champagne
-chancel->cancel
 chanceled->canceled
 chanceling->canceling
-chancels->cancels
 chanched->changed
 chancnel->channel, cancel,
 chane->change, chain,
@@ -4362,7 +4342,6 @@
 circomvents->circumvents
 circual->circular
 circuitery->circuitry
-circularly->circular, circularly,
 circulaton->circulation
 circumferance->circumference
 circumferencial->circumferential
@@ -4424,7 +4403,6 @@
 clared->cleared
 clarety->clarity
 claring->clearing
-clas->class, disabled because of name clash in c++
 clasic->classic
 clasical->classical
 clasically->classically
@@ -4530,7 +4508,6 @@
 clustred->clustered
 cmak->cmake
 cmmands->commands
-cmo->com, disabled due to lots of false positives
 cmobination->combination
 cmoputer->computer
 cmoputers->computers
@@ -4877,8 +4854,6 @@
 commemmorate->commemorate
 commemmorating->commemorating
 commen->commend, comment, common,
-commend->commend, comment, command,
-commends->commends, comments, commands,
 commenet->comment
 commenetd->commented
 commeneted->commented
@@ -5802,7 +5777,6 @@
 consept->concept
 consepts->concepts
 consequentely->consequently
-consequentially->consequently
 consequentually->consequently
 consequeseces->consequences
 consequetive->consecutive
@@ -6295,7 +6269,6 @@
 convovling->convolving
 convserion->conversion
 conyak->cognac
-coo->coup, coo,
 coodinate->coordinate
 coodinates->coordinates
 coodrinate->coordinate
@@ -6379,7 +6352,6 @@
 copoying->copying
 coppermines->coppermine
 coppied->copied
-copping->coping, copying, cropping, disabled due to being a legit word
 coppy->copy, choppy,
 copright->copyright
 coprighted->copyrighted
@@ -6393,7 +6365,6 @@
 copurights->copyrights
 coputer->computer
 copver->cover
-copyable->copyable, copiable, disabled because of name clash in C++
 copyed->copied
 copyeight->copyright
 copyeighted->copyrighted
@@ -6676,11 +6647,8 @@
 crashaes->crashes
 crasheed->crashed
 crashees->crashes
-crasher->crash, disabled because it denotes something that crashes
-crashers->crashes, disabled because it denotes things that crash
 crashess->crashes
 crashs->crashes
-crated->created, crated,
 creaate->create
 creaed->created
 creaeted->created
@@ -6699,7 +6667,6 @@
 creatre->create
 creatred->created
 creats->creates
-creche->crèche
 credate->created
 credintial->credential
 credintials->credentials
@@ -6718,7 +6685,6 @@
 crewsant->croissant
 cricital->critical
 crirical->critical
-cristal->crystal, cristal,
 critcial->critical
 criteak->critique
 critera->criteria
@@ -6774,7 +6740,6 @@
 crticised->criticised
 crucialy->crucially
 crucifiction->crucifixion
-crufts->cruft
 cruncing->crunching
 crurrent->current
 crusies->cruises
@@ -7014,7 +6979,6 @@
 deaktivate->deactivate
 deaktivated->deactivated
 dealed->dealt
-dealign->dealing, dealign,
 dealilng->dealing
 dealloacte->deallocate
 deallocaed->deallocated
@@ -7337,7 +7301,6 @@
 defering->deferring
 deferreal->deferral
 deffensively->defensively
-deffer->differ, defer,
 deffered->differed, deferred,
 defference->difference, deference,
 defferent->different, deferent,
@@ -7366,7 +7329,6 @@
 definatly->definitely
 defind->defined, defund,
 definded->defined, defunded,
-define'd->defined, disabled due to #define
 defineas->defines
 defineed->defined
 definend->defined
@@ -7428,8 +7390,6 @@
 degnerated->degenerated
 degnerates->degenerates
 degrads->degrades
-degrate->degrate, degrade,
-degrates->degrates, degrades,
 degreee->degree
 degreeee->degree
 degreeees->degrees
@@ -7600,7 +7560,6 @@
 dependancy->dependency
 dependancys->dependencies
 dependand->dependent
-dependant->dependent
 dependcies->dependencies
 dependcy->dependency
 dependecies->dependencies
@@ -8586,7 +8545,6 @@
 disconneting->disconnecting
 disconnets->disconnects
 disconnnect->disconnect
-discontentment->discontent
 discontigious->discontiguous
 discontigous->discontiguous
 discontiguities->discontinuities
@@ -9029,7 +8987,6 @@
 doesnt;->doesn't
 doess->does
 doestn't->doesn't
-dof->of, doff, disabled because it's a common abbreviation
 doign->doing
 doiing->doing
 doiuble->double
@@ -9055,7 +9012,6 @@
 donnot->do not
 dont'->don't
 dont't->don't
-dont->don't, disabled because of var names
 donwload->download
 donwloaded->downloaded
 donwloading->downloading
@@ -11230,9 +11186,6 @@
 fallabck->fallback
 fallbck->fallback
 fallhrough->fallthrough
-fallowed->followed, fallowed,
-fallowing->following, fallowing,
-fallows->follows, fallows,
 fallthruogh->fallthrough
 falltrough->fallthrough
 falt->fault
@@ -11272,7 +11225,6 @@
 faught->fought
 fauilures->failures
 faund->found
-fave->save
 favoutrable->favourable
 faymus->famous
 fcound->found
@@ -11499,11 +11451,11 @@
 flie->file
 floading->floating, flooding,
 floading-add->floating-add
-florescent->fluorescent
-floresent->fluorescent
+floatation->flotation
+floresent->fluorescent, florescent,
 floride->fluoride
 floting->floating
-flourescent->fluorescent
+flourescent->fluorescent, florescent,
 flouride->fluoride
 flourine->fluorine
 flourishment->flourishing
@@ -11542,7 +11494,6 @@
 followign->following
 followin->following
 followind->following
-followings->followings, following,
 followng->following
 follwing->following
 follwo->follow
@@ -11592,7 +11543,6 @@
 foreing->foreign
 forementionned->aforementioned
 foreward->foreword, forward,
-forewarded->forewarded, forwarded,
 forfiet->forfeit
 forgeround->foreground
 forgoten->forgotten
@@ -11623,8 +11573,6 @@
 formost->foremost
 formt->format
 formua->formula
-formule->formula, formulas, formule,
-formules->formulas
 forr->for
 forsaw->foresaw
 forse->force
@@ -11675,7 +11623,6 @@
 foundaries->foundries
 foundary->foundry
 Foundland->Newfoundland
-fount->fount, found,
 fourties->forties
 fourty->forty
 fouth->fourth
@@ -11739,7 +11686,6 @@
 frist->first
 frmat->format
 frmo->from
-fro->for, from, fro,
 froce->force
 frok->from
 fromal->formal
@@ -11755,8 +11701,6 @@
 frop->drop
 fropm->from
 frops->drops
-froward->forward
-frowarded->forwarded
 frozee->frozen
 fschk->fsck
 ftrunacate->ftruncate
@@ -11772,7 +11716,6 @@
 fufilled->fulfilled
 fule->file
 fulfiled->fulfilled
-fulfilment->fulfillment
 fullfiled->fulfilled
 fullfiling->fulfilling
 fullfill->fulfill
@@ -12094,8 +12037,6 @@
 gingam->gingham
 gioen->given
 gir->git
-gird->grid, gird,
-girds->grids, girds,
 giser->geyser
 gisers->geysers
 gitar->guitar
@@ -12138,7 +12079,6 @@
 golbally->globally
 golbaly->globally
 gonig->going
-gonna->going to, disabled because one might want to allow informal spelling
 gool->ghoul
 gord->gourd
 gormay->gourmet
@@ -12223,7 +12163,6 @@
 guaranteey->guaranty
 guarantes->guarantees
 guarantie->guarantee
-guarantied->guaranteed
 guarbage->garbage
 guared->guard, guarded,
 guareded->guarded
@@ -12357,8 +12296,6 @@
 Guatamalan->Guatemalan
 gud->good
 gude->guide, good,
-guerilla->guerrilla
-guerillas->guerrillas
 guerrila->guerrilla
 guerrilas->guerrillas
 gueswork->guesswork
@@ -12490,7 +12427,6 @@
 harrassing->harassing
 harrassment->harassment
 harrassments->harassments
-hart->heart, harm,
 harth->hearth
 harware->hardware
 has'nt->hasn't
@@ -12592,7 +12528,6 @@
 hidded->hidden
 hiddin->hidden, hiding,
 hidding->hiding, hidden,
-hided->hidden, hid,
 hiden->hidden
 hiearchies->hierarchies
 hiearchy->hierarchy
@@ -12662,7 +12597,6 @@
 hirearcy->hierarchy
 hismelf->himself
 hisory->history
-hist->heist, his,
 histgram->histogram
 histocompatability->histocompatibility
 histori->history, historic,
@@ -12746,7 +12680,6 @@
 housand->thousand
 houskeeping->housekeeping
 housr->hours, house,
-hove->hove, have, hover, love,
 hovever->however
 hovewer->however
 howerver->however
@@ -12898,7 +12831,6 @@
 idividually->individually
 idividuals->individuals
 iechart->piechart
-iff->if, disabled due to valid mathematical concept
 ifself->itself
 ifset->if set
 ignoded->ignored
@@ -13206,7 +13138,6 @@
 implicite->implicit, implicitly,
 implicitely->implicitly
 implicitley->implicitly
-implicity->implicitly, disabled due to common misspelling
 implict->implicit
 implictly->implicitly
 impliment->implement
@@ -13303,7 +13234,6 @@
 inaccessable->inaccessible
 inaccuraccies->inaccuracies
 inaccuraccy->inaccuracy
-inactivate->inactivate, deactivate,
 inacurate->inaccurate
 inacurracies->inaccuracies
 inacurrate->inaccurate
@@ -13358,7 +13288,6 @@
 incluging->including
 incluide->include
 incluing->including
-incluse->include, incluse,
 inclused->included
 inclusinve->inclusive
 incmrement->increment
@@ -13532,7 +13461,6 @@
 indentifies->identifies
 indentify->identify
 indentifying->identifying
-indention->indentation
 indentit->identity
 indentity->identity
 indenx->index
@@ -13598,7 +13526,6 @@
 indisputible->indisputable
 indisputibly->indisputably
 indistiguishable->indistinguishable
-indite->indict
 indivdual->individual
 indivdually->individually
 indivdualy->individually
@@ -13904,7 +13831,6 @@
 inludung->including
 inluence->influence
 inlusive->inclusive
-inly->only, inly,
 inmediate->immediate
 inmediatelly->immediately
 inmediately->immediately
@@ -14565,7 +14491,6 @@
 iritable->irritable
 iritated->irritated
 ironicly->ironically
-irregardless->regardless
 irrelavent->irrelevant
 irrelevent->irrelevant
 irrelvant->irrelevant
@@ -14622,7 +14547,6 @@
 itertation->iteration
 iteself->itself
 itesm->items
-ith->with, disabled because of ordinal form of i like nth
 itialise->initialise
 itialised->initialised
 itialises->initialises
@@ -14752,7 +14676,6 @@
 klick->click
 klicked->clicked
 klicks->clicks
-knifes->knives
 knive->knife
 kno->know
 knowlage->knowledge
@@ -14881,7 +14804,6 @@
 leaast->least
 leace->leave
 leack->leak
-leaded->led, lead, leaded,
 leagacy->legacy
 leagal->legal
 leagalise->legalise
@@ -14902,7 +14824,6 @@
 leapyear->leap year
 leapyears->leap years
 leary->leery
-leas->least, lease,
 leaset->least
 leat->lead, leak, least, leaf,
 leathal->lethal
@@ -15040,8 +14961,6 @@
 ligh->light, lie, lye,
 ligher->lighter, liar, liger,
 lighers->lighters, liars, ligers,
-lightening->lightening, lightning, lighting,
-lightsensor->light sensor
 lightweigh->lightweight
 lightwight->lightweight
 lightyear->light year
@@ -15111,7 +15030,6 @@
 listernes->listeners
 listner->listener
 listners->listeners
-liszt->list, liszt,
 litature->literature
 liteautrue->literature
 literaly->literally
@@ -15146,7 +15064,6 @@
 loadig->loading
 loadin->loading
 loadning->loading
-loafing->loading, loafing,
 locahost->localhost
 localation->location
 localed->located
@@ -15195,8 +15112,6 @@
 looknig->looking
 looop->loop
 loopup->lookup
-loos->loose, lose,
-loosing->losing
 loosley->loosely
 loosly->loosely
 loosy->lossy
@@ -15206,7 +15121,6 @@
 losted->lost
 lotation->rotation
 lotharingen->lothringen
-lousily->lousily, loosely,
 lowd->load
 lpatform->platform
 lsat->last
@@ -15329,7 +15243,6 @@
 mangager->manager
 mangement->management
 mangementt->management
-manger->manager
 manifacture->manufacture
 manifacturer->manufacturer
 manifacturers->manufacturers
@@ -15400,7 +15313,6 @@
 mappeds->mapped
 mappping->mapping
 mapppings->mappings
-marge->merge
 marger->merger, marker,
 margers->mergers, markers,
 marging->margin, merging,
@@ -15457,7 +15369,6 @@
 mateiral->material
 mateirals->materials
 matemathical->mathematical
-mater->matter, master, mother, mater,
 materaial->material
 materaials->materials
 materail->material
@@ -15568,7 +15479,6 @@
 medevial->medieval
 medhod->method
 medhods->methods
-medias->media, mediums,
 mediciney->mediciny
 medievel->medieval
 mediterainnean->mediterranean
@@ -15736,7 +15646,6 @@
 Micrsft->Microsoft
 Micrsoft->Microsoft
 midified->modified
-midwifes->midwives
 migrateable->migratable
 migt->might, midget,
 migth->might
@@ -15916,7 +15825,6 @@
 mistery->mystery
 misteryous->mysterious
 mistmatches->mismatches
-mitre->miter
 mittigate->mitigate
 miximum->maximum
 mixure->mixture
@@ -16012,7 +15920,6 @@
 mofifies->modifies
 mofify->modify
 mohammedans->muslims
-moil->soil, mohel,
 moint->mount
 moleclues->molecules
 momement->moment
@@ -16082,7 +15989,6 @@
 mostlky->mostly
 mosture->moisture
 mosty->mostly
-mot->not
 motation->notation, rotation, motivation,
 mothing->nothing
 motiviated->motivated
@@ -16091,7 +15997,6 @@
 motoroloa->motorola
 moudle->module
 moudule->module
-moue->mouse
 mounth->month, mouth,
 mountian->mountain
 mountpiont->mountpoint
@@ -16105,7 +16010,6 @@
 moutns->mounts
 movebackwrd->movebackward
 moveble->movable
-movei->movie, disabled due to assembly code
 movemement->movement
 movemements->movements
 movememnt->movement
@@ -16200,7 +16104,6 @@
 musn't->mustn't
 mustator->mutator
 muste->must
-mut->must, mutt, moot, disabled because of Rust keyword
 mutablity->mutability
 mutbale->mutable
 mutch->much
@@ -16793,7 +16696,6 @@
 newslines->newlines
 newtork->network
 Newyorker->New Yorker
-nickle->nickel
 nighbor->neighbor
 nighborhood->neighborhood
 nighboring->neighboring
@@ -16979,7 +16881,6 @@
 nowdays->nowadays
 nowe->now
 ntification->notification
-nto->not, disabled due to \n
 nuber->number
 nubering->numbering
 nubmer->number
@@ -17844,7 +17745,6 @@
 panicing->panicking
 pannel->panel
 pannels->panels
-panting->panting, painting,
 pantomine->pantomime
 paoition->position
 paor->pair
@@ -18067,7 +17967,6 @@
 pattren->pattern, patron,
 pattrens->patterns, patrons,
 pavillion->pavilion
-payed->paid
 paínt->paint
 pblisher->publisher
 pbulisher->publisher
@@ -18375,7 +18274,6 @@
 plagarism->plagiarism
 plalform->platform
 planation->plantation
-planed->planned
 plantext->plaintext
 plantiff->plaintiff
 plase->please
@@ -18417,7 +18315,6 @@
 pleae->please
 pleaee->please
 pleaes->please
-pleas->please
 pleasd->pleased
 pleasent->pleasant
 pleasently->pleasantly
@@ -18882,10 +18779,7 @@
 prefferably->preferably
 preffered->preferred
 prefices->prefixes
-preform->perform
 preformance->performance
-preformed->performed
-preforms->performs
 pregancies->pregnancies
 prehaps->perhaps
 preiod->period
@@ -18942,7 +18836,6 @@
 preriod->period
 preriodic->periodic
 prersistent->persistent
-pres->press
 presance->presence
 prescrition->prescription
 prescritions->prescriptions
@@ -19416,7 +19309,6 @@
 proseletyzing->proselytizing
 prosess->process
 prosessor->processor
-prosses->process, processes, possess, prosses,
 prosseses->processes, possesses,
 protable->portable
 protaganist->protagonist
@@ -19579,7 +19471,6 @@
 purcahse->purchase
 purgest->purges
 puritannical->puritanical
-purportive->supportive, purportive,
 purposedly->purposely
 purpotedly->purportedly
 purpse->purpose
@@ -19969,8 +19860,6 @@
 readapted->re-adapted
 readble->readable
 readby->read, read by,
-readd->readd, re-add, read,
-readded->readded, read,
 readeable->readable
 readed->read, readd, readded,
 reademe->README
@@ -20005,7 +19894,6 @@
 realtive->relative, reactive,
 realy->really
 realyl->really
-ream->ream, stream,
 reamde->README
 reamins->remains
 reampping->remapping, revamping,
@@ -20408,7 +20296,6 @@
 referenses->references
 referenz->reference
 referenzes->references
-referer->referrer, disabled as in http 1.0 spec
 refererd->referred
 refererence->reference
 referers->referrer, referrers,
@@ -20595,7 +20482,6 @@
 rekursed->recursed
 rekursion->recursion
 rekursive->recursive
-rela->real, disabled due to lots of false positives
 relaative->relative
 relaease->release
 relaese->release
@@ -21496,7 +21382,6 @@
 retsart->restart
 retsarts->restarts
 retun->return
-retuned->retuned, returned,
 retunr->return, retune,
 retunrned->returned
 retunrs->returns
@@ -21518,7 +21403,6 @@
 returs->returns
 retursn->returns
 retutning->returning
-retying->retrying
 reudce->reduce
 reudced->reduced
 reudces->reduces
@@ -21572,7 +21456,6 @@
 reverced->reversed
 reverece->reference, reverence,
 revereces->references
-revered->revered, reversed,
 reverese->reverse
 reveresed->reversed
 reveret->revert
@@ -21622,7 +21505,6 @@
 rigth->right
 rigths->rights
 rigurous->rigorous
-rime->rhyme, rime,
 riminder->reminder
 riminders->reminders
 riminding->reminding
@@ -21670,7 +21552,6 @@
 rotatios->rotations
 rotats->rotates
 rouding->rounding
-rouge->rogue, rouge,
 roughtly->roughly
 rougly->roughly
 rouine->routine
@@ -21803,7 +21684,6 @@
 sasy->says, sassy,
 satandard->standard
 satandards->standards
-sate->state, sate,
 satelite->satellite
 satelites->satellites
 satelitte->satellite
@@ -21832,7 +21712,6 @@
 satuadays->Saturdays
 saught->sought
 sav->save
-savable->saveable
 savees->saves
 saveing->saving
 savelt->svelte, save it,
@@ -21986,8 +21865,6 @@
 searchin->searching
 searchs->searches
 seatch->search
-secant->second, disabled due to valid mathematical concept
-secants->seconds, disabled due to valid mathematical concept
 secceeded->seceded, succeeded,
 seccond->second
 secconds->seconds
@@ -22037,7 +21914,6 @@
 sedereal->sidereal
 seeem->seem
 seeen->seen
-seeked->sought, disabled because of JS event name
 seelect->select
 seemes->seems
 seemless->seamless
@@ -22374,7 +22250,6 @@
 settins->settings
 settlment->settlement
 settng->setting
-setts->sets
 settter->setter
 settters->setters
 settting->setting
@@ -22543,7 +22418,6 @@
 siganture->signature
 sigantures->signatures
 sigen->sign
-sightly->slightly
 sigificance->significance
 siginificant->significant
 siginificantly->significantly
@@ -22637,15 +22511,11 @@
 simultanously->simultaneously
 simutaneously->simultaneously
 sinature->signature
-sinc->sinc, synch, sync, sink, since, disabled due to valid mathematical concept
 sincerley->sincerely
 sincerly->sincerely
-sincs->sincs, syncs, sinks, since,
 singal->signal, single,
 singaled->signaled
 singals->signals
-singe->singe, single,
-singed->signed, singled, singed,
 singel->single, signal,
 singelar->singular
 singelarity->singularity
@@ -22758,7 +22628,6 @@
 sligthly->slightly
 sligtly->slightly
 sliped->slipped
-slippy->slippery
 sliseshow->slideshow
 slowy->slowly
 sluggify->slugify
@@ -22977,14 +22846,12 @@
 specialiced->specialised, specialized,
 specialitzed->specialised, specialized,
 speciallized->specialised, specialized,
-specialties->specialities
 specialy->specially
 specic->specific
 specication->specification
 specidic->specific
 specied->specified
 speciefied->specified
-specif->specific, specify,
 specifactions->specifications
 specifc->specific
 specifcation->specification
@@ -23281,7 +23148,6 @@
 stocastic->stochastic
 stoer->store
 stoers->stores
-stoll->still, disabled because of name clash in C++
 stomache->stomach
 stompted->stomped
 stong->strong
@@ -23307,7 +23173,6 @@
 stragegy->strategy
 straigh-forward->straightforward
 straighforward->straightforward
-straightaway->straight away
 straightfoward->straightforward
 straigt->straight
 straigth->straight
@@ -23865,7 +23730,6 @@
 suppporting->supporting
 suppports->supports
 suppres->suppress
-suppressable->suppressable, suppressible,
 suppressingd->suppressing
 supprt->support
 supprted->supported
@@ -24513,7 +24377,6 @@
 thats;->that's
 thay->they
 thck->thick
-thead->thread, disabled due to the HTML tag
 theard->thread
 thearding->threading
 theards->threads
@@ -24547,8 +24410,6 @@
 therapudic->therapeutic
 therby->thereby
 thereads->threads
-therefor->therefore, therefor,
-therefrom->there from
 therem->there, theorem,
 thereom->theorem
 thererin->therein
@@ -24559,7 +24420,6 @@
 therough->through, thorough,
 therstat->thermostat
 thes->this, these,
-theses->these, thesis, theses,
 theshold->threshold
 thesholds->thresholds
 thess->this, these,
@@ -24613,8 +24473,6 @@
 thne->then
 thnig->thing
 thnigs->things
-tho->though, to, thou, tho,
-thoe->those, though,
 thonic->chthonic
 thorugh->through, thorough,
 thoruoghly->thoroughly
@@ -24675,7 +24533,6 @@
 throuth->through
 throwed->threw, thrown,
 throwgh->through
-thru->through, thru,
 thrue->through
 thruogh->through
 thruoghout->throughout
@@ -24722,7 +24579,6 @@
 tigthly->tightly
 tihkn->think
 tihs->this
-tim->time, Tim, disabled due to being a person's name
 timedlta->timedelta
 timeing->timing
 timeot->timeout
@@ -24765,7 +24621,6 @@
 tobot->robot
 toches->touches
 tocksen->toxin
-todays->today's, disabled because of var names
 todya->today
 toekn->token
 togehter->together
@@ -24777,7 +24632,6 @@
 toglled->toggled
 togther->together
 toi->to, toy,
-toke->took
 tolarable->tolerable
 tolelerance->tolerance
 tolen->token
@@ -25291,7 +25145,6 @@
 uggly->ugly
 ugglyness->ugliness
 uglyness->ugliness
-uint->unit, disabled due to being a data type
 uique->unique
 uise->use
 uite->suite
@@ -25666,8 +25519,6 @@
 unknonw->unknown
 unknonwn->unknown
 unknonws->unknowns
-unknow->unknown, unknow,
-unknows->unknowns, unknows,
 unknwoing->unknowing
 unknwoingly->unknowingly
 unknwon->unknown
@@ -25929,7 +25780,6 @@
 untranslateable->untranslatable
 untrasposed->untransposed
 untrustworty->untrustworthy
-untypically->atypically
 unued->unused
 ununsed->unused
 ununsual->unusual
@@ -26326,7 +26176,6 @@
 verson->version
 versoned->versioned
 versons->versions
-vertexes->vertices
 vertextes->vertices
 vertexts->vertices
 vertial->vertical
@@ -26362,7 +26211,6 @@
 viatnamese->vietnamese
 vicefersa->vice-versa
 videostreamming->videostreaming
-vie->via
 vieport->viewport
 vieports->viewports
 vietnamesea->Vietnamese
@@ -26413,7 +26261,6 @@
 visble->visible
 visblie->visible
 visbly->visibly
-vise->vice, vise,
 visiable->visible
 visiably->visibly
 visibale->visible
@@ -26557,10 +26404,7 @@
 wakup->wakeup
 wallthickness->wall thickness
 wan't->want, wasn't,
-wan->want
 wan;t->want, wasn't,
-wanna->want to, disabled because one might want to allow informal spelling
-want's->wants
 want;s->wants
 wantto->want to
 wappers->wrappers
@@ -26605,7 +26449,6 @@
 webiste->website
 wedensday->Wednesday
 wednesdaay->Wednesday
-wee->we
 wege->wedge
 wehere->where
 wehn->when
@@ -26620,7 +26463,6 @@
 weitght->weight
 well-reknown->well-renowned, well renown,
 well-reknowned->well-renowned, well renowned,
-wen->we, when,
 wendesday->Wednesday
 wendsay->Wednesday
 wensday->Wednesday
@@ -26641,7 +26483,6 @@
 whatepsace->whitespace
 whatepsaces->whitespaces
 whather->whether, weather,
-whats->what's
 whch->which
 whcih->which
 whe->when, we,
@@ -26657,7 +26498,6 @@
 whereever->wherever
 wherether->whether
 whery->where
-whet->when, what, wet, whet,
 wheteher->whether
 whetehr->whether
 wheter->whether
@@ -26672,7 +26512,6 @@
 whihc->which
 whihch->which
 whilest->whilst
-whiling->while
 whilw->while
 whioch->which
 whiped->wiped
@@ -26726,9 +26565,7 @@
 wigdet->widget
 wigdets->widgets
 wighed->weighed, wicked,
-wight->weight, white, right, write, wight,
 wighted->weighted, weighed,
-wights->weights, whites, rights, wights,
 wih->with
 wihch->which
 wihich->which
@@ -26766,17 +26603,14 @@
 wirting->writing
 wirtten->written
 wirtual->virtual
-wit->wit, with,
 witable->writeable
 witdh->width
 wite->write, white,
 witha->with a, with,
 withdrawl->withdrawal, withdraw,
-withe->with
 witheld->withheld
 withh->with
 withih->within
-withing->within
 withinn->within
 withion->within
 witho->with
@@ -26837,7 +26671,6 @@
 wonderfull->wonderful
 wonderig->wondering
 wont't->won't
-wont->won't, wont,
 woraround->workaround
 worarounds->workarounds
 worbench->workbench
@@ -26897,7 +26730,6 @@
 wouldnt'->wouldn't
 wouldnt->wouldn't
 wouldnt;->wouldn't
-wounder->wonder, wounder,
 wounderful->wonderful
 wouold->would
 wouuld->would
@@ -27002,7 +26834,6 @@
 zlot->slot
 zombe->zombie
 zomebie->zombie
-zoon->zoom, zoon,
 zuser->user
 __cpluspus->__cplusplus
 __cpusplus->__cplusplus
diff --git a/codespell_lib/data/dictionary_code.txt b/codespell_lib/data/dictionary_code.txt
new file mode 100644
index 0000000..ff1118c
--- /dev/null
+++ b/codespell_lib/data/dictionary_code.txt
@@ -0,0 +1,23 @@
+amin->main
+cas->case
+clas->class
+cmo->com
+define'd->defined
+dof->of, doff,
+dont->don't
+iff->if
+ith->with
+movei->movie
+mut->must, mutt, moot,
+nto->not
+referer->referrer
+rela->real
+secant->second
+secants->seconds
+seeked->sought
+sinc->sync, sink, since,
+sincs->syncs, sinks, since,
+stoll->still
+thead->thread
+todays->today's
+uint->unit
diff --git a/codespell_lib/data/dictionary_en-GB_to_en-US.txt b/codespell_lib/data/dictionary_en-GB_to_en-US.txt
new file mode 100644
index 0000000..3b21d04
--- /dev/null
+++ b/codespell_lib/data/dictionary_en-GB_to_en-US.txt
@@ -0,0 +1,3 @@
+minimise->minimize
+mitre->miter
+mould->mold
diff --git a/codespell_lib/data/dictionary_informal.txt b/codespell_lib/data/dictionary_informal.txt
new file mode 100644
index 0000000..8a4aab6
--- /dev/null
+++ b/codespell_lib/data/dictionary_informal.txt
@@ -0,0 +1,4 @@
+gonna->going to
+wanna->want to
+tho->though, to, thou,
+thru->through
diff --git a/codespell_lib/data/dictionary_names.txt b/codespell_lib/data/dictionary_names.txt
new file mode 100644
index 0000000..61f7b93
--- /dev/null
+++ b/codespell_lib/data/dictionary_names.txt
@@ -0,0 +1,3 @@
+tim->time
+liszt->list
+wight->weight, white, right, write,
diff --git a/codespell_lib/data/dictionary_rare.txt b/codespell_lib/data/dictionary_rare.txt
new file mode 100644
index 0000000..c647031
--- /dev/null
+++ b/codespell_lib/data/dictionary_rare.txt
@@ -0,0 +1,146 @@
+ans->and
+arithmetics->arithmetic
+attache->attaché, attached, attach,
+automatize->automate
+automatized->automated
+automatizes->automates
+backword->backward
+backwords->backwards
+bloc->block
+blocs->blocks
+bodgy->body
+buss->bus
+busses->buses
+calculatable->calculable
+cant->can't
+catalogue->catalog
+chack->check, cheque,
+chancel->cancel
+chancels->cancels
+circularly->circular
+commend->comment, command,
+commends->comments, commands,
+consequentially->consequently
+coo->coup
+copping->coping, copying, cropping,
+copyable->copiable
+crasher->crash
+crashers->crashes
+crated->created
+creche->crèche
+cristal->crystal
+crufts->cruft
+dealign->dealing
+degrate->degrade
+degrates->degrades
+deffer->differ, defer,
+dependant->dependent
+derails->details
+discontentment->discontent
+fallow->follow
+fallowed->followed
+fallowing->following
+fallows->follows
+fave->save
+florescent->fluorescent
+followings->following
+forewarded->forwarded
+formule->formula, formulas,
+formules->formulas
+fount->found
+fro->for, from,
+froward->forward
+fulfilment->fulfillment
+gird->grid
+girds->grids
+guarantied->guaranteed
+guerilla->guerrilla
+guerillas->guerrillas
+hart->heart, harm,
+hided->hidden, hid,
+hist->heist, his,
+hove->have, hover, love,
+implicity->implicitly
+inactivate->deactivate
+incluse->include
+indention->indentation
+indite->indict
+inly->only
+irregardless->regardless
+knifes->knives
+leaded->led, lead,
+leas->least, lease,
+lightening->lightning, lighting,
+loafing->loading
+loos->loose, lose,
+loosing->losing
+lousily->loosely
+manger->manager
+marge->merge
+mater->matter, master, mother,
+medias->media, mediums,
+memorise->memorize
+midwifes->midwives
+moil->soil, mohel,
+mot->not
+moue->mouse
+nickle->nickel
+panting->painting
+payed->paid
+planed->planned
+pleas->please
+preform->perform
+preformed->performed
+preforms->performs
+pres->press
+prosses->process, processes, possess,
+purportive->supportive
+readd->re-add, read,
+readded->read
+ream->stream
+retuned->returned
+retying->retrying
+revered->reversed
+rime->rhyme
+rouge->rogue
+sate->state
+savable->saveable
+setts->sets
+sightly->slightly
+singe->single
+singed->signed, singled,
+slippy->slippery
+specialties->specialities
+specif->specific, specify,
+steams->streams
+sting->string
+stings->strings
+straightaway->straight away
+suppressable->suppressible
+therefor->therefore
+therefrom->there from
+theses->these, thesis,
+toke->took
+tread->thread, treat,
+unknow->unknown
+unknows->unknowns
+untypically->atypically
+vertexes->vertices
+vie->via
+vise->vice
+wan->want
+want's->wants
+wee->we
+wen->we, when,
+whats->what's
+whet->when, what, wet,
+whiling->while
+wight->weight, white, right,
+wights->weights, whites, rights,
+wit->with
+withe->with
+wither->either, whether, weather,
+withing->within
+wont->won't
+wounder->wonder
+zoon->zoom
diff --git a/codespell_lib/tests/test_basic.py b/codespell_lib/tests/test_basic.py
index e0567bd..c1b51fd 100644
--- a/codespell_lib/tests/test_basic.py
+++ b/codespell_lib/tests/test_basic.py
@@ -31,24 +31,35 @@
 def test_basic(tmpdir, capsys):
     """Test some basic functionality"""
     assert cs.main('_does_not_exist_') == 0
-    with open(op.join(str(tmpdir), 'tmp'), 'w') as f:
+    fname = op.join(str(tmpdir), 'tmp')
+    with open(fname, 'w') as f:
         pass
     assert cs.main('-D', 'foo', f.name) == 1, 'missing dictionary'
-    try:
-        assert 'cannot find dictionary' in capsys.readouterr()[1]
-        assert cs.main(f.name) == 0, 'empty file'
-        with open(f.name, 'a') as f:
-            f.write('this is a test file\n')
-        assert cs.main(f.name) == 0, 'good'
-        with open(f.name, 'a') as f:
-            f.write('abandonned\n')
-        assert cs.main(f.name) == 1, 'bad'
-        with open(f.name, 'a') as f:
-            f.write('abandonned\n')
-        assert cs.main(f.name) == 2, 'worse'
-    finally:
-        os.remove(f.name)
+    assert 'cannot find dictionary' in capsys.readouterr()[1]
+    assert cs.main(fname) == 0, 'empty file'
+    with open(fname, 'a') as f:
+        f.write('this is a test file\n')
+    assert cs.main(fname) == 0, 'good'
+    with open(fname, 'a') as f:
+        f.write('abandonned\n')
+    assert cs.main(fname) == 1, 'bad'
+    with open(fname, 'a') as f:
+        f.write('abandonned\n')
+    assert cs.main(fname) == 2, 'worse'
+    with open(fname, 'a') as f:
+        f.write('tim\ngonna\n')
+    assert cs.main(fname) == 2, 'with a name'
+    assert cs.main('--builtin', 'clear,rare,names,informal', fname) == 4
+    capsys.readouterr()
+    assert cs.main(fname, '--builtin', 'foo') == 1  # bad type sys.exit(1)
+    stdout = capsys.readouterr()[1]
+    assert 'Unknown builtin dictionary' in stdout
     d = str(tmpdir)
+    assert cs.main(fname, '-D', op.join(d, 'foo')) == 1  # bad dict
+    stdout = capsys.readouterr()[1]
+    assert 'cannot find dictionary' in stdout
+    os.remove(fname)
+
     with open(op.join(d, 'bad.txt'), 'w') as f:
         f.write('abandonned\nAbandonned\nABANDONNED\nAbAnDoNnEd')
     assert cs.main(d) == 4
diff --git a/codespell_lib/tests/test_dictionary.py b/codespell_lib/tests/test_dictionary.py
index 654ecdd..54e0e43 100644
--- a/codespell_lib/tests/test_dictionary.py
+++ b/codespell_lib/tests/test_dictionary.py
@@ -1,61 +1,174 @@
 # -*- coding: utf-8 -*-
 
+import glob
 import os.path as op
+import os
 import re
+import warnings
+
+import pytest
+
+from codespell_lib._codespell import _builtin_dictionaries
+
+try:
+    import aspell
+    speller = aspell.Speller('lang', 'en')
+except Exception as exp:  # probably ImportError, but maybe also language
+    speller = None
+    if os.getenv('REQUIRE_ASPELL', 'false').lower() == 'true':
+        raise RuntimeError(
+            'Cannot run complete tests without aspell when '
+            'REQUIRE_ASPELL=true. Got error during import:\n%s'
+            % (exp,))
+    else:
+        warnings.warn(
+            'aspell not found, but not required, skipping aspell tests. Got '
+            'error during import:\n%s' % (exp,))
+
+ws = re.compile(r'.*\s.*')  # whitespace
+comma = re.compile(r'.*,.*')  # comma
 
 
-def test_dictionary_formatting():
+# Filename, should be seen as errors in aspell or not
+_data_dir = op.join(op.dirname(__file__), '..', 'data')
+_fnames_in_aspell = [
+    (op.join(_data_dir, 'dictionary%s.txt' % d[2]), d[3:5])
+    for d in _builtin_dictionaries]
+fname_params = pytest.mark.parametrize('fname, in_aspell', _fnames_in_aspell)
+
+
+def test_dictionaries_exist():
+    """Test consistency of dictionaries."""
+    doc_fnames = set(op.basename(f[0]) for f in _fnames_in_aspell)
+    got_fnames = set(op.basename(f)
+                     for f in glob.glob(op.join(_data_dir, '*.txt')))
+    assert doc_fnames == got_fnames
+
+
+@fname_params
+def test_dictionary_formatting(fname, in_aspell):
     """Test that all dictionary entries are valid."""
-    err_dict = dict()
-    ws = re.compile(r'.*\s.*')  # whitespace
-    comma = re.compile(r'.*,.*')  # comma
-    with open(op.join(op.dirname(__file__), '..', 'data',
-                      'dictionary.txt'), 'rb') as fid:
+    errors = list()
+    with open(fname, 'rb') as fid:
         for line in fid:
             err, rep = line.decode('utf-8').split('->')
             err = err.lower()
             rep = rep.rstrip('\n')
-            assert err != rep.lower(), 'error %r corrects to itself' % err
+            try:
+                _check_err_rep(err, rep, in_aspell, fname)
+            except AssertionError as exp:
+                errors.append(str(exp).split('\n')[0])
+    if len(errors):
+        raise AssertionError('\n' + '\n'.join(errors))
+
+
+def _check_aspell(word, msg, in_aspell, fname):
+    if speller is None:
+        return  # cannot check
+    if in_aspell is None:
+        return  # don't check
+    if ' ' in word:
+        return  # can't check (easily)
+    this_in_aspell = speller.check(
+        word.encode(speller.ConfigKeys()['encoding'][1]))
+    end = 'be in aspell for dictionary %s' % (fname,)
+    if in_aspell:  # should be an error in aspell
+        assert this_in_aspell, '%s should %s' % (msg, end)
+    else:  # shouldn't be
+        assert not this_in_aspell, '%s should not %s' % (msg, end)
+
+
+def _check_err_rep(err, rep, in_aspell, fname):
+    assert ws.match(err) is None, 'error %r has whitespace' % err
+    assert comma.match(err) is None, 'error %r has a comma' % err
+    assert len(rep) > 0, ('error %s: correction %r must be non-empty'
+                          % (err, rep))
+    assert not re.match(r'^\s.*', rep), ('error %s: correction %r '
+                                         'cannot start with whitespace'
+                                         % (err, rep))
+    _check_aspell(err, 'error %r' % (err,), in_aspell[0], fname)
+    prefix = 'error %s: correction %r' % (err, rep)
+    for (r, msg) in [
+            (r'^,',
+             '%s starts with a comma'),
+            (r'\s,',
+             '%s contains a whitespace character followed by a comma'),
+            (r',\s\s',
+             '%s contains a comma followed by multiple whitespace characters'),
+            (r',[^ ]',
+             '%s contains a comma *not* followed by a space'),
+            (r'\s+$',
+             '%s has a trailing space'),
+            (r'^[^,]*,\s*$',
+             '%s has a single entry but contains a trailing comma')]:
+        assert not re.search(r, rep), (msg % (prefix,))
+    del msg
+    if rep.count(','):
+        assert rep.endswith(','), ('error %s: multiple corrections must end '
+                                   'with trailing ","' % (err,))
+    reps = [r.strip() for r in rep.lower().split(',')]
+    reps = [r for r in reps if len(r)]
+    for r in reps:
+        assert err != r.lower(), ('error %r corrects to itself amongst others'
+                                  % (err,))
+        _check_aspell(
+            r, 'error %s: correction %r' % (err, r), in_aspell[1], fname)
+    assert len(set(reps)) == len(reps), 'entries are not (lower-case) unique'
+
+
+@pytest.mark.parametrize('err, rep, match', [
+    ('a a', 'bar', 'has whitespace'),
+    ('a,a', 'bar', 'has a comma'),
+    ('a', '', 'non-empty'),
+    ('a', ' bar', 'start with whitespace'),
+    ('a', ',bar', 'starts with a comma'),
+    ('a', 'bar,bat', '.*not.*followed by a space'),
+    ('a', 'bar ', 'trailing space'),
+    ('a', 'b ,ar', 'contains a whitespace.*followed by a comma'),
+    ('a', 'bar,', 'single entry.*comma'),
+    ('a', 'bar, bat', 'must end with trailing ","'),
+    ('a', 'a, bar,', 'corrects to itself amongst others'),
+    ('a', 'a', 'corrects to itself'),
+    ('a', 'bar, bar,', 'unique'),
+])
+def test_error_checking(err, rep, match):
+    """Test that our error checking works."""
+    with pytest.raises(AssertionError, match=match):
+        _check_err_rep(err, rep, (None, None), 'dummy')
+
+
+@pytest.mark.skipif(speller is None, reason='requires aspell')
+@pytest.mark.parametrize('err, rep, err_aspell, rep_aspell, match', [
+    # This doesn't raise any exceptions, so skip for now:
+    # pytest.param('a', 'uvw, bar,', None, None, 'should be in aspell'),
+    ('abc', 'uvw, bar,', True, None, 'should be in aspell'),
+    ('a', 'uvw, bar,', False, None, 'should not be in aspell'),
+    ('a', 'abc, uvw,', None, True, 'should be in aspell'),
+    ('abc', 'uvw, bar,', True, True, 'should be in aspell'),
+    ('abc', 'uvw, bar,', False, True, 'should be in aspell'),
+    ('a', 'bar, back,', None, False, 'should not be in aspell'),
+    ('abc', 'uvw, xyz,', True, False, 'should be in aspell'),
+    ('abc', 'uvw, bar,', False, False, 'should not be in aspell'),
+])
+def test_error_checking_in_aspell(err, rep, err_aspell, rep_aspell, match):
+    """Test that our error checking works with aspell."""
+    with pytest.raises(AssertionError, match=match):
+        _check_err_rep(err, rep, (err_aspell, rep_aspell), 'dummy')
+
+
+@fname_params
+def test_dictionary_looping(fname, in_aspell):
+    """Test that all dictionary entries are valid."""
+    err_dict = dict()
+    with open(fname, 'rb') as fid:
+        for line in fid:
+            err, rep = line.decode('utf-8').split('->')
+            err = err.lower()
             assert err not in err_dict, 'error %r already exists' % err
-            assert ws.match(err) is None, 'error %r has whitespace' % err
-            assert comma.match(err) is None, 'error %r has a comma' % err
-            assert len(rep) > 0, ('error %s: correction %r must be non-empty'
-                                  % (err, rep))
-            assert not re.match(r'^\s.*', rep), ('error %s: correction %r '
-                                                 'cannot start with whitespace'
-                                                 % (err, rep))
-            prefix = 'error %s: correction %r' % (err, rep)
-            for (r, msg) in [
-                (r'^,',
-                 '%s starts with a comma'),
-                (r'\s,',
-                 '%s contains a whitespace character followed by a comma'),
-                (r',\s\s',
-                 '%s contains a comma followed by multiple whitespace '
-                 'characters'),
-                (r',[^ ]',
-                 '%s contains a comma *not* followed by a space'),
-                (r'\s+$',
-                 '%s has a trailing space'),
-                (r'^[^,]*,\s*$',
-                 '%s has a single entry but contains a trailing comma'),
-            ]:
-                assert not re.search(r, rep), (msg % (prefix,))
-            del msg
-            rep_count = rep.count(',')
-            if rep_count and not rep.endswith(','):
-                assert 'disabled' in rep.split(',')[-1], \
-                    ('currently corrections must end with trailing "," (if '
-                     ' multiple corrections are available) or have "disabled" '
-                     'in the comment')
+            rep = rep.rstrip('\n')
             reps = [r.strip() for r in rep.lower().split(',')]
             reps = [r for r in reps if len(r)]
             err_dict[err] = reps
-            unique = list()
-            for r in reps:
-                if r not in unique:
-                    unique.append(r)
-            assert reps == unique, 'entries are not (lower-case) unique'
     # check for corrections that are errors (but not self replacements)
     for err in err_dict:
         for r in err_dict[err]:
diff --git a/setup.py b/setup.py
index f9352ba..4a6a23e 100755
--- a/setup.py
+++ b/setup.py
@@ -48,7 +48,7 @@
               'codespell_lib.data',
           ],
           package_data={'codespell_lib': [
-              op.join('data', 'dictionary.txt'),
+              op.join('data', 'dictionary*.txt'),
               op.join('data', 'linux-kernel.exclude'),
           ]},
           entry_points={