Merge branch 'dict' into v1.1.x
diff --git a/README b/README index f9ed44b..47cba2d 100644 --- a/README +++ b/README
@@ -4,7 +4,13 @@ Fix common misspellings in text files. It's designed primarily for checking misspelled words in source code, but it can be used with other files as well. -USAGE +Information +=========== + +Mailing list: + codespell@googlegroups.com + +Usage ===== Check usage with ./codespell -h. There are a few command line options. We ship
diff --git a/TODO b/TODO new file mode 100644 index 0000000..4f86e06 --- /dev/null +++ b/TODO
@@ -0,0 +1,5 @@ +- Add option to disable changes to source code, allowing them only on comments + and text files + +BUGS +====
diff --git a/codespell.py b/codespell.py index cf81490..e7061d9 100755 --- a/codespell.py +++ b/codespell.py
@@ -14,7 +14,8 @@ # along with this program; if not, see # http://www.gnu.org/licenses/old-licenses/gpl-2.0.html. """ -Copyright (C) 2010 Lucas De Marchi <lucas.de.marchi@gmail.com> +Copyright (C) 2010-2011 Lucas De Marchi <lucas.de.marchi@gmail.com> +Copyright (C) 2011 ProFUSION embedded systems """ import sys @@ -25,10 +26,12 @@ USAGE = """ \t%prog [OPTIONS] dict_filename [file1 file2 ... fileN] """ -VERSION = '1.0' +VERSION = '1.1.1' misspellings = {} +exclude_lines = set() options = None +quiet_level = 0 encodings = [ 'utf-8', 'iso-8859-1' ] #OPTIONS: @@ -38,6 +41,15 @@ # If set to '-', it will be read from stdin # file1 .. fileN Files to check spelling +class QuietLevels: + NONE = 0 + ENCODING = 1 + BINARY_FILE = 2 + DISABLED_FIXES = 4 + NON_AUTOMATIC_FIXES = 8 + FIXES = 16 + + class Mispell: def __init__(self, data, fix, reason): self.data = data @@ -57,6 +69,22 @@ self.FWORD = '' self.DISABLE = '' +class Summary: + def __init__(self): + self.summary = {} + + def update(self, wrongword): + if wrongword in self.summary: + self.summary[wrongword] += 1 + else: + self.summary[wrongword] = 1 + + def __str__(self): + keys = list(self.summary.keys()) + keys.sort() + + return "\n".join(["{0}{1:{width}}".format(key, self.summary.get(key), width=15 - len(key)) for key in keys]) + # -.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:- def parse_options(args): @@ -73,6 +101,34 @@ action = 'store_true', default = False, help = 'write changes in place if possible') + parser.add_option('-s', '--summary', + action = 'store_true', default = False, + help = 'print summary of fixes') + + parser.add_option('-x', '--exclude-file', + help = 'FILE with lines that should not be changed', + metavar='FILE') + + parser.add_option('-i', '--interactive', + action='store', type='int', default=0, + help = 'Set interactive mode when writing changes. ' \ + '0 is the same of no interactivity; 1 makes ' \ + 'codespell ask confirmation; 2 ask user to ' \ + 'choose one fix when more than one is ' \ + 'available; 3 applies both 1 and 2') + + parser.add_option('-q', '--quiet-level', + action='store', type='int', default=0, + help = 'Bitmask that allows codespell to run quietly.'\ + '0: the default, in which all messages are '\ + 'printed. 1: disable warnings about wrong '\ + 'encoding. 2: disable warnings about binary'\ + ' file. 4: shut down warnings about automatic'\ + ' fixes that were disabled in dictionary. '\ + '8: don\'t print anything for non-automatic '\ + 'fixes. 16: don\'t print fixed files.') + + (o, args) = parser.parse_args() if (len(args) < 1): print('ERROR: you need to specify a dictionary!', file=sys.stderr) @@ -83,6 +139,10 @@ return o, args +def build_exclude_hashes(filename): + with open(filename, 'r') as f: + for line in f: + exclude_lines.add(line) def build_dict(filename): with open(filename, 'r') as f: @@ -123,12 +183,73 @@ return True -def parse_file(filename, colors): +def fix_case(word, fixword): + if word == word.capitalize(): + return fixword.capitalize() + elif word == word.upper(): + return fixword.upper() + # they are both lower case + # or we don't have any idea + return fixword + +def ask_for_word_fix(line, wrongword, misspelling, interactivity): + if interactivity <= 0: + return misspelling.fix, fix_case(wrongword, misspelling.data) + + if misspelling.fix and interactivity & 1: + r = '' + fixword = fix_case(wrongword, misspelling.data) + while not r: + print("%s\t%s ==> %s (Y/n) " % (line, wrongword, fixword), end='') + r = sys.stdin.readline().strip().upper() + if not r: r = 'Y' + if r != 'Y' and r != 'N': + print("Say 'y' or 'n'") + r = '' + + if r == 'N': + misspelling.fix = False + misspelling.fixword = '' + + elif (interactivity & 2) and not misspelling.reason: + # if it is not disabled, i.e. it just has more than one possible fix, + # we ask the user which word to use + + r = '' + opt = list(map(lambda x: x.strip(), misspelling.data.split(','))) + while not r: + print("%s Choose an option (blank for none): " % line, end='') + for i in range(len(opt)): + fixword = fix_case(wrongword, opt[i]) + print(" %d) %s" % (i, fixword), end='') + print(": ", end='') + sys.stdout.flush() + + n = sys.stdin.readline().strip() + if not n: + break + + try: + n = int(n) + r = opt[n] + except (ValueError, IndexError): + print("Not a valid option\n") + + if r: + misspelling.fix = True + misspelling.data = r + + return misspelling.fix, fix_case(wrongword, misspelling.data) + +def parse_file(filename, colors, summary): lines = None changed = False global misspellings global options global encodings + global quiet_level + + encoding = encodings[0] # if not defined, use UTF-8 if filename == '-': f = sys.stdin @@ -136,7 +257,8 @@ else: # ignore binary files if not istextfile(filename): - print("WARNING: Binary file: %s " % filename, file=sys.stderr) + if not quiet_level & QuietLevels.BINARY_FILE: + print("WARNING: Binary file: %s " % filename, file=sys.stderr) return curr = 0 @@ -146,13 +268,17 @@ lines = f.readlines() break except UnicodeDecodeError: - print('WARNING: Decoding file %s' % filename, file=sys.stderr) - print('WARNING: using encoding=%s failed. ' - % encodings[curr], file=sys.stderr) + + if not quiet_level & QuietLevels.ENCODING: + print('WARNING: Decoding file %s' % filename, + file=sys.stderr) + print('WARNING: using encoding=%s failed. ' + % encodings[curr], + file=sys.stderr) + print('WARNING: Trying next encoding: %s' % encodings[curr], + file=sys.stderr) curr += 1 - print('WARNING: Trying next encoding: %s' % encodings[curr], - file=sys.stderr) finally: f.close() @@ -162,23 +288,44 @@ file=sys.stderr) return + encoding = encodings[curr] + i = 1 + rx = re.compile(r"[\w']+") for line in lines: - for word in re.findall('\w+', line): + if line in exclude_lines: + i += 1 + continue + + fixed_words = set() + asked_for = set() + + for word in rx.findall(line): lword = word.lower() if lword in misspellings: - if word == word.capitalize(): - fixword = misspellings[lword].data.capitalize() - elif word == word.upper(): - fixword = misspellings[lword].data.upper() - else: - # even they are the same lower case or - # or we don't have any idea - fixword = misspellings[lword].data + fix = misspellings[lword].fix + fixword = fix_case(word, misspellings[lword].data) - if options.write_changes and misspellings[lword].fix: + if options.interactive and not lword in asked_for: + fix, fixword = ask_for_word_fix(lines[i - 1], word, + misspellings[lword], + options.interactive) + asked_for.add(lword) + + if summary and fix: + summary.update(lword) + + if word in fixed_words: + continue + + if options.write_changes and fix: changed = True - lines[i - 1] = line.replace(word, fixword, 1) + lines[i - 1] = re.sub(r'\b%s\b' % word, fixword, lines[i - 1]) + fixed_words.add(word) + continue + + # otherwise warning was explicitly set by interactive mode + if options.interactive & 2 and not fix and not misspellings[lword].reason: continue cfilename = "%s%s%s" % (colors.FILE, filename, colors.DISABLE) @@ -187,10 +334,16 @@ crightword = "%s%s%s" % (colors.FWORD, fixword, colors.DISABLE) if misspellings[lword].reason: + if quiet_level & QuietLevels.DISABLED_FIXES: + continue + creason = " | %s%s%s" % (colors.FILE, misspellings[lword].reason, colors.DISABLE) else: + if quiet_level & QuietLevels.NON_AUTOMATIC_FIXES: + continue + creason = '' if filename != '-': @@ -213,15 +366,18 @@ for line in lines: print(line, end='') else: - print("%sFIXED:%s %s" % (colors.FWORD, colors.DISABLE, filename), - file=sys.stderr) - f = open(filename, 'w') + if not quiet_level & QuietLevels.FIXES: + print("%sFIXED:%s %s" % (colors.FWORD, colors.DISABLE, filename), + file=sys.stderr) + f = open(filename, 'w', encoding=encoding) f.writelines(lines) f.close() def main(*args): global options + global quiet_level + (options, args) = parse_options(args) build_dict(args[0]) @@ -229,6 +385,17 @@ if options.disable_colors: colors.disable() + if options.summary: + summary = Summary() + else: + summary = None + + if options.exclude_file: + build_exclude_hashes(options.exclude_file) + + if options.quiet_level: + quiet_level = options.quiet_level + for filename in args[1:]: # ignore hidden files if ishidden(filename): @@ -250,11 +417,15 @@ if os.path.islink(file): continue - parse_file(os.path.join(root, file), colors) + parse_file(os.path.join(root, file), colors, summary) continue - parse_file(filename, colors) + parse_file(filename, colors, summary) + + if summary: + print("\n-------8<-------\nSUMMARY:") + print(summary) if __name__ == '__main__': sys.exit(main(*sys.argv))
diff --git a/data/dictionary.txt b/data/dictionary.txt index 895a5cf..e661c35 100644 --- a/data/dictionary.txt +++ b/data/dictionary.txt
@@ -648,6 +648,7 @@ cannonical->canonical cannotation->connotation cannotations->connotations +cant'->can't cant->can't caost->coast caperbility->capability @@ -1207,6 +1208,7 @@ dicovers->discovers dicovery->discovery dicussed->discussed +didnt'->didn't didnt->didn't diea->idea, die, dieing->dying, dyeing, @@ -1303,12 +1305,14 @@ doctines->doctrines documenatry->documentary doens->does +doesnt'->doesn't doesnt->doesn't doign->doing dominaton->domination dominent->dominant dominiant->dominant donig->doing +dosent'->doesn't dosen't->doesn't doub->doubt, daub, doulbe->double @@ -1821,6 +1825,7 @@ harrassing->harassing harrassment->harassment harrassments->harassments +hasnt'->hasn't hasnt->hasn't haviest->heaviest headquarer->headquarter @@ -2147,6 +2152,7 @@ irreplacable->irreplaceable irresistable->irresistible irresistably->irresistibly +isnt'->isn't isnt->isn't Israelies->Israelis issueing->issuing @@ -3459,6 +3465,7 @@ shortwhile->short while shoudl->should shoudln->should, shouldn't, +shouldnt'->shouldn't shouldnt->shouldn't shreak->shriek shrinked->shrunk @@ -4115,6 +4122,7 @@ wardobe->wardrobe warrent->warrant warrriors->warriors +wasnt'->wasn't wasnt->wasn't wass->was watn->want
diff --git a/data/linux-kernel.exclude b/data/linux-kernel.exclude new file mode 100644 index 0000000..d7dd699 --- /dev/null +++ b/data/linux-kernel.exclude
@@ -0,0 +1,87 @@ +N: Tom Dyas + * Copyright (C) 1996 Thomas K. Dyas (tdyas@eden.rutgers.edu) + * Copyright (C) 1996 Thomas K. Dyas (tdyas@eden.rutgers.edu) + * Copyright (C) 1996 Thomas K. Dyas (tdyas@eden.rutgers.edu) + * Tom Dyas + * Copyright (C) 1996 Thomas K. Dyas (tdyas@noc.rutgers.edu) +MODULE_AUTHOR("Thomas K. Dyas (tdyas@noc.rutgers.edu) and Eddie C. Dost (ecd@skynet.be)"); + * Thomas K. Dyas <tdyas@eden.rutgers.edu> + * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas. + * Tom Dyas : Module support. + * Tom Dyas : Export net symbols. + * Copyright (C) 1996,1997 Thomas K. Dyas (tdyas@eden.rutgers.edu) +MODULE_AUTHOR("Thomas K. Dyas and David S. Miller"); + + + + * Sun people can't spell worth damn. "compatability" indeed. + .asciz "compatability" + * Sun people can't spell worth damn. "compatability" indeed. + .asciz "compatability" + + + Jonathan Teh Soon Yew <j.teh@iname.com> + <j.teh@iname.com> and Alex van Kaam <darkside@chello.nl>.) + <j.teh@iname.com>) */ + + +inv24 - change timings parameters for 24bpp modes on Millenium and + Millenium II. Specify this if you see strange color shadows around + non-Millenium. + Millenium I or II, because of these devices have hardware +* ThrustMaster Millenium 3D Inceptor +DC390F (Sym53c875) accepted this as well as my Millenium. But the Am53C974 + { 1, "ThrustMaster Millenium 3D Inceptor", 6, 2, { 4, 2 }, { 4, 6 }, tmdc_abs, tmdc_btn_joy }, + * DoC 2000 (it's in the Millenium docs), but it seems to work. */ + minfo->millenium = 1; + int millenium; + /* 0 except for 6MB Millenium */ +#define isMillenium(x) (x->millenium) +#define isMillenium(x) (x->millennium) +#define FB_AUX_TEXT_MGA_STEP16 3 /* MGA Millenium I: text, attr, 14 reserved bytes */ +#define FB_ACCEL_MATROX_MGA2064W 16 /* Matrox MGA2064W (Millenium) */ +#define FB_ACCEL_MATROX_MGA2164W 18 /* Matrox MGA2164W (Millenium II) */ +#define FB_ACCEL_MATROX_MGA2164W_AGP 19 /* Matrox MGA2164W (Millenium II) */ + + + * Copyright (C) 2007 Marvell Internation Ltd. + * Copyright (C) 2007-2008 Marvell Internation Ltd. + + dbug(1,dprintf("PTY/ECT/addCONF,relPLCI=%lx",relatedPLCIvalue)); + /* send PTY/ECT req, cannot check all states because of US stuff */ + dbug(1,dprintf("ECT OK")); + { /* first indication after ECT-Request on Consultation Call */ +#define INT_CT_REJ 70 /* ECT rejected internal command */ + __u8 ip_ect; /* ECT codepoint of IPv4 header, pre-shifted */ + __u8 ece:1, cwr:1; /* TCP ECT bits */ +/* set ECT codepoint from IP header. + pr_info("new ECT codepoint %x out of mask\n", einfo->ip_ect); + /* Funny extension: if ECT is not set on a segment, + /* Not-retransmitted data segment: set ECT and inject CWR. */ + /* ACK or retransmitted segment: clear ECT|CE */ + * "The ECN-Capable Transport (ECT) bit would be set by the + * Now setting the ECT bit all the time, as it should not cause + + * Copyright © 2003 Agere Systems Inc. + +S: 1326 De Val-Brillant + <slot #2, id = 0x02, characters = "xtension whic"> + + at91_set_A_periph(AT91_PIN_PC1, 0); /* [-SMOE-]{+SOME+} */ + + _REGISTER_CLOCK(NULL, "ect", ect_clk) +infinit: + bra infinit +the read on an 8-byte boundary (e.g., if you seeked an odd number of bytes +#define ISNT 12 + (Some conversion-factor data were contributed by Jonathan Teh Soon Yew + From HWMon.cpp (Copyright 1998-2000 Jonathan Teh Soon Yew): + (These conversions were contributed by Jonathan Teh Soon Yew +/* linear fits from HWMon.cpp (Copyright 1998-2000 Jonathan Teh Soon Yew) +/* __u16 pallete:1; */ + __be32 pallete; + struct diu_addr pallete; + int virtualX, virtualY; + struct regid archType; + .ident = "Toshiba Satelite S1800-814", + at91_set_A_periph(AT91_PIN_PC1, 0); /* SMOE */