| # This Source Code Form is subject to the terms of the Mozilla Public |
| # License, v. 2.0. If a copy of the MPL was not distributed with this |
| # file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| |
| 'Mozilla l10n compare locales tool' |
| |
| import codecs |
| import os |
| import os.path |
| import shutil |
| import re |
| from difflib import SequenceMatcher |
| from collections import defaultdict |
| |
try:
    from json import dumps
except ImportError:
    # Python < 2.6 has no json module; fall back to the external
    # simplejson package. Catch only ImportError so real errors in the
    # json module are not silently masked.
    from simplejson import dumps
| |
| from compare_locales import parser |
| from compare_locales import paths |
| from compare_locales.checks import getChecker |
| |
| |
class Tree(object):
    '''Compressed trie keyed by path segments.

    Branch keys are tuples of path segments; inserted paths share
    common prefixes, and a branch is split when a new path only
    partially matches it. Leaf payloads are created lazily with the
    valuetype factory.
    '''

    def __init__(self, valuetype):
        # maps tuple-of-segments -> child Tree
        self.branches = dict()
        # factory for leaf values (e.g. dict)
        self.valuetype = valuetype
        self.value = None

    def __getitem__(self, leaf):
        '''Return (creating it if needed) the value for the given leaf.

        leaf is either a paths.File, mapped to its
        locale/module/file-path segments, or a plain '/'-separated
        string.
        '''
        if isinstance(leaf, paths.File):
            parts = [p for p in [leaf.locale, leaf.module] if p] + \
                leaf.file.split('/')
        else:
            parts = leaf.split('/')
        return self.__get(parts)

    def __get(self, parts):
        common = None
        old = None
        new = tuple(parts)
        t = self
        # Find the branch sharing a prefix with parts. Branch keys are
        # disjoint in their first segment, so at most one can match.
        # .items() instead of Python-2-only .iteritems(); the mutation
        # below happens only after the loop has been left.
        for k, v in self.branches.items():
            for i, part in enumerate(zip(k, parts)):
                if part[0] != part[1]:
                    i -= 1
                    break
            if i < 0:
                # not even the first segment matches this branch
                continue
            i += 1
            common = tuple(k[:i])
            old = tuple(k[i:])
            new = tuple(parts[i:])
            break
        if old:
            # partial match: split the existing branch at the common
            # prefix, re-hanging its subtree under the new node
            self.branches.pop(k)
            t = Tree(self.valuetype)
            t.branches[old] = v
            self.branches[common] = t
        elif common:
            # full branch key matched, descend into it
            t = self.branches[common]
        if new:
            if common:
                # remaining segments are resolved in the subtree
                return t.__get(new)
            # no overlap with any existing branch: add a fresh one
            t2 = t
            t = Tree(self.valuetype)
            t2.branches[new] = t
        if t.value is None:
            t.value = t.valuetype()
        return t.value

    indent = '  '

    def getContent(self, depth=0):
        '''
        Returns iterator of (depth, flag, key_or_value) tuples.
        If flag is 'value', key_or_value is a value object, otherwise
        (flag is 'key') it's a key string.
        '''
        # sorted() instead of list.sort() so this also works with the
        # dict views Python 3 returns from keys()
        if self.value is not None:
            yield (depth, 'value', self.value)
        for key in sorted(self.branches.keys()):
            yield (depth, 'key', key)
            for child in self.branches[key].getContent(depth + 1):
                yield child

    def toJSON(self):
        '''
        Returns this Tree as a JSON-able tree of hashes.
        Only the values need to take care that they're JSON-able.
        '''
        json = {}
        if self.value is not None:
            json['value'] = self.value
        children = [('/'.join(key), self.branches[key].toJSON())
                    for key in sorted(self.branches.keys())]
        if children:
            json['children'] = children
        return json

    def getStrRows(self):
        def tostr(t):
            if t[1] == 'key':
                return self.indent * t[0] + '/'.join(t[2])
            return self.indent * (t[0] + 1) + str(t[2])

        # list comprehension keeps the Python 2 list return type when
        # running on Python 3 (where map() returns an iterator)
        return [tostr(c) for c in self.getContent()]

    def __str__(self):
        return '\n'.join(self.getStrRows())
| |
| |
class AddRemove(SequenceMatcher):
    '''Diff two sorted key sequences into a flat event stream.

    Iterating yields ('equal', (left, right)) for matching keys,
    ('delete', key) for keys only on the left, and ('add', key) for
    keys only on the right.
    '''

    def __init__(self):
        SequenceMatcher.__init__(self, None, None, None)

    def set_left(self, left):
        if not isinstance(left, list):
            left = list(left)
        self.set_seq1(left)

    def set_right(self, right):
        if not isinstance(right, list):
            right = list(right)
        self.set_seq2(right)

    def __iter__(self):
        for tag, lo1, hi1, lo2, hi2 in self.get_opcodes():
            lefts = self.a[lo1:hi1]
            rights = self.b[lo2:hi2]
            if tag == 'equal':
                for pair in zip(lefts, rights):
                    yield ('equal', pair)
                continue
            # 'replace' emits its deletions before its additions
            if tag in ('delete', 'replace'):
                for item in lefts:
                    yield ('delete', item)
            if tag in ('insert', 'replace'):
                for item in rights:
                    yield ('add', item)
| |
| |
class DirectoryCompare(SequenceMatcher):
    '''Diff a reference directory enumeration against a localization
    and drive a watcher with the differences.

    Note the inverted vocabulary: entries only in the reference are
    fed to watcher.add() (they need to be added to the localization),
    entries only in the localization are fed to watcher.remove()
    (they are obsolete).
    '''
    def __init__(self, reference):
        SequenceMatcher.__init__(self, None, [i for i in reference],
                                 [])
        self.watcher = None

    def setWatcher(self, watcher):
        self.watcher = watcher

    def compareWith(self, other):
        # without a watcher there is nobody to report to
        if not self.watcher:
            return
        self.set_seq2([i for i in other])
        # range() instead of Python-2-only xrange(); the indices are
        # only iterated, so the behavior is identical
        for tag, i1, i2, j1, j2 in self.get_opcodes():
            if tag == 'equal':
                # present on both sides, compare contents
                for i, j in zip(range(i1, i2), range(j1, j2)):
                    self.watcher.compare(self.a[i], self.b[j])
            elif tag == 'delete':
                # only in the reference: missing from the localization
                for i in range(i1, i2):
                    self.watcher.add(self.a[i], other.cloneFile(self.a[i]))
            elif tag == 'insert':
                # only in the localization: obsolete
                for j in range(j1, j2):
                    self.watcher.remove(self.b[j])
            else:
                # 'replace': obsolete entries, then missing ones
                for j in range(j1, j2):
                    self.watcher.remove(self.b[j])
                for i in range(i1, i2):
                    self.watcher.add(self.a[i], other.cloneFile(self.a[i]))
| |
| |
class Observer(object):
    '''Collects per-locale summary statistics and per-file details of
    a comparison run, and serializes them as text, JSON, or an MIT
    Simile Exhibit feed.
    '''
    # categories that are plain counters, summed up per locale
    stat_cats = ['missing', 'obsolete', 'missingInFiles', 'report',
                 'changed', 'unchanged', 'keys']

    def __init__(self):
        # defaultdict subclass with a baked-in int factory, so that
        # defaultdict(intdict) can create per-locale counters without
        # arguments
        class intdict(defaultdict):
            def __init__(self):
                defaultdict.__init__(self, int)

        self.summary = defaultdict(intdict)
        # per-file detail dicts, keyed by paths.File via the Tree
        self.details = Tree(dict)
        # optional callable returning "error"/"ignore"/other; see notify
        self.filter = None

    # support pickling
    def __getstate__(self):
        # getSummary() flattens to plain dicts, as the local intdict
        # class above is not importable by pickle
        return dict(summary=self.getSummary(), details=self.details)

    def __setstate__(self, state):
        class intdict(defaultdict):
            def __init__(self):
                defaultdict.__init__(self, int)

        self.summary = defaultdict(intdict)
        if 'summary' in state:
            for loc, stats in state['summary'].items():
                self.summary[loc].update(stats)
        self.details = state['details']
        self.filter = None

    def getSummary(self):
        '''Return the summary as a plain dict of dicts.'''
        plaindict = {}
        for k, v in self.summary.items():
            plaindict[k] = dict(v)
        return plaindict

    def toJSON(self):
        return dict(summary=self.getSummary(), details=self.details.toJSON())

    def notify(self, category, file, data):
        '''Record one comparison event.

        Returns "error", "ignore", or whatever self.filter returned;
        callers use the return value to decide how to handle missing
        and obsolete items.
        '''
        rv = "error"
        if category in self.stat_cats:
            # these get called post reporting just for stats
            # return "error" to forward them to the other_observers
            self.summary[file.locale][category] += data
            # keep track of how many strings are in a missing file
            # we got the {'missingFile': 'error'} from the first pass
            if category == 'missingInFiles':
                self.details[file]['strings'] = data
            return "error"
        if category in ['missingFile', 'obsoleteFile']:
            if self.filter is not None:
                rv = self.filter(file)
            if rv != "ignore":
                self.details[file][category] = rv
            return rv
        if category in ['missingEntity', 'obsoleteEntity']:
            if self.filter is not None:
                rv = self.filter(file, data)
            if rv == "ignore":
                return rv
            v = self.details[file]
            try:
                v[category].append(data)
            except KeyError:
                v[category] = [data]
            return rv
        if category == 'error':
            try:
                self.details[file][category].append(data)
            except KeyError:
                self.details[file][category] = [data]
            self.summary[file.locale]['errors'] += 1
        elif category == 'warning':
            try:
                self.details[file][category].append(data)
            except KeyError:
                self.details[file][category] = [data]
            self.summary[file.locale]['warnings'] += 1
        return rv

    def toExhibit(self):
        '''Serialize the summary as JSON for an MIT Simile Exhibit.'''
        items = []
        for locale in sorted(self.summary.keys()):
            summary = self.summary[locale]
            if locale is not None:
                item = {'id': 'xxx/' + locale,
                        'label': locale,
                        'locale': locale}
            else:
                item = {'id': 'xxx',
                        'label': 'xxx',
                        'locale': 'xxx'}
            item['type'] = 'Build'
            total = sum([summary[k]
                         for k in ('changed', 'unchanged', 'report', 'missing',
                                   'missingInFiles')
                         if k in summary])
            # guard against division by zero for locales with no
            # countable entries; '//' keeps the integer division that
            # Python 2's '/' performed on ints
            rate = summary.get('changed', 0) * 100 // total if total else 0
            item.update((k, summary.get(k, 0))
                        for k in ('changed', 'unchanged'))
            item.update((k, summary[k])
                        for k in ('report', 'errors', 'warnings')
                        if k in summary)
            item['missing'] = summary.get('missing', 0) + \
                summary.get('missingInFiles', 0)
            item['completion'] = rate
            item['total'] = total
            result = 'success'
            if item.get('warnings', 0):
                result = 'warning'
            if item.get('errors', 0) or item.get('missing', 0):
                result = 'failure'
            item['result'] = result
            items.append(item)
        data = {
            "properties": dict.fromkeys(
                ("completion", "errors", "warnings", "missing", "report",
                 "unchanged", "changed", "obsolete"),
                {"valueType": "number"}),
            "types": {
                "Build": {"pluralLabel": "Builds"}
            }}
        data['items'] = items
        return dumps(data, indent=2)

    def serialize(self, type="text"):
        '''Serialize this observer; type is "text", "exhibit" or "json".'''
        if type == "exhibit":
            return self.toExhibit()
        if type == "json":
            return dumps(self.toJSON())

        def tostr(t):
            # detail rows: keys render as indented paths, value dicts
            # as one line per error/warning/missing/obsolete entry
            if t[1] == 'key':
                return '  ' * t[0] + '/'.join(t[2])
            o = []
            indent = '  ' * (t[0] + 1)
            if 'error' in t[2]:
                o += [indent + 'ERROR: ' + e for e in t[2]['error']]
            if 'warning' in t[2]:
                o += [indent + 'WARNING: ' + e for e in t[2]['warning']]
            if 'missingEntity' in t[2] or 'obsoleteEntity' in t[2]:
                missingEntities = ('missingEntity' in t[2] and
                                   t[2]['missingEntity']) or []
                obsoleteEntities = ('obsoleteEntity' in t[2] and
                                    t[2]['obsoleteEntity']) or []
                entities = missingEntities + obsoleteEntities
                entities.sort()
                for entity in entities:
                    op = '+'
                    if entity in obsoleteEntities:
                        op = '-'
                    o.append(indent + op + entity)
            elif 'missingFile' in t[2]:
                o.append(indent + '// add and localize this file')
            elif 'obsoleteFile' in t[2]:
                o.append(indent + '// remove this file')
            return '\n'.join(o)

        out = []
        for locale, summary in sorted(self.summary.items()):
            if locale is not None:
                out.append(locale + ':')
            out += [k + ': ' + str(v) for k, v in sorted(summary.items())]
            total = sum([summary[k]
                         for k in ['changed', 'unchanged', 'report', 'missing',
                                   'missingInFiles']
                         if k in summary])
            rate = 0
            if total:
                # '//' keeps Python 2's integer division semantics
                rate = summary.get('changed', 0) * 100 // total
            out.append('%d%% of entries changed' % rate)
        # list comprehension instead of map(): on Python 3 a map object
        # cannot be concatenated to a list
        return '\n'.join([tostr(c) for c in self.details.getContent()] + out)

    def __str__(self):
        return 'observer'
| |
| |
class ContentComparer:
    '''Compare localized files against their reference versions,
    reporting missing/obsolete/changed entities to observers and
    optionally staging l10n-merge output.
    '''
    # entity keys matching this are tallied as "keys" instead of
    # changed/unchanged -- presumably access/command keys, which
    # legitimately differ per locale; TODO confirm
    keyRE = re.compile('[kK]ey')
    # newline finder, used by compare() to map character offsets in the
    # l10n contents to (line, column) pairs
    nl = re.compile('\n', re.M)

    def __init__(self):
        '''Create a ContentComparer.
        observer is usually an instance of Observer. The return values
        of the notify method are used to control the handling of missing
        entities.
        '''
        # cache of parsed reference files: paths.File -> parse result
        self.reference = dict()
        self.observer = Observer()
        # additional, non-filtering observers; see add_observer()
        self.other_observers = []
        # directory to stage l10n-merge output in; None disables merging
        self.merge_stage = None

    def add_observer(self, obs):
        '''Add a non-filtering observer.
        Results from the notify calls are ignored.
        '''
        self.other_observers.append(obs)

    def set_merge_stage(self, merge_stage):
        # None disables l10n-merge; see compare() and merge()
        self.merge_stage = merge_stage

    def merge(self, ref_entities, ref_map, ref_file, l10n_file, missing,
              skips, p):
        '''Write the merged version of l10n_file into the merge stage.

        ref_entities and ref_map are the cached parse results of the
        reference file, missing is the list of reference keys absent
        from the localization, skips is the list of l10n entities that
        failed checks with errors, and p is the parser, still holding
        the l10n file's contents.
        '''
        outfile = os.path.join(self.merge_stage, l10n_file.module,
                               l10n_file.file)
        outdir = os.path.dirname(outfile)
        if not os.path.isdir(outdir):
            os.makedirs(outdir)
        if not p.canMerge:
            # formats this parser cannot merge get the reference file
            # copied over wholesale instead
            shutil.copyfile(ref_file.fullpath, outfile)
            print "copied reference to " + outfile
            return
        if skips:
            # skips come in ordered by key name, we need them in file order
            skips.sort(key=lambda s: s.span[0])
        # content to append at the end: the reference version of every
        # missing or skipped entity, separated by a leading newline
        trailing = (['\n'] +
                    [ref_entities[ref_map[key]].all for key in missing] +
                    [ref_entities[ref_map[skip.key]].all for skip in skips])
        if skips:
            # we need to skip a few errornous blocks in the input, copy by hand
            f = codecs.open(outfile, 'wb', p.encoding)
            offset = 0
            for skip in skips:
                chunk = skip.span
                # copy everything up to the bad span, then jump past it
                f.write(p.contents[offset:chunk[0]])
                offset = chunk[1]
            f.write(p.contents[offset:])
        else:
            # nothing to skip: copy the l10n file and append to it
            shutil.copyfile(l10n_file.fullpath, outfile)
            f = codecs.open(outfile, 'ab', p.encoding)
        print "adding to " + outfile

        def ensureNewline(s):
            # keep appended entries on their own lines
            if not s.endswith('\n'):
                return s + '\n'
            return s

        f.write(''.join(map(ensureNewline, trailing)))
        f.close()

    def notify(self, category, file, data):
        """Check observer for the found data, and if it's
        not to ignore, notify other_observers.

        Returns the filtering observer's verdict so callers can react
        to "ignore"/"error"/"report".
        """
        rv = self.observer.notify(category, file, data)
        if rv == 'ignore':
            return rv
        for obs in self.other_observers:
            # non-filtering other_observers, ignore results
            obs.notify(category, file, data)
        return rv

    def remove(self, obsolete):
        # the localization has a file the reference doesn't: report it
        self.notify('obsoleteFile', obsolete, None)
        pass

    def compare(self, ref_file, l10n):
        '''Compare the localization file l10n against ref_file.

        Parses both sides (caching the reference parse), diffs the key
        lists, runs checks on entities present on both sides, and
        triggers l10n-merge when a merge stage is set.
        '''
        try:
            p = parser.getParser(ref_file.file)
        except UserWarning:
            # no comparison, XXX report?
            return
        if ref_file not in self.reference:
            # we didn't parse this before
            try:
                p.readContents(ref_file.getContents())
            except Exception, e:
                self.notify('error', ref_file, str(e))
                return
            self.reference[ref_file] = p.parse()
        ref = self.reference[ref_file]
        ref_list = ref[1].keys()
        ref_list.sort()
        # from here on, p holds the l10n contents, which merge() and
        # _getLine() below rely on
        try:
            p.readContents(l10n.getContents())
            l10n_entities, l10n_map = p.parse()
        except Exception, e:
            self.notify('error', l10n, str(e))
            return
        # lazily built table of line start offsets into p.contents
        lines = []

        def _getLine(offset):
            # map a character offset in the l10n contents to a 1-based
            # (line, column) pair
            if not lines:
                lines.append(0)
                for m in self.nl.finditer(p.contents):
                    lines.append(m.end())
            for i in xrange(len(lines), 0, -1):
                if offset >= lines[i - 1]:
                    return (i, offset - lines[i - 1])
            return (1, offset)

        l10n_list = l10n_map.keys()
        l10n_list.sort()
        ar = AddRemove()
        ar.set_left(ref_list)
        ar.set_right(l10n_list)
        report = missing = obsolete = changed = unchanged = keys = 0
        missings = []
        skips = []
        # checker may be falsy for files without checks; guarded below
        checker = getChecker(l10n, reference=ref[0])
        for action, item_or_pair in ar:
            if action == 'delete':
                # missing entity
                _rv = self.notify('missingEntity', l10n, item_or_pair)
                if _rv == "ignore":
                    continue
                if _rv == "error":
                    # only add to missing entities for l10n-merge on error,
                    # not report
                    missings.append(item_or_pair)
                    missing += 1
                else:
                    # just report
                    report += 1
            elif action == 'add':
                # obsolete entity or junk
                if isinstance(l10n_entities[l10n_map[item_or_pair]],
                              parser.Junk):
                    junk = l10n_entities[l10n_map[item_or_pair]]
                    params = (junk.val,) + junk.span
                    self.notify('error', l10n,
                                'Unparsed content "%s" at %d-%d' % params)
                elif self.notify('obsoleteEntity', l10n,
                                 item_or_pair) != 'ignore':
                    obsolete += 1
            else:
                # entity found in both ref and l10n, check for changed
                entity = item_or_pair[0]
                refent = ref[0][ref[1][entity]]
                l10nent = l10n_entities[l10n_map[entity]]
                if self.keyRE.search(entity):
                    keys += 1
                else:
                    if refent.val == l10nent.val:
                        self.doUnchanged(l10nent)
                        unchanged += 1
                    else:
                        self.doChanged(ref_file, refent, l10nent)
                        changed += 1
                    # run checks:
                    if checker:
                        for tp, pos, msg, cat in checker.check(refent,
                                                               l10nent):
                            # compute real src position, if first line,
                            # col needs adjustment
                            _l, _offset = _getLine(l10nent.val_span[0])
                            if isinstance(pos, tuple):
                                # line, column
                                if pos[0] == 1:
                                    col = pos[1] + _offset
                                else:
                                    col = pos[1]
                                    _l += pos[0] - 1
                            else:
                                # pos is an offset into the value
                                _l, col = _getLine(l10nent.val_span[0] + pos)
                            # skip error entities when merging
                            if tp == 'error' and self.merge_stage is not None:
                                skips.append(l10nent)
                            self.notify(tp, l10n,
                                        u"%s at line %d, column %d for %s" %
                                        (msg, _l, col, refent.key))
            pass
        if missing:
            self.notify('missing', l10n, missing)
        if self.merge_stage is not None and (missings or skips):
            self.merge(ref[0], ref[1], ref_file, l10n, missings, skips, p)
        if report:
            self.notify('report', l10n, report)
        if obsolete:
            self.notify('obsolete', l10n, obsolete)
        if changed:
            self.notify('changed', l10n, changed)
        if unchanged:
            self.notify('unchanged', l10n, unchanged)
        if keys:
            self.notify('keys', l10n, keys)
        pass

    def add(self, orig, missing):
        '''Process a file that is missing from the localization.

        orig is the reference file, missing the l10n-side file it
        should exist as. Reports how many strings the missing file
        would contain ('missingInFiles').
        '''
        if self.notify('missingFile', missing, None) == "ignore":
            # filter said that we don't need this file, don't count it
            return
        f = orig
        try:
            p = parser.getParser(f.file)
        except UserWarning:
            # no parser for this file type, nothing to count
            return
        try:
            p.readContents(f.getContents())
            entities, map = p.parse()
        except Exception, e:
            self.notify('error', f, str(e))
            return
        self.notify('missingInFiles', missing, len(map))

    def doUnchanged(self, entity):
        # overload this if needed
        pass

    def doChanged(self, file, ref_entity, l10n_entity):
        # overload this if needed
        pass
| |
| |
def compareApp(app, other_observer=None, merge_stage=None, clobber=False):
    '''Compare locales set in app.

    Optional arguments are:
    - other_observer. An object implementing
        notify(category, _file, data)
      The return values of that callback are ignored.
    - merge_stage. A directory to be used for staging the output of
      l10n-merge. It is formatted with ab_CD=locale per localization.
    - clobber. Clobber the module subdirectories of the merge dir as we go.
      Use wisely, as it might cause data loss.
    '''
    comparer = ContentComparer()
    if other_observer is not None:
        comparer.add_observer(other_observer)
    # the app's filter decides which missing/obsolete findings to ignore
    comparer.observer.filter = app.filter
    for module, reference, locales in app:
        dir_comp = DirectoryCompare(reference)
        dir_comp.setWatcher(comparer)
        for _, localization in locales:
            if merge_stage is not None:
                locale_merge = merge_stage.format(ab_CD=localization.locale)
                comparer.set_merge_stage(locale_merge)
                if clobber:
                    # if clobber on, remove the stage for the module if it
                    # exists
                    clobberdir = os.path.join(locale_merge, module)
                    if os.path.exists(clobberdir):
                        shutil.rmtree(clobberdir)
                        # parenthesized print of a single value behaves
                        # identically on Python 2 and works on Python 3
                        print("clobbered " + clobberdir)
            dir_comp.compareWith(localization)
    return comparer.observer
| |
| |
def compareDirs(reference, locale, other_observer=None, merge_stage=None):
    '''Compare a reference directory against a locale directory.

    Optional arguments are:
    - other_observer. An object implementing
        notify(category, _file, data)
      The return values of that callback are ignored.
    - merge_stage. A directory for staging l10n-merge output, or None
      to disable merging.
    '''
    comparer = ContentComparer()
    comparer.set_merge_stage(merge_stage)
    if other_observer is not None:
        comparer.add_observer(other_observer)
    comparison = DirectoryCompare(paths.EnumerateDir(reference))
    comparison.setWatcher(comparer)
    comparison.compareWith(paths.EnumerateDir(locale))
    return comparer.observer