src/third_party/mozjs-45/python/compare-locales/compare_locales/checks.py - cobalt - Git at Google

 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.

 import re
 from difflib import SequenceMatcher
 from xml import sax
 try:
     from cStringIO import StringIO
 except ImportError:
     from StringIO import StringIO

 from compare_locales.parser import DTDParser, PropertiesParser


 class Checker(object):
     '''Abstract class to implement checks per file type.
     '''
     pattern = None

     @classmethod
     def use(cls, file):
         return cls.pattern.match(file.file)

     def check(self, refEnt, l10nEnt):
         '''Given the reference and localized Entities, performs checks.

         This is a generator yielding tuples of
         - "warning" or "error", depending on what should be reported,
         - tuple of line, column info for the error within the string
         - description string to be shown in the report
         '''
         if True:
             raise NotImplementedError("Need to subclass")
         yield ("error", (0, 0), "This is an example error", "example")


 class PrintfException(Exception):
     def __init__(self, msg, pos):
         self.pos = pos
         self.msg = msg


 class PropertiesChecker(Checker):
     '''Tests to run on .properties files.
     '''
     pattern = re.compile('.*\.properties$')
     printf = re.compile(r'%(?P<good>%|'
                         r'(?:(?P<number>[1-9][0-9]*)\$)?'
                         r'(?P<width>\*|[0-9]+)?'
                         r'(?P<prec>\.(?:\*|[0-9]+)?)?'
                         r'(?P<spec>[duxXosScpfg]))?')

     def check(self, refEnt, l10nEnt):
         '''Test for the different variable formats.
         '''
         refValue, l10nValue = refEnt.val, l10nEnt.val
         refSpecs = None
         # check for PluralForm.jsm stuff, should have the docs in the
         # comment
         if 'Localization_and_Plurals' in refEnt.pre_comment:
             # For plurals, common variable pattern is #1. Try that.
             pats = set(int(m.group(1)) for m in re.finditer('#([0-9]+)',
                                                             refValue))
             if len(pats) == 0:
                 return
             lpats = set(int(m.group(1)) for m in re.finditer('#([0-9]+)',
                                                              l10nValue))
             if pats - lpats:
                 yield ('warning', 0, 'not all variables used in l10n',
                        'plural')
                 return
             if lpats - pats:
                 yield ('error', 0, 'unreplaced variables in l10n',
                        'plural')
                 return
             return
         # check for lost escapes
         raw_val = l10nEnt.raw_val
         for m in PropertiesParser.escape.finditer(raw_val):
             if m.group('single') and \
                m.group('single') not in PropertiesParser.known_escapes:
                 yield ('warning', m.start(),
                        'unknown escape sequence, \\' + m.group('single'),
                        'escape')
         try:
             refSpecs = self.getPrintfSpecs(refValue)
         except PrintfException:
             refSpecs = []
         if refSpecs:
             for t in self.checkPrintf(refSpecs, l10nValue):
                 yield t
             return

     def checkPrintf(self, refSpecs, l10nValue):
         try:
             l10nSpecs = self.getPrintfSpecs(l10nValue)
         except PrintfException, e:
             yield ('error', e.pos, e.msg, 'printf')
             return
         if refSpecs != l10nSpecs:
             sm = SequenceMatcher()
             sm.set_seqs(refSpecs, l10nSpecs)
             msgs = []
             warn = None
             for action, i1, i2, j1, j2 in sm.get_opcodes():
                 if action == 'equal':
                     continue
                 if action == 'delete':
                     # missing argument in l10n
                     if i2 == len(refSpecs):
                         # trailing specs missing, that's just a warning
                         warn = ', '.join('trailing argument %d `%s` missing' %
                                          (i+1, refSpecs[i])
                                          for i in xrange(i1, i2))
                     else:
                         for i in xrange(i1, i2):
                             msgs.append('argument %d `%s` missing' %
                                         (i+1, refSpecs[i]))
                     continue
                 if action == 'insert':
                     # obsolete argument in l10n
                     for i in xrange(j1, j2):
                         msgs.append('argument %d `%s` obsolete' %
                                     (i+1, l10nSpecs[i]))
                     continue
                 if action == 'replace':
                     for i, j in zip(xrange(i1, i2), xrange(j1, j2)):
                         msgs.append('argument %d `%s` should be `%s`' %
                                     (j+1, l10nSpecs[j], refSpecs[i]))
             if msgs:
                 yield ('error', 0, ', '.join(msgs), 'printf')
             if warn is not None:
                 yield ('warning', 0, warn, 'printf')

     def getPrintfSpecs(self, val):
         hasNumber = False
         specs = []
         for m in self.printf.finditer(val):
             if m.group("good") is None:
                 # found just a '%', signal an error
                 raise PrintfException('Found single %', m.start())
             if m.group("good") == '%':
                 # escaped %
                 continue
             if ((hasNumber and m.group('number') is None) or
                     (not hasNumber and specs and
                      m.group('number') is not None)):
                 # mixed style, numbered and not
                 raise PrintfException('Mixed ordered and non-ordered args',
                                       m.start())
             hasNumber = m.group('number') is not None
             if hasNumber:
                 pos = int(m.group('number')) - 1
                 ls = len(specs)
                 if pos >= ls:
                     # pad specs
                     nones = pos - ls
                     specs[ls:pos] = nones*[None]
                     specs.append(m.group('spec'))
                 else:
                     if specs[pos] is not None:
                         raise PrintfException('Double ordered argument %d' %
                                               (pos+1),
                                               m.start())
                     specs[pos] = m.group('spec')
             else:
                 specs.append(m.group('spec'))
         # check for missing args
         if hasNumber and not all(specs):
             raise PrintfException('Ordered argument missing', 0)
         return specs


 class DTDChecker(Checker):
     """Tests to run on DTD files.

     Uses xml.sax for the heavy lifting of xml parsing.

     The code tries to parse until it doesn't find any unresolved entities
     anymore. If it finds one, it tries to grab the key, and adds an empty
     <!ENTITY key ""> definition to the header.

     Also checks for some CSS and number heuristics in the values.
     """
     pattern = re.compile('.*\.dtd$')

     eref = re.compile('&(%s);' % DTDParser.Name)
     tmpl = '''<!DOCTYPE elem [%s]>
 <elem>%s</elem>
 '''
     xmllist = set(('amp', 'lt', 'gt', 'apos', 'quot'))

     def __init__(self, reference):
         self.reference = reference
         self.__known_entities = None

     def known_entities(self, refValue):
         if self.__known_entities is None and self.reference is not None:
             self.__known_entities = set()
             for ent in self.reference:
                 self.__known_entities.update(self.entities_for_value(ent.val))
         return self.__known_entities if self.__known_entities is not None \
             else self.entities_for_value(refValue)

     def entities_for_value(self, value):
         reflist = set(m.group(1).encode('utf-8')
                       for m in self.eref.finditer(value))
         reflist -= self.xmllist
         return reflist

     # Setup for XML parser, with default and text-only content handler
     class TextContent(sax.handler.ContentHandler):
         textcontent = ''

         def characters(self, content):
             self.textcontent += content

     defaulthandler = sax.handler.ContentHandler()
     texthandler = TextContent()

     numPattern = r'([0-9]+|[0-9]*\.[0-9]+)'
     num = re.compile('^%s$' % numPattern)
     lengthPattern = '%s(em|px|ch|cm|in)' % numPattern
     length = re.compile('^%s$' % lengthPattern)
     spec = re.compile(r'((?:min\-)?(?:width|height))\s*:\s*%s' %
                       lengthPattern)
     style = re.compile(r'^%(spec)s\s*(;\s*%(spec)s\s*)*;?$' %
                        {'spec': spec.pattern})

     processContent = None

     def check(self, refEnt, l10nEnt):
         """Try to parse the refvalue inside a dummy element, and keep
         track of entities that we need to define to make that work.

         Return a checker that offers just those entities.
         """
         refValue, l10nValue = refEnt.val, l10nEnt.val
         # find entities the refValue references,
         # reusing markup from DTDParser.
         reflist = self.known_entities(refValue)
         entities = ''.join('<!ENTITY %s "">' % s for s in sorted(reflist))
         parser = sax.make_parser()
         parser.setFeature(sax.handler.feature_external_ges, False)

         parser.setContentHandler(self.defaulthandler)
         try:
             parser.parse(StringIO(self.tmpl %
                                   (entities, refValue.encode('utf-8'))))
             # also catch stray %
             parser.parse(StringIO(self.tmpl %
                                   (refEnt.all.encode('utf-8') + entities,
                                    '&%s;' % refEnt.key.encode('utf-8'))))
         except sax.SAXParseException, e:
             yield ('warning',
                    (0, 0),
                    "can't parse en-US value", 'xmlparse')

         # find entities the l10nValue references,
         # reusing markup from DTDParser.
         l10nlist = self.entities_for_value(l10nValue)
         missing = sorted(l10nlist - reflist)
         _entities = entities + ''.join('<!ENTITY %s "">' % s for s in missing)
         warntmpl = u'Referencing unknown entity `%s`'
         if reflist:
             warntmpl += ' (%s known)' % ', '.join(sorted(reflist))
         if self.processContent is not None:
             self.texthandler.textcontent = ''
             parser.setContentHandler(self.texthandler)
         try:
             parser.parse(StringIO(self.tmpl % (_entities,
                          l10nValue.encode('utf-8'))))
             # also catch stray %
             # if this fails, we need to substract the entity definition
             parser.setContentHandler(self.defaulthandler)
             parser.parse(StringIO(self.tmpl % (
                 l10nEnt.all.encode('utf-8') + _entities,
                 '&%s;' % l10nEnt.key.encode('utf-8'))))
         except sax.SAXParseException, e:
             # xml parse error, yield error
             # sometimes, the error is reported on our fake closing
             # element, make that the end of the last line
             lnr = e.getLineNumber() - 1
             lines = l10nValue.splitlines()
             if lnr > len(lines):
                 lnr = len(lines)
                 col = len(lines[lnr-1])
             else:
                 col = e.getColumnNumber()
                 if lnr == 1:
                     # first line starts with <elem>, substract
                     col -= len("<elem>")
                 elif lnr == 0:
                     col -= len("<!DOCTYPE elem [")  # first line is DOCTYPE
             yield ('error', (lnr, col), ' '.join(e.args), 'xmlparse')

         for key in missing:
             yield ('warning', (0, 0), warntmpl % key.decode('utf-8'),
                    'xmlparse')

         # Number check
         if self.num.match(refValue) and not self.num.match(l10nValue):
             yield ('warning', 0, 'reference is a number', 'number')
         # CSS checks
         # just a length, width="100em"
         if self.length.match(refValue) and not self.length.match(l10nValue):
             yield ('error', 0, 'reference is a CSS length', 'css')
         # real CSS spec, style="width:100px;"
         if self.style.match(refValue):
             if not self.style.match(l10nValue):
                 yield ('error', 0, 'reference is a CSS spec', 'css')
             else:
                 # warn if different properties or units
                 refMap = dict((s, u) for s, _, u in
                               self.spec.findall(refValue))
                 msgs = []
                 for s, _, u in self.spec.findall(l10nValue):
                     if s not in refMap:
                         msgs.insert(0, '%s only in l10n' % s)
                         continue
                     else:
                         ru = refMap.pop(s)
                         if u != ru:
                             msgs.append("units for %s don't match "
                                         "(%s != %s)" % (s, u, ru))
                 for s in refMap.iterkeys():
                     msgs.insert(0, '%s only in reference' % s)
                 if msgs:
                     yield ('warning', 0, ', '.join(msgs), 'css')

         if self.processContent is not None:
             for t in self.processContent(self.texthandler.textcontent):
                 yield t


 class PrincessAndroid(DTDChecker):
     """Checker for the string values that Android puts into an XML container.

     http://developer.android.com/guide/topics/resources/string-resource.html#FormattingAndStyling  # noqa
     has more info. Check for unescaped apostrophes and bad unicode escapes.
     """
     quoted = re.compile("(?P<q>[\"']).*(?P=q)$")

     def unicode_escape(self, str):
         """Helper method to try to decode all unicode escapes in a string.

         This code uses the standard python decode for unicode-escape, but
         that's somewhat tricky, as its input needs to be ascii. To get to
         ascii, the unicode string gets converted to ascii with
         backslashreplace, i.e., all non-ascii unicode chars get unicode
         escaped. And then we try to roll all of that back.
         Now, when that hits an error, that's from the original string, and we
         need to search for the actual error position in the original string,
         as the backslashreplace code changes string positions quite badly.
         See also the last check in TestAndroid.test_android_dtd, with a
         lengthy chinese string.
         """
         val = str.encode('ascii', 'backslashreplace')
         try:
             val.decode('unicode-escape')
         except UnicodeDecodeError, e:
             args = list(e.args)
             badstring = args[1][args[2]:args[3]]
             i = len(args[1][:args[2]].decode('unicode-escape'))
             args[2] = i
             args[3] = i + len(badstring)
             raise UnicodeDecodeError(*args)

     @classmethod
     def use(cls, file):
         """Use this Checker only for DTD files in embedding/android."""
         return (file.module in ("embedding/android",
                                 "mobile/android/base")
                 and cls.pattern.match(file.file))

     def processContent(self, val):
         """Actual check code.
         Check for unicode escapes and unescaped quotes and apostrophes,
         if string's not quoted.
         """
         # first, try to decode unicode escapes
         try:
             self.unicode_escape(val)
         except UnicodeDecodeError, e:
             yield ('error', e.args[2], e.args[4], 'android')
         # check for unescaped single or double quotes.
         # first, see if the complete string is single or double quoted,
         # that changes the rules
         m = self.quoted.match(val)
         if m:
             q = m.group('q')
             offset = 0
             val = val[1:-1]  # strip quotes
         else:
             q = "[\"']"
             offset = -1
         stray_quot = re.compile(r"[\\\\]*(%s)" % q)

         for m in stray_quot.finditer(val):
             if len(m.group(0)) % 2:
                 # found an unescaped single or double quote, which message?
                 if m.group(1) == '"':
                     msg = u"Quotes in Android DTDs need escaping with \\\" "\
                           u"or \\u0022, or put string in apostrophes."
                 else:
                     msg = u"Apostrophes in Android DTDs need escaping with "\
                           u"\\' or \\u0027, or use \u2019, or put string in "\
                           u"quotes."
                 yield ('error', m.end(0)+offset, msg, 'android')


 def getChecker(file, reference=None):
     if PropertiesChecker.use(file):
         return PropertiesChecker()
     if PrincessAndroid.use(file):
         return PrincessAndroid(reference)
     if DTDChecker.use(file):
         return DTDChecker(reference)
     return None
	# This Source Code Form is subject to the terms of the Mozilla Public
	# License, v. 2.0. If a copy of the MPL was not distributed with this
	# file, You can obtain one at http://mozilla.org/MPL/2.0/.

	import re
	from difflib import SequenceMatcher
	from xml import sax
	try:
	from cStringIO import StringIO
	except ImportError:
	from StringIO import StringIO

	from compare_locales.parser import DTDParser, PropertiesParser


	class Checker(object):
	'''Abstract class to implement checks per file type.
	'''
	pattern = None

	@classmethod
	def use(cls, file):
	return cls.pattern.match(file.file)

	def check(self, refEnt, l10nEnt):
	'''Given the reference and localized Entities, performs checks.

	This is a generator yielding tuples of
	- "warning" or "error", depending on what should be reported,
	- tuple of line, column info for the error within the string
	- description string to be shown in the report
	'''
	if True:
	raise NotImplementedError("Need to subclass")
	yield ("error", (0, 0), "This is an example error", "example")


	class PrintfException(Exception):
	def __init__(self, msg, pos):
	self.pos = pos
	self.msg = msg


	class PropertiesChecker(Checker):
	'''Tests to run on .properties files.
	'''
	pattern = re.compile('.*\.properties$')
	printf = re.compile(r'%(?P<good>%\|'
	r'(?:(?P<number>[1-9][0-9]*)\$)?'
	r'(?P<width>\*\|[0-9]+)?'
	r'(?P<prec>\.(?:\*\|[0-9]+)?)?'
	r'(?P<spec>[duxXosScpfg]))?')

	def check(self, refEnt, l10nEnt):
	'''Test for the different variable formats.
	'''
	refValue, l10nValue = refEnt.val, l10nEnt.val
	refSpecs = None
	# check for PluralForm.jsm stuff, should have the docs in the
	# comment
	if 'Localization_and_Plurals' in refEnt.pre_comment:
	# For plurals, common variable pattern is #1. Try that.
	pats = set(int(m.group(1)) for m in re.finditer('#([0-9]+)',
	refValue))
	if len(pats) == 0:
	return
	lpats = set(int(m.group(1)) for m in re.finditer('#([0-9]+)',
	l10nValue))
	if pats - lpats:
	yield ('warning', 0, 'not all variables used in l10n',
	'plural')
	return
	if lpats - pats:
	yield ('error', 0, 'unreplaced variables in l10n',
	'plural')
	return
	return
	# check for lost escapes
	raw_val = l10nEnt.raw_val
	for m in PropertiesParser.escape.finditer(raw_val):
	if m.group('single') and \
	m.group('single') not in PropertiesParser.known_escapes:
	yield ('warning', m.start(),
	'unknown escape sequence, \\' + m.group('single'),
	'escape')
	try:
	refSpecs = self.getPrintfSpecs(refValue)
	except PrintfException:
	refSpecs = []
	if refSpecs:
	for t in self.checkPrintf(refSpecs, l10nValue):
	yield t
	return

	def checkPrintf(self, refSpecs, l10nValue):
	try:
	l10nSpecs = self.getPrintfSpecs(l10nValue)
	except PrintfException, e:
	yield ('error', e.pos, e.msg, 'printf')
	return
	if refSpecs != l10nSpecs:
	sm = SequenceMatcher()
	sm.set_seqs(refSpecs, l10nSpecs)
	msgs = []
	warn = None
	for action, i1, i2, j1, j2 in sm.get_opcodes():
	if action == 'equal':
	continue
	if action == 'delete':
	# missing argument in l10n
	if i2 == len(refSpecs):
	# trailing specs missing, that's just a warning
	warn = ', '.join('trailing argument %d `%s` missing' %
	(i+1, refSpecs[i])
	for i in xrange(i1, i2))
	else:
	for i in xrange(i1, i2):
	msgs.append('argument %d `%s` missing' %
	(i+1, refSpecs[i]))
	continue
	if action == 'insert':
	# obsolete argument in l10n
	for i in xrange(j1, j2):
	msgs.append('argument %d `%s` obsolete' %
	(i+1, l10nSpecs[i]))
	continue
	if action == 'replace':
	for i, j in zip(xrange(i1, i2), xrange(j1, j2)):
	msgs.append('argument %d `%s` should be `%s`' %
	(j+1, l10nSpecs[j], refSpecs[i]))
	if msgs:
	yield ('error', 0, ', '.join(msgs), 'printf')
	if warn is not None:
	yield ('warning', 0, warn, 'printf')

	def getPrintfSpecs(self, val):
	hasNumber = False
	specs = []
	for m in self.printf.finditer(val):
	if m.group("good") is None:
	# found just a '%', signal an error
	raise PrintfException('Found single %', m.start())
	if m.group("good") == '%':
	# escaped %
	continue
	if ((hasNumber and m.group('number') is None) or
	(not hasNumber and specs and
	m.group('number') is not None)):
	# mixed style, numbered and not
	raise PrintfException('Mixed ordered and non-ordered args',
	m.start())
	hasNumber = m.group('number') is not None
	if hasNumber:
	pos = int(m.group('number')) - 1
	ls = len(specs)
	if pos >= ls:
	# pad specs
	nones = pos - ls
	specs[ls:pos] = nones*[None]
	specs.append(m.group('spec'))
	else:
	if specs[pos] is not None:
	raise PrintfException('Double ordered argument %d' %
	(pos+1),
	m.start())
	specs[pos] = m.group('spec')
	else:
	specs.append(m.group('spec'))
	# check for missing args
	if hasNumber and not all(specs):
	raise PrintfException('Ordered argument missing', 0)
	return specs


	class DTDChecker(Checker):
	"""Tests to run on DTD files.

	Uses xml.sax for the heavy lifting of xml parsing.

	The code tries to parse until it doesn't find any unresolved entities
	anymore. If it finds one, it tries to grab the key, and adds an empty
	<!ENTITY key ""> definition to the header.

	Also checks for some CSS and number heuristics in the values.
	"""
	pattern = re.compile('.*\.dtd$')

	eref = re.compile('&(%s);' % DTDParser.Name)
	tmpl = '''<!DOCTYPE elem [%s]>
	<elem>%s</elem>
	'''
	xmllist = set(('amp', 'lt', 'gt', 'apos', 'quot'))

	def __init__(self, reference):
	self.reference = reference
	self.__known_entities = None

	def known_entities(self, refValue):
	if self.__known_entities is None and self.reference is not None:
	self.__known_entities = set()
	for ent in self.reference:
	self.__known_entities.update(self.entities_for_value(ent.val))
	return self.__known_entities if self.__known_entities is not None \
	else self.entities_for_value(refValue)

	def entities_for_value(self, value):
	reflist = set(m.group(1).encode('utf-8')
	for m in self.eref.finditer(value))
	reflist -= self.xmllist
	return reflist

	# Setup for XML parser, with default and text-only content handler
	class TextContent(sax.handler.ContentHandler):
	textcontent = ''

	def characters(self, content):
	self.textcontent += content

	defaulthandler = sax.handler.ContentHandler()
	texthandler = TextContent()

	numPattern = r'([0-9]+\|[0-9]*\.[0-9]+)'
	num = re.compile('^%s$' % numPattern)
	lengthPattern = '%s(em\|px\|ch\|cm\|in)' % numPattern
	length = re.compile('^%s$' % lengthPattern)
	spec = re.compile(r'((?:min\-)?(?:width\|height))\s:\s%s' %
	lengthPattern)
	style = re.compile(r'^%(spec)s\s(;\s%(spec)s\s);?$' %
	{'spec': spec.pattern})

	processContent = None

	def check(self, refEnt, l10nEnt):
	"""Try to parse the refvalue inside a dummy element, and keep
	track of entities that we need to define to make that work.

	Return a checker that offers just those entities.
	"""
	refValue, l10nValue = refEnt.val, l10nEnt.val
	# find entities the refValue references,
	# reusing markup from DTDParser.
	reflist = self.known_entities(refValue)
	entities = ''.join('<!ENTITY %s "">' % s for s in sorted(reflist))
	parser = sax.make_parser()
	parser.setFeature(sax.handler.feature_external_ges, False)

	parser.setContentHandler(self.defaulthandler)
	try:
	parser.parse(StringIO(self.tmpl %
	(entities, refValue.encode('utf-8'))))
	# also catch stray %
	parser.parse(StringIO(self.tmpl %
	(refEnt.all.encode('utf-8') + entities,
	'&%s;' % refEnt.key.encode('utf-8'))))
	except sax.SAXParseException, e:
	yield ('warning',
	(0, 0),
	"can't parse en-US value", 'xmlparse')

	# find entities the l10nValue references,
	# reusing markup from DTDParser.
	l10nlist = self.entities_for_value(l10nValue)
	missing = sorted(l10nlist - reflist)
	_entities = entities + ''.join('<!ENTITY %s "">' % s for s in missing)
	warntmpl = u'Referencing unknown entity `%s`'
	if reflist:
	warntmpl += ' (%s known)' % ', '.join(sorted(reflist))
	if self.processContent is not None:
	self.texthandler.textcontent = ''
	parser.setContentHandler(self.texthandler)
	try:
	parser.parse(StringIO(self.tmpl % (_entities,
	l10nValue.encode('utf-8'))))
	# also catch stray %
	# if this fails, we need to substract the entity definition
	parser.setContentHandler(self.defaulthandler)
	parser.parse(StringIO(self.tmpl % (
	l10nEnt.all.encode('utf-8') + _entities,
	'&%s;' % l10nEnt.key.encode('utf-8'))))
	except sax.SAXParseException, e:
	# xml parse error, yield error
	# sometimes, the error is reported on our fake closing
	# element, make that the end of the last line
	lnr = e.getLineNumber() - 1
	lines = l10nValue.splitlines()
	if lnr > len(lines):
	lnr = len(lines)
	col = len(lines[lnr-1])
	else:
	col = e.getColumnNumber()
	if lnr == 1:
	# first line starts with <elem>, substract
	col -= len("<elem>")
	elif lnr == 0:
	col -= len("<!DOCTYPE elem [") # first line is DOCTYPE
	yield ('error', (lnr, col), ' '.join(e.args), 'xmlparse')

	for key in missing:
	yield ('warning', (0, 0), warntmpl % key.decode('utf-8'),
	'xmlparse')

	# Number check
	if self.num.match(refValue) and not self.num.match(l10nValue):
	yield ('warning', 0, 'reference is a number', 'number')
	# CSS checks
	# just a length, width="100em"
	if self.length.match(refValue) and not self.length.match(l10nValue):
	yield ('error', 0, 'reference is a CSS length', 'css')
	# real CSS spec, style="width:100px;"
	if self.style.match(refValue):
	if not self.style.match(l10nValue):
	yield ('error', 0, 'reference is a CSS spec', 'css')
	else:
	# warn if different properties or units
	refMap = dict((s, u) for s, _, u in
	self.spec.findall(refValue))
	msgs = []
	for s, _, u in self.spec.findall(l10nValue):
	if s not in refMap:
	msgs.insert(0, '%s only in l10n' % s)
	continue
	else:
	ru = refMap.pop(s)
	if u != ru:
	msgs.append("units for %s don't match "
	"(%s != %s)" % (s, u, ru))
	for s in refMap.iterkeys():
	msgs.insert(0, '%s only in reference' % s)
	if msgs:
	yield ('warning', 0, ', '.join(msgs), 'css')

	if self.processContent is not None:
	for t in self.processContent(self.texthandler.textcontent):
	yield t


	class PrincessAndroid(DTDChecker):
	"""Checker for the string values that Android puts into an XML container.

	http://developer.android.com/guide/topics/resources/string-resource.html#FormattingAndStyling # noqa
	has more info. Check for unescaped apostrophes and bad unicode escapes.
	"""
	quoted = re.compile("(?P<q>[\"']).*(?P=q)$")

	def unicode_escape(self, str):
	"""Helper method to try to decode all unicode escapes in a string.

	This code uses the standard python decode for unicode-escape, but
	that's somewhat tricky, as its input needs to be ascii. To get to
	ascii, the unicode string gets converted to ascii with
	backslashreplace, i.e., all non-ascii unicode chars get unicode
	escaped. And then we try to roll all of that back.
	Now, when that hits an error, that's from the original string, and we
	need to search for the actual error position in the original string,
	as the backslashreplace code changes string positions quite badly.
	See also the last check in TestAndroid.test_android_dtd, with a
	lengthy chinese string.
	"""
	val = str.encode('ascii', 'backslashreplace')
	try:
	val.decode('unicode-escape')
	except UnicodeDecodeError, e:
	args = list(e.args)
	badstring = args[1][args[2]:args[3]]
	i = len(args[1][:args[2]].decode('unicode-escape'))
	args[2] = i
	args[3] = i + len(badstring)
	raise UnicodeDecodeError(*args)

	@classmethod
	def use(cls, file):
	"""Use this Checker only for DTD files in embedding/android."""
	return (file.module in ("embedding/android",
	"mobile/android/base")
	and cls.pattern.match(file.file))

	def processContent(self, val):
	"""Actual check code.
	Check for unicode escapes and unescaped quotes and apostrophes,
	if string's not quoted.
	"""
	# first, try to decode unicode escapes
	try:
	self.unicode_escape(val)
	except UnicodeDecodeError, e:
	yield ('error', e.args[2], e.args[4], 'android')
	# check for unescaped single or double quotes.
	# first, see if the complete string is single or double quoted,
	# that changes the rules
	m = self.quoted.match(val)
	if m:
	q = m.group('q')
	offset = 0
	val = val[1:-1] # strip quotes
	else:
	q = "[\"']"
	offset = -1
	stray_quot = re.compile(r"[\\\\]*(%s)" % q)

	for m in stray_quot.finditer(val):
	if len(m.group(0)) % 2:
	# found an unescaped single or double quote, which message?
	if m.group(1) == '"':
	msg = u"Quotes in Android DTDs need escaping with \\\" "\
	u"or \\u0022, or put string in apostrophes."
	else:
	msg = u"Apostrophes in Android DTDs need escaping with "\
	u"\\' or \\u0027, or use \u2019, or put string in "\
	u"quotes."
	yield ('error', m.end(0)+offset, msg, 'android')


	def getChecker(file, reference=None):
	if PropertiesChecker.use(file):
	return PropertiesChecker()
	if PrincessAndroid.use(file):
	return PrincessAndroid(reference)
	if DTDChecker.use(file):
	return DTDChecker(reference)
	return None