blob: f3061f255b9f851f74565d03ed65405d481e3fc9 [file] [log] [blame]
"""
This is a very primitive line based preprocessor, for times when using
a C preprocessor isn't an option.
"""
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import sys
import os
import os.path
import re
from optparse import OptionParser
import errno
# hack around win32 mangling our line endings
# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/65443
if sys.platform == "win32":
import msvcrt
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
os.linesep = '\n'
import Expression
__all__ = ['Preprocessor', 'preprocess']
class Preprocessor:
"""
Class for preprocessing text files.
"""
class Error(RuntimeError):
def __init__(self, cpp, MSG, context):
self.file = cpp.context['FILE']
self.line = cpp.context['LINE']
self.key = MSG
RuntimeError.__init__(self, (self.file, self.line, self.key, context))
def __init__(self):
self.context = Expression.Context()
for k,v in {'FILE': '',
'LINE': 0,
'DIRECTORY': os.path.abspath('.')}.iteritems():
self.context[k] = v
self.actionLevel = 0
self.disableLevel = 0
# ifStates can be
# 0: hadTrue
# 1: wantsTrue
# 2: #else found
self.ifStates = []
self.checkLineNumbers = False
self.writtenLines = 0
self.filters = []
self.cmds = {}
for cmd, level in {'define': 0,
'undef': 0,
'if': sys.maxint,
'ifdef': sys.maxint,
'ifndef': sys.maxint,
'else': 1,
'elif': 1,
'elifdef': 1,
'elifndef': 1,
'endif': sys.maxint,
'expand': 0,
'literal': 0,
'filter': 0,
'unfilter': 0,
'include': 0,
'includesubst': 0,
'error': 0}.iteritems():
self.cmds[cmd] = (level, getattr(self, 'do_' + cmd))
self.out = sys.stdout
self.setMarker('#')
self.LE = '\n'
self.varsubst = re.compile('@(?P<VAR>\w+)@', re.U)
def warnUnused(self, file):
if self.actionLevel == 0:
sys.stderr.write('{0}: WARNING: no preprocessor directives found\n'.format(file))
elif self.actionLevel == 1:
sys.stderr.write('{0}: WARNING: no useful preprocessor directives found\n'.format(file))
pass
def setLineEndings(self, aLE):
"""
Set the line endings to be used for output.
"""
self.LE = {'cr': '\x0D', 'lf': '\x0A', 'crlf': '\x0D\x0A'}[aLE]
def setMarker(self, aMarker):
"""
Set the marker to be used for processing directives.
Used for handling CSS files, with pp.setMarker('%'), for example.
The given marker may be None, in which case no markers are processed.
"""
self.marker = aMarker
if aMarker:
self.instruction = re.compile('{0}(?P<cmd>[a-z]+)(?:\s(?P<args>.*))?$'
.format(aMarker),
re.U)
self.comment = re.compile(aMarker, re.U)
else:
class NoMatch(object):
def match(self, *args):
return False
self.instruction = self.comment = NoMatch()
def clone(self):
"""
Create a clone of the current processor, including line ending
settings, marker, variable definitions, output stream.
"""
rv = Preprocessor()
rv.context.update(self.context)
rv.setMarker(self.marker)
rv.LE = self.LE
rv.out = self.out
return rv
def applyFilters(self, aLine):
for f in self.filters:
aLine = f[1](aLine)
return aLine
def write(self, aLine):
"""
Internal method for handling output.
"""
if self.checkLineNumbers:
self.writtenLines += 1
ln = self.context['LINE']
if self.writtenLines != ln:
self.out.write('//@line {line} "{file}"{le}'.format(line=ln,
file=self.context['FILE'],
le=self.LE))
self.writtenLines = ln
filteredLine = self.applyFilters(aLine)
if filteredLine != aLine:
self.actionLevel = 2
# ensure our line ending. Only need to handle \n, as we're reading
# with universal line ending support, at least for files.
filteredLine = re.sub('\n', self.LE, filteredLine)
self.out.write(filteredLine)
def handleCommandLine(self, args, defaultToStdin = False):
"""
Parse a commandline into this parser.
Uses OptionParser internally, no args mean sys.argv[1:].
"""
p = self.getCommandLineParser()
(options, args) = p.parse_args(args=args)
includes = options.I
if options.output:
dir = os.path.dirname(options.output)
if dir and not os.path.exists(dir):
try:
os.makedirs(dir)
except OSError as error:
if error.errno != errno.EEXIST:
raise
self.out = open(options.output, 'w')
if defaultToStdin and len(args) == 0:
args = [sys.stdin]
includes.extend(args)
if includes:
for f in includes:
self.do_include(f, False)
self.warnUnused(f)
pass
def getCommandLineParser(self, unescapeDefines = False):
escapedValue = re.compile('".*"$')
numberValue = re.compile('\d+$')
def handleE(option, opt, value, parser):
for k,v in os.environ.iteritems():
self.context[k] = v
def handleD(option, opt, value, parser):
vals = value.split('=', 1)
if len(vals) == 1:
vals.append(1)
elif unescapeDefines and escapedValue.match(vals[1]):
# strip escaped string values
vals[1] = vals[1][1:-1]
elif numberValue.match(vals[1]):
vals[1] = int(vals[1])
self.context[vals[0]] = vals[1]
def handleU(option, opt, value, parser):
del self.context[value]
def handleF(option, opt, value, parser):
self.do_filter(value)
def handleLE(option, opt, value, parser):
self.setLineEndings(value)
def handleMarker(option, opt, value, parser):
self.setMarker(value)
p = OptionParser()
p.add_option('-I', action='append', type="string", default = [],
metavar="FILENAME", help='Include file')
p.add_option('-E', action='callback', callback=handleE,
help='Import the environment into the defined variables')
p.add_option('-D', action='callback', callback=handleD, type="string",
metavar="VAR[=VAL]", help='Define a variable')
p.add_option('-U', action='callback', callback=handleU, type="string",
metavar="VAR", help='Undefine a variable')
p.add_option('-F', action='callback', callback=handleF, type="string",
metavar="FILTER", help='Enable the specified filter')
p.add_option('-o', '--output', type="string", default=None,
metavar="FILENAME", help='Output to the specified file '+
'instead of stdout')
p.add_option('--line-endings', action='callback', callback=handleLE,
type="string", metavar="[cr|lr|crlf]",
help='Use the specified line endings [Default: OS dependent]')
p.add_option('--marker', action='callback', callback=handleMarker,
type="string",
help='Use the specified marker instead of #')
return p
def handleLine(self, aLine):
"""
Handle a single line of input (internal).
"""
if self.actionLevel == 0 and self.comment.match(aLine):
self.actionLevel = 1
m = self.instruction.match(aLine)
if m:
args = None
cmd = m.group('cmd')
try:
args = m.group('args')
except IndexError:
pass
if cmd not in self.cmds:
raise Preprocessor.Error(self, 'INVALID_CMD', aLine)
level, cmd = self.cmds[cmd]
if (level >= self.disableLevel):
cmd(args)
if cmd != 'literal':
self.actionLevel = 2
elif self.disableLevel == 0 and not self.comment.match(aLine):
self.write(aLine)
pass
# Instruction handlers
# These are named do_'instruction name' and take one argument
# Variables
def do_define(self, args):
m = re.match('(?P<name>\w+)(?:\s(?P<value>.*))?', args, re.U)
if not m:
raise Preprocessor.Error(self, 'SYNTAX_DEF', args)
val = 1
if m.group('value'):
val = self.applyFilters(m.group('value'))
try:
val = int(val)
except:
pass
self.context[m.group('name')] = val
def do_undef(self, args):
m = re.match('(?P<name>\w+)$', args, re.U)
if not m:
raise Preprocessor.Error(self, 'SYNTAX_DEF', args)
if args in self.context:
del self.context[args]
# Logic
def ensure_not_else(self):
if len(self.ifStates) == 0 or self.ifStates[-1] == 2:
sys.stderr.write('WARNING: bad nesting of #else\n')
def do_if(self, args, replace=False):
if self.disableLevel and not replace:
self.disableLevel += 1
return
val = None
try:
e = Expression.Expression(args)
val = e.evaluate(self.context)
except Exception:
# XXX do real error reporting
raise Preprocessor.Error(self, 'SYNTAX_ERR', args)
if type(val) == str:
# we're looking for a number value, strings are false
val = False
if not val:
self.disableLevel = 1
if replace:
if val:
self.disableLevel = 0
self.ifStates[-1] = self.disableLevel
else:
self.ifStates.append(self.disableLevel)
pass
def do_ifdef(self, args, replace=False):
if self.disableLevel and not replace:
self.disableLevel += 1
return
if re.match('\W', args, re.U):
raise Preprocessor.Error(self, 'INVALID_VAR', args)
if args not in self.context:
self.disableLevel = 1
if replace:
if args in self.context:
self.disableLevel = 0
self.ifStates[-1] = self.disableLevel
else:
self.ifStates.append(self.disableLevel)
pass
def do_ifndef(self, args, replace=False):
if self.disableLevel and not replace:
self.disableLevel += 1
return
if re.match('\W', args, re.U):
raise Preprocessor.Error(self, 'INVALID_VAR', args)
if args in self.context:
self.disableLevel = 1
if replace:
if args not in self.context:
self.disableLevel = 0
self.ifStates[-1] = self.disableLevel
else:
self.ifStates.append(self.disableLevel)
pass
def do_else(self, args, ifState = 2):
self.ensure_not_else()
hadTrue = self.ifStates[-1] == 0
self.ifStates[-1] = ifState # in-else
if hadTrue:
self.disableLevel = 1
return
self.disableLevel = 0
def do_elif(self, args):
if self.disableLevel == 1:
if self.ifStates[-1] == 1:
self.do_if(args, replace=True)
else:
self.do_else(None, self.ifStates[-1])
def do_elifdef(self, args):
if self.disableLevel == 1:
if self.ifStates[-1] == 1:
self.do_ifdef(args, replace=True)
else:
self.do_else(None, self.ifStates[-1])
def do_elifndef(self, args):
if self.disableLevel == 1:
if self.ifStates[-1] == 1:
self.do_ifndef(args, replace=True)
else:
self.do_else(None, self.ifStates[-1])
def do_endif(self, args):
if self.disableLevel > 0:
self.disableLevel -= 1
if self.disableLevel == 0:
self.ifStates.pop()
# output processing
def do_expand(self, args):
lst = re.split('__(\w+)__', args, re.U)
do_replace = False
def vsubst(v):
if v in self.context:
return str(self.context[v])
return ''
for i in range(1, len(lst), 2):
lst[i] = vsubst(lst[i])
lst.append('\n') # add back the newline
self.write(reduce(lambda x, y: x+y, lst, ''))
def do_literal(self, args):
self.write(args + self.LE)
def do_filter(self, args):
filters = [f for f in args.split(' ') if hasattr(self, 'filter_' + f)]
if len(filters) == 0:
return
current = dict(self.filters)
for f in filters:
current[f] = getattr(self, 'filter_' + f)
filterNames = current.keys()
filterNames.sort()
self.filters = [(fn, current[fn]) for fn in filterNames]
return
def do_unfilter(self, args):
filters = args.split(' ')
current = dict(self.filters)
for f in filters:
if f in current:
del current[f]
filterNames = current.keys()
filterNames.sort()
self.filters = [(fn, current[fn]) for fn in filterNames]
return
# Filters
#
# emptyLines
# Strips blank lines from the output.
def filter_emptyLines(self, aLine):
if aLine == '\n':
return ''
return aLine
# slashslash
# Strips everything after //
def filter_slashslash(self, aLine):
if (aLine.find('//') == -1):
return aLine
[aLine, rest] = aLine.split('//', 1)
if rest:
aLine += '\n'
return aLine
# spaces
# Collapses sequences of spaces into a single space
def filter_spaces(self, aLine):
return re.sub(' +', ' ', aLine).strip(' ')
# substition
# helper to be used by both substition and attemptSubstitution
def filter_substitution(self, aLine, fatal=True):
def repl(matchobj):
varname = matchobj.group('VAR')
if varname in self.context:
return str(self.context[varname])
if fatal:
raise Preprocessor.Error(self, 'UNDEFINED_VAR', varname)
return matchobj.group(0)
return self.varsubst.sub(repl, aLine)
def filter_attemptSubstitution(self, aLine):
return self.filter_substitution(aLine, fatal=False)
# File ops
def do_include(self, args, filters=True):
"""
Preprocess a given file.
args can either be a file name, or a file-like object.
Files should be opened, and will be closed after processing.
"""
isName = type(args) == str or type(args) == unicode
oldWrittenLines = self.writtenLines
oldCheckLineNumbers = self.checkLineNumbers
self.checkLineNumbers = False
if isName:
try:
args = str(args)
if filters:
args = self.applyFilters(args)
if not os.path.isabs(args):
args = os.path.join(self.context['DIRECTORY'], args)
args = open(args, 'rU')
except Preprocessor.Error:
raise
except:
raise Preprocessor.Error(self, 'FILE_NOT_FOUND', str(args))
self.checkLineNumbers = bool(re.search('\.(js|jsm|java)(?:\.in)?$', args.name))
oldFile = self.context['FILE']
oldLine = self.context['LINE']
oldDir = self.context['DIRECTORY']
if args.isatty():
# we're stdin, use '-' and '' for file and dir
self.context['FILE'] = '-'
self.context['DIRECTORY'] = ''
else:
abspath = os.path.abspath(args.name)
self.context['FILE'] = abspath
self.context['DIRECTORY'] = os.path.dirname(abspath)
self.context['LINE'] = 0
self.writtenLines = 0
for l in args:
self.context['LINE'] += 1
self.handleLine(l)
args.close()
self.context['FILE'] = oldFile
self.checkLineNumbers = oldCheckLineNumbers
self.writtenLines = oldWrittenLines
self.context['LINE'] = oldLine
self.context['DIRECTORY'] = oldDir
def do_includesubst(self, args):
args = self.filter_substitution(args)
self.do_include(args)
def do_error(self, args):
raise Preprocessor.Error(self, 'Error: ', str(args))
def main():
pp = Preprocessor()
pp.handleCommandLine(None, True)
return
def preprocess(includes=[sys.stdin], defines={},
output = sys.stdout,
line_endings='\n', marker='#'):
pp = Preprocessor()
pp.context.update(defines)
pp.setLineEndings(line_endings)
pp.setMarker(marker)
pp.out = output
for f in includes:
pp.do_include(f, False)
if __name__ == "__main__":
main()