blob: 15befa1a34f7a51922ad8b80af23992822e5dd9c [file] [log] [blame]
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
import re
def _tokens2re(**tokens):
# Create a pattern for non-escaped tokens, in the form:
# (?<!\\)(?:a|b|c...)
# This is meant to match patterns a, b, or c, or ... if they are not
# preceded by a backslash.
# where a, b, c... are in the form
# (?P<name>pattern)
# which matches the pattern and captures it in a named match group.
# The group names and patterns are given as arguments.
all_tokens = '|'.join('(?P<%s>%s)' % (name, value)
for name, value in tokens.iteritems())
nonescaped = r'(?<!\\)(?:%s)' % all_tokens
# The final pattern matches either the above pattern, or an escaped
# backslash, captured in the "escape" match group.
return re.compile('(?:%s|%s)' % (nonescaped, r'(?P<escape>\\\\)'))
UNQUOTED_TOKENS_RE = _tokens2re(
whitespace=r'[\t\r\n ]+',
quote=r'[\'"]',
comment='#',
special=r'[<>&|`~(){}$;\*\?]',
backslashed=r'\\[^\\]',
)
DOUBLY_QUOTED_TOKENS_RE = _tokens2re(
quote='"',
backslashedquote=r'\\"',
special='\$',
backslashed=r'\\[^\\"]',
)
ESCAPED_NEWLINES_RE = re.compile(r'\\\n')
# This regexp contains the same characters as all those listed in
# UNQUOTED_TOKENS_RE. Please keep in sync.
SHELL_QUOTE_RE = re.compile('[\\\t\r\n \'\"#<>&|`~(){}$;\*\?]')
class MetaCharacterException(Exception):
def __init__(self, char):
self.char = char
class _ClineSplitter(object):
'''
Parses a given command line string and creates a list of command
and arguments, with wildcard expansion.
'''
def __init__(self, cline):
self.arg = None
self.cline = cline
self.result = []
self._parse_unquoted()
def _push(self, str):
'''
Push the given string as part of the current argument
'''
if self.arg is None:
self.arg = ''
self.arg += str
def _next(self):
'''
Finalize current argument, effectively adding it to the list.
'''
if self.arg is None:
return
self.result.append(self.arg)
self.arg = None
def _parse_unquoted(self):
'''
Parse command line remainder in the context of an unquoted string.
'''
while self.cline:
# Find the next token
m = UNQUOTED_TOKENS_RE.search(self.cline)
# If we find none, the remainder of the string can be pushed to
# the current argument and the argument finalized
if not m:
self._push(self.cline)
break
# The beginning of the string, up to the found token, is part of
# the current argument
if m.start():
self._push(self.cline[:m.start()])
self.cline = self.cline[m.end():]
match = {name: value
for name, value in m.groupdict().items() if value}
if 'quote' in match:
# " or ' start a quoted string
if match['quote'] == '"':
self._parse_doubly_quoted()
else:
self._parse_quoted()
elif 'comment' in match:
# Comments are ignored. The current argument can be finalized,
# and parsing stopped.
break
elif 'special' in match:
# Unquoted, non-escaped special characters need to be sent to a
# shell.
raise MetaCharacterException(match['special'])
elif 'whitespace' in match:
# Whitespaces terminate current argument.
self._next()
elif 'escape' in match:
# Escaped backslashes turn into a single backslash
self._push('\\')
elif 'backslashed' in match:
# Backslashed characters are unbackslashed
# e.g. echo \a -> a
self._push(match['backslashed'][1])
else:
raise Exception("Shouldn't reach here")
if self.arg:
self._next()
def _parse_quoted(self):
# Single quoted strings are preserved, except for the final quote
index = self.cline.find("'")
if index == -1:
raise Exception('Unterminated quoted string in command')
self._push(self.cline[:index])
self.cline = self.cline[index+1:]
def _parse_doubly_quoted(self):
if not self.cline:
raise Exception('Unterminated quoted string in command')
while self.cline:
m = DOUBLY_QUOTED_TOKENS_RE.search(self.cline)
if not m:
raise Exception('Unterminated quoted string in command')
self._push(self.cline[:m.start()])
self.cline = self.cline[m.end():]
match = {name: value
for name, value in m.groupdict().items() if value}
if 'quote' in match:
# a double quote ends the quoted string, so go back to
# unquoted parsing
return
elif 'special' in match:
# Unquoted, non-escaped special characters in a doubly quoted
# string still have a special meaning and need to be sent to a
# shell.
raise MetaCharacterException(match['special'])
elif 'escape' in match:
# Escaped backslashes turn into a single backslash
self._push('\\')
elif 'backslashedquote' in match:
# Backslashed double quotes are un-backslashed
self._push('"')
elif 'backslashed' in match:
# Backslashed characters are kept backslashed
self._push(match['backslashed'])
def split(cline):
'''
Split the given command line string.
'''
s = ESCAPED_NEWLINES_RE.sub('', cline)
return _ClineSplitter(s).result
def quote(s):
'''Given a string, returns a version that can be used literally on a shell
command line, enclosing it with single quotes if necessary.
As a special case, if given an int, returns a string containing the int,
not enclosed in quotes.
'''
if type(s) == int:
return '%d' % s
# Empty strings need to be quoted to have any significance
if s and not SHELL_QUOTE_RE.search(s):
return s
# Single quoted strings can contain any characters unescaped except the
# single quote itself, which can't even be escaped, so the string needs to
# be closed, an escaped single quote added, and reopened.
t = type(s)
return t("'%s'") % s.replace(t("'"), t("'\\''"))
__all__ = ['MetaCharacterException', 'split', 'quote']