1# This Source Code Form is subject to the terms of the Mozilla Public
2# License, v. 2.0. If a copy of the MPL was not distributed with this file,
3# You can obtain one at http://mozilla.org/MPL/2.0/.
4
5import re
6
7
def _tokens2re(**tokens):
    '''
    Build a compiled regexp matching any of the given named tokens.

    Each keyword argument gives a match group name and the regexp pattern
    for that group. The returned regexp additionally has an "escape" match
    group, matching an escaped backslash.
    '''
    # Create a pattern for non-escaped tokens, in the form:
    #   (?<!\\)(?:a|b|c...)
    # This is meant to match patterns a, b, or c, or ... if they are not
    # preceded by a backslash.
    # where a, b, c... are in the form
    #   (?P<name>pattern)
    # which matches the pattern and captures it in a named match group.
    # The group names and patterns are given as arguments.
    #
    # dict.items() is used rather than dict.iteritems() because the latter
    # doesn't exist on Python 3, while the former works on both Python 2
    # and Python 3.
    all_tokens = '|'.join('(?P<%s>%s)' % (name, value)
                          for name, value in tokens.items())
    nonescaped = r'(?<!\\)(?:%s)' % all_tokens

    # The final pattern matches either the above pattern, or an escaped
    # backslash, captured in the "escape" match group.
    return re.compile('(?:%s|%s)' % (nonescaped, r'(?P<escape>\\\\)'))
24
# Tokens with a meaning outside of any quoted string.
UNQUOTED_TOKENS_RE = _tokens2re(
    whitespace=r'[\t\r\n ]+',
    quote=r'[\'"]',
    comment='#',
    special=r'[<>&|`~(){}$;\*\?]',
    backslashed=r'\\[^\\]',
)

# Tokens with a meaning inside a doubly quoted string.
# All patterns containing a backslash are raw strings: '\$' in a non-raw
# string is an invalid escape sequence, which recent Python versions warn
# about.
DOUBLY_QUOTED_TOKENS_RE = _tokens2re(
    quote='"',
    backslashedquote=r'\\"',
    special=r'\$',
    backslashed=r'\\[^\\"]',
)

# A backslash immediately followed by a newline.
ESCAPED_NEWLINES_RE = re.compile(r'\\\n')

# This regexp contains the same characters as all those listed in
# UNQUOTED_TOKENS_RE. Please keep in sync.
SHELL_QUOTE_RE = re.compile(r'[\\\t\r\n \'\"#<>&|`~(){}$;\*\?]')
45
46
class MetaCharacterException(Exception):
    '''
    Exception carrying, in its `char` attribute, the character it was
    raised for.
    '''
    def __init__(self, char):
        # Remember the character given by whoever raised the exception.
        self.char = char
50
51
class _ClineSplitter(object):
    '''
    Parses a given command line string and creates a list of command
    and arguments.

    The whole string is parsed by the constructor, and the arguments are
    stored in the `result` attribute. Special shell characters that are
    neither quoted nor escaped are not interpreted (no wildcard or variable
    expansion happens here): they raise MetaCharacterException instead.
    Unterminated quoted strings raise Exception.
    '''
    def __init__(self, cline):
        # Argument currently being assembled. None means no argument has
        # been started, which is different from an empty argument ('').
        self.arg = None
        # Part of the command line that hasn't been parsed yet.
        self.cline = cline
        # Arguments finalized so far.
        self.result = []
        self._parse_unquoted()

    def _push(self, text):
        '''
        Push the given string as part of the current argument
        '''
        if self.arg is None:
            self.arg = ''
        self.arg += text

    def _next(self):
        '''
        Finalize current argument, effectively adding it to the list.
        Does nothing when no argument has been started.
        '''
        if self.arg is None:
            return
        self.result.append(self.arg)
        self.arg = None

    def _parse_unquoted(self):
        '''
        Parse command line remainder in the context of an unquoted string.

        Raises MetaCharacterException on an unquoted, non-escaped special
        character.
        '''
        while self.cline:
            # Find the next token
            m = UNQUOTED_TOKENS_RE.search(self.cline)
            # If we find none, the remainder of the string can be pushed to
            # the current argument and the argument finalized
            if not m:
                self._push(self.cline)
                break
            # The beginning of the string, up to the found token, is part of
            # the current argument
            if m.start():
                self._push(self.cline[:m.start()])
            # Drop what was consumed, so that the next search starts on a
            # fresh string and the "not preceded by a backslash" check can't
            # see an already consumed backslash.
            self.cline = self.cline[m.end():]

            match = {name: value
                     for name, value in m.groupdict().items() if value}
            if 'quote' in match:
                # " or ' start a quoted string
                if match['quote'] == '"':
                    self._parse_doubly_quoted()
                else:
                    self._parse_quoted()
            elif 'comment' in match:
                # Comments are ignored. The current argument can be finalized,
                # and parsing stopped.
                break
            elif 'special' in match:
                # Unquoted, non-escaped special characters need to be sent to a
                # shell.
                raise MetaCharacterException(match['special'])
            elif 'whitespace' in match:
                # Whitespaces terminate current argument.
                self._next()
            elif 'escape' in match:
                # Escaped backslashes turn into a single backslash
                self._push('\\')
            elif 'backslashed' in match:
                # Backslashed characters are unbackslashed
                # e.g. echo \a -> a
                self._push(match['backslashed'][1])
            else:
                raise Exception("Shouldn't reach here")
        # Finalize the last argument. _next() itself ignores the "no argument
        # started" case (None), and, contrary to a truthiness test, it keeps a
        # trailing empty argument such as the one in `a ''`, consistently
        # with what happens for `a '' b`.
        self._next()

    def _parse_quoted(self):
        '''
        Parse command line remainder in the context of a single quoted
        string, the opening quote having been consumed already.

        Raises Exception if there is no closing quote.
        '''
        # Single quoted strings are preserved, except for the final quote
        index = self.cline.find("'")
        if index == -1:
            raise Exception('Unterminated quoted string in command')
        self._push(self.cline[:index])
        self.cline = self.cline[index+1:]

    def _parse_doubly_quoted(self):
        '''
        Parse command line remainder in the context of a doubly quoted
        string, the opening quote having been consumed already.

        Raises MetaCharacterException on a non-escaped `$`, and Exception if
        there is no closing quote.
        '''
        while self.cline:
            m = DOUBLY_QUOTED_TOKENS_RE.search(self.cline)
            if not m:
                break
            # Pushing unconditionally, even an empty string, is what makes
            # "" start an (empty) argument.
            self._push(self.cline[:m.start()])
            self.cline = self.cline[m.end():]
            match = {name: value
                     for name, value in m.groupdict().items() if value}
            if 'quote' in match:
                # a double quote ends the quoted string, so go back to
                # unquoted parsing
                return
            elif 'special' in match:
                # Unquoted, non-escaped special characters in a doubly quoted
                # string still have a special meaning and need to be sent to a
                # shell.
                raise MetaCharacterException(match['special'])
            elif 'escape' in match:
                # Escaped backslashes turn into a single backslash
                self._push('\\')
            elif 'backslashedquote' in match:
                # Backslashed double quotes are un-backslashed
                self._push('"')
            elif 'backslashed' in match:
                # Backslashed characters are kept backslashed
                self._push(match['backslashed'])
        # Getting here means the command line ran out (or has no token left)
        # before a closing double quote was found. That includes the case
        # where the last token consumed the end of the string, e.g. "a\"
        raise Exception('Unterminated quoted string in command')
166
167
def split(cline):
    '''
    Return the list of arguments the given command line string splits into.
    '''
    # Backslash-newline sequences are removed before any other parsing.
    return _ClineSplitter(ESCAPED_NEWLINES_RE.sub('', cline)).result
174
175
def _quote(s):
    '''Return a form of the given string that a shell takes literally.

    Strings that are non-empty and contain none of the characters matched by
    SHELL_QUOTE_RE are returned as is. Anything else is enclosed in single
    quotes.

    Ints (and only ints) are formatted as a decimal string, without quotes.
    '''
    if type(s) is int:
        return '%d' % s

    # An empty string must be quoted too, or it would just vanish from the
    # command line.
    needs_quoting = not s or SHELL_QUOTE_RE.search(s)
    if not needs_quoting:
        return s

    # Nothing can be escaped inside single quotes, not even a single quote.
    # Each one is thus replaced with: a closing quote, a backslash-escaped
    # quote, and a reopening quote.
    # Literals are converted to the type of the input so that the result has
    # the same type.
    kind = type(s)
    wrapper = kind("'%s'")
    return wrapper % s.replace(kind("'"), kind("'\\''"))
195
196
def quote(*strings):
    '''Quote each of the given strings for use on a shell command line, and
    return them joined with single spaces.

        >>> quote('a', 'b')
        'a b'
        >>> quote('a b', 'c')
        "'a b' c"
    '''
    return ' '.join(map(_quote, strings))
207
208
# Names exported by `from <this module> import *`.
__all__ = [
    'MetaCharacterException',
    'split',
    'quote',
]
210