# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

import re


def _tokens2re(**tokens):
    '''
    Build a compiled regexp matching any of the given named tokens.

    Each keyword argument maps a group name to a regexp pattern. The
    returned regexp matches the first occurrence of any of those patterns
    that is not preceded by a backslash, or an escaped backslash (two
    consecutive backslashes), which is captured in the "escape" group.
    '''
    # Create a pattern for non-escaped tokens, in the form:
    #   (?<!\\)(?:a|b|c...)
    # This is meant to match patterns a, b, or c, or ... if they are not
    # preceded by a backslash.
    # where a, b, c... are in the form
    #   (?P<name>pattern)
    # which matches the pattern and captures it in a named match group.
    # The group names and patterns are given as arguments.
    # Note: items() rather than iteritems(), so that the module can be
    # imported on both Python 2 and Python 3.
    all_tokens = '|'.join('(?P<%s>%s)' % (name, value)
                          for name, value in tokens.items())
    nonescaped = r'(?<!\\)(?:%s)' % all_tokens

    # The final pattern matches either the above pattern, or an escaped
    # backslash, captured in the "escape" match group.
    return re.compile('(?:%s|%s)' % (nonescaped, r'(?P<escape>\\\\)'))


# Tokens that have a meaning outside of any quoted string.
UNQUOTED_TOKENS_RE = _tokens2re(
    whitespace=r'[\t\r\n ]+',
    quote=r'[\'"]',
    comment='#',
    special=r'[<>&|`~(){}$;\*\?]',
    backslashed=r'\\[^\\]',
)

# Tokens that have a meaning inside a doubly quoted string.
DOUBLY_QUOTED_TOKENS_RE = _tokens2re(
    quote='"',
    backslashedquote=r'\\"',
    special=r'\$',
    backslashed=r'\\[^\\"]',
)

# A backslash immediately followed by a newline (line continuation).
ESCAPED_NEWLINES_RE = re.compile(r'\\\n')

# This regexp contains the same characters as all those listed in
# UNQUOTED_TOKENS_RE. Please keep in sync.
SHELL_QUOTE_RE = re.compile(r'[\\\t\r\n \'\"#<>&|`~(){}$;\*\?]')


class MetaCharacterException(Exception):
    '''
    Raised by split() when the command line contains an unquoted,
    non-escaped shell metacharacter. The offending character is available
    as the `char` attribute.
    '''
    def __init__(self, char):
        # Forward to the base class so that str(exception) is meaningful.
        super(MetaCharacterException, self).__init__(char)
        self.char = char


class _ClineSplitter(object):
    '''
    Parses a given command line string and creates a list of command
    and arguments, available as the `result` attribute once the instance
    is constructed.

    No wildcard or variable expansion is performed: unquoted, non-escaped
    shell metacharacters raise MetaCharacterException instead.
    '''
    def __init__(self, cline):
        # Argument currently being built; None means "no argument started",
        # which is distinct from an empty argument ('').
        self.arg = None
        # Remainder of the command line that is yet to be parsed.
        self.cline = cline
        # Finalized arguments.
        self.result = []
        self._parse_unquoted()

    def _push(self, text):
        '''
        Push the given string as part of the current argument
        '''
        if self.arg is None:
            self.arg = ''
        self.arg += text

    def _next(self):
        '''
        Finalize current argument, effectively adding it to the list.
        Does nothing when no argument has been started.
        '''
        if self.arg is None:
            return
        self.result.append(self.arg)
        self.arg = None

    def _parse_unquoted(self):
        '''
        Parse command line remainder in the context of an unquoted string.

        Raises MetaCharacterException on an unquoted, non-escaped special
        character, and Exception on an unterminated quoted string.
        '''
        while self.cline:
            # Find the next token
            m = UNQUOTED_TOKENS_RE.search(self.cline)
            # If we find none, the remainder of the string can be pushed to
            # the current argument and the argument finalized
            if not m:
                self._push(self.cline)
                break
            # The beginning of the string, up to the found token, is part of
            # the current argument
            if m.start():
                self._push(self.cline[:m.start()])
            self.cline = self.cline[m.end():]

            # Only keep the group that actually matched.
            match = {name: value
                     for name, value in m.groupdict().items() if value}
            if 'quote' in match:
                # " or ' start a quoted string
                if match['quote'] == '"':
                    self._parse_doubly_quoted()
                else:
                    self._parse_quoted()
            elif 'comment' in match:
                # Comments are ignored. The current argument can be finalized,
                # and parsing stopped.
                break
            elif 'special' in match:
                # Unquoted, non-escaped special characters need to be sent to a
                # shell.
                raise MetaCharacterException(match['special'])
            elif 'whitespace' in match:
                # Whitespaces terminate current argument.
                self._next()
            elif 'escape' in match:
                # Escaped backslashes turn into a single backslash
                self._push('\\')
            elif 'backslashed' in match:
                # Backslashed characters are unbackslashed
                # e.g. echo \a -> a
                self._push(match['backslashed'][1])
            else:
                raise Exception("Shouldn't reach here")
        # Finalize whatever argument is in progress. _next() ignores the
        # "no argument started" case (self.arg is None), but keeps an
        # explicitly empty trailing argument such as the one in `a ''`.
        self._next()

    def _parse_quoted(self):
        '''
        Parse command line remainder in the context of a singly quoted
        string. Raises Exception when the closing quote is missing.
        '''
        # Single quoted strings are preserved, except for the final quote
        index = self.cline.find("'")
        if index == -1:
            raise Exception('Unterminated quoted string in command')
        self._push(self.cline[:index])
        self.cline = self.cline[index+1:]

    def _parse_doubly_quoted(self):
        '''
        Parse command line remainder in the context of a doubly quoted
        string.

        Raises MetaCharacterException on a non-escaped `$`, and Exception
        when the closing quote is missing.
        '''
        while self.cline:
            m = DOUBLY_QUOTED_TOKENS_RE.search(self.cline)
            if not m:
                break
            self._push(self.cline[:m.start()])
            self.cline = self.cline[m.end():]
            match = {name: value
                     for name, value in m.groupdict().items() if value}
            if 'quote' in match:
                # a double quote ends the quoted string, so go back to
                # unquoted parsing
                return
            elif 'special' in match:
                # Unquoted, non-escaped special characters in a doubly quoted
                # string still have a special meaning and need to be sent to a
                # shell.
                raise MetaCharacterException(match['special'])
            elif 'escape' in match:
                # Escaped backslashes turn into a single backslash
                self._push('\\')
            elif 'backslashedquote' in match:
                # Backslashed double quotes are un-backslashed
                self._push('"')
            elif 'backslashed' in match:
                # Backslashed characters are kept backslashed
                self._push(match['backslashed'])
        # Reaching this point means the input ran out (or contained no more
        # tokens) before a closing double quote was found.
        raise Exception('Unterminated quoted string in command')


def split(cline):
    '''
    Split the given command line string into a list of arguments.

    Backslash-newline sequences are removed before parsing. Raises
    MetaCharacterException when the command line contains unquoted,
    non-escaped shell metacharacters, and Exception on an unterminated
    quoted string.
    '''
    s = ESCAPED_NEWLINES_RE.sub('', cline)
    return _ClineSplitter(s).result


def _quote(s):
    '''Given a string, returns a version that can be used literally on a shell
    command line, enclosing it with single quotes if necessary.

    As a special case, if given an int, returns a string containing the int,
    not enclosed in quotes.
    '''
    if isinstance(s, int):
        return '%d' % s

    # Empty strings need to be quoted to have any significance
    if s and not SHELL_QUOTE_RE.search(s):
        return s

    # Single quoted strings can contain any characters unescaped except the
    # single quote itself, which can't even be escaped, so the string needs to
    # be closed, an escaped single quote added, and reopened.
    # The literals are converted to the type of the input so that the result
    # has the same string type as what was given.
    t = type(s)
    return t("'%s'") % s.replace(t("'"), t("'\\''"))


def quote(*strings):
    '''Given one or more strings, returns a quoted string that can be used
    literally on a shell command line.

        >>> quote('a', 'b')
        'a b'
        >>> quote('a b', 'c')
        "'a b' c"
    '''
    return ' '.join(_quote(s) for s in strings)


__all__ = ['MetaCharacterException', 'split', 'quote']