# -*- coding: utf-8 -*-

# This copy of shlex.py from Python 3.6 is distributed with argcomplete.
# It contains only the shlex class, with modifications as noted.

"""A lexical analyzer class for simple shell-like syntaxes."""

# Module and documentation by Eric S. Raymond, 21 Dec 1998
# Input stacking and error message cleanup added by ESR, March 2000
# push_source() and pop_source() made explicit by ESR, January 2001.
# Posix compliance, split(), string arguments, and
# iterator interface by Gustavo Niemeyer, April 2003.
# changes to tokenize more like Posix shells by Vinay Sajip, July 2016.

import os
import sys
from collections import deque

# Modified by argcomplete: 2/3 compatibility
# Note: cStringIO is not compatible with Unicode
try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO

# Modified by argcomplete: 2/3 compatibility
try:
    basestring
except NameError:
    basestring = str


class shlex:
    """A lexical analyzer class for simple shell-like syntaxes.

    Tokens are read one character at a time from ``instream`` by a small
    state machine (see ``read_token``).  The lexer is configured through
    plain instance attributes set in ``__init__`` (``commenters``,
    ``wordchars``, ``whitespace``, ``whitespace_split``, ``quotes``,
    ``escape``, ``escapedquotes``, ``source``, ``debug``), which callers may
    change after construction.

    argcomplete additions: ``wordbreaks`` is a set of characters whose most
    recent position inside the token being built is recorded in
    ``last_wordbreak_pos``.
    """

    def __init__(self, instream=None, infile=None, posix=False,
                 punctuation_chars=False):
        """Create a lexer.

        instream: a string or a file-like object offering ``read(1)`` and
            ``readline()``; ``sys.stdin`` is used when it is None.
        infile: name associated with ``instream`` (used by ``error_leader``
            and ``sourcehook``); ignored when ``instream`` is None.
        posix: enable POSIX-style quote/escape handling; EOF is then
            reported as None instead of ''.
        punctuation_chars: False/empty for none, True for ``'();<>|&'``, or
            an explicit collection of characters to be returned as
            punctuation tokens.
        """
        # Modified by argcomplete: 2/3 compatibility
        if isinstance(instream, basestring):
            instream = StringIO(instream)
        if instream is not None:
            self.instream = instream
            self.infile = infile
        else:
            self.instream = sys.stdin
            self.infile = None
        self.posix = posix
        if posix:
            self.eof = None
        else:
            self.eof = ''
        self.commenters = '#'
        self.wordchars = ('abcdfeghijklmnopqrstuvwxyz'
                          'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
        # Modified by argcomplete: 2/3 compatibility
        # if self.posix:
        #     self.wordchars += ('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
        #                        'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')
        self.whitespace = ' \t\r\n'
        self.whitespace_split = False
        self.quotes = '\'"'
        self.escape = '\\'
        self.escapedquotes = '"'
        # Current state of the tokenizer: ' ' (between tokens), 'a' (in a
        # word), 'c' (in a punctuation run), a quote or escape character,
        # or None once end of input has been seen.
        self.state = ' '
        self.pushback = deque()
        self.lineno = 1
        self.debug = 0
        self.token = ''
        self.filestack = deque()
        self.source = None
        if not punctuation_chars:
            punctuation_chars = ''
        elif punctuation_chars is True:
            punctuation_chars = '();<>|&'
        self.punctuation_chars = punctuation_chars
        if punctuation_chars:
            # _pushback_chars is a push back queue used by lookahead logic
            self._pushback_chars = deque()
            # these chars added because allowed in file names, args, wildcards
            self.wordchars += '~-./*?='
            # remove any punctuation chars from wordchars
            # Modified for 2/3 compatibility: str.maketrans() only exists on
            # Python 3, so filter the characters explicitly instead.
            self.wordchars = ''.join(
                c for c in self.wordchars if c not in punctuation_chars)

        # Modified by argcomplete: Record last wordbreak position
        self.last_wordbreak_pos = None
        self.wordbreaks = ''

    def push_token(self, tok):
        """Push a token onto the stack popped by the get_token method."""
        if self.debug >= 1:
            print("shlex: pushing token " + repr(tok))
        self.pushback.appendleft(tok)

    def push_source(self, newstream, newfile=None):
        """Push an input source onto the lexer's input source stack.

        ``newstream`` may be a string or a file-like object.  The current
        (infile, instream, lineno) triple is saved on ``filestack`` and
        line counting restarts at 1 for the new source.
        """
        # Modified by argcomplete: 2/3 compatibility
        if isinstance(newstream, basestring):
            newstream = StringIO(newstream)
        self.filestack.appendleft((self.infile, self.instream, self.lineno))
        self.infile = newfile
        self.instream = newstream
        self.lineno = 1
        if self.debug:
            if newfile is not None:
                print('shlex: pushing to file %s' % (self.infile,))
            else:
                print('shlex: pushing to stream %s' % (self.instream,))

    def pop_source(self):
        """Pop the input source stack.

        Closes the current stream, restores the most recently saved
        (infile, instream, lineno) triple and resets the tokenizer state.
        """
        self.instream.close()
        (self.infile, self.instream, self.lineno) = self.filestack.popleft()
        if self.debug:
            print('shlex: popping to %s, line %d' \
                  % (self.instream, self.lineno))
        self.state = ' '

    def get_token(self):
        """Get a token from the input stream (or from stack if it's nonempty).

        Handles source inclusion (when ``self.source`` is set) and pops
        exhausted input sources; returns ``self.eof`` once the outermost
        source is exhausted.
        """
        if self.pushback:
            tok = self.pushback.popleft()
            if self.debug >= 1:
                print("shlex: popping token " + repr(tok))
            return tok
        # No pushback.  Get a token.
        raw = self.read_token()
        # Handle inclusions
        if self.source is not None:
            while raw == self.source:
                spec = self.sourcehook(self.read_token())
                if spec:
                    (newfile, newstream) = spec
                    self.push_source(newstream, newfile)
                raw = self.get_token()
        # Maybe we got EOF instead?
        while raw == self.eof:
            if not self.filestack:
                return self.eof
            else:
                self.pop_source()
                raw = self.get_token()
        # Neither inclusion nor EOF
        if self.debug >= 1:
            if raw != self.eof:
                print("shlex: token=" + repr(raw))
            else:
                print("shlex: token=EOF")
        return raw

    def read_token(self):
        """Read one raw token from the current input stream.

        Unlike ``get_token`` this neither consults the token pushback queue
        nor handles source inclusion or the input source stack.

        Returns the token text.  At end of input the result is '' in
        non-POSIX mode and None in POSIX mode (a quoted empty string in
        POSIX mode is returned as '').

        Raises ValueError if input ends inside a quoted string or directly
        after an escape character.
        """
        quoted = False
        # State to return to once the character after an escape is consumed.
        escapedstate = ' '
        while True:
            if self.punctuation_chars and self._pushback_chars:
                nextchar = self._pushback_chars.pop()
            else:
                nextchar = self.instream.read(1)
            if nextchar == '\n':
                self.lineno += 1
            if self.debug >= 3:
                print("shlex: in state %r I see character: %r" % (self.state,
                                                                  nextchar))
            if self.state is None:
                self.token = ''        # past end of file
                break
            elif self.state == ' ':
                if not nextchar:
                    self.state = None  # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("shlex: I see whitespace in whitespace state")
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
                elif nextchar in self.commenters:
                    # Discard the rest of the comment line.
                    self.instream.readline()
                    self.lineno += 1
                elif self.posix and nextchar in self.escape:
                    escapedstate = 'a'
                    self.state = nextchar
                elif nextchar in self.wordchars:
                    self.token = nextchar
                    self.state = 'a'
                elif nextchar in self.punctuation_chars:
                    self.token = nextchar
                    self.state = 'c'
                elif nextchar in self.quotes:
                    # Non-POSIX mode keeps the quote characters in the token.
                    if not self.posix:
                        self.token = nextchar
                    self.state = nextchar
                elif self.whitespace_split:
                    self.token = nextchar
                    self.state = 'a'
                else:
                    # Any other character is a one-character token.
                    self.token = nextchar
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
            elif self.state in self.quotes:
                quoted = True
                if not nextchar:      # end of file
                    if self.debug >= 2:
                        print("shlex: I see EOF in quotes state")
                    # XXX what error should be raised here?
                    raise ValueError("No closing quotation")
                if nextchar == self.state:
                    if not self.posix:
                        self.token += nextchar
                        self.state = ' '
                        break
                    else:
                        self.state = 'a'
                elif (self.posix and nextchar in self.escape and self.state
                      in self.escapedquotes):
                    escapedstate = self.state
                    self.state = nextchar
                else:
                    self.token += nextchar
            elif self.state in self.escape:
                if not nextchar:      # end of file
                    if self.debug >= 2:
                        print("shlex: I see EOF in escape state")
                    # XXX what error should be raised here?
                    raise ValueError("No escaped character")
                # In posix shells, only the quote itself or the escape
                # character may be escaped within quotes.
                if (escapedstate in self.quotes and
                        nextchar != self.state and nextchar != escapedstate):
                    self.token += self.state
                self.token += nextchar
                self.state = escapedstate
            elif self.state in ('a', 'c'):
                if not nextchar:
                    self.state = None   # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("shlex: I see whitespace in word state")
                    self.state = ' '
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
                elif nextchar in self.commenters:
                    self.instream.readline()
                    self.lineno += 1
                    if self.posix:
                        self.state = ' '
                        if self.token or (self.posix and quoted):
                            break   # emit current token
                        else:
                            continue
                elif self.posix and nextchar in self.quotes:
                    self.state = nextchar
                elif self.posix and nextchar in self.escape:
                    escapedstate = 'a'
                    self.state = nextchar
                elif self.state == 'c':
                    if nextchar in self.punctuation_chars:
                        self.token += nextchar
                    else:
                        # The punctuation run is over; keep the lookahead
                        # character for the next token unless it is
                        # whitespace.
                        if nextchar not in self.whitespace:
                            self._pushback_chars.append(nextchar)
                        self.state = ' '
                        break
                elif (nextchar in self.wordchars or nextchar in self.quotes
                      or self.whitespace_split):
                    self.token += nextchar
                    # Modified by argcomplete: Record last wordbreak position
                    if nextchar in self.wordbreaks:
                        self.last_wordbreak_pos = len(self.token) - 1
                else:
                    if self.punctuation_chars:
                        self._pushback_chars.append(nextchar)
                    else:
                        self.pushback.appendleft(nextchar)
                    if self.debug >= 2:
                        print("shlex: I see punctuation in word state")
                    self.state = ' '
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
        result = self.token
        self.token = ''
        if self.posix and not quoted and result == '':
            result = None
        if self.debug > 1:
            if result:
                print("shlex: raw token=" + repr(result))
            else:
                print("shlex: raw token=EOF")
        # Modified by argcomplete: Record last wordbreak position
        if self.state == ' ':
            self.last_wordbreak_pos = None
        return result

    def sourcehook(self, newfile):
        """Hook called on a filename to be sourced.

        Strips surrounding double quotes from ``newfile``, resolves a
        relative path against the directory of the current ``infile`` (when
        that is a string), and returns ``(filename, open file object)``.
        """
        if newfile[0] == '"':
            newfile = newfile[1:-1]
        # This implements cpp-like semantics for relative-path inclusion.
        # Modified by argcomplete: 2/3 compatibility
        if isinstance(self.infile, basestring) and not os.path.isabs(newfile):
            newfile = os.path.join(os.path.dirname(self.infile), newfile)
        return (newfile, open(newfile, "r"))

    def error_leader(self, infile=None, lineno=None):
        """Emit a C-compiler-like, Emacs-friendly error-message leader.

        ``infile`` and ``lineno`` default to the lexer's current values.
        """
        if infile is None:
            infile = self.infile
        if lineno is None:
            lineno = self.lineno
        return "\"%s\", line %d: " % (infile, lineno)

    def __iter__(self):
        """Return the lexer itself; iterating yields tokens until EOF."""
        return self

    def __next__(self):
        """Return the next token, raising StopIteration at end of input."""
        token = self.get_token()
        if token == self.eof:
            raise StopIteration
        return token

    # Modified by argcomplete: 2/3 compatibility
    next = __next__