1# -*- coding: utf-8 -*-
2
3# This copy of shlex.py from Python 3.6 is distributed with argcomplete.
4# It contains only the shlex class, with modifications as noted.
5
6"""A lexical analyzer class for simple shell-like syntaxes."""
7
8# Module and documentation by Eric S. Raymond, 21 Dec 1998
9# Input stacking and error message cleanup added by ESR, March 2000
10# push_source() and pop_source() made explicit by ESR, January 2001.
11# Posix compliance, split(), string arguments, and
12# iterator interface by Gustavo Niemeyer, April 2003.
13# changes to tokenize more like Posix shells by Vinay Sajip, July 2016.
14
15import os
16import sys
17from collections import deque
18
19# Modified by argcomplete: 2/3 compatibility
20# Note: cStringIO is not compatible with Unicode
21try:
22    from StringIO import StringIO
23except ImportError:
24    from io import StringIO
25
26# Modified by argcomplete: 2/3 compatibility
27try:
28    basestring
29except NameError:
30    basestring = str
31
class shlex:
    """A lexical analyzer class for simple shell-like syntaxes.

    The lexer reads characters one at a time from ``instream`` and groups
    them into tokens with a small state machine (see ``read_token``).

    Attributes that steer tokenization (all may be reassigned by callers):

    * ``commenters``        -- characters that start a comment.
    * ``wordchars``         -- characters that accumulate into a word token.
    * ``whitespace``        -- characters that separate tokens.
    * ``whitespace_split``  -- if true, any non-whitespace, non-special
      character is accumulated into the current word.
    * ``quotes``            -- quote characters.
    * ``escape``            -- escape characters (honoured in posix mode).
    * ``escapedquotes``     -- quotes inside which ``escape`` is honoured.
    * ``punctuation_chars`` -- characters returned as runs of punctuation.
    * ``source``            -- if not None, the token that triggers inclusion
      of another input source through ``sourcehook``.
    * ``eof``               -- the value returned at end of input: ``None`` in
      posix mode, ``''`` otherwise.

    Modified by argcomplete: ``wordbreaks`` and ``last_wordbreak_pos`` record
    the index, within the token being built, of the last character that is
    listed in ``wordbreaks``.
    """

    def __init__(self, instream=None, infile=None, posix=False,
                 punctuation_chars=False):
        """Set up the lexer.

        instream          -- a string or a file-like object offering
                             ``read()`` and ``readline()``; ``sys.stdin`` is
                             used when it is None.
        infile            -- name associated with ``instream``; ignored
                             (stored as None) when ``instream`` is None.
        posix             -- enable posix-mode quoting and escaping rules.
        punctuation_chars -- False/empty for none, True for ``'();<>|&'``,
                             or a custom collection of characters.
        """
        # Modified by argcomplete: 2/3 compatibility
        if isinstance(instream, basestring):
            instream = StringIO(instream)
        if instream is not None:
            self.instream = instream
            self.infile = infile
        else:
            self.instream = sys.stdin
            self.infile = None
        self.posix = posix
        if posix:
            self.eof = None
        else:
            self.eof = ''
        self.commenters = '#'
        self.wordchars = ('abcdfeghijklmnopqrstuvwxyz'
                          'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
        # Modified by argcomplete: 2/3 compatibility
        # if self.posix:
        #     self.wordchars += ('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
        #                        'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')
        self.whitespace = ' \t\r\n'
        self.whitespace_split = False
        self.quotes = '\'"'
        self.escape = '\\'
        self.escapedquotes = '"'
        # Lexer state: ' ' between tokens, 'a' inside a word, 'c' inside a
        # run of punctuation, a quote/escape character while inside quotes or
        # right after an escape, and None once the input is exhausted.
        self.state = ' '
        self.pushback = deque()
        self.lineno = 1
        self.debug = 0
        self.token = ''
        self.filestack = deque()
        self.source = None
        if not punctuation_chars:
            punctuation_chars = ''
        elif punctuation_chars is True:
            punctuation_chars = '();<>|&'
        self.punctuation_chars = punctuation_chars
        if punctuation_chars:
            # _pushback_chars is a push back queue used by lookahead logic
            self._pushback_chars = deque()
            # these chars added because allowed in file names, args, wildcards
            self.wordchars += '~-./*?='
            # Remove any punctuation chars from wordchars.  A plain filter is
            # used instead of str.maketrans()/translate(): the maketrans()
            # method only exists on Python 3 strings, so the original code
            # raised AttributeError on Python 2.
            self.wordchars = ''.join(
                char for char in self.wordchars
                if char not in punctuation_chars)

        # Modified by argcomplete: Record last wordbreak position
        self.last_wordbreak_pos = None
        self.wordbreaks = ''

    def push_token(self, tok):
        """Push a token onto the stack popped by the get_token method."""
        if self.debug >= 1:
            print("shlex: pushing token " + repr(tok))
        self.pushback.appendleft(tok)

    def push_source(self, newstream, newfile=None):
        """Push an input source onto the lexer's input source stack.

        ``newstream`` may be a string or a file-like object.  The current
        source (file name, stream and line number) is saved on ``filestack``
        and the line counter restarts at 1 for the new source.
        """
        # Modified by argcomplete: 2/3 compatibility
        if isinstance(newstream, basestring):
            newstream = StringIO(newstream)
        self.filestack.appendleft((self.infile, self.instream, self.lineno))
        self.infile = newfile
        self.instream = newstream
        self.lineno = 1
        if self.debug:
            if newfile is not None:
                print('shlex: pushing to file %s' % (self.infile,))
            else:
                print('shlex: pushing to stream %s' % (self.instream,))

    def pop_source(self):
        """Pop the input source stack.

        Closes the current stream, restores the most recently pushed source
        and resets the lexer state to "between tokens".
        """
        self.instream.close()
        (self.infile, self.instream, self.lineno) = self.filestack.popleft()
        if self.debug:
            print('shlex: popping to %s, line %d'
                  % (self.instream, self.lineno))
        self.state = ' '

    def get_token(self):
        """Get a token from the input stream (or from stack if it's nonempty).

        Returns ``self.eof`` once the outermost input source is exhausted.
        """
        if self.pushback:
            tok = self.pushback.popleft()
            if self.debug >= 1:
                print("shlex: popping token " + repr(tok))
            return tok
        # No pushback.  Get a token.
        raw = self.read_token()
        # Handle inclusions
        if self.source is not None:
            while raw == self.source:
                # The token following the source keyword names what to
                # include.
                spec = self.sourcehook(self.read_token())
                if spec:
                    (newfile, newstream) = spec
                    self.push_source(newstream, newfile)
                raw = self.get_token()
        # Maybe we got EOF instead?
        while raw == self.eof:
            if not self.filestack:
                return self.eof
            else:
                # An included source ran dry: resume the one that included it.
                self.pop_source()
                raw = self.get_token()
        # Neither inclusion nor EOF
        if self.debug >= 1:
            if raw != self.eof:
                print("shlex: token=" + repr(raw))
            else:
                print("shlex: token=EOF")
        return raw

    def read_token(self):
        """Read one raw token from the current input stream.

        Unlike ``get_token`` this ignores the token pushback stack and does
        not handle source inclusion.  In posix mode an empty, unquoted result
        is returned as None; otherwise the accumulated string is returned.

        Raises ValueError on end of input inside quotes ("No closing
        quotation") or directly after an escape character ("No escaped
        character").
        """
        quoted = False
        # State to return to once the character after an escape is consumed.
        escapedstate = ' '
        while True:
            # Characters pushed back by the lookahead logic take precedence
            # over fresh input.
            if self.punctuation_chars and self._pushback_chars:
                nextchar = self._pushback_chars.pop()
            else:
                nextchar = self.instream.read(1)
            if nextchar == '\n':
                self.lineno += 1
            if self.debug >= 3:
                print("shlex: in state %r I see character: %r" % (self.state,
                                                                  nextchar))
            if self.state is None:
                self.token = ''        # past end of file
                break
            elif self.state == ' ':
                # Between tokens: decide what kind of token starts here.
                if not nextchar:
                    self.state = None  # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("shlex: I see whitespace in whitespace state")
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
                elif nextchar in self.commenters:
                    # Discard the rest of the comment line.
                    self.instream.readline()
                    self.lineno += 1
                elif self.posix and nextchar in self.escape:
                    escapedstate = 'a'
                    self.state = nextchar
                elif nextchar in self.wordchars:
                    self.token = nextchar
                    self.state = 'a'
                elif nextchar in self.punctuation_chars:
                    self.token = nextchar
                    self.state = 'c'
                elif nextchar in self.quotes:
                    # Non-posix mode keeps the quote characters in the token.
                    if not self.posix:
                        self.token = nextchar
                    self.state = nextchar
                elif self.whitespace_split:
                    self.token = nextchar
                    self.state = 'a'
                else:
                    # Any other character is a one-character token.
                    self.token = nextchar
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
            elif self.state in self.quotes:
                # Inside a quoted section; self.state is the opening quote.
                quoted = True
                if not nextchar:      # end of file
                    if self.debug >= 2:
                        print("shlex: I see EOF in quotes state")
                    # XXX what error should be raised here?
                    raise ValueError("No closing quotation")
                if nextchar == self.state:
                    if not self.posix:
                        self.token += nextchar
                        self.state = ' '
                        break
                    else:
                        # Posix mode: the word may continue after the quote.
                        self.state = 'a'
                elif (self.posix and nextchar in self.escape and self.state
                      in self.escapedquotes):
                    escapedstate = self.state
                    self.state = nextchar
                else:
                    self.token += nextchar
            elif self.state in self.escape:
                # Right after an escape character; self.state is that
                # character.
                if not nextchar:      # end of file
                    if self.debug >= 2:
                        print("shlex: I see EOF in escape state")
                    # XXX what error should be raised here?
                    raise ValueError("No escaped character")
                # In posix shells, only the quote itself or the escape
                # character may be escaped within quotes.
                if (escapedstate in self.quotes and
                        nextchar != self.state and nextchar != escapedstate):
                    self.token += self.state
                self.token += nextchar
                self.state = escapedstate
            elif self.state in ('a', 'c'):
                # Inside a word ('a') or a run of punctuation ('c').
                if not nextchar:
                    self.state = None   # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("shlex: I see whitespace in word state")
                    self.state = ' '
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
                elif nextchar in self.commenters:
                    self.instream.readline()
                    self.lineno += 1
                    if self.posix:
                        self.state = ' '
                        if self.token or (self.posix and quoted):
                            break   # emit current token
                        else:
                            continue
                elif self.posix and nextchar in self.quotes:
                    self.state = nextchar
                elif self.posix and nextchar in self.escape:
                    escapedstate = 'a'
                    self.state = nextchar
                elif self.state == 'c':
                    if nextchar in self.punctuation_chars:
                        self.token += nextchar
                    else:
                        # The punctuation run is over: save the character for
                        # the next token and emit the run.
                        if nextchar not in self.whitespace:
                            self._pushback_chars.append(nextchar)
                        self.state = ' '
                        break
                elif (nextchar in self.wordchars or nextchar in self.quotes
                      or self.whitespace_split):
                    self.token += nextchar
                    # Modified by argcomplete: Record last wordbreak position
                    if nextchar in self.wordbreaks:
                        self.last_wordbreak_pos = len(self.token) - 1
                else:
                    # The character does not belong to the word: push it back
                    # so that it starts the next token.
                    if self.punctuation_chars:
                        self._pushback_chars.append(nextchar)
                    else:
                        self.pushback.appendleft(nextchar)
                    if self.debug >= 2:
                        print("shlex: I see punctuation in word state")
                    self.state = ' '
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
        result = self.token
        self.token = ''
        if self.posix and not quoted and result == '':
            result = None
        if self.debug > 1:
            if result:
                print("shlex: raw token=" + repr(result))
            else:
                print("shlex: raw token=EOF")
        # Modified by argcomplete: Record last wordbreak position
        # The position only stays set when the lexer did not end up back in
        # the "between tokens" state.
        if self.state == ' ':
            self.last_wordbreak_pos = None
        return result

    def sourcehook(self, newfile):
        """Hook called on a filename to be sourced.

        Returns a ``(filename, open_file)`` tuple.  A name starting with a
        double quote has its first and last characters stripped.
        """
        if newfile[0] == '"':
            newfile = newfile[1:-1]
        # This implements cpp-like semantics for relative-path inclusion.
        # Modified by argcomplete: 2/3 compatibility
        if isinstance(self.infile, basestring) and not os.path.isabs(newfile):
            newfile = os.path.join(os.path.dirname(self.infile), newfile)
        return (newfile, open(newfile, "r"))

    def error_leader(self, infile=None, lineno=None):
        """Emit a C-compiler-like, Emacs-friendly error-message leader.

        ``infile`` and ``lineno`` default to the lexer's current file name
        and line number.
        """
        if infile is None:
            infile = self.infile
        if lineno is None:
            lineno = self.lineno
        return "\"%s\", line %d: " % (infile, lineno)

    def __iter__(self):
        """Return the lexer itself; it is its own iterator."""
        return self

    def __next__(self):
        """Return the next token, raising StopIteration at end of input."""
        token = self.get_token()
        if token == self.eof:
            raise StopIteration
        return token

    # Modified by argcomplete: 2/3 compatibility
    next = __next__
329