1"""Input transformer machinery to support IPython special syntax.
2
3This includes the machinery to recognise and transform ``%magic`` commands,
4``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5
6Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
7deprecated in 7.0.
8"""
9
10# Copyright (c) IPython Development Team.
11# Distributed under the terms of the Modified BSD License.
12
13import ast
14import sys
15from codeop import CommandCompiler, Compile
16import re
17import tokenize
18from typing import List, Tuple, Union
19import warnings
20
21_indent_re = re.compile(r'^[ \t]+')
22
23def leading_empty_lines(lines):
24    """Remove leading empty lines
25
26    If the leading lines are empty or contain only whitespace, they will be
27    removed.
28    """
29    if not lines:
30        return lines
31    for i, line in enumerate(lines):
32        if line and not line.isspace():
33            return lines[i:]
34    return lines
35
def leading_indent(lines):
    """Remove leading indentation.

    If the first line starts with spaces or tabs, the same whitespace will be
    removed from each following line in the cell.
    """
    if not lines:
        return lines
    m = _indent_re.match(lines[0])
    if not m:
        return lines
    space = m.group(0)
    n = len(space)
    return [l[n:] if l.startswith(space) else l
            for l in lines]
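
# A minimal illustrative sketch (comments only, not executed): applying
# leading_indent to a uniformly indented paste removes the first line's
# indentation from every line that shares it.
#
#     leading_indent(['    if x:\n', '        y\n'])
#     # -> ['if x:\n', '    y\n']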

class PromptStripper:
    """Remove matching input prompts from a block of input.

    Parameters
    ----------
    prompt_re : regular expression
        A regular expression matching any input prompt (including continuation,
        e.g. ``...``)
    initial_re : regular expression, optional
        A regular expression matching only the initial prompt, but not continuation.
        If no initial expression is given, prompt_re will be used everywhere.
        Used mainly for plain Python prompts (``>>>``), where the continuation prompt
        ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.

    Notes
    -----

    If initial_re and prompt_re differ,
    only initial_re will be tested against the first line.
    If any prompt is found on the first two lines,
    prompts will be stripped from the rest of the block.
    """
    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        self.initial_re = initial_re or prompt_re

    def _strip(self, lines):
        return [self.prompt_re.sub('', l, count=1) for l in lines]

    def __call__(self, lines):
        if not lines:
            return lines
        if self.initial_re.match(lines[0]) or \
                (len(lines) > 1 and self.prompt_re.match(lines[1])):
            return self._strip(lines)
        return lines

classic_prompt = PromptStripper(
    prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
    initial_re=re.compile(r'^>>>( |$)')
)

ipython_prompt = PromptStripper(re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'))
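
# A minimal illustrative sketch (comments only, not executed): the two
# strippers above remove pasted prompts so the remaining text is plain Python.
#
#     classic_prompt(['>>> a = 1\n', '... b = a + 1\n'])
#     # -> ['a = 1\n', 'b = a + 1\n']
#
#     ipython_prompt(['In [1]: x = 1\n', '   ...: y = 2\n'])
#     # -> ['x = 1\n', 'y = 2\n']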

def cell_magic(lines):
    if not lines or not lines[0].startswith('%%'):
        return lines
    if re.match(r'%%\w+\?', lines[0]):
        # This case will be handled by help_end
        return lines
    magic_name, _, first_line = lines[0][2:].rstrip().partition(' ')
    body = ''.join(lines[1:])
    return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
            % (magic_name, first_line, body)]
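
# A minimal illustrative sketch (comments only, not executed): a %% cell is
# collapsed into a single run_cell_magic() call, passing the rest of the first
# line and the cell body as strings.
#
#     cell_magic(['%%timeit -n1\n', 'x = sum(range(10))\n'])
#     # -> ["get_ipython().run_cell_magic('timeit', '-n1', 'x = sum(range(10))\\n')\n"]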


def _find_assign_op(token_line) -> Union[int, None]:
    """Get the index of the first assignment in the line ('=' not inside brackets)

    Note: We don't try to support multiple special assignment (a = b = %foo)
    """
    paren_level = 0
    for i, ti in enumerate(token_line):
        s = ti.string
        if s == '=' and paren_level == 0:
            return i
        if s in {'(','[','{'}:
            paren_level += 1
        elif s in {')', ']', '}'}:
            if paren_level > 0:
                paren_level -= 1

def find_end_of_continued_line(lines, start_line: int):
    """Find the last line of a line explicitly extended using backslashes.

    Uses 0-indexed line numbers.
    """
    end_line = start_line
    while lines[end_line].endswith('\\\n'):
        end_line += 1
        if end_line >= len(lines):
            break
    return end_line

def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
    r"""Assemble a single line from multiple continued line pieces

    Continued lines are lines ending in ``\``, and the line following the last
    ``\`` in the block.

    For example, this code continues over multiple lines::

        if (assign_ix is not None) \
             and (len(line) >= assign_ix + 2) \
             and (line[assign_ix+1].string == '%') \
             and (line[assign_ix+2].type == tokenize.NAME):

    This statement contains four continued line pieces.
    Assembling these pieces into a single line would give::

        if (assign_ix is not None) and (len(line) >= assign_ix + 2) and (line[...

    This uses 0-indexed line numbers. *start* is (lineno, colno).

    Used to allow ``%magic`` and ``!system`` commands to be continued over
    multiple lines.
    """
    parts = [lines[start[0]][start[1]:]] + lines[start[0]+1:end_line+1]
    return ' '.join([p.rstrip()[:-1] for p in parts[:-1]]  # Strip backslash+newline
                    + [parts[-1].rstrip()])         # Strip newline from last line

class TokenTransformBase:
    """Base class for transformations which examine tokens.

    Special syntax should not be transformed when it occurs inside strings or
    comments. This is hard to reliably avoid with regexes. The solution is to
    tokenise the code as Python, and recognise the special syntax in the tokens.

    IPython's special syntax is not valid Python syntax, so tokenising may go
    wrong after the special syntax starts. These classes therefore find and
    transform *one* instance of special syntax at a time into regular Python
    syntax. After each transformation, tokens are regenerated to find the next
    piece of special syntax.

    Subclasses need to implement one class method (find)
    and one regular method (transform).

    The priority attribute can select which transformation to apply if multiple
    transformers match in the same place. Lower numbers have higher priority.
    This allows "%magic?" to be turned into a help call rather than a magic call.
    """
    # Lower numbers -> higher priority (for matches in the same location)
    priority = 10

    def sortby(self):
        return self.start_line, self.start_col, self.priority

    def __init__(self, start):
        self.start_line = start[0] - 1   # Shift from 1-index to 0-index
        self.start_col = start[1]

    @classmethod
    def find(cls, tokens_by_line):
        """Find one instance of special syntax in the provided tokens.

        Tokens are grouped into logical lines for convenience,
        so it is easy to e.g. look at the first token of each line.
        *tokens_by_line* is a list of lists of tokenize.TokenInfo objects.

        This should return an instance of its class, pointing to the start
        position it has found, or None if it found no match.
        """
        raise NotImplementedError

    def transform(self, lines: List[str]):
        """Transform one instance of special syntax found by ``find()``

        Takes a list of strings representing physical lines,
        returns a similar list of transformed lines.
        """
        raise NotImplementedError

class MagicAssign(TokenTransformBase):
    """Transformer for assignments from magics (a = %foo)"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            if (assign_ix is not None) \
                    and (len(line) >= assign_ix + 2) \
                    and (line[assign_ix+1].string == '%') \
                    and (line[assign_ix+2].type == tokenize.NAME):
                return cls(line[assign_ix+1].start)

    def transform(self, lines: List[str]):
        """Transform a magic assignment found by the ``find()`` classmethod.
        """
        start_line, start_col = self.start_line, self.start_col
        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')

        lines_before = lines[:start_line]
        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line+1:]

        return lines_before + [new_line] + lines_after
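
# A minimal illustrative sketch (comments only, not executed): once located by
# MagicAssign.find(), transform() rewrites an assignment from a line magic as
# an assignment from run_line_magic().
#
#     ['a = %env PATH\n']
#     # -> ["a = get_ipython().run_line_magic('env', 'PATH')\n"]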


class SystemAssign(TokenTransformBase):
    """Transformer for assignments from system commands (a = !foo)"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first system assignment (a = !foo) in the cell.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            if (assign_ix is not None) \
                    and not line[assign_ix].line.strip().startswith('=') \
                    and (len(line) >= assign_ix + 2) \
                    and (line[assign_ix + 1].type == tokenize.ERRORTOKEN):
                ix = assign_ix + 1

                while ix < len(line) and line[ix].type == tokenize.ERRORTOKEN:
                    if line[ix].string == '!':
                        return cls(line[ix].start)
                    elif not line[ix].string.isspace():
                        break
                    ix += 1

    def transform(self, lines: List[str]):
        """Transform a system assignment found by the ``find()`` classmethod.
        """
        start_line, start_col = self.start_line, self.start_col

        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('!'), rhs
        cmd = rhs[1:]

        lines_before = lines[:start_line]
        call = "get_ipython().getoutput({!r})".format(cmd)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line + 1:]

        return lines_before + [new_line] + lines_after
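
# A minimal illustrative sketch (comments only, not executed): once located by
# SystemAssign.find(), transform() rewrites an assignment from a system
# command as an assignment from getoutput().
#
#     ['files = !ls -l\n']
#     # -> ["files = get_ipython().getoutput('ls -l')\n"]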

# The escape sequences that define the syntax transformations IPython will
# apply to user input.  These can NOT be just changed here: many regular
# expressions and other parts of the code may use their hardcoded values, and
# for all intents and purposes they constitute the 'IPython syntax', so they
# should be considered fixed.

ESC_SHELL  = '!'     # Send line to underlying system shell
ESC_SH_CAP = '!!'    # Send line to system shell and capture output
ESC_HELP   = '?'     # Find information about object
ESC_HELP2  = '??'    # Find extra-detailed information about object
ESC_MAGIC  = '%'     # Call magic function
ESC_MAGIC2 = '%%'    # Call cell-magic function
ESC_QUOTE  = ','     # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';'     # Quote all args as a single string, call
ESC_PAREN  = '/'     # Call first argument with rest of line as arguments

ESCAPE_SINGLES = {'!', '?', '%', ',', ';', '/'}
ESCAPE_DOUBLES = {'!!', '??'}  # %% (cell magic) is handled separately

def _make_help_call(target, esc, next_input=None):
    """Prepares a pinfo(2)/psearch call from a target name and the escape
    (i.e. ? or ??)"""
    method  = 'pinfo2' if esc == '??' \
                else 'psearch' if '*' in target \
                else 'pinfo'
    arg = " ".join([method, target])
    # Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
    t_magic_name, _, t_magic_arg_s = arg.partition(' ')
    t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
    if next_input is None:
        return 'get_ipython().run_line_magic(%r, %r)' % (t_magic_name, t_magic_arg_s)
    else:
        return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
           (next_input, t_magic_name, t_magic_arg_s)

def _tr_help(content):
    """Translate lines escaped with: ?

    A naked help line should fire the intro help screen (shell.show_usage())
    """
    if not content:
        return 'get_ipython().show_usage()'

    return _make_help_call(content, '?')

def _tr_help2(content):
    """Translate lines escaped with: ??

    A naked help line should fire the intro help screen (shell.show_usage())
    """
    if not content:
        return 'get_ipython().show_usage()'

    return _make_help_call(content, '??')

def _tr_magic(content):
    "Translate lines escaped with a percent sign: %"
    name, _, args = content.partition(' ')
    return 'get_ipython().run_line_magic(%r, %r)' % (name, args)

def _tr_quote(content):
    "Translate lines escaped with a comma: ,"
    name, _, args = content.partition(' ')
    return '%s("%s")' % (name, '", "'.join(args.split()))

def _tr_quote2(content):
    "Translate lines escaped with a semicolon: ;"
    name, _, args = content.partition(' ')
    return '%s("%s")' % (name, args)

def _tr_paren(content):
    "Translate lines escaped with a slash: /"
    name, _, args = content.partition(' ')
    return '%s(%s)' % (name, ", ".join(args.split()))

tr = { ESC_SHELL  : 'get_ipython().system({!r})'.format,
       ESC_SH_CAP : 'get_ipython().getoutput({!r})'.format,
       ESC_HELP   : _tr_help,
       ESC_HELP2  : _tr_help2,
       ESC_MAGIC  : _tr_magic,
       ESC_QUOTE  : _tr_quote,
       ESC_QUOTE2 : _tr_quote2,
       ESC_PAREN  : _tr_paren }
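
# A minimal illustrative sketch (comments only, not executed) of how each
# escape rewrites the rest of the line (f stands for any callable in the
# user's namespace):
#
#     !echo hi     ->  get_ipython().system('echo hi')
#     !!echo hi    ->  get_ipython().getoutput('echo hi')
#     %time pass   ->  get_ipython().run_line_magic('time', 'pass')
#     ,f a b c     ->  f("a", "b", "c")
#     ;f a b c     ->  f("a b c")
#     /f a b c     ->  f(a, b, c)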

class EscapedCommand(TokenTransformBase):
    """Transformer for escaped commands like %foo, !foo, or /foo"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first escaped command (%foo, !foo, etc.) in the cell.
        """
        for line in tokens_by_line:
            if not line:
                continue
            ix = 0
            ll = len(line)
            while ll > ix and line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1
            if ix >= ll:
                continue
            if line[ix].string in ESCAPE_SINGLES:
                return cls(line[ix].start)

    def transform(self, lines):
        """Transform an escaped line found by the ``find()`` classmethod.
        """
        start_line, start_col = self.start_line, self.start_col

        indent = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        line = assemble_continued_line(lines, (start_line, start_col), end_line)

        if len(line) > 1 and line[:2] in ESCAPE_DOUBLES:
            escape, content = line[:2], line[2:]
        else:
            escape, content = line[:1], line[1:]

        if escape in tr:
            call = tr[escape](content)
        else:
            call = ''

        lines_before = lines[:start_line]
        new_line = indent + call + '\n'
        lines_after = lines[end_line + 1:]

        return lines_before + [new_line] + lines_after
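
# A minimal illustrative sketch (comments only, not executed): EscapedCommand
# keeps any leading indentation and rewrites the escaped line in place using
# the tr table above.
#
#     ['if True:\n', '    !pwd\n']
#     # -> ['if True:\n', "    get_ipython().system('pwd')\n"]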

_help_end_re = re.compile(r"""(%{0,2}
                              (?!\d)[\w*]+            # Variable name
                              (\.(?!\d)[\w*]+)*       # .etc.etc
                              )
                              (\?\??)$                # ? or ??
                              """,
                              re.VERBOSE)

class HelpEnd(TokenTransformBase):
    """Transformer for help syntax: obj? and obj??"""
    # This needs to be higher priority (lower number) than EscapedCommand so
    # that inspecting magics (%foo?) works.
    priority = 5

    def __init__(self, start, q_locn):
        super().__init__(start)
        self.q_line = q_locn[0] - 1  # Shift from 1-indexed to 0-indexed
        self.q_col = q_locn[1]

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first help command (foo?) in the cell.
        """
        for line in tokens_by_line:
            # Last token is NEWLINE; look at last but one
            if len(line) > 2 and line[-2].string == '?':
                # Find the first token that's not INDENT/DEDENT
                ix = 0
                while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                    ix += 1
                return cls(line[ix].start, line[-2].start)

    def transform(self, lines):
        """Transform a help command found by the ``find()`` classmethod.
        """
        piece = ''.join(lines[self.start_line:self.q_line+1])
        indent, content = piece[:self.start_col], piece[self.start_col:]
        lines_before = lines[:self.start_line]
        lines_after = lines[self.q_line + 1:]

        m = _help_end_re.search(content)
        if not m:
            raise SyntaxError(content)
        assert m is not None, content
        target = m.group(1)
        esc = m.group(3)

        # If we're mid-command, put it back on the next prompt for the user.
        next_input = None
        if (not lines_before) and (not lines_after) \
                and content.strip() != m.group(0):
            next_input = content.rstrip('?\n')

        call = _make_help_call(target, esc, next_input=next_input)
        new_line = indent + call + '\n'

        return lines_before + [new_line] + lines_after
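
# A minimal illustrative sketch (comments only, not executed): trailing-?
# help is rewritten into a pinfo/pinfo2 magic call.
#
#     ['float?\n']   ->  ["get_ipython().run_line_magic('pinfo', 'float')\n"]
#     ['float??\n']  ->  ["get_ipython().run_line_magic('pinfo2', 'float')\n"]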

def make_tokens_by_line(lines: List[str]):
    """Tokenize a series of lines and group tokens by line.

    The tokens for a multiline Python string or expression are grouped as one
    line. All lines except the last line should keep their line ending ('\\n',
    '\\r\\n') for this to work properly. Use `.splitlines(keepends=True)`, for
    example, when passing a block of text to this function.
    """
    # NL tokens are used inside multiline expressions, but also after blank
    # lines or comments. This is intentional - see https://bugs.python.org/issue17061
    # We want to group the former case together but split the latter, so we
    # track parentheses level, similar to the internals of tokenize.
    NEWLINE, NL = tokenize.NEWLINE, tokenize.NL
    tokens_by_line = [[]]
    if len(lines) > 1 and not lines[0].endswith(('\n', '\r', '\r\n', '\x0b', '\x0c')):
        warnings.warn("`make_tokens_by_line` received a list of lines which do not have lineending markers ('\\n', '\\r', '\\r\\n', '\\x0b', '\\x0c'), behavior will be unspecified")
    parenlev = 0
    try:
        for token in tokenize.generate_tokens(iter(lines).__next__):
            tokens_by_line[-1].append(token)
            if (token.type == NEWLINE) \
                    or ((token.type == NL) and (parenlev <= 0)):
                tokens_by_line.append([])
            elif token.string in {'(', '[', '{'}:
                parenlev += 1
            elif token.string in {')', ']', '}'}:
                if parenlev > 0:
                    parenlev -= 1
    except tokenize.TokenError:
        # Input ended in a multiline string or expression. That's OK for us.
        pass

    if not tokens_by_line[-1]:
        tokens_by_line.pop()

    return tokens_by_line
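
# A minimal illustrative sketch (comments only, not executed): a statement
# spanning several physical lines inside brackets is grouped as one logical
# line, and a final group holds the ENDMARKER token.
#
#     groups = make_tokens_by_line(['a = (1 +\n', '     2)\n', 'b = 3\n'])
#     # len(groups) == 3: the 'a = (1 + 2)' tokens, the 'b = 3' tokens,
#     # and [ENDMARKER]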

def show_linewise_tokens(s: str):
    """For investigation and debugging"""
    if not s.endswith('\n'):
        s += '\n'
    lines = s.splitlines(keepends=True)
    for line in make_tokens_by_line(lines):
        print("Line -------")
        for tokinfo in line:
            print(" ", tokinfo)

# Arbitrary limit to prevent getting stuck in infinite loops
TRANSFORM_LOOP_LIMIT = 500

class TransformerManager:
    """Applies various transformations to a cell or code block.

    The key methods for external use are ``transform_cell()``
    and ``check_complete()``.
    """
    def __init__(self):
        self.cleanup_transforms = [
            leading_empty_lines,
            leading_indent,
            classic_prompt,
            ipython_prompt,
        ]
        self.line_transforms = [
            cell_magic,
        ]
        self.token_transformers = [
            MagicAssign,
            SystemAssign,
            EscapedCommand,
            HelpEnd,
        ]

    def do_one_token_transform(self, lines):
        """Find and run the transform earliest in the code.

        Returns (changed, lines).

        This method is called repeatedly until changed is False, indicating
        that all available transformations are complete.

        The tokens following IPython special syntax might not be valid, so
        the transformed code is retokenised every time to identify the next
        piece of special syntax. Hopefully long code cells are mostly valid
        Python, not using lots of IPython special syntax, so this shouldn't be
        a performance issue.
        """
        tokens_by_line = make_tokens_by_line(lines)
        candidates = []
        for transformer_cls in self.token_transformers:
            transformer = transformer_cls.find(tokens_by_line)
            if transformer:
                candidates.append(transformer)

        if not candidates:
            # Nothing to transform
            return False, lines
        ordered_transformers = sorted(candidates, key=TokenTransformBase.sortby)
        for transformer in ordered_transformers:
            try:
                return True, transformer.transform(lines)
            except SyntaxError:
                pass
        return False, lines

    def do_token_transforms(self, lines):
        for _ in range(TRANSFORM_LOOP_LIMIT):
            changed, lines = self.do_one_token_transform(lines)
            if not changed:
                return lines

        raise RuntimeError("Input transformation still changing after "
                           "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)

    def transform_cell(self, cell: str) -> str:
        """Transforms a cell of input code"""
        if not cell.endswith('\n'):
            cell += '\n'  # Ensure the cell has a trailing newline
        lines = cell.splitlines(keepends=True)
        for transform in self.cleanup_transforms + self.line_transforms:
            lines = transform(lines)

        lines = self.do_token_transforms(lines)
        return ''.join(lines)
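
    # A minimal illustrative sketch (comments only, not executed):
    # transform_cell() runs the whole pipeline and returns plain Python source.
    #
    #     TransformerManager().transform_cell('%ls\nprint(1)')
    #     # -> "get_ipython().run_line_magic('ls', '')\nprint(1)\n"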

    def check_complete(self, cell: str):
        """Return whether a block of code is ready to execute, or should be continued

        Parameters
        ----------
        cell : str
          Python input code, which can be multiline.

        Returns
        -------
        status : str
          One of 'complete', 'incomplete', or 'invalid'. The status is
          'invalid' if the source is not a prefix of valid code.
        indent_spaces : int or None
          The number of spaces by which to indent the next line of code. If
          status is not 'incomplete', this is None.
        """
        # Remember whether the cell ends in a newline.
        ends_with_newline = False
        for character in reversed(cell):
            if character == '\n':
                ends_with_newline = True
                break
            elif character.strip():
                break
            else:
                continue

        if not ends_with_newline:
            # Append a newline for consistent tokenization
            # See https://bugs.python.org/issue33899
            cell += '\n'

        lines = cell.splitlines(keepends=True)

        if not lines:
            return 'complete', None

        if lines[-1].endswith('\\'):
            # Explicit backslash continuation
            return 'incomplete', find_last_indent(lines)

        try:
            for transform in self.cleanup_transforms:
                if not getattr(transform, 'has_side_effects', False):
                    lines = transform(lines)
        except SyntaxError:
            return 'invalid', None

        if lines[0].startswith('%%'):
            # Special case for cell magics - completion marked by blank line
            if lines[-1].strip():
                return 'incomplete', find_last_indent(lines)
            else:
                return 'complete', None

        try:
            for transform in self.line_transforms:
                if not getattr(transform, 'has_side_effects', False):
                    lines = transform(lines)
            lines = self.do_token_transforms(lines)
        except SyntaxError:
            return 'invalid', None

        tokens_by_line = make_tokens_by_line(lines)

        if not tokens_by_line:
            return 'incomplete', find_last_indent(lines)

        if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
            # We're in a multiline string or expression
            return 'incomplete', find_last_indent(lines)

        newline_types = {tokenize.NEWLINE, tokenize.COMMENT, tokenize.ENDMARKER}

        # Pop the last line which only contains DEDENTs and ENDMARKER
        last_token_line = None
        if {t.type for t in tokens_by_line[-1]} in [
            {tokenize.DEDENT, tokenize.ENDMARKER},
            {tokenize.ENDMARKER}
        ] and len(tokens_by_line) > 1:
            last_token_line = tokens_by_line.pop()

        while tokens_by_line[-1] and tokens_by_line[-1][-1].type in newline_types:
            tokens_by_line[-1].pop()

        if not tokens_by_line[-1]:
            return 'incomplete', find_last_indent(lines)

        if tokens_by_line[-1][-1].string == ':':
            # The last line starts a block (e.g. 'if foo:')
            ix = 0
            while tokens_by_line[-1][ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1

            indent = tokens_by_line[-1][ix].start[1]
            return 'incomplete', indent + 4

        if tokens_by_line[-1][0].line.endswith('\\'):
            return 'incomplete', None

        # At this point, our checks think the code is complete (or invalid).
        # We'll use codeop.compile_command to check this with the real parser
        try:
            with warnings.catch_warnings():
                warnings.simplefilter('error', SyntaxWarning)
                res = compile_command(''.join(lines), symbol='exec')
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError, SyntaxWarning):
            return 'invalid', None
        else:
            if res is None:
                return 'incomplete', find_last_indent(lines)

        if last_token_line and last_token_line[0].type == tokenize.DEDENT:
            if ends_with_newline:
                return 'complete', None
            return 'incomplete', find_last_indent(lines)

        # If there's a blank line at the end, assume we're ready to execute
        if not lines[-1].strip():
            return 'complete', None

        return 'complete', None
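
    # A minimal illustrative sketch (comments only, not executed):
    # check_complete() reports whether the source is ready to run and suggests
    # the indentation for the next line.
    #
    #     TransformerManager().check_complete('for i in range(5):')
    #     # -> ('incomplete', 4)
    #     TransformerManager().check_complete('x = 1\n')
    #     # -> ('complete', None)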


def find_last_indent(lines):
    m = _indent_re.match(lines[-1])
    if not m:
        return 0
    return len(m.group(0).replace('\t', ' '*4))


class MaybeAsyncCompile(Compile):
    def __init__(self, extra_flags=0):
        super().__init__()
        self.flags |= extra_flags

    def __call__(self, *args, **kwds):
        return compile(*args, **kwds)


class MaybeAsyncCommandCompiler(CommandCompiler):
    def __init__(self, extra_flags=0):
        self.compiler = MaybeAsyncCompile(extra_flags=extra_flags)


if (sys.version_info.major, sys.version_info.minor) >= (3, 8):
    _extra_flags = ast.PyCF_ALLOW_TOP_LEVEL_AWAIT
else:
    _extra_flags = ast.PyCF_ONLY_AST

compile_command = MaybeAsyncCommandCompiler(extra_flags=_extra_flags)