1"""Extracting and changing portions of the current line
2
3All functions take cursor offset from the beginning of the line and the line of
4Python code, and return None, or a tuple of the start index, end index, and the
5word."""
6
7import re
8
9from itertools import chain
10from typing import Optional, NamedTuple
11
12from .lazyre import LazyReCompile
13
14
class LinePart(NamedTuple):
    """A piece of the current line: where it sits and the text it covers."""

    # index in the line at which the piece begins
    start: int
    # index in the line at which the piece ends (regex-style end offset)
    stop: int
    # the text of the piece itself
    word: str
19
20
# A dotted name: one word character, then any mix of word characters and
# dots, with an optional trailing `(`.  The lookbehind stops a match from
# starting directly after `)`, `]`, `.` or another word character.
_current_word_re = LazyReCompile(r"(?<![)\]\w_.])" r"([\w_][\w0-9._]*[(]?)")


def current_word(cursor_offset: int, line: str) -> Optional[LinePart]:
    """Return the object.attribute.attribute word at or just before the cursor.

    None is returned when the cursor is not inside or directly after such a
    word.
    """
    found = None
    for candidate in _current_word_re.finditer(line):
        begin, finish = candidate.span(1)
        # strict on the left: a cursor sitting at the very start of a word
        # does not count as being in that word
        if begin < cursor_offset <= finish:
            found = LinePart(begin, finish, candidate.group(1))
    return found
37
38
# regex fragments that match the repr() of several hashable built-in types

# catch-all: any run of characters that contains no `]`
_match_all_dict_keys = r"""[^\]]*"""

# https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
_match_single_quote_str_bytes = r"""
    # bytes repr() begins with `b` character; bytes and str begin with `'`
    b?'
    # match escape sequence; this handles `\'` in the string repr()
    (?:\\['"nabfrtvxuU\\]|
    # or match any non-`\` and non-single-quote character (most of the string)
    [^'\\])*
    # matches hanging `\` or ending `'` if one is present
    [\\']?
"""

# repr() of bytes and str switches to double quotes only when the value holds
# at least one `'` character and no `"` characters
_match_double_quote_str_bytes = r"""
    # bytes repr() begins with `b` character
    b?"
    # string continues until a `"` character is reached
    [^"]*
    # end matching at closing double-quote if one is present
    "?"""

# a name (dots allowed) immediately followed by a `[` character
_match_dict_before_key = r"""[\w_][\w0-9._]*\["""

# group 1 is the key text that follows `name[`
_current_dict_key_re = LazyReCompile(
    f"{_match_dict_before_key}((?:"
    f"{_match_single_quote_str_bytes}|"
    f"{_match_double_quote_str_bytes}|"
    f"{_match_all_dict_keys}|)*)",
    re.VERBOSE,
)


def current_dict_key(cursor_offset: int, line: str) -> Optional[LinePart]:
    """Return the dictionary key the cursor is in during dict completion.

    The key is the text following ``name[``.  None is returned when the
    cursor does not fall within any such key.
    """
    keys = (
        LinePart(m.start(1), m.end(1), m.group(1))
        for m in _current_dict_key_re.finditer(line)
        # inclusive on both ends, so an empty key right after `[` counts
        if m.start(1) <= cursor_offset <= m.end(1)
    )
    return next(keys, None)
82
83
# group 1: a name (dots allowed) that is immediately followed by `[`
_capture_dict_name = r"""([\w_][\w0-9._]*)\["""

# group 1 is the dict name, group 2 the key text that follows `name[`
_current_dict_re = LazyReCompile(
    f"{_capture_dict_name}((?:"
    f"{_match_single_quote_str_bytes}|"
    f"{_match_double_quote_str_bytes}|"
    f"{_match_all_dict_keys}|)*)",
    re.VERBOSE,
)


def current_dict(cursor_offset: int, line: str) -> Optional[LinePart]:
    """Return the dict whose key is being completed at the cursor.

    The cursor has to be within the key text (group 2); the returned part
    describes the name in front of the `[` (group 1).  None is returned when
    the cursor is not in a key.
    """
    for hit in _current_dict_re.finditer(line):
        key_begin, key_finish = hit.span(2)
        if key_begin <= cursor_offset <= key_finish:
            name_begin, name_finish = hit.span(1)
            return LinePart(name_begin, name_finish, hit.group(1))
    return None
102
103
# An opening quote (triple or single, either quote character) followed by
# either a body that is terminated by the same quote (`closed`, group 3) or a
# body that is never terminated (`unclosed`, group 4).
_current_string_re = LazyReCompile(
    '''(?P<open>(?:""")|"|(?:''\')|')(?:((?P<closed>.+?)(?P=open))|'''
    """(?P<unclosed>.+))"""
)


def current_string(cursor_offset: int, line: str) -> Optional[LinePart]:
    """Return the non-empty string body (without quotes) around the cursor.

    Weaker than bpython.Repl's current_string, because that checks that a
    string is a string based on previous lines in the buffer.
    """
    for hit in _current_string_re.finditer(line):
        # use the closed body when that alternative matched, otherwise the
        # unclosed one
        body = "closed" if hit.group("closed") else "unclosed"
        begin, finish = hit.span(body)
        if begin <= cursor_offset <= finish:
            return LinePart(begin, finish, hit.group(body))
    return None
121
122
# a name directly followed by a dot; group 1 is the name
_current_object_re = LazyReCompile(r"([\w_][\w0-9_]*)[.]")


def current_object(cursor_offset: int, line: str) -> Optional[LinePart]:
    """Return the object on which an attribute is being completed.

    This is the dotted prefix of the current word made of the segments that
    end before the cursor.  None is returned when there is no current word or
    no such segment.
    """
    word_part = current_word(cursor_offset, line)
    if word_part is None:
        return None
    origin, _, text = word_part

    segments = []
    for seg in _current_object_re.finditer(text):
        # keep only the segments that finish strictly left of the cursor
        if origin + seg.end(1) < cursor_offset:
            segments.append(seg.group(1))

    dotted = ".".join(segments)
    if dotted:
        return LinePart(origin, origin + len(dotted), dotted)
    return None
141
142
# a name optionally followed by a dot; group 1 is the name
_current_object_attribute_re = LazyReCompile(r"([\w_][\w0-9_]*)[.]?")


def current_object_attribute(
    cursor_offset: int, line: str
) -> Optional[LinePart]:
    """Return the attribute being completed in the current dotted word.

    None is returned when there is no current word or the cursor is not on
    one of the segments after the first.
    """
    # TODO replace with more general current_expression_attribute
    word_part = current_word(cursor_offset, line)
    if word_part is None:
        return None
    origin, _, text = word_part

    segments = _current_object_attribute_re.finditer(text)
    # the first segment is the object itself, not an attribute
    next(segments)
    for seg in segments:
        begin = origin + seg.start(1)
        finish = origin + seg.end(1)
        if begin <= cursor_offset <= finish:
            return LinePart(begin, finish, seg.group(1))
    return None
161
162
# group 1: the (possibly empty) dotted module after `from`
# group 2: the last imported name matched after `import`, including any
#          trailing comma and whitespace
_current_from_import_from_re = LazyReCompile(
    r"from +([\w0-9_.]*)(?:\s+import\s+([\w0-9_]+[,]?\s*)+)*"
)


def current_from_import_from(
    cursor_offset: int, line: str
) -> Optional[LinePart]:
    """Return the module named after ``from`` in a from-import statement.

    The module is returned when the cursor is in or just after either of the
    two captured parts of ``from (module) import (name1, name2)``; otherwise
    None is returned.
    """
    # TODO allow for as's
    for hit in _current_from_import_from_re.finditer(line):
        on_module = hit.start(1) < cursor_offset <= hit.end(1)
        # start/end are -1 when group 2 took no part in the match, which
        # makes this test false
        on_names = hit.start(2) < cursor_offset <= hit.end(2)
        if on_module or on_names:
            return LinePart(hit.start(1), hit.end(1), hit.group(1))
    return None
183
184
# the `from <module> import` header of the statement
_current_from_import_import_re_1 = LazyReCompile(
    r"from\s+([\w0-9_.]*)\s+import"
)
# the first imported name after the header
_current_from_import_import_re_2 = LazyReCompile(r"([\w0-9_]+)")
# every further (possibly empty) name that follows a comma
_current_from_import_import_re_3 = LazyReCompile(r", *([\w0-9_]*)")


def current_from_import_import(
    cursor_offset: int, line: str
) -> Optional[LinePart]:
    """Return the name after ``import`` that is being completed.

    None is returned when the line has no from-import header, no name follows
    it, or the cursor is not in or just after one of the names.
    """
    header = _current_from_import_import_re_1.search(line)
    if header is None:
        return None

    # the name patterns run on the text after the header, so their offsets
    # have to be shifted back into line coordinates
    shift = header.end()
    tail = line[shift:]
    first_name = _current_from_import_import_re_2.search(tail)
    if first_name is None:
        return None

    candidates = [
        first_name,
        *_current_from_import_import_re_3.finditer(tail),
    ]
    for hit in candidates:
        begin = shift + hit.start(1)
        finish = shift + hit.end(1)
        if begin < cursor_offset <= finish:
            return LinePart(begin, finish, hit.group(1))
    return None
214
215
# the literal text `import`, wherever it first occurs in the line
_current_import_re_1 = LazyReCompile(r"import")
# the first dotted name after it
_current_import_re_2 = LazyReCompile(r"([\w0-9_.]+)")
# every further (possibly empty) dotted name that follows a comma
_current_import_re_3 = LazyReCompile(r"[,][ ]*([\w0-9_.]*)")


def current_import(cursor_offset: int, line: str) -> Optional[LinePart]:
    """Return the dotted name after ``import`` that the cursor is in.

    None is returned when the line does not contain ``import``, no name
    follows it, or the cursor is not in or just after one of the names.
    """
    # TODO allow for multiple as's
    keyword = _current_import_re_1.search(line)
    if keyword is None:
        return None

    # offsets found in the remainder are relative to the end of `import`
    shift = keyword.end()
    remainder = line[shift:]
    leading = _current_import_re_2.search(remainder)
    if leading is None:
        return None

    following = _current_import_re_3.finditer(remainder)
    for hit in chain((leading,), following):
        begin, finish = (shift + pos for pos in hit.span(1))
        if begin < cursor_offset <= finish:
            return LinePart(begin, finish, hit.group(1))
    return None
237
238
# `def`, whitespace, then the name being defined (group 1)
_current_method_definition_name_re = LazyReCompile(r"def\s+([a-zA-Z_][\w]*)")


def current_method_definition_name(
    cursor_offset: int, line: str
) -> Optional[LinePart]:
    """Return the name in a ``def`` statement that the cursor is on.

    None is returned when the cursor is not on or at either edge of such a
    name.
    """
    names = (
        LinePart(hit.start(1), hit.end(1), hit.group(1))
        for hit in _current_method_definition_name_re.finditer(line)
        if hit.start(1) <= cursor_offset <= hit.end(1)
    )
    return next(names, None)
250
251
# an identifier starting at a word boundary that is not preceded by a dot
_current_single_word_re = LazyReCompile(r"(?<![.])\b([a-zA-Z_][\w]*)")


def current_single_word(cursor_offset: int, line: str) -> Optional[LinePart]:
    """Return the un-dotted word under or just before the cursor, if any."""
    for hit in _current_single_word_re.finditer(line):
        begin, finish = hit.span(1)
        if not begin <= cursor_offset <= finish:
            continue
        return LinePart(begin, finish, hit.group(1))
    return None
261
262
def current_dotted_attribute(
    cursor_offset: int, line: str
) -> Optional[LinePart]:
    """Return the current word if it is a dotted object.attribute pair.

    None is returned when there is no current word or the word has no dot
    after its first character.
    """
    word_part = current_word(cursor_offset, line)
    if word_part is None:
        return None
    begin, finish, text = word_part
    # a dot in the very first position does not make the word dotted
    has_inner_dot = "." in text[1:]
    return LinePart(begin, finish, text) if has_inner_dot else None
274
275
# a dot, optional whitespace, then an identifier or nothing at all (group 1)
_current_expression_attribute_re = LazyReCompile(
    r"[.]\s*((?:[\w_][\w0-9_]*)|(?:))"
)


def current_expression_attribute(
    cursor_offset: int, line: str
) -> Optional[LinePart]:
    """Return the attribute being completed after a dot.

    The attribute may be empty when nothing has been typed after the dot yet.
    None is returned when the cursor is not on an attribute.
    """
    for hit in _current_expression_attribute_re.finditer(line):
        begin, finish = hit.span(1)
        if begin <= cursor_offset <= finish:
            return LinePart(begin, finish, hit.group(1))
    return None
290