1"""Format all or a selected region (line slice) of text.
2
3Region formatting options: paragraph, comment block, indent, deindent,
4comment, uncomment, tabify, and untabify.
5
6File renamed from paragraph.py with functions added from editor.py.
7"""
8import re
9from tkinter.messagebox import askyesno
10from tkinter.simpledialog import askinteger
11from idlelib.config import idleConf
12
13
class FormatParagraph:
    """Rewrap a paragraph, comment block, or selection to a maximum width.

    Plain text gets ordinary word wrapping; Python comment blocks are
    recognized and rewrapped with their comment prefix kept.  When
    editing Python source this is therefore mainly useful for comment
    blocks and triple-quoted strings.

    Known problems with comment reformatting:
    * If a selection starts partway through its first line, the block
      will probably not be detected as comments and will be wrapped
      with the plain text rules.
    * Comment lines whose leading whitespace mixes tabs and spaces
      are not considered part of the same block.
    * Fancy comments, like this bulleted list, aren't handled :-)
    """
    # max_width is a class attribute; it is assigned by reload().

    def __init__(self, editwin):
        self.editwin = editwin

    @classmethod
    def reload(cls):
        """Read the 'max-width' extension option into cls.max_width."""
        cls.max_width = idleConf.GetOption(
            'extensions', 'FormatParagraph', 'max-width',
            type='int', default=72)

    def close(self):
        """Drop the reference to the editor window."""
        self.editwin = None

    def format_paragraph_event(self, event, limit=None):
        """Rewrap the selection, or the paragraph at the cursor.

        With a selection, exactly the selected text is rewrapped.
        Without one, find_paragraph() locates the paragraph from the
        insert mark.  Text with a non-empty comment header goes through
        reformat_comment(); everything else through reformat_paragraph().

        limit is the maximum width; when it is None the class-level
        max_width is used.  Passing it explicitly is meant for tests.

        Always returns "break".
        """
        if limit is None:
            limit = self.max_width
        widget = self.editwin.text
        start, stop = self.editwin.get_selection_indices()
        if start and stop:
            original = widget.get(start, stop)
            header = get_comment_header(original)
        else:
            start, stop, header, original = find_paragraph(
                widget, widget.index("insert"))

        if header:
            formatted = reformat_comment(original, limit, header)
        else:
            formatted = reformat_paragraph(original, limit)
        widget.tag_remove("sel", "1.0", "end")

        if formatted == original:
            # Nothing to replace; just move the cursor past the text.
            widget.mark_set("insert", stop)
        else:
            widget.mark_set("insert", start)
            # Group the delete and insert so they undo as one step.
            widget.undo_block_start()
            widget.delete(start, stop)
            widget.insert(start, formatted)
            widget.undo_block_stop()
        widget.see("insert")
        return "break"
79
80
# Set FormatParagraph.max_width from the configuration at import time.
FormatParagraph.reload()
82
def find_paragraph(text, mark):
    """Locate the paragraph containing mark in the text widget.

    mark is a "line.column" index string.  If it sits on a blank line,
    the search first moves forward to the next non-blank line.  The
    paragraph is the run of adjacent lines that share that line's
    comment header and are not blank after the header.

    Return (first index, last index, comment header, paragraph text),
    where the indices are line starts and the header may be ''.
    """
    def line_at(number):
        # Content of one line, without its newline.
        return text.get("%d.0" % number, "%d.end" % number)

    row, _ = map(int, mark.split("."))
    current = line_at(row)

    # Skip forward over blank lines until a paragraph (or the end) is hit.
    while text.compare("%d.0" % row, "<", "end") and is_all_white(current):
        row += 1
        current = line_at(row)
    anchor = row
    header = get_comment_header(current)
    header_len = len(header)

    def in_paragraph(candidate):
        # Same header as the anchor line and some content after it.
        return (get_comment_header(candidate) == header
                and not is_all_white(candidate[header_len:]))

    # Walk down to the first line that no longer belongs.
    while in_paragraph(current):
        row += 1
        current = line_at(row)
    last = "%d.0" % row

    # Walk up from just above the anchor to find where the paragraph starts.
    row = anchor - 1
    current = line_at(row)
    while row > 0 and in_paragraph(current):
        row -= 1
        current = line_at(row)
    first = "%d.0" % (row + 1)

    return first, last, header, text.get(first, last)
118
# This should perhaps be replaced with textwrap.wrap
def reformat_paragraph(data, limit):
    """Return data with its first paragraph rewrapped to width limit.

    Leading blank lines are kept.  The first line of the result keeps
    the indent of the first non-blank line; later lines use the indent
    of the line after it (or the same indent if that line is blank or
    missing).  Words are joined with one space, or two when the original
    separator was anything other than a single space.  Lines from the
    first blank line after the paragraph onward are copied unchanged.
    If data has no non-blank line it is returned as is.
    """
    rows = data.split("\n")
    total = len(rows)
    pos = 0
    while pos < total and is_all_white(rows[pos]):
        pos += 1
    if pos >= total:
        return data

    first_indent = get_indent(rows[pos])
    follower = pos + 1
    if follower < total and not is_all_white(rows[follower]):
        rest_indent = get_indent(rows[follower])
    else:
        rest_indent = first_indent

    out = rows[:pos]
    current = first_indent
    while pos < total and not is_all_white(rows[pos]):
        # XXX Should take double space after period (etc.) into account
        # The capturing split alternates words and whitespace runs.
        pieces = re.split(r"(\s+)", rows[pos])
        words = pieces[::2]
        gaps = pieces[1::2]
        for k, word in enumerate(words):
            if not word:
                continue  # Can happen when line ends in whitespace
            # Width is measured with tabs expanded; a line that holds
            # nothing but the first indent is never broken.
            if current != first_indent and \
                    len((current + word).expandtabs()) > limit:
                out.append(current.rstrip())
                current = rest_indent
            current += word + " "
            if k < len(gaps) and gaps[k] != " ":
                current += " "
        pos += 1
    out.append(current.rstrip())
    # XXX Should reformat remaining paragraphs as well
    out.extend(rows[pos:])
    return "\n".join(out)
155
def reformat_comment(data, limit, comment_header):
    """Return the comment block data rewrapped to width limit.

    The first len(comment_header) characters are cut from every line,
    the remainder is rewrapped by reformat_paragraph(), and
    comment_header is put back in front of each resulting line.
    """
    header_len = len(comment_header)
    bare = "\n".join(row[header_len:] for row in data.split("\n"))

    # The header uses up part of the limit, but never wrap narrower
    # than 20 characters.
    width = max(limit - header_len, 20)
    wrapped = reformat_paragraph(bare, width).split("\n")

    # If the block ends in a newline, the split leaves a final empty
    # element.  It must not get a comment prefix, so take it off here
    # and restore the newline after joining.  (It may not make sense to
    # reformat a comment block that is not made of complete lines, but
    # it is handled anyway.)
    ends_with_newline = not wrapped[-1]
    if ends_with_newline:
        wrapped = wrapped[:-1]
    body = '\n'.join(comment_header + row for row in wrapped)
    if ends_with_newline:
        return body + "\n"
    return body + ""
177
def is_all_white(line):
    """Return True when line contains nothing but whitespace (or nothing)."""
    blank = re.match(r"^\s*$", line)
    return blank is not None
182
def get_indent(line):
    """Return the run of spaces and tabs at the start of line."""
    leading = re.match(r"^([ \t]*)", line)
    return leading.group(0)
186
def get_comment_header(line):
    """Return the leading whitespace and '#' characters of line.

    The result is the longest prefix of line made of spaces and tabs
    followed by any number of '#' characters, such as '    #' for
    '    # note'.  A line with no indent and no '#' gives ''; an
    indented line without '#' gives just its indent.  The result is
    used to find the other lines of a block that start the same way.
    """
    # Every part of the pattern is optional, so it matches any string
    # (possibly as an empty prefix) and re.match never returns None.
    return re.match(r"^([ \t]*#*)", line).group(1)
197
198
# Copied from editor.py; importing it would cause an import cycle.
_line_indent_re = re.compile(r'[ \t]*')

def get_line_indent(line, tabwidth):
    """Measure the indentation at the start of line.

    Return (number of indent characters, number of columns they take
    up once tabs are expanded with str.expandtabs(tabwidth)).
    """
    leading = _line_indent_re.match(line).group()
    return len(leading), len(leading.expandtabs(tabwidth))
210
211
class FormatRegion:
    "Format selected text (region)."

    def __init__(self, editwin):
        self.editwin = editwin

    def get_region(self):
        """Describe the whole lines covered by the selection or cursor.

        With a selection, the region starts at the beginning of the
        line holding the selection start and ends one character past
        the end of the line holding the character before the selection
        end.  Without a selection, it is the line holding the insert
        mark, likewise extended one character past its end.

        Return a tuple containing (first index, last index,
            text between them, that text split on newlines).
        """
        widget = self.editwin.text
        sel_start, sel_stop = self.editwin.get_selection_indices()
        if sel_start and sel_stop:
            head_spec = sel_start + " linestart"
            tail_spec = sel_stop + "-1c lineend +1c"
        else:
            head_spec = "insert linestart"
            tail_spec = "insert lineend +1c"
        head = widget.index(head_spec)
        tail = widget.index(tail_spec)
        chars = widget.get(head, tail)
        return head, tail, chars, chars.split("\n")

    def set_region(self, head, tail, chars, lines):
        """Replace the text between head and tail with lines.

        Args:
            head: Starting index of text to replace.
            tail: Ending index of text to replace.
            chars: Expected to be the current text between head
                and tail; used to detect that nothing changed.
            lines: List of new lines to put between head and tail.

        If the joined lines equal chars, only the bell is rung.
        Otherwise the replacement is made as one undo block and the
        new text is left selected.
        """
        widget = self.editwin.text
        replacement = "\n".join(lines)
        if replacement == chars:
            widget.bell()
            return
        widget.tag_remove("sel", "1.0", "end")
        widget.mark_set("insert", head)
        widget.undo_block_start()
        widget.delete(head, tail)
        widget.insert(head, replacement)
        widget.undo_block_stop()
        # The insert mark now sits after the new text.
        widget.tag_add("sel", head, "insert")

    def _reindent_region(self, new_width):
        """Re-indent every non-empty line of the region.

        new_width is called with a line's effective indent (in columns)
        and returns the indent the line should get.
        """
        head, tail, chars, lines = self.get_region()
        win = self.editwin
        for pos, line in enumerate(lines):
            if not line:
                continue
            raw, effective = get_line_indent(line, win.tabwidth)
            lines[pos] = win._make_blanks(new_width(effective)) + line[raw:]
        self.set_region(head, tail, chars, lines)
        return "break"

    def indent_region_event(self, event=None):
        "Indent region by indentwidth spaces."
        return self._reindent_region(
            lambda width: width + self.editwin.indentwidth)

    def dedent_region_event(self, event=None):
        "Dedent region by indentwidth spaces."
        return self._reindent_region(
            lambda width: max(width - self.editwin.indentwidth, 0))

    def comment_region_event(self, event=None):
        """Comment out each line in region.

        ## is put in front of each line to comment it out.
        """
        head, tail, chars, lines = self.get_region()
        # The last element is the text after the region's final
        # newline; it is left alone.
        lines[:-1] = ['##' + line for line in lines[:-1]]
        self.set_region(head, tail, chars, lines)
        return "break"

    def uncomment_region_event(self, event=None):
        """Uncomment each line in region.

        Remove ## or # from the very start of a line.  A comment marker
        anywhere else on the line is left untouched.
        """
        def without_marker(line):
            if line.startswith('##'):
                return line[2:]
            if line.startswith('#'):
                return line[1:]
            return line

        head, tail, chars, lines = self.get_region()
        lines = [without_marker(line) for line in lines]
        self.set_region(head, tail, chars, lines)
        return "break"

    def tabify_region_event(self, event=None):
        "Convert leading spaces to tabs for each line in selected region."
        head, tail, chars, lines = self.get_region()
        tabwidth = self._asktabwidth()
        if tabwidth is None:
            # No width was given; leave the text alone.
            return
        for pos, line in enumerate(lines):
            if not line:
                continue
            raw, effective = get_line_indent(line, tabwidth)
            tabs, spaces = divmod(effective, tabwidth)
            lines[pos] = "".join(('\t' * tabs, ' ' * spaces, line[raw:]))
        self.set_region(head, tail, chars, lines)
        return "break"

    def untabify_region_event(self, event=None):
        "Expand tabs to spaces for each line in region."
        head, tail, chars, lines = self.get_region()
        tabwidth = self._asktabwidth()
        if tabwidth is None:
            # No width was given; leave the text alone.
            return
        lines = [line.expandtabs(tabwidth) for line in lines]
        self.set_region(head, tail, chars, lines)
        return "break"

    def _asktabwidth(self):
        "Return value for tab width."
        win = self.editwin
        return askinteger(
            "Tab width",
            "Columns per tab? (2-16)",
            parent=win.text,
            initialvalue=win.indentwidth,
            minvalue=2,
            maxvalue=16)
354
355
class Indents:
    "Change future indents."

    def __init__(self, editwin):
        self.editwin = editwin

    def toggle_tabs_event(self, event):
        """Ask whether to flip the editor's usetabs setting.

        On a yes answer usetabs is inverted and indentwidth is set
        to 8.  Always returns "break".
        """
        win = self.editwin
        using_tabs = win.usetabs
        # usetabs indexes the pairs: the first item applies when tabs
        # are currently off, the second when they are on.
        switch_to = ("on", "off")[using_tabs]
        width_verb = ("will be", "remains at")[using_tabs]
        question = ("Turn tabs " + switch_to +
                    "?\nIndent width " +
                    width_verb + " 8." +
                    "\n Note: a tab is always 8 columns")
        if askyesno("Toggle tabs", question, parent=win.text):
            win.usetabs = not using_tabs
            # Try to prevent inconsistent indentation.
            # User must change indent width manually after using tabs.
            win.indentwidth = 8
        return "break"

    def change_indentwidth_event(self, event):
        """Ask for a new indent width and apply it.

        The width is stored only when a value was given, it differs
        from the current width, and tabs are not in use.  Always
        returns "break".
        """
        win = self.editwin
        width = askinteger(
            "Indent width",
            "New indent width (2-16)\n(Always use 8 when using tabs)",
            parent=win.text,
            initialvalue=win.indentwidth,
            minvalue=2,
            maxvalue=16)
        if not width or width == win.indentwidth or win.usetabs:
            return "break"
        win.indentwidth = width
        return "break"
390
391
class Rstrip:  # 'Strip Trailing Whitespace" on "Format" menu.
    def __init__(self, editwin):
        self.editwin = editwin

    def do_rstrip(self, event=None):
        """Strip trailing whitespace from every line of the text.

        Unless the window has an 'interp' attribute (the Shell), extra
        newlines at the end of the text are removed as well.  All the
        changes are made inside one undo block.
        """
        widget = self.editwin.text
        undo = self.editwin.undo
        undo.undo_block_start()

        line_count = int(float(widget.index('end')))
        for row in range(1, line_count):
            row_end = '%i.end' % row
            content = widget.get('%i.0' % row, row_end)
            keep = len(content.rstrip())
            # Since text.delete() marks file as changed, even if not,
            # only call it when needed to actually delete something.
            if keep < len(content):
                widget.delete('%i.%i' % (row, keep), row_end)

        ends_with_newline = widget.get('end-2c') == '\n'
        if ends_with_newline and not hasattr(self.editwin, 'interp'):
            # Not the Shell: delete extra user endlines.
            while widget.index('end-1c') > '1.0':  # Stop if file empty.
                if widget.get('end-3c') != '\n':
                    break
                widget.delete('end-3c')
            # Because tk indexes are slice indexes and never raise,
            # a file with only newlines will be emptied.
            # patchcheck.py does the same.

        undo.undo_block_stop()
422
423
# When run as a script, run the tests in idlelib.idle_test.test_format.
if __name__ == "__main__":
    from unittest import main
    main('idlelib.idle_test.test_format', verbosity=2, exit=False)
427