1"""Extension to format a paragraph or selection to a max width.
2
3Does basic, standard text formatting, and also understands Python
4comment blocks. Thus, for editing Python source code, this
5extension is really only suitable for reformatting these comment
6blocks or triple-quoted strings.
7
8Known problems with comment reformatting:
9* If there is a selection marked, and the first line of the
10  selection is not complete, the block will probably not be detected
11  as comments, and will have the normal "text formatting" rules
12  applied.
13* If a comment block has leading whitespace that mixes tabs and
14  spaces, they will not be considered part of the same block.
15* Fancy comments, like this bulleted list, aren't handled :-)
16"""
17
18import re
19from idlelib.configHandler import idleConf
20
class FormatParagraph:
    """Editor extension that re-wraps a paragraph or selection to a max width."""

    # Menu entry: one 'Format Paragraph' item under 'format', bound to
    # the <<format-paragraph>> virtual event.
    menudefs = [
        ('format', [   # /s/edit/format   dscherer@cmu.edu
            ('Format Paragraph', '<<format-paragraph>>'),
         ])
    ]

    def __init__(self, editwin):
        """Remember the editor window whose text this extension formats."""
        self.editwin = editwin

    def close(self):
        """Drop the reference to the editor window."""
        self.editwin = None

    def format_paragraph_event(self, event, limit=None):
        """Re-wrap the selection, or the paragraph at the insert cursor.

        With a selection, the selected text is reformatted starting at
        the beginning of the selection.  Without one, the paragraph
        around the insert cursor is located with find_paragraph() and
        reformatted.

        When the text carries a comment header it is reformatted with
        reformat_comment(), otherwise with reformat_paragraph().  The
        widget is only modified (inside a single undo block) when the
        reformatted text differs from the original.

        limit is the maximum line width; when it is None the value is
        read from the 'max-width' option of the 'FormatParagraph'
        extension.  Passing it explicitly is meant for testing with a
        known value.

        Always returns "break".
        """
        if limit is None:
            # No explicit width: ask the configuration, passing 72 as
            # the default.
            limit = idleConf.GetOption(
                'extensions', 'FormatParagraph', 'max-width',
                type='int', default=72)
        widget = self.editwin.text
        first, last = self.editwin.get_selection_indices()
        if not (first and last):
            # Nothing selected: work on the paragraph under the cursor.
            first, last, comment_header, data = find_paragraph(
                widget, widget.index("insert"))
        else:
            data = widget.get(first, last)
            comment_header = get_comment_header(data)

        if comment_header:
            newdata = reformat_comment(data, limit, comment_header)
        else:
            newdata = reformat_paragraph(data, limit)
        widget.tag_remove("sel", "1.0", "end")

        if newdata == data:
            # Nothing changed; just move the cursor past the region.
            widget.mark_set("insert", last)
        else:
            widget.mark_set("insert", first)
            # Group delete + insert so one undo restores the old text.
            widget.undo_block_start()
            widget.delete(first, last)
            widget.insert(first, newdata)
            widget.undo_block_stop()
        widget.see("insert")
        return "break"
76
def find_paragraph(text, mark):
    """Locate the paragraph containing mark in the text widget.

    mark is an index of the form 'line.column'.  If it sits on a blank
    line, the search first moves forward to the next non-blank line.
    A paragraph is the run of consecutive lines that share the same
    comment header (see get_comment_header) and are not blank after
    that header.

    Returns a 4-tuple (first, last, comment_header, data): first and
    last are 'line.0' indices delimiting the paragraph, comment_header
    is the header of its lines ('' if none), and data is the text
    between first and last.
    """
    def fetch(number):
        # Content of one line, without its trailing newline.
        return text.get("%d.0" % number, "%d.end" % number)

    lineno, col = map(int, mark.split("."))
    line = fetch(lineno)

    # Starting on a blank line: skip ahead to the next paragraph.
    while text.compare("%d.0" % lineno, "<", "end") and is_all_white(line):
        lineno += 1
        line = fetch(lineno)
    first_lineno = lineno
    comment_header = get_comment_header(line)
    header_len = len(comment_header)

    def belongs(candidate):
        # Same header as the start line and not blank after it.
        if get_comment_header(candidate) != comment_header:
            return False
        return not is_all_white(candidate[header_len:])

    # Walk forward to the first line that is outside the paragraph.
    while belongs(line):
        lineno += 1
        line = fetch(lineno)
    last = "%d.0" % lineno

    # Walk backward from just above the start line to find the top.
    lineno = first_lineno - 1
    line = fetch(lineno)
    while lineno > 0 and belongs(line):
        lineno -= 1
        line = fetch(lineno)
    first = "%d.0" % (lineno + 1)

    return first, last, comment_header, text.get(first, last)
112
# This should perhaps be replaced with textwrap.wrap
def reformat_paragraph(data, limit):
    """Return data with its first paragraph re-wrapped to width limit.

    data is split into lines.  Leading blank lines are kept unchanged.
    The first paragraph (the following run of non-blank lines) is
    broken into words and refilled so that a line is broken before a
    word that would push it past limit columns; tabs are expanded when
    measuring.  Words are never split, so a single word longer than
    limit still produces an over-long line.

    The first output line uses the indent of the paragraph's first
    line; continuation lines use the indent of its second line, or the
    first line's indent when there is no second non-blank line.

    A word followed by whitespace other than exactly one space (for
    example two spaces after a period) is followed by two spaces in
    the output, unless it ends an output line.

    Everything after the first paragraph is returned unchanged.  If
    data contains only blank lines it is returned as is.
    """
    lines = data.split("\n")
    i = 0
    n = len(lines)
    # Skip (and later preserve) leading blank lines.
    while i < n and is_all_white(lines[i]):
        i += 1
    if i >= n:
        return data
    indent1 = get_indent(lines[i])
    if i + 1 < n and not is_all_white(lines[i + 1]):
        indent2 = get_indent(lines[i + 1])
    else:
        indent2 = indent1
    new = lines[:i]
    partial = indent1
    while i < n and not is_all_white(lines[i]):
        # XXX Should take double space after period (etc.) into account
        # The capturing group keeps the separators: words sit at even
        # indices, the whitespace between them at odd indices.
        words = re.split(r"(\s+)", lines[i])
        for j in range(0, len(words), 2):
            word = words[j]
            if not word:
                continue # Can happen when line ends in whitespace
            # Break before a word that would overflow, unless the
            # current line still holds nothing but the first indent.
            if len((partial + word).expandtabs()) > limit and \
                   partial != indent1:
                new.append(partial.rstrip())
                partial = indent2
            partial = partial + word + " "
            # Any separator other than a single space becomes two spaces.
            if j + 1 < len(words) and words[j + 1] != " ":
                partial = partial + " "
        i += 1
    new.append(partial.rstrip())
    # XXX Should reformat remaining paragraphs as well
    new.extend(lines[i:])
    return "\n".join(new)
149
def reformat_comment(data, limit, comment_header):
    """Return the comment block data re-wrapped to width limit.

    The first len(comment_header) characters are removed from every
    line of data, the remaining text is passed to reformat_paragraph(),
    and comment_header is put back in front of each resulting line.
    The width given to reformat_paragraph() is limit minus the header
    length, but never less than 20.
    """
    header_len = len(comment_header)
    # Drop the header-sized prefix from each line before wrapping.
    bare_lines = [line[header_len:] for line in data.split("\n")]
    # Wrap to what is left of the limit after the header, or to
    # 20 characters, whichever is greater.
    format_width = max(limit - header_len, 20)
    wrapped = reformat_paragraph("\n".join(bare_lines), format_width)
    out_lines = wrapped.split("\n")
    # A block ending in "\n" yields an empty final element.  It must
    # not receive a comment prefix, so it is taken off here and the
    # newline is appended again after the headers are restored.
    if out_lines[-1]:
        block_suffix = ""
    else:
        block_suffix = "\n"
        out_lines = out_lines[:-1]
    prefixed = [comment_header + line for line in out_lines]
    return "\n".join(prefixed) + block_suffix
171
def is_all_white(line):
    """Tell whether line is empty or consists only of whitespace."""
    blank = re.match(r"^\s*$", line)
    if blank is None:
        return False
    return True
176
def get_indent(line):
    """Return the run of spaces and tabs that line starts with."""
    # The pattern can match the empty string, so a match always exists.
    leading = re.match(r"^([ \t]*)", line)
    return leading.group(0)
180
def get_comment_header(line):
    """Return the leading whitespace and '#' characters of line.

    The result is the longest prefix of line made of spaces and tabs
    followed by any number of '#' characters, for example '    #' or
    '##'.  find_paragraph() compares this value between lines to decide
    which lines belong to the same block.

    The result is '' only when line starts with neither whitespace nor
    '#'.  An indented line that is not a comment yields its indentation
    (e.g. '    '), which is a non-empty string, so callers that test
    the result for truth treat such a line like one with a header.
    """
    # Both parts of the pattern may match nothing, so re.match always
    # succeeds here and no None check is needed.
    return re.match(r"^([ \t]*#*)", line).group(1)
191
if __name__ == "__main__":
    # Executed as a script: run this module's unit tests verbosely and
    # do not exit the interpreter afterwards.
    import unittest
    unittest.main(
        'idlelib.idle_test.test_formatparagraph',
        verbosity=2,
        exit=False,
    )
196