1"""CJK wrapping and filling. Fix Python issue24665.
2
3Copyright (C) 2015-2016, Florent Gallaire <fgallaire@gmail.com>
4Copyright (C) 1999-2001, Gregory P. Ward <gward@python.net>
5Copyright (C) 2002-2003, Python Software Foundation
6
7Python2 will stay broken forever:
8<https://bugs.python.org/issue24665>
9
10Originally developed for txt2tags <http://txt2tags.org>
11
12This program is free software: you can redistribute it and/or modify
13it under the terms of the GNU Lesser General Public License as
14published by the Free Software Foundation, either version 3 of the
15License, or (at your option) any later version.
16
17This program is distributed in the hope that it will be useful,
18but WITHOUT ANY WARRANTY; without even the implied warranty of
19MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20GNU Lesser General Public License for more details.
21
22You should have received a copy of the GNU Lesser General Public License
23along with this program.  If not, see <http://www.gnu.org/licenses/>.
24"""
25
26__version__ = '2.2'
27
28import textwrap
29import unicodedata
30import sys
31
# True on Python 3 and later.  The major version is compared numerically
# through sys.version_info: looking at the first character of the
# sys.version string would misclassify any interpreter whose major
# version is not exactly "3".
PY3 = sys.version_info[0] >= 3

# text_type is the unicode string type of the running interpreter; the
# width-aware helpers below only apply to instances of it.
if PY3:
    text_type = str
else:
    text_type = unicode  # noqa: F821 -- name only exists on Python 2
38
39
def is_wide(char):
    """is_wide(unicode_char) -> boolean

    Tell whether unicode_char takes two columns when displayed.

    The East Asian Width property of the character is looked up; the
    result is True for the Fullwidth ('F') and Wide ('W') classes and
    False for every other class.
    """
    width_class = unicodedata.east_asian_width(char)
    return width_class == 'F' or width_class == 'W'
47
48
def cjklen(text):
    """cjklen(object) -> integer

    Return the display width of a unicode text, counted in columns.

    Every character counts for one column, plus one extra column for
    each character that is_wide() reports as double-width.  Objects of
    any other type are simply measured with len().
    """
    if isinstance(text, text_type):
        extra_columns = sum(1 for char in text if is_wide(char))
        return len(text) + extra_columns
    return len(text)
57
58
def cjkslices(text, index):
    """cjkslices(object, integer) -> object, object

    Return the two slices of a text cut at the display column index.

    For a unicode text, the first slice is the longest prefix whose
    width (one column per character, two for characters is_wide()
    reports as double-width) does not exceed index.  A double-width
    character that would straddle the cut goes entirely into the second
    slice, so the first slice may be one column short.  If the whole
    text fits, the second slice is an empty unicode string.

    Objects of any other type are cut with plain slicing at index.
    """
    if not isinstance(text, text_type):
        return text[:index], text[index:]
    # Single pass with a running width: measuring text[:i] again for
    # every candidate i would be quadratic in the length of the text.
    columns = 0
    for position, char in enumerate(text):
        columns += 2 if is_wide(char) else 1
        if columns > index:
            # This character is the first one that does not fit.
            return text[:position], text[position:]
    # The whole text fits within index columns.
    return text, u''
73
74
class CJKWrapper(textwrap.TextWrapper):
    """CJK fix for the Greg Ward textwrap lib.

    Widths are measured in display columns instead of characters: the
    two textwrap.TextWrapper methods that measure and cut chunks are
    overridden to use cjklen() and cjkslices(), so that double-width
    characters count for two columns.

    NOTE: this _wrap_chunks override never reads the 'max_lines' and
    'placeholder' attributes, so those options have no effect here.
    """

    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
        """Handle a chunk that is too wide to fit on any line.

        reversed_chunks -- remaining chunks, the next one to process
            being the last item (modified in place)
        cur_line -- chunks already put on the current line (modified in
            place)
        cur_len -- display width of cur_line
        width -- display width available for the current line

        If break_long_words is set, as much of the chunk as fits in the
        remaining columns is moved to cur_line.  Otherwise the whole
        chunk is moved, but only when the current line is still empty.
        """
        # The indent may leave no room at all; ask for one column then.
        if width < 1:
            space_left = 1
        else:
            space_left = width - cur_len

        if self.break_long_words:
            chunk = reversed_chunks[-1]
            chunk_start, chunk_end = cjkslices(chunk, space_left)
            if not chunk_start and not cur_line:
                # Not even the first character fits on an empty line
                # (a double-width character and a single column left).
                # Take it anyway: otherwise nothing is ever consumed
                # and the loop in _wrap_chunks never terminates.
                chunk_start, chunk_end = chunk[:1], chunk[1:]
            if chunk_start:
                # An empty slice is never appended: it would hide a
                # trailing whitespace chunk from the drop_whitespace
                # check done by _wrap_chunks.
                cur_line.append(chunk_start)
                if chunk_end:
                    reversed_chunks[-1] = chunk_end
                else:
                    # Do not leave an empty chunk behind.
                    reversed_chunks.pop()
        elif not cur_line:
            cur_line.append(reversed_chunks.pop())

    def _wrap_chunks(self, chunks):
        """Wrap a list of chunks and return a list of lines.

        chunks -- list of indivisible pieces of text; the list is
            reversed and consumed in place

        Each returned line is the current indent followed by as many
        chunks as fit in self.width display columns.

        Raise ValueError if self.width is not positive, or if it is 1
        while some chunk contains a double-width character.
        """
        lines = []
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)
        # cjklen() is never smaller than len(), so a chunk with a larger
        # cjklen() is a chunk holding at least one double-width char.
        if self.width == 1 and any(cjklen(chunk) > len(chunk)
                                   for chunk in chunks):
            raise ValueError("invalid width 1 (must be > 1 when CJK chars)")

        # Work on a stack: the next chunk to place is always chunks[-1].
        chunks.reverse()
        while chunks:
            # Chunks of the line being built, and their total width.
            cur_line = []
            cur_len = 0

            # Only the first line gets the initial indent.
            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # The indent is measured in columns too, so that an indent
            # made of double-width characters does not overflow width.
            width = self.width - cjklen(indent)

            # Drop whitespace at the start of every line but the first.
            if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                del chunks[-1]

            # Put on the line every chunk that entirely fits.
            while chunks:
                chunk_len = cjklen(chunks[-1])
                if cur_len + chunk_len <= width:
                    cur_line.append(chunks.pop())
                    cur_len += chunk_len
                else:
                    break

            # The next chunk would not fit on any line: break it up.
            if chunks and cjklen(chunks[-1]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)

            # Drop whitespace at the end of the line.
            if (self.drop_whitespace and cur_line
                    and cur_line[-1].strip() == ''):
                del cur_line[-1]

            if cur_line:
                lines.append(indent + ''.join(cur_line))
        return lines
120
121
122# Convenience interface for CJKWrapper
123
def wrap(text, width=70, **kwargs):
    """Wrap a single paragraph of text, returning a list of wrapped lines.

    A CJKWrapper is created with the given 'width' and any extra keyword
    arguments, and its wrap() method is applied to 'text'.  The result
    is the list of lines, each fitting in no more than 'width' columns.
    See the CJKWrapper class for the keyword arguments that customize
    the wrapping behaviour.
    """
    return CJKWrapper(width=width, **kwargs).wrap(text)
136
137
def fill(text, width=70, **kwargs):
    """Fill a single paragraph of text, returning a new string.

    A CJKWrapper is created with the given 'width' and any extra keyword
    arguments, and its fill() method is applied to 'text'.  The result
    is one string holding the whole wrapped paragraph, with lines of no
    more than 'width' columns.  See the CJKWrapper class for the keyword
    arguments that customize the wrapping behaviour.
    """
    return CJKWrapper(width=width, **kwargs).fill(text)
149