"""CJK wrapping and filling. Fix Python issue24665.

Copyright (C) 2015-2016, Florent Gallaire <fgallaire@gmail.com>
Copyright (C) 1999-2001, Gregory P. Ward <gward@python.net>
Copyright (C) 2002-2003, Python Software Foundation

Python2 will stay broken forever:
<https://bugs.python.org/issue24665>

Originally developed for txt2tags <http://txt2tags.org>

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""

__version__ = '2.2'

import textwrap
import unicodedata
import sys

# True when running on Python 3 or later; selects the text (unicode) type.
PY3 = sys.version_info[0] >= 3

if PY3:
    text_type = str
else:
    text_type = unicode  # noqa: F821 -- only evaluated on Python 2


def is_wide(char):
    """is_wide(unicode_char) -> boolean

    Return True if unicode_char is Fullwidth or Wide, False otherwise.
    Fullwidth and Wide CJK chars are double-width.
    """
    return unicodedata.east_asian_width(char) in ('F', 'W')


def cjklen(text):
    """cjklen(object) -> integer

    Return the real width of an unicode text, the len of any other type.

    Each Fullwidth/Wide character counts for two columns, every other
    character for one.
    """
    if not isinstance(text, text_type):
        return len(text)
    return sum(2 if is_wide(char) else 1 for char in text)


def cjkslices(text, index):
    """cjkslices(object, integer) -> object, object

    Return the two slices of a text cut to the index.

    For unicode text, 'index' is a display width: the first slice is the
    longest prefix whose width does not exceed 'index' (a double-width
    char that would straddle the cut goes to the second slice).  Any
    other type is simply sliced at 'index'.
    """
    if not isinstance(text, text_type):
        return text[:index], text[index:]
    # Single pass keeping a running width, instead of re-measuring the
    # whole prefix for every candidate cut position.
    used = 0
    for pos, char in enumerate(text):
        used += 2 if is_wide(char) else 1
        if used > index:
            return text[:pos], text[pos:]
    return text, u''


class CJKWrapper(textwrap.TextWrapper):
    """CJK fix for the Greg Ward textwrap lib.

    Overrides the two TextWrapper hooks that measure text so that widths
    are computed with cjklen() (display columns) instead of len().
    """

    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
        """Deal with a chunk that is too wide to fit on any line.

        reversed_chunks -- remaining chunks, last element is the next one
        cur_line        -- chunks already placed on the current line
        cur_len         -- display width of cur_line
        width           -- display width available on the current line

        Both lists are modified in place; nothing is returned.
        """
        # Figure out when indent is larger than the specified width, and
        # make sure at least one column is available.
        if width < 1:
            space_left = 1
        else:
            space_left = width - cur_len

        if self.break_long_words:
            chunk = reversed_chunks[-1]
            head, tail = cjkslices(chunk, space_left)
            if not head and not cur_line:
                # A double-width char cannot fit in the single column
                # left on an empty line.  Emit one char anyway (the line
                # overflows) so the caller always makes progress instead
                # of looping forever on the same chunk.
                head, tail = chunk[:1], chunk[1:]
            cur_line.append(head)
            if tail:
                reversed_chunks[-1] = tail
            else:
                # Chunk fully consumed: do not leave an empty string
                # behind for the caller to process again.
                reversed_chunks.pop()
        elif not cur_line:
            # Not allowed to break the word and the line is empty: put
            # the whole chunk on its own (over-long) line.
            cur_line.append(reversed_chunks.pop())

    def _wrap_chunks(self, chunks):
        """Wrap a list of chunks into lines of at most self.width columns.

        'chunks' is reversed in place and consumed.  Returns the list of
        wrapped lines, each prefixed with the relevant indent.

        Raises ValueError if self.width <= 0, or if self.width == 1 while
        the text contains double-width characters.
        """
        lines = []
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)
        # A chunk is wider than its length only if it holds a wide char.
        if self.width == 1 and any(cjklen(chunk) > len(chunk)
                                   for chunk in chunks):
            raise ValueError("invalid width 1 (must be > 1 when CJK chars)")

        # Work on a stack: the next chunk to place is always chunks[-1].
        chunks.reverse()

        while chunks:
            cur_line = []
            cur_len = 0

            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # Measure the indent in display columns too, so that a CJK
            # indent does not push the line past self.width.
            width = self.width - cjklen(indent)

            # Drop whitespace at the start of every line but the first.
            if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                del chunks[-1]

            while chunks:
                chunk_width = cjklen(chunks[-1])
                if cur_len + chunk_width > width:
                    break
                cur_line.append(chunks.pop())
                cur_len += chunk_width

            # The next chunk would not fit on any line: break it up.
            if chunks and cjklen(chunks[-1]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)

            # Drop whitespace at the end of the line.
            if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
                del cur_line[-1]

            if cur_line:
                lines.append(indent + ''.join(cur_line))

        return lines


# Convenience interface for CJKWrapper

def wrap(text, width=70, **kwargs):
    """Wrap a single paragraph of text, returning a list of wrapped lines.

    Reformat the single paragraph in 'text' so it fits in lines of no
    more than 'width' columns, and return a list of wrapped lines. By
    default, tabs in 'text' are expanded with string.expandtabs(), and
    all other whitespace characters (including newline) are converted to
    space. See CJKWrapper class for available keyword args to customize
    wrapping behaviour.
    """
    w = CJKWrapper(width=width, **kwargs)
    return w.wrap(text)


def fill(text, width=70, **kwargs):
    """Fill a single paragraph of text, returning a new string.

    Reformat the single paragraph in 'text' to fit in lines of no more
    than 'width' columns, and return a new string containing the entire
    wrapped paragraph. As with wrap(), tabs are expanded and other
    whitespace characters converted to space. See CJKWrapper class for
    available keyword args to customize wrapping behaviour.
    """
    w = CJKWrapper(width=width, **kwargs)
    return w.fill(text)