# i18n.py
#
# Copyright (C) 2012-2016 Red Hat, Inc.
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions of
# the GNU General Public License v.2, or (at your option) any later version.
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY expressed or implied, including the implied warranties of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details. You should have received a copy of the
# GNU General Public License along with this program; if not, write to the
# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA. Any Red Hat trademarks that are incorporated in the
# source code or documentation are not subject to the GNU General Public
# License and may only be used or replicated with the express permission of
# Red Hat, Inc.
#

from __future__ import print_function
from __future__ import unicode_literals
from dnf.pycomp import unicode

import dnf
import locale
import os
import signal
import sys
import unicodedata

"""
Centralize i18n stuff here. Must be unittested.
"""


class UnicodeStream(object):
    """Proxy for an output stream that copes with unencodable text.

    ``write()`` converts anything that is not a native ``str`` using
    ``encoding`` and, if the wrapped stream rejects the text with
    UnicodeEncodeError, retries with backslash escapes.  Every other
    attribute is forwarded to the wrapped stream.
    """

    def __init__(self, stream, encoding):
        self.stream = stream
        self.encoding = encoding

    def write(self, s):
        """Write ``s`` to the wrapped stream, escaping what it cannot encode."""
        if not isinstance(s, str):
            # Bring s to the native str type: decode on Python 3, encode on
            # Python 2.
            s = (s.decode(self.encoding, 'replace') if dnf.pycomp.PY3 else
                 s.encode(self.encoding, 'replace'))
        try:
            self.stream.write(s)
        except UnicodeEncodeError:
            # The wrapped stream may not advertise an encoding; fall back to
            # the one this wrapper was created with instead of failing.
            stream_encoding = (getattr(self.stream, 'encoding', None)
                               or self.encoding)
            s_bytes = s.encode(stream_encoding, 'backslashreplace')
            if hasattr(self.stream, 'buffer'):
                # Hand the already-escaped bytes to the underlying buffer.
                self.stream.buffer.write(s_bytes)
            else:
                s = s_bytes.decode(stream_encoding, 'ignore')
                self.stream.write(s)

    def __getattr__(self, name):
        # Only called for attributes not found on the wrapper itself.
        return getattr(self.stream, name)


def _full_ucd_support(encoding):
    """Return true if encoding can express any Unicode character.

    Even if an encoding can express all accented letters in the given language,
    we can't generally settle for it in DNF since sometimes we output special
    characters like the registered trademark symbol (U+00AE) and surprisingly
    many national non-unicode encodings, including e.g. ASCII and ISO-8859-2,
    don't contain it.

    ``None`` is accepted and yields False.
    """
    if encoding is None:
        return False
    return encoding.lower().startswith(('utf-', 'utf_'))


def _guess_encoding():
    """ Take the best shot at the current system's string encoding.

    Returns locale.getpreferredencoding(False), except that any name starting
    with "ANSI" is replaced by 'utf-8'.
    """
    encoding = locale.getpreferredencoding(False)
    return 'utf-8' if encoding.startswith("ANSI") else encoding


def setup_locale():
    """ Set LC_ALL from the environment, falling back to C.UTF-8, then C.

    When the fallback is taken, os.environ['LC_ALL'] is updated to the locale
    actually used and a notice is printed to stderr.
    """
    try:
        dnf.pycomp.setlocale(locale.LC_ALL, '')
    except locale.Error:
        # default to C.UTF-8 or C locale if we got a failure.
        try:
            dnf.pycomp.setlocale(locale.LC_ALL, 'C.UTF-8')
            os.environ['LC_ALL'] = 'C.UTF-8'
        except locale.Error:
            dnf.pycomp.setlocale(locale.LC_ALL, 'C')
            os.environ['LC_ALL'] = 'C'
        print('Failed to set locale, defaulting to {}'.format(os.environ['LC_ALL']),
              file=sys.stderr)


def setup_stdout():
    """ Check that stdout is of suitable encoding and handle the situation if
        not.

        When stdout is not a tty, SIGPIPE is reset to its default action.
        If the encoding is not a UTF one, sys.stdout is replaced by a
        UnicodeStream wrapping the original stream.

        Returns True if stdout was of suitable encoding already and no changes
        were needed.
    """
    stdout = sys.stdout
    if not stdout.isatty():
        signal.signal(signal.SIGPIPE, signal.SIG_DFL)
    try:
        encoding = stdout.encoding
    except AttributeError:
        encoding = None
    if not _full_ucd_support(encoding):
        sys.stdout = UnicodeStream(stdout, _guess_encoding())
        return False
    return True


def ucd_input(ucstring):
    # :api, deprecated in 2.0.0, will be erased when python2 is abandoned
    """ It uses print instead of passing the prompt to raw_input.

        raw_input doesn't encode the passed string and the output
        goes into stderr
    """
    print(ucstring, end='')
    return dnf.pycomp.raw_input()


def ucd(obj):
    # :api, deprecated in 2.0.0, will be erased when python2 is abandoned
    """ Like the builtin unicode() but tries to use a reasonable encoding.

        Bytes are decoded with the guessed system encoding, dropping
        undecodable parts; text is returned unchanged; anything else is
        converted through its string representation.
    """
    if dnf.pycomp.PY3:
        if dnf.pycomp.is_py3bytes(obj):
            return str(obj, _guess_encoding(), errors='ignore')
        elif isinstance(obj, str):
            return obj
        return str(obj)
    else:
        if isinstance(obj, dnf.pycomp.unicode):
            return obj
        if hasattr(obj, '__unicode__'):
            # see the doc for the unicode() built-in. The logic here is: if obj
            # implements __unicode__, let it take a crack at it, but handle the
            # situation if it fails:
            try:
                return dnf.pycomp.unicode(obj)
            except UnicodeError:
                pass
        return dnf.pycomp.unicode(str(obj), _guess_encoding(), errors='ignore')


# functions for formatting output according to terminal width,
# They should be used instead of built-in functions to count on different
# widths of Unicode characters

def _exact_width_char(uchar):
    """ Return 2 for East Asian wide/fullwidth characters, 1 otherwise. """
    return 2 if unicodedata.east_asian_width(uchar) in ('W', 'F') else 1


def chop_str(msg, chop=None):
    """ Return the textual width of a Unicode string, chopping it to
        a specified value. This is what you want to use instead of %.*s, as it
        does the "right" thing with regard to different Unicode character width
        Eg. "%.*s" % (10, msg)   <= becomes =>   "%s" % (chop_str(msg, 10))

        Returns a (width, text) tuple.  With chop=None the text is msg itself;
        otherwise it is the longest prefix of msg whose width does not exceed
        chop.
    """

    if chop is None:
        return exact_width(msg), msg

    width = 0
    kept = []
    for char in msg:
        char_width = _exact_width_char(char)
        if width + char_width > chop:
            break
        kept.append(char)
        width += char_width
    return width, ''.join(kept)


def exact_width(msg):
    """ Calculates width of msg on a terminal screen
        (a wide/fullwidth Asian char counts for two) """
    return sum(_exact_width_char(c) for c in msg)


def fill_exact_width(msg, fill, chop=None, left=True, prefix='', suffix=''):
    """ Expand a msg to a specified "width" or chop to same.
        Expansion can be left or right. This is what you want to use instead of
        %*.*s, as it does the "right" thing with regard to different Unicode
        character width.
        prefix and suffix should be used for "invisible" bytes, like
        highlighting.

        Examples:

        ``"%-*.*s" % (10, 20, msg)`` becomes
            ``"%s" % (fill_exact_width(msg, 10, 20))``.

        ``"%20.10s" % (msg)`` becomes
            ``"%s" % (fill_exact_width(msg, 20, 10, left=False))``.

        ``"%s%.10s%s" % (pre, msg, suf)`` becomes
            ``"%s" % (fill_exact_width(msg, 0, 10, prefix=pre, suffix=suf))``.
    """
    width, msg = chop_str(msg, chop)

    if width >= fill:
        # Already wide enough: no padding, only wrap in prefix/suffix.
        if prefix or suffix:
            msg = ''.join([prefix, msg, suffix])
    else:
        extra = " " * (fill - width)
        if left:
            msg = ''.join([prefix, msg, suffix, extra])
        else:
            msg = ''.join([extra, prefix, msg, suffix])

    return msg


def textwrap_fill(text, width=70, initial_indent='', subsequent_indent=''):
    """ Works like we want textwrap.wrap() to work, uses Unicode strings
        and doesn't screw up lists/blocks/etc. """

    # Characters accepted as the marker of a list item.
    list_markers = ("-", "*", ".", "o", "\u2022", "\u2023", "\u2218")

    def _indent_at_beg(line):
        """ Return (leading spaces, list text indent) for line.

            The second item is the column where the text of a list item
            starts, or 0 when the line does not start a list item.
        """
        count = len(line) - len(line.lstrip(' '))
        # Compare the whole character, so Unicode bullets are recognised too.
        marker = line[count:count + 1]
        if marker not in list_markers:
            return count, 0
        nxt = _indent_at_beg(line[count + 1:])
        nxt = nxt[1] or nxt[0]
        if nxt:
            return count, count + 1 + nxt
        # A marker with no space after it is not a list item.
        return count, 0

    text = text.rstrip('\n')
    lines = text.replace('\t', ' ' * 8).split('\n')

    ret = []
    indent = initial_indent
    wrap_last = False
    csab = 0
    cspc_indent = 0
    for line in lines:
        line = line.rstrip(' ')
        (lsab, lspc_indent) = (csab, cspc_indent)
        (csab, cspc_indent) = _indent_at_beg(line)
        force_nl = False # We want to stop wrapping under "certain" conditions:
        if wrap_last and cspc_indent:        # if line starts a list or
            force_nl = True
        if wrap_last and csab == len(line):  # is empty line
            force_nl = True
        # if line doesn't continue a list and is "block indented"
        if wrap_last and not lspc_indent:
            if csab >= 4 and csab != lsab:
                force_nl = True
        if force_nl:
            ret.append(indent.rstrip(' '))
            indent = subsequent_indent
            wrap_last = False
        if csab == len(line):  # empty line, remove spaces to make it easier.
            line = ''
        if wrap_last:
            line = line.lstrip(' ')
            cspc_indent = lspc_indent

        if exact_width(indent + line) <= width:
            wrap_last = False
            ret.append(indent + line)
            indent = subsequent_indent
            continue

        wrap_last = True
        words = line.split(' ')
        line = indent
        spcs = cspc_indent
        if not spcs and csab >= 4:
            spcs = csab
        for word in words:
            if (width < exact_width(line + word)) and \
               (exact_width(line) > exact_width(subsequent_indent)):
                ret.append(line.rstrip(' '))
                line = subsequent_indent + ' ' * spcs
            line += word
            line += ' '
        # The unfinished tail is carried over as the indent of the next line.
        indent = line.rstrip(' ') + ' '
    if wrap_last:
        ret.append(indent.rstrip(' '))

    return '\n'.join(ret)


def select_short_long(width, msg_short, msg_long):
    """ Automatically selects the short (abbreviated) or long (full) message
        depending on whether we have enough screen space to display the full
        message or not. If a caller by mistake passes a long string as
        msg_short and a short string as a msg_long this function recognizes
        the mistake and swaps the arguments. This function is especially useful
        in the i18n context when you cannot predict how long are the translated
        messages.

        Limitations:

        1. If msg_short is longer than width you will still get an overflow.
           This function does not abbreviate the string.
        2. You are not obliged to provide an actually abbreviated string, it is
           perfectly correct to pass the same string twice if you don't want
           any abbreviation. However, if you provide two different strings but
           having the same width this function is unable to recognize which one
           is correct and you should assume that it is unpredictable which one
           is returned.

        Example:

        ``select_short_long (10, _("Repo"), _("Repository"))``

        will return "Repository" in English but the results in other languages
        may be different. """
    width_short = exact_width(msg_short)
    width_long = exact_width(msg_long)
    # If we have two strings of the same width:
    if width_short == width_long:
        return msg_long
    # If the short string is wider than the long string:
    elif width_short > width_long:
        return msg_short if width_short <= width else msg_long
    # The regular case:
    else:
        return msg_long if width_long <= width else msg_short


def translation(name):
    # :api, deprecated in 2.0.0, will be erased when python2 is abandoned
    """ Easy gettext translations setup based on given domain name

        Returns the callables produced by dnf.pycomp.gettext_setup(), each
        wrapped so that its result is passed through ucd().
    """

    setup_locale()
    def ucd_wrapper(fnc):
        return lambda *w: ucd(fnc(*w))
    t = dnf.pycomp.gettext.translation(name, fallback=True)
    return map(ucd_wrapper, dnf.pycomp.gettext_setup(t))


def pgettext(context, message):
    """ Translate message within the given context.

        The lookup key is context + "\\x04" + message.  If the separator is
        still present in the result, message is returned unchanged.
    """
    result = _(context + chr(4) + message)
    if "\004" in result:
        return message
    else:
        return result

# setup translations
_, P_ = translation("dnf")
C_ = pgettext