1# i18n.py
2#
3# Copyright (C) 2012-2016 Red Hat, Inc.
4#
5# This copyrighted material is made available to anyone wishing to use,
6# modify, copy, or redistribute it subject to the terms and conditions of
7# the GNU General Public License v.2, or (at your option) any later version.
8# This program is distributed in the hope that it will be useful, but WITHOUT
9# ANY WARRANTY expressed or implied, including the implied warranties of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
11# Public License for more details.  You should have received a copy of the
12# GNU General Public License along with this program; if not, write to the
13# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
14# 02110-1301, USA.  Any Red Hat trademarks that are incorporated in the
15# source code or documentation are not subject to the GNU General Public
16# License and may only be used or replicated with the express permission of
17# Red Hat, Inc.
18#
19
20from __future__ import print_function
21from __future__ import unicode_literals
22from dnf.pycomp import unicode
23
24import dnf
25import locale
26import os
27import signal
28import sys
29import unicodedata
30
31"""
Centralize i18n helpers here. Everything in this module must be unit tested.
33"""
34
class UnicodeStream(object):
    """ Wrapper around an output stream that copes with unencodable text.

        Anything that is not a native ``str`` is converted using *encoding*
        before it is written. If the wrapped stream refuses the text with
        UnicodeEncodeError, the offending characters are written as backslash
        escapes instead of propagating the error.

        All attributes other than ``write`` are delegated to the wrapped
        stream.
    """

    def __init__(self, stream, encoding):
        """ :param stream: the stream object to wrap
            :param encoding: encoding used to convert non-``str`` input
        """
        self.stream = stream
        self.encoding = encoding

    def write(self, s):
        """ Write *s* to the wrapped stream, escaping what it cannot encode. """
        if not isinstance(s, str):
            # Python 3: bytes -> text; Python 2: unicode -> byte string.
            s = (s.decode(self.encoding, 'replace') if dnf.pycomp.PY3 else
                 s.encode(self.encoding, 'replace'))
        try:
            self.stream.write(s)
        except UnicodeEncodeError:
            # The wrapped stream may not advertise an encoding (or may report
            # None); fall back to the one this wrapper was configured with.
            target = getattr(self.stream, 'encoding', None) or self.encoding
            s_bytes = s.encode(target, 'backslashreplace')
            if hasattr(self.stream, 'buffer'):
                # Push out text still pending in the text layer first,
                # otherwise the raw bytes would overtake earlier writes.
                self.stream.flush()
                self.stream.buffer.write(s_bytes)
            else:
                s = s_bytes.decode(target, 'ignore')
                self.stream.write(s)

    def __getattr__(self, name):
        # Only called for attributes not found the normal way. Guard against
        # unbounded recursion when 'stream' itself has not been set yet
        # (e.g. an instance created without running __init__).
        if name == 'stream':
            raise AttributeError(name)
        return getattr(self.stream, name)
57
def _full_ucd_support(encoding):
    """Return true if encoding can express any Unicode character.

    Even if an encoding can express all accented letters in the given language,
    we can't generally settle for it in DNF since sometimes we output special
    characters like the registered trademark symbol (U+00AE) and surprisingly
    many national non-unicode encodings, including e.g. ASCII and ISO-8859-2,
    don't contain it.

    Names beginning with ``utf-`` or ``utf_`` (case-insensitive) are accepted
    directly. Other spellings are resolved through the codec registry so that
    aliases such as ``utf8`` or ``U8`` are recognized as well. ``None`` and
    unknown encodings yield False.

    """
    if encoding is None:
        return False
    if encoding.lower().startswith(('utf-', 'utf_')):
        return True
    # Aliases without a separator ("utf8", "utf16", "u8", ...) map to a
    # canonical codec name that does start with "utf-".
    import codecs
    try:
        canonical = codecs.lookup(encoding).name
    except LookupError:
        return False
    return canonical.startswith('utf-')
74
def _guess_encoding():
    """ Return the locale's preferred encoding, without touching the locale.

        Any encoding whose name starts with "ANSI" is replaced by 'utf-8'
        (presumably this covers the plain-ASCII name reported when no real
        locale is configured -- confirm).
    """
    preferred = locale.getpreferredencoding(False)
    if preferred.startswith("ANSI"):
        return 'utf-8'
    return preferred
79
def setup_locale():
    """ Activate the locale requested by the environment.

        If that fails, fall back to C.UTF-8, and to plain C if C.UTF-8 is not
        available either. The chosen fallback is exported as LC_ALL and a
        notice is printed to stderr.
    """
    try:
        dnf.pycomp.setlocale(locale.LC_ALL, '')
    except locale.Error:
        fallback = 'C.UTF-8'
        try:
            dnf.pycomp.setlocale(locale.LC_ALL, fallback)
        except locale.Error:
            fallback = 'C'
            dnf.pycomp.setlocale(locale.LC_ALL, fallback)
        # Only reached once one of the fallbacks has been applied.
        os.environ['LC_ALL'] = fallback
        print('Failed to set locale, defaulting to {}'.format(fallback),
              file=sys.stderr)
93
def setup_stdout():
    """ Make sure sys.stdout can take any Unicode text.

        When stdout is not a terminal, SIGPIPE is reset to its default
        disposition. If the stream's encoding cannot express every Unicode
        character (or it has no encoding attribute at all), sys.stdout is
        replaced by a UnicodeStream wrapper.

        Returns True if stdout was already suitable and left untouched,
        False if it had to be wrapped.
    """
    current = sys.stdout
    if not current.isatty():
        signal.signal(signal.SIGPIPE, signal.SIG_DFL)
    # A stream without an 'encoding' attribute is treated as unsuitable.
    if _full_ucd_support(getattr(current, 'encoding', None)):
        return True
    sys.stdout = UnicodeStream(current, _guess_encoding())
    return False
112
113
def ucd_input(ucstring):
    # :api, deprecated in 2.0.0, will be erased when python2 is abandoned
    """ Show ucstring as a prompt and return the line entered by the user.

        The prompt is emitted with print() rather than handed to raw_input,
        because raw_input doesn't encode the passed string and sends its
        output to stderr.
    """
    print(ucstring, end='')
    answer = dnf.pycomp.raw_input()
    return answer
123
124
def ucd(obj):
    # :api, deprecated in 2.0.0, will be erased when python2 is abandoned
    """ Convert obj to a text string, like the builtin unicode(), but decode
        byte data with the guessed system encoding and drop undecodable
        bytes. """
    if not dnf.pycomp.PY3:
        if isinstance(obj, dnf.pycomp.unicode):
            return obj
        if hasattr(obj, '__unicode__'):
            # The object knows how to render itself as unicode; give it the
            # first try and fall through to the generic path if it fails.
            try:
                return dnf.pycomp.unicode(obj)
            except UnicodeError:
                pass
        return dnf.pycomp.unicode(str(obj), _guess_encoding(), errors='ignore')

    if dnf.pycomp.is_py3bytes(obj):
        return str(obj, _guess_encoding(), errors='ignore')
    if isinstance(obj, str):
        return obj
    return str(obj)
146
147
# Functions for formatting output according to terminal width.
# They should be used instead of built-in functions because they account for
# the different display widths of Unicode characters.
151
def _exact_width_char(uchar):
    """ Return 2 for a character whose East Asian width class is Wide or
        Fullwidth, 1 for any other character. """
    if unicodedata.east_asian_width(uchar) in ('W', 'F'):
        return 2
    return 1
154
155
def chop_str(msg, chop=None):
    """ Cut a Unicode string so that it fits into chop terminal columns.

        Returns a (width, string) pair: the display width of the kept text
        and the kept text itself. With chop=None nothing is cut and the full
        width is reported. Use this instead of %.*s, which counts characters
        rather than columns:
        "%.*s" % (10, msg)   <= becomes => "%s" % (chop_str(msg, 10)[1]) """
    if chop is None:
        return exact_width(msg), msg

    used = 0
    kept = []
    for uchar in msg:
        step = _exact_width_char(uchar)
        # Stop before the first character that would overflow the limit.
        if used + step > chop:
            break
        kept.append(uchar)
        used += step
    return used, ''.join(kept)
174
175
def exact_width(msg):
    """ Return the number of terminal columns msg occupies
        (wide and fullwidth characters count for two). """
    columns = 0
    for uchar in msg:
        columns += _exact_width_char(uchar)
    return columns
180
181
def fill_exact_width(msg, fill, chop=None, left=True, prefix='', suffix=''):
    """ Pad msg with spaces up to fill columns, after optionally chopping it
        to chop columns. Widths are measured in terminal columns, so this is
        the replacement for %*.*s when the text may contain wide characters.

        With left=True the text is left-aligned (padding goes after it),
        otherwise it is right-aligned. prefix and suffix are wrapped directly
        around the text and are not counted into its width; use them for
        "invisible" bytes such as highlighting.

        Examples:

        ``"%-*.*s" % (10, 20, msg)`` becomes
            ``"%s" % (fill_exact_width(msg, 10, 20))``.

        ``"%20.10s" % (msg)`` becomes
            ``"%s" % (fill_exact_width(msg, 20, 10, left=False))``.

        ``"%s%.10s%s" % (pre, msg, suf)`` becomes
            ``"%s" % (fill_exact_width(msg, 0, 10, prefix=pre, suffix=suf))``.
        """
    width, msg = chop_str(msg, chop)

    if width >= fill:
        # Already wide enough: no padding, only decorate when asked to.
        if prefix or suffix:
            return ''.join((prefix, msg, suffix))
        return msg

    padding = " " * (fill - width)
    if left:
        pieces = (prefix, msg, suffix, padding)
    else:
        pieces = (padding, prefix, msg, suffix)
    return ''.join(pieces)
214
215
def textwrap_fill(text, width=70, initial_indent='', subsequent_indent=''):
    """ Works like we want textwrap.wrap() to work, uses Unicode strings
        and doesn't screw up lists/blocks/etc.

        :param text: the text to wrap; tabs are expanded to 8 spaces
        :param width: maximum line width in terminal columns
        :param initial_indent: string put in front of the first output line
        :param subsequent_indent: string put in front of all other lines
        :return: the wrapped text as a single newline-joined string
    """

    # Characters recognized as list item markers. Text is Unicode here, so
    # the bullets have to be matched as whole characters (not by the first
    # byte of their UTF-8 encoding).
    bullets = ("-", "*", ".", "o", "\u2022", "\u2023", "\u2218")

    def _indent_at_beg(line):
        """ Return (leading spaces, list text indent) for line; the second
            value is 0 unless the line starts a list item. """
        count = len(line) - len(line.lstrip(' '))
        marker = line[count:count + 1]
        if marker not in bullets:
            return count, 0
        # A marker only counts as a list item when something indented
        # (spaces and/or a nested marker) follows it.
        nxt = _indent_at_beg(line[count + 1:])
        nxt = nxt[1] or nxt[0]
        if nxt:
            return count, count + 1 + nxt
        return count, 0

    text = text.rstrip('\n')
    lines = text.replace('\t', ' ' * 8).split('\n')

    ret = []
    indent = initial_indent
    wrap_last = False   # True while a wrapped paragraph is still "open"
    csab = 0            # leading spaces of the current line
    cspc_indent = 0     # list text indent of the current line (0 = no list)
    for line in lines:
        line = line.rstrip(' ')
        # Remember the values of the previous line before recomputing.
        (lsab, lspc_indent) = (csab, cspc_indent)
        (csab, cspc_indent) = _indent_at_beg(line)
        force_nl = False # We want to stop wrapping under "certain" conditions:
        if wrap_last and cspc_indent:        # if line starts a list or
            force_nl = True
        if wrap_last and csab == len(line):  # is empty line
            force_nl = True
        # if line doesn't continue a list and is "block indented"
        if wrap_last and not lspc_indent:
            if csab >= 4 and csab != lsab:
                force_nl = True
        if force_nl:
            # Flush the pending (partially filled) line.
            ret.append(indent.rstrip(' '))
            indent = subsequent_indent
            wrap_last = False
        if csab == len(line):  # empty line, remove spaces to make it easier.
            line = ''
        if wrap_last:
            # Continuation of the open paragraph: join it to the pending text
            # and keep the list indent of the paragraph's first line.
            line = line.lstrip(' ')
            cspc_indent = lspc_indent

        if exact_width(indent + line) <= width:
            wrap_last = False
            ret.append(indent + line)
            indent = subsequent_indent
            continue

        wrap_last = True
        words = line.split(' ')
        line = indent
        # Hanging indent for wrapped lines: the list text indent, or the
        # block indentation for lines indented by four or more spaces.
        spcs = cspc_indent
        if not spcs and csab >= 4:
            spcs = csab
        for word in words:
            # Break before a word that would overflow, unless the line holds
            # nothing beyond the indent yet (an overlong word stays as is).
            if (width < exact_width(line + word)) and \
               (exact_width(line) > exact_width(subsequent_indent)):
                ret.append(line.rstrip(' '))
                line = subsequent_indent + ' ' * spcs
            line += word
            line += ' '
        # The unfinished last line becomes the prefix of the next input line.
        indent = line.rstrip(' ') + ' '
    if wrap_last:
        ret.append(indent.rstrip(' '))

    return '\n'.join(ret)
293
294
def select_short_long(width, msg_short, msg_long):
    """ Pick the full message when it fits into width terminal columns and
        the abbreviated one otherwise. Handy in the i18n context, where the
        width of translated messages cannot be predicted. If the caller
        mixes the arguments up (msg_short is wider than msg_long), the roles
        are swapped automatically.

        Limitations:

        1. Nothing is truncated: if even the narrower message is wider than
           width, the output still overflows.
        2. Passing the same string twice is fine when no abbreviation is
           wanted. For two different strings of equal width the function
           cannot tell which one is the intended one, so do not rely on
           which of them is returned.

       Example:

       ``select_short_long (10, _("Repo"), _("Repository"))``

       will return "Repository" in English but the results in other languages
       may be different. """
    width_short = exact_width(msg_short)
    width_long = exact_width(msg_long)

    # Equal widths: nothing to choose between.
    if width_short == width_long:
        return msg_long

    # Order the pair by real width, undoing swapped arguments if needed.
    if width_short > width_long:
        wider, wider_width, narrower = msg_short, width_short, msg_long
    else:
        wider, wider_width, narrower = msg_long, width_long, msg_short

    if wider_width <= width:
        return wider
    return narrower
332
333
def translation(name):
    # :api, deprecated in 2.0.0, will be erased when python2 is abandoned
    """ Set up gettext translations for the domain name and return the
        translation functions, each wrapped so that its result goes through
        ucd(). """

    setup_locale()

    def ucd_wrapper(fnc):
        def wrapped(*w):
            return ucd(fnc(*w))
        return wrapped

    # fallback=True: no exception when no catalog is found for the domain.
    catalog = dnf.pycomp.gettext.translation(name, fallback=True)
    return map(ucd_wrapper, dnf.pycomp.gettext_setup(catalog))
343
344
def pgettext(context, message):
    """ Translate message within the given context.

        The lookup key is context and message joined by the \\x04 separator.
        If the result still contains the separator (i.e. the key came back
        untranslated), the plain message is returned instead.
    """
    translated = _(context + chr(4) + message)
    return message if "\004" in translated else translated
351
# setup translations
# `_` and `P_` are the two ucd-wrapped functions returned by translation()
# for the "dnf" domain (presumably the singular and plural lookups -- confirm
# against dnf.pycomp.gettext_setup).
_, P_ = translation("dnf")
# C_ is the context-aware lookup, an alias for pgettext().
C_ = pgettext
355