1# -*- coding: utf-8 -*-
2# config.py
3# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
4#
5# This module is part of GitPython and is released under
6# the BSD License: http://www.opensource.org/licenses/bsd-license.php
7"""utilities to help provide compatibility with python 3"""
8# flake8: noqa
9
10import locale
11import os
12import sys
13import codecs
14
15
16from gitdb.utils.compat import (
17    xrange,
18    MAXSIZE,    # @UnusedImport
19    izip,       # @UnusedImport
20)
21from gitdb.utils.encoding import (
22    string_types,    # @UnusedImport
23    text_type,       # @UnusedImport
24    force_bytes,     # @UnusedImport
25    force_text       # @UnusedImport
26)
27
28
# True when running on Python 3 or newer.
PY3 = sys.version_info[0] >= 3
# Platform flags derived from os.name ('nt' on Windows, 'posix' on Unix-likes).
is_win = (os.name == 'nt')
is_posix = (os.name == 'posix')
# os.name is never 'darwin' (macOS reports 'posix' there), so the platform has
# to be detected through sys.platform instead.
is_darwin = (sys.platform == 'darwin')
# Default string encoding reported by the interpreter.
defenc = sys.getdefaultencoding()
34
if not PY3:
    FileType = file  # @UndefinedVariable on PY3
    # The default encoding is usually plain ascii, which might not be enough
    # for our encoding needs. Unless it was set specifically, use utf-8.
    if defenc == 'ascii':
        defenc = 'utf-8'
    byte_ord = ord
    bchr = chr
    unicode = unicode
    binary_type = str

    def mviter(d):
        """Return an iterator over the values of the mapping ``d``."""
        return d.itervalues()
else:
    import io
    FileType = io.IOBase
    unicode = str
    binary_type = bytes

    def byte_ord(b):
        """Return ``b`` unchanged (counterpart of ``ord`` in the other branch)."""
        return b

    def bchr(n):
        """Return a ``bytes`` object holding the single byte value ``n``."""
        return bytes([n])

    def mviter(d):
        """Return the values view of the mapping ``d``."""
        return d.values()

# Both interpreter branches expose ``range`` as an alias of ``xrange``.
range = xrange  # @ReservedAssignment
66
def safe_decode(s):
    """Return ``s`` as text.

    Text is returned unchanged, bytes are decoded with ``defenc`` using the
    'surrogateescape' error handler, and ``None`` yields ``None``.

    :raise TypeError: if ``s`` is neither text, bytes nor ``None``
    """
    if s is None:
        return None
    if isinstance(s, unicode):
        return s
    if isinstance(s, bytes):
        return s.decode(defenc, 'surrogateescape')
    raise TypeError('Expected bytes or text, but got %r' % (s,))
75
76
def safe_encode(s):
    """Return ``s`` as bytes.

    Text is encoded with ``defenc``, bytes are returned unchanged, and ``None``
    yields ``None``.

    :raise TypeError: if ``s`` is neither text, bytes nor ``None``
    """
    if s is None:
        return None
    if isinstance(s, unicode):
        return s.encode(defenc)
    if isinstance(s, bytes):
        return s
    raise TypeError('Expected bytes or text, but got %r' % (s,))
85
86
def win_encode(s):
    """Encode text for use as a process argument on Windows.

    Text is encoded with ``locale.getpreferredencoding(False)``, bytes are
    returned unchanged, and ``None`` yields ``None``.

    :raise TypeError: if ``s`` is neither text, bytes nor ``None``
    """
    if s is None:
        return None
    if isinstance(s, unicode):
        preferred = locale.getpreferredencoding(False)
        return s.encode(preferred)
    if isinstance(s, bytes):
        return s
    raise TypeError('Expected bytes or text, but got %r' % (s,))
95
96
def with_metaclass(meta, *bases):
    """copied from https://github.com/Byron/bcore/blob/master/src/python/butility/future.py#L15

    Return a helper class to derive from so that the derived class is created
    by ``meta`` with ``bases`` as its base classes, using syntax that works on
    both Python 2 and Python 3.

    :param meta: the metaclass that should create the final class
    :param bases: base classes the final class should have
    :return: a temporary class named ``meta.__name__ + 'Helper'``
    """
    class metaclass(meta):
        __call__ = type.__call__
        __init__ = type.__init__

        def __new__(cls, name, nbases, d):
            # ``None`` bases mark the creation of the helper class itself
            if nbases is None:
                return type.__new__(cls, name, (), d)
            # There may be clients who rely on this attribute to be set to a reasonable value, which is why
            # we set the __metaclass__ attribute explicitly - but only if the class did not define its own
            if '__metaclass__' not in d and not PY3:
                d['__metaclass__'] = meta
            # build the real class from the bases given to with_metaclass, not from the helper
            return meta(name, bases, d)
    return metaclass(meta.__name__ + 'Helper', None, {})
112
113
114## From https://docs.python.org/3.3/howto/pyporting.html
class UnicodeMixin(object):

    """Mixin providing ``__str__`` on top of a ``__unicode__`` method that the
    inheriting class defines, for both Python 2 and Python 3."""

    def __str__(self):
        text = self.__unicode__()
        if PY3:
            return text
        # Python 2: ``str`` is a byte string, so encode the unicode result
        return text.encode(defenc)
126
127
128"""
129This is Victor Stinner's pure-Python implementation of PEP 383: the "surrogateescape" error
130handler of Python 3.
131Source: misc/python/surrogateescape.py in https://bitbucket.org/haypo/misc
132"""
133
134# This code is released under the Python license and the BSD 2-clause license
135
136
# Name of the codec error handler that encodefilename()/decodefilename() pass
# to encode()/decode() and that register_surrogateescape() looks up/registers.
FS_ERRORS = 'surrogateescape'

#     # -- Python 2/3 compatibility -------------------------------------
#     FS_ERRORS = 'my_surrogateescape'
141
def u(text):
    """Return ``text`` unchanged on Python 3; otherwise decode it with the
    'unicode_escape' codec."""
    return text if PY3 else text.decode('unicode_escape')
147
def b(data):
    """Return ``data`` encoded as 'latin1' on Python 3; otherwise return it
    unchanged."""
    return data.encode('latin1') if PY3 else data
153
# Pick the callables that turn a code point into a one-character text string
# (``_unichr``) and an integer into a one-byte byte string (``bytes_chr``).
if not PY3:
    _unichr = unichr
    bytes_chr = chr
else:
    _unichr = chr
    bytes_chr = lambda code: bytes((code,))
160
def surrogateescape_handler(exc):
    """
    Pure Python implementation of the PEP 383 "surrogateescape" error handler
    of Python 3. On decoding, undecodable bytes are replaced by a Unicode
    character U+DCxx; on encoding, these are translated back.

    :param exc: the UnicodeDecodeError or UnicodeEncodeError being handled
    :return: tuple of (replacement string, position at which to resume)
    :raise: ``exc`` itself if it is of another type or cannot be handled
    """
    offending = exc.object[exc.start:exc.end]
    decoding = isinstance(exc, UnicodeDecodeError)

    if not decoding and not isinstance(exc, UnicodeEncodeError):
        raise exc

    try:
        if decoding:
            # offending is a byte-string in this case
            replacement = replace_surrogate_decode(offending)
        else:
            # In the case of u'\udcc3'.encode('ascii',
            # 'this_surrogateescape_handler'), both Python 2.x and 3.x raise an
            # exception anyway after this function is called, even though I think
            # it's doing what it should. It seems that the strict encoder is called
            # to encode the unicode string that this function returns ...
            replacement = replace_surrogate_encode(offending, exc)
    except NotASurrogateError:
        raise exc
    return (replacement, exc.end)
186
187
class NotASurrogateError(Exception):
    """Signals that a character or byte cannot be handled as a surrogate
    escape; surrogateescape_handler answers it by re-raising the original
    codec error."""
190
191
def replace_surrogate_encode(mystring, exc):
    """
    Translate the escape characters U+DC00..U+DCFF in ``mystring`` back to the
    characters U+0000..U+00FF.

    Returns a (unicode) string, not the more logical bytes, because the codecs
    register_error functionality expects this.

    :param mystring: the unicode characters the encoder could not handle
    :param exc: the original encoding error
    :raise: ``exc`` if any character lies outside U+DC00..U+DCFF
    """
    decoded = []
    for ch in mystring:
        code = ord(ch)

        # Only U+DC00..U+DCFF can be mapped back by subtracting 0xDC00. Anything
        # else - including the high surrogates U+D800..U+DBFF, for which the
        # subtraction would be negative - fails with the original exception.
        if not 0xDC00 <= code <= 0xDCFF:
            raise exc
        decoded.append(_unichr(code - 0xDC00))
    return str().join(decoded)
219
220
def replace_surrogate_decode(mybytes):
    """
    Returns a (unicode) string in which every byte 0x80..0xFF of ``mybytes``
    is replaced by the character U+DC80..U+DCFF and every byte up to 0x7F by
    the character with the same code.

    :raise NotASurrogateError: if an item has a code above 0xFF
    """
    pieces = []
    for item in mybytes:
        # We may be parsing newbytes (in which case item is an int) or a native
        # str on Py2
        code = item if isinstance(item, int) else ord(item)
        if code > 0xFF:
            raise NotASurrogateError
        if code >= 0x80:
            pieces.append(_unichr(0xDC00 + code))
        else:
            pieces.append(_unichr(code))
    return str().join(pieces)
244
245
def encodefilename(fn):
    """Encode the text ``fn`` to bytes according to ``FS_ENCODING``.

    For 'ascii' and 'utf-8' the encoding is done character by character so
    that U+DC80..U+DCFF are turned back into the single bytes 0x80..0xFF; any
    other encoding is delegated to ``fn.encode(FS_ENCODING, FS_ERRORS)``.

    :raise UnicodeEncodeError: for characters the chosen encoding cannot hold
    """
    if FS_ENCODING == 'ascii':
        # ASCII encoder of Python 2 expects that the error handler returns a
        # Unicode string encodable to ASCII, whereas our surrogateescape error
        # handler has to return bytes in 0x80-0xFF range.
        chunks = []
        for pos, char in enumerate(fn):
            ordinal = ord(char)
            if 0xDC80 <= ordinal <= 0xDCFF:
                chunks.append(bytes_chr(ordinal - 0xDC00))
            elif ordinal < 128:
                chunks.append(bytes_chr(ordinal))
            else:
                raise UnicodeEncodeError(FS_ENCODING,
                    fn, pos, pos + 1,
                    'ordinal not in range(128)')
        return bytes().join(chunks)

    if FS_ENCODING == 'utf-8':
        # UTF-8 encoder of Python 2 encodes surrogates, so U+DC80-U+DCFF
        # doesn't go through our error handler
        chunks = []
        for pos, char in enumerate(fn):
            ordinal = ord(char)
            if 0xDC80 <= ordinal <= 0xDCFF:
                chunks.append(bytes_chr(ordinal - 0xDC00))
            elif 0xD800 <= ordinal <= 0xDFFF:
                raise UnicodeEncodeError(
                    FS_ENCODING,
                    fn, pos, pos + 1, 'surrogates not allowed')
            else:
                chunks.append(char.encode('utf-8'))
        return bytes().join(chunks)

    return fn.encode(FS_ENCODING, FS_ERRORS)
284
def decodefilename(fn):
    """Decode the byte string ``fn`` with ``FS_ENCODING``, using the
    ``FS_ERRORS`` error handler."""
    decoded_name = fn.decode(FS_ENCODING, FS_ERRORS)
    return decoded_name
287
# Encoding used by encodefilename()/decodefilename(), together with a sample
# byte string (``fn``) and unicode string (``encoded``).
FS_ENCODING = 'ascii'
fn = b('[abc\xff]')
encoded = u('[abc\udcff]')
# FS_ENCODING = 'cp932'; fn = b('[abc\x81\x00]'); encoded = u('[abc\udc81\x00]')
# FS_ENCODING = 'UTF-8'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]')


# normalize the filesystem encoding name.
# For example, we expect "utf-8", not "UTF8".
FS_ENCODING = codecs.lookup(FS_ENCODING).name
296
297
def register_surrogateescape():
    """
    Registers the surrogateescape error handler on Python 2 (only).

    Nothing happens on Python 3, or if a handler named ``FS_ERRORS`` can
    already be looked up.
    """
    if not PY3:
        try:
            codecs.lookup_error(FS_ERRORS)
        except LookupError:
            # no handler of that name yet: install the pure-Python one
            codecs.register_error(FS_ERRORS, surrogateescape_handler)
308
309
# Probe at import time whether decoding with the "surrogateescape" error
# handler works for the default encoding; if the probe raises for any reason,
# fall back to register_surrogateescape() (which itself does nothing on
# Python 3).
try:
    b"100644 \x9f\0aaa".decode(defenc, "surrogateescape")
except Exception:
    register_surrogateescape()
314