1"""
2This module redefines ``str`` on Python 2.x to be a subclass of the Py2
3``unicode`` type that behaves like the Python 3.x ``str``.
4
5The main differences between ``newstr`` and Python 2.x's ``unicode`` type are
6the stricter type-checking and absence of a `u''` prefix in the representation.
7
8It is designed to be used together with the ``unicode_literals`` import
9as follows:
10
11    >>> from __future__ import unicode_literals
12    >>> from builtins import str, isinstance
13
On Python 3.x and normally on Python 2.x, these expressions hold:
15
16    >>> str('blah') is 'blah'
17    True
18    >>> isinstance('blah', str)
19    True
20
21However, on Python 2.x, with this import:
22
23    >>> from __future__ import unicode_literals
24
25the same expressions are False:
26
27    >>> str('blah') is 'blah'
28    False
29    >>> isinstance('blah', str)
30    False
31
32This module is designed to be imported together with ``unicode_literals`` on
33Python 2 to bring the meaning of ``str`` back into alignment with unprefixed
34string literals (i.e. ``unicode`` subclasses).
35
Note that ``str()`` (and ``print()``) would then normally call the
``__unicode__`` method on objects in Python 2. To define string
representations of your objects portably across Py3 and Py2, use the
:func:`python_2_unicode_compatible` decorator in :mod:`future.utils`.
40
41"""
42
import string
try:
    from collections.abc import Iterable
except ImportError:  # Python 2: the ABCs still live in ``collections``
    from collections import Iterable
from numbers import Number

from future.utils import PY3, istext, with_metaclass, isnewbytes
from future.types import no, issubset
from future.types.newobject import newobject
49
50
# The classes below refer to the name ``unicode`` throughout. When PY3 is
# true that name is bound to ``str`` here so those references still resolve.
if PY3:
    # We'll probably never use newstr on Py3 anyway...
    unicode = str
54
55
class BaseNewStr(type):
    """
    Metaclass for ``newstr`` that customises ``isinstance()`` checks.
    """

    def __instancecheck__(cls, instance):
        """
        Decide whether ``instance`` counts as an instance of ``cls``.

        For ``newstr`` itself, any ``unicode`` object qualifies. For any
        other class using this metaclass, the ordinary subclass
        relationship of the instance's class is used.
        """
        if cls != newstr:
            return issubclass(instance.__class__, cls)
        return isinstance(instance, unicode)
62
63
class newstr(with_metaclass(BaseNewStr, unicode)):
    """
    A backport of the Python 3 str object to Py2.

    ``newstr`` subclasses ``unicode`` and overrides the methods whose Py3
    behaviour differs: results are wrapped back into ``newstr``, mixing
    with ``newbytes`` is rejected, ``decode`` is hidden, and the repr has
    no ``u`` prefix.
    """
    # Message used when a newbytes object is found where text is required.
    no_convert_msg = "Can't convert '{0}' object to str implicitly"

    def __new__(cls, *args, **kwargs):
        """
        From the Py3 str docstring:

          str(object='') -> str
          str(bytes_or_buffer[, encoding[, errors]]) -> str

          Create a new string object from the given object. If encoding or
          errors is specified, then the object must expose a data buffer
          that will be decoded using the given encoding and error handler.
          Otherwise, returns the result of object.__str__() (if defined)
          or repr(object).
          encoding defaults to sys.getdefaultencoding().
          errors defaults to 'strict'.

        """
        if len(args) == 0:
            return super(newstr, cls).__new__(cls)
        # Special case: If someone requests str(str(u'abc')), return the same
        # object (same id) for consistency with Py3.3. This is not true for
        # other objects like list or dict.
        elif type(args[0]) == newstr and cls == newstr:
            return args[0]
        elif isinstance(args[0], unicode):
            value = args[0]
        elif isinstance(args[0], bytes):   # i.e. Py2 bytes or newbytes
            # Decode whenever an encoding *or* an error handler was given
            # (positionally or by keyword); previously a lone ``errors=``
            # keyword was silently ignored.
            if ('encoding' in kwargs or 'errors' in kwargs
                    or len(args) > 1):
                value = args[0].decode(*args[1:], **kwargs)
            else:
                value = args[0].__str__()
        else:
            value = args[0]
        return super(newstr, cls).__new__(cls, value)

    def __repr__(self):
        """
        Return the repr without the Py2 ``u`` prefix.

        The prefix is only removed when it is actually present, so the
        opening quote is never stripped by mistake.
        """
        value = super(newstr, self).__repr__()
        if value[:1] == 'u':
            return value[1:]
        return value

    def __getitem__(self, y):
        """
        Return ``self[y]`` wrapped as a newstr.

        Warning: Python <= 2.7.6 has a bug that causes this method never to be called
        when y is a slice object. Therefore the type of newstr()[:2] is wrong
        (unicode instead of newstr).
        """
        return newstr(super(newstr, self).__getitem__(y))

    def __contains__(self, key):
        """
        Implement ``key in self``.

        ``key`` must be text (or a native Py2 byte string, which is
        converted with ``newstr(key)``); anything else, including newbytes,
        raises TypeError. The test itself is delegated to ``issubset`` from
        ``future.types`` on the two character lists.
        """
        errmsg = "'in <string>' requires string as left operand, not {0}"
        # Don't use isinstance() here because we only want to catch
        # newstr, not Python 2 unicode:
        if type(key) == newstr:
            newkey = key
        elif isinstance(key, unicode) or isinstance(key, bytes) and not isnewbytes(key):
            newkey = newstr(key)
        else:
            raise TypeError(errmsg.format(type(key)))
        return issubset(list(newkey), list(self))

    @no('newbytes')
    def __add__(self, other):
        """Return ``self + other`` as a newstr."""
        return newstr(super(newstr, self).__add__(other))

    @no('newbytes')
    def __radd__(self, left):
        " left + self "
        try:
            return newstr(left) + self
        except Exception:
            # Let Python report the unsupported operand types itself, but
            # do not swallow KeyboardInterrupt / SystemExit.
            return NotImplemented

    def __mul__(self, other):
        """Return ``self * other`` as a newstr."""
        return newstr(super(newstr, self).__mul__(other))

    def __rmul__(self, other):
        """Return ``other * self`` as a newstr."""
        return newstr(super(newstr, self).__rmul__(other))

    def join(self, iterable):
        """
        Join the items of ``iterable`` with ``self`` as the separator.

        Raises TypeError if any item is a newbytes object. The iterable is
        materialised once up front so that one-shot iterators (generators)
        are not exhausted by the type check before the actual join.
        """
        errmsg = 'sequence item {0}: expected unicode string, found bytes'
        items = list(iterable)
        for i, item in enumerate(items):
            # isnewbytes() is used rather than isinstance() because
            # __instancecheck__ is being overridden. E.g.
            # isinstance(b'abc', newbytes) is True on Py2.
            if isnewbytes(item):
                raise TypeError(errmsg.format(i))
        # Support use as a staticmethod: str.join('-', ['a', 'b'])
        if type(self) == newstr:
            return newstr(super(newstr, self).join(items))
        else:
            return newstr(super(newstr, newstr(self)).join(items))

    @no('newbytes')
    def find(self, sub, *args):
        """Return the lowest index of ``sub`` in self, or -1."""
        return super(newstr, self).find(sub, *args)

    @no('newbytes')
    def rfind(self, sub, *args):
        """Return the highest index of ``sub`` in self, or -1."""
        return super(newstr, self).rfind(sub, *args)

    @no('newbytes', (1, 2))
    def replace(self, old, new, *args):
        """Return a newstr with occurrences of ``old`` replaced by ``new``."""
        return newstr(super(newstr, self).replace(old, new, *args))

    def decode(self, *args):
        """Always raise AttributeError: text cannot be decoded."""
        raise AttributeError("decode method has been disabled in newstr")

    def encode(self, encoding='utf-8', errors='strict'):
        """
        Returns bytes

        Encode S using the codec registered for encoding. Default encoding
        is 'utf-8'. errors may be given to set a different error
        handling scheme. Default is 'strict' meaning that encoding errors raise
        a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and
        'xmlcharrefreplace' as well as any other name registered with
        codecs.register_error that can handle UnicodeEncodeErrors.

        ``errors='surrogateescape'`` is handled here by hand: characters in
        the range U+DC80..U+DCFF are turned back into the single bytes
        0x80..0xFF; everything else is encoded normally. This is not
        implemented for 'utf-16' and raises NotImplementedError there.
        """
        from future.types.newbytes import newbytes
        # Py2 unicode.encode() takes encoding and errors as optional parameter,
        # not keyword arguments as in Python 3 str.

        # For the surrogateescape error handling mechanism, the
        # codecs.register_error() function seems to be inadequate for an
        # implementation of it when encoding. (Decoding seems fine, however.)
        # For example, in the case of
        #     u'\udcc3'.encode('ascii', 'surrogateescape_handler')
        # after registering the ``surrogateescape_handler`` function in
        # future.utils.surrogateescape, both Python 2.x and 3.x raise an
        # exception anyway after the function is called because the unicode
        # string it has to return isn't encodable strictly as ASCII.

        if errors == 'surrogateescape':
            if encoding == 'utf-16':
                # Known to fail here. See test_encoding_works_normally()
                raise NotImplementedError('FIXME: surrogateescape handling is '
                                          'not yet implemented properly')
            # Encode char by char, building up list of byte-strings
            mybytes = []
            for c in self:
                code = ord(c)
                # Only U+DC80..U+DCFF stand for escaped bytes (PEP 383).
                # A wider range would yield negative or ASCII byte values.
                if 0xDC80 <= code <= 0xDCFF:
                    mybytes.append(newbytes([code - 0xDC00]))
                else:
                    mybytes.append(c.encode(encoding=encoding))
            return newbytes(b'').join(mybytes)
        return newbytes(super(newstr, self).encode(encoding, errors))

    @no('newbytes', 1)
    def startswith(self, prefix, *args):
        """
        Return True if self starts with ``prefix`` (a string or a tuple of
        strings). Raises TypeError if ``prefix`` contains a newbytes item.
        """
        # The decorator handles the first argument itself; the loop below
        # covers the case where prefix is e.g. a tuple of possible prefixes.
        if isinstance(prefix, Iterable):
            for thing in prefix:
                if isnewbytes(thing):
                    raise TypeError(self.no_convert_msg.format(type(thing)))
        return super(newstr, self).startswith(prefix, *args)

    @no('newbytes', 1)
    def endswith(self, prefix, *args):
        """
        Return True if self ends with ``prefix`` (a string or a tuple of
        strings). Raises TypeError if ``prefix`` contains a newbytes item.
        """
        # Note we need the decorator above as well as the isnewbytes()
        # check because prefix can be either a bytes object or e.g. a
        # tuple of possible prefixes. (If it's a bytes object, each item
        # in it is an int.)
        if isinstance(prefix, Iterable):
            for thing in prefix:
                if isnewbytes(thing):
                    raise TypeError(self.no_convert_msg.format(type(thing)))
        return super(newstr, self).endswith(prefix, *args)

    @no('newbytes', 1)
    def split(self, sep=None, maxsplit=-1):
        """Split on ``sep`` and return a list of newstr parts."""
        # Py2 unicode.split() takes maxsplit as an optional parameter,
        # not as a keyword argument as in Python 3 str.
        parts = super(newstr, self).split(sep, maxsplit)
        return [newstr(part) for part in parts]

    @no('newbytes', 1)
    def rsplit(self, sep=None, maxsplit=-1):
        """Split on ``sep`` from the right; return a list of newstr parts."""
        # Py2 unicode.rsplit() takes maxsplit as an optional parameter,
        # not as a keyword argument as in Python 3 str.
        parts = super(newstr, self).rsplit(sep, maxsplit)
        return [newstr(part) for part in parts]

    @no('newbytes', 1)
    def partition(self, sep):
        """Return the 3-tuple ``(head, sep, tail)`` as newstr objects."""
        parts = super(newstr, self).partition(sep)
        return tuple(newstr(part) for part in parts)

    @no('newbytes', 1)
    def rpartition(self, sep):
        """Like partition(), but split at the last occurrence of ``sep``."""
        parts = super(newstr, self).rpartition(sep)
        return tuple(newstr(part) for part in parts)

    @no('newbytes', 1)
    def index(self, sub, *args):
        """
        Like newstr.find() but raise ValueError when the substring is not
        found.
        """
        pos = self.find(sub, *args)
        if pos == -1:
            raise ValueError('substring not found')
        return pos

    def splitlines(self, keepends=False):
        """
        S.splitlines(keepends=False) -> list of strings

        Return a list of the lines in S, breaking at line boundaries.
        Line breaks are not included in the resulting list unless keepends
        is given and true.
        """
        # Py2 unicode.splitlines() takes keepends as an optional parameter,
        # not as a keyword argument as in Python 3 str.
        parts = super(newstr, self).splitlines(keepends)
        return [newstr(part) for part in parts]

    def __eq__(self, other):
        """
        Compare equal only to text or native Py2 byte strings; a newbytes
        object or any other type is never equal.
        """
        if (isinstance(other, unicode) or
            isinstance(other, bytes) and not isnewbytes(other)):
            return super(newstr, self).__eq__(other)
        else:
            return False

    def __ne__(self, other):
        """Inverse of ``__eq__``: always True for newbytes and non-strings."""
        if (isinstance(other, unicode) or
            isinstance(other, bytes) and not isnewbytes(other)):
            return super(newstr, self).__ne__(other)
        else:
            return True

    def __hash__(self):
        """
        Hash like the underlying ``unicode`` value.

        Defined explicitly because a class that overrides ``__eq__`` without
        ``__hash__`` becomes unhashable under Python 3 rules.
        """
        return super(newstr, self).__hash__()

    # Message used by the ordering methods when ``other`` is not text.
    unorderable_err = 'unorderable types: str() and {0}'

    def __lt__(self, other):
        """``self < other``; raises TypeError unless ``other`` is text."""
        if not istext(other):
            raise TypeError(self.unorderable_err.format(type(other)))
        return super(newstr, self).__lt__(other)

    def __le__(self, other):
        """``self <= other``; raises TypeError unless ``other`` is text."""
        if not istext(other):
            raise TypeError(self.unorderable_err.format(type(other)))
        return super(newstr, self).__le__(other)

    def __gt__(self, other):
        """``self > other``; raises TypeError unless ``other`` is text."""
        if not istext(other):
            raise TypeError(self.unorderable_err.format(type(other)))
        return super(newstr, self).__gt__(other)

    def __ge__(self, other):
        """``self >= other``; raises TypeError unless ``other`` is text."""
        if not istext(other):
            raise TypeError(self.unorderable_err.format(type(other)))
        return super(newstr, self).__ge__(other)

    def __getattribute__(self, name):
        """
        A trick to cause the ``hasattr`` builtin-fn to return False for
        the 'decode' method on Py2.
        """
        if name in ['decode', u'decode']:
            raise AttributeError("decode method has been disabled in newstr")
        return super(newstr, self).__getattribute__(name)

    def __native__(self):
        """
        A hook for the future.utils.native() function.
        """
        return unicode(self)

    @staticmethod
    def maketrans(x, y=None, z=None):
        """
        Return a translation table usable for str.translate().

        If there is only one argument, it must be a dictionary mapping Unicode
        ordinals (integers) or characters to Unicode ordinals, strings or None.
        Character keys will be then converted to ordinals.
        If there are two arguments, they must be strings of equal length, and
        in the resulting dictionary, each character in x will be mapped to the
        character at the same position in y. If there is a third argument, it
        must be a string, whose characters will be mapped to None in the result.

        Raises TypeError for arguments of the wrong type and ValueError for
        over-long string keys or for x and y of unequal length.
        """

        if y is None:
            # Raise explicitly; an ``assert`` would vanish under ``python -O``.
            if z is not None:
                raise TypeError('maketrans: a third argument requires a '
                                'second argument')
            if not isinstance(x, dict):
                raise TypeError('if you give only one argument to maketrans it must be a dict')
            result = {}
            for (key, value) in x.items():
                if isinstance(key, Number):
                    # Integer ordinals are already in their final form.
                    result[key] = value
                    continue
                if len(key) > 1:
                    raise ValueError('keys in translate table must be strings or integers')
                result[ord(key)] = value
        else:
            # Both arguments must be text. Parenthesised because ``not``
            # binds tighter than ``and``.
            if not (isinstance(x, unicode) and isinstance(y, unicode)):
                raise TypeError('x and y must be unicode strings')
            if not len(x) == len(y):
                raise ValueError('the first two maketrans arguments must have equal length')
            result = {}
            for (xi, yi) in zip(x, y):
                result[ord(xi)] = ord(yi)

        if z is not None:
            for char in z:
                result[ord(char)] = None
        return result

    def translate(self, table):
        """
        S.translate(table) -> str

        Return a copy of the string S, where all characters have been mapped
        through the given translation table, which must be a mapping of
        Unicode ordinals to Unicode ordinals, strings, or None.
        Unmapped characters are left untouched. Characters mapped to None
        are deleted.
        """
        # Py2's chr() only accepts 0..255 and returns a byte string, so
        # unichr() is needed there for ordinals beyond Latin-1.
        to_char = chr if PY3 else unichr
        pieces = []
        for c in self:
            code = ord(c)
            if code in table:
                val = table[code]
                if val is None:
                    continue
                elif isinstance(val, unicode):
                    pieces.append(val)
                else:
                    pieces.append(to_char(val))
            else:
                pieces.append(c)
        return newstr(u''.join(pieces))

    def isprintable(self):
        """Not implemented yet; always raises NotImplementedError."""
        raise NotImplementedError('fixme')

    def isidentifier(self):
        """Not implemented yet; always raises NotImplementedError."""
        raise NotImplementedError('fixme')

    def format_map(self, mapping):
        """
        S.format_map(mapping) -> str

        Return a formatted version of S, using substitutions looked up
        directly in ``mapping`` (which is not copied into a dict, so mapping
        subclasses with e.g. ``__missing__`` work).
        """
        formatted = string.Formatter().vformat(unicode(self), (), mapping)
        return newstr(formatted)
410
411
# Public API of this module: only ``newstr`` is exported by a star-import.
__all__ = ['newstr']
413