1"""
2A place for code to be called from the implementation of np.dtype
3
4String handling is much easier to do correctly in python.
5"""
6import numpy as np
7
8
9_kind_to_stem = {
10    'u': 'uint',
11    'i': 'int',
12    'c': 'complex',
13    'f': 'float',
14    'b': 'bool',
15    'V': 'void',
16    'O': 'object',
17    'M': 'datetime',
18    'm': 'timedelta',
19    'S': 'bytes',
20    'U': 'str',
21}
22
23
24def _kind_name(dtype):
25    try:
26        return _kind_to_stem[dtype.kind]
27    except KeyError as e:
28        raise RuntimeError(
29            "internal dtype error, unknown kind {!r}"
30            .format(dtype.kind)
31        ) from None
32
33
34def __str__(dtype):
35    if dtype.fields is not None:
36        return _struct_str(dtype, include_align=True)
37    elif dtype.subdtype:
38        return _subarray_str(dtype)
39    elif issubclass(dtype.type, np.flexible) or not dtype.isnative:
40        return dtype.str
41    else:
42        return dtype.name
43
44
45def __repr__(dtype):
46    arg_str = _construction_repr(dtype, include_align=False)
47    if dtype.isalignedstruct:
48        arg_str = arg_str + ", align=True"
49    return "dtype({})".format(arg_str)
50
51
52def _unpack_field(dtype, offset, title=None):
53    """
54    Helper function to normalize the items in dtype.fields.
55
56    Call as:
57
58    dtype, offset, title = _unpack_field(*dtype.fields[name])
59    """
60    return dtype, offset, title
61
62
63def _isunsized(dtype):
64    # PyDataType_ISUNSIZED
65    return dtype.itemsize == 0
66
67
68def _construction_repr(dtype, include_align=False, short=False):
69    """
70    Creates a string repr of the dtype, excluding the 'dtype()' part
71    surrounding the object. This object may be a string, a list, or
72    a dict depending on the nature of the dtype. This
73    is the object passed as the first parameter to the dtype
74    constructor, and if no additional constructor parameters are
75    given, will reproduce the exact memory layout.
76
77    Parameters
78    ----------
79    short : bool
80        If true, this creates a shorter repr using 'kind' and 'itemsize', instead
81        of the longer type name.
82
83    include_align : bool
84        If true, this includes the 'align=True' parameter
85        inside the struct dtype construction dict when needed. Use this flag
86        if you want a proper repr string without the 'dtype()' part around it.
87
88        If false, this does not preserve the
89        'align=True' parameter or sticky NPY_ALIGNED_STRUCT flag for
90        struct arrays like the regular repr does, because the 'align'
91        flag is not part of first dtype constructor parameter. This
92        mode is intended for a full 'repr', where the 'align=True' is
93        provided as the second parameter.
94    """
95    if dtype.fields is not None:
96        return _struct_str(dtype, include_align=include_align)
97    elif dtype.subdtype:
98        return _subarray_str(dtype)
99    else:
100        return _scalar_str(dtype, short=short)
101
102
103def _scalar_str(dtype, short):
104    byteorder = _byte_order_str(dtype)
105
106    if dtype.type == np.bool_:
107        if short:
108            return "'?'"
109        else:
110            return "'bool'"
111
112    elif dtype.type == np.object_:
113        # The object reference may be different sizes on different
114        # platforms, so it should never include the itemsize here.
115        return "'O'"
116
117    elif dtype.type == np.string_:
118        if _isunsized(dtype):
119            return "'S'"
120        else:
121            return "'S%d'" % dtype.itemsize
122
123    elif dtype.type == np.unicode_:
124        if _isunsized(dtype):
125            return "'%sU'" % byteorder
126        else:
127            return "'%sU%d'" % (byteorder, dtype.itemsize / 4)
128
129    # unlike the other types, subclasses of void are preserved - but
130    # historically the repr does not actually reveal the subclass
131    elif issubclass(dtype.type, np.void):
132        if _isunsized(dtype):
133            return "'V'"
134        else:
135            return "'V%d'" % dtype.itemsize
136
137    elif dtype.type == np.datetime64:
138        return "'%sM8%s'" % (byteorder, _datetime_metadata_str(dtype))
139
140    elif dtype.type == np.timedelta64:
141        return "'%sm8%s'" % (byteorder, _datetime_metadata_str(dtype))
142
143    elif np.issubdtype(dtype, np.number):
144        # Short repr with endianness, like '<f8'
145        if short or dtype.byteorder not in ('=', '|'):
146            return "'%s%c%d'" % (byteorder, dtype.kind, dtype.itemsize)
147
148        # Longer repr, like 'float64'
149        else:
150            return "'%s%d'" % (_kind_name(dtype), 8*dtype.itemsize)
151
152    elif dtype.isbuiltin == 2:
153        return dtype.type.__name__
154
155    else:
156        raise RuntimeError(
157            "Internal error: NumPy dtype unrecognized type number")
158
159
160def _byte_order_str(dtype):
161    """ Normalize byteorder to '<' or '>' """
162    # hack to obtain the native and swapped byte order characters
163    swapped = np.dtype(int).newbyteorder('S')
164    native = swapped.newbyteorder('S')
165
166    byteorder = dtype.byteorder
167    if byteorder == '=':
168        return native.byteorder
169    if byteorder == 'S':
170        # TODO: this path can never be reached
171        return swapped.byteorder
172    elif byteorder == '|':
173        return ''
174    else:
175        return byteorder
176
177
178def _datetime_metadata_str(dtype):
179    # TODO: this duplicates the C metastr_to_unicode functionality
180    unit, count = np.datetime_data(dtype)
181    if unit == 'generic':
182        return ''
183    elif count == 1:
184        return '[{}]'.format(unit)
185    else:
186        return '[{}{}]'.format(count, unit)
187
188
189def _struct_dict_str(dtype, includealignedflag):
190    # unpack the fields dictionary into ls
191    names = dtype.names
192    fld_dtypes = []
193    offsets = []
194    titles = []
195    for name in names:
196        fld_dtype, offset, title = _unpack_field(*dtype.fields[name])
197        fld_dtypes.append(fld_dtype)
198        offsets.append(offset)
199        titles.append(title)
200
201    # Build up a string to make the dictionary
202
203    # First, the names
204    ret = "{'names':["
205    ret += ",".join(repr(name) for name in names)
206
207    # Second, the formats
208    ret += "], 'formats':["
209    ret += ",".join(
210        _construction_repr(fld_dtype, short=True) for fld_dtype in fld_dtypes)
211
212    # Third, the offsets
213    ret += "], 'offsets':["
214    ret += ",".join("%d" % offset for offset in offsets)
215
216    # Fourth, the titles
217    if any(title is not None for title in titles):
218        ret += "], 'titles':["
219        ret += ",".join(repr(title) for title in titles)
220
221    # Fifth, the itemsize
222    ret += "], 'itemsize':%d" % dtype.itemsize
223
224    if (includealignedflag and dtype.isalignedstruct):
225        # Finally, the aligned flag
226        ret += ", 'aligned':True}"
227    else:
228        ret += "}"
229
230    return ret
231
232
233def _is_packed(dtype):
234    """
235    Checks whether the structured data type in 'dtype'
236    has a simple layout, where all the fields are in order,
237    and follow each other with no alignment padding.
238
239    When this returns true, the dtype can be reconstructed
240    from a list of the field names and dtypes with no additional
241    dtype parameters.
242
243    Duplicates the C `is_dtype_struct_simple_unaligned_layout` function.
244    """
245    total_offset = 0
246    for name in dtype.names:
247        fld_dtype, fld_offset, title = _unpack_field(*dtype.fields[name])
248        if fld_offset != total_offset:
249            return False
250        total_offset += fld_dtype.itemsize
251    if total_offset != dtype.itemsize:
252        return False
253    return True
254
255
256def _struct_list_str(dtype):
257    items = []
258    for name in dtype.names:
259        fld_dtype, fld_offset, title = _unpack_field(*dtype.fields[name])
260
261        item = "("
262        if title is not None:
263            item += "({!r}, {!r}), ".format(title, name)
264        else:
265            item += "{!r}, ".format(name)
266        # Special case subarray handling here
267        if fld_dtype.subdtype is not None:
268            base, shape = fld_dtype.subdtype
269            item += "{}, {}".format(
270                _construction_repr(base, short=True),
271                shape
272            )
273        else:
274            item += _construction_repr(fld_dtype, short=True)
275
276        item += ")"
277        items.append(item)
278
279    return "[" + ", ".join(items) + "]"
280
281
282def _struct_str(dtype, include_align):
283    # The list str representation can't include the 'align=' flag,
284    # so if it is requested and the struct has the aligned flag set,
285    # we must use the dict str instead.
286    if not (include_align and dtype.isalignedstruct) and _is_packed(dtype):
287        sub = _struct_list_str(dtype)
288
289    else:
290        sub = _struct_dict_str(dtype, include_align)
291
292    # If the data type isn't the default, void, show it
293    if dtype.type != np.void:
294        return "({t.__module__}.{t.__name__}, {f})".format(t=dtype.type, f=sub)
295    else:
296        return sub
297
298
299def _subarray_str(dtype):
300    base, shape = dtype.subdtype
301    return "({}, {})".format(
302        _construction_repr(base, short=True),
303        shape
304    )
305
306
307def _name_includes_bit_suffix(dtype):
308    if dtype.type == np.object_:
309        # pointer size varies by system, best to omit it
310        return False
311    elif dtype.type == np.bool_:
312        # implied
313        return False
314    elif np.issubdtype(dtype, np.flexible) and _isunsized(dtype):
315        # unspecified
316        return False
317    else:
318        return True
319
320
321def _name_get(dtype):
322    # provides dtype.name.__get__, documented as returning a "bit name"
323
324    if dtype.isbuiltin == 2:
325        # user dtypes don't promise to do anything special
326        return dtype.type.__name__
327
328    if issubclass(dtype.type, np.void):
329        # historically, void subclasses preserve their name, eg `record64`
330        name = dtype.type.__name__
331    else:
332        name = _kind_name(dtype)
333
334    # append bit counts
335    if _name_includes_bit_suffix(dtype):
336        name += "{}".format(dtype.itemsize * 8)
337
338    # append metadata to datetimes
339    if dtype.type in (np.datetime64, np.timedelta64):
340        name += _datetime_metadata_str(dtype)
341
342    return name
343