"""
A place for code to be called from the implementation of np.dtype

String handling is much easier to do correctly in python.
"""
import numpy as np


# Maps the single-character ``dtype.kind`` code to the stem used when
# building a dtype name such as 'float64' or 'uint8'.
_kind_to_stem = {
    'u': 'uint',
    'i': 'int',
    'c': 'complex',
    'f': 'float',
    'b': 'bool',
    'V': 'void',
    'O': 'object',
    'M': 'datetime',
    'm': 'timedelta',
    'S': 'bytes',
    'U': 'str',
}


def _kind_name(dtype):
    """
    Return the name stem (e.g. ``'float'``) registered for ``dtype.kind``.

    Raises
    ------
    RuntimeError
        If ``dtype.kind`` is not a key of ``_kind_to_stem``.
    """
    try:
        return _kind_to_stem[dtype.kind]
    except KeyError:
        # The KeyError itself carries no extra information, so the
        # exception context is suppressed.
        raise RuntimeError(
            "internal dtype error, unknown kind {!r}"
            .format(dtype.kind)
        ) from None


def __str__(dtype):
    """
    Build the ``str()`` form of a dtype.

    Structured dtypes and subarray dtypes use their construction strings;
    flexible or non-native dtypes use ``dtype.str``; everything else uses
    ``dtype.name``.
    """
    if dtype.fields is not None:
        return _struct_str(dtype, include_align=True)
    elif dtype.subdtype:
        return _subarray_str(dtype)
    elif issubclass(dtype.type, np.flexible) or not dtype.isnative:
        return dtype.str
    else:
        return dtype.name


def __repr__(dtype):
    """
    Build the ``repr()`` form of a dtype, ``dtype(<arg>[, align=True])``.
    """
    arg_str = _construction_repr(dtype, include_align=False)
    if dtype.isalignedstruct:
        # 'align' is passed as a separate constructor argument here
        # rather than inside the first argument.
        arg_str = arg_str + ", align=True"
    return "dtype({})".format(arg_str)


def _unpack_field(dtype, offset, title=None):
    """
    Helper function to normalize the items in dtype.fields.

    Entries may be ``(dtype, offset)`` or ``(dtype, offset, title)``;
    this always yields a 3-tuple, with ``title`` defaulting to None.

    Call as:

        dtype, offset, title = _unpack_field(*dtype.fields[name])
    """
    return dtype, offset, title


def _isunsized(dtype):
    """Return True if ``dtype.itemsize`` is zero."""
    # PyDataType_ISUNSIZED
    return dtype.itemsize == 0


def _construction_repr(dtype, include_align=False, short=False):
    """
    Creates a string repr of the dtype, excluding the 'dtype()' part
    surrounding the object. This object may be a string, a list, or
    a dict depending on the nature of the dtype. This
    is the object passed as the first parameter to the dtype
    constructor, and if no additional constructor parameters are
    given, will reproduce the exact memory layout.

    Parameters
    ----------
    short : bool
        If true, this creates a shorter repr using 'kind' and 'itemsize',
        instead of the longer type name.

    include_align : bool
        If true, this includes the 'align=True' parameter
        inside the struct dtype construction dict when needed. Use this flag
        if you want a proper repr string without the 'dtype()' part around it.

        If false, this does not preserve the
        'align=True' parameter or sticky NPY_ALIGNED_STRUCT flag for
        struct arrays like the regular repr does, because the 'align'
        flag is not part of first dtype constructor parameter. This
        mode is intended for a full 'repr', where the 'align=True' is
        provided as the second parameter.
    """
    if dtype.fields is not None:
        return _struct_str(dtype, include_align=include_align)
    elif dtype.subdtype:
        return _subarray_str(dtype)
    else:
        return _scalar_str(dtype, short=short)


def _scalar_str(dtype, short):
    """
    Return the quoted construction string for a non-struct, non-subarray
    dtype, e.g. ``"'float64'"``, ``"'<f8'"``, ``"'S5'"`` or ``"'<U3'"``.

    Parameters
    ----------
    dtype : np.dtype
        The dtype to describe.
    short : bool
        If true, bool is rendered as ``'?'`` and numeric types use the
        kind/itemsize form (``'<f8'``) instead of the long name.

    Raises
    ------
    RuntimeError
        If the dtype matches none of the known categories.
    """
    byteorder = _byte_order_str(dtype)

    if dtype.type == np.bool_:
        if short:
            return "'?'"
        else:
            return "'bool'"

    elif dtype.type == np.object_:
        # The object reference may be different sizes on different
        # platforms, so it should never include the itemsize here.
        return "'O'"

    # np.bytes_ / np.str_ are used instead of the np.string_ / np.unicode_
    # aliases, which no longer exist in NumPy 2.0.
    elif dtype.type == np.bytes_:
        if _isunsized(dtype):
            return "'S'"
        else:
            return "'S%d'" % dtype.itemsize

    elif dtype.type == np.str_:
        if _isunsized(dtype):
            return "'%sU'" % byteorder
        else:
            # itemsize is in bytes; the printed length is itemsize // 4
            return "'%sU%d'" % (byteorder, dtype.itemsize // 4)

    # unlike the other types, subclasses of void are preserved - but
    # historically the repr does not actually reveal the subclass
    elif issubclass(dtype.type, np.void):
        if _isunsized(dtype):
            return "'V'"
        else:
            return "'V%d'" % dtype.itemsize

    elif dtype.type == np.datetime64:
        return "'%sM8%s'" % (byteorder, _datetime_metadata_str(dtype))

    elif dtype.type == np.timedelta64:
        return "'%sm8%s'" % (byteorder, _datetime_metadata_str(dtype))

    elif np.issubdtype(dtype, np.number):
        # Short repr with endianness, like '<f8'
        if short or dtype.byteorder not in ('=', '|'):
            return "'%s%c%d'" % (byteorder, dtype.kind, dtype.itemsize)

        # Longer repr, like 'float64'
        else:
            return "'%s%d'" % (_kind_name(dtype), 8*dtype.itemsize)

    elif dtype.isbuiltin == 2:
        # Returned unquoted, unlike every other branch.
        return dtype.type.__name__

    else:
        raise RuntimeError(
            "Internal error: NumPy dtype unrecognized type number")


def _byte_order_str(dtype):
    """
    Normalize ``dtype.byteorder`` to an explicit prefix.

    '=' is replaced by the native order character, '|' becomes the empty
    string, and any other value is returned unchanged.
    """
    # hack to obtain the native and swapped byte order characters
    swapped = np.dtype(int).newbyteorder('S')
    native = swapped.newbyteorder('S')

    byteorder = dtype.byteorder
    if byteorder == '=':
        return native.byteorder
    if byteorder == 'S':
        # TODO: this path can never be reached
        return swapped.byteorder
    elif byteorder == '|':
        return ''
    else:
        return byteorder


def _datetime_metadata_str(dtype):
    """
    Return the bracketed unit suffix of a datetime/timedelta dtype.

    Gives '' for the generic unit, '[unit]' when the count is 1, and
    '[<count><unit>]' otherwise.
    """
    # TODO: this duplicates the C metastr_to_unicode functionality
    unit, count = np.datetime_data(dtype)
    if unit == 'generic':
        return ''
    elif count == 1:
        return '[{}]'.format(unit)
    else:
        return '[{}{}]'.format(count, unit)


def _struct_dict_str(dtype, includealignedflag):
    """
    Return the dict-style construction string of a structured dtype.

    The string lists 'names', 'formats', 'offsets', optionally 'titles'
    (only when at least one field has a title), and 'itemsize'. When
    ``includealignedflag`` is true and the dtype is an aligned struct,
    ``'aligned':True`` is appended.
    """
    # unpack the fields dictionary into lists
    names = dtype.names
    fld_dtypes = []
    offsets = []
    titles = []
    for name in names:
        fld_dtype, offset, title = _unpack_field(*dtype.fields[name])
        fld_dtypes.append(fld_dtype)
        offsets.append(offset)
        titles.append(title)

    # Build up a string to make the dictionary

    # First, the names
    ret = "{'names':["
    ret += ",".join(repr(name) for name in names)

    # Second, the formats
    ret += "], 'formats':["
    ret += ",".join(
        _construction_repr(fld_dtype, short=True) for fld_dtype in fld_dtypes)

    # Third, the offsets
    ret += "], 'offsets':["
    ret += ",".join("%d" % offset for offset in offsets)

    # Fourth, the titles
    if any(title is not None for title in titles):
        ret += "], 'titles':["
        ret += ",".join(repr(title) for title in titles)

    # Fifth, the itemsize
    ret += "], 'itemsize':%d" % dtype.itemsize

    if includealignedflag and dtype.isalignedstruct:
        # Finally, the aligned flag
        ret += ", 'aligned':True}"
    else:
        ret += "}"

    return ret


def _is_packed(dtype):
    """
    Checks whether the structured data type in 'dtype'
    has a simple layout, where all the fields are in order,
    and follow each other with no alignment padding.

    When this returns true, the dtype can be reconstructed
    from a list of the field names and dtypes with no additional
    dtype parameters.

    Duplicates the C `is_dtype_struct_simple_unaligned_layout` function.
    """
    total_offset = 0
    for name in dtype.names:
        fld_dtype, fld_offset, title = _unpack_field(*dtype.fields[name])
        if fld_offset != total_offset:
            return False
        total_offset += fld_dtype.itemsize
    # Trailing padding also disqualifies the dtype.
    return total_offset == dtype.itemsize


def _struct_list_str(dtype):
    """
    Return the list-of-tuples construction string of a structured dtype.

    Each field becomes ``(name, format)`` or ``((title, name), format)``;
    subarray fields are written as ``(name, base_format, shape)``.
    """
    items = []
    for name in dtype.names:
        fld_dtype, fld_offset, title = _unpack_field(*dtype.fields[name])

        item = "("
        if title is not None:
            item += "({!r}, {!r}), ".format(title, name)
        else:
            item += "{!r}, ".format(name)
        # Special case subarray handling here
        if fld_dtype.subdtype is not None:
            base, shape = fld_dtype.subdtype
            item += "{}, {}".format(
                _construction_repr(base, short=True),
                shape
            )
        else:
            item += _construction_repr(fld_dtype, short=True)

        item += ")"
        items.append(item)

    return "[" + ", ".join(items) + "]"


def _struct_str(dtype, include_align):
    """
    Return the construction string of a structured dtype.

    The list form is used for packed layouts; otherwise, or when the
    aligned flag has to be shown, the dict form is used. If the scalar
    type is not ``np.void``, the result is wrapped as
    ``(module.TypeName, <fields>)``.
    """
    # The list str representation can't include the 'align=' flag,
    # so if it is requested and the struct has the aligned flag set,
    # we must use the dict str instead.
    if not (include_align and dtype.isalignedstruct) and _is_packed(dtype):
        sub = _struct_list_str(dtype)

    else:
        sub = _struct_dict_str(dtype, include_align)

    # If the data type isn't the default, void, show it
    if dtype.type != np.void:
        return "({t.__module__}.{t.__name__}, {f})".format(t=dtype.type, f=sub)
    else:
        return sub


def _subarray_str(dtype):
    """Return ``(base_format, shape)`` for a subarray dtype."""
    base, shape = dtype.subdtype
    return "({}, {})".format(
        _construction_repr(base, short=True),
        shape
    )


def _name_includes_bit_suffix(dtype):
    """
    Return whether ``dtype.name`` should end with a bit count.

    False for object, bool, and unsized flexible dtypes; True otherwise.
    """
    if dtype.type == np.object_:
        # pointer size varies by system, best to omit it
        return False
    elif dtype.type == np.bool_:
        # implied
        return False
    elif np.issubdtype(dtype, np.flexible) and _isunsized(dtype):
        # unspecified
        return False
    else:
        return True


def _name_get(dtype):
    """
    Compute the "bit name" of a dtype, e.g. 'float64' or 'datetime64[ns]'.
    """
    # provides dtype.name.__get__, documented as returning a "bit name"

    if dtype.isbuiltin == 2:
        # user dtypes don't promise to do anything special
        return dtype.type.__name__

    if issubclass(dtype.type, np.void):
        # historically, void subclasses preserve their name, eg `record64`
        name = dtype.type.__name__
    else:
        name = _kind_name(dtype)

    # append bit counts
    if _name_includes_bit_suffix(dtype):
        name += "{}".format(dtype.itemsize * 8)

    # append metadata to datetimes
    if dtype.type in (np.datetime64, np.timedelta64):
        name += _datetime_metadata_str(dtype)

    return name