1import sys 2import os 3import re 4import functools 5import itertools 6import warnings 7import weakref 8import contextlib 9from operator import itemgetter, index as opindex 10from collections.abc import Mapping 11 12import numpy as np 13from . import format 14from ._datasource import DataSource 15from numpy.core import overrides 16from numpy.core.multiarray import packbits, unpackbits 17from numpy.core.overrides import set_array_function_like_doc, set_module 18from numpy.core._internal import recursive 19from ._iotools import ( 20 LineSplitter, NameValidator, StringConverter, ConverterError, 21 ConverterLockError, ConversionWarning, _is_string_like, 22 has_nested_fields, flatten_dtype, easy_dtype, _decode_line 23 ) 24 25from numpy.compat import ( 26 asbytes, asstr, asunicode, os_fspath, os_PathLike, 27 pickle, contextlib_nullcontext 28 ) 29 30 31@set_module('numpy') 32def loads(*args, **kwargs): 33 # NumPy 1.15.0, 2017-12-10 34 warnings.warn( 35 "np.loads is deprecated, use pickle.loads instead", 36 DeprecationWarning, stacklevel=2) 37 return pickle.loads(*args, **kwargs) 38 39 40__all__ = [ 41 'savetxt', 'loadtxt', 'genfromtxt', 'ndfromtxt', 'mafromtxt', 42 'recfromtxt', 'recfromcsv', 'load', 'loads', 'save', 'savez', 43 'savez_compressed', 'packbits', 'unpackbits', 'fromregex', 'DataSource' 44 ] 45 46 47array_function_dispatch = functools.partial( 48 overrides.array_function_dispatch, module='numpy') 49 50 51class BagObj: 52 """ 53 BagObj(obj) 54 55 Convert attribute look-ups to getitems on the object passed in. 56 57 Parameters 58 ---------- 59 obj : class instance 60 Object on which attribute look-up is performed. 61 62 Examples 63 -------- 64 >>> from numpy.lib.npyio import BagObj as BO 65 >>> class BagDemo: 66 ... def __getitem__(self, key): # An instance of BagObj(BagDemo) 67 ... # will call this method when any 68 ... # attribute look-up is required 69 ... result = "Doesn't matter what you want, " 70 ... return result + "you're gonna get this" 71 ... 72 >>> demo_obj = BagDemo() 73 >>> bagobj = BO(demo_obj) 74 >>> bagobj.hello_there 75 "Doesn't matter what you want, you're gonna get this" 76 >>> bagobj.I_can_be_anything 77 "Doesn't matter what you want, you're gonna get this" 78 79 """ 80 81 def __init__(self, obj): 82 # Use weakref to make NpzFile objects collectable by refcount 83 self._obj = weakref.proxy(obj) 84 85 def __getattribute__(self, key): 86 try: 87 return object.__getattribute__(self, '_obj')[key] 88 except KeyError: 89 raise AttributeError(key) from None 90 91 def __dir__(self): 92 """ 93 Enables dir(bagobj) to list the files in an NpzFile. 94 95 This also enables tab-completion in an interpreter or IPython. 96 """ 97 return list(object.__getattribute__(self, '_obj').keys()) 98 99 100def zipfile_factory(file, *args, **kwargs): 101 """ 102 Create a ZipFile. 103 104 Allows for Zip64, and the `file` argument can accept file, str, or 105 pathlib.Path objects. `args` and `kwargs` are passed to the zipfile.ZipFile 106 constructor. 107 """ 108 if not hasattr(file, 'read'): 109 file = os_fspath(file) 110 import zipfile 111 kwargs['allowZip64'] = True 112 return zipfile.ZipFile(file, *args, **kwargs) 113 114 115class NpzFile(Mapping): 116 """ 117 NpzFile(fid) 118 119 A dictionary-like object with lazy-loading of files in the zipped 120 archive provided on construction. 121 122 `NpzFile` is used to load files in the NumPy ``.npz`` data archive 123 format. It assumes that files in the archive have a ``.npy`` extension, 124 other files are ignored. 125 126 The arrays and file strings are lazily loaded on either 127 getitem access using ``obj['key']`` or attribute lookup using 128 ``obj.f.key``. A list of all files (without ``.npy`` extensions) can 129 be obtained with ``obj.files`` and the ZipFile object itself using 130 ``obj.zip``. 131 132 Attributes 133 ---------- 134 files : list of str 135 List of all files in the archive with a ``.npy`` extension. 136 zip : ZipFile instance 137 The ZipFile object initialized with the zipped archive. 138 f : BagObj instance 139 An object on which attribute can be performed as an alternative 140 to getitem access on the `NpzFile` instance itself. 141 allow_pickle : bool, optional 142 Allow loading pickled data. Default: False 143 144 .. versionchanged:: 1.16.3 145 Made default False in response to CVE-2019-6446. 146 147 pickle_kwargs : dict, optional 148 Additional keyword arguments to pass on to pickle.load. 149 These are only useful when loading object arrays saved on 150 Python 2 when using Python 3. 151 152 Parameters 153 ---------- 154 fid : file or str 155 The zipped archive to open. This is either a file-like object 156 or a string containing the path to the archive. 157 own_fid : bool, optional 158 Whether NpzFile should close the file handle. 159 Requires that `fid` is a file-like object. 160 161 Examples 162 -------- 163 >>> from tempfile import TemporaryFile 164 >>> outfile = TemporaryFile() 165 >>> x = np.arange(10) 166 >>> y = np.sin(x) 167 >>> np.savez(outfile, x=x, y=y) 168 >>> _ = outfile.seek(0) 169 170 >>> npz = np.load(outfile) 171 >>> isinstance(npz, np.lib.io.NpzFile) 172 True 173 >>> sorted(npz.files) 174 ['x', 'y'] 175 >>> npz['x'] # getitem access 176 array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) 177 >>> npz.f.x # attribute lookup 178 array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) 179 180 """ 181 # Make __exit__ safe if zipfile_factory raises an exception 182 zip = None 183 fid = None 184 185 def __init__(self, fid, own_fid=False, allow_pickle=False, 186 pickle_kwargs=None): 187 # Import is postponed to here since zipfile depends on gzip, an 188 # optional component of the so-called standard library. 189 _zip = zipfile_factory(fid) 190 self._files = _zip.namelist() 191 self.files = [] 192 self.allow_pickle = allow_pickle 193 self.pickle_kwargs = pickle_kwargs 194 for x in self._files: 195 if x.endswith('.npy'): 196 self.files.append(x[:-4]) 197 else: 198 self.files.append(x) 199 self.zip = _zip 200 self.f = BagObj(self) 201 if own_fid: 202 self.fid = fid 203 204 def __enter__(self): 205 return self 206 207 def __exit__(self, exc_type, exc_value, traceback): 208 self.close() 209 210 def close(self): 211 """ 212 Close the file. 213 214 """ 215 if self.zip is not None: 216 self.zip.close() 217 self.zip = None 218 if self.fid is not None: 219 self.fid.close() 220 self.fid = None 221 self.f = None # break reference cycle 222 223 def __del__(self): 224 self.close() 225 226 # Implement the Mapping ABC 227 def __iter__(self): 228 return iter(self.files) 229 230 def __len__(self): 231 return len(self.files) 232 233 def __getitem__(self, key): 234 # FIXME: This seems like it will copy strings around 235 # more than is strictly necessary. The zipfile 236 # will read the string and then 237 # the format.read_array will copy the string 238 # to another place in memory. 239 # It would be better if the zipfile could read 240 # (or at least uncompress) the data 241 # directly into the array memory. 242 member = False 243 if key in self._files: 244 member = True 245 elif key in self.files: 246 member = True 247 key += '.npy' 248 if member: 249 bytes = self.zip.open(key) 250 magic = bytes.read(len(format.MAGIC_PREFIX)) 251 bytes.close() 252 if magic == format.MAGIC_PREFIX: 253 bytes = self.zip.open(key) 254 return format.read_array(bytes, 255 allow_pickle=self.allow_pickle, 256 pickle_kwargs=self.pickle_kwargs) 257 else: 258 return self.zip.read(key) 259 else: 260 raise KeyError("%s is not a file in the archive" % key) 261 262 263 # deprecate the python 2 dict apis that we supported by accident in 264 # python 3. We forgot to implement itervalues() at all in earlier 265 # versions of numpy, so no need to deprecated it here. 266 267 def iteritems(self): 268 # Numpy 1.15, 2018-02-20 269 warnings.warn( 270 "NpzFile.iteritems is deprecated in python 3, to match the " 271 "removal of dict.itertems. Use .items() instead.", 272 DeprecationWarning, stacklevel=2) 273 return self.items() 274 275 def iterkeys(self): 276 # Numpy 1.15, 2018-02-20 277 warnings.warn( 278 "NpzFile.iterkeys is deprecated in python 3, to match the " 279 "removal of dict.iterkeys. Use .keys() instead.", 280 DeprecationWarning, stacklevel=2) 281 return self.keys() 282 283 284@set_module('numpy') 285def load(file, mmap_mode=None, allow_pickle=False, fix_imports=True, 286 encoding='ASCII'): 287 """ 288 Load arrays or pickled objects from ``.npy``, ``.npz`` or pickled files. 289 290 .. warning:: Loading files that contain object arrays uses the ``pickle`` 291 module, which is not secure against erroneous or maliciously 292 constructed data. Consider passing ``allow_pickle=False`` to 293 load data that is known not to contain object arrays for the 294 safer handling of untrusted sources. 295 296 Parameters 297 ---------- 298 file : file-like object, string, or pathlib.Path 299 The file to read. File-like objects must support the 300 ``seek()`` and ``read()`` methods. Pickled files require that the 301 file-like object support the ``readline()`` method as well. 302 mmap_mode : {None, 'r+', 'r', 'w+', 'c'}, optional 303 If not None, then memory-map the file, using the given mode (see 304 `numpy.memmap` for a detailed description of the modes). A 305 memory-mapped array is kept on disk. However, it can be accessed 306 and sliced like any ndarray. Memory mapping is especially useful 307 for accessing small fragments of large files without reading the 308 entire file into memory. 309 allow_pickle : bool, optional 310 Allow loading pickled object arrays stored in npy files. Reasons for 311 disallowing pickles include security, as loading pickled data can 312 execute arbitrary code. If pickles are disallowed, loading object 313 arrays will fail. Default: False 314 315 .. versionchanged:: 1.16.3 316 Made default False in response to CVE-2019-6446. 317 318 fix_imports : bool, optional 319 Only useful when loading Python 2 generated pickled files on Python 3, 320 which includes npy/npz files containing object arrays. If `fix_imports` 321 is True, pickle will try to map the old Python 2 names to the new names 322 used in Python 3. 323 encoding : str, optional 324 What encoding to use when reading Python 2 strings. Only useful when 325 loading Python 2 generated pickled files in Python 3, which includes 326 npy/npz files containing object arrays. Values other than 'latin1', 327 'ASCII', and 'bytes' are not allowed, as they can corrupt numerical 328 data. Default: 'ASCII' 329 330 Returns 331 ------- 332 result : array, tuple, dict, etc. 333 Data stored in the file. For ``.npz`` files, the returned instance 334 of NpzFile class must be closed to avoid leaking file descriptors. 335 336 Raises 337 ------ 338 IOError 339 If the input file does not exist or cannot be read. 340 ValueError 341 The file contains an object array, but allow_pickle=False given. 342 343 See Also 344 -------- 345 save, savez, savez_compressed, loadtxt 346 memmap : Create a memory-map to an array stored in a file on disk. 347 lib.format.open_memmap : Create or load a memory-mapped ``.npy`` file. 348 349 Notes 350 ----- 351 - If the file contains pickle data, then whatever object is stored 352 in the pickle is returned. 353 - If the file is a ``.npy`` file, then a single array is returned. 354 - If the file is a ``.npz`` file, then a dictionary-like object is 355 returned, containing ``{filename: array}`` key-value pairs, one for 356 each file in the archive. 357 - If the file is a ``.npz`` file, the returned value supports the 358 context manager protocol in a similar fashion to the open function:: 359 360 with load('foo.npz') as data: 361 a = data['a'] 362 363 The underlying file descriptor is closed when exiting the 'with' 364 block. 365 366 Examples 367 -------- 368 Store data to disk, and load it again: 369 370 >>> np.save('/tmp/123', np.array([[1, 2, 3], [4, 5, 6]])) 371 >>> np.load('/tmp/123.npy') 372 array([[1, 2, 3], 373 [4, 5, 6]]) 374 375 Store compressed data to disk, and load it again: 376 377 >>> a=np.array([[1, 2, 3], [4, 5, 6]]) 378 >>> b=np.array([1, 2]) 379 >>> np.savez('/tmp/123.npz', a=a, b=b) 380 >>> data = np.load('/tmp/123.npz') 381 >>> data['a'] 382 array([[1, 2, 3], 383 [4, 5, 6]]) 384 >>> data['b'] 385 array([1, 2]) 386 >>> data.close() 387 388 Mem-map the stored array, and then access the second row 389 directly from disk: 390 391 >>> X = np.load('/tmp/123.npy', mmap_mode='r') 392 >>> X[1, :] 393 memmap([4, 5, 6]) 394 395 """ 396 if encoding not in ('ASCII', 'latin1', 'bytes'): 397 # The 'encoding' value for pickle also affects what encoding 398 # the serialized binary data of NumPy arrays is loaded 399 # in. Pickle does not pass on the encoding information to 400 # NumPy. The unpickling code in numpy.core.multiarray is 401 # written to assume that unicode data appearing where binary 402 # should be is in 'latin1'. 'bytes' is also safe, as is 'ASCII'. 403 # 404 # Other encoding values can corrupt binary data, and we 405 # purposefully disallow them. For the same reason, the errors= 406 # argument is not exposed, as values other than 'strict' 407 # result can similarly silently corrupt numerical data. 408 raise ValueError("encoding must be 'ASCII', 'latin1', or 'bytes'") 409 410 pickle_kwargs = dict(encoding=encoding, fix_imports=fix_imports) 411 412 with contextlib.ExitStack() as stack: 413 if hasattr(file, 'read'): 414 fid = file 415 own_fid = False 416 else: 417 fid = stack.enter_context(open(os_fspath(file), "rb")) 418 own_fid = True 419 420 # Code to distinguish from NumPy binary files and pickles. 421 _ZIP_PREFIX = b'PK\x03\x04' 422 _ZIP_SUFFIX = b'PK\x05\x06' # empty zip files start with this 423 N = len(format.MAGIC_PREFIX) 424 magic = fid.read(N) 425 # If the file size is less than N, we need to make sure not 426 # to seek past the beginning of the file 427 fid.seek(-min(N, len(magic)), 1) # back-up 428 if magic.startswith(_ZIP_PREFIX) or magic.startswith(_ZIP_SUFFIX): 429 # zip-file (assume .npz) 430 # Potentially transfer file ownership to NpzFile 431 stack.pop_all() 432 ret = NpzFile(fid, own_fid=own_fid, allow_pickle=allow_pickle, 433 pickle_kwargs=pickle_kwargs) 434 return ret 435 elif magic == format.MAGIC_PREFIX: 436 # .npy file 437 if mmap_mode: 438 return format.open_memmap(file, mode=mmap_mode) 439 else: 440 return format.read_array(fid, allow_pickle=allow_pickle, 441 pickle_kwargs=pickle_kwargs) 442 else: 443 # Try a pickle 444 if not allow_pickle: 445 raise ValueError("Cannot load file containing pickled data " 446 "when allow_pickle=False") 447 try: 448 return pickle.load(fid, **pickle_kwargs) 449 except Exception as e: 450 raise IOError( 451 "Failed to interpret file %s as a pickle" % repr(file)) from e 452 453 454def _save_dispatcher(file, arr, allow_pickle=None, fix_imports=None): 455 return (arr,) 456 457 458@array_function_dispatch(_save_dispatcher) 459def save(file, arr, allow_pickle=True, fix_imports=True): 460 """ 461 Save an array to a binary file in NumPy ``.npy`` format. 462 463 Parameters 464 ---------- 465 file : file, str, or pathlib.Path 466 File or filename to which the data is saved. If file is a file-object, 467 then the filename is unchanged. If file is a string or Path, a ``.npy`` 468 extension will be appended to the filename if it does not already 469 have one. 470 arr : array_like 471 Array data to be saved. 472 allow_pickle : bool, optional 473 Allow saving object arrays using Python pickles. Reasons for disallowing 474 pickles include security (loading pickled data can execute arbitrary 475 code) and portability (pickled objects may not be loadable on different 476 Python installations, for example if the stored objects require libraries 477 that are not available, and not all pickled data is compatible between 478 Python 2 and Python 3). 479 Default: True 480 fix_imports : bool, optional 481 Only useful in forcing objects in object arrays on Python 3 to be 482 pickled in a Python 2 compatible way. If `fix_imports` is True, pickle 483 will try to map the new Python 3 names to the old module names used in 484 Python 2, so that the pickle data stream is readable with Python 2. 485 486 See Also 487 -------- 488 savez : Save several arrays into a ``.npz`` archive 489 savetxt, load 490 491 Notes 492 ----- 493 For a description of the ``.npy`` format, see :py:mod:`numpy.lib.format`. 494 495 Any data saved to the file is appended to the end of the file. 496 497 Examples 498 -------- 499 >>> from tempfile import TemporaryFile 500 >>> outfile = TemporaryFile() 501 502 >>> x = np.arange(10) 503 >>> np.save(outfile, x) 504 505 >>> _ = outfile.seek(0) # Only needed here to simulate closing & reopening file 506 >>> np.load(outfile) 507 array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) 508 509 510 >>> with open('test.npy', 'wb') as f: 511 ... np.save(f, np.array([1, 2])) 512 ... np.save(f, np.array([1, 3])) 513 >>> with open('test.npy', 'rb') as f: 514 ... a = np.load(f) 515 ... b = np.load(f) 516 >>> print(a, b) 517 # [1 2] [1 3] 518 """ 519 if hasattr(file, 'write'): 520 file_ctx = contextlib_nullcontext(file) 521 else: 522 file = os_fspath(file) 523 if not file.endswith('.npy'): 524 file = file + '.npy' 525 file_ctx = open(file, "wb") 526 527 with file_ctx as fid: 528 arr = np.asanyarray(arr) 529 format.write_array(fid, arr, allow_pickle=allow_pickle, 530 pickle_kwargs=dict(fix_imports=fix_imports)) 531 532 533def _savez_dispatcher(file, *args, **kwds): 534 yield from args 535 yield from kwds.values() 536 537 538@array_function_dispatch(_savez_dispatcher) 539def savez(file, *args, **kwds): 540 """Save several arrays into a single file in uncompressed ``.npz`` format. 541 542 If arguments are passed in with no keywords, the corresponding variable 543 names, in the ``.npz`` file, are 'arr_0', 'arr_1', etc. If keyword 544 arguments are given, the corresponding variable names, in the ``.npz`` 545 file will match the keyword names. 546 547 Parameters 548 ---------- 549 file : str or file 550 Either the filename (string) or an open file (file-like object) 551 where the data will be saved. If file is a string or a Path, the 552 ``.npz`` extension will be appended to the filename if it is not 553 already there. 554 args : Arguments, optional 555 Arrays to save to the file. Since it is not possible for Python to 556 know the names of the arrays outside `savez`, the arrays will be saved 557 with names "arr_0", "arr_1", and so on. These arguments can be any 558 expression. 559 kwds : Keyword arguments, optional 560 Arrays to save to the file. Arrays will be saved in the file with the 561 keyword names. 562 563 Returns 564 ------- 565 None 566 567 See Also 568 -------- 569 save : Save a single array to a binary file in NumPy format. 570 savetxt : Save an array to a file as plain text. 571 savez_compressed : Save several arrays into a compressed ``.npz`` archive 572 573 Notes 574 ----- 575 The ``.npz`` file format is a zipped archive of files named after the 576 variables they contain. The archive is not compressed and each file 577 in the archive contains one variable in ``.npy`` format. For a 578 description of the ``.npy`` format, see :py:mod:`numpy.lib.format`. 579 580 When opening the saved ``.npz`` file with `load` a `NpzFile` object is 581 returned. This is a dictionary-like object which can be queried for 582 its list of arrays (with the ``.files`` attribute), and for the arrays 583 themselves. 584 585 When saving dictionaries, the dictionary keys become filenames 586 inside the ZIP archive. Therefore, keys should be valid filenames. 587 E.g., avoid keys that begin with ``/`` or contain ``.``. 588 589 Examples 590 -------- 591 >>> from tempfile import TemporaryFile 592 >>> outfile = TemporaryFile() 593 >>> x = np.arange(10) 594 >>> y = np.sin(x) 595 596 Using `savez` with \\*args, the arrays are saved with default names. 597 598 >>> np.savez(outfile, x, y) 599 >>> _ = outfile.seek(0) # Only needed here to simulate closing & reopening file 600 >>> npzfile = np.load(outfile) 601 >>> npzfile.files 602 ['arr_0', 'arr_1'] 603 >>> npzfile['arr_0'] 604 array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) 605 606 Using `savez` with \\**kwds, the arrays are saved with the keyword names. 607 608 >>> outfile = TemporaryFile() 609 >>> np.savez(outfile, x=x, y=y) 610 >>> _ = outfile.seek(0) 611 >>> npzfile = np.load(outfile) 612 >>> sorted(npzfile.files) 613 ['x', 'y'] 614 >>> npzfile['x'] 615 array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) 616 """ 617 _savez(file, args, kwds, False) 618 619 620def _savez_compressed_dispatcher(file, *args, **kwds): 621 yield from args 622 yield from kwds.values() 623 624 625@array_function_dispatch(_savez_compressed_dispatcher) 626def savez_compressed(file, *args, **kwds): 627 """ 628 Save several arrays into a single file in compressed ``.npz`` format. 629 630 If keyword arguments are given, then filenames are taken from the keywords. 631 If arguments are passed in with no keywords, then stored filenames are 632 arr_0, arr_1, etc. 633 634 Parameters 635 ---------- 636 file : str or file 637 Either the filename (string) or an open file (file-like object) 638 where the data will be saved. If file is a string or a Path, the 639 ``.npz`` extension will be appended to the filename if it is not 640 already there. 641 args : Arguments, optional 642 Arrays to save to the file. Since it is not possible for Python to 643 know the names of the arrays outside `savez`, the arrays will be saved 644 with names "arr_0", "arr_1", and so on. These arguments can be any 645 expression. 646 kwds : Keyword arguments, optional 647 Arrays to save to the file. Arrays will be saved in the file with the 648 keyword names. 649 650 Returns 651 ------- 652 None 653 654 See Also 655 -------- 656 numpy.save : Save a single array to a binary file in NumPy format. 657 numpy.savetxt : Save an array to a file as plain text. 658 numpy.savez : Save several arrays into an uncompressed ``.npz`` file format 659 numpy.load : Load the files created by savez_compressed. 660 661 Notes 662 ----- 663 The ``.npz`` file format is a zipped archive of files named after the 664 variables they contain. The archive is compressed with 665 ``zipfile.ZIP_DEFLATED`` and each file in the archive contains one variable 666 in ``.npy`` format. For a description of the ``.npy`` format, see 667 :py:mod:`numpy.lib.format`. 668 669 670 When opening the saved ``.npz`` file with `load` a `NpzFile` object is 671 returned. This is a dictionary-like object which can be queried for 672 its list of arrays (with the ``.files`` attribute), and for the arrays 673 themselves. 674 675 Examples 676 -------- 677 >>> test_array = np.random.rand(3, 2) 678 >>> test_vector = np.random.rand(4) 679 >>> np.savez_compressed('/tmp/123', a=test_array, b=test_vector) 680 >>> loaded = np.load('/tmp/123.npz') 681 >>> print(np.array_equal(test_array, loaded['a'])) 682 True 683 >>> print(np.array_equal(test_vector, loaded['b'])) 684 True 685 686 """ 687 _savez(file, args, kwds, True) 688 689 690def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None): 691 # Import is postponed to here since zipfile depends on gzip, an optional 692 # component of the so-called standard library. 693 import zipfile 694 695 if not hasattr(file, 'write'): 696 file = os_fspath(file) 697 if not file.endswith('.npz'): 698 file = file + '.npz' 699 700 namedict = kwds 701 for i, val in enumerate(args): 702 key = 'arr_%d' % i 703 if key in namedict.keys(): 704 raise ValueError( 705 "Cannot use un-named variables and keyword %s" % key) 706 namedict[key] = val 707 708 if compress: 709 compression = zipfile.ZIP_DEFLATED 710 else: 711 compression = zipfile.ZIP_STORED 712 713 zipf = zipfile_factory(file, mode="w", compression=compression) 714 715 for key, val in namedict.items(): 716 fname = key + '.npy' 717 val = np.asanyarray(val) 718 # always force zip64, gh-10776 719 with zipf.open(fname, 'w', force_zip64=True) as fid: 720 format.write_array(fid, val, 721 allow_pickle=allow_pickle, 722 pickle_kwargs=pickle_kwargs) 723 724 zipf.close() 725 726 727def _getconv(dtype): 728 """ Find the correct dtype converter. Adapted from matplotlib """ 729 730 def floatconv(x): 731 x.lower() 732 if '0x' in x: 733 return float.fromhex(x) 734 return float(x) 735 736 typ = dtype.type 737 if issubclass(typ, np.bool_): 738 return lambda x: bool(int(x)) 739 if issubclass(typ, np.uint64): 740 return np.uint64 741 if issubclass(typ, np.int64): 742 return np.int64 743 if issubclass(typ, np.integer): 744 return lambda x: int(float(x)) 745 elif issubclass(typ, np.longdouble): 746 return np.longdouble 747 elif issubclass(typ, np.floating): 748 return floatconv 749 elif issubclass(typ, complex): 750 return lambda x: complex(asstr(x).replace('+-', '-')) 751 elif issubclass(typ, np.bytes_): 752 return asbytes 753 elif issubclass(typ, np.unicode_): 754 return asunicode 755 else: 756 return asstr 757 758 759# amount of lines loadtxt reads in one chunk, can be overridden for testing 760_loadtxt_chunksize = 50000 761 762 763def _loadtxt_dispatcher(fname, dtype=None, comments=None, delimiter=None, 764 converters=None, skiprows=None, usecols=None, unpack=None, 765 ndmin=None, encoding=None, max_rows=None, *, like=None): 766 return (like,) 767 768 769@set_array_function_like_doc 770@set_module('numpy') 771def loadtxt(fname, dtype=float, comments='#', delimiter=None, 772 converters=None, skiprows=0, usecols=None, unpack=False, 773 ndmin=0, encoding='bytes', max_rows=None, *, like=None): 774 r""" 775 Load data from a text file. 776 777 Each row in the text file must have the same number of values. 778 779 Parameters 780 ---------- 781 fname : file, str, or pathlib.Path 782 File, filename, or generator to read. If the filename extension is 783 ``.gz`` or ``.bz2``, the file is first decompressed. Note that 784 generators should return byte strings. 785 dtype : data-type, optional 786 Data-type of the resulting array; default: float. If this is a 787 structured data-type, the resulting array will be 1-dimensional, and 788 each row will be interpreted as an element of the array. In this 789 case, the number of columns used must match the number of fields in 790 the data-type. 791 comments : str or sequence of str, optional 792 The characters or list of characters used to indicate the start of a 793 comment. None implies no comments. For backwards compatibility, byte 794 strings will be decoded as 'latin1'. The default is '#'. 795 delimiter : str, optional 796 The string used to separate values. For backwards compatibility, byte 797 strings will be decoded as 'latin1'. The default is whitespace. 798 converters : dict, optional 799 A dictionary mapping column number to a function that will parse the 800 column string into the desired value. E.g., if column 0 is a date 801 string: ``converters = {0: datestr2num}``. Converters can also be 802 used to provide a default value for missing data (but see also 803 `genfromtxt`): ``converters = {3: lambda s: float(s.strip() or 0)}``. 804 Default: None. 805 skiprows : int, optional 806 Skip the first `skiprows` lines, including comments; default: 0. 807 usecols : int or sequence, optional 808 Which columns to read, with 0 being the first. For example, 809 ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns. 810 The default, None, results in all columns being read. 811 812 .. versionchanged:: 1.11.0 813 When a single column has to be read it is possible to use 814 an integer instead of a tuple. E.g ``usecols = 3`` reads the 815 fourth column the same way as ``usecols = (3,)`` would. 816 unpack : bool, optional 817 If True, the returned array is transposed, so that arguments may be 818 unpacked using ``x, y, z = loadtxt(...)``. When used with a 819 structured data-type, arrays are returned for each field. 820 Default is False. 821 ndmin : int, optional 822 The returned array will have at least `ndmin` dimensions. 823 Otherwise mono-dimensional axes will be squeezed. 824 Legal values: 0 (default), 1 or 2. 825 826 .. versionadded:: 1.6.0 827 encoding : str, optional 828 Encoding used to decode the inputfile. Does not apply to input streams. 829 The special value 'bytes' enables backward compatibility workarounds 830 that ensures you receive byte arrays as results if possible and passes 831 'latin1' encoded strings to converters. Override this value to receive 832 unicode arrays and pass strings as input to converters. If set to None 833 the system default is used. The default value is 'bytes'. 834 835 .. versionadded:: 1.14.0 836 max_rows : int, optional 837 Read `max_rows` lines of content after `skiprows` lines. The default 838 is to read all the lines. 839 840 .. versionadded:: 1.16.0 841 ${ARRAY_FUNCTION_LIKE} 842 843 .. versionadded:: 1.20.0 844 845 Returns 846 ------- 847 out : ndarray 848 Data read from the text file. 849 850 See Also 851 -------- 852 load, fromstring, fromregex 853 genfromtxt : Load data with missing values handled as specified. 854 scipy.io.loadmat : reads MATLAB data files 855 856 Notes 857 ----- 858 This function aims to be a fast reader for simply formatted files. The 859 `genfromtxt` function provides more sophisticated handling of, e.g., 860 lines with missing values. 861 862 .. versionadded:: 1.10.0 863 864 The strings produced by the Python float.hex method can be used as 865 input for floats. 866 867 Examples 868 -------- 869 >>> from io import StringIO # StringIO behaves like a file object 870 >>> c = StringIO("0 1\n2 3") 871 >>> np.loadtxt(c) 872 array([[0., 1.], 873 [2., 3.]]) 874 875 >>> d = StringIO("M 21 72\nF 35 58") 876 >>> np.loadtxt(d, dtype={'names': ('gender', 'age', 'weight'), 877 ... 'formats': ('S1', 'i4', 'f4')}) 878 array([(b'M', 21, 72.), (b'F', 35, 58.)], 879 dtype=[('gender', 'S1'), ('age', '<i4'), ('weight', '<f4')]) 880 881 >>> c = StringIO("1,0,2\n3,0,4") 882 >>> x, y = np.loadtxt(c, delimiter=',', usecols=(0, 2), unpack=True) 883 >>> x 884 array([1., 3.]) 885 >>> y 886 array([2., 4.]) 887 888 This example shows how `converters` can be used to convert a field 889 with a trailing minus sign into a negative number. 890 891 >>> s = StringIO('10.01 31.25-\n19.22 64.31\n17.57- 63.94') 892 >>> def conv(fld): 893 ... return -float(fld[:-1]) if fld.endswith(b'-') else float(fld) 894 ... 895 >>> np.loadtxt(s, converters={0: conv, 1: conv}) 896 array([[ 10.01, -31.25], 897 [ 19.22, 64.31], 898 [-17.57, 63.94]]) 899 """ 900 901 if like is not None: 902 return _loadtxt_with_like( 903 fname, dtype=dtype, comments=comments, delimiter=delimiter, 904 converters=converters, skiprows=skiprows, usecols=usecols, 905 unpack=unpack, ndmin=ndmin, encoding=encoding, 906 max_rows=max_rows, like=like 907 ) 908 909 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 910 # Nested functions used by loadtxt. 911 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 912 913 # not to be confused with the flatten_dtype we import... 914 @recursive 915 def flatten_dtype_internal(self, dt): 916 """Unpack a structured data-type, and produce re-packing info.""" 917 if dt.names is None: 918 # If the dtype is flattened, return. 919 # If the dtype has a shape, the dtype occurs 920 # in the list more than once. 921 shape = dt.shape 922 if len(shape) == 0: 923 return ([dt.base], None) 924 else: 925 packing = [(shape[-1], list)] 926 if len(shape) > 1: 927 for dim in dt.shape[-2::-1]: 928 packing = [(dim*packing[0][0], packing*dim)] 929 return ([dt.base] * int(np.prod(dt.shape)), packing) 930 else: 931 types = [] 932 packing = [] 933 for field in dt.names: 934 tp, bytes = dt.fields[field] 935 flat_dt, flat_packing = self(tp) 936 types.extend(flat_dt) 937 # Avoid extra nesting for subarrays 938 if tp.ndim > 0: 939 packing.extend(flat_packing) 940 else: 941 packing.append((len(flat_dt), flat_packing)) 942 return (types, packing) 943 944 @recursive 945 def pack_items(self, items, packing): 946 """Pack items into nested lists based on re-packing info.""" 947 if packing is None: 948 return items[0] 949 elif packing is tuple: 950 return tuple(items) 951 elif packing is list: 952 return list(items) 953 else: 954 start = 0 955 ret = [] 956 for length, subpacking in packing: 957 ret.append(self(items[start:start+length], subpacking)) 958 start += length 959 return tuple(ret) 960 961 def split_line(line): 962 """Chop off comments, strip, and split at delimiter. """ 963 line = _decode_line(line, encoding=encoding) 964 965 if comments is not None: 966 line = regex_comments.split(line, maxsplit=1)[0] 967 line = line.strip('\r\n') 968 return line.split(delimiter) if line else [] 969 970 def read_data(chunk_size): 971 """Parse each line, including the first. 972 973 The file read, `fh`, is a global defined above. 974 975 Parameters 976 ---------- 977 chunk_size : int 978 At most `chunk_size` lines are read at a time, with iteration 979 until all lines are read. 980 981 """ 982 X = [] 983 line_iter = itertools.chain([first_line], fh) 984 line_iter = itertools.islice(line_iter, max_rows) 985 for i, line in enumerate(line_iter): 986 vals = split_line(line) 987 if len(vals) == 0: 988 continue 989 if usecols: 990 vals = [vals[j] for j in usecols] 991 if len(vals) != N: 992 line_num = i + skiprows + 1 993 raise ValueError("Wrong number of columns at line %d" 994 % line_num) 995 996 # Convert each value according to its column and store 997 items = [conv(val) for (conv, val) in zip(converters, vals)] 998 999 # Then pack it according to the dtype's nesting 1000 items = pack_items(items, packing) 1001 X.append(items) 1002 if len(X) > chunk_size: 1003 yield X 1004 X = [] 1005 if X: 1006 yield X 1007 1008 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1009 # Main body of loadtxt. 1010 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1011 1012 # Check correctness of the values of `ndmin` 1013 if ndmin not in [0, 1, 2]: 1014 raise ValueError('Illegal value of ndmin keyword: %s' % ndmin) 1015 1016 # Type conversions for Py3 convenience 1017 if comments is not None: 1018 if isinstance(comments, (str, bytes)): 1019 comments = [comments] 1020 comments = [_decode_line(x) for x in comments] 1021 # Compile regex for comments beforehand 1022 comments = (re.escape(comment) for comment in comments) 1023 regex_comments = re.compile('|'.join(comments)) 1024 1025 if delimiter is not None: 1026 delimiter = _decode_line(delimiter) 1027 1028 user_converters = converters 1029 1030 byte_converters = False 1031 if encoding == 'bytes': 1032 encoding = None 1033 byte_converters = True 1034 1035 if usecols is not None: 1036 # Allow usecols to be a single int or a sequence of ints 1037 try: 1038 usecols_as_list = list(usecols) 1039 except TypeError: 1040 usecols_as_list = [usecols] 1041 for col_idx in usecols_as_list: 1042 try: 1043 opindex(col_idx) 1044 except TypeError as e: 1045 e.args = ( 1046 "usecols must be an int or a sequence of ints but " 1047 "it contains at least one element of type %s" % 1048 type(col_idx), 1049 ) 1050 raise 1051 # Fall back to existing code 1052 usecols = usecols_as_list 1053 1054 # Make sure we're dealing with a proper dtype 1055 dtype = np.dtype(dtype) 1056 defconv = _getconv(dtype) 1057 1058 dtype_types, packing = flatten_dtype_internal(dtype) 1059 1060 fown = False 1061 try: 1062 if isinstance(fname, os_PathLike): 1063 fname = os_fspath(fname) 1064 if _is_string_like(fname): 1065 fh = np.lib._datasource.open(fname, 'rt', encoding=encoding) 1066 fencoding = getattr(fh, 'encoding', 'latin1') 1067 fh = iter(fh) 1068 fown = True 1069 else: 1070 fh = iter(fname) 1071 fencoding = getattr(fname, 'encoding', 'latin1') 1072 except TypeError as e: 1073 raise ValueError( 1074 'fname must be a string, file handle, or generator' 1075 ) from e 1076 1077 # input may be a python2 io stream 1078 if encoding is not None: 1079 fencoding = encoding 1080 # we must assume local encoding 1081 # TODO emit portability warning? 1082 elif fencoding is None: 1083 import locale 1084 fencoding = locale.getpreferredencoding() 1085 1086 try: 1087 # Skip the first `skiprows` lines 1088 for i in range(skiprows): 1089 next(fh) 1090 1091 # Read until we find a line with some values, and use 1092 # it to estimate the number of columns, N. 1093 first_vals = None 1094 try: 1095 while not first_vals: 1096 first_line = next(fh) 1097 first_vals = split_line(first_line) 1098 except StopIteration: 1099 # End of lines reached 1100 first_line = '' 1101 first_vals = [] 1102 warnings.warn('loadtxt: Empty input file: "%s"' % fname, 1103 stacklevel=2) 1104 N = len(usecols or first_vals) 1105 1106 # Now that we know N, create the default converters list, and 1107 # set packing, if necessary. 1108 if len(dtype_types) > 1: 1109 # We're dealing with a structured array, each field of 1110 # the dtype matches a column 1111 converters = [_getconv(dt) for dt in dtype_types] 1112 else: 1113 # All fields have the same dtype 1114 converters = [defconv for i in range(N)] 1115 if N > 1: 1116 packing = [(N, tuple)] 1117 1118 # By preference, use the converters specified by the user 1119 for i, conv in (user_converters or {}).items(): 1120 if usecols: 1121 try: 1122 i = usecols.index(i) 1123 except ValueError: 1124 # Unused converter specified 1125 continue 1126 if byte_converters: 1127 # converters may use decode to workaround numpy's old 1128 # behaviour, so encode the string again before passing to 1129 # the user converter 1130 def tobytes_first(x, conv): 1131 if type(x) is bytes: 1132 return conv(x) 1133 return conv(x.encode("latin1")) 1134 converters[i] = functools.partial(tobytes_first, conv=conv) 1135 else: 1136 converters[i] = conv 1137 1138 converters = [conv if conv is not bytes else 1139 lambda x: x.encode(fencoding) for conv in converters] 1140 1141 # read data in chunks and fill it into an array via resize 1142 # over-allocating and shrinking the array later may be faster but is 1143 # probably not relevant compared to the cost of actually reading and 1144 # converting the data 1145 X = None 1146 for x in read_data(_loadtxt_chunksize): 1147 if X is None: 1148 X = np.array(x, dtype) 1149 else: 1150 nshape = list(X.shape) 1151 pos = nshape[0] 1152 nshape[0] += len(x) 1153 X.resize(nshape, refcheck=False) 1154 X[pos:, ...] = x 1155 finally: 1156 if fown: 1157 fh.close() 1158 1159 if X is None: 1160 X = np.array([], dtype) 1161 1162 # Multicolumn data are returned with shape (1, N, M), i.e. 1163 # (1, 1, M) for a single row - remove the singleton dimension there 1164 if X.ndim == 3 and X.shape[:2] == (1, 1): 1165 X.shape = (1, -1) 1166 1167 # Verify that the array has at least dimensions `ndmin`. 1168 # Tweak the size and shape of the arrays - remove extraneous dimensions 1169 if X.ndim > ndmin: 1170 X = np.squeeze(X) 1171 # and ensure we have the minimum number of dimensions asked for 1172 # - has to be in this order for the odd case ndmin=1, X.squeeze().ndim=0 1173 if X.ndim < ndmin: 1174 if ndmin == 1: 1175 X = np.atleast_1d(X) 1176 elif ndmin == 2: 1177 X = np.atleast_2d(X).T 1178 1179 if unpack: 1180 if len(dtype_types) > 1: 1181 # For structured arrays, return an array for each field. 1182 return [X[field] for field in dtype.names] 1183 else: 1184 return X.T 1185 else: 1186 return X 1187 1188 1189_loadtxt_with_like = array_function_dispatch( 1190 _loadtxt_dispatcher 1191)(loadtxt) 1192 1193 1194def _savetxt_dispatcher(fname, X, fmt=None, delimiter=None, newline=None, 1195 header=None, footer=None, comments=None, 1196 encoding=None): 1197 return (X,) 1198 1199 1200@array_function_dispatch(_savetxt_dispatcher) 1201def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', 1202 footer='', comments='# ', encoding=None): 1203 """ 1204 Save an array to a text file. 1205 1206 Parameters 1207 ---------- 1208 fname : filename or file handle 1209 If the filename ends in ``.gz``, the file is automatically saved in 1210 compressed gzip format. `loadtxt` understands gzipped files 1211 transparently. 1212 X : 1D or 2D array_like 1213 Data to be saved to a text file. 1214 fmt : str or sequence of strs, optional 1215 A single format (%10.5f), a sequence of formats, or a 1216 multi-format string, e.g. 'Iteration %d -- %10.5f', in which 1217 case `delimiter` is ignored. For complex `X`, the legal options 1218 for `fmt` are: 1219 1220 * a single specifier, `fmt='%.4e'`, resulting in numbers formatted 1221 like `' (%s+%sj)' % (fmt, fmt)` 1222 * a full string specifying every real and imaginary part, e.g. 1223 `' %.4e %+.4ej %.4e %+.4ej %.4e %+.4ej'` for 3 columns 1224 * a list of specifiers, one per column - in this case, the real 1225 and imaginary part must have separate specifiers, 1226 e.g. `['%.3e + %.3ej', '(%.15e%+.15ej)']` for 2 columns 1227 delimiter : str, optional 1228 String or character separating columns. 1229 newline : str, optional 1230 String or character separating lines. 1231 1232 .. versionadded:: 1.5.0 1233 header : str, optional 1234 String that will be written at the beginning of the file. 1235 1236 .. versionadded:: 1.7.0 1237 footer : str, optional 1238 String that will be written at the end of the file. 1239 1240 .. versionadded:: 1.7.0 1241 comments : str, optional 1242 String that will be prepended to the ``header`` and ``footer`` strings, 1243 to mark them as comments. Default: '# ', as expected by e.g. 1244 ``numpy.loadtxt``. 1245 1246 .. versionadded:: 1.7.0 1247 encoding : {None, str}, optional 1248 Encoding used to encode the outputfile. Does not apply to output 1249 streams. If the encoding is something other than 'bytes' or 'latin1' 1250 you will not be able to load the file in NumPy versions < 1.14. Default 1251 is 'latin1'. 1252 1253 .. versionadded:: 1.14.0 1254 1255 1256 See Also 1257 -------- 1258 save : Save an array to a binary file in NumPy ``.npy`` format 1259 savez : Save several arrays into an uncompressed ``.npz`` archive 1260 savez_compressed : Save several arrays into a compressed ``.npz`` archive 1261 1262 Notes 1263 ----- 1264 Further explanation of the `fmt` parameter 1265 (``%[flag]width[.precision]specifier``): 1266 1267 flags: 1268 ``-`` : left justify 1269 1270 ``+`` : Forces to precede result with + or -. 1271 1272 ``0`` : Left pad the number with zeros instead of space (see width). 1273 1274 width: 1275 Minimum number of characters to be printed. The value is not truncated 1276 if it has more characters. 1277 1278 precision: 1279 - For integer specifiers (eg. ``d,i,o,x``), the minimum number of 1280 digits. 1281 - For ``e, E`` and ``f`` specifiers, the number of digits to print 1282 after the decimal point. 1283 - For ``g`` and ``G``, the maximum number of significant digits. 1284 - For ``s``, the maximum number of characters. 1285 1286 specifiers: 1287 ``c`` : character 1288 1289 ``d`` or ``i`` : signed decimal integer 1290 1291 ``e`` or ``E`` : scientific notation with ``e`` or ``E``. 1292 1293 ``f`` : decimal floating point 1294 1295 ``g,G`` : use the shorter of ``e,E`` or ``f`` 1296 1297 ``o`` : signed octal 1298 1299 ``s`` : string of characters 1300 1301 ``u`` : unsigned decimal integer 1302 1303 ``x,X`` : unsigned hexadecimal integer 1304 1305 This explanation of ``fmt`` is not complete, for an exhaustive 1306 specification see [1]_. 1307 1308 References 1309 ---------- 1310 .. [1] `Format Specification Mini-Language 1311 <https://docs.python.org/library/string.html#format-specification-mini-language>`_, 1312 Python Documentation. 1313 1314 Examples 1315 -------- 1316 >>> x = y = z = np.arange(0.0,5.0,1.0) 1317 >>> np.savetxt('test.out', x, delimiter=',') # X is an array 1318 >>> np.savetxt('test.out', (x,y,z)) # x,y,z equal sized 1D arrays 1319 >>> np.savetxt('test.out', x, fmt='%1.4e') # use exponential notation 1320 1321 """ 1322 1323 # Py3 conversions first 1324 if isinstance(fmt, bytes): 1325 fmt = asstr(fmt) 1326 delimiter = asstr(delimiter) 1327 1328 class WriteWrap: 1329 """Convert to bytes on bytestream inputs. 1330 1331 """ 1332 def __init__(self, fh, encoding): 1333 self.fh = fh 1334 self.encoding = encoding 1335 self.do_write = self.first_write 1336 1337 def close(self): 1338 self.fh.close() 1339 1340 def write(self, v): 1341 self.do_write(v) 1342 1343 def write_bytes(self, v): 1344 if isinstance(v, bytes): 1345 self.fh.write(v) 1346 else: 1347 self.fh.write(v.encode(self.encoding)) 1348 1349 def write_normal(self, v): 1350 self.fh.write(asunicode(v)) 1351 1352 def first_write(self, v): 1353 try: 1354 self.write_normal(v) 1355 self.write = self.write_normal 1356 except TypeError: 1357 # input is probably a bytestream 1358 self.write_bytes(v) 1359 self.write = self.write_bytes 1360 1361 own_fh = False 1362 if isinstance(fname, os_PathLike): 1363 fname = os_fspath(fname) 1364 if _is_string_like(fname): 1365 # datasource doesn't support creating a new file ... 1366 open(fname, 'wt').close() 1367 fh = np.lib._datasource.open(fname, 'wt', encoding=encoding) 1368 own_fh = True 1369 elif hasattr(fname, 'write'): 1370 # wrap to handle byte output streams 1371 fh = WriteWrap(fname, encoding or 'latin1') 1372 else: 1373 raise ValueError('fname must be a string or file handle') 1374 1375 try: 1376 X = np.asarray(X) 1377 1378 # Handle 1-dimensional arrays 1379 if X.ndim == 0 or X.ndim > 2: 1380 raise ValueError( 1381 "Expected 1D or 2D array, got %dD array instead" % X.ndim) 1382 elif X.ndim == 1: 1383 # Common case -- 1d array of numbers 1384 if X.dtype.names is None: 1385 X = np.atleast_2d(X).T 1386 ncol = 1 1387 1388 # Complex dtype -- each field indicates a separate column 1389 else: 1390 ncol = len(X.dtype.names) 1391 else: 1392 ncol = X.shape[1] 1393 1394 iscomplex_X = np.iscomplexobj(X) 1395 # `fmt` can be a string with multiple insertion points or a 1396 # list of formats. E.g. '%10.5f\t%10d' or ('%10.5f', '$10d') 1397 if type(fmt) in (list, tuple): 1398 if len(fmt) != ncol: 1399 raise AttributeError('fmt has wrong shape. %s' % str(fmt)) 1400 format = asstr(delimiter).join(map(asstr, fmt)) 1401 elif isinstance(fmt, str): 1402 n_fmt_chars = fmt.count('%') 1403 error = ValueError('fmt has wrong number of %% formats: %s' % fmt) 1404 if n_fmt_chars == 1: 1405 if iscomplex_X: 1406 fmt = [' (%s+%sj)' % (fmt, fmt), ] * ncol 1407 else: 1408 fmt = [fmt, ] * ncol 1409 format = delimiter.join(fmt) 1410 elif iscomplex_X and n_fmt_chars != (2 * ncol): 1411 raise error 1412 elif ((not iscomplex_X) and n_fmt_chars != ncol): 1413 raise error 1414 else: 1415 format = fmt 1416 else: 1417 raise ValueError('invalid fmt: %r' % (fmt,)) 1418 1419 if len(header) > 0: 1420 header = header.replace('\n', '\n' + comments) 1421 fh.write(comments + header + newline) 1422 if iscomplex_X: 1423 for row in X: 1424 row2 = [] 1425 for number in row: 1426 row2.append(number.real) 1427 row2.append(number.imag) 1428 s = format % tuple(row2) + newline 1429 fh.write(s.replace('+-', '-')) 1430 else: 1431 for row in X: 1432 try: 1433 v = format % tuple(row) + newline 1434 except TypeError as e: 1435 raise TypeError("Mismatch between array dtype ('%s') and " 1436 "format specifier ('%s')" 1437 % (str(X.dtype), format)) from e 1438 fh.write(v) 1439 1440 if len(footer) > 0: 1441 footer = footer.replace('\n', '\n' + comments) 1442 fh.write(comments + footer + newline) 1443 finally: 1444 if own_fh: 1445 fh.close() 1446 1447 1448@set_module('numpy') 1449def fromregex(file, regexp, dtype, encoding=None): 1450 """ 1451 Construct an array from a text file, using regular expression parsing. 1452 1453 The returned array is always a structured array, and is constructed from 1454 all matches of the regular expression in the file. Groups in the regular 1455 expression are converted to fields of the structured array. 1456 1457 Parameters 1458 ---------- 1459 file : str or file 1460 Filename or file object to read. 1461 regexp : str or regexp 1462 Regular expression used to parse the file. 1463 Groups in the regular expression correspond to fields in the dtype. 1464 dtype : dtype or list of dtypes 1465 Dtype for the structured array. 1466 encoding : str, optional 1467 Encoding used to decode the inputfile. Does not apply to input streams. 1468 1469 .. versionadded:: 1.14.0 1470 1471 Returns 1472 ------- 1473 output : ndarray 1474 The output array, containing the part of the content of `file` that 1475 was matched by `regexp`. `output` is always a structured array. 1476 1477 Raises 1478 ------ 1479 TypeError 1480 When `dtype` is not a valid dtype for a structured array. 1481 1482 See Also 1483 -------- 1484 fromstring, loadtxt 1485 1486 Notes 1487 ----- 1488 Dtypes for structured arrays can be specified in several forms, but all 1489 forms specify at least the data type and field name. For details see 1490 `basics.rec`. 1491 1492 Examples 1493 -------- 1494 >>> f = open('test.dat', 'w') 1495 >>> _ = f.write("1312 foo\\n1534 bar\\n444 qux") 1496 >>> f.close() 1497 1498 >>> regexp = r"(\\d+)\\s+(...)" # match [digits, whitespace, anything] 1499 >>> output = np.fromregex('test.dat', regexp, 1500 ... [('num', np.int64), ('key', 'S3')]) 1501 >>> output 1502 array([(1312, b'foo'), (1534, b'bar'), ( 444, b'qux')], 1503 dtype=[('num', '<i8'), ('key', 'S3')]) 1504 >>> output['num'] 1505 array([1312, 1534, 444]) 1506 1507 """ 1508 own_fh = False 1509 if not hasattr(file, "read"): 1510 file = np.lib._datasource.open(file, 'rt', encoding=encoding) 1511 own_fh = True 1512 1513 try: 1514 if not isinstance(dtype, np.dtype): 1515 dtype = np.dtype(dtype) 1516 1517 content = file.read() 1518 if isinstance(content, bytes) and isinstance(regexp, np.compat.unicode): 1519 regexp = asbytes(regexp) 1520 elif isinstance(content, np.compat.unicode) and isinstance(regexp, bytes): 1521 regexp = asstr(regexp) 1522 1523 if not hasattr(regexp, 'match'): 1524 regexp = re.compile(regexp) 1525 seq = regexp.findall(content) 1526 if seq and not isinstance(seq[0], tuple): 1527 # Only one group is in the regexp. 1528 # Create the new array as a single data-type and then 1529 # re-interpret as a single-field structured array. 1530 newdtype = np.dtype(dtype[dtype.names[0]]) 1531 output = np.array(seq, dtype=newdtype) 1532 output.dtype = dtype 1533 else: 1534 output = np.array(seq, dtype=dtype) 1535 1536 return output 1537 finally: 1538 if own_fh: 1539 file.close() 1540 1541 1542#####-------------------------------------------------------------------------- 1543#---- --- ASCII functions --- 1544#####-------------------------------------------------------------------------- 1545 1546 1547def _genfromtxt_dispatcher(fname, dtype=None, comments=None, delimiter=None, 1548 skip_header=None, skip_footer=None, converters=None, 1549 missing_values=None, filling_values=None, usecols=None, 1550 names=None, excludelist=None, deletechars=None, 1551 replace_space=None, autostrip=None, case_sensitive=None, 1552 defaultfmt=None, unpack=None, usemask=None, loose=None, 1553 invalid_raise=None, max_rows=None, encoding=None, *, 1554 like=None): 1555 return (like,) 1556 1557 1558@set_array_function_like_doc 1559@set_module('numpy') 1560def genfromtxt(fname, dtype=float, comments='#', delimiter=None, 1561 skip_header=0, skip_footer=0, converters=None, 1562 missing_values=None, filling_values=None, usecols=None, 1563 names=None, excludelist=None, 1564 deletechars=''.join(sorted(NameValidator.defaultdeletechars)), 1565 replace_space='_', autostrip=False, case_sensitive=True, 1566 defaultfmt="f%i", unpack=None, usemask=False, loose=True, 1567 invalid_raise=True, max_rows=None, encoding='bytes', *, 1568 like=None): 1569 """ 1570 Load data from a text file, with missing values handled as specified. 1571 1572 Each line past the first `skip_header` lines is split at the `delimiter` 1573 character, and characters following the `comments` character are discarded. 1574 1575 Parameters 1576 ---------- 1577 fname : file, str, pathlib.Path, list of str, generator 1578 File, filename, list, or generator to read. If the filename 1579 extension is `.gz` or `.bz2`, the file is first decompressed. Note 1580 that generators must return byte strings. The strings 1581 in a list or produced by a generator are treated as lines. 1582 dtype : dtype, optional 1583 Data type of the resulting array. 1584 If None, the dtypes will be determined by the contents of each 1585 column, individually. 1586 comments : str, optional 1587 The character used to indicate the start of a comment. 1588 All the characters occurring on a line after a comment are discarded 1589 delimiter : str, int, or sequence, optional 1590 The string used to separate values. By default, any consecutive 1591 whitespaces act as delimiter. An integer or sequence of integers 1592 can also be provided as width(s) of each field. 1593 skiprows : int, optional 1594 `skiprows` was removed in numpy 1.10. Please use `skip_header` instead. 1595 skip_header : int, optional 1596 The number of lines to skip at the beginning of the file. 1597 skip_footer : int, optional 1598 The number of lines to skip at the end of the file. 1599 converters : variable, optional 1600 The set of functions that convert the data of a column to a value. 1601 The converters can also be used to provide a default value 1602 for missing data: ``converters = {3: lambda s: float(s or 0)}``. 1603 missing : variable, optional 1604 `missing` was removed in numpy 1.10. Please use `missing_values` 1605 instead. 1606 missing_values : variable, optional 1607 The set of strings corresponding to missing data. 1608 filling_values : variable, optional 1609 The set of values to be used as default when the data are missing. 1610 usecols : sequence, optional 1611 Which columns to read, with 0 being the first. For example, 1612 ``usecols = (1, 4, 5)`` will extract the 2nd, 5th and 6th columns. 1613 names : {None, True, str, sequence}, optional 1614 If `names` is True, the field names are read from the first line after 1615 the first `skip_header` lines. This line can optionally be proceeded 1616 by a comment delimiter. If `names` is a sequence or a single-string of 1617 comma-separated names, the names will be used to define the field names 1618 in a structured dtype. If `names` is None, the names of the dtype 1619 fields will be used, if any. 1620 excludelist : sequence, optional 1621 A list of names to exclude. This list is appended to the default list 1622 ['return','file','print']. Excluded names are appended an underscore: 1623 for example, `file` would become `file_`. 1624 deletechars : str, optional 1625 A string combining invalid characters that must be deleted from the 1626 names. 1627 defaultfmt : str, optional 1628 A format used to define default field names, such as "f%i" or "f_%02i". 1629 autostrip : bool, optional 1630 Whether to automatically strip white spaces from the variables. 1631 replace_space : char, optional 1632 Character(s) used in replacement of white spaces in the variables 1633 names. By default, use a '_'. 1634 case_sensitive : {True, False, 'upper', 'lower'}, optional 1635 If True, field names are case sensitive. 1636 If False or 'upper', field names are converted to upper case. 1637 If 'lower', field names are converted to lower case. 1638 unpack : bool, optional 1639 If True, the returned array is transposed, so that arguments may be 1640 unpacked using ``x, y, z = genfromtxt(...)``. When used with a 1641 structured data-type, arrays are returned for each field. 1642 Default is False. 1643 usemask : bool, optional 1644 If True, return a masked array. 1645 If False, return a regular array. 1646 loose : bool, optional 1647 If True, do not raise errors for invalid values. 1648 invalid_raise : bool, optional 1649 If True, an exception is raised if an inconsistency is detected in the 1650 number of columns. 1651 If False, a warning is emitted and the offending lines are skipped. 1652 max_rows : int, optional 1653 The maximum number of rows to read. Must not be used with skip_footer 1654 at the same time. If given, the value must be at least 1. Default is 1655 to read the entire file. 1656 1657 .. versionadded:: 1.10.0 1658 encoding : str, optional 1659 Encoding used to decode the inputfile. Does not apply when `fname` is 1660 a file object. The special value 'bytes' enables backward compatibility 1661 workarounds that ensure that you receive byte arrays when possible 1662 and passes latin1 encoded strings to converters. Override this value to 1663 receive unicode arrays and pass strings as input to converters. If set 1664 to None the system default is used. The default value is 'bytes'. 1665 1666 .. versionadded:: 1.14.0 1667 ${ARRAY_FUNCTION_LIKE} 1668 1669 .. versionadded:: 1.20.0 1670 1671 Returns 1672 ------- 1673 out : ndarray 1674 Data read from the text file. If `usemask` is True, this is a 1675 masked array. 1676 1677 See Also 1678 -------- 1679 numpy.loadtxt : equivalent function when no data is missing. 1680 1681 Notes 1682 ----- 1683 * When spaces are used as delimiters, or when no delimiter has been given 1684 as input, there should not be any missing data between two fields. 1685 * When the variables are named (either by a flexible dtype or with `names`), 1686 there must not be any header in the file (else a ValueError 1687 exception is raised). 1688 * Individual values are not stripped of spaces by default. 1689 When using a custom converter, make sure the function does remove spaces. 1690 1691 References 1692 ---------- 1693 .. [1] NumPy User Guide, section `I/O with NumPy 1694 <https://docs.scipy.org/doc/numpy/user/basics.io.genfromtxt.html>`_. 1695 1696 Examples 1697 -------- 1698 >>> from io import StringIO 1699 >>> import numpy as np 1700 1701 Comma delimited file with mixed dtype 1702 1703 >>> s = StringIO(u"1,1.3,abcde") 1704 >>> data = np.genfromtxt(s, dtype=[('myint','i8'),('myfloat','f8'), 1705 ... ('mystring','S5')], delimiter=",") 1706 >>> data 1707 array((1, 1.3, b'abcde'), 1708 dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')]) 1709 1710 Using dtype = None 1711 1712 >>> _ = s.seek(0) # needed for StringIO example only 1713 >>> data = np.genfromtxt(s, dtype=None, 1714 ... names = ['myint','myfloat','mystring'], delimiter=",") 1715 >>> data 1716 array((1, 1.3, b'abcde'), 1717 dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')]) 1718 1719 Specifying dtype and names 1720 1721 >>> _ = s.seek(0) 1722 >>> data = np.genfromtxt(s, dtype="i8,f8,S5", 1723 ... names=['myint','myfloat','mystring'], delimiter=",") 1724 >>> data 1725 array((1, 1.3, b'abcde'), 1726 dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')]) 1727 1728 An example with fixed-width columns 1729 1730 >>> s = StringIO(u"11.3abcde") 1731 >>> data = np.genfromtxt(s, dtype=None, names=['intvar','fltvar','strvar'], 1732 ... delimiter=[1,3,5]) 1733 >>> data 1734 array((1, 1.3, b'abcde'), 1735 dtype=[('intvar', '<i8'), ('fltvar', '<f8'), ('strvar', 'S5')]) 1736 1737 An example to show comments 1738 1739 >>> f = StringIO(''' 1740 ... text,# of chars 1741 ... hello world,11 1742 ... numpy,5''') 1743 >>> np.genfromtxt(f, dtype='S12,S12', delimiter=',') 1744 array([(b'text', b''), (b'hello world', b'11'), (b'numpy', b'5')], 1745 dtype=[('f0', 'S12'), ('f1', 'S12')]) 1746 1747 """ 1748 1749 if like is not None: 1750 return _genfromtxt_with_like( 1751 fname, dtype=dtype, comments=comments, delimiter=delimiter, 1752 skip_header=skip_header, skip_footer=skip_footer, 1753 converters=converters, missing_values=missing_values, 1754 filling_values=filling_values, usecols=usecols, names=names, 1755 excludelist=excludelist, deletechars=deletechars, 1756 replace_space=replace_space, autostrip=autostrip, 1757 case_sensitive=case_sensitive, defaultfmt=defaultfmt, 1758 unpack=unpack, usemask=usemask, loose=loose, 1759 invalid_raise=invalid_raise, max_rows=max_rows, encoding=encoding, 1760 like=like 1761 ) 1762 1763 if max_rows is not None: 1764 if skip_footer: 1765 raise ValueError( 1766 "The keywords 'skip_footer' and 'max_rows' can not be " 1767 "specified at the same time.") 1768 if max_rows < 1: 1769 raise ValueError("'max_rows' must be at least 1.") 1770 1771 if usemask: 1772 from numpy.ma import MaskedArray, make_mask_descr 1773 # Check the input dictionary of converters 1774 user_converters = converters or {} 1775 if not isinstance(user_converters, dict): 1776 raise TypeError( 1777 "The input argument 'converter' should be a valid dictionary " 1778 "(got '%s' instead)" % type(user_converters)) 1779 1780 if encoding == 'bytes': 1781 encoding = None 1782 byte_converters = True 1783 else: 1784 byte_converters = False 1785 1786 # Initialize the filehandle, the LineSplitter and the NameValidator 1787 try: 1788 if isinstance(fname, os_PathLike): 1789 fname = os_fspath(fname) 1790 if isinstance(fname, str): 1791 fid = np.lib._datasource.open(fname, 'rt', encoding=encoding) 1792 fid_ctx = contextlib.closing(fid) 1793 else: 1794 fid = fname 1795 fid_ctx = contextlib_nullcontext(fid) 1796 fhd = iter(fid) 1797 except TypeError as e: 1798 raise TypeError( 1799 "fname must be a string, filehandle, list of strings, " 1800 "or generator. Got %s instead." % type(fname)) from e 1801 1802 with fid_ctx: 1803 split_line = LineSplitter(delimiter=delimiter, comments=comments, 1804 autostrip=autostrip, encoding=encoding) 1805 validate_names = NameValidator(excludelist=excludelist, 1806 deletechars=deletechars, 1807 case_sensitive=case_sensitive, 1808 replace_space=replace_space) 1809 1810 # Skip the first `skip_header` rows 1811 try: 1812 for i in range(skip_header): 1813 next(fhd) 1814 1815 # Keep on until we find the first valid values 1816 first_values = None 1817 1818 while not first_values: 1819 first_line = _decode_line(next(fhd), encoding) 1820 if (names is True) and (comments is not None): 1821 if comments in first_line: 1822 first_line = ( 1823 ''.join(first_line.split(comments)[1:])) 1824 first_values = split_line(first_line) 1825 except StopIteration: 1826 # return an empty array if the datafile is empty 1827 first_line = '' 1828 first_values = [] 1829 warnings.warn('genfromtxt: Empty input file: "%s"' % fname, stacklevel=2) 1830 1831 # Should we take the first values as names ? 1832 if names is True: 1833 fval = first_values[0].strip() 1834 if comments is not None: 1835 if fval in comments: 1836 del first_values[0] 1837 1838 # Check the columns to use: make sure `usecols` is a list 1839 if usecols is not None: 1840 try: 1841 usecols = [_.strip() for _ in usecols.split(",")] 1842 except AttributeError: 1843 try: 1844 usecols = list(usecols) 1845 except TypeError: 1846 usecols = [usecols, ] 1847 nbcols = len(usecols or first_values) 1848 1849 # Check the names and overwrite the dtype.names if needed 1850 if names is True: 1851 names = validate_names([str(_.strip()) for _ in first_values]) 1852 first_line = '' 1853 elif _is_string_like(names): 1854 names = validate_names([_.strip() for _ in names.split(',')]) 1855 elif names: 1856 names = validate_names(names) 1857 # Get the dtype 1858 if dtype is not None: 1859 dtype = easy_dtype(dtype, defaultfmt=defaultfmt, names=names, 1860 excludelist=excludelist, 1861 deletechars=deletechars, 1862 case_sensitive=case_sensitive, 1863 replace_space=replace_space) 1864 # Make sure the names is a list (for 2.5) 1865 if names is not None: 1866 names = list(names) 1867 1868 if usecols: 1869 for (i, current) in enumerate(usecols): 1870 # if usecols is a list of names, convert to a list of indices 1871 if _is_string_like(current): 1872 usecols[i] = names.index(current) 1873 elif current < 0: 1874 usecols[i] = current + len(first_values) 1875 # If the dtype is not None, make sure we update it 1876 if (dtype is not None) and (len(dtype) > nbcols): 1877 descr = dtype.descr 1878 dtype = np.dtype([descr[_] for _ in usecols]) 1879 names = list(dtype.names) 1880 # If `names` is not None, update the names 1881 elif (names is not None) and (len(names) > nbcols): 1882 names = [names[_] for _ in usecols] 1883 elif (names is not None) and (dtype is not None): 1884 names = list(dtype.names) 1885 1886 # Process the missing values ............................... 1887 # Rename missing_values for convenience 1888 user_missing_values = missing_values or () 1889 if isinstance(user_missing_values, bytes): 1890 user_missing_values = user_missing_values.decode('latin1') 1891 1892 # Define the list of missing_values (one column: one list) 1893 missing_values = [list(['']) for _ in range(nbcols)] 1894 1895 # We have a dictionary: process it field by field 1896 if isinstance(user_missing_values, dict): 1897 # Loop on the items 1898 for (key, val) in user_missing_values.items(): 1899 # Is the key a string ? 1900 if _is_string_like(key): 1901 try: 1902 # Transform it into an integer 1903 key = names.index(key) 1904 except ValueError: 1905 # We couldn't find it: the name must have been dropped 1906 continue 1907 # Redefine the key as needed if it's a column number 1908 if usecols: 1909 try: 1910 key = usecols.index(key) 1911 except ValueError: 1912 pass 1913 # Transform the value as a list of string 1914 if isinstance(val, (list, tuple)): 1915 val = [str(_) for _ in val] 1916 else: 1917 val = [str(val), ] 1918 # Add the value(s) to the current list of missing 1919 if key is None: 1920 # None acts as default 1921 for miss in missing_values: 1922 miss.extend(val) 1923 else: 1924 missing_values[key].extend(val) 1925 # We have a sequence : each item matches a column 1926 elif isinstance(user_missing_values, (list, tuple)): 1927 for (value, entry) in zip(user_missing_values, missing_values): 1928 value = str(value) 1929 if value not in entry: 1930 entry.append(value) 1931 # We have a string : apply it to all entries 1932 elif isinstance(user_missing_values, str): 1933 user_value = user_missing_values.split(",") 1934 for entry in missing_values: 1935 entry.extend(user_value) 1936 # We have something else: apply it to all entries 1937 else: 1938 for entry in missing_values: 1939 entry.extend([str(user_missing_values)]) 1940 1941 # Process the filling_values ............................... 1942 # Rename the input for convenience 1943 user_filling_values = filling_values 1944 if user_filling_values is None: 1945 user_filling_values = [] 1946 # Define the default 1947 filling_values = [None] * nbcols 1948 # We have a dictionary : update each entry individually 1949 if isinstance(user_filling_values, dict): 1950 for (key, val) in user_filling_values.items(): 1951 if _is_string_like(key): 1952 try: 1953 # Transform it into an integer 1954 key = names.index(key) 1955 except ValueError: 1956 # We couldn't find it: the name must have been dropped, 1957 continue 1958 # Redefine the key if it's a column number and usecols is defined 1959 if usecols: 1960 try: 1961 key = usecols.index(key) 1962 except ValueError: 1963 pass 1964 # Add the value to the list 1965 filling_values[key] = val 1966 # We have a sequence : update on a one-to-one basis 1967 elif isinstance(user_filling_values, (list, tuple)): 1968 n = len(user_filling_values) 1969 if (n <= nbcols): 1970 filling_values[:n] = user_filling_values 1971 else: 1972 filling_values = user_filling_values[:nbcols] 1973 # We have something else : use it for all entries 1974 else: 1975 filling_values = [user_filling_values] * nbcols 1976 1977 # Initialize the converters ................................ 1978 if dtype is None: 1979 # Note: we can't use a [...]*nbcols, as we would have 3 times the same 1980 # ... converter, instead of 3 different converters. 1981 converters = [StringConverter(None, missing_values=miss, default=fill) 1982 for (miss, fill) in zip(missing_values, filling_values)] 1983 else: 1984 dtype_flat = flatten_dtype(dtype, flatten_base=True) 1985 # Initialize the converters 1986 if len(dtype_flat) > 1: 1987 # Flexible type : get a converter from each dtype 1988 zipit = zip(dtype_flat, missing_values, filling_values) 1989 converters = [StringConverter(dt, locked=True, 1990 missing_values=miss, default=fill) 1991 for (dt, miss, fill) in zipit] 1992 else: 1993 # Set to a default converter (but w/ different missing values) 1994 zipit = zip(missing_values, filling_values) 1995 converters = [StringConverter(dtype, locked=True, 1996 missing_values=miss, default=fill) 1997 for (miss, fill) in zipit] 1998 # Update the converters to use the user-defined ones 1999 uc_update = [] 2000 for (j, conv) in user_converters.items(): 2001 # If the converter is specified by column names, use the index instead 2002 if _is_string_like(j): 2003 try: 2004 j = names.index(j) 2005 i = j 2006 except ValueError: 2007 continue 2008 elif usecols: 2009 try: 2010 i = usecols.index(j) 2011 except ValueError: 2012 # Unused converter specified 2013 continue 2014 else: 2015 i = j 2016 # Find the value to test - first_line is not filtered by usecols: 2017 if len(first_line): 2018 testing_value = first_values[j] 2019 else: 2020 testing_value = None 2021 if conv is bytes: 2022 user_conv = asbytes 2023 elif byte_converters: 2024 # converters may use decode to workaround numpy's old behaviour, 2025 # so encode the string again before passing to the user converter 2026 def tobytes_first(x, conv): 2027 if type(x) is bytes: 2028 return conv(x) 2029 return conv(x.encode("latin1")) 2030 user_conv = functools.partial(tobytes_first, conv=conv) 2031 else: 2032 user_conv = conv 2033 converters[i].update(user_conv, locked=True, 2034 testing_value=testing_value, 2035 default=filling_values[i], 2036 missing_values=missing_values[i],) 2037 uc_update.append((i, user_conv)) 2038 # Make sure we have the corrected keys in user_converters... 2039 user_converters.update(uc_update) 2040 2041 # Fixme: possible error as following variable never used. 2042 # miss_chars = [_.missing_values for _ in converters] 2043 2044 # Initialize the output lists ... 2045 # ... rows 2046 rows = [] 2047 append_to_rows = rows.append 2048 # ... masks 2049 if usemask: 2050 masks = [] 2051 append_to_masks = masks.append 2052 # ... invalid 2053 invalid = [] 2054 append_to_invalid = invalid.append 2055 2056 # Parse each line 2057 for (i, line) in enumerate(itertools.chain([first_line, ], fhd)): 2058 values = split_line(line) 2059 nbvalues = len(values) 2060 # Skip an empty line 2061 if nbvalues == 0: 2062 continue 2063 if usecols: 2064 # Select only the columns we need 2065 try: 2066 values = [values[_] for _ in usecols] 2067 except IndexError: 2068 append_to_invalid((i + skip_header + 1, nbvalues)) 2069 continue 2070 elif nbvalues != nbcols: 2071 append_to_invalid((i + skip_header + 1, nbvalues)) 2072 continue 2073 # Store the values 2074 append_to_rows(tuple(values)) 2075 if usemask: 2076 append_to_masks(tuple([v.strip() in m 2077 for (v, m) in zip(values, 2078 missing_values)])) 2079 if len(rows) == max_rows: 2080 break 2081 2082 # Upgrade the converters (if needed) 2083 if dtype is None: 2084 for (i, converter) in enumerate(converters): 2085 current_column = [itemgetter(i)(_m) for _m in rows] 2086 try: 2087 converter.iterupgrade(current_column) 2088 except ConverterLockError: 2089 errmsg = "Converter #%i is locked and cannot be upgraded: " % i 2090 current_column = map(itemgetter(i), rows) 2091 for (j, value) in enumerate(current_column): 2092 try: 2093 converter.upgrade(value) 2094 except (ConverterError, ValueError): 2095 errmsg += "(occurred line #%i for value '%s')" 2096 errmsg %= (j + 1 + skip_header, value) 2097 raise ConverterError(errmsg) 2098 2099 # Check that we don't have invalid values 2100 nbinvalid = len(invalid) 2101 if nbinvalid > 0: 2102 nbrows = len(rows) + nbinvalid - skip_footer 2103 # Construct the error message 2104 template = " Line #%%i (got %%i columns instead of %i)" % nbcols 2105 if skip_footer > 0: 2106 nbinvalid_skipped = len([_ for _ in invalid 2107 if _[0] > nbrows + skip_header]) 2108 invalid = invalid[:nbinvalid - nbinvalid_skipped] 2109 skip_footer -= nbinvalid_skipped 2110# 2111# nbrows -= skip_footer 2112# errmsg = [template % (i, nb) 2113# for (i, nb) in invalid if i < nbrows] 2114# else: 2115 errmsg = [template % (i, nb) 2116 for (i, nb) in invalid] 2117 if len(errmsg): 2118 errmsg.insert(0, "Some errors were detected !") 2119 errmsg = "\n".join(errmsg) 2120 # Raise an exception ? 2121 if invalid_raise: 2122 raise ValueError(errmsg) 2123 # Issue a warning ? 2124 else: 2125 warnings.warn(errmsg, ConversionWarning, stacklevel=2) 2126 2127 # Strip the last skip_footer data 2128 if skip_footer > 0: 2129 rows = rows[:-skip_footer] 2130 if usemask: 2131 masks = masks[:-skip_footer] 2132 2133 # Convert each value according to the converter: 2134 # We want to modify the list in place to avoid creating a new one... 2135 if loose: 2136 rows = list( 2137 zip(*[[conv._loose_call(_r) for _r in map(itemgetter(i), rows)] 2138 for (i, conv) in enumerate(converters)])) 2139 else: 2140 rows = list( 2141 zip(*[[conv._strict_call(_r) for _r in map(itemgetter(i), rows)] 2142 for (i, conv) in enumerate(converters)])) 2143 2144 # Reset the dtype 2145 data = rows 2146 if dtype is None: 2147 # Get the dtypes from the types of the converters 2148 column_types = [conv.type for conv in converters] 2149 # Find the columns with strings... 2150 strcolidx = [i for (i, v) in enumerate(column_types) 2151 if v == np.unicode_] 2152 2153 if byte_converters and strcolidx: 2154 # convert strings back to bytes for backward compatibility 2155 warnings.warn( 2156 "Reading unicode strings without specifying the encoding " 2157 "argument is deprecated. Set the encoding, use None for the " 2158 "system default.", 2159 np.VisibleDeprecationWarning, stacklevel=2) 2160 def encode_unicode_cols(row_tup): 2161 row = list(row_tup) 2162 for i in strcolidx: 2163 row[i] = row[i].encode('latin1') 2164 return tuple(row) 2165 2166 try: 2167 data = [encode_unicode_cols(r) for r in data] 2168 except UnicodeEncodeError: 2169 pass 2170 else: 2171 for i in strcolidx: 2172 column_types[i] = np.bytes_ 2173 2174 # Update string types to be the right length 2175 sized_column_types = column_types[:] 2176 for i, col_type in enumerate(column_types): 2177 if np.issubdtype(col_type, np.character): 2178 n_chars = max(len(row[i]) for row in data) 2179 sized_column_types[i] = (col_type, n_chars) 2180 2181 if names is None: 2182 # If the dtype is uniform (before sizing strings) 2183 base = { 2184 c_type 2185 for c, c_type in zip(converters, column_types) 2186 if c._checked} 2187 if len(base) == 1: 2188 uniform_type, = base 2189 (ddtype, mdtype) = (uniform_type, bool) 2190 else: 2191 ddtype = [(defaultfmt % i, dt) 2192 for (i, dt) in enumerate(sized_column_types)] 2193 if usemask: 2194 mdtype = [(defaultfmt % i, bool) 2195 for (i, dt) in enumerate(sized_column_types)] 2196 else: 2197 ddtype = list(zip(names, sized_column_types)) 2198 mdtype = list(zip(names, [bool] * len(sized_column_types))) 2199 output = np.array(data, dtype=ddtype) 2200 if usemask: 2201 outputmask = np.array(masks, dtype=mdtype) 2202 else: 2203 # Overwrite the initial dtype names if needed 2204 if names and dtype.names is not None: 2205 dtype.names = names 2206 # Case 1. We have a structured type 2207 if len(dtype_flat) > 1: 2208 # Nested dtype, eg [('a', int), ('b', [('b0', int), ('b1', 'f4')])] 2209 # First, create the array using a flattened dtype: 2210 # [('a', int), ('b1', int), ('b2', float)] 2211 # Then, view the array using the specified dtype. 2212 if 'O' in (_.char for _ in dtype_flat): 2213 if has_nested_fields(dtype): 2214 raise NotImplementedError( 2215 "Nested fields involving objects are not supported...") 2216 else: 2217 output = np.array(data, dtype=dtype) 2218 else: 2219 rows = np.array(data, dtype=[('', _) for _ in dtype_flat]) 2220 output = rows.view(dtype) 2221 # Now, process the rowmasks the same way 2222 if usemask: 2223 rowmasks = np.array( 2224 masks, dtype=np.dtype([('', bool) for t in dtype_flat])) 2225 # Construct the new dtype 2226 mdtype = make_mask_descr(dtype) 2227 outputmask = rowmasks.view(mdtype) 2228 # Case #2. We have a basic dtype 2229 else: 2230 # We used some user-defined converters 2231 if user_converters: 2232 ishomogeneous = True 2233 descr = [] 2234 for i, ttype in enumerate([conv.type for conv in converters]): 2235 # Keep the dtype of the current converter 2236 if i in user_converters: 2237 ishomogeneous &= (ttype == dtype.type) 2238 if np.issubdtype(ttype, np.character): 2239 ttype = (ttype, max(len(row[i]) for row in data)) 2240 descr.append(('', ttype)) 2241 else: 2242 descr.append(('', dtype)) 2243 # So we changed the dtype ? 2244 if not ishomogeneous: 2245 # We have more than one field 2246 if len(descr) > 1: 2247 dtype = np.dtype(descr) 2248 # We have only one field: drop the name if not needed. 2249 else: 2250 dtype = np.dtype(ttype) 2251 # 2252 output = np.array(data, dtype) 2253 if usemask: 2254 if dtype.names is not None: 2255 mdtype = [(_, bool) for _ in dtype.names] 2256 else: 2257 mdtype = bool 2258 outputmask = np.array(masks, dtype=mdtype) 2259 # Try to take care of the missing data we missed 2260 names = output.dtype.names 2261 if usemask and names: 2262 for (name, conv) in zip(names, converters): 2263 missing_values = [conv(_) for _ in conv.missing_values 2264 if _ != ''] 2265 for mval in missing_values: 2266 outputmask[name] |= (output[name] == mval) 2267 # Construct the final array 2268 if usemask: 2269 output = output.view(MaskedArray) 2270 output._mask = outputmask 2271 output = np.squeeze(output) 2272 if unpack: 2273 if names is None: 2274 return output.T 2275 elif len(names) == 1: 2276 # squeeze single-name dtypes too 2277 return output[names[0]] 2278 else: 2279 # For structured arrays with multiple fields, 2280 # return an array for each field. 2281 return [output[field] for field in names] 2282 return output 2283 2284 2285_genfromtxt_with_like = array_function_dispatch( 2286 _genfromtxt_dispatcher 2287)(genfromtxt) 2288 2289 2290def ndfromtxt(fname, **kwargs): 2291 """ 2292 Load ASCII data stored in a file and return it as a single array. 2293 2294 .. deprecated:: 1.17 2295 ndfromtxt` is a deprecated alias of `genfromtxt` which 2296 overwrites the ``usemask`` argument with `False` even when 2297 explicitly called as ``ndfromtxt(..., usemask=True)``. 2298 Use `genfromtxt` instead. 2299 2300 Parameters 2301 ---------- 2302 fname, kwargs : For a description of input parameters, see `genfromtxt`. 2303 2304 See Also 2305 -------- 2306 numpy.genfromtxt : generic function. 2307 2308 """ 2309 kwargs['usemask'] = False 2310 # Numpy 1.17 2311 warnings.warn( 2312 "np.ndfromtxt is a deprecated alias of np.genfromtxt, " 2313 "prefer the latter.", 2314 DeprecationWarning, stacklevel=2) 2315 return genfromtxt(fname, **kwargs) 2316 2317 2318def mafromtxt(fname, **kwargs): 2319 """ 2320 Load ASCII data stored in a text file and return a masked array. 2321 2322 .. deprecated:: 1.17 2323 np.mafromtxt is a deprecated alias of `genfromtxt` which 2324 overwrites the ``usemask`` argument with `True` even when 2325 explicitly called as ``mafromtxt(..., usemask=False)``. 2326 Use `genfromtxt` instead. 2327 2328 Parameters 2329 ---------- 2330 fname, kwargs : For a description of input parameters, see `genfromtxt`. 2331 2332 See Also 2333 -------- 2334 numpy.genfromtxt : generic function to load ASCII data. 2335 2336 """ 2337 kwargs['usemask'] = True 2338 # Numpy 1.17 2339 warnings.warn( 2340 "np.mafromtxt is a deprecated alias of np.genfromtxt, " 2341 "prefer the latter.", 2342 DeprecationWarning, stacklevel=2) 2343 return genfromtxt(fname, **kwargs) 2344 2345 2346def recfromtxt(fname, **kwargs): 2347 """ 2348 Load ASCII data from a file and return it in a record array. 2349 2350 If ``usemask=False`` a standard `recarray` is returned, 2351 if ``usemask=True`` a MaskedRecords array is returned. 2352 2353 Parameters 2354 ---------- 2355 fname, kwargs : For a description of input parameters, see `genfromtxt`. 2356 2357 See Also 2358 -------- 2359 numpy.genfromtxt : generic function 2360 2361 Notes 2362 ----- 2363 By default, `dtype` is None, which means that the data-type of the output 2364 array will be determined from the data. 2365 2366 """ 2367 kwargs.setdefault("dtype", None) 2368 usemask = kwargs.get('usemask', False) 2369 output = genfromtxt(fname, **kwargs) 2370 if usemask: 2371 from numpy.ma.mrecords import MaskedRecords 2372 output = output.view(MaskedRecords) 2373 else: 2374 output = output.view(np.recarray) 2375 return output 2376 2377 2378def recfromcsv(fname, **kwargs): 2379 """ 2380 Load ASCII data stored in a comma-separated file. 2381 2382 The returned array is a record array (if ``usemask=False``, see 2383 `recarray`) or a masked record array (if ``usemask=True``, 2384 see `ma.mrecords.MaskedRecords`). 2385 2386 Parameters 2387 ---------- 2388 fname, kwargs : For a description of input parameters, see `genfromtxt`. 2389 2390 See Also 2391 -------- 2392 numpy.genfromtxt : generic function to load ASCII data. 2393 2394 Notes 2395 ----- 2396 By default, `dtype` is None, which means that the data-type of the output 2397 array will be determined from the data. 2398 2399 """ 2400 # Set default kwargs for genfromtxt as relevant to csv import. 2401 kwargs.setdefault("case_sensitive", "lower") 2402 kwargs.setdefault("names", True) 2403 kwargs.setdefault("delimiter", ",") 2404 kwargs.setdefault("dtype", None) 2405 output = genfromtxt(fname, **kwargs) 2406 2407 usemask = kwargs.get("usemask", False) 2408 if usemask: 2409 from numpy.ma.mrecords import MaskedRecords 2410 output = output.view(MaskedRecords) 2411 else: 2412 output = output.view(np.recarray) 2413 return output 2414