import sys
import os
import re
import functools
import itertools
import warnings
import weakref
import contextlib
from operator import itemgetter, index as opindex
from collections.abc import Mapping

import numpy as np
from . import format
from ._datasource import DataSource
from numpy.core import overrides
from numpy.core.multiarray import packbits, unpackbits
from numpy.core.overrides import set_array_function_like_doc, set_module
from numpy.core._internal import recursive
from ._iotools import (
    LineSplitter, NameValidator, StringConverter, ConverterError,
    ConverterLockError, ConversionWarning, _is_string_like,
    has_nested_fields, flatten_dtype, easy_dtype, _decode_line
    )

from numpy.compat import (
    asbytes, asstr, asunicode, os_fspath, os_PathLike,
    pickle, contextlib_nullcontext
    )


@set_module('numpy')
def loads(*args, **kwargs):
    # NumPy 1.15.0, 2017-12-10
    warnings.warn(
        "np.loads is deprecated, use pickle.loads instead",
        DeprecationWarning, stacklevel=2)
    return pickle.loads(*args, **kwargs)


__all__ = [
    'savetxt', 'loadtxt', 'genfromtxt', 'ndfromtxt', 'mafromtxt',
    'recfromtxt', 'recfromcsv', 'load', 'loads', 'save', 'savez',
    'savez_compressed', 'packbits', 'unpackbits', 'fromregex', 'DataSource'
    ]


array_function_dispatch = functools.partial(
    overrides.array_function_dispatch, module='numpy')


class BagObj:
    """
    BagObj(obj)

    Convert attribute look-ups to getitems on the object passed in.

    Parameters
    ----------
    obj : class instance
        Object on which attribute look-up is performed.

    Examples
    --------
    >>> from numpy.lib.npyio import BagObj as BO
    >>> class BagDemo:
    ...     def __getitem__(self, key): # An instance of BagObj(BagDemo)
    ...                                 # will call this method when any
    ...                                 # attribute look-up is required
    ...         result = "Doesn't matter what you want, "
    ...         return result + "you're gonna get this"
    ...
    >>> demo_obj = BagDemo()
    >>> bagobj = BO(demo_obj)
    >>> bagobj.hello_there
    "Doesn't matter what you want, you're gonna get this"
    >>> bagobj.I_can_be_anything
    "Doesn't matter what you want, you're gonna get this"

    """

    def __init__(self, obj):
        # Use weakref to make NpzFile objects collectable by refcount
        self._obj = weakref.proxy(obj)

    def __getattribute__(self, key):
        try:
            return object.__getattribute__(self, '_obj')[key]
        except KeyError:
            raise AttributeError(key) from None

    def __dir__(self):
        """
        Enables dir(bagobj) to list the files in an NpzFile.

        This also enables tab-completion in an interpreter or IPython.
        """
        return list(object.__getattribute__(self, '_obj').keys())


def zipfile_factory(file, *args, **kwargs):
    """
    Create a ZipFile.

    Allows for Zip64, and the `file` argument can accept file, str, or
    pathlib.Path objects. `args` and `kwargs` are passed to the zipfile.ZipFile
    constructor.
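
    A minimal usage sketch (``archive.zip`` is a hypothetical path)::

        zipf = zipfile_factory('archive.zip', mode='w')
        zipf.close()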
    """
    if not hasattr(file, 'read'):
        file = os_fspath(file)
    import zipfile
    kwargs['allowZip64'] = True
    return zipfile.ZipFile(file, *args, **kwargs)


class NpzFile(Mapping):
    """
    NpzFile(fid)

    A dictionary-like object with lazy-loading of files in the zipped
    archive provided on construction.

    `NpzFile` is used to load files in the NumPy ``.npz`` data archive
    format. It assumes that files in the archive have a ``.npy`` extension;
    other files are ignored.

    The arrays and file strings are lazily loaded on either
    getitem access using ``obj['key']`` or attribute lookup using
    ``obj.f.key``. A list of all files (without ``.npy`` extensions) can
    be obtained with ``obj.files`` and the ZipFile object itself using
    ``obj.zip``.

    Attributes
    ----------
    files : list of str
        List of all files in the archive with a ``.npy`` extension.
    zip : ZipFile instance
        The ZipFile object initialized with the zipped archive.
    f : BagObj instance
        An object on which attribute look-up can be performed as an
        alternative to getitem access on the `NpzFile` instance itself.
    allow_pickle : bool, optional
        Allow loading pickled data. Default: False

        .. versionchanged:: 1.16.3
            Made default False in response to CVE-2019-6446.

    pickle_kwargs : dict, optional
        Additional keyword arguments to pass on to pickle.load.
        These are only useful when loading object arrays saved on
        Python 2 when using Python 3.

    Parameters
    ----------
    fid : file or str
        The zipped archive to open. This is either a file-like object
        or a string containing the path to the archive.
    own_fid : bool, optional
        Whether NpzFile should close the file handle.
        Requires that `fid` is a file-like object.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()
    >>> x = np.arange(10)
    >>> y = np.sin(x)
    >>> np.savez(outfile, x=x, y=y)
    >>> _ = outfile.seek(0)

    >>> npz = np.load(outfile)
    >>> isinstance(npz, np.lib.npyio.NpzFile)
    True
    >>> sorted(npz.files)
    ['x', 'y']
    >>> npz['x']  # getitem access
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    >>> npz.f.x  # attribute lookup
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """
    # Make __exit__ safe if zipfile_factory raises an exception
    zip = None
    fid = None

    def __init__(self, fid, own_fid=False, allow_pickle=False,
                 pickle_kwargs=None):
        # Import is postponed to here since zipfile depends on gzip, an
        # optional component of the so-called standard library.
        _zip = zipfile_factory(fid)
        self._files = _zip.namelist()
        self.files = []
        self.allow_pickle = allow_pickle
        self.pickle_kwargs = pickle_kwargs
        for x in self._files:
            if x.endswith('.npy'):
                self.files.append(x[:-4])
            else:
                self.files.append(x)
        self.zip = _zip
        self.f = BagObj(self)
        if own_fid:
            self.fid = fid

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """
        Close the file.

        """
        if self.zip is not None:
            self.zip.close()
            self.zip = None
        if self.fid is not None:
            self.fid.close()
            self.fid = None
        self.f = None  # break reference cycle

    def __del__(self):
        self.close()

    # Implement the Mapping ABC
    def __iter__(self):
        return iter(self.files)

    def __len__(self):
        return len(self.files)

    def __getitem__(self, key):
        # FIXME: This seems like it will copy strings around
        #   more than is strictly necessary.  The zipfile
        #   will read the string and then
        #   the format.read_array will copy the string
        #   to another place in memory.
        #   It would be better if the zipfile could read
        #   (or at least uncompress) the data
        #   directly into the array memory.
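        # Note: a key may name an archive member directly ('x.npy') or by
        # its stem ('x'); in the latter case '.npy' is appended before the
        # archive lookup below.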
        member = False
        if key in self._files:
            member = True
        elif key in self.files:
            member = True
            key += '.npy'
        if member:
            bytes = self.zip.open(key)
            magic = bytes.read(len(format.MAGIC_PREFIX))
            bytes.close()
            if magic == format.MAGIC_PREFIX:
                bytes = self.zip.open(key)
                return format.read_array(bytes,
                                         allow_pickle=self.allow_pickle,
                                         pickle_kwargs=self.pickle_kwargs)
            else:
                return self.zip.read(key)
        else:
            raise KeyError("%s is not a file in the archive" % key)

    # deprecate the python 2 dict apis that we supported by accident in
    # python 3. We forgot to implement itervalues() at all in earlier
    # versions of numpy, so no need to deprecate it here.

    def iteritems(self):
        # Numpy 1.15, 2018-02-20
        warnings.warn(
            "NpzFile.iteritems is deprecated in python 3, to match the "
            "removal of dict.iteritems. Use .items() instead.",
            DeprecationWarning, stacklevel=2)
        return self.items()

    def iterkeys(self):
        # Numpy 1.15, 2018-02-20
        warnings.warn(
            "NpzFile.iterkeys is deprecated in python 3, to match the "
            "removal of dict.iterkeys. Use .keys() instead.",
            DeprecationWarning, stacklevel=2)
        return self.keys()


@set_module('numpy')
def load(file, mmap_mode=None, allow_pickle=False, fix_imports=True,
         encoding='ASCII'):
    """
    Load arrays or pickled objects from ``.npy``, ``.npz`` or pickled files.

    .. warning:: Loading files that contain object arrays uses the ``pickle``
                 module, which is not secure against erroneous or maliciously
                 constructed data. Consider passing ``allow_pickle=False`` to
                 load data that is known not to contain object arrays for the
                 safer handling of untrusted sources.

    Parameters
    ----------
    file : file-like object, string, or pathlib.Path
        The file to read. File-like objects must support the
        ``seek()`` and ``read()`` methods. Pickled files require that the
        file-like object support the ``readline()`` method as well.
    mmap_mode : {None, 'r+', 'r', 'w+', 'c'}, optional
        If not None, then memory-map the file, using the given mode (see
        `numpy.memmap` for a detailed description of the modes).  A
        memory-mapped array is kept on disk. However, it can be accessed
        and sliced like any ndarray.  Memory mapping is especially useful
        for accessing small fragments of large files without reading the
        entire file into memory.
    allow_pickle : bool, optional
        Allow loading pickled object arrays stored in npy files. Reasons for
        disallowing pickles include security, as loading pickled data can
        execute arbitrary code. If pickles are disallowed, loading object
        arrays will fail. Default: False

        .. versionchanged:: 1.16.3
            Made default False in response to CVE-2019-6446.

    fix_imports : bool, optional
        Only useful when loading Python 2 generated pickled files on Python 3,
        which includes npy/npz files containing object arrays. If `fix_imports`
        is True, pickle will try to map the old Python 2 names to the new names
        used in Python 3.
    encoding : str, optional
        What encoding to use when reading Python 2 strings. Only useful when
        loading Python 2 generated pickled files in Python 3, which includes
        npy/npz files containing object arrays. Values other than 'latin1',
        'ASCII', and 'bytes' are not allowed, as they can corrupt numerical
        data. Default: 'ASCII'

    Returns
    -------
    result : array, tuple, dict, etc.
        Data stored in the file. For ``.npz`` files, the returned instance
        of the NpzFile class must be closed to avoid leaking file descriptors.

    Raises
    ------
    IOError
        If the input file does not exist or cannot be read.
    ValueError
        The file contains an object array, but allow_pickle=False given.

    See Also
    --------
    save, savez, savez_compressed, loadtxt
    memmap : Create a memory-map to an array stored in a file on disk.
    lib.format.open_memmap : Create or load a memory-mapped ``.npy`` file.

    Notes
    -----
    - If the file contains pickle data, then whatever object is stored
      in the pickle is returned.
    - If the file is a ``.npy`` file, then a single array is returned.
    - If the file is a ``.npz`` file, then a dictionary-like object is
      returned, containing ``{filename: array}`` key-value pairs, one for
      each file in the archive.
    - If the file is a ``.npz`` file, the returned value supports the
      context manager protocol in a similar fashion to the open function::

        with load('foo.npz') as data:
            a = data['a']

      The underlying file descriptor is closed when exiting the 'with'
      block.

    Examples
    --------
    Store data to disk, and load it again:

    >>> np.save('/tmp/123', np.array([[1, 2, 3], [4, 5, 6]]))
    >>> np.load('/tmp/123.npy')
    array([[1, 2, 3],
           [4, 5, 6]])

    Store compressed data to disk, and load it again:

    >>> a=np.array([[1, 2, 3], [4, 5, 6]])
    >>> b=np.array([1, 2])
    >>> np.savez('/tmp/123.npz', a=a, b=b)
    >>> data = np.load('/tmp/123.npz')
    >>> data['a']
    array([[1, 2, 3],
           [4, 5, 6]])
    >>> data['b']
    array([1, 2])
    >>> data.close()

    Mem-map the stored array, and then access the second row
    directly from disk:

    >>> X = np.load('/tmp/123.npy', mmap_mode='r')
    >>> X[1, :]
    memmap([4, 5, 6])

    """
    if encoding not in ('ASCII', 'latin1', 'bytes'):
        # The 'encoding' value for pickle also affects what encoding
        # the serialized binary data of NumPy arrays is loaded
        # in. Pickle does not pass on the encoding information to
        # NumPy. The unpickling code in numpy.core.multiarray is
        # written to assume that unicode data appearing where binary
        # should be is in 'latin1'. 'bytes' is also safe, as is 'ASCII'.
        #
        # Other encoding values can corrupt binary data, and we
        # purposefully disallow them. For the same reason, the errors=
        # argument is not exposed, as values other than 'strict' can
        # similarly silently corrupt numerical data.
        raise ValueError("encoding must be 'ASCII', 'latin1', or 'bytes'")

    pickle_kwargs = dict(encoding=encoding, fix_imports=fix_imports)

    with contextlib.ExitStack() as stack:
        if hasattr(file, 'read'):
            fid = file
            own_fid = False
        else:
            fid = stack.enter_context(open(os_fspath(file), "rb"))
            own_fid = True

        # Code to distinguish NumPy binary files from pickles.
        _ZIP_PREFIX = b'PK\x03\x04'
        _ZIP_SUFFIX = b'PK\x05\x06'  # empty zip files start with this
        N = len(format.MAGIC_PREFIX)
        magic = fid.read(N)
        # If the file size is less than N, we need to make sure not
        # to seek past the beginning of the file
        fid.seek(-min(N, len(magic)), 1)  # back-up
        if magic.startswith(_ZIP_PREFIX) or magic.startswith(_ZIP_SUFFIX):
            # zip-file (assume .npz)
            # Potentially transfer file ownership to NpzFile
            stack.pop_all()
            ret = NpzFile(fid, own_fid=own_fid, allow_pickle=allow_pickle,
                          pickle_kwargs=pickle_kwargs)
            return ret
        elif magic == format.MAGIC_PREFIX:
            # .npy file
            if mmap_mode:
                return format.open_memmap(file, mode=mmap_mode)
            else:
                return format.read_array(fid, allow_pickle=allow_pickle,
                                         pickle_kwargs=pickle_kwargs)
        else:
            # Try a pickle
            if not allow_pickle:
                raise ValueError("Cannot load file containing pickled data "
                                 "when allow_pickle=False")
            try:
                return pickle.load(fid, **pickle_kwargs)
            except Exception as e:
                raise IOError(
                    "Failed to interpret file %s as a pickle" % repr(file)) from e


def _save_dispatcher(file, arr, allow_pickle=None, fix_imports=None):
    return (arr,)


@array_function_dispatch(_save_dispatcher)
def save(file, arr, allow_pickle=True, fix_imports=True):
    """
    Save an array to a binary file in NumPy ``.npy`` format.

    Parameters
    ----------
    file : file, str, or pathlib.Path
        File or filename to which the data is saved.  If file is a file-object,
        then the filename is unchanged.  If file is a string or Path, a ``.npy``
        extension will be appended to the filename if it does not already
        have one.
    arr : array_like
        Array data to be saved.
    allow_pickle : bool, optional
        Allow saving object arrays using Python pickles. Reasons for disallowing
        pickles include security (loading pickled data can execute arbitrary
        code) and portability (pickled objects may not be loadable on different
        Python installations, for example if the stored objects require libraries
        that are not available, and not all pickled data is compatible between
        Python 2 and Python 3).
        Default: True
    fix_imports : bool, optional
        Only useful in forcing objects in object arrays on Python 3 to be
        pickled in a Python 2 compatible way. If `fix_imports` is True, pickle
        will try to map the new Python 3 names to the old module names used in
        Python 2, so that the pickle data stream is readable with Python 2.

    See Also
    --------
    savez : Save several arrays into a ``.npz`` archive
    savetxt, load

    Notes
    -----
    For a description of the ``.npy`` format, see :py:mod:`numpy.lib.format`.

    Any data saved to the file is appended to the end of the file.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()

    >>> x = np.arange(10)
    >>> np.save(outfile, x)

    >>> _ = outfile.seek(0) # Only needed here to simulate closing & reopening file
    >>> np.load(outfile)
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])


    >>> with open('test.npy', 'wb') as f:
    ...     np.save(f, np.array([1, 2]))
    ...     np.save(f, np.array([1, 3]))
    >>> with open('test.npy', 'rb') as f:
    ...     a = np.load(f)
    ...     b = np.load(f)
    >>> print(a, b)
    [1 2] [1 3]
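
    A ``pathlib.Path`` also works; the ``.npy`` extension is appended when
    missing (this sketch writes ``test.npy`` in the current directory):

    >>> from pathlib import Path
    >>> np.save(Path('test'), np.array([1, 2]))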
    """
    if hasattr(file, 'write'):
        file_ctx = contextlib_nullcontext(file)
    else:
        file = os_fspath(file)
        if not file.endswith('.npy'):
            file = file + '.npy'
        file_ctx = open(file, "wb")

    with file_ctx as fid:
        arr = np.asanyarray(arr)
        format.write_array(fid, arr, allow_pickle=allow_pickle,
                           pickle_kwargs=dict(fix_imports=fix_imports))


def _savez_dispatcher(file, *args, **kwds):
    yield from args
    yield from kwds.values()


@array_function_dispatch(_savez_dispatcher)
def savez(file, *args, **kwds):
    """Save several arrays into a single file in uncompressed ``.npz`` format.

    If arguments are passed in with no keywords, the corresponding variable
    names in the ``.npz`` file are 'arr_0', 'arr_1', etc. If keyword
    arguments are given, the corresponding variable names in the ``.npz``
    file will match the keyword names.

    Parameters
    ----------
    file : str or file
        Either the filename (string) or an open file (file-like object)
        where the data will be saved. If file is a string or a Path, the
        ``.npz`` extension will be appended to the filename if it is not
        already there.
    args : Arguments, optional
        Arrays to save to the file. Since it is not possible for Python to
        know the names of the arrays outside `savez`, the arrays will be saved
        with names "arr_0", "arr_1", and so on. These arguments can be any
        expression.
    kwds : Keyword arguments, optional
        Arrays to save to the file. Arrays will be saved in the file with the
        keyword names.

    Returns
    -------
    None

    See Also
    --------
    save : Save a single array to a binary file in NumPy format.
    savetxt : Save an array to a file as plain text.
    savez_compressed : Save several arrays into a compressed ``.npz`` archive

    Notes
    -----
    The ``.npz`` file format is a zipped archive of files named after the
    variables they contain.  The archive is not compressed and each file
    in the archive contains one variable in ``.npy`` format. For a
    description of the ``.npy`` format, see :py:mod:`numpy.lib.format`.

    When opening the saved ``.npz`` file with `load`, an `NpzFile` object is
    returned. This is a dictionary-like object which can be queried for
    its list of arrays (with the ``.files`` attribute), and for the arrays
    themselves.

    When saving dictionaries, the dictionary keys become filenames
    inside the ZIP archive. Therefore, keys should be valid filenames.
    E.g., avoid keys that begin with ``/`` or contain ``.``.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()
    >>> x = np.arange(10)
    >>> y = np.sin(x)

    Using `savez` with \\*args, the arrays are saved with default names.

    >>> np.savez(outfile, x, y)
    >>> _ = outfile.seek(0) # Only needed here to simulate closing & reopening file
    >>> npzfile = np.load(outfile)
    >>> npzfile.files
    ['arr_0', 'arr_1']
    >>> npzfile['arr_0']
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    Using `savez` with \\**kwds, the arrays are saved with the keyword names.

    >>> outfile = TemporaryFile()
    >>> np.savez(outfile, x=x, y=y)
    >>> _ = outfile.seek(0)
    >>> npzfile = np.load(outfile)
    >>> sorted(npzfile.files)
    ['x', 'y']
    >>> npzfile['x']
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
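
    An existing dictionary of arrays can be saved by unpacking it as
    keyword arguments (``arrays`` here is a hypothetical dict):

    >>> outfile = TemporaryFile()
    >>> arrays = {'x': x, 'y': y}
    >>> np.savez(outfile, **arrays)
    >>> _ = outfile.seek(0)
    >>> sorted(np.load(outfile).files)
    ['x', 'y']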
    """
    _savez(file, args, kwds, False)


def _savez_compressed_dispatcher(file, *args, **kwds):
    yield from args
    yield from kwds.values()


@array_function_dispatch(_savez_compressed_dispatcher)
def savez_compressed(file, *args, **kwds):
    """
    Save several arrays into a single file in compressed ``.npz`` format.

    If keyword arguments are given, then filenames are taken from the keywords.
    If arguments are passed in with no keywords, then stored filenames are
    arr_0, arr_1, etc.

    Parameters
    ----------
    file : str or file
        Either the filename (string) or an open file (file-like object)
        where the data will be saved. If file is a string or a Path, the
        ``.npz`` extension will be appended to the filename if it is not
        already there.
    args : Arguments, optional
        Arrays to save to the file. Since it is not possible for Python to
        know the names of the arrays outside `savez`, the arrays will be saved
        with names "arr_0", "arr_1", and so on. These arguments can be any
        expression.
    kwds : Keyword arguments, optional
        Arrays to save to the file. Arrays will be saved in the file with the
        keyword names.

    Returns
    -------
    None

    See Also
    --------
    numpy.save : Save a single array to a binary file in NumPy format.
    numpy.savetxt : Save an array to a file as plain text.
    numpy.savez : Save several arrays into an uncompressed ``.npz`` file format
    numpy.load : Load the files created by savez_compressed.

    Notes
    -----
    The ``.npz`` file format is a zipped archive of files named after the
    variables they contain.  The archive is compressed with
    ``zipfile.ZIP_DEFLATED`` and each file in the archive contains one variable
    in ``.npy`` format. For a description of the ``.npy`` format, see
    :py:mod:`numpy.lib.format`.

    When opening the saved ``.npz`` file with `load`, an `NpzFile` object is
    returned. This is a dictionary-like object which can be queried for
    its list of arrays (with the ``.files`` attribute), and for the arrays
    themselves.

    Examples
    --------
    >>> test_array = np.random.rand(3, 2)
    >>> test_vector = np.random.rand(4)
    >>> np.savez_compressed('/tmp/123', a=test_array, b=test_vector)
    >>> loaded = np.load('/tmp/123.npz')
    >>> print(np.array_equal(test_array, loaded['a']))
    True
    >>> print(np.array_equal(test_vector, loaded['b']))
    True

    """
    _savez(file, args, kwds, True)


def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None):
    # Import is postponed to here since zipfile depends on gzip, an optional
    # component of the so-called standard library.
    import zipfile

    if not hasattr(file, 'write'):
        file = os_fspath(file)
        if not file.endswith('.npz'):
            file = file + '.npz'

    namedict = kwds
    for i, val in enumerate(args):
        key = 'arr_%d' % i
        if key in namedict.keys():
            raise ValueError(
                "Cannot use un-named variables and keyword %s" % key)
        namedict[key] = val

    if compress:
        compression = zipfile.ZIP_DEFLATED
    else:
        compression = zipfile.ZIP_STORED

    zipf = zipfile_factory(file, mode="w", compression=compression)

    for key, val in namedict.items():
        fname = key + '.npy'
        val = np.asanyarray(val)
        # always force zip64, gh-10776
        with zipf.open(fname, 'w', force_zip64=True) as fid:
            format.write_array(fid, val,
                               allow_pickle=allow_pickle,
                               pickle_kwargs=pickle_kwargs)

    zipf.close()


def _getconv(dtype):
    """ Find the correct dtype converter. Adapted from matplotlib """

    def floatconv(x):
        # Normalize case so hex literals like '0X1P-3' are also recognized
        x = x.lower()
        if '0x' in x:
            return float.fromhex(x)
        return float(x)

    typ = dtype.type
    if issubclass(typ, np.bool_):
        return lambda x: bool(int(x))
    if issubclass(typ, np.uint64):
        return np.uint64
    if issubclass(typ, np.int64):
        return np.int64
    if issubclass(typ, np.integer):
        return lambda x: int(float(x))
    elif issubclass(typ, np.longdouble):
        return np.longdouble
    elif issubclass(typ, np.floating):
        return floatconv
    elif issubclass(typ, complex):
        return lambda x: complex(asstr(x).replace('+-', '-'))
    elif issubclass(typ, np.bytes_):
        return asbytes
    elif issubclass(typ, np.unicode_):
        return asunicode
    else:
        return asstr
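

# Illustrative example: the converter returned for a float dtype also accepts
# C-style hex literals via `floatconv` above, e.g.
#   _getconv(np.dtype(float))('0x1.8p1')  # -> 3.0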


# amount of lines loadtxt reads in one chunk, can be overridden for testing
_loadtxt_chunksize = 50000


def _loadtxt_dispatcher(fname, dtype=None, comments=None, delimiter=None,
                        converters=None, skiprows=None, usecols=None, unpack=None,
                        ndmin=None, encoding=None, max_rows=None, *, like=None):
    return (like,)


@set_array_function_like_doc
@set_module('numpy')
def loadtxt(fname, dtype=float, comments='#', delimiter=None,
            converters=None, skiprows=0, usecols=None, unpack=False,
            ndmin=0, encoding='bytes', max_rows=None, *, like=None):
    r"""
    Load data from a text file.

    Each row in the text file must have the same number of values.

    Parameters
    ----------
    fname : file, str, or pathlib.Path
        File, filename, or generator to read.  If the filename extension is
        ``.gz`` or ``.bz2``, the file is first decompressed. Note that
        generators should return byte strings.
    dtype : data-type, optional
        Data-type of the resulting array; default: float.  If this is a
        structured data-type, the resulting array will be 1-dimensional, and
        each row will be interpreted as an element of the array.  In this
        case, the number of columns used must match the number of fields in
        the data-type.
    comments : str or sequence of str, optional
        The characters or list of characters used to indicate the start of a
        comment. None implies no comments. For backwards compatibility, byte
        strings will be decoded as 'latin1'. The default is '#'.
    delimiter : str, optional
        The string used to separate values. For backwards compatibility, byte
        strings will be decoded as 'latin1'. The default is whitespace.
    converters : dict, optional
        A dictionary mapping column number to a function that will parse the
        column string into the desired value.  E.g., if column 0 is a date
        string: ``converters = {0: datestr2num}``.  Converters can also be
        used to provide a default value for missing data (but see also
        `genfromtxt`): ``converters = {3: lambda s: float(s.strip() or 0)}``.
        Default: None.
    skiprows : int, optional
        Skip the first `skiprows` lines, including comments; default: 0.
    usecols : int or sequence, optional
        Which columns to read, with 0 being the first. For example,
        ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
        The default, None, results in all columns being read.

        .. versionchanged:: 1.11.0
            When a single column has to be read, it is possible to use
            an integer instead of a tuple. E.g. ``usecols = 3`` reads the
            fourth column the same way as ``usecols = (3,)`` would.
    unpack : bool, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = loadtxt(...)``.  When used with a
        structured data-type, arrays are returned for each field.
        Default is False.
    ndmin : int, optional
        The returned array will have at least `ndmin` dimensions.
        Otherwise mono-dimensional axes will be squeezed.
        Legal values: 0 (default), 1 or 2.

        .. versionadded:: 1.6.0
    encoding : str, optional
        Encoding used to decode the input file. Does not apply to input streams.
        The special value 'bytes' enables backward compatibility workarounds
        that ensure you receive byte arrays as results if possible and pass
        'latin1' encoded strings to converters. Override this value to receive
        unicode arrays and pass strings as input to converters.  If set to None
        the system default is used. The default value is 'bytes'.

        .. versionadded:: 1.14.0
    max_rows : int, optional
        Read `max_rows` lines of content after `skiprows` lines. The default
        is to read all the lines.

        .. versionadded:: 1.16.0
    ${ARRAY_FUNCTION_LIKE}

        .. versionadded:: 1.20.0

    Returns
    -------
    out : ndarray
        Data read from the text file.

    See Also
    --------
    load, fromstring, fromregex
    genfromtxt : Load data with missing values handled as specified.
    scipy.io.loadmat : reads MATLAB data files

    Notes
    -----
    This function aims to be a fast reader for simply formatted files.  The
    `genfromtxt` function provides more sophisticated handling of, e.g.,
    lines with missing values.

    .. versionadded:: 1.10.0

    The strings produced by the Python float.hex method can be used as
    input for floats.

    Examples
    --------
    >>> from io import StringIO   # StringIO behaves like a file object
    >>> c = StringIO("0 1\n2 3")
    >>> np.loadtxt(c)
    array([[0., 1.],
           [2., 3.]])

    >>> d = StringIO("M 21 72\nF 35 58")
    >>> np.loadtxt(d, dtype={'names': ('gender', 'age', 'weight'),
    ...                      'formats': ('S1', 'i4', 'f4')})
    array([(b'M', 21, 72.), (b'F', 35, 58.)],
          dtype=[('gender', 'S1'), ('age', '<i4'), ('weight', '<f4')])

    >>> c = StringIO("1,0,2\n3,0,4")
    >>> x, y = np.loadtxt(c, delimiter=',', usecols=(0, 2), unpack=True)
    >>> x
    array([1., 3.])
    >>> y
    array([2., 4.])

    This example shows how `converters` can be used to convert a field
    with a trailing minus sign into a negative number.

    >>> s = StringIO('10.01 31.25-\n19.22 64.31\n17.57- 63.94')
    >>> def conv(fld):
    ...     return -float(fld[:-1]) if fld.endswith(b'-') else float(fld)
    ...
    >>> np.loadtxt(s, converters={0: conv, 1: conv})
    array([[ 10.01, -31.25],
           [ 19.22,  64.31],
           [-17.57,  63.94]])
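
    Strings produced by ``float.hex`` can be read back as floats (an
    illustrative round-trip):

    >>> s = StringIO(float.hex(0.125))
    >>> np.loadtxt(s)
    array(0.125)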
    """

    if like is not None:
        return _loadtxt_with_like(
            fname, dtype=dtype, comments=comments, delimiter=delimiter,
            converters=converters, skiprows=skiprows, usecols=usecols,
            unpack=unpack, ndmin=ndmin, encoding=encoding,
            max_rows=max_rows, like=like
        )

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Nested functions used by loadtxt.
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # not to be confused with the flatten_dtype we import...
    @recursive
    def flatten_dtype_internal(self, dt):
        """Unpack a structured data-type, and produce re-packing info."""
        if dt.names is None:
            # If the dtype is flattened, return.
            # If the dtype has a shape, the dtype occurs
            # in the list more than once.
            shape = dt.shape
            if len(shape) == 0:
                return ([dt.base], None)
            else:
                packing = [(shape[-1], list)]
                if len(shape) > 1:
                    for dim in dt.shape[-2::-1]:
                        packing = [(dim*packing[0][0], packing*dim)]
                return ([dt.base] * int(np.prod(dt.shape)), packing)
        else:
            types = []
            packing = []
            for field in dt.names:
                tp, bytes = dt.fields[field]
                flat_dt, flat_packing = self(tp)
                types.extend(flat_dt)
                # Avoid extra nesting for subarrays
                if tp.ndim > 0:
                    packing.extend(flat_packing)
                else:
                    packing.append((len(flat_dt), flat_packing))
            return (types, packing)
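
    # Illustrative example: a dtype like np.dtype("i4,(2,3)f8") flattens to
    # one int32 plus six float64 converters; the packing info then regroups
    # the six float values of each parsed row into two lists of three.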

    @recursive
    def pack_items(self, items, packing):
        """Pack items into nested lists based on re-packing info."""
        if packing is None:
            return items[0]
        elif packing is tuple:
            return tuple(items)
        elif packing is list:
            return list(items)
        else:
            start = 0
            ret = []
            for length, subpacking in packing:
                ret.append(self(items[start:start+length], subpacking))
                start += length
            return tuple(ret)

    def split_line(line):
        """Chop off comments, strip, and split at delimiter."""
        line = _decode_line(line, encoding=encoding)

        if comments is not None:
            line = regex_comments.split(line, maxsplit=1)[0]
        line = line.strip('\r\n')
        return line.split(delimiter) if line else []

    def read_data(chunk_size):
        """Parse each line, including the first.

        The file being read, `fh`, comes from the enclosing scope.

        Parameters
        ----------
        chunk_size : int
            At most `chunk_size` lines are read at a time, with iteration
            until all lines are read.

        """
        X = []
        line_iter = itertools.chain([first_line], fh)
        line_iter = itertools.islice(line_iter, max_rows)
        for i, line in enumerate(line_iter):
            vals = split_line(line)
            if len(vals) == 0:
                continue
            if usecols:
                vals = [vals[j] for j in usecols]
            if len(vals) != N:
                line_num = i + skiprows + 1
                raise ValueError("Wrong number of columns at line %d"
                                 % line_num)

            # Convert each value according to its column and store
            items = [conv(val) for (conv, val) in zip(converters, vals)]

            # Then pack it according to the dtype's nesting
            items = pack_items(items, packing)
            X.append(items)
            if len(X) > chunk_size:
                yield X
                X = []
        if X:
            yield X

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Main body of loadtxt.
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Check correctness of the values of `ndmin`
    if ndmin not in [0, 1, 2]:
        raise ValueError('Illegal value of ndmin keyword: %s' % ndmin)

    # Type conversions for Py3 convenience
    if comments is not None:
        if isinstance(comments, (str, bytes)):
            comments = [comments]
        comments = [_decode_line(x) for x in comments]
        # Compile regex for comments beforehand
        comments = (re.escape(comment) for comment in comments)
        regex_comments = re.compile('|'.join(comments))

    if delimiter is not None:
        delimiter = _decode_line(delimiter)

    user_converters = converters

    byte_converters = False
    if encoding == 'bytes':
        encoding = None
        byte_converters = True

    if usecols is not None:
        # Allow usecols to be a single int or a sequence of ints
        try:
            usecols_as_list = list(usecols)
        except TypeError:
            usecols_as_list = [usecols]
        for col_idx in usecols_as_list:
            try:
                opindex(col_idx)
            except TypeError as e:
                e.args = (
                    "usecols must be an int or a sequence of ints but "
                    "it contains at least one element of type %s" %
                    type(col_idx),
                    )
                raise
        # Fall back to existing code
        usecols = usecols_as_list

    # Make sure we're dealing with a proper dtype
    dtype = np.dtype(dtype)
    defconv = _getconv(dtype)

    dtype_types, packing = flatten_dtype_internal(dtype)

    fown = False
    try:
        if isinstance(fname, os_PathLike):
            fname = os_fspath(fname)
        if _is_string_like(fname):
            fh = np.lib._datasource.open(fname, 'rt', encoding=encoding)
            fencoding = getattr(fh, 'encoding', 'latin1')
            fh = iter(fh)
            fown = True
        else:
            fh = iter(fname)
            fencoding = getattr(fname, 'encoding', 'latin1')
    except TypeError as e:
        raise ValueError(
            'fname must be a string, file handle, or generator'
        ) from e

    # input may be a python2 io stream
    if encoding is not None:
        fencoding = encoding
    # we must assume local encoding
    # TODO emit portability warning?
    elif fencoding is None:
        import locale
        fencoding = locale.getpreferredencoding()

    try:
        # Skip the first `skiprows` lines
        for i in range(skiprows):
            next(fh)

        # Read until we find a line with some values, and use
        # it to estimate the number of columns, N.
        first_vals = None
        try:
            while not first_vals:
                first_line = next(fh)
                first_vals = split_line(first_line)
        except StopIteration:
            # End of lines reached
            first_line = ''
            first_vals = []
            warnings.warn('loadtxt: Empty input file: "%s"' % fname,
                          stacklevel=2)
        N = len(usecols or first_vals)

        # Now that we know N, create the default converters list, and
        # set packing, if necessary.
        if len(dtype_types) > 1:
            # We're dealing with a structured array, each field of
            # the dtype matches a column
            converters = [_getconv(dt) for dt in dtype_types]
        else:
            # All fields have the same dtype
            converters = [defconv for i in range(N)]
            if N > 1:
                packing = [(N, tuple)]

        # By preference, use the converters specified by the user
        for i, conv in (user_converters or {}).items():
            if usecols:
                try:
                    i = usecols.index(i)
                except ValueError:
                    # Unused converter specified
                    continue
            if byte_converters:
                # converters may use decode to workaround numpy's old
                # behaviour, so encode the string again before passing to
                # the user converter
                def tobytes_first(x, conv):
                    if type(x) is bytes:
                        return conv(x)
                    return conv(x.encode("latin1"))
                converters[i] = functools.partial(tobytes_first, conv=conv)
            else:
                converters[i] = conv

        converters = [conv if conv is not bytes else
                      lambda x: x.encode(fencoding) for conv in converters]

        # read data in chunks and fill it into an array via resize
        # over-allocating and shrinking the array later may be faster but is
        # probably not relevant compared to the cost of actually reading and
        # converting the data
        X = None
        for x in read_data(_loadtxt_chunksize):
            if X is None:
                X = np.array(x, dtype)
            else:
                nshape = list(X.shape)
                pos = nshape[0]
                nshape[0] += len(x)
                X.resize(nshape, refcheck=False)
                X[pos:, ...] = x
    finally:
        if fown:
            fh.close()

    if X is None:
        X = np.array([], dtype)

    # Multicolumn data are returned with shape (1, N, M), i.e.
    # (1, 1, M) for a single row - remove the singleton dimension there
    if X.ndim == 3 and X.shape[:2] == (1, 1):
        X.shape = (1, -1)

    # Verify that the array has at least dimensions `ndmin`.
    # Tweak the size and shape of the arrays - remove extraneous dimensions
    if X.ndim > ndmin:
        X = np.squeeze(X)
    # and ensure we have the minimum number of dimensions asked for
    # - has to be in this order for the odd case ndmin=1, X.squeeze().ndim=0
    if X.ndim < ndmin:
        if ndmin == 1:
            X = np.atleast_1d(X)
        elif ndmin == 2:
            X = np.atleast_2d(X).T

    if unpack:
        if len(dtype_types) > 1:
            # For structured arrays, return an array for each field.
            return [X[field] for field in dtype.names]
        else:
            return X.T
    else:
        return X


_loadtxt_with_like = array_function_dispatch(
    _loadtxt_dispatcher
)(loadtxt)


def _savetxt_dispatcher(fname, X, fmt=None, delimiter=None, newline=None,
                        header=None, footer=None, comments=None,
                        encoding=None):
    return (X,)


@array_function_dispatch(_savetxt_dispatcher)
def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='',
            footer='', comments='# ', encoding=None):
    """
    Save an array to a text file.

    Parameters
    ----------
    fname : filename or file handle
        If the filename ends in ``.gz``, the file is automatically saved in
        compressed gzip format.  `loadtxt` understands gzipped files
        transparently.
    X : 1D or 2D array_like
        Data to be saved to a text file.
    fmt : str or sequence of strs, optional
        A single format (%10.5f), a sequence of formats, or a
        multi-format string, e.g. 'Iteration %d -- %10.5f', in which
        case `delimiter` is ignored. For complex `X`, the legal options
        for `fmt` are:

        * a single specifier, `fmt='%.4e'`, resulting in numbers formatted
          like `' (%s+%sj)' % (fmt, fmt)`
        * a full string specifying every real and imaginary part, e.g.
          `' %.4e %+.4ej %.4e %+.4ej %.4e %+.4ej'` for 3 columns
        * a list of specifiers, one per column - in this case, the real
          and imaginary part must have separate specifiers,
          e.g. `['%.3e + %.3ej', '(%.15e%+.15ej)']` for 2 columns
    delimiter : str, optional
        String or character separating columns.
    newline : str, optional
        String or character separating lines.

        .. versionadded:: 1.5.0
    header : str, optional
        String that will be written at the beginning of the file.

        .. versionadded:: 1.7.0
    footer : str, optional
        String that will be written at the end of the file.

        .. versionadded:: 1.7.0
    comments : str, optional
        String that will be prepended to the ``header`` and ``footer`` strings,
        to mark them as comments. Default: '# ',  as expected by e.g.
        ``numpy.loadtxt``.

        .. versionadded:: 1.7.0
    encoding : {None, str}, optional
        Encoding used to encode the output file. Does not apply to output
        streams. If the encoding is something other than 'bytes' or 'latin1',
        you will not be able to load the file in NumPy versions < 1.14. Default
        is 'latin1'.

        .. versionadded:: 1.14.0

    See Also
    --------
    save : Save an array to a binary file in NumPy ``.npy`` format
    savez : Save several arrays into an uncompressed ``.npz`` archive
    savez_compressed : Save several arrays into a compressed ``.npz`` archive

    Notes
    -----
    Further explanation of the `fmt` parameter
    (``%[flag]width[.precision]specifier``):

    flags:
        ``-`` : left justify

        ``+`` : Forces the result to be preceded with + or -.

        ``0`` : Left pad the number with zeros instead of space (see width).

    width:
        Minimum number of characters to be printed. The value is not truncated
        if it has more characters.

    precision:
        - For integer specifiers (e.g. ``d,i,o,x``), the minimum number of
          digits.
        - For ``e, E`` and ``f`` specifiers, the number of digits to print
          after the decimal point.
        - For ``g`` and ``G``, the maximum number of significant digits.
        - For ``s``, the maximum number of characters.

    specifiers:
        ``c`` : character

        ``d`` or ``i`` : signed decimal integer

        ``e`` or ``E`` : scientific notation with ``e`` or ``E``.

        ``f`` : decimal floating point

        ``g,G`` : use the shorter of ``e,E`` or ``f``

        ``o`` : signed octal

        ``s`` : string of characters

        ``u`` : unsigned decimal integer

        ``x,X`` : unsigned hexadecimal integer

    This explanation of ``fmt`` is not complete; for an exhaustive
    specification see [1]_.

    References
    ----------
    .. [1] `Format Specification Mini-Language
           <https://docs.python.org/library/string.html#format-specification-mini-language>`_,
           Python Documentation.

    Examples
    --------
    >>> x = y = z = np.arange(0.0,5.0,1.0)
    >>> np.savetxt('test.out', x, delimiter=',')   # X is an array
    >>> np.savetxt('test.out', (x,y,z))   # x,y,z equal sized 1D arrays
    >>> np.savetxt('test.out', x, fmt='%1.4e')   # use exponential notation
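
    For complex data, a single specifier is expanded for each column (a
    short sketch; ``test.out`` is overwritten):

    >>> zc = x + 1j*x            # complex values built from `x` above
    >>> np.savetxt('test.out', zc, fmt='%.4e')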

    """

    # Py3 conversions first
    if isinstance(fmt, bytes):
        fmt = asstr(fmt)
    delimiter = asstr(delimiter)

    class WriteWrap:
        """Convert to bytes on bytestream inputs."""

        def __init__(self, fh, encoding):
            self.fh = fh
            self.encoding = encoding
            self.do_write = self.first_write

        def close(self):
            self.fh.close()

        def write(self, v):
            self.do_write(v)

        def write_bytes(self, v):
            if isinstance(v, bytes):
                self.fh.write(v)
            else:
                self.fh.write(v.encode(self.encoding))

        def write_normal(self, v):
            self.fh.write(asunicode(v))

        def first_write(self, v):
            try:
                self.write_normal(v)
                self.write = self.write_normal
            except TypeError:
                # input is probably a bytestream
                self.write_bytes(v)
                self.write = self.write_bytes

    own_fh = False
    if isinstance(fname, os_PathLike):
        fname = os_fspath(fname)
    if _is_string_like(fname):
        # datasource doesn't support creating a new file ...
        open(fname, 'wt').close()
        fh = np.lib._datasource.open(fname, 'wt', encoding=encoding)
        own_fh = True
    elif hasattr(fname, 'write'):
        # wrap to handle byte output streams
        fh = WriteWrap(fname, encoding or 'latin1')
    else:
        raise ValueError('fname must be a string or file handle')

    try:
        X = np.asarray(X)

        # Handle 1-dimensional arrays
        if X.ndim == 0 or X.ndim > 2:
            raise ValueError(
                "Expected 1D or 2D array, got %dD array instead" % X.ndim)
        elif X.ndim == 1:
            # Common case -- 1d array of numbers
            if X.dtype.names is None:
                X = np.atleast_2d(X).T
                ncol = 1

            # Complex dtype -- each field indicates a separate column
            else:
                ncol = len(X.dtype.names)
        else:
            ncol = X.shape[1]

        iscomplex_X = np.iscomplexobj(X)
        # `fmt` can be a string with multiple insertion points or a
        # list of formats.  E.g. '%10.5f\t%10d' or ('%10.5f', '%10d')
        if type(fmt) in (list, tuple):
            if len(fmt) != ncol:
                raise AttributeError('fmt has wrong shape.  %s' % str(fmt))
            format = asstr(delimiter).join(map(asstr, fmt))
        elif isinstance(fmt, str):
            n_fmt_chars = fmt.count('%')
            error = ValueError('fmt has wrong number of %% formats:  %s' % fmt)
            if n_fmt_chars == 1:
                if iscomplex_X:
                    fmt = [' (%s+%sj)' % (fmt, fmt), ] * ncol
                else:
                    fmt = [fmt, ] * ncol
                format = delimiter.join(fmt)
            elif iscomplex_X and n_fmt_chars != (2 * ncol):
                raise error
            elif ((not iscomplex_X) and n_fmt_chars != ncol):
                raise error
            else:
                format = fmt
        else:
            raise ValueError('invalid fmt: %r' % (fmt,))

        if len(header) > 0:
            header = header.replace('\n', '\n' + comments)
            fh.write(comments + header + newline)
        if iscomplex_X:
            for row in X:
                row2 = []
                for number in row:
                    row2.append(number.real)
                    row2.append(number.imag)
                s = format % tuple(row2) + newline
                fh.write(s.replace('+-', '-'))
        else:
            for row in X:
                try:
                    v = format % tuple(row) + newline
                except TypeError as e:
                    raise TypeError("Mismatch between array dtype ('%s') and "
                                    "format specifier ('%s')"
                                    % (str(X.dtype), format)) from e
                fh.write(v)

        if len(footer) > 0:
            footer = footer.replace('\n', '\n' + comments)
            fh.write(comments + footer + newline)
    finally:
        if own_fh:
            fh.close()


@set_module('numpy')
def fromregex(file, regexp, dtype, encoding=None):
    """
    Construct an array from a text file, using regular expression parsing.

    The returned array is always a structured array, and is constructed from
    all matches of the regular expression in the file. Groups in the regular
    expression are converted to fields of the structured array.

    Parameters
    ----------
    file : str or file
        Filename or file object to read.
    regexp : str or regexp
        Regular expression used to parse the file.
        Groups in the regular expression correspond to fields in the dtype.
    dtype : dtype or list of dtypes
        Dtype for the structured array.
    encoding : str, optional
        Encoding used to decode the input file. Does not apply to input streams.

        .. versionadded:: 1.14.0

    Returns
    -------
    output : ndarray
        The output array, containing the part of the content of `file` that
        was matched by `regexp`. `output` is always a structured array.

    Raises
    ------
    TypeError
        When `dtype` is not a valid dtype for a structured array.

    See Also
    --------
    fromstring, loadtxt

    Notes
    -----
    Dtypes for structured arrays can be specified in several forms, but all
    forms specify at least the data type and field name. For details see
    `basics.rec`.

    Examples
    --------
    >>> f = open('test.dat', 'w')
    >>> _ = f.write("1312 foo\\n1534  bar\\n444   qux")
    >>> f.close()

    >>> regexp = r"(\\d+)\\s+(...)"  # match [digits, whitespace, anything]
    >>> output = np.fromregex('test.dat', regexp,
    ...                       [('num', np.int64), ('key', 'S3')])
    >>> output
    array([(1312, b'foo'), (1534, b'bar'), ( 444, b'qux')],
          dtype=[('num', '<i8'), ('key', 'S3')])
    >>> output['num']
    array([1312, 1534,  444])
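
    A pattern with a single group still yields a one-field structured
    array; a small sketch (any file-like object with a ``read`` method is
    accepted):

    >>> from io import StringIO
    >>> np.fromregex(StringIO("10 20 30"), r"(\\d+)", [('num', np.int64)])
    array([(10,), (20,), (30,)], dtype=[('num', '<i8')])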
    """
    own_fh = False
    if not hasattr(file, "read"):
        file = np.lib._datasource.open(file, 'rt', encoding=encoding)
        own_fh = True

    try:
        if not isinstance(dtype, np.dtype):
            dtype = np.dtype(dtype)

        content = file.read()
        if isinstance(content, bytes) and isinstance(regexp, np.compat.unicode):
            regexp = asbytes(regexp)
        elif isinstance(content, np.compat.unicode) and isinstance(regexp, bytes):
            regexp = asstr(regexp)

        if not hasattr(regexp, 'match'):
            regexp = re.compile(regexp)
        seq = regexp.findall(content)
        if seq and not isinstance(seq[0], tuple):
            # Only one group is in the regexp.
            # Create the new array as a single data-type and then
            #   re-interpret as a single-field structured array.
            newdtype = np.dtype(dtype[dtype.names[0]])
            output = np.array(seq, dtype=newdtype)
            output.dtype = dtype
        else:
            output = np.array(seq, dtype=dtype)

        return output
    finally:
        if own_fh:
            file.close()


#####--------------------------------------------------------------------------
#---- --- ASCII functions ---
#####--------------------------------------------------------------------------


def _genfromtxt_dispatcher(fname, dtype=None, comments=None, delimiter=None,
                           skip_header=None, skip_footer=None, converters=None,
                           missing_values=None, filling_values=None, usecols=None,
                           names=None, excludelist=None, deletechars=None,
                           replace_space=None, autostrip=None, case_sensitive=None,
                           defaultfmt=None, unpack=None, usemask=None, loose=None,
                           invalid_raise=None, max_rows=None, encoding=None, *,
                           like=None):
    return (like,)


@set_array_function_like_doc
@set_module('numpy')
def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
               skip_header=0, skip_footer=0, converters=None,
               missing_values=None, filling_values=None, usecols=None,
               names=None, excludelist=None,
               deletechars=''.join(sorted(NameValidator.defaultdeletechars)),
               replace_space='_', autostrip=False, case_sensitive=True,
               defaultfmt="f%i", unpack=None, usemask=False, loose=True,
               invalid_raise=True, max_rows=None, encoding='bytes', *,
               like=None):
    """
    Load data from a text file, with missing values handled as specified.

    Each line past the first `skip_header` lines is split at the `delimiter`
    character, and characters following the `comments` character are discarded.

    Parameters
    ----------
    fname : file, str, pathlib.Path, list of str, generator
        File, filename, list, or generator to read.  If the filename
        extension is `.gz` or `.bz2`, the file is first decompressed. Note
        that generators must return byte strings. The strings
        in a list or produced by a generator are treated as lines.
    dtype : dtype, optional
        Data type of the resulting array.
        If None, the dtypes will be determined by the contents of each
        column, individually.
    comments : str, optional
        The character used to indicate the start of a comment.
        All the characters occurring on a line after a comment are discarded.
    delimiter : str, int, or sequence, optional
        The string used to separate values.  By default, any consecutive
        whitespaces act as delimiter.  An integer or sequence of integers
        can also be provided as width(s) of each field.
    skiprows : int, optional
        `skiprows` was removed in numpy 1.10. Please use `skip_header` instead.
    skip_header : int, optional
        The number of lines to skip at the beginning of the file.
    skip_footer : int, optional
        The number of lines to skip at the end of the file.
    converters : variable, optional
        The set of functions that convert the data of a column to a value.
        The converters can also be used to provide a default value
        for missing data: ``converters = {3: lambda s: float(s or 0)}``.
    missing : variable, optional
        `missing` was removed in numpy 1.10. Please use `missing_values`
        instead.
    missing_values : variable, optional
        The set of strings corresponding to missing data.
    filling_values : variable, optional
        The set of values to be used as default when the data are missing.
    usecols : sequence, optional
        Which columns to read, with 0 being the first.  For example,
        ``usecols = (1, 4, 5)`` will extract the 2nd, 5th and 6th columns.
    names : {None, True, str, sequence}, optional
        If `names` is True, the field names are read from the first line after
        the first `skip_header` lines.  This line can optionally be preceded
        by a comment delimiter.  If `names` is a sequence or a comma-separated
        string of names, those names are used to define the field names of a
        structured dtype.  If `names` is None, the names of the dtype
        fields will be used, if any.
    excludelist : sequence, optional
        A list of names to exclude. This list is appended to the default list
        ['return','file','print']. An underscore is appended to excluded
        names: for example, `file` becomes `file_`.
    deletechars : str, optional
        A string combining invalid characters that must be deleted from the
        names.
    defaultfmt : str, optional
        A format used to define default field names, such as "f%i" or "f_%02i".
    autostrip : bool, optional
        Whether to automatically strip white spaces from the variables.
    replace_space : char, optional
        Character(s) used to replace white spaces in the variable names.
        By default, use a '_'.
    case_sensitive : {True, False, 'upper', 'lower'}, optional
        If True, field names are case sensitive.
        If False or 'upper', field names are converted to upper case.
        If 'lower', field names are converted to lower case.
    unpack : bool, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = genfromtxt(...)``.  When used with a
        structured data-type, arrays are returned for each field.
        Default is False.
    usemask : bool, optional
        If True, return a masked array.
        If False, return a regular array.
    loose : bool, optional
        If True, do not raise errors for invalid values.
    invalid_raise : bool, optional
        If True, an exception is raised if an inconsistency is detected in the
        number of columns.
        If False, a warning is emitted and the offending lines are skipped.
    max_rows : int, optional
        The maximum number of rows to read. Must not be used together with
        `skip_footer`.  If given, the value must be at least 1. Default is
        to read the entire file.

        .. versionadded:: 1.10.0
    encoding : str, optional
        Encoding used to decode the input file. Does not apply when `fname`
        is a file object.  The special value 'bytes' enables backward
        compatibility workarounds that ensure you receive byte arrays when
        possible and that latin1-encoded strings are passed to the
        converters.  Override this value to receive unicode arrays and to
        pass strings to the converters.  If set to None, the system default
        is used. The default value is 'bytes'.

        .. versionadded:: 1.14.0
    ${ARRAY_FUNCTION_LIKE}

        .. versionadded:: 1.20.0

    Returns
    -------
    out : ndarray
        Data read from the text file. If `usemask` is True, this is a
        masked array.

    See Also
    --------
    numpy.loadtxt : equivalent function when no data is missing.

    Notes
    -----
    * When spaces are used as delimiters, or when no delimiter has been given
      as input, there should not be any missing data between two fields.
    * When the variables are named (either by a flexible dtype or with `names`),
      there must not be any header in the file (else a ValueError
      exception is raised).
    * Individual values are not stripped of spaces by default.
      When using a custom converter, make sure the function removes spaces.

    References
    ----------
    .. [1] NumPy User Guide, section `I/O with NumPy
           <https://docs.scipy.org/doc/numpy/user/basics.io.genfromtxt.html>`_.

    Examples
    --------
    >>> from io import StringIO
    >>> import numpy as np

    Comma-delimited file with mixed dtype

    >>> s = StringIO(u"1,1.3,abcde")
    >>> data = np.genfromtxt(s, dtype=[('myint','i8'),('myfloat','f8'),
    ... ('mystring','S5')], delimiter=",")
    >>> data
    array((1, 1.3, b'abcde'),
          dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')])

    Using dtype = None

    >>> _ = s.seek(0) # needed for StringIO example only
    >>> data = np.genfromtxt(s, dtype=None,
    ... names = ['myint','myfloat','mystring'], delimiter=",")
    >>> data
    array((1, 1.3, b'abcde'),
          dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')])

    Specifying dtype and names

    >>> _ = s.seek(0)
    >>> data = np.genfromtxt(s, dtype="i8,f8,S5",
    ... names=['myint','myfloat','mystring'], delimiter=",")
    >>> data
    array((1, 1.3, b'abcde'),
          dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')])
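
    A small sketch of reading a subset of the columns with ``usecols``

    >>> _ = s.seek(0)
    >>> np.genfromtxt(s, dtype="i8,S5", usecols=(0, 2), delimiter=",")
    array((1, b'abcde'), dtype=[('f0', '<i8'), ('f1', 'S5')])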

    An example with fixed-width columns

    >>> s = StringIO(u"11.3abcde")
    >>> data = np.genfromtxt(s, dtype=None, names=['intvar','fltvar','strvar'],
    ...     delimiter=[1,3,5])
    >>> data
    array((1, 1.3, b'abcde'),
          dtype=[('intvar', '<i8'), ('fltvar', '<f8'), ('strvar', 'S5')])

    An example to show comments

    >>> f = StringIO('''
    ... text,# of chars
    ... hello world,11
    ... numpy,5''')
    >>> np.genfromtxt(f, dtype='S12,S12', delimiter=',')
    array([(b'text', b''), (b'hello world', b'11'), (b'numpy', b'5')],
          dtype=[('f0', 'S12'), ('f1', 'S12')])
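
    A sketch of filling missing entries via ``missing_values`` and
    ``filling_values`` (the marker "N/A" and fill value -1 are arbitrary
    choices here)

    >>> s = StringIO(u"1,N/A,3\\n4,5,N/A")
    >>> np.genfromtxt(s, delimiter=",", missing_values="N/A",
    ...               filling_values=-1)
    array([[ 1., -1.,  3.],
           [ 4.,  5., -1.]])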

    """

    if like is not None:
        return _genfromtxt_with_like(
            fname, dtype=dtype, comments=comments, delimiter=delimiter,
            skip_header=skip_header, skip_footer=skip_footer,
            converters=converters, missing_values=missing_values,
            filling_values=filling_values, usecols=usecols, names=names,
            excludelist=excludelist, deletechars=deletechars,
            replace_space=replace_space, autostrip=autostrip,
            case_sensitive=case_sensitive, defaultfmt=defaultfmt,
            unpack=unpack, usemask=usemask, loose=loose,
            invalid_raise=invalid_raise, max_rows=max_rows, encoding=encoding,
            like=like
        )

    if max_rows is not None:
        if skip_footer:
            raise ValueError(
                    "The keywords 'skip_footer' and 'max_rows' cannot be "
                    "specified at the same time.")
        if max_rows < 1:
            raise ValueError("'max_rows' must be at least 1.")

    if usemask:
        from numpy.ma import MaskedArray, make_mask_descr
    # Check the input dictionary of converters
    user_converters = converters or {}
    if not isinstance(user_converters, dict):
        raise TypeError(
            "The input argument 'converters' should be a valid dictionary "
            "(got '%s' instead)" % type(user_converters))

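    # Note: the 'bytes' sentinel keeps the pre-1.14 behaviour: the file is
    # decoded via latin1 and byte strings are handed to user converters.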
    if encoding == 'bytes':
        encoding = None
        byte_converters = True
    else:
        byte_converters = False

    # Initialize the filehandle, the LineSplitter and the NameValidator
    try:
        if isinstance(fname, os_PathLike):
            fname = os_fspath(fname)
        if isinstance(fname, str):
            fid = np.lib._datasource.open(fname, 'rt', encoding=encoding)
            fid_ctx = contextlib.closing(fid)
        else:
            fid = fname
            fid_ctx = contextlib_nullcontext(fid)
        fhd = iter(fid)
    except TypeError as e:
        raise TypeError(
            "fname must be a string, filehandle, list of strings, "
            "or generator. Got %s instead." % type(fname)) from e

    with fid_ctx:
        split_line = LineSplitter(delimiter=delimiter, comments=comments,
                                  autostrip=autostrip, encoding=encoding)
        validate_names = NameValidator(excludelist=excludelist,
                                       deletechars=deletechars,
                                       case_sensitive=case_sensitive,
                                       replace_space=replace_space)

        # Skip the first `skip_header` rows
        try:
            for i in range(skip_header):
                next(fhd)

            # Keep on until we find the first valid values
            first_values = None

            while not first_values:
                first_line = _decode_line(next(fhd), encoding)
                if (names is True) and (comments is not None):
                    if comments in first_line:
                        first_line = (
                            ''.join(first_line.split(comments)[1:]))
                first_values = split_line(first_line)
        except StopIteration:
            # return an empty array if the datafile is empty
            first_line = ''
            first_values = []
            warnings.warn('genfromtxt: Empty input file: "%s"' % fname,
                          stacklevel=2)

        # Should we take the first values as names?
        if names is True:
            fval = first_values[0].strip()
            if comments is not None:
                if fval in comments:
                    del first_values[0]

        # Check the columns to use: make sure `usecols` is a list
        if usecols is not None:
            try:
                usecols = [_.strip() for _ in usecols.split(",")]
            except AttributeError:
                try:
                    usecols = list(usecols)
                except TypeError:
                    usecols = [usecols, ]
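        # usecols is now a list of names/indices (or still None), e.g.
        # "a, c" -> ['a', 'c'], (1, 3) -> [1, 3], and 2 -> [2].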
        nbcols = len(usecols or first_values)

        # Check the names and overwrite the dtype.names if needed
        if names is True:
            names = validate_names([str(_.strip()) for _ in first_values])
            first_line = ''
        elif _is_string_like(names):
            names = validate_names([_.strip() for _ in names.split(',')])
        elif names:
            names = validate_names(names)
        # Get the dtype
        if dtype is not None:
            dtype = easy_dtype(dtype, defaultfmt=defaultfmt, names=names,
                               excludelist=excludelist,
                               deletechars=deletechars,
                               case_sensitive=case_sensitive,
                               replace_space=replace_space)
        # Make sure `names` is stored as a list
        if names is not None:
            names = list(names)

        if usecols:
            for (i, current) in enumerate(usecols):
                # if usecols is a list of names, convert to a list of indices
                if _is_string_like(current):
                    usecols[i] = names.index(current)
                elif current < 0:
                    usecols[i] = current + len(first_values)
            # If the dtype is not None, make sure we update it
            if (dtype is not None) and (len(dtype) > nbcols):
                descr = dtype.descr
                dtype = np.dtype([descr[_] for _ in usecols])
                names = list(dtype.names)
            # If `names` is not None, update the names
            elif (names is not None) and (len(names) > nbcols):
                names = [names[_] for _ in usecols]
        elif (names is not None) and (dtype is not None):
            names = list(dtype.names)

        # Process the missing values ...............................
        # Rename missing_values for convenience
        user_missing_values = missing_values or ()
        if isinstance(user_missing_values, bytes):
            user_missing_values = user_missing_values.decode('latin1')

        # Define the list of missing_values (one column: one list)
        missing_values = [list(['']) for _ in range(nbcols)]

        # We have a dictionary: process it field by field
        if isinstance(user_missing_values, dict):
            # Loop on the items
            for (key, val) in user_missing_values.items():
                # Is the key a string?
                if _is_string_like(key):
                    try:
                        # Transform it into an integer
                        key = names.index(key)
                    except ValueError:
                        # We couldn't find it: the name must have been dropped
                        continue
                # Redefine the key as needed if it's a column number
                if usecols:
                    try:
                        key = usecols.index(key)
                    except ValueError:
                        pass
                # Transform the value into a list of strings
                if isinstance(val, (list, tuple)):
                    val = [str(_) for _ in val]
                else:
                    val = [str(val), ]
                # Add the value(s) to the current list of missing
                if key is None:
                    # None acts as default
                    for miss in missing_values:
                        miss.extend(val)
                else:
                    missing_values[key].extend(val)
        # We have a sequence: each item matches a column
        elif isinstance(user_missing_values, (list, tuple)):
            for (value, entry) in zip(user_missing_values, missing_values):
                value = str(value)
                if value not in entry:
                    entry.append(value)
        # We have a string: apply it to all entries
        elif isinstance(user_missing_values, str):
            user_value = user_missing_values.split(",")
            for entry in missing_values:
                entry.extend(user_value)
        # We have something else: apply it to all entries
        else:
            for entry in missing_values:
                entry.extend([str(user_missing_values)])

        # Process the filling_values ...............................
        # Rename the input for convenience
        user_filling_values = filling_values
        if user_filling_values is None:
            user_filling_values = []
        # Define the default
        filling_values = [None] * nbcols
        # We have a dictionary: update each entry individually
        if isinstance(user_filling_values, dict):
            for (key, val) in user_filling_values.items():
                if _is_string_like(key):
                    try:
                        # Transform it into an integer
                        key = names.index(key)
                    except ValueError:
                        # We couldn't find it: the name must have been dropped
                        continue
                # Redefine the key if it's a column number and usecols is defined
                if usecols:
                    try:
                        key = usecols.index(key)
                    except ValueError:
                        pass
                # Add the value to the list
                filling_values[key] = val
        # We have a sequence: update on a one-to-one basis
        elif isinstance(user_filling_values, (list, tuple)):
            n = len(user_filling_values)
            if (n <= nbcols):
                filling_values[:n] = user_filling_values
            else:
                filling_values = user_filling_values[:nbcols]
        # We have something else: use it for all entries
        else:
            filling_values = [user_filling_values] * nbcols

        # Initialize the converters ................................
        if dtype is None:
            # Note: we can't use [...] * nbcols, as we would get nbcols
            # references to one converter, instead of nbcols distinct ones.
            converters = [StringConverter(None, missing_values=miss, default=fill)
                          for (miss, fill) in zip(missing_values, filling_values)]
        else:
            dtype_flat = flatten_dtype(dtype, flatten_base=True)
            # Initialize the converters
            if len(dtype_flat) > 1:
                # Flexible type: get a converter from each dtype
                zipit = zip(dtype_flat, missing_values, filling_values)
                converters = [StringConverter(dt, locked=True,
                                              missing_values=miss, default=fill)
                              for (dt, miss, fill) in zipit]
            else:
                # Set to a default converter (but w/ different missing values)
                zipit = zip(missing_values, filling_values)
                converters = [StringConverter(dtype, locked=True,
                                              missing_values=miss, default=fill)
                              for (miss, fill) in zipit]
        # Update the converters to use the user-defined ones
        uc_update = []
        for (j, conv) in user_converters.items():
            # If the converter is specified by column names, use the index instead
            if _is_string_like(j):
                try:
                    j = names.index(j)
                    i = j
                except ValueError:
                    continue
            elif usecols:
                try:
                    i = usecols.index(j)
                except ValueError:
                    # Unused converter specified
                    continue
            else:
                i = j
            # Find the value to test - first_line is not filtered by usecols:
            if len(first_line):
                testing_value = first_values[j]
            else:
                testing_value = None
            if conv is bytes:
                user_conv = asbytes
            elif byte_converters:
                # converters may use decode to work around numpy's old
                # behaviour, so encode the string again before passing it
                # to the user converter
                def tobytes_first(x, conv):
                    if type(x) is bytes:
                        return conv(x)
                    return conv(x.encode("latin1"))
                user_conv = functools.partial(tobytes_first, conv=conv)
            else:
                user_conv = conv
            converters[i].update(user_conv, locked=True,
                                 testing_value=testing_value,
                                 default=filling_values[i],
                                 missing_values=missing_values[i],)
            uc_update.append((i, user_conv))
        # Make sure we have the corrected keys in user_converters...
        user_converters.update(uc_update)

        # FIXME: possible error, as the following variable is never used.
        # miss_chars = [_.missing_values for _ in converters]

        # Initialize the output lists ...
        # ... rows
        rows = []
        append_to_rows = rows.append
        # ... masks
        if usemask:
            masks = []
            append_to_masks = masks.append
        # ... invalid
        invalid = []
        append_to_invalid = invalid.append

        # Parse each line
        for (i, line) in enumerate(itertools.chain([first_line, ], fhd)):
            values = split_line(line)
            nbvalues = len(values)
            # Skip an empty line
            if nbvalues == 0:
                continue
            if usecols:
                # Select only the columns we need
                try:
                    values = [values[_] for _ in usecols]
                except IndexError:
                    append_to_invalid((i + skip_header + 1, nbvalues))
                    continue
            elif nbvalues != nbcols:
                append_to_invalid((i + skip_header + 1, nbvalues))
                continue
            # Store the values
            append_to_rows(tuple(values))
            if usemask:
                append_to_masks(tuple([v.strip() in m
                                       for (v, m) in zip(values,
                                                         missing_values)]))
            if len(rows) == max_rows:
                break

    # Upgrade the converters (if needed)
    if dtype is None:
        for (i, converter) in enumerate(converters):
            current_column = [itemgetter(i)(_m) for _m in rows]
            try:
                converter.iterupgrade(current_column)
            except ConverterLockError:
                errmsg = "Converter #%i is locked and cannot be upgraded: " % i
                current_column = map(itemgetter(i), rows)
                for (j, value) in enumerate(current_column):
                    try:
                        converter.upgrade(value)
                    except (ConverterError, ValueError):
                        errmsg += "(occurred line #%i for value '%s')"
                        errmsg %= (j + 1 + skip_header, value)
                        raise ConverterError(errmsg)

    # Check that we don't have invalid values
    nbinvalid = len(invalid)
    if nbinvalid > 0:
        nbrows = len(rows) + nbinvalid - skip_footer
        # Construct the error message
        template = "    Line #%%i (got %%i columns instead of %i)" % nbcols
        if skip_footer > 0:
            nbinvalid_skipped = len([_ for _ in invalid
                                     if _[0] > nbrows + skip_header])
            invalid = invalid[:nbinvalid - nbinvalid_skipped]
            skip_footer -= nbinvalid_skipped
#
#            nbrows -= skip_footer
#            errmsg = [template % (i, nb)
#                      for (i, nb) in invalid if i < nbrows]
#        else:
        errmsg = [template % (i, nb)
                  for (i, nb) in invalid]
        if len(errmsg):
            errmsg.insert(0, "Some errors were detected!")
            errmsg = "\n".join(errmsg)
            # Raise an exception?
            if invalid_raise:
                raise ValueError(errmsg)
            # Issue a warning?
            else:
                warnings.warn(errmsg, ConversionWarning, stacklevel=2)

    # Strip the last skip_footer data
    if skip_footer > 0:
        rows = rows[:-skip_footer]
        if usemask:
            masks = masks[:-skip_footer]

    # Convert each value according to the converter:
    # We want to modify the list in place to avoid creating a new one...
    if loose:
        rows = list(
            zip(*[[conv._loose_call(_r) for _r in map(itemgetter(i), rows)]
                  for (i, conv) in enumerate(converters)]))
    else:
        rows = list(
            zip(*[[conv._strict_call(_r) for _r in map(itemgetter(i), rows)]
                  for (i, conv) in enumerate(converters)]))

    # Reset the dtype
    data = rows
    if dtype is None:
        # Get the dtypes from the types of the converters
        column_types = [conv.type for conv in converters]
        # Find the columns with strings...
        strcolidx = [i for (i, v) in enumerate(column_types)
                     if v == np.unicode_]

        if byte_converters and strcolidx:
            # convert strings back to bytes for backward compatibility
            warnings.warn(
                "Reading unicode strings without specifying the encoding "
                "argument is deprecated. Set the encoding, use None for the "
                "system default.",
                np.VisibleDeprecationWarning, stacklevel=2)
            def encode_unicode_cols(row_tup):
                row = list(row_tup)
                for i in strcolidx:
                    row[i] = row[i].encode('latin1')
                return tuple(row)

            try:
                data = [encode_unicode_cols(r) for r in data]
            except UnicodeEncodeError:
                pass
            else:
                for i in strcolidx:
                    column_types[i] = np.bytes_

        # Update string types to be the right length
        sized_column_types = column_types[:]
        for i, col_type in enumerate(column_types):
            if np.issubdtype(col_type, np.character):
                n_chars = max(len(row[i]) for row in data)
                sized_column_types[i] = (col_type, n_chars)

        if names is None:
            # If the dtype is uniform (before sizing strings)
            base = {
                c_type
                for c, c_type in zip(converters, column_types)
                if c._checked}
            if len(base) == 1:
                uniform_type, = base
                (ddtype, mdtype) = (uniform_type, bool)
            else:
                ddtype = [(defaultfmt % i, dt)
                          for (i, dt) in enumerate(sized_column_types)]
                if usemask:
                    mdtype = [(defaultfmt % i, bool)
                              for (i, dt) in enumerate(sized_column_types)]
        else:
            ddtype = list(zip(names, sized_column_types))
            mdtype = list(zip(names, [bool] * len(sized_column_types)))
        output = np.array(data, dtype=ddtype)
        if usemask:
            outputmask = np.array(masks, dtype=mdtype)
    else:
        # Overwrite the initial dtype names if needed
        if names and dtype.names is not None:
            dtype.names = names
        # Case 1. We have a structured type
        if len(dtype_flat) > 1:
            # Nested dtype, e.g. [('a', int), ('b', [('b0', int), ('b1', 'f4')])]
            # First, create the array using a flattened dtype:
            # [('a', int), ('b1', int), ('b2', float)]
            # Then, view the array using the specified dtype.
            if 'O' in (_.char for _ in dtype_flat):
                if has_nested_fields(dtype):
                    raise NotImplementedError(
                        "Nested fields involving objects are not supported...")
                else:
                    output = np.array(data, dtype=dtype)
            else:
                rows = np.array(data, dtype=[('', _) for _ in dtype_flat])
                output = rows.view(dtype)
            # Now, process the rowmasks the same way
            if usemask:
                rowmasks = np.array(
                    masks, dtype=np.dtype([('', bool) for t in dtype_flat]))
                # Construct the new dtype
                mdtype = make_mask_descr(dtype)
                outputmask = rowmasks.view(mdtype)
        # Case #2. We have a basic dtype
        else:
            # We used some user-defined converters
            if user_converters:
                ishomogeneous = True
                descr = []
                for i, ttype in enumerate([conv.type for conv in converters]):
                    # Keep the dtype of the current converter
                    if i in user_converters:
                        ishomogeneous &= (ttype == dtype.type)
                        if np.issubdtype(ttype, np.character):
                            ttype = (ttype, max(len(row[i]) for row in data))
                        descr.append(('', ttype))
                    else:
                        descr.append(('', dtype))
                # So, did we change the dtype?
                if not ishomogeneous:
                    # We have more than one field
                    if len(descr) > 1:
                        dtype = np.dtype(descr)
                    # We have only one field: drop the name if not needed.
                    else:
                        dtype = np.dtype(ttype)
            #
            output = np.array(data, dtype)
            if usemask:
                if dtype.names is not None:
                    mdtype = [(_, bool) for _ in dtype.names]
                else:
                    mdtype = bool
                outputmask = np.array(masks, dtype=mdtype)
    # Try to take care of the missing data we missed
    names = output.dtype.names
    if usemask and names:
        for (name, conv) in zip(names, converters):
            missing_values = [conv(_) for _ in conv.missing_values
                              if _ != '']
            for mval in missing_values:
                outputmask[name] |= (output[name] == mval)
    # Construct the final array
    if usemask:
        output = output.view(MaskedArray)
        output._mask = outputmask
    output = np.squeeze(output)
    if unpack:
        if names is None:
            return output.T
        elif len(names) == 1:
            # squeeze single-name dtypes too
            return output[names[0]]
        else:
            # For structured arrays with multiple fields,
            # return an array for each field.
            return [output[field] for field in names]
    return output

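# Register genfromtxt with the array-function protocol so that the ``like=``
# argument (NEP 35) can defer array creation to a compatible library.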
_genfromtxt_with_like = array_function_dispatch(
    _genfromtxt_dispatcher
)(genfromtxt)


def ndfromtxt(fname, **kwargs):
    """
    Load ASCII data stored in a file and return it as a single array.

    .. deprecated:: 1.17
        `ndfromtxt` is a deprecated alias of `genfromtxt` which
        overwrites the ``usemask`` argument with `False` even when
        explicitly called as ``ndfromtxt(..., usemask=True)``.
        Use `genfromtxt` instead.

    Parameters
    ----------
    fname, kwargs : For a description of input parameters, see `genfromtxt`.

    See Also
    --------
    numpy.genfromtxt : generic function.

    """
    kwargs['usemask'] = False
    # NumPy 1.17
    warnings.warn(
        "np.ndfromtxt is a deprecated alias of np.genfromtxt, "
        "prefer the latter.",
        DeprecationWarning, stacklevel=2)
    return genfromtxt(fname, **kwargs)


def mafromtxt(fname, **kwargs):
    """
    Load ASCII data stored in a text file and return a masked array.

    .. deprecated:: 1.17
        np.mafromtxt is a deprecated alias of `genfromtxt` which
        overwrites the ``usemask`` argument with `True` even when
        explicitly called as ``mafromtxt(..., usemask=False)``.
        Use `genfromtxt` instead.

    Parameters
    ----------
    fname, kwargs : For a description of input parameters, see `genfromtxt`.

    See Also
    --------
    numpy.genfromtxt : generic function to load ASCII data.

    """
    kwargs['usemask'] = True
    # NumPy 1.17
    warnings.warn(
        "np.mafromtxt is a deprecated alias of np.genfromtxt, "
        "prefer the latter.",
        DeprecationWarning, stacklevel=2)
    return genfromtxt(fname, **kwargs)


def recfromtxt(fname, **kwargs):
    """
    Load ASCII data from a file and return it in a record array.

    If ``usemask=False`` a standard `recarray` is returned;
    if ``usemask=True`` a MaskedRecords array is returned.

    Parameters
    ----------
    fname, kwargs : For a description of input parameters, see `genfromtxt`.

    See Also
    --------
    numpy.genfromtxt : generic function

    Notes
    -----
    By default, `dtype` is None, which means that the data-type of the output
    array will be determined from the data.
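
    Examples
    --------
    A minimal sketch (the field names ``a`` and ``b`` are arbitrary choices):

    >>> from io import StringIO
    >>> data = np.recfromtxt(StringIO(u"1 2.5\\n3 4.5"),
    ...                      names=['a', 'b'], encoding='utf-8')
    >>> data.a
    array([1, 3])
    >>> data.b
    array([2.5, 4.5])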

    """
    kwargs.setdefault("dtype", None)
    usemask = kwargs.get('usemask', False)
    output = genfromtxt(fname, **kwargs)
    if usemask:
        from numpy.ma.mrecords import MaskedRecords
        output = output.view(MaskedRecords)
    else:
        output = output.view(np.recarray)
    return output


def recfromcsv(fname, **kwargs):
    """
    Load ASCII data stored in a comma-separated file.

    The returned array is a record array (if ``usemask=False``, see
    `recarray`) or a masked record array (if ``usemask=True``,
    see `ma.mrecords.MaskedRecords`).

    Parameters
    ----------
    fname, kwargs : For a description of input parameters, see `genfromtxt`.

    See Also
    --------
    numpy.genfromtxt : generic function to load ASCII data.

    Notes
    -----
    By default, `dtype` is None, which means that the data-type of the output
    array will be determined from the data.
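
    Examples
    --------
    A minimal sketch (the header row supplies the field names, lower-cased
    by default):

    >>> from io import StringIO
    >>> data = np.recfromcsv(StringIO(u"A,B\\n1,2.5\\n3,4.5"),
    ...                      encoding='utf-8')
    >>> data.a
    array([1, 3])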

    """
    # Set default kwargs for genfromtxt as relevant to csv import.
    kwargs.setdefault("case_sensitive", "lower")
    kwargs.setdefault("names", True)
    kwargs.setdefault("delimiter", ",")
    kwargs.setdefault("dtype", None)
    output = genfromtxt(fname, **kwargs)

    usemask = kwargs.get("usemask", False)
    if usemask:
        from numpy.ma.mrecords import MaskedRecords
        output = output.view(MaskedRecords)
    else:
        output = output.view(np.recarray)
    return output