1import sys
2import gc
3import gzip
4import os
5import threading
6import time
7import warnings
8import io
9import re
10import pytest
11from pathlib import Path
12from tempfile import NamedTemporaryFile
13from io import BytesIO, StringIO
14from datetime import datetime
15import locale
16from multiprocessing import Process, Value
17from ctypes import c_bool
18
19import numpy as np
20import numpy.ma as ma
21from numpy.lib._iotools import ConverterError, ConversionWarning
22from numpy.compat import asbytes
23from numpy.ma.testutils import assert_equal
24from numpy.testing import (
25    assert_warns, assert_, assert_raises_regex, assert_raises,
26    assert_allclose, assert_array_equal, temppath, tempdir, IS_PYPY,
27    HAS_REFCOUNT, suppress_warnings, assert_no_gc_cycles, assert_no_warnings,
28    break_cycles
29    )
30from numpy.testing._private.utils import requires_memory
31
32
class TextIO(BytesIO):
    """Binary in-memory stream that also accepts ``str`` input.

    Everything handed to the constructor, `write` or `writelines` is passed
    through ``asbytes`` first, so strings are encoded to bytes if needed;
    reads always return bytes.  This makes it easier to emulate files
    opened in binary mode without needing to explicitly convert strings to
    bytes in setting up the test data.

    """
    def __init__(self, s=""):
        super().__init__(asbytes(s))

    def write(self, s):
        # Hand the byte count back to the caller, as BytesIO.write does;
        # without the ``return`` the override always yielded None.
        return super().write(asbytes(s))

    def writelines(self, lines):
        super().writelines([asbytes(s) for s in lines])
50
51
# True when sys.maxsize exceeds 2**32.
IS_64BIT = sys.maxsize > 2**32

# bz2 and lzma are optional: record whether each one could be imported so
# tests can be skipped when a module is missing.
try:
    import bz2
except ImportError:
    HAS_BZ2 = False
else:
    HAS_BZ2 = True

try:
    import lzma
except ImportError:
    HAS_LZMA = False
else:
    HAS_LZMA = True
63
64
def strptime(s, fmt=None):
    """Parse a date given as text or bytes into a ``datetime`` at midnight.

    Parameters
    ----------
    s : str or bytes
        Text to parse.  Bytes, including subclasses such as ``np.bytes_``,
        are decoded as latin1 first.
    fmt : str, optional
        Format string passed to ``time.strptime``.  When omitted,
        ``time.strptime`` is called with its own default format.

    Returns
    -------
    datetime
        Built from the parsed year, month and day only; any time-of-day
        part of `s` is discarded.

    """
    # isinstance (not an exact type comparison) so that bytes subclasses
    # are decoded too; time.strptime only accepts str.
    if isinstance(s, bytes):
        s = s.decode("latin1")
    # time.strptime rejects None as a format, so only forward `fmt` when
    # the caller supplied one.
    parsed = time.strptime(s) if fmt is None else time.strptime(s, fmt)
    return datetime(*parsed[:3])
74
75
76class RoundtripTest:
77    def roundtrip(self, save_func, *args, **kwargs):
78        """
79        save_func : callable
80            Function used to save arrays to file.
81        file_on_disk : bool
82            If true, store the file on disk, instead of in a
83            string buffer.
84        save_kwds : dict
85            Parameters passed to `save_func`.
86        load_kwds : dict
87            Parameters passed to `numpy.load`.
88        args : tuple of arrays
89            Arrays stored to file.
90
91        """
92        save_kwds = kwargs.get('save_kwds', {})
93        load_kwds = kwargs.get('load_kwds', {"allow_pickle": True})
94        file_on_disk = kwargs.get('file_on_disk', False)
95
96        if file_on_disk:
97            target_file = NamedTemporaryFile(delete=False)
98            load_file = target_file.name
99        else:
100            target_file = BytesIO()
101            load_file = target_file
102
103        try:
104            arr = args
105
106            save_func(target_file, *arr, **save_kwds)
107            target_file.flush()
108            target_file.seek(0)
109
110            if sys.platform == 'win32' and not isinstance(target_file, BytesIO):
111                target_file.close()
112
113            arr_reloaded = np.load(load_file, **load_kwds)
114
115            self.arr = arr
116            self.arr_reloaded = arr_reloaded
117        finally:
118            if not isinstance(target_file, BytesIO):
119                target_file.close()
120                # holds an open file descriptor so it can't be deleted on win
121                if 'arr_reloaded' in locals():
122                    if not isinstance(arr_reloaded, np.lib.npyio.NpzFile):
123                        os.remove(target_file.name)
124
125    def check_roundtrips(self, a):
126        self.roundtrip(a)
127        self.roundtrip(a, file_on_disk=True)
128        self.roundtrip(np.asfortranarray(a))
129        self.roundtrip(np.asfortranarray(a), file_on_disk=True)
130        if a.shape[0] > 1:
131            # neither C nor Fortran contiguous for 2D arrays or more
132            self.roundtrip(np.asfortranarray(a)[1:])
133            self.roundtrip(np.asfortranarray(a)[1:], file_on_disk=True)
134
135    def test_array(self):
136        a = np.array([], float)
137        self.check_roundtrips(a)
138
139        a = np.array([[1, 2], [3, 4]], float)
140        self.check_roundtrips(a)
141
142        a = np.array([[1, 2], [3, 4]], int)
143        self.check_roundtrips(a)
144
145        a = np.array([[1 + 5j, 2 + 6j], [3 + 7j, 4 + 8j]], dtype=np.csingle)
146        self.check_roundtrips(a)
147
148        a = np.array([[1 + 5j, 2 + 6j], [3 + 7j, 4 + 8j]], dtype=np.cdouble)
149        self.check_roundtrips(a)
150
151    def test_array_object(self):
152        a = np.array([], object)
153        self.check_roundtrips(a)
154
155        a = np.array([[1, 2], [3, 4]], object)
156        self.check_roundtrips(a)
157
158    def test_1D(self):
159        a = np.array([1, 2, 3, 4], int)
160        self.roundtrip(a)
161
162    @pytest.mark.skipif(sys.platform == 'win32', reason="Fails on Win32")
163    def test_mmap(self):
164        a = np.array([[1, 2.5], [4, 7.3]])
165        self.roundtrip(a, file_on_disk=True, load_kwds={'mmap_mode': 'r'})
166
167        a = np.asfortranarray([[1, 2.5], [4, 7.3]])
168        self.roundtrip(a, file_on_disk=True, load_kwds={'mmap_mode': 'r'})
169
170    def test_record(self):
171        a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')])
172        self.check_roundtrips(a)
173
174    @pytest.mark.slow
175    def test_format_2_0(self):
176        dt = [(("%d" % i) * 100, float) for i in range(500)]
177        a = np.ones(1000, dtype=dt)
178        with warnings.catch_warnings(record=True):
179            warnings.filterwarnings('always', '', UserWarning)
180            self.check_roundtrips(a)
181
182
class TestSaveLoad(RoundtripTest):
    """Run the inherited round-trip tests through ``np.save``."""

    def roundtrip(self, *args, **kwargs):
        """Round-trip via ``np.save`` and compare with the first argument."""
        super().roundtrip(np.save, *args, **kwargs)
        original, reloaded = self.arr[0], self.arr_reloaded
        assert_equal(original, reloaded)
        assert_equal(original.dtype, reloaded.dtype)
        assert_equal(original.flags.fnc, reloaded.flags.fnc)
189
190
191class TestSavezLoad(RoundtripTest):
192    def roundtrip(self, *args, **kwargs):
193        RoundtripTest.roundtrip(self, np.savez, *args, **kwargs)
194        try:
195            for n, arr in enumerate(self.arr):
196                reloaded = self.arr_reloaded['arr_%d' % n]
197                assert_equal(arr, reloaded)
198                assert_equal(arr.dtype, reloaded.dtype)
199                assert_equal(arr.flags.fnc, reloaded.flags.fnc)
200        finally:
201            # delete tempfile, must be done here on windows
202            if self.arr_reloaded.fid:
203                self.arr_reloaded.fid.close()
204                os.remove(self.arr_reloaded.fid.name)
205
206    @pytest.mark.skipif(not IS_64BIT, reason="Needs 64bit platform")
207    @pytest.mark.slow
208    def test_big_arrays(self):
209        L = (1 << 31) + 100000
210        a = np.empty(L, dtype=np.uint8)
211        with temppath(prefix="numpy_test_big_arrays_", suffix=".npz") as tmp:
212            np.savez(tmp, a=a)
213            del a
214            npfile = np.load(tmp)
215            a = npfile['a']  # Should succeed
216            npfile.close()
217            del a  # Avoid pyflakes unused variable warning.
218
219    def test_multiple_arrays(self):
220        a = np.array([[1, 2], [3, 4]], float)
221        b = np.array([[1 + 2j, 2 + 7j], [3 - 6j, 4 + 12j]], complex)
222        self.roundtrip(a, b)
223
224    def test_named_arrays(self):
225        a = np.array([[1, 2], [3, 4]], float)
226        b = np.array([[1 + 2j, 2 + 7j], [3 - 6j, 4 + 12j]], complex)
227        c = BytesIO()
228        np.savez(c, file_a=a, file_b=b)
229        c.seek(0)
230        l = np.load(c)
231        assert_equal(a, l['file_a'])
232        assert_equal(b, l['file_b'])
233
234    def test_BagObj(self):
235        a = np.array([[1, 2], [3, 4]], float)
236        b = np.array([[1 + 2j, 2 + 7j], [3 - 6j, 4 + 12j]], complex)
237        c = BytesIO()
238        np.savez(c, file_a=a, file_b=b)
239        c.seek(0)
240        l = np.load(c)
241        assert_equal(sorted(dir(l.f)), ['file_a','file_b'])
242        assert_equal(a, l.f.file_a)
243        assert_equal(b, l.f.file_b)
244
245    def test_savez_filename_clashes(self):
246        # Test that issue #852 is fixed
247        # and savez functions in multithreaded environment
248
249        def writer(error_list):
250            with temppath(suffix='.npz') as tmp:
251                arr = np.random.randn(500, 500)
252                try:
253                    np.savez(tmp, arr=arr)
254                except OSError as err:
255                    error_list.append(err)
256
257        errors = []
258        threads = [threading.Thread(target=writer, args=(errors,))
259                   for j in range(3)]
260        for t in threads:
261            t.start()
262        for t in threads:
263            t.join()
264
265        if errors:
266            raise AssertionError(errors)
267
268    def test_not_closing_opened_fid(self):
269        # Test that issue #2178 is fixed:
270        # verify could seek on 'loaded' file
271        with temppath(suffix='.npz') as tmp:
272            with open(tmp, 'wb') as fp:
273                np.savez(fp, data='LOVELY LOAD')
274            with open(tmp, 'rb', 10000) as fp:
275                fp.seek(0)
276                assert_(not fp.closed)
277                np.load(fp)['data']
278                # fp must not get closed by .load
279                assert_(not fp.closed)
280                fp.seek(0)
281                assert_(not fp.closed)
282
283    @pytest.mark.slow_pypy
284    def test_closing_fid(self):
285        # Test that issue #1517 (too many opened files) remains closed
286        # It might be a "weak" test since failed to get triggered on
287        # e.g. Debian sid of 2012 Jul 05 but was reported to
288        # trigger the failure on Ubuntu 10.04:
289        # http://projects.scipy.org/numpy/ticket/1517#comment:2
290        with temppath(suffix='.npz') as tmp:
291            np.savez(tmp, data='LOVELY LOAD')
292            # We need to check if the garbage collector can properly close
293            # numpy npz file returned by np.load when their reference count
294            # goes to zero.  Python 3 running in debug mode raises a
295            # ResourceWarning when file closing is left to the garbage
296            # collector, so we catch the warnings.
297            with suppress_warnings() as sup:
298                sup.filter(ResourceWarning)  # TODO: specify exact message
299                for i in range(1, 1025):
300                    try:
301                        np.load(tmp)["data"]
302                    except Exception as e:
303                        msg = "Failed to load data from a file: %s" % e
304                        raise AssertionError(msg)
305                    finally:
306                        if IS_PYPY:
307                            gc.collect()
308
309    def test_closing_zipfile_after_load(self):
310        # Check that zipfile owns file and can close it.  This needs to
311        # pass a file name to load for the test. On windows failure will
312        # cause a second error will be raised when the attempt to remove
313        # the open file is made.
314        prefix = 'numpy_test_closing_zipfile_after_load_'
315        with temppath(suffix='.npz', prefix=prefix) as tmp:
316            np.savez(tmp, lab='place holder')
317            data = np.load(tmp)
318            fp = data.zip.fp
319            data.close()
320            assert_(fp.closed)
321
322
323class TestSaveTxt:
324    def test_array(self):
325        a = np.array([[1, 2], [3, 4]], float)
326        fmt = "%.18e"
327        c = BytesIO()
328        np.savetxt(c, a, fmt=fmt)
329        c.seek(0)
330        assert_equal(c.readlines(),
331                     [asbytes((fmt + ' ' + fmt + '\n') % (1, 2)),
332                      asbytes((fmt + ' ' + fmt + '\n') % (3, 4))])
333
334        a = np.array([[1, 2], [3, 4]], int)
335        c = BytesIO()
336        np.savetxt(c, a, fmt='%d')
337        c.seek(0)
338        assert_equal(c.readlines(), [b'1 2\n', b'3 4\n'])
339
340    def test_1D(self):
341        a = np.array([1, 2, 3, 4], int)
342        c = BytesIO()
343        np.savetxt(c, a, fmt='%d')
344        c.seek(0)
345        lines = c.readlines()
346        assert_equal(lines, [b'1\n', b'2\n', b'3\n', b'4\n'])
347
348    def test_0D_3D(self):
349        c = BytesIO()
350        assert_raises(ValueError, np.savetxt, c, np.array(1))
351        assert_raises(ValueError, np.savetxt, c, np.array([[[1], [2]]]))
352
353    def test_structured(self):
354        a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')])
355        c = BytesIO()
356        np.savetxt(c, a, fmt='%d')
357        c.seek(0)
358        assert_equal(c.readlines(), [b'1 2\n', b'3 4\n'])
359
360    def test_structured_padded(self):
361        # gh-13297
362        a = np.array([(1, 2, 3),(4, 5, 6)], dtype=[
363            ('foo', 'i4'), ('bar', 'i4'), ('baz', 'i4')
364        ])
365        c = BytesIO()
366        np.savetxt(c, a[['foo', 'baz']], fmt='%d')
367        c.seek(0)
368        assert_equal(c.readlines(), [b'1 3\n', b'4 6\n'])
369
370    def test_multifield_view(self):
371        a = np.ones(1, dtype=[('x', 'i4'), ('y', 'i4'), ('z', 'f4')])
372        v = a[['x', 'z']]
373        with temppath(suffix='.npy') as path:
374            path = Path(path)
375            np.save(path, v)
376            data = np.load(path)
377            assert_array_equal(data, v)
378
379    def test_delimiter(self):
380        a = np.array([[1., 2.], [3., 4.]])
381        c = BytesIO()
382        np.savetxt(c, a, delimiter=',', fmt='%d')
383        c.seek(0)
384        assert_equal(c.readlines(), [b'1,2\n', b'3,4\n'])
385
386    def test_format(self):
387        a = np.array([(1, 2), (3, 4)])
388        c = BytesIO()
389        # Sequence of formats
390        np.savetxt(c, a, fmt=['%02d', '%3.1f'])
391        c.seek(0)
392        assert_equal(c.readlines(), [b'01 2.0\n', b'03 4.0\n'])
393
394        # A single multiformat string
395        c = BytesIO()
396        np.savetxt(c, a, fmt='%02d : %3.1f')
397        c.seek(0)
398        lines = c.readlines()
399        assert_equal(lines, [b'01 : 2.0\n', b'03 : 4.0\n'])
400
401        # Specify delimiter, should be overridden
402        c = BytesIO()
403        np.savetxt(c, a, fmt='%02d : %3.1f', delimiter=',')
404        c.seek(0)
405        lines = c.readlines()
406        assert_equal(lines, [b'01 : 2.0\n', b'03 : 4.0\n'])
407
408        # Bad fmt, should raise a ValueError
409        c = BytesIO()
410        assert_raises(ValueError, np.savetxt, c, a, fmt=99)
411
412    def test_header_footer(self):
413        # Test the functionality of the header and footer keyword argument.
414
415        c = BytesIO()
416        a = np.array([(1, 2), (3, 4)], dtype=int)
417        test_header_footer = 'Test header / footer'
418        # Test the header keyword argument
419        np.savetxt(c, a, fmt='%1d', header=test_header_footer)
420        c.seek(0)
421        assert_equal(c.read(),
422                     asbytes('# ' + test_header_footer + '\n1 2\n3 4\n'))
423        # Test the footer keyword argument
424        c = BytesIO()
425        np.savetxt(c, a, fmt='%1d', footer=test_header_footer)
426        c.seek(0)
427        assert_equal(c.read(),
428                     asbytes('1 2\n3 4\n# ' + test_header_footer + '\n'))
429        # Test the commentstr keyword argument used on the header
430        c = BytesIO()
431        commentstr = '% '
432        np.savetxt(c, a, fmt='%1d',
433                   header=test_header_footer, comments=commentstr)
434        c.seek(0)
435        assert_equal(c.read(),
436                     asbytes(commentstr + test_header_footer + '\n' + '1 2\n3 4\n'))
437        # Test the commentstr keyword argument used on the footer
438        c = BytesIO()
439        commentstr = '% '
440        np.savetxt(c, a, fmt='%1d',
441                   footer=test_header_footer, comments=commentstr)
442        c.seek(0)
443        assert_equal(c.read(),
444                     asbytes('1 2\n3 4\n' + commentstr + test_header_footer + '\n'))
445
446    def test_file_roundtrip(self):
447        with temppath() as name:
448            a = np.array([(1, 2), (3, 4)])
449            np.savetxt(name, a)
450            b = np.loadtxt(name)
451            assert_array_equal(a, b)
452
453    def test_complex_arrays(self):
454        ncols = 2
455        nrows = 2
456        a = np.zeros((ncols, nrows), dtype=np.complex128)
457        re = np.pi
458        im = np.e
459        a[:] = re + 1.0j * im
460
461        # One format only
462        c = BytesIO()
463        np.savetxt(c, a, fmt=' %+.3e')
464        c.seek(0)
465        lines = c.readlines()
466        assert_equal(
467            lines,
468            [b' ( +3.142e+00+ +2.718e+00j)  ( +3.142e+00+ +2.718e+00j)\n',
469             b' ( +3.142e+00+ +2.718e+00j)  ( +3.142e+00+ +2.718e+00j)\n'])
470
471        # One format for each real and imaginary part
472        c = BytesIO()
473        np.savetxt(c, a, fmt='  %+.3e' * 2 * ncols)
474        c.seek(0)
475        lines = c.readlines()
476        assert_equal(
477            lines,
478            [b'  +3.142e+00  +2.718e+00  +3.142e+00  +2.718e+00\n',
479             b'  +3.142e+00  +2.718e+00  +3.142e+00  +2.718e+00\n'])
480
481        # One format for each complex number
482        c = BytesIO()
483        np.savetxt(c, a, fmt=['(%.3e%+.3ej)'] * ncols)
484        c.seek(0)
485        lines = c.readlines()
486        assert_equal(
487            lines,
488            [b'(3.142e+00+2.718e+00j) (3.142e+00+2.718e+00j)\n',
489             b'(3.142e+00+2.718e+00j) (3.142e+00+2.718e+00j)\n'])
490
491    def test_complex_negative_exponent(self):
492        # Previous to 1.15, some formats generated x+-yj, gh 7895
493        ncols = 2
494        nrows = 2
495        a = np.zeros((ncols, nrows), dtype=np.complex128)
496        re = np.pi
497        im = np.e
498        a[:] = re - 1.0j * im
499        c = BytesIO()
500        np.savetxt(c, a, fmt='%.3e')
501        c.seek(0)
502        lines = c.readlines()
503        assert_equal(
504            lines,
505            [b' (3.142e+00-2.718e+00j)  (3.142e+00-2.718e+00j)\n',
506             b' (3.142e+00-2.718e+00j)  (3.142e+00-2.718e+00j)\n'])
507
508
509    def test_custom_writer(self):
510
511        class CustomWriter(list):
512            def write(self, text):
513                self.extend(text.split(b'\n'))
514
515        w = CustomWriter()
516        a = np.array([(1, 2), (3, 4)])
517        np.savetxt(w, a)
518        b = np.loadtxt(w)
519        assert_array_equal(a, b)
520
521    def test_unicode(self):
522        utf8 = b'\xcf\x96'.decode('UTF-8')
523        a = np.array([utf8], dtype=np.unicode_)
524        with tempdir() as tmpdir:
525            # set encoding as on windows it may not be unicode even on py3
526            np.savetxt(os.path.join(tmpdir, 'test.csv'), a, fmt=['%s'],
527                       encoding='UTF-8')
528
529    def test_unicode_roundtrip(self):
530        utf8 = b'\xcf\x96'.decode('UTF-8')
531        a = np.array([utf8], dtype=np.unicode_)
532        # our gz wrapper support encoding
533        suffixes = ['', '.gz']
534        if HAS_BZ2:
535            suffixes.append('.bz2')
536        if HAS_LZMA:
537            suffixes.extend(['.xz', '.lzma'])
538        with tempdir() as tmpdir:
539            for suffix in suffixes:
540                np.savetxt(os.path.join(tmpdir, 'test.csv' + suffix), a,
541                           fmt=['%s'], encoding='UTF-16-LE')
542                b = np.loadtxt(os.path.join(tmpdir, 'test.csv' + suffix),
543                               encoding='UTF-16-LE', dtype=np.unicode_)
544                assert_array_equal(a, b)
545
546    def test_unicode_bytestream(self):
547        utf8 = b'\xcf\x96'.decode('UTF-8')
548        a = np.array([utf8], dtype=np.unicode_)
549        s = BytesIO()
550        np.savetxt(s, a, fmt=['%s'], encoding='UTF-8')
551        s.seek(0)
552        assert_equal(s.read().decode('UTF-8'), utf8 + '\n')
553
554    def test_unicode_stringstream(self):
555        utf8 = b'\xcf\x96'.decode('UTF-8')
556        a = np.array([utf8], dtype=np.unicode_)
557        s = StringIO()
558        np.savetxt(s, a, fmt=['%s'], encoding='UTF-8')
559        s.seek(0)
560        assert_equal(s.read(), utf8 + '\n')
561
562    @pytest.mark.parametrize("fmt", [u"%f", b"%f"])
563    @pytest.mark.parametrize("iotype", [StringIO, BytesIO])
564    def test_unicode_and_bytes_fmt(self, fmt, iotype):
565        # string type of fmt should not matter, see also gh-4053
566        a = np.array([1.])
567        s = iotype()
568        np.savetxt(s, a, fmt=fmt)
569        s.seek(0)
570        if iotype is StringIO:
571            assert_equal(s.read(), u"%f\n" % 1.)
572        else:
573            assert_equal(s.read(), b"%f\n" % 1.)
574
    @pytest.mark.skipif(sys.platform=='win32', reason="files>4GB may not work")
    @pytest.mark.slow
    @requires_memory(free_bytes=7e9)
    def test_large_zip(self):
        """Save a large object array to an ``.npz`` file in a child process.

        A ``MemoryError`` in the child is re-raised here, an exit code of
        -9 turns the test into an xfail, and any other non-zero exit code
        fails the test.
        """
        def check_large_zip(memoryerror_raised):
            # Runs in the child process; `memoryerror_raised` is the shared
            # flag the parent reads after joining.
            memoryerror_raised.value = False
            try:
                # The test takes at least 6GB of memory, writes a file larger
                # than 4GB. This tests the ``allowZip64`` kwarg to ``zipfile``
                test_data = np.asarray([np.random.rand(
                                        np.random.randint(50,100),4)
                                        for i in range(800000)], dtype=object)
                with tempdir() as tmpdir:
                    np.savez(os.path.join(tmpdir, 'test.npz'),
                             test_data=test_data)
            except MemoryError:
                # Record the failure for the parent, then let the child
                # fail with the original exception.
                memoryerror_raised.value = True
                raise
        # run in a subprocess to ensure memory is released on PyPy, see gh-15775
        # Use an object in shared memory to re-raise the MemoryError exception
        # in our process if needed, see gh-16889
        memoryerror_raised = Value(c_bool)
        p = Process(target=check_large_zip, args=(memoryerror_raised,))
        p.start()
        p.join()
        if memoryerror_raised.value:
            raise MemoryError("Child process raised a MemoryError exception")
        # -9 indicates a SIGKILL, probably an OOM.
        if p.exitcode == -9:
            pytest.xfail("subprocess got a SIGKILL, apparently free memory was not sufficient")
        assert p.exitcode == 0
606
607class LoadTxtBase:
608    def check_compressed(self, fopen, suffixes):
609        # Test that we can load data from a compressed file
610        wanted = np.arange(6).reshape((2, 3))
611        linesep = ('\n', '\r\n', '\r')
612        for sep in linesep:
613            data = '0 1 2' + sep + '3 4 5'
614            for suffix in suffixes:
615                with temppath(suffix=suffix) as name:
616                    with fopen(name, mode='wt', encoding='UTF-32-LE') as f:
617                        f.write(data)
618                    res = self.loadfunc(name, encoding='UTF-32-LE')
619                    assert_array_equal(res, wanted)
620                    with fopen(name, "rt",  encoding='UTF-32-LE') as f:
621                        res = self.loadfunc(f)
622                    assert_array_equal(res, wanted)
623
624    def test_compressed_gzip(self):
625        self.check_compressed(gzip.open, ('.gz',))
626
627    @pytest.mark.skipif(not HAS_BZ2, reason="Needs bz2")
628    def test_compressed_bz2(self):
629        self.check_compressed(bz2.open, ('.bz2',))
630
631    @pytest.mark.skipif(not HAS_LZMA, reason="Needs lzma")
632    def test_compressed_lzma(self):
633        self.check_compressed(lzma.open, ('.xz', '.lzma'))
634
635    def test_encoding(self):
636        with temppath() as path:
637            with open(path, "wb") as f:
638                f.write('0.\n1.\n2.'.encode("UTF-16"))
639            x = self.loadfunc(path, encoding="UTF-16")
640            assert_array_equal(x, [0., 1., 2.])
641
642    def test_stringload(self):
643        # umlaute
644        nonascii = b'\xc3\xb6\xc3\xbc\xc3\xb6'.decode("UTF-8")
645        with temppath() as path:
646            with open(path, "wb") as f:
647                f.write(nonascii.encode("UTF-16"))
648            x = self.loadfunc(path, encoding="UTF-16", dtype=np.unicode_)
649            assert_array_equal(x, nonascii)
650
651    def test_binary_decode(self):
652        utf16 = b'\xff\xfeh\x04 \x00i\x04 \x00j\x04'
653        v = self.loadfunc(BytesIO(utf16), dtype=np.unicode_, encoding='UTF-16')
654        assert_array_equal(v, np.array(utf16.decode('UTF-16').split()))
655
656    def test_converters_decode(self):
657        # test converters that decode strings
658        c = TextIO()
659        c.write(b'\xcf\x96')
660        c.seek(0)
661        x = self.loadfunc(c, dtype=np.unicode_,
662                          converters={0: lambda x: x.decode('UTF-8')})
663        a = np.array([b'\xcf\x96'.decode('UTF-8')])
664        assert_array_equal(x, a)
665
666    def test_converters_nodecode(self):
667        # test native string converters enabled by setting an encoding
668        utf8 = b'\xcf\x96'.decode('UTF-8')
669        with temppath() as path:
670            with io.open(path, 'wt', encoding='UTF-8') as f:
671                f.write(utf8)
672            x = self.loadfunc(path, dtype=np.unicode_,
673                              converters={0: lambda x: x + 't'},
674                              encoding='UTF-8')
675            a = np.array([utf8 + 't'])
676            assert_array_equal(x, a)
677
678
679class TestLoadTxt(LoadTxtBase):
680    loadfunc = staticmethod(np.loadtxt)
681
682    def setup(self):
683        # lower chunksize for testing
684        self.orig_chunk = np.lib.npyio._loadtxt_chunksize
685        np.lib.npyio._loadtxt_chunksize = 1
686    def teardown(self):
687        np.lib.npyio._loadtxt_chunksize = self.orig_chunk
688
689    def test_record(self):
690        c = TextIO()
691        c.write('1 2\n3 4')
692        c.seek(0)
693        x = np.loadtxt(c, dtype=[('x', np.int32), ('y', np.int32)])
694        a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')])
695        assert_array_equal(x, a)
696
697        d = TextIO()
698        d.write('M 64.0 75.0\nF 25.0 60.0')
699        d.seek(0)
700        mydescriptor = {'names': ('gender', 'age', 'weight'),
701                        'formats': ('S1', 'i4', 'f4')}
702        b = np.array([('M', 64.0, 75.0),
703                      ('F', 25.0, 60.0)], dtype=mydescriptor)
704        y = np.loadtxt(d, dtype=mydescriptor)
705        assert_array_equal(y, b)
706
707    def test_array(self):
708        c = TextIO()
709        c.write('1 2\n3 4')
710
711        c.seek(0)
712        x = np.loadtxt(c, dtype=int)
713        a = np.array([[1, 2], [3, 4]], int)
714        assert_array_equal(x, a)
715
716        c.seek(0)
717        x = np.loadtxt(c, dtype=float)
718        a = np.array([[1, 2], [3, 4]], float)
719        assert_array_equal(x, a)
720
721    def test_1D(self):
722        c = TextIO()
723        c.write('1\n2\n3\n4\n')
724        c.seek(0)
725        x = np.loadtxt(c, dtype=int)
726        a = np.array([1, 2, 3, 4], int)
727        assert_array_equal(x, a)
728
729        c = TextIO()
730        c.write('1,2,3,4\n')
731        c.seek(0)
732        x = np.loadtxt(c, dtype=int, delimiter=',')
733        a = np.array([1, 2, 3, 4], int)
734        assert_array_equal(x, a)
735
736    def test_missing(self):
737        c = TextIO()
738        c.write('1,2,3,,5\n')
739        c.seek(0)
740        x = np.loadtxt(c, dtype=int, delimiter=',',
741                       converters={3: lambda s: int(s or - 999)})
742        a = np.array([1, 2, 3, -999, 5], int)
743        assert_array_equal(x, a)
744
745    def test_converters_with_usecols(self):
746        c = TextIO()
747        c.write('1,2,3,,5\n6,7,8,9,10\n')
748        c.seek(0)
749        x = np.loadtxt(c, dtype=int, delimiter=',',
750                       converters={3: lambda s: int(s or - 999)},
751                       usecols=(1, 3,))
752        a = np.array([[2, -999], [7, 9]], int)
753        assert_array_equal(x, a)
754
755    def test_comments_unicode(self):
756        c = TextIO()
757        c.write('# comment\n1,2,3,5\n')
758        c.seek(0)
759        x = np.loadtxt(c, dtype=int, delimiter=',',
760                       comments=u'#')
761        a = np.array([1, 2, 3, 5], int)
762        assert_array_equal(x, a)
763
764    def test_comments_byte(self):
765        c = TextIO()
766        c.write('# comment\n1,2,3,5\n')
767        c.seek(0)
768        x = np.loadtxt(c, dtype=int, delimiter=',',
769                       comments=b'#')
770        a = np.array([1, 2, 3, 5], int)
771        assert_array_equal(x, a)
772
773    def test_comments_multiple(self):
774        c = TextIO()
775        c.write('# comment\n1,2,3\n@ comment2\n4,5,6 // comment3')
776        c.seek(0)
777        x = np.loadtxt(c, dtype=int, delimiter=',',
778                       comments=['#', '@', '//'])
779        a = np.array([[1, 2, 3], [4, 5, 6]], int)
780        assert_array_equal(x, a)
781
782    def test_comments_multi_chars(self):
783        c = TextIO()
784        c.write('/* comment\n1,2,3,5\n')
785        c.seek(0)
786        x = np.loadtxt(c, dtype=int, delimiter=',',
787                       comments='/*')
788        a = np.array([1, 2, 3, 5], int)
789        assert_array_equal(x, a)
790
791        # Check that '/*' is not transformed to ['/', '*']
792        c = TextIO()
793        c.write('*/ comment\n1,2,3,5\n')
794        c.seek(0)
795        assert_raises(ValueError, np.loadtxt, c, dtype=int, delimiter=',',
796                      comments='/*')
797
798    def test_skiprows(self):
799        c = TextIO()
800        c.write('comment\n1,2,3,5\n')
801        c.seek(0)
802        x = np.loadtxt(c, dtype=int, delimiter=',',
803                       skiprows=1)
804        a = np.array([1, 2, 3, 5], int)
805        assert_array_equal(x, a)
806
807        c = TextIO()
808        c.write('# comment\n1,2,3,5\n')
809        c.seek(0)
810        x = np.loadtxt(c, dtype=int, delimiter=',',
811                       skiprows=1)
812        a = np.array([1, 2, 3, 5], int)
813        assert_array_equal(x, a)
814
815    def test_usecols(self):
816        a = np.array([[1, 2], [3, 4]], float)
817        c = BytesIO()
818        np.savetxt(c, a)
819        c.seek(0)
820        x = np.loadtxt(c, dtype=float, usecols=(1,))
821        assert_array_equal(x, a[:, 1])
822
823        a = np.array([[1, 2, 3], [3, 4, 5]], float)
824        c = BytesIO()
825        np.savetxt(c, a)
826        c.seek(0)
827        x = np.loadtxt(c, dtype=float, usecols=(1, 2))
828        assert_array_equal(x, a[:, 1:])
829
830        # Testing with arrays instead of tuples.
831        c.seek(0)
832        x = np.loadtxt(c, dtype=float, usecols=np.array([1, 2]))
833        assert_array_equal(x, a[:, 1:])
834
835        # Testing with an integer instead of a sequence
836        for int_type in [int, np.int8, np.int16,
837                         np.int32, np.int64, np.uint8, np.uint16,
838                         np.uint32, np.uint64]:
839            to_read = int_type(1)
840            c.seek(0)
841            x = np.loadtxt(c, dtype=float, usecols=to_read)
842            assert_array_equal(x, a[:, 1])
843
844        # Testing with some crazy custom integer type
845        class CrazyInt:
846            def __index__(self):
847                return 1
848
849        crazy_int = CrazyInt()
850        c.seek(0)
851        x = np.loadtxt(c, dtype=float, usecols=crazy_int)
852        assert_array_equal(x, a[:, 1])
853
854        c.seek(0)
855        x = np.loadtxt(c, dtype=float, usecols=(crazy_int,))
856        assert_array_equal(x, a[:, 1])
857
858        # Checking with dtypes defined converters.
859        data = '''JOE 70.1 25.3
860                BOB 60.5 27.9
861                '''
862        c = TextIO(data)
863        names = ['stid', 'temp']
864        dtypes = ['S4', 'f8']
865        arr = np.loadtxt(c, usecols=(0, 2), dtype=list(zip(names, dtypes)))
866        assert_equal(arr['stid'], [b"JOE", b"BOB"])
867        assert_equal(arr['temp'], [25.3, 27.9])
868
869        # Testing non-ints in usecols
870        c.seek(0)
871        bogus_idx = 1.5
872        assert_raises_regex(
873            TypeError,
874            '^usecols must be.*%s' % type(bogus_idx),
875            np.loadtxt, c, usecols=bogus_idx
876            )
877
878        assert_raises_regex(
879            TypeError,
880            '^usecols must be.*%s' % type(bogus_idx),
881            np.loadtxt, c, usecols=[0, bogus_idx, 0]
882            )
883
884    def test_fancy_dtype(self):
885        c = TextIO()
886        c.write('1,2,3.0\n4,5,6.0\n')
887        c.seek(0)
888        dt = np.dtype([('x', int), ('y', [('t', int), ('s', float)])])
889        x = np.loadtxt(c, dtype=dt, delimiter=',')
890        a = np.array([(1, (2, 3.0)), (4, (5, 6.0))], dt)
891        assert_array_equal(x, a)
892
893    def test_shaped_dtype(self):
894        c = TextIO("aaaa  1.0  8.0  1 2 3 4 5 6")
895        dt = np.dtype([('name', 'S4'), ('x', float), ('y', float),
896                       ('block', int, (2, 3))])
897        x = np.loadtxt(c, dtype=dt)
898        a = np.array([('aaaa', 1.0, 8.0, [[1, 2, 3], [4, 5, 6]])],
899                     dtype=dt)
900        assert_array_equal(x, a)
901
902    def test_3d_shaped_dtype(self):
903        c = TextIO("aaaa  1.0  8.0  1 2 3 4 5 6 7 8 9 10 11 12")
904        dt = np.dtype([('name', 'S4'), ('x', float), ('y', float),
905                       ('block', int, (2, 2, 3))])
906        x = np.loadtxt(c, dtype=dt)
907        a = np.array([('aaaa', 1.0, 8.0,
908                       [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])],
909                     dtype=dt)
910        assert_array_equal(x, a)
911
912    def test_str_dtype(self):
913        # see gh-8033
914        c = ["str1", "str2"]
915
916        for dt in (str, np.bytes_):
917            a = np.array(["str1", "str2"], dtype=dt)
918            x = np.loadtxt(c, dtype=dt)
919            assert_array_equal(x, a)
920
921    def test_empty_file(self):
922        with suppress_warnings() as sup:
923            sup.filter(message="loadtxt: Empty input file:")
924            c = TextIO()
925            x = np.loadtxt(c)
926            assert_equal(x.shape, (0,))
927            x = np.loadtxt(c, dtype=np.int64)
928            assert_equal(x.shape, (0,))
929            assert_(x.dtype == np.int64)
930
931    def test_unused_converter(self):
932        c = TextIO()
933        c.writelines(['1 21\n', '3 42\n'])
934        c.seek(0)
935        data = np.loadtxt(c, usecols=(1,),
936                          converters={0: lambda s: int(s, 16)})
937        assert_array_equal(data, [21, 42])
938
939        c.seek(0)
940        data = np.loadtxt(c, usecols=(1,),
941                          converters={1: lambda s: int(s, 16)})
942        assert_array_equal(data, [33, 66])
943
944    def test_dtype_with_object(self):
945        # Test using an explicit dtype with an object
946        data = """ 1; 2001-01-01
947                   2; 2002-01-31 """
948        ndtype = [('idx', int), ('code', object)]
949        func = lambda s: strptime(s.strip(), "%Y-%m-%d")
950        converters = {1: func}
951        test = np.loadtxt(TextIO(data), delimiter=";", dtype=ndtype,
952                          converters=converters)
953        control = np.array(
954            [(1, datetime(2001, 1, 1)), (2, datetime(2002, 1, 31))],
955            dtype=ndtype)
956        assert_equal(test, control)
957
958    def test_uint64_type(self):
959        tgt = (9223372043271415339, 9223372043271415853)
960        c = TextIO()
961        c.write("%s %s" % tgt)
962        c.seek(0)
963        res = np.loadtxt(c, dtype=np.uint64)
964        assert_equal(res, tgt)
965
966    def test_int64_type(self):
967        tgt = (-9223372036854775807, 9223372036854775807)
968        c = TextIO()
969        c.write("%s %s" % tgt)
970        c.seek(0)
971        res = np.loadtxt(c, dtype=np.int64)
972        assert_equal(res, tgt)
973
974    def test_from_float_hex(self):
975        # IEEE doubles and floats only, otherwise the float32
976        # conversion may fail.
977        tgt = np.logspace(-10, 10, 5).astype(np.float32)
978        tgt = np.hstack((tgt, -tgt)).astype(float)
979        inp = '\n'.join(map(float.hex, tgt))
980        c = TextIO()
981        c.write(inp)
982        for dt in [float, np.float32]:
983            c.seek(0)
984            res = np.loadtxt(c, dtype=dt)
985            assert_equal(res, tgt, err_msg="%s" % dt)
986
987    def test_from_complex(self):
988        tgt = (complex(1, 1), complex(1, -1))
989        c = TextIO()
990        c.write("%s %s" % tgt)
991        c.seek(0)
992        res = np.loadtxt(c, dtype=complex)
993        assert_equal(res, tgt)
994
995    def test_complex_misformatted(self):
996        # test for backward compatibility
997        # some complex formats used to generate x+-yj
998        a = np.zeros((2, 2), dtype=np.complex128)
999        re = np.pi
1000        im = np.e
1001        a[:] = re - 1.0j * im
1002        c = BytesIO()
1003        np.savetxt(c, a, fmt='%.16e')
1004        c.seek(0)
1005        txt = c.read()
1006        c.seek(0)
1007        # misformat the sign on the imaginary part, gh 7895
1008        txt_bad = txt.replace(b'e+00-', b'e00+-')
1009        assert_(txt_bad != txt)
1010        c.write(txt_bad)
1011        c.seek(0)
1012        res = np.loadtxt(c, dtype=complex)
1013        assert_equal(res, a)
1014
1015    def test_universal_newline(self):
1016        with temppath() as name:
1017            with open(name, 'w') as f:
1018                f.write('1 21\r3 42\r')
1019            data = np.loadtxt(name)
1020        assert_array_equal(data, [[1, 21], [3, 42]])
1021
1022    def test_empty_field_after_tab(self):
1023        c = TextIO()
1024        c.write('1 \t2 \t3\tstart \n4\t5\t6\t  \n7\t8\t9.5\t')
1025        c.seek(0)
1026        dt = {'names': ('x', 'y', 'z', 'comment'),
1027              'formats': ('<i4', '<i4', '<f4', '|S8')}
1028        x = np.loadtxt(c, dtype=dt, delimiter='\t')
1029        a = np.array([b'start ', b'  ', b''])
1030        assert_array_equal(x['comment'], a)
1031
1032    def test_unpack_structured(self):
1033        txt = TextIO("M 21 72\nF 35 58")
1034        dt = {'names': ('a', 'b', 'c'), 'formats': ('|S1', '<i4', '<f4')}
1035        a, b, c = np.loadtxt(txt, dtype=dt, unpack=True)
1036        assert_(a.dtype.str == '|S1')
1037        assert_(b.dtype.str == '<i4')
1038        assert_(c.dtype.str == '<f4')
1039        assert_array_equal(a, np.array([b'M', b'F']))
1040        assert_array_equal(b, np.array([21, 35]))
1041        assert_array_equal(c, np.array([72.,  58.]))
1042
1043    def test_ndmin_keyword(self):
1044        c = TextIO()
1045        c.write('1,2,3\n4,5,6')
1046        c.seek(0)
1047        assert_raises(ValueError, np.loadtxt, c, ndmin=3)
1048        c.seek(0)
1049        assert_raises(ValueError, np.loadtxt, c, ndmin=1.5)
1050        c.seek(0)
1051        x = np.loadtxt(c, dtype=int, delimiter=',', ndmin=1)
1052        a = np.array([[1, 2, 3], [4, 5, 6]])
1053        assert_array_equal(x, a)
1054
1055        d = TextIO()
1056        d.write('0,1,2')
1057        d.seek(0)
1058        x = np.loadtxt(d, dtype=int, delimiter=',', ndmin=2)
1059        assert_(x.shape == (1, 3))
1060        d.seek(0)
1061        x = np.loadtxt(d, dtype=int, delimiter=',', ndmin=1)
1062        assert_(x.shape == (3,))
1063        d.seek(0)
1064        x = np.loadtxt(d, dtype=int, delimiter=',', ndmin=0)
1065        assert_(x.shape == (3,))
1066
1067        e = TextIO()
1068        e.write('0\n1\n2')
1069        e.seek(0)
1070        x = np.loadtxt(e, dtype=int, delimiter=',', ndmin=2)
1071        assert_(x.shape == (3, 1))
1072        e.seek(0)
1073        x = np.loadtxt(e, dtype=int, delimiter=',', ndmin=1)
1074        assert_(x.shape == (3,))
1075        e.seek(0)
1076        x = np.loadtxt(e, dtype=int, delimiter=',', ndmin=0)
1077        assert_(x.shape == (3,))
1078
1079        # Test ndmin kw with empty file.
1080        with suppress_warnings() as sup:
1081            sup.filter(message="loadtxt: Empty input file:")
1082            f = TextIO()
1083            assert_(np.loadtxt(f, ndmin=2).shape == (0, 1,))
1084            assert_(np.loadtxt(f, ndmin=1).shape == (0,))
1085
1086    def test_generator_source(self):
1087        def count():
1088            for i in range(10):
1089                yield "%d" % i
1090
1091        res = np.loadtxt(count())
1092        assert_array_equal(res, np.arange(10))
1093
1094    def test_bad_line(self):
1095        c = TextIO()
1096        c.write('1 2 3\n4 5 6\n2 3')
1097        c.seek(0)
1098
1099        # Check for exception and that exception contains line number
1100        assert_raises_regex(ValueError, "3", np.loadtxt, c)
1101
1102    def test_none_as_string(self):
1103        # gh-5155, None should work as string when format demands it
1104        c = TextIO()
1105        c.write('100,foo,200\n300,None,400')
1106        c.seek(0)
1107        dt = np.dtype([('x', int), ('a', 'S10'), ('y', int)])
1108        np.loadtxt(c, delimiter=',', dtype=dt, comments=None)  # Should succeed
1109
1110    @pytest.mark.skipif(locale.getpreferredencoding() == 'ANSI_X3.4-1968',
1111                        reason="Wrong preferred encoding")
1112    def test_binary_load(self):
1113        butf8 = b"5,6,7,\xc3\x95scarscar\n\r15,2,3,hello\n\r"\
1114                b"20,2,3,\xc3\x95scar\n\r"
1115        sutf8 = butf8.decode("UTF-8").replace("\r", "").splitlines()
1116        with temppath() as path:
1117            with open(path, "wb") as f:
1118                f.write(butf8)
1119            with open(path, "rb") as f:
1120                x = np.loadtxt(f, encoding="UTF-8", dtype=np.unicode_)
1121            assert_array_equal(x, sutf8)
1122            # test broken latin1 conversion people now rely on
1123            with open(path, "rb") as f:
1124                x = np.loadtxt(f, encoding="UTF-8", dtype="S")
1125            x = [b'5,6,7,\xc3\x95scarscar', b'15,2,3,hello', b'20,2,3,\xc3\x95scar']
1126            assert_array_equal(x, np.array(x, dtype="S"))
1127
1128    def test_max_rows(self):
1129        c = TextIO()
1130        c.write('1,2,3,5\n4,5,7,8\n2,1,4,5')
1131        c.seek(0)
1132        x = np.loadtxt(c, dtype=int, delimiter=',',
1133                       max_rows=1)
1134        a = np.array([1, 2, 3, 5], int)
1135        assert_array_equal(x, a)
1136
1137    def test_max_rows_with_skiprows(self):
1138        c = TextIO()
1139        c.write('comments\n1,2,3,5\n4,5,7,8\n2,1,4,5')
1140        c.seek(0)
1141        x = np.loadtxt(c, dtype=int, delimiter=',',
1142                       skiprows=1, max_rows=1)
1143        a = np.array([1, 2, 3, 5], int)
1144        assert_array_equal(x, a)
1145
1146        c = TextIO()
1147        c.write('comment\n1,2,3,5\n4,5,7,8\n2,1,4,5')
1148        c.seek(0)
1149        x = np.loadtxt(c, dtype=int, delimiter=',',
1150                       skiprows=1, max_rows=2)
1151        a = np.array([[1, 2, 3, 5], [4, 5, 7, 8]], int)
1152        assert_array_equal(x, a)
1153
1154    def test_max_rows_with_read_continuation(self):
1155        c = TextIO()
1156        c.write('1,2,3,5\n4,5,7,8\n2,1,4,5')
1157        c.seek(0)
1158        x = np.loadtxt(c, dtype=int, delimiter=',',
1159                       max_rows=2)
1160        a = np.array([[1, 2, 3, 5], [4, 5, 7, 8]], int)
1161        assert_array_equal(x, a)
1162        # test continuation
1163        x = np.loadtxt(c, dtype=int, delimiter=',')
1164        a = np.array([2,1,4,5], int)
1165        assert_array_equal(x, a)
1166
1167    def test_max_rows_larger(self):
1168        #test max_rows > num rows
1169        c = TextIO()
1170        c.write('comment\n1,2,3,5\n4,5,7,8\n2,1,4,5')
1171        c.seek(0)
1172        x = np.loadtxt(c, dtype=int, delimiter=',',
1173                       skiprows=1, max_rows=6)
1174        a = np.array([[1, 2, 3, 5], [4, 5, 7, 8], [2, 1, 4, 5]], int)
1175        assert_array_equal(x, a)
1176
class Testfromregex:
    # Tests for ``np.fromregex``: the groups of the regular expression are
    # matched against the fields of the requested dtype.

    def test_record(self):
        # Float and 3-byte string fields.
        stream = TextIO('1.312 foo\n1.534 bar\n4.444 qux')

        dt = [('num', np.float64), ('val', 'S3')]
        parsed = np.fromregex(stream, r"([0-9.]+)\s+(...)", dt)
        expected = np.array(
            [(1.312, 'foo'), (1.534, 'bar'), (4.444, 'qux')], dtype=dt)
        assert_array_equal(parsed, expected)

    def test_record_2(self):
        # Integer and 3-byte string fields.
        stream = TextIO('1312 foo\n1534 bar\n4444 qux')

        dt = [('num', np.int32), ('val', 'S3')]
        parsed = np.fromregex(stream, r"(\d+)\s+(...)", dt)
        expected = np.array(
            [(1312, 'foo'), (1534, 'bar'), (4444, 'qux')], dtype=dt)
        assert_array_equal(parsed, expected)

    def test_record_3(self):
        # A single group: the trailing text is matched but not captured.
        stream = TextIO('1312 foo\n1534 bar\n4444 qux')

        dt = [('num', np.float64)]
        parsed = np.fromregex(stream, r"(\d+)\s+...", dt)
        expected = np.array([(1312,), (1534,), (4444,)], dtype=dt)
        assert_array_equal(parsed, expected)

    def test_record_unicode(self):
        # UTF-8 file read by path, with the pattern given first as a string
        # and then as a compiled regular expression.
        utf8 = b'\xcf\x96'
        with temppath() as path:
            with open(path, 'wb') as handle:
                handle.write(b'1.312 foo' + utf8 + b' \n1.534 bar\n4.444 qux')

            dt = [('num', np.float64), ('val', 'U4')]
            first_word = 'foo' + utf8.decode('UTF-8')
            expected = np.array(
                [(1.312, first_word), (1.534, 'bar'), (4.444, 'qux')],
                dtype=dt)

            parsed = np.fromregex(path, r"(?u)([0-9.]+)\s+(\w+)", dt,
                                  encoding='UTF-8')
            assert_array_equal(parsed, expected)

            regexp = re.compile(r"([0-9.]+)\s+(\w+)", re.UNICODE)
            parsed = np.fromregex(path, regexp, dt, encoding='UTF-8')
            assert_array_equal(parsed, expected)

    def test_compiled_bytes(self):
        # A bytes pattern compiled up front, applied to a bytes stream.
        dt = [('num', np.float64)]
        expected = np.array([1, 2, 3], dtype=dt)
        regexp = re.compile(b'(\\d)')
        parsed = np.fromregex(BytesIO(b'123'), regexp, dt)
        assert_array_equal(parsed, expected)
1233
1234#####--------------------------------------------------------------------------
1235
1236
1237class TestFromTxt(LoadTxtBase):
1238    loadfunc = staticmethod(np.genfromtxt)
1239
1240    def test_record(self):
1241        # Test w/ explicit dtype
1242        data = TextIO('1 2\n3 4')
1243        test = np.genfromtxt(data, dtype=[('x', np.int32), ('y', np.int32)])
1244        control = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')])
1245        assert_equal(test, control)
1246        #
1247        data = TextIO('M 64.0 75.0\nF 25.0 60.0')
1248        descriptor = {'names': ('gender', 'age', 'weight'),
1249                      'formats': ('S1', 'i4', 'f4')}
1250        control = np.array([('M', 64.0, 75.0), ('F', 25.0, 60.0)],
1251                           dtype=descriptor)
1252        test = np.genfromtxt(data, dtype=descriptor)
1253        assert_equal(test, control)
1254
1255    def test_array(self):
1256        # Test outputting a standard ndarray
1257        data = TextIO('1 2\n3 4')
1258        control = np.array([[1, 2], [3, 4]], dtype=int)
1259        test = np.genfromtxt(data, dtype=int)
1260        assert_array_equal(test, control)
1261        #
1262        data.seek(0)
1263        control = np.array([[1, 2], [3, 4]], dtype=float)
1264        test = np.loadtxt(data, dtype=float)
1265        assert_array_equal(test, control)
1266
1267    def test_1D(self):
1268        # Test squeezing to 1D
1269        control = np.array([1, 2, 3, 4], int)
1270        #
1271        data = TextIO('1\n2\n3\n4\n')
1272        test = np.genfromtxt(data, dtype=int)
1273        assert_array_equal(test, control)
1274        #
1275        data = TextIO('1,2,3,4\n')
1276        test = np.genfromtxt(data, dtype=int, delimiter=',')
1277        assert_array_equal(test, control)
1278
1279    def test_comments(self):
1280        # Test the stripping of comments
1281        control = np.array([1, 2, 3, 5], int)
1282        # Comment on its own line
1283        data = TextIO('# comment\n1,2,3,5\n')
1284        test = np.genfromtxt(data, dtype=int, delimiter=',', comments='#')
1285        assert_equal(test, control)
1286        # Comment at the end of a line
1287        data = TextIO('1,2,3,5# comment\n')
1288        test = np.genfromtxt(data, dtype=int, delimiter=',', comments='#')
1289        assert_equal(test, control)
1290
1291    def test_skiprows(self):
1292        # Test row skipping
1293        control = np.array([1, 2, 3, 5], int)
1294        kwargs = dict(dtype=int, delimiter=',')
1295        #
1296        data = TextIO('comment\n1,2,3,5\n')
1297        test = np.genfromtxt(data, skip_header=1, **kwargs)
1298        assert_equal(test, control)
1299        #
1300        data = TextIO('# comment\n1,2,3,5\n')
1301        test = np.loadtxt(data, skiprows=1, **kwargs)
1302        assert_equal(test, control)
1303
1304    def test_skip_footer(self):
1305        data = ["# %i" % i for i in range(1, 6)]
1306        data.append("A, B, C")
1307        data.extend(["%i,%3.1f,%03s" % (i, i, i) for i in range(51)])
1308        data[-1] = "99,99"
1309        kwargs = dict(delimiter=",", names=True, skip_header=5, skip_footer=10)
1310        test = np.genfromtxt(TextIO("\n".join(data)), **kwargs)
1311        ctrl = np.array([("%f" % i, "%f" % i, "%f" % i) for i in range(41)],
1312                        dtype=[(_, float) for _ in "ABC"])
1313        assert_equal(test, ctrl)
1314
1315    def test_skip_footer_with_invalid(self):
1316        with suppress_warnings() as sup:
1317            sup.filter(ConversionWarning)
1318            basestr = '1 1\n2 2\n3 3\n4 4\n5  \n6  \n7  \n'
1319            # Footer too small to get rid of all invalid values
1320            assert_raises(ValueError, np.genfromtxt,
1321                          TextIO(basestr), skip_footer=1)
1322    #        except ValueError:
1323    #            pass
1324            a = np.genfromtxt(
1325                TextIO(basestr), skip_footer=1, invalid_raise=False)
1326            assert_equal(a, np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]]))
1327            #
1328            a = np.genfromtxt(TextIO(basestr), skip_footer=3)
1329            assert_equal(a, np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]]))
1330            #
1331            basestr = '1 1\n2  \n3 3\n4 4\n5  \n6 6\n7 7\n'
1332            a = np.genfromtxt(
1333                TextIO(basestr), skip_footer=1, invalid_raise=False)
1334            assert_equal(a, np.array([[1., 1.], [3., 3.], [4., 4.], [6., 6.]]))
1335            a = np.genfromtxt(
1336                TextIO(basestr), skip_footer=3, invalid_raise=False)
1337            assert_equal(a, np.array([[1., 1.], [3., 3.], [4., 4.]]))
1338
1339    def test_header(self):
1340        # Test retrieving a header
1341        data = TextIO('gender age weight\nM 64.0 75.0\nF 25.0 60.0')
1342        with warnings.catch_warnings(record=True) as w:
1343            warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
1344            test = np.genfromtxt(data, dtype=None, names=True)
1345            assert_(w[0].category is np.VisibleDeprecationWarning)
1346        control = {'gender': np.array([b'M', b'F']),
1347                   'age': np.array([64.0, 25.0]),
1348                   'weight': np.array([75.0, 60.0])}
1349        assert_equal(test['gender'], control['gender'])
1350        assert_equal(test['age'], control['age'])
1351        assert_equal(test['weight'], control['weight'])
1352
1353    def test_auto_dtype(self):
1354        # Test the automatic definition of the output dtype
1355        data = TextIO('A 64 75.0 3+4j True\nBCD 25 60.0 5+6j False')
1356        with warnings.catch_warnings(record=True) as w:
1357            warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
1358            test = np.genfromtxt(data, dtype=None)
1359            assert_(w[0].category is np.VisibleDeprecationWarning)
1360        control = [np.array([b'A', b'BCD']),
1361                   np.array([64, 25]),
1362                   np.array([75.0, 60.0]),
1363                   np.array([3 + 4j, 5 + 6j]),
1364                   np.array([True, False]), ]
1365        assert_equal(test.dtype.names, ['f0', 'f1', 'f2', 'f3', 'f4'])
1366        for (i, ctrl) in enumerate(control):
1367            assert_equal(test['f%i' % i], ctrl)
1368
1369    def test_auto_dtype_uniform(self):
1370        # Tests whether the output dtype can be uniformized
1371        data = TextIO('1 2 3 4\n5 6 7 8\n')
1372        test = np.genfromtxt(data, dtype=None)
1373        control = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
1374        assert_equal(test, control)
1375
1376    def test_fancy_dtype(self):
1377        # Check that a nested dtype isn't MIA
1378        data = TextIO('1,2,3.0\n4,5,6.0\n')
1379        fancydtype = np.dtype([('x', int), ('y', [('t', int), ('s', float)])])
1380        test = np.genfromtxt(data, dtype=fancydtype, delimiter=',')
1381        control = np.array([(1, (2, 3.0)), (4, (5, 6.0))], dtype=fancydtype)
1382        assert_equal(test, control)
1383
1384    def test_names_overwrite(self):
1385        # Test overwriting the names of the dtype
1386        descriptor = {'names': ('g', 'a', 'w'),
1387                      'formats': ('S1', 'i4', 'f4')}
1388        data = TextIO(b'M 64.0 75.0\nF 25.0 60.0')
1389        names = ('gender', 'age', 'weight')
1390        test = np.genfromtxt(data, dtype=descriptor, names=names)
1391        descriptor['names'] = names
1392        control = np.array([('M', 64.0, 75.0),
1393                            ('F', 25.0, 60.0)], dtype=descriptor)
1394        assert_equal(test, control)
1395
1396    def test_commented_header(self):
1397        # Check that names can be retrieved even if the line is commented out.
1398        data = TextIO("""
1399#gender age weight
1400M   21  72.100000
1401F   35  58.330000
1402M   33  21.99
1403        """)
1404        # The # is part of the first name and should be deleted automatically.
1405        with warnings.catch_warnings(record=True) as w:
1406            warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
1407            test = np.genfromtxt(data, names=True, dtype=None)
1408            assert_(w[0].category is np.VisibleDeprecationWarning)
1409        ctrl = np.array([('M', 21, 72.1), ('F', 35, 58.33), ('M', 33, 21.99)],
1410                        dtype=[('gender', '|S1'), ('age', int), ('weight', float)])
1411        assert_equal(test, ctrl)
1412        # Ditto, but we should get rid of the first element
1413        data = TextIO(b"""
1414# gender age weight
1415M   21  72.100000
1416F   35  58.330000
1417M   33  21.99
1418        """)
1419        with warnings.catch_warnings(record=True) as w:
1420            warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
1421            test = np.genfromtxt(data, names=True, dtype=None)
1422            assert_(w[0].category is np.VisibleDeprecationWarning)
1423        assert_equal(test, ctrl)
1424
1425    def test_names_and_comments_none(self):
1426        # Tests case when names is true but comments is None (gh-10780)
1427        data = TextIO('col1 col2\n 1 2\n 3 4')
1428        test = np.genfromtxt(data, dtype=(int, int), comments=None, names=True)
1429        control = np.array([(1, 2), (3, 4)], dtype=[('col1', int), ('col2', int)])
1430        assert_equal(test, control)
1431
1432    def test_file_is_closed_on_error(self):
1433        # gh-13200
1434        with tempdir() as tmpdir:
1435            fpath = os.path.join(tmpdir, "test.csv")
1436            with open(fpath, "wb") as f:
1437                f.write(u'\N{GREEK PI SYMBOL}'.encode('utf8'))
1438
1439            # ResourceWarnings are emitted from a destructor, so won't be
1440            # detected by regular propagation to errors.
1441            with assert_no_warnings():
1442                with pytest.raises(UnicodeDecodeError):
1443                    np.genfromtxt(fpath, encoding="ascii")
1444
1445    def test_autonames_and_usecols(self):
1446        # Tests names and usecols
1447        data = TextIO('A B C D\n aaaa 121 45 9.1')
1448        with warnings.catch_warnings(record=True) as w:
1449            warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
1450            test = np.genfromtxt(data, usecols=('A', 'C', 'D'),
1451                                names=True, dtype=None)
1452            assert_(w[0].category is np.VisibleDeprecationWarning)
1453        control = np.array(('aaaa', 45, 9.1),
1454                           dtype=[('A', '|S4'), ('C', int), ('D', float)])
1455        assert_equal(test, control)
1456
1457    def test_converters_with_usecols(self):
1458        # Test the combination user-defined converters and usecol
1459        data = TextIO('1,2,3,,5\n6,7,8,9,10\n')
1460        test = np.genfromtxt(data, dtype=int, delimiter=',',
1461                            converters={3: lambda s: int(s or - 999)},
1462                            usecols=(1, 3,))
1463        control = np.array([[2, -999], [7, 9]], int)
1464        assert_equal(test, control)
1465
1466    def test_converters_with_usecols_and_names(self):
1467        # Tests names and usecols
1468        data = TextIO('A B C D\n aaaa 121 45 9.1')
1469        with warnings.catch_warnings(record=True) as w:
1470            warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
1471            test = np.genfromtxt(data, usecols=('A', 'C', 'D'), names=True,
1472                                dtype=None,
1473                                converters={'C': lambda s: 2 * int(s)})
1474            assert_(w[0].category is np.VisibleDeprecationWarning)
1475        control = np.array(('aaaa', 90, 9.1),
1476                           dtype=[('A', '|S4'), ('C', int), ('D', float)])
1477        assert_equal(test, control)
1478
1479    def test_converters_cornercases(self):
1480        # Test the conversion to datetime.
1481        converter = {
1482            'date': lambda s: strptime(s, '%Y-%m-%d %H:%M:%SZ')}
1483        data = TextIO('2009-02-03 12:00:00Z, 72214.0')
1484        test = np.genfromtxt(data, delimiter=',', dtype=None,
1485                            names=['date', 'stid'], converters=converter)
1486        control = np.array((datetime(2009, 2, 3), 72214.),
1487                           dtype=[('date', np.object_), ('stid', float)])
1488        assert_equal(test, control)
1489
1490    def test_converters_cornercases2(self):
1491        # Test the conversion to datetime64.
1492        converter = {
1493            'date': lambda s: np.datetime64(strptime(s, '%Y-%m-%d %H:%M:%SZ'))}
1494        data = TextIO('2009-02-03 12:00:00Z, 72214.0')
1495        test = np.genfromtxt(data, delimiter=',', dtype=None,
1496                            names=['date', 'stid'], converters=converter)
1497        control = np.array((datetime(2009, 2, 3), 72214.),
1498                           dtype=[('date', 'datetime64[us]'), ('stid', float)])
1499        assert_equal(test, control)
1500
1501    def test_unused_converter(self):
1502        # Test whether unused converters are forgotten
1503        data = TextIO("1 21\n  3 42\n")
1504        test = np.genfromtxt(data, usecols=(1,),
1505                            converters={0: lambda s: int(s, 16)})
1506        assert_equal(test, [21, 42])
1507        #
1508        data.seek(0)
1509        test = np.genfromtxt(data, usecols=(1,),
1510                            converters={1: lambda s: int(s, 16)})
1511        assert_equal(test, [33, 66])
1512
    def test_invalid_converter(self):
        # genfromtxt must raise ConverterError when user converters are
        # given for data they cannot handle.
        #
        # strip_rand: if the field contains an 'r' (any case) use its last
        # whitespace-separated token, otherwise the stripped field, falling
        # back to 0.0 when that is empty; the result is passed to float().
        strip_rand = lambda x: float((b'r' in x.lower() and x.split()[-1]) or
                                     (b'r' not in x.lower() and x.strip() or 0.0))
        # strip_per: same scheme keyed on '%', but keeping the first token.
        strip_per = lambda x: float((b'%' in x.lower() and x.split()[0]) or
                                    (b'%' not in x.lower() and x.strip() or 0.0))
        # NOTE(review): presumably the 'R 1' in column 2 of the last row is
        # what trips strip_per (no '%', so the whole field goes to float()).
        s = TextIO("D01N01,10/1/2003 ,1 %,R 75,400,600\r\n"
                   "L24U05,12/5/2003, 2 %,1,300, 150.5\r\n"
                   "D02N03,10/10/2004,R 1,,7,145.55")
        kwargs = dict(
            converters={2: strip_per, 3: strip_rand}, delimiter=",",
            dtype=None)
        assert_raises(ConverterError, np.genfromtxt, s, **kwargs)
1525
1526    def test_tricky_converter_bug1666(self):
1527        # Test some corner cases
1528        s = TextIO('q1,2\nq3,4')
1529        cnv = lambda s: float(s[1:])
1530        test = np.genfromtxt(s, delimiter=',', converters={0: cnv})
1531        control = np.array([[1., 2.], [3., 4.]])
1532        assert_equal(test, control)
1533
1534    def test_dtype_with_converters(self):
1535        dstr = "2009; 23; 46"
1536        test = np.genfromtxt(TextIO(dstr,),
1537                            delimiter=";", dtype=float, converters={0: bytes})
1538        control = np.array([('2009', 23., 46)],
1539                           dtype=[('f0', '|S4'), ('f1', float), ('f2', float)])
1540        assert_equal(test, control)
1541        test = np.genfromtxt(TextIO(dstr,),
1542                            delimiter=";", dtype=float, converters={0: float})
1543        control = np.array([2009., 23., 46],)
1544        assert_equal(test, control)
1545
1546    def test_dtype_with_converters_and_usecols(self):
1547        dstr = "1,5,-1,1:1\n2,8,-1,1:n\n3,3,-2,m:n\n"
1548        dmap = {'1:1':0, '1:n':1, 'm:1':2, 'm:n':3}
1549        dtyp = [('e1','i4'),('e2','i4'),('e3','i2'),('n', 'i1')]
1550        conv = {0: int, 1: int, 2: int, 3: lambda r: dmap[r.decode()]}
1551        test = np.recfromcsv(TextIO(dstr,), dtype=dtyp, delimiter=',',
1552                             names=None, converters=conv)
1553        control = np.rec.array([(1,5,-1,0), (2,8,-1,1), (3,3,-2,3)], dtype=dtyp)
1554        assert_equal(test, control)
1555        dtyp = [('e1','i4'),('e2','i4'),('n', 'i1')]
1556        test = np.recfromcsv(TextIO(dstr,), dtype=dtyp, delimiter=',',
1557                             usecols=(0,1,3), names=None, converters=conv)
1558        control = np.rec.array([(1,5,0), (2,8,1), (3,3,3)], dtype=dtyp)
1559        assert_equal(test, control)
1560
1561    def test_dtype_with_object(self):
1562        # Test using an explicit dtype with an object
1563        data = """ 1; 2001-01-01
1564                   2; 2002-01-31 """
1565        ndtype = [('idx', int), ('code', object)]
1566        func = lambda s: strptime(s.strip(), "%Y-%m-%d")
1567        converters = {1: func}
1568        test = np.genfromtxt(TextIO(data), delimiter=";", dtype=ndtype,
1569                             converters=converters)
1570        control = np.array(
1571            [(1, datetime(2001, 1, 1)), (2, datetime(2002, 1, 31))],
1572            dtype=ndtype)
1573        assert_equal(test, control)
1574
1575        ndtype = [('nest', [('idx', int), ('code', object)])]
1576        with assert_raises_regex(NotImplementedError,
1577                                 'Nested fields.* not supported.*'):
1578            test = np.genfromtxt(TextIO(data), delimiter=";",
1579                                 dtype=ndtype, converters=converters)
1580
1581        # nested but empty fields also aren't supported
1582        ndtype = [('idx', int), ('code', object), ('nest', [])]
1583        with assert_raises_regex(NotImplementedError,
1584                                 'Nested fields.* not supported.*'):
1585            test = np.genfromtxt(TextIO(data), delimiter=";",
1586                                 dtype=ndtype, converters=converters)
1587
1588    def test_dtype_with_object_no_converter(self):
1589        # Object without a converter uses bytes:
1590        parsed = np.genfromtxt(TextIO("1"), dtype=object)
1591        assert parsed[()] == b"1"
1592        parsed = np.genfromtxt(TextIO("string"), dtype=object)
1593        assert parsed[()] == b"string"
1594
1595    def test_userconverters_with_explicit_dtype(self):
1596        # Test user_converters w/ explicit (standard) dtype
1597        data = TextIO('skip,skip,2001-01-01,1.0,skip')
1598        test = np.genfromtxt(data, delimiter=",", names=None, dtype=float,
1599                             usecols=(2, 3), converters={2: bytes})
1600        control = np.array([('2001-01-01', 1.)],
1601                           dtype=[('', '|S10'), ('', float)])
1602        assert_equal(test, control)
1603
1604    def test_utf8_userconverters_with_explicit_dtype(self):
1605        utf8 = b'\xcf\x96'
1606        with temppath() as path:
1607            with open(path, 'wb') as f:
1608                f.write(b'skip,skip,2001-01-01' + utf8 + b',1.0,skip')
1609            test = np.genfromtxt(path, delimiter=",", names=None, dtype=float,
1610                                 usecols=(2, 3), converters={2: np.compat.unicode},
1611                                 encoding='UTF-8')
1612        control = np.array([('2001-01-01' + utf8.decode('UTF-8'), 1.)],
1613                           dtype=[('', '|U11'), ('', float)])
1614        assert_equal(test, control)
1615
1616    def test_spacedelimiter(self):
1617        # Test space delimiter
1618        data = TextIO("1  2  3  4   5\n6  7  8  9  10")
1619        test = np.genfromtxt(data)
1620        control = np.array([[1., 2., 3., 4., 5.],
1621                            [6., 7., 8., 9., 10.]])
1622        assert_equal(test, control)
1623
1624    def test_integer_delimiter(self):
1625        # Test using an integer for delimiter
1626        data = "  1  2  3\n  4  5 67\n890123  4"
1627        test = np.genfromtxt(TextIO(data), delimiter=3)
1628        control = np.array([[1, 2, 3], [4, 5, 67], [890, 123, 4]])
1629        assert_equal(test, control)
1630
1631    def test_missing(self):
1632        data = TextIO('1,2,3,,5\n')
1633        test = np.genfromtxt(data, dtype=int, delimiter=',',
1634                            converters={3: lambda s: int(s or - 999)})
1635        control = np.array([1, 2, 3, -999, 5], int)
1636        assert_equal(test, control)
1637
1638    def test_missing_with_tabs(self):
1639        # Test w/ a delimiter tab
1640        txt = "1\t2\t3\n\t2\t\n1\t\t3"
1641        test = np.genfromtxt(TextIO(txt), delimiter="\t",
1642                             usemask=True,)
1643        ctrl_d = np.array([(1, 2, 3), (np.nan, 2, np.nan), (1, np.nan, 3)],)
1644        ctrl_m = np.array([(0, 0, 0), (1, 0, 1), (0, 1, 0)], dtype=bool)
1645        assert_equal(test.data, ctrl_d)
1646        assert_equal(test.mask, ctrl_m)
1647
1648    def test_usecols(self):
1649        # Test the selection of columns
1650        # Select 1 column
1651        control = np.array([[1, 2], [3, 4]], float)
1652        data = TextIO()
1653        np.savetxt(data, control)
1654        data.seek(0)
1655        test = np.genfromtxt(data, dtype=float, usecols=(1,))
1656        assert_equal(test, control[:, 1])
1657        #
1658        control = np.array([[1, 2, 3], [3, 4, 5]], float)
1659        data = TextIO()
1660        np.savetxt(data, control)
1661        data.seek(0)
1662        test = np.genfromtxt(data, dtype=float, usecols=(1, 2))
1663        assert_equal(test, control[:, 1:])
1664        # Testing with arrays instead of tuples.
1665        data.seek(0)
1666        test = np.genfromtxt(data, dtype=float, usecols=np.array([1, 2]))
1667        assert_equal(test, control[:, 1:])
1668
1669    def test_usecols_as_css(self):
1670        # Test giving usecols with a comma-separated string
1671        data = "1 2 3\n4 5 6"
1672        test = np.genfromtxt(TextIO(data),
1673                             names="a, b, c", usecols="a, c")
1674        ctrl = np.array([(1, 3), (4, 6)], dtype=[(_, float) for _ in "ac"])
1675        assert_equal(test, ctrl)
1676
1677    def test_usecols_with_structured_dtype(self):
1678        # Test usecols with an explicit structured dtype
1679        data = TextIO("JOE 70.1 25.3\nBOB 60.5 27.9")
1680        names = ['stid', 'temp']
1681        dtypes = ['S4', 'f8']
1682        test = np.genfromtxt(
1683            data, usecols=(0, 2), dtype=list(zip(names, dtypes)))
1684        assert_equal(test['stid'], [b"JOE", b"BOB"])
1685        assert_equal(test['temp'], [25.3, 27.9])
1686
1687    def test_usecols_with_integer(self):
1688        # Test usecols with an integer
1689        test = np.genfromtxt(TextIO(b"1 2 3\n4 5 6"), usecols=0)
1690        assert_equal(test, np.array([1., 4.]))
1691
1692    def test_usecols_with_named_columns(self):
1693        # Test usecols with named columns
1694        ctrl = np.array([(1, 3), (4, 6)], dtype=[('a', float), ('c', float)])
1695        data = "1 2 3\n4 5 6"
1696        kwargs = dict(names="a, b, c")
1697        test = np.genfromtxt(TextIO(data), usecols=(0, -1), **kwargs)
1698        assert_equal(test, ctrl)
1699        test = np.genfromtxt(TextIO(data),
1700                             usecols=('a', 'c'), **kwargs)
1701        assert_equal(test, ctrl)
1702
1703    def test_empty_file(self):
1704        # Test that an empty file raises the proper warning.
1705        with suppress_warnings() as sup:
1706            sup.filter(message="genfromtxt: Empty input file:")
1707            data = TextIO()
1708            test = np.genfromtxt(data)
1709            assert_equal(test, np.array([]))
1710
1711            # when skip_header > 0
1712            test = np.genfromtxt(data, skip_header=1)
1713            assert_equal(test, np.array([]))
1714
1715    def test_fancy_dtype_alt(self):
1716        # Check that a nested dtype isn't MIA
1717        data = TextIO('1,2,3.0\n4,5,6.0\n')
1718        fancydtype = np.dtype([('x', int), ('y', [('t', int), ('s', float)])])
1719        test = np.genfromtxt(data, dtype=fancydtype, delimiter=',', usemask=True)
1720        control = ma.array([(1, (2, 3.0)), (4, (5, 6.0))], dtype=fancydtype)
1721        assert_equal(test, control)
1722
1723    def test_shaped_dtype(self):
1724        c = TextIO("aaaa  1.0  8.0  1 2 3 4 5 6")
1725        dt = np.dtype([('name', 'S4'), ('x', float), ('y', float),
1726                       ('block', int, (2, 3))])
1727        x = np.genfromtxt(c, dtype=dt)
1728        a = np.array([('aaaa', 1.0, 8.0, [[1, 2, 3], [4, 5, 6]])],
1729                     dtype=dt)
1730        assert_array_equal(x, a)
1731
1732    def test_withmissing(self):
1733        data = TextIO('A,B\n0,1\n2,N/A')
1734        kwargs = dict(delimiter=",", missing_values="N/A", names=True)
1735        test = np.genfromtxt(data, dtype=None, usemask=True, **kwargs)
1736        control = ma.array([(0, 1), (2, -1)],
1737                           mask=[(False, False), (False, True)],
1738                           dtype=[('A', int), ('B', int)])
1739        assert_equal(test, control)
1740        assert_equal(test.mask, control.mask)
1741        #
1742        data.seek(0)
1743        test = np.genfromtxt(data, usemask=True, **kwargs)
1744        control = ma.array([(0, 1), (2, -1)],
1745                           mask=[(False, False), (False, True)],
1746                           dtype=[('A', float), ('B', float)])
1747        assert_equal(test, control)
1748        assert_equal(test.mask, control.mask)
1749
1750    def test_user_missing_values(self):
1751        data = "A, B, C\n0, 0., 0j\n1, N/A, 1j\n-9, 2.2, N/A\n3, -99, 3j"
1752        basekwargs = dict(dtype=None, delimiter=",", names=True,)
1753        mdtype = [('A', int), ('B', float), ('C', complex)]
1754        #
1755        test = np.genfromtxt(TextIO(data), missing_values="N/A",
1756                            **basekwargs)
1757        control = ma.array([(0, 0.0, 0j), (1, -999, 1j),
1758                            (-9, 2.2, -999j), (3, -99, 3j)],
1759                           mask=[(0, 0, 0), (0, 1, 0), (0, 0, 1), (0, 0, 0)],
1760                           dtype=mdtype)
1761        assert_equal(test, control)
1762        #
1763        basekwargs['dtype'] = mdtype
1764        test = np.genfromtxt(TextIO(data),
1765                            missing_values={0: -9, 1: -99, 2: -999j}, usemask=True, **basekwargs)
1766        control = ma.array([(0, 0.0, 0j), (1, -999, 1j),
1767                            (-9, 2.2, -999j), (3, -99, 3j)],
1768                           mask=[(0, 0, 0), (0, 1, 0), (1, 0, 1), (0, 1, 0)],
1769                           dtype=mdtype)
1770        assert_equal(test, control)
1771        #
1772        test = np.genfromtxt(TextIO(data),
1773                            missing_values={0: -9, 'B': -99, 'C': -999j},
1774                            usemask=True,
1775                            **basekwargs)
1776        control = ma.array([(0, 0.0, 0j), (1, -999, 1j),
1777                            (-9, 2.2, -999j), (3, -99, 3j)],
1778                           mask=[(0, 0, 0), (0, 1, 0), (1, 0, 1), (0, 1, 0)],
1779                           dtype=mdtype)
1780        assert_equal(test, control)
1781
1782    def test_user_filling_values(self):
1783        # Test with missing and filling values
1784        ctrl = np.array([(0, 3), (4, -999)], dtype=[('a', int), ('b', int)])
1785        data = "N/A, 2, 3\n4, ,???"
1786        kwargs = dict(delimiter=",",
1787                      dtype=int,
1788                      names="a,b,c",
1789                      missing_values={0: "N/A", 'b': " ", 2: "???"},
1790                      filling_values={0: 0, 'b': 0, 2: -999})
1791        test = np.genfromtxt(TextIO(data), **kwargs)
1792        ctrl = np.array([(0, 2, 3), (4, 0, -999)],
1793                        dtype=[(_, int) for _ in "abc"])
1794        assert_equal(test, ctrl)
1795        #
1796        test = np.genfromtxt(TextIO(data), usecols=(0, -1), **kwargs)
1797        ctrl = np.array([(0, 3), (4, -999)], dtype=[(_, int) for _ in "ac"])
1798        assert_equal(test, ctrl)
1799
1800        data2 = "1,2,*,4\n5,*,7,8\n"
1801        test = np.genfromtxt(TextIO(data2), delimiter=',', dtype=int,
1802                             missing_values="*", filling_values=0)
1803        ctrl = np.array([[1, 2, 0, 4], [5, 0, 7, 8]])
1804        assert_equal(test, ctrl)
1805        test = np.genfromtxt(TextIO(data2), delimiter=',', dtype=int,
1806                             missing_values="*", filling_values=-1)
1807        ctrl = np.array([[1, 2, -1, 4], [5, -1, 7, 8]])
1808        assert_equal(test, ctrl)
1809
1810    def test_withmissing_float(self):
1811        data = TextIO('A,B\n0,1.5\n2,-999.00')
1812        test = np.genfromtxt(data, dtype=None, delimiter=',',
1813                            missing_values='-999.0', names=True, usemask=True)
1814        control = ma.array([(0, 1.5), (2, -1.)],
1815                           mask=[(False, False), (False, True)],
1816                           dtype=[('A', int), ('B', float)])
1817        assert_equal(test, control)
1818        assert_equal(test.mask, control.mask)
1819
1820    def test_with_masked_column_uniform(self):
1821        # Test masked column
1822        data = TextIO('1 2 3\n4 5 6\n')
1823        test = np.genfromtxt(data, dtype=None,
1824                             missing_values='2,5', usemask=True)
1825        control = ma.array([[1, 2, 3], [4, 5, 6]], mask=[[0, 1, 0], [0, 1, 0]])
1826        assert_equal(test, control)
1827
1828    def test_with_masked_column_various(self):
1829        # Test masked column
1830        data = TextIO('True 2 3\nFalse 5 6\n')
1831        test = np.genfromtxt(data, dtype=None,
1832                             missing_values='2,5', usemask=True)
1833        control = ma.array([(1, 2, 3), (0, 5, 6)],
1834                           mask=[(0, 1, 0), (0, 1, 0)],
1835                           dtype=[('f0', bool), ('f1', bool), ('f2', int)])
1836        assert_equal(test, control)
1837
1838    def test_invalid_raise(self):
1839        # Test invalid raise
1840        data = ["1, 1, 1, 1, 1"] * 50
1841        for i in range(5):
1842            data[10 * i] = "2, 2, 2, 2 2"
1843        data.insert(0, "a, b, c, d, e")
1844        mdata = TextIO("\n".join(data))
1845
1846        kwargs = dict(delimiter=",", dtype=None, names=True)
1847        def f():
1848            return np.genfromtxt(mdata, invalid_raise=False, **kwargs)
1849        mtest = assert_warns(ConversionWarning, f)
1850        assert_equal(len(mtest), 45)
1851        assert_equal(mtest, np.ones(45, dtype=[(_, int) for _ in 'abcde']))
1852        #
1853        mdata.seek(0)
1854        assert_raises(ValueError, np.genfromtxt, mdata,
1855                      delimiter=",", names=True)
1856
1857    def test_invalid_raise_with_usecols(self):
1858        # Test invalid_raise with usecols
1859        data = ["1, 1, 1, 1, 1"] * 50
1860        for i in range(5):
1861            data[10 * i] = "2, 2, 2, 2 2"
1862        data.insert(0, "a, b, c, d, e")
1863        mdata = TextIO("\n".join(data))
1864
1865        kwargs = dict(delimiter=",", dtype=None, names=True,
1866                      invalid_raise=False)
1867        def f():
1868            return np.genfromtxt(mdata, usecols=(0, 4), **kwargs)
1869        mtest = assert_warns(ConversionWarning, f)
1870        assert_equal(len(mtest), 45)
1871        assert_equal(mtest, np.ones(45, dtype=[(_, int) for _ in 'ae']))
1872        #
1873        mdata.seek(0)
1874        mtest = np.genfromtxt(mdata, usecols=(0, 1), **kwargs)
1875        assert_equal(len(mtest), 50)
1876        control = np.ones(50, dtype=[(_, int) for _ in 'ab'])
1877        control[[10 * _ for _ in range(5)]] = (2, 2)
1878        assert_equal(mtest, control)
1879
1880    def test_inconsistent_dtype(self):
1881        # Test inconsistent dtype
1882        data = ["1, 1, 1, 1, -1.1"] * 50
1883        mdata = TextIO("\n".join(data))
1884
1885        converters = {4: lambda x: "(%s)" % x.decode()}
1886        kwargs = dict(delimiter=",", converters=converters,
1887                      dtype=[(_, int) for _ in 'abcde'],)
1888        assert_raises(ValueError, np.genfromtxt, mdata, **kwargs)
1889
1890    def test_default_field_format(self):
1891        # Test default format
1892        data = "0, 1, 2.3\n4, 5, 6.7"
1893        mtest = np.genfromtxt(TextIO(data),
1894                             delimiter=",", dtype=None, defaultfmt="f%02i")
1895        ctrl = np.array([(0, 1, 2.3), (4, 5, 6.7)],
1896                        dtype=[("f00", int), ("f01", int), ("f02", float)])
1897        assert_equal(mtest, ctrl)
1898
1899    def test_single_dtype_wo_names(self):
1900        # Test single dtype w/o names
1901        data = "0, 1, 2.3\n4, 5, 6.7"
1902        mtest = np.genfromtxt(TextIO(data),
1903                             delimiter=",", dtype=float, defaultfmt="f%02i")
1904        ctrl = np.array([[0., 1., 2.3], [4., 5., 6.7]], dtype=float)
1905        assert_equal(mtest, ctrl)
1906
1907    def test_single_dtype_w_explicit_names(self):
1908        # Test single dtype w explicit names
1909        data = "0, 1, 2.3\n4, 5, 6.7"
1910        mtest = np.genfromtxt(TextIO(data),
1911                             delimiter=",", dtype=float, names="a, b, c")
1912        ctrl = np.array([(0., 1., 2.3), (4., 5., 6.7)],
1913                        dtype=[(_, float) for _ in "abc"])
1914        assert_equal(mtest, ctrl)
1915
1916    def test_single_dtype_w_implicit_names(self):
1917        # Test single dtype w implicit names
1918        data = "a, b, c\n0, 1, 2.3\n4, 5, 6.7"
1919        mtest = np.genfromtxt(TextIO(data),
1920                             delimiter=",", dtype=float, names=True)
1921        ctrl = np.array([(0., 1., 2.3), (4., 5., 6.7)],
1922                        dtype=[(_, float) for _ in "abc"])
1923        assert_equal(mtest, ctrl)
1924
1925    def test_easy_structured_dtype(self):
1926        # Test easy structured dtype
1927        data = "0, 1, 2.3\n4, 5, 6.7"
1928        mtest = np.genfromtxt(TextIO(data), delimiter=",",
1929                             dtype=(int, float, float), defaultfmt="f_%02i")
1930        ctrl = np.array([(0, 1., 2.3), (4, 5., 6.7)],
1931                        dtype=[("f_00", int), ("f_01", float), ("f_02", float)])
1932        assert_equal(mtest, ctrl)
1933
1934    def test_autostrip(self):
1935        # Test autostrip
1936        data = "01/01/2003  , 1.3,   abcde"
1937        kwargs = dict(delimiter=",", dtype=None)
1938        with warnings.catch_warnings(record=True) as w:
1939            warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
1940            mtest = np.genfromtxt(TextIO(data), **kwargs)
1941            assert_(w[0].category is np.VisibleDeprecationWarning)
1942        ctrl = np.array([('01/01/2003  ', 1.3, '   abcde')],
1943                        dtype=[('f0', '|S12'), ('f1', float), ('f2', '|S8')])
1944        assert_equal(mtest, ctrl)
1945        with warnings.catch_warnings(record=True) as w:
1946            warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
1947            mtest = np.genfromtxt(TextIO(data), autostrip=True, **kwargs)
1948            assert_(w[0].category is np.VisibleDeprecationWarning)
1949        ctrl = np.array([('01/01/2003', 1.3, 'abcde')],
1950                        dtype=[('f0', '|S10'), ('f1', float), ('f2', '|S5')])
1951        assert_equal(mtest, ctrl)
1952
1953    def test_replace_space(self):
1954        # Test the 'replace_space' option
1955        txt = "A.A, B (B), C:C\n1, 2, 3.14"
1956        # Test default: replace ' ' by '_' and delete non-alphanum chars
1957        test = np.genfromtxt(TextIO(txt),
1958                             delimiter=",", names=True, dtype=None)
1959        ctrl_dtype = [("AA", int), ("B_B", int), ("CC", float)]
1960        ctrl = np.array((1, 2, 3.14), dtype=ctrl_dtype)
1961        assert_equal(test, ctrl)
1962        # Test: no replace, no delete
1963        test = np.genfromtxt(TextIO(txt),
1964                             delimiter=",", names=True, dtype=None,
1965                             replace_space='', deletechars='')
1966        ctrl_dtype = [("A.A", int), ("B (B)", int), ("C:C", float)]
1967        ctrl = np.array((1, 2, 3.14), dtype=ctrl_dtype)
1968        assert_equal(test, ctrl)
1969        # Test: no delete (spaces are replaced by _)
1970        test = np.genfromtxt(TextIO(txt),
1971                             delimiter=",", names=True, dtype=None,
1972                             deletechars='')
1973        ctrl_dtype = [("A.A", int), ("B_(B)", int), ("C:C", float)]
1974        ctrl = np.array((1, 2, 3.14), dtype=ctrl_dtype)
1975        assert_equal(test, ctrl)
1976
1977    def test_replace_space_known_dtype(self):
1978        # Test the 'replace_space' (and related) options when dtype != None
1979        txt = "A.A, B (B), C:C\n1, 2, 3"
1980        # Test default: replace ' ' by '_' and delete non-alphanum chars
1981        test = np.genfromtxt(TextIO(txt),
1982                             delimiter=",", names=True, dtype=int)
1983        ctrl_dtype = [("AA", int), ("B_B", int), ("CC", int)]
1984        ctrl = np.array((1, 2, 3), dtype=ctrl_dtype)
1985        assert_equal(test, ctrl)
1986        # Test: no replace, no delete
1987        test = np.genfromtxt(TextIO(txt),
1988                             delimiter=",", names=True, dtype=int,
1989                             replace_space='', deletechars='')
1990        ctrl_dtype = [("A.A", int), ("B (B)", int), ("C:C", int)]
1991        ctrl = np.array((1, 2, 3), dtype=ctrl_dtype)
1992        assert_equal(test, ctrl)
1993        # Test: no delete (spaces are replaced by _)
1994        test = np.genfromtxt(TextIO(txt),
1995                             delimiter=",", names=True, dtype=int,
1996                             deletechars='')
1997        ctrl_dtype = [("A.A", int), ("B_(B)", int), ("C:C", int)]
1998        ctrl = np.array((1, 2, 3), dtype=ctrl_dtype)
1999        assert_equal(test, ctrl)
2000
2001    def test_incomplete_names(self):
2002        # Test w/ incomplete names
2003        data = "A,,C\n0,1,2\n3,4,5"
2004        kwargs = dict(delimiter=",", names=True)
2005        # w/ dtype=None
2006        ctrl = np.array([(0, 1, 2), (3, 4, 5)],
2007                        dtype=[(_, int) for _ in ('A', 'f0', 'C')])
2008        test = np.genfromtxt(TextIO(data), dtype=None, **kwargs)
2009        assert_equal(test, ctrl)
2010        # w/ default dtype
2011        ctrl = np.array([(0, 1, 2), (3, 4, 5)],
2012                        dtype=[(_, float) for _ in ('A', 'f0', 'C')])
2013        test = np.genfromtxt(TextIO(data), **kwargs)
2014
2015    def test_names_auto_completion(self):
2016        # Make sure that names are properly completed
2017        data = "1 2 3\n 4 5 6"
2018        test = np.genfromtxt(TextIO(data),
2019                             dtype=(int, float, int), names="a")
2020        ctrl = np.array([(1, 2, 3), (4, 5, 6)],
2021                        dtype=[('a', int), ('f0', float), ('f1', int)])
2022        assert_equal(test, ctrl)
2023
2024    def test_names_with_usecols_bug1636(self):
2025        # Make sure we pick up the right names w/ usecols
2026        data = "A,B,C,D,E\n0,1,2,3,4\n0,1,2,3,4\n0,1,2,3,4"
2027        ctrl_names = ("A", "C", "E")
2028        test = np.genfromtxt(TextIO(data),
2029                             dtype=(int, int, int), delimiter=",",
2030                             usecols=(0, 2, 4), names=True)
2031        assert_equal(test.dtype.names, ctrl_names)
2032        #
2033        test = np.genfromtxt(TextIO(data),
2034                             dtype=(int, int, int), delimiter=",",
2035                             usecols=("A", "C", "E"), names=True)
2036        assert_equal(test.dtype.names, ctrl_names)
2037        #
2038        test = np.genfromtxt(TextIO(data),
2039                             dtype=int, delimiter=",",
2040                             usecols=("A", "C", "E"), names=True)
2041        assert_equal(test.dtype.names, ctrl_names)
2042
2043    def test_fixed_width_names(self):
2044        # Test fix-width w/ names
2045        data = "    A    B   C\n    0    1 2.3\n   45   67   9."
2046        kwargs = dict(delimiter=(5, 5, 4), names=True, dtype=None)
2047        ctrl = np.array([(0, 1, 2.3), (45, 67, 9.)],
2048                        dtype=[('A', int), ('B', int), ('C', float)])
2049        test = np.genfromtxt(TextIO(data), **kwargs)
2050        assert_equal(test, ctrl)
2051        #
2052        kwargs = dict(delimiter=5, names=True, dtype=None)
2053        ctrl = np.array([(0, 1, 2.3), (45, 67, 9.)],
2054                        dtype=[('A', int), ('B', int), ('C', float)])
2055        test = np.genfromtxt(TextIO(data), **kwargs)
2056        assert_equal(test, ctrl)
2057
2058    def test_filling_values(self):
2059        # Test missing values
2060        data = b"1, 2, 3\n1, , 5\n0, 6, \n"
2061        kwargs = dict(delimiter=",", dtype=None, filling_values=-999)
2062        ctrl = np.array([[1, 2, 3], [1, -999, 5], [0, 6, -999]], dtype=int)
2063        test = np.genfromtxt(TextIO(data), **kwargs)
2064        assert_equal(test, ctrl)
2065
2066    def test_comments_is_none(self):
2067        # Github issue 329 (None was previously being converted to 'None').
2068        with warnings.catch_warnings(record=True) as w:
2069            warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
2070            test = np.genfromtxt(TextIO("test1,testNonetherestofthedata"),
2071                                 dtype=None, comments=None, delimiter=',')
2072            assert_(w[0].category is np.VisibleDeprecationWarning)
2073        assert_equal(test[1], b'testNonetherestofthedata')
2074        with warnings.catch_warnings(record=True) as w:
2075            warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
2076            test = np.genfromtxt(TextIO("test1, testNonetherestofthedata"),
2077                                 dtype=None, comments=None, delimiter=',')
2078            assert_(w[0].category is np.VisibleDeprecationWarning)
2079        assert_equal(test[1], b' testNonetherestofthedata')
2080
2081    def test_latin1(self):
2082        latin1 = b'\xf6\xfc\xf6'
2083        norm = b"norm1,norm2,norm3\n"
2084        enc = b"test1,testNonethe" + latin1 + b",test3\n"
2085        s = norm + enc + norm
2086        with warnings.catch_warnings(record=True) as w:
2087            warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
2088            test = np.genfromtxt(TextIO(s),
2089                                 dtype=None, comments=None, delimiter=',')
2090            assert_(w[0].category is np.VisibleDeprecationWarning)
2091        assert_equal(test[1, 0], b"test1")
2092        assert_equal(test[1, 1], b"testNonethe" + latin1)
2093        assert_equal(test[1, 2], b"test3")
2094        test = np.genfromtxt(TextIO(s),
2095                             dtype=None, comments=None, delimiter=',',
2096                             encoding='latin1')
2097        assert_equal(test[1, 0], u"test1")
2098        assert_equal(test[1, 1], u"testNonethe" + latin1.decode('latin1'))
2099        assert_equal(test[1, 2], u"test3")
2100
2101        with warnings.catch_warnings(record=True) as w:
2102            warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
2103            test = np.genfromtxt(TextIO(b"0,testNonethe" + latin1),
2104                                 dtype=None, comments=None, delimiter=',')
2105            assert_(w[0].category is np.VisibleDeprecationWarning)
2106        assert_equal(test['f0'], 0)
2107        assert_equal(test['f1'], b"testNonethe" + latin1)
2108
2109    def test_binary_decode_autodtype(self):
2110        utf16 = b'\xff\xfeh\x04 \x00i\x04 \x00j\x04'
2111        v = self.loadfunc(BytesIO(utf16), dtype=None, encoding='UTF-16')
2112        assert_array_equal(v, np.array(utf16.decode('UTF-16').split()))
2113
2114    def test_utf8_byte_encoding(self):
2115        utf8 = b"\xcf\x96"
2116        norm = b"norm1,norm2,norm3\n"
2117        enc = b"test1,testNonethe" + utf8 + b",test3\n"
2118        s = norm + enc + norm
2119        with warnings.catch_warnings(record=True) as w:
2120            warnings.filterwarnings('always', '', np.VisibleDeprecationWarning)
2121            test = np.genfromtxt(TextIO(s),
2122                                 dtype=None, comments=None, delimiter=',')
2123            assert_(w[0].category is np.VisibleDeprecationWarning)
2124        ctl = np.array([
2125                 [b'norm1', b'norm2', b'norm3'],
2126                 [b'test1', b'testNonethe' + utf8, b'test3'],
2127                 [b'norm1', b'norm2', b'norm3']])
2128        assert_array_equal(test, ctl)
2129
2130    def test_utf8_file(self):
2131        utf8 = b"\xcf\x96"
2132        with temppath() as path:
2133            with open(path, "wb") as f:
2134                f.write((b"test1,testNonethe" + utf8 + b",test3\n") * 2)
2135            test = np.genfromtxt(path, dtype=None, comments=None,
2136                                 delimiter=',', encoding="UTF-8")
2137            ctl = np.array([
2138                     ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"],
2139                     ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"]],
2140                     dtype=np.unicode_)
2141            assert_array_equal(test, ctl)
2142
2143            # test a mixed dtype
2144            with open(path, "wb") as f:
2145                f.write(b"0,testNonethe" + utf8)
2146            test = np.genfromtxt(path, dtype=None, comments=None,
2147                                 delimiter=',', encoding="UTF-8")
2148            assert_equal(test['f0'], 0)
2149            assert_equal(test['f1'], "testNonethe" + utf8.decode("UTF-8"))
2150
2151    def test_utf8_file_nodtype_unicode(self):
2152        # bytes encoding with non-latin1 -> unicode upcast
2153        utf8 = u'\u03d6'
2154        latin1 = u'\xf6\xfc\xf6'
2155
2156        # skip test if cannot encode utf8 test string with preferred
2157        # encoding. The preferred encoding is assumed to be the default
2158        # encoding of io.open. Will need to change this for PyTest, maybe
2159        # using pytest.mark.xfail(raises=***).
2160        try:
2161            encoding = locale.getpreferredencoding()
2162            utf8.encode(encoding)
2163        except (UnicodeError, ImportError):
2164            pytest.skip('Skipping test_utf8_file_nodtype_unicode, '
2165                        'unable to encode utf8 in preferred encoding')
2166
2167        with temppath() as path:
2168            with io.open(path, "wt") as f:
2169                f.write(u"norm1,norm2,norm3\n")
2170                f.write(u"norm1," + latin1 + u",norm3\n")
2171                f.write(u"test1,testNonethe" + utf8 + u",test3\n")
2172            with warnings.catch_warnings(record=True) as w:
2173                warnings.filterwarnings('always', '',
2174                                        np.VisibleDeprecationWarning)
2175                test = np.genfromtxt(path, dtype=None, comments=None,
2176                                     delimiter=',')
2177                # Check for warning when encoding not specified.
2178                assert_(w[0].category is np.VisibleDeprecationWarning)
2179            ctl = np.array([
2180                     ["norm1", "norm2", "norm3"],
2181                     ["norm1", latin1, "norm3"],
2182                     ["test1", "testNonethe" + utf8, "test3"]],
2183                     dtype=np.unicode_)
2184            assert_array_equal(test, ctl)
2185
2186    def test_recfromtxt(self):
2187        #
2188        data = TextIO('A,B\n0,1\n2,3')
2189        kwargs = dict(delimiter=",", missing_values="N/A", names=True)
2190        test = np.recfromtxt(data, **kwargs)
2191        control = np.array([(0, 1), (2, 3)],
2192                           dtype=[('A', int), ('B', int)])
2193        assert_(isinstance(test, np.recarray))
2194        assert_equal(test, control)
2195        #
2196        data = TextIO('A,B\n0,1\n2,N/A')
2197        test = np.recfromtxt(data, dtype=None, usemask=True, **kwargs)
2198        control = ma.array([(0, 1), (2, -1)],
2199                           mask=[(False, False), (False, True)],
2200                           dtype=[('A', int), ('B', int)])
2201        assert_equal(test, control)
2202        assert_equal(test.mask, control.mask)
2203        assert_equal(test.A, [0, 2])
2204
2205    def test_recfromcsv(self):
2206        #
2207        data = TextIO('A,B\n0,1\n2,3')
2208        kwargs = dict(missing_values="N/A", names=True, case_sensitive=True)
2209        test = np.recfromcsv(data, dtype=None, **kwargs)
2210        control = np.array([(0, 1), (2, 3)],
2211                           dtype=[('A', int), ('B', int)])
2212        assert_(isinstance(test, np.recarray))
2213        assert_equal(test, control)
2214        #
2215        data = TextIO('A,B\n0,1\n2,N/A')
2216        test = np.recfromcsv(data, dtype=None, usemask=True, **kwargs)
2217        control = ma.array([(0, 1), (2, -1)],
2218                           mask=[(False, False), (False, True)],
2219                           dtype=[('A', int), ('B', int)])
2220        assert_equal(test, control)
2221        assert_equal(test.mask, control.mask)
2222        assert_equal(test.A, [0, 2])
2223        #
2224        data = TextIO('A,B\n0,1\n2,3')
2225        test = np.recfromcsv(data, missing_values='N/A',)
2226        control = np.array([(0, 1), (2, 3)],
2227                           dtype=[('a', int), ('b', int)])
2228        assert_(isinstance(test, np.recarray))
2229        assert_equal(test, control)
2230        #
2231        data = TextIO('A,B\n0,1\n2,3')
2232        dtype = [('a', int), ('b', float)]
2233        test = np.recfromcsv(data, missing_values='N/A', dtype=dtype)
2234        control = np.array([(0, 1), (2, 3)],
2235                           dtype=dtype)
2236        assert_(isinstance(test, np.recarray))
2237        assert_equal(test, control)
2238
2239        #gh-10394
2240        data = TextIO('color\n"red"\n"blue"')
2241        test = np.recfromcsv(data, converters={0: lambda x: x.strip(b'\"')})
2242        control = np.array([('red',), ('blue',)], dtype=[('color', (bytes, 4))])
2243        assert_equal(test.dtype, control.dtype)
2244        assert_equal(test, control)
2245
2246    def test_max_rows(self):
2247        # Test the `max_rows` keyword argument.
2248        data = '1 2\n3 4\n5 6\n7 8\n9 10\n'
2249        txt = TextIO(data)
2250        a1 = np.genfromtxt(txt, max_rows=3)
2251        a2 = np.genfromtxt(txt)
2252        assert_equal(a1, [[1, 2], [3, 4], [5, 6]])
2253        assert_equal(a2, [[7, 8], [9, 10]])
2254
2255        # max_rows must be at least 1.
2256        assert_raises(ValueError, np.genfromtxt, TextIO(data), max_rows=0)
2257
2258        # An input with several invalid rows.
2259        data = '1 1\n2 2\n0 \n3 3\n4 4\n5  \n6  \n7  \n'
2260
2261        test = np.genfromtxt(TextIO(data), max_rows=2)
2262        control = np.array([[1., 1.], [2., 2.]])
2263        assert_equal(test, control)
2264
2265        # Test keywords conflict
2266        assert_raises(ValueError, np.genfromtxt, TextIO(data), skip_footer=1,
2267                      max_rows=4)
2268
2269        # Test with invalid value
2270        assert_raises(ValueError, np.genfromtxt, TextIO(data), max_rows=4)
2271
2272        # Test with invalid not raise
2273        with suppress_warnings() as sup:
2274            sup.filter(ConversionWarning)
2275
2276            test = np.genfromtxt(TextIO(data), max_rows=4, invalid_raise=False)
2277            control = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]])
2278            assert_equal(test, control)
2279
2280            test = np.genfromtxt(TextIO(data), max_rows=5, invalid_raise=False)
2281            control = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]])
2282            assert_equal(test, control)
2283
2284        # Structured array with field names.
2285        data = 'a b\n#c d\n1 1\n2 2\n#0 \n3 3\n4 4\n5  5\n'
2286
2287        # Test with header, names and comments
2288        txt = TextIO(data)
2289        test = np.genfromtxt(txt, skip_header=1, max_rows=3, names=True)
2290        control = np.array([(1.0, 1.0), (2.0, 2.0), (3.0, 3.0)],
2291                      dtype=[('c', '<f8'), ('d', '<f8')])
2292        assert_equal(test, control)
2293        # To continue reading the same "file", don't use skip_header or
2294        # names, and use the previously determined dtype.
2295        test = np.genfromtxt(txt, max_rows=None, dtype=test.dtype)
2296        control = np.array([(4.0, 4.0), (5.0, 5.0)],
2297                      dtype=[('c', '<f8'), ('d', '<f8')])
2298        assert_equal(test, control)
2299
2300    def test_gft_using_filename(self):
2301        # Test that we can load data from a filename as well as a file
2302        # object
2303        tgt = np.arange(6).reshape((2, 3))
2304        linesep = ('\n', '\r\n', '\r')
2305
2306        for sep in linesep:
2307            data = '0 1 2' + sep + '3 4 5'
2308            with temppath() as name:
2309                with open(name, 'w') as f:
2310                    f.write(data)
2311                res = np.genfromtxt(name)
2312            assert_array_equal(res, tgt)
2313
2314    def test_gft_from_gzip(self):
2315        # Test that we can load data from a gzipped file
2316        wanted = np.arange(6).reshape((2, 3))
2317        linesep = ('\n', '\r\n', '\r')
2318
2319        for sep in linesep:
2320            data = '0 1 2' + sep + '3 4 5'
2321            s = BytesIO()
2322            with gzip.GzipFile(fileobj=s, mode='w') as g:
2323                g.write(asbytes(data))
2324
2325            with temppath(suffix='.gz2') as name:
2326                with open(name, 'w') as f:
2327                    f.write(data)
2328                assert_array_equal(np.genfromtxt(name), wanted)
2329
2330    def test_gft_using_generator(self):
2331        # gft doesn't work with unicode.
2332        def count():
2333            for i in range(10):
2334                yield asbytes("%d" % i)
2335
2336        res = np.genfromtxt(count())
2337        assert_array_equal(res, np.arange(10))
2338
2339    def test_auto_dtype_largeint(self):
2340        # Regression test for numpy/numpy#5635 whereby large integers could
2341        # cause OverflowErrors.
2342
2343        # Test the automatic definition of the output dtype
2344        #
2345        # 2**66 = 73786976294838206464 => should convert to float
2346        # 2**34 = 17179869184 => should convert to int64
2347        # 2**10 = 1024 => should convert to int (int32 on 32-bit systems,
2348        #                 int64 on 64-bit systems)
2349
2350        data = TextIO('73786976294838206464 17179869184 1024')
2351
2352        test = np.genfromtxt(data, dtype=None)
2353
2354        assert_equal(test.dtype.names, ['f0', 'f1', 'f2'])
2355
2356        assert_(test.dtype['f0'] == float)
2357        assert_(test.dtype['f1'] == np.int64)
2358        assert_(test.dtype['f2'] == np.int_)
2359
2360        assert_allclose(test['f0'], 73786976294838206464.)
2361        assert_equal(test['f1'], 17179869184)
2362        assert_equal(test['f2'], 1024)
2363
2364    def test_unpack_structured(self):
2365        # Regression test for gh-4341
2366        # Unpacking should work on structured arrays
2367        txt = TextIO("M 21 72\nF 35 58")
2368        dt = {'names': ('a', 'b', 'c'), 'formats': ('S1', 'i4', 'f4')}
2369        a, b, c = np.genfromtxt(txt, dtype=dt, unpack=True)
2370        assert_equal(a.dtype, np.dtype('S1'))
2371        assert_equal(b.dtype, np.dtype('i4'))
2372        assert_equal(c.dtype, np.dtype('f4'))
2373        assert_array_equal(a, np.array([b'M', b'F']))
2374        assert_array_equal(b, np.array([21, 35]))
2375        assert_array_equal(c, np.array([72.,  58.]))
2376
2377    def test_unpack_auto_dtype(self):
2378        # Regression test for gh-4341
2379        # Unpacking should work when dtype=None
2380        txt = TextIO("M 21 72.\nF 35 58.")
2381        expected = (np.array(["M", "F"]), np.array([21, 35]), np.array([72., 58.]))
2382        test = np.genfromtxt(txt, dtype=None, unpack=True, encoding="utf-8")
2383        for arr, result in zip(expected, test):
2384            assert_array_equal(arr, result)
2385            assert_equal(arr.dtype, result.dtype)
2386
2387    def test_unpack_single_name(self):
2388        # Regression test for gh-4341
2389        # Unpacking should work when structured dtype has only one field
2390        txt = TextIO("21\n35")
2391        dt = {'names': ('a',), 'formats': ('i4',)}
2392        expected = np.array([21, 35], dtype=np.int32)
2393        test = np.genfromtxt(txt, dtype=dt, unpack=True)
2394        assert_array_equal(expected, test)
2395        assert_equal(expected.dtype, test.dtype)
2396
2397    def test_squeeze_scalar(self):
2398        # Regression test for gh-4341
2399        # Unpacking a scalar should give zero-dim output,
2400        # even if dtype is structured
2401        txt = TextIO("1")
2402        dt = {'names': ('a',), 'formats': ('i4',)}
2403        expected = np.array((1,), dtype=np.int32)
2404        test = np.genfromtxt(txt, dtype=dt, unpack=True)
2405        assert_array_equal(expected, test)
2406        assert_equal((), test.shape)
2407        assert_equal(expected.dtype, test.dtype)
2408
2409
class TestPathUsage:
    """Test that `pathlib.Path` objects can be used in place of filenames."""

    def test_loadtxt(self):
        """Round-trip an array through `np.savetxt` and `np.loadtxt`."""
        with temppath(suffix='.txt') as path:
            path = Path(path)
            a = np.array([[1.1, 2], [3, 4]])
            np.savetxt(path, a)
            x = np.loadtxt(path)
            assert_array_equal(x, a)

    def test_save_load(self):
        """Test that pathlib.Path instances can be used with save."""
        with temppath(suffix='.npy') as path:
            path = Path(path)
            a = np.array([[1, 2], [3, 4]], int)
            np.save(path, a)
            data = np.load(path)
            assert_array_equal(data, a)

    def test_save_load_memmap(self):
        """Test that pathlib.Path instances can be loaded mem-mapped."""
        with temppath(suffix='.npy') as path:
            path = Path(path)
            a = np.array([[1, 2], [3, 4]], int)
            np.save(path, a)
            data = np.load(path, mmap_mode='r')
            assert_array_equal(data, a)
            # close the mem-mapped file
            del data
            if IS_PYPY:
                break_cycles()
                break_cycles()

    def test_save_load_memmap_readwrite(self):
        """Test that pathlib.Path instances can be written mem-mapped."""
        with temppath(suffix='.npy') as path:
            path = Path(path)
            a = np.array([[1, 2], [3, 4]], int)
            np.save(path, a)
            b = np.load(path, mmap_mode='r+')
            # Apply the same change to the in-memory array and to the
            # mem-mapped one, so a fresh load can be compared against `a`.
            a[0][0] = 5
            b[0][0] = 5
            del b  # closes the file
            if IS_PYPY:
                break_cycles()
                break_cycles()
            data = np.load(path)
            assert_array_equal(data, a)

    def test_savez_load(self):
        """Test that pathlib.Path instances can be used with savez."""
        with temppath(suffix='.npz') as path:
            path = Path(path)
            np.savez(path, lab='place holder')
            with np.load(path) as data:
                assert_array_equal(data['lab'], 'place holder')

    def test_savez_compressed_load(self):
        """Test that pathlib.Path instances can be used with savez_compressed."""
        with temppath(suffix='.npz') as path:
            path = Path(path)
            np.savez_compressed(path, lab='place holder')
            # Use the archive as a context manager so that it is closed even
            # when the assertion fails.
            with np.load(path) as data:
                assert_array_equal(data['lab'], 'place holder')

    def test_genfromtxt(self):
        """Round-trip an array through `np.savetxt` and `np.genfromtxt`."""
        with temppath(suffix='.txt') as path:
            path = Path(path)
            a = np.array([(1, 2), (3, 4)])
            np.savetxt(path, a)
            data = np.genfromtxt(path)
            assert_array_equal(a, data)

    def test_ndfromtxt(self):
        """Test outputting a standard ndarray."""
        with temppath(suffix='.txt') as path:
            path = Path(path)
            with path.open('w') as f:
                f.write('1 2\n3 4')

            control = np.array([[1, 2], [3, 4]], dtype=int)
            test = np.genfromtxt(path, dtype=int)
            assert_array_equal(test, control)

    def test_mafromtxt(self):
        """Read a comma-separated file as a masked array."""
        # From `test_fancy_dtype_alt` above
        with temppath(suffix='.txt') as path:
            path = Path(path)
            with path.open('w') as f:
                f.write('1,2,3.0\n4,5,6.0\n')

            test = np.genfromtxt(path, delimiter=',', usemask=True)
            control = ma.array([(1.0, 2.0, 3.0), (4.0, 5.0, 6.0)])
            assert_equal(test, control)

    def test_recfromtxt(self):
        """Read a named, comma-separated file with `np.recfromtxt`."""
        with temppath(suffix='.txt') as path:
            path = Path(path)
            with path.open('w') as f:
                f.write('A,B\n0,1\n2,3')

            kwargs = dict(delimiter=",", missing_values="N/A", names=True)
            test = np.recfromtxt(path, **kwargs)
            control = np.array([(0, 1), (2, 3)],
                               dtype=[('A', int), ('B', int)])
            assert_(isinstance(test, np.recarray))
            assert_equal(test, control)

    def test_recfromcsv(self):
        """Read a named CSV file with `np.recfromcsv`."""
        with temppath(suffix='.txt') as path:
            path = Path(path)
            with path.open('w') as f:
                f.write('A,B\n0,1\n2,3')

            kwargs = dict(missing_values="N/A", names=True, case_sensitive=True)
            test = np.recfromcsv(path, dtype=None, **kwargs)
            control = np.array([(0, 1), (2, 3)],
                               dtype=[('A', int), ('B', int)])
            assert_(isinstance(test, np.recarray))
            assert_equal(test, control)
2531
2532
def test_gzip_load():
    """Round-trip an array through `np.save`/`np.load` on gzip file objects."""
    a = np.random.random((5, 5))

    s = BytesIO()
    # Context managers close both gzip wrappers even if save/load or the
    # comparison raises; closing a GzipFile leaves its `fileobj` open.
    with gzip.GzipFile(fileobj=s, mode="w") as f:
        np.save(f, a)
    s.seek(0)

    with gzip.GzipFile(fileobj=s, mode="r") as f:
        assert_array_equal(np.load(f), a)
2545
2546
# These next two classes encode the minimal API needed to save()/load() arrays.
# The `test_ducktyping` test ensures they work correctly.
class JustWriter:
    """Wrapper exposing only ``write`` and ``flush`` of a file-like object."""

    def __init__(self, base):
        # The wrapped object; every call is forwarded to it.
        self.base = base

    def flush(self):
        """Forward to the wrapped object's ``flush`` and return its result."""
        target = self.base
        return target.flush()

    def write(self, s):
        """Forward `s` to the wrapped object's ``write`` and return its result."""
        target = self.base
        return target.write(s)
2558
class JustReader:
    """Wrapper exposing only ``read`` and ``seek`` of a file-like object."""

    def __init__(self, base):
        # The wrapped object; every call is forwarded to it.
        self.base = base

    def seek(self, off, whence=0):
        """Forward to the wrapped object's ``seek`` and return its result."""
        source = self.base
        return source.seek(off, whence)

    def read(self, n):
        """Forward to the wrapped object's ``read`` and return its result."""
        source = self.base
        return source.read(n)
2568
2569
def test_ducktyping():
    """Round-trip an array through `np.save`/`np.load` via the thin wrappers.

    The array is saved through a `JustWriter` and loaded back through a
    `JustReader`, both wrapping the same in-memory buffer.
    """
    original = np.random.random((5, 5))
    buffer = BytesIO()

    writer = JustWriter(buffer)
    np.save(writer, original)
    writer.flush()

    # Rewind the shared buffer before reading it back.
    buffer.seek(0)
    restored = np.load(JustReader(buffer))
    assert_array_equal(restored, original)
2582
2583
2584
def test_gzip_loadtxt():
    """Check that `np.loadtxt` reads a gzip-compressed file given by name."""
    # NamedTemporaryFile cannot be used here: on Windows a file created that
    # way cannot be reopened by another open call. Instead the gzipped test
    # data is built in memory, written to a securely opened temporary file,
    # and that file is then read by loadtxt.
    #
    # Context managers replace the manual close() calls so that every handle
    # is released even if a step in between raises.
    with BytesIO() as s:
        with gzip.GzipFile(fileobj=s, mode='w') as g:
            g.write(b'1 2 3\n')

        with temppath(suffix='.gz') as name:
            with open(name, 'wb') as f:
                f.write(s.getvalue())
            res = np.loadtxt(name)

    assert_array_equal(res, [1, 2, 3])
2604
2605
def test_gzip_loadtxt_from_string():
    """Check that `np.loadtxt` reads from an open gzip file object."""
    s = BytesIO()
    # Context managers close both gzip wrappers even on failure; closing a
    # GzipFile leaves its `fileobj` open, so `s` can be rewound and reused.
    with gzip.GzipFile(fileobj=s, mode="w") as f:
        f.write(b'1 2 3\n')
    s.seek(0)

    with gzip.GzipFile(fileobj=s, mode="r") as f:
        assert_array_equal(np.loadtxt(f), [1, 2, 3])
2615
2616
def test_npzfile_dict():
    """Check the dict-like interface of the object `np.load` returns for savez data.

    Covers membership tests, ``keys()``, ``items()``, ``len`` of the items
    and plain iteration.
    """
    s = BytesIO()
    x = np.zeros((3, 3))
    y = np.zeros((3, 3))

    np.savez(s, x=x, y=y)
    s.seek(0)

    # Load inside a ``with`` block so the archive is closed when the checks
    # finish, including when one of them fails.
    with np.load(s) as z:
        assert_('x' in z)
        assert_('y' in z)
        assert_('x' in z.keys())
        assert_('y' in z.keys())

        for f, a in z.items():
            assert_(f in ['x', 'y'])
            assert_equal(a.shape, (3, 3))

        assert_(len(z.items()) == 2)

        for f in z:
            assert_(f in ['x', 'y'])

        assert_('x' in z.keys())
2642
2643
@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts")
def test_load_refcount():
    # Check that objects returned by np.load are directly freed based on
    # their refcount, rather than needing the gc to collect them.
    # The test is skipped when HAS_REFCOUNT is false (see the decorator).

    # Save one small list into an in-memory buffer with np.savez, then rewind
    # the buffer so that np.load reads it from the start.
    f = BytesIO()
    np.savez(f, [1, 2, 3])
    f.seek(0)

    # The value returned by np.load is not bound to any name here.
    with assert_no_gc_cycles():
        np.load(f)

    # Same kind of check for np.loadtxt, using a structured dtype whose two
    # fields each hold two 'u1' values. The text is read from a fresh TextIO,
    # not from `f`.
    f.seek(0)
    dt = [("a", 'u1', 2), ("b", 'u1', 2)]
    with assert_no_gc_cycles():
        x = np.loadtxt(TextIO("0 1 2 3"), dtype=dt)
        assert_equal(x, np.array([((0, 1), (2, 3))], dtype=dt))
2661