1import sys 2import gc 3import gzip 4import os 5import threading 6import time 7import warnings 8import io 9import re 10import pytest 11from pathlib import Path 12from tempfile import NamedTemporaryFile 13from io import BytesIO, StringIO 14from datetime import datetime 15import locale 16from multiprocessing import Process, Value 17from ctypes import c_bool 18 19import numpy as np 20import numpy.ma as ma 21from numpy.lib._iotools import ConverterError, ConversionWarning 22from numpy.compat import asbytes 23from numpy.ma.testutils import assert_equal 24from numpy.testing import ( 25 assert_warns, assert_, assert_raises_regex, assert_raises, 26 assert_allclose, assert_array_equal, temppath, tempdir, IS_PYPY, 27 HAS_REFCOUNT, suppress_warnings, assert_no_gc_cycles, assert_no_warnings, 28 break_cycles 29 ) 30from numpy.testing._private.utils import requires_memory 31 32 33class TextIO(BytesIO): 34 """Helper IO class. 35 36 Writes encode strings to bytes if needed, reads return bytes. 37 This makes it easier to emulate files opened in binary mode 38 without needing to explicitly convert strings to bytes in 39 setting up the test data. 40 41 """ 42 def __init__(self, s=""): 43 BytesIO.__init__(self, asbytes(s)) 44 45 def write(self, s): 46 BytesIO.write(self, asbytes(s)) 47 48 def writelines(self, lines): 49 BytesIO.writelines(self, [asbytes(s) for s in lines]) 50 51 52IS_64BIT = sys.maxsize > 2**32 53try: 54 import bz2 55 HAS_BZ2 = True 56except ImportError: 57 HAS_BZ2 = False 58try: 59 import lzma 60 HAS_LZMA = True 61except ImportError: 62 HAS_LZMA = False 63 64 65def strptime(s, fmt=None): 66 """ 67 This function is available in the datetime module only from Python >= 68 2.5. 69 70 """ 71 if type(s) == bytes: 72 s = s.decode("latin1") 73 return datetime(*time.strptime(s, fmt)[:3]) 74 75 76class RoundtripTest: 77 def roundtrip(self, save_func, *args, **kwargs): 78 """ 79 save_func : callable 80 Function used to save arrays to file. 81 file_on_disk : bool 82 If true, store the file on disk, instead of in a 83 string buffer. 84 save_kwds : dict 85 Parameters passed to `save_func`. 86 load_kwds : dict 87 Parameters passed to `numpy.load`. 88 args : tuple of arrays 89 Arrays stored to file. 90 91 """ 92 save_kwds = kwargs.get('save_kwds', {}) 93 load_kwds = kwargs.get('load_kwds', {"allow_pickle": True}) 94 file_on_disk = kwargs.get('file_on_disk', False) 95 96 if file_on_disk: 97 target_file = NamedTemporaryFile(delete=False) 98 load_file = target_file.name 99 else: 100 target_file = BytesIO() 101 load_file = target_file 102 103 try: 104 arr = args 105 106 save_func(target_file, *arr, **save_kwds) 107 target_file.flush() 108 target_file.seek(0) 109 110 if sys.platform == 'win32' and not isinstance(target_file, BytesIO): 111 target_file.close() 112 113 arr_reloaded = np.load(load_file, **load_kwds) 114 115 self.arr = arr 116 self.arr_reloaded = arr_reloaded 117 finally: 118 if not isinstance(target_file, BytesIO): 119 target_file.close() 120 # holds an open file descriptor so it can't be deleted on win 121 if 'arr_reloaded' in locals(): 122 if not isinstance(arr_reloaded, np.lib.npyio.NpzFile): 123 os.remove(target_file.name) 124 125 def check_roundtrips(self, a): 126 self.roundtrip(a) 127 self.roundtrip(a, file_on_disk=True) 128 self.roundtrip(np.asfortranarray(a)) 129 self.roundtrip(np.asfortranarray(a), file_on_disk=True) 130 if a.shape[0] > 1: 131 # neither C nor Fortran contiguous for 2D arrays or more 132 self.roundtrip(np.asfortranarray(a)[1:]) 133 self.roundtrip(np.asfortranarray(a)[1:], file_on_disk=True) 134 135 def test_array(self): 136 a = np.array([], float) 137 self.check_roundtrips(a) 138 139 a = np.array([[1, 2], [3, 4]], float) 140 self.check_roundtrips(a) 141 142 a = np.array([[1, 2], [3, 4]], int) 143 self.check_roundtrips(a) 144 145 a = np.array([[1 + 5j, 2 + 6j], [3 + 7j, 4 + 8j]], dtype=np.csingle) 146 self.check_roundtrips(a) 147 148 a = np.array([[1 + 5j, 2 + 6j], [3 + 7j, 4 + 8j]], dtype=np.cdouble) 149 self.check_roundtrips(a) 150 151 def test_array_object(self): 152 a = np.array([], object) 153 self.check_roundtrips(a) 154 155 a = np.array([[1, 2], [3, 4]], object) 156 self.check_roundtrips(a) 157 158 def test_1D(self): 159 a = np.array([1, 2, 3, 4], int) 160 self.roundtrip(a) 161 162 @pytest.mark.skipif(sys.platform == 'win32', reason="Fails on Win32") 163 def test_mmap(self): 164 a = np.array([[1, 2.5], [4, 7.3]]) 165 self.roundtrip(a, file_on_disk=True, load_kwds={'mmap_mode': 'r'}) 166 167 a = np.asfortranarray([[1, 2.5], [4, 7.3]]) 168 self.roundtrip(a, file_on_disk=True, load_kwds={'mmap_mode': 'r'}) 169 170 def test_record(self): 171 a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')]) 172 self.check_roundtrips(a) 173 174 @pytest.mark.slow 175 def test_format_2_0(self): 176 dt = [(("%d" % i) * 100, float) for i in range(500)] 177 a = np.ones(1000, dtype=dt) 178 with warnings.catch_warnings(record=True): 179 warnings.filterwarnings('always', '', UserWarning) 180 self.check_roundtrips(a) 181 182 183class TestSaveLoad(RoundtripTest): 184 def roundtrip(self, *args, **kwargs): 185 RoundtripTest.roundtrip(self, np.save, *args, **kwargs) 186 assert_equal(self.arr[0], self.arr_reloaded) 187 assert_equal(self.arr[0].dtype, self.arr_reloaded.dtype) 188 assert_equal(self.arr[0].flags.fnc, self.arr_reloaded.flags.fnc) 189 190 191class TestSavezLoad(RoundtripTest): 192 def roundtrip(self, *args, **kwargs): 193 RoundtripTest.roundtrip(self, np.savez, *args, **kwargs) 194 try: 195 for n, arr in enumerate(self.arr): 196 reloaded = self.arr_reloaded['arr_%d' % n] 197 assert_equal(arr, reloaded) 198 assert_equal(arr.dtype, reloaded.dtype) 199 assert_equal(arr.flags.fnc, reloaded.flags.fnc) 200 finally: 201 # delete tempfile, must be done here on windows 202 if self.arr_reloaded.fid: 203 self.arr_reloaded.fid.close() 204 os.remove(self.arr_reloaded.fid.name) 205 206 @pytest.mark.skipif(not IS_64BIT, reason="Needs 64bit platform") 207 @pytest.mark.slow 208 def test_big_arrays(self): 209 L = (1 << 31) + 100000 210 a = np.empty(L, dtype=np.uint8) 211 with temppath(prefix="numpy_test_big_arrays_", suffix=".npz") as tmp: 212 np.savez(tmp, a=a) 213 del a 214 npfile = np.load(tmp) 215 a = npfile['a'] # Should succeed 216 npfile.close() 217 del a # Avoid pyflakes unused variable warning. 218 219 def test_multiple_arrays(self): 220 a = np.array([[1, 2], [3, 4]], float) 221 b = np.array([[1 + 2j, 2 + 7j], [3 - 6j, 4 + 12j]], complex) 222 self.roundtrip(a, b) 223 224 def test_named_arrays(self): 225 a = np.array([[1, 2], [3, 4]], float) 226 b = np.array([[1 + 2j, 2 + 7j], [3 - 6j, 4 + 12j]], complex) 227 c = BytesIO() 228 np.savez(c, file_a=a, file_b=b) 229 c.seek(0) 230 l = np.load(c) 231 assert_equal(a, l['file_a']) 232 assert_equal(b, l['file_b']) 233 234 def test_BagObj(self): 235 a = np.array([[1, 2], [3, 4]], float) 236 b = np.array([[1 + 2j, 2 + 7j], [3 - 6j, 4 + 12j]], complex) 237 c = BytesIO() 238 np.savez(c, file_a=a, file_b=b) 239 c.seek(0) 240 l = np.load(c) 241 assert_equal(sorted(dir(l.f)), ['file_a','file_b']) 242 assert_equal(a, l.f.file_a) 243 assert_equal(b, l.f.file_b) 244 245 def test_savez_filename_clashes(self): 246 # Test that issue #852 is fixed 247 # and savez functions in multithreaded environment 248 249 def writer(error_list): 250 with temppath(suffix='.npz') as tmp: 251 arr = np.random.randn(500, 500) 252 try: 253 np.savez(tmp, arr=arr) 254 except OSError as err: 255 error_list.append(err) 256 257 errors = [] 258 threads = [threading.Thread(target=writer, args=(errors,)) 259 for j in range(3)] 260 for t in threads: 261 t.start() 262 for t in threads: 263 t.join() 264 265 if errors: 266 raise AssertionError(errors) 267 268 def test_not_closing_opened_fid(self): 269 # Test that issue #2178 is fixed: 270 # verify could seek on 'loaded' file 271 with temppath(suffix='.npz') as tmp: 272 with open(tmp, 'wb') as fp: 273 np.savez(fp, data='LOVELY LOAD') 274 with open(tmp, 'rb', 10000) as fp: 275 fp.seek(0) 276 assert_(not fp.closed) 277 np.load(fp)['data'] 278 # fp must not get closed by .load 279 assert_(not fp.closed) 280 fp.seek(0) 281 assert_(not fp.closed) 282 283 @pytest.mark.slow_pypy 284 def test_closing_fid(self): 285 # Test that issue #1517 (too many opened files) remains closed 286 # It might be a "weak" test since failed to get triggered on 287 # e.g. Debian sid of 2012 Jul 05 but was reported to 288 # trigger the failure on Ubuntu 10.04: 289 # http://projects.scipy.org/numpy/ticket/1517#comment:2 290 with temppath(suffix='.npz') as tmp: 291 np.savez(tmp, data='LOVELY LOAD') 292 # We need to check if the garbage collector can properly close 293 # numpy npz file returned by np.load when their reference count 294 # goes to zero. Python 3 running in debug mode raises a 295 # ResourceWarning when file closing is left to the garbage 296 # collector, so we catch the warnings. 297 with suppress_warnings() as sup: 298 sup.filter(ResourceWarning) # TODO: specify exact message 299 for i in range(1, 1025): 300 try: 301 np.load(tmp)["data"] 302 except Exception as e: 303 msg = "Failed to load data from a file: %s" % e 304 raise AssertionError(msg) 305 finally: 306 if IS_PYPY: 307 gc.collect() 308 309 def test_closing_zipfile_after_load(self): 310 # Check that zipfile owns file and can close it. This needs to 311 # pass a file name to load for the test. On windows failure will 312 # cause a second error will be raised when the attempt to remove 313 # the open file is made. 314 prefix = 'numpy_test_closing_zipfile_after_load_' 315 with temppath(suffix='.npz', prefix=prefix) as tmp: 316 np.savez(tmp, lab='place holder') 317 data = np.load(tmp) 318 fp = data.zip.fp 319 data.close() 320 assert_(fp.closed) 321 322 323class TestSaveTxt: 324 def test_array(self): 325 a = np.array([[1, 2], [3, 4]], float) 326 fmt = "%.18e" 327 c = BytesIO() 328 np.savetxt(c, a, fmt=fmt) 329 c.seek(0) 330 assert_equal(c.readlines(), 331 [asbytes((fmt + ' ' + fmt + '\n') % (1, 2)), 332 asbytes((fmt + ' ' + fmt + '\n') % (3, 4))]) 333 334 a = np.array([[1, 2], [3, 4]], int) 335 c = BytesIO() 336 np.savetxt(c, a, fmt='%d') 337 c.seek(0) 338 assert_equal(c.readlines(), [b'1 2\n', b'3 4\n']) 339 340 def test_1D(self): 341 a = np.array([1, 2, 3, 4], int) 342 c = BytesIO() 343 np.savetxt(c, a, fmt='%d') 344 c.seek(0) 345 lines = c.readlines() 346 assert_equal(lines, [b'1\n', b'2\n', b'3\n', b'4\n']) 347 348 def test_0D_3D(self): 349 c = BytesIO() 350 assert_raises(ValueError, np.savetxt, c, np.array(1)) 351 assert_raises(ValueError, np.savetxt, c, np.array([[[1], [2]]])) 352 353 def test_structured(self): 354 a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')]) 355 c = BytesIO() 356 np.savetxt(c, a, fmt='%d') 357 c.seek(0) 358 assert_equal(c.readlines(), [b'1 2\n', b'3 4\n']) 359 360 def test_structured_padded(self): 361 # gh-13297 362 a = np.array([(1, 2, 3),(4, 5, 6)], dtype=[ 363 ('foo', 'i4'), ('bar', 'i4'), ('baz', 'i4') 364 ]) 365 c = BytesIO() 366 np.savetxt(c, a[['foo', 'baz']], fmt='%d') 367 c.seek(0) 368 assert_equal(c.readlines(), [b'1 3\n', b'4 6\n']) 369 370 def test_multifield_view(self): 371 a = np.ones(1, dtype=[('x', 'i4'), ('y', 'i4'), ('z', 'f4')]) 372 v = a[['x', 'z']] 373 with temppath(suffix='.npy') as path: 374 path = Path(path) 375 np.save(path, v) 376 data = np.load(path) 377 assert_array_equal(data, v) 378 379 def test_delimiter(self): 380 a = np.array([[1., 2.], [3., 4.]]) 381 c = BytesIO() 382 np.savetxt(c, a, delimiter=',', fmt='%d') 383 c.seek(0) 384 assert_equal(c.readlines(), [b'1,2\n', b'3,4\n']) 385 386 def test_format(self): 387 a = np.array([(1, 2), (3, 4)]) 388 c = BytesIO() 389 # Sequence of formats 390 np.savetxt(c, a, fmt=['%02d', '%3.1f']) 391 c.seek(0) 392 assert_equal(c.readlines(), [b'01 2.0\n', b'03 4.0\n']) 393 394 # A single multiformat string 395 c = BytesIO() 396 np.savetxt(c, a, fmt='%02d : %3.1f') 397 c.seek(0) 398 lines = c.readlines() 399 assert_equal(lines, [b'01 : 2.0\n', b'03 : 4.0\n']) 400 401 # Specify delimiter, should be overridden 402 c = BytesIO() 403 np.savetxt(c, a, fmt='%02d : %3.1f', delimiter=',') 404 c.seek(0) 405 lines = c.readlines() 406 assert_equal(lines, [b'01 : 2.0\n', b'03 : 4.0\n']) 407 408 # Bad fmt, should raise a ValueError 409 c = BytesIO() 410 assert_raises(ValueError, np.savetxt, c, a, fmt=99) 411 412 def test_header_footer(self): 413 # Test the functionality of the header and footer keyword argument. 414 415 c = BytesIO() 416 a = np.array([(1, 2), (3, 4)], dtype=int) 417 test_header_footer = 'Test header / footer' 418 # Test the header keyword argument 419 np.savetxt(c, a, fmt='%1d', header=test_header_footer) 420 c.seek(0) 421 assert_equal(c.read(), 422 asbytes('# ' + test_header_footer + '\n1 2\n3 4\n')) 423 # Test the footer keyword argument 424 c = BytesIO() 425 np.savetxt(c, a, fmt='%1d', footer=test_header_footer) 426 c.seek(0) 427 assert_equal(c.read(), 428 asbytes('1 2\n3 4\n# ' + test_header_footer + '\n')) 429 # Test the commentstr keyword argument used on the header 430 c = BytesIO() 431 commentstr = '% ' 432 np.savetxt(c, a, fmt='%1d', 433 header=test_header_footer, comments=commentstr) 434 c.seek(0) 435 assert_equal(c.read(), 436 asbytes(commentstr + test_header_footer + '\n' + '1 2\n3 4\n')) 437 # Test the commentstr keyword argument used on the footer 438 c = BytesIO() 439 commentstr = '% ' 440 np.savetxt(c, a, fmt='%1d', 441 footer=test_header_footer, comments=commentstr) 442 c.seek(0) 443 assert_equal(c.read(), 444 asbytes('1 2\n3 4\n' + commentstr + test_header_footer + '\n')) 445 446 def test_file_roundtrip(self): 447 with temppath() as name: 448 a = np.array([(1, 2), (3, 4)]) 449 np.savetxt(name, a) 450 b = np.loadtxt(name) 451 assert_array_equal(a, b) 452 453 def test_complex_arrays(self): 454 ncols = 2 455 nrows = 2 456 a = np.zeros((ncols, nrows), dtype=np.complex128) 457 re = np.pi 458 im = np.e 459 a[:] = re + 1.0j * im 460 461 # One format only 462 c = BytesIO() 463 np.savetxt(c, a, fmt=' %+.3e') 464 c.seek(0) 465 lines = c.readlines() 466 assert_equal( 467 lines, 468 [b' ( +3.142e+00+ +2.718e+00j) ( +3.142e+00+ +2.718e+00j)\n', 469 b' ( +3.142e+00+ +2.718e+00j) ( +3.142e+00+ +2.718e+00j)\n']) 470 471 # One format for each real and imaginary part 472 c = BytesIO() 473 np.savetxt(c, a, fmt=' %+.3e' * 2 * ncols) 474 c.seek(0) 475 lines = c.readlines() 476 assert_equal( 477 lines, 478 [b' +3.142e+00 +2.718e+00 +3.142e+00 +2.718e+00\n', 479 b' +3.142e+00 +2.718e+00 +3.142e+00 +2.718e+00\n']) 480 481 # One format for each complex number 482 c = BytesIO() 483 np.savetxt(c, a, fmt=['(%.3e%+.3ej)'] * ncols) 484 c.seek(0) 485 lines = c.readlines() 486 assert_equal( 487 lines, 488 [b'(3.142e+00+2.718e+00j) (3.142e+00+2.718e+00j)\n', 489 b'(3.142e+00+2.718e+00j) (3.142e+00+2.718e+00j)\n']) 490 491 def test_complex_negative_exponent(self): 492 # Previous to 1.15, some formats generated x+-yj, gh 7895 493 ncols = 2 494 nrows = 2 495 a = np.zeros((ncols, nrows), dtype=np.complex128) 496 re = np.pi 497 im = np.e 498 a[:] = re - 1.0j * im 499 c = BytesIO() 500 np.savetxt(c, a, fmt='%.3e') 501 c.seek(0) 502 lines = c.readlines() 503 assert_equal( 504 lines, 505 [b' (3.142e+00-2.718e+00j) (3.142e+00-2.718e+00j)\n', 506 b' (3.142e+00-2.718e+00j) (3.142e+00-2.718e+00j)\n']) 507 508 509 def test_custom_writer(self): 510 511 class CustomWriter(list): 512 def write(self, text): 513 self.extend(text.split(b'\n')) 514 515 w = CustomWriter() 516 a = np.array([(1, 2), (3, 4)]) 517 np.savetxt(w, a) 518 b = np.loadtxt(w) 519 assert_array_equal(a, b) 520 521 def test_unicode(self): 522 utf8 = b'\xcf\x96'.decode('UTF-8') 523 a = np.array([utf8], dtype=np.unicode_) 524 with tempdir() as tmpdir: 525 # set encoding as on windows it may not be unicode even on py3 526 np.savetxt(os.path.join(tmpdir, 'test.csv'), a, fmt=['%s'], 527 encoding='UTF-8') 528 529 def test_unicode_roundtrip(self): 530 utf8 = b'\xcf\x96'.decode('UTF-8') 531 a = np.array([utf8], dtype=np.unicode_) 532 # our gz wrapper support encoding 533 suffixes = ['', '.gz'] 534 if HAS_BZ2: 535 suffixes.append('.bz2') 536 if HAS_LZMA: 537 suffixes.extend(['.xz', '.lzma']) 538 with tempdir() as tmpdir: 539 for suffix in suffixes: 540 np.savetxt(os.path.join(tmpdir, 'test.csv' + suffix), a, 541 fmt=['%s'], encoding='UTF-16-LE') 542 b = np.loadtxt(os.path.join(tmpdir, 'test.csv' + suffix), 543 encoding='UTF-16-LE', dtype=np.unicode_) 544 assert_array_equal(a, b) 545 546 def test_unicode_bytestream(self): 547 utf8 = b'\xcf\x96'.decode('UTF-8') 548 a = np.array([utf8], dtype=np.unicode_) 549 s = BytesIO() 550 np.savetxt(s, a, fmt=['%s'], encoding='UTF-8') 551 s.seek(0) 552 assert_equal(s.read().decode('UTF-8'), utf8 + '\n') 553 554 def test_unicode_stringstream(self): 555 utf8 = b'\xcf\x96'.decode('UTF-8') 556 a = np.array([utf8], dtype=np.unicode_) 557 s = StringIO() 558 np.savetxt(s, a, fmt=['%s'], encoding='UTF-8') 559 s.seek(0) 560 assert_equal(s.read(), utf8 + '\n') 561 562 @pytest.mark.parametrize("fmt", [u"%f", b"%f"]) 563 @pytest.mark.parametrize("iotype", [StringIO, BytesIO]) 564 def test_unicode_and_bytes_fmt(self, fmt, iotype): 565 # string type of fmt should not matter, see also gh-4053 566 a = np.array([1.]) 567 s = iotype() 568 np.savetxt(s, a, fmt=fmt) 569 s.seek(0) 570 if iotype is StringIO: 571 assert_equal(s.read(), u"%f\n" % 1.) 572 else: 573 assert_equal(s.read(), b"%f\n" % 1.) 574 575 @pytest.mark.skipif(sys.platform=='win32', reason="files>4GB may not work") 576 @pytest.mark.slow 577 @requires_memory(free_bytes=7e9) 578 def test_large_zip(self): 579 def check_large_zip(memoryerror_raised): 580 memoryerror_raised.value = False 581 try: 582 # The test takes at least 6GB of memory, writes a file larger 583 # than 4GB. This tests the ``allowZip64`` kwarg to ``zipfile`` 584 test_data = np.asarray([np.random.rand( 585 np.random.randint(50,100),4) 586 for i in range(800000)], dtype=object) 587 with tempdir() as tmpdir: 588 np.savez(os.path.join(tmpdir, 'test.npz'), 589 test_data=test_data) 590 except MemoryError: 591 memoryerror_raised.value = True 592 raise 593 # run in a subprocess to ensure memory is released on PyPy, see gh-15775 594 # Use an object in shared memory to re-raise the MemoryError exception 595 # in our process if needed, see gh-16889 596 memoryerror_raised = Value(c_bool) 597 p = Process(target=check_large_zip, args=(memoryerror_raised,)) 598 p.start() 599 p.join() 600 if memoryerror_raised.value: 601 raise MemoryError("Child process raised a MemoryError exception") 602 # -9 indicates a SIGKILL, probably an OOM. 603 if p.exitcode == -9: 604 pytest.xfail("subprocess got a SIGKILL, apparently free memory was not sufficient") 605 assert p.exitcode == 0 606 607class LoadTxtBase: 608 def check_compressed(self, fopen, suffixes): 609 # Test that we can load data from a compressed file 610 wanted = np.arange(6).reshape((2, 3)) 611 linesep = ('\n', '\r\n', '\r') 612 for sep in linesep: 613 data = '0 1 2' + sep + '3 4 5' 614 for suffix in suffixes: 615 with temppath(suffix=suffix) as name: 616 with fopen(name, mode='wt', encoding='UTF-32-LE') as f: 617 f.write(data) 618 res = self.loadfunc(name, encoding='UTF-32-LE') 619 assert_array_equal(res, wanted) 620 with fopen(name, "rt", encoding='UTF-32-LE') as f: 621 res = self.loadfunc(f) 622 assert_array_equal(res, wanted) 623 624 def test_compressed_gzip(self): 625 self.check_compressed(gzip.open, ('.gz',)) 626 627 @pytest.mark.skipif(not HAS_BZ2, reason="Needs bz2") 628 def test_compressed_bz2(self): 629 self.check_compressed(bz2.open, ('.bz2',)) 630 631 @pytest.mark.skipif(not HAS_LZMA, reason="Needs lzma") 632 def test_compressed_lzma(self): 633 self.check_compressed(lzma.open, ('.xz', '.lzma')) 634 635 def test_encoding(self): 636 with temppath() as path: 637 with open(path, "wb") as f: 638 f.write('0.\n1.\n2.'.encode("UTF-16")) 639 x = self.loadfunc(path, encoding="UTF-16") 640 assert_array_equal(x, [0., 1., 2.]) 641 642 def test_stringload(self): 643 # umlaute 644 nonascii = b'\xc3\xb6\xc3\xbc\xc3\xb6'.decode("UTF-8") 645 with temppath() as path: 646 with open(path, "wb") as f: 647 f.write(nonascii.encode("UTF-16")) 648 x = self.loadfunc(path, encoding="UTF-16", dtype=np.unicode_) 649 assert_array_equal(x, nonascii) 650 651 def test_binary_decode(self): 652 utf16 = b'\xff\xfeh\x04 \x00i\x04 \x00j\x04' 653 v = self.loadfunc(BytesIO(utf16), dtype=np.unicode_, encoding='UTF-16') 654 assert_array_equal(v, np.array(utf16.decode('UTF-16').split())) 655 656 def test_converters_decode(self): 657 # test converters that decode strings 658 c = TextIO() 659 c.write(b'\xcf\x96') 660 c.seek(0) 661 x = self.loadfunc(c, dtype=np.unicode_, 662 converters={0: lambda x: x.decode('UTF-8')}) 663 a = np.array([b'\xcf\x96'.decode('UTF-8')]) 664 assert_array_equal(x, a) 665 666 def test_converters_nodecode(self): 667 # test native string converters enabled by setting an encoding 668 utf8 = b'\xcf\x96'.decode('UTF-8') 669 with temppath() as path: 670 with io.open(path, 'wt', encoding='UTF-8') as f: 671 f.write(utf8) 672 x = self.loadfunc(path, dtype=np.unicode_, 673 converters={0: lambda x: x + 't'}, 674 encoding='UTF-8') 675 a = np.array([utf8 + 't']) 676 assert_array_equal(x, a) 677 678 679class TestLoadTxt(LoadTxtBase): 680 loadfunc = staticmethod(np.loadtxt) 681 682 def setup(self): 683 # lower chunksize for testing 684 self.orig_chunk = np.lib.npyio._loadtxt_chunksize 685 np.lib.npyio._loadtxt_chunksize = 1 686 def teardown(self): 687 np.lib.npyio._loadtxt_chunksize = self.orig_chunk 688 689 def test_record(self): 690 c = TextIO() 691 c.write('1 2\n3 4') 692 c.seek(0) 693 x = np.loadtxt(c, dtype=[('x', np.int32), ('y', np.int32)]) 694 a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')]) 695 assert_array_equal(x, a) 696 697 d = TextIO() 698 d.write('M 64.0 75.0\nF 25.0 60.0') 699 d.seek(0) 700 mydescriptor = {'names': ('gender', 'age', 'weight'), 701 'formats': ('S1', 'i4', 'f4')} 702 b = np.array([('M', 64.0, 75.0), 703 ('F', 25.0, 60.0)], dtype=mydescriptor) 704 y = np.loadtxt(d, dtype=mydescriptor) 705 assert_array_equal(y, b) 706 707 def test_array(self): 708 c = TextIO() 709 c.write('1 2\n3 4') 710 711 c.seek(0) 712 x = np.loadtxt(c, dtype=int) 713 a = np.array([[1, 2], [3, 4]], int) 714 assert_array_equal(x, a) 715 716 c.seek(0) 717 x = np.loadtxt(c, dtype=float) 718 a = np.array([[1, 2], [3, 4]], float) 719 assert_array_equal(x, a) 720 721 def test_1D(self): 722 c = TextIO() 723 c.write('1\n2\n3\n4\n') 724 c.seek(0) 725 x = np.loadtxt(c, dtype=int) 726 a = np.array([1, 2, 3, 4], int) 727 assert_array_equal(x, a) 728 729 c = TextIO() 730 c.write('1,2,3,4\n') 731 c.seek(0) 732 x = np.loadtxt(c, dtype=int, delimiter=',') 733 a = np.array([1, 2, 3, 4], int) 734 assert_array_equal(x, a) 735 736 def test_missing(self): 737 c = TextIO() 738 c.write('1,2,3,,5\n') 739 c.seek(0) 740 x = np.loadtxt(c, dtype=int, delimiter=',', 741 converters={3: lambda s: int(s or - 999)}) 742 a = np.array([1, 2, 3, -999, 5], int) 743 assert_array_equal(x, a) 744 745 def test_converters_with_usecols(self): 746 c = TextIO() 747 c.write('1,2,3,,5\n6,7,8,9,10\n') 748 c.seek(0) 749 x = np.loadtxt(c, dtype=int, delimiter=',', 750 converters={3: lambda s: int(s or - 999)}, 751 usecols=(1, 3,)) 752 a = np.array([[2, -999], [7, 9]], int) 753 assert_array_equal(x, a) 754 755 def test_comments_unicode(self): 756 c = TextIO() 757 c.write('# comment\n1,2,3,5\n') 758 c.seek(0) 759 x = np.loadtxt(c, dtype=int, delimiter=',', 760 comments=u'#') 761 a = np.array([1, 2, 3, 5], int) 762 assert_array_equal(x, a) 763 764 def test_comments_byte(self): 765 c = TextIO() 766 c.write('# comment\n1,2,3,5\n') 767 c.seek(0) 768 x = np.loadtxt(c, dtype=int, delimiter=',', 769 comments=b'#') 770 a = np.array([1, 2, 3, 5], int) 771 assert_array_equal(x, a) 772 773 def test_comments_multiple(self): 774 c = TextIO() 775 c.write('# comment\n1,2,3\n@ comment2\n4,5,6 // comment3') 776 c.seek(0) 777 x = np.loadtxt(c, dtype=int, delimiter=',', 778 comments=['#', '@', '//']) 779 a = np.array([[1, 2, 3], [4, 5, 6]], int) 780 assert_array_equal(x, a) 781 782 def test_comments_multi_chars(self): 783 c = TextIO() 784 c.write('/* comment\n1,2,3,5\n') 785 c.seek(0) 786 x = np.loadtxt(c, dtype=int, delimiter=',', 787 comments='/*') 788 a = np.array([1, 2, 3, 5], int) 789 assert_array_equal(x, a) 790 791 # Check that '/*' is not transformed to ['/', '*'] 792 c = TextIO() 793 c.write('*/ comment\n1,2,3,5\n') 794 c.seek(0) 795 assert_raises(ValueError, np.loadtxt, c, dtype=int, delimiter=',', 796 comments='/*') 797 798 def test_skiprows(self): 799 c = TextIO() 800 c.write('comment\n1,2,3,5\n') 801 c.seek(0) 802 x = np.loadtxt(c, dtype=int, delimiter=',', 803 skiprows=1) 804 a = np.array([1, 2, 3, 5], int) 805 assert_array_equal(x, a) 806 807 c = TextIO() 808 c.write('# comment\n1,2,3,5\n') 809 c.seek(0) 810 x = np.loadtxt(c, dtype=int, delimiter=',', 811 skiprows=1) 812 a = np.array([1, 2, 3, 5], int) 813 assert_array_equal(x, a) 814 815 def test_usecols(self): 816 a = np.array([[1, 2], [3, 4]], float) 817 c = BytesIO() 818 np.savetxt(c, a) 819 c.seek(0) 820 x = np.loadtxt(c, dtype=float, usecols=(1,)) 821 assert_array_equal(x, a[:, 1]) 822 823 a = np.array([[1, 2, 3], [3, 4, 5]], float) 824 c = BytesIO() 825 np.savetxt(c, a) 826 c.seek(0) 827 x = np.loadtxt(c, dtype=float, usecols=(1, 2)) 828 assert_array_equal(x, a[:, 1:]) 829 830 # Testing with arrays instead of tuples. 831 c.seek(0) 832 x = np.loadtxt(c, dtype=float, usecols=np.array([1, 2])) 833 assert_array_equal(x, a[:, 1:]) 834 835 # Testing with an integer instead of a sequence 836 for int_type in [int, np.int8, np.int16, 837 np.int32, np.int64, np.uint8, np.uint16, 838 np.uint32, np.uint64]: 839 to_read = int_type(1) 840 c.seek(0) 841 x = np.loadtxt(c, dtype=float, usecols=to_read) 842 assert_array_equal(x, a[:, 1]) 843 844 # Testing with some crazy custom integer type 845 class CrazyInt: 846 def __index__(self): 847 return 1 848 849 crazy_int = CrazyInt() 850 c.seek(0) 851 x = np.loadtxt(c, dtype=float, usecols=crazy_int) 852 assert_array_equal(x, a[:, 1]) 853 854 c.seek(0) 855 x = np.loadtxt(c, dtype=float, usecols=(crazy_int,)) 856 assert_array_equal(x, a[:, 1]) 857 858 # Checking with dtypes defined converters. 859 data = '''JOE 70.1 25.3 860 BOB 60.5 27.9 861 ''' 862 c = TextIO(data) 863 names = ['stid', 'temp'] 864 dtypes = ['S4', 'f8'] 865 arr = np.loadtxt(c, usecols=(0, 2), dtype=list(zip(names, dtypes))) 866 assert_equal(arr['stid'], [b"JOE", b"BOB"]) 867 assert_equal(arr['temp'], [25.3, 27.9]) 868 869 # Testing non-ints in usecols 870 c.seek(0) 871 bogus_idx = 1.5 872 assert_raises_regex( 873 TypeError, 874 '^usecols must be.*%s' % type(bogus_idx), 875 np.loadtxt, c, usecols=bogus_idx 876 ) 877 878 assert_raises_regex( 879 TypeError, 880 '^usecols must be.*%s' % type(bogus_idx), 881 np.loadtxt, c, usecols=[0, bogus_idx, 0] 882 ) 883 884 def test_fancy_dtype(self): 885 c = TextIO() 886 c.write('1,2,3.0\n4,5,6.0\n') 887 c.seek(0) 888 dt = np.dtype([('x', int), ('y', [('t', int), ('s', float)])]) 889 x = np.loadtxt(c, dtype=dt, delimiter=',') 890 a = np.array([(1, (2, 3.0)), (4, (5, 6.0))], dt) 891 assert_array_equal(x, a) 892 893 def test_shaped_dtype(self): 894 c = TextIO("aaaa 1.0 8.0 1 2 3 4 5 6") 895 dt = np.dtype([('name', 'S4'), ('x', float), ('y', float), 896 ('block', int, (2, 3))]) 897 x = np.loadtxt(c, dtype=dt) 898 a = np.array([('aaaa', 1.0, 8.0, [[1, 2, 3], [4, 5, 6]])], 899 dtype=dt) 900 assert_array_equal(x, a) 901 902 def test_3d_shaped_dtype(self): 903 c = TextIO("aaaa 1.0 8.0 1 2 3 4 5 6 7 8 9 10 11 12") 904 dt = np.dtype([('name', 'S4'), ('x', float), ('y', float), 905 ('block', int, (2, 2, 3))]) 906 x = np.loadtxt(c, dtype=dt) 907 a = np.array([('aaaa', 1.0, 8.0, 908 [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])], 909 dtype=dt) 910 assert_array_equal(x, a) 911 912 def test_str_dtype(self): 913 # see gh-8033 914 c = ["str1", "str2"] 915 916 for dt in (str, np.bytes_): 917 a = np.array(["str1", "str2"], dtype=dt) 918 x = np.loadtxt(c, dtype=dt) 919 assert_array_equal(x, a) 920 921 def test_empty_file(self): 922 with suppress_warnings() as sup: 923 sup.filter(message="loadtxt: Empty input file:") 924 c = TextIO() 925 x = np.loadtxt(c) 926 assert_equal(x.shape, (0,)) 927 x = np.loadtxt(c, dtype=np.int64) 928 assert_equal(x.shape, (0,)) 929 assert_(x.dtype == np.int64) 930 931 def test_unused_converter(self): 932 c = TextIO() 933 c.writelines(['1 21\n', '3 42\n']) 934 c.seek(0) 935 data = np.loadtxt(c, usecols=(1,), 936 converters={0: lambda s: int(s, 16)}) 937 assert_array_equal(data, [21, 42]) 938 939 c.seek(0) 940 data = np.loadtxt(c, usecols=(1,), 941 converters={1: lambda s: int(s, 16)}) 942 assert_array_equal(data, [33, 66]) 943 944 def test_dtype_with_object(self): 945 # Test using an explicit dtype with an object 946 data = """ 1; 2001-01-01 947 2; 2002-01-31 """ 948 ndtype = [('idx', int), ('code', object)] 949 func = lambda s: strptime(s.strip(), "%Y-%m-%d") 950 converters = {1: func} 951 test = np.loadtxt(TextIO(data), delimiter=";", dtype=ndtype, 952 converters=converters) 953 control = np.array( 954 [(1, datetime(2001, 1, 1)), (2, datetime(2002, 1, 31))], 955 dtype=ndtype) 956 assert_equal(test, control) 957 958 def test_uint64_type(self): 959 tgt = (9223372043271415339, 9223372043271415853) 960 c = TextIO() 961 c.write("%s %s" % tgt) 962 c.seek(0) 963 res = np.loadtxt(c, dtype=np.uint64) 964 assert_equal(res, tgt) 965 966 def test_int64_type(self): 967 tgt = (-9223372036854775807, 9223372036854775807) 968 c = TextIO() 969 c.write("%s %s" % tgt) 970 c.seek(0) 971 res = np.loadtxt(c, dtype=np.int64) 972 assert_equal(res, tgt) 973 974 def test_from_float_hex(self): 975 # IEEE doubles and floats only, otherwise the float32 976 # conversion may fail. 977 tgt = np.logspace(-10, 10, 5).astype(np.float32) 978 tgt = np.hstack((tgt, -tgt)).astype(float) 979 inp = '\n'.join(map(float.hex, tgt)) 980 c = TextIO() 981 c.write(inp) 982 for dt in [float, np.float32]: 983 c.seek(0) 984 res = np.loadtxt(c, dtype=dt) 985 assert_equal(res, tgt, err_msg="%s" % dt) 986 987 def test_from_complex(self): 988 tgt = (complex(1, 1), complex(1, -1)) 989 c = TextIO() 990 c.write("%s %s" % tgt) 991 c.seek(0) 992 res = np.loadtxt(c, dtype=complex) 993 assert_equal(res, tgt) 994 995 def test_complex_misformatted(self): 996 # test for backward compatibility 997 # some complex formats used to generate x+-yj 998 a = np.zeros((2, 2), dtype=np.complex128) 999 re = np.pi 1000 im = np.e 1001 a[:] = re - 1.0j * im 1002 c = BytesIO() 1003 np.savetxt(c, a, fmt='%.16e') 1004 c.seek(0) 1005 txt = c.read() 1006 c.seek(0) 1007 # misformat the sign on the imaginary part, gh 7895 1008 txt_bad = txt.replace(b'e+00-', b'e00+-') 1009 assert_(txt_bad != txt) 1010 c.write(txt_bad) 1011 c.seek(0) 1012 res = np.loadtxt(c, dtype=complex) 1013 assert_equal(res, a) 1014 1015 def test_universal_newline(self): 1016 with temppath() as name: 1017 with open(name, 'w') as f: 1018 f.write('1 21\r3 42\r') 1019 data = np.loadtxt(name) 1020 assert_array_equal(data, [[1, 21], [3, 42]]) 1021 1022 def test_empty_field_after_tab(self): 1023 c = TextIO() 1024 c.write('1 \t2 \t3\tstart \n4\t5\t6\t \n7\t8\t9.5\t') 1025 c.seek(0) 1026 dt = {'names': ('x', 'y', 'z', 'comment'), 1027 'formats': ('<i4', '<i4', '<f4', '|S8')} 1028 x = np.loadtxt(c, dtype=dt, delimiter='\t') 1029 a = np.array([b'start ', b' ', b'']) 1030 assert_array_equal(x['comment'], a) 1031 1032 def test_unpack_structured(self): 1033 txt = TextIO("M 21 72\nF 35 58") 1034 dt = {'names': ('a', 'b', 'c'), 'formats': ('|S1', '<i4', '<f4')} 1035 a, b, c = np.loadtxt(txt, dtype=dt, unpack=True) 1036 assert_(a.dtype.str == '|S1') 1037 assert_(b.dtype.str == '<i4') 1038 assert_(c.dtype.str == '<f4') 1039 assert_array_equal(a, np.array([b'M', b'F'])) 1040 assert_array_equal(b, np.array([21, 35])) 1041 assert_array_equal(c, np.array([72., 58.])) 1042 1043 def test_ndmin_keyword(self): 1044 c = TextIO() 1045 c.write('1,2,3\n4,5,6') 1046 c.seek(0) 1047 assert_raises(ValueError, np.loadtxt, c, ndmin=3) 1048 c.seek(0) 1049 assert_raises(ValueError, np.loadtxt, c, ndmin=1.5) 1050 c.seek(0) 1051 x = np.loadtxt(c, dtype=int, delimiter=',', ndmin=1) 1052 a = np.array([[1, 2, 3], [4, 5, 6]]) 1053 assert_array_equal(x, a) 1054 1055 d = TextIO() 1056 d.write('0,1,2') 1057 d.seek(0) 1058 x = np.loadtxt(d, dtype=int, delimiter=',', ndmin=2) 1059 assert_(x.shape == (1, 3)) 1060 d.seek(0) 1061 x = np.loadtxt(d, dtype=int, delimiter=',', ndmin=1) 1062 assert_(x.shape == (3,)) 1063 d.seek(0) 1064 x = np.loadtxt(d, dtype=int, delimiter=',', ndmin=0) 1065 assert_(x.shape == (3,)) 1066 1067 e = TextIO() 1068 e.write('0\n1\n2') 1069 e.seek(0) 1070 x = np.loadtxt(e, dtype=int, delimiter=',', ndmin=2) 1071 assert_(x.shape == (3, 1)) 1072 e.seek(0) 1073 x = np.loadtxt(e, dtype=int, delimiter=',', ndmin=1) 1074 assert_(x.shape == (3,)) 1075 e.seek(0) 1076 x = np.loadtxt(e, dtype=int, delimiter=',', ndmin=0) 1077 assert_(x.shape == (3,)) 1078 1079 # Test ndmin kw with empty file. 1080 with suppress_warnings() as sup: 1081 sup.filter(message="loadtxt: Empty input file:") 1082 f = TextIO() 1083 assert_(np.loadtxt(f, ndmin=2).shape == (0, 1,)) 1084 assert_(np.loadtxt(f, ndmin=1).shape == (0,)) 1085 1086 def test_generator_source(self): 1087 def count(): 1088 for i in range(10): 1089 yield "%d" % i 1090 1091 res = np.loadtxt(count()) 1092 assert_array_equal(res, np.arange(10)) 1093 1094 def test_bad_line(self): 1095 c = TextIO() 1096 c.write('1 2 3\n4 5 6\n2 3') 1097 c.seek(0) 1098 1099 # Check for exception and that exception contains line number 1100 assert_raises_regex(ValueError, "3", np.loadtxt, c) 1101 1102 def test_none_as_string(self): 1103 # gh-5155, None should work as string when format demands it 1104 c = TextIO() 1105 c.write('100,foo,200\n300,None,400') 1106 c.seek(0) 1107 dt = np.dtype([('x', int), ('a', 'S10'), ('y', int)]) 1108 np.loadtxt(c, delimiter=',', dtype=dt, comments=None) # Should succeed 1109 1110 @pytest.mark.skipif(locale.getpreferredencoding() == 'ANSI_X3.4-1968', 1111 reason="Wrong preferred encoding") 1112 def test_binary_load(self): 1113 butf8 = b"5,6,7,\xc3\x95scarscar\n\r15,2,3,hello\n\r"\ 1114 b"20,2,3,\xc3\x95scar\n\r" 1115 sutf8 = butf8.decode("UTF-8").replace("\r", "").splitlines() 1116 with temppath() as path: 1117 with open(path, "wb") as f: 1118 f.write(butf8) 1119 with open(path, "rb") as f: 1120 x = np.loadtxt(f, encoding="UTF-8", dtype=np.unicode_) 1121 assert_array_equal(x, sutf8) 1122 # test broken latin1 conversion people now rely on 1123 with open(path, "rb") as f: 1124 x = np.loadtxt(f, encoding="UTF-8", dtype="S") 1125 x = [b'5,6,7,\xc3\x95scarscar', b'15,2,3,hello', b'20,2,3,\xc3\x95scar'] 1126 assert_array_equal(x, np.array(x, dtype="S")) 1127 1128 def test_max_rows(self): 1129 c = TextIO() 1130 c.write('1,2,3,5\n4,5,7,8\n2,1,4,5') 1131 c.seek(0) 1132 x = np.loadtxt(c, dtype=int, delimiter=',', 1133 max_rows=1) 1134 a = np.array([1, 2, 3, 5], int) 1135 assert_array_equal(x, a) 1136 1137 def test_max_rows_with_skiprows(self): 1138 c = TextIO() 1139 c.write('comments\n1,2,3,5\n4,5,7,8\n2,1,4,5') 1140 c.seek(0) 1141 x = np.loadtxt(c, dtype=int, delimiter=',', 1142 skiprows=1, max_rows=1) 1143 a = np.array([1, 2, 3, 5], int) 1144 assert_array_equal(x, a) 1145 1146 c = TextIO() 1147 c.write('comment\n1,2,3,5\n4,5,7,8\n2,1,4,5') 1148 c.seek(0) 1149 x = np.loadtxt(c, dtype=int, delimiter=',', 1150 skiprows=1, max_rows=2) 1151 a = np.array([[1, 2, 3, 5], [4, 5, 7, 8]], int) 1152 assert_array_equal(x, a) 1153 1154 def test_max_rows_with_read_continuation(self): 1155 c = TextIO() 1156 c.write('1,2,3,5\n4,5,7,8\n2,1,4,5') 1157 c.seek(0) 1158 x = np.loadtxt(c, dtype=int, delimiter=',', 1159 max_rows=2) 1160 a = np.array([[1, 2, 3, 5], [4, 5, 7, 8]], int) 1161 assert_array_equal(x, a) 1162 # test continuation 1163 x = np.loadtxt(c, dtype=int, delimiter=',') 1164 a = np.array([2,1,4,5], int) 1165 assert_array_equal(x, a) 1166 1167 def test_max_rows_larger(self): 1168 #test max_rows > num rows 1169 c = TextIO() 1170 c.write('comment\n1,2,3,5\n4,5,7,8\n2,1,4,5') 1171 c.seek(0) 1172 x = np.loadtxt(c, dtype=int, delimiter=',', 1173 skiprows=1, max_rows=6) 1174 a = np.array([[1, 2, 3, 5], [4, 5, 7, 8], [2, 1, 4, 5]], int) 1175 assert_array_equal(x, a) 1176 1177class Testfromregex: 1178 def test_record(self): 1179 c = TextIO() 1180 c.write('1.312 foo\n1.534 bar\n4.444 qux') 1181 c.seek(0) 1182 1183 dt = [('num', np.float64), ('val', 'S3')] 1184 x = np.fromregex(c, r"([0-9.]+)\s+(...)", dt) 1185 a = np.array([(1.312, 'foo'), (1.534, 'bar'), (4.444, 'qux')], 1186 dtype=dt) 1187 assert_array_equal(x, a) 1188 1189 def test_record_2(self): 1190 c = TextIO() 1191 c.write('1312 foo\n1534 bar\n4444 qux') 1192 c.seek(0) 1193 1194 dt = [('num', np.int32), ('val', 'S3')] 1195 x = np.fromregex(c, r"(\d+)\s+(...)", dt) 1196 a = np.array([(1312, 'foo'), (1534, 'bar'), (4444, 'qux')], 1197 dtype=dt) 1198 assert_array_equal(x, a) 1199 1200 def test_record_3(self): 1201 c = TextIO() 1202 c.write('1312 foo\n1534 bar\n4444 qux') 1203 c.seek(0) 1204 1205 dt = [('num', np.float64)] 1206 x = np.fromregex(c, r"(\d+)\s+...", dt) 1207 a = np.array([(1312,), (1534,), (4444,)], dtype=dt) 1208 assert_array_equal(x, a) 1209 1210 def test_record_unicode(self): 1211 utf8 = b'\xcf\x96' 1212 with temppath() as path: 1213 with open(path, 'wb') as f: 1214 f.write(b'1.312 foo' + utf8 + b' \n1.534 bar\n4.444 qux') 1215 1216 dt = [('num', np.float64), ('val', 'U4')] 1217 x = np.fromregex(path, r"(?u)([0-9.]+)\s+(\w+)", dt, encoding='UTF-8') 1218 a = np.array([(1.312, 'foo' + utf8.decode('UTF-8')), (1.534, 'bar'), 1219 (4.444, 'qux')], dtype=dt) 1220 assert_array_equal(x, a) 1221 1222 regexp = re.compile(r"([0-9.]+)\s+(\w+)", re.UNICODE) 1223 x = np.fromregex(path, regexp, dt, encoding='UTF-8') 1224 assert_array_equal(x, a) 1225 1226 def test_compiled_bytes(self): 1227 regexp = re.compile(b'(\\d)') 1228 c = BytesIO(b'123') 1229 dt = [('num', np.float64)] 1230 a = np.array([1, 2, 3], dtype=dt) 1231 x = np.fromregex(c, regexp, dt) 1232 assert_array_equal(x, a) 1233 1234#####-------------------------------------------------------------------------- 1235 1236 1237class TestFromTxt(LoadTxtBase): 1238 loadfunc = staticmethod(np.genfromtxt) 1239 1240 def test_record(self): 1241 # Test w/ explicit dtype 1242 data = TextIO('1 2\n3 4') 1243 test = np.genfromtxt(data, dtype=[('x', np.int32), ('y', np.int32)]) 1244 control = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')]) 1245 assert_equal(test, control) 1246 # 1247 data = TextIO('M 64.0 75.0\nF 25.0 60.0') 1248 descriptor = {'names': ('gender', 'age', 'weight'), 1249 'formats': ('S1', 'i4', 'f4')} 1250 control = np.array([('M', 64.0, 75.0), ('F', 25.0, 60.0)], 1251 dtype=descriptor) 1252 test = np.genfromtxt(data, dtype=descriptor) 1253 assert_equal(test, control) 1254 1255 def test_array(self): 1256 # Test outputting a standard ndarray 1257 data = TextIO('1 2\n3 4') 1258 control = np.array([[1, 2], [3, 4]], dtype=int) 1259 test = np.genfromtxt(data, dtype=int) 1260 assert_array_equal(test, control) 1261 # 1262 data.seek(0) 1263 control = np.array([[1, 2], [3, 4]], dtype=float) 1264 test = np.loadtxt(data, dtype=float) 1265 assert_array_equal(test, control) 1266 1267 def test_1D(self): 1268 # Test squeezing to 1D 1269 control = np.array([1, 2, 3, 4], int) 1270 # 1271 data = TextIO('1\n2\n3\n4\n') 1272 test = np.genfromtxt(data, dtype=int) 1273 assert_array_equal(test, control) 1274 # 1275 data = TextIO('1,2,3,4\n') 1276 test = np.genfromtxt(data, dtype=int, delimiter=',') 1277 assert_array_equal(test, control) 1278 1279 def test_comments(self): 1280 # Test the stripping of comments 1281 control = np.array([1, 2, 3, 5], int) 1282 # Comment on its own line 1283 data = TextIO('# comment\n1,2,3,5\n') 1284 test = np.genfromtxt(data, dtype=int, delimiter=',', comments='#') 1285 assert_equal(test, control) 1286 # Comment at the end of a line 1287 data = TextIO('1,2,3,5# comment\n') 1288 test = np.genfromtxt(data, dtype=int, delimiter=',', comments='#') 1289 assert_equal(test, control) 1290 1291 def test_skiprows(self): 1292 # Test row skipping 1293 control = np.array([1, 2, 3, 5], int) 1294 kwargs = dict(dtype=int, delimiter=',') 1295 # 1296 data = TextIO('comment\n1,2,3,5\n') 1297 test = np.genfromtxt(data, skip_header=1, **kwargs) 1298 assert_equal(test, control) 1299 # 1300 data = TextIO('# comment\n1,2,3,5\n') 1301 test = np.loadtxt(data, skiprows=1, **kwargs) 1302 assert_equal(test, control) 1303 1304 def test_skip_footer(self): 1305 data = ["# %i" % i for i in range(1, 6)] 1306 data.append("A, B, C") 1307 data.extend(["%i,%3.1f,%03s" % (i, i, i) for i in range(51)]) 1308 data[-1] = "99,99" 1309 kwargs = dict(delimiter=",", names=True, skip_header=5, skip_footer=10) 1310 test = np.genfromtxt(TextIO("\n".join(data)), **kwargs) 1311 ctrl = np.array([("%f" % i, "%f" % i, "%f" % i) for i in range(41)], 1312 dtype=[(_, float) for _ in "ABC"]) 1313 assert_equal(test, ctrl) 1314 1315 def test_skip_footer_with_invalid(self): 1316 with suppress_warnings() as sup: 1317 sup.filter(ConversionWarning) 1318 basestr = '1 1\n2 2\n3 3\n4 4\n5 \n6 \n7 \n' 1319 # Footer too small to get rid of all invalid values 1320 assert_raises(ValueError, np.genfromtxt, 1321 TextIO(basestr), skip_footer=1) 1322 # except ValueError: 1323 # pass 1324 a = np.genfromtxt( 1325 TextIO(basestr), skip_footer=1, invalid_raise=False) 1326 assert_equal(a, np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]])) 1327 # 1328 a = np.genfromtxt(TextIO(basestr), skip_footer=3) 1329 assert_equal(a, np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]])) 1330 # 1331 basestr = '1 1\n2 \n3 3\n4 4\n5 \n6 6\n7 7\n' 1332 a = np.genfromtxt( 1333 TextIO(basestr), skip_footer=1, invalid_raise=False) 1334 assert_equal(a, np.array([[1., 1.], [3., 3.], [4., 4.], [6., 6.]])) 1335 a = np.genfromtxt( 1336 TextIO(basestr), skip_footer=3, invalid_raise=False) 1337 assert_equal(a, np.array([[1., 1.], [3., 3.], [4., 4.]])) 1338 1339 def test_header(self): 1340 # Test retrieving a header 1341 data = TextIO('gender age weight\nM 64.0 75.0\nF 25.0 60.0') 1342 with warnings.catch_warnings(record=True) as w: 1343 warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) 1344 test = np.genfromtxt(data, dtype=None, names=True) 1345 assert_(w[0].category is np.VisibleDeprecationWarning) 1346 control = {'gender': np.array([b'M', b'F']), 1347 'age': np.array([64.0, 25.0]), 1348 'weight': np.array([75.0, 60.0])} 1349 assert_equal(test['gender'], control['gender']) 1350 assert_equal(test['age'], control['age']) 1351 assert_equal(test['weight'], control['weight']) 1352 1353 def test_auto_dtype(self): 1354 # Test the automatic definition of the output dtype 1355 data = TextIO('A 64 75.0 3+4j True\nBCD 25 60.0 5+6j False') 1356 with warnings.catch_warnings(record=True) as w: 1357 warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) 1358 test = np.genfromtxt(data, dtype=None) 1359 assert_(w[0].category is np.VisibleDeprecationWarning) 1360 control = [np.array([b'A', b'BCD']), 1361 np.array([64, 25]), 1362 np.array([75.0, 60.0]), 1363 np.array([3 + 4j, 5 + 6j]), 1364 np.array([True, False]), ] 1365 assert_equal(test.dtype.names, ['f0', 'f1', 'f2', 'f3', 'f4']) 1366 for (i, ctrl) in enumerate(control): 1367 assert_equal(test['f%i' % i], ctrl) 1368 1369 def test_auto_dtype_uniform(self): 1370 # Tests whether the output dtype can be uniformized 1371 data = TextIO('1 2 3 4\n5 6 7 8\n') 1372 test = np.genfromtxt(data, dtype=None) 1373 control = np.array([[1, 2, 3, 4], [5, 6, 7, 8]]) 1374 assert_equal(test, control) 1375 1376 def test_fancy_dtype(self): 1377 # Check that a nested dtype isn't MIA 1378 data = TextIO('1,2,3.0\n4,5,6.0\n') 1379 fancydtype = np.dtype([('x', int), ('y', [('t', int), ('s', float)])]) 1380 test = np.genfromtxt(data, dtype=fancydtype, delimiter=',') 1381 control = np.array([(1, (2, 3.0)), (4, (5, 6.0))], dtype=fancydtype) 1382 assert_equal(test, control) 1383 1384 def test_names_overwrite(self): 1385 # Test overwriting the names of the dtype 1386 descriptor = {'names': ('g', 'a', 'w'), 1387 'formats': ('S1', 'i4', 'f4')} 1388 data = TextIO(b'M 64.0 75.0\nF 25.0 60.0') 1389 names = ('gender', 'age', 'weight') 1390 test = np.genfromtxt(data, dtype=descriptor, names=names) 1391 descriptor['names'] = names 1392 control = np.array([('M', 64.0, 75.0), 1393 ('F', 25.0, 60.0)], dtype=descriptor) 1394 assert_equal(test, control) 1395 1396 def test_commented_header(self): 1397 # Check that names can be retrieved even if the line is commented out. 1398 data = TextIO(""" 1399#gender age weight 1400M 21 72.100000 1401F 35 58.330000 1402M 33 21.99 1403 """) 1404 # The # is part of the first name and should be deleted automatically. 1405 with warnings.catch_warnings(record=True) as w: 1406 warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) 1407 test = np.genfromtxt(data, names=True, dtype=None) 1408 assert_(w[0].category is np.VisibleDeprecationWarning) 1409 ctrl = np.array([('M', 21, 72.1), ('F', 35, 58.33), ('M', 33, 21.99)], 1410 dtype=[('gender', '|S1'), ('age', int), ('weight', float)]) 1411 assert_equal(test, ctrl) 1412 # Ditto, but we should get rid of the first element 1413 data = TextIO(b""" 1414# gender age weight 1415M 21 72.100000 1416F 35 58.330000 1417M 33 21.99 1418 """) 1419 with warnings.catch_warnings(record=True) as w: 1420 warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) 1421 test = np.genfromtxt(data, names=True, dtype=None) 1422 assert_(w[0].category is np.VisibleDeprecationWarning) 1423 assert_equal(test, ctrl) 1424 1425 def test_names_and_comments_none(self): 1426 # Tests case when names is true but comments is None (gh-10780) 1427 data = TextIO('col1 col2\n 1 2\n 3 4') 1428 test = np.genfromtxt(data, dtype=(int, int), comments=None, names=True) 1429 control = np.array([(1, 2), (3, 4)], dtype=[('col1', int), ('col2', int)]) 1430 assert_equal(test, control) 1431 1432 def test_file_is_closed_on_error(self): 1433 # gh-13200 1434 with tempdir() as tmpdir: 1435 fpath = os.path.join(tmpdir, "test.csv") 1436 with open(fpath, "wb") as f: 1437 f.write(u'\N{GREEK PI SYMBOL}'.encode('utf8')) 1438 1439 # ResourceWarnings are emitted from a destructor, so won't be 1440 # detected by regular propagation to errors. 1441 with assert_no_warnings(): 1442 with pytest.raises(UnicodeDecodeError): 1443 np.genfromtxt(fpath, encoding="ascii") 1444 1445 def test_autonames_and_usecols(self): 1446 # Tests names and usecols 1447 data = TextIO('A B C D\n aaaa 121 45 9.1') 1448 with warnings.catch_warnings(record=True) as w: 1449 warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) 1450 test = np.genfromtxt(data, usecols=('A', 'C', 'D'), 1451 names=True, dtype=None) 1452 assert_(w[0].category is np.VisibleDeprecationWarning) 1453 control = np.array(('aaaa', 45, 9.1), 1454 dtype=[('A', '|S4'), ('C', int), ('D', float)]) 1455 assert_equal(test, control) 1456 1457 def test_converters_with_usecols(self): 1458 # Test the combination user-defined converters and usecol 1459 data = TextIO('1,2,3,,5\n6,7,8,9,10\n') 1460 test = np.genfromtxt(data, dtype=int, delimiter=',', 1461 converters={3: lambda s: int(s or - 999)}, 1462 usecols=(1, 3,)) 1463 control = np.array([[2, -999], [7, 9]], int) 1464 assert_equal(test, control) 1465 1466 def test_converters_with_usecols_and_names(self): 1467 # Tests names and usecols 1468 data = TextIO('A B C D\n aaaa 121 45 9.1') 1469 with warnings.catch_warnings(record=True) as w: 1470 warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) 1471 test = np.genfromtxt(data, usecols=('A', 'C', 'D'), names=True, 1472 dtype=None, 1473 converters={'C': lambda s: 2 * int(s)}) 1474 assert_(w[0].category is np.VisibleDeprecationWarning) 1475 control = np.array(('aaaa', 90, 9.1), 1476 dtype=[('A', '|S4'), ('C', int), ('D', float)]) 1477 assert_equal(test, control) 1478 1479 def test_converters_cornercases(self): 1480 # Test the conversion to datetime. 1481 converter = { 1482 'date': lambda s: strptime(s, '%Y-%m-%d %H:%M:%SZ')} 1483 data = TextIO('2009-02-03 12:00:00Z, 72214.0') 1484 test = np.genfromtxt(data, delimiter=',', dtype=None, 1485 names=['date', 'stid'], converters=converter) 1486 control = np.array((datetime(2009, 2, 3), 72214.), 1487 dtype=[('date', np.object_), ('stid', float)]) 1488 assert_equal(test, control) 1489 1490 def test_converters_cornercases2(self): 1491 # Test the conversion to datetime64. 1492 converter = { 1493 'date': lambda s: np.datetime64(strptime(s, '%Y-%m-%d %H:%M:%SZ'))} 1494 data = TextIO('2009-02-03 12:00:00Z, 72214.0') 1495 test = np.genfromtxt(data, delimiter=',', dtype=None, 1496 names=['date', 'stid'], converters=converter) 1497 control = np.array((datetime(2009, 2, 3), 72214.), 1498 dtype=[('date', 'datetime64[us]'), ('stid', float)]) 1499 assert_equal(test, control) 1500 1501 def test_unused_converter(self): 1502 # Test whether unused converters are forgotten 1503 data = TextIO("1 21\n 3 42\n") 1504 test = np.genfromtxt(data, usecols=(1,), 1505 converters={0: lambda s: int(s, 16)}) 1506 assert_equal(test, [21, 42]) 1507 # 1508 data.seek(0) 1509 test = np.genfromtxt(data, usecols=(1,), 1510 converters={1: lambda s: int(s, 16)}) 1511 assert_equal(test, [33, 66]) 1512 1513 def test_invalid_converter(self): 1514 strip_rand = lambda x: float((b'r' in x.lower() and x.split()[-1]) or 1515 (b'r' not in x.lower() and x.strip() or 0.0)) 1516 strip_per = lambda x: float((b'%' in x.lower() and x.split()[0]) or 1517 (b'%' not in x.lower() and x.strip() or 0.0)) 1518 s = TextIO("D01N01,10/1/2003 ,1 %,R 75,400,600\r\n" 1519 "L24U05,12/5/2003, 2 %,1,300, 150.5\r\n" 1520 "D02N03,10/10/2004,R 1,,7,145.55") 1521 kwargs = dict( 1522 converters={2: strip_per, 3: strip_rand}, delimiter=",", 1523 dtype=None) 1524 assert_raises(ConverterError, np.genfromtxt, s, **kwargs) 1525 1526 def test_tricky_converter_bug1666(self): 1527 # Test some corner cases 1528 s = TextIO('q1,2\nq3,4') 1529 cnv = lambda s: float(s[1:]) 1530 test = np.genfromtxt(s, delimiter=',', converters={0: cnv}) 1531 control = np.array([[1., 2.], [3., 4.]]) 1532 assert_equal(test, control) 1533 1534 def test_dtype_with_converters(self): 1535 dstr = "2009; 23; 46" 1536 test = np.genfromtxt(TextIO(dstr,), 1537 delimiter=";", dtype=float, converters={0: bytes}) 1538 control = np.array([('2009', 23., 46)], 1539 dtype=[('f0', '|S4'), ('f1', float), ('f2', float)]) 1540 assert_equal(test, control) 1541 test = np.genfromtxt(TextIO(dstr,), 1542 delimiter=";", dtype=float, converters={0: float}) 1543 control = np.array([2009., 23., 46],) 1544 assert_equal(test, control) 1545 1546 def test_dtype_with_converters_and_usecols(self): 1547 dstr = "1,5,-1,1:1\n2,8,-1,1:n\n3,3,-2,m:n\n" 1548 dmap = {'1:1':0, '1:n':1, 'm:1':2, 'm:n':3} 1549 dtyp = [('e1','i4'),('e2','i4'),('e3','i2'),('n', 'i1')] 1550 conv = {0: int, 1: int, 2: int, 3: lambda r: dmap[r.decode()]} 1551 test = np.recfromcsv(TextIO(dstr,), dtype=dtyp, delimiter=',', 1552 names=None, converters=conv) 1553 control = np.rec.array([(1,5,-1,0), (2,8,-1,1), (3,3,-2,3)], dtype=dtyp) 1554 assert_equal(test, control) 1555 dtyp = [('e1','i4'),('e2','i4'),('n', 'i1')] 1556 test = np.recfromcsv(TextIO(dstr,), dtype=dtyp, delimiter=',', 1557 usecols=(0,1,3), names=None, converters=conv) 1558 control = np.rec.array([(1,5,0), (2,8,1), (3,3,3)], dtype=dtyp) 1559 assert_equal(test, control) 1560 1561 def test_dtype_with_object(self): 1562 # Test using an explicit dtype with an object 1563 data = """ 1; 2001-01-01 1564 2; 2002-01-31 """ 1565 ndtype = [('idx', int), ('code', object)] 1566 func = lambda s: strptime(s.strip(), "%Y-%m-%d") 1567 converters = {1: func} 1568 test = np.genfromtxt(TextIO(data), delimiter=";", dtype=ndtype, 1569 converters=converters) 1570 control = np.array( 1571 [(1, datetime(2001, 1, 1)), (2, datetime(2002, 1, 31))], 1572 dtype=ndtype) 1573 assert_equal(test, control) 1574 1575 ndtype = [('nest', [('idx', int), ('code', object)])] 1576 with assert_raises_regex(NotImplementedError, 1577 'Nested fields.* not supported.*'): 1578 test = np.genfromtxt(TextIO(data), delimiter=";", 1579 dtype=ndtype, converters=converters) 1580 1581 # nested but empty fields also aren't supported 1582 ndtype = [('idx', int), ('code', object), ('nest', [])] 1583 with assert_raises_regex(NotImplementedError, 1584 'Nested fields.* not supported.*'): 1585 test = np.genfromtxt(TextIO(data), delimiter=";", 1586 dtype=ndtype, converters=converters) 1587 1588 def test_dtype_with_object_no_converter(self): 1589 # Object without a converter uses bytes: 1590 parsed = np.genfromtxt(TextIO("1"), dtype=object) 1591 assert parsed[()] == b"1" 1592 parsed = np.genfromtxt(TextIO("string"), dtype=object) 1593 assert parsed[()] == b"string" 1594 1595 def test_userconverters_with_explicit_dtype(self): 1596 # Test user_converters w/ explicit (standard) dtype 1597 data = TextIO('skip,skip,2001-01-01,1.0,skip') 1598 test = np.genfromtxt(data, delimiter=",", names=None, dtype=float, 1599 usecols=(2, 3), converters={2: bytes}) 1600 control = np.array([('2001-01-01', 1.)], 1601 dtype=[('', '|S10'), ('', float)]) 1602 assert_equal(test, control) 1603 1604 def test_utf8_userconverters_with_explicit_dtype(self): 1605 utf8 = b'\xcf\x96' 1606 with temppath() as path: 1607 with open(path, 'wb') as f: 1608 f.write(b'skip,skip,2001-01-01' + utf8 + b',1.0,skip') 1609 test = np.genfromtxt(path, delimiter=",", names=None, dtype=float, 1610 usecols=(2, 3), converters={2: np.compat.unicode}, 1611 encoding='UTF-8') 1612 control = np.array([('2001-01-01' + utf8.decode('UTF-8'), 1.)], 1613 dtype=[('', '|U11'), ('', float)]) 1614 assert_equal(test, control) 1615 1616 def test_spacedelimiter(self): 1617 # Test space delimiter 1618 data = TextIO("1 2 3 4 5\n6 7 8 9 10") 1619 test = np.genfromtxt(data) 1620 control = np.array([[1., 2., 3., 4., 5.], 1621 [6., 7., 8., 9., 10.]]) 1622 assert_equal(test, control) 1623 1624 def test_integer_delimiter(self): 1625 # Test using an integer for delimiter 1626 data = " 1 2 3\n 4 5 67\n890123 4" 1627 test = np.genfromtxt(TextIO(data), delimiter=3) 1628 control = np.array([[1, 2, 3], [4, 5, 67], [890, 123, 4]]) 1629 assert_equal(test, control) 1630 1631 def test_missing(self): 1632 data = TextIO('1,2,3,,5\n') 1633 test = np.genfromtxt(data, dtype=int, delimiter=',', 1634 converters={3: lambda s: int(s or - 999)}) 1635 control = np.array([1, 2, 3, -999, 5], int) 1636 assert_equal(test, control) 1637 1638 def test_missing_with_tabs(self): 1639 # Test w/ a delimiter tab 1640 txt = "1\t2\t3\n\t2\t\n1\t\t3" 1641 test = np.genfromtxt(TextIO(txt), delimiter="\t", 1642 usemask=True,) 1643 ctrl_d = np.array([(1, 2, 3), (np.nan, 2, np.nan), (1, np.nan, 3)],) 1644 ctrl_m = np.array([(0, 0, 0), (1, 0, 1), (0, 1, 0)], dtype=bool) 1645 assert_equal(test.data, ctrl_d) 1646 assert_equal(test.mask, ctrl_m) 1647 1648 def test_usecols(self): 1649 # Test the selection of columns 1650 # Select 1 column 1651 control = np.array([[1, 2], [3, 4]], float) 1652 data = TextIO() 1653 np.savetxt(data, control) 1654 data.seek(0) 1655 test = np.genfromtxt(data, dtype=float, usecols=(1,)) 1656 assert_equal(test, control[:, 1]) 1657 # 1658 control = np.array([[1, 2, 3], [3, 4, 5]], float) 1659 data = TextIO() 1660 np.savetxt(data, control) 1661 data.seek(0) 1662 test = np.genfromtxt(data, dtype=float, usecols=(1, 2)) 1663 assert_equal(test, control[:, 1:]) 1664 # Testing with arrays instead of tuples. 1665 data.seek(0) 1666 test = np.genfromtxt(data, dtype=float, usecols=np.array([1, 2])) 1667 assert_equal(test, control[:, 1:]) 1668 1669 def test_usecols_as_css(self): 1670 # Test giving usecols with a comma-separated string 1671 data = "1 2 3\n4 5 6" 1672 test = np.genfromtxt(TextIO(data), 1673 names="a, b, c", usecols="a, c") 1674 ctrl = np.array([(1, 3), (4, 6)], dtype=[(_, float) for _ in "ac"]) 1675 assert_equal(test, ctrl) 1676 1677 def test_usecols_with_structured_dtype(self): 1678 # Test usecols with an explicit structured dtype 1679 data = TextIO("JOE 70.1 25.3\nBOB 60.5 27.9") 1680 names = ['stid', 'temp'] 1681 dtypes = ['S4', 'f8'] 1682 test = np.genfromtxt( 1683 data, usecols=(0, 2), dtype=list(zip(names, dtypes))) 1684 assert_equal(test['stid'], [b"JOE", b"BOB"]) 1685 assert_equal(test['temp'], [25.3, 27.9]) 1686 1687 def test_usecols_with_integer(self): 1688 # Test usecols with an integer 1689 test = np.genfromtxt(TextIO(b"1 2 3\n4 5 6"), usecols=0) 1690 assert_equal(test, np.array([1., 4.])) 1691 1692 def test_usecols_with_named_columns(self): 1693 # Test usecols with named columns 1694 ctrl = np.array([(1, 3), (4, 6)], dtype=[('a', float), ('c', float)]) 1695 data = "1 2 3\n4 5 6" 1696 kwargs = dict(names="a, b, c") 1697 test = np.genfromtxt(TextIO(data), usecols=(0, -1), **kwargs) 1698 assert_equal(test, ctrl) 1699 test = np.genfromtxt(TextIO(data), 1700 usecols=('a', 'c'), **kwargs) 1701 assert_equal(test, ctrl) 1702 1703 def test_empty_file(self): 1704 # Test that an empty file raises the proper warning. 1705 with suppress_warnings() as sup: 1706 sup.filter(message="genfromtxt: Empty input file:") 1707 data = TextIO() 1708 test = np.genfromtxt(data) 1709 assert_equal(test, np.array([])) 1710 1711 # when skip_header > 0 1712 test = np.genfromtxt(data, skip_header=1) 1713 assert_equal(test, np.array([])) 1714 1715 def test_fancy_dtype_alt(self): 1716 # Check that a nested dtype isn't MIA 1717 data = TextIO('1,2,3.0\n4,5,6.0\n') 1718 fancydtype = np.dtype([('x', int), ('y', [('t', int), ('s', float)])]) 1719 test = np.genfromtxt(data, dtype=fancydtype, delimiter=',', usemask=True) 1720 control = ma.array([(1, (2, 3.0)), (4, (5, 6.0))], dtype=fancydtype) 1721 assert_equal(test, control) 1722 1723 def test_shaped_dtype(self): 1724 c = TextIO("aaaa 1.0 8.0 1 2 3 4 5 6") 1725 dt = np.dtype([('name', 'S4'), ('x', float), ('y', float), 1726 ('block', int, (2, 3))]) 1727 x = np.genfromtxt(c, dtype=dt) 1728 a = np.array([('aaaa', 1.0, 8.0, [[1, 2, 3], [4, 5, 6]])], 1729 dtype=dt) 1730 assert_array_equal(x, a) 1731 1732 def test_withmissing(self): 1733 data = TextIO('A,B\n0,1\n2,N/A') 1734 kwargs = dict(delimiter=",", missing_values="N/A", names=True) 1735 test = np.genfromtxt(data, dtype=None, usemask=True, **kwargs) 1736 control = ma.array([(0, 1), (2, -1)], 1737 mask=[(False, False), (False, True)], 1738 dtype=[('A', int), ('B', int)]) 1739 assert_equal(test, control) 1740 assert_equal(test.mask, control.mask) 1741 # 1742 data.seek(0) 1743 test = np.genfromtxt(data, usemask=True, **kwargs) 1744 control = ma.array([(0, 1), (2, -1)], 1745 mask=[(False, False), (False, True)], 1746 dtype=[('A', float), ('B', float)]) 1747 assert_equal(test, control) 1748 assert_equal(test.mask, control.mask) 1749 1750 def test_user_missing_values(self): 1751 data = "A, B, C\n0, 0., 0j\n1, N/A, 1j\n-9, 2.2, N/A\n3, -99, 3j" 1752 basekwargs = dict(dtype=None, delimiter=",", names=True,) 1753 mdtype = [('A', int), ('B', float), ('C', complex)] 1754 # 1755 test = np.genfromtxt(TextIO(data), missing_values="N/A", 1756 **basekwargs) 1757 control = ma.array([(0, 0.0, 0j), (1, -999, 1j), 1758 (-9, 2.2, -999j), (3, -99, 3j)], 1759 mask=[(0, 0, 0), (0, 1, 0), (0, 0, 1), (0, 0, 0)], 1760 dtype=mdtype) 1761 assert_equal(test, control) 1762 # 1763 basekwargs['dtype'] = mdtype 1764 test = np.genfromtxt(TextIO(data), 1765 missing_values={0: -9, 1: -99, 2: -999j}, usemask=True, **basekwargs) 1766 control = ma.array([(0, 0.0, 0j), (1, -999, 1j), 1767 (-9, 2.2, -999j), (3, -99, 3j)], 1768 mask=[(0, 0, 0), (0, 1, 0), (1, 0, 1), (0, 1, 0)], 1769 dtype=mdtype) 1770 assert_equal(test, control) 1771 # 1772 test = np.genfromtxt(TextIO(data), 1773 missing_values={0: -9, 'B': -99, 'C': -999j}, 1774 usemask=True, 1775 **basekwargs) 1776 control = ma.array([(0, 0.0, 0j), (1, -999, 1j), 1777 (-9, 2.2, -999j), (3, -99, 3j)], 1778 mask=[(0, 0, 0), (0, 1, 0), (1, 0, 1), (0, 1, 0)], 1779 dtype=mdtype) 1780 assert_equal(test, control) 1781 1782 def test_user_filling_values(self): 1783 # Test with missing and filling values 1784 ctrl = np.array([(0, 3), (4, -999)], dtype=[('a', int), ('b', int)]) 1785 data = "N/A, 2, 3\n4, ,???" 1786 kwargs = dict(delimiter=",", 1787 dtype=int, 1788 names="a,b,c", 1789 missing_values={0: "N/A", 'b': " ", 2: "???"}, 1790 filling_values={0: 0, 'b': 0, 2: -999}) 1791 test = np.genfromtxt(TextIO(data), **kwargs) 1792 ctrl = np.array([(0, 2, 3), (4, 0, -999)], 1793 dtype=[(_, int) for _ in "abc"]) 1794 assert_equal(test, ctrl) 1795 # 1796 test = np.genfromtxt(TextIO(data), usecols=(0, -1), **kwargs) 1797 ctrl = np.array([(0, 3), (4, -999)], dtype=[(_, int) for _ in "ac"]) 1798 assert_equal(test, ctrl) 1799 1800 data2 = "1,2,*,4\n5,*,7,8\n" 1801 test = np.genfromtxt(TextIO(data2), delimiter=',', dtype=int, 1802 missing_values="*", filling_values=0) 1803 ctrl = np.array([[1, 2, 0, 4], [5, 0, 7, 8]]) 1804 assert_equal(test, ctrl) 1805 test = np.genfromtxt(TextIO(data2), delimiter=',', dtype=int, 1806 missing_values="*", filling_values=-1) 1807 ctrl = np.array([[1, 2, -1, 4], [5, -1, 7, 8]]) 1808 assert_equal(test, ctrl) 1809 1810 def test_withmissing_float(self): 1811 data = TextIO('A,B\n0,1.5\n2,-999.00') 1812 test = np.genfromtxt(data, dtype=None, delimiter=',', 1813 missing_values='-999.0', names=True, usemask=True) 1814 control = ma.array([(0, 1.5), (2, -1.)], 1815 mask=[(False, False), (False, True)], 1816 dtype=[('A', int), ('B', float)]) 1817 assert_equal(test, control) 1818 assert_equal(test.mask, control.mask) 1819 1820 def test_with_masked_column_uniform(self): 1821 # Test masked column 1822 data = TextIO('1 2 3\n4 5 6\n') 1823 test = np.genfromtxt(data, dtype=None, 1824 missing_values='2,5', usemask=True) 1825 control = ma.array([[1, 2, 3], [4, 5, 6]], mask=[[0, 1, 0], [0, 1, 0]]) 1826 assert_equal(test, control) 1827 1828 def test_with_masked_column_various(self): 1829 # Test masked column 1830 data = TextIO('True 2 3\nFalse 5 6\n') 1831 test = np.genfromtxt(data, dtype=None, 1832 missing_values='2,5', usemask=True) 1833 control = ma.array([(1, 2, 3), (0, 5, 6)], 1834 mask=[(0, 1, 0), (0, 1, 0)], 1835 dtype=[('f0', bool), ('f1', bool), ('f2', int)]) 1836 assert_equal(test, control) 1837 1838 def test_invalid_raise(self): 1839 # Test invalid raise 1840 data = ["1, 1, 1, 1, 1"] * 50 1841 for i in range(5): 1842 data[10 * i] = "2, 2, 2, 2 2" 1843 data.insert(0, "a, b, c, d, e") 1844 mdata = TextIO("\n".join(data)) 1845 1846 kwargs = dict(delimiter=",", dtype=None, names=True) 1847 def f(): 1848 return np.genfromtxt(mdata, invalid_raise=False, **kwargs) 1849 mtest = assert_warns(ConversionWarning, f) 1850 assert_equal(len(mtest), 45) 1851 assert_equal(mtest, np.ones(45, dtype=[(_, int) for _ in 'abcde'])) 1852 # 1853 mdata.seek(0) 1854 assert_raises(ValueError, np.genfromtxt, mdata, 1855 delimiter=",", names=True) 1856 1857 def test_invalid_raise_with_usecols(self): 1858 # Test invalid_raise with usecols 1859 data = ["1, 1, 1, 1, 1"] * 50 1860 for i in range(5): 1861 data[10 * i] = "2, 2, 2, 2 2" 1862 data.insert(0, "a, b, c, d, e") 1863 mdata = TextIO("\n".join(data)) 1864 1865 kwargs = dict(delimiter=",", dtype=None, names=True, 1866 invalid_raise=False) 1867 def f(): 1868 return np.genfromtxt(mdata, usecols=(0, 4), **kwargs) 1869 mtest = assert_warns(ConversionWarning, f) 1870 assert_equal(len(mtest), 45) 1871 assert_equal(mtest, np.ones(45, dtype=[(_, int) for _ in 'ae'])) 1872 # 1873 mdata.seek(0) 1874 mtest = np.genfromtxt(mdata, usecols=(0, 1), **kwargs) 1875 assert_equal(len(mtest), 50) 1876 control = np.ones(50, dtype=[(_, int) for _ in 'ab']) 1877 control[[10 * _ for _ in range(5)]] = (2, 2) 1878 assert_equal(mtest, control) 1879 1880 def test_inconsistent_dtype(self): 1881 # Test inconsistent dtype 1882 data = ["1, 1, 1, 1, -1.1"] * 50 1883 mdata = TextIO("\n".join(data)) 1884 1885 converters = {4: lambda x: "(%s)" % x.decode()} 1886 kwargs = dict(delimiter=",", converters=converters, 1887 dtype=[(_, int) for _ in 'abcde'],) 1888 assert_raises(ValueError, np.genfromtxt, mdata, **kwargs) 1889 1890 def test_default_field_format(self): 1891 # Test default format 1892 data = "0, 1, 2.3\n4, 5, 6.7" 1893 mtest = np.genfromtxt(TextIO(data), 1894 delimiter=",", dtype=None, defaultfmt="f%02i") 1895 ctrl = np.array([(0, 1, 2.3), (4, 5, 6.7)], 1896 dtype=[("f00", int), ("f01", int), ("f02", float)]) 1897 assert_equal(mtest, ctrl) 1898 1899 def test_single_dtype_wo_names(self): 1900 # Test single dtype w/o names 1901 data = "0, 1, 2.3\n4, 5, 6.7" 1902 mtest = np.genfromtxt(TextIO(data), 1903 delimiter=",", dtype=float, defaultfmt="f%02i") 1904 ctrl = np.array([[0., 1., 2.3], [4., 5., 6.7]], dtype=float) 1905 assert_equal(mtest, ctrl) 1906 1907 def test_single_dtype_w_explicit_names(self): 1908 # Test single dtype w explicit names 1909 data = "0, 1, 2.3\n4, 5, 6.7" 1910 mtest = np.genfromtxt(TextIO(data), 1911 delimiter=",", dtype=float, names="a, b, c") 1912 ctrl = np.array([(0., 1., 2.3), (4., 5., 6.7)], 1913 dtype=[(_, float) for _ in "abc"]) 1914 assert_equal(mtest, ctrl) 1915 1916 def test_single_dtype_w_implicit_names(self): 1917 # Test single dtype w implicit names 1918 data = "a, b, c\n0, 1, 2.3\n4, 5, 6.7" 1919 mtest = np.genfromtxt(TextIO(data), 1920 delimiter=",", dtype=float, names=True) 1921 ctrl = np.array([(0., 1., 2.3), (4., 5., 6.7)], 1922 dtype=[(_, float) for _ in "abc"]) 1923 assert_equal(mtest, ctrl) 1924 1925 def test_easy_structured_dtype(self): 1926 # Test easy structured dtype 1927 data = "0, 1, 2.3\n4, 5, 6.7" 1928 mtest = np.genfromtxt(TextIO(data), delimiter=",", 1929 dtype=(int, float, float), defaultfmt="f_%02i") 1930 ctrl = np.array([(0, 1., 2.3), (4, 5., 6.7)], 1931 dtype=[("f_00", int), ("f_01", float), ("f_02", float)]) 1932 assert_equal(mtest, ctrl) 1933 1934 def test_autostrip(self): 1935 # Test autostrip 1936 data = "01/01/2003 , 1.3, abcde" 1937 kwargs = dict(delimiter=",", dtype=None) 1938 with warnings.catch_warnings(record=True) as w: 1939 warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) 1940 mtest = np.genfromtxt(TextIO(data), **kwargs) 1941 assert_(w[0].category is np.VisibleDeprecationWarning) 1942 ctrl = np.array([('01/01/2003 ', 1.3, ' abcde')], 1943 dtype=[('f0', '|S12'), ('f1', float), ('f2', '|S8')]) 1944 assert_equal(mtest, ctrl) 1945 with warnings.catch_warnings(record=True) as w: 1946 warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) 1947 mtest = np.genfromtxt(TextIO(data), autostrip=True, **kwargs) 1948 assert_(w[0].category is np.VisibleDeprecationWarning) 1949 ctrl = np.array([('01/01/2003', 1.3, 'abcde')], 1950 dtype=[('f0', '|S10'), ('f1', float), ('f2', '|S5')]) 1951 assert_equal(mtest, ctrl) 1952 1953 def test_replace_space(self): 1954 # Test the 'replace_space' option 1955 txt = "A.A, B (B), C:C\n1, 2, 3.14" 1956 # Test default: replace ' ' by '_' and delete non-alphanum chars 1957 test = np.genfromtxt(TextIO(txt), 1958 delimiter=",", names=True, dtype=None) 1959 ctrl_dtype = [("AA", int), ("B_B", int), ("CC", float)] 1960 ctrl = np.array((1, 2, 3.14), dtype=ctrl_dtype) 1961 assert_equal(test, ctrl) 1962 # Test: no replace, no delete 1963 test = np.genfromtxt(TextIO(txt), 1964 delimiter=",", names=True, dtype=None, 1965 replace_space='', deletechars='') 1966 ctrl_dtype = [("A.A", int), ("B (B)", int), ("C:C", float)] 1967 ctrl = np.array((1, 2, 3.14), dtype=ctrl_dtype) 1968 assert_equal(test, ctrl) 1969 # Test: no delete (spaces are replaced by _) 1970 test = np.genfromtxt(TextIO(txt), 1971 delimiter=",", names=True, dtype=None, 1972 deletechars='') 1973 ctrl_dtype = [("A.A", int), ("B_(B)", int), ("C:C", float)] 1974 ctrl = np.array((1, 2, 3.14), dtype=ctrl_dtype) 1975 assert_equal(test, ctrl) 1976 1977 def test_replace_space_known_dtype(self): 1978 # Test the 'replace_space' (and related) options when dtype != None 1979 txt = "A.A, B (B), C:C\n1, 2, 3" 1980 # Test default: replace ' ' by '_' and delete non-alphanum chars 1981 test = np.genfromtxt(TextIO(txt), 1982 delimiter=",", names=True, dtype=int) 1983 ctrl_dtype = [("AA", int), ("B_B", int), ("CC", int)] 1984 ctrl = np.array((1, 2, 3), dtype=ctrl_dtype) 1985 assert_equal(test, ctrl) 1986 # Test: no replace, no delete 1987 test = np.genfromtxt(TextIO(txt), 1988 delimiter=",", names=True, dtype=int, 1989 replace_space='', deletechars='') 1990 ctrl_dtype = [("A.A", int), ("B (B)", int), ("C:C", int)] 1991 ctrl = np.array((1, 2, 3), dtype=ctrl_dtype) 1992 assert_equal(test, ctrl) 1993 # Test: no delete (spaces are replaced by _) 1994 test = np.genfromtxt(TextIO(txt), 1995 delimiter=",", names=True, dtype=int, 1996 deletechars='') 1997 ctrl_dtype = [("A.A", int), ("B_(B)", int), ("C:C", int)] 1998 ctrl = np.array((1, 2, 3), dtype=ctrl_dtype) 1999 assert_equal(test, ctrl) 2000 2001 def test_incomplete_names(self): 2002 # Test w/ incomplete names 2003 data = "A,,C\n0,1,2\n3,4,5" 2004 kwargs = dict(delimiter=",", names=True) 2005 # w/ dtype=None 2006 ctrl = np.array([(0, 1, 2), (3, 4, 5)], 2007 dtype=[(_, int) for _ in ('A', 'f0', 'C')]) 2008 test = np.genfromtxt(TextIO(data), dtype=None, **kwargs) 2009 assert_equal(test, ctrl) 2010 # w/ default dtype 2011 ctrl = np.array([(0, 1, 2), (3, 4, 5)], 2012 dtype=[(_, float) for _ in ('A', 'f0', 'C')]) 2013 test = np.genfromtxt(TextIO(data), **kwargs) 2014 2015 def test_names_auto_completion(self): 2016 # Make sure that names are properly completed 2017 data = "1 2 3\n 4 5 6" 2018 test = np.genfromtxt(TextIO(data), 2019 dtype=(int, float, int), names="a") 2020 ctrl = np.array([(1, 2, 3), (4, 5, 6)], 2021 dtype=[('a', int), ('f0', float), ('f1', int)]) 2022 assert_equal(test, ctrl) 2023 2024 def test_names_with_usecols_bug1636(self): 2025 # Make sure we pick up the right names w/ usecols 2026 data = "A,B,C,D,E\n0,1,2,3,4\n0,1,2,3,4\n0,1,2,3,4" 2027 ctrl_names = ("A", "C", "E") 2028 test = np.genfromtxt(TextIO(data), 2029 dtype=(int, int, int), delimiter=",", 2030 usecols=(0, 2, 4), names=True) 2031 assert_equal(test.dtype.names, ctrl_names) 2032 # 2033 test = np.genfromtxt(TextIO(data), 2034 dtype=(int, int, int), delimiter=",", 2035 usecols=("A", "C", "E"), names=True) 2036 assert_equal(test.dtype.names, ctrl_names) 2037 # 2038 test = np.genfromtxt(TextIO(data), 2039 dtype=int, delimiter=",", 2040 usecols=("A", "C", "E"), names=True) 2041 assert_equal(test.dtype.names, ctrl_names) 2042 2043 def test_fixed_width_names(self): 2044 # Test fix-width w/ names 2045 data = " A B C\n 0 1 2.3\n 45 67 9." 2046 kwargs = dict(delimiter=(5, 5, 4), names=True, dtype=None) 2047 ctrl = np.array([(0, 1, 2.3), (45, 67, 9.)], 2048 dtype=[('A', int), ('B', int), ('C', float)]) 2049 test = np.genfromtxt(TextIO(data), **kwargs) 2050 assert_equal(test, ctrl) 2051 # 2052 kwargs = dict(delimiter=5, names=True, dtype=None) 2053 ctrl = np.array([(0, 1, 2.3), (45, 67, 9.)], 2054 dtype=[('A', int), ('B', int), ('C', float)]) 2055 test = np.genfromtxt(TextIO(data), **kwargs) 2056 assert_equal(test, ctrl) 2057 2058 def test_filling_values(self): 2059 # Test missing values 2060 data = b"1, 2, 3\n1, , 5\n0, 6, \n" 2061 kwargs = dict(delimiter=",", dtype=None, filling_values=-999) 2062 ctrl = np.array([[1, 2, 3], [1, -999, 5], [0, 6, -999]], dtype=int) 2063 test = np.genfromtxt(TextIO(data), **kwargs) 2064 assert_equal(test, ctrl) 2065 2066 def test_comments_is_none(self): 2067 # Github issue 329 (None was previously being converted to 'None'). 2068 with warnings.catch_warnings(record=True) as w: 2069 warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) 2070 test = np.genfromtxt(TextIO("test1,testNonetherestofthedata"), 2071 dtype=None, comments=None, delimiter=',') 2072 assert_(w[0].category is np.VisibleDeprecationWarning) 2073 assert_equal(test[1], b'testNonetherestofthedata') 2074 with warnings.catch_warnings(record=True) as w: 2075 warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) 2076 test = np.genfromtxt(TextIO("test1, testNonetherestofthedata"), 2077 dtype=None, comments=None, delimiter=',') 2078 assert_(w[0].category is np.VisibleDeprecationWarning) 2079 assert_equal(test[1], b' testNonetherestofthedata') 2080 2081 def test_latin1(self): 2082 latin1 = b'\xf6\xfc\xf6' 2083 norm = b"norm1,norm2,norm3\n" 2084 enc = b"test1,testNonethe" + latin1 + b",test3\n" 2085 s = norm + enc + norm 2086 with warnings.catch_warnings(record=True) as w: 2087 warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) 2088 test = np.genfromtxt(TextIO(s), 2089 dtype=None, comments=None, delimiter=',') 2090 assert_(w[0].category is np.VisibleDeprecationWarning) 2091 assert_equal(test[1, 0], b"test1") 2092 assert_equal(test[1, 1], b"testNonethe" + latin1) 2093 assert_equal(test[1, 2], b"test3") 2094 test = np.genfromtxt(TextIO(s), 2095 dtype=None, comments=None, delimiter=',', 2096 encoding='latin1') 2097 assert_equal(test[1, 0], u"test1") 2098 assert_equal(test[1, 1], u"testNonethe" + latin1.decode('latin1')) 2099 assert_equal(test[1, 2], u"test3") 2100 2101 with warnings.catch_warnings(record=True) as w: 2102 warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) 2103 test = np.genfromtxt(TextIO(b"0,testNonethe" + latin1), 2104 dtype=None, comments=None, delimiter=',') 2105 assert_(w[0].category is np.VisibleDeprecationWarning) 2106 assert_equal(test['f0'], 0) 2107 assert_equal(test['f1'], b"testNonethe" + latin1) 2108 2109 def test_binary_decode_autodtype(self): 2110 utf16 = b'\xff\xfeh\x04 \x00i\x04 \x00j\x04' 2111 v = self.loadfunc(BytesIO(utf16), dtype=None, encoding='UTF-16') 2112 assert_array_equal(v, np.array(utf16.decode('UTF-16').split())) 2113 2114 def test_utf8_byte_encoding(self): 2115 utf8 = b"\xcf\x96" 2116 norm = b"norm1,norm2,norm3\n" 2117 enc = b"test1,testNonethe" + utf8 + b",test3\n" 2118 s = norm + enc + norm 2119 with warnings.catch_warnings(record=True) as w: 2120 warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) 2121 test = np.genfromtxt(TextIO(s), 2122 dtype=None, comments=None, delimiter=',') 2123 assert_(w[0].category is np.VisibleDeprecationWarning) 2124 ctl = np.array([ 2125 [b'norm1', b'norm2', b'norm3'], 2126 [b'test1', b'testNonethe' + utf8, b'test3'], 2127 [b'norm1', b'norm2', b'norm3']]) 2128 assert_array_equal(test, ctl) 2129 2130 def test_utf8_file(self): 2131 utf8 = b"\xcf\x96" 2132 with temppath() as path: 2133 with open(path, "wb") as f: 2134 f.write((b"test1,testNonethe" + utf8 + b",test3\n") * 2) 2135 test = np.genfromtxt(path, dtype=None, comments=None, 2136 delimiter=',', encoding="UTF-8") 2137 ctl = np.array([ 2138 ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"], 2139 ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"]], 2140 dtype=np.unicode_) 2141 assert_array_equal(test, ctl) 2142 2143 # test a mixed dtype 2144 with open(path, "wb") as f: 2145 f.write(b"0,testNonethe" + utf8) 2146 test = np.genfromtxt(path, dtype=None, comments=None, 2147 delimiter=',', encoding="UTF-8") 2148 assert_equal(test['f0'], 0) 2149 assert_equal(test['f1'], "testNonethe" + utf8.decode("UTF-8")) 2150 2151 def test_utf8_file_nodtype_unicode(self): 2152 # bytes encoding with non-latin1 -> unicode upcast 2153 utf8 = u'\u03d6' 2154 latin1 = u'\xf6\xfc\xf6' 2155 2156 # skip test if cannot encode utf8 test string with preferred 2157 # encoding. The preferred encoding is assumed to be the default 2158 # encoding of io.open. Will need to change this for PyTest, maybe 2159 # using pytest.mark.xfail(raises=***). 2160 try: 2161 encoding = locale.getpreferredencoding() 2162 utf8.encode(encoding) 2163 except (UnicodeError, ImportError): 2164 pytest.skip('Skipping test_utf8_file_nodtype_unicode, ' 2165 'unable to encode utf8 in preferred encoding') 2166 2167 with temppath() as path: 2168 with io.open(path, "wt") as f: 2169 f.write(u"norm1,norm2,norm3\n") 2170 f.write(u"norm1," + latin1 + u",norm3\n") 2171 f.write(u"test1,testNonethe" + utf8 + u",test3\n") 2172 with warnings.catch_warnings(record=True) as w: 2173 warnings.filterwarnings('always', '', 2174 np.VisibleDeprecationWarning) 2175 test = np.genfromtxt(path, dtype=None, comments=None, 2176 delimiter=',') 2177 # Check for warning when encoding not specified. 2178 assert_(w[0].category is np.VisibleDeprecationWarning) 2179 ctl = np.array([ 2180 ["norm1", "norm2", "norm3"], 2181 ["norm1", latin1, "norm3"], 2182 ["test1", "testNonethe" + utf8, "test3"]], 2183 dtype=np.unicode_) 2184 assert_array_equal(test, ctl) 2185 2186 def test_recfromtxt(self): 2187 # 2188 data = TextIO('A,B\n0,1\n2,3') 2189 kwargs = dict(delimiter=",", missing_values="N/A", names=True) 2190 test = np.recfromtxt(data, **kwargs) 2191 control = np.array([(0, 1), (2, 3)], 2192 dtype=[('A', int), ('B', int)]) 2193 assert_(isinstance(test, np.recarray)) 2194 assert_equal(test, control) 2195 # 2196 data = TextIO('A,B\n0,1\n2,N/A') 2197 test = np.recfromtxt(data, dtype=None, usemask=True, **kwargs) 2198 control = ma.array([(0, 1), (2, -1)], 2199 mask=[(False, False), (False, True)], 2200 dtype=[('A', int), ('B', int)]) 2201 assert_equal(test, control) 2202 assert_equal(test.mask, control.mask) 2203 assert_equal(test.A, [0, 2]) 2204 2205 def test_recfromcsv(self): 2206 # 2207 data = TextIO('A,B\n0,1\n2,3') 2208 kwargs = dict(missing_values="N/A", names=True, case_sensitive=True) 2209 test = np.recfromcsv(data, dtype=None, **kwargs) 2210 control = np.array([(0, 1), (2, 3)], 2211 dtype=[('A', int), ('B', int)]) 2212 assert_(isinstance(test, np.recarray)) 2213 assert_equal(test, control) 2214 # 2215 data = TextIO('A,B\n0,1\n2,N/A') 2216 test = np.recfromcsv(data, dtype=None, usemask=True, **kwargs) 2217 control = ma.array([(0, 1), (2, -1)], 2218 mask=[(False, False), (False, True)], 2219 dtype=[('A', int), ('B', int)]) 2220 assert_equal(test, control) 2221 assert_equal(test.mask, control.mask) 2222 assert_equal(test.A, [0, 2]) 2223 # 2224 data = TextIO('A,B\n0,1\n2,3') 2225 test = np.recfromcsv(data, missing_values='N/A',) 2226 control = np.array([(0, 1), (2, 3)], 2227 dtype=[('a', int), ('b', int)]) 2228 assert_(isinstance(test, np.recarray)) 2229 assert_equal(test, control) 2230 # 2231 data = TextIO('A,B\n0,1\n2,3') 2232 dtype = [('a', int), ('b', float)] 2233 test = np.recfromcsv(data, missing_values='N/A', dtype=dtype) 2234 control = np.array([(0, 1), (2, 3)], 2235 dtype=dtype) 2236 assert_(isinstance(test, np.recarray)) 2237 assert_equal(test, control) 2238 2239 #gh-10394 2240 data = TextIO('color\n"red"\n"blue"') 2241 test = np.recfromcsv(data, converters={0: lambda x: x.strip(b'\"')}) 2242 control = np.array([('red',), ('blue',)], dtype=[('color', (bytes, 4))]) 2243 assert_equal(test.dtype, control.dtype) 2244 assert_equal(test, control) 2245 2246 def test_max_rows(self): 2247 # Test the `max_rows` keyword argument. 2248 data = '1 2\n3 4\n5 6\n7 8\n9 10\n' 2249 txt = TextIO(data) 2250 a1 = np.genfromtxt(txt, max_rows=3) 2251 a2 = np.genfromtxt(txt) 2252 assert_equal(a1, [[1, 2], [3, 4], [5, 6]]) 2253 assert_equal(a2, [[7, 8], [9, 10]]) 2254 2255 # max_rows must be at least 1. 2256 assert_raises(ValueError, np.genfromtxt, TextIO(data), max_rows=0) 2257 2258 # An input with several invalid rows. 2259 data = '1 1\n2 2\n0 \n3 3\n4 4\n5 \n6 \n7 \n' 2260 2261 test = np.genfromtxt(TextIO(data), max_rows=2) 2262 control = np.array([[1., 1.], [2., 2.]]) 2263 assert_equal(test, control) 2264 2265 # Test keywords conflict 2266 assert_raises(ValueError, np.genfromtxt, TextIO(data), skip_footer=1, 2267 max_rows=4) 2268 2269 # Test with invalid value 2270 assert_raises(ValueError, np.genfromtxt, TextIO(data), max_rows=4) 2271 2272 # Test with invalid not raise 2273 with suppress_warnings() as sup: 2274 sup.filter(ConversionWarning) 2275 2276 test = np.genfromtxt(TextIO(data), max_rows=4, invalid_raise=False) 2277 control = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]]) 2278 assert_equal(test, control) 2279 2280 test = np.genfromtxt(TextIO(data), max_rows=5, invalid_raise=False) 2281 control = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]]) 2282 assert_equal(test, control) 2283 2284 # Structured array with field names. 2285 data = 'a b\n#c d\n1 1\n2 2\n#0 \n3 3\n4 4\n5 5\n' 2286 2287 # Test with header, names and comments 2288 txt = TextIO(data) 2289 test = np.genfromtxt(txt, skip_header=1, max_rows=3, names=True) 2290 control = np.array([(1.0, 1.0), (2.0, 2.0), (3.0, 3.0)], 2291 dtype=[('c', '<f8'), ('d', '<f8')]) 2292 assert_equal(test, control) 2293 # To continue reading the same "file", don't use skip_header or 2294 # names, and use the previously determined dtype. 2295 test = np.genfromtxt(txt, max_rows=None, dtype=test.dtype) 2296 control = np.array([(4.0, 4.0), (5.0, 5.0)], 2297 dtype=[('c', '<f8'), ('d', '<f8')]) 2298 assert_equal(test, control) 2299 2300 def test_gft_using_filename(self): 2301 # Test that we can load data from a filename as well as a file 2302 # object 2303 tgt = np.arange(6).reshape((2, 3)) 2304 linesep = ('\n', '\r\n', '\r') 2305 2306 for sep in linesep: 2307 data = '0 1 2' + sep + '3 4 5' 2308 with temppath() as name: 2309 with open(name, 'w') as f: 2310 f.write(data) 2311 res = np.genfromtxt(name) 2312 assert_array_equal(res, tgt) 2313 2314 def test_gft_from_gzip(self): 2315 # Test that we can load data from a gzipped file 2316 wanted = np.arange(6).reshape((2, 3)) 2317 linesep = ('\n', '\r\n', '\r') 2318 2319 for sep in linesep: 2320 data = '0 1 2' + sep + '3 4 5' 2321 s = BytesIO() 2322 with gzip.GzipFile(fileobj=s, mode='w') as g: 2323 g.write(asbytes(data)) 2324 2325 with temppath(suffix='.gz2') as name: 2326 with open(name, 'w') as f: 2327 f.write(data) 2328 assert_array_equal(np.genfromtxt(name), wanted) 2329 2330 def test_gft_using_generator(self): 2331 # gft doesn't work with unicode. 2332 def count(): 2333 for i in range(10): 2334 yield asbytes("%d" % i) 2335 2336 res = np.genfromtxt(count()) 2337 assert_array_equal(res, np.arange(10)) 2338 2339 def test_auto_dtype_largeint(self): 2340 # Regression test for numpy/numpy#5635 whereby large integers could 2341 # cause OverflowErrors. 2342 2343 # Test the automatic definition of the output dtype 2344 # 2345 # 2**66 = 73786976294838206464 => should convert to float 2346 # 2**34 = 17179869184 => should convert to int64 2347 # 2**10 = 1024 => should convert to int (int32 on 32-bit systems, 2348 # int64 on 64-bit systems) 2349 2350 data = TextIO('73786976294838206464 17179869184 1024') 2351 2352 test = np.genfromtxt(data, dtype=None) 2353 2354 assert_equal(test.dtype.names, ['f0', 'f1', 'f2']) 2355 2356 assert_(test.dtype['f0'] == float) 2357 assert_(test.dtype['f1'] == np.int64) 2358 assert_(test.dtype['f2'] == np.int_) 2359 2360 assert_allclose(test['f0'], 73786976294838206464.) 2361 assert_equal(test['f1'], 17179869184) 2362 assert_equal(test['f2'], 1024) 2363 2364 def test_unpack_structured(self): 2365 # Regression test for gh-4341 2366 # Unpacking should work on structured arrays 2367 txt = TextIO("M 21 72\nF 35 58") 2368 dt = {'names': ('a', 'b', 'c'), 'formats': ('S1', 'i4', 'f4')} 2369 a, b, c = np.genfromtxt(txt, dtype=dt, unpack=True) 2370 assert_equal(a.dtype, np.dtype('S1')) 2371 assert_equal(b.dtype, np.dtype('i4')) 2372 assert_equal(c.dtype, np.dtype('f4')) 2373 assert_array_equal(a, np.array([b'M', b'F'])) 2374 assert_array_equal(b, np.array([21, 35])) 2375 assert_array_equal(c, np.array([72., 58.])) 2376 2377 def test_unpack_auto_dtype(self): 2378 # Regression test for gh-4341 2379 # Unpacking should work when dtype=None 2380 txt = TextIO("M 21 72.\nF 35 58.") 2381 expected = (np.array(["M", "F"]), np.array([21, 35]), np.array([72., 58.])) 2382 test = np.genfromtxt(txt, dtype=None, unpack=True, encoding="utf-8") 2383 for arr, result in zip(expected, test): 2384 assert_array_equal(arr, result) 2385 assert_equal(arr.dtype, result.dtype) 2386 2387 def test_unpack_single_name(self): 2388 # Regression test for gh-4341 2389 # Unpacking should work when structured dtype has only one field 2390 txt = TextIO("21\n35") 2391 dt = {'names': ('a',), 'formats': ('i4',)} 2392 expected = np.array([21, 35], dtype=np.int32) 2393 test = np.genfromtxt(txt, dtype=dt, unpack=True) 2394 assert_array_equal(expected, test) 2395 assert_equal(expected.dtype, test.dtype) 2396 2397 def test_squeeze_scalar(self): 2398 # Regression test for gh-4341 2399 # Unpacking a scalar should give zero-dim output, 2400 # even if dtype is structured 2401 txt = TextIO("1") 2402 dt = {'names': ('a',), 'formats': ('i4',)} 2403 expected = np.array((1,), dtype=np.int32) 2404 test = np.genfromtxt(txt, dtype=dt, unpack=True) 2405 assert_array_equal(expected, test) 2406 assert_equal((), test.shape) 2407 assert_equal(expected.dtype, test.dtype) 2408 2409 2410class TestPathUsage: 2411 # Test that pathlib.Path can be used 2412 def test_loadtxt(self): 2413 with temppath(suffix='.txt') as path: 2414 path = Path(path) 2415 a = np.array([[1.1, 2], [3, 4]]) 2416 np.savetxt(path, a) 2417 x = np.loadtxt(path) 2418 assert_array_equal(x, a) 2419 2420 def test_save_load(self): 2421 # Test that pathlib.Path instances can be used with save. 2422 with temppath(suffix='.npy') as path: 2423 path = Path(path) 2424 a = np.array([[1, 2], [3, 4]], int) 2425 np.save(path, a) 2426 data = np.load(path) 2427 assert_array_equal(data, a) 2428 2429 def test_save_load_memmap(self): 2430 # Test that pathlib.Path instances can be loaded mem-mapped. 2431 with temppath(suffix='.npy') as path: 2432 path = Path(path) 2433 a = np.array([[1, 2], [3, 4]], int) 2434 np.save(path, a) 2435 data = np.load(path, mmap_mode='r') 2436 assert_array_equal(data, a) 2437 # close the mem-mapped file 2438 del data 2439 if IS_PYPY: 2440 break_cycles() 2441 break_cycles() 2442 2443 def test_save_load_memmap_readwrite(self): 2444 # Test that pathlib.Path instances can be written mem-mapped. 2445 with temppath(suffix='.npy') as path: 2446 path = Path(path) 2447 a = np.array([[1, 2], [3, 4]], int) 2448 np.save(path, a) 2449 b = np.load(path, mmap_mode='r+') 2450 a[0][0] = 5 2451 b[0][0] = 5 2452 del b # closes the file 2453 if IS_PYPY: 2454 break_cycles() 2455 break_cycles() 2456 data = np.load(path) 2457 assert_array_equal(data, a) 2458 2459 def test_savez_load(self): 2460 # Test that pathlib.Path instances can be used with savez. 2461 with temppath(suffix='.npz') as path: 2462 path = Path(path) 2463 np.savez(path, lab='place holder') 2464 with np.load(path) as data: 2465 assert_array_equal(data['lab'], 'place holder') 2466 2467 def test_savez_compressed_load(self): 2468 # Test that pathlib.Path instances can be used with savez. 2469 with temppath(suffix='.npz') as path: 2470 path = Path(path) 2471 np.savez_compressed(path, lab='place holder') 2472 data = np.load(path) 2473 assert_array_equal(data['lab'], 'place holder') 2474 data.close() 2475 2476 def test_genfromtxt(self): 2477 with temppath(suffix='.txt') as path: 2478 path = Path(path) 2479 a = np.array([(1, 2), (3, 4)]) 2480 np.savetxt(path, a) 2481 data = np.genfromtxt(path) 2482 assert_array_equal(a, data) 2483 2484 def test_ndfromtxt(self): 2485 # Test outputting a standard ndarray 2486 with temppath(suffix='.txt') as path: 2487 path = Path(path) 2488 with path.open('w') as f: 2489 f.write(u'1 2\n3 4') 2490 2491 control = np.array([[1, 2], [3, 4]], dtype=int) 2492 test = np.genfromtxt(path, dtype=int) 2493 assert_array_equal(test, control) 2494 2495 def test_mafromtxt(self): 2496 # From `test_fancy_dtype_alt` above 2497 with temppath(suffix='.txt') as path: 2498 path = Path(path) 2499 with path.open('w') as f: 2500 f.write(u'1,2,3.0\n4,5,6.0\n') 2501 2502 test = np.genfromtxt(path, delimiter=',', usemask=True) 2503 control = ma.array([(1.0, 2.0, 3.0), (4.0, 5.0, 6.0)]) 2504 assert_equal(test, control) 2505 2506 def test_recfromtxt(self): 2507 with temppath(suffix='.txt') as path: 2508 path = Path(path) 2509 with path.open('w') as f: 2510 f.write(u'A,B\n0,1\n2,3') 2511 2512 kwargs = dict(delimiter=",", missing_values="N/A", names=True) 2513 test = np.recfromtxt(path, **kwargs) 2514 control = np.array([(0, 1), (2, 3)], 2515 dtype=[('A', int), ('B', int)]) 2516 assert_(isinstance(test, np.recarray)) 2517 assert_equal(test, control) 2518 2519 def test_recfromcsv(self): 2520 with temppath(suffix='.txt') as path: 2521 path = Path(path) 2522 with path.open('w') as f: 2523 f.write(u'A,B\n0,1\n2,3') 2524 2525 kwargs = dict(missing_values="N/A", names=True, case_sensitive=True) 2526 test = np.recfromcsv(path, dtype=None, **kwargs) 2527 control = np.array([(0, 1), (2, 3)], 2528 dtype=[('A', int), ('B', int)]) 2529 assert_(isinstance(test, np.recarray)) 2530 assert_equal(test, control) 2531 2532 2533def test_gzip_load(): 2534 a = np.random.random((5, 5)) 2535 2536 s = BytesIO() 2537 f = gzip.GzipFile(fileobj=s, mode="w") 2538 2539 np.save(f, a) 2540 f.close() 2541 s.seek(0) 2542 2543 f = gzip.GzipFile(fileobj=s, mode="r") 2544 assert_array_equal(np.load(f), a) 2545 2546 2547# These next two classes encode the minimal API needed to save()/load() arrays. 2548# The `test_ducktyping` ensures they work correctly 2549class JustWriter: 2550 def __init__(self, base): 2551 self.base = base 2552 2553 def write(self, s): 2554 return self.base.write(s) 2555 2556 def flush(self): 2557 return self.base.flush() 2558 2559class JustReader: 2560 def __init__(self, base): 2561 self.base = base 2562 2563 def read(self, n): 2564 return self.base.read(n) 2565 2566 def seek(self, off, whence=0): 2567 return self.base.seek(off, whence) 2568 2569 2570def test_ducktyping(): 2571 a = np.random.random((5, 5)) 2572 2573 s = BytesIO() 2574 f = JustWriter(s) 2575 2576 np.save(f, a) 2577 f.flush() 2578 s.seek(0) 2579 2580 f = JustReader(s) 2581 assert_array_equal(np.load(f), a) 2582 2583 2584 2585def test_gzip_loadtxt(): 2586 # Thanks to another windows brokenness, we can't use 2587 # NamedTemporaryFile: a file created from this function cannot be 2588 # reopened by another open call. So we first put the gzipped string 2589 # of the test reference array, write it to a securely opened file, 2590 # which is then read from by the loadtxt function 2591 s = BytesIO() 2592 g = gzip.GzipFile(fileobj=s, mode='w') 2593 g.write(b'1 2 3\n') 2594 g.close() 2595 2596 s.seek(0) 2597 with temppath(suffix='.gz') as name: 2598 with open(name, 'wb') as f: 2599 f.write(s.read()) 2600 res = np.loadtxt(name) 2601 s.close() 2602 2603 assert_array_equal(res, [1, 2, 3]) 2604 2605 2606def test_gzip_loadtxt_from_string(): 2607 s = BytesIO() 2608 f = gzip.GzipFile(fileobj=s, mode="w") 2609 f.write(b'1 2 3\n') 2610 f.close() 2611 s.seek(0) 2612 2613 f = gzip.GzipFile(fileobj=s, mode="r") 2614 assert_array_equal(np.loadtxt(f), [1, 2, 3]) 2615 2616 2617def test_npzfile_dict(): 2618 s = BytesIO() 2619 x = np.zeros((3, 3)) 2620 y = np.zeros((3, 3)) 2621 2622 np.savez(s, x=x, y=y) 2623 s.seek(0) 2624 2625 z = np.load(s) 2626 2627 assert_('x' in z) 2628 assert_('y' in z) 2629 assert_('x' in z.keys()) 2630 assert_('y' in z.keys()) 2631 2632 for f, a in z.items(): 2633 assert_(f in ['x', 'y']) 2634 assert_equal(a.shape, (3, 3)) 2635 2636 assert_(len(z.items()) == 2) 2637 2638 for f in z: 2639 assert_(f in ['x', 'y']) 2640 2641 assert_('x' in z.keys()) 2642 2643 2644@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") 2645def test_load_refcount(): 2646 # Check that objects returned by np.load are directly freed based on 2647 # their refcount, rather than needing the gc to collect them. 2648 2649 f = BytesIO() 2650 np.savez(f, [1, 2, 3]) 2651 f.seek(0) 2652 2653 with assert_no_gc_cycles(): 2654 np.load(f) 2655 2656 f.seek(0) 2657 dt = [("a", 'u1', 2), ("b", 'u1', 2)] 2658 with assert_no_gc_cycles(): 2659 x = np.loadtxt(TextIO("0 1 2 3"), dtype=dt) 2660 assert_equal(x, np.array([((0, 1), (2, 3))], dtype=dt)) 2661