# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.

"""
    Dataset testing operations.

    Tests all dataset operations, including creation, with the exception of:

    1. Slicing operations for read and write, handled by module test_slicing
    2. Type conversion for read and write (currently untested)
"""

import pathlib
import sys
import numpy as np
import platform
import pytest
import warnings

from .common import ut, TestCase
from .data_files import get_data_file_path
from h5py import File, Group, Dataset
from h5py._hl.base import is_empty_dataspace
from h5py import h5f, h5t
from h5py.h5py_warnings import H5pyDeprecationWarning
import h5py
import h5py._hl.selections as sel


class BaseDataset(TestCase):
    """ Common fixture: each test gets a fresh writable temporary HDF5 file. """

    def setUp(self):
        self.f = File(self.mktemp(), 'w')

    def tearDown(self):
        # Close only if a file object was actually created and is still open
        if self.f:
            self.f.close()


class TestRepr(BaseDataset):
    """
    Feature: repr(Dataset) behaves sensibly
    """

    def test_repr_open(self):
        """ repr() works on live and dead datasets """
        dset = self.f.create_dataset('foo', (4,))
        self.assertIsInstance(repr(dset), str)
        # After closing the file the dataset is "dead", but repr() must
        # still return a string rather than raising.
        self.f.close()
        self.assertIsInstance(repr(dset), str)


class TestCreateShape(BaseDataset):

    """
    Feature: Datasets can be created from a shape only
    """

    def test_create_scalar(self):
        """ Create a scalar dataset """
        dset = self.f.create_dataset('foo', ())
        self.assertEqual(dset.shape, ())

    def test_create_simple(self):
        """ Create a size-1 dataset """
        dset = self.f.create_dataset('foo', (1,))
        self.assertEqual(dset.shape, (1,))

    def test_create_integer(self):
        """ Create a size-1 dataset with integer shape"""
        dset = self.f.create_dataset('foo', 1)
        self.assertEqual(dset.shape, (1,))
79 def test_create_extended(self): 80 """ Create an extended dataset """ 81 dset = self.f.create_dataset('foo', (63,)) 82 self.assertEqual(dset.shape, (63,)) 83 self.assertEqual(dset.size, 63) 84 dset = self.f.create_dataset('bar', (6, 10)) 85 self.assertEqual(dset.shape, (6, 10)) 86 self.assertEqual(dset.size, (60)) 87 88 def test_create_integer_extended(self): 89 """ Create an extended dataset """ 90 dset = self.f.create_dataset('foo', 63) 91 self.assertEqual(dset.shape, (63,)) 92 self.assertEqual(dset.size, 63) 93 dset = self.f.create_dataset('bar', (6, 10)) 94 self.assertEqual(dset.shape, (6, 10)) 95 self.assertEqual(dset.size, (60)) 96 97 def test_default_dtype(self): 98 """ Confirm that the default dtype is float """ 99 dset = self.f.create_dataset('foo', (63,)) 100 self.assertEqual(dset.dtype, np.dtype('=f4')) 101 102 def test_missing_shape(self): 103 """ Missing shape raises TypeError """ 104 with self.assertRaises(TypeError): 105 self.f.create_dataset('foo') 106 107 def test_long_double(self): 108 """ Confirm that the default dtype is float """ 109 dset = self.f.create_dataset('foo', (63,), dtype=np.longdouble) 110 if platform.machine() in ['ppc64le']: 111 pytest.xfail("Storage of long double deactivated on %s" % platform.machine()) 112 self.assertEqual(dset.dtype, np.longdouble) 113 114 @ut.skipIf(not hasattr(np, "complex256"), "No support for complex256") 115 def test_complex256(self): 116 """ Confirm that the default dtype is float """ 117 dset = self.f.create_dataset('foo', (63,), 118 dtype=np.dtype('complex256')) 119 self.assertEqual(dset.dtype, np.dtype('complex256')) 120 121 def test_name_bytes(self): 122 dset = self.f.create_dataset(b'foo', (1,)) 123 self.assertEqual(dset.shape, (1,)) 124 125 dset2 = self.f.create_dataset(b'bar/baz', (2,)) 126 self.assertEqual(dset2.shape, (2,)) 127 128class TestCreateData(BaseDataset): 129 130 """ 131 Feature: Datasets can be created from existing data 132 """ 133 134 def test_create_scalar(self): 135 """ Create a 
scalar dataset from existing array """ 136 data = np.ones((), 'f') 137 dset = self.f.create_dataset('foo', data=data) 138 self.assertEqual(dset.shape, data.shape) 139 140 def test_create_extended(self): 141 """ Create an extended dataset from existing data """ 142 data = np.ones((63,), 'f') 143 dset = self.f.create_dataset('foo', data=data) 144 self.assertEqual(dset.shape, data.shape) 145 146 def test_dataset_intermediate_group(self): 147 """ Create dataset with missing intermediate groups """ 148 ds = self.f.create_dataset("/foo/bar/baz", shape=(10, 10), dtype='<i4') 149 self.assertIsInstance(ds, h5py.Dataset) 150 self.assertTrue("/foo/bar/baz" in self.f) 151 152 def test_reshape(self): 153 """ Create from existing data, and make it fit a new shape """ 154 data = np.arange(30, dtype='f') 155 dset = self.f.create_dataset('foo', shape=(10, 3), data=data) 156 self.assertEqual(dset.shape, (10, 3)) 157 self.assertArrayEqual(dset[...], data.reshape((10, 3))) 158 159 def test_appropriate_low_level_id(self): 160 " Binding Dataset to a non-DatasetID identifier fails with ValueError " 161 with self.assertRaises(ValueError): 162 Dataset(self.f['/'].id) 163 164 def check_h5_string(self, dset, cset, length): 165 tid = dset.id.get_type() 166 assert isinstance(tid, h5t.TypeStringID) 167 assert tid.get_cset() == cset 168 if length is None: 169 assert tid.is_variable_str() 170 else: 171 assert not tid.is_variable_str() 172 assert tid.get_size() == length 173 174 def test_create_bytestring(self): 175 """ Creating dataset with byte string yields vlen ASCII dataset """ 176 def check_vlen_ascii(dset): 177 self.check_h5_string(dset, h5t.CSET_ASCII, length=None) 178 check_vlen_ascii(self.f.create_dataset('a', data=b'abc')) 179 check_vlen_ascii(self.f.create_dataset('b', data=[b'abc', b'def'])) 180 check_vlen_ascii(self.f.create_dataset('c', data=[[b'abc'], [b'def']])) 181 check_vlen_ascii(self.f.create_dataset( 182 'd', data=np.array([b'abc', b'def'], dtype=object) 183 )) 184 185 def 
test_create_np_s(self): 186 dset = self.f.create_dataset('a', data=np.array([b'abc', b'def'], dtype='S3')) 187 self.check_h5_string(dset, h5t.CSET_ASCII, length=3) 188 189 def test_create_strings(self): 190 def check_vlen_utf8(dset): 191 self.check_h5_string(dset, h5t.CSET_UTF8, length=None) 192 check_vlen_utf8(self.f.create_dataset('a', data='abc')) 193 check_vlen_utf8(self.f.create_dataset('b', data=['abc', 'def'])) 194 check_vlen_utf8(self.f.create_dataset('c', data=[['abc'], ['def']])) 195 check_vlen_utf8(self.f.create_dataset( 196 'd', data=np.array(['abc', 'def'], dtype=object) 197 )) 198 199 def test_create_np_u(self): 200 with self.assertRaises(TypeError): 201 self.f.create_dataset('a', data=np.array([b'abc', b'def'], dtype='U3')) 202 203 def test_empty_create_via_None_shape(self): 204 self.f.create_dataset('foo', dtype='f') 205 self.assertTrue(is_empty_dataspace(self.f['foo'].id)) 206 207 def test_empty_create_via_Empty_class(self): 208 self.f.create_dataset('foo', data=h5py.Empty(dtype='f')) 209 self.assertTrue(is_empty_dataspace(self.f['foo'].id)) 210 211 def test_create_incompatible_data(self): 212 # Shape tuple is incompatible with data 213 with self.assertRaises(ValueError): 214 self.f.create_dataset('bar', shape=4, data= np.arange(3)) 215 216 217class TestReadDirectly: 218 219 """ 220 Feature: Read data directly from Dataset into a Numpy array 221 """ 222 223 @pytest.mark.parametrize( 224 'source_shape,dest_shape,source_sel,dest_sel', 225 [ 226 ((100,), (100,), np.s_[0:10], np.s_[50:60]), 227 ((70,), (100,), np.s_[50:60], np.s_[90:]), 228 ((30, 10), (20, 20), np.s_[:20, :], np.s_[:, :10]), 229 ((5, 7, 9), (6,), np.s_[2, :6, 3], np.s_[:]), 230 ]) 231 def test_read_direct(self, writable_file, source_shape, dest_shape, source_sel, dest_sel): 232 source_values = np.arange(np.product(source_shape), dtype="int64").reshape(source_shape) 233 dset = writable_file.create_dataset("dset", source_shape, data=source_values) 234 arr = np.full(dest_shape, -1, 
dtype="int64") 235 expected = arr.copy() 236 expected[dest_sel] = source_values[source_sel] 237 238 dset.read_direct(arr, source_sel, dest_sel) 239 np.testing.assert_array_equal(arr, expected) 240 241 def test_no_sel(self, writable_file): 242 dset = writable_file.create_dataset("dset", (10,), data=np.arange(10, dtype="int64")) 243 arr = np.ones((10,), dtype="int64") 244 dset.read_direct(arr) 245 np.testing.assert_array_equal(arr, np.arange(10, dtype="int64")) 246 247 def test_empty(self, writable_file): 248 empty_dset = writable_file.create_dataset("edset", dtype='int64') 249 arr = np.ones((100,), 'int64') 250 with pytest.raises(TypeError): 251 empty_dset.read_direct(arr, np.s_[0:10], np.s_[50:60]) 252 253 def test_wrong_shape(self, writable_file): 254 dset = writable_file.create_dataset("dset", (100,), dtype='int64') 255 arr = np.ones((200,)) 256 with pytest.raises(TypeError): 257 dset.read_direct(arr) 258 259 def test_not_c_contiguous(self, writable_file): 260 dset = writable_file.create_dataset("dset", (10, 10), dtype='int64') 261 arr = np.ones((10, 10), order='F') 262 with pytest.raises(TypeError): 263 dset.read_direct(arr) 264 265class TestWriteDirectly: 266 267 """ 268 Feature: Write Numpy array directly into Dataset 269 """ 270 271 @pytest.mark.parametrize( 272 'source_shape,dest_shape,source_sel,dest_sel', 273 [ 274 ((100,), (100,), np.s_[0:10], np.s_[50:60]), 275 ((70,), (100,), np.s_[50:60], np.s_[90:]), 276 ((30, 10), (20, 20), np.s_[:20, :], np.s_[:, :10]), 277 ((5, 7, 9), (6,), np.s_[2, :6, 3], np.s_[:]), 278 ]) 279 def test_write_direct(self, writable_file, source_shape, dest_shape, source_sel, dest_sel): 280 dset = writable_file.create_dataset('dset', dest_shape, dtype='int32', fillvalue=-1) 281 arr = np.arange(np.product(source_shape)).reshape(source_shape) 282 expected = np.full(dest_shape, -1, dtype='int32') 283 expected[dest_sel] = arr[source_sel] 284 dset.write_direct(arr, source_sel, dest_sel) 285 np.testing.assert_array_equal(dset[:], 
expected) 286 287 def test_empty(self, writable_file): 288 empty_dset = writable_file.create_dataset("edset", dtype='int64') 289 with pytest.raises(TypeError): 290 empty_dset.write_direct(np.ones((100,)), np.s_[0:10], np.s_[50:60]) 291 292 def test_wrong_shape(self, writable_file): 293 dset = writable_file.create_dataset("dset", (100,), dtype='int64') 294 arr = np.ones((200,)) 295 with pytest.raises(TypeError): 296 dset.write_direct(arr) 297 298 def test_not_c_contiguous(self, writable_file): 299 dset = writable_file.create_dataset("dset", (10, 10), dtype='int64') 300 arr = np.ones((10, 10), order='F') 301 with pytest.raises(TypeError): 302 dset.write_direct(arr) 303 304 305class TestCreateRequire(BaseDataset): 306 307 """ 308 Feature: Datasets can be created only if they don't exist in the file 309 """ 310 311 def test_create(self): 312 """ Create new dataset with no conflicts """ 313 dset = self.f.require_dataset('foo', (10, 3), 'f') 314 self.assertIsInstance(dset, Dataset) 315 self.assertEqual(dset.shape, (10, 3)) 316 317 def test_create_existing(self): 318 """ require_dataset yields existing dataset """ 319 dset = self.f.require_dataset('foo', (10, 3), 'f') 320 dset2 = self.f.require_dataset('foo', (10, 3), 'f') 321 self.assertEqual(dset, dset2) 322 323 def test_create_1D(self): 324 """ require_dataset with integer shape yields existing dataset""" 325 dset = self.f.require_dataset('foo', 10, 'f') 326 dset2 = self.f.require_dataset('foo', 10, 'f') 327 self.assertEqual(dset, dset2) 328 329 dset = self.f.require_dataset('bar', (10,), 'f') 330 dset2 = self.f.require_dataset('bar', 10, 'f') 331 self.assertEqual(dset, dset2) 332 333 dset = self.f.require_dataset('baz', 10, 'f') 334 dset2 = self.f.require_dataset(b'baz', (10,), 'f') 335 self.assertEqual(dset, dset2) 336 337 def test_shape_conflict(self): 338 """ require_dataset with shape conflict yields TypeError """ 339 self.f.create_dataset('foo', (10, 3), 'f') 340 with self.assertRaises(TypeError): 341 
self.f.require_dataset('foo', (10, 4), 'f') 342 343 def test_type_conflict(self): 344 """ require_dataset with object type conflict yields TypeError """ 345 self.f.create_group('foo') 346 with self.assertRaises(TypeError): 347 self.f.require_dataset('foo', (10, 3), 'f') 348 349 def test_dtype_conflict(self): 350 """ require_dataset with dtype conflict (strict mode) yields TypeError 351 """ 352 dset = self.f.create_dataset('foo', (10, 3), 'f') 353 with self.assertRaises(TypeError): 354 self.f.require_dataset('foo', (10, 3), 'S10') 355 356 def test_dtype_exact(self): 357 """ require_dataset with exactly dtype match """ 358 359 dset = self.f.create_dataset('foo', (10, 3), 'f') 360 dset2 = self.f.require_dataset('foo', (10, 3), 'f', exact=True) 361 self.assertEqual(dset, dset2) 362 363 def test_dtype_close(self): 364 """ require_dataset with convertible type succeeds (non-strict mode) 365 """ 366 dset = self.f.create_dataset('foo', (10, 3), 'i4') 367 dset2 = self.f.require_dataset('foo', (10, 3), 'i2', exact=False) 368 self.assertEqual(dset, dset2) 369 self.assertEqual(dset2.dtype, np.dtype('i4')) 370 371 372class TestCreateChunked(BaseDataset): 373 374 """ 375 Feature: Datasets can be created by manually specifying chunks 376 """ 377 378 def test_create_chunks(self): 379 """ Create via chunks tuple """ 380 dset = self.f.create_dataset('foo', shape=(100,), chunks=(10,)) 381 self.assertEqual(dset.chunks, (10,)) 382 383 def test_create_chunks_integer(self): 384 """ Create via chunks integer """ 385 dset = self.f.create_dataset('foo', shape=(100,), chunks=10) 386 self.assertEqual(dset.chunks, (10,)) 387 388 def test_chunks_mismatch(self): 389 """ Illegal chunk size raises ValueError """ 390 with self.assertRaises(ValueError): 391 self.f.create_dataset('foo', shape=(100,), chunks=(200,)) 392 393 def test_chunks_false(self): 394 """ Chunked format required for given storage options """ 395 with self.assertRaises(ValueError): 396 self.f.create_dataset('foo', shape=(10,), 
maxshape=100, chunks=False) 397 398 def test_chunks_scalar(self): 399 """ Attempting to create chunked scalar dataset raises TypeError """ 400 with self.assertRaises(TypeError): 401 self.f.create_dataset('foo', shape=(), chunks=(50,)) 402 403 def test_auto_chunks(self): 404 """ Auto-chunking of datasets """ 405 dset = self.f.create_dataset('foo', shape=(20, 100), chunks=True) 406 self.assertIsInstance(dset.chunks, tuple) 407 self.assertEqual(len(dset.chunks), 2) 408 409 def test_auto_chunks_abuse(self): 410 """ Auto-chunking with pathologically large element sizes """ 411 dset = self.f.create_dataset('foo', shape=(3,), dtype='S100000000', chunks=True) 412 self.assertEqual(dset.chunks, (1,)) 413 414 def test_scalar_assignment(self): 415 """ Test scalar assignment of chunked dataset """ 416 dset = self.f.create_dataset('foo', shape=(3, 50, 50), 417 dtype=np.int32, chunks=(1, 50, 50)) 418 # test assignment of selection smaller than chunk size 419 dset[1, :, 40] = 10 420 self.assertTrue(np.all(dset[1, :, 40] == 10)) 421 422 # test assignment of selection equal to chunk size 423 dset[1] = 11 424 self.assertTrue(np.all(dset[1] == 11)) 425 426 # test assignment of selection bigger than chunk size 427 dset[0:2] = 12 428 self.assertTrue(np.all(dset[0:2] == 12)) 429 430 def test_auto_chunks_no_shape(self): 431 """ Auto-chunking of empty datasets not allowed""" 432 with pytest.raises(TypeError, match='Empty') as err: 433 self.f.create_dataset('foo', dtype='S100', chunks=True) 434 435 with pytest.raises(TypeError, match='Empty') as err: 436 self.f.create_dataset('foo', dtype='S100', maxshape=20) 437 438 439class TestCreateFillvalue(BaseDataset): 440 441 """ 442 Feature: Datasets can be created with fill value 443 """ 444 445 def test_create_fillval(self): 446 """ Fill value is reflected in dataset contents """ 447 dset = self.f.create_dataset('foo', (10,), fillvalue=4.0) 448 self.assertEqual(dset[0], 4.0) 449 self.assertEqual(dset[7], 4.0) 450 451 def test_property(self): 452 
""" Fill value is recoverable via property """ 453 dset = self.f.create_dataset('foo', (10,), fillvalue=3.0) 454 self.assertEqual(dset.fillvalue, 3.0) 455 self.assertNotIsInstance(dset.fillvalue, np.ndarray) 456 457 def test_property_none(self): 458 """ .fillvalue property works correctly if not set """ 459 dset = self.f.create_dataset('foo', (10,)) 460 self.assertEqual(dset.fillvalue, 0) 461 462 def test_compound(self): 463 """ Fill value works with compound types """ 464 dt = np.dtype([('a', 'f4'), ('b', 'i8')]) 465 v = np.ones((1,), dtype=dt)[0] 466 dset = self.f.create_dataset('foo', (10,), dtype=dt, fillvalue=v) 467 self.assertEqual(dset.fillvalue, v) 468 self.assertAlmostEqual(dset[4], v) 469 470 def test_exc(self): 471 """ Bogus fill value raises ValueError """ 472 with self.assertRaises(ValueError): 473 dset = self.f.create_dataset('foo', (10,), 474 dtype=[('a', 'i'), ('b', 'f')], fillvalue=42) 475 476 477class TestCreateNamedType(BaseDataset): 478 479 """ 480 Feature: Datasets created from an existing named type 481 """ 482 483 def test_named(self): 484 """ Named type object works and links the dataset to type """ 485 self.f['type'] = np.dtype('f8') 486 dset = self.f.create_dataset('x', (100,), dtype=self.f['type']) 487 self.assertEqual(dset.dtype, np.dtype('f8')) 488 self.assertEqual(dset.id.get_type(), self.f['type'].id) 489 self.assertTrue(dset.id.get_type().committed()) 490 491 492@ut.skipIf('gzip' not in h5py.filters.encode, "DEFLATE is not installed") 493class TestCreateGzip(BaseDataset): 494 495 """ 496 Feature: Datasets created with gzip compression 497 """ 498 499 def test_gzip(self): 500 """ Create with explicit gzip options """ 501 dset = self.f.create_dataset('foo', (20, 30), compression='gzip', 502 compression_opts=9) 503 self.assertEqual(dset.compression, 'gzip') 504 self.assertEqual(dset.compression_opts, 9) 505 506 def test_gzip_implicit(self): 507 """ Create with implicit gzip level (level 4) """ 508 dset = self.f.create_dataset('foo', 
(20, 30), compression='gzip') 509 self.assertEqual(dset.compression, 'gzip') 510 self.assertEqual(dset.compression_opts, 4) 511 512 def test_gzip_number(self): 513 """ Create with gzip level by specifying integer """ 514 dset = self.f.create_dataset('foo', (20, 30), compression=7) 515 self.assertEqual(dset.compression, 'gzip') 516 self.assertEqual(dset.compression_opts, 7) 517 518 original_compression_vals = h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS 519 try: 520 h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS = tuple() 521 with self.assertRaises(ValueError): 522 dset = self.f.create_dataset('foo', (20, 30), compression=7) 523 finally: 524 h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS = original_compression_vals 525 526 def test_gzip_exc(self): 527 """ Illegal gzip level (explicit or implicit) raises ValueError """ 528 with self.assertRaises((ValueError, RuntimeError)): 529 self.f.create_dataset('foo', (20, 30), compression=14) 530 with self.assertRaises(ValueError): 531 self.f.create_dataset('foo', (20, 30), compression=-4) 532 with self.assertRaises(ValueError): 533 self.f.create_dataset('foo', (20, 30), compression='gzip', 534 compression_opts=14) 535 536 537@ut.skipIf('gzip' not in h5py.filters.encode, "DEFLATE is not installed") 538class TestCreateCompressionNumber(BaseDataset): 539 540 """ 541 Feature: Datasets created with a compression code 542 """ 543 544 def test_compression_number(self): 545 """ Create with compression number of gzip (h5py.h5z.FILTER_DEFLATE) and a compression level of 7""" 546 original_compression_vals = h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS 547 try: 548 h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS = tuple() 549 dset = self.f.create_dataset('foo', (20, 30), compression=h5py.h5z.FILTER_DEFLATE, compression_opts=(7,)) 550 finally: 551 h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS = original_compression_vals 552 553 self.assertEqual(dset.compression, 'gzip') 554 self.assertEqual(dset.compression_opts, 7) 555 556 def 
test_compression_number_invalid(self): 557 """ Create with invalid compression numbers """ 558 with self.assertRaises(ValueError) as e: 559 self.f.create_dataset('foo', (20, 30), compression=-999) 560 self.assertIn("Invalid filter", str(e.exception)) 561 562 with self.assertRaises(ValueError) as e: 563 self.f.create_dataset('foo', (20, 30), compression=100) 564 self.assertIn("Unknown compression", str(e.exception)) 565 566 original_compression_vals = h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS 567 try: 568 h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS = tuple() 569 570 # Using gzip compression requires a compression level specified in compression_opts 571 with self.assertRaises(IndexError): 572 self.f.create_dataset('foo', (20, 30), compression=h5py.h5z.FILTER_DEFLATE) 573 finally: 574 h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS = original_compression_vals 575 576 577@ut.skipIf('lzf' not in h5py.filters.encode, "LZF is not installed") 578class TestCreateLZF(BaseDataset): 579 580 """ 581 Feature: Datasets created with LZF compression 582 """ 583 584 def test_lzf(self): 585 """ Create with explicit lzf """ 586 dset = self.f.create_dataset('foo', (20, 30), compression='lzf') 587 self.assertEqual(dset.compression, 'lzf') 588 self.assertEqual(dset.compression_opts, None) 589 590 testdata = np.arange(100) 591 dset = self.f.create_dataset('bar', data=testdata, compression='lzf') 592 self.assertEqual(dset.compression, 'lzf') 593 self.assertEqual(dset.compression_opts, None) 594 595 self.f.flush() # Actually write to file 596 597 readdata = self.f['bar'][()] 598 self.assertArrayEqual(readdata, testdata) 599 600 def test_lzf_exc(self): 601 """ Giving lzf options raises ValueError """ 602 with self.assertRaises(ValueError): 603 self.f.create_dataset('foo', (20, 30), compression='lzf', 604 compression_opts=4) 605 606 607@ut.skipIf('szip' not in h5py.filters.encode, "SZIP is not installed") 608class TestCreateSZIP(BaseDataset): 609 610 """ 611 Feature: Datasets created 
with LZF compression 612 """ 613 614 def test_szip(self): 615 """ Create with explicit szip """ 616 dset = self.f.create_dataset('foo', (20, 30), compression='szip', 617 compression_opts=('ec', 16)) 618 619 620@ut.skipIf('shuffle' not in h5py.filters.encode, "SHUFFLE is not installed") 621class TestCreateShuffle(BaseDataset): 622 623 """ 624 Feature: Datasets can use shuffling filter 625 """ 626 627 def test_shuffle(self): 628 """ Enable shuffle filter """ 629 dset = self.f.create_dataset('foo', (20, 30), shuffle=True) 630 self.assertTrue(dset.shuffle) 631 632 633@ut.skipIf('fletcher32' not in h5py.filters.encode, "FLETCHER32 is not installed") 634class TestCreateFletcher32(BaseDataset): 635 """ 636 Feature: Datasets can use the fletcher32 filter 637 """ 638 639 def test_fletcher32(self): 640 """ Enable fletcher32 filter """ 641 dset = self.f.create_dataset('foo', (20, 30), fletcher32=True) 642 self.assertTrue(dset.fletcher32) 643 644 645@ut.skipIf('scaleoffset' not in h5py.filters.encode, "SCALEOFFSET is not installed") 646class TestCreateScaleOffset(BaseDataset): 647 """ 648 Feature: Datasets can use the scale/offset filter 649 """ 650 651 def test_float_fails_without_options(self): 652 """ Ensure that a scale factor is required for scaleoffset compression of floating point data """ 653 654 with self.assertRaises(ValueError): 655 dset = self.f.create_dataset('foo', (20, 30), dtype=float, scaleoffset=True) 656 657 def test_non_integer(self): 658 """ Check when scaleoffset is negetive""" 659 660 with self.assertRaises(ValueError): 661 dset = self.f.create_dataset('foo', (20, 30), dtype=float, scaleoffset=-0.1) 662 663 def test_unsupport_dtype(self): 664 """ Check when dtype is unsupported type""" 665 666 with self.assertRaises(TypeError): 667 dset = self.f.create_dataset('foo', (20, 30), dtype=bool, scaleoffset=True) 668 669 def test_float(self): 670 """ Scaleoffset filter works for floating point data """ 671 672 scalefac = 4 673 shape = (100, 300) 674 range = 20 
* 10 ** scalefac 675 testdata = (np.random.rand(*shape) - 0.5) * range 676 677 dset = self.f.create_dataset('foo', shape, dtype=float, scaleoffset=scalefac) 678 679 # Dataset reports that scaleoffset is in use 680 assert dset.scaleoffset is not None 681 682 # Dataset round-trips 683 dset[...] = testdata 684 filename = self.f.filename 685 self.f.close() 686 self.f = h5py.File(filename, 'r') 687 readdata = self.f['foo'][...] 688 689 # Test that data round-trips to requested precision 690 self.assertArrayEqual(readdata, testdata, precision=10 ** (-scalefac)) 691 692 # Test that the filter is actually active (i.e. compression is lossy) 693 assert not (readdata == testdata).all() 694 695 def test_int(self): 696 """ Scaleoffset filter works for integer data with default precision """ 697 698 nbits = 12 699 shape = (100, 300) 700 testdata = np.random.randint(0, 2 ** nbits - 1, size=shape) 701 702 # Create dataset; note omission of nbits (for library-determined precision) 703 dset = self.f.create_dataset('foo', shape, dtype=int, scaleoffset=True) 704 705 # Dataset reports scaleoffset enabled 706 assert dset.scaleoffset is not None 707 708 # Data round-trips correctly and identically 709 dset[...] = testdata 710 filename = self.f.filename 711 self.f.close() 712 self.f = h5py.File(filename, 'r') 713 readdata = self.f['foo'][...] 714 self.assertArrayEqual(readdata, testdata) 715 716 def test_int_with_minbits(self): 717 """ Scaleoffset filter works for integer data with specified precision """ 718 719 nbits = 12 720 shape = (100, 300) 721 testdata = np.random.randint(0, 2 ** nbits, size=shape) 722 723 dset = self.f.create_dataset('foo', shape, dtype=int, scaleoffset=nbits) 724 725 # Dataset reports scaleoffset enabled with correct precision 726 self.assertTrue(dset.scaleoffset == 12) 727 728 # Data round-trips correctly 729 dset[...] = testdata 730 filename = self.f.filename 731 self.f.close() 732 self.f = h5py.File(filename, 'r') 733 readdata = self.f['foo'][...] 
734 self.assertArrayEqual(readdata, testdata) 735 736 def test_int_with_minbits_lossy(self): 737 """ Scaleoffset filter works for integer data with specified precision """ 738 739 nbits = 12 740 shape = (100, 300) 741 testdata = np.random.randint(0, 2 ** (nbits + 1) - 1, size=shape) 742 743 dset = self.f.create_dataset('foo', shape, dtype=int, scaleoffset=nbits) 744 745 # Dataset reports scaleoffset enabled with correct precision 746 self.assertTrue(dset.scaleoffset == 12) 747 748 # Data can be written and read 749 dset[...] = testdata 750 filename = self.f.filename 751 self.f.close() 752 self.f = h5py.File(filename, 'r') 753 readdata = self.f['foo'][...] 754 755 # Compression is lossy 756 assert not (readdata == testdata).all() 757 758 759class TestExternal(BaseDataset): 760 """ 761 Feature: Datasets with the external storage property 762 """ 763 def test_contents(self): 764 """ Create and access an external dataset """ 765 766 shape = (6, 100) 767 testdata = np.random.random(shape) 768 769 # create a dataset in an external file and set it 770 ext_file = self.mktemp() 771 external = [(ext_file, 0, h5f.UNLIMITED)] 772 dset = self.f.create_dataset('foo', shape, dtype=testdata.dtype, external=external) 773 dset[...] 
= testdata 774 775 assert dset.external is not None 776 777 # verify file's existence, size, and contents 778 with open(ext_file, 'rb') as fid: 779 contents = fid.read() 780 assert contents == testdata.tobytes() 781 782 def test_name_str(self): 783 """ External argument may be a file name str only """ 784 785 self.f.create_dataset('foo', (6, 100), external=self.mktemp()) 786 787 def test_name_path(self): 788 """ External argument may be a file name path only """ 789 790 self.f.create_dataset('foo', (6, 100), 791 external=pathlib.Path(self.mktemp())) 792 793 def test_iter_multi(self): 794 """ External argument may be an iterable of multiple tuples """ 795 796 ext_file = self.mktemp() 797 N = 100 798 external = iter((ext_file, x * 1000, 1000) for x in range(N)) 799 dset = self.f.create_dataset('poo', (6, 100), external=external) 800 assert len(dset.external) == N 801 802 def test_invalid(self): 803 """ Test with invalid external lists """ 804 805 shape = (6, 100) 806 ext_file = self.mktemp() 807 808 for exc_type, external in [ 809 (TypeError, [ext_file]), 810 (TypeError, [ext_file, 0]), 811 (TypeError, [ext_file, 0, h5f.UNLIMITED]), 812 (ValueError, [(ext_file,)]), 813 (ValueError, [(ext_file, 0)]), 814 (ValueError, [(ext_file, 0, h5f.UNLIMITED, 0)]), 815 (TypeError, [(ext_file, 0, "h5f.UNLIMITED")]), 816 ]: 817 with self.assertRaises(exc_type): 818 self.f.create_dataset('foo', shape, external=external) 819 820 821class TestAutoCreate(BaseDataset): 822 823 """ 824 Feature: Datasets auto-created from data produce the correct types 825 """ 826 def assert_string_type(self, ds, cset, variable=True): 827 tid = ds.id.get_type() 828 self.assertEqual(type(tid), h5py.h5t.TypeStringID) 829 self.assertEqual(tid.get_cset(), cset) 830 if variable: 831 assert tid.is_variable_str() 832 833 def test_vlen_bytes(self): 834 """Assigning byte strings produces a vlen string ASCII dataset """ 835 self.f['x'] = b"Hello there" 836 self.assert_string_type(self.f['x'], h5py.h5t.CSET_ASCII) 
837 838 self.f['y'] = [b"a", b"bc"] 839 self.assert_string_type(self.f['y'], h5py.h5t.CSET_ASCII) 840 841 self.f['z'] = np.array([b"a", b"bc"], dtype=np.object_) 842 self.assert_string_type(self.f['z'], h5py.h5t.CSET_ASCII) 843 844 def test_vlen_unicode(self): 845 """Assigning unicode strings produces a vlen string UTF-8 dataset """ 846 self.f['x'] = "Hello there" + chr(0x2034) 847 self.assert_string_type(self.f['x'], h5py.h5t.CSET_UTF8) 848 849 self.f['y'] = ["a", "bc"] 850 self.assert_string_type(self.f['y'], h5py.h5t.CSET_UTF8) 851 852 # 2D array; this only works with an array, not nested lists 853 self.f['z'] = np.array([["a", "bc"]], dtype=np.object_) 854 self.assert_string_type(self.f['z'], h5py.h5t.CSET_UTF8) 855 856 def test_string_fixed(self): 857 """ Assignment of fixed-length byte string produces a fixed-length 858 ascii dataset """ 859 self.f['x'] = np.string_("Hello there") 860 ds = self.f['x'] 861 self.assert_string_type(ds, h5py.h5t.CSET_ASCII, variable=False) 862 self.assertEqual(ds.id.get_type().get_size(), 11) 863 864 865class TestCreateLike(BaseDataset): 866 def test_no_chunks(self): 867 self.f['lol'] = np.arange(25).reshape(5, 5) 868 self.f.create_dataset_like('like_lol', self.f['lol']) 869 dslike = self.f['like_lol'] 870 self.assertEqual(dslike.shape, (5, 5)) 871 self.assertIs(dslike.chunks, None) 872 873 def test_track_times(self): 874 orig = self.f.create_dataset('honda', data=np.arange(12), 875 track_times=True) 876 self.assertNotEqual(0, h5py.h5g.get_objinfo(orig._id).mtime) 877 similar = self.f.create_dataset_like('hyundai', orig) 878 self.assertNotEqual(0, h5py.h5g.get_objinfo(similar._id).mtime) 879 880 orig = self.f.create_dataset('ibm', data=np.arange(12), 881 track_times=False) 882 self.assertEqual(0, h5py.h5g.get_objinfo(orig._id).mtime) 883 similar = self.f.create_dataset_like('lenovo', orig) 884 self.assertEqual(0, h5py.h5g.get_objinfo(similar._id).mtime) 885 886 def test_maxshape(self): 887 """ Test when other.maxshape != 
other.shape """ 888 889 other = self.f.create_dataset('other', (10,), maxshape=20) 890 similar = self.f.create_dataset_like('sim', other) 891 self.assertEqual(similar.shape, (10,)) 892 self.assertEqual(similar.maxshape, (20,)) 893 894class TestChunkIterator(BaseDataset): 895 def test_no_chunks(self): 896 dset = self.f.create_dataset("foo", ()) 897 with self.assertRaises(TypeError): 898 dset.iter_chunks() 899 900 def test_1d(self): 901 dset = self.f.create_dataset("foo", (100,), chunks=(32,)) 902 expected = ((slice(0,32,1),), (slice(32,64,1),), (slice(64,96,1),), 903 (slice(96,100,1),)) 904 self.assertEqual(list(dset.iter_chunks()), list(expected)) 905 expected = ((slice(50,64,1),), (slice(64,96,1),), (slice(96,97,1),)) 906 self.assertEqual(list(dset.iter_chunks(np.s_[50:97])), list(expected)) 907 908 def test_2d(self): 909 dset = self.f.create_dataset("foo", (100,100), chunks=(32,64)) 910 expected = ((slice(0, 32, 1), slice(0, 64, 1)), (slice(0, 32, 1), 911 slice(64, 100, 1)), (slice(32, 64, 1), slice(0, 64, 1)), 912 (slice(32, 64, 1), slice(64, 100, 1)), (slice(64, 96, 1), 913 slice(0, 64, 1)), (slice(64, 96, 1), slice(64, 100, 1)), 914 (slice(96, 100, 1), slice(0, 64, 1)), (slice(96, 100, 1), 915 slice(64, 100, 1))) 916 self.assertEqual(list(dset.iter_chunks()), list(expected)) 917 918 expected = ((slice(48, 52, 1), slice(40, 50, 1)),) 919 self.assertEqual(list(dset.iter_chunks(np.s_[48:52,40:50])), list(expected)) 920 921 922class TestResize(BaseDataset): 923 924 """ 925 Feature: Datasets created with "maxshape" may be resized 926 """ 927 928 def test_create(self): 929 """ Create dataset with "maxshape" """ 930 dset = self.f.create_dataset('foo', (20, 30), maxshape=(20, 60)) 931 self.assertIsNot(dset.chunks, None) 932 self.assertEqual(dset.maxshape, (20, 60)) 933 934 def test_create_1D(self): 935 """ Create dataset with "maxshape" using integer maxshape""" 936 dset = self.f.create_dataset('foo', (20,), maxshape=20) 937 self.assertIsNot(dset.chunks, None) 938 
self.assertEqual(dset.maxshape, (20,)) 939 940 dset = self.f.create_dataset('bar', 20, maxshape=20) 941 self.assertEqual(dset.maxshape, (20,)) 942 943 def test_resize(self): 944 """ Datasets may be resized up to maxshape """ 945 dset = self.f.create_dataset('foo', (20, 30), maxshape=(20, 60)) 946 self.assertEqual(dset.shape, (20, 30)) 947 dset.resize((20, 50)) 948 self.assertEqual(dset.shape, (20, 50)) 949 dset.resize((20, 60)) 950 self.assertEqual(dset.shape, (20, 60)) 951 952 def test_resize_1D(self): 953 """ Datasets may be resized up to maxshape using integer maxshape""" 954 dset = self.f.create_dataset('foo', 20, maxshape=40) 955 self.assertEqual(dset.shape, (20,)) 956 dset.resize((30,)) 957 self.assertEqual(dset.shape, (30,)) 958 959 def test_resize_over(self): 960 """ Resizing past maxshape triggers an exception """ 961 dset = self.f.create_dataset('foo', (20, 30), maxshape=(20, 60)) 962 with self.assertRaises(Exception): 963 dset.resize((20, 70)) 964 965 def test_resize_nonchunked(self): 966 """ Resizing non-chunked dataset raises TypeError """ 967 dset = self.f.create_dataset("foo", (20, 30)) 968 with self.assertRaises(TypeError): 969 dset.resize((20, 60)) 970 971 def test_resize_axis(self): 972 """ Resize specified axis """ 973 dset = self.f.create_dataset('foo', (20, 30), maxshape=(20, 60)) 974 dset.resize(50, axis=1) 975 self.assertEqual(dset.shape, (20, 50)) 976 977 def test_axis_exc(self): 978 """ Illegal axis raises ValueError """ 979 dset = self.f.create_dataset('foo', (20, 30), maxshape=(20, 60)) 980 with self.assertRaises(ValueError): 981 dset.resize(50, axis=2) 982 983 def test_zero_dim(self): 984 """ Allow zero-length initial dims for unlimited axes (issue 111) """ 985 dset = self.f.create_dataset('foo', (15, 0), maxshape=(15, None)) 986 self.assertEqual(dset.shape, (15, 0)) 987 self.assertEqual(dset.maxshape, (15, None)) 988 989 990class TestDtype(BaseDataset): 991 992 """ 993 Feature: Dataset dtype is available as .dtype property 994 """ 995 
996 def test_dtype(self): 997 """ Retrieve dtype from dataset """ 998 dset = self.f.create_dataset('foo', (5,), '|S10') 999 self.assertEqual(dset.dtype, np.dtype('|S10')) 1000 1001 1002class TestLen(BaseDataset): 1003 1004 """ 1005 Feature: Size of first axis is available via Python's len 1006 """ 1007 1008 def test_len(self): 1009 """ Python len() (under 32 bits) """ 1010 dset = self.f.create_dataset('foo', (312, 15)) 1011 self.assertEqual(len(dset), 312) 1012 1013 def test_len_big(self): 1014 """ Python len() vs Dataset.len() """ 1015 dset = self.f.create_dataset('foo', (2 ** 33, 15)) 1016 self.assertEqual(dset.shape, (2 ** 33, 15)) 1017 if sys.maxsize == 2 ** 31 - 1: 1018 with self.assertRaises(OverflowError): 1019 len(dset) 1020 else: 1021 self.assertEqual(len(dset), 2 ** 33) 1022 self.assertEqual(dset.len(), 2 ** 33) 1023 1024 1025class TestIter(BaseDataset): 1026 1027 """ 1028 Feature: Iterating over a dataset yields rows 1029 """ 1030 1031 def test_iter(self): 1032 """ Iterating over a dataset yields rows """ 1033 data = np.arange(30, dtype='f').reshape((10, 3)) 1034 dset = self.f.create_dataset('foo', data=data) 1035 for x, y in zip(dset, data): 1036 self.assertEqual(len(x), 3) 1037 self.assertArrayEqual(x, y) 1038 1039 def test_iter_scalar(self): 1040 """ Iterating over scalar dataset raises TypeError """ 1041 dset = self.f.create_dataset('foo', shape=()) 1042 with self.assertRaises(TypeError): 1043 [x for x in dset] 1044 1045 1046class TestStrings(BaseDataset): 1047 1048 """ 1049 Feature: Datasets created with vlen and fixed datatypes correctly 1050 translate to and from HDF5 1051 """ 1052 1053 def test_vlen_bytes(self): 1054 """ Vlen bytes dataset maps to vlen ascii in the file """ 1055 dt = h5py.string_dtype(encoding='ascii') 1056 ds = self.f.create_dataset('x', (100,), dtype=dt) 1057 tid = ds.id.get_type() 1058 self.assertEqual(type(tid), h5py.h5t.TypeStringID) 1059 self.assertEqual(tid.get_cset(), h5py.h5t.CSET_ASCII) 1060 string_info = 
h5py.check_string_dtype(ds.dtype) 1061 self.assertEqual(string_info.encoding, 'ascii') 1062 1063 def test_vlen_unicode(self): 1064 """ Vlen unicode dataset maps to vlen utf-8 in the file """ 1065 dt = h5py.string_dtype() 1066 ds = self.f.create_dataset('x', (100,), dtype=dt) 1067 tid = ds.id.get_type() 1068 self.assertEqual(type(tid), h5py.h5t.TypeStringID) 1069 self.assertEqual(tid.get_cset(), h5py.h5t.CSET_UTF8) 1070 string_info = h5py.check_string_dtype(ds.dtype) 1071 self.assertEqual(string_info.encoding, 'utf-8') 1072 1073 def test_fixed_ascii(self): 1074 """ Fixed-length bytes dataset maps to fixed-length ascii in the file 1075 """ 1076 dt = np.dtype("|S10") 1077 ds = self.f.create_dataset('x', (100,), dtype=dt) 1078 tid = ds.id.get_type() 1079 self.assertEqual(type(tid), h5py.h5t.TypeStringID) 1080 self.assertFalse(tid.is_variable_str()) 1081 self.assertEqual(tid.get_size(), 10) 1082 self.assertEqual(tid.get_cset(), h5py.h5t.CSET_ASCII) 1083 string_info = h5py.check_string_dtype(ds.dtype) 1084 self.assertEqual(string_info.encoding, 'ascii') 1085 self.assertEqual(string_info.length, 10) 1086 1087 def test_fixed_utf8(self): 1088 dt = h5py.string_dtype(encoding='utf-8', length=5) 1089 ds = self.f.create_dataset('x', (100,), dtype=dt) 1090 tid = ds.id.get_type() 1091 self.assertEqual(tid.get_cset(), h5py.h5t.CSET_UTF8) 1092 s = 'cù' 1093 ds[0] = s.encode('utf-8') 1094 ds[1] = s 1095 ds[2:4] = [s, s] 1096 ds[4:6] = np.array([s, s], dtype=object) 1097 ds[6:8] = np.array([s.encode('utf-8')] * 2, dtype=dt) 1098 with self.assertRaises(TypeError): 1099 ds[8:10] = np.array([s, s], dtype='U') 1100 1101 np.testing.assert_array_equal(ds[:8], np.array([s.encode('utf-8')] * 8, dtype='S')) 1102 1103 def test_fixed_unicode(self): 1104 """ Fixed-length unicode datasets are unsupported (raise TypeError) """ 1105 dt = np.dtype("|U10") 1106 with self.assertRaises(TypeError): 1107 ds = self.f.create_dataset('x', (100,), dtype=dt) 1108 1109 def test_roundtrip_vlen_bytes(self): 1110 
""" writing and reading to vlen bytes dataset preserves type and content 1111 """ 1112 dt = h5py.string_dtype(encoding='ascii') 1113 ds = self.f.create_dataset('x', (100,), dtype=dt) 1114 data = b"Hello\xef" 1115 ds[0] = data 1116 out = ds[0] 1117 self.assertEqual(type(out), bytes) 1118 self.assertEqual(out, data) 1119 1120 def test_roundtrip_fixed_bytes(self): 1121 """ Writing to and reading from fixed-length bytes dataset preserves 1122 type and content """ 1123 dt = np.dtype("|S10") 1124 ds = self.f.create_dataset('x', (100,), dtype=dt) 1125 data = b"Hello\xef" 1126 ds[0] = data 1127 out = ds[0] 1128 self.assertEqual(type(out), np.string_) 1129 self.assertEqual(out, data) 1130 1131 def test_retrieve_vlen_unicode(self): 1132 dt = h5py.string_dtype() 1133 ds = self.f.create_dataset('x', (10,), dtype=dt) 1134 data = "fàilte" 1135 ds[0] = data 1136 self.assertIsInstance(ds[0], bytes) 1137 out = ds.asstr()[0] 1138 self.assertIsInstance(out, str) 1139 self.assertEqual(out, data) 1140 1141 def test_asstr(self): 1142 ds = self.f.create_dataset('x', (10,), dtype=h5py.string_dtype()) 1143 data = "fàilte" 1144 ds[0] = data 1145 1146 strwrap1 = ds.asstr('ascii') 1147 with self.assertRaises(UnicodeDecodeError): 1148 out = strwrap1[0] 1149 1150 # Different errors parameter 1151 self.assertEqual(ds.asstr('ascii', 'ignore')[0], 'filte') 1152 1153 # latin-1 will decode it but give the wrong text 1154 self.assertNotEqual(ds.asstr('latin-1')[0], data) 1155 1156 # len of ds 1157 self.assertEqual(10, len(ds.asstr())) 1158 1159 1160 # Array output 1161 np.testing.assert_array_equal( 1162 ds.asstr()[:1], np.array([data], dtype=object) 1163 ) 1164 1165 def test_asstr_fixed(self): 1166 dt = h5py.string_dtype(length=5) 1167 ds = self.f.create_dataset('x', (10,), dtype=dt) 1168 data = 'cù' 1169 ds[0] = np.array(data.encode('utf-8'), dtype=dt) 1170 1171 self.assertIsInstance(ds[0], np.bytes_) 1172 out = ds.asstr()[0] 1173 self.assertIsInstance(out, str) 1174 self.assertEqual(out, data) 
1175 1176 # Different errors parameter 1177 self.assertEqual(ds.asstr('ascii', 'ignore')[0], 'c') 1178 1179 # latin-1 will decode it but give the wrong text 1180 self.assertNotEqual(ds.asstr('latin-1')[0], data) 1181 1182 # Array output 1183 np.testing.assert_array_equal( 1184 ds.asstr()[:1], np.array([data], dtype=object) 1185 ) 1186 1187 def test_unicode_write_error(self): 1188 """Encoding error when writing a non-ASCII string to an ASCII vlen dataset""" 1189 dt = h5py.string_dtype('ascii') 1190 ds = self.f.create_dataset('x', (100,), dtype=dt) 1191 data = "fàilte" 1192 with self.assertRaises(UnicodeEncodeError): 1193 ds[0] = data 1194 1195 def test_unicode_write_bytes(self): 1196 """ Writing valid utf-8 byte strings to a unicode vlen dataset is OK 1197 """ 1198 dt = h5py.string_dtype() 1199 ds = self.f.create_dataset('x', (100,), dtype=dt) 1200 data = (u"Hello there" + chr(0x2034)).encode('utf8') 1201 ds[0] = data 1202 out = ds[0] 1203 self.assertEqual(type(out), bytes) 1204 self.assertEqual(out, data) 1205 1206 def test_vlen_bytes_write_ascii_str(self): 1207 """ Writing an ascii str to ascii vlen dataset is OK 1208 """ 1209 dt = h5py.string_dtype('ascii') 1210 ds = self.f.create_dataset('x', (100,), dtype=dt) 1211 data = "ASCII string" 1212 ds[0] = data 1213 out = ds[0] 1214 self.assertEqual(type(out), bytes) 1215 self.assertEqual(out, data.encode('ascii')) 1216 1217 1218class TestCompound(BaseDataset): 1219 1220 """ 1221 Feature: Compound types correctly round-trip 1222 """ 1223 1224 def test_rt(self): 1225 """ Compound types are read back in correct order (issue 236)""" 1226 1227 dt = np.dtype([ ('weight', np.float64), 1228 ('cputime', np.float64), 1229 ('walltime', np.float64), 1230 ('parents_offset', np.uint32), 1231 ('n_parents', np.uint32), 1232 ('status', np.uint8), 1233 ('endpoint_type', np.uint8), ]) 1234 1235 testdata = np.ndarray((16,), dtype=dt) 1236 for key in dt.fields: 1237 testdata[key] = np.random.random((16,)) * 100 1238 1239 self.f['test'] = 
testdata 1240 outdata = self.f['test'][...] 1241 self.assertTrue(np.all(outdata == testdata)) 1242 self.assertEqual(outdata.dtype, testdata.dtype) 1243 1244 def test_assign(self): 1245 dt = np.dtype([ ('weight', (np.float64, 3)), 1246 ('endpoint_type', np.uint8), ]) 1247 1248 testdata = np.ndarray((16,), dtype=dt) 1249 for key in dt.fields: 1250 testdata[key] = np.random.random(size=testdata[key].shape) * 100 1251 1252 ds = self.f.create_dataset('test', (16,), dtype=dt) 1253 for key in dt.fields: 1254 ds[key] = testdata[key] 1255 1256 outdata = self.f['test'][...] 1257 1258 self.assertTrue(np.all(outdata == testdata)) 1259 self.assertEqual(outdata.dtype, testdata.dtype) 1260 1261 def test_fields(self): 1262 dt = np.dtype([ 1263 ('x', np.float64), 1264 ('y', np.float64), 1265 ('z', np.float64), 1266 ]) 1267 1268 testdata = np.ndarray((16,), dtype=dt) 1269 for key in dt.fields: 1270 testdata[key] = np.random.random((16,)) * 100 1271 1272 self.f['test'] = testdata 1273 1274 # Extract multiple fields 1275 np.testing.assert_array_equal( 1276 self.f['test'].fields(['x', 'y'])[:], testdata[['x', 'y']] 1277 ) 1278 # Extract single field 1279 np.testing.assert_array_equal( 1280 self.f['test'].fields('x')[:], testdata['x'] 1281 ) 1282 1283 # Check len() on fields wrapper 1284 assert len(self.f['test'].fields('x')) == 16 1285 1286 1287class TestSubarray(BaseDataset): 1288 def test_write_list(self): 1289 ds = self.f.create_dataset("a", (1,), dtype="3int8") 1290 ds[0] = [1, 2, 3] 1291 np.testing.assert_array_equal(ds[:], [[1, 2, 3]]) 1292 1293 ds[:] = [[4, 5, 6]] 1294 np.testing.assert_array_equal(ds[:], [[4, 5, 6]]) 1295 1296 def test_write_array(self): 1297 ds = self.f.create_dataset("a", (1,), dtype="3int8") 1298 ds[0] = np.array([1, 2, 3]) 1299 np.testing.assert_array_equal(ds[:], [[1, 2, 3]]) 1300 1301 ds[:] = np.array([[4, 5, 6]]) 1302 np.testing.assert_array_equal(ds[:], [[4, 5, 6]]) 1303 1304 1305class TestEnum(BaseDataset): 1306 1307 """ 1308 Feature: Enum datatype 
info is preserved, read/write as integer 1309 """ 1310 1311 EDICT = {'RED': 0, 'GREEN': 1, 'BLUE': 42} 1312 1313 def test_create(self): 1314 """ Enum datasets can be created and type correctly round-trips """ 1315 dt = h5py.enum_dtype(self.EDICT, basetype='i') 1316 ds = self.f.create_dataset('x', (100, 100), dtype=dt) 1317 dt2 = ds.dtype 1318 dict2 = h5py.check_enum_dtype(dt2) 1319 self.assertEqual(dict2, self.EDICT) 1320 1321 def test_readwrite(self): 1322 """ Enum datasets can be read/written as integers """ 1323 dt = h5py.enum_dtype(self.EDICT, basetype='i4') 1324 ds = self.f.create_dataset('x', (100, 100), dtype=dt) 1325 ds[35, 37] = 42 1326 ds[1, :] = 1 1327 self.assertEqual(ds[35, 37], 42) 1328 self.assertArrayEqual(ds[1, :], np.array((1,) * 100, dtype='i4')) 1329 1330 1331class TestFloats(BaseDataset): 1332 1333 """ 1334 Test support for mini and extended-precision floats 1335 """ 1336 1337 def _exectest(self, dt): 1338 dset = self.f.create_dataset('x', (100,), dtype=dt) 1339 self.assertEqual(dset.dtype, dt) 1340 data = np.ones((100,), dtype=dt) 1341 dset[...] 
= data 1342 self.assertArrayEqual(dset[...], data) 1343 1344 @ut.skipUnless(hasattr(np, 'float16'), "NumPy float16 support required") 1345 def test_mini(self): 1346 """ Mini-floats round trip """ 1347 self._exectest(np.dtype('float16')) 1348 1349 # TODO: move these tests to test_h5t 1350 def test_mini_mapping(self): 1351 """ Test mapping for float16 """ 1352 if hasattr(np, 'float16'): 1353 self.assertEqual(h5t.IEEE_F16LE.dtype, np.dtype('<f2')) 1354 else: 1355 self.assertEqual(h5t.IEEE_F16LE.dtype, np.dtype('<f4')) 1356 1357 1358class TestTrackTimes(BaseDataset): 1359 1360 """ 1361 Feature: track_times 1362 """ 1363 1364 def test_disable_track_times(self): 1365 """ check that when track_times=False, the time stamp=0 (Jan 1, 1970) """ 1366 ds = self.f.create_dataset('foo', (4,), track_times=False) 1367 ds_mtime = h5py.h5g.get_objinfo(ds._id).mtime 1368 self.assertEqual(0, ds_mtime) 1369 1370 def test_invalid_track_times(self): 1371 """ check that when give track_times an invalid value """ 1372 with self.assertRaises(TypeError): 1373 self.f.create_dataset('foo', (4,), track_times='null') 1374 1375 1376class TestZeroShape(BaseDataset): 1377 1378 """ 1379 Features of datasets with (0,)-shape axes 1380 """ 1381 1382 def test_array_conversion(self): 1383 """ Empty datasets can be converted to NumPy arrays """ 1384 ds = self.f.create_dataset('x', 0, maxshape=None) 1385 self.assertEqual(ds.shape, np.array(ds).shape) 1386 1387 ds = self.f.create_dataset('y', (0,), maxshape=(None,)) 1388 self.assertEqual(ds.shape, np.array(ds).shape) 1389 1390 ds = self.f.create_dataset('z', (0, 0), maxshape=(None, None)) 1391 self.assertEqual(ds.shape, np.array(ds).shape) 1392 1393 def test_reading(self): 1394 """ Slicing into empty datasets works correctly """ 1395 dt = [('a', 'f'), ('b', 'i')] 1396 ds = self.f.create_dataset('x', (0,), dtype=dt, maxshape=(None,)) 1397 arr = np.empty((0,), dtype=dt) 1398 1399 self.assertEqual(ds[...].shape, arr.shape) 1400 self.assertEqual(ds[...].dtype, 
arr.dtype) 1401 self.assertEqual(ds[()].shape, arr.shape) 1402 self.assertEqual(ds[()].dtype, arr.dtype) 1403 1404# https://github.com/h5py/h5py/issues/1492 1405empty_regionref_xfail = pytest.mark.xfail( 1406 h5py.version.hdf5_version_tuple == (1, 10, 6), 1407 reason="Issue with empty region refs in HDF5 1.10.6", 1408) 1409 1410class TestRegionRefs(BaseDataset): 1411 1412 """ 1413 Various features of region references 1414 """ 1415 1416 def setUp(self): 1417 BaseDataset.setUp(self) 1418 self.data = np.arange(100 * 100).reshape((100, 100)) 1419 self.dset = self.f.create_dataset('x', data=self.data) 1420 self.dset[...] = self.data 1421 1422 def test_create_ref(self): 1423 """ Region references can be used as slicing arguments """ 1424 slic = np.s_[25:35, 10:100:5] 1425 ref = self.dset.regionref[slic] 1426 self.assertArrayEqual(self.dset[ref], self.data[slic]) 1427 1428 @empty_regionref_xfail 1429 def test_empty_region(self): 1430 ref = self.dset.regionref[:0] 1431 out = self.dset[ref] 1432 assert out.size == 0 1433 # Ideally we should preserve shape (0, 100), but it seems this is lost. 
1434 1435 @empty_regionref_xfail 1436 def test_scalar_dataset(self): 1437 ds = self.f.create_dataset("scalar", data=1.0, dtype='f4') 1438 sid = h5py.h5s.create(h5py.h5s.SCALAR) 1439 1440 # Deselected 1441 sid.select_none() 1442 ref = h5py.h5r.create(ds.id, b'.', h5py.h5r.DATASET_REGION, sid) 1443 assert ds[ref] == h5py.Empty(np.dtype('f4')) 1444 1445 # Selected 1446 sid.select_all() 1447 ref = h5py.h5r.create(ds.id, b'.', h5py.h5r.DATASET_REGION, sid) 1448 assert ds[ref] == ds[()] 1449 1450 def test_ref_shape(self): 1451 """ Region reference shape and selection shape """ 1452 slic = np.s_[25:35, 10:100:5] 1453 ref = self.dset.regionref[slic] 1454 self.assertEqual(self.dset.regionref.shape(ref), self.dset.shape) 1455 self.assertEqual(self.dset.regionref.selection(ref), (10, 18)) 1456 1457 1458class TestAstype(BaseDataset): 1459 """.astype() wrapper & context manager 1460 """ 1461 def test_astype_ctx(self): 1462 dset = self.f.create_dataset('x', (100,), dtype='i2') 1463 dset[...] = np.arange(100) 1464 1465 with warnings.catch_warnings(record=True) as warn_rec: 1466 warnings.simplefilter("always") 1467 1468 with dset.astype('f8'): 1469 self.assertArrayEqual(dset[...], np.arange(100, dtype='f8')) 1470 1471 with dset.astype('f4') as f4ds: 1472 self.assertArrayEqual(f4ds[...], np.arange(100, dtype='f4')) 1473 1474 assert [w.category for w in warn_rec] == [H5pyDeprecationWarning] * 2 1475 1476 def test_astype_wrapper(self): 1477 dset = self.f.create_dataset('x', (100,), dtype='i2') 1478 dset[...] = np.arange(100) 1479 arr = dset.astype('f4')[:] 1480 self.assertArrayEqual(arr, np.arange(100, dtype='f4')) 1481 1482 1483 def test_astype_wrapper_len(self): 1484 dset = self.f.create_dataset('x', (100,), dtype='i2') 1485 dset[...] 
= np.arange(100) 1486 self.assertEqual(100, len(dset.astype('f4'))) 1487 1488 1489class TestScalarCompound(BaseDataset): 1490 1491 """ 1492 Retrieval of a single field from a scalar compound dataset should 1493 strip the field info 1494 """ 1495 1496 def test_scalar_compound(self): 1497 1498 dt = np.dtype([('a', 'i')]) 1499 dset = self.f.create_dataset('x', (), dtype=dt) 1500 self.assertEqual(dset['a'].dtype, np.dtype('i')) 1501 1502 1503class TestVlen(BaseDataset): 1504 def test_int(self): 1505 dt = h5py.vlen_dtype(int) 1506 ds = self.f.create_dataset('vlen', (4,), dtype=dt) 1507 ds[0] = np.arange(3) 1508 ds[1] = np.arange(0) 1509 ds[2] = [1, 2, 3] 1510 ds[3] = np.arange(1) 1511 self.assertArrayEqual(ds[0], np.arange(3)) 1512 self.assertArrayEqual(ds[1], np.arange(0)) 1513 self.assertArrayEqual(ds[2], np.array([1, 2, 3])) 1514 self.assertArrayEqual(ds[1], np.arange(0)) 1515 ds[0:2] = np.array([np.arange(5), np.arange(4)], dtype=object) 1516 self.assertArrayEqual(ds[0], np.arange(5)) 1517 self.assertArrayEqual(ds[1], np.arange(4)) 1518 ds[0:2] = np.array([np.arange(3), np.arange(3)]) 1519 self.assertArrayEqual(ds[0], np.arange(3)) 1520 self.assertArrayEqual(ds[1], np.arange(3)) 1521 1522 def test_reuse_from_other(self): 1523 dt = h5py.vlen_dtype(int) 1524 ds = self.f.create_dataset('vlen', (1,), dtype=dt) 1525 self.f.create_dataset('vlen2', (1,), ds[()].dtype) 1526 1527 def test_reuse_struct_from_other(self): 1528 dt = [('a', int), ('b', h5py.vlen_dtype(int))] 1529 ds = self.f.create_dataset('vlen', (1,), dtype=dt) 1530 fname = self.f.filename 1531 self.f.close() 1532 self.f = h5py.File(fname, 'a') 1533 self.f.create_dataset('vlen2', (1,), self.f['vlen']['b'][()].dtype) 1534 1535 def test_convert(self): 1536 dt = h5py.vlen_dtype(int) 1537 ds = self.f.create_dataset('vlen', (3,), dtype=dt) 1538 ds[0] = np.array([1.4, 1.2]) 1539 ds[1] = np.array([1.2]) 1540 ds[2] = [1.2, 2, 3] 1541 self.assertArrayEqual(ds[0], np.array([1, 1])) 1542 self.assertArrayEqual(ds[1], 
np.array([1])) 1543 self.assertArrayEqual(ds[2], np.array([1, 2, 3])) 1544 ds[0:2] = np.array([[0.1, 1.1, 2.1, 3.1, 4], np.arange(4)], dtype=object) 1545 self.assertArrayEqual(ds[0], np.arange(5)) 1546 self.assertArrayEqual(ds[1], np.arange(4)) 1547 ds[0:2] = np.array([np.array([0.1, 1.2, 2.2]), 1548 np.array([0.2, 1.2, 2.2])]) 1549 self.assertArrayEqual(ds[0], np.arange(3)) 1550 self.assertArrayEqual(ds[1], np.arange(3)) 1551 1552 def test_multidim(self): 1553 dt = h5py.vlen_dtype(int) 1554 ds = self.f.create_dataset('vlen', (2, 2), dtype=dt) 1555 ds[0, 0] = np.arange(1) 1556 ds[:, :] = np.array([[np.arange(3), np.arange(2)], 1557 [np.arange(1), np.arange(2)]], dtype=object) 1558 ds[:, :] = np.array([[np.arange(2), np.arange(2)], 1559 [np.arange(2), np.arange(2)]]) 1560 1561 def _help_float_testing(self, np_dt, dataset_name='vlen'): 1562 """ 1563 Helper for testing various vlen numpy data types. 1564 :param np_dt: Numpy datatype to test 1565 :param dataset_name: String name of the dataset to create for testing. 1566 """ 1567 dt = h5py.vlen_dtype(np_dt) 1568 ds = self.f.create_dataset(dataset_name, (5,), dtype=dt) 1569 1570 # Create some arrays, and assign them to the dataset 1571 array_0 = np.array([1., 2., 30.], dtype=np_dt) 1572 array_1 = np.array([100.3, 200.4, 98.1, -10.5, -300.0], dtype=np_dt) 1573 1574 # Test that a numpy array of different type gets cast correctly 1575 array_2 = np.array([1, 2, 8], dtype=np.dtype('int32')) 1576 casted_array_2 = array_2.astype(np_dt) 1577 1578 # Test that we can set a list of floats. 
1579 list_3 = [1., 2., 900., 0., -0.5] 1580 list_array_3 = np.array(list_3, dtype=np_dt) 1581 1582 # Test that a list of integers gets casted correctly 1583 list_4 = [-1, -100, 0, 1, 9999, 70] 1584 list_array_4 = np.array(list_4, dtype=np_dt) 1585 1586 ds[0] = array_0 1587 ds[1] = array_1 1588 ds[2] = array_2 1589 ds[3] = list_3 1590 ds[4] = list_4 1591 1592 self.assertArrayEqual(array_0, ds[0]) 1593 self.assertArrayEqual(array_1, ds[1]) 1594 self.assertArrayEqual(casted_array_2, ds[2]) 1595 self.assertArrayEqual(list_array_3, ds[3]) 1596 self.assertArrayEqual(list_array_4, ds[4]) 1597 1598 # Test that we can reassign arrays in the dataset 1599 list_array_3 = np.array([0.3, 2.2], dtype=np_dt) 1600 1601 ds[0] = list_array_3[:] 1602 1603 self.assertArrayEqual(list_array_3, ds[0]) 1604 1605 # Make sure we can close the file. 1606 self.f.flush() 1607 self.f.close() 1608 1609 def test_numpy_float16(self): 1610 np_dt = np.dtype('float16') 1611 self._help_float_testing(np_dt) 1612 1613 def test_numpy_float32(self): 1614 np_dt = np.dtype('float32') 1615 self._help_float_testing(np_dt) 1616 1617 def test_numpy_float64_from_dtype(self): 1618 np_dt = np.dtype('float64') 1619 self._help_float_testing(np_dt) 1620 1621 def test_numpy_float64_2(self): 1622 np_dt = np.float64 1623 self._help_float_testing(np_dt) 1624 1625 def test_non_contiguous_arrays(self): 1626 """Test that non-contiguous arrays are stored correctly""" 1627 self.f.create_dataset('nc', (10,), dtype=h5py.vlen_dtype('bool')) 1628 x = np.array([True, False, True, True, False, False, False]) 1629 self.f['nc'][0] = x[::2] 1630 1631 assert all(self.f['nc'][0] == x[::2]), f"{self.f['nc'][0]} != {x[::2]}" 1632 1633 self.f.create_dataset('nc2', (10,), dtype=h5py.vlen_dtype('int8')) 1634 y = np.array([2, 4, 1, 5, -1, 3, 7]) 1635 self.f['nc2'][0] = y[::2] 1636 1637 assert all(self.f['nc2'][0] == y[::2]), f"{self.f['nc2'][0]} != {y[::2]}" 1638 1639 1640class TestLowOpen(BaseDataset): 1641 1642 def 
test_get_access_list(self): 1643 """ Test H5Dget_access_plist """ 1644 ds = self.f.create_dataset('foo', (4,)) 1645 p_list = ds.id.get_access_plist() 1646 1647 def test_dapl(self): 1648 """ Test the dapl keyword to h5d.open """ 1649 dapl = h5py.h5p.create(h5py.h5p.DATASET_ACCESS) 1650 dset = self.f.create_dataset('x', (100,)) 1651 del dset 1652 dsid = h5py.h5d.open(self.f.id, b'x', dapl) 1653 self.assertIsInstance(dsid, h5py.h5d.DatasetID) 1654 1655 1656@ut.skipUnless(h5py.version.hdf5_version_tuple >= (1, 10, 5), 1657 "chunk info requires HDF5 >= 1.10.5") 1658def test_get_chunk_details(): 1659 from io import BytesIO 1660 buf = BytesIO() 1661 with h5py.File(buf, 'w') as fout: 1662 fout.create_dataset('test', shape=(100, 100), chunks=(10, 10), dtype='i4') 1663 fout['test'][:] = 1 1664 1665 buf.seek(0) 1666 with h5py.File(buf, 'r') as fin: 1667 ds = fin['test'].id 1668 1669 assert ds.get_num_chunks() == 100 1670 for j in range(100): 1671 offset = tuple(np.array(np.unravel_index(j, (10, 10))) * 10) 1672 1673 si = ds.get_chunk_info(j) 1674 assert si.chunk_offset == offset 1675 assert si.filter_mask == 0 1676 assert si.byte_offset is not None 1677 assert si.size > 0 1678 1679 si = ds.get_chunk_info_by_coord((0, 0)) 1680 assert si.chunk_offset == (0, 0) 1681 assert si.filter_mask == 0 1682 assert si.byte_offset is not None 1683 assert si.size > 0 1684 1685 1686def test_empty_shape(writable_file): 1687 ds = writable_file.create_dataset('empty', dtype='int32') 1688 assert ds.shape is None 1689 assert ds.maxshape is None 1690 1691 1692def test_zero_storage_size(): 1693 # https://github.com/h5py/h5py/issues/1475 1694 from io import BytesIO 1695 buf = BytesIO() 1696 with h5py.File(buf, 'w') as fout: 1697 fout.create_dataset('empty', dtype='uint8') 1698 1699 buf.seek(0) 1700 with h5py.File(buf, 'r') as fin: 1701 assert fin['empty'].chunks is None 1702 assert fin['empty'].id.get_offset() is None 1703 assert fin['empty'].id.get_storage_size() == 0 1704 1705 1706def 
test_python_int_uint64(writable_file): 1707 # https://github.com/h5py/h5py/issues/1547 1708 data = [np.iinfo(np.int64).max, np.iinfo(np.int64).max + 1] 1709 1710 # Check creating a new dataset 1711 ds = writable_file.create_dataset('x', data=data, dtype=np.uint64) 1712 assert ds.dtype == np.dtype(np.uint64) 1713 np.testing.assert_array_equal(ds[:], np.array(data, dtype=np.uint64)) 1714 1715 # Check writing to an existing dataset 1716 ds[:] = data 1717 np.testing.assert_array_equal(ds[:], np.array(data, dtype=np.uint64)) 1718 1719 1720def test_setitem_fancy_indexing(writable_file): 1721 # https://github.com/h5py/h5py/issues/1593 1722 arr = writable_file.create_dataset('data', (5, 1000, 2), dtype=np.uint8) 1723 block = np.random.randint(255, size=(5, 3, 2)) 1724 arr[:, [0, 2, 4], ...] = block 1725 1726 1727def test_vlen_spacepad(): 1728 with File(get_data_file_path("vlen_string_dset.h5")) as f: 1729 assert f["DS1"][0] == b"Parting" 1730 1731 1732def test_vlen_nullterm(): 1733 with File(get_data_file_path("vlen_string_dset_utc.h5")) as f: 1734 assert f["ds1"][0] == b"2009-12-20T10:16:18.662409Z" 1735 1736 1737@pytest.mark.skipif( 1738 h5py.version.hdf5_version_tuple < (1, 10, 3), 1739 reason="Appears you cannot pass an unknown filter id for HDF5 < 1.10.3" 1740) 1741def test_allow_unknown_filter(writable_file): 1742 # apparently 256-511 are reserved for testing purposes 1743 fake_filter_id = 256 1744 ds = writable_file.create_dataset( 1745 'data', shape=(10, 10), dtype=np.uint8, compression=fake_filter_id, 1746 allow_unknown_filter=True 1747 ) 1748 assert str(fake_filter_id) in ds._filters 1749 1750 1751class TestCommutative(BaseDataset): 1752 """ 1753 Test the symmetry of operators, at least with the numpy types. 
1754 Issue: https://github.com/h5py/h5py/issues/1947 1755 """ 1756 def test_numpy_commutative(self,): 1757 """ 1758 Create a h5py dataset, extract one element convert to numpy 1759 Check that it returns symmetric response to == and != 1760 """ 1761 shape = (100,1) 1762 dset = self.f.create_dataset("test", shape, dtype=float, 1763 data=np.random.rand(*shape)) 1764 1765 # grab a value from the elements, ie dset[0] 1766 # check that mask arrays are commutative wrt ==, != 1767 val = np.float64(dset[0]) 1768 1769 assert np.all((val == dset) == (dset == val)) 1770 assert np.all((val != dset) == (dset != val)) 1771 1772 # generate sample not in the dset, ie max(dset)+delta 1773 # check that mask arrays are commutative wrt ==, != 1774 delta = 0.001 1775 nval = np.nanmax(dset)+delta 1776 1777 assert np.all((nval == dset) == (dset == nval)) 1778 assert np.all((nval != dset) == (dset != nval)) 1779 1780 def test_basetype_commutative(self,): 1781 """ 1782 Create a h5py dataset and check basetype compatibility. 1783 Check that operation is symmetric, even if it is potentially 1784 not meaningful. 1785 """ 1786 shape = (100,1) 1787 dset = self.f.create_dataset("test", shape, dtype=float, 1788 data=np.random.rand(*shape)) 1789 1790 # generate float type, sample float(0.) 1791 # check that operation is symmetric (but potentially meaningless) 1792 val = float(0.) 1793 assert (val == dset) == (dset == val) 1794 assert (val != dset) == (dset != val) 1795