1# This file is part of h5py, a Python interface to the HDF5 library.
2#
3# http://www.h5py.org
4#
5# Copyright 2008-2013 Andrew Collette and contributors
6#
7# License:  Standard 3-clause BSD; see "license.txt" for full license terms
8#           and contributor agreement.
9
10"""
11    Dataset testing operations.
12
13    Tests all dataset operations, including creation, with the exception of:
14
15    1. Slicing operations for read and write, handled by module test_slicing
16    2. Type conversion for read and write (currently untested)
17"""
18
19import pathlib
20import sys
21import numpy as np
22import platform
23import pytest
24import warnings
25
26from .common import ut, TestCase
27from .data_files import get_data_file_path
28from h5py import File, Group, Dataset
29from h5py._hl.base import is_empty_dataspace
30from h5py import h5f, h5t
31from h5py.h5py_warnings import H5pyDeprecationWarning
32import h5py
33import h5py._hl.selections as sel
34
35
36class BaseDataset(TestCase):
37    def setUp(self):
38        self.f = File(self.mktemp(), 'w')
39
40    def tearDown(self):
41        if self.f:
42            self.f.close()
43
44
45class TestRepr(BaseDataset):
46    """
47        Feature: repr(Dataset) behaves sensibly
48    """
49
50    def test_repr_open(self):
51        """ repr() works on live and dead datasets """
52        ds = self.f.create_dataset('foo', (4,))
53        self.assertIsInstance(repr(ds), str)
54        self.f.close()
55        self.assertIsInstance(repr(ds), str)
56
57
58class TestCreateShape(BaseDataset):
59
60    """
61        Feature: Datasets can be created from a shape only
62    """
63
64    def test_create_scalar(self):
65        """ Create a scalar dataset """
66        dset = self.f.create_dataset('foo', ())
67        self.assertEqual(dset.shape, ())
68
69    def test_create_simple(self):
70        """ Create a size-1 dataset """
71        dset = self.f.create_dataset('foo', (1,))
72        self.assertEqual(dset.shape, (1,))
73
74    def test_create_integer(self):
75        """ Create a size-1 dataset with integer shape"""
76        dset = self.f.create_dataset('foo', 1)
77        self.assertEqual(dset.shape, (1,))
78
79    def test_create_extended(self):
80        """ Create an extended dataset """
81        dset = self.f.create_dataset('foo', (63,))
82        self.assertEqual(dset.shape, (63,))
83        self.assertEqual(dset.size, 63)
84        dset = self.f.create_dataset('bar', (6, 10))
85        self.assertEqual(dset.shape, (6, 10))
86        self.assertEqual(dset.size, (60))
87
88    def test_create_integer_extended(self):
89        """ Create an extended dataset """
90        dset = self.f.create_dataset('foo', 63)
91        self.assertEqual(dset.shape, (63,))
92        self.assertEqual(dset.size, 63)
93        dset = self.f.create_dataset('bar', (6, 10))
94        self.assertEqual(dset.shape, (6, 10))
95        self.assertEqual(dset.size, (60))
96
97    def test_default_dtype(self):
98        """ Confirm that the default dtype is float """
99        dset = self.f.create_dataset('foo', (63,))
100        self.assertEqual(dset.dtype, np.dtype('=f4'))
101
102    def test_missing_shape(self):
103        """ Missing shape raises TypeError """
104        with self.assertRaises(TypeError):
105            self.f.create_dataset('foo')
106
107    def test_long_double(self):
108        """ Confirm that the default dtype is float """
109        dset = self.f.create_dataset('foo', (63,), dtype=np.longdouble)
110        if platform.machine() in ['ppc64le']:
111            pytest.xfail("Storage of long double deactivated on %s" % platform.machine())
112        self.assertEqual(dset.dtype, np.longdouble)
113
114    @ut.skipIf(not hasattr(np, "complex256"), "No support for complex256")
115    def test_complex256(self):
116        """ Confirm that the default dtype is float """
117        dset = self.f.create_dataset('foo', (63,),
118                                     dtype=np.dtype('complex256'))
119        self.assertEqual(dset.dtype, np.dtype('complex256'))
120
121    def test_name_bytes(self):
122        dset = self.f.create_dataset(b'foo', (1,))
123        self.assertEqual(dset.shape, (1,))
124
125        dset2 = self.f.create_dataset(b'bar/baz', (2,))
126        self.assertEqual(dset2.shape, (2,))
127
128class TestCreateData(BaseDataset):
129
130    """
131        Feature: Datasets can be created from existing data
132    """
133
134    def test_create_scalar(self):
135        """ Create a scalar dataset from existing array """
136        data = np.ones((), 'f')
137        dset = self.f.create_dataset('foo', data=data)
138        self.assertEqual(dset.shape, data.shape)
139
140    def test_create_extended(self):
141        """ Create an extended dataset from existing data """
142        data = np.ones((63,), 'f')
143        dset = self.f.create_dataset('foo', data=data)
144        self.assertEqual(dset.shape, data.shape)
145
146    def test_dataset_intermediate_group(self):
147        """ Create dataset with missing intermediate groups """
148        ds = self.f.create_dataset("/foo/bar/baz", shape=(10, 10), dtype='<i4')
149        self.assertIsInstance(ds, h5py.Dataset)
150        self.assertTrue("/foo/bar/baz" in self.f)
151
152    def test_reshape(self):
153        """ Create from existing data, and make it fit a new shape """
154        data = np.arange(30, dtype='f')
155        dset = self.f.create_dataset('foo', shape=(10, 3), data=data)
156        self.assertEqual(dset.shape, (10, 3))
157        self.assertArrayEqual(dset[...], data.reshape((10, 3)))
158
159    def test_appropriate_low_level_id(self):
160        " Binding Dataset to a non-DatasetID identifier fails with ValueError "
161        with self.assertRaises(ValueError):
162            Dataset(self.f['/'].id)
163
164    def check_h5_string(self, dset, cset, length):
165        tid = dset.id.get_type()
166        assert isinstance(tid, h5t.TypeStringID)
167        assert tid.get_cset() == cset
168        if length is None:
169            assert tid.is_variable_str()
170        else:
171            assert not tid.is_variable_str()
172            assert tid.get_size() == length
173
174    def test_create_bytestring(self):
175        """ Creating dataset with byte string yields vlen ASCII dataset """
176        def check_vlen_ascii(dset):
177            self.check_h5_string(dset, h5t.CSET_ASCII, length=None)
178        check_vlen_ascii(self.f.create_dataset('a', data=b'abc'))
179        check_vlen_ascii(self.f.create_dataset('b', data=[b'abc', b'def']))
180        check_vlen_ascii(self.f.create_dataset('c', data=[[b'abc'], [b'def']]))
181        check_vlen_ascii(self.f.create_dataset(
182            'd', data=np.array([b'abc', b'def'], dtype=object)
183        ))
184
185    def test_create_np_s(self):
186        dset = self.f.create_dataset('a', data=np.array([b'abc', b'def'], dtype='S3'))
187        self.check_h5_string(dset, h5t.CSET_ASCII, length=3)
188
189    def test_create_strings(self):
190        def check_vlen_utf8(dset):
191            self.check_h5_string(dset, h5t.CSET_UTF8, length=None)
192        check_vlen_utf8(self.f.create_dataset('a', data='abc'))
193        check_vlen_utf8(self.f.create_dataset('b', data=['abc', 'def']))
194        check_vlen_utf8(self.f.create_dataset('c', data=[['abc'], ['def']]))
195        check_vlen_utf8(self.f.create_dataset(
196            'd', data=np.array(['abc', 'def'], dtype=object)
197        ))
198
199    def test_create_np_u(self):
200        with self.assertRaises(TypeError):
201            self.f.create_dataset('a', data=np.array([b'abc', b'def'], dtype='U3'))
202
203    def test_empty_create_via_None_shape(self):
204        self.f.create_dataset('foo', dtype='f')
205        self.assertTrue(is_empty_dataspace(self.f['foo'].id))
206
207    def test_empty_create_via_Empty_class(self):
208        self.f.create_dataset('foo', data=h5py.Empty(dtype='f'))
209        self.assertTrue(is_empty_dataspace(self.f['foo'].id))
210
211    def test_create_incompatible_data(self):
212        # Shape tuple is incompatible with data
213        with self.assertRaises(ValueError):
214            self.f.create_dataset('bar', shape=4, data= np.arange(3))
215
216
217class TestReadDirectly:
218
219    """
220        Feature: Read data directly from Dataset into a Numpy array
221    """
222
223    @pytest.mark.parametrize(
224        'source_shape,dest_shape,source_sel,dest_sel',
225        [
226            ((100,), (100,), np.s_[0:10], np.s_[50:60]),
227            ((70,), (100,), np.s_[50:60], np.s_[90:]),
228            ((30, 10), (20, 20), np.s_[:20, :], np.s_[:, :10]),
229            ((5, 7, 9), (6,), np.s_[2, :6, 3], np.s_[:]),
230        ])
231    def test_read_direct(self, writable_file, source_shape, dest_shape, source_sel, dest_sel):
232        source_values = np.arange(np.product(source_shape), dtype="int64").reshape(source_shape)
233        dset = writable_file.create_dataset("dset", source_shape, data=source_values)
234        arr = np.full(dest_shape, -1, dtype="int64")
235        expected = arr.copy()
236        expected[dest_sel] = source_values[source_sel]
237
238        dset.read_direct(arr, source_sel, dest_sel)
239        np.testing.assert_array_equal(arr, expected)
240
241    def test_no_sel(self, writable_file):
242        dset = writable_file.create_dataset("dset", (10,), data=np.arange(10, dtype="int64"))
243        arr = np.ones((10,), dtype="int64")
244        dset.read_direct(arr)
245        np.testing.assert_array_equal(arr, np.arange(10, dtype="int64"))
246
247    def test_empty(self, writable_file):
248        empty_dset = writable_file.create_dataset("edset", dtype='int64')
249        arr = np.ones((100,), 'int64')
250        with pytest.raises(TypeError):
251            empty_dset.read_direct(arr, np.s_[0:10], np.s_[50:60])
252
253    def test_wrong_shape(self, writable_file):
254        dset = writable_file.create_dataset("dset", (100,), dtype='int64')
255        arr = np.ones((200,))
256        with pytest.raises(TypeError):
257            dset.read_direct(arr)
258
259    def test_not_c_contiguous(self, writable_file):
260        dset = writable_file.create_dataset("dset", (10, 10), dtype='int64')
261        arr = np.ones((10, 10), order='F')
262        with pytest.raises(TypeError):
263            dset.read_direct(arr)
264
265class TestWriteDirectly:
266
267    """
268        Feature: Write Numpy array directly into Dataset
269    """
270
271    @pytest.mark.parametrize(
272        'source_shape,dest_shape,source_sel,dest_sel',
273        [
274            ((100,), (100,), np.s_[0:10], np.s_[50:60]),
275            ((70,), (100,), np.s_[50:60], np.s_[90:]),
276            ((30, 10), (20, 20), np.s_[:20, :], np.s_[:, :10]),
277            ((5, 7, 9), (6,), np.s_[2, :6, 3], np.s_[:]),
278        ])
279    def test_write_direct(self, writable_file, source_shape, dest_shape, source_sel, dest_sel):
280        dset = writable_file.create_dataset('dset', dest_shape, dtype='int32', fillvalue=-1)
281        arr = np.arange(np.product(source_shape)).reshape(source_shape)
282        expected = np.full(dest_shape, -1, dtype='int32')
283        expected[dest_sel] = arr[source_sel]
284        dset.write_direct(arr, source_sel, dest_sel)
285        np.testing.assert_array_equal(dset[:], expected)
286
287    def test_empty(self, writable_file):
288        empty_dset = writable_file.create_dataset("edset", dtype='int64')
289        with pytest.raises(TypeError):
290            empty_dset.write_direct(np.ones((100,)), np.s_[0:10], np.s_[50:60])
291
292    def test_wrong_shape(self, writable_file):
293        dset = writable_file.create_dataset("dset", (100,), dtype='int64')
294        arr = np.ones((200,))
295        with pytest.raises(TypeError):
296            dset.write_direct(arr)
297
298    def test_not_c_contiguous(self, writable_file):
299        dset = writable_file.create_dataset("dset", (10, 10), dtype='int64')
300        arr = np.ones((10, 10), order='F')
301        with pytest.raises(TypeError):
302            dset.write_direct(arr)
303
304
305class TestCreateRequire(BaseDataset):
306
307    """
308        Feature: Datasets can be created only if they don't exist in the file
309    """
310
311    def test_create(self):
312        """ Create new dataset with no conflicts """
313        dset = self.f.require_dataset('foo', (10, 3), 'f')
314        self.assertIsInstance(dset, Dataset)
315        self.assertEqual(dset.shape, (10, 3))
316
317    def test_create_existing(self):
318        """ require_dataset yields existing dataset """
319        dset = self.f.require_dataset('foo', (10, 3), 'f')
320        dset2 = self.f.require_dataset('foo', (10, 3), 'f')
321        self.assertEqual(dset, dset2)
322
323    def test_create_1D(self):
324        """ require_dataset with integer shape yields existing dataset"""
325        dset = self.f.require_dataset('foo', 10, 'f')
326        dset2 = self.f.require_dataset('foo', 10, 'f')
327        self.assertEqual(dset, dset2)
328
329        dset = self.f.require_dataset('bar', (10,), 'f')
330        dset2 = self.f.require_dataset('bar', 10, 'f')
331        self.assertEqual(dset, dset2)
332
333        dset = self.f.require_dataset('baz', 10, 'f')
334        dset2 = self.f.require_dataset(b'baz', (10,), 'f')
335        self.assertEqual(dset, dset2)
336
337    def test_shape_conflict(self):
338        """ require_dataset with shape conflict yields TypeError """
339        self.f.create_dataset('foo', (10, 3), 'f')
340        with self.assertRaises(TypeError):
341            self.f.require_dataset('foo', (10, 4), 'f')
342
343    def test_type_conflict(self):
344        """ require_dataset with object type conflict yields TypeError """
345        self.f.create_group('foo')
346        with self.assertRaises(TypeError):
347            self.f.require_dataset('foo', (10, 3), 'f')
348
349    def test_dtype_conflict(self):
350        """ require_dataset with dtype conflict (strict mode) yields TypeError
351        """
352        dset = self.f.create_dataset('foo', (10, 3), 'f')
353        with self.assertRaises(TypeError):
354            self.f.require_dataset('foo', (10, 3), 'S10')
355
356    def test_dtype_exact(self):
357        """ require_dataset with exactly dtype match """
358
359        dset = self.f.create_dataset('foo', (10, 3), 'f')
360        dset2 = self.f.require_dataset('foo', (10, 3), 'f', exact=True)
361        self.assertEqual(dset, dset2)
362
363    def test_dtype_close(self):
364        """ require_dataset with convertible type succeeds (non-strict mode)
365        """
366        dset = self.f.create_dataset('foo', (10, 3), 'i4')
367        dset2 = self.f.require_dataset('foo', (10, 3), 'i2', exact=False)
368        self.assertEqual(dset, dset2)
369        self.assertEqual(dset2.dtype, np.dtype('i4'))
370
371
372class TestCreateChunked(BaseDataset):
373
374    """
375        Feature: Datasets can be created by manually specifying chunks
376    """
377
378    def test_create_chunks(self):
379        """ Create via chunks tuple """
380        dset = self.f.create_dataset('foo', shape=(100,), chunks=(10,))
381        self.assertEqual(dset.chunks, (10,))
382
383    def test_create_chunks_integer(self):
384        """ Create via chunks integer """
385        dset = self.f.create_dataset('foo', shape=(100,), chunks=10)
386        self.assertEqual(dset.chunks, (10,))
387
388    def test_chunks_mismatch(self):
389        """ Illegal chunk size raises ValueError """
390        with self.assertRaises(ValueError):
391            self.f.create_dataset('foo', shape=(100,), chunks=(200,))
392
393    def test_chunks_false(self):
394        """ Chunked format required for given storage options """
395        with self.assertRaises(ValueError):
396            self.f.create_dataset('foo', shape=(10,), maxshape=100, chunks=False)
397
398    def test_chunks_scalar(self):
399        """ Attempting to create chunked scalar dataset raises TypeError """
400        with self.assertRaises(TypeError):
401            self.f.create_dataset('foo', shape=(), chunks=(50,))
402
403    def test_auto_chunks(self):
404        """ Auto-chunking of datasets """
405        dset = self.f.create_dataset('foo', shape=(20, 100), chunks=True)
406        self.assertIsInstance(dset.chunks, tuple)
407        self.assertEqual(len(dset.chunks), 2)
408
409    def test_auto_chunks_abuse(self):
410        """ Auto-chunking with pathologically large element sizes """
411        dset = self.f.create_dataset('foo', shape=(3,), dtype='S100000000', chunks=True)
412        self.assertEqual(dset.chunks, (1,))
413
414    def test_scalar_assignment(self):
415        """ Test scalar assignment of chunked dataset """
416        dset = self.f.create_dataset('foo', shape=(3, 50, 50),
417                                     dtype=np.int32, chunks=(1, 50, 50))
418        # test assignment of selection smaller than chunk size
419        dset[1, :, 40] = 10
420        self.assertTrue(np.all(dset[1, :, 40] == 10))
421
422        # test assignment of selection equal to chunk size
423        dset[1] = 11
424        self.assertTrue(np.all(dset[1] == 11))
425
426        # test assignment of selection bigger than chunk size
427        dset[0:2] = 12
428        self.assertTrue(np.all(dset[0:2] == 12))
429
430    def test_auto_chunks_no_shape(self):
431        """ Auto-chunking of empty datasets not allowed"""
432        with pytest.raises(TypeError, match='Empty') as err:
433            self.f.create_dataset('foo', dtype='S100', chunks=True)
434
435        with pytest.raises(TypeError, match='Empty') as err:
436            self.f.create_dataset('foo', dtype='S100', maxshape=20)
437
438
439class TestCreateFillvalue(BaseDataset):
440
441    """
442        Feature: Datasets can be created with fill value
443    """
444
445    def test_create_fillval(self):
446        """ Fill value is reflected in dataset contents """
447        dset = self.f.create_dataset('foo', (10,), fillvalue=4.0)
448        self.assertEqual(dset[0], 4.0)
449        self.assertEqual(dset[7], 4.0)
450
451    def test_property(self):
452        """ Fill value is recoverable via property """
453        dset = self.f.create_dataset('foo', (10,), fillvalue=3.0)
454        self.assertEqual(dset.fillvalue, 3.0)
455        self.assertNotIsInstance(dset.fillvalue, np.ndarray)
456
457    def test_property_none(self):
458        """ .fillvalue property works correctly if not set """
459        dset = self.f.create_dataset('foo', (10,))
460        self.assertEqual(dset.fillvalue, 0)
461
462    def test_compound(self):
463        """ Fill value works with compound types """
464        dt = np.dtype([('a', 'f4'), ('b', 'i8')])
465        v = np.ones((1,), dtype=dt)[0]
466        dset = self.f.create_dataset('foo', (10,), dtype=dt, fillvalue=v)
467        self.assertEqual(dset.fillvalue, v)
468        self.assertAlmostEqual(dset[4], v)
469
470    def test_exc(self):
471        """ Bogus fill value raises ValueError """
472        with self.assertRaises(ValueError):
473            dset = self.f.create_dataset('foo', (10,),
474                    dtype=[('a', 'i'), ('b', 'f')], fillvalue=42)
475
476
477class TestCreateNamedType(BaseDataset):
478
479    """
480        Feature: Datasets created from an existing named type
481    """
482
483    def test_named(self):
484        """ Named type object works and links the dataset to type """
485        self.f['type'] = np.dtype('f8')
486        dset = self.f.create_dataset('x', (100,), dtype=self.f['type'])
487        self.assertEqual(dset.dtype, np.dtype('f8'))
488        self.assertEqual(dset.id.get_type(), self.f['type'].id)
489        self.assertTrue(dset.id.get_type().committed())
490
491
492@ut.skipIf('gzip' not in h5py.filters.encode, "DEFLATE is not installed")
493class TestCreateGzip(BaseDataset):
494
495    """
496        Feature: Datasets created with gzip compression
497    """
498
499    def test_gzip(self):
500        """ Create with explicit gzip options """
501        dset = self.f.create_dataset('foo', (20, 30), compression='gzip',
502                                     compression_opts=9)
503        self.assertEqual(dset.compression, 'gzip')
504        self.assertEqual(dset.compression_opts, 9)
505
506    def test_gzip_implicit(self):
507        """ Create with implicit gzip level (level 4) """
508        dset = self.f.create_dataset('foo', (20, 30), compression='gzip')
509        self.assertEqual(dset.compression, 'gzip')
510        self.assertEqual(dset.compression_opts, 4)
511
512    def test_gzip_number(self):
513        """ Create with gzip level by specifying integer """
514        dset = self.f.create_dataset('foo', (20, 30), compression=7)
515        self.assertEqual(dset.compression, 'gzip')
516        self.assertEqual(dset.compression_opts, 7)
517
518        original_compression_vals = h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS
519        try:
520            h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS = tuple()
521            with self.assertRaises(ValueError):
522                dset = self.f.create_dataset('foo', (20, 30), compression=7)
523        finally:
524            h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS = original_compression_vals
525
526    def test_gzip_exc(self):
527        """ Illegal gzip level (explicit or implicit) raises ValueError """
528        with self.assertRaises((ValueError, RuntimeError)):
529            self.f.create_dataset('foo', (20, 30), compression=14)
530        with self.assertRaises(ValueError):
531            self.f.create_dataset('foo', (20, 30), compression=-4)
532        with self.assertRaises(ValueError):
533            self.f.create_dataset('foo', (20, 30), compression='gzip',
534                                  compression_opts=14)
535
536
537@ut.skipIf('gzip' not in h5py.filters.encode, "DEFLATE is not installed")
538class TestCreateCompressionNumber(BaseDataset):
539
540    """
541        Feature: Datasets created with a compression code
542    """
543
544    def test_compression_number(self):
545        """ Create with compression number of gzip (h5py.h5z.FILTER_DEFLATE) and a compression level of 7"""
546        original_compression_vals = h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS
547        try:
548            h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS = tuple()
549            dset = self.f.create_dataset('foo', (20, 30), compression=h5py.h5z.FILTER_DEFLATE, compression_opts=(7,))
550        finally:
551            h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS = original_compression_vals
552
553        self.assertEqual(dset.compression, 'gzip')
554        self.assertEqual(dset.compression_opts, 7)
555
556    def test_compression_number_invalid(self):
557        """ Create with invalid compression numbers  """
558        with self.assertRaises(ValueError) as e:
559            self.f.create_dataset('foo', (20, 30), compression=-999)
560        self.assertIn("Invalid filter", str(e.exception))
561
562        with self.assertRaises(ValueError) as e:
563            self.f.create_dataset('foo', (20, 30), compression=100)
564        self.assertIn("Unknown compression", str(e.exception))
565
566        original_compression_vals = h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS
567        try:
568            h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS = tuple()
569
570            # Using gzip compression requires a compression level specified in compression_opts
571            with self.assertRaises(IndexError):
572                self.f.create_dataset('foo', (20, 30), compression=h5py.h5z.FILTER_DEFLATE)
573        finally:
574            h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS = original_compression_vals
575
576
577@ut.skipIf('lzf' not in h5py.filters.encode, "LZF is not installed")
578class TestCreateLZF(BaseDataset):
579
580    """
581        Feature: Datasets created with LZF compression
582    """
583
584    def test_lzf(self):
585        """ Create with explicit lzf """
586        dset = self.f.create_dataset('foo', (20, 30), compression='lzf')
587        self.assertEqual(dset.compression, 'lzf')
588        self.assertEqual(dset.compression_opts, None)
589
590        testdata = np.arange(100)
591        dset = self.f.create_dataset('bar', data=testdata, compression='lzf')
592        self.assertEqual(dset.compression, 'lzf')
593        self.assertEqual(dset.compression_opts, None)
594
595        self.f.flush()  # Actually write to file
596
597        readdata = self.f['bar'][()]
598        self.assertArrayEqual(readdata, testdata)
599
600    def test_lzf_exc(self):
601        """ Giving lzf options raises ValueError """
602        with self.assertRaises(ValueError):
603            self.f.create_dataset('foo', (20, 30), compression='lzf',
604                                  compression_opts=4)
605
606
607@ut.skipIf('szip' not in h5py.filters.encode, "SZIP is not installed")
608class TestCreateSZIP(BaseDataset):
609
610    """
611        Feature: Datasets created with LZF compression
612    """
613
614    def test_szip(self):
615        """ Create with explicit szip """
616        dset = self.f.create_dataset('foo', (20, 30), compression='szip',
617                                     compression_opts=('ec', 16))
618
619
620@ut.skipIf('shuffle' not in h5py.filters.encode, "SHUFFLE is not installed")
621class TestCreateShuffle(BaseDataset):
622
623    """
624        Feature: Datasets can use shuffling filter
625    """
626
627    def test_shuffle(self):
628        """ Enable shuffle filter """
629        dset = self.f.create_dataset('foo', (20, 30), shuffle=True)
630        self.assertTrue(dset.shuffle)
631
632
633@ut.skipIf('fletcher32' not in h5py.filters.encode, "FLETCHER32 is not installed")
634class TestCreateFletcher32(BaseDataset):
635    """
636        Feature: Datasets can use the fletcher32 filter
637    """
638
639    def test_fletcher32(self):
640        """ Enable fletcher32 filter """
641        dset = self.f.create_dataset('foo', (20, 30), fletcher32=True)
642        self.assertTrue(dset.fletcher32)
643
644
645@ut.skipIf('scaleoffset' not in h5py.filters.encode, "SCALEOFFSET is not installed")
646class TestCreateScaleOffset(BaseDataset):
647    """
648        Feature: Datasets can use the scale/offset filter
649    """
650
651    def test_float_fails_without_options(self):
652        """ Ensure that a scale factor is required for scaleoffset compression of floating point data """
653
654        with self.assertRaises(ValueError):
655            dset = self.f.create_dataset('foo', (20, 30), dtype=float, scaleoffset=True)
656
657    def test_non_integer(self):
658        """ Check when scaleoffset is negetive"""
659
660        with self.assertRaises(ValueError):
661            dset = self.f.create_dataset('foo', (20, 30), dtype=float, scaleoffset=-0.1)
662
663    def test_unsupport_dtype(self):
664        """ Check when dtype is unsupported type"""
665
666        with self.assertRaises(TypeError):
667            dset = self.f.create_dataset('foo', (20, 30), dtype=bool, scaleoffset=True)
668
669    def test_float(self):
670        """ Scaleoffset filter works for floating point data """
671
672        scalefac = 4
673        shape = (100, 300)
674        range = 20 * 10 ** scalefac
675        testdata = (np.random.rand(*shape) - 0.5) * range
676
677        dset = self.f.create_dataset('foo', shape, dtype=float, scaleoffset=scalefac)
678
679        # Dataset reports that scaleoffset is in use
680        assert dset.scaleoffset is not None
681
682        # Dataset round-trips
683        dset[...] = testdata
684        filename = self.f.filename
685        self.f.close()
686        self.f = h5py.File(filename, 'r')
687        readdata = self.f['foo'][...]
688
689        # Test that data round-trips to requested precision
690        self.assertArrayEqual(readdata, testdata, precision=10 ** (-scalefac))
691
692        # Test that the filter is actually active (i.e. compression is lossy)
693        assert not (readdata == testdata).all()
694
695    def test_int(self):
696        """ Scaleoffset filter works for integer data with default precision """
697
698        nbits = 12
699        shape = (100, 300)
700        testdata = np.random.randint(0, 2 ** nbits - 1, size=shape)
701
702        # Create dataset; note omission of nbits (for library-determined precision)
703        dset = self.f.create_dataset('foo', shape, dtype=int, scaleoffset=True)
704
705        # Dataset reports scaleoffset enabled
706        assert dset.scaleoffset is not None
707
708        # Data round-trips correctly and identically
709        dset[...] = testdata
710        filename = self.f.filename
711        self.f.close()
712        self.f = h5py.File(filename, 'r')
713        readdata = self.f['foo'][...]
714        self.assertArrayEqual(readdata, testdata)
715
716    def test_int_with_minbits(self):
717        """ Scaleoffset filter works for integer data with specified precision """
718
719        nbits = 12
720        shape = (100, 300)
721        testdata = np.random.randint(0, 2 ** nbits, size=shape)
722
723        dset = self.f.create_dataset('foo', shape, dtype=int, scaleoffset=nbits)
724
725        # Dataset reports scaleoffset enabled with correct precision
726        self.assertTrue(dset.scaleoffset == 12)
727
728        # Data round-trips correctly
729        dset[...] = testdata
730        filename = self.f.filename
731        self.f.close()
732        self.f = h5py.File(filename, 'r')
733        readdata = self.f['foo'][...]
734        self.assertArrayEqual(readdata, testdata)
735
736    def test_int_with_minbits_lossy(self):
737        """ Scaleoffset filter works for integer data with specified precision """
738
739        nbits = 12
740        shape = (100, 300)
741        testdata = np.random.randint(0, 2 ** (nbits + 1) - 1, size=shape)
742
743        dset = self.f.create_dataset('foo', shape, dtype=int, scaleoffset=nbits)
744
745        # Dataset reports scaleoffset enabled with correct precision
746        self.assertTrue(dset.scaleoffset == 12)
747
748        # Data can be written and read
749        dset[...] = testdata
750        filename = self.f.filename
751        self.f.close()
752        self.f = h5py.File(filename, 'r')
753        readdata = self.f['foo'][...]
754
755        # Compression is lossy
756        assert not (readdata == testdata).all()
757
758
759class TestExternal(BaseDataset):
760    """
761        Feature: Datasets with the external storage property
762    """
763    def test_contents(self):
764        """ Create and access an external dataset """
765
766        shape = (6, 100)
767        testdata = np.random.random(shape)
768
769        # create a dataset in an external file and set it
770        ext_file = self.mktemp()
771        external = [(ext_file, 0, h5f.UNLIMITED)]
772        dset = self.f.create_dataset('foo', shape, dtype=testdata.dtype, external=external)
773        dset[...] = testdata
774
775        assert dset.external is not None
776
777        # verify file's existence, size, and contents
778        with open(ext_file, 'rb') as fid:
779            contents = fid.read()
780        assert contents == testdata.tobytes()
781
782    def test_name_str(self):
783        """ External argument may be a file name str only """
784
785        self.f.create_dataset('foo', (6, 100), external=self.mktemp())
786
787    def test_name_path(self):
788        """ External argument may be a file name path only """
789
790        self.f.create_dataset('foo', (6, 100),
791                              external=pathlib.Path(self.mktemp()))
792
793    def test_iter_multi(self):
794        """ External argument may be an iterable of multiple tuples """
795
796        ext_file = self.mktemp()
797        N = 100
798        external = iter((ext_file, x * 1000, 1000) for x in range(N))
799        dset = self.f.create_dataset('poo', (6, 100), external=external)
800        assert len(dset.external) == N
801
802    def test_invalid(self):
803        """ Test with invalid external lists """
804
805        shape = (6, 100)
806        ext_file = self.mktemp()
807
808        for exc_type, external in [
809            (TypeError, [ext_file]),
810            (TypeError, [ext_file, 0]),
811            (TypeError, [ext_file, 0, h5f.UNLIMITED]),
812            (ValueError, [(ext_file,)]),
813            (ValueError, [(ext_file, 0)]),
814            (ValueError, [(ext_file, 0, h5f.UNLIMITED, 0)]),
815            (TypeError, [(ext_file, 0, "h5f.UNLIMITED")]),
816        ]:
817            with self.assertRaises(exc_type):
818                self.f.create_dataset('foo', shape, external=external)
819
820
821class TestAutoCreate(BaseDataset):
822
823    """
824        Feature: Datasets auto-created from data produce the correct types
825    """
826    def assert_string_type(self, ds, cset, variable=True):
827        tid = ds.id.get_type()
828        self.assertEqual(type(tid), h5py.h5t.TypeStringID)
829        self.assertEqual(tid.get_cset(), cset)
830        if variable:
831            assert tid.is_variable_str()
832
833    def test_vlen_bytes(self):
834        """Assigning byte strings produces a vlen string ASCII dataset """
835        self.f['x'] = b"Hello there"
836        self.assert_string_type(self.f['x'], h5py.h5t.CSET_ASCII)
837
838        self.f['y'] = [b"a", b"bc"]
839        self.assert_string_type(self.f['y'], h5py.h5t.CSET_ASCII)
840
841        self.f['z'] = np.array([b"a", b"bc"], dtype=np.object_)
842        self.assert_string_type(self.f['z'], h5py.h5t.CSET_ASCII)
843
844    def test_vlen_unicode(self):
845        """Assigning unicode strings produces a vlen string UTF-8 dataset """
846        self.f['x'] = "Hello there" + chr(0x2034)
847        self.assert_string_type(self.f['x'], h5py.h5t.CSET_UTF8)
848
849        self.f['y'] = ["a", "bc"]
850        self.assert_string_type(self.f['y'], h5py.h5t.CSET_UTF8)
851
852        # 2D array; this only works with an array, not nested lists
853        self.f['z'] = np.array([["a", "bc"]], dtype=np.object_)
854        self.assert_string_type(self.f['z'], h5py.h5t.CSET_UTF8)
855
856    def test_string_fixed(self):
857        """ Assignment of fixed-length byte string produces a fixed-length
858        ascii dataset """
859        self.f['x'] = np.string_("Hello there")
860        ds = self.f['x']
861        self.assert_string_type(ds, h5py.h5t.CSET_ASCII, variable=False)
862        self.assertEqual(ds.id.get_type().get_size(), 11)
863
864
865class TestCreateLike(BaseDataset):
866    def test_no_chunks(self):
867        self.f['lol'] = np.arange(25).reshape(5, 5)
868        self.f.create_dataset_like('like_lol', self.f['lol'])
869        dslike = self.f['like_lol']
870        self.assertEqual(dslike.shape, (5, 5))
871        self.assertIs(dslike.chunks, None)
872
873    def test_track_times(self):
874        orig = self.f.create_dataset('honda', data=np.arange(12),
875                                     track_times=True)
876        self.assertNotEqual(0, h5py.h5g.get_objinfo(orig._id).mtime)
877        similar = self.f.create_dataset_like('hyundai', orig)
878        self.assertNotEqual(0, h5py.h5g.get_objinfo(similar._id).mtime)
879
880        orig = self.f.create_dataset('ibm', data=np.arange(12),
881                                     track_times=False)
882        self.assertEqual(0, h5py.h5g.get_objinfo(orig._id).mtime)
883        similar = self.f.create_dataset_like('lenovo', orig)
884        self.assertEqual(0, h5py.h5g.get_objinfo(similar._id).mtime)
885
886    def test_maxshape(self):
887        """ Test when other.maxshape != other.shape """
888
889        other = self.f.create_dataset('other', (10,), maxshape=20)
890        similar = self.f.create_dataset_like('sim', other)
891        self.assertEqual(similar.shape, (10,))
892        self.assertEqual(similar.maxshape, (20,))
893
894class TestChunkIterator(BaseDataset):
895    def test_no_chunks(self):
896        dset = self.f.create_dataset("foo", ())
897        with self.assertRaises(TypeError):
898            dset.iter_chunks()
899
900    def test_1d(self):
901        dset = self.f.create_dataset("foo", (100,), chunks=(32,))
902        expected = ((slice(0,32,1),), (slice(32,64,1),), (slice(64,96,1),),
903            (slice(96,100,1),))
904        self.assertEqual(list(dset.iter_chunks()), list(expected))
905        expected = ((slice(50,64,1),), (slice(64,96,1),), (slice(96,97,1),))
906        self.assertEqual(list(dset.iter_chunks(np.s_[50:97])), list(expected))
907
908    def test_2d(self):
909        dset = self.f.create_dataset("foo", (100,100), chunks=(32,64))
910        expected = ((slice(0, 32, 1), slice(0, 64, 1)), (slice(0, 32, 1),
911        slice(64, 100, 1)), (slice(32, 64, 1), slice(0, 64, 1)),
912        (slice(32, 64, 1), slice(64, 100, 1)), (slice(64, 96, 1),
913        slice(0, 64, 1)), (slice(64, 96, 1), slice(64, 100, 1)),
914        (slice(96, 100, 1), slice(0, 64, 1)), (slice(96, 100, 1),
915        slice(64, 100, 1)))
916        self.assertEqual(list(dset.iter_chunks()), list(expected))
917
918        expected = ((slice(48, 52, 1), slice(40, 50, 1)),)
919        self.assertEqual(list(dset.iter_chunks(np.s_[48:52,40:50])), list(expected))
920
921
922class TestResize(BaseDataset):
923
924    """
925        Feature: Datasets created with "maxshape" may be resized
926    """
927
928    def test_create(self):
929        """ Create dataset with "maxshape" """
930        dset = self.f.create_dataset('foo', (20, 30), maxshape=(20, 60))
931        self.assertIsNot(dset.chunks, None)
932        self.assertEqual(dset.maxshape, (20, 60))
933
934    def test_create_1D(self):
935        """ Create dataset with "maxshape" using integer maxshape"""
936        dset = self.f.create_dataset('foo', (20,), maxshape=20)
937        self.assertIsNot(dset.chunks, None)
938        self.assertEqual(dset.maxshape, (20,))
939
940        dset = self.f.create_dataset('bar', 20, maxshape=20)
941        self.assertEqual(dset.maxshape, (20,))
942
943    def test_resize(self):
944        """ Datasets may be resized up to maxshape """
945        dset = self.f.create_dataset('foo', (20, 30), maxshape=(20, 60))
946        self.assertEqual(dset.shape, (20, 30))
947        dset.resize((20, 50))
948        self.assertEqual(dset.shape, (20, 50))
949        dset.resize((20, 60))
950        self.assertEqual(dset.shape, (20, 60))
951
952    def test_resize_1D(self):
953        """ Datasets may be resized up to maxshape using integer maxshape"""
954        dset = self.f.create_dataset('foo', 20, maxshape=40)
955        self.assertEqual(dset.shape, (20,))
956        dset.resize((30,))
957        self.assertEqual(dset.shape, (30,))
958
959    def test_resize_over(self):
960        """ Resizing past maxshape triggers an exception """
961        dset = self.f.create_dataset('foo', (20, 30), maxshape=(20, 60))
962        with self.assertRaises(Exception):
963            dset.resize((20, 70))
964
965    def test_resize_nonchunked(self):
966        """ Resizing non-chunked dataset raises TypeError """
967        dset = self.f.create_dataset("foo", (20, 30))
968        with self.assertRaises(TypeError):
969            dset.resize((20, 60))
970
971    def test_resize_axis(self):
972        """ Resize specified axis """
973        dset = self.f.create_dataset('foo', (20, 30), maxshape=(20, 60))
974        dset.resize(50, axis=1)
975        self.assertEqual(dset.shape, (20, 50))
976
977    def test_axis_exc(self):
978        """ Illegal axis raises ValueError """
979        dset = self.f.create_dataset('foo', (20, 30), maxshape=(20, 60))
980        with self.assertRaises(ValueError):
981            dset.resize(50, axis=2)
982
983    def test_zero_dim(self):
984        """ Allow zero-length initial dims for unlimited axes (issue 111) """
985        dset = self.f.create_dataset('foo', (15, 0), maxshape=(15, None))
986        self.assertEqual(dset.shape, (15, 0))
987        self.assertEqual(dset.maxshape, (15, None))
988
989
990class TestDtype(BaseDataset):
991
992    """
993        Feature: Dataset dtype is available as .dtype property
994    """
995
996    def test_dtype(self):
997        """ Retrieve dtype from dataset """
998        dset = self.f.create_dataset('foo', (5,), '|S10')
999        self.assertEqual(dset.dtype, np.dtype('|S10'))
1000
1001
1002class TestLen(BaseDataset):
1003
1004    """
1005        Feature: Size of first axis is available via Python's len
1006    """
1007
1008    def test_len(self):
1009        """ Python len() (under 32 bits) """
1010        dset = self.f.create_dataset('foo', (312, 15))
1011        self.assertEqual(len(dset), 312)
1012
1013    def test_len_big(self):
1014        """ Python len() vs Dataset.len() """
1015        dset = self.f.create_dataset('foo', (2 ** 33, 15))
1016        self.assertEqual(dset.shape, (2 ** 33, 15))
1017        if sys.maxsize == 2 ** 31 - 1:
1018            with self.assertRaises(OverflowError):
1019                len(dset)
1020        else:
1021            self.assertEqual(len(dset), 2 ** 33)
1022        self.assertEqual(dset.len(), 2 ** 33)
1023
1024
1025class TestIter(BaseDataset):
1026
1027    """
1028        Feature: Iterating over a dataset yields rows
1029    """
1030
1031    def test_iter(self):
1032        """ Iterating over a dataset yields rows """
1033        data = np.arange(30, dtype='f').reshape((10, 3))
1034        dset = self.f.create_dataset('foo', data=data)
1035        for x, y in zip(dset, data):
1036            self.assertEqual(len(x), 3)
1037            self.assertArrayEqual(x, y)
1038
1039    def test_iter_scalar(self):
1040        """ Iterating over scalar dataset raises TypeError """
1041        dset = self.f.create_dataset('foo', shape=())
1042        with self.assertRaises(TypeError):
1043            [x for x in dset]
1044
1045
1046class TestStrings(BaseDataset):
1047
1048    """
1049        Feature: Datasets created with vlen and fixed datatypes correctly
1050        translate to and from HDF5
1051    """
1052
1053    def test_vlen_bytes(self):
1054        """ Vlen bytes dataset maps to vlen ascii in the file """
1055        dt = h5py.string_dtype(encoding='ascii')
1056        ds = self.f.create_dataset('x', (100,), dtype=dt)
1057        tid = ds.id.get_type()
1058        self.assertEqual(type(tid), h5py.h5t.TypeStringID)
1059        self.assertEqual(tid.get_cset(), h5py.h5t.CSET_ASCII)
1060        string_info = h5py.check_string_dtype(ds.dtype)
1061        self.assertEqual(string_info.encoding, 'ascii')
1062
1063    def test_vlen_unicode(self):
1064        """ Vlen unicode dataset maps to vlen utf-8 in the file """
1065        dt = h5py.string_dtype()
1066        ds = self.f.create_dataset('x', (100,), dtype=dt)
1067        tid = ds.id.get_type()
1068        self.assertEqual(type(tid), h5py.h5t.TypeStringID)
1069        self.assertEqual(tid.get_cset(), h5py.h5t.CSET_UTF8)
1070        string_info = h5py.check_string_dtype(ds.dtype)
1071        self.assertEqual(string_info.encoding, 'utf-8')
1072
1073    def test_fixed_ascii(self):
1074        """ Fixed-length bytes dataset maps to fixed-length ascii in the file
1075        """
1076        dt = np.dtype("|S10")
1077        ds = self.f.create_dataset('x', (100,), dtype=dt)
1078        tid = ds.id.get_type()
1079        self.assertEqual(type(tid), h5py.h5t.TypeStringID)
1080        self.assertFalse(tid.is_variable_str())
1081        self.assertEqual(tid.get_size(), 10)
1082        self.assertEqual(tid.get_cset(), h5py.h5t.CSET_ASCII)
1083        string_info = h5py.check_string_dtype(ds.dtype)
1084        self.assertEqual(string_info.encoding, 'ascii')
1085        self.assertEqual(string_info.length, 10)
1086
1087    def test_fixed_utf8(self):
1088        dt = h5py.string_dtype(encoding='utf-8', length=5)
1089        ds = self.f.create_dataset('x', (100,), dtype=dt)
1090        tid = ds.id.get_type()
1091        self.assertEqual(tid.get_cset(), h5py.h5t.CSET_UTF8)
1092        s = 'cù'
1093        ds[0] = s.encode('utf-8')
1094        ds[1] = s
1095        ds[2:4] = [s, s]
1096        ds[4:6] = np.array([s, s], dtype=object)
1097        ds[6:8] = np.array([s.encode('utf-8')] * 2, dtype=dt)
1098        with self.assertRaises(TypeError):
1099            ds[8:10] = np.array([s, s], dtype='U')
1100
1101        np.testing.assert_array_equal(ds[:8], np.array([s.encode('utf-8')] * 8, dtype='S'))
1102
1103    def test_fixed_unicode(self):
1104        """ Fixed-length unicode datasets are unsupported (raise TypeError) """
1105        dt = np.dtype("|U10")
1106        with self.assertRaises(TypeError):
1107            ds = self.f.create_dataset('x', (100,), dtype=dt)
1108
1109    def test_roundtrip_vlen_bytes(self):
1110        """ writing and reading to vlen bytes dataset preserves type and content
1111        """
1112        dt = h5py.string_dtype(encoding='ascii')
1113        ds = self.f.create_dataset('x', (100,), dtype=dt)
1114        data = b"Hello\xef"
1115        ds[0] = data
1116        out = ds[0]
1117        self.assertEqual(type(out), bytes)
1118        self.assertEqual(out, data)
1119
1120    def test_roundtrip_fixed_bytes(self):
1121        """ Writing to and reading from fixed-length bytes dataset preserves
1122        type and content """
1123        dt = np.dtype("|S10")
1124        ds = self.f.create_dataset('x', (100,), dtype=dt)
1125        data = b"Hello\xef"
1126        ds[0] = data
1127        out = ds[0]
1128        self.assertEqual(type(out), np.string_)
1129        self.assertEqual(out, data)
1130
1131    def test_retrieve_vlen_unicode(self):
1132        dt = h5py.string_dtype()
1133        ds = self.f.create_dataset('x', (10,), dtype=dt)
1134        data = "fàilte"
1135        ds[0] = data
1136        self.assertIsInstance(ds[0], bytes)
1137        out = ds.asstr()[0]
1138        self.assertIsInstance(out, str)
1139        self.assertEqual(out, data)
1140
1141    def test_asstr(self):
1142        ds = self.f.create_dataset('x', (10,), dtype=h5py.string_dtype())
1143        data = "fàilte"
1144        ds[0] = data
1145
1146        strwrap1 = ds.asstr('ascii')
1147        with self.assertRaises(UnicodeDecodeError):
1148            out = strwrap1[0]
1149
1150        # Different errors parameter
1151        self.assertEqual(ds.asstr('ascii', 'ignore')[0], 'filte')
1152
1153        # latin-1 will decode it but give the wrong text
1154        self.assertNotEqual(ds.asstr('latin-1')[0], data)
1155
1156        # len of ds
1157        self.assertEqual(10, len(ds.asstr()))
1158
1159
1160        # Array output
1161        np.testing.assert_array_equal(
1162            ds.asstr()[:1], np.array([data], dtype=object)
1163        )
1164
1165    def test_asstr_fixed(self):
1166        dt = h5py.string_dtype(length=5)
1167        ds = self.f.create_dataset('x', (10,), dtype=dt)
1168        data = 'cù'
1169        ds[0] = np.array(data.encode('utf-8'), dtype=dt)
1170
1171        self.assertIsInstance(ds[0], np.bytes_)
1172        out = ds.asstr()[0]
1173        self.assertIsInstance(out, str)
1174        self.assertEqual(out, data)
1175
1176        # Different errors parameter
1177        self.assertEqual(ds.asstr('ascii', 'ignore')[0], 'c')
1178
1179        # latin-1 will decode it but give the wrong text
1180        self.assertNotEqual(ds.asstr('latin-1')[0], data)
1181
1182        # Array output
1183        np.testing.assert_array_equal(
1184            ds.asstr()[:1], np.array([data], dtype=object)
1185        )
1186
1187    def test_unicode_write_error(self):
1188        """Encoding error when writing a non-ASCII string to an ASCII vlen dataset"""
1189        dt = h5py.string_dtype('ascii')
1190        ds = self.f.create_dataset('x', (100,), dtype=dt)
1191        data = "fàilte"
1192        with self.assertRaises(UnicodeEncodeError):
1193            ds[0] = data
1194
1195    def test_unicode_write_bytes(self):
1196        """ Writing valid utf-8 byte strings to a unicode vlen dataset is OK
1197        """
1198        dt = h5py.string_dtype()
1199        ds = self.f.create_dataset('x', (100,), dtype=dt)
1200        data = (u"Hello there" + chr(0x2034)).encode('utf8')
1201        ds[0] = data
1202        out = ds[0]
1203        self.assertEqual(type(out), bytes)
1204        self.assertEqual(out, data)
1205
1206    def test_vlen_bytes_write_ascii_str(self):
1207        """ Writing an ascii str to ascii vlen dataset is OK
1208        """
1209        dt = h5py.string_dtype('ascii')
1210        ds = self.f.create_dataset('x', (100,), dtype=dt)
1211        data = "ASCII string"
1212        ds[0] = data
1213        out = ds[0]
1214        self.assertEqual(type(out), bytes)
1215        self.assertEqual(out, data.encode('ascii'))
1216
1217
1218class TestCompound(BaseDataset):
1219
1220    """
1221        Feature: Compound types correctly round-trip
1222    """
1223
1224    def test_rt(self):
1225        """ Compound types are read back in correct order (issue 236)"""
1226
1227        dt = np.dtype([ ('weight', np.float64),
1228                             ('cputime', np.float64),
1229                             ('walltime', np.float64),
1230                             ('parents_offset', np.uint32),
1231                             ('n_parents', np.uint32),
1232                             ('status', np.uint8),
1233                             ('endpoint_type', np.uint8), ])
1234
1235        testdata = np.ndarray((16,), dtype=dt)
1236        for key in dt.fields:
1237            testdata[key] = np.random.random((16,)) * 100
1238
1239        self.f['test'] = testdata
1240        outdata = self.f['test'][...]
1241        self.assertTrue(np.all(outdata == testdata))
1242        self.assertEqual(outdata.dtype, testdata.dtype)
1243
1244    def test_assign(self):
1245        dt = np.dtype([ ('weight', (np.float64, 3)),
1246                         ('endpoint_type', np.uint8), ])
1247
1248        testdata = np.ndarray((16,), dtype=dt)
1249        for key in dt.fields:
1250            testdata[key] = np.random.random(size=testdata[key].shape) * 100
1251
1252        ds = self.f.create_dataset('test', (16,), dtype=dt)
1253        for key in dt.fields:
1254            ds[key] = testdata[key]
1255
1256        outdata = self.f['test'][...]
1257
1258        self.assertTrue(np.all(outdata == testdata))
1259        self.assertEqual(outdata.dtype, testdata.dtype)
1260
1261    def test_fields(self):
1262        dt = np.dtype([
1263            ('x', np.float64),
1264            ('y', np.float64),
1265            ('z', np.float64),
1266        ])
1267
1268        testdata = np.ndarray((16,), dtype=dt)
1269        for key in dt.fields:
1270            testdata[key] = np.random.random((16,)) * 100
1271
1272        self.f['test'] = testdata
1273
1274        # Extract multiple fields
1275        np.testing.assert_array_equal(
1276            self.f['test'].fields(['x', 'y'])[:], testdata[['x', 'y']]
1277        )
1278        # Extract single field
1279        np.testing.assert_array_equal(
1280            self.f['test'].fields('x')[:], testdata['x']
1281        )
1282
1283        # Check len() on fields wrapper
1284        assert len(self.f['test'].fields('x')) == 16
1285
1286
1287class TestSubarray(BaseDataset):
1288    def test_write_list(self):
1289        ds = self.f.create_dataset("a", (1,), dtype="3int8")
1290        ds[0] = [1, 2, 3]
1291        np.testing.assert_array_equal(ds[:], [[1, 2, 3]])
1292
1293        ds[:] = [[4, 5, 6]]
1294        np.testing.assert_array_equal(ds[:], [[4, 5, 6]])
1295
1296    def test_write_array(self):
1297        ds = self.f.create_dataset("a", (1,), dtype="3int8")
1298        ds[0] = np.array([1, 2, 3])
1299        np.testing.assert_array_equal(ds[:], [[1, 2, 3]])
1300
1301        ds[:] = np.array([[4, 5, 6]])
1302        np.testing.assert_array_equal(ds[:], [[4, 5, 6]])
1303
1304
1305class TestEnum(BaseDataset):
1306
1307    """
1308        Feature: Enum datatype info is preserved, read/write as integer
1309    """
1310
1311    EDICT = {'RED': 0, 'GREEN': 1, 'BLUE': 42}
1312
1313    def test_create(self):
1314        """ Enum datasets can be created and type correctly round-trips """
1315        dt = h5py.enum_dtype(self.EDICT, basetype='i')
1316        ds = self.f.create_dataset('x', (100, 100), dtype=dt)
1317        dt2 = ds.dtype
1318        dict2 = h5py.check_enum_dtype(dt2)
1319        self.assertEqual(dict2, self.EDICT)
1320
1321    def test_readwrite(self):
1322        """ Enum datasets can be read/written as integers """
1323        dt = h5py.enum_dtype(self.EDICT, basetype='i4')
1324        ds = self.f.create_dataset('x', (100, 100), dtype=dt)
1325        ds[35, 37] = 42
1326        ds[1, :] = 1
1327        self.assertEqual(ds[35, 37], 42)
1328        self.assertArrayEqual(ds[1, :], np.array((1,) * 100, dtype='i4'))
1329
1330
1331class TestFloats(BaseDataset):
1332
1333    """
1334        Test support for mini and extended-precision floats
1335    """
1336
1337    def _exectest(self, dt):
1338        dset = self.f.create_dataset('x', (100,), dtype=dt)
1339        self.assertEqual(dset.dtype, dt)
1340        data = np.ones((100,), dtype=dt)
1341        dset[...] = data
1342        self.assertArrayEqual(dset[...], data)
1343
1344    @ut.skipUnless(hasattr(np, 'float16'), "NumPy float16 support required")
1345    def test_mini(self):
1346        """ Mini-floats round trip """
1347        self._exectest(np.dtype('float16'))
1348
1349    # TODO: move these tests to test_h5t
1350    def test_mini_mapping(self):
1351        """ Test mapping for float16 """
1352        if hasattr(np, 'float16'):
1353            self.assertEqual(h5t.IEEE_F16LE.dtype, np.dtype('<f2'))
1354        else:
1355            self.assertEqual(h5t.IEEE_F16LE.dtype, np.dtype('<f4'))
1356
1357
1358class TestTrackTimes(BaseDataset):
1359
1360    """
1361        Feature: track_times
1362    """
1363
1364    def test_disable_track_times(self):
1365        """ check that when track_times=False, the time stamp=0 (Jan 1, 1970) """
1366        ds = self.f.create_dataset('foo', (4,), track_times=False)
1367        ds_mtime = h5py.h5g.get_objinfo(ds._id).mtime
1368        self.assertEqual(0, ds_mtime)
1369
1370    def test_invalid_track_times(self):
1371        """ check that when give track_times an invalid value """
1372        with self.assertRaises(TypeError):
1373            self.f.create_dataset('foo', (4,), track_times='null')
1374
1375
1376class TestZeroShape(BaseDataset):
1377
1378    """
1379        Features of datasets with (0,)-shape axes
1380    """
1381
1382    def test_array_conversion(self):
1383        """ Empty datasets can be converted to NumPy arrays """
1384        ds = self.f.create_dataset('x', 0, maxshape=None)
1385        self.assertEqual(ds.shape, np.array(ds).shape)
1386
1387        ds = self.f.create_dataset('y', (0,), maxshape=(None,))
1388        self.assertEqual(ds.shape, np.array(ds).shape)
1389
1390        ds = self.f.create_dataset('z', (0, 0), maxshape=(None, None))
1391        self.assertEqual(ds.shape, np.array(ds).shape)
1392
1393    def test_reading(self):
1394        """ Slicing into empty datasets works correctly """
1395        dt = [('a', 'f'), ('b', 'i')]
1396        ds = self.f.create_dataset('x', (0,), dtype=dt, maxshape=(None,))
1397        arr = np.empty((0,), dtype=dt)
1398
1399        self.assertEqual(ds[...].shape, arr.shape)
1400        self.assertEqual(ds[...].dtype, arr.dtype)
1401        self.assertEqual(ds[()].shape, arr.shape)
1402        self.assertEqual(ds[()].dtype, arr.dtype)
1403
1404# https://github.com/h5py/h5py/issues/1492
1405empty_regionref_xfail = pytest.mark.xfail(
1406    h5py.version.hdf5_version_tuple == (1, 10, 6),
1407    reason="Issue with empty region refs in HDF5 1.10.6",
1408)
1409
1410class TestRegionRefs(BaseDataset):
1411
1412    """
1413        Various features of region references
1414    """
1415
1416    def setUp(self):
1417        BaseDataset.setUp(self)
1418        self.data = np.arange(100 * 100).reshape((100, 100))
1419        self.dset = self.f.create_dataset('x', data=self.data)
1420        self.dset[...] = self.data
1421
1422    def test_create_ref(self):
1423        """ Region references can be used as slicing arguments """
1424        slic = np.s_[25:35, 10:100:5]
1425        ref = self.dset.regionref[slic]
1426        self.assertArrayEqual(self.dset[ref], self.data[slic])
1427
1428    @empty_regionref_xfail
1429    def test_empty_region(self):
1430        ref = self.dset.regionref[:0]
1431        out = self.dset[ref]
1432        assert out.size == 0
1433        # Ideally we should preserve shape (0, 100), but it seems this is lost.
1434
1435    @empty_regionref_xfail
1436    def test_scalar_dataset(self):
1437        ds = self.f.create_dataset("scalar", data=1.0, dtype='f4')
1438        sid = h5py.h5s.create(h5py.h5s.SCALAR)
1439
1440        # Deselected
1441        sid.select_none()
1442        ref = h5py.h5r.create(ds.id, b'.', h5py.h5r.DATASET_REGION, sid)
1443        assert ds[ref] == h5py.Empty(np.dtype('f4'))
1444
1445        # Selected
1446        sid.select_all()
1447        ref = h5py.h5r.create(ds.id, b'.', h5py.h5r.DATASET_REGION, sid)
1448        assert ds[ref] == ds[()]
1449
1450    def test_ref_shape(self):
1451        """ Region reference shape and selection shape """
1452        slic = np.s_[25:35, 10:100:5]
1453        ref = self.dset.regionref[slic]
1454        self.assertEqual(self.dset.regionref.shape(ref), self.dset.shape)
1455        self.assertEqual(self.dset.regionref.selection(ref), (10, 18))
1456
1457
1458class TestAstype(BaseDataset):
1459    """.astype() wrapper & context manager
1460    """
1461    def test_astype_ctx(self):
1462        dset = self.f.create_dataset('x', (100,), dtype='i2')
1463        dset[...] = np.arange(100)
1464
1465        with warnings.catch_warnings(record=True) as warn_rec:
1466            warnings.simplefilter("always")
1467
1468            with dset.astype('f8'):
1469                self.assertArrayEqual(dset[...], np.arange(100, dtype='f8'))
1470
1471            with dset.astype('f4') as f4ds:
1472                self.assertArrayEqual(f4ds[...], np.arange(100, dtype='f4'))
1473
1474        assert [w.category for w in warn_rec] == [H5pyDeprecationWarning] * 2
1475
1476    def test_astype_wrapper(self):
1477        dset = self.f.create_dataset('x', (100,), dtype='i2')
1478        dset[...] = np.arange(100)
1479        arr = dset.astype('f4')[:]
1480        self.assertArrayEqual(arr, np.arange(100, dtype='f4'))
1481
1482
1483    def test_astype_wrapper_len(self):
1484        dset = self.f.create_dataset('x', (100,), dtype='i2')
1485        dset[...] = np.arange(100)
1486        self.assertEqual(100, len(dset.astype('f4')))
1487
1488
1489class TestScalarCompound(BaseDataset):
1490
1491    """
1492        Retrieval of a single field from a scalar compound dataset should
1493        strip the field info
1494    """
1495
1496    def test_scalar_compound(self):
1497
1498        dt = np.dtype([('a', 'i')])
1499        dset = self.f.create_dataset('x', (), dtype=dt)
1500        self.assertEqual(dset['a'].dtype, np.dtype('i'))
1501
1502
1503class TestVlen(BaseDataset):
1504    def test_int(self):
1505        dt = h5py.vlen_dtype(int)
1506        ds = self.f.create_dataset('vlen', (4,), dtype=dt)
1507        ds[0] = np.arange(3)
1508        ds[1] = np.arange(0)
1509        ds[2] = [1, 2, 3]
1510        ds[3] = np.arange(1)
1511        self.assertArrayEqual(ds[0], np.arange(3))
1512        self.assertArrayEqual(ds[1], np.arange(0))
1513        self.assertArrayEqual(ds[2], np.array([1, 2, 3]))
1514        self.assertArrayEqual(ds[1], np.arange(0))
1515        ds[0:2] = np.array([np.arange(5), np.arange(4)], dtype=object)
1516        self.assertArrayEqual(ds[0], np.arange(5))
1517        self.assertArrayEqual(ds[1], np.arange(4))
1518        ds[0:2] = np.array([np.arange(3), np.arange(3)])
1519        self.assertArrayEqual(ds[0], np.arange(3))
1520        self.assertArrayEqual(ds[1], np.arange(3))
1521
1522    def test_reuse_from_other(self):
1523        dt = h5py.vlen_dtype(int)
1524        ds = self.f.create_dataset('vlen', (1,), dtype=dt)
1525        self.f.create_dataset('vlen2', (1,), ds[()].dtype)
1526
1527    def test_reuse_struct_from_other(self):
1528        dt = [('a', int), ('b', h5py.vlen_dtype(int))]
1529        ds = self.f.create_dataset('vlen', (1,), dtype=dt)
1530        fname = self.f.filename
1531        self.f.close()
1532        self.f = h5py.File(fname, 'a')
1533        self.f.create_dataset('vlen2', (1,), self.f['vlen']['b'][()].dtype)
1534
1535    def test_convert(self):
1536        dt = h5py.vlen_dtype(int)
1537        ds = self.f.create_dataset('vlen', (3,), dtype=dt)
1538        ds[0] = np.array([1.4, 1.2])
1539        ds[1] = np.array([1.2])
1540        ds[2] = [1.2, 2, 3]
1541        self.assertArrayEqual(ds[0], np.array([1, 1]))
1542        self.assertArrayEqual(ds[1], np.array([1]))
1543        self.assertArrayEqual(ds[2], np.array([1, 2, 3]))
1544        ds[0:2] = np.array([[0.1, 1.1, 2.1, 3.1, 4], np.arange(4)], dtype=object)
1545        self.assertArrayEqual(ds[0], np.arange(5))
1546        self.assertArrayEqual(ds[1], np.arange(4))
1547        ds[0:2] = np.array([np.array([0.1, 1.2, 2.2]),
1548                            np.array([0.2, 1.2, 2.2])])
1549        self.assertArrayEqual(ds[0], np.arange(3))
1550        self.assertArrayEqual(ds[1], np.arange(3))
1551
1552    def test_multidim(self):
1553        dt = h5py.vlen_dtype(int)
1554        ds = self.f.create_dataset('vlen', (2, 2), dtype=dt)
1555        ds[0, 0] = np.arange(1)
1556        ds[:, :] = np.array([[np.arange(3), np.arange(2)],
1557                            [np.arange(1), np.arange(2)]], dtype=object)
1558        ds[:, :] = np.array([[np.arange(2), np.arange(2)],
1559                             [np.arange(2), np.arange(2)]])
1560
1561    def _help_float_testing(self, np_dt, dataset_name='vlen'):
1562        """
1563        Helper for testing various vlen numpy data types.
1564        :param np_dt: Numpy datatype to test
1565        :param dataset_name: String name of the dataset to create for testing.
1566        """
1567        dt = h5py.vlen_dtype(np_dt)
1568        ds = self.f.create_dataset(dataset_name, (5,), dtype=dt)
1569
1570        # Create some arrays, and assign them to the dataset
1571        array_0 = np.array([1., 2., 30.], dtype=np_dt)
1572        array_1 = np.array([100.3, 200.4, 98.1, -10.5, -300.0], dtype=np_dt)
1573
1574        # Test that a numpy array of different type gets cast correctly
1575        array_2 = np.array([1, 2, 8], dtype=np.dtype('int32'))
1576        casted_array_2 = array_2.astype(np_dt)
1577
1578        # Test that we can set a list of floats.
1579        list_3 = [1., 2., 900., 0., -0.5]
1580        list_array_3 = np.array(list_3, dtype=np_dt)
1581
1582        # Test that a list of integers gets casted correctly
1583        list_4 = [-1, -100, 0, 1, 9999, 70]
1584        list_array_4 = np.array(list_4, dtype=np_dt)
1585
1586        ds[0] = array_0
1587        ds[1] = array_1
1588        ds[2] = array_2
1589        ds[3] = list_3
1590        ds[4] = list_4
1591
1592        self.assertArrayEqual(array_0, ds[0])
1593        self.assertArrayEqual(array_1, ds[1])
1594        self.assertArrayEqual(casted_array_2, ds[2])
1595        self.assertArrayEqual(list_array_3, ds[3])
1596        self.assertArrayEqual(list_array_4, ds[4])
1597
1598        # Test that we can reassign arrays in the dataset
1599        list_array_3 = np.array([0.3, 2.2], dtype=np_dt)
1600
1601        ds[0] = list_array_3[:]
1602
1603        self.assertArrayEqual(list_array_3, ds[0])
1604
1605        # Make sure we can close the file.
1606        self.f.flush()
1607        self.f.close()
1608
1609    def test_numpy_float16(self):
1610        np_dt = np.dtype('float16')
1611        self._help_float_testing(np_dt)
1612
1613    def test_numpy_float32(self):
1614        np_dt = np.dtype('float32')
1615        self._help_float_testing(np_dt)
1616
1617    def test_numpy_float64_from_dtype(self):
1618        np_dt = np.dtype('float64')
1619        self._help_float_testing(np_dt)
1620
1621    def test_numpy_float64_2(self):
1622        np_dt = np.float64
1623        self._help_float_testing(np_dt)
1624
1625    def test_non_contiguous_arrays(self):
1626        """Test that non-contiguous arrays are stored correctly"""
1627        self.f.create_dataset('nc', (10,), dtype=h5py.vlen_dtype('bool'))
1628        x = np.array([True, False, True, True, False, False, False])
1629        self.f['nc'][0] = x[::2]
1630
1631        assert all(self.f['nc'][0] == x[::2]), f"{self.f['nc'][0]} != {x[::2]}"
1632
1633        self.f.create_dataset('nc2', (10,), dtype=h5py.vlen_dtype('int8'))
1634        y = np.array([2, 4, 1, 5, -1, 3, 7])
1635        self.f['nc2'][0] = y[::2]
1636
1637        assert all(self.f['nc2'][0] == y[::2]), f"{self.f['nc2'][0]} != {y[::2]}"
1638
1639
1640class TestLowOpen(BaseDataset):
1641
1642    def test_get_access_list(self):
1643        """ Test H5Dget_access_plist """
1644        ds = self.f.create_dataset('foo', (4,))
1645        p_list = ds.id.get_access_plist()
1646
1647    def test_dapl(self):
1648        """ Test the dapl keyword to h5d.open """
1649        dapl = h5py.h5p.create(h5py.h5p.DATASET_ACCESS)
1650        dset = self.f.create_dataset('x', (100,))
1651        del dset
1652        dsid = h5py.h5d.open(self.f.id, b'x', dapl)
1653        self.assertIsInstance(dsid, h5py.h5d.DatasetID)
1654
1655
1656@ut.skipUnless(h5py.version.hdf5_version_tuple >= (1, 10, 5),
1657               "chunk info requires  HDF5 >= 1.10.5")
1658def test_get_chunk_details():
1659    from io import BytesIO
1660    buf = BytesIO()
1661    with h5py.File(buf, 'w') as fout:
1662        fout.create_dataset('test', shape=(100, 100), chunks=(10, 10), dtype='i4')
1663        fout['test'][:] = 1
1664
1665    buf.seek(0)
1666    with h5py.File(buf, 'r') as fin:
1667        ds = fin['test'].id
1668
1669        assert ds.get_num_chunks() == 100
1670        for j in range(100):
1671            offset = tuple(np.array(np.unravel_index(j, (10, 10))) * 10)
1672
1673            si = ds.get_chunk_info(j)
1674            assert si.chunk_offset == offset
1675            assert si.filter_mask == 0
1676            assert si.byte_offset is not None
1677            assert si.size > 0
1678
1679        si = ds.get_chunk_info_by_coord((0, 0))
1680        assert si.chunk_offset == (0, 0)
1681        assert si.filter_mask == 0
1682        assert si.byte_offset is not None
1683        assert si.size > 0
1684
1685
1686def test_empty_shape(writable_file):
1687    ds = writable_file.create_dataset('empty', dtype='int32')
1688    assert ds.shape is None
1689    assert ds.maxshape is None
1690
1691
1692def test_zero_storage_size():
1693    # https://github.com/h5py/h5py/issues/1475
1694    from io import BytesIO
1695    buf = BytesIO()
1696    with h5py.File(buf, 'w') as fout:
1697        fout.create_dataset('empty', dtype='uint8')
1698
1699    buf.seek(0)
1700    with h5py.File(buf, 'r') as fin:
1701        assert fin['empty'].chunks is None
1702        assert fin['empty'].id.get_offset() is None
1703        assert fin['empty'].id.get_storage_size() == 0
1704
1705
1706def test_python_int_uint64(writable_file):
1707    # https://github.com/h5py/h5py/issues/1547
1708    data = [np.iinfo(np.int64).max, np.iinfo(np.int64).max + 1]
1709
1710    # Check creating a new dataset
1711    ds = writable_file.create_dataset('x', data=data, dtype=np.uint64)
1712    assert ds.dtype == np.dtype(np.uint64)
1713    np.testing.assert_array_equal(ds[:], np.array(data, dtype=np.uint64))
1714
1715    # Check writing to an existing dataset
1716    ds[:] = data
1717    np.testing.assert_array_equal(ds[:], np.array(data, dtype=np.uint64))
1718
1719
1720def test_setitem_fancy_indexing(writable_file):
1721    # https://github.com/h5py/h5py/issues/1593
1722    arr = writable_file.create_dataset('data', (5, 1000, 2), dtype=np.uint8)
1723    block = np.random.randint(255, size=(5, 3, 2))
1724    arr[:, [0, 2, 4], ...] = block
1725
1726
1727def test_vlen_spacepad():
1728    with File(get_data_file_path("vlen_string_dset.h5")) as f:
1729        assert f["DS1"][0] == b"Parting"
1730
1731
1732def test_vlen_nullterm():
1733    with File(get_data_file_path("vlen_string_dset_utc.h5")) as f:
1734        assert f["ds1"][0] == b"2009-12-20T10:16:18.662409Z"
1735
1736
1737@pytest.mark.skipif(
1738    h5py.version.hdf5_version_tuple < (1, 10, 3),
1739    reason="Appears you cannot pass an unknown filter id for HDF5 < 1.10.3"
1740)
1741def test_allow_unknown_filter(writable_file):
1742    # apparently 256-511 are reserved for testing purposes
1743    fake_filter_id = 256
1744    ds = writable_file.create_dataset(
1745        'data', shape=(10, 10), dtype=np.uint8, compression=fake_filter_id,
1746        allow_unknown_filter=True
1747    )
1748    assert str(fake_filter_id) in ds._filters
1749
1750
1751class TestCommutative(BaseDataset):
1752    """
1753    Test the symmetry of operators, at least with the numpy types.
1754    Issue: https://github.com/h5py/h5py/issues/1947
1755    """
1756    def test_numpy_commutative(self,):
1757        """
1758        Create a h5py dataset, extract one element convert to numpy
1759        Check that it returns symmetric response to == and !=
1760        """
1761        shape = (100,1)
1762        dset = self.f.create_dataset("test", shape, dtype=float,
1763                                     data=np.random.rand(*shape))
1764
1765        # grab a value from the elements, ie dset[0]
1766        # check that mask arrays are commutative wrt ==, !=
1767        val = np.float64(dset[0])
1768
1769        assert np.all((val == dset) == (dset == val))
1770        assert np.all((val != dset) == (dset != val))
1771
1772        # generate sample not in the dset, ie max(dset)+delta
1773        # check that mask arrays are commutative wrt ==, !=
1774        delta = 0.001
1775        nval = np.nanmax(dset)+delta
1776
1777        assert np.all((nval == dset) == (dset == nval))
1778        assert np.all((nval != dset) == (dset != nval))
1779
1780    def test_basetype_commutative(self,):
1781        """
1782        Create a h5py dataset and check basetype compatibility.
1783        Check that operation is symmetric, even if it is potentially
1784        not meaningful.
1785        """
1786        shape = (100,1)
1787        dset = self.f.create_dataset("test", shape, dtype=float,
1788                                     data=np.random.rand(*shape))
1789
1790        # generate float type, sample float(0.)
1791        # check that operation is symmetric (but potentially meaningless)
1792        val = float(0.)
1793        assert (val == dset) == (dset == val)
1794        assert (val != dset) == (dset != val)
1795