1import numpy as np
2
3from numba.cuda.testing import unittest, CUDATestCase
4from numba.cuda.testing import skip_on_cudasim, skip_unless_cudasim
5from numba import cuda
6
7
class TestCudaArray(CUDATestCase):
    """Tests for CUDA device array creation, transfer, and layout.

    Covers zero-length and strided transfers, rejection of
    non-contiguous host arrays, and ``device_array_like`` stride /
    contiguity semantics (including views and transposes).
    """

    def test_gpu_array_zero_length(self):
        # Round-tripping an empty array must preserve shape and size on
        # both the device array and the copied-back host array.
        x = np.arange(0)
        dx = cuda.to_device(x)
        hx = dx.copy_to_host()
        self.assertEqual(x.shape, dx.shape)
        self.assertEqual(x.size, dx.size)
        self.assertEqual(x.shape, hx.shape)
        self.assertEqual(x.size, hx.size)

    def test_gpu_array_strided(self):

        @cuda.jit('void(double[:])')
        def kernel(x):
            i = cuda.grid(1)
            if i < x.shape[0]:
                x[i] = i

        # Build a byte-offset view: reinterpret the 10-double buffer as
        # raw bytes, then view 9 doubles starting at byte offset 4, so
        # the data pointer is not aligned to the original element grid.
        x = np.arange(10, dtype=np.double)
        y = np.ndarray(shape=10 * 8, buffer=x, dtype=np.byte)
        z = np.ndarray(9, buffer=y[4:-4], dtype=np.double)
        kernel[10, 10](z)
        self.assertTrue(np.allclose(z, list(range(9))))

    def test_gpu_array_interleaved(self):
        # auto_device must reject a strided (non-contiguous) host array,
        # complaining about the contiguous-ness of the array.
        x = np.arange(10, dtype=np.double)
        y = x[:-1:2]
        with self.assertRaises(ValueError):
            cuda.devicearray.auto_device(y)

    def test_auto_device_const(self):
        # A bare scalar is wrapped into a device array holding that value.
        d, _ = cuda.devicearray.auto_device(2)
        self.assertTrue(np.all(d.copy_to_host() == np.array(2)))

    def _test_device_array_like_same(self, d_a):
        """
        Tests of device_array_like where shape, strides, dtype, and flags
        should all be equal.
        """
        d_a_like = cuda.device_array_like(d_a)
        self.assertEqual(d_a.shape, d_a_like.shape)
        self.assertEqual(d_a.strides, d_a_like.strides)
        self.assertEqual(d_a.dtype, d_a_like.dtype)
        self.assertEqual(d_a.flags['C_CONTIGUOUS'],
                         d_a_like.flags['C_CONTIGUOUS'])
        self.assertEqual(d_a.flags['F_CONTIGUOUS'],
                         d_a_like.flags['F_CONTIGUOUS'])

    def test_device_array_like_1d(self):
        d_a = cuda.device_array(10, order='C')
        self._test_device_array_like_same(d_a)

    def test_device_array_like_2d(self):
        d_a = cuda.device_array((10, 12), order='C')
        self._test_device_array_like_same(d_a)

    def test_device_array_like_2d_transpose(self):
        d_a = cuda.device_array((10, 12), order='C')
        self._test_device_array_like_same(d_a.T)

    def test_device_array_like_3d(self):
        d_a = cuda.device_array((10, 12, 14), order='C')
        self._test_device_array_like_same(d_a)

    def test_device_array_like_1d_f(self):
        d_a = cuda.device_array(10, order='F')
        self._test_device_array_like_same(d_a)

    def test_device_array_like_2d_f(self):
        d_a = cuda.device_array((10, 12), order='F')
        self._test_device_array_like_same(d_a)

    def test_device_array_like_2d_f_transpose(self):
        d_a = cuda.device_array((10, 12), order='F')
        self._test_device_array_like_same(d_a.T)

    def test_device_array_like_3d_f(self):
        d_a = cuda.device_array((10, 12, 14), order='F')
        self._test_device_array_like_same(d_a)

    def _test_device_array_like_view(self, view, d_view):
        """
        Tests of device_array_like where the original array is a view - the
        strides should not be equal because a contiguous array is expected.
        """
        d_like = cuda.device_array_like(d_view)
        self.assertEqual(d_view.shape, d_like.shape)
        self.assertEqual(d_view.dtype, d_like.dtype)

        # Use NumPy as a reference for the expected strides
        like = np.zeros_like(view)
        self.assertEqual(d_like.strides, like.strides)
        self.assertEqual(d_like.flags['C_CONTIGUOUS'],
                         like.flags['C_CONTIGUOUS'])
        self.assertEqual(d_like.flags['F_CONTIGUOUS'],
                         like.flags['F_CONTIGUOUS'])

    def test_device_array_like_1d_view(self):
        shape = 10
        view = np.zeros(shape)[::2]
        d_view = cuda.device_array(shape)[::2]
        self._test_device_array_like_view(view, d_view)

    def test_device_array_like_1d_view_f(self):
        shape = 10
        view = np.zeros(shape, order='F')[::2]
        d_view = cuda.device_array(shape, order='F')[::2]
        self._test_device_array_like_view(view, d_view)

    def test_device_array_like_2d_view(self):
        shape = (10, 12)
        view = np.zeros(shape)[::2, ::2]
        d_view = cuda.device_array(shape)[::2, ::2]
        self._test_device_array_like_view(view, d_view)

    def test_device_array_like_2d_view_f(self):
        shape = (10, 12)
        view = np.zeros(shape, order='F')[::2, ::2]
        d_view = cuda.device_array(shape, order='F')[::2, ::2]
        self._test_device_array_like_view(view, d_view)

    @skip_on_cudasim('Numba and NumPy stride semantics differ for transpose')
    def test_device_array_like_2d_view_transpose_device(self):
        shape = (10, 12)
        d_view = cuda.device_array(shape)[::2, ::2].T
        # This is a special case (see issue #4974) because creating the
        # transpose creates a new contiguous allocation with different
        # strides. In this case, rather than comparing against NumPy, we
        # can only compare against expected values.
        d_like = cuda.device_array_like(d_view)
        self.assertEqual(d_view.shape, d_like.shape)
        self.assertEqual(d_view.dtype, d_like.dtype)
        self.assertEqual((40, 8), d_like.strides)
        self.assertTrue(d_like.is_c_contiguous())
        self.assertFalse(d_like.is_f_contiguous())

    @skip_unless_cudasim('Numba and NumPy stride semantics differ for '
                         'transpose')
    def test_device_array_like_2d_view_transpose_simulator(self):
        shape = (10, 12)
        view = np.zeros(shape)[::2, ::2].T
        d_view = cuda.device_array(shape)[::2, ::2].T
        # On the simulator, the transpose has different strides to on a CUDA
        # device (See issue #4974). Here we can compare strides against NumPy
        # as a reference.
        like = np.zeros_like(view)
        d_like = cuda.device_array_like(d_view)
        self.assertEqual(d_view.shape, d_like.shape)
        self.assertEqual(d_view.dtype, d_like.dtype)
        self.assertEqual(like.strides, d_like.strides)
        self.assertEqual(like.flags['C_CONTIGUOUS'],
                         d_like.flags['C_CONTIGUOUS'])
        self.assertEqual(like.flags['F_CONTIGUOUS'],
                         d_like.flags['F_CONTIGUOUS'])

    def test_device_array_like_2d_view_f_transpose(self):
        shape = (10, 12)
        view = np.zeros(shape, order='F')[::2, ::2].T
        d_view = cuda.device_array(shape, order='F')[::2, ::2].T
        self._test_device_array_like_view(view, d_view)

    @skip_on_cudasim('Kernel definitions not created in the simulator')
    def test_issue_4628(self):
        # CUDA Device arrays were reported as always being typed with 'A' order
        # so launching the kernel with a host array and then a device array
        # resulted in two definitions being compiled - one for 'C' order from
        # the host array, and one for 'A' order from the device array. With the
        # resolution of this issue, the order of the device array is also 'C',
        # so after the kernel launches there should only be one definition of
        # the function.
        @cuda.jit
        def func(A, out):
            i = cuda.grid(1)
            out[i] = A[i] * 2

        n = 128
        a = np.ones((n,))
        d_a = cuda.to_device(a)
        result = np.zeros((n,))

        func[1, 128](a, result)
        func[1, 128](d_a, result)

        self.assertEqual(1, len(func.definitions))
207
208
# Allow running this test module directly (outside the full suite runner).
if __name__ == '__main__':
    unittest.main()
211