1from __future__ import absolute_import, division, print_function
2
3import unittest
4
5import numpy as np
6from numpy.linalg.linalg import LinAlgError
7
8import theano
9from theano import config
10from theano.gpuarray.linalg import (GpuCusolverSolve, GpuCublasTriangularSolve,
11                                    GpuCholesky, GpuMagmaCholesky,
12                                    GpuMagmaEigh, GpuMagmaMatrixInverse,
13                                    GpuMagmaQR, GpuMagmaSVD,
14                                    cusolver_available, gpu_matrix_inverse,
15                                    gpu_cholesky,
16                                    gpu_solve, gpu_solve_lower_triangular,
17                                    gpu_svd, gpu_qr)
18from theano.tensor.nlinalg import (SVD, MatrixInverse, QRFull,
19                                   QRIncomplete, eigh, matrix_inverse, qr)
20from theano.tensor.slinalg import Cholesky, cholesky, imported_scipy
21from theano.tests import unittest_tools as utt
22
23from .. import gpuarray_shared_constructor
24from .config import mode_with_gpu, mode_without_gpu
25from .test_basic_ops import rand
26from nose.tools import assert_raises
27
28
29class TestCusolver(unittest.TestCase):
30
31    def setUp(self):
32        if not cusolver_available:
33            self.skipTest('Optional package scikits.cuda.cusolver not available')
34
35    def run_gpu_solve(self, A_val, x_val, A_struct=None):
36        b_val = np.dot(A_val, x_val)
37        b_val_trans = np.dot(A_val.T, x_val)
38
39        A = theano.tensor.matrix("A", dtype="float32")
40        b = theano.tensor.matrix("b", dtype="float32")
41        b_trans = theano.tensor.matrix("b", dtype="float32")
42
43        if A_struct is None:
44            solver = gpu_solve(A, b)
45            solver_trans = gpu_solve(A, b_trans, trans='T')
46        else:
47            solver = gpu_solve(A, b, A_struct)
48            solver_trans = gpu_solve(A, b_trans, A_struct, trans='T')
49
50        fn = theano.function([A, b, b_trans], [solver, solver_trans], mode=mode_with_gpu)
51        res = fn(A_val, b_val, b_val_trans)
52        x_res = np.array(res[0])
53        x_res_trans = np.array(res[1])
54        utt.assert_allclose(x_val, x_res)
55        utt.assert_allclose(x_val, x_res_trans)
56
57    def test_diag_solve(self):
58        np.random.seed(1)
59        A_val = np.asarray([[2, 0, 0], [0, 1, 0], [0, 0, 1]],
60                           dtype="float32")
61        x_val = np.random.uniform(-0.4, 0.4, (A_val.shape[1],
62                                  1)).astype("float32")
63        self.run_gpu_solve(A_val, x_val)
64
65    def test_bshape_solve(self):
66        # Test when shape of b (k, m) is such as m > k
67
68        np.random.seed(1)
69        A_val = np.asarray([[2, 0, 0], [0, 1, 0], [0, 0, 1]],
70                           dtype="float32")
71        x_val = np.random.uniform(-0.4, 0.4, (A_val.shape[1],
72                                  A_val.shape[1] + 1)).astype("float32")
73        self.run_gpu_solve(A_val, x_val)
74
75    def test_sym_solve(self):
76        np.random.seed(1)
77        A_val = np.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
78        A_sym = np.dot(A_val, A_val.T)
79        x_val = np.random.uniform(-0.4, 0.4, (A_val.shape[1],
80                                  1)).astype("float32")
81        self.run_gpu_solve(A_sym, x_val, 'symmetric')
82
83    def test_orth_solve(self):
84        np.random.seed(1)
85        A_val = np.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
86        A_orth = np.linalg.svd(A_val)[0]
87        x_val = np.random.uniform(-0.4, 0.4, (A_orth.shape[1],
88                                  1)).astype("float32")
89        self.run_gpu_solve(A_orth, x_val)
90
91    def test_uni_rand_solve(self):
92        np.random.seed(1)
93        A_val = np.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
94        x_val = np.random.uniform(-0.4, 0.4,
95                                  (A_val.shape[1], 4)).astype("float32")
96        self.run_gpu_solve(A_val, x_val)
97
98    def test_linalgerrsym_solve(self):
99        np.random.seed(1)
100        A_val = np.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
101        x_val = np.random.uniform(-0.4, 0.4,
102                                  (A_val.shape[1], 4)).astype("float32")
103        A_val = np.dot(A_val.T, A_val)
104        # make A singular
105        A_val[:, 2] = A_val[:, 1] + A_val[:, 3]
106
107        A = theano.tensor.matrix("A", dtype="float32")
108        b = theano.tensor.matrix("b", dtype="float32")
109        solver = gpu_solve(A, b, 'symmetric')
110
111        fn = theano.function([A, b], [solver], mode=mode_with_gpu)
112        self.assertRaises(LinAlgError, fn, A_val, x_val)
113
114    def test_linalgerr_solve(self):
115        np.random.seed(1)
116        A_val = np.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
117        x_val = np.random.uniform(-0.4, 0.4,
118                                  (A_val.shape[1], 4)).astype("float32")
119        # make A singular
120        A_val[:, 2] = 0
121
122        A = theano.tensor.matrix("A", dtype="float32")
123        b = theano.tensor.matrix("b", dtype="float32")
124        solver = gpu_solve(A, b, trans='T')
125
126        fn = theano.function([A, b], [solver], mode=mode_with_gpu)
127        self.assertRaises(LinAlgError, fn, A_val, x_val)
128
129    def verify_solve_grad(self, m, n, A_structure, lower, rng):
130        # ensure diagonal elements of A relatively large to avoid numerical
131        # precision issues
132        A_val = (rng.normal(size=(m, m)) * 0.5 +
133                 np.eye(m)).astype(config.floatX)
134        if A_structure == 'lower_triangular':
135            A_val = np.tril(A_val)
136        elif A_structure == 'upper_triangular':
137            A_val = np.triu(A_val)
138        if n is None:
139            b_val = rng.normal(size=m).astype(config.floatX)
140        else:
141            b_val = rng.normal(size=(m, n)).astype(config.floatX)
142        eps = None
143        if config.floatX == "float64":
144            eps = 2e-8
145
146        if A_structure in ('lower_triangular', 'upper_triangular'):
147            solve_op = GpuCublasTriangularSolve(lower=lower)
148        else:
149            solve_op = GpuCusolverSolve(A_structure="general")
150        utt.verify_grad(solve_op, [A_val, b_val], 3, rng, eps=eps)
151
152    def test_solve_grad(self):
153        rng = np.random.RandomState(utt.fetch_seed())
154        structures = ['general', 'lower_triangular', 'upper_triangular']
155        for A_structure in structures:
156            lower = (A_structure == 'lower_triangular')
157            # self.verify_solve_grad(5, None, A_structure, lower, rng)
158            self.verify_solve_grad(6, 1, A_structure, lower, rng)
159            self.verify_solve_grad(4, 3, A_structure, lower, rng)
160        # lower should have no effect for A_structure == 'general' so also
161        # check lower=True case
162        self.verify_solve_grad(4, 3, 'general', lower=True, rng=rng)
163
164
165class TestGpuCholesky(unittest.TestCase):
166
167    def setUp(self):
168        if not cusolver_available:
169            self.skipTest('Optional package scikits.cuda.cusolver not available')
170        utt.seed_rng()
171
172    def get_gpu_cholesky_func(self, lower=True, inplace=False):
173        # Helper function to compile function from GPU Cholesky op.
174        A = theano.tensor.matrix("A", dtype="float32")
175        cholesky_op = GpuCholesky(lower=lower, inplace=inplace)
176        chol_A = cholesky_op(A)
177        return theano.function([A], chol_A, accept_inplace=inplace,
178                               mode=mode_with_gpu)
179
180    def compare_gpu_cholesky_to_np(self, A_val, lower=True, inplace=False):
181        # Helper function to compare op output to np.cholesky output.
182        chol_A_val = np.linalg.cholesky(A_val)
183        if not lower:
184            chol_A_val = chol_A_val.T
185        fn = self.get_gpu_cholesky_func(lower, inplace)
186        res = fn(A_val)
187        chol_A_res = np.array(res)
188        utt.assert_allclose(chol_A_res, chol_A_val)
189
190    def test_gpu_cholesky_opt(self):
191        if not imported_scipy:
192            self.skipTest('SciPy is not enabled, skipping test')
193        A = theano.tensor.matrix("A", dtype="float32")
194        fn = theano.function([A], cholesky(A), mode=mode_with_gpu)
195        assert any([isinstance(node.op, GpuCholesky)
196                    for node in fn.maker.fgraph.toposort()])
197
198    def test_invalid_input_fail_non_square(self):
199        # Invalid Cholesky input test with non-square matrix as input.
200        A_val = np.random.normal(size=(3, 2)).astype("float32")
201        fn = self.get_gpu_cholesky_func(True, False)
202        self.assertRaises(ValueError, fn, A_val)
203
204    def test_invalid_input_fail_vector(self):
205        # Invalid Cholesky input test with vector as input.
206        def invalid_input_func():
207            A = theano.tensor.vector("A", dtype="float32")
208            GpuCholesky(lower=True, inplace=False)(A)
209        self.assertRaises(AssertionError, invalid_input_func)
210
211    def test_invalid_input_fail_tensor3(self):
212        # Invalid Cholesky input test with 3D tensor as input.
213        def invalid_input_func():
214            A = theano.tensor.tensor3("A", dtype="float32")
215            GpuCholesky(lower=True, inplace=False)(A)
216        self.assertRaises(AssertionError, invalid_input_func)
217
218    @utt.assertFailure_fast
219    def test_diag_chol(self):
220        # Diagonal matrix input Cholesky test.
221        for lower in [True, False]:
222            for inplace in [True, False]:
223                # make sure all diagonal elements are positive so positive-definite
224                A_val = np.diag(np.random.uniform(size=5).astype("float32") + 1)
225                self.compare_gpu_cholesky_to_np(A_val, lower=lower, inplace=inplace)
226
227    @utt.assertFailure_fast
228    def test_dense_chol_lower(self):
229        # Dense matrix input lower-triangular Cholesky test.
230        for lower in [True, False]:
231            for inplace in [True, False]:
232                M_val = np.random.normal(size=(3, 3)).astype("float32")
233                # A = M.dot(M) will be positive definite for all non-singular M
234                A_val = M_val.dot(M_val.T)
235                self.compare_gpu_cholesky_to_np(A_val, lower=lower, inplace=inplace)
236
237    def test_invalid_input_fail_non_symmetric(self):
238        # Invalid Cholesky input test with non-symmetric input.
239        #    (Non-symmetric real input must also be non-positive definite).
240        A_val = None
241        while True:
242            A_val = np.random.normal(size=(3, 3)).astype("float32")
243            if not np.allclose(A_val, A_val.T):
244                break
245        fn = self.get_gpu_cholesky_func(True, False)
246        self.assertRaises(LinAlgError, fn, A_val)
247
248    def test_invalid_input_fail_negative_definite(self):
249        # Invalid Cholesky input test with negative-definite input.
250        M_val = np.random.normal(size=(3, 3)).astype("float32")
251        # A = -M.dot(M) will be negative definite for all non-singular M
252        A_val = -M_val.dot(M_val.T)
253        fn = self.get_gpu_cholesky_func(True, False)
254        self.assertRaises(LinAlgError, fn, A_val)
255
256
257class TestGpuCholesky64(unittest.TestCase):
258
259    def setUp(self):
260        if not cusolver_available:
261            self.skipTest('Optional package scikits.cuda.cusolver not available')
262        utt.seed_rng()
263
264    def get_gpu_cholesky_func(self, lower=True, inplace=False):
265        # Helper function to compile function from GPU Cholesky op.
266        A = theano.tensor.matrix("A", dtype="float64")
267        cholesky_op = GpuCholesky(lower=lower, inplace=inplace)
268        chol_A = cholesky_op(A)
269        return theano.function([A], chol_A, accept_inplace=inplace,
270                               mode=mode_with_gpu)
271
272    def compare_gpu_cholesky_to_np(self, A_val, lower=True, inplace=False):
273        # Helper function to compare op output to np.cholesky output.
274        chol_A_val = np.linalg.cholesky(A_val)
275        if not lower:
276            chol_A_val = chol_A_val.T
277        fn = self.get_gpu_cholesky_func(lower, inplace)
278        res = fn(A_val)
279        chol_A_res = np.array(res)
280        utt.assert_allclose(chol_A_res, chol_A_val)
281
282    def test_gpu_cholesky_opt(self):
283        if not imported_scipy:
284            self.skipTest('SciPy is not enabled, skipping test')
285        A = theano.tensor.matrix("A", dtype="float64")
286        fn = theano.function([A], cholesky(A), mode=mode_with_gpu)
287        assert any([isinstance(node.op, GpuCholesky)
288                    for node in fn.maker.fgraph.toposort()])
289
290    def test_invalid_input_fail_non_square(self):
291        # Invalid Cholesky input test with non-square matrix as input.
292        A_val = np.random.normal(size=(3, 2)).astype("float64")
293        fn = self.get_gpu_cholesky_func(True, False)
294        self.assertRaises(ValueError, fn, A_val)
295
296    def test_invalid_input_fail_vector(self):
297        # Invalid Cholesky input test with vector as input.
298        def invalid_input_func():
299            A = theano.tensor.vector("A", dtype="float64")
300            GpuCholesky(lower=True, inplace=False)(A)
301        self.assertRaises(AssertionError, invalid_input_func)
302
303    def test_invalid_input_fail_tensor3(self):
304        # Invalid Cholesky input test with 3D tensor as input.
305        def invalid_input_func():
306            A = theano.tensor.tensor3("A", dtype="float64")
307            GpuCholesky(lower=True, inplace=False)(A)
308        self.assertRaises(AssertionError, invalid_input_func)
309
310    @utt.assertFailure_fast
311    def test_diag_chol(self):
312        # Diagonal matrix input Cholesky test.
313        for lower in [True, False]:
314            for inplace in [True, False]:
315                # make sure all diagonal elements are positive so positive-definite
316                A_val = np.diag(np.random.uniform(size=5).astype("float64") + 1)
317                self.compare_gpu_cholesky_to_np(A_val, lower=lower, inplace=inplace)
318
319    @utt.assertFailure_fast
320    def test_dense_chol_lower(self):
321        # Dense matrix input lower-triangular Cholesky test.
322        for lower in [True, False]:
323            for inplace in [True, False]:
324                M_val = np.random.normal(size=(3, 3)).astype("float64")
325                # A = M.dot(M) will be positive definite for all non-singular M
326                A_val = M_val.dot(M_val.T)
327                self.compare_gpu_cholesky_to_np(A_val, lower=lower, inplace=inplace)
328
329    def test_invalid_input_fail_non_symmetric(self):
330        # Invalid Cholesky input test with non-symmetric input.
331        #    (Non-symmetric real input must also be non-positive definite).
332        A_val = None
333        while True:
334            A_val = np.random.normal(size=(3, 3)).astype("float64")
335            if not np.allclose(A_val, A_val.T):
336                break
337        fn = self.get_gpu_cholesky_func(True, False)
338        self.assertRaises(LinAlgError, fn, A_val)
339
340    def test_invalid_input_fail_negative_definite(self):
341        # Invalid Cholesky input test with negative-definite input.
342        M_val = np.random.normal(size=(3, 3)).astype("float64")
343        # A = -M.dot(M) will be negative definite for all non-singular M
344        A_val = -M_val.dot(M_val.T)
345        fn = self.get_gpu_cholesky_func(True, False)
346        self.assertRaises(LinAlgError, fn, A_val)
347
348
349class TestMagma(unittest.TestCase):
350
351    def setUp(self):
352        if not config.magma.enabled:
353            self.skipTest('Magma is not enabled, skipping test')
354
355    def test_magma_opt_float16(self):
356        ops_to_gpu = [(MatrixInverse(), GpuMagmaMatrixInverse),
357                      (SVD(), GpuMagmaSVD),
358                      (QRFull(mode='reduced'), GpuMagmaQR),
359                      (QRIncomplete(mode='r'), GpuMagmaQR),
360                      # TODO: add support for float16 to Eigh numpy
361                      # (Eigh(), GpuMagmaEigh),
362                      (Cholesky(), GpuMagmaCholesky)]
363        for op, gpu_op in ops_to_gpu:
364            A = theano.tensor.matrix("A", dtype="float16")
365            fn = theano.function([A], op(A), mode=mode_with_gpu.excluding('cusolver'))
366            assert any([isinstance(node.op, gpu_op)
367                        for node in fn.maker.fgraph.toposort()])
368
369    def test_gpu_matrix_inverse(self):
370        A = theano.tensor.fmatrix("A")
371
372        fn = theano.function([A], gpu_matrix_inverse(A), mode=mode_with_gpu)
373        N = 1000
374        test_rng = np.random.RandomState(seed=1)
375        # Copied from theano.tensor.tests.test_basic.rand.
376        A_val = test_rng.rand(N, N).astype('float32') * 2 - 1
377        A_val_inv = fn(A_val)
378        utt.assert_allclose(np.eye(N), np.dot(A_val_inv, A_val), atol=1e-2)
379
380    @utt.assertFailure_fast
381    def test_gpu_matrix_inverse_inplace(self):
382        N = 1000
383        test_rng = np.random.RandomState(seed=1)
384        A_val_gpu = gpuarray_shared_constructor(test_rng.rand(N, N).astype('float32') * 2 - 1)
385        A_val_copy = A_val_gpu.get_value()
386        A_val_gpu_inv = GpuMagmaMatrixInverse()(A_val_gpu)
387        fn = theano.function([], A_val_gpu_inv, mode=mode_with_gpu, updates=[(A_val_gpu, A_val_gpu_inv)])
388        assert any([
389            node.op.inplace
390            for node in fn.maker.fgraph.toposort() if
391            isinstance(node.op, GpuMagmaMatrixInverse)
392        ])
393        fn()
394        utt.assert_allclose(np.eye(N), np.dot(A_val_gpu.get_value(), A_val_copy), atol=5e-3)
395
396    @utt.assertFailure_fast
397    def test_gpu_matrix_inverse_inplace_opt(self):
398        A = theano.tensor.fmatrix("A")
399        fn = theano.function([A], matrix_inverse(A), mode=mode_with_gpu)
400        assert any([
401            node.op.inplace
402            for node in fn.maker.fgraph.toposort() if
403            isinstance(node.op, GpuMagmaMatrixInverse)
404        ])
405
406    def run_gpu_svd(self, A_val, full_matrices=True, compute_uv=True):
407        A = theano.tensor.fmatrix("A")
408        f = theano.function(
409            [A], gpu_svd(A, full_matrices=full_matrices, compute_uv=compute_uv),
410            mode=mode_with_gpu)
411        return f(A_val)
412
413    def assert_column_orthonormal(self, Ot):
414        utt.assert_allclose(np.dot(Ot.T, Ot), np.eye(Ot.shape[1]))
415
416    def check_svd(self, A, U, S, VT, rtol=None, atol=None):
417        S_m = np.zeros_like(A)
418        np.fill_diagonal(S_m, S)
419        utt.assert_allclose(
420            np.dot(np.dot(U, S_m), VT), A, rtol=rtol, atol=atol)
421
422    def test_gpu_svd_wide(self):
423        A = rand(100, 50).astype('float32')
424        M, N = A.shape
425
426        U, S, VT = self.run_gpu_svd(A)
427        self.assert_column_orthonormal(U)
428        self.assert_column_orthonormal(VT.T)
429        self.check_svd(A, U, S, VT)
430
431        U, S, VT = self.run_gpu_svd(A, full_matrices=False)
432        self.assertEqual(U.shape[1], min(M, N))
433        self.assert_column_orthonormal(U)
434        self.assertEqual(VT.shape[0], min(M, N))
435        self.assert_column_orthonormal(VT.T)
436
437    def test_gpu_svd_tall(self):
438        A = rand(50, 100).astype('float32')
439        M, N = A.shape
440
441        U, S, VT = self.run_gpu_svd(A)
442        self.assert_column_orthonormal(U)
443        self.assert_column_orthonormal(VT.T)
444        self.check_svd(A, U, S, VT)
445
446        U, S, VT = self.run_gpu_svd(A, full_matrices=False)
447        self.assertEqual(U.shape[1], min(M, N))
448        self.assert_column_orthonormal(U)
449        self.assertEqual(VT.shape[0], min(M, N))
450        self.assert_column_orthonormal(VT.T)
451
452    def test_gpu_singular_values(self):
453        A = theano.tensor.fmatrix("A")
454        f_cpu = theano.function(
455            [A], theano.tensor.nlinalg.svd(A, compute_uv=False),
456            mode=mode_without_gpu)
457        f_gpu = theano.function(
458            [A], gpu_svd(A, compute_uv=False), mode=mode_with_gpu)
459
460        A_val = rand(50, 100).astype('float32')
461        utt.assert_allclose(f_cpu(A_val), f_gpu(A_val))
462
463        A_val = rand(100, 50).astype('float32')
464        utt.assert_allclose(f_cpu(A_val), f_gpu(A_val))
465
466    def run_gpu_cholesky(self, A_val, lower=True):
467        A = theano.tensor.fmatrix("A")
468        f = theano.function([A], GpuMagmaCholesky(lower=lower)(A),
469                            mode=mode_with_gpu.excluding('cusolver'))
470        return f(A_val)
471
472    def rand_symmetric(self, N):
473        A = rand(N, N).astype('float32')
474        # ensure that eigenvalues are not too small which sometimes results in
475        # magma cholesky failure due to gpu limited numerical precision
476        D, W = np.linalg.eigh(A)
477        D[D < 1] = 1
478        V_m = np.zeros_like(A)
479        np.fill_diagonal(V_m, D)
480        return np.dot(np.dot(W.T, V_m), W)
481
482    def check_cholesky(self, N, lower=True, rtol=None, atol=None):
483        A = self.rand_symmetric(N)
484        L = self.run_gpu_cholesky(A, lower=lower)
485        if not lower:
486            L = L.T
487        utt.assert_allclose(np.dot(L, L.T), A, rtol=rtol, atol=atol)
488
489    def test_gpu_cholesky(self):
490        self.check_cholesky(1000, atol=1e-3)
491        self.check_cholesky(1000, lower=False, atol=1e-3)
492
493    def test_gpu_cholesky_opt(self):
494        A = theano.tensor.matrix("A", dtype="float32")
495        fn = theano.function([A], cholesky(A), mode=mode_with_gpu.excluding('cusolver'))
496        assert any([isinstance(node.op, GpuMagmaCholesky)
497                    for node in fn.maker.fgraph.toposort()])
498
499    @utt.assertFailure_fast
500    def test_gpu_cholesky_inplace(self):
501        A = self.rand_symmetric(1000)
502        A_gpu = gpuarray_shared_constructor(A)
503        A_copy = A_gpu.get_value()
504        C = GpuMagmaCholesky()(A_gpu)
505        fn = theano.function([], C, mode=mode_with_gpu, updates=[(A_gpu, C)])
506        assert any([
507            node.op.inplace
508            for node in fn.maker.fgraph.toposort() if
509            isinstance(node.op, GpuMagmaCholesky)
510        ])
511        fn()
512        L = A_gpu.get_value()
513        utt.assert_allclose(np.dot(L, L.T), A_copy, atol=1e-3)
514
515    @utt.assertFailure_fast
516    def test_gpu_cholesky_inplace_opt(self):
517        A = theano.tensor.fmatrix("A")
518        fn = theano.function([A], GpuMagmaCholesky()(A), mode=mode_with_gpu)
519        assert any([
520            node.op.inplace
521            for node in fn.maker.fgraph.toposort() if
522            isinstance(node.op, GpuMagmaCholesky)
523        ])
524
525    def run_gpu_qr(self, A_val, complete=True):
526        A = theano.tensor.fmatrix("A")
527        fn = theano.function([A], gpu_qr(A, complete=complete),
528                             mode=mode_with_gpu)
529        return fn(A_val)
530
531    def check_gpu_qr(self, M, N, complete=True, rtol=None, atol=None):
532        A = rand(M, N).astype('float32')
533        if complete:
534            Q_gpu, R_gpu = self.run_gpu_qr(A, complete=complete)
535        else:
536            R_gpu = self.run_gpu_qr(A, complete=complete)
537
538        Q_np, R_np = np.linalg.qr(A, mode='reduced')
539        utt.assert_allclose(R_np, R_gpu, rtol=rtol, atol=atol)
540        if complete:
541            utt.assert_allclose(Q_np, Q_gpu, rtol=rtol, atol=atol)
542
543    def test_gpu_qr(self):
544        self.check_gpu_qr(1000, 500, atol=1e-3)
545        self.check_gpu_qr(1000, 500, complete=False, atol=1e-3)
546        self.check_gpu_qr(500, 1000, atol=1e-3)
547        self.check_gpu_qr(500, 1000, complete=False, atol=1e-3)
548
549    def test_gpu_qr_opt(self):
550        A = theano.tensor.fmatrix("A")
551        fn = theano.function([A], qr(A), mode=mode_with_gpu)
552        assert any([
553            isinstance(node.op, GpuMagmaQR) and node.op.complete
554            for node in fn.maker.fgraph.toposort()
555        ])
556
557    def test_gpu_qr_incomplete_opt(self):
558        A = theano.tensor.fmatrix("A")
559        fn = theano.function([A], qr(A, mode='r'), mode=mode_with_gpu)
560        assert any([
561            isinstance(node.op, GpuMagmaQR) and not node.op.complete
562            for node in fn.maker.fgraph.toposort()
563        ])
564
565    def run_gpu_eigh(self, A_val, UPLO='L', compute_v=True):
566        A = theano.tensor.fmatrix("A")
567        fn = theano.function([A], GpuMagmaEigh(UPLO=UPLO, compute_v=compute_v)(A),
568                             mode=mode_with_gpu)
569        return fn(A_val)
570
571    def check_gpu_eigh(self, N, UPLO='L', compute_v=True, rtol=None, atol=None):
572        A = rand(N, N).astype('float32')
573        A = np.dot(A.T, A)
574        d_np, v_np = np.linalg.eigh(A, UPLO=UPLO)
575        if compute_v:
576            d_gpu, v_gpu = self.run_gpu_eigh(A, UPLO=UPLO, compute_v=compute_v)
577        else:
578            d_gpu = self.run_gpu_eigh(A, UPLO=UPLO, compute_v=False)
579        utt.assert_allclose(d_np, d_gpu, rtol=rtol, atol=atol)
580        if compute_v:
581            utt.assert_allclose(
582                np.eye(N), np.dot(v_gpu, v_gpu.T), rtol=rtol, atol=atol)
583            D_m = np.zeros_like(A)
584            np.fill_diagonal(D_m, d_gpu)
585            utt.assert_allclose(
586                A, np.dot(np.dot(v_gpu, D_m), v_gpu.T), rtol=rtol, atol=atol)
587
588    def test_gpu_eigh(self):
589        self.check_gpu_eigh(1000, UPLO='L', atol=1e-3)
590        self.check_gpu_eigh(1000, UPLO='U', atol=1e-3)
591        self.check_gpu_eigh(1000, UPLO='L', compute_v=False, atol=1e-3)
592        self.check_gpu_eigh(1000, UPLO='U', compute_v=False, atol=1e-3)
593
594    def test_gpu_eigh_opt(self):
595        A = theano.tensor.fmatrix("A")
596        fn = theano.function([A], eigh(A), mode=mode_with_gpu)
597        assert any([
598            isinstance(node.op, GpuMagmaEigh)
599            for node in fn.maker.fgraph.toposort()
600        ])
601
602
603# mostly copied from theano/tensor/tests/test_slinalg.py
604def test_cholesky_grad():
605    rng = np.random.RandomState(utt.fetch_seed())
606    r = rng.randn(5, 5).astype(config.floatX)
607
608    # The dots are inside the graph since Cholesky needs separable matrices
609
610    # Check the default.
611    yield (lambda: utt.verify_grad(lambda r: gpu_cholesky(r.dot(r.T)),
612                                   [r], 3, rng))
613    # Explicit lower-triangular.
614    yield (lambda: utt.verify_grad(lambda r: GpuCholesky(lower=True)(r.dot(r.T)),
615                                   [r], 3, rng))
616    # Explicit upper-triangular.
617    yield (lambda: utt.verify_grad(lambda r: GpuCholesky(lower=False)(r.dot(r.T)),
618                                   [r], 3, rng))
619
620
621def test_cholesky_grad_indef():
622    x = theano.tensor.matrix()
623    matrix = np.array([[1, 0.2], [0.2, -2]]).astype(config.floatX)
624    cholesky = GpuCholesky(lower=True)
625    chol_f = theano.function([x], theano.tensor.grad(cholesky(x).sum(), [x]))
626    with assert_raises(LinAlgError):
627        chol_f(matrix)
628    # cholesky = GpuCholesky(lower=True, on_error='nan')
629    # chol_f = function([x], grad(gpu_cholesky(x).sum(), [x]))
630    # assert np.all(np.isnan(chol_f(matrix)))
631
632
633def test_lower_triangular_and_cholesky_grad():
634    # Random lower triangular system is ill-conditioned.
635    #
636    # Reference
637    # -----------
638    # Viswanath, Divakar, and L. N. Trefethen. "Condition numbers of random triangular matrices."
639    # SIAM Journal on Matrix Analysis and Applications 19.2 (1998): 564-581.
640    #
641    # Use smaller number of N when using float32
642    if config.floatX == 'float64':
643        N = 100
644    else:
645        N = 5
646    rng = np.random.RandomState(utt.fetch_seed())
647    r = rng.randn(N, N).astype(config.floatX)
648    y = rng.rand(N, 1).astype(config.floatX)
649
650    def f(r, y):
651        PD = r.dot(r.T)
652        L = gpu_cholesky(PD)
653        A = gpu_solve_lower_triangular(L, y)
654        AAT = theano.tensor.dot(A, A.T)
655        B = AAT + theano.tensor.eye(N)
656        LB = gpu_cholesky(B)
657        return theano.tensor.sum(theano.tensor.log(theano.tensor.diag(LB)))
658    yield (lambda: utt.verify_grad(f, [r, y], 3, rng))
659