1from __future__ import absolute_import, division, print_function 2 3import unittest 4 5import numpy as np 6from numpy.linalg.linalg import LinAlgError 7 8import theano 9from theano import config 10from theano.gpuarray.linalg import (GpuCusolverSolve, GpuCublasTriangularSolve, 11 GpuCholesky, GpuMagmaCholesky, 12 GpuMagmaEigh, GpuMagmaMatrixInverse, 13 GpuMagmaQR, GpuMagmaSVD, 14 cusolver_available, gpu_matrix_inverse, 15 gpu_cholesky, 16 gpu_solve, gpu_solve_lower_triangular, 17 gpu_svd, gpu_qr) 18from theano.tensor.nlinalg import (SVD, MatrixInverse, QRFull, 19 QRIncomplete, eigh, matrix_inverse, qr) 20from theano.tensor.slinalg import Cholesky, cholesky, imported_scipy 21from theano.tests import unittest_tools as utt 22 23from .. import gpuarray_shared_constructor 24from .config import mode_with_gpu, mode_without_gpu 25from .test_basic_ops import rand 26from nose.tools import assert_raises 27 28 29class TestCusolver(unittest.TestCase): 30 31 def setUp(self): 32 if not cusolver_available: 33 self.skipTest('Optional package scikits.cuda.cusolver not available') 34 35 def run_gpu_solve(self, A_val, x_val, A_struct=None): 36 b_val = np.dot(A_val, x_val) 37 b_val_trans = np.dot(A_val.T, x_val) 38 39 A = theano.tensor.matrix("A", dtype="float32") 40 b = theano.tensor.matrix("b", dtype="float32") 41 b_trans = theano.tensor.matrix("b", dtype="float32") 42 43 if A_struct is None: 44 solver = gpu_solve(A, b) 45 solver_trans = gpu_solve(A, b_trans, trans='T') 46 else: 47 solver = gpu_solve(A, b, A_struct) 48 solver_trans = gpu_solve(A, b_trans, A_struct, trans='T') 49 50 fn = theano.function([A, b, b_trans], [solver, solver_trans], mode=mode_with_gpu) 51 res = fn(A_val, b_val, b_val_trans) 52 x_res = np.array(res[0]) 53 x_res_trans = np.array(res[1]) 54 utt.assert_allclose(x_val, x_res) 55 utt.assert_allclose(x_val, x_res_trans) 56 57 def test_diag_solve(self): 58 np.random.seed(1) 59 A_val = np.asarray([[2, 0, 0], [0, 1, 0], [0, 0, 1]], 60 dtype="float32") 61 x_val = np.random.uniform(-0.4, 0.4, (A_val.shape[1], 62 1)).astype("float32") 63 self.run_gpu_solve(A_val, x_val) 64 65 def test_bshape_solve(self): 66 # Test when shape of b (k, m) is such as m > k 67 68 np.random.seed(1) 69 A_val = np.asarray([[2, 0, 0], [0, 1, 0], [0, 0, 1]], 70 dtype="float32") 71 x_val = np.random.uniform(-0.4, 0.4, (A_val.shape[1], 72 A_val.shape[1] + 1)).astype("float32") 73 self.run_gpu_solve(A_val, x_val) 74 75 def test_sym_solve(self): 76 np.random.seed(1) 77 A_val = np.random.uniform(-0.4, 0.4, (5, 5)).astype("float32") 78 A_sym = np.dot(A_val, A_val.T) 79 x_val = np.random.uniform(-0.4, 0.4, (A_val.shape[1], 80 1)).astype("float32") 81 self.run_gpu_solve(A_sym, x_val, 'symmetric') 82 83 def test_orth_solve(self): 84 np.random.seed(1) 85 A_val = np.random.uniform(-0.4, 0.4, (5, 5)).astype("float32") 86 A_orth = np.linalg.svd(A_val)[0] 87 x_val = np.random.uniform(-0.4, 0.4, (A_orth.shape[1], 88 1)).astype("float32") 89 self.run_gpu_solve(A_orth, x_val) 90 91 def test_uni_rand_solve(self): 92 np.random.seed(1) 93 A_val = np.random.uniform(-0.4, 0.4, (5, 5)).astype("float32") 94 x_val = np.random.uniform(-0.4, 0.4, 95 (A_val.shape[1], 4)).astype("float32") 96 self.run_gpu_solve(A_val, x_val) 97 98 def test_linalgerrsym_solve(self): 99 np.random.seed(1) 100 A_val = np.random.uniform(-0.4, 0.4, (5, 5)).astype("float32") 101 x_val = np.random.uniform(-0.4, 0.4, 102 (A_val.shape[1], 4)).astype("float32") 103 A_val = np.dot(A_val.T, A_val) 104 # make A singular 105 A_val[:, 2] = A_val[:, 1] + A_val[:, 3] 106 107 A = theano.tensor.matrix("A", dtype="float32") 108 b = theano.tensor.matrix("b", dtype="float32") 109 solver = gpu_solve(A, b, 'symmetric') 110 111 fn = theano.function([A, b], [solver], mode=mode_with_gpu) 112 self.assertRaises(LinAlgError, fn, A_val, x_val) 113 114 def test_linalgerr_solve(self): 115 np.random.seed(1) 116 A_val = np.random.uniform(-0.4, 0.4, (5, 5)).astype("float32") 117 x_val = np.random.uniform(-0.4, 0.4, 118 (A_val.shape[1], 4)).astype("float32") 119 # make A singular 120 A_val[:, 2] = 0 121 122 A = theano.tensor.matrix("A", dtype="float32") 123 b = theano.tensor.matrix("b", dtype="float32") 124 solver = gpu_solve(A, b, trans='T') 125 126 fn = theano.function([A, b], [solver], mode=mode_with_gpu) 127 self.assertRaises(LinAlgError, fn, A_val, x_val) 128 129 def verify_solve_grad(self, m, n, A_structure, lower, rng): 130 # ensure diagonal elements of A relatively large to avoid numerical 131 # precision issues 132 A_val = (rng.normal(size=(m, m)) * 0.5 + 133 np.eye(m)).astype(config.floatX) 134 if A_structure == 'lower_triangular': 135 A_val = np.tril(A_val) 136 elif A_structure == 'upper_triangular': 137 A_val = np.triu(A_val) 138 if n is None: 139 b_val = rng.normal(size=m).astype(config.floatX) 140 else: 141 b_val = rng.normal(size=(m, n)).astype(config.floatX) 142 eps = None 143 if config.floatX == "float64": 144 eps = 2e-8 145 146 if A_structure in ('lower_triangular', 'upper_triangular'): 147 solve_op = GpuCublasTriangularSolve(lower=lower) 148 else: 149 solve_op = GpuCusolverSolve(A_structure="general") 150 utt.verify_grad(solve_op, [A_val, b_val], 3, rng, eps=eps) 151 152 def test_solve_grad(self): 153 rng = np.random.RandomState(utt.fetch_seed()) 154 structures = ['general', 'lower_triangular', 'upper_triangular'] 155 for A_structure in structures: 156 lower = (A_structure == 'lower_triangular') 157 # self.verify_solve_grad(5, None, A_structure, lower, rng) 158 self.verify_solve_grad(6, 1, A_structure, lower, rng) 159 self.verify_solve_grad(4, 3, A_structure, lower, rng) 160 # lower should have no effect for A_structure == 'general' so also 161 # check lower=True case 162 self.verify_solve_grad(4, 3, 'general', lower=True, rng=rng) 163 164 165class TestGpuCholesky(unittest.TestCase): 166 167 def setUp(self): 168 if not cusolver_available: 169 self.skipTest('Optional package scikits.cuda.cusolver not available') 170 utt.seed_rng() 171 172 def get_gpu_cholesky_func(self, lower=True, inplace=False): 173 # Helper function to compile function from GPU Cholesky op. 174 A = theano.tensor.matrix("A", dtype="float32") 175 cholesky_op = GpuCholesky(lower=lower, inplace=inplace) 176 chol_A = cholesky_op(A) 177 return theano.function([A], chol_A, accept_inplace=inplace, 178 mode=mode_with_gpu) 179 180 def compare_gpu_cholesky_to_np(self, A_val, lower=True, inplace=False): 181 # Helper function to compare op output to np.cholesky output. 182 chol_A_val = np.linalg.cholesky(A_val) 183 if not lower: 184 chol_A_val = chol_A_val.T 185 fn = self.get_gpu_cholesky_func(lower, inplace) 186 res = fn(A_val) 187 chol_A_res = np.array(res) 188 utt.assert_allclose(chol_A_res, chol_A_val) 189 190 def test_gpu_cholesky_opt(self): 191 if not imported_scipy: 192 self.skipTest('SciPy is not enabled, skipping test') 193 A = theano.tensor.matrix("A", dtype="float32") 194 fn = theano.function([A], cholesky(A), mode=mode_with_gpu) 195 assert any([isinstance(node.op, GpuCholesky) 196 for node in fn.maker.fgraph.toposort()]) 197 198 def test_invalid_input_fail_non_square(self): 199 # Invalid Cholesky input test with non-square matrix as input. 200 A_val = np.random.normal(size=(3, 2)).astype("float32") 201 fn = self.get_gpu_cholesky_func(True, False) 202 self.assertRaises(ValueError, fn, A_val) 203 204 def test_invalid_input_fail_vector(self): 205 # Invalid Cholesky input test with vector as input. 206 def invalid_input_func(): 207 A = theano.tensor.vector("A", dtype="float32") 208 GpuCholesky(lower=True, inplace=False)(A) 209 self.assertRaises(AssertionError, invalid_input_func) 210 211 def test_invalid_input_fail_tensor3(self): 212 # Invalid Cholesky input test with 3D tensor as input. 213 def invalid_input_func(): 214 A = theano.tensor.tensor3("A", dtype="float32") 215 GpuCholesky(lower=True, inplace=False)(A) 216 self.assertRaises(AssertionError, invalid_input_func) 217 218 @utt.assertFailure_fast 219 def test_diag_chol(self): 220 # Diagonal matrix input Cholesky test. 221 for lower in [True, False]: 222 for inplace in [True, False]: 223 # make sure all diagonal elements are positive so positive-definite 224 A_val = np.diag(np.random.uniform(size=5).astype("float32") + 1) 225 self.compare_gpu_cholesky_to_np(A_val, lower=lower, inplace=inplace) 226 227 @utt.assertFailure_fast 228 def test_dense_chol_lower(self): 229 # Dense matrix input lower-triangular Cholesky test. 230 for lower in [True, False]: 231 for inplace in [True, False]: 232 M_val = np.random.normal(size=(3, 3)).astype("float32") 233 # A = M.dot(M) will be positive definite for all non-singular M 234 A_val = M_val.dot(M_val.T) 235 self.compare_gpu_cholesky_to_np(A_val, lower=lower, inplace=inplace) 236 237 def test_invalid_input_fail_non_symmetric(self): 238 # Invalid Cholesky input test with non-symmetric input. 239 # (Non-symmetric real input must also be non-positive definite). 240 A_val = None 241 while True: 242 A_val = np.random.normal(size=(3, 3)).astype("float32") 243 if not np.allclose(A_val, A_val.T): 244 break 245 fn = self.get_gpu_cholesky_func(True, False) 246 self.assertRaises(LinAlgError, fn, A_val) 247 248 def test_invalid_input_fail_negative_definite(self): 249 # Invalid Cholesky input test with negative-definite input. 250 M_val = np.random.normal(size=(3, 3)).astype("float32") 251 # A = -M.dot(M) will be negative definite for all non-singular M 252 A_val = -M_val.dot(M_val.T) 253 fn = self.get_gpu_cholesky_func(True, False) 254 self.assertRaises(LinAlgError, fn, A_val) 255 256 257class TestGpuCholesky64(unittest.TestCase): 258 259 def setUp(self): 260 if not cusolver_available: 261 self.skipTest('Optional package scikits.cuda.cusolver not available') 262 utt.seed_rng() 263 264 def get_gpu_cholesky_func(self, lower=True, inplace=False): 265 # Helper function to compile function from GPU Cholesky op. 266 A = theano.tensor.matrix("A", dtype="float64") 267 cholesky_op = GpuCholesky(lower=lower, inplace=inplace) 268 chol_A = cholesky_op(A) 269 return theano.function([A], chol_A, accept_inplace=inplace, 270 mode=mode_with_gpu) 271 272 def compare_gpu_cholesky_to_np(self, A_val, lower=True, inplace=False): 273 # Helper function to compare op output to np.cholesky output. 274 chol_A_val = np.linalg.cholesky(A_val) 275 if not lower: 276 chol_A_val = chol_A_val.T 277 fn = self.get_gpu_cholesky_func(lower, inplace) 278 res = fn(A_val) 279 chol_A_res = np.array(res) 280 utt.assert_allclose(chol_A_res, chol_A_val) 281 282 def test_gpu_cholesky_opt(self): 283 if not imported_scipy: 284 self.skipTest('SciPy is not enabled, skipping test') 285 A = theano.tensor.matrix("A", dtype="float64") 286 fn = theano.function([A], cholesky(A), mode=mode_with_gpu) 287 assert any([isinstance(node.op, GpuCholesky) 288 for node in fn.maker.fgraph.toposort()]) 289 290 def test_invalid_input_fail_non_square(self): 291 # Invalid Cholesky input test with non-square matrix as input. 292 A_val = np.random.normal(size=(3, 2)).astype("float64") 293 fn = self.get_gpu_cholesky_func(True, False) 294 self.assertRaises(ValueError, fn, A_val) 295 296 def test_invalid_input_fail_vector(self): 297 # Invalid Cholesky input test with vector as input. 298 def invalid_input_func(): 299 A = theano.tensor.vector("A", dtype="float64") 300 GpuCholesky(lower=True, inplace=False)(A) 301 self.assertRaises(AssertionError, invalid_input_func) 302 303 def test_invalid_input_fail_tensor3(self): 304 # Invalid Cholesky input test with 3D tensor as input. 305 def invalid_input_func(): 306 A = theano.tensor.tensor3("A", dtype="float64") 307 GpuCholesky(lower=True, inplace=False)(A) 308 self.assertRaises(AssertionError, invalid_input_func) 309 310 @utt.assertFailure_fast 311 def test_diag_chol(self): 312 # Diagonal matrix input Cholesky test. 313 for lower in [True, False]: 314 for inplace in [True, False]: 315 # make sure all diagonal elements are positive so positive-definite 316 A_val = np.diag(np.random.uniform(size=5).astype("float64") + 1) 317 self.compare_gpu_cholesky_to_np(A_val, lower=lower, inplace=inplace) 318 319 @utt.assertFailure_fast 320 def test_dense_chol_lower(self): 321 # Dense matrix input lower-triangular Cholesky test. 322 for lower in [True, False]: 323 for inplace in [True, False]: 324 M_val = np.random.normal(size=(3, 3)).astype("float64") 325 # A = M.dot(M) will be positive definite for all non-singular M 326 A_val = M_val.dot(M_val.T) 327 self.compare_gpu_cholesky_to_np(A_val, lower=lower, inplace=inplace) 328 329 def test_invalid_input_fail_non_symmetric(self): 330 # Invalid Cholesky input test with non-symmetric input. 331 # (Non-symmetric real input must also be non-positive definite). 332 A_val = None 333 while True: 334 A_val = np.random.normal(size=(3, 3)).astype("float64") 335 if not np.allclose(A_val, A_val.T): 336 break 337 fn = self.get_gpu_cholesky_func(True, False) 338 self.assertRaises(LinAlgError, fn, A_val) 339 340 def test_invalid_input_fail_negative_definite(self): 341 # Invalid Cholesky input test with negative-definite input. 342 M_val = np.random.normal(size=(3, 3)).astype("float64") 343 # A = -M.dot(M) will be negative definite for all non-singular M 344 A_val = -M_val.dot(M_val.T) 345 fn = self.get_gpu_cholesky_func(True, False) 346 self.assertRaises(LinAlgError, fn, A_val) 347 348 349class TestMagma(unittest.TestCase): 350 351 def setUp(self): 352 if not config.magma.enabled: 353 self.skipTest('Magma is not enabled, skipping test') 354 355 def test_magma_opt_float16(self): 356 ops_to_gpu = [(MatrixInverse(), GpuMagmaMatrixInverse), 357 (SVD(), GpuMagmaSVD), 358 (QRFull(mode='reduced'), GpuMagmaQR), 359 (QRIncomplete(mode='r'), GpuMagmaQR), 360 # TODO: add support for float16 to Eigh numpy 361 # (Eigh(), GpuMagmaEigh), 362 (Cholesky(), GpuMagmaCholesky)] 363 for op, gpu_op in ops_to_gpu: 364 A = theano.tensor.matrix("A", dtype="float16") 365 fn = theano.function([A], op(A), mode=mode_with_gpu.excluding('cusolver')) 366 assert any([isinstance(node.op, gpu_op) 367 for node in fn.maker.fgraph.toposort()]) 368 369 def test_gpu_matrix_inverse(self): 370 A = theano.tensor.fmatrix("A") 371 372 fn = theano.function([A], gpu_matrix_inverse(A), mode=mode_with_gpu) 373 N = 1000 374 test_rng = np.random.RandomState(seed=1) 375 # Copied from theano.tensor.tests.test_basic.rand. 376 A_val = test_rng.rand(N, N).astype('float32') * 2 - 1 377 A_val_inv = fn(A_val) 378 utt.assert_allclose(np.eye(N), np.dot(A_val_inv, A_val), atol=1e-2) 379 380 @utt.assertFailure_fast 381 def test_gpu_matrix_inverse_inplace(self): 382 N = 1000 383 test_rng = np.random.RandomState(seed=1) 384 A_val_gpu = gpuarray_shared_constructor(test_rng.rand(N, N).astype('float32') * 2 - 1) 385 A_val_copy = A_val_gpu.get_value() 386 A_val_gpu_inv = GpuMagmaMatrixInverse()(A_val_gpu) 387 fn = theano.function([], A_val_gpu_inv, mode=mode_with_gpu, updates=[(A_val_gpu, A_val_gpu_inv)]) 388 assert any([ 389 node.op.inplace 390 for node in fn.maker.fgraph.toposort() if 391 isinstance(node.op, GpuMagmaMatrixInverse) 392 ]) 393 fn() 394 utt.assert_allclose(np.eye(N), np.dot(A_val_gpu.get_value(), A_val_copy), atol=5e-3) 395 396 @utt.assertFailure_fast 397 def test_gpu_matrix_inverse_inplace_opt(self): 398 A = theano.tensor.fmatrix("A") 399 fn = theano.function([A], matrix_inverse(A), mode=mode_with_gpu) 400 assert any([ 401 node.op.inplace 402 for node in fn.maker.fgraph.toposort() if 403 isinstance(node.op, GpuMagmaMatrixInverse) 404 ]) 405 406 def run_gpu_svd(self, A_val, full_matrices=True, compute_uv=True): 407 A = theano.tensor.fmatrix("A") 408 f = theano.function( 409 [A], gpu_svd(A, full_matrices=full_matrices, compute_uv=compute_uv), 410 mode=mode_with_gpu) 411 return f(A_val) 412 413 def assert_column_orthonormal(self, Ot): 414 utt.assert_allclose(np.dot(Ot.T, Ot), np.eye(Ot.shape[1])) 415 416 def check_svd(self, A, U, S, VT, rtol=None, atol=None): 417 S_m = np.zeros_like(A) 418 np.fill_diagonal(S_m, S) 419 utt.assert_allclose( 420 np.dot(np.dot(U, S_m), VT), A, rtol=rtol, atol=atol) 421 422 def test_gpu_svd_wide(self): 423 A = rand(100, 50).astype('float32') 424 M, N = A.shape 425 426 U, S, VT = self.run_gpu_svd(A) 427 self.assert_column_orthonormal(U) 428 self.assert_column_orthonormal(VT.T) 429 self.check_svd(A, U, S, VT) 430 431 U, S, VT = self.run_gpu_svd(A, full_matrices=False) 432 self.assertEqual(U.shape[1], min(M, N)) 433 self.assert_column_orthonormal(U) 434 self.assertEqual(VT.shape[0], min(M, N)) 435 self.assert_column_orthonormal(VT.T) 436 437 def test_gpu_svd_tall(self): 438 A = rand(50, 100).astype('float32') 439 M, N = A.shape 440 441 U, S, VT = self.run_gpu_svd(A) 442 self.assert_column_orthonormal(U) 443 self.assert_column_orthonormal(VT.T) 444 self.check_svd(A, U, S, VT) 445 446 U, S, VT = self.run_gpu_svd(A, full_matrices=False) 447 self.assertEqual(U.shape[1], min(M, N)) 448 self.assert_column_orthonormal(U) 449 self.assertEqual(VT.shape[0], min(M, N)) 450 self.assert_column_orthonormal(VT.T) 451 452 def test_gpu_singular_values(self): 453 A = theano.tensor.fmatrix("A") 454 f_cpu = theano.function( 455 [A], theano.tensor.nlinalg.svd(A, compute_uv=False), 456 mode=mode_without_gpu) 457 f_gpu = theano.function( 458 [A], gpu_svd(A, compute_uv=False), mode=mode_with_gpu) 459 460 A_val = rand(50, 100).astype('float32') 461 utt.assert_allclose(f_cpu(A_val), f_gpu(A_val)) 462 463 A_val = rand(100, 50).astype('float32') 464 utt.assert_allclose(f_cpu(A_val), f_gpu(A_val)) 465 466 def run_gpu_cholesky(self, A_val, lower=True): 467 A = theano.tensor.fmatrix("A") 468 f = theano.function([A], GpuMagmaCholesky(lower=lower)(A), 469 mode=mode_with_gpu.excluding('cusolver')) 470 return f(A_val) 471 472 def rand_symmetric(self, N): 473 A = rand(N, N).astype('float32') 474 # ensure that eigenvalues are not too small which sometimes results in 475 # magma cholesky failure due to gpu limited numerical precision 476 D, W = np.linalg.eigh(A) 477 D[D < 1] = 1 478 V_m = np.zeros_like(A) 479 np.fill_diagonal(V_m, D) 480 return np.dot(np.dot(W.T, V_m), W) 481 482 def check_cholesky(self, N, lower=True, rtol=None, atol=None): 483 A = self.rand_symmetric(N) 484 L = self.run_gpu_cholesky(A, lower=lower) 485 if not lower: 486 L = L.T 487 utt.assert_allclose(np.dot(L, L.T), A, rtol=rtol, atol=atol) 488 489 def test_gpu_cholesky(self): 490 self.check_cholesky(1000, atol=1e-3) 491 self.check_cholesky(1000, lower=False, atol=1e-3) 492 493 def test_gpu_cholesky_opt(self): 494 A = theano.tensor.matrix("A", dtype="float32") 495 fn = theano.function([A], cholesky(A), mode=mode_with_gpu.excluding('cusolver')) 496 assert any([isinstance(node.op, GpuMagmaCholesky) 497 for node in fn.maker.fgraph.toposort()]) 498 499 @utt.assertFailure_fast 500 def test_gpu_cholesky_inplace(self): 501 A = self.rand_symmetric(1000) 502 A_gpu = gpuarray_shared_constructor(A) 503 A_copy = A_gpu.get_value() 504 C = GpuMagmaCholesky()(A_gpu) 505 fn = theano.function([], C, mode=mode_with_gpu, updates=[(A_gpu, C)]) 506 assert any([ 507 node.op.inplace 508 for node in fn.maker.fgraph.toposort() if 509 isinstance(node.op, GpuMagmaCholesky) 510 ]) 511 fn() 512 L = A_gpu.get_value() 513 utt.assert_allclose(np.dot(L, L.T), A_copy, atol=1e-3) 514 515 @utt.assertFailure_fast 516 def test_gpu_cholesky_inplace_opt(self): 517 A = theano.tensor.fmatrix("A") 518 fn = theano.function([A], GpuMagmaCholesky()(A), mode=mode_with_gpu) 519 assert any([ 520 node.op.inplace 521 for node in fn.maker.fgraph.toposort() if 522 isinstance(node.op, GpuMagmaCholesky) 523 ]) 524 525 def run_gpu_qr(self, A_val, complete=True): 526 A = theano.tensor.fmatrix("A") 527 fn = theano.function([A], gpu_qr(A, complete=complete), 528 mode=mode_with_gpu) 529 return fn(A_val) 530 531 def check_gpu_qr(self, M, N, complete=True, rtol=None, atol=None): 532 A = rand(M, N).astype('float32') 533 if complete: 534 Q_gpu, R_gpu = self.run_gpu_qr(A, complete=complete) 535 else: 536 R_gpu = self.run_gpu_qr(A, complete=complete) 537 538 Q_np, R_np = np.linalg.qr(A, mode='reduced') 539 utt.assert_allclose(R_np, R_gpu, rtol=rtol, atol=atol) 540 if complete: 541 utt.assert_allclose(Q_np, Q_gpu, rtol=rtol, atol=atol) 542 543 def test_gpu_qr(self): 544 self.check_gpu_qr(1000, 500, atol=1e-3) 545 self.check_gpu_qr(1000, 500, complete=False, atol=1e-3) 546 self.check_gpu_qr(500, 1000, atol=1e-3) 547 self.check_gpu_qr(500, 1000, complete=False, atol=1e-3) 548 549 def test_gpu_qr_opt(self): 550 A = theano.tensor.fmatrix("A") 551 fn = theano.function([A], qr(A), mode=mode_with_gpu) 552 assert any([ 553 isinstance(node.op, GpuMagmaQR) and node.op.complete 554 for node in fn.maker.fgraph.toposort() 555 ]) 556 557 def test_gpu_qr_incomplete_opt(self): 558 A = theano.tensor.fmatrix("A") 559 fn = theano.function([A], qr(A, mode='r'), mode=mode_with_gpu) 560 assert any([ 561 isinstance(node.op, GpuMagmaQR) and not node.op.complete 562 for node in fn.maker.fgraph.toposort() 563 ]) 564 565 def run_gpu_eigh(self, A_val, UPLO='L', compute_v=True): 566 A = theano.tensor.fmatrix("A") 567 fn = theano.function([A], GpuMagmaEigh(UPLO=UPLO, compute_v=compute_v)(A), 568 mode=mode_with_gpu) 569 return fn(A_val) 570 571 def check_gpu_eigh(self, N, UPLO='L', compute_v=True, rtol=None, atol=None): 572 A = rand(N, N).astype('float32') 573 A = np.dot(A.T, A) 574 d_np, v_np = np.linalg.eigh(A, UPLO=UPLO) 575 if compute_v: 576 d_gpu, v_gpu = self.run_gpu_eigh(A, UPLO=UPLO, compute_v=compute_v) 577 else: 578 d_gpu = self.run_gpu_eigh(A, UPLO=UPLO, compute_v=False) 579 utt.assert_allclose(d_np, d_gpu, rtol=rtol, atol=atol) 580 if compute_v: 581 utt.assert_allclose( 582 np.eye(N), np.dot(v_gpu, v_gpu.T), rtol=rtol, atol=atol) 583 D_m = np.zeros_like(A) 584 np.fill_diagonal(D_m, d_gpu) 585 utt.assert_allclose( 586 A, np.dot(np.dot(v_gpu, D_m), v_gpu.T), rtol=rtol, atol=atol) 587 588 def test_gpu_eigh(self): 589 self.check_gpu_eigh(1000, UPLO='L', atol=1e-3) 590 self.check_gpu_eigh(1000, UPLO='U', atol=1e-3) 591 self.check_gpu_eigh(1000, UPLO='L', compute_v=False, atol=1e-3) 592 self.check_gpu_eigh(1000, UPLO='U', compute_v=False, atol=1e-3) 593 594 def test_gpu_eigh_opt(self): 595 A = theano.tensor.fmatrix("A") 596 fn = theano.function([A], eigh(A), mode=mode_with_gpu) 597 assert any([ 598 isinstance(node.op, GpuMagmaEigh) 599 for node in fn.maker.fgraph.toposort() 600 ]) 601 602 603# mostly copied from theano/tensor/tests/test_slinalg.py 604def test_cholesky_grad(): 605 rng = np.random.RandomState(utt.fetch_seed()) 606 r = rng.randn(5, 5).astype(config.floatX) 607 608 # The dots are inside the graph since Cholesky needs separable matrices 609 610 # Check the default. 611 yield (lambda: utt.verify_grad(lambda r: gpu_cholesky(r.dot(r.T)), 612 [r], 3, rng)) 613 # Explicit lower-triangular. 614 yield (lambda: utt.verify_grad(lambda r: GpuCholesky(lower=True)(r.dot(r.T)), 615 [r], 3, rng)) 616 # Explicit upper-triangular. 617 yield (lambda: utt.verify_grad(lambda r: GpuCholesky(lower=False)(r.dot(r.T)), 618 [r], 3, rng)) 619 620 621def test_cholesky_grad_indef(): 622 x = theano.tensor.matrix() 623 matrix = np.array([[1, 0.2], [0.2, -2]]).astype(config.floatX) 624 cholesky = GpuCholesky(lower=True) 625 chol_f = theano.function([x], theano.tensor.grad(cholesky(x).sum(), [x])) 626 with assert_raises(LinAlgError): 627 chol_f(matrix) 628 # cholesky = GpuCholesky(lower=True, on_error='nan') 629 # chol_f = function([x], grad(gpu_cholesky(x).sum(), [x])) 630 # assert np.all(np.isnan(chol_f(matrix))) 631 632 633def test_lower_triangular_and_cholesky_grad(): 634 # Random lower triangular system is ill-conditioned. 635 # 636 # Reference 637 # ----------- 638 # Viswanath, Divakar, and L. N. Trefethen. "Condition numbers of random triangular matrices." 639 # SIAM Journal on Matrix Analysis and Applications 19.2 (1998): 564-581. 640 # 641 # Use smaller number of N when using float32 642 if config.floatX == 'float64': 643 N = 100 644 else: 645 N = 5 646 rng = np.random.RandomState(utt.fetch_seed()) 647 r = rng.randn(N, N).astype(config.floatX) 648 y = rng.rand(N, 1).astype(config.floatX) 649 650 def f(r, y): 651 PD = r.dot(r.T) 652 L = gpu_cholesky(PD) 653 A = gpu_solve_lower_triangular(L, y) 654 AAT = theano.tensor.dot(A, A.T) 655 B = AAT + theano.tensor.eye(N) 656 LB = gpu_cholesky(B) 657 return theano.tensor.sum(theano.tensor.log(theano.tensor.diag(LB))) 658 yield (lambda: utt.verify_grad(f, [r, y], 3, rng)) 659