from __future__ import absolute_import, print_function, division

import logging
import warnings
import numpy as np
from six.moves import xrange
from functools import partial

import theano
from theano.tensor import as_tensor_variable
from theano.gof import Op, Apply
from theano.gradient import DisconnectedType
from theano.tensor import basic as tensor
from theano.tensor.basic import ExtractDiag

logger = logging.getLogger(__name__)


class MatrixPinv(Op):
    """Computes the pseudo-inverse of a matrix :math:`A`.

    The pseudo-inverse of a matrix :math:`A`, denoted :math:`A^+`, is
    defined as: "the matrix that 'solves' [the least-squares problem]
    :math:`Ax = b`," i.e., if :math:`\\bar{x}` is said solution, then
    :math:`A^+` is that matrix such that :math:`\\bar{x} = A^+b`.

    Note that :math:`Ax = AA^+b`, so :math:`AA^+` is close to the identity matrix.
    This method is not faster than `matrix_inverse`. Its strength comes from
    the fact that it works for non-square matrices.
    If you have a square matrix, though, `matrix_inverse` can be both more
    exact and faster to compute. Also, this op does not get optimized into a
    solve op.

    """

    __props__ = ()

    def __init__(self):
        pass

    def make_node(self, x):
        x = as_tensor_variable(x)
        assert x.ndim == 2
        return Apply(self, [x], [x.type()])

    def perform(self, node, inputs, outputs):
        (x,) = inputs
        (z,) = outputs
        z[0] = np.linalg.pinv(x).astype(x.dtype)

    def L_op(self, inputs, outputs, g_outputs):
        r"""The gradient function should return

        .. math:: V\frac{\partial X^+}{\partial X},

        where :math:`V` corresponds to ``g_outputs`` and :math:`X` to
        ``inputs``. According to `Wikipedia
        <https://en.wikipedia.org/wiki/Moore%E2%80%93Penrose_pseudoinverse#Derivative>`_,
        this corresponds to

        .. math:: (-X^+ V^T X^+ + X^+ X^{+T} V (I - X X^+) + (I - X^+ X) V X^{+T} X^+)^T.
        """
        x, = inputs
        z, = outputs
        gz, = g_outputs

        x_dot_z = theano.tensor.dot(x, z)
        z_dot_x = theano.tensor.dot(z, x)

        grad = (-matrix_dot(z, gz.T, z) +
                matrix_dot(z, z.T, gz, (theano.tensor.identity_like(x_dot_z) - x_dot_z)) +
                matrix_dot((theano.tensor.identity_like(z_dot_x) - z_dot_x), gz, z.T, z)).T
        return [grad]

pinv = MatrixPinv()
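

# A minimal usage sketch for `pinv` (the `_pinv_usage_sketch` helper is
# illustrative only, not part of this module's API): for a tall matrix
# `a`, the Moore-Penrose identity ``dot(a, dot(pinv(a), a)) == a`` should
# hold up to floating-point precision.
def _pinv_usage_sketch():
    x = theano.tensor.dmatrix('x')
    f = theano.function([x], pinv(x))
    a = np.array([[1., 2.], [3., 4.], [5., 6.]])
    a_pinv = f(a)  # shape (2, 3) for a (3, 2) input
    assert np.allclose(np.dot(np.dot(a, a_pinv), a), a)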


class MatrixInverse(Op):
    r"""Computes the inverse of a matrix :math:`A`.

    Given a square matrix :math:`A`, ``matrix_inverse`` returns a square
    matrix :math:`A_{inv}` such that the dot products :math:`A \cdot A_{inv}`
    and :math:`A_{inv} \cdot A` equal the identity matrix :math:`I`.

    Notes
    -----
    When possible, the call to this op will be optimized to the call
    of ``solve``.

    """

    __props__ = ()

    def __init__(self):
        pass

    def make_node(self, x):
        x = as_tensor_variable(x)
        assert x.ndim == 2
        return Apply(self, [x], [x.type()])

    def perform(self, node, inputs, outputs):
        (x,) = inputs
        (z,) = outputs
        z[0] = np.linalg.inv(x).astype(x.dtype)

    def grad(self, inputs, g_outputs):
        r"""The gradient function should return

        .. math:: V\frac{\partial X^{-1}}{\partial X},

        where :math:`V` corresponds to ``g_outputs`` and :math:`X` to
        ``inputs``. Using the `matrix cookbook
        <http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=3274>`_,
        one can deduce that the relation corresponds to

        .. math:: -(X^{-1} \cdot V^{T} \cdot X^{-1})^T.

        """
        x, = inputs
        xi = self(x)
        gz, = g_outputs
        return [-matrix_dot(xi, gz.T, xi).T]

    def R_op(self, inputs, eval_points):
        r"""The R operation for this op should return

        .. math:: \frac{\partial X^{-1}}{\partial X}V,

        where :math:`V` corresponds to ``eval_points`` and :math:`X` to
        ``inputs``. Using the `matrix cookbook
        <http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=3274>`_,
        one can deduce that the relation corresponds to

        .. math:: -X^{-1} \cdot V \cdot X^{-1}.

        """
        x, = inputs
        xi = self(x)
        ev, = eval_points
        if ev is None:
            return [None]
        return [-matrix_dot(xi, ev, xi)]

    def infer_shape(self, node, shapes):
        return shapes

matrix_inverse = MatrixInverse()


def matrix_dot(*args):
    r"""Shorthand for the product of several dots.

    Given :math:`N` matrices :math:`A_0, A_1, \ldots, A_{N-1}`, ``matrix_dot``
    will generate the matrix product of all of them in the given order, namely
    :math:`A_0 \cdot A_1 \cdot A_2 \cdot \ldots \cdot A_{N-1}`.

    """
    rval = args[0]
    for a in args[1:]:
        rval = theano.tensor.dot(rval, a)
    return rval


class AllocDiag(Op):
    """
    Allocates a square matrix with the given vector as its diagonal.
    """

    __props__ = ()

    def make_node(self, _x):
        warnings.warn("DeprecationWarning: theano.tensor.nlinalg.AllocDiag "
                      "is deprecated, please use theano.tensor.AllocDiag "
                      "instead.",
                      category=DeprecationWarning)
        x = as_tensor_variable(_x)
        if x.type.ndim != 1:
            raise TypeError('AllocDiag only works on vectors', _x)
        return Apply(self, [x], [theano.tensor.matrix(dtype=x.type.dtype)])

    def grad(self, inputs, g_outputs):
        return [extract_diag(g_outputs[0])]

    def perform(self, node, inputs, outputs):
        (x,) = inputs
        (z,) = outputs
        if x.ndim != 1:
            raise TypeError(x)
        z[0] = np.diag(x)

    def infer_shape(self, node, shapes):
        x_s, = shapes
        return [(x_s[0], x_s[0])]

alloc_diag = AllocDiag()
extract_diag = ExtractDiag()
# TODO: optimization to insert ExtractDiag with view=True


def diag(x):
    """
    NumPy-compatibility method.

    If `x` is a matrix, return its diagonal.
    If `x` is a vector, return a matrix with it as its diagonal.

    Note that this method does not support the `k` argument that NumPy
    supports.
    """
    xx = as_tensor_variable(x)
    if xx.type.ndim == 1:
        return alloc_diag(xx)
    elif xx.type.ndim == 2:
        return extract_diag(xx)
    else:
        raise TypeError('diag requires vector or matrix argument', x)


def trace(X):
    """
    Returns the sum of diagonal elements of matrix X.

    Notes
    -----
    Works on GPU since 0.6rc4.

    """
    return extract_diag(X).sum()
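

# A minimal usage sketch for `matrix_inverse` and `matrix_dot` (the
# `_matrix_inverse_usage_sketch` helper is illustrative only, not part of
# this module's API): ``matrix_dot(matrix_inverse(a), a)`` should give the
# identity up to floating-point precision.
def _matrix_inverse_usage_sketch():
    a = theano.tensor.dmatrix('a')
    f = theano.function([a], matrix_dot(matrix_inverse(a), a))
    a_val = np.array([[4., 7.], [2., 6.]])
    assert np.allclose(f(a_val), np.eye(2))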


class Det(Op):
    """
    Matrix determinant. Input should be a square matrix.

    """

    __props__ = ()

    def make_node(self, x):
        x = as_tensor_variable(x)
        assert x.ndim == 2
        o = theano.tensor.scalar(dtype=x.dtype)
        return Apply(self, [x], [o])

    def perform(self, node, inputs, outputs):
        (x,) = inputs
        (z,) = outputs
        try:
            z[0] = np.asarray(np.linalg.det(x), dtype=x.dtype)
        except Exception:
            print('Failed to compute determinant', x)
            raise

    def grad(self, inputs, g_outputs):
        gz, = g_outputs
        x, = inputs
        return [gz * self(x) * matrix_inverse(x).T]

    def infer_shape(self, node, shapes):
        return [()]

    def __str__(self):
        return "Det"
det = Det()


class Eig(Op):
    """
    Compute the eigenvalues and right eigenvectors of a square array.

    """

    _numop = staticmethod(np.linalg.eig)
    __props__ = ()

    def make_node(self, x):
        x = as_tensor_variable(x)
        assert x.ndim == 2
        w = theano.tensor.vector(dtype=x.dtype)
        v = theano.tensor.matrix(dtype=x.dtype)
        return Apply(self, [x], [w, v])

    def perform(self, node, inputs, outputs):
        (x,) = inputs
        (w, v) = outputs
        w[0], v[0] = [z.astype(x.dtype) for z in self._numop(x)]

    def infer_shape(self, node, shapes):
        n = shapes[0][0]
        return [(n,), (n, n)]

eig = Eig()


class Eigh(Eig):
    """
    Return the eigenvalues and eigenvectors of a Hermitian or symmetric matrix.

    """

    _numop = staticmethod(np.linalg.eigh)
    __props__ = ('UPLO',)

    def __init__(self, UPLO='L'):
        assert UPLO in ['L', 'U']
        self.UPLO = UPLO

    def make_node(self, x):
        x = as_tensor_variable(x)
        assert x.ndim == 2
        # NumPy's linalg.eigh may return either double or single
        # precision eigenvalues depending on the installed version of
        # LAPACK. Rather than trying to reproduce the (rather
        # involved) logic, we just probe linalg.eigh with a trivial
        # input.
        w_dtype = self._numop([[np.dtype(x.dtype).type()]])[0].dtype.name
        w = theano.tensor.vector(dtype=w_dtype)
        v = theano.tensor.matrix(dtype=x.dtype)
        return Apply(self, [x], [w, v])

    def perform(self, node, inputs, outputs):
        (x,) = inputs
        (w, v) = outputs
        w[0], v[0] = self._numop(x, self.UPLO)

    def grad(self, inputs, g_outputs):
        r"""The gradient function should return

        .. math:: \sum_n\left(W_n\frac{\partial\,w_n}
                           {\partial a_{ij}} +
                  \sum_k V_{nk}\frac{\partial\,v_{nk}}
                               {\partial a_{ij}}\right),

        where [:math:`W`, :math:`V`] corresponds to ``g_outputs``,
        :math:`a` to ``inputs``, and :math:`(w, v) = \mbox{eigh}(a)`.

        Analytic formulae for eigensystem gradients are well-known in
        perturbation theory:

        .. math:: \frac{\partial\,w_n}
                       {\partial a_{ij}} = v_{in}\,v_{jn}


        .. math:: \frac{\partial\,v_{kn}}
                       {\partial a_{ij}} =
                  \sum_{m\ne n}\frac{v_{km}v_{jn}}{w_n - w_m}

        """
        x, = inputs
        w, v = self(x)
        # Replace gradients wrt disconnected variables with
        # zeros. This is a work-around for issue #1063.
        gw, gv = _zero_disconnected([w, v], g_outputs)
        return [EighGrad(self.UPLO)(x, w, v, gw, gv)]


def _zero_disconnected(outputs, grads):
    l = []
    for o, g in zip(outputs, grads):
        if isinstance(g.type, DisconnectedType):
            l.append(o.zeros_like())
        else:
            l.append(g)
    return l
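

# A minimal usage sketch for `det` and `eig` (the `_eig_usage_sketch`
# helper is illustrative only, not part of this module's API). A symmetric
# input keeps the spectrum real, so the cast back to the input dtype in
# Eig.perform is lossless here, and the determinant equals the product of
# the eigenvalues.
def _eig_usage_sketch():
    x = theano.tensor.dmatrix('x')
    w, v = eig(x)
    f = theano.function([x], [det(x), w])
    d, w_val = f(np.array([[2., 1.], [1., 2.]]))
    assert np.allclose(d, np.prod(w_val))  # eigenvalues 1 and 3, det 3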


class EighGrad(Op):
    """
    Gradient of an eigensystem of a Hermitian matrix.

    """

    __props__ = ('UPLO',)

    def __init__(self, UPLO='L'):
        assert UPLO in ['L', 'U']
        self.UPLO = UPLO
        if UPLO == 'L':
            self.tri0 = np.tril
            self.tri1 = partial(np.triu, k=1)
        else:
            self.tri0 = np.triu
            self.tri1 = partial(np.tril, k=-1)

    def make_node(self, x, w, v, gw, gv):
        x, w, v, gw, gv = map(as_tensor_variable, (x, w, v, gw, gv))
        assert x.ndim == 2
        assert w.ndim == 1
        assert v.ndim == 2
        assert gw.ndim == 1
        assert gv.ndim == 2
        out_dtype = theano.scalar.upcast(x.dtype, w.dtype, v.dtype,
                                         gw.dtype, gv.dtype)
        out = theano.tensor.matrix(dtype=out_dtype)
        return Apply(self, [x, w, v, gw, gv], [out])

    def perform(self, node, inputs, outputs):
        """
        Implements the "reverse-mode" gradient for the eigensystem of
        a square matrix.

        """
        x, w, v, W, V = inputs
        N = x.shape[0]
        outer = np.outer

        def G(n):
            return sum(v[:, m] * V.T[n].dot(v[:, m]) / (w[n] - w[m])
                       for m in xrange(N) if m != n)

        g = sum(outer(v[:, n], v[:, n] * W[n] + G(n))
                for n in xrange(N))

        # NumPy's eigh(a, 'L') (eigh(a, 'U')) is a function of tril(a)
        # (triu(a)) only. This means that the partial derivative of
        # eigh(a, 'L') (eigh(a, 'U')) with respect to a[i,j] is zero
        # for i < j (i > j). At the same time, non-zero components of
        # the gradient must account for the fact that variation of the
        # opposite triangle contributes to variation of two elements
        # of a Hermitian (symmetric) matrix. The following line
        # implements the necessary logic.
        out = self.tri0(g) + self.tri1(g).T

        # Make sure we return the right dtype even if NumPy performed
        # upcasting in self.tri0.
        outputs[0][0] = np.asarray(out, dtype=node.outputs[0].dtype)

    def infer_shape(self, node, shapes):
        return [shapes[0]]


def eigh(a, UPLO='L'):
    return Eigh(UPLO)(a)
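

# A minimal usage sketch for `eigh` (the `_eigh_usage_sketch` helper is
# illustrative only, not part of this module's API): for a symmetric
# matrix the eigenvectors are orthonormal, so ``dot(v.T, v)`` should be
# the identity.
def _eigh_usage_sketch():
    x = theano.tensor.dmatrix('x')
    w, v = eigh(x)
    f = theano.function([x], [w, v])
    w_val, v_val = f(np.array([[2., 1.], [1., 2.]]))
    assert np.allclose(np.dot(v_val.T, v_val), np.eye(2))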


class QRFull(Op):
    """
    Full QR decomposition.

    Computes the QR decomposition of a matrix.
    Factor the matrix a as qr, where q is orthonormal
    and r is upper-triangular.

    """

    _numop = staticmethod(np.linalg.qr)
    __props__ = ('mode',)

    def __init__(self, mode):
        self.mode = mode

    def make_node(self, x):
        x = as_tensor_variable(x)
        assert x.ndim == 2, "The input of qr function should be a matrix."
        q = theano.tensor.matrix(dtype=x.dtype)
        if self.mode != 'raw':
            r = theano.tensor.matrix(dtype=x.dtype)
        else:
            r = theano.tensor.vector(dtype=x.dtype)

        return Apply(self, [x], [q, r])

    def perform(self, node, inputs, outputs):
        (x,) = inputs
        (q, r) = outputs
        assert x.ndim == 2, "The input of qr function should be a matrix."
        q[0], r[0] = self._numop(x, self.mode)


class QRIncomplete(Op):
    """
    Incomplete QR decomposition.

    Computes the QR decomposition of a matrix.
    Factor the matrix a as qr and return the single matrix r.

    """

    _numop = staticmethod(np.linalg.qr)
    __props__ = ('mode',)

    def __init__(self, mode):
        self.mode = mode

    def make_node(self, x):
        x = as_tensor_variable(x)
        assert x.ndim == 2, "The input of qr function should be a matrix."
        r = theano.tensor.matrix(dtype=x.dtype)
        return Apply(self, [x], [r])

    def perform(self, node, inputs, outputs):
        (x,) = inputs
        (r,) = outputs
        assert x.ndim == 2, "The input of qr function should be a matrix."
        r[0] = self._numop(x, self.mode)


def qr(a, mode="reduced"):
    """
    Computes the QR decomposition of a matrix.
    Factor the matrix a as qr, where q
    is orthonormal and r is upper-triangular.

    Parameters
    ----------
    a : array_like, shape (M, N)
        Matrix to be factored.

    mode : {'reduced', 'complete', 'r', 'raw'}, optional
        If K = min(M, N), then

        'reduced'
          returns q, r with dimensions (M, K), (K, N)

        'complete'
          returns q, r with dimensions (M, M), (M, N)

        'r'
          returns r only with dimensions (K, N)

        'raw'
          returns h, tau with dimensions (N, M), (K,)

        Note that the array h returned in 'raw' mode is
        transposed for calling Fortran.

        Default mode is 'reduced'.

    Returns
    -------
    q : matrix of float or complex, optional
        A matrix with orthonormal columns. When mode = 'complete' the
        result is an orthogonal/unitary matrix depending on whether or
        not a is real/complex. The determinant may be either +/- 1 in
        that case.
    r : matrix of float or complex, optional
        The upper-triangular matrix.

    """
    # NumPy's qr returns a tuple for some modes and a single array for
    # others (e.g. mode='r'); probe it on a small matrix to pick the op
    # with the matching number of outputs.
    x = [[2, 1], [3, 4]]
    if isinstance(np.linalg.qr(x, mode), tuple):
        return QRFull(mode)(a)
    else:
        return QRIncomplete(mode)(a)
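

# A minimal usage sketch for `qr` (the `_qr_usage_sketch` helper is
# illustrative only, not part of this module's API): in the default
# 'reduced' mode, q has orthonormal columns and ``dot(q, r)``
# reconstructs the input.
def _qr_usage_sketch():
    x = theano.tensor.dmatrix('x')
    q, r = qr(x)
    f = theano.function([x], [q, r])
    a = np.array([[1., 2.], [3., 4.], [5., 6.]])
    q_val, r_val = f(a)
    assert np.allclose(np.dot(q_val, r_val), a)
    assert np.allclose(np.dot(q_val.T, q_val), np.eye(2))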


class SVD(Op):
    """
    Computes the singular value decomposition of a matrix.

    Parameters
    ----------
    full_matrices : bool, optional
        If True (default), u and v have the shapes (M, M) and (N, N),
        respectively.
        Otherwise, the shapes are (M, K) and (K, N), respectively,
        where K = min(M, N).
    compute_uv : bool, optional
        Whether or not to compute u and v in addition to s.
        True by default.

    """

    # See doc in the docstring of the function just after this class.
    _numop = staticmethod(np.linalg.svd)
    __props__ = ('full_matrices', 'compute_uv')

    def __init__(self, full_matrices=True, compute_uv=True):
        self.full_matrices = full_matrices
        self.compute_uv = compute_uv

    def make_node(self, x):
        x = as_tensor_variable(x)
        assert x.ndim == 2, "The input of svd function should be a matrix."
        s = theano.tensor.vector(dtype=x.dtype)
        if self.compute_uv:
            u = theano.tensor.matrix(dtype=x.dtype)
            vt = theano.tensor.matrix(dtype=x.dtype)
            return Apply(self, [x], [u, s, vt])
        else:
            return Apply(self, [x], [s])

    def perform(self, node, inputs, outputs):
        (x,) = inputs
        assert x.ndim == 2, "The input of svd function should be a matrix."
        if self.compute_uv:
            u, s, vt = outputs
            u[0], s[0], vt[0] = self._numop(x,
                                            self.full_matrices,
                                            self.compute_uv)
        else:
            s, = outputs
            s[0] = self._numop(x, self.full_matrices, self.compute_uv)

    def infer_shape(self, node, shapes):
        x_shape, = shapes
        M, N = x_shape
        K = tensor.minimum(M, N)
        s_shape = (K,)
        if self.compute_uv:
            u_shape = (M, M) if self.full_matrices else (M, K)
            vt_shape = (N, N) if self.full_matrices else (K, N)
            return [u_shape, s_shape, vt_shape]
        else:
            return [s_shape]


def svd(a, full_matrices=1, compute_uv=1):
    """
    This function performs the SVD on CPU.

    Parameters
    ----------
    full_matrices : bool, optional
        If True (default), u and v have the shapes (M, M) and (N, N),
        respectively.
        Otherwise, the shapes are (M, K) and (K, N), respectively,
        where K = min(M, N).
    compute_uv : bool, optional
        Whether or not to compute u and v in addition to s.
        True by default.

    Returns
    -------
    u, s, vt : matrices
        If ``compute_uv`` is False, only the vector of singular values
        s is returned.

    """
    return SVD(full_matrices, compute_uv)(a)


class lstsq(Op):

    __props__ = ()

    def make_node(self, x, y, rcond):
        x = theano.tensor.as_tensor_variable(x)
        y = theano.tensor.as_tensor_variable(y)
        rcond = theano.tensor.as_tensor_variable(rcond)
        return theano.Apply(self, [x, y, rcond],
                            [theano.tensor.matrix(), theano.tensor.dvector(),
                             theano.tensor.lscalar(), theano.tensor.dvector()])

    def perform(self, node, inputs, outputs):
        # np.linalg.lstsq returns (solution, residuals, rank, singular
        # values); unpack them into the op's four outputs.
        zz = np.linalg.lstsq(inputs[0], inputs[1], inputs[2])
        outputs[0][0] = zz[0]
        outputs[1][0] = zz[1]
        outputs[2][0] = np.array(zz[2])
        outputs[3][0] = zz[3]


def matrix_power(M, n):
    """
    Raise a square matrix to the (integer) power n.

    Parameters
    ----------
    M : Tensor variable
    n : Python int

    """
    # Start from the multiplicative identity and fold in n copies of M.
    result = 1
    for i in xrange(n):
        result = theano.dot(result, M)
    return result


def norm(x, ord):
    """
    Computes a vector or matrix norm, following the conventions of
    numpy.linalg.norm for the supported values of `ord`.
    """
    x = as_tensor_variable(x)
    ndim = x.ndim
    if ndim == 0:
        raise ValueError("'axis' entry is out of bounds.")
    elif ndim == 1:
        if ord is None:
            return tensor.sum(x**2)**0.5
        elif ord == 'inf':
            return tensor.max(abs(x))
        elif ord == '-inf':
            return tensor.min(abs(x))
        elif ord == 0:
            return x[x.nonzero()].shape[0]
        else:
            try:
                z = tensor.sum(abs(x**ord))**(1. / ord)
            except TypeError:
                raise ValueError("Invalid norm order for vectors.")
            return z
    elif ndim == 2:
        if ord is None or ord == 'fro':
            return tensor.sum(abs(x**2))**(0.5)
        elif ord == 'inf':
            return tensor.max(tensor.sum(abs(x), 1))
        elif ord == '-inf':
            return tensor.min(tensor.sum(abs(x), 1))
        elif ord == 1:
            return tensor.max(tensor.sum(abs(x), 0))
        elif ord == -1:
            return tensor.min(tensor.sum(abs(x), 0))
        else:
            raise ValueError("Invalid norm order for matrices.")
    elif ndim > 2:
        raise NotImplementedError("We don't support norm with ndim > 2")
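

# A minimal usage sketch for `svd` and `norm` (the `_svd_usage_sketch`
# helper is illustrative only, not part of this module's API): the
# Frobenius norm of a matrix equals the l2 norm of its singular values.
def _svd_usage_sketch():
    x = theano.tensor.dmatrix('x')
    u, s, vt = svd(x)
    f = theano.function([x], [s, norm(x, None)])
    s_val, fro = f(np.array([[1., 0.], [0., 3.], [2., 0.]]))
    assert np.allclose(fro, np.sqrt(np.sum(s_val ** 2)))  # both sqrt(14)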


class TensorInv(Op):
    """
    Theano wrapper of numpy.linalg.tensorinv; see the `tensorinv`
    function docstring below.

    """

    _numop = staticmethod(np.linalg.tensorinv)
    __props__ = ('ind',)

    def __init__(self, ind=2):
        self.ind = ind

    def make_node(self, a):
        a = as_tensor_variable(a)
        out = a.type()
        return Apply(self, [a], [out])

    def perform(self, node, inputs, outputs):
        (a,) = inputs
        (x,) = outputs
        x[0] = self._numop(a, self.ind)

    def infer_shape(self, node, shapes):
        sp = shapes[0][self.ind:] + shapes[0][:self.ind]
        return [sp]


def tensorinv(a, ind=2):
    """
    Theano wrapper of numpy.linalg.tensorinv. Does not run on GPU.

    Compute the 'inverse' of an N-dimensional array.
    The result is an inverse for `a` relative to the tensordot operation
    ``tensordot(a, b, ind)``, i.e., up to floating-point accuracy,
    ``tensordot(tensorinv(a), a, ind)`` is the "identity" tensor for the
    tensordot operation.

    Parameters
    ----------
    a : array_like
        Tensor to 'invert'. Its shape must be 'square', i.e.,
        ``prod(a.shape[:ind]) == prod(a.shape[ind:])``.
    ind : int, optional
        Number of first indices that are involved in the inverse sum.
        Must be a positive integer, default is 2.

    Returns
    -------
    b : ndarray
        `a`'s tensordot inverse, shape ``a.shape[ind:] + a.shape[:ind]``.

    Raises
    ------
    LinAlgError
        If `a` is singular or not 'square' (in the above sense).

    """
    return TensorInv(ind)(a)


class TensorSolve(Op):
    """
    Theano wrapper of numpy.linalg.tensorsolve; see the `tensorsolve`
    function docstring below.

    """

    _numop = staticmethod(np.linalg.tensorsolve)
    __props__ = ('axes',)

    def __init__(self, axes=None):
        self.axes = axes

    def make_node(self, a, b):
        a = as_tensor_variable(a)
        b = as_tensor_variable(b)
        out_dtype = theano.scalar.upcast(a.dtype, b.dtype)
        x = theano.tensor.matrix(dtype=out_dtype)
        return Apply(self, [a, b], [x])

    def perform(self, node, inputs, outputs):
        (a, b,) = inputs
        (x,) = outputs
        x[0] = self._numop(a, b, self.axes)


def tensorsolve(a, b, axes=None):
    """
    Theano wrapper of numpy.linalg.tensorsolve. Does not run on GPU!

    Solve the tensor equation ``a x = b`` for x.
    It is assumed that all indices of `x` are summed over in the product,
    together with the rightmost indices of `a`, as is done in, for example,
    ``tensordot(a, x, axes=len(b.shape))``.

    Parameters
    ----------
    a : array_like
        Coefficient tensor, of shape ``b.shape + Q``. `Q`, a tuple, equals
        the shape of that sub-tensor of `a` consisting of the appropriate
        number of its rightmost indices, and must be such that
        ``prod(Q) == prod(b.shape)`` (in which sense `a` is said to be
        'square').
    b : array_like
        Right-hand tensor, which can be of any shape.
    axes : tuple of ints, optional
        Axes in `a` to reorder to the right, before inversion.
        If None (default), no reordering is done.

    Returns
    -------
    x : ndarray, shape Q

    Raises
    ------
    LinAlgError
        If `a` is singular or not 'square' (in the above sense).

    """
    return TensorSolve(axes)(a, b)
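

# A minimal usage sketch for `tensorsolve` (the `_tensorsolve_usage_sketch`
# helper is illustrative only, not part of this module's API). With `a` of
# shape ``b.shape + Q`` and ``prod(Q) == prod(b.shape)``, it recovers the
# `x` used to build ``b = tensordot(a, x, axes=x.ndim)``. `Q` is chosen
# two-dimensional here to match the matrix output type that TensorSolve
# declares in make_node.
def _tensorsolve_usage_sketch():
    rng = np.random.RandomState(0)
    a_val = rng.randn(6, 2, 3)                  # b.shape + Q = (6,) + (2, 3)
    x_val = rng.randn(2, 3)
    b_val = np.tensordot(a_val, x_val, axes=2)  # shape (6,)
    a = theano.tensor.dtensor3('a')
    b = theano.tensor.dvector('b')
    f = theano.function([a, b], tensorsolve(a, b))
    assert np.allclose(f(a_val, b_val), x_val)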