from __future__ import absolute_import, print_function, division

import logging
import warnings
import numpy as np
from six.moves import xrange
from functools import partial

import theano
from theano.tensor import as_tensor_variable
from theano.gof import Op, Apply
from theano.gradient import DisconnectedType
from theano.tensor import basic as tensor
from theano.tensor.basic import ExtractDiag
logger = logging.getLogger(__name__)


class MatrixPinv(Op):
    """Computes the pseudo-inverse of a matrix :math:`A`.

    The pseudo-inverse of a matrix :math:`A`, denoted :math:`A^+`, is
    defined as: "the matrix that 'solves' [the least-squares problem]
    :math:`Ax = b`," i.e., if :math:`\\bar{x}` is said solution, then
    :math:`A^+` is that matrix such that :math:`\\bar{x} = A^+b`.

    Note that :math:`Ax=AA^+b`, so :math:`AA^+` is close to the identity matrix.
    This method is not faster than `matrix_inverse`. Its strength lies in the
    fact that it works for non-square matrices.
    If you have a square matrix, though, `matrix_inverse` can be both more
    exact and faster to compute. Also, this op does not get optimized into a
    solve op.

    """

    __props__ = ()

    def __init__(self):
        pass

    def make_node(self, x):
        x = as_tensor_variable(x)
        assert x.ndim == 2
        return Apply(self, [x], [x.type()])

    def perform(self, node, inputs, outputs):
        (x,) = inputs
        (z,) = outputs
        z[0] = np.linalg.pinv(x).astype(x.dtype)

    def L_op(self, inputs, outputs, g_outputs):
        r"""The gradient function should return

            .. math:: V\frac{\partial X^+}{\partial X},

        where :math:`V` corresponds to ``g_outputs`` and :math:`X` to
        ``inputs``. According to `Wikipedia
        <https://en.wikipedia.org/wiki/Moore%E2%80%93Penrose_pseudoinverse#Derivative>`_,
        this corresponds to

            .. math:: (-X^+ V^T X^+ + X^+ X^{+T} V (I - X X^+) + (I - X^+ X) V X^{+T} X^+)^T.
        """
        x, = inputs
        z, = outputs
        gz, = g_outputs

        x_dot_z = theano.tensor.dot(x, z)
        z_dot_x = theano.tensor.dot(z, x)

        grad = (-matrix_dot(z, gz.T, z) +
                matrix_dot(z, z.T, gz, (theano.tensor.identity_like(x_dot_z) - x_dot_z)) +
                matrix_dot((theano.tensor.identity_like(z_dot_x) - z_dot_x), gz, z.T, z)).T
        return [grad]

pinv = MatrixPinv()
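# Example (a minimal sketch, assuming a working Theano install): computing the
# pseudo-inverse of a non-square matrix with the `pinv` helper above.
#
#     >>> import numpy as np
#     >>> import theano
#     >>> A = theano.tensor.dmatrix('A')
#     >>> f = theano.function([A], pinv(A))
#     >>> f(np.array([[1., 0.], [0., 2.], [0., 0.]])).shape
#     (2, 3)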


class MatrixInverse(Op):
    r"""Computes the inverse of a matrix :math:`A`.

    Given a square matrix :math:`A`, ``matrix_inverse`` returns a square
    matrix :math:`A_{inv}` such that the dot products :math:`A \cdot A_{inv}`
    and :math:`A_{inv} \cdot A` equal the identity matrix :math:`I`.

    Notes
    -----
    When possible, the call to this op will be optimized to the call
    of ``solve``.

    """

    __props__ = ()

    def __init__(self):
        pass

    def make_node(self, x):
        x = as_tensor_variable(x)
        assert x.ndim == 2
        return Apply(self, [x], [x.type()])

    def perform(self, node, inputs, outputs):
        (x,) = inputs
        (z,) = outputs
        z[0] = np.linalg.inv(x).astype(x.dtype)

    def grad(self, inputs, g_outputs):
        r"""The gradient function should return

            .. math:: V\frac{\partial X^{-1}}{\partial X},

        where :math:`V` corresponds to ``g_outputs`` and :math:`X` to
        ``inputs``. Using the `matrix cookbook
        <http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=3274>`_,
        one can deduce that the relation corresponds to

            .. math:: (X^{-1} \cdot V^{T} \cdot X^{-1})^T.

        """
        x, = inputs
        xi = self(x)
        gz, = g_outputs
        # TT.dot(gz.T,xi)
        return [-matrix_dot(xi, gz.T, xi).T]

    def R_op(self, inputs, eval_points):
        r"""The R operator should return

            .. math:: \frac{\partial X^{-1}}{\partial X}V,

        where :math:`V` corresponds to ``eval_points`` and :math:`X` to
        ``inputs``. Using the `matrix cookbook
        <http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=3274>`_,
        one can deduce that the relation corresponds to

            .. math:: X^{-1} \cdot V \cdot X^{-1}.

        """
        x, = inputs
        xi = self(x)
        ev, = eval_points
        if ev is None:
            return [None]
        return [-matrix_dot(xi, ev, xi)]

    def infer_shape(self, node, shapes):
        return shapes

matrix_inverse = MatrixInverse()
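# Example (a minimal sketch, assuming a working Theano install): inverting a
# small non-singular matrix and checking the result against NumPy.
#
#     >>> import numpy as np
#     >>> import theano
#     >>> A = theano.tensor.dmatrix('A')
#     >>> f = theano.function([A], matrix_inverse(A))
#     >>> np.allclose(f([[2., 1.], [1., 1.]]), np.linalg.inv([[2., 1.], [1., 1.]]))
#     True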


def matrix_dot(*args):
    r"""Shorthand for chaining several dot products.

    Given :math:`N` matrices :math:`A_0, A_1, .., A_N`, ``matrix_dot`` will
    generate the matrix product of all of them in the given order, namely
    :math:`A_0 \cdot A_1 \cdot A_2 \cdot .. \cdot A_N`.

    """
    rval = args[0]
    for a in args[1:]:
        rval = theano.tensor.dot(rval, a)
    return rval
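# For instance (illustrative sketch), ``matrix_dot(a, b, c)`` builds the same
# graph as ``theano.tensor.dot(theano.tensor.dot(a, b), c)``:
#
#     >>> a, b, c = (theano.tensor.dmatrix(name) for name in 'abc')
#     >>> abc = matrix_dot(a, b, c)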


class AllocDiag(Op):
    """
    Allocates a square matrix with the given vector as its diagonal.
    """

    __props__ = ()

    def make_node(self, _x):
        warnings.warn("DeprecationWarning: theano.tensor.nlinalg.AllocDiag "
                      "is deprecated, please use theano.tensor.AllocDiag "
                      "instead.",
                      category=DeprecationWarning)
        x = as_tensor_variable(_x)
        if x.type.ndim != 1:
            raise TypeError('AllocDiag only works on vectors', _x)
        return Apply(self, [x], [theano.tensor.matrix(dtype=x.type.dtype)])

    def grad(self, inputs, g_outputs):
        return [extract_diag(g_outputs[0])]

    def perform(self, node, inputs, outputs):
        (x,) = inputs
        (z,) = outputs
        if x.ndim != 1:
            raise TypeError(x)
        z[0] = np.diag(x)

    def infer_shape(self, node, shapes):
        x_s, = shapes
        return [(x_s[0], x_s[0])]

alloc_diag = AllocDiag()
extract_diag = ExtractDiag()
# TODO: optimization to insert ExtractDiag with view=True


def diag(x):
    """
    Numpy-compatibility method.
    If `x` is a matrix, return its diagonal.
    If `x` is a vector, return a matrix with it as its diagonal.

    * This method does not support the `k` argument that numpy supports.
    """
    xx = as_tensor_variable(x)
    if xx.type.ndim == 1:
        return alloc_diag(xx)
    elif xx.type.ndim == 2:
        return extract_diag(xx)
    else:
        raise TypeError('diag requires vector or matrix argument', x)


def trace(X):
    """
    Returns the sum of diagonal elements of matrix X.

    Notes
    -----
    Works on GPU since 0.6rc4.

    """
    return extract_diag(X).sum()
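# A small sketch of the NumPy-like helpers above (names are illustrative):
#
#     >>> v = theano.tensor.dvector('v')
#     >>> D = diag(v)      # square matrix with `v` on its diagonal
#     >>> M = theano.tensor.dmatrix('M')
#     >>> t = trace(M)     # scalar: sum of the diagonal of `M`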


class Det(Op):
    """
    Matrix determinant. Input should be a square matrix.

    """

    __props__ = ()

    def make_node(self, x):
        x = as_tensor_variable(x)
        assert x.ndim == 2
        o = theano.tensor.scalar(dtype=x.dtype)
        return Apply(self, [x], [o])

    def perform(self, node, inputs, outputs):
        (x,) = inputs
        (z,) = outputs
        try:
            z[0] = np.asarray(np.linalg.det(x), dtype=x.dtype)
        except Exception:
            print('Failed to compute determinant', x)
            raise

    def grad(self, inputs, g_outputs):
        gz, = g_outputs
        x, = inputs
        return [gz * self(x) * matrix_inverse(x).T]

    def infer_shape(self, node, shapes):
        return [()]

    def __str__(self):
        return "Det"
det = Det()
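# Example (a minimal sketch, assuming a working Theano install):
#
#     >>> A = theano.tensor.dmatrix('A')
#     >>> f = theano.function([A], det(A))
#     >>> float(f([[2., 0.], [0., 3.]]))
#     6.0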


class Eig(Op):
    """
    Compute the eigenvalues and right eigenvectors of a square array.

    """

    _numop = staticmethod(np.linalg.eig)
    __props__ = ()

    def make_node(self, x):
        x = as_tensor_variable(x)
        assert x.ndim == 2
        w = theano.tensor.vector(dtype=x.dtype)
        v = theano.tensor.matrix(dtype=x.dtype)
        return Apply(self, [x], [w, v])

    def perform(self, node, inputs, outputs):
        (x,) = inputs
        (w, v) = outputs
        w[0], v[0] = [z.astype(x.dtype) for z in self._numop(x)]

    def infer_shape(self, node, shapes):
        n = shapes[0][0]
        return [(n,), (n, n)]

eig = Eig()
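# Example (a minimal sketch): `eig` returns the eigenvalues and the right
# eigenvectors as two separate outputs.
#
#     >>> A = theano.tensor.dmatrix('A')
#     >>> w, v = eig(A)
#     >>> f = theano.function([A], [w, v])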


class Eigh(Eig):
    """
    Return the eigenvalues and eigenvectors of a Hermitian or symmetric matrix.

    """

    _numop = staticmethod(np.linalg.eigh)
    __props__ = ('UPLO',)

    def __init__(self, UPLO='L'):
        assert UPLO in ['L', 'U']
        self.UPLO = UPLO

    def make_node(self, x):
        x = as_tensor_variable(x)
        assert x.ndim == 2
        # Numpy's linalg.eigh may return either double or single
        # precision eigenvalues depending on the installed version of
        # LAPACK.  Rather than trying to reproduce the (rather
        # involved) logic, we just probe linalg.eigh with a trivial
        # input.
        w_dtype = self._numop([[np.dtype(x.dtype).type()]])[0].dtype.name
        w = theano.tensor.vector(dtype=w_dtype)
        v = theano.tensor.matrix(dtype=x.dtype)
        return Apply(self, [x], [w, v])

    def perform(self, node, inputs, outputs):
        (x,) = inputs
        (w, v) = outputs
        w[0], v[0] = self._numop(x, self.UPLO)

    def grad(self, inputs, g_outputs):
        r"""The gradient function should return

           .. math:: \sum_n\left(W_n\frac{\partial\,w_n}
                           {\partial a_{ij}} +
                     \sum_k V_{nk}\frac{\partial\,v_{nk}}
                           {\partial a_{ij}}\right),

        where [:math:`W`, :math:`V`] corresponds to ``g_outputs``,
        :math:`a` to ``inputs``, and :math:`(w, v)=\mbox{eig}(a)`.

        Analytic formulae for eigensystem gradients are well-known in
        perturbation theory:

           .. math:: \frac{\partial\,w_n}
                          {\partial a_{ij}} = v_{in}\,v_{jn}


           .. math:: \frac{\partial\,v_{kn}}
                          {\partial a_{ij}} =
                \sum_{m\ne n}\frac{v_{km}v_{jn}}{w_n-w_m}

        """
        x, = inputs
        w, v = self(x)
        # Replace gradients wrt disconnected variables with
        # zeros. This is a work-around for issue #1063.
        gw, gv = _zero_disconnected([w, v], g_outputs)
        return [EighGrad(self.UPLO)(x, w, v, gw, gv)]


def _zero_disconnected(outputs, grads):
    l = []
    for o, g in zip(outputs, grads):
        if isinstance(g.type, DisconnectedType):
            l.append(o.zeros_like())
        else:
            l.append(g)
    return l


class EighGrad(Op):
    """
    Gradient of an eigensystem of a Hermitian matrix.

    """

    __props__ = ('UPLO',)

    def __init__(self, UPLO='L'):
        assert UPLO in ['L', 'U']
        self.UPLO = UPLO
        if UPLO == 'L':
            self.tri0 = np.tril
            self.tri1 = partial(np.triu, k=1)
        else:
            self.tri0 = np.triu
            self.tri1 = partial(np.tril, k=-1)

    def make_node(self, x, w, v, gw, gv):
        x, w, v, gw, gv = map(as_tensor_variable, (x, w, v, gw, gv))
        assert x.ndim == 2
        assert w.ndim == 1
        assert v.ndim == 2
        assert gw.ndim == 1
        assert gv.ndim == 2
        out_dtype = theano.scalar.upcast(x.dtype, w.dtype, v.dtype,
                                         gw.dtype, gv.dtype)
        out = theano.tensor.matrix(dtype=out_dtype)
        return Apply(self, [x, w, v, gw, gv], [out])

    def perform(self, node, inputs, outputs):
        """
        Implements the "reverse-mode" gradient for the eigensystem of
        a square matrix.

        """
        x, w, v, W, V = inputs
        N = x.shape[0]
        outer = np.outer

        def G(n):
            return sum(v[:, m] * V.T[n].dot(v[:, m]) / (w[n] - w[m])
                       for m in xrange(N) if m != n)

        g = sum(outer(v[:, n], v[:, n] * W[n] + G(n))
                for n in xrange(N))

        # Numpy's eigh(a, 'L') (eigh(a, 'U')) is a function of tril(a)
        # (triu(a)) only.  This means that the partial derivative of
        # eigh(a, 'L') (eigh(a, 'U')) with respect to a[i,j] is zero
        # for i < j (i > j).  At the same time, non-zero components of
        # the gradient must account for the fact that variation of the
        # opposite triangle contributes to variation of two elements
        # of the Hermitian (symmetric) matrix. The following line
        # implements the necessary logic.
        out = self.tri0(g) + self.tri1(g).T

        # Make sure we return the right dtype even if NumPy performed
        # upcasting in self.tri0.
        outputs[0][0] = np.asarray(out, dtype=node.outputs[0].dtype)

    def infer_shape(self, node, shapes):
        return [shapes[0]]


def eigh(a, UPLO='L'):
    """Return the eigenvalues and eigenvectors of a Hermitian or symmetric
    matrix `a`, using its lower ('L') or upper ('U') triangle."""
    return Eigh(UPLO)(a)
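# Example (a minimal sketch; the input is assumed to be symmetric):
#
#     >>> A = theano.tensor.dmatrix('A')
#     >>> w, v = eigh(A)               # uses the lower triangle by default
#     >>> f = theano.function([A], [w, v])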


class QRFull(Op):
    """
    Full QR Decomposition.

    Computes the QR decomposition of a matrix.
    Factor the matrix a as qr, where q is orthonormal
    and r is upper-triangular.

    """

    _numop = staticmethod(np.linalg.qr)
    __props__ = ('mode',)

    def __init__(self, mode):
        self.mode = mode

    def make_node(self, x):
        x = as_tensor_variable(x)
        assert x.ndim == 2, "The input of qr function should be a matrix."
        q = theano.tensor.matrix(dtype=x.dtype)
        if self.mode != 'raw':
            r = theano.tensor.matrix(dtype=x.dtype)
        else:
            r = theano.tensor.vector(dtype=x.dtype)

        return Apply(self, [x], [q, r])

    def perform(self, node, inputs, outputs):
        (x,) = inputs
        (q, r) = outputs
        assert x.ndim == 2, "The input of qr function should be a matrix."
        q[0], r[0] = self._numop(x, self.mode)


class QRIncomplete(Op):
    """
    Incomplete QR Decomposition.

    Computes the QR decomposition of a matrix.
    Factor the matrix a as qr and return a single matrix R.

    """

    _numop = staticmethod(np.linalg.qr)
    __props__ = ('mode',)

    def __init__(self, mode):
        self.mode = mode

    def make_node(self, x):
        x = as_tensor_variable(x)
        assert x.ndim == 2, "The input of qr function should be a matrix."
        r = theano.tensor.matrix(dtype=x.dtype)
        return Apply(self, [x], [r])

    def perform(self, node, inputs, outputs):
        (x,) = inputs
        (r,) = outputs
        assert x.ndim == 2, "The input of qr function should be a matrix."
        r[0] = self._numop(x, self.mode)


def qr(a, mode="reduced"):
    """
    Computes the QR decomposition of a matrix.
    Factor the matrix a as qr, where q
    is orthonormal and r is upper-triangular.

    Parameters
    ----------
    a : array_like, shape (M, N)
        Matrix to be factored.

    mode : {'reduced', 'complete', 'r', 'raw'}, optional
        If K = min(M, N), then

        'reduced'
          returns q, r with dimensions (M, K), (K, N)

        'complete'
          returns q, r with dimensions (M, M), (M, N)

        'r'
          returns r only with dimensions (K, N)

        'raw'
          returns h, tau with dimensions (N, M), (K,)

        Note that array h returned in 'raw' mode is
        transposed for calling Fortran.

        Default mode is 'reduced'.

    Returns
    -------
    q : matrix of float or complex, optional
        A matrix with orthonormal columns. When mode = 'complete' the
        result is an orthogonal/unitary matrix depending on whether or
        not a is real/complex. The determinant may be either +/- 1 in
        that case.
    r : matrix of float or complex, optional
        The upper-triangular matrix.

    """
    # Probe NumPy's qr with a small fixed matrix to find out whether the
    # requested mode returns a (q, r) tuple or a single array, and pick
    # the matching op accordingly.
    x = [[2, 1], [3, 4]]
    if isinstance(np.linalg.qr(x, mode), tuple):
        return QRFull(mode)(a)
    else:
        return QRIncomplete(mode)(a)
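# Example (a minimal sketch): the default 'reduced' mode yields both factors,
# while 'r' yields only the triangular factor.
#
#     >>> A = theano.tensor.dmatrix('A')
#     >>> q, r = qr(A)
#     >>> r_only = qr(A, mode='r')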


class SVD(Op):
    """
    Computes the singular value decomposition of a matrix.

    Parameters
    ----------
    full_matrices : bool, optional
        If True (default), u and v have the shapes (M, M) and (N, N),
        respectively.
        Otherwise, the shapes are (M, K) and (K, N), respectively,
        where K = min(M, N).
    compute_uv : bool, optional
        Whether or not to compute u and v in addition to s.
        True by default.

    """

    # See doc in the docstring of the function just after this class.
    _numop = staticmethod(np.linalg.svd)
    __props__ = ('full_matrices', 'compute_uv')

    def __init__(self, full_matrices=True, compute_uv=True):
        self.full_matrices = full_matrices
        self.compute_uv = compute_uv

    def make_node(self, x):
        x = as_tensor_variable(x)
        assert x.ndim == 2, "The input of svd function should be a matrix."
        s = theano.tensor.vector(dtype=x.dtype)
        if self.compute_uv:
            u = theano.tensor.matrix(dtype=x.dtype)
            vt = theano.tensor.matrix(dtype=x.dtype)
            return Apply(self, [x], [u, s, vt])
        else:
            return Apply(self, [x], [s])

    def perform(self, node, inputs, outputs):
        (x,) = inputs
        assert x.ndim == 2, "The input of svd function should be a matrix."
        if self.compute_uv:
            u, s, vt = outputs
            u[0], s[0], vt[0] = self._numop(x,
                                            self.full_matrices,
                                            self.compute_uv)
        else:
            s, = outputs
            s[0] = self._numop(x, self.full_matrices, self.compute_uv)

    def infer_shape(self, node, shapes):
        x_shape, = shapes
        M, N = x_shape
        K = tensor.minimum(M, N)
        s_shape = (K, )
        if self.compute_uv:
            u_shape = (M, M) if self.full_matrices else (M, K)
            vt_shape = (N, N) if self.full_matrices else (K, N)
            return [u_shape, s_shape, vt_shape]
        else:
            return [s_shape]


def svd(a, full_matrices=1, compute_uv=1):
    """
    This function performs the SVD on CPU.

    Parameters
    ----------
    full_matrices : bool, optional
        If True (default), u and v have the shapes (M, M) and (N, N),
        respectively.
        Otherwise, the shapes are (M, K) and (K, N), respectively,
        where K = min(M, N).
    compute_uv : bool, optional
        Whether or not to compute u and v in addition to s.
        True by default.

    Returns
    -------
    u, s, vt : matrices
        The factors of the decomposition. If ``compute_uv`` is false,
        only the vector of singular values ``s`` is returned.

    """
    return SVD(full_matrices, compute_uv)(a)
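# Example (a minimal sketch):
#
#     >>> A = theano.tensor.dmatrix('A')
#     >>> u, s, vt = svd(A)
#     >>> s_only = svd(A, compute_uv=0)     # only the singular values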


class lstsq(Op):
    """
    Computes the least-squares solution to a linear matrix equation
    (a wrapper around ``numpy.linalg.lstsq``). The four outputs are the
    solution, the residuals, the rank of the coefficient matrix, and its
    singular values.
    """

    __props__ = ()

    def make_node(self, x, y, rcond):
        x = theano.tensor.as_tensor_variable(x)
        y = theano.tensor.as_tensor_variable(y)
        rcond = theano.tensor.as_tensor_variable(rcond)
        return theano.Apply(self, [x, y, rcond],
                            [theano.tensor.matrix(), theano.tensor.dvector(),
                             theano.tensor.lscalar(), theano.tensor.dvector()])

    def perform(self, node, inputs, outputs):
        zz = np.linalg.lstsq(inputs[0], inputs[1], inputs[2])
        outputs[0][0] = zz[0]
        outputs[1][0] = zz[1]
        outputs[2][0] = np.array(zz[2])
        outputs[3][0] = zz[3]


def matrix_power(M, n):
    """
    Raise a square matrix to the (integer) power n.

    Parameters
    ----------
    M : Tensor variable
    n : Python int
    """
    result = 1
    for i in xrange(n):
        result = theano.dot(result, M)
    return result
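# For example (illustrative sketch), ``matrix_power(M, 3)`` builds the graph
# for ``M . M . M`` via repeated ``theano.dot`` calls:
#
#     >>> M = theano.tensor.dmatrix('M')
#     >>> M_cubed = matrix_power(M, 3)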


def norm(x, ord):
    """
    Computes the vector or matrix norm of `x` for the given order `ord`,
    mirroring a subset of the orders supported by ``numpy.linalg.norm``.
    """
    x = as_tensor_variable(x)
    ndim = x.ndim
    if ndim == 0:
        raise ValueError("'axis' entry is out of bounds.")
    elif ndim == 1:
        if ord is None:
            return tensor.sum(x**2)**0.5
        elif ord == 'inf':
            return tensor.max(abs(x))
        elif ord == '-inf':
            return tensor.min(abs(x))
        elif ord == 0:
            return x[x.nonzero()].shape[0]
        else:
            try:
                z = tensor.sum(abs(x**ord))**(1. / ord)
            except TypeError:
                raise ValueError("Invalid norm order for vectors.")
            return z
    elif ndim == 2:
        if ord is None or ord == 'fro':
            return tensor.sum(abs(x**2))**(0.5)
        elif ord == 'inf':
            return tensor.max(tensor.sum(abs(x), 1))
        elif ord == '-inf':
            return tensor.min(tensor.sum(abs(x), 1))
        elif ord == 1:
            return tensor.max(tensor.sum(abs(x), 0))
        elif ord == -1:
            return tensor.min(tensor.sum(abs(x), 0))
        else:
            raise ValueError("Invalid norm order for matrices.")
    elif ndim > 2:
        raise NotImplementedError("We don't support norm with ndim > 2")
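# Example (a minimal sketch; `ord` follows the cases handled above):
#
#     >>> v = theano.tensor.dvector('v')
#     >>> l2 = norm(v, None)        # Euclidean norm of a vector
#     >>> M = theano.tensor.dmatrix('M')
#     >>> fro = norm(M, 'fro')      # Frobenius norm of a matrix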


class TensorInv(Op):
    """
    Theano wrapper around ``numpy.linalg.tensorinv``; see the ``tensorinv``
    function below for details.
    """
    _numop = staticmethod(np.linalg.tensorinv)
    __props__ = ('ind',)

    def __init__(self, ind=2):
        self.ind = ind

    def make_node(self, a):
        a = as_tensor_variable(a)
        out = a.type()
        return Apply(self, [a], [out])

    def perform(self, node, inputs, outputs):
        (a,) = inputs
        (x,) = outputs
        x[0] = self._numop(a, self.ind)

    def infer_shape(self, node, shapes):
        sp = shapes[0][self.ind:] + shapes[0][:self.ind]
        return [sp]


def tensorinv(a, ind=2):
    """
    Theano wrapper around ``numpy.linalg.tensorinv``. Does not run on the GPU.

    Compute the 'inverse' of an N-dimensional array.
    The result is an inverse for `a` relative to the tensordot operation
    ``tensordot(a, b, ind)``, i.e., up to floating-point accuracy,
    ``tensordot(tensorinv(a), a, ind)`` is the "identity" tensor for the
    tensordot operation.

    Parameters
    ----------
    a : array_like
        Tensor to 'invert'. Its shape must be 'square', i.e.,
        ``prod(a.shape[:ind]) == prod(a.shape[ind:])``.
    ind : int, optional
        Number of first indices that are involved in the inverse sum.
        Must be a positive integer, default is 2.

    Returns
    -------
    b : ndarray
        `a`'s tensordot inverse, shape ``a.shape[ind:] + a.shape[:ind]``.

    Raises
    ------
    LinAlgError
        If `a` is singular or not 'square' (in the above sense).
    """
    return TensorInv(ind)(a)
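# Example (a minimal sketch): inverting a 'square' 4-d tensor, i.e. one for
# which prod(shape[:2]) == prod(shape[2:]).
#
#     >>> a = theano.tensor.tensor4('a')
#     >>> a_inv = tensorinv(a)      # shape a.shape[2:] + a.shape[:2]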


class TensorSolve(Op):
    """
    Theano wrapper around ``numpy.linalg.tensorsolve``; see the
    ``tensorsolve`` function below for details.

    """
    _numop = staticmethod(np.linalg.tensorsolve)
    __props__ = ('axes', )

    def __init__(self, axes=None):
        self.axes = axes

    def make_node(self, a, b):
        a = as_tensor_variable(a)
        b = as_tensor_variable(b)
        out_dtype = theano.scalar.upcast(a.dtype, b.dtype)
        x = theano.tensor.matrix(dtype=out_dtype)
        return Apply(self, [a, b], [x])

    def perform(self, node, inputs, outputs):
        (a, b,) = inputs
        (x,) = outputs
        x[0] = self._numop(a, b, self.axes)


def tensorsolve(a, b, axes=None):
    """
    Theano wrapper around ``numpy.linalg.tensorsolve``. Does not run on the GPU.

    Solve the tensor equation ``a x = b`` for x.
    It is assumed that all indices of `x` are summed over in the product,
    together with the rightmost indices of `a`, as is done in, for example,
    ``tensordot(a, x, axes=len(b.shape))``.

    Parameters
    ----------
    a : array_like
        Coefficient tensor, of shape ``b.shape + Q``. `Q`, a tuple, equals
        the shape of that sub-tensor of `a` consisting of the appropriate
        number of its rightmost indices, and must be such that
        ``prod(Q) == prod(b.shape)`` (in which sense `a` is said to be
        'square').
    b : array_like
        Right-hand tensor, which can be of any shape.
    axes : tuple of ints, optional
        Axes in `a` to reorder to the right, before inversion.
        If None (default), no reordering is done.

    Returns
    -------
    x : ndarray, shape Q

    Raises
    ------
    LinAlgError
        If `a` is singular or not 'square' (in the above sense).
    """

    return TensorSolve(axes)(a, b)
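# Example (a minimal sketch): solving ``a x = b`` where `a` is 4-d and `b`
# is a matrix, as in ``numpy.linalg.tensorsolve``.
#
#     >>> a = theano.tensor.tensor4('a')
#     >>> b = theano.tensor.dmatrix('b')
#     >>> x = tensorsolve(a, b)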