1from __future__ import absolute_import, print_function, division
2import logging
3
4import numpy as np
5from theano import Apply, tensor
6from theano.gof import COp, ParamsType
7from theano.tensor import discrete_dtypes, as_tensor_variable
8from theano.scalar import bool as bool_t
9
10from theano.gradient import grad_undefined
11
12from .type import gpu_context_type
13from .basic_ops import as_gpuarray_variable, infer_context_name, gpuarray_helper_inc_dir
14
15_logger = logging.getLogger('theano.gpuarray.blocksparse')
16
17
class GpuSparseBlockGemv(COp):
    """
    GPU counterpart of SparseBlockGemv; refer to SparseBlockGemv's docstring
    for a description of the computation.

    Do not call this op directly: its interface may change without
    notice.  The stable entry point is the
    sandbox.blocksparse.sparse_block_dot() function.

    Parameters
    ----------
    inplace : bool
        Stored on the op and exposed to the C implementation through
        ``params_type``.  When true, ``destroy_map`` declares that
        output 0 destroys input 0.
    """
    __props__ = ('inplace',)
    params_type = ParamsType(inplace=bool_t, context=gpu_context_type)
    # NB: the C code relies on DTYPE_INPUT_*, so check_input should
    # probably not be set to False for this op.

    def __init__(self, inplace=False):
        COp.__init__(self, "c_code/blockgemv.c", "APPLY_SPECIFIC(blockgemv)")
        self.inplace = inplace
        if self.inplace:
            # Output 0 overwrites the storage of input 0.
            self.destroy_map = {0: [0]}

    def get_params(self, node):
        """Return the op params, using the context of the node's first input."""
        gpu_context = node.inputs[0].type.context
        return self.params_type.get_params(self, context=gpu_context)

    def c_headers(self):
        """Return the headers included by the C implementation."""
        return [
            '<gpuarray/buffer_blas.h>',
            '<gpuarray/buffer.h>',
            '<gpuarray_helper.h>',
        ]

    def c_header_dirs(self):
        """Return the include directory holding the gpuarray helper header."""
        return [gpuarray_helper_inc_dir()]

    def make_node(self, o, W, h, inputIdx, outputIdx):
        """
        Build the Apply node for this op.

        ``o``, ``W`` and ``h`` are converted to GPU array variables on the
        context inferred from them; ``inputIdx`` and ``outputIdx`` are
        converted to tensor variables.  The expected ranks are 3 for ``o``
        and ``h``, 4 for ``W`` and 2 for both index inputs, and the index
        dtypes must belong to ``discrete_dtypes``.  The single output has
        the same type as ``o``.
        """
        context_name = infer_context_name(o, W, h)
        o, W, h = [as_gpuarray_variable(var, context_name)
                   for var in (o, W, h)]
        inputIdx, outputIdx = [as_tensor_variable(idx)
                               for idx in (inputIdx, outputIdx)]

        # Rank checks, performed in the same order as the inputs.
        expected_ranks = ((o, 3), (W, 4), (h, 3),
                          (inputIdx, 2), (outputIdx, 2))
        for var, rank in expected_ranks:
            assert var.ndim == rank

        # Index inputs must use a discrete dtype.
        for idx in (inputIdx, outputIdx):
            assert idx.type.dtype in discrete_dtypes

        node_inputs = [o, W, h, inputIdx, outputIdx]
        return Apply(self, node_inputs, [o.type()])

    def infer_shape(self, node, input_shapes):
        """The output has the shape of the first input (``o``)."""
        o_shape = input_shapes[0]
        return [o_shape]

    def grad(self, inputs, grads):
        """
        Return the gradients with respect to each of the five inputs.

        The output gradient is passed through unchanged for ``o``.  The
        gradient for ``W`` is built with ``gpu_sparse_block_outer`` and
        the gradient for ``h`` with ``gpu_sparse_block_gemv`` applied to
        a dimshuffled ``W`` with the two index inputs swapped.  The
        gradients for the index inputs are undefined.
        """
        _, weights, hidden, in_idx, out_idx = inputs
        g_out = grads[0]

        weights_zeros = weights.zeros_like()
        g_weights = gpu_sparse_block_outer(weights_zeros, hidden, g_out,
                                           in_idx, out_idx)

        hidden_zeros = hidden.zeros_like()
        # Swap the two block axes and the two within-block axes of W.
        weights_swapped = weights.dimshuffle((1, 0, 3, 2))
        g_hidden = gpu_sparse_block_gemv(hidden_zeros, weights_swapped,
                                         g_out, out_idx, in_idx)

        g_in_idx = grad_undefined(self, 3, in_idx,
                                  "grad of inputIdx makes no sense")
        g_out_idx = grad_undefined(self, 4, out_idx,
                                   "grad of outputIdx makes no sense")
        return [g_out, g_weights, g_hidden, g_in_idx, g_out_idx]
84
85
# Shared module-level instances of GpuSparseBlockGemv, with `inplace`
# disabled and enabled respectively.
gpu_sparse_block_gemv = GpuSparseBlockGemv(inplace=False)
gpu_sparse_block_gemv_inplace = GpuSparseBlockGemv(inplace=True)
88
89
class GpuSparseBlockOuter(COp):
    """
    GPU counterpart of SparseBlockOuter; refer to SparseBlockOuter's
    docstring for a description of the computation.

    Do not call this op directly: its interface may change without
    notice.  It is used when computing the gradient of
    GpuSparseBlockGemv.  No gradient is implemented for this op itself.

    Parameters
    ----------
    inplace : bool
        Stored on the op and exposed to the C implementation through
        ``params_type``.  When true, ``destroy_map`` declares that
        output 0 destroys input 0.
    """
    __props__ = ('inplace',)
    params_type = ParamsType(inplace=bool_t, context=gpu_context_type)

    def __init__(self, inplace=False):
        COp.__init__(self, ["c_code/blockger.c"], "APPLY_SPECIFIC(blockger)")
        self.inplace = inplace
        if self.inplace:
            # Output 0 overwrites the storage of input 0.
            self.destroy_map = {0: [0]}

    def get_params(self, node):
        """Return the op params, using the context of the node's first input."""
        gpu_context = node.inputs[0].type.context
        return self.params_type.get_params(self, context=gpu_context)

    def c_headers(self):
        """Return the headers included by the C implementation."""
        return [
            '<gpuarray/buffer_blas.h>',
            '<gpuarray/buffer.h>',
            '<gpuarray_helper.h>',
        ]

    def c_header_dirs(self):
        """Return the include directory holding the gpuarray helper header."""
        return [gpuarray_helper_inc_dir()]

    def make_node(self, o, x, y, xIdx, yIdx, alpha=None):
        """
        Build the Apply node for this op.

        ``o``, ``x`` and ``y`` are converted to GPU array variables on the
        context inferred from them; ``xIdx`` and ``yIdx`` are converted to
        tensor variables.  ``alpha`` is passed through as given; when it
        is None a float32 constant equal to 1.0 is used instead.  The
        single output has the same type as ``o``.
        """
        context_name = infer_context_name(o, x, y)
        default_alpha = tensor.constant(np.asarray(1.0, dtype='float32'))
        o, x, y = [as_gpuarray_variable(var, context_name)
                   for var in (o, x, y)]
        xIdx, yIdx = [as_tensor_variable(idx) for idx in (xIdx, yIdx)]
        scale = default_alpha if alpha is None else alpha
        node_inputs = [o, x, y, xIdx, yIdx, scale]
        return Apply(self, node_inputs, [o.type()])

    def infer_shape(self, node, input_shapes):
        """The output has the shape of the first input (``o``)."""
        o_shape = input_shapes[0]
        return [o_shape]
133
# Shared module-level instances of GpuSparseBlockOuter, with `inplace`
# disabled and enabled respectively.
gpu_sparse_block_outer = GpuSparseBlockOuter(inplace=False)
gpu_sparse_block_outer_inplace = GpuSparseBlockOuter(inplace=True)
136