import numpy

from chainer import backend
from chainer.backends import intel64
from chainer import function_node
import chainer.functions
from chainer.graph_optimizations import static_code
from chainer import utils
from chainer.utils import type_check
import chainerx


class LinearFunction(function_node.FunctionNode):
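    """Linear function node computing ``y = x.dot(W.T) + b``.

    ``x`` is a 2-D input of shape ``(batch_size, in_size)``, ``W`` is a
    weight matrix of shape ``(out_size, in_size)`` and ``b`` is an
    optional bias vector of shape ``(out_size,)``. The node supports the
    static subgraph optimizations as well as the iDeep and ChainerX
    backends.
    """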

    _config_use_ideep = None
    _supports_static_optimizations = True

    def check_type_forward(self, in_types):
        n_in = in_types.size()
        type_check.expect(2 <= n_in, n_in <= 3)
        x_type, w_type = in_types[:2]
        type_check._argname((x_type, w_type), ('x', 'W'))

        type_check.expect(
            x_type.dtype.kind == 'f',
            w_type.dtype.kind == 'f',
            x_type.ndim == 2,
            w_type.ndim == 2,
            x_type.shape[1] == w_type.shape[1],
        )
        if type_check.eval(n_in) == 3:
            b_type = in_types[2]
            type_check._argname((b_type,), ('b',))
            type_check.expect(
                b_type.dtype == x_type.dtype,
                b_type.ndim == 1,
                b_type.shape[0] == w_type.shape[0],
            )

    @static_code
    def static_linear_no_bias(self, xp, optimized, inputs, outputs):
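        """Compute ``y[...] = x.dot(W.T)`` into a preallocated array.

        Args:
            xp: Array module of the inputs (NumPy or CuPy).
            optimized (bool): ``True`` when ``x`` and ``W`` share a dtype,
                in which case the product is written directly into ``y``
                via ``xp.dot(x, W.T, out=y)``.
            inputs (list): ``[x, W]``.
            outputs (list): ``[y]``, the preallocated output array.
        """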
        x, W = inputs
        y = outputs[0]
        # NumPy raises an error when the array is not contiguous.
        # See: https://github.com/chainer/chainer/issues/2744
        # TODO(niboshi): Remove this code when NumPy is fixed.
        if (isinstance(x, numpy.ndarray) and
                not (x.flags.c_contiguous or x.flags.f_contiguous) and
                1 in x.shape):
            x = numpy.ascontiguousarray(x)

        if optimized:
            # Note: We can only call this function when both x and W
            # have the same dtype. Otherwise, the output type (for y)
            # may not be as expected (i.e., not the same dtype as x).
            xp.dot(x, W.T, out=y)
        else:
            y[:] = x.dot(W.T).astype(x.dtype, copy=False)

    @static_code
    def static_add_bias(self, inputs, outputs):
        bias = inputs[0]
        y = outputs[0]
        y += bias

    def forward_chainerx(self, inputs):
        # TODO(niboshi): Support dtype casting in ChainerX
        if inputs[0].dtype != inputs[1].dtype:
            return chainer.Fallback

        # Generic implementation
        if len(inputs) == 3:
            x, W, b = inputs
            if x.dtype != b.dtype:
                return chainer.Fallback
            return chainerx.linear(x, W, b),
        else:
            x, W = inputs
            return chainerx.linear(x, W),

    def forward(self, inputs):
        self._config_use_ideep = chainer.config.use_ideep
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(inputs)):
            # iDeep implementation
            return self._forward_ideep(inputs)

        # Generic implementation
        if len(inputs) == 3:
            x, W, b = inputs
        else:
            (x, W), b = inputs, None

        # NumPy raises an error when the array is not contiguous.
        # See: https://github.com/chainer/chainer/issues/2744
        # TODO(niboshi): Remove this code when NumPy is fixed.
        if (isinstance(x, numpy.ndarray) and
                not (x.flags.c_contiguous or x.flags.f_contiguous) and
                1 in x.shape):
            x = numpy.ascontiguousarray(x)

        # In order to be compatible with the "static graph" feature, it is
        # required that all output arrays of this forward
        # function be allocated explicitly:
        xp = backend.get_array_module(x)
        y = xp.empty((x.shape[0], W.shape[0]), dtype=x.dtype)

        # This is required because all of the "static_*()" functions
        # use the convention that any output arrays are supplied
        # as input arguments to the function. That is because it is
        # not allowed for a "static_*()" function to return anything
        # other than `None`. The reason is to prevent dynamic allocation
        # of output arrays during execution of the static schedule
        # because it would break the model.
        self.static_linear_no_bias(xp, x.dtype == W.dtype, inputs=[x, W],
                                   outputs=[y])
        if len(inputs) == 3:
            self.static_add_bias(inputs=[b], outputs=[y])

        self.retain_inputs((0, 1))  # b is not retained
        return y,

    def _forward_ideep(self, inputs):
        if len(inputs) == 3:
            x, W, b = inputs
        else:
            (x, W), b = inputs, None

        y = intel64.ideep.linear.Forward(
            intel64.ideep.array(x),
            intel64.ideep.array(W),
            intel64.ideep.array(b) if b is not None else None)

        self.retain_inputs((0, 1))
        return y,

    def backward(self, indexes, grad_outputs):
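        """Compute the gradients of the linear function.

        With ``gy`` denoting the gradient of the output, the returned
        gradients are ``gx = gy.dot(W)``, ``gW = gy.T.dot(x)`` and
        ``gb = sum(gy, axis=0)``, each cast to the dtype of the
        corresponding input.
        """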
        x, W = self.get_retained_inputs()
        gy, = grad_outputs
        ret = []
        with chainer.using_config('use_ideep', self._config_use_ideep):
            if 0 in indexes:
                gx, = LinearGradData().apply((W, gy))
                ret.append(chainer.functions.cast(gx, x.dtype))
            if 1 in indexes:
                gW, = LinearGradWeight(W.dtype).apply((x, gy))
                ret.append(chainer.functions.cast(gW, W.dtype))
            if 2 in indexes:
                gb = chainer.functions.sum(gy, axis=0)
                ret.append(gb)

        return ret


class LinearGradData(function_node.FunctionNode):
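    """Computes the gradient of a linear function w.r.t. its input.

    Given the weight ``W`` and the output gradient ``gy``, the forward
    pass returns ``gx = gy.dot(W)``.
    """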

    _config_use_ideep = None

    def forward(self, inputs):
        self._config_use_ideep = chainer.config.use_ideep
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(inputs)):
            # iDeep implementation
            return self._forward_ideep(inputs)

        # Generic implementation
        self.retain_inputs((0, 1))
        W, gy = inputs

        if (isinstance(gy, numpy.ndarray) and
                not (gy.flags.c_contiguous or gy.flags.f_contiguous) and
                1 in gy.shape):
            gy = numpy.ascontiguousarray(gy)

        gx = gy.dot(W).astype(gy.dtype, copy=False)
        return gx,

    def _forward_ideep(self, inputs):
        self.retain_inputs((0, 1))
        W, gy = inputs
        gx = intel64.ideep.linear.BackwardData(
            intel64.ideep.array(W),
            intel64.ideep.array(gy))
        return gx,

    def backward(self, indexes, grad_outputs):
        W, gy = self.get_retained_inputs()
        ggx, = grad_outputs

        ret = []
        with chainer.using_config('use_ideep', self._config_use_ideep):
            if 0 in indexes:
                gw, = LinearGradWeight(W.dtype).apply((ggx, gy))
                ret.append(chainer.functions.cast(gw, W.dtype))
            if 1 in indexes:
                ggy = linear(ggx, W)
                ret.append(chainer.functions.cast(ggy, gy.dtype))
        return ret


class LinearGradWeight(function_node.FunctionNode):
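    """Computes the gradient of a linear function w.r.t. its weight.

    Given the input ``x`` and the output gradient ``gy``, the forward
    pass returns ``gW = gy.T.dot(x)`` cast to the weight dtype passed to
    the constructor.
    """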

    _config_use_ideep = None

    def __init__(self, w_dtype):
        self._w_dtype = w_dtype

    def forward(self, inputs):
        self._config_use_ideep = chainer.config.use_ideep
        if (intel64.should_use_ideep('>=auto')
                and self._w_dtype == numpy.float32
                and intel64.inputs_all_ready(inputs)):
            # iDeep implementation
            return self._forward_ideep(inputs)

        # Generic implementation
        self.retain_inputs((0, 1))
        x, gy = inputs

        if (isinstance(gy, numpy.ndarray) and
                not (gy.flags.c_contiguous or gy.flags.f_contiguous) and
                1 in gy.shape):
            gy = numpy.ascontiguousarray(gy)

        gW = gy.T.dot(x).astype(self._w_dtype, copy=False)
        return gW,

    def _forward_ideep(self, inputs):
        self.retain_inputs((0, 1))
        x, gy = inputs
        gW = intel64.ideep.linear.BackwardWeights(
            intel64.ideep.array(x),
            intel64.ideep.array(gy))
        return gW,

    def backward(self, indexes, grad_outputs):
        x, gy = self.get_retained_inputs()
        ggW, = grad_outputs

        ret = []
        with chainer.using_config('use_ideep', self._config_use_ideep):
            if 0 in indexes:
                gx, = LinearGradData().apply((ggW, gy))
                ret.append(chainer.functions.cast(gx, x.dtype))
            if 1 in indexes:
                ggy = linear(x, ggW)
                ret.append(chainer.functions.cast(ggy, gy.dtype))
        return ret


def linear(x, W, b=None, n_batch_axes=1):
    """Linear function, or affine transformation.

    It accepts two or three arguments: an input minibatch ``x``, a weight
    matrix ``W``, and optionally a bias vector ``b``. It computes

    .. math:: y_i = W x_i + b.

    Args:
        x (:class:`~chainer.Variable` or :ref:`ndarray`): Input variable,
            which is a :math:`(s_1, s_2, ..., s_n)`-shaped float array.
            Its first ``n_batch_axes`` dimensions are handled as
            *minibatch dimensions*. The remaining dimensions are flattened
            into a single dimension whose size must be
            :math:`(s_{{\\rm n\\_batch\\_axes} + 1} * ... * s_n = N)`.
        W (:class:`~chainer.Variable` or :ref:`ndarray`):
            Weight variable of shape :math:`(M, N)`,
            where :math:`(N = s_{{\\rm n\\_batch\\_axes} + 1} * ... * s_n)`.
        b (:class:`~chainer.Variable` or :ref:`ndarray`):
            Bias variable (optional) of shape :math:`(M,)`.
        n_batch_axes (int): The number of batch axes. The default is 1. The
            input variable is reshaped into a
            (:math:`{\\rm n\\_batch\\_axes} + 1`)-dimensional tensor.
            This should be greater than 0.

    Returns:
        ~chainer.Variable: Output variable. A float array with shape
        of :math:`(s_1, ..., s_{\\rm n\\_batch\\_axes}, M)`.

    .. seealso::

        :class:`~chainer.links.Linear` to manage the model parameters
        ``W`` and ``b``.

    .. admonition:: Example

        >>> x = np.random.uniform(0, 1, (3, 4)).astype(np.float32)
        >>> W = np.random.uniform(0, 1, (5, 4)).astype(np.float32)
        >>> b = np.random.uniform(0, 1, (5,)).astype(np.float32)
        >>> y = F.linear(x, W, b)
        >>> y.shape
        (3, 5)
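
        With ``n_batch_axes=2``, the leading two axes of ``x`` are kept as
        batch dimensions (reusing ``W`` and ``b`` from above):

        >>> x = np.random.uniform(0, 1, (2, 3, 4)).astype(np.float32)
        >>> y = F.linear(x, W, b, n_batch_axes=2)
        >>> y.shape
        (2, 3, 5)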

    """
    if n_batch_axes <= 0:
        raise ValueError('n_batch_axes should be greater than 0.')
    if n_batch_axes > 1:
        batch_shape = x.shape[:n_batch_axes]
        batch_size = utils.size_of_shape(batch_shape)
        x = x.reshape(batch_size, -1)
    elif x.ndim > 2:
        x = x.reshape(x.shape[0], -1)
    if b is None:
        args = x, W
    else:
        args = x, W, b

    y, = LinearFunction().apply(args)
    if n_batch_axes > 1:
        y = y.reshape(batch_shape + (-1,))
    return y