import numpy

from chainer import backend
from chainer.backends import intel64
from chainer import function_node
import chainer.functions
from chainer.graph_optimizations import static_code
from chainer import utils
from chainer.utils import type_check
import chainerx


class LinearFunction(function_node.FunctionNode):

    _config_use_ideep = None
    _supports_static_optimizations = True

    def check_type_forward(self, in_types):
        n_in = in_types.size()
        type_check.expect(2 <= n_in, n_in <= 3)
        x_type, w_type = in_types[:2]
        type_check._argname((x_type, w_type), ('x', 'W'))

        type_check.expect(
            x_type.dtype.kind == 'f',
            w_type.dtype.kind == 'f',
            x_type.ndim == 2,
            w_type.ndim == 2,
            x_type.shape[1] == w_type.shape[1],
        )
        if type_check.eval(n_in) == 3:
            b_type = in_types[2]
            type_check._argname((b_type,), ('b',))
            type_check.expect(
                b_type.dtype == x_type.dtype,
                b_type.ndim == 1,
                b_type.shape[0] == w_type.shape[0],
            )

    @static_code
    def static_linear_no_bias(self, xp, optimized, inputs, outputs):
        x, W = inputs
        y = outputs[0]
        # NumPy raises an error when the array is not contiguous.
        # See: https://github.com/chainer/chainer/issues/2744
        # TODO(niboshi): Remove this code when NumPy is fixed.
        if (isinstance(x, numpy.ndarray) and
                not (x.flags.c_contiguous or x.flags.f_contiguous) and
                1 in x.shape):
            x = numpy.ascontiguousarray(x)

        if optimized:
            # Note: We can only call this function when both x and W
            # have the same dtype. Otherwise, the output type (for y)
            # may not be as expected (i.e., not the same dtype as x).
            xp.dot(x, W.T, out=y)
        else:
            y[:] = x.dot(W.T).astype(x.dtype, copy=False)

    @static_code
    def static_add_bias(self, inputs, outputs):
        bias = inputs[0]
        y = outputs[0]
        y += bias

    def forward_chainerx(self, inputs):
        # TODO(niboshi): Support dtype casting in ChainerX
        if inputs[0].dtype != inputs[1].dtype:
            return chainer.Fallback

        # Generic implementation
        if len(inputs) == 3:
            x, W, b = inputs
            if x.dtype != b.dtype:
                return chainer.Fallback
            return chainerx.linear(x, W, b),
        else:
            x, W = inputs
            return chainerx.linear(x, W),

    def forward(self, inputs):
        self._config_use_ideep = chainer.config.use_ideep
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(inputs)):
            # iDeep implementation
            return self._forward_ideep(inputs)

        # Generic implementation
        if len(inputs) == 3:
            x, W, b = inputs
        else:
            (x, W), b = inputs, None

        # NumPy raises an error when the array is not contiguous.
        # See: https://github.com/chainer/chainer/issues/2744
        # TODO(niboshi): Remove this code when NumPy is fixed.
        if (isinstance(x, numpy.ndarray) and
                not (x.flags.c_contiguous or x.flags.f_contiguous) and
                1 in x.shape):
            x = numpy.ascontiguousarray(x)

        # In order to be compatible with the "static graph" feature, it is
        # required that all output arrays of this forward
        # function be allocated explicitly:
        xp = backend.get_array_module(x)
        y = xp.empty((x.shape[0], W.shape[0]), dtype=x.dtype)

        # This is required because all of the "static_*()" functions
        # use the convention that any output arrays are supplied
        # as input arguments to the function. That is because it is
        # not allowed for a "static_*()" function to return anything
        # other than `None`. The reason is to prevent dynamic allocation
        # of output arrays during execution of the static schedule
        # because it would break the model.
        self.static_linear_no_bias(xp, x.dtype == W.dtype, inputs=[x, W],
                                   outputs=[y])
        if len(inputs) == 3:
            self.static_add_bias(inputs=[b], outputs=[y])

        self.retain_inputs((0, 1))  # b is not retained
        return y,

    def _forward_ideep(self, inputs):
        if len(inputs) == 3:
            x, W, b = inputs
        else:
            (x, W), b = inputs, None

        y = intel64.ideep.linear.Forward(
            intel64.ideep.array(x),
            intel64.ideep.array(W),
            intel64.ideep.array(b) if b is not None else None)

        self.retain_inputs((0, 1))
        return y,

    def backward(self, indexes, grad_outputs):
        x, W = self.get_retained_inputs()
        gy, = grad_outputs
        ret = []
        with chainer.using_config('use_ideep', self._config_use_ideep):
            if 0 in indexes:
                gx, = LinearGradData().apply((W, gy))
                ret.append(chainer.functions.cast(gx, x.dtype))
            if 1 in indexes:
                gW, = LinearGradWeight(W.dtype).apply((x, gy))
                ret.append(chainer.functions.cast(gW, W.dtype))
            if 2 in indexes:
                gb = chainer.functions.sum(gy, axis=0)
                ret.append(gb)

        return ret
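

# For reference, the generic (NumPy/CuPy) paths above and the gradient
# FunctionNodes below compute, for ``x`` of shape (B, N), ``W`` of shape
# (M, N), ``b`` of shape (M,) and an upstream gradient ``gy`` of shape (B, M):
#
#     y  = x.dot(W.T) + b          # LinearFunction.forward
#     gx = gy.dot(W)               # LinearGradData.forward
#     gW = gy.T.dot(x)             # LinearGradWeight.forward
#     gb = gy.sum(axis=0)          # bias gradient in LinearFunction.backward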


class LinearGradData(function_node.FunctionNode):

    _config_use_ideep = None

    def forward(self, inputs):
        self._config_use_ideep = chainer.config.use_ideep
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(inputs)):
            # iDeep implementation
            return self._forward_ideep(inputs)

        # Generic implementation
        self.retain_inputs((0, 1))
        W, gy = inputs

        if (isinstance(gy, numpy.ndarray) and
                not (gy.flags.c_contiguous or gy.flags.f_contiguous) and
                1 in gy.shape):
            gy = numpy.ascontiguousarray(gy)

        gx = gy.dot(W).astype(gy.dtype, copy=False)
        return gx,

    def _forward_ideep(self, inputs):
        self.retain_inputs((0, 1))
        W, gy = inputs
        gx = intel64.ideep.linear.BackwardData(
            intel64.ideep.array(W),
            intel64.ideep.array(gy))
        return gx,

    def backward(self, indexes, grad_outputs):
        W, gy = self.get_retained_inputs()
        ggx, = grad_outputs

        ret = []
        with chainer.using_config('use_ideep', self._config_use_ideep):
            if 0 in indexes:
                gw, = LinearGradWeight(W.dtype).apply((ggx, gy))
                ret.append(chainer.functions.cast(gw, W.dtype))
            if 1 in indexes:
                ggy = linear(ggx, W)
                ret.append(chainer.functions.cast(ggy, gy.dtype))
        return ret


class LinearGradWeight(function_node.FunctionNode):

    _config_use_ideep = None

    def __init__(self, w_dtype):
        self._w_dtype = w_dtype

    def forward(self, inputs):
        self._config_use_ideep = chainer.config.use_ideep
        if (intel64.should_use_ideep('>=auto')
                and self._w_dtype == numpy.float32
                and intel64.inputs_all_ready(inputs)):
            # iDeep implementation
            return self._forward_ideep(inputs)

        # Generic implementation
        self.retain_inputs((0, 1))
        x, gy = inputs

        if (isinstance(gy, numpy.ndarray) and
                not (gy.flags.c_contiguous or gy.flags.f_contiguous) and
                1 in gy.shape):
            gy = numpy.ascontiguousarray(gy)

        gW = gy.T.dot(x).astype(self._w_dtype, copy=False)
        return gW,

    def _forward_ideep(self, inputs):
        self.retain_inputs((0, 1))
        x, gy = inputs
        gW = intel64.ideep.linear.BackwardWeights(
            intel64.ideep.array(x),
            intel64.ideep.array(gy))
        return gW,

    def backward(self, indexes, grad_outputs):
        x, gy = self.get_retained_inputs()
        ggW, = grad_outputs

        ret = []
        with chainer.using_config('use_ideep', self._config_use_ideep):
            if 0 in indexes:
                gx, = LinearGradData().apply((ggW, gy))
                ret.append(chainer.functions.cast(gx, x.dtype))
            if 1 in indexes:
                ggy = linear(x, ggW)
                ret.append(chainer.functions.cast(ggy, gy.dtype))
        return ret
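

# Note on double backward: differentiating ``gx = gy.dot(W)`` and
# ``gW = gy.T.dot(x)`` again only needs the same three primitives, which is
# why ``LinearGradData.backward`` and ``LinearGradWeight.backward`` are
# expressed in terms of ``LinearGradData``, ``LinearGradWeight`` and
# ``linear`` rather than raw array operations; this keeps second-order
# gradients of ``linear`` differentiable as well.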


def linear(x, W, b=None, n_batch_axes=1):
    """Linear function, or affine transformation.

    It accepts two or three arguments: an input minibatch ``x``, a weight
    matrix ``W``, and optionally a bias vector ``b``. It computes

    .. math:: y_i = W x_i + b.

    Args:
        x (:class:`~chainer.Variable` or :ref:`ndarray`): Input variable,
            which is a :math:`(s_1, s_2, ..., s_n)`-shaped float array.
            Its first ``n_batch_axes`` dimensions are handled as
            *minibatch dimensions*. The remaining dimensions are flattened
            into a single dimension whose size must be
            :math:`N = s_{{\\rm n\\_batch\\_axes} + 1} * ... * s_n`.
        W (:class:`~chainer.Variable` or :ref:`ndarray`):
            Weight variable of shape :math:`(M, N)`,
            where :math:`N = s_{{\\rm n\\_batch\\_axes} + 1} * ... * s_n`.
        b (:class:`~chainer.Variable` or :ref:`ndarray`):
            Bias variable (optional) of shape :math:`(M,)`.
        n_batch_axes (int): The number of batch axes. The default is 1. The
            input variable is reshaped into a
            (:math:`{\\rm n\\_batch\\_axes} + 1`)-dimensional tensor.
            This should be greater than 0.

    Returns:
        ~chainer.Variable: Output variable. A float array with shape
        of :math:`(s_1, ..., s_{\\rm n\\_batch\\_axes}, M)`.

    .. seealso::

        :class:`~chainer.links.Linear` to manage the model parameters
        ``W`` and ``b``.

    .. admonition:: Example

        >>> x = np.random.uniform(0, 1, (3, 4)).astype(np.float32)
        >>> W = np.random.uniform(0, 1, (5, 4)).astype(np.float32)
        >>> b = np.random.uniform(0, 1, (5,)).astype(np.float32)
        >>> y = F.linear(x, W, b)
        >>> y.shape
        (3, 5)

    """
    if n_batch_axes <= 0:
        raise ValueError('n_batch_axes should be greater than 0.')
    if n_batch_axes > 1:
        batch_shape = x.shape[:n_batch_axes]
        batch_size = utils.size_of_shape(batch_shape)
        x = x.reshape(batch_size, -1)
    elif x.ndim > 2:
        x = x.reshape(x.shape[0], -1)
    if b is None:
        args = x, W
    else:
        args = x, W, b

    y, = LinearFunction().apply(args)
    if n_batch_axes > 1:
        y = y.reshape(batch_shape + (-1,))
    return y
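

if __name__ == '__main__':
    # Minimal usage sketch (illustrative only, assuming a NumPy backend with
    # the default configuration): checks that ``linear`` matches a plain
    # affine transform and that ``n_batch_axes`` flattens the leading axes
    # as documented above.
    x = numpy.random.uniform(0, 1, (3, 4)).astype(numpy.float32)
    W = numpy.random.uniform(0, 1, (5, 4)).astype(numpy.float32)
    b = numpy.random.uniform(0, 1, (5,)).astype(numpy.float32)

    y = linear(x, W, b)
    assert y.shape == (3, 5)
    numpy.testing.assert_allclose(y.array, x.dot(W.T) + b, rtol=1e-5)

    # With n_batch_axes=2, a (2, 3, 4)-shaped input is flattened to (6, 4)
    # for the matrix product and the two batch axes are restored afterwards.
    x2 = numpy.random.uniform(0, 1, (2, 3, 4)).astype(numpy.float32)
    y2 = linear(x2, W, b, n_batch_axes=2)
    assert y2.shape == (2, 3, 5)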