1from __future__ import absolute_import
2import copy
3import threading
4import traceback
5import typing as tp  # NOQA
6import warnings
7import weakref
8
9import numpy
10
11import chainer
12from chainer import _backprop
13from chainer import backend
14from chainer.backends import _cpu
15from chainer.backends import cuda
16from chainer.backends import intel64
17from chainer import initializers
18from chainer.initializers import constant
19from chainer import types  # NOQA
20import chainer.utils._collections
21from chainer.utils import argument
22import chainerx
23
24
25_thread_local = threading.local()
26
27
28def _raise_grad_error(exc_type, func, msg):
29    detail = ''
30    if func:
31        detail = 'Function `{0}` ({1}) has a bug.\n'.format(
32            type(func)._impl_name, func.label)
33        stack = func.stack
34        if stack:
35            detail += 'Stacktrace of the function is below:\n'
36            for line in traceback.format_list(func.stack):
37                detail += line
38        detail += '''
39Please report this error to the issue tracker with the stack trace,
40the information of your environment, and your script:
41https://github.com/chainer/chainer/issues/new.
42'''
43
44    raise exc_type(detail + msg)
45
46
47def _check_grad_type(func, x, is_node_x, gx):
48    # is_node_x: equivalent to isinstance(x, VariableNode)
49
50    assert gx is not None
51
52    # x_shape is the raw shape
53
54    # TODO(kataoka): avoid `isinstance`
55    if isinstance(x, _ChainerxVariableNodeProps):
56        x_data = None
57        x_layout = None
58        x_shape = x.shape
59    elif is_node_x:
60        x_data = x._data
61        x_layout = x._layout
62        x_shape = x.shape
63        if x_layout is not None:
64            # to raw shape
65            x_shape = chainer.memory_layouts._transpose_shape(
66                x_shape, None, x_layout)
67    else:
68        # assert isinstance(x, Variable)
69        x_data = x._data[0]
70        x_layout = x._layout
71        x_shape = None if x_data is None else x_data.shape
72
73    # TODO(kataoka): Make _update_data_info store the array module.
74    # ``is_node_x and x_data is None`` implies that the data array is not
75    # retained.
76    # ``not is_node_x and x_data is None`` implies that grad of uninitialized
77    # variable is checked here.
78
79    if x_data is None and not is_node_x:
80        # TODO(kataoka): This should be an error.
81        return
82    if x_layout is None:
83        if x.dtype is None or x.shape is None:
84            # unretained Variable(None)
85            # TODO(kataoka): This should be an error.
86            return
87
88    if not isinstance(gx, chainer.get_array_types()):
89        _raise_grad_error(
90            TypeError,
91            func,
92            ('Type of grad is invalid:\n'
93             + 'Expected: Any of {}\n'.format(chainer.get_array_types())
94             + 'Actual: {}'.format(type(gx))))
95    elif x_data is not None and not chainer.is_arrays_compatible((gx, x_data)):
96        _raise_grad_error(
97            TypeError,
98            func,
99            ('Type of data and grad mismatch\ngrad: %s != data: %s' %
100             (type(gx), type(x_data))))
101    elif gx.dtype != x.dtype:
102        _raise_grad_error(
103            TypeError,
104            func,
105            ('Dtype of data and grad mismatch\ngrad: %s != data: %s' %
106             (gx.dtype, x.dtype)))
107    elif gx.shape != x_shape:  # comparing semantic shapes (not semantic)
108        _raise_grad_error(
109            ValueError,
110            func,
111            ('Shape of data and grad mismatch\ngrad: %s != data: %s' %
112             (gx.shape, x_shape)))
113
114
115def variable_repr(var):
116    """Return the string representation of a variable.
117
118    Args:
119        var (~chainer.Variable): Input Variable.
120    .. seealso:: numpy.array_repr
121    """
122    arr = _cpu._to_cpu(var.array)
123
124    if var.name:
125        prefix = 'variable ' + var.name
126    else:
127        prefix = 'variable'
128
129    if arr is None:
130        lst = 'None'
131    elif arr.size > 0 or arr.shape == (0,):
132        lst = numpy.array2string(arr, None, None, None, ', ', prefix + '(')
133    else:  # show zero-length shape unless it is (0,)
134        lst = '[], shape=%s' % (repr(arr.shape),)
135
136    return '%s(%s)' % (prefix, lst)
137
138
139def variable_str(var):
140    """Return the string representation of a variable.
141
142    Args:
143        var (~chainer.Variable): Input Variable.
144    .. seealso:: numpy.array_str
145    """
146    arr = _cpu._to_cpu(var.array)
147
148    if var.name:
149        prefix = 'variable ' + var.name
150    else:
151        prefix = 'variable'
152
153    if arr is None:
154        lst = 'None'
155    else:
156        lst = numpy.array2string(arr, None, None, None, ' ', prefix + '(')
157
158    return '%s(%s)' % (prefix, lst)
159
160
161class VariableNode(object):
162
163    """Node in the backward computational graph representing a variable.
164
165    This object represents a variable node in a computational graph. The node
166    is used in error backpropagation (a.k.a. backprop) to determine which
167    gradient to be passed to each function.
168
169    A variable node is held by the corresponding :class:`~chainer.Variable`
170    object, which is managed by users. :class:`~chainer.FunctionNode` objects
171    that take the variable as an input also hold references to the variable
172    node.
173
174    Note that the node does not hold a reference to the corresponding data
175    array in general. The data array is actually accessible by the node in the
176    following cases.
177
178    1. If there exists a :class:`~chainer.Variable` object that holds a
179       reference to the variable node, the variable node holds a weak reference
180       to the variable object, and thus the data array is accessible via the
181       weak reference.
182    2. If :meth:`retain_data` is called, the node holds a reference to the data
183       array. It is mainly called by a function that needs the input or output
184       data array in its backprop procedure.
185       See :meth:`FunctionNode.retain_inputs()
186       <chainer.FunctionNode.retain_inputs>`
187       and :meth:`FunctionNode.retain_outputs()
188       <chainer.FunctionNode.retain_outputs>` for more details.
189
190    Users usually do not need to touch this variable node object. The
191    computational graph is automatically managed by Chainer, and any interface
192    that is beneficial for users is also provided by
193    :class:`~chainer.Variable`.
194
195    Args:
196        variable (~chainer.Variable): The corresponding variable object.
197        name (str): Name of the variable node.
198
199    Attributes:
200        dtype: Data type of the data array.
201        shape: Shape of the data array.
202        name (str): Name of the variable node.
203
204    """
205
206    dtype = None
207    shape = None  # semantic shape
208
209    _creator_node = None
210    _data = None  # type: types.NdArray
211    _rank = 0  # type: int
212    # Name of the Function is assigned if this variable is a gradient generated
213    # by an old-style Function
214    _old_style_grad_generator = None  # type: str
215    _layout = None
216
217    def __init__(
218            self,
219            variable: 'Variable',
220            name: tp.Optional[str],
221            **kwargs: tp.Any
222    ) -> None:
223        if kwargs:
224            argument.check_unexpected_kwargs(
225                kwargs,
226                grad='unexpected keyword argument "grad": '
227                     'pass the gradient to Variable instead'
228            )
229        self._variable = weakref.ref(variable)
230        self.name = name
231        self._requires_grad = variable.requires_grad
232        self._layout = variable.layout
233
234        vdata = variable.raw_array
235        self._update_data_info(vdata)
236
237    @property
238    def creator(self):
239        """Function object that created this variable node.
240
241        When the function is implemented with the old-style API (i.e., it uses
242        :class:`~chainer.Function` class),
243        this property returns the :class:`~chainer.Function` object.
244        The object is extracted from the :class:`~chainer.FunctionAdapter`
245        object, so the returned object is not the function node, but instead
246        the actual implementation of forward and backward procedures.
247
248        When the function is implemented with the new-style API (i.e., it uses
249        :class:`~chainer.FunctionNode` class),
250        this property returns the function node
251        object. In this case, the returned object is same as
252        :attr:`creator_node`.
253
254        .. warning::
255
256           As of v3.0.0, when the creator is an old-style function, the
257           following code is invalid:
258
259           .. code-block:: python
260
261              creator = v.creator
262              v.creator = None
263              ...
264              v.creator = creator
265
266           The point is that :class:`~chainer.FunctionNode` objects are used
267           as nodes in the computational graph instead of
268           :class:`~chainer.Function`, and each :class:`~chainer.Function`
269           object only holds a *weak reference* to the corresponding
270           :class:`~chainer.FunctionNode`.
271           Since ``creator`` returns the :class:`~chainer.Function` object,
272           the :class:`~chainer.FunctionNode` object is not kept by preserving
273           ``creator``.
274
275           The above code should be fixed as follows.
276
277           .. code-block:: python
278
279              creator_node = v.creator_node
280              v.creator_node = None
281              ...
282              v.creator_node = creator_node
283
284        """
285        node = self._creator_node
286        if node is None:
287            return None
288
289        if isinstance(node, chainer.function.FunctionAdapter):
290            return node.function
291        return node
292
293    @creator.setter
294    def creator(self, func):
295        self.creator_node = func
296
297    @property
298    def creator_node(self):
299        """Function node that has this variable as an output.
300
301        See :class:`~chainer.FunctionNode` for the definition of a function
302        node.
303
304        """
305        return self._creator_node
306
307    @creator_node.setter
308    def creator_node(self, func):
309        if isinstance(func, chainer.Function):
310            func = func.node
311        self._creator_node = func
312        if func is not None:
313            self._rank = func.rank + 1
314
315    @property
316    def data(self):
317        """Data array of the corresponding variable.
318
319        If the data is not available, it returns ``None``.
320
321        """
322        return self._data
323
324    @data.setter
325    def data(self, d):
326        self._data = d
327        self._update_data_info(d)
328
329    @property
330    def grad(self):
331        """Gradient array of the corresponding variable.
332
333        If the variable is not available, it returns ``None``.
334
335        """
336        var = self._variable()
337        return None if var is None else var.grad
338
339    @property
340    def grad_var(self):
341        """Gradient variable of the corresponding variable.
342
343        If the corresponding variable is not available, it return ``None``.
344
345        """
346        var = self._variable()
347        return None if var is None else var.grad_var
348
349    def _set_grad_var_if_available(self, g):
350        var = self._variable()
351        if var is not None:
352            var._set_grad_var_without_check(g)
353
354    @property
355    def label(self):
356        """Short text that represents the variable node."""
357        if self.shape == ():
358            return str(self.dtype)
359        return '(%s), %s' % (', '.join(map(str, self.shape)),
360                             str(self.dtype))
361
362    @property
363    def rank(self):
364        return self._rank
365
366    @property
367    def requires_grad(self):
368        """It indicates that ``grad`` will be set in backward calculation."""
369        return self._requires_grad
370
371    def get_variable(self):
372        """Returns the corresponding :class:`~chainer.Variable` object.
373
374        VariableNode object holds a weak reference of the variable object. If
375        the reference is alive, it is returned by this property. Otherwise,
376        this property creates a new :class:`~chainer.Variable` object from
377        this node object and returns it.
378
379        Returns:
380            ~chainer.Variable: The variable object that refers this node.
381
382        """
383        var = self._variable()
384        if var is not None:
385            return var
386        var = Variable._init_unchecked(
387            self.data,
388            name=self.name,
389            requires_grad=self.requires_grad,
390            node=self,
391            layout=self._layout)
392        return var
393
394    def get_variable_or_none(self):
395        """Returns the holding :class:`~chainer.Variable` object or ``None``.
396
397        VariableNode object holds a weak reference of the variable object.If
398        the reference is alive, it is returned by this property. Otherwise,
399        returns ``None``.
400
401        Returns:
402            ~chainer.Variable: The variable object that refers this node.
403
404        """
405        return self._variable()
406
407    def set_creator(self, creator):
408        """Sets a :class:`~chainer.Function` object that created this node.
409
410        This method is equivalent to ``self.creator = creator``. A
411        :class:`~chainer.FunctionNode` object can also be passed.
412
413        Args:
414            creator (Function or FunctionNode): Function that has created this
415                variable.
416
417        """
418        self.creator = creator
419
420    def set_creator_node(self, creator_node):
421        """Sets a :class:`~chainer.FunctionNode` object that created this node.
422
423        This method is equivalent to ``self.creator_node = creator_node``. A
424        :class:`~chainer.Function` object can also be passed, in which case the
425        :attr:`Function.node <chainer.Function.node>` attribute is used.
426
427        Args:
428            creator_node (FunctionNode or Function): Function node that has
429                this variable as an output.
430
431        """
432        self.creator_node = creator_node
433
434    def unchain(self):
435        """Deletes the reference to the creator of this variable node.
436
437        This method is equivalent to ``self.creator_node = None``.
438
439        """
440        self.creator_node = None
441
442    def retain_data(self):
443        """Lets the node hold a reference to the underlying data array.
444
445        This method gets the data array of the corresponding variable and keeps
446        it. If the weak reference to the corresponding variable is dead, it
447        raises an error.
448
449        """
450        variable = self._variable()
451        if variable is not None:
452            self.data = variable.data
453        else:
454            raise RuntimeError('cannot retain variable data: the variable has '
455                               'been already released')
456
457    def _update_data_info(self, d):
458        # d is a raw array (with raw shape)
459        if d is None:
460            self.dtype = None
461            self.shape = None
462        else:
463            self.dtype = d.dtype
464
465            if self._layout is None:
466                self.shape = d.shape
467            else:
468                self.shape = chainer.memory_layouts._transpose_shape(
469                    d.shape, self._layout, None)
470
471        # If the node has a reference to data, update it as well.
472        if self._data is not None:
473            self._data = d
474
475    def _check_old_style_gradient(self):
476        if self._old_style_grad_generator is not None:
477            raise RuntimeError(
478                'cannot twice-differentiate an old style Function "%s"' %
479                self._old_style_grad_generator)
480
481
482def _create_variable(data, name, grad, requires_grad, device):
483    var = Variable(
484        data, name=name, grad=grad, requires_grad=requires_grad)
485    var.to_device(device)
486    return var
487
488
489class Variable(object):
490
491    """__init__(data=None, *, name=None, grad=None, requires_grad=True)
492
493    Array with a structure to keep track of computation.
494
495    Every variable holds a data array of type either :class:`numpy.ndarray` or
496    :class:`cupy.ndarray`.
497
498    A variable object holds a data array and a
499    :class:`~chainer.variable.VariableNode` object of
500    a computational graph. If the variable is constructed by the user, the node
501    is *root* and does not hold any parent. If the variable is constructed by a
502    :class:`~chainer.FunctionNode` object (i.e., by calling functions under
503    ``chainer.functions`` or user-defined functions), or by using operators
504    (see the list below), the node holds a reference to its parent called
505    :attr:`creator_node`.
506    This reference is used in backpropagation to backtrack the graph.
507
508    Users can disable (resp. enable) this chaining behavior by calling
509    :func:`~chainer.no_backprop_mode` (resp.
510    :func:`~chainer.force_backprop_mode`).
511    In the former context, a variable never creates a computational graph,
512    whereas in the latter context, it is forced to create.
513
514    .. note::
515
516        The following operators are defined for variable(s).
517
518        * Indexing: ``a[slices]`` (:meth:`__getitem__`)
519        * Addition: ``a + b`` (:meth:`__add__`, :meth:`__radd__`)
520        * Subtraction: ``a - b`` (:meth:`__sub__`, :meth:`__rsub__`)
521        * Multiplication: ``a * b`` (:meth:`__mul__`, :meth:`__rmul__`)
522        * Division: ``a / b`` (:meth:`__div__`, :meth:`__rdiv__`, \
523                               :meth:`__truediv__`, :meth:`__rtruediv__`)
524        * Floor Division: ``a // b`` (:meth:`__floordiv__`, \
525                                      :meth:`__rfloordiv__`)
526        * Exponentiation: ``a ** b`` (:meth:`__pow__`, :meth:`__rpow__`)
527        * Matrix Multiplication: ``a @ b`` (:meth:`__matmul__`, \
528                                            :meth:`__rmatmul__`)
529        * Negation (Arithmetic): ``- a`` (:meth:`__neg__`)
530        * Absolute value: ``abs(a)`` (:meth:`__abs__`)
531
532    Args:
533        data (:ref:`ndarray`): Initial data array.
534        name (str): Name of the variable.
535        grad (:ref:`ndarray`): Initial gradient array.
536        requires_grad (bool): Boolean indicating whether ``grad`` will be set
537            in backward calculation.
538
539    """
540
541    # Cached value of `self.xp is chainerx`. It prevents from initializing
542    # self._device as much as possible because it is really costly.
543    _has_chainerx_array = False
544
545    # Cached grad-stopped view of chainerx array. This is the return value
546    # of `array` and `data` properties.
547    _chainerx_nobp_array_cache = None
548
549    # Cached grad-stopped view of the array returned by `grad` property.
550    # It's a 2-element tuple, where the first is the original grad array and
551    # the second is a grad-stopped view of the first. `grad` property returns
552    # the second element.
553    _chainerx_grad_cache = None
554
555    _chainerx_name = None  # type: tp.Optional[str]
556
557    # A NumPy, CuPy array cache to avoid redundant conversions between
558    # NumPy/CuPy and ChainerX.
559    # TODO(hvy): Avoid modifying this variable from outside this class.
560    _chainerx_fallback_array = None
561
562    # Used in non-ChainerX variables. The gradient array is stored in
563    # this attribute on Variable.grad setter to delay creation of grad_var
564    # instance.
565    _grad = None
566
567    _layout = None
568
569    def as_layout(self, layout):
570        src_layout = self._layout
571        if src_layout == layout:
572            return self
573
574        y, = chainer.memory_layouts.AsLayout(layout).apply((self,))
575        return y
576
577    def __init__(
578            self,
579            data: tp.Optional[types.NdArray] = None,
580            **kwargs: tp.Any
581    ) -> None:
582        name, grad, requires_grad, grad_valid, layout = argument.parse_kwargs(
583            kwargs, ('name', None), ('grad', None), ('requires_grad', True),
584            ('_grad_valid', True), ('layout', None),
585            volatile='volatile argument is not supported anymore. '
586                     'Use chainer.using_config')
587        # _grad_valid is for internal use, hence the prefix _.
588
589        assert isinstance(requires_grad, bool)
590        if data is not None:
591            array_types = chainer.get_array_types()
592            if not isinstance(data, array_types):
593                msg = '{} or {} are expected. Actual: {}'.format(
594                    ', '.join([str(at) for at in array_types[:-1]]),
595                    array_types[-1], type(data))
596                raise TypeError(msg)
597
598        self._init_impl(
599            data, None, name, grad, grad_valid, requires_grad, None, None,
600            layout)
601
602    @staticmethod
603    def _init_unchecked(
604            data=None, device=None, name=None, grad=None, grad_valid=True,
605            requires_grad=True, is_chainerx_array=None, node=None,
606            layout=None):
607        """Creates a new :class:`Variable` without the validations for
608        optimizing performance.
609        """
610
611        # Create a Variable without invoking __init__
612        var = Variable.__new__(Variable)
613        var._init_impl(
614            data, device, name, grad, grad_valid, requires_grad,
615            is_chainerx_array, node, layout)
616        return var
617
618    def _init_impl(self, data, device, name, grad, grad_valid, requires_grad,
619                   is_chainerx_array, node, layout):
620        # `device` must be of type chainer.backend.Device.
621        # Check is skipped for performance.
622
623        self._requires_grad = requires_grad  # type: bool
624        self._loss_scale = None
625        self._grad_var = None
626        self._device = device
627        # A flag to prevent grad from being used before calling cleargrad().
628        # It becomes True when either
629        # - cleargrad() is called, or
630        # - zerograd() is called, or
631        # - grad is set.
632        # Note that it won't be True by merely initializing an uninitialized
633        # Parameter.
634        self._grad_valid = grad_valid
635        self._layout = layout
636
637        if is_chainerx_array is None:
638            is_chainerx_array = isinstance(data, chainerx.ndarray)
639
640        if is_chainerx_array:
641            if not requires_grad and grad is not None:
642                raise ValueError(
643                    'Cannot initialize a variable with gradients if the '
644                    'require_grad argument is False.')
645            self._set_chainerx_array(data, grad)  # type: ignore
646
647            # ChainerX itself has own node objects, but not exposed to python.
648            self._node = None  # type: tp.Optional[VariableNode]
649            self._chainerx_name = name
650        else:
651            # Use a list as a data structure to hold the data array indirectly
652            # to abstract its initialized/uninitialized state.
653            self._data = [data]  # type: tp.List[tp.Optional[types.NdArray]]
654            if node is None:
655                self._node = VariableNode(self, name)
656            else:
657                self._node = node
658            self._grad = grad
659
660    def __copy__(self):
661        return self._copy_to(Variable())
662
663    def _copy_to(self, target):
664        target.__dict__ = copy.copy(self.__dict__)
665        target._node = VariableNode(target, self.name)
666        return target
667
668    def __reduce__(self):
669        args = (
670            self.array, self.name, self.grad, self._requires_grad, self.device)
671        return _create_variable, args
672
673    def __repr__(self):
674        return variable_repr(self)
675
676    def __str__(self):
677        return variable_str(self)
678
679    def _clear_chainerx(self):
680        self._chainerx_nobp_array_cache = None
681        self._chainerx_grad_cache = None
682        self._chainerx_fallback_array = None
683
684    def _ensure_grad_var_up_to_date(self):
685        # For non-ChainerX, this method creates _grad_var if it's not yet
686        # created and _grad is set.
687        # For ChainerX, this method checks consistency between
688        # _grad_var._data[0] and self._data[0].grad and recreates _grad_var
689        # as necessary. (chainerx.ndarray.grad can be altered independently
690        # from chainer)
691        if self._has_chainerx_array:
692            self._grad = None
693            # Update gradient variable if it has not yet been initialized or
694            # it happens to be dirty w.r.t. the actual gradient of the
695            # underlying chainerx.ndarray.
696            arr = self._data[0]
697            actual_grad = (
698                arr.grad
699                if arr is not None and arr.is_grad_required()
700                else None)
701            if actual_grad is None:
702                self._grad_var = None
703            else:
704                grad_var = self._grad_var
705                old_grad = None if grad_var is None else grad_var._data[0]
706                if actual_grad is not old_grad:
707                    self._grad_var = Variable(
708                        actual_grad,
709                        requires_grad=actual_grad.is_backprop_required(),
710                        layout=self._layout)
711            return
712
713        if self._grad_var is None:
714            if self._grad is not None:
715                self._grad_var = Variable(self._grad, layout=self._layout)
716
717    def _set_chainerx_array(
718            self,
719            array: tp.Optional['chainerx.ndarray'],
720            grad: tp.Optional['chainerx.ndarray']
721    ) -> None:
722
723        # Sets chainerx array and grad.
724        assert array is None or isinstance(array, chainerx.ndarray)
725        requires_grad = self._requires_grad
726
727        self._grad = None
728
729        if (not requires_grad
730                and array is not None
731                and array.is_backprop_required()):
732            raise ValueError(
733                'Cannot initialize a variable to not require '
734                'gradients if the ChainerX array already requires '
735                'backprop.')
736
737        # Create a view of the given data to hold internally and modify.
738        if array is None:
739            self._data = [None]
740        else:
741            # If the array `array` is not connected to a graph, a view of it is
742            # created and kept, in order not to change the no-graph status of
743            # it. If the array is connected, the graph status is kept track of.
744            if not array.is_backprop_required():
745                array = array.view()
746            if requires_grad:
747                array.require_grad()
748                if grad is not None:
749                    array.set_grad(grad)
750            self._data = [array]
751
752        self._has_chainerx_array = True  # even if data is None
753        self._chainerx_nobp_array_cache = None
754        self._chainerx_grad_cache = None
755        self._chainerx_fallback_array = None
756
757    @property
758    def device(self):
759        """Device on which the data array of this variable reside."""
760        # lazy initialization for performance
761        if self._device is None:
762            if self._data[0] is None:
763                self._device = backend.CpuDevice()
764            else:
765                self._device = backend.get_device_from_array(self._data[0])
766        return self._device
767
768    @property
769    def xp(self) -> tp.Optional[types.Xp]:
770        """Array module for the data array of this variable."""
771        if self._has_chainerx_array:
772            return chainerx
773        else:
774            device = self.device
775            return None if device is None else device.xp
776
777    @property
778    def name(self):
779        if self._has_chainerx_array:
780            return self._chainerx_name
781        return self._node.name
782
783    @name.setter
784    def name(self, n):
785        if self._has_chainerx_array:
786            self._chainerx_name = n
787            return
788        self._node.name = n
789
790    def summary(self):
791        if self.name:
792            return '<variable %s>' % self.name
793        else:
794            return '<variable at 0x%x>' % id(self)
795
796    def debug_print(self):
797        """Display a summary of the stored data and location of the Variable"""
798
799        msg = """{summary}
800- device: {device}
801- backend: {backend}
802- shape: {shape}
803- dtype: {dtype}
804- statistics: {stats}
805- grad: {grad}"""
806
807        stats_msg = 'mean={0:.8f}, std={1:.8f}'
808
809        array = self.array
810        device = self.device
811        with chainer.using_device(device):
812            xp = device.xp
813
814            if array is None:
815                # `array` can be `None` if constructed without any arguments
816                device = None
817                backend = None
818                stats = None
819            else:
820                device = getattr(array, 'device', 'CPU')
821                backend = type(array)
822                stats = stats_msg.format(float(xp.mean(array)),
823                                         float(xp.std(array)))
824            shape = getattr(array, 'shape', None)
825            dtype = getattr(array, 'dtype', None)
826
827            if self.grad is None:
828                grad = None
829            elif xp.all(self.grad == 0):
830                grad = 0
831            else:
832                grad = stats_msg.format(float(xp.mean(self.grad)),
833                                        float(xp.std(self.grad)))
834
835        return msg.format(summary=self.summary(), device=device,
836                          backend=backend, shape=shape, dtype=dtype,
837                          stats=stats, grad=grad)
838
839    def __pos__(self):
840        return self
841
842    def __len__(self):
843        """Returns the first dimension of the data array.
844
845        Returns:
846            int: Number of the first dimension of the data array.
847
848        """
849        return len(self.array)
850
851    @property
852    def label(self):
853        """Short text that represents the variable."""
854        if self._has_chainerx_array:
855            raise RuntimeError(
856                'A variable of ChainerX does not provide a node label.')
857        return self._node.label
858
859    @property
860    def creator(self):
861        """Function implementation that created this variable.
862
863        When this variable has been created by an old-style function (i.e., it
864        is implemented as a subclass of :class:`Function`), this property
865        returns that :class:`Function` object.
866
867        When this variable has been created by a new-style function (i.e., it
868        is implemented as a subclass of :class:`FunctionNode` class), this
869        property returns that node object.
870
871        """
872        if self._has_chainerx_array:
873            raise RuntimeError(
874                'A variable of ChainerX does not provide a creator.')
875        return self._node.creator
876
877    @creator.setter
878    def creator(self, func):
879        if self._has_chainerx_array:
880            raise RuntimeError(
881                'A variable of ChainerX does not provide a creator.')
882        self._node.creator = func
883
884    @property
885    def creator_node(self):
886        """:class:`FunctionNode` object that created this variable.
887
888        This property has a setter to which ``None`` can be set. Setting
889        ``None`` to this property is equivalent to call :meth:`unchain`;
890        it purges the variable from the function that created this variable.
891
892        The setter also accepts the original :class:`FunctionNode` object that
893        created this variable. For example, you can once set ``None`` to this
894        property and then set the original value again.
895
896        .. note::
897           Setting an irrelevant :meth:`FunctionNode` object does not emit any
898           error immediately, whereas the behavior is undefined. Do not set
899           a :meth:`FunctionNode` object that did not create this variable
900           object.
901
902        """
903        if self._has_chainerx_array:
904            raise RuntimeError(
905                'A variable of ChainerX does not provide a creator_node.')
906        return self._node._creator_node
907
908    @creator_node.setter
909    def creator_node(self, func):
910        if self._has_chainerx_array:
911            raise RuntimeError(
912                'A variable of ChainerX does not provide a creator_node.')
913        self._node.creator_node = func
914
915    @property
916    def array(self) -> tp.Optional[types.NdArray]:
917        """The underlying data array.
918
919        It is either :class:`numpy.ndarray` or :class:`cupy.ndarray` object,
920        or ``None`` if the variable in in an uninitialized state.
921
922        """
923        return self._get_array()
924
925    def _get_array(self):
926        if (self._layout is not None
927                and not (
928                    _allow_array_access_with_nonstandard_layout())):
929            raise RuntimeError(
930                'Cannot directly retrieve the underlying array from a '
931                'variable with non-standard layout.')
932        return self.raw_array
933
934    @property
935    def raw_array(self):
936        """The underlying raw data array.
937
938        Its shape does not have to be the semantic shape, if the memory layout
939        is non-standard.
940        """
941        # For ChainerX, this property always returns a grad-stopped view.
942        # The view is cached to reduce potential overhead.
943        if self._has_chainerx_array:
944            if (self._chainerx_nobp_array_cache is None
945                    and self._data[0] is not None):
946                self._chainerx_nobp_array_cache = (
947                    self._data[0].as_grad_stopped())  # type: ignore
948            return self._chainerx_nobp_array_cache
949
950        return self._data[0]
951
952    @array.setter
953    def array(self, d: tp.Optional[types.NdArray]) -> None:
954        self._set_array(d)
955
956    def _set_array(self, d, *, layout_check=True):
957        if (layout_check
958                and self._layout is not None
959                and not (
960                    _allow_array_access_with_nonstandard_layout())):
961            raise RuntimeError(
962                'Cannot directly set the underlying array of a variable with '
963                'non-standard layout.')
964        if self._has_chainerx_array:
965            d_old = self._data[0]
966            if (d_old is not None
967                    and (d_old.is_backprop_required()  # type: ignore
968                         or d.is_backprop_required())):  # type: ignore
969                raise ValueError(
970                    'Cannot update the array of a Variable if either the '
971                    'existing or the new array requires backprop.')
972
973            self._set_chainerx_array(d, None)  # type: ignore
974        else:
975            self._node._update_data_info(d)  # type: ignore # _node doesn't have value when xp is chainerx # NOQA
976            self._data[0] = d
977            self._has_chainerx_array = False
978
979    @property
980    def chx_array(self):
981        """A view of the raw ChainerX array.
982
983        In contrary to :data:`Variable.array` which is always disconnected,
984        the array represented by this attribute may be connected to the
985        computational graph.
986
987        It is a view, so it has a distinct gradient from the original array.
988
989        If this attribute is queried on a :class:`Variable` with a non-ChainerX
990        array, :class:`ValueError` will be raised.
991        """
992        if not self._has_chainerx_array:
993            raise ValueError(
994                'chx_array is not available for Variable with '
995                'non-ChainerX array.')
996        return self._data[0].view()
997
998    @property
999    def data(self) -> tp.Optional[types.NdArray]:
1000        """The underlying data array (equivalent to :attr:`array`).
1001
1002        Note that using this attribute directly is discouraged; use
1003        :attr:`array` instead. Using :attr:`array`, you can find an error
1004        earlier when your code mixes up Variable and ndarray because
1005        ndarray does not have an attribute ``.array`` while it has
1006        ``.data``.
1007
1008        """
1009        return self.array
1010
1011    @data.setter
1012    def data(self, d: types.NdArray) -> None:
1013        self.array = d
1014
1015    @property
1016    def layout(self):
1017        return self._layout
1018
1019    def _set_chainerx_grad(self, g, from_grad_var):
1020        # Assigns chainerx.ndarray.grad.
1021        #
1022        # If the main array is connected to the graph, in order to enable
1023        # double-backprop, the grad will also be backprop-required
1024        # (a view is created not to affect the given grad).
1025        # If the given grad is from a grad_var, this operation is skipped,
1026        # as the status of the given grad reflects the necessity of
1027        # double-backprop.
1028        assert self.xp is chainerx
1029        if not self._requires_grad and g is not None:
1030            raise RuntimeError(
1031                'Cannot set the gradient of a variable that is flagged to not '
1032                'require one.')
1033        arr = self._data[0]
1034        if arr is None:
1035            if g is not None:
1036                raise RuntimeError(
1037                    'Cannot set a gradient to an empty variable')
1038        elif arr.is_backprop_required():
1039            # If g is grad-stopped, require grad on it.
1040            # Make a view in order not to affect the input.
1041            if (g is not None
1042                    and not from_grad_var
1043                    and not g.is_backprop_required()):
1044                g = g.view().require_grad()
1045            arr.set_grad(g)
1046
1047    def _set_grad_without_check(self, g):
1048        if self._has_chainerx_array:
1049            self._set_chainerx_grad(g, False)
1050            self._grad_var = None
1051            self._grad_valid = True
1052            return
1053
1054        self._grad = g
1055        self._grad_var = None
1056        self._grad_valid = True
1057
1058    @property
1059    def grad(self) -> tp.Optional[types.NdArray]:
1060        """Gradient array of this variable.
1061
1062        Note that this property returns the underlying array of the gradient
1063        variable instead of the gradient variable itself; to get/set
1064        gradient variable, use :attr:`grad_var` instead.
1065
1066        If the underlying array is a :class:`chainerx.ndarray` and
1067        requires_grad is false, trying to access the gradient will results in
1068        and error.
1069
1070        """
1071        return self._get_grad()
1072
1073    def _get_grad(self):
1074        if (self._layout is not None
1075                and not (
1076                    _thread_local.allow_array_access_with_nonstandard_layout)):
1077            raise RuntimeError(
1078                'Cannot directly retrieve the gradient array of a '
1079                'variable with non-standard layout.')
1080        if not self._grad_valid:
1081            raise RuntimeError(
1082                'Cannot retrieve Variable.grad. '
1083                'Either it must be set manually or Variable.cleargrad() '
1084                'must be called beforehand.')
1085
1086        if self._has_chainerx_array:
1087            arr = self._data[0]
1088            if arr is None or not arr.is_backprop_required():
1089                self._chainerx_grad_cache = None
1090                return None
1091
1092            actual_grad = arr.grad
1093
1094            if actual_grad is None:
1095                self._chainerx_grad_cache = None
1096                return None
1097
1098            # If grad is cached and the actual grad has not changed, return
1099            # the cache.
1100            if self._chainerx_grad_cache is not None:
1101                orig_grad, grad_stopped_grad = self._chainerx_grad_cache
1102                if orig_grad is actual_grad:
1103                    return grad_stopped_grad
1104
1105            # Update the cache
1106            grad_stopped_grad = actual_grad.as_grad_stopped()
1107            self._chainerx_grad_cache = (actual_grad, grad_stopped_grad)
1108
1109            return grad_stopped_grad
1110
1111        if self._grad_var is not None:
1112            return self._grad_var.array
1113        return self._grad
1114
1115    @grad.setter
1116    def grad(self, g: tp.Optional[types.NdArray]) -> None:
1117        self._set_grad(g)
1118
1119    def _set_grad(self, g, *, layout_check=True):
1120        if (layout_check
1121                and self._layout is not None
1122                and not (
1123                    _allow_array_access_with_nonstandard_layout())):
1124            raise RuntimeError(
1125                'Cannot directly set the gradient array of a '
1126                'variable with non-standard layout.')
1127        if g is not None:
1128            _check_grad_type(None, self, False, g)
1129        self._set_grad_without_check(g)
1130
1131    def _set_grad_var_without_check(self, gv):
1132        if self._has_chainerx_array:
1133            self._set_chainerx_grad(
1134                None if gv is None else gv._data[0],
1135                True)
1136            self._grad_var = gv
1137            return
1138
1139        self._grad_var = gv
1140        self._grad = None if gv is None else gv.array
1141
1142    @property
1143    def grad_var(self) -> tp.Optional['Variable']:
1144        """Gradient variable."""
1145        self._ensure_grad_var_up_to_date()
1146        return self._grad_var
1147
1148    @grad_var.setter
1149    def grad_var(self, g: tp.Optional['Variable']) -> None:
1150        if g is not None:
1151            _check_grad_type(None, self, False, g.array)
1152        self._set_grad_var_without_check(g)
1153
1154    @property
1155    def shape(self):
1156        raw_shape = self._data[0].shape
1157        if self._layout is not None:
1158            # Convert to semantic shape
1159            return chainer.memory_layouts._transpose_shape(
1160                raw_shape, self._layout, None)
1161        return raw_shape
1162
1163    @property
1164    def ndim(self):
1165        return self._data[0].ndim
1166
1167    @property
1168    def size(self):
1169        return self._data[0].size
1170
1171    @property
1172    def dtype(self):
1173        return self._data[0].dtype
1174
1175    @property
1176    def rank(self):
1177        if self._has_chainerx_array:
1178            raise RuntimeError(
1179                'A variable of ChainerX does not provide a node rank.')
1180        return self._node.rank
1181
1182    @property
1183    def node(self):
1184        if self._has_chainerx_array:
1185            raise RuntimeError(
1186                'A variable of ChainerX does not provide a node.')
1187        return self._node
1188
1189    @property
1190    def requires_grad(self):
1191        """It indicates that ``grad`` will be set in backward calculation."""
1192        return self._requires_grad
1193
1194    @property
1195    def T(self):
1196        """Transposition of this variable."""
1197        return chainer.functions.transpose(self)
1198
1199    def to_cpu(self):
1200        """Copies the data and gradient arrays to CPU."""
1201        self.to_device(backend.CpuDevice())
1202
1203    def to_gpu(self, device=None):
1204        """Copies the data and gradient arrays to specified GPU.
1205
1206        Args:
1207            device: Target device specifier. If omitted, the current device is
1208                used.
1209
1210        """
1211        cuda.check_cuda_available()
1212        self.to_device(cuda._get_device_or_current(device))
1213
1214    def to_intel64(self):
1215        """Copies the data and gradient arrays to intel64 specific mdarray.
1216
1217        If the array is not suited for intel64, it will be converted to
1218        :class:`numpy.ndarray`.
1219        """
1220        intel64.check_ideep_available()
1221        self.to_device(intel64.Intel64Device())
1222
1223    def to_chx(self):
1224        """Converts the array and gradient to ChainerX arrays without copy.
1225
1226        This method converts the underlying array and gradient to
1227        :class:`chainerx.ndarray` on the same physical device. It does nothing
1228        if the array held by the Variable object is already a ChainerX array.
1229        The new array is a view of the original one.
1230
1231        """
1232        self._to_chx(allow_unchaining=False)
1233
1234    def _to_chx(self, allow_unchaining):
1235        if not chainerx.is_available():
1236            raise RuntimeError('ChainerX is not available.')
1237
1238        if self._has_chainerx_array:
1239            return
1240
1241        if not allow_unchaining and self.creator is not None:
1242            raise RuntimeError(
1243                'A variable with a creator cannot be converted into ChainerX '
1244                'array')
1245
1246        self._to_device(
1247            backend.ChainerxDevice.from_fallback_device(self.device),
1248            allow_unchaining)
1249
1250    def from_chx(self):
1251        """Converts the array and gradient to non-ChainerX arrays without copy.
1252
1253        This method converts the underlying ChainerX array and gradient
1254        residing in either a ``native`` or ``cuda`` device to NumPy or CuPy
1255        arrays respectively, on their same physical device. It does nothing
1256        if the array held by the Variable object is not a ChainerX array. The
1257        new array is a view of the original one.
1258
1259        Raises an error if such a conversion is not supported for the device.
1260
1261        """
1262        self._from_chx(allow_unchaining=False)
1263
1264    def _from_chx(self, allow_unchaining):
1265        if not self._has_chainerx_array:
1266            return
1267
1268        if not allow_unchaining and self._data[0].is_backprop_required():
1269            raise RuntimeError(
1270                'Cannot convert from a Variable with a ChainerX array that is '
1271                'connected to a graph.')
1272
1273        self.to_device(self.device.fallback_device)
1274
1275    def to_device(self, device):
1276        """Copies the data and gradient arrays to specified device.
1277
1278        Args:
1279            device: Target device specifier. See
1280                :func:`~chainer.get_device` for available values.
1281
1282        """
1283        self._to_device(device, allow_unchaining=False)
1284
1285    def _to_device(self, device, allow_unchaining):
1286        device = chainer.get_device(device)
1287
1288        was_chainerx = self._has_chainerx_array
1289        is_chainerx = device.xp is chainerx
1290
1291        if not allow_unchaining:
1292            if was_chainerx and not is_chainerx:
1293                chx_arr = self._data[0]
1294                if chx_arr is not None and chx_arr.is_backprop_required():
1295                    raise RuntimeError(
1296                        'A variable of a ChainerX array which requires '
1297                        'gradients cannot be copied into non-chainerx device '
1298                        '({}).'.format(device))
1299            elif not was_chainerx and is_chainerx:
1300                arr = self._data[0]
1301                if arr is not None and self.creator is not None:
1302                    raise RuntimeError(
1303                        'A variable of a non-ChainerX array which is '
1304                        'connected to a graph cannot be copied to a ChainerX '
1305                        'device ({}).'.format(device))
1306
1307        arr = self._data[0]
1308        grad_var = self.grad_var
1309
1310        if was_chainerx and not is_chainerx:
1311            self._clear_chainerx()
1312            self._node = VariableNode(self, self._chainerx_name)
1313        elif not was_chainerx and is_chainerx:
1314            self._chainerx_name = self._node.name
1315
1316        self._device = device
1317        self._has_chainerx_array = is_chainerx
1318
1319        if arr is None:
1320            return
1321
1322        if backend.get_device_from_array(arr) == device:
1323            return
1324
1325        new_arr = device.send(arr)
1326        if is_chainerx:
1327            if grad_var is None:
1328                new_grad = None
1329            else:
1330                new_grad = device.send(grad_var._data[0])
1331            self._set_chainerx_array(new_arr, new_grad)
1332        else:
1333            self._data = [new_arr]
1334            if grad_var is not None:
1335                grad_var._to_device(device, allow_unchaining=allow_unchaining)
1336                # _grad has been invalidated by the line above.
1337                self._grad = grad_var.raw_array
1338
1339        # ensure that the node tracks the device migration
1340        node = self._node
1341        if is_chainerx:
1342            # ChainerX itself has own node objects,
1343            # ensure that the node is disconnected with this variable.
1344            if node is not None:
1345                # Disconnect by replacing with an alternative of dead weakref
1346                node._variable = lambda: None
1347                self._node = None
1348        else:
1349            if node._data is not None:
1350                node.retain_data()
1351
1352    def cleargrad(self):
1353        """Clears the gradient array."""
1354        self.grad_var = None
1355        self._grad_valid = True
1356
1357    def zerograd(self):
1358        """Initializes the gradient array by zeros.
1359
1360
1361        Note that the gradient variable is unchained from the computational
1362        graph by this method, because this operation breaks the backprop
1363        validity.
1364
1365        .. deprecated:: v1.15
1366           Use more efficient  :meth:`cleargrads` instead.
1367
1368        """
1369        warnings.warn(
1370            'Variable.zerograd is deprecated. Use Variable.cleargrad instead.',
1371            DeprecationWarning)
1372
1373        arr = self.array
1374        if arr is None:
1375            self._grad_valid = True
1376            return
1377
1378        if self._has_chainerx_array:
1379            gv = self.grad_var
1380            if gv is None:
1381                self.grad = chainerx.zeros_like(
1382                    arr, device=self.device.device)
1383            else:
1384                gv._data[0].fill(0)
1385        else:
1386            with chainer.using_device(self.device):
1387                xp = self.device.xp
1388                if self._grad is None:
1389                    self._grad = xp.zeros_like(arr)
1390                    self._grad_var = None
1391                else:
1392                    gv = self._grad_var
1393                    if gv is not None:
1394                        gv.unchain()
1395                    self._grad.fill(0)
1396        self._grad_valid = True
1397
1398    def copydata(self, var):
1399        """Copies the data array from given source variable.
1400
1401        This method copies the data array from given variable to this variable.
1402        The copy is done even if the arrays reside on different devices,
1403        including across the host and a GPU device. If this variable has an
1404        uninitialized data array, this method initializes it by the data array
1405        of the given variable. Similarly, if the given variable has an
1406        uninitialized data array, this method initializes it by the data array
1407        of this variable (``self``). If both are uninitialized, this method
1408        does nothing.
1409
1410        Args:
1411            var (~chainer.Variable): Source variable.
1412
1413        """
1414        src = var.array
1415        dst = self.array
1416        if src is None:
1417            if dst is None:
1418                return
1419            var.initialize(self.shape)
1420            src = var.array
1421        elif dst is None:
1422            self.initialize(src.shape)
1423            dst = self.array
1424        backend.copyto(dst, src)
1425
1426    def addgrad(self, var):
1427        """Accumulates the gradient array from given source variable.
1428
1429        This method adds the gradient of a given variable to the gradient of
1430        this variable. The accumulation is even done across the host and
1431        different devices. If this variable has uninitialized data/grad arrays,
1432        this method initializes it with the shape of the given variable and
1433        then accumulates the gradient.
1434
1435        Args:
1436            var (~chainer.Variable): Source variable.
1437
1438        """
1439        dst_device = self.device
1440        is_chainerx = dst_device.xp is chainerx
1441
1442        if is_chainerx != (var.device.xp is chainerx):
1443            raise RuntimeError(
1444                'Variable.addgrad does not support addition between '
1445                'gradients on non-ChainerX and ChainerX devices.\n'
1446                'Adding gradient to: {}\n'
1447                'Adding gradient from: {}'.format(
1448                    dst_device, var.device))
1449
1450        if var.grad is None:
1451            return
1452
1453        src = var.grad_var
1454
1455        if self.array is None:
1456            self.initialize(var.shape)
1457
1458        dst = self.grad_var
1459        src_device = src.device
1460        if src_device != dst_device:
1461            src = chainer.functions.copy(src, dst_device)
1462        self.grad_var = src if dst is None else src + dst
1463
1464    def set_creator(self, gen_func):
1465        """Notifies the variable that the given function is its creator.
1466
1467        Args:
1468            gen_func (Function): Function object that creates this variable as
1469                one of its outputs.
1470
1471        """
1472        if self._has_chainerx_array:
1473            raise RuntimeError(
1474                'A variable of ChainerX does not provide a creator.')
1475        self._node.set_creator(gen_func)
1476
1477    def set_creator_node(self, fnode):
1478        """Notifies the variable that the given node is its creator.
1479
1480        Args:
1481            fnode (FunctionNode): Function node that has this variable as an
1482                output.
1483
1484        """
1485        if self._has_chainerx_array:
1486            raise RuntimeError(
1487                'A variable of ChainerX does not provide a creator node.')
1488        self._node.set_creator_node(fnode)
1489
1490    def backward(self, retain_grad=False, enable_double_backprop=False,
1491                 loss_scale=None):
1492        """Runs error backpropagation (a.k.a.\\  backprop) from this variable.
1493
1494        On backprop,
1495        :meth:`FunctionNode.backward() <chainer.FunctionNode.backward>`
1496        is called on each :class:`~chainer.FunctionNode` object appearing in
1497        the backward graph starting from this variable.
1498        The backward graph is represented by backward
1499        references from variable nodes to their creators, and from function
1500        nodes to their input variable nodes. The backprop stops at all root
        nodes. Some function nodes set ``None`` as gradients of some inputs,
        in which case further backprop does not take place at such inputs.
1503
        This method uses :data:`grad` as the initial error array. Users can
        manually set a gradient array before calling this method.
        If the shape of :data:`data` is ``()`` (i.e., it is a scalar) and
        :data:`grad` is ``None``, then this method automatically uses 1.0 as
        the initial error. This is useful when starting backprop from a
        scalar loss value.
1510
1511        From v3, this method supports *differentiable backprop* (a.k.a. double
1512        backprop, grad of grads). To enable it, pass
1513        ``enable_double_backprop=True``.
1514
1515        Args:
            retain_grad (bool): If ``True``, the gradient arrays of all
                intermediate variables are kept.
                Otherwise, :data:`~chainer.Variable.grad` of the
                intermediate variables is set to ``None`` at an appropriate
                time, which may reduce the maximum memory consumption.

                In most model-training use cases, the purpose of backprop
                is to compute gradients of parameters, not of all variables,
                and therefore it is recommended to leave this flag set to
                ``False``.
            enable_double_backprop (bool): *(Added in v3.0)* If ``True``,
                the computational trace of the whole backpropagation
                procedure is recorded to the computational graph so that one
                can further do backpropagation from the resulting gradients.
                Note that enabling it results in larger memory consumption
                needed to store the gradients w.r.t. intermediate variables
                that are required for the second gradient computation.
            loss_scale (float): Loss scaling factor. Loss scaling is a useful
                technique to mitigate the vanishing gradient issue that tends
                to happen when a low precision data type like float16 is used
                during training. If you set a loss scaling factor, the
                gradient of the loss value is multiplied by the factor before
                backprop starts. The factor is propagated to all gradients in
                the computational graph along the backprop. The gradients of
                parameters are divided by the factor just before the
                parameters are updated.
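
        .. admonition:: Example

            A minimal sketch of backprop from a scalar loss; the values are
            illustrative.

            >>> import numpy as np
            >>> import chainer
            >>> x = chainer.Variable(np.array([1., 2., 3.], np.float32))
            >>> loss = chainer.functions.sum(x * x)
            >>> loss.backward()
            >>> x.grad
            array([2., 4., 6.], dtype=float32)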
1542        """
1543        if self._has_chainerx_array:
1544            if retain_grad:
1545                raise RuntimeError(
1546                    'retain_grad is not supported for ChainerX array.')
1547            arr = self._data[0]
1548            assert isinstance(arr, chainerx.ndarray)
1549            # pybind has issues when converting int -> opt<float>
1550            if loss_scale:
1551                loss_scale = float(loss_scale)
1552            chainerx.backward(
1553                arr, enable_double_backprop=enable_double_backprop,
1554                loss_scale=loss_scale)
1555            return
1556
        # Initialize the error with 1 if this is a loss variable
1558        if self.array.size == 1 and self.grad_var is None:
1559            if self.array.ndim != 0:
1560                warnings.warn(
1561                    'Treating a variable with only one element as a scalar'
1562                    ' in Variable.backward is deprecated. A scalar variable'
1563                    ' must be a 0-dimensional array. Apply'
1564                    ' chainer.functions.squeeze to obtain a scalar variable.'
                    ' If the size of this variable accidentally becomes one,'
                    ' set its grad to zero instead.',
1567                    DeprecationWarning)
1568            with chainer.using_device(self.device):
1569                self.grad = self.device.xp.ones_like(self.array)
1570            if loss_scale is not None:
1571                self.grad *= loss_scale
1572
1573        node = self.node
1574        grad_var = self.grad_var
1575        self.grad_var = None
1576
1577        with chainer.using_config('enable_backprop', enable_double_backprop):
1578            # TODO(kataoka): The following line should not pass grad_var = None
1579            # to _backprop_to_all, but it is working because grad_var is
1580            # immediately popped away as None = _backprop_utils._reduce([None])
1581            _backprop._backprop_to_all(
1582                [(node, grad_var)], retain_grad, loss_scale)
1583
1584    def item(self):
1585        """Converts the variable with one element to a Python scalar.
1586
1587        This will incur host-device synchronization.
1588
1589        Returns:
1590            int or float: The element of the array.
1591
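        .. admonition:: Example

            A minimal sketch; the value is illustrative.

            >>> import numpy as np
            >>> import chainer
            >>> chainer.Variable(np.array(3.0, dtype=np.float32)).item()
            3.0
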
1592        """
1593        return self.array.item()
1594
1595    def mean(self, axis=None, *, weights=None, keepdims=False):
1596        """Calculate weighted average of array elements over a given axis.
1597
1598        .. seealso::
           :func:`chainer.functions.average` for full documentation.
1600
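        .. admonition:: Example

            A minimal sketch of an unweighted mean over all elements; the
            values are illustrative.

            >>> import numpy as np
            >>> import chainer
            >>> arr = np.arange(6, dtype=np.float32).reshape(2, 3)
            >>> chainer.Variable(arr).mean().array
            array(2.5, dtype=float32)
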
1601        """
1602        return chainer.functions.average(self, axis, weights, keepdims)
1603
1604    def reshape(self, *shape):
1605        """Returns a variable of a different shape and the same content.
1606
1607        .. seealso::
           :func:`chainer.functions.reshape` for full documentation.
1609
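        .. admonition:: Example

            A minimal sketch; the shapes are illustrative. The new shape can
            be passed either as separate integers or as a single tuple.

            >>> import numpy as np
            >>> import chainer
            >>> v = chainer.Variable(np.zeros((2, 3), dtype=np.float32))
            >>> v.reshape(3, 2).shape
            (3, 2)
            >>> v.reshape((6,)).shape
            (6,)
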
1610        """
1611        if len(shape) == 1 and isinstance(shape[0], (tuple, list)):
1612            shape = shape[0]
1613        return chainer.functions.reshape(self, shape)
1614
1615    def transpose(self, *axes):
1616        """Permute the dimensions of an input variable without copy.
1617
1618        .. seealso::
1619           :func:`chainer.functions.transpose` for full documentation.
1620
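        .. admonition:: Example

            A minimal sketch; the shapes are illustrative. Calling it with no
            arguments reverses the axes.

            >>> import numpy as np
            >>> import chainer
            >>> v = chainer.Variable(np.zeros((2, 3, 4), dtype=np.float32))
            >>> v.transpose().shape
            (4, 3, 2)
            >>> v.transpose(1, 0, 2).shape
            (3, 2, 4)
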
1621        """
1622        if len(axes) == 0:
1623            axes = None
1624        elif len(axes) == 1 and (isinstance(axes[0], (tuple, list)) or
1625                                 axes[0] is None):
1626            axes = axes[0]
1627        return chainer.functions.transpose(self, axes)
1628
1629    def unchain(self):
1630        """Deletes the reference to the creator of this variable.
1631
1632        This method deletes the reference to the creator from the corresponding
1633        variable node. Unlike :meth:`unchain_backward`, it does not backtrack
1634        the graph.
1635
1636        This method is equivalent to ``self.creator_node = None``.
1637
1638        """
1639        if self._has_chainerx_array:
1640            raise RuntimeError(
1641                'A variable of ChainerX does not provide an unchain method.')
1642        self.creator_node = None
1643
1644    def unchain_backward(self):
1645        """Deletes references between variable nodes and functions backward.
1646
1647        After this method completes, intermediate variable nodes and functions
1648        that are not referenced from anywhere are deallocated by reference
        count GC. In addition, this variable itself deletes the reference to
        its creator function from the node, i.e., the node becomes a root in
        the computation graph. This means that backprop after unchaining
        stops at this variable. This behavior is useful for implementing
        truncated BPTT.
1653
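        .. admonition:: Example

            A minimal sketch of cutting the graph, e.g. for truncated BPTT;
            the computation below is illustrative.

            >>> import numpy as np
            >>> import chainer
            >>> x = chainer.Variable(np.ones((3,), dtype=np.float32))
            >>> h = x * 2
            >>> y = h + 1
            >>> y.unchain_backward()
            >>> y.creator is None and h.creator is None
            True
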
1654        """
1655        if self._has_chainerx_array:
1656            raise RuntimeError(
1657                'A variable of ChainerX does not provide an unchain_backward '
1658                'method.')
1659        cand_funcs = []
1660        seen_set = set()
1661
1662        def add_cand(cand):
1663            if cand is not None and cand not in seen_set:
1664                cand_funcs.append(cand)
1665                seen_set.add(cand)
1666
1667        add_cand(self.creator_node)
1668
1669        while cand_funcs:
1670            func = cand_funcs.pop()
1671            for var in func.inputs:
1672                add_cand(var.creator_node)
1673            func.unchain()
1674
1675    def retain_data(self):
1676        """Lets the corresponding variable node keep the underlying array."""
1677        if self._has_chainerx_array:
1678            raise RuntimeError(
1679                'A variable of ChainerX does not provide a retain_data '
1680                'method.')
1681        self._node.data = self._data[0]
1682
1683    def _error_nobp_op(self, op):
1684        raise TypeError(
            'Variables do not support the {} operator. '
            'Use the `array` attribute instead.'.format(op))
1687
1688    def __lt__(self, other):
1689        """This operator is not supported in Variables."""
1690        self._error_nobp_op('<')
1691
1692    def __le__(self, other):
1693        """This operator is not supported in Variables."""
1694        self._error_nobp_op('<=')
1695
1696    def __eq__(self, other):
1697        """This operator is not supported in Variables."""
1698        self._error_nobp_op('==')
1699
1700    def __ne__(self, other):
1701        """This operator is not supported in Variables."""
1702        self._error_nobp_op('!=')
1703
1704    def __gt__(self, other):
1705        """This operator is not supported in Variables."""
1706        self._error_nobp_op('>')
1707
1708    def __ge__(self, other):
1709        """This operator is not supported in Variables."""
1710        self._error_nobp_op('>=')
1711
1712    def __nonzero__(self):
1713        """This operator is not supported in Variables."""
1714        # Python 2.x
1715        raise TypeError(
1716            'Variables cannot be evaluated as Python bool.')
1717
1718    def __bool__(self):
1719        """This operator is not supported in Variables."""
1720        # Python 3.x
1721        raise TypeError(
1722            'Variables cannot be evaluated as Python bool.')
1723
1724    __array_priority__ = 200  # type: int
1725    __hash__ = None  # type: tp.Callable[[object], int]
1726
1727
1728class Parameter(Variable):
1729
1730    """Parameter variable that can be registered to a link.
1731
    Parameter is a subclass of :class:`Variable`. It behaves almost the same
1733    as a usual variable except that a parameter can be registered to a
1734    :class:`~chainer.Link` object just by assigning it to an attribute of
1735    the link within an :meth:`~chainer.Link.init_scope` context.
1736
    Parameter also supports initialization by an initializer. It can have
    two initializers: one for the data array, and the other for the gradient
    array. An initializer only specifies how to fill the elements of these
    arrays; the shape information is given separately at initialization time.
1742
1743    When a link that the parameter has been registered to is passed to an
1744    :class:`~chainer.GradientMethod`, an update rule is set to the parameter.
1745    This update rule specifies how to update the data array of the parameter
1746    using its gradient array.
1747
1748    Args:
1749        initializer (~chainer.Initializer or :ref:`ndarray`):
            Initializer of the data array. If ``shape`` is given, this
            initializer is immediately used to initialize the data array.
            Otherwise, if it is an array, it is immediately used as the data
            array; if it is neither, the data array is left uninitialized and
            will be initialized by this initializer in :meth:`initialize`. It
            can also be a scalar, in which case the data array will be filled
            by this scalar. Note that ``float32`` is used in this case.
1757        shape (int or tuple of int or None): Shape of the parameter. If it is
1758            ``None``, the initialization is deferred to the call of
1759            :meth:`initialize`.
1760        name (str): Name of the parameter.
1761
1762    Attributes:
1763        initializer: Initializer of the data array. It is used for
1764            initializing the data array of an uninitialized variable.
        update_rule: :class:`~chainer.optimizer.UpdateRule` instance that
            updates this variable as a parameter. It is typically set by an
            optimizer when the link owning this parameter is set up.
1768
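    .. admonition:: Example

        A minimal sketch of registering a parameter to a link inside
        :meth:`~chainer.Link.init_scope`; ``MyLink`` is a hypothetical link
        class used only for illustration.

        >>> import chainer
        >>> class MyLink(chainer.Link):
        ...     def __init__(self):
        ...         super(MyLink, self).__init__()
        ...         with self.init_scope():
        ...             self.W = chainer.Parameter(
        ...                 chainer.initializers.Normal(), (2, 3))
        >>> link = MyLink()
        >>> link.W.shape
        (2, 3)
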
1769    """
1770
1771    initializer = None  # type: tp.Optional[tp.Union[tp.Optional[types.AbstractInitializer], types.NdArray]] # NOQA
1772    # TODO(okapies): fix the behavior when shape is None and remove NdArray
1773    _grad_initializer = None  # type: tp.Optional[types.AbstractInitializer]
1774
1775    def __init__(
1776            self,
1777            initializer: tp.Optional[types.InitializerSpec] = None,
1778            shape: tp.Optional[types.ShapeSpec] = None,
1779            name: tp.Optional[str] = None,
1780            *,
1781            layout=None
1782    ) -> None:
1783        if initializer is None:
1784            initializer = constant.NaN()
1785        elif numpy.isscalar(initializer):
1786            initializer = constant.Constant(initializer)
1787        if shape is None:
1788            if isinstance(initializer, chainer.get_array_types()):
1789                # parameter initialized by the initial array
1790                super(Parameter, self).__init__(
1791                    initializer, name=name, layout=layout)
1792            else:
1793                # uninitialized parameter
1794                super(Parameter, self).__init__(
1795                    name=name, _grad_valid=False, layout=layout)
1796                dtype = getattr(initializer, 'dtype', None)
1797                self._grad_initializer = constant.NaN(dtype)
1798        else:
1799            # parameter initialized with a given shape
1800            if isinstance(initializer, chainer.get_array_types()):
1801                xp = backend.get_array_module(initializer)
1802                initializer = constant.Constant(initializer)
1803            else:
1804                xp = numpy
1805            data = initializers.generate_array(initializer, shape, xp)  # type: ignore # NOQA
1806            grad = xp.full_like(data, numpy.nan)
1807            super(Parameter, self).__init__(
1808                data, name=name, grad=grad, layout=layout)
1809
1810        self._initial_device = backend.CpuDevice()
1811        self.update_rule = None
1812        self.initializer = initializer
1813
1814    def __copy__(self):
1815        return self._copy_to(Parameter())
1816
1817    def __reduce__(self):
1818        args = (
1819            self.array, self.name, self._grad, self._grad_valid,
1820            self.initializer, self.update_rule, self.device)
1821        return _recover_parameter, args
1822
1823    @property
1824    def is_initialized(self):
1825        return self._data[0] is not None
1826
1827    @property
1828    def dtype(self):
1829        array = self._data[0]
1830        if array is not None:
1831            return array.dtype
1832        # uninitialized
1833        initializer = self.initializer
1834        if hasattr(initializer, 'dtype'):
1835            return numpy.dtype(initializer.dtype)
1836        raise RuntimeError(
1837            'Dtype of the parameter is not determined yet because it\'s '
1838            'uninitialized and dtype was not explicitly given.')
1839
1840    def to_cpu(self):
1841        return self.to_device(backend.CpuDevice())
1842
1843    def to_gpu(self, device=None):
1844        device = chainer.get_device(cuda._get_device_or_current(device))
1845        assert device.xp is cuda.cupy
1846        self.to_device(device)
1847
1848    def to_intel64(self):
1849        self.to_device(intel64.Intel64Device())
1850
1851    def to_chx(self):
1852        if not chainerx.is_available():
1853            raise RuntimeError('ChainerX is not available.')
1854
1855        # Derive the target ChainerX device from the array if it is
1856        # initialized. Otherwise, from the current initial device.
1857        if self.array is not None:
1858            device = backend.get_device_from_array(self.array)
1859        else:
1860            device = self._initial_device
1861
1862        if device.xp is numpy:
1863            self._initial_device = backend.ChainerxDevice(
1864                chainerx.get_device('native:0'))
1865        elif device.xp is cuda.cupy:
1866            self._initial_device = backend.ChainerxDevice(
1867                chainerx.get_device('cuda:{}'.format(device.device.id)))
1868
1869        super(Parameter, self)._to_chx(allow_unchaining=True)
1870
1871    def from_chx(self):
1872        if self.array is not None:
1873            device = backend.get_device_from_array(self.array)
1874        else:
1875            device = self._initial_device
1876
1877        if device.xp is chainerx:
1878            backend_name = device.device.backend.name
1879            if backend_name == 'native':
1880                self._initial_device = backend.CpuDevice()
1881            elif backend_name == 'cuda':
1882                self._initial_device = backend.GpuDevice.from_device_id(
1883                    device.device.index)
1884
1885        super(Parameter, self)._from_chx(allow_unchaining=True)
1886
1887    def to_device(self, device):
1888        device = chainer.get_device(device)
1889        if self._data[0] is None and self._initial_device != device:
1890            self._data = [None]  # Renew placeholder to break sharing
1891            self._has_chainerx_array = False
1892        self._initial_device = device
1893        super(Parameter, self)._to_device(device, allow_unchaining=True)
1894
1895    def cleargrad(self):
1896        super(Parameter, self).cleargrad()
1897        if not self.is_initialized:
1898            self._grad_initializer = None
1899
1900    def zerograd(self):
1901        super(Parameter, self).zerograd()
1902        if not self.is_initialized:
1903            dtype = getattr(self.initializer, 'dtype', None)
1904            self._grad_initializer = initializers.Zero(dtype)
1905
1906    def initialize(self, shape):
1907        """Initializes the uninitialized variable.
1908
        An uninitialized variable is a variable created with the data array
        set to ``None``. This method creates and initializes the data array.
        The shape of the variable can be left unknown until this method is
        called.
1912
1913        Args:
1914            shape (tuple of int): Shape of the data array.
1915
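        .. admonition:: Example

            A minimal sketch of deferred initialization; the shape is
            illustrative.

            >>> import chainer
            >>> p = chainer.Parameter(chainer.initializers.Zero())
            >>> p.is_initialized
            False
            >>> p.initialize((2, 3))
            >>> p.shape
            (2, 3)
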
1916        """
1917        device = self._initial_device
1918        assert device is not None
1919        xp = device.xp
1920
1921        data = initializers.generate_array(
1922            self.initializer, shape, xp, device=device)
1923        data = chainer.memory_layouts._transpose_array(data, None, self.layout)
1924
1925        if self._grad_initializer is None:
1926            grad = None
1927        else:
1928            grad = initializers.generate_array(
1929                self._grad_initializer, shape, xp, device=device)
1930            grad = chainer.memory_layouts._transpose_array(
1931                grad, None, self.layout)
1932
1933        self._set_array(data, layout_check=False)
1934        self._set_grad(grad, layout_check=False)
1935
1936        # Convert the array for iDeep.
1937        # TODO(niboshi): This could be done in generate_array().
1938        if isinstance(self._initial_device, intel64.Intel64Device):
1939            self.to_intel64()
1940
1941    def update(self):
1942        """Updates the data array using the gradient and the update rule.
1943
1944        This method updates the parameter using the attached update rule.
1945
1946        """
1947        if self.update_rule is not None:
1948            if not self.update_rule.is_elementwise:
1949                if self.layout is not None:
1950                    raise RuntimeError(
1951                        'Parameter with a non-standard layout cannot be '
1952                        'updated with a non-elementwise update rule '
1953                        '({}).'.format(self.update_rule))
1954            self.update_rule.update(self)
1955
1956
1957def as_variable(obj):
1958    """Converts an array or a variable into :class:`~chainer.Variable`.
1959
1960    This is a convenient function to get a :class:`~chainer.Variable` object
1961    transparently from a raw array or a variable.
1962
1963    Note that this function should only be used for type consistency (i.e., to
1964    enforce the return value of an API having type :class:`~chainer.Variable`).
    The :attr:`~chainer.Variable.requires_grad` flag is kept as is; if ``obj``
1966    is a raw array, the newly created variable has ``requires_grad = False``.
1967    In order to make a variable w.r.t. which you want to compute the gradient,
1968    you should use :class:`~chainer.Variable` directly.
1969
1970    Args:
1971        obj (:ref:`ndarray` or ~chainer.Variable): An array or
1972            a variable that you want to convert to :class:`~chainer.Variable`.
1973
1974    Returns:
1975        ~chainer.Variable:
1976        A variable converted from ``obj``. If ``obj`` is a raw array, this is a
1977        new :class:`~chainer.Variable` object that wraps the array. If ``obj``
1978        is already a :class:`~chainer.Variable` object, this function returns
1979        ``obj`` as is.
1980
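    .. admonition:: Example

        A minimal sketch; the array is illustrative.

        >>> import numpy as np
        >>> import chainer
        >>> x = np.zeros((2, 3), dtype=np.float32)
        >>> v = chainer.as_variable(x)
        >>> isinstance(v, chainer.Variable)
        True
        >>> v.requires_grad
        False
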
1981    """
1982    if isinstance(obj, Variable):
1983        return obj
1984
1985    if isinstance(obj, chainerx.ndarray):
1986        requires_grad = obj.is_backprop_required()
1987    else:
1988        requires_grad = False
1989    return Variable(obj, requires_grad=requires_grad)
1990
1991
1992def as_array(obj):
1993    """Returns the underlying array from a variable or an array.
1994
1995    This is a convenient function to get the underlying array object
1996    transparently from an object that could be either a variable or an array.
1997
1998    Args:
1999        obj (:ref:`ndarray` or ~chainer.Variable): An array or a variable.
2000
2001    Returns:
2002        :ref:`ndarray` or ~chainer.Variable:
2003        The underlying array object of the argument.
2004
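    .. admonition:: Example

        A minimal sketch; the arrays are illustrative.

        >>> import numpy as np
        >>> import chainer
        >>> v = chainer.Variable(np.ones((2,), dtype=np.float32))
        >>> chainer.as_array(v) is v.array
        True
        >>> a = np.ones((2,), dtype=np.float32)
        >>> chainer.as_array(a) is a
        True
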
2005    """
2006    if isinstance(obj, Variable):
2007        return obj.array
2008    return obj
2009
2010
2011def _recover_parameter(*args):
2012    if len(args) == 7:
2013        # latest
2014        data, name, grad, grad_valid, initializer, update_rule, device = args
2015    elif len(args) == 6:
        # `grad_valid` is not stored; presumably pickled by an older version.
        data, name, grad, initializer, update_rule, device = args
2017        grad_valid = True
2018    else:
2019        assert False, len(args)
2020
2021    p = Parameter(initializer=initializer, name=name)
2022    p.array = data
2023    p._grad = grad
2024    p._grad_valid = grad_valid
2025    p.update_rule = update_rule
2026    p.to_device(device)
2027    return p
2028
2029
2030class _ChainerxVariableNodeProps(object):
2031
2032    def __init__(self, x):
2033        self.shape = x.shape
2034        self.dtype = x.dtype
2035
2036
2037class _AllowArrayAccessWithNonstandardLayout:
2038    """Context manager within which access to Variable.array is allowed for \
2039variables with a non-standard layout."""
2040
2041    def __enter__(self):
2042        self._old = _allow_array_access_with_nonstandard_layout()
2043        _thread_local.allow_array_access_with_nonstandard_layout = True
2044
2045    def __exit__(self, typ, value, traceback):
2046        _thread_local.allow_array_access_with_nonstandard_layout = self._old
2047
2048
2049def _allow_array_access_with_nonstandard_layout():
    # Returns whether a thread-local variable
2051    # `allow_array_access_with_nonstandard_layout` is set to True.
2052    try:
2053        return _thread_local.allow_array_access_with_nonstandard_layout
2054    except AttributeError:
2055        return False
2056