1from __future__ import absolute_import 2import copy 3import threading 4import traceback 5import typing as tp # NOQA 6import warnings 7import weakref 8 9import numpy 10 11import chainer 12from chainer import _backprop 13from chainer import backend 14from chainer.backends import _cpu 15from chainer.backends import cuda 16from chainer.backends import intel64 17from chainer import initializers 18from chainer.initializers import constant 19from chainer import types # NOQA 20import chainer.utils._collections 21from chainer.utils import argument 22import chainerx 23 24 25_thread_local = threading.local() 26 27 28def _raise_grad_error(exc_type, func, msg): 29 detail = '' 30 if func: 31 detail = 'Function `{0}` ({1}) has a bug.\n'.format( 32 type(func)._impl_name, func.label) 33 stack = func.stack 34 if stack: 35 detail += 'Stacktrace of the function is below:\n' 36 for line in traceback.format_list(func.stack): 37 detail += line 38 detail += ''' 39Please report this error to the issue tracker with the stack trace, 40the information of your environment, and your script: 41https://github.com/chainer/chainer/issues/new. 42''' 43 44 raise exc_type(detail + msg) 45 46 47def _check_grad_type(func, x, is_node_x, gx): 48 # is_node_x: equivalent to isinstance(x, VariableNode) 49 50 assert gx is not None 51 52 # x_shape is the raw shape 53 54 # TODO(kataoka): avoid `isinstance` 55 if isinstance(x, _ChainerxVariableNodeProps): 56 x_data = None 57 x_layout = None 58 x_shape = x.shape 59 elif is_node_x: 60 x_data = x._data 61 x_layout = x._layout 62 x_shape = x.shape 63 if x_layout is not None: 64 # to raw shape 65 x_shape = chainer.memory_layouts._transpose_shape( 66 x_shape, None, x_layout) 67 else: 68 # assert isinstance(x, Variable) 69 x_data = x._data[0] 70 x_layout = x._layout 71 x_shape = None if x_data is None else x_data.shape 72 73 # TODO(kataoka): Make _update_data_info store the array module. 74 # ``is_node_x and x_data is None`` implies that the data array is not 75 # retained. 76 # ``not is_node_x and x_data is None`` implies that grad of uninitialized 77 # variable is checked here. 78 79 if x_data is None and not is_node_x: 80 # TODO(kataoka): This should be an error. 81 return 82 if x_layout is None: 83 if x.dtype is None or x.shape is None: 84 # unretained Variable(None) 85 # TODO(kataoka): This should be an error. 86 return 87 88 if not isinstance(gx, chainer.get_array_types()): 89 _raise_grad_error( 90 TypeError, 91 func, 92 ('Type of grad is invalid:\n' 93 + 'Expected: Any of {}\n'.format(chainer.get_array_types()) 94 + 'Actual: {}'.format(type(gx)))) 95 elif x_data is not None and not chainer.is_arrays_compatible((gx, x_data)): 96 _raise_grad_error( 97 TypeError, 98 func, 99 ('Type of data and grad mismatch\ngrad: %s != data: %s' % 100 (type(gx), type(x_data)))) 101 elif gx.dtype != x.dtype: 102 _raise_grad_error( 103 TypeError, 104 func, 105 ('Dtype of data and grad mismatch\ngrad: %s != data: %s' % 106 (gx.dtype, x.dtype))) 107 elif gx.shape != x_shape: # comparing semantic shapes (not semantic) 108 _raise_grad_error( 109 ValueError, 110 func, 111 ('Shape of data and grad mismatch\ngrad: %s != data: %s' % 112 (gx.shape, x_shape))) 113 114 115def variable_repr(var): 116 """Return the string representation of a variable. 117 118 Args: 119 var (~chainer.Variable): Input Variable. 120 .. seealso:: numpy.array_repr 121 """ 122 arr = _cpu._to_cpu(var.array) 123 124 if var.name: 125 prefix = 'variable ' + var.name 126 else: 127 prefix = 'variable' 128 129 if arr is None: 130 lst = 'None' 131 elif arr.size > 0 or arr.shape == (0,): 132 lst = numpy.array2string(arr, None, None, None, ', ', prefix + '(') 133 else: # show zero-length shape unless it is (0,) 134 lst = '[], shape=%s' % (repr(arr.shape),) 135 136 return '%s(%s)' % (prefix, lst) 137 138 139def variable_str(var): 140 """Return the string representation of a variable. 141 142 Args: 143 var (~chainer.Variable): Input Variable. 144 .. seealso:: numpy.array_str 145 """ 146 arr = _cpu._to_cpu(var.array) 147 148 if var.name: 149 prefix = 'variable ' + var.name 150 else: 151 prefix = 'variable' 152 153 if arr is None: 154 lst = 'None' 155 else: 156 lst = numpy.array2string(arr, None, None, None, ' ', prefix + '(') 157 158 return '%s(%s)' % (prefix, lst) 159 160 161class VariableNode(object): 162 163 """Node in the backward computational graph representing a variable. 164 165 This object represents a variable node in a computational graph. The node 166 is used in error backpropagation (a.k.a. backprop) to determine which 167 gradient to be passed to each function. 168 169 A variable node is held by the corresponding :class:`~chainer.Variable` 170 object, which is managed by users. :class:`~chainer.FunctionNode` objects 171 that take the variable as an input also hold references to the variable 172 node. 173 174 Note that the node does not hold a reference to the corresponding data 175 array in general. The data array is actually accessible by the node in the 176 following cases. 177 178 1. If there exists a :class:`~chainer.Variable` object that holds a 179 reference to the variable node, the variable node holds a weak reference 180 to the variable object, and thus the data array is accessible via the 181 weak reference. 182 2. If :meth:`retain_data` is called, the node holds a reference to the data 183 array. It is mainly called by a function that needs the input or output 184 data array in its backprop procedure. 185 See :meth:`FunctionNode.retain_inputs() 186 <chainer.FunctionNode.retain_inputs>` 187 and :meth:`FunctionNode.retain_outputs() 188 <chainer.FunctionNode.retain_outputs>` for more details. 189 190 Users usually do not need to touch this variable node object. The 191 computational graph is automatically managed by Chainer, and any interface 192 that is beneficial for users is also provided by 193 :class:`~chainer.Variable`. 194 195 Args: 196 variable (~chainer.Variable): The corresponding variable object. 197 name (str): Name of the variable node. 198 199 Attributes: 200 dtype: Data type of the data array. 201 shape: Shape of the data array. 202 name (str): Name of the variable node. 203 204 """ 205 206 dtype = None 207 shape = None # semantic shape 208 209 _creator_node = None 210 _data = None # type: types.NdArray 211 _rank = 0 # type: int 212 # Name of the Function is assigned if this variable is a gradient generated 213 # by an old-style Function 214 _old_style_grad_generator = None # type: str 215 _layout = None 216 217 def __init__( 218 self, 219 variable: 'Variable', 220 name: tp.Optional[str], 221 **kwargs: tp.Any 222 ) -> None: 223 if kwargs: 224 argument.check_unexpected_kwargs( 225 kwargs, 226 grad='unexpected keyword argument "grad": ' 227 'pass the gradient to Variable instead' 228 ) 229 self._variable = weakref.ref(variable) 230 self.name = name 231 self._requires_grad = variable.requires_grad 232 self._layout = variable.layout 233 234 vdata = variable.raw_array 235 self._update_data_info(vdata) 236 237 @property 238 def creator(self): 239 """Function object that created this variable node. 240 241 When the function is implemented with the old-style API (i.e., it uses 242 :class:`~chainer.Function` class), 243 this property returns the :class:`~chainer.Function` object. 244 The object is extracted from the :class:`~chainer.FunctionAdapter` 245 object, so the returned object is not the function node, but instead 246 the actual implementation of forward and backward procedures. 247 248 When the function is implemented with the new-style API (i.e., it uses 249 :class:`~chainer.FunctionNode` class), 250 this property returns the function node 251 object. In this case, the returned object is same as 252 :attr:`creator_node`. 253 254 .. warning:: 255 256 As of v3.0.0, when the creator is an old-style function, the 257 following code is invalid: 258 259 .. code-block:: python 260 261 creator = v.creator 262 v.creator = None 263 ... 264 v.creator = creator 265 266 The point is that :class:`~chainer.FunctionNode` objects are used 267 as nodes in the computational graph instead of 268 :class:`~chainer.Function`, and each :class:`~chainer.Function` 269 object only holds a *weak reference* to the corresponding 270 :class:`~chainer.FunctionNode`. 271 Since ``creator`` returns the :class:`~chainer.Function` object, 272 the :class:`~chainer.FunctionNode` object is not kept by preserving 273 ``creator``. 274 275 The above code should be fixed as follows. 276 277 .. code-block:: python 278 279 creator_node = v.creator_node 280 v.creator_node = None 281 ... 282 v.creator_node = creator_node 283 284 """ 285 node = self._creator_node 286 if node is None: 287 return None 288 289 if isinstance(node, chainer.function.FunctionAdapter): 290 return node.function 291 return node 292 293 @creator.setter 294 def creator(self, func): 295 self.creator_node = func 296 297 @property 298 def creator_node(self): 299 """Function node that has this variable as an output. 300 301 See :class:`~chainer.FunctionNode` for the definition of a function 302 node. 303 304 """ 305 return self._creator_node 306 307 @creator_node.setter 308 def creator_node(self, func): 309 if isinstance(func, chainer.Function): 310 func = func.node 311 self._creator_node = func 312 if func is not None: 313 self._rank = func.rank + 1 314 315 @property 316 def data(self): 317 """Data array of the corresponding variable. 318 319 If the data is not available, it returns ``None``. 320 321 """ 322 return self._data 323 324 @data.setter 325 def data(self, d): 326 self._data = d 327 self._update_data_info(d) 328 329 @property 330 def grad(self): 331 """Gradient array of the corresponding variable. 332 333 If the variable is not available, it returns ``None``. 334 335 """ 336 var = self._variable() 337 return None if var is None else var.grad 338 339 @property 340 def grad_var(self): 341 """Gradient variable of the corresponding variable. 342 343 If the corresponding variable is not available, it return ``None``. 344 345 """ 346 var = self._variable() 347 return None if var is None else var.grad_var 348 349 def _set_grad_var_if_available(self, g): 350 var = self._variable() 351 if var is not None: 352 var._set_grad_var_without_check(g) 353 354 @property 355 def label(self): 356 """Short text that represents the variable node.""" 357 if self.shape == (): 358 return str(self.dtype) 359 return '(%s), %s' % (', '.join(map(str, self.shape)), 360 str(self.dtype)) 361 362 @property 363 def rank(self): 364 return self._rank 365 366 @property 367 def requires_grad(self): 368 """It indicates that ``grad`` will be set in backward calculation.""" 369 return self._requires_grad 370 371 def get_variable(self): 372 """Returns the corresponding :class:`~chainer.Variable` object. 373 374 VariableNode object holds a weak reference of the variable object. If 375 the reference is alive, it is returned by this property. Otherwise, 376 this property creates a new :class:`~chainer.Variable` object from 377 this node object and returns it. 378 379 Returns: 380 ~chainer.Variable: The variable object that refers this node. 381 382 """ 383 var = self._variable() 384 if var is not None: 385 return var 386 var = Variable._init_unchecked( 387 self.data, 388 name=self.name, 389 requires_grad=self.requires_grad, 390 node=self, 391 layout=self._layout) 392 return var 393 394 def get_variable_or_none(self): 395 """Returns the holding :class:`~chainer.Variable` object or ``None``. 396 397 VariableNode object holds a weak reference of the variable object.If 398 the reference is alive, it is returned by this property. Otherwise, 399 returns ``None``. 400 401 Returns: 402 ~chainer.Variable: The variable object that refers this node. 403 404 """ 405 return self._variable() 406 407 def set_creator(self, creator): 408 """Sets a :class:`~chainer.Function` object that created this node. 409 410 This method is equivalent to ``self.creator = creator``. A 411 :class:`~chainer.FunctionNode` object can also be passed. 412 413 Args: 414 creator (Function or FunctionNode): Function that has created this 415 variable. 416 417 """ 418 self.creator = creator 419 420 def set_creator_node(self, creator_node): 421 """Sets a :class:`~chainer.FunctionNode` object that created this node. 422 423 This method is equivalent to ``self.creator_node = creator_node``. A 424 :class:`~chainer.Function` object can also be passed, in which case the 425 :attr:`Function.node <chainer.Function.node>` attribute is used. 426 427 Args: 428 creator_node (FunctionNode or Function): Function node that has 429 this variable as an output. 430 431 """ 432 self.creator_node = creator_node 433 434 def unchain(self): 435 """Deletes the reference to the creator of this variable node. 436 437 This method is equivalent to ``self.creator_node = None``. 438 439 """ 440 self.creator_node = None 441 442 def retain_data(self): 443 """Lets the node hold a reference to the underlying data array. 444 445 This method gets the data array of the corresponding variable and keeps 446 it. If the weak reference to the corresponding variable is dead, it 447 raises an error. 448 449 """ 450 variable = self._variable() 451 if variable is not None: 452 self.data = variable.data 453 else: 454 raise RuntimeError('cannot retain variable data: the variable has ' 455 'been already released') 456 457 def _update_data_info(self, d): 458 # d is a raw array (with raw shape) 459 if d is None: 460 self.dtype = None 461 self.shape = None 462 else: 463 self.dtype = d.dtype 464 465 if self._layout is None: 466 self.shape = d.shape 467 else: 468 self.shape = chainer.memory_layouts._transpose_shape( 469 d.shape, self._layout, None) 470 471 # If the node has a reference to data, update it as well. 472 if self._data is not None: 473 self._data = d 474 475 def _check_old_style_gradient(self): 476 if self._old_style_grad_generator is not None: 477 raise RuntimeError( 478 'cannot twice-differentiate an old style Function "%s"' % 479 self._old_style_grad_generator) 480 481 482def _create_variable(data, name, grad, requires_grad, device): 483 var = Variable( 484 data, name=name, grad=grad, requires_grad=requires_grad) 485 var.to_device(device) 486 return var 487 488 489class Variable(object): 490 491 """__init__(data=None, *, name=None, grad=None, requires_grad=True) 492 493 Array with a structure to keep track of computation. 494 495 Every variable holds a data array of type either :class:`numpy.ndarray` or 496 :class:`cupy.ndarray`. 497 498 A variable object holds a data array and a 499 :class:`~chainer.variable.VariableNode` object of 500 a computational graph. If the variable is constructed by the user, the node 501 is *root* and does not hold any parent. If the variable is constructed by a 502 :class:`~chainer.FunctionNode` object (i.e., by calling functions under 503 ``chainer.functions`` or user-defined functions), or by using operators 504 (see the list below), the node holds a reference to its parent called 505 :attr:`creator_node`. 506 This reference is used in backpropagation to backtrack the graph. 507 508 Users can disable (resp. enable) this chaining behavior by calling 509 :func:`~chainer.no_backprop_mode` (resp. 510 :func:`~chainer.force_backprop_mode`). 511 In the former context, a variable never creates a computational graph, 512 whereas in the latter context, it is forced to create. 513 514 .. note:: 515 516 The following operators are defined for variable(s). 517 518 * Indexing: ``a[slices]`` (:meth:`__getitem__`) 519 * Addition: ``a + b`` (:meth:`__add__`, :meth:`__radd__`) 520 * Subtraction: ``a - b`` (:meth:`__sub__`, :meth:`__rsub__`) 521 * Multiplication: ``a * b`` (:meth:`__mul__`, :meth:`__rmul__`) 522 * Division: ``a / b`` (:meth:`__div__`, :meth:`__rdiv__`, \ 523 :meth:`__truediv__`, :meth:`__rtruediv__`) 524 * Floor Division: ``a // b`` (:meth:`__floordiv__`, \ 525 :meth:`__rfloordiv__`) 526 * Exponentiation: ``a ** b`` (:meth:`__pow__`, :meth:`__rpow__`) 527 * Matrix Multiplication: ``a @ b`` (:meth:`__matmul__`, \ 528 :meth:`__rmatmul__`) 529 * Negation (Arithmetic): ``- a`` (:meth:`__neg__`) 530 * Absolute value: ``abs(a)`` (:meth:`__abs__`) 531 532 Args: 533 data (:ref:`ndarray`): Initial data array. 534 name (str): Name of the variable. 535 grad (:ref:`ndarray`): Initial gradient array. 536 requires_grad (bool): Boolean indicating whether ``grad`` will be set 537 in backward calculation. 538 539 """ 540 541 # Cached value of `self.xp is chainerx`. It prevents from initializing 542 # self._device as much as possible because it is really costly. 543 _has_chainerx_array = False 544 545 # Cached grad-stopped view of chainerx array. This is the return value 546 # of `array` and `data` properties. 547 _chainerx_nobp_array_cache = None 548 549 # Cached grad-stopped view of the array returned by `grad` property. 550 # It's a 2-element tuple, where the first is the original grad array and 551 # the second is a grad-stopped view of the first. `grad` property returns 552 # the second element. 553 _chainerx_grad_cache = None 554 555 _chainerx_name = None # type: tp.Optional[str] 556 557 # A NumPy, CuPy array cache to avoid redundant conversions between 558 # NumPy/CuPy and ChainerX. 559 # TODO(hvy): Avoid modifying this variable from outside this class. 560 _chainerx_fallback_array = None 561 562 # Used in non-ChainerX variables. The gradient array is stored in 563 # this attribute on Variable.grad setter to delay creation of grad_var 564 # instance. 565 _grad = None 566 567 _layout = None 568 569 def as_layout(self, layout): 570 src_layout = self._layout 571 if src_layout == layout: 572 return self 573 574 y, = chainer.memory_layouts.AsLayout(layout).apply((self,)) 575 return y 576 577 def __init__( 578 self, 579 data: tp.Optional[types.NdArray] = None, 580 **kwargs: tp.Any 581 ) -> None: 582 name, grad, requires_grad, grad_valid, layout = argument.parse_kwargs( 583 kwargs, ('name', None), ('grad', None), ('requires_grad', True), 584 ('_grad_valid', True), ('layout', None), 585 volatile='volatile argument is not supported anymore. ' 586 'Use chainer.using_config') 587 # _grad_valid is for internal use, hence the prefix _. 588 589 assert isinstance(requires_grad, bool) 590 if data is not None: 591 array_types = chainer.get_array_types() 592 if not isinstance(data, array_types): 593 msg = '{} or {} are expected. Actual: {}'.format( 594 ', '.join([str(at) for at in array_types[:-1]]), 595 array_types[-1], type(data)) 596 raise TypeError(msg) 597 598 self._init_impl( 599 data, None, name, grad, grad_valid, requires_grad, None, None, 600 layout) 601 602 @staticmethod 603 def _init_unchecked( 604 data=None, device=None, name=None, grad=None, grad_valid=True, 605 requires_grad=True, is_chainerx_array=None, node=None, 606 layout=None): 607 """Creates a new :class:`Variable` without the validations for 608 optimizing performance. 609 """ 610 611 # Create a Variable without invoking __init__ 612 var = Variable.__new__(Variable) 613 var._init_impl( 614 data, device, name, grad, grad_valid, requires_grad, 615 is_chainerx_array, node, layout) 616 return var 617 618 def _init_impl(self, data, device, name, grad, grad_valid, requires_grad, 619 is_chainerx_array, node, layout): 620 # `device` must be of type chainer.backend.Device. 621 # Check is skipped for performance. 622 623 self._requires_grad = requires_grad # type: bool 624 self._loss_scale = None 625 self._grad_var = None 626 self._device = device 627 # A flag to prevent grad from being used before calling cleargrad(). 628 # It becomes True when either 629 # - cleargrad() is called, or 630 # - zerograd() is called, or 631 # - grad is set. 632 # Note that it won't be True by merely initializing an uninitialized 633 # Parameter. 634 self._grad_valid = grad_valid 635 self._layout = layout 636 637 if is_chainerx_array is None: 638 is_chainerx_array = isinstance(data, chainerx.ndarray) 639 640 if is_chainerx_array: 641 if not requires_grad and grad is not None: 642 raise ValueError( 643 'Cannot initialize a variable with gradients if the ' 644 'require_grad argument is False.') 645 self._set_chainerx_array(data, grad) # type: ignore 646 647 # ChainerX itself has own node objects, but not exposed to python. 648 self._node = None # type: tp.Optional[VariableNode] 649 self._chainerx_name = name 650 else: 651 # Use a list as a data structure to hold the data array indirectly 652 # to abstract its initialized/uninitialized state. 653 self._data = [data] # type: tp.List[tp.Optional[types.NdArray]] 654 if node is None: 655 self._node = VariableNode(self, name) 656 else: 657 self._node = node 658 self._grad = grad 659 660 def __copy__(self): 661 return self._copy_to(Variable()) 662 663 def _copy_to(self, target): 664 target.__dict__ = copy.copy(self.__dict__) 665 target._node = VariableNode(target, self.name) 666 return target 667 668 def __reduce__(self): 669 args = ( 670 self.array, self.name, self.grad, self._requires_grad, self.device) 671 return _create_variable, args 672 673 def __repr__(self): 674 return variable_repr(self) 675 676 def __str__(self): 677 return variable_str(self) 678 679 def _clear_chainerx(self): 680 self._chainerx_nobp_array_cache = None 681 self._chainerx_grad_cache = None 682 self._chainerx_fallback_array = None 683 684 def _ensure_grad_var_up_to_date(self): 685 # For non-ChainerX, this method creates _grad_var if it's not yet 686 # created and _grad is set. 687 # For ChainerX, this method checks consistency between 688 # _grad_var._data[0] and self._data[0].grad and recreates _grad_var 689 # as necessary. (chainerx.ndarray.grad can be altered independently 690 # from chainer) 691 if self._has_chainerx_array: 692 self._grad = None 693 # Update gradient variable if it has not yet been initialized or 694 # it happens to be dirty w.r.t. the actual gradient of the 695 # underlying chainerx.ndarray. 696 arr = self._data[0] 697 actual_grad = ( 698 arr.grad 699 if arr is not None and arr.is_grad_required() 700 else None) 701 if actual_grad is None: 702 self._grad_var = None 703 else: 704 grad_var = self._grad_var 705 old_grad = None if grad_var is None else grad_var._data[0] 706 if actual_grad is not old_grad: 707 self._grad_var = Variable( 708 actual_grad, 709 requires_grad=actual_grad.is_backprop_required(), 710 layout=self._layout) 711 return 712 713 if self._grad_var is None: 714 if self._grad is not None: 715 self._grad_var = Variable(self._grad, layout=self._layout) 716 717 def _set_chainerx_array( 718 self, 719 array: tp.Optional['chainerx.ndarray'], 720 grad: tp.Optional['chainerx.ndarray'] 721 ) -> None: 722 723 # Sets chainerx array and grad. 724 assert array is None or isinstance(array, chainerx.ndarray) 725 requires_grad = self._requires_grad 726 727 self._grad = None 728 729 if (not requires_grad 730 and array is not None 731 and array.is_backprop_required()): 732 raise ValueError( 733 'Cannot initialize a variable to not require ' 734 'gradients if the ChainerX array already requires ' 735 'backprop.') 736 737 # Create a view of the given data to hold internally and modify. 738 if array is None: 739 self._data = [None] 740 else: 741 # If the array `array` is not connected to a graph, a view of it is 742 # created and kept, in order not to change the no-graph status of 743 # it. If the array is connected, the graph status is kept track of. 744 if not array.is_backprop_required(): 745 array = array.view() 746 if requires_grad: 747 array.require_grad() 748 if grad is not None: 749 array.set_grad(grad) 750 self._data = [array] 751 752 self._has_chainerx_array = True # even if data is None 753 self._chainerx_nobp_array_cache = None 754 self._chainerx_grad_cache = None 755 self._chainerx_fallback_array = None 756 757 @property 758 def device(self): 759 """Device on which the data array of this variable reside.""" 760 # lazy initialization for performance 761 if self._device is None: 762 if self._data[0] is None: 763 self._device = backend.CpuDevice() 764 else: 765 self._device = backend.get_device_from_array(self._data[0]) 766 return self._device 767 768 @property 769 def xp(self) -> tp.Optional[types.Xp]: 770 """Array module for the data array of this variable.""" 771 if self._has_chainerx_array: 772 return chainerx 773 else: 774 device = self.device 775 return None if device is None else device.xp 776 777 @property 778 def name(self): 779 if self._has_chainerx_array: 780 return self._chainerx_name 781 return self._node.name 782 783 @name.setter 784 def name(self, n): 785 if self._has_chainerx_array: 786 self._chainerx_name = n 787 return 788 self._node.name = n 789 790 def summary(self): 791 if self.name: 792 return '<variable %s>' % self.name 793 else: 794 return '<variable at 0x%x>' % id(self) 795 796 def debug_print(self): 797 """Display a summary of the stored data and location of the Variable""" 798 799 msg = """{summary} 800- device: {device} 801- backend: {backend} 802- shape: {shape} 803- dtype: {dtype} 804- statistics: {stats} 805- grad: {grad}""" 806 807 stats_msg = 'mean={0:.8f}, std={1:.8f}' 808 809 array = self.array 810 device = self.device 811 with chainer.using_device(device): 812 xp = device.xp 813 814 if array is None: 815 # `array` can be `None` if constructed without any arguments 816 device = None 817 backend = None 818 stats = None 819 else: 820 device = getattr(array, 'device', 'CPU') 821 backend = type(array) 822 stats = stats_msg.format(float(xp.mean(array)), 823 float(xp.std(array))) 824 shape = getattr(array, 'shape', None) 825 dtype = getattr(array, 'dtype', None) 826 827 if self.grad is None: 828 grad = None 829 elif xp.all(self.grad == 0): 830 grad = 0 831 else: 832 grad = stats_msg.format(float(xp.mean(self.grad)), 833 float(xp.std(self.grad))) 834 835 return msg.format(summary=self.summary(), device=device, 836 backend=backend, shape=shape, dtype=dtype, 837 stats=stats, grad=grad) 838 839 def __pos__(self): 840 return self 841 842 def __len__(self): 843 """Returns the first dimension of the data array. 844 845 Returns: 846 int: Number of the first dimension of the data array. 847 848 """ 849 return len(self.array) 850 851 @property 852 def label(self): 853 """Short text that represents the variable.""" 854 if self._has_chainerx_array: 855 raise RuntimeError( 856 'A variable of ChainerX does not provide a node label.') 857 return self._node.label 858 859 @property 860 def creator(self): 861 """Function implementation that created this variable. 862 863 When this variable has been created by an old-style function (i.e., it 864 is implemented as a subclass of :class:`Function`), this property 865 returns that :class:`Function` object. 866 867 When this variable has been created by a new-style function (i.e., it 868 is implemented as a subclass of :class:`FunctionNode` class), this 869 property returns that node object. 870 871 """ 872 if self._has_chainerx_array: 873 raise RuntimeError( 874 'A variable of ChainerX does not provide a creator.') 875 return self._node.creator 876 877 @creator.setter 878 def creator(self, func): 879 if self._has_chainerx_array: 880 raise RuntimeError( 881 'A variable of ChainerX does not provide a creator.') 882 self._node.creator = func 883 884 @property 885 def creator_node(self): 886 """:class:`FunctionNode` object that created this variable. 887 888 This property has a setter to which ``None`` can be set. Setting 889 ``None`` to this property is equivalent to call :meth:`unchain`; 890 it purges the variable from the function that created this variable. 891 892 The setter also accepts the original :class:`FunctionNode` object that 893 created this variable. For example, you can once set ``None`` to this 894 property and then set the original value again. 895 896 .. note:: 897 Setting an irrelevant :meth:`FunctionNode` object does not emit any 898 error immediately, whereas the behavior is undefined. Do not set 899 a :meth:`FunctionNode` object that did not create this variable 900 object. 901 902 """ 903 if self._has_chainerx_array: 904 raise RuntimeError( 905 'A variable of ChainerX does not provide a creator_node.') 906 return self._node._creator_node 907 908 @creator_node.setter 909 def creator_node(self, func): 910 if self._has_chainerx_array: 911 raise RuntimeError( 912 'A variable of ChainerX does not provide a creator_node.') 913 self._node.creator_node = func 914 915 @property 916 def array(self) -> tp.Optional[types.NdArray]: 917 """The underlying data array. 918 919 It is either :class:`numpy.ndarray` or :class:`cupy.ndarray` object, 920 or ``None`` if the variable in in an uninitialized state. 921 922 """ 923 return self._get_array() 924 925 def _get_array(self): 926 if (self._layout is not None 927 and not ( 928 _allow_array_access_with_nonstandard_layout())): 929 raise RuntimeError( 930 'Cannot directly retrieve the underlying array from a ' 931 'variable with non-standard layout.') 932 return self.raw_array 933 934 @property 935 def raw_array(self): 936 """The underlying raw data array. 937 938 Its shape does not have to be the semantic shape, if the memory layout 939 is non-standard. 940 """ 941 # For ChainerX, this property always returns a grad-stopped view. 942 # The view is cached to reduce potential overhead. 943 if self._has_chainerx_array: 944 if (self._chainerx_nobp_array_cache is None 945 and self._data[0] is not None): 946 self._chainerx_nobp_array_cache = ( 947 self._data[0].as_grad_stopped()) # type: ignore 948 return self._chainerx_nobp_array_cache 949 950 return self._data[0] 951 952 @array.setter 953 def array(self, d: tp.Optional[types.NdArray]) -> None: 954 self._set_array(d) 955 956 def _set_array(self, d, *, layout_check=True): 957 if (layout_check 958 and self._layout is not None 959 and not ( 960 _allow_array_access_with_nonstandard_layout())): 961 raise RuntimeError( 962 'Cannot directly set the underlying array of a variable with ' 963 'non-standard layout.') 964 if self._has_chainerx_array: 965 d_old = self._data[0] 966 if (d_old is not None 967 and (d_old.is_backprop_required() # type: ignore 968 or d.is_backprop_required())): # type: ignore 969 raise ValueError( 970 'Cannot update the array of a Variable if either the ' 971 'existing or the new array requires backprop.') 972 973 self._set_chainerx_array(d, None) # type: ignore 974 else: 975 self._node._update_data_info(d) # type: ignore # _node doesn't have value when xp is chainerx # NOQA 976 self._data[0] = d 977 self._has_chainerx_array = False 978 979 @property 980 def chx_array(self): 981 """A view of the raw ChainerX array. 982 983 In contrary to :data:`Variable.array` which is always disconnected, 984 the array represented by this attribute may be connected to the 985 computational graph. 986 987 It is a view, so it has a distinct gradient from the original array. 988 989 If this attribute is queried on a :class:`Variable` with a non-ChainerX 990 array, :class:`ValueError` will be raised. 991 """ 992 if not self._has_chainerx_array: 993 raise ValueError( 994 'chx_array is not available for Variable with ' 995 'non-ChainerX array.') 996 return self._data[0].view() 997 998 @property 999 def data(self) -> tp.Optional[types.NdArray]: 1000 """The underlying data array (equivalent to :attr:`array`). 1001 1002 Note that using this attribute directly is discouraged; use 1003 :attr:`array` instead. Using :attr:`array`, you can find an error 1004 earlier when your code mixes up Variable and ndarray because 1005 ndarray does not have an attribute ``.array`` while it has 1006 ``.data``. 1007 1008 """ 1009 return self.array 1010 1011 @data.setter 1012 def data(self, d: types.NdArray) -> None: 1013 self.array = d 1014 1015 @property 1016 def layout(self): 1017 return self._layout 1018 1019 def _set_chainerx_grad(self, g, from_grad_var): 1020 # Assigns chainerx.ndarray.grad. 1021 # 1022 # If the main array is connected to the graph, in order to enable 1023 # double-backprop, the grad will also be backprop-required 1024 # (a view is created not to affect the given grad). 1025 # If the given grad is from a grad_var, this operation is skipped, 1026 # as the status of the given grad reflects the necessity of 1027 # double-backprop. 1028 assert self.xp is chainerx 1029 if not self._requires_grad and g is not None: 1030 raise RuntimeError( 1031 'Cannot set the gradient of a variable that is flagged to not ' 1032 'require one.') 1033 arr = self._data[0] 1034 if arr is None: 1035 if g is not None: 1036 raise RuntimeError( 1037 'Cannot set a gradient to an empty variable') 1038 elif arr.is_backprop_required(): 1039 # If g is grad-stopped, require grad on it. 1040 # Make a view in order not to affect the input. 1041 if (g is not None 1042 and not from_grad_var 1043 and not g.is_backprop_required()): 1044 g = g.view().require_grad() 1045 arr.set_grad(g) 1046 1047 def _set_grad_without_check(self, g): 1048 if self._has_chainerx_array: 1049 self._set_chainerx_grad(g, False) 1050 self._grad_var = None 1051 self._grad_valid = True 1052 return 1053 1054 self._grad = g 1055 self._grad_var = None 1056 self._grad_valid = True 1057 1058 @property 1059 def grad(self) -> tp.Optional[types.NdArray]: 1060 """Gradient array of this variable. 1061 1062 Note that this property returns the underlying array of the gradient 1063 variable instead of the gradient variable itself; to get/set 1064 gradient variable, use :attr:`grad_var` instead. 1065 1066 If the underlying array is a :class:`chainerx.ndarray` and 1067 requires_grad is false, trying to access the gradient will results in 1068 and error. 1069 1070 """ 1071 return self._get_grad() 1072 1073 def _get_grad(self): 1074 if (self._layout is not None 1075 and not ( 1076 _thread_local.allow_array_access_with_nonstandard_layout)): 1077 raise RuntimeError( 1078 'Cannot directly retrieve the gradient array of a ' 1079 'variable with non-standard layout.') 1080 if not self._grad_valid: 1081 raise RuntimeError( 1082 'Cannot retrieve Variable.grad. ' 1083 'Either it must be set manually or Variable.cleargrad() ' 1084 'must be called beforehand.') 1085 1086 if self._has_chainerx_array: 1087 arr = self._data[0] 1088 if arr is None or not arr.is_backprop_required(): 1089 self._chainerx_grad_cache = None 1090 return None 1091 1092 actual_grad = arr.grad 1093 1094 if actual_grad is None: 1095 self._chainerx_grad_cache = None 1096 return None 1097 1098 # If grad is cached and the actual grad has not changed, return 1099 # the cache. 1100 if self._chainerx_grad_cache is not None: 1101 orig_grad, grad_stopped_grad = self._chainerx_grad_cache 1102 if orig_grad is actual_grad: 1103 return grad_stopped_grad 1104 1105 # Update the cache 1106 grad_stopped_grad = actual_grad.as_grad_stopped() 1107 self._chainerx_grad_cache = (actual_grad, grad_stopped_grad) 1108 1109 return grad_stopped_grad 1110 1111 if self._grad_var is not None: 1112 return self._grad_var.array 1113 return self._grad 1114 1115 @grad.setter 1116 def grad(self, g: tp.Optional[types.NdArray]) -> None: 1117 self._set_grad(g) 1118 1119 def _set_grad(self, g, *, layout_check=True): 1120 if (layout_check 1121 and self._layout is not None 1122 and not ( 1123 _allow_array_access_with_nonstandard_layout())): 1124 raise RuntimeError( 1125 'Cannot directly set the gradient array of a ' 1126 'variable with non-standard layout.') 1127 if g is not None: 1128 _check_grad_type(None, self, False, g) 1129 self._set_grad_without_check(g) 1130 1131 def _set_grad_var_without_check(self, gv): 1132 if self._has_chainerx_array: 1133 self._set_chainerx_grad( 1134 None if gv is None else gv._data[0], 1135 True) 1136 self._grad_var = gv 1137 return 1138 1139 self._grad_var = gv 1140 self._grad = None if gv is None else gv.array 1141 1142 @property 1143 def grad_var(self) -> tp.Optional['Variable']: 1144 """Gradient variable.""" 1145 self._ensure_grad_var_up_to_date() 1146 return self._grad_var 1147 1148 @grad_var.setter 1149 def grad_var(self, g: tp.Optional['Variable']) -> None: 1150 if g is not None: 1151 _check_grad_type(None, self, False, g.array) 1152 self._set_grad_var_without_check(g) 1153 1154 @property 1155 def shape(self): 1156 raw_shape = self._data[0].shape 1157 if self._layout is not None: 1158 # Convert to semantic shape 1159 return chainer.memory_layouts._transpose_shape( 1160 raw_shape, self._layout, None) 1161 return raw_shape 1162 1163 @property 1164 def ndim(self): 1165 return self._data[0].ndim 1166 1167 @property 1168 def size(self): 1169 return self._data[0].size 1170 1171 @property 1172 def dtype(self): 1173 return self._data[0].dtype 1174 1175 @property 1176 def rank(self): 1177 if self._has_chainerx_array: 1178 raise RuntimeError( 1179 'A variable of ChainerX does not provide a node rank.') 1180 return self._node.rank 1181 1182 @property 1183 def node(self): 1184 if self._has_chainerx_array: 1185 raise RuntimeError( 1186 'A variable of ChainerX does not provide a node.') 1187 return self._node 1188 1189 @property 1190 def requires_grad(self): 1191 """It indicates that ``grad`` will be set in backward calculation.""" 1192 return self._requires_grad 1193 1194 @property 1195 def T(self): 1196 """Transposition of this variable.""" 1197 return chainer.functions.transpose(self) 1198 1199 def to_cpu(self): 1200 """Copies the data and gradient arrays to CPU.""" 1201 self.to_device(backend.CpuDevice()) 1202 1203 def to_gpu(self, device=None): 1204 """Copies the data and gradient arrays to specified GPU. 1205 1206 Args: 1207 device: Target device specifier. If omitted, the current device is 1208 used. 1209 1210 """ 1211 cuda.check_cuda_available() 1212 self.to_device(cuda._get_device_or_current(device)) 1213 1214 def to_intel64(self): 1215 """Copies the data and gradient arrays to intel64 specific mdarray. 1216 1217 If the array is not suited for intel64, it will be converted to 1218 :class:`numpy.ndarray`. 1219 """ 1220 intel64.check_ideep_available() 1221 self.to_device(intel64.Intel64Device()) 1222 1223 def to_chx(self): 1224 """Converts the array and gradient to ChainerX arrays without copy. 1225 1226 This method converts the underlying array and gradient to 1227 :class:`chainerx.ndarray` on the same physical device. It does nothing 1228 if the array held by the Variable object is already a ChainerX array. 1229 The new array is a view of the original one. 1230 1231 """ 1232 self._to_chx(allow_unchaining=False) 1233 1234 def _to_chx(self, allow_unchaining): 1235 if not chainerx.is_available(): 1236 raise RuntimeError('ChainerX is not available.') 1237 1238 if self._has_chainerx_array: 1239 return 1240 1241 if not allow_unchaining and self.creator is not None: 1242 raise RuntimeError( 1243 'A variable with a creator cannot be converted into ChainerX ' 1244 'array') 1245 1246 self._to_device( 1247 backend.ChainerxDevice.from_fallback_device(self.device), 1248 allow_unchaining) 1249 1250 def from_chx(self): 1251 """Converts the array and gradient to non-ChainerX arrays without copy. 1252 1253 This method converts the underlying ChainerX array and gradient 1254 residing in either a ``native`` or ``cuda`` device to NumPy or CuPy 1255 arrays respectively, on their same physical device. It does nothing 1256 if the array held by the Variable object is not a ChainerX array. The 1257 new array is a view of the original one. 1258 1259 Raises an error if such a conversion is not supported for the device. 1260 1261 """ 1262 self._from_chx(allow_unchaining=False) 1263 1264 def _from_chx(self, allow_unchaining): 1265 if not self._has_chainerx_array: 1266 return 1267 1268 if not allow_unchaining and self._data[0].is_backprop_required(): 1269 raise RuntimeError( 1270 'Cannot convert from a Variable with a ChainerX array that is ' 1271 'connected to a graph.') 1272 1273 self.to_device(self.device.fallback_device) 1274 1275 def to_device(self, device): 1276 """Copies the data and gradient arrays to specified device. 1277 1278 Args: 1279 device: Target device specifier. See 1280 :func:`~chainer.get_device` for available values. 1281 1282 """ 1283 self._to_device(device, allow_unchaining=False) 1284 1285 def _to_device(self, device, allow_unchaining): 1286 device = chainer.get_device(device) 1287 1288 was_chainerx = self._has_chainerx_array 1289 is_chainerx = device.xp is chainerx 1290 1291 if not allow_unchaining: 1292 if was_chainerx and not is_chainerx: 1293 chx_arr = self._data[0] 1294 if chx_arr is not None and chx_arr.is_backprop_required(): 1295 raise RuntimeError( 1296 'A variable of a ChainerX array which requires ' 1297 'gradients cannot be copied into non-chainerx device ' 1298 '({}).'.format(device)) 1299 elif not was_chainerx and is_chainerx: 1300 arr = self._data[0] 1301 if arr is not None and self.creator is not None: 1302 raise RuntimeError( 1303 'A variable of a non-ChainerX array which is ' 1304 'connected to a graph cannot be copied to a ChainerX ' 1305 'device ({}).'.format(device)) 1306 1307 arr = self._data[0] 1308 grad_var = self.grad_var 1309 1310 if was_chainerx and not is_chainerx: 1311 self._clear_chainerx() 1312 self._node = VariableNode(self, self._chainerx_name) 1313 elif not was_chainerx and is_chainerx: 1314 self._chainerx_name = self._node.name 1315 1316 self._device = device 1317 self._has_chainerx_array = is_chainerx 1318 1319 if arr is None: 1320 return 1321 1322 if backend.get_device_from_array(arr) == device: 1323 return 1324 1325 new_arr = device.send(arr) 1326 if is_chainerx: 1327 if grad_var is None: 1328 new_grad = None 1329 else: 1330 new_grad = device.send(grad_var._data[0]) 1331 self._set_chainerx_array(new_arr, new_grad) 1332 else: 1333 self._data = [new_arr] 1334 if grad_var is not None: 1335 grad_var._to_device(device, allow_unchaining=allow_unchaining) 1336 # _grad has been invalidated by the line above. 1337 self._grad = grad_var.raw_array 1338 1339 # ensure that the node tracks the device migration 1340 node = self._node 1341 if is_chainerx: 1342 # ChainerX itself has own node objects, 1343 # ensure that the node is disconnected with this variable. 1344 if node is not None: 1345 # Disconnect by replacing with an alternative of dead weakref 1346 node._variable = lambda: None 1347 self._node = None 1348 else: 1349 if node._data is not None: 1350 node.retain_data() 1351 1352 def cleargrad(self): 1353 """Clears the gradient array.""" 1354 self.grad_var = None 1355 self._grad_valid = True 1356 1357 def zerograd(self): 1358 """Initializes the gradient array by zeros. 1359 1360 1361 Note that the gradient variable is unchained from the computational 1362 graph by this method, because this operation breaks the backprop 1363 validity. 1364 1365 .. deprecated:: v1.15 1366 Use more efficient :meth:`cleargrads` instead. 1367 1368 """ 1369 warnings.warn( 1370 'Variable.zerograd is deprecated. Use Variable.cleargrad instead.', 1371 DeprecationWarning) 1372 1373 arr = self.array 1374 if arr is None: 1375 self._grad_valid = True 1376 return 1377 1378 if self._has_chainerx_array: 1379 gv = self.grad_var 1380 if gv is None: 1381 self.grad = chainerx.zeros_like( 1382 arr, device=self.device.device) 1383 else: 1384 gv._data[0].fill(0) 1385 else: 1386 with chainer.using_device(self.device): 1387 xp = self.device.xp 1388 if self._grad is None: 1389 self._grad = xp.zeros_like(arr) 1390 self._grad_var = None 1391 else: 1392 gv = self._grad_var 1393 if gv is not None: 1394 gv.unchain() 1395 self._grad.fill(0) 1396 self._grad_valid = True 1397 1398 def copydata(self, var): 1399 """Copies the data array from given source variable. 1400 1401 This method copies the data array from given variable to this variable. 1402 The copy is done even if the arrays reside on different devices, 1403 including across the host and a GPU device. If this variable has an 1404 uninitialized data array, this method initializes it by the data array 1405 of the given variable. Similarly, if the given variable has an 1406 uninitialized data array, this method initializes it by the data array 1407 of this variable (``self``). If both are uninitialized, this method 1408 does nothing. 1409 1410 Args: 1411 var (~chainer.Variable): Source variable. 1412 1413 """ 1414 src = var.array 1415 dst = self.array 1416 if src is None: 1417 if dst is None: 1418 return 1419 var.initialize(self.shape) 1420 src = var.array 1421 elif dst is None: 1422 self.initialize(src.shape) 1423 dst = self.array 1424 backend.copyto(dst, src) 1425 1426 def addgrad(self, var): 1427 """Accumulates the gradient array from given source variable. 1428 1429 This method adds the gradient of a given variable to the gradient of 1430 this variable. The accumulation is even done across the host and 1431 different devices. If this variable has uninitialized data/grad arrays, 1432 this method initializes it with the shape of the given variable and 1433 then accumulates the gradient. 1434 1435 Args: 1436 var (~chainer.Variable): Source variable. 1437 1438 """ 1439 dst_device = self.device 1440 is_chainerx = dst_device.xp is chainerx 1441 1442 if is_chainerx != (var.device.xp is chainerx): 1443 raise RuntimeError( 1444 'Variable.addgrad does not support addition between ' 1445 'gradients on non-ChainerX and ChainerX devices.\n' 1446 'Adding gradient to: {}\n' 1447 'Adding gradient from: {}'.format( 1448 dst_device, var.device)) 1449 1450 if var.grad is None: 1451 return 1452 1453 src = var.grad_var 1454 1455 if self.array is None: 1456 self.initialize(var.shape) 1457 1458 dst = self.grad_var 1459 src_device = src.device 1460 if src_device != dst_device: 1461 src = chainer.functions.copy(src, dst_device) 1462 self.grad_var = src if dst is None else src + dst 1463 1464 def set_creator(self, gen_func): 1465 """Notifies the variable that the given function is its creator. 1466 1467 Args: 1468 gen_func (Function): Function object that creates this variable as 1469 one of its outputs. 1470 1471 """ 1472 if self._has_chainerx_array: 1473 raise RuntimeError( 1474 'A variable of ChainerX does not provide a creator.') 1475 self._node.set_creator(gen_func) 1476 1477 def set_creator_node(self, fnode): 1478 """Notifies the variable that the given node is its creator. 1479 1480 Args: 1481 fnode (FunctionNode): Function node that has this variable as an 1482 output. 1483 1484 """ 1485 if self._has_chainerx_array: 1486 raise RuntimeError( 1487 'A variable of ChainerX does not provide a creator node.') 1488 self._node.set_creator_node(fnode) 1489 1490 def backward(self, retain_grad=False, enable_double_backprop=False, 1491 loss_scale=None): 1492 """Runs error backpropagation (a.k.a.\\ backprop) from this variable. 1493 1494 On backprop, 1495 :meth:`FunctionNode.backward() <chainer.FunctionNode.backward>` 1496 is called on each :class:`~chainer.FunctionNode` object appearing in 1497 the backward graph starting from this variable. 1498 The backward graph is represented by backward 1499 references from variable nodes to their creators, and from function 1500 nodes to their input variable nodes. The backprop stops at all root 1501 nodes. Some function nodes set ``None`` as gradients of some inputs, 1502 where further backprop does not take place at such inputs. 1503 1504 This method uses :data:`grad` as the initial error array. User can 1505 manually set a gradient array before calling this method. 1506 If the shape of :data:`data` is ``()`` (i.e., it is scalar) and 1507 :data:`grad` is ``None``, then this method automatically complements 1508 1.0 as the initial error. This is useful on starting backprop from 1509 some scalar loss value. 1510 1511 From v3, this method supports *differentiable backprop* (a.k.a. double 1512 backprop, grad of grads). To enable it, pass 1513 ``enable_double_backprop=True``. 1514 1515 Args: 1516 retain_grad (bool): If ``True``, the gradient arrays of all 1517 intermediate variables are kept. 1518 Otherwise, :data:`~chainer.Variable.grad` of the 1519 intermediate variables are set to ``None`` on appropriate 1520 timing, which may reduce the maximum memory consumption. 1521 1522 In most cases of training some models, the purpose of backprop 1523 is to compute gradients of parameters, not of all variables, 1524 and therefore it is recommended that this flag be set to 1525 ``False``. 1526 enable_double_backprop (bool): *(Added in v3.0)* If ``True``, 1527 computational trace of the whole backpropagation procedure is 1528 recorded to the computational graph so that one can further do 1529 backpropagation from the resulting gradients. Note that 1530 enabling it results in larger memory consumption needed to 1531 store the gradients w.r.t intermediate variables that are 1532 required for the second gradient computation. 1533 loss_scale (float): Loss scaling factor. Loss scaling is a useful 1534 technique to mitigate vanishing gradient issue that tends to 1535 happen when low precision data type like float16 is used during 1536 training. If you set loss scaling factor, gradients of loss 1537 values are to be multiplied by the factor before backprop 1538 starts. The factor is propagated to whole gradients in a 1539 computational graph along the backprop. The gradients of 1540 parameters are divided by the factor just before the parameters 1541 are to be updated. 1542 """ 1543 if self._has_chainerx_array: 1544 if retain_grad: 1545 raise RuntimeError( 1546 'retain_grad is not supported for ChainerX array.') 1547 arr = self._data[0] 1548 assert isinstance(arr, chainerx.ndarray) 1549 # pybind has issues when converting int -> opt<float> 1550 if loss_scale: 1551 loss_scale = float(loss_scale) 1552 chainerx.backward( 1553 arr, enable_double_backprop=enable_double_backprop, 1554 loss_scale=loss_scale) 1555 return 1556 1557 # Initialize error by 1, if this is a loss variable 1558 if self.array.size == 1 and self.grad_var is None: 1559 if self.array.ndim != 0: 1560 warnings.warn( 1561 'Treating a variable with only one element as a scalar' 1562 ' in Variable.backward is deprecated. A scalar variable' 1563 ' must be a 0-dimensional array. Apply' 1564 ' chainer.functions.squeeze to obtain a scalar variable.' 1565 ' If the size of this variable accidentally becomes one,' 1566 ' set zero to grad.', 1567 DeprecationWarning) 1568 with chainer.using_device(self.device): 1569 self.grad = self.device.xp.ones_like(self.array) 1570 if loss_scale is not None: 1571 self.grad *= loss_scale 1572 1573 node = self.node 1574 grad_var = self.grad_var 1575 self.grad_var = None 1576 1577 with chainer.using_config('enable_backprop', enable_double_backprop): 1578 # TODO(kataoka): The following line should not pass grad_var = None 1579 # to _backprop_to_all, but it is working because grad_var is 1580 # immediately popped away as None = _backprop_utils._reduce([None]) 1581 _backprop._backprop_to_all( 1582 [(node, grad_var)], retain_grad, loss_scale) 1583 1584 def item(self): 1585 """Converts the variable with one element to a Python scalar. 1586 1587 This will incur host-device synchronization. 1588 1589 Returns: 1590 int or float: The element of the array. 1591 1592 """ 1593 return self.array.item() 1594 1595 def mean(self, axis=None, *, weights=None, keepdims=False): 1596 """Calculate weighted average of array elements over a given axis. 1597 1598 .. seealso:: 1599 :func:`chainer.functions.average` for full documentation, 1600 1601 """ 1602 return chainer.functions.average(self, axis, weights, keepdims) 1603 1604 def reshape(self, *shape): 1605 """Returns a variable of a different shape and the same content. 1606 1607 .. seealso:: 1608 :func:`chainer.functions.reshape` for full documentation, 1609 1610 """ 1611 if len(shape) == 1 and isinstance(shape[0], (tuple, list)): 1612 shape = shape[0] 1613 return chainer.functions.reshape(self, shape) 1614 1615 def transpose(self, *axes): 1616 """Permute the dimensions of an input variable without copy. 1617 1618 .. seealso:: 1619 :func:`chainer.functions.transpose` for full documentation. 1620 1621 """ 1622 if len(axes) == 0: 1623 axes = None 1624 elif len(axes) == 1 and (isinstance(axes[0], (tuple, list)) or 1625 axes[0] is None): 1626 axes = axes[0] 1627 return chainer.functions.transpose(self, axes) 1628 1629 def unchain(self): 1630 """Deletes the reference to the creator of this variable. 1631 1632 This method deletes the reference to the creator from the corresponding 1633 variable node. Unlike :meth:`unchain_backward`, it does not backtrack 1634 the graph. 1635 1636 This method is equivalent to ``self.creator_node = None``. 1637 1638 """ 1639 if self._has_chainerx_array: 1640 raise RuntimeError( 1641 'A variable of ChainerX does not provide an unchain method.') 1642 self.creator_node = None 1643 1644 def unchain_backward(self): 1645 """Deletes references between variable nodes and functions backward. 1646 1647 After this method completes, intermediate variable nodes and functions 1648 that are not referenced from anywhere are deallocated by reference 1649 count GC. Also this variable itself deletes the reference to its 1650 creator function from the node, i.e. the node becomes root in the 1651 computation graph. It indicates that backprop after unchaining stops at 1652 this variable. This behavior is useful to implement truncated BPTT. 1653 1654 """ 1655 if self._has_chainerx_array: 1656 raise RuntimeError( 1657 'A variable of ChainerX does not provide an unchain_backward ' 1658 'method.') 1659 cand_funcs = [] 1660 seen_set = set() 1661 1662 def add_cand(cand): 1663 if cand is not None and cand not in seen_set: 1664 cand_funcs.append(cand) 1665 seen_set.add(cand) 1666 1667 add_cand(self.creator_node) 1668 1669 while cand_funcs: 1670 func = cand_funcs.pop() 1671 for var in func.inputs: 1672 add_cand(var.creator_node) 1673 func.unchain() 1674 1675 def retain_data(self): 1676 """Lets the corresponding variable node keep the underlying array.""" 1677 if self._has_chainerx_array: 1678 raise RuntimeError( 1679 'A variable of ChainerX does not provide a retain_data ' 1680 'method.') 1681 self._node.data = self._data[0] 1682 1683 def _error_nobp_op(self, op): 1684 raise TypeError( 1685 'Variables do not support {} operator. ' 1686 'You could use `array` attribute instead.'.format(op)) 1687 1688 def __lt__(self, other): 1689 """This operator is not supported in Variables.""" 1690 self._error_nobp_op('<') 1691 1692 def __le__(self, other): 1693 """This operator is not supported in Variables.""" 1694 self._error_nobp_op('<=') 1695 1696 def __eq__(self, other): 1697 """This operator is not supported in Variables.""" 1698 self._error_nobp_op('==') 1699 1700 def __ne__(self, other): 1701 """This operator is not supported in Variables.""" 1702 self._error_nobp_op('!=') 1703 1704 def __gt__(self, other): 1705 """This operator is not supported in Variables.""" 1706 self._error_nobp_op('>') 1707 1708 def __ge__(self, other): 1709 """This operator is not supported in Variables.""" 1710 self._error_nobp_op('>=') 1711 1712 def __nonzero__(self): 1713 """This operator is not supported in Variables.""" 1714 # Python 2.x 1715 raise TypeError( 1716 'Variables cannot be evaluated as Python bool.') 1717 1718 def __bool__(self): 1719 """This operator is not supported in Variables.""" 1720 # Python 3.x 1721 raise TypeError( 1722 'Variables cannot be evaluated as Python bool.') 1723 1724 __array_priority__ = 200 # type: int 1725 __hash__ = None # type: tp.Callable[[object], int] 1726 1727 1728class Parameter(Variable): 1729 1730 """Parameter variable that can be registered to a link. 1731 1732 Parameter is a subclass of :class:`Variable`. It almost behaves as same 1733 as a usual variable except that a parameter can be registered to a 1734 :class:`~chainer.Link` object just by assigning it to an attribute of 1735 the link within an :meth:`~chainer.Link.init_scope` context. 1736 1737 Parameter also supports an initialization by an initializer. It can have 1738 two initializers: one for the data array, and the other for the gradient 1739 array. The initializer only specifies the way of filling the elements of 1740 these arrays, and the shape information is specified at the initialization 1741 point. 1742 1743 When a link that the parameter has been registered to is passed to an 1744 :class:`~chainer.GradientMethod`, an update rule is set to the parameter. 1745 This update rule specifies how to update the data array of the parameter 1746 using its gradient array. 1747 1748 Args: 1749 initializer (~chainer.Initializer or :ref:`ndarray`): 1750 Initializer of the data array. If ``shape`` is given, this 1751 initializer is immediately used to initialize the data array. 1752 Otherwise, if it is an array, it is immediately used as the data 1753 array, and otherwise the data array is left uninitialized and will 1754 be initialized by this initializer in :meth:`initialize`. It can 1755 also be a scalar, in which case the data array will be filled by 1756 this scalar. Note that float32 is used in this case. 1757 shape (int or tuple of int or None): Shape of the parameter. If it is 1758 ``None``, the initialization is deferred to the call of 1759 :meth:`initialize`. 1760 name (str): Name of the parameter. 1761 1762 Attributes: 1763 initializer: Initializer of the data array. It is used for 1764 initializing the data array of an uninitialized variable. 1765 update_rule: :class:`~chainer.optimizer.UpdateRule` instance that 1766 updates this variable as a parameter. This argument is set to 1767 :attr:`update_rule`. 1768 1769 """ 1770 1771 initializer = None # type: tp.Optional[tp.Union[tp.Optional[types.AbstractInitializer], types.NdArray]] # NOQA 1772 # TODO(okapies): fix the behavior when shape is None and remove NdArray 1773 _grad_initializer = None # type: tp.Optional[types.AbstractInitializer] 1774 1775 def __init__( 1776 self, 1777 initializer: tp.Optional[types.InitializerSpec] = None, 1778 shape: tp.Optional[types.ShapeSpec] = None, 1779 name: tp.Optional[str] = None, 1780 *, 1781 layout=None 1782 ) -> None: 1783 if initializer is None: 1784 initializer = constant.NaN() 1785 elif numpy.isscalar(initializer): 1786 initializer = constant.Constant(initializer) 1787 if shape is None: 1788 if isinstance(initializer, chainer.get_array_types()): 1789 # parameter initialized by the initial array 1790 super(Parameter, self).__init__( 1791 initializer, name=name, layout=layout) 1792 else: 1793 # uninitialized parameter 1794 super(Parameter, self).__init__( 1795 name=name, _grad_valid=False, layout=layout) 1796 dtype = getattr(initializer, 'dtype', None) 1797 self._grad_initializer = constant.NaN(dtype) 1798 else: 1799 # parameter initialized with a given shape 1800 if isinstance(initializer, chainer.get_array_types()): 1801 xp = backend.get_array_module(initializer) 1802 initializer = constant.Constant(initializer) 1803 else: 1804 xp = numpy 1805 data = initializers.generate_array(initializer, shape, xp) # type: ignore # NOQA 1806 grad = xp.full_like(data, numpy.nan) 1807 super(Parameter, self).__init__( 1808 data, name=name, grad=grad, layout=layout) 1809 1810 self._initial_device = backend.CpuDevice() 1811 self.update_rule = None 1812 self.initializer = initializer 1813 1814 def __copy__(self): 1815 return self._copy_to(Parameter()) 1816 1817 def __reduce__(self): 1818 args = ( 1819 self.array, self.name, self._grad, self._grad_valid, 1820 self.initializer, self.update_rule, self.device) 1821 return _recover_parameter, args 1822 1823 @property 1824 def is_initialized(self): 1825 return self._data[0] is not None 1826 1827 @property 1828 def dtype(self): 1829 array = self._data[0] 1830 if array is not None: 1831 return array.dtype 1832 # uninitialized 1833 initializer = self.initializer 1834 if hasattr(initializer, 'dtype'): 1835 return numpy.dtype(initializer.dtype) 1836 raise RuntimeError( 1837 'Dtype of the parameter is not determined yet because it\'s ' 1838 'uninitialized and dtype was not explicitly given.') 1839 1840 def to_cpu(self): 1841 return self.to_device(backend.CpuDevice()) 1842 1843 def to_gpu(self, device=None): 1844 device = chainer.get_device(cuda._get_device_or_current(device)) 1845 assert device.xp is cuda.cupy 1846 self.to_device(device) 1847 1848 def to_intel64(self): 1849 self.to_device(intel64.Intel64Device()) 1850 1851 def to_chx(self): 1852 if not chainerx.is_available(): 1853 raise RuntimeError('ChainerX is not available.') 1854 1855 # Derive the target ChainerX device from the array if it is 1856 # initialized. Otherwise, from the current initial device. 1857 if self.array is not None: 1858 device = backend.get_device_from_array(self.array) 1859 else: 1860 device = self._initial_device 1861 1862 if device.xp is numpy: 1863 self._initial_device = backend.ChainerxDevice( 1864 chainerx.get_device('native:0')) 1865 elif device.xp is cuda.cupy: 1866 self._initial_device = backend.ChainerxDevice( 1867 chainerx.get_device('cuda:{}'.format(device.device.id))) 1868 1869 super(Parameter, self)._to_chx(allow_unchaining=True) 1870 1871 def from_chx(self): 1872 if self.array is not None: 1873 device = backend.get_device_from_array(self.array) 1874 else: 1875 device = self._initial_device 1876 1877 if device.xp is chainerx: 1878 backend_name = device.device.backend.name 1879 if backend_name == 'native': 1880 self._initial_device = backend.CpuDevice() 1881 elif backend_name == 'cuda': 1882 self._initial_device = backend.GpuDevice.from_device_id( 1883 device.device.index) 1884 1885 super(Parameter, self)._from_chx(allow_unchaining=True) 1886 1887 def to_device(self, device): 1888 device = chainer.get_device(device) 1889 if self._data[0] is None and self._initial_device != device: 1890 self._data = [None] # Renew placeholder to break sharing 1891 self._has_chainerx_array = False 1892 self._initial_device = device 1893 super(Parameter, self)._to_device(device, allow_unchaining=True) 1894 1895 def cleargrad(self): 1896 super(Parameter, self).cleargrad() 1897 if not self.is_initialized: 1898 self._grad_initializer = None 1899 1900 def zerograd(self): 1901 super(Parameter, self).zerograd() 1902 if not self.is_initialized: 1903 dtype = getattr(self.initializer, 'dtype', None) 1904 self._grad_initializer = initializers.Zero(dtype) 1905 1906 def initialize(self, shape): 1907 """Initializes the uninitialized variable. 1908 1909 Uninitialized variable is a variable created with the data array set to 1910 None. This method creates and initializes the data array. The shape of 1911 the variable can be left unknown until this method is called. 1912 1913 Args: 1914 shape (tuple of int): Shape of the data array. 1915 1916 """ 1917 device = self._initial_device 1918 assert device is not None 1919 xp = device.xp 1920 1921 data = initializers.generate_array( 1922 self.initializer, shape, xp, device=device) 1923 data = chainer.memory_layouts._transpose_array(data, None, self.layout) 1924 1925 if self._grad_initializer is None: 1926 grad = None 1927 else: 1928 grad = initializers.generate_array( 1929 self._grad_initializer, shape, xp, device=device) 1930 grad = chainer.memory_layouts._transpose_array( 1931 grad, None, self.layout) 1932 1933 self._set_array(data, layout_check=False) 1934 self._set_grad(grad, layout_check=False) 1935 1936 # Convert the array for iDeep. 1937 # TODO(niboshi): This could be done in generate_array(). 1938 if isinstance(self._initial_device, intel64.Intel64Device): 1939 self.to_intel64() 1940 1941 def update(self): 1942 """Updates the data array using the gradient and the update rule. 1943 1944 This method updates the parameter using the attached update rule. 1945 1946 """ 1947 if self.update_rule is not None: 1948 if not self.update_rule.is_elementwise: 1949 if self.layout is not None: 1950 raise RuntimeError( 1951 'Parameter with a non-standard layout cannot be ' 1952 'updated with a non-elementwise update rule ' 1953 '({}).'.format(self.update_rule)) 1954 self.update_rule.update(self) 1955 1956 1957def as_variable(obj): 1958 """Converts an array or a variable into :class:`~chainer.Variable`. 1959 1960 This is a convenient function to get a :class:`~chainer.Variable` object 1961 transparently from a raw array or a variable. 1962 1963 Note that this function should only be used for type consistency (i.e., to 1964 enforce the return value of an API having type :class:`~chainer.Variable`). 1965 The :class:`~chainer.Variable.requires_grad` flag is kept as is; if ``obj`` 1966 is a raw array, the newly created variable has ``requires_grad = False``. 1967 In order to make a variable w.r.t. which you want to compute the gradient, 1968 you should use :class:`~chainer.Variable` directly. 1969 1970 Args: 1971 obj (:ref:`ndarray` or ~chainer.Variable): An array or 1972 a variable that you want to convert to :class:`~chainer.Variable`. 1973 1974 Returns: 1975 ~chainer.Variable: 1976 A variable converted from ``obj``. If ``obj`` is a raw array, this is a 1977 new :class:`~chainer.Variable` object that wraps the array. If ``obj`` 1978 is already a :class:`~chainer.Variable` object, this function returns 1979 ``obj`` as is. 1980 1981 """ 1982 if isinstance(obj, Variable): 1983 return obj 1984 1985 if isinstance(obj, chainerx.ndarray): 1986 requires_grad = obj.is_backprop_required() 1987 else: 1988 requires_grad = False 1989 return Variable(obj, requires_grad=requires_grad) 1990 1991 1992def as_array(obj): 1993 """Returns the underlying array from a variable or an array. 1994 1995 This is a convenient function to get the underlying array object 1996 transparently from an object that could be either a variable or an array. 1997 1998 Args: 1999 obj (:ref:`ndarray` or ~chainer.Variable): An array or a variable. 2000 2001 Returns: 2002 :ref:`ndarray` or ~chainer.Variable: 2003 The underlying array object of the argument. 2004 2005 """ 2006 if isinstance(obj, Variable): 2007 return obj.array 2008 return obj 2009 2010 2011def _recover_parameter(*args): 2012 if len(args) == 7: 2013 # latest 2014 data, name, grad, grad_valid, initializer, update_rule, device = args 2015 elif len(args) == 6: 2016 data, name, grad, initializer, update_rule, device = args 2017 grad_valid = True 2018 else: 2019 assert False, len(args) 2020 2021 p = Parameter(initializer=initializer, name=name) 2022 p.array = data 2023 p._grad = grad 2024 p._grad_valid = grad_valid 2025 p.update_rule = update_rule 2026 p.to_device(device) 2027 return p 2028 2029 2030class _ChainerxVariableNodeProps(object): 2031 2032 def __init__(self, x): 2033 self.shape = x.shape 2034 self.dtype = x.dtype 2035 2036 2037class _AllowArrayAccessWithNonstandardLayout: 2038 """Context manager within which access to Variable.array is allowed for \ 2039variables with a non-standard layout.""" 2040 2041 def __enter__(self): 2042 self._old = _allow_array_access_with_nonstandard_layout() 2043 _thread_local.allow_array_access_with_nonstandard_layout = True 2044 2045 def __exit__(self, typ, value, traceback): 2046 _thread_local.allow_array_access_with_nonstandard_layout = self._old 2047 2048 2049def _allow_array_access_with_nonstandard_layout(): 2050 # Returns wether a thread-local variable 2051 # `allow_array_access_with_nonstandard_layout` is set to True. 2052 try: 2053 return _thread_local.allow_array_access_with_nonstandard_layout 2054 except AttributeError: 2055 return False 2056