from chainer.functions.normalization import layer_normalization
from chainer import link
from chainer import utils
from chainer import variable


class LayerNormalization(link.Link):

    """Layer normalization layer on outputs of linear functions.

    .. warning::

        This feature is experimental. The interface can change in the future.

    This link normalizes its input along the second axis using per-sample
    statistics, then applies a learned elementwise scale (``gamma``) and
    shift (``beta``).  When ``size`` is omitted, allocation of the
    parameters is postponed until the first forward pass, at which point
    the unit size is inferred from the input.

    Args:
        size (int): Number of input units.  If ``None``, parameter
            initialization is deferred until the first forward data pass,
            at which time the size is determined.
        eps (float): Small constant added for numerical stability of the
            normalization.
        initial_gamma (~chainer.Initializer): Initializer for the scaling
            vector.  ``None`` fills the vector with 1; a scalar fills it
            with that value; a ``numpy.ndarray`` sets it directly.
        initial_beta (~chainer.Initializer): Initializer for the shifting
            vector.  ``None`` fills the vector with 0; a scalar fills it
            with that value; a ``numpy.ndarray`` sets it directly.

    Attributes:
        gamma (~chainer.Parameter): Scaling parameter.
        beta (~chainer.Parameter): Shifting parameter.
        eps (float): Epsilon value for numerical stability.

    See: `Layer Normalization <https://arxiv.org/abs/1607.06450>`_
    """

    def __init__(self, size=None, eps=1e-6, initial_gamma=None,
                 initial_beta=None):
        super(LayerNormalization, self).__init__()
        # Fall back to the identity transform (scale 1, shift 0) when no
        # explicit initializers are supplied.
        gamma_init = 1 if initial_gamma is None else initial_gamma
        beta_init = 0 if initial_beta is None else initial_beta

        with self.init_scope():
            self.gamma = variable.Parameter(gamma_init)
            self.beta = variable.Parameter(beta_init)
            self.eps = eps

        # Allocate the parameters right away when the unit size is known;
        # otherwise wait for the first forward pass.
        if size is not None:
            self._initialize_params(size)

    def _initialize_params(self, size):
        # Materialize both parameter vectors with the now-known unit size.
        self.gamma.initialize(size)
        self.beta.initialize(size)

    def forward(self, x):
        """Apply layer normalization to given input.

        Args:
            x (~chainer.Variable): Batch vectors.
                Shape of this value must be `(batch_size, unit_size)`,
                e.g., the output of :func:`~chainer.functions.linear`.

        Returns:
            ~chainer.Variable: Output of the layer normalization.

        """
        if self.gamma.array is None:
            # Deferred initialization: infer the unit size from the
            # non-batch dimensions of the first input.
            self._initialize_params(utils.size_of_shape(x.shape[1:]))

        return layer_normalization.layer_normalization(
            x, self.gamma, self.beta, self.eps)