Lines matching refs: num_heads
107 hidden_size=512, num_heads=4, activation='relu', scaled=True, dropout=0.0,
112 self._num_heads = num_heads
121 assert units % num_heads == 0
173 units=128, hidden_size=2048, num_heads=4, scaled=True, dropout=0.0,
180 assert units % num_heads == 0, 'In TransformerDecoder, the units should be divided ' \
182 'num_heads={}'.format(units, num_heads)
188 self._num_heads = num_heads
230 d_head=units // num_heads, num_heads=num_heads, scaled=scaled,
234 hidden_size=hidden_size, num_heads=num_heads,
531 def __init__(self, vocab_size, num_layers=2, units=128, hidden_size=2048, num_heads=4,
537 assert units % num_heads == 0, 'In TransformerDecoder, the units should be divided ' \
539 'num_heads={}'.format(units, num_heads)
544 self._num_heads = num_heads
560 d_head=units // num_heads, num_heads=num_heads, scaled=scaled,
564 num_heads=num_heads, activation=activation, layer_norm_eps=1e-12,
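
The assertions listed above require that units be divisible by num_heads, because each attention head receives a slice of size d_head = units // num_heads. The sketch below illustrates that split in isolation; the helper name and the use of plain NumPy are illustrative assumptions, not taken from the source file referenced here.

    # Minimal sketch of the units / num_heads relationship enforced by the
    # assertions above (illustrative code, not from the listed source).
    import numpy as np

    def split_heads(x, num_heads):
        """Reshape (batch, seq_len, units) -> (batch, num_heads, seq_len, d_head)."""
        batch, seq_len, units = x.shape
        assert units % num_heads == 0, \
            'units={} must be divisible by num_heads={}'.format(units, num_heads)
        d_head = units // num_heads  # per-head feature size, mirroring d_head=units // num_heads above
        x = x.reshape(batch, seq_len, num_heads, d_head)
        return x.transpose(0, 2, 1, 3)

    # Example with the defaults shown in the listing: units=128, num_heads=4 -> d_head=32.
    out = split_heads(np.zeros((2, 10, 128)), num_heads=4)
    print(out.shape)  # (2, 4, 10, 32)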