/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * \file softmax_activation-inl.h
 * \brief SoftmaxActivation operator
 * \author Junyuan Xie, Da Zheng
*/
#ifndef MXNET_OPERATOR_NN_SOFTMAX_ACTIVATION_INL_H_
#define MXNET_OPERATOR_NN_SOFTMAX_ACTIVATION_INL_H_

#include <dmlc/logging.h>
#include <dmlc/parameter.h>
#include <mxnet/operator.h>
#include <cstring>
#include <map>
#include <string>
#include <vector>
#include <utility>
#include "../operator_common.h"

namespace mxnet {
namespace op {
// Declare enumeration of input order to make code more intuitive.
// These enums are only visible within this header.
namespace softmax_activation {
enum SoftmaxActivationOpInputs {kData};
enum SoftmaxActivationOpOutputs {kOut};
enum SoftmaxActivationOpType {kInstance, kChannel};
enum SoftmaxActivationOpResource {kTempSpace};
}  // namespace softmax_activation

struct SoftmaxActivationParam : public dmlc::Parameter<SoftmaxActivationParam> {
  // use int for enumeration
  int mode;
  DMLC_DECLARE_PARAMETER(SoftmaxActivationParam) {
    DMLC_DECLARE_FIELD(mode)
    .add_enum("instance", softmax_activation::kInstance)
    .add_enum("channel", softmax_activation::kChannel)
    .set_default(softmax_activation::kInstance)
    .describe("Specifies how to compute the softmax. If set to ``instance``, "
              "it computes softmax for each instance. If set to ``channel``, "
              "it computes cross-channel softmax for each position of each instance.");
  }
};

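// Forward pass of SoftmaxActivation: applies softmax either over each flattened
// instance (mode=instance) or over the channel axis at every position (mode=channel).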
template<typename xpu>
void SoftmaxActivationCompute(const nnvm::NodeAttrs& attrs,
                              const OpContext& ctx,
                              const std::vector<TBlob>& inputs,
                              const std::vector<OpReqType>& reqs,
                              const std::vector<TBlob>& outputs) {
  using namespace mshadow;
  using namespace mshadow::expr;
  const SoftmaxActivationParam& param = nnvm::get<SoftmaxActivationParam>(attrs.parsed);
  CHECK_EQ(inputs.size(), 1U);
  CHECK_EQ(outputs.size(), 1U);
  const TBlob &in_data = inputs[softmax_activation::kData];
  const TBlob &out_data = outputs[softmax_activation::kOut];
  Stream<xpu> *s = ctx.get_stream<xpu>();
  if (param.mode == softmax_activation::kInstance) {
    // Flatten to (batch, features) and take the softmax of each row.
    Tensor<xpu, 2> data = in_data.FlatTo2D<xpu, real_t>(s);
    Tensor<xpu, 2> out = out_data.FlatTo2D<xpu, real_t>(s);
    Softmax(out, data);
  } else {
    CHECK_GE(in_data.ndim(), 3)
        << "Input needs to have at least 3 dimensions when mode=channel";
    // Reshape to (batch, channel, rest) and take the softmax over the channel axis.
    index_t n = in_data.size(0);
    index_t k = in_data.size(1);
    Shape<3> s3 = Shape3(n, k, static_cast<index_t>(in_data.Size()/n/k));
    Tensor<xpu, 3, real_t> data = in_data.get_with_shape<xpu, 3, real_t>(s3, s);
    Tensor<xpu, 3, real_t> out = out_data.get_with_shape<xpu, 3, real_t>(s3, s);
    Softmax(out, data);
  }
}
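// Backward pass for softmax: given y = softmax(x) and the output gradient dL/dy,
// the input gradient is dL/dx = y * (dL/dy - sum(dL/dy * y)), where the sum runs
// over the softmax axis.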
template<typename xpu>
void SoftmaxActivationGradCompute(const nnvm::NodeAttrs& attrs,
                                  const OpContext& ctx,
                                  const std::vector<TBlob>& inputs,
                                  const std::vector<OpReqType>& reqs,
                                  const std::vector<TBlob>& outputs) {
  using namespace mshadow;
  using namespace mshadow::expr;
  CHECK_EQ(inputs.size(), 2U);
  CHECK_EQ(outputs.size(), 1U);
  CHECK_EQ(reqs.size(), 1U);
  const TBlob &out_grad = inputs[0];
  const TBlob &out_data = inputs[1];
  const OpReqType &req = reqs[0];
  const TBlob &in_grad = outputs[0];
  // Use a 3D tensor (batch, channel, rest) for both modes {instance, channel}. Get shapes.
  index_t total_size = in_grad.Size();
  index_t batch_size = in_grad.shape_[0];
  index_t channel_num = in_grad.shape_[1];
  index_t rest_size = total_size / (batch_size * channel_num);
  const Shape<3> data_shape = Shape3(batch_size, channel_num, rest_size);
  // Get tensors
  Stream<xpu> *s = ctx.get_stream<xpu>();
  Tensor<xpu, 3> m_out_grad =
      out_grad.get_with_shape<xpu, 3, real_t>(data_shape, s);
  Tensor<xpu, 3> m_out_data =
      out_data.get_with_shape<xpu, 3, real_t>(data_shape, s);
  Tensor<xpu, 3> m_in_grad =
      in_grad.get_with_shape<xpu, 3, real_t>(data_shape, s);
  // get requested temp space
  Tensor<xpu, 2> workspace = ctx.requested[softmax_activation::kTempSpace].get_space<xpu>(
      Shape2(batch_size, rest_size), s);
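  // workspace holds, for each (instance, position), the inner product sum(dL/dy * y)
  // taken over the channel axis; it is broadcast back below.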
  workspace = reduce_with_axis<red::sum, false>(m_out_grad * m_out_data, 1);
  Assign(m_in_grad, req,
         m_out_data * (m_out_grad - broadcast_with_axis(workspace, 0, channel_num)));
}

}  // namespace op
}  // namespace mxnet
#endif  // MXNET_OPERATOR_NN_SOFTMAX_ACTIVATION_INL_H_