#ifndef ACTIVATION_SOFTMAX_H_
#define ACTIVATION_SOFTMAX_H_

#include <Eigen/Core>
#include <string>
#include "../Config.h"  // defines the Scalar type used by the typedefs below

namespace MiniDNN
{


///
/// \ingroup Activations
///
/// The softmax activation function
///
class Softmax
{
    private:
        typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> Matrix;
        typedef Eigen::Array<Scalar, 1, Eigen::Dynamic> RowArray;

    public:
        // a = activation(z) = softmax(z)
        // Z = [z1, ..., zn], A = [a1, ..., an], n observations
        static inline void activate(const Matrix& Z, Matrix& A)
        {
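            // Shift each column by its maximum before exponentiating:
            // softmax(z) is invariant to adding a constant per column,
            // and the shift prevents overflow in exp() for large entries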
            A.array() = (Z.rowwise() - Z.colwise().maxCoeff()).array().exp();
            RowArray colsums = A.colwise().sum();
            A.array().rowwise() /= colsums;
        }

        // Apply the Jacobian matrix J to a vector f
        // J = d_a / d_z = diag(a) - a * a'
        // g = J * f = a .* f - a * (a' * f) = a .* (f - a' * f)
        // Z = [z1, ..., zn], G = [g1, ..., gn], F = [f1, ..., fn]
        // Note: When entering this function, Z and G may point to the same matrix
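        // (This aliasing is safe: the body below reads only A and F, never Z,
        // so overwriting G cannot corrupt an input)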
        static inline void apply_jacobian(const Matrix& Z, const Matrix& A,
                                          const Matrix& F, Matrix& G)
        {
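            // a_dot_f(j) = dot(a_j, f_j): one inner product per observation/column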
            RowArray a_dot_f = A.cwiseProduct(F).colwise().sum();
            G.array() = A.array() * (F.array().rowwise() - a_dot_f);
        }

        static std::string return_type()
        {
            return "Softmax";
        }
};
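
// A minimal usage sketch (illustrative, not part of the original header). It
// assumes Scalar is double, the library default, so the class's Matrix typedef
// matches Eigen::MatrixXd; the include path below is hypothetical.
//
//     #include <iostream>
//     #include "Activation/Softmax.h"  // hypothetical path to this header
//
//     int main()
//     {
//         Eigen::MatrixXd Z(3, 2);  // 3 classes, 2 observations (one per column)
//         Z << 1.0, 0.0,
//              2.0, 0.0,
//              3.0, 0.0;
//         Eigen::MatrixXd A(3, 2), G(3, 2);
//         Eigen::MatrixXd F = Eigen::MatrixXd::Ones(3, 2);
//         MiniDNN::Softmax::activate(Z, A);             // columns of A sum to 1
//         MiniDNN::Softmax::apply_jacobian(Z, A, F, G); // G = J * F, per column
//         // With F all ones, G is exactly zero: J * 1 = a - a * (a' * 1) = 0
//         std::cout << A << "\n\n" << G << std::endl;
//         return 0;
//     }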


} // namespace MiniDNN


#endif /* ACTIVATION_SOFTMAX_H_ */