#ifndef ACTIVATION_SOFTMAX_H_
#define ACTIVATION_SOFTMAX_H_

#include <Eigen/Core>
#include <string>
#include "../Config.h"   // assumed to supply the library-wide `Scalar` typedef

namespace MiniDNN
{


///
/// \ingroup Activations
///
/// The softmax activation function
///
class Softmax
{
    private:
        typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> Matrix;
        typedef Eigen::Array<Scalar, 1, Eigen::Dynamic> RowArray;

    public:
        // a = activation(z) = softmax(z)
        // Z = [z1, ..., zn], A = [a1, ..., an], n observations
        static inline void activate(const Matrix& Z, Matrix& A)
        {
            // Subtract each column's maximum before exponentiating so exp()
            // cannot overflow; softmax is invariant to this shift
            A.array() = (Z.rowwise() - Z.colwise().maxCoeff()).array().exp();
            // Normalize each column to sum to one
            RowArray colsums = A.colwise().sum();
            A.array().rowwise() /= colsums;
        }

        // Apply the Jacobian matrix J to a vector f
        // J = d_a / d_z = diag(a) - a * a'
        // g = J * f = a .* f - a * (a' * f) = a .* (f - a'f)
        // Z = [z1, ..., zn], G = [g1, ..., gn], F = [f1, ..., fn]
        // Note: When entering this function, Z and G may point to the same matrix
        // (Z itself is not used by softmax, but keeps the common activation interface)
        static inline void apply_jacobian(const Matrix& Z, const Matrix& A,
                                          const Matrix& F, Matrix& G)
        {
            // a'f for each observation, broadcast down each column below
            RowArray a_dot_f = A.cwiseProduct(F).colwise().sum();
            G.array() = A.array() * (F.array().rowwise() - a_dot_f);
        }

        static std::string return_type()
        {
            return "Softmax";
        }
};


} // namespace MiniDNN


#endif /* ACTIVATION_SOFTMAX_H_ */
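// ---------------------------------------------------------------------------
// Usage sketch (kept in a comment so the header itself stays inert). This is
// illustrative only: it assumes `Scalar` is `double`, so the class's private
// `Matrix` typedef is the same type as `Eigen::MatrixXd`.
//
//     Eigen::MatrixXd Z(3, 2), A(3, 2);
//     Z << 1.0, -1.0,
//          2.0,  0.0,
//          3.0,  1.0;
//     MiniDNN::Softmax::activate(Z, A);
//     // Each column of A now sums to one: A.colwise().sum() is ~ [1, 1]
//
//     // Back-propagate an upstream gradient F through the Jacobian:
//     Eigen::MatrixXd F = Eigen::MatrixXd::Ones(3, 2), G(3, 2);
//     MiniDNN::Softmax::apply_jacobian(Z, A, F, G);
//     // With constant F, G vanishes: a .* (f - a'f) = a .* (1 - 1) = 0,
//     // the Jacobian counterpart of softmax's shift invariance.
// ---------------------------------------------------------------------------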