1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, 13 * software distributed under the License is distributed on an 14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 * KIND, either express or implied. See the License for the 16 * specific language governing permissions and limitations 17 * under the License. 18 */ 19 #include <float.h> 20 #include <atomic> 21 #include "./mxnet_op.h" 22 #include "./mshadow_op.h" 23 #include "./tensor/init_op.h" 24 #include "./operator_tune-inl.h" 25 #include "./tensor/elemwise_binary_broadcast_op.h" 26 27 namespace mxnet { 28 namespace op { 29 30 /*! 31 * \brief Shared static variables for all OperatorTune data types 32 */ 33 std::atomic<bool> OperatorTuneBase::calculated_(false); 34 bool OperatorTuneBase::verbose_tuning_info_ = false; 35 double OperatorTuneBase::tuning_weight_scale_ = 0.0; 36 37 /*! 
38 * \brief Instantiate static variables for OperatorTune<DType>, where 'DType' is specified 39 */ 40 #define IMPLEMENT_OPERATOR_TUNE_STATICS_FOR_TYPE(__typ$) \ 41 template<> bool OperatorTune<__typ$>::initialized_ = false; \ 42 template<> std::unique_ptr<__typ$[]> OperatorTune<__typ$>::data_set_ = nullptr; \ 43 template<> volatile tune::TuningMode OperatorTuneByType<__typ$>::tuning_mode_ = tune::kAuto; \ 44 template<> volatile int OperatorTune<__typ$>::volatile_int_ = 9; /* arbitrary number */ \ 45 template<> std::unordered_set<std::string> OperatorTune<__typ$>::operator_names_({}); \ 46 template<> bool OperatorTune<__typ$>::output_tuning_data_ = false; \ 47 template<> std::list<void (*)()> *OperatorTune<__typ$>::GetTuningList() { \ 48 static std::list<void (*)()> ll; \ 49 return ≪ \ 50 } 51 52 /*! 53 * \brief Static variables for different types (ie OperatorTune<float>, OperatorTune<double>, etc. 54 */ 55 IMPLEMENT_OPERATOR_TUNE_STATICS_FOR_TYPE(float); 56 IMPLEMENT_OPERATOR_TUNE_STATICS_FOR_TYPE(double); 57 IMPLEMENT_OPERATOR_TUNE_STATICS_FOR_TYPE(mshadow::half::half_t); 58 IMPLEMENT_OPERATOR_TUNE_STATICS_FOR_TYPE(mshadow::bfloat::bf16_t); 59 IMPLEMENT_OPERATOR_TUNE_STATICS_FOR_TYPE(int8_t); 60 IMPLEMENT_OPERATOR_TUNE_STATICS_FOR_TYPE(uint8_t); 61 IMPLEMENT_OPERATOR_TUNE_STATICS_FOR_TYPE(int32_t); 62 IMPLEMENT_OPERATOR_TUNE_STATICS_FOR_TYPE(int64_t); 63 IMPLEMENT_OPERATOR_TUNE_STATICS_FOR_TYPE(bool); 64 65 /*! 66 * \brief Init variable used to facilitate registering a tunable operator during 67 * static initialization 68 * \tparam OP Operator type 69 * \tparam DType Data type 70 */ 71 template<typename OP, typename DType> 72 struct static_init_var { 73 static bool init_; 74 }; 75 76 /*! 77 * \brief Repeat the given macro and associated arguments for each data type, 78 * appending the data type to the end of the arguments 79 */ 80 #define MSHADOW_MACRO_FOREACH_TYPE(__macro$, ...) 
\ 81 __macro$(__VA_ARGS__, float); \ 82 __macro$(__VA_ARGS__, double); \ 83 __macro$(__VA_ARGS__, mshadow::half::half_t); \ 84 __macro$(__VA_ARGS__, mshadow::bfloat::bf16_t); \ 85 __macro$(__VA_ARGS__, uint8_t); \ 86 __macro$(__VA_ARGS__, int8_t); \ 87 __macro$(__VA_ARGS__, int32_t); \ 88 __macro$(__VA_ARGS__, int64_t); 89 90 #define MSHADOW_MACRO_FOREACH_TYPE_WITH_BOOL(__macro$, ...) \ 91 __macro$(__VA_ARGS__, float); \ 92 __macro$(__VA_ARGS__, double); \ 93 __macro$(__VA_ARGS__, mshadow::half::half_t); \ 94 __macro$(__VA_ARGS__, mshadow::bfloat::bf16_t); \ 95 __macro$(__VA_ARGS__, uint8_t); \ 96 __macro$(__VA_ARGS__, int8_t); \ 97 __macro$(__VA_ARGS__, int32_t); \ 98 __macro$(__VA_ARGS__, int64_t); \ 99 __macro$(__VA_ARGS__, bool) 100 101 #define IMPLEMENT_WORKLOAD_VALUE_FOR_TYPE(__op$, __typ$) \ 102 namespace mxnet_op { \ 103 template<> std::vector<float> mxnet::op::mxnet_op::tuned_op<__op$, __typ$>::workload_ = \ 104 { static_cast<float>(INT_MAX >> 3) }; \ 105 } /* namespace mxnet_op */ 106 /*! 107 * \brief Implement tuning objects for a forward blank (no arguments) kernel operator 108 */ 109 #define _IMPLEMENT_BLANK_WORKLOAD_FWD(__op$, __typ$) \ 110 IMPLEMENT_WORKLOAD_VALUE_FOR_TYPE(__op$, __typ$); \ 111 namespace mxnet_op { \ 112 template<> bool ::mxnet::op::mxnet_op::tuned_op<__op$, __typ$>::UseOMP( \ 113 size_t N, size_t omp_threads) { \ 114 return ::mxnet::op::UnaryOpTune<__typ$>::UseOMP<mxnet_op::tuned_op<__op$, __typ$>>( \ 115 N, omp_threads); \ 116 }} /* namespace mxnet_op */ \ 117 template<> bool static_init_var<__op$, __typ$>::init_ = \ 118 ::mxnet::op::OperatorTune<__typ$>::ScheduleTune<__op$>( \ 119 ::mxnet::op::UnaryOpTune<__typ$>::TuneBlankOperatorEx<__op$>) 120 121 /*! 
122 * \brief Implement tuning objects for a forward unary kernel operator 123 */ 124 #define _IMPLEMENT_UNARY_WORKLOAD_FWD(__op$, __typ$) \ 125 IMPLEMENT_WORKLOAD_VALUE_FOR_TYPE(__op$, __typ$); \ 126 namespace mxnet_op { \ 127 template<> bool ::mxnet::op::mxnet_op::tuned_op<__op$, __typ$>::UseOMP( \ 128 size_t N, size_t omp_threads) { \ 129 return ::mxnet::op::UnaryOpTune<__typ$>::UseOMP<mxnet_op::tuned_op<__op$, __typ$>>( \ 130 N, omp_threads); \ 131 }} /* namespace mxnet_op */ \ 132 template<> bool static_init_var<__op$, __typ$>::init_ = \ 133 ::mxnet::op::OperatorTune<__typ$>::ScheduleTune<__op$>( \ 134 ::mxnet::op::UnaryOpTune<__typ$>::TuneUnaryOperator<__op$>) 135 136 /*! 137 * \brief Implement tuning objects for a backward unary kernel operator 138 */ 139 #define _IMPLEMENT_UNARY_WORKLOAD_BWD(__op$, __typ$) \ 140 IMPLEMENT_WORKLOAD_VALUE_FOR_TYPE(::mxnet::op::mxnet_op::backward_grad_tuned<__op$>, __typ$); \ 141 namespace mxnet_op { \ 142 template<> \ 143 bool ::mxnet::op::mxnet_op::tuned_op<::mxnet::op::mxnet_op::backward_grad_tuned<__op$>, __typ$>::\ 144 UseOMP(size_t N, size_t omp_threads) { \ 145 return ::mxnet::op::UnaryOpTune<__typ$>::UseOMP<mxnet_op::tuned_op< \ 146 ::mxnet::op::mxnet_op::backward_grad_tuned<__op$>, __typ$>>(N, omp_threads); \ 147 }} /* namespace mxnet_op */ \ 148 template<> bool static_init_var<::mxnet::op::mxnet_op::backward_grad_tuned<__op$>, __typ$>:: \ 149 init_ = ::mxnet::op::OperatorTune<__typ$>::ScheduleTune<__op$>( \ 150 ::mxnet::op::UnaryOpTune<__typ$>::TuneUnaryBackwardOperator<__op$>) 151 152 /*! 
153 * \brief Implement tuning objects for a forward binary kernel operator 154 */ 155 #define _IMPLEMENT_BINARY_WORKLOAD_FWD(__op$, __typ$) \ 156 IMPLEMENT_WORKLOAD_VALUE_FOR_TYPE(__op$, __typ$); \ 157 namespace mxnet_op { \ 158 template<> bool ::mxnet::op::mxnet_op::tuned_op<__op$, __typ$>::UseOMP( \ 159 size_t N, size_t omp_threads) { \ 160 return ::mxnet::op::BinaryOpTune<__typ$>::UseOMP<mxnet_op::tuned_op<__op$, __typ$>>( \ 161 N, omp_threads); \ 162 }} /* namespace mxnet_op */ \ 163 template<> bool static_init_var<__op$, __typ$>::init_ = \ 164 ::mxnet::op::OperatorTune<__typ$>::ScheduleTune<__op$>( \ 165 ::mxnet::op::BinaryOpTune<__typ$>::TuneBinaryOperator<__op$>) 166 167 /*! 168 * \brief Implement tuning objects for a backward binary kernel operator 169 */ 170 #define _IMPLEMENT_BINARY_WORKLOAD_BWD(__op$, __typ$) \ 171 IMPLEMENT_WORKLOAD_VALUE_FOR_TYPE(::mxnet::op::mxnet_op::backward_grad_tuned<__op$>, __typ$); \ 172 namespace mxnet_op { \ 173 template<> \ 174 bool ::mxnet::op::mxnet_op::tuned_op< \ 175 ::mxnet::op::mxnet_op::backward_grad_tuned<__op$>, __typ$>:: \ 176 UseOMP(size_t N, size_t omp_threads) { \ 177 return ::mxnet::op::BinaryOpTune<__typ$>::UseOMP<mxnet_op::tuned_op< \ 178 ::mxnet::op::mxnet_op::backward_grad_tuned<__op$>, __typ$>>(N, omp_threads); \ 179 }} /* namespace mxnet_op */ \ 180 template<> bool static_init_var<::mxnet::op::mxnet_op::backward_grad_tuned<__op$>, \ 181 __typ$>::init_ = \ 182 ::mxnet::op::OperatorTune<__typ$>::ScheduleTune<__op$>( \ 183 ::mxnet::op::BinaryOpTune<__typ$>::TuneBinaryBackwardOperator<__op$>) 184 185 /*! 186 * \brief Implement tuning objects for a custom forward kernel operator 187 */ 188 #define _IMPLEMENT_CUSTOM_WORKLOAD_FWD(__op$, __typ$) \ 189 IMPLEMENT_WORKLOAD_VALUE_FOR_TYPE(__op$<__typ$>, __typ$); \ 190 template<> bool static_init_var<__op$<__typ$>, __typ$>::init_ = \ 191 ::mxnet::op::OperatorTune<__typ$>::ScheduleTune<__op$<__typ$>>(\ 192 __op$<__typ$>::Tune) 193 194 /*! 
195 * \brief Macros for manually adding new blank, unary and binary operators to the tuning set 196 */ 197 #define IMPLEMENT_UNARY_WORKLOAD_FWD(__op$) \ 198 MSHADOW_MACRO_FOREACH_TYPE(_IMPLEMENT_UNARY_WORKLOAD_FWD, __op$) 199 200 #define IMPLEMENT_UNARY_WORKLOAD_FWD_WITH_BOOL(__op$) \ 201 MSHADOW_MACRO_FOREACH_TYPE_WITH_BOOL(_IMPLEMENT_UNARY_WORKLOAD_FWD, __op$) 202 203 #define IMPLEMENT_BLANK_WORKLOAD_FWD(__op$) \ 204 MSHADOW_MACRO_FOREACH_TYPE(_IMPLEMENT_BLANK_WORKLOAD_FWD, __op$) 205 206 #define IMPLEMENT_BLANK_WORKLOAD_FWD_WITH_BOOL(__op$) \ 207 MSHADOW_MACRO_FOREACH_TYPE_WITH_BOOL(_IMPLEMENT_BLANK_WORKLOAD_FWD, __op$) 208 209 #define IMPLEMENT_UNARY_WORKLOAD_BWD(__op$) \ 210 MSHADOW_MACRO_FOREACH_TYPE(_IMPLEMENT_UNARY_WORKLOAD_BWD, __op$) 211 212 #define IMPLEMENT_BINARY_WORKLOAD_FWD(__op$) \ 213 MSHADOW_MACRO_FOREACH_TYPE(_IMPLEMENT_BINARY_WORKLOAD_FWD, __op$) 214 215 #define IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(__op$) \ 216 MSHADOW_MACRO_FOREACH_TYPE_WITH_BOOL(_IMPLEMENT_BINARY_WORKLOAD_FWD, __op$) 217 218 #define IMPLEMENT_BINARY_WORKLOAD_BWD(__op$) \ 219 MSHADOW_MACRO_FOREACH_TYPE(_IMPLEMENT_BINARY_WORKLOAD_BWD, __op$) 220 221 #define IMPLEMENT_CUSTOM_WORKLOAD_FWD(__op$) \ 222 MSHADOW_MACRO_FOREACH_TYPE(_IMPLEMENT_CUSTOM_WORKLOAD_FWD, __op$) 223 224 /*! 
225 * \brief Tuning data and default weights in the case that MXNET_ENABLE_OPERATOR_AUTOTUNE is set 226 * to zero (thus turning off auto-tuning) 227 * \note This code can be automatically generated 228 * by setting the environment variable MXNET_OUTPUT_TUNING_DATA to a positive 229 * integer value 230 */ 231 OperatorTuneBase::duration_t OperatorTuneBase::omp_overhead_ns_ = 5000; 232 IMPLEMENT_UNARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::identity); // NOLINT() 233 IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::identity_grad); // NOLINT() 234 IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::negation); // NOLINT() 235 IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::reciprocal); // NOLINT() 236 IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::reciprocal_grad); // NOLINT() 237 IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::sigmoid); // NOLINT() 238 IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::sigmoid_grad); // NOLINT() 239 IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::softsign); // NOLINT() 240 IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::softsign_grad); // NOLINT() 241 IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::relu); // NOLINT() 242 IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::relu_grad); // NOLINT() 243 IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::selu); // NOLINT() 244 IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::selu_grad); // NOLINT() 245 IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::gelu); // NOLINT() 246 IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::tanh); // NOLINT() 247 IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::tanh_grad); // NOLINT() 248 IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::softrelu); // NOLINT() 249 IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::softrelu_grad); // NOLINT() 250 IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::exp); // NOLINT() 251 IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::exp); // NOLINT() 252 
// Unary tuning registrations: exponential/logarithmic, error-function and trigonometric ops.
// One registration per line, in the original order.
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::expm1);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::log);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::log_grad);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::log1p);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::log1p_grad);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::log2);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::log2_grad);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::log10);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::log10_grad);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::erf);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::erf_grad);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::erfinv);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::erfinv_grad);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::sin);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::sin_grad);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::sinh);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::sinh_grad);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::arcsin);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::arcsin_grad);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::arcsinh);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::arcsinh_grad);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::cos);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::cos_grad);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::cosh);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::cosh_grad);  // NOLINT()
// Unary tuning registrations: inverse trigonometric, power/root and sign ops.
// One registration per line, in the original order.
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::arccos);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::arccos_grad);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::arccosh);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::arccosh_grad);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::tan);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::tan_grad);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::arctan);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::arctan_grad);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::arctanh);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::arctanh_grad);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::square);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::square_grad);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::square_root);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::square_root_grad);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::reciprocal_square_root);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::reciprocal_square_root_grad);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::cube_root);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::cube_root_grad);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::reciprocal_cube_root);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::reciprocal_cube_root_grad);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::abs);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::sign);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::sign);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::sign_grad);  // NOLINT()
// Unary tuning registrations: rounding, gamma, angle-conversion and predicate ops, followed by
// the first binary registrations (clip). One registration per line, in the original order.
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::round);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::floor);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::trunc);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::rint);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::fix);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::gamma);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::gamma_grad);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::gammaln);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::gammaln_grad);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::ceil);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::degrees);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::degrees_grad);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::radians);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::radians_grad);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::nt);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::np_logical_not);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::bitwise_not);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::isnan);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::isinf);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::isposinf);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::isneginf);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::isfinite);  // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::nt);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::clip);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::clip);  // NOLINT()
// NOLINT() 326 IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::plus); // NOLINT() 327 IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::minus); // NOLINT() 328 IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::mul); // NOLINT() 329 IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::div); // NOLINT() 330 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::true_divide); // NOLINT() 331 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::minus_sign); // NOLINT() 332 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::rminus); // NOLINT() 333 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::rdiv); // NOLINT() 334 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::plus); // NOLINT() 335 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::minus); // NOLINT() 336 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::mul); // NOLINT() 337 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::div); // NOLINT() 338 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::minus_sign); // NOLINT() 339 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::rminus); // NOLINT() 340 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::rdiv); // NOLINT() 341 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::rtrue_divide); // NOLINT() 342 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::div_grad); // NOLINT() 343 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::div_grad); // NOLINT() 344 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::div_rgrad); // NOLINT() 345 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::div_rgrad); // NOLINT() 346 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::rdiv_grad); // NOLINT() 347 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::mod); // NOLINT() 348 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::mod_grad); // NOLINT() 349 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::mod_rgrad); // NOLINT() 350 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::rmod); 
// NOLINT() 351 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::rmod_grad); // NOLINT() 352 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::left); // NOLINT() 353 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::left); // NOLINT() 354 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::right); // NOLINT() 355 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::right); // NOLINT() 356 IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::power); // NOLINT() 357 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::rpower); // NOLINT() 358 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::xelu); // NOLINT() 359 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::elu); // NOLINT() 360 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::power_grad); // NOLINT() 361 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::rpower_grad); // NOLINT() 362 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::power_rgrad); // NOLINT() 363 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::copysign); // NOLINT() 364 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::rcopysign); // NOLINT() 365 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::copysign_grad); // NOLINT() 366 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::copysign_rgrad); // NOLINT() 367 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::rcopysign_grad); // NOLINT() 368 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::arctan2); // NOLINT() 369 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::rarctan2); // NOLINT() 370 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::arctan2_grad); // NOLINT() 371 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::rarctan2_grad); // NOLINT() 372 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::arctan2_rgrad); // NOLINT() 373 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::xelu_grad); // NOLINT() 374 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::gelu_grad); // NOLINT() 375 
// Binary tuning registrations: min/max, hypot and comparison ops.
// One registration per line, in the original order.
IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::prelu_grad);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::elu_grad);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::maximum);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::minimum);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::hypot);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::hypot_grad_left);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::hypot_grad_left);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::hypot_grad_right);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::hypot_grad_right);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::lt);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::lt);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::le);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::le);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::gt);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::gt);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::ge);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::ge);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::ne);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::ne);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::eq);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::eq);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::np_equal);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::np_not_equal);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::np_greater);  // NOLINT()
IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::np_greater_equal);  // NOLINT()
NOLINT() 400 IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::np_less); // NOLINT() 401 IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::np_less_equal); // NOLINT() 402 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::logical_and); // NOLINT() 403 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::logical_and); // NOLINT() 404 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::logical_or); // NOLINT() 405 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::logical_or); // NOLINT() 406 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::logical_xor); // NOLINT() 407 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::logical_xor); // NOLINT() 408 IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::bitwise_and); // NOLINT() 409 IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::bitwise_xor); // NOLINT() 410 IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::bitwise_or); // NOLINT() 411 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::smooth_l1_loss); // NOLINT() 412 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::smooth_l1_gradient); // NOLINT() 413 IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::lcm); // NOLINT() 414 IMPLEMENT_BLANK_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mxnet_op::set_to_int<0>); // NOLINT() 415 IMPLEMENT_BLANK_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mxnet_op::set_to_int<1>); // NOLINT() 416 IMPLEMENT_BLANK_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mxnet_op::set_to_bool<false>); // NOLINT() 417 IMPLEMENT_BLANK_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mxnet_op::set_to_bool<true>); // NOLINT() 418 IMPLEMENT_BLANK_WORKLOAD_FWD(mxnet::op::PopulateFullIdxRspKernel); // NOLINT() 419 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::ldexp); // NOLINT() 420 IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::rldexp); // NOLINT() 421 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::ldexp_grad); // NOLINT() 422 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::ldexp_rgrad); // 
NOLINT() 423 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::rldexp_grad); // NOLINT() 424 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::posone); // NOLINT() 425 IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::negone); // NOLINT() 426 /*! 427 * \brief Tuner objects, *not* automatically generated 428 */ 429 #ifdef MXNET_USE_OPERATOR_TUNING 430 static BinaryOpTune<float> binaryOpTuneFloat; 431 static BinaryOpTune<double> binaryOpTuneDouble; 432 static BinaryOpTune<mshadow::half::half_t> binaryOpTuneHalf; 433 static BinaryOpTune<mshadow::bfloat::bf16_t> binaryOpTuneBf16; 434 static BinaryOpTune<int8_t> binaryOpTuneInt8; 435 static BinaryOpTune<uint8_t> binaryOpTuneUInt8; 436 static BinaryOpTune<int32_t> binaryOpTuneInt32; 437 static BinaryOpTune<int64_t> binaryOpTuneInt64; 438 #endif // MXNET_USE_OPERATOR_TUNING 439 } // namespace op 440 } // namespace mxnet 441