1 /******************************************************************************* 2 * Copyright 2020-2021 Arm Ltd. and affiliates 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 *******************************************************************************/ 16 17 #ifndef CPU_AARCH64_ACL_GEMM_CONVOLUTION_HPP 18 #define CPU_AARCH64_ACL_GEMM_CONVOLUTION_HPP 19 20 #include "cpu/cpu_convolution_pd.hpp" 21 22 #include "cpu/aarch64/acl_convolution_utils.hpp" 23 24 namespace dnnl { 25 namespace impl { 26 namespace cpu { 27 namespace aarch64 { 28 29 struct acl_resource_t : public resource_t { acl_resource_tdnnl::impl::cpu::aarch64::acl_resource_t30 acl_resource_t() 31 : acl_obj_(utils::make_unique< 32 acl_obj_t<arm_compute::NEGEMMConvolutionLayer>>()) {} 33 configurednnl::impl::cpu::aarch64::acl_resource_t34 status_t configure(const acl_conv_conf_t &acp) { 35 if (!acl_obj_) return status::out_of_memory; 36 37 // Init Compute Library tensors based on info from descriptor 38 acl_obj_->src_tensor.allocator()->init(acp.src_info); 39 acl_obj_->wei_tensor.allocator()->init(acp.wei_info); 40 acl_obj_->dst_tensor.allocator()->init(acp.dst_info); 41 acl_obj_->bia_tensor.allocator()->init(acp.bia_info); 42 if (acp.sum_with_eltwise) { 43 acl_obj_->dst_acc_tensor.allocator()->init(acp.dst_info); 44 } 45 // clang-format off 46 acl_obj_->conv.configure( 47 &acl_obj_->src_tensor, 48 &acl_obj_->wei_tensor, 49 acp.with_bias ? &acl_obj_->bia_tensor : nullptr, 50 acp.sum_with_eltwise ? &acl_obj_->dst_acc_tensor : &acl_obj_->dst_tensor, 51 acp.padstride_info, 52 acp.weights_info, 53 acp.dilation_info, 54 acp.sum_with_eltwise ? arm_compute::ActivationLayerInfo() : acp.act_info, 55 acp.fast_math); 56 // clang-format on 57 if (acp.sum_with_eltwise) { 58 acl_obj_->add.configure(&acl_obj_->dst_tensor, 59 &acl_obj_->dst_acc_tensor, &acl_obj_->dst_acc_tensor, 60 arm_compute::ConvertPolicy::SATURATE); 61 acl_obj_->act.configure(&acl_obj_->dst_acc_tensor, 62 &acl_obj_->dst_tensor, acp.act_info); 63 acl_obj_->dst_acc_tensor.allocator()->allocate(); 64 } 65 66 return status::success; 67 } 68 get_acl_objdnnl::impl::cpu::aarch64::acl_resource_t69 acl_obj_t<arm_compute::NEGEMMConvolutionLayer> &get_acl_obj() const { 70 return *acl_obj_; 71 } 72 73 DNNL_DISALLOW_COPY_AND_ASSIGN(acl_resource_t); 74 75 private: 76 std::unique_ptr<acl_obj_t<arm_compute::NEGEMMConvolutionLayer>> acl_obj_; 77 78 }; // acl_resource_t 79 80 template <data_type_t src_type, data_type_t wei_type = src_type, 81 data_type_t dst_type = src_type, data_type_t bia_type = dst_type> 82 struct acl_gemm_convolution_fwd_t : public primitive_t { 83 struct pd_t : public cpu_convolution_fwd_pd_t { pd_tdnnl::impl::cpu::aarch64::acl_gemm_convolution_fwd_t::pd_t84 pd_t(const convolution_desc_t *adesc, const primitive_attr_t *attr, 85 const typename pd_t::base_class *hint_fwd_pd) 86 : cpu_convolution_fwd_pd_t(adesc, attr, hint_fwd_pd), acp_() {} 87 88 DECLARE_COMMON_PD_T( 89 "gemm:acl", acl_gemm_convolution_fwd_t, USE_GLOBAL_SCRATCHPAD); 90 initdnnl::impl::cpu::aarch64::acl_gemm_convolution_fwd_t::pd_t91 status_t init(engine_t *engine) { 92 using namespace data_type; 93 using smask_t = primitive_attr_t::skip_mask_t; 94 95 bool ok = is_fwd() 96 && set_default_alg_kind(alg_kind::convolution_direct) 97 && expect_data_types( 98 src_type, wei_type, bia_type, dst_type, undef) 99 && !has_zero_dim_memory() 100 && attr()->has_default_values(smask_t::oscale 101 | smask_t::zero_points | smask_t::post_ops, 102 dst_type) 103 && output_scales_mask_ok() && zero_points_ok() 104 && post_ops_ok(); 105 if (!ok) return status::unimplemented; 106 107 auto conf_status = acl_convolution_utils::init_conf_gemm(acp_, 108 src_md_, weights_md_, dst_md_, bias_md_, *desc(), *attr()); 109 if (conf_status != status::success) return status::unimplemented; 110 111 acl_common_utils::acl_thread_bind(); 112 113 return status::success; 114 } 115 116 acl_conv_conf_t acp_; 117 118 protected: output_scales_mask_okdnnl::impl::cpu::aarch64::acl_gemm_convolution_fwd_t::pd_t119 bool output_scales_mask_ok() const { 120 using namespace data_type; 121 const auto &mask = attr()->output_scales_.mask_; 122 return IMPLICATION(!utils::one_of(src_type, s8, u8), 123 attr()->output_scales_.has_default_values()) 124 // TODO: add support for per_channel quantization 125 && mask == 0; 126 } 127 zero_points_okdnnl::impl::cpu::aarch64::acl_gemm_convolution_fwd_t::pd_t128 bool zero_points_ok() const { 129 using namespace data_type; 130 // TODO: add support for asymmetric quantization 131 return attr()->zero_points_.has_default_values(); 132 } 133 post_ops_okdnnl::impl::cpu::aarch64::acl_gemm_convolution_fwd_t::pd_t134 bool post_ops_ok() const { 135 auto const &po = attr()->post_ops_; 136 // "true" here stands for eltwise.scale == 1.f check 137 auto is_eltwise 138 = [&](int idx) { return po.entry_[idx].is_eltwise(true); }; 139 auto is_sum = [&](int idx) { return po.entry_[idx].is_sum(); }; 140 141 bool sum_with_eltwise 142 = (po.len() == 2) && is_sum(0) && is_eltwise(1); 143 bool eltwise_only = (po.len() == 1) ? is_eltwise(0) : false; 144 bool eltwise_ok = false; 145 // Compute Library supports either one eltwise post-op or 146 // sum+eltwise post-ops 147 if (eltwise_only || sum_with_eltwise) { 148 const auto act_type = po.entry_[sum_with_eltwise].eltwise.alg; 149 eltwise_ok = acl_common_utils::acl_act_ok(act_type); 150 } 151 152 return eltwise_ok || (po.len() == 0); 153 } 154 }; 155 acl_gemm_convolution_fwd_tdnnl::impl::cpu::aarch64::acl_gemm_convolution_fwd_t156 acl_gemm_convolution_fwd_t(const pd_t *apd) : primitive_t(apd) {} 157 create_resourcednnl::impl::cpu::aarch64::acl_gemm_convolution_fwd_t158 status_t create_resource( 159 engine_t *engine, resource_mapper_t &mapper) const override { 160 if (mapper.has_resource(this)) return status::success; 161 162 auto r = utils::make_unique<acl_resource_t>(); 163 if (!r) return status::out_of_memory; 164 165 // Configure the resource based on information from primitive descriptor 166 auto st = r->configure(pd()->acp_); 167 if (st == status::success) { mapper.add(this, std::move(r)); } 168 169 return st; 170 } 171 172 typedef typename prec_traits<src_type>::type src_data_t; 173 typedef typename prec_traits<wei_type>::type wei_data_t; 174 typedef typename prec_traits<dst_type>::type dst_data_t; 175 typedef typename prec_traits<bia_type>::type bia_data_t; 176 executednnl::impl::cpu::aarch64::acl_gemm_convolution_fwd_t177 status_t execute(const exec_ctx_t &ctx) const override { 178 return execute_forward(ctx); 179 } 180 181 private: 182 // To guard the const execute_forward(), the mutex must be 'mutable' 183 mutable std::mutex mtx; 184 status_t execute_forward(const exec_ctx_t &ctx) const; pddnnl::impl::cpu::aarch64::acl_gemm_convolution_fwd_t185 const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); } 186 187 }; // acl_gemm_convolution_fwd_t 188 189 } // namespace aarch64 190 } // namespace cpu 191 } // namespace impl 192 } // namespace dnnl 193 194 #endif 195