/*******************************************************************************
* Copyright 2020-2021 Arm Ltd. and affiliates
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#ifndef CPU_AARCH64_ACL_GEMM_CONVOLUTION_HPP
#define CPU_AARCH64_ACL_GEMM_CONVOLUTION_HPP

#include "cpu/cpu_convolution_pd.hpp"

#include "cpu/aarch64/acl_convolution_utils.hpp"

namespace dnnl {
namespace impl {
namespace cpu {
namespace aarch64 {

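// Resource holding the Compute Library GEMM convolution operator and the
// tensors it reads from and writes to.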
struct acl_resource_t : public resource_t {
    acl_resource_t()
        : acl_obj_(utils::make_unique<
                acl_obj_t<arm_compute::NEGEMMConvolutionLayer>>()) {}

    status_t configure(const acl_conv_conf_t &acp) {
        if (!acl_obj_) return status::out_of_memory;

        // Init Compute Library tensors based on info from descriptor
        acl_obj_->src_tensor.allocator()->init(acp.src_info);
        acl_obj_->wei_tensor.allocator()->init(acp.wei_info);
        acl_obj_->dst_tensor.allocator()->init(acp.dst_info);
        acl_obj_->bia_tensor.allocator()->init(acp.bia_info);
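        // With a fused sum+eltwise post-op chain the convolution writes into
        // an intermediate accumulator tensor; the sum and the activation are
        // then applied as separate Compute Library operators (configured
        // below).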
        if (acp.sum_with_eltwise) {
            acl_obj_->dst_acc_tensor.allocator()->init(acp.dst_info);
        }
        // clang-format off
        acl_obj_->conv.configure(
            &acl_obj_->src_tensor,
            &acl_obj_->wei_tensor,
            acp.with_bias ? &acl_obj_->bia_tensor : nullptr,
            acp.sum_with_eltwise ? &acl_obj_->dst_acc_tensor : &acl_obj_->dst_tensor,
            acp.padstride_info,
            acp.weights_info,
            acp.dilation_info,
            acp.sum_with_eltwise ? arm_compute::ActivationLayerInfo() : acp.act_info,
            acp.fast_math);
        // clang-format on
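        // Sum post-op: add the existing dst contents to the convolution
        // result held in the accumulator, then apply the eltwise activation
        // back into dst.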
        if (acp.sum_with_eltwise) {
            acl_obj_->add.configure(&acl_obj_->dst_tensor,
                    &acl_obj_->dst_acc_tensor, &acl_obj_->dst_acc_tensor,
                    arm_compute::ConvertPolicy::SATURATE);
            acl_obj_->act.configure(&acl_obj_->dst_acc_tensor,
                    &acl_obj_->dst_tensor, acp.act_info);
            acl_obj_->dst_acc_tensor.allocator()->allocate();
        }

        return status::success;
    }

    acl_obj_t<arm_compute::NEGEMMConvolutionLayer> &get_acl_obj() const {
        return *acl_obj_;
    }

    DNNL_DISALLOW_COPY_AND_ASSIGN(acl_resource_t);

private:
    std::unique_ptr<acl_obj_t<arm_compute::NEGEMMConvolutionLayer>> acl_obj_;

}; // acl_resource_t

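// GEMM-based forward convolution, implemented on top of
// arm_compute::NEGEMMConvolutionLayer.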
template <data_type_t src_type, data_type_t wei_type = src_type,
        data_type_t dst_type = src_type, data_type_t bia_type = dst_type>
struct acl_gemm_convolution_fwd_t : public primitive_t {
    struct pd_t : public cpu_convolution_fwd_pd_t {
        pd_t(const convolution_desc_t *adesc, const primitive_attr_t *attr,
                const typename pd_t::base_class *hint_fwd_pd)
            : cpu_convolution_fwd_pd_t(adesc, attr, hint_fwd_pd), acp_() {}

        DECLARE_COMMON_PD_T(
                "gemm:acl", acl_gemm_convolution_fwd_t, USE_GLOBAL_SCRATCHPAD);

        status_t init(engine_t *engine) {
            using namespace data_type;
            using smask_t = primitive_attr_t::skip_mask_t;

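            // Reject anything other than forward convolution with supported
            // data types, scales, zero-points and post-ops.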
            bool ok = is_fwd()
                    && set_default_alg_kind(alg_kind::convolution_direct)
                    && expect_data_types(
                            src_type, wei_type, bia_type, dst_type, undef)
                    && !has_zero_dim_memory()
                    && attr()->has_default_values(smask_t::oscale
                                    | smask_t::zero_points | smask_t::post_ops,
                            dst_type)
                    && output_scales_mask_ok() && zero_points_ok()
                    && post_ops_ok();
            if (!ok) return status::unimplemented;

            auto conf_status = acl_convolution_utils::init_conf_gemm(acp_,
                    src_md_, weights_md_, dst_md_, bias_md_, *desc(), *attr());
            if (conf_status != status::success) return status::unimplemented;

            acl_common_utils::acl_thread_bind();

            return status::success;
        }

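        // Compute Library configuration, filled in by init_conf_gemm() and
        // consumed by acl_resource_t::configure().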
        acl_conv_conf_t acp_;

    protected:
        bool output_scales_mask_ok() const {
            using namespace data_type;
            const auto &mask = attr()->output_scales_.mask_;
            return IMPLICATION(!utils::one_of(src_type, s8, u8),
                           attr()->output_scales_.has_default_values())
                    // TODO: add support for per_channel quantization
                    && mask == 0;
        }

        bool zero_points_ok() const {
            using namespace data_type;
            // TODO: add support for asymmetric quantization
            return attr()->zero_points_.has_default_values();
        }

        bool post_ops_ok() const {
            auto const &po = attr()->post_ops_;
            // "true" here stands for the eltwise.scale == 1.f check
            auto is_eltwise
                    = [&](int idx) { return po.entry_[idx].is_eltwise(true); };
            auto is_sum = [&](int idx) { return po.entry_[idx].is_sum(); };

            bool sum_with_eltwise
                    = (po.len() == 2) && is_sum(0) && is_eltwise(1);
            bool eltwise_only = (po.len() == 1) ? is_eltwise(0) : false;
            bool eltwise_ok = false;
            // Compute Library supports either one eltwise post-op or
            // sum+eltwise post-ops
            if (eltwise_only || sum_with_eltwise) {
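                // When preceded by a sum the eltwise entry sits at index 1,
                // otherwise at index 0, so the bool doubles as the index.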
                const auto act_type = po.entry_[sum_with_eltwise].eltwise.alg;
                eltwise_ok = acl_common_utils::acl_act_ok(act_type);
            }

            return eltwise_ok || (po.len() == 0);
        }
    };

    acl_gemm_convolution_fwd_t(const pd_t *apd) : primitive_t(apd) {}

    status_t create_resource(
            engine_t *engine, resource_mapper_t &mapper) const override {
        if (mapper.has_resource(this)) return status::success;

        auto r = utils::make_unique<acl_resource_t>();
        if (!r) return status::out_of_memory;

        // Configure the resource using information from the primitive
        // descriptor
        auto st = r->configure(pd()->acp_);
        if (st == status::success) { mapper.add(this, std::move(r)); }

        return st;
    }

    typedef typename prec_traits<src_type>::type src_data_t;
    typedef typename prec_traits<wei_type>::type wei_data_t;
    typedef typename prec_traits<dst_type>::type dst_data_t;
    typedef typename prec_traits<bia_type>::type bia_data_t;

    status_t execute(const exec_ctx_t &ctx) const override {
        return execute_forward(ctx);
    }

private:
    // To guard the const execute_forward(), the mutex must be 'mutable'
    mutable std::mutex mtx;
    status_t execute_forward(const exec_ctx_t &ctx) const;
    const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); }

}; // acl_gemm_convolution_fwd_t

} // namespace aarch64
} // namespace cpu
} // namespace impl
} // namespace dnnl

#endif