1 /*******************************************************************************
2 * Copyright 2021 Arm Ltd. and affiliates
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *******************************************************************************/
16 
17 #include "cpu/aarch64/acl_inner_product.hpp"
18 
19 namespace dnnl {
20 namespace impl {
21 namespace cpu {
22 namespace aarch64 {
23 
24 using namespace dnnl::impl::status;
25 using namespace dnnl::impl::memory_tracking::names;
26 using namespace dnnl::impl::utils;
27 
execute_forward(const exec_ctx_t & ctx) const28 status_t acl_inner_product_fwd_t::execute_forward(const exec_ctx_t &ctx) const {
29     // Lock here is needed because resource_mapper does not support
30     // concurrent multithreaded access.
31     std::lock_guard<std::mutex> _lock {this->mtx};
32 
33     status_t status = status::success;
34     auto src_base = CTX_IN_MEM(const data_t *, DNNL_ARG_SRC);
35     auto wei_base = CTX_IN_MEM(const data_t *, DNNL_ARG_WEIGHTS);
36     auto bia_base = CTX_IN_MEM(const data_t *, DNNL_ARG_BIAS);
37     auto dst_base = CTX_OUT_MEM(data_t *, DNNL_ARG_DST);
38 
39     bool with_bias = pd()->aip_.with_bias;
40     bool with_sum = pd()->aip_.with_sum;
41 
42     // Retrieve primitive resource and configured Compute Library objects
43     auto *acl_resource
44             = ctx.get_resource_mapper()->get<acl_ip_resource_t>(this);
45     acl_ip_obj_t &acl_obj = acl_resource->get_acl_obj();
46 
47     // import_memory() and free() methods do not allocate/free any additional
48     // memory, only acquire/release pointers.
49     acl_obj.src_tensor.allocator()->import_memory(
50             const_cast<data_t *>(src_base));
51     acl_obj.wei_tensor.allocator()->import_memory(
52             const_cast<data_t *>(wei_base));
53     acl_obj.dst_tensor.allocator()->import_memory(dst_base);
54     if (with_bias) {
55         acl_obj.bia_tensor.allocator()->import_memory(
56                 const_cast<data_t *>(bia_base));
57     }
58 
59     acl_obj.fc.run();
60     if (with_sum) { acl_obj.add.run(); }
61 
62     acl_obj.src_tensor.allocator()->free();
63     acl_obj.wei_tensor.allocator()->free();
64     acl_obj.dst_tensor.allocator()->free();
65     if (with_bias) { acl_obj.bia_tensor.allocator()->free(); }
66 
67     return status;
68 }
69 
70 } // namespace aarch64
71 } // namespace cpu
72 } // namespace impl
73 } // namespace dnnl
74