1 /*******************************************************************************
2 * Copyright 2020 Intel Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *******************************************************************************/
16 
17 #include <mutex>
18 #include <unordered_map>
19 
20 #include "gpu/compute/compute_engine.hpp"
21 
22 #include "common/utils.hpp"
23 
24 namespace dnnl {
25 namespace impl {
26 namespace gpu {
27 namespace compute {
28 
// Cache for device_info_t objects. Reuse the already initialized
// device_info_t objects to save time on HW detection and nGEN binary
// check.
// Entries are keyed by device_id_t (hashed with device_id_hash_t) and hold
// the device_info_t through a std::shared_ptr.
using device_info_cache_t = std::unordered_map<device_id_t,
        std::shared_ptr<device_info_t>, device_id_hash_t>;
34 
device_info_cache_mutex()35 utils::rw_mutex_t &device_info_cache_mutex() {
36     static utils::rw_mutex_t m;
37     return m;
38 }
39 
device_info_cache()40 device_info_cache_t &device_info_cache() {
41     static device_info_cache_t cache;
42     return cache;
43 }
44 
45 // Returns true if found, false otherwise.
device_info_cache_get(std::shared_ptr<device_info_t> * result,engine_t * engine)46 bool device_info_cache_get(
47         std::shared_ptr<device_info_t> *result, engine_t *engine) {
48     utils::lock_read_t lock(device_info_cache_mutex());
49 
50     auto it = device_info_cache().find(engine->device_id());
51     if (it == device_info_cache().end()) return false;
52     if (result) *result = it->second;
53     return true;
54 }
55 
device_info_cache_set(engine_t * engine,const std::shared_ptr<device_info_t> & device_info)56 void device_info_cache_set(
57         engine_t *engine, const std::shared_ptr<device_info_t> &device_info) {
58     utils::lock_write_t lock(device_info_cache_mutex());
59 
60     // Clear the cache to avoid hypothetically large growth.
61     const int cache_size_threshold = 1024;
62     if (device_info_cache().size() > cache_size_threshold)
63         device_info_cache().clear();
64 
65     device_info_cache().insert({engine->device_id(), device_info});
66 }
67 
init()68 status_t compute_engine_t::init() {
69     if (device_info_cache_get(&device_info_, this)) return status::success;
70 
71     CHECK(init_device_info());
72 
73     device_info_cache_set(this, device_info_);
74 
75     return status::success;
76 }
77 
78 } // namespace compute
79 } // namespace gpu
80 } // namespace impl
81 } // namespace dnnl
82 
dnnl_impl_gpu_mayiuse_ngen_kernels(dnnl::impl::engine_t * engine)83 bool dnnl_impl_gpu_mayiuse_ngen_kernels(dnnl::impl::engine_t *engine) {
84     using namespace dnnl::impl;
85     using namespace dnnl::impl::gpu::compute;
86 
87     auto *compute_engine = utils::downcast<compute_engine_t *>(engine);
88     return compute_engine->mayiuse_ngen_kernels();
89 }
90