1 /*
2     Copyright (c) 2020-2021 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 #include "oneapi/tbb/cache_aligned_allocator.h"
18 #include "oneapi/tbb/detail/_small_object_pool.h"
19 #include "oneapi/tbb/detail/_task.h"
20 #include "governor.h"
21 #include "thread_data.h"
22 #include "task_dispatcher.h"
23 
24 #include <cstddef>
25 
26 namespace tbb {
27 namespace detail {
28 namespace r1 {
29 
// Sentinel value stored into m_public_list by destroy() to mark the pool as dead:
// once present, threads returning objects free them directly instead of publishing
// them (see deallocate_impl). Address 1 can never be a real small_object pointer.
small_object_pool_impl::small_object* const small_object_pool_impl::dead_public_list =
                reinterpret_cast<small_object_pool_impl::small_object*>(1);
32 
allocate(d1::small_object_pool * & allocator,std::size_t number_of_bytes,const d1::execution_data & ed)33 void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& allocator, std::size_t number_of_bytes, const d1::execution_data& ed) {
34     auto& tls = static_cast<const execution_data_ext&>(ed).task_disp->get_thread_data();
35     auto pool = tls.my_small_object_pool;
36     return pool->allocate_impl(allocator, number_of_bytes);
37 }
38 
allocate(d1::small_object_pool * & allocator,std::size_t number_of_bytes)39 void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& allocator, std::size_t number_of_bytes) {
40     // TODO: optimize if the allocator contains a valid pool.
41     auto tls = governor::get_thread_data();
42     auto pool = tls->my_small_object_pool;
43     return pool->allocate_impl(allocator, number_of_bytes);
44 }
45 
allocate_impl(d1::small_object_pool * & allocator,std::size_t number_of_bytes)46 void* small_object_pool_impl::allocate_impl(d1::small_object_pool*& allocator, std::size_t number_of_bytes)
47 {
48     small_object* obj{nullptr};
49 
50     if (number_of_bytes <= small_object_size) {
51         if (m_private_list) {
52             obj = m_private_list;
53             m_private_list = m_private_list->next;
54         } else if (m_public_list.load(std::memory_order_relaxed)) {
55             // No fence required for read of my_public_list above, because std::atomic::exchange() has a fence.
56             obj = m_public_list.exchange(nullptr);
57             __TBB_ASSERT( obj, "another thread emptied the my_public_list" );
58             m_private_list = obj->next;
59         } else {
60             obj = new (cache_aligned_allocate(small_object_size)) small_object{nullptr};
61             ++m_private_counter;
62         }
63     } else {
64         obj = new (cache_aligned_allocate(number_of_bytes)) small_object{nullptr};
65     }
66     allocator = this;
67 
68     // Return uninitialized memory for further construction on user side.
69     obj->~small_object();
70     return obj;
71 }
72 
deallocate(d1::small_object_pool & allocator,void * ptr,std::size_t number_of_bytes)73 void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& allocator, void* ptr, std::size_t number_of_bytes) {
74     auto pool = static_cast<small_object_pool_impl*>(&allocator);
75     auto tls = governor::get_thread_data();
76     pool->deallocate_impl(ptr, number_of_bytes, *tls);
77 }
78 
deallocate(d1::small_object_pool & allocator,void * ptr,std::size_t number_of_bytes,const d1::execution_data & ed)79 void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& allocator, void* ptr, std::size_t number_of_bytes, const d1::execution_data& ed) {
80     auto& tls = static_cast<const execution_data_ext&>(ed).task_disp->get_thread_data();
81     auto pool = static_cast<small_object_pool_impl*>(&allocator);
82     pool->deallocate_impl(ptr, number_of_bytes, tls);
83 }
84 
//! Returns a block to this pool, choosing the private or the public free list.
/** Small blocks freed by the pool's owner thread are pushed onto the private list
    without synchronization. Blocks freed by other threads are published onto the
    lock-free public list via a CAS loop; if the list carries the dead mark (the
    owner already ran destroy()), the block is freed directly and the deallocation
    that brings m_public_counter back to zero also destroys the pool object itself.
    Oversized blocks are always freed immediately. **/
void small_object_pool_impl::deallocate_impl(void* ptr, std::size_t number_of_bytes, thread_data& td) {
    __TBB_ASSERT(ptr != nullptr, "pointer to deallocate should not be null");
    __TBB_ASSERT(number_of_bytes >= sizeof(small_object), "number of bytes should be at least sizeof(small_object)");

    if (number_of_bytes <= small_object_size) {
        // Rebuild the free-list node header in the returned storage.
        auto obj = new (ptr) small_object{nullptr};
        if (td.my_small_object_pool == this) {
            // Owner thread: private list needs no synchronization.
            obj->next = m_private_list;
            m_private_list = obj;
        } else {
            auto old_public_list = m_public_list.load(std::memory_order_relaxed);

            for (;;) {
                if (old_public_list == dead_public_list) {
                    // Owner already destroyed the pool's lists: free directly.
                    obj->~small_object();
                    cache_aligned_deallocate(obj);
                    // destroy() debited m_public_counter by the number of objects
                    // still outstanding; the thread whose increment brings it back
                    // to zero performs the pool's final destruction.
                    if (++m_public_counter == 0)
                    {
                        this->~small_object_pool_impl();
                        cache_aligned_deallocate(this);
                    }
                    break;
                }
                obj->next = old_public_list;
                // On CAS failure old_public_list is reloaded, so a concurrent
                // destroy() placing the dead mark is seen on the next iteration.
                if (m_public_list.compare_exchange_strong(old_public_list, obj)) {
                    break;
                }
            }
        }
    } else {
        cache_aligned_deallocate(ptr);
    }
}
118 
cleanup_list(small_object * list)119 std::int64_t small_object_pool_impl::cleanup_list(small_object* list)
120 {
121     std::int64_t removed_count{};
122 
123     while (list) {
124         small_object* current = list;
125         list = list->next;
126         current->~small_object();
127         cache_aligned_deallocate(current);
128         ++removed_count;
129     }
130     return removed_count;
131 }
132 
//! Called by the owner thread when it is finished with the pool.
/** Frees everything on both free lists and places the dead mark on the public list.
    If some allocated objects are still outstanding, the pool object stays alive and
    the foreign deallocation that brings m_public_counter back to zero (see
    deallocate_impl) performs the final destruction instead. **/
void small_object_pool_impl::destroy()
{
    // clean up private list and subtract the removed count from private counter
    m_private_counter -= cleanup_list(m_private_list);
    // Grab public list and place dead mark
    small_object* public_list = m_public_list.exchange(dead_public_list);
    // clean up public list and subtract from private (intentionally) counter
    m_private_counter -= cleanup_list(public_list);
    __TBB_ASSERT(m_private_counter >= 0, "Private counter may not be less than 0");
    // Equivalent to fetch_sub(m_private_counter) - m_private_counter. But we need to do it
    // atomically with operator-= not to access m_private_counter after the subtraction.
    auto new_value = m_public_counter -= m_private_counter;
    // check if this method is responsible to clean up the resources
    if (new_value == 0) {
        this->~small_object_pool_impl();
        cache_aligned_deallocate(this);
    }
}
151 
152 } // namespace r1
153 } // namespace detail
154 } // namespace tbb
155