1 /*
2 Copyright (c) 2020-2021 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 */
16
17 #include "oneapi/tbb/cache_aligned_allocator.h"
18 #include "oneapi/tbb/detail/_small_object_pool.h"
19 #include "oneapi/tbb/detail/_task.h"
20 #include "governor.h"
21 #include "thread_data.h"
22 #include "task_dispatcher.h"
23
24 #include <cstddef>
25
26 namespace tbb {
27 namespace detail {
28 namespace r1 {
29
// Sentinel stored into m_public_list by destroy() to mark the pool as dead.
// Foreign threads that see this value free their memory directly instead of
// pushing it onto the public free list. Address 1 is never a valid object.
small_object_pool_impl::small_object* const small_object_pool_impl::dead_public_list =
        reinterpret_cast<small_object_pool_impl::small_object*>(1);
32
allocate(d1::small_object_pool * & allocator,std::size_t number_of_bytes,const d1::execution_data & ed)33 void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& allocator, std::size_t number_of_bytes, const d1::execution_data& ed) {
34 auto& tls = static_cast<const execution_data_ext&>(ed).task_disp->get_thread_data();
35 auto pool = tls.my_small_object_pool;
36 return pool->allocate_impl(allocator, number_of_bytes);
37 }
38
allocate(d1::small_object_pool * & allocator,std::size_t number_of_bytes)39 void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& allocator, std::size_t number_of_bytes) {
40 // TODO: optimize if the allocator contains a valid pool.
41 auto tls = governor::get_thread_data();
42 auto pool = tls->my_small_object_pool;
43 return pool->allocate_impl(allocator, number_of_bytes);
44 }
45
allocate_impl(d1::small_object_pool * & allocator,std::size_t number_of_bytes)46 void* small_object_pool_impl::allocate_impl(d1::small_object_pool*& allocator, std::size_t number_of_bytes)
47 {
48 small_object* obj{nullptr};
49
50 if (number_of_bytes <= small_object_size) {
51 if (m_private_list) {
52 obj = m_private_list;
53 m_private_list = m_private_list->next;
54 } else if (m_public_list.load(std::memory_order_relaxed)) {
55 // No fence required for read of my_public_list above, because std::atomic::exchange() has a fence.
56 obj = m_public_list.exchange(nullptr);
57 __TBB_ASSERT( obj, "another thread emptied the my_public_list" );
58 m_private_list = obj->next;
59 } else {
60 obj = new (cache_aligned_allocate(small_object_size)) small_object{nullptr};
61 ++m_private_counter;
62 }
63 } else {
64 obj = new (cache_aligned_allocate(number_of_bytes)) small_object{nullptr};
65 }
66 allocator = this;
67
68 // Return uninitialized memory for further construction on user side.
69 obj->~small_object();
70 return obj;
71 }
72
deallocate(d1::small_object_pool & allocator,void * ptr,std::size_t number_of_bytes)73 void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& allocator, void* ptr, std::size_t number_of_bytes) {
74 auto pool = static_cast<small_object_pool_impl*>(&allocator);
75 auto tls = governor::get_thread_data();
76 pool->deallocate_impl(ptr, number_of_bytes, *tls);
77 }
78
deallocate(d1::small_object_pool & allocator,void * ptr,std::size_t number_of_bytes,const d1::execution_data & ed)79 void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& allocator, void* ptr, std::size_t number_of_bytes, const d1::execution_data& ed) {
80 auto& tls = static_cast<const execution_data_ext&>(ed).task_disp->get_thread_data();
81 auto pool = static_cast<small_object_pool_impl*>(&allocator);
82 pool->deallocate_impl(ptr, number_of_bytes, tls);
83 }
84
// Returns `ptr` to this pool. If the deallocating thread (`td`) owns the pool,
// the block is pushed onto the unsynchronized private list; otherwise it is
// pushed onto the lock-free public list via a CAS loop. If the owner has
// already called destroy() (public list holds dead_public_list), the block is
// freed immediately, and the deallocation that brings m_public_counter to zero
// also releases the pool object itself. Oversized blocks are freed directly.
void small_object_pool_impl::deallocate_impl(void* ptr, std::size_t number_of_bytes, thread_data& td) {
    __TBB_ASSERT(ptr != nullptr, "pointer to deallocate should not be null");
    __TBB_ASSERT(number_of_bytes >= sizeof(small_object), "number of bytes should be at least sizeof(small_object)");

    if (number_of_bytes <= small_object_size) {
        // Rebuild the free-list node header in the returned storage.
        auto obj = new (ptr) small_object{nullptr};
        if (td.my_small_object_pool == this) {
            // Owner thread: private list needs no synchronization.
            obj->next = m_private_list;
            m_private_list = obj;
        } else {
            auto old_public_list = m_public_list.load(std::memory_order_relaxed);

            for (;;) {
                if (old_public_list == dead_public_list) {
                    // Pool owner already ran destroy(): free the block directly.
                    obj->~small_object();
                    cache_aligned_deallocate(obj);
                    // The increment that wraps m_public_counter to zero marks the
                    // last outstanding foreign block; this thread must then
                    // release the pool object itself.
                    if (++m_public_counter == 0)
                    {
                        this->~small_object_pool_impl();
                        cache_aligned_deallocate(this);
                    }
                    break;
                }
                // Try to push onto the public list. On CAS failure
                // old_public_list is reloaded, so the dead mark is re-checked
                // before the next attempt.
                obj->next = old_public_list;
                if (m_public_list.compare_exchange_strong(old_public_list, obj)) {
                    break;
                }
            }
        }
    } else {
        // Oversized allocations never entered the free lists; free directly.
        cache_aligned_deallocate(ptr);
    }
}
118
cleanup_list(small_object * list)119 std::int64_t small_object_pool_impl::cleanup_list(small_object* list)
120 {
121 std::int64_t removed_count{};
122
123 while (list) {
124 small_object* current = list;
125 list = list->next;
126 current->~small_object();
127 cache_aligned_deallocate(current);
128 ++removed_count;
129 }
130 return removed_count;
131 }
132
// Called by the owning thread when it is done with the pool. Drains both free
// lists, marks the public list dead so foreign threads stop pushing onto it,
// and then either destroys the pool here or leaves that to the foreign
// deallocate_impl() call that returns the last outstanding block.
void small_object_pool_impl::destroy()
{
    // clean up private list and subtract the removed count from private counter
    m_private_counter -= cleanup_list(m_private_list);
    // Grab public list and place dead mark; exchange is atomic, so every block
    // is either drained here or freed by its foreign owner after seeing the mark.
    small_object* public_list = m_public_list.exchange(dead_public_list);
    // clean up public list and subtract from private (intentionally) counter
    m_private_counter -= cleanup_list(public_list);
    __TBB_ASSERT(m_private_counter >= 0, "Private counter may not be less than 0");
    // Equivalent to fetch_sub(m_private_counter) - m_private_counter. But we need to do it
    // atomically with operator-= not to access m_private_counter after the subtraction.
    auto new_value = m_public_counter -= m_private_counter;
    // check if this method is responsible to clean up the resources
    if (new_value == 0) {
        this->~small_object_pool_impl();
        cache_aligned_deallocate(this);
    }
}
151
152 } // namespace r1
153 } // namespace detail
154 } // namespace tbb
155