1 /*
2     Copyright (c) 2005-2021 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
#include "oneapi/tbb/version.h"

#include "oneapi/tbb/detail/_exception.h"
#include "oneapi/tbb/detail/_assert.h"
#include "oneapi/tbb/detail/_utils.h"
#include "oneapi/tbb/tbb_allocator.h" // Is this OK?
#include "oneapi/tbb/cache_aligned_allocator.h"

#include "dynamic_link.h"
#include "misc.h"

#include <atomic>  // std::atomic (handler pointers)
#include <cstdint> // std::uintptr_t
#include <cstdlib>
#include <mutex>   // std::once_flag, std::call_once
29 
30 #if _WIN32 || _WIN64
31 #include <Windows.h>
32 #else
33 #include <dlfcn.h>
34 #endif /* _WIN32||_WIN64 */
35 
#if __TBB_WEAK_SYMBOLS_PRESENT

// Declare the tbbmalloc entry points as weak symbols so that referencing them
// here does not force the allocator library to be present at load time; if the
// library is absent the addresses resolve to null and dynamic_link falls back
// to the std::malloc-based routines below.
#pragma weak scalable_malloc
#pragma weak scalable_free
#pragma weak scalable_aligned_malloc
#pragma weak scalable_aligned_free

extern "C" {
    void* scalable_malloc(std::size_t);
    void  scalable_free(void*);
    void* scalable_aligned_malloc(std::size_t, std::size_t);
    void  scalable_aligned_free(void*);
}

#endif /* __TBB_WEAK_SYMBOLS_PRESENT */
51 
52 namespace tbb {
53 namespace detail {
54 namespace r1 {
55 
//! Initialization routine used for first indirect call via allocate_handler.
static void* initialize_allocate_handler(std::size_t size);

//! Handler for memory allocation
// Starts out pointing at the initialization trampoline; after the first call it
// holds either scalable_malloc or std::malloc (see initialize_handler_pointers).
using allocate_handler_type = void* (*)(std::size_t size);
static std::atomic<allocate_handler_type> allocate_handler{ &initialize_allocate_handler };
// Non-atomic slot that dynamic_link fills; its value is published into
// allocate_handler with a release store once linking has finished.
allocate_handler_type allocate_handler_unsafe = nullptr;

//! Handler for memory deallocation
static void  (*deallocate_handler)(void* pointer) = nullptr;

//! Initialization routine used for first indirect call via cache_aligned_allocate_handler.
static void* initialize_cache_aligned_allocate_handler(std::size_t n, std::size_t alignment);

//! Allocates memory using standard malloc. It is used when scalable_allocator is not available
static void* std_cache_aligned_allocate(std::size_t n, std::size_t alignment);

//! Deallocates memory using standard free. It is used when scalable_allocator is not available
static void  std_cache_aligned_deallocate(void* p);

//! Handler for padded memory allocation
// Same trampoline-then-publish scheme as allocate_handler above.
using cache_aligned_allocate_handler_type = void* (*)(std::size_t n, std::size_t alignment);
static std::atomic<cache_aligned_allocate_handler_type> cache_aligned_allocate_handler{ &initialize_cache_aligned_allocate_handler };
// Non-atomic slot filled by dynamic_link before being published into the atomic above.
cache_aligned_allocate_handler_type cache_aligned_allocate_handler_unsafe = nullptr;

//! Handler for padded memory deallocation
static void (*cache_aligned_deallocate_handler)(void* p) = nullptr;

//! Table describing how to link the handlers.
// Each entry pairs a symbol from MALLOCLIB_NAME with the pointer that receives it.
static const dynamic_link_descriptor MallocLinkTable[] = {
    DLD(scalable_malloc, allocate_handler_unsafe),
    DLD(scalable_free, deallocate_handler),
    DLD(scalable_aligned_malloc, cache_aligned_allocate_handler_unsafe),
    DLD(scalable_aligned_free, cache_aligned_deallocate_handler),
};
91 
92 
#if TBB_USE_DEBUG
#define DEBUG_SUFFIX "_debug"
#else
#define DEBUG_SUFFIX
#endif /* TBB_USE_DEBUG */

// MALLOCLIB_NAME is the name of the oneTBB memory allocator library.
// Selected per platform: Windows DLL, macOS dylib, versionless .so on the BSDs
// and other Unix flavors that do not use SONAME versioning, and .so.2 elsewhere.
#if _WIN32||_WIN64
#define MALLOCLIB_NAME "tbbmalloc" DEBUG_SUFFIX ".dll"
#elif __APPLE__
#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".dylib"
#elif __FreeBSD__ || __NetBSD__ || __OpenBSD__ || __sun || _AIX || __ANDROID__ || __DragonFly__
#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so"
#elif __unix__  // Note that order of these #elif's is important!
#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so.2"
#else
#error Unknown OS
#endif
111 
112 //! Initialize the allocation/free handler pointers.
113 /** Caller is responsible for ensuring this routine is called exactly once.
114     The routine attempts to dynamically link with the TBB memory allocator.
115     If that allocator is not found, it links to malloc and free. */
initialize_handler_pointers()116 void initialize_handler_pointers() {
117     __TBB_ASSERT(allocate_handler == &initialize_allocate_handler, NULL);
118     bool success = dynamic_link(MALLOCLIB_NAME, MallocLinkTable, 4);
119     if(!success) {
120         // If unsuccessful, set the handlers to the default routines.
121         // This must be done now, and not before FillDynamicLinks runs, because if other
122         // threads call the handlers, we want them to go through the DoOneTimeInitializations logic,
123         // which forces them to wait.
124         allocate_handler_unsafe = &std::malloc;
125         deallocate_handler = &std::free;
126         cache_aligned_allocate_handler_unsafe = &std_cache_aligned_allocate;
127         cache_aligned_deallocate_handler = &std_cache_aligned_deallocate;
128     }
129 
130     allocate_handler.store(allocate_handler_unsafe, std::memory_order_release);
131     cache_aligned_allocate_handler.store(cache_aligned_allocate_handler_unsafe, std::memory_order_release);
132 
133     PrintExtraVersionInfo( "ALLOCATOR", success?"scalable_malloc":"malloc" );
134 }
135 
136 static std::once_flag initialization_state;
initialize_cache_aligned_allocator()137 void initialize_cache_aligned_allocator() {
138     std::call_once(initialization_state, &initialize_handler_pointers);
139 }
140 
141 //! Executed on very first call through allocate_handler
initialize_allocate_handler(std::size_t size)142 static void* initialize_allocate_handler(std::size_t size) {
143     initialize_cache_aligned_allocator();
144     __TBB_ASSERT(allocate_handler != &initialize_allocate_handler, NULL);
145     return (*allocate_handler)(size);
146 }
147 
148 //! Executed on very first call through cache_aligned_allocate_handler
initialize_cache_aligned_allocate_handler(std::size_t bytes,std::size_t alignment)149 static void* initialize_cache_aligned_allocate_handler(std::size_t bytes, std::size_t alignment) {
150     initialize_cache_aligned_allocator();
151     __TBB_ASSERT(cache_aligned_allocate_handler != &initialize_cache_aligned_allocate_handler, NULL);
152     return (*cache_aligned_allocate_handler)(bytes, alignment);
153 }
154 
155 // TODO: use CPUID to find actual line size, though consider backward compatibility
156 // nfs - no false sharing
157 static constexpr std::size_t nfs_size = 128;
158 
cache_line_size()159 std::size_t __TBB_EXPORTED_FUNC cache_line_size() {
160     return nfs_size;
161 }
162 
cache_aligned_allocate(std::size_t size)163 void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size) {
164     const std::size_t cache_line_size = nfs_size;
165     __TBB_ASSERT(is_power_of_two(cache_line_size), "must be power of two");
166 
167     // Check for overflow
168     if (size + cache_line_size < size) {
169         throw_exception(exception_id::bad_alloc);
170     }
171     // scalable_aligned_malloc considers zero size request an error, and returns NULL
172     if (size == 0) size = 1;
173 
174     void* result = cache_aligned_allocate_handler.load(std::memory_order_acquire)(size, cache_line_size);
175     if (!result) {
176         throw_exception(exception_id::bad_alloc);
177     }
178     __TBB_ASSERT(is_aligned(result, cache_line_size), "The returned address isn't aligned");
179     return result;
180 }
181 
cache_aligned_deallocate(void * p)182 void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p) {
183     __TBB_ASSERT(cache_aligned_deallocate_handler, "Initialization has not been yet.");
184     (*cache_aligned_deallocate_handler)(p);
185 }
186 
std_cache_aligned_allocate(std::size_t bytes,std::size_t alignment)187 static void* std_cache_aligned_allocate(std::size_t bytes, std::size_t alignment) {
188     // TODO: make it common with cache_aligned_resource
189     std::size_t space = alignment + bytes;
190     std::uintptr_t base = reinterpret_cast<std::uintptr_t>(std::malloc(space));
191     if (!base) {
192         return nullptr;
193     }
194     std::uintptr_t result = (base + nfs_size) & ~(nfs_size - 1);
195     // Round up to the next cache line (align the base address)
196     __TBB_ASSERT((result - base) >= sizeof(std::uintptr_t), "Cannot store a base pointer to the header");
197     __TBB_ASSERT(space - (result - base) >= bytes, "Not enough space for the storage");
198 
199     // Record where block actually starts.
200     (reinterpret_cast<std::uintptr_t*>(result))[-1] = base;
201     return reinterpret_cast<void*>(result);
202 }
203 
std_cache_aligned_deallocate(void * p)204 static void std_cache_aligned_deallocate(void* p) {
205     if (p) {
206         __TBB_ASSERT(reinterpret_cast<std::uintptr_t>(p) >= 0x4096, "attempt to free block not obtained from cache_aligned_allocator");
207         // Recover where block actually starts
208         std::uintptr_t base = (reinterpret_cast<std::uintptr_t*>(p))[-1];
209         __TBB_ASSERT(((base + nfs_size) & ~(nfs_size - 1)) == reinterpret_cast<std::uintptr_t>(p), "Incorrect alignment or not allocated by std_cache_aligned_deallocate?");
210         std::free(reinterpret_cast<void*>(base));
211     }
212 }
213 
allocate_memory(std::size_t size)214 void* __TBB_EXPORTED_FUNC allocate_memory(std::size_t size) {
215     void* result = allocate_handler.load(std::memory_order_acquire)(size);
216     if (!result) {
217         throw_exception(exception_id::bad_alloc);
218     }
219     return result;
220 }
221 
deallocate_memory(void * p)222 void __TBB_EXPORTED_FUNC deallocate_memory(void* p) {
223     if (p) {
224         __TBB_ASSERT(deallocate_handler, "Initialization has not been yet.");
225         (*deallocate_handler)(p);
226     }
227 }
228 
is_tbbmalloc_used()229 bool __TBB_EXPORTED_FUNC is_tbbmalloc_used() {
230     auto handler_snapshot = allocate_handler.load(std::memory_order_acquire);
231     if (handler_snapshot == &initialize_allocate_handler) {
232         initialize_cache_aligned_allocator();
233     }
234     handler_snapshot = allocate_handler.load(std::memory_order_relaxed);
235     __TBB_ASSERT(handler_snapshot != &initialize_allocate_handler && deallocate_handler != nullptr, NULL);
236     // Cast to void avoids type mismatch errors on some compilers (e.g. __IBMCPP__)
237     __TBB_ASSERT((reinterpret_cast<void*>(handler_snapshot) == reinterpret_cast<void*>(&std::malloc)) == (reinterpret_cast<void*>(deallocate_handler) == reinterpret_cast<void*>(&std::free)),
238                   "Both shim pointers must refer to routines from the same package (either TBB or CRT)");
239     return reinterpret_cast<void*>(handler_snapshot) == reinterpret_cast<void*>(&std::malloc);
240 }
241 
242 } // namespace r1
243 } // namespace detail
244 } // namespace tbb
245