1 /****************************************************************************** 2 * Copyright (c) Intel Corporation - All rights reserved. * 3 * This file is part of the LIBXSMM library. * 4 * * 5 * For information on the license, see the LICENSE file. * 6 * Further information: https://github.com/hfp/libxsmm/ * 7 * SPDX-License-Identifier: BSD-3-Clause * 8 ******************************************************************************/ 9 /* Hans Pabst (Intel Corp.) 10 ******************************************************************************/ 11 #ifndef LIBXSMM_MALLOC_H 12 #define LIBXSMM_MALLOC_H 13 14 #include "libxsmm_memory.h" 15 16 /* include tensorflow/core/public/version.h prior to LIBXSMM otherwise the current TensorFlow API is assumed */ 17 #if !defined(LIBXSMM_TF12) && (!defined(TF_VERSION_STRING) || \ 18 LIBXSMM_VERSION2(1, 12) <= LIBXSMM_VERSION2(TF_MAJOR_VERSION, TF_MINOR_VERSION)) 19 # define LIBXSMM_TF12 /* TF_PATCH_VERSION does not matter */ 20 #endif 21 22 /** Can be used with libxsmm_[get|set]_scratch_limit. */ 23 #define LIBXSMM_SCRATCH_UNLIMITED ((size_t)LIBXSMM_UNLIMITED) 24 #define LIBXSMM_SCRATCH_DEFAULT 0 25 26 27 /** Function types accepted for memory allocation (see libxsmm_*_allocator). */ 28 LIBXSMM_EXTERN_C typedef LIBXSMM_RETARGETABLE void* (*libxsmm_malloc_ctx)(size_t /*size*/, const void* /*context*/); 29 LIBXSMM_EXTERN_C typedef LIBXSMM_RETARGETABLE void* (*libxsmm_malloc_fun)(size_t /*size*/); 30 LIBXSMM_EXTERN_C typedef union LIBXSMM_RETARGETABLE libxsmm_malloc_function { 31 libxsmm_malloc_ctx ctx_form; 32 libxsmm_malloc_fun function; 33 } libxsmm_malloc_function; 34 35 /** Function types accepted for releasing memory (see libxsmm_*_allocator). */ 36 LIBXSMM_EXTERN_C typedef LIBXSMM_RETARGETABLE void (*libxsmm_free_ctx)(void* /*buffer*/, const void* /*context*/); 37 LIBXSMM_EXTERN_C typedef LIBXSMM_RETARGETABLE void (*libxsmm_free_fun)(void* /*buffer*/); 38 LIBXSMM_EXTERN_C typedef union LIBXSMM_RETARGETABLE libxsmm_free_function { 39 libxsmm_free_ctx ctx_form; 40 libxsmm_free_fun function; 41 } libxsmm_free_function; 42 43 /** 44 * To setup the custom default memory allocator, either a malloc_fn and a free_fn 45 * are given, or two NULL-pointers designate to reset the default allocator to a 46 * library-internal default. If a context is given (non-NULL), the context-based 47 * form of the memory allocation is used. 48 * Changing the allocator including the function for deallocation applies to 49 * upcoming allocation/deallocation and works correctly for pending buffers. 50 */ 51 LIBXSMM_API int libxsmm_set_default_allocator(/* malloc_fn/free_fn must correspond */ 52 const void* context, libxsmm_malloc_function malloc_fn, libxsmm_free_function free_fn); 53 /** Retrieve the default memory allocator. */ 54 LIBXSMM_API int libxsmm_get_default_allocator(const void** context, 55 libxsmm_malloc_function* malloc_fn, libxsmm_free_function* free_fn); 56 57 /** 58 * To setup the scratch memory allocator, a malloc_fn function and an optional free_fn 59 * are given. A NULL-free acts as a "no-operation", and the deallocation is expected 60 * to be controlled otherwise. If two NULL-pointers are given, the allocator is reset 61 * to the currently active default memory allocator. If a context is given (non-NULL), 62 * the context-based form of the memory allocation is used. 63 * Changing the allocator including the function for deallocation applies to 64 * upcoming allocation/deallocation and works correctly for pending buffers. 65 */ 66 LIBXSMM_API int libxsmm_set_scratch_allocator(/* malloc_fn/free_fn must correspond */ 67 const void* context, libxsmm_malloc_function malloc_fn, libxsmm_free_function free_fn); 68 /** Retrieve the scratch memory allocator. */ 69 LIBXSMM_API int libxsmm_get_scratch_allocator(const void** context, 70 libxsmm_malloc_function* malloc_fn, libxsmm_free_function* free_fn); 71 72 /** Allocate memory (malloc/free interface). */ 73 LIBXSMM_API LIBXSMM_ATTRIBUTE_MALLOC void* libxsmm_malloc(size_t size); 74 75 /** Allocate aligned memory using the default allocator. */ 76 LIBXSMM_API LIBXSMM_ATTRIBUTE_MALLOC void* libxsmm_aligned_malloc(size_t size, 77 /** 78 * =0: align automatically according to the size 79 * 0<: align according to the alignment value 80 */ 81 size_t alignment); 82 83 /** Reallocate memory using the default allocator (alignment is preserved). */ 84 LIBXSMM_API void* libxsmm_realloc(size_t size, void* ptr); 85 86 /** 87 * Allocate aligned scratch memory. It is not supported 88 * to query properties per libxsmm_get_malloc_info, but 89 * libxsmm_get_scratch_info can used instead. 90 */ 91 LIBXSMM_API void* libxsmm_scratch_malloc(size_t size, 92 /** 93 * =0: align automatically according to the size 94 * 0<: align according to the alignment value 95 */ 96 size_t alignment, 97 /** 98 * Identifies the call site, which is used 99 * to determine the memory pool. 100 */ 101 const void* caller); 102 103 /** 104 * Binary form of libxsmm_scratch_malloc, which 105 * expands the call-context automatically. This 106 * macro is intentionally lower case. 107 */ 108 #define libxsmm_aligned_scratch(size, alignment) \ 109 libxsmm_scratch_malloc(size, alignment, \ 110 LIBXSMM_CALLER_ID) 111 112 /** Deallocate memory (malloc/free interface). */ 113 LIBXSMM_API void libxsmm_free(const void* memory); 114 115 /** 116 * Release the entire scratch memory regardless 117 * of whether it is still referenced or not. 118 */ 119 LIBXSMM_API void libxsmm_release_scratch(void); 120 121 /** Information about a buffer (default memory domain). */ 122 LIBXSMM_EXTERN_C typedef struct LIBXSMM_RETARGETABLE libxsmm_malloc_info { 123 /** Size of the buffer. */ 124 size_t size; 125 } libxsmm_malloc_info; 126 127 /** Retrieve information about a buffer (default memory domain). */ 128 LIBXSMM_API int libxsmm_get_malloc_info(const void* memory, libxsmm_malloc_info* info); 129 130 /** Information about the scratch memory domain. */ 131 LIBXSMM_EXTERN_C typedef struct LIBXSMM_RETARGETABLE libxsmm_scratch_info { 132 /** Watermark memory across pools (size), unsatisfied (local), and library-internal memory. */ 133 size_t size, local, internal; 134 /** Pending allocations (not released). */ 135 size_t npending; 136 /** Number of allocations so far. */ 137 size_t nmallocs; 138 /** Number of pools used. */ 139 unsigned int npools; 140 } libxsmm_scratch_info; 141 142 /** Retrieve information about the scratch memory domain. */ 143 LIBXSMM_API int libxsmm_get_scratch_info(libxsmm_scratch_info* info); 144 145 /** 146 * Limit the total size (Bytes) of the scratch memory. 147 * LIBXSMM_SCRATCH_UNLIMITED removes any limit, and 148 * LIBXSMM_SCRATCH_DEFAULT populates the default. 149 * The related environment variable LIBXSMM_SCRATCH_LIMIT 150 * allows units: <none>/b/B (Bytes), k/K, m/M, and g/G. 151 */ 152 LIBXSMM_API void libxsmm_set_scratch_limit(size_t nbytes); 153 /** Get the maximum size of the scratch memory domain. */ 154 LIBXSMM_API size_t libxsmm_get_scratch_limit(void); 155 156 /** 157 * Intercepts malloc/free to use scratch memory allocator. 158 * (related environment variable LIBXSMM_MALLOC). 159 * Optionally set the range of malloc-sizes to be intercepted. 160 * The related environment variable LIBXSMM_MALLOC_LIMIT 161 * allows units: <none>/b/B (Bytes), k/K, m/M, and g/G. 162 */ 163 LIBXSMM_API void libxsmm_set_malloc(int enabled, const size_t* lo, const size_t* hi); 164 /** 165 * Determines if malloc/free are (and can be) intercepted. 166 * Optionally gets the range of enabled malloc-sizes. 167 */ 168 LIBXSMM_API int libxsmm_get_malloc(size_t* lo, size_t* hi); 169 170 /** 171 * Calculate the linear offset of the n-dimensional (ndims) offset (can be NULL), 172 * and the (optional) linear size of the corresponding shape. 173 */ 174 LIBXSMM_API size_t libxsmm_offset(const size_t offset[], const size_t shape[], size_t ndims, size_t* size); 175 176 177 #if defined(__cplusplus) 178 179 /** RAII idiom to temporarily setup an allocator for the lifetime of the scope. */ 180 template<typename kind> class LIBXSMM_RETARGETABLE libxsmm_scoped_allocator { 181 public: 182 /** C'tor, which instantiates the new allocator (plain form). */ libxsmm_scoped_allocator(libxsmm_malloc_fun malloc_fn,libxsmm_free_fun free_fn)183 libxsmm_scoped_allocator(libxsmm_malloc_fun malloc_fn, libxsmm_free_fun free_fn) { 184 kind::get(m_context, m_malloc, m_free); 185 kind::set(NULL/*context*/, NULL/*malloc_ctx*/, NULL/*free_ctx*/, malloc_fn, free_fn); 186 } 187 188 /** C'tor, which instantiates the new allocator (context form). */ 189 libxsmm_scoped_allocator(const void* context, libxsmm_malloc_ctx malloc_ctx, libxsmm_free_ctx free_ctx, 190 libxsmm_malloc_fun malloc_fun = NULL, libxsmm_free_fun free_fun = NULL) 191 { 192 kind::get(m_context, m_malloc, m_free); 193 kind::set(context, malloc_ctx, free_ctx, malloc_fun, free_fun); 194 } 195 196 /** Following the RAII idiom, the d'tor restores the previous allocator. */ ~libxsmm_scoped_allocator()197 ~libxsmm_scoped_allocator() { 198 kind::set(m_context, 199 m_malloc.ctx_form, m_free.ctx_form, 200 m_malloc.function, m_free.function); 201 } 202 203 private: /* no copy/assignment */ 204 explicit libxsmm_scoped_allocator(const libxsmm_scoped_allocator&); 205 libxsmm_scoped_allocator& operator=(const libxsmm_scoped_allocator&); 206 207 protected: /* saved/previous allocator */ 208 const void* m_context; 209 libxsmm_malloc_function m_malloc; 210 libxsmm_free_function m_free; 211 }; 212 213 /** Allocator-kind to instantiate libxsmm_scoped_allocator<kind>. */ 214 struct LIBXSMM_RETARGETABLE libxsmm_default_allocator { setlibxsmm_default_allocator215 static void set(const void* context, 216 libxsmm_malloc_ctx malloc_ctx, libxsmm_free_ctx free_ctx, 217 libxsmm_malloc_fun malloc_fun, libxsmm_free_fun free_fun) 218 { 219 libxsmm_malloc_function malloc_fn; 220 libxsmm_free_function free_fn; 221 if (NULL == context) { /* use global form only when no context is given */ 222 malloc_fn.function = malloc_fun; free_fn.function = free_fun; 223 } 224 else { 225 malloc_fn.ctx_form = malloc_ctx; free_fn.ctx_form = free_ctx; 226 } 227 libxsmm_set_default_allocator(context, malloc_fn, free_fn); 228 } getlibxsmm_default_allocator229 static void get(const void*& context, 230 libxsmm_malloc_function& malloc_fn, libxsmm_free_function& free_fn) 231 { 232 libxsmm_get_default_allocator(&context, &malloc_fn, &free_fn); 233 } 234 }; 235 236 /** Allocator-kind to instantiate libxsmm_scoped_allocator<kind>. */ 237 struct LIBXSMM_RETARGETABLE libxsmm_scratch_allocator { setlibxsmm_scratch_allocator238 static void set(const void* context, 239 libxsmm_malloc_ctx malloc_ctx, libxsmm_free_ctx free_ctx, 240 libxsmm_malloc_fun malloc_fun, libxsmm_free_fun free_fun) 241 { 242 libxsmm_malloc_function malloc_fn; 243 libxsmm_free_function free_fn; 244 if (NULL != context) { /* adopt context form */ 245 malloc_fn.function = malloc_fun; free_fn.function = free_fun; 246 } 247 else { /* adopt global form */ 248 malloc_fn.ctx_form = malloc_ctx; free_fn.ctx_form = free_ctx; 249 } 250 libxsmm_set_scratch_allocator(context, malloc_fn, free_fn); 251 } getlibxsmm_scratch_allocator252 static void get(const void*& context, 253 libxsmm_malloc_function& malloc_fn, libxsmm_free_function& free_fn) 254 { 255 libxsmm_get_scratch_allocator(&context, &malloc_fn, &free_fn); 256 } 257 }; 258 259 /** Forward-declared types/functions used to implement libxsmm_tf_allocator. */ 260 namespace tensorflow { 261 class Allocator; 262 #if defined(LIBXSMM_TF12) 263 class DeviceBase; int DeviceNumaNode(const DeviceBase* /*device*/); 264 Allocator* cpu_allocator(int /*numa_node*/); 265 #else 266 Allocator* cpu_allocator(); 267 #endif 268 } 269 270 /** 271 * An object of this type adopts a memory allocator from TensorFlow. 272 * All memory allocations of the requested kind within the current 273 * scope (where the libxsmm_tf_allocator object lives) are subject 274 * to TensorFlow's memory allocation scheme. The allocation kind 275 * is usually "libxsmm_scratch_allocator"; using a second object 276 * of kind "libxsmm_default_allocator" makes the default memory 277 * allocation of LIBXSMM subject to TensorFlow as well. 278 */ 279 template<typename kind> class LIBXSMM_RETARGETABLE libxsmm_tf_allocator: 280 public libxsmm_scoped_allocator<kind> 281 { 282 public: 283 /** The TensorFlow allocator is adopted from the global CPU memory allocator. */ libxsmm_tf_allocator()284 explicit libxsmm_tf_allocator() 285 : libxsmm_scoped_allocator<kind>( 286 libxsmm_tf_allocator::malloc, 287 libxsmm_tf_allocator::free) 288 {} 289 290 /** The TensorFlow allocator is adopted from the given OpKernelContext. */ 291 template<typename context_type> libxsmm_tf_allocator(context_type & context)292 explicit libxsmm_tf_allocator(context_type& context) 293 : libxsmm_scoped_allocator<kind>(&context, 294 libxsmm_tf_allocator::template malloc_ctx<context_type>, 295 libxsmm_tf_allocator::template free_ctx<context_type>, 296 libxsmm_tf_allocator::malloc, 297 libxsmm_tf_allocator::free) 298 {} 299 300 /** Global form of allocating memory (malloc signature). */ malloc(size_t size)301 static void* malloc(size_t size) { 302 #if defined(LIBXSMM_TF12) 303 return libxsmm_tf_allocator::allocate(tensorflow::cpu_allocator(-1/*kNUMANoAffinity*/), size); 304 #else 305 return libxsmm_tf_allocator::allocate(tensorflow::cpu_allocator(), size); 306 #endif 307 } 308 309 /** Global form of deallocating memory (free signature). */ free(void * buffer)310 static void free(void* buffer) { 311 #if defined(LIBXSMM_TF12) 312 libxsmm_tf_allocator::deallocate(tensorflow::cpu_allocator(-1/*kNUMANoAffinity*/), buffer); 313 #else 314 libxsmm_tf_allocator::deallocate(tensorflow::cpu_allocator(), buffer); 315 #endif 316 } 317 318 /** Context based form of allocating memory. */ malloc_ctx(const void * context,size_t size)319 template<typename context_type> static void* malloc_ctx(const void* context, size_t size) { 320 typedef typename context_type::WrappedAllocator::first_type allocator_ptr; 321 context_type *const tf_context = static_cast<context_type*>(context); 322 allocator_ptr allocator = NULL; 323 if (NULL != tf_context) { 324 #if !defined(LIBXSMM_TF12) 325 if (NULL != tf_context->device()) { 326 if (0 < tf_context->num_outputs()) { 327 allocator = tf_context->device()->GetStepAllocator( 328 tf_context->output_alloc_attr(0), 329 tf_context->resource_manager()); 330 } 331 else if (0 < tf_context->num_inputs()) { 332 allocator = tf_context->device()->GetStepAllocator( 333 tf_context->input_alloc_attr(0), 334 tf_context->resource_manager()); 335 } 336 } 337 #else /* include tensorflow/core/public/version.h prior to LIBXSMM otherwise the current TensorFlow API is assumed */ 338 const int numa_node = DeviceNumaNode(tf_context->device()); 339 allocator = tensorflow::cpu_allocator(numa_node); 340 #endif 341 } 342 return libxsmm_tf_allocator::allocate(allocator, size); 343 } 344 345 /** Context based form of deallocating memory. */ free_ctx(const void * context,void * buffer)346 template<typename context_type> static void free_ctx(const void* context, void* buffer) { 347 typedef typename context_type::WrappedAllocator::first_type allocator_ptr; 348 context_type *const tf_context = static_cast<context_type*>(context); 349 allocator_ptr allocator = NULL; 350 if (NULL != tf_context) { 351 #if defined(LIBXSMM_TF12) 352 const int numa_node = DeviceNumaNode(tf_context->device()); 353 allocator = tensorflow::cpu_allocator(numa_node); 354 #else 355 if (NULL != tf_context->device()) { 356 if (0 < tf_context->num_outputs()) { 357 allocator = tf_context->device()->GetStepAllocator( 358 tf_context->output_alloc_attr(0), 359 tf_context->resource_manager()); 360 } 361 else if (0 < tf_context->num_inputs()) { 362 allocator = tf_context->device()->GetStepAllocator( 363 tf_context->input_alloc_attr(0), 364 tf_context->resource_manager()); 365 } 366 } 367 #endif 368 } 369 libxsmm_tf_allocator::deallocate(allocator, buffer); 370 } 371 372 private: 373 template<typename allocator_ptr> /* break interface dependency with TF */ allocate(allocator_ptr allocator,size_t size)374 static void* allocate(allocator_ptr allocator, size_t size) { 375 void* result; 376 if (NULL != allocator) { 377 /* no (useless) waste with alignment; raw result is re-aligned anyways */ 378 result = allocator->AllocateRaw(1/*alignment*/, size); 379 } 380 else { 381 LIBXSMM_ASSERT_MSG(0/*false*/, "LIBXSMM ERROR: memory allocator is missing"); 382 result = NULL; 383 } 384 return result; 385 } 386 387 template<typename allocator_ptr> /* break interface dependency with TF */ deallocate(allocator_ptr allocator,void * buffer)388 static void deallocate(allocator_ptr allocator, void* buffer) { 389 LIBXSMM_ASSERT_MSG(NULL != allocator, "LIBXSMM ERROR: memory allocator is missing"); 390 if (NULL != allocator) allocator->DeallocateRaw(buffer); 391 } 392 }; 393 394 #endif /*defined(__cplusplus)*/ 395 396 #endif /*LIBXSMM_MALLOC_H*/ 397 398