1 /******************************************************************************
2 * Copyright (c) Intel Corporation - All rights reserved.                      *
3 * This file is part of the LIBXSMM library.                                   *
4 *                                                                             *
5 * For information on the license, see the LICENSE file.                       *
6 * Further information: https://github.com/hfp/libxsmm/                        *
7 * SPDX-License-Identifier: BSD-3-Clause                                       *
8 ******************************************************************************/
9 /* Hans Pabst (Intel Corp.)
10 ******************************************************************************/
11 #ifndef LIBXSMM_MALLOC_H
12 #define LIBXSMM_MALLOC_H
13 
14 #include "libxsmm_memory.h"
15 
16 /* include tensorflow/core/public/version.h prior to LIBXSMM otherwise the current TensorFlow API is assumed */
17 #if !defined(LIBXSMM_TF12) && (!defined(TF_VERSION_STRING) || \
18   LIBXSMM_VERSION2(1, 12) <= LIBXSMM_VERSION2(TF_MAJOR_VERSION, TF_MINOR_VERSION))
19 # define LIBXSMM_TF12 /* TF_PATCH_VERSION does not matter */
20 #endif
21 
22 /** Can be used with libxsmm_[get|set]_scratch_limit. */
23 #define LIBXSMM_SCRATCH_UNLIMITED ((size_t)LIBXSMM_UNLIMITED)
24 #define LIBXSMM_SCRATCH_DEFAULT 0
25 
26 
27 /** Function types accepted for memory allocation (see libxsmm_*_allocator). */
28 LIBXSMM_EXTERN_C typedef LIBXSMM_RETARGETABLE void* (*libxsmm_malloc_ctx)(size_t /*size*/, const void* /*context*/);
29 LIBXSMM_EXTERN_C typedef LIBXSMM_RETARGETABLE void* (*libxsmm_malloc_fun)(size_t /*size*/);
30 LIBXSMM_EXTERN_C typedef union LIBXSMM_RETARGETABLE libxsmm_malloc_function {
31   libxsmm_malloc_ctx ctx_form;
32   libxsmm_malloc_fun function;
33 } libxsmm_malloc_function;
34 
35 /** Function types accepted for releasing memory (see libxsmm_*_allocator). */
36 LIBXSMM_EXTERN_C typedef LIBXSMM_RETARGETABLE void (*libxsmm_free_ctx)(void* /*buffer*/, const void* /*context*/);
37 LIBXSMM_EXTERN_C typedef LIBXSMM_RETARGETABLE void (*libxsmm_free_fun)(void* /*buffer*/);
38 LIBXSMM_EXTERN_C typedef union LIBXSMM_RETARGETABLE libxsmm_free_function {
39   libxsmm_free_ctx ctx_form;
40   libxsmm_free_fun function;
41 } libxsmm_free_function;
42 
/**
 * To setup the custom default memory allocator, either a malloc_fn and a free_fn
 * are given, or two NULL-pointers are given in order to reset the default allocator
 * to the library-internal default. If a context is given (non-NULL), the context-based
 * form of the memory allocation is used.
 * Changing the allocator including the function for deallocation applies to
 * upcoming allocation/deallocation and works correctly for pending buffers.
 */
51 LIBXSMM_API int libxsmm_set_default_allocator(/* malloc_fn/free_fn must correspond */
52   const void* context, libxsmm_malloc_function malloc_fn, libxsmm_free_function free_fn);
53 /** Retrieve the default memory allocator. */
54 LIBXSMM_API int libxsmm_get_default_allocator(const void** context,
55   libxsmm_malloc_function* malloc_fn, libxsmm_free_function* free_fn);
56 
57 /**
58  * To setup the scratch memory allocator, a malloc_fn function and an optional free_fn
59  * are given. A NULL-free acts as a "no-operation", and the deallocation is expected
60  * to be controlled otherwise. If two NULL-pointers are given, the allocator is reset
61  * to the currently active default memory allocator. If a context is given (non-NULL),
62  * the context-based form of the memory allocation is used.
63  * Changing the allocator including the function for deallocation applies to
64  * upcoming allocation/deallocation and works correctly for pending buffers.
65  */
66 LIBXSMM_API int libxsmm_set_scratch_allocator(/* malloc_fn/free_fn must correspond */
67   const void* context, libxsmm_malloc_function malloc_fn, libxsmm_free_function free_fn);
68 /** Retrieve the scratch memory allocator. */
69 LIBXSMM_API int libxsmm_get_scratch_allocator(const void** context,
70   libxsmm_malloc_function* malloc_fn, libxsmm_free_function* free_fn);
71 
72 /** Allocate memory (malloc/free interface). */
73 LIBXSMM_API LIBXSMM_ATTRIBUTE_MALLOC void* libxsmm_malloc(size_t size);
74 
75 /** Allocate aligned memory using the default allocator. */
76 LIBXSMM_API LIBXSMM_ATTRIBUTE_MALLOC void* libxsmm_aligned_malloc(size_t size,
77   /**
78    * =0: align automatically according to the size
79    * 0<: align according to the alignment value
80    */
81   size_t alignment);
82 
83 /** Reallocate memory using the default allocator (alignment is preserved). */
84 LIBXSMM_API void* libxsmm_realloc(size_t size, void* ptr);
85 
/**
 * Allocate aligned scratch memory. It is not supported
 * to query properties per libxsmm_get_malloc_info, but
 * libxsmm_get_scratch_info can be used instead.
 */
91 LIBXSMM_API void* libxsmm_scratch_malloc(size_t size,
92   /**
93    * =0: align automatically according to the size
94    * 0<: align according to the alignment value
95    */
96   size_t alignment,
97   /**
98    * Identifies the call site, which is used
99    * to determine the memory pool.
100    */
101   const void* caller);
102 
/**
 * Two-argument form of libxsmm_scratch_malloc: the caller argument
 * is filled in automatically with LIBXSMM_CALLER_ID.
 * The name of this macro is intentionally lower case.
 */
#define libxsmm_aligned_scratch(size, alignment) \
  libxsmm_scratch_malloc(size, alignment, LIBXSMM_CALLER_ID)
111 
112 /** Deallocate memory (malloc/free interface). */
113 LIBXSMM_API void libxsmm_free(const void* memory);
114 
115 /**
116  * Release the entire scratch memory regardless
117  * of whether it is still referenced or not.
118  */
119 LIBXSMM_API void libxsmm_release_scratch(void);
120 
121 /** Information about a buffer (default memory domain). */
122 LIBXSMM_EXTERN_C typedef struct LIBXSMM_RETARGETABLE libxsmm_malloc_info {
123   /** Size of the buffer. */
124   size_t size;
125 } libxsmm_malloc_info;
126 
127 /** Retrieve information about a buffer (default memory domain). */
128 LIBXSMM_API int libxsmm_get_malloc_info(const void* memory, libxsmm_malloc_info* info);
129 
130 /** Information about the scratch memory domain. */
131 LIBXSMM_EXTERN_C typedef struct LIBXSMM_RETARGETABLE libxsmm_scratch_info {
132   /** Watermark memory across pools (size), unsatisfied (local), and library-internal memory. */
133   size_t size, local, internal;
134   /** Pending allocations (not released). */
135   size_t npending;
136   /** Number of allocations so far. */
137   size_t nmallocs;
138   /** Number of pools used. */
139   unsigned int npools;
140 } libxsmm_scratch_info;
141 
142 /** Retrieve information about the scratch memory domain. */
143 LIBXSMM_API int libxsmm_get_scratch_info(libxsmm_scratch_info* info);
144 
145 /**
146  * Limit the total size (Bytes) of the scratch memory.
147  * LIBXSMM_SCRATCH_UNLIMITED removes any limit, and
148  * LIBXSMM_SCRATCH_DEFAULT populates the default.
149  * The related environment variable LIBXSMM_SCRATCH_LIMIT
150  * allows units: <none>/b/B (Bytes), k/K, m/M, and g/G.
151  */
152 LIBXSMM_API void libxsmm_set_scratch_limit(size_t nbytes);
153 /** Get the maximum size of the scratch memory domain. */
154 LIBXSMM_API size_t libxsmm_get_scratch_limit(void);
155 
156 /**
157  * Intercepts malloc/free to use scratch memory allocator.
158  * (related environment variable LIBXSMM_MALLOC).
159  * Optionally set the range of malloc-sizes to be intercepted.
160  * The related environment variable LIBXSMM_MALLOC_LIMIT
161  * allows units: <none>/b/B (Bytes), k/K, m/M, and g/G.
162  */
163 LIBXSMM_API void libxsmm_set_malloc(int enabled, const size_t* lo, const size_t* hi);
164 /**
165  * Determines if malloc/free are (and can be) intercepted.
166  * Optionally gets the range of enabled malloc-sizes.
167  */
168 LIBXSMM_API int libxsmm_get_malloc(size_t* lo, size_t* hi);
169 
170 /**
171  * Calculate the linear offset of the n-dimensional (ndims) offset (can be NULL),
172  * and the (optional) linear size of the corresponding shape.
173  */
174 LIBXSMM_API size_t libxsmm_offset(const size_t offset[], const size_t shape[], size_t ndims, size_t* size);
175 
176 
177 #if defined(__cplusplus)
178 
179 /** RAII idiom to temporarily setup an allocator for the lifetime of the scope. */
180 template<typename kind> class LIBXSMM_RETARGETABLE libxsmm_scoped_allocator {
181 public:
182   /** C'tor, which instantiates the new allocator (plain form). */
libxsmm_scoped_allocator(libxsmm_malloc_fun malloc_fn,libxsmm_free_fun free_fn)183   libxsmm_scoped_allocator(libxsmm_malloc_fun malloc_fn, libxsmm_free_fun free_fn) {
184     kind::get(m_context, m_malloc, m_free);
185     kind::set(NULL/*context*/, NULL/*malloc_ctx*/, NULL/*free_ctx*/, malloc_fn, free_fn);
186   }
187 
188   /** C'tor, which instantiates the new allocator (context form). */
189   libxsmm_scoped_allocator(const void* context, libxsmm_malloc_ctx malloc_ctx, libxsmm_free_ctx free_ctx,
190     libxsmm_malloc_fun malloc_fun = NULL, libxsmm_free_fun free_fun = NULL)
191   {
192     kind::get(m_context, m_malloc, m_free);
193     kind::set(context, malloc_ctx, free_ctx, malloc_fun, free_fun);
194   }
195 
196   /** Following the RAII idiom, the d'tor restores the previous allocator. */
~libxsmm_scoped_allocator()197   ~libxsmm_scoped_allocator() {
198     kind::set(m_context,
199       m_malloc.ctx_form, m_free.ctx_form,
200       m_malloc.function, m_free.function);
201   }
202 
203 private: /* no copy/assignment */
204   explicit libxsmm_scoped_allocator(const libxsmm_scoped_allocator&);
205   libxsmm_scoped_allocator& operator=(const libxsmm_scoped_allocator&);
206 
207 protected: /* saved/previous allocator */
208   const void* m_context;
209   libxsmm_malloc_function m_malloc;
210   libxsmm_free_function m_free;
211 };
212 
213 /** Allocator-kind to instantiate libxsmm_scoped_allocator<kind>. */
214 struct LIBXSMM_RETARGETABLE libxsmm_default_allocator {
setlibxsmm_default_allocator215   static void set(const void* context,
216     libxsmm_malloc_ctx malloc_ctx, libxsmm_free_ctx free_ctx,
217     libxsmm_malloc_fun malloc_fun, libxsmm_free_fun free_fun)
218   {
219     libxsmm_malloc_function malloc_fn;
220     libxsmm_free_function free_fn;
221     if (NULL == context) { /* use global form only when no context is given */
222       malloc_fn.function = malloc_fun; free_fn.function = free_fun;
223     }
224     else {
225       malloc_fn.ctx_form = malloc_ctx; free_fn.ctx_form = free_ctx;
226     }
227     libxsmm_set_default_allocator(context, malloc_fn, free_fn);
228   }
getlibxsmm_default_allocator229   static void get(const void*& context,
230     libxsmm_malloc_function& malloc_fn, libxsmm_free_function& free_fn)
231   {
232     libxsmm_get_default_allocator(&context, &malloc_fn, &free_fn);
233   }
234 };
235 
236 /** Allocator-kind to instantiate libxsmm_scoped_allocator<kind>. */
237 struct LIBXSMM_RETARGETABLE libxsmm_scratch_allocator {
setlibxsmm_scratch_allocator238   static void set(const void* context,
239     libxsmm_malloc_ctx malloc_ctx, libxsmm_free_ctx free_ctx,
240     libxsmm_malloc_fun malloc_fun, libxsmm_free_fun free_fun)
241   {
242     libxsmm_malloc_function malloc_fn;
243     libxsmm_free_function free_fn;
244     if (NULL != context) { /* adopt context form */
245       malloc_fn.function = malloc_fun; free_fn.function = free_fun;
246     }
247     else { /* adopt global form */
248       malloc_fn.ctx_form = malloc_ctx; free_fn.ctx_form = free_ctx;
249     }
250     libxsmm_set_scratch_allocator(context, malloc_fn, free_fn);
251   }
getlibxsmm_scratch_allocator252   static void get(const void*& context,
253     libxsmm_malloc_function& malloc_fn, libxsmm_free_function& free_fn)
254   {
255     libxsmm_get_scratch_allocator(&context, &malloc_fn, &free_fn);
256   }
257 };
258 
/**
 * Forward declarations of the TensorFlow types/functions, which
 * are referred to by the implementation of libxsmm_tf_allocator.
 */
namespace tensorflow {
  class Allocator;
#if !defined(LIBXSMM_TF12)
  Allocator* cpu_allocator();
#else /* cpu_allocator takes a NUMA node */
  class DeviceBase;
  int DeviceNumaNode(const DeviceBase* /*device*/);
  Allocator* cpu_allocator(int /*numa_node*/);
#endif
}
269 
270 /**
271  * An object of this type adopts a memory allocator from TensorFlow.
272  * All memory allocations of the requested kind within the current
273  * scope (where the libxsmm_tf_allocator object lives) are subject
274  * to TensorFlow's memory allocation scheme. The allocation kind
275  * is usually "libxsmm_scratch_allocator"; using a second object
276  * of kind "libxsmm_default_allocator" makes the default memory
277  * allocation of LIBXSMM subject to TensorFlow as well.
278  */
279 template<typename kind> class LIBXSMM_RETARGETABLE libxsmm_tf_allocator:
280   public libxsmm_scoped_allocator<kind>
281 {
282 public:
283   /** The TensorFlow allocator is adopted from the global CPU memory allocator. */
libxsmm_tf_allocator()284   explicit libxsmm_tf_allocator()
285     : libxsmm_scoped_allocator<kind>(
286       libxsmm_tf_allocator::malloc,
287       libxsmm_tf_allocator::free)
288   {}
289 
290   /** The TensorFlow allocator is adopted from the given OpKernelContext. */
291   template<typename context_type>
libxsmm_tf_allocator(context_type & context)292   explicit libxsmm_tf_allocator(context_type& context)
293     : libxsmm_scoped_allocator<kind>(&context,
294       libxsmm_tf_allocator::template malloc_ctx<context_type>,
295       libxsmm_tf_allocator::template free_ctx<context_type>,
296       libxsmm_tf_allocator::malloc,
297       libxsmm_tf_allocator::free)
298   {}
299 
300   /** Global form of allocating memory (malloc signature). */
malloc(size_t size)301   static void* malloc(size_t size) {
302 #if defined(LIBXSMM_TF12)
303     return libxsmm_tf_allocator::allocate(tensorflow::cpu_allocator(-1/*kNUMANoAffinity*/), size);
304 #else
305     return libxsmm_tf_allocator::allocate(tensorflow::cpu_allocator(), size);
306 #endif
307   }
308 
309   /** Global form of deallocating memory (free signature). */
free(void * buffer)310   static void free(void* buffer) {
311 #if defined(LIBXSMM_TF12)
312     libxsmm_tf_allocator::deallocate(tensorflow::cpu_allocator(-1/*kNUMANoAffinity*/), buffer);
313 #else
314     libxsmm_tf_allocator::deallocate(tensorflow::cpu_allocator(), buffer);
315 #endif
316   }
317 
318   /** Context based form of allocating memory. */
malloc_ctx(const void * context,size_t size)319   template<typename context_type> static void* malloc_ctx(const void* context, size_t size) {
320     typedef typename context_type::WrappedAllocator::first_type allocator_ptr;
321     context_type *const tf_context = static_cast<context_type*>(context);
322     allocator_ptr allocator = NULL;
323     if (NULL != tf_context) {
324 #if !defined(LIBXSMM_TF12)
325       if (NULL != tf_context->device()) {
326         if (0 < tf_context->num_outputs()) {
327           allocator = tf_context->device()->GetStepAllocator(
328             tf_context->output_alloc_attr(0),
329             tf_context->resource_manager());
330         }
331         else if (0 < tf_context->num_inputs()) {
332           allocator = tf_context->device()->GetStepAllocator(
333             tf_context->input_alloc_attr(0),
334             tf_context->resource_manager());
335         }
336       }
337 #else /* include tensorflow/core/public/version.h prior to LIBXSMM otherwise the current TensorFlow API is assumed */
338       const int numa_node = DeviceNumaNode(tf_context->device());
339       allocator = tensorflow::cpu_allocator(numa_node);
340 #endif
341     }
342     return libxsmm_tf_allocator::allocate(allocator, size);
343   }
344 
345   /** Context based form of deallocating memory. */
free_ctx(const void * context,void * buffer)346   template<typename context_type> static void free_ctx(const void* context, void* buffer) {
347     typedef typename context_type::WrappedAllocator::first_type allocator_ptr;
348     context_type *const tf_context = static_cast<context_type*>(context);
349     allocator_ptr allocator = NULL;
350     if (NULL != tf_context) {
351 #if defined(LIBXSMM_TF12)
352       const int numa_node = DeviceNumaNode(tf_context->device());
353       allocator = tensorflow::cpu_allocator(numa_node);
354 #else
355       if (NULL != tf_context->device()) {
356         if (0 < tf_context->num_outputs()) {
357           allocator = tf_context->device()->GetStepAllocator(
358             tf_context->output_alloc_attr(0),
359             tf_context->resource_manager());
360         }
361         else if (0 < tf_context->num_inputs()) {
362           allocator = tf_context->device()->GetStepAllocator(
363             tf_context->input_alloc_attr(0),
364             tf_context->resource_manager());
365         }
366       }
367 #endif
368     }
369     libxsmm_tf_allocator::deallocate(allocator, buffer);
370   }
371 
372 private:
373   template<typename allocator_ptr> /* break interface dependency with TF */
allocate(allocator_ptr allocator,size_t size)374   static void* allocate(allocator_ptr allocator, size_t size) {
375     void* result;
376     if (NULL != allocator) {
377     /* no (useless) waste with alignment; raw result is re-aligned anyways */
378       result = allocator->AllocateRaw(1/*alignment*/, size);
379     }
380     else {
381       LIBXSMM_ASSERT_MSG(0/*false*/, "LIBXSMM ERROR: memory allocator is missing");
382       result = NULL;
383     }
384     return result;
385   }
386 
387   template<typename allocator_ptr> /* break interface dependency with TF */
deallocate(allocator_ptr allocator,void * buffer)388   static void deallocate(allocator_ptr allocator, void* buffer) {
389     LIBXSMM_ASSERT_MSG(NULL != allocator, "LIBXSMM ERROR: memory allocator is missing");
390     if (NULL != allocator) allocator->DeallocateRaw(buffer);
391   }
392 };
393 
394 #endif /*defined(__cplusplus)*/
395 
396 #endif /*LIBXSMM_MALLOC_H*/
397 
398