1 /******************************************************************************
2 * Copyright (c) Intel Corporation - All rights reserved.                      *
3 * This file is part of the LIBXSMM library.                                   *
4 *                                                                             *
5 * For information on the license, see the LICENSE file.                       *
6 * Further information: https://github.com/hfp/libxsmm/                        *
7 * SPDX-License-Identifier: BSD-3-Clause                                       *
8 ******************************************************************************/
9 /* Hans Pabst (Intel Corp.)
10 ******************************************************************************/
11 #include "libxsmm_trace.h"
12 #include "libxsmm_main.h"
13 #include "libxsmm_hash.h"
14 
15 #if defined(LIBXSMM_OFFLOAD_TARGET)
16 # pragma offload_attribute(push,target(LIBXSMM_OFFLOAD_TARGET))
17 #endif
18 #if (defined(LIBXSMM_BUILD) && (1 < (LIBXSMM_BUILD)))
19 # include <features.h>
20 # include <malloc.h>
21 #endif
22 #if !defined(LIBXSMM_MALLOC_GLIBC)
23 # if defined(__GLIBC__)
24 #   define LIBXSMM_MALLOC_GLIBC __GLIBC__
25 # else
26 #   define LIBXSMM_MALLOC_GLIBC 6
27 # endif
28 #endif
29 #if defined(_WIN32)
30 # include <windows.h>
31 # include <malloc.h>
32 # include <intrin.h>
33 #else
34 # include <sys/mman.h>
35 # if defined(__linux__)
36 #   include <linux/mman.h>
37 #   include <sys/syscall.h>
38 # endif
39 # if defined(MAP_POPULATE)
40 #   include <sys/utsname.h>
41 # endif
42 # include <sys/types.h>
43 # include <unistd.h>
44 # include <errno.h>
45 # if defined(__MAP_ANONYMOUS)
46 #   define LIBXSMM_MAP_ANONYMOUS __MAP_ANONYMOUS
47 # elif defined(MAP_ANONYMOUS)
48 #   define LIBXSMM_MAP_ANONYMOUS MAP_ANONYMOUS
49 # elif defined(MAP_ANON)
50 #   define LIBXSMM_MAP_ANONYMOUS MAP_ANON
51 # else
52 #  define LIBXSMM_MAP_ANONYMOUS 0x20
53 # endif
54 # if defined(MAP_SHARED) && 0
55 #   define LIBXSMM_MAP_SHARED MAP_SHARED
56 # else
57 #   define LIBXSMM_MAP_SHARED 0
58 # endif
59 LIBXSMM_EXTERN int ftruncate(int, off_t) LIBXSMM_THROW;
60 LIBXSMM_EXTERN int mkstemp(char*) LIBXSMM_NOTHROW;
61 #endif
62 #if !defined(LIBXSMM_MALLOC_FALLBACK)
63 # define LIBXSMM_MALLOC_FINAL 3
64 #endif
65 #if defined(LIBXSMM_VTUNE)
66 # if (2 <= LIBXSMM_VTUNE) /* no header file required */
67 #   if !defined(LIBXSMM_VTUNE_JITVERSION)
68 #     define LIBXSMM_VTUNE_JITVERSION LIBXSMM_VTUNE
69 #   endif
70 #   define LIBXSMM_VTUNE_JIT_DESC_TYPE iJIT_Method_Load_V2
71 #   define LIBXSMM_VTUNE_JIT_LOAD 21
72 #   define LIBXSMM_VTUNE_JIT_UNLOAD 14
73 #   define iJIT_SAMPLING_ON 0x0001
74 LIBXSMM_EXTERN unsigned int iJIT_GetNewMethodID(void);
75 LIBXSMM_EXTERN /*iJIT_IsProfilingActiveFlags*/int iJIT_IsProfilingActive(void);
76 LIBXSMM_EXTERN int iJIT_NotifyEvent(/*iJIT_JVM_EVENT*/int event_type, void *EventSpecificData);
77 LIBXSMM_EXTERN_C typedef struct LineNumberInfo {
78   unsigned int Offset;
79   unsigned int LineNumber;
80 } LineNumberInfo;
81 LIBXSMM_EXTERN_C typedef struct iJIT_Method_Load_V2 {
82   unsigned int method_id;
83   char* method_name;
84   void* method_load_address;
85   unsigned int method_size;
86   unsigned int line_number_size;
87   LineNumberInfo* line_number_table;
88   char* class_file_name;
89   char* source_file_name;
90   char* module_name;
91 } iJIT_Method_Load_V2;
92 # else /* more safe due to header dependency */
93 #   include <jitprofiling.h>
94 #   if !defined(LIBXSMM_VTUNE_JITVERSION)
95 #     define LIBXSMM_VTUNE_JITVERSION 2
96 #   endif
97 #   if (2 <= LIBXSMM_VTUNE_JITVERSION)
98 #     define LIBXSMM_VTUNE_JIT_DESC_TYPE iJIT_Method_Load_V2
99 #     define LIBXSMM_VTUNE_JIT_LOAD iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED_V2
100 #   else
101 #     define LIBXSMM_VTUNE_JIT_DESC_TYPE iJIT_Method_Load
102 #     define LIBXSMM_VTUNE_JIT_LOAD iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED
103 #   endif
104 #   define LIBXSMM_VTUNE_JIT_UNLOAD iJVM_EVENT_TYPE_METHOD_UNLOAD_START
105 # endif
106 # if !defined(LIBXSMM_MALLOC_FALLBACK)
107 #   define LIBXSMM_MALLOC_FALLBACK LIBXSMM_MALLOC_FINAL
108 # endif
109 #else
110 # if !defined(LIBXSMM_MALLOC_FALLBACK)
111 #   define LIBXSMM_MALLOC_FALLBACK 0
112 # endif
113 #endif /*defined(LIBXSMM_VTUNE)*/
114 #if !defined(LIBXSMM_MALLOC_XMAP_TEMPLATE)
115 # define LIBXSMM_MALLOC_XMAP_TEMPLATE ".libxsmm_jit." LIBXSMM_MKTEMP_PATTERN
116 #endif
117 #if defined(LIBXSMM_OFFLOAD_TARGET)
118 # pragma offload_attribute(pop)
119 #endif
120 #if defined(LIBXSMM_PERF)
121 # include "libxsmm_perf.h"
122 #endif
123 
124 #if !defined(LIBXSMM_MALLOC_ALIGNMAX)
125 # define LIBXSMM_MALLOC_ALIGNMAX (2 << 20) /* 2 MB */
126 #endif
127 #if !defined(LIBXSMM_MALLOC_ALIGNFCT)
128 # define LIBXSMM_MALLOC_ALIGNFCT 16
129 #endif
130 #if !defined(LIBXSMM_MALLOC_SEED)
131 # define LIBXSMM_MALLOC_SEED 1051981
132 #endif
133 
134 #if !defined(LIBXSMM_MALLOC_HOOK_KMP) && 0
135 # define LIBXSMM_MALLOC_HOOK_KMP
136 #endif
137 #if !defined(LIBXSMM_MALLOC_HOOK_QKMALLOC) && 0
138 # define LIBXSMM_MALLOC_HOOK_QKMALLOC
139 #endif
140 #if !defined(LIBXSMM_MALLOC_HOOK_IMALLOC) && 1
141 # define LIBXSMM_MALLOC_HOOK_IMALLOC
142 #endif
143 #if !defined(LIBXSMM_MALLOC_HOOK_CHECK) && 0
144 # define LIBXSMM_MALLOC_HOOK_CHECK 1
145 #endif
146 
147 #if !defined(LIBXSMM_MALLOC_CRC_LIGHT) && !defined(_DEBUG) && 1
148 # define LIBXSMM_MALLOC_CRC_LIGHT
149 #endif
150 #if !defined(LIBXSMM_MALLOC_CRC_OFF)
151 # if defined(NDEBUG) && !defined(LIBXSMM_MALLOC_HOOK_STATIC) && !defined(LIBXSMM_MALLOC_HOOK_DYNAMIC)
152 #   define LIBXSMM_MALLOC_CRC_OFF
153 # elif !defined(LIBXSMM_BUILD)
154 #   define LIBXSMM_MALLOC_CRC_OFF
155 # endif
156 #endif
157 
158 #if !defined(LIBXSMM_MALLOC_SCRATCH_LIMIT)
159 # define LIBXSMM_MALLOC_SCRATCH_LIMIT 0xFFFFFFFF /* ~4 GB */
160 #endif
161 #if !defined(LIBXSMM_MALLOC_SCRATCH_PADDING)
162 # define LIBXSMM_MALLOC_SCRATCH_PADDING LIBXSMM_CACHELINE
163 #endif
164 /* pointers are checked first if they belong to scratch */
165 #if !defined(LIBXSMM_MALLOC_SCRATCH_DELETE_FIRST) && 1
166 # define LIBXSMM_MALLOC_SCRATCH_DELETE_FIRST
167 #endif
168 /* can clobber memory if allocations are not exactly scoped */
169 #if !defined(LIBXSMM_MALLOC_SCRATCH_TRIM_HEAD) && 0
170 # define LIBXSMM_MALLOC_SCRATCH_TRIM_HEAD
171 #endif
172 #if !defined(LIBXSMM_MALLOC_SCRATCH_JOIN) && 1
173 # define LIBXSMM_MALLOC_SCRATCH_JOIN
174 #endif
175 #if !defined(LIBXSMM_MALLOC_LOCK_ONFAULT) && 0
176 # if defined(MLOCK_ONFAULT) && defined(SYS_mlock2)
177 #   define LIBXSMM_MALLOC_LOCK_ONFAULT
178 # endif
179 #endif
180 /* protected against double-delete (if possible) */
181 #if !defined(LIBXSMM_MALLOC_DELETE_SAFE) && 0
182 # define LIBXSMM_MALLOC_DELETE_SAFE
183 #endif
184 /* map memory for scratch buffers */
185 #if !defined(LIBXSMM_MALLOC_MMAP_SCRATCH) && 1
186 # define LIBXSMM_MALLOC_MMAP_SCRATCH
187 #endif
188 /* map memory for hooked allocation */
189 #if !defined(LIBXSMM_MALLOC_MMAP_HOOK) && 1
190 # define LIBXSMM_MALLOC_MMAP_HOOK
191 #endif
192 /* map memory also for non-executable buffers */
193 #if !defined(LIBXSMM_MALLOC_MMAP) && 1
194 # define LIBXSMM_MALLOC_MMAP
195 #endif
196 
197 #if defined(LIBXSMM_MALLOC_ALIGN_ALL)
198 # define INTERNAL_MALLOC_AUTOALIGN(SIZE, ALIGNMENT) libxsmm_alignment(SIZE, ALIGNMENT)
199 #else
200 # define INTERNAL_MALLOC_AUTOALIGN(SIZE, ALIGNMENT) (ALIGNMENT)
201 #endif
202 
203 #define INTERNAL_MEMALIGN_HOOK(RESULT, FLAGS, ALIGNMENT, SIZE, CALLER) { \
204   const int internal_memalign_hook_recursive_ = LIBXSMM_ATOMIC_ADD_FETCH( \
205     &internal_malloc_recursive, 1, LIBXSMM_ATOMIC_RELAXED); \
206   if ( 1 < internal_memalign_hook_recursive_ /* protect against recursion */ \
207     || 0 == (internal_malloc_kind & 1) || 0 >= internal_malloc_kind \
208     || (internal_malloc_limit[0] > (SIZE)) \
209     || (internal_malloc_limit[1] < (SIZE) && 0 != internal_malloc_limit[1])) \
210   { \
211     const size_t internal_memalign_hook_alignment_ = INTERNAL_MALLOC_AUTOALIGN(SIZE, ALIGNMENT); \
212     (RESULT) = (0 != internal_memalign_hook_alignment_ \
213       ? __real_memalign(internal_memalign_hook_alignment_, SIZE) \
214       : __real_malloc(SIZE)); \
215   } \
216   else { /* redirect */ \
217     LIBXSMM_INIT \
218     if (NULL == (CALLER)) { /* libxsmm_trace_caller_id may allocate memory */ \
219       internal_scratch_malloc(&(RESULT), SIZE, ALIGNMENT, FLAGS, \
220         libxsmm_trace_caller_id(0/*level*/)); \
221     } \
222     else { \
223       internal_scratch_malloc(&(RESULT), SIZE, ALIGNMENT, FLAGS, CALLER); \
224     } \
225   } \
226   LIBXSMM_ATOMIC_SUB_FETCH(&internal_malloc_recursive, 1, LIBXSMM_ATOMIC_RELAXED); \
227 }
228 
229 #define INTERNAL_REALLOC_HOOK(RESULT, FLAGS, PTR, SIZE, CALLER) { \
230   if (0 == (internal_malloc_kind & 1) || 0 >= internal_malloc_kind \
231     /*|| (0 != LIBXSMM_ATOMIC_LOAD(&internal_malloc_recursive, LIBXSMM_ATOMIC_RELAXED))*/ \
232     || (internal_malloc_limit[0] > (SIZE)) \
233     || (internal_malloc_limit[1] < (SIZE) && 0 != internal_malloc_limit[1])) \
234   { \
235     (RESULT) = __real_realloc(PTR, SIZE); \
236   } \
237   else { \
238     const int nzeros = LIBXSMM_INTRINSICS_BITSCANFWD64((uintptr_t)(PTR)), alignment = 1 << nzeros; \
239     LIBXSMM_ASSERT(0 == ((uintptr_t)(PTR) & ~(0xFFFFFFFFFFFFFFFF << nzeros))); \
240     if (NULL == (CALLER)) { /* libxsmm_trace_caller_id may allocate memory */ \
241       internal_scratch_malloc(&(PTR), SIZE, (size_t)alignment, FLAGS, \
242         libxsmm_trace_caller_id(0/*level*/)); \
243     } \
244     else { \
245       internal_scratch_malloc(&(PTR), SIZE, (size_t)alignment, FLAGS, CALLER); \
246     } \
247     (RESULT) = (PTR); \
248   } \
249 }
250 
251 #define INTERNAL_FREE_HOOK(PTR, CALLER) { \
252   LIBXSMM_UNUSED(CALLER); \
253   if (0 == (internal_malloc_kind & 1) || 0 >= internal_malloc_kind \
254     /*|| (0 != LIBXSMM_ATOMIC_LOAD(&internal_malloc_recursive, LIBXSMM_ATOMIC_RELAXED))*/ \
255   ){ \
256     __real_free(PTR); \
257   } \
258   else { /* recognize pointers not issued by LIBXSMM */ \
259     libxsmm_free(PTR); \
260   } \
261 }
262 
263 #if !defined(WIN32)
264 # if defined(MAP_32BIT)
265 #   define IF_INTERNAL_XMALLOC_MAP32(ENV, MAPSTATE, MFLAGS, SIZE, BUFFER, REPTR) \
266     if (0 != (MAP_32BIT & (MFLAGS))) { \
267       (BUFFER) = internal_xmalloc_xmap(ENV, SIZE, (MFLAGS) & ~MAP_32BIT, REPTR); \
268     } \
269     if (MAP_FAILED != (BUFFER)) (MAPSTATE) = 0; else
270 # else
271 #   define IF_INTERNAL_XMALLOC_MAP32(ENV, MAPSTATE, MFLAGS, SIZE, BUFFER, REPTR)
272 # endif
273 
274 # define INTERNAL_XMALLOC(I, FALLBACK, ENVVAR, ENVDEF, MAPSTATE, MFLAGS, SIZE, BUFFER, REPTR) \
275   if ((I) == (FALLBACK)) { \
276     static const char* internal_xmalloc_env_ = NULL; \
277     if (NULL == internal_xmalloc_env_) { \
278       internal_xmalloc_env_ = getenv(ENVVAR); \
279       if (NULL == internal_xmalloc_env_) internal_xmalloc_env_ = ENVDEF; \
280     } \
281     (BUFFER) = internal_xmalloc_xmap(internal_xmalloc_env_, SIZE, MFLAGS, REPTR); \
282     if (MAP_FAILED == (BUFFER)) { \
283       IF_INTERNAL_XMALLOC_MAP32(internal_xmalloc_env_, MAPSTATE, MFLAGS, SIZE, BUFFER, REPTR) \
284         (FALLBACK) = (I) + 1; \
285     } \
286   }
287 
288 # define INTERNAL_XMALLOC_WATERMARK(NAME, WATERMARK, LIMIT, SIZE) { \
289   const size_t internal_xmalloc_watermark_ = (WATERMARK) + (SIZE) / 2; /* accept data-race */ \
290   if (internal_xmalloc_watermark_ < (LIMIT)) { \
291     static size_t internal_xmalloc_watermark_verbose_ = 0; \
292     (LIMIT) = internal_xmalloc_watermark_; /* accept data-race */ \
293     if (internal_xmalloc_watermark_verbose_ < internal_xmalloc_watermark_ && \
294       (LIBXSMM_VERBOSITY_HIGH <= libxsmm_verbosity || 0 > libxsmm_verbosity)) \
295     { /* muted */ \
296       char internal_xmalloc_watermark_buffer_[32]; \
297       /* coverity[check_return] */ \
298       libxsmm_format_size(internal_xmalloc_watermark_buffer_, sizeof(internal_xmalloc_watermark_buffer_), \
299         internal_xmalloc_watermark_, "KM", "B", 10); \
300       fprintf(stderr, "LIBXSMM WARNING: " NAME " watermark reached at %s!\n", internal_xmalloc_watermark_buffer_); \
301       internal_xmalloc_watermark_verbose_ = internal_xmalloc_watermark_; \
302     } \
303   } \
304 }
305 
306 # define INTERNAL_XMALLOC_KIND(KIND, NAME, FLAG, FLAGS, MFLAGS, WATERMARK, LIMIT, INFO, SIZE, BUFFER) \
307   if (0 != ((KIND) & (MFLAGS))) { \
308     if (MAP_FAILED != (BUFFER)) { \
309       LIBXSMM_ASSERT(NULL != (BUFFER)); \
310       LIBXSMM_ATOMIC_ADD_FETCH(&(WATERMARK), SIZE, LIBXSMM_ATOMIC_RELAXED); \
311       (FLAGS) |= (FLAG); \
312     } \
313     else { /* retry */ \
314       (BUFFER) = mmap(NULL == (INFO) ? NULL : (INFO)->pointer, SIZE, PROT_READ | PROT_WRITE, \
315         MAP_PRIVATE | LIBXSMM_MAP_ANONYMOUS | ((MFLAGS) & ~(KIND)), -1, 0/*offset*/); \
316       if (MAP_FAILED != (BUFFER)) { /* successful retry */ \
317         LIBXSMM_ASSERT(NULL != (BUFFER)); \
318         INTERNAL_XMALLOC_WATERMARK(NAME, WATERMARK, LIMIT, SIZE); \
319       } \
320     } \
321   }
322 #endif
323 
324 
/* Allocation descriptor stored immediately in front of every buffer handed out
 * by LIBXSMM; recovered from a user-pointer by internal_malloc_info. */
LIBXSMM_EXTERN_C typedef struct LIBXSMM_RETARGETABLE internal_malloc_info_type {
  libxsmm_free_function free; /* deallocation function (plain form, or ctx_form when context is set) */
  void *pointer, *reloc;      /* base address of the allocation; secondary/relocated mapping (used for executable buffers) */
  const void* context;        /* context passed to free.ctx_form (NULL selects the plain free.function) */
  size_t size;                /* usable size of the user-visible buffer */
  int flags;                  /* LIBXSMM_MALLOC_FLAG_* recorded at allocation time */
#if defined(LIBXSMM_VTUNE)
  unsigned int code_id;       /* VTune JIT method id (non-zero only for executable buffers) */
#endif
#if !defined(LIBXSMM_MALLOC_CRC_OFF) /* hash *must* be the last entry */
  unsigned int hash;          /* checksum over the descriptor; validated by internal_malloc_info */
#endif
} internal_malloc_info_type;
338 
/* Per-site scratch pool; the union pads every pool to
 * LIBXSMM_MALLOC_SCRATCH_PADDING (cacheline-sized; presumably to avoid
 * false sharing between pools — confirm against LIBXSMM_CACHELINE intent). */
LIBXSMM_EXTERN_C typedef union LIBXSMM_RETARGETABLE internal_malloc_pool_type {
  char pad[LIBXSMM_MALLOC_SCRATCH_PADDING];
  struct {
    size_t minsize, counter, incsize; /* pool capacity; number of outstanding allocations; pending growth added on reallocation */
    char *buffer, *head;              /* pool storage base; current bump-allocation head */
#if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (1 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
    const void* site;                 /* allocation site (caller id) this pool is bound to */
# if (0 != LIBXSMM_SYNC)
    unsigned int tid;                 /* thread id associated with this pool */
# endif
#endif
  } instance;
} internal_malloc_pool_type;
352 
353 /* Scratch pool, which supports up to MAX_NSCRATCH allocation sites. */
354 #if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (0 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
355 /* LIBXSMM_ALIGNED appears to contradict LIBXSMM_APIVAR, and causes multiple defined symbols (if below is seen in multiple translation units) */
356 LIBXSMM_APIVAR_DEFINE(char internal_malloc_pool_buffer[(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS)*sizeof(internal_malloc_pool_type)+(LIBXSMM_MALLOC_SCRATCH_PADDING)-1]);
357 #endif
358 /* Interval of bytes that permit interception (internal_malloc_kind) */
359 LIBXSMM_APIVAR_DEFINE(size_t internal_malloc_limit[2]);
360 /* Maximum total size of the scratch memory domain. */
361 LIBXSMM_APIVAR_DEFINE(size_t internal_malloc_scratch_limit);
362 LIBXSMM_APIVAR_DEFINE(size_t internal_malloc_scratch_nmallocs);
363 LIBXSMM_APIVAR_DEFINE(size_t internal_malloc_private_max);
364 LIBXSMM_APIVAR_DEFINE(size_t internal_malloc_private_cur);
365 LIBXSMM_APIVAR_DEFINE(size_t internal_malloc_public_max);
366 LIBXSMM_APIVAR_DEFINE(size_t internal_malloc_public_cur);
367 LIBXSMM_APIVAR_DEFINE(size_t internal_malloc_local_max);
368 LIBXSMM_APIVAR_DEFINE(size_t internal_malloc_local_cur);
369 LIBXSMM_APIVAR_DEFINE(int internal_malloc_recursive);
370 /** 0: regular, 1/odd: intercept/scratch, otherwise: all/scratch */
371 LIBXSMM_APIVAR_DEFINE(int internal_malloc_kind);
372 #if (0 != LIBXSMM_SYNC) && defined(LIBXSMM_MALLOC_SCRATCH_JOIN)
373 LIBXSMM_APIVAR_DEFINE(int internal_malloc_join);
374 #endif
375 #if !defined(_WIN32)
376 # if defined(MAP_HUGETLB)
377 LIBXSMM_APIVAR_DEFINE(size_t internal_malloc_hugetlb);
378 # endif
379 # if defined(MAP_LOCKED)
380 LIBXSMM_APIVAR_DEFINE(size_t internal_malloc_plocked);
381 # endif
382 #endif
383 
384 
libxsmm_alignment(size_t size,size_t alignment)385 LIBXSMM_API_INTERN size_t libxsmm_alignment(size_t size, size_t alignment)
386 {
387   size_t result;
388   if ((LIBXSMM_MALLOC_ALIGNFCT * LIBXSMM_MALLOC_ALIGNMAX) <= size) {
389     result = libxsmm_lcm(0 == alignment ? (LIBXSMM_ALIGNMENT) : libxsmm_lcm(alignment, LIBXSMM_ALIGNMENT), LIBXSMM_MALLOC_ALIGNMAX);
390   }
391   else { /* small-size request */
392     if ((LIBXSMM_MALLOC_ALIGNFCT * LIBXSMM_ALIGNMENT) <= size) {
393       result = (0 == alignment ? (LIBXSMM_ALIGNMENT) : libxsmm_lcm(alignment, LIBXSMM_ALIGNMENT));
394     }
395     else if (0 != alignment) { /* custom alignment */
396       result = libxsmm_lcm(alignment, sizeof(void*));
397     }
398     else { /* tiny-size request */
399       result = sizeof(void*);
400     }
401   }
402   return result;
403 }
404 
405 
libxsmm_offset(const size_t offset[],const size_t shape[],size_t ndims,size_t * size)406 LIBXSMM_API size_t libxsmm_offset(const size_t offset[], const size_t shape[], size_t ndims, size_t* size)
407 {
408   size_t result = 0, size1 = 0;
409   if (0 != ndims && NULL != shape) {
410     size_t i;
411     result = (NULL != offset ? offset[0] : 0);
412     size1 = shape[0];
413     for (i = 1; i < ndims; ++i) {
414       result += (NULL != offset ? offset[i] : 0) * size1;
415       size1 *= shape[i];
416     }
417   }
418   if (NULL != size) *size = size1;
419   return result;
420 }
421 
422 
/* Recover the allocation descriptor that precedes a LIBXSMM buffer.
 * memory: user-visible buffer (may be NULL); check: 0 trusts the pointer,
 * non-zero levels validate ownership with increasingly strict heuristics.
 * Returns NULL when memory is NULL or validation fails. */
LIBXSMM_API_INLINE internal_malloc_info_type* internal_malloc_info(const void* memory, int check)
{
  const char *const buffer = (const char*)memory;
  /* the descriptor is located immediately in front of the user-buffer */
  internal_malloc_info_type* result = (internal_malloc_info_type*)(NULL != memory
    ? (buffer - sizeof(internal_malloc_info_type)) : NULL);
#if defined(LIBXSMM_MALLOC_HOOK_CHECK)
  if ((LIBXSMM_MALLOC_HOOK_CHECK) < check) check = (LIBXSMM_MALLOC_HOOK_CHECK); /* cap the check-level */
#endif
  if (0 != check && NULL != result) { /* check ownership */
#if !defined(_WIN32) /* mprotect: pass address rounded down to page/4k alignment */
    /* probe accessibility of the candidate descriptor before touching it, so
     * foreign pointers do not fault; check-level 1 skips the probe */
    if (1 == check || 0 == mprotect((void*)(((uintptr_t)result) & 0xFFFFFFFFFFFFF000),
      sizeof(internal_malloc_info_type), PROT_READ | PROT_WRITE) || ENOMEM != errno)
#endif
    {
      const size_t maxsize = LIBXSMM_MAX(LIBXSMM_MAX(internal_malloc_public_max, internal_malloc_local_max), internal_malloc_private_max);
      const int flags_rs = LIBXSMM_MALLOC_FLAG_REALLOC | LIBXSMM_MALLOC_FLAG_SCRATCH;
      const int flags_mx = LIBXSMM_MALLOC_FLAG_MMAP | LIBXSMM_MALLOC_FLAG_X;
      const char* const pointer = (const char*)result->pointer;
      union { libxsmm_free_fun fun; const void* ptr; } convert;
      convert.fun = result->free.function;
      /* plausibility battery: any inconsistent flag combination, invalid
       * pointer relation, or out-of-range size disqualifies the candidate */
      if (((flags_mx != (flags_mx & result->flags)) && NULL != result->reloc)
        || (0 == (LIBXSMM_MALLOC_FLAG_X & result->flags) ? 0 : (0 != (flags_rs & result->flags)))
        || (0 != (LIBXSMM_MALLOC_FLAG_X & result->flags) && NULL != result->context)
#if defined(LIBXSMM_VTUNE)
        || (0 == (LIBXSMM_MALLOC_FLAG_X & result->flags) && 0 != result->code_id)
#endif
        || (0 != (~LIBXSMM_MALLOC_FLAG_VALID & result->flags))
        || (0 == (LIBXSMM_MALLOC_FLAG_R & result->flags))
        || pointer == convert.ptr || pointer == result->context
        || pointer >= buffer || NULL == pointer
        || maxsize < result->size || 0 == result->size
        || 2 > libxsmm_ninit /* before checksum calculation */
#if !defined(LIBXSMM_MALLOC_CRC_OFF) /* last check: checksum over info */
# if defined(LIBXSMM_MALLOC_CRC_LIGHT)
        || result->hash != LIBXSMM_CRC32U(LIBXSMM_BITS)(LIBXSMM_MALLOC_SEED, &result)
# else
        || result->hash != libxsmm_crc32(LIBXSMM_MALLOC_SEED, result,
            (const char*)&result->hash - (const char*)result)
# endif
#endif
      ) { /* mismatch */
        result = NULL;
      }
    }
#if !defined(_WIN32)
    else { /* mismatch: descriptor memory is not accessible */
      result = NULL;
    }
#endif
  }
  return result;
}
475 
476 
LIBXSMM_API_INTERN int internal_xfree(const void* /*memory*/, internal_malloc_info_type* /*info*/);
/* Release the allocation described by "info" (belonging to "memory"):
 * non-mapped buffers are handed to the recorded free-function, mapped buffers
 * are unmapped (including a potential secondary code mapping), and the
 * statistics of the respective memory domain are decreased.
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if unmapping failed. */
LIBXSMM_API_INTERN int internal_xfree(const void* memory, internal_malloc_info_type* info)
{
#if !defined(LIBXSMM_BUILD) || !defined(_WIN32)
  static int error_once = 0;
#endif
  int result = EXIT_SUCCESS, flags;
  void* buffer;
  size_t size;
  LIBXSMM_ASSERT(NULL != memory && NULL != info);
  buffer = info->pointer;
  flags = info->flags;
  size = info->size;
#if !defined(LIBXSMM_BUILD) /* sanity check */
  if (NULL != buffer || 0 == size)
#endif
  {
    /* full size incl. descriptor/alignment gap between base and user-buffer */
    const size_t alloc_size = size + (((const char*)memory) - ((const char*)buffer));
    LIBXSMM_ASSERT(NULL != buffer || 0 == size);
    if (0 == (LIBXSMM_MALLOC_FLAG_MMAP & flags)) { /* not memory-mapped */
      if (NULL != info->free.function) {
#if defined(LIBXSMM_MALLOC_DELETE_SAFE)
        info->pointer = NULL; info->size = 0; /* invalidate descriptor against double-delete */
#endif
        if (NULL == info->context) { /* plain free-function */
#if (defined(LIBXSMM_MALLOC_HOOK_STATIC) || defined(LIBXSMM_MALLOC_HOOK_DYNAMIC)) && 0
          if (free == info->free.function) {
            __real_free(buffer);
          }
          else
#endif
          if (NULL != info->free.function) {
            info->free.function(buffer);
          }
        }
        else { /* context-form deallocation */
          LIBXSMM_ASSERT(NULL != info->free.ctx_form);
          info->free.ctx_form(buffer, info->context);
        }
      }
    }
    else { /* memory-mapped buffer */
#if defined(LIBXSMM_VTUNE)
      /* unregister JIT-code before releasing the mapping */
      if (0 != (LIBXSMM_MALLOC_FLAG_X & flags) && 0 != info->code_id && iJIT_SAMPLING_ON == iJIT_IsProfilingActive()) {
        iJIT_NotifyEvent(LIBXSMM_VTUNE_JIT_UNLOAD, &info->code_id);
      }
#endif
#if defined(_WIN32)
      result = (NULL == buffer || FALSE != VirtualFree(buffer, 0, MEM_RELEASE)) ? EXIT_SUCCESS : EXIT_FAILURE;
#else /* !_WIN32 */
      {
        const size_t unmap_size = LIBXSMM_UP2(alloc_size, LIBXSMM_PAGE_MINSIZE);
        void* const reloc = info->reloc;
        if (0 != munmap(buffer, unmap_size)) {
          if (0 != libxsmm_verbosity /* library code is expected to be mute */
            && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
          {
            fprintf(stderr, "LIBXSMM ERROR: %s (attempted to unmap buffer %p+%" PRIuPTR ")!\n",
              strerror(errno), buffer, (uintptr_t)unmap_size);
          }
          result = EXIT_FAILURE;
        }
        /* executable buffers may carry a second (relocated) mapping */
        if (0 != (LIBXSMM_MALLOC_FLAG_X & flags) && EXIT_SUCCESS == result
          && NULL != reloc && MAP_FAILED != reloc && buffer != reloc
          && 0 != munmap(reloc, unmap_size))
        {
          if (0 != libxsmm_verbosity /* library code is expected to be mute */
            && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
          {
            fprintf(stderr, "LIBXSMM ERROR: %s (attempted to unmap code %p+%" PRIuPTR ")!\n",
              strerror(errno), reloc, (uintptr_t)unmap_size);
          }
          result = EXIT_FAILURE;
        }
      }
#endif
    }
    if (0 == (LIBXSMM_MALLOC_FLAG_X & flags)) { /* update statistics */
#if !defined(_WIN32)
# if defined(MAP_HUGETLB)
      if (0 != (LIBXSMM_MALLOC_FLAG_PHUGE & flags)) { /* huge pages */
        LIBXSMM_ASSERT(0 != (LIBXSMM_MALLOC_FLAG_MMAP & flags));
        LIBXSMM_ATOMIC_SUB_FETCH(&internal_malloc_hugetlb, alloc_size, LIBXSMM_ATOMIC_RELAXED);
      }
# endif
# if defined(MAP_LOCKED)
      if (0 != (LIBXSMM_MALLOC_FLAG_PLOCK & flags)) { /* page-locked */
        LIBXSMM_ASSERT(0 != (LIBXSMM_MALLOC_FLAG_MMAP & flags));
        LIBXSMM_ATOMIC_SUB_FETCH(&internal_malloc_plocked, alloc_size, LIBXSMM_ATOMIC_RELAXED);
      }
# endif
#endif
      if (0 == (LIBXSMM_MALLOC_FLAG_PRIVATE & flags)) { /* public */
        if (0 != (LIBXSMM_MALLOC_FLAG_SCRATCH & flags)) { /* scratch */
#if 1 /* load/store instead of sub-fetch clamps the counter at zero */
          const size_t current = (size_t)LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_LOAD, LIBXSMM_BITS)(
            &internal_malloc_public_cur, LIBXSMM_ATOMIC_RELAXED);
          LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_STORE, LIBXSMM_BITS)(&internal_malloc_public_cur,
            alloc_size <= current ? (current - alloc_size) : 0, LIBXSMM_ATOMIC_RELAXED);
#else
          LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_SUB_FETCH, LIBXSMM_BITS)(
            &internal_malloc_public_cur, alloc_size, LIBXSMM_ATOMIC_RELAXED);
#endif
        }
        else { /* local */
#if 1 /* load/store instead of sub-fetch clamps the counter at zero */
          const size_t current = (size_t)LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_LOAD, LIBXSMM_BITS)(
            &internal_malloc_local_cur, LIBXSMM_ATOMIC_RELAXED);
          LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_STORE, LIBXSMM_BITS)(&internal_malloc_local_cur,
            alloc_size <= current ? (current - alloc_size) : 0, LIBXSMM_ATOMIC_RELAXED);
#else
          LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_SUB_FETCH, LIBXSMM_BITS)(
            &internal_malloc_local_cur, alloc_size, LIBXSMM_ATOMIC_RELAXED);
#endif
        }
      }
      else { /* private */
#if 1 /* load/store instead of sub-fetch clamps the counter at zero */
        const size_t current = (size_t)LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_LOAD, LIBXSMM_BITS)(
          &internal_malloc_private_cur, LIBXSMM_ATOMIC_RELAXED);
        LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_STORE, LIBXSMM_BITS)(&internal_malloc_private_cur,
          alloc_size <= current ? (current - alloc_size) : 0, LIBXSMM_ATOMIC_RELAXED);
#else
        LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_SUB_FETCH, LIBXSMM_BITS)(
          &internal_malloc_private_cur, alloc_size, LIBXSMM_ATOMIC_RELAXED);
#endif
      }
    }
  }
#if !defined(LIBXSMM_BUILD)
  else if ((LIBXSMM_VERBOSITY_WARN <= libxsmm_verbosity || 0 > libxsmm_verbosity) /* library code is expected to be mute */
    && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
  {
    fprintf(stderr, "LIBXSMM WARNING: attempt to release memory from non-matching implementation!\n");
  }
#endif
  return result;
}
615 
616 
/* Accumulate the footprint (minsize) of all occupied scratch pools, skipping
 * the "exclude" pool and pools owned by LIBXSMM-internal call sites. */
LIBXSMM_API_INLINE size_t internal_get_scratch_size(const internal_malloc_pool_type* exclude)
{
  size_t result = 0;
#if !defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) || (1 >= (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
  LIBXSMM_UNUSED(exclude);
#else
  /* pool storage is over-allocated; round up to the configured padding */
  const internal_malloc_pool_type* pool = (const internal_malloc_pool_type*)LIBXSMM_UP2(
    (uintptr_t)internal_malloc_pool_buffer, LIBXSMM_MALLOC_SCRATCH_PADDING);
# if (1 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
  const internal_malloc_pool_type *const end = pool + libxsmm_scratch_pools;
  LIBXSMM_ASSERT(libxsmm_scratch_pools <= LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS);
  for (; pool != end; ++pool)
# endif /*(1 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))*/
  {
    if (0 != pool->instance.minsize) {
# if 1 /* memory info is not used */
      if (pool != exclude && (LIBXSMM_MALLOC_INTERNAL_CALLER) != pool->instance.site) {
        result += pool->instance.minsize;
      }
# else
      const internal_malloc_info_type* const info = internal_malloc_info(pool->instance.buffer, 0/*no check*/);
      if (NULL != info && pool != exclude && (LIBXSMM_MALLOC_INTERNAL_CALLER) != pool->instance.site) {
        result += info->size;
      }
# endif
    }
    else break; /* early exit: pools are populated consecutively */
  }
#endif /*defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (0 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))*/
  return result;
}
648 
649 
/* Map a pointer back to the scratch pool it was drawn from; returns NULL when
 * the pointer lies outside of every occupied pool (i.e., it is a local or
 * regular allocation rather than scratch memory). */
LIBXSMM_API_INLINE internal_malloc_pool_type* internal_scratch_malloc_pool(const void* memory)
{
  internal_malloc_pool_type* result = NULL;
  internal_malloc_pool_type* pool = (internal_malloc_pool_type*)LIBXSMM_UP2(
    (uintptr_t)internal_malloc_pool_buffer, LIBXSMM_MALLOC_SCRATCH_PADDING);
  const char* const buffer = (const char*)memory;
#if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (1 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
  const unsigned int npools = libxsmm_scratch_pools;
#else
  const unsigned int npools = 1;
#endif
  internal_malloc_pool_type *const end = pool + npools;
  LIBXSMM_ASSERT(npools <= LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS);
  LIBXSMM_ASSERT(NULL != memory);
  for (; pool != end; ++pool) {
    if (0 != pool->instance.minsize) {
      if (0 != pool->instance.counter
#if 1 /* should be implied by non-zero counter */
        && NULL != pool->instance.buffer
#endif
      ){/* check if memory belongs to scratch domain or local domain */
#if 1
        const size_t size = pool->instance.minsize;
#else
        const internal_malloc_info_type* const info = internal_malloc_info(pool->instance.buffer, 0/*no check*/);
        const size_t size = info->size;
#endif
        /* match if the pointer falls inside [buffer, buffer + size) */
        if (pool->instance.buffer == buffer /* fast path */ ||
           (pool->instance.buffer < buffer && buffer < (pool->instance.buffer + size)))
        {
          result = pool;
          break;
        }
      }
    }
    else break; /* early exit: pools are populated consecutively */
  }
  return result;
}
689 
690 
LIBXSMM_API_INTERN void internal_scratch_free(const void* /*memory*/, internal_malloc_pool_type* /*pool*/);
/* Return a scratch allocation to its pool: decrements the pool's outstanding
 * allocation counter; when it reaches zero, either rewinds the pool head for
 * reuse, or releases the buffer if the requested size (minsize + incsize)
 * outgrew the scaled current capacity. */
LIBXSMM_API_INTERN void internal_scratch_free(const void* memory, internal_malloc_pool_type* pool)
{
#if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (0 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
  const size_t counter = LIBXSMM_ATOMIC_SUB_FETCH(&pool->instance.counter, 1, LIBXSMM_ATOMIC_SEQ_CST);
  char* const pool_buffer = pool->instance.buffer;
# if !defined(NDEBUG) || defined(LIBXSMM_MALLOC_SCRATCH_TRIM_HEAD)
  char *const buffer = (char*)memory; /* non-const */
  LIBXSMM_ASSERT(pool_buffer <= buffer && buffer < pool_buffer + pool->instance.minsize);
# endif
  LIBXSMM_ASSERT(pool_buffer <= pool->instance.head);
  if (0 == counter) { /* reuse or reallocate scratch domain */
    internal_malloc_info_type *const info = internal_malloc_info(pool_buffer, 0/*no check*/);
    /* scaled capacity acts as hysteresis against frequent reallocation */
    const size_t scale_size = (size_t)(1 != libxsmm_scratch_scale ? (libxsmm_scratch_scale * info->size) : info->size); /* hysteresis */
    const size_t size = pool->instance.minsize + pool->instance.incsize;
    LIBXSMM_ASSERT(0 == (LIBXSMM_MALLOC_FLAG_X & info->flags)); /* scratch memory is not executable */
    if (size <= scale_size) { /* reuse scratch domain */
      pool->instance.head = pool_buffer; /* reuse scratch domain */
    }
    else { /* release buffer so the next allocation can grow the pool */
# if !defined(NDEBUG)
      static int error_once = 0;
# endif
      pool->instance.buffer = pool->instance.head = NULL;
# if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (1 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
      pool->instance.site = NULL; /* clear affinity */
# endif
# if !defined(NDEBUG)
      if (EXIT_SUCCESS != internal_xfree(pool_buffer, info)
        && 0 != libxsmm_verbosity /* library code is expected to be mute */
        && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
      {
        fprintf(stderr, "LIBXSMM ERROR: memory deallocation failed!\n");
      }
# else
      internal_xfree(pool_buffer, info); /* !libxsmm_free */
# endif
    }
  }
# if defined(LIBXSMM_MALLOC_SCRATCH_TRIM_HEAD) /* TODO: document linear/scoped allocator policy */
  else if (buffer < pool->instance.head) { /* reuse scratch domain */
    pool->instance.head = buffer;
  }
# else
  LIBXSMM_UNUSED(memory);
# endif
#else
  LIBXSMM_UNUSED(memory); LIBXSMM_UNUSED(pool);
#endif
}
741 
742 
/* Serve a scratch-memory request either from a thread/site-affine scratch pool
 * (fast path: bump the pool's head pointer) or via a regular "local" allocation
 * when pools are disabled, exhausted, or the scratch limit is reached.
 * If LIBXSMM_MALLOC_FLAG_REALLOC is set and *memory is non-NULL, the existing
 * content is preserved (allocate-copy-free).
 * memory   [inout]: receives the allocated buffer (NULL on failure).
 * size         [in]: requested size in Bytes.
 * alignment    [in]: requested alignment (0: automatic).
 * flags        [in]: LIBXSMM_MALLOC_FLAG_* (executable/FLAG_X is not allowed).
 * caller       [in]: allocation site used for pool affinity (may be NULL).
 */
LIBXSMM_API_INTERN void internal_scratch_malloc(void** /*memory*/, size_t /*size*/, size_t /*alignment*/, int /*flags*/, const void* /*caller*/);
LIBXSMM_API_INTERN void internal_scratch_malloc(void** memory, size_t size, size_t alignment, int flags, const void* caller)
{
  LIBXSMM_ASSERT(NULL != memory && 0 == (LIBXSMM_MALLOC_FLAG_X & flags));
  if (0 == (LIBXSMM_MALLOC_FLAG_REALLOC & flags) || NULL == *memory) {
    static int error_once = 0;
    size_t local_size = 0; /* non-zero selects the local (non-pooled) fall-back below */
#if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (0 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
    if (0 < libxsmm_scratch_pools) {
      /* pool array lives inside a padded static buffer; align its base address */
      internal_malloc_pool_type *const pools = (internal_malloc_pool_type*)LIBXSMM_UP2(
        (uintptr_t)internal_malloc_pool_buffer, LIBXSMM_MALLOC_SCRATCH_PADDING);
      internal_malloc_pool_type *const end = pools + libxsmm_scratch_pools, *pool = pools;
      /* alloc_size over-provisions by (align_size - 1) so the result can be aligned in-place */
      const size_t align_size = libxsmm_alignment(size, alignment), alloc_size = size + align_size - 1;
# if (0 != LIBXSMM_SYNC)
      const unsigned int tid = libxsmm_get_tid();
# endif
      unsigned int npools = 1;
# if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (1 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
      const void *const site = caller; /* no further attempt in case of NULL */
      internal_malloc_pool_type *pool0 = end;
      /* scan pools for one matching this site (and thread, depending on JOIN mode) */
      for (; pool != end; ++pool) { /* counter: memory info is not employed as pools are still manipulated */
        if (NULL != pool->instance.buffer) {
          if ((LIBXSMM_MALLOC_INTERNAL_CALLER) != pool->instance.site) ++npools; /* count number of occupied pools */
          if ( /* find matching pool and enter fast path (draw from pool-buffer) */
#   if (0 != LIBXSMM_SYNC) && !defined(LIBXSMM_MALLOC_SCRATCH_JOIN)
            (site == pool->instance.site && tid == pool->instance.tid))
#   elif (0 != LIBXSMM_SYNC)
            (site == pool->instance.site && (0 != internal_malloc_join || tid == pool->instance.tid)))
#   else
            (site == pool->instance.site))
#   endif
          {
            break;
          }
        }
        else {
          if (end == pool0) pool0 = pool; /* first available pool*/
          if (0 == pool->instance.minsize) { /* early exit */
            pool = pool0; break;
          }
        }
      }
# endif
      LIBXSMM_ASSERT(NULL != pool);
      if (end != pool && 0 <= internal_malloc_kind) {
        /* register this allocation with the pool before touching its buffer */
        const size_t counter = LIBXSMM_ATOMIC_ADD_FETCH(&pool->instance.counter, (size_t)1, LIBXSMM_ATOMIC_SEQ_CST);
        if (NULL != pool->instance.buffer || 1 != counter) { /* attempt to (re-)use existing pool */
          const internal_malloc_info_type *const info = internal_malloc_info(pool->instance.buffer, 1/*check*/);
          const size_t pool_size = ((NULL != info && 0 != counter) ? info->size : 0);
          const size_t used_size = pool->instance.head - pool->instance.buffer;
          const size_t req_size = alloc_size + used_size;
          if (req_size <= pool_size) { /* fast path: draw from pool-buffer */
# if (0 != LIBXSMM_SYNC) && defined(LIBXSMM_MALLOC_SCRATCH_JOIN)
            /* JOIN mode: other threads may bump the head concurrently -> atomic add */
            void *const headaddr = &pool->instance.head;
            char *const head = (0 == internal_malloc_join
              ? (pool->instance.head += alloc_size)
              : ((char*)LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_ADD_FETCH, LIBXSMM_BITS)(
                (uintptr_t*)headaddr, alloc_size, LIBXSMM_ATOMIC_SEQ_CST)));
# else
            char *const head = (char*)(pool->instance.head += alloc_size);
# endif
            *memory = LIBXSMM_ALIGN(head - alloc_size, align_size);
          }
          else { /* fall-back to local memory allocation */
            /* remember how much the pool fell short so it can grow on re-allocation */
            const size_t incsize = req_size - LIBXSMM_MIN(pool_size, req_size);
            pool->instance.incsize = LIBXSMM_MAX(pool->instance.incsize, incsize);
# if (0 != LIBXSMM_SYNC) && defined(LIBXSMM_MALLOC_SCRATCH_JOIN)
            if (0 == internal_malloc_join) {
              --pool->instance.counter;
            }
            else {
              LIBXSMM_ATOMIC_SUB_FETCH(&pool->instance.counter, 1, LIBXSMM_ATOMIC_SEQ_CST);
            }
# else
            --pool->instance.counter;
# endif
            /* book-keep the watermark of locally allocated scratch (statistics only) */
            if (
# if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (1 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
              (LIBXSMM_MALLOC_INTERNAL_CALLER) != pool->instance.site &&
# endif
              0 == (LIBXSMM_MALLOC_FLAG_PRIVATE & flags))
            {
              const size_t watermark = LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_ADD_FETCH, LIBXSMM_BITS)(
                &internal_malloc_local_cur, alloc_size, LIBXSMM_ATOMIC_RELAXED);
              if (internal_malloc_local_max < watermark) internal_malloc_local_max = watermark; /* accept data-race */
            }
            else {
              const size_t watermark = LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_ADD_FETCH, LIBXSMM_BITS)(
                &internal_malloc_private_cur, alloc_size, LIBXSMM_ATOMIC_RELAXED);
              if (internal_malloc_private_max < watermark) internal_malloc_private_max = watermark; /* accept data-race */
            }
            local_size = size;
          }
        }
        else { /* fresh pool */
          /* size the new pool: scaled request vs. previous minsize, capped by the scratch limit */
          const size_t scratch_limit = libxsmm_get_scratch_limit();
          const size_t scratch_size = internal_get_scratch_size(pool); /* exclude current pool */
          const size_t limit_size = (1 < npools ? (scratch_limit - LIBXSMM_MIN(scratch_size, scratch_limit)) : LIBXSMM_SCRATCH_UNLIMITED);
          const size_t scale_size = (size_t)(1 != libxsmm_scratch_scale ? (libxsmm_scratch_scale * alloc_size) : alloc_size); /* hysteresis */
          const size_t incsize = (size_t)(libxsmm_scratch_scale * pool->instance.incsize);
          const size_t maxsize = LIBXSMM_MAX(scale_size, pool->instance.minsize) + incsize;
          const size_t limsize = LIBXSMM_MIN(maxsize, limit_size);
          const size_t minsize = limsize;
          LIBXSMM_ASSERT(1 <= libxsmm_scratch_scale);
          LIBXSMM_ASSERT(1 == counter);
          pool->instance.incsize = 0; /* reset */
          pool->instance.minsize = minsize;
# if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (1 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
          pool->instance.site = site;
#   if (0 != LIBXSMM_SYNC)
          pool->instance.tid = tid;
#   endif
# endif
          if (alloc_size <= minsize && /* allocate scratch pool */
            EXIT_SUCCESS == libxsmm_xmalloc(memory, minsize, 0/*auto-align*/,
              (flags | LIBXSMM_MALLOC_FLAG_SCRATCH) & ~LIBXSMM_MALLOC_FLAG_REALLOC,
              NULL/*extra*/, 0/*extra_size*/))
          {
            pool->instance.buffer = (char*)*memory;
            pool->instance.head = pool->instance.buffer + alloc_size;
            *memory = LIBXSMM_ALIGN((char*)*memory, align_size);
# if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (1 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
            if ((LIBXSMM_MALLOC_INTERNAL_CALLER) != pool->instance.site)
# endif
            {
              LIBXSMM_ATOMIC_ADD_FETCH(&internal_malloc_scratch_nmallocs, 1, LIBXSMM_ATOMIC_RELAXED);
            }
          }
          else { /* fall-back to local allocation */
            LIBXSMM_ATOMIC_SUB_FETCH(&pool->instance.counter, 1, LIBXSMM_ATOMIC_SEQ_CST);
            if (0 != libxsmm_verbosity /* library code is expected to be mute */
              && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
            {
              if (alloc_size <= minsize) {
                fprintf(stderr, "LIBXSMM ERROR: failed to allocate scratch memory!\n");
              }
              else if ((LIBXSMM_MALLOC_INTERNAL_CALLER) != caller
                && (LIBXSMM_VERBOSITY_WARN <= libxsmm_verbosity || 0 > libxsmm_verbosity))
              {
                fprintf(stderr, "LIBXSMM WARNING: scratch memory domain exhausted!\n");
              }
            }
            local_size = size;
          }
        }
      }
      else { /* fall-back to local memory allocation */
        local_size = size;
      }
    }
    else { /* fall-back to local memory allocation */
      local_size = size;
    }
    if (0 != local_size)
#else
    local_size = size;
#endif /*defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (0 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))*/
    { /* local memory allocation */
      if (EXIT_SUCCESS != libxsmm_xmalloc(memory, local_size, alignment,
          flags & ~(LIBXSMM_MALLOC_FLAG_SCRATCH | LIBXSMM_MALLOC_FLAG_REALLOC), NULL/*extra*/, 0/*extra_size*/)
        && /* library code is expected to be mute */0 != libxsmm_verbosity
        && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
      {
        fprintf(stderr, "LIBXSMM ERROR: scratch memory fall-back failed!\n");
        LIBXSMM_ASSERT(NULL == *memory);
      }
      if ((LIBXSMM_MALLOC_INTERNAL_CALLER) != caller) {
        LIBXSMM_ATOMIC_ADD_FETCH(&internal_malloc_scratch_nmallocs, 1, LIBXSMM_ATOMIC_RELAXED);
      }
    }
  }
  else { /* reallocate memory */
    const void *const preserve = *memory;
#if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (0 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
    internal_malloc_pool_type *const pool = internal_scratch_malloc_pool(preserve);
    if (NULL != pool) {
      /* pooled buffer: emulate realloc by allocate-copy-free within the scratch domain */
      const internal_malloc_info_type *const info = internal_malloc_info(pool->instance.buffer, 0/*no check*/);
      void* buffer;
      LIBXSMM_ASSERT(pool->instance.buffer <= pool->instance.head && NULL != info);
      internal_scratch_malloc(&buffer, size, alignment,
        ~LIBXSMM_MALLOC_FLAG_REALLOC & (LIBXSMM_MALLOC_FLAG_SCRATCH | flags), caller);
      if (NULL != buffer) {
        memcpy(buffer, preserve, LIBXSMM_MIN(size, info->size)); /* TODO: memmove? */
        *memory = buffer;
      }
      internal_scratch_free(memory, pool);
    }
    else
#endif
    { /* non-pooled (potentially foreign pointer) */
#if !defined(NDEBUG)
      const int status =
#endif
      libxsmm_xmalloc(memory, size, alignment/* no need here to determine alignment of given buffer */,
        ~LIBXSMM_MALLOC_FLAG_SCRATCH & flags, NULL/*extra*/, 0/*extra_size*/);
      assert(EXIT_SUCCESS == status || NULL == *memory); /* !LIBXSMM_ASSERT */
    }
  }
}
942 
943 
944 #if defined(LIBXSMM_MALLOC_HOOK_DYNAMIC)
945 LIBXSMM_APIVAR_PRIVATE_DEF(libxsmm_malloc_fntype libxsmm_malloc_fn);
946 
947 #if defined(LIBXSMM_MALLOC_HOOK_QKMALLOC)
948 LIBXSMM_API_INTERN void* internal_memalign_malloc(size_t /*alignment*/, size_t /*size*/);
internal_memalign_malloc(size_t alignment,size_t size)949 LIBXSMM_API_INTERN void* internal_memalign_malloc(size_t alignment, size_t size)
950 {
951   LIBXSMM_UNUSED(alignment);
952   LIBXSMM_ASSERT(NULL != libxsmm_malloc_fn.malloc.dlsym);
953   return libxsmm_malloc_fn.malloc.ptr(size);
954 }
955 #elif defined(LIBXSMM_MALLOC_HOOK_KMP)
956 LIBXSMM_API_INTERN void* internal_memalign_twiddle(size_t /*alignment*/, size_t /*size*/);
internal_memalign_twiddle(size_t alignment,size_t size)957 LIBXSMM_API_INTERN void* internal_memalign_twiddle(size_t alignment, size_t size)
958 {
959   LIBXSMM_ASSERT(NULL != libxsmm_malloc_fn.alignmem.dlsym);
960   return libxsmm_malloc_fn.alignmem.ptr(size, alignment);
961 }
962 #endif
963 #endif /*defined(LIBXSMM_MALLOC_HOOK_DYNAMIC)*/
964 
965 
966 #if (defined(LIBXSMM_MALLOC_HOOK_STATIC) || defined(LIBXSMM_MALLOC_HOOK_DYNAMIC))
967 
/* Hook body shared by all memalign/malloc interceptors: allocates "size" Bytes
 * with the requested alignment on behalf of "caller" (the allocation site). */
LIBXSMM_API_INTERN void* internal_memalign_hook(size_t /*alignment*/, size_t /*size*/, const void* /*caller*/);
LIBXSMM_API_INTERN void* internal_memalign_hook(size_t alignment, size_t size, const void* caller)
{
  void* result;
# if defined(LIBXSMM_MALLOC_MMAP_HOOK)
  INTERNAL_MEMALIGN_HOOK(result, LIBXSMM_MALLOC_FLAG_MMAP, alignment, size, caller);
# else
  INTERNAL_MEMALIGN_HOOK(result, LIBXSMM_MALLOC_FLAG_DEFAULT, alignment, size, caller);
# endif
  return result;
}
979 
/* Wrapper installed via the linker's --wrap=memalign option (static hook);
 * caller is unknown here, hence NULL. */
LIBXSMM_API void* __wrap_memalign(size_t /*alignment*/, size_t /*size*/);
LIBXSMM_API void* __wrap_memalign(size_t alignment, size_t size)
{
  void* result;
# if defined(LIBXSMM_MALLOC_MMAP_HOOK)
  INTERNAL_MEMALIGN_HOOK(result, LIBXSMM_MALLOC_FLAG_MMAP, alignment, size, NULL/*caller*/);
# else
  INTERNAL_MEMALIGN_HOOK(result, LIBXSMM_MALLOC_FLAG_DEFAULT, alignment, size, NULL/*caller*/);
# endif
  return result;
}
991 
992 LIBXSMM_API_INTERN void* internal_malloc_hook(size_t /*size*/, const void* /*caller*/);
internal_malloc_hook(size_t size,const void * caller)993 LIBXSMM_API_INTERN void* internal_malloc_hook(size_t size, const void* caller)
994 {
995   return internal_memalign_hook(0/*auto-alignment*/, size, caller);
996 }
997 
/* Wrapper installed via the linker's --wrap=malloc option (static hook). */
LIBXSMM_API void* __wrap_malloc(size_t /*size*/);
LIBXSMM_API void* __wrap_malloc(size_t size)
{
  void* result;
# if defined(LIBXSMM_MALLOC_MMAP_HOOK)
  INTERNAL_MEMALIGN_HOOK(result, LIBXSMM_MALLOC_FLAG_MMAP, 0/*auto-alignment*/, size, NULL/*caller*/);
# else
  INTERNAL_MEMALIGN_HOOK(result, LIBXSMM_MALLOC_FLAG_DEFAULT, 0/*auto-alignment*/, size, NULL/*caller*/);
# endif
  return result;
}
1009 
1010 #if defined(LIBXSMM_MALLOC_HOOK_CALLOC)
1011 LIBXSMM_API void* __wrap_calloc(size_t /*num*/, size_t /*size*/);
__wrap_calloc(size_t num,size_t size)1012 LIBXSMM_API void* __wrap_calloc(size_t num, size_t size)
1013 {
1014   void* result;
1015   const size_t nbytes = num * size;
1016 # if defined(LIBXSMM_MALLOC_MMAP_HOOK)
1017   INTERNAL_MEMALIGN_HOOK(result, LIBXSMM_MALLOC_FLAG_MMAP, 0/*auto-alignment*/, nbytes, NULL/*caller*/);
1018 # else
1019   INTERNAL_MEMALIGN_HOOK(result, LIBXSMM_MALLOC_FLAG_DEFAULT, 0/*auto-alignment*/, nbytes, NULL/*caller*/);
1020 # endif
1021   /* TODO: signal anonymous/zeroed pages */
1022   if (NULL != result) memset(result, 0, nbytes);
1023   return result;
1024 }
1025 #endif
1026 
1027 #if defined(LIBXSMM_MALLOC_HOOK_REALLOC)
/* Hook body shared by the realloc interceptors: resizes "ptr" to "size" Bytes
 * on behalf of "caller" (the allocation site). */
LIBXSMM_API_INTERN void* internal_realloc_hook(void* /*ptr*/, size_t /*size*/, const void* /*caller*/);
LIBXSMM_API_INTERN void* internal_realloc_hook(void* ptr, size_t size, const void* caller)
{
  void* result;
# if defined(LIBXSMM_MALLOC_MMAP_HOOK)
  INTERNAL_REALLOC_HOOK(result, LIBXSMM_MALLOC_FLAG_REALLOC | LIBXSMM_MALLOC_FLAG_MMAP, ptr, size, caller);
# else
  INTERNAL_REALLOC_HOOK(result, LIBXSMM_MALLOC_FLAG_REALLOC | LIBXSMM_MALLOC_FLAG_DEFAULT, ptr, size, caller);
# endif
  return result;
}
1039 
/* Wrapper installed via the linker's --wrap=realloc option (static hook). */
LIBXSMM_API void* __wrap_realloc(void* /*ptr*/, size_t /*size*/);
LIBXSMM_API void* __wrap_realloc(void* ptr, size_t size)
{
  void* result;
# if defined(LIBXSMM_MALLOC_MMAP_HOOK)
  INTERNAL_REALLOC_HOOK(result, LIBXSMM_MALLOC_FLAG_REALLOC | LIBXSMM_MALLOC_FLAG_MMAP, ptr, size, NULL/*caller*/);
# else
  INTERNAL_REALLOC_HOOK(result, LIBXSMM_MALLOC_FLAG_REALLOC | LIBXSMM_MALLOC_FLAG_DEFAULT, ptr, size, NULL/*caller*/);
# endif
  return result;
}
1051 #endif
1052 
/* Hook body shared by the free interceptors: releases "ptr" on behalf of "caller". */
LIBXSMM_API_INTERN void internal_free_hook(void* /*ptr*/, const void* /*caller*/);
LIBXSMM_API_INTERN void internal_free_hook(void* ptr, const void* caller)
{
  INTERNAL_FREE_HOOK(ptr, caller);
}
1058 
/* Wrapper installed via the linker's --wrap=free option (static hook). */
LIBXSMM_API void __wrap_free(void* /*ptr*/);
LIBXSMM_API void __wrap_free(void* ptr)
{
  INTERNAL_FREE_HOOK(ptr, NULL/*caller*/);
}
1064 
1065 #endif /*(defined(LIBXSMM_MALLOC_HOOK_STATIC) || defined(LIBXSMM_MALLOC_HOOK_DYNAMIC))*/
1066 
1067 #if defined(LIBXSMM_MALLOC_HOOK_DYNAMIC)
/* Weak symbol overriding the C library's memalign (dynamic hook);
 * resolved in preference of libc when this library is preloaded/linked. */
LIBXSMM_API LIBXSMM_ATTRIBUTE_WEAK LIBXSMM_ATTRIBUTE_MALLOC void* memalign(size_t /*alignment*/, size_t /*size*/) LIBXSMM_THROW;
LIBXSMM_API LIBXSMM_ATTRIBUTE_WEAK LIBXSMM_ATTRIBUTE_MALLOC void* memalign(size_t alignment, size_t size) LIBXSMM_THROW
{
  void* result;
# if defined(LIBXSMM_MALLOC_MMAP_HOOK)
  INTERNAL_MEMALIGN_HOOK(result, LIBXSMM_MALLOC_FLAG_MMAP, alignment, size, NULL/*caller*/);
# else
  INTERNAL_MEMALIGN_HOOK(result, LIBXSMM_MALLOC_FLAG_DEFAULT, alignment, size, NULL/*caller*/);
# endif
  return result;
}
1079 
/* Weak symbol overriding the C library's malloc (dynamic hook). */
LIBXSMM_API LIBXSMM_ATTRIBUTE_WEAK LIBXSMM_ATTRIBUTE_MALLOC void* malloc(size_t /*size*/) LIBXSMM_THROW;
LIBXSMM_API LIBXSMM_ATTRIBUTE_WEAK LIBXSMM_ATTRIBUTE_MALLOC void* malloc(size_t size) LIBXSMM_THROW
{
  void* result;
# if defined(LIBXSMM_MALLOC_MMAP_HOOK)
  INTERNAL_MEMALIGN_HOOK(result, LIBXSMM_MALLOC_FLAG_MMAP, 0/*auto-alignment*/, size, NULL/*caller*/);
# else
  INTERNAL_MEMALIGN_HOOK(result, LIBXSMM_MALLOC_FLAG_DEFAULT, 0/*auto-alignment*/, size, NULL/*caller*/);
# endif
  return result;
}
1091 
1092 #if defined(LIBXSMM_MALLOC_HOOK_CALLOC)
1093 LIBXSMM_API LIBXSMM_ATTRIBUTE_WEAK LIBXSMM_ATTRIBUTE_MALLOC void* calloc(size_t /*num*/, size_t /*size*/) LIBXSMM_THROW;
calloc(size_t num,size_t size)1094 LIBXSMM_API LIBXSMM_ATTRIBUTE_WEAK LIBXSMM_ATTRIBUTE_MALLOC void* calloc(size_t num, size_t size) LIBXSMM_THROW
1095 {
1096   void* result;
1097   const size_t nbytes = num * size;
1098 # if defined(LIBXSMM_MALLOC_MMAP_HOOK)
1099   INTERNAL_MEMALIGN_HOOK(result, LIBXSMM_MALLOC_FLAG_MMAP, 0/*auto-alignment*/, nbytes, NULL/*caller*/);
1100 # else
1101   INTERNAL_MEMALIGN_HOOK(result, LIBXSMM_MALLOC_FLAG_DEFAULT, 0/*auto-alignment*/, nbytes, NULL/*caller*/);
1102 # endif
1103   /* TODO: signal anonymous/zeroed pages */
1104   if (NULL != result) memset(result, 0, nbytes);
1105   return result;
1106 }
1107 #endif
1108 
1109 #if defined(LIBXSMM_MALLOC_HOOK_REALLOC)
/* Weak symbol overriding the C library's realloc (dynamic hook). */
LIBXSMM_API LIBXSMM_ATTRIBUTE_WEAK void* realloc(void* /*ptr*/, size_t /*size*/) LIBXSMM_THROW;
LIBXSMM_API LIBXSMM_ATTRIBUTE_WEAK void* realloc(void* ptr, size_t size) LIBXSMM_THROW
{
  void* result;
# if defined(LIBXSMM_MALLOC_MMAP_HOOK)
  INTERNAL_REALLOC_HOOK(result, LIBXSMM_MALLOC_FLAG_REALLOC | LIBXSMM_MALLOC_FLAG_MMAP, ptr, size, NULL/*caller*/);
# else
  INTERNAL_REALLOC_HOOK(result, LIBXSMM_MALLOC_FLAG_REALLOC | LIBXSMM_MALLOC_FLAG_DEFAULT, ptr, size, NULL/*caller*/);
# endif
  return result;
}
1121 #endif
1122 
/* Weak symbol overriding the C library's free (dynamic hook). */
LIBXSMM_API LIBXSMM_ATTRIBUTE_WEAK void free(void* /*ptr*/) LIBXSMM_THROW;
LIBXSMM_API LIBXSMM_ATTRIBUTE_WEAK void free(void* ptr) LIBXSMM_THROW
{
  INTERNAL_FREE_HOOK(ptr, NULL/*caller*/);
}
1128 #endif /*defined(LIBXSMM_MALLOC_HOOK_DYNAMIC)*/
1129 
1130 
libxsmm_malloc_init(void)1131 LIBXSMM_API_INTERN void libxsmm_malloc_init(void)
1132 {
1133 #if (0 != LIBXSMM_SYNC) && defined(LIBXSMM_MALLOC_SCRATCH_JOIN)
1134   const char *const env = getenv("LIBXSMM_MALLOC_JOIN");
1135   if (NULL != env && 0 != *env) internal_malloc_join = atoi(env);
1136 #endif
1137 #if defined(LIBXSMM_MALLOC_HOOK_DYNAMIC)
1138 # if defined(LIBXSMM_MALLOC_HOOK_QKMALLOC)
1139   void* handle_qkmalloc = NULL;
1140   dlerror(); /* clear an eventual error status */
1141   handle_qkmalloc = dlopen("libqkmalloc.so", RTLD_LAZY);
1142   if (NULL != handle_qkmalloc) {
1143     libxsmm_malloc_fn.memalign.ptr = internal_memalign_malloc;
1144     libxsmm_malloc_fn.malloc.dlsym = dlsym(handle_qkmalloc, "malloc");
1145     if (NULL == dlerror() && NULL != libxsmm_malloc_fn.malloc.dlsym) {
1146 #   if defined(LIBXSMM_MALLOC_HOOK_CALLOC)
1147       libxsmm_malloc_fn.calloc.dlsym = dlsym(handle_qkmalloc, "calloc");
1148       if (NULL == dlerror() && NULL != libxsmm_malloc_fn.calloc.dlsym)
1149 #   endif
1150       {
1151 #   if defined(LIBXSMM_MALLOC_HOOK_REALLOC)
1152         libxsmm_malloc_fn.realloc.dlsym = dlsym(handle_qkmalloc, "realloc");
1153         if (NULL == dlerror() && NULL != libxsmm_malloc_fn.realloc.dlsym)
1154 #   endif
1155         {
1156           libxsmm_malloc_fn.free.dlsym = dlsym(handle_qkmalloc, "free");
1157         }
1158       }
1159     }
1160     dlclose(handle_qkmalloc);
1161   }
1162   if (NULL == libxsmm_malloc_fn.free.ptr)
1163 # elif defined(LIBXSMM_MALLOC_HOOK_KMP)
1164   dlerror(); /* clear an eventual error status */
1165   libxsmm_malloc_fn.alignmem.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "kmp_aligned_malloc");
1166   if (NULL == dlerror() && NULL != libxsmm_malloc_fn.alignmem.dlsym) {
1167     libxsmm_malloc_fn.memalign.ptr = internal_memalign_twiddle;
1168     libxsmm_malloc_fn.malloc.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "kmp_malloc");
1169     if (NULL == dlerror() && NULL != libxsmm_malloc_fn.malloc.dlsym) {
1170 # if defined(LIBXSMM_MALLOC_HOOK_CALLOC)
1171       libxsmm_malloc_fn.calloc.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "kmp_calloc");
1172       if (NULL == dlerror() && NULL != libxsmm_malloc_fn.calloc.dlsym)
1173 # endif
1174       {
1175 # if defined(LIBXSMM_MALLOC_HOOK_REALLOC)
1176         libxsmm_malloc_fn.realloc.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "kmp_realloc");
1177         if (NULL == dlerror() && NULL != libxsmm_malloc_fn.realloc.dlsym)
1178 # endif
1179         {
1180           libxsmm_malloc_fn.free.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "kmp_free");
1181         }
1182       }
1183     }
1184   }
1185   if (NULL == libxsmm_malloc_fn.free.ptr)
1186 # endif /*defined(LIBXSMM_MALLOC_HOOK_QKMALLOC)*/
1187   {
1188     dlerror(); /* clear an eventual error status */
1189 # if (defined(LIBXSMM_BUILD) && (1 < (LIBXSMM_BUILD)))
1190     libxsmm_malloc_fn.memalign.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "__libc_memalign");
1191     if (NULL == dlerror() && NULL != libxsmm_malloc_fn.memalign.dlsym) {
1192       libxsmm_malloc_fn.malloc.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "__libc_malloc");
1193       if (NULL == dlerror() && NULL != libxsmm_malloc_fn.malloc.dlsym) {
1194 #   if defined(LIBXSMM_MALLOC_HOOK_CALLOC)
1195         libxsmm_malloc_fn.calloc.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "__libc_calloc");
1196         if (NULL == dlerror() && NULL != libxsmm_malloc_fn.calloc.dlsym)
1197 #   endif
1198         {
1199 #   if defined(LIBXSMM_MALLOC_HOOK_REALLOC)
1200           libxsmm_malloc_fn.realloc.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "__libc_realloc");
1201           if (NULL == dlerror() && NULL != libxsmm_malloc_fn.realloc.dlsym)
1202 #   endif
1203           {
1204             libxsmm_malloc_fn.free.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "__libc_free");
1205           }
1206         }
1207       }
1208     }
1209     if (NULL == libxsmm_malloc_fn.free.ptr) {
1210       void* handle_libc = NULL;
1211       dlerror(); /* clear an eventual error status */
1212       handle_libc = dlopen("libc.so." LIBXSMM_STRINGIFY(LIBXSMM_MALLOC_GLIBC), RTLD_LAZY);
1213       if (NULL != handle_libc) {
1214         libxsmm_malloc_fn.memalign.dlsym = dlsym(handle_libc, "__libc_memalign");
1215         if (NULL == dlerror() && NULL != libxsmm_malloc_fn.memalign.dlsym) {
1216           libxsmm_malloc_fn.malloc.dlsym = dlsym(handle_libc, "__libc_malloc");
1217           if (NULL == dlerror() && NULL != libxsmm_malloc_fn.malloc.dlsym) {
1218 #   if defined(LIBXSMM_MALLOC_HOOK_CALLOC)
1219             libxsmm_malloc_fn.calloc.dlsym = dlsym(handle_libc, "__libc_calloc");
1220             if (NULL == dlerror() && NULL != libxsmm_malloc_fn.calloc.dlsym)
1221 #   endif
1222             {
1223 #   if defined(LIBXSMM_MALLOC_HOOK_REALLOC)
1224               libxsmm_malloc_fn.realloc.dlsym = dlsym(handle_libc, "__libc_realloc");
1225               if (NULL == dlerror() && NULL != libxsmm_malloc_fn.realloc.dlsym)
1226 #   endif
1227               {
1228                 libxsmm_malloc_fn.free.dlsym = dlsym(handle_libc, "__libc_free");
1229               }
1230             }
1231           }
1232         }
1233         dlclose(handle_libc);
1234       }
1235     }
1236 #   if 0
1237     { /* attempt to setup deprecated GLIBC hooks */
1238       union { const void* dlsym; void* (**ptr)(size_t, size_t, const void*); } hook_memalign;
1239       dlerror(); /* clear an eventual error status */
1240       hook_memalign.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "__memalign_hook");
1241       if (NULL == dlerror() && NULL != hook_memalign.dlsym) {
1242         union { const void* dlsym; void* (**ptr)(size_t, const void*); } hook_malloc;
1243         hook_malloc.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "__malloc_hook");
1244         if (NULL == dlerror() && NULL != hook_malloc.dlsym) {
1245 #   if defined(LIBXSMM_MALLOC_HOOK_REALLOC)
1246           union { const void* dlsym; void* (**ptr)(void*, size_t, const void*); } hook_realloc;
1247           hook_realloc.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "__realloc_hook");
1248           if (NULL == dlerror() && NULL != hook_realloc.dlsym)
1249 #   endif
1250           {
1251             union { const void* dlsym; void (**ptr)(void*, const void*); } hook_free;
1252             hook_free.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "__free_hook");
1253             if (NULL == dlerror() && NULL != hook_free.dlsym) {
1254               *hook_memalign.ptr = internal_memalign_hook;
1255               *hook_malloc.ptr = internal_malloc_hook;
1256 #   if defined(LIBXSMM_MALLOC_HOOK_REALLOC)
1257               *hook_realloc.ptr = internal_realloc_hook;
1258 #   endif
1259               *hook_free.ptr = internal_free_hook;
1260             }
1261           }
1262         }
1263       }
1264     }
1265 #   endif
1266 # else /* TODO */
1267 # endif /*(defined(LIBXSMM_BUILD) && (1 < (LIBXSMM_BUILD)))*/
1268   }
1269   if (NULL != libxsmm_malloc_fn.free.ptr) {
1270 # if defined(LIBXSMM_MALLOC_HOOK_IMALLOC)
1271     union { const void* dlsym; libxsmm_malloc_fun* ptr; } i_malloc;
1272     i_malloc.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "i_malloc");
1273     if (NULL == dlerror() && NULL != i_malloc.dlsym) {
1274 #   if defined(LIBXSMM_MALLOC_HOOK_CALLOC)
1275       union { const void* dlsym; void* (**ptr)(size_t, size_t); } i_calloc;
1276       i_calloc.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "i_calloc");
1277       if (NULL == dlerror() && NULL != i_calloc.dlsym)
1278 #   endif
1279       {
1280 #   if defined(LIBXSMM_MALLOC_HOOK_REALLOC)
1281         union { const void* dlsym; libxsmm_realloc_fun* ptr; } i_realloc;
1282         i_realloc.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "i_realloc");
1283         if (NULL == dlerror() && NULL != i_realloc.dlsym)
1284 #   endif
1285         {
1286           union { const void* dlsym; libxsmm_free_fun* ptr; } i_free;
1287           i_free.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "i_free");
1288           if (NULL == dlerror() && NULL != i_free.dlsym) {
1289             *i_malloc.ptr = libxsmm_malloc_fn.malloc.ptr;
1290 #   if defined(LIBXSMM_MALLOC_HOOK_CALLOC)
1291             *i_calloc.ptr = libxsmm_malloc_fn.calloc.ptr;
1292 #   endif
1293 #   if defined(LIBXSMM_MALLOC_HOOK_REALLOC)
1294             *i_realloc.ptr = libxsmm_malloc_fn.realloc.ptr;
1295 #   endif
1296             *i_free.ptr = libxsmm_malloc_fn.free.ptr;
1297           }
1298         }
1299       }
1300     }
1301 # endif /*defined(LIBXSMM_MALLOC_HOOK_IMALLOC)*/
1302   }
1303   else { /* fall-back: potentially recursive */
1304 # if (defined(LIBXSMM_BUILD) && (1 < (LIBXSMM_BUILD)))
1305     libxsmm_malloc_fn.memalign.ptr = __libc_memalign;
1306     libxsmm_malloc_fn.malloc.ptr = __libc_malloc;
1307 #   if defined(LIBXSMM_MALLOC_HOOK_CALLOC)
1308     libxsmm_malloc_fn.calloc.ptr = __libc_calloc;
1309 #   endif
1310 #   if defined(LIBXSMM_MALLOC_HOOK_REALLOC)
1311     libxsmm_malloc_fn.realloc.ptr = __libc_realloc;
1312 #   endif
1313     libxsmm_malloc_fn.free.ptr = __libc_free;
1314 # else
1315     libxsmm_malloc_fn.memalign.ptr = libxsmm_memalign_internal;
1316     libxsmm_malloc_fn.malloc.ptr = malloc;
1317 #   if defined(LIBXSMM_MALLOC_HOOK_CALLOC)
1318     libxsmm_malloc_fn.calloc.ptr = calloc;
1319 #   endif
1320 #   if defined(LIBXSMM_MALLOC_HOOK_REALLOC)
1321     libxsmm_malloc_fn.realloc.ptr = realloc;
1322 #   endif
1323     libxsmm_malloc_fn.free.ptr = free;
1324 # endif
1325   }
1326 #endif
1327 }
1328 
1329 
libxsmm_malloc_finalize(void)1330 LIBXSMM_API_INTERN void libxsmm_malloc_finalize(void)
1331 {
1332 }
1333 
1334 
libxsmm_xset_default_allocator(LIBXSMM_LOCK_TYPE (LIBXSMM_LOCK)* lock,const void * context,libxsmm_malloc_function malloc_fn,libxsmm_free_function free_fn)1335 LIBXSMM_API_INTERN int libxsmm_xset_default_allocator(LIBXSMM_LOCK_TYPE(LIBXSMM_LOCK)* lock,
1336   const void* context, libxsmm_malloc_function malloc_fn, libxsmm_free_function free_fn)
1337 {
1338   int result = EXIT_SUCCESS;
1339   if (NULL != lock) {
1340     LIBXSMM_INIT
1341     LIBXSMM_LOCK_ACQUIRE(LIBXSMM_LOCK, lock);
1342   }
1343   if (NULL != malloc_fn.function && NULL != free_fn.function) {
1344     libxsmm_default_allocator_context = context;
1345     libxsmm_default_malloc_fn = malloc_fn;
1346     libxsmm_default_free_fn = free_fn;
1347   }
1348   else {
1349     libxsmm_malloc_function internal_malloc_fn;
1350     libxsmm_free_function internal_free_fn;
1351     const void* internal_allocator = NULL;
1352     internal_malloc_fn.function = __real_malloc;
1353     internal_free_fn.function = __real_free;
1354     /*internal_allocator = NULL;*/
1355     if (NULL == malloc_fn.function && NULL == free_fn.function) {
1356       libxsmm_default_allocator_context = internal_allocator;
1357       libxsmm_default_malloc_fn = internal_malloc_fn;
1358       libxsmm_default_free_fn = internal_free_fn;
1359     }
1360     else { /* invalid allocator */
1361       static int error_once = 0;
1362       if (0 != libxsmm_verbosity /* library code is expected to be mute */
1363         && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
1364       {
1365         fprintf(stderr, "LIBXSMM ERROR: allocator setup without malloc or free function!\n");
1366       }
1367       /* keep any valid (previously instantiated) default allocator */
1368       if (NULL == libxsmm_default_malloc_fn.function || NULL == libxsmm_default_free_fn.function) {
1369         libxsmm_default_allocator_context = internal_allocator;
1370         libxsmm_default_malloc_fn = internal_malloc_fn;
1371         libxsmm_default_free_fn = internal_free_fn;
1372       }
1373       result = EXIT_FAILURE;
1374     }
1375   }
1376   if (NULL != lock) {
1377     LIBXSMM_LOCK_RELEASE(LIBXSMM_LOCK, lock);
1378   }
1379   LIBXSMM_ASSERT(EXIT_SUCCESS == result);
1380   return result;
1381 }
1382 
1383 
libxsmm_xget_default_allocator(LIBXSMM_LOCK_TYPE (LIBXSMM_LOCK)* lock,const void ** context,libxsmm_malloc_function * malloc_fn,libxsmm_free_function * free_fn)1384 LIBXSMM_API_INTERN int libxsmm_xget_default_allocator(LIBXSMM_LOCK_TYPE(LIBXSMM_LOCK)* lock,
1385   const void** context, libxsmm_malloc_function* malloc_fn, libxsmm_free_function* free_fn)
1386 {
1387   int result = EXIT_SUCCESS;
1388   if (NULL != context || NULL != malloc_fn || NULL != free_fn) {
1389     if (NULL != lock) {
1390       LIBXSMM_INIT
1391       LIBXSMM_LOCK_ACQUIRE(LIBXSMM_LOCK, lock);
1392     }
1393     if (context) *context = libxsmm_default_allocator_context;
1394     if (NULL != malloc_fn) *malloc_fn = libxsmm_default_malloc_fn;
1395     if (NULL != free_fn) *free_fn = libxsmm_default_free_fn;
1396     if (NULL != lock) {
1397       LIBXSMM_LOCK_RELEASE(LIBXSMM_LOCK, lock);
1398     }
1399   }
1400   else if (0 != libxsmm_verbosity) { /* library code is expected to be mute */
1401     static int error_once = 0;
1402     if (1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED)) {
1403       fprintf(stderr, "LIBXSMM ERROR: invalid signature used to get the default memory allocator!\n");
1404     }
1405     result = EXIT_FAILURE;
1406   }
1407   LIBXSMM_ASSERT(EXIT_SUCCESS == result);
1408   return result;
1409 }
1410 
1411 
/* Installs the scratch-memory allocator (context plus malloc/free functions).
 * If both functions are NULL, the default allocator is adopted; a malloc
 * function without free function is accepted (warned once), e.g. for
 * arena/pool allocators that release memory wholesale. An allocator with
 * only a free function is invalid. A non-NULL lock serializes the update;
 * pass NULL when the caller already holds the lock.
 * Returns EXIT_SUCCESS, or EXIT_FAILURE for an invalid allocator.
 */
LIBXSMM_API_INTERN int libxsmm_xset_scratch_allocator(LIBXSMM_LOCK_TYPE(LIBXSMM_LOCK)* lock,
  const void* context, libxsmm_malloc_function malloc_fn, libxsmm_free_function free_fn)
{
  int result = EXIT_SUCCESS;
  static int error_once = 0;
  if (NULL != lock) {
    LIBXSMM_INIT
    LIBXSMM_LOCK_ACQUIRE(LIBXSMM_LOCK, lock);
  }
  /* make sure the default allocator is setup before adopting it eventually */
  if (NULL == libxsmm_default_malloc_fn.function || NULL == libxsmm_default_free_fn.function) {
    const libxsmm_malloc_function null_malloc_fn = { NULL };
    const libxsmm_free_function null_free_fn = { NULL };
    /* NULL/NULL installs the built-in default allocator */
    libxsmm_xset_default_allocator(NULL/*already locked*/, NULL/*context*/, null_malloc_fn, null_free_fn);
  }
  if (NULL == malloc_fn.function && NULL == free_fn.function) { /* adopt default allocator */
    libxsmm_scratch_allocator_context = libxsmm_default_allocator_context;
    libxsmm_scratch_malloc_fn = libxsmm_default_malloc_fn;
    libxsmm_scratch_free_fn = libxsmm_default_free_fn;
  }
  else if (NULL != malloc_fn.function) {
    /* missing free function is tolerated but reported once when verbose */
    if (NULL == free_fn.function
      && /*warning*/(LIBXSMM_VERBOSITY_WARN <= libxsmm_verbosity || 0 > libxsmm_verbosity)
      && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
    {
      fprintf(stderr, "LIBXSMM WARNING: scratch allocator setup without free function!\n");
    }
    libxsmm_scratch_allocator_context = context;
    libxsmm_scratch_malloc_fn = malloc_fn;
    libxsmm_scratch_free_fn = free_fn; /* NULL allowed */
  }
  else { /* invalid scratch allocator: free function without malloc function */
    if (0 != libxsmm_verbosity /* library code is expected to be mute */
      && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
    {
      fprintf(stderr, "LIBXSMM ERROR: invalid scratch allocator (default used)!\n");
    }
    /* keep any valid (previously instantiated) scratch allocator */
    if (NULL == libxsmm_scratch_malloc_fn.function) {
      libxsmm_scratch_allocator_context = libxsmm_default_allocator_context;
      libxsmm_scratch_malloc_fn = libxsmm_default_malloc_fn;
      libxsmm_scratch_free_fn = libxsmm_default_free_fn;
    }
    result = EXIT_FAILURE;
  }
  if (NULL != lock) {
    LIBXSMM_LOCK_RELEASE(LIBXSMM_LOCK, lock);
  }
  LIBXSMM_ASSERT(EXIT_SUCCESS == result);
  return result;
}
1463 
1464 
libxsmm_xget_scratch_allocator(LIBXSMM_LOCK_TYPE (LIBXSMM_LOCK)* lock,const void ** context,libxsmm_malloc_function * malloc_fn,libxsmm_free_function * free_fn)1465 LIBXSMM_API_INTERN int libxsmm_xget_scratch_allocator(LIBXSMM_LOCK_TYPE(LIBXSMM_LOCK)* lock,
1466   const void** context, libxsmm_malloc_function* malloc_fn, libxsmm_free_function* free_fn)
1467 {
1468   int result = EXIT_SUCCESS;
1469   if (NULL != context || NULL != malloc_fn || NULL != free_fn) {
1470     if (NULL != lock) {
1471       LIBXSMM_INIT
1472       LIBXSMM_LOCK_ACQUIRE(LIBXSMM_LOCK, lock);
1473     }
1474     if (context) *context = libxsmm_scratch_allocator_context;
1475     if (NULL != malloc_fn) *malloc_fn = libxsmm_scratch_malloc_fn;
1476     if (NULL != free_fn) *free_fn = libxsmm_scratch_free_fn;
1477     if (NULL != lock) {
1478       LIBXSMM_LOCK_RELEASE(LIBXSMM_LOCK, lock);
1479     }
1480   }
1481   else if (0 != libxsmm_verbosity) { /* library code is expected to be mute */
1482     static int error_once = 0;
1483     if (1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED)) {
1484       fprintf(stderr, "LIBXSMM ERROR: invalid signature used to get the scratch memory allocator!\n");
1485     }
1486     result = EXIT_FAILURE;
1487   }
1488   LIBXSMM_ASSERT(EXIT_SUCCESS == result);
1489   return result;
1490 }
1491 
1492 
libxsmm_set_default_allocator(const void * context,libxsmm_malloc_function malloc_fn,libxsmm_free_function free_fn)1493 LIBXSMM_API int libxsmm_set_default_allocator(const void* context,
1494   libxsmm_malloc_function malloc_fn, libxsmm_free_function free_fn)
1495 {
1496   return libxsmm_xset_default_allocator(&libxsmm_lock_global, context, malloc_fn, free_fn);
1497 }
1498 
1499 
libxsmm_get_default_allocator(const void ** context,libxsmm_malloc_function * malloc_fn,libxsmm_free_function * free_fn)1500 LIBXSMM_API int libxsmm_get_default_allocator(const void** context,
1501   libxsmm_malloc_function* malloc_fn, libxsmm_free_function* free_fn)
1502 {
1503   return libxsmm_xget_default_allocator(&libxsmm_lock_global, context, malloc_fn, free_fn);
1504 }
1505 
1506 
libxsmm_set_scratch_allocator(const void * context,libxsmm_malloc_function malloc_fn,libxsmm_free_function free_fn)1507 LIBXSMM_API int libxsmm_set_scratch_allocator(const void* context,
1508   libxsmm_malloc_function malloc_fn, libxsmm_free_function free_fn)
1509 {
1510   return libxsmm_xset_scratch_allocator(&libxsmm_lock_global, context, malloc_fn, free_fn);
1511 }
1512 
1513 
libxsmm_get_scratch_allocator(const void ** context,libxsmm_malloc_function * malloc_fn,libxsmm_free_function * free_fn)1514 LIBXSMM_API int libxsmm_get_scratch_allocator(const void** context,
1515   libxsmm_malloc_function* malloc_fn, libxsmm_free_function* free_fn)
1516 {
1517   return libxsmm_xget_scratch_allocator(&libxsmm_lock_global, context, malloc_fn, free_fn);
1518 }
1519 
1520 
/* Retrieves the properties of a buffer previously allocated by libxsmm_xmalloc:
 * the user-visible size, the allocation flags, and the start of the underlying
 * allocation (extra data). For a foreign (unknown) buffer the outputs are
 * zeroed and EXIT_FAILURE is returned (EXIT_SUCCESS when memory is NULL).
 * On input, *flags may indicate an executable buffer (LIBXSMM_MALLOC_FLAG_X)
 * which selects a stricter info-lookup check-level. */
LIBXSMM_API int libxsmm_get_malloc_xinfo(const void* memory, size_t* size, int* flags, void** extra)
{
  int result;
#if !defined(NDEBUG)
  if (NULL != size || NULL != extra)
#endif
  {
    /* check-level 1 for executable buffers, 2 otherwise */
    const int check = ((NULL == flags || 0 == (LIBXSMM_MALLOC_FLAG_X & *flags)) ? 2 : 1);
    const internal_malloc_info_type *const info = internal_malloc_info(memory, check);
    if (NULL != info) { /* known allocation: report the recorded properties */
      if (NULL != size) *size = info->size;
      if (NULL != flags) *flags = info->flags;
      if (NULL != extra) *extra = info->pointer;
      result = EXIT_SUCCESS;
    }
    else { /* potentially foreign buffer */
      result = (NULL != memory ? EXIT_FAILURE : EXIT_SUCCESS);
      if (NULL != size) *size = 0;
      if (NULL != flags) *flags = 0;
      if (NULL != extra) *extra = 0;
    }
  }
#if !defined(NDEBUG)
  else { /* debug build: reject a signature that cannot return any information */
    static int error_once = 0;
    if (0 != libxsmm_verbosity /* library code is expected to be mute */
      && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
    {
      fprintf(stderr, "LIBXSMM ERROR: attachment error for memory buffer %p!\n", memory);
    }
    LIBXSMM_ASSERT_MSG(0/*false*/, "LIBXSMM ERROR: attachment error");
    result = EXIT_FAILURE;
  }
#endif
  return result;
}
1557 
1558 
1559 #if !defined(_WIN32)
1560 
internal_xmalloc_mhint(void * buffer,size_t size)1561 LIBXSMM_API_INLINE void internal_xmalloc_mhint(void* buffer, size_t size)
1562 {
1563   LIBXSMM_ASSERT((MAP_FAILED != buffer && NULL != buffer) || 0 == size);
1564 #if defined(_DEFAULT_SOURCE) || defined(_BSD_SOURCE)
1565   /* proceed after failed madvise (even in case of an error; take what we got) */
1566   /* issue no warning as a failure seems to be related to the kernel version */
1567   madvise(buffer, size, MADV_NORMAL/*MADV_RANDOM*/
1568 # if defined(MADV_NOHUGEPAGE) /* if not available, we then take what we got (THP) */
1569     | ((LIBXSMM_MALLOC_ALIGNMAX * LIBXSMM_MALLOC_ALIGNFCT) > size ? MADV_NOHUGEPAGE : 0)
1570 # endif
1571 # if defined(MADV_DONTDUMP)
1572     | ((LIBXSMM_MALLOC_ALIGNMAX * LIBXSMM_MALLOC_ALIGNFCT) > size ? 0 : MADV_DONTDUMP)
1573 # endif
1574   );
1575 #else
1576   LIBXSMM_UNUSED(buffer); LIBXSMM_UNUSED(size);
1577 #endif
1578 }
1579 
1580 
/* Creates a dual-mapped buffer backed by an unlinked temporary file:
 * a read/exec view (stored into *rx) and a read/write view (returned).
 * This allows JIT-generated code under W^X-style restrictions, since the
 * writable and executable mappings are distinct but alias the same pages.
 * dir optionally prefixes the directory of the backing file; *rx carries
 * a placement hint for the executable view on input.
 * Returns MAP_FAILED on any error (and resets *rx to NULL if only the
 * writable view failed). */
LIBXSMM_API_INLINE void* internal_xmalloc_xmap(const char* dir, size_t size, int flags, void** rx)
{
  void* result = MAP_FAILED;
  char filename[4096] = LIBXSMM_MALLOC_XMAP_TEMPLATE;
  int i = 0;
  LIBXSMM_ASSERT(NULL != rx && MAP_FAILED != *rx);
  if (NULL != dir && 0 != *dir) { /* non-empty directory given: prefix the template */
    i = LIBXSMM_SNPRINTF(filename, sizeof(filename), "%s/" LIBXSMM_MALLOC_XMAP_TEMPLATE, dir);
  }
  if (0 <= i && i < (int)sizeof(filename)) { /* template path was not truncated */
    /* coverity[secure_temp] */
    i = mkstemp(filename);
    if (0 <= i) { /* i is now the file descriptor of the backing file */
      /* unlink immediately so the file disappears once closed; size it to the mapping */
      if (0 == unlink(filename) && 0 == ftruncate(i, size)) {
        const int mflags = (flags | LIBXSMM_MAP_SHARED);
        /* executable view first (honoring the *rx placement hint)... */
        void *const xmap = mmap(*rx, size, PROT_READ | PROT_EXEC, mflags, i, 0/*offset*/);
        if (MAP_FAILED != xmap) {
          LIBXSMM_ASSERT(NULL != xmap);
          /* ...then the writable view of the same file */
          result = mmap(NULL, size, PROT_READ | PROT_WRITE, mflags, i, 0/*offset*/);
          if (MAP_FAILED != result) {
            LIBXSMM_ASSERT(NULL != result);
            internal_xmalloc_mhint(xmap, size);
            *rx = xmap;
          }
          else { /* writable view failed: release the executable view as well */
            munmap(xmap, size);
            *rx = NULL;
          }
        }
      }
      close(i); /* mappings remain valid after closing the descriptor */
    }
  }
  return result;
}
1616 
1617 #endif /*!defined(_WIN32)*/
1618 
1619 
/* Reallocates via realloc_fn while maintaining LIBXSMM's in-band bookkeeping.
 * *ptr is the user pointer and *info the associated record (NULL for a
 * foreign pointer); realloc is applied to the underlying base allocation.
 * On return, *ptr/*info signal the caller's remaining work:
 *   - in-place realloc: *ptr=NULL (no copy), *info=NULL (no delete);
 *   - moved allocation: *ptr points into the new buffer at the preserved
 *     offset, *info=NULL (realloc already released the old buffer).
 * Returns the new base pointer, or NULL on failure. */
LIBXSMM_API_INLINE void* internal_xrealloc(void** ptr, internal_malloc_info_type** info, size_t size,
  libxsmm_realloc_fun realloc_fn, libxsmm_free_fun free_fn)
{
  /* base: start of the underlying allocation (may precede *ptr by the alignment/extra offset) */
  char *const base = (char*)(NULL != *info ? (*info)->pointer : *ptr), *result;
  LIBXSMM_ASSERT(NULL != *ptr);
  /* may implicitly invalidate info */
  result = (char*)realloc_fn(base, size);
  if (result == base) { /* signal no-copy */
    LIBXSMM_ASSERT(NULL != result);
    *info = NULL; /* no delete */
    *ptr = NULL; /* no copy */
  }
  else if (NULL != result) { /* copy */
    const size_t offset_src = (const char*)*ptr - base; /* preserve the user-pointer offset */
    *ptr = result + offset_src; /* copy */
    *info = NULL; /* no delete */
  }
#if !defined(NDEBUG) && 0
  else { /* failed */
    if (NULL != *info) {
      /* implicitly invalidates info */
      internal_xfree(*ptr, *info);
    }
    else { /* foreign pointer */
      free_fn(*ptr);
    }
    *info = NULL; /* no delete */
    *ptr = NULL; /* no copy */
  }
#else
  /* NOTE(review): the failure-cleanup branch above is compiled out ("&& 0"),
   * i.e. on failure *ptr and *info are left untouched and the caller retains
   * ownership of the original buffer (standard realloc semantics). */
  LIBXSMM_UNUSED(free_fn);
#endif
  return result;
}
1654 
1655 
LIBXSMM_API_INTERN void* internal_xmalloc(void** /*ptr*/, internal_malloc_info_type** /*info*/, size_t /*size*/,
  const void* /*context*/, libxsmm_malloc_function /*malloc_fn*/, libxsmm_free_function /*free_fn*/);
/* Allocates or reallocates memory using the given allocator.
 * If *ptr is NULL, a plain allocation is made (context selects the
 * context-form of malloc_fn). Otherwise a reallocation is performed:
 * when the allocator is (or wraps) the standard malloc/free pair, the real
 * realloc is used (free_fn is preferred for this identification since it is
 * recorded with the pointer); any other allocator falls back to a fresh
 * allocation, releasing the old buffer only if that allocation fails.
 * Returns the (new) base pointer, or NULL on failure. */
LIBXSMM_API_INTERN void* internal_xmalloc(void** ptr, internal_malloc_info_type** info, size_t size,
  const void* context, libxsmm_malloc_function malloc_fn, libxsmm_free_function free_fn)
{
  void* result;
  LIBXSMM_ASSERT(NULL != ptr && NULL != info && NULL != malloc_fn.function);
  if (NULL == *ptr) { /* plain allocation */
    result = (NULL == context
      ? malloc_fn.function(size)
      : malloc_fn.ctx_form(size, context));
  }
  else { /* reallocate */
    if (NULL != free_fn.function /* prefer free_fn since it is part of pointer-info */
      ? (__real_free == free_fn.function || free == free_fn.function)
      : (__real_malloc == malloc_fn.function || malloc == malloc_fn.function))
    { /* standard allocator: realloc preserves content implicitly */
#if defined(LIBXSMM_MALLOC_HOOK_REALLOC)
      result = internal_xrealloc(ptr, info, size, __real_realloc, __real_free);
#else
      result = internal_xrealloc(ptr, info, size, realloc, __real_free);
#endif
    }
    else { /* fall-back with regular allocation */
      result = (NULL == context
        ? malloc_fn.function(size)
        : malloc_fn.ctx_form(size, context));
      if (NULL == result) { /* failed: release the old buffer */
        if (NULL != *info) {
          internal_xfree(*ptr, *info);
        }
        else { /* foreign pointer */
          (NULL != free_fn.function ? free_fn.function : __real_free)(*ptr);
        }
        *ptr = NULL; /* safe delete */
      }
    }
  }
  return result;
}
1696 
1697 
/* Core allocation routine: allocates (or, with LIBXSMM_MALLOC_FLAG_REALLOC,
 * reallocates) memory of the given size/alignment. flags select properties
 * (LIBXSMM_MALLOC_FLAG_*: executable, scratch, mmap-backed, private, ...);
 * extra/extra_size optionally copy user data in front of the buffer.
 * An internal_malloc_info_type record is stored immediately before the
 * pointer returned in *memory (used later by internal_malloc_info/xfree).
 * Returns EXIT_SUCCESS or EXIT_FAILURE. */
LIBXSMM_API_INTERN int libxsmm_xmalloc(void** memory, size_t size, size_t alignment,
  int flags, const void* extra, size_t extra_size)
{
  int result = EXIT_SUCCESS;
#if !defined(NDEBUG)
  if (NULL != memory)
#endif
  {
    static int error_once = 0;
    if (0 != size) {
      size_t alloc_alignment = 0, alloc_size = 0, max_preserve = 0;
      internal_malloc_info_type* info = NULL;
      void* buffer = NULL, * reloc = NULL;
      /* ATOMIC BEGIN: this region should be atomic/locked */
      const void* context = libxsmm_default_allocator_context;
      libxsmm_malloc_function malloc_fn = libxsmm_default_malloc_fn;
      libxsmm_free_function free_fn = libxsmm_default_free_fn;
      if (0 != (LIBXSMM_MALLOC_FLAG_SCRATCH & flags)) { /* scratch: switch to the scratch allocator */
        context = libxsmm_scratch_allocator_context;
        malloc_fn = libxsmm_scratch_malloc_fn;
        free_fn = libxsmm_scratch_free_fn;
#if defined(LIBXSMM_MALLOC_MMAP_SCRATCH)
        flags |= LIBXSMM_MALLOC_FLAG_MMAP;
#endif
      }
      /* fall back to the real malloc/free when no complete allocator is installed */
      if ((0 != (internal_malloc_kind & 1) && 0 < internal_malloc_kind)
        || NULL == malloc_fn.function || NULL == free_fn.function)
      {
        malloc_fn.function = __real_malloc;
        free_fn.function = __real_free;
        context = NULL;
      }
      /* ATOMIC END: this region should be atomic */
      flags |= LIBXSMM_MALLOC_FLAG_RW; /* normalize given flags since flags=0 is accepted as well */
      if (0 != (LIBXSMM_MALLOC_FLAG_REALLOC & flags) && NULL != *memory) {
        /* look up the existing record to know how much content must be preserved */
        info = internal_malloc_info(*memory, 2/*check*/);
        if (NULL != info) {
          max_preserve = info->size;
        }
        else { /* reallocation of unknown allocation */
          flags &= ~LIBXSMM_MALLOC_FLAG_MMAP;
        }
      }
      else *memory = NULL;
#if !defined(LIBXSMM_MALLOC_MMAP)
      if (0 == (LIBXSMM_MALLOC_FLAG_X & flags) && 0 == (LIBXSMM_MALLOC_FLAG_MMAP & flags)) {
        /* plain allocator path: pad for info record, extra data, and alignment */
        alloc_alignment = (0 == (LIBXSMM_MALLOC_FLAG_REALLOC & flags) ? libxsmm_alignment(size, alignment) : alignment);
        alloc_size = size + extra_size + sizeof(internal_malloc_info_type) + alloc_alignment - 1;
        buffer = internal_xmalloc(memory, &info, alloc_size, context, malloc_fn, free_fn);
      }
      else
#endif
      if (NULL == info || size != info->size) { /* new allocation or size actually changes */
#if defined(_WIN32) ||defined(__CYGWIN__)
        const int mflags = (0 != (LIBXSMM_MALLOC_FLAG_X & flags) ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE);
        static SIZE_T alloc_alignmax = 0, alloc_pagesize = 0;
        if (0 == alloc_alignmax) { /* first/one time */
          SYSTEM_INFO system_info;
          GetSystemInfo(&system_info);
          alloc_pagesize = system_info.dwPageSize;
          alloc_alignmax = GetLargePageMinimum();
        }
        if ((LIBXSMM_MALLOC_ALIGNMAX * LIBXSMM_MALLOC_ALIGNFCT) <= size) { /* attempt to use large pages */
          HANDLE process_token;
          alloc_alignment = (NULL == info
            ? (0 == alignment ? alloc_alignmax : libxsmm_lcm(alignment, alloc_alignmax))
            : libxsmm_lcm(alignment, alloc_alignmax));
          alloc_size = LIBXSMM_UP2(size + extra_size + sizeof(internal_malloc_info_type) + alloc_alignment - 1, alloc_alignmax);
          /* large pages require the SeLockMemoryPrivilege; enable it temporarily */
          if (TRUE == OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &process_token)) {
            TOKEN_PRIVILEGES tp;
            if (TRUE == LookupPrivilegeValue(NULL, TEXT("SeLockMemoryPrivilege"), &tp.Privileges[0].Luid)) {
              tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; tp.PrivilegeCount = 1; /* enable privilege */
              if (TRUE == AdjustTokenPrivileges(process_token, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0)
                && ERROR_SUCCESS == GetLastError()/*may has failed (regardless of TRUE)*/)
              {
                /* VirtualAlloc cannot be used to reallocate memory */
                buffer = VirtualAlloc(NULL, alloc_size, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, mflags);
              }
              tp.Privileges[0].Attributes = 0; /* disable privilege */
              AdjustTokenPrivileges(process_token, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0);
            }
            CloseHandle(process_token);
          }
        }
        else { /* small allocation using regular page-size */
          alloc_alignment = (NULL == info ? libxsmm_alignment(size, alignment) : alignment);
          alloc_size = LIBXSMM_UP2(size + extra_size + sizeof(internal_malloc_info_type) + alloc_alignment - 1, alloc_pagesize);
        }
        if (NULL == buffer) { /* small allocation or retry with regular page size */
          /* VirtualAlloc cannot be used to reallocate memory */
          buffer = VirtualAlloc(NULL, alloc_size, MEM_RESERVE | MEM_COMMIT, mflags);
        }
        if (NULL != buffer) {
          flags |= LIBXSMM_MALLOC_FLAG_MMAP; /* select the corresponding deallocation */
        }
        else if (0 == (LIBXSMM_MALLOC_FLAG_MMAP & flags)) { /* fall-back allocation */
          buffer = internal_xmalloc(memory, &info, alloc_size, context, malloc_fn, free_fn);
        }
#else /* !defined(_WIN32) */
# if defined(MAP_HUGETLB)
        static size_t limit_hugetlb = LIBXSMM_SCRATCH_UNLIMITED; /* watermark lowered after hugetlb failures */
# endif
# if defined(MAP_LOCKED)
        static size_t limit_plocked = LIBXSMM_SCRATCH_UNLIMITED; /* watermark lowered after mlock failures */
# endif
# if defined(MAP_32BIT)
        static int map32 = 1;
# endif
        /* assemble the mmap flags according to size, flags, and platform support */
        int mflags = 0
# if defined(MAP_UNINITIALIZED) && 0/*fails with WSL*/
          | MAP_UNINITIALIZED /* unlikely available */
# endif
# if defined(MAP_NORESERVE)
          | (LIBXSMM_MALLOC_ALIGNMAX < size ? 0 : MAP_NORESERVE)
# endif
# if defined(MAP_32BIT)
          | ((0 != (LIBXSMM_MALLOC_FLAG_X & flags) && 0 != map32
            && LIBXSMM_X86_AVX512_CORE > libxsmm_target_archid
            && LIBXSMM_X86_AVX512 < libxsmm_target_archid) ? MAP_32BIT : 0)
# endif
# if defined(MAP_HUGETLB) /* may fail depending on system settings */
          | ((0 == (LIBXSMM_MALLOC_FLAG_X & flags)
            && ((LIBXSMM_MALLOC_ALIGNMAX * LIBXSMM_MALLOC_ALIGNFCT) <= size ||
              0 != (LIBXSMM_MALLOC_FLAG_PHUGE & flags))
            && (internal_malloc_hugetlb + size) < limit_hugetlb) ? MAP_HUGETLB : 0)
# endif
# if defined(MAP_LOCKED) && !defined(LIBXSMM_MALLOC_LOCK_ONFAULT)
          | ((0 == (LIBXSMM_MALLOC_FLAG_X & flags)
            && (internal_malloc_plocked + size) < limit_plocked) ? MAP_LOCKED : 0)
# endif
        ; /* mflags */
# if defined(MAP_POPULATE)
        { static int prefault = 0;
          if (0 == prefault) { /* prefault only on Linux 3.10.0-327 (and later) to avoid data race in page-fault handler */
            struct utsname osinfo; unsigned int version_major = 3, version_minor = 10, version_update = 0, version_patch = 327;
            if (0 <= uname(&osinfo) && 0 == strcmp("Linux", osinfo.sysname)
              && 4 == sscanf(osinfo.release, "%u.%u.%u-%u", &version_major, &version_minor, &version_update, &version_patch)
              && LIBXSMM_VERSION4(3, 10, 0, 327) > LIBXSMM_VERSION4(version_major, version_minor, version_update, version_patch))
            {
              mflags |= MAP_POPULATE; prefault = 1;
            }
            else prefault = -1;
          }
          else if (1 == prefault) mflags |= MAP_POPULATE;
        }
# endif
        /* make allocated size at least a multiple of the smallest page-size to avoid split-pages (unmap!) */
        alloc_alignment = libxsmm_lcm(0 == alignment ? libxsmm_alignment(size, alignment) : alignment, LIBXSMM_PAGE_MINSIZE);
        alloc_size = LIBXSMM_UP2(size + extra_size + sizeof(internal_malloc_info_type) + alloc_alignment - 1, alloc_alignment);
        if (0 == (LIBXSMM_MALLOC_FLAG_X & flags)) { /* anonymous and non-executable */
# if defined(MAP_32BIT)
          LIBXSMM_ASSERT(0 == (MAP_32BIT & mflags));
# endif
# if 0
          LIBXSMM_ASSERT(NULL != info || NULL == *memory); /* no memory mapping of foreign pointer */
# endif
          /* for a reallocation, hint the previous base address as placement */
          buffer = mmap(NULL == info ? NULL : info->pointer, alloc_size, PROT_READ | PROT_WRITE,
            MAP_PRIVATE | LIBXSMM_MAP_ANONYMOUS | mflags, -1, 0/*offset*/);
# if defined(MAP_HUGETLB)
          /* retry without MAP_HUGETLB (and adjust the watermark) if the mapping failed */
          INTERNAL_XMALLOC_KIND(MAP_HUGETLB, "huge-page", LIBXSMM_MALLOC_FLAG_PHUGE, flags, mflags,
            internal_malloc_hugetlb, limit_hugetlb, info, alloc_size, buffer);
# endif
# if defined(MAP_LOCKED)
#   if !defined(LIBXSMM_MALLOC_LOCK_ONFAULT)
          /* retry without MAP_LOCKED (and adjust the watermark) if the mapping failed */
          INTERNAL_XMALLOC_KIND(MAP_LOCKED, "locked-page", LIBXSMM_MALLOC_FLAG_PLOCK, flags, mflags,
            internal_malloc_plocked, limit_plocked, info, alloc_size, buffer);
#   else
          if (0 != (MAP_LOCKED & mflags) && MAP_FAILED != buffer) {
            LIBXSMM_ASSERT(NULL != buffer);
#     if 0 /* mlock2 is potentially not exposed */
            if (0 == mlock2(buffer, alloc_size, MLOCK_ONFAULT))
#     else
            if (0 == syscall(SYS_mlock2, buffer, alloc_size, MLOCK_ONFAULT))
#     endif
            {
              LIBXSMM_ATOMIC_ADD_FETCH(&internal_malloc_plocked, alloc_size, LIBXSMM_ATOMIC_RELAXED);
              flags |= LIBXSMM_MALLOC_FLAG_PLOCK;
            }
            else { /* update watermark */
              INTERNAL_XMALLOC_WATERMARK("locked-page", internal_malloc_plocked, limit_plocked, alloc_size);
            }
          }
#   endif
# endif
        }
        else { /* executable buffer requested */
          static /*LIBXSMM_TLS*/ int fallback = -1; /* fall-back allocation method */
# if defined(MAP_HUGETLB)
          LIBXSMM_ASSERT(0 == (MAP_HUGETLB & mflags));
# endif
# if defined(MAP_LOCKED)
          LIBXSMM_ASSERT(0 == (MAP_LOCKED & mflags));
# endif
          if (0 > (int)LIBXSMM_ATOMIC_LOAD(&fallback, LIBXSMM_ATOMIC_RELAXED)) {
            /* one-time decision of the initial fall-back level (LIBXSMM_SE overrides libxsmm_se) */
            const char *const env = getenv("LIBXSMM_SE");
            LIBXSMM_ATOMIC_STORE(&fallback, NULL == env
              /* libxsmm_se decides */
              ? (0 == libxsmm_se ? LIBXSMM_MALLOC_FINAL : LIBXSMM_MALLOC_FALLBACK)
              /* user's choice takes precedence */
              : ('0' != *env ? LIBXSMM_MALLOC_FALLBACK : LIBXSMM_MALLOC_FINAL),
              LIBXSMM_ATOMIC_SEQ_CST);
            LIBXSMM_ASSERT(0 <= fallback);
          }
          /* cascade of file-backed mappings in different directories, then anonymous mmap */
          INTERNAL_XMALLOC(0, fallback, "TMPDIR", "/tmp", map32, mflags, alloc_size, buffer, &reloc); /* 1st try */
          if (1 <= fallback) { /* continue with fall-back */
            INTERNAL_XMALLOC(1, fallback, "JITDUMPDIR", "", map32, mflags, alloc_size, buffer, &reloc); /* 2nd try */
            if (2 <= fallback) { /* continue with fall-back */
              INTERNAL_XMALLOC(2, fallback, "HOME", "", map32, mflags, alloc_size, buffer, &reloc); /* 3rd try */
              if (3 <= fallback) { /* continue with fall-back */
                if (3 == fallback) { /* 4th try */
                  buffer = mmap(reloc, alloc_size, PROT_READ | PROT_WRITE | PROT_EXEC,
# if defined(MAP_32BIT)
                    MAP_PRIVATE | LIBXSMM_MAP_ANONYMOUS | (mflags & ~MAP_32BIT),
# else
                    MAP_PRIVATE | LIBXSMM_MAP_ANONYMOUS | mflags,
# endif
                    -1, 0/*offset*/);
                  if (MAP_FAILED == buffer) fallback = 4;
                }
                if (4 == fallback && MAP_FAILED != buffer) { /* final */
                  LIBXSMM_ASSERT(fallback == LIBXSMM_MALLOC_FINAL + 1);
                  buffer = MAP_FAILED; /* trigger final fall-back */
                }
              }
            }
          }
        }
        if (MAP_FAILED != buffer && NULL != buffer) {
          flags |= LIBXSMM_MALLOC_FLAG_MMAP; /* select deallocation */
        }
        else { /* allocation failed */
          if (0 == (LIBXSMM_MALLOC_FLAG_MMAP & flags)) { /* ultimate fall-back */
            buffer = (NULL != malloc_fn.function
              ? (NULL == context ? malloc_fn.function(alloc_size) : malloc_fn.ctx_form(alloc_size, context))
              : (NULL));
          }
          reloc = NULL;
        }
        if (MAP_FAILED != buffer && NULL != buffer) {
          internal_xmalloc_mhint(buffer, alloc_size);
        }
#endif /* !defined(_WIN32) */
      }
      else { /* reallocation of the same pointer and size */
        alloc_size = size + extra_size + sizeof(internal_malloc_info_type) + alignment - 1;
        if (NULL != info) {
          buffer = info->pointer;
          flags |= info->flags;
        }
        else {
          flags |= LIBXSMM_MALLOC_FLAG_MMAP;
          buffer = *memory;
        }
        alloc_alignment = alignment;
        *memory = NULL; /* signal no-copy */
      }
      if (
#if !defined(_WIN32) && !defined(__clang_analyzer__)
        MAP_FAILED != buffer &&
#endif
        NULL != buffer)
      { /* commit: layout is [extra data][info record][aligned user buffer] */
        char *const cbuffer = (char*)buffer, *const aligned = LIBXSMM_ALIGN(
          cbuffer + extra_size + sizeof(internal_malloc_info_type), alloc_alignment);
        internal_malloc_info_type *const buffer_info = (internal_malloc_info_type*)(
          aligned - sizeof(internal_malloc_info_type));
        LIBXSMM_ASSERT((aligned + size) <= (cbuffer + alloc_size));
        LIBXSMM_ASSERT(0 < alloc_alignment);
        /* former content must be preserved prior to setup of buffer_info */
        if (NULL != *memory) { /* preserve/copy previous content */
#if 0
          LIBXSMM_ASSERT(0 != (LIBXSMM_MALLOC_FLAG_REALLOC & flags));
#endif
          /* content behind foreign pointers is not explicitly preserved; buffers may overlap */
          memmove(aligned, *memory, LIBXSMM_MIN(max_preserve, size));
          if (NULL != info /* known allocation (non-foreign pointer) */
            && EXIT_SUCCESS != internal_xfree(*memory, info) /* !libxsmm_free */
            && 0 != libxsmm_verbosity /* library code is expected to be mute */
            && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
          { /* display some extra context of the failure (reallocation) */
            fprintf(stderr, "LIBXSMM ERROR: memory reallocation failed to release memory!\n");
          }
        }
        if (NULL != extra || 0 == extra_size) { /* place extra data in front of the record */
          const char *const src = (const char*)extra;
          int i; for (i = 0; i < (int)extra_size; ++i) cbuffer[i] = src[i];
        }
        else if (0 != libxsmm_verbosity /* library code is expected to be mute */
          && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
        {
          fprintf(stderr, "LIBXSMM ERROR: incorrect extraneous data specification!\n");
          /* no EXIT_FAILURE because valid buffer is returned */
        }
        if (0 == (LIBXSMM_MALLOC_FLAG_X & flags)) { /* update statistics */
          if (0 == (LIBXSMM_MALLOC_FLAG_PRIVATE & flags)) { /* public */
            if (0 != (LIBXSMM_MALLOC_FLAG_SCRATCH & flags)) { /* scratch */
              const size_t watermark = LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_ADD_FETCH, LIBXSMM_BITS)(
                &internal_malloc_public_cur, alloc_size, LIBXSMM_ATOMIC_RELAXED);
              if (internal_malloc_public_max < watermark) internal_malloc_public_max = watermark; /* accept data-race */
            }
            else { /* local */
              const size_t watermark = LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_ADD_FETCH, LIBXSMM_BITS)(
                &internal_malloc_local_cur, alloc_size, LIBXSMM_ATOMIC_RELAXED);
              if (internal_malloc_local_max < watermark) internal_malloc_local_max = watermark; /* accept data-race */
            }
          }
          else { /* private */
            const size_t watermark = LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_ADD_FETCH, LIBXSMM_BITS)(
              &internal_malloc_private_cur, alloc_size, LIBXSMM_ATOMIC_RELAXED);
            if (internal_malloc_private_max < watermark) internal_malloc_private_max = watermark; /* accept data-race */
          }
        }
        /* keep allocation function on record */
        if (0 == (LIBXSMM_MALLOC_FLAG_MMAP & flags)) {
          buffer_info->context = context;
          buffer_info->free = free_fn;
        }
        else { /* mmap-backed: deallocation via munmap, no free function needed */
          buffer_info->free.function = NULL;
          buffer_info->context = NULL;
        }
        buffer_info->size = size; /* record user's size rather than allocated size */
        buffer_info->pointer = buffer;
        buffer_info->reloc = reloc;
        buffer_info->flags = flags;
#if defined(LIBXSMM_VTUNE)
        buffer_info->code_id = 0;
#endif /* info must be initialized to calculate correct checksum */
#if !defined(LIBXSMM_MALLOC_CRC_OFF)
# if defined(LIBXSMM_MALLOC_CRC_LIGHT)
        buffer_info->hash = LIBXSMM_CRC32U(LIBXSMM_BITS)(LIBXSMM_MALLOC_SEED, &buffer_info);
# else
        buffer_info->hash = libxsmm_crc32(LIBXSMM_MALLOC_SEED, buffer_info,
          (unsigned int)(((char*)&buffer_info->hash) - ((char*)buffer_info)));
# endif
#endif  /* finally commit/return allocated buffer */
        *memory = aligned;
      }
      else { /* allocation failed on all paths */
        if (0 != libxsmm_verbosity /* library code is expected to be mute */
         && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
        {
          char alloc_size_buffer[32];
          libxsmm_format_size(alloc_size_buffer, sizeof(alloc_size_buffer), alloc_size, "KM", "B", 10);
          fprintf(stderr, "LIBXSMM ERROR: failed to allocate %s with flag=%i!\n", alloc_size_buffer, flags);
        }
        result = EXIT_FAILURE;
        *memory = NULL;
      }
    }
    else { /* zero-sized request: not an error, but warn when verbose */
      if ((LIBXSMM_VERBOSITY_HIGH <= libxsmm_verbosity || 0 > libxsmm_verbosity) /* library code is expected to be mute */
        && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
      {
        fprintf(stderr, "LIBXSMM WARNING: zero-sized memory allocation detected!\n");
      }
      *memory = NULL; /* no EXIT_FAILURE */
    }
  }
#if !defined(NDEBUG)
  else if (0 != size) { /* debug build: NULL output pointer with non-zero size */
    result = EXIT_FAILURE;
  }
#endif
  return result;
}
2064 
2065 
/** Deallocates a buffer previously obtained from libxsmm_xmalloc (internal API).
 *  The "check" argument is forwarded to internal_malloc_info and controls how
 *  strictly the buffer's descriptor is validated. A non-NULL buffer without a
 *  valid descriptor is handed to __real_free as a fall-back, and the mismatch
 *  is reported unless the build is configured to be mute. */
LIBXSMM_API_INTERN void libxsmm_xfree(const void* memory, int check)
{
#if (!defined(LIBXSMM_MALLOC_HOOK_STATIC) && !defined(LIBXSMM_MALLOC_HOOK_DYNAMIC)) || defined(_DEBUG)
  static int error_once = 0; /* each error message is printed at most once */
#endif
  /*const*/ internal_malloc_info_type *const info = internal_malloc_info(memory, check);
  if (NULL != info) { /* !libxsmm_free */
#if (!defined(LIBXSMM_MALLOC_HOOK_STATIC) && !defined(LIBXSMM_MALLOC_HOOK_DYNAMIC)) || defined(_DEBUG)
    if (EXIT_SUCCESS != internal_xfree(memory, info)) {
      if ( 0 != libxsmm_verbosity /* library code is expected to be mute */
        && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
      {
        fprintf(stderr, "LIBXSMM ERROR: memory deallocation failed!\n");
      }
    }
#else
    internal_xfree(memory, info); /* result intentionally ignored in this configuration */
#endif
  }
  else if (NULL != memory) {
#if 1
    /* buffer carries no LIBXSMM descriptor: forward to the original free */
    union { const void* const_ptr; void* ptr; } cast;
    cast.const_ptr = memory; /* C-cast still warns */
    __real_free(cast.ptr);
#endif
#if (!defined(LIBXSMM_MALLOC_HOOK_STATIC) && !defined(LIBXSMM_MALLOC_HOOK_DYNAMIC)) || defined(_DEBUG)
    if ( 0 != libxsmm_verbosity /* library code is expected to be mute */
      && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
    {
      fprintf(stderr, "LIBXSMM ERROR: deallocation does not match allocation!\n");
    }
#endif
  }
}
2100 
2101 
2102 #if defined(LIBXSMM_VTUNE)
internal_get_vtune_jitdesc(const void * code,unsigned int code_id,size_t code_size,const char * code_name,LIBXSMM_VTUNE_JIT_DESC_TYPE * desc)2103 LIBXSMM_API_INLINE void internal_get_vtune_jitdesc(const void* code,
2104   unsigned int code_id, size_t code_size, const char* code_name,
2105   LIBXSMM_VTUNE_JIT_DESC_TYPE* desc)
2106 {
2107   LIBXSMM_ASSERT(NULL != code && 0 != code_id && 0 != code_size && NULL != desc);
2108   desc->method_id = code_id;
2109   /* incorrect constness (method_name) */
2110   desc->method_name = (char*)code_name;
2111   /* incorrect constness (method_load_address) */
2112   desc->method_load_address = (void*)code;
2113   desc->method_size = code_size;
2114   desc->line_number_size = 0;
2115   desc->line_number_table = NULL;
2116   desc->class_file_name = NULL;
2117   desc->source_file_name = NULL;
2118 # if (2 <= LIBXSMM_VTUNE_JITVERSION)
2119   desc->module_name = "libxsmm.jit";
2120 # endif
2121 }
2122 #endif
2123 
2124 
libxsmm_malloc_attrib(void ** memory,int flags,const char * name)2125 LIBXSMM_API_INTERN int libxsmm_malloc_attrib(void** memory, int flags, const char* name)
2126 {
2127   internal_malloc_info_type *const info = (NULL != memory ? internal_malloc_info(*memory, 0/*no check*/) : NULL);
2128   int result = EXIT_SUCCESS;
2129   static int error_once = 0;
2130   if (NULL != info) {
2131     void *const buffer = info->pointer;
2132     const size_t size = info->size;
2133 #if defined(_WIN32)
2134     LIBXSMM_ASSERT(NULL != buffer || 0 == size);
2135 #else
2136     LIBXSMM_ASSERT((NULL != buffer && MAP_FAILED != buffer) || 0 == size);
2137 #endif
2138     flags |= (info->flags & ~LIBXSMM_MALLOC_FLAG_RWX); /* merge with current flags */
2139     /* quietly keep the read permission, but eventually revoke write permissions */
2140     if (0 == (LIBXSMM_MALLOC_FLAG_W & flags) || 0 != (LIBXSMM_MALLOC_FLAG_X & flags)) {
2141       const size_t alignment = (size_t)(((const char*)(*memory)) - ((const char*)buffer));
2142       const size_t alloc_size = size + alignment;
2143       if (0 == (LIBXSMM_MALLOC_FLAG_X & flags)) { /* data-buffer; non-executable */
2144 #if defined(_WIN32)
2145         /* TODO: implement memory protection under Microsoft Windows */
2146         LIBXSMM_UNUSED(alloc_size);
2147 #else
2148         if (EXIT_SUCCESS != mprotect(buffer, alloc_size/*entire memory region*/, PROT_READ)
2149           && (LIBXSMM_VERBOSITY_HIGH <= libxsmm_verbosity || 0 > libxsmm_verbosity) /* library code is expected to be mute */
2150           && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
2151         {
2152           fprintf(stderr, "LIBXSMM WARNING: read-only request for buffer failed!\n");
2153         }
2154 #endif
2155       }
2156       else { /* executable buffer requested */
2157         void *const code_ptr = NULL != info->reloc ? ((void*)(((char*)info->reloc) + alignment)) : *memory;
2158         LIBXSMM_ASSERT(0 != (LIBXSMM_MALLOC_FLAG_X & flags));
2159         if (name && *name) { /* profiler support requested */
2160           if (0 > libxsmm_verbosity) { /* avoid dump when only the profiler is enabled */
2161             FILE* code_file = fopen(name, "rb");
2162             int diff = 0;
2163             if (NULL == code_file) { /* file does not exist */
2164               code_file = fopen(name, "wb");
2165               if (NULL != code_file) { /* dump byte-code into a file */
2166                 fwrite(code_ptr, 1, size, code_file);
2167                 fclose(code_file);
2168               }
2169             }
2170             else { /* check existing file */
2171               const char* check_a = (const char*)code_ptr;
2172               char check_b[4096];
2173               size_t rest = size;
2174               do {
2175                 const size_t n = fread(check_b, 1, LIBXSMM_MIN(sizeof(check_b), rest), code_file);
2176                 diff += memcmp(check_a, check_b, LIBXSMM_MIN(sizeof(check_b), n));
2177                 check_a += n;
2178                 rest -= n;
2179               } while (0 < rest && 0 == diff);
2180               fclose(code_file);
2181             }
2182             fprintf(stderr, "LIBXSMM-JIT-DUMP(ptr:file) %p : %s\n", code_ptr, name);
2183             if (0 != diff) { /* override existing dump and warn about erroneous condition */
2184               fprintf(stderr, "LIBXSMM ERROR: %s is shared by different code!\n", name);
2185               code_file = fopen(name, "wb");
2186               if (NULL != code_file) { /* dump byte-code into a file */
2187                 fwrite(code_ptr, 1, size, code_file);
2188                 fclose(code_file);
2189               }
2190             }
2191           }
2192 #if defined(LIBXSMM_VTUNE)
2193           if (iJIT_SAMPLING_ON == iJIT_IsProfilingActive()) {
2194             LIBXSMM_VTUNE_JIT_DESC_TYPE vtune_jit_desc;
2195             const unsigned int code_id = iJIT_GetNewMethodID();
2196             internal_get_vtune_jitdesc(code_ptr, code_id, size, name, &vtune_jit_desc);
2197             iJIT_NotifyEvent(LIBXSMM_VTUNE_JIT_LOAD, &vtune_jit_desc);
2198             info->code_id = code_id;
2199           }
2200           else {
2201             info->code_id = 0;
2202           }
2203 #endif
2204 #if defined(LIBXSMM_PERF)
2205           /* If JIT is enabled and a valid name is given, emit information for profiler
2206            * In jitdump case this needs to be done after mprotect as it gets overwritten
2207            * otherwise. */
2208           libxsmm_perf_dump_code(code_ptr, size, name);
2209 #endif
2210         }
2211         if (NULL != info->reloc && info->pointer != info->reloc) {
2212 #if defined(_WIN32)
2213           /* TODO: implement memory protection under Microsoft Windows */
2214 #else
2215           /* memory is already protected at this point; relocate code */
2216           LIBXSMM_ASSERT(0 != (LIBXSMM_MALLOC_FLAG_MMAP & flags));
2217           *memory = code_ptr; /* relocate */
2218           info->pointer = info->reloc;
2219           info->reloc = NULL;
2220 # if !defined(LIBXSMM_MALLOC_CRC_OFF) /* update checksum */
2221 #   if defined(LIBXSMM_MALLOC_CRC_LIGHT)
2222           { const internal_malloc_info_type *const code_info = internal_malloc_info(code_ptr, 0/*no check*/);
2223             info->hash = LIBXSMM_CRC32U(LIBXSMM_BITS)(LIBXSMM_MALLOC_SEED, &code_info);
2224           }
2225 #   else
2226           info->hash = libxsmm_crc32(LIBXSMM_MALLOC_SEED, info,
2227             /* info size minus actual hash value */
2228             (unsigned int)(((char*)&info->hash) - ((char*)info)));
2229 #   endif
2230 # endif   /* treat memory protection errors as soft error; ignore return value */
2231           munmap(buffer, alloc_size);
2232 #endif
2233         }
2234 #if !defined(_WIN32)
2235         else { /* malloc-based fall-back */
2236           int mprotect_result;
2237 # if !defined(LIBXSMM_MALLOC_CRC_OFF) && defined(LIBXSMM_VTUNE) /* check checksum */
2238 #   if defined(LIBXSMM_MALLOC_CRC_LIGHT)
2239           assert(info->hash == LIBXSMM_CRC32U(LIBXSMM_BITS)(LIBXSMM_MALLOC_SEED, &info)); /* !LIBXSMM_ASSERT */
2240 #   else
2241           assert(info->hash == libxsmm_crc32(LIBXSMM_MALLOC_SEED, info, /* !LIBXSMM_ASSERT */
2242             /* info size minus actual hash value */
2243             (unsigned int)(((char*)&info->hash) - ((char*)info))));
2244 #   endif
2245 # endif   /* treat memory protection errors as soft error; ignore return value */
2246           mprotect_result = mprotect(buffer, alloc_size/*entire memory region*/, PROT_READ | PROT_EXEC);
2247           if (EXIT_SUCCESS != mprotect_result) {
2248             if (0 != libxsmm_se) { /* hard-error in case of SELinux */
2249               if (0 != libxsmm_verbosity /* library code is expected to be mute */
2250                 && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
2251               {
2252                 fprintf(stderr, "LIBXSMM ERROR: failed to allocate an executable buffer!\n");
2253               }
2254               result = mprotect_result;
2255             }
2256             else if ((LIBXSMM_VERBOSITY_HIGH <= libxsmm_verbosity || 0 > libxsmm_verbosity) /* library code is expected to be mute */
2257               && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
2258             {
2259               fprintf(stderr, "LIBXSMM WARNING: read-only request for JIT-buffer failed!\n");
2260             }
2261           }
2262         }
2263 #endif
2264       }
2265     }
2266   }
2267   else if (NULL == memory || NULL == *memory) {
2268     if (0 != libxsmm_verbosity /* library code is expected to be mute */
2269      && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
2270     {
2271       fprintf(stderr, "LIBXSMM ERROR: libxsmm_malloc_attrib failed because NULL cannot be attributed!\n");
2272     }
2273     result = EXIT_FAILURE;
2274   }
2275   else if ((LIBXSMM_VERBOSITY_WARN <= libxsmm_verbosity || 0 > libxsmm_verbosity)
2276     && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
2277   {
2278     fprintf(stderr, "LIBXSMM WARNING: %s buffer %p does not match!\n",
2279       0 != (LIBXSMM_MALLOC_FLAG_X & flags) ? "executable" : "memory", *memory);
2280   }
2281   return result;
2282 }
2283 
2284 
libxsmm_aligned_malloc(size_t size,size_t alignment)2285 LIBXSMM_API LIBXSMM_ATTRIBUTE_MALLOC void* libxsmm_aligned_malloc(size_t size, size_t alignment)
2286 {
2287   void* result = NULL;
2288   LIBXSMM_INIT
2289   if (2 > internal_malloc_kind) {
2290 #if !defined(NDEBUG)
2291     int status =
2292 #endif
2293     libxsmm_xmalloc(&result, size, alignment, LIBXSMM_MALLOC_FLAG_DEFAULT, NULL/*extra*/, 0/*extra_size*/);
2294     assert(EXIT_SUCCESS == status || NULL == result); /* !LIBXSMM_ASSERT */
2295   }
2296   else { /* scratch */
2297     const void *const caller = libxsmm_trace_caller_id(0/*level*/);
2298     internal_scratch_malloc(&result, size, alignment, LIBXSMM_MALLOC_FLAG_DEFAULT, caller);
2299   }
2300   return result;
2301 }
2302 
2303 
/** Reallocates a buffer (public API). The alignment of the new buffer is
 *  derived from the given pointer: the number of trailing zero-bits of the
 *  address bounds the alignment the original allocation satisfied.
 *  NOTE(review): "1 << nzeros" is an int-typed shift; presumably nzeros stays
 *  well below 31 for real allocations and BITSCANFWD64 of a NULL pointer
 *  yields 0 -- confirm the macro's semantics before relying on edge cases. */
LIBXSMM_API void* libxsmm_realloc(size_t size, void* ptr)
{
  const int nzeros = LIBXSMM_INTRINSICS_BITSCANFWD64((uintptr_t)ptr), alignment = 1 << nzeros;
  /* sanity: the address must not have set bits below the discovered alignment */
  LIBXSMM_ASSERT(0 == ((uintptr_t)ptr & ~(0xFFFFFFFFFFFFFFFF << nzeros)));
  LIBXSMM_INIT
  if (2 > internal_malloc_kind) {
#if !defined(NDEBUG)
    int status =
#endif
    libxsmm_xmalloc(&ptr, size, alignment, LIBXSMM_MALLOC_FLAG_REALLOC, NULL/*extra*/, 0/*extra_size*/);
    assert(EXIT_SUCCESS == status || NULL == ptr); /* !LIBXSMM_ASSERT */
  }
  else { /* scratch */
    const void *const caller = libxsmm_trace_caller_id(0/*level*/);
    internal_scratch_malloc(&ptr, size, alignment, LIBXSMM_MALLOC_FLAG_REALLOC, caller);
  }
  return ptr;
}
2322 
2323 
libxsmm_scratch_malloc(size_t size,size_t alignment,const void * caller)2324 LIBXSMM_API void* libxsmm_scratch_malloc(size_t size, size_t alignment, const void* caller)
2325 {
2326   void* result;
2327   LIBXSMM_INIT
2328   internal_scratch_malloc(&result, size, alignment,
2329     LIBXSMM_MALLOC_INTERNAL_CALLER != caller ? LIBXSMM_MALLOC_FLAG_DEFAULT : LIBXSMM_MALLOC_FLAG_PRIVATE,
2330     caller);
2331   return result;
2332 }
2333 
2334 
libxsmm_malloc(size_t size)2335 LIBXSMM_API LIBXSMM_ATTRIBUTE_MALLOC void* libxsmm_malloc(size_t size)
2336 {
2337   return libxsmm_aligned_malloc(size, 0/*auto*/);
2338 }
2339 
2340 
/** Deallocates memory (public API, NULL-safe).
 *  Depending on the build configuration, either the scratch pools are
 *  consulted first (safe method), or the buffer's descriptor is inspected
 *  first; buffers without a LIBXSMM descriptor are forwarded to the original
 *  free (__real_free) when allocation hooks are active. */
LIBXSMM_API void libxsmm_free(const void* memory)
{
  if (NULL != memory) {
#if defined(LIBXSMM_MALLOC_SCRATCH_DELETE_FIRST) || /* prefer safe method if possible */ \
  (!defined(LIBXSMM_MALLOC_HOOK_STATIC) && !defined(LIBXSMM_MALLOC_HOOK_DYNAMIC))
# if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (0 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
    internal_malloc_pool_type *const pool = internal_scratch_malloc_pool(memory);
    if (NULL != pool) { /* memory belongs to scratch domain */
      internal_scratch_free(memory, pool);
    }
    else
# endif
    { /* local */
      libxsmm_xfree(memory, 2/*check*/);
    }
#else /* lookup matching pool */
    internal_malloc_info_type *const info = internal_malloc_info(memory, 2/*check*/);
    static int error_once = 0; /* each error message is printed at most once */
    if (NULL != info && 0 == (LIBXSMM_MALLOC_FLAG_SCRATCH & info->flags)) { /* !libxsmm_free */
# if !defined(NDEBUG)
      if (EXIT_SUCCESS != internal_xfree(memory, info)
        && 0 != libxsmm_verbosity /* library code is expected to be mute */
        && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
      {
        fprintf(stderr, "LIBXSMM ERROR: memory deallocation failed!\n");
      }
# else
      internal_xfree(memory, info); /* !libxsmm_free */
# endif
    }
    else {
# if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (0 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
      internal_malloc_pool_type *const pool = internal_scratch_malloc_pool(memory);
      if (NULL != pool) { /* memory belongs to scratch domain */
        internal_scratch_free(memory, pool);
      }
      else
# endif
      {
# if defined(NDEBUG) && (defined(LIBXSMM_MALLOC_HOOK_STATIC) || defined(LIBXSMM_MALLOC_HOOK_DYNAMIC))
        __real_free((void*)memory); /* foreign buffer: forward to the original free */
# else
#   if (defined(LIBXSMM_MALLOC_HOOK_STATIC) || defined(LIBXSMM_MALLOC_HOOK_DYNAMIC))
        __real_free((void*)memory);
#   endif
        if (0 != libxsmm_verbosity && /* library code is expected to be mute */
            1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
        {
          fprintf(stderr, "LIBXSMM ERROR: deallocation does not match allocation!\n");
        }
# endif
      }
    }
#endif
  }
}
2397 
2398 
/** Releases all scratch-memory pools (internal API).
 *  Serializes via the given lock when it is non-NULL, frees pool buffers that
 *  are still held, warns about pending allocations, and resets the pool array
 *  plus the public/local watermarks. The private watermarks are deliberately
 *  left untouched. A no-op when scratch pools are compiled out. */
LIBXSMM_API_INTERN void libxsmm_xrelease_scratch(LIBXSMM_LOCK_TYPE(LIBXSMM_LOCK)* lock)
{
#if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (0 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
  internal_malloc_pool_type* pools = NULL;
  libxsmm_scratch_info scratch_info;
  LIBXSMM_ASSERT(libxsmm_scratch_pools <= LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS);
  if (NULL != lock) {
    LIBXSMM_LOCK_ACQUIRE(LIBXSMM_LOCK, lock);
  }
# if defined(LIBXSMM_MALLOC_DELETE_SAFE)
  if (0 == (internal_malloc_kind & 1) || 0 >= internal_malloc_kind)
# endif
  {
    unsigned int i;
    /* the pool array lives in a padded static buffer; align to the padding */
    pools = (internal_malloc_pool_type*)LIBXSMM_UP2(
      (uintptr_t)internal_malloc_pool_buffer, LIBXSMM_MALLOC_SCRATCH_PADDING);
    for (i = 0; i < libxsmm_scratch_pools; ++i) {
      if (0 != pools[i].instance.minsize) {
        if (
# if !defined(LIBXSMM_MALLOC_SCRATCH_DELETE_FIRST)
          1 < pools[i].instance.counter &&
# endif
          NULL != pools[i].instance.buffer)
        {
          internal_malloc_info_type* const info = internal_malloc_info(pools[i].instance.buffer, 2/*check*/);
          if (NULL != info) internal_xfree(info->pointer, info);
        }
      }
      else break; /* early exit: pools are populated from the beginning */
    }
  }
  LIBXSMM_EXPECT(EXIT_SUCCESS, libxsmm_get_scratch_info(&scratch_info));
  if (0 != scratch_info.npending && /* library code is expected to be mute */
    (LIBXSMM_VERBOSITY_WARN <= libxsmm_verbosity || 0 > libxsmm_verbosity))
  {
    char pending_size_buffer[32];
    libxsmm_format_size(pending_size_buffer, sizeof(pending_size_buffer),
      internal_malloc_public_cur + internal_malloc_local_cur, "KM", "B", 10);
    fprintf(stderr, "LIBXSMM WARNING: %s pending scratch-memory by %" PRIuPTR " allocation%s!\n",
      pending_size_buffer, (uintptr_t)scratch_info.npending, 1 < scratch_info.npending ? "s" : "");
  }
  if (NULL != pools) {
    memset(pools, 0, (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) * sizeof(internal_malloc_pool_type));
    /* no reset: keep private watermark (internal_malloc_private_max, internal_malloc_private_cur) */
    internal_malloc_public_max = internal_malloc_public_cur = 0;
    internal_malloc_local_max = internal_malloc_local_cur = 0;
    internal_malloc_scratch_nmallocs = 0;
  }
  if (NULL != lock) {
    LIBXSMM_LOCK_RELEASE(LIBXSMM_LOCK, lock);
  }
#endif
}
2452 
2453 
libxsmm_release_scratch(void)2454 LIBXSMM_API void libxsmm_release_scratch(void)
2455 {
2456   libxsmm_xrelease_scratch(&libxsmm_lock_global);
2457 }
2458 
2459 
libxsmm_get_malloc_info(const void * memory,libxsmm_malloc_info * info)2460 LIBXSMM_API int libxsmm_get_malloc_info(const void* memory, libxsmm_malloc_info* info)
2461 {
2462   int result = EXIT_SUCCESS;
2463   if (NULL != info) {
2464     size_t size;
2465     result = libxsmm_get_malloc_xinfo(memory, &size, NULL/*flags*/, NULL/*extra*/);
2466     LIBXSMM_MEMZERO127(info);
2467     if (EXIT_SUCCESS == result) {
2468       info->size = size;
2469     }
2470 #if !defined(NDEBUG) /* library code is expected to be mute */
2471     else if (LIBXSMM_VERBOSITY_WARN <= libxsmm_verbosity || 0 > libxsmm_verbosity) {
2472       static int error_once = 0;
2473       if (1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED)) {
2474         fprintf(stderr, "LIBXSMM WARNING: foreign memory buffer %p discovered!\n", memory);
2475       }
2476     }
2477 #endif
2478   }
2479   else {
2480     result = EXIT_FAILURE;
2481   }
2482   return result;
2483 }
2484 
2485 
/** Queries statistics of the scratch-memory domain (public API).
 *  Fills info with the number of scratch mallocs, the internal/local/public
 *  watermarks, the number of active pools, and the number of still pending
 *  deallocations. Returns EXIT_FAILURE only when info is NULL. */
LIBXSMM_API int libxsmm_get_scratch_info(libxsmm_scratch_info* info)
{
  int result = EXIT_SUCCESS;
  if (NULL != info) {
#if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (0 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
    LIBXSMM_MEMZERO127(info);
    info->nmallocs = internal_malloc_scratch_nmallocs;
    info->internal = internal_malloc_private_max;
    info->local = internal_malloc_local_max;
    info->size = internal_malloc_public_max;
    { const internal_malloc_pool_type* pool = (const internal_malloc_pool_type*)LIBXSMM_UP2(
        (uintptr_t)internal_malloc_pool_buffer, LIBXSMM_MALLOC_SCRATCH_PADDING);
# if (1 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
      const internal_malloc_pool_type *const end = pool + libxsmm_scratch_pools;
      LIBXSMM_ASSERT(libxsmm_scratch_pools <= LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS);
      /* pools serving LIBXSMM-internal allocations are not counted */
      for (; pool != end; ++pool) if ((LIBXSMM_MALLOC_INTERNAL_CALLER) != pool->instance.site) {
# endif
        if (0 != pool->instance.minsize) {
          const size_t npending = pool->instance.counter;
# if defined(LIBXSMM_MALLOC_SCRATCH_DELETE_FIRST)
          info->npending += npending;
# else
          /* one reference is held by the pool itself and does not count as pending */
          info->npending += 1 < npending ? (npending - 1) : 0;
# endif
          ++info->npools;
        }
# if (1 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
        else break; /* early exit */
      }
# endif
    }
#else
    LIBXSMM_MEMZERO127(info);
#endif /*defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (0 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))*/
  }
  else {
    result = EXIT_FAILURE;
  }
  return result;
}
2526 
2527 
/** Sets the upper limit of the scratch-memory consumption in Bytes (public API).
 *  Deliberately avoids LIBXSMM_INIT so the limit can be set before the
 *  library is initialized. */
LIBXSMM_API void libxsmm_set_scratch_limit(size_t nbytes)
{
  /* !LIBXSMM_INIT */
  internal_malloc_scratch_limit = nbytes;
}
2533 
2534 
libxsmm_get_scratch_limit(void)2535 LIBXSMM_API size_t libxsmm_get_scratch_limit(void)
2536 {
2537   size_t result;
2538   /* !LIBXSMM_INIT */
2539   if (LIBXSMM_SCRATCH_DEFAULT != internal_malloc_scratch_limit) {
2540     result = internal_malloc_scratch_limit;
2541   }
2542   else if (0 == internal_malloc_kind) {
2543     result = LIBXSMM_MALLOC_SCRATCH_LIMIT;
2544   }
2545   else {
2546     result = LIBXSMM_SCRATCH_UNLIMITED;
2547   }
2548   return result;
2549 }
2550 
2551 
libxsmm_set_malloc(int enabled,const size_t * lo,const size_t * hi)2552 LIBXSMM_API void libxsmm_set_malloc(int enabled, const size_t* lo, const size_t* hi)
2553 {
2554   /* !LIBXSMM_INIT */
2555 #if !(defined(LIBXSMM_MALLOC_HOOK_DYNAMIC) || defined(LIBXSMM_INTERCEPT_DYNAMIC))
2556   LIBXSMM_UNUSED(enabled);
2557   internal_malloc_kind = 0;
2558 #elif defined(LIBXSMM_MALLOC) && (0 < LIBXSMM_MALLOC)
2559   LIBXSMM_UNUSED(enabled);
2560   internal_malloc_kind = LIBXSMM_MALLOC;
2561 #else
2562   internal_malloc_kind = enabled;
2563 #endif
2564   /* setup lo/hi after internal_malloc_kind! */
2565   if (NULL != lo) internal_malloc_limit[0] = *lo;
2566   if (NULL != hi) {
2567     const size_t scratch_limit = libxsmm_get_scratch_limit();
2568     const size_t malloc_upper = LIBXSMM_MIN(*hi, scratch_limit);
2569     internal_malloc_limit[1] = LIBXSMM_MAX(malloc_upper, internal_malloc_limit[0]);
2570   }
2571   libxsmm_malloc_init();
2572 }
2573 
2574 
libxsmm_get_malloc(size_t * lo,size_t * hi)2575 LIBXSMM_API int libxsmm_get_malloc(size_t* lo, size_t* hi)
2576 {
2577   int result;
2578   LIBXSMM_INIT
2579   if (NULL != lo) *lo = internal_malloc_limit[0];
2580   if (NULL != hi) *hi = internal_malloc_limit[1];
2581 #if (defined(LIBXSMM_MALLOC_HOOK_DYNAMIC) || defined(LIBXSMM_INTERCEPT_DYNAMIC))
2582   result = 0 != (internal_malloc_kind & 1) && 0 < internal_malloc_kind;
2583 #else
2584   result = 0;
2585 #endif
2586   return result;
2587 }
2588 
2589