1 /******************************************************************************
2 * Copyright (c) Intel Corporation - All rights reserved. *
3 * This file is part of the LIBXSMM library. *
4 * *
5 * For information on the license, see the LICENSE file. *
6 * Further information: https://github.com/hfp/libxsmm/ *
7 * SPDX-License-Identifier: BSD-3-Clause *
8 ******************************************************************************/
9 /* Hans Pabst (Intel Corp.)
10 ******************************************************************************/
11 #include "libxsmm_trace.h"
12 #include "libxsmm_main.h"
13 #include "libxsmm_hash.h"
14
15 #if defined(LIBXSMM_OFFLOAD_TARGET)
16 # pragma offload_attribute(push,target(LIBXSMM_OFFLOAD_TARGET))
17 #endif
18 #if (defined(LIBXSMM_BUILD) && (1 < (LIBXSMM_BUILD)))
19 # include <features.h>
20 # include <malloc.h>
21 #endif
22 #if !defined(LIBXSMM_MALLOC_GLIBC)
23 # if defined(__GLIBC__)
24 # define LIBXSMM_MALLOC_GLIBC __GLIBC__
25 # else
26 # define LIBXSMM_MALLOC_GLIBC 6
27 # endif
28 #endif
29 #if defined(_WIN32)
30 # include <windows.h>
31 # include <malloc.h>
32 # include <intrin.h>
33 #else
34 # include <sys/mman.h>
35 # if defined(__linux__)
36 # include <linux/mman.h>
37 # include <sys/syscall.h>
38 # endif
39 # if defined(MAP_POPULATE)
40 # include <sys/utsname.h>
41 # endif
42 # include <sys/types.h>
43 # include <unistd.h>
44 # include <errno.h>
45 # if defined(__MAP_ANONYMOUS)
46 # define LIBXSMM_MAP_ANONYMOUS __MAP_ANONYMOUS
47 # elif defined(MAP_ANONYMOUS)
48 # define LIBXSMM_MAP_ANONYMOUS MAP_ANONYMOUS
49 # elif defined(MAP_ANON)
50 # define LIBXSMM_MAP_ANONYMOUS MAP_ANON
51 # else
52 # define LIBXSMM_MAP_ANONYMOUS 0x20
53 # endif
54 # if defined(MAP_SHARED) && 0
55 # define LIBXSMM_MAP_SHARED MAP_SHARED
56 # else
57 # define LIBXSMM_MAP_SHARED 0
58 # endif
59 LIBXSMM_EXTERN int ftruncate(int, off_t) LIBXSMM_THROW;
60 LIBXSMM_EXTERN int mkstemp(char*) LIBXSMM_NOTHROW;
61 #endif
62 #if !defined(LIBXSMM_MALLOC_FALLBACK)
63 # define LIBXSMM_MALLOC_FINAL 3
64 #endif
65 #if defined(LIBXSMM_VTUNE)
66 # if (2 <= LIBXSMM_VTUNE) /* no header file required */
67 # if !defined(LIBXSMM_VTUNE_JITVERSION)
68 # define LIBXSMM_VTUNE_JITVERSION LIBXSMM_VTUNE
69 # endif
70 # define LIBXSMM_VTUNE_JIT_DESC_TYPE iJIT_Method_Load_V2
71 # define LIBXSMM_VTUNE_JIT_LOAD 21
72 # define LIBXSMM_VTUNE_JIT_UNLOAD 14
73 # define iJIT_SAMPLING_ON 0x0001
74 LIBXSMM_EXTERN unsigned int iJIT_GetNewMethodID(void);
75 LIBXSMM_EXTERN /*iJIT_IsProfilingActiveFlags*/int iJIT_IsProfilingActive(void);
76 LIBXSMM_EXTERN int iJIT_NotifyEvent(/*iJIT_JVM_EVENT*/int event_type, void *EventSpecificData);
77 LIBXSMM_EXTERN_C typedef struct LineNumberInfo {
78 unsigned int Offset;
79 unsigned int LineNumber;
80 } LineNumberInfo;
81 LIBXSMM_EXTERN_C typedef struct iJIT_Method_Load_V2 {
82 unsigned int method_id;
83 char* method_name;
84 void* method_load_address;
85 unsigned int method_size;
86 unsigned int line_number_size;
87 LineNumberInfo* line_number_table;
88 char* class_file_name;
89 char* source_file_name;
90 char* module_name;
91 } iJIT_Method_Load_V2;
92 # else /* more safe due to header dependency */
93 # include <jitprofiling.h>
94 # if !defined(LIBXSMM_VTUNE_JITVERSION)
95 # define LIBXSMM_VTUNE_JITVERSION 2
96 # endif
97 # if (2 <= LIBXSMM_VTUNE_JITVERSION)
98 # define LIBXSMM_VTUNE_JIT_DESC_TYPE iJIT_Method_Load_V2
99 # define LIBXSMM_VTUNE_JIT_LOAD iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED_V2
100 # else
101 # define LIBXSMM_VTUNE_JIT_DESC_TYPE iJIT_Method_Load
102 # define LIBXSMM_VTUNE_JIT_LOAD iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED
103 # endif
104 # define LIBXSMM_VTUNE_JIT_UNLOAD iJVM_EVENT_TYPE_METHOD_UNLOAD_START
105 # endif
106 # if !defined(LIBXSMM_MALLOC_FALLBACK)
107 # define LIBXSMM_MALLOC_FALLBACK LIBXSMM_MALLOC_FINAL
108 # endif
109 #else
110 # if !defined(LIBXSMM_MALLOC_FALLBACK)
111 # define LIBXSMM_MALLOC_FALLBACK 0
112 # endif
113 #endif /*defined(LIBXSMM_VTUNE)*/
114 #if !defined(LIBXSMM_MALLOC_XMAP_TEMPLATE)
115 # define LIBXSMM_MALLOC_XMAP_TEMPLATE ".libxsmm_jit." LIBXSMM_MKTEMP_PATTERN
116 #endif
117 #if defined(LIBXSMM_OFFLOAD_TARGET)
118 # pragma offload_attribute(pop)
119 #endif
120 #if defined(LIBXSMM_PERF)
121 # include "libxsmm_perf.h"
122 #endif
123
124 #if !defined(LIBXSMM_MALLOC_ALIGNMAX)
125 # define LIBXSMM_MALLOC_ALIGNMAX (2 << 20) /* 2 MB */
126 #endif
127 #if !defined(LIBXSMM_MALLOC_ALIGNFCT)
128 # define LIBXSMM_MALLOC_ALIGNFCT 16
129 #endif
130 #if !defined(LIBXSMM_MALLOC_SEED)
131 # define LIBXSMM_MALLOC_SEED 1051981
132 #endif
133
134 #if !defined(LIBXSMM_MALLOC_HOOK_KMP) && 0
135 # define LIBXSMM_MALLOC_HOOK_KMP
136 #endif
137 #if !defined(LIBXSMM_MALLOC_HOOK_QKMALLOC) && 0
138 # define LIBXSMM_MALLOC_HOOK_QKMALLOC
139 #endif
140 #if !defined(LIBXSMM_MALLOC_HOOK_IMALLOC) && 1
141 # define LIBXSMM_MALLOC_HOOK_IMALLOC
142 #endif
143 #if !defined(LIBXSMM_MALLOC_HOOK_CHECK) && 0
144 # define LIBXSMM_MALLOC_HOOK_CHECK 1
145 #endif
146
147 #if !defined(LIBXSMM_MALLOC_CRC_LIGHT) && !defined(_DEBUG) && 1
148 # define LIBXSMM_MALLOC_CRC_LIGHT
149 #endif
150 #if !defined(LIBXSMM_MALLOC_CRC_OFF)
151 # if defined(NDEBUG) && !defined(LIBXSMM_MALLOC_HOOK_STATIC) && !defined(LIBXSMM_MALLOC_HOOK_DYNAMIC)
152 # define LIBXSMM_MALLOC_CRC_OFF
153 # elif !defined(LIBXSMM_BUILD)
154 # define LIBXSMM_MALLOC_CRC_OFF
155 # endif
156 #endif
157
158 #if !defined(LIBXSMM_MALLOC_SCRATCH_LIMIT)
159 # define LIBXSMM_MALLOC_SCRATCH_LIMIT 0xFFFFFFFF /* ~4 GB */
160 #endif
161 #if !defined(LIBXSMM_MALLOC_SCRATCH_PADDING)
162 # define LIBXSMM_MALLOC_SCRATCH_PADDING LIBXSMM_CACHELINE
163 #endif
164 /* pointers are checked first if they belong to scratch */
165 #if !defined(LIBXSMM_MALLOC_SCRATCH_DELETE_FIRST) && 1
166 # define LIBXSMM_MALLOC_SCRATCH_DELETE_FIRST
167 #endif
168 /* can clobber memory if allocations are not exactly scoped */
169 #if !defined(LIBXSMM_MALLOC_SCRATCH_TRIM_HEAD) && 0
170 # define LIBXSMM_MALLOC_SCRATCH_TRIM_HEAD
171 #endif
172 #if !defined(LIBXSMM_MALLOC_SCRATCH_JOIN) && 1
173 # define LIBXSMM_MALLOC_SCRATCH_JOIN
174 #endif
175 #if !defined(LIBXSMM_MALLOC_LOCK_ONFAULT) && 0
176 # if defined(MLOCK_ONFAULT) && defined(SYS_mlock2)
177 # define LIBXSMM_MALLOC_LOCK_ONFAULT
178 # endif
179 #endif
180 /* protected against double-delete (if possible) */
181 #if !defined(LIBXSMM_MALLOC_DELETE_SAFE) && 0
182 # define LIBXSMM_MALLOC_DELETE_SAFE
183 #endif
184 /* map memory for scratch buffers */
185 #if !defined(LIBXSMM_MALLOC_MMAP_SCRATCH) && 1
186 # define LIBXSMM_MALLOC_MMAP_SCRATCH
187 #endif
188 /* map memory for hooked allocation */
189 #if !defined(LIBXSMM_MALLOC_MMAP_HOOK) && 1
190 # define LIBXSMM_MALLOC_MMAP_HOOK
191 #endif
192 /* map memory also for non-executable buffers */
193 #if !defined(LIBXSMM_MALLOC_MMAP) && 1
194 # define LIBXSMM_MALLOC_MMAP
195 #endif
196
197 #if defined(LIBXSMM_MALLOC_ALIGN_ALL)
198 # define INTERNAL_MALLOC_AUTOALIGN(SIZE, ALIGNMENT) libxsmm_alignment(SIZE, ALIGNMENT)
199 #else
200 # define INTERNAL_MALLOC_AUTOALIGN(SIZE, ALIGNMENT) (ALIGNMENT)
201 #endif
202
/* Body of the intercepted memalign/malloc hooks: forwards the request to the
 * real allocator (__real_memalign/__real_malloc), or redirects it to LIBXSMM's
 * scratch allocator depending on the interception mode (internal_malloc_kind)
 * and the permitted size interval (internal_malloc_limit). The atomic counter
 * internal_malloc_recursive guards against re-entrant allocations, e.g., when
 * libxsmm_trace_caller_id itself allocates memory. */
#define INTERNAL_MEMALIGN_HOOK(RESULT, FLAGS, ALIGNMENT, SIZE, CALLER) { \
  const int internal_memalign_hook_recursive_ = LIBXSMM_ATOMIC_ADD_FETCH( \
    &internal_malloc_recursive, 1, LIBXSMM_ATOMIC_RELAXED); \
  if ( 1 < internal_memalign_hook_recursive_ /* protect against recursion */ \
    || 0 == (internal_malloc_kind & 1) || 0 >= internal_malloc_kind \
    || (internal_malloc_limit[0] > (SIZE)) \
    || (internal_malloc_limit[1] < (SIZE) && 0 != internal_malloc_limit[1])) \
  { /* not intercepted: call through to the real allocator */ \
    const size_t internal_memalign_hook_alignment_ = INTERNAL_MALLOC_AUTOALIGN(SIZE, ALIGNMENT); \
    (RESULT) = (0 != internal_memalign_hook_alignment_ \
      ? __real_memalign(internal_memalign_hook_alignment_, SIZE) \
      : __real_malloc(SIZE)); \
  } \
  else { /* redirect */ \
    LIBXSMM_INIT \
    if (NULL == (CALLER)) { /* libxsmm_trace_caller_id may allocate memory */ \
      internal_scratch_malloc(&(RESULT), SIZE, ALIGNMENT, FLAGS, \
        libxsmm_trace_caller_id(0/*level*/)); \
    } \
    else { \
      internal_scratch_malloc(&(RESULT), SIZE, ALIGNMENT, FLAGS, CALLER); \
    } \
  } \
  LIBXSMM_ATOMIC_SUB_FETCH(&internal_malloc_recursive, 1, LIBXSMM_ATOMIC_RELAXED); \
}
228
/* Body of the intercepted realloc hook: forwards to the real realloc, or
 * redirects to the scratch allocator. The alignment of the new allocation is
 * derived from the trailing zero-bits of the old pointer, i.e., the strongest
 * alignment the old address already satisfied. */
#define INTERNAL_REALLOC_HOOK(RESULT, FLAGS, PTR, SIZE, CALLER) { \
  if (0 == (internal_malloc_kind & 1) || 0 >= internal_malloc_kind \
    /*|| (0 != LIBXSMM_ATOMIC_LOAD(&internal_malloc_recursive, LIBXSMM_ATOMIC_RELAXED))*/ \
    || (internal_malloc_limit[0] > (SIZE)) \
    || (internal_malloc_limit[1] < (SIZE) && 0 != internal_malloc_limit[1])) \
  { /* not intercepted */ \
    (RESULT) = __real_realloc(PTR, SIZE); \
  } \
  else { \
    /* number of trailing zero-bits determines the (power-of-two) alignment */ \
    const int nzeros = LIBXSMM_INTRINSICS_BITSCANFWD64((uintptr_t)(PTR)), alignment = 1 << nzeros; \
    LIBXSMM_ASSERT(0 == ((uintptr_t)(PTR) & ~(0xFFFFFFFFFFFFFFFF << nzeros))); \
    if (NULL == (CALLER)) { /* libxsmm_trace_caller_id may allocate memory */ \
      internal_scratch_malloc(&(PTR), SIZE, (size_t)alignment, FLAGS, \
        libxsmm_trace_caller_id(0/*level*/)); \
    } \
    else { \
      internal_scratch_malloc(&(PTR), SIZE, (size_t)alignment, FLAGS, CALLER); \
    } \
    (RESULT) = (PTR); \
  } \
}
250
/* Body of the intercepted free hook: calls the real free when interception is
 * disabled, otherwise routes through libxsmm_free, which also recognizes (and
 * correctly releases) pointers that were not issued by LIBXSMM. */
#define INTERNAL_FREE_HOOK(PTR, CALLER) { \
  LIBXSMM_UNUSED(CALLER); \
  if (0 == (internal_malloc_kind & 1) || 0 >= internal_malloc_kind \
    /*|| (0 != LIBXSMM_ATOMIC_LOAD(&internal_malloc_recursive, LIBXSMM_ATOMIC_RELAXED))*/ \
  ){ \
    __real_free(PTR); \
  } \
  else { /* recognize pointers not issued by LIBXSMM */ \
    libxsmm_free(PTR); \
  } \
}
262
#if !defined(_WIN32) /* use the compiler-predefined _WIN32 (consistent with the rest of this file) */
# if defined(MAP_32BIT)
/* If MAP_32BIT was requested but the mapping failed, retry once without
 * restricting the mapping to the low 2 GB of the address space. */
# define IF_INTERNAL_XMALLOC_MAP32(ENV, MAPSTATE, MFLAGS, SIZE, BUFFER, REPTR) \
  if (0 != (MAP_32BIT & (MFLAGS))) { \
    (BUFFER) = internal_xmalloc_xmap(ENV, SIZE, (MFLAGS) & ~MAP_32BIT, REPTR); \
  } \
  if (MAP_FAILED != (BUFFER)) (MAPSTATE) = 0; else
# else
# define IF_INTERNAL_XMALLOC_MAP32(ENV, MAPSTATE, MFLAGS, SIZE, BUFFER, REPTR)
# endif

/* One stage of the file-backed mmap fallback chain: when stage I is reached
 * (I == FALLBACK), map a temporary file from the directory given by ENVVAR
 * (default: ENVDEF); on failure advance FALLBACK to the next stage. */
# define INTERNAL_XMALLOC(I, FALLBACK, ENVVAR, ENVDEF, MAPSTATE, MFLAGS, SIZE, BUFFER, REPTR) \
  if ((I) == (FALLBACK)) { \
    static const char* internal_xmalloc_env_ = NULL; \
    if (NULL == internal_xmalloc_env_) { /* cache the environment lookup */ \
      internal_xmalloc_env_ = getenv(ENVVAR); \
      if (NULL == internal_xmalloc_env_) internal_xmalloc_env_ = ENVDEF; \
    } \
    (BUFFER) = internal_xmalloc_xmap(internal_xmalloc_env_, SIZE, MFLAGS, REPTR); \
    if (MAP_FAILED == (BUFFER)) { \
      IF_INTERNAL_XMALLOC_MAP32(internal_xmalloc_env_, MAPSTATE, MFLAGS, SIZE, BUFFER, REPTR) \
      (FALLBACK) = (I) + 1; \
    } \
  }

/* Lower LIMIT towards the observed WATERMARK and emit a one-time warning
 * (per watermark value) when verbose; data-races are tolerated since the
 * watermark only serves as a heuristic. */
# define INTERNAL_XMALLOC_WATERMARK(NAME, WATERMARK, LIMIT, SIZE) { \
  const size_t internal_xmalloc_watermark_ = (WATERMARK) + (SIZE) / 2; /* accept data-race */ \
  if (internal_xmalloc_watermark_ < (LIMIT)) { \
    static size_t internal_xmalloc_watermark_verbose_ = 0; \
    (LIMIT) = internal_xmalloc_watermark_; /* accept data-race */ \
    if (internal_xmalloc_watermark_verbose_ < internal_xmalloc_watermark_ && \
      (LIBXSMM_VERBOSITY_HIGH <= libxsmm_verbosity || 0 > libxsmm_verbosity)) \
    { /* muted */ \
      char internal_xmalloc_watermark_buffer_[32]; \
      /* coverity[check_return] */ \
      libxsmm_format_size(internal_xmalloc_watermark_buffer_, sizeof(internal_xmalloc_watermark_buffer_), \
        internal_xmalloc_watermark_, "KM", "B", 10); \
      fprintf(stderr, "LIBXSMM WARNING: " NAME " watermark reached at %s!\n", internal_xmalloc_watermark_buffer_); \
      internal_xmalloc_watermark_verbose_ = internal_xmalloc_watermark_; \
    } \
  } \
}

/* Account a successful mapping of the given KIND (e.g., MAP_HUGETLB or
 * MAP_LOCKED) against WATERMARK and tag FLAGS; otherwise retry the mapping
 * without the KIND-bit and, on success, adjust LIMIT via the watermark. */
# define INTERNAL_XMALLOC_KIND(KIND, NAME, FLAG, FLAGS, MFLAGS, WATERMARK, LIMIT, INFO, SIZE, BUFFER) \
  if (0 != ((KIND) & (MFLAGS))) { \
    if (MAP_FAILED != (BUFFER)) { \
      LIBXSMM_ASSERT(NULL != (BUFFER)); \
      LIBXSMM_ATOMIC_ADD_FETCH(&(WATERMARK), SIZE, LIBXSMM_ATOMIC_RELAXED); \
      (FLAGS) |= (FLAG); \
    } \
    else { /* retry */ \
      (BUFFER) = mmap(NULL == (INFO) ? NULL : (INFO)->pointer, SIZE, PROT_READ | PROT_WRITE, \
        MAP_PRIVATE | LIBXSMM_MAP_ANONYMOUS | ((MFLAGS) & ~(KIND)), -1, 0/*offset*/); \
      if (MAP_FAILED != (BUFFER)) { /* successful retry */ \
        LIBXSMM_ASSERT(NULL != (BUFFER)); \
        INTERNAL_XMALLOC_WATERMARK(NAME, WATERMARK, LIMIT, SIZE); \
      } \
    } \
  }
#endif
323
324
/* Metadata record stored immediately in front of every buffer allocated by
 * LIBXSMM; recovered and validated by internal_malloc_info. */
LIBXSMM_EXTERN_C typedef struct LIBXSMM_RETARGETABLE internal_malloc_info_type {
  libxsmm_free_function free; /* deallocation function (plain or context-form) */
  void *pointer, *reloc;      /* base address of the allocation, and secondary mapping (used for executable buffers) */
  const void* context;        /* user-context passed to the context-form free function */
  size_t size;                /* user-visible size of the allocation */
  int flags;                  /* LIBXSMM_MALLOC_FLAG_* describing this allocation */
#if defined(LIBXSMM_VTUNE)
  unsigned int code_id;       /* VTune JIT method-id (only for executable buffers) */
#endif
#if !defined(LIBXSMM_MALLOC_CRC_OFF) /* hash *must* be the last entry */
  unsigned int hash;          /* checksum covering the preceding members (ownership check) */
#endif
} internal_malloc_info_type;
338
/* One scratch-memory pool; the union pads each pool to
 * LIBXSMM_MALLOC_SCRATCH_PADDING (cache-line based, see above). */
LIBXSMM_EXTERN_C typedef union LIBXSMM_RETARGETABLE internal_malloc_pool_type {
  char pad[LIBXSMM_MALLOC_SCRATCH_PADDING];
  struct {
    size_t minsize, counter, incsize; /* pool size watermark, number of live allocations, pending growth */
    char *buffer, *head;              /* pool storage, and bump-pointer within the storage */
#if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (1 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
    const void* site;                 /* allocation site this pool is affine to */
# if (0 != LIBXSMM_SYNC)
    unsigned int tid;                 /* thread-id the pool is affine to */
# endif
#endif
  } instance;
} internal_malloc_pool_type;
352
/* Scratch pool, which supports up to MAX_NSCRATCH allocation sites. */
#if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (0 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
/* LIBXSMM_ALIGNED appears to contradict LIBXSMM_APIVAR, and causes multiple defined symbols (if below is seen in multiple translation units) */
LIBXSMM_APIVAR_DEFINE(char internal_malloc_pool_buffer[(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS)*sizeof(internal_malloc_pool_type)+(LIBXSMM_MALLOC_SCRATCH_PADDING)-1]);
#endif
/* Interval of bytes that permit interception (internal_malloc_kind) */
LIBXSMM_APIVAR_DEFINE(size_t internal_malloc_limit[2]);
/* Maximum total size of the scratch memory domain. */
LIBXSMM_APIVAR_DEFINE(size_t internal_malloc_scratch_limit);
/* NOTE(review): presumably counts scratch allocations; updated outside this view. */
LIBXSMM_APIVAR_DEFINE(size_t internal_malloc_scratch_nmallocs);
/* Watermark (max) and current (cur) byte totals per memory domain
 * (private/public/local); decremented in internal_xfree. */
LIBXSMM_APIVAR_DEFINE(size_t internal_malloc_private_max);
LIBXSMM_APIVAR_DEFINE(size_t internal_malloc_private_cur);
LIBXSMM_APIVAR_DEFINE(size_t internal_malloc_public_max);
LIBXSMM_APIVAR_DEFINE(size_t internal_malloc_public_cur);
LIBXSMM_APIVAR_DEFINE(size_t internal_malloc_local_max);
LIBXSMM_APIVAR_DEFINE(size_t internal_malloc_local_cur);
/* Recursion counter of the malloc hooks (see INTERNAL_MEMALIGN_HOOK). */
LIBXSMM_APIVAR_DEFINE(int internal_malloc_recursive);
/** 0: regular, 1/odd: intercept/scratch, otherwise: all/scratch */
LIBXSMM_APIVAR_DEFINE(int internal_malloc_kind);
#if (0 != LIBXSMM_SYNC) && defined(LIBXSMM_MALLOC_SCRATCH_JOIN)
LIBXSMM_APIVAR_DEFINE(int internal_malloc_join);
#endif
#if !defined(_WIN32)
# if defined(MAP_HUGETLB)
/* Current total of huge-page backed allocations (see internal_xfree). */
LIBXSMM_APIVAR_DEFINE(size_t internal_malloc_hugetlb);
# endif
# if defined(MAP_LOCKED)
/* Current total of page-locked allocations (see internal_xfree). */
LIBXSMM_APIVAR_DEFINE(size_t internal_malloc_plocked);
# endif
#endif
383
384
libxsmm_alignment(size_t size,size_t alignment)385 LIBXSMM_API_INTERN size_t libxsmm_alignment(size_t size, size_t alignment)
386 {
387 size_t result;
388 if ((LIBXSMM_MALLOC_ALIGNFCT * LIBXSMM_MALLOC_ALIGNMAX) <= size) {
389 result = libxsmm_lcm(0 == alignment ? (LIBXSMM_ALIGNMENT) : libxsmm_lcm(alignment, LIBXSMM_ALIGNMENT), LIBXSMM_MALLOC_ALIGNMAX);
390 }
391 else { /* small-size request */
392 if ((LIBXSMM_MALLOC_ALIGNFCT * LIBXSMM_ALIGNMENT) <= size) {
393 result = (0 == alignment ? (LIBXSMM_ALIGNMENT) : libxsmm_lcm(alignment, LIBXSMM_ALIGNMENT));
394 }
395 else if (0 != alignment) { /* custom alignment */
396 result = libxsmm_lcm(alignment, sizeof(void*));
397 }
398 else { /* tiny-size request */
399 result = sizeof(void*);
400 }
401 }
402 return result;
403 }
404
405
LIBXSMM_API size_t libxsmm_offset(const size_t offset[], const size_t shape[], size_t ndims, size_t* size)
{
  /* Compute the linear index of a multi-dimensional element (column-major,
   * i.e., the first dimension is unit-stride). A NULL offset is treated as
   * all-zeros. If size is non-NULL, it receives the total number of elements
   * of the given shape (zero when ndims is zero or shape is NULL). */
  size_t linear = 0, nelems = 0;
  if (NULL != shape && 0 != ndims) {
    size_t i;
    nelems = shape[0];
    linear = (NULL != offset ? offset[0] : 0);
    for (i = 1; i < ndims; ++i) {
      if (NULL != offset) linear += offset[i] * nelems;
      nelems *= shape[i];
    }
  }
  if (NULL != size) *size = nelems;
  return linear;
}
421
422
/* Recover the metadata record stored in front of a buffer allocated by
 * LIBXSMM, or return NULL if the pointer is not recognized as LIBXSMM-owned.
 * check == 0: no validation; check == 1: validate the metadata fields (and
 * checksum, if enabled); check > 1 (non-Windows): additionally probe that the
 * candidate record is mapped/accessible before reading it. */
LIBXSMM_API_INLINE internal_malloc_info_type* internal_malloc_info(const void* memory, int check)
{
  const char *const buffer = (const char*)memory;
  /* metadata is located immediately before the user-visible buffer */
  internal_malloc_info_type* result = (internal_malloc_info_type*)(NULL != memory
    ? (buffer - sizeof(internal_malloc_info_type)) : NULL);
#if defined(LIBXSMM_MALLOC_HOOK_CHECK)
  if ((LIBXSMM_MALLOC_HOOK_CHECK) < check) check = (LIBXSMM_MALLOC_HOOK_CHECK);
#endif
  if (0 != check && NULL != result) { /* check ownership */
#if !defined(_WIN32) /* mprotect: pass address rounded down to page/4k alignment */
    /* NOTE(review): the mask assumes 4 KiB pages; probe fails only on ENOMEM
     * (address not mapped), other errors still permit the field checks */
    if (1 == check || 0 == mprotect((void*)(((uintptr_t)result) & 0xFFFFFFFFFFFFF000),
      sizeof(internal_malloc_info_type), PROT_READ | PROT_WRITE) || ENOMEM != errno)
#endif
    {
      /* plausibility checks over the metadata; any failure marks the pointer
       * as foreign (not issued by LIBXSMM) */
      const size_t maxsize = LIBXSMM_MAX(LIBXSMM_MAX(internal_malloc_public_max, internal_malloc_local_max), internal_malloc_private_max);
      const int flags_rs = LIBXSMM_MALLOC_FLAG_REALLOC | LIBXSMM_MALLOC_FLAG_SCRATCH;
      const int flags_mx = LIBXSMM_MALLOC_FLAG_MMAP | LIBXSMM_MALLOC_FLAG_X;
      const char* const pointer = (const char*)result->pointer;
      union { libxsmm_free_fun fun; const void* ptr; } convert;
      convert.fun = result->free.function;
      if (((flags_mx != (flags_mx & result->flags)) && NULL != result->reloc)
        || (0 == (LIBXSMM_MALLOC_FLAG_X & result->flags) ? 0 : (0 != (flags_rs & result->flags)))
        || (0 != (LIBXSMM_MALLOC_FLAG_X & result->flags) && NULL != result->context)
#if defined(LIBXSMM_VTUNE)
        || (0 == (LIBXSMM_MALLOC_FLAG_X & result->flags) && 0 != result->code_id)
#endif
        || (0 != (~LIBXSMM_MALLOC_FLAG_VALID & result->flags))
        || (0 == (LIBXSMM_MALLOC_FLAG_R & result->flags))
        || pointer == convert.ptr || pointer == result->context
        || pointer >= buffer || NULL == pointer
        || maxsize < result->size || 0 == result->size
        || 2 > libxsmm_ninit /* before checksum calculation */
#if !defined(LIBXSMM_MALLOC_CRC_OFF) /* last check: checksum over info */
# if defined(LIBXSMM_MALLOC_CRC_LIGHT)
        || result->hash != LIBXSMM_CRC32U(LIBXSMM_BITS)(LIBXSMM_MALLOC_SEED, &result)
# else
        || result->hash != libxsmm_crc32(LIBXSMM_MALLOC_SEED, result,
            (const char*)&result->hash - (const char*)result)
# endif
#endif
      ) { /* mismatch */
        result = NULL;
      }
    }
#if !defined(_WIN32)
    else { /* mismatch */
      result = NULL;
    }
#endif
  }
  return result;
}
475
476
LIBXSMM_API_INTERN int internal_xfree(const void* /*memory*/, internal_malloc_info_type* /*info*/);
/* Release a buffer described by its metadata record: non-mmap allocations are
 * released through the recorded free-function (plain or context-form), mmap
 * allocations are unmapped (including a separate code mapping, if present).
 * Afterwards the per-domain allocation statistics are updated.
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if unmapping failed. */
LIBXSMM_API_INTERN int internal_xfree(const void* memory, internal_malloc_info_type* info)
{
#if !defined(LIBXSMM_BUILD) || !defined(_WIN32)
  static int error_once = 0;
#endif
  int result = EXIT_SUCCESS, flags;
  void* buffer;
  size_t size;
  LIBXSMM_ASSERT(NULL != memory && NULL != info);
  buffer = info->pointer;
  flags = info->flags;
  size = info->size;
#if !defined(LIBXSMM_BUILD) /* sanity check */
  if (NULL != buffer || 0 == size)
#endif
  {
    /* total size incl. the gap between base pointer and user-visible pointer */
    const size_t alloc_size = size + (((const char*)memory) - ((const char*)buffer));
    LIBXSMM_ASSERT(NULL != buffer || 0 == size);
    if (0 == (LIBXSMM_MALLOC_FLAG_MMAP & flags)) { /* not memory-mapped */
      if (NULL != info->free.function) {
#if defined(LIBXSMM_MALLOC_DELETE_SAFE)
        info->pointer = NULL; info->size = 0;
#endif
        if (NULL == info->context) { /* plain free function */
#if (defined(LIBXSMM_MALLOC_HOOK_STATIC) || defined(LIBXSMM_MALLOC_HOOK_DYNAMIC)) && 0
          if (free == info->free.function) {
            __real_free(buffer);
          }
          else
#endif
          if (NULL != info->free.function) {
            info->free.function(buffer);
          }
        }
        else { /* context-form free function */
          LIBXSMM_ASSERT(NULL != info->free.ctx_form);
          info->free.ctx_form(buffer, info->context);
        }
      }
    }
    else { /* memory-mapped: unmap buffer (and code mapping, if any) */
#if defined(LIBXSMM_VTUNE)
      /* notify VTune before the JIT-code region disappears */
      if (0 != (LIBXSMM_MALLOC_FLAG_X & flags) && 0 != info->code_id && iJIT_SAMPLING_ON == iJIT_IsProfilingActive()) {
        iJIT_NotifyEvent(LIBXSMM_VTUNE_JIT_UNLOAD, &info->code_id);
      }
#endif
#if defined(_WIN32)
      result = (NULL == buffer || FALSE != VirtualFree(buffer, 0, MEM_RELEASE)) ? EXIT_SUCCESS : EXIT_FAILURE;
#else /* !_WIN32 */
      {
        const size_t unmap_size = LIBXSMM_UP2(alloc_size, LIBXSMM_PAGE_MINSIZE);
        void* const reloc = info->reloc;
        if (0 != munmap(buffer, unmap_size)) {
          if (0 != libxsmm_verbosity /* library code is expected to be mute */
            && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
          {
            fprintf(stderr, "LIBXSMM ERROR: %s (attempted to unmap buffer %p+%" PRIuPTR ")!\n",
              strerror(errno), buffer, (uintptr_t)unmap_size);
          }
          result = EXIT_FAILURE;
        }
        /* executable buffers may own a second mapping (reloc) */
        if (0 != (LIBXSMM_MALLOC_FLAG_X & flags) && EXIT_SUCCESS == result
          && NULL != reloc && MAP_FAILED != reloc && buffer != reloc
          && 0 != munmap(reloc, unmap_size))
        {
          if (0 != libxsmm_verbosity /* library code is expected to be mute */
            && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
          {
            fprintf(stderr, "LIBXSMM ERROR: %s (attempted to unmap code %p+%" PRIuPTR ")!\n",
              strerror(errno), reloc, (uintptr_t)unmap_size);
          }
          result = EXIT_FAILURE;
        }
      }
#endif
    }
    if (0 == (LIBXSMM_MALLOC_FLAG_X & flags)) { /* update statistics */
#if !defined(_WIN32)
# if defined(MAP_HUGETLB)
      if (0 != (LIBXSMM_MALLOC_FLAG_PHUGE & flags)) { /* huge pages */
        LIBXSMM_ASSERT(0 != (LIBXSMM_MALLOC_FLAG_MMAP & flags));
        LIBXSMM_ATOMIC_SUB_FETCH(&internal_malloc_hugetlb, alloc_size, LIBXSMM_ATOMIC_RELAXED);
      }
# endif
# if defined(MAP_LOCKED)
      if (0 != (LIBXSMM_MALLOC_FLAG_PLOCK & flags)) { /* page-locked */
        LIBXSMM_ASSERT(0 != (LIBXSMM_MALLOC_FLAG_MMAP & flags));
        LIBXSMM_ATOMIC_SUB_FETCH(&internal_malloc_plocked, alloc_size, LIBXSMM_ATOMIC_RELAXED);
      }
# endif
#endif
      if (0 == (LIBXSMM_MALLOC_FLAG_PRIVATE & flags)) { /* public */
        if (0 != (LIBXSMM_MALLOC_FLAG_SCRATCH & flags)) { /* scratch */
#if 1     /* load/store instead of SUB_FETCH: clamps at zero instead of underflowing */
          const size_t current = (size_t)LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_LOAD, LIBXSMM_BITS)(
            &internal_malloc_public_cur, LIBXSMM_ATOMIC_RELAXED);
          LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_STORE, LIBXSMM_BITS)(&internal_malloc_public_cur,
            alloc_size <= current ? (current - alloc_size) : 0, LIBXSMM_ATOMIC_RELAXED);
#else
          LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_SUB_FETCH, LIBXSMM_BITS)(
            &internal_malloc_public_cur, alloc_size, LIBXSMM_ATOMIC_RELAXED);
#endif
        }
        else { /* local */
#if 1
          const size_t current = (size_t)LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_LOAD, LIBXSMM_BITS)(
            &internal_malloc_local_cur, LIBXSMM_ATOMIC_RELAXED);
          LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_STORE, LIBXSMM_BITS)(&internal_malloc_local_cur,
            alloc_size <= current ? (current - alloc_size) : 0, LIBXSMM_ATOMIC_RELAXED);
#else
          LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_SUB_FETCH, LIBXSMM_BITS)(
            &internal_malloc_local_cur, alloc_size, LIBXSMM_ATOMIC_RELAXED);
#endif
        }
      }
      else { /* private */
#if 1
        const size_t current = (size_t)LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_LOAD, LIBXSMM_BITS)(
          &internal_malloc_private_cur, LIBXSMM_ATOMIC_RELAXED);
        LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_STORE, LIBXSMM_BITS)(&internal_malloc_private_cur,
          alloc_size <= current ? (current - alloc_size) : 0, LIBXSMM_ATOMIC_RELAXED);
#else
        LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_SUB_FETCH, LIBXSMM_BITS)(
          &internal_malloc_private_cur, alloc_size, LIBXSMM_ATOMIC_RELAXED);
#endif
      }
    }
  }
#if !defined(LIBXSMM_BUILD)
  else if ((LIBXSMM_VERBOSITY_WARN <= libxsmm_verbosity || 0 > libxsmm_verbosity) /* library code is expected to be mute */
    && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
  {
    fprintf(stderr, "LIBXSMM WARNING: attempt to release memory from non-matching implementation!\n");
  }
#endif
  return result;
}
615
616
/* Sum the minimum sizes of all occupied scratch pools, skipping the given
 * pool (exclude) and pools owned by LIBXSMM-internal call sites. */
LIBXSMM_API_INLINE size_t internal_get_scratch_size(const internal_malloc_pool_type* exclude)
{
  size_t result = 0;
#if !defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) || (1 >= (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
  LIBXSMM_UNUSED(exclude);
#else
  /* pools live in a padded static buffer; round up to the first aligned pool */
  const internal_malloc_pool_type* pool = (const internal_malloc_pool_type*)LIBXSMM_UP2(
    (uintptr_t)internal_malloc_pool_buffer, LIBXSMM_MALLOC_SCRATCH_PADDING);
# if (1 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
  const internal_malloc_pool_type *const end = pool + libxsmm_scratch_pools;
  LIBXSMM_ASSERT(libxsmm_scratch_pools <= LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS);
  for (; pool != end; ++pool)
# endif /*(1 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))*/
  {
    if (0 != pool->instance.minsize) {
# if 1 /* memory info is not used */
      if (pool != exclude && (LIBXSMM_MALLOC_INTERNAL_CALLER) != pool->instance.site) {
        result += pool->instance.minsize;
      }
# else
      const internal_malloc_info_type* const info = internal_malloc_info(pool->instance.buffer, 0/*no check*/);
      if (NULL != info && pool != exclude && (LIBXSMM_MALLOC_INTERNAL_CALLER) != pool->instance.site) {
        result += info->size;
      }
# endif
    }
    else break; /* early exit: pools are occupied front-to-back */
  }
#endif /*defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (0 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))*/
  return result;
}
648
649
internal_scratch_malloc_pool(const void * memory)650 LIBXSMM_API_INLINE internal_malloc_pool_type* internal_scratch_malloc_pool(const void* memory)
651 {
652 internal_malloc_pool_type* result = NULL;
653 internal_malloc_pool_type* pool = (internal_malloc_pool_type*)LIBXSMM_UP2(
654 (uintptr_t)internal_malloc_pool_buffer, LIBXSMM_MALLOC_SCRATCH_PADDING);
655 const char* const buffer = (const char*)memory;
656 #if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (1 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
657 const unsigned int npools = libxsmm_scratch_pools;
658 #else
659 const unsigned int npools = 1;
660 #endif
661 internal_malloc_pool_type *const end = pool + npools;
662 LIBXSMM_ASSERT(npools <= LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS);
663 LIBXSMM_ASSERT(NULL != memory);
664 for (; pool != end; ++pool) {
665 if (0 != pool->instance.minsize) {
666 if (0 != pool->instance.counter
667 #if 1 /* should be implied by non-zero counter */
668 && NULL != pool->instance.buffer
669 #endif
670 ){/* check if memory belongs to scratch domain or local domain */
671 #if 1
672 const size_t size = pool->instance.minsize;
673 #else
674 const internal_malloc_info_type* const info = internal_malloc_info(pool->instance.buffer, 0/*no check*/);
675 const size_t size = info->size;
676 #endif
677 if (pool->instance.buffer == buffer /* fast path */ ||
678 (pool->instance.buffer < buffer && buffer < (pool->instance.buffer + size)))
679 {
680 result = pool;
681 break;
682 }
683 }
684 }
685 else break; /* early exit */
686 }
687 return result;
688 }
689
690
LIBXSMM_API_INTERN void internal_scratch_free(const void* /*memory*/, internal_malloc_pool_type* /*pool*/);
/* Return an allocation to its scratch pool. When the last outstanding
 * allocation is released, the pool either rewinds its bump-pointer (reuse),
 * or releases its buffer entirely so a larger buffer can be allocated next
 * time (when minsize+incsize outgrew the scaled current size). */
LIBXSMM_API_INTERN void internal_scratch_free(const void* memory, internal_malloc_pool_type* pool)
{
#if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (0 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
  const size_t counter = LIBXSMM_ATOMIC_SUB_FETCH(&pool->instance.counter, 1, LIBXSMM_ATOMIC_SEQ_CST);
  char* const pool_buffer = pool->instance.buffer;
# if !defined(NDEBUG) || defined(LIBXSMM_MALLOC_SCRATCH_TRIM_HEAD)
  char *const buffer = (char*)memory; /* non-const */
  LIBXSMM_ASSERT(pool_buffer <= buffer && buffer < pool_buffer + pool->instance.minsize);
# endif
  LIBXSMM_ASSERT(pool_buffer <= pool->instance.head);
  if (0 == counter) { /* reuse or reallocate scratch domain */
    internal_malloc_info_type *const info = internal_malloc_info(pool_buffer, 0/*no check*/);
    const size_t scale_size = (size_t)(1 != libxsmm_scratch_scale ? (libxsmm_scratch_scale * info->size) : info->size); /* hysteresis */
    const size_t size = pool->instance.minsize + pool->instance.incsize;
    LIBXSMM_ASSERT(0 == (LIBXSMM_MALLOC_FLAG_X & info->flags)); /* scratch memory is not executable */
    if (size <= scale_size) { /* reuse scratch domain */
      pool->instance.head = pool_buffer; /* reuse scratch domain */
    }
    else { /* release buffer */
# if !defined(NDEBUG)
      static int error_once = 0;
# endif
      pool->instance.buffer = pool->instance.head = NULL;
# if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (1 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
      pool->instance.site = NULL; /* clear affinity */
# endif
# if !defined(NDEBUG)
      if (EXIT_SUCCESS != internal_xfree(pool_buffer, info)
        && 0 != libxsmm_verbosity /* library code is expected to be mute */
        && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
      {
        fprintf(stderr, "LIBXSMM ERROR: memory deallocation failed!\n");
      }
# else
      internal_xfree(pool_buffer, info); /* !libxsmm_free */
# endif
    }
  }
# if defined(LIBXSMM_MALLOC_SCRATCH_TRIM_HEAD) /* TODO: document linear/scoped allocator policy */
  else if (buffer < pool->instance.head) { /* reuse scratch domain */
    pool->instance.head = buffer;
  }
# else
  LIBXSMM_UNUSED(memory);
# endif
#else
  LIBXSMM_UNUSED(memory); LIBXSMM_UNUSED(pool);
#endif
}
741
742
/** Allocate (or reallocate) memory from the scratch-memory domain.
 *  memory:    in/out; receives the aligned user pointer (NULL on failure).
 *  size:      requested payload size in bytes.
 *  alignment: requested alignment (0 selects an automatic alignment).
 *  flags:     LIBXSMM_MALLOC_FLAG_* bits; LIBXSMM_MALLOC_FLAG_X must be clear.
 *  caller:    identifies the allocation site; used to match a scratch pool.
 *  Fast path draws from a per-site pool buffer; otherwise a fresh pool is
 *  allocated or the request falls back to a regular ("local") allocation. */
LIBXSMM_API_INTERN void internal_scratch_malloc(void** /*memory*/, size_t /*size*/, size_t /*alignment*/, int /*flags*/, const void* /*caller*/);
LIBXSMM_API_INTERN void internal_scratch_malloc(void** memory, size_t size, size_t alignment, int flags, const void* caller)
{
  LIBXSMM_ASSERT(NULL != memory && 0 == (LIBXSMM_MALLOC_FLAG_X & flags));
  if (0 == (LIBXSMM_MALLOC_FLAG_REALLOC & flags) || NULL == *memory) { /* fresh allocation */
    static int error_once = 0;
    size_t local_size = 0; /* non-zero triggers the local (non-pooled) fall-back below */
#if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (0 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
    if (0 < libxsmm_scratch_pools) {
      /* pool array lives in a statically reserved, padded buffer */
      internal_malloc_pool_type *const pools = (internal_malloc_pool_type*)LIBXSMM_UP2(
        (uintptr_t)internal_malloc_pool_buffer, LIBXSMM_MALLOC_SCRATCH_PADDING);
      internal_malloc_pool_type *const end = pools + libxsmm_scratch_pools, *pool = pools;
      /* alloc_size over-reserves so the returned pointer can be aligned up */
      const size_t align_size = libxsmm_alignment(size, alignment), alloc_size = size + align_size - 1;
# if (0 != LIBXSMM_SYNC)
      const unsigned int tid = libxsmm_get_tid();
# endif
      unsigned int npools = 1;
# if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (1 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
      const void *const site = caller; /* no further attempt in case of NULL */
      internal_malloc_pool_type *pool0 = end; /* first vacant pool found during the scan */
      for (; pool != end; ++pool) { /* counter: memory info is not employed as pools are still manipulated */
        if (NULL != pool->instance.buffer) {
          if ((LIBXSMM_MALLOC_INTERNAL_CALLER) != pool->instance.site) ++npools; /* count number of occupied pools */
          if ( /* find matching pool and enter fast path (draw from pool-buffer) */
#   if (0 != LIBXSMM_SYNC) && !defined(LIBXSMM_MALLOC_SCRATCH_JOIN)
            (site == pool->instance.site && tid == pool->instance.tid))
#   elif (0 != LIBXSMM_SYNC)
            (site == pool->instance.site && (0 != internal_malloc_join || tid == pool->instance.tid)))
#   else
            (site == pool->instance.site))
#   endif
          {
            break;
          }
        }
        else {
          if (end == pool0) pool0 = pool; /* first available pool */
          if (0 == pool->instance.minsize) { /* early exit */
            pool = pool0; break;
          }
        }
      }
# endif
      LIBXSMM_ASSERT(NULL != pool);
      if (end != pool && 0 <= internal_malloc_kind) {
        /* register this allocation with the pool before inspecting it */
        const size_t counter = LIBXSMM_ATOMIC_ADD_FETCH(&pool->instance.counter, (size_t)1, LIBXSMM_ATOMIC_SEQ_CST);
        if (NULL != pool->instance.buffer || 1 != counter) { /* attempt to (re-)use existing pool */
          const internal_malloc_info_type *const info = internal_malloc_info(pool->instance.buffer, 1/*check*/);
          const size_t pool_size = ((NULL != info && 0 != counter) ? info->size : 0);
          const size_t used_size = pool->instance.head - pool->instance.buffer;
          const size_t req_size = alloc_size + used_size;
          if (req_size <= pool_size) { /* fast path: draw from pool-buffer */
# if (0 != LIBXSMM_SYNC) && defined(LIBXSMM_MALLOC_SCRATCH_JOIN)
            void *const headaddr = &pool->instance.head;
            char *const head = (0 == internal_malloc_join
              ? (pool->instance.head += alloc_size) /* single-owner pool: plain bump */
              : ((char*)LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_ADD_FETCH, LIBXSMM_BITS)(
                (uintptr_t*)headaddr, alloc_size, LIBXSMM_ATOMIC_SEQ_CST))); /* shared pool: atomic bump */
# else
            char *const head = (char*)(pool->instance.head += alloc_size);
# endif
            *memory = LIBXSMM_ALIGN(head - alloc_size, align_size);
          }
          else { /* fall-back to local memory allocation */
            /* remember how much the pool fell short so the next fresh pool grows */
            const size_t incsize = req_size - LIBXSMM_MIN(pool_size, req_size);
            pool->instance.incsize = LIBXSMM_MAX(pool->instance.incsize, incsize);
# if (0 != LIBXSMM_SYNC) && defined(LIBXSMM_MALLOC_SCRATCH_JOIN)
            if (0 == internal_malloc_join) {
              --pool->instance.counter;
            }
            else {
              LIBXSMM_ATOMIC_SUB_FETCH(&pool->instance.counter, 1, LIBXSMM_ATOMIC_SEQ_CST);
            }
# else
            --pool->instance.counter;
# endif
            if ( /* update the local/private water-mark statistics */
# if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (1 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
              (LIBXSMM_MALLOC_INTERNAL_CALLER) != pool->instance.site &&
# endif
              0 == (LIBXSMM_MALLOC_FLAG_PRIVATE & flags))
            {
              const size_t watermark = LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_ADD_FETCH, LIBXSMM_BITS)(
                &internal_malloc_local_cur, alloc_size, LIBXSMM_ATOMIC_RELAXED);
              if (internal_malloc_local_max < watermark) internal_malloc_local_max = watermark; /* accept data-race */
            }
            else {
              const size_t watermark = LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_ADD_FETCH, LIBXSMM_BITS)(
                &internal_malloc_private_cur, alloc_size, LIBXSMM_ATOMIC_RELAXED);
              if (internal_malloc_private_max < watermark) internal_malloc_private_max = watermark; /* accept data-race */
            }
            local_size = size;
          }
        }
        else { /* fresh pool */
          const size_t scratch_limit = libxsmm_get_scratch_limit();
          const size_t scratch_size = internal_get_scratch_size(pool); /* exclude current pool */
          const size_t limit_size = (1 < npools ? (scratch_limit - LIBXSMM_MIN(scratch_size, scratch_limit)) : LIBXSMM_SCRATCH_UNLIMITED);
          const size_t scale_size = (size_t)(1 != libxsmm_scratch_scale ? (libxsmm_scratch_scale * alloc_size) : alloc_size); /* hysteresis */
          const size_t incsize = (size_t)(libxsmm_scratch_scale * pool->instance.incsize);
          const size_t maxsize = LIBXSMM_MAX(scale_size, pool->instance.minsize) + incsize;
          const size_t limsize = LIBXSMM_MIN(maxsize, limit_size);
          const size_t minsize = limsize;
          LIBXSMM_ASSERT(1 <= libxsmm_scratch_scale);
          LIBXSMM_ASSERT(1 == counter);
          pool->instance.incsize = 0; /* reset */
          pool->instance.minsize = minsize;
# if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (1 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
          pool->instance.site = site;
#   if (0 != LIBXSMM_SYNC)
          pool->instance.tid = tid;
#   endif
# endif
          if (alloc_size <= minsize && /* allocate scratch pool */
            EXIT_SUCCESS == libxsmm_xmalloc(memory, minsize, 0/*auto-align*/,
              (flags | LIBXSMM_MALLOC_FLAG_SCRATCH) & ~LIBXSMM_MALLOC_FLAG_REALLOC,
              NULL/*extra*/, 0/*extra_size*/))
          {
            pool->instance.buffer = (char*)*memory;
            pool->instance.head = pool->instance.buffer + alloc_size;
            *memory = LIBXSMM_ALIGN((char*)*memory, align_size);
# if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (1 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
            if ((LIBXSMM_MALLOC_INTERNAL_CALLER) != pool->instance.site)
# endif
            {
              LIBXSMM_ATOMIC_ADD_FETCH(&internal_malloc_scratch_nmallocs, 1, LIBXSMM_ATOMIC_RELAXED);
            }
          }
          else { /* fall-back to local allocation */
            LIBXSMM_ATOMIC_SUB_FETCH(&pool->instance.counter, 1, LIBXSMM_ATOMIC_SEQ_CST);
            if (0 != libxsmm_verbosity /* library code is expected to be mute */
              && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
            {
              if (alloc_size <= minsize) {
                fprintf(stderr, "LIBXSMM ERROR: failed to allocate scratch memory!\n");
              }
              else if ((LIBXSMM_MALLOC_INTERNAL_CALLER) != caller
                && (LIBXSMM_VERBOSITY_WARN <= libxsmm_verbosity || 0 > libxsmm_verbosity))
              {
                fprintf(stderr, "LIBXSMM WARNING: scratch memory domain exhausted!\n");
              }
            }
            local_size = size;
          }
        }
      }
      else { /* fall-back to local memory allocation */
        local_size = size;
      }
    }
    else { /* fall-back to local memory allocation */
      local_size = size;
    }
    if (0 != local_size)
#else
    local_size = size;
#endif /*defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (0 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))*/
    { /* local memory allocation */
      if (EXIT_SUCCESS != libxsmm_xmalloc(memory, local_size, alignment,
        flags & ~(LIBXSMM_MALLOC_FLAG_SCRATCH | LIBXSMM_MALLOC_FLAG_REALLOC), NULL/*extra*/, 0/*extra_size*/)
        && /* library code is expected to be mute */0 != libxsmm_verbosity
        && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
      {
        fprintf(stderr, "LIBXSMM ERROR: scratch memory fall-back failed!\n");
        LIBXSMM_ASSERT(NULL == *memory);
      }
      if ((LIBXSMM_MALLOC_INTERNAL_CALLER) != caller) {
        LIBXSMM_ATOMIC_ADD_FETCH(&internal_malloc_scratch_nmallocs, 1, LIBXSMM_ATOMIC_RELAXED);
      }
    }
  }
  else { /* reallocate memory */
    const void *const preserve = *memory;
#if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (0 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
    internal_malloc_pool_type *const pool = internal_scratch_malloc_pool(preserve);
    if (NULL != pool) { /* old buffer belongs to a pool: allocate-copy-free */
      const internal_malloc_info_type *const info = internal_malloc_info(pool->instance.buffer, 0/*no check*/);
      void* buffer;
      LIBXSMM_ASSERT(pool->instance.buffer <= pool->instance.head && NULL != info);
      internal_scratch_malloc(&buffer, size, alignment,
        ~LIBXSMM_MALLOC_FLAG_REALLOC & (LIBXSMM_MALLOC_FLAG_SCRATCH | flags), caller);
      if (NULL != buffer) {
        memcpy(buffer, preserve, LIBXSMM_MIN(size, info->size)); /* TODO: memmove? */
        *memory = buffer;
      }
      internal_scratch_free(memory, pool);
    }
    else
#endif
    { /* non-pooled (potentially foreign pointer) */
#if !defined(NDEBUG)
      const int status =
#endif
      libxsmm_xmalloc(memory, size, alignment/* no need here to determine alignment of given buffer */,
        ~LIBXSMM_MALLOC_FLAG_SCRATCH & flags, NULL/*extra*/, 0/*extra_size*/);
      assert(EXIT_SUCCESS == status || NULL == *memory); /* !LIBXSMM_ASSERT */
    }
  }
}
942
943
944 #if defined(LIBXSMM_MALLOC_HOOK_DYNAMIC)
/* Table of the original allocator entry points, resolved at runtime via dlsym
 * (see libxsmm_malloc_init); used by the dynamic malloc-hook machinery. */
LIBXSMM_APIVAR_PRIVATE_DEF(libxsmm_malloc_fntype libxsmm_malloc_fn);
946
947 #if defined(LIBXSMM_MALLOC_HOOK_QKMALLOC)
/* Adapter installed as the memalign entry when routing through qkmalloc,
 * which exposes no aligned allocation: the alignment request is dropped
 * and the call is forwarded to the resolved plain malloc. */
LIBXSMM_API_INTERN void* internal_memalign_malloc(size_t /*alignment*/, size_t /*size*/);
LIBXSMM_API_INTERN void* internal_memalign_malloc(size_t alignment, size_t size)
{
  LIBXSMM_UNUSED(alignment);
  LIBXSMM_ASSERT(NULL != libxsmm_malloc_fn.malloc.dlsym);
  return libxsmm_malloc_fn.malloc.ptr(size);
}
955 #elif defined(LIBXSMM_MALLOC_HOOK_KMP)
/* Adapter installed as the memalign entry when routing through KMP:
 * kmp_aligned_malloc takes (size, alignment), i.e. the argument order is
 * swapped ("twiddled") relative to memalign's (alignment, size). */
LIBXSMM_API_INTERN void* internal_memalign_twiddle(size_t /*alignment*/, size_t /*size*/);
LIBXSMM_API_INTERN void* internal_memalign_twiddle(size_t alignment, size_t size)
{
  LIBXSMM_ASSERT(NULL != libxsmm_malloc_fn.alignmem.dlsym);
  return libxsmm_malloc_fn.alignmem.ptr(size, alignment);
}
962 #endif
963 #endif /*defined(LIBXSMM_MALLOC_HOOK_DYNAMIC)*/
964
965
966 #if (defined(LIBXSMM_MALLOC_HOOK_STATIC) || defined(LIBXSMM_MALLOC_HOOK_DYNAMIC))
967
/* Hook body for intercepted memalign-style requests; caller identifies the
 * intercepted call site (may be NULL). The allocation flags (MMAP-backed
 * versus default) are selected at compile time. */
LIBXSMM_API_INTERN void* internal_memalign_hook(size_t /*alignment*/, size_t /*size*/, const void* /*caller*/);
LIBXSMM_API_INTERN void* internal_memalign_hook(size_t alignment, size_t size, const void* caller)
{
  void* result;
# if defined(LIBXSMM_MALLOC_MMAP_HOOK)
  INTERNAL_MEMALIGN_HOOK(result, LIBXSMM_MALLOC_FLAG_MMAP, alignment, size, caller);
# else
  INTERNAL_MEMALIGN_HOOK(result, LIBXSMM_MALLOC_FLAG_DEFAULT, alignment, size, caller);
# endif
  return result;
}
979
/* Link-time wrapper for memalign (GNU ld --wrap): same dispatch as the
 * internal hook but with no call-site information available. */
LIBXSMM_API void* __wrap_memalign(size_t /*alignment*/, size_t /*size*/);
LIBXSMM_API void* __wrap_memalign(size_t alignment, size_t size)
{
  void* result;
# if defined(LIBXSMM_MALLOC_MMAP_HOOK)
  INTERNAL_MEMALIGN_HOOK(result, LIBXSMM_MALLOC_FLAG_MMAP, alignment, size, NULL/*caller*/);
# else
  INTERNAL_MEMALIGN_HOOK(result, LIBXSMM_MALLOC_FLAG_DEFAULT, alignment, size, NULL/*caller*/);
# endif
  return result;
}
991
/* Hook body for intercepted malloc-style requests: a memalign request with
 * automatic alignment. */
LIBXSMM_API_INTERN void* internal_malloc_hook(size_t /*size*/, const void* /*caller*/);
LIBXSMM_API_INTERN void* internal_malloc_hook(size_t size, const void* caller)
{
  return internal_memalign_hook(0/*auto-alignment*/, size, caller);
}
997
/* Link-time wrapper for malloc (GNU ld --wrap): automatic alignment,
 * no call-site information. */
LIBXSMM_API void* __wrap_malloc(size_t /*size*/);
LIBXSMM_API void* __wrap_malloc(size_t size)
{
  void* result;
# if defined(LIBXSMM_MALLOC_MMAP_HOOK)
  INTERNAL_MEMALIGN_HOOK(result, LIBXSMM_MALLOC_FLAG_MMAP, 0/*auto-alignment*/, size, NULL/*caller*/);
# else
  INTERNAL_MEMALIGN_HOOK(result, LIBXSMM_MALLOC_FLAG_DEFAULT, 0/*auto-alignment*/, size, NULL/*caller*/);
# endif
  return result;
}
1009
1010 #if defined(LIBXSMM_MALLOC_HOOK_CALLOC)
1011 LIBXSMM_API void* __wrap_calloc(size_t /*num*/, size_t /*size*/);
__wrap_calloc(size_t num,size_t size)1012 LIBXSMM_API void* __wrap_calloc(size_t num, size_t size)
1013 {
1014 void* result;
1015 const size_t nbytes = num * size;
1016 # if defined(LIBXSMM_MALLOC_MMAP_HOOK)
1017 INTERNAL_MEMALIGN_HOOK(result, LIBXSMM_MALLOC_FLAG_MMAP, 0/*auto-alignment*/, nbytes, NULL/*caller*/);
1018 # else
1019 INTERNAL_MEMALIGN_HOOK(result, LIBXSMM_MALLOC_FLAG_DEFAULT, 0/*auto-alignment*/, nbytes, NULL/*caller*/);
1020 # endif
1021 /* TODO: signal anonymous/zeroed pages */
1022 if (NULL != result) memset(result, 0, nbytes);
1023 return result;
1024 }
1025 #endif
1026
1027 #if defined(LIBXSMM_MALLOC_HOOK_REALLOC)
/* Hook body for intercepted realloc-style requests; forwards the original
 * pointer together with the REALLOC flag so the backend can resize/move. */
LIBXSMM_API_INTERN void* internal_realloc_hook(void* /*ptr*/, size_t /*size*/, const void* /*caller*/);
LIBXSMM_API_INTERN void* internal_realloc_hook(void* ptr, size_t size, const void* caller)
{
  void* result;
# if defined(LIBXSMM_MALLOC_MMAP_HOOK)
  INTERNAL_REALLOC_HOOK(result, LIBXSMM_MALLOC_FLAG_REALLOC | LIBXSMM_MALLOC_FLAG_MMAP, ptr, size, caller);
# else
  INTERNAL_REALLOC_HOOK(result, LIBXSMM_MALLOC_FLAG_REALLOC | LIBXSMM_MALLOC_FLAG_DEFAULT, ptr, size, caller);
# endif
  return result;
}
1039
/* Link-time wrapper for realloc (GNU ld --wrap): no call-site information. */
LIBXSMM_API void* __wrap_realloc(void* /*ptr*/, size_t /*size*/);
LIBXSMM_API void* __wrap_realloc(void* ptr, size_t size)
{
  void* result;
# if defined(LIBXSMM_MALLOC_MMAP_HOOK)
  INTERNAL_REALLOC_HOOK(result, LIBXSMM_MALLOC_FLAG_REALLOC | LIBXSMM_MALLOC_FLAG_MMAP, ptr, size, NULL/*caller*/);
# else
  INTERNAL_REALLOC_HOOK(result, LIBXSMM_MALLOC_FLAG_REALLOC | LIBXSMM_MALLOC_FLAG_DEFAULT, ptr, size, NULL/*caller*/);
# endif
  return result;
}
1051 #endif
1052
/* Hook body for intercepted free-style requests; forwards pointer and
 * call-site to the release machinery. */
LIBXSMM_API_INTERN void internal_free_hook(void* /*ptr*/, const void* /*caller*/);
LIBXSMM_API_INTERN void internal_free_hook(void* ptr, const void* caller)
{
  INTERNAL_FREE_HOOK(ptr, caller);
}
1058
/* Link-time wrapper for free (GNU ld --wrap): no call-site information. */
LIBXSMM_API void __wrap_free(void* /*ptr*/);
LIBXSMM_API void __wrap_free(void* ptr)
{
  INTERNAL_FREE_HOOK(ptr, NULL/*caller*/);
}
1064
1065 #endif /*(defined(LIBXSMM_MALLOC_HOOK_STATIC) || defined(LIBXSMM_MALLOC_HOOK_DYNAMIC))*/
1066
1067 #if defined(LIBXSMM_MALLOC_HOOK_DYNAMIC)
/* Weak interposer for the global memalign symbol (dynamic hooking): routes
 * the request through LIBXSMM; a strong definition elsewhere wins. */
LIBXSMM_API LIBXSMM_ATTRIBUTE_WEAK LIBXSMM_ATTRIBUTE_MALLOC void* memalign(size_t /*alignment*/, size_t /*size*/) LIBXSMM_THROW;
LIBXSMM_API LIBXSMM_ATTRIBUTE_WEAK LIBXSMM_ATTRIBUTE_MALLOC void* memalign(size_t alignment, size_t size) LIBXSMM_THROW
{
  void* result;
# if defined(LIBXSMM_MALLOC_MMAP_HOOK)
  INTERNAL_MEMALIGN_HOOK(result, LIBXSMM_MALLOC_FLAG_MMAP, alignment, size, NULL/*caller*/);
# else
  INTERNAL_MEMALIGN_HOOK(result, LIBXSMM_MALLOC_FLAG_DEFAULT, alignment, size, NULL/*caller*/);
# endif
  return result;
}
1079
/* Weak interposer for the global malloc symbol (dynamic hooking):
 * automatic alignment, no call-site information. */
LIBXSMM_API LIBXSMM_ATTRIBUTE_WEAK LIBXSMM_ATTRIBUTE_MALLOC void* malloc(size_t /*size*/) LIBXSMM_THROW;
LIBXSMM_API LIBXSMM_ATTRIBUTE_WEAK LIBXSMM_ATTRIBUTE_MALLOC void* malloc(size_t size) LIBXSMM_THROW
{
  void* result;
# if defined(LIBXSMM_MALLOC_MMAP_HOOK)
  INTERNAL_MEMALIGN_HOOK(result, LIBXSMM_MALLOC_FLAG_MMAP, 0/*auto-alignment*/, size, NULL/*caller*/);
# else
  INTERNAL_MEMALIGN_HOOK(result, LIBXSMM_MALLOC_FLAG_DEFAULT, 0/*auto-alignment*/, size, NULL/*caller*/);
# endif
  return result;
}
1091
1092 #if defined(LIBXSMM_MALLOC_HOOK_CALLOC)
1093 LIBXSMM_API LIBXSMM_ATTRIBUTE_WEAK LIBXSMM_ATTRIBUTE_MALLOC void* calloc(size_t /*num*/, size_t /*size*/) LIBXSMM_THROW;
calloc(size_t num,size_t size)1094 LIBXSMM_API LIBXSMM_ATTRIBUTE_WEAK LIBXSMM_ATTRIBUTE_MALLOC void* calloc(size_t num, size_t size) LIBXSMM_THROW
1095 {
1096 void* result;
1097 const size_t nbytes = num * size;
1098 # if defined(LIBXSMM_MALLOC_MMAP_HOOK)
1099 INTERNAL_MEMALIGN_HOOK(result, LIBXSMM_MALLOC_FLAG_MMAP, 0/*auto-alignment*/, nbytes, NULL/*caller*/);
1100 # else
1101 INTERNAL_MEMALIGN_HOOK(result, LIBXSMM_MALLOC_FLAG_DEFAULT, 0/*auto-alignment*/, nbytes, NULL/*caller*/);
1102 # endif
1103 /* TODO: signal anonymous/zeroed pages */
1104 if (NULL != result) memset(result, 0, nbytes);
1105 return result;
1106 }
1107 #endif
1108
1109 #if defined(LIBXSMM_MALLOC_HOOK_REALLOC)
/* Weak interposer for the global realloc symbol (dynamic hooking). */
LIBXSMM_API LIBXSMM_ATTRIBUTE_WEAK void* realloc(void* /*ptr*/, size_t /*size*/) LIBXSMM_THROW;
LIBXSMM_API LIBXSMM_ATTRIBUTE_WEAK void* realloc(void* ptr, size_t size) LIBXSMM_THROW
{
  void* result;
# if defined(LIBXSMM_MALLOC_MMAP_HOOK)
  INTERNAL_REALLOC_HOOK(result, LIBXSMM_MALLOC_FLAG_REALLOC | LIBXSMM_MALLOC_FLAG_MMAP, ptr, size, NULL/*caller*/);
# else
  INTERNAL_REALLOC_HOOK(result, LIBXSMM_MALLOC_FLAG_REALLOC | LIBXSMM_MALLOC_FLAG_DEFAULT, ptr, size, NULL/*caller*/);
# endif
  return result;
}
1121 #endif
1122
/* Weak interposer for the global free symbol (dynamic hooking). */
LIBXSMM_API LIBXSMM_ATTRIBUTE_WEAK void free(void* /*ptr*/) LIBXSMM_THROW;
LIBXSMM_API LIBXSMM_ATTRIBUTE_WEAK void free(void* ptr) LIBXSMM_THROW
{
  INTERNAL_FREE_HOOK(ptr, NULL/*caller*/);
}
1128 #endif /*defined(LIBXSMM_MALLOC_HOOK_DYNAMIC)*/
1129
1130
libxsmm_malloc_init(void)1131 LIBXSMM_API_INTERN void libxsmm_malloc_init(void)
1132 {
1133 #if (0 != LIBXSMM_SYNC) && defined(LIBXSMM_MALLOC_SCRATCH_JOIN)
1134 const char *const env = getenv("LIBXSMM_MALLOC_JOIN");
1135 if (NULL != env && 0 != *env) internal_malloc_join = atoi(env);
1136 #endif
1137 #if defined(LIBXSMM_MALLOC_HOOK_DYNAMIC)
1138 # if defined(LIBXSMM_MALLOC_HOOK_QKMALLOC)
1139 void* handle_qkmalloc = NULL;
1140 dlerror(); /* clear an eventual error status */
1141 handle_qkmalloc = dlopen("libqkmalloc.so", RTLD_LAZY);
1142 if (NULL != handle_qkmalloc) {
1143 libxsmm_malloc_fn.memalign.ptr = internal_memalign_malloc;
1144 libxsmm_malloc_fn.malloc.dlsym = dlsym(handle_qkmalloc, "malloc");
1145 if (NULL == dlerror() && NULL != libxsmm_malloc_fn.malloc.dlsym) {
1146 # if defined(LIBXSMM_MALLOC_HOOK_CALLOC)
1147 libxsmm_malloc_fn.calloc.dlsym = dlsym(handle_qkmalloc, "calloc");
1148 if (NULL == dlerror() && NULL != libxsmm_malloc_fn.calloc.dlsym)
1149 # endif
1150 {
1151 # if defined(LIBXSMM_MALLOC_HOOK_REALLOC)
1152 libxsmm_malloc_fn.realloc.dlsym = dlsym(handle_qkmalloc, "realloc");
1153 if (NULL == dlerror() && NULL != libxsmm_malloc_fn.realloc.dlsym)
1154 # endif
1155 {
1156 libxsmm_malloc_fn.free.dlsym = dlsym(handle_qkmalloc, "free");
1157 }
1158 }
1159 }
1160 dlclose(handle_qkmalloc);
1161 }
1162 if (NULL == libxsmm_malloc_fn.free.ptr)
1163 # elif defined(LIBXSMM_MALLOC_HOOK_KMP)
1164 dlerror(); /* clear an eventual error status */
1165 libxsmm_malloc_fn.alignmem.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "kmp_aligned_malloc");
1166 if (NULL == dlerror() && NULL != libxsmm_malloc_fn.alignmem.dlsym) {
1167 libxsmm_malloc_fn.memalign.ptr = internal_memalign_twiddle;
1168 libxsmm_malloc_fn.malloc.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "kmp_malloc");
1169 if (NULL == dlerror() && NULL != libxsmm_malloc_fn.malloc.dlsym) {
1170 # if defined(LIBXSMM_MALLOC_HOOK_CALLOC)
1171 libxsmm_malloc_fn.calloc.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "kmp_calloc");
1172 if (NULL == dlerror() && NULL != libxsmm_malloc_fn.calloc.dlsym)
1173 # endif
1174 {
1175 # if defined(LIBXSMM_MALLOC_HOOK_REALLOC)
1176 libxsmm_malloc_fn.realloc.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "kmp_realloc");
1177 if (NULL == dlerror() && NULL != libxsmm_malloc_fn.realloc.dlsym)
1178 # endif
1179 {
1180 libxsmm_malloc_fn.free.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "kmp_free");
1181 }
1182 }
1183 }
1184 }
1185 if (NULL == libxsmm_malloc_fn.free.ptr)
1186 # endif /*defined(LIBXSMM_MALLOC_HOOK_QKMALLOC)*/
1187 {
1188 dlerror(); /* clear an eventual error status */
1189 # if (defined(LIBXSMM_BUILD) && (1 < (LIBXSMM_BUILD)))
1190 libxsmm_malloc_fn.memalign.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "__libc_memalign");
1191 if (NULL == dlerror() && NULL != libxsmm_malloc_fn.memalign.dlsym) {
1192 libxsmm_malloc_fn.malloc.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "__libc_malloc");
1193 if (NULL == dlerror() && NULL != libxsmm_malloc_fn.malloc.dlsym) {
1194 # if defined(LIBXSMM_MALLOC_HOOK_CALLOC)
1195 libxsmm_malloc_fn.calloc.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "__libc_calloc");
1196 if (NULL == dlerror() && NULL != libxsmm_malloc_fn.calloc.dlsym)
1197 # endif
1198 {
1199 # if defined(LIBXSMM_MALLOC_HOOK_REALLOC)
1200 libxsmm_malloc_fn.realloc.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "__libc_realloc");
1201 if (NULL == dlerror() && NULL != libxsmm_malloc_fn.realloc.dlsym)
1202 # endif
1203 {
1204 libxsmm_malloc_fn.free.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "__libc_free");
1205 }
1206 }
1207 }
1208 }
1209 if (NULL == libxsmm_malloc_fn.free.ptr) {
1210 void* handle_libc = NULL;
1211 dlerror(); /* clear an eventual error status */
1212 handle_libc = dlopen("libc.so." LIBXSMM_STRINGIFY(LIBXSMM_MALLOC_GLIBC), RTLD_LAZY);
1213 if (NULL != handle_libc) {
1214 libxsmm_malloc_fn.memalign.dlsym = dlsym(handle_libc, "__libc_memalign");
1215 if (NULL == dlerror() && NULL != libxsmm_malloc_fn.memalign.dlsym) {
1216 libxsmm_malloc_fn.malloc.dlsym = dlsym(handle_libc, "__libc_malloc");
1217 if (NULL == dlerror() && NULL != libxsmm_malloc_fn.malloc.dlsym) {
1218 # if defined(LIBXSMM_MALLOC_HOOK_CALLOC)
1219 libxsmm_malloc_fn.calloc.dlsym = dlsym(handle_libc, "__libc_calloc");
1220 if (NULL == dlerror() && NULL != libxsmm_malloc_fn.calloc.dlsym)
1221 # endif
1222 {
1223 # if defined(LIBXSMM_MALLOC_HOOK_REALLOC)
1224 libxsmm_malloc_fn.realloc.dlsym = dlsym(handle_libc, "__libc_realloc");
1225 if (NULL == dlerror() && NULL != libxsmm_malloc_fn.realloc.dlsym)
1226 # endif
1227 {
1228 libxsmm_malloc_fn.free.dlsym = dlsym(handle_libc, "__libc_free");
1229 }
1230 }
1231 }
1232 }
1233 dlclose(handle_libc);
1234 }
1235 }
1236 # if 0
1237 { /* attempt to setup deprecated GLIBC hooks */
1238 union { const void* dlsym; void* (**ptr)(size_t, size_t, const void*); } hook_memalign;
1239 dlerror(); /* clear an eventual error status */
1240 hook_memalign.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "__memalign_hook");
1241 if (NULL == dlerror() && NULL != hook_memalign.dlsym) {
1242 union { const void* dlsym; void* (**ptr)(size_t, const void*); } hook_malloc;
1243 hook_malloc.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "__malloc_hook");
1244 if (NULL == dlerror() && NULL != hook_malloc.dlsym) {
1245 # if defined(LIBXSMM_MALLOC_HOOK_REALLOC)
1246 union { const void* dlsym; void* (**ptr)(void*, size_t, const void*); } hook_realloc;
1247 hook_realloc.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "__realloc_hook");
1248 if (NULL == dlerror() && NULL != hook_realloc.dlsym)
1249 # endif
1250 {
1251 union { const void* dlsym; void (**ptr)(void*, const void*); } hook_free;
1252 hook_free.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "__free_hook");
1253 if (NULL == dlerror() && NULL != hook_free.dlsym) {
1254 *hook_memalign.ptr = internal_memalign_hook;
1255 *hook_malloc.ptr = internal_malloc_hook;
1256 # if defined(LIBXSMM_MALLOC_HOOK_REALLOC)
1257 *hook_realloc.ptr = internal_realloc_hook;
1258 # endif
1259 *hook_free.ptr = internal_free_hook;
1260 }
1261 }
1262 }
1263 }
1264 }
1265 # endif
1266 # else /* TODO */
1267 # endif /*(defined(LIBXSMM_BUILD) && (1 < (LIBXSMM_BUILD)))*/
1268 }
1269 if (NULL != libxsmm_malloc_fn.free.ptr) {
1270 # if defined(LIBXSMM_MALLOC_HOOK_IMALLOC)
1271 union { const void* dlsym; libxsmm_malloc_fun* ptr; } i_malloc;
1272 i_malloc.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "i_malloc");
1273 if (NULL == dlerror() && NULL != i_malloc.dlsym) {
1274 # if defined(LIBXSMM_MALLOC_HOOK_CALLOC)
1275 union { const void* dlsym; void* (**ptr)(size_t, size_t); } i_calloc;
1276 i_calloc.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "i_calloc");
1277 if (NULL == dlerror() && NULL != i_calloc.dlsym)
1278 # endif
1279 {
1280 # if defined(LIBXSMM_MALLOC_HOOK_REALLOC)
1281 union { const void* dlsym; libxsmm_realloc_fun* ptr; } i_realloc;
1282 i_realloc.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "i_realloc");
1283 if (NULL == dlerror() && NULL != i_realloc.dlsym)
1284 # endif
1285 {
1286 union { const void* dlsym; libxsmm_free_fun* ptr; } i_free;
1287 i_free.dlsym = dlsym(LIBXSMM_RTLD_NEXT, "i_free");
1288 if (NULL == dlerror() && NULL != i_free.dlsym) {
1289 *i_malloc.ptr = libxsmm_malloc_fn.malloc.ptr;
1290 # if defined(LIBXSMM_MALLOC_HOOK_CALLOC)
1291 *i_calloc.ptr = libxsmm_malloc_fn.calloc.ptr;
1292 # endif
1293 # if defined(LIBXSMM_MALLOC_HOOK_REALLOC)
1294 *i_realloc.ptr = libxsmm_malloc_fn.realloc.ptr;
1295 # endif
1296 *i_free.ptr = libxsmm_malloc_fn.free.ptr;
1297 }
1298 }
1299 }
1300 }
1301 # endif /*defined(LIBXSMM_MALLOC_HOOK_IMALLOC)*/
1302 }
1303 else { /* fall-back: potentially recursive */
1304 # if (defined(LIBXSMM_BUILD) && (1 < (LIBXSMM_BUILD)))
1305 libxsmm_malloc_fn.memalign.ptr = __libc_memalign;
1306 libxsmm_malloc_fn.malloc.ptr = __libc_malloc;
1307 # if defined(LIBXSMM_MALLOC_HOOK_CALLOC)
1308 libxsmm_malloc_fn.calloc.ptr = __libc_calloc;
1309 # endif
1310 # if defined(LIBXSMM_MALLOC_HOOK_REALLOC)
1311 libxsmm_malloc_fn.realloc.ptr = __libc_realloc;
1312 # endif
1313 libxsmm_malloc_fn.free.ptr = __libc_free;
1314 # else
1315 libxsmm_malloc_fn.memalign.ptr = libxsmm_memalign_internal;
1316 libxsmm_malloc_fn.malloc.ptr = malloc;
1317 # if defined(LIBXSMM_MALLOC_HOOK_CALLOC)
1318 libxsmm_malloc_fn.calloc.ptr = calloc;
1319 # endif
1320 # if defined(LIBXSMM_MALLOC_HOOK_REALLOC)
1321 libxsmm_malloc_fn.realloc.ptr = realloc;
1322 # endif
1323 libxsmm_malloc_fn.free.ptr = free;
1324 # endif
1325 }
1326 #endif
1327 }
1328
1329
libxsmm_malloc_finalize(void)1330 LIBXSMM_API_INTERN void libxsmm_malloc_finalize(void)
1331 {
1332 }
1333
1334
libxsmm_xset_default_allocator(LIBXSMM_LOCK_TYPE (LIBXSMM_LOCK)* lock,const void * context,libxsmm_malloc_function malloc_fn,libxsmm_free_function free_fn)1335 LIBXSMM_API_INTERN int libxsmm_xset_default_allocator(LIBXSMM_LOCK_TYPE(LIBXSMM_LOCK)* lock,
1336 const void* context, libxsmm_malloc_function malloc_fn, libxsmm_free_function free_fn)
1337 {
1338 int result = EXIT_SUCCESS;
1339 if (NULL != lock) {
1340 LIBXSMM_INIT
1341 LIBXSMM_LOCK_ACQUIRE(LIBXSMM_LOCK, lock);
1342 }
1343 if (NULL != malloc_fn.function && NULL != free_fn.function) {
1344 libxsmm_default_allocator_context = context;
1345 libxsmm_default_malloc_fn = malloc_fn;
1346 libxsmm_default_free_fn = free_fn;
1347 }
1348 else {
1349 libxsmm_malloc_function internal_malloc_fn;
1350 libxsmm_free_function internal_free_fn;
1351 const void* internal_allocator = NULL;
1352 internal_malloc_fn.function = __real_malloc;
1353 internal_free_fn.function = __real_free;
1354 /*internal_allocator = NULL;*/
1355 if (NULL == malloc_fn.function && NULL == free_fn.function) {
1356 libxsmm_default_allocator_context = internal_allocator;
1357 libxsmm_default_malloc_fn = internal_malloc_fn;
1358 libxsmm_default_free_fn = internal_free_fn;
1359 }
1360 else { /* invalid allocator */
1361 static int error_once = 0;
1362 if (0 != libxsmm_verbosity /* library code is expected to be mute */
1363 && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
1364 {
1365 fprintf(stderr, "LIBXSMM ERROR: allocator setup without malloc or free function!\n");
1366 }
1367 /* keep any valid (previously instantiated) default allocator */
1368 if (NULL == libxsmm_default_malloc_fn.function || NULL == libxsmm_default_free_fn.function) {
1369 libxsmm_default_allocator_context = internal_allocator;
1370 libxsmm_default_malloc_fn = internal_malloc_fn;
1371 libxsmm_default_free_fn = internal_free_fn;
1372 }
1373 result = EXIT_FAILURE;
1374 }
1375 }
1376 if (NULL != lock) {
1377 LIBXSMM_LOCK_RELEASE(LIBXSMM_LOCK, lock);
1378 }
1379 LIBXSMM_ASSERT(EXIT_SUCCESS == result);
1380 return result;
1381 }
1382
1383
libxsmm_xget_default_allocator(LIBXSMM_LOCK_TYPE (LIBXSMM_LOCK)* lock,const void ** context,libxsmm_malloc_function * malloc_fn,libxsmm_free_function * free_fn)1384 LIBXSMM_API_INTERN int libxsmm_xget_default_allocator(LIBXSMM_LOCK_TYPE(LIBXSMM_LOCK)* lock,
1385 const void** context, libxsmm_malloc_function* malloc_fn, libxsmm_free_function* free_fn)
1386 {
1387 int result = EXIT_SUCCESS;
1388 if (NULL != context || NULL != malloc_fn || NULL != free_fn) {
1389 if (NULL != lock) {
1390 LIBXSMM_INIT
1391 LIBXSMM_LOCK_ACQUIRE(LIBXSMM_LOCK, lock);
1392 }
1393 if (context) *context = libxsmm_default_allocator_context;
1394 if (NULL != malloc_fn) *malloc_fn = libxsmm_default_malloc_fn;
1395 if (NULL != free_fn) *free_fn = libxsmm_default_free_fn;
1396 if (NULL != lock) {
1397 LIBXSMM_LOCK_RELEASE(LIBXSMM_LOCK, lock);
1398 }
1399 }
1400 else if (0 != libxsmm_verbosity) { /* library code is expected to be mute */
1401 static int error_once = 0;
1402 if (1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED)) {
1403 fprintf(stderr, "LIBXSMM ERROR: invalid signature used to get the default memory allocator!\n");
1404 }
1405 result = EXIT_FAILURE;
1406 }
1407 LIBXSMM_ASSERT(EXIT_SUCCESS == result);
1408 return result;
1409 }
1410
1411
libxsmm_xset_scratch_allocator(LIBXSMM_LOCK_TYPE (LIBXSMM_LOCK)* lock,const void * context,libxsmm_malloc_function malloc_fn,libxsmm_free_function free_fn)1412 LIBXSMM_API_INTERN int libxsmm_xset_scratch_allocator(LIBXSMM_LOCK_TYPE(LIBXSMM_LOCK)* lock,
1413 const void* context, libxsmm_malloc_function malloc_fn, libxsmm_free_function free_fn)
1414 {
1415 int result = EXIT_SUCCESS;
1416 static int error_once = 0;
1417 if (NULL != lock) {
1418 LIBXSMM_INIT
1419 LIBXSMM_LOCK_ACQUIRE(LIBXSMM_LOCK, lock);
1420 }
1421 /* make sure the default allocator is setup before adopting it eventually */
1422 if (NULL == libxsmm_default_malloc_fn.function || NULL == libxsmm_default_free_fn.function) {
1423 const libxsmm_malloc_function null_malloc_fn = { NULL };
1424 const libxsmm_free_function null_free_fn = { NULL };
1425 libxsmm_xset_default_allocator(NULL/*already locked*/, NULL/*context*/, null_malloc_fn, null_free_fn);
1426 }
1427 if (NULL == malloc_fn.function && NULL == free_fn.function) { /* adopt default allocator */
1428 libxsmm_scratch_allocator_context = libxsmm_default_allocator_context;
1429 libxsmm_scratch_malloc_fn = libxsmm_default_malloc_fn;
1430 libxsmm_scratch_free_fn = libxsmm_default_free_fn;
1431 }
1432 else if (NULL != malloc_fn.function) {
1433 if (NULL == free_fn.function
1434 && /*warning*/(LIBXSMM_VERBOSITY_WARN <= libxsmm_verbosity || 0 > libxsmm_verbosity)
1435 && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
1436 {
1437 fprintf(stderr, "LIBXSMM WARNING: scratch allocator setup without free function!\n");
1438 }
1439 libxsmm_scratch_allocator_context = context;
1440 libxsmm_scratch_malloc_fn = malloc_fn;
1441 libxsmm_scratch_free_fn = free_fn; /* NULL allowed */
1442 }
1443 else { /* invalid scratch allocator */
1444 if (0 != libxsmm_verbosity /* library code is expected to be mute */
1445 && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
1446 {
1447 fprintf(stderr, "LIBXSMM ERROR: invalid scratch allocator (default used)!\n");
1448 }
1449 /* keep any valid (previously instantiated) scratch allocator */
1450 if (NULL == libxsmm_scratch_malloc_fn.function) {
1451 libxsmm_scratch_allocator_context = libxsmm_default_allocator_context;
1452 libxsmm_scratch_malloc_fn = libxsmm_default_malloc_fn;
1453 libxsmm_scratch_free_fn = libxsmm_default_free_fn;
1454 }
1455 result = EXIT_FAILURE;
1456 }
1457 if (NULL != lock) {
1458 LIBXSMM_LOCK_RELEASE(LIBXSMM_LOCK, lock);
1459 }
1460 LIBXSMM_ASSERT(EXIT_SUCCESS == result);
1461 return result;
1462 }
1463
1464
libxsmm_xget_scratch_allocator(LIBXSMM_LOCK_TYPE (LIBXSMM_LOCK)* lock,const void ** context,libxsmm_malloc_function * malloc_fn,libxsmm_free_function * free_fn)1465 LIBXSMM_API_INTERN int libxsmm_xget_scratch_allocator(LIBXSMM_LOCK_TYPE(LIBXSMM_LOCK)* lock,
1466 const void** context, libxsmm_malloc_function* malloc_fn, libxsmm_free_function* free_fn)
1467 {
1468 int result = EXIT_SUCCESS;
1469 if (NULL != context || NULL != malloc_fn || NULL != free_fn) {
1470 if (NULL != lock) {
1471 LIBXSMM_INIT
1472 LIBXSMM_LOCK_ACQUIRE(LIBXSMM_LOCK, lock);
1473 }
1474 if (context) *context = libxsmm_scratch_allocator_context;
1475 if (NULL != malloc_fn) *malloc_fn = libxsmm_scratch_malloc_fn;
1476 if (NULL != free_fn) *free_fn = libxsmm_scratch_free_fn;
1477 if (NULL != lock) {
1478 LIBXSMM_LOCK_RELEASE(LIBXSMM_LOCK, lock);
1479 }
1480 }
1481 else if (0 != libxsmm_verbosity) { /* library code is expected to be mute */
1482 static int error_once = 0;
1483 if (1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED)) {
1484 fprintf(stderr, "LIBXSMM ERROR: invalid signature used to get the scratch memory allocator!\n");
1485 }
1486 result = EXIT_FAILURE;
1487 }
1488 LIBXSMM_ASSERT(EXIT_SUCCESS == result);
1489 return result;
1490 }
1491
1492
libxsmm_set_default_allocator(const void * context,libxsmm_malloc_function malloc_fn,libxsmm_free_function free_fn)1493 LIBXSMM_API int libxsmm_set_default_allocator(const void* context,
1494 libxsmm_malloc_function malloc_fn, libxsmm_free_function free_fn)
1495 {
1496 return libxsmm_xset_default_allocator(&libxsmm_lock_global, context, malloc_fn, free_fn);
1497 }
1498
1499
libxsmm_get_default_allocator(const void ** context,libxsmm_malloc_function * malloc_fn,libxsmm_free_function * free_fn)1500 LIBXSMM_API int libxsmm_get_default_allocator(const void** context,
1501 libxsmm_malloc_function* malloc_fn, libxsmm_free_function* free_fn)
1502 {
1503 return libxsmm_xget_default_allocator(&libxsmm_lock_global, context, malloc_fn, free_fn);
1504 }
1505
1506
libxsmm_set_scratch_allocator(const void * context,libxsmm_malloc_function malloc_fn,libxsmm_free_function free_fn)1507 LIBXSMM_API int libxsmm_set_scratch_allocator(const void* context,
1508 libxsmm_malloc_function malloc_fn, libxsmm_free_function free_fn)
1509 {
1510 return libxsmm_xset_scratch_allocator(&libxsmm_lock_global, context, malloc_fn, free_fn);
1511 }
1512
1513
libxsmm_get_scratch_allocator(const void ** context,libxsmm_malloc_function * malloc_fn,libxsmm_free_function * free_fn)1514 LIBXSMM_API int libxsmm_get_scratch_allocator(const void** context,
1515 libxsmm_malloc_function* malloc_fn, libxsmm_free_function* free_fn)
1516 {
1517 return libxsmm_xget_scratch_allocator(&libxsmm_lock_global, context, malloc_fn, free_fn);
1518 }
1519
1520
/** Retrieves the bookkeeping of a buffer allocated by LIBXSMM: the user-visible
 *  size, the allocation flags, and the actual (unaligned) base pointer.
 *  Any of size/flags/extra may be NULL. Returns EXIT_SUCCESS for LIBXSMM-owned
 *  buffers and for memory==NULL; EXIT_FAILURE for a non-NULL pointer without
 *  an attached info-record (foreign buffer). */
LIBXSMM_API int libxsmm_get_malloc_xinfo(const void* memory, size_t* size, int* flags, void** extra)
{
  int result;
#if !defined(NDEBUG)
  if (NULL != size || NULL != extra)
#endif
  {
    /* NOTE(review): the check-level (2 vs. 1) forwarded to internal_malloc_info
     * depends on whether executable flags are queried; presumably it selects the
     * validation strictness of the info-record lookup -- confirm against
     * internal_malloc_info. */
    const int check = ((NULL == flags || 0 == (LIBXSMM_MALLOC_FLAG_X & *flags)) ? 2 : 1);
    const internal_malloc_info_type *const info = internal_malloc_info(memory, check);
    if (NULL != info) { /* LIBXSMM-owned buffer: report recorded attributes */
      if (NULL != size) *size = info->size;
      if (NULL != flags) *flags = info->flags;
      if (NULL != extra) *extra = info->pointer;
      result = EXIT_SUCCESS;
    }
    else { /* potentially foreign buffer */
      result = (NULL != memory ? EXIT_FAILURE : EXIT_SUCCESS);
      if (NULL != size) *size = 0;
      if (NULL != flags) *flags = 0;
      if (NULL != extra) *extra = 0;
    }
  }
#if !defined(NDEBUG)
  else { /* debug builds reject calls that request no output at all */
    static int error_once = 0;
    if (0 != libxsmm_verbosity /* library code is expected to be mute */
      && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
    {
      fprintf(stderr, "LIBXSMM ERROR: attachment error for memory buffer %p!\n", memory);
    }
    LIBXSMM_ASSERT_MSG(0/*false*/, "LIBXSMM ERROR: attachment error");
    result = EXIT_FAILURE;
  }
#endif
  return result;
}
1557
1558
1559 #if !defined(_WIN32)
1560
internal_xmalloc_mhint(void * buffer,size_t size)1561 LIBXSMM_API_INLINE void internal_xmalloc_mhint(void* buffer, size_t size)
1562 {
1563 LIBXSMM_ASSERT((MAP_FAILED != buffer && NULL != buffer) || 0 == size);
1564 #if defined(_DEFAULT_SOURCE) || defined(_BSD_SOURCE)
1565 /* proceed after failed madvise (even in case of an error; take what we got) */
1566 /* issue no warning as a failure seems to be related to the kernel version */
1567 madvise(buffer, size, MADV_NORMAL/*MADV_RANDOM*/
1568 # if defined(MADV_NOHUGEPAGE) /* if not available, we then take what we got (THP) */
1569 | ((LIBXSMM_MALLOC_ALIGNMAX * LIBXSMM_MALLOC_ALIGNFCT) > size ? MADV_NOHUGEPAGE : 0)
1570 # endif
1571 # if defined(MADV_DONTDUMP)
1572 | ((LIBXSMM_MALLOC_ALIGNMAX * LIBXSMM_MALLOC_ALIGNFCT) > size ? 0 : MADV_DONTDUMP)
1573 # endif
1574 );
1575 #else
1576 LIBXSMM_UNUSED(buffer); LIBXSMM_UNUSED(size);
1577 #endif
1578 }
1579
1580
/** Creates a file-backed double-mapping for executable code: the same
 *  (immediately unlinked) temporary file is mapped once with
 *  PROT_READ|PROT_EXEC (stored into *rx) and once with PROT_READ|PROT_WRITE
 *  (returned), so code can be written through one view and executed through
 *  the other. "dir" optionally prefixes the temp-file template. On failure
 *  MAP_FAILED is returned; *rx is set to NULL if only the second mapping
 *  failed, otherwise it is left untouched. */
LIBXSMM_API_INLINE void* internal_xmalloc_xmap(const char* dir, size_t size, int flags, void** rx)
{
  void* result = MAP_FAILED;
  char filename[4096] = LIBXSMM_MALLOC_XMAP_TEMPLATE;
  int i = 0; /* first the snprintf result, later the mkstemp file descriptor */
  LIBXSMM_ASSERT(NULL != rx && MAP_FAILED != *rx);
  if (NULL != dir && 0 != *dir) { /* optional directory prefix for the template */
    i = LIBXSMM_SNPRINTF(filename, sizeof(filename), "%s/" LIBXSMM_MALLOC_XMAP_TEMPLATE, dir);
  }
  if (0 <= i && i < (int)sizeof(filename)) { /* template was not truncated */
    /* coverity[secure_temp] */
    i = mkstemp(filename);
    if (0 <= i) {
      /* unlink right away: the file persists only while mappings exist */
      if (0 == unlink(filename) && 0 == ftruncate(i, size)) {
        const int mflags = (flags | LIBXSMM_MAP_SHARED);
        void *const xmap = mmap(*rx, size, PROT_READ | PROT_EXEC, mflags, i, 0/*offset*/);
        if (MAP_FAILED != xmap) {
          LIBXSMM_ASSERT(NULL != xmap);
          result = mmap(NULL, size, PROT_READ | PROT_WRITE, mflags, i, 0/*offset*/);
          if (MAP_FAILED != result) {
            LIBXSMM_ASSERT(NULL != result);
            internal_xmalloc_mhint(xmap, size);
            *rx = xmap; /* hand the executable view back to the caller */
          }
          else { /* writable view failed: release the executable view as well */
            munmap(xmap, size);
            *rx = NULL;
          }
        }
      }
      close(i); /* mappings stay valid after closing the descriptor */
    }
  }
  return result;
}
1616
1617 #endif /*!defined(_WIN32)*/
1618
1619
/** Reallocates the buffer behind *ptr using the given realloc-function; the
 *  base pointer is taken from *info when available (aligned allocations keep
 *  the user-pointer offset from the base). The outcome is signaled via side
 *  effects: *info is NULL'ed to mean "no delete" and *ptr is NULL'ed to mean
 *  "no copy" (in-place growth) or re-pointed into the moved buffer (copy
 *  needed). On failure both are left untouched and NULL is returned. */
LIBXSMM_API_INLINE void* internal_xrealloc(void** ptr, internal_malloc_info_type** info, size_t size,
  libxsmm_realloc_fun realloc_fn, libxsmm_free_fun free_fn)
{
  char *const base = (char*)(NULL != *info ? (*info)->pointer : *ptr), *result;
  LIBXSMM_ASSERT(NULL != *ptr);
  /* may implicitly invalidate info */
  result = (char*)realloc_fn(base, size);
  if (result == base) { /* signal no-copy */
    LIBXSMM_ASSERT(NULL != result);
    *info = NULL; /* no delete */
    *ptr = NULL; /* no copy */
  }
  else if (NULL != result) { /* copy */
    /* preserve the caller's offset into the (aligned) allocation */
    const size_t offset_src = (const char*)*ptr - base;
    *ptr = result + offset_src; /* copy */
    *info = NULL; /* no delete */
  }
#if !defined(NDEBUG) && 0 /* disabled failure-path: would release the original buffer */
  else { /* failed */
    if (NULL != *info) {
      /* implicitly invalidates info */
      internal_xfree(*ptr, *info);
    }
    else { /* foreign pointer */
      free_fn(*ptr);
    }
    *info = NULL; /* no delete */
    *ptr = NULL; /* no copy */
  }
#else
  LIBXSMM_UNUSED(free_fn);
#endif
  return result;
}
1654
1655
LIBXSMM_API_INTERN void* internal_xmalloc(void** /*ptr*/, internal_malloc_info_type** /*info*/, size_t /*size*/,
  const void* /*context*/, libxsmm_malloc_function /*malloc_fn*/, libxsmm_free_function /*free_fn*/);
/** Allocates or reallocates memory via the configured allocator. A fresh
 *  allocation is performed when *ptr is NULL. Otherwise a reallocation is
 *  attempted: when the standard (or hooked "__real_") allocator is in effect,
 *  regular realloc is used (via internal_xrealloc); for custom allocators the
 *  fall-back is allocate-new, and on failure the original buffer is released
 *  and *ptr is NULL'ed ("safe delete"). */
LIBXSMM_API_INTERN void* internal_xmalloc(void** ptr, internal_malloc_info_type** info, size_t size,
  const void* context, libxsmm_malloc_function malloc_fn, libxsmm_free_function free_fn)
{
  void* result;
  LIBXSMM_ASSERT(NULL != ptr && NULL != info && NULL != malloc_fn.function);
  if (NULL == *ptr) { /* fresh allocation */
    result = (NULL == context
      ? malloc_fn.function(size)
      : malloc_fn.ctx_form(size, context));
  }
  else { /* reallocate */
    if (NULL != free_fn.function /* prefer free_fn since it is part of pointer-info */
      ? (__real_free == free_fn.function || free == free_fn.function)
      : (__real_malloc == malloc_fn.function || malloc == malloc_fn.function))
    { /* standard allocator in effect: realloc on the base pointer is valid */
#if defined(LIBXSMM_MALLOC_HOOK_REALLOC)
      result = internal_xrealloc(ptr, info, size, __real_realloc, __real_free);
#else
      result = internal_xrealloc(ptr, info, size, realloc, __real_free);
#endif
    }
    else { /* fall-back with regular allocation */
      result = (NULL == context
        ? malloc_fn.function(size)
        : malloc_fn.ctx_form(size, context));
      if (NULL == result) { /* failed */
        if (NULL != *info) {
          internal_xfree(*ptr, *info);
        }
        else { /* foreign pointer */
          (NULL != free_fn.function ? free_fn.function : __real_free)(*ptr);
        }
        *ptr = NULL; /* safe delete */
      }
    }
  }
  return result;
}
1696
1697
/** Core allocator: allocates (or reallocates when LIBXSMM_MALLOC_FLAG_REALLOC
 *  is set and *memory is non-NULL) a buffer of "size" Bytes honoring the given
 *  (minimum) "alignment"; the aligned pointer is stored into *memory.
 *  "extra"/"extra_size" denote caller-data copied to the front of the raw
 *  allocation; the bookkeeping record (internal_malloc_info_type) is placed
 *  immediately below the returned aligned pointer. Depending on flags and
 *  platform, the buffer is served by mmap/VirtualAlloc (incl. large/locked
 *  pages and double-mapped executable buffers) or by the configured
 *  malloc-function. Returns EXIT_SUCCESS or EXIT_FAILURE. */
LIBXSMM_API_INTERN int libxsmm_xmalloc(void** memory, size_t size, size_t alignment,
  int flags, const void* extra, size_t extra_size)
{
  int result = EXIT_SUCCESS;
#if !defined(NDEBUG)
  if (NULL != memory)
#endif
  {
    static int error_once = 0;
    if (0 != size) {
      size_t alloc_alignment = 0, alloc_size = 0, max_preserve = 0;
      internal_malloc_info_type* info = NULL;
      void* buffer = NULL, * reloc = NULL;
      /* ATOMIC BEGIN: this region should be atomic/locked */
      const void* context = libxsmm_default_allocator_context;
      libxsmm_malloc_function malloc_fn = libxsmm_default_malloc_fn;
      libxsmm_free_function free_fn = libxsmm_default_free_fn;
      if (0 != (LIBXSMM_MALLOC_FLAG_SCRATCH & flags)) { /* scratch: switch allocator */
        context = libxsmm_scratch_allocator_context;
        malloc_fn = libxsmm_scratch_malloc_fn;
        free_fn = libxsmm_scratch_free_fn;
#if defined(LIBXSMM_MALLOC_MMAP_SCRATCH)
        flags |= LIBXSMM_MALLOC_FLAG_MMAP;
#endif
      }
      if ((0 != (internal_malloc_kind & 1) && 0 < internal_malloc_kind)
        || NULL == malloc_fn.function || NULL == free_fn.function)
      { /* fall back to the (hooked) standard allocator */
        malloc_fn.function = __real_malloc;
        free_fn.function = __real_free;
        context = NULL;
      }
      /* ATOMIC END: this region should be atomic */
      flags |= LIBXSMM_MALLOC_FLAG_RW; /* normalize given flags since flags=0 is accepted as well */
      if (0 != (LIBXSMM_MALLOC_FLAG_REALLOC & flags) && NULL != *memory) {
        info = internal_malloc_info(*memory, 2/*check*/);
        if (NULL != info) {
          max_preserve = info->size; /* copy at most the previous user-size */
        }
        else { /* reallocation of unknown allocation */
          flags &= ~LIBXSMM_MALLOC_FLAG_MMAP;
        }
      }
      else *memory = NULL;
#if !defined(LIBXSMM_MALLOC_MMAP)
      if (0 == (LIBXSMM_MALLOC_FLAG_X & flags) && 0 == (LIBXSMM_MALLOC_FLAG_MMAP & flags)) {
        alloc_alignment = (0 == (LIBXSMM_MALLOC_FLAG_REALLOC & flags) ? libxsmm_alignment(size, alignment) : alignment);
        alloc_size = size + extra_size + sizeof(internal_malloc_info_type) + alloc_alignment - 1;
        buffer = internal_xmalloc(memory, &info, alloc_size, context, malloc_fn, free_fn);
      }
      else
#endif
      if (NULL == info || size != info->size) { /* new buffer needed */
#if defined(_WIN32) ||defined(__CYGWIN__)
        const int mflags = (0 != (LIBXSMM_MALLOC_FLAG_X & flags) ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE);
        static SIZE_T alloc_alignmax = 0, alloc_pagesize = 0;
        if (0 == alloc_alignmax) { /* first/one time */
          SYSTEM_INFO system_info;
          GetSystemInfo(&system_info);
          alloc_pagesize = system_info.dwPageSize;
          alloc_alignmax = GetLargePageMinimum();
        }
        if ((LIBXSMM_MALLOC_ALIGNMAX * LIBXSMM_MALLOC_ALIGNFCT) <= size) { /* attempt to use large pages */
          HANDLE process_token;
          alloc_alignment = (NULL == info
            ? (0 == alignment ? alloc_alignmax : libxsmm_lcm(alignment, alloc_alignmax))
            : libxsmm_lcm(alignment, alloc_alignmax));
          alloc_size = LIBXSMM_UP2(size + extra_size + sizeof(internal_malloc_info_type) + alloc_alignment - 1, alloc_alignmax);
          if (TRUE == OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &process_token)) {
            TOKEN_PRIVILEGES tp;
            if (TRUE == LookupPrivilegeValue(NULL, TEXT("SeLockMemoryPrivilege"), &tp.Privileges[0].Luid)) {
              tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; tp.PrivilegeCount = 1; /* enable privilege */
              if (TRUE == AdjustTokenPrivileges(process_token, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0)
                && ERROR_SUCCESS == GetLastError()/*may have failed (regardless of TRUE)*/)
              {
                /* VirtualAlloc cannot be used to reallocate memory */
                buffer = VirtualAlloc(NULL, alloc_size, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, mflags);
              }
              tp.Privileges[0].Attributes = 0; /* disable privilege */
              AdjustTokenPrivileges(process_token, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0);
            }
            CloseHandle(process_token);
          }
        }
        else { /* small allocation using regular page-size */
          alloc_alignment = (NULL == info ? libxsmm_alignment(size, alignment) : alignment);
          alloc_size = LIBXSMM_UP2(size + extra_size + sizeof(internal_malloc_info_type) + alloc_alignment - 1, alloc_pagesize);
        }
        if (NULL == buffer) { /* small allocation or retry with regular page size */
          /* VirtualAlloc cannot be used to reallocate memory */
          buffer = VirtualAlloc(NULL, alloc_size, MEM_RESERVE | MEM_COMMIT, mflags);
        }
        if (NULL != buffer) {
          flags |= LIBXSMM_MALLOC_FLAG_MMAP; /* select the corresponding deallocation */
        }
        else if (0 == (LIBXSMM_MALLOC_FLAG_MMAP & flags)) { /* fall-back allocation */
          buffer = internal_xmalloc(memory, &info, alloc_size, context, malloc_fn, free_fn);
        }
#else /* !defined(_WIN32) */
# if defined(MAP_HUGETLB)
        static size_t limit_hugetlb = LIBXSMM_SCRATCH_UNLIMITED; /* watermark: lowered after HUGETLB failures */
# endif
# if defined(MAP_LOCKED)
        static size_t limit_plocked = LIBXSMM_SCRATCH_UNLIMITED; /* watermark: lowered after mlock failures */
# endif
# if defined(MAP_32BIT)
        static int map32 = 1;
# endif
        int mflags = 0
# if defined(MAP_UNINITIALIZED) && 0/*fails with WSL*/
          | MAP_UNINITIALIZED /* unlikely available */
# endif
# if defined(MAP_NORESERVE)
          | (LIBXSMM_MALLOC_ALIGNMAX < size ? 0 : MAP_NORESERVE)
# endif
# if defined(MAP_32BIT)
          | ((0 != (LIBXSMM_MALLOC_FLAG_X & flags) && 0 != map32
            && LIBXSMM_X86_AVX512_CORE > libxsmm_target_archid
            && LIBXSMM_X86_AVX512 < libxsmm_target_archid) ? MAP_32BIT : 0)
# endif
# if defined(MAP_HUGETLB) /* may fail depending on system settings */
          | ((0 == (LIBXSMM_MALLOC_FLAG_X & flags)
            && ((LIBXSMM_MALLOC_ALIGNMAX * LIBXSMM_MALLOC_ALIGNFCT) <= size ||
                0 != (LIBXSMM_MALLOC_FLAG_PHUGE & flags))
            && (internal_malloc_hugetlb + size) < limit_hugetlb) ? MAP_HUGETLB : 0)
# endif
# if defined(MAP_LOCKED) && !defined(LIBXSMM_MALLOC_LOCK_ONFAULT)
          | ((0 == (LIBXSMM_MALLOC_FLAG_X & flags)
            && (internal_malloc_plocked + size) < limit_plocked) ? MAP_LOCKED : 0)
# endif
        ; /* mflags */
# if defined(MAP_POPULATE)
        { static int prefault = 0;
          if (0 == prefault) { /* prefault only on Linux 3.10.0-327 (and later) to avoid data race in page-fault handler */
            struct utsname osinfo; unsigned int version_major = 3, version_minor = 10, version_update = 0, version_patch = 327;
            if (0 <= uname(&osinfo) && 0 == strcmp("Linux", osinfo.sysname)
              && 4 == sscanf(osinfo.release, "%u.%u.%u-%u", &version_major, &version_minor, &version_update, &version_patch)
              && LIBXSMM_VERSION4(3, 10, 0, 327) > LIBXSMM_VERSION4(version_major, version_minor, version_update, version_patch))
            {
              mflags |= MAP_POPULATE; prefault = 1;
            }
            else prefault = -1; /* remember the negative outcome */
          }
          else if (1 == prefault) mflags |= MAP_POPULATE;
        }
# endif
        /* make allocated size at least a multiple of the smallest page-size to avoid split-pages (unmap!) */
        alloc_alignment = libxsmm_lcm(0 == alignment ? libxsmm_alignment(size, alignment) : alignment, LIBXSMM_PAGE_MINSIZE);
        alloc_size = LIBXSMM_UP2(size + extra_size + sizeof(internal_malloc_info_type) + alloc_alignment - 1, alloc_alignment);
        if (0 == (LIBXSMM_MALLOC_FLAG_X & flags)) { /* anonymous and non-executable */
# if defined(MAP_32BIT)
          LIBXSMM_ASSERT(0 == (MAP_32BIT & mflags));
# endif
# if 0
          LIBXSMM_ASSERT(NULL != info || NULL == *memory); /* no memory mapping of foreign pointer */
# endif
          buffer = mmap(NULL == info ? NULL : info->pointer, alloc_size, PROT_READ | PROT_WRITE,
            MAP_PRIVATE | LIBXSMM_MAP_ANONYMOUS | mflags, -1, 0/*offset*/);
# if defined(MAP_HUGETLB) /* retry without HUGETLB and track the watermark */
          INTERNAL_XMALLOC_KIND(MAP_HUGETLB, "huge-page", LIBXSMM_MALLOC_FLAG_PHUGE, flags, mflags,
            internal_malloc_hugetlb, limit_hugetlb, info, alloc_size, buffer);
# endif
# if defined(MAP_LOCKED)
#   if !defined(LIBXSMM_MALLOC_LOCK_ONFAULT)
          INTERNAL_XMALLOC_KIND(MAP_LOCKED, "locked-page", LIBXSMM_MALLOC_FLAG_PLOCK, flags, mflags,
            internal_malloc_plocked, limit_plocked, info, alloc_size, buffer);
#   else
          if (0 != (MAP_LOCKED & mflags) && MAP_FAILED != buffer) { /* lock pages on first fault */
            LIBXSMM_ASSERT(NULL != buffer);
#     if 0 /* mlock2 is potentially not exposed */
            if (0 == mlock2(buffer, alloc_size, MLOCK_ONFAULT))
#     else
            if (0 == syscall(SYS_mlock2, buffer, alloc_size, MLOCK_ONFAULT))
#     endif
            {
              LIBXSMM_ATOMIC_ADD_FETCH(&internal_malloc_plocked, alloc_size, LIBXSMM_ATOMIC_RELAXED);
              flags |= LIBXSMM_MALLOC_FLAG_PLOCK;
            }
            else { /* update watermark */
              INTERNAL_XMALLOC_WATERMARK("locked-page", internal_malloc_plocked, limit_plocked, alloc_size);
            }
          }
#   endif
# endif
        }
        else { /* executable buffer requested */
          static /*LIBXSMM_TLS*/ int fallback = -1; /* fall-back allocation method */
# if defined(MAP_HUGETLB)
          LIBXSMM_ASSERT(0 == (MAP_HUGETLB & mflags));
# endif
# if defined(MAP_LOCKED)
          LIBXSMM_ASSERT(0 == (MAP_LOCKED & mflags));
# endif
          if (0 > (int)LIBXSMM_ATOMIC_LOAD(&fallback, LIBXSMM_ATOMIC_RELAXED)) { /* decide once */
            const char *const env = getenv("LIBXSMM_SE");
            LIBXSMM_ATOMIC_STORE(&fallback, NULL == env
              /* libxsmm_se decides */
              ? (0 == libxsmm_se ? LIBXSMM_MALLOC_FINAL : LIBXSMM_MALLOC_FALLBACK)
              /* user's choice takes precedence */
              : ('0' != *env ? LIBXSMM_MALLOC_FALLBACK : LIBXSMM_MALLOC_FINAL),
              LIBXSMM_ATOMIC_SEQ_CST);
            LIBXSMM_ASSERT(0 <= fallback);
          }
          /* cascade of file-backed double-mappings, then anonymous mapping as last resort */
          INTERNAL_XMALLOC(0, fallback, "TMPDIR", "/tmp", map32, mflags, alloc_size, buffer, &reloc); /* 1st try */
          if (1 <= fallback) { /* continue with fall-back */
            INTERNAL_XMALLOC(1, fallback, "JITDUMPDIR", "", map32, mflags, alloc_size, buffer, &reloc); /* 2nd try */
            if (2 <= fallback) { /* continue with fall-back */
              INTERNAL_XMALLOC(2, fallback, "HOME", "", map32, mflags, alloc_size, buffer, &reloc); /* 3rd try */
              if (3 <= fallback) { /* continue with fall-back */
                if (3 == fallback) { /* 4th try */
                  buffer = mmap(reloc, alloc_size, PROT_READ | PROT_WRITE | PROT_EXEC,
# if defined(MAP_32BIT)
                    MAP_PRIVATE | LIBXSMM_MAP_ANONYMOUS | (mflags & ~MAP_32BIT),
# else
                    MAP_PRIVATE | LIBXSMM_MAP_ANONYMOUS | mflags,
# endif
                    -1, 0/*offset*/);
                  if (MAP_FAILED == buffer) fallback = 4;
                }
                if (4 == fallback && MAP_FAILED != buffer) { /* final */
                  LIBXSMM_ASSERT(fallback == LIBXSMM_MALLOC_FINAL + 1);
                  buffer = MAP_FAILED; /* trigger final fall-back */
                }
              }
            }
          }
        }
        if (MAP_FAILED != buffer && NULL != buffer) {
          flags |= LIBXSMM_MALLOC_FLAG_MMAP; /* select deallocation */
        }
        else { /* allocation failed */
          if (0 == (LIBXSMM_MALLOC_FLAG_MMAP & flags)) { /* ultimate fall-back */
            buffer = (NULL != malloc_fn.function
              ? (NULL == context ? malloc_fn.function(alloc_size) : malloc_fn.ctx_form(alloc_size, context))
              : (NULL));
          }
          reloc = NULL;
        }
        if (MAP_FAILED != buffer && NULL != buffer) {
          internal_xmalloc_mhint(buffer, alloc_size);
        }
#endif /* !defined(_WIN32) */
      }
      else { /* reallocation of the same pointer and size */
        alloc_size = size + extra_size + sizeof(internal_malloc_info_type) + alignment - 1;
        if (NULL != info) {
          buffer = info->pointer;
          flags |= info->flags;
        }
        else {
          flags |= LIBXSMM_MALLOC_FLAG_MMAP;
          buffer = *memory;
        }
        alloc_alignment = alignment;
        *memory = NULL; /* signal no-copy */
      }
      if (
#if !defined(_WIN32) && !defined(__clang_analyzer__)
        MAP_FAILED != buffer &&
#endif
        NULL != buffer)
      { /* commit: align, copy extra-data/old content, and write the info-record */
        char *const cbuffer = (char*)buffer, *const aligned = LIBXSMM_ALIGN(
          cbuffer + extra_size + sizeof(internal_malloc_info_type), alloc_alignment);
        internal_malloc_info_type *const buffer_info = (internal_malloc_info_type*)(
          aligned - sizeof(internal_malloc_info_type));
        LIBXSMM_ASSERT((aligned + size) <= (cbuffer + alloc_size));
        LIBXSMM_ASSERT(0 < alloc_alignment);
        /* former content must be preserved prior to setup of buffer_info */
        if (NULL != *memory) { /* preserve/copy previous content */
#if 0
          LIBXSMM_ASSERT(0 != (LIBXSMM_MALLOC_FLAG_REALLOC & flags));
#endif
          /* content behind foreign pointers is not explicitly preserved; buffers may overlap */
          memmove(aligned, *memory, LIBXSMM_MIN(max_preserve, size));
          if (NULL != info /* known allocation (non-foreign pointer) */
            && EXIT_SUCCESS != internal_xfree(*memory, info) /* !libxsmm_free */
            && 0 != libxsmm_verbosity /* library code is expected to be mute */
            && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
          { /* display some extra context of the failure (reallocation) */
            fprintf(stderr, "LIBXSMM ERROR: memory reallocation failed to release memory!\n");
          }
        }
        if (NULL != extra || 0 == extra_size) { /* copy user-data to the front */
          const char *const src = (const char*)extra;
          int i; for (i = 0; i < (int)extra_size; ++i) cbuffer[i] = src[i];
        }
        else if (0 != libxsmm_verbosity /* library code is expected to be mute */
          && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
        {
          fprintf(stderr, "LIBXSMM ERROR: incorrect extraneous data specification!\n");
          /* no EXIT_FAILURE because valid buffer is returned */
        }
        if (0 == (LIBXSMM_MALLOC_FLAG_X & flags)) { /* update statistics */
          if (0 == (LIBXSMM_MALLOC_FLAG_PRIVATE & flags)) { /* public */
            if (0 != (LIBXSMM_MALLOC_FLAG_SCRATCH & flags)) { /* scratch */
              const size_t watermark = LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_ADD_FETCH, LIBXSMM_BITS)(
                &internal_malloc_public_cur, alloc_size, LIBXSMM_ATOMIC_RELAXED);
              if (internal_malloc_public_max < watermark) internal_malloc_public_max = watermark; /* accept data-race */
            }
            else { /* local */
              const size_t watermark = LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_ADD_FETCH, LIBXSMM_BITS)(
                &internal_malloc_local_cur, alloc_size, LIBXSMM_ATOMIC_RELAXED);
              if (internal_malloc_local_max < watermark) internal_malloc_local_max = watermark; /* accept data-race */
            }
          }
          else { /* private */
            const size_t watermark = LIBXSMM_ATOMIC(LIBXSMM_ATOMIC_ADD_FETCH, LIBXSMM_BITS)(
              &internal_malloc_private_cur, alloc_size, LIBXSMM_ATOMIC_RELAXED);
            if (internal_malloc_private_max < watermark) internal_malloc_private_max = watermark; /* accept data-race */
          }
        }
        /* keep allocation function on record */
        if (0 == (LIBXSMM_MALLOC_FLAG_MMAP & flags)) {
          buffer_info->context = context;
          buffer_info->free = free_fn;
        }
        else { /* mapped memory is released via munmap, not a free-function */
          buffer_info->free.function = NULL;
          buffer_info->context = NULL;
        }
        buffer_info->size = size; /* record user's size rather than allocated size */
        buffer_info->pointer = buffer;
        buffer_info->reloc = reloc;
        buffer_info->flags = flags;
#if defined(LIBXSMM_VTUNE)
        buffer_info->code_id = 0;
#endif /* info must be initialized to calculate correct checksum */
#if !defined(LIBXSMM_MALLOC_CRC_OFF)
# if defined(LIBXSMM_MALLOC_CRC_LIGHT)
        buffer_info->hash = LIBXSMM_CRC32U(LIBXSMM_BITS)(LIBXSMM_MALLOC_SEED, &buffer_info);
# else
        buffer_info->hash = libxsmm_crc32(LIBXSMM_MALLOC_SEED, buffer_info,
          (unsigned int)(((char*)&buffer_info->hash) - ((char*)buffer_info)));
# endif
#endif /* finally commit/return allocated buffer */
        *memory = aligned;
      }
      else {
        if (0 != libxsmm_verbosity /* library code is expected to be mute */
          && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
        {
          char alloc_size_buffer[32];
          libxsmm_format_size(alloc_size_buffer, sizeof(alloc_size_buffer), alloc_size, "KM", "B", 10);
          fprintf(stderr, "LIBXSMM ERROR: failed to allocate %s with flag=%i!\n", alloc_size_buffer, flags);
        }
        result = EXIT_FAILURE;
        *memory = NULL;
      }
    }
    else {
      if ((LIBXSMM_VERBOSITY_HIGH <= libxsmm_verbosity || 0 > libxsmm_verbosity) /* library code is expected to be mute */
        && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
      {
        fprintf(stderr, "LIBXSMM WARNING: zero-sized memory allocation detected!\n");
      }
      *memory = NULL; /* no EXIT_FAILURE */
    }
  }
#if !defined(NDEBUG)
  else if (0 != size) {
    result = EXIT_FAILURE;
  }
#endif
  return result;
}
2064
2065
/** Releases a buffer allocated by libxsmm_xmalloc; "check" is forwarded to
 *  internal_malloc_info (info-record validation level). A non-NULL pointer
 *  without a valid info-record is treated as foreign and handed to the
 *  regular (hooked) free. */
LIBXSMM_API_INTERN void libxsmm_xfree(const void* memory, int check)
{
#if (!defined(LIBXSMM_MALLOC_HOOK_STATIC) && !defined(LIBXSMM_MALLOC_HOOK_DYNAMIC)) || defined(_DEBUG)
  static int error_once = 0;
#endif
  /*const*/ internal_malloc_info_type *const info = internal_malloc_info(memory, check);
  if (NULL != info) { /* !libxsmm_free */
#if (!defined(LIBXSMM_MALLOC_HOOK_STATIC) && !defined(LIBXSMM_MALLOC_HOOK_DYNAMIC)) || defined(_DEBUG)
    if (EXIT_SUCCESS != internal_xfree(memory, info)) {
      if ( 0 != libxsmm_verbosity /* library code is expected to be mute */
        && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
      {
        fprintf(stderr, "LIBXSMM ERROR: memory deallocation failed!\n");
      }
    }
#else /* hooked builds: no error reporting on this path */
    internal_xfree(memory, info);
#endif
  }
  else if (NULL != memory) { /* foreign pointer */
#if 1
    /* a plain C-cast from const void* still warns; go through a union */
    union { const void* const_ptr; void* ptr; } cast;
    cast.const_ptr = memory; /* C-cast still warns */
    __real_free(cast.ptr);
#endif
#if (!defined(LIBXSMM_MALLOC_HOOK_STATIC) && !defined(LIBXSMM_MALLOC_HOOK_DYNAMIC)) || defined(_DEBUG)
    if ( 0 != libxsmm_verbosity /* library code is expected to be mute */
      && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
    {
      fprintf(stderr, "LIBXSMM ERROR: deallocation does not match allocation!\n");
    }
#endif
  }
}
2100
2101
2102 #if defined(LIBXSMM_VTUNE)
internal_get_vtune_jitdesc(const void * code,unsigned int code_id,size_t code_size,const char * code_name,LIBXSMM_VTUNE_JIT_DESC_TYPE * desc)2103 LIBXSMM_API_INLINE void internal_get_vtune_jitdesc(const void* code,
2104 unsigned int code_id, size_t code_size, const char* code_name,
2105 LIBXSMM_VTUNE_JIT_DESC_TYPE* desc)
2106 {
2107 LIBXSMM_ASSERT(NULL != code && 0 != code_id && 0 != code_size && NULL != desc);
2108 desc->method_id = code_id;
2109 /* incorrect constness (method_name) */
2110 desc->method_name = (char*)code_name;
2111 /* incorrect constness (method_load_address) */
2112 desc->method_load_address = (void*)code;
2113 desc->method_size = code_size;
2114 desc->line_number_size = 0;
2115 desc->line_number_table = NULL;
2116 desc->class_file_name = NULL;
2117 desc->source_file_name = NULL;
2118 # if (2 <= LIBXSMM_VTUNE_JITVERSION)
2119 desc->module_name = "libxsmm.jit";
2120 # endif
2121 }
2122 #endif
2123
2124
/** Attributes an existing LIBXSMM-buffer: merges the given flags with the
 *  recorded ones and, when write-access is revoked or executable access is
 *  requested, adjusts page protection accordingly (mprotect). For executable
 *  buffers it optionally dumps/verifies the byte-code for profilers (name),
 *  notifies VTune/perf, and relocates double-mapped code to its executable
 *  view. Returns EXIT_SUCCESS, EXIT_FAILURE for NULL input, or the mprotect
 *  error under SELinux hard-error conditions. */
LIBXSMM_API_INTERN int libxsmm_malloc_attrib(void** memory, int flags, const char* name)
{
  internal_malloc_info_type *const info = (NULL != memory ? internal_malloc_info(*memory, 0/*no check*/) : NULL);
  int result = EXIT_SUCCESS;
  static int error_once = 0;
  if (NULL != info) {
    void *const buffer = info->pointer;
    const size_t size = info->size;
#if defined(_WIN32)
    LIBXSMM_ASSERT(NULL != buffer || 0 == size);
#else
    LIBXSMM_ASSERT((NULL != buffer && MAP_FAILED != buffer) || 0 == size);
#endif
    flags |= (info->flags & ~LIBXSMM_MALLOC_FLAG_RWX); /* merge with current flags */
    /* quietly keep the read permission, but eventually revoke write permissions */
    if (0 == (LIBXSMM_MALLOC_FLAG_W & flags) || 0 != (LIBXSMM_MALLOC_FLAG_X & flags)) {
      /* distance of the aligned user-pointer from the raw base allocation */
      const size_t alignment = (size_t)(((const char*)(*memory)) - ((const char*)buffer));
      const size_t alloc_size = size + alignment;
      if (0 == (LIBXSMM_MALLOC_FLAG_X & flags)) { /* data-buffer; non-executable */
#if defined(_WIN32)
        /* TODO: implement memory protection under Microsoft Windows */
        LIBXSMM_UNUSED(alloc_size);
#else
        if (EXIT_SUCCESS != mprotect(buffer, alloc_size/*entire memory region*/, PROT_READ)
          && (LIBXSMM_VERBOSITY_HIGH <= libxsmm_verbosity || 0 > libxsmm_verbosity) /* library code is expected to be mute */
          && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
        {
          fprintf(stderr, "LIBXSMM WARNING: read-only request for buffer failed!\n");
        }
#endif
      }
      else { /* executable buffer requested */
        /* executable view: the relocated address if double-mapped, else in-place */
        void *const code_ptr = NULL != info->reloc ? ((void*)(((char*)info->reloc) + alignment)) : *memory;
        LIBXSMM_ASSERT(0 != (LIBXSMM_MALLOC_FLAG_X & flags));
        if (name && *name) { /* profiler support requested */
          if (0 > libxsmm_verbosity) { /* avoid dump when only the profiler is enabled */
            FILE* code_file = fopen(name, "rb");
            int diff = 0;
            if (NULL == code_file) { /* file does not exist */
              code_file = fopen(name, "wb");
              if (NULL != code_file) { /* dump byte-code into a file */
                fwrite(code_ptr, 1, size, code_file);
                fclose(code_file);
              }
            }
            else { /* check existing file: compare content chunk-wise */
              const char* check_a = (const char*)code_ptr;
              char check_b[4096];
              size_t rest = size;
              do {
                const size_t n = fread(check_b, 1, LIBXSMM_MIN(sizeof(check_b), rest), code_file);
                diff += memcmp(check_a, check_b, LIBXSMM_MIN(sizeof(check_b), n));
                check_a += n;
                rest -= n;
              } while (0 < rest && 0 == diff);
              fclose(code_file);
            }
            fprintf(stderr, "LIBXSMM-JIT-DUMP(ptr:file) %p : %s\n", code_ptr, name);
            if (0 != diff) { /* override existing dump and warn about erroneous condition */
              fprintf(stderr, "LIBXSMM ERROR: %s is shared by different code!\n", name);
              code_file = fopen(name, "wb");
              if (NULL != code_file) { /* dump byte-code into a file */
                fwrite(code_ptr, 1, size, code_file);
                fclose(code_file);
              }
            }
          }
#if defined(LIBXSMM_VTUNE)
          if (iJIT_SAMPLING_ON == iJIT_IsProfilingActive()) {
            LIBXSMM_VTUNE_JIT_DESC_TYPE vtune_jit_desc;
            const unsigned int code_id = iJIT_GetNewMethodID();
            internal_get_vtune_jitdesc(code_ptr, code_id, size, name, &vtune_jit_desc);
            iJIT_NotifyEvent(LIBXSMM_VTUNE_JIT_LOAD, &vtune_jit_desc);
            info->code_id = code_id;
          }
          else {
            info->code_id = 0;
          }
#endif
#if defined(LIBXSMM_PERF)
          /* If JIT is enabled and a valid name is given, emit information for profiler
           * In jitdump case this needs to be done after mprotect as it gets overwritten
           * otherwise. */
          libxsmm_perf_dump_code(code_ptr, size, name);
#endif
        }
        if (NULL != info->reloc && info->pointer != info->reloc) { /* double-mapped code */
#if defined(_WIN32)
          /* TODO: implement memory protection under Microsoft Windows */
#else
          /* memory is already protected at this point; relocate code */
          LIBXSMM_ASSERT(0 != (LIBXSMM_MALLOC_FLAG_MMAP & flags));
          *memory = code_ptr; /* relocate */
          info->pointer = info->reloc;
          info->reloc = NULL;
# if !defined(LIBXSMM_MALLOC_CRC_OFF) /* update checksum */
#   if defined(LIBXSMM_MALLOC_CRC_LIGHT)
          { const internal_malloc_info_type *const code_info = internal_malloc_info(code_ptr, 0/*no check*/);
            info->hash = LIBXSMM_CRC32U(LIBXSMM_BITS)(LIBXSMM_MALLOC_SEED, &code_info);
          }
#   else
          info->hash = libxsmm_crc32(LIBXSMM_MALLOC_SEED, info,
            /* info size minus actual hash value */
            (unsigned int)(((char*)&info->hash) - ((char*)info)));
#   endif
# endif /* treat memory protection errors as soft error; ignore return value */
          munmap(buffer, alloc_size); /* release the writable view */
#endif
        }
#if !defined(_WIN32)
        else { /* malloc-based fall-back: flip the region to read/execute in-place */
          int mprotect_result;
# if !defined(LIBXSMM_MALLOC_CRC_OFF) && defined(LIBXSMM_VTUNE) /* check checksum */
#   if defined(LIBXSMM_MALLOC_CRC_LIGHT)
          assert(info->hash == LIBXSMM_CRC32U(LIBXSMM_BITS)(LIBXSMM_MALLOC_SEED, &info)); /* !LIBXSMM_ASSERT */
#   else
          assert(info->hash == libxsmm_crc32(LIBXSMM_MALLOC_SEED, info, /* !LIBXSMM_ASSERT */
            /* info size minus actual hash value */
            (unsigned int)(((char*)&info->hash) - ((char*)info))));
#   endif
# endif /* treat memory protection errors as soft error; ignore return value */
          mprotect_result = mprotect(buffer, alloc_size/*entire memory region*/, PROT_READ | PROT_EXEC);
          if (EXIT_SUCCESS != mprotect_result) {
            if (0 != libxsmm_se) { /* hard-error in case of SELinux */
              if (0 != libxsmm_verbosity /* library code is expected to be mute */
                && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
              {
                fprintf(stderr, "LIBXSMM ERROR: failed to allocate an executable buffer!\n");
              }
              result = mprotect_result;
            }
            else if ((LIBXSMM_VERBOSITY_HIGH <= libxsmm_verbosity || 0 > libxsmm_verbosity) /* library code is expected to be mute */
              && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
            {
              fprintf(stderr, "LIBXSMM WARNING: read-only request for JIT-buffer failed!\n");
            }
          }
        }
#endif
      }
    }
  }
  else if (NULL == memory || NULL == *memory) {
    if (0 != libxsmm_verbosity /* library code is expected to be mute */
      && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
    {
      fprintf(stderr, "LIBXSMM ERROR: libxsmm_malloc_attrib failed because NULL cannot be attributed!\n");
    }
    result = EXIT_FAILURE;
  }
  else if ((LIBXSMM_VERBOSITY_WARN <= libxsmm_verbosity || 0 > libxsmm_verbosity)
    && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
  {
    fprintf(stderr, "LIBXSMM WARNING: %s buffer %p does not match!\n",
      0 != (LIBXSMM_MALLOC_FLAG_X & flags) ? "executable" : "memory", *memory);
  }
  return result;
}
2283
2284
libxsmm_aligned_malloc(size_t size,size_t alignment)2285 LIBXSMM_API LIBXSMM_ATTRIBUTE_MALLOC void* libxsmm_aligned_malloc(size_t size, size_t alignment)
2286 {
2287 void* result = NULL;
2288 LIBXSMM_INIT
2289 if (2 > internal_malloc_kind) {
2290 #if !defined(NDEBUG)
2291 int status =
2292 #endif
2293 libxsmm_xmalloc(&result, size, alignment, LIBXSMM_MALLOC_FLAG_DEFAULT, NULL/*extra*/, 0/*extra_size*/);
2294 assert(EXIT_SUCCESS == status || NULL == result); /* !LIBXSMM_ASSERT */
2295 }
2296 else { /* scratch */
2297 const void *const caller = libxsmm_trace_caller_id(0/*level*/);
2298 internal_scratch_malloc(&result, size, alignment, LIBXSMM_MALLOC_FLAG_DEFAULT, caller);
2299 }
2300 return result;
2301 }
2302
2303
libxsmm_realloc(size_t size,void * ptr)2304 LIBXSMM_API void* libxsmm_realloc(size_t size, void* ptr)
2305 {
2306 const int nzeros = LIBXSMM_INTRINSICS_BITSCANFWD64((uintptr_t)ptr), alignment = 1 << nzeros;
2307 LIBXSMM_ASSERT(0 == ((uintptr_t)ptr & ~(0xFFFFFFFFFFFFFFFF << nzeros)));
2308 LIBXSMM_INIT
2309 if (2 > internal_malloc_kind) {
2310 #if !defined(NDEBUG)
2311 int status =
2312 #endif
2313 libxsmm_xmalloc(&ptr, size, alignment, LIBXSMM_MALLOC_FLAG_REALLOC, NULL/*extra*/, 0/*extra_size*/);
2314 assert(EXIT_SUCCESS == status || NULL == ptr); /* !LIBXSMM_ASSERT */
2315 }
2316 else { /* scratch */
2317 const void *const caller = libxsmm_trace_caller_id(0/*level*/);
2318 internal_scratch_malloc(&ptr, size, alignment, LIBXSMM_MALLOC_FLAG_REALLOC, caller);
2319 }
2320 return ptr;
2321 }
2322
2323
libxsmm_scratch_malloc(size_t size,size_t alignment,const void * caller)2324 LIBXSMM_API void* libxsmm_scratch_malloc(size_t size, size_t alignment, const void* caller)
2325 {
2326 void* result;
2327 LIBXSMM_INIT
2328 internal_scratch_malloc(&result, size, alignment,
2329 LIBXSMM_MALLOC_INTERNAL_CALLER != caller ? LIBXSMM_MALLOC_FLAG_DEFAULT : LIBXSMM_MALLOC_FLAG_PRIVATE,
2330 caller);
2331 return result;
2332 }
2333
2334
libxsmm_malloc(size_t size)2335 LIBXSMM_API LIBXSMM_ATTRIBUTE_MALLOC void* libxsmm_malloc(size_t size)
2336 {
2337 return libxsmm_aligned_malloc(size, 0/*auto*/);
2338 }
2339
2340
libxsmm_free(const void * memory)2341 LIBXSMM_API void libxsmm_free(const void* memory)
2342 {
2343 if (NULL != memory) {
2344 #if defined(LIBXSMM_MALLOC_SCRATCH_DELETE_FIRST) || /* prefer safe method if possible */ \
2345 (!defined(LIBXSMM_MALLOC_HOOK_STATIC) && !defined(LIBXSMM_MALLOC_HOOK_DYNAMIC))
2346 # if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (0 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
2347 internal_malloc_pool_type *const pool = internal_scratch_malloc_pool(memory);
2348 if (NULL != pool) { /* memory belongs to scratch domain */
2349 internal_scratch_free(memory, pool);
2350 }
2351 else
2352 # endif
2353 { /* local */
2354 libxsmm_xfree(memory, 2/*check*/);
2355 }
2356 #else /* lookup matching pool */
2357 internal_malloc_info_type *const info = internal_malloc_info(memory, 2/*check*/);
2358 static int error_once = 0;
2359 if (NULL != info && 0 == (LIBXSMM_MALLOC_FLAG_SCRATCH & info->flags)) { /* !libxsmm_free */
2360 # if !defined(NDEBUG)
2361 if (EXIT_SUCCESS != internal_xfree(memory, info)
2362 && 0 != libxsmm_verbosity /* library code is expected to be mute */
2363 && 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
2364 {
2365 fprintf(stderr, "LIBXSMM ERROR: memory deallocation failed!\n");
2366 }
2367 # else
2368 internal_xfree(memory, info); /* !libxsmm_free */
2369 # endif
2370 }
2371 else {
2372 # if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (0 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
2373 internal_malloc_pool_type *const pool = internal_scratch_malloc_pool(memory);
2374 if (NULL != pool) { /* memory belongs to scratch domain */
2375 internal_scratch_free(memory, pool);
2376 }
2377 else
2378 # endif
2379 {
2380 # if defined(NDEBUG) && (defined(LIBXSMM_MALLOC_HOOK_STATIC) || defined(LIBXSMM_MALLOC_HOOK_DYNAMIC))
2381 __real_free((void*)memory);
2382 # else
2383 # if (defined(LIBXSMM_MALLOC_HOOK_STATIC) || defined(LIBXSMM_MALLOC_HOOK_DYNAMIC))
2384 __real_free((void*)memory);
2385 # endif
2386 if (0 != libxsmm_verbosity && /* library code is expected to be mute */
2387 1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED))
2388 {
2389 fprintf(stderr, "LIBXSMM ERROR: deallocation does not match allocation!\n");
2390 }
2391 # endif
2392 }
2393 }
2394 #endif
2395 }
2396 }
2397
2398
libxsmm_xrelease_scratch(LIBXSMM_LOCK_TYPE (LIBXSMM_LOCK)* lock)2399 LIBXSMM_API_INTERN void libxsmm_xrelease_scratch(LIBXSMM_LOCK_TYPE(LIBXSMM_LOCK)* lock)
2400 {
2401 #if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (0 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
2402 internal_malloc_pool_type* pools = NULL;
2403 libxsmm_scratch_info scratch_info;
2404 LIBXSMM_ASSERT(libxsmm_scratch_pools <= LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS);
2405 if (NULL != lock) {
2406 LIBXSMM_LOCK_ACQUIRE(LIBXSMM_LOCK, lock);
2407 }
2408 # if defined(LIBXSMM_MALLOC_DELETE_SAFE)
2409 if (0 == (internal_malloc_kind & 1) || 0 >= internal_malloc_kind)
2410 # endif
2411 {
2412 unsigned int i;
2413 pools = (internal_malloc_pool_type*)LIBXSMM_UP2(
2414 (uintptr_t)internal_malloc_pool_buffer, LIBXSMM_MALLOC_SCRATCH_PADDING);
2415 for (i = 0; i < libxsmm_scratch_pools; ++i) {
2416 if (0 != pools[i].instance.minsize) {
2417 if (
2418 # if !defined(LIBXSMM_MALLOC_SCRATCH_DELETE_FIRST)
2419 1 < pools[i].instance.counter &&
2420 # endif
2421 NULL != pools[i].instance.buffer)
2422 {
2423 internal_malloc_info_type* const info = internal_malloc_info(pools[i].instance.buffer, 2/*check*/);
2424 if (NULL != info) internal_xfree(info->pointer, info);
2425 }
2426 }
2427 else break; /* early exit */
2428 }
2429 }
2430 LIBXSMM_EXPECT(EXIT_SUCCESS, libxsmm_get_scratch_info(&scratch_info));
2431 if (0 != scratch_info.npending && /* library code is expected to be mute */
2432 (LIBXSMM_VERBOSITY_WARN <= libxsmm_verbosity || 0 > libxsmm_verbosity))
2433 {
2434 char pending_size_buffer[32];
2435 libxsmm_format_size(pending_size_buffer, sizeof(pending_size_buffer),
2436 internal_malloc_public_cur + internal_malloc_local_cur, "KM", "B", 10);
2437 fprintf(stderr, "LIBXSMM WARNING: %s pending scratch-memory by %" PRIuPTR " allocation%s!\n",
2438 pending_size_buffer, (uintptr_t)scratch_info.npending, 1 < scratch_info.npending ? "s" : "");
2439 }
2440 if (NULL != pools) {
2441 memset(pools, 0, (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) * sizeof(internal_malloc_pool_type));
2442 /* no reset: keep private watermark (internal_malloc_private_max, internal_malloc_private_cur) */
2443 internal_malloc_public_max = internal_malloc_public_cur = 0;
2444 internal_malloc_local_max = internal_malloc_local_cur = 0;
2445 internal_malloc_scratch_nmallocs = 0;
2446 }
2447 if (NULL != lock) {
2448 LIBXSMM_LOCK_RELEASE(LIBXSMM_LOCK, lock);
2449 }
2450 #endif
2451 }
2452
2453
libxsmm_release_scratch(void)2454 LIBXSMM_API void libxsmm_release_scratch(void)
2455 {
2456 libxsmm_xrelease_scratch(&libxsmm_lock_global);
2457 }
2458
2459
libxsmm_get_malloc_info(const void * memory,libxsmm_malloc_info * info)2460 LIBXSMM_API int libxsmm_get_malloc_info(const void* memory, libxsmm_malloc_info* info)
2461 {
2462 int result = EXIT_SUCCESS;
2463 if (NULL != info) {
2464 size_t size;
2465 result = libxsmm_get_malloc_xinfo(memory, &size, NULL/*flags*/, NULL/*extra*/);
2466 LIBXSMM_MEMZERO127(info);
2467 if (EXIT_SUCCESS == result) {
2468 info->size = size;
2469 }
2470 #if !defined(NDEBUG) /* library code is expected to be mute */
2471 else if (LIBXSMM_VERBOSITY_WARN <= libxsmm_verbosity || 0 > libxsmm_verbosity) {
2472 static int error_once = 0;
2473 if (1 == LIBXSMM_ATOMIC_ADD_FETCH(&error_once, 1, LIBXSMM_ATOMIC_RELAXED)) {
2474 fprintf(stderr, "LIBXSMM WARNING: foreign memory buffer %p discovered!\n", memory);
2475 }
2476 }
2477 #endif
2478 }
2479 else {
2480 result = EXIT_FAILURE;
2481 }
2482 return result;
2483 }
2484
2485
libxsmm_get_scratch_info(libxsmm_scratch_info * info)2486 LIBXSMM_API int libxsmm_get_scratch_info(libxsmm_scratch_info* info)
2487 {
2488 int result = EXIT_SUCCESS;
2489 if (NULL != info) {
2490 #if defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (0 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
2491 LIBXSMM_MEMZERO127(info);
2492 info->nmallocs = internal_malloc_scratch_nmallocs;
2493 info->internal = internal_malloc_private_max;
2494 info->local = internal_malloc_local_max;
2495 info->size = internal_malloc_public_max;
2496 { const internal_malloc_pool_type* pool = (const internal_malloc_pool_type*)LIBXSMM_UP2(
2497 (uintptr_t)internal_malloc_pool_buffer, LIBXSMM_MALLOC_SCRATCH_PADDING);
2498 # if (1 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
2499 const internal_malloc_pool_type *const end = pool + libxsmm_scratch_pools;
2500 LIBXSMM_ASSERT(libxsmm_scratch_pools <= LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS);
2501 for (; pool != end; ++pool) if ((LIBXSMM_MALLOC_INTERNAL_CALLER) != pool->instance.site) {
2502 # endif
2503 if (0 != pool->instance.minsize) {
2504 const size_t npending = pool->instance.counter;
2505 # if defined(LIBXSMM_MALLOC_SCRATCH_DELETE_FIRST)
2506 info->npending += npending;
2507 # else
2508 info->npending += 1 < npending ? (npending - 1) : 0;
2509 # endif
2510 ++info->npools;
2511 }
2512 # if (1 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))
2513 else break; /* early exit */
2514 }
2515 # endif
2516 }
2517 #else
2518 LIBXSMM_MEMZERO127(info);
2519 #endif /*defined(LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS) && (0 < (LIBXSMM_MALLOC_SCRATCH_MAX_NPOOLS))*/
2520 }
2521 else {
2522 result = EXIT_FAILURE;
2523 }
2524 return result;
2525 }
2526
2527
libxsmm_set_scratch_limit(size_t nbytes)2528 LIBXSMM_API void libxsmm_set_scratch_limit(size_t nbytes)
2529 {
2530 /* !LIBXSMM_INIT */
2531 internal_malloc_scratch_limit = nbytes;
2532 }
2533
2534
libxsmm_get_scratch_limit(void)2535 LIBXSMM_API size_t libxsmm_get_scratch_limit(void)
2536 {
2537 size_t result;
2538 /* !LIBXSMM_INIT */
2539 if (LIBXSMM_SCRATCH_DEFAULT != internal_malloc_scratch_limit) {
2540 result = internal_malloc_scratch_limit;
2541 }
2542 else if (0 == internal_malloc_kind) {
2543 result = LIBXSMM_MALLOC_SCRATCH_LIMIT;
2544 }
2545 else {
2546 result = LIBXSMM_SCRATCH_UNLIMITED;
2547 }
2548 return result;
2549 }
2550
2551
libxsmm_set_malloc(int enabled,const size_t * lo,const size_t * hi)2552 LIBXSMM_API void libxsmm_set_malloc(int enabled, const size_t* lo, const size_t* hi)
2553 {
2554 /* !LIBXSMM_INIT */
2555 #if !(defined(LIBXSMM_MALLOC_HOOK_DYNAMIC) || defined(LIBXSMM_INTERCEPT_DYNAMIC))
2556 LIBXSMM_UNUSED(enabled);
2557 internal_malloc_kind = 0;
2558 #elif defined(LIBXSMM_MALLOC) && (0 < LIBXSMM_MALLOC)
2559 LIBXSMM_UNUSED(enabled);
2560 internal_malloc_kind = LIBXSMM_MALLOC;
2561 #else
2562 internal_malloc_kind = enabled;
2563 #endif
2564 /* setup lo/hi after internal_malloc_kind! */
2565 if (NULL != lo) internal_malloc_limit[0] = *lo;
2566 if (NULL != hi) {
2567 const size_t scratch_limit = libxsmm_get_scratch_limit();
2568 const size_t malloc_upper = LIBXSMM_MIN(*hi, scratch_limit);
2569 internal_malloc_limit[1] = LIBXSMM_MAX(malloc_upper, internal_malloc_limit[0]);
2570 }
2571 libxsmm_malloc_init();
2572 }
2573
2574
libxsmm_get_malloc(size_t * lo,size_t * hi)2575 LIBXSMM_API int libxsmm_get_malloc(size_t* lo, size_t* hi)
2576 {
2577 int result;
2578 LIBXSMM_INIT
2579 if (NULL != lo) *lo = internal_malloc_limit[0];
2580 if (NULL != hi) *hi = internal_malloc_limit[1];
2581 #if (defined(LIBXSMM_MALLOC_HOOK_DYNAMIC) || defined(LIBXSMM_INTERCEPT_DYNAMIC))
2582 result = 0 != (internal_malloc_kind & 1) && 0 < internal_malloc_kind;
2583 #else
2584 result = 0;
2585 #endif
2586 return result;
2587 }
2588
2589