/*
 * kmp_alloc.cpp -- private/shared dynamic memory allocation and management
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_io.h"
#include "kmp_wrapper_malloc.h"

// Disable bget when it is not used
#if KMP_USE_BGET

/* Thread private buffer management code */

typedef int (*bget_compact_t)(size_t, int);
typedef void *(*bget_acquire_t)(size_t);
typedef void (*bget_release_t)(void *);

/* NOTE: bufsize must be a signed datatype */

#if KMP_OS_WINDOWS
#if KMP_ARCH_X86 || KMP_ARCH_ARM
typedef kmp_int32 bufsize;
#else
typedef kmp_int64 bufsize;
#endif
#else
typedef ssize_t bufsize;
#endif // KMP_OS_WINDOWS

/* The three modes of operation are, fifo search, lifo search, and best-fit */

typedef enum bget_mode {
  bget_mode_fifo = 0,
  bget_mode_lifo = 1,
  bget_mode_best = 2
} bget_mode_t;

static void bpool(kmp_info_t *th, void *buffer, bufsize len);
static void *bget(kmp_info_t *th, bufsize size);
static void *bgetz(kmp_info_t *th, bufsize size);
static void *bgetr(kmp_info_t *th, void *buffer, bufsize newsize);
static void brel(kmp_info_t *th, void *buf);
static void bectl(kmp_info_t *th, bget_compact_t compact,
                  bget_acquire_t acquire, bget_release_t release,
                  bufsize pool_incr);

/* BGET CONFIGURATION */
/* Buffer allocation size quantum: all buffers allocated are a
   multiple of this size.  This MUST be a power of two. */

/* On IA-32 architecture with  Linux* OS, malloc() does not
   ensure 16 byte alignment */

#if KMP_ARCH_X86 || !KMP_HAVE_QUAD

#define SizeQuant 8
#define AlignType double

#else

#define SizeQuant 16
#define AlignType _Quad

#endif

// Define this symbol to enable the bstats() function which calculates the
// total free space in the buffer pool, the largest available buffer, and the
// total space currently allocated.
#define BufStats 1

#ifdef KMP_DEBUG

// Define this symbol to enable the bpoold() function which dumps the buffers
// in a buffer pool.
#define BufDump 1

// Define this symbol to enable the bpoolv() function for validating a buffer
// pool.
#define BufValid 1

// Define this symbol to enable the bufdump() function which allows dumping the
// contents of an allocated or free buffer.
#define DumpData 1

#ifdef NOT_USED_NOW

// Wipe free buffers to a guaranteed pattern of garbage to trip up miscreants
// who attempt to use pointers into released buffers.
#define FreeWipe 1

// Use a best fit algorithm when searching for space for an allocation request.
// This uses memory more efficiently, but allocation will be much slower.
#define BestFit 1

#endif /* NOT_USED_NOW */
#endif /* KMP_DEBUG */

static bufsize bget_bin_size[] = {
    0,
    //    1 << 6,    /* .5 Cache line */
    1 << 7, /* 1 Cache line, new */
    1 << 8, /* 2 Cache lines */
    1 << 9, /* 4 Cache lines, new */
    1 << 10, /* 8 Cache lines */
    1 << 11, /* 16 Cache lines, new */
    1 << 12, 1 << 13, /* new */
    1 << 14, 1 << 15, /* new */
    1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20, /*  1MB */
    1 << 21, /*  2MB */
    1 << 22, /*  4MB */
    1 << 23, /*  8MB */
    1 << 24, /* 16MB */
    1 << 25, /* 32MB */
};

#define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))

struct bfhead;

//  Declare the interface, including the requested buffer size type, bufsize.

/* Queue links */
typedef struct qlinks {
  struct bfhead *flink; /* Forward link */
  struct bfhead *blink; /* Backward link */
} qlinks_t;

/* Header in allocated and free buffers */
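/* NOTE: the low bit of the bthr field can carry a mark: bpool() sets it for
   the header at the start of a pool block, and readers clear it (& ~1) before
   comparing the pointer against the owning thread. */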
typedef struct bhead2 {
  kmp_info_t *bthr; /* The thread which owns the buffer pool */
  bufsize prevfree; /* Relative link back to previous free buffer in memory or
                       0 if previous buffer is allocated.  */
  bufsize bsize; /* Buffer size: positive if free, negative if allocated. */
} bhead2_t;

/* Make sure the bhead structure is a multiple of SizeQuant in size. */
typedef union bhead {
  KMP_ALIGN(SizeQuant)
  AlignType b_align;
  char b_pad[sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant))];
  bhead2_t bb;
} bhead_t;
#define BH(p) ((bhead_t *)(p))

/*  Header in directly allocated buffers (by acqfcn) */
typedef struct bdhead {
  bufsize tsize; /* Total size, including overhead */
  bhead_t bh; /* Common header */
} bdhead_t;
#define BDH(p) ((bdhead_t *)(p))

/* Header in free buffers */
typedef struct bfhead {
  bhead_t bh; /* Common allocated/free header */
  qlinks_t ql; /* Links on free list */
} bfhead_t;
#define BFH(p) ((bfhead_t *)(p))

typedef struct thr_data {
  bfhead_t freelist[MAX_BGET_BINS];
#if BufStats
  size_t totalloc; /* Total space currently allocated */
  long numget, numrel; /* Number of bget() and brel() calls */
  long numpblk; /* Number of pool blocks */
  long numpget, numprel; /* Number of block gets and rels */
  long numdget, numdrel; /* Number of direct gets and rels */
#endif /* BufStats */

  /* Automatic expansion block management functions */
  bget_compact_t compfcn;
  bget_acquire_t acqfcn;
  bget_release_t relfcn;

  bget_mode_t mode; /* what allocation mode to use? */

  bufsize exp_incr; /* Expansion block size */
  bufsize pool_len; /* 0: no bpool calls have been made
                       -1: not all pool blocks are the same size
                       >0: (common) block size for all bpool calls made so far
                    */
  bfhead_t *last_pool; /* Last pool owned by this thread (delay deallocation) */
} thr_data_t;

/*  Minimum allocation quantum: */
#define QLSize (sizeof(qlinks_t))
#define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
#define MaxSize                                                                \
  (bufsize)(                                                                   \
      ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1)))
// Maximum for the requested size.

/* End sentinel: value placed in bsize field of dummy block delimiting
   end of pool block.  The most negative number which will  fit  in  a
   bufsize, defined in a way that the compiler will accept. */

#define ESent                                                                  \
  ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2))

/* Thread Data management routines */
static int bget_get_bin(bufsize size) {
  // binary chop bins
  int lo = 0, hi = MAX_BGET_BINS - 1;

  KMP_DEBUG_ASSERT(size > 0);

  while ((hi - lo) > 1) {
    int mid = (lo + hi) >> 1;
    if (size < bget_bin_size[mid])
      hi = mid - 1;
    else
      lo = mid;
  }

  KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS));

  return lo;
}

static void set_thr_data(kmp_info_t *th) {
  int i;
  thr_data_t *data;

  data = (thr_data_t *)((!th->th.th_local.bget_data)
                            ? __kmp_allocate(sizeof(*data))
                            : th->th.th_local.bget_data);

  memset(data, '\0', sizeof(*data));

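  /* Each bin's free list is a circular doubly-linked list; an empty bin
     points back to its own list head. */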
  for (i = 0; i < MAX_BGET_BINS; ++i) {
    data->freelist[i].ql.flink = &data->freelist[i];
    data->freelist[i].ql.blink = &data->freelist[i];
  }

  th->th.th_local.bget_data = data;
  th->th.th_local.bget_list = 0;
#if !USE_CMP_XCHG_FOR_BGET
#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_init_lock(&th->th.th_local.bget_lock);
#else
  __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock);
#endif /* USE_QUEUING_LOCK_FOR_BGET */
#endif /* ! USE_CMP_XCHG_FOR_BGET */
}

static thr_data_t *get_thr_data(kmp_info_t *th) {
  thr_data_t *data;

  data = (thr_data_t *)th->th.th_local.bget_data;

  KMP_DEBUG_ASSERT(data != 0);

  return data;
}

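/* Cross-thread frees: brel() on a buffer owned by another thread enqueues the
   buffer on the owner's bget_list; the owning thread drains that list here
   before doing further pool work (see the calls in bget() and bpool()). */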
/* Walk the free list and release the enqueued buffers */
static void __kmp_bget_dequeue(kmp_info_t *th) {
  void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);

  if (p != 0) {
#if USE_CMP_XCHG_FOR_BGET
    {
      volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
      while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
                                        CCAST(void *, old_value), nullptr)) {
        KMP_CPU_PAUSE();
        old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
      }
      p = CCAST(void *, old_value);
    }
#else /* ! USE_CMP_XCHG_FOR_BGET */
#ifdef USE_QUEUING_LOCK_FOR_BGET
    __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
#else
    __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
#endif /* USE_QUEUING_LOCK_FOR_BGET */

    p = (void *)th->th.th_local.bget_list;
    th->th.th_local.bget_list = 0;

#ifdef USE_QUEUING_LOCK_FOR_BGET
    __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
#else
    __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
#endif
#endif /* USE_CMP_XCHG_FOR_BGET */

    /* Check again to make sure the list is not empty */
    while (p != 0) {
      void *buf = p;
      bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));

      KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
      KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
                       (kmp_uintptr_t)th); // clear possible mark
      KMP_DEBUG_ASSERT(b->ql.blink == 0);

      p = (void *)b->ql.flink;

      brel(th, buf);
    }
  }
}

/* Chain together the free buffers by using the thread owner field */
static void __kmp_bget_enqueue(kmp_info_t *th, void *buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
                               ,
                               kmp_int32 rel_gtid
#endif
) {
  bfhead_t *b = BFH(((char *)buf) - sizeof(bhead_t));

  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
  KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
                   (kmp_uintptr_t)th); // clear possible mark

  b->ql.blink = 0;

  KC_TRACE(10, ("__kmp_bget_enqueue: moving buffer to T#%d list\n",
                __kmp_gtid_from_thread(th)));

#if USE_CMP_XCHG_FOR_BGET
  {
    volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
    /* the next pointer must be set before setting bget_list to buf to avoid
       exposing a broken list to other threads, even for an instant. */
    b->ql.flink = BFH(CCAST(void *, old_value));

    while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
                                      CCAST(void *, old_value), buf)) {
      KMP_CPU_PAUSE();
      old_value = TCR_PTR(th->th.th_local.bget_list);
      /* the next pointer must be set before setting bget_list to buf to avoid
         exposing a broken list to other threads, even for an instant. */
      b->ql.flink = BFH(CCAST(void *, old_value));
    }
  }
#else /* ! USE_CMP_XCHG_FOR_BGET */
#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid);
#else
  __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
#endif

  b->ql.flink = BFH(th->th.th_local.bget_list);
  th->th.th_local.bget_list = (void *)buf;

#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid);
#else
  __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
#endif
#endif /* USE_CMP_XCHG_FOR_BGET */
}

/* insert buffer back onto a new freelist */
static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) {
  int bin;

  KMP_DEBUG_ASSERT(((size_t)b) % SizeQuant == 0);
  KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0);

  bin = bget_get_bin(b->bh.bb.bsize);

  KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink ==
                   &thr->freelist[bin]);
  KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink ==
                   &thr->freelist[bin]);

  b->ql.flink = &thr->freelist[bin];
  b->ql.blink = thr->freelist[bin].ql.blink;

  thr->freelist[bin].ql.blink = b;
  b->ql.blink->ql.flink = b;
}

/* unlink the buffer from the old freelist */
static void __kmp_bget_remove_from_freelist(bfhead_t *b) {
  KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
  KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);

  b->ql.blink->ql.flink = b->ql.flink;
  b->ql.flink->ql.blink = b->ql.blink;
}

/*  GET STATS -- check info on free list */
static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) {
  thr_data_t *thr = get_thr_data(th);
  int bin;

  *total_free = *max_free = 0;

  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b, *best;

    best = &thr->freelist[bin];
    b = best->ql.flink;

    while (b != &thr->freelist[bin]) {
      *total_free += (b->bh.bb.bsize - sizeof(bhead_t));
      if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize))
        best = b;

      /* Link to next buffer */
      b = b->ql.flink;
    }

    if (*max_free < best->bh.bb.bsize)
      *max_free = best->bh.bb.bsize;
  }

  if (*max_free > (bufsize)sizeof(bhead_t))
    *max_free -= sizeof(bhead_t);
}

/*  BGET  --  Allocate a buffer.  */
static void *bget(kmp_info_t *th, bufsize requested_size) {
  thr_data_t *thr = get_thr_data(th);
  bufsize size = requested_size;
  bfhead_t *b;
  void *buf;
  int compactseq = 0;
  int use_blink = 0;
  /* For BestFit */
  bfhead_t *best;

  if (size < 0 || size + sizeof(bhead_t) > MaxSize) {
    return NULL;
  }

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  if (size < (bufsize)SizeQ) { // Need at least room for the queue links.
    size = SizeQ;
  }
#if defined(SizeQuant) && (SizeQuant > 1)
  size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
#endif

  size += sizeof(bhead_t); // Add overhead in allocated buffer to size required.
  KMP_DEBUG_ASSERT(size >= 0);
  KMP_DEBUG_ASSERT(size % SizeQuant == 0);

  use_blink = (thr->mode == bget_mode_lifo);

  /* If a compact function was provided in the call to bectl(), wrap
     a loop around the allocation process  to  allow  compaction  to
     intervene in case we don't find a suitable buffer in the chain. */

  for (;;) {
    int bin;

    for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) {
      /* Link to next buffer */
      b = (use_blink ? thr->freelist[bin].ql.blink
                     : thr->freelist[bin].ql.flink);

      if (thr->mode == bget_mode_best) {
        best = &thr->freelist[bin];

        /* Scan the free list searching for the first buffer big enough
           to hold the requested size buffer. */
        while (b != &thr->freelist[bin]) {
          if (b->bh.bb.bsize >= (bufsize)size) {
            if ((best == &thr->freelist[bin]) ||
                (b->bh.bb.bsize < best->bh.bb.bsize)) {
              best = b;
            }
          }

          /* Link to next buffer */
          b = (use_blink ? b->ql.blink : b->ql.flink);
        }
        b = best;
      }

      while (b != &thr->freelist[bin]) {
        if ((bufsize)b->bh.bb.bsize >= (bufsize)size) {

          // Buffer is big enough to satisfy the request. Allocate it to the
          // caller. We must decide whether the buffer is large enough to split
          // into the part given to the caller and a free buffer that remains
          // on the free list, or whether the entire buffer should be removed
          // from the free list and given to the caller in its entirety. We
          // only split the buffer if enough room remains for a header plus the
          // minimum quantum of allocation.
          if ((b->bh.bb.bsize - (bufsize)size) >
              (bufsize)(SizeQ + (sizeof(bhead_t)))) {
            bhead_t *ba, *bn;

            ba = BH(((char *)b) + (b->bh.bb.bsize - (bufsize)size));
            bn = BH(((char *)ba) + size);

            KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);

            /* Subtract size from length of free block. */
            b->bh.bb.bsize -= (bufsize)size;

            /* Link allocated buffer to the previous free buffer. */
            ba->bb.prevfree = b->bh.bb.bsize;

            /* Plug negative size into user buffer. */
            ba->bb.bsize = -size;

            /* Mark this buffer as owned by this thread. */
            TCW_PTR(ba->bb.bthr,
                    th); // not an allocated address (do not mark it)
            /* Mark buffer after this one not preceded by free block. */
            bn->bb.prevfree = 0;

            // unlink buffer from old freelist, and reinsert into new freelist
            __kmp_bget_remove_from_freelist(b);
            __kmp_bget_insert_into_freelist(thr, b);
#if BufStats
            thr->totalloc += (size_t)size;
            thr->numget++; /* Increment number of bget() calls */
#endif
            buf = (void *)((((char *)ba) + sizeof(bhead_t)));
            KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
            return buf;
          } else {
            bhead_t *ba;

            ba = BH(((char *)b) + b->bh.bb.bsize);

            KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);

            /* The buffer isn't big enough to split.  Give  the  whole
               shebang to the caller and remove it from the free list. */

            __kmp_bget_remove_from_freelist(b);
#if BufStats
            thr->totalloc += (size_t)b->bh.bb.bsize;
            thr->numget++; /* Increment number of bget() calls */
#endif
            /* Negate size to mark buffer allocated. */
            b->bh.bb.bsize = -(b->bh.bb.bsize);

            /* Mark this buffer as owned by this thread. */
            TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark)
            /* Zero the back pointer in the next buffer in memory
               to indicate that this buffer is allocated. */
            ba->bb.prevfree = 0;

            /* Give user buffer starting at queue links. */
            buf = (void *)&(b->ql);
            KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
            return buf;
          }
        }

        /* Link to next buffer */
        b = (use_blink ? b->ql.blink : b->ql.flink);
      }
    }

    /* We failed to find a buffer. If there's a compact function defined,
       notify it of the size requested. If it returns TRUE, try the allocation
       again. */

    if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
      break;
    }
  }

  /* No buffer available with requested size free. */

  /* Don't give up yet -- look in the reserve supply. */
  if (thr->acqfcn != 0) {
    if (size > (bufsize)(thr->exp_incr - sizeof(bhead_t))) {
      /* Request is too large to fit in a single expansion block.
         Try to satisfy it by a direct buffer acquisition. */
      bdhead_t *bdh;

      size += sizeof(bdhead_t) - sizeof(bhead_t);

      KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", (int)size));

      /* richryan */
      bdh = BDH((*thr->acqfcn)((bufsize)size));
      if (bdh != NULL) {

        // Mark the buffer special by setting size field of its header to zero.
        bdh->bh.bb.bsize = 0;

        /* Mark this buffer as owned by this thread. */
        TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
        // because direct buffer never goes to free list
        bdh->bh.bb.prevfree = 0;
        bdh->tsize = size;
#if BufStats
        thr->totalloc += (size_t)size;
        thr->numget++; /* Increment number of bget() calls */
        thr->numdget++; /* Direct bget() call count */
#endif
        buf = (void *)(bdh + 1);
        KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
        return buf;
      }

    } else {

      /*  Try to obtain a new expansion block */
      void *newpool;

      KE_TRACE(10, ("%%%%%% MALLOCB( %d )\n", (int)thr->exp_incr));

      /* richryan */
      newpool = (*thr->acqfcn)((bufsize)thr->exp_incr);
      KMP_DEBUG_ASSERT(((size_t)newpool) % SizeQuant == 0);
      if (newpool != NULL) {
        bpool(th, newpool, thr->exp_incr);
        buf = bget(
            th, requested_size); /* This can't, I say, can't get into a loop. */
        return buf;
      }
    }
  }

  /*  Still no buffer available */

  return NULL;
}

/*  BGETZ  --  Allocate a buffer and clear its contents to zero.  We clear
               the  entire  contents  of  the buffer to zero, not just the
               region requested by the caller. */

static void *bgetz(kmp_info_t *th, bufsize size) {
  char *buf = (char *)bget(th, size);

  if (buf != NULL) {
    bhead_t *b;
    bufsize rsize;

    b = BH(buf - sizeof(bhead_t));
    rsize = -(b->bb.bsize);
    if (rsize == 0) {
      bdhead_t *bd;

      bd = BDH(buf - sizeof(bdhead_t));
      rsize = bd->tsize - (bufsize)sizeof(bdhead_t);
    } else {
      rsize -= sizeof(bhead_t);
    }

    KMP_DEBUG_ASSERT(rsize >= size);

    (void)memset(buf, 0, (bufsize)rsize);
  }
  return ((void *)buf);
}

/*  BGETR  --  Reallocate a buffer.  This is a minimal implementation,
               simply in terms of brel()  and  bget().   It  could  be
               enhanced to allow the buffer to grow into adjacent free
               blocks and to avoid moving data unnecessarily.  */

static void *bgetr(kmp_info_t *th, void *buf, bufsize size) {
  void *nbuf;
  bufsize osize; /* Old size of buffer */
  bhead_t *b;

  nbuf = bget(th, size);
  if (nbuf == NULL) { /* Acquire new buffer */
    return NULL;
  }
  if (buf == NULL) {
    return nbuf;
  }
  b = BH(((char *)buf) - sizeof(bhead_t));
  osize = -b->bb.bsize;
  if (osize == 0) {
    /*  Buffer acquired directly through acqfcn. */
    bdhead_t *bd;

    bd = BDH(((char *)buf) - sizeof(bdhead_t));
    osize = bd->tsize - (bufsize)sizeof(bdhead_t);
  } else {
    osize -= sizeof(bhead_t);
  }

  KMP_DEBUG_ASSERT(osize > 0);

  (void)KMP_MEMCPY((char *)nbuf, (char *)buf, /* Copy the data */
                   (size_t)((size < osize) ? size : osize));
  brel(th, buf);

  return nbuf;
}

/*  BREL  --  Release a buffer.  */
static void brel(kmp_info_t *th, void *buf) {
  thr_data_t *thr = get_thr_data(th);
  bfhead_t *b, *bn;
  kmp_info_t *bth;

  KMP_DEBUG_ASSERT(buf != NULL);
  KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);

  b = BFH(((char *)buf) - sizeof(bhead_t));

  if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
    bdhead_t *bdh;

    bdh = BDH(((char *)buf) - sizeof(bdhead_t));
    KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
#if BufStats
    thr->totalloc -= (size_t)bdh->tsize;
    thr->numdrel++; /* Number of direct releases */
    thr->numrel++; /* Increment number of brel() calls */
#endif /* BufStats */
#ifdef FreeWipe
    (void)memset((char *)buf, 0x55, (size_t)(bdh->tsize - sizeof(bdhead_t)));
#endif /* FreeWipe */

    KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)bdh));

    KMP_DEBUG_ASSERT(thr->relfcn != 0);
    (*thr->relfcn)((void *)bdh); /* Release it directly. */
    return;
  }

  bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) &
                       ~1); // clear possible mark before comparison
  if (bth != th) {
    /* Add this buffer to be released by the owning thread later */
    __kmp_bget_enqueue(bth, buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
                       ,
                       __kmp_gtid_from_thread(th)
#endif
    );
    return;
  }

  /* Buffer size must be negative, indicating that the buffer is allocated. */
  if (b->bh.bb.bsize >= 0) {
    bn = NULL;
  }
  KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);

  /*  Back pointer in next buffer must be zero, indicating the same thing: */

  KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.bsize)->bb.prevfree == 0);

#if BufStats
  thr->numrel++; /* Increment number of brel() calls */
  thr->totalloc += (size_t)b->bh.bb.bsize;
#endif

  /* If the back link is nonzero, the previous buffer is free.  */

  if (b->bh.bb.prevfree != 0) {
    /* The previous buffer is free. Consolidate this buffer with it by adding
       the length of this buffer to the previous free buffer. Note that we
       subtract the size in the buffer being released, since it's negative to
       indicate that the buffer is allocated. */
    bufsize size = b->bh.bb.bsize;

    /* Make the previous buffer the one we're working on. */
    KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.prevfree)->bb.bsize ==
                     b->bh.bb.prevfree);
    b = BFH(((char *)b) - b->bh.bb.prevfree);
    b->bh.bb.bsize -= size;

    /* unlink the buffer from the old freelist */
    __kmp_bget_remove_from_freelist(b);
  } else {
    /* The previous buffer isn't allocated. Mark this buffer size as positive
       (i.e. free) and fall through to place the buffer on the free list as an
       isolated free block. */
    b->bh.bb.bsize = -b->bh.bb.bsize;
  }

  /* insert buffer back onto a new freelist */
  __kmp_bget_insert_into_freelist(thr, b);

  /* Now we look at the next buffer in memory, located by advancing from
     the  start  of  this  buffer  by its size, to see if that buffer is
     free.  If it is, we combine  this  buffer  with  the  next  one  in
     memory, dechaining the second buffer from the free list. */
  bn = BFH(((char *)b) + b->bh.bb.bsize);
  if (bn->bh.bb.bsize > 0) {

    /* The buffer is free.  Remove it from the free list and add
       its size to that of our buffer. */
    KMP_DEBUG_ASSERT(BH((char *)bn + bn->bh.bb.bsize)->bb.prevfree ==
                     bn->bh.bb.bsize);

    __kmp_bget_remove_from_freelist(bn);

    b->bh.bb.bsize += bn->bh.bb.bsize;

    /* unlink the buffer from the old freelist, and reinsert it into the new
     * freelist */
    __kmp_bget_remove_from_freelist(b);
    __kmp_bget_insert_into_freelist(thr, b);

    /* Finally,  advance  to   the  buffer  that   follows  the  newly
       consolidated free block.  We must set its  backpointer  to  the
       head  of  the  consolidated free block.  We know the next block
       must be an allocated block because the process of recombination
       guarantees  that  two  free  blocks will never be contiguous in
       memory.  */
    bn = BFH(((char *)b) + b->bh.bb.bsize);
  }
#ifdef FreeWipe
  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
               (size_t)(b->bh.bb.bsize - sizeof(bfhead_t)));
#endif
  KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);

  /* The next buffer is allocated.  Set the backpointer in it  to  point
     to this buffer; the previous free buffer in memory. */

  bn->bh.bb.prevfree = b->bh.bb.bsize;

  /*  If  a  block-release function is defined, and this free buffer
      constitutes the entire block, release it.  Note that  pool_len
      is  defined  in  such a way that the test will fail unless all
      pool blocks are the same size.  */
  if (thr->relfcn != 0 &&
      b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
#if BufStats
    if (thr->numpblk !=
        1) { /* Do not release the last buffer until finalization time */
#endif

      KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
      KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
      KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
                       b->bh.bb.bsize);

      /*  Unlink the buffer from the free list  */
      __kmp_bget_remove_from_freelist(b);

      KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));

      (*thr->relfcn)(b);
#if BufStats
      thr->numprel++; /* Nr of expansion block releases */
      thr->numpblk--; /* Total number of blocks */
      KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);

      // avoid leaving stale last_pool pointer around if it is being dealloced
      if (thr->last_pool == b)
        thr->last_pool = 0;
    } else {
      thr->last_pool = b;
    }
#endif /* BufStats */
  }
}

/*  BECTL  --  Establish automatic pool expansion control  */
static void bectl(kmp_info_t *th, bget_compact_t compact,
                  bget_acquire_t acquire, bget_release_t release,
                  bufsize pool_incr) {
  thr_data_t *thr = get_thr_data(th);

  thr->compfcn = compact;
  thr->acqfcn = acquire;
  thr->relfcn = release;
  thr->exp_incr = pool_incr;
}

/*  BPOOL  --  Add a region of memory to the buffer pool.  */
static void bpool(kmp_info_t *th, void *buf, bufsize len) {
  /*    int bin = 0; */
  thr_data_t *thr = get_thr_data(th);
  bfhead_t *b = BFH(buf);
  bhead_t *bn;

  __kmp_bget_dequeue(th); /* Release any queued buffers */

#ifdef SizeQuant
  len &= ~((bufsize)(SizeQuant - 1));
#endif
  if (thr->pool_len == 0) {
    thr->pool_len = len;
  } else if (len != thr->pool_len) {
    thr->pool_len = -1;
  }
#if BufStats
  thr->numpget++; /* Number of block acquisitions */
  thr->numpblk++; /* Number of blocks total */
  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
#endif /* BufStats */

  /* Since the block is initially occupied by a single free  buffer,
     it  had  better  not  be  (much) larger than the largest buffer
     whose size we can store in bhead.bb.bsize. */
  KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize)ESent + 1));

  /* Clear  the  backpointer at  the start of the block to indicate that
     there  is  no  free  block  prior  to  this   one.    That   blocks
     recombination when the first block in memory is released. */
  b->bh.bb.prevfree = 0;

  /* Create a dummy allocated buffer at the end of the pool.  This dummy
     buffer is seen when a buffer at the end of the pool is released and
     blocks  recombination  of  the last buffer with the dummy buffer at
     the end.  The length in the dummy buffer  is  set  to  the  largest
     negative  number  to  denote  the  end  of  the pool for diagnostic
     routines (this specific value is  not  counted  on  by  the  actual
     allocation and release functions). */
  len -= sizeof(bhead_t);
  b->bh.bb.bsize = (bufsize)len;
  /* Set the owner of this buffer */
  TCW_PTR(b->bh.bb.bthr,
          (kmp_info_t *)((kmp_uintptr_t)th |
                         1)); // mark the buffer as allocated address

  /* Chain the new block to the free list. */
  __kmp_bget_insert_into_freelist(thr, b);

#ifdef FreeWipe
  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
               (size_t)(len - sizeof(bfhead_t)));
#endif
  bn = BH(((char *)b) + len);
  bn->bb.prevfree = (bufsize)len;
  /* Definition of ESent assumes two's complement! */
  KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0));

  bn->bb.bsize = ESent;
}

/*  BFREED  --  Dump the free lists for this thread. */
static void bfreed(kmp_info_t *th) {
  int bin = 0, count = 0;
  int gtid = __kmp_gtid_from_thread(th);
  thr_data_t *thr = get_thr_data(th);

#if BufStats
  __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC
                       " get=%" KMP_INT64_SPEC " rel=%" KMP_INT64_SPEC
                       " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC
                       " prel=%" KMP_INT64_SPEC " dget=%" KMP_INT64_SPEC
                       " drel=%" KMP_INT64_SPEC "\n",
                       gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget,
                       (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk,
                       (kmp_int64)thr->numpget, (kmp_int64)thr->numprel,
                       (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel);
#endif

  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b;

    for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin];
         b = b->ql.flink) {
      bufsize bs = b->bh.bb.bsize;

      KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
      KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
      KMP_DEBUG_ASSERT(bs > 0);

      count += 1;

      __kmp_printf_no_lock(
          "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b,
          (long)bs);
9720b57cec5SDimitry Andric #ifdef FreeWipe
9730b57cec5SDimitry Andric       {
9740b57cec5SDimitry Andric         char *lerr = ((char *)b) + sizeof(bfhead_t);
9750b57cec5SDimitry Andric         if ((bs > sizeof(bfhead_t)) &&
9760b57cec5SDimitry Andric             ((*lerr != 0x55) ||
9770b57cec5SDimitry Andric              (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
9780b57cec5SDimitry Andric               0))) {
9790b57cec5SDimitry Andric           __kmp_printf_no_lock("__kmp_printpool: T#%d     (Contents of above "
9800b57cec5SDimitry Andric                                "free block have been overstored.)\n",
9810b57cec5SDimitry Andric                                gtid);
9820b57cec5SDimitry Andric         }
9830b57cec5SDimitry Andric       }
9840b57cec5SDimitry Andric #endif
9850b57cec5SDimitry Andric     }
9860b57cec5SDimitry Andric   }
9870b57cec5SDimitry Andric 
9880b57cec5SDimitry Andric   if (count == 0)
9890b57cec5SDimitry Andric     __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid);
9900b57cec5SDimitry Andric }
9910b57cec5SDimitry Andric 
9920b57cec5SDimitry Andric void __kmp_initialize_bget(kmp_info_t *th) {
9930b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(SizeQuant >= sizeof(void *) && (th != 0));
9940b57cec5SDimitry Andric 
9950b57cec5SDimitry Andric   set_thr_data(th);
9960b57cec5SDimitry Andric 
9970b57cec5SDimitry Andric   bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free,
9980b57cec5SDimitry Andric         (bufsize)__kmp_malloc_pool_incr);
9990b57cec5SDimitry Andric }
10000b57cec5SDimitry Andric 
10010b57cec5SDimitry Andric void __kmp_finalize_bget(kmp_info_t *th) {
10020b57cec5SDimitry Andric   thr_data_t *thr;
10030b57cec5SDimitry Andric   bfhead_t *b;
10040b57cec5SDimitry Andric 
10050b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(th != 0);
10060b57cec5SDimitry Andric 
10070b57cec5SDimitry Andric #if BufStats
10080b57cec5SDimitry Andric   thr = (thr_data_t *)th->th.th_local.bget_data;
10090b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(thr != NULL);
10100b57cec5SDimitry Andric   b = thr->last_pool;
10110b57cec5SDimitry Andric 
10120b57cec5SDimitry Andric   /*  If a block-release function is defined, and this free buffer constitutes
10130b57cec5SDimitry Andric       the entire block, release it. Note that pool_len is defined in such a way
10140b57cec5SDimitry Andric       that the test will fail unless all pool blocks are the same size.  */
10150b57cec5SDimitry Andric 
10160b57cec5SDimitry Andric   // Deallocate the last pool if one exists because we no longer do it in brel()
10170b57cec5SDimitry Andric   if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
10180b57cec5SDimitry Andric       b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
10190b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
10200b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
10210b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
10220b57cec5SDimitry Andric                      b->bh.bb.bsize);
10230b57cec5SDimitry Andric 
10240b57cec5SDimitry Andric     /*  Unlink the buffer from the free list  */
10250b57cec5SDimitry Andric     __kmp_bget_remove_from_freelist(b);
10260b57cec5SDimitry Andric 
10270b57cec5SDimitry Andric     KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));
10280b57cec5SDimitry Andric 
10290b57cec5SDimitry Andric     (*thr->relfcn)(b);
10300b57cec5SDimitry Andric     thr->numprel++; /* Nr of expansion block releases */
10310b57cec5SDimitry Andric     thr->numpblk--; /* Total number of blocks */
10320b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
10330b57cec5SDimitry Andric   }
10340b57cec5SDimitry Andric #endif /* BufStats */
10350b57cec5SDimitry Andric 
10360b57cec5SDimitry Andric   /* Deallocate bget_data */
10370b57cec5SDimitry Andric   if (th->th.th_local.bget_data != NULL) {
10380b57cec5SDimitry Andric     __kmp_free(th->th.th_local.bget_data);
10390b57cec5SDimitry Andric     th->th.th_local.bget_data = NULL;
10400b57cec5SDimitry Andric   }
10410b57cec5SDimitry Andric }
10420b57cec5SDimitry Andric 
10430b57cec5SDimitry Andric void kmpc_set_poolsize(size_t size) {
10440b57cec5SDimitry Andric   bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc,
10450b57cec5SDimitry Andric         (bget_release_t)free, (bufsize)size);
10460b57cec5SDimitry Andric }
10470b57cec5SDimitry Andric 
10480b57cec5SDimitry Andric size_t kmpc_get_poolsize(void) {
10490b57cec5SDimitry Andric   thr_data_t *p;
10500b57cec5SDimitry Andric 
10510b57cec5SDimitry Andric   p = get_thr_data(__kmp_get_thread());
10520b57cec5SDimitry Andric 
10530b57cec5SDimitry Andric   return p->exp_incr;
10540b57cec5SDimitry Andric }
10550b57cec5SDimitry Andric 
10560b57cec5SDimitry Andric void kmpc_set_poolmode(int mode) {
10570b57cec5SDimitry Andric   thr_data_t *p;
10580b57cec5SDimitry Andric 
10590b57cec5SDimitry Andric   if (mode == bget_mode_fifo || mode == bget_mode_lifo ||
10600b57cec5SDimitry Andric       mode == bget_mode_best) {
10610b57cec5SDimitry Andric     p = get_thr_data(__kmp_get_thread());
10620b57cec5SDimitry Andric     p->mode = (bget_mode_t)mode;
10630b57cec5SDimitry Andric   }
10640b57cec5SDimitry Andric }
10650b57cec5SDimitry Andric 
10660b57cec5SDimitry Andric int kmpc_get_poolmode(void) {
10670b57cec5SDimitry Andric   thr_data_t *p;
10680b57cec5SDimitry Andric 
10690b57cec5SDimitry Andric   p = get_thr_data(__kmp_get_thread());
10700b57cec5SDimitry Andric 
10710b57cec5SDimitry Andric   return p->mode;
10720b57cec5SDimitry Andric }
10730b57cec5SDimitry Andric 
10740b57cec5SDimitry Andric void kmpc_get_poolstat(size_t *maxmem, size_t *allmem) {
10750b57cec5SDimitry Andric   kmp_info_t *th = __kmp_get_thread();
10760b57cec5SDimitry Andric   bufsize a, b;
10770b57cec5SDimitry Andric 
10780b57cec5SDimitry Andric   __kmp_bget_dequeue(th); /* Release any queued buffers */
10790b57cec5SDimitry Andric 
10800b57cec5SDimitry Andric   bcheck(th, &a, &b);
10810b57cec5SDimitry Andric 
10820b57cec5SDimitry Andric   *maxmem = a;
10830b57cec5SDimitry Andric   *allmem = b;
10840b57cec5SDimitry Andric }
10850b57cec5SDimitry Andric 
10860b57cec5SDimitry Andric void kmpc_poolprint(void) {
10870b57cec5SDimitry Andric   kmp_info_t *th = __kmp_get_thread();
10880b57cec5SDimitry Andric 
10890b57cec5SDimitry Andric   __kmp_bget_dequeue(th); /* Release any queued buffers */
10900b57cec5SDimitry Andric 
10910b57cec5SDimitry Andric   bfreed(th);
10920b57cec5SDimitry Andric }
10930b57cec5SDimitry Andric 
10940b57cec5SDimitry Andric #endif // #if KMP_USE_BGET
10950b57cec5SDimitry Andric 
10960b57cec5SDimitry Andric void *kmpc_malloc(size_t size) {
10970b57cec5SDimitry Andric   void *ptr;
10980b57cec5SDimitry Andric   ptr = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
10990b57cec5SDimitry Andric   if (ptr != NULL) {
11000b57cec5SDimitry Andric     // save the allocated pointer just before the one returned to the user
11010b57cec5SDimitry Andric     *(void **)ptr = ptr;
11020b57cec5SDimitry Andric     ptr = (void **)ptr + 1;
11030b57cec5SDimitry Andric   }
11040b57cec5SDimitry Andric   return ptr;
11050b57cec5SDimitry Andric }
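/* Usage sketch for the prefix bookkeeping above (illustrative only; assumes a
   caller linked against this runtime):

     void *p = kmpc_malloc(128);  // bget() block of 128 + sizeof(void *) bytes
     // ((void **)p)[-1] now holds the address bget() returned, so
     kmpc_free(p);                // kmpc_free() can recover it and call brel()
*/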
11060b57cec5SDimitry Andric 
11070b57cec5SDimitry Andric #define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0)
11080b57cec5SDimitry Andric 
11090b57cec5SDimitry Andric void *kmpc_aligned_malloc(size_t size, size_t alignment) {
11100b57cec5SDimitry Andric   void *ptr;
11110b57cec5SDimitry Andric   void *ptr_allocated;
11120b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big
11130b57cec5SDimitry Andric   if (!IS_POWER_OF_TWO(alignment)) {
11140b57cec5SDimitry Andric     // AC: do we need to issue a warning here?
11150b57cec5SDimitry Andric     errno = EINVAL;
11160b57cec5SDimitry Andric     return NULL;
11170b57cec5SDimitry Andric   }
11180b57cec5SDimitry Andric   size = size + sizeof(void *) + alignment;
11190b57cec5SDimitry Andric   ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size);
11200b57cec5SDimitry Andric   if (ptr_allocated != NULL) {
11210b57cec5SDimitry Andric     // save the allocated pointer just before the one returned to the user
11220b57cec5SDimitry Andric     ptr = (void *)(((kmp_uintptr_t)ptr_allocated + sizeof(void *) + alignment) &
11230b57cec5SDimitry Andric                    ~(alignment - 1));
11240b57cec5SDimitry Andric     *((void **)ptr - 1) = ptr_allocated;
11250b57cec5SDimitry Andric   } else {
11260b57cec5SDimitry Andric     ptr = NULL;
11270b57cec5SDimitry Andric   }
11280b57cec5SDimitry Andric   return ptr;
11290b57cec5SDimitry Andric }
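/* Worked example of the rounding above, assuming 64-bit pointers
   (sizeof(void *) == 8): for alignment == 64 and ptr_allocated == 0x1008,

     ptr = (0x1008 + 8 + 64) & ~(kmp_uintptr_t)63 = 0x1050 & ~63 = 0x1040

   which is 64-byte aligned, leaves room below it for the back-link stored at
   ((void **)ptr)[-1], and stays within the size + sizeof(void *) + alignment
   bytes obtained from bget(). */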
11300b57cec5SDimitry Andric 
11310b57cec5SDimitry Andric void *kmpc_calloc(size_t nelem, size_t elsize) {
11320b57cec5SDimitry Andric   void *ptr;
11330b57cec5SDimitry Andric   ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize + sizeof(ptr)));
11340b57cec5SDimitry Andric   if (ptr != NULL) {
11350b57cec5SDimitry Andric     // save the allocated pointer just before the one returned to the user
11360b57cec5SDimitry Andric     *(void **)ptr = ptr;
11370b57cec5SDimitry Andric     ptr = (void **)ptr + 1;
11380b57cec5SDimitry Andric   }
11390b57cec5SDimitry Andric   return ptr;
11400b57cec5SDimitry Andric }
11410b57cec5SDimitry Andric 
11420b57cec5SDimitry Andric void *kmpc_realloc(void *ptr, size_t size) {
11430b57cec5SDimitry Andric   void *result = NULL;
11440b57cec5SDimitry Andric   if (ptr == NULL) {
11450b57cec5SDimitry Andric     // If pointer is NULL, realloc behaves like malloc.
11460b57cec5SDimitry Andric     result = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
11470b57cec5SDimitry Andric     // save the allocated pointer just before the one returned to the user
11480b57cec5SDimitry Andric     if (result != NULL) {
11490b57cec5SDimitry Andric       *(void **)result = result;
11500b57cec5SDimitry Andric       result = (void **)result + 1;
11510b57cec5SDimitry Andric     }
11520b57cec5SDimitry Andric   } else if (size == 0) {
11530b57cec5SDimitry Andric     // If size is 0, realloc behaves like free.
11540b57cec5SDimitry Andric     // The thread must be registered by the call to kmpc_malloc() or
11550b57cec5SDimitry Andric     // kmpc_calloc() before.
11560b57cec5SDimitry Andric     // So it should be safe to call __kmp_get_thread(), not
11570b57cec5SDimitry Andric     // __kmp_entry_thread().
11580b57cec5SDimitry Andric     KMP_ASSERT(*((void **)ptr - 1));
11590b57cec5SDimitry Andric     brel(__kmp_get_thread(), *((void **)ptr - 1));
11600b57cec5SDimitry Andric   } else {
11610b57cec5SDimitry Andric     result = bgetr(__kmp_entry_thread(), *((void **)ptr - 1),
11620b57cec5SDimitry Andric                    (bufsize)(size + sizeof(ptr)));
11630b57cec5SDimitry Andric     if (result != NULL) {
11640b57cec5SDimitry Andric       *(void **)result = result;
11650b57cec5SDimitry Andric       result = (void **)result + 1;
11660b57cec5SDimitry Andric     }
11670b57cec5SDimitry Andric   }
11680b57cec5SDimitry Andric   return result;
11690b57cec5SDimitry Andric }
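/* Behavior sketch of the three branches above (illustrative only):

     void *p = kmpc_realloc(NULL, 64);  // acts like kmpc_malloc(64)
     p = kmpc_realloc(p, 256);          // bgetr() path, prefix saved again
     p = kmpc_realloc(p, 0);            // acts like kmpc_free(p), returns NULL
*/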
11700b57cec5SDimitry Andric 
11710b57cec5SDimitry Andric // NOTE: the library must have already been initialized by a previous allocate
11720b57cec5SDimitry Andric void kmpc_free(void *ptr) {
11730b57cec5SDimitry Andric   if (!__kmp_init_serial) {
11740b57cec5SDimitry Andric     return;
11750b57cec5SDimitry Andric   }
11760b57cec5SDimitry Andric   if (ptr != NULL) {
11770b57cec5SDimitry Andric     kmp_info_t *th = __kmp_get_thread();
11780b57cec5SDimitry Andric     __kmp_bget_dequeue(th); /* Release any queued buffers */
11790b57cec5SDimitry Andric     // extract allocated pointer and free it
11800b57cec5SDimitry Andric     KMP_ASSERT(*((void **)ptr - 1));
11810b57cec5SDimitry Andric     brel(th, *((void **)ptr - 1));
11820b57cec5SDimitry Andric   }
11830b57cec5SDimitry Andric }
11840b57cec5SDimitry Andric 
11850b57cec5SDimitry Andric void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL) {
11860b57cec5SDimitry Andric   void *ptr;
11870b57cec5SDimitry Andric   KE_TRACE(30, ("-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th,
11880b57cec5SDimitry Andric                 (int)size KMP_SRC_LOC_PARM));
11890b57cec5SDimitry Andric   ptr = bget(th, (bufsize)size);
11900b57cec5SDimitry Andric   KE_TRACE(30, ("<- __kmp_thread_malloc() returns %p\n", ptr));
11910b57cec5SDimitry Andric   return ptr;
11920b57cec5SDimitry Andric }
11930b57cec5SDimitry Andric 
11940b57cec5SDimitry Andric void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
11950b57cec5SDimitry Andric                            size_t elsize KMP_SRC_LOC_DECL) {
11960b57cec5SDimitry Andric   void *ptr;
11970b57cec5SDimitry Andric   KE_TRACE(30, ("-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th,
11980b57cec5SDimitry Andric                 (int)nelem, (int)elsize KMP_SRC_LOC_PARM));
11990b57cec5SDimitry Andric   ptr = bgetz(th, (bufsize)(nelem * elsize));
12000b57cec5SDimitry Andric   KE_TRACE(30, ("<- __kmp_thread_calloc() returns %p\n", ptr));
12010b57cec5SDimitry Andric   return ptr;
12020b57cec5SDimitry Andric }
12030b57cec5SDimitry Andric 
12040b57cec5SDimitry Andric void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
12050b57cec5SDimitry Andric                             size_t size KMP_SRC_LOC_DECL) {
12060b57cec5SDimitry Andric   KE_TRACE(30, ("-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th,
12070b57cec5SDimitry Andric                 ptr, (int)size KMP_SRC_LOC_PARM));
12080b57cec5SDimitry Andric   ptr = bgetr(th, ptr, (bufsize)size);
12090b57cec5SDimitry Andric   KE_TRACE(30, ("<- __kmp_thread_realloc() returns %p\n", ptr));
12100b57cec5SDimitry Andric   return ptr;
12110b57cec5SDimitry Andric }
12120b57cec5SDimitry Andric 
12130b57cec5SDimitry Andric void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL) {
12140b57cec5SDimitry Andric   KE_TRACE(30, ("-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th,
12150b57cec5SDimitry Andric                 ptr KMP_SRC_LOC_PARM));
12160b57cec5SDimitry Andric   if (ptr != NULL) {
12170b57cec5SDimitry Andric     __kmp_bget_dequeue(th); /* Release any queued buffers */
12180b57cec5SDimitry Andric     brel(th, ptr);
12190b57cec5SDimitry Andric   }
12200b57cec5SDimitry Andric   KE_TRACE(30, ("<- __kmp_thread_free()\n"));
12210b57cec5SDimitry Andric }
12220b57cec5SDimitry Andric 
12230b57cec5SDimitry Andric /* OMP 5.0 Memory Management support */
12240b57cec5SDimitry Andric static const char *kmp_mk_lib_name;
12250b57cec5SDimitry Andric static void *h_memkind;
12260b57cec5SDimitry Andric /* memkind experimental API: */
12270b57cec5SDimitry Andric // memkind_alloc
12280b57cec5SDimitry Andric static void *(*kmp_mk_alloc)(void *k, size_t sz);
12290b57cec5SDimitry Andric // memkind_free
12300b57cec5SDimitry Andric static void (*kmp_mk_free)(void *kind, void *ptr);
12310b57cec5SDimitry Andric // memkind_check_available
12320b57cec5SDimitry Andric static int (*kmp_mk_check)(void *kind);
12330b57cec5SDimitry Andric // kinds we are going to use
12340b57cec5SDimitry Andric static void **mk_default;
12350b57cec5SDimitry Andric static void **mk_interleave;
12360b57cec5SDimitry Andric static void **mk_hbw;
12370b57cec5SDimitry Andric static void **mk_hbw_interleave;
12380b57cec5SDimitry Andric static void **mk_hbw_preferred;
12390b57cec5SDimitry Andric static void **mk_hugetlb;
12400b57cec5SDimitry Andric static void **mk_hbw_hugetlb;
12410b57cec5SDimitry Andric static void **mk_hbw_preferred_hugetlb;
1242e8d8bef9SDimitry Andric static void **mk_dax_kmem;
1243e8d8bef9SDimitry Andric static void **mk_dax_kmem_all;
1244e8d8bef9SDimitry Andric static void **mk_dax_kmem_preferred;
1245fe6060f1SDimitry Andric static void *(*kmp_target_alloc_host)(size_t size, int device);
1246fe6060f1SDimitry Andric static void *(*kmp_target_alloc_shared)(size_t size, int device);
1247fe6060f1SDimitry Andric static void *(*kmp_target_alloc_device)(size_t size, int device);
1248bdd1243dSDimitry Andric static void *(*kmp_target_lock_mem)(void *ptr, size_t size, int device);
1249bdd1243dSDimitry Andric static void *(*kmp_target_unlock_mem)(void *ptr, int device);
1250bdd1243dSDimitry Andric static void *(*kmp_target_free_host)(void *ptr, int device);
1251bdd1243dSDimitry Andric static void *(*kmp_target_free_shared)(void *ptr, int device);
1252bdd1243dSDimitry Andric static void *(*kmp_target_free_device)(void *ptr, int device);
1253fe6060f1SDimitry Andric static bool __kmp_target_mem_available;
1254fe6060f1SDimitry Andric #define KMP_IS_TARGET_MEM_SPACE(MS)                                            \
1255fe6060f1SDimitry Andric   (MS == llvm_omp_target_host_mem_space ||                                     \
1256fe6060f1SDimitry Andric    MS == llvm_omp_target_shared_mem_space ||                                   \
1257fe6060f1SDimitry Andric    MS == llvm_omp_target_device_mem_space)
1258fe6060f1SDimitry Andric #define KMP_IS_TARGET_MEM_ALLOC(MA)                                            \
1259fe6060f1SDimitry Andric   (MA == llvm_omp_target_host_mem_alloc ||                                     \
1260fe6060f1SDimitry Andric    MA == llvm_omp_target_shared_mem_alloc ||                                   \
1261fe6060f1SDimitry Andric    MA == llvm_omp_target_device_mem_alloc)
12620b57cec5SDimitry Andric 
126381ad6265SDimitry Andric #if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
12640b57cec5SDimitry Andric static inline void chk_kind(void ***pkind) {
12650b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(pkind);
12660b57cec5SDimitry Andric   if (*pkind) // symbol found
12670b57cec5SDimitry Andric     if (kmp_mk_check(**pkind)) // kind not available or error
12680b57cec5SDimitry Andric       *pkind = NULL;
12690b57cec5SDimitry Andric }
12700b57cec5SDimitry Andric #endif
12710b57cec5SDimitry Andric 
12720b57cec5SDimitry Andric void __kmp_init_memkind() {
12730b57cec5SDimitry Andric // as of 2018-07-31 memkind does not support Windows*, exclude it for now
127481ad6265SDimitry Andric #if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
12750b57cec5SDimitry Andric   // use of statically linked memkind is problematic, as it depends on libnuma
12760b57cec5SDimitry Andric   kmp_mk_lib_name = "libmemkind.so";
12770b57cec5SDimitry Andric   h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY);
12780b57cec5SDimitry Andric   if (h_memkind) {
12790b57cec5SDimitry Andric     kmp_mk_check = (int (*)(void *))dlsym(h_memkind, "memkind_check_available");
12800b57cec5SDimitry Andric     kmp_mk_alloc =
12810b57cec5SDimitry Andric         (void *(*)(void *, size_t))dlsym(h_memkind, "memkind_malloc");
12820b57cec5SDimitry Andric     kmp_mk_free = (void (*)(void *, void *))dlsym(h_memkind, "memkind_free");
12830b57cec5SDimitry Andric     mk_default = (void **)dlsym(h_memkind, "MEMKIND_DEFAULT");
12840b57cec5SDimitry Andric     if (kmp_mk_check && kmp_mk_alloc && kmp_mk_free && mk_default &&
12850b57cec5SDimitry Andric         !kmp_mk_check(*mk_default)) {
12860b57cec5SDimitry Andric       __kmp_memkind_available = 1;
12870b57cec5SDimitry Andric       mk_interleave = (void **)dlsym(h_memkind, "MEMKIND_INTERLEAVE");
12880b57cec5SDimitry Andric       chk_kind(&mk_interleave);
12890b57cec5SDimitry Andric       mk_hbw = (void **)dlsym(h_memkind, "MEMKIND_HBW");
12900b57cec5SDimitry Andric       chk_kind(&mk_hbw);
12910b57cec5SDimitry Andric       mk_hbw_interleave = (void **)dlsym(h_memkind, "MEMKIND_HBW_INTERLEAVE");
12920b57cec5SDimitry Andric       chk_kind(&mk_hbw_interleave);
12930b57cec5SDimitry Andric       mk_hbw_preferred = (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED");
12940b57cec5SDimitry Andric       chk_kind(&mk_hbw_preferred);
12950b57cec5SDimitry Andric       mk_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HUGETLB");
12960b57cec5SDimitry Andric       chk_kind(&mk_hugetlb);
12970b57cec5SDimitry Andric       mk_hbw_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HBW_HUGETLB");
12980b57cec5SDimitry Andric       chk_kind(&mk_hbw_hugetlb);
12990b57cec5SDimitry Andric       mk_hbw_preferred_hugetlb =
13000b57cec5SDimitry Andric           (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED_HUGETLB");
13010b57cec5SDimitry Andric       chk_kind(&mk_hbw_preferred_hugetlb);
1302e8d8bef9SDimitry Andric       mk_dax_kmem = (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM");
1303e8d8bef9SDimitry Andric       chk_kind(&mk_dax_kmem);
1304e8d8bef9SDimitry Andric       mk_dax_kmem_all = (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM_ALL");
1305e8d8bef9SDimitry Andric       chk_kind(&mk_dax_kmem_all);
1306e8d8bef9SDimitry Andric       mk_dax_kmem_preferred =
1307e8d8bef9SDimitry Andric           (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM_PREFERRED");
1308e8d8bef9SDimitry Andric       chk_kind(&mk_dax_kmem_preferred);
13090b57cec5SDimitry Andric       KE_TRACE(25, ("__kmp_init_memkind: memkind library initialized\n"));
13100b57cec5SDimitry Andric       return; // success
13110b57cec5SDimitry Andric     }
13120b57cec5SDimitry Andric     dlclose(h_memkind); // failure
13130b57cec5SDimitry Andric   }
1314e8d8bef9SDimitry Andric #else // !(KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN)
13150b57cec5SDimitry Andric   kmp_mk_lib_name = "";
1316e8d8bef9SDimitry Andric #endif // !(KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN)
13170b57cec5SDimitry Andric   h_memkind = NULL;
13180b57cec5SDimitry Andric   kmp_mk_check = NULL;
13190b57cec5SDimitry Andric   kmp_mk_alloc = NULL;
13200b57cec5SDimitry Andric   kmp_mk_free = NULL;
13210b57cec5SDimitry Andric   mk_default = NULL;
13220b57cec5SDimitry Andric   mk_interleave = NULL;
13230b57cec5SDimitry Andric   mk_hbw = NULL;
13240b57cec5SDimitry Andric   mk_hbw_interleave = NULL;
13250b57cec5SDimitry Andric   mk_hbw_preferred = NULL;
13260b57cec5SDimitry Andric   mk_hugetlb = NULL;
13270b57cec5SDimitry Andric   mk_hbw_hugetlb = NULL;
13280b57cec5SDimitry Andric   mk_hbw_preferred_hugetlb = NULL;
1329e8d8bef9SDimitry Andric   mk_dax_kmem = NULL;
1330e8d8bef9SDimitry Andric   mk_dax_kmem_all = NULL;
1331e8d8bef9SDimitry Andric   mk_dax_kmem_preferred = NULL;
13320b57cec5SDimitry Andric }
13330b57cec5SDimitry Andric 
13340b57cec5SDimitry Andric void __kmp_fini_memkind() {
13350b57cec5SDimitry Andric #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
13360b57cec5SDimitry Andric   if (__kmp_memkind_available)
13370b57cec5SDimitry Andric     KE_TRACE(25, ("__kmp_fini_memkind: finalize memkind library\n"));
13380b57cec5SDimitry Andric   if (h_memkind) {
13390b57cec5SDimitry Andric     dlclose(h_memkind);
13400b57cec5SDimitry Andric     h_memkind = NULL;
13410b57cec5SDimitry Andric   }
13420b57cec5SDimitry Andric   kmp_mk_check = NULL;
13430b57cec5SDimitry Andric   kmp_mk_alloc = NULL;
13440b57cec5SDimitry Andric   kmp_mk_free = NULL;
13450b57cec5SDimitry Andric   mk_default = NULL;
13460b57cec5SDimitry Andric   mk_interleave = NULL;
13470b57cec5SDimitry Andric   mk_hbw = NULL;
13480b57cec5SDimitry Andric   mk_hbw_interleave = NULL;
13490b57cec5SDimitry Andric   mk_hbw_preferred = NULL;
13500b57cec5SDimitry Andric   mk_hugetlb = NULL;
13510b57cec5SDimitry Andric   mk_hbw_hugetlb = NULL;
13520b57cec5SDimitry Andric   mk_hbw_preferred_hugetlb = NULL;
1353e8d8bef9SDimitry Andric   mk_dax_kmem = NULL;
1354e8d8bef9SDimitry Andric   mk_dax_kmem_all = NULL;
1355e8d8bef9SDimitry Andric   mk_dax_kmem_preferred = NULL;
13560b57cec5SDimitry Andric #endif
13570b57cec5SDimitry Andric }
135881ad6265SDimitry Andric 
1359fe6060f1SDimitry Andric void __kmp_init_target_mem() {
1360fe6060f1SDimitry Andric   *(void **)(&kmp_target_alloc_host) = KMP_DLSYM("llvm_omp_target_alloc_host");
1361fe6060f1SDimitry Andric   *(void **)(&kmp_target_alloc_shared) =
1362fe6060f1SDimitry Andric       KMP_DLSYM("llvm_omp_target_alloc_shared");
1363fe6060f1SDimitry Andric   *(void **)(&kmp_target_alloc_device) =
1364fe6060f1SDimitry Andric       KMP_DLSYM("llvm_omp_target_alloc_device");
1365bdd1243dSDimitry Andric   *(void **)(&kmp_target_free_host) = KMP_DLSYM("llvm_omp_target_free_host");
1366bdd1243dSDimitry Andric   *(void **)(&kmp_target_free_shared) =
1367bdd1243dSDimitry Andric       KMP_DLSYM("llvm_omp_target_free_shared");
1368bdd1243dSDimitry Andric   *(void **)(&kmp_target_free_device) =
1369bdd1243dSDimitry Andric       KMP_DLSYM("llvm_omp_target_free_device");
1370bdd1243dSDimitry Andric   __kmp_target_mem_available =
1371bdd1243dSDimitry Andric       kmp_target_alloc_host && kmp_target_alloc_shared &&
1372bdd1243dSDimitry Andric       kmp_target_alloc_device && kmp_target_free_host &&
1373bdd1243dSDimitry Andric       kmp_target_free_shared && kmp_target_free_device;
1374bdd1243dSDimitry Andric   // lock/pin and unlock/unpin target calls
1375bdd1243dSDimitry Andric   *(void **)(&kmp_target_lock_mem) = KMP_DLSYM("llvm_omp_target_lock_mem");
1376bdd1243dSDimitry Andric   *(void **)(&kmp_target_unlock_mem) = KMP_DLSYM("llvm_omp_target_unlock_mem");
1377fe6060f1SDimitry Andric }
13780b57cec5SDimitry Andric 
13790b57cec5SDimitry Andric omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
13800b57cec5SDimitry Andric                                              int ntraits,
13810b57cec5SDimitry Andric                                              omp_alloctrait_t traits[]) {
13820b57cec5SDimitry Andric   // OpenMP 5.0 only allows predefined memspaces
13830b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(ms == omp_default_mem_space || ms == omp_low_lat_mem_space ||
13840b57cec5SDimitry Andric                    ms == omp_large_cap_mem_space || ms == omp_const_mem_space ||
1385fe6060f1SDimitry Andric                    ms == omp_high_bw_mem_space || KMP_IS_TARGET_MEM_SPACE(ms));
13860b57cec5SDimitry Andric   kmp_allocator_t *al;
13870b57cec5SDimitry Andric   int i;
13880b57cec5SDimitry Andric   al = (kmp_allocator_t *)__kmp_allocate(sizeof(kmp_allocator_t)); // zeroed
13890b57cec5SDimitry Andric   al->memspace = ms; // not used currently
13900b57cec5SDimitry Andric   for (i = 0; i < ntraits; ++i) {
13910b57cec5SDimitry Andric     switch (traits[i].key) {
1392e8d8bef9SDimitry Andric     case omp_atk_sync_hint:
13935ffd83dbSDimitry Andric     case omp_atk_access:
1394bdd1243dSDimitry Andric       break;
13955ffd83dbSDimitry Andric     case omp_atk_pinned:
1396bdd1243dSDimitry Andric       al->pinned = true;
13970b57cec5SDimitry Andric       break;
13985ffd83dbSDimitry Andric     case omp_atk_alignment:
1399e8d8bef9SDimitry Andric       __kmp_type_convert(traits[i].value, &(al->alignment));
14000b57cec5SDimitry Andric       KMP_ASSERT(IS_POWER_OF_TWO(al->alignment));
14010b57cec5SDimitry Andric       break;
14025ffd83dbSDimitry Andric     case omp_atk_pool_size:
14030b57cec5SDimitry Andric       al->pool_size = traits[i].value;
14040b57cec5SDimitry Andric       break;
14055ffd83dbSDimitry Andric     case omp_atk_fallback:
14060b57cec5SDimitry Andric       al->fb = (omp_alloctrait_value_t)traits[i].value;
14070b57cec5SDimitry Andric       KMP_DEBUG_ASSERT(
14085ffd83dbSDimitry Andric           al->fb == omp_atv_default_mem_fb || al->fb == omp_atv_null_fb ||
14095ffd83dbSDimitry Andric           al->fb == omp_atv_abort_fb || al->fb == omp_atv_allocator_fb);
14100b57cec5SDimitry Andric       break;
14115ffd83dbSDimitry Andric     case omp_atk_fb_data:
14120b57cec5SDimitry Andric       al->fb_data = RCAST(kmp_allocator_t *, traits[i].value);
14130b57cec5SDimitry Andric       break;
14145ffd83dbSDimitry Andric     case omp_atk_partition:
14150b57cec5SDimitry Andric       al->memkind = RCAST(void **, traits[i].value);
14160b57cec5SDimitry Andric       break;
14170b57cec5SDimitry Andric     default:
14180b57cec5SDimitry Andric       KMP_ASSERT2(0, "Unexpected allocator trait");
14190b57cec5SDimitry Andric     }
14200b57cec5SDimitry Andric   }
14210b57cec5SDimitry Andric   if (al->fb == 0) {
14220b57cec5SDimitry Andric     // set default allocator
14235ffd83dbSDimitry Andric     al->fb = omp_atv_default_mem_fb;
14240b57cec5SDimitry Andric     al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
14255ffd83dbSDimitry Andric   } else if (al->fb == omp_atv_allocator_fb) {
14260b57cec5SDimitry Andric     KMP_ASSERT(al->fb_data != NULL);
14275ffd83dbSDimitry Andric   } else if (al->fb == omp_atv_default_mem_fb) {
14280b57cec5SDimitry Andric     al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
14290b57cec5SDimitry Andric   }
14300b57cec5SDimitry Andric   if (__kmp_memkind_available) {
14310b57cec5SDimitry Andric     // Let's use memkind library if available
14320b57cec5SDimitry Andric     if (ms == omp_high_bw_mem_space) {
14335ffd83dbSDimitry Andric       if (al->memkind == (void *)omp_atv_interleaved && mk_hbw_interleave) {
14340b57cec5SDimitry Andric         al->memkind = mk_hbw_interleave;
14350b57cec5SDimitry Andric       } else if (mk_hbw_preferred) {
14360b57cec5SDimitry Andric         // AC: do not try to use MEMKIND_HBW for now, because memkind library
14370b57cec5SDimitry Andric         // cannot reliably detect exhaustion of HBW memory.
14380b57cec5SDimitry Andric         // It could be possible using hbw_verify_memory_region() but memkind
14390b57cec5SDimitry Andric         // manual says: "Using this function in production code may result in
14400b57cec5SDimitry Andric         // serious performance penalty".
14410b57cec5SDimitry Andric         al->memkind = mk_hbw_preferred;
14420b57cec5SDimitry Andric       } else {
14430b57cec5SDimitry Andric         // HBW is requested but not available --> return NULL allocator
14440b57cec5SDimitry Andric         __kmp_free(al);
14450b57cec5SDimitry Andric         return omp_null_allocator;
14460b57cec5SDimitry Andric       }
1447e8d8bef9SDimitry Andric     } else if (ms == omp_large_cap_mem_space) {
1448e8d8bef9SDimitry Andric       if (mk_dax_kmem_all) {
1449e8d8bef9SDimitry Andric         // All pmem nodes are visited
1450e8d8bef9SDimitry Andric         al->memkind = mk_dax_kmem_all;
1451e8d8bef9SDimitry Andric       } else if (mk_dax_kmem) {
1452e8d8bef9SDimitry Andric         // Only closest pmem node is visited
1453e8d8bef9SDimitry Andric         al->memkind = mk_dax_kmem;
1454e8d8bef9SDimitry Andric       } else {
1455e8d8bef9SDimitry Andric         __kmp_free(al);
1456e8d8bef9SDimitry Andric         return omp_null_allocator;
1457e8d8bef9SDimitry Andric       }
14580b57cec5SDimitry Andric     } else {
14595ffd83dbSDimitry Andric       if (al->memkind == (void *)omp_atv_interleaved && mk_interleave) {
14600b57cec5SDimitry Andric         al->memkind = mk_interleave;
14610b57cec5SDimitry Andric       } else {
14620b57cec5SDimitry Andric         al->memkind = mk_default;
14630b57cec5SDimitry Andric       }
14640b57cec5SDimitry Andric     }
1465fe6060f1SDimitry Andric   } else if (KMP_IS_TARGET_MEM_SPACE(ms) && !__kmp_target_mem_available) {
1466fe6060f1SDimitry Andric     __kmp_free(al);
1467fe6060f1SDimitry Andric     return omp_null_allocator;
14680b57cec5SDimitry Andric   } else {
14690b57cec5SDimitry Andric     if (ms == omp_high_bw_mem_space) {
14700b57cec5SDimitry Andric       // cannot detect HBW memory presence without memkind library
14710b57cec5SDimitry Andric       __kmp_free(al);
14720b57cec5SDimitry Andric       return omp_null_allocator;
14730b57cec5SDimitry Andric     }
14740b57cec5SDimitry Andric   }
14750b57cec5SDimitry Andric   return (omp_allocator_handle_t)al;
14760b57cec5SDimitry Andric }
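/* Illustrative user-side sketch (assumes the standard OpenMP 5.0 allocator API
   from <omp.h>, which is the usual way user code reaches this routine):

     omp_alloctrait_t traits[] = {{omp_atk_alignment, 64},
                                  {omp_atk_pool_size, 1 << 20},
                                  {omp_atk_fallback, omp_atv_null_fb}};
     omp_allocator_handle_t a =
         omp_init_allocator(omp_default_mem_space, 3, traits);
     void *p = omp_alloc(1024, a); // 64-byte aligned, drawn from the 1 MB pool
     omp_free(p, a);
     omp_destroy_allocator(a);
*/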
14770b57cec5SDimitry Andric 
14780b57cec5SDimitry Andric void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t allocator) {
14790b57cec5SDimitry Andric   if (allocator > kmp_max_mem_alloc)
14800b57cec5SDimitry Andric     __kmp_free(allocator);
14810b57cec5SDimitry Andric }
14820b57cec5SDimitry Andric 
14830b57cec5SDimitry Andric void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t allocator) {
14840b57cec5SDimitry Andric   if (allocator == omp_null_allocator)
14850b57cec5SDimitry Andric     allocator = omp_default_mem_alloc;
14860b57cec5SDimitry Andric   __kmp_threads[gtid]->th.th_def_allocator = allocator;
14870b57cec5SDimitry Andric }
14880b57cec5SDimitry Andric 
14890b57cec5SDimitry Andric omp_allocator_handle_t __kmpc_get_default_allocator(int gtid) {
14900b57cec5SDimitry Andric   return __kmp_threads[gtid]->th.th_def_allocator;
14910b57cec5SDimitry Andric }
14920b57cec5SDimitry Andric 
14930b57cec5SDimitry Andric typedef struct kmp_mem_desc { // Memory block descriptor
14940b57cec5SDimitry Andric   void *ptr_alloc; // Pointer returned by allocator
14950b57cec5SDimitry Andric   size_t size_a; // Size of allocated memory block (initial+descriptor+align)
1496e8d8bef9SDimitry Andric   size_t size_orig; // Original size requested
14970b57cec5SDimitry Andric   void *ptr_align; // Pointer to aligned memory, returned
14980b57cec5SDimitry Andric   kmp_allocator_t *allocator; // allocator
14990b57cec5SDimitry Andric } kmp_mem_desc_t;
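/* Block layout produced by __kmp_alloc() below (illustrative):

     ptr_alloc            addr_descr                ptr_align (returned)
     |                    |                         |
     [ alignment padding  | kmp_mem_desc_t copy     | user data (size) ... ]

   __kmp_realloc() and ___kmpc_free() read the descriptor back from
   ptr_align - sizeof(kmp_mem_desc_t). */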
1500349cc55cSDimitry Andric static int alignment = sizeof(void *); // align to pointer size by default
15010b57cec5SDimitry Andric 
1502349cc55cSDimitry Andric // external interfaces are wrappers over internal implementation
15030b57cec5SDimitry Andric void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
1504349cc55cSDimitry Andric   KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator));
1505349cc55cSDimitry Andric   void *ptr = __kmp_alloc(gtid, 0, size, allocator);
1506349cc55cSDimitry Andric   KE_TRACE(25, ("__kmpc_alloc returns %p, T#%d\n", ptr, gtid));
1507349cc55cSDimitry Andric   return ptr;
1508349cc55cSDimitry Andric }
1509349cc55cSDimitry Andric 
1510349cc55cSDimitry Andric void *__kmpc_aligned_alloc(int gtid, size_t algn, size_t size,
1511349cc55cSDimitry Andric                            omp_allocator_handle_t allocator) {
1512349cc55cSDimitry Andric   KE_TRACE(25, ("__kmpc_aligned_alloc: T#%d (%d, %d, %p)\n", gtid, (int)algn,
1513349cc55cSDimitry Andric                 (int)size, allocator));
1514349cc55cSDimitry Andric   void *ptr = __kmp_alloc(gtid, algn, size, allocator);
1515349cc55cSDimitry Andric   KE_TRACE(25, ("__kmpc_aligned_alloc returns %p, T#%d\n", ptr, gtid));
1516349cc55cSDimitry Andric   return ptr;
1517349cc55cSDimitry Andric }
1518349cc55cSDimitry Andric 
1519349cc55cSDimitry Andric void *__kmpc_calloc(int gtid, size_t nmemb, size_t size,
1520349cc55cSDimitry Andric                     omp_allocator_handle_t allocator) {
1521349cc55cSDimitry Andric   KE_TRACE(25, ("__kmpc_calloc: T#%d (%d, %d, %p)\n", gtid, (int)nmemb,
1522349cc55cSDimitry Andric                 (int)size, allocator));
1523349cc55cSDimitry Andric   void *ptr = __kmp_calloc(gtid, 0, nmemb, size, allocator);
1524349cc55cSDimitry Andric   KE_TRACE(25, ("__kmpc_calloc returns %p, T#%d\n", ptr, gtid));
1525349cc55cSDimitry Andric   return ptr;
1526349cc55cSDimitry Andric }
1527349cc55cSDimitry Andric 
1528349cc55cSDimitry Andric void *__kmpc_realloc(int gtid, void *ptr, size_t size,
1529349cc55cSDimitry Andric                      omp_allocator_handle_t allocator,
1530349cc55cSDimitry Andric                      omp_allocator_handle_t free_allocator) {
1531349cc55cSDimitry Andric   KE_TRACE(25, ("__kmpc_realloc: T#%d (%p, %d, %p, %p)\n", gtid, ptr, (int)size,
1532349cc55cSDimitry Andric                 allocator, free_allocator));
1533349cc55cSDimitry Andric   void *nptr = __kmp_realloc(gtid, ptr, size, allocator, free_allocator);
1534349cc55cSDimitry Andric   KE_TRACE(25, ("__kmpc_realloc returns %p, T#%d\n", nptr, gtid));
1535349cc55cSDimitry Andric   return nptr;
1536349cc55cSDimitry Andric }
1537349cc55cSDimitry Andric 
1538349cc55cSDimitry Andric void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
1539349cc55cSDimitry Andric   KE_TRACE(25, ("__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator));
1540349cc55cSDimitry Andric   ___kmpc_free(gtid, ptr, allocator);
1541349cc55cSDimitry Andric   KE_TRACE(10, ("__kmpc_free: T#%d freed %p (%p)\n", gtid, ptr, allocator));
1542349cc55cSDimitry Andric   return;
1543349cc55cSDimitry Andric }
1544349cc55cSDimitry Andric 
1545349cc55cSDimitry Andric // internal implementation, called from inside the library
1546349cc55cSDimitry Andric void *__kmp_alloc(int gtid, size_t algn, size_t size,
1547349cc55cSDimitry Andric                   omp_allocator_handle_t allocator) {
15480b57cec5SDimitry Andric   void *ptr = NULL;
15490b57cec5SDimitry Andric   kmp_allocator_t *al;
15500b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(__kmp_init_serial);
1551e8d8bef9SDimitry Andric   if (size == 0)
1552e8d8bef9SDimitry Andric     return NULL;
15530b57cec5SDimitry Andric   if (allocator == omp_null_allocator)
15540b57cec5SDimitry Andric     allocator = __kmp_threads[gtid]->th.th_def_allocator;
1555bdd1243dSDimitry Andric   kmp_int32 default_device =
1556bdd1243dSDimitry Andric       __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
15570b57cec5SDimitry Andric 
1558349cc55cSDimitry Andric   al = RCAST(kmp_allocator_t *, allocator);
15590b57cec5SDimitry Andric 
15600b57cec5SDimitry Andric   int sz_desc = sizeof(kmp_mem_desc_t);
15610b57cec5SDimitry Andric   kmp_mem_desc_t desc;
15620b57cec5SDimitry Andric   kmp_uintptr_t addr; // address returned by allocator
15630b57cec5SDimitry Andric   kmp_uintptr_t addr_align; // address to return to caller
15640b57cec5SDimitry Andric   kmp_uintptr_t addr_descr; // address of memory block descriptor
1565349cc55cSDimitry Andric   size_t align = alignment; // default alignment
1566349cc55cSDimitry Andric   if (allocator > kmp_max_mem_alloc && al->alignment > align)
1567349cc55cSDimitry Andric     align = al->alignment; // alignment required by allocator trait
1568349cc55cSDimitry Andric   if (align < algn)
1569349cc55cSDimitry Andric     align = algn; // max of allocator trait, parameter and sizeof(void*)
1570e8d8bef9SDimitry Andric   desc.size_orig = size;
15710b57cec5SDimitry Andric   desc.size_a = size + sz_desc + align;
1572bdd1243dSDimitry Andric   bool is_pinned = false;
1573bdd1243dSDimitry Andric   if (allocator > kmp_max_mem_alloc)
1574bdd1243dSDimitry Andric     is_pinned = al->pinned;
1575bdd1243dSDimitry Andric 
1576bdd1243dSDimitry Andric   // Use default allocator if libmemkind is not available
1577bdd1243dSDimitry Andric   bool use_default_allocator = !__kmp_memkind_available;
15780b57cec5SDimitry Andric 
157906c3fb27SDimitry Andric   if (KMP_IS_TARGET_MEM_ALLOC(allocator)) {
158006c3fb27SDimitry Andric     // Use size input directly as the memory may not be accessible on host.
158106c3fb27SDimitry Andric     // Use default device for now.
158206c3fb27SDimitry Andric     if (__kmp_target_mem_available) {
158306c3fb27SDimitry Andric       kmp_int32 device =
158406c3fb27SDimitry Andric           __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
158506c3fb27SDimitry Andric       if (allocator == llvm_omp_target_host_mem_alloc)
158606c3fb27SDimitry Andric         ptr = kmp_target_alloc_host(size, device);
158706c3fb27SDimitry Andric       else if (allocator == llvm_omp_target_shared_mem_alloc)
158806c3fb27SDimitry Andric         ptr = kmp_target_alloc_shared(size, device);
158906c3fb27SDimitry Andric       else // allocator == llvm_omp_target_device_mem_alloc
159006c3fb27SDimitry Andric         ptr = kmp_target_alloc_device(size, device);
159106c3fb27SDimitry Andric       return ptr;
159206c3fb27SDimitry Andric     } else {
159306c3fb27SDimitry Andric       KMP_INFORM(TargetMemNotAvailable);
159406c3fb27SDimitry Andric     }
159506c3fb27SDimitry Andric   }
159606c3fb27SDimitry Andric 
159706c3fb27SDimitry Andric   if (allocator >= kmp_max_mem_alloc && KMP_IS_TARGET_MEM_SPACE(al->memspace)) {
159806c3fb27SDimitry Andric     if (__kmp_target_mem_available) {
159906c3fb27SDimitry Andric       kmp_int32 device =
160006c3fb27SDimitry Andric           __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
160106c3fb27SDimitry Andric       if (al->memspace == llvm_omp_target_host_mem_space)
160206c3fb27SDimitry Andric         ptr = kmp_target_alloc_host(size, device);
160306c3fb27SDimitry Andric       else if (al->memspace == llvm_omp_target_shared_mem_space)
160406c3fb27SDimitry Andric         ptr = kmp_target_alloc_shared(size, device);
160506c3fb27SDimitry Andric       else // al->memspace == llvm_omp_target_device_mem_space
160606c3fb27SDimitry Andric         ptr = kmp_target_alloc_device(size, device);
160706c3fb27SDimitry Andric       return ptr;
160806c3fb27SDimitry Andric     } else {
160906c3fb27SDimitry Andric       KMP_INFORM(TargetMemNotAvailable);
161006c3fb27SDimitry Andric     }
161106c3fb27SDimitry Andric   }
161206c3fb27SDimitry Andric 
16130b57cec5SDimitry Andric   if (__kmp_memkind_available) {
16140b57cec5SDimitry Andric     if (allocator < kmp_max_mem_alloc) {
16150b57cec5SDimitry Andric       // pre-defined allocator
16160b57cec5SDimitry Andric       if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) {
16170b57cec5SDimitry Andric         ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a);
1618e8d8bef9SDimitry Andric       } else if (allocator == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
1619e8d8bef9SDimitry Andric         ptr = kmp_mk_alloc(*mk_dax_kmem_all, desc.size_a);
16200b57cec5SDimitry Andric       } else {
16210b57cec5SDimitry Andric         ptr = kmp_mk_alloc(*mk_default, desc.size_a);
16220b57cec5SDimitry Andric       }
16230b57cec5SDimitry Andric     } else if (al->pool_size > 0) {
16240b57cec5SDimitry Andric       // custom allocator with pool size requested
16250b57cec5SDimitry Andric       kmp_uint64 used =
16260b57cec5SDimitry Andric           KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
16270b57cec5SDimitry Andric       if (used + desc.size_a > al->pool_size) {
16280b57cec5SDimitry Andric         // not enough space, need to go fallback path
16290b57cec5SDimitry Andric         KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
16305ffd83dbSDimitry Andric         if (al->fb == omp_atv_default_mem_fb) {
16310b57cec5SDimitry Andric           al = (kmp_allocator_t *)omp_default_mem_alloc;
16320b57cec5SDimitry Andric           ptr = kmp_mk_alloc(*mk_default, desc.size_a);
16335ffd83dbSDimitry Andric         } else if (al->fb == omp_atv_abort_fb) {
16340b57cec5SDimitry Andric           KMP_ASSERT(0); // abort fallback requested
16355ffd83dbSDimitry Andric         } else if (al->fb == omp_atv_allocator_fb) {
16360b57cec5SDimitry Andric           KMP_ASSERT(al != al->fb_data);
16370b57cec5SDimitry Andric           al = al->fb_data;
1638bdd1243dSDimitry Andric           ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
1639bdd1243dSDimitry Andric           if (is_pinned && kmp_target_lock_mem)
1640bdd1243dSDimitry Andric             kmp_target_lock_mem(ptr, size, default_device);
1641bdd1243dSDimitry Andric           return ptr;
16420b57cec5SDimitry Andric         } // else ptr == NULL;
16430b57cec5SDimitry Andric       } else {
16440b57cec5SDimitry Andric         // pool has enough space
16450b57cec5SDimitry Andric         ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
16460b57cec5SDimitry Andric         if (ptr == NULL) {
16475ffd83dbSDimitry Andric           if (al->fb == omp_atv_default_mem_fb) {
16480b57cec5SDimitry Andric             al = (kmp_allocator_t *)omp_default_mem_alloc;
16490b57cec5SDimitry Andric             ptr = kmp_mk_alloc(*mk_default, desc.size_a);
16505ffd83dbSDimitry Andric           } else if (al->fb == omp_atv_abort_fb) {
16510b57cec5SDimitry Andric             KMP_ASSERT(0); // abort fallback requested
16525ffd83dbSDimitry Andric           } else if (al->fb == omp_atv_allocator_fb) {
16530b57cec5SDimitry Andric             KMP_ASSERT(al != al->fb_data);
16540b57cec5SDimitry Andric             al = al->fb_data;
1655bdd1243dSDimitry Andric             ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
1656bdd1243dSDimitry Andric             if (is_pinned && kmp_target_lock_mem)
1657bdd1243dSDimitry Andric               kmp_target_lock_mem(ptr, size, default_device);
1658bdd1243dSDimitry Andric             return ptr;
16590b57cec5SDimitry Andric           }
16600b57cec5SDimitry Andric         }
16610b57cec5SDimitry Andric       }
16620b57cec5SDimitry Andric     } else {
16630b57cec5SDimitry Andric       // custom allocator, pool size not requested
16640b57cec5SDimitry Andric       ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
16650b57cec5SDimitry Andric       if (ptr == NULL) {
16665ffd83dbSDimitry Andric         if (al->fb == omp_atv_default_mem_fb) {
16670b57cec5SDimitry Andric           al = (kmp_allocator_t *)omp_default_mem_alloc;
16680b57cec5SDimitry Andric           ptr = kmp_mk_alloc(*mk_default, desc.size_a);
16695ffd83dbSDimitry Andric         } else if (al->fb == omp_atv_abort_fb) {
16700b57cec5SDimitry Andric           KMP_ASSERT(0); // abort fallback requested
16715ffd83dbSDimitry Andric         } else if (al->fb == omp_atv_allocator_fb) {
16720b57cec5SDimitry Andric           KMP_ASSERT(al != al->fb_data);
16730b57cec5SDimitry Andric           al = al->fb_data;
1674bdd1243dSDimitry Andric           ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
1675bdd1243dSDimitry Andric           if (is_pinned && kmp_target_lock_mem)
1676bdd1243dSDimitry Andric             kmp_target_lock_mem(ptr, size, default_device);
1677bdd1243dSDimitry Andric           return ptr;
16780b57cec5SDimitry Andric         }
16790b57cec5SDimitry Andric       }
16800b57cec5SDimitry Andric     }
16810b57cec5SDimitry Andric   } else if (allocator < kmp_max_mem_alloc) {
16820b57cec5SDimitry Andric     // pre-defined allocator
16830b57cec5SDimitry Andric     if (allocator == omp_high_bw_mem_alloc) {
1684bdd1243dSDimitry Andric       KMP_WARNING(OmpNoAllocator, "omp_high_bw_mem_alloc");
1685e8d8bef9SDimitry Andric     } else if (allocator == omp_large_cap_mem_alloc) {
1686bdd1243dSDimitry Andric       KMP_WARNING(OmpNoAllocator, "omp_large_cap_mem_alloc");
1687bdd1243dSDimitry Andric     } else if (allocator == omp_const_mem_alloc) {
1688bdd1243dSDimitry Andric       KMP_WARNING(OmpNoAllocator, "omp_const_mem_alloc");
1689bdd1243dSDimitry Andric     } else if (allocator == omp_low_lat_mem_alloc) {
1690bdd1243dSDimitry Andric       KMP_WARNING(OmpNoAllocator, "omp_low_lat_mem_alloc");
1691bdd1243dSDimitry Andric     } else if (allocator == omp_cgroup_mem_alloc) {
1692bdd1243dSDimitry Andric       KMP_WARNING(OmpNoAllocator, "omp_cgroup_mem_alloc");
1693bdd1243dSDimitry Andric     } else if (allocator == omp_pteam_mem_alloc) {
1694bdd1243dSDimitry Andric       KMP_WARNING(OmpNoAllocator, "omp_pteam_mem_alloc");
1695bdd1243dSDimitry Andric     } else if (allocator == omp_thread_mem_alloc) {
1696bdd1243dSDimitry Andric       KMP_WARNING(OmpNoAllocator, "omp_thread_mem_alloc");
1697bdd1243dSDimitry Andric     } else { // default allocator requested
1698bdd1243dSDimitry Andric       use_default_allocator = true;
1699bdd1243dSDimitry Andric     }
1700bdd1243dSDimitry Andric     if (use_default_allocator) {
17010b57cec5SDimitry Andric       ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
1702bdd1243dSDimitry Andric       use_default_allocator = false;
17030b57cec5SDimitry Andric     }
17040b57cec5SDimitry Andric   } else if (al->pool_size > 0) {
17050b57cec5SDimitry Andric     // custom allocator with pool size requested
17060b57cec5SDimitry Andric     kmp_uint64 used =
17070b57cec5SDimitry Andric         KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
17080b57cec5SDimitry Andric     if (used + desc.size_a > al->pool_size) {
17090b57cec5SDimitry Andric       // not enough space, need to go fallback path
17100b57cec5SDimitry Andric       KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
17115ffd83dbSDimitry Andric       if (al->fb == omp_atv_default_mem_fb) {
17120b57cec5SDimitry Andric         al = (kmp_allocator_t *)omp_default_mem_alloc;
17130b57cec5SDimitry Andric         ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
17145ffd83dbSDimitry Andric       } else if (al->fb == omp_atv_abort_fb) {
17150b57cec5SDimitry Andric         KMP_ASSERT(0); // abort fallback requested
17165ffd83dbSDimitry Andric       } else if (al->fb == omp_atv_allocator_fb) {
17170b57cec5SDimitry Andric         KMP_ASSERT(al != al->fb_data);
17180b57cec5SDimitry Andric         al = al->fb_data;
1719bdd1243dSDimitry Andric         ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
1720bdd1243dSDimitry Andric         if (is_pinned && kmp_target_lock_mem)
1721bdd1243dSDimitry Andric           kmp_target_lock_mem(ptr, size, default_device);
1722bdd1243dSDimitry Andric         return ptr;
17230b57cec5SDimitry Andric       } // else ptr == NULL;
17240b57cec5SDimitry Andric     } else {
17250b57cec5SDimitry Andric       // pool has enough space
17260b57cec5SDimitry Andric       ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
17275ffd83dbSDimitry Andric       if (ptr == NULL && al->fb == omp_atv_abort_fb) {
17280b57cec5SDimitry Andric         KMP_ASSERT(0); // abort fallback requested
17290b57cec5SDimitry Andric       } // no point looking for another fallback; the internal alloc is the same
17300b57cec5SDimitry Andric     }
17310b57cec5SDimitry Andric   } else {
17320b57cec5SDimitry Andric     // custom allocator, pool size not requested
17330b57cec5SDimitry Andric     ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
17345ffd83dbSDimitry Andric     if (ptr == NULL && al->fb == omp_atv_abort_fb) {
17350b57cec5SDimitry Andric       KMP_ASSERT(0); // abort fallback requested
17360b57cec5SDimitry Andric     } // no point looking for another fallback; the internal alloc is the same
17370b57cec5SDimitry Andric   }
1738349cc55cSDimitry Andric   KE_TRACE(10, ("__kmp_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a));
17390b57cec5SDimitry Andric   if (ptr == NULL)
17400b57cec5SDimitry Andric     return NULL;
17410b57cec5SDimitry Andric 
1742bdd1243dSDimitry Andric   if (is_pinned && kmp_target_lock_mem)
1743bdd1243dSDimitry Andric     kmp_target_lock_mem(ptr, desc.size_a, default_device);
1744bdd1243dSDimitry Andric 
17450b57cec5SDimitry Andric   addr = (kmp_uintptr_t)ptr;
17460b57cec5SDimitry Andric   addr_align = (addr + sz_desc + align - 1) & ~(align - 1);
17470b57cec5SDimitry Andric   addr_descr = addr_align - sz_desc;
17480b57cec5SDimitry Andric 
17490b57cec5SDimitry Andric   desc.ptr_alloc = ptr;
17500b57cec5SDimitry Andric   desc.ptr_align = (void *)addr_align;
17510b57cec5SDimitry Andric   desc.allocator = al;
17520b57cec5SDimitry Andric   *((kmp_mem_desc_t *)addr_descr) = desc; // save descriptor contents
17530b57cec5SDimitry Andric   KMP_MB();
17540b57cec5SDimitry Andric 
17550b57cec5SDimitry Andric   return desc.ptr_align;
17560b57cec5SDimitry Andric }
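/* Worked example of the placement arithmetic above, assuming 64-bit pointers
   so sz_desc == sizeof(kmp_mem_desc_t) == 40 and the default align == 8:

     addr       = 0x1004                            (as returned by the allocator)
     addr_align = (0x1004 + 40 + 7) & ~7 = 0x1030   // next 8-byte boundary
     addr_descr = 0x1030 - 40            = 0x1008   // descriptor just below

   The descriptor therefore always sits immediately before the pointer handed
   back to the caller, which is what __kmp_realloc() and ___kmpc_free() rely
   on when they read it back. */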
17570b57cec5SDimitry Andric 
1758349cc55cSDimitry Andric void *__kmp_calloc(int gtid, size_t algn, size_t nmemb, size_t size,
1759e8d8bef9SDimitry Andric                    omp_allocator_handle_t allocator) {
1760e8d8bef9SDimitry Andric   void *ptr = NULL;
1761e8d8bef9SDimitry Andric   kmp_allocator_t *al;
1762e8d8bef9SDimitry Andric   KMP_DEBUG_ASSERT(__kmp_init_serial);
1763e8d8bef9SDimitry Andric 
1764e8d8bef9SDimitry Andric   if (allocator == omp_null_allocator)
1765e8d8bef9SDimitry Andric     allocator = __kmp_threads[gtid]->th.th_def_allocator;
1766e8d8bef9SDimitry Andric 
1767349cc55cSDimitry Andric   al = RCAST(kmp_allocator_t *, allocator);
1768e8d8bef9SDimitry Andric 
1769e8d8bef9SDimitry Andric   if (nmemb == 0 || size == 0)
1770e8d8bef9SDimitry Andric     return ptr;
1771e8d8bef9SDimitry Andric 
1772e8d8bef9SDimitry Andric   if ((SIZE_MAX - sizeof(kmp_mem_desc_t)) / size < nmemb) {
1773e8d8bef9SDimitry Andric     if (al->fb == omp_atv_abort_fb) {
1774e8d8bef9SDimitry Andric       KMP_ASSERT(0);
1775e8d8bef9SDimitry Andric     }
1776e8d8bef9SDimitry Andric     return ptr;
1777e8d8bef9SDimitry Andric   }
1778e8d8bef9SDimitry Andric 
1779349cc55cSDimitry Andric   ptr = __kmp_alloc(gtid, algn, nmemb * size, allocator);
1780e8d8bef9SDimitry Andric 
1781e8d8bef9SDimitry Andric   if (ptr) {
1782e8d8bef9SDimitry Andric     memset(ptr, 0x00, nmemb * size);
1783e8d8bef9SDimitry Andric   }
1784e8d8bef9SDimitry Andric   return ptr;
1785e8d8bef9SDimitry Andric }
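/* Overflow-check sketch for the guard above: with a 64-bit size_t,
   nmemb == (1ULL << 32) and size == (1ULL << 32) would make nmemb * size wrap
   to 0, but (SIZE_MAX - sizeof(kmp_mem_desc_t)) / size is about 2^32 - 1,
   which is < nmemb, so the request is rejected (or aborts under
   omp_atv_abort_fb) instead of zeroing a bogus tiny block. */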
1786e8d8bef9SDimitry Andric 
1787349cc55cSDimitry Andric void *__kmp_realloc(int gtid, void *ptr, size_t size,
1788e8d8bef9SDimitry Andric                     omp_allocator_handle_t allocator,
1789e8d8bef9SDimitry Andric                     omp_allocator_handle_t free_allocator) {
1790e8d8bef9SDimitry Andric   void *nptr = NULL;
1791e8d8bef9SDimitry Andric   KMP_DEBUG_ASSERT(__kmp_init_serial);
1792e8d8bef9SDimitry Andric 
1793e8d8bef9SDimitry Andric   if (size == 0) {
1794e8d8bef9SDimitry Andric     if (ptr != NULL)
1795349cc55cSDimitry Andric       ___kmpc_free(gtid, ptr, free_allocator);
1796e8d8bef9SDimitry Andric     return nptr;
1797e8d8bef9SDimitry Andric   }
1798e8d8bef9SDimitry Andric 
1799349cc55cSDimitry Andric   nptr = __kmp_alloc(gtid, 0, size, allocator);
1800e8d8bef9SDimitry Andric 
1801e8d8bef9SDimitry Andric   if (nptr != NULL && ptr != NULL) {
1802e8d8bef9SDimitry Andric     kmp_mem_desc_t desc;
1803e8d8bef9SDimitry Andric     kmp_uintptr_t addr_align; // address to return to caller
1804e8d8bef9SDimitry Andric     kmp_uintptr_t addr_descr; // address of memory block descriptor
1805e8d8bef9SDimitry Andric 
1806e8d8bef9SDimitry Andric     addr_align = (kmp_uintptr_t)ptr;
1807e8d8bef9SDimitry Andric     addr_descr = addr_align - sizeof(kmp_mem_desc_t);
1808e8d8bef9SDimitry Andric     desc = *((kmp_mem_desc_t *)addr_descr); // read descriptor
1809e8d8bef9SDimitry Andric 
1810e8d8bef9SDimitry Andric     KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
1811e8d8bef9SDimitry Andric     KMP_DEBUG_ASSERT(desc.size_orig > 0);
1812e8d8bef9SDimitry Andric     KMP_DEBUG_ASSERT(desc.size_orig < desc.size_a);
1813e8d8bef9SDimitry Andric     KMP_MEMCPY((char *)nptr, (char *)ptr,
1814e8d8bef9SDimitry Andric                (size_t)((size < desc.size_orig) ? size : desc.size_orig));
1815e8d8bef9SDimitry Andric   }
1816e8d8bef9SDimitry Andric 
1817e8d8bef9SDimitry Andric   if (nptr != NULL) {
1818349cc55cSDimitry Andric     ___kmpc_free(gtid, ptr, free_allocator);
1819e8d8bef9SDimitry Andric   }
1820e8d8bef9SDimitry Andric 
1821e8d8bef9SDimitry Andric   return nptr;
1822e8d8bef9SDimitry Andric }
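// Illustrative note: the KMP_MEMCPY above preserves min(size, desc.size_orig)
// bytes, i.e. realloc-style semantics. Through the OpenMP 5.1 API this path
// is reached roughly as:
//
//   void *p = omp_alloc(64, omp_default_mem_alloc);
//   p = omp_realloc(p, 256, omp_default_mem_alloc, omp_default_mem_alloc);
//   omp_free(p, omp_default_mem_alloc);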
1823e8d8bef9SDimitry Andric 
1824349cc55cSDimitry Andric void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
18250b57cec5SDimitry Andric   if (ptr == NULL)
18260b57cec5SDimitry Andric     return;
18270b57cec5SDimitry Andric 
18280b57cec5SDimitry Andric   kmp_allocator_t *al;
18290b57cec5SDimitry Andric   omp_allocator_handle_t oal;
18300b57cec5SDimitry Andric   al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator));
18310b57cec5SDimitry Andric   kmp_mem_desc_t desc;
18320b57cec5SDimitry Andric   kmp_uintptr_t addr_align; // aligned address passed in by the caller
18330b57cec5SDimitry Andric   kmp_uintptr_t addr_descr; // address of memory block descriptor
1834bdd1243dSDimitry Andric   if (__kmp_target_mem_available && (KMP_IS_TARGET_MEM_ALLOC(allocator) ||
1835fe6060f1SDimitry Andric                                      (allocator > kmp_max_mem_alloc &&
1836bdd1243dSDimitry Andric                                       KMP_IS_TARGET_MEM_SPACE(al->memspace)))) {
1837fe6060f1SDimitry Andric     kmp_int32 device =
1838fe6060f1SDimitry Andric         __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1839bdd1243dSDimitry Andric     if (allocator == llvm_omp_target_host_mem_alloc) {
1840bdd1243dSDimitry Andric       kmp_target_free_host(ptr, device);
1841bdd1243dSDimitry Andric     } else if (allocator == llvm_omp_target_shared_mem_alloc) {
1842bdd1243dSDimitry Andric       kmp_target_free_shared(ptr, device);
1843bdd1243dSDimitry Andric     } else if (allocator == llvm_omp_target_device_mem_alloc) {
1844bdd1243dSDimitry Andric       kmp_target_free_device(ptr, device);
1845bdd1243dSDimitry Andric     }
1846fe6060f1SDimitry Andric     return;
1847fe6060f1SDimitry Andric   }
18480b57cec5SDimitry Andric 
18490b57cec5SDimitry Andric   addr_align = (kmp_uintptr_t)ptr;
18500b57cec5SDimitry Andric   addr_descr = addr_align - sizeof(kmp_mem_desc_t);
18510b57cec5SDimitry Andric   desc = *((kmp_mem_desc_t *)addr_descr); // read descriptor
18520b57cec5SDimitry Andric 
18530b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
18540b57cec5SDimitry Andric   if (allocator) {
18550b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(desc.allocator == al || desc.allocator == al->fb_data);
18560b57cec5SDimitry Andric   }
18570b57cec5SDimitry Andric   al = desc.allocator;
18580b57cec5SDimitry Andric   oal = (omp_allocator_handle_t)al; // cast to void* for comparisons
18590b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(al);
18600b57cec5SDimitry Andric 
1861bdd1243dSDimitry Andric   if (allocator > kmp_max_mem_alloc && kmp_target_unlock_mem && al->pinned) {
1862bdd1243dSDimitry Andric     kmp_int32 device =
1863bdd1243dSDimitry Andric         __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1864bdd1243dSDimitry Andric     kmp_target_unlock_mem(desc.ptr_alloc, device);
1865bdd1243dSDimitry Andric   }
1866bdd1243dSDimitry Andric 
18670b57cec5SDimitry Andric   if (__kmp_memkind_available) {
18680b57cec5SDimitry Andric     if (oal < kmp_max_mem_alloc) {
18690b57cec5SDimitry Andric       // pre-defined allocator
18700b57cec5SDimitry Andric       if (oal == omp_high_bw_mem_alloc && mk_hbw_preferred) {
18710b57cec5SDimitry Andric         kmp_mk_free(*mk_hbw_preferred, desc.ptr_alloc);
1872e8d8bef9SDimitry Andric       } else if (oal == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
1873e8d8bef9SDimitry Andric         kmp_mk_free(*mk_dax_kmem_all, desc.ptr_alloc);
18740b57cec5SDimitry Andric       } else {
18750b57cec5SDimitry Andric         kmp_mk_free(*mk_default, desc.ptr_alloc);
18760b57cec5SDimitry Andric       }
18770b57cec5SDimitry Andric     } else {
18780b57cec5SDimitry Andric       if (al->pool_size > 0) { // custom allocator with pool size requested
18790b57cec5SDimitry Andric         kmp_uint64 used =
18800b57cec5SDimitry Andric             KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
18810b57cec5SDimitry Andric         (void)used; // to suppress compiler warning
18820b57cec5SDimitry Andric         KMP_DEBUG_ASSERT(used >= desc.size_a);
18830b57cec5SDimitry Andric       }
18840b57cec5SDimitry Andric       kmp_mk_free(*al->memkind, desc.ptr_alloc);
18850b57cec5SDimitry Andric     }
18860b57cec5SDimitry Andric   } else {
18870b57cec5SDimitry Andric     if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
18880b57cec5SDimitry Andric       kmp_uint64 used =
18890b57cec5SDimitry Andric           KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
18900b57cec5SDimitry Andric       (void)used; // to suppress compiler warning
18910b57cec5SDimitry Andric       KMP_DEBUG_ASSERT(used >= desc.size_a);
18920b57cec5SDimitry Andric     }
18930b57cec5SDimitry Andric     __kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc);
18940b57cec5SDimitry Andric   }
18950b57cec5SDimitry Andric }
18960b57cec5SDimitry Andric 
18970b57cec5SDimitry Andric /* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. This causes
18980b57cec5SDimitry Andric    memory leaks, but it may be useful for debugging memory corruption, use of
18990b57cec5SDimitry Andric    freed pointers, etc. */
19000b57cec5SDimitry Andric /* #define LEAK_MEMORY */
19010b57cec5SDimitry Andric struct kmp_mem_descr { // Memory block descriptor.
19020b57cec5SDimitry Andric   void *ptr_allocated; // Pointer returned by malloc(), subject for free().
19030b57cec5SDimitry Andric   size_t size_allocated; // Size of allocated memory block.
19040b57cec5SDimitry Andric   void *ptr_aligned; // Pointer to aligned memory, to be used by client code.
19050b57cec5SDimitry Andric   size_t size_aligned; // Size of aligned memory block.
19060b57cec5SDimitry Andric };
19070b57cec5SDimitry Andric typedef struct kmp_mem_descr kmp_mem_descr_t;
19080b57cec5SDimitry Andric 
19090b57cec5SDimitry Andric /* Allocate memory on requested boundary, fill allocated memory with 0x00.
19100b57cec5SDimitry Andric    NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
19110b57cec5SDimitry Andric    error. Must use __kmp_free when freeing memory allocated by this routine! */
19120b57cec5SDimitry Andric static void *___kmp_allocate_align(size_t size,
19130b57cec5SDimitry Andric                                    size_t alignment KMP_SRC_LOC_DECL) {
19140b57cec5SDimitry Andric   /* __kmp_allocate() allocates (via malloc()) a bigger memory block than
19150b57cec5SDimitry Andric      requested so it can return a properly aligned pointer. The original
19160b57cec5SDimitry Andric      pointer returned by malloc() and the size of the allocated block are
19170b57cec5SDimitry Andric      saved in a descriptor just before the aligned pointer. __kmp_free() uses
19180b57cec5SDimitry Andric      this information to pass the original pointer, not the aligned one, to free().
19190b57cec5SDimitry Andric 
19200b57cec5SDimitry Andric           +---------+------------+-----------------------------------+---------+
19210b57cec5SDimitry Andric           | padding | descriptor |           aligned block           | padding |
19220b57cec5SDimitry Andric           +---------+------------+-----------------------------------+---------+
19230b57cec5SDimitry Andric           ^                      ^
19240b57cec5SDimitry Andric           |                      |
19250b57cec5SDimitry Andric           |                      +- Aligned pointer returned to caller
19260b57cec5SDimitry Andric           +- Pointer returned by malloc()
19270b57cec5SDimitry Andric 
19280b57cec5SDimitry Andric       Aligned block is filled with zeros, paddings are filled with 0xEF. */
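  /* Worked example (illustrative numbers, assuming a 64-bit build where
     sizeof(kmp_mem_descr_t) == 32): if malloc() returns 0x1008 and
     alignment == 64, then
       addr_aligned = (0x1008 + 32 + 64) & ~63  ==  0x1040
       addr_descr   = 0x1040 - 32               ==  0x1020
     so the descriptor sits immediately below the aligned pointer and both
     remain inside the over-allocated block checked by the asserts below. */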
19290b57cec5SDimitry Andric 
19300b57cec5SDimitry Andric   kmp_mem_descr_t descr;
19310b57cec5SDimitry Andric   kmp_uintptr_t addr_allocated; // Address returned by malloc().
19320b57cec5SDimitry Andric   kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
19330b57cec5SDimitry Andric   kmp_uintptr_t addr_descr; // Address of memory block descriptor.
19340b57cec5SDimitry Andric 
19350b57cec5SDimitry Andric   KE_TRACE(25, ("-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
19360b57cec5SDimitry Andric                 (int)size, (int)alignment KMP_SRC_LOC_PARM));
19370b57cec5SDimitry Andric 
19380b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big.
19390b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(sizeof(void *) <= sizeof(kmp_uintptr_t));
19400b57cec5SDimitry Andric   // Make sure kmp_uintptr_t is enough to store addresses.
19410b57cec5SDimitry Andric 
19420b57cec5SDimitry Andric   descr.size_aligned = size;
19430b57cec5SDimitry Andric   descr.size_allocated =
19440b57cec5SDimitry Andric       descr.size_aligned + sizeof(kmp_mem_descr_t) + alignment;
19450b57cec5SDimitry Andric 
19460b57cec5SDimitry Andric #if KMP_DEBUG
19470b57cec5SDimitry Andric   descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_);
19480b57cec5SDimitry Andric #else
19490b57cec5SDimitry Andric   descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM);
19500b57cec5SDimitry Andric #endif
19510b57cec5SDimitry Andric   KE_TRACE(10, ("   malloc( %d ) returned %p\n", (int)descr.size_allocated,
19520b57cec5SDimitry Andric                 descr.ptr_allocated));
19530b57cec5SDimitry Andric   if (descr.ptr_allocated == NULL) {
19540b57cec5SDimitry Andric     KMP_FATAL(OutOfHeapMemory);
19550b57cec5SDimitry Andric   }
19560b57cec5SDimitry Andric 
19570b57cec5SDimitry Andric   addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
19580b57cec5SDimitry Andric   addr_aligned =
19590b57cec5SDimitry Andric       (addr_allocated + sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1);
19600b57cec5SDimitry Andric   addr_descr = addr_aligned - sizeof(kmp_mem_descr_t);
19610b57cec5SDimitry Andric 
19620b57cec5SDimitry Andric   descr.ptr_aligned = (void *)addr_aligned;
19630b57cec5SDimitry Andric 
19640b57cec5SDimitry Andric   KE_TRACE(26, ("   ___kmp_allocate_align: "
19650b57cec5SDimitry Andric                 "ptr_allocated=%p, size_allocated=%d, "
19660b57cec5SDimitry Andric                 "ptr_aligned=%p, size_aligned=%d\n",
19670b57cec5SDimitry Andric                 descr.ptr_allocated, (int)descr.size_allocated,
19680b57cec5SDimitry Andric                 descr.ptr_aligned, (int)descr.size_aligned));
19690b57cec5SDimitry Andric 
19700b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(addr_allocated <= addr_descr);
19710b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(addr_descr + sizeof(kmp_mem_descr_t) == addr_aligned);
19720b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
19730b57cec5SDimitry Andric                    addr_allocated + descr.size_allocated);
19740b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(addr_aligned % alignment == 0);
19750b57cec5SDimitry Andric #ifdef KMP_DEBUG
19760b57cec5SDimitry Andric   memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
19770b57cec5SDimitry Andric // Fill allocated memory block with 0xEF.
19780b57cec5SDimitry Andric #endif
19790b57cec5SDimitry Andric   memset(descr.ptr_aligned, 0x00, descr.size_aligned);
19800b57cec5SDimitry Andric   // Fill the aligned memory block (which is intended for use by the caller)
19810b57cec5SDimitry Andric   // with 0x00. Do not put this fill under a KMP_DEBUG condition! Many callers
19820b57cec5SDimitry Andric   // expect zeroed memory. (Padding bytes remain filled with 0xEF in the
19830b57cec5SDimitry Andric   // debugging library.)
19850b57cec5SDimitry Andric   *((kmp_mem_descr_t *)addr_descr) = descr;
19860b57cec5SDimitry Andric 
19870b57cec5SDimitry Andric   KMP_MB();
19880b57cec5SDimitry Andric 
19890b57cec5SDimitry Andric   KE_TRACE(25, ("<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned));
19900b57cec5SDimitry Andric   return descr.ptr_aligned;
19910b57cec5SDimitry Andric } // func ___kmp_allocate_align
19920b57cec5SDimitry Andric 
19930b57cec5SDimitry Andric /* Allocate memory on cache line boundary, fill allocated memory with 0x00.
19940b57cec5SDimitry Andric    Do not call this func directly! Use __kmp_allocate macro instead.
19950b57cec5SDimitry Andric    NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
19960b57cec5SDimitry Andric    error. Must use __kmp_free when freeing memory allocated by this routine! */
19970b57cec5SDimitry Andric void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL) {
19980b57cec5SDimitry Andric   void *ptr;
19990b57cec5SDimitry Andric   KE_TRACE(25, ("-> __kmp_allocate( %d ) called from %s:%d\n",
20000b57cec5SDimitry Andric                 (int)size KMP_SRC_LOC_PARM));
20010b57cec5SDimitry Andric   ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM);
20020b57cec5SDimitry Andric   KE_TRACE(25, ("<- __kmp_allocate() returns %p\n", ptr));
20030b57cec5SDimitry Andric   return ptr;
20040b57cec5SDimitry Andric } // func ___kmp_allocate
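// Illustrative note: callers are expected to go through the __kmp_allocate()
// macro (defined in kmp.h), which forwards the caller's file/line through the
// KMP_SRC_LOC machinery so the KE_TRACE output above can report where the
// allocation was requested.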
20050b57cec5SDimitry Andric 
20060b57cec5SDimitry Andric /* Allocate memory on page boundary, fill allocated memory with 0x00.
20070b57cec5SDimitry Andric    Do not call this func directly! Use the __kmp_page_allocate macro instead.
20080b57cec5SDimitry Andric    NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
20090b57cec5SDimitry Andric    error. Must use __kmp_free when freeing memory allocated by this routine! */
20100b57cec5SDimitry Andric void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL) {
20110b57cec5SDimitry Andric   int page_size = 8 * 1024;
20120b57cec5SDimitry Andric   void *ptr;
20130b57cec5SDimitry Andric 
20140b57cec5SDimitry Andric   KE_TRACE(25, ("-> __kmp_page_allocate( %d ) called from %s:%d\n",
20150b57cec5SDimitry Andric                 (int)size KMP_SRC_LOC_PARM));
20160b57cec5SDimitry Andric   ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM);
20170b57cec5SDimitry Andric   KE_TRACE(25, ("<- __kmp_page_allocate( %d ) returns %p\n", (int)size, ptr));
20180b57cec5SDimitry Andric   return ptr;
20190b57cec5SDimitry Andric } // ___kmp_page_allocate
20200b57cec5SDimitry Andric 
20210b57cec5SDimitry Andric /* Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
20220b57cec5SDimitry Andric    In debug mode, fill the memory block with 0xEF before the call to free(). */
20230b57cec5SDimitry Andric void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) {
20240b57cec5SDimitry Andric   kmp_mem_descr_t descr;
2025349cc55cSDimitry Andric #if KMP_DEBUG
20260b57cec5SDimitry Andric   kmp_uintptr_t addr_allocated; // Address returned by malloc().
20270b57cec5SDimitry Andric   kmp_uintptr_t addr_aligned; // Aligned address passed by caller.
2028349cc55cSDimitry Andric #endif
20290b57cec5SDimitry Andric   KE_TRACE(25,
20300b57cec5SDimitry Andric            ("-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM));
20310b57cec5SDimitry Andric   KMP_ASSERT(ptr != NULL);
20320b57cec5SDimitry Andric 
20330b57cec5SDimitry Andric   descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t));
20340b57cec5SDimitry Andric 
20350b57cec5SDimitry Andric   KE_TRACE(26, ("   __kmp_free:     "
20360b57cec5SDimitry Andric                 "ptr_allocated=%p, size_allocated=%d, "
20370b57cec5SDimitry Andric                 "ptr_aligned=%p, size_aligned=%d\n",
20380b57cec5SDimitry Andric                 descr.ptr_allocated, (int)descr.size_allocated,
20390b57cec5SDimitry Andric                 descr.ptr_aligned, (int)descr.size_aligned));
2040349cc55cSDimitry Andric #if KMP_DEBUG
20410b57cec5SDimitry Andric   addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
20420b57cec5SDimitry Andric   addr_aligned = (kmp_uintptr_t)descr.ptr_aligned;
20430b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0);
20440b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr);
20450b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(addr_allocated + sizeof(kmp_mem_descr_t) <= addr_aligned);
20460b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated);
20470b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
20480b57cec5SDimitry Andric                    addr_allocated + descr.size_allocated);
20490b57cec5SDimitry Andric   memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
20500b57cec5SDimitry Andric // Fill memory block with 0xEF, it helps catch using freed memory.
20510b57cec5SDimitry Andric #endif
20520b57cec5SDimitry Andric 
20530b57cec5SDimitry Andric #ifndef LEAK_MEMORY
20540b57cec5SDimitry Andric   KE_TRACE(10, ("   free( %p )\n", descr.ptr_allocated));
20550b57cec5SDimitry Andric #ifdef KMP_DEBUG
20560b57cec5SDimitry Andric   _free_src_loc(descr.ptr_allocated, _file_, _line_);
20570b57cec5SDimitry Andric #else
20580b57cec5SDimitry Andric   free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM);
20590b57cec5SDimitry Andric #endif
20600b57cec5SDimitry Andric #endif
20610b57cec5SDimitry Andric   KMP_MB();
20620b57cec5SDimitry Andric   KE_TRACE(25, ("<- __kmp_free() returns\n"));
20630b57cec5SDimitry Andric } // func ___kmp_free
20640b57cec5SDimitry Andric 
20650b57cec5SDimitry Andric #if USE_FAST_MEMORY == 3
20660b57cec5SDimitry Andric // Allocate fast memory by first scanning the thread's free lists.
20670b57cec5SDimitry Andric // If a chunk of the right size exists, grab it off the free list.
20680b57cec5SDimitry Andric // Otherwise allocate normally using kmp_thread_malloc.
20690b57cec5SDimitry Andric 
20700b57cec5SDimitry Andric // AC: How to choose the limit? Just get 16 for now...
20710b57cec5SDimitry Andric #define KMP_FREE_LIST_LIMIT 16
20720b57cec5SDimitry Andric 
20730b57cec5SDimitry Andric // Always use 128 bytes for determining buckets for caching memory blocks
20740b57cec5SDimitry Andric #define DCACHE_LINE 128
20750b57cec5SDimitry Andric 
20760b57cec5SDimitry Andric void *___kmp_fast_allocate(kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL) {
20770b57cec5SDimitry Andric   void *ptr;
2078e8d8bef9SDimitry Andric   size_t num_lines, idx;
20790b57cec5SDimitry Andric   int index;
20800b57cec5SDimitry Andric   void *alloc_ptr;
20810b57cec5SDimitry Andric   size_t alloc_size;
20820b57cec5SDimitry Andric   kmp_mem_descr_t *descr;
20830b57cec5SDimitry Andric 
20840b57cec5SDimitry Andric   KE_TRACE(25, ("-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
20850b57cec5SDimitry Andric                 __kmp_gtid_from_thread(this_thr), (int)size KMP_SRC_LOC_PARM));
20860b57cec5SDimitry Andric 
20870b57cec5SDimitry Andric   num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE;
20880b57cec5SDimitry Andric   idx = num_lines - 1;
20890b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(idx >= 0);
20900b57cec5SDimitry Andric   if (idx < 2) {
20910b57cec5SDimitry Andric     index = 0; // idx is [ 0, 1 ], use first free list
20920b57cec5SDimitry Andric     num_lines = 2; // 1 or 2 cache lines, or less than one cache line
20930b57cec5SDimitry Andric   } else if ((idx >>= 2) == 0) {
20940b57cec5SDimitry Andric     index = 1; // idx is [ 2, 3 ], use second free list
20950b57cec5SDimitry Andric     num_lines = 4; // 3, 4 cache lines
20960b57cec5SDimitry Andric   } else if ((idx >>= 2) == 0) {
20970b57cec5SDimitry Andric     index = 2; // idx is [ 4, 15 ], use third free list
20980b57cec5SDimitry Andric     num_lines = 16; // 5, 6, ..., 16 cache lines
20990b57cec5SDimitry Andric   } else if ((idx >>= 2) == 0) {
21000b57cec5SDimitry Andric     index = 3; // idx is [ 16, 63 ], use fourth free list
21010b57cec5SDimitry Andric     num_lines = 64; // 17, 18, ..., 64 cache lines
21020b57cec5SDimitry Andric   } else {
21030b57cec5SDimitry Andric     goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
21040b57cec5SDimitry Andric   }
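  // Illustrative trace of the bucket selection above (example numbers): a
  // request of 700 bytes gives num_lines = ceil(700/128) = 6 and idx = 5;
  // 5 >> 2 == 1 (not 0), then 1 >> 2 == 0, so index = 2 and the block is
  // rounded up to 16 cache lines (2048 bytes). Requests above 64 cache lines
  // (8KB) jump to alloc_call and bypass the free lists entirely.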
21050b57cec5SDimitry Andric 
21060b57cec5SDimitry Andric   ptr = this_thr->th.th_free_lists[index].th_free_list_self;
21070b57cec5SDimitry Andric   if (ptr != NULL) {
21080b57cec5SDimitry Andric     // pop the head of no-sync free list
21090b57cec5SDimitry Andric     this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
2110fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
2111fe6060f1SDimitry Andric                                                       sizeof(kmp_mem_descr_t)))
21120b57cec5SDimitry Andric                                      ->ptr_aligned);
21130b57cec5SDimitry Andric     goto end;
21140b57cec5SDimitry Andric   }
21150b57cec5SDimitry Andric   ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
21160b57cec5SDimitry Andric   if (ptr != NULL) {
21170b57cec5SDimitry Andric     // no-sync free list is empty, use sync free list (filled in by other
21180b57cec5SDimitry Andric     // threads only)
21190b57cec5SDimitry Andric     // pop the head of the sync free list, push NULL instead
21200b57cec5SDimitry Andric     while (!KMP_COMPARE_AND_STORE_PTR(
21210b57cec5SDimitry Andric         &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, nullptr)) {
21220b57cec5SDimitry Andric       KMP_CPU_PAUSE();
21230b57cec5SDimitry Andric       ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
21240b57cec5SDimitry Andric     }
21250b57cec5SDimitry Andric     // push the rest of the chain onto the no-sync free list (can be NULL if
21260b57cec5SDimitry Andric     // it was the only block)
21270b57cec5SDimitry Andric     this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
2128fe6060f1SDimitry Andric     KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
2129fe6060f1SDimitry Andric                                                       sizeof(kmp_mem_descr_t)))
21300b57cec5SDimitry Andric                                      ->ptr_aligned);
21310b57cec5SDimitry Andric     goto end;
21320b57cec5SDimitry Andric   }
21330b57cec5SDimitry Andric 
21340b57cec5SDimitry Andric alloc_call:
21350b57cec5SDimitry Andric   // no suitable block found in the free lists, so allocate one
21360b57cec5SDimitry Andric   size = num_lines * DCACHE_LINE;
21370b57cec5SDimitry Andric 
21380b57cec5SDimitry Andric   alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE;
21390b57cec5SDimitry Andric   KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with "
21400b57cec5SDimitry Andric                 "alloc_size %d\n",
21410b57cec5SDimitry Andric                 __kmp_gtid_from_thread(this_thr), alloc_size));
21420b57cec5SDimitry Andric   alloc_ptr = bget(this_thr, (bufsize)alloc_size);
21430b57cec5SDimitry Andric 
21440b57cec5SDimitry Andric   // align ptr to DCACHE_LINE
21450b57cec5SDimitry Andric   ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) +
21460b57cec5SDimitry Andric                   DCACHE_LINE) &
21470b57cec5SDimitry Andric                  ~(DCACHE_LINE - 1));
21480b57cec5SDimitry Andric   descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
21490b57cec5SDimitry Andric 
21500b57cec5SDimitry Andric   descr->ptr_allocated = alloc_ptr; // remember allocated pointer
21510b57cec5SDimitry Andric   // we don't need size_allocated
21520b57cec5SDimitry Andric   descr->ptr_aligned = (void *)this_thr; // remember allocating thread
21530b57cec5SDimitry Andric   // (it is already saved in bget buffer,
21540b57cec5SDimitry Andric   // but we may want to use another allocator in future)
21550b57cec5SDimitry Andric   descr->size_aligned = size;
21560b57cec5SDimitry Andric 
21570b57cec5SDimitry Andric end:
21580b57cec5SDimitry Andric   KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n",
21590b57cec5SDimitry Andric                 __kmp_gtid_from_thread(this_thr), ptr));
21600b57cec5SDimitry Andric   return ptr;
21610b57cec5SDimitry Andric } // func __kmp_fast_allocate
21620b57cec5SDimitry Andric 
21630b57cec5SDimitry Andric // Free fast memory and place it on the thread's free list if it is of
21640b57cec5SDimitry Andric // the correct size.
21650b57cec5SDimitry Andric void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) {
21660b57cec5SDimitry Andric   kmp_mem_descr_t *descr;
21670b57cec5SDimitry Andric   kmp_info_t *alloc_thr;
21680b57cec5SDimitry Andric   size_t size;
21690b57cec5SDimitry Andric   size_t idx;
21700b57cec5SDimitry Andric   int index;
21710b57cec5SDimitry Andric 
21720b57cec5SDimitry Andric   KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
21730b57cec5SDimitry Andric                 __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM));
21740b57cec5SDimitry Andric   KMP_ASSERT(ptr != NULL);
21750b57cec5SDimitry Andric 
21760b57cec5SDimitry Andric   descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
21770b57cec5SDimitry Andric 
21780b57cec5SDimitry Andric   KE_TRACE(26, ("   __kmp_fast_free:     size_aligned=%d\n",
21790b57cec5SDimitry Andric                 (int)descr->size_aligned));
21800b57cec5SDimitry Andric 
21810b57cec5SDimitry Andric   size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines
21820b57cec5SDimitry Andric 
21830b57cec5SDimitry Andric   idx = DCACHE_LINE * 2; // 2 cache lines is the minimal block size
21840b57cec5SDimitry Andric   if (idx == size) {
21850b57cec5SDimitry Andric     index = 0; // 2 cache lines
21860b57cec5SDimitry Andric   } else if ((idx <<= 1) == size) {
21870b57cec5SDimitry Andric     index = 1; // 4 cache lines
21880b57cec5SDimitry Andric   } else if ((idx <<= 2) == size) {
21890b57cec5SDimitry Andric     index = 2; // 16 cache lines
21900b57cec5SDimitry Andric   } else if ((idx <<= 2) == size) {
21910b57cec5SDimitry Andric     index = 3; // 64 cache lines
21920b57cec5SDimitry Andric   } else {
21930b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64);
21940b57cec5SDimitry Andric     goto free_call; // 65 or more cache lines ( > 8KB )
21950b57cec5SDimitry Andric   }
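  // Illustrative check (example numbers): a block whose size_aligned is 2048
  // bytes (16 cache lines) walks idx through 256 -> 512 -> 2048 and selects
  // index = 2, the same bucket ___kmp_fast_allocate used when it rounded the
  // request up; blocks larger than 64 cache lines go to free_call and are
  // returned to bget directly.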
21960b57cec5SDimitry Andric 
21970b57cec5SDimitry Andric   alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
21980b57cec5SDimitry Andric   if (alloc_thr == this_thr) {
21990b57cec5SDimitry Andric     // push block to self no-sync free list, linking previous head (LIFO)
22000b57cec5SDimitry Andric     *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
22010b57cec5SDimitry Andric     this_thr->th.th_free_lists[index].th_free_list_self = ptr;
22020b57cec5SDimitry Andric   } else {
22030b57cec5SDimitry Andric     void *head = this_thr->th.th_free_lists[index].th_free_list_other;
22040b57cec5SDimitry Andric     if (head == NULL) {
22050b57cec5SDimitry Andric       // Create new free list
22060b57cec5SDimitry Andric       this_thr->th.th_free_lists[index].th_free_list_other = ptr;
22070b57cec5SDimitry Andric       *((void **)ptr) = NULL; // mark the tail of the list
22080b57cec5SDimitry Andric       descr->size_allocated = (size_t)1; // head of the list keeps its length
22090b57cec5SDimitry Andric     } else {
22100b57cec5SDimitry Andric       // need to check the existing "other" list's owner thread and queue size
22110b57cec5SDimitry Andric       kmp_mem_descr_t *dsc =
22120b57cec5SDimitry Andric           (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t));
22130b57cec5SDimitry Andric       // allocating thread, same for all queue nodes
22140b57cec5SDimitry Andric       kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned);
22150b57cec5SDimitry Andric       size_t q_sz =
22160b57cec5SDimitry Andric           dsc->size_allocated + 1; // new size in case we add current task
22170b57cec5SDimitry Andric       if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
22180b57cec5SDimitry Andric         // we can add current task to "other" list, no sync needed
22190b57cec5SDimitry Andric         *((void **)ptr) = head;
22200b57cec5SDimitry Andric         descr->size_allocated = q_sz;
22210b57cec5SDimitry Andric         this_thr->th.th_free_lists[index].th_free_list_other = ptr;
22220b57cec5SDimitry Andric       } else {
22230b57cec5SDimitry Andric         // either the queued blocks' owner is changing or the size limit was
22245ffd83dbSDimitry Andric         // exceeded: return the old queue to its allocating thread (q_th)
22250b57cec5SDimitry Andric         // synchronously, and start a new list for alloc_thr's blocks
22260b57cec5SDimitry Andric         void *old_ptr;
22270b57cec5SDimitry Andric         void *tail = head;
22280b57cec5SDimitry Andric         void *next = *((void **)head);
22290b57cec5SDimitry Andric         while (next != NULL) {
22300b57cec5SDimitry Andric           KMP_DEBUG_ASSERT(
22310b57cec5SDimitry Andric               // queue size should decrease by 1 each step through the list
22320b57cec5SDimitry Andric               ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t)))
22330b57cec5SDimitry Andric                       ->size_allocated +
22340b57cec5SDimitry Andric                   1 ==
22350b57cec5SDimitry Andric               ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t)))
22360b57cec5SDimitry Andric                   ->size_allocated);
22370b57cec5SDimitry Andric           tail = next; // remember tail node
22380b57cec5SDimitry Andric           next = *((void **)next);
22390b57cec5SDimitry Andric         }
22400b57cec5SDimitry Andric         KMP_DEBUG_ASSERT(q_th != NULL);
22410b57cec5SDimitry Andric         // push block to owner's sync free list
22420b57cec5SDimitry Andric         old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
22430b57cec5SDimitry Andric         /* the next pointer must be set before setting free_list to ptr to avoid
22440b57cec5SDimitry Andric            exposing a broken list to other threads, even for an instant. */
22450b57cec5SDimitry Andric         *((void **)tail) = old_ptr;
22460b57cec5SDimitry Andric 
22470b57cec5SDimitry Andric         while (!KMP_COMPARE_AND_STORE_PTR(
22480b57cec5SDimitry Andric             &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) {
22490b57cec5SDimitry Andric           KMP_CPU_PAUSE();
22500b57cec5SDimitry Andric           old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
22510b57cec5SDimitry Andric           *((void **)tail) = old_ptr;
22520b57cec5SDimitry Andric         }
22530b57cec5SDimitry Andric 
22540b57cec5SDimitry Andric         // start a new list of not-self blocks
22550b57cec5SDimitry Andric         this_thr->th.th_free_lists[index].th_free_list_other = ptr;
22560b57cec5SDimitry Andric         *((void **)ptr) = NULL;
22570b57cec5SDimitry Andric         descr->size_allocated = (size_t)1; // head of queue keeps its length
22580b57cec5SDimitry Andric       }
22590b57cec5SDimitry Andric     }
22600b57cec5SDimitry Andric   }
22610b57cec5SDimitry Andric   goto end;
22620b57cec5SDimitry Andric 
22630b57cec5SDimitry Andric free_call:
22640b57cec5SDimitry Andric   KE_TRACE(25, ("__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
22650b57cec5SDimitry Andric                 __kmp_gtid_from_thread(this_thr), size));
22660b57cec5SDimitry Andric   __kmp_bget_dequeue(this_thr); /* Release any queued buffers */
22670b57cec5SDimitry Andric   brel(this_thr, descr->ptr_allocated);
22680b57cec5SDimitry Andric 
22690b57cec5SDimitry Andric end:
22700b57cec5SDimitry Andric   KE_TRACE(25, ("<- __kmp_fast_free() returns\n"));
22710b57cec5SDimitry Andric 
22720b57cec5SDimitry Andric } // func __kmp_fast_free
22730b57cec5SDimitry Andric 
22740b57cec5SDimitry Andric // Initialize the thread free lists related to fast memory
22750b57cec5SDimitry Andric // Only do this when a thread is initially created.
22760b57cec5SDimitry Andric void __kmp_initialize_fast_memory(kmp_info_t *this_thr) {
22770b57cec5SDimitry Andric   KE_TRACE(10, ("__kmp_initialize_fast_memory: Called from th %p\n", this_thr));
22780b57cec5SDimitry Andric 
22790b57cec5SDimitry Andric   memset(this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof(kmp_free_list_t));
22800b57cec5SDimitry Andric }
22810b57cec5SDimitry Andric 
22820b57cec5SDimitry Andric // Free the memory in the thread free lists related to fast memory
22830b57cec5SDimitry Andric // Only do this when a thread is being reaped (destroyed).
22840b57cec5SDimitry Andric void __kmp_free_fast_memory(kmp_info_t *th) {
22850b57cec5SDimitry Andric   // Assuming BGET is the underlying allocator, walk through its structures...
22860b57cec5SDimitry Andric   int bin;
22870b57cec5SDimitry Andric   thr_data_t *thr = get_thr_data(th);
22880b57cec5SDimitry Andric   void **lst = NULL;
22890b57cec5SDimitry Andric 
22900b57cec5SDimitry Andric   KE_TRACE(
22910b57cec5SDimitry Andric       5, ("__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th)));
22920b57cec5SDimitry Andric 
22930b57cec5SDimitry Andric   __kmp_bget_dequeue(th); // Release any queued buffers
22940b57cec5SDimitry Andric 
22950b57cec5SDimitry Andric   // Dig through free lists and extract all allocated blocks
22960b57cec5SDimitry Andric   for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
22970b57cec5SDimitry Andric     bfhead_t *b = thr->freelist[bin].ql.flink;
22980b57cec5SDimitry Andric     while (b != &thr->freelist[bin]) {
22990b57cec5SDimitry Andric       if ((kmp_uintptr_t)b->bh.bb.bthr & 1) { // low bit set: buffer is marked allocated
23000b57cec5SDimitry Andric         *((void **)b) =
23010b57cec5SDimitry Andric             lst; // link the list (overwrite bthr, but keep flink for now)
23020b57cec5SDimitry Andric         lst = (void **)b; // push b into lst
23030b57cec5SDimitry Andric       }
23040b57cec5SDimitry Andric       b = b->ql.flink; // get next buffer
23050b57cec5SDimitry Andric     }
23060b57cec5SDimitry Andric   }
23070b57cec5SDimitry Andric   while (lst != NULL) {
23080b57cec5SDimitry Andric     void *next = *lst;
23090b57cec5SDimitry Andric     KE_TRACE(10, ("__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
23100b57cec5SDimitry Andric                   lst, next, th, __kmp_gtid_from_thread(th)));
23110b57cec5SDimitry Andric     (*thr->relfcn)(lst);
23120b57cec5SDimitry Andric #if BufStats
23130b57cec5SDimitry Andric     // count blocks to prevent problems in __kmp_finalize_bget()
23140b57cec5SDimitry Andric     thr->numprel++; /* Nr of expansion block releases */
23150b57cec5SDimitry Andric     thr->numpblk--; /* Total number of blocks */
23160b57cec5SDimitry Andric #endif
23170b57cec5SDimitry Andric     lst = (void **)next;
23180b57cec5SDimitry Andric   }
23190b57cec5SDimitry Andric 
23200b57cec5SDimitry Andric   KE_TRACE(
23210b57cec5SDimitry Andric       5, ("__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th)));
23220b57cec5SDimitry Andric }
23230b57cec5SDimitry Andric 
23240b57cec5SDimitry Andric #endif // USE_FAST_MEMORY
2325