/*
 * kmp_alloc.cpp -- private/shared dynamic memory allocation and management
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_io.h"
#include "kmp_wrapper_malloc.h"

// Disable bget when it is not used
#if KMP_USE_BGET

/* Thread private buffer management code */

typedef int (*bget_compact_t)(size_t, int);
typedef void *(*bget_acquire_t)(size_t);
typedef void (*bget_release_t)(void *);

/* NOTE: bufsize must be a signed datatype */

#if KMP_OS_WINDOWS
#if KMP_ARCH_X86 || KMP_ARCH_ARM
typedef kmp_int32 bufsize;
#else
typedef kmp_int64 bufsize;
#endif
#else
typedef ssize_t bufsize;
#endif // KMP_OS_WINDOWS

/* The three modes of operation are fifo search, lifo search, and best-fit. */

typedef enum bget_mode {
  bget_mode_fifo = 0,
  bget_mode_lifo = 1,
  bget_mode_best = 2
} bget_mode_t;
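
/* As used by bget() below: fifo walks a bin's free list forward (flink) and
   takes the first buffer that fits, lifo walks it backward (blink), and
   best-fit scans the whole bin for the smallest buffer that still fits.
   Note that set_thr_data() zero-fills the per-thread data, so the mode
   defaults to bget_mode_fifo (0) in this file. */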

static void bpool(kmp_info_t *th, void *buffer, bufsize len);
static void *bget(kmp_info_t *th, bufsize size);
static void *bgetz(kmp_info_t *th, bufsize size);
static void *bgetr(kmp_info_t *th, void *buffer, bufsize newsize);
static void brel(kmp_info_t *th, void *buf);
static void bectl(kmp_info_t *th, bget_compact_t compact,
                  bget_acquire_t acquire, bget_release_t release,
                  bufsize pool_incr);

/* BGET CONFIGURATION */
/* Buffer allocation size quantum: all buffers allocated are a
   multiple of this size.  This MUST be a power of two. */

/* On IA-32 architecture with Linux* OS, malloc() does not
   ensure 16 byte alignment */

#if KMP_ARCH_X86 || !KMP_HAVE_QUAD

#define SizeQuant 8
#define AlignType double

#else

#define SizeQuant 16
#define AlignType _Quad

#endif

// Define this symbol to enable the bstats() function which calculates the
// total free space in the buffer pool, the largest available buffer, and the
// total space currently allocated.
#define BufStats 1

#ifdef KMP_DEBUG

// Define this symbol to enable the bpoold() function which dumps the buffers
// in a buffer pool.
#define BufDump 1

// Define this symbol to enable the bpoolv() function for validating a buffer
// pool.
#define BufValid 1

// Define this symbol to enable the bufdump() function which allows dumping the
// contents of an allocated or free buffer.
#define DumpData 1

#ifdef NOT_USED_NOW

// Wipe free buffers to a guaranteed pattern of garbage to trip up miscreants
// who attempt to use pointers into released buffers.
#define FreeWipe 1

// Use a best fit algorithm when searching for space for an allocation request.
// This uses memory more efficiently, but allocation will be much slower.
#define BestFit 1

#endif /* NOT_USED_NOW */
#endif /* KMP_DEBUG */

static bufsize bget_bin_size[] = {
    0,
    //    1 << 6,    /* .5 Cache line */
    1 << 7, /* 1 Cache line, new */
    1 << 8, /* 2 Cache lines */
    1 << 9, /* 4 Cache lines, new */
    1 << 10, /* 8 Cache lines */
    1 << 11, /* 16 Cache lines, new */
    1 << 12, 1 << 13, /* new */
    1 << 14, 1 << 15, /* new */
    1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20, /*  1MB */
    1 << 21, /*  2MB */
    1 << 22, /*  4MB */
    1 << 23, /*  8MB */
    1 << 24, /* 16MB */
    1 << 25, /* 32MB */
};

#define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))

struct bfhead;

//  Declare the interface, including the requested buffer size type, bufsize.

/* Queue links */
typedef struct qlinks {
  struct bfhead *flink; /* Forward link */
  struct bfhead *blink; /* Backward link */
} qlinks_t;

/* Header in allocated and free buffers */
typedef struct bhead2 {
  kmp_info_t *bthr; /* The thread which owns the buffer pool */
  bufsize prevfree; /* Relative link back to previous free buffer in memory or
                       0 if previous buffer is allocated.  */
  bufsize bsize; /* Buffer size: positive if free, negative if allocated. */
} bhead2_t;

/* Make sure the bhead structure is a multiple of SizeQuant in size. */
typedef union bhead {
  KMP_ALIGN(SizeQuant)
  AlignType b_align;
  char b_pad[sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant))];
  bhead2_t bb;
} bhead_t;
#define BH(p) ((bhead_t *)(p))

/*  Header in directly allocated buffers (by acqfcn) */
typedef struct bdhead {
  bufsize tsize; /* Total size, including overhead */
  bhead_t bh; /* Common header */
} bdhead_t;
#define BDH(p) ((bdhead_t *)(p))

/* Header in free buffers */
typedef struct bfhead {
  bhead_t bh; /* Common allocated/free header */
  qlinks_t ql; /* Links on free list */
} bfhead_t;
#define BFH(p) ((bfhead_t *)(p))
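
/* In-memory layout sketch (addresses increase to the right).  A free buffer
   carries a bfhead_t at its start; an allocated buffer carries only a
   bhead_t, and the space that held the queue links is handed to the user:

     free buffer       [ bhead_t | qlinks_t | ...unused... ]   bb.bsize > 0
     allocated buffer  [ bhead_t | user data ............. ]   bb.bsize < 0

   The buffer that physically follows a free buffer records that buffer's
   size in its own bb.prevfree field; prevfree == 0 means the preceding
   buffer is allocated.  This is what lets brel() coalesce adjacent free
   buffers in both directions. */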

typedef struct thr_data {
  bfhead_t freelist[MAX_BGET_BINS];
#if BufStats
  size_t totalloc; /* Total space currently allocated */
  long numget, numrel; /* Number of bget() and brel() calls */
  long numpblk; /* Number of pool blocks */
  long numpget, numprel; /* Number of block gets and rels */
  long numdget, numdrel; /* Number of direct gets and rels */
#endif /* BufStats */

  /* Automatic expansion block management functions */
  bget_compact_t compfcn;
  bget_acquire_t acqfcn;
  bget_release_t relfcn;

  bget_mode_t mode; /* what allocation mode to use? */

  bufsize exp_incr; /* Expansion block size */
  bufsize pool_len; /* 0: no bpool calls have been made
                       -1: not all pool blocks are the same size
                       >0: (common) block size for all bpool calls made so far
                    */
  bfhead_t *last_pool; /* Last pool owned by this thread (delay deallocation) */
} thr_data_t;

/*  Minimum allocation quantum: */
#define QLSize (sizeof(qlinks_t))
#define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
#define MaxSize                                                                \
  (bufsize)(                                                                   \
      ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1)))
// Maximum for the requested size: the largest positive multiple of SizeQuant
// that fits in a bufsize (sign bit and low alignment bits cleared).

/* End sentinel: value placed in bsize field of dummy block delimiting
   end of pool block.  The most negative number which will fit in a
   bufsize, defined in a way that the compiler will accept. */

#define ESent                                                                  \
  ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2))
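
/* Worked out for a 64-bit bufsize: (((bufsize)1 << 62) - 1) * 2 equals
   2^63 - 2, so ESent == -(2^63 - 2) - 2 == -2^63, i.e. the most negative
   value a bufsize can hold, built without overflowing any intermediate
   signed constant. */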

/* Thread Data management routines */
static int bget_get_bin(bufsize size) {
  // binary chop bins
  int lo = 0, hi = MAX_BGET_BINS - 1;

  KMP_DEBUG_ASSERT(size > 0);

  while ((hi - lo) > 1) {
    int mid = (lo + hi) >> 1;
    if (size < bget_bin_size[mid])
      hi = mid - 1;
    else
      lo = mid;
  }

  KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS));

  return lo;
}
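
/* For example, with the table above bget_get_bin(4096) and bget_get_bin(5000)
   both return the bin whose size is 1 << 12.  The chop can settle one bin
   below the exact floor when it exits with hi == lo + 1 (e.g. a size of 1000
   lands in the 256-byte bin rather than the 512-byte one), which is harmless:
   the mapping is monotonic in size, insertion and lookup use the same
   function, and bget() scans bins upward from the returned index. */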

static void set_thr_data(kmp_info_t *th) {
  int i;
  thr_data_t *data;

  data = (thr_data_t *)((!th->th.th_local.bget_data)
                            ? __kmp_allocate(sizeof(*data))
                            : th->th.th_local.bget_data);

  memset(data, '\0', sizeof(*data));

  for (i = 0; i < MAX_BGET_BINS; ++i) {
    data->freelist[i].ql.flink = &data->freelist[i];
    data->freelist[i].ql.blink = &data->freelist[i];
  }

  th->th.th_local.bget_data = data;
  th->th.th_local.bget_list = 0;
#if !USE_CMP_XCHG_FOR_BGET
#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_init_lock(&th->th.th_local.bget_lock);
#else
  __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock);
#endif /* USE_QUEUING_LOCK_FOR_BGET */
#endif /* ! USE_CMP_XCHG_FOR_BGET */
}

static thr_data_t *get_thr_data(kmp_info_t *th) {
  thr_data_t *data;

  data = (thr_data_t *)th->th.th_local.bget_data;

  KMP_DEBUG_ASSERT(data != 0);

  return data;
}
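
/* Cross-thread release protocol: brel() on a buffer owned by another thread
   does not touch that thread's free lists.  Instead the buffer is pushed
   (lock-free, or under bget_lock when compare-and-swap is not used) onto the
   owner's th_local.bget_list, linked through its ql.flink field.  The owner
   drains and actually frees that list in __kmp_bget_dequeue(), which bget()
   and bpool() call on entry. */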

/* Release any buffers that other threads have queued on this thread's list */
static void __kmp_bget_dequeue(kmp_info_t *th) {
  void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);

  if (p != 0) {
#if USE_CMP_XCHG_FOR_BGET
    {
      volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
      while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
                                        CCAST(void *, old_value), nullptr)) {
        KMP_CPU_PAUSE();
        old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
      }
      p = CCAST(void *, old_value);
    }
#else /* ! USE_CMP_XCHG_FOR_BGET */
#ifdef USE_QUEUING_LOCK_FOR_BGET
    __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
#else
    __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
#endif /* USE_QUEUING_LOCK_FOR_BGET */

    p = (void *)th->th.th_local.bget_list;
    th->th.th_local.bget_list = 0;

#ifdef USE_QUEUING_LOCK_FOR_BGET
    __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
#else
    __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
#endif
#endif /* USE_CMP_XCHG_FOR_BGET */

    /* Check again to make sure the list is not empty */
    while (p != 0) {
      void *buf = p;
      bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));

      KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
      KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
                       (kmp_uintptr_t)th); // clear possible mark
      KMP_DEBUG_ASSERT(b->ql.blink == 0);

      p = (void *)b->ql.flink;

      brel(th, buf);
    }
  }
}

/* Push a buffer onto the owning thread's deferred-release list */
static void __kmp_bget_enqueue(kmp_info_t *th, void *buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
                               ,
                               kmp_int32 rel_gtid
#endif
                               ) {
  bfhead_t *b = BFH(((char *)buf) - sizeof(bhead_t));

  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
  KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
                   (kmp_uintptr_t)th); // clear possible mark

  b->ql.blink = 0;

  KC_TRACE(10, ("__kmp_bget_enqueue: moving buffer to T#%d list\n",
                __kmp_gtid_from_thread(th)));

#if USE_CMP_XCHG_FOR_BGET
  {
    volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
    /* the next pointer must be set before setting bget_list to buf to avoid
       exposing a broken list to other threads, even for an instant. */
    b->ql.flink = BFH(CCAST(void *, old_value));

    while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
                                      CCAST(void *, old_value), buf)) {
      KMP_CPU_PAUSE();
      old_value = TCR_PTR(th->th.th_local.bget_list);
      /* the next pointer must be set before setting bget_list to buf to avoid
         exposing a broken list to other threads, even for an instant. */
      b->ql.flink = BFH(CCAST(void *, old_value));
    }
  }
#else /* ! USE_CMP_XCHG_FOR_BGET */
#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid);
#else
  __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
#endif

  b->ql.flink = BFH(th->th.th_local.bget_list);
  th->th.th_local.bget_list = (void *)buf;

#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid);
#else
  __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
#endif
#endif /* USE_CMP_XCHG_FOR_BGET */
}

/* insert buffer back onto a new freelist */
static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) {
  int bin;

  KMP_DEBUG_ASSERT(((size_t)b) % SizeQuant == 0);
  KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0);

  bin = bget_get_bin(b->bh.bb.bsize);

  KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink ==
                   &thr->freelist[bin]);
  KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink ==
                   &thr->freelist[bin]);

  b->ql.flink = &thr->freelist[bin];
  b->ql.blink = thr->freelist[bin].ql.blink;

  thr->freelist[bin].ql.blink = b;
  b->ql.blink->ql.flink = b;
}

/* unlink the buffer from the old freelist */
static void __kmp_bget_remove_from_freelist(bfhead_t *b) {
  KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
  KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);

  b->ql.blink->ql.flink = b->ql.flink;
  b->ql.flink->ql.blink = b->ql.blink;
}

/*  GET STATS -- check info on free list */
static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) {
  thr_data_t *thr = get_thr_data(th);
  int bin;

  *total_free = *max_free = 0;

  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b, *best;

    best = &thr->freelist[bin];
    b = best->ql.flink;

    while (b != &thr->freelist[bin]) {
      *total_free += (b->bh.bb.bsize - sizeof(bhead_t));
      if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize))
        best = b;

      /* Link to next buffer */
      b = b->ql.flink;
    }

    if (*max_free < best->bh.bb.bsize)
      *max_free = best->bh.bb.bsize;
  }

  if (*max_free > (bufsize)sizeof(bhead_t))
    *max_free -= sizeof(bhead_t);
}

/*  BGET  --  Allocate a buffer.  */
static void *bget(kmp_info_t *th, bufsize requested_size) {
  thr_data_t *thr = get_thr_data(th);
  bufsize size = requested_size;
  bfhead_t *b;
  void *buf;
  int compactseq = 0;
  int use_blink = 0;
  /* For BestFit */
  bfhead_t *best;

  if (size < 0 || size + sizeof(bhead_t) > MaxSize) {
    return NULL;
  }

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  if (size < (bufsize)SizeQ) { // Need at least room for the queue links.
    size = SizeQ;
  }
#if defined(SizeQuant) && (SizeQuant > 1)
  size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
#endif

  size += sizeof(bhead_t); // Add overhead in allocated buffer to size required.
  KMP_DEBUG_ASSERT(size >= 0);
  KMP_DEBUG_ASSERT(size % SizeQuant == 0);
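
  /* Example of the adjustment above, assuming SizeQuant == 16 and a 32-byte
     bhead_t (as on a typical 64-bit build with _Quad support): a request for
     24 bytes is first rounded up to 32 and then grows by sizeof(bhead_t), so
     the search below looks for a free buffer of at least 64 bytes. */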

  use_blink = (thr->mode == bget_mode_lifo);

  /* If a compact function was provided in the call to bectl(), wrap
     a loop around the allocation process to allow compaction to
     intervene in case we don't find a suitable buffer in the chain. */

  for (;;) {
    int bin;

    for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) {
      /* Link to next buffer */
      b = (use_blink ? thr->freelist[bin].ql.blink
                     : thr->freelist[bin].ql.flink);

      if (thr->mode == bget_mode_best) {
        best = &thr->freelist[bin];

        /* Scan the free list searching for the smallest buffer big enough
           to hold the requested size. */
        while (b != &thr->freelist[bin]) {
          if (b->bh.bb.bsize >= (bufsize)size) {
            if ((best == &thr->freelist[bin]) ||
                (b->bh.bb.bsize < best->bh.bb.bsize)) {
              best = b;
            }
          }

          /* Link to next buffer */
          b = (use_blink ? b->ql.blink : b->ql.flink);
        }
        b = best;
      }

      while (b != &thr->freelist[bin]) {
        if ((bufsize)b->bh.bb.bsize >= (bufsize)size) {

          // Buffer is big enough to satisfy the request. Allocate it to the
          // caller. We must decide whether the buffer is large enough to split
          // into the part given to the caller and a free buffer that remains
          // on the free list, or whether the entire buffer should be removed
          // from the free list and given to the caller in its entirety. We
          // only split the buffer if enough room remains for a header plus the
          // minimum quantum of allocation.
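          // For example (assuming SizeQuant 16 and a 32-byte bhead_t): a
          // 512-byte free buffer can satisfy a 192-byte adjusted request by
          // splitting, since 512 - 192 = 320 > SizeQ + sizeof(bhead_t); the
          // allocated piece is carved from the high end and the low 320 bytes
          // stay on the free list.  A 224-byte free buffer could not be split
          // for the same request (remainder 32), so it is handed out whole.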
          if ((b->bh.bb.bsize - (bufsize)size) >
              (bufsize)(SizeQ + (sizeof(bhead_t)))) {
            bhead_t *ba, *bn;

            ba = BH(((char *)b) + (b->bh.bb.bsize - (bufsize)size));
            bn = BH(((char *)ba) + size);

            KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);

            /* Subtract size from length of free block. */
            b->bh.bb.bsize -= (bufsize)size;

            /* Link allocated buffer to the previous free buffer. */
            ba->bb.prevfree = b->bh.bb.bsize;

            /* Plug negative size into user buffer. */
            ba->bb.bsize = -size;

            /* Mark this buffer as owned by this thread. */
            TCW_PTR(ba->bb.bthr,
                    th); // not an allocated address (do not mark it)
            /* Mark buffer after this one not preceded by free block. */
            bn->bb.prevfree = 0;

            // unlink buffer from old freelist, and reinsert into new freelist
            __kmp_bget_remove_from_freelist(b);
            __kmp_bget_insert_into_freelist(thr, b);
#if BufStats
            thr->totalloc += (size_t)size;
            thr->numget++; /* Increment number of bget() calls */
#endif
            buf = (void *)((((char *)ba) + sizeof(bhead_t)));
            KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
            return buf;
          } else {
            bhead_t *ba;

            ba = BH(((char *)b) + b->bh.bb.bsize);

            KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);

            /* The buffer isn't big enough to split.  Give  the  whole
               shebang to the caller and remove it from the free list. */

            __kmp_bget_remove_from_freelist(b);
#if BufStats
            thr->totalloc += (size_t)b->bh.bb.bsize;
            thr->numget++; /* Increment number of bget() calls */
#endif
            /* Negate size to mark buffer allocated. */
            b->bh.bb.bsize = -(b->bh.bb.bsize);

            /* Mark this buffer as owned by this thread. */
            TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark)
            /* Zero the back pointer in the next buffer in memory
               to indicate that this buffer is allocated. */
            ba->bb.prevfree = 0;

            /* Give user buffer starting at queue links. */
            buf = (void *)&(b->ql);
            KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
            return buf;
          }
        }

        /* Link to next buffer */
        b = (use_blink ? b->ql.blink : b->ql.flink);
      }
    }

    /* We failed to find a buffer. If there's a compact function defined,
       notify it of the size requested. If it returns TRUE, try the allocation
       again. */

    if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
      break;
    }
  }

  /* No buffer available with requested size free. */

  /* Don't give up yet -- look in the reserve supply. */
  if (thr->acqfcn != 0) {
    if (size > (bufsize)(thr->exp_incr - sizeof(bhead_t))) {
      /* Request is too large to fit in a single expansion block.
         Try to satisfy it by a direct buffer acquisition. */
      bdhead_t *bdh;

      size += sizeof(bdhead_t) - sizeof(bhead_t);

      KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", (int)size));

      /* richryan */
      bdh = BDH((*thr->acqfcn)((bufsize)size));
      if (bdh != NULL) {

        // Mark the buffer special by setting size field of its header to zero.
        bdh->bh.bb.bsize = 0;

        /* Mark this buffer as owned by this thread. */
        TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
        // because direct buffer never goes to free list
        bdh->bh.bb.prevfree = 0;
        bdh->tsize = size;
#if BufStats
        thr->totalloc += (size_t)size;
        thr->numget++; /* Increment number of bget() calls */
        thr->numdget++; /* Direct bget() call count */
#endif
        buf = (void *)(bdh + 1);
        KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
        return buf;
      }

    } else {

      /*  Try to obtain a new expansion block */
      void *newpool;

      KE_TRACE(10, ("%%%%%% MALLOCB( %d )\n", (int)thr->exp_incr));

      /* richryan */
      newpool = (*thr->acqfcn)((bufsize)thr->exp_incr);
      KMP_DEBUG_ASSERT(((size_t)newpool) % SizeQuant == 0);
      if (newpool != NULL) {
        bpool(th, newpool, thr->exp_incr);
        buf = bget(
            th, requested_size); /* This can't, I say, can't get into a loop. */
        return buf;
      }
    }
  }

  /*  Still no buffer available */

  return NULL;
}

/*  BGETZ  --  Allocate a buffer and clear its contents to zero.  We clear
               the  entire  contents  of  the buffer to zero, not just the
               region requested by the caller. */

static void *bgetz(kmp_info_t *th, bufsize size) {
  char *buf = (char *)bget(th, size);

  if (buf != NULL) {
    bhead_t *b;
    bufsize rsize;

    b = BH(buf - sizeof(bhead_t));
    rsize = -(b->bb.bsize);
    if (rsize == 0) {
      bdhead_t *bd;

      bd = BDH(buf - sizeof(bdhead_t));
      rsize = bd->tsize - (bufsize)sizeof(bdhead_t);
    } else {
      rsize -= sizeof(bhead_t);
    }

    KMP_DEBUG_ASSERT(rsize >= size);

    (void)memset(buf, 0, (bufsize)rsize);
  }
  return ((void *)buf);
}

/*  BGETR  --  Reallocate a buffer.  This is a minimal implementation,
               simply in terms of brel()  and  bget().   It  could  be
               enhanced to allow the buffer to grow into adjacent free
               blocks and to avoid moving data unnecessarily.  */

static void *bgetr(kmp_info_t *th, void *buf, bufsize size) {
  void *nbuf;
  bufsize osize; /* Old size of buffer */
  bhead_t *b;

  nbuf = bget(th, size);
  if (nbuf == NULL) { /* Acquire new buffer */
    return NULL;
  }
  if (buf == NULL) {
    return nbuf;
  }
  b = BH(((char *)buf) - sizeof(bhead_t));
  osize = -b->bb.bsize;
  if (osize == 0) {
    /*  Buffer acquired directly through acqfcn. */
    bdhead_t *bd;

    bd = BDH(((char *)buf) - sizeof(bdhead_t));
    osize = bd->tsize - (bufsize)sizeof(bdhead_t);
  } else {
    osize -= sizeof(bhead_t);
  }

  KMP_DEBUG_ASSERT(osize > 0);

  (void)KMP_MEMCPY((char *)nbuf, (char *)buf, /* Copy the data */
                   (size_t)((size < osize) ? size : osize));
  brel(th, buf);

  return nbuf;
}

/*  BREL  --  Release a buffer.  */
static void brel(kmp_info_t *th, void *buf) {
  thr_data_t *thr = get_thr_data(th);
  bfhead_t *b, *bn;
  kmp_info_t *bth;

  KMP_DEBUG_ASSERT(buf != NULL);
  KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);

  b = BFH(((char *)buf) - sizeof(bhead_t));

  if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
    bdhead_t *bdh;

    bdh = BDH(((char *)buf) - sizeof(bdhead_t));
    KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
#if BufStats
    thr->totalloc -= (size_t)bdh->tsize;
    thr->numdrel++; /* Number of direct releases */
    thr->numrel++; /* Increment number of brel() calls */
#endif /* BufStats */
#ifdef FreeWipe
    (void)memset((char *)buf, 0x55, (size_t)(bdh->tsize - sizeof(bdhead_t)));
#endif /* FreeWipe */

    KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)bdh));

    KMP_DEBUG_ASSERT(thr->relfcn != 0);
    (*thr->relfcn)((void *)bdh); /* Release it directly. */
    return;
  }

  bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) &
                       ~1); // clear possible mark before comparison
  if (bth != th) {
    /* Add this buffer to be released by the owning thread later */
    __kmp_bget_enqueue(bth, buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
                       ,
                       __kmp_gtid_from_thread(th)
#endif
                           );
    return;
  }

  /* Buffer size must be negative, indicating that the buffer is allocated. */
  if (b->bh.bb.bsize >= 0) {
    bn = NULL;
  }
  KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);

  /*  Back pointer in next buffer must be zero, indicating the same thing: */

  KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.bsize)->bb.prevfree == 0);

#if BufStats
  thr->numrel++; /* Increment number of brel() calls */
  thr->totalloc += (size_t)b->bh.bb.bsize;
#endif
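
  /* Coalescing overview (addresses increase to the right, F = free,
     A = allocated):

         [ F prev ][ A buf ][ next ... ]

     1) If prevfree != 0, fold the released buffer into the preceding free
        buffer (its bsize grows by the released length).
     2) Otherwise just flip bsize positive and put the buffer on a free list.
     3) If the physically following buffer is free (bsize > 0), absorb it too.
     4) Finally record the merged free size in the next allocated header's
        prevfree field so a later release there can merge backwards. */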

  /* If the back link is nonzero, the previous buffer is free.  */

  if (b->bh.bb.prevfree != 0) {
    /* The previous buffer is free. Consolidate this buffer with it by adding
       the length of this buffer to the previous free buffer. Note that we
       subtract the size in the buffer being released, since it's negative to
       indicate that the buffer is allocated. */
    bufsize size = b->bh.bb.bsize;

    /* Make the previous buffer the one we're working on. */
    KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.prevfree)->bb.bsize ==
                     b->bh.bb.prevfree);
    b = BFH(((char *)b) - b->bh.bb.prevfree);
    b->bh.bb.bsize -= size;

    /* unlink the buffer from the old freelist */
    __kmp_bget_remove_from_freelist(b);
  } else {
    /* The previous buffer is allocated. Mark this buffer's size as positive
       (i.e. free) and fall through to place the buffer on the free list as an
       isolated free block. */
    b->bh.bb.bsize = -b->bh.bb.bsize;
  }

  /* insert buffer back onto a new freelist */
  __kmp_bget_insert_into_freelist(thr, b);

  /* Now we look at the next buffer in memory, located by advancing from
     the  start  of  this  buffer  by its size, to see if that buffer is
     free.  If it is, we combine  this  buffer  with  the  next  one  in
     memory, dechaining the second buffer from the free list. */
  bn = BFH(((char *)b) + b->bh.bb.bsize);
  if (bn->bh.bb.bsize > 0) {

    /* The buffer is free.  Remove it from the free list and add
       its size to that of our buffer. */
    KMP_DEBUG_ASSERT(BH((char *)bn + bn->bh.bb.bsize)->bb.prevfree ==
                     bn->bh.bb.bsize);

    __kmp_bget_remove_from_freelist(bn);

    b->bh.bb.bsize += bn->bh.bb.bsize;

    /* unlink the buffer from the old freelist, and reinsert it into the new
     * freelist */
    __kmp_bget_remove_from_freelist(b);
    __kmp_bget_insert_into_freelist(thr, b);

    /* Finally,  advance  to   the  buffer  that   follows  the  newly
       consolidated free block.  We must set its  backpointer  to  the
       head  of  the  consolidated free block.  We know the next block
       must be an allocated block because the process of recombination
       guarantees  that  two  free  blocks will never be contiguous in
       memory.  */
    bn = BFH(((char *)b) + b->bh.bb.bsize);
  }
#ifdef FreeWipe
  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
               (size_t)(b->bh.bb.bsize - sizeof(bfhead_t)));
#endif
  KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);

  /* The next buffer is allocated.  Set the backpointer in it  to  point
     to this buffer; the previous free buffer in memory. */

  bn->bh.bb.prevfree = b->bh.bb.bsize;

  /*  If  a  block-release function is defined, and this free buffer
      constitutes the entire block, release it.  Note that  pool_len
      is  defined  in  such a way that the test will fail unless all
      pool blocks are the same size.  */
  if (thr->relfcn != 0 &&
      b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
#if BufStats
    if (thr->numpblk !=
        1) { /* Do not release the last buffer until finalization time */
#endif

      KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
      KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
      KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
                       b->bh.bb.bsize);

      /*  Unlink the buffer from the free list  */
      __kmp_bget_remove_from_freelist(b);

      KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));

      (*thr->relfcn)(b);
#if BufStats
      thr->numprel++; /* Nr of expansion block releases */
      thr->numpblk--; /* Total number of blocks */
      KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);

      // avoid leaving stale last_pool pointer around if it is being dealloced
      if (thr->last_pool == b)
        thr->last_pool = 0;
    } else {
      thr->last_pool = b;
    }
#endif /* BufStats */
  }
}

/*  BECTL  --  Establish automatic pool expansion control  */
static void bectl(kmp_info_t *th, bget_compact_t compact,
                  bget_acquire_t acquire, bget_release_t release,
                  bufsize pool_incr) {
  thr_data_t *thr = get_thr_data(th);

  thr->compfcn = compact;
  thr->acqfcn = acquire;
  thr->relfcn = release;
  thr->exp_incr = pool_incr;
}
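
/* How bget() and brel() use these hooks: when no free buffer satisfies a
   request, bget() acquires an expansion block of exp_incr bytes via acqfcn
   and adds it with bpool(); a request too large for one expansion block is
   acquired directly with a bdhead_t header instead.  brel() returns an empty
   expansion block (and __kmp_finalize_bget() the last one) through relfcn. */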

/*  BPOOL  --  Add a region of memory to the buffer pool.  */
static void bpool(kmp_info_t *th, void *buf, bufsize len) {
  /*    int bin = 0; */
  thr_data_t *thr = get_thr_data(th);
  bfhead_t *b = BFH(buf);
  bhead_t *bn;

  __kmp_bget_dequeue(th); /* Release any queued buffers */

#ifdef SizeQuant
  len &= ~(SizeQuant - 1);
#endif
  if (thr->pool_len == 0) {
    thr->pool_len = len;
  } else if (len != thr->pool_len) {
    thr->pool_len = -1;
  }
#if BufStats
  thr->numpget++; /* Number of block acquisitions */
  thr->numpblk++; /* Number of blocks total */
  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
#endif /* BufStats */

  /* Since the block is initially occupied by a single free  buffer,
     it  had  better  not  be  (much) larger than the largest buffer
     whose size we can store in bhead.bb.bsize. */
  KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize)ESent + 1));

  /* Clear  the  backpointer at  the start of the block to indicate that
     there  is  no  free  block  prior  to  this   one.    That   blocks
     recombination when the first block in memory is released. */
  b->bh.bb.prevfree = 0;

  /* Create a dummy allocated buffer at the end of the pool.  This dummy
     buffer is seen when a buffer at the end of the pool is released and
     blocks  recombination  of  the last buffer with the dummy buffer at
     the end.  The length in the dummy buffer  is  set  to  the  largest
     negative  number  to  denote  the  end  of  the pool for diagnostic
     routines (this specific value is  not  counted  on  by  the  actual
     allocation and release functions). */
  len -= sizeof(bhead_t);
  b->bh.bb.bsize = (bufsize)len;
  /* Set the owner of this buffer */
  TCW_PTR(b->bh.bb.bthr,
          (kmp_info_t *)((kmp_uintptr_t)th |
                         1)); // mark the buffer as allocated address

  /* Chain the new block to the free list. */
  __kmp_bget_insert_into_freelist(thr, b);

#ifdef FreeWipe
  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
               (size_t)(len - sizeof(bfhead_t)));
#endif
  bn = BH(((char *)b) + len);
  bn->bb.prevfree = (bufsize)len;
  /* Definition of ESent assumes two's complement! */
  KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0));

  bn->bb.bsize = ESent;
}
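
/* Resulting layout of a freshly added pool block (L = len - sizeof(bhead_t)):

     +--------------------------------------------+-----------------------+
     | bfhead_t: prevfree = 0, bsize = L (free)   | sentinel bhead_t:     |
     | ql links, then FreeWipe fill if enabled    | prevfree = L,         |
     |                                            | bsize = ESent         |
     +--------------------------------------------+-----------------------+
     ^ buf                                        ^ buf + L                */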
9370b57cec5SDimitry Andric 
9380b57cec5SDimitry Andric /*  BFREED  --  Dump the free lists for this thread. */
9390b57cec5SDimitry Andric static void bfreed(kmp_info_t *th) {
9400b57cec5SDimitry Andric   int bin = 0, count = 0;
9410b57cec5SDimitry Andric   int gtid = __kmp_gtid_from_thread(th);
9420b57cec5SDimitry Andric   thr_data_t *thr = get_thr_data(th);
9430b57cec5SDimitry Andric 
9440b57cec5SDimitry Andric #if BufStats
9450b57cec5SDimitry Andric   __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC
9460b57cec5SDimitry Andric                        " get=%" KMP_INT64_SPEC " rel=%" KMP_INT64_SPEC
9470b57cec5SDimitry Andric                        " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC
9480b57cec5SDimitry Andric                        " prel=%" KMP_INT64_SPEC " dget=%" KMP_INT64_SPEC
9490b57cec5SDimitry Andric                        " drel=%" KMP_INT64_SPEC "\n",
9500b57cec5SDimitry Andric                        gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget,
9510b57cec5SDimitry Andric                        (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk,
9520b57cec5SDimitry Andric                        (kmp_int64)thr->numpget, (kmp_int64)thr->numprel,
9530b57cec5SDimitry Andric                        (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel);
9540b57cec5SDimitry Andric #endif
9550b57cec5SDimitry Andric 
9560b57cec5SDimitry Andric   for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
9570b57cec5SDimitry Andric     bfhead_t *b;
9580b57cec5SDimitry Andric 
9590b57cec5SDimitry Andric     for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin];
9600b57cec5SDimitry Andric          b = b->ql.flink) {
9610b57cec5SDimitry Andric       bufsize bs = b->bh.bb.bsize;
9620b57cec5SDimitry Andric 
9630b57cec5SDimitry Andric       KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
9640b57cec5SDimitry Andric       KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
9650b57cec5SDimitry Andric       KMP_DEBUG_ASSERT(bs > 0);
9660b57cec5SDimitry Andric 
9670b57cec5SDimitry Andric       count += 1;
9680b57cec5SDimitry Andric 
9690b57cec5SDimitry Andric       __kmp_printf_no_lock(
9700b57cec5SDimitry Andric           "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b,
9710b57cec5SDimitry Andric           (long)bs);
9720b57cec5SDimitry Andric #ifdef FreeWipe
9730b57cec5SDimitry Andric       {
9740b57cec5SDimitry Andric         char *lerr = ((char *)b) + sizeof(bfhead_t);
9750b57cec5SDimitry Andric         if ((bs > sizeof(bfhead_t)) &&
9760b57cec5SDimitry Andric             ((*lerr != 0x55) ||
9770b57cec5SDimitry Andric              (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
9780b57cec5SDimitry Andric               0))) {
9790b57cec5SDimitry Andric           __kmp_printf_no_lock("__kmp_printpool: T#%d     (Contents of above "
9800b57cec5SDimitry Andric                                "free block have been overwritten.)\n",
9810b57cec5SDimitry Andric                                gtid);
9820b57cec5SDimitry Andric         }
9830b57cec5SDimitry Andric       }
9840b57cec5SDimitry Andric #endif
9850b57cec5SDimitry Andric     }
9860b57cec5SDimitry Andric   }
9870b57cec5SDimitry Andric 
9880b57cec5SDimitry Andric   if (count == 0)
9890b57cec5SDimitry Andric     __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid);
9900b57cec5SDimitry Andric }
9910b57cec5SDimitry Andric 
9920b57cec5SDimitry Andric void __kmp_initialize_bget(kmp_info_t *th) {
9930b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(SizeQuant >= sizeof(void *) && (th != 0));
9940b57cec5SDimitry Andric 
9950b57cec5SDimitry Andric   set_thr_data(th);
9960b57cec5SDimitry Andric 
9970b57cec5SDimitry Andric   bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free,
9980b57cec5SDimitry Andric         (bufsize)__kmp_malloc_pool_incr);
9990b57cec5SDimitry Andric }
10000b57cec5SDimitry Andric 
10010b57cec5SDimitry Andric void __kmp_finalize_bget(kmp_info_t *th) {
10020b57cec5SDimitry Andric   thr_data_t *thr;
10030b57cec5SDimitry Andric   bfhead_t *b;
10040b57cec5SDimitry Andric 
10050b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(th != 0);
10060b57cec5SDimitry Andric 
10070b57cec5SDimitry Andric #if BufStats
10080b57cec5SDimitry Andric   thr = (thr_data_t *)th->th.th_local.bget_data;
10090b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(thr != NULL);
10100b57cec5SDimitry Andric   b = thr->last_pool;
10110b57cec5SDimitry Andric 
10120b57cec5SDimitry Andric   /*  If a block-release function is defined, and this free buffer constitutes
10130b57cec5SDimitry Andric       the entire block, release it. Note that pool_len is defined in such a way
10140b57cec5SDimitry Andric       that the test will fail unless all pool blocks are the same size.  */
10150b57cec5SDimitry Andric 
10160b57cec5SDimitry Andric   // Deallocate the last pool if one exists because we no longer do it in brel()
10170b57cec5SDimitry Andric   if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
10180b57cec5SDimitry Andric       b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
10190b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
10200b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
10210b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
10220b57cec5SDimitry Andric                      b->bh.bb.bsize);
10230b57cec5SDimitry Andric 
10240b57cec5SDimitry Andric     /*  Unlink the buffer from the free list  */
10250b57cec5SDimitry Andric     __kmp_bget_remove_from_freelist(b);
10260b57cec5SDimitry Andric 
10270b57cec5SDimitry Andric     KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));
10280b57cec5SDimitry Andric 
10290b57cec5SDimitry Andric     (*thr->relfcn)(b);
10300b57cec5SDimitry Andric     thr->numprel++; /* Nr of expansion block releases */
10310b57cec5SDimitry Andric     thr->numpblk--; /* Total number of blocks */
10320b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
10330b57cec5SDimitry Andric   }
10340b57cec5SDimitry Andric #endif /* BufStats */
10350b57cec5SDimitry Andric 
10360b57cec5SDimitry Andric   /* Deallocate bget_data */
10370b57cec5SDimitry Andric   if (th->th.th_local.bget_data != NULL) {
10380b57cec5SDimitry Andric     __kmp_free(th->th.th_local.bget_data);
10390b57cec5SDimitry Andric     th->th.th_local.bget_data = NULL;
10400b57cec5SDimitry Andric   }
10410b57cec5SDimitry Andric }
10420b57cec5SDimitry Andric 
10430b57cec5SDimitry Andric void kmpc_set_poolsize(size_t size) {
10440b57cec5SDimitry Andric   bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc,
10450b57cec5SDimitry Andric         (bget_release_t)free, (bufsize)size);
10460b57cec5SDimitry Andric }
10470b57cec5SDimitry Andric 
10480b57cec5SDimitry Andric size_t kmpc_get_poolsize(void) {
10490b57cec5SDimitry Andric   thr_data_t *p;
10500b57cec5SDimitry Andric 
10510b57cec5SDimitry Andric   p = get_thr_data(__kmp_get_thread());
10520b57cec5SDimitry Andric 
10530b57cec5SDimitry Andric   return p->exp_incr;
10540b57cec5SDimitry Andric }
10550b57cec5SDimitry Andric 
10560b57cec5SDimitry Andric void kmpc_set_poolmode(int mode) {
10570b57cec5SDimitry Andric   thr_data_t *p;
10580b57cec5SDimitry Andric 
10590b57cec5SDimitry Andric   if (mode == bget_mode_fifo || mode == bget_mode_lifo ||
10600b57cec5SDimitry Andric       mode == bget_mode_best) {
10610b57cec5SDimitry Andric     p = get_thr_data(__kmp_get_thread());
10620b57cec5SDimitry Andric     p->mode = (bget_mode_t)mode;
10630b57cec5SDimitry Andric   }
10640b57cec5SDimitry Andric }
10650b57cec5SDimitry Andric 
10660b57cec5SDimitry Andric int kmpc_get_poolmode(void) {
10670b57cec5SDimitry Andric   thr_data_t *p;
10680b57cec5SDimitry Andric 
10690b57cec5SDimitry Andric   p = get_thr_data(__kmp_get_thread());
10700b57cec5SDimitry Andric 
10710b57cec5SDimitry Andric   return p->mode;
10720b57cec5SDimitry Andric }
10730b57cec5SDimitry Andric 
10740b57cec5SDimitry Andric void kmpc_get_poolstat(size_t *maxmem, size_t *allmem) {
10750b57cec5SDimitry Andric   kmp_info_t *th = __kmp_get_thread();
10760b57cec5SDimitry Andric   bufsize a, b;
10770b57cec5SDimitry Andric 
10780b57cec5SDimitry Andric   __kmp_bget_dequeue(th); /* Release any queued buffers */
10790b57cec5SDimitry Andric 
10800b57cec5SDimitry Andric   bcheck(th, &a, &b);
10810b57cec5SDimitry Andric 
10820b57cec5SDimitry Andric   *maxmem = a;
10830b57cec5SDimitry Andric   *allmem = b;
10840b57cec5SDimitry Andric }
10850b57cec5SDimitry Andric 
10860b57cec5SDimitry Andric void kmpc_poolprint(void) {
10870b57cec5SDimitry Andric   kmp_info_t *th = __kmp_get_thread();
10880b57cec5SDimitry Andric 
10890b57cec5SDimitry Andric   __kmp_bget_dequeue(th); /* Release any queued buffers */
10900b57cec5SDimitry Andric 
10910b57cec5SDimitry Andric   bfreed(th);
10920b57cec5SDimitry Andric }
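
/* An illustrative (not required) calling sequence for the pool-control
   entry points above; each call acts on the calling thread's private pool:

     kmpc_set_poolsize(1024 * 1024);    // pool expansion increment (exp_incr)
     kmpc_set_poolmode(bget_mode_best); // fifo, lifo, or best-fit search
     size_t maxmem, allmem;
     kmpc_get_poolstat(&maxmem, &allmem);
     kmpc_poolprint();                  // dump this thread's free lists
*/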
10930b57cec5SDimitry Andric 
10940b57cec5SDimitry Andric #endif // #if KMP_USE_BGET
10950b57cec5SDimitry Andric 
10960b57cec5SDimitry Andric void *kmpc_malloc(size_t size) {
10970b57cec5SDimitry Andric   void *ptr;
10980b57cec5SDimitry Andric   ptr = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
10990b57cec5SDimitry Andric   if (ptr != NULL) {
11000b57cec5SDimitry Andric     // save the allocated pointer just before the one returned to the user
11010b57cec5SDimitry Andric     *(void **)ptr = ptr;
11020b57cec5SDimitry Andric     ptr = (void **)ptr + 1;
11030b57cec5SDimitry Andric   }
11040b57cec5SDimitry Andric   return ptr;
11050b57cec5SDimitry Andric }
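
/* Layout used by kmpc_malloc()/kmpc_calloc()/kmpc_realloc() above and below
   (sketch): the pointer returned by bget()/bgetz()/bgetr() is stored in the
   word immediately preceding the pointer handed back to the user, so
   kmpc_free() and kmpc_realloc() can recover it with *((void **)ptr - 1):

     bget() result                 pointer returned to user
     |                             |
     v                             v
     +-----------------------------+-----------------------------+
     | void * (== bget() result)   | user data (size bytes)      |
     +-----------------------------+-----------------------------+
*/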
11060b57cec5SDimitry Andric 
11070b57cec5SDimitry Andric #define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0)
11080b57cec5SDimitry Andric 
11090b57cec5SDimitry Andric void *kmpc_aligned_malloc(size_t size, size_t alignment) {
11100b57cec5SDimitry Andric   void *ptr;
11110b57cec5SDimitry Andric   void *ptr_allocated;
11120b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big
11130b57cec5SDimitry Andric   if (!IS_POWER_OF_TWO(alignment)) {
11140b57cec5SDimitry Andric     // AC: do we need to issue a warning here?
11150b57cec5SDimitry Andric     errno = EINVAL;
11160b57cec5SDimitry Andric     return NULL;
11170b57cec5SDimitry Andric   }
11180b57cec5SDimitry Andric   size = size + sizeof(void *) + alignment;
11190b57cec5SDimitry Andric   ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size);
11200b57cec5SDimitry Andric   if (ptr_allocated != NULL) {
11210b57cec5SDimitry Andric     // save the allocated pointer just before the one returned to the user
11220b57cec5SDimitry Andric     ptr = (void *)(((kmp_uintptr_t)ptr_allocated + sizeof(void *) + alignment) &
11230b57cec5SDimitry Andric                    ~(alignment - 1));
11240b57cec5SDimitry Andric     *((void **)ptr - 1) = ptr_allocated;
11250b57cec5SDimitry Andric   } else {
11260b57cec5SDimitry Andric     ptr = NULL;
11270b57cec5SDimitry Andric   }
11280b57cec5SDimitry Andric   return ptr;
11290b57cec5SDimitry Andric }
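
/* Worked example for kmpc_aligned_malloc() above (the numbers are purely
   illustrative): size = 100, alignment = 64 on a 64-bit target
   (sizeof(void *) == 8).
     bytes requested from bget(): 100 + 8 + 64 = 172
     if bget() returns 0x1010:
       (0x1010 + 8 + 64) & ~63 = 0x1058 & ~63 = 0x1040  -> returned to user
       the word at 0x1040 - 8 = 0x1038 stores 0x1010 so that kmpc_free() can
       release the original allocation. */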
11300b57cec5SDimitry Andric 
11310b57cec5SDimitry Andric void *kmpc_calloc(size_t nelem, size_t elsize) {
11320b57cec5SDimitry Andric   void *ptr;
11330b57cec5SDimitry Andric   ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize + sizeof(ptr)));
11340b57cec5SDimitry Andric   if (ptr != NULL) {
11350b57cec5SDimitry Andric     // save the allocated pointer just before the one returned to the user
11360b57cec5SDimitry Andric     *(void **)ptr = ptr;
11370b57cec5SDimitry Andric     ptr = (void **)ptr + 1;
11380b57cec5SDimitry Andric   }
11390b57cec5SDimitry Andric   return ptr;
11400b57cec5SDimitry Andric }
11410b57cec5SDimitry Andric 
11420b57cec5SDimitry Andric void *kmpc_realloc(void *ptr, size_t size) {
11430b57cec5SDimitry Andric   void *result = NULL;
11440b57cec5SDimitry Andric   if (ptr == NULL) {
11450b57cec5SDimitry Andric     // If pointer is NULL, realloc behaves like malloc.
11460b57cec5SDimitry Andric     result = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
11470b57cec5SDimitry Andric     // save the allocated pointer just before the one returned to the user
11480b57cec5SDimitry Andric     if (result != NULL) {
11490b57cec5SDimitry Andric       *(void **)result = result;
11500b57cec5SDimitry Andric       result = (void **)result + 1;
11510b57cec5SDimitry Andric     }
11520b57cec5SDimitry Andric   } else if (size == 0) {
11530b57cec5SDimitry Andric     // If size is 0, realloc behaves like free.
11540b57cec5SDimitry Andric     // The thread must have been registered by an earlier call to
11550b57cec5SDimitry Andric     // kmpc_malloc() or kmpc_calloc(), so it is safe to call
11560b57cec5SDimitry Andric     // __kmp_get_thread() here rather than __kmp_entry_thread().
11580b57cec5SDimitry Andric     KMP_ASSERT(*((void **)ptr - 1));
11590b57cec5SDimitry Andric     brel(__kmp_get_thread(), *((void **)ptr - 1));
11600b57cec5SDimitry Andric   } else {
11610b57cec5SDimitry Andric     result = bgetr(__kmp_entry_thread(), *((void **)ptr - 1),
11620b57cec5SDimitry Andric                    (bufsize)(size + sizeof(ptr)));
11630b57cec5SDimitry Andric     if (result != NULL) {
11640b57cec5SDimitry Andric       *(void **)result = result;
11650b57cec5SDimitry Andric       result = (void **)result + 1;
11660b57cec5SDimitry Andric     }
11670b57cec5SDimitry Andric   }
11680b57cec5SDimitry Andric   return result;
11690b57cec5SDimitry Andric }
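
/* Illustrative use of kmpc_realloc() above (sketch only):

     void *p = kmpc_malloc(64);  // registers the thread on first allocation
     p = kmpc_realloc(p, 256);   // grow; bgetr() result gets a new hidden
                                 // self-pointer just like kmpc_malloc()
     p = kmpc_realloc(p, 0);     // behaves like a free and returns NULL
*/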
11700b57cec5SDimitry Andric 
11710b57cec5SDimitry Andric // NOTE: the library must have already been initialized by a previous allocate
11720b57cec5SDimitry Andric void kmpc_free(void *ptr) {
11730b57cec5SDimitry Andric   if (!__kmp_init_serial) {
11740b57cec5SDimitry Andric     return;
11750b57cec5SDimitry Andric   }
11760b57cec5SDimitry Andric   if (ptr != NULL) {
11770b57cec5SDimitry Andric     kmp_info_t *th = __kmp_get_thread();
11780b57cec5SDimitry Andric     __kmp_bget_dequeue(th); /* Release any queued buffers */
11790b57cec5SDimitry Andric     // extract allocated pointer and free it
11800b57cec5SDimitry Andric     KMP_ASSERT(*((void **)ptr - 1));
11810b57cec5SDimitry Andric     brel(th, *((void **)ptr - 1));
11820b57cec5SDimitry Andric   }
11830b57cec5SDimitry Andric }
11840b57cec5SDimitry Andric 
11850b57cec5SDimitry Andric void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL) {
11860b57cec5SDimitry Andric   void *ptr;
11870b57cec5SDimitry Andric   KE_TRACE(30, ("-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th,
11880b57cec5SDimitry Andric                 (int)size KMP_SRC_LOC_PARM));
11890b57cec5SDimitry Andric   ptr = bget(th, (bufsize)size);
11900b57cec5SDimitry Andric   KE_TRACE(30, ("<- __kmp_thread_malloc() returns %p\n", ptr));
11910b57cec5SDimitry Andric   return ptr;
11920b57cec5SDimitry Andric }
11930b57cec5SDimitry Andric 
11940b57cec5SDimitry Andric void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
11950b57cec5SDimitry Andric                            size_t elsize KMP_SRC_LOC_DECL) {
11960b57cec5SDimitry Andric   void *ptr;
11970b57cec5SDimitry Andric   KE_TRACE(30, ("-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th,
11980b57cec5SDimitry Andric                 (int)nelem, (int)elsize KMP_SRC_LOC_PARM));
11990b57cec5SDimitry Andric   ptr = bgetz(th, (bufsize)(nelem * elsize));
12000b57cec5SDimitry Andric   KE_TRACE(30, ("<- __kmp_thread_calloc() returns %p\n", ptr));
12010b57cec5SDimitry Andric   return ptr;
12020b57cec5SDimitry Andric }
12030b57cec5SDimitry Andric 
12040b57cec5SDimitry Andric void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
12050b57cec5SDimitry Andric                             size_t size KMP_SRC_LOC_DECL) {
12060b57cec5SDimitry Andric   KE_TRACE(30, ("-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th,
12070b57cec5SDimitry Andric                 ptr, (int)size KMP_SRC_LOC_PARM));
12080b57cec5SDimitry Andric   ptr = bgetr(th, ptr, (bufsize)size);
12090b57cec5SDimitry Andric   KE_TRACE(30, ("<- __kmp_thread_realloc() returns %p\n", ptr));
12100b57cec5SDimitry Andric   return ptr;
12110b57cec5SDimitry Andric }
12120b57cec5SDimitry Andric 
12130b57cec5SDimitry Andric void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL) {
12140b57cec5SDimitry Andric   KE_TRACE(30, ("-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th,
12150b57cec5SDimitry Andric                 ptr KMP_SRC_LOC_PARM));
12160b57cec5SDimitry Andric   if (ptr != NULL) {
12170b57cec5SDimitry Andric     __kmp_bget_dequeue(th); /* Release any queued buffers */
12180b57cec5SDimitry Andric     brel(th, ptr);
12190b57cec5SDimitry Andric   }
12200b57cec5SDimitry Andric   KE_TRACE(30, ("<- __kmp_thread_free()\n"));
12210b57cec5SDimitry Andric }
12220b57cec5SDimitry Andric 
12230b57cec5SDimitry Andric /* OMP 5.0 Memory Management support */
12240b57cec5SDimitry Andric static const char *kmp_mk_lib_name;
12250b57cec5SDimitry Andric static void *h_memkind;
12260b57cec5SDimitry Andric /* memkind experimental API: */
12270b57cec5SDimitry Andric // memkind_alloc
12280b57cec5SDimitry Andric static void *(*kmp_mk_alloc)(void *k, size_t sz);
12290b57cec5SDimitry Andric // memkind_free
12300b57cec5SDimitry Andric static void (*kmp_mk_free)(void *kind, void *ptr);
12310b57cec5SDimitry Andric // memkind_check_available
12320b57cec5SDimitry Andric static int (*kmp_mk_check)(void *kind);
12330b57cec5SDimitry Andric // kinds we are going to use
12340b57cec5SDimitry Andric static void **mk_default;
12350b57cec5SDimitry Andric static void **mk_interleave;
12360b57cec5SDimitry Andric static void **mk_hbw;
12370b57cec5SDimitry Andric static void **mk_hbw_interleave;
12380b57cec5SDimitry Andric static void **mk_hbw_preferred;
12390b57cec5SDimitry Andric static void **mk_hugetlb;
12400b57cec5SDimitry Andric static void **mk_hbw_hugetlb;
12410b57cec5SDimitry Andric static void **mk_hbw_preferred_hugetlb;
12420b57cec5SDimitry Andric 
12430b57cec5SDimitry Andric #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
12440b57cec5SDimitry Andric static inline void chk_kind(void ***pkind) {
12450b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(pkind);
12460b57cec5SDimitry Andric   if (*pkind) // symbol found
12470b57cec5SDimitry Andric     if (kmp_mk_check(**pkind)) // kind not available or error
12480b57cec5SDimitry Andric       *pkind = NULL;
12490b57cec5SDimitry Andric }
12500b57cec5SDimitry Andric #endif
12510b57cec5SDimitry Andric 
12520b57cec5SDimitry Andric void __kmp_init_memkind() {
12530b57cec5SDimitry Andric // as of 2018-07-31 memkind does not support Windows*, exclude it for now
12540b57cec5SDimitry Andric #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
12550b57cec5SDimitry Andric   // use of statically linked memkind is problematic, as it depends on libnuma
12560b57cec5SDimitry Andric   kmp_mk_lib_name = "libmemkind.so";
12570b57cec5SDimitry Andric   h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY);
12580b57cec5SDimitry Andric   if (h_memkind) {
12590b57cec5SDimitry Andric     kmp_mk_check = (int (*)(void *))dlsym(h_memkind, "memkind_check_available");
12600b57cec5SDimitry Andric     kmp_mk_alloc =
12610b57cec5SDimitry Andric         (void *(*)(void *, size_t))dlsym(h_memkind, "memkind_malloc");
12620b57cec5SDimitry Andric     kmp_mk_free = (void (*)(void *, void *))dlsym(h_memkind, "memkind_free");
12630b57cec5SDimitry Andric     mk_default = (void **)dlsym(h_memkind, "MEMKIND_DEFAULT");
12640b57cec5SDimitry Andric     if (kmp_mk_check && kmp_mk_alloc && kmp_mk_free && mk_default &&
12650b57cec5SDimitry Andric         !kmp_mk_check(*mk_default)) {
12660b57cec5SDimitry Andric       __kmp_memkind_available = 1;
12670b57cec5SDimitry Andric       mk_interleave = (void **)dlsym(h_memkind, "MEMKIND_INTERLEAVE");
12680b57cec5SDimitry Andric       chk_kind(&mk_interleave);
12690b57cec5SDimitry Andric       mk_hbw = (void **)dlsym(h_memkind, "MEMKIND_HBW");
12700b57cec5SDimitry Andric       chk_kind(&mk_hbw);
12710b57cec5SDimitry Andric       mk_hbw_interleave = (void **)dlsym(h_memkind, "MEMKIND_HBW_INTERLEAVE");
12720b57cec5SDimitry Andric       chk_kind(&mk_hbw_interleave);
12730b57cec5SDimitry Andric       mk_hbw_preferred = (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED");
12740b57cec5SDimitry Andric       chk_kind(&mk_hbw_preferred);
12750b57cec5SDimitry Andric       mk_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HUGETLB");
12760b57cec5SDimitry Andric       chk_kind(&mk_hugetlb);
12770b57cec5SDimitry Andric       mk_hbw_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HBW_HUGETLB");
12780b57cec5SDimitry Andric       chk_kind(&mk_hbw_hugetlb);
12790b57cec5SDimitry Andric       mk_hbw_preferred_hugetlb =
12800b57cec5SDimitry Andric           (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED_HUGETLB");
12810b57cec5SDimitry Andric       chk_kind(&mk_hbw_preferred_hugetlb);
12820b57cec5SDimitry Andric       KE_TRACE(25, ("__kmp_init_memkind: memkind library initialized\n"));
12830b57cec5SDimitry Andric       return; // success
12840b57cec5SDimitry Andric     }
12850b57cec5SDimitry Andric     dlclose(h_memkind); // failure
12860b57cec5SDimitry Andric     h_memkind = NULL;
12870b57cec5SDimitry Andric   }
12880b57cec5SDimitry Andric   kmp_mk_check = NULL;
12890b57cec5SDimitry Andric   kmp_mk_alloc = NULL;
12900b57cec5SDimitry Andric   kmp_mk_free = NULL;
12910b57cec5SDimitry Andric   mk_default = NULL;
12920b57cec5SDimitry Andric   mk_interleave = NULL;
12930b57cec5SDimitry Andric   mk_hbw = NULL;
12940b57cec5SDimitry Andric   mk_hbw_interleave = NULL;
12950b57cec5SDimitry Andric   mk_hbw_preferred = NULL;
12960b57cec5SDimitry Andric   mk_hugetlb = NULL;
12970b57cec5SDimitry Andric   mk_hbw_hugetlb = NULL;
12980b57cec5SDimitry Andric   mk_hbw_preferred_hugetlb = NULL;
12990b57cec5SDimitry Andric #else
13000b57cec5SDimitry Andric   kmp_mk_lib_name = "";
13010b57cec5SDimitry Andric   h_memkind = NULL;
13020b57cec5SDimitry Andric   kmp_mk_check = NULL;
13030b57cec5SDimitry Andric   kmp_mk_alloc = NULL;
13040b57cec5SDimitry Andric   kmp_mk_free = NULL;
13050b57cec5SDimitry Andric   mk_default = NULL;
13060b57cec5SDimitry Andric   mk_interleave = NULL;
13070b57cec5SDimitry Andric   mk_hbw = NULL;
13080b57cec5SDimitry Andric   mk_hbw_interleave = NULL;
13090b57cec5SDimitry Andric   mk_hbw_preferred = NULL;
13100b57cec5SDimitry Andric   mk_hugetlb = NULL;
13110b57cec5SDimitry Andric   mk_hbw_hugetlb = NULL;
13120b57cec5SDimitry Andric   mk_hbw_preferred_hugetlb = NULL;
13130b57cec5SDimitry Andric #endif
13140b57cec5SDimitry Andric }
13150b57cec5SDimitry Andric 
13160b57cec5SDimitry Andric void __kmp_fini_memkind() {
13170b57cec5SDimitry Andric #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
13180b57cec5SDimitry Andric   if (__kmp_memkind_available)
13190b57cec5SDimitry Andric     KE_TRACE(25, ("__kmp_fini_memkind: finalize memkind library\n"));
13200b57cec5SDimitry Andric   if (h_memkind) {
13210b57cec5SDimitry Andric     dlclose(h_memkind);
13220b57cec5SDimitry Andric     h_memkind = NULL;
13230b57cec5SDimitry Andric   }
13240b57cec5SDimitry Andric   kmp_mk_check = NULL;
13250b57cec5SDimitry Andric   kmp_mk_alloc = NULL;
13260b57cec5SDimitry Andric   kmp_mk_free = NULL;
13270b57cec5SDimitry Andric   mk_default = NULL;
13280b57cec5SDimitry Andric   mk_interleave = NULL;
13290b57cec5SDimitry Andric   mk_hbw = NULL;
13300b57cec5SDimitry Andric   mk_hbw_interleave = NULL;
13310b57cec5SDimitry Andric   mk_hbw_preferred = NULL;
13320b57cec5SDimitry Andric   mk_hugetlb = NULL;
13330b57cec5SDimitry Andric   mk_hbw_hugetlb = NULL;
13340b57cec5SDimitry Andric   mk_hbw_preferred_hugetlb = NULL;
13350b57cec5SDimitry Andric #endif
13360b57cec5SDimitry Andric }
13370b57cec5SDimitry Andric 
13380b57cec5SDimitry Andric omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
13390b57cec5SDimitry Andric                                              int ntraits,
13400b57cec5SDimitry Andric                                              omp_alloctrait_t traits[]) {
13410b57cec5SDimitry Andric   // OpenMP 5.0 only allows predefined memspaces
13420b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(ms == omp_default_mem_space || ms == omp_low_lat_mem_space ||
13430b57cec5SDimitry Andric                    ms == omp_large_cap_mem_space || ms == omp_const_mem_space ||
13440b57cec5SDimitry Andric                    ms == omp_high_bw_mem_space);
13450b57cec5SDimitry Andric   kmp_allocator_t *al;
13460b57cec5SDimitry Andric   int i;
13470b57cec5SDimitry Andric   al = (kmp_allocator_t *)__kmp_allocate(sizeof(kmp_allocator_t)); // zeroed
13480b57cec5SDimitry Andric   al->memspace = ms; // not used currently
13490b57cec5SDimitry Andric   for (i = 0; i < ntraits; ++i) {
13500b57cec5SDimitry Andric     switch (traits[i].key) {
13510b57cec5SDimitry Andric     case OMP_ATK_THREADMODEL:
13520b57cec5SDimitry Andric     case OMP_ATK_ACCESS:
13530b57cec5SDimitry Andric     case OMP_ATK_PINNED:
13540b57cec5SDimitry Andric       break;
13550b57cec5SDimitry Andric     case OMP_ATK_ALIGNMENT:
13560b57cec5SDimitry Andric       al->alignment = traits[i].value;
13570b57cec5SDimitry Andric       KMP_ASSERT(IS_POWER_OF_TWO(al->alignment));
13580b57cec5SDimitry Andric       break;
13590b57cec5SDimitry Andric     case OMP_ATK_POOL_SIZE:
13600b57cec5SDimitry Andric       al->pool_size = traits[i].value;
13610b57cec5SDimitry Andric       break;
13620b57cec5SDimitry Andric     case OMP_ATK_FALLBACK:
13630b57cec5SDimitry Andric       al->fb = (omp_alloctrait_value_t)traits[i].value;
13640b57cec5SDimitry Andric       KMP_DEBUG_ASSERT(
13650b57cec5SDimitry Andric           al->fb == OMP_ATV_DEFAULT_MEM_FB || al->fb == OMP_ATV_NULL_FB ||
13660b57cec5SDimitry Andric           al->fb == OMP_ATV_ABORT_FB || al->fb == OMP_ATV_ALLOCATOR_FB);
13670b57cec5SDimitry Andric       break;
13680b57cec5SDimitry Andric     case OMP_ATK_FB_DATA:
13690b57cec5SDimitry Andric       al->fb_data = RCAST(kmp_allocator_t *, traits[i].value);
13700b57cec5SDimitry Andric       break;
13710b57cec5SDimitry Andric     case OMP_ATK_PARTITION:
13720b57cec5SDimitry Andric       al->memkind = RCAST(void **, traits[i].value);
13730b57cec5SDimitry Andric       break;
13740b57cec5SDimitry Andric     default:
13750b57cec5SDimitry Andric       KMP_ASSERT2(0, "Unexpected allocator trait");
13760b57cec5SDimitry Andric     }
13770b57cec5SDimitry Andric   }
13780b57cec5SDimitry Andric   if (al->fb == 0) {
13790b57cec5SDimitry Andric     // no fallback trait specified: fall back to the default memory allocator
13800b57cec5SDimitry Andric     al->fb = OMP_ATV_DEFAULT_MEM_FB;
13810b57cec5SDimitry Andric     al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
13820b57cec5SDimitry Andric   } else if (al->fb == OMP_ATV_ALLOCATOR_FB) {
13830b57cec5SDimitry Andric     KMP_ASSERT(al->fb_data != NULL);
13840b57cec5SDimitry Andric   } else if (al->fb == OMP_ATV_DEFAULT_MEM_FB) {
13850b57cec5SDimitry Andric     al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
13860b57cec5SDimitry Andric   }
13870b57cec5SDimitry Andric   if (__kmp_memkind_available) {
13880b57cec5SDimitry Andric     // Let's use memkind library if available
13890b57cec5SDimitry Andric     if (ms == omp_high_bw_mem_space) {
13900b57cec5SDimitry Andric       if (al->memkind == (void *)OMP_ATV_INTERLEAVED && mk_hbw_interleave) {
13910b57cec5SDimitry Andric         al->memkind = mk_hbw_interleave;
13920b57cec5SDimitry Andric       } else if (mk_hbw_preferred) {
13930b57cec5SDimitry Andric         // AC: do not try to use MEMKIND_HBW for now, because memkind library
13940b57cec5SDimitry Andric         // cannot reliably detect exhaustion of HBW memory.
13950b57cec5SDimitry Andric         // It could be possible using hbw_verify_memory_region() but memkind
13960b57cec5SDimitry Andric         // manual says: "Using this function in production code may result in
13970b57cec5SDimitry Andric         // serious performance penalty".
13980b57cec5SDimitry Andric         al->memkind = mk_hbw_preferred;
13990b57cec5SDimitry Andric       } else {
14000b57cec5SDimitry Andric         // HBW is requested but not available --> return NULL allocator
14010b57cec5SDimitry Andric         __kmp_free(al);
14020b57cec5SDimitry Andric         return omp_null_allocator;
14030b57cec5SDimitry Andric       }
14040b57cec5SDimitry Andric     } else {
14050b57cec5SDimitry Andric       if (al->memkind == (void *)OMP_ATV_INTERLEAVED && mk_interleave) {
14060b57cec5SDimitry Andric         al->memkind = mk_interleave;
14070b57cec5SDimitry Andric       } else {
14080b57cec5SDimitry Andric         al->memkind = mk_default;
14090b57cec5SDimitry Andric       }
14100b57cec5SDimitry Andric     }
14110b57cec5SDimitry Andric   } else {
14120b57cec5SDimitry Andric     if (ms == omp_high_bw_mem_space) {
14130b57cec5SDimitry Andric       // cannot detect HBW memory presence without memkind library
14140b57cec5SDimitry Andric       __kmp_free(al);
14150b57cec5SDimitry Andric       return omp_null_allocator;
14160b57cec5SDimitry Andric     }
14170b57cec5SDimitry Andric   }
14180b57cec5SDimitry Andric   return (omp_allocator_handle_t)al;
14190b57cec5SDimitry Andric }
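
/* Example of the trait handling above (a sketch; the concrete values are
   illustrative): a 64-byte aligned high-bandwidth allocator with a
   null-returning fallback could be constructed as

     omp_alloctrait_t traits[] = {{OMP_ATK_ALIGNMENT, 64},
                                  {OMP_ATK_FALLBACK, OMP_ATV_NULL_FB}};
     omp_allocator_handle_t a =
         __kmpc_init_allocator(gtid, omp_high_bw_mem_space, 2, traits);

   With memkind available this binds the allocator to mk_hbw_preferred (or to
   mk_hbw_interleave when OMP_ATV_INTERLEAVED partitioning was requested);
   without memkind the high-bandwidth request yields omp_null_allocator. */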
14200b57cec5SDimitry Andric 
14210b57cec5SDimitry Andric void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t allocator) {
14220b57cec5SDimitry Andric   if (allocator > kmp_max_mem_alloc)
14230b57cec5SDimitry Andric     __kmp_free(allocator);
14240b57cec5SDimitry Andric }
14250b57cec5SDimitry Andric 
14260b57cec5SDimitry Andric void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t allocator) {
14270b57cec5SDimitry Andric   if (allocator == omp_null_allocator)
14280b57cec5SDimitry Andric     allocator = omp_default_mem_alloc;
14290b57cec5SDimitry Andric   __kmp_threads[gtid]->th.th_def_allocator = allocator;
14300b57cec5SDimitry Andric }
14310b57cec5SDimitry Andric 
14320b57cec5SDimitry Andric omp_allocator_handle_t __kmpc_get_default_allocator(int gtid) {
14330b57cec5SDimitry Andric   return __kmp_threads[gtid]->th.th_def_allocator;
14340b57cec5SDimitry Andric }
14350b57cec5SDimitry Andric 
14360b57cec5SDimitry Andric typedef struct kmp_mem_desc { // Memory block descriptor
14370b57cec5SDimitry Andric   void *ptr_alloc; // Pointer returned by allocator
14380b57cec5SDimitry Andric   size_t size_a; // Size of allocated memory block (initial+descriptor+align)
14390b57cec5SDimitry Andric   void *ptr_align; // Pointer to aligned memory, returned
14400b57cec5SDimitry Andric   kmp_allocator_t *allocator; // allocator
14410b57cec5SDimitry Andric } kmp_mem_desc_t;
14420b57cec5SDimitry Andric static int alignment = sizeof(void *); // let's align to pointer size
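
/* Block layout produced by __kmpc_alloc() below (sketch): the underlying
   allocation is over-sized by sizeof(kmp_mem_desc_t) + align, the returned
   pointer is rounded up to `align`, and the descriptor sits immediately
   before it so that __kmpc_free() can recover both the original pointer and
   the owning allocator:

     ptr_alloc                                 ptr_align (returned)
     |                                         |
     v                                         v
     +---------+------------------------------+--------------------+---------+
     | padding | kmp_mem_desc_t               | user block (size)  | padding |
     +---------+------------------------------+--------------------+---------+
*/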
14430b57cec5SDimitry Andric 
14440b57cec5SDimitry Andric void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
14450b57cec5SDimitry Andric   void *ptr = NULL;
14460b57cec5SDimitry Andric   kmp_allocator_t *al;
14470b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(__kmp_init_serial);
14480b57cec5SDimitry Andric   if (allocator == omp_null_allocator)
14490b57cec5SDimitry Andric     allocator = __kmp_threads[gtid]->th.th_def_allocator;
14500b57cec5SDimitry Andric 
14510b57cec5SDimitry Andric   KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator));
14520b57cec5SDimitry Andric   al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator));
14530b57cec5SDimitry Andric 
14540b57cec5SDimitry Andric   int sz_desc = sizeof(kmp_mem_desc_t);
14550b57cec5SDimitry Andric   kmp_mem_desc_t desc;
14560b57cec5SDimitry Andric   kmp_uintptr_t addr; // address returned by allocator
14570b57cec5SDimitry Andric   kmp_uintptr_t addr_align; // address to return to caller
14580b57cec5SDimitry Andric   kmp_uintptr_t addr_descr; // address of memory block descriptor
14590b57cec5SDimitry Andric   int align = alignment; // default alignment
14600b57cec5SDimitry Andric   if (allocator > kmp_max_mem_alloc && al->alignment > 0) {
14610b57cec5SDimitry Andric     align = al->alignment; // alignment requested by user
14620b57cec5SDimitry Andric   }
14630b57cec5SDimitry Andric   desc.size_a = size + sz_desc + align;
14640b57cec5SDimitry Andric 
14650b57cec5SDimitry Andric   if (__kmp_memkind_available) {
14660b57cec5SDimitry Andric     if (allocator < kmp_max_mem_alloc) {
14670b57cec5SDimitry Andric       // pre-defined allocator
14680b57cec5SDimitry Andric       if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) {
14690b57cec5SDimitry Andric         ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a);
14700b57cec5SDimitry Andric       } else {
14710b57cec5SDimitry Andric         ptr = kmp_mk_alloc(*mk_default, desc.size_a);
14720b57cec5SDimitry Andric       }
14730b57cec5SDimitry Andric     } else if (al->pool_size > 0) {
14740b57cec5SDimitry Andric       // custom allocator with pool size requested
14750b57cec5SDimitry Andric       kmp_uint64 used =
14760b57cec5SDimitry Andric           KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
14770b57cec5SDimitry Andric       if (used + desc.size_a > al->pool_size) {
14780b57cec5SDimitry Andric         // not enough space, need to go fallback path
14790b57cec5SDimitry Andric         KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
14800b57cec5SDimitry Andric         if (al->fb == OMP_ATV_DEFAULT_MEM_FB) {
14810b57cec5SDimitry Andric           al = (kmp_allocator_t *)omp_default_mem_alloc;
14820b57cec5SDimitry Andric           ptr = kmp_mk_alloc(*mk_default, desc.size_a);
14830b57cec5SDimitry Andric         } else if (al->fb == OMP_ATV_ABORT_FB) {
14840b57cec5SDimitry Andric           KMP_ASSERT(0); // abort fallback requested
14850b57cec5SDimitry Andric         } else if (al->fb == OMP_ATV_ALLOCATOR_FB) {
14860b57cec5SDimitry Andric           KMP_ASSERT(al != al->fb_data);
14870b57cec5SDimitry Andric           al = al->fb_data;
14880b57cec5SDimitry Andric           return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
14890b57cec5SDimitry Andric         } // else ptr == NULL;
14900b57cec5SDimitry Andric       } else {
14910b57cec5SDimitry Andric         // pool has enough space
14920b57cec5SDimitry Andric         ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
14930b57cec5SDimitry Andric         if (ptr == NULL) {
14940b57cec5SDimitry Andric           if (al->fb == OMP_ATV_DEFAULT_MEM_FB) {
14950b57cec5SDimitry Andric             al = (kmp_allocator_t *)omp_default_mem_alloc;
14960b57cec5SDimitry Andric             ptr = kmp_mk_alloc(*mk_default, desc.size_a);
14970b57cec5SDimitry Andric           } else if (al->fb == OMP_ATV_ABORT_FB) {
14980b57cec5SDimitry Andric             KMP_ASSERT(0); // abort fallback requested
14990b57cec5SDimitry Andric           } else if (al->fb == OMP_ATV_ALLOCATOR_FB) {
15000b57cec5SDimitry Andric             KMP_ASSERT(al != al->fb_data);
15010b57cec5SDimitry Andric             al = al->fb_data;
15020b57cec5SDimitry Andric             return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
15030b57cec5SDimitry Andric           }
15040b57cec5SDimitry Andric         }
15050b57cec5SDimitry Andric       }
15060b57cec5SDimitry Andric     } else {
15070b57cec5SDimitry Andric       // custom allocator, pool size not requested
15080b57cec5SDimitry Andric       ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
15090b57cec5SDimitry Andric       if (ptr == NULL) {
15100b57cec5SDimitry Andric         if (al->fb == OMP_ATV_DEFAULT_MEM_FB) {
15110b57cec5SDimitry Andric           al = (kmp_allocator_t *)omp_default_mem_alloc;
15120b57cec5SDimitry Andric           ptr = kmp_mk_alloc(*mk_default, desc.size_a);
15130b57cec5SDimitry Andric         } else if (al->fb == OMP_ATV_ABORT_FB) {
15140b57cec5SDimitry Andric           KMP_ASSERT(0); // abort fallback requested
15150b57cec5SDimitry Andric         } else if (al->fb == OMP_ATV_ALLOCATOR_FB) {
15160b57cec5SDimitry Andric           KMP_ASSERT(al != al->fb_data);
15170b57cec5SDimitry Andric           al = al->fb_data;
15180b57cec5SDimitry Andric           return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
15190b57cec5SDimitry Andric         }
15200b57cec5SDimitry Andric       }
15210b57cec5SDimitry Andric     }
15220b57cec5SDimitry Andric   } else if (allocator < kmp_max_mem_alloc) {
15230b57cec5SDimitry Andric     // pre-defined allocator
15240b57cec5SDimitry Andric     if (allocator == omp_high_bw_mem_alloc) {
15250b57cec5SDimitry Andric       // ptr stays NULL: cannot provide high bandwidth memory without memkind
15260b57cec5SDimitry Andric     } else {
15270b57cec5SDimitry Andric       ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
15280b57cec5SDimitry Andric     }
15290b57cec5SDimitry Andric   } else if (al->pool_size > 0) {
15300b57cec5SDimitry Andric     // custom allocator with pool size requested
15310b57cec5SDimitry Andric     kmp_uint64 used =
15320b57cec5SDimitry Andric         KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
15330b57cec5SDimitry Andric     if (used + desc.size_a > al->pool_size) {
15340b57cec5SDimitry Andric       // not enough space, need to go fallback path
15350b57cec5SDimitry Andric       KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
15360b57cec5SDimitry Andric       if (al->fb == OMP_ATV_DEFAULT_MEM_FB) {
15370b57cec5SDimitry Andric         al = (kmp_allocator_t *)omp_default_mem_alloc;
15380b57cec5SDimitry Andric         ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
15390b57cec5SDimitry Andric       } else if (al->fb == OMP_ATV_ABORT_FB) {
15400b57cec5SDimitry Andric         KMP_ASSERT(0); // abort fallback requested
15410b57cec5SDimitry Andric       } else if (al->fb == OMP_ATV_ALLOCATOR_FB) {
15420b57cec5SDimitry Andric         KMP_ASSERT(al != al->fb_data);
15430b57cec5SDimitry Andric         al = al->fb_data;
15440b57cec5SDimitry Andric         return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
15450b57cec5SDimitry Andric       } // else ptr == NULL;
15460b57cec5SDimitry Andric     } else {
15470b57cec5SDimitry Andric       // pool has enough space
15480b57cec5SDimitry Andric       ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
15490b57cec5SDimitry Andric       if (ptr == NULL && al->fb == OMP_ATV_ABORT_FB) {
15500b57cec5SDimitry Andric         KMP_ASSERT(0); // abort fallback requested
15510b57cec5SDimitry Andric       } // no point trying another fallback; it would use the same internal allocator
15520b57cec5SDimitry Andric     }
15530b57cec5SDimitry Andric   } else {
15540b57cec5SDimitry Andric     // custom allocator, pool size not requested
15550b57cec5SDimitry Andric     ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
15560b57cec5SDimitry Andric     if (ptr == NULL && al->fb == OMP_ATV_ABORT_FB) {
15570b57cec5SDimitry Andric       KMP_ASSERT(0); // abort fallback requested
15580b57cec5SDimitry Andric     } // no point trying another fallback; it would use the same internal allocator
15590b57cec5SDimitry Andric   }
15600b57cec5SDimitry Andric   KE_TRACE(10, ("__kmpc_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a));
15610b57cec5SDimitry Andric   if (ptr == NULL)
15620b57cec5SDimitry Andric     return NULL;
15630b57cec5SDimitry Andric 
15640b57cec5SDimitry Andric   addr = (kmp_uintptr_t)ptr;
15650b57cec5SDimitry Andric   addr_align = (addr + sz_desc + align - 1) & ~(align - 1);
15660b57cec5SDimitry Andric   addr_descr = addr_align - sz_desc;
15670b57cec5SDimitry Andric 
15680b57cec5SDimitry Andric   desc.ptr_alloc = ptr;
15690b57cec5SDimitry Andric   desc.ptr_align = (void *)addr_align;
15700b57cec5SDimitry Andric   desc.allocator = al;
15710b57cec5SDimitry Andric   *((kmp_mem_desc_t *)addr_descr) = desc; // save descriptor contents
15720b57cec5SDimitry Andric   KMP_MB();
15730b57cec5SDimitry Andric 
15740b57cec5SDimitry Andric   KE_TRACE(25, ("__kmpc_alloc returns %p, T#%d\n", desc.ptr_align, gtid));
15750b57cec5SDimitry Andric   return desc.ptr_align;
15760b57cec5SDimitry Andric }
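
/* Pool accounting sketch for the pool_size paths above (the numbers are
   illustrative): with al->pool_size == 1024 and al->pool_used == 900, a
   request with desc.size_a == 200 atomically bumps pool_used to 1100; the
   old value (900) plus 200 exceeds 1024, so the increment is rolled back and
   the fallback trait decides what happens next (default memory, abort,
   another allocator, or a NULL result). */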
15770b57cec5SDimitry Andric 
15780b57cec5SDimitry Andric void __kmpc_free(int gtid, void *ptr, const omp_allocator_handle_t allocator) {
15790b57cec5SDimitry Andric   KE_TRACE(25, ("__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator));
15800b57cec5SDimitry Andric   if (ptr == NULL)
15810b57cec5SDimitry Andric     return;
15820b57cec5SDimitry Andric 
15830b57cec5SDimitry Andric   kmp_allocator_t *al;
15840b57cec5SDimitry Andric   omp_allocator_handle_t oal;
15850b57cec5SDimitry Andric   al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator));
15860b57cec5SDimitry Andric   kmp_mem_desc_t desc;
15870b57cec5SDimitry Andric   kmp_uintptr_t addr_align; // address to return to caller
15880b57cec5SDimitry Andric   kmp_uintptr_t addr_descr; // address of memory block descriptor
15890b57cec5SDimitry Andric 
15900b57cec5SDimitry Andric   addr_align = (kmp_uintptr_t)ptr;
15910b57cec5SDimitry Andric   addr_descr = addr_align - sizeof(kmp_mem_desc_t);
15920b57cec5SDimitry Andric   desc = *((kmp_mem_desc_t *)addr_descr); // read descriptor
15930b57cec5SDimitry Andric 
15940b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
15950b57cec5SDimitry Andric   if (allocator) {
15960b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(desc.allocator == al || desc.allocator == al->fb_data);
15970b57cec5SDimitry Andric   }
15980b57cec5SDimitry Andric   al = desc.allocator;
15990b57cec5SDimitry Andric   oal = (omp_allocator_handle_t)al; // cast to handle type for comparisons below
16000b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(al);
16010b57cec5SDimitry Andric 
16020b57cec5SDimitry Andric   if (__kmp_memkind_available) {
16030b57cec5SDimitry Andric     if (oal < kmp_max_mem_alloc) {
16040b57cec5SDimitry Andric       // pre-defined allocator
16050b57cec5SDimitry Andric       if (oal == omp_high_bw_mem_alloc && mk_hbw_preferred) {
16060b57cec5SDimitry Andric         kmp_mk_free(*mk_hbw_preferred, desc.ptr_alloc);
16070b57cec5SDimitry Andric       } else {
16080b57cec5SDimitry Andric         kmp_mk_free(*mk_default, desc.ptr_alloc);
16090b57cec5SDimitry Andric       }
16100b57cec5SDimitry Andric     } else {
16110b57cec5SDimitry Andric       if (al->pool_size > 0) { // custom allocator with pool size requested
16120b57cec5SDimitry Andric         kmp_uint64 used =
16130b57cec5SDimitry Andric             KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
16140b57cec5SDimitry Andric         (void)used; // to suppress compiler warning
16150b57cec5SDimitry Andric         KMP_DEBUG_ASSERT(used >= desc.size_a);
16160b57cec5SDimitry Andric       }
16170b57cec5SDimitry Andric       kmp_mk_free(*al->memkind, desc.ptr_alloc);
16180b57cec5SDimitry Andric     }
16190b57cec5SDimitry Andric   } else {
16200b57cec5SDimitry Andric     if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
16210b57cec5SDimitry Andric       kmp_uint64 used =
16220b57cec5SDimitry Andric           KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
16230b57cec5SDimitry Andric       (void)used; // to suppress compiler warning
16240b57cec5SDimitry Andric       KMP_DEBUG_ASSERT(used >= desc.size_a);
16250b57cec5SDimitry Andric     }
16260b57cec5SDimitry Andric     __kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc);
16270b57cec5SDimitry Andric   }
16280b57cec5SDimitry Andric   KE_TRACE(10, ("__kmpc_free: T#%d freed %p (%p)\n", gtid, desc.ptr_alloc,
16290b57cec5SDimitry Andric                 allocator));
16300b57cec5SDimitry Andric }
16310b57cec5SDimitry Andric 
16320b57cec5SDimitry Andric /* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes
16330b57cec5SDimitry Andric    memory leaks, but it may be useful for debugging memory corruption,
16340b57cec5SDimitry Andric    use-after-free bugs, etc. */
16350b57cec5SDimitry Andric /* #define LEAK_MEMORY */
16360b57cec5SDimitry Andric struct kmp_mem_descr { // Memory block descriptor.
16370b57cec5SDimitry Andric   void *ptr_allocated; // Pointer returned by malloc(), subject for free().
16380b57cec5SDimitry Andric   size_t size_allocated; // Size of allocated memory block.
16390b57cec5SDimitry Andric   void *ptr_aligned; // Pointer to aligned memory, to be used by client code.
16400b57cec5SDimitry Andric   size_t size_aligned; // Size of aligned memory block.
16410b57cec5SDimitry Andric };
16420b57cec5SDimitry Andric typedef struct kmp_mem_descr kmp_mem_descr_t;
16430b57cec5SDimitry Andric 
16440b57cec5SDimitry Andric /* Allocate memory on requested boundary, fill allocated memory with 0x00.
16450b57cec5SDimitry Andric    NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
16460b57cec5SDimitry Andric    error. Must use __kmp_free when freeing memory allocated by this routine! */
16470b57cec5SDimitry Andric static void *___kmp_allocate_align(size_t size,
16480b57cec5SDimitry Andric                                    size_t alignment KMP_SRC_LOC_DECL) {
16490b57cec5SDimitry Andric   /* __kmp_allocate() allocates (via malloc()) a bigger memory block than
16500b57cec5SDimitry Andric      requested in order to return a properly aligned pointer. The original
16510b57cec5SDimitry Andric      pointer returned by malloc() and the size of the allocated block are
16520b57cec5SDimitry Andric      saved in a descriptor just before the aligned pointer. __kmp_free() uses
16530b57cec5SDimitry Andric      this information: it must pass free() the original, not the aligned, pointer.
16540b57cec5SDimitry Andric 
16550b57cec5SDimitry Andric           +---------+------------+-----------------------------------+---------+
16560b57cec5SDimitry Andric           | padding | descriptor |           aligned block           | padding |
16570b57cec5SDimitry Andric           +---------+------------+-----------------------------------+---------+
16580b57cec5SDimitry Andric           ^                      ^
16590b57cec5SDimitry Andric           |                      |
16600b57cec5SDimitry Andric           |                      +- Aligned pointer returned to caller
16610b57cec5SDimitry Andric           +- Pointer returned by malloc()
16620b57cec5SDimitry Andric 
16630b57cec5SDimitry Andric       Aligned block is filled with zeros, paddings are filled with 0xEF. */
16640b57cec5SDimitry Andric 
16650b57cec5SDimitry Andric   kmp_mem_descr_t descr;
16660b57cec5SDimitry Andric   kmp_uintptr_t addr_allocated; // Address returned by malloc().
16670b57cec5SDimitry Andric   kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
16680b57cec5SDimitry Andric   kmp_uintptr_t addr_descr; // Address of memory block descriptor.
16690b57cec5SDimitry Andric 
16700b57cec5SDimitry Andric   KE_TRACE(25, ("-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
16710b57cec5SDimitry Andric                 (int)size, (int)alignment KMP_SRC_LOC_PARM));
16720b57cec5SDimitry Andric 
16730b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big
16740b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(sizeof(void *) <= sizeof(kmp_uintptr_t));
16750b57cec5SDimitry Andric   // Make sure kmp_uintptr_t is enough to store addresses.
16760b57cec5SDimitry Andric 
16770b57cec5SDimitry Andric   descr.size_aligned = size;
16780b57cec5SDimitry Andric   descr.size_allocated =
16790b57cec5SDimitry Andric       descr.size_aligned + sizeof(kmp_mem_descr_t) + alignment;
16800b57cec5SDimitry Andric 
16810b57cec5SDimitry Andric #if KMP_DEBUG
16820b57cec5SDimitry Andric   descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_);
16830b57cec5SDimitry Andric #else
16840b57cec5SDimitry Andric   descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM);
16850b57cec5SDimitry Andric #endif
16860b57cec5SDimitry Andric   KE_TRACE(10, ("   malloc( %d ) returned %p\n", (int)descr.size_allocated,
16870b57cec5SDimitry Andric                 descr.ptr_allocated));
16880b57cec5SDimitry Andric   if (descr.ptr_allocated == NULL) {
16890b57cec5SDimitry Andric     KMP_FATAL(OutOfHeapMemory);
16900b57cec5SDimitry Andric   }
16910b57cec5SDimitry Andric 
16920b57cec5SDimitry Andric   addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
16930b57cec5SDimitry Andric   addr_aligned =
16940b57cec5SDimitry Andric       (addr_allocated + sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1);
16950b57cec5SDimitry Andric   addr_descr = addr_aligned - sizeof(kmp_mem_descr_t);
16960b57cec5SDimitry Andric 
16970b57cec5SDimitry Andric   descr.ptr_aligned = (void *)addr_aligned;
16980b57cec5SDimitry Andric 
16990b57cec5SDimitry Andric   KE_TRACE(26, ("   ___kmp_allocate_align: "
17000b57cec5SDimitry Andric                 "ptr_allocated=%p, size_allocated=%d, "
17010b57cec5SDimitry Andric                 "ptr_aligned=%p, size_aligned=%d\n",
17020b57cec5SDimitry Andric                 descr.ptr_allocated, (int)descr.size_allocated,
17030b57cec5SDimitry Andric                 descr.ptr_aligned, (int)descr.size_aligned));
17040b57cec5SDimitry Andric 
17050b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(addr_allocated <= addr_descr);
17060b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(addr_descr + sizeof(kmp_mem_descr_t) == addr_aligned);
17070b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
17080b57cec5SDimitry Andric                    addr_allocated + descr.size_allocated);
17090b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(addr_aligned % alignment == 0);
17100b57cec5SDimitry Andric #ifdef KMP_DEBUG
17110b57cec5SDimitry Andric   memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
17120b57cec5SDimitry Andric // Fill allocated memory block with 0xEF.
17130b57cec5SDimitry Andric #endif
17140b57cec5SDimitry Andric   memset(descr.ptr_aligned, 0x00, descr.size_aligned);
17150b57cec5SDimitry Andric   // Fill the aligned memory block (which is intended for use by the caller)
17160b57cec5SDimitry Andric   // with 0x00. Do not put this filling under a KMP_DEBUG condition! Many
17170b57cec5SDimitry Andric   // callers expect zeroed memory. (Padding bytes remain filled with 0xEF in
17180b57cec5SDimitry Andric   // the debugging library.)
17200b57cec5SDimitry Andric   *((kmp_mem_descr_t *)addr_descr) = descr;
17210b57cec5SDimitry Andric 
17220b57cec5SDimitry Andric   KMP_MB();
17230b57cec5SDimitry Andric 
17240b57cec5SDimitry Andric   KE_TRACE(25, ("<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned));
17250b57cec5SDimitry Andric   return descr.ptr_aligned;
17260b57cec5SDimitry Andric } // func ___kmp_allocate_align
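
/* Worked example for ___kmp_allocate_align() above (illustrative; assumes a
   64-bit build where sizeof(kmp_mem_descr_t) == 32): for size == 100 and
   alignment == 64,
     size_allocated = 100 + 32 + 64 = 196
   and, if malloc() returns 0x1008,
     addr_aligned = (0x1008 + 32 + 64) & ~63 = 0x1068 & ~63 = 0x1040
     addr_descr   =  0x1040 - 32             = 0x1020
   so the descriptor lives at 0x1020, the caller gets 0x1040, the aligned
   block is zeroed, and in debug builds the padding keeps its 0xEF fill. */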
17270b57cec5SDimitry Andric 
17280b57cec5SDimitry Andric /* Allocate memory on cache line boundary, fill allocated memory with 0x00.
17290b57cec5SDimitry Andric    Do not call this func directly! Use __kmp_allocate macro instead.
17300b57cec5SDimitry Andric    NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
17310b57cec5SDimitry Andric    error. Must use __kmp_free when freeing memory allocated by this routine! */
17320b57cec5SDimitry Andric void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL) {
17330b57cec5SDimitry Andric   void *ptr;
17340b57cec5SDimitry Andric   KE_TRACE(25, ("-> __kmp_allocate( %d ) called from %s:%d\n",
17350b57cec5SDimitry Andric                 (int)size KMP_SRC_LOC_PARM));
17360b57cec5SDimitry Andric   ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM);
17370b57cec5SDimitry Andric   KE_TRACE(25, ("<- __kmp_allocate() returns %p\n", ptr));
17380b57cec5SDimitry Andric   return ptr;
17390b57cec5SDimitry Andric } // func ___kmp_allocate
17400b57cec5SDimitry Andric 
17410b57cec5SDimitry Andric /* Allocate memory on page boundary, fill allocated memory with 0x00.
17420b57cec5SDimitry Andric    Do not call this func directly! Use the __kmp_page_allocate macro instead.
17430b57cec5SDimitry Andric    NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
17440b57cec5SDimitry Andric    error. Must use __kmp_free when freeing memory allocated by this routine! */
17450b57cec5SDimitry Andric void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL) {
17460b57cec5SDimitry Andric   int page_size = 8 * 1024;
17470b57cec5SDimitry Andric   void *ptr;
17480b57cec5SDimitry Andric 
17490b57cec5SDimitry Andric   KE_TRACE(25, ("-> __kmp_page_allocate( %d ) called from %s:%d\n",
17500b57cec5SDimitry Andric                 (int)size KMP_SRC_LOC_PARM));
17510b57cec5SDimitry Andric   ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM);
17520b57cec5SDimitry Andric   KE_TRACE(25, ("<- __kmp_page_allocate( %d ) returns %p\n", (int)size, ptr));
17530b57cec5SDimitry Andric   return ptr;
17540b57cec5SDimitry Andric } // ___kmp_page_allocate
17550b57cec5SDimitry Andric 
17560b57cec5SDimitry Andric /* Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
17570b57cec5SDimitry Andric    In debug mode, fill the memory block with 0xEF before call to free(). */
17580b57cec5SDimitry Andric void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) {
17590b57cec5SDimitry Andric   kmp_mem_descr_t descr;
17600b57cec5SDimitry Andric   kmp_uintptr_t addr_allocated; // Address returned by malloc().
17610b57cec5SDimitry Andric   kmp_uintptr_t addr_aligned; // Aligned address passed by caller.
17620b57cec5SDimitry Andric 
17630b57cec5SDimitry Andric   KE_TRACE(25,
17640b57cec5SDimitry Andric            ("-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM));
17650b57cec5SDimitry Andric   KMP_ASSERT(ptr != NULL);
17660b57cec5SDimitry Andric 
17670b57cec5SDimitry Andric   descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t));
17680b57cec5SDimitry Andric 
17690b57cec5SDimitry Andric   KE_TRACE(26, ("   __kmp_free:     "
17700b57cec5SDimitry Andric                 "ptr_allocated=%p, size_allocated=%d, "
17710b57cec5SDimitry Andric                 "ptr_aligned=%p, size_aligned=%d\n",
17720b57cec5SDimitry Andric                 descr.ptr_allocated, (int)descr.size_allocated,
17730b57cec5SDimitry Andric                 descr.ptr_aligned, (int)descr.size_aligned));
17740b57cec5SDimitry Andric 
17750b57cec5SDimitry Andric   addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
17760b57cec5SDimitry Andric   addr_aligned = (kmp_uintptr_t)descr.ptr_aligned;
17770b57cec5SDimitry Andric 
17780b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0);
17790b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr);
17800b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(addr_allocated + sizeof(kmp_mem_descr_t) <= addr_aligned);
17810b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated);
17820b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
17830b57cec5SDimitry Andric                    addr_allocated + descr.size_allocated);
17840b57cec5SDimitry Andric 
17850b57cec5SDimitry Andric #ifdef KMP_DEBUG
17860b57cec5SDimitry Andric   memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
17870b57cec5SDimitry Andric // Fill memory block with 0xEF, it helps catch using freed memory.
17880b57cec5SDimitry Andric #endif
17890b57cec5SDimitry Andric 
17900b57cec5SDimitry Andric #ifndef LEAK_MEMORY
17910b57cec5SDimitry Andric   KE_TRACE(10, ("   free( %p )\n", descr.ptr_allocated));
17920b57cec5SDimitry Andric #ifdef KMP_DEBUG
17930b57cec5SDimitry Andric   _free_src_loc(descr.ptr_allocated, _file_, _line_);
17940b57cec5SDimitry Andric #else
17950b57cec5SDimitry Andric   free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM);
17960b57cec5SDimitry Andric #endif
17970b57cec5SDimitry Andric #endif
17980b57cec5SDimitry Andric   KMP_MB();
17990b57cec5SDimitry Andric   KE_TRACE(25, ("<- __kmp_free() returns\n"));
18000b57cec5SDimitry Andric } // func ___kmp_free
18010b57cec5SDimitry Andric 
18020b57cec5SDimitry Andric #if USE_FAST_MEMORY == 3
18030b57cec5SDimitry Andric // Allocate fast memory by first scanning the thread's free lists.
18040b57cec5SDimitry Andric // If a chunk of the right size exists, grab it off the free list;
18050b57cec5SDimitry Andric // otherwise allocate normally from the thread's bget pool.
18060b57cec5SDimitry Andric 
18070b57cec5SDimitry Andric // AC: How to choose the limit? Just get 16 for now...
18080b57cec5SDimitry Andric #define KMP_FREE_LIST_LIMIT 16
18090b57cec5SDimitry Andric 
18100b57cec5SDimitry Andric // Always use 128 bytes for determining buckets for caching memory blocks
18110b57cec5SDimitry Andric #define DCACHE_LINE 128
18120b57cec5SDimitry Andric 
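// Mapping from requested size to the free-list bucket used by
// __kmp_fast_allocate() below (derived from the index computation in the
// function):
//
//   request (bytes)    bucket index   rounded size
//   ---------------    ------------   --------------------------
//       1 ..  256           0           2 cache lines ( 256 B)
//     257 ..  512           1           4 cache lines ( 512 B)
//     513 .. 2048           2          16 cache lines (  2 KB)
//    2049 .. 8192           3          64 cache lines (  8 KB)
//    8193 and up          none         allocated directly via bget()
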
18130b57cec5SDimitry Andric void *___kmp_fast_allocate(kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL) {
18140b57cec5SDimitry Andric   void *ptr;
18150b57cec5SDimitry Andric   int num_lines;
18160b57cec5SDimitry Andric   int idx;
18170b57cec5SDimitry Andric   int index;
18180b57cec5SDimitry Andric   void *alloc_ptr;
18190b57cec5SDimitry Andric   size_t alloc_size;
18200b57cec5SDimitry Andric   kmp_mem_descr_t *descr;
18210b57cec5SDimitry Andric 
18220b57cec5SDimitry Andric   KE_TRACE(25, ("-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
18230b57cec5SDimitry Andric                 __kmp_gtid_from_thread(this_thr), (int)size KMP_SRC_LOC_PARM));
18240b57cec5SDimitry Andric 
18250b57cec5SDimitry Andric   num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE;
18260b57cec5SDimitry Andric   idx = num_lines - 1;
18270b57cec5SDimitry Andric   KMP_DEBUG_ASSERT(idx >= 0);
18280b57cec5SDimitry Andric   if (idx < 2) {
18290b57cec5SDimitry Andric     index = 0; // idx is [ 0, 1 ], use first free list
18300b57cec5SDimitry Andric     num_lines = 2; // 1 or 2 cache lines, or less than one cache line
18310b57cec5SDimitry Andric   } else if ((idx >>= 2) == 0) {
18320b57cec5SDimitry Andric     index = 1; // idx is [ 2, 3 ], use second free list
18330b57cec5SDimitry Andric     num_lines = 4; // 3, 4 cache lines
18340b57cec5SDimitry Andric   } else if ((idx >>= 2) == 0) {
18350b57cec5SDimitry Andric     index = 2; // idx is [ 4, 15 ], use third free list
18360b57cec5SDimitry Andric     num_lines = 16; // 5, 6, ..., 16 cache lines
18370b57cec5SDimitry Andric   } else if ((idx >>= 2) == 0) {
18380b57cec5SDimitry Andric     index = 3; // idx is [ 16, 63 ], use fourth free list
18390b57cec5SDimitry Andric     num_lines = 64; // 17, 18, ..., 64 cache lines
18400b57cec5SDimitry Andric   } else {
18410b57cec5SDimitry Andric     goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
18420b57cec5SDimitry Andric   }
18430b57cec5SDimitry Andric 
18440b57cec5SDimitry Andric   ptr = this_thr->th.th_free_lists[index].th_free_list_self;
18450b57cec5SDimitry Andric   if (ptr != NULL) {
18460b57cec5SDimitry Andric     // pop the head of no-sync free list
18470b57cec5SDimitry Andric     this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
18480b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(
18490b57cec5SDimitry Andric         this_thr ==
18500b57cec5SDimitry Andric         ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t)))
18510b57cec5SDimitry Andric             ->ptr_aligned);
18520b57cec5SDimitry Andric     goto end;
18530b57cec5SDimitry Andric   }
18540b57cec5SDimitry Andric   ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
18550b57cec5SDimitry Andric   if (ptr != NULL) {
18560b57cec5SDimitry Andric     // no-sync free list is empty, use sync free list (filled in by other
18570b57cec5SDimitry Andric     // threads only)
18580b57cec5SDimitry Andric     // pop the head of the sync free list, push NULL instead
18590b57cec5SDimitry Andric     while (!KMP_COMPARE_AND_STORE_PTR(
18600b57cec5SDimitry Andric         &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, nullptr)) {
18610b57cec5SDimitry Andric       KMP_CPU_PAUSE();
18620b57cec5SDimitry Andric       ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
18630b57cec5SDimitry Andric     }
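    // At this point the entire sync free list has been detached: the head was
    // swapped to NULL in one CAS, so other threads (which only ever push onto
    // th_free_list_sync) now see an empty list, and the detached chain cannot
    // change underneath us. The first block is returned to the caller below.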
18640b57cec5SDimitry Andric     // push the rest of the chain onto the no-sync free list (it can be NULL
18650b57cec5SDimitry Andric     // if that was the only block)
18660b57cec5SDimitry Andric     this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
18670b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(
18680b57cec5SDimitry Andric         this_thr ==
18690b57cec5SDimitry Andric         ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t)))
18700b57cec5SDimitry Andric             ->ptr_aligned);
18710b57cec5SDimitry Andric     goto end;
18720b57cec5SDimitry Andric   }
18730b57cec5SDimitry Andric 
18740b57cec5SDimitry Andric alloc_call:
18750b57cec5SDimitry Andric   // no suitable block found in the free lists, thus allocate it
18760b57cec5SDimitry Andric   size = num_lines * DCACHE_LINE;
18770b57cec5SDimitry Andric 
18780b57cec5SDimitry Andric   alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE;
18790b57cec5SDimitry Andric   KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with "
18800b57cec5SDimitry Andric                 "alloc_size %d\n",
18810b57cec5SDimitry Andric                 __kmp_gtid_from_thread(this_thr), (int)alloc_size));
18820b57cec5SDimitry Andric   alloc_ptr = bget(this_thr, (bufsize)alloc_size);
18830b57cec5SDimitry Andric 
18840b57cec5SDimitry Andric   // align ptr to DCACHE_LINE
18850b57cec5SDimitry Andric   ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) +
18860b57cec5SDimitry Andric                   DCACHE_LINE) &
18870b57cec5SDimitry Andric                  ~(DCACHE_LINE - 1));
18880b57cec5SDimitry Andric   descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
18890b57cec5SDimitry Andric 
18900b57cec5SDimitry Andric   descr->ptr_allocated = alloc_ptr; // remember allocated pointer
18910b57cec5SDimitry Andric   // we don't need size_allocated
18920b57cec5SDimitry Andric   descr->ptr_aligned = (void *)this_thr; // remember allocating thread
18930b57cec5SDimitry Andric   // (it is already saved in the bget buffer,
18940b57cec5SDimitry Andric   // but we may want to use another allocator in the future)
18950b57cec5SDimitry Andric   descr->size_aligned = size;
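  // Resulting layout inside the bget allocation:
  //   alloc_ptr .. [alignment padding] .. descr (kmp_mem_descr_t) | ptr
  //   (DCACHE_LINE-aligned user memory of 'size' bytes)
  // The extra DCACHE_LINE bytes in alloc_size guarantee there is always room
  // for the descriptor plus the alignment padding in front of ptr.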
18960b57cec5SDimitry Andric 
18970b57cec5SDimitry Andric end:
18980b57cec5SDimitry Andric   KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n",
18990b57cec5SDimitry Andric                 __kmp_gtid_from_thread(this_thr), ptr));
19000b57cec5SDimitry Andric   return ptr;
19010b57cec5SDimitry Andric } // func __kmp_fast_allocate
19020b57cec5SDimitry Andric 
19030b57cec5SDimitry Andric // Free fast memory and place it on the thread's free list if it is of
19040b57cec5SDimitry Andric // the correct size.
19050b57cec5SDimitry Andric void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) {
19060b57cec5SDimitry Andric   kmp_mem_descr_t *descr;
19070b57cec5SDimitry Andric   kmp_info_t *alloc_thr;
19080b57cec5SDimitry Andric   size_t size;
19090b57cec5SDimitry Andric   size_t idx;
19100b57cec5SDimitry Andric   int index;
19110b57cec5SDimitry Andric 
19120b57cec5SDimitry Andric   KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
19130b57cec5SDimitry Andric                 __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM));
19140b57cec5SDimitry Andric   KMP_ASSERT(ptr != NULL);
19150b57cec5SDimitry Andric 
19160b57cec5SDimitry Andric   descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
19170b57cec5SDimitry Andric 
19180b57cec5SDimitry Andric   KE_TRACE(26, ("   __kmp_fast_free:     size_aligned=%d\n",
19190b57cec5SDimitry Andric                 (int)descr->size_aligned));
19200b57cec5SDimitry Andric 
19210b57cec5SDimitry Andric   size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines
19220b57cec5SDimitry Andric 
19230b57cec5SDimitry Andric   idx = DCACHE_LINE * 2; // 2 cache lines is the minimal block size
19240b57cec5SDimitry Andric   if (idx == size) {
19250b57cec5SDimitry Andric     index = 0; // 2 cache lines
19260b57cec5SDimitry Andric   } else if ((idx <<= 1) == size) {
19270b57cec5SDimitry Andric     index = 1; // 4 cache lines
19280b57cec5SDimitry Andric   } else if ((idx <<= 2) == size) {
19290b57cec5SDimitry Andric     index = 2; // 16 cache lines
19300b57cec5SDimitry Andric   } else if ((idx <<= 2) == size) {
19310b57cec5SDimitry Andric     index = 3; // 64 cache lines
19320b57cec5SDimitry Andric   } else {
19330b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64);
19340b57cec5SDimitry Andric     goto free_call; // 65 or more cache lines ( > 8KB )
19350b57cec5SDimitry Andric   }
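  // ___kmp_fast_allocate rounds every cached request up to exactly 2, 4, 16
  // or 64 cache lines, so comparing size for equality with those bucket sizes
  // is enough to recover the free-list index.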
19360b57cec5SDimitry Andric 
19370b57cec5SDimitry Andric   alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
19380b57cec5SDimitry Andric   if (alloc_thr == this_thr) {
19390b57cec5SDimitry Andric     // push block to self no-sync free list, linking previous head (LIFO)
19400b57cec5SDimitry Andric     *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
19410b57cec5SDimitry Andric     this_thr->th.th_free_lists[index].th_free_list_self = ptr;
19420b57cec5SDimitry Andric   } else {
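    // The block belongs to another thread. Rather than touching that thread's
    // sync free list on every free, blocks are batched on this thread's local
    // th_free_list_other queue (the head's descriptor tracks the queue length)
    // and the whole chain is handed back to the owner in one atomic push once
    // the owner changes or KMP_FREE_LIST_LIMIT is reached.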
19430b57cec5SDimitry Andric     void *head = this_thr->th.th_free_lists[index].th_free_list_other;
19440b57cec5SDimitry Andric     if (head == NULL) {
19450b57cec5SDimitry Andric       // Create new free list
19460b57cec5SDimitry Andric       this_thr->th.th_free_lists[index].th_free_list_other = ptr;
19470b57cec5SDimitry Andric       *((void **)ptr) = NULL; // mark the tail of the list
19480b57cec5SDimitry Andric       descr->size_allocated = (size_t)1; // head of the list keeps its length
19490b57cec5SDimitry Andric     } else {
19500b57cec5SDimitry Andric       // need to check the existing "other" list's owning thread and queue size
19510b57cec5SDimitry Andric       kmp_mem_descr_t *dsc =
19520b57cec5SDimitry Andric           (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t));
19530b57cec5SDimitry Andric       // allocating thread, same for all queue nodes
19540b57cec5SDimitry Andric       kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned);
19550b57cec5SDimitry Andric       size_t q_sz =
19560b57cec5SDimitry Andric           dsc->size_allocated + 1; // new size in case we add the current block
19570b57cec5SDimitry Andric       if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
19580b57cec5SDimitry Andric         // we can add the current block to the "other" list, no sync needed
19590b57cec5SDimitry Andric         *((void **)ptr) = head;
19600b57cec5SDimitry Andric         descr->size_allocated = q_sz;
19610b57cec5SDimitry Andric         this_thr->th.th_free_lists[index].th_free_list_other = ptr;
19620b57cec5SDimitry Andric       } else {
19630b57cec5SDimitry Andric         // either the queue's owning thread is changing or the size limit was
19640b57cec5SDimitry Andric         // exceeded: return the old queue to its allocating thread (q_th)
19650b57cec5SDimitry Andric         // synchronously, and start a new list for alloc_thr's blocks
19660b57cec5SDimitry Andric         void *old_ptr;
19670b57cec5SDimitry Andric         void *tail = head;
19680b57cec5SDimitry Andric         void *next = *((void **)head);
19690b57cec5SDimitry Andric         while (next != NULL) {
19700b57cec5SDimitry Andric           KMP_DEBUG_ASSERT(
19710b57cec5SDimitry Andric               // queue size should decrease by 1 each step through the list
19720b57cec5SDimitry Andric               ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t)))
19730b57cec5SDimitry Andric                       ->size_allocated +
19740b57cec5SDimitry Andric                   1 ==
19750b57cec5SDimitry Andric               ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t)))
19760b57cec5SDimitry Andric                   ->size_allocated);
19770b57cec5SDimitry Andric           tail = next; // remember tail node
19780b57cec5SDimitry Andric           next = *((void **)next);
19790b57cec5SDimitry Andric         }
19800b57cec5SDimitry Andric         KMP_DEBUG_ASSERT(q_th != NULL);
19810b57cec5SDimitry Andric         // push the old queue onto the owner's sync free list
19820b57cec5SDimitry Andric         old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
19830b57cec5SDimitry Andric         /* the tail's next pointer must be set before publishing head on the sync
19840b57cec5SDimitry Andric            free list, to avoid exposing a broken list to other threads, even for an instant. */
19850b57cec5SDimitry Andric         *((void **)tail) = old_ptr;
19860b57cec5SDimitry Andric 
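        // If the CAS below fails, another thread pushed onto the owner's sync
        // list first; re-read the new head and re-link it after our tail
        // before retrying.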
19870b57cec5SDimitry Andric         while (!KMP_COMPARE_AND_STORE_PTR(
19880b57cec5SDimitry Andric             &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) {
19890b57cec5SDimitry Andric           KMP_CPU_PAUSE();
19900b57cec5SDimitry Andric           old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
19910b57cec5SDimitry Andric           *((void **)tail) = old_ptr;
19920b57cec5SDimitry Andric         }
19930b57cec5SDimitry Andric 
19940b57cec5SDimitry Andric         // start a new list of not-self blocks
19950b57cec5SDimitry Andric         this_thr->th.th_free_lists[index].th_free_list_other = ptr;
19960b57cec5SDimitry Andric         *((void **)ptr) = NULL;
19970b57cec5SDimitry Andric         descr->size_allocated = (size_t)1; // head of queue keeps its length
19980b57cec5SDimitry Andric       }
19990b57cec5SDimitry Andric     }
20000b57cec5SDimitry Andric   }
20010b57cec5SDimitry Andric   goto end;
20020b57cec5SDimitry Andric 
20030b57cec5SDimitry Andric free_call:
20040b57cec5SDimitry Andric   KE_TRACE(25, ("__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
20050b57cec5SDimitry Andric                 __kmp_gtid_from_thread(this_thr), (int)size));
20060b57cec5SDimitry Andric   __kmp_bget_dequeue(this_thr); /* Release any queued buffers */
20070b57cec5SDimitry Andric   brel(this_thr, descr->ptr_allocated);
20080b57cec5SDimitry Andric 
20090b57cec5SDimitry Andric end:
20100b57cec5SDimitry Andric   KE_TRACE(25, ("<- __kmp_fast_free() returns\n"));
20110b57cec5SDimitry Andric 
20120b57cec5SDimitry Andric } // func __kmp_fast_free
20130b57cec5SDimitry Andric 
20140b57cec5SDimitry Andric // Initialize the thread free lists related to fast memory
20150b57cec5SDimitry Andric // Only do this when a thread is initially created.
20160b57cec5SDimitry Andric void __kmp_initialize_fast_memory(kmp_info_t *this_thr) {
20170b57cec5SDimitry Andric   KE_TRACE(10, ("__kmp_initialize_fast_memory: Called from th %p\n", this_thr));
20180b57cec5SDimitry Andric 
20190b57cec5SDimitry Andric   memset(this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof(kmp_free_list_t));
20200b57cec5SDimitry Andric }
20210b57cec5SDimitry Andric 
20220b57cec5SDimitry Andric // Free the memory in the thread free lists related to fast memory
20230b57cec5SDimitry Andric // Only do this when a thread is being reaped (destroyed).
20240b57cec5SDimitry Andric void __kmp_free_fast_memory(kmp_info_t *th) {
20250b57cec5SDimitry Andric   // Assuming BGET is the underlying allocator, walk through its structures...
20260b57cec5SDimitry Andric   int bin;
20270b57cec5SDimitry Andric   thr_data_t *thr = get_thr_data(th);
20280b57cec5SDimitry Andric   void **lst = NULL;
20290b57cec5SDimitry Andric 
20300b57cec5SDimitry Andric   KE_TRACE(
20310b57cec5SDimitry Andric       5, ("__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th)));
20320b57cec5SDimitry Andric 
20330b57cec5SDimitry Andric   __kmp_bget_dequeue(th); // Release any queued buffers
20340b57cec5SDimitry Andric 
20350b57cec5SDimitry Andric   // Dig through free lists and extract all allocated blocks
20360b57cec5SDimitry Andric   for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
20370b57cec5SDimitry Andric     bfhead_t *b = thr->freelist[bin].ql.flink;
20380b57cec5SDimitry Andric     while (b != &thr->freelist[bin]) {
20390b57cec5SDimitry Andric       if ((kmp_uintptr_t)b->bh.bb.bthr & 1) { // low bit set: allocated address
20400b57cec5SDimitry Andric         *((void **)b) =
20410b57cec5SDimitry Andric             lst; // link the list (overrides bthr, but flink is still intact)
20420b57cec5SDimitry Andric         lst = (void **)b; // push b into lst
20430b57cec5SDimitry Andric       }
20440b57cec5SDimitry Andric       b = b->ql.flink; // get next buffer
20450b57cec5SDimitry Andric     }
20460b57cec5SDimitry Andric   }
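  // Walk the collected chain and return each block to the system through the
  // pool's release callback, keeping the BufStats counters consistent for
  // __kmp_finalize_bget().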
20470b57cec5SDimitry Andric   while (lst != NULL) {
20480b57cec5SDimitry Andric     void *next = *lst;
20490b57cec5SDimitry Andric     KE_TRACE(10, ("__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
20500b57cec5SDimitry Andric                   lst, next, th, __kmp_gtid_from_thread(th)));
20510b57cec5SDimitry Andric     (*thr->relfcn)(lst);
20520b57cec5SDimitry Andric #if BufStats
20530b57cec5SDimitry Andric     // count blocks to prevent problems in __kmp_finalize_bget()
20540b57cec5SDimitry Andric     thr->numprel++; /* Nr of expansion block releases */
20550b57cec5SDimitry Andric     thr->numpblk--; /* Total number of blocks */
20560b57cec5SDimitry Andric #endif
20570b57cec5SDimitry Andric     lst = (void **)next;
20580b57cec5SDimitry Andric   }
20590b57cec5SDimitry Andric 
20600b57cec5SDimitry Andric   KE_TRACE(
20610b57cec5SDimitry Andric       5, ("__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th)));
20620b57cec5SDimitry Andric }
20630b57cec5SDimitry Andric 
20640b57cec5SDimitry Andric #endif // USE_FAST_MEMORY
2065