1 #define PY_SSIZE_T_CLEAN
2 #include <Python.h>
3 #include "structmember.h"
4 
5 #include <pymem.h>
6 /* public api in 3.7 */
7 #if PY_VERSION_HEX < 0x03070000
8 #define PyTraceMalloc_Track _PyTraceMalloc_Track
9 #define PyTraceMalloc_Untrack _PyTraceMalloc_Untrack
10 #endif
11 
12 #define NPY_NO_DEPRECATED_API NPY_API_VERSION
13 #define _MULTIARRAYMODULE
14 #include <numpy/ndarraytypes.h>
15 #include "numpy/arrayobject.h"
16 #include <numpy/npy_common.h>
17 #include "npy_config.h"
18 #include "alloc.h"
19 
20 
21 #include <assert.h>
22 
23 #ifdef NPY_OS_LINUX
24 #include <sys/mman.h>
25 #ifndef MADV_HUGEPAGE
26 /*
27  * Use code 14 (MADV_HUGEPAGE) if it isn't defined. This gives a chance of
28  * enabling huge pages even if built with linux kernel < 2.6.38
29  */
30 #define MADV_HUGEPAGE 14
31 #endif
32 #endif
33 
#define NBUCKETS 1024 /* number of buckets for data */
#define NBUCKETS_DIM 16 /* number of buckets for dimensions/strides */
#define NCACHE 7 /* number of cache entries per bucket */
/*
 * One bucket of the small-block cache: a stack of at most NCACHE spare
 * pointers, with `available` counting how many of the slots are in use.
 *
 * this structure fits neatly into a cacheline
 * (one counter plus NCACHE pointers; 64 bytes when both are 8 bytes wide --
 * presumably the intended target, confirm for other platforms)
 */
typedef struct {
    npy_uintp available; /* number of cached pointers */
    void * ptrs[NCACHE];
} cache_bucket;
/* array data cache; the bucket index is the block size in bytes */
static cache_bucket datacache[NBUCKETS];
/* dimension/stride cache; the bucket index is the number of npy_intp items */
static cache_bucket dimcache[NBUCKETS_DIM];

/*
 * Non-zero when large allocations should be passed to madvise() with
 * MADV_HUGEPAGE (Linux only); enabled by default and changed through
 * _set_madvise_hugepage().
 */
static int _madvise_hugepage = 1;
46 
47 
48 /*
49  * This function enables or disables the use of `MADV_HUGEPAGE` on Linux
50  * by modifying the global static `_madvise_hugepage`.
51  * It returns the previous value of `_madvise_hugepage`.
52  *
53  * It is exposed to Python as `np.core.multiarray._set_madvise_hugepage`.
54  */
55 NPY_NO_EXPORT PyObject *
_set_madvise_hugepage(PyObject * NPY_UNUSED (self),PyObject * enabled_obj)56 _set_madvise_hugepage(PyObject *NPY_UNUSED(self), PyObject *enabled_obj)
57 {
58     int was_enabled = _madvise_hugepage;
59     int enabled = PyObject_IsTrue(enabled_obj);
60     if (enabled < 0) {
61         return NULL;
62     }
63     _madvise_hugepage = enabled;
64     if (was_enabled) {
65         Py_RETURN_TRUE;
66     }
67     Py_RETURN_FALSE;
68 }
69 
70 
/* The caches live in global variables, so the helpers below assert that the GIL is held. */
72 
73 /*
74  * very simplistic small memory block cache to avoid more expensive libc
75  * allocations
76  * base function for data cache with 1 byte buckets and dimension cache with
77  * sizeof(npy_intp) byte buckets
78  */
79 static NPY_INLINE void *
_npy_alloc_cache(npy_uintp nelem,npy_uintp esz,npy_uint msz,cache_bucket * cache,void * (* alloc)(size_t))80 _npy_alloc_cache(npy_uintp nelem, npy_uintp esz, npy_uint msz,
81                  cache_bucket * cache, void * (*alloc)(size_t))
82 {
83     void * p;
84     assert((esz == 1 && cache == datacache) ||
85            (esz == sizeof(npy_intp) && cache == dimcache));
86     assert(PyGILState_Check());
87     if (nelem < msz) {
88         if (cache[nelem].available > 0) {
89             return cache[nelem].ptrs[--(cache[nelem].available)];
90         }
91     }
92     p = alloc(nelem * esz);
93     if (p) {
94 #ifdef _PyPyGC_AddMemoryPressure
95         _PyPyPyGC_AddMemoryPressure(nelem * esz);
96 #endif
97 #ifdef NPY_OS_LINUX
98         /* allow kernel allocating huge pages for large arrays */
99         if (NPY_UNLIKELY(nelem * esz >= ((1u<<22u))) && _madvise_hugepage) {
100             npy_uintp offset = 4096u - (npy_uintp)p % (4096u);
101             npy_uintp length = nelem * esz - offset;
102             /**
103              * Intentionally not checking for errors that may be returned by
104              * older kernel versions; optimistically tries enabling huge pages.
105              */
106             madvise((void*)((npy_uintp)p + offset), length, MADV_HUGEPAGE);
107         }
108 #endif
109     }
110     return p;
111 }
112 
113 /*
114  * return pointer p to cache, nelem is number of elements of the cache bucket
115  * size (1 or sizeof(npy_intp)) of the block pointed too
116  */
117 static NPY_INLINE void
_npy_free_cache(void * p,npy_uintp nelem,npy_uint msz,cache_bucket * cache,void (* dealloc)(void *))118 _npy_free_cache(void * p, npy_uintp nelem, npy_uint msz,
119                 cache_bucket * cache, void (*dealloc)(void *))
120 {
121     assert(PyGILState_Check());
122     if (p != NULL && nelem < msz) {
123         if (cache[nelem].available < NCACHE) {
124             cache[nelem].ptrs[cache[nelem].available++] = p;
125             return;
126         }
127     }
128     dealloc(p);
129 }
130 
131 
132 /*
133  * array data cache, sz is number of bytes to allocate
134  */
135 NPY_NO_EXPORT void *
npy_alloc_cache(npy_uintp sz)136 npy_alloc_cache(npy_uintp sz)
137 {
138     return _npy_alloc_cache(sz, 1, NBUCKETS, datacache, &PyDataMem_NEW);
139 }
140 
141 /* zero initialized data, sz is number of bytes to allocate */
142 NPY_NO_EXPORT void *
npy_alloc_cache_zero(npy_uintp sz)143 npy_alloc_cache_zero(npy_uintp sz)
144 {
145     void * p;
146     NPY_BEGIN_THREADS_DEF;
147     if (sz < NBUCKETS) {
148         p = _npy_alloc_cache(sz, 1, NBUCKETS, datacache, &PyDataMem_NEW);
149         if (p) {
150             memset(p, 0, sz);
151         }
152         return p;
153     }
154     NPY_BEGIN_THREADS;
155     p = PyDataMem_NEW_ZEROED(sz, 1);
156     NPY_END_THREADS;
157     return p;
158 }
159 
160 NPY_NO_EXPORT void
npy_free_cache(void * p,npy_uintp sz)161 npy_free_cache(void * p, npy_uintp sz)
162 {
163     _npy_free_cache(p, sz, NBUCKETS, datacache, &PyDataMem_FREE);
164 }
165 
166 /*
167  * dimension/stride cache, uses a different allocator and is always a multiple
168  * of npy_intp
169  */
170 NPY_NO_EXPORT void *
npy_alloc_cache_dim(npy_uintp sz)171 npy_alloc_cache_dim(npy_uintp sz)
172 {
173     /*
174      * make sure any temporary allocation can be used for array metadata which
175      * uses one memory block for both dimensions and strides
176      */
177     if (sz < 2) {
178         sz = 2;
179     }
180     return _npy_alloc_cache(sz, sizeof(npy_intp), NBUCKETS_DIM, dimcache,
181                             &PyArray_malloc);
182 }
183 
184 NPY_NO_EXPORT void
npy_free_cache_dim(void * p,npy_uintp sz)185 npy_free_cache_dim(void * p, npy_uintp sz)
186 {
187     /* see npy_alloc_cache_dim */
188     if (sz < 2) {
189         sz = 2;
190     }
191     _npy_free_cache(p, sz, NBUCKETS_DIM, dimcache,
192                     &PyArray_free);
193 }
194 
195 
/*
 * malloc/free/realloc hook
 *
 * `_PyDataMem_eventhook` is the callback invoked by the PyDataMem_* functions
 * in this file after each allocation event (NULL when no hook is installed);
 * `_PyDataMem_eventhook_user_data` is passed to it as its last argument.
 * Both are replaced together by PyDataMem_SetEventHook.
 */
NPY_NO_EXPORT PyDataMem_EventHookFunc *_PyDataMem_eventhook;
NPY_NO_EXPORT void *_PyDataMem_eventhook_user_data;
199 
200 /*NUMPY_API
201  * Sets the allocation event hook for numpy array data.
202  * Takes a PyDataMem_EventHookFunc *, which has the signature:
203  *        void hook(void *old, void *new, size_t size, void *user_data).
204  *   Also takes a void *user_data, and void **old_data.
205  *
206  * Returns a pointer to the previous hook or NULL.  If old_data is
207  * non-NULL, the previous user_data pointer will be copied to it.
208  *
209  * If not NULL, hook will be called at the end of each PyDataMem_NEW/FREE/RENEW:
210  *   result = PyDataMem_NEW(size)        -> (*hook)(NULL, result, size, user_data)
211  *   PyDataMem_FREE(ptr)                 -> (*hook)(ptr, NULL, 0, user_data)
212  *   result = PyDataMem_RENEW(ptr, size) -> (*hook)(ptr, result, size, user_data)
213  *
214  * When the hook is called, the GIL will be held by the calling
215  * thread.  The hook should be written to be reentrant, if it performs
216  * operations that might cause new allocation events (such as the
217  * creation/destruction numpy objects, or creating/destroying Python
218  * objects which might cause a gc)
219  */
220 NPY_NO_EXPORT PyDataMem_EventHookFunc *
PyDataMem_SetEventHook(PyDataMem_EventHookFunc * newhook,void * user_data,void ** old_data)221 PyDataMem_SetEventHook(PyDataMem_EventHookFunc *newhook,
222                        void *user_data, void **old_data)
223 {
224     PyDataMem_EventHookFunc *temp;
225     NPY_ALLOW_C_API_DEF
226     NPY_ALLOW_C_API
227     temp = _PyDataMem_eventhook;
228     _PyDataMem_eventhook = newhook;
229     if (old_data != NULL) {
230         *old_data = _PyDataMem_eventhook_user_data;
231     }
232     _PyDataMem_eventhook_user_data = user_data;
233     NPY_DISABLE_C_API
234     return temp;
235 }
236 
237 /*NUMPY_API
238  * Allocates memory for array data.
239  */
240 NPY_NO_EXPORT void *
PyDataMem_NEW(size_t size)241 PyDataMem_NEW(size_t size)
242 {
243     void *result;
244 
245     assert(size != 0);
246     result = malloc(size);
247     if (_PyDataMem_eventhook != NULL) {
248         NPY_ALLOW_C_API_DEF
249         NPY_ALLOW_C_API
250         if (_PyDataMem_eventhook != NULL) {
251             (*_PyDataMem_eventhook)(NULL, result, size,
252                                     _PyDataMem_eventhook_user_data);
253         }
254         NPY_DISABLE_C_API
255     }
256     PyTraceMalloc_Track(NPY_TRACE_DOMAIN, (npy_uintp)result, size);
257     return result;
258 }
259 
260 /*NUMPY_API
261  * Allocates zeroed memory for array data.
262  */
263 NPY_NO_EXPORT void *
PyDataMem_NEW_ZEROED(size_t size,size_t elsize)264 PyDataMem_NEW_ZEROED(size_t size, size_t elsize)
265 {
266     void *result;
267 
268     result = calloc(size, elsize);
269     if (_PyDataMem_eventhook != NULL) {
270         NPY_ALLOW_C_API_DEF
271         NPY_ALLOW_C_API
272         if (_PyDataMem_eventhook != NULL) {
273             (*_PyDataMem_eventhook)(NULL, result, size * elsize,
274                                     _PyDataMem_eventhook_user_data);
275         }
276         NPY_DISABLE_C_API
277     }
278     PyTraceMalloc_Track(NPY_TRACE_DOMAIN, (npy_uintp)result, size);
279     return result;
280 }
281 
282 /*NUMPY_API
283  * Free memory for array data.
284  */
285 NPY_NO_EXPORT void
PyDataMem_FREE(void * ptr)286 PyDataMem_FREE(void *ptr)
287 {
288     PyTraceMalloc_Untrack(NPY_TRACE_DOMAIN, (npy_uintp)ptr);
289     free(ptr);
290     if (_PyDataMem_eventhook != NULL) {
291         NPY_ALLOW_C_API_DEF
292         NPY_ALLOW_C_API
293         if (_PyDataMem_eventhook != NULL) {
294             (*_PyDataMem_eventhook)(ptr, NULL, 0,
295                                     _PyDataMem_eventhook_user_data);
296         }
297         NPY_DISABLE_C_API
298     }
299 }
300 
301 /*NUMPY_API
302  * Reallocate/resize memory for array data.
303  */
304 NPY_NO_EXPORT void *
PyDataMem_RENEW(void * ptr,size_t size)305 PyDataMem_RENEW(void *ptr, size_t size)
306 {
307     void *result;
308 
309     assert(size != 0);
310     result = realloc(ptr, size);
311     if (result != ptr) {
312         PyTraceMalloc_Untrack(NPY_TRACE_DOMAIN, (npy_uintp)ptr);
313     }
314     PyTraceMalloc_Track(NPY_TRACE_DOMAIN, (npy_uintp)result, size);
315     if (_PyDataMem_eventhook != NULL) {
316         NPY_ALLOW_C_API_DEF
317         NPY_ALLOW_C_API
318         if (_PyDataMem_eventhook != NULL) {
319             (*_PyDataMem_eventhook)(ptr, result, size,
320                                     _PyDataMem_eventhook_user_data);
321         }
322         NPY_DISABLE_C_API
323     }
324     return result;
325 }
326