/*
 * Copyright (c) 2015-2016 Intel Corporation, Inc. All rights reserved.
 * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
33
34 #ifndef _OFI_MEM_H_
35 #define _OFI_MEM_H_
36
37 #include <config.h>
38
39 #include <assert.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <ofi_list.h>
43 #include <ofi_osd.h>
44
45
/*
 * Optional Valgrind support.
 *
 * When INCLUDE_VALGRIND is defined the memcheck header is pulled in.  If
 * VALGRIND_MAKE_MEM_DEFINED is still undefined afterwards, a warning is
 * raised (only when Valgrind was requested) and the macro is defined to
 * expand to nothing so that call sites compile unchanged.
 */
#ifdef INCLUDE_VALGRIND
#  include <valgrind/memcheck.h>
#endif

#ifndef VALGRIND_MAKE_MEM_DEFINED
#  ifdef INCLUDE_VALGRIND
#    warning "Valgrind requested, but VALGRIND_MAKE_MEM_DEFINED undefined"
#  endif
#  define VALGRIND_MAKE_MEM_DEFINED(addr, len)
#endif
56
57
58 void ofi_mem_init(void);
59 void ofi_mem_fini(void);
60
/*
 * Page size kinds.
 * NOTE(review): presumably these are indices into page_sizes[] -- confirm
 * against the code that fills that array.
 */
enum {
	OFI_PAGE_SIZE = 0,
	OFI_DEF_HUGEPAGE_SIZE = 1,
};
65
66 extern size_t *page_sizes;
67 extern size_t num_page_sizes;
68
/*
 * Return the value reported by ofi_sysconf(_SC_PAGESIZE), unchanged.
 *
 * The parameter list is spelled (void) so that the definition is a real
 * prototype: with an empty "()" list a C11 compiler would silently accept
 * calls that pass arguments.
 */
static inline long ofi_get_page_size(void)
{
	return ofi_sysconf(_SC_PAGESIZE);
}
73 ssize_t ofi_get_hugepage_size(void);
74
75 size_t ofi_get_mem_size(void);
76
77
/*
 * Local memdup implementation, kept here so no external library is needed.
 *
 * Allocates 'size' bytes with malloc() and copies 'size' bytes from 'src'
 * into the new allocation.
 *
 * Returns the malloc()ed copy, or NULL when malloc() fails (in which case
 * nothing is copied).
 */
static inline void *mem_dup(const void *src, size_t size)
{
	void *copy = malloc(size);

	if (!copy)
		return NULL;

	/* memcpy() returns its destination pointer. */
	return memcpy(copy, src, size);
}
87
ofi_str_dup(const char * src,char ** dst)88 static inline int ofi_str_dup(const char *src, char **dst)
89 {
90 if (src) {
91 *dst = strdup(src);
92 if (!*dst)
93 return -FI_ENOMEM;
94 } else {
95 *dst = NULL;
96 }
97 return 0;
98 }
99
/*
 * Buffer pool (free stack) template
 *
 * Each stack entry is a link pointer immediately followed by the user
 * buffer.  The stack header records the top-of-stack link in 'next';
 * FREESTACK_EMPTY marks an empty stack.
 *
 * All macro arguments are parenthesized so that expression arguments
 * (for example "ptr + 1") are evaluated before the casts are applied.
 */
#define FREESTACK_EMPTY	NULL

/* Convert between a user buffer and the link slot stored just before it. */
#define freestack_get_next(user_buf)	((char *)(user_buf) - sizeof(void *))
#define freestack_get_user_buf(entry)	((char *)(entry) + sizeof(void *))

/*
 * Debug builds clear the link of an entry that is handed out and assert
 * that it is still clear when the entry is pushed back.  In other builds
 * both macros expand to nothing.
 */
#if ENABLE_DEBUG
#define freestack_init_next(entry)	(*((void **)(entry)) = NULL)
#define freestack_check_next(entry)	assert(*((void **)(entry)) == NULL)
#else
#define freestack_init_next(entry)
#define freestack_check_next(entry)
#endif

/* Fields that must start every free stack structure. */
#define FREESTACK_HEADER		\
	size_t		size;		\
	void		*next;

#define freestack_isempty(fs)	((fs)->next == FREESTACK_EMPTY)

/*
 * Push user buffer 'p' onto stack 'fs'.  'p' is evaluated exactly once;
 * its link slot is cached in a macro-local variable.
 */
#define freestack_push(fs, p)						\
do {									\
	void **ofi_fs_slot_ = (void **) freestack_get_next(p);		\
	freestack_check_next(ofi_fs_slot_);				\
	*ofi_fs_slot_ = (fs)->next;					\
	(fs)->next = ofi_fs_slot_;					\
} while (0)

/* Pop the top user buffer; the stack must not be empty. */
#define freestack_pop(fs)	freestack_pop_impl((fs), (fs)->next)
128
freestack_pop_impl(void * fs,void * fs_next)129 static inline void* freestack_pop_impl(void *fs, void *fs_next)
130 {
131 struct _freestack {
132 FREESTACK_HEADER
133 } *freestack = (struct _freestack *)fs;
134 assert(!freestack_isempty(freestack));
135 freestack->next = *((void **)fs_next);
136 freestack_init_next(fs_next);
137 return freestack_get_user_buf(fs_next);
138 }
139
/*
 * DECLARE_FREESTACK(entrytype, name) generates a typed free stack:
 *
 *   struct name_entry      - link pointer followed by one 'entrytype' buffer
 *   struct name            - FREESTACK_HEADER plus a flexible array of entries
 *   name_entry_init_func   - optional per-buffer initializer callback
 *   name_init(fs, size, init, arg)
 *                          - sets up a caller-provided stack of 'size'
 *                            entries ('size' must be a power of two) and
 *                            pushes the entries from the last index down to
 *                            index 0, calling init(buf, arg) on each one
 *                            first when 'init' is non-NULL
 *   name_create(size, init, arg)
 *                          - calloc()s a stack with 'size' rounded up to a
 *                            power of two and initializes it; returns NULL
 *                            if the allocation fails
 *   name_index(fs, entry)  - array index of the entry that owns buffer
 *                            'entry'
 *   name_free(fs)          - free()s the stack
 */
#define DECLARE_FREESTACK(entrytype, name)				\
struct name ## _entry {							\
	void		*next;						\
	entrytype	buf;						\
};									\
struct name {								\
	FREESTACK_HEADER						\
	struct name ## _entry	entry[];				\
};									\
									\
typedef void (*name ## _entry_init_func)(entrytype *buf, void *arg);	\
									\
static inline void							\
name ## _init(struct name *fs, size_t size,				\
	      name ## _entry_init_func init, void *arg)			\
{									\
	ssize_t idx;							\
	assert(size == roundup_power_of_two(size));			\
	assert(sizeof(fs->entry[0].buf) >= sizeof(void *));		\
	fs->size = size;						\
	fs->next = FREESTACK_EMPTY;					\
	idx = size - 1;							\
	while (idx >= 0) {						\
		if (init)						\
			init(&fs->entry[idx].buf, arg);			\
		freestack_push(fs, &fs->entry[idx].buf);		\
		idx--;							\
	}								\
}									\
									\
static inline struct name *						\
name ## _create(size_t size, name ## _entry_init_func init, void *arg)	\
{									\
	size_t cnt = roundup_power_of_two(size);			\
	struct name *fs;						\
									\
	fs = (struct name *) calloc(1, sizeof(*fs) +			\
			cnt * sizeof(struct name ## _entry));		\
	if (!fs)							\
		return NULL;						\
	name ## _init(fs, cnt, init, arg);				\
	return fs;							\
}									\
									\
static inline int name ## _index(struct name *fs, entrytype *entry)	\
{									\
	struct name ## _entry *owner =					\
		(struct name ## _entry *) freestack_get_next(entry);	\
	return (int) (owner - (struct name ## _entry *) fs->entry);	\
}									\
									\
static inline void name ## _free(struct name *fs)			\
{									\
	free(fs);							\
}
195
/*
 * Buffer pool (free stack) template for shared memory regions
 *
 * The header stores 'base_addr' next to the stack top.  Links are not
 * stored as local addresses: a local link address is converted to
 * base_addr + (local - fs) on push, and converted back on pop.
 */
#define SMR_FREESTACK_EMPTY	NULL

/* Fields that must start every shared-memory free stack structure. */
#define SMR_FREESTACK_HEADER		\
	void		*base_addr;	\
	size_t		size;		\
	void		*next;

#define smr_freestack_isempty(fs)	((fs)->next == SMR_FREESTACK_EMPTY)

/*
 * Push local user buffer 'local_p' onto stack 'fs'.
 *
 * Both arguments are parenthesized, 'local_p' is evaluated once, and the
 * temporaries use reserved-looking names so that a caller variable called
 * 'p' (or any other ordinary name) is never captured by the macro body.
 */
#define smr_freestack_push(fs, local_p)					\
do {									\
	void **smr_fs_slot_ = (void **) freestack_get_next(local_p);	\
	void *smr_fs_link_ = (char **) (fs)->base_addr +		\
			     ((char **) smr_fs_slot_ - (char **) (fs));	\
	*smr_fs_slot_ = (fs)->next;					\
	(fs)->next = smr_fs_link_;					\
} while (0)

/* Pop the top user buffer; the stack must not be empty. */
#define smr_freestack_pop(fs)	smr_freestack_pop_impl((fs), (fs)->next)
216
smr_freestack_pop_impl(void * fs,void * next)217 static inline void* smr_freestack_pop_impl(void *fs, void *next)
218 {
219 void *local;
220
221 struct _freestack {
222 SMR_FREESTACK_HEADER
223 } *freestack = (struct _freestack*) fs;
224 assert(next != NULL);
225
226 local = (char **) fs + ((char **) next -
227 (char **) freestack->base_addr);
228
229 freestack->next = *((void **)local);
230 freestack_init_next(local);
231
232 return freestack_get_user_buf(local);
233 }
234
/*
 * DECLARE_SMR_FREESTACK(entrytype, name) generates a typed free stack for
 * shared memory regions:
 *
 *   struct name_entry      - link pointer followed by one 'entrytype' buffer
 *   struct name            - SMR_FREESTACK_HEADER plus a flexible array of
 *                            entries
 *   name_init(fs, size)    - sets up a caller-provided stack of 'size'
 *                            entries ('size' must be a power of two), sets
 *                            base_addr to fs itself and pushes the entries
 *                            from the last index down to index 0
 *   name_create(size)      - calloc()s a stack with 'size' rounded up to a
 *                            power of two and initializes it; returns NULL
 *                            if the allocation fails
 *   name_index(fs, entry)  - array index of the entry that owns buffer
 *                            'entry'
 *   name_free(fs)          - free()s the stack
 *
 * name_create() sizes the allocation per 'struct name_entry' (link pointer
 * plus buffer).  Sizing it per 'entrytype' alone leaves the array short by
 * one pointer per entry, and name_init() then writes past the end of the
 * allocation.
 */
#define DECLARE_SMR_FREESTACK(entrytype, name)				\
struct name ## _entry {							\
	void		*next;						\
	entrytype	buf;						\
};									\
struct name {								\
	SMR_FREESTACK_HEADER						\
	struct name ## _entry	entry[];				\
};									\
									\
static inline void name ## _init(struct name *fs, size_t size)		\
{									\
	ssize_t i;							\
	assert(size == roundup_power_of_two(size));			\
	assert(sizeof(fs->entry[0].buf) >= sizeof(void *));		\
	fs->size = size;						\
	fs->next = SMR_FREESTACK_EMPTY;					\
	fs->base_addr = fs;						\
	for (i = size - 1; i >= 0; i--)					\
		smr_freestack_push(fs, &fs->entry[i].buf);		\
}									\
									\
static inline struct name * name ## _create(size_t size)		\
{									\
	struct name *fs;						\
	fs = (struct name*) calloc(1, sizeof(*fs) +			\
			sizeof(struct name ## _entry) *			\
			(roundup_power_of_two(size)));			\
	if (fs)								\
		name ##_init(fs, roundup_power_of_two(size));		\
	return fs;							\
}									\
									\
static inline int name ## _index(struct name *fs,			\
		entrytype *entry)					\
{									\
	return (int)((struct name ## _entry *)				\
			(freestack_get_next(entry))			\
			- (struct name ## _entry *)fs->entry);		\
}									\
									\
static inline void name ## _free(struct name *fs)			\
{									\
	free(fs);							\
}
279
280
/*
 * Buffer Pool
 */

/*
 * Bit flags stored in ofi_bufpool_attr.flags.  OFI_BUFPOOL_INDEXED selects
 * between the ofi_buf_*() and ofi_ibuf_*() families of inline helpers,
 * which assert on it.
 */
enum {
	OFI_BUFPOOL_INDEXED	= (1 << 1),
	OFI_BUFPOOL_NO_TRACK	= (1 << 2),
	OFI_BUFPOOL_HUGEPAGES	= (1 << 3),
};
290
struct ofi_bufpool_region;

/*
 * Creation attributes of a buffer pool; a copy is kept in
 * struct ofi_bufpool.attr.
 *
 * NOTE(review): the meaning of size, alignment and max_cnt and the exact
 * contract of the callbacks are defined by ofi_bufpool_create_attr(),
 * which is not part of this header -- confirm there.
 */
struct ofi_bufpool_attr {
	size_t		size;
	size_t		alignment;
	size_t		max_cnt;
	/* Number of buffers per region (see ofi_bufpool_get_ibuf()). */
	size_t		chunk_cnt;
	/* Optional per-region / per-buffer callbacks. */
	int		(*alloc_fn)(struct ofi_bufpool_region *region);
	void		(*free_fn)(struct ofi_bufpool_region *region);
	void		(*init_fn)(struct ofi_bufpool_region *region, void *buf);
	void		*context;
	/* Combination of OFI_BUFPOOL_* bits. */
	int		flags;
};
304
305 struct ofi_bufpool {
306 union {
307 struct slist entries;
308 struct dlist_entry regions;
309 } free_list;
310
311 size_t entry_size;
312 size_t entry_cnt;
313
314 struct ofi_bufpool_region **region_table;
315 size_t region_cnt;
316 size_t alloc_size;
317 size_t region_size;
318 struct ofi_bufpool_attr attr;
319 };
320
321 struct ofi_bufpool_region {
322 struct dlist_entry entry;
323 struct dlist_entry free_list;
324 char *alloc_region;
325 char *mem_region;
326 size_t index;
327 void *context;
328 struct ofi_bufpool *pool;
329 #ifndef NDEBUG
330 size_t use_cnt;
331 #endif
332 };
333
334 struct ofi_bufpool_hdr {
335 union {
336 struct slist_entry slist;
337 struct dlist_entry dlist;
338 } entry;
339 struct ofi_bufpool_region *region;
340 size_t index;
341 };
342
343 int ofi_bufpool_create_attr(struct ofi_bufpool_attr *attr,
344 struct ofi_bufpool **buf_pool);
345
346 static inline int
ofi_bufpool_create(struct ofi_bufpool ** buf_pool,size_t size,size_t alignment,size_t max_cnt,size_t chunk_cnt,int flags)347 ofi_bufpool_create(struct ofi_bufpool **buf_pool,
348 size_t size, size_t alignment,
349 size_t max_cnt, size_t chunk_cnt, int flags)
350 {
351 struct ofi_bufpool_attr attr = {
352 .size = size,
353 .alignment = alignment,
354 .max_cnt = max_cnt,
355 .chunk_cnt = chunk_cnt,
356 .flags = flags,
357 };
358 return ofi_bufpool_create_attr(&attr, buf_pool);
359 }
360
361 void ofi_bufpool_destroy(struct ofi_bufpool *pool);
362
363 int ofi_bufpool_grow(struct ofi_bufpool *pool);
364
ofi_buf_hdr(void * buf)365 static inline struct ofi_bufpool_hdr *ofi_buf_hdr(void *buf)
366 {
367 return (struct ofi_bufpool_hdr *)
368 ((char *) buf - sizeof(struct ofi_bufpool_hdr));
369 }
370
ofi_buf_data(struct ofi_bufpool_hdr * buf_hdr)371 static inline void *ofi_buf_data(struct ofi_bufpool_hdr *buf_hdr)
372 {
373 return buf_hdr + 1;
374 }
375
ofi_buf_region(void * buf)376 static inline struct ofi_bufpool_region *ofi_buf_region(void *buf)
377 {
378 assert(ofi_buf_hdr(buf)->region);
379 return ofi_buf_hdr(buf)->region;
380 }
381
ofi_buf_pool(void * buf)382 static inline struct ofi_bufpool *ofi_buf_pool(void *buf)
383 {
384 assert(ofi_buf_region(buf)->pool);
385 return ofi_buf_region(buf)->pool;
386 }
387
ofi_buf_free(void * buf)388 static inline void ofi_buf_free(void *buf)
389 {
390 assert(ofi_buf_region(buf)->use_cnt--);
391 assert(!(ofi_buf_pool(buf)->attr.flags & OFI_BUFPOOL_INDEXED));
392 slist_insert_head(&ofi_buf_hdr(buf)->entry.slist,
393 &ofi_buf_pool(buf)->free_list.entries);
394 }
395
396 int ofi_ibuf_is_lower(struct dlist_entry *item, const void *arg);
397 int ofi_ibufpool_region_is_lower(struct dlist_entry *item, const void *arg);
398
ofi_ibuf_free(void * buf)399 static inline void ofi_ibuf_free(void *buf)
400 {
401 struct ofi_bufpool_hdr *buf_hdr;
402
403 assert(ofi_buf_pool(buf)->attr.flags & OFI_BUFPOOL_INDEXED);
404 assert(ofi_buf_region(buf)->use_cnt--);
405 buf_hdr = ofi_buf_hdr(buf);
406
407 dlist_insert_order(&buf_hdr->region->free_list,
408 ofi_ibuf_is_lower, &buf_hdr->entry.dlist);
409
410 if (dlist_empty(&buf_hdr->region->entry)) {
411 dlist_insert_order(&buf_hdr->region->pool->free_list.regions,
412 ofi_ibufpool_region_is_lower,
413 &buf_hdr->region->entry);
414 }
415 }
416
ofi_buf_index(void * buf)417 static inline size_t ofi_buf_index(void *buf)
418 {
419 return ofi_buf_hdr(buf)->index;
420 }
421
ofi_bufpool_get_ibuf(struct ofi_bufpool * pool,size_t index)422 static inline void *ofi_bufpool_get_ibuf(struct ofi_bufpool *pool, size_t index)
423 {
424 void *buf;
425
426 buf = pool->region_table[(size_t)(index / pool->attr.chunk_cnt)]->
427 mem_region + (index % pool->attr.chunk_cnt) * pool->entry_size;
428
429 assert(ofi_buf_region(buf)->use_cnt);
430 return buf;
431 }
432
ofi_bufpool_empty(struct ofi_bufpool * pool)433 static inline int ofi_bufpool_empty(struct ofi_bufpool *pool)
434 {
435 return slist_empty(&pool->free_list.entries);
436 }
437
ofi_ibufpool_empty(struct ofi_bufpool * pool)438 static inline int ofi_ibufpool_empty(struct ofi_bufpool *pool)
439 {
440 return dlist_empty(&pool->free_list.regions);
441 }
442
ofi_buf_alloc(struct ofi_bufpool * pool)443 static inline void *ofi_buf_alloc(struct ofi_bufpool *pool)
444 {
445 struct ofi_bufpool_hdr *buf_hdr;
446
447 assert(!(pool->attr.flags & OFI_BUFPOOL_INDEXED));
448 if (OFI_UNLIKELY(ofi_bufpool_empty(pool))) {
449 if (ofi_bufpool_grow(pool))
450 return NULL;
451 }
452
453 slist_remove_head_container(&pool->free_list.entries,
454 struct ofi_bufpool_hdr, buf_hdr, entry.slist);
455 assert(++buf_hdr->region->use_cnt);
456 return ofi_buf_data(buf_hdr);
457 }
458
ofi_buf_alloc_ex(struct ofi_bufpool * pool,void ** context)459 static inline void *ofi_buf_alloc_ex(struct ofi_bufpool *pool,
460 void **context)
461 {
462 void *buf = ofi_buf_alloc(pool);
463
464 assert(context);
465 if (OFI_UNLIKELY(!buf))
466 return NULL;
467
468 *context = ofi_buf_region(buf)->context;
469 return buf;
470 }
471
ofi_ibuf_alloc(struct ofi_bufpool * pool)472 static inline void *ofi_ibuf_alloc(struct ofi_bufpool *pool)
473 {
474 struct ofi_bufpool_hdr *buf_hdr;
475 struct ofi_bufpool_region *buf_region;
476
477 assert(pool->attr.flags & OFI_BUFPOOL_INDEXED);
478 if (OFI_UNLIKELY(ofi_ibufpool_empty(pool))) {
479 if (ofi_bufpool_grow(pool))
480 return NULL;
481 }
482
483 buf_region = container_of(pool->free_list.regions.next,
484 struct ofi_bufpool_region, entry);
485 dlist_pop_front(&buf_region->free_list, struct ofi_bufpool_hdr,
486 buf_hdr, entry.dlist);
487 assert(++buf_hdr->region->use_cnt);
488
489 if (dlist_empty(&buf_region->free_list))
490 dlist_remove_init(&buf_region->entry);
491 return ofi_buf_data(buf_hdr);
492 }
493
494
495 /*
496 * Persistent memory support
497 */
498 void ofi_pmem_init(void);
499
500 extern uint64_t OFI_RMA_PMEM;
501 extern void (*ofi_pmem_commit)(const void *addr, size_t len);
502
503
504 #endif /* _OFI_MEM_H_ */
505