1 /*
2  * Copyright (c) 2015-2016 Intel Corporation, Inc.  All rights reserved.
3  * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33 
34 #ifndef _OFI_MEM_H_
35 #define _OFI_MEM_H_
36 
37 #include <config.h>
38 
39 #include <assert.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <ofi_list.h>
43 #include <ofi_osd.h>
44 
45 
46 #ifdef INCLUDE_VALGRIND
47 #   include <valgrind/memcheck.h>
48 #   ifndef VALGRIND_MAKE_MEM_DEFINED
49 #      warning "Valgrind requested, but VALGRIND_MAKE_MEM_DEFINED undefined"
50 #   endif
51 #endif
52 
53 #ifndef VALGRIND_MAKE_MEM_DEFINED
54 #   define VALGRIND_MAKE_MEM_DEFINED(addr, len)
55 #endif
56 
57 
/* One-time setup/teardown of the cached page-size table below. */
void ofi_mem_init(void);
void ofi_mem_fini(void);

/* Indices into the page_sizes table. */
enum {
	OFI_PAGE_SIZE,
	OFI_DEF_HUGEPAGE_SIZE,
};

/* Table of known page sizes; presumably populated by ofi_mem_init()
 * (defined in the corresponding .c file — confirm there). */
extern size_t *page_sizes;
extern size_t num_page_sizes;
68 
ofi_get_page_size()69 static inline long ofi_get_page_size()
70 {
71 	return ofi_sysconf(_SC_PAGESIZE);
72 }
73 ssize_t ofi_get_hugepage_size(void);
74 
75 size_t ofi_get_mem_size(void);
76 
77 
/*
 * Duplicate size bytes from src into a newly malloc'd buffer.
 * Implemented locally to avoid an external library dependency.
 * Returns NULL on allocation failure; caller owns (and frees) the copy.
 */
static inline void *mem_dup(const void *src, size_t size)
{
	void *copy;

	copy = malloc(size);
	if (!copy)
		return NULL;

	memcpy(copy, src, size);
	return copy;
}
87 
ofi_str_dup(const char * src,char ** dst)88 static inline int ofi_str_dup(const char *src, char **dst)
89 {
90 	if (src) {
91 		*dst = strdup(src);
92 		if (!*dst)
93 			return -FI_ENOMEM;
94 	} else {
95 		*dst = NULL;
96 	}
97 	return 0;
98 }
99 
100 /*
101  * Buffer pool (free stack) template
102  */
103 #define FREESTACK_EMPTY	NULL
104 
105 #define freestack_get_next(user_buf)	((char *)user_buf - sizeof(void *))
106 #define freestack_get_user_buf(entry)	((char *)entry + sizeof(void *))
107 
108 #if ENABLE_DEBUG
109 #define freestack_init_next(entry)	*((void **)entry) = NULL
110 #define freestack_check_next(entry)	assert(*((void **)entry) == NULL)
111 #else
112 #define freestack_init_next(entry)
113 #define freestack_check_next(entry)
114 #endif
115 
116 #define FREESTACK_HEADER 					\
117 	size_t		size;					\
118 	void		*next;					\
119 
120 #define freestack_isempty(fs)	((fs)->next == FREESTACK_EMPTY)
121 #define freestack_push(fs, p)					\
122 do {								\
123 	freestack_check_next(freestack_get_next(p));		\
124 	*(void **) (freestack_get_next(p)) = (fs)->next;	\
125 	(fs)->next = (freestack_get_next(p));			\
126 } while (0)
127 #define freestack_pop(fs) freestack_pop_impl(fs, (fs)->next)
128 
freestack_pop_impl(void * fs,void * fs_next)129 static inline void* freestack_pop_impl(void *fs, void *fs_next)
130 {
131 	struct _freestack {
132 		FREESTACK_HEADER
133 	} *freestack = (struct _freestack *)fs;
134 	assert(!freestack_isempty(freestack));
135 	freestack->next = *((void **)fs_next);
136 	freestack_init_next(fs_next);
137 	return freestack_get_user_buf(fs_next);
138 }
139 
/*
 * DECLARE_FREESTACK(entrytype, name) expands to:
 *   - struct name ## _entry: one slot (link pointer + user buffer)
 *   - struct name: FREESTACK_HEADER followed by a flexible slot array
 *   - name ## _init():  seed the stack with all slots (optionally
 *     running a per-entry init callback); size must be a power of two
 *   - name ## _create(): allocate + init; rounds size up to a power of two
 *   - name ## _index(): slot index of a user buffer within the stack
 *   - name ## _free():  release memory obtained from _create()
 */
#define DECLARE_FREESTACK(entrytype, name)			\
struct name ## _entry {						\
	void		*next;					\
	entrytype	buf;					\
};								\
struct name {							\
	FREESTACK_HEADER					\
	struct name ## _entry	entry[];			\
};								\
								\
typedef void (*name ## _entry_init_func)(entrytype *buf,	\
					 void *arg);		\
								\
static inline void						\
name ## _init(struct name *fs, size_t size,			\
	      name ## _entry_init_func init, void *arg)		\
{								\
	ssize_t i;						\
	assert(size == roundup_power_of_two(size));		\
	assert(sizeof(fs->entry[0].buf) >= sizeof(void *));	\
	fs->size = size;					\
	fs->next = FREESTACK_EMPTY;				\
	for (i = size - 1; i >= 0; i--) {			\
		if (init)					\
			init(&fs->entry[i].buf, arg);		\
		freestack_push(fs, &fs->entry[i].buf);		\
	}							\
}								\
								\
static inline struct name *					\
name ## _create(size_t size, name ## _entry_init_func init,	\
		void *arg)					\
{								\
	struct name *fs;					\
	fs = (struct name*) calloc(1, sizeof(*fs) +		\
		       sizeof(struct name ## _entry) *		\
		       (roundup_power_of_two(size)));		\
	if (fs)							\
		name ##_init(fs, roundup_power_of_two(size),	\
			     init, arg);			\
	return fs;						\
}								\
								\
static inline int name ## _index(struct name *fs,		\
				 entrytype *entry)		\
{								\
	return (int)((struct name ## _entry *)			\
			(freestack_get_next(entry))		\
			- (struct name ## _entry *)fs->entry);	\
}								\
								\
static inline void name ## _free(struct name *fs)		\
{								\
	free(fs);						\
}
195 
196 /*
197  * Buffer pool (free stack) template for shared memory regions
198  */
199 #define SMR_FREESTACK_EMPTY	NULL
200 
201 #define SMR_FREESTACK_HEADER 					\
202 	void		*base_addr;				\
203 	size_t		size;					\
204 	void		*next;					\
205 
206 #define smr_freestack_isempty(fs)	((fs)->next == SMR_FREESTACK_EMPTY)
207 #define smr_freestack_push(fs, local_p)				\
208 do {								\
209 	void *p = (char **) fs->base_addr +			\
210 	    ((char **) freestack_get_next(local_p) -		\
211 		(char **) fs);					\
212 	*(void **) freestack_get_next(local_p) = (fs)->next;	\
213 	(fs)->next = p;						\
214 } while (0)
215 #define smr_freestack_pop(fs) smr_freestack_pop_impl(fs, fs->next)
216 
smr_freestack_pop_impl(void * fs,void * next)217 static inline void* smr_freestack_pop_impl(void *fs, void *next)
218 {
219 	void *local;
220 
221 	struct _freestack {
222 		SMR_FREESTACK_HEADER
223 	} *freestack = (struct _freestack*) fs;
224 	assert(next != NULL);
225 
226 	local = (char **) fs + ((char **) next -
227 		(char **) freestack->base_addr);
228 
229 	freestack->next = *((void **)local);
230 	freestack_init_next(local);
231 
232 	return freestack_get_user_buf(local);
233 }
234 
/*
 * DECLARE_SMR_FREESTACK(entrytype, name): like DECLARE_FREESTACK, but
 * for stacks living in shared memory mapped at different addresses in
 * different processes (see SMR_FREESTACK_HEADER / smr_freestack_push).
 *
 * Fix: name ## _create must reserve room for a full
 * struct name ## _entry (embedded next pointer + user buffer) per slot,
 * matching the flexible entry[] array and the non-SMR template.  The
 * previous sizeof(entrytype) under-allocated by sizeof(void *) per slot,
 * so the pushes performed by name ## _init wrote past the end of the
 * allocation (heap overflow).
 */
#define DECLARE_SMR_FREESTACK(entrytype, name)			\
struct name ## _entry {						\
	void		*next;					\
	entrytype	buf;					\
};								\
struct name {							\
	SMR_FREESTACK_HEADER					\
	struct name ## _entry	entry[];			\
};								\
								\
static inline void name ## _init(struct name *fs, size_t size)	\
{								\
	ssize_t i;						\
	assert(size == roundup_power_of_two(size));		\
	assert(sizeof(fs->entry[0].buf) >= sizeof(void *));	\
	fs->size = size;					\
	fs->next = SMR_FREESTACK_EMPTY;				\
	fs->base_addr = fs;					\
	for (i = size - 1; i >= 0; i--)				\
		smr_freestack_push(fs, &fs->entry[i].buf);	\
}								\
								\
static inline struct name * name ## _create(size_t size)	\
{								\
	struct name *fs;					\
	fs = (struct name*) calloc(1, sizeof(*fs) +		\
		       sizeof(struct name ## _entry) *		\
		       (roundup_power_of_two(size)));		\
	if (fs)							\
		name ##_init(fs, roundup_power_of_two(size));	\
	return fs;						\
}								\
								\
static inline int name ## _index(struct name *fs,		\
		entrytype *entry)				\
{								\
	return (int)((struct name ## _entry *)			\
			(freestack_get_next(entry))		\
			- (struct name ## _entry *)fs->entry);	\
}								\
								\
static inline void name ## _free(struct name *fs)		\
{								\
	free(fs);						\
}
279 
280 
281 /*
282  * Buffer Pool
283  */
284 
285 enum {
286 	OFI_BUFPOOL_INDEXED		= 1 << 1,
287 	OFI_BUFPOOL_NO_TRACK		= 1 << 2,
288 	OFI_BUFPOOL_HUGEPAGES		= 1 << 3,
289 };
290 
291 struct ofi_bufpool_region;
292 
293 struct ofi_bufpool_attr {
294 	size_t 		size;
295 	size_t 		alignment;
296 	size_t	 	max_cnt;
297 	size_t 		chunk_cnt;
298 	int		(*alloc_fn)(struct ofi_bufpool_region *region);
299 	void		(*free_fn)(struct ofi_bufpool_region *region);
300 	void		(*init_fn)(struct ofi_bufpool_region *region, void *buf);
301 	void 		*context;
302 	int		flags;
303 };
304 
305 struct ofi_bufpool {
306 	union {
307 		struct slist		entries;
308 		struct dlist_entry	regions;
309 	} free_list;
310 
311 	size_t 				entry_size;
312 	size_t 				entry_cnt;
313 
314 	struct ofi_bufpool_region	**region_table;
315 	size_t				region_cnt;
316 	size_t				alloc_size;
317 	size_t				region_size;
318 	struct ofi_bufpool_attr		attr;
319 };
320 
321 struct ofi_bufpool_region {
322 	struct dlist_entry		entry;
323 	struct dlist_entry 		free_list;
324 	char				*alloc_region;
325 	char 				*mem_region;
326 	size_t				index;
327 	void 				*context;
328 	struct ofi_bufpool 		*pool;
329 #ifndef NDEBUG
330 	size_t 				use_cnt;
331 #endif
332 };
333 
334 struct ofi_bufpool_hdr {
335 	union {
336 		struct slist_entry	slist;
337 		struct dlist_entry	dlist;
338 	} entry;
339 	struct ofi_bufpool_region	*region;
340 	size_t 				index;
341 };
342 
343 int ofi_bufpool_create_attr(struct ofi_bufpool_attr *attr,
344 			    struct ofi_bufpool **buf_pool);
345 
346 static inline int
ofi_bufpool_create(struct ofi_bufpool ** buf_pool,size_t size,size_t alignment,size_t max_cnt,size_t chunk_cnt,int flags)347 ofi_bufpool_create(struct ofi_bufpool **buf_pool,
348 		   size_t size, size_t alignment,
349 		   size_t max_cnt, size_t chunk_cnt, int flags)
350 {
351 	struct ofi_bufpool_attr attr = {
352 		.size		= size,
353 		.alignment 	= alignment,
354 		.max_cnt	= max_cnt,
355 		.chunk_cnt	= chunk_cnt,
356 		.flags		= flags,
357 	};
358 	return ofi_bufpool_create_attr(&attr, buf_pool);
359 }
360 
361 void ofi_bufpool_destroy(struct ofi_bufpool *pool);
362 
363 int ofi_bufpool_grow(struct ofi_bufpool *pool);
364 
ofi_buf_hdr(void * buf)365 static inline struct ofi_bufpool_hdr *ofi_buf_hdr(void *buf)
366 {
367 	return (struct ofi_bufpool_hdr *)
368 		((char *) buf - sizeof(struct ofi_bufpool_hdr));
369 }
370 
ofi_buf_data(struct ofi_bufpool_hdr * buf_hdr)371 static inline void *ofi_buf_data(struct ofi_bufpool_hdr *buf_hdr)
372 {
373 	return buf_hdr + 1;
374 }
375 
ofi_buf_region(void * buf)376 static inline struct ofi_bufpool_region *ofi_buf_region(void *buf)
377 {
378 	assert(ofi_buf_hdr(buf)->region);
379 	return ofi_buf_hdr(buf)->region;
380 }
381 
ofi_buf_pool(void * buf)382 static inline struct ofi_bufpool *ofi_buf_pool(void *buf)
383 {
384 	assert(ofi_buf_region(buf)->pool);
385 	return ofi_buf_region(buf)->pool;
386 }
387 
ofi_buf_free(void * buf)388 static inline void ofi_buf_free(void *buf)
389 {
390 	assert(ofi_buf_region(buf)->use_cnt--);
391 	assert(!(ofi_buf_pool(buf)->attr.flags & OFI_BUFPOOL_INDEXED));
392 	slist_insert_head(&ofi_buf_hdr(buf)->entry.slist,
393 			  &ofi_buf_pool(buf)->free_list.entries);
394 }
395 
396 int ofi_ibuf_is_lower(struct dlist_entry *item, const void *arg);
397 int ofi_ibufpool_region_is_lower(struct dlist_entry *item, const void *arg);
398 
ofi_ibuf_free(void * buf)399 static inline void ofi_ibuf_free(void *buf)
400 {
401 	struct ofi_bufpool_hdr *buf_hdr;
402 
403 	assert(ofi_buf_pool(buf)->attr.flags & OFI_BUFPOOL_INDEXED);
404 	assert(ofi_buf_region(buf)->use_cnt--);
405 	buf_hdr = ofi_buf_hdr(buf);
406 
407 	dlist_insert_order(&buf_hdr->region->free_list,
408 			   ofi_ibuf_is_lower, &buf_hdr->entry.dlist);
409 
410 	if (dlist_empty(&buf_hdr->region->entry)) {
411 		dlist_insert_order(&buf_hdr->region->pool->free_list.regions,
412 				   ofi_ibufpool_region_is_lower,
413 				   &buf_hdr->region->entry);
414 	}
415 }
416 
ofi_buf_index(void * buf)417 static inline size_t ofi_buf_index(void *buf)
418 {
419 	return ofi_buf_hdr(buf)->index;
420 }
421 
ofi_bufpool_get_ibuf(struct ofi_bufpool * pool,size_t index)422 static inline void *ofi_bufpool_get_ibuf(struct ofi_bufpool *pool, size_t index)
423 {
424 	void *buf;
425 
426 	buf = pool->region_table[(size_t)(index / pool->attr.chunk_cnt)]->
427 		mem_region + (index % pool->attr.chunk_cnt) * pool->entry_size;
428 
429 	assert(ofi_buf_region(buf)->use_cnt);
430 	return buf;
431 }
432 
ofi_bufpool_empty(struct ofi_bufpool * pool)433 static inline int ofi_bufpool_empty(struct ofi_bufpool *pool)
434 {
435 	return slist_empty(&pool->free_list.entries);
436 }
437 
ofi_ibufpool_empty(struct ofi_bufpool * pool)438 static inline int ofi_ibufpool_empty(struct ofi_bufpool *pool)
439 {
440 	return dlist_empty(&pool->free_list.regions);
441 }
442 
ofi_buf_alloc(struct ofi_bufpool * pool)443 static inline void *ofi_buf_alloc(struct ofi_bufpool *pool)
444 {
445 	struct ofi_bufpool_hdr *buf_hdr;
446 
447 	assert(!(pool->attr.flags & OFI_BUFPOOL_INDEXED));
448 	if (OFI_UNLIKELY(ofi_bufpool_empty(pool))) {
449 		if (ofi_bufpool_grow(pool))
450 			return NULL;
451 	}
452 
453 	slist_remove_head_container(&pool->free_list.entries,
454 				struct ofi_bufpool_hdr, buf_hdr, entry.slist);
455 	assert(++buf_hdr->region->use_cnt);
456 	return ofi_buf_data(buf_hdr);
457 }
458 
ofi_buf_alloc_ex(struct ofi_bufpool * pool,void ** context)459 static inline void *ofi_buf_alloc_ex(struct ofi_bufpool *pool,
460 				     void **context)
461 {
462 	void *buf = ofi_buf_alloc(pool);
463 
464 	assert(context);
465 	if (OFI_UNLIKELY(!buf))
466 		return NULL;
467 
468 	*context = ofi_buf_region(buf)->context;
469 	return buf;
470 }
471 
ofi_ibuf_alloc(struct ofi_bufpool * pool)472 static inline void *ofi_ibuf_alloc(struct ofi_bufpool *pool)
473 {
474 	struct ofi_bufpool_hdr *buf_hdr;
475 	struct ofi_bufpool_region *buf_region;
476 
477 	assert(pool->attr.flags & OFI_BUFPOOL_INDEXED);
478 	if (OFI_UNLIKELY(ofi_ibufpool_empty(pool))) {
479 		if (ofi_bufpool_grow(pool))
480 			return NULL;
481 	}
482 
483 	buf_region = container_of(pool->free_list.regions.next,
484 				  struct ofi_bufpool_region, entry);
485 	dlist_pop_front(&buf_region->free_list, struct ofi_bufpool_hdr,
486 			buf_hdr, entry.dlist);
487 	assert(++buf_hdr->region->use_cnt);
488 
489 	if (dlist_empty(&buf_region->free_list))
490 		dlist_remove_init(&buf_region->entry);
491 	return ofi_buf_data(buf_hdr);
492 }
493 
494 
495 /*
496  * Persistent memory support
497  */
498 void ofi_pmem_init(void);
499 
500 extern uint64_t OFI_RMA_PMEM;
501 extern void (*ofi_pmem_commit)(const void *addr, size_t len);
502 
503 
504 #endif /* _OFI_MEM_H_ */
505