1 /*
2  * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All
3  * rights reserved.
4  * Copyright (c) 2015-2017 Cray Inc. All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  */
34 
35 /*
36  * The buddy allocator splits the "base" block being managed into smaller
37  * blocks.  Each block is a "power-of-two length".  These subblocks are kept
38  * track of in a doubly linked list, or free list.  Here are the structures
39  * and format of the data structures used in the buddy allocator.  For
40  * a description of each field please see gnix_buddy_allocator.h.
41  *
42  * Handle structure:
43  * ┌──────┬──────┬─────┬─────┬────────┬───────┐
44  * │ BASE │ len  │ min │ max │ nlists │ LISTS │
45  * └──────┴──────┴─────┴─────┴────────┴───────┘
46  * The LISTS pointer points to an array of dlist structures, each containing a
47  * head pointer to the beginning of a free list.  Note that the first element of
48  * LISTS is a pointer to the head of the "min block size" free list, the second
49  * element is the head of the "min * 2 block size" free list and so on.
50  *
51  * Node format as stored in a free block:
52  * ┌──────┬──────┬──────────────────────┐
53  * │ NEXT │ PREV │ Remaining free bytes │
54  * └──────┴──────┴──────────────────────┘
55  * Each NEXT and PREV pointer is stored in the first 16 bytes of the free block.
56  * This means that there is a hard limit of 16 bytes on the minimum block size.
57  *
58  * Bitmap layout with a min block size of 16:
59  * ┌──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┐
60  * │16│16│16│16│..│32│32│32│32│..│64│64│64│64│..│
61  * └──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┘
62  * All the blocks that the buddy allocator allocates from the base block are of
63  * size X, where X = MBS * 2^Z, MBS is the minimum block size and Z is a
64  * non-negative integer.
65  *
66  * The bitmap has 2 * (Len / MBS) bits and it's setup so that the first
67  * Len / MBS bits in the bitmap flag each block of size MBS as free or
68  * allocated.
69  *
70  * Len is the number of bytes in the base block.
71  * The base block is pointed to by void *base.
72  *
73  * The first bit in the bitmap flags the first block of size MBS.
74  * The first block of size MBS uses the address range:
75  * base to (base + MBS - 1).
76  *
77  * The second bit in the bitmap flags the second block of size MBS.
78  * The second block of size MBS uses the address range:
79  * (base + MBS) to (base + 2 * MBS - 1)
80  *
81  * The third bit in the bitmap flags the third block of size MBS.
82  * The third block of size MBS uses the address range:
83  * (base + 2 * MBS) to (base + 3 * MBS - 1)
84  *
85  * And so on until we reach the Len / MBS bit in the bitmap.
86  *
87  * The second Len / MBS bits in the bitmap flag the remaining blocks of size X
88  * as free allocated, or split where X > MBS.
89  *
90  * So, the first bit in the second Len / MBS bits in the bitmap flags the first
91  * block of size MBS * 2.  The first block of size MBS * 2 uses the address
92  * range:
93  * base to (base + MBS * 2 - 1)
94  *
95  * And so on until we reach the next block size.
96  *
97  * A bit is set to 1 when a block is allocated, or when the block is split into
98  * two smaller blocks.
99  *
100  * A bit is reset to 0 when a block is freed, or when a free block is coalesced
101  * with another already free and unsplit block.
102  *
103  * The bitmap is only read for coalescing blocks. When a block Y is freed we
104  * look at the bit in the bitmap for the buddy block of Y, if that bit is set
105  * then the buddy of Y is allocated, split, or both in which case we cannot
106  * coalesce Y with its buddy block.  However, if the bitmap bit for the buddy of
107  * Y is reset, then the buddy block of Y is free and not split, so we coalesce Y
108  * with the buddy of block of Y and continue to coalesce this new larger block
109  * with its buddy block until we reach the max block size or a buddy block that
110  * is allocated, split, or both.
111  *
112  * TODO: dlist_insert_sorted for fragmentation reduction.
113  * TODO: Lock in __gnix_buddy_split and allow __gnix_buddy_find_block to run
114  * concurrently.
115  * TODO: Allow __gnix_buddy_coalesce to run concurrently and return to
116  * caller of _gnix_buddy_free sooner.  __gnix_buddy_coalesce is spending ~23%
117  * of the time on top of the call stack compared to other functions when running
118  * random_alloc_free.
119  * TODO: Find a better solution for finding the address of a buddy block.
120  */
121 
122 #include "gnix_buddy_allocator.h"
123 
__gnix_buddy_create_lists(gnix_buddy_alloc_handle_t * alloc_handle)124 static inline int __gnix_buddy_create_lists(gnix_buddy_alloc_handle_t
125 					    *alloc_handle)
126 {
127 	uint32_t i, offset = 0;
128 
129 	alloc_handle->nlists = (uint32_t) __gnix_buddy_log2(alloc_handle->max /
130 							    MIN_BLOCK_SIZE) + 1;
131 	alloc_handle->lists = calloc(1, sizeof(struct dlist_entry) *
132 				     alloc_handle->nlists);
133 
134 	if (OFI_UNLIKELY(!alloc_handle->lists)) {
135 		GNIX_WARN(FI_LOG_EP_CTRL,
136 			  "Could not create buddy allocator lists.\n");
137 		return -FI_ENOMEM;
138 	}
139 
140 	for (i = 0; i < alloc_handle->nlists; i++) {
141 		dlist_init(alloc_handle->lists + i);
142 	}
143 
144 	/* Insert free blocks of size max in sorted order into last list */
145 	for (i = 0; i < alloc_handle->len / alloc_handle->max; i++) {
146 		dlist_insert_tail((void *) ((uint8_t *) alloc_handle->base +
147 					    offset),
148 				  alloc_handle->lists +
149 				  alloc_handle->nlists - 1);
150 		offset += alloc_handle->max;
151 	}
152 
153 	return FI_SUCCESS;
154 }
155 
156 /**
157  * Split a block in list "j" until list "i" is reached.
158  */
__gnix_buddy_split(gnix_buddy_alloc_handle_t * alloc_handle,uint32_t j,uint32_t i,void ** ptr)159 static inline void __gnix_buddy_split(gnix_buddy_alloc_handle_t *alloc_handle,
160 				      uint32_t j, uint32_t i, void **ptr)
161 {
162 	void *tmp = alloc_handle->lists[j].next;
163 
164 	dlist_remove(tmp);
165 
166 	/* Split the block until we reach list "i" */
167 	for (; j > i; j--) {
168 		_gnix_set_bit(&alloc_handle->bitmap,
169 			      __gnix_buddy_bitmap_index(tmp,
170 							OFFSET(MIN_BLOCK_SIZE, j),
171 							alloc_handle->base,
172 							alloc_handle->len,
173 							MIN_BLOCK_SIZE));
174 
175 		dlist_insert_tail((void *) ((uint8_t *) tmp +
176 					    OFFSET(MIN_BLOCK_SIZE, j - 1)),
177 				  alloc_handle->lists + j - 1);
178 	}
179 
180 	/* Allocate the block */
181 	*ptr = tmp;
182 }
183 
184 /**
185  * Find the first free block in list i.
186  *
187  * @return 1  if the block cannot be found.
188  *
189  * @return 0 if the block is found.
190  */
__gnix_buddy_find_block(gnix_buddy_alloc_handle_t * alloc_handle,uint32_t i,void ** ptr)191 static inline int __gnix_buddy_find_block(gnix_buddy_alloc_handle_t
192 					  *alloc_handle, uint32_t i, void **ptr)
193 {
194 	uint32_t j;
195 
196 	for (j = i; j < alloc_handle->nlists; j++) {
197 		if (!dlist_empty(alloc_handle->lists + j)) {
198 			__gnix_buddy_split(alloc_handle, j, i, ptr);
199 			return 0;
200 		}
201 	}
202 
203 	return 1;
204 }
205 
206 
207 /**
208  * If the buddy block is on the free list then coalesce and insert into the next
209  * list until we reach an allocated or split buddy block, or the max list size.
210  */
__gnix_buddy_coalesce(gnix_buddy_alloc_handle_t * alloc_handle,void ** ptr,uint32_t block_size)211 static inline uint32_t __gnix_buddy_coalesce(gnix_buddy_alloc_handle_t *alloc_handle
212 					 , void **ptr, uint32_t block_size)
213 {
214 	void *buddy;
215 
216 	for (buddy = __gnix_buddy_address(*ptr, block_size, alloc_handle->base);
217 	     block_size < alloc_handle->max &&
218 		     !_gnix_test_bit(&alloc_handle->bitmap,
219 				     __gnix_buddy_bitmap_index(buddy,
220 							       block_size,
221 							       alloc_handle->base,
222 							       alloc_handle->len,
223 							       MIN_BLOCK_SIZE));
224 	     buddy = __gnix_buddy_address(*ptr, block_size, alloc_handle->base)) {
225 
226 		dlist_remove(buddy);
227 
228 		/* Ensure ptr is the beginning of the new block */
229 		if (*ptr > buddy)
230 			*ptr = buddy;
231 
232 		block_size *= 2;
233 
234 		_gnix_clear_bit(&alloc_handle->bitmap,
235 				__gnix_buddy_bitmap_index(*ptr, block_size,
236 							  alloc_handle->base,
237 							  alloc_handle->len,
238 							  MIN_BLOCK_SIZE));
239 	}
240 	return block_size;
241 }
242 
_gnix_buddy_allocator_create(void * base,uint32_t len,uint32_t max,gnix_buddy_alloc_handle_t ** alloc_handle)243 int _gnix_buddy_allocator_create(void *base, uint32_t len, uint32_t max,
244 				 gnix_buddy_alloc_handle_t **alloc_handle)
245 {
246 	char err_buf[256] = {0}, *error = NULL;
247 	int fi_errno;
248 	uint32_t size_check = len / MIN_BLOCK_SIZE * 2;
249 
250 	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");
251 
252 	/* Ensure parameters are valid */
253 	if (OFI_UNLIKELY(!base || !len || !max || max > len || !alloc_handle ||
254 			 IS_NOT_POW_TWO(max) || (len % max) ||
255 			 !size_check)) {
256 
257 		GNIX_WARN(FI_LOG_EP_CTRL,
258 			  "Invalid parameter to _gnix_buddy_allocator_create."
259 			  "\n");
260 		return -FI_EINVAL;
261 	}
262 
263 	*alloc_handle = calloc(1, sizeof(gnix_buddy_alloc_handle_t));
264 
265 	if (OFI_UNLIKELY(!alloc_handle)) {
266 		error = strerror_r(errno, err_buf, sizeof(err_buf));
267 		GNIX_WARN(FI_LOG_EP_CTRL,
268 			  "Could not create buddy allocator handle.\n",
269 			  error);
270 		return -FI_ENOMEM;
271 	}
272 
273 	fastlock_init(&alloc_handle[0]->lock);
274 	alloc_handle[0]->base = base;
275 	alloc_handle[0]->len = len;
276 	alloc_handle[0]->max = max;
277 
278 	if (__gnix_buddy_create_lists(alloc_handle[0])) {
279 		free(*alloc_handle);
280 		return -FI_ENOMEM;
281 	}
282 
283 	/* The bitmap needs len / MIN_BLOCK_SIZE * 2 bits to flag every possible
284 	 * block of size: min, min * 2, min * 4, ... , max that fits in the
285 	 * base. block.  The maximum number of bits used would be if max = len.
286 	 */
287 	if ((fi_errno = _gnix_alloc_bitmap(&alloc_handle[0]->bitmap,
288 					   len / MIN_BLOCK_SIZE * 2, NULL))) {
289 
290 		free(&alloc_handle[0]->lists);
291 		free(*alloc_handle);
292 	}
293 
294 	return fi_errno;
295 }
296 
_gnix_buddy_allocator_destroy(gnix_buddy_alloc_handle_t * alloc_handle)297 int _gnix_buddy_allocator_destroy(gnix_buddy_alloc_handle_t *alloc_handle)
298 {
299 	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");
300 
301 	if (OFI_UNLIKELY(!alloc_handle)) {
302 		GNIX_WARN(FI_LOG_EP_CTRL,
303 			  "Invalid parameter to _gnix_buddy_allocator_destroy."
304 			  "\n");
305 		return -FI_EINVAL;
306 	}
307 
308 	fastlock_acquire(&alloc_handle->lock);
309 
310 	free(alloc_handle->lists);
311 
312 	while (_gnix_free_bitmap(&alloc_handle->bitmap)) {
313 		GNIX_WARN(FI_LOG_EP_CTRL,
314 			  "Trying to free buddy allocator handle bitmap.\n");
315 		sleep(1);
316 	}
317 
318 	fastlock_release(&alloc_handle->lock);
319 	fastlock_destroy(&alloc_handle->lock);
320 
321 	free(alloc_handle);
322 
323 	return FI_SUCCESS;
324 }
325 
_gnix_buddy_alloc(gnix_buddy_alloc_handle_t * alloc_handle,void ** ptr,uint32_t len)326 int _gnix_buddy_alloc(gnix_buddy_alloc_handle_t *alloc_handle, void **ptr,
327 		      uint32_t len)
328 {
329 	uint32_t block_size, i = 0;
330 
331 	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");
332 
333 	if (OFI_UNLIKELY(!alloc_handle || !ptr || !len ||
334 			 len > alloc_handle->max)) {
335 
336 		GNIX_WARN(FI_LOG_EP_CTRL,
337 			  "Invalid parameter to _gnix_buddy_alloc.\n");
338 		return -FI_EINVAL;
339 	}
340 
341 	block_size = BLOCK_SIZE(len, MIN_BLOCK_SIZE);
342 	i = (uint32_t) LIST_INDEX(block_size, MIN_BLOCK_SIZE);
343 
344 	fastlock_acquire(&alloc_handle->lock);
345 
346 	if (__gnix_buddy_find_block(alloc_handle, i, ptr)) {
347 		fastlock_release(&alloc_handle->lock);
348 		GNIX_WARN(FI_LOG_EP_CTRL,
349 			  "Could not allocate buddy block.\n");
350 		return -FI_ENOMEM;
351 	}
352 
353 	fastlock_release(&alloc_handle->lock);
354 
355 	_gnix_set_bit(&alloc_handle->bitmap,
356 		      __gnix_buddy_bitmap_index(*ptr, block_size,
357 						alloc_handle->base,
358 						alloc_handle->len,
359 						MIN_BLOCK_SIZE));
360 
361 	return FI_SUCCESS;
362 }
363 
_gnix_buddy_free(gnix_buddy_alloc_handle_t * alloc_handle,void * ptr,uint32_t len)364 int _gnix_buddy_free(gnix_buddy_alloc_handle_t *alloc_handle, void *ptr,
365 		     uint32_t len)
366 {
367 	uint32_t block_size;
368 
369 	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");
370 
371 	if (OFI_UNLIKELY(!alloc_handle || !len || len > alloc_handle->max ||
372 			 ptr >= (void *) ((uint8_t *) alloc_handle->base +
373 				      alloc_handle->len) ||
374 		     ptr < alloc_handle->base)) {
375 
376 		GNIX_WARN(FI_LOG_EP_CTRL,
377 			  "Invalid parameter to _gnix_buddy_free.\n");
378 		return -FI_EINVAL;
379 	}
380 
381 	block_size = BLOCK_SIZE(len, MIN_BLOCK_SIZE);
382 
383 	_gnix_clear_bit(&alloc_handle->bitmap,
384 			__gnix_buddy_bitmap_index(ptr, block_size,
385 						  alloc_handle->base,
386 						  alloc_handle->len,
387 						  MIN_BLOCK_SIZE));
388 
389 	fastlock_acquire(&alloc_handle->lock);
390 
391 	block_size = __gnix_buddy_coalesce(alloc_handle, &ptr, block_size);
392 
393 	dlist_insert_tail(ptr, alloc_handle->lists +
394 			  LIST_INDEX(block_size, MIN_BLOCK_SIZE));
395 
396 	fastlock_release(&alloc_handle->lock);
397 
398 	return FI_SUCCESS;
399 }
400