1 /*
2 * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All
3 * rights reserved.
4 * Copyright (c) 2015-2017 Cray Inc. All rights reserved.
5 *
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * BSD license below:
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 */
34
35 /*
36 * The buddy allocator splits the "base" block being managed into smaller
37 * blocks. Each block is a "power-of-two length". These subblocks are kept
38 * track of in a doubly linked list, or free list. Here are the structures
39 * and format of the data structures used in the buddy allocator. For
40 * a description of each field please see gnix_buddy_allocator.h.
41 *
42 * Handle structure:
43 * ┌──────┬──────┬─────┬─────┬────────┬───────┐
44 * │ BASE │ len │ min │ max │ nlists │ LISTS │
45 * └──────┴──────┴─────┴─────┴────────┴───────┘
46 * The LISTS pointer points to an array of dlist structures, each containing a
47 * head pointer to the beginning of a free list. Note that the first element of
48 * LISTS is a pointer to the head of the "min block size" free list, the second
49 * element is the head of the "min * 2 block size" free list and so on.
50 *
51 * Node format as stored in a free block:
52 * ┌──────┬──────┬──────────────────────┐
53 * │ NEXT │ PREV │ Remaining free bytes │
54 * └──────┴──────┴──────────────────────┘
55 * Each NEXT and PREV pointer is stored in the first 16 bytes of the free block.
56 * This means that there is a hard limit of 16 bytes on the minimum block size.
57 *
58 * Bitmap layout with a min block size of 16:
59 * ┌──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┐
60 * │16│16│16│16│..│32│32│32│32│..│64│64│64│64│..│
61 * └──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┘
62 * All the blocks that the buddy allocator allocates from the base block are of
63 * size X, where X = MBS * 2^Z, MBS is the minimum block size and Z is a
64 * non-negative integer.
65 *
66 * The bitmap has 2 * (Len / MBS) bits and it's setup so that the first
67 * Len / MBS bits in the bitmap flag each block of size MBS as free or
68 * allocated.
69 *
70 * Len is the number of bytes in the base block.
71 * The base block is pointed to by void *base.
72 *
73 * The first bit in the bitmap flags the first block of size MBS.
74 * The first block of size MBS uses the address range:
75 * base to (base + MBS - 1).
76 *
77 * The second bit in the bitmap flags the second block of size MBS.
78 * The second block of size MBS uses the address range:
79 * (base + MBS) to (base + 2 * MBS - 1)
80 *
81 * The third bit in the bitmap flags the third block of size MBS.
82 * The third block of size MBS uses the address range:
83 * (base + 2 * MBS) to (base + 3 * MBS - 1)
84 *
85 * And so on until we reach the Len / MBS bit in the bitmap.
86 *
87 * The second Len / MBS bits in the bitmap flag the remaining blocks of size X
88 * as free allocated, or split where X > MBS.
89 *
90 * So, the first bit in the second Len / MBS bits in the bitmap flags the first
91 * block of size MBS * 2. The first block of size MBS * 2 uses the address
92 * range:
93 * base to (base + MBS * 2 - 1)
94 *
95 * And so on until we reach the next block size.
96 *
97 * A bit is set to 1 when a block is allocated, or when the block is split into
98 * two smaller blocks.
99 *
100 * A bit is reset to 0 when a block is freed, or when a free block is coalesced
101 * with another already free and unsplit block.
102 *
103 * The bitmap is only read for coalescing blocks. When a block Y is freed we
104 * look at the bit in the bitmap for the buddy block of Y, if that bit is set
105 * then the buddy of Y is allocated, split, or both in which case we cannot
106 * coalesce Y with its buddy block. However, if the bitmap bit for the buddy of
107 * Y is reset, then the buddy block of Y is free and not split, so we coalesce Y
108 * with the buddy of block of Y and continue to coalesce this new larger block
109 * with its buddy block until we reach the max block size or a buddy block that
110 * is allocated, split, or both.
111 *
112 * TODO: dlist_insert_sorted for fragmentation reduction.
113 * TODO: Lock in __gnix_buddy_split and allow __gnix_buddy_find_block to run
114 * concurrently.
115 * TODO: Allow __gnix_buddy_coalesce to run concurrently and return to
116 * caller of _gnix_buddy_free sooner. __gnix_buddy_coalesce is spending ~23%
117 * of the time on top of the call stack compared to other functions when running
118 * random_alloc_free.
119 * TODO: Find a better solution for finding the address of a buddy block.
120 */
121
122 #include "gnix_buddy_allocator.h"
123
__gnix_buddy_create_lists(gnix_buddy_alloc_handle_t * alloc_handle)124 static inline int __gnix_buddy_create_lists(gnix_buddy_alloc_handle_t
125 *alloc_handle)
126 {
127 uint32_t i, offset = 0;
128
129 alloc_handle->nlists = (uint32_t) __gnix_buddy_log2(alloc_handle->max /
130 MIN_BLOCK_SIZE) + 1;
131 alloc_handle->lists = calloc(1, sizeof(struct dlist_entry) *
132 alloc_handle->nlists);
133
134 if (OFI_UNLIKELY(!alloc_handle->lists)) {
135 GNIX_WARN(FI_LOG_EP_CTRL,
136 "Could not create buddy allocator lists.\n");
137 return -FI_ENOMEM;
138 }
139
140 for (i = 0; i < alloc_handle->nlists; i++) {
141 dlist_init(alloc_handle->lists + i);
142 }
143
144 /* Insert free blocks of size max in sorted order into last list */
145 for (i = 0; i < alloc_handle->len / alloc_handle->max; i++) {
146 dlist_insert_tail((void *) ((uint8_t *) alloc_handle->base +
147 offset),
148 alloc_handle->lists +
149 alloc_handle->nlists - 1);
150 offset += alloc_handle->max;
151 }
152
153 return FI_SUCCESS;
154 }
155
156 /**
157 * Split a block in list "j" until list "i" is reached.
158 */
__gnix_buddy_split(gnix_buddy_alloc_handle_t * alloc_handle,uint32_t j,uint32_t i,void ** ptr)159 static inline void __gnix_buddy_split(gnix_buddy_alloc_handle_t *alloc_handle,
160 uint32_t j, uint32_t i, void **ptr)
161 {
162 void *tmp = alloc_handle->lists[j].next;
163
164 dlist_remove(tmp);
165
166 /* Split the block until we reach list "i" */
167 for (; j > i; j--) {
168 _gnix_set_bit(&alloc_handle->bitmap,
169 __gnix_buddy_bitmap_index(tmp,
170 OFFSET(MIN_BLOCK_SIZE, j),
171 alloc_handle->base,
172 alloc_handle->len,
173 MIN_BLOCK_SIZE));
174
175 dlist_insert_tail((void *) ((uint8_t *) tmp +
176 OFFSET(MIN_BLOCK_SIZE, j - 1)),
177 alloc_handle->lists + j - 1);
178 }
179
180 /* Allocate the block */
181 *ptr = tmp;
182 }
183
184 /**
185 * Find the first free block in list i.
186 *
187 * @return 1 if the block cannot be found.
188 *
189 * @return 0 if the block is found.
190 */
__gnix_buddy_find_block(gnix_buddy_alloc_handle_t * alloc_handle,uint32_t i,void ** ptr)191 static inline int __gnix_buddy_find_block(gnix_buddy_alloc_handle_t
192 *alloc_handle, uint32_t i, void **ptr)
193 {
194 uint32_t j;
195
196 for (j = i; j < alloc_handle->nlists; j++) {
197 if (!dlist_empty(alloc_handle->lists + j)) {
198 __gnix_buddy_split(alloc_handle, j, i, ptr);
199 return 0;
200 }
201 }
202
203 return 1;
204 }
205
206
207 /**
208 * If the buddy block is on the free list then coalesce and insert into the next
209 * list until we reach an allocated or split buddy block, or the max list size.
210 */
__gnix_buddy_coalesce(gnix_buddy_alloc_handle_t * alloc_handle,void ** ptr,uint32_t block_size)211 static inline uint32_t __gnix_buddy_coalesce(gnix_buddy_alloc_handle_t *alloc_handle
212 , void **ptr, uint32_t block_size)
213 {
214 void *buddy;
215
216 for (buddy = __gnix_buddy_address(*ptr, block_size, alloc_handle->base);
217 block_size < alloc_handle->max &&
218 !_gnix_test_bit(&alloc_handle->bitmap,
219 __gnix_buddy_bitmap_index(buddy,
220 block_size,
221 alloc_handle->base,
222 alloc_handle->len,
223 MIN_BLOCK_SIZE));
224 buddy = __gnix_buddy_address(*ptr, block_size, alloc_handle->base)) {
225
226 dlist_remove(buddy);
227
228 /* Ensure ptr is the beginning of the new block */
229 if (*ptr > buddy)
230 *ptr = buddy;
231
232 block_size *= 2;
233
234 _gnix_clear_bit(&alloc_handle->bitmap,
235 __gnix_buddy_bitmap_index(*ptr, block_size,
236 alloc_handle->base,
237 alloc_handle->len,
238 MIN_BLOCK_SIZE));
239 }
240 return block_size;
241 }
242
_gnix_buddy_allocator_create(void * base,uint32_t len,uint32_t max,gnix_buddy_alloc_handle_t ** alloc_handle)243 int _gnix_buddy_allocator_create(void *base, uint32_t len, uint32_t max,
244 gnix_buddy_alloc_handle_t **alloc_handle)
245 {
246 char err_buf[256] = {0}, *error = NULL;
247 int fi_errno;
248 uint32_t size_check = len / MIN_BLOCK_SIZE * 2;
249
250 GNIX_TRACE(FI_LOG_EP_CTRL, "\n");
251
252 /* Ensure parameters are valid */
253 if (OFI_UNLIKELY(!base || !len || !max || max > len || !alloc_handle ||
254 IS_NOT_POW_TWO(max) || (len % max) ||
255 !size_check)) {
256
257 GNIX_WARN(FI_LOG_EP_CTRL,
258 "Invalid parameter to _gnix_buddy_allocator_create."
259 "\n");
260 return -FI_EINVAL;
261 }
262
263 *alloc_handle = calloc(1, sizeof(gnix_buddy_alloc_handle_t));
264
265 if (OFI_UNLIKELY(!alloc_handle)) {
266 error = strerror_r(errno, err_buf, sizeof(err_buf));
267 GNIX_WARN(FI_LOG_EP_CTRL,
268 "Could not create buddy allocator handle.\n",
269 error);
270 return -FI_ENOMEM;
271 }
272
273 fastlock_init(&alloc_handle[0]->lock);
274 alloc_handle[0]->base = base;
275 alloc_handle[0]->len = len;
276 alloc_handle[0]->max = max;
277
278 if (__gnix_buddy_create_lists(alloc_handle[0])) {
279 free(*alloc_handle);
280 return -FI_ENOMEM;
281 }
282
283 /* The bitmap needs len / MIN_BLOCK_SIZE * 2 bits to flag every possible
284 * block of size: min, min * 2, min * 4, ... , max that fits in the
285 * base. block. The maximum number of bits used would be if max = len.
286 */
287 if ((fi_errno = _gnix_alloc_bitmap(&alloc_handle[0]->bitmap,
288 len / MIN_BLOCK_SIZE * 2, NULL))) {
289
290 free(&alloc_handle[0]->lists);
291 free(*alloc_handle);
292 }
293
294 return fi_errno;
295 }
296
_gnix_buddy_allocator_destroy(gnix_buddy_alloc_handle_t * alloc_handle)297 int _gnix_buddy_allocator_destroy(gnix_buddy_alloc_handle_t *alloc_handle)
298 {
299 GNIX_TRACE(FI_LOG_EP_CTRL, "\n");
300
301 if (OFI_UNLIKELY(!alloc_handle)) {
302 GNIX_WARN(FI_LOG_EP_CTRL,
303 "Invalid parameter to _gnix_buddy_allocator_destroy."
304 "\n");
305 return -FI_EINVAL;
306 }
307
308 fastlock_acquire(&alloc_handle->lock);
309
310 free(alloc_handle->lists);
311
312 while (_gnix_free_bitmap(&alloc_handle->bitmap)) {
313 GNIX_WARN(FI_LOG_EP_CTRL,
314 "Trying to free buddy allocator handle bitmap.\n");
315 sleep(1);
316 }
317
318 fastlock_release(&alloc_handle->lock);
319 fastlock_destroy(&alloc_handle->lock);
320
321 free(alloc_handle);
322
323 return FI_SUCCESS;
324 }
325
_gnix_buddy_alloc(gnix_buddy_alloc_handle_t * alloc_handle,void ** ptr,uint32_t len)326 int _gnix_buddy_alloc(gnix_buddy_alloc_handle_t *alloc_handle, void **ptr,
327 uint32_t len)
328 {
329 uint32_t block_size, i = 0;
330
331 GNIX_TRACE(FI_LOG_EP_CTRL, "\n");
332
333 if (OFI_UNLIKELY(!alloc_handle || !ptr || !len ||
334 len > alloc_handle->max)) {
335
336 GNIX_WARN(FI_LOG_EP_CTRL,
337 "Invalid parameter to _gnix_buddy_alloc.\n");
338 return -FI_EINVAL;
339 }
340
341 block_size = BLOCK_SIZE(len, MIN_BLOCK_SIZE);
342 i = (uint32_t) LIST_INDEX(block_size, MIN_BLOCK_SIZE);
343
344 fastlock_acquire(&alloc_handle->lock);
345
346 if (__gnix_buddy_find_block(alloc_handle, i, ptr)) {
347 fastlock_release(&alloc_handle->lock);
348 GNIX_WARN(FI_LOG_EP_CTRL,
349 "Could not allocate buddy block.\n");
350 return -FI_ENOMEM;
351 }
352
353 fastlock_release(&alloc_handle->lock);
354
355 _gnix_set_bit(&alloc_handle->bitmap,
356 __gnix_buddy_bitmap_index(*ptr, block_size,
357 alloc_handle->base,
358 alloc_handle->len,
359 MIN_BLOCK_SIZE));
360
361 return FI_SUCCESS;
362 }
363
_gnix_buddy_free(gnix_buddy_alloc_handle_t * alloc_handle,void * ptr,uint32_t len)364 int _gnix_buddy_free(gnix_buddy_alloc_handle_t *alloc_handle, void *ptr,
365 uint32_t len)
366 {
367 uint32_t block_size;
368
369 GNIX_TRACE(FI_LOG_EP_CTRL, "\n");
370
371 if (OFI_UNLIKELY(!alloc_handle || !len || len > alloc_handle->max ||
372 ptr >= (void *) ((uint8_t *) alloc_handle->base +
373 alloc_handle->len) ||
374 ptr < alloc_handle->base)) {
375
376 GNIX_WARN(FI_LOG_EP_CTRL,
377 "Invalid parameter to _gnix_buddy_free.\n");
378 return -FI_EINVAL;
379 }
380
381 block_size = BLOCK_SIZE(len, MIN_BLOCK_SIZE);
382
383 _gnix_clear_bit(&alloc_handle->bitmap,
384 __gnix_buddy_bitmap_index(ptr, block_size,
385 alloc_handle->base,
386 alloc_handle->len,
387 MIN_BLOCK_SIZE));
388
389 fastlock_acquire(&alloc_handle->lock);
390
391 block_size = __gnix_buddy_coalesce(alloc_handle, &ptr, block_size);
392
393 dlist_insert_tail(ptr, alloc_handle->lists +
394 LIST_INDEX(block_size, MIN_BLOCK_SIZE));
395
396 fastlock_release(&alloc_handle->lock);
397
398 return FI_SUCCESS;
399 }
400