/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2009 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2006-2007 Mellanox Technologies.  All rights reserved.
 * Copyright (c) 2010-2013 Cisco Systems, Inc.  All rights reserved.
 * Copyright (c) 2011      NVIDIA Corporation.  All rights reserved.
 * Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights
 *                         reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
24
25 #include "opal_config.h"
26
27 #include "opal/class/opal_free_list.h"
28 #include "opal/align.h"
29 #include "opal/util/output.h"
30 #include "opal/mca/mpool/mpool.h"
31 #include "opal/mca/mpool/base/base.h"
32 #include "opal/mca/rcache/rcache.h"
33 #include "opal/util/sys_limits.h"
34
/* A slab of backing memory is tracked with the same list-item structure
 * that individual free-list items use; only its role differs. */
typedef struct opal_free_list_item_t opal_free_list_memory_t;

OBJ_CLASS_INSTANCE(opal_free_list_item_t,
                   opal_list_item_t,
                   NULL, NULL);
40
/*
 * Class constructor: put a free list object into a known, empty state.
 * All counters start at zero, the default fragment class is the plain
 * free-list item, and no allocator/registration cache is attached.
 * Real configuration happens later in opal_free_list_init().
 */
static void opal_free_list_construct (opal_free_list_t *flist)
{
    OBJ_CONSTRUCT(&flist->fl_lock, opal_mutex_t);
    OBJ_CONSTRUCT(&flist->fl_condition, opal_condition_t);
    OBJ_CONSTRUCT(&flist->fl_allocations, opal_list_t);

    flist->fl_max_to_alloc = 0;
    flist->fl_num_allocated = 0;
    flist->fl_num_per_alloc = 0;
    flist->fl_num_waiting = 0;

    flist->fl_frag_size = sizeof (opal_free_list_item_t);
    flist->fl_frag_alignment = 0;
    flist->fl_frag_class = OBJ_CLASS(opal_free_list_item_t);

    flist->fl_payload_buffer_size = 0;
    flist->fl_payload_buffer_alignment = 0;

    flist->fl_mpool = NULL;
    flist->fl_rcache = NULL;
    /* default registration flags */
    flist->fl_rcache_reg_flags = MCA_RCACHE_FLAGS_CACHE_BYPASS |
        MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM;
    flist->ctx = NULL;
}
62
/*
 * Return one backing slab to the system: deregister its payload from the
 * registration cache (if one is attached), hand the payload back to its
 * mpool (or plain free() it), then release the slab header itself.
 */
static void opal_free_list_allocation_release (opal_free_list_t *fl, opal_free_list_memory_t *fl_mem)
{
    if (NULL != fl->fl_rcache) {
        fl->fl_rcache->rcache_deregister (fl->fl_rcache, fl_mem->registration);
    }

    if (NULL != fl->fl_mpool) {
        fl->fl_mpool->mpool_free (fl->fl_mpool, fl_mem->ptr);
    } else if (NULL != fl_mem->ptr) {
        free (fl_mem->ptr);
    }

    /* we constructed the slab header, so destruct it before freeing */
    OBJ_DESTRUCT(fl_mem);
    free (fl_mem);
}
79
/*
 * Class destructor: destruct every item still on the list, release all
 * backing slabs, then tear down the internal synchronization objects.
 * Items not returned to the list by callers are NOT destructed here.
 */
static void opal_free_list_destruct (opal_free_list_t *flist)
{
    opal_list_item_t *item;

#if 0 && OPAL_ENABLE_DEBUG
    if (opal_list_get_size (&flist->super) != flist->fl_num_allocated) {
        opal_output (0, "opal_free_list: %d allocated %d returned: %s:%d\n",
                     flist->fl_num_allocated, opal_list_get_size (&flist->super),
                     flist->super.super.cls_init_file_name, flist->super.super.cls_init_lineno);
    }
#endif

    /* destruct each item we constructed; the storage itself lives inside a
     * slab (opal_free_list_memory_t) and is reclaimed below */
    while (NULL != (item = opal_lifo_pop (&flist->super))) {
        opal_free_list_item_t *fl_item = (opal_free_list_item_t *) item;
        OBJ_DESTRUCT(fl_item);
    }

    /* release every backing slab */
    while (NULL != (item = opal_list_remove_first (&flist->fl_allocations))) {
        opal_free_list_allocation_release (flist, (opal_free_list_memory_t *) item);
    }

    OBJ_DESTRUCT(&flist->fl_allocations);
    OBJ_DESTRUCT(&flist->fl_condition);
    OBJ_DESTRUCT(&flist->fl_lock);
}
110
/* opal_free_list_t extends opal_lifo_t with the construct/destruct hooks above */
OBJ_CLASS_INSTANCE(opal_free_list_t, opal_lifo_t, opal_free_list_construct,
                   opal_free_list_destruct);
113
114
/**
 * Initialize a (previously constructed) free list.
 *
 * @param flist                     free list to initialize
 * @param frag_size                 size of each item; raised to the fragment
 *                                  class size if smaller
 * @param frag_alignment            alignment of each item (must be > 1 and a
 *                                  power of two)
 * @param frag_class                opal class of the items (may be NULL to
 *                                  keep the default)
 * @param payload_buffer_size       size of the optional payload buffer per
 *                                  item (0 for none)
 * @param payload_buffer_alignment  payload alignment (must be > 1 and a power
 *                                  of two when a payload is requested)
 * @param num_elements_to_alloc     items to allocate up front
 * @param max_elements_to_alloc     hard cap on allocated items (0 = unlimited)
 * @param num_elements_per_alloc    growth increment for later allocations
 * @param mpool                     memory pool for payload buffers (NULL uses
 *                                  the default mpool module)
 * @param rcache_reg_flags          extra registration flags (OR-ed into the
 *                                  defaults set at construction)
 * @param rcache                    registration cache (may be NULL)
 * @param item_init                 per-item initialization callback (may be NULL)
 * @param ctx                       opaque context passed to item_init
 *
 * @return OPAL_SUCCESS on success, OPAL_ERROR on invalid alignment, or an
 *         error code from opal_free_list_grow_st().
 */
int opal_free_list_init (opal_free_list_t *flist, size_t frag_size, size_t frag_alignment,
                         opal_class_t *frag_class, size_t payload_buffer_size,
                         size_t payload_buffer_alignment, int num_elements_to_alloc,
                         int max_elements_to_alloc, int num_elements_per_alloc,
                         mca_mpool_base_module_t *mpool, int rcache_reg_flags,
                         mca_rcache_base_module_t *rcache, opal_free_list_item_init_fn_t item_init,
                         void *ctx)
{
    /* fragment alignment must be greater than 1 and a power of two */
    if (frag_alignment <= 1 || (frag_alignment & (frag_alignment - 1))) {
        return OPAL_ERROR;
    }

    /* payload alignment only matters when a payload buffer is requested */
    if (0 < payload_buffer_size) {
        if (payload_buffer_alignment <= 1 ||
            (payload_buffer_alignment & (payload_buffer_alignment - 1))) {
            return OPAL_ERROR;
        }
    }

    /* items must be at least as large as their class */
    if (frag_class && frag_size < frag_class->cls_sizeof) {
        frag_size = frag_class->cls_sizeof;
    }

    if (frag_size > flist->fl_frag_size) {
        flist->fl_frag_size = frag_size;
    }

    if (frag_class) {
        flist->fl_frag_class = frag_class;
    }

    flist->fl_payload_buffer_size = payload_buffer_size;
    flist->fl_max_to_alloc = max_elements_to_alloc;
    flist->fl_num_allocated = 0;
    flist->fl_num_per_alloc = num_elements_per_alloc;
    flist->fl_mpool = mpool ? mpool : mca_mpool_base_default_module;
    flist->fl_rcache = rcache;
    flist->fl_frag_alignment = frag_alignment;
    flist->fl_payload_buffer_alignment = payload_buffer_alignment;
    flist->item_init = item_init;
    /* OR the caller's flags into the defaults set by the constructor */
    flist->fl_rcache_reg_flags |= rcache_reg_flags;
    flist->ctx = ctx;

    if (num_elements_to_alloc) {
        return opal_free_list_grow_st (flist, num_elements_to_alloc, NULL);
    }

    return OPAL_SUCCESS;
}
163
/**
 * Grow the free list by up to num_elements items with a single slab
 * allocation (single-threaded; the caller must guarantee exclusive access,
 * e.g. by holding fl_lock).
 *
 * On success the new items are pushed onto the free list, except that when
 * item_out is non-NULL the first successfully initialized item is handed
 * directly to the caller instead of being pushed.
 *
 * @param flist         free list to grow
 * @param num_elements  requested number of new items; may be reduced by
 *                      fl_max_to_alloc or adjusted by page-size rounding
 * @param item_out      if non-NULL, receives one of the new items
 *
 * @return OPAL_SUCCESS if at least one item was added, or an OPAL error code
 */
int opal_free_list_grow_st (opal_free_list_t *flist, size_t num_elements, opal_free_list_item_t **item_out)
{
    unsigned char *ptr, *payload_ptr = NULL;
    opal_free_list_memory_t *alloc_ptr;
    size_t alloc_size, head_size, elem_size = 0, buffer_size = 0, align = 0;
    mca_rcache_base_registration_t *reg = NULL;
    int rc = OPAL_SUCCESS;

    /* clamp the request to the configured maximum (0 == unlimited) */
    if (flist->fl_max_to_alloc && (flist->fl_num_allocated + num_elements) >
        flist->fl_max_to_alloc) {
        num_elements = flist->fl_max_to_alloc - flist->fl_num_allocated;
    }

    if (num_elements == 0) {
        return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
    }

    head_size = OPAL_ALIGN(flist->fl_frag_size, flist->fl_frag_alignment, size_t);

    /* NTH: calculate allocation alignment first as it might change the number of elements */
    if (0 != flist->fl_payload_buffer_size) {
        elem_size = OPAL_ALIGN(flist->fl_payload_buffer_size,
                               flist->fl_payload_buffer_alignment, size_t);

        /* elem_size should not be 0 here */
        assert (elem_size > 0);

        buffer_size = num_elements * elem_size;
        align = flist->fl_payload_buffer_alignment;

        if (MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM & flist->fl_rcache_reg_flags) {
            size_t pagesize = opal_getpagesize ();
            /* CUDA cannot handle registering overlapping regions, so make
             * sure each region is page sized and page aligned. */
            align = OPAL_ALIGN(align, pagesize, size_t);
            buffer_size = OPAL_ALIGN(buffer_size, pagesize, size_t);

            /* avoid wasting space in the buffer */
            num_elements = buffer_size / elem_size;
        }
    }

    /* calculate head allocation size: slab header + aligned item headers */
    alloc_size = num_elements * head_size + sizeof (opal_free_list_memory_t) +
        flist->fl_frag_alignment;

    alloc_ptr = (opal_free_list_memory_t *) malloc (alloc_size);
    if (OPAL_UNLIKELY(NULL == alloc_ptr)) {
        return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
    }

    if (0 != flist->fl_payload_buffer_size) {
        /* allocate the rest from the mpool (or use memalign/malloc) */
        payload_ptr = (unsigned char *) flist->fl_mpool->mpool_alloc (flist->fl_mpool, buffer_size, align, 0);
        if (NULL == payload_ptr) {
            free (alloc_ptr);
            return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
        }

        if (flist->fl_rcache) {
            /* fixed: the address-of operator on reg had been corrupted in
             * this copy of the file */
            rc = flist->fl_rcache->rcache_register (flist->fl_rcache, payload_ptr, num_elements * elem_size,
                                                    flist->fl_rcache_reg_flags, MCA_RCACHE_ACCESS_ANY, &reg);
            if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
                free (alloc_ptr);
                flist->fl_mpool->mpool_free (flist->fl_mpool, payload_ptr);

                return rc;
            }
        }
    }

    /* make the alloc_ptr a list item, save the chunk in the allocations list,
     * and have ptr point to memory right after the list item structure */
    OBJ_CONSTRUCT(alloc_ptr, opal_free_list_item_t);
    opal_list_append (&flist->fl_allocations, (opal_list_item_t *) alloc_ptr);

    alloc_ptr->registration = reg;
    alloc_ptr->ptr = payload_ptr;

    ptr = (unsigned char *) alloc_ptr + sizeof (opal_free_list_memory_t);
    ptr = OPAL_ALIGN_PTR(ptr, flist->fl_frag_alignment, unsigned char *);

    for (size_t i = 0; i < num_elements; ++i) {
        opal_free_list_item_t *item = (opal_free_list_item_t *) ptr;
        item->registration = reg;
        item->ptr = payload_ptr;

        OBJ_CONSTRUCT_INTERNAL(item, flist->fl_frag_class);
        item->super.item_free = 0;

        /* run the initialize function if present */
        if (flist->item_init) {
            if (OPAL_SUCCESS != (rc = flist->item_init (item, flist->ctx))) {
                /* stop here; only i items were successfully initialized */
                num_elements = i;
                OBJ_DESTRUCT(item);
                break;
            }
        }

        /* NTH: in case the free list may be accessed from multiple threads
         * use the atomic lifo push. The overhead is small compared to the
         * overall overhead of opal_free_list_grow(). */
        if (item_out && 0 == i) {
            /* ensure the thread that is growing the free list always gets an item
             * if one is available */
            *item_out = item;
        } else {
            opal_lifo_push_atomic (&flist->super, &item->super);
        }

        ptr += head_size;
        payload_ptr += elem_size;
    }

    if (OPAL_SUCCESS != rc && 0 == num_elements) {
        /* couldn't initialize any items: undo the slab allocation entirely */
        opal_list_remove_item (&flist->fl_allocations, (opal_list_item_t *) alloc_ptr);
        opal_free_list_allocation_release (flist, alloc_ptr);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    flist->fl_num_allocated += num_elements;
    return OPAL_SUCCESS;
}
288
289 /**
290 * This function resize the free_list to contain at least the specified
291 * number of elements. We do not create all of them in the same memory
292 * segment. Instead we will several time the fl_num_per_alloc elements
293 * until we reach the required number of the maximum allowed by the
294 * initialization.
295 */
opal_free_list_resize_mt(opal_free_list_t * flist,size_t size)296 int opal_free_list_resize_mt(opal_free_list_t *flist, size_t size)
297 {
298 ssize_t inc_num;
299 int ret = OPAL_SUCCESS;
300
301 if (flist->fl_num_allocated > size) {
302 return OPAL_SUCCESS;
303 }
304
305 opal_mutex_lock (&flist->fl_lock);
306 do {
307 ret = opal_free_list_grow_st (flist, flist->fl_num_per_alloc, NULL);
308 if (OPAL_SUCCESS != ret) {
309 break;
310 }
311
312 inc_num = (ssize_t)size - (ssize_t)flist->fl_num_allocated;
313 } while (inc_num > 0);
314 opal_mutex_unlock (&flist->fl_lock);
315
316 return ret;
317 }
318