/**
 * \file
 * Object allocation routines + managed allocators
 *
 * Authors:
 * 	Paolo Molaro (lupus@ximian.com)
 * 	Rodrigo Kumpera (kumpera@gmail.com)
 *
 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
 * Copyright 2011 Xamarin, Inc.
 * Copyright (C) 2012 Xamarin Inc
 *
 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
 */

/*
 * ######################################################################
 * ########  Object allocation
 * ######################################################################
 * This section of code deals with allocating memory for objects.
 * There are several allocation paths:
 * *) allocation of large objects
 * *) allocation of normal objects
 * *) fast lock-free allocation
 * *) allocation of pinned objects
 */
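
/*
 * Quick map from the paths above to the entry points defined below.  The
 * calling code in this sketch is purely illustrative; only the function
 * names are real:
 *
 *   GCObject *o = sgen_alloc_obj (vtable, size);         // normal allocation; objects larger than
 *                                                        // SGEN_MAX_SMALL_OBJ_SIZE go to the LOS
 *   GCObject *p = sgen_alloc_obj_pinned (vtable, size);  // pinned allocation
 *   GCObject *m = sgen_alloc_obj_mature (vtable, size);  // allocate directly in the major heap
 *
 * The fast lock-free path is sgen_try_alloc_obj_nolock (), which
 * sgen_alloc_obj () attempts inside a critical region before falling back
 * to sgen_alloc_obj_nolock () under the GC lock.
 */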

#include "config.h"
#ifdef HAVE_SGEN_GC

#include <string.h>

#include "mono/sgen/sgen-gc.h"
#include "mono/sgen/sgen-protocol.h"
#include "mono/sgen/sgen-memory-governor.h"
#include "mono/sgen/sgen-client.h"
#include "mono/utils/mono-memory-model.h"

#define ALIGN_UP		SGEN_ALIGN_UP
#define ALLOC_ALIGN		SGEN_ALLOC_ALIGN
#define MAX_SMALL_OBJ_SIZE	SGEN_MAX_SMALL_OBJ_SIZE

#ifdef HEAVY_STATISTICS
static guint64 stat_objects_alloced = 0;
static guint64 stat_bytes_alloced = 0;
static guint64 stat_bytes_alloced_los = 0;
#endif

/*
 * Allocation is done from a Thread Local Allocation Buffer (TLAB). TLABs are allocated
 * from nursery fragments.
 * tlab_next is the pointer to the space inside the TLAB where the next object will
 * be allocated.
 * tlab_temp_end is the pointer to the end of the temporary space reserved for
 * the allocation: it allows us to set the scan starts at reasonable intervals.
 * tlab_real_end points to the end of the TLAB.
 */

#define TLAB_START	(__thread_info__->tlab_start)
#define TLAB_NEXT	(__thread_info__->tlab_next)
#define TLAB_TEMP_END	(__thread_info__->tlab_temp_end)
#define TLAB_REAL_END	(__thread_info__->tlab_real_end)
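
/*
 * Minimal sketch of the bump-pointer fast path that the allocators below
 * implement (illustrative only; alignment, canaries, logging and the slow
 * paths are omitted):
 *
 *   void **p = (void**)TLAB_NEXT;
 *   char *new_next = (char*)p + size;             // size already ALIGN_UP'd
 *   if (G_LIKELY (new_next < TLAB_TEMP_END)) {
 *           TLAB_NEXT = new_next;                 // bump the allocation pointer
 *           mono_atomic_store_seq (p, vtable);    // install the vtable: object is now valid
 *           return (GCObject*)p;
 *   }
 *   // Otherwise: bump tlab_temp_end, refill the TLAB from a nursery fragment,
 *   // allocate directly from the nursery, or fall back to degraded allocation
 *   // from the major heap.
 *
 * Outside of the slow paths, TLAB_START <= TLAB_NEXT <= TLAB_TEMP_END <= TLAB_REAL_END.
 */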

static GCObject*
alloc_degraded (GCVTable vtable, size_t size, gboolean for_mature)
{
	GCObject *p;

	if (!for_mature) {
		sgen_client_degraded_allocation ();
		SGEN_ATOMIC_ADD_P (degraded_mode, size);
		sgen_ensure_free_space (size, GENERATION_OLD);
	} else {
		if (sgen_need_major_collection (size))
			sgen_perform_collection (size, GENERATION_OLD, "mature allocation failure", !for_mature, TRUE);
	}

	p = major_collector.alloc_degraded (vtable, size);

	if (!for_mature)
		binary_protocol_alloc_degraded (p, vtable, size, sgen_client_get_provenance ());

	return p;
}

static void
zero_tlab_if_necessary (void *p, size_t size)
{
	if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION || nursery_clear_policy == CLEAR_AT_TLAB_CREATION_DEBUG) {
		memset (p, 0, size);
	} else {
		/*
		 * This function is called for all allocations in
		 * TLABs.  TLABs originate from fragments, which are
		 * initialized to be faux arrays.  The remainder of
		 * the fragments are zeroed out at initialization for
		 * CLEAR_AT_GC, so here we just need to make sure that
		 * the array header is zeroed.  Since we don't know
		 * whether we're called for the start of a fragment or
		 * for somewhere in between, we zero in any case, just
		 * to make sure.
		 */
		sgen_client_zero_array_fill_header (p, size);
	}
}

/*
 * Provide a variant that takes just the vtable for small fixed-size objects.
 * The aligned size is already computed and stored in vt->gc_descr.
 * Note: every SGEN_SCAN_START_SIZE or so we are given the chance to do some special
 * processing. We can keep track of where objects start, for example,
 * so when we scan the thread stacks for pinned objects, we can start
 * a search for the pinned object in SGEN_SCAN_START_SIZE chunks.
 */
GCObject*
sgen_alloc_obj_nolock (GCVTable vtable, size_t size)
{
	/* FIXME: handle OOM */
	void **p;
	char *new_next;
	size_t real_size = size;
	TLAB_ACCESS_INIT;

	CANARIFY_SIZE(size);

	HEAVY_STAT (++stat_objects_alloced);
	if (real_size <= SGEN_MAX_SMALL_OBJ_SIZE)
		HEAVY_STAT (stat_bytes_alloced += size);
	else
		HEAVY_STAT (stat_bytes_alloced_los += size);

	size = ALIGN_UP (size);

	SGEN_ASSERT (6, sgen_vtable_get_descriptor (vtable), "VTable without descriptor");

	if (G_UNLIKELY (has_per_allocation_action)) {
		static int alloc_count;
		int current_alloc = mono_atomic_inc_i32 (&alloc_count);

		if (collect_before_allocs) {
			if (((current_alloc % collect_before_allocs) == 0) && nursery_section) {
				sgen_perform_collection (0, GENERATION_NURSERY, "collect-before-alloc-triggered", TRUE, TRUE);
				if (!degraded_mode && sgen_can_alloc_size (size) && real_size <= SGEN_MAX_SMALL_OBJ_SIZE) {
					// FIXME:
					g_assert_not_reached ();
				}
			}
		} else if (verify_before_allocs) {
			if ((current_alloc % verify_before_allocs) == 0)
				sgen_check_whole_heap_stw ();
		}
	}

	/*
	 * We must already have the lock here instead of after the
	 * fast path because we might be interrupted in the fast path
	 * (after confirming that new_next < TLAB_TEMP_END) by the GC,
	 * and we'll end up allocating an object in a fragment which
	 * no longer belongs to us.
	 *
	 * The managed allocator does not do this, but it's treated
	 * specially by the world-stopping code.
	 */

	if (real_size > SGEN_MAX_SMALL_OBJ_SIZE) {
		p = (void **)sgen_los_alloc_large_inner (vtable, ALIGN_UP (real_size));
	} else {
		/* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */

		p = (void**)TLAB_NEXT;
		/* FIXME: handle overflow */
		new_next = (char*)p + size;
		TLAB_NEXT = new_next;

		if (G_LIKELY (new_next < TLAB_TEMP_END)) {
			/* Fast path */

			CANARIFY_ALLOC(p,real_size);
			SGEN_LOG (6, "Allocated object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
			binary_protocol_alloc (p, vtable, size, sgen_client_get_provenance ());
			g_assert (*p == NULL);
			mono_atomic_store_seq (p, vtable);

			return (GCObject*)p;
		}

		/* Slow path */

		/* There are two cases: the object is too big, or we ran out of space in the TLAB. */
		/* We also reach here when the thread does its first allocation after a minor
		 * collection, since the tlab_ variables are initialized to NULL.
		 * There can be another case (from ORP), if we cooperate with the runtime a bit:
		 * objects that need finalizers can have the high bit set in their size
		 * so the above check fails and we can readily add the object to the queue.
		 * This avoids taking the GC lock again when registering, but this is moot when
		 * doing thread-local allocation, so it may not be a good idea.
		 */
		if (TLAB_NEXT >= TLAB_REAL_END) {
			int available_in_tlab;
			/*
			 * We ran out of space in the TLAB. When this happens, some amount of space
			 * remains in the TLAB, but not enough to satisfy the current allocation
			 * request. Currently we retire the TLAB in all cases; later we could
			 * keep it if the remaining space is above a threshold, and satisfy the
			 * allocation directly from the nursery.
			 */
			TLAB_NEXT -= size;
			/* When running in degraded mode, we continue allocating that way
			 * for a while, to decrease the number of useless nursery collections.
			 */
			if (degraded_mode && degraded_mode < sgen_nursery_size)
				return alloc_degraded (vtable, size, FALSE);

			available_in_tlab = (int)(TLAB_REAL_END - TLAB_NEXT); // We'll never have TLABs > 2 GB
			if (size > tlab_size || available_in_tlab > SGEN_MAX_NURSERY_WASTE) {
				/* Allocate directly from the nursery */
				p = (void **)sgen_nursery_alloc (size);
				if (!p) {
					/*
					 * We couldn't allocate from the nursery, so we try
					 * collecting.  Even after the collection, we might
					 * still not have enough memory to allocate the
					 * object.  The reason will most likely be that we've
					 * run out of memory, but there is the theoretical
					 * possibility that other threads might have consumed
					 * the freed up memory ahead of us.
					 *
					 * What we do in this case is allocate degraded, i.e.,
					 * from the major heap.
					 *
					 * Ideally we'd like to detect the case of other
					 * threads allocating ahead of us and loop (if we
					 * always loop we will loop endlessly in the case of
					 * OOM).
					 */
					sgen_ensure_free_space (real_size, GENERATION_NURSERY);
					if (!degraded_mode)
						p = (void **)sgen_nursery_alloc (size);
				}
				if (!p)
					return alloc_degraded (vtable, size, TRUE);

				zero_tlab_if_necessary (p, size);
			} else {
				size_t alloc_size = 0;
				if (TLAB_START)
					SGEN_LOG (3, "Retire TLAB: %p-%p [%ld]", TLAB_START, TLAB_REAL_END, (long)(TLAB_REAL_END - TLAB_NEXT - size));
				sgen_nursery_retire_region (p, available_in_tlab);

				p = (void **)sgen_nursery_alloc_range (tlab_size, size, &alloc_size);
				if (!p) {
					/* See comment above in similar case. */
					sgen_ensure_free_space (tlab_size, GENERATION_NURSERY);
					if (!degraded_mode)
						p = (void **)sgen_nursery_alloc_range (tlab_size, size, &alloc_size);
				}
				if (!p)
					return alloc_degraded (vtable, size, TRUE);

				/* Allocate a new TLAB from the current nursery fragment */
				TLAB_START = (char*)p;
				TLAB_NEXT = TLAB_START;
				TLAB_REAL_END = TLAB_START + alloc_size;
				TLAB_TEMP_END = TLAB_START + MIN (SGEN_SCAN_START_SIZE, alloc_size);

				zero_tlab_if_necessary (TLAB_START, alloc_size);

				/* Allocate from the TLAB */
				p = (void **)TLAB_NEXT;
				TLAB_NEXT += size;
				sgen_set_nursery_scan_start ((char*)p);
			}
		} else {
			/* Reached tlab_temp_end */

			/* record the scan start so we can find pinned objects more easily */
			sgen_set_nursery_scan_start ((char*)p);
			/* we just bump tlab_temp_end as well */
			TLAB_TEMP_END = MIN (TLAB_REAL_END, TLAB_NEXT + SGEN_SCAN_START_SIZE);
			SGEN_LOG (5, "Expanding local alloc: %p-%p", TLAB_NEXT, TLAB_TEMP_END);
		}
		CANARIFY_ALLOC(p,real_size);
	}

	if (G_LIKELY (p)) {
		SGEN_LOG (6, "Allocated object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
		binary_protocol_alloc (p, vtable, size, sgen_client_get_provenance ());
		mono_atomic_store_seq (p, vtable);
	}

	return (GCObject*)p;
}

GCObject*
sgen_try_alloc_obj_nolock (GCVTable vtable, size_t size)
{
	void **p;
	char *new_next;
	size_t real_size = size;
	TLAB_ACCESS_INIT;

	CANARIFY_SIZE(size);

	size = ALIGN_UP (size);
	SGEN_ASSERT (9, real_size >= SGEN_CLIENT_MINIMUM_OBJECT_SIZE, "Object too small");

	SGEN_ASSERT (6, sgen_vtable_get_descriptor (vtable), "VTable without descriptor");

	if (real_size > SGEN_MAX_SMALL_OBJ_SIZE)
		return NULL;

	if (G_UNLIKELY (size > tlab_size)) {
		/* Allocate directly from the nursery */
		p = (void **)sgen_nursery_alloc (size);
		if (!p)
			return NULL;
		sgen_set_nursery_scan_start ((char*)p);

		/* FIXME: we should use weak memory ops here. Should help especially on x86. */
		zero_tlab_if_necessary (p, size);
	} else {
		int available_in_tlab;
		char *real_end;
		/* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */

		p = (void**)TLAB_NEXT;
		/* FIXME: handle overflow */
		new_next = (char*)p + size;

		real_end = TLAB_REAL_END;
		available_in_tlab = (int)(real_end - (char*)p); // We'll never have TLABs > 2 GB

		if (G_LIKELY (new_next < real_end)) {
			TLAB_NEXT = new_next;

			/* Second case: we overflowed tlab_temp_end */
			if (G_UNLIKELY (new_next >= TLAB_TEMP_END)) {
				sgen_set_nursery_scan_start (new_next);
				/* we just bump tlab_temp_end as well */
				TLAB_TEMP_END = MIN (TLAB_REAL_END, TLAB_NEXT + SGEN_SCAN_START_SIZE);
				SGEN_LOG (5, "Expanding local alloc: %p-%p", TLAB_NEXT, TLAB_TEMP_END);
			}
		} else if (available_in_tlab > SGEN_MAX_NURSERY_WASTE) {
			/* Allocate directly from the nursery */
			p = (void **)sgen_nursery_alloc (size);
			if (!p)
				return NULL;

			zero_tlab_if_necessary (p, size);
		} else {
			size_t alloc_size = 0;

			sgen_nursery_retire_region (p, available_in_tlab);
			new_next = (char *)sgen_nursery_alloc_range (tlab_size, size, &alloc_size);
			p = (void**)new_next;
			if (!p)
				return NULL;

			TLAB_START = (char*)new_next;
			TLAB_NEXT = new_next + size;
			TLAB_REAL_END = new_next + alloc_size;
			TLAB_TEMP_END = new_next + MIN (SGEN_SCAN_START_SIZE, alloc_size);
			sgen_set_nursery_scan_start ((char*)p);

			zero_tlab_if_necessary (new_next, alloc_size);
		}
	}

	HEAVY_STAT (++stat_objects_alloced);
	HEAVY_STAT (stat_bytes_alloced += size);

	CANARIFY_ALLOC(p,real_size);
	SGEN_LOG (6, "Allocated object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
	binary_protocol_alloc (p, vtable, size, sgen_client_get_provenance ());
	g_assert (*p == NULL); /* FIXME: disable this in non-debug builds */

	mono_atomic_store_seq (p, vtable);

	return (GCObject*)p;
}

GCObject*
sgen_alloc_obj (GCVTable vtable, size_t size)
{
	GCObject *res;
	TLAB_ACCESS_INIT;

	if (!SGEN_CAN_ALIGN_UP (size))
		return NULL;

	if (G_UNLIKELY (has_per_allocation_action)) {
		static int alloc_count;
		int current_alloc = mono_atomic_inc_i32 (&alloc_count);

		if (verify_before_allocs) {
			if ((current_alloc % verify_before_allocs) == 0) {
				LOCK_GC;
				sgen_check_whole_heap_stw ();
				UNLOCK_GC;
			}
		}
		if (collect_before_allocs) {
			if (((current_alloc % collect_before_allocs) == 0) && nursery_section) {
				LOCK_GC;
				sgen_perform_collection (0, GENERATION_NURSERY, "collect-before-alloc-triggered", TRUE, TRUE);
				UNLOCK_GC;
			}
		}
	}

	ENTER_CRITICAL_REGION;
	res = sgen_try_alloc_obj_nolock (vtable, size);
	if (res) {
		EXIT_CRITICAL_REGION;
		return res;
	}
	EXIT_CRITICAL_REGION;

	LOCK_GC;
	res = sgen_alloc_obj_nolock (vtable, size);
	UNLOCK_GC;
	return res;
}
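
/*
 * Hypothetical caller sketch: a client-side wrapper is expected to check for
 * a NULL result, which indicates either a size that cannot be aligned up or
 * an allocation failure (the hook name below is made up for illustration):
 *
 *   GCObject *obj = sgen_alloc_obj (vtable, size);
 *   if (G_UNLIKELY (!obj))
 *           client_report_out_of_memory (size);   // hypothetical client hook
 */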

/*
 * To be used for interned strings and possibly MonoThread and reflection handles.
 * We may want to explicitly free these objects.
 */
GCObject*
sgen_alloc_obj_pinned (GCVTable vtable, size_t size)
{
	GCObject *p;

	if (!SGEN_CAN_ALIGN_UP (size))
		return NULL;
	size = ALIGN_UP (size);

	LOCK_GC;

	if (size > SGEN_MAX_SMALL_OBJ_SIZE) {
		/* large objects are always pinned anyway */
		p = (GCObject *)sgen_los_alloc_large_inner (vtable, size);
	} else {
		SGEN_ASSERT (9, sgen_client_vtable_is_inited (vtable), "class %s:%s is not initialized", sgen_client_vtable_get_namespace (vtable), sgen_client_vtable_get_name (vtable));
		p = major_collector.alloc_small_pinned_obj (vtable, size, SGEN_VTABLE_HAS_REFERENCES (vtable));
	}
	if (G_LIKELY (p)) {
		SGEN_LOG (6, "Allocated pinned object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
		binary_protocol_alloc_pinned (p, vtable, size, sgen_client_get_provenance ());
	}
	UNLOCK_GC;
	return p;
}

GCObject*
sgen_alloc_obj_mature (GCVTable vtable, size_t size)
{
	GCObject *res;

	if (!SGEN_CAN_ALIGN_UP (size))
		return NULL;
	size = ALIGN_UP (size);

	LOCK_GC;
	res = alloc_degraded (vtable, size, TRUE);
	UNLOCK_GC;

	return res;
}

/*
 * Clear the thread local TLAB variables for all threads.
 */
void
sgen_clear_tlabs (void)
{
	FOREACH_THREAD (info) {
		/* A new TLAB will be allocated when the thread does its first allocation */
		info->tlab_start = NULL;
		info->tlab_next = NULL;
		info->tlab_temp_end = NULL;
		info->tlab_real_end = NULL;
	} FOREACH_THREAD_END
}

void
sgen_init_allocator (void)
{
#ifdef HEAVY_STATISTICS
	mono_counters_register ("# objects allocated", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_objects_alloced);
	mono_counters_register ("bytes allocated", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_bytes_alloced);
	mono_counters_register ("bytes allocated in LOS", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_bytes_alloced_los);
#endif
}

#endif /*HAVE_SGEN_GC*/