1 /**
2  * \file
3  * Routines for parsing basic blocks from the IL stream
4  *
5  * Authors:
6  *   Rodrigo Kumpera (rkumpera@novell.com)
7  *
8  * Copyright 2010 Novell, Inc (http://www.novell.com)
9  * Licensed under the MIT license. See LICENSE file in the project root for full license information.
10  */
11 
12 #include <config.h>
13 
14 #include <mono/metadata/debug-helpers.h>
15 #include <mono/metadata/metadata-internals.h>
16 #include <mono/metadata/mono-endian.h>
17 #include <mono/metadata/mono-basic-block.h>
18 #include <mono/metadata/opcodes.h>
19 
20 #include <mono/utils/mono-error-internals.h>
21 #include <mono/utils/mono-compiler.h>
22 
23 #define CHECK_ADD4_OVERFLOW_UN(a, b) ((guint32)(0xFFFFFFFFU) - (guint32)(b) < (guint32)(a))
24 #define CHECK_ADD8_OVERFLOW_UN(a, b) ((guint64)(0xFFFFFFFFFFFFFFFFUL) - (guint64)(b) < (guint64)(a))
25 
26 #if SIZEOF_VOID_P == 4
27 #define CHECK_ADDP_OVERFLOW_UN(a,b) CHECK_ADD4_OVERFLOW_UN(a, b)
28 #else
29 #define CHECK_ADDP_OVERFLOW_UN(a,b) CHECK_ADD8_OVERFLOW_UN(a, b)
30 #endif
31 
32 #define ADDP_IS_GREATER_OR_OVF(a, b, c) (((a) + (b) > (c)) || CHECK_ADDP_OVERFLOW_UN (a, b))
33 #define ADD_IS_GREATER_OR_OVF(a, b, c) (((a) + (b) > (c)) || CHECK_ADD4_OVERFLOW_UN (a, b))
34 
35 #define DEBUG_BB 0
36 
37 enum {
38 	RED,
39 	BLACK
40 };
41 
42 #if DEBUG_BB
43 
44 static void
dump_bb_list(MonoSimpleBasicBlock * bb,MonoSimpleBasicBlock ** root,const char * msg)45 dump_bb_list (MonoSimpleBasicBlock *bb, MonoSimpleBasicBlock **root, const char *msg)
46 {
47 	printf ("------- %s --- root is %x ---\n", msg, (*root)->start);
48 	while (bb) {
49 		GSList *tmp;
50 		printf ("BB start %x end %x left ", bb->start, bb->end);
51 		if (bb->left)
52 			printf ("%x", bb->left->start);
53 		else
54 			printf ("NIL");
55 
56 		printf (" right ");
57 		if (bb->right)
58 			printf ("%x", bb->right->start);
59 		else
60 			printf ("NIL");
61 
62 		printf (" parent ");
63 		if (bb->parent)
64 			printf ("%x", bb->parent->start);
65 		else
66 			printf ("NIL");
67 
68 		printf(" color %s out [", bb->colour == RED ? "red" : "black");
69 
70 		for (tmp = bb->out_bb; tmp; tmp = tmp->next) {
71 			MonoSimpleBasicBlock *to = tmp->data;
72 			printf ("%x ", to->start);
73 		}
74 		printf ("] %s\n", bb->dead ? "dead" : "alive");
75 		bb = bb->next;
76 	}
77 }
78 
79 #endif
80 
81 static void
bb_unlink(MonoSimpleBasicBlock * from,MonoSimpleBasicBlock * to)82 bb_unlink (MonoSimpleBasicBlock *from, MonoSimpleBasicBlock *to)
83 {
84 	if (from->out_bb)
85 		from->out_bb = g_slist_remove (from->out_bb, to);
86 }
87 
88 static void
bb_link(MonoSimpleBasicBlock * from,MonoSimpleBasicBlock * to)89 bb_link (MonoSimpleBasicBlock *from, MonoSimpleBasicBlock *to)
90 {
91 	if (g_slist_find (from->out_bb, to))
92 		return;
93 	from->out_bb = g_slist_prepend (from->out_bb, to);
94 }
95 
96 
97 static MonoSimpleBasicBlock*
bb_grandparent(MonoSimpleBasicBlock * bb)98 bb_grandparent (MonoSimpleBasicBlock *bb)
99 {
100 	return bb && bb->parent ? bb->parent->parent : NULL;
101 }
102 
103 static MonoSimpleBasicBlock*
bb_uncle(MonoSimpleBasicBlock * bb)104 bb_uncle (MonoSimpleBasicBlock *bb)
105 {
106 	MonoSimpleBasicBlock *gp = bb_grandparent (bb);
107 	if (gp == NULL)
108 		return NULL;
109 	if (bb->parent == gp->left)
110 		return gp->right;
111 	return gp->left;
112 }
113 
114 static void
change_node(MonoSimpleBasicBlock * from,MonoSimpleBasicBlock * to,MonoSimpleBasicBlock ** root)115 change_node (MonoSimpleBasicBlock *from, MonoSimpleBasicBlock *to, MonoSimpleBasicBlock **root)
116 {
117 	MonoSimpleBasicBlock *parent = from->parent;
118 	if (parent) {
119 		if (parent->left == from)
120 			parent->left = to;
121 		else
122 			parent->right = to;
123 	} else {
124 		*root = to;
125 	}
126 	to->parent = parent;
127 }
128 
129 static void
rotate_right(MonoSimpleBasicBlock * parent,MonoSimpleBasicBlock ** root)130 rotate_right (MonoSimpleBasicBlock *parent, MonoSimpleBasicBlock **root)
131 {
132 	MonoSimpleBasicBlock *bb = parent->left;
133 	if (bb->right) {
134 		parent->left = bb->right;
135 		parent->left->parent = parent;
136 	} else
137 		parent->left = NULL;
138 	bb->right = parent;
139 	change_node (parent, bb, root);
140 	parent->parent = bb;
141 }
142 
143 static void
rotate_left(MonoSimpleBasicBlock * bb,MonoSimpleBasicBlock ** root)144 rotate_left (MonoSimpleBasicBlock *bb, MonoSimpleBasicBlock **root)
145 {
146 	MonoSimpleBasicBlock *other = bb->right;
147 	if (other->left) {
148 		bb->right = other->left;
149 		bb->right->parent = bb;
150 	} else
151 		bb->right = NULL;
152 	other->left = bb;
153 	change_node (bb, other, root);
154 	bb->parent = other;
155 }
156 
157 /* School book implementation of a RB tree with insert then fix (which requires a parent pointer)
158  * TODO implement Sedgewick's version that doesn't require parent pointers
159  */
160 static void
bb_insert(MonoSimpleBasicBlock * first,MonoSimpleBasicBlock * bb,MonoSimpleBasicBlock ** root)161 bb_insert (MonoSimpleBasicBlock *first, MonoSimpleBasicBlock *bb, MonoSimpleBasicBlock **root)
162 {
163 	MonoSimpleBasicBlock *parent, *uncle, *grandparent;
164 	int bb_start = bb->start;
165 
166 	parent = *root;
167 	do {
168 		if (bb_start < parent->start) {
169 			if (parent->left == NULL) {
170 				parent->left = bb;
171 				break;
172 			}
173 			parent = parent->left;
174 		} else {
175 			if (parent->right == NULL) {
176 				parent->right = bb;
177 				break;
178 			}
179 			parent = parent->right;
180 		}
181 	} while (parent);
182 	g_assert (parent);
183 	bb->parent = parent;
184 
185 	bb->colour = RED;
186 
187 	do {
188 		if (bb->parent == NULL) {
189 			bb->colour = BLACK;
190 			break;
191 		}
192 
193 		if (bb->parent->colour == BLACK)
194 			break;
195 
196 		uncle = bb_uncle (bb);
197 		if (uncle && uncle->colour == RED) {
198 			grandparent = bb_grandparent (bb);
199 
200 			bb->parent->colour = BLACK;
201 			uncle->colour = BLACK;
202 			grandparent->colour = RED;
203 			bb = grandparent;
204 			continue;
205 		}
206 
207 		grandparent = bb_grandparent (bb);
208 		if ((bb == bb->parent->right) && (bb->parent == grandparent->left)) {
209 			rotate_left (bb->parent, root);
210 			bb = bb->left;
211 		} else if ((bb == bb->parent->left) && (bb->parent == grandparent->right)) {
212 			rotate_right (bb->parent, root);
213 			bb = bb->right;
214 		}
215 
216 		grandparent = bb_grandparent (bb);
217 		bb->parent->colour = BLACK;
218 		grandparent->colour = RED;
219 		if ((bb == bb->parent->left) && (bb->parent == grandparent->left))
220 			rotate_right (grandparent, root);
221 		else
222 			rotate_left (grandparent, root);
223 		break;
224 	} while (TRUE);
225 }
226 
227 static gboolean
bb_idx_is_contained(MonoSimpleBasicBlock * bb,int target)228 bb_idx_is_contained (MonoSimpleBasicBlock *bb, int target)
229 {
230 	return bb->start <= target && target < bb->end;
231 }
232 
233 /*
234  * Split the basic blocks from @first at @target.
235  * @hint is a guess of a very close to the target basic block. It is probed before the RB tree as it's often possible
236  * to provide a near to exact guess (next block splits, switch branch targets, etc)
237  *
238  */
239 static MonoSimpleBasicBlock*
bb_split(MonoSimpleBasicBlock * first,MonoSimpleBasicBlock * hint,MonoSimpleBasicBlock ** root,guint target,gboolean link_blocks,MonoMethod * method,MonoError * error)240 bb_split (MonoSimpleBasicBlock *first, MonoSimpleBasicBlock *hint, MonoSimpleBasicBlock **root, guint target, gboolean link_blocks, MonoMethod *method, MonoError *error)
241 {
242 	MonoSimpleBasicBlock *res, *bb = first;
243 
244 	error_init (error);
245 
246 	if (bb_idx_is_contained (hint, target)) {
247 		first = hint;
248 	} else if (hint->next && bb_idx_is_contained (hint->next, target)) {
249 		first = hint->next;
250 	} else {
251 		first = *root;
252 		do {
253 			if (bb_idx_is_contained (first, target))
254 				break;
255 			if (first->start > target)
256 				first = first->left;
257 			else
258 				first = first->right;
259 		} while (first);
260 	}
261 
262 	if (first == NULL) {
263 		mono_error_set_not_verifiable (error, method, "Invalid instruction target %x", target);
264 		return NULL;
265 	}
266 
267 	if (first->start == target)
268 		return first;
269 
270 	res = g_new0 (MonoSimpleBasicBlock, 1);
271 	res->start = target;
272 	res->end = first->end;
273 	res->next = first->next;
274 	res->out_bb = first->out_bb;
275 	res->dead = TRUE;
276 
277 	first->end = res->start;
278 	first->next = res;
279 	first->out_bb = NULL;
280 
281 	if (link_blocks)
282 		bb_link (first, res);
283 	bb_insert (bb, res, root);
284 
285 	return res;
286 }
287 
288 static void
bb_liveness(MonoSimpleBasicBlock * bb)289 bb_liveness (MonoSimpleBasicBlock *bb)
290 {
291 	GPtrArray* mark_stack = g_ptr_array_new ();
292 	GSList *tmp;
293 
294 	/*All function entry points (prologue, EH handler/filter) are already marked*/
295 	while (bb) {
296 		if (!bb->dead)
297 			g_ptr_array_add (mark_stack, bb);
298 		bb = bb->next;
299 	}
300 
301 	while (mark_stack->len > 0) {
302 		MonoSimpleBasicBlock *block = (MonoSimpleBasicBlock *)g_ptr_array_remove_index_fast (mark_stack, mark_stack->len - 1);
303 		block->dead = FALSE;
304 
305 		for (tmp = block->out_bb; tmp; tmp = tmp->next) {
306 			MonoSimpleBasicBlock *to = (MonoSimpleBasicBlock *)tmp->data;
307 			if (to->dead)
308 				g_ptr_array_add (mark_stack, to);
309 		}
310 	}
311 
312 	g_ptr_array_free (mark_stack, TRUE);
313 }
314 
315 /*This doesn't returns endfilter because it's not useful to split at its boundary.
316 Endfilter must be the last instruction of a filter clause and MS enforces this, so we can't have
317 dead code after it.
318 */
319 static gboolean
mono_opcode_has_static_branch(int opcode)320 mono_opcode_has_static_branch (int opcode)
321 {
322 	switch (opcode) {
323 	case MONO_CEE_RET:
324 	case MONO_CEE_THROW:
325 	case MONO_CEE_RETHROW:
326 	case MONO_CEE_ENDFINALLY:
327 		return TRUE;
328 	}
329 	return FALSE;
330 }
331 
332 
333 static void
bb_formation_il_pass(const unsigned char * start,const unsigned char * end,MonoSimpleBasicBlock * bb,MonoSimpleBasicBlock ** root,MonoMethod * method,MonoError * error)334 bb_formation_il_pass (const unsigned char *start, const unsigned char *end, MonoSimpleBasicBlock *bb, MonoSimpleBasicBlock **root, MonoMethod *method, MonoError *error)
335 {
336 	unsigned const char *ip = start;
337 	int value, size;
338 	guint cli_addr, offset;
339 	MonoSimpleBasicBlock *branch, *next, *current;
340 	const MonoOpcode *opcode;
341 
342 	error_init (error);
343 
344 	current = bb;
345 
346 	while (ip < end) {
347 		cli_addr = ip - start;
348 		size = mono_opcode_value_and_size (&ip, end, &value);
349 		if (size < 0) {
350 			mono_error_set_not_verifiable (error, method, "Invalid instruction %x", *ip);
351 			return;
352 		}
353 
354 		while (current && cli_addr >= current->end)
355 			current = current->next;
356 		g_assert (current);
357 
358 		opcode = &mono_opcodes [value];
359 		switch (opcode->argument) {
360 		case MonoInlineNone:
361 			ip++;
362 			if (!mono_opcode_has_static_branch (value) || ip >= end)
363 				break;
364 			if (!(next = bb_split (bb, current, root, ip - start, FALSE, method, error)))
365 				return;
366 
367 			bb_unlink (current, next);
368 			current = next;
369 			break;
370 		case MonoInlineString:
371 		case MonoInlineType:
372 		case MonoInlineField:
373 		case MonoInlineTok:
374 		case MonoInlineSig:
375 		case MonoShortInlineR:
376 		case MonoInlineI:
377 			ip += 5;
378 			break;
379 
380 		case MonoInlineMethod:
381 			ip += 5;
382 			if (value != MONO_CEE_JMP || ip >= end)
383 				break;
384 			if (!(next = bb_split (bb, current, root, ip - start, FALSE, method, error)))
385 				return;
386 
387 			bb_unlink (current, next);
388 			current = next;
389 
390 			break;
391 		case MonoInlineVar:
392 			ip += 3;
393 			break;
394 		case MonoShortInlineVar:
395 		case MonoShortInlineI:
396 			ip += 2;
397 			break;
398 		case MonoInlineR:
399 		case MonoInlineI8:
400 			ip += 9;
401 			break;
402 		case MonoShortInlineBrTarget:
403 		case MonoInlineBrTarget:
404 			if (opcode->argument == MonoShortInlineBrTarget) {
405 				offset = cli_addr + 2 + (signed char)ip [1];
406 				ip += 2;
407 			} else {
408 				offset = cli_addr + 5 + (gint32)read32 (ip + 1);
409 				ip += 5;
410 			}
411 
412 			branch = bb_split (bb, current, root, offset, TRUE, method, error);
413 			if (!branch)
414 				return;
415 
416 			/*If we splitted the current BB*/
417 			if (offset < cli_addr && branch->start > current->start)
418 				current = branch;
419 			if (ip < end) {
420 				next = bb_split (bb, current, root, ip - start, opcode->flow_type != MONO_FLOW_BRANCH, method, error);
421 				if (!next)
422 					return;
423 			} else {
424 				next = NULL;
425 			}
426 
427 			bb_link (current, branch);
428 			if (next && opcode->flow_type == MONO_FLOW_BRANCH && next != branch) {
429 				bb_unlink (current, next);
430 				current = next;
431 			}
432 			break;
433 		case MonoInlineSwitch: {
434 			MonoSimpleBasicBlock *tmp;
435 			guint32 j, n = read32 (ip + 1);
436 
437 			ip += 5;
438 			offset = cli_addr + 5 + 4 * n;
439 			if (!(next = bb_split (bb, current, root, offset, TRUE, method, error)))
440 				return;
441 
442 			bb_link (current, next);
443 			tmp = next;
444 
445 			for (j = 0; j < n; ++j) {
446 				if (ip >= end) {
447 					mono_error_set_not_verifiable (error, method, "Invalid switch instruction %x", cli_addr);
448 					return;
449 				}
450 				if (!(next = bb_split (bb, next, root, offset + (gint32)read32 (ip), TRUE, method, error)))
451 					return;
452 				bb_link (current, next);
453 				ip += 4;
454 			}
455 			current = tmp;
456 			break;
457 		}
458 		default:
459 			mono_error_set_not_verifiable (error, method, "Invalid instruction %x", *ip);
460 			return;
461 		}
462 	}
463 	if (ip != end)
464 		mono_error_set_not_verifiable (error, method, "Invalid last instruction");
465 }
466 
467 static void
bb_formation_eh_pass(MonoMethodHeader * header,MonoSimpleBasicBlock * bb,MonoSimpleBasicBlock ** root,MonoMethod * method,MonoError * error)468 bb_formation_eh_pass (MonoMethodHeader *header, MonoSimpleBasicBlock *bb, MonoSimpleBasicBlock **root, MonoMethod *method, MonoError *error)
469 {
470 	int i;
471 	int end = header->code_size;
472 
473 	error_init (error);
474 
475 	/*We must split at all points to verify for targets in the middle of an instruction*/
476 	for (i = 0; i < header->num_clauses; ++i) {
477 		MonoExceptionClause *clause = header->clauses + i;
478 		MonoSimpleBasicBlock *try_block, *handler;
479 
480 		if (!(try_block = bb_split (bb, bb, root, clause->try_offset, TRUE, method, error)))
481 			return;
482 
483 		handler = bb_split (bb, try_block, root, clause->handler_offset, FALSE, method, error);
484 		if (!handler)
485 			return;
486 		handler->dead = FALSE;
487 
488 		if (clause->flags == MONO_EXCEPTION_CLAUSE_FILTER) {
489 			MonoSimpleBasicBlock *filter = bb_split (bb, try_block, root, clause->data.filter_offset, FALSE, method, error);
490 			if (!filter)
491 				return;
492 			filter->dead = FALSE;
493 		}
494 
495 		if (clause->try_offset + clause->try_len < end && !bb_split (bb, try_block, root, clause->try_offset + clause->try_len, FALSE, method, error))
496 			return;
497 
498 		if (clause->handler_offset + clause->handler_len < end && !bb_split (bb, handler, root, clause->handler_offset + clause->handler_len, FALSE, method, error))
499 			return;
500 	}
501 }
502 
503 /*
504  * mono_basic_block_free:
505  *
506  * Release the memory associated with the list of basis blocks @bb.
507 */
508 void
mono_basic_block_free(MonoSimpleBasicBlock * bb)509 mono_basic_block_free (MonoSimpleBasicBlock *bb)
510 {
511 	while (bb) {
512 		MonoSimpleBasicBlock *next = bb->next;
513 		if (bb->out_bb)
514 			g_slist_free (bb->out_bb);
515 		g_free (bb);
516 		bb = next;
517 	}
518 }
519 
520 /*
521  * mono_basic_block_split:
522  *
523  * Return the list of basic blocks of method. Return NULL on failure and set @error.
524 */
525 MonoSimpleBasicBlock*
mono_basic_block_split(MonoMethod * method,MonoError * error,MonoMethodHeader * header)526 mono_basic_block_split (MonoMethod *method, MonoError *error, MonoMethodHeader *header)
527 {
528 	MonoSimpleBasicBlock *bb, *root;
529 	const unsigned char *start, *end;
530 
531 	error_init (error);
532 
533 	start = header->code;
534 	end = start + header->code_size;
535 
536 	bb = g_new0 (MonoSimpleBasicBlock, 1);
537 	bb->start = 0;
538 	bb->end = end - start;
539 	bb->colour = BLACK;
540 	bb->dead = FALSE;
541 
542 	root = bb;
543 	bb_formation_il_pass (start, end, bb, &root, method, error);
544 	if (!mono_error_ok (error))
545 		goto fail;
546 
547 	bb_formation_eh_pass (header, bb, &root, method, error);
548 	if (!mono_error_ok (error))
549 		goto fail;
550 
551 	bb_liveness (bb);
552 
553 #if DEBUG_BB
554 	dump_bb_list (bb, &root, g_strdup_printf("AFTER LIVENESS %s", mono_method_full_name (method, TRUE)));
555 #endif
556 
557 	return bb;
558 
559 fail:
560 	mono_basic_block_free (bb);
561 	return NULL;
562 }
563 
564 /*
565  * mono_opcode_value_and_size:
566  *
567  * Returns the size of the opcode starting at *@ip, or -1 on error.
568  * Value is the opcode number.
569 */
570 int
mono_opcode_value_and_size(const unsigned char ** ip,const unsigned char * end,int * value)571 mono_opcode_value_and_size (const unsigned char **ip, const unsigned char *end, int *value)
572 {
573 	const unsigned char *start = *ip, *p;
574 	int i = *value = mono_opcode_value (ip, end);
575 	int size = 0;
576 	if (i < 0 || i >= MONO_CEE_LAST)
577 		return -1;
578 	p = *ip;
579 
580 	switch (mono_opcodes [i].argument) {
581 	case MonoInlineNone:
582 		size = 1;
583 		break;
584 	case MonoInlineString:
585 	case MonoInlineType:
586 	case MonoInlineField:
587 	case MonoInlineMethod:
588 	case MonoInlineTok:
589 	case MonoInlineSig:
590 	case MonoShortInlineR:
591 	case MonoInlineI:
592 	case MonoInlineBrTarget:
593 		size = 5;
594 		break;
595 	case MonoInlineVar:
596 		size = 3;
597 		break;
598 	case MonoShortInlineVar:
599 	case MonoShortInlineI:
600 	case MonoShortInlineBrTarget:
601 		size = 2;
602 		break;
603 	case MonoInlineR:
604 	case MonoInlineI8:
605 		size = 9;
606 		break;
607 	case MonoInlineSwitch: {
608 		guint32 entries;
609 		if (ADDP_IS_GREATER_OR_OVF (p, 5, end))
610 			return -1;
611 		entries = read32 (p + 1);
612 		if (entries >= (0xFFFFFFFFU / 4))
613 			return -1;
614 		size = 4 + 4 * entries;
615 		break;
616 	}
617 	default:
618 		g_error ("Invalid opcode %d argument %d max opcode %d\n", i, mono_opcodes [i].argument, MONO_CEE_LAST);
619 	}
620 
621 	if (ADDP_IS_GREATER_OR_OVF (p, size, end))
622 		return -1;
623 
624 	return (p - start) + size;
625 }
626 
627 /*
628  * mono_opcode_size:
629  *
630  * Returns the size of the opcode starting at @ip, or -1 on error.
631 */
632 int
mono_opcode_size(const unsigned char * ip,const unsigned char * end)633 mono_opcode_size (const unsigned char *ip, const unsigned char *end)
634 {
635 	int tmp;
636 	return mono_opcode_value_and_size (&ip, end, &tmp);
637 }
638 
639