/*
 * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 */

#include "disassembler.h"
#include "lightrec.h"
#include "memmanager.h"
#include "optimizer.h"
#include "regcache.h"

#include <errno.h>
#include <stdbool.h>
#include <stdlib.h>

struct optimizer_list {
	void (**optimizers)(struct opcode *);
	unsigned int nb_optimizers;
};

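/* Returns true if the given opcode reads register 'reg'. The HI and LO
 * registers are identified by the REG_HI / REG_LO pseudo-register IDs. */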
bool opcode_reads_register(union code op, u8 reg)
{
	switch (op.i.op) {
	case OP_SPECIAL:
		switch (op.r.op) {
		case OP_SPECIAL_SYSCALL:
		case OP_SPECIAL_BREAK:
			return false;
		case OP_SPECIAL_JR:
		case OP_SPECIAL_JALR:
		case OP_SPECIAL_MTHI:
		case OP_SPECIAL_MTLO:
			return op.r.rs == reg;
		case OP_SPECIAL_MFHI:
			return reg == REG_HI;
		case OP_SPECIAL_MFLO:
			return reg == REG_LO;
		case OP_SPECIAL_SLL:
		case OP_SPECIAL_SRL:
		case OP_SPECIAL_SRA:
			return op.r.rt == reg;
		default:
			return op.r.rs == reg || op.r.rt == reg;
		}
	case OP_CP0:
		switch (op.r.rs) {
		case OP_CP0_MTC0:
		case OP_CP0_CTC0:
			return op.r.rt == reg;
		default:
			return false;
		}
	case OP_CP2:
		if (op.r.op == OP_CP2_BASIC) {
			switch (op.r.rs) {
			case OP_CP2_BASIC_MTC2:
			case OP_CP2_BASIC_CTC2:
				return op.r.rt == reg;
			default:
				return false;
			}
		} else {
			return false;
		}
	case OP_J:
	case OP_JAL:
	case OP_LUI:
		return false;
	case OP_BEQ:
	case OP_BNE:
	case OP_LWL:
	case OP_LWR:
	case OP_SB:
	case OP_SH:
	case OP_SWL:
	case OP_SW:
	case OP_SWR:
		return op.i.rs == reg || op.i.rt == reg;
	default:
		return op.i.rs == reg;
	}
}

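/* Returns true if the given opcode writes register 'reg'. The HI and LO
 * registers are identified by the REG_HI / REG_LO pseudo-register IDs. */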
bool opcode_writes_register(union code op, u8 reg)
{
	switch (op.i.op) {
	case OP_SPECIAL:
		switch (op.r.op) {
		case OP_SPECIAL_JR:
		case OP_SPECIAL_JALR:
		case OP_SPECIAL_SYSCALL:
		case OP_SPECIAL_BREAK:
			return false;
		case OP_SPECIAL_MULT:
		case OP_SPECIAL_MULTU:
		case OP_SPECIAL_DIV:
		case OP_SPECIAL_DIVU:
			return reg == REG_LO || reg == REG_HI;
		case OP_SPECIAL_MTHI:
			return reg == REG_HI;
		case OP_SPECIAL_MTLO:
			return reg == REG_LO;
		default:
			return op.r.rd == reg;
		}
	case OP_ADDI:
	case OP_ADDIU:
	case OP_SLTI:
	case OP_SLTIU:
	case OP_ANDI:
	case OP_ORI:
	case OP_XORI:
	case OP_LUI:
	case OP_LB:
	case OP_LH:
	case OP_LWL:
	case OP_LW:
	case OP_LBU:
	case OP_LHU:
	case OP_LWR:
		return op.i.rt == reg;
	case OP_CP0:
		switch (op.r.rs) {
		case OP_CP0_MFC0:
		case OP_CP0_CFC0:
			return op.i.rt == reg;
		default:
			return false;
		}
	case OP_CP2:
		if (op.r.op == OP_CP2_BASIC) {
			switch (op.r.rs) {
			case OP_CP2_BASIC_MFC2:
			case OP_CP2_BASIC_CFC2:
				return op.i.rt == reg;
			default:
				return false;
			}
		} else {
			return false;
		}
	case OP_META_MOV:
		return op.r.rd == reg;
	default:
		return false;
	}
}

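/* Returns true if the opcode has no visible effect: a side-effect-free
 * write to $zero, an operation that copies a register onto itself, or a
 * branch that can never change the flow of execution. */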
/* TODO: Complete */
static bool is_nop(union code op)
{
	if (opcode_writes_register(op, 0)) {
		switch (op.i.op) {
		case OP_CP0:
			return op.r.rs != OP_CP0_MFC0;
		case OP_LB:
		case OP_LH:
		case OP_LWL:
		case OP_LW:
		case OP_LBU:
		case OP_LHU:
		case OP_LWR:
			return false;
		default:
			return true;
		}
	}

	switch (op.i.op) {
	case OP_SPECIAL:
		switch (op.r.op) {
		case OP_SPECIAL_AND:
			return op.r.rd == op.r.rt && op.r.rd == op.r.rs;
		case OP_SPECIAL_ADD:
		case OP_SPECIAL_ADDU:
			return (op.r.rd == op.r.rt && op.r.rs == 0) ||
				(op.r.rd == op.r.rs && op.r.rt == 0);
		case OP_SPECIAL_SUB:
		case OP_SPECIAL_SUBU:
			return op.r.rd == op.r.rs && op.r.rt == 0;
		case OP_SPECIAL_OR:
			if (op.r.rd == op.r.rt)
				return op.r.rd == op.r.rs || op.r.rs == 0;
			else
				return (op.r.rd == op.r.rs) && op.r.rt == 0;
		case OP_SPECIAL_SLL:
		case OP_SPECIAL_SRA:
		case OP_SPECIAL_SRL:
			return op.r.rd == op.r.rt && op.r.imm == 0;
		default:
			return false;
		}
	case OP_ORI:
	case OP_ADDI:
	case OP_ADDIU:
		return op.i.rt == op.i.rs && op.i.imm == 0;
	case OP_BGTZ:
		return (op.i.rs == 0 || op.i.imm == 1);
	case OP_REGIMM:
		/* The REGIMM sub-opcode is encoded in the 'rt' field, not in
		 * the main opcode field. Note that BLTZAL is never a NOP,
		 * since it writes the link register even when the branch is
		 * not taken. */
		return op.r.rt == OP_REGIMM_BLTZ &&
			(op.i.rs == 0 || op.i.imm == 1);
	case OP_BNE:
		return (op.i.rs == op.i.rt || op.i.imm == 1);
	default:
		return false;
	}
}

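/* Returns true for opcodes that load a value into a register: memory
 * loads as well as moves from a coprocessor register to a GPR. Used to
 * detect loads sitting in the delay slot of a branch. */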
bool load_in_delay_slot(union code op)
{
	switch (op.i.op) {
	case OP_CP0:
		switch (op.r.rs) {
		case OP_CP0_MFC0:
		case OP_CP0_CFC0:
			return true;
		default:
			break;
		}

		break;
	case OP_CP2:
		if (op.r.op == OP_CP2_BASIC) {
			switch (op.r.rs) {
			case OP_CP2_BASIC_MFC2:
			case OP_CP2_BASIC_CFC2:
				return true;
			default:
				break;
			}
		}

		break;
	case OP_LB:
	case OP_LH:
	case OP_LW:
	case OP_LWL:
	case OP_LWR:
	case OP_LBU:
	case OP_LHU:
		return true;
	default:
		break;
	}

	return false;
}

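/* One step of constant propagation: 'known' is a bitmask of the registers
 * whose values are currently known, and 'v' holds these values. Both are
 * updated according to the effect of opcode 'c', and the new bitmask is
 * returned. */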
static u32 lightrec_propagate_consts(union code c, u32 known, u32 *v)
{
	switch (c.i.op) {
	case OP_SPECIAL:
		switch (c.r.op) {
		case OP_SPECIAL_SLL:
			if (known & BIT(c.r.rt)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = v[c.r.rt] << c.r.imm;
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_SRL:
			if (known & BIT(c.r.rt)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = v[c.r.rt] >> c.r.imm;
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_SRA:
			if (known & BIT(c.r.rt)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = (s32)v[c.r.rt] >> c.r.imm;
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_SLLV:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = v[c.r.rt] << (v[c.r.rs] & 0x1f);
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_SRLV:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = v[c.r.rt] >> (v[c.r.rs] & 0x1f);
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_SRAV:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = (s32)v[c.r.rt]
					  >> (v[c.r.rs] & 0x1f);
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_ADD:
		case OP_SPECIAL_ADDU:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = (s32)v[c.r.rt] + (s32)v[c.r.rs];
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_SUB:
		case OP_SPECIAL_SUBU:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = v[c.r.rt] - v[c.r.rs];
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_AND:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = v[c.r.rt] & v[c.r.rs];
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_OR:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = v[c.r.rt] | v[c.r.rs];
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_XOR:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = v[c.r.rt] ^ v[c.r.rs];
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_NOR:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = ~(v[c.r.rt] | v[c.r.rs]);
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_SLT:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = (s32)v[c.r.rs] < (s32)v[c.r.rt];
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		case OP_SPECIAL_SLTU:
			if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
				known |= BIT(c.r.rd);
				v[c.r.rd] = v[c.r.rs] < v[c.r.rt];
			} else {
				known &= ~BIT(c.r.rd);
			}
			break;
		default:
			break;
		}
		break;
	case OP_REGIMM:
		break;
	case OP_ADDI:
	case OP_ADDIU:
		if (known & BIT(c.i.rs)) {
			known |= BIT(c.i.rt);
			v[c.i.rt] = v[c.i.rs] + (s32)(s16)c.i.imm;
		} else {
			known &= ~BIT(c.i.rt);
		}
		break;
	case OP_SLTI:
		if (known & BIT(c.i.rs)) {
			known |= BIT(c.i.rt);
			v[c.i.rt] = (s32)v[c.i.rs] < (s32)(s16)c.i.imm;
		} else {
			known &= ~BIT(c.i.rt);
		}
		break;
	case OP_SLTIU:
		if (known & BIT(c.i.rs)) {
			known |= BIT(c.i.rt);
			v[c.i.rt] = v[c.i.rs] < (u32)(s32)(s16)c.i.imm;
		} else {
			known &= ~BIT(c.i.rt);
		}
		break;
	case OP_ANDI:
		if (known & BIT(c.i.rs)) {
			known |= BIT(c.i.rt);
			v[c.i.rt] = v[c.i.rs] & c.i.imm;
		} else {
			known &= ~BIT(c.i.rt);
		}
		break;
	case OP_ORI:
		if (known & BIT(c.i.rs)) {
			known |= BIT(c.i.rt);
			v[c.i.rt] = v[c.i.rs] | c.i.imm;
		} else {
			known &= ~BIT(c.i.rt);
		}
		break;
	case OP_XORI:
		if (known & BIT(c.i.rs)) {
			known |= BIT(c.i.rt);
			v[c.i.rt] = v[c.i.rs] ^ c.i.imm;
		} else {
			known &= ~BIT(c.i.rt);
		}
		break;
	case OP_LUI:
		known |= BIT(c.i.rt);
		v[c.i.rt] = c.i.imm << 16;
		break;
	case OP_CP0:
		switch (c.r.rs) {
		case OP_CP0_MFC0:
		case OP_CP0_CFC0:
			known &= ~BIT(c.r.rt);
			break;
		}
		break;
	case OP_CP2:
		if (c.r.op == OP_CP2_BASIC) {
			switch (c.r.rs) {
			case OP_CP2_BASIC_MFC2:
			case OP_CP2_BASIC_CFC2:
				known &= ~BIT(c.r.rt);
				break;
			}
		}
		break;
	case OP_LB:
	case OP_LH:
	case OP_LWL:
	case OP_LW:
	case OP_LBU:
	case OP_LHU:
	case OP_LWR:
	case OP_LWC2:
		known &= ~BIT(c.i.rt);
		break;
	case OP_META_MOV:
		if (known & BIT(c.r.rs)) {
			known |= BIT(c.r.rd);
			v[c.r.rd] = v[c.r.rs];
		} else {
			known &= ~BIT(c.r.rd);
		}
		break;
	default:
		break;
	}

	return known;
}

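/* Allocate a meta-opcode and insert it into the block's opcode list,
 * right after 'op', or at the head of the list if 'op' is NULL. */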
static int lightrec_add_meta(struct block *block,
			     struct opcode *op, union code code)
{
	struct opcode *meta;

	meta = lightrec_malloc(block->state, MEM_FOR_IR, sizeof(*meta));
	if (!meta)
		return -ENOMEM;

	meta->c = code;
	meta->flags = 0;

	if (op) {
		meta->offset = op->offset;
		meta->next = op->next;
		op->next = meta;
	} else {
		meta->offset = 0;
		meta->next = block->opcode_list;
		block->opcode_list = meta;
	}

	return 0;
}

static int lightrec_add_sync(struct block *block, struct opcode *prev)
{
	return lightrec_add_meta(block, prev, (union code){
				 .j.op = OP_META_SYNC,
				 });
}

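/* Rewrite opcodes into cheaper forms: useless opcodes become real NOPs,
 * BEQ/BNE against $zero become the BEQZ/BNEZ meta-opcodes, and trivial
 * ALU operations (ORI/ADDI/ADDIU #0, OR/ADD/SUB with $zero, shifts by
 * #0) become the MOV meta-opcode. */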
static int lightrec_transform_ops(struct block *block)
{
	struct opcode *list = block->opcode_list;

	for (; list; list = list->next) {

		/* Transform all opcodes detected as useless to real NOPs
		 * (0x0: SLL r0, r0, #0) */
		if (list->opcode != 0 && is_nop(list->c)) {
			pr_debug("Converting useless opcode 0x%08x to NOP\n",
					list->opcode);
			list->opcode = 0x0;
		}

		if (!list->opcode)
			continue;

		switch (list->i.op) {
		/* Transform BEQ / BNE to BEQZ / BNEZ meta-opcodes if one of the
		 * two registers is zero. */
		case OP_BEQ:
			if ((list->i.rs == 0) ^ (list->i.rt == 0)) {
				list->i.op = OP_META_BEQZ;
				if (list->i.rs == 0) {
					list->i.rs = list->i.rt;
					list->i.rt = 0;
				}
			} else if (list->i.rs == list->i.rt) {
				list->i.rs = 0;
				list->i.rt = 0;
			}
			break;
		case OP_BNE:
			if (list->i.rs == 0) {
				list->i.op = OP_META_BNEZ;
				list->i.rs = list->i.rt;
				list->i.rt = 0;
			} else if (list->i.rt == 0) {
				list->i.op = OP_META_BNEZ;
			}
			break;

		/* Transform ORI/ADDI/ADDIU with imm #0 or OR/ADD/ADDU/SUB/SUBU
		 * with register $zero to the MOV meta-opcode */
		case OP_ORI:
		case OP_ADDI:
		case OP_ADDIU:
			if (list->i.imm == 0) {
				pr_debug("Convert ORI/ADDI/ADDIU #0 to MOV\n");
				list->i.op = OP_META_MOV;
				list->r.rd = list->i.rt;
			}
			break;
		case OP_SPECIAL:
			switch (list->r.op) {
			case OP_SPECIAL_SLL:
			case OP_SPECIAL_SRA:
			case OP_SPECIAL_SRL:
				if (list->r.imm == 0) {
					pr_debug("Convert SLL/SRL/SRA #0 to MOV\n");
					list->i.op = OP_META_MOV;
					list->r.rs = list->r.rt;
				}
				break;
			case OP_SPECIAL_OR:
			case OP_SPECIAL_ADD:
			case OP_SPECIAL_ADDU:
				if (list->r.rs == 0) {
					pr_debug("Convert OR/ADD $zero to MOV\n");
					list->i.op = OP_META_MOV;
					list->r.rs = list->r.rt;
				}
			case OP_SPECIAL_SUB: /* fall-through */
			case OP_SPECIAL_SUBU:
				if (list->r.rt == 0) {
					pr_debug("Convert OR/ADD/SUB $zero to MOV\n");
					list->i.op = OP_META_MOV;
				}
			default: /* fall-through */
				break;
			}
		default: /* fall-through */
			break;
		}
	}

	return 0;
}

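/* Swap a branch with the opcode in its delay slot whenever the delay slot
 * opcode does not read or write a register that the branch itself uses.
 * Both opcodes are then flagged LIGHTREC_NO_DS. */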
static int lightrec_switch_delay_slots(struct block *block)
{
	struct opcode *list, *prev;
	u8 flags;

	for (list = block->opcode_list, prev = NULL; list->next;
	     prev = list, list = list->next) {
		union code op = list->c;
		union code next_op = list->next->c;

		if (!has_delay_slot(op) ||
		    list->flags & (LIGHTREC_NO_DS | LIGHTREC_EMULATE_BRANCH) ||
		    op.opcode == 0)
			continue;

		if (prev && has_delay_slot(prev->c))
			continue;

		switch (list->i.op) {
		case OP_SPECIAL:
			switch (op.r.op) {
			case OP_SPECIAL_JALR:
				if (opcode_reads_register(next_op, op.r.rd) ||
				    opcode_writes_register(next_op, op.r.rd))
					continue;
			case OP_SPECIAL_JR: /* fall-through */
				if (opcode_writes_register(next_op, op.r.rs))
					continue;
			default: /* fall-through */
				break;
			}
		case OP_J: /* fall-through */
			break;
		case OP_JAL:
			if (opcode_reads_register(next_op, 31) ||
			    opcode_writes_register(next_op, 31))
				continue;
			else
				break;
		case OP_BEQ:
		case OP_BNE:
			if (op.i.rt && opcode_writes_register(next_op, op.i.rt))
				continue;
		case OP_BLEZ: /* fall-through */
		case OP_BGTZ:
		case OP_META_BEQZ:
		case OP_META_BNEZ:
			if (op.i.rs && opcode_writes_register(next_op, op.i.rs))
				continue;
			break;
		case OP_REGIMM:
			switch (op.r.rt) {
			case OP_REGIMM_BLTZAL:
			case OP_REGIMM_BGEZAL:
				if (opcode_reads_register(next_op, 31) ||
				    opcode_writes_register(next_op, 31))
					continue;
			case OP_REGIMM_BLTZ: /* fall-through */
			case OP_REGIMM_BGEZ:
				if (op.i.rs &&
				    opcode_writes_register(next_op, op.i.rs))
					continue;
				break;
			}
		default: /* fall-through */
			break;
		}

		pr_debug("Swap branch and delay slot opcodes "
			 "at offsets 0x%x / 0x%x\n", list->offset << 2,
			 list->next->offset << 2);

		flags = list->next->flags;
		list->c = next_op;
		list->next->c = op;
		list->next->flags = list->flags | LIGHTREC_NO_DS;
		list->flags = flags | LIGHTREC_NO_DS;
		list->offset++;
		list->next->offset--;
	}

	return 0;
}

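/* Detect branches that cannot be compiled as-is because of what sits in
 * their delay slot (a load, another branch, or a RFE), and flag them with
 * LIGHTREC_EMULATE_BRANCH. If the very first opcode of the block is such
 * a branch, the block is truncated to the branch and its delay slot. */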
static int lightrec_detect_impossible_branches(struct block *block)
{
	struct opcode *op, *next;

	for (op = block->opcode_list, next = op->next; next;
	     op = next, next = op->next) {
		if (!has_delay_slot(op->c) ||
		    (!load_in_delay_slot(next->c) &&
		     !has_delay_slot(next->c) &&
		     !(next->i.op == OP_CP0 && next->r.rs == OP_CP0_RFE)))
			continue;

		if (op->c.opcode == next->c.opcode) {
			/* The delay slot is the exact same opcode as the branch
			 * opcode: this is effectively a NOP */
			next->c.opcode = 0;
			continue;
		}

		if (op == block->opcode_list) {
			/* If the first opcode is an 'impossible' branch, we
			 * only keep the first two opcodes of the block (the
			 * branch itself + its delay slot) */
			lightrec_free_opcode_list(block->state, next->next);
			next->next = NULL;
			block->nb_ops = 2;
		}

		op->flags |= LIGHTREC_EMULATE_BRANCH;
	}

	return 0;
}

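/* Detect branches whose target lies inside the block. These are flagged
 * LIGHTREC_LOCAL_BRANCH, and a META_SYNC opcode is inserted before the
 * target so that the generated code has a landing point to jump to. */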
static int lightrec_local_branches(struct block *block)
{
	struct opcode *list, *target, *prev;
	s32 offset;
	int ret;

	for (list = block->opcode_list; list; list = list->next) {
		if (list->flags & LIGHTREC_EMULATE_BRANCH)
			continue;

		switch (list->i.op) {
		case OP_BEQ:
		case OP_BNE:
		case OP_BLEZ:
		case OP_BGTZ:
		case OP_REGIMM:
		case OP_META_BEQZ:
		case OP_META_BNEZ:
			offset = list->offset + 1 + (s16)list->i.imm;
			if (offset >= 0 && offset < block->nb_ops)
				break;
		default: /* fall-through */
			continue;
		}

		pr_debug("Found local branch to offset 0x%x\n", offset << 2);

		for (target = block->opcode_list, prev = NULL;
		     target; prev = target, target = target->next) {
			if (target->offset != offset ||
			    target->j.op == OP_META_SYNC)
				continue;

			if (target->flags & LIGHTREC_EMULATE_BRANCH) {
				pr_debug("Branch target must be emulated"
					 " - skip\n");
				break;
			}

			if (prev && has_delay_slot(prev->c)) {
				pr_debug("Branch target is a delay slot"
					 " - skip\n");
				break;
			}

			if (prev && prev->j.op != OP_META_SYNC) {
				pr_debug("Adding sync before offset "
					 "0x%x\n", offset << 2);
				ret = lightrec_add_sync(block, prev);
				if (ret)
					return ret;

				prev->next->offset = target->offset;
			}

			list->flags |= LIGHTREC_LOCAL_BRANCH;
			break;
		}
	}

	return 0;
}

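/* Returns true if the opcode is a jump or branch, and is therefore
 * followed by a delay slot. */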
bool has_delay_slot(union code op)
{
	switch (op.i.op) {
	case OP_SPECIAL:
		switch (op.r.op) {
		case OP_SPECIAL_JR:
		case OP_SPECIAL_JALR:
			return true;
		default:
			return false;
		}
	case OP_J:
	case OP_JAL:
	case OP_BEQ:
	case OP_BNE:
	case OP_BLEZ:
	case OP_BGTZ:
	case OP_REGIMM:
	case OP_META_BEQZ:
	case OP_META_BNEZ:
		return true;
	default:
		return false;
	}
}

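/* Insert a META_REG_UNLOAD opcode for register 'reg' right after 'op'. */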
static int lightrec_add_unload(struct block *block, struct opcode *op, u8 reg)
{
	return lightrec_add_meta(block, op, (union code){
				 .i.op = OP_META_REG_UNLOAD,
				 .i.rs = reg,
				 });
}

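/* For each register (the 31 GPRs plus REG_LO / REG_HI), find the last
 * opcode of the block that uses it, and add a META_REG_UNLOAD opcode
 * right after, so that the register cache can release the corresponding
 * native register early. */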
static int lightrec_early_unload(struct block *block)
{
	struct opcode *list = block->opcode_list;
	u8 i;

	for (i = 1; i < 34; i++) {
		struct opcode *op, *last_r = NULL, *last_w = NULL;
		unsigned int last_r_id = 0, last_w_id = 0, id = 0;
		int ret;

		for (op = list; op->next; op = op->next, id++) {
			if (opcode_reads_register(op->c, i)) {
				last_r = op;
				last_r_id = id;
			}

			if (opcode_writes_register(op->c, i)) {
				last_w = op;
				last_w_id = id;
			}
		}

		if (last_w_id > last_r_id) {
			if (has_delay_slot(last_w->c) &&
			    !(last_w->flags & LIGHTREC_NO_DS))
				last_w = last_w->next;

			if (last_w->next) {
				ret = lightrec_add_unload(block, last_w, i);
				if (ret)
					return ret;
			}
		} else if (last_r) {
			if (has_delay_slot(last_r->c) &&
			    !(last_r->flags & LIGHTREC_NO_DS))
				last_r = last_r->next;

			if (last_r->next) {
				ret = lightrec_add_unload(block, last_r, i);
				if (ret)
					return ret;
			}
		}
	}

	return 0;
}

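/* Flag store opcodes that do not require code invalidation (stores that
 * use $gp or $sp as the address register, on the heuristic that these
 * never hit a code page), and use constant propagation to detect stores
 * into the block itself, which mark the block as self-modifying. */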
static int lightrec_flag_stores(struct block *block)
{
	struct opcode *list;
	u32 known = BIT(0);
	u32 values[32] = { 0 };

	for (list = block->opcode_list; list; list = list->next) {
		/* Register $zero is always, well, zero */
		known |= BIT(0);
		values[0] = 0;

		switch (list->i.op) {
		case OP_SB:
		case OP_SH:
		case OP_SW:
			/* Mark all store operations that target $sp or $gp
			 * as not requiring code invalidation. This is based
			 * on the heuristic that stores using one of these
			 * registers as address will never hit a code page. */
			if (list->i.rs >= 28 && list->i.rs <= 29 &&
			    !block->state->maps[PSX_MAP_KERNEL_USER_RAM].ops) {
				pr_debug("Flagging opcode 0x%08x as not requiring invalidation\n",
					 list->opcode);
				list->flags |= LIGHTREC_NO_INVALIDATE;
			}

			/* Detect writes whose destination address is inside the
			 * current block, using constant propagation. When these
			 * occur, we mark the block as not compilable. */
			if ((known & BIT(list->i.rs)) &&
			    kunseg(values[list->i.rs]) >= kunseg(block->pc) &&
			    kunseg(values[list->i.rs]) < (kunseg(block->pc) +
							  block->nb_ops * 4)) {
				pr_debug("Self-modifying block detected\n");
				block->flags |= BLOCK_NEVER_COMPILE;
				list->flags |= LIGHTREC_SMC;
			}
		default: /* fall-through */
			break;
		}

		known = lightrec_propagate_consts(list->c, known, values);
	}

	return 0;
}

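/* Returns true if the result of the given MULT/MULTU can be truncated to
 * 32 bits, i.e. if the HI register is guaranteed to be overwritten before
 * it is next read. Forward local branches are followed recursively. */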
static bool is_mult32(const struct block *block, const struct opcode *op)
{
	const struct opcode *next, *last = NULL;
	u32 offset;

	for (op = op->next; op != last; op = op->next) {
		switch (op->i.op) {
		case OP_BEQ:
		case OP_BNE:
		case OP_BLEZ:
		case OP_BGTZ:
		case OP_REGIMM:
		case OP_META_BEQZ:
		case OP_META_BNEZ:
			/* TODO: handle backwards branches too */
			if ((op->flags & LIGHTREC_LOCAL_BRANCH) &&
			    (s16)op->c.i.imm >= 0) {
				offset = op->offset + 1 + (s16)op->c.i.imm;

				for (next = op; next->offset != offset;
				     next = next->next);

				if (!is_mult32(block, next))
					return false;

				last = next;
				continue;
			} else {
				return false;
			}
		case OP_SPECIAL:
			switch (op->r.op) {
			case OP_SPECIAL_MULT:
			case OP_SPECIAL_MULTU:
			case OP_SPECIAL_DIV:
			case OP_SPECIAL_DIVU:
			case OP_SPECIAL_MTHI:
				return true;
			case OP_SPECIAL_JR:
				return op->r.rs == 31 &&
					((op->flags & LIGHTREC_NO_DS) ||
					 !(op->next->i.op == OP_SPECIAL &&
					   op->next->r.op == OP_SPECIAL_MFHI));
			case OP_SPECIAL_JALR:
			case OP_SPECIAL_MFHI:
				return false;
			default:
				continue;
			}
		default:
			continue;
		}
	}

	return last != NULL;
}

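/* Flag MULT/MULTU opcodes whose HI result is never read with
 * LIGHTREC_MULT32, so that a 32-bit multiplication can be emitted. */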
static int lightrec_flag_mults(struct block *block)
{
	struct opcode *list, *prev;

	for (list = block->opcode_list, prev = NULL; list;
	     prev = list, list = list->next) {
		if (list->i.op != OP_SPECIAL)
			continue;

		switch (list->r.op) {
		case OP_SPECIAL_MULT:
		case OP_SPECIAL_MULTU:
			break;
		default:
			continue;
		}

		/* Don't support MULT(U) opcodes in delay slots */
		if (prev && has_delay_slot(prev->c))
			continue;

		if (is_mult32(block, list)) {
			pr_debug("Mark MULT(U) opcode at offset 0x%x as"
				 " 32-bit\n", list->offset << 2);
			list->flags |= LIGHTREC_MULT32;
		}
	}

	return 0;
}

static int (*lightrec_optimizers[])(struct block *) = {
	&lightrec_detect_impossible_branches,
	&lightrec_transform_ops,
	&lightrec_local_branches,
	&lightrec_switch_delay_slots,
	&lightrec_flag_stores,
	&lightrec_flag_mults,
	&lightrec_early_unload,
};

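/* Run all the optimizer passes in order on the given block. */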
int lightrec_optimize(struct block *block)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(lightrec_optimizers); i++) {
		int ret = lightrec_optimizers[i](block);

		if (ret)
			return ret;
	}

	return 0;
}