1 /*
2 * Copyright (C) 2014-2020 Paul Cercueil <paul@crapouillou.net>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 */
14
15 #include "disassembler.h"
16 #include "lightrec.h"
17 #include "memmanager.h"
18 #include "optimizer.h"
19 #include "regcache.h"
20
21 #include <errno.h>
22 #include <stdbool.h>
23 #include <stdlib.h>
24
25 struct optimizer_list {
26 void (**optimizers)(struct opcode *);
27 unsigned int nb_optimizers;
28 };
29
opcode_reads_register(union code op,u8 reg)30 bool opcode_reads_register(union code op, u8 reg)
31 {
32 switch (op.i.op) {
33 case OP_SPECIAL:
34 switch (op.r.op) {
35 case OP_SPECIAL_SYSCALL:
36 case OP_SPECIAL_BREAK:
37 return false;
38 case OP_SPECIAL_JR:
39 case OP_SPECIAL_JALR:
40 case OP_SPECIAL_MTHI:
41 case OP_SPECIAL_MTLO:
42 return op.r.rs == reg;
43 case OP_SPECIAL_MFHI:
44 return reg == REG_HI;
45 case OP_SPECIAL_MFLO:
46 return reg == REG_LO;
47 case OP_SPECIAL_SLL:
48 case OP_SPECIAL_SRL:
49 case OP_SPECIAL_SRA:
50 return op.r.rt == reg;
51 default:
52 return op.r.rs == reg || op.r.rt == reg;
53 }
54 case OP_CP0:
55 switch (op.r.rs) {
56 case OP_CP0_MTC0:
57 case OP_CP0_CTC0:
58 return op.r.rt == reg;
59 default:
60 return false;
61 }
62 case OP_CP2:
63 if (op.r.op == OP_CP2_BASIC) {
64 switch (op.r.rs) {
65 case OP_CP2_BASIC_MTC2:
66 case OP_CP2_BASIC_CTC2:
67 return op.r.rt == reg;
68 default:
69 return false;
70 }
71 } else {
72 return false;
73 }
74 case OP_J:
75 case OP_JAL:
76 case OP_LUI:
77 return false;
78 case OP_BEQ:
79 case OP_BNE:
80 case OP_LWL:
81 case OP_LWR:
82 case OP_SB:
83 case OP_SH:
84 case OP_SWL:
85 case OP_SW:
86 case OP_SWR:
87 return op.i.rs == reg || op.i.rt == reg;
88 default:
89 return op.i.rs == reg;
90 }
91 }
92
opcode_writes_register(union code op,u8 reg)93 bool opcode_writes_register(union code op, u8 reg)
94 {
95 switch (op.i.op) {
96 case OP_SPECIAL:
97 switch (op.r.op) {
98 case OP_SPECIAL_JR:
99 case OP_SPECIAL_JALR:
100 case OP_SPECIAL_SYSCALL:
101 case OP_SPECIAL_BREAK:
102 return false;
103 case OP_SPECIAL_MULT:
104 case OP_SPECIAL_MULTU:
105 case OP_SPECIAL_DIV:
106 case OP_SPECIAL_DIVU:
107 return reg == REG_LO || reg == REG_HI;
108 case OP_SPECIAL_MTHI:
109 return reg == REG_HI;
110 case OP_SPECIAL_MTLO:
111 return reg == REG_LO;
112 default:
113 return op.r.rd == reg;
114 }
115 case OP_ADDI:
116 case OP_ADDIU:
117 case OP_SLTI:
118 case OP_SLTIU:
119 case OP_ANDI:
120 case OP_ORI:
121 case OP_XORI:
122 case OP_LUI:
123 case OP_LB:
124 case OP_LH:
125 case OP_LWL:
126 case OP_LW:
127 case OP_LBU:
128 case OP_LHU:
129 case OP_LWR:
130 return op.i.rt == reg;
131 case OP_CP0:
132 switch (op.r.rs) {
133 case OP_CP0_MFC0:
134 case OP_CP0_CFC0:
135 return op.i.rt == reg;
136 default:
137 return false;
138 }
139 case OP_CP2:
140 if (op.r.op == OP_CP2_BASIC) {
141 switch (op.r.rs) {
142 case OP_CP2_BASIC_MFC2:
143 case OP_CP2_BASIC_CFC2:
144 return op.i.rt == reg;
145 default:
146 return false;
147 }
148 } else {
149 return false;
150 }
151 case OP_META_MOV:
152 return op.r.rd == reg;
153 default:
154 return false;
155 }
156 }
157
158 /* TODO: Complete */
is_nop(union code op)159 static bool is_nop(union code op)
160 {
161 if (opcode_writes_register(op, 0)) {
162 switch (op.i.op) {
163 case OP_CP0:
164 return op.r.rs != OP_CP0_MFC0;
165 case OP_LB:
166 case OP_LH:
167 case OP_LWL:
168 case OP_LW:
169 case OP_LBU:
170 case OP_LHU:
171 case OP_LWR:
172 return false;
173 default:
174 return true;
175 }
176 }
177
178 switch (op.i.op) {
179 case OP_SPECIAL:
180 switch (op.r.op) {
181 case OP_SPECIAL_AND:
182 return op.r.rd == op.r.rt && op.r.rd == op.r.rs;
183 case OP_SPECIAL_ADD:
184 case OP_SPECIAL_ADDU:
185 return (op.r.rd == op.r.rt && op.r.rs == 0) ||
186 (op.r.rd == op.r.rs && op.r.rt == 0);
187 case OP_SPECIAL_SUB:
188 case OP_SPECIAL_SUBU:
189 return op.r.rd == op.r.rs && op.r.rt == 0;
190 case OP_SPECIAL_OR:
191 if (op.r.rd == op.r.rt)
192 return op.r.rd == op.r.rs || op.r.rs == 0;
193 else
194 return (op.r.rd == op.r.rs) && op.r.rt == 0;
195 case OP_SPECIAL_SLL:
196 case OP_SPECIAL_SRA:
197 case OP_SPECIAL_SRL:
198 return op.r.rd == op.r.rt && op.r.imm == 0;
199 default:
200 return false;
201 }
202 case OP_ORI:
203 case OP_ADDI:
204 case OP_ADDIU:
205 return op.i.rt == op.i.rs && op.i.imm == 0;
206 case OP_BGTZ:
207 return (op.i.rs == 0 || op.i.imm == 1);
208 case OP_REGIMM:
209 return (op.i.op == OP_REGIMM_BLTZ ||
210 op.i.op == OP_REGIMM_BLTZAL) &&
211 (op.i.rs == 0 || op.i.imm == 1);
212 case OP_BNE:
213 return (op.i.rs == op.i.rt || op.i.imm == 1);
214 default:
215 return false;
216 }
217 }
218
load_in_delay_slot(union code op)219 bool load_in_delay_slot(union code op)
220 {
221 switch (op.i.op) {
222 case OP_CP0:
223 switch (op.r.rs) {
224 case OP_CP0_MFC0:
225 case OP_CP0_CFC0:
226 return true;
227 default:
228 break;
229 }
230
231 break;
232 case OP_CP2:
233 if (op.r.op == OP_CP2_BASIC) {
234 switch (op.r.rs) {
235 case OP_CP2_BASIC_MFC2:
236 case OP_CP2_BASIC_CFC2:
237 return true;
238 default:
239 break;
240 }
241 }
242
243 break;
244 case OP_LB:
245 case OP_LH:
246 case OP_LW:
247 case OP_LWL:
248 case OP_LWR:
249 case OP_LBU:
250 case OP_LHU:
251 return true;
252 default:
253 break;
254 }
255
256 return false;
257 }
258
lightrec_propagate_consts(union code c,u32 known,u32 * v)259 static u32 lightrec_propagate_consts(union code c, u32 known, u32 *v)
260 {
261 switch (c.i.op) {
262 case OP_SPECIAL:
263 switch (c.r.op) {
264 case OP_SPECIAL_SLL:
265 if (known & BIT(c.r.rt)) {
266 known |= BIT(c.r.rd);
267 v[c.r.rd] = v[c.r.rt] << c.r.imm;
268 } else {
269 known &= ~BIT(c.r.rd);
270 }
271 break;
272 case OP_SPECIAL_SRL:
273 if (known & BIT(c.r.rt)) {
274 known |= BIT(c.r.rd);
275 v[c.r.rd] = v[c.r.rt] >> c.r.imm;
276 } else {
277 known &= ~BIT(c.r.rd);
278 }
279 break;
280 case OP_SPECIAL_SRA:
281 if (known & BIT(c.r.rt)) {
282 known |= BIT(c.r.rd);
283 v[c.r.rd] = (s32)v[c.r.rt] >> c.r.imm;
284 } else {
285 known &= ~BIT(c.r.rd);
286 }
287 break;
288 case OP_SPECIAL_SLLV:
289 if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
290 known |= BIT(c.r.rd);
291 v[c.r.rd] = v[c.r.rt] << (v[c.r.rs] & 0x1f);
292 } else {
293 known &= ~BIT(c.r.rd);
294 }
295 break;
296 case OP_SPECIAL_SRLV:
297 if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
298 known |= BIT(c.r.rd);
299 v[c.r.rd] = v[c.r.rt] >> (v[c.r.rs] & 0x1f);
300 } else {
301 known &= ~BIT(c.r.rd);
302 }
303 break;
304 case OP_SPECIAL_SRAV:
305 if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
306 known |= BIT(c.r.rd);
307 v[c.r.rd] = (s32)v[c.r.rt]
308 >> (v[c.r.rs] & 0x1f);
309 } else {
310 known &= ~BIT(c.r.rd);
311 }
312 break;
313 case OP_SPECIAL_ADD:
314 case OP_SPECIAL_ADDU:
315 if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
316 known |= BIT(c.r.rd);
317 v[c.r.rd] = (s32)v[c.r.rt] + (s32)v[c.r.rs];
318 } else {
319 known &= ~BIT(c.r.rd);
320 }
321 break;
322 case OP_SPECIAL_SUB:
323 case OP_SPECIAL_SUBU:
324 if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
325 known |= BIT(c.r.rd);
326 v[c.r.rd] = v[c.r.rt] - v[c.r.rs];
327 } else {
328 known &= ~BIT(c.r.rd);
329 }
330 break;
331 case OP_SPECIAL_AND:
332 if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
333 known |= BIT(c.r.rd);
334 v[c.r.rd] = v[c.r.rt] & v[c.r.rs];
335 } else {
336 known &= ~BIT(c.r.rd);
337 }
338 break;
339 case OP_SPECIAL_OR:
340 if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
341 known |= BIT(c.r.rd);
342 v[c.r.rd] = v[c.r.rt] | v[c.r.rs];
343 } else {
344 known &= ~BIT(c.r.rd);
345 }
346 break;
347 case OP_SPECIAL_XOR:
348 if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
349 known |= BIT(c.r.rd);
350 v[c.r.rd] = v[c.r.rt] ^ v[c.r.rs];
351 } else {
352 known &= ~BIT(c.r.rd);
353 }
354 break;
355 case OP_SPECIAL_NOR:
356 if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
357 known |= BIT(c.r.rd);
358 v[c.r.rd] = ~(v[c.r.rt] | v[c.r.rs]);
359 } else {
360 known &= ~BIT(c.r.rd);
361 }
362 break;
363 case OP_SPECIAL_SLT:
364 if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
365 known |= BIT(c.r.rd);
366 v[c.r.rd] = (s32)v[c.r.rs] < (s32)v[c.r.rt];
367 } else {
368 known &= ~BIT(c.r.rd);
369 }
370 break;
371 case OP_SPECIAL_SLTU:
372 if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) {
373 known |= BIT(c.r.rd);
374 v[c.r.rd] = v[c.r.rs] < v[c.r.rt];
375 } else {
376 known &= ~BIT(c.r.rd);
377 }
378 break;
379 default:
380 break;
381 }
382 break;
383 case OP_REGIMM:
384 break;
385 case OP_ADDI:
386 case OP_ADDIU:
387 if (known & BIT(c.i.rs)) {
388 known |= BIT(c.i.rt);
389 v[c.i.rt] = v[c.i.rs] + (s32)(s16)c.i.imm;
390 } else {
391 known &= ~BIT(c.i.rt);
392 }
393 break;
394 case OP_SLTI:
395 if (known & BIT(c.i.rs)) {
396 known |= BIT(c.i.rt);
397 v[c.i.rt] = (s32)v[c.i.rs] < (s32)(s16)c.i.imm;
398 } else {
399 known &= ~BIT(c.i.rt);
400 }
401 break;
402 case OP_SLTIU:
403 if (known & BIT(c.i.rs)) {
404 known |= BIT(c.i.rt);
405 v[c.i.rt] = v[c.i.rs] < (u32)(s32)(s16)c.i.imm;
406 } else {
407 known &= ~BIT(c.i.rt);
408 }
409 break;
410 case OP_ANDI:
411 if (known & BIT(c.i.rs)) {
412 known |= BIT(c.i.rt);
413 v[c.i.rt] = v[c.i.rs] & c.i.imm;
414 } else {
415 known &= ~BIT(c.i.rt);
416 }
417 break;
418 case OP_ORI:
419 if (known & BIT(c.i.rs)) {
420 known |= BIT(c.i.rt);
421 v[c.i.rt] = v[c.i.rs] | c.i.imm;
422 } else {
423 known &= ~BIT(c.i.rt);
424 }
425 break;
426 case OP_XORI:
427 if (known & BIT(c.i.rs)) {
428 known |= BIT(c.i.rt);
429 v[c.i.rt] = v[c.i.rs] ^ c.i.imm;
430 } else {
431 known &= ~BIT(c.i.rt);
432 }
433 break;
434 case OP_LUI:
435 known |= BIT(c.i.rt);
436 v[c.i.rt] = c.i.imm << 16;
437 break;
438 case OP_CP0:
439 switch (c.r.rs) {
440 case OP_CP0_MFC0:
441 case OP_CP0_CFC0:
442 known &= ~BIT(c.r.rt);
443 break;
444 }
445 break;
446 case OP_CP2:
447 if (c.r.op == OP_CP2_BASIC) {
448 switch (c.r.rs) {
449 case OP_CP2_BASIC_MFC2:
450 case OP_CP2_BASIC_CFC2:
451 known &= ~BIT(c.r.rt);
452 break;
453 }
454 }
455 break;
456 case OP_LB:
457 case OP_LH:
458 case OP_LWL:
459 case OP_LW:
460 case OP_LBU:
461 case OP_LHU:
462 case OP_LWR:
463 case OP_LWC2:
464 known &= ~BIT(c.i.rt);
465 break;
466 case OP_META_MOV:
467 if (known & BIT(c.r.rs)) {
468 known |= BIT(c.r.rd);
469 v[c.r.rd] = v[c.r.rs];
470 } else {
471 known &= ~BIT(c.r.rd);
472 }
473 break;
474 default:
475 break;
476 }
477
478 return known;
479 }
480
lightrec_add_meta(struct block * block,struct opcode * op,union code code)481 static int lightrec_add_meta(struct block *block,
482 struct opcode *op, union code code)
483 {
484 struct opcode *meta;
485
486 meta = lightrec_malloc(block->state, MEM_FOR_IR, sizeof(*meta));
487 if (!meta)
488 return -ENOMEM;
489
490 meta->c = code;
491 meta->flags = 0;
492
493 if (op) {
494 meta->offset = op->offset;
495 meta->next = op->next;
496 op->next = meta;
497 } else {
498 meta->offset = 0;
499 meta->next = block->opcode_list;
500 block->opcode_list = meta;
501 }
502
503 return 0;
504 }
505
lightrec_add_sync(struct block * block,struct opcode * prev)506 static int lightrec_add_sync(struct block *block, struct opcode *prev)
507 {
508 return lightrec_add_meta(block, prev, (union code){
509 .j.op = OP_META_SYNC,
510 });
511 }
512
lightrec_transform_ops(struct block * block)513 static int lightrec_transform_ops(struct block *block)
514 {
515 struct opcode *list = block->opcode_list;
516
517 for (; list; list = list->next) {
518
519 /* Transform all opcodes detected as useless to real NOPs
520 * (0x0: SLL r0, r0, #0) */
521 if (list->opcode != 0 && is_nop(list->c)) {
522 pr_debug("Converting useless opcode 0x%08x to NOP\n",
523 list->opcode);
524 list->opcode = 0x0;
525 }
526
527 if (!list->opcode)
528 continue;
529
530 switch (list->i.op) {
531 /* Transform BEQ / BNE to BEQZ / BNEZ meta-opcodes if one of the
532 * two registers is zero. */
533 case OP_BEQ:
534 if ((list->i.rs == 0) ^ (list->i.rt == 0)) {
535 list->i.op = OP_META_BEQZ;
536 if (list->i.rs == 0) {
537 list->i.rs = list->i.rt;
538 list->i.rt = 0;
539 }
540 } else if (list->i.rs == list->i.rt) {
541 list->i.rs = 0;
542 list->i.rt = 0;
543 }
544 break;
545 case OP_BNE:
546 if (list->i.rs == 0) {
547 list->i.op = OP_META_BNEZ;
548 list->i.rs = list->i.rt;
549 list->i.rt = 0;
550 } else if (list->i.rt == 0) {
551 list->i.op = OP_META_BNEZ;
552 }
553 break;
554
555 /* Transform ORI/ADDI/ADDIU with imm #0 or ORR/ADD/ADDU/SUB/SUBU
556 * with register $zero to the MOV meta-opcode */
557 case OP_ORI:
558 case OP_ADDI:
559 case OP_ADDIU:
560 if (list->i.imm == 0) {
561 pr_debug("Convert ORI/ADDI/ADDIU #0 to MOV\n");
562 list->i.op = OP_META_MOV;
563 list->r.rd = list->i.rt;
564 }
565 break;
566 case OP_SPECIAL:
567 switch (list->r.op) {
568 case OP_SPECIAL_SLL:
569 case OP_SPECIAL_SRA:
570 case OP_SPECIAL_SRL:
571 if (list->r.imm == 0) {
572 pr_debug("Convert SLL/SRL/SRA #0 to MOV\n");
573 list->i.op = OP_META_MOV;
574 list->r.rs = list->r.rt;
575 }
576 break;
577 case OP_SPECIAL_OR:
578 case OP_SPECIAL_ADD:
579 case OP_SPECIAL_ADDU:
580 if (list->r.rs == 0) {
581 pr_debug("Convert OR/ADD $zero to MOV\n");
582 list->i.op = OP_META_MOV;
583 list->r.rs = list->r.rt;
584 }
585 case OP_SPECIAL_SUB: /* fall-through */
586 case OP_SPECIAL_SUBU:
587 if (list->r.rt == 0) {
588 pr_debug("Convert OR/ADD/SUB $zero to MOV\n");
589 list->i.op = OP_META_MOV;
590 }
591 default: /* fall-through */
592 break;
593 }
594 default: /* fall-through */
595 break;
596 }
597 }
598
599 return 0;
600 }
601
lightrec_switch_delay_slots(struct block * block)602 static int lightrec_switch_delay_slots(struct block *block)
603 {
604 struct opcode *list, *prev;
605 u8 flags;
606
607 for (list = block->opcode_list, prev = NULL; list->next;
608 prev = list, list = list->next) {
609 union code op = list->c;
610 union code next_op = list->next->c;
611
612 if (!has_delay_slot(op) ||
613 list->flags & (LIGHTREC_NO_DS | LIGHTREC_EMULATE_BRANCH) ||
614 op.opcode == 0)
615 continue;
616
617 if (prev && has_delay_slot(prev->c))
618 continue;
619
620 switch (list->i.op) {
621 case OP_SPECIAL:
622 switch (op.r.op) {
623 case OP_SPECIAL_JALR:
624 if (opcode_reads_register(next_op, op.r.rd) ||
625 opcode_writes_register(next_op, op.r.rd))
626 continue;
627 case OP_SPECIAL_JR: /* fall-through */
628 if (opcode_writes_register(next_op, op.r.rs))
629 continue;
630 default: /* fall-through */
631 break;
632 }
633 case OP_J: /* fall-through */
634 break;
635 case OP_JAL:
636 if (opcode_reads_register(next_op, 31) ||
637 opcode_writes_register(next_op, 31))
638 continue;
639 else
640 break;
641 case OP_BEQ:
642 case OP_BNE:
643 if (op.i.rt && opcode_writes_register(next_op, op.i.rt))
644 continue;
645 case OP_BLEZ: /* fall-through */
646 case OP_BGTZ:
647 case OP_META_BEQZ:
648 case OP_META_BNEZ:
649 if (op.i.rs && opcode_writes_register(next_op, op.i.rs))
650 continue;
651 break;
652 case OP_REGIMM:
653 switch (op.r.rt) {
654 case OP_REGIMM_BLTZAL:
655 case OP_REGIMM_BGEZAL:
656 if (opcode_reads_register(next_op, 31) ||
657 opcode_writes_register(next_op, 31))
658 continue;
659 case OP_REGIMM_BLTZ: /* fall-through */
660 case OP_REGIMM_BGEZ:
661 if (op.i.rs &&
662 opcode_writes_register(next_op, op.i.rs))
663 continue;
664 break;
665 }
666 default: /* fall-through */
667 break;
668 }
669
670 pr_debug("Swap branch and delay slot opcodes "
671 "at offsets 0x%x / 0x%x\n", list->offset << 2,
672 list->next->offset << 2);
673
674 flags = list->next->flags;
675 list->c = next_op;
676 list->next->c = op;
677 list->next->flags = list->flags | LIGHTREC_NO_DS;
678 list->flags = flags | LIGHTREC_NO_DS;
679 list->offset++;
680 list->next->offset--;
681 }
682
683 return 0;
684 }
685
lightrec_detect_impossible_branches(struct block * block)686 static int lightrec_detect_impossible_branches(struct block *block)
687 {
688 struct opcode *op, *next;
689
690 for (op = block->opcode_list, next = op->next; next;
691 op = next, next = op->next) {
692 if (!has_delay_slot(op->c) ||
693 (!load_in_delay_slot(next->c) &&
694 !has_delay_slot(next->c) &&
695 !(next->i.op == OP_CP0 && next->r.rs == OP_CP0_RFE)))
696 continue;
697
698 if (op->c.opcode == next->c.opcode) {
699 /* The delay slot is the exact same opcode as the branch
700 * opcode: this is effectively a NOP */
701 next->c.opcode = 0;
702 continue;
703 }
704
705 if (op == block->opcode_list) {
706 /* If the first opcode is an 'impossible' branch, we
707 * only keep the first two opcodes of the block (the
708 * branch itself + its delay slot) */
709 lightrec_free_opcode_list(block->state, next->next);
710 next->next = NULL;
711 block->nb_ops = 2;
712 }
713
714 op->flags |= LIGHTREC_EMULATE_BRANCH;
715 }
716
717 return 0;
718 }
719
lightrec_local_branches(struct block * block)720 static int lightrec_local_branches(struct block *block)
721 {
722 struct opcode *list, *target, *prev;
723 s32 offset;
724 int ret;
725
726 for (list = block->opcode_list; list; list = list->next) {
727 if (list->flags & LIGHTREC_EMULATE_BRANCH)
728 continue;
729
730 switch (list->i.op) {
731 case OP_BEQ:
732 case OP_BNE:
733 case OP_BLEZ:
734 case OP_BGTZ:
735 case OP_REGIMM:
736 case OP_META_BEQZ:
737 case OP_META_BNEZ:
738 offset = list->offset + 1 + (s16)list->i.imm;
739 if (offset >= 0 && offset < block->nb_ops)
740 break;
741 default: /* fall-through */
742 continue;
743 }
744
745 pr_debug("Found local branch to offset 0x%x\n", offset << 2);
746
747 for (target = block->opcode_list, prev = NULL;
748 target; prev = target, target = target->next) {
749 if (target->offset != offset ||
750 target->j.op == OP_META_SYNC)
751 continue;
752
753 if (target->flags & LIGHTREC_EMULATE_BRANCH) {
754 pr_debug("Branch target must be emulated"
755 " - skip\n");
756 break;
757 }
758
759 if (prev && has_delay_slot(prev->c)) {
760 pr_debug("Branch target is a delay slot"
761 " - skip\n");
762 break;
763 }
764
765 if (prev && prev->j.op != OP_META_SYNC) {
766 pr_debug("Adding sync before offset "
767 "0x%x\n", offset << 2);
768 ret = lightrec_add_sync(block, prev);
769 if (ret)
770 return ret;
771
772 prev->next->offset = target->offset;
773 }
774
775 list->flags |= LIGHTREC_LOCAL_BRANCH;
776 break;
777 }
778 }
779
780 return 0;
781 }
782
has_delay_slot(union code op)783 bool has_delay_slot(union code op)
784 {
785 switch (op.i.op) {
786 case OP_SPECIAL:
787 switch (op.r.op) {
788 case OP_SPECIAL_JR:
789 case OP_SPECIAL_JALR:
790 return true;
791 default:
792 return false;
793 }
794 case OP_J:
795 case OP_JAL:
796 case OP_BEQ:
797 case OP_BNE:
798 case OP_BLEZ:
799 case OP_BGTZ:
800 case OP_REGIMM:
801 case OP_META_BEQZ:
802 case OP_META_BNEZ:
803 return true;
804 default:
805 return false;
806 }
807 }
808
lightrec_add_unload(struct block * block,struct opcode * op,u8 reg)809 static int lightrec_add_unload(struct block *block, struct opcode *op, u8 reg)
810 {
811 return lightrec_add_meta(block, op, (union code){
812 .i.op = OP_META_REG_UNLOAD,
813 .i.rs = reg,
814 });
815 }
816
lightrec_early_unload(struct block * block)817 static int lightrec_early_unload(struct block *block)
818 {
819 struct opcode *list = block->opcode_list;
820 u8 i;
821
822 for (i = 1; i < 34; i++) {
823 struct opcode *op, *last_r = NULL, *last_w = NULL;
824 unsigned int last_r_id = 0, last_w_id = 0, id = 0;
825 int ret;
826
827 for (op = list; op->next; op = op->next, id++) {
828 if (opcode_reads_register(op->c, i)) {
829 last_r = op;
830 last_r_id = id;
831 }
832
833 if (opcode_writes_register(op->c, i)) {
834 last_w = op;
835 last_w_id = id;
836 }
837 }
838
839 if (last_w_id > last_r_id) {
840 if (has_delay_slot(last_w->c) &&
841 !(last_w->flags & LIGHTREC_NO_DS))
842 last_w = last_w->next;
843
844 if (last_w->next) {
845 ret = lightrec_add_unload(block, last_w, i);
846 if (ret)
847 return ret;
848 }
849 } else if (last_r) {
850 if (has_delay_slot(last_r->c) &&
851 !(last_r->flags & LIGHTREC_NO_DS))
852 last_r = last_r->next;
853
854 if (last_r->next) {
855 ret = lightrec_add_unload(block, last_r, i);
856 if (ret)
857 return ret;
858 }
859 }
860 }
861
862 return 0;
863 }
864
lightrec_flag_stores(struct block * block)865 static int lightrec_flag_stores(struct block *block)
866 {
867 struct opcode *list;
868 u32 known = BIT(0);
869 u32 values[32] = { 0 };
870
871 for (list = block->opcode_list; list; list = list->next) {
872 /* Register $zero is always, well, zero */
873 known |= BIT(0);
874 values[0] = 0;
875
876 switch (list->i.op) {
877 case OP_SB:
878 case OP_SH:
879 case OP_SW:
880 /* Mark all store operations that target $sp or $gp
881 * as not requiring code invalidation. This is based
882 * on the heuristic that stores using one of these
883 * registers as address will never hit a code page. */
884 if (list->i.rs >= 28 && list->i.rs <= 29 &&
885 !block->state->maps[PSX_MAP_KERNEL_USER_RAM].ops) {
886 pr_debug("Flaging opcode 0x%08x as not requiring invalidation\n",
887 list->opcode);
888 list->flags |= LIGHTREC_NO_INVALIDATE;
889 }
890
891 /* Detect writes whose destination address is inside the
892 * current block, using constant propagation. When these
893 * occur, we mark the blocks as not compilable. */
894 if ((known & BIT(list->i.rs)) &&
895 kunseg(values[list->i.rs]) >= kunseg(block->pc) &&
896 kunseg(values[list->i.rs]) < (kunseg(block->pc) +
897 block->nb_ops * 4)) {
898 pr_debug("Self-modifying block detected\n");
899 block->flags |= BLOCK_NEVER_COMPILE;
900 list->flags |= LIGHTREC_SMC;
901 }
902 default: /* fall-through */
903 break;
904 }
905
906 known = lightrec_propagate_consts(list->c, known, values);
907 }
908
909 return 0;
910 }
911
is_mult32(const struct block * block,const struct opcode * op)912 static bool is_mult32(const struct block *block, const struct opcode *op)
913 {
914 const struct opcode *next, *last = NULL;
915 u32 offset;
916
917 for (op = op->next; op != last; op = op->next) {
918 switch (op->i.op) {
919 case OP_BEQ:
920 case OP_BNE:
921 case OP_BLEZ:
922 case OP_BGTZ:
923 case OP_REGIMM:
924 case OP_META_BEQZ:
925 case OP_META_BNEZ:
926 /* TODO: handle backwards branches too */
927 if ((op->flags & LIGHTREC_LOCAL_BRANCH) &&
928 (s16)op->c.i.imm >= 0) {
929 offset = op->offset + 1 + (s16)op->c.i.imm;
930
931 for (next = op; next->offset != offset;
932 next = next->next);
933
934 if (!is_mult32(block, next))
935 return false;
936
937 last = next;
938 continue;
939 } else {
940 return false;
941 }
942 case OP_SPECIAL:
943 switch (op->r.op) {
944 case OP_SPECIAL_MULT:
945 case OP_SPECIAL_MULTU:
946 case OP_SPECIAL_DIV:
947 case OP_SPECIAL_DIVU:
948 case OP_SPECIAL_MTHI:
949 return true;
950 case OP_SPECIAL_JR:
951 return op->r.rs == 31 &&
952 ((op->flags & LIGHTREC_NO_DS) ||
953 !(op->next->i.op == OP_SPECIAL &&
954 op->next->r.op == OP_SPECIAL_MFHI));
955 case OP_SPECIAL_JALR:
956 case OP_SPECIAL_MFHI:
957 return false;
958 default:
959 continue;
960 }
961 default:
962 continue;
963 }
964 }
965
966 return last != NULL;
967 }
968
lightrec_flag_mults(struct block * block)969 static int lightrec_flag_mults(struct block *block)
970 {
971 struct opcode *list, *prev;
972
973 for (list = block->opcode_list, prev = NULL; list;
974 prev = list, list = list->next) {
975 if (list->i.op != OP_SPECIAL)
976 continue;
977
978 switch (list->r.op) {
979 case OP_SPECIAL_MULT:
980 case OP_SPECIAL_MULTU:
981 break;
982 default:
983 continue;
984 }
985
986 /* Don't support MULT(U) opcodes in delay slots */
987 if (prev && has_delay_slot(prev->c))
988 continue;
989
990 if (is_mult32(block, list)) {
991 pr_debug("Mark MULT(U) opcode at offset 0x%x as"
992 " 32-bit\n", list->offset << 2);
993 list->flags |= LIGHTREC_MULT32;
994 }
995 }
996
997 return 0;
998 }
999
1000 static int (*lightrec_optimizers[])(struct block *) = {
1001 &lightrec_detect_impossible_branches,
1002 &lightrec_transform_ops,
1003 &lightrec_local_branches,
1004 &lightrec_switch_delay_slots,
1005 &lightrec_flag_stores,
1006 &lightrec_flag_mults,
1007 &lightrec_early_unload,
1008 };
1009
lightrec_optimize(struct block * block)1010 int lightrec_optimize(struct block *block)
1011 {
1012 unsigned int i;
1013
1014 for (i = 0; i < ARRAY_SIZE(lightrec_optimizers); i++) {
1015 int ret = lightrec_optimizers[i](block);
1016
1017 if (ret)
1018 return ret;
1019 }
1020
1021 return 0;
1022 }
1023