xref: /qemu/tcg/optimize.c (revision 753d11f2)
1 /*
2  * Optimizations for Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2010 Samsung Electronics.
5  * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "config.h"
27 
28 #include <stdlib.h>
29 #include <stdio.h>
30 
31 #include "qemu-common.h"
32 #include "tcg-op.h"
33 
34 #define CASE_OP_32_64(x)                        \
35         glue(glue(case INDEX_op_, x), _i32):    \
36         glue(glue(case INDEX_op_, x), _i64)
37 
38 typedef enum {
39     TCG_TEMP_UNDEF = 0,
40     TCG_TEMP_CONST,
41     TCG_TEMP_COPY,
42     TCG_TEMP_HAS_COPY,
43     TCG_TEMP_ANY
44 } tcg_temp_state;
45 
46 struct tcg_temp_info {
47     tcg_temp_state state;
48     uint16_t prev_copy;
49     uint16_t next_copy;
50     tcg_target_ulong val;
51 };
52 
53 static struct tcg_temp_info temps[TCG_MAX_TEMPS];
54 
55 /* Reset TEMP's state to TCG_TEMP_ANY.  If TEMP was a representative of some
56    class of equivalent temp's, a new representative should be chosen in this
57    class. */
58 static void reset_temp(TCGArg temp, int nb_temps, int nb_globals)
59 {
60     int i;
61     TCGArg new_base = (TCGArg)-1;
62     if (temps[temp].state == TCG_TEMP_HAS_COPY) {
63         for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
64             if (i >= nb_globals) {
65                 temps[i].state = TCG_TEMP_HAS_COPY;
66                 new_base = i;
67                 break;
68             }
69         }
70         for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
71             if (new_base == (TCGArg)-1) {
72                 temps[i].state = TCG_TEMP_ANY;
73             } else {
74                 temps[i].val = new_base;
75             }
76         }
77         temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
78         temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
79     } else if (temps[temp].state == TCG_TEMP_COPY) {
80         temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
81         temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
82         new_base = temps[temp].val;
83     }
84     temps[temp].state = TCG_TEMP_ANY;
85     if (new_base != (TCGArg)-1 && temps[new_base].next_copy == new_base) {
86         temps[new_base].state = TCG_TEMP_ANY;
87     }
88 }
89 
90 static int op_bits(TCGOpcode op)
91 {
92     const TCGOpDef *def = &tcg_op_defs[op];
93     return def->flags & TCG_OPF_64BIT ? 64 : 32;
94 }
95 
96 static TCGOpcode op_to_movi(TCGOpcode op)
97 {
98     switch (op_bits(op)) {
99     case 32:
100         return INDEX_op_movi_i32;
101     case 64:
102         return INDEX_op_movi_i64;
103     default:
104         fprintf(stderr, "op_to_movi: unexpected return value of "
105                 "function op_bits.\n");
106         tcg_abort();
107     }
108 }
109 
110 static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, TCGArg dst,
111                             TCGArg src, int nb_temps, int nb_globals)
112 {
113         reset_temp(dst, nb_temps, nb_globals);
114         assert(temps[src].state != TCG_TEMP_COPY);
115         /* Don't try to copy if one of temps is a global or either one
116            is local and another is register */
117         if (src >= nb_globals && dst >= nb_globals &&
118             tcg_arg_is_local(s, src) == tcg_arg_is_local(s, dst)) {
119             assert(temps[src].state != TCG_TEMP_CONST);
120             if (temps[src].state != TCG_TEMP_HAS_COPY) {
121                 temps[src].state = TCG_TEMP_HAS_COPY;
122                 temps[src].next_copy = src;
123                 temps[src].prev_copy = src;
124             }
125             temps[dst].state = TCG_TEMP_COPY;
126             temps[dst].val = src;
127             temps[dst].next_copy = temps[src].next_copy;
128             temps[dst].prev_copy = src;
129             temps[temps[dst].next_copy].prev_copy = dst;
130             temps[src].next_copy = dst;
131         }
132         gen_args[0] = dst;
133         gen_args[1] = src;
134 }
135 
136 static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val,
137                              int nb_temps, int nb_globals)
138 {
139         reset_temp(dst, nb_temps, nb_globals);
140         temps[dst].state = TCG_TEMP_CONST;
141         temps[dst].val = val;
142         gen_args[0] = dst;
143         gen_args[1] = val;
144 }
145 
146 static TCGOpcode op_to_mov(TCGOpcode op)
147 {
148     switch (op_bits(op)) {
149     case 32:
150         return INDEX_op_mov_i32;
151     case 64:
152         return INDEX_op_mov_i64;
153     default:
154         fprintf(stderr, "op_to_mov: unexpected return value of "
155                 "function op_bits.\n");
156         tcg_abort();
157     }
158 }
159 
160 static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
161 {
162     switch (op) {
163     CASE_OP_32_64(add):
164         return x + y;
165 
166     CASE_OP_32_64(sub):
167         return x - y;
168 
169     CASE_OP_32_64(mul):
170         return x * y;
171 
172     CASE_OP_32_64(and):
173         return x & y;
174 
175     CASE_OP_32_64(or):
176         return x | y;
177 
178     CASE_OP_32_64(xor):
179         return x ^ y;
180 
181     case INDEX_op_shl_i32:
182         return (uint32_t)x << (uint32_t)y;
183 
184     case INDEX_op_shl_i64:
185         return (uint64_t)x << (uint64_t)y;
186 
187     case INDEX_op_shr_i32:
188         return (uint32_t)x >> (uint32_t)y;
189 
190     case INDEX_op_shr_i64:
191         return (uint64_t)x >> (uint64_t)y;
192 
193     case INDEX_op_sar_i32:
194         return (int32_t)x >> (int32_t)y;
195 
196     case INDEX_op_sar_i64:
197         return (int64_t)x >> (int64_t)y;
198 
199     case INDEX_op_rotr_i32:
200         x = ((uint32_t)x << (32 - y)) | ((uint32_t)x >> y);
201         return x;
202 
203     case INDEX_op_rotr_i64:
204         x = ((uint64_t)x << (64 - y)) | ((uint64_t)x >> y);
205         return x;
206 
207     case INDEX_op_rotl_i32:
208         x = ((uint32_t)x << y) | ((uint32_t)x >> (32 - y));
209         return x;
210 
211     case INDEX_op_rotl_i64:
212         x = ((uint64_t)x << y) | ((uint64_t)x >> (64 - y));
213         return x;
214 
215     CASE_OP_32_64(not):
216         return ~x;
217 
218     CASE_OP_32_64(neg):
219         return -x;
220 
221     CASE_OP_32_64(andc):
222         return x & ~y;
223 
224     CASE_OP_32_64(orc):
225         return x | ~y;
226 
227     CASE_OP_32_64(eqv):
228         return ~(x ^ y);
229 
230     CASE_OP_32_64(nand):
231         return ~(x & y);
232 
233     CASE_OP_32_64(nor):
234         return ~(x | y);
235 
236     CASE_OP_32_64(ext8s):
237         return (int8_t)x;
238 
239     CASE_OP_32_64(ext16s):
240         return (int16_t)x;
241 
242     CASE_OP_32_64(ext8u):
243         return (uint8_t)x;
244 
245     CASE_OP_32_64(ext16u):
246         return (uint16_t)x;
247 
248     case INDEX_op_ext32s_i64:
249         return (int32_t)x;
250 
251     case INDEX_op_ext32u_i64:
252         return (uint32_t)x;
253 
254     default:
255         fprintf(stderr,
256                 "Unrecognized operation %d in do_constant_folding.\n", op);
257         tcg_abort();
258     }
259 }
260 
261 static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
262 {
263     TCGArg res = do_constant_folding_2(op, x, y);
264     if (op_bits(op) == 32) {
265         res &= 0xffffffff;
266     }
267     return res;
268 }
269 
270 /* Propagate constants and copies, fold constant expressions. */
271 static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
272                                     TCGArg *args, TCGOpDef *tcg_op_defs)
273 {
274     int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args;
275     TCGOpcode op;
276     const TCGOpDef *def;
277     TCGArg *gen_args;
278     TCGArg tmp;
279     /* Array VALS has an element for each temp.
280        If this temp holds a constant then its value is kept in VALS' element.
281        If this temp is a copy of other ones then this equivalence class'
282        representative is kept in VALS' element.
283        If this temp is neither copy nor constant then corresponding VALS'
284        element is unused. */
285 
286     nb_temps = s->nb_temps;
287     nb_globals = s->nb_globals;
288     memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
289 
290     nb_ops = tcg_opc_ptr - gen_opc_buf;
291     gen_args = args;
292     for (op_index = 0; op_index < nb_ops; op_index++) {
293         op = gen_opc_buf[op_index];
294         def = &tcg_op_defs[op];
295         /* Do copy propagation */
296         if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS))) {
297             assert(op != INDEX_op_call);
298             for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
299                 if (temps[args[i]].state == TCG_TEMP_COPY) {
300                     args[i] = temps[args[i]].val;
301                 }
302             }
303         }
304 
305         /* For commutative operations make constant second argument */
306         switch (op) {
307         CASE_OP_32_64(add):
308         CASE_OP_32_64(mul):
309         CASE_OP_32_64(and):
310         CASE_OP_32_64(or):
311         CASE_OP_32_64(xor):
312         CASE_OP_32_64(eqv):
313         CASE_OP_32_64(nand):
314         CASE_OP_32_64(nor):
315             if (temps[args[1]].state == TCG_TEMP_CONST) {
316                 tmp = args[1];
317                 args[1] = args[2];
318                 args[2] = tmp;
319             }
320             break;
321         default:
322             break;
323         }
324 
325         /* Simplify expression if possible. */
326         switch (op) {
327         CASE_OP_32_64(add):
328         CASE_OP_32_64(sub):
329         CASE_OP_32_64(shl):
330         CASE_OP_32_64(shr):
331         CASE_OP_32_64(sar):
332         CASE_OP_32_64(rotl):
333         CASE_OP_32_64(rotr):
334             if (temps[args[1]].state == TCG_TEMP_CONST) {
335                 /* Proceed with possible constant folding. */
336                 break;
337             }
338             if (temps[args[2]].state == TCG_TEMP_CONST
339                 && temps[args[2]].val == 0) {
340                 if ((temps[args[0]].state == TCG_TEMP_COPY
341                     && temps[args[0]].val == args[1])
342                     || args[0] == args[1]) {
343                     args += 3;
344                     gen_opc_buf[op_index] = INDEX_op_nop;
345                 } else {
346                     gen_opc_buf[op_index] = op_to_mov(op);
347                     tcg_opt_gen_mov(s, gen_args, args[0], args[1],
348                                     nb_temps, nb_globals);
349                     gen_args += 2;
350                     args += 3;
351                 }
352                 continue;
353             }
354             break;
355         CASE_OP_32_64(mul):
356             if ((temps[args[2]].state == TCG_TEMP_CONST
357                 && temps[args[2]].val == 0)) {
358                 gen_opc_buf[op_index] = op_to_movi(op);
359                 tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
360                 args += 3;
361                 gen_args += 2;
362                 continue;
363             }
364             break;
365         CASE_OP_32_64(or):
366         CASE_OP_32_64(and):
367             if (args[1] == args[2]) {
368                 if (args[1] == args[0]) {
369                     args += 3;
370                     gen_opc_buf[op_index] = INDEX_op_nop;
371                 } else {
372                     gen_opc_buf[op_index] = op_to_mov(op);
373                     tcg_opt_gen_mov(s, gen_args, args[0], args[1], nb_temps,
374                                     nb_globals);
375                     gen_args += 2;
376                     args += 3;
377                 }
378                 continue;
379             }
380             break;
381         default:
382             break;
383         }
384 
385         /* Propagate constants through copy operations and do constant
386            folding.  Constants will be substituted to arguments by register
387            allocator where needed and possible.  Also detect copies. */
388         switch (op) {
389         CASE_OP_32_64(mov):
390             if ((temps[args[1]].state == TCG_TEMP_COPY
391                 && temps[args[1]].val == args[0])
392                 || args[0] == args[1]) {
393                 args += 2;
394                 gen_opc_buf[op_index] = INDEX_op_nop;
395                 break;
396             }
397             if (temps[args[1]].state != TCG_TEMP_CONST) {
398                 tcg_opt_gen_mov(s, gen_args, args[0], args[1],
399                                 nb_temps, nb_globals);
400                 gen_args += 2;
401                 args += 2;
402                 break;
403             }
404             /* Source argument is constant.  Rewrite the operation and
405                let movi case handle it. */
406             op = op_to_movi(op);
407             gen_opc_buf[op_index] = op;
408             args[1] = temps[args[1]].val;
409             /* fallthrough */
410         CASE_OP_32_64(movi):
411             tcg_opt_gen_movi(gen_args, args[0], args[1], nb_temps, nb_globals);
412             gen_args += 2;
413             args += 2;
414             break;
415         CASE_OP_32_64(not):
416         CASE_OP_32_64(neg):
417         CASE_OP_32_64(ext8s):
418         CASE_OP_32_64(ext8u):
419         CASE_OP_32_64(ext16s):
420         CASE_OP_32_64(ext16u):
421         case INDEX_op_ext32s_i64:
422         case INDEX_op_ext32u_i64:
423             if (temps[args[1]].state == TCG_TEMP_CONST) {
424                 gen_opc_buf[op_index] = op_to_movi(op);
425                 tmp = do_constant_folding(op, temps[args[1]].val, 0);
426                 tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
427                 gen_args += 2;
428                 args += 2;
429                 break;
430             } else {
431                 reset_temp(args[0], nb_temps, nb_globals);
432                 gen_args[0] = args[0];
433                 gen_args[1] = args[1];
434                 gen_args += 2;
435                 args += 2;
436                 break;
437             }
438         CASE_OP_32_64(add):
439         CASE_OP_32_64(sub):
440         CASE_OP_32_64(mul):
441         CASE_OP_32_64(or):
442         CASE_OP_32_64(and):
443         CASE_OP_32_64(xor):
444         CASE_OP_32_64(shl):
445         CASE_OP_32_64(shr):
446         CASE_OP_32_64(sar):
447         CASE_OP_32_64(rotl):
448         CASE_OP_32_64(rotr):
449         CASE_OP_32_64(andc):
450         CASE_OP_32_64(orc):
451         CASE_OP_32_64(eqv):
452         CASE_OP_32_64(nand):
453         CASE_OP_32_64(nor):
454             if (temps[args[1]].state == TCG_TEMP_CONST
455                 && temps[args[2]].state == TCG_TEMP_CONST) {
456                 gen_opc_buf[op_index] = op_to_movi(op);
457                 tmp = do_constant_folding(op, temps[args[1]].val,
458                                           temps[args[2]].val);
459                 tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
460                 gen_args += 2;
461                 args += 3;
462                 break;
463             } else {
464                 reset_temp(args[0], nb_temps, nb_globals);
465                 gen_args[0] = args[0];
466                 gen_args[1] = args[1];
467                 gen_args[2] = args[2];
468                 gen_args += 3;
469                 args += 3;
470                 break;
471             }
472         case INDEX_op_call:
473             nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
474             if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
475                 for (i = 0; i < nb_globals; i++) {
476                     reset_temp(i, nb_temps, nb_globals);
477                 }
478             }
479             for (i = 0; i < (args[0] >> 16); i++) {
480                 reset_temp(args[i + 1], nb_temps, nb_globals);
481             }
482             i = nb_call_args + 3;
483             while (i) {
484                 *gen_args = *args;
485                 args++;
486                 gen_args++;
487                 i--;
488             }
489             break;
490         case INDEX_op_set_label:
491         case INDEX_op_jmp:
492         case INDEX_op_br:
493         CASE_OP_32_64(brcond):
494             memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
495             for (i = 0; i < def->nb_args; i++) {
496                 *gen_args = *args;
497                 args++;
498                 gen_args++;
499             }
500             break;
501         default:
502             /* Default case: we do know nothing about operation so no
503                propagation is done.  We only trash output args.  */
504             for (i = 0; i < def->nb_oargs; i++) {
505                 reset_temp(args[i], nb_temps, nb_globals);
506             }
507             for (i = 0; i < def->nb_args; i++) {
508                 gen_args[i] = args[i];
509             }
510             args += def->nb_args;
511             gen_args += def->nb_args;
512             break;
513         }
514     }
515 
516     return gen_args;
517 }
518 
519 TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr,
520         TCGArg *args, TCGOpDef *tcg_op_defs)
521 {
522     TCGArg *res;
523     res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs);
524     return res;
525 }
526