1 /* Dependency checks for instruction scheduling, shared between ARM and
2 AARCH64.
3
4 Copyright (C) 1991-2016 Free Software Foundation, Inc.
5 Contributed by ARM Ltd.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "rtl-iter.h"
30
31 /* In ARMv8-A there's a general expectation that AESE/AESMC
32 and AESD/AESIMC sequences of the form:
33
34 AESE Vn, _
35 AESMC Vn, Vn
36
37 will issue both instructions in a single cycle on super-scalar
38 implementations. This function identifies such pairs. */
39
40 int
aarch_crypto_can_dual_issue(rtx_insn * producer_insn,rtx_insn * consumer_insn)41 aarch_crypto_can_dual_issue (rtx_insn *producer_insn, rtx_insn *consumer_insn)
42 {
43 rtx producer_set, consumer_set;
44 rtx producer_src, consumer_src;
45
46 producer_set = single_set (producer_insn);
47 consumer_set = single_set (consumer_insn);
48
49 producer_src = producer_set ? SET_SRC (producer_set) : NULL;
50 consumer_src = consumer_set ? SET_SRC (consumer_set) : NULL;
51
52 if (producer_src && consumer_src
53 && GET_CODE (producer_src) == UNSPEC && GET_CODE (consumer_src) == UNSPEC
54 && ((XINT (producer_src, 1) == UNSPEC_AESE
55 && XINT (consumer_src, 1) == UNSPEC_AESMC)
56 || (XINT (producer_src, 1) == UNSPEC_AESD
57 && XINT (consumer_src, 1) == UNSPEC_AESIMC)))
58 {
59 unsigned int regno = REGNO (SET_DEST (producer_set));
60
61 /* Before reload the registers are virtual, so the destination of
62 consumer_set doesn't need to match. */
63
64 return (REGNO (SET_DEST (consumer_set)) == regno || !reload_completed)
65 && REGNO (XVECEXP (consumer_src, 0, 0)) == regno;
66 }
67
68 return 0;
69 }
70
71 /* Return TRUE if X is either an arithmetic shift left, or
72 is a multiplication by a power of two. */
73 bool
arm_rtx_shift_left_p(rtx x)74 arm_rtx_shift_left_p (rtx x)
75 {
76 enum rtx_code code = GET_CODE (x);
77
78 if (code == MULT && CONST_INT_P (XEXP (x, 1))
79 && exact_log2 (INTVAL (XEXP (x, 1))) > 0)
80 return true;
81
82 if (code == ASHIFT)
83 return true;
84
85 return false;
86 }
87
88 static rtx_code shift_rtx_codes[] =
89 { ASHIFT, ROTATE, ASHIFTRT, LSHIFTRT,
90 ROTATERT, ZERO_EXTEND, SIGN_EXTEND };
91
92 /* Traverse PATTERN looking for a sub-rtx with RTX_CODE CODE.
93 If FIND_ANY_SHIFT then we are interested in anything which can
94 reasonably be described as a SHIFT RTX. */
95 static rtx
arm_find_sub_rtx_with_code(rtx pattern,rtx_code code,bool find_any_shift)96 arm_find_sub_rtx_with_code (rtx pattern, rtx_code code, bool find_any_shift)
97 {
98 subrtx_var_iterator::array_type array;
99 FOR_EACH_SUBRTX_VAR (iter, array, pattern, NONCONST)
100 {
101 rtx x = *iter;
102 if (find_any_shift)
103 {
104 /* Left shifts might have been canonicalized to a MULT of some
105 power of two. Make sure we catch them. */
106 if (arm_rtx_shift_left_p (x))
107 return x;
108 else
109 for (unsigned int i = 0; i < ARRAY_SIZE (shift_rtx_codes); i++)
110 if (GET_CODE (x) == shift_rtx_codes[i])
111 return x;
112 }
113
114 if (GET_CODE (x) == code)
115 return x;
116 }
117 return NULL_RTX;
118 }
119
120 /* Traverse PATTERN looking for any sub-rtx which looks like a shift. */
121 static rtx
arm_find_shift_sub_rtx(rtx pattern)122 arm_find_shift_sub_rtx (rtx pattern)
123 {
124 return arm_find_sub_rtx_with_code (pattern, ASHIFT, true);
125 }
126
127 /* PRODUCER and CONSUMER are two potentially dependant RTX. PRODUCER
128 (possibly) contains a SET which will provide a result we can access
129 using the SET_DEST macro. We will place the RTX which would be
130 written by PRODUCER in SET_SOURCE.
131 Similarly, CONSUMER (possibly) contains a SET which has an operand
132 we can access using SET_SRC. We place this operand in
133 SET_DESTINATION.
134
135 Return nonzero if we found the SET RTX we expected. */
136 static int
arm_get_set_operands(rtx producer,rtx consumer,rtx * set_source,rtx * set_destination)137 arm_get_set_operands (rtx producer, rtx consumer,
138 rtx *set_source, rtx *set_destination)
139 {
140 rtx set_producer = arm_find_sub_rtx_with_code (PATTERN (producer),
141 SET, false);
142 rtx set_consumer = arm_find_sub_rtx_with_code (PATTERN (consumer),
143 SET, false);
144
145 if (set_producer && set_consumer)
146 {
147 *set_source = SET_DEST (set_producer);
148 *set_destination = SET_SRC (set_consumer);
149 return 1;
150 }
151 return 0;
152 }
153
154 bool
aarch_rev16_shright_mask_imm_p(rtx val,machine_mode mode)155 aarch_rev16_shright_mask_imm_p (rtx val, machine_mode mode)
156 {
157 return CONST_INT_P (val)
158 && INTVAL (val)
159 == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff),
160 mode);
161 }
162
163 bool
aarch_rev16_shleft_mask_imm_p(rtx val,machine_mode mode)164 aarch_rev16_shleft_mask_imm_p (rtx val, machine_mode mode)
165 {
166 return CONST_INT_P (val)
167 && INTVAL (val)
168 == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff00),
169 mode);
170 }
171
172
173 static bool
aarch_rev16_p_1(rtx lhs,rtx rhs,machine_mode mode)174 aarch_rev16_p_1 (rtx lhs, rtx rhs, machine_mode mode)
175 {
176 if (GET_CODE (lhs) == AND
177 && GET_CODE (XEXP (lhs, 0)) == ASHIFT
178 && CONST_INT_P (XEXP (XEXP (lhs, 0), 1))
179 && INTVAL (XEXP (XEXP (lhs, 0), 1)) == 8
180 && REG_P (XEXP (XEXP (lhs, 0), 0))
181 && CONST_INT_P (XEXP (lhs, 1))
182 && GET_CODE (rhs) == AND
183 && GET_CODE (XEXP (rhs, 0)) == LSHIFTRT
184 && REG_P (XEXP (XEXP (rhs, 0), 0))
185 && CONST_INT_P (XEXP (XEXP (rhs, 0), 1))
186 && INTVAL (XEXP (XEXP (rhs, 0), 1)) == 8
187 && CONST_INT_P (XEXP (rhs, 1))
188 && REGNO (XEXP (XEXP (rhs, 0), 0)) == REGNO (XEXP (XEXP (lhs, 0), 0)))
189
190 {
191 rtx lhs_mask = XEXP (lhs, 1);
192 rtx rhs_mask = XEXP (rhs, 1);
193
194 return aarch_rev16_shright_mask_imm_p (rhs_mask, mode)
195 && aarch_rev16_shleft_mask_imm_p (lhs_mask, mode);
196 }
197
198 return false;
199 }
200
201 /* Recognise a sequence of bitwise operations corresponding to a rev16 operation.
202 These will be of the form:
203 ((x >> 8) & 0x00ff00ff)
204 | ((x << 8) & 0xff00ff00)
205 for SImode and with similar but wider bitmasks for DImode.
206 The two sub-expressions of the IOR can appear on either side so check both
207 permutations with the help of aarch_rev16_p_1 above. */
208
209 bool
aarch_rev16_p(rtx x)210 aarch_rev16_p (rtx x)
211 {
212 rtx left_sub_rtx, right_sub_rtx;
213 bool is_rev = false;
214
215 if (GET_CODE (x) != IOR)
216 return false;
217
218 left_sub_rtx = XEXP (x, 0);
219 right_sub_rtx = XEXP (x, 1);
220
221 /* There are no canonicalisation rules for the position of the two shifts
222 involved in a rev, so try both permutations. */
223 is_rev = aarch_rev16_p_1 (left_sub_rtx, right_sub_rtx, GET_MODE (x));
224
225 if (!is_rev)
226 is_rev = aarch_rev16_p_1 (right_sub_rtx, left_sub_rtx, GET_MODE (x));
227
228 return is_rev;
229 }
230
231 /* Return nonzero if the CONSUMER instruction (a load) does need
232 PRODUCER's value to calculate the address. */
233 int
arm_early_load_addr_dep(rtx producer,rtx consumer)234 arm_early_load_addr_dep (rtx producer, rtx consumer)
235 {
236 rtx value, addr;
237
238 if (!arm_get_set_operands (producer, consumer, &value, &addr))
239 return 0;
240
241 return reg_overlap_mentioned_p (value, addr);
242 }
243
244 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
245 have an early register shift value or amount dependency on the
246 result of PRODUCER. */
247 int
arm_no_early_alu_shift_dep(rtx producer,rtx consumer)248 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
249 {
250 rtx value, op;
251 rtx early_op;
252
253 if (!arm_get_set_operands (producer, consumer, &value, &op))
254 return 0;
255
256 if ((early_op = arm_find_shift_sub_rtx (op)))
257 {
258 if (REG_P (early_op))
259 early_op = op;
260
261 return !reg_overlap_mentioned_p (value, early_op);
262 }
263
264 return 0;
265 }
266
267 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
268 have an early register shift value dependency on the result of
269 PRODUCER. */
270 int
arm_no_early_alu_shift_value_dep(rtx producer,rtx consumer)271 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
272 {
273 rtx value, op;
274 rtx early_op;
275
276 if (!arm_get_set_operands (producer, consumer, &value, &op))
277 return 0;
278
279 if ((early_op = arm_find_shift_sub_rtx (op)))
280 /* We want to check the value being shifted. */
281 if (!reg_overlap_mentioned_p (value, XEXP (early_op, 0)))
282 return 1;
283
284 return 0;
285 }
286
287 /* Return nonzero if the CONSUMER (a mul or mac op) does not
288 have an early register mult dependency on the result of
289 PRODUCER. */
290 int
arm_no_early_mul_dep(rtx producer,rtx consumer)291 arm_no_early_mul_dep (rtx producer, rtx consumer)
292 {
293 rtx value, op;
294
295 if (!arm_get_set_operands (producer, consumer, &value, &op))
296 return 0;
297
298 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
299 {
300 if (GET_CODE (XEXP (op, 0)) == MULT)
301 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
302 else
303 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
304 }
305
306 return 0;
307 }
308
309 /* Return nonzero if the CONSUMER instruction (a store) does not need
310 PRODUCER's value to calculate the address. */
311
312 int
arm_no_early_store_addr_dep(rtx producer,rtx consumer)313 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
314 {
315 rtx value = arm_find_sub_rtx_with_code (PATTERN (producer), SET, false);
316 rtx addr = arm_find_sub_rtx_with_code (PATTERN (consumer), SET, false);
317
318 if (value)
319 value = SET_DEST (value);
320
321 if (addr)
322 addr = SET_DEST (addr);
323
324 if (!value || !addr)
325 return 0;
326
327 return !reg_overlap_mentioned_p (value, addr);
328 }
329
330 /* Return nonzero if the CONSUMER instruction (a store) does need
331 PRODUCER's value to calculate the address. */
332
333 int
arm_early_store_addr_dep(rtx producer,rtx consumer)334 arm_early_store_addr_dep (rtx producer, rtx consumer)
335 {
336 return !arm_no_early_store_addr_dep (producer, consumer);
337 }
338
339 /* Return non-zero iff the consumer (a multiply-accumulate or a
340 multiple-subtract instruction) has an accumulator dependency on the
341 result of the producer and no other dependency on that result. It
342 does not check if the producer is multiply-accumulate instruction. */
343 int
arm_mac_accumulator_is_result(rtx producer,rtx consumer)344 arm_mac_accumulator_is_result (rtx producer, rtx consumer)
345 {
346 rtx result;
347 rtx op0, op1, acc;
348
349 producer = PATTERN (producer);
350 consumer = PATTERN (consumer);
351
352 if (GET_CODE (producer) == COND_EXEC)
353 producer = COND_EXEC_CODE (producer);
354 if (GET_CODE (consumer) == COND_EXEC)
355 consumer = COND_EXEC_CODE (consumer);
356
357 if (GET_CODE (producer) != SET)
358 return 0;
359
360 result = XEXP (producer, 0);
361
362 if (GET_CODE (consumer) != SET)
363 return 0;
364
365 /* Check that the consumer is of the form
366 (set (...) (plus (mult ...) (...)))
367 or
368 (set (...) (minus (...) (mult ...))). */
369 if (GET_CODE (XEXP (consumer, 1)) == PLUS)
370 {
371 if (GET_CODE (XEXP (XEXP (consumer, 1), 0)) != MULT)
372 return 0;
373
374 op0 = XEXP (XEXP (XEXP (consumer, 1), 0), 0);
375 op1 = XEXP (XEXP (XEXP (consumer, 1), 0), 1);
376 acc = XEXP (XEXP (consumer, 1), 1);
377 }
378 else if (GET_CODE (XEXP (consumer, 1)) == MINUS)
379 {
380 if (GET_CODE (XEXP (XEXP (consumer, 1), 1)) != MULT)
381 return 0;
382
383 op0 = XEXP (XEXP (XEXP (consumer, 1), 1), 0);
384 op1 = XEXP (XEXP (XEXP (consumer, 1), 1), 1);
385 acc = XEXP (XEXP (consumer, 1), 0);
386 }
387 else
388 return 0;
389
390 return (reg_overlap_mentioned_p (result, acc)
391 && !reg_overlap_mentioned_p (result, op0)
392 && !reg_overlap_mentioned_p (result, op1));
393 }
394
395 /* Return non-zero if the destination of PRODUCER feeds the accumulator
396 operand of an MLA-like operation. */
397
398 int
aarch_accumulator_forwarding(rtx_insn * producer,rtx_insn * consumer)399 aarch_accumulator_forwarding (rtx_insn *producer, rtx_insn *consumer)
400 {
401 rtx producer_set = single_set (producer);
402 rtx consumer_set = single_set (consumer);
403
404 /* We are looking for a SET feeding a SET. */
405 if (!producer_set || !consumer_set)
406 return 0;
407
408 rtx dest = SET_DEST (producer_set);
409 rtx mla = SET_SRC (consumer_set);
410
411 /* We're looking for a register SET. */
412 if (!REG_P (dest))
413 return 0;
414
415 rtx accumulator;
416
417 /* Strip a zero_extend. */
418 if (GET_CODE (mla) == ZERO_EXTEND)
419 mla = XEXP (mla, 0);
420
421 switch (GET_CODE (mla))
422 {
423 case PLUS:
424 /* Possibly an MADD. */
425 if (GET_CODE (XEXP (mla, 0)) == MULT)
426 accumulator = XEXP (mla, 1);
427 else
428 return 0;
429 break;
430 case MINUS:
431 /* Possibly an MSUB. */
432 if (GET_CODE (XEXP (mla, 1)) == MULT)
433 accumulator = XEXP (mla, 0);
434 else
435 return 0;
436 break;
437 case FMA:
438 {
439 /* Possibly an FMADD/FMSUB/FNMADD/FNMSUB. */
440 if (REG_P (XEXP (mla, 1))
441 && REG_P (XEXP (mla, 2))
442 && (REG_P (XEXP (mla, 0))
443 || GET_CODE (XEXP (mla, 0)) == NEG))
444
445 {
446 /* FMADD/FMSUB. */
447 accumulator = XEXP (mla, 2);
448 }
449 else if (REG_P (XEXP (mla, 1))
450 && GET_CODE (XEXP (mla, 2)) == NEG
451 && (REG_P (XEXP (mla, 0))
452 || GET_CODE (XEXP (mla, 0)) == NEG))
453 {
454 /* FNMADD/FNMSUB. */
455 accumulator = XEXP (XEXP (mla, 2), 0);
456 }
457 else
458 return 0;
459 break;
460 }
461 default:
462 /* Not an MLA-like operation. */
463 return 0;
464 }
465
466 if (GET_CODE (accumulator) == SUBREG)
467 accumulator = SUBREG_REG (accumulator);
468
469 if (!REG_P (accumulator))
470 return 0;
471
472 return (REGNO (dest) == REGNO (accumulator));
473 }
474
475 /* Return nonzero if the CONSUMER instruction is some sort of
476 arithmetic or logic + shift operation, and the register we are
477 writing in PRODUCER is not used in a register shift by register
478 operation. */
479
480 int
aarch_forward_to_shift_is_not_shifted_reg(rtx_insn * producer,rtx_insn * consumer)481 aarch_forward_to_shift_is_not_shifted_reg (rtx_insn *producer,
482 rtx_insn *consumer)
483 {
484 rtx value, op;
485 rtx early_op;
486
487 if (!arm_get_set_operands (producer, consumer, &value, &op))
488 return 0;
489
490 if ((early_op = arm_find_shift_sub_rtx (op)))
491 {
492 if (REG_P (early_op))
493 early_op = op;
494
495 /* Any other canonicalisation of a shift is a shift-by-constant
496 so we don't care. */
497 if (GET_CODE (early_op) == ASHIFT)
498 return (!REG_P (XEXP (early_op, 0))
499 || !REG_P (XEXP (early_op, 1)));
500 else
501 return 1;
502 }
503
504 return 0;
505 }
506
507 /* Return non-zero if the consumer (a multiply-accumulate instruction)
508 has an accumulator dependency on the result of the producer (a
509 multiplication instruction) and no other dependency on that result. */
510 int
arm_mac_accumulator_is_mul_result(rtx producer,rtx consumer)511 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
512 {
513 rtx mul = PATTERN (producer);
514 rtx mac = PATTERN (consumer);
515 rtx mul_result;
516 rtx mac_op0, mac_op1, mac_acc;
517
518 if (GET_CODE (mul) == COND_EXEC)
519 mul = COND_EXEC_CODE (mul);
520 if (GET_CODE (mac) == COND_EXEC)
521 mac = COND_EXEC_CODE (mac);
522
523 /* Check that mul is of the form (set (...) (mult ...))
524 and mla is of the form (set (...) (plus (mult ...) (...))). */
525 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
526 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
527 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
528 return 0;
529
530 mul_result = XEXP (mul, 0);
531 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
532 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
533 mac_acc = XEXP (XEXP (mac, 1), 1);
534
535 return (reg_overlap_mentioned_p (mul_result, mac_acc)
536 && !reg_overlap_mentioned_p (mul_result, mac_op0)
537 && !reg_overlap_mentioned_p (mul_result, mac_op1));
538 }
539