1 /* Dependency checks for instruction scheduling, shared between ARM and
2 AARCH64.
3
4 Copyright (C) 1991-2018 Free Software Foundation, Inc.
5 Contributed by ARM Ltd.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23
24 #define IN_TARGET_CODE 1
25
26 #include "config.h"
27 #include "system.h"
28 #include "coretypes.h"
29 #include "tm.h"
30 #include "rtl.h"
31 #include "rtl-iter.h"
32
33 /* In ARMv8-A there's a general expectation that AESE/AESMC
34 and AESD/AESIMC sequences of the form:
35
36 AESE Vn, _
37 AESMC Vn, Vn
38
39 will issue both instructions in a single cycle on super-scalar
40 implementations. This function identifies such pairs. */
41
42 int
aarch_crypto_can_dual_issue(rtx_insn * producer_insn,rtx_insn * consumer_insn)43 aarch_crypto_can_dual_issue (rtx_insn *producer_insn, rtx_insn *consumer_insn)
44 {
45 rtx producer_set, consumer_set;
46 rtx producer_src, consumer_src;
47
48 producer_set = single_set (producer_insn);
49 consumer_set = single_set (consumer_insn);
50
51 producer_src = producer_set ? SET_SRC (producer_set) : NULL;
52 consumer_src = consumer_set ? SET_SRC (consumer_set) : NULL;
53
54 if (producer_src && consumer_src
55 && GET_CODE (producer_src) == UNSPEC && GET_CODE (consumer_src) == UNSPEC
56 && ((XINT (producer_src, 1) == UNSPEC_AESE
57 && XINT (consumer_src, 1) == UNSPEC_AESMC)
58 || (XINT (producer_src, 1) == UNSPEC_AESD
59 && XINT (consumer_src, 1) == UNSPEC_AESIMC)))
60 {
61 unsigned int regno = REGNO (SET_DEST (producer_set));
62
63 /* Before reload the registers are virtual, so the destination of
64 consumer_set doesn't need to match. */
65
66 return (REGNO (SET_DEST (consumer_set)) == regno || !reload_completed)
67 && REGNO (XVECEXP (consumer_src, 0, 0)) == regno;
68 }
69
70 return 0;
71 }
72
73 /* Return TRUE if X is either an arithmetic shift left, or
74 is a multiplication by a power of two. */
75 bool
arm_rtx_shift_left_p(rtx x)76 arm_rtx_shift_left_p (rtx x)
77 {
78 enum rtx_code code = GET_CODE (x);
79
80 if (code == MULT && CONST_INT_P (XEXP (x, 1))
81 && exact_log2 (INTVAL (XEXP (x, 1))) > 0)
82 return true;
83
84 if (code == ASHIFT)
85 return true;
86
87 return false;
88 }
89
90 static rtx_code shift_rtx_codes[] =
91 { ASHIFT, ROTATE, ASHIFTRT, LSHIFTRT,
92 ROTATERT, ZERO_EXTEND, SIGN_EXTEND };
93
94 /* Traverse PATTERN looking for a sub-rtx with RTX_CODE CODE.
95 If FIND_ANY_SHIFT then we are interested in anything which can
96 reasonably be described as a SHIFT RTX. */
97 static rtx
arm_find_sub_rtx_with_code(rtx pattern,rtx_code code,bool find_any_shift)98 arm_find_sub_rtx_with_code (rtx pattern, rtx_code code, bool find_any_shift)
99 {
100 subrtx_var_iterator::array_type array;
101 FOR_EACH_SUBRTX_VAR (iter, array, pattern, NONCONST)
102 {
103 rtx x = *iter;
104 if (find_any_shift)
105 {
106 /* Left shifts might have been canonicalized to a MULT of some
107 power of two. Make sure we catch them. */
108 if (arm_rtx_shift_left_p (x))
109 return x;
110 else
111 for (unsigned int i = 0; i < ARRAY_SIZE (shift_rtx_codes); i++)
112 if (GET_CODE (x) == shift_rtx_codes[i])
113 return x;
114 }
115
116 if (GET_CODE (x) == code)
117 return x;
118 }
119 return NULL_RTX;
120 }
121
122 /* Traverse PATTERN looking for any sub-rtx which looks like a shift. */
123 static rtx
arm_find_shift_sub_rtx(rtx pattern)124 arm_find_shift_sub_rtx (rtx pattern)
125 {
126 return arm_find_sub_rtx_with_code (pattern, ASHIFT, true);
127 }
128
129 /* PRODUCER and CONSUMER are two potentially dependant RTX. PRODUCER
130 (possibly) contains a SET which will provide a result we can access
131 using the SET_DEST macro. We will place the RTX which would be
132 written by PRODUCER in SET_SOURCE.
133 Similarly, CONSUMER (possibly) contains a SET which has an operand
134 we can access using SET_SRC. We place this operand in
135 SET_DESTINATION.
136
137 Return nonzero if we found the SET RTX we expected. */
138 static int
arm_get_set_operands(rtx producer,rtx consumer,rtx * set_source,rtx * set_destination)139 arm_get_set_operands (rtx producer, rtx consumer,
140 rtx *set_source, rtx *set_destination)
141 {
142 rtx set_producer = arm_find_sub_rtx_with_code (PATTERN (producer),
143 SET, false);
144 rtx set_consumer = arm_find_sub_rtx_with_code (PATTERN (consumer),
145 SET, false);
146
147 if (set_producer && set_consumer)
148 {
149 *set_source = SET_DEST (set_producer);
150 *set_destination = SET_SRC (set_consumer);
151 return 1;
152 }
153 return 0;
154 }
155
156 bool
aarch_rev16_shright_mask_imm_p(rtx val,machine_mode mode)157 aarch_rev16_shright_mask_imm_p (rtx val, machine_mode mode)
158 {
159 return CONST_INT_P (val)
160 && INTVAL (val)
161 == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff),
162 mode);
163 }
164
165 bool
aarch_rev16_shleft_mask_imm_p(rtx val,machine_mode mode)166 aarch_rev16_shleft_mask_imm_p (rtx val, machine_mode mode)
167 {
168 return CONST_INT_P (val)
169 && INTVAL (val)
170 == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff00),
171 mode);
172 }
173
174
175 static bool
aarch_rev16_p_1(rtx lhs,rtx rhs,machine_mode mode)176 aarch_rev16_p_1 (rtx lhs, rtx rhs, machine_mode mode)
177 {
178 if (GET_CODE (lhs) == AND
179 && GET_CODE (XEXP (lhs, 0)) == ASHIFT
180 && CONST_INT_P (XEXP (XEXP (lhs, 0), 1))
181 && INTVAL (XEXP (XEXP (lhs, 0), 1)) == 8
182 && REG_P (XEXP (XEXP (lhs, 0), 0))
183 && CONST_INT_P (XEXP (lhs, 1))
184 && GET_CODE (rhs) == AND
185 && GET_CODE (XEXP (rhs, 0)) == LSHIFTRT
186 && REG_P (XEXP (XEXP (rhs, 0), 0))
187 && CONST_INT_P (XEXP (XEXP (rhs, 0), 1))
188 && INTVAL (XEXP (XEXP (rhs, 0), 1)) == 8
189 && CONST_INT_P (XEXP (rhs, 1))
190 && REGNO (XEXP (XEXP (rhs, 0), 0)) == REGNO (XEXP (XEXP (lhs, 0), 0)))
191
192 {
193 rtx lhs_mask = XEXP (lhs, 1);
194 rtx rhs_mask = XEXP (rhs, 1);
195
196 return aarch_rev16_shright_mask_imm_p (rhs_mask, mode)
197 && aarch_rev16_shleft_mask_imm_p (lhs_mask, mode);
198 }
199
200 return false;
201 }
202
203 /* Recognise a sequence of bitwise operations corresponding to a rev16 operation.
204 These will be of the form:
205 ((x >> 8) & 0x00ff00ff)
206 | ((x << 8) & 0xff00ff00)
207 for SImode and with similar but wider bitmasks for DImode.
208 The two sub-expressions of the IOR can appear on either side so check both
209 permutations with the help of aarch_rev16_p_1 above. */
210
211 bool
aarch_rev16_p(rtx x)212 aarch_rev16_p (rtx x)
213 {
214 rtx left_sub_rtx, right_sub_rtx;
215 bool is_rev = false;
216
217 if (GET_CODE (x) != IOR)
218 return false;
219
220 left_sub_rtx = XEXP (x, 0);
221 right_sub_rtx = XEXP (x, 1);
222
223 /* There are no canonicalisation rules for the position of the two shifts
224 involved in a rev, so try both permutations. */
225 is_rev = aarch_rev16_p_1 (left_sub_rtx, right_sub_rtx, GET_MODE (x));
226
227 if (!is_rev)
228 is_rev = aarch_rev16_p_1 (right_sub_rtx, left_sub_rtx, GET_MODE (x));
229
230 return is_rev;
231 }
232
233 /* Return nonzero if the CONSUMER instruction (a load) does need
234 PRODUCER's value to calculate the address. */
235 int
arm_early_load_addr_dep(rtx producer,rtx consumer)236 arm_early_load_addr_dep (rtx producer, rtx consumer)
237 {
238 rtx value, addr;
239
240 if (!arm_get_set_operands (producer, consumer, &value, &addr))
241 return 0;
242
243 return reg_overlap_mentioned_p (value, addr);
244 }
245
246 /* Return nonzero if the CONSUMER instruction (a load) does need
247 a Pmode PRODUCER's value to calculate the address. */
248
249 int
arm_early_load_addr_dep_ptr(rtx producer,rtx consumer)250 arm_early_load_addr_dep_ptr (rtx producer, rtx consumer)
251 {
252 rtx value = arm_find_sub_rtx_with_code (PATTERN (producer), SET, false);
253 rtx addr = arm_find_sub_rtx_with_code (PATTERN (consumer), SET, false);
254
255 if (!value || !addr || !MEM_P (SET_SRC (value)))
256 return 0;
257
258 value = SET_DEST (value);
259 addr = SET_SRC (addr);
260
261 return GET_MODE (value) == Pmode && reg_overlap_mentioned_p (value, addr);
262 }
263
264 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
265 have an early register shift value or amount dependency on the
266 result of PRODUCER. */
267 int
arm_no_early_alu_shift_dep(rtx producer,rtx consumer)268 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
269 {
270 rtx value, op;
271 rtx early_op;
272
273 if (!arm_get_set_operands (producer, consumer, &value, &op))
274 return 0;
275
276 if ((early_op = arm_find_shift_sub_rtx (op)))
277 return !reg_overlap_mentioned_p (value, early_op);
278
279 return 0;
280 }
281
282 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
283 have an early register shift value dependency on the result of
284 PRODUCER. */
285 int
arm_no_early_alu_shift_value_dep(rtx producer,rtx consumer)286 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
287 {
288 rtx value, op;
289 rtx early_op;
290
291 if (!arm_get_set_operands (producer, consumer, &value, &op))
292 return 0;
293
294 if ((early_op = arm_find_shift_sub_rtx (op)))
295 /* We want to check the value being shifted. */
296 if (!reg_overlap_mentioned_p (value, XEXP (early_op, 0)))
297 return 1;
298
299 return 0;
300 }
301
302 /* Return nonzero if the CONSUMER (a mul or mac op) does not
303 have an early register mult dependency on the result of
304 PRODUCER. */
305 int
arm_no_early_mul_dep(rtx producer,rtx consumer)306 arm_no_early_mul_dep (rtx producer, rtx consumer)
307 {
308 rtx value, op;
309
310 if (!arm_get_set_operands (producer, consumer, &value, &op))
311 return 0;
312
313 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
314 {
315 if (GET_CODE (XEXP (op, 0)) == MULT)
316 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
317 else
318 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
319 }
320
321 return 0;
322 }
323
324 /* Return nonzero if the CONSUMER instruction (a store) does not need
325 PRODUCER's value to calculate the address. */
326
327 int
arm_no_early_store_addr_dep(rtx producer,rtx consumer)328 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
329 {
330 rtx value = arm_find_sub_rtx_with_code (PATTERN (producer), SET, false);
331 rtx addr = arm_find_sub_rtx_with_code (PATTERN (consumer), SET, false);
332
333 if (value)
334 value = SET_DEST (value);
335
336 if (addr)
337 addr = SET_DEST (addr);
338
339 if (!value || !addr)
340 return 0;
341
342 return !reg_overlap_mentioned_p (value, addr);
343 }
344
345 /* Return nonzero if the CONSUMER instruction (a store) does need
346 PRODUCER's value to calculate the address. */
347
348 int
arm_early_store_addr_dep(rtx producer,rtx consumer)349 arm_early_store_addr_dep (rtx producer, rtx consumer)
350 {
351 return !arm_no_early_store_addr_dep (producer, consumer);
352 }
353
354 /* Return nonzero if the CONSUMER instruction (a store) does need
355 a Pmode PRODUCER's value to calculate the address. */
356
357 int
arm_early_store_addr_dep_ptr(rtx producer,rtx consumer)358 arm_early_store_addr_dep_ptr (rtx producer, rtx consumer)
359 {
360 rtx value = arm_find_sub_rtx_with_code (PATTERN (producer), SET, false);
361 rtx addr = arm_find_sub_rtx_with_code (PATTERN (consumer), SET, false);
362
363 if (!value || !addr || !MEM_P (SET_SRC (value)))
364 return 0;
365
366 value = SET_DEST (value);
367 addr = SET_DEST (addr);
368
369 return GET_MODE (value) == Pmode && reg_overlap_mentioned_p (value, addr);
370 }
371
372 /* Return non-zero iff the consumer (a multiply-accumulate or a
373 multiple-subtract instruction) has an accumulator dependency on the
374 result of the producer and no other dependency on that result. It
375 does not check if the producer is multiply-accumulate instruction. */
376 int
arm_mac_accumulator_is_result(rtx producer,rtx consumer)377 arm_mac_accumulator_is_result (rtx producer, rtx consumer)
378 {
379 rtx result;
380 rtx op0, op1, acc;
381
382 producer = PATTERN (producer);
383 consumer = PATTERN (consumer);
384
385 if (GET_CODE (producer) == COND_EXEC)
386 producer = COND_EXEC_CODE (producer);
387 if (GET_CODE (consumer) == COND_EXEC)
388 consumer = COND_EXEC_CODE (consumer);
389
390 if (GET_CODE (producer) != SET)
391 return 0;
392
393 result = XEXP (producer, 0);
394
395 if (GET_CODE (consumer) != SET)
396 return 0;
397
398 /* Check that the consumer is of the form
399 (set (...) (plus (mult ...) (...)))
400 or
401 (set (...) (minus (...) (mult ...))). */
402 if (GET_CODE (XEXP (consumer, 1)) == PLUS)
403 {
404 if (GET_CODE (XEXP (XEXP (consumer, 1), 0)) != MULT)
405 return 0;
406
407 op0 = XEXP (XEXP (XEXP (consumer, 1), 0), 0);
408 op1 = XEXP (XEXP (XEXP (consumer, 1), 0), 1);
409 acc = XEXP (XEXP (consumer, 1), 1);
410 }
411 else if (GET_CODE (XEXP (consumer, 1)) == MINUS)
412 {
413 if (GET_CODE (XEXP (XEXP (consumer, 1), 1)) != MULT)
414 return 0;
415
416 op0 = XEXP (XEXP (XEXP (consumer, 1), 1), 0);
417 op1 = XEXP (XEXP (XEXP (consumer, 1), 1), 1);
418 acc = XEXP (XEXP (consumer, 1), 0);
419 }
420 else
421 return 0;
422
423 return (reg_overlap_mentioned_p (result, acc)
424 && !reg_overlap_mentioned_p (result, op0)
425 && !reg_overlap_mentioned_p (result, op1));
426 }
427
428 /* Return non-zero if the destination of PRODUCER feeds the accumulator
429 operand of an MLA-like operation. */
430
431 int
aarch_accumulator_forwarding(rtx_insn * producer,rtx_insn * consumer)432 aarch_accumulator_forwarding (rtx_insn *producer, rtx_insn *consumer)
433 {
434 rtx producer_set = single_set (producer);
435 rtx consumer_set = single_set (consumer);
436
437 /* We are looking for a SET feeding a SET. */
438 if (!producer_set || !consumer_set)
439 return 0;
440
441 rtx dest = SET_DEST (producer_set);
442 rtx mla = SET_SRC (consumer_set);
443
444 /* We're looking for a register SET. */
445 if (!REG_P (dest))
446 return 0;
447
448 rtx accumulator;
449
450 /* Strip a zero_extend. */
451 if (GET_CODE (mla) == ZERO_EXTEND)
452 mla = XEXP (mla, 0);
453
454 switch (GET_CODE (mla))
455 {
456 case PLUS:
457 /* Possibly an MADD. */
458 if (GET_CODE (XEXP (mla, 0)) == MULT)
459 accumulator = XEXP (mla, 1);
460 else
461 return 0;
462 break;
463 case MINUS:
464 /* Possibly an MSUB. */
465 if (GET_CODE (XEXP (mla, 1)) == MULT)
466 accumulator = XEXP (mla, 0);
467 else
468 return 0;
469 break;
470 case FMA:
471 {
472 /* Possibly an FMADD/FMSUB/FNMADD/FNMSUB. */
473 if (REG_P (XEXP (mla, 1))
474 && REG_P (XEXP (mla, 2))
475 && (REG_P (XEXP (mla, 0))
476 || GET_CODE (XEXP (mla, 0)) == NEG))
477
478 {
479 /* FMADD/FMSUB. */
480 accumulator = XEXP (mla, 2);
481 }
482 else if (REG_P (XEXP (mla, 1))
483 && GET_CODE (XEXP (mla, 2)) == NEG
484 && (REG_P (XEXP (mla, 0))
485 || GET_CODE (XEXP (mla, 0)) == NEG))
486 {
487 /* FNMADD/FNMSUB. */
488 accumulator = XEXP (XEXP (mla, 2), 0);
489 }
490 else
491 return 0;
492 break;
493 }
494 default:
495 /* Not an MLA-like operation. */
496 return 0;
497 }
498
499 if (GET_CODE (accumulator) == SUBREG)
500 accumulator = SUBREG_REG (accumulator);
501
502 if (!REG_P (accumulator))
503 return 0;
504
505 return (REGNO (dest) == REGNO (accumulator));
506 }
507
508 /* Return non-zero if the consumer (a multiply-accumulate instruction)
509 has an accumulator dependency on the result of the producer (a
510 multiplication instruction) and no other dependency on that result. */
511 int
arm_mac_accumulator_is_mul_result(rtx producer,rtx consumer)512 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
513 {
514 rtx mul = PATTERN (producer);
515 rtx mac = PATTERN (consumer);
516 rtx mul_result;
517 rtx mac_op0, mac_op1, mac_acc;
518
519 if (GET_CODE (mul) == COND_EXEC)
520 mul = COND_EXEC_CODE (mul);
521 if (GET_CODE (mac) == COND_EXEC)
522 mac = COND_EXEC_CODE (mac);
523
524 /* Check that mul is of the form (set (...) (mult ...))
525 and mla is of the form (set (...) (plus (mult ...) (...))). */
526 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
527 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
528 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
529 return 0;
530
531 mul_result = XEXP (mul, 0);
532 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
533 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
534 mac_acc = XEXP (XEXP (mac, 1), 1);
535
536 return (reg_overlap_mentioned_p (mul_result, mac_acc)
537 && !reg_overlap_mentioned_p (mul_result, mac_op0)
538 && !reg_overlap_mentioned_p (mul_result, mac_op1));
539 }
540