/* Dependency checks for instruction scheduling, shared between ARM and
   AARCH64.

   Copyright (C) 1991-2018 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */


#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "rtl-iter.h"

/* In ARMv8-A there's a general expectation that AESE/AESMC
   and AESD/AESIMC sequences of the form:

   AESE Vn, _
   AESMC Vn, Vn

   will issue both instructions in a single cycle on super-scalar
   implementations.  This function identifies such pairs.  */

int
aarch_crypto_can_dual_issue (rtx_insn *producer_insn, rtx_insn *consumer_insn)
{
  rtx producer_set, consumer_set;
  rtx producer_src, consumer_src;

  producer_set = single_set (producer_insn);
  consumer_set = single_set (consumer_insn);

  producer_src = producer_set ? SET_SRC (producer_set) : NULL;
  consumer_src = consumer_set ? SET_SRC (consumer_set) : NULL;

  if (producer_src && consumer_src
      && GET_CODE (producer_src) == UNSPEC && GET_CODE (consumer_src) == UNSPEC
      && ((XINT (producer_src, 1) == UNSPEC_AESE
           && XINT (consumer_src, 1) == UNSPEC_AESMC)
          || (XINT (producer_src, 1) == UNSPEC_AESD
              && XINT (consumer_src, 1) == UNSPEC_AESIMC)))
  {
    unsigned int regno = REGNO (SET_DEST (producer_set));

    /* Before reload the registers are virtual, so the destination of
       consumer_set doesn't need to match.  */

    return (REGNO (SET_DEST (consumer_set)) == regno || !reload_completed)
	    && REGNO (XVECEXP (consumer_src, 0, 0)) == regno;
  }

  return 0;
}

/* Return TRUE if X is either an arithmetic shift left, or
   is a multiplication by a power of two.  */
bool
arm_rtx_shift_left_p (rtx x)
{
  enum rtx_code code = GET_CODE (x);

  if (code == MULT && CONST_INT_P (XEXP (x, 1))
      && exact_log2 (INTVAL (XEXP (x, 1))) > 0)
    return true;

  if (code == ASHIFT)
    return true;

  return false;
}

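/* RTX codes that arm_find_sub_rtx_with_code below treats as shifts when
   asked for any shift-like sub-rtx.  Note that ZERO_EXTEND and SIGN_EXTEND
   are included, so extended operands are matched as well.  */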
static rtx_code shift_rtx_codes[] =
  { ASHIFT, ROTATE, ASHIFTRT, LSHIFTRT,
    ROTATERT, ZERO_EXTEND, SIGN_EXTEND };

/* Traverse PATTERN looking for a sub-rtx with RTX_CODE CODE.
   If FIND_ANY_SHIFT then we are interested in anything which can
   reasonably be described as a SHIFT RTX.  */
static rtx
arm_find_sub_rtx_with_code (rtx pattern, rtx_code code, bool find_any_shift)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, pattern, NONCONST)
    {
      rtx x = *iter;
      if (find_any_shift)
	{
	  /* Left shifts might have been canonicalized to a MULT of some
	     power of two.  Make sure we catch them.  */
	  if (arm_rtx_shift_left_p (x))
	    return x;
	  else
	    for (unsigned int i = 0; i < ARRAY_SIZE (shift_rtx_codes); i++)
	      if (GET_CODE (x) == shift_rtx_codes[i])
		return x;
	}

      if (GET_CODE (x) == code)
	return x;
    }
  return NULL_RTX;
}

/* Traverse PATTERN looking for any sub-rtx which looks like a shift.  */
static rtx
arm_find_shift_sub_rtx (rtx pattern)
{
  return arm_find_sub_rtx_with_code (pattern, ASHIFT, true);
}

/* PRODUCER and CONSUMER are two potentially dependent RTX.  PRODUCER
   (possibly) contains a SET whose result we can access using the
   SET_DEST macro.  We place the RTX written by PRODUCER in
   *SET_SOURCE.
   Similarly, CONSUMER (possibly) contains a SET which has an operand
   we can access using SET_SRC.  We place this operand in
   *SET_DESTINATION.

   Return nonzero if we found the SET RTX we expected.  */
static int
arm_get_set_operands (rtx producer, rtx consumer,
		      rtx *set_source, rtx *set_destination)
{
  rtx set_producer = arm_find_sub_rtx_with_code (PATTERN (producer),
						 SET, false);
  rtx set_consumer = arm_find_sub_rtx_with_code (PATTERN (consumer),
						 SET, false);

  if (set_producer && set_consumer)
    {
      *set_source = SET_DEST (set_producer);
      *set_destination = SET_SRC (set_consumer);
      return 1;
    }
  return 0;
}

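/* Return TRUE if VAL is a CONST_INT equal to 0x00ff00ff00ff00ff truncated
   to MODE, i.e. the mask applied to the right-shifted half of a REV16
   pattern.  */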
bool
aarch_rev16_shright_mask_imm_p (rtx val, machine_mode mode)
{
  return CONST_INT_P (val)
         && INTVAL (val)
            == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff),
                                   mode);
}

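/* Return TRUE if VAL is a CONST_INT equal to 0xff00ff00ff00ff00 truncated
   to MODE, i.e. the mask applied to the left-shifted half of a REV16
   pattern.  */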
bool
aarch_rev16_shleft_mask_imm_p (rtx val, machine_mode mode)
{
  return CONST_INT_P (val)
         && INTVAL (val)
            == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff00),
                                   mode);
}

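/* Helper for aarch_rev16_p.  Return TRUE if LHS has the form
   ((x << 8) & <mask>), RHS has the form ((x >> 8) & <mask>), both shift
   the same register, and the two masks are the REV16 masks for MODE.  */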
static bool
aarch_rev16_p_1 (rtx lhs, rtx rhs, machine_mode mode)
{
  if (GET_CODE (lhs) == AND
         && GET_CODE (XEXP (lhs, 0)) == ASHIFT
            && CONST_INT_P (XEXP (XEXP (lhs, 0), 1))
            && INTVAL (XEXP (XEXP (lhs, 0), 1)) == 8
            && REG_P (XEXP (XEXP (lhs, 0), 0))
         && CONST_INT_P (XEXP (lhs, 1))
      && GET_CODE (rhs) == AND
         && GET_CODE (XEXP (rhs, 0)) == LSHIFTRT
            && REG_P (XEXP (XEXP (rhs, 0), 0))
            && CONST_INT_P (XEXP (XEXP (rhs, 0), 1))
            && INTVAL (XEXP (XEXP (rhs, 0), 1)) == 8
         && CONST_INT_P (XEXP (rhs, 1))
      && REGNO (XEXP (XEXP (rhs, 0), 0)) == REGNO (XEXP (XEXP (lhs, 0), 0)))
    {
      rtx lhs_mask = XEXP (lhs, 1);
      rtx rhs_mask = XEXP (rhs, 1);

      return aarch_rev16_shright_mask_imm_p (rhs_mask, mode)
             && aarch_rev16_shleft_mask_imm_p (lhs_mask, mode);
    }

  return false;
}

/* Recognise a sequence of bitwise operations corresponding to a rev16
   operation.  These will be of the form:
     ((x >> 8) & 0x00ff00ff)
   | ((x << 8) & 0xff00ff00)
   for SImode and with similar but wider bitmasks for DImode.
   The two sub-expressions of the IOR can appear on either side so check both
   permutations with the help of aarch_rev16_p_1 above.  */

bool
aarch_rev16_p (rtx x)
{
  rtx left_sub_rtx, right_sub_rtx;
  bool is_rev = false;

  if (GET_CODE (x) != IOR)
    return false;

  left_sub_rtx = XEXP (x, 0);
  right_sub_rtx = XEXP (x, 1);

  /* There are no canonicalisation rules for the position of the two shifts
     involved in a rev, so try both permutations.  */
  is_rev = aarch_rev16_p_1 (left_sub_rtx, right_sub_rtx, GET_MODE (x));

  if (!is_rev)
    is_rev = aarch_rev16_p_1 (right_sub_rtx, left_sub_rtx, GET_MODE (x));

  return is_rev;
}

/* Return nonzero if the CONSUMER instruction (a load) does need
   PRODUCER's value to calculate the address.  */
int
arm_early_load_addr_dep (rtx producer, rtx consumer)
{
  rtx value, addr;

  if (!arm_get_set_operands (producer, consumer, &value, &addr))
    return 0;

  return reg_overlap_mentioned_p (value, addr);
}

/* Return nonzero if the CONSUMER instruction (a load) does need
   a Pmode PRODUCER's value to calculate the address.  */

int
arm_early_load_addr_dep_ptr (rtx producer, rtx consumer)
{
  rtx value = arm_find_sub_rtx_with_code (PATTERN (producer), SET, false);
  rtx addr = arm_find_sub_rtx_with_code (PATTERN (consumer), SET, false);

  if (!value || !addr || !MEM_P (SET_SRC (value)))
    return 0;

  value = SET_DEST (value);
  addr = SET_SRC (addr);

  return GET_MODE (value) == Pmode && reg_overlap_mentioned_p (value, addr);
}

/* Return nonzero if the CONSUMER instruction (an ALU op) does not
   have an early register shift value or amount dependency on the
   result of PRODUCER.  */
int
arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
{
  rtx value, op;
  rtx early_op;

  if (!arm_get_set_operands (producer, consumer, &value, &op))
    return 0;

  if ((early_op = arm_find_shift_sub_rtx (op)))
    return !reg_overlap_mentioned_p (value, early_op);

  return 0;
}

/* Return nonzero if the CONSUMER instruction (an ALU op) does not
   have an early register shift value dependency on the result of
   PRODUCER.  */
int
arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
{
  rtx value, op;
  rtx early_op;

  if (!arm_get_set_operands (producer, consumer, &value, &op))
    return 0;

  if ((early_op = arm_find_shift_sub_rtx (op)))
    /* We want to check the value being shifted.  */
    if (!reg_overlap_mentioned_p (value, XEXP (early_op, 0)))
      return 1;

  return 0;
}

/* Return nonzero if the CONSUMER (a mul or mac op) does not
   have an early register mult dependency on the result of
   PRODUCER.  */
int
arm_no_early_mul_dep (rtx producer, rtx consumer)
{
  rtx value, op;

  if (!arm_get_set_operands (producer, consumer, &value, &op))
    return 0;

  if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
    {
      if (GET_CODE (XEXP (op, 0)) == MULT)
	return !reg_overlap_mentioned_p (value, XEXP (op, 0));
      else
	return !reg_overlap_mentioned_p (value, XEXP (op, 1));
    }

  return 0;
}

/* Return nonzero if the CONSUMER instruction (a store) does not need
   PRODUCER's value to calculate the address.  */

int
arm_no_early_store_addr_dep (rtx producer, rtx consumer)
{
  rtx value = arm_find_sub_rtx_with_code (PATTERN (producer), SET, false);
  rtx addr = arm_find_sub_rtx_with_code (PATTERN (consumer), SET, false);

  if (value)
    value = SET_DEST (value);

  if (addr)
    addr = SET_DEST (addr);

  if (!value || !addr)
    return 0;

  return !reg_overlap_mentioned_p (value, addr);
}

/* Return nonzero if the CONSUMER instruction (a store) does need
   PRODUCER's value to calculate the address.  */

int
arm_early_store_addr_dep (rtx producer, rtx consumer)
{
  return !arm_no_early_store_addr_dep (producer, consumer);
}

/* Return nonzero if the CONSUMER instruction (a store) does need
   a Pmode PRODUCER's value to calculate the address.  */

int
arm_early_store_addr_dep_ptr (rtx producer, rtx consumer)
{
  rtx value = arm_find_sub_rtx_with_code (PATTERN (producer), SET, false);
  rtx addr = arm_find_sub_rtx_with_code (PATTERN (consumer), SET, false);

  if (!value || !addr || !MEM_P (SET_SRC (value)))
    return 0;

  value = SET_DEST (value);
  addr = SET_DEST (addr);

  return GET_MODE (value) == Pmode && reg_overlap_mentioned_p (value, addr);
}

/* Return non-zero iff the consumer (a multiply-accumulate or a
   multiply-subtract instruction) has an accumulator dependency on the
   result of the producer and no other dependency on that result.  It
   does not check whether the producer is itself a multiply-accumulate
   instruction.  */
int
arm_mac_accumulator_is_result (rtx producer, rtx consumer)
{
  rtx result;
  rtx op0, op1, acc;

  producer = PATTERN (producer);
  consumer = PATTERN (consumer);

  if (GET_CODE (producer) == COND_EXEC)
    producer = COND_EXEC_CODE (producer);
  if (GET_CODE (consumer) == COND_EXEC)
    consumer = COND_EXEC_CODE (consumer);

  if (GET_CODE (producer) != SET)
    return 0;

  result = XEXP (producer, 0);

  if (GET_CODE (consumer) != SET)
    return 0;

  /* Check that the consumer is of the form
     (set (...) (plus (mult ...) (...)))
     or
     (set (...) (minus (...) (mult ...))).  */
  if (GET_CODE (XEXP (consumer, 1)) == PLUS)
    {
      if (GET_CODE (XEXP (XEXP (consumer, 1), 0)) != MULT)
        return 0;

      op0 = XEXP (XEXP (XEXP (consumer, 1), 0), 0);
      op1 = XEXP (XEXP (XEXP (consumer, 1), 0), 1);
      acc = XEXP (XEXP (consumer, 1), 1);
    }
  else if (GET_CODE (XEXP (consumer, 1)) == MINUS)
    {
      if (GET_CODE (XEXP (XEXP (consumer, 1), 1)) != MULT)
        return 0;

      op0 = XEXP (XEXP (XEXP (consumer, 1), 1), 0);
      op1 = XEXP (XEXP (XEXP (consumer, 1), 1), 1);
      acc = XEXP (XEXP (consumer, 1), 0);
    }
  else
    return 0;

  return (reg_overlap_mentioned_p (result, acc)
          && !reg_overlap_mentioned_p (result, op0)
          && !reg_overlap_mentioned_p (result, op1));
}

/* Return non-zero if the destination of PRODUCER feeds the accumulator
   operand of an MLA-like operation.  */

int
aarch_accumulator_forwarding (rtx_insn *producer, rtx_insn *consumer)
{
  rtx producer_set = single_set (producer);
  rtx consumer_set = single_set (consumer);

  /* We are looking for a SET feeding a SET.  */
  if (!producer_set || !consumer_set)
    return 0;

  rtx dest = SET_DEST (producer_set);
  rtx mla = SET_SRC (consumer_set);

  /* We're looking for a register SET.  */
  if (!REG_P (dest))
    return 0;

  rtx accumulator;

  /* Strip a zero_extend.  */
  if (GET_CODE (mla) == ZERO_EXTEND)
    mla = XEXP (mla, 0);

  switch (GET_CODE (mla))
    {
    case PLUS:
      /* Possibly an MADD.  */
      if (GET_CODE (XEXP (mla, 0)) == MULT)
	accumulator = XEXP (mla, 1);
      else
	return 0;
      break;
    case MINUS:
      /* Possibly an MSUB.  */
      if (GET_CODE (XEXP (mla, 1)) == MULT)
	accumulator = XEXP (mla, 0);
      else
	return 0;
      break;
    case FMA:
      {
	/* Possibly an FMADD/FMSUB/FNMADD/FNMSUB.  */
	if (REG_P (XEXP (mla, 1))
	    && REG_P (XEXP (mla, 2))
	    && (REG_P (XEXP (mla, 0))
		|| GET_CODE (XEXP (mla, 0)) == NEG))
	  {
	    /* FMADD/FMSUB.  */
	    accumulator = XEXP (mla, 2);
	  }
	else if (REG_P (XEXP (mla, 1))
		 && GET_CODE (XEXP (mla, 2)) == NEG
		 && (REG_P (XEXP (mla, 0))
		     || GET_CODE (XEXP (mla, 0)) == NEG))
	  {
	    /* FNMADD/FNMSUB.  */
	    accumulator = XEXP (XEXP (mla, 2), 0);
	  }
	else
	  return 0;
	break;
      }
    default:
      /* Not an MLA-like operation.  */
      return 0;
    }

  if (GET_CODE (accumulator) == SUBREG)
    accumulator = SUBREG_REG (accumulator);

  if (!REG_P (accumulator))
    return 0;

  return (REGNO (dest) == REGNO (accumulator));
}

/* Return non-zero if the consumer (a multiply-accumulate instruction)
   has an accumulator dependency on the result of the producer (a
   multiplication instruction) and no other dependency on that result.  */
int
arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
{
  rtx mul = PATTERN (producer);
  rtx mac = PATTERN (consumer);
  rtx mul_result;
  rtx mac_op0, mac_op1, mac_acc;

  if (GET_CODE (mul) == COND_EXEC)
    mul = COND_EXEC_CODE (mul);
  if (GET_CODE (mac) == COND_EXEC)
    mac = COND_EXEC_CODE (mac);

  /* Check that mul is of the form (set (...) (mult ...))
     and mla is of the form (set (...) (plus (mult ...) (...))).  */
  if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
      || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
          || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
    return 0;

  mul_result = XEXP (mul, 0);
  mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
  mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
  mac_acc = XEXP (XEXP (mac, 1), 1);

  return (reg_overlap_mentioned_p (mul_result, mac_acc)
          && !reg_overlap_mentioned_p (mul_result, mac_op0)
          && !reg_overlap_mentioned_p (mul_result, mac_op1));
}