1 /* Dependency checks for instruction scheduling, shared between ARM and
2    AARCH64.
3 
4    Copyright (C) 1991-2016 Free Software Foundation, Inc.
5    Contributed by ARM Ltd.
6 
7    This file is part of GCC.
8 
9    GCC is free software; you can redistribute it and/or modify it
10    under the terms of the GNU General Public License as published
11    by the Free Software Foundation; either version 3, or (at your
12    option) any later version.
13 
14    GCC is distributed in the hope that it will be useful, but WITHOUT
15    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
17    License for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with GCC; see the file COPYING3.  If not see
21    <http://www.gnu.org/licenses/>.  */
22 
23 
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "rtl-iter.h"
30 
31 /* In ARMv8-A there's a general expectation that AESE/AESMC
32    and AESD/AESIMC sequences of the form:
33 
34    AESE Vn, _
35    AESMC Vn, Vn
36 
37    will issue both instructions in a single cycle on super-scalar
38    implementations.  This function identifies such pairs.  */
39 
40 int
aarch_crypto_can_dual_issue(rtx_insn * producer_insn,rtx_insn * consumer_insn)41 aarch_crypto_can_dual_issue (rtx_insn *producer_insn, rtx_insn *consumer_insn)
42 {
43   rtx producer_set, consumer_set;
44   rtx producer_src, consumer_src;
45 
46   producer_set = single_set (producer_insn);
47   consumer_set = single_set (consumer_insn);
48 
49   producer_src = producer_set ? SET_SRC (producer_set) : NULL;
50   consumer_src = consumer_set ? SET_SRC (consumer_set) : NULL;
51 
52   if (producer_src && consumer_src
53       && GET_CODE (producer_src) == UNSPEC && GET_CODE (consumer_src) == UNSPEC
54       && ((XINT (producer_src, 1) == UNSPEC_AESE
55            && XINT (consumer_src, 1) == UNSPEC_AESMC)
56           || (XINT (producer_src, 1) == UNSPEC_AESD
57               && XINT (consumer_src, 1) == UNSPEC_AESIMC)))
58   {
59     unsigned int regno = REGNO (SET_DEST (producer_set));
60 
61     /* Before reload the registers are virtual, so the destination of
62        consumer_set doesn't need to match.  */
63 
64     return (REGNO (SET_DEST (consumer_set)) == regno || !reload_completed)
65 	    && REGNO (XVECEXP (consumer_src, 0, 0)) == regno;
66   }
67 
68   return 0;
69 }
70 
71 /* Return TRUE if X is either an arithmetic shift left, or
72    is a multiplication by a power of two.  */
73 bool
arm_rtx_shift_left_p(rtx x)74 arm_rtx_shift_left_p (rtx x)
75 {
76   enum rtx_code code = GET_CODE (x);
77 
78   if (code == MULT && CONST_INT_P (XEXP (x, 1))
79       && exact_log2 (INTVAL (XEXP (x, 1))) > 0)
80     return true;
81 
82   if (code == ASHIFT)
83     return true;
84 
85   return false;
86 }
87 
88 static rtx_code shift_rtx_codes[] =
89   { ASHIFT, ROTATE, ASHIFTRT, LSHIFTRT,
90     ROTATERT, ZERO_EXTEND, SIGN_EXTEND };
91 
92 /* Traverse PATTERN looking for a sub-rtx with RTX_CODE CODE.
93    If FIND_ANY_SHIFT then we are interested in anything which can
94    reasonably be described as a SHIFT RTX.  */
95 static rtx
arm_find_sub_rtx_with_code(rtx pattern,rtx_code code,bool find_any_shift)96 arm_find_sub_rtx_with_code (rtx pattern, rtx_code code, bool find_any_shift)
97 {
98   subrtx_var_iterator::array_type array;
99   FOR_EACH_SUBRTX_VAR (iter, array, pattern, NONCONST)
100     {
101       rtx x = *iter;
102       if (find_any_shift)
103 	{
104 	  /* Left shifts might have been canonicalized to a MULT of some
105 	     power of two.  Make sure we catch them.  */
106 	  if (arm_rtx_shift_left_p (x))
107 	    return x;
108 	  else
109 	    for (unsigned int i = 0; i < ARRAY_SIZE (shift_rtx_codes); i++)
110 	      if (GET_CODE (x) == shift_rtx_codes[i])
111 		return x;
112 	}
113 
114       if (GET_CODE (x) == code)
115 	return x;
116     }
117   return NULL_RTX;
118 }
119 
120 /* Traverse PATTERN looking for any sub-rtx which looks like a shift.  */
121 static rtx
arm_find_shift_sub_rtx(rtx pattern)122 arm_find_shift_sub_rtx (rtx pattern)
123 {
124   return arm_find_sub_rtx_with_code (pattern, ASHIFT, true);
125 }
126 
127 /* PRODUCER and CONSUMER are two potentially dependant RTX.  PRODUCER
128    (possibly) contains a SET which will provide a result we can access
129    using the SET_DEST macro.  We will place the RTX which would be
130    written by PRODUCER in SET_SOURCE.
131    Similarly, CONSUMER (possibly) contains a SET which has an operand
132    we can access using SET_SRC.  We place this operand in
133    SET_DESTINATION.
134 
135    Return nonzero if we found the SET RTX we expected.  */
136 static int
arm_get_set_operands(rtx producer,rtx consumer,rtx * set_source,rtx * set_destination)137 arm_get_set_operands (rtx producer, rtx consumer,
138 		      rtx *set_source, rtx *set_destination)
139 {
140   rtx set_producer = arm_find_sub_rtx_with_code (PATTERN (producer),
141 						 SET, false);
142   rtx set_consumer = arm_find_sub_rtx_with_code (PATTERN (consumer),
143 						 SET, false);
144 
145   if (set_producer && set_consumer)
146     {
147       *set_source = SET_DEST (set_producer);
148       *set_destination = SET_SRC (set_consumer);
149       return 1;
150     }
151   return 0;
152 }
153 
154 bool
aarch_rev16_shright_mask_imm_p(rtx val,machine_mode mode)155 aarch_rev16_shright_mask_imm_p (rtx val, machine_mode mode)
156 {
157   return CONST_INT_P (val)
158          && INTVAL (val)
159             == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff),
160                                    mode);
161 }
162 
163 bool
aarch_rev16_shleft_mask_imm_p(rtx val,machine_mode mode)164 aarch_rev16_shleft_mask_imm_p (rtx val, machine_mode mode)
165 {
166   return CONST_INT_P (val)
167          && INTVAL (val)
168             == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff00),
169                                    mode);
170 }
171 
172 
173 static bool
aarch_rev16_p_1(rtx lhs,rtx rhs,machine_mode mode)174 aarch_rev16_p_1 (rtx lhs, rtx rhs, machine_mode mode)
175 {
176   if (GET_CODE (lhs) == AND
177          && GET_CODE (XEXP (lhs, 0)) == ASHIFT
178             && CONST_INT_P (XEXP (XEXP (lhs, 0), 1))
179             && INTVAL (XEXP (XEXP (lhs, 0), 1)) == 8
180             && REG_P (XEXP (XEXP (lhs, 0), 0))
181          && CONST_INT_P (XEXP (lhs, 1))
182       && GET_CODE (rhs) == AND
183          && GET_CODE (XEXP (rhs, 0)) == LSHIFTRT
184             && REG_P (XEXP (XEXP (rhs, 0), 0))
185             && CONST_INT_P (XEXP (XEXP (rhs, 0), 1))
186             && INTVAL (XEXP (XEXP (rhs, 0), 1)) == 8
187          && CONST_INT_P (XEXP (rhs, 1))
188       && REGNO (XEXP (XEXP (rhs, 0), 0)) == REGNO (XEXP (XEXP (lhs, 0), 0)))
189 
190     {
191       rtx lhs_mask = XEXP (lhs, 1);
192       rtx rhs_mask = XEXP (rhs, 1);
193 
194       return aarch_rev16_shright_mask_imm_p (rhs_mask, mode)
195              && aarch_rev16_shleft_mask_imm_p (lhs_mask, mode);
196     }
197 
198   return false;
199 }
200 
201 /* Recognise a sequence of bitwise operations corresponding to a rev16 operation.
202    These will be of the form:
203      ((x >> 8) & 0x00ff00ff)
204    | ((x << 8) & 0xff00ff00)
205    for SImode and with similar but wider bitmasks for DImode.
206    The two sub-expressions of the IOR can appear on either side so check both
207    permutations with the help of aarch_rev16_p_1 above.  */
208 
209 bool
aarch_rev16_p(rtx x)210 aarch_rev16_p (rtx x)
211 {
212   rtx left_sub_rtx, right_sub_rtx;
213   bool is_rev = false;
214 
215   if (GET_CODE (x) != IOR)
216     return false;
217 
218   left_sub_rtx = XEXP (x, 0);
219   right_sub_rtx = XEXP (x, 1);
220 
221   /* There are no canonicalisation rules for the position of the two shifts
222      involved in a rev, so try both permutations.  */
223   is_rev = aarch_rev16_p_1 (left_sub_rtx, right_sub_rtx, GET_MODE (x));
224 
225   if (!is_rev)
226     is_rev = aarch_rev16_p_1 (right_sub_rtx, left_sub_rtx, GET_MODE (x));
227 
228   return is_rev;
229 }
230 
231 /* Return nonzero if the CONSUMER instruction (a load) does need
232    PRODUCER's value to calculate the address.  */
233 int
arm_early_load_addr_dep(rtx producer,rtx consumer)234 arm_early_load_addr_dep (rtx producer, rtx consumer)
235 {
236   rtx value, addr;
237 
238   if (!arm_get_set_operands (producer, consumer, &value, &addr))
239     return 0;
240 
241   return reg_overlap_mentioned_p (value, addr);
242 }
243 
244 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
245    have an early register shift value or amount dependency on the
246    result of PRODUCER.  */
247 int
arm_no_early_alu_shift_dep(rtx producer,rtx consumer)248 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
249 {
250   rtx value, op;
251   rtx early_op;
252 
253   if (!arm_get_set_operands (producer, consumer, &value, &op))
254     return 0;
255 
256   if ((early_op = arm_find_shift_sub_rtx (op)))
257     {
258       if (REG_P (early_op))
259 	early_op = op;
260 
261       return !reg_overlap_mentioned_p (value, early_op);
262     }
263 
264   return 0;
265 }
266 
267 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
268    have an early register shift value dependency on the result of
269    PRODUCER.  */
270 int
arm_no_early_alu_shift_value_dep(rtx producer,rtx consumer)271 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
272 {
273   rtx value, op;
274   rtx early_op;
275 
276   if (!arm_get_set_operands (producer, consumer, &value, &op))
277     return 0;
278 
279   if ((early_op = arm_find_shift_sub_rtx (op)))
280     /* We want to check the value being shifted.  */
281     if (!reg_overlap_mentioned_p (value, XEXP (early_op, 0)))
282       return 1;
283 
284   return 0;
285 }
286 
287 /* Return nonzero if the CONSUMER (a mul or mac op) does not
288    have an early register mult dependency on the result of
289    PRODUCER.  */
290 int
arm_no_early_mul_dep(rtx producer,rtx consumer)291 arm_no_early_mul_dep (rtx producer, rtx consumer)
292 {
293   rtx value, op;
294 
295   if (!arm_get_set_operands (producer, consumer, &value, &op))
296     return 0;
297 
298   if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
299     {
300       if (GET_CODE (XEXP (op, 0)) == MULT)
301 	return !reg_overlap_mentioned_p (value, XEXP (op, 0));
302       else
303 	return !reg_overlap_mentioned_p (value, XEXP (op, 1));
304     }
305 
306   return 0;
307 }
308 
309 /* Return nonzero if the CONSUMER instruction (a store) does not need
310    PRODUCER's value to calculate the address.  */
311 
312 int
arm_no_early_store_addr_dep(rtx producer,rtx consumer)313 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
314 {
315   rtx value = arm_find_sub_rtx_with_code (PATTERN (producer), SET, false);
316   rtx addr = arm_find_sub_rtx_with_code (PATTERN (consumer), SET, false);
317 
318   if (value)
319     value = SET_DEST (value);
320 
321   if (addr)
322     addr = SET_DEST (addr);
323 
324   if (!value || !addr)
325     return 0;
326 
327   return !reg_overlap_mentioned_p (value, addr);
328 }
329 
330 /* Return nonzero if the CONSUMER instruction (a store) does need
331    PRODUCER's value to calculate the address.  */
332 
333 int
arm_early_store_addr_dep(rtx producer,rtx consumer)334 arm_early_store_addr_dep (rtx producer, rtx consumer)
335 {
336   return !arm_no_early_store_addr_dep (producer, consumer);
337 }
338 
339 /* Return non-zero iff the consumer (a multiply-accumulate or a
340    multiple-subtract instruction) has an accumulator dependency on the
341    result of the producer and no other dependency on that result.  It
342    does not check if the producer is multiply-accumulate instruction.  */
343 int
arm_mac_accumulator_is_result(rtx producer,rtx consumer)344 arm_mac_accumulator_is_result (rtx producer, rtx consumer)
345 {
346   rtx result;
347   rtx op0, op1, acc;
348 
349   producer = PATTERN (producer);
350   consumer = PATTERN (consumer);
351 
352   if (GET_CODE (producer) == COND_EXEC)
353     producer = COND_EXEC_CODE (producer);
354   if (GET_CODE (consumer) == COND_EXEC)
355     consumer = COND_EXEC_CODE (consumer);
356 
357   if (GET_CODE (producer) != SET)
358     return 0;
359 
360   result = XEXP (producer, 0);
361 
362   if (GET_CODE (consumer) != SET)
363     return 0;
364 
365   /* Check that the consumer is of the form
366      (set (...) (plus (mult ...) (...)))
367      or
368      (set (...) (minus (...) (mult ...))).  */
369   if (GET_CODE (XEXP (consumer, 1)) == PLUS)
370     {
371       if (GET_CODE (XEXP (XEXP (consumer, 1), 0)) != MULT)
372         return 0;
373 
374       op0 = XEXP (XEXP (XEXP (consumer, 1), 0), 0);
375       op1 = XEXP (XEXP (XEXP (consumer, 1), 0), 1);
376       acc = XEXP (XEXP (consumer, 1), 1);
377     }
378   else if (GET_CODE (XEXP (consumer, 1)) == MINUS)
379     {
380       if (GET_CODE (XEXP (XEXP (consumer, 1), 1)) != MULT)
381         return 0;
382 
383       op0 = XEXP (XEXP (XEXP (consumer, 1), 1), 0);
384       op1 = XEXP (XEXP (XEXP (consumer, 1), 1), 1);
385       acc = XEXP (XEXP (consumer, 1), 0);
386     }
387   else
388     return 0;
389 
390   return (reg_overlap_mentioned_p (result, acc)
391           && !reg_overlap_mentioned_p (result, op0)
392           && !reg_overlap_mentioned_p (result, op1));
393 }
394 
395 /* Return non-zero if the destination of PRODUCER feeds the accumulator
396    operand of an MLA-like operation.  */
397 
398 int
aarch_accumulator_forwarding(rtx_insn * producer,rtx_insn * consumer)399 aarch_accumulator_forwarding (rtx_insn *producer, rtx_insn *consumer)
400 {
401   rtx producer_set = single_set (producer);
402   rtx consumer_set = single_set (consumer);
403 
404   /* We are looking for a SET feeding a SET.  */
405   if (!producer_set || !consumer_set)
406     return 0;
407 
408   rtx dest = SET_DEST (producer_set);
409   rtx mla = SET_SRC (consumer_set);
410 
411   /* We're looking for a register SET.  */
412   if (!REG_P (dest))
413     return 0;
414 
415   rtx accumulator;
416 
417   /* Strip a zero_extend.  */
418   if (GET_CODE (mla) == ZERO_EXTEND)
419     mla = XEXP (mla, 0);
420 
421   switch (GET_CODE (mla))
422     {
423     case PLUS:
424       /* Possibly an MADD.  */
425       if (GET_CODE (XEXP (mla, 0)) == MULT)
426 	accumulator = XEXP (mla, 1);
427       else
428 	return 0;
429       break;
430     case MINUS:
431       /* Possibly an MSUB.  */
432       if (GET_CODE (XEXP (mla, 1)) == MULT)
433 	accumulator = XEXP (mla, 0);
434       else
435 	return 0;
436       break;
437     case FMA:
438 	{
439 	  /* Possibly an FMADD/FMSUB/FNMADD/FNMSUB.  */
440 	  if (REG_P (XEXP (mla, 1))
441 	      && REG_P (XEXP (mla, 2))
442 	      && (REG_P (XEXP (mla, 0))
443 		  || GET_CODE (XEXP (mla, 0)) == NEG))
444 
445 	    {
446 	      /* FMADD/FMSUB.  */
447 	      accumulator = XEXP (mla, 2);
448 	    }
449 	  else if (REG_P (XEXP (mla, 1))
450 		   && GET_CODE (XEXP (mla, 2)) == NEG
451 		   && (REG_P (XEXP (mla, 0))
452 		       || GET_CODE (XEXP (mla, 0)) == NEG))
453 	    {
454 	      /* FNMADD/FNMSUB.  */
455 	      accumulator = XEXP (XEXP (mla, 2), 0);
456 	    }
457 	  else
458 	    return 0;
459 	  break;
460 	}
461       default:
462 	/* Not an MLA-like operation.  */
463 	return 0;
464     }
465 
466   if (GET_CODE (accumulator) == SUBREG)
467     accumulator = SUBREG_REG (accumulator);
468 
469   if (!REG_P (accumulator))
470     return 0;
471 
472   return (REGNO (dest) == REGNO (accumulator));
473 }
474 
475 /* Return nonzero if the CONSUMER instruction is some sort of
476    arithmetic or logic + shift operation, and the register we are
477    writing in PRODUCER is not used in a register shift by register
478    operation.  */
479 
480 int
aarch_forward_to_shift_is_not_shifted_reg(rtx_insn * producer,rtx_insn * consumer)481 aarch_forward_to_shift_is_not_shifted_reg (rtx_insn *producer,
482 					   rtx_insn *consumer)
483 {
484   rtx value, op;
485   rtx early_op;
486 
487   if (!arm_get_set_operands (producer, consumer, &value, &op))
488     return 0;
489 
490   if ((early_op = arm_find_shift_sub_rtx (op)))
491     {
492       if (REG_P (early_op))
493 	early_op = op;
494 
495       /* Any other canonicalisation of a shift is a shift-by-constant
496 	 so we don't care.  */
497       if (GET_CODE (early_op) == ASHIFT)
498 	return (!REG_P (XEXP (early_op, 0))
499 		|| !REG_P (XEXP (early_op, 1)));
500       else
501 	return 1;
502     }
503 
504   return 0;
505 }
506 
507 /* Return non-zero if the consumer (a multiply-accumulate instruction)
508    has an accumulator dependency on the result of the producer (a
509    multiplication instruction) and no other dependency on that result.  */
510 int
arm_mac_accumulator_is_mul_result(rtx producer,rtx consumer)511 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
512 {
513   rtx mul = PATTERN (producer);
514   rtx mac = PATTERN (consumer);
515   rtx mul_result;
516   rtx mac_op0, mac_op1, mac_acc;
517 
518   if (GET_CODE (mul) == COND_EXEC)
519     mul = COND_EXEC_CODE (mul);
520   if (GET_CODE (mac) == COND_EXEC)
521     mac = COND_EXEC_CODE (mac);
522 
523   /* Check that mul is of the form (set (...) (mult ...))
524      and mla is of the form (set (...) (plus (mult ...) (...))).  */
525   if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
526       || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
527           || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
528     return 0;
529 
530   mul_result = XEXP (mul, 0);
531   mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
532   mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
533   mac_acc = XEXP (XEXP (mac, 1), 1);
534 
535   return (reg_overlap_mentioned_p (mul_result, mac_acc)
536           && !reg_overlap_mentioned_p (mul_result, mac_op0)
537           && !reg_overlap_mentioned_p (mul_result, mac_op1));
538 }
539