1 /* FMA steering optimization pass for Cortex-A57.
2    Copyright (C) 2015-2020 Free Software Foundation, Inc.
3    Contributed by ARM Ltd.
4 
5    This file is part of GCC.
6 
7    GCC is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11 
12    GCC is distributed in the hope that it will be useful, but
13    WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15    General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with GCC; see the file COPYING3.  If not see
19    <http://www.gnu.org/licenses/>.  */
20 
21 #define IN_TARGET_CODE 1
22 
23 #include "config.h"
24 #define INCLUDE_LIST
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "df.h"
31 #include "insn-config.h"
32 #include "regs.h"
33 #include "memmodel.h"
34 #include "emit-rtl.h"
35 #include "recog.h"
36 #include "cfganal.h"
37 #include "insn-attr.h"
38 #include "context.h"
39 #include "tree-pass.h"
40 #include "function-abi.h"
41 #include "regrename.h"
42 #include "aarch64-protos.h"
43 
44 /* For better performance, the destination of FMADD/FMSUB instructions should
45    have the same parity as their accumulator register if the accumulator
46    contains the result of a previous FMUL or FMADD/FMSUB instruction if
47    targetting Cortex-A57 processors.  Performance is also increased by
48    otherwise keeping a good balance in the parity of the destination register
49    of FMUL or FMADD/FMSUB.
50 
51    This pass ensure that registers are renamed so that these conditions hold.
52    We reuse the existing register renaming facility from regrename.c to build
53    dependency chains and expose candidate registers for renaming.
54 
55 
56    The algorithm has three steps:
57 
58    First, the functions of the register renaming pass are called.  These
59    analyze the instructions and produce a list of def/use chains of
60    instructions.
61 
62    Next, this information is used to build trees of multiply and
63    multiply-accumulate instructions.  The roots of these trees are any
64    multiply, or any multiply-accumulate whose accumulator is not dependent on
65    a multiply or multiply-accumulate instruction.  A child is added to the
66    tree where a dependency chain exists between the result of the parent
67    instruction and the accumulator operand of the child, as in the diagram
68    below:
69 
70 		 fmul s2, s0, s1
71 		/		\
72    fmadd s0, s1, s1, s2   fmadd s4, s1, s1 s2
73 	    |
74    fmadd s3, s1, s1, s0
75 
76    Trees made of a single instruction are permitted.
77 
78    Finally, renaming is performed.  The parity of the destination register at
79    the root of a tree is checked against the current balance of multiply and
80    multiply-accumulate on each pipeline.  If necessary, the root of a tree is
81    renamed, in which case the rest of the tree is then renamed to keep the same
82    parity in the destination registers of all instructions in the tree.  */
83 
84 
85 
86 /* Forward declarations.  */
87 class fma_node;
88 class fma_root_node;
89 class func_fma_steering;
90 
91 /* Dependencies between FMUL or FMADD/FMSUB instructions and subsequent
92    FMADD/FMSUB instructions form a graph.  This is because alternatives can
93    make a register be set by several FMUL or FMADD/FMSUB instructions in
94    different basic blocks and because of loops.  For ease of browsing, the
95    connected components of this graph are broken up into forests of trees.
96    Forests are represented by fma_forest objects, contained in the fma_forests
97    list.  Using a separate object for the forests allows for a better use of
98    memory as there is some information that is global to each forest, such as
99    the number of FMSUB and FMADD/FMSUB instructions currently scheduled on each
100    floating-point execution pipelines.  */
101 
102 class fma_forest
103 {
104 public:
105   fma_forest (func_fma_steering *, fma_root_node *, int);
106   ~fma_forest ();
107 
108   int get_id ();
109   std::list<fma_root_node *> *get_roots ();
110   func_fma_steering *get_globals ();
111   int get_target_parity ();
112   void fma_node_created (fma_node *);
113   void merge_forest (fma_forest *);
114   void dump_info ();
115   void dispatch ();
116 
117 private:
118   /* Prohibit copy construction.  */
119   fma_forest (const fma_forest &);
120 
121   /* The list of roots that form this forest.  */
122   std::list<fma_root_node *> *m_roots;
123 
124   /* Target parity the destination register of all FMUL and FMADD/FMSUB
125      instructions in this forest should have.  */
126   int m_target_parity;
127 
128   /* Link to the instance of func_fma_steering holding data related to the
129      FMA steering of the current function (cfun).  */
130   func_fma_steering *m_globals;
131 
132   /* Identifier for the forest (used for dumps).  */
133   int m_id;
134 
135   /* Total number of nodes in the forest (for statistics).  */
136   int m_nb_nodes;
137 };
138 
139 class fma_node
140 {
141 public:
142   fma_node (fma_node *parent, du_chain *chain);
143   ~fma_node ();
144 
145   bool root_p ();
146   fma_forest *get_forest ();
147   std::list<fma_node *> *get_children ();
148   rtx_insn *get_insn ();
149   void add_child (fma_node *);
150   int get_parity ();
151   void set_head (du_head *);
152   void rename (fma_forest *);
153   void dump_info (fma_forest *);
154 
155 private:
156   /* Prohibit copy construction.  */
157   fma_node (const fma_node &);
158 
159 protected:
160   /* Root node that lead to this node.  */
161   fma_root_node *m_root;
162 
163   /* The parent node of this node.  If the node belong to a chain with several
164      parent nodes, the first one encountered in a depth-first search is chosen
165      as canonical parent.  */
166   fma_node *m_parent;
167 
168   /* The list of child nodes.  If a chain contains several parent nodes, one is
169      chosen as canonical parent and the others will have no children.  */
170   std::list<fma_node *> *m_children;
171 
172   /* The associated DU_HEAD chain that the insn represented by this object
173      is (one of) the root of.  When a chain contains several roots, the non
174      canonical ones have this field set to NULL.  */
175   struct du_head *m_head;
176 
177   /* The FMUL or FMADD/FMSUB instruction this object corresponds to.  */
178   rtx_insn *m_insn;
179 };
180 
181 class fma_root_node : public fma_node
182 {
183 public:
184   fma_root_node (func_fma_steering *, du_chain *, int);
185 
186   fma_forest *get_forest ();
187   void set_forest (fma_forest *);
188   void dump_info (fma_forest *);
189 
190 private:
191   /* The forest this node belonged to when it was created.  */
192   fma_forest *m_forest;
193 };
194 
195 /* Class holding all data and methods relative to the FMA steering of a given
196    function.  The FMA steering pass could then run in parallel for different
197    functions.  */
198 
199 class func_fma_steering
200 {
201 public:
202   func_fma_steering ();
203   ~func_fma_steering ();
204 
205   int get_fpu_balance ();
206   void remove_forest (fma_forest *);
207   bool put_node (fma_node *);
208   void update_balance (int);
209   fma_node *get_fma_node (rtx_insn *);
210   void analyze_fma_fmul_insn (fma_forest *, du_chain *, du_head_p);
211   void execute_fma_steering ();
212 
213 private:
214   /* Prohibit copy construction.  */
215   func_fma_steering (const func_fma_steering &);
216 
217   void dfs (void (*) (fma_forest *), void (*) (fma_forest *, fma_root_node *),
218 	    void (*) (fma_forest *, fma_node *), bool);
219   void analyze ();
220   void rename_fma_trees ();
221 
222   /* Mapping between FMUL or FMADD/FMSUB instructions and the associated
223      fma_node object.  Used when analyzing an instruction that is a root of
224      a chain to find if such an object was created because this instruction
225      is also a use in another chain.  */
226   hash_map<rtx_insn *, fma_node *> *m_insn_fma_head_map;
227 
228   /* A list of all the forests in a given function.  */
229   std::list<fma_forest *> m_fma_forests;
230 
231   /* Balance of FMUL and FMADD/FMSUB instructions between the two FPU
232      pipelines:
233      < 0: more instruction dispatched to the first pipeline
234      == 0: perfect balance
235      > 0: more instruction dispatched to the second pipeline.  */
236   int m_fpu_balance;
237 
238   /* Identifier for the next forest created.  */
239   int m_next_forest_id;
240 };
241 
242 /* Rename the register HEAD->regno in all the insns in the chain HEAD to any
243    register not in the set UNAVAILABLE.  Adapted from rename_chains in
244    regrename.c.  */
245 
246 static bool
rename_single_chain(du_head_p head,HARD_REG_SET * unavailable)247 rename_single_chain (du_head_p head, HARD_REG_SET *unavailable)
248 {
249   int best_new_reg;
250   int n_uses = 0;
251   struct du_chain *tmp;
252   int reg = head->regno;
253   enum reg_class super_class = NO_REGS;
254 
255   if (head->cannot_rename)
256     return false;
257 
258   if (fixed_regs[reg] || global_regs[reg]
259       || (frame_pointer_needed && reg == HARD_FRAME_POINTER_REGNUM))
260     return false;
261 
262   /* Iterate over elements in the chain in order to:
263      1. Count number of uses, and narrow the set of registers we can
264 	use for renaming.
265      2. Compute the superunion of register classes in this chain.  */
266   for (tmp = head->first; tmp; tmp = tmp->next_use)
267     {
268       if (DEBUG_INSN_P (tmp->insn))
269 	continue;
270       n_uses++;
271       *unavailable |= ~reg_class_contents[tmp->cl];
272       super_class = reg_class_superunion[(int) super_class][(int) tmp->cl];
273     }
274 
275   if (n_uses < 1)
276     return false;
277 
278   best_new_reg = find_rename_reg (head, super_class, unavailable, reg,
279 				  false);
280 
281   if (dump_file)
282     {
283       fprintf (dump_file, "Register %s in insn %d", reg_names[reg],
284 	       INSN_UID (head->first->insn));
285       if (head->call_abis)
286 	fprintf (dump_file, " crosses a call");
287     }
288 
289   if (best_new_reg == reg)
290     {
291       if (dump_file)
292 	fprintf (dump_file, "; no available better choice\n");
293       return false;
294     }
295 
296   if (regrename_do_replace (head, best_new_reg))
297     {
298       if (dump_file)
299 	fprintf (dump_file, ", renamed as %s\n", reg_names[best_new_reg]);
300       df_set_regs_ever_live (best_new_reg, true);
301     }
302   else
303     {
304       if (dump_file)
305 	fprintf (dump_file, ", renaming as %s failed\n",
306 		 reg_names[best_new_reg]);
307       return false;
308     }
309   return true;
310 }
311 
312 /* Return whether T is the attribute of a FMADD/FMSUB-like instruction.  */
313 
314 static bool
is_fmac_op(enum attr_type t)315 is_fmac_op (enum attr_type t)
316 {
317   return (t == TYPE_FMACS) || (t == TYPE_FMACD) || (t == TYPE_NEON_FP_MLA_S);
318 }
319 
320 /* Return whether T is the attribute of a FMUL instruction.  */
321 
322 static bool
is_fmul_op(enum attr_type t)323 is_fmul_op (enum attr_type t)
324 {
325   return (t == TYPE_FMULS) || (t == TYPE_FMULD) || (t == TYPE_NEON_FP_MUL_S);
326 }
327 
328 /* Return whether INSN is an FMUL (if FMUL_OK is true) or FMADD/FMSUB
329    instruction.  */
330 
331 static bool
is_fmul_fmac_insn(rtx_insn * insn,bool fmul_ok)332 is_fmul_fmac_insn (rtx_insn *insn, bool fmul_ok)
333 {
334   enum attr_type t;
335 
336   if (!NONDEBUG_INSN_P (insn))
337     return false;
338 
339   if (recog_memoized (insn) < 0)
340     return false;
341 
342   /* Only consider chain(s) this instruction is a root of if this is an FMUL or
343      FMADD/FMSUB instruction.  This allows to avoid browsing chains of all
344      instructions for FMUL or FMADD/FMSUB in them.  */
345   t = get_attr_type (insn);
346   return is_fmac_op (t) || (fmul_ok && is_fmul_op (t));
347 }
348 
349 
350 /*
351  * Class fma_forest method definitions.
352  */
353 
fma_forest(func_fma_steering * fma_steer,fma_root_node * fma_root,int id)354 fma_forest::fma_forest (func_fma_steering *fma_steer, fma_root_node *fma_root,
355 			int id)
356 {
357       memset (this, 0, sizeof (*this));
358       this->m_globals = fma_steer;
359       this->m_roots = new std::list<fma_root_node *>;
360       this->m_roots->push_back (fma_root);
361       this->m_id = id;
362 }
363 
~fma_forest()364 fma_forest::~fma_forest ()
365 {
366   delete this->m_roots;
367 }
368 
369 int
get_id()370 fma_forest::get_id ()
371 {
372   return this->m_id;
373 }
374 
375 std::list<fma_root_node *> *
get_roots()376 fma_forest::get_roots ()
377 {
378   return this->m_roots;
379 }
380 
381 func_fma_steering *
get_globals()382 fma_forest::get_globals ()
383 {
384   return this->m_globals;
385 }
386 
387 int
get_target_parity()388 fma_forest::get_target_parity ()
389 {
390   return this->m_target_parity;
391 }
392 
393 /* Act on the creation of NODE by updating statistics in FOREST and adding an
394    entry for it in the func_fma_steering hashmap.  */
395 
fma_node_created(fma_node * node)396 void fma_forest::fma_node_created (fma_node *node)
397 {
398   bool created = !this->m_globals->put_node (node);
399 
400   gcc_assert (created);
401   this->m_nb_nodes++;
402 }
403 
404 /* Merge REF_FOREST and OTHER_FOREST together, making REF_FOREST the canonical
405    fma_forest object to represent both.  */
406 
407 void
merge_forest(fma_forest * other_forest)408 fma_forest::merge_forest (fma_forest *other_forest)
409 {
410   std::list<fma_root_node *> *other_roots;
411   std::list<fma_root_node *>::iterator other_root_iter;
412 
413   if (this == other_forest)
414     return;
415 
416   other_roots = other_forest->m_roots;
417 
418   /* Update root nodes' pointer to forest.  */
419   for (other_root_iter = other_roots->begin ();
420        other_root_iter != other_roots->end (); ++other_root_iter)
421     (*other_root_iter)->set_forest (this);
422 
423   /* Remove other_forest from the list of forests and move its tree roots in
424      the list of tree roots of ref_forest.  */
425   this->m_globals->remove_forest (other_forest);
426   this->m_roots->splice (this->m_roots->begin (), *other_roots);
427   this->m_nb_nodes += other_forest->m_nb_nodes;
428 
429   delete other_forest;
430 }
431 
432 /* Dump information about the forest FOREST.  */
433 
434 void
dump_info()435 fma_forest::dump_info ()
436 {
437   gcc_assert (dump_file);
438 
439   fprintf (dump_file, "Forest #%d has %d nodes\n", this->m_id,
440 	   this->m_nb_nodes);
441 }
442 
443 /* Wrapper around fma_forest::dump_info for use as parameter of function
444    pointer type in func_fma_steering::dfs.  */
445 
446 static void
dump_forest_info(fma_forest * forest)447 dump_forest_info (fma_forest *forest)
448 {
449   forest->dump_info ();
450 }
451 
452 /* Dispatch forest to the least utilized pipeline.  */
453 
454 void
dispatch()455 fma_forest::dispatch ()
456 {
457   this->m_target_parity = this->m_roots->front ()->get_parity ();
458   int fpu_balance = this->m_globals->get_fpu_balance ();
459   if (fpu_balance != 0)
460     this->m_target_parity = (fpu_balance < 0);
461 
462   if (dump_file)
463     fprintf (dump_file, "Target parity for forest #%d: %s\n", this->m_id,
464 	     this->m_target_parity ? "odd" : "even");
465 }
466 
467 /* Wrapper around fma_forest::dispatch for use as parameter of function pointer
468    type in func_fma_steering::dfs.  */
469 
470 static void
dispatch_forest(fma_forest * forest)471 dispatch_forest (fma_forest *forest)
472 {
473   forest->dispatch ();
474 }
475 
fma_node(fma_node * parent,du_chain * chain)476 fma_node::fma_node (fma_node *parent, du_chain *chain)
477 {
478   memset (this, 0, sizeof (*this));
479   this->m_parent = parent;
480   this->m_children = new std::list<fma_node *>;
481   this->m_insn = chain->insn;
482   /* root_p () cannot be used to check for root before root is set.  */
483   if (this->m_parent == this)
484     this->m_root = static_cast<fma_root_node *> (parent);
485   else
486     {
487       this->m_root = parent->m_root;
488       this->get_forest ()->fma_node_created (this);
489     }
490 }
491 
~fma_node()492 fma_node::~fma_node ()
493 {
494   delete this->m_children;
495 }
496 
497 std::list<fma_node *> *
get_children()498 fma_node::get_children ()
499 {
500   return this->m_children;
501 }
502 
503 rtx_insn *
get_insn()504 fma_node::get_insn ()
505 {
506   return this->m_insn;
507 }
508 
509 void
set_head(du_head * head)510 fma_node::set_head (du_head *head)
511 {
512   gcc_assert (!this->m_head);
513   this->m_head = head;
514 }
515 
516 /* Add a child to this node in the list of children.  */
517 
518 void
add_child(fma_node * child)519 fma_node::add_child (fma_node *child)
520 {
521   this->m_children->push_back (child);
522 }
523 
524 /* Return the parity of the destination register of the instruction represented
525    by this node.  */
526 
527 int
get_parity()528 fma_node::get_parity ()
529 {
530   return this->m_head->regno % 2;
531 }
532 
533 /* Get the actual forest associated with a non root node as the one the node
534    points to might have been merged into another one.  In that case the pointer
535    in the root nodes are updated so we return the forest pointer of a root node
536    pointed to by the initial forest.  Despite being a oneliner, this method is
537    defined here as it references a method from fma_root_node.  */
538 
539 fma_forest *
get_forest()540 fma_node::get_forest ()
541 {
542   return this->m_root->get_forest ();
543 }
544 
545 /* Return whether a node is a root node.  */
546 
547 bool
root_p()548 fma_node::root_p ()
549 {
550   return this->m_root == this;
551 }
552 
553 /* Dump information about the children of node FMA_NODE in forest FOREST.  */
554 
555 void
dump_info(ATTRIBUTE_UNUSED fma_forest * forest)556 fma_node::dump_info (ATTRIBUTE_UNUSED fma_forest *forest)
557 {
558   struct du_chain *chain;
559   std::list<fma_node *>::iterator fma_child;
560 
561   gcc_assert (dump_file);
562 
563   if (this->get_children ()->empty ())
564     return;
565 
566   fprintf (dump_file, "Instruction(s)");
567   for (chain = this->m_head->first; chain; chain = chain->next_use)
568     {
569       if (!is_fmul_fmac_insn (chain->insn, true))
570 	continue;
571 
572       if (chain->loc != &SET_DEST (PATTERN (chain->insn)))
573 	continue;
574 
575       fprintf (dump_file, " %d", INSN_UID (chain->insn));
576     }
577 
578   fprintf (dump_file, " is(are) accumulator dependency of instructions");
579   for (fma_child = this->get_children ()->begin ();
580        fma_child != this->get_children ()->end (); fma_child++)
581     fprintf (dump_file, " %d", INSN_UID ((*fma_child)->m_insn));
582   fprintf (dump_file, "\n");
583 }
584 
585 /* Wrapper around fma_node::dump_info for use as parameter of function pointer
586    type in func_fma_steering::dfs.  */
587 
588 static void
dump_tree_node_info(fma_forest * forest,fma_node * node)589 dump_tree_node_info (fma_forest *forest, fma_node *node)
590 {
591   node->dump_info (forest);
592 }
593 
594 /* Rename the destination register of a single FMUL or FMADD/FMSUB instruction
595    represented by FMA_NODE to a register that respect the target parity for
596    FOREST or with same parity of the instruction represented by its parent node
597    if it has one.  */
598 
599 void
rename(fma_forest * forest)600 fma_node::rename (fma_forest *forest)
601 {
602   int cur_parity, target_parity;
603 
604   /* This is alternate root of a chain and thus has no children.  It will be
605      renamed when processing the canonical root for that chain.  */
606   if (!this->m_head)
607     return;
608 
609   target_parity = forest->get_target_parity ();
610   if (this->m_parent)
611     target_parity = this->m_parent->get_parity ();
612   cur_parity = this->get_parity ();
613 
614   /* Rename if parity differs.  */
615   if (cur_parity != target_parity)
616     {
617       rtx_insn *insn = this->m_insn;
618       HARD_REG_SET unavailable;
619       machine_mode mode;
620       int reg;
621 
622       if (dump_file)
623 	{
624 	  unsigned cur_dest_reg = this->m_head->regno;
625 
626 	  fprintf (dump_file, "FMA or FMUL at insn %d but destination "
627 		   "register (%s) has different parity from expected to "
628 		   "maximize FPU pipeline utilization\n", INSN_UID (insn),
629 		   reg_names[cur_dest_reg]);
630 	}
631 
632       /* Don't clobber traceback for noreturn functions.  */
633       CLEAR_HARD_REG_SET (unavailable);
634       if (frame_pointer_needed)
635 	{
636 	  add_to_hard_reg_set (&unavailable, Pmode, FRAME_POINTER_REGNUM);
637 	  add_to_hard_reg_set (&unavailable, Pmode, HARD_FRAME_POINTER_REGNUM);
638 	}
639 
640       /* Exclude registers with wrong parity.  */
641       mode = GET_MODE (SET_DEST (PATTERN (insn)));
642       for (reg = cur_parity; reg < FIRST_PSEUDO_REGISTER; reg += 2)
643 	add_to_hard_reg_set (&unavailable, mode, reg);
644 
645       if (!rename_single_chain (this->m_head, &unavailable))
646 	{
647 	  if (dump_file)
648 	    fprintf (dump_file, "Destination register of insn %d could not be "
649 		     "renamed. Dependent FMA insns will use this parity from "
650 		     "there on.\n", INSN_UID (insn));
651 	}
652       else
653 	cur_parity = target_parity;
654     }
655 
656   forest->get_globals ()->update_balance (cur_parity);
657 }
658 
659 /* Wrapper around fma_node::dump_info for use as parameter of function pointer
660    type in func_fma_steering::dfs.  */
661 
662 static void
rename_fma_node(fma_forest * forest,fma_node * node)663 rename_fma_node (fma_forest *forest, fma_node *node)
664 {
665   node->rename (forest);
666 }
667 
fma_root_node(func_fma_steering * globals,du_chain * chain,int id)668 fma_root_node::fma_root_node (func_fma_steering *globals, du_chain *chain,
669 			      int id) : fma_node (this, chain)
670 {
671   this->m_forest = new fma_forest (globals, this, id);
672   this->m_forest->fma_node_created (this);
673 }
674 
675 fma_forest *
get_forest()676 fma_root_node::get_forest ()
677 {
678   return this->m_forest;
679 }
680 
681 void
set_forest(fma_forest * ref_forest)682 fma_root_node::set_forest (fma_forest *ref_forest)
683 {
684   this->m_forest = ref_forest;
685 }
686 
687 /* Dump information about the roots of forest FOREST.  */
688 
689 void
dump_info(fma_forest * forest)690 fma_root_node::dump_info (fma_forest *forest)
691 {
692   gcc_assert (dump_file);
693 
694   if (this == forest->get_roots ()->front ())
695     fprintf (dump_file, "Instruction(s) at root of forest #%d:",
696 	     forest->get_id ());
697   fprintf (dump_file, " %d", INSN_UID (this->m_insn));
698   if (this == forest->get_roots ()->back ())
699     fprintf (dump_file, "\n");
700 }
701 
702 /* Wrapper around fma_root_node::dump_info for use as parameter of function
703    pointer type in func_fma_steering::dfs.  */
704 
705 static void
dump_tree_root_info(fma_forest * forest,fma_root_node * node)706 dump_tree_root_info (fma_forest *forest, fma_root_node *node)
707 {
708   node->dump_info (forest);
709 }
710 
func_fma_steering()711 func_fma_steering::func_fma_steering () : m_fpu_balance (0)
712 {
713   this->m_insn_fma_head_map = new hash_map<rtx_insn *, fma_node *>;
714   this->m_fma_forests.clear ();
715   this->m_next_forest_id = 0;
716 }
717 
~func_fma_steering()718 func_fma_steering::~func_fma_steering ()
719 {
720   delete this->m_insn_fma_head_map;
721 }
722 
723 int
get_fpu_balance()724 func_fma_steering::get_fpu_balance ()
725 {
726   return this->m_fpu_balance;
727 }
728 
729 void
remove_forest(fma_forest * forest)730 func_fma_steering::remove_forest (fma_forest *forest)
731 {
732   this->m_fma_forests.remove (forest);
733 }
734 
735 /* Memorize the mapping of this instruction to its fma_node object and return
736    whether such a mapping existed.  */
737 
738 bool
put_node(fma_node * node)739 func_fma_steering::put_node (fma_node *node)
740 {
741   return this->m_insn_fma_head_map->put (node->get_insn (), node);
742 }
743 
744 /* Update the current balance considering a node with the given PARITY.  */
745 
746 void
update_balance(int parity)747 func_fma_steering::update_balance (int parity)
748 {
749   this->m_fpu_balance = parity ? this->m_fpu_balance + 1
750 			       : this->m_fpu_balance - 1;
751 }
752 
753 /* Return whether an fma_node object exists for instruction INSN and, if not,
754    allocate one in *RET.  */
755 
756 fma_node *
get_fma_node(rtx_insn * insn)757 func_fma_steering::get_fma_node (rtx_insn *insn)
758 {
759   fma_node **fma_slot;
760 
761   fma_slot = this->m_insn_fma_head_map->get (insn);
762   if (fma_slot)
763     return *fma_slot;
764   return NULL;
765 }
766 
767 /* Allocate and initialize fma_node objects for the FMUL or FMADD/FMSUB
768    instruction in CHAIN->insn and its dependent FMADD/FMSUB instructions, all
769    part of FOREST.  For the children, the associated head is left untouched
770    (and thus null) as this function will be called again when considering the
771    chain where they are def.  For the parent, the chain is given in HEAD.  */
772 
773 void
analyze_fma_fmul_insn(fma_forest * ref_forest,du_chain * chain,du_head_p head)774 func_fma_steering::analyze_fma_fmul_insn (fma_forest *ref_forest,
775 					  du_chain *chain, du_head_p head)
776 {
777   fma_forest *forest;
778   fma_node *node = this->get_fma_node (chain->insn);
779 
780   /* This is a root node.  */
781   if (!node)
782     {
783       fma_root_node *root_node;
784 
785       root_node = new fma_root_node (this, chain, this->m_next_forest_id++);
786       forest = root_node->get_forest ();
787       node = root_node;
788 
789       /* Until proved otherwise, assume this root is not part of an existing
790 	 forest and thus add its forest to the list of forests.  */
791       this->m_fma_forests.push_back (forest);
792     }
793   else
794     forest = node->get_forest ();
795 
796   node->set_head (head);
797 
798   /* fma_node is part of a chain with several defs, one of them having already
799      been processed.  The root of that already processed def is the canonical
800      one and the root of fma_node is added to its forest.  No need to process
801      the children nodes as they were already processed when the other def was
802      processed.  */
803   if (ref_forest)
804     {
805       ref_forest->merge_forest (forest);
806       return;
807     }
808 
809   for (chain = head->first; chain; chain = chain->next_use)
810     {
811       fma_node *child_fma;
812       rtx fma_rtx, *accum_rtx_p;
813 
814       if (!is_fmul_fmac_insn (chain->insn, false))
815 	continue;
816 
817       /* Get FMA rtx.  */
818       fma_rtx = SET_SRC (PATTERN (chain->insn));
819       /* FMA is negated.  */
820       if (GET_CODE (fma_rtx) == NEG)
821 	fma_rtx = XEXP (fma_rtx, 0);
822       /* Get accumulator rtx.  */
823       accum_rtx_p = &XEXP (fma_rtx, 2);
824       /* Accumulator is negated.  */
825       if (!REG_P (*accum_rtx_p))
826 	accum_rtx_p = &XEXP (*accum_rtx_p, 0);
827 
828       /* This du_chain structure is not for the accumulator register.  */
829       if (accum_rtx_p != chain->loc)
830 	continue;
831 
832       /* If object already created, this is a loop carried dependency.  We
833 	 don't include this object in the children as we want trees for
834 	 rename_fma_trees to not be an infinite loop.  */
835       if (this->get_fma_node (chain->insn))
836 	continue;
837 
838       child_fma = new fma_node (node, chain);
839 
840       /* Memorize the mapping of this instruction to its fma_node object
841 	 as it will be processed for the chain starting at its destination
842 	 register later.  */
843 
844       /* Link to siblings.  */
845       node->add_child (child_fma);
846     }
847 }
848 
849 /* Perform a depth-first search of the forests of fma_node in
850    THIS->m_fma_forests, calling PROCESS_FOREST () on each fma_forest object in
851    THIS->m_fma_forests list, PROCESS_ROOT () on each tree root and
852    PROCESS_NODE () on each node.  If FREE is true, free all std::list in the
853    same dfs.  */
854 
855 void
dfs(void (* process_forest)(fma_forest *),void (* process_root)(fma_forest *,fma_root_node *),void (* process_node)(fma_forest *,fma_node *),bool free)856 func_fma_steering::dfs (void (*process_forest) (fma_forest *),
857 			void (*process_root) (fma_forest *, fma_root_node *),
858 			void (*process_node) (fma_forest *, fma_node *),
859 			bool free)
860 {
861   auto_vec<fma_node *> to_process;
862   auto_vec<fma_node *> to_free;
863   std::list<fma_forest *>::iterator forest_iter;
864 
865   /* For each forest.  */
866   for (forest_iter = this->m_fma_forests.begin ();
867        forest_iter != this->m_fma_forests.end (); ++forest_iter)
868     {
869       std::list<fma_root_node *>::iterator root_iter;
870 
871       if (process_forest)
872 	process_forest (*forest_iter);
873 
874       /* For each tree root in this forest.  */
875       for (root_iter = (*forest_iter)->get_roots ()->begin ();
876 	   root_iter != (*forest_iter)->get_roots ()->end (); ++root_iter)
877 	{
878 	  if (process_root)
879 	    process_root (*forest_iter, *root_iter);
880 	  to_process.safe_push (*root_iter);
881 	}
882 
883       /* For each tree node in this forest.  */
884       while (!to_process.is_empty ())
885 	{
886 	  fma_node *node;
887 	  std::list<fma_node *>::iterator child_iter;
888 
889 	  node = to_process.pop ();
890 
891 	  if (process_node)
892 	    process_node (*forest_iter, node);
893 
894 	  for (child_iter = node->get_children ()->begin ();
895 	       child_iter != node->get_children ()->end (); ++child_iter)
896 	    to_process.safe_push (*child_iter);
897 
898 	  /* Defer freeing so that the process_node callback can access the
899 	     parent and children of the node being processed.  */
900 	  if (free)
901 	    to_free.safe_push (node);
902 	}
903 
904       if (free)
905 	{
906 	  delete *forest_iter;
907 
908 	  while (!to_free.is_empty ())
909 	    {
910 	      fma_node *node = to_free.pop ();
911 	      if (node->root_p ())
912 		delete static_cast<fma_root_node *> (node);
913 	      else
914 		delete node;
915 	    }
916 	}
917     }
918 }
919 
920 /* Build the dependency trees of FMUL and FMADD/FMSUB instructions.  */
921 
922 void
analyze()923 func_fma_steering::analyze ()
924 {
925   int i, n_blocks, *bb_dfs_preorder;
926   basic_block bb;
927   rtx_insn *insn;
928 
929   bb_dfs_preorder = XNEWVEC (int, last_basic_block_for_fn (cfun));
930   n_blocks = pre_and_rev_post_order_compute (bb_dfs_preorder, NULL, false);
931 
932   /* Browse the graph of basic blocks looking for FMUL or FMADD/FMSUB
933      instructions.  */
934   for (i = 0; i < n_blocks; i++)
935     {
936       bb = BASIC_BLOCK_FOR_FN (cfun, bb_dfs_preorder[i]);
937       FOR_BB_INSNS (bb, insn)
938 	{
939 	  operand_rr_info *dest_op_info;
940 	  struct du_chain *chain = NULL;
941 	  unsigned dest_regno;
942 	  fma_forest *forest = NULL;
943 	  du_head_p head = NULL;
944 	  int i;
945 
946 	  if (!is_fmul_fmac_insn (insn, true))
947 	    continue;
948 
949 	  /* Search the chain where this instruction is (one of) the root.  */
950 	  dest_op_info = insn_rr[INSN_UID (insn)].op_info;
951 	  dest_regno = REGNO (SET_DEST (PATTERN (insn)));
952 	  for (i = 0; i < dest_op_info->n_chains; i++)
953 	    {
954 	      /* The register tracked by this chain does not match the
955 		 destination register of insn.  */
956 	      if (dest_op_info->heads[i]->regno != dest_regno)
957 		continue;
958 
959 	      head = dest_op_info->heads[i];
960 	      /* The chain was merged in another, find the new head.  */
961 	      if (!head->first)
962 		head = regrename_chain_from_id (head->id);
963 
964 	      /* Search the chain element for this instruction and, if another
965 		 FMUL or FMADD/FMSUB instruction was already processed, note
966 		 the forest of its tree.  */
967 	      forest = NULL;
968 	      for (chain = head->first; chain; chain = chain->next_use)
969 		{
970 		  fma_node **fma_slot;
971 
972 		  if (!is_fmul_fmac_insn (chain->insn, true))
973 		    continue;
974 
975 		  /* This is a use, continue.  */
976 		  if (chain->loc != &SET_DEST (PATTERN (chain->insn)))
977 		    continue;
978 
979 		  if (chain->insn == insn)
980 		    break;
981 
982 		  fma_slot = this->m_insn_fma_head_map->get (chain->insn);
983 		  if (fma_slot && (*fma_slot)->get_children ())
984 		    forest = (*fma_slot)->get_forest ();
985 		}
986 	      if (chain)
987 		break;
988 	    }
989 
990 	  /* Due to implementation of regrename, dest register can slip away
991 	     from regrename's analysis.  As a result, there is no chain for
992 	     the destination register of insn.  We simply skip the insn even
993 	     it is a fmul/fmac instruction.  This can happen when the dest
994 	     register is also a source register of insn and one of the below
995 	     conditions is satisfied:
996 	       1) the source reg is setup in larger mode than this insn;
997 	       2) the source reg is uninitialized;
998 	       3) the source reg is passed in as parameter.  */
999 	  if (i < dest_op_info->n_chains)
1000 	    this->analyze_fma_fmul_insn (forest, chain, head);
1001 	}
1002     }
1003   free (bb_dfs_preorder);
1004 
1005   if (dump_file)
1006     this->dfs (dump_forest_info, dump_tree_root_info, dump_tree_node_info,
1007 	       false);
1008 }
1009 
1010 /* Perform the renaming of all chains with FMUL or FMADD/FMSUB involved with
1011    the objective of keeping FPU pipeline balanced in term of instructions and
1012    having FMADD/FMSUB with dependencies on previous FMUL or FMADD/FMSUB be
1013    scheduled on the same pipeline.  */
1014 
1015 void
rename_fma_trees()1016 func_fma_steering::rename_fma_trees ()
1017 {
1018   this->dfs (dispatch_forest, NULL, rename_fma_node, true);
1019 
1020   if (dump_file && !this->m_fma_forests.empty ())
1021     {
1022       fprintf (dump_file, "Function %s has ", current_function_name ());
1023       if (this->m_fpu_balance == 0)
1024 	fprintf (dump_file, "perfect balance of FMUL/FMA chains between the "
1025 		 "two FPU pipelines\n");
1026       else if (this->m_fpu_balance > 0)
1027 	fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the second "
1028 		 "FPU pipeline\n", this->m_fpu_balance);
1029       else /* this->m_fpu_balance < 0 */
1030 	fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the first "
1031 		 "FPU pipeline\n", - this->m_fpu_balance);
1032     }
1033 }
1034 
1035 /* Execute FMA steering pass.  */
1036 
1037 void
execute_fma_steering()1038 func_fma_steering::execute_fma_steering ()
1039 {
1040   df_set_flags (DF_LR_RUN_DCE);
1041   df_note_add_problem ();
1042   df_analyze ();
1043   df_set_flags (DF_DEFER_INSN_RESCAN);
1044 
1045   regrename_init (true);
1046   regrename_analyze (NULL);
1047   this->analyze ();
1048   this->rename_fma_trees ();
1049   regrename_finish ();
1050 }
1051 
1052 const pass_data pass_data_fma_steering =
1053 {
1054   RTL_PASS, /* type */
1055   "fma_steering", /* name */
1056   OPTGROUP_NONE, /* optinfo_flags */
1057   TV_NONE, /* tv_id */
1058   0, /* properties_required */
1059   0, /* properties_provided */
1060   0, /* properties_destroyed */
1061   0, /* todo_flags_start */
1062   TODO_df_finish, /* todo_flags_finish */
1063 };
1064 
1065 class pass_fma_steering : public rtl_opt_pass
1066 {
1067 public:
pass_fma_steering(gcc::context * ctxt)1068   pass_fma_steering (gcc::context *ctxt)
1069     : rtl_opt_pass (pass_data_fma_steering, ctxt)
1070   {}
1071 
1072   /* opt_pass methods: */
gate(function *)1073   virtual bool gate (function *)
1074     {
1075       return (aarch64_tune_params.extra_tuning_flags
1076 	      & AARCH64_EXTRA_TUNE_RENAME_FMA_REGS)
1077 	      && optimize >= 2;
1078     }
1079 
execute(function *)1080   virtual unsigned int execute (function *)
1081     {
1082       func_fma_steering *fma_steering = new func_fma_steering;
1083       fma_steering->execute_fma_steering ();
1084       delete fma_steering;
1085       return 0;
1086     }
1087 
1088 }; // class pass_fma_steering
1089 
1090 /* Create a new fma steering pass instance.  */
1091 
1092 rtl_opt_pass *
make_pass_fma_steering(gcc::context * ctxt)1093 make_pass_fma_steering (gcc::context *ctxt)
1094 {
1095   return new pass_fma_steering (ctxt);
1096 }
1097