1 /* FMA steering optimization pass for Cortex-A57.
2    Copyright (C) 2015-2018 Free Software Foundation, Inc.
3    Contributed by ARM Ltd.
4 
5    This file is part of GCC.
6 
7    GCC is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11 
12    GCC is distributed in the hope that it will be useful, but
13    WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15    General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with GCC; see the file COPYING3.  If not see
19    <http://www.gnu.org/licenses/>.  */
20 
21 #define IN_TARGET_CODE 1
22 
23 #include "config.h"
24 #define INCLUDE_LIST
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "df.h"
31 #include "insn-config.h"
32 #include "regs.h"
33 #include "memmodel.h"
34 #include "emit-rtl.h"
35 #include "recog.h"
36 #include "cfganal.h"
37 #include "insn-attr.h"
38 #include "context.h"
39 #include "tree-pass.h"
40 #include "regrename.h"
41 #include "aarch64-protos.h"
42 
43 /* For better performance, the destination of FMADD/FMSUB instructions should
44    have the same parity as their accumulator register if the accumulator
45    contains the result of a previous FMUL or FMADD/FMSUB instruction if
46    targetting Cortex-A57 processors.  Performance is also increased by
47    otherwise keeping a good balance in the parity of the destination register
48    of FMUL or FMADD/FMSUB.
49 
50    This pass ensure that registers are renamed so that these conditions hold.
51    We reuse the existing register renaming facility from regrename.c to build
52    dependency chains and expose candidate registers for renaming.
53 
54 
55    The algorithm has three steps:
56 
57    First, the functions of the register renaming pass are called.  These
58    analyze the instructions and produce a list of def/use chains of
59    instructions.
60 
61    Next, this information is used to build trees of multiply and
62    multiply-accumulate instructions.  The roots of these trees are any
63    multiply, or any multiply-accumulate whose accumulator is not dependent on
64    a multiply or multiply-accumulate instruction.  A child is added to the
65    tree where a dependency chain exists between the result of the parent
66    instruction and the accumulator operand of the child, as in the diagram
67    below:
68 
69 		 fmul s2, s0, s1
70 		/		\
71    fmadd s0, s1, s1, s2   fmadd s4, s1, s1 s2
72 	    |
73    fmadd s3, s1, s1, s0
74 
75    Trees made of a single instruction are permitted.
76 
77    Finally, renaming is performed.  The parity of the destination register at
78    the root of a tree is checked against the current balance of multiply and
79    multiply-accumulate on each pipeline.  If necessary, the root of a tree is
80    renamed, in which case the rest of the tree is then renamed to keep the same
81    parity in the destination registers of all instructions in the tree.  */
82 
83 
84 
85 /* Forward declarations.  */
86 class fma_node;
87 class fma_root_node;
88 class func_fma_steering;
89 
90 /* Dependencies between FMUL or FMADD/FMSUB instructions and subsequent
91    FMADD/FMSUB instructions form a graph.  This is because alternatives can
92    make a register be set by several FMUL or FMADD/FMSUB instructions in
93    different basic blocks and because of loops.  For ease of browsing, the
94    connected components of this graph are broken up into forests of trees.
95    Forests are represented by fma_forest objects, contained in the fma_forests
96    list.  Using a separate object for the forests allows for a better use of
97    memory as there is some information that is global to each forest, such as
98    the number of FMSUB and FMADD/FMSUB instructions currently scheduled on each
99    floating-point execution pipelines.  */
100 
101 class fma_forest
102 {
103 public:
104   fma_forest (func_fma_steering *, fma_root_node *, int);
105   ~fma_forest ();
106 
107   int get_id ();
108   std::list<fma_root_node *> *get_roots ();
109   func_fma_steering *get_globals ();
110   int get_target_parity ();
111   void fma_node_created (fma_node *);
112   void merge_forest (fma_forest *);
113   void dump_info ();
114   void dispatch ();
115 
116 private:
117   /* The list of roots that form this forest.  */
118   std::list<fma_root_node *> *m_roots;
119 
120   /* Target parity the destination register of all FMUL and FMADD/FMSUB
121      instructions in this forest should have.  */
122   int m_target_parity;
123 
124   /* Link to the instance of func_fma_steering holding data related to the
125      FMA steering of the current function (cfun).  */
126   func_fma_steering *m_globals;
127 
128   /* Identifier for the forest (used for dumps).  */
129   int m_id;
130 
131   /* Total number of nodes in the forest (for statistics).  */
132   int m_nb_nodes;
133 };
134 
135 class fma_node
136 {
137 public:
138   fma_node (fma_node *parent, du_chain *chain);
139   ~fma_node ();
140 
141   bool root_p ();
142   fma_forest *get_forest ();
143   std::list<fma_node *> *get_children ();
144   rtx_insn *get_insn ();
145   void add_child (fma_node *);
146   int get_parity ();
147   void set_head (du_head *);
148   void rename (fma_forest *);
149   void dump_info (fma_forest *);
150 
151 protected:
152   /* Root node that lead to this node.  */
153   fma_root_node *m_root;
154 
155   /* The parent node of this node.  If the node belong to a chain with several
156      parent nodes, the first one encountered in a depth-first search is chosen
157      as canonical parent.  */
158   fma_node *m_parent;
159 
160   /* The list of child nodes.  If a chain contains several parent nodes, one is
161      chosen as canonical parent and the others will have no children.  */
162   std::list<fma_node *> *m_children;
163 
164   /* The associated DU_HEAD chain that the insn represented by this object
165      is (one of) the root of.  When a chain contains several roots, the non
166      canonical ones have this field set to NULL.  */
167   struct du_head *m_head;
168 
169   /* The FMUL or FMADD/FMSUB instruction this object corresponds to.  */
170   rtx_insn *m_insn;
171 };
172 
173 class fma_root_node : public fma_node
174 {
175 public:
176   fma_root_node (func_fma_steering *, du_chain *, int);
177 
178   fma_forest *get_forest ();
179   void set_forest (fma_forest *);
180   void dump_info (fma_forest *);
181 
182 private:
183   /* The forest this node belonged to when it was created.  */
184   fma_forest *m_forest;
185 };
186 
187 /* Class holding all data and methods relative to the FMA steering of a given
188    function.  The FMA steering pass could then run in parallel for different
189    functions.  */
190 
191 class func_fma_steering
192 {
193 public:
194   func_fma_steering ();
195   ~func_fma_steering ();
196 
197   int get_fpu_balance ();
198   void remove_forest (fma_forest *);
199   bool put_node (fma_node *);
200   void update_balance (int);
201   fma_node *get_fma_node (rtx_insn *);
202   void analyze_fma_fmul_insn (fma_forest *, du_chain *, du_head_p);
203   void execute_fma_steering ();
204 
205 private:
206   void dfs (void (*) (fma_forest *), void (*) (fma_forest *, fma_root_node *),
207 	    void (*) (fma_forest *, fma_node *), bool);
208   void analyze ();
209   void rename_fma_trees ();
210 
211   /* Mapping between FMUL or FMADD/FMSUB instructions and the associated
212      fma_node object.  Used when analyzing an instruction that is a root of
213      a chain to find if such an object was created because this instruction
214      is also a use in another chain.  */
215   hash_map<rtx_insn *, fma_node *> *m_insn_fma_head_map;
216 
217   /* A list of all the forests in a given function.  */
218   std::list<fma_forest *> m_fma_forests;
219 
220   /* Balance of FMUL and FMADD/FMSUB instructions between the two FPU
221      pipelines:
222      < 0: more instruction dispatched to the first pipeline
223      == 0: perfect balance
224      > 0: more instruction dispatched to the second pipeline.  */
225   int m_fpu_balance;
226 
227   /* Identifier for the next forest created.  */
228   int m_next_forest_id;
229 };
230 
231 /* Rename the register HEAD->regno in all the insns in the chain HEAD to any
232    register not in the set UNAVAILABLE.  Adapted from rename_chains in
233    regrename.c.  */
234 
235 static bool
rename_single_chain(du_head_p head,HARD_REG_SET * unavailable)236 rename_single_chain (du_head_p head, HARD_REG_SET *unavailable)
237 {
238   int best_new_reg;
239   int n_uses = 0;
240   struct du_chain *tmp;
241   int reg = head->regno;
242   enum reg_class super_class = NO_REGS;
243 
244   if (head->cannot_rename)
245     return false;
246 
247   if (fixed_regs[reg] || global_regs[reg]
248       || (frame_pointer_needed && reg == HARD_FRAME_POINTER_REGNUM))
249     return false;
250 
251   /* Iterate over elements in the chain in order to:
252      1. Count number of uses, and narrow the set of registers we can
253 	use for renaming.
254      2. Compute the superunion of register classes in this chain.  */
255   for (tmp = head->first; tmp; tmp = tmp->next_use)
256     {
257       if (DEBUG_INSN_P (tmp->insn))
258 	continue;
259       n_uses++;
260       IOR_COMPL_HARD_REG_SET (*unavailable, reg_class_contents[tmp->cl]);
261       super_class = reg_class_superunion[(int) super_class][(int) tmp->cl];
262     }
263 
264   if (n_uses < 1)
265     return false;
266 
267   best_new_reg = find_rename_reg (head, super_class, unavailable, reg,
268 				  false);
269 
270   if (dump_file)
271     {
272       fprintf (dump_file, "Register %s in insn %d", reg_names[reg],
273 	       INSN_UID (head->first->insn));
274       if (head->need_caller_save_reg)
275 	fprintf (dump_file, " crosses a call");
276     }
277 
278   if (best_new_reg == reg)
279     {
280       if (dump_file)
281 	fprintf (dump_file, "; no available better choice\n");
282       return false;
283     }
284 
285   if (regrename_do_replace (head, best_new_reg))
286     {
287       if (dump_file)
288 	fprintf (dump_file, ", renamed as %s\n", reg_names[best_new_reg]);
289       df_set_regs_ever_live (best_new_reg, true);
290     }
291   else
292     {
293       if (dump_file)
294 	fprintf (dump_file, ", renaming as %s failed\n",
295 		 reg_names[best_new_reg]);
296       return false;
297     }
298   return true;
299 }
300 
301 /* Return whether T is the attribute of a FMADD/FMSUB-like instruction.  */
302 
303 static bool
is_fmac_op(enum attr_type t)304 is_fmac_op (enum attr_type t)
305 {
306   return (t == TYPE_FMACS) || (t == TYPE_FMACD) || (t == TYPE_NEON_FP_MLA_S);
307 }
308 
309 /* Return whether T is the attribute of a FMUL instruction.  */
310 
311 static bool
is_fmul_op(enum attr_type t)312 is_fmul_op (enum attr_type t)
313 {
314   return (t == TYPE_FMULS) || (t == TYPE_FMULD) || (t == TYPE_NEON_FP_MUL_S);
315 }
316 
317 /* Return whether INSN is an FMUL (if FMUL_OK is true) or FMADD/FMSUB
318    instruction.  */
319 
320 static bool
is_fmul_fmac_insn(rtx_insn * insn,bool fmul_ok)321 is_fmul_fmac_insn (rtx_insn *insn, bool fmul_ok)
322 {
323   enum attr_type t;
324 
325   if (!NONDEBUG_INSN_P (insn))
326     return false;
327 
328   if (recog_memoized (insn) < 0)
329     return false;
330 
331   /* Only consider chain(s) this instruction is a root of if this is an FMUL or
332      FMADD/FMSUB instruction.  This allows to avoid browsing chains of all
333      instructions for FMUL or FMADD/FMSUB in them.  */
334   t = get_attr_type (insn);
335   return is_fmac_op (t) || (fmul_ok && is_fmul_op (t));
336 }
337 
338 
339 /*
340  * Class fma_forest method definitions.
341  */
342 
fma_forest(func_fma_steering * fma_steer,fma_root_node * fma_root,int id)343 fma_forest::fma_forest (func_fma_steering *fma_steer, fma_root_node *fma_root,
344 			int id)
345 {
346       memset (this, 0, sizeof (*this));
347       this->m_globals = fma_steer;
348       this->m_roots = new std::list<fma_root_node *>;
349       this->m_roots->push_back (fma_root);
350       this->m_id = id;
351 }
352 
~fma_forest()353 fma_forest::~fma_forest ()
354 {
355   delete this->m_roots;
356 }
357 
358 int
get_id()359 fma_forest::get_id ()
360 {
361   return this->m_id;
362 }
363 
364 std::list<fma_root_node *> *
get_roots()365 fma_forest::get_roots ()
366 {
367   return this->m_roots;
368 }
369 
370 func_fma_steering *
get_globals()371 fma_forest::get_globals ()
372 {
373   return this->m_globals;
374 }
375 
376 int
get_target_parity()377 fma_forest::get_target_parity ()
378 {
379   return this->m_target_parity;
380 }
381 
382 /* Act on the creation of NODE by updating statistics in FOREST and adding an
383    entry for it in the func_fma_steering hashmap.  */
384 
fma_node_created(fma_node * node)385 void fma_forest::fma_node_created (fma_node *node)
386 {
387   bool created = !this->m_globals->put_node (node);
388 
389   gcc_assert (created);
390   this->m_nb_nodes++;
391 }
392 
393 /* Merge REF_FOREST and OTHER_FOREST together, making REF_FOREST the canonical
394    fma_forest object to represent both.  */
395 
396 void
merge_forest(fma_forest * other_forest)397 fma_forest::merge_forest (fma_forest *other_forest)
398 {
399   std::list<fma_root_node *> *other_roots;
400   std::list<fma_root_node *>::iterator other_root_iter;
401 
402   if (this == other_forest)
403     return;
404 
405   other_roots = other_forest->m_roots;
406 
407   /* Update root nodes' pointer to forest.  */
408   for (other_root_iter = other_roots->begin ();
409        other_root_iter != other_roots->end (); ++other_root_iter)
410     (*other_root_iter)->set_forest (this);
411 
412   /* Remove other_forest from the list of forests and move its tree roots in
413      the list of tree roots of ref_forest.  */
414   this->m_globals->remove_forest (other_forest);
415   this->m_roots->splice (this->m_roots->begin (), *other_roots);
416   this->m_nb_nodes += other_forest->m_nb_nodes;
417 
418   delete other_forest;
419 }
420 
421 /* Dump information about the forest FOREST.  */
422 
423 void
dump_info()424 fma_forest::dump_info ()
425 {
426   gcc_assert (dump_file);
427 
428   fprintf (dump_file, "Forest #%d has %d nodes\n", this->m_id,
429 	   this->m_nb_nodes);
430 }
431 
432 /* Wrapper around fma_forest::dump_info for use as parameter of function
433    pointer type in func_fma_steering::dfs.  */
434 
435 static void
dump_forest_info(fma_forest * forest)436 dump_forest_info (fma_forest *forest)
437 {
438   forest->dump_info ();
439 }
440 
441 /* Dispatch forest to the least utilized pipeline.  */
442 
443 void
dispatch()444 fma_forest::dispatch ()
445 {
446   this->m_target_parity = this->m_roots->front ()->get_parity ();
447   int fpu_balance = this->m_globals->get_fpu_balance ();
448   if (fpu_balance != 0)
449     this->m_target_parity = (fpu_balance < 0);
450 
451   if (dump_file)
452     fprintf (dump_file, "Target parity for forest #%d: %s\n", this->m_id,
453 	     this->m_target_parity ? "odd" : "even");
454 }
455 
456 /* Wrapper around fma_forest::dispatch for use as parameter of function pointer
457    type in func_fma_steering::dfs.  */
458 
459 static void
dispatch_forest(fma_forest * forest)460 dispatch_forest (fma_forest *forest)
461 {
462   forest->dispatch ();
463 }
464 
fma_node(fma_node * parent,du_chain * chain)465 fma_node::fma_node (fma_node *parent, du_chain *chain)
466 {
467   memset (this, 0, sizeof (*this));
468   this->m_parent = parent;
469   this->m_children = new std::list<fma_node *>;
470   this->m_insn = chain->insn;
471   /* root_p () cannot be used to check for root before root is set.  */
472   if (this->m_parent == this)
473     this->m_root = static_cast<fma_root_node *> (parent);
474   else
475     {
476       this->m_root = parent->m_root;
477       this->get_forest ()->fma_node_created (this);
478     }
479 }
480 
~fma_node()481 fma_node::~fma_node ()
482 {
483   delete this->m_children;
484 }
485 
486 std::list<fma_node *> *
get_children()487 fma_node::get_children ()
488 {
489   return this->m_children;
490 }
491 
492 rtx_insn *
get_insn()493 fma_node::get_insn ()
494 {
495   return this->m_insn;
496 }
497 
498 void
set_head(du_head * head)499 fma_node::set_head (du_head *head)
500 {
501   gcc_assert (!this->m_head);
502   this->m_head = head;
503 }
504 
505 /* Add a child to this node in the list of children.  */
506 
507 void
add_child(fma_node * child)508 fma_node::add_child (fma_node *child)
509 {
510   this->m_children->push_back (child);
511 }
512 
513 /* Return the parity of the destination register of the instruction represented
514    by this node.  */
515 
516 int
get_parity()517 fma_node::get_parity ()
518 {
519   return this->m_head->regno % 2;
520 }
521 
522 /* Get the actual forest associated with a non root node as the one the node
523    points to might have been merged into another one.  In that case the pointer
524    in the root nodes are updated so we return the forest pointer of a root node
525    pointed to by the initial forest.  Despite being a oneliner, this method is
526    defined here as it references a method from fma_root_node.  */
527 
528 fma_forest *
get_forest()529 fma_node::get_forest ()
530 {
531   return this->m_root->get_forest ();
532 }
533 
534 /* Return whether a node is a root node.  */
535 
536 bool
root_p()537 fma_node::root_p ()
538 {
539   return this->m_root == this;
540 }
541 
542 /* Dump information about the children of node FMA_NODE in forest FOREST.  */
543 
544 void
dump_info(ATTRIBUTE_UNUSED fma_forest * forest)545 fma_node::dump_info (ATTRIBUTE_UNUSED fma_forest *forest)
546 {
547   struct du_chain *chain;
548   std::list<fma_node *>::iterator fma_child;
549 
550   gcc_assert (dump_file);
551 
552   if (this->get_children ()->empty ())
553     return;
554 
555   fprintf (dump_file, "Instruction(s)");
556   for (chain = this->m_head->first; chain; chain = chain->next_use)
557     {
558       if (!is_fmul_fmac_insn (chain->insn, true))
559 	continue;
560 
561       if (chain->loc != &SET_DEST (PATTERN (chain->insn)))
562 	continue;
563 
564       fprintf (dump_file, " %d", INSN_UID (chain->insn));
565     }
566 
567   fprintf (dump_file, " is(are) accumulator dependency of instructions");
568   for (fma_child = this->get_children ()->begin ();
569        fma_child != this->get_children ()->end (); fma_child++)
570     fprintf (dump_file, " %d", INSN_UID ((*fma_child)->m_insn));
571   fprintf (dump_file, "\n");
572 }
573 
574 /* Wrapper around fma_node::dump_info for use as parameter of function pointer
575    type in func_fma_steering::dfs.  */
576 
577 static void
dump_tree_node_info(fma_forest * forest,fma_node * node)578 dump_tree_node_info (fma_forest *forest, fma_node *node)
579 {
580   node->dump_info (forest);
581 }
582 
583 /* Rename the destination register of a single FMUL or FMADD/FMSUB instruction
584    represented by FMA_NODE to a register that respect the target parity for
585    FOREST or with same parity of the instruction represented by its parent node
586    if it has one.  */
587 
588 void
rename(fma_forest * forest)589 fma_node::rename (fma_forest *forest)
590 {
591   int cur_parity, target_parity;
592 
593   /* This is alternate root of a chain and thus has no children.  It will be
594      renamed when processing the canonical root for that chain.  */
595   if (!this->m_head)
596     return;
597 
598   target_parity = forest->get_target_parity ();
599   if (this->m_parent)
600     target_parity = this->m_parent->get_parity ();
601   cur_parity = this->get_parity ();
602 
603   /* Rename if parity differs.  */
604   if (cur_parity != target_parity)
605     {
606       rtx_insn *insn = this->m_insn;
607       HARD_REG_SET unavailable;
608       machine_mode mode;
609       int reg;
610 
611       if (dump_file)
612 	{
613 	  unsigned cur_dest_reg = this->m_head->regno;
614 
615 	  fprintf (dump_file, "FMA or FMUL at insn %d but destination "
616 		   "register (%s) has different parity from expected to "
617 		   "maximize FPU pipeline utilization\n", INSN_UID (insn),
618 		   reg_names[cur_dest_reg]);
619 	}
620 
621       /* Don't clobber traceback for noreturn functions.  */
622       CLEAR_HARD_REG_SET (unavailable);
623       if (frame_pointer_needed)
624 	{
625 	  add_to_hard_reg_set (&unavailable, Pmode, FRAME_POINTER_REGNUM);
626 	  add_to_hard_reg_set (&unavailable, Pmode, HARD_FRAME_POINTER_REGNUM);
627 	}
628 
629       /* Exclude registers with wrong parity.  */
630       mode = GET_MODE (SET_DEST (PATTERN (insn)));
631       for (reg = cur_parity; reg < FIRST_PSEUDO_REGISTER; reg += 2)
632 	add_to_hard_reg_set (&unavailable, mode, reg);
633 
634       if (!rename_single_chain (this->m_head, &unavailable))
635 	{
636 	  if (dump_file)
637 	    fprintf (dump_file, "Destination register of insn %d could not be "
638 		     "renamed. Dependent FMA insns will use this parity from "
639 		     "there on.\n", INSN_UID (insn));
640 	}
641       else
642 	cur_parity = target_parity;
643     }
644 
645   forest->get_globals ()->update_balance (cur_parity);
646 }
647 
648 /* Wrapper around fma_node::dump_info for use as parameter of function pointer
649    type in func_fma_steering::dfs.  */
650 
651 static void
rename_fma_node(fma_forest * forest,fma_node * node)652 rename_fma_node (fma_forest *forest, fma_node *node)
653 {
654   node->rename (forest);
655 }
656 
fma_root_node(func_fma_steering * globals,du_chain * chain,int id)657 fma_root_node::fma_root_node (func_fma_steering *globals, du_chain *chain,
658 			      int id) : fma_node (this, chain)
659 {
660   this->m_forest = new fma_forest (globals, this, id);
661   this->m_forest->fma_node_created (this);
662 }
663 
664 fma_forest *
get_forest()665 fma_root_node::get_forest ()
666 {
667   return this->m_forest;
668 }
669 
670 void
set_forest(fma_forest * ref_forest)671 fma_root_node::set_forest (fma_forest *ref_forest)
672 {
673   this->m_forest = ref_forest;
674 }
675 
676 /* Dump information about the roots of forest FOREST.  */
677 
678 void
dump_info(fma_forest * forest)679 fma_root_node::dump_info (fma_forest *forest)
680 {
681   gcc_assert (dump_file);
682 
683   if (this == forest->get_roots ()->front ())
684     fprintf (dump_file, "Instruction(s) at root of forest #%d:",
685 	     forest->get_id ());
686   fprintf (dump_file, " %d", INSN_UID (this->m_insn));
687   if (this == forest->get_roots ()->back ())
688     fprintf (dump_file, "\n");
689 }
690 
691 /* Wrapper around fma_root_node::dump_info for use as parameter of function
692    pointer type in func_fma_steering::dfs.  */
693 
694 static void
dump_tree_root_info(fma_forest * forest,fma_root_node * node)695 dump_tree_root_info (fma_forest *forest, fma_root_node *node)
696 {
697   node->dump_info (forest);
698 }
699 
func_fma_steering()700 func_fma_steering::func_fma_steering () : m_fpu_balance (0)
701 {
702   this->m_insn_fma_head_map = new hash_map<rtx_insn *, fma_node *>;
703   this->m_fma_forests.clear ();
704   this->m_next_forest_id = 0;
705 }
706 
~func_fma_steering()707 func_fma_steering::~func_fma_steering ()
708 {
709   delete this->m_insn_fma_head_map;
710 }
711 
712 int
get_fpu_balance()713 func_fma_steering::get_fpu_balance ()
714 {
715   return this->m_fpu_balance;
716 }
717 
718 void
remove_forest(fma_forest * forest)719 func_fma_steering::remove_forest (fma_forest *forest)
720 {
721   this->m_fma_forests.remove (forest);
722 }
723 
724 /* Memorize the mapping of this instruction to its fma_node object and return
725    whether such a mapping existed.  */
726 
727 bool
put_node(fma_node * node)728 func_fma_steering::put_node (fma_node *node)
729 {
730   return this->m_insn_fma_head_map->put (node->get_insn (), node);
731 }
732 
733 /* Update the current balance considering a node with the given PARITY.  */
734 
735 void
update_balance(int parity)736 func_fma_steering::update_balance (int parity)
737 {
738   this->m_fpu_balance = parity ? this->m_fpu_balance + 1
739 			       : this->m_fpu_balance - 1;
740 }
741 
742 /* Return whether an fma_node object exists for instruction INSN and, if not,
743    allocate one in *RET.  */
744 
745 fma_node *
get_fma_node(rtx_insn * insn)746 func_fma_steering::get_fma_node (rtx_insn *insn)
747 {
748   fma_node **fma_slot;
749 
750   fma_slot = this->m_insn_fma_head_map->get (insn);
751   if (fma_slot)
752     return *fma_slot;
753   return NULL;
754 }
755 
756 /* Allocate and initialize fma_node objects for the FMUL or FMADD/FMSUB
757    instruction in CHAIN->insn and its dependent FMADD/FMSUB instructions, all
758    part of FOREST.  For the children, the associated head is left untouched
759    (and thus null) as this function will be called again when considering the
760    chain where they are def.  For the parent, the chain is given in HEAD.  */
761 
762 void
analyze_fma_fmul_insn(fma_forest * ref_forest,du_chain * chain,du_head_p head)763 func_fma_steering::analyze_fma_fmul_insn (fma_forest *ref_forest,
764 					  du_chain *chain, du_head_p head)
765 {
766   fma_forest *forest;
767   fma_node *node = this->get_fma_node (chain->insn);
768 
769   /* This is a root node.  */
770   if (!node)
771     {
772       fma_root_node *root_node;
773 
774       root_node = new fma_root_node (this, chain, this->m_next_forest_id++);
775       forest = root_node->get_forest ();
776       node = root_node;
777 
778       /* Until proved otherwise, assume this root is not part of an existing
779 	 forest and thus add its forest to the list of forests.  */
780       this->m_fma_forests.push_back (forest);
781     }
782   else
783     forest = node->get_forest ();
784 
785   node->set_head (head);
786 
787   /* fma_node is part of a chain with several defs, one of them having already
788      been processed.  The root of that already processed def is the canonical
789      one and the root of fma_node is added to its forest.  No need to process
790      the children nodes as they were already processed when the other def was
791      processed.  */
792   if (ref_forest)
793     {
794       ref_forest->merge_forest (forest);
795       return;
796     }
797 
798   for (chain = head->first; chain; chain = chain->next_use)
799     {
800       fma_node *child_fma;
801       rtx fma_rtx, *accum_rtx_p;
802 
803       if (!is_fmul_fmac_insn (chain->insn, false))
804 	continue;
805 
806       /* Get FMA rtx.  */
807       fma_rtx = SET_SRC (PATTERN (chain->insn));
808       /* FMA is negated.  */
809       if (GET_CODE (fma_rtx) == NEG)
810 	fma_rtx = XEXP (fma_rtx, 0);
811       /* Get accumulator rtx.  */
812       accum_rtx_p = &XEXP (fma_rtx, 2);
813       /* Accumulator is negated.  */
814       if (!REG_P (*accum_rtx_p))
815 	accum_rtx_p = &XEXP (*accum_rtx_p, 0);
816 
817       /* This du_chain structure is not for the accumulator register.  */
818       if (accum_rtx_p != chain->loc)
819 	continue;
820 
821       /* If object already created, this is a loop carried dependency.  We
822 	 don't include this object in the children as we want trees for
823 	 rename_fma_trees to not be an infinite loop.  */
824       if (this->get_fma_node (chain->insn))
825 	continue;
826 
827       child_fma = new fma_node (node, chain);
828 
829       /* Memorize the mapping of this instruction to its fma_node object
830 	 as it will be processed for the chain starting at its destination
831 	 register later.  */
832 
833       /* Link to siblings.  */
834       node->add_child (child_fma);
835     }
836 }
837 
838 /* Perform a depth-first search of the forests of fma_node in
839    THIS->m_fma_forests, calling PROCESS_FOREST () on each fma_forest object in
840    THIS->m_fma_forests list, PROCESS_ROOT () on each tree root and
841    PROCESS_NODE () on each node.  If FREE is true, free all std::list in the
842    same dfs.  */
843 
844 void
dfs(void (* process_forest)(fma_forest *),void (* process_root)(fma_forest *,fma_root_node *),void (* process_node)(fma_forest *,fma_node *),bool free)845 func_fma_steering::dfs (void (*process_forest) (fma_forest *),
846 			void (*process_root) (fma_forest *, fma_root_node *),
847 			void (*process_node) (fma_forest *, fma_node *),
848 			bool free)
849 {
850   auto_vec<fma_node *> to_process;
851   auto_vec<fma_node *> to_free;
852   std::list<fma_forest *>::iterator forest_iter;
853 
854   /* For each forest.  */
855   for (forest_iter = this->m_fma_forests.begin ();
856        forest_iter != this->m_fma_forests.end (); ++forest_iter)
857     {
858       std::list<fma_root_node *>::iterator root_iter;
859 
860       if (process_forest)
861 	process_forest (*forest_iter);
862 
863       /* For each tree root in this forest.  */
864       for (root_iter = (*forest_iter)->get_roots ()->begin ();
865 	   root_iter != (*forest_iter)->get_roots ()->end (); ++root_iter)
866 	{
867 	  if (process_root)
868 	    process_root (*forest_iter, *root_iter);
869 	  to_process.safe_push (*root_iter);
870 	}
871 
872       /* For each tree node in this forest.  */
873       while (!to_process.is_empty ())
874 	{
875 	  fma_node *node;
876 	  std::list<fma_node *>::iterator child_iter;
877 
878 	  node = to_process.pop ();
879 
880 	  if (process_node)
881 	    process_node (*forest_iter, node);
882 
883 	  for (child_iter = node->get_children ()->begin ();
884 	       child_iter != node->get_children ()->end (); ++child_iter)
885 	    to_process.safe_push (*child_iter);
886 
887 	  /* Defer freeing so that the process_node callback can access the
888 	     parent and children of the node being processed.  */
889 	  if (free)
890 	    to_free.safe_push (node);
891 	}
892 
893       if (free)
894 	{
895 	  delete *forest_iter;
896 
897 	  while (!to_free.is_empty ())
898 	    {
899 	      fma_node *node = to_free.pop ();
900 	      if (node->root_p ())
901 		delete static_cast<fma_root_node *> (node);
902 	      else
903 		delete node;
904 	    }
905 	}
906     }
907 }
908 
909 /* Build the dependency trees of FMUL and FMADD/FMSUB instructions.  */
910 
911 void
analyze()912 func_fma_steering::analyze ()
913 {
914   int i, n_blocks, *bb_dfs_preorder;
915   basic_block bb;
916   rtx_insn *insn;
917 
918   bb_dfs_preorder = XNEWVEC (int, last_basic_block_for_fn (cfun));
919   n_blocks = pre_and_rev_post_order_compute (bb_dfs_preorder, NULL, false);
920 
921   /* Browse the graph of basic blocks looking for FMUL or FMADD/FMSUB
922      instructions.  */
923   for (i = 0; i < n_blocks; i++)
924     {
925       bb = BASIC_BLOCK_FOR_FN (cfun, bb_dfs_preorder[i]);
926       FOR_BB_INSNS (bb, insn)
927 	{
928 	  operand_rr_info *dest_op_info;
929 	  struct du_chain *chain = NULL;
930 	  unsigned dest_regno;
931 	  fma_forest *forest = NULL;
932 	  du_head_p head = NULL;
933 	  int i;
934 
935 	  if (!is_fmul_fmac_insn (insn, true))
936 	    continue;
937 
938 	  /* Search the chain where this instruction is (one of) the root.  */
939 	  dest_op_info = insn_rr[INSN_UID (insn)].op_info;
940 	  dest_regno = REGNO (SET_DEST (PATTERN (insn)));
941 	  for (i = 0; i < dest_op_info->n_chains; i++)
942 	    {
943 	      /* The register tracked by this chain does not match the
944 		 destination register of insn.  */
945 	      if (dest_op_info->heads[i]->regno != dest_regno)
946 		continue;
947 
948 	      head = dest_op_info->heads[i];
949 	      /* The chain was merged in another, find the new head.  */
950 	      if (!head->first)
951 		head = regrename_chain_from_id (head->id);
952 
953 	      /* Search the chain element for this instruction and, if another
954 		 FMUL or FMADD/FMSUB instruction was already processed, note
955 		 the forest of its tree.  */
956 	      forest = NULL;
957 	      for (chain = head->first; chain; chain = chain->next_use)
958 		{
959 		  fma_node **fma_slot;
960 
961 		  if (!is_fmul_fmac_insn (chain->insn, true))
962 		    continue;
963 
964 		  /* This is a use, continue.  */
965 		  if (chain->loc != &SET_DEST (PATTERN (chain->insn)))
966 		    continue;
967 
968 		  if (chain->insn == insn)
969 		    break;
970 
971 		  fma_slot = this->m_insn_fma_head_map->get (chain->insn);
972 		  if (fma_slot && (*fma_slot)->get_children ())
973 		    forest = (*fma_slot)->get_forest ();
974 		}
975 	      if (chain)
976 		break;
977 	    }
978 
979 	  /* Due to implementation of regrename, dest register can slip away
980 	     from regrename's analysis.  As a result, there is no chain for
981 	     the destination register of insn.  We simply skip the insn even
982 	     it is a fmul/fmac instruction.  This can happen when the dest
983 	     register is also a source register of insn and one of the below
984 	     conditions is satisfied:
985 	       1) the source reg is setup in larger mode than this insn;
986 	       2) the source reg is uninitialized;
987 	       3) the source reg is passed in as parameter.  */
988 	  if (i < dest_op_info->n_chains)
989 	    this->analyze_fma_fmul_insn (forest, chain, head);
990 	}
991     }
992   free (bb_dfs_preorder);
993 
994   if (dump_file)
995     this->dfs (dump_forest_info, dump_tree_root_info, dump_tree_node_info,
996 	       false);
997 }
998 
999 /* Perform the renaming of all chains with FMUL or FMADD/FMSUB involved with
1000    the objective of keeping FPU pipeline balanced in term of instructions and
1001    having FMADD/FMSUB with dependencies on previous FMUL or FMADD/FMSUB be
1002    scheduled on the same pipeline.  */
1003 
1004 void
rename_fma_trees()1005 func_fma_steering::rename_fma_trees ()
1006 {
1007   this->dfs (dispatch_forest, NULL, rename_fma_node, true);
1008 
1009   if (dump_file && !this->m_fma_forests.empty ())
1010     {
1011       fprintf (dump_file, "Function %s has ", current_function_name ());
1012       if (this->m_fpu_balance == 0)
1013 	fprintf (dump_file, "perfect balance of FMUL/FMA chains between the "
1014 		 "two FPU pipelines\n");
1015       else if (this->m_fpu_balance > 0)
1016 	fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the second "
1017 		 "FPU pipeline\n", this->m_fpu_balance);
1018       else /* this->m_fpu_balance < 0 */
1019 	fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the first "
1020 		 "FPU pipeline\n", - this->m_fpu_balance);
1021     }
1022 }
1023 
1024 /* Execute FMA steering pass.  */
1025 
1026 void
execute_fma_steering()1027 func_fma_steering::execute_fma_steering ()
1028 {
1029   df_set_flags (DF_LR_RUN_DCE);
1030   df_note_add_problem ();
1031   df_analyze ();
1032   df_set_flags (DF_DEFER_INSN_RESCAN);
1033 
1034   regrename_init (true);
1035   regrename_analyze (NULL);
1036   this->analyze ();
1037   this->rename_fma_trees ();
1038   regrename_finish ();
1039 }
1040 
1041 const pass_data pass_data_fma_steering =
1042 {
1043   RTL_PASS, /* type */
1044   "fma_steering", /* name */
1045   OPTGROUP_NONE, /* optinfo_flags */
1046   TV_NONE, /* tv_id */
1047   0, /* properties_required */
1048   0, /* properties_provided */
1049   0, /* properties_destroyed */
1050   0, /* todo_flags_start */
1051   TODO_df_finish, /* todo_flags_finish */
1052 };
1053 
1054 class pass_fma_steering : public rtl_opt_pass
1055 {
1056 public:
pass_fma_steering(gcc::context * ctxt)1057   pass_fma_steering (gcc::context *ctxt)
1058     : rtl_opt_pass (pass_data_fma_steering, ctxt)
1059   {}
1060 
1061   /* opt_pass methods: */
gate(function *)1062   virtual bool gate (function *)
1063     {
1064       return (aarch64_tune_params.extra_tuning_flags
1065 	      & AARCH64_EXTRA_TUNE_RENAME_FMA_REGS)
1066 	      && optimize >= 2;
1067     }
1068 
execute(function *)1069   virtual unsigned int execute (function *)
1070     {
1071       func_fma_steering *fma_steering = new func_fma_steering;
1072       fma_steering->execute_fma_steering ();
1073       delete fma_steering;
1074       return 0;
1075     }
1076 
1077 }; // class pass_fma_steering
1078 
1079 /* Create a new fma steering pass instance.  */
1080 
1081 rtl_opt_pass *
make_pass_fma_steering(gcc::context * ctxt)1082 make_pass_fma_steering (gcc::context *ctxt)
1083 {
1084   return new pass_fma_steering (ctxt);
1085 }
1086