1 /* FMA steering optimization pass for Cortex-A57.
2 Copyright (C) 2015-2020 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #define IN_TARGET_CODE 1
22
23 #include "config.h"
24 #define INCLUDE_LIST
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "df.h"
31 #include "insn-config.h"
32 #include "regs.h"
33 #include "memmodel.h"
34 #include "emit-rtl.h"
35 #include "recog.h"
36 #include "cfganal.h"
37 #include "insn-attr.h"
38 #include "context.h"
39 #include "tree-pass.h"
40 #include "function-abi.h"
41 #include "regrename.h"
42 #include "aarch64-protos.h"
43
44 /* For better performance, the destination of FMADD/FMSUB instructions should
45 have the same parity as their accumulator register if the accumulator
46 contains the result of a previous FMUL or FMADD/FMSUB instruction if
47 targetting Cortex-A57 processors. Performance is also increased by
48 otherwise keeping a good balance in the parity of the destination register
49 of FMUL or FMADD/FMSUB.
50
51 This pass ensure that registers are renamed so that these conditions hold.
52 We reuse the existing register renaming facility from regrename.c to build
53 dependency chains and expose candidate registers for renaming.
54
55
56 The algorithm has three steps:
57
58 First, the functions of the register renaming pass are called. These
59 analyze the instructions and produce a list of def/use chains of
60 instructions.
61
62 Next, this information is used to build trees of multiply and
63 multiply-accumulate instructions. The roots of these trees are any
64 multiply, or any multiply-accumulate whose accumulator is not dependent on
65 a multiply or multiply-accumulate instruction. A child is added to the
66 tree where a dependency chain exists between the result of the parent
67 instruction and the accumulator operand of the child, as in the diagram
68 below:
69
70 fmul s2, s0, s1
71 / \
72 fmadd s0, s1, s1, s2 fmadd s4, s1, s1 s2
73 |
74 fmadd s3, s1, s1, s0
75
76 Trees made of a single instruction are permitted.
77
78 Finally, renaming is performed. The parity of the destination register at
79 the root of a tree is checked against the current balance of multiply and
80 multiply-accumulate on each pipeline. If necessary, the root of a tree is
81 renamed, in which case the rest of the tree is then renamed to keep the same
82 parity in the destination registers of all instructions in the tree. */
83
84
85
86 /* Forward declarations. */
87 class fma_node;
88 class fma_root_node;
89 class func_fma_steering;
90
91 /* Dependencies between FMUL or FMADD/FMSUB instructions and subsequent
92 FMADD/FMSUB instructions form a graph. This is because alternatives can
93 make a register be set by several FMUL or FMADD/FMSUB instructions in
94 different basic blocks and because of loops. For ease of browsing, the
95 connected components of this graph are broken up into forests of trees.
96 Forests are represented by fma_forest objects, contained in the fma_forests
97 list. Using a separate object for the forests allows for a better use of
98 memory as there is some information that is global to each forest, such as
99 the number of FMSUB and FMADD/FMSUB instructions currently scheduled on each
100 floating-point execution pipelines. */
101
102 class fma_forest
103 {
104 public:
105 fma_forest (func_fma_steering *, fma_root_node *, int);
106 ~fma_forest ();
107
108 int get_id ();
109 std::list<fma_root_node *> *get_roots ();
110 func_fma_steering *get_globals ();
111 int get_target_parity ();
112 void fma_node_created (fma_node *);
113 void merge_forest (fma_forest *);
114 void dump_info ();
115 void dispatch ();
116
117 private:
118 /* Prohibit copy construction. */
119 fma_forest (const fma_forest &);
120
121 /* The list of roots that form this forest. */
122 std::list<fma_root_node *> *m_roots;
123
124 /* Target parity the destination register of all FMUL and FMADD/FMSUB
125 instructions in this forest should have. */
126 int m_target_parity;
127
128 /* Link to the instance of func_fma_steering holding data related to the
129 FMA steering of the current function (cfun). */
130 func_fma_steering *m_globals;
131
132 /* Identifier for the forest (used for dumps). */
133 int m_id;
134
135 /* Total number of nodes in the forest (for statistics). */
136 int m_nb_nodes;
137 };
138
139 class fma_node
140 {
141 public:
142 fma_node (fma_node *parent, du_chain *chain);
143 ~fma_node ();
144
145 bool root_p ();
146 fma_forest *get_forest ();
147 std::list<fma_node *> *get_children ();
148 rtx_insn *get_insn ();
149 void add_child (fma_node *);
150 int get_parity ();
151 void set_head (du_head *);
152 void rename (fma_forest *);
153 void dump_info (fma_forest *);
154
155 private:
156 /* Prohibit copy construction. */
157 fma_node (const fma_node &);
158
159 protected:
160 /* Root node that lead to this node. */
161 fma_root_node *m_root;
162
163 /* The parent node of this node. If the node belong to a chain with several
164 parent nodes, the first one encountered in a depth-first search is chosen
165 as canonical parent. */
166 fma_node *m_parent;
167
168 /* The list of child nodes. If a chain contains several parent nodes, one is
169 chosen as canonical parent and the others will have no children. */
170 std::list<fma_node *> *m_children;
171
172 /* The associated DU_HEAD chain that the insn represented by this object
173 is (one of) the root of. When a chain contains several roots, the non
174 canonical ones have this field set to NULL. */
175 struct du_head *m_head;
176
177 /* The FMUL or FMADD/FMSUB instruction this object corresponds to. */
178 rtx_insn *m_insn;
179 };
180
181 class fma_root_node : public fma_node
182 {
183 public:
184 fma_root_node (func_fma_steering *, du_chain *, int);
185
186 fma_forest *get_forest ();
187 void set_forest (fma_forest *);
188 void dump_info (fma_forest *);
189
190 private:
191 /* The forest this node belonged to when it was created. */
192 fma_forest *m_forest;
193 };
194
195 /* Class holding all data and methods relative to the FMA steering of a given
196 function. The FMA steering pass could then run in parallel for different
197 functions. */
198
199 class func_fma_steering
200 {
201 public:
202 func_fma_steering ();
203 ~func_fma_steering ();
204
205 int get_fpu_balance ();
206 void remove_forest (fma_forest *);
207 bool put_node (fma_node *);
208 void update_balance (int);
209 fma_node *get_fma_node (rtx_insn *);
210 void analyze_fma_fmul_insn (fma_forest *, du_chain *, du_head_p);
211 void execute_fma_steering ();
212
213 private:
214 /* Prohibit copy construction. */
215 func_fma_steering (const func_fma_steering &);
216
217 void dfs (void (*) (fma_forest *), void (*) (fma_forest *, fma_root_node *),
218 void (*) (fma_forest *, fma_node *), bool);
219 void analyze ();
220 void rename_fma_trees ();
221
222 /* Mapping between FMUL or FMADD/FMSUB instructions and the associated
223 fma_node object. Used when analyzing an instruction that is a root of
224 a chain to find if such an object was created because this instruction
225 is also a use in another chain. */
226 hash_map<rtx_insn *, fma_node *> *m_insn_fma_head_map;
227
228 /* A list of all the forests in a given function. */
229 std::list<fma_forest *> m_fma_forests;
230
231 /* Balance of FMUL and FMADD/FMSUB instructions between the two FPU
232 pipelines:
233 < 0: more instruction dispatched to the first pipeline
234 == 0: perfect balance
235 > 0: more instruction dispatched to the second pipeline. */
236 int m_fpu_balance;
237
238 /* Identifier for the next forest created. */
239 int m_next_forest_id;
240 };
241
242 /* Rename the register HEAD->regno in all the insns in the chain HEAD to any
243 register not in the set UNAVAILABLE. Adapted from rename_chains in
244 regrename.c. */
245
246 static bool
rename_single_chain(du_head_p head,HARD_REG_SET * unavailable)247 rename_single_chain (du_head_p head, HARD_REG_SET *unavailable)
248 {
249 int best_new_reg;
250 int n_uses = 0;
251 struct du_chain *tmp;
252 int reg = head->regno;
253 enum reg_class super_class = NO_REGS;
254
255 if (head->cannot_rename)
256 return false;
257
258 if (fixed_regs[reg] || global_regs[reg]
259 || (frame_pointer_needed && reg == HARD_FRAME_POINTER_REGNUM))
260 return false;
261
262 /* Iterate over elements in the chain in order to:
263 1. Count number of uses, and narrow the set of registers we can
264 use for renaming.
265 2. Compute the superunion of register classes in this chain. */
266 for (tmp = head->first; tmp; tmp = tmp->next_use)
267 {
268 if (DEBUG_INSN_P (tmp->insn))
269 continue;
270 n_uses++;
271 *unavailable |= ~reg_class_contents[tmp->cl];
272 super_class = reg_class_superunion[(int) super_class][(int) tmp->cl];
273 }
274
275 if (n_uses < 1)
276 return false;
277
278 best_new_reg = find_rename_reg (head, super_class, unavailable, reg,
279 false);
280
281 if (dump_file)
282 {
283 fprintf (dump_file, "Register %s in insn %d", reg_names[reg],
284 INSN_UID (head->first->insn));
285 if (head->call_abis)
286 fprintf (dump_file, " crosses a call");
287 }
288
289 if (best_new_reg == reg)
290 {
291 if (dump_file)
292 fprintf (dump_file, "; no available better choice\n");
293 return false;
294 }
295
296 if (regrename_do_replace (head, best_new_reg))
297 {
298 if (dump_file)
299 fprintf (dump_file, ", renamed as %s\n", reg_names[best_new_reg]);
300 df_set_regs_ever_live (best_new_reg, true);
301 }
302 else
303 {
304 if (dump_file)
305 fprintf (dump_file, ", renaming as %s failed\n",
306 reg_names[best_new_reg]);
307 return false;
308 }
309 return true;
310 }
311
312 /* Return whether T is the attribute of a FMADD/FMSUB-like instruction. */
313
314 static bool
is_fmac_op(enum attr_type t)315 is_fmac_op (enum attr_type t)
316 {
317 return (t == TYPE_FMACS) || (t == TYPE_FMACD) || (t == TYPE_NEON_FP_MLA_S);
318 }
319
320 /* Return whether T is the attribute of a FMUL instruction. */
321
322 static bool
is_fmul_op(enum attr_type t)323 is_fmul_op (enum attr_type t)
324 {
325 return (t == TYPE_FMULS) || (t == TYPE_FMULD) || (t == TYPE_NEON_FP_MUL_S);
326 }
327
328 /* Return whether INSN is an FMUL (if FMUL_OK is true) or FMADD/FMSUB
329 instruction. */
330
331 static bool
is_fmul_fmac_insn(rtx_insn * insn,bool fmul_ok)332 is_fmul_fmac_insn (rtx_insn *insn, bool fmul_ok)
333 {
334 enum attr_type t;
335
336 if (!NONDEBUG_INSN_P (insn))
337 return false;
338
339 if (recog_memoized (insn) < 0)
340 return false;
341
342 /* Only consider chain(s) this instruction is a root of if this is an FMUL or
343 FMADD/FMSUB instruction. This allows to avoid browsing chains of all
344 instructions for FMUL or FMADD/FMSUB in them. */
345 t = get_attr_type (insn);
346 return is_fmac_op (t) || (fmul_ok && is_fmul_op (t));
347 }
348
349
350 /*
351 * Class fma_forest method definitions.
352 */
353
fma_forest(func_fma_steering * fma_steer,fma_root_node * fma_root,int id)354 fma_forest::fma_forest (func_fma_steering *fma_steer, fma_root_node *fma_root,
355 int id)
356 {
357 memset (this, 0, sizeof (*this));
358 this->m_globals = fma_steer;
359 this->m_roots = new std::list<fma_root_node *>;
360 this->m_roots->push_back (fma_root);
361 this->m_id = id;
362 }
363
~fma_forest()364 fma_forest::~fma_forest ()
365 {
366 delete this->m_roots;
367 }
368
369 int
get_id()370 fma_forest::get_id ()
371 {
372 return this->m_id;
373 }
374
375 std::list<fma_root_node *> *
get_roots()376 fma_forest::get_roots ()
377 {
378 return this->m_roots;
379 }
380
381 func_fma_steering *
get_globals()382 fma_forest::get_globals ()
383 {
384 return this->m_globals;
385 }
386
387 int
get_target_parity()388 fma_forest::get_target_parity ()
389 {
390 return this->m_target_parity;
391 }
392
393 /* Act on the creation of NODE by updating statistics in FOREST and adding an
394 entry for it in the func_fma_steering hashmap. */
395
fma_node_created(fma_node * node)396 void fma_forest::fma_node_created (fma_node *node)
397 {
398 bool created = !this->m_globals->put_node (node);
399
400 gcc_assert (created);
401 this->m_nb_nodes++;
402 }
403
404 /* Merge REF_FOREST and OTHER_FOREST together, making REF_FOREST the canonical
405 fma_forest object to represent both. */
406
407 void
merge_forest(fma_forest * other_forest)408 fma_forest::merge_forest (fma_forest *other_forest)
409 {
410 std::list<fma_root_node *> *other_roots;
411 std::list<fma_root_node *>::iterator other_root_iter;
412
413 if (this == other_forest)
414 return;
415
416 other_roots = other_forest->m_roots;
417
418 /* Update root nodes' pointer to forest. */
419 for (other_root_iter = other_roots->begin ();
420 other_root_iter != other_roots->end (); ++other_root_iter)
421 (*other_root_iter)->set_forest (this);
422
423 /* Remove other_forest from the list of forests and move its tree roots in
424 the list of tree roots of ref_forest. */
425 this->m_globals->remove_forest (other_forest);
426 this->m_roots->splice (this->m_roots->begin (), *other_roots);
427 this->m_nb_nodes += other_forest->m_nb_nodes;
428
429 delete other_forest;
430 }
431
432 /* Dump information about the forest FOREST. */
433
434 void
dump_info()435 fma_forest::dump_info ()
436 {
437 gcc_assert (dump_file);
438
439 fprintf (dump_file, "Forest #%d has %d nodes\n", this->m_id,
440 this->m_nb_nodes);
441 }
442
443 /* Wrapper around fma_forest::dump_info for use as parameter of function
444 pointer type in func_fma_steering::dfs. */
445
446 static void
dump_forest_info(fma_forest * forest)447 dump_forest_info (fma_forest *forest)
448 {
449 forest->dump_info ();
450 }
451
452 /* Dispatch forest to the least utilized pipeline. */
453
454 void
dispatch()455 fma_forest::dispatch ()
456 {
457 this->m_target_parity = this->m_roots->front ()->get_parity ();
458 int fpu_balance = this->m_globals->get_fpu_balance ();
459 if (fpu_balance != 0)
460 this->m_target_parity = (fpu_balance < 0);
461
462 if (dump_file)
463 fprintf (dump_file, "Target parity for forest #%d: %s\n", this->m_id,
464 this->m_target_parity ? "odd" : "even");
465 }
466
467 /* Wrapper around fma_forest::dispatch for use as parameter of function pointer
468 type in func_fma_steering::dfs. */
469
470 static void
dispatch_forest(fma_forest * forest)471 dispatch_forest (fma_forest *forest)
472 {
473 forest->dispatch ();
474 }
475
fma_node(fma_node * parent,du_chain * chain)476 fma_node::fma_node (fma_node *parent, du_chain *chain)
477 {
478 memset (this, 0, sizeof (*this));
479 this->m_parent = parent;
480 this->m_children = new std::list<fma_node *>;
481 this->m_insn = chain->insn;
482 /* root_p () cannot be used to check for root before root is set. */
483 if (this->m_parent == this)
484 this->m_root = static_cast<fma_root_node *> (parent);
485 else
486 {
487 this->m_root = parent->m_root;
488 this->get_forest ()->fma_node_created (this);
489 }
490 }
491
~fma_node()492 fma_node::~fma_node ()
493 {
494 delete this->m_children;
495 }
496
497 std::list<fma_node *> *
get_children()498 fma_node::get_children ()
499 {
500 return this->m_children;
501 }
502
503 rtx_insn *
get_insn()504 fma_node::get_insn ()
505 {
506 return this->m_insn;
507 }
508
509 void
set_head(du_head * head)510 fma_node::set_head (du_head *head)
511 {
512 gcc_assert (!this->m_head);
513 this->m_head = head;
514 }
515
516 /* Add a child to this node in the list of children. */
517
518 void
add_child(fma_node * child)519 fma_node::add_child (fma_node *child)
520 {
521 this->m_children->push_back (child);
522 }
523
524 /* Return the parity of the destination register of the instruction represented
525 by this node. */
526
527 int
get_parity()528 fma_node::get_parity ()
529 {
530 return this->m_head->regno % 2;
531 }
532
533 /* Get the actual forest associated with a non root node as the one the node
534 points to might have been merged into another one. In that case the pointer
535 in the root nodes are updated so we return the forest pointer of a root node
536 pointed to by the initial forest. Despite being a oneliner, this method is
537 defined here as it references a method from fma_root_node. */
538
539 fma_forest *
get_forest()540 fma_node::get_forest ()
541 {
542 return this->m_root->get_forest ();
543 }
544
545 /* Return whether a node is a root node. */
546
547 bool
root_p()548 fma_node::root_p ()
549 {
550 return this->m_root == this;
551 }
552
553 /* Dump information about the children of node FMA_NODE in forest FOREST. */
554
555 void
dump_info(ATTRIBUTE_UNUSED fma_forest * forest)556 fma_node::dump_info (ATTRIBUTE_UNUSED fma_forest *forest)
557 {
558 struct du_chain *chain;
559 std::list<fma_node *>::iterator fma_child;
560
561 gcc_assert (dump_file);
562
563 if (this->get_children ()->empty ())
564 return;
565
566 fprintf (dump_file, "Instruction(s)");
567 for (chain = this->m_head->first; chain; chain = chain->next_use)
568 {
569 if (!is_fmul_fmac_insn (chain->insn, true))
570 continue;
571
572 if (chain->loc != &SET_DEST (PATTERN (chain->insn)))
573 continue;
574
575 fprintf (dump_file, " %d", INSN_UID (chain->insn));
576 }
577
578 fprintf (dump_file, " is(are) accumulator dependency of instructions");
579 for (fma_child = this->get_children ()->begin ();
580 fma_child != this->get_children ()->end (); fma_child++)
581 fprintf (dump_file, " %d", INSN_UID ((*fma_child)->m_insn));
582 fprintf (dump_file, "\n");
583 }
584
585 /* Wrapper around fma_node::dump_info for use as parameter of function pointer
586 type in func_fma_steering::dfs. */
587
588 static void
dump_tree_node_info(fma_forest * forest,fma_node * node)589 dump_tree_node_info (fma_forest *forest, fma_node *node)
590 {
591 node->dump_info (forest);
592 }
593
594 /* Rename the destination register of a single FMUL or FMADD/FMSUB instruction
595 represented by FMA_NODE to a register that respect the target parity for
596 FOREST or with same parity of the instruction represented by its parent node
597 if it has one. */
598
599 void
rename(fma_forest * forest)600 fma_node::rename (fma_forest *forest)
601 {
602 int cur_parity, target_parity;
603
604 /* This is alternate root of a chain and thus has no children. It will be
605 renamed when processing the canonical root for that chain. */
606 if (!this->m_head)
607 return;
608
609 target_parity = forest->get_target_parity ();
610 if (this->m_parent)
611 target_parity = this->m_parent->get_parity ();
612 cur_parity = this->get_parity ();
613
614 /* Rename if parity differs. */
615 if (cur_parity != target_parity)
616 {
617 rtx_insn *insn = this->m_insn;
618 HARD_REG_SET unavailable;
619 machine_mode mode;
620 int reg;
621
622 if (dump_file)
623 {
624 unsigned cur_dest_reg = this->m_head->regno;
625
626 fprintf (dump_file, "FMA or FMUL at insn %d but destination "
627 "register (%s) has different parity from expected to "
628 "maximize FPU pipeline utilization\n", INSN_UID (insn),
629 reg_names[cur_dest_reg]);
630 }
631
632 /* Don't clobber traceback for noreturn functions. */
633 CLEAR_HARD_REG_SET (unavailable);
634 if (frame_pointer_needed)
635 {
636 add_to_hard_reg_set (&unavailable, Pmode, FRAME_POINTER_REGNUM);
637 add_to_hard_reg_set (&unavailable, Pmode, HARD_FRAME_POINTER_REGNUM);
638 }
639
640 /* Exclude registers with wrong parity. */
641 mode = GET_MODE (SET_DEST (PATTERN (insn)));
642 for (reg = cur_parity; reg < FIRST_PSEUDO_REGISTER; reg += 2)
643 add_to_hard_reg_set (&unavailable, mode, reg);
644
645 if (!rename_single_chain (this->m_head, &unavailable))
646 {
647 if (dump_file)
648 fprintf (dump_file, "Destination register of insn %d could not be "
649 "renamed. Dependent FMA insns will use this parity from "
650 "there on.\n", INSN_UID (insn));
651 }
652 else
653 cur_parity = target_parity;
654 }
655
656 forest->get_globals ()->update_balance (cur_parity);
657 }
658
659 /* Wrapper around fma_node::dump_info for use as parameter of function pointer
660 type in func_fma_steering::dfs. */
661
662 static void
rename_fma_node(fma_forest * forest,fma_node * node)663 rename_fma_node (fma_forest *forest, fma_node *node)
664 {
665 node->rename (forest);
666 }
667
fma_root_node(func_fma_steering * globals,du_chain * chain,int id)668 fma_root_node::fma_root_node (func_fma_steering *globals, du_chain *chain,
669 int id) : fma_node (this, chain)
670 {
671 this->m_forest = new fma_forest (globals, this, id);
672 this->m_forest->fma_node_created (this);
673 }
674
675 fma_forest *
get_forest()676 fma_root_node::get_forest ()
677 {
678 return this->m_forest;
679 }
680
681 void
set_forest(fma_forest * ref_forest)682 fma_root_node::set_forest (fma_forest *ref_forest)
683 {
684 this->m_forest = ref_forest;
685 }
686
687 /* Dump information about the roots of forest FOREST. */
688
689 void
dump_info(fma_forest * forest)690 fma_root_node::dump_info (fma_forest *forest)
691 {
692 gcc_assert (dump_file);
693
694 if (this == forest->get_roots ()->front ())
695 fprintf (dump_file, "Instruction(s) at root of forest #%d:",
696 forest->get_id ());
697 fprintf (dump_file, " %d", INSN_UID (this->m_insn));
698 if (this == forest->get_roots ()->back ())
699 fprintf (dump_file, "\n");
700 }
701
702 /* Wrapper around fma_root_node::dump_info for use as parameter of function
703 pointer type in func_fma_steering::dfs. */
704
705 static void
dump_tree_root_info(fma_forest * forest,fma_root_node * node)706 dump_tree_root_info (fma_forest *forest, fma_root_node *node)
707 {
708 node->dump_info (forest);
709 }
710
func_fma_steering()711 func_fma_steering::func_fma_steering () : m_fpu_balance (0)
712 {
713 this->m_insn_fma_head_map = new hash_map<rtx_insn *, fma_node *>;
714 this->m_fma_forests.clear ();
715 this->m_next_forest_id = 0;
716 }
717
~func_fma_steering()718 func_fma_steering::~func_fma_steering ()
719 {
720 delete this->m_insn_fma_head_map;
721 }
722
723 int
get_fpu_balance()724 func_fma_steering::get_fpu_balance ()
725 {
726 return this->m_fpu_balance;
727 }
728
729 void
remove_forest(fma_forest * forest)730 func_fma_steering::remove_forest (fma_forest *forest)
731 {
732 this->m_fma_forests.remove (forest);
733 }
734
735 /* Memorize the mapping of this instruction to its fma_node object and return
736 whether such a mapping existed. */
737
738 bool
put_node(fma_node * node)739 func_fma_steering::put_node (fma_node *node)
740 {
741 return this->m_insn_fma_head_map->put (node->get_insn (), node);
742 }
743
744 /* Update the current balance considering a node with the given PARITY. */
745
746 void
update_balance(int parity)747 func_fma_steering::update_balance (int parity)
748 {
749 this->m_fpu_balance = parity ? this->m_fpu_balance + 1
750 : this->m_fpu_balance - 1;
751 }
752
753 /* Return whether an fma_node object exists for instruction INSN and, if not,
754 allocate one in *RET. */
755
756 fma_node *
get_fma_node(rtx_insn * insn)757 func_fma_steering::get_fma_node (rtx_insn *insn)
758 {
759 fma_node **fma_slot;
760
761 fma_slot = this->m_insn_fma_head_map->get (insn);
762 if (fma_slot)
763 return *fma_slot;
764 return NULL;
765 }
766
767 /* Allocate and initialize fma_node objects for the FMUL or FMADD/FMSUB
768 instruction in CHAIN->insn and its dependent FMADD/FMSUB instructions, all
769 part of FOREST. For the children, the associated head is left untouched
770 (and thus null) as this function will be called again when considering the
771 chain where they are def. For the parent, the chain is given in HEAD. */
772
773 void
analyze_fma_fmul_insn(fma_forest * ref_forest,du_chain * chain,du_head_p head)774 func_fma_steering::analyze_fma_fmul_insn (fma_forest *ref_forest,
775 du_chain *chain, du_head_p head)
776 {
777 fma_forest *forest;
778 fma_node *node = this->get_fma_node (chain->insn);
779
780 /* This is a root node. */
781 if (!node)
782 {
783 fma_root_node *root_node;
784
785 root_node = new fma_root_node (this, chain, this->m_next_forest_id++);
786 forest = root_node->get_forest ();
787 node = root_node;
788
789 /* Until proved otherwise, assume this root is not part of an existing
790 forest and thus add its forest to the list of forests. */
791 this->m_fma_forests.push_back (forest);
792 }
793 else
794 forest = node->get_forest ();
795
796 node->set_head (head);
797
798 /* fma_node is part of a chain with several defs, one of them having already
799 been processed. The root of that already processed def is the canonical
800 one and the root of fma_node is added to its forest. No need to process
801 the children nodes as they were already processed when the other def was
802 processed. */
803 if (ref_forest)
804 {
805 ref_forest->merge_forest (forest);
806 return;
807 }
808
809 for (chain = head->first; chain; chain = chain->next_use)
810 {
811 fma_node *child_fma;
812 rtx fma_rtx, *accum_rtx_p;
813
814 if (!is_fmul_fmac_insn (chain->insn, false))
815 continue;
816
817 /* Get FMA rtx. */
818 fma_rtx = SET_SRC (PATTERN (chain->insn));
819 /* FMA is negated. */
820 if (GET_CODE (fma_rtx) == NEG)
821 fma_rtx = XEXP (fma_rtx, 0);
822 /* Get accumulator rtx. */
823 accum_rtx_p = &XEXP (fma_rtx, 2);
824 /* Accumulator is negated. */
825 if (!REG_P (*accum_rtx_p))
826 accum_rtx_p = &XEXP (*accum_rtx_p, 0);
827
828 /* This du_chain structure is not for the accumulator register. */
829 if (accum_rtx_p != chain->loc)
830 continue;
831
832 /* If object already created, this is a loop carried dependency. We
833 don't include this object in the children as we want trees for
834 rename_fma_trees to not be an infinite loop. */
835 if (this->get_fma_node (chain->insn))
836 continue;
837
838 child_fma = new fma_node (node, chain);
839
840 /* Memorize the mapping of this instruction to its fma_node object
841 as it will be processed for the chain starting at its destination
842 register later. */
843
844 /* Link to siblings. */
845 node->add_child (child_fma);
846 }
847 }
848
849 /* Perform a depth-first search of the forests of fma_node in
850 THIS->m_fma_forests, calling PROCESS_FOREST () on each fma_forest object in
851 THIS->m_fma_forests list, PROCESS_ROOT () on each tree root and
852 PROCESS_NODE () on each node. If FREE is true, free all std::list in the
853 same dfs. */
854
855 void
dfs(void (* process_forest)(fma_forest *),void (* process_root)(fma_forest *,fma_root_node *),void (* process_node)(fma_forest *,fma_node *),bool free)856 func_fma_steering::dfs (void (*process_forest) (fma_forest *),
857 void (*process_root) (fma_forest *, fma_root_node *),
858 void (*process_node) (fma_forest *, fma_node *),
859 bool free)
860 {
861 auto_vec<fma_node *> to_process;
862 auto_vec<fma_node *> to_free;
863 std::list<fma_forest *>::iterator forest_iter;
864
865 /* For each forest. */
866 for (forest_iter = this->m_fma_forests.begin ();
867 forest_iter != this->m_fma_forests.end (); ++forest_iter)
868 {
869 std::list<fma_root_node *>::iterator root_iter;
870
871 if (process_forest)
872 process_forest (*forest_iter);
873
874 /* For each tree root in this forest. */
875 for (root_iter = (*forest_iter)->get_roots ()->begin ();
876 root_iter != (*forest_iter)->get_roots ()->end (); ++root_iter)
877 {
878 if (process_root)
879 process_root (*forest_iter, *root_iter);
880 to_process.safe_push (*root_iter);
881 }
882
883 /* For each tree node in this forest. */
884 while (!to_process.is_empty ())
885 {
886 fma_node *node;
887 std::list<fma_node *>::iterator child_iter;
888
889 node = to_process.pop ();
890
891 if (process_node)
892 process_node (*forest_iter, node);
893
894 for (child_iter = node->get_children ()->begin ();
895 child_iter != node->get_children ()->end (); ++child_iter)
896 to_process.safe_push (*child_iter);
897
898 /* Defer freeing so that the process_node callback can access the
899 parent and children of the node being processed. */
900 if (free)
901 to_free.safe_push (node);
902 }
903
904 if (free)
905 {
906 delete *forest_iter;
907
908 while (!to_free.is_empty ())
909 {
910 fma_node *node = to_free.pop ();
911 if (node->root_p ())
912 delete static_cast<fma_root_node *> (node);
913 else
914 delete node;
915 }
916 }
917 }
918 }
919
920 /* Build the dependency trees of FMUL and FMADD/FMSUB instructions. */
921
922 void
analyze()923 func_fma_steering::analyze ()
924 {
925 int i, n_blocks, *bb_dfs_preorder;
926 basic_block bb;
927 rtx_insn *insn;
928
929 bb_dfs_preorder = XNEWVEC (int, last_basic_block_for_fn (cfun));
930 n_blocks = pre_and_rev_post_order_compute (bb_dfs_preorder, NULL, false);
931
932 /* Browse the graph of basic blocks looking for FMUL or FMADD/FMSUB
933 instructions. */
934 for (i = 0; i < n_blocks; i++)
935 {
936 bb = BASIC_BLOCK_FOR_FN (cfun, bb_dfs_preorder[i]);
937 FOR_BB_INSNS (bb, insn)
938 {
939 operand_rr_info *dest_op_info;
940 struct du_chain *chain = NULL;
941 unsigned dest_regno;
942 fma_forest *forest = NULL;
943 du_head_p head = NULL;
944 int i;
945
946 if (!is_fmul_fmac_insn (insn, true))
947 continue;
948
949 /* Search the chain where this instruction is (one of) the root. */
950 dest_op_info = insn_rr[INSN_UID (insn)].op_info;
951 dest_regno = REGNO (SET_DEST (PATTERN (insn)));
952 for (i = 0; i < dest_op_info->n_chains; i++)
953 {
954 /* The register tracked by this chain does not match the
955 destination register of insn. */
956 if (dest_op_info->heads[i]->regno != dest_regno)
957 continue;
958
959 head = dest_op_info->heads[i];
960 /* The chain was merged in another, find the new head. */
961 if (!head->first)
962 head = regrename_chain_from_id (head->id);
963
964 /* Search the chain element for this instruction and, if another
965 FMUL or FMADD/FMSUB instruction was already processed, note
966 the forest of its tree. */
967 forest = NULL;
968 for (chain = head->first; chain; chain = chain->next_use)
969 {
970 fma_node **fma_slot;
971
972 if (!is_fmul_fmac_insn (chain->insn, true))
973 continue;
974
975 /* This is a use, continue. */
976 if (chain->loc != &SET_DEST (PATTERN (chain->insn)))
977 continue;
978
979 if (chain->insn == insn)
980 break;
981
982 fma_slot = this->m_insn_fma_head_map->get (chain->insn);
983 if (fma_slot && (*fma_slot)->get_children ())
984 forest = (*fma_slot)->get_forest ();
985 }
986 if (chain)
987 break;
988 }
989
990 /* Due to implementation of regrename, dest register can slip away
991 from regrename's analysis. As a result, there is no chain for
992 the destination register of insn. We simply skip the insn even
993 it is a fmul/fmac instruction. This can happen when the dest
994 register is also a source register of insn and one of the below
995 conditions is satisfied:
996 1) the source reg is setup in larger mode than this insn;
997 2) the source reg is uninitialized;
998 3) the source reg is passed in as parameter. */
999 if (i < dest_op_info->n_chains)
1000 this->analyze_fma_fmul_insn (forest, chain, head);
1001 }
1002 }
1003 free (bb_dfs_preorder);
1004
1005 if (dump_file)
1006 this->dfs (dump_forest_info, dump_tree_root_info, dump_tree_node_info,
1007 false);
1008 }
1009
1010 /* Perform the renaming of all chains with FMUL or FMADD/FMSUB involved with
1011 the objective of keeping FPU pipeline balanced in term of instructions and
1012 having FMADD/FMSUB with dependencies on previous FMUL or FMADD/FMSUB be
1013 scheduled on the same pipeline. */
1014
1015 void
rename_fma_trees()1016 func_fma_steering::rename_fma_trees ()
1017 {
1018 this->dfs (dispatch_forest, NULL, rename_fma_node, true);
1019
1020 if (dump_file && !this->m_fma_forests.empty ())
1021 {
1022 fprintf (dump_file, "Function %s has ", current_function_name ());
1023 if (this->m_fpu_balance == 0)
1024 fprintf (dump_file, "perfect balance of FMUL/FMA chains between the "
1025 "two FPU pipelines\n");
1026 else if (this->m_fpu_balance > 0)
1027 fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the second "
1028 "FPU pipeline\n", this->m_fpu_balance);
1029 else /* this->m_fpu_balance < 0 */
1030 fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the first "
1031 "FPU pipeline\n", - this->m_fpu_balance);
1032 }
1033 }
1034
1035 /* Execute FMA steering pass. */
1036
1037 void
execute_fma_steering()1038 func_fma_steering::execute_fma_steering ()
1039 {
1040 df_set_flags (DF_LR_RUN_DCE);
1041 df_note_add_problem ();
1042 df_analyze ();
1043 df_set_flags (DF_DEFER_INSN_RESCAN);
1044
1045 regrename_init (true);
1046 regrename_analyze (NULL);
1047 this->analyze ();
1048 this->rename_fma_trees ();
1049 regrename_finish ();
1050 }
1051
1052 const pass_data pass_data_fma_steering =
1053 {
1054 RTL_PASS, /* type */
1055 "fma_steering", /* name */
1056 OPTGROUP_NONE, /* optinfo_flags */
1057 TV_NONE, /* tv_id */
1058 0, /* properties_required */
1059 0, /* properties_provided */
1060 0, /* properties_destroyed */
1061 0, /* todo_flags_start */
1062 TODO_df_finish, /* todo_flags_finish */
1063 };
1064
1065 class pass_fma_steering : public rtl_opt_pass
1066 {
1067 public:
pass_fma_steering(gcc::context * ctxt)1068 pass_fma_steering (gcc::context *ctxt)
1069 : rtl_opt_pass (pass_data_fma_steering, ctxt)
1070 {}
1071
1072 /* opt_pass methods: */
gate(function *)1073 virtual bool gate (function *)
1074 {
1075 return (aarch64_tune_params.extra_tuning_flags
1076 & AARCH64_EXTRA_TUNE_RENAME_FMA_REGS)
1077 && optimize >= 2;
1078 }
1079
execute(function *)1080 virtual unsigned int execute (function *)
1081 {
1082 func_fma_steering *fma_steering = new func_fma_steering;
1083 fma_steering->execute_fma_steering ();
1084 delete fma_steering;
1085 return 0;
1086 }
1087
1088 }; // class pass_fma_steering
1089
1090 /* Create a new fma steering pass instance. */
1091
1092 rtl_opt_pass *
make_pass_fma_steering(gcc::context * ctxt)1093 make_pass_fma_steering (gcc::context *ctxt)
1094 {
1095 return new pass_fma_steering (ctxt);
1096 }
1097