1 /* FMA steering optimization pass for Cortex-A57.
2 Copyright (C) 2015-2018 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #define IN_TARGET_CODE 1
22
23 #include "config.h"
24 #define INCLUDE_LIST
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "df.h"
31 #include "insn-config.h"
32 #include "regs.h"
33 #include "memmodel.h"
34 #include "emit-rtl.h"
35 #include "recog.h"
36 #include "cfganal.h"
37 #include "insn-attr.h"
38 #include "context.h"
39 #include "tree-pass.h"
40 #include "regrename.h"
41 #include "aarch64-protos.h"
42
43 /* For better performance, the destination of FMADD/FMSUB instructions should
44 have the same parity as their accumulator register if the accumulator
45 contains the result of a previous FMUL or FMADD/FMSUB instruction if
46 targetting Cortex-A57 processors. Performance is also increased by
47 otherwise keeping a good balance in the parity of the destination register
48 of FMUL or FMADD/FMSUB.
49
50 This pass ensure that registers are renamed so that these conditions hold.
51 We reuse the existing register renaming facility from regrename.c to build
52 dependency chains and expose candidate registers for renaming.
53
54
55 The algorithm has three steps:
56
57 First, the functions of the register renaming pass are called. These
58 analyze the instructions and produce a list of def/use chains of
59 instructions.
60
61 Next, this information is used to build trees of multiply and
62 multiply-accumulate instructions. The roots of these trees are any
63 multiply, or any multiply-accumulate whose accumulator is not dependent on
64 a multiply or multiply-accumulate instruction. A child is added to the
65 tree where a dependency chain exists between the result of the parent
66 instruction and the accumulator operand of the child, as in the diagram
67 below:
68
69 fmul s2, s0, s1
70 / \
71 fmadd s0, s1, s1, s2 fmadd s4, s1, s1 s2
72 |
73 fmadd s3, s1, s1, s0
74
75 Trees made of a single instruction are permitted.
76
77 Finally, renaming is performed. The parity of the destination register at
78 the root of a tree is checked against the current balance of multiply and
79 multiply-accumulate on each pipeline. If necessary, the root of a tree is
80 renamed, in which case the rest of the tree is then renamed to keep the same
81 parity in the destination registers of all instructions in the tree. */
82
83
84
85 /* Forward declarations. */
86 class fma_node;
87 class fma_root_node;
88 class func_fma_steering;
89
90 /* Dependencies between FMUL or FMADD/FMSUB instructions and subsequent
91 FMADD/FMSUB instructions form a graph. This is because alternatives can
92 make a register be set by several FMUL or FMADD/FMSUB instructions in
93 different basic blocks and because of loops. For ease of browsing, the
94 connected components of this graph are broken up into forests of trees.
95 Forests are represented by fma_forest objects, contained in the fma_forests
96 list. Using a separate object for the forests allows for a better use of
97 memory as there is some information that is global to each forest, such as
98 the number of FMSUB and FMADD/FMSUB instructions currently scheduled on each
99 floating-point execution pipelines. */
100
101 class fma_forest
102 {
103 public:
104 fma_forest (func_fma_steering *, fma_root_node *, int);
105 ~fma_forest ();
106
107 int get_id ();
108 std::list<fma_root_node *> *get_roots ();
109 func_fma_steering *get_globals ();
110 int get_target_parity ();
111 void fma_node_created (fma_node *);
112 void merge_forest (fma_forest *);
113 void dump_info ();
114 void dispatch ();
115
116 private:
117 /* The list of roots that form this forest. */
118 std::list<fma_root_node *> *m_roots;
119
120 /* Target parity the destination register of all FMUL and FMADD/FMSUB
121 instructions in this forest should have. */
122 int m_target_parity;
123
124 /* Link to the instance of func_fma_steering holding data related to the
125 FMA steering of the current function (cfun). */
126 func_fma_steering *m_globals;
127
128 /* Identifier for the forest (used for dumps). */
129 int m_id;
130
131 /* Total number of nodes in the forest (for statistics). */
132 int m_nb_nodes;
133 };
134
135 class fma_node
136 {
137 public:
138 fma_node (fma_node *parent, du_chain *chain);
139 ~fma_node ();
140
141 bool root_p ();
142 fma_forest *get_forest ();
143 std::list<fma_node *> *get_children ();
144 rtx_insn *get_insn ();
145 void add_child (fma_node *);
146 int get_parity ();
147 void set_head (du_head *);
148 void rename (fma_forest *);
149 void dump_info (fma_forest *);
150
151 protected:
152 /* Root node that lead to this node. */
153 fma_root_node *m_root;
154
155 /* The parent node of this node. If the node belong to a chain with several
156 parent nodes, the first one encountered in a depth-first search is chosen
157 as canonical parent. */
158 fma_node *m_parent;
159
160 /* The list of child nodes. If a chain contains several parent nodes, one is
161 chosen as canonical parent and the others will have no children. */
162 std::list<fma_node *> *m_children;
163
164 /* The associated DU_HEAD chain that the insn represented by this object
165 is (one of) the root of. When a chain contains several roots, the non
166 canonical ones have this field set to NULL. */
167 struct du_head *m_head;
168
169 /* The FMUL or FMADD/FMSUB instruction this object corresponds to. */
170 rtx_insn *m_insn;
171 };
172
173 class fma_root_node : public fma_node
174 {
175 public:
176 fma_root_node (func_fma_steering *, du_chain *, int);
177
178 fma_forest *get_forest ();
179 void set_forest (fma_forest *);
180 void dump_info (fma_forest *);
181
182 private:
183 /* The forest this node belonged to when it was created. */
184 fma_forest *m_forest;
185 };
186
187 /* Class holding all data and methods relative to the FMA steering of a given
188 function. The FMA steering pass could then run in parallel for different
189 functions. */
190
191 class func_fma_steering
192 {
193 public:
194 func_fma_steering ();
195 ~func_fma_steering ();
196
197 int get_fpu_balance ();
198 void remove_forest (fma_forest *);
199 bool put_node (fma_node *);
200 void update_balance (int);
201 fma_node *get_fma_node (rtx_insn *);
202 void analyze_fma_fmul_insn (fma_forest *, du_chain *, du_head_p);
203 void execute_fma_steering ();
204
205 private:
206 void dfs (void (*) (fma_forest *), void (*) (fma_forest *, fma_root_node *),
207 void (*) (fma_forest *, fma_node *), bool);
208 void analyze ();
209 void rename_fma_trees ();
210
211 /* Mapping between FMUL or FMADD/FMSUB instructions and the associated
212 fma_node object. Used when analyzing an instruction that is a root of
213 a chain to find if such an object was created because this instruction
214 is also a use in another chain. */
215 hash_map<rtx_insn *, fma_node *> *m_insn_fma_head_map;
216
217 /* A list of all the forests in a given function. */
218 std::list<fma_forest *> m_fma_forests;
219
220 /* Balance of FMUL and FMADD/FMSUB instructions between the two FPU
221 pipelines:
222 < 0: more instruction dispatched to the first pipeline
223 == 0: perfect balance
224 > 0: more instruction dispatched to the second pipeline. */
225 int m_fpu_balance;
226
227 /* Identifier for the next forest created. */
228 int m_next_forest_id;
229 };
230
231 /* Rename the register HEAD->regno in all the insns in the chain HEAD to any
232 register not in the set UNAVAILABLE. Adapted from rename_chains in
233 regrename.c. */
234
235 static bool
rename_single_chain(du_head_p head,HARD_REG_SET * unavailable)236 rename_single_chain (du_head_p head, HARD_REG_SET *unavailable)
237 {
238 int best_new_reg;
239 int n_uses = 0;
240 struct du_chain *tmp;
241 int reg = head->regno;
242 enum reg_class super_class = NO_REGS;
243
244 if (head->cannot_rename)
245 return false;
246
247 if (fixed_regs[reg] || global_regs[reg]
248 || (frame_pointer_needed && reg == HARD_FRAME_POINTER_REGNUM))
249 return false;
250
251 /* Iterate over elements in the chain in order to:
252 1. Count number of uses, and narrow the set of registers we can
253 use for renaming.
254 2. Compute the superunion of register classes in this chain. */
255 for (tmp = head->first; tmp; tmp = tmp->next_use)
256 {
257 if (DEBUG_INSN_P (tmp->insn))
258 continue;
259 n_uses++;
260 IOR_COMPL_HARD_REG_SET (*unavailable, reg_class_contents[tmp->cl]);
261 super_class = reg_class_superunion[(int) super_class][(int) tmp->cl];
262 }
263
264 if (n_uses < 1)
265 return false;
266
267 best_new_reg = find_rename_reg (head, super_class, unavailable, reg,
268 false);
269
270 if (dump_file)
271 {
272 fprintf (dump_file, "Register %s in insn %d", reg_names[reg],
273 INSN_UID (head->first->insn));
274 if (head->need_caller_save_reg)
275 fprintf (dump_file, " crosses a call");
276 }
277
278 if (best_new_reg == reg)
279 {
280 if (dump_file)
281 fprintf (dump_file, "; no available better choice\n");
282 return false;
283 }
284
285 if (regrename_do_replace (head, best_new_reg))
286 {
287 if (dump_file)
288 fprintf (dump_file, ", renamed as %s\n", reg_names[best_new_reg]);
289 df_set_regs_ever_live (best_new_reg, true);
290 }
291 else
292 {
293 if (dump_file)
294 fprintf (dump_file, ", renaming as %s failed\n",
295 reg_names[best_new_reg]);
296 return false;
297 }
298 return true;
299 }
300
301 /* Return whether T is the attribute of a FMADD/FMSUB-like instruction. */
302
303 static bool
is_fmac_op(enum attr_type t)304 is_fmac_op (enum attr_type t)
305 {
306 return (t == TYPE_FMACS) || (t == TYPE_FMACD) || (t == TYPE_NEON_FP_MLA_S);
307 }
308
309 /* Return whether T is the attribute of a FMUL instruction. */
310
311 static bool
is_fmul_op(enum attr_type t)312 is_fmul_op (enum attr_type t)
313 {
314 return (t == TYPE_FMULS) || (t == TYPE_FMULD) || (t == TYPE_NEON_FP_MUL_S);
315 }
316
317 /* Return whether INSN is an FMUL (if FMUL_OK is true) or FMADD/FMSUB
318 instruction. */
319
320 static bool
is_fmul_fmac_insn(rtx_insn * insn,bool fmul_ok)321 is_fmul_fmac_insn (rtx_insn *insn, bool fmul_ok)
322 {
323 enum attr_type t;
324
325 if (!NONDEBUG_INSN_P (insn))
326 return false;
327
328 if (recog_memoized (insn) < 0)
329 return false;
330
331 /* Only consider chain(s) this instruction is a root of if this is an FMUL or
332 FMADD/FMSUB instruction. This allows to avoid browsing chains of all
333 instructions for FMUL or FMADD/FMSUB in them. */
334 t = get_attr_type (insn);
335 return is_fmac_op (t) || (fmul_ok && is_fmul_op (t));
336 }
337
338
339 /*
340 * Class fma_forest method definitions.
341 */
342
fma_forest(func_fma_steering * fma_steer,fma_root_node * fma_root,int id)343 fma_forest::fma_forest (func_fma_steering *fma_steer, fma_root_node *fma_root,
344 int id)
345 {
346 memset (this, 0, sizeof (*this));
347 this->m_globals = fma_steer;
348 this->m_roots = new std::list<fma_root_node *>;
349 this->m_roots->push_back (fma_root);
350 this->m_id = id;
351 }
352
~fma_forest()353 fma_forest::~fma_forest ()
354 {
355 delete this->m_roots;
356 }
357
358 int
get_id()359 fma_forest::get_id ()
360 {
361 return this->m_id;
362 }
363
364 std::list<fma_root_node *> *
get_roots()365 fma_forest::get_roots ()
366 {
367 return this->m_roots;
368 }
369
370 func_fma_steering *
get_globals()371 fma_forest::get_globals ()
372 {
373 return this->m_globals;
374 }
375
376 int
get_target_parity()377 fma_forest::get_target_parity ()
378 {
379 return this->m_target_parity;
380 }
381
382 /* Act on the creation of NODE by updating statistics in FOREST and adding an
383 entry for it in the func_fma_steering hashmap. */
384
fma_node_created(fma_node * node)385 void fma_forest::fma_node_created (fma_node *node)
386 {
387 bool created = !this->m_globals->put_node (node);
388
389 gcc_assert (created);
390 this->m_nb_nodes++;
391 }
392
393 /* Merge REF_FOREST and OTHER_FOREST together, making REF_FOREST the canonical
394 fma_forest object to represent both. */
395
396 void
merge_forest(fma_forest * other_forest)397 fma_forest::merge_forest (fma_forest *other_forest)
398 {
399 std::list<fma_root_node *> *other_roots;
400 std::list<fma_root_node *>::iterator other_root_iter;
401
402 if (this == other_forest)
403 return;
404
405 other_roots = other_forest->m_roots;
406
407 /* Update root nodes' pointer to forest. */
408 for (other_root_iter = other_roots->begin ();
409 other_root_iter != other_roots->end (); ++other_root_iter)
410 (*other_root_iter)->set_forest (this);
411
412 /* Remove other_forest from the list of forests and move its tree roots in
413 the list of tree roots of ref_forest. */
414 this->m_globals->remove_forest (other_forest);
415 this->m_roots->splice (this->m_roots->begin (), *other_roots);
416 this->m_nb_nodes += other_forest->m_nb_nodes;
417
418 delete other_forest;
419 }
420
421 /* Dump information about the forest FOREST. */
422
423 void
dump_info()424 fma_forest::dump_info ()
425 {
426 gcc_assert (dump_file);
427
428 fprintf (dump_file, "Forest #%d has %d nodes\n", this->m_id,
429 this->m_nb_nodes);
430 }
431
432 /* Wrapper around fma_forest::dump_info for use as parameter of function
433 pointer type in func_fma_steering::dfs. */
434
435 static void
dump_forest_info(fma_forest * forest)436 dump_forest_info (fma_forest *forest)
437 {
438 forest->dump_info ();
439 }
440
441 /* Dispatch forest to the least utilized pipeline. */
442
443 void
dispatch()444 fma_forest::dispatch ()
445 {
446 this->m_target_parity = this->m_roots->front ()->get_parity ();
447 int fpu_balance = this->m_globals->get_fpu_balance ();
448 if (fpu_balance != 0)
449 this->m_target_parity = (fpu_balance < 0);
450
451 if (dump_file)
452 fprintf (dump_file, "Target parity for forest #%d: %s\n", this->m_id,
453 this->m_target_parity ? "odd" : "even");
454 }
455
456 /* Wrapper around fma_forest::dispatch for use as parameter of function pointer
457 type in func_fma_steering::dfs. */
458
459 static void
dispatch_forest(fma_forest * forest)460 dispatch_forest (fma_forest *forest)
461 {
462 forest->dispatch ();
463 }
464
fma_node(fma_node * parent,du_chain * chain)465 fma_node::fma_node (fma_node *parent, du_chain *chain)
466 {
467 memset (this, 0, sizeof (*this));
468 this->m_parent = parent;
469 this->m_children = new std::list<fma_node *>;
470 this->m_insn = chain->insn;
471 /* root_p () cannot be used to check for root before root is set. */
472 if (this->m_parent == this)
473 this->m_root = static_cast<fma_root_node *> (parent);
474 else
475 {
476 this->m_root = parent->m_root;
477 this->get_forest ()->fma_node_created (this);
478 }
479 }
480
~fma_node()481 fma_node::~fma_node ()
482 {
483 delete this->m_children;
484 }
485
486 std::list<fma_node *> *
get_children()487 fma_node::get_children ()
488 {
489 return this->m_children;
490 }
491
492 rtx_insn *
get_insn()493 fma_node::get_insn ()
494 {
495 return this->m_insn;
496 }
497
498 void
set_head(du_head * head)499 fma_node::set_head (du_head *head)
500 {
501 gcc_assert (!this->m_head);
502 this->m_head = head;
503 }
504
505 /* Add a child to this node in the list of children. */
506
507 void
add_child(fma_node * child)508 fma_node::add_child (fma_node *child)
509 {
510 this->m_children->push_back (child);
511 }
512
513 /* Return the parity of the destination register of the instruction represented
514 by this node. */
515
516 int
get_parity()517 fma_node::get_parity ()
518 {
519 return this->m_head->regno % 2;
520 }
521
522 /* Get the actual forest associated with a non root node as the one the node
523 points to might have been merged into another one. In that case the pointer
524 in the root nodes are updated so we return the forest pointer of a root node
525 pointed to by the initial forest. Despite being a oneliner, this method is
526 defined here as it references a method from fma_root_node. */
527
528 fma_forest *
get_forest()529 fma_node::get_forest ()
530 {
531 return this->m_root->get_forest ();
532 }
533
534 /* Return whether a node is a root node. */
535
536 bool
root_p()537 fma_node::root_p ()
538 {
539 return this->m_root == this;
540 }
541
542 /* Dump information about the children of node FMA_NODE in forest FOREST. */
543
544 void
dump_info(ATTRIBUTE_UNUSED fma_forest * forest)545 fma_node::dump_info (ATTRIBUTE_UNUSED fma_forest *forest)
546 {
547 struct du_chain *chain;
548 std::list<fma_node *>::iterator fma_child;
549
550 gcc_assert (dump_file);
551
552 if (this->get_children ()->empty ())
553 return;
554
555 fprintf (dump_file, "Instruction(s)");
556 for (chain = this->m_head->first; chain; chain = chain->next_use)
557 {
558 if (!is_fmul_fmac_insn (chain->insn, true))
559 continue;
560
561 if (chain->loc != &SET_DEST (PATTERN (chain->insn)))
562 continue;
563
564 fprintf (dump_file, " %d", INSN_UID (chain->insn));
565 }
566
567 fprintf (dump_file, " is(are) accumulator dependency of instructions");
568 for (fma_child = this->get_children ()->begin ();
569 fma_child != this->get_children ()->end (); fma_child++)
570 fprintf (dump_file, " %d", INSN_UID ((*fma_child)->m_insn));
571 fprintf (dump_file, "\n");
572 }
573
574 /* Wrapper around fma_node::dump_info for use as parameter of function pointer
575 type in func_fma_steering::dfs. */
576
577 static void
dump_tree_node_info(fma_forest * forest,fma_node * node)578 dump_tree_node_info (fma_forest *forest, fma_node *node)
579 {
580 node->dump_info (forest);
581 }
582
583 /* Rename the destination register of a single FMUL or FMADD/FMSUB instruction
584 represented by FMA_NODE to a register that respect the target parity for
585 FOREST or with same parity of the instruction represented by its parent node
586 if it has one. */
587
588 void
rename(fma_forest * forest)589 fma_node::rename (fma_forest *forest)
590 {
591 int cur_parity, target_parity;
592
593 /* This is alternate root of a chain and thus has no children. It will be
594 renamed when processing the canonical root for that chain. */
595 if (!this->m_head)
596 return;
597
598 target_parity = forest->get_target_parity ();
599 if (this->m_parent)
600 target_parity = this->m_parent->get_parity ();
601 cur_parity = this->get_parity ();
602
603 /* Rename if parity differs. */
604 if (cur_parity != target_parity)
605 {
606 rtx_insn *insn = this->m_insn;
607 HARD_REG_SET unavailable;
608 machine_mode mode;
609 int reg;
610
611 if (dump_file)
612 {
613 unsigned cur_dest_reg = this->m_head->regno;
614
615 fprintf (dump_file, "FMA or FMUL at insn %d but destination "
616 "register (%s) has different parity from expected to "
617 "maximize FPU pipeline utilization\n", INSN_UID (insn),
618 reg_names[cur_dest_reg]);
619 }
620
621 /* Don't clobber traceback for noreturn functions. */
622 CLEAR_HARD_REG_SET (unavailable);
623 if (frame_pointer_needed)
624 {
625 add_to_hard_reg_set (&unavailable, Pmode, FRAME_POINTER_REGNUM);
626 add_to_hard_reg_set (&unavailable, Pmode, HARD_FRAME_POINTER_REGNUM);
627 }
628
629 /* Exclude registers with wrong parity. */
630 mode = GET_MODE (SET_DEST (PATTERN (insn)));
631 for (reg = cur_parity; reg < FIRST_PSEUDO_REGISTER; reg += 2)
632 add_to_hard_reg_set (&unavailable, mode, reg);
633
634 if (!rename_single_chain (this->m_head, &unavailable))
635 {
636 if (dump_file)
637 fprintf (dump_file, "Destination register of insn %d could not be "
638 "renamed. Dependent FMA insns will use this parity from "
639 "there on.\n", INSN_UID (insn));
640 }
641 else
642 cur_parity = target_parity;
643 }
644
645 forest->get_globals ()->update_balance (cur_parity);
646 }
647
648 /* Wrapper around fma_node::dump_info for use as parameter of function pointer
649 type in func_fma_steering::dfs. */
650
651 static void
rename_fma_node(fma_forest * forest,fma_node * node)652 rename_fma_node (fma_forest *forest, fma_node *node)
653 {
654 node->rename (forest);
655 }
656
fma_root_node(func_fma_steering * globals,du_chain * chain,int id)657 fma_root_node::fma_root_node (func_fma_steering *globals, du_chain *chain,
658 int id) : fma_node (this, chain)
659 {
660 this->m_forest = new fma_forest (globals, this, id);
661 this->m_forest->fma_node_created (this);
662 }
663
664 fma_forest *
get_forest()665 fma_root_node::get_forest ()
666 {
667 return this->m_forest;
668 }
669
670 void
set_forest(fma_forest * ref_forest)671 fma_root_node::set_forest (fma_forest *ref_forest)
672 {
673 this->m_forest = ref_forest;
674 }
675
676 /* Dump information about the roots of forest FOREST. */
677
678 void
dump_info(fma_forest * forest)679 fma_root_node::dump_info (fma_forest *forest)
680 {
681 gcc_assert (dump_file);
682
683 if (this == forest->get_roots ()->front ())
684 fprintf (dump_file, "Instruction(s) at root of forest #%d:",
685 forest->get_id ());
686 fprintf (dump_file, " %d", INSN_UID (this->m_insn));
687 if (this == forest->get_roots ()->back ())
688 fprintf (dump_file, "\n");
689 }
690
691 /* Wrapper around fma_root_node::dump_info for use as parameter of function
692 pointer type in func_fma_steering::dfs. */
693
694 static void
dump_tree_root_info(fma_forest * forest,fma_root_node * node)695 dump_tree_root_info (fma_forest *forest, fma_root_node *node)
696 {
697 node->dump_info (forest);
698 }
699
func_fma_steering()700 func_fma_steering::func_fma_steering () : m_fpu_balance (0)
701 {
702 this->m_insn_fma_head_map = new hash_map<rtx_insn *, fma_node *>;
703 this->m_fma_forests.clear ();
704 this->m_next_forest_id = 0;
705 }
706
~func_fma_steering()707 func_fma_steering::~func_fma_steering ()
708 {
709 delete this->m_insn_fma_head_map;
710 }
711
712 int
get_fpu_balance()713 func_fma_steering::get_fpu_balance ()
714 {
715 return this->m_fpu_balance;
716 }
717
718 void
remove_forest(fma_forest * forest)719 func_fma_steering::remove_forest (fma_forest *forest)
720 {
721 this->m_fma_forests.remove (forest);
722 }
723
724 /* Memorize the mapping of this instruction to its fma_node object and return
725 whether such a mapping existed. */
726
727 bool
put_node(fma_node * node)728 func_fma_steering::put_node (fma_node *node)
729 {
730 return this->m_insn_fma_head_map->put (node->get_insn (), node);
731 }
732
733 /* Update the current balance considering a node with the given PARITY. */
734
735 void
update_balance(int parity)736 func_fma_steering::update_balance (int parity)
737 {
738 this->m_fpu_balance = parity ? this->m_fpu_balance + 1
739 : this->m_fpu_balance - 1;
740 }
741
742 /* Return whether an fma_node object exists for instruction INSN and, if not,
743 allocate one in *RET. */
744
745 fma_node *
get_fma_node(rtx_insn * insn)746 func_fma_steering::get_fma_node (rtx_insn *insn)
747 {
748 fma_node **fma_slot;
749
750 fma_slot = this->m_insn_fma_head_map->get (insn);
751 if (fma_slot)
752 return *fma_slot;
753 return NULL;
754 }
755
756 /* Allocate and initialize fma_node objects for the FMUL or FMADD/FMSUB
757 instruction in CHAIN->insn and its dependent FMADD/FMSUB instructions, all
758 part of FOREST. For the children, the associated head is left untouched
759 (and thus null) as this function will be called again when considering the
760 chain where they are def. For the parent, the chain is given in HEAD. */
761
762 void
analyze_fma_fmul_insn(fma_forest * ref_forest,du_chain * chain,du_head_p head)763 func_fma_steering::analyze_fma_fmul_insn (fma_forest *ref_forest,
764 du_chain *chain, du_head_p head)
765 {
766 fma_forest *forest;
767 fma_node *node = this->get_fma_node (chain->insn);
768
769 /* This is a root node. */
770 if (!node)
771 {
772 fma_root_node *root_node;
773
774 root_node = new fma_root_node (this, chain, this->m_next_forest_id++);
775 forest = root_node->get_forest ();
776 node = root_node;
777
778 /* Until proved otherwise, assume this root is not part of an existing
779 forest and thus add its forest to the list of forests. */
780 this->m_fma_forests.push_back (forest);
781 }
782 else
783 forest = node->get_forest ();
784
785 node->set_head (head);
786
787 /* fma_node is part of a chain with several defs, one of them having already
788 been processed. The root of that already processed def is the canonical
789 one and the root of fma_node is added to its forest. No need to process
790 the children nodes as they were already processed when the other def was
791 processed. */
792 if (ref_forest)
793 {
794 ref_forest->merge_forest (forest);
795 return;
796 }
797
798 for (chain = head->first; chain; chain = chain->next_use)
799 {
800 fma_node *child_fma;
801 rtx fma_rtx, *accum_rtx_p;
802
803 if (!is_fmul_fmac_insn (chain->insn, false))
804 continue;
805
806 /* Get FMA rtx. */
807 fma_rtx = SET_SRC (PATTERN (chain->insn));
808 /* FMA is negated. */
809 if (GET_CODE (fma_rtx) == NEG)
810 fma_rtx = XEXP (fma_rtx, 0);
811 /* Get accumulator rtx. */
812 accum_rtx_p = &XEXP (fma_rtx, 2);
813 /* Accumulator is negated. */
814 if (!REG_P (*accum_rtx_p))
815 accum_rtx_p = &XEXP (*accum_rtx_p, 0);
816
817 /* This du_chain structure is not for the accumulator register. */
818 if (accum_rtx_p != chain->loc)
819 continue;
820
821 /* If object already created, this is a loop carried dependency. We
822 don't include this object in the children as we want trees for
823 rename_fma_trees to not be an infinite loop. */
824 if (this->get_fma_node (chain->insn))
825 continue;
826
827 child_fma = new fma_node (node, chain);
828
829 /* Memorize the mapping of this instruction to its fma_node object
830 as it will be processed for the chain starting at its destination
831 register later. */
832
833 /* Link to siblings. */
834 node->add_child (child_fma);
835 }
836 }
837
838 /* Perform a depth-first search of the forests of fma_node in
839 THIS->m_fma_forests, calling PROCESS_FOREST () on each fma_forest object in
840 THIS->m_fma_forests list, PROCESS_ROOT () on each tree root and
841 PROCESS_NODE () on each node. If FREE is true, free all std::list in the
842 same dfs. */
843
844 void
dfs(void (* process_forest)(fma_forest *),void (* process_root)(fma_forest *,fma_root_node *),void (* process_node)(fma_forest *,fma_node *),bool free)845 func_fma_steering::dfs (void (*process_forest) (fma_forest *),
846 void (*process_root) (fma_forest *, fma_root_node *),
847 void (*process_node) (fma_forest *, fma_node *),
848 bool free)
849 {
850 auto_vec<fma_node *> to_process;
851 auto_vec<fma_node *> to_free;
852 std::list<fma_forest *>::iterator forest_iter;
853
854 /* For each forest. */
855 for (forest_iter = this->m_fma_forests.begin ();
856 forest_iter != this->m_fma_forests.end (); ++forest_iter)
857 {
858 std::list<fma_root_node *>::iterator root_iter;
859
860 if (process_forest)
861 process_forest (*forest_iter);
862
863 /* For each tree root in this forest. */
864 for (root_iter = (*forest_iter)->get_roots ()->begin ();
865 root_iter != (*forest_iter)->get_roots ()->end (); ++root_iter)
866 {
867 if (process_root)
868 process_root (*forest_iter, *root_iter);
869 to_process.safe_push (*root_iter);
870 }
871
872 /* For each tree node in this forest. */
873 while (!to_process.is_empty ())
874 {
875 fma_node *node;
876 std::list<fma_node *>::iterator child_iter;
877
878 node = to_process.pop ();
879
880 if (process_node)
881 process_node (*forest_iter, node);
882
883 for (child_iter = node->get_children ()->begin ();
884 child_iter != node->get_children ()->end (); ++child_iter)
885 to_process.safe_push (*child_iter);
886
887 /* Defer freeing so that the process_node callback can access the
888 parent and children of the node being processed. */
889 if (free)
890 to_free.safe_push (node);
891 }
892
893 if (free)
894 {
895 delete *forest_iter;
896
897 while (!to_free.is_empty ())
898 {
899 fma_node *node = to_free.pop ();
900 if (node->root_p ())
901 delete static_cast<fma_root_node *> (node);
902 else
903 delete node;
904 }
905 }
906 }
907 }
908
909 /* Build the dependency trees of FMUL and FMADD/FMSUB instructions. */
910
911 void
analyze()912 func_fma_steering::analyze ()
913 {
914 int i, n_blocks, *bb_dfs_preorder;
915 basic_block bb;
916 rtx_insn *insn;
917
918 bb_dfs_preorder = XNEWVEC (int, last_basic_block_for_fn (cfun));
919 n_blocks = pre_and_rev_post_order_compute (bb_dfs_preorder, NULL, false);
920
921 /* Browse the graph of basic blocks looking for FMUL or FMADD/FMSUB
922 instructions. */
923 for (i = 0; i < n_blocks; i++)
924 {
925 bb = BASIC_BLOCK_FOR_FN (cfun, bb_dfs_preorder[i]);
926 FOR_BB_INSNS (bb, insn)
927 {
928 operand_rr_info *dest_op_info;
929 struct du_chain *chain = NULL;
930 unsigned dest_regno;
931 fma_forest *forest = NULL;
932 du_head_p head = NULL;
933 int i;
934
935 if (!is_fmul_fmac_insn (insn, true))
936 continue;
937
938 /* Search the chain where this instruction is (one of) the root. */
939 dest_op_info = insn_rr[INSN_UID (insn)].op_info;
940 dest_regno = REGNO (SET_DEST (PATTERN (insn)));
941 for (i = 0; i < dest_op_info->n_chains; i++)
942 {
943 /* The register tracked by this chain does not match the
944 destination register of insn. */
945 if (dest_op_info->heads[i]->regno != dest_regno)
946 continue;
947
948 head = dest_op_info->heads[i];
949 /* The chain was merged in another, find the new head. */
950 if (!head->first)
951 head = regrename_chain_from_id (head->id);
952
953 /* Search the chain element for this instruction and, if another
954 FMUL or FMADD/FMSUB instruction was already processed, note
955 the forest of its tree. */
956 forest = NULL;
957 for (chain = head->first; chain; chain = chain->next_use)
958 {
959 fma_node **fma_slot;
960
961 if (!is_fmul_fmac_insn (chain->insn, true))
962 continue;
963
964 /* This is a use, continue. */
965 if (chain->loc != &SET_DEST (PATTERN (chain->insn)))
966 continue;
967
968 if (chain->insn == insn)
969 break;
970
971 fma_slot = this->m_insn_fma_head_map->get (chain->insn);
972 if (fma_slot && (*fma_slot)->get_children ())
973 forest = (*fma_slot)->get_forest ();
974 }
975 if (chain)
976 break;
977 }
978
979 /* Due to implementation of regrename, dest register can slip away
980 from regrename's analysis. As a result, there is no chain for
981 the destination register of insn. We simply skip the insn even
982 it is a fmul/fmac instruction. This can happen when the dest
983 register is also a source register of insn and one of the below
984 conditions is satisfied:
985 1) the source reg is setup in larger mode than this insn;
986 2) the source reg is uninitialized;
987 3) the source reg is passed in as parameter. */
988 if (i < dest_op_info->n_chains)
989 this->analyze_fma_fmul_insn (forest, chain, head);
990 }
991 }
992 free (bb_dfs_preorder);
993
994 if (dump_file)
995 this->dfs (dump_forest_info, dump_tree_root_info, dump_tree_node_info,
996 false);
997 }
998
999 /* Perform the renaming of all chains with FMUL or FMADD/FMSUB involved with
1000 the objective of keeping FPU pipeline balanced in term of instructions and
1001 having FMADD/FMSUB with dependencies on previous FMUL or FMADD/FMSUB be
1002 scheduled on the same pipeline. */
1003
1004 void
rename_fma_trees()1005 func_fma_steering::rename_fma_trees ()
1006 {
1007 this->dfs (dispatch_forest, NULL, rename_fma_node, true);
1008
1009 if (dump_file && !this->m_fma_forests.empty ())
1010 {
1011 fprintf (dump_file, "Function %s has ", current_function_name ());
1012 if (this->m_fpu_balance == 0)
1013 fprintf (dump_file, "perfect balance of FMUL/FMA chains between the "
1014 "two FPU pipelines\n");
1015 else if (this->m_fpu_balance > 0)
1016 fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the second "
1017 "FPU pipeline\n", this->m_fpu_balance);
1018 else /* this->m_fpu_balance < 0 */
1019 fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the first "
1020 "FPU pipeline\n", - this->m_fpu_balance);
1021 }
1022 }
1023
1024 /* Execute FMA steering pass. */
1025
1026 void
execute_fma_steering()1027 func_fma_steering::execute_fma_steering ()
1028 {
1029 df_set_flags (DF_LR_RUN_DCE);
1030 df_note_add_problem ();
1031 df_analyze ();
1032 df_set_flags (DF_DEFER_INSN_RESCAN);
1033
1034 regrename_init (true);
1035 regrename_analyze (NULL);
1036 this->analyze ();
1037 this->rename_fma_trees ();
1038 regrename_finish ();
1039 }
1040
1041 const pass_data pass_data_fma_steering =
1042 {
1043 RTL_PASS, /* type */
1044 "fma_steering", /* name */
1045 OPTGROUP_NONE, /* optinfo_flags */
1046 TV_NONE, /* tv_id */
1047 0, /* properties_required */
1048 0, /* properties_provided */
1049 0, /* properties_destroyed */
1050 0, /* todo_flags_start */
1051 TODO_df_finish, /* todo_flags_finish */
1052 };
1053
1054 class pass_fma_steering : public rtl_opt_pass
1055 {
1056 public:
pass_fma_steering(gcc::context * ctxt)1057 pass_fma_steering (gcc::context *ctxt)
1058 : rtl_opt_pass (pass_data_fma_steering, ctxt)
1059 {}
1060
1061 /* opt_pass methods: */
gate(function *)1062 virtual bool gate (function *)
1063 {
1064 return (aarch64_tune_params.extra_tuning_flags
1065 & AARCH64_EXTRA_TUNE_RENAME_FMA_REGS)
1066 && optimize >= 2;
1067 }
1068
execute(function *)1069 virtual unsigned int execute (function *)
1070 {
1071 func_fma_steering *fma_steering = new func_fma_steering;
1072 fma_steering->execute_fma_steering ();
1073 delete fma_steering;
1074 return 0;
1075 }
1076
1077 }; // class pass_fma_steering
1078
1079 /* Create a new fma steering pass instance. */
1080
1081 rtl_opt_pass *
make_pass_fma_steering(gcc::context * ctxt)1082 make_pass_fma_steering (gcc::context *ctxt)
1083 {
1084 return new pass_fma_steering (ctxt);
1085 }
1086