1 /* SLP - Pattern matcher on SLP trees
2 Copyright (C) 2020-2021 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "target.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "tree-pass.h"
29 #include "ssa.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "gimple-iterator.h"
36 #include "cfgloop.h"
37 #include "tree-vectorizer.h"
38 #include "langhooks.h"
39 #include "gimple-walk.h"
40 #include "dbgcnt.h"
41 #include "tree-vector-builder.h"
42 #include "vec-perm-indices.h"
43 #include "gimple-fold.h"
44 #include "internal-fn.h"
45
46 /* SLP Pattern matching mechanism.
47
48 This extension to the SLP vectorizer allows one to transform the generated SLP
49 tree based on any pattern. The difference between this and the normal vect
50 pattern matcher is that unlike the former, this matcher allows you to match
51 with instructions that do not belong to the same SSA dominator graph.
52
53 The only requirement that this pattern matcher has is that you are only
54 only allowed to either match an entire group or none.
55
56 The pattern matcher currently only allows you to perform replacements to
57 internal functions.
58
59 Once the patterns are matched it is one way, these cannot be undone. It is
60 currently not supported to match patterns recursively.
61
62 To add a new pattern, implement the vect_pattern class and add the type to
63 slp_patterns.
64
65 */
66
67 /*******************************************************************************
68 * vect_pattern class
69 ******************************************************************************/
70
71 /* Default implementation of recognize that performs matching, validation and
72 replacement of nodes but that can be overriden if required. */
73
74 static bool
vect_pattern_validate_optab(internal_fn ifn,slp_tree node)75 vect_pattern_validate_optab (internal_fn ifn, slp_tree node)
76 {
77 tree vectype = SLP_TREE_VECTYPE (node);
78 if (ifn == IFN_LAST || !vectype)
79 return false;
80
81 if (dump_enabled_p ())
82 dump_printf_loc (MSG_NOTE, vect_location,
83 "Found %s pattern in SLP tree\n",
84 internal_fn_name (ifn));
85
86 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
87 {
88 if (dump_enabled_p ())
89 dump_printf_loc (MSG_NOTE, vect_location,
90 "Target supports %s vectorization with mode %T\n",
91 internal_fn_name (ifn), vectype);
92 }
93 else
94 {
95 if (dump_enabled_p ())
96 {
97 if (!vectype)
98 dump_printf_loc (MSG_NOTE, vect_location,
99 "Target does not support vector type for %T\n",
100 SLP_TREE_DEF_TYPE (node));
101 else
102 dump_printf_loc (MSG_NOTE, vect_location,
103 "Target does not support %s for vector type "
104 "%T\n", internal_fn_name (ifn), vectype);
105 }
106 return false;
107 }
108 return true;
109 }
110
111 /*******************************************************************************
112 * General helper types
113 ******************************************************************************/
114
115 /* The COMPLEX_OPERATION enum denotes the possible pair of operations that can
116 be matched when looking for expressions that we are interested matching for
117 complex numbers addition and mla. */
118
119 typedef enum _complex_operation : unsigned {
120 PLUS_PLUS,
121 MINUS_PLUS,
122 PLUS_MINUS,
123 MULT_MULT,
124 CMPLX_NONE
125 } complex_operation_t;
126
127 /*******************************************************************************
128 * General helper functions
129 ******************************************************************************/
130
131 /* Helper function of linear_loads_p that checks to see if the load permutation
132 is sequential and in monotonically increasing order of loads with no gaps.
133 */
134
135 static inline complex_perm_kinds_t
is_linear_load_p(load_permutation_t loads)136 is_linear_load_p (load_permutation_t loads)
137 {
138 if (loads.length() == 0)
139 return PERM_UNKNOWN;
140
141 unsigned load, i;
142 complex_perm_kinds_t candidates[4]
143 = { PERM_ODDODD
144 , PERM_EVENEVEN
145 , PERM_EVENODD
146 , PERM_ODDEVEN
147 };
148
149 int valid_patterns = 4;
150 FOR_EACH_VEC_ELT (loads, i, load)
151 {
152 if (candidates[0] != PERM_UNKNOWN && load != 1)
153 {
154 candidates[0] = PERM_UNKNOWN;
155 valid_patterns--;
156 }
157 if (candidates[1] != PERM_UNKNOWN && load != 0)
158 {
159 candidates[1] = PERM_UNKNOWN;
160 valid_patterns--;
161 }
162 if (candidates[2] != PERM_UNKNOWN && load != i)
163 {
164 candidates[2] = PERM_UNKNOWN;
165 valid_patterns--;
166 }
167 if (candidates[3] != PERM_UNKNOWN
168 && load != (i % 2 == 0 ? i + 1 : i - 1))
169 {
170 candidates[3] = PERM_UNKNOWN;
171 valid_patterns--;
172 }
173
174 if (valid_patterns == 0)
175 return PERM_UNKNOWN;
176 }
177
178 for (i = 0; i < sizeof(candidates); i++)
179 if (candidates[i] != PERM_UNKNOWN)
180 return candidates[i];
181
182 return PERM_UNKNOWN;
183 }
184
185 /* Combine complex_perm_kinds A and B into a new permute kind that describes the
186 resulting operation. */
187
188 static inline complex_perm_kinds_t
vect_merge_perms(complex_perm_kinds_t a,complex_perm_kinds_t b)189 vect_merge_perms (complex_perm_kinds_t a, complex_perm_kinds_t b)
190 {
191 if (a == b)
192 return a;
193
194 if (a == PERM_TOP)
195 return b;
196
197 if (b == PERM_TOP)
198 return a;
199
200 return PERM_UNKNOWN;
201 }
202
203 /* Check to see if all loads rooted in ROOT are linear. Linearity is
204 defined as having no gaps between values loaded. */
205
206 static complex_perm_kinds_t
linear_loads_p(slp_tree_to_load_perm_map_t * perm_cache,slp_tree root)207 linear_loads_p (slp_tree_to_load_perm_map_t *perm_cache, slp_tree root)
208 {
209 if (!root)
210 return PERM_UNKNOWN;
211
212 unsigned i;
213 complex_perm_kinds_t *tmp;
214
215 if ((tmp = perm_cache->get (root)) != NULL)
216 return *tmp;
217
218 complex_perm_kinds_t retval = PERM_UNKNOWN;
219 perm_cache->put (root, retval);
220
221 /* If it's a load node, then just read the load permute. */
222 if (SLP_TREE_LOAD_PERMUTATION (root).exists ())
223 {
224 retval = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root));
225 perm_cache->put (root, retval);
226 return retval;
227 }
228 else if (SLP_TREE_DEF_TYPE (root) != vect_internal_def)
229 {
230 retval = PERM_TOP;
231 perm_cache->put (root, retval);
232 return retval;
233 }
234
235 complex_perm_kinds_t kind = PERM_TOP;
236
237 slp_tree child;
238 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, child)
239 {
240 complex_perm_kinds_t res = linear_loads_p (perm_cache, child);
241 kind = vect_merge_perms (kind, res);
242 /* Unknown and Top are not valid on blends as they produce no permute. */
243 retval = kind;
244 if (kind == PERM_UNKNOWN || kind == PERM_TOP)
245 return retval;
246 }
247
248 retval = kind;
249
250 perm_cache->put (root, retval);
251 return retval;
252 }
253
254
255 /* This function attempts to make a node rooted in NODE is linear. If the node
256 if already linear than the node itself is returned in RESULT.
257
258 If the node is not linear then a new VEC_PERM_EXPR node is created with a
259 lane permute that when applied will make the node linear. If such a
260 permute cannot be created then FALSE is returned from the function.
261
262 Here linearity is defined as having a sequential, monotically increasing
263 load position inside the load permute generated by the loads reachable from
264 NODE. */
265
266 static slp_tree
vect_build_swap_evenodd_node(slp_tree node)267 vect_build_swap_evenodd_node (slp_tree node)
268 {
269 /* Attempt to linearise the permute. */
270 vec<std::pair<unsigned, unsigned> > zipped;
271 zipped.create (SLP_TREE_LANES (node));
272
273 for (unsigned x = 0; x < SLP_TREE_LANES (node); x+=2)
274 {
275 zipped.quick_push (std::make_pair (0, x+1));
276 zipped.quick_push (std::make_pair (0, x));
277 }
278
279 /* Create the new permute node and store it instead. */
280 slp_tree vnode = vect_create_new_slp_node (1, VEC_PERM_EXPR);
281 SLP_TREE_LANE_PERMUTATION (vnode) = zipped;
282 SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (node);
283 SLP_TREE_CHILDREN (vnode).quick_push (node);
284 SLP_TREE_REF_COUNT (vnode) = 1;
285 SLP_TREE_LANES (vnode) = SLP_TREE_LANES (node);
286 SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (node);
287 SLP_TREE_REF_COUNT (node)++;
288 return vnode;
289 }
290
291 /* Checks to see of the expression represented by NODE is a gimple assign with
292 code CODE. */
293
294 static inline bool
vect_match_expression_p(slp_tree node,tree_code code)295 vect_match_expression_p (slp_tree node, tree_code code)
296 {
297 if (!node
298 || !SLP_TREE_REPRESENTATIVE (node))
299 return false;
300
301 gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node));
302 if (!is_gimple_assign (expr)
303 || gimple_assign_rhs_code (expr) != code)
304 return false;
305
306 return true;
307 }
308
309 /* Checks to see if the expression represented by NODE is a call to the internal
310 function FN. */
311
312 static inline bool
vect_match_call_p(slp_tree node,internal_fn fn)313 vect_match_call_p (slp_tree node, internal_fn fn)
314 {
315 if (!node
316 || !SLP_TREE_REPRESENTATIVE (node))
317 return false;
318
319 gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node));
320 if (!expr
321 || !gimple_call_internal_p (expr, fn))
322 return false;
323
324 return true;
325 }
326
327 /* Check if the given lane permute in PERMUTES matches an alternating sequence
328 of {even odd even odd ...}. This to account for unrolled loops. Further
329 mode there resulting permute must be linear. */
330
331 static inline bool
vect_check_evenodd_blend(lane_permutation_t & permutes,unsigned even,unsigned odd)332 vect_check_evenodd_blend (lane_permutation_t &permutes,
333 unsigned even, unsigned odd)
334 {
335 if (permutes.length () == 0
336 || permutes.length () % 2 != 0)
337 return false;
338
339 unsigned val[2] = {even, odd};
340 unsigned seed = 0;
341 for (unsigned i = 0; i < permutes.length (); i++)
342 if (permutes[i].first != val[i % 2]
343 || permutes[i].second != seed++)
344 return false;
345
346 return true;
347 }
348
349 /* This function will match the two gimple expressions representing NODE1 and
350 NODE2 in parallel and returns the pair operation that represents the two
351 expressions in the two statements.
352
353 If match is successful then the corresponding complex_operation is
354 returned and the arguments to the two matched operations are returned in OPS.
355
356 If TWO_OPERANDS it is expected that the LANES of the parent VEC_PERM select
357 from the two nodes alternatingly.
358
359 If unsuccessful then CMPLX_NONE is returned and OPS is untouched.
360
361 e.g. the following gimple statements
362
363 stmt 0 _39 = _37 + _12;
364 stmt 1 _6 = _38 - _36;
365
366 will return PLUS_MINUS along with OPS containing {_37, _12, _38, _36}.
367 */
368
369 static complex_operation_t
370 vect_detect_pair_op (slp_tree node1, slp_tree node2, lane_permutation_t &lanes,
371 bool two_operands = true, vec<slp_tree> *ops = NULL)
372 {
373 complex_operation_t result = CMPLX_NONE;
374
375 if (vect_match_expression_p (node1, MINUS_EXPR)
376 && vect_match_expression_p (node2, PLUS_EXPR)
377 && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
378 result = MINUS_PLUS;
379 else if (vect_match_expression_p (node1, PLUS_EXPR)
380 && vect_match_expression_p (node2, MINUS_EXPR)
381 && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
382 result = PLUS_MINUS;
383 else if (vect_match_expression_p (node1, PLUS_EXPR)
384 && vect_match_expression_p (node2, PLUS_EXPR))
385 result = PLUS_PLUS;
386 else if (vect_match_expression_p (node1, MULT_EXPR)
387 && vect_match_expression_p (node2, MULT_EXPR))
388 result = MULT_MULT;
389
390 if (result != CMPLX_NONE && ops != NULL)
391 {
392 ops->safe_push (node1);
393 ops->safe_push (node2);
394 }
395 return result;
396 }
397
398 /* Overload of vect_detect_pair_op that matches against the representative
399 statements in the children of NODE. It is expected that NODE has exactly
400 two children and when TWO_OPERANDS then NODE must be a VEC_PERM. */
401
402 static complex_operation_t
403 vect_detect_pair_op (slp_tree node, bool two_operands = true,
404 vec<slp_tree> *ops = NULL)
405 {
406 if (!two_operands && SLP_TREE_CODE (node) == VEC_PERM_EXPR)
407 return CMPLX_NONE;
408
409 if (SLP_TREE_CHILDREN (node).length () != 2)
410 return CMPLX_NONE;
411
412 vec<slp_tree> children = SLP_TREE_CHILDREN (node);
413 lane_permutation_t &lanes = SLP_TREE_LANE_PERMUTATION (node);
414
415 return vect_detect_pair_op (children[0], children[1], lanes, two_operands,
416 ops);
417 }
418
419 /*******************************************************************************
420 * complex_pattern class
421 ******************************************************************************/
422
423 /* SLP Complex Numbers pattern matching.
424
425 As an example, the following simple loop:
426
427 double a[restrict N]; double b[restrict N]; double c[restrict N];
428
429 for (int i=0; i < N; i+=2)
430 {
431 c[i] = a[i] - b[i+1];
432 c[i+1] = a[i+1] + b[i];
433 }
434
435 which represents a complex addition on with a rotation of 90* around the
436 argand plane. i.e. if `a` and `b` were complex numbers then this would be the
437 same as `a + (b * I)`.
438
439 Here the expressions for `c[i]` and `c[i+1]` are independent but have to be
440 both recognized in order for the pattern to work. As an SLP tree this is
441 represented as
442
443 +--------------------------------+
444 | stmt 0 *_9 = _10; |
445 | stmt 1 *_15 = _16; |
446 +--------------------------------+
447 |
448 |
449 v
450 +--------------------------------+
451 | stmt 0 _10 = _4 - _8; |
452 | stmt 1 _16 = _12 + _14; |
453 | lane permutation { 0[0] 1[1] } |
454 +--------------------------------+
455 | |
456 | |
457 | |
458 +-----+ | | +-----+
459 | | | | | |
460 +-----| { } |<-----+ +----->| { } --------+
461 | | | +------------------| | |
462 | +-----+ | +-----+ |
463 | | | |
464 | | | |
465 | +------|------------------+ |
466 | | | |
467 v v v v
468 +--------------------------+ +--------------------------------+
469 | stmt 0 _8 = *_7; | | stmt 0 _4 = *_3; |
470 | stmt 1 _14 = *_13; | | stmt 1 _12 = *_11; |
471 | load permutation { 1 0 } | | load permutation { 0 1 } |
472 +--------------------------+ +--------------------------------+
473
474 The pattern matcher allows you to replace both statements 0 and 1 or none at
475 all. Because this operation is a two operands operation the actual nodes
476 being replaced are those in the { } nodes. The actual scalar statements
477 themselves are not replaced or used during the matching but instead the
478 SLP_TREE_REPRESENTATIVE statements are inspected. You are also allowed to
479 replace and match on any number of nodes.
480
481 Because the pattern matcher matches on the representative statement for the
482 SLP node the case of two_operators it allows you to match the children of the
483 node. This is done using the method `recognize ()`.
484
485 */
486
487 /* The complex_pattern class contains common code for pattern matchers that work
488 on complex numbers. These provide functionality to allow de-construction and
489 validation of sequences depicting/transforming REAL and IMAG pairs. */
490
491 class complex_pattern : public vect_pattern
492 {
493 protected:
494 auto_vec<slp_tree> m_workset;
complex_pattern(slp_tree * node,vec<slp_tree> * m_ops,internal_fn ifn)495 complex_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
496 : vect_pattern (node, m_ops, ifn)
497 {
498 this->m_workset.safe_push (*node);
499 }
500
501 public:
502 void build (vec_info *);
503
504 static internal_fn
505 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
506 vec<slp_tree> *);
507 };
508
509 /* Create a replacement pattern statement for each node in m_node and inserts
510 the new statement into m_node as the new representative statement. The old
511 statement is marked as being in a pattern defined by the new statement. The
512 statement is created as call to internal function IFN with m_num_args
513 arguments.
514
515 Futhermore the new pattern is also added to the vectorization information
516 structure VINFO and the old statement STMT_INFO is marked as unused while
517 the new statement is marked as used and the number of SLP uses of the new
518 statement is incremented.
519
520 The newly created SLP nodes are marked as SLP only and will be dissolved
521 if SLP is aborted.
522
523 The newly created gimple call is returned and the BB remains unchanged.
524
525 This default method is designed to only match against simple operands where
526 all the input and output types are the same.
527 */
528
529 void
build(vec_info * vinfo)530 complex_pattern::build (vec_info *vinfo)
531 {
532 stmt_vec_info stmt_info;
533
534 auto_vec<tree> args;
535 args.create (this->m_num_args);
536 args.quick_grow_cleared (this->m_num_args);
537 slp_tree node;
538 unsigned ix;
539 stmt_vec_info call_stmt_info;
540 gcall *call_stmt = NULL;
541
542 /* Now modify the nodes themselves. */
543 FOR_EACH_VEC_ELT (this->m_workset, ix, node)
544 {
545 /* Calculate the location of the statement in NODE to replace. */
546 stmt_info = SLP_TREE_REPRESENTATIVE (node);
547 stmt_vec_info reduc_def
548 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info));
549 gimple* old_stmt = STMT_VINFO_STMT (stmt_info);
550 tree lhs_old_stmt = gimple_get_lhs (old_stmt);
551 tree type = TREE_TYPE (lhs_old_stmt);
552
553 /* Create the argument set for use by gimple_build_call_internal_vec. */
554 for (unsigned i = 0; i < this->m_num_args; i++)
555 args[i] = lhs_old_stmt;
556
557 /* Create the new pattern statements. */
558 call_stmt = gimple_build_call_internal_vec (this->m_ifn, args);
559 tree var = make_temp_ssa_name (type, call_stmt, "slp_patt");
560 gimple_call_set_lhs (call_stmt, var);
561 gimple_set_location (call_stmt, gimple_location (old_stmt));
562 gimple_call_set_nothrow (call_stmt, true);
563
564 /* Adjust the book-keeping for the new and old statements for use during
565 SLP. This is required to get the right VF and statement during SLP
566 analysis. These changes are created after relevancy has been set for
567 the nodes as such we need to manually update them. Any changes will be
568 undone if SLP is cancelled. */
569 call_stmt_info
570 = vinfo->add_pattern_stmt (call_stmt, stmt_info);
571
572 /* Make sure to mark the representative statement pure_slp and
573 relevant and transfer reduction info. */
574 STMT_VINFO_RELEVANT (call_stmt_info) = vect_used_in_scope;
575 STMT_SLP_TYPE (call_stmt_info) = pure_slp;
576 STMT_VINFO_REDUC_DEF (call_stmt_info) = reduc_def;
577
578 gimple_set_bb (call_stmt, gimple_bb (stmt_info->stmt));
579 STMT_VINFO_VECTYPE (call_stmt_info) = SLP_TREE_VECTYPE (node);
580 STMT_VINFO_SLP_VECT_ONLY_PATTERN (call_stmt_info) = true;
581
582 /* Since we are replacing all the statements in the group with the same
583 thing it doesn't really matter. So just set it every time a new stmt
584 is created. */
585 SLP_TREE_REPRESENTATIVE (node) = call_stmt_info;
586 SLP_TREE_LANE_PERMUTATION (node).release ();
587 SLP_TREE_CODE (node) = CALL_EXPR;
588 }
589 }
590
591 /*******************************************************************************
592 * complex_add_pattern class
593 ******************************************************************************/
594
595 class complex_add_pattern : public complex_pattern
596 {
597 protected:
complex_add_pattern(slp_tree * node,vec<slp_tree> * m_ops,internal_fn ifn)598 complex_add_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
599 : complex_pattern (node, m_ops, ifn)
600 {
601 this->m_num_args = 2;
602 }
603
604 public:
605 void build (vec_info *);
606 static internal_fn
607 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
608 vec<slp_tree> *);
609
610 static vect_pattern*
611 recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
612
613 static vect_pattern*
mkInstance(slp_tree * node,vec<slp_tree> * m_ops,internal_fn ifn)614 mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
615 {
616 return new complex_add_pattern (node, m_ops, ifn);
617 }
618 };
619
620 /* Perform a replacement of the detected complex add pattern with the new
621 instruction sequences. */
622
623 void
build(vec_info * vinfo)624 complex_add_pattern::build (vec_info *vinfo)
625 {
626 SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
627
628 slp_tree node = this->m_ops[0];
629 vec<slp_tree> children = SLP_TREE_CHILDREN (node);
630
631 /* First re-arrange the children. */
632 SLP_TREE_CHILDREN (*this->m_node)[0] = children[0];
633 SLP_TREE_CHILDREN (*this->m_node)[1] =
634 vect_build_swap_evenodd_node (children[1]);
635
636 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[0])++;
637 SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[1])++;
638 vect_free_slp_tree (this->m_ops[0]);
639 vect_free_slp_tree (this->m_ops[1]);
640
641 complex_pattern::build (vinfo);
642 }
643
644 /* Pattern matcher for trying to match complex addition pattern in SLP tree.
645
646 If no match is found then IFN is set to IFN_LAST.
647 This function matches the patterns shaped as:
648
649 c[i] = a[i] - b[i+1];
650 c[i+1] = a[i+1] + b[i];
651
652 If a match occurred then TRUE is returned, else FALSE. The initial match is
653 expected to be in OP1 and the initial match operands in args0. */
654
655 internal_fn
matches(complex_operation_t op,slp_tree_to_load_perm_map_t * perm_cache,slp_tree * node,vec<slp_tree> * ops)656 complex_add_pattern::matches (complex_operation_t op,
657 slp_tree_to_load_perm_map_t *perm_cache,
658 slp_tree *node, vec<slp_tree> *ops)
659 {
660 internal_fn ifn = IFN_LAST;
661
662 /* Find the two components. Rotation in the complex plane will modify
663 the operations:
664
665 * Rotation 0: + +
666 * Rotation 90: - +
667 * Rotation 180: - -
668 * Rotation 270: + -
669
670 Rotation 0 and 180 can be handled by normal SIMD code, so we don't need
671 to care about them here. */
672 if (op == MINUS_PLUS)
673 ifn = IFN_COMPLEX_ADD_ROT90;
674 else if (op == PLUS_MINUS)
675 ifn = IFN_COMPLEX_ADD_ROT270;
676 else
677 return ifn;
678
679 /* verify that there is a permute, otherwise this isn't a pattern we
680 we support. */
681 gcc_assert (ops->length () == 2);
682
683 vec<slp_tree> children = SLP_TREE_CHILDREN ((*ops)[0]);
684
685 /* First node must be unpermuted. */
686 if (linear_loads_p (perm_cache, children[0]) != PERM_EVENODD)
687 return IFN_LAST;
688
689 /* Second node must be permuted. */
690 if (linear_loads_p (perm_cache, children[1]) != PERM_ODDEVEN)
691 return IFN_LAST;
692
693 if (!vect_pattern_validate_optab (ifn, *node))
694 return IFN_LAST;
695
696 return ifn;
697 }
698
699 /* Attempt to recognize a complex add pattern. */
700
701 vect_pattern*
recognize(slp_tree_to_load_perm_map_t * perm_cache,slp_tree * node)702 complex_add_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
703 slp_tree *node)
704 {
705 auto_vec<slp_tree> ops;
706 complex_operation_t op
707 = vect_detect_pair_op (*node, true, &ops);
708 internal_fn ifn
709 = complex_add_pattern::matches (op, perm_cache, node, &ops);
710 if (ifn == IFN_LAST)
711 return NULL;
712
713 return new complex_add_pattern (node, &ops, ifn);
714 }
715
716 /*******************************************************************************
717 * complex_mul_pattern
718 ******************************************************************************/
719
720 /* Helper function of that looks for a match in the CHILDth child of NODE. The
721 child used is stored in RES.
722
723 If the match is successful then ARGS will contain the operands matched
724 and the complex_operation_t type is returned. If match is not successful
725 then CMPLX_NONE is returned and ARGS is left unmodified. */
726
727 static inline complex_operation_t
728 vect_match_call_complex_mla (slp_tree node, unsigned child,
729 vec<slp_tree> *args = NULL, slp_tree *res = NULL)
730 {
731 gcc_assert (child < SLP_TREE_CHILDREN (node).length ());
732
733 slp_tree data = SLP_TREE_CHILDREN (node)[child];
734
735 if (res)
736 *res = data;
737
738 return vect_detect_pair_op (data, false, args);
739 }
740
741 /* Check to see if either of the trees in ARGS are a NEGATE_EXPR. If the first
742 child (args[0]) is a NEGATE_EXPR then NEG_FIRST_P is set to TRUE.
743
744 If a negate is found then the values in ARGS are reordered such that the
745 negate node is always the second one and the entry is replaced by the child
746 of the negate node. */
747
748 static inline bool
749 vect_normalize_conj_loc (vec<slp_tree> args, bool *neg_first_p = NULL)
750 {
751 gcc_assert (args.length () == 2);
752 bool neg_found = false;
753
754 if (vect_match_expression_p (args[0], NEGATE_EXPR))
755 {
756 std::swap (args[0], args[1]);
757 neg_found = true;
758 if (neg_first_p)
759 *neg_first_p = true;
760 }
761 else if (vect_match_expression_p (args[1], NEGATE_EXPR))
762 {
763 neg_found = true;
764 if (neg_first_p)
765 *neg_first_p = false;
766 }
767
768 if (neg_found)
769 args[1] = SLP_TREE_CHILDREN (args[1])[0];
770
771 return neg_found;
772 }
773
774 /* Helper function to check if PERM is KIND or PERM_TOP. */
775
776 static inline bool
is_eq_or_top(complex_perm_kinds_t perm,complex_perm_kinds_t kind)777 is_eq_or_top (complex_perm_kinds_t perm, complex_perm_kinds_t kind)
778 {
779 return perm == kind || perm == PERM_TOP;
780 }
781
782 /* Helper function that checks to see if LEFT_OP and RIGHT_OP are both MULT_EXPR
783 nodes but also that they represent an operation that is either a complex
784 multiplication or a complex multiplication by conjugated value.
785
786 Of the negation is expected to be in the first half of the tree (As required
787 by an FMS pattern) then NEG_FIRST is true. If the operation is a conjugate
788 operation then CONJ_FIRST_OPERAND is set to indicate whether the first or
789 second operand contains the conjugate operation. */
790
791 static inline bool
vect_validate_multiplication(slp_tree_to_load_perm_map_t * perm_cache,vec<slp_tree> left_op,vec<slp_tree> right_op,bool neg_first,bool * conj_first_operand,bool fms)792 vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache,
793 vec<slp_tree> left_op, vec<slp_tree> right_op,
794 bool neg_first, bool *conj_first_operand,
795 bool fms)
796 {
797 /* The presence of a negation indicates that we have either a conjugate or a
798 rotation. We need to distinguish which one. */
799 *conj_first_operand = false;
800 complex_perm_kinds_t kind;
801
802 /* Complex conjugates have the negation on the imaginary part of the
803 number where rotations affect the real component. So check if the
804 negation is on a dup of lane 1. */
805 if (fms)
806 {
807 /* Canonicalization for fms is not consistent. So have to test both
808 variants to be sure. This needs to be fixed in the mid-end so
809 this part can be simpler. */
810 kind = linear_loads_p (perm_cache, right_op[0]);
811 if (!((is_eq_or_top (linear_loads_p (perm_cache, right_op[0]), PERM_ODDODD)
812 && is_eq_or_top (linear_loads_p (perm_cache, right_op[1]),
813 PERM_ODDEVEN))
814 || (kind == PERM_ODDEVEN
815 && is_eq_or_top (linear_loads_p (perm_cache, right_op[1]),
816 PERM_ODDODD))))
817 return false;
818 }
819 else
820 {
821 if (linear_loads_p (perm_cache, right_op[1]) != PERM_ODDODD
822 && !is_eq_or_top (linear_loads_p (perm_cache, right_op[0]),
823 PERM_ODDEVEN))
824 return false;
825 }
826
827 /* Deal with differences in indexes. */
828 int index1 = fms ? 1 : 0;
829 int index2 = fms ? 0 : 1;
830
831 /* Check if the conjugate is on the second first or second operand. The
832 order of the node with the conjugate value determines this, and the dup
833 node must be one of lane 0 of the same DR as the neg node. */
834 kind = linear_loads_p (perm_cache, left_op[index1]);
835 if (kind == PERM_TOP)
836 {
837 if (linear_loads_p (perm_cache, left_op[index2]) == PERM_EVENODD)
838 return true;
839 }
840 else if (kind == PERM_EVENODD)
841 {
842 if ((kind = linear_loads_p (perm_cache, left_op[index2])) == PERM_EVENODD)
843 return false;
844 return true;
845 }
846 else if (!neg_first)
847 *conj_first_operand = true;
848 else
849 return false;
850
851 if (kind != PERM_EVENEVEN)
852 return false;
853
854 return true;
855 }
856
857 /* Helper function to help distinguish between a conjugate and a rotation in a
858 complex multiplication. The operations have similar shapes but the order of
859 the load permutes are different. This function returns TRUE when the order
860 is consistent with a multiplication or multiplication by conjugated
861 operand but returns FALSE if it's a multiplication by rotated operand. */
862
863 static inline bool
vect_validate_multiplication(slp_tree_to_load_perm_map_t * perm_cache,vec<slp_tree> op,complex_perm_kinds_t permKind)864 vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache,
865 vec<slp_tree> op, complex_perm_kinds_t permKind)
866 {
867 /* The left node is the more common case, test it first. */
868 if (!is_eq_or_top (linear_loads_p (perm_cache, op[0]), permKind))
869 {
870 if (!is_eq_or_top (linear_loads_p (perm_cache, op[1]), permKind))
871 return false;
872 }
873 return true;
874 }
875
876 /* This function combines two nodes containing only even and only odd lanes
877 together into a single node which contains the nodes in even/odd order
878 by using a lane permute.
879
880 The lanes in EVEN and ODD are duplicated 2 times inside the vectors.
881 So for a lanes = 4 EVEN contains {EVEN1, EVEN1, EVEN2, EVEN2}.
882
883 The tree REPRESENTATION is taken from the supplied REP along with the
884 vectype which must be the same between all three nodes.
885 */
886
887 static slp_tree
vect_build_combine_node(slp_tree even,slp_tree odd,slp_tree rep)888 vect_build_combine_node (slp_tree even, slp_tree odd, slp_tree rep)
889 {
890 vec<std::pair<unsigned, unsigned> > perm;
891 perm.create (SLP_TREE_LANES (rep));
892
893 for (unsigned x = 0; x < SLP_TREE_LANES (rep); x+=2)
894 {
895 perm.quick_push (std::make_pair (0, x));
896 perm.quick_push (std::make_pair (1, x+1));
897 }
898
899 slp_tree vnode = vect_create_new_slp_node (2, SLP_TREE_CODE (even));
900 SLP_TREE_CODE (vnode) = VEC_PERM_EXPR;
901 SLP_TREE_LANE_PERMUTATION (vnode) = perm;
902
903 SLP_TREE_CHILDREN (vnode).create (2);
904 SLP_TREE_CHILDREN (vnode).quick_push (even);
905 SLP_TREE_CHILDREN (vnode).quick_push (odd);
906 SLP_TREE_REF_COUNT (even)++;
907 SLP_TREE_REF_COUNT (odd)++;
908 SLP_TREE_REF_COUNT (vnode) = 1;
909
910 SLP_TREE_LANES (vnode) = SLP_TREE_LANES (rep);
911 gcc_assert (perm.length () == SLP_TREE_LANES (vnode));
912 /* Representation is set to that of the current node as the vectorizer
913 can't deal with VEC_PERMs with no representation, as would be the
914 case with invariants. */
915 SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (rep);
916 SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (rep);
917 return vnode;
918 }
919
920 class complex_mul_pattern : public complex_pattern
921 {
922 protected:
complex_mul_pattern(slp_tree * node,vec<slp_tree> * m_ops,internal_fn ifn)923 complex_mul_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
924 : complex_pattern (node, m_ops, ifn)
925 {
926 this->m_num_args = 2;
927 }
928
929 public:
930 void build (vec_info *);
931 static internal_fn
932 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
933 vec<slp_tree> *);
934
935 static vect_pattern*
936 recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
937
938 static vect_pattern*
mkInstance(slp_tree * node,vec<slp_tree> * m_ops,internal_fn ifn)939 mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
940 {
941 return new complex_mul_pattern (node, m_ops, ifn);
942 }
943
944 };
945
946 /* Pattern matcher for trying to match complex multiply pattern in SLP tree
947 If the operation matches then IFN is set to the operation it matched
948 and the arguments to the two replacement statements are put in m_ops.
949
950 If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
951
952 This function matches the patterns shaped as:
953
954 double ax = (b[i+1] * a[i]);
955 double bx = (a[i+1] * b[i]);
956
957 c[i] = c[i] - ax;
958 c[i+1] = c[i+1] + bx;
959
960 If a match occurred then TRUE is returned, else FALSE. The initial match is
961 expected to be in OP1 and the initial match operands in args0. */
962
963 internal_fn
matches(complex_operation_t op,slp_tree_to_load_perm_map_t * perm_cache,slp_tree * node,vec<slp_tree> * ops)964 complex_mul_pattern::matches (complex_operation_t op,
965 slp_tree_to_load_perm_map_t *perm_cache,
966 slp_tree *node, vec<slp_tree> *ops)
967 {
968 internal_fn ifn = IFN_LAST;
969
970 if (op != MINUS_PLUS)
971 return IFN_LAST;
972
973 slp_tree root = *node;
974 /* First two nodes must be a multiply. */
975 auto_vec<slp_tree> muls;
976 if (vect_match_call_complex_mla (root, 0) != MULT_MULT
977 || vect_match_call_complex_mla (root, 1, &muls) != MULT_MULT)
978 return IFN_LAST;
979
980 /* Now operand2+4 may lead to another expression. */
981 auto_vec<slp_tree> left_op, right_op;
982 left_op.safe_splice (SLP_TREE_CHILDREN (muls[0]));
983 right_op.safe_splice (SLP_TREE_CHILDREN (muls[1]));
984
985 if (linear_loads_p (perm_cache, left_op[1]) == PERM_ODDEVEN)
986 return IFN_LAST;
987
988 bool neg_first = false;
989 bool conj_first_operand = false;
990 bool is_neg = vect_normalize_conj_loc (right_op, &neg_first);
991
992 if (!is_neg)
993 {
994 /* A multiplication needs to multiply agains the real pair, otherwise
995 the pattern matches that of FMS. */
996 if (!vect_validate_multiplication (perm_cache, left_op, PERM_EVENEVEN)
997 || vect_normalize_conj_loc (left_op))
998 return IFN_LAST;
999 ifn = IFN_COMPLEX_MUL;
1000 }
1001 else if (is_neg)
1002 {
1003 if (!vect_validate_multiplication (perm_cache, left_op, right_op,
1004 neg_first, &conj_first_operand,
1005 false))
1006 return IFN_LAST;
1007
1008 ifn = IFN_COMPLEX_MUL_CONJ;
1009 }
1010
1011 if (!vect_pattern_validate_optab (ifn, *node))
1012 return IFN_LAST;
1013
1014 ops->truncate (0);
1015 ops->create (3);
1016
1017 complex_perm_kinds_t kind = linear_loads_p (perm_cache, left_op[0]);
1018 if (kind == PERM_EVENODD)
1019 {
1020 ops->quick_push (left_op[1]);
1021 ops->quick_push (right_op[1]);
1022 ops->quick_push (left_op[0]);
1023 }
1024 else if (kind == PERM_TOP)
1025 {
1026 ops->quick_push (left_op[1]);
1027 ops->quick_push (right_op[1]);
1028 ops->quick_push (left_op[0]);
1029 }
1030 else if (kind == PERM_EVENEVEN && !conj_first_operand)
1031 {
1032 ops->quick_push (left_op[0]);
1033 ops->quick_push (right_op[0]);
1034 ops->quick_push (left_op[1]);
1035 }
1036 else
1037 {
1038 ops->quick_push (left_op[0]);
1039 ops->quick_push (right_op[1]);
1040 ops->quick_push (left_op[1]);
1041 }
1042
1043 return ifn;
1044 }
1045
1046 /* Attempt to recognize a complex mul pattern. */
1047
1048 vect_pattern*
recognize(slp_tree_to_load_perm_map_t * perm_cache,slp_tree * node)1049 complex_mul_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1050 slp_tree *node)
1051 {
1052 auto_vec<slp_tree> ops;
1053 complex_operation_t op
1054 = vect_detect_pair_op (*node, true, &ops);
1055 internal_fn ifn
1056 = complex_mul_pattern::matches (op, perm_cache, node, &ops);
1057 if (ifn == IFN_LAST)
1058 return NULL;
1059
1060 return new complex_mul_pattern (node, &ops, ifn);
1061 }
1062
1063 /* Perform a replacement of the detected complex mul pattern with the new
1064 instruction sequences. */
1065
1066 void
build(vec_info * vinfo)1067 complex_mul_pattern::build (vec_info *vinfo)
1068 {
1069 slp_tree node;
1070 unsigned i;
1071 slp_tree newnode
1072 = vect_build_combine_node (this->m_ops[0], this->m_ops[1], *this->m_node);
1073 SLP_TREE_REF_COUNT (this->m_ops[2])++;
1074
1075 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1076 vect_free_slp_tree (node);
1077
1078 /* First re-arrange the children. */
1079 SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
1080 SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[2];
1081 SLP_TREE_CHILDREN (*this->m_node)[1] = newnode;
1082
1083 /* And then rewrite the node itself. */
1084 complex_pattern::build (vinfo);
1085 }
1086
1087 /*******************************************************************************
1088 * complex_fma_pattern class
1089 ******************************************************************************/
1090
1091 class complex_fma_pattern : public complex_pattern
1092 {
1093 protected:
complex_fma_pattern(slp_tree * node,vec<slp_tree> * m_ops,internal_fn ifn)1094 complex_fma_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1095 : complex_pattern (node, m_ops, ifn)
1096 {
1097 this->m_num_args = 3;
1098 }
1099
1100 public:
1101 void build (vec_info *);
1102 static internal_fn
1103 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
1104 vec<slp_tree> *);
1105
1106 static vect_pattern*
1107 recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
1108
1109 static vect_pattern*
mkInstance(slp_tree * node,vec<slp_tree> * m_ops,internal_fn ifn)1110 mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1111 {
1112 return new complex_fma_pattern (node, m_ops, ifn);
1113 }
1114 };
1115
1116 /* Pattern matcher for trying to match complex multiply and accumulate
1117 and multiply and subtract patterns in SLP tree.
1118 If the operation matches then IFN is set to the operation it matched and
1119 the arguments to the two replacement statements are put in m_ops.
1120
1121 If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
1122
1123 This function matches the patterns shaped as:
1124
1125 double ax = (b[i+1] * a[i]) + (b[i] * a[i]);
1126 double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]);
1127
1128 c[i] = c[i] - ax;
1129 c[i+1] = c[i+1] + bx;
1130
1131 If a match occurred then TRUE is returned, else FALSE. The match is
1132 performed after COMPLEX_MUL which would have done the majority of the work.
1133 This function merely matches an ADD with a COMPLEX_MUL IFN. The initial
1134 match is expected to be in OP1 and the initial match operands in args0. */
1135
1136 internal_fn
matches(complex_operation_t op,slp_tree_to_load_perm_map_t *,slp_tree * ref_node,vec<slp_tree> * ops)1137 complex_fma_pattern::matches (complex_operation_t op,
1138 slp_tree_to_load_perm_map_t * /* perm_cache */,
1139 slp_tree *ref_node, vec<slp_tree> *ops)
1140 {
1141 internal_fn ifn = IFN_LAST;
1142
1143 /* Find the two components. We match Complex MUL first which reduces the
1144 amount of work this pattern has to do. After that we just match the
1145 head node and we're done.:
1146
1147 * FMA: + +.
1148
1149 We need to ignore the two_operands nodes that may also match.
1150 For that we can check if they have any scalar statements and also
1151 check that it's not a permute node as we're looking for a normal
1152 PLUS_EXPR operation. */
1153 if (op != CMPLX_NONE)
1154 return IFN_LAST;
1155
1156 /* Find the two components. We match Complex MUL first which reduces the
1157 amount of work this pattern has to do. After that we just match the
1158 head node and we're done.:
1159
1160 * FMA: + + on a non-two_operands node. */
1161 slp_tree vnode = *ref_node;
1162 if (SLP_TREE_LANE_PERMUTATION (vnode).exists ()
1163 || !SLP_TREE_CHILDREN (vnode).exists ()
1164 || !vect_match_expression_p (vnode, PLUS_EXPR))
1165 return IFN_LAST;
1166
1167 slp_tree node = SLP_TREE_CHILDREN (vnode)[1];
1168
1169 if (vect_match_call_p (node, IFN_COMPLEX_MUL))
1170 ifn = IFN_COMPLEX_FMA;
1171 else if (vect_match_call_p (node, IFN_COMPLEX_MUL_CONJ))
1172 ifn = IFN_COMPLEX_FMA_CONJ;
1173 else
1174 return IFN_LAST;
1175
1176 if (!vect_pattern_validate_optab (ifn, vnode))
1177 return IFN_LAST;
1178
1179 ops->truncate (0);
1180 ops->create (3);
1181
1182 if (ifn == IFN_COMPLEX_FMA)
1183 {
1184 ops->quick_push (SLP_TREE_CHILDREN (vnode)[0]);
1185 ops->quick_push (SLP_TREE_CHILDREN (node)[1]);
1186 ops->quick_push (SLP_TREE_CHILDREN (node)[0]);
1187 }
1188 else
1189 {
1190 ops->quick_push (SLP_TREE_CHILDREN (vnode)[0]);
1191 ops->quick_push (SLP_TREE_CHILDREN (node)[0]);
1192 ops->quick_push (SLP_TREE_CHILDREN (node)[1]);
1193 }
1194
1195 return ifn;
1196 }
1197
1198 /* Attempt to recognize a complex mul pattern. */
1199
1200 vect_pattern*
recognize(slp_tree_to_load_perm_map_t * perm_cache,slp_tree * node)1201 complex_fma_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1202 slp_tree *node)
1203 {
1204 auto_vec<slp_tree> ops;
1205 complex_operation_t op
1206 = vect_detect_pair_op (*node, true, &ops);
1207 internal_fn ifn
1208 = complex_fma_pattern::matches (op, perm_cache, node, &ops);
1209 if (ifn == IFN_LAST)
1210 return NULL;
1211
1212 return new complex_fma_pattern (node, &ops, ifn);
1213 }
1214
1215 /* Perform a replacement of the detected complex mul pattern with the new
1216 instruction sequences. */
1217
1218 void
build(vec_info * vinfo)1219 complex_fma_pattern::build (vec_info *vinfo)
1220 {
1221 slp_tree node = SLP_TREE_CHILDREN (*this->m_node)[1];
1222
1223 SLP_TREE_CHILDREN (*this->m_node).release ();
1224 SLP_TREE_CHILDREN (*this->m_node).create (3);
1225 SLP_TREE_CHILDREN (*this->m_node).safe_splice (this->m_ops);
1226
1227 SLP_TREE_REF_COUNT (this->m_ops[1])++;
1228 SLP_TREE_REF_COUNT (this->m_ops[2])++;
1229
1230 vect_free_slp_tree (node);
1231
1232 complex_pattern::build (vinfo);
1233 }
1234
1235 /*******************************************************************************
1236 * complex_fms_pattern class
1237 ******************************************************************************/
1238
1239 class complex_fms_pattern : public complex_pattern
1240 {
1241 protected:
complex_fms_pattern(slp_tree * node,vec<slp_tree> * m_ops,internal_fn ifn)1242 complex_fms_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1243 : complex_pattern (node, m_ops, ifn)
1244 {
1245 this->m_num_args = 3;
1246 }
1247
1248 public:
1249 void build (vec_info *);
1250 static internal_fn
1251 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
1252 vec<slp_tree> *);
1253
1254 static vect_pattern*
1255 recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
1256
1257 static vect_pattern*
mkInstance(slp_tree * node,vec<slp_tree> * m_ops,internal_fn ifn)1258 mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1259 {
1260 return new complex_fms_pattern (node, m_ops, ifn);
1261 }
1262 };
1263
1264
1265 /* Pattern matcher for trying to match complex multiply and accumulate
1266 and multiply and subtract patterns in SLP tree.
1267 If the operation matches then IFN is set to the operation it matched and
1268 the arguments to the two replacement statements are put in m_ops.
1269
1270 If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
1271
1272 This function matches the patterns shaped as:
1273
1274 double ax = (b[i+1] * a[i]) + (b[i] * a[i]);
1275 double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]);
1276
1277 c[i] = c[i] - ax;
1278 c[i+1] = c[i+1] + bx;
1279
1280 If a match occurred then TRUE is returned, else FALSE. The initial match is
1281 expected to be in OP1 and the initial match operands in args0. */
1282
1283 internal_fn
matches(complex_operation_t op,slp_tree_to_load_perm_map_t * perm_cache,slp_tree * ref_node,vec<slp_tree> * ops)1284 complex_fms_pattern::matches (complex_operation_t op,
1285 slp_tree_to_load_perm_map_t *perm_cache,
1286 slp_tree * ref_node, vec<slp_tree> *ops)
1287 {
1288 internal_fn ifn = IFN_LAST;
1289
1290 /* Find the two components. We match Complex MUL first which reduces the
1291 amount of work this pattern has to do. After that we just match the
1292 head node and we're done.:
1293
1294 * FMS: - +. */
1295 slp_tree child = NULL;
1296
1297 /* We need to ignore the two_operands nodes that may also match,
1298 for that we can check if they have any scalar statements and also
1299 check that it's not a permute node as we're looking for a normal
1300 PLUS_EXPR operation. */
1301 if (op != PLUS_MINUS)
1302 return IFN_LAST;
1303
1304 child = SLP_TREE_CHILDREN ((*ops)[1])[1];
1305 if (vect_detect_pair_op (child) != MINUS_PLUS)
1306 return IFN_LAST;
1307
1308 /* First two nodes must be a multiply. */
1309 auto_vec<slp_tree> muls;
1310 if (vect_match_call_complex_mla (child, 0) != MULT_MULT
1311 || vect_match_call_complex_mla (child, 1, &muls) != MULT_MULT)
1312 return IFN_LAST;
1313
1314 /* Now operand2+4 may lead to another expression. */
1315 auto_vec<slp_tree> left_op, right_op;
1316 left_op.safe_splice (SLP_TREE_CHILDREN (muls[0]));
1317 right_op.safe_splice (SLP_TREE_CHILDREN (muls[1]));
1318
1319 bool is_neg = vect_normalize_conj_loc (left_op);
1320
1321 child = SLP_TREE_CHILDREN ((*ops)[1])[0];
1322 bool conj_first_operand = false;
1323 if (!vect_validate_multiplication (perm_cache, right_op, left_op, false,
1324 &conj_first_operand, true))
1325 return IFN_LAST;
1326
1327 if (!is_neg)
1328 ifn = IFN_COMPLEX_FMS;
1329 else if (is_neg)
1330 ifn = IFN_COMPLEX_FMS_CONJ;
1331
1332 if (!vect_pattern_validate_optab (ifn, *ref_node))
1333 return IFN_LAST;
1334
1335 ops->truncate (0);
1336 ops->create (4);
1337
1338 complex_perm_kinds_t kind = linear_loads_p (perm_cache, right_op[0]);
1339 if (kind == PERM_EVENODD)
1340 {
1341 ops->quick_push (child);
1342 ops->quick_push (right_op[0]);
1343 ops->quick_push (right_op[1]);
1344 ops->quick_push (left_op[1]);
1345 }
1346 else if (kind == PERM_TOP)
1347 {
1348 ops->quick_push (child);
1349 ops->quick_push (right_op[1]);
1350 ops->quick_push (right_op[0]);
1351 ops->quick_push (left_op[0]);
1352 }
1353 else if (kind == PERM_EVENEVEN && !is_neg)
1354 {
1355 ops->quick_push (child);
1356 ops->quick_push (right_op[1]);
1357 ops->quick_push (right_op[0]);
1358 ops->quick_push (left_op[0]);
1359 }
1360 else
1361 {
1362 ops->quick_push (child);
1363 ops->quick_push (right_op[1]);
1364 ops->quick_push (right_op[0]);
1365 ops->quick_push (left_op[1]);
1366 }
1367
1368 return ifn;
1369 }
1370
1371 /* Attempt to recognize a complex mul pattern. */
1372
1373 vect_pattern*
recognize(slp_tree_to_load_perm_map_t * perm_cache,slp_tree * node)1374 complex_fms_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1375 slp_tree *node)
1376 {
1377 auto_vec<slp_tree> ops;
1378 complex_operation_t op
1379 = vect_detect_pair_op (*node, true, &ops);
1380 internal_fn ifn
1381 = complex_fms_pattern::matches (op, perm_cache, node, &ops);
1382 if (ifn == IFN_LAST)
1383 return NULL;
1384
1385 return new complex_fms_pattern (node, &ops, ifn);
1386 }
1387
1388 /* Perform a replacement of the detected complex mul pattern with the new
1389 instruction sequences. */
1390
1391 void
build(vec_info * vinfo)1392 complex_fms_pattern::build (vec_info *vinfo)
1393 {
1394 slp_tree node;
1395 unsigned i;
1396 slp_tree newnode =
1397 vect_build_combine_node (this->m_ops[2], this->m_ops[3], *this->m_node);
1398 SLP_TREE_REF_COUNT (this->m_ops[0])++;
1399 SLP_TREE_REF_COUNT (this->m_ops[1])++;
1400
1401 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1402 vect_free_slp_tree (node);
1403
1404 SLP_TREE_CHILDREN (*this->m_node).release ();
1405 SLP_TREE_CHILDREN (*this->m_node).create (3);
1406
1407 /* First re-arrange the children. */
1408 SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[0]);
1409 SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[1]);
1410 SLP_TREE_CHILDREN (*this->m_node).quick_push (newnode);
1411
1412 /* And then rewrite the node itself. */
1413 complex_pattern::build (vinfo);
1414 }
1415
1416 /*******************************************************************************
1417 * complex_operations_pattern class
1418 ******************************************************************************/
1419
1420 /* This function combines all the existing pattern matchers above into one class
1421 that shares the functionality between them. The initial match is shared
1422 between all complex operations. */
1423
1424 class complex_operations_pattern : public complex_pattern
1425 {
1426 protected:
complex_operations_pattern(slp_tree * node,vec<slp_tree> * m_ops,internal_fn ifn)1427 complex_operations_pattern (slp_tree *node, vec<slp_tree> *m_ops,
1428 internal_fn ifn)
1429 : complex_pattern (node, m_ops, ifn)
1430 {
1431 this->m_num_args = 0;
1432 }
1433
1434 public:
1435 void build (vec_info *);
1436 static internal_fn
1437 matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
1438 vec<slp_tree> *);
1439
1440 static vect_pattern*
1441 recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
1442 };
1443
1444 /* Dummy matches implementation for proxy object. */
1445
1446 internal_fn
1447 complex_operations_pattern::
matches(complex_operation_t,slp_tree_to_load_perm_map_t *,slp_tree *,vec<slp_tree> *)1448 matches (complex_operation_t /* op */,
1449 slp_tree_to_load_perm_map_t * /* perm_cache */,
1450 slp_tree * /* ref_node */, vec<slp_tree> * /* ops */)
1451 {
1452 return IFN_LAST;
1453 }
1454
1455 /* Attempt to recognize a complex mul pattern. */
1456
1457 vect_pattern*
recognize(slp_tree_to_load_perm_map_t * perm_cache,slp_tree * node)1458 complex_operations_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1459 slp_tree *node)
1460 {
1461 auto_vec<slp_tree> ops;
1462 complex_operation_t op
1463 = vect_detect_pair_op (*node, true, &ops);
1464 internal_fn ifn = IFN_LAST;
1465
1466 ifn = complex_fms_pattern::matches (op, perm_cache, node, &ops);
1467 if (ifn != IFN_LAST)
1468 return complex_fms_pattern::mkInstance (node, &ops, ifn);
1469
1470 ifn = complex_mul_pattern::matches (op, perm_cache, node, &ops);
1471 if (ifn != IFN_LAST)
1472 return complex_mul_pattern::mkInstance (node, &ops, ifn);
1473
1474 ifn = complex_fma_pattern::matches (op, perm_cache, node, &ops);
1475 if (ifn != IFN_LAST)
1476 return complex_fma_pattern::mkInstance (node, &ops, ifn);
1477
1478 ifn = complex_add_pattern::matches (op, perm_cache, node, &ops);
1479 if (ifn != IFN_LAST)
1480 return complex_add_pattern::mkInstance (node, &ops, ifn);
1481
1482 return NULL;
1483 }
1484
1485 /* Dummy implementation of build. */
1486
1487 void
build(vec_info *)1488 complex_operations_pattern::build (vec_info * /* vinfo */)
1489 {
1490 gcc_unreachable ();
1491 }
1492
1493 /*******************************************************************************
1494 * Pattern matching definitions
1495 ******************************************************************************/
1496
1497 #define SLP_PATTERN(x) &x::recognize
1498 vect_pattern_decl_t slp_patterns[]
1499 {
1500 /* For least amount of back-tracking and more efficient matching
1501 order patterns from the largest to the smallest. Especially if they
1502 overlap in what they can detect. */
1503
1504 SLP_PATTERN (complex_operations_pattern),
1505 };
1506 #undef SLP_PATTERN
1507
1508 /* Set the number of SLP pattern matchers available. */
1509 size_t num__slp_patterns = sizeof(slp_patterns)/sizeof(vect_pattern_decl_t);
1510