1 /* SLP - Pattern matcher on SLP trees
2    Copyright (C) 2020-2021 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "target.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "tree-pass.h"
29 #include "ssa.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h"		/* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "gimple-iterator.h"
36 #include "cfgloop.h"
37 #include "tree-vectorizer.h"
38 #include "langhooks.h"
39 #include "gimple-walk.h"
40 #include "dbgcnt.h"
41 #include "tree-vector-builder.h"
42 #include "vec-perm-indices.h"
43 #include "gimple-fold.h"
44 #include "internal-fn.h"
45 
46 /* SLP Pattern matching mechanism.
47 
48   This extension to the SLP vectorizer allows one to transform the generated SLP
49   tree based on any pattern.  The difference between this and the normal vect
50   pattern matcher is that unlike the former, this matcher allows you to match
51   with instructions that do not belong to the same SSA dominator graph.
52 
53   The only requirement that this pattern matcher has is that you are only
54   only allowed to either match an entire group or none.
55 
56   The pattern matcher currently only allows you to perform replacements to
57   internal functions.
58 
59   Once the patterns are matched it is one way, these cannot be undone.  It is
60   currently not supported to match patterns recursively.
61 
62   To add a new pattern, implement the vect_pattern class and add the type to
63   slp_patterns.
64 
65 */
66 
67 /*******************************************************************************
68  * vect_pattern class
69  ******************************************************************************/
70 
71 /* Default implementation of recognize that performs matching, validation and
72    replacement of nodes but that can be overriden if required.  */
73 
74 static bool
vect_pattern_validate_optab(internal_fn ifn,slp_tree node)75 vect_pattern_validate_optab (internal_fn ifn, slp_tree node)
76 {
77   tree vectype = SLP_TREE_VECTYPE (node);
78   if (ifn == IFN_LAST || !vectype)
79     return false;
80 
81   if (dump_enabled_p ())
82     dump_printf_loc (MSG_NOTE, vect_location,
83 		     "Found %s pattern in SLP tree\n",
84 		     internal_fn_name (ifn));
85 
86   if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
87     {
88       if (dump_enabled_p ())
89 	dump_printf_loc (MSG_NOTE, vect_location,
90 			 "Target supports %s vectorization with mode %T\n",
91 			 internal_fn_name (ifn), vectype);
92     }
93   else
94     {
95       if (dump_enabled_p ())
96         {
97 	  if (!vectype)
98 	    dump_printf_loc (MSG_NOTE, vect_location,
99 			     "Target does not support vector type for %T\n",
100 			     SLP_TREE_DEF_TYPE (node));
101 	  else
102 	    dump_printf_loc (MSG_NOTE, vect_location,
103 			     "Target does not support %s for vector type "
104 			     "%T\n", internal_fn_name (ifn), vectype);
105 	}
106       return false;
107     }
108   return true;
109 }
110 
111 /*******************************************************************************
112  * General helper types
113  ******************************************************************************/
114 
115 /* The COMPLEX_OPERATION enum denotes the possible pair of operations that can
116    be matched when looking for expressions that we are interested matching for
117    complex numbers addition and mla.  */
118 
119 typedef enum _complex_operation : unsigned {
120   PLUS_PLUS,
121   MINUS_PLUS,
122   PLUS_MINUS,
123   MULT_MULT,
124   CMPLX_NONE
125 } complex_operation_t;
126 
127 /*******************************************************************************
128  * General helper functions
129  ******************************************************************************/
130 
131 /* Helper function of linear_loads_p that checks to see if the load permutation
132    is sequential and in monotonically increasing order of loads with no gaps.
133 */
134 
135 static inline complex_perm_kinds_t
is_linear_load_p(load_permutation_t loads)136 is_linear_load_p (load_permutation_t loads)
137 {
138   if (loads.length() == 0)
139     return PERM_UNKNOWN;
140 
141   unsigned load, i;
142   complex_perm_kinds_t candidates[4]
143     = { PERM_ODDODD
144       , PERM_EVENEVEN
145       , PERM_EVENODD
146       , PERM_ODDEVEN
147       };
148 
149   int valid_patterns = 4;
150   FOR_EACH_VEC_ELT (loads, i, load)
151     {
152       if (candidates[0] != PERM_UNKNOWN && load != 1)
153 	{
154 	  candidates[0] = PERM_UNKNOWN;
155 	  valid_patterns--;
156 	}
157       if (candidates[1] != PERM_UNKNOWN && load != 0)
158 	{
159 	  candidates[1] = PERM_UNKNOWN;
160 	  valid_patterns--;
161 	}
162       if (candidates[2] != PERM_UNKNOWN && load != i)
163 	{
164 	  candidates[2] = PERM_UNKNOWN;
165 	  valid_patterns--;
166 	}
167       if (candidates[3] != PERM_UNKNOWN
168 	  && load != (i % 2 == 0 ? i + 1 : i - 1))
169 	{
170 	  candidates[3] = PERM_UNKNOWN;
171 	  valid_patterns--;
172 	}
173 
174       if (valid_patterns == 0)
175 	return PERM_UNKNOWN;
176     }
177 
178   for (i = 0; i < sizeof(candidates); i++)
179     if (candidates[i] != PERM_UNKNOWN)
180       return candidates[i];
181 
182   return PERM_UNKNOWN;
183 }
184 
185 /* Combine complex_perm_kinds A and B into a new permute kind that describes the
186    resulting operation.  */
187 
188 static inline complex_perm_kinds_t
vect_merge_perms(complex_perm_kinds_t a,complex_perm_kinds_t b)189 vect_merge_perms (complex_perm_kinds_t a, complex_perm_kinds_t b)
190 {
191   if (a == b)
192     return a;
193 
194   if (a == PERM_TOP)
195     return b;
196 
197   if (b == PERM_TOP)
198     return a;
199 
200   return PERM_UNKNOWN;
201 }
202 
203 /* Check to see if all loads rooted in ROOT are linear.  Linearity is
204    defined as having no gaps between values loaded.  */
205 
206 static complex_perm_kinds_t
linear_loads_p(slp_tree_to_load_perm_map_t * perm_cache,slp_tree root)207 linear_loads_p (slp_tree_to_load_perm_map_t *perm_cache, slp_tree root)
208 {
209   if (!root)
210     return PERM_UNKNOWN;
211 
212   unsigned i;
213   complex_perm_kinds_t *tmp;
214 
215   if ((tmp = perm_cache->get (root)) != NULL)
216     return *tmp;
217 
218   complex_perm_kinds_t retval = PERM_UNKNOWN;
219   perm_cache->put (root, retval);
220 
221   /* If it's a load node, then just read the load permute.  */
222   if (SLP_TREE_LOAD_PERMUTATION (root).exists ())
223     {
224       retval = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root));
225       perm_cache->put (root, retval);
226       return retval;
227     }
228   else if (SLP_TREE_DEF_TYPE (root) != vect_internal_def)
229     {
230       retval = PERM_TOP;
231       perm_cache->put (root, retval);
232       return retval;
233     }
234 
235   complex_perm_kinds_t kind = PERM_TOP;
236 
237   slp_tree child;
238   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, child)
239     {
240       complex_perm_kinds_t res = linear_loads_p (perm_cache, child);
241       kind = vect_merge_perms (kind, res);
242       /* Unknown and Top are not valid on blends as they produce no permute.  */
243       retval = kind;
244       if (kind == PERM_UNKNOWN || kind == PERM_TOP)
245 	return retval;
246     }
247 
248   retval = kind;
249 
250   perm_cache->put (root, retval);
251   return retval;
252 }
253 
254 
255 /* This function attempts to make a node rooted in NODE is linear.  If the node
256    if already linear than the node itself is returned in RESULT.
257 
258    If the node is not linear then a new VEC_PERM_EXPR node is created with a
259    lane permute that when applied will make the node linear.   If such a
260    permute cannot be created then FALSE is returned from the function.
261 
262    Here linearity is defined as having a sequential, monotically increasing
263    load position inside the load permute generated by the loads reachable from
264    NODE.  */
265 
266 static slp_tree
vect_build_swap_evenodd_node(slp_tree node)267 vect_build_swap_evenodd_node (slp_tree node)
268 {
269   /* Attempt to linearise the permute.  */
270   vec<std::pair<unsigned, unsigned> > zipped;
271   zipped.create (SLP_TREE_LANES (node));
272 
273   for (unsigned x = 0; x < SLP_TREE_LANES (node); x+=2)
274     {
275       zipped.quick_push (std::make_pair (0, x+1));
276       zipped.quick_push (std::make_pair (0, x));
277     }
278 
279   /* Create the new permute node and store it instead.  */
280   slp_tree vnode = vect_create_new_slp_node (1, VEC_PERM_EXPR);
281   SLP_TREE_LANE_PERMUTATION (vnode) = zipped;
282   SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (node);
283   SLP_TREE_CHILDREN (vnode).quick_push (node);
284   SLP_TREE_REF_COUNT (vnode) = 1;
285   SLP_TREE_LANES (vnode) = SLP_TREE_LANES (node);
286   SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (node);
287   SLP_TREE_REF_COUNT (node)++;
288   return vnode;
289 }
290 
291 /* Checks to see of the expression represented by NODE is a gimple assign with
292    code CODE.  */
293 
294 static inline bool
vect_match_expression_p(slp_tree node,tree_code code)295 vect_match_expression_p (slp_tree node, tree_code code)
296 {
297   if (!node
298       || !SLP_TREE_REPRESENTATIVE (node))
299     return false;
300 
301   gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node));
302   if (!is_gimple_assign (expr)
303       || gimple_assign_rhs_code (expr) != code)
304     return false;
305 
306   return true;
307 }
308 
309 /* Checks to see if the expression represented by NODE is a call to the internal
310    function FN.  */
311 
312 static inline bool
vect_match_call_p(slp_tree node,internal_fn fn)313 vect_match_call_p (slp_tree node, internal_fn fn)
314 {
315   if (!node
316       || !SLP_TREE_REPRESENTATIVE (node))
317     return false;
318 
319   gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node));
320   if (!expr
321       || !gimple_call_internal_p (expr, fn))
322     return false;
323 
324    return true;
325 }
326 
327 /* Check if the given lane permute in PERMUTES matches an alternating sequence
328    of {even odd even odd ...}.  This to account for unrolled loops.  Further
329    mode there resulting permute must be linear.   */
330 
331 static inline bool
vect_check_evenodd_blend(lane_permutation_t & permutes,unsigned even,unsigned odd)332 vect_check_evenodd_blend (lane_permutation_t &permutes,
333 			 unsigned even, unsigned odd)
334 {
335   if (permutes.length () == 0
336       || permutes.length () % 2 != 0)
337     return false;
338 
339   unsigned val[2] = {even, odd};
340   unsigned seed = 0;
341   for (unsigned i = 0; i < permutes.length (); i++)
342     if (permutes[i].first != val[i % 2]
343 	|| permutes[i].second != seed++)
344       return false;
345 
346   return true;
347 }
348 
349 /* This function will match the two gimple expressions representing NODE1 and
350    NODE2 in parallel and returns the pair operation that represents the two
351    expressions in the two statements.
352 
353    If match is successful then the corresponding complex_operation is
354    returned and the arguments to the two matched operations are returned in OPS.
355 
356    If TWO_OPERANDS it is expected that the LANES of the parent VEC_PERM select
357    from the two nodes alternatingly.
358 
359    If unsuccessful then CMPLX_NONE is returned and OPS is untouched.
360 
361    e.g. the following gimple statements
362 
363    stmt 0 _39 = _37 + _12;
364    stmt 1 _6 = _38 - _36;
365 
366    will return PLUS_MINUS along with OPS containing {_37, _12, _38, _36}.
367 */
368 
369 static complex_operation_t
370 vect_detect_pair_op (slp_tree node1, slp_tree node2, lane_permutation_t &lanes,
371 		     bool two_operands = true, vec<slp_tree> *ops = NULL)
372 {
373   complex_operation_t result = CMPLX_NONE;
374 
375   if (vect_match_expression_p (node1, MINUS_EXPR)
376       && vect_match_expression_p (node2, PLUS_EXPR)
377       && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
378     result = MINUS_PLUS;
379   else if (vect_match_expression_p (node1, PLUS_EXPR)
380 	   && vect_match_expression_p (node2, MINUS_EXPR)
381 	   && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
382     result = PLUS_MINUS;
383   else if (vect_match_expression_p (node1, PLUS_EXPR)
384 	   && vect_match_expression_p (node2, PLUS_EXPR))
385     result = PLUS_PLUS;
386   else if (vect_match_expression_p (node1, MULT_EXPR)
387 	   && vect_match_expression_p (node2, MULT_EXPR))
388     result = MULT_MULT;
389 
390   if (result != CMPLX_NONE && ops != NULL)
391     {
392       ops->safe_push (node1);
393       ops->safe_push (node2);
394     }
395   return result;
396 }
397 
398 /* Overload of vect_detect_pair_op that matches against the representative
399    statements in the children of NODE.  It is expected that NODE has exactly
400    two children and when TWO_OPERANDS then NODE must be a VEC_PERM.  */
401 
402 static complex_operation_t
403 vect_detect_pair_op (slp_tree node, bool two_operands = true,
404 		     vec<slp_tree> *ops = NULL)
405 {
406   if (!two_operands && SLP_TREE_CODE (node) == VEC_PERM_EXPR)
407     return CMPLX_NONE;
408 
409   if (SLP_TREE_CHILDREN (node).length () != 2)
410     return CMPLX_NONE;
411 
412   vec<slp_tree> children = SLP_TREE_CHILDREN (node);
413   lane_permutation_t &lanes = SLP_TREE_LANE_PERMUTATION (node);
414 
415   return vect_detect_pair_op (children[0], children[1], lanes, two_operands,
416 			      ops);
417 }
418 
419 /*******************************************************************************
420  * complex_pattern class
421  ******************************************************************************/
422 
423 /* SLP Complex Numbers pattern matching.
424 
425   As an example, the following simple loop:
426 
427     double a[restrict N]; double b[restrict N]; double c[restrict N];
428 
429     for (int i=0; i < N; i+=2)
430     {
431       c[i] = a[i] - b[i+1];
432       c[i+1] = a[i+1] + b[i];
433     }
434 
435   which represents a complex addition on with a rotation of 90* around the
436   argand plane. i.e. if `a` and `b` were complex numbers then this would be the
437   same as `a + (b * I)`.
438 
439   Here the expressions for `c[i]` and `c[i+1]` are independent but have to be
440   both recognized in order for the pattern to work.  As an SLP tree this is
441   represented as
442 
443                 +--------------------------------+
444                 |       stmt 0 *_9 = _10;        |
445                 |       stmt 1 *_15 = _16;       |
446                 +--------------------------------+
447                                 |
448                                 |
449                                 v
450                 +--------------------------------+
451                 |     stmt 0 _10 = _4 - _8;      |
452                 |    stmt 1 _16 = _12 + _14;     |
453                 | lane permutation { 0[0] 1[1] } |
454                 +--------------------------------+
455                             |        |
456                             |        |
457                             |        |
458                +-----+      |        |      +-----+
459                |     |      |        |      |     |
460          +-----| { } |<-----+        +----->| { } --------+
461          |     |     |   +------------------|     |       |
462          |     +-----+   |                  +-----+       |
463          |        |      |                                |
464          |        |      |                                |
465          |        +------|------------------+             |
466          |               |                  |             |
467          v               v                  v             v
468      +--------------------------+     +--------------------------------+
469      |     stmt 0 _8 = *_7;     |     |        stmt 0 _4 = *_3;        |
470      |    stmt 1 _14 = *_13;    |     |       stmt 1 _12 = *_11;       |
471      | load permutation { 1 0 } |     |    load permutation { 0 1 }    |
472      +--------------------------+     +--------------------------------+
473 
474   The pattern matcher allows you to replace both statements 0 and 1 or none at
475   all.  Because this operation is a two operands operation the actual nodes
476   being replaced are those in the { } nodes.  The actual scalar statements
477   themselves are not replaced or used during the matching but instead the
478   SLP_TREE_REPRESENTATIVE statements are inspected.  You are also allowed to
479   replace and match on any number of nodes.
480 
481   Because the pattern matcher matches on the representative statement for the
482   SLP node the case of two_operators it allows you to match the children of the
483   node.  This is done using the method `recognize ()`.
484 
485 */
486 
487 /* The complex_pattern class contains common code for pattern matchers that work
488    on complex numbers.  These provide functionality to allow de-construction and
489    validation of sequences depicting/transforming REAL and IMAG pairs.  */
490 
491 class complex_pattern : public vect_pattern
492 {
493   protected:
494     auto_vec<slp_tree> m_workset;
complex_pattern(slp_tree * node,vec<slp_tree> * m_ops,internal_fn ifn)495     complex_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
496       : vect_pattern (node, m_ops, ifn)
497     {
498       this->m_workset.safe_push (*node);
499     }
500 
501   public:
502     void build (vec_info *);
503 
504     static internal_fn
505     matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
506 	     vec<slp_tree> *);
507 };
508 
509 /* Create a replacement pattern statement for each node in m_node and inserts
510    the new statement into m_node as the new representative statement.  The old
511    statement is marked as being in a pattern defined by the new statement.  The
512    statement is created as call to internal function IFN with m_num_args
513    arguments.
514 
515    Futhermore the new pattern is also added to the vectorization information
516    structure VINFO and the old statement STMT_INFO is marked as unused while
517    the new statement is marked as used and the number of SLP uses of the new
518    statement is incremented.
519 
520    The newly created SLP nodes are marked as SLP only and will be dissolved
521    if SLP is aborted.
522 
523    The newly created gimple call is returned and the BB remains unchanged.
524 
525    This default method is designed to only match against simple operands where
526    all the input and output types are the same.
527 */
528 
529 void
build(vec_info * vinfo)530 complex_pattern::build (vec_info *vinfo)
531 {
532   stmt_vec_info stmt_info;
533 
534   auto_vec<tree> args;
535   args.create (this->m_num_args);
536   args.quick_grow_cleared (this->m_num_args);
537   slp_tree node;
538   unsigned ix;
539   stmt_vec_info call_stmt_info;
540   gcall *call_stmt = NULL;
541 
542   /* Now modify the nodes themselves.  */
543   FOR_EACH_VEC_ELT (this->m_workset, ix, node)
544     {
545       /* Calculate the location of the statement in NODE to replace.  */
546       stmt_info = SLP_TREE_REPRESENTATIVE (node);
547       stmt_vec_info reduc_def
548 	= STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info));
549       gimple* old_stmt = STMT_VINFO_STMT (stmt_info);
550       tree lhs_old_stmt = gimple_get_lhs (old_stmt);
551       tree type = TREE_TYPE (lhs_old_stmt);
552 
553       /* Create the argument set for use by gimple_build_call_internal_vec.  */
554       for (unsigned i = 0; i < this->m_num_args; i++)
555 	args[i] = lhs_old_stmt;
556 
557       /* Create the new pattern statements.  */
558       call_stmt = gimple_build_call_internal_vec (this->m_ifn, args);
559       tree var = make_temp_ssa_name (type, call_stmt, "slp_patt");
560       gimple_call_set_lhs (call_stmt, var);
561       gimple_set_location (call_stmt, gimple_location (old_stmt));
562       gimple_call_set_nothrow (call_stmt, true);
563 
564       /* Adjust the book-keeping for the new and old statements for use during
565 	 SLP.  This is required to get the right VF and statement during SLP
566 	 analysis.  These changes are created after relevancy has been set for
567 	 the nodes as such we need to manually update them.  Any changes will be
568 	 undone if SLP is cancelled.  */
569       call_stmt_info
570 	= vinfo->add_pattern_stmt (call_stmt, stmt_info);
571 
572       /* Make sure to mark the representative statement pure_slp and
573 	 relevant and transfer reduction info. */
574       STMT_VINFO_RELEVANT (call_stmt_info) = vect_used_in_scope;
575       STMT_SLP_TYPE (call_stmt_info) = pure_slp;
576       STMT_VINFO_REDUC_DEF (call_stmt_info) = reduc_def;
577 
578       gimple_set_bb (call_stmt, gimple_bb (stmt_info->stmt));
579       STMT_VINFO_VECTYPE (call_stmt_info) = SLP_TREE_VECTYPE (node);
580       STMT_VINFO_SLP_VECT_ONLY_PATTERN (call_stmt_info) = true;
581 
582       /* Since we are replacing all the statements in the group with the same
583 	 thing it doesn't really matter.  So just set it every time a new stmt
584 	 is created.  */
585       SLP_TREE_REPRESENTATIVE (node) = call_stmt_info;
586       SLP_TREE_LANE_PERMUTATION (node).release ();
587       SLP_TREE_CODE (node) = CALL_EXPR;
588     }
589 }
590 
591 /*******************************************************************************
592  * complex_add_pattern class
593  ******************************************************************************/
594 
595 class complex_add_pattern : public complex_pattern
596 {
597   protected:
complex_add_pattern(slp_tree * node,vec<slp_tree> * m_ops,internal_fn ifn)598     complex_add_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
599       : complex_pattern (node, m_ops, ifn)
600     {
601       this->m_num_args = 2;
602     }
603 
604   public:
605     void build (vec_info *);
606     static internal_fn
607     matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
608 	     vec<slp_tree> *);
609 
610     static vect_pattern*
611     recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
612 
613     static vect_pattern*
mkInstance(slp_tree * node,vec<slp_tree> * m_ops,internal_fn ifn)614     mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
615     {
616       return new complex_add_pattern (node, m_ops, ifn);
617     }
618 };
619 
620 /* Perform a replacement of the detected complex add pattern with the new
621    instruction sequences.  */
622 
623 void
build(vec_info * vinfo)624 complex_add_pattern::build (vec_info *vinfo)
625 {
626   SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
627 
628   slp_tree node = this->m_ops[0];
629   vec<slp_tree> children = SLP_TREE_CHILDREN (node);
630 
631   /* First re-arrange the children.  */
632   SLP_TREE_CHILDREN (*this->m_node)[0] = children[0];
633   SLP_TREE_CHILDREN (*this->m_node)[1] =
634     vect_build_swap_evenodd_node (children[1]);
635 
636   SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[0])++;
637   SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[1])++;
638   vect_free_slp_tree (this->m_ops[0]);
639   vect_free_slp_tree (this->m_ops[1]);
640 
641   complex_pattern::build (vinfo);
642 }
643 
644 /* Pattern matcher for trying to match complex addition pattern in SLP tree.
645 
646    If no match is found then IFN is set to IFN_LAST.
647    This function matches the patterns shaped as:
648 
649    c[i] = a[i] - b[i+1];
650    c[i+1] = a[i+1] + b[i];
651 
652    If a match occurred then TRUE is returned, else FALSE.  The initial match is
653    expected to be in OP1 and the initial match operands in args0.  */
654 
655 internal_fn
matches(complex_operation_t op,slp_tree_to_load_perm_map_t * perm_cache,slp_tree * node,vec<slp_tree> * ops)656 complex_add_pattern::matches (complex_operation_t op,
657 			      slp_tree_to_load_perm_map_t *perm_cache,
658 			      slp_tree *node, vec<slp_tree> *ops)
659 {
660   internal_fn ifn = IFN_LAST;
661 
662   /* Find the two components.  Rotation in the complex plane will modify
663      the operations:
664 
665       * Rotation  0: + +
666       * Rotation 90: - +
667       * Rotation 180: - -
668       * Rotation 270: + -
669 
670       Rotation 0 and 180 can be handled by normal SIMD code, so we don't need
671       to care about them here.  */
672   if (op == MINUS_PLUS)
673     ifn = IFN_COMPLEX_ADD_ROT90;
674   else if (op == PLUS_MINUS)
675     ifn = IFN_COMPLEX_ADD_ROT270;
676   else
677     return ifn;
678 
679   /* verify that there is a permute, otherwise this isn't a pattern we
680      we support.  */
681   gcc_assert (ops->length () == 2);
682 
683   vec<slp_tree> children = SLP_TREE_CHILDREN ((*ops)[0]);
684 
685   /* First node must be unpermuted.  */
686   if (linear_loads_p (perm_cache, children[0]) != PERM_EVENODD)
687     return IFN_LAST;
688 
689   /* Second node must be permuted.  */
690   if (linear_loads_p (perm_cache, children[1]) != PERM_ODDEVEN)
691     return IFN_LAST;
692 
693   if (!vect_pattern_validate_optab (ifn, *node))
694     return IFN_LAST;
695 
696   return ifn;
697 }
698 
699 /* Attempt to recognize a complex add pattern.  */
700 
701 vect_pattern*
recognize(slp_tree_to_load_perm_map_t * perm_cache,slp_tree * node)702 complex_add_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
703 				slp_tree *node)
704 {
705   auto_vec<slp_tree> ops;
706   complex_operation_t op
707     = vect_detect_pair_op (*node, true, &ops);
708   internal_fn ifn
709     = complex_add_pattern::matches (op, perm_cache, node, &ops);
710   if (ifn == IFN_LAST)
711     return NULL;
712 
713   return new complex_add_pattern (node, &ops, ifn);
714 }
715 
716 /*******************************************************************************
717  * complex_mul_pattern
718  ******************************************************************************/
719 
720 /* Helper function of that looks for a match in the CHILDth child of NODE.  The
721    child used is stored in RES.
722 
723    If the match is successful then ARGS will contain the operands matched
724    and the complex_operation_t type is returned.  If match is not successful
725    then CMPLX_NONE is returned and ARGS is left unmodified.  */
726 
727 static inline complex_operation_t
728 vect_match_call_complex_mla (slp_tree node, unsigned child,
729 			     vec<slp_tree> *args = NULL, slp_tree *res = NULL)
730 {
731   gcc_assert (child < SLP_TREE_CHILDREN (node).length ());
732 
733   slp_tree data = SLP_TREE_CHILDREN (node)[child];
734 
735   if (res)
736     *res = data;
737 
738   return vect_detect_pair_op (data, false, args);
739 }
740 
741 /* Check to see if either of the trees in ARGS are a NEGATE_EXPR.  If the first
742    child (args[0]) is a NEGATE_EXPR then NEG_FIRST_P is set to TRUE.
743 
744    If a negate is found then the values in ARGS are reordered such that the
745    negate node is always the second one and the entry is replaced by the child
746    of the negate node.  */
747 
748 static inline bool
749 vect_normalize_conj_loc (vec<slp_tree> args, bool *neg_first_p = NULL)
750 {
751   gcc_assert (args.length () == 2);
752   bool neg_found = false;
753 
754   if (vect_match_expression_p (args[0], NEGATE_EXPR))
755     {
756       std::swap (args[0], args[1]);
757       neg_found = true;
758       if (neg_first_p)
759 	*neg_first_p = true;
760     }
761   else if (vect_match_expression_p (args[1], NEGATE_EXPR))
762     {
763       neg_found = true;
764       if (neg_first_p)
765 	*neg_first_p = false;
766     }
767 
768   if (neg_found)
769     args[1] = SLP_TREE_CHILDREN (args[1])[0];
770 
771   return neg_found;
772 }
773 
774 /* Helper function to check if PERM is KIND or PERM_TOP.  */
775 
776 static inline bool
is_eq_or_top(complex_perm_kinds_t perm,complex_perm_kinds_t kind)777 is_eq_or_top (complex_perm_kinds_t perm, complex_perm_kinds_t kind)
778 {
779   return perm == kind || perm == PERM_TOP;
780 }
781 
782 /* Helper function that checks to see if LEFT_OP and RIGHT_OP are both MULT_EXPR
783    nodes but also that they represent an operation that is either a complex
784    multiplication or a complex multiplication by conjugated value.
785 
786    Of the negation is expected to be in the first half of the tree (As required
787    by an FMS pattern) then NEG_FIRST is true.  If the operation is a conjugate
788    operation then CONJ_FIRST_OPERAND is set to indicate whether the first or
789    second operand contains the conjugate operation.  */
790 
791 static inline bool
vect_validate_multiplication(slp_tree_to_load_perm_map_t * perm_cache,vec<slp_tree> left_op,vec<slp_tree> right_op,bool neg_first,bool * conj_first_operand,bool fms)792 vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache,
793 			     vec<slp_tree> left_op, vec<slp_tree> right_op,
794 			     bool neg_first, bool *conj_first_operand,
795 			     bool fms)
796 {
797   /* The presence of a negation indicates that we have either a conjugate or a
798      rotation.  We need to distinguish which one.  */
799   *conj_first_operand = false;
800   complex_perm_kinds_t kind;
801 
802   /* Complex conjugates have the negation on the imaginary part of the
803      number where rotations affect the real component.  So check if the
804      negation is on a dup of lane 1.  */
805   if (fms)
806     {
807       /* Canonicalization for fms is not consistent. So have to test both
808 	 variants to be sure.  This needs to be fixed in the mid-end so
809 	 this part can be simpler.  */
810       kind = linear_loads_p (perm_cache, right_op[0]);
811       if (!((is_eq_or_top (linear_loads_p (perm_cache, right_op[0]), PERM_ODDODD)
812 	   && is_eq_or_top (linear_loads_p (perm_cache, right_op[1]),
813 			     PERM_ODDEVEN))
814 	  || (kind == PERM_ODDEVEN
815 	      && is_eq_or_top (linear_loads_p (perm_cache, right_op[1]),
816 			     PERM_ODDODD))))
817 	return false;
818     }
819   else
820     {
821       if (linear_loads_p (perm_cache, right_op[1]) != PERM_ODDODD
822 	  && !is_eq_or_top (linear_loads_p (perm_cache, right_op[0]),
823 			    PERM_ODDEVEN))
824 	return false;
825     }
826 
827   /* Deal with differences in indexes.  */
828   int index1 = fms ? 1 : 0;
829   int index2 = fms ? 0 : 1;
830 
831   /* Check if the conjugate is on the second first or second operand.  The
832      order of the node with the conjugate value determines this, and the dup
833      node must be one of lane 0 of the same DR as the neg node.  */
834   kind = linear_loads_p (perm_cache, left_op[index1]);
835   if (kind == PERM_TOP)
836     {
837       if (linear_loads_p (perm_cache, left_op[index2]) == PERM_EVENODD)
838 	return true;
839     }
840   else if (kind == PERM_EVENODD)
841     {
842       if ((kind = linear_loads_p (perm_cache, left_op[index2])) == PERM_EVENODD)
843 	return false;
844       return true;
845     }
846   else if (!neg_first)
847     *conj_first_operand = true;
848   else
849     return false;
850 
851   if (kind != PERM_EVENEVEN)
852     return false;
853 
854   return true;
855 }
856 
857 /* Helper function to help distinguish between a conjugate and a rotation in a
858    complex multiplication.  The operations have similar shapes but the order of
859    the load permutes are different.  This function returns TRUE when the order
860    is consistent with a multiplication or multiplication by conjugated
861    operand but returns FALSE if it's a multiplication by rotated operand.  */
862 
863 static inline bool
vect_validate_multiplication(slp_tree_to_load_perm_map_t * perm_cache,vec<slp_tree> op,complex_perm_kinds_t permKind)864 vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache,
865 			     vec<slp_tree> op, complex_perm_kinds_t permKind)
866 {
867   /* The left node is the more common case, test it first.  */
868   if (!is_eq_or_top (linear_loads_p (perm_cache, op[0]), permKind))
869     {
870       if (!is_eq_or_top (linear_loads_p (perm_cache, op[1]), permKind))
871 	return false;
872     }
873   return true;
874 }
875 
876 /* This function combines two nodes containing only even and only odd lanes
877    together into a single node which contains the nodes in even/odd order
878    by using a lane permute.
879 
880    The lanes in EVEN and ODD are duplicated 2 times inside the vectors.
881    So for a lanes = 4 EVEN contains {EVEN1, EVEN1, EVEN2, EVEN2}.
882 
883    The tree REPRESENTATION is taken from the supplied REP along with the
884    vectype which must be the same between all three nodes.
885 */
886 
887 static slp_tree
vect_build_combine_node(slp_tree even,slp_tree odd,slp_tree rep)888 vect_build_combine_node (slp_tree even, slp_tree odd, slp_tree rep)
889 {
890   vec<std::pair<unsigned, unsigned> > perm;
891   perm.create (SLP_TREE_LANES (rep));
892 
893   for (unsigned x = 0; x < SLP_TREE_LANES (rep); x+=2)
894     {
895       perm.quick_push (std::make_pair (0, x));
896       perm.quick_push (std::make_pair (1, x+1));
897     }
898 
899   slp_tree vnode = vect_create_new_slp_node (2, SLP_TREE_CODE (even));
900   SLP_TREE_CODE (vnode) = VEC_PERM_EXPR;
901   SLP_TREE_LANE_PERMUTATION (vnode) = perm;
902 
903   SLP_TREE_CHILDREN (vnode).create (2);
904   SLP_TREE_CHILDREN (vnode).quick_push (even);
905   SLP_TREE_CHILDREN (vnode).quick_push (odd);
906   SLP_TREE_REF_COUNT (even)++;
907   SLP_TREE_REF_COUNT (odd)++;
908   SLP_TREE_REF_COUNT (vnode) = 1;
909 
910   SLP_TREE_LANES (vnode) = SLP_TREE_LANES (rep);
911   gcc_assert (perm.length () == SLP_TREE_LANES (vnode));
912   /* Representation is set to that of the current node as the vectorizer
913      can't deal with VEC_PERMs with no representation, as would be the
914      case with invariants.  */
915   SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (rep);
916   SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (rep);
917   return vnode;
918 }
919 
920 class complex_mul_pattern : public complex_pattern
921 {
922   protected:
complex_mul_pattern(slp_tree * node,vec<slp_tree> * m_ops,internal_fn ifn)923     complex_mul_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
924       : complex_pattern (node, m_ops, ifn)
925     {
926       this->m_num_args = 2;
927     }
928 
929   public:
930     void build (vec_info *);
931     static internal_fn
932     matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
933 	     vec<slp_tree> *);
934 
935     static vect_pattern*
936     recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
937 
938     static vect_pattern*
mkInstance(slp_tree * node,vec<slp_tree> * m_ops,internal_fn ifn)939     mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
940     {
941       return new complex_mul_pattern (node, m_ops, ifn);
942     }
943 
944 };
945 
946 /* Pattern matcher for trying to match complex multiply pattern in SLP tree
947    If the operation matches then IFN is set to the operation it matched
948    and the arguments to the two replacement statements are put in m_ops.
949 
950    If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
951 
952    This function matches the patterns shaped as:
953 
954    double ax = (b[i+1] * a[i]);
955    double bx = (a[i+1] * b[i]);
956 
957    c[i] = c[i] - ax;
958    c[i+1] = c[i+1] + bx;
959 
960    If a match occurred then TRUE is returned, else FALSE.  The initial match is
961    expected to be in OP1 and the initial match operands in args0.  */
962 
963 internal_fn
matches(complex_operation_t op,slp_tree_to_load_perm_map_t * perm_cache,slp_tree * node,vec<slp_tree> * ops)964 complex_mul_pattern::matches (complex_operation_t op,
965 			      slp_tree_to_load_perm_map_t *perm_cache,
966 			      slp_tree *node, vec<slp_tree> *ops)
967 {
968   internal_fn ifn = IFN_LAST;
969 
970   if (op != MINUS_PLUS)
971     return IFN_LAST;
972 
973   slp_tree root = *node;
974   /* First two nodes must be a multiply.  */
975   auto_vec<slp_tree> muls;
976   if (vect_match_call_complex_mla (root, 0) != MULT_MULT
977       || vect_match_call_complex_mla (root, 1, &muls) != MULT_MULT)
978     return IFN_LAST;
979 
980   /* Now operand2+4 may lead to another expression.  */
981   auto_vec<slp_tree> left_op, right_op;
982   left_op.safe_splice (SLP_TREE_CHILDREN (muls[0]));
983   right_op.safe_splice (SLP_TREE_CHILDREN (muls[1]));
984 
985   if (linear_loads_p (perm_cache, left_op[1]) == PERM_ODDEVEN)
986     return IFN_LAST;
987 
988   bool neg_first = false;
989   bool conj_first_operand = false;
990   bool is_neg = vect_normalize_conj_loc (right_op, &neg_first);
991 
992   if (!is_neg)
993     {
994       /* A multiplication needs to multiply agains the real pair, otherwise
995 	 the pattern matches that of FMS.   */
996       if (!vect_validate_multiplication (perm_cache, left_op, PERM_EVENEVEN)
997 	  || vect_normalize_conj_loc (left_op))
998 	return IFN_LAST;
999       ifn = IFN_COMPLEX_MUL;
1000     }
1001   else if (is_neg)
1002     {
1003       if (!vect_validate_multiplication (perm_cache, left_op, right_op,
1004 					 neg_first, &conj_first_operand,
1005 					 false))
1006 	return IFN_LAST;
1007 
1008       ifn = IFN_COMPLEX_MUL_CONJ;
1009     }
1010 
1011   if (!vect_pattern_validate_optab (ifn, *node))
1012     return IFN_LAST;
1013 
1014   ops->truncate (0);
1015   ops->create (3);
1016 
1017   complex_perm_kinds_t kind = linear_loads_p (perm_cache, left_op[0]);
1018   if (kind == PERM_EVENODD)
1019     {
1020       ops->quick_push (left_op[1]);
1021       ops->quick_push (right_op[1]);
1022       ops->quick_push (left_op[0]);
1023     }
1024   else if (kind == PERM_TOP)
1025     {
1026       ops->quick_push (left_op[1]);
1027       ops->quick_push (right_op[1]);
1028       ops->quick_push (left_op[0]);
1029     }
1030   else if (kind == PERM_EVENEVEN && !conj_first_operand)
1031     {
1032       ops->quick_push (left_op[0]);
1033       ops->quick_push (right_op[0]);
1034       ops->quick_push (left_op[1]);
1035     }
1036   else
1037     {
1038       ops->quick_push (left_op[0]);
1039       ops->quick_push (right_op[1]);
1040       ops->quick_push (left_op[1]);
1041     }
1042 
1043   return ifn;
1044 }
1045 
1046 /* Attempt to recognize a complex mul pattern.  */
1047 
1048 vect_pattern*
recognize(slp_tree_to_load_perm_map_t * perm_cache,slp_tree * node)1049 complex_mul_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1050 				slp_tree *node)
1051 {
1052   auto_vec<slp_tree> ops;
1053   complex_operation_t op
1054     = vect_detect_pair_op (*node, true, &ops);
1055   internal_fn ifn
1056     = complex_mul_pattern::matches (op, perm_cache, node, &ops);
1057   if (ifn == IFN_LAST)
1058     return NULL;
1059 
1060   return new complex_mul_pattern (node, &ops, ifn);
1061 }
1062 
1063 /* Perform a replacement of the detected complex mul pattern with the new
1064    instruction sequences.  */
1065 
1066 void
build(vec_info * vinfo)1067 complex_mul_pattern::build (vec_info *vinfo)
1068 {
1069   slp_tree node;
1070   unsigned i;
1071   slp_tree newnode
1072     = vect_build_combine_node (this->m_ops[0], this->m_ops[1], *this->m_node);
1073   SLP_TREE_REF_COUNT (this->m_ops[2])++;
1074 
1075   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1076     vect_free_slp_tree (node);
1077 
1078   /* First re-arrange the children.  */
1079   SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
1080   SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[2];
1081   SLP_TREE_CHILDREN (*this->m_node)[1] = newnode;
1082 
1083   /* And then rewrite the node itself.  */
1084   complex_pattern::build (vinfo);
1085 }
1086 
1087 /*******************************************************************************
1088  * complex_fma_pattern class
1089  ******************************************************************************/
1090 
1091 class complex_fma_pattern : public complex_pattern
1092 {
1093   protected:
complex_fma_pattern(slp_tree * node,vec<slp_tree> * m_ops,internal_fn ifn)1094     complex_fma_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1095       : complex_pattern (node, m_ops, ifn)
1096     {
1097       this->m_num_args = 3;
1098     }
1099 
1100   public:
1101     void build (vec_info *);
1102     static internal_fn
1103     matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
1104 	     vec<slp_tree> *);
1105 
1106     static vect_pattern*
1107     recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
1108 
1109     static vect_pattern*
mkInstance(slp_tree * node,vec<slp_tree> * m_ops,internal_fn ifn)1110     mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1111     {
1112       return new complex_fma_pattern (node, m_ops, ifn);
1113     }
1114 };
1115 
1116 /* Pattern matcher for trying to match complex multiply and accumulate
1117    and multiply and subtract patterns in SLP tree.
1118    If the operation matches then IFN is set to the operation it matched and
1119    the arguments to the two replacement statements are put in m_ops.
1120 
1121    If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
1122 
1123    This function matches the patterns shaped as:
1124 
1125    double ax = (b[i+1] * a[i]) + (b[i] * a[i]);
1126    double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]);
1127 
1128    c[i] = c[i] - ax;
1129    c[i+1] = c[i+1] + bx;
1130 
1131    If a match occurred then TRUE is returned, else FALSE.  The match is
1132    performed after COMPLEX_MUL which would have done the majority of the work.
1133    This function merely matches an ADD with a COMPLEX_MUL IFN.  The initial
1134    match is expected to be in OP1 and the initial match operands in args0.  */
1135 
1136 internal_fn
matches(complex_operation_t op,slp_tree_to_load_perm_map_t *,slp_tree * ref_node,vec<slp_tree> * ops)1137 complex_fma_pattern::matches (complex_operation_t op,
1138 			      slp_tree_to_load_perm_map_t * /* perm_cache */,
1139 			      slp_tree *ref_node, vec<slp_tree> *ops)
1140 {
1141   internal_fn ifn = IFN_LAST;
1142 
1143   /* Find the two components.  We match Complex MUL first which reduces the
1144      amount of work this pattern has to do.  After that we just match the
1145      head node and we're done.:
1146 
1147      * FMA: + +.
1148 
1149      We need to ignore the two_operands nodes that may also match.
1150      For that we can check if they have any scalar statements and also
1151      check that it's not a permute node as we're looking for a normal
1152      PLUS_EXPR operation.  */
1153   if (op != CMPLX_NONE)
1154     return IFN_LAST;
1155 
1156   /* Find the two components.  We match Complex MUL first which reduces the
1157      amount of work this pattern has to do.  After that we just match the
1158      head node and we're done.:
1159 
1160    * FMA: + + on a non-two_operands node.  */
1161   slp_tree vnode = *ref_node;
1162   if (SLP_TREE_LANE_PERMUTATION (vnode).exists ()
1163       || !SLP_TREE_CHILDREN (vnode).exists ()
1164       || !vect_match_expression_p (vnode, PLUS_EXPR))
1165     return IFN_LAST;
1166 
1167   slp_tree node = SLP_TREE_CHILDREN (vnode)[1];
1168 
1169   if (vect_match_call_p (node, IFN_COMPLEX_MUL))
1170     ifn = IFN_COMPLEX_FMA;
1171   else if (vect_match_call_p (node, IFN_COMPLEX_MUL_CONJ))
1172     ifn = IFN_COMPLEX_FMA_CONJ;
1173   else
1174     return IFN_LAST;
1175 
1176   if (!vect_pattern_validate_optab (ifn, vnode))
1177     return IFN_LAST;
1178 
1179   ops->truncate (0);
1180   ops->create (3);
1181 
1182   if (ifn == IFN_COMPLEX_FMA)
1183     {
1184       ops->quick_push (SLP_TREE_CHILDREN (vnode)[0]);
1185       ops->quick_push (SLP_TREE_CHILDREN (node)[1]);
1186       ops->quick_push (SLP_TREE_CHILDREN (node)[0]);
1187     }
1188   else
1189     {
1190       ops->quick_push (SLP_TREE_CHILDREN (vnode)[0]);
1191       ops->quick_push (SLP_TREE_CHILDREN (node)[0]);
1192       ops->quick_push (SLP_TREE_CHILDREN (node)[1]);
1193     }
1194 
1195   return ifn;
1196 }
1197 
1198 /* Attempt to recognize a complex mul pattern.  */
1199 
1200 vect_pattern*
recognize(slp_tree_to_load_perm_map_t * perm_cache,slp_tree * node)1201 complex_fma_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1202 				slp_tree *node)
1203 {
1204   auto_vec<slp_tree> ops;
1205   complex_operation_t op
1206     = vect_detect_pair_op (*node, true, &ops);
1207   internal_fn ifn
1208     = complex_fma_pattern::matches (op, perm_cache, node, &ops);
1209   if (ifn == IFN_LAST)
1210     return NULL;
1211 
1212   return new complex_fma_pattern (node, &ops, ifn);
1213 }
1214 
1215 /* Perform a replacement of the detected complex mul pattern with the new
1216    instruction sequences.  */
1217 
1218 void
build(vec_info * vinfo)1219 complex_fma_pattern::build (vec_info *vinfo)
1220 {
1221   slp_tree node = SLP_TREE_CHILDREN (*this->m_node)[1];
1222 
1223   SLP_TREE_CHILDREN (*this->m_node).release ();
1224   SLP_TREE_CHILDREN (*this->m_node).create (3);
1225   SLP_TREE_CHILDREN (*this->m_node).safe_splice (this->m_ops);
1226 
1227   SLP_TREE_REF_COUNT (this->m_ops[1])++;
1228   SLP_TREE_REF_COUNT (this->m_ops[2])++;
1229 
1230   vect_free_slp_tree (node);
1231 
1232   complex_pattern::build (vinfo);
1233 }
1234 
1235 /*******************************************************************************
1236  * complex_fms_pattern class
1237  ******************************************************************************/
1238 
1239 class complex_fms_pattern : public complex_pattern
1240 {
1241   protected:
complex_fms_pattern(slp_tree * node,vec<slp_tree> * m_ops,internal_fn ifn)1242     complex_fms_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1243       : complex_pattern (node, m_ops, ifn)
1244     {
1245       this->m_num_args = 3;
1246     }
1247 
1248   public:
1249     void build (vec_info *);
1250     static internal_fn
1251     matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
1252 	     vec<slp_tree> *);
1253 
1254     static vect_pattern*
1255     recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
1256 
1257     static vect_pattern*
mkInstance(slp_tree * node,vec<slp_tree> * m_ops,internal_fn ifn)1258     mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1259     {
1260       return new complex_fms_pattern (node, m_ops, ifn);
1261     }
1262 };
1263 
1264 
1265 /* Pattern matcher for trying to match complex multiply and accumulate
1266    and multiply and subtract patterns in SLP tree.
1267    If the operation matches then IFN is set to the operation it matched and
1268    the arguments to the two replacement statements are put in m_ops.
1269 
1270    If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
1271 
1272    This function matches the patterns shaped as:
1273 
1274    double ax = (b[i+1] * a[i]) + (b[i] * a[i]);
1275    double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]);
1276 
1277    c[i] = c[i] - ax;
1278    c[i+1] = c[i+1] + bx;
1279 
1280    If a match occurred then TRUE is returned, else FALSE.  The initial match is
1281    expected to be in OP1 and the initial match operands in args0.  */
1282 
1283 internal_fn
matches(complex_operation_t op,slp_tree_to_load_perm_map_t * perm_cache,slp_tree * ref_node,vec<slp_tree> * ops)1284 complex_fms_pattern::matches (complex_operation_t op,
1285 			      slp_tree_to_load_perm_map_t *perm_cache,
1286 			      slp_tree * ref_node, vec<slp_tree> *ops)
1287 {
1288   internal_fn ifn = IFN_LAST;
1289 
1290   /* Find the two components.  We match Complex MUL first which reduces the
1291      amount of work this pattern has to do.  After that we just match the
1292      head node and we're done.:
1293 
1294      * FMS: - +.  */
1295   slp_tree child = NULL;
1296 
1297   /* We need to ignore the two_operands nodes that may also match,
1298      for that we can check if they have any scalar statements and also
1299      check that it's not a permute node as we're looking for a normal
1300      PLUS_EXPR operation.  */
1301   if (op != PLUS_MINUS)
1302     return IFN_LAST;
1303 
1304   child = SLP_TREE_CHILDREN ((*ops)[1])[1];
1305   if (vect_detect_pair_op (child) != MINUS_PLUS)
1306     return IFN_LAST;
1307 
1308   /* First two nodes must be a multiply.  */
1309   auto_vec<slp_tree> muls;
1310   if (vect_match_call_complex_mla (child, 0) != MULT_MULT
1311       || vect_match_call_complex_mla (child, 1, &muls) != MULT_MULT)
1312     return IFN_LAST;
1313 
1314   /* Now operand2+4 may lead to another expression.  */
1315   auto_vec<slp_tree> left_op, right_op;
1316   left_op.safe_splice (SLP_TREE_CHILDREN (muls[0]));
1317   right_op.safe_splice (SLP_TREE_CHILDREN (muls[1]));
1318 
1319   bool is_neg = vect_normalize_conj_loc (left_op);
1320 
1321   child = SLP_TREE_CHILDREN ((*ops)[1])[0];
1322   bool conj_first_operand = false;
1323   if (!vect_validate_multiplication (perm_cache, right_op, left_op, false,
1324 				     &conj_first_operand, true))
1325     return IFN_LAST;
1326 
1327   if (!is_neg)
1328     ifn = IFN_COMPLEX_FMS;
1329   else if (is_neg)
1330     ifn = IFN_COMPLEX_FMS_CONJ;
1331 
1332   if (!vect_pattern_validate_optab (ifn, *ref_node))
1333     return IFN_LAST;
1334 
1335   ops->truncate (0);
1336   ops->create (4);
1337 
1338   complex_perm_kinds_t kind = linear_loads_p (perm_cache, right_op[0]);
1339   if (kind == PERM_EVENODD)
1340     {
1341       ops->quick_push (child);
1342       ops->quick_push (right_op[0]);
1343       ops->quick_push (right_op[1]);
1344       ops->quick_push (left_op[1]);
1345     }
1346   else if (kind == PERM_TOP)
1347     {
1348       ops->quick_push (child);
1349       ops->quick_push (right_op[1]);
1350       ops->quick_push (right_op[0]);
1351       ops->quick_push (left_op[0]);
1352     }
1353   else if (kind == PERM_EVENEVEN && !is_neg)
1354     {
1355       ops->quick_push (child);
1356       ops->quick_push (right_op[1]);
1357       ops->quick_push (right_op[0]);
1358       ops->quick_push (left_op[0]);
1359     }
1360   else
1361     {
1362       ops->quick_push (child);
1363       ops->quick_push (right_op[1]);
1364       ops->quick_push (right_op[0]);
1365       ops->quick_push (left_op[1]);
1366     }
1367 
1368   return ifn;
1369 }
1370 
1371 /* Attempt to recognize a complex mul pattern.  */
1372 
1373 vect_pattern*
recognize(slp_tree_to_load_perm_map_t * perm_cache,slp_tree * node)1374 complex_fms_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1375 				slp_tree *node)
1376 {
1377   auto_vec<slp_tree> ops;
1378   complex_operation_t op
1379     = vect_detect_pair_op (*node, true, &ops);
1380   internal_fn ifn
1381     = complex_fms_pattern::matches (op, perm_cache, node, &ops);
1382   if (ifn == IFN_LAST)
1383     return NULL;
1384 
1385   return new complex_fms_pattern (node, &ops, ifn);
1386 }
1387 
1388 /* Perform a replacement of the detected complex mul pattern with the new
1389    instruction sequences.  */
1390 
1391 void
build(vec_info * vinfo)1392 complex_fms_pattern::build (vec_info *vinfo)
1393 {
1394   slp_tree node;
1395   unsigned i;
1396   slp_tree newnode =
1397     vect_build_combine_node (this->m_ops[2], this->m_ops[3], *this->m_node);
1398   SLP_TREE_REF_COUNT (this->m_ops[0])++;
1399   SLP_TREE_REF_COUNT (this->m_ops[1])++;
1400 
1401   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1402     vect_free_slp_tree (node);
1403 
1404   SLP_TREE_CHILDREN (*this->m_node).release ();
1405   SLP_TREE_CHILDREN (*this->m_node).create (3);
1406 
1407   /* First re-arrange the children.  */
1408   SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[0]);
1409   SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[1]);
1410   SLP_TREE_CHILDREN (*this->m_node).quick_push (newnode);
1411 
1412   /* And then rewrite the node itself.  */
1413   complex_pattern::build (vinfo);
1414 }
1415 
1416 /*******************************************************************************
1417  * complex_operations_pattern class
1418  ******************************************************************************/
1419 
1420 /* This function combines all the existing pattern matchers above into one class
1421    that shares the functionality between them.  The initial match is shared
1422    between all complex operations.  */
1423 
1424 class complex_operations_pattern : public complex_pattern
1425 {
1426   protected:
complex_operations_pattern(slp_tree * node,vec<slp_tree> * m_ops,internal_fn ifn)1427     complex_operations_pattern (slp_tree *node, vec<slp_tree> *m_ops,
1428 				internal_fn ifn)
1429       : complex_pattern (node, m_ops, ifn)
1430     {
1431       this->m_num_args = 0;
1432     }
1433 
1434   public:
1435     void build (vec_info *);
1436     static internal_fn
1437     matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
1438 	     vec<slp_tree> *);
1439 
1440     static vect_pattern*
1441     recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
1442 };
1443 
1444 /* Dummy matches implementation for proxy object.  */
1445 
1446 internal_fn
1447 complex_operations_pattern::
matches(complex_operation_t,slp_tree_to_load_perm_map_t *,slp_tree *,vec<slp_tree> *)1448 matches (complex_operation_t /* op */,
1449 	 slp_tree_to_load_perm_map_t * /* perm_cache */,
1450 	 slp_tree * /* ref_node */, vec<slp_tree> * /* ops */)
1451 {
1452   return IFN_LAST;
1453 }
1454 
1455 /* Attempt to recognize a complex mul pattern.  */
1456 
1457 vect_pattern*
recognize(slp_tree_to_load_perm_map_t * perm_cache,slp_tree * node)1458 complex_operations_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1459 				       slp_tree *node)
1460 {
1461   auto_vec<slp_tree> ops;
1462   complex_operation_t op
1463     = vect_detect_pair_op (*node, true, &ops);
1464   internal_fn ifn = IFN_LAST;
1465 
1466   ifn  = complex_fms_pattern::matches (op, perm_cache, node, &ops);
1467   if (ifn != IFN_LAST)
1468     return complex_fms_pattern::mkInstance (node, &ops, ifn);
1469 
1470   ifn  = complex_mul_pattern::matches (op, perm_cache, node, &ops);
1471   if (ifn != IFN_LAST)
1472     return complex_mul_pattern::mkInstance (node, &ops, ifn);
1473 
1474   ifn  = complex_fma_pattern::matches (op, perm_cache, node, &ops);
1475   if (ifn != IFN_LAST)
1476     return complex_fma_pattern::mkInstance (node, &ops, ifn);
1477 
1478   ifn  = complex_add_pattern::matches (op, perm_cache, node, &ops);
1479   if (ifn != IFN_LAST)
1480     return complex_add_pattern::mkInstance (node, &ops, ifn);
1481 
1482   return NULL;
1483 }
1484 
1485 /* Dummy implementation of build.  */
1486 
1487 void
build(vec_info *)1488 complex_operations_pattern::build (vec_info * /* vinfo */)
1489 {
1490   gcc_unreachable ();
1491 }
1492 
1493 /*******************************************************************************
1494  * Pattern matching definitions
1495  ******************************************************************************/
1496 
1497 #define SLP_PATTERN(x) &x::recognize
1498 vect_pattern_decl_t slp_patterns[]
1499 {
1500   /* For least amount of back-tracking and more efficient matching
1501      order patterns from the largest to the smallest.  Especially if they
1502      overlap in what they can detect.  */
1503 
1504   SLP_PATTERN (complex_operations_pattern),
1505 };
1506 #undef SLP_PATTERN
1507 
1508 /* Set the number of SLP pattern matchers available.  */
1509 size_t num__slp_patterns = sizeof(slp_patterns)/sizeof(vect_pattern_decl_t);
1510