1 /* Copyright (C) 1988-2021 Free Software Foundation, Inc.
2 
3 This file is part of GCC.
4 
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9 
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 GNU General Public License for more details.
14 
15 You should have received a copy of the GNU General Public License
16 along with GCC; see the file COPYING3.  If not see
17 <http://www.gnu.org/licenses/>.  */
18 
19 #define IN_TARGET_CODE 1
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "memmodel.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "cfgloop.h"
31 #include "df.h"
32 #include "tm_p.h"
33 #include "stringpool.h"
34 #include "expmed.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "cgraph.h"
40 #include "diagnostic.h"
41 #include "cfgbuild.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "attribs.h"
45 #include "calls.h"
46 #include "stor-layout.h"
47 #include "varasm.h"
48 #include "output.h"
49 #include "insn-attr.h"
50 #include "flags.h"
51 #include "except.h"
52 #include "explow.h"
53 #include "expr.h"
54 #include "cfgrtl.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "reload.h"
58 #include "gimplify.h"
59 #include "dwarf2.h"
60 #include "tm-constrs.h"
61 #include "cselib.h"
62 #include "sched-int.h"
63 #include "opts.h"
64 #include "tree-pass.h"
65 #include "context.h"
66 #include "pass_manager.h"
67 #include "target-globals.h"
68 #include "gimple-iterator.h"
69 #include "tree-vectorizer.h"
70 #include "shrink-wrap.h"
71 #include "builtins.h"
72 #include "rtl-iter.h"
73 #include "tree-iterator.h"
74 #include "dbgcnt.h"
75 #include "case-cfn-macros.h"
76 #include "dojump.h"
77 #include "fold-const-call.h"
78 #include "tree-vrp.h"
79 #include "tree-ssanames.h"
80 #include "selftest.h"
81 #include "selftest-rtl.h"
82 #include "print-rtl.h"
83 #include "intl.h"
84 #include "ifcvt.h"
85 #include "symbol-summary.h"
86 #include "ipa-prop.h"
87 #include "ipa-fnsummary.h"
88 #include "wide-int-bitmask.h"
89 #include "tree-vector-builder.h"
90 #include "debug.h"
91 #include "dwarf2out.h"
92 #include "i386-builtins.h"
93 #include "i386-features.h"
94 
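/* Base names of the out-of-line save/restore stubs used for 64-bit
   ms->sysv calls.  get_stub_name combines one of these with the ISA
   (sse/avx) and the number of registers handled to form the full
   assembler name.  */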
95 const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = {
96   "savms64",
97   "resms64",
98   "resms64x",
99   "savms64f",
100   "resms64f",
101   "resms64fx"
102 };
103 
104 const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = {
105 /* The below offset values are where each register is stored for the layout
106    relative to incoming stack pointer.  The value of each m_regs[].offset will
107    be relative to the incoming base pointer (rax or rsi) used by the stub.
108 
109     s_instances:   0		1		2		3
110     Offset:					realigned or	aligned + 8
111     Register	   aligned	aligned + 8	aligned w/HFP	w/HFP	*/
112     XMM15_REG,	/* 0x10		0x18		0x10		0x18	*/
113     XMM14_REG,	/* 0x20		0x28		0x20		0x28	*/
114     XMM13_REG,	/* 0x30		0x38		0x30		0x38	*/
115     XMM12_REG,	/* 0x40		0x48		0x40		0x48	*/
116     XMM11_REG,	/* 0x50		0x58		0x50		0x58	*/
117     XMM10_REG,	/* 0x60		0x68		0x60		0x68	*/
118     XMM9_REG,	/* 0x70		0x78		0x70		0x78	*/
119     XMM8_REG,	/* 0x80		0x88		0x80		0x88	*/
120     XMM7_REG,	/* 0x90		0x98		0x90		0x98	*/
121     XMM6_REG,	/* 0xa0		0xa8		0xa0		0xa8	*/
122     SI_REG,	/* 0xa8		0xb0		0xa8		0xb0	*/
123     DI_REG,	/* 0xb0		0xb8		0xb0		0xb8	*/
124     BX_REG,	/* 0xb8		0xc0		0xb8		0xc0	*/
125     BP_REG,	/* 0xc0		0xc8		N/A		N/A	*/
126     R12_REG,	/* 0xc8		0xd0		0xc0		0xc8	*/
127     R13_REG,	/* 0xd0		0xd8		0xc8		0xd0	*/
128     R14_REG,	/* 0xd8		0xe0		0xd0		0xd8	*/
129     R15_REG,	/* 0xe0		0xe8		0xd8		0xe0	*/
130 };
131 
132 /* Instantiate static const values.  */
133 const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET;
134 const unsigned xlogue_layout::MIN_REGS;
135 const unsigned xlogue_layout::MAX_REGS;
136 const unsigned xlogue_layout::MAX_EXTRA_REGS;
137 const unsigned xlogue_layout::VARIANT_COUNT;
138 const unsigned xlogue_layout::STUB_NAME_MAX_LEN;
139 
140 /* Initialize xlogue_layout::s_stub_names to zero.  */
141 char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
142 				[STUB_NAME_MAX_LEN];
143 
144 /* Instantiates all xlogue_layout instances.  */
145 const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = {
146   xlogue_layout (0, false),	/* XLOGUE_SET_ALIGNED */
147   xlogue_layout (8, false),	/* XLOGUE_SET_ALIGNED_PLUS_8 */
148   xlogue_layout (0, true),	/* XLOGUE_SET_HFP_ALIGNED_OR_REALIGN */
149   xlogue_layout (8, true)	/* XLOGUE_SET_HFP_ALIGNED_PLUS_8 */
150 };
151 
152 /* Return an appropriate const instance of xlogue_layout based upon values
153    in cfun->machine and crtl.  */
154 const class xlogue_layout &
155 xlogue_layout::get_instance ()
156 {
157   enum xlogue_stub_sets stub_set;
158   bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in;
159 
160   if (stack_realign_fp)
161     stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
162   else if (frame_pointer_needed)
163     stub_set = aligned_plus_8
164 	      ? XLOGUE_SET_HFP_ALIGNED_PLUS_8
165 	      : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
166   else
167     stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED;
168 
169   return s_instances[stub_set];
170 }
171 
172 /* Determine how many clobbered registers can be saved by the stub.
173    Returns the count of registers the stub will save and restore.  */
174 unsigned
175 xlogue_layout::count_stub_managed_regs ()
176 {
177   bool hfp = frame_pointer_needed || stack_realign_fp;
178   unsigned i, count;
179   unsigned regno;
180 
181   for (count = i = MIN_REGS; i < MAX_REGS; ++i)
182     {
183       regno = REG_ORDER[i];
184       if (regno == BP_REG && hfp)
185 	continue;
186       if (!ix86_save_reg (regno, false, false))
187 	break;
188       ++count;
189     }
190   return count;
191 }
192 
193 /* Determine if register REGNO is a stub managed register given the
194    total COUNT of stub managed registers.  */
195 bool
196 xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count)
197 {
198   bool hfp = frame_pointer_needed || stack_realign_fp;
199   unsigned i;
200 
201   for (i = 0; i < count; ++i)
202     {
203       gcc_assert (i < MAX_REGS);
204       if (REG_ORDER[i] == BP_REG && hfp)
205 	++count;
206       else if (REG_ORDER[i] == regno)
207 	return true;
208     }
209   return false;
210 }
211 
212 /* Constructor for xlogue_layout.  */
213 xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp)
214   : m_hfp (hfp) , m_nregs (hfp ? 17 : 18),
215     m_stack_align_off_in (stack_align_off_in)
216 {
217   HOST_WIDE_INT offset = stack_align_off_in;
218   unsigned i, j;
219 
220   for (i = j = 0; i < MAX_REGS; ++i)
221     {
222       unsigned regno = REG_ORDER[i];
223 
224       if (regno == BP_REG && hfp)
225 	continue;
226       if (SSE_REGNO_P (regno))
227 	{
228 	  offset += 16;
229 	  /* Verify that SSE regs are always aligned.  */
230 	  gcc_assert (!((stack_align_off_in + offset) & 15));
231 	}
232       else
233 	offset += 8;
234 
235       m_regs[j].regno    = regno;
236       m_regs[j++].offset = offset - STUB_INDEX_OFFSET;
237     }
238   gcc_assert (j == m_nregs);
239 }
240 
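/* Return the assembler name of stub STUB handling MIN_REGS + N_EXTRA_REGS
   registers, of the form "__<sse|avx>_<base>_<nregs>", constructing it
   lazily in s_stub_names on first use.  */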
241 const char *
242 xlogue_layout::get_stub_name (enum xlogue_stub stub,
243 			      unsigned n_extra_regs)
244 {
245   const int have_avx = TARGET_AVX;
246   char *name = s_stub_names[!!have_avx][stub][n_extra_regs];
247 
248   /* Lazy init */
249   if (!*name)
250     {
251       int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u",
252 			  (have_avx ? "avx" : "sse"),
253 			  STUB_BASE_NAMES[stub],
254 			  MIN_REGS + n_extra_regs);
255       gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN);
256     }
257 
258   return name;
259 }
260 
261 /* Return rtx of a symbol ref for the entry point (based upon
262    cfun->machine->call_ms2sysv_extra_regs) of the specified stub.  */
263 rtx
264 xlogue_layout::get_stub_rtx (enum xlogue_stub stub)
265 {
266   const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs;
267   gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS);
268   gcc_assert (stub < XLOGUE_STUB_COUNT);
269   gcc_assert (crtl->stack_realign_finalized);
270 
271   return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs));
272 }
273 
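/* Monotonically increasing counter used to assign a unique chain_id to
   each newly created chain.  */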
274 unsigned scalar_chain::max_id = 0;
275 
276 namespace {
277 
278 /* Initialize new chain.  */
279 
280 scalar_chain::scalar_chain (enum machine_mode smode_, enum machine_mode vmode_)
281 {
282   smode = smode_;
283   vmode = vmode_;
284 
285   chain_id = ++max_id;
286 
287   if (dump_file)
288     fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);
289 
290   bitmap_obstack_initialize (NULL);
291   insns = BITMAP_ALLOC (NULL);
292   defs = BITMAP_ALLOC (NULL);
293   defs_conv = BITMAP_ALLOC (NULL);
294   queue = NULL;
295 }
296 
297 /* Free chain's data.  */
298 
299 scalar_chain::~scalar_chain ()
300 {
301   BITMAP_FREE (insns);
302   BITMAP_FREE (defs);
303   BITMAP_FREE (defs_conv);
304   bitmap_obstack_release (NULL);
305 }
306 
307 /* Add instruction into the chain's queue.  */
308 
309 void
310 scalar_chain::add_to_queue (unsigned insn_uid)
311 {
312   if (bitmap_bit_p (insns, insn_uid)
313       || bitmap_bit_p (queue, insn_uid))
314     return;
315 
316   if (dump_file)
317     fprintf (dump_file, "  Adding insn %d into chain's #%d queue\n",
318 	     insn_uid, chain_id);
319   bitmap_set_bit (queue, insn_uid);
320 }
321 
322 general_scalar_chain::general_scalar_chain (enum machine_mode smode_,
323 					    enum machine_mode vmode_)
324      : scalar_chain (smode_, vmode_)
325 {
326   insns_conv = BITMAP_ALLOC (NULL);
327   n_sse_to_integer = 0;
328   n_integer_to_sse = 0;
329 }
330 
331 general_scalar_chain::~general_scalar_chain ()
332 {
333   BITMAP_FREE (insns_conv);
334 }
335 
336 /* For DImode conversion, mark register defined by DEF as requiring
337    conversion.  */
338 
339 void
340 general_scalar_chain::mark_dual_mode_def (df_ref def)
341 {
342   gcc_assert (DF_REF_REG_DEF_P (def));
343 
344   /* Record the def/insn pair so we can later efficiently iterate over
345      the defs to convert on insns not in the chain.  */
346   bool reg_new = bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
347   if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def)))
348     {
349       if (!bitmap_set_bit (insns_conv, DF_REF_INSN_UID (def))
350 	  && !reg_new)
351 	return;
352       n_integer_to_sse++;
353     }
354   else
355     {
356       if (!reg_new)
357 	return;
358       n_sse_to_integer++;
359     }
360 
361   if (dump_file)
362     fprintf (dump_file,
363 	     "  Mark r%d def in insn %d as requiring both modes in chain #%d\n",
364 	     DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
365 }
366 
367 /* For TImode conversion this is unused and should never be called.  */
368 
369 void
370 timode_scalar_chain::mark_dual_mode_def (df_ref)
371 {
372   gcc_unreachable ();
373 }
374 
375 /* Check REF's chain to add new insns into a queue
376    and find registers requiring conversion.  */
377 
378 void
379 scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
380 {
381   df_link *chain;
382 
383   gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
384 	      || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
385   add_to_queue (DF_REF_INSN_UID (ref));
386 
387   for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
388     {
389       unsigned uid = DF_REF_INSN_UID (chain->ref);
390 
391       if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
392 	continue;
393 
394       if (!DF_REF_REG_MEM_P (chain->ref))
395 	{
396 	  if (bitmap_bit_p (insns, uid))
397 	    continue;
398 
399 	  if (bitmap_bit_p (candidates, uid))
400 	    {
401 	      add_to_queue (uid);
402 	      continue;
403 	    }
404 	}
405 
406       if (DF_REF_REG_DEF_P (chain->ref))
407 	{
408 	  if (dump_file)
409 	    fprintf (dump_file, "  r%d def in insn %d isn't convertible\n",
410 		     DF_REF_REGNO (chain->ref), uid);
411 	  mark_dual_mode_def (chain->ref);
412 	}
413       else
414 	{
415 	  if (dump_file)
416 	    fprintf (dump_file, "  r%d use in insn %d isn't convertible\n",
417 		     DF_REF_REGNO (chain->ref), uid);
418 	  mark_dual_mode_def (ref);
419 	}
420     }
421 }
422 
423 /* Add instruction into a chain.  */
424 
425 void
426 scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
427 {
428   if (bitmap_bit_p (insns, insn_uid))
429     return;
430 
431   if (dump_file)
432     fprintf (dump_file, "  Adding insn %d to chain #%d\n", insn_uid, chain_id);
433 
434   bitmap_set_bit (insns, insn_uid);
435 
436   rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
437   rtx def_set = single_set (insn);
438   if (def_set && REG_P (SET_DEST (def_set))
439       && !HARD_REGISTER_P (SET_DEST (def_set)))
440     bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));
441 
442   /* ???  The following is quadratic since analyze_register_chain
443      iterates over all refs to look for dual-mode regs.  Instead this
444      should be done separately for all regs mentioned in the chain once.  */
445   df_ref ref;
446   for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
447     if (!HARD_REGISTER_P (DF_REF_REG (ref)))
448       analyze_register_chain (candidates, ref);
449   for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
450     if (!DF_REF_REG_MEM_P (ref))
451       analyze_register_chain (candidates, ref);
452 }
453 
454 /* Build new chain starting from insn INSN_UID recursively
455    adding all dependent uses and definitions.  */
456 
457 void
458 scalar_chain::build (bitmap candidates, unsigned insn_uid)
459 {
460   queue = BITMAP_ALLOC (NULL);
461   bitmap_set_bit (queue, insn_uid);
462 
463   if (dump_file)
464     fprintf (dump_file, "Building chain #%d...\n", chain_id);
465 
466   while (!bitmap_empty_p (queue))
467     {
468       insn_uid = bitmap_first_set_bit (queue);
469       bitmap_clear_bit (queue, insn_uid);
470       bitmap_clear_bit (candidates, insn_uid);
471       add_insn (candidates, insn_uid);
472     }
473 
474   if (dump_file)
475     {
476       fprintf (dump_file, "Collected chain #%d...\n", chain_id);
477       fprintf (dump_file, "  insns: ");
478       dump_bitmap (dump_file, insns);
479       if (!bitmap_empty_p (defs_conv))
480 	{
481 	  bitmap_iterator bi;
482 	  unsigned id;
483 	  const char *comma = "";
484 	  fprintf (dump_file, "  defs to convert: ");
485 	  EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
486 	    {
487 	      fprintf (dump_file, "%sr%d", comma, id);
488 	      comma = ", ";
489 	    }
490 	  fprintf (dump_file, "\n");
491 	}
492     }
493 
494   BITMAP_FREE (queue);
495 }
496 
497 /* Return the cost of building a vector constant
498    instead of using a scalar one.  */
499 
500 int
501 general_scalar_chain::vector_const_cost (rtx exp)
502 {
503   gcc_assert (CONST_INT_P (exp));
504 
505   if (standard_sse_constant_p (exp, vmode))
506     return ix86_cost->sse_op;
507   /* We have separate costs for SImode and DImode, use SImode costs
508      for smaller modes.  */
509   return ix86_cost->sse_load[smode == DImode ? 1 : 0];
510 }
511 
512 /* Compute a gain for chain conversion.  */
513 
514 int
515 general_scalar_chain::compute_convert_gain ()
516 {
517   bitmap_iterator bi;
518   unsigned insn_uid;
519   int gain = 0;
520   int cost = 0;
521 
522   if (dump_file)
523     fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
524 
525   /* SSE costs distinguish between SImode and DImode loads/stores; for
526      int costs, factor in the number of GPRs involved.  When supporting
527      smaller modes than SImode the int load/store costs need to be
528      adjusted as well.  */
529   unsigned sse_cost_idx = smode == DImode ? 1 : 0;
530   unsigned m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1;
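  /* For example, a DImode chain compiled without TARGET_64BIT has m == 2,
     since each scalar operation there works on a pair of GPRs.  */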
531 
532   EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
533     {
534       rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
535       rtx def_set = single_set (insn);
536       rtx src = SET_SRC (def_set);
537       rtx dst = SET_DEST (def_set);
538       int igain = 0;
539 
540       if (REG_P (src) && REG_P (dst))
541 	igain += 2 * m - ix86_cost->xmm_move;
542       else if (REG_P (src) && MEM_P (dst))
543 	igain
544 	  += m * ix86_cost->int_store[2] - ix86_cost->sse_store[sse_cost_idx];
545       else if (MEM_P (src) && REG_P (dst))
546 	igain += m * ix86_cost->int_load[2] - ix86_cost->sse_load[sse_cost_idx];
547       else if (GET_CODE (src) == ASHIFT
548 	       || GET_CODE (src) == ASHIFTRT
549 	       || GET_CODE (src) == LSHIFTRT)
550 	{
551 	  if (m == 2)
552 	    {
553 	      if (INTVAL (XEXP (src, 1)) >= 32)
554 		igain += ix86_cost->add;
555 	      else
556 		igain += ix86_cost->shift_const;
557 	    }
558 
559 	  igain += ix86_cost->shift_const - ix86_cost->sse_op;
560 
561 	  if (CONST_INT_P (XEXP (src, 0)))
562 	    igain -= vector_const_cost (XEXP (src, 0));
563 	}
564       else if (GET_CODE (src) == PLUS
565 	       || GET_CODE (src) == MINUS
566 	       || GET_CODE (src) == IOR
567 	       || GET_CODE (src) == XOR
568 	       || GET_CODE (src) == AND)
569 	{
570 	  igain += m * ix86_cost->add - ix86_cost->sse_op;
571 	  /* Additional gain for andnot for targets without BMI.  */
572 	  if (GET_CODE (XEXP (src, 0)) == NOT
573 	      && !TARGET_BMI)
574 	    igain += m * ix86_cost->add;
575 
576 	  if (CONST_INT_P (XEXP (src, 0)))
577 	    igain -= vector_const_cost (XEXP (src, 0));
578 	  if (CONST_INT_P (XEXP (src, 1)))
579 	    igain -= vector_const_cost (XEXP (src, 1));
580 	}
581       else if (GET_CODE (src) == NEG
582 	       || GET_CODE (src) == NOT)
583 	igain += m * ix86_cost->add - ix86_cost->sse_op - COSTS_N_INSNS (1);
584       else if (GET_CODE (src) == ABS
585 	       || GET_CODE (src) == SMAX
586 	       || GET_CODE (src) == SMIN
587 	       || GET_CODE (src) == UMAX
588 	       || GET_CODE (src) == UMIN)
589 	{
590 	  /* We do not have any conditional move cost; estimate it as a
591 	     reg-reg move.  Comparisons are costed as adds.  */
592 	  igain += m * (COSTS_N_INSNS (2) + ix86_cost->add);
593 	  /* Integer SSE ops are all costed the same.  */
594 	  igain -= ix86_cost->sse_op;
595 	}
596       else if (GET_CODE (src) == COMPARE)
597 	{
598 	  /* Assume comparison cost is the same.  */
599 	}
600       else if (CONST_INT_P (src))
601 	{
602 	  if (REG_P (dst))
603 	    /* DImode constants can be immediates for TARGET_64BIT; SImode ones always can.  */
604 	    igain += m * COSTS_N_INSNS (1);
605 	  else if (MEM_P (dst))
606 	    igain += (m * ix86_cost->int_store[2]
607 		     - ix86_cost->sse_store[sse_cost_idx]);
608 	  igain -= vector_const_cost (src);
609 	}
610       else
611 	gcc_unreachable ();
612 
613       if (igain != 0 && dump_file)
614 	{
615 	  fprintf (dump_file, "  Instruction gain %d for ", igain);
616 	  dump_insn_slim (dump_file, insn);
617 	}
618       gain += igain;
619     }
620 
621   if (dump_file)
622     fprintf (dump_file, "  Instruction conversion gain: %d\n", gain);
623 
624   /* Cost the integer to sse and sse to integer moves.  */
625   cost += n_sse_to_integer * ix86_cost->sse_to_integer;
626   /* ???  This should use integer_to_sse, but we only have that in the
627      RA cost table.  Assume sse_to_integer/integer_to_sse are the same,
628      which they are at the moment.  */
629   cost += n_integer_to_sse * ix86_cost->sse_to_integer;
630 
631   if (dump_file)
632     fprintf (dump_file, "  Registers conversion cost: %d\n", cost);
633 
634   gain -= cost;
635 
636   if (dump_file)
637     fprintf (dump_file, "  Total gain: %d\n", gain);
638 
639   return gain;
640 }
641 
642 /* Insert generated conversion instruction sequence INSNS
643    after instruction AFTER.  A new BB may be required in case
644    the instruction has an EH region attached.  */
645 
646 void
647 scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
648 {
649   if (!control_flow_insn_p (after))
650     {
651       emit_insn_after (insns, after);
652       return;
653     }
654 
655   basic_block bb = BLOCK_FOR_INSN (after);
656   edge e = find_fallthru_edge (bb->succs);
657   gcc_assert (e);
658 
659   basic_block new_bb = split_edge (e);
660   emit_insn_after (insns, BB_HEAD (new_bb));
661 }
662 
663 } // anon namespace
664 
665 /* Generate the canonical SET_SRC to move GPR to a VMODE vector register,
666    zeroing the upper parts.  */
667 
668 static rtx
669 gen_gpr_to_xmm_move_src (enum machine_mode vmode, rtx gpr)
670 {
671   switch (GET_MODE_NUNITS (vmode))
672     {
673     case 1:
674       /* We are not using this case currently.  */
675       gcc_unreachable ();
676     case 2:
677       return gen_rtx_VEC_CONCAT (vmode, gpr,
678 				 CONST0_RTX (GET_MODE_INNER (vmode)));
679     default:
680       return gen_rtx_VEC_MERGE (vmode, gen_rtx_VEC_DUPLICATE (vmode, gpr),
681 				CONST0_RTX (vmode), GEN_INT (HOST_WIDE_INT_1U));
682     }
683 }
684 
685 /* Make a vector copy of scalar register REG after its definition
686    INSN so that uses inside the chain can refer to the vector register.  */
687 
688 void
689 general_scalar_chain::make_vector_copies (rtx_insn *insn, rtx reg)
690 {
691   rtx vreg = *defs_map.get (reg);
692 
693   start_sequence ();
694   if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
695     {
696       rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
697       if (smode == DImode && !TARGET_64BIT)
698 	{
699 	  emit_move_insn (adjust_address (tmp, SImode, 0),
700 			  gen_rtx_SUBREG (SImode, reg, 0));
701 	  emit_move_insn (adjust_address (tmp, SImode, 4),
702 			  gen_rtx_SUBREG (SImode, reg, 4));
703 	}
704       else
705 	emit_move_insn (copy_rtx (tmp), reg);
706       emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
707 			      gen_gpr_to_xmm_move_src (vmode, tmp)));
708     }
709   else if (!TARGET_64BIT && smode == DImode)
710     {
711       if (TARGET_SSE4_1)
712 	{
713 	  emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
714 				      CONST0_RTX (V4SImode),
715 				      gen_rtx_SUBREG (SImode, reg, 0)));
716 	  emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
717 					gen_rtx_SUBREG (V4SImode, vreg, 0),
718 					gen_rtx_SUBREG (SImode, reg, 4),
719 					GEN_INT (2)));
720 	}
721       else
722 	{
723 	  rtx tmp = gen_reg_rtx (DImode);
724 	  emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
725 				      CONST0_RTX (V4SImode),
726 				      gen_rtx_SUBREG (SImode, reg, 0)));
727 	  emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
728 				      CONST0_RTX (V4SImode),
729 				      gen_rtx_SUBREG (SImode, reg, 4)));
730 	  emit_insn (gen_vec_interleave_lowv4si
731 		     (gen_rtx_SUBREG (V4SImode, vreg, 0),
732 		      gen_rtx_SUBREG (V4SImode, vreg, 0),
733 		      gen_rtx_SUBREG (V4SImode, tmp, 0)));
734 	}
735     }
736   else
737     emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
738 			    gen_gpr_to_xmm_move_src (vmode, reg)));
739   rtx_insn *seq = get_insns ();
740   end_sequence ();
741   emit_conversion_insns (seq, insn);
742 
743   if (dump_file)
744     fprintf (dump_file,
745 	     "  Copied r%d to a vector register r%d for insn %d\n",
746 	     REGNO (reg), REGNO (vreg), INSN_UID (insn));
747 }
748 
749 /* Copy the definition SRC of INSN inside the chain to DST for
750    scalar uses outside of the chain.  */
751 
752 void
753 general_scalar_chain::convert_reg (rtx_insn *insn, rtx dst, rtx src)
754 {
755   start_sequence ();
756   if (!TARGET_INTER_UNIT_MOVES_FROM_VEC)
757     {
758       rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
759       emit_move_insn (tmp, src);
760       if (!TARGET_64BIT && smode == DImode)
761 	{
762 	  emit_move_insn (gen_rtx_SUBREG (SImode, dst, 0),
763 			  adjust_address (tmp, SImode, 0));
764 	  emit_move_insn (gen_rtx_SUBREG (SImode, dst, 4),
765 			  adjust_address (tmp, SImode, 4));
766 	}
767       else
768 	emit_move_insn (dst, copy_rtx (tmp));
769     }
770   else if (!TARGET_64BIT && smode == DImode)
771     {
772       if (TARGET_SSE4_1)
773 	{
774 	  rtx tmp = gen_rtx_PARALLEL (VOIDmode,
775 				      gen_rtvec (1, const0_rtx));
776 	  emit_insn
777 	      (gen_rtx_SET
778 	       (gen_rtx_SUBREG (SImode, dst, 0),
779 		gen_rtx_VEC_SELECT (SImode,
780 				    gen_rtx_SUBREG (V4SImode, src, 0),
781 				    tmp)));
782 
783 	  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx));
784 	  emit_insn
785 	      (gen_rtx_SET
786 	       (gen_rtx_SUBREG (SImode, dst, 4),
787 		gen_rtx_VEC_SELECT (SImode,
788 				    gen_rtx_SUBREG (V4SImode, src, 0),
789 				    tmp)));
790 	}
791       else
792 	{
793 	  rtx vcopy = gen_reg_rtx (V2DImode);
794 	  emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, src, 0));
795 	  emit_move_insn (gen_rtx_SUBREG (SImode, dst, 0),
796 			  gen_rtx_SUBREG (SImode, vcopy, 0));
797 	  emit_move_insn (vcopy,
798 			  gen_rtx_LSHIFTRT (V2DImode,
799 					    vcopy, GEN_INT (32)));
800 	  emit_move_insn (gen_rtx_SUBREG (SImode, dst, 4),
801 			  gen_rtx_SUBREG (SImode, vcopy, 0));
802 	}
803     }
804   else
805     emit_move_insn (dst, src);
806 
807   rtx_insn *seq = get_insns ();
808   end_sequence ();
809   emit_conversion_insns (seq, insn);
810 
811   if (dump_file)
812     fprintf (dump_file,
813 	     "  Copied r%d to a scalar register r%d for insn %d\n",
814 	     REGNO (src), REGNO (dst), INSN_UID (insn));
815 }
816 
817 /* Convert operand OP in INSN.  We should handle
818    memory operands and uninitialized registers.
819    All other register uses are converted during
820    register conversion.  */
821 
822 void
823 general_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
824 {
825   *op = copy_rtx_if_shared (*op);
826 
827   if (GET_CODE (*op) == NOT)
828     {
829       convert_op (&XEXP (*op, 0), insn);
830       PUT_MODE (*op, vmode);
831     }
832   else if (MEM_P (*op))
833     {
834       rtx tmp = gen_reg_rtx (GET_MODE (*op));
835 
836       /* Handle movabs.  */
837       if (!memory_operand (*op, GET_MODE (*op)))
838 	{
839 	  rtx tmp2 = gen_reg_rtx (GET_MODE (*op));
840 
841 	  emit_insn_before (gen_rtx_SET (tmp2, *op), insn);
842 	  *op = tmp2;
843 	}
844 
845       emit_insn_before (gen_rtx_SET (gen_rtx_SUBREG (vmode, tmp, 0),
846 				     gen_gpr_to_xmm_move_src (vmode, *op)),
847 			insn);
848       *op = gen_rtx_SUBREG (vmode, tmp, 0);
849 
850       if (dump_file)
851 	fprintf (dump_file, "  Preloading operand for insn %d into r%d\n",
852 		 INSN_UID (insn), REGNO (tmp));
853     }
854   else if (REG_P (*op))
855     {
856       *op = gen_rtx_SUBREG (vmode, *op, 0);
857     }
858   else if (CONST_INT_P (*op))
859     {
860       rtx vec_cst;
861       rtx tmp = gen_rtx_SUBREG (vmode, gen_reg_rtx (smode), 0);
862 
863       /* Prefer all ones vector in case of -1.  */
864       if (constm1_operand (*op, GET_MODE (*op)))
865 	vec_cst = CONSTM1_RTX (vmode);
866       else
867 	{
868 	  unsigned n = GET_MODE_NUNITS (vmode);
869 	  rtx *v = XALLOCAVEC (rtx, n);
870 	  v[0] = *op;
871 	  for (unsigned i = 1; i < n; ++i)
872 	    v[i] = const0_rtx;
873 	  vec_cst = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (n, v));
874 	}
875 
876       if (!standard_sse_constant_p (vec_cst, vmode))
877 	{
878 	  start_sequence ();
879 	  vec_cst = validize_mem (force_const_mem (vmode, vec_cst));
880 	  rtx_insn *seq = get_insns ();
881 	  end_sequence ();
882 	  emit_insn_before (seq, insn);
883 	}
884 
885       emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
886       *op = tmp;
887     }
888   else
889     {
890       gcc_assert (SUBREG_P (*op));
891       gcc_assert (GET_MODE (*op) == vmode);
892     }
893 }
894 
895 /* Convert INSN to vector mode.  */
896 
897 void
898 general_scalar_chain::convert_insn (rtx_insn *insn)
899 {
900   /* Generate copies for out-of-chain uses of defs and adjust debug uses.  */
901   for (df_ref ref = DF_INSN_DEFS (insn); ref; ref = DF_REF_NEXT_LOC (ref))
902     if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref)))
903       {
904 	df_link *use;
905 	for (use = DF_REF_CHAIN (ref); use; use = use->next)
906 	  if (NONDEBUG_INSN_P (DF_REF_INSN (use->ref))
907 	      && (DF_REF_REG_MEM_P (use->ref)
908 		  || !bitmap_bit_p (insns, DF_REF_INSN_UID (use->ref))))
909 	    break;
910 	if (use)
911 	  convert_reg (insn, DF_REF_REG (ref),
912 		       *defs_map.get (regno_reg_rtx [DF_REF_REGNO (ref)]));
913 	else if (MAY_HAVE_DEBUG_BIND_INSNS)
914 	  {
915 	    /* If we generated a scalar copy we can leave debug-insns
916 	       as-is; if not, we have to adjust them.  */
917 	    auto_vec<rtx_insn *, 5> to_reset_debug_insns;
918 	    for (use = DF_REF_CHAIN (ref); use; use = use->next)
919 	      if (DEBUG_INSN_P (DF_REF_INSN (use->ref)))
920 		{
921 		  rtx_insn *debug_insn = DF_REF_INSN (use->ref);
922 		  /* If there's a reaching definition outside of the
923 		     chain we have to reset.  */
924 		  df_link *def;
925 		  for (def = DF_REF_CHAIN (use->ref); def; def = def->next)
926 		    if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def->ref)))
927 		      break;
928 		  if (def)
929 		    to_reset_debug_insns.safe_push (debug_insn);
930 		  else
931 		    {
932 		      *DF_REF_REAL_LOC (use->ref)
933 			= *defs_map.get (regno_reg_rtx [DF_REF_REGNO (ref)]);
934 		      df_insn_rescan (debug_insn);
935 		    }
936 		}
937 	    /* Have to do the reset outside of the DF_CHAIN walk to not
938 	       disrupt it.  */
939 	    while (!to_reset_debug_insns.is_empty ())
940 	      {
941 		rtx_insn *debug_insn = to_reset_debug_insns.pop ();
942 		INSN_VAR_LOCATION_LOC (debug_insn) = gen_rtx_UNKNOWN_VAR_LOC ();
943 		df_insn_rescan_debug_internal (debug_insn);
944 	      }
945 	  }
946       }
947 
948   /* Replace uses in this insn with the defs we use in the chain.  */
949   for (df_ref ref = DF_INSN_USES (insn); ref; ref = DF_REF_NEXT_LOC (ref))
950     if (!DF_REF_REG_MEM_P (ref))
951       if (rtx *vreg = defs_map.get (regno_reg_rtx[DF_REF_REGNO (ref)]))
952 	{
953 	  /* Also update a corresponding REG_DEAD note.  */
954 	  rtx note = find_reg_note (insn, REG_DEAD, DF_REF_REG (ref));
955 	  if (note)
956 	    XEXP (note, 0) = *vreg;
957 	  *DF_REF_REAL_LOC (ref) = *vreg;
958 	}
959 
960   rtx def_set = single_set (insn);
961   rtx src = SET_SRC (def_set);
962   rtx dst = SET_DEST (def_set);
963   rtx subreg;
964 
965   if (MEM_P (dst) && !REG_P (src))
966     {
967       /* There are no scalar integer instructions and therefore
968 	 temporary register usage is required.  */
969       rtx tmp = gen_reg_rtx (smode);
970       emit_conversion_insns (gen_move_insn (dst, tmp), insn);
971       dst = gen_rtx_SUBREG (vmode, tmp, 0);
972     }
973   else if (REG_P (dst))
974     {
975       /* Replace the definition with a SUBREG to the definition we
976          use inside the chain.  */
977       rtx *vdef = defs_map.get (dst);
978       if (vdef)
979 	dst = *vdef;
980       dst = gen_rtx_SUBREG (vmode, dst, 0);
981       /* IRA doesn't like to have REG_EQUAL/EQUIV notes when the SET_DEST
982          is a non-REG_P.  So kill those off.  */
983       rtx note = find_reg_equal_equiv_note (insn);
984       if (note)
985 	remove_note (insn, note);
986     }
987 
988   switch (GET_CODE (src))
989     {
990     case PLUS:
991     case MINUS:
992     case IOR:
993     case XOR:
994     case AND:
995     case SMAX:
996     case SMIN:
997     case UMAX:
998     case UMIN:
999       convert_op (&XEXP (src, 1), insn);
1000       /* FALLTHRU */
1001 
1002     case ABS:
1003     case ASHIFT:
1004     case ASHIFTRT:
1005     case LSHIFTRT:
1006       convert_op (&XEXP (src, 0), insn);
1007       PUT_MODE (src, vmode);
1008       break;
1009 
1010     case NEG:
1011       src = XEXP (src, 0);
1012       convert_op (&src, insn);
1013       subreg = gen_reg_rtx (vmode);
1014       emit_insn_before (gen_move_insn (subreg, CONST0_RTX (vmode)), insn);
1015       src = gen_rtx_MINUS (vmode, subreg, src);
1016       break;
1017 
1018     case NOT:
1019       src = XEXP (src, 0);
1020       convert_op (&src, insn);
1021       subreg = gen_reg_rtx (vmode);
1022       emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (vmode)), insn);
1023       src = gen_rtx_XOR (vmode, src, subreg);
1024       break;
1025 
1026     case MEM:
1027       if (!REG_P (dst))
1028 	convert_op (&src, insn);
1029       break;
1030 
1031     case REG:
1032       if (!MEM_P (dst))
1033 	convert_op (&src, insn);
1034       break;
1035 
1036     case SUBREG:
1037       gcc_assert (GET_MODE (src) == vmode);
1038       break;
1039 
1040     case COMPARE:
1041       src = SUBREG_REG (XEXP (XEXP (src, 0), 0));
1042 
1043       gcc_assert (REG_P (src) && GET_MODE (src) == DImode);
1044       subreg = gen_rtx_SUBREG (V2DImode, src, 0);
1045       emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
1046 						    copy_rtx_if_shared (subreg),
1047 						    copy_rtx_if_shared (subreg)),
1048 			insn);
1049       dst = gen_rtx_REG (CCmode, FLAGS_REG);
1050       src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (subreg),
1051 					       copy_rtx_if_shared (subreg)),
1052 			    UNSPEC_PTEST);
1053       break;
1054 
1055     case CONST_INT:
1056       convert_op (&src, insn);
1057       break;
1058 
1059     default:
1060       gcc_unreachable ();
1061     }
1062 
1063   SET_SRC (def_set) = src;
1064   SET_DEST (def_set) = dst;
1065 
1066   /* Drop possible dead definitions.  */
1067   PATTERN (insn) = def_set;
1068 
1069   INSN_CODE (insn) = -1;
1070   int patt = recog_memoized (insn);
1071   if  (patt == -1)
1072     fatal_insn_not_found (insn);
1073   df_insn_rescan (insn);
1074 }
1075 
1076 /* Fix uses of converted REG in debug insns.  */
1077 
1078 void
1079 timode_scalar_chain::fix_debug_reg_uses (rtx reg)
1080 {
1081   if (!flag_var_tracking)
1082     return;
1083 
1084   df_ref ref, next;
1085   for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next)
1086     {
1087       rtx_insn *insn = DF_REF_INSN (ref);
1088       /* Make sure the next ref is for a different instruction,
1089          so that we're not affected by the rescan.  */
1090       next = DF_REF_NEXT_REG (ref);
1091       while (next && DF_REF_INSN (next) == insn)
1092 	next = DF_REF_NEXT_REG (next);
1093 
1094       if (DEBUG_INSN_P (insn))
1095 	{
1096 	  /* It may be a debug insn with a TImode variable in
1097 	     register.  */
1098 	  bool changed = false;
1099 	  for (; ref != next; ref = DF_REF_NEXT_REG (ref))
1100 	    {
1101 	      rtx *loc = DF_REF_LOC (ref);
1102 	      if (REG_P (*loc) && GET_MODE (*loc) == V1TImode)
1103 		{
1104 		  *loc = gen_rtx_SUBREG (TImode, *loc, 0);
1105 		  changed = true;
1106 		}
1107 	    }
1108 	  if (changed)
1109 	    df_insn_rescan (insn);
1110 	}
1111     }
1112 }
1113 
1114 /* Convert INSN from TImode to V1TImode.  */
1115 
1116 void
1117 timode_scalar_chain::convert_insn (rtx_insn *insn)
1118 {
1119   rtx def_set = single_set (insn);
1120   rtx src = SET_SRC (def_set);
1121   rtx dst = SET_DEST (def_set);
1122 
1123   switch (GET_CODE (dst))
1124     {
1125     case REG:
1126       {
1127 	rtx tmp = find_reg_equal_equiv_note (insn);
1128 	if (tmp)
1129 	  PUT_MODE (XEXP (tmp, 0), V1TImode);
1130 	PUT_MODE (dst, V1TImode);
1131 	fix_debug_reg_uses (dst);
1132       }
1133       break;
1134     case MEM:
1135       PUT_MODE (dst, V1TImode);
1136       break;
1137 
1138     default:
1139       gcc_unreachable ();
1140     }
1141 
1142   switch (GET_CODE (src))
1143     {
1144     case REG:
1145       PUT_MODE (src, V1TImode);
1146       /* Call fix_debug_reg_uses only if SRC is never defined.  */
1147       if (!DF_REG_DEF_CHAIN (REGNO (src)))
1148 	fix_debug_reg_uses (src);
1149       break;
1150 
1151     case MEM:
1152       PUT_MODE (src, V1TImode);
1153       break;
1154 
1155     case CONST_WIDE_INT:
1156       if (NONDEBUG_INSN_P (insn))
1157 	{
1158 	  /* Since there are no instructions to store a 128-bit constant,
1159 	     temporary register usage is required.  */
1160 	  rtx tmp = gen_reg_rtx (V1TImode);
1161 	  start_sequence ();
1162 	  src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
1163 	  src = validize_mem (force_const_mem (V1TImode, src));
1164 	  rtx_insn *seq = get_insns ();
1165 	  end_sequence ();
1166 	  if (seq)
1167 	    emit_insn_before (seq, insn);
1168 	  emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
1169 	  dst = tmp;
1170 	}
1171       break;
1172 
1173     case CONST_INT:
1174       switch (standard_sse_constant_p (src, TImode))
1175 	{
1176 	case 1:
1177 	  src = CONST0_RTX (GET_MODE (dst));
1178 	  break;
1179 	case 2:
1180 	  src = CONSTM1_RTX (GET_MODE (dst));
1181 	  break;
1182 	default:
1183 	  gcc_unreachable ();
1184 	}
1185       if (NONDEBUG_INSN_P (insn))
1186 	{
1187 	  rtx tmp = gen_reg_rtx (V1TImode);
1188 	  /* Since there are no instructions to store a standard SSE
1189 	     constant, temporary register usage is required.  */
1190 	  emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
1191 	  dst = tmp;
1192 	}
1193       break;
1194 
1195     default:
1196       gcc_unreachable ();
1197     }
1198 
1199   SET_SRC (def_set) = src;
1200   SET_DEST (def_set) = dst;
1201 
1202   /* Drop possible dead definitions.  */
1203   PATTERN (insn) = def_set;
1204 
1205   INSN_CODE (insn) = -1;
1206   recog_memoized (insn);
1207   df_insn_rescan (insn);
1208 }
1209 
1210 /* Generate copies from defs used by the chain but not defined therein.
1211    Also populates defs_map which is used later by convert_insn.  */
1212 
1213 void
1214 general_scalar_chain::convert_registers ()
1215 {
1216   bitmap_iterator bi;
1217   unsigned id;
1218   EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
1219     {
1220       rtx chain_reg = gen_reg_rtx (smode);
1221       defs_map.put (regno_reg_rtx[id], chain_reg);
1222     }
1223   EXECUTE_IF_SET_IN_BITMAP (insns_conv, 0, id, bi)
1224     for (df_ref ref = DF_INSN_UID_DEFS (id); ref; ref = DF_REF_NEXT_LOC (ref))
1225       if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref)))
1226 	make_vector_copies (DF_REF_INSN (ref), DF_REF_REAL_REG (ref));
1227 }
1228 
1229 /* Convert whole chain creating required register
1230    conversions and copies.  */
1231 
1232 int
1233 scalar_chain::convert ()
1234 {
1235   bitmap_iterator bi;
1236   unsigned id;
1237   int converted_insns = 0;
1238 
1239   if (!dbg_cnt (stv_conversion))
1240     return 0;
1241 
1242   if (dump_file)
1243     fprintf (dump_file, "Converting chain #%d...\n", chain_id);
1244 
1245   convert_registers ();
1246 
1247   EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
1248     {
1249       convert_insn (DF_INSN_UID_GET (id)->insn);
1250       converted_insns++;
1251     }
1252 
1253   return converted_insns;
1254 }
1255 
1256 /* Return the SET expression if INSN doesn't reference a hard register.
1257    Return NULL if INSN uses or defines a hard register, excluding
1258    pseudo register pushes, hard register uses in a memory address,
1259    clobbers and flags definitions.  */
1260 
1261 static rtx
1262 pseudo_reg_set (rtx_insn *insn)
1263 {
1264   rtx set = single_set (insn);
1265   if (!set)
1266     return NULL;
1267 
1268   /* Check pseudo register push first. */
1269   machine_mode mode = TARGET_64BIT ? TImode : DImode;
1270   if (REG_P (SET_SRC (set))
1271       && !HARD_REGISTER_P (SET_SRC (set))
1272       && push_operand (SET_DEST (set), mode))
1273     return set;
1274 
1275   df_ref ref;
1276   FOR_EACH_INSN_DEF (ref, insn)
1277     if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
1278 	&& !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
1279 	&& DF_REF_REGNO (ref) != FLAGS_REG)
1280       return NULL;
1281 
1282   FOR_EACH_INSN_USE (ref, insn)
1283     if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
1284       return NULL;
1285 
1286   return set;
1287 }
1288 
1289 /* Check if comparison INSN may be transformed
1290    into a vector comparison.  Currently we transform
1291    only zero checks, which look like:
1292 
1293    (set (reg:CCZ 17 flags)
1294         (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
1295                              (subreg:SI (reg:DI x) 0))
1296 		     (const_int 0 [0])))  */
1297 
1298 static bool
1299 convertible_comparison_p (rtx_insn *insn, enum machine_mode mode)
1300 {
1301   /* ??? Currently convertible for double-word DImode chain only.  */
1302   if (TARGET_64BIT || mode != DImode)
1303     return false;
1304 
1305   if (!TARGET_SSE4_1)
1306     return false;
1307 
1308   rtx def_set = single_set (insn);
1309 
1310   gcc_assert (def_set);
1311 
1312   rtx src = SET_SRC (def_set);
1313   rtx dst = SET_DEST (def_set);
1314 
1315   gcc_assert (GET_CODE (src) == COMPARE);
1316 
1317   if (GET_CODE (dst) != REG
1318       || REGNO (dst) != FLAGS_REG
1319       || GET_MODE (dst) != CCZmode)
1320     return false;
1321 
1322   rtx op1 = XEXP (src, 0);
1323   rtx op2 = XEXP (src, 1);
1324 
1325   if (op2 != CONST0_RTX (GET_MODE (op2)))
1326     return false;
1327 
1328   if (GET_CODE (op1) != IOR)
1329     return false;
1330 
1331   op2 = XEXP (op1, 1);
1332   op1 = XEXP (op1, 0);
1333 
1334   if (!SUBREG_P (op1)
1335       || !SUBREG_P (op2)
1336       || GET_MODE (op1) != SImode
1337       || GET_MODE (op2) != SImode
1338       || ((SUBREG_BYTE (op1) != 0
1339 	   || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
1340 	  && (SUBREG_BYTE (op2) != 0
1341 	      || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
1342     return false;
1343 
1344   op1 = SUBREG_REG (op1);
1345   op2 = SUBREG_REG (op2);
1346 
1347   if (op1 != op2
1348       || !REG_P (op1)
1349       || GET_MODE (op1) != DImode)
1350     return false;
1351 
1352   return true;
1353 }
1354 
1355 /* The general version of scalar_to_vector_candidate_p.  */
1356 
1357 static bool
1358 general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
1359 {
1360   rtx def_set = pseudo_reg_set (insn);
1361 
1362   if (!def_set)
1363     return false;
1364 
1365   rtx src = SET_SRC (def_set);
1366   rtx dst = SET_DEST (def_set);
1367 
1368   if (GET_CODE (src) == COMPARE)
1369     return convertible_comparison_p (insn, mode);
1370 
1371   /* We are interested in "mode" only.  */
1372   if ((GET_MODE (src) != mode
1373        && !CONST_INT_P (src))
1374       || GET_MODE (dst) != mode)
1375     return false;
1376 
1377   if (!REG_P (dst) && !MEM_P (dst))
1378     return false;
1379 
1380   switch (GET_CODE (src))
1381     {
1382     case ASHIFTRT:
1383       if (!TARGET_AVX512VL)
1384 	return false;
1385       /* FALLTHRU */
1386 
1387     case ASHIFT:
1388     case LSHIFTRT:
1389       if (!CONST_INT_P (XEXP (src, 1))
1390 	  || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, GET_MODE_BITSIZE (mode)-1))
1391 	return false;
1392       break;
1393 
1394     case SMAX:
1395     case SMIN:
1396     case UMAX:
1397     case UMIN:
1398       if ((mode == DImode && !TARGET_AVX512VL)
1399 	  || (mode == SImode && !TARGET_SSE4_1))
1400 	return false;
1401       /* Fallthru.  */
1402 
1403     case PLUS:
1404     case MINUS:
1405     case IOR:
1406     case XOR:
1407     case AND:
1408       if (!REG_P (XEXP (src, 1))
1409 	  && !MEM_P (XEXP (src, 1))
1410 	  && !CONST_INT_P (XEXP (src, 1)))
1411 	return false;
1412 
1413       if (GET_MODE (XEXP (src, 1)) != mode
1414 	  && !CONST_INT_P (XEXP (src, 1)))
1415 	return false;
1416       break;
1417 
1418     case ABS:
1419       if ((mode == DImode && !TARGET_AVX512VL)
1420 	  || (mode == SImode && !TARGET_SSSE3))
1421 	return false;
1422       break;
1423 
1424     case NEG:
1425     case NOT:
1426       break;
1427 
1428     case REG:
1429       return true;
1430 
1431     case MEM:
1432     case CONST_INT:
1433       return REG_P (dst);
1434 
1435     default:
1436       return false;
1437     }
1438 
1439   if (!REG_P (XEXP (src, 0))
1440       && !MEM_P (XEXP (src, 0))
1441       && !CONST_INT_P (XEXP (src, 0))
1442       /* Check for andnot case.  */
1443       && (GET_CODE (src) != AND
1444 	  || GET_CODE (XEXP (src, 0)) != NOT
1445 	  || !REG_P (XEXP (XEXP (src, 0), 0))))
1446       return false;
1447 
1448   if (GET_MODE (XEXP (src, 0)) != mode
1449       && !CONST_INT_P (XEXP (src, 0)))
1450     return false;
1451 
1452   return true;
1453 }
1454 
1455 /* The TImode version of scalar_to_vector_candidate_p.  */
1456 
1457 static bool
1458 timode_scalar_to_vector_candidate_p (rtx_insn *insn)
1459 {
1460   rtx def_set = pseudo_reg_set (insn);
1461 
1462   if (!def_set)
1463     return false;
1464 
1465   rtx src = SET_SRC (def_set);
1466   rtx dst = SET_DEST (def_set);
1467 
1468   /* Only TImode load and store are allowed.  */
1469   if (GET_MODE (dst) != TImode)
1470     return false;
1471 
1472   if (MEM_P (dst))
1473     {
1474       /* Check for a store.  Memory must be aligned, or an unaligned store
1475 	 must be optimal.  Only support stores from a register, a standard
1476 	 SSE constant, or a CONST_WIDE_INT generated from a piecewise store.
1477 
1478 	 ??? Verify performance impact before enabling CONST_INT for
1479 	 __int128 store.  */
1480       if (misaligned_operand (dst, TImode)
1481 	  && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
1482 	return false;
1483 
1484       switch (GET_CODE (src))
1485 	{
1486 	default:
1487 	  return false;
1488 
1489 	case REG:
1490 	case CONST_WIDE_INT:
1491 	  return true;
1492 
1493 	case CONST_INT:
1494 	  return standard_sse_constant_p (src, TImode);
1495 	}
1496     }
1497   else if (MEM_P (src))
1498     {
1499       /* Check for a load.  Memory must be aligned, or an unaligned load
1500 	 must be optimal.  */
1501       return (REG_P (dst)
1502 	      && (!misaligned_operand (src, TImode)
1503 		  || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
1504     }
1505 
1506   return false;
1507 }
1508 
1509 /* For a register REGNO, scan instructions for its defs and uses.
1510    Put REGNO in REGS if a def or use isn't in CANDIDATES.  */
1511 
1512 static void
1513 timode_check_non_convertible_regs (bitmap candidates, bitmap regs,
1514 				   unsigned int regno)
1515 {
1516   for (df_ref def = DF_REG_DEF_CHAIN (regno);
1517        def;
1518        def = DF_REF_NEXT_REG (def))
1519     {
1520       if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1521 	{
1522 	  if (dump_file)
1523 	    fprintf (dump_file,
1524 		     "r%d has non convertible def in insn %d\n",
1525 		     regno, DF_REF_INSN_UID (def));
1526 
1527 	  bitmap_set_bit (regs, regno);
1528 	  break;
1529 	}
1530     }
1531 
1532   for (df_ref ref = DF_REG_USE_CHAIN (regno);
1533        ref;
1534        ref = DF_REF_NEXT_REG (ref))
1535     {
1536       /* Debug instructions are skipped.  */
1537       if (NONDEBUG_INSN_P (DF_REF_INSN (ref))
1538 	  && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
1539 	{
1540 	  if (dump_file)
1541 	    fprintf (dump_file,
1542 		     "r%d has non convertible use in insn %d\n",
1543 		     regno, DF_REF_INSN_UID (ref));
1544 
1545 	  bitmap_set_bit (regs, regno);
1546 	  break;
1547 	}
1548     }
1549 }
1550 
1551 /* The TImode version of remove_non_convertible_regs.  */
1552 
1553 static void
1554 timode_remove_non_convertible_regs (bitmap candidates)
1555 {
1556   bitmap_iterator bi;
1557   unsigned id;
1558   bitmap regs = BITMAP_ALLOC (NULL);
1559 
1560   EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
1561     {
1562       rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
1563       rtx dest = SET_DEST (def_set);
1564       rtx src = SET_SRC (def_set);
1565 
1566       if ((!REG_P (dest)
1567 	   || bitmap_bit_p (regs, REGNO (dest))
1568 	   || HARD_REGISTER_P (dest))
1569 	  && (!REG_P (src)
1570 	      || bitmap_bit_p (regs, REGNO (src))
1571 	      || HARD_REGISTER_P (src)))
1572 	continue;
1573 
1574       if (REG_P (dest))
1575 	timode_check_non_convertible_regs (candidates, regs,
1576 					   REGNO (dest));
1577 
1578       if (REG_P (src))
1579 	timode_check_non_convertible_regs (candidates, regs,
1580 					   REGNO (src));
1581     }
1582 
1583   EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
1584     {
1585       for (df_ref def = DF_REG_DEF_CHAIN (id);
1586 	   def;
1587 	   def = DF_REF_NEXT_REG (def))
1588 	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1589 	  {
1590 	    if (dump_file)
1591 	      fprintf (dump_file, "Removing insn %d from candidates list\n",
1592 		       DF_REF_INSN_UID (def));
1593 
1594 	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
1595 	  }
1596 
1597       for (df_ref ref = DF_REG_USE_CHAIN (id);
1598 	   ref;
1599 	   ref = DF_REF_NEXT_REG (ref))
1600 	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
1601 	  {
1602 	    if (dump_file)
1603 	      fprintf (dump_file, "Removing insn %d from candidates list\n",
1604 		       DF_REF_INSN_UID (ref));
1605 
1606 	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref));
1607 	  }
1608     }
1609 
1610   BITMAP_FREE (regs);
1611 }
1612 
1613 /* Main STV pass function.  Find and convert scalar
1614    instructions into vector mode when profitable.  */
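/* For example, when compiling for 32-bit, a double-word logical operation
   such as
     (set (reg:DI d) (and:DI (reg:DI a) (reg:DI b)))
   may be rewritten by a DImode chain to operate on SSE registers as
     (set (subreg:V2DI d' 0) (and:V2DI (subreg:V2DI a' 0) (subreg:V2DI b' 0)))
   when the chain's estimated conversion gain is positive.  */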
1615 
1616 static unsigned int
1617 convert_scalars_to_vector (bool timode_p)
1618 {
1619   basic_block bb;
1620   int converted_insns = 0;
1621 
1622   bitmap_obstack_initialize (NULL);
1623   const machine_mode cand_mode[3] = { SImode, DImode, TImode };
1624   const machine_mode cand_vmode[3] = { V4SImode, V2DImode, V1TImode };
1625   bitmap_head candidates[3];  /* { SImode, DImode, TImode } */
1626   for (unsigned i = 0; i < 3; ++i)
1627     bitmap_initialize (&candidates[i], &bitmap_default_obstack);
1628 
1629   calculate_dominance_info (CDI_DOMINATORS);
1630   df_set_flags (DF_DEFER_INSN_RESCAN | DF_RD_PRUNE_DEAD_DEFS);
1631   df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
1632   df_analyze ();
1633 
1634   /* Find all instructions we want to convert into vector mode.  */
1635   if (dump_file)
1636     fprintf (dump_file, "Searching for mode conversion candidates...\n");
1637 
1638   FOR_EACH_BB_FN (bb, cfun)
1639     {
1640       rtx_insn *insn;
1641       FOR_BB_INSNS (bb, insn)
1642 	if (timode_p
1643 	    && timode_scalar_to_vector_candidate_p (insn))
1644 	  {
1645 	    if (dump_file)
1646 	      fprintf (dump_file, "  insn %d is marked as a TImode candidate\n",
1647 		       INSN_UID (insn));
1648 
1649 	    bitmap_set_bit (&candidates[2], INSN_UID (insn));
1650 	  }
1651 	else if (!timode_p)
1652 	  {
1653 	    /* Check {SI,DI}mode.  */
1654 	    for (unsigned i = 0; i <= 1; ++i)
1655 	      if (general_scalar_to_vector_candidate_p (insn, cand_mode[i]))
1656 		{
1657 		  if (dump_file)
1658 		    fprintf (dump_file, "  insn %d is marked as a %s candidate\n",
1659 			     INSN_UID (insn), i == 0 ? "SImode" : "DImode");
1660 
1661 		  bitmap_set_bit (&candidates[i], INSN_UID (insn));
1662 		  break;
1663 		}
1664 	  }
1665     }
1666 
1667   if (timode_p)
1668     timode_remove_non_convertible_regs (&candidates[2]);
1669 
1670   for (unsigned i = 0; i <= 2; ++i)
1671     if (!bitmap_empty_p (&candidates[i]))
1672       break;
1673     else if (i == 2 && dump_file)
1674       fprintf (dump_file, "There are no candidates for optimization.\n");
1675 
1676   for (unsigned i = 0; i <= 2; ++i)
1677     while (!bitmap_empty_p (&candidates[i]))
1678       {
1679 	unsigned uid = bitmap_first_set_bit (&candidates[i]);
1680 	scalar_chain *chain;
1681 
1682 	if (cand_mode[i] == TImode)
1683 	  chain = new timode_scalar_chain;
1684 	else
1685 	  chain = new general_scalar_chain (cand_mode[i], cand_vmode[i]);
1686 
1687 	/* Find the instruction chain we want to convert to vector mode.
1688 	   Check all uses and definitions to estimate all required
1689 	   conversions.  */
1690 	chain->build (&candidates[i], uid);
1691 
1692 	if (chain->compute_convert_gain () > 0)
1693 	  converted_insns += chain->convert ();
1694 	else
1695 	  if (dump_file)
1696 	    fprintf (dump_file, "Chain #%d conversion is not profitable\n",
1697 		     chain->chain_id);
1698 
1699 	delete chain;
1700       }
1701 
1702   if (dump_file)
1703     fprintf (dump_file, "Total insns converted: %d\n", converted_insns);
1704 
1705   for (unsigned i = 0; i <= 2; ++i)
1706     bitmap_release (&candidates[i]);
1707   bitmap_obstack_release (NULL);
1708   df_process_deferred_rescans ();
1709 
1710   /* Conversion means we may have 128-bit register spills/fills,
1711      which require an aligned stack.  */
1712   if (converted_insns)
1713     {
1714       if (crtl->stack_alignment_needed < 128)
1715 	crtl->stack_alignment_needed = 128;
1716       if (crtl->stack_alignment_estimated < 128)
1717 	crtl->stack_alignment_estimated = 128;
1718 
1719       crtl->stack_realign_needed
1720 	= INCOMING_STACK_BOUNDARY < crtl->stack_alignment_estimated;
1721       crtl->stack_realign_tried = crtl->stack_realign_needed;
1722 
1723       crtl->stack_realign_processed = true;
1724 
1725       if (!crtl->drap_reg)
1726 	{
1727 	  rtx drap_rtx = targetm.calls.get_drap_rtx ();
1728 
1729 	  /* stack_realign_drap and drap_rtx must match.  */
1730 	  gcc_assert ((stack_realign_drap != 0) == (drap_rtx != NULL));
1731 
1732 	  /* Do nothing if NULL is returned,
1733 	     which means DRAP is not needed.  */
1734 	  if (drap_rtx != NULL)
1735 	    {
1736 	      crtl->args.internal_arg_pointer = drap_rtx;
1737 
1738 	      /* Call fixup_tail_calls to clean up
1739 		 REG_EQUIV note if DRAP is needed. */
1740 	      fixup_tail_calls ();
1741 	    }
1742 	}
1743 
1744       /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments.  */
1745       if (TARGET_64BIT)
1746 	for (tree parm = DECL_ARGUMENTS (current_function_decl);
1747 	     parm; parm = DECL_CHAIN (parm))
1748 	  {
1749 	    if (TYPE_MODE (TREE_TYPE (parm)) != TImode)
1750 	      continue;
1751 	    if (DECL_RTL_SET_P (parm)
1752 		&& GET_MODE (DECL_RTL (parm)) == V1TImode)
1753 	      {
1754 		rtx r = DECL_RTL (parm);
1755 		if (REG_P (r))
1756 		  SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0));
1757 	      }
1758 	    if (DECL_INCOMING_RTL (parm)
1759 		&& GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode)
1760 	      {
1761 		rtx r = DECL_INCOMING_RTL (parm);
1762 		if (REG_P (r))
1763 		  DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0);
1764 	      }
1765 	  }
1766     }
1767 
1768   return 0;
1769 }
1770 
1771 /* Modify the vzeroupper pattern in INSN so that it describes the effect
1772    that the instruction has on the SSE registers.  LIVE_REGS are the set
1773    of registers that are live across the instruction.
1774 
1775    For a live register R we use:
1776 
1777      (set (reg:V2DF R) (reg:V2DF R))
1778 
1779    which preserves the low 128 bits but clobbers the upper bits.  */
1780 
1781 static void
1782 ix86_add_reg_usage_to_vzeroupper (rtx_insn *insn, bitmap live_regs)
1783 {
1784   rtx pattern = PATTERN (insn);
1785   unsigned int nregs = TARGET_64BIT ? 16 : 8;
1786   unsigned int npats = nregs;
1787   for (unsigned int i = 0; i < nregs; ++i)
1788     {
1789       unsigned int regno = GET_SSE_REGNO (i);
1790       if (!bitmap_bit_p (live_regs, regno))
1791 	npats--;
1792     }
1793   if (npats == 0)
1794     return;
1795   rtvec vec = rtvec_alloc (npats + 1);
1796   RTVEC_ELT (vec, 0) = XVECEXP (pattern, 0, 0);
1797   for (unsigned int i = 0, j = 0; i < nregs; ++i)
1798     {
1799       unsigned int regno = GET_SSE_REGNO (i);
1800       if (!bitmap_bit_p (live_regs, regno))
1801 	continue;
1802       rtx reg = gen_rtx_REG (V2DImode, regno);
1803       ++j;
1804       RTVEC_ELT (vec, j) = gen_rtx_SET (reg, reg);
1805     }
1806   XVEC (pattern, 0) = vec;
1807   INSN_CODE (insn) = -1;
1808   df_insn_rescan (insn);
1809 }
1810 
1811 /* Walk the vzeroupper instructions in the function and annotate them
1812    with the effect that they have on the SSE registers.  */
1813 
1814 static void
1815 ix86_add_reg_usage_to_vzerouppers (void)
1816 {
1817   basic_block bb;
1818   rtx_insn *insn;
1819   auto_bitmap live_regs;
1820 
1821   df_analyze ();
1822   FOR_EACH_BB_FN (bb, cfun)
1823     {
1824       bitmap_copy (live_regs, df_get_live_out (bb));
1825       df_simulate_initialize_backwards (bb, live_regs);
1826       FOR_BB_INSNS_REVERSE (bb, insn)
1827 	{
1828 	  if (!NONDEBUG_INSN_P (insn))
1829 	    continue;
1830 	  if (vzeroupper_pattern (PATTERN (insn), VOIDmode))
1831 	    ix86_add_reg_usage_to_vzeroupper (insn, live_regs);
1832 	  df_simulate_one_insn_backwards (bb, insn, live_regs);
1833 	}
1834     }
1835 }
1836 
1837 static unsigned int
1838 rest_of_handle_insert_vzeroupper (void)
1839 {
1840   if (TARGET_VZEROUPPER
1841       && flag_expensive_optimizations
1842       && !optimize_size)
1843     {
1844       /* vzeroupper instructions are inserted immediately after reload to
1845 	 account for possible spills from 256-bit or 512-bit registers.  The
1846 	 pass reuses the mode switching infrastructure by re-running the mode
1847 	 insertion pass, so disable entities that have already been processed.  */
1848       for (int i = 0; i < MAX_386_ENTITIES; i++)
1849 	ix86_optimize_mode_switching[i] = 0;
1850 
1851       ix86_optimize_mode_switching[AVX_U128] = 1;
1852 
1853       /* Call optimize_mode_switching.  */
1854       g->get_passes ()->execute_pass_mode_switching ();
1855     }
1856   ix86_add_reg_usage_to_vzerouppers ();
1857   return 0;
1858 }
1859 
1860 namespace {
1861 
1862 const pass_data pass_data_insert_vzeroupper =
1863 {
1864   RTL_PASS, /* type */
1865   "vzeroupper", /* name */
1866   OPTGROUP_NONE, /* optinfo_flags */
1867   TV_MACH_DEP, /* tv_id */
1868   0, /* properties_required */
1869   0, /* properties_provided */
1870   0, /* properties_destroyed */
1871   0, /* todo_flags_start */
1872   TODO_df_finish, /* todo_flags_finish */
1873 };
1874 
1875 class pass_insert_vzeroupper : public rtl_opt_pass
1876 {
1877 public:
1878   pass_insert_vzeroupper(gcc::context *ctxt)
1879     : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
1880   {}
1881 
1882   /* opt_pass methods: */
1883   virtual bool gate (function *)
1884     {
1885       return TARGET_AVX
1886 	     && ((TARGET_VZEROUPPER
1887 		  && flag_expensive_optimizations
1888 		  && !optimize_size)
1889 		 || cfun->machine->has_explicit_vzeroupper);
1890     }
1891 
1892   virtual unsigned int execute (function *)
1893     {
1894       return rest_of_handle_insert_vzeroupper ();
1895     }
1896 
1897 }; // class pass_insert_vzeroupper
1898 
1899 const pass_data pass_data_stv =
1900 {
1901   RTL_PASS, /* type */
1902   "stv", /* name */
1903   OPTGROUP_NONE, /* optinfo_flags */
1904   TV_MACH_DEP, /* tv_id */
1905   0, /* properties_required */
1906   0, /* properties_provided */
1907   0, /* properties_destroyed */
1908   0, /* todo_flags_start */
1909   TODO_df_finish, /* todo_flags_finish */
1910 };
1911 
1912 class pass_stv : public rtl_opt_pass
1913 {
1914 public:
1915   pass_stv (gcc::context *ctxt)
1916     : rtl_opt_pass (pass_data_stv, ctxt),
1917       timode_p (false)
1918   {}
1919 
1920   /* opt_pass methods: */
1921   virtual bool gate (function *)
1922     {
1923       return ((!timode_p || TARGET_64BIT)
1924 	      && TARGET_STV && TARGET_SSE2 && optimize > 1);
1925     }
1926 
1927   virtual unsigned int execute (function *)
1928     {
1929       return convert_scalars_to_vector (timode_p);
1930     }
1931 
1932   opt_pass *clone ()
1933     {
1934       return new pass_stv (m_ctxt);
1935     }
1936 
1937   void set_pass_param (unsigned int n, bool param)
1938     {
1939       gcc_assert (n == 0);
1940       timode_p = param;
1941     }
1942 
1943 private:
1944   bool timode_p;
1945 }; // class pass_stv
1946 
1947 } // anon namespace
1948 
1949 rtl_opt_pass *
1950 make_pass_insert_vzeroupper (gcc::context *ctxt)
1951 {
1952   return new pass_insert_vzeroupper (ctxt);
1953 }
1954 
1955 rtl_opt_pass *
1956 make_pass_stv (gcc::context *ctxt)
1957 {
1958   return new pass_stv (ctxt);
1959 }
1960 
1961 /* Inserting ENDBR and pseudo patchable-area instructions.  */
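/* As a rough example (exact output depends on the options in effect),
   compiling with -fcf-protection=branch makes a 64-bit function entry
   start with an ENDBR:

	foo:
		endbr64
		...

   and -fpatchable-function-entry=N additionally emits a patchable-area
   pseudo insn (after the ENDBR, if any) that is later expanded into the
   requested number of NOP bytes.  */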
1962 
1963 static void
1964 rest_of_insert_endbr_and_patchable_area (bool need_endbr,
1965 					 unsigned int patchable_area_size)
1966 {
1967   rtx endbr;
1968   rtx_insn *insn;
1969   rtx_insn *endbr_insn = NULL;
1970   basic_block bb;
1971 
1972   if (need_endbr)
1973     {
1974 	      /* Currently emit ENDBR if the function is tracked, i.e. the
1975 		 'nocf_check' attribute is absent from the function attributes.
1976 		 Later an optimization will be introduced to analyze whether
1977 		 the address of a static function is taken.  A static function
1978 		 whose address is not taken will get a nocf_check attribute,
1979 		 which will allow reducing the number of ENDBR instructions.  */
1980       if (!lookup_attribute ("nocf_check",
1981 			     TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
1982 	  && (!flag_manual_endbr
1983 	      || lookup_attribute ("cf_check",
1984 				   DECL_ATTRIBUTES (cfun->decl)))
1985 	  && (!cgraph_node::get (cfun->decl)->only_called_directly_p ()
1986 	      || ix86_cmodel == CM_LARGE
1987 	      || ix86_cmodel == CM_LARGE_PIC
1988 	      || flag_force_indirect_call
1989 	      || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
1990 		  && DECL_DLLIMPORT_P (cfun->decl))))
1991 	{
1992 	  if (crtl->profile && flag_fentry)
1993 	    {
1994 	      /* Queue ENDBR insertion to x86_function_profiler.
1995 		 NB: Any patchable-area insn will be inserted after
1996 		 ENDBR.  */
1997 	      cfun->machine->insn_queued_at_entrance = TYPE_ENDBR;
1998 	    }
1999 	  else
2000 	    {
2001 	      endbr = gen_nop_endbr ();
2002 	      bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
2003 	      rtx_insn *insn = BB_HEAD (bb);
2004 	      endbr_insn = emit_insn_before (endbr, insn);
2005 	    }
2006 	}
2007     }
2008 
2009   if (patchable_area_size)
2010     {
2011       if (crtl->profile && flag_fentry)
2012 	{
2013 	  /* Queue patchable-area insertion to x86_function_profiler.
2014 	     NB: If there is a queued ENDBR, x86_function_profiler
2015 	     will also handle patchable-area.  */
2016 	  if (!cfun->machine->insn_queued_at_entrance)
2017 	    cfun->machine->insn_queued_at_entrance = TYPE_PATCHABLE_AREA;
2018 	}
2019       else
2020 	{
2021 	  rtx patchable_area
2022 	    = gen_patchable_area (GEN_INT (patchable_area_size),
2023 				  GEN_INT (crtl->patch_area_entry == 0));
2024 	  if (endbr_insn)
2025 	    emit_insn_after (patchable_area, endbr_insn);
2026 	  else
2027 	    {
2028 	      bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
2029 	      insn = BB_HEAD (bb);
2030 	      emit_insn_before (patchable_area, insn);
2031 	    }
2032 	}
2033     }
2034 
2035   if (!need_endbr)
2036     return;
2037 
2038   bb = 0;
2039   FOR_EACH_BB_FN (bb, cfun)
2040     {
2041       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
2042 	   insn = NEXT_INSN (insn))
2043 	{
2044 	  if (CALL_P (insn))
2045 	    {
2046 	      need_endbr = find_reg_note (insn, REG_SETJMP, NULL) != NULL;
2047 	      if (!need_endbr && !SIBLING_CALL_P (insn))
2048 		{
2049 		  rtx call = get_call_rtx_from (insn);
2050 		  rtx fnaddr = XEXP (call, 0);
2051 		  tree fndecl = NULL_TREE;
2052 
2053 		  /* Also generate ENDBRANCH for non-tail call which
2054 		     may return via indirect branch.  */
2055 		  if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
2056 		    fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
2057 		  if (fndecl == NULL_TREE)
2058 		    fndecl = MEM_EXPR (fnaddr);
2059 		  if (fndecl
2060 		      && TREE_CODE (TREE_TYPE (fndecl)) != FUNCTION_TYPE
2061 		      && TREE_CODE (TREE_TYPE (fndecl)) != METHOD_TYPE)
2062 		    fndecl = NULL_TREE;
2063 		  if (fndecl && TYPE_ARG_TYPES (TREE_TYPE (fndecl)))
2064 		    {
2065 		      tree fntype = TREE_TYPE (fndecl);
2066 		      if (lookup_attribute ("indirect_return",
2067 					    TYPE_ATTRIBUTES (fntype)))
2068 			need_endbr = true;
2069 		    }
2070 		}
2071 	      if (!need_endbr)
2072 		continue;
2073 	      /* Generate ENDBRANCH after a CALL that can return more than
2074 		 once (setjmp-like functions).  */
2075 
2076 	      endbr = gen_nop_endbr ();
2077 	      emit_insn_after_setloc (endbr, insn, INSN_LOCATION (insn));
2078 	      continue;
2079 	    }
2080 
2081 	  if (JUMP_P (insn) && flag_cet_switch)
2082 	    {
2083 	      rtx target = JUMP_LABEL (insn);
2084 	      if (target == NULL_RTX || ANY_RETURN_P (target))
2085 		continue;
2086 
2087 	      /* Check the jump is a switch table.  */
2088 	      rtx_insn *label = as_a<rtx_insn *> (target);
2089 	      rtx_insn *table = next_insn (label);
2090 	      if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
2091 		continue;
2092 
2093 	      /* For the indirect jump find out all places it jumps and insert
2094 		 ENDBRANCH there.  It should be done under a special flag to
2095 		 control ENDBRANCH generation for switch stmts.  */
2096 	      edge_iterator ei;
2097 	      edge e;
2098 	      basic_block dest_blk;
2099 
2100 	      FOR_EACH_EDGE (e, ei, bb->succs)
2101 		{
2102 		  rtx_insn *insn;
2103 
2104 		  dest_blk = e->dest;
2105 		  insn = BB_HEAD (dest_blk);
2106 		  gcc_assert (LABEL_P (insn));
2107 		  endbr = gen_nop_endbr ();
2108 		  emit_insn_after (endbr, insn);
2109 		}
2110 	      continue;
2111 	    }
2112 
2113 	  if (LABEL_P (insn) && LABEL_PRESERVE_P (insn))
2114 	    {
2115 	      endbr = gen_nop_endbr ();
2116 	      emit_insn_after (endbr, insn);
2117 	      continue;
2118 	    }
2119 	}
2120     }
2121 
2122   return;
2123 }
2124 
2125 namespace {
2126 
2127 const pass_data pass_data_insert_endbr_and_patchable_area =
2128 {
2129   RTL_PASS, /* type.  */
2130   "endbr_and_patchable_area", /* name.  */
2131   OPTGROUP_NONE, /* optinfo_flags.  */
2132   TV_MACH_DEP, /* tv_id.  */
2133   0, /* properties_required.  */
2134   0, /* properties_provided.  */
2135   0, /* properties_destroyed.  */
2136   0, /* todo_flags_start.  */
2137   0, /* todo_flags_finish.  */
2138 };
2139 
2140 class pass_insert_endbr_and_patchable_area : public rtl_opt_pass
2141 {
2142 public:
2143   pass_insert_endbr_and_patchable_area (gcc::context *ctxt)
2144     : rtl_opt_pass (pass_data_insert_endbr_and_patchable_area, ctxt)
2145   {}
2146 
2147   /* opt_pass methods: */
2148   virtual bool gate (function *)
2149     {
2150       need_endbr = (flag_cf_protection & CF_BRANCH) != 0;
2151       patchable_area_size = crtl->patch_area_size - crtl->patch_area_entry;
2152       return need_endbr || patchable_area_size;
2153     }
2154 
2155   virtual unsigned int execute (function *)
2156     {
2157       timevar_push (TV_MACH_DEP);
2158       rest_of_insert_endbr_and_patchable_area (need_endbr,
2159 					       patchable_area_size);
2160       timevar_pop (TV_MACH_DEP);
2161       return 0;
2162     }
2163 
2164 private:
2165   bool need_endbr;
2166   unsigned int patchable_area_size;
2167 }; // class pass_insert_endbr_and_patchable_area
2168 
2169 } // anon namespace
2170 
2171 rtl_opt_pass *
2172 make_pass_insert_endbr_and_patchable_area (gcc::context *ctxt)
2173 {
2174   return new pass_insert_endbr_and_patchable_area (ctxt);
2175 }
2176 
2177 /* Replace all one-value const vectors that are referenced by SYMBOL_REFs in x
2178    with an embedded broadcast, i.e. transform
2179 
2180      vpaddq .LC0(%rip), %zmm0, %zmm0
2181      ret
2182   .LC0:
2183     .quad 3
2184     .quad 3
2185     .quad 3
2186     .quad 3
2187     .quad 3
2188     .quad 3
2189     .quad 3
2190     .quad 3
2191 
2192     to
2193 
2194      vpaddq .LC0(%rip){1to8}, %zmm0, %zmm0
2195      ret
2196   .LC0:
2197     .quad 3  */
2198 static void
2199 replace_constant_pool_with_broadcast (rtx_insn *insn)
2200 {
2201   subrtx_ptr_iterator::array_type array;
2202   FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), ALL)
2203     {
2204       rtx *loc = *iter;
2205       rtx x = *loc;
2206       rtx broadcast_mem, vec_dup, constant, first;
2207       machine_mode mode;
2208 
2209       /* Constant pool.  */
2210       if (!MEM_P (x)
2211 	  || !SYMBOL_REF_P (XEXP (x, 0))
2212 	  || !CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)))
2213 	continue;
2214 
2215       /* Const vector.  */
2216       mode = GET_MODE (x);
2217       if (!VECTOR_MODE_P (mode))
2218 	return;
2219       constant = get_pool_constant (XEXP (x, 0));
2220       if (GET_CODE (constant) != CONST_VECTOR)
2221 	return;
2222 
2223       /* There could be some rtx like
2224 	 (mem/u/c:V16QI (symbol_ref/u:DI ("*.LC1")))
2225 	 but with "*.LC1" referring to a V2DI constant vector.  */
2226       if (GET_MODE (constant) != mode)
2227 	{
2228 	  constant = simplify_subreg (mode, constant, GET_MODE (constant), 0);
2229 	  if (constant == NULL_RTX || GET_CODE (constant) != CONST_VECTOR)
2230 	    return;
2231 	}
2232       first = XVECEXP (constant, 0, 0);
2233 
2234       for (int i = 1; i < GET_MODE_NUNITS (mode); ++i)
2235 	{
2236 	  rtx tmp = XVECEXP (constant, 0, i);
2237 	  /* Vector duplicate value.  */
2238 	  if (!rtx_equal_p (tmp, first))
2239 	    return;
2240 	}
2241 
2242       /* Replace with embedded broadcast.  */
2243       broadcast_mem = force_const_mem (GET_MODE_INNER (mode), first);
2244       vec_dup = gen_rtx_VEC_DUPLICATE (mode, broadcast_mem);
2245       validate_change (insn, loc, vec_dup, 0);
2246 
2247       /* At most 1 memory_operand in an insn.  */
2248       return;
2249     }
2250 }
2251 
2252 /* At entry of the nearest common dominator for basic blocks with
2253    conversions, generate a single
2254 	vxorps %xmmN, %xmmN, %xmmN
2255    for all
2256 	vcvtss2sd  op, %xmmN, %xmmX
2257 	vcvtsd2ss  op, %xmmN, %xmmX
2258 	vcvtsi2ss  op, %xmmN, %xmmX
2259 	vcvtsi2sd  op, %xmmN, %xmmX
2260 
2261    NB: We want to generate only a single vxorps to cover the whole
2262    function.  The LCM algorithm isn't appropriate here since it may
2263    place a vxorps inside the loop.  */
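/* A rough sketch of the intended effect (register names are purely
   illustrative):

	vxorps	%xmm3, %xmm3, %xmm3	# single clearing insn at the
					# nearest common dominator
	...
	vcvtsi2ss %edi, %xmm3, %xmm0	# conversion now merges into a
					# register whose upper elements
					# are known to be zero

   so each scalar conversion no longer carries a false dependency on the
   previous contents of its destination XMM register.  */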
2264 
2265 static unsigned int
2266 remove_partial_avx_dependency (void)
2267 {
2268   timevar_push (TV_MACH_DEP);
2269 
2270   bitmap_obstack_initialize (NULL);
2271   bitmap convert_bbs = BITMAP_ALLOC (NULL);
2272 
2273   basic_block bb;
2274   rtx_insn *insn, *set_insn;
2275   rtx set;
2276   rtx v4sf_const0 = NULL_RTX;
2277 
2278   auto_vec<rtx_insn *> control_flow_insns;
2279 
2280   /* We create invalid RTL initially so defer rescans.  */
2281   df_set_flags (DF_DEFER_INSN_RESCAN);
2282 
2283   FOR_EACH_BB_FN (bb, cfun)
2284     {
2285       FOR_BB_INSNS (bb, insn)
2286 	{
2287 	  if (!NONDEBUG_INSN_P (insn))
2288 	    continue;
2289 
2290 	  /* Handle AVX512 embedded broadcast here to save compile time.  */
2291 	  if (TARGET_AVX512F)
2292 	    replace_constant_pool_with_broadcast (insn);
2293 
2294 	  set = single_set (insn);
2295 	  if (!set)
2296 	    continue;
2297 
2298 	  if (get_attr_avx_partial_xmm_update (insn)
2299 	      != AVX_PARTIAL_XMM_UPDATE_TRUE)
2300 	    continue;
2301 
2302 	  if (!v4sf_const0)
2303 	    v4sf_const0 = gen_reg_rtx (V4SFmode);
2304 
2305 	  /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF,
2306 	     SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and
2307 	     vec_merge with subreg.  */
2308 	  rtx src = SET_SRC (set);
2309 	  rtx dest = SET_DEST (set);
2310 	  machine_mode dest_mode = GET_MODE (dest);
2311 
2312 	  rtx zero;
2313 	  machine_mode dest_vecmode;
2314 	  if (dest_mode == E_SFmode)
2315 	    {
2316 	      dest_vecmode = V4SFmode;
2317 	      zero = v4sf_const0;
2318 	    }
2319 	  else
2320 	    {
2321 	      dest_vecmode = V2DFmode;
2322 	      zero = gen_rtx_SUBREG (V2DFmode, v4sf_const0, 0);
2323 	    }
2324 
2325 	  /* Change source to vector mode.  */
2326 	  src = gen_rtx_VEC_DUPLICATE (dest_vecmode, src);
2327 	  src = gen_rtx_VEC_MERGE (dest_vecmode, src, zero,
2328 				   GEN_INT (HOST_WIDE_INT_1U));
2329 	  /* Change destination to vector mode.  */
2330 	  rtx vec = gen_reg_rtx (dest_vecmode);
2331 	  /* Generate an XMM vector SET.  */
2332 	  set = gen_rtx_SET (vec, src);
2333 	  set_insn = emit_insn_before (set, insn);
2334 	  df_insn_rescan (set_insn);
2335 
2336 	  if (cfun->can_throw_non_call_exceptions)
2337 	    {
2338 	      /* Handle REG_EH_REGION note.  */
2339 	      rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
2340 	      if (note)
2341 		{
2342 		  control_flow_insns.safe_push (set_insn);
2343 		  add_reg_note (set_insn, REG_EH_REGION, XEXP (note, 0));
2344 		}
2345 	    }
2346 
2347 	  src = gen_rtx_SUBREG (dest_mode, vec, 0);
2348 	  set = gen_rtx_SET (dest, src);
2349 
2350 	  /* Drop possible dead definitions.  */
2351 	  PATTERN (insn) = set;
2352 
2353 	  INSN_CODE (insn) = -1;
2354 	  recog_memoized (insn);
2355 	  df_insn_rescan (insn);
2356 	  bitmap_set_bit (convert_bbs, bb->index);
2357 	}
2358     }
2359 
2360   if (v4sf_const0)
2361     {
2362       /* (Re-)discover loops so that bb->loop_father can be used in the
2363 	 analysis below.  */
2364       calculate_dominance_info (CDI_DOMINATORS);
2365       loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2366 
2367       /* Generate a vxorps at the entry of the nearest common dominator for
2368 	 the basic blocks with conversions, which is in the fake loop that
2369 	 contains the whole function, so that there is only a single vxorps
2370 	 in the whole function.  */
2371       bb = nearest_common_dominator_for_set (CDI_DOMINATORS,
2372 					     convert_bbs);
2373       while (bb->loop_father->latch
2374 	     != EXIT_BLOCK_PTR_FOR_FN (cfun))
2375 	bb = get_immediate_dominator (CDI_DOMINATORS,
2376 				      bb->loop_father->header);
2377 
2378       set = gen_rtx_SET (v4sf_const0, CONST0_RTX (V4SFmode));
2379 
2380       insn = BB_HEAD (bb);
2381       while (insn && !NONDEBUG_INSN_P (insn))
2382 	{
2383 	  if (insn == BB_END (bb))
2384 	    {
2385 	      insn = NULL;
2386 	      break;
2387 	    }
2388 	  insn = NEXT_INSN (insn);
2389 	}
2390       if (insn == BB_HEAD (bb))
2391         set_insn = emit_insn_before (set, insn);
2392       else
2393 	set_insn = emit_insn_after (set,
2394 				    insn ? PREV_INSN (insn) : BB_END (bb));
2395       df_insn_rescan (set_insn);
2396       loop_optimizer_finalize ();
2397 
2398       if (!control_flow_insns.is_empty ())
2399 	{
2400 	  free_dominance_info (CDI_DOMINATORS);
2401 
2402 	  unsigned int i;
2403 	  FOR_EACH_VEC_ELT (control_flow_insns, i, insn)
2404 	    if (control_flow_insn_p (insn))
2405 	      {
2406 		/* Split the block after insn.  There will be a fallthru
2407 		   edge, which is OK so we keep it.  We have to create
2408 		   the exception edges ourselves.  */
2409 		bb = BLOCK_FOR_INSN (insn);
2410 		split_block (bb, insn);
2411 		rtl_make_eh_edge (NULL, bb, BB_END (bb));
2412 	      }
2413 	}
2414     }
2415 
2416   df_process_deferred_rescans ();
2417   df_clear_flags (DF_DEFER_INSN_RESCAN);
2418   bitmap_obstack_release (NULL);
2419   BITMAP_FREE (convert_bbs);
2420 
2421   timevar_pop (TV_MACH_DEP);
2422   return 0;
2423 }
2424 
2425 static bool
2426 remove_partial_avx_dependency_gate ()
2427 {
2428   return (TARGET_AVX
2429 	  && TARGET_SSE_PARTIAL_REG_DEPENDENCY
2430 	  && TARGET_SSE_MATH
2431 	  && optimize
2432 	  && optimize_function_for_speed_p (cfun));
2433 }
2434 
2435 namespace {
2436 
2437 const pass_data pass_data_remove_partial_avx_dependency =
2438 {
2439   RTL_PASS, /* type */
2440   "rpad", /* name */
2441   OPTGROUP_NONE, /* optinfo_flags */
2442   TV_MACH_DEP, /* tv_id */
2443   0, /* properties_required */
2444   0, /* properties_provided */
2445   0, /* properties_destroyed */
2446   0, /* todo_flags_start */
2447   0, /* todo_flags_finish */
2448 };
2449 
2450 class pass_remove_partial_avx_dependency : public rtl_opt_pass
2451 {
2452 public:
2453   pass_remove_partial_avx_dependency (gcc::context *ctxt)
2454     : rtl_opt_pass (pass_data_remove_partial_avx_dependency, ctxt)
2455   {}
2456 
2457   /* opt_pass methods: */
2458   virtual bool gate (function *)
2459     {
2460       return remove_partial_avx_dependency_gate ();
2461     }
2462 
2463   virtual unsigned int execute (function *)
2464     {
2465       return remove_partial_avx_dependency ();
2466     }
2467 }; // class pass_rpad
2468 
2469 } // anon namespace
2470 
2471 rtl_opt_pass *
2472 make_pass_remove_partial_avx_dependency (gcc::context *ctxt)
2473 {
2474   return new pass_remove_partial_avx_dependency (ctxt);
2475 }
2476 
2477 /* For a const vector having one duplicated value, there's no need to put the
2478    whole vector in the constant pool when the target supports embedded broadcast.  */
2479 static unsigned int
2480 constant_pool_broadcast (void)
2481 {
2482   timevar_push (TV_MACH_DEP);
2483   rtx_insn *insn;
2484 
2485   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2486     {
2487       if (INSN_P (insn))
2488 	replace_constant_pool_with_broadcast (insn);
2489     }
2490   timevar_pop (TV_MACH_DEP);
2491   return 0;
2492 }
2493 
2494 namespace {
2495 
2496 const pass_data pass_data_constant_pool_broadcast =
2497 {
2498   RTL_PASS, /* type */
2499   "cpb", /* name */
2500   OPTGROUP_NONE, /* optinfo_flags */
2501   TV_MACH_DEP, /* tv_id */
2502   0, /* properties_required */
2503   0, /* properties_provided */
2504   0, /* properties_destroyed */
2505   0, /* todo_flags_start */
2506   TODO_df_finish, /* todo_flags_finish */
2507 };
2508 
2509 class pass_constant_pool_broadcast : public rtl_opt_pass
2510 {
2511 public:
2512   pass_constant_pool_broadcast (gcc::context *ctxt)
2513     : rtl_opt_pass (pass_data_constant_pool_broadcast, ctxt)
2514   {}
2515 
2516   /* opt_pass methods: */
2517   virtual bool gate (function *)
2518     {
2519       /* Return false if the rpad pass gate is true, since
2520 	 replace_constant_pool_with_broadcast is called
2521 	 from both this pass and the rpad pass.  */
2522       return (TARGET_AVX512F && !remove_partial_avx_dependency_gate ());
2523     }
2524 
2525   virtual unsigned int execute (function *)
2526     {
2527       return constant_pool_broadcast ();
2528     }
2529 }; // class pass_cpb
2530 
2531 } // anon namespace
2532 
2533 rtl_opt_pass *
2534 make_pass_constant_pool_broadcast (gcc::context *ctxt)
2535 {
2536   return new pass_constant_pool_broadcast (ctxt);
2537 }
2538 
2539 /* This compares the priority of target features in function DECL1
2540    and DECL2.  It returns positive value if DECL1 is higher priority,
2541    negative value if DECL2 is higher priority and 0 if they are the
2542    same.  */
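/* For example (illustrative; the actual ordering comes from
   get_builtin_code_for_version), comparing a version declared with
   target ("avx2") against one declared with target ("sse4.2") yields a
   positive value when the AVX2 version is DECL1, since AVX2 has the
   higher dispatch priority.  */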
2543 
2544 int
2545 ix86_compare_version_priority (tree decl1, tree decl2)
2546 {
2547   unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
2548   unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
2549 
2550   return (int)priority1 - (int)priority2;
2551 }
2552 
2553 /* V1 and V2 point to function versions with different priorities
2554    based on the target ISA.  This function compares their priorities.  */
2555 
2556 static int
2557 feature_compare (const void *v1, const void *v2)
2558 {
2559   typedef struct _function_version_info
2560     {
2561       tree version_decl;
2562       tree predicate_chain;
2563       unsigned int dispatch_priority;
2564     } function_version_info;
2565 
2566   const function_version_info c1 = *(const function_version_info *)v1;
2567   const function_version_info c2 = *(const function_version_info *)v2;
2568   return (c2.dispatch_priority - c1.dispatch_priority);
2569 }
2570 
2571 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
2572    to return a pointer to VERSION_DECL if the outcome of the expression
2573    formed by PREDICATE_CHAIN is true.  This function will be called during
2574    version dispatch to decide which function version to execute.  It returns
2575    the basic block at the end, to which more conditions can be added.  */
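/* A rough sketch of the code appended for one version (the temporaries
   and the predicate call are illustrative; the real predicates come
   from PREDICATE_CHAIN, e.g. __builtin_cpu_supports / __builtin_cpu_is):

     _1 = __builtin_cpu_supports ("avx2");
     _2 = MIN_EXPR <_1, _previous_conditions>;
     if (_2 > 0)
       return (void *) &foo.avx2;
     <fall through to the test for the next version>  */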
2576 
2577 static basic_block
2578 add_condition_to_bb (tree function_decl, tree version_decl,
2579 		     tree predicate_chain, basic_block new_bb)
2580 {
2581   gimple *return_stmt;
2582   tree convert_expr, result_var;
2583   gimple *convert_stmt;
2584   gimple *call_cond_stmt;
2585   gimple *if_else_stmt;
2586 
2587   basic_block bb1, bb2, bb3;
2588   edge e12, e23;
2589 
2590   tree cond_var, and_expr_var = NULL_TREE;
2591   gimple_seq gseq;
2592 
2593   tree predicate_decl, predicate_arg;
2594 
2595   push_cfun (DECL_STRUCT_FUNCTION (function_decl));
2596 
2597   gcc_assert (new_bb != NULL);
2598   gseq = bb_seq (new_bb);
2599 
2600 
2601   convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
2602 	     		 build_fold_addr_expr (version_decl));
2603   result_var = create_tmp_var (ptr_type_node);
2604   convert_stmt = gimple_build_assign (result_var, convert_expr);
2605   return_stmt = gimple_build_return (result_var);
2606 
2607   if (predicate_chain == NULL_TREE)
2608     {
2609       gimple_seq_add_stmt (&gseq, convert_stmt);
2610       gimple_seq_add_stmt (&gseq, return_stmt);
2611       set_bb_seq (new_bb, gseq);
2612       gimple_set_bb (convert_stmt, new_bb);
2613       gimple_set_bb (return_stmt, new_bb);
2614       pop_cfun ();
2615       return new_bb;
2616     }
2617 
2618   while (predicate_chain != NULL)
2619     {
2620       cond_var = create_tmp_var (integer_type_node);
2621       predicate_decl = TREE_PURPOSE (predicate_chain);
2622       predicate_arg = TREE_VALUE (predicate_chain);
2623       call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
2624       gimple_call_set_lhs (call_cond_stmt, cond_var);
2625 
2626       gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
2627       gimple_set_bb (call_cond_stmt, new_bb);
2628       gimple_seq_add_stmt (&gseq, call_cond_stmt);
2629 
2630       predicate_chain = TREE_CHAIN (predicate_chain);
2631 
2632       if (and_expr_var == NULL)
2633         and_expr_var = cond_var;
2634       else
2635 	{
2636 	  gimple *assign_stmt;
2637 	  /* Use MIN_EXPR to check if any integer is zero:
2638 	     and_expr_var = min_expr <cond_var, and_expr_var>  */
2639 	  assign_stmt = gimple_build_assign (and_expr_var,
2640 			  build2 (MIN_EXPR, integer_type_node,
2641 				  cond_var, and_expr_var));
2642 
2643 	  gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
2644 	  gimple_set_bb (assign_stmt, new_bb);
2645 	  gimple_seq_add_stmt (&gseq, assign_stmt);
2646 	}
2647     }
2648 
2649   if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
2650 	  		            integer_zero_node,
2651 				    NULL_TREE, NULL_TREE);
2652   gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
2653   gimple_set_bb (if_else_stmt, new_bb);
2654   gimple_seq_add_stmt (&gseq, if_else_stmt);
2655 
2656   gimple_seq_add_stmt (&gseq, convert_stmt);
2657   gimple_seq_add_stmt (&gseq, return_stmt);
2658   set_bb_seq (new_bb, gseq);
2659 
2660   bb1 = new_bb;
2661   e12 = split_block (bb1, if_else_stmt);
2662   bb2 = e12->dest;
2663   e12->flags &= ~EDGE_FALLTHRU;
2664   e12->flags |= EDGE_TRUE_VALUE;
2665 
2666   e23 = split_block (bb2, return_stmt);
2667 
2668   gimple_set_bb (convert_stmt, bb2);
2669   gimple_set_bb (return_stmt, bb2);
2670 
2671   bb3 = e23->dest;
2672   make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2673 
2674   remove_edge (e23);
2675   make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
2676 
2677   pop_cfun ();
2678 
2679   return bb3;
2680 }
2681 
2682 /* This function generates the dispatch function for
2683    multi-versioned functions.  DISPATCH_DECL is the function which will
2684    contain the dispatch logic.  FNDECLS are the function choices for
2685    dispatch, and is a tree chain.  EMPTY_BB is the basic block pointer
2686    in DISPATCH_DECL in which the dispatch code is generated.  */
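/* The generated dispatcher body has roughly this shape (a sketch; the
   actual statements are built by add_condition_to_bb above):

     __builtin_cpu_init ();
     if (<predicate for the highest-priority version>)
       return <that version>;
     ...
     return <default version>;  */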
2687 
2688 static int
2689 dispatch_function_versions (tree dispatch_decl,
2690 			    void *fndecls_p,
2691 			    basic_block *empty_bb)
2692 {
2693   tree default_decl;
2694   gimple *ifunc_cpu_init_stmt;
2695   gimple_seq gseq;
2696   int ix;
2697   tree ele;
2698   vec<tree> *fndecls;
2699   unsigned int num_versions = 0;
2700   unsigned int actual_versions = 0;
2701   unsigned int i;
2702 
2703   struct _function_version_info
2704     {
2705       tree version_decl;
2706       tree predicate_chain;
2707       unsigned int dispatch_priority;
2708     }*function_version_info;
2709 
2710   gcc_assert (dispatch_decl != NULL
2711 	      && fndecls_p != NULL
2712 	      && empty_bb != NULL);
2713 
2714   /* fndecls_p is actually a vector.  */
2715   fndecls = static_cast<vec<tree> *> (fndecls_p);
2716 
2717   /* At least one more version other than the default.  */
2718   num_versions = fndecls->length ();
2719   gcc_assert (num_versions >= 2);
2720 
2721   function_version_info = (struct _function_version_info *)
2722     XNEWVEC (struct _function_version_info, (num_versions - 1));
2723 
2724   /* The first version in the vector is the default decl.  */
2725   default_decl = (*fndecls)[0];
2726 
2727   push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
2728 
2729   gseq = bb_seq (*empty_bb);
2730   /* Function version dispatch is via IFUNC.  IFUNC resolvers fire before
2731      constructors, so explicitly call __builtin_cpu_init here.  */
2732   ifunc_cpu_init_stmt
2733     = gimple_build_call_vec (get_ix86_builtin (IX86_BUILTIN_CPU_INIT), vNULL);
2734   gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
2735   gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
2736   set_bb_seq (*empty_bb, gseq);
2737 
2738   pop_cfun ();
2739 
2740 
2741   for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
2742     {
2743       tree version_decl = ele;
2744       tree predicate_chain = NULL_TREE;
2745       unsigned int priority;
2746       /* Get attribute string, parse it and find the right predicate decl.
2747          The predicate function could be a lengthy combination of many
2748 	 features, like arch-type and various isa-variants.  */
2749       priority = get_builtin_code_for_version (version_decl,
2750 	 			               &predicate_chain);
2751 
2752       if (predicate_chain == NULL_TREE)
2753 	continue;
2754 
2755       function_version_info [actual_versions].version_decl = version_decl;
2756       function_version_info [actual_versions].predicate_chain
2757 	 = predicate_chain;
2758       function_version_info [actual_versions].dispatch_priority = priority;
2759       actual_versions++;
2760     }
2761 
2762   /* Sort the versions according to descending order of dispatch priority.  The
2763      priority is based on the ISA.  This is not a perfect solution.  There
2764      could still be ambiguity.  If more than one function version is suitable
2765      to execute, which one should be dispatched?  In the future, allow the user
2766      to specify a dispatch priority next to the version.  */
2767   qsort (function_version_info, actual_versions,
2768          sizeof (struct _function_version_info), feature_compare);
2769 
2770   for  (i = 0; i < actual_versions; ++i)
2771     *empty_bb = add_condition_to_bb (dispatch_decl,
2772 				     function_version_info[i].version_decl,
2773 				     function_version_info[i].predicate_chain,
2774 				     *empty_bb);
2775 
2776   /* Dispatch the default version at the end.  */
2777   *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
2778 				   NULL, *empty_bb);
2779 
2780   free (function_version_info);
2781   return 0;
2782 }
2783 
2784 /* This function changes the assembler name for functions that are
2785    versions.  If DECL is a function version and has a "target"
2786    attribute, it appends the attribute string to its assembler name.  */
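/* For instance, a version of foo declared with
   __attribute__ ((target ("avx2"))) ends up with an assembler name
   along the lines of "foo.avx2" (the exact suffix is whatever
   sorted_attr_string produces), while the "default" version keeps its
   original name.  */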
2787 
2788 static tree
2789 ix86_mangle_function_version_assembler_name (tree decl, tree id)
2790 {
2791   tree version_attr;
2792   const char *orig_name, *version_string;
2793   char *attr_str, *assembler_name;
2794 
2795   if (DECL_DECLARED_INLINE_P (decl)
2796       && lookup_attribute ("gnu_inline",
2797 			   DECL_ATTRIBUTES (decl)))
2798     error_at (DECL_SOURCE_LOCATION (decl),
2799 	      "function versions cannot be marked as %<gnu_inline%>,"
2800 	      " bodies have to be generated");
2801 
2802   if (DECL_VIRTUAL_P (decl)
2803       || DECL_VINDEX (decl))
2804     sorry ("virtual function multiversioning not supported");
2805 
2806   version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
2807 
2808   /* target attribute string cannot be NULL.  */
2809   gcc_assert (version_attr != NULL_TREE);
2810 
2811   orig_name = IDENTIFIER_POINTER (id);
2812   version_string
2813     = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
2814 
2815   if (strcmp (version_string, "default") == 0)
2816     return id;
2817 
2818   attr_str = sorted_attr_string (TREE_VALUE (version_attr));
2819   assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
2820 
2821   sprintf (assembler_name, "%s.%s", orig_name, attr_str);
2822 
2823   /* Allow assembler name to be modified if already set.  */
2824   if (DECL_ASSEMBLER_NAME_SET_P (decl))
2825     SET_DECL_RTL (decl, NULL);
2826 
2827   tree ret = get_identifier (assembler_name);
2828   XDELETEVEC (attr_str);
2829   XDELETEVEC (assembler_name);
2830   return ret;
2831 }
2832 
2833 tree
2834 ix86_mangle_decl_assembler_name (tree decl, tree id)
2835 {
2836   /* For function version, add the target suffix to the assembler name.  */
2837   if (TREE_CODE (decl) == FUNCTION_DECL
2838       && DECL_FUNCTION_VERSIONED (decl))
2839     id = ix86_mangle_function_version_assembler_name (decl, id);
2840 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
2841   id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
2842 #endif
2843 
2844   return id;
2845 }
2846 
2847 /* Make a dispatcher declaration for the multi-versioned function DECL.
2848    Calls to DECL function will be replaced with calls to the dispatcher
2849    by the front-end.  Returns the decl of the dispatcher function.  */
2850 
2851 tree
2852 ix86_get_function_versions_dispatcher (void *decl)
2853 {
2854   tree fn = (tree) decl;
2855   struct cgraph_node *node = NULL;
2856   struct cgraph_node *default_node = NULL;
2857   struct cgraph_function_version_info *node_v = NULL;
2858   struct cgraph_function_version_info *first_v = NULL;
2859 
2860   tree dispatch_decl = NULL;
2861 
2862   struct cgraph_function_version_info *default_version_info = NULL;
2863 
2864   gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
2865 
2866   node = cgraph_node::get (fn);
2867   gcc_assert (node != NULL);
2868 
2869   node_v = node->function_version ();
2870   gcc_assert (node_v != NULL);
2871 
2872   if (node_v->dispatcher_resolver != NULL)
2873     return node_v->dispatcher_resolver;
2874 
2875   /* Find the default version and make it the first node.  */
2876   first_v = node_v;
2877   /* Go to the beginning of the chain.  */
2878   while (first_v->prev != NULL)
2879     first_v = first_v->prev;
2880   default_version_info = first_v;
2881   while (default_version_info != NULL)
2882     {
2883       if (is_function_default_version
2884 	    (default_version_info->this_node->decl))
2885         break;
2886       default_version_info = default_version_info->next;
2887     }
2888 
2889   /* If there is no default node, just return NULL.  */
2890   if (default_version_info == NULL)
2891     return NULL;
2892 
2893   /* Make default info the first node.  */
2894   if (first_v != default_version_info)
2895     {
2896       default_version_info->prev->next = default_version_info->next;
2897       if (default_version_info->next)
2898         default_version_info->next->prev = default_version_info->prev;
2899       first_v->prev = default_version_info;
2900       default_version_info->next = first_v;
2901       default_version_info->prev = NULL;
2902     }
2903 
2904   default_node = default_version_info->this_node;
2905 
2906 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
2907   if (targetm.has_ifunc_p ())
2908     {
2909       struct cgraph_function_version_info *it_v = NULL;
2910       struct cgraph_node *dispatcher_node = NULL;
2911       struct cgraph_function_version_info *dispatcher_version_info = NULL;
2912 
2913       /* Right now, the dispatching is done via ifunc.  */
2914       dispatch_decl = make_dispatcher_decl (default_node->decl);
2915 
2916       dispatcher_node = cgraph_node::get_create (dispatch_decl);
2917       gcc_assert (dispatcher_node != NULL);
2918       dispatcher_node->dispatcher_function = 1;
2919       dispatcher_version_info
2920 	= dispatcher_node->insert_new_function_version ();
2921       dispatcher_version_info->next = default_version_info;
2922       dispatcher_node->definition = 1;
2923 
2924       /* Set the dispatcher for all the versions.  */
2925       it_v = default_version_info;
2926       while (it_v != NULL)
2927 	{
2928 	  it_v->dispatcher_resolver = dispatch_decl;
2929 	  it_v = it_v->next;
2930 	}
2931     }
2932   else
2933 #endif
2934     {
2935       error_at (DECL_SOURCE_LOCATION (default_node->decl),
2936 		"multiversioning needs %<ifunc%> which is not supported "
2937 		"on this target");
2938     }
2939 
2940   return dispatch_decl;
2941 }
2942 
2943 /* Make the resolver function decl to dispatch the versions of
2944    a multi-versioned function, DEFAULT_DECL.  IFUNC_ALIAS_DECL is the
2945    ifunc alias that will point to the created resolver.  Create an
2946    empty basic block in the resolver and store the pointer in
2947    EMPTY_BB.  Return the decl of the resolver function.  */
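/* A sketch of the resulting symbols for a versioned function foo
   (names are illustrative): the resolver created here is emitted under
   a name like "foo.resolver", and IFUNC_ALIAS_DECL is marked with
   __attribute__ ((ifunc ("foo.resolver"))) so that calls to foo are
   resolved through it at load time.  */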
2948 
2949 static tree
2950 make_resolver_func (const tree default_decl,
2951 		    const tree ifunc_alias_decl,
2952 		    basic_block *empty_bb)
2953 {
2954   tree decl, type, t;
2955 
2956   /* Create resolver function name based on default_decl.  */
2957   tree decl_name = clone_function_name (default_decl, "resolver");
2958   const char *resolver_name = IDENTIFIER_POINTER (decl_name);
2959 
2960   /* The resolver function should return a (void *). */
2961   type = build_function_type_list (ptr_type_node, NULL_TREE);
2962 
2963   decl = build_fn_decl (resolver_name, type);
2964   SET_DECL_ASSEMBLER_NAME (decl, decl_name);
2965 
2966   DECL_NAME (decl) = decl_name;
2967   TREE_USED (decl) = 1;
2968   DECL_ARTIFICIAL (decl) = 1;
2969   DECL_IGNORED_P (decl) = 1;
2970   TREE_PUBLIC (decl) = 0;
2971   DECL_UNINLINABLE (decl) = 1;
2972 
2973   /* Resolver is not external, body is generated.  */
2974   DECL_EXTERNAL (decl) = 0;
2975   DECL_EXTERNAL (ifunc_alias_decl) = 0;
2976 
2977   DECL_CONTEXT (decl) = NULL_TREE;
2978   DECL_INITIAL (decl) = make_node (BLOCK);
2979   DECL_STATIC_CONSTRUCTOR (decl) = 0;
2980 
2981   if (DECL_COMDAT_GROUP (default_decl)
2982       || TREE_PUBLIC (default_decl))
2983     {
2984       /* In this case, each translation unit with a call to this
2985 	 versioned function will put out a resolver.  Ensure it
2986 	 is comdat to keep just one copy.  */
2987       DECL_COMDAT (decl) = 1;
2988       make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
2989     }
2990   else
2991     TREE_PUBLIC (ifunc_alias_decl) = 0;
2992 
2993   /* Build result decl and add to function_decl. */
2994   t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
2995   DECL_CONTEXT (t) = decl;
2996   DECL_ARTIFICIAL (t) = 1;
2997   DECL_IGNORED_P (t) = 1;
2998   DECL_RESULT (decl) = t;
2999 
3000   gimplify_function_tree (decl);
3001   push_cfun (DECL_STRUCT_FUNCTION (decl));
3002   *empty_bb = init_lowered_empty_function (decl, false,
3003 					   profile_count::uninitialized ());
3004 
3005   cgraph_node::add_new_function (decl, true);
3006   symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
3007 
3008   pop_cfun ();
3009 
3010   gcc_assert (ifunc_alias_decl != NULL);
3011   /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name.  */
3012   DECL_ATTRIBUTES (ifunc_alias_decl)
3013     = make_attribute ("ifunc", resolver_name,
3014 		      DECL_ATTRIBUTES (ifunc_alias_decl));
3015 
3016   /* Create the alias for dispatch to resolver here.  */
3017   cgraph_node::create_same_body_alias (ifunc_alias_decl, decl);
3018   return decl;
3019 }
3020 
3021 /* Generate the dispatching code body to dispatch multi-versioned function
3022    DECL.  The target hook is called to process the "target" attributes and
3023    provide the code to dispatch the right function at run-time.  NODE points
3024    to the dispatcher decl whose body will be created.  */
3025 
3026 tree
3027 ix86_generate_version_dispatcher_body (void *node_p)
3028 {
3029   tree resolver_decl;
3030   basic_block empty_bb;
3031   tree default_ver_decl;
3032   struct cgraph_node *versn;
3033   struct cgraph_node *node;
3034 
3035   struct cgraph_function_version_info *node_version_info = NULL;
3036   struct cgraph_function_version_info *versn_info = NULL;
3037 
3038   node = (cgraph_node *)node_p;
3039 
3040   node_version_info = node->function_version ();
3041   gcc_assert (node->dispatcher_function
3042 	      && node_version_info != NULL);
3043 
3044   if (node_version_info->dispatcher_resolver)
3045     return node_version_info->dispatcher_resolver;
3046 
3047   /* The first version in the chain corresponds to the default version.  */
3048   default_ver_decl = node_version_info->next->this_node->decl;
3049 
3050   /* node is going to be an alias, so remove the finalized bit.  */
3051   node->definition = false;
3052 
3053   resolver_decl = make_resolver_func (default_ver_decl,
3054 				      node->decl, &empty_bb);
3055 
3056   node_version_info->dispatcher_resolver = resolver_decl;
3057 
3058   push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
3059 
3060   auto_vec<tree, 2> fn_ver_vec;
3061 
3062   for (versn_info = node_version_info->next; versn_info;
3063        versn_info = versn_info->next)
3064     {
3065       versn = versn_info->this_node;
3066       /* Check for virtual functions here again, as by this time it should
3067 	 have been determined if this function needs a vtable index or
3068 	 not.  This happens for methods in derived classes that override
3069 	 virtual methods in base classes but are not explicitly marked as
3070 	 virtual.  */
3071       if (DECL_VINDEX (versn->decl))
3072 	sorry ("virtual function multiversioning not supported");
3073 
3074       fn_ver_vec.safe_push (versn->decl);
3075     }
3076 
3077   dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
3078   cgraph_edge::rebuild_edges ();
3079   pop_cfun ();
3080   return resolver_decl;
3081 }
3082 
3083 
3084