1 /* LTO partitioning logic routines.
2    Copyright (C) 2009-2021 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "target.h"
24 #include "function.h"
25 #include "basic-block.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "alloc-pool.h"
29 #include "stringpool.h"
30 #include "cgraph.h"
31 #include "lto-streamer.h"
32 #include "symbol-summary.h"
33 #include "tree-vrp.h"
34 #include "ipa-prop.h"
35 #include "ipa-fnsummary.h"
36 #include "lto-partition.h"
37 #include "sreal.h"
38 
/* All partitions created so far, in creation order.  */
vec<ltrans_partition> ltrans_partitions;

static void add_symbol_to_partition (ltrans_partition part, symtab_node *node);
42 
43 
44 /* Helper for qsort; compare partitions and return one with smaller order.  */
45 
46 static int
cmp_partitions_order(const void * a,const void * b)47 cmp_partitions_order (const void *a, const void *b)
48 {
49   const struct ltrans_partition_def *pa
50      = *(struct ltrans_partition_def *const *)a;
51   const struct ltrans_partition_def *pb
52      = *(struct ltrans_partition_def *const *)b;
53   int ordera = -1, orderb = -1;
54 
55   if (lto_symtab_encoder_size (pa->encoder))
56     ordera = lto_symtab_encoder_deref (pa->encoder, 0)->order;
57   if (lto_symtab_encoder_size (pb->encoder))
58     orderb = lto_symtab_encoder_deref (pb->encoder, 0)->order;
59   return orderb - ordera;
60 }
61 
62 /* Create new partition with name NAME.  */
63 
64 static ltrans_partition
new_partition(const char * name)65 new_partition (const char *name)
66 {
67   ltrans_partition part = XCNEW (struct ltrans_partition_def);
68   part->encoder = lto_symtab_encoder_new (false);
69   part->name = name;
70   part->insns = 0;
71   part->symbols = 0;
72   ltrans_partitions.safe_push (part);
73   return part;
74 }
75 
76 /* Free memory used by ltrans datastructures.  */
77 
78 void
free_ltrans_partitions(void)79 free_ltrans_partitions (void)
80 {
81   unsigned int idx;
82   ltrans_partition part;
83   for (idx = 0; ltrans_partitions.iterate (idx, &part); idx++)
84     {
85       if (part->initializers_visited)
86 	delete part->initializers_visited;
87       /* Symtab encoder is freed after streaming.  */
88       free (part);
89     }
90   ltrans_partitions.release ();
91 }
92 
93 /* Return true if symbol is already in some partition.  */
94 
95 static inline bool
symbol_partitioned_p(symtab_node * node)96 symbol_partitioned_p (symtab_node *node)
97 {
98   return node->aux;
99 }
100 
/* Add the references of NODE into partition PART.  Duplicated referents are
   added directly; initializers of foldable read-only variables are walked
   recursively so constant folding in the partition can see them.  */
static void
add_references_to_partition (ltrans_partition part, symtab_node *node)
{
  int i;
  struct ipa_ref *ref = NULL;

  /* Add all duplicated references to the partition.  */
  for (i = 0; node->iterate_reference (i, ref); i++)
    if (ref->referred->get_partitioning_class () == SYMBOL_DUPLICATE)
      add_symbol_to_partition (part, ref->referred);
    /* References to a readonly variable may be constant folded into its value.
       Recursively look into the initializers of the constant variable and add
       references, too.  */
    else if (is_a <varpool_node *> (ref->referred)
	     && (dyn_cast <varpool_node *> (ref->referred)
		 ->ctor_useable_for_folding_p ())
	     && !lto_symtab_encoder_in_partition_p (part->encoder, ref->referred))
      {
	/* Lazily allocate the visited set; it guards against walking the
	   same initializer twice and against reference cycles.  */
	if (!part->initializers_visited)
	  part->initializers_visited = new hash_set<symtab_node *>;
	if (!part->initializers_visited->add (ref->referred))
	  add_references_to_partition (part, ref->referred);
      }
}
126 
/* Helper function for add_symbol_to_partition doing the actual dirty work
   of adding NODE to PART.  Return true on success, false when NODE is a
   non-duplicated, non-comdat symbol that already lives in another
   partition.  */

static bool
add_symbol_to_partition_1 (ltrans_partition part, symtab_node *node)
{
  enum symbol_partitioning_class c = node->get_partitioning_class ();
  struct ipa_ref *ref;
  symtab_node *node1;

  /* If NODE is already there, we have nothing to do.  */
  if (lto_symtab_encoder_in_partition_p (part->encoder, node))
    return true;

  /* non-duplicated aliases or thunks of a duplicated symbol needs to be output
     just once.

     Be lax about comdats; they may or may not be duplicated and we may
     end up in need to duplicate keyed comdat because it has unkeyed alias.  */
  if (c == SYMBOL_PARTITION && !DECL_COMDAT (node->decl)
      && symbol_partitioned_p (node))
    return false;

  /* Be sure that we never try to duplicate partitioned symbol
     or add external symbol.  */
  gcc_assert (c != SYMBOL_EXTERNAL
	      && (c == SYMBOL_DUPLICATE || !symbol_partitioned_p (node)));

  part->symbols++;

  lto_set_symtab_encoder_in_partition (part->encoder, node);

  if (symbol_partitioned_p (node))
    {
      node->in_other_partition = 1;
      if (dump_file)
	fprintf (dump_file,
		 "Symbol node %s now used in multiple partitions\n",
		 node->dump_name ());
    }
  /* AUX counts the number of partitions the node was added to.  */
  node->aux = (void *)((size_t)node->aux + 1);

  if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node))
    {
      struct cgraph_edge *e;
      /* Account the function body size only once per real definition.  */
      if (!node->alias && c == SYMBOL_PARTITION)
	part->insns += ipa_size_summaries->get (cnode)->size;

      /* Add all inline clones and callees that are duplicated.  */
      for (e = cnode->callees; e; e = e->next_callee)
	if (!e->inline_failed)
	  add_symbol_to_partition_1 (part, e->callee);
	else if (e->callee->get_partitioning_class () == SYMBOL_DUPLICATE)
	  add_symbol_to_partition (part, e->callee);

      /* Add all thunks associated with the function.  */
      for (e = cnode->callers; e; e = e->next_caller)
	if (e->caller->thunk && !e->caller->inlined_to)
	  add_symbol_to_partition_1 (part, e->caller);
    }

  add_references_to_partition (part, node);

  /* Add all aliases associated with the symbol.  */

  FOR_EACH_ALIAS (node, ref)
    if (!ref->referring->transparent_alias)
      add_symbol_to_partition_1 (part, ref->referring);
    else
      {
	struct ipa_ref *ref2;
	/* We do not need to add transparent aliases if they are not used.
	   However we must add aliases of transparent aliases if they exist.  */
	FOR_EACH_ALIAS (ref->referring, ref2)
	  {
	    /* Nested transparent aliases are not permitted.  */
	    gcc_checking_assert (!ref2->referring->transparent_alias);
	    add_symbol_to_partition_1 (part, ref2->referring);
	  }
      }

  /* Ensure that SAME_COMDAT_GROUP lists all always added in a group.  */
  if (node->same_comdat_group)
    for (node1 = node->same_comdat_group;
	 node1 != node; node1 = node1->same_comdat_group)
      if (!node->alias)
	{
	  bool added = add_symbol_to_partition_1 (part, node1);
	  gcc_assert (added);
	}
  return true;
}
219 
/* If symbol NODE is really part of other symbol's definition (i.e. it is
   internal label, thunk, alias or so), return the outer symbol.
   When add_symbol_to_partition_1 is called on the outer symbol it must
   eventually add NODE, too.  */
static symtab_node *
contained_in_symbol (symtab_node *node)
{
  /* There is no need to consider transparent aliases to be part of the
     definition: they are only useful inside the partition they are output
     and thus we will always see an explicit reference to it.  */
  if (node->transparent_alias)
    return node;
  if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node))
    {
      /* For functions, walk to the symbol owning the body and, for inline
	 clones, to the function they were inlined into.  */
      cnode = cnode->function_symbol ();
      if (cnode->inlined_to)
	cnode = cnode->inlined_to;
      return cnode;
    }
  else if (varpool_node *vnode = dyn_cast <varpool_node *> (node))
    return vnode->ultimate_alias_target ();
  return node;
}
243 
/* Add symbol NODE to partition PART.  When definition of NODE is part
   of other symbol definition, add the other symbol, too.  */

static void
add_symbol_to_partition (ltrans_partition part, symtab_node *node)
{
  symtab_node *node1;

  /* Verify that we do not try to duplicate something that cannot be.  */
  gcc_checking_assert (node->get_partitioning_class () == SYMBOL_DUPLICATE
		       || !symbol_partitioned_p (node));

  /* Walk outward to the symbol that actually owns NODE's definition;
     adding the owner transitively adds NODE.  */
  while ((node1 = contained_in_symbol (node)) != node)
    node = node1;

  /* If we have duplicated symbol contained in something we cannot duplicate,
     we are very badly screwed.  The other way is possible, so we do not
     assert this in add_symbol_to_partition_1.

     Be lax about comdats; they may or may not be duplicated and we may
     end up in need to duplicate keyed comdat because it has unkeyed alias.  */

  gcc_assert (node->get_partitioning_class () == SYMBOL_DUPLICATE
	      || DECL_COMDAT (node->decl)
	      || !symbol_partitioned_p (node));

  add_symbol_to_partition_1 (part, node);
}
272 
/* Undo additions to PARTITION until only the first N_NODES symbols remain
   in its encoder.  Used by the balanced-map algorithm to rewind to the
   best split point found so far.  */

static void
undo_partition (ltrans_partition partition, unsigned int n_nodes)
{
  while (lto_symtab_encoder_size (partition->encoder) > (int)n_nodes)
    {
      symtab_node *node = lto_symtab_encoder_deref (partition->encoder,
						   n_nodes);
      partition->symbols--;
      cgraph_node *cnode;

      /* After UNDO we no longer know what was visited.  */
      if (partition->initializers_visited)
	delete partition->initializers_visited;
      partition->initializers_visited = NULL;

      /* Keep the instruction-size estimate in sync with the removal.  */
      if (!node->alias && (cnode = dyn_cast <cgraph_node *> (node))
          && node->get_partitioning_class () == SYMBOL_PARTITION)
	partition->insns -= ipa_size_summaries->get (cnode)->size;
      lto_symtab_encoder_delete_node (partition->encoder, node);
      /* AUX counts the number of partitions containing the node.  */
      node->aux = (void *)((size_t)node->aux - 1);
    }
}
298 
/* Group cgraph nodes by input files.  This is used mainly for testing
   right now.  */

void
lto_1_to_1_map (void)
{
  symtab_node *node;
  struct lto_file_decl_data *file_data;
  /* Maps each input file to the partition created for it.  */
  hash_map<lto_file_decl_data *, ltrans_partition> pmap;
  ltrans_partition partition;
  int npartitions = 0;

  FOR_EACH_SYMBOL (node)
    {
      if (node->get_partitioning_class () != SYMBOL_PARTITION
	  || symbol_partitioned_p (node))
	continue;

      file_data = node->lto_file_data;

      if (file_data)
	{
          ltrans_partition *slot = &pmap.get_or_insert (file_data);
          if (*slot)
	    partition = *slot;
	  else
	    {
	      partition = new_partition (file_data->file_name);
	      *slot = partition;
	      npartitions++;
	    }
	}
      /* Symbols with no originating file share the first partition when
	 one already exists.  */
      else if (!file_data && ltrans_partitions.length ())
	partition = ltrans_partitions[0];
      else
	{
	  partition = new_partition ("");
	  pmap.put (NULL, partition);
	  npartitions++;
	}

      add_symbol_to_partition (partition, node);
    }

  /* If the cgraph is empty, create one cgraph node set so that there is still
     an output file for any variables that need to be exported in a DSO.  */
  if (!npartitions)
    new_partition ("empty");

  /* Order partitions by order of symbols because they are linked into binary
     that way.  */
  ltrans_partitions.qsort (cmp_partitions_order);
}
352 
353 /* Maximal partitioning.  Put every new symbol into new partition if possible.  */
354 
355 void
lto_max_map(void)356 lto_max_map (void)
357 {
358   symtab_node *node;
359   ltrans_partition partition;
360   int npartitions = 0;
361 
362   FOR_EACH_SYMBOL (node)
363     {
364       if (node->get_partitioning_class () != SYMBOL_PARTITION
365 	  || symbol_partitioned_p (node))
366 	continue;
367       partition = new_partition (node->asm_name ());
368       add_symbol_to_partition (partition, node);
369       npartitions++;
370     }
371   if (!npartitions)
372     new_partition ("empty");
373 }
374 
375 /* Helper function for qsort; sort nodes by order.  */
376 static int
node_cmp(const void * pa,const void * pb)377 node_cmp (const void *pa, const void *pb)
378 {
379   const symtab_node *a = *static_cast<const symtab_node * const *> (pa);
380   const symtab_node *b = *static_cast<const symtab_node * const *> (pb);
381   return b->order - a->order;
382 }
383 
384 /* Add all symtab nodes from NEXT_NODE to PARTITION in order.  */
385 
386 static void
add_sorted_nodes(vec<symtab_node * > & next_nodes,ltrans_partition partition)387 add_sorted_nodes (vec<symtab_node *> &next_nodes, ltrans_partition partition)
388 {
389   unsigned i;
390   symtab_node *node;
391 
392   next_nodes.qsort (node_cmp);
393   FOR_EACH_VEC_ELT (next_nodes, i, node)
394     if (!symbol_partitioned_p (node))
395       add_symbol_to_partition (partition, node);
396 }
397 
/* Return true if we should account reference from N1 to N2 in cost
   of partition boundary.  */

bool
account_reference_p (symtab_node *n1, symtab_node *n2)
{
  if (cgraph_node *cnode = dyn_cast <cgraph_node *> (n1))
    n1 = cnode;
  /* Do not account references from aliases - they are never split across
     partitions.  */
  if (n1->alias)
    return false;
  /* Do not account recursion - the code below will handle it incorrectly
     otherwise.  Do not account references to external symbols: they will
     never become local.  Finally do not account references to duplicated
     symbols: they will be always local.  */
  if (n1 == n2
      || !n2->definition
      || n2->get_partitioning_class () != SYMBOL_PARTITION)
    return false;
  /* If referring node is external symbol do not account it to boundary
     cost.  Those are added into units only to enable possible constant
     folding and devirtualization.

     Here we do not know if it will ever be added to some partition
     (this is decided by compute_ltrans_boundary) and second it is not
     that likely that constant folding will actually use the reference.  */
  if (contained_in_symbol (n1)
	->get_partitioning_class () == SYMBOL_EXTERNAL)
    return false;
  return true;
}
430 
431 
/* Group cgraph nodes into equally-sized partitions.

   The partitioning algorithm is simple: nodes are taken in predefined order.
   The order corresponds to the order we want functions to have in the final
   output.  In the future this will be given by function reordering pass, but
   at the moment we use the topological order, which is a good approximation.

   The goal is to partition this linear order into intervals (partitions) so
   that all the partitions have approximately the same size and the number of
   callgraph or IPA reference edges crossing boundaries is minimal.

   This is a lot faster (O(n) in size of callgraph) than algorithms doing
   priority-based graph clustering that are generally O(n^2) and, since
   WHOPR is designed to make things go well across partitions, it leads
   to good results.

   We compute the expected size of a partition as:

     max (total_size / lto_partitions, min_partition_size)

   We use dynamic expected size of partition so small programs are partitioned
   into enough partitions to allow use of multiple CPUs, while large programs
   are not partitioned too much.  Creating too many partitions significantly
   increases the streaming overhead.

   In the future, we would like to bound the maximal size of partitions so as
   to prevent the LTRANS stage from consuming too much memory.  At the moment,
   however, the WPA stage is the most memory intensive for large benchmarks,
   since too many types and declarations are read into memory.

   The function implements a simple greedy algorithm.  Nodes are being added
   to the current partition until after 3/4 of the expected partition size is
   reached.  Past this threshold, we keep track of boundary size (number of
   edges going to other partitions) and continue adding functions until after
   the current partition has grown to twice the expected partition size.  Then
   the process is undone to the point where the minimal ratio of boundary size
   and in-partition calls was reached.  */

void
lto_balanced_map (int n_lto_partitions, int max_partition_size)
{
  int n_varpool_nodes = 0, varpool_pos = 0, best_varpool_pos = 0;
  int best_noreorder_pos = 0;
  auto_vec <cgraph_node *> order (symtab->cgraph_count);
  auto_vec<cgraph_node *> noreorder;
  auto_vec<varpool_node *> varpool_order;
  struct cgraph_node *node;
  int64_t original_total_size, total_size = 0;
  int64_t partition_size;
  ltrans_partition partition;
  int last_visited_node = 0;
  varpool_node *vnode;
  /* COST counts edges leaving the current partition; INTERNAL counts
     edges with both ends inside it.  */
  int64_t cost = 0, internal = 0;
  unsigned int best_n_nodes = 0, best_i = 0;
  int64_t best_cost = -1, best_internal = 0, best_size = 0;
  int npartitions;
  int current_order = -1;
  int noreorder_pos = 0;

  FOR_EACH_VARIABLE (vnode)
    gcc_assert (!vnode->aux);

  /* Collect partitionable functions, separating the no_reorder ones, and
     sum up the estimated size of the whole program.  */
  FOR_EACH_DEFINED_FUNCTION (node)
    if (node->get_partitioning_class () == SYMBOL_PARTITION)
      {
	if (node->no_reorder)
	  noreorder.safe_push (node);
	else
	  order.safe_push (node);
	if (!node->alias)
	  total_size += ipa_size_summaries->get (node)->size;
      }

  original_total_size = total_size;

  /* Streaming works best when the source units do not cross partition
     boundaries much.  This is because importing function from a source
     unit tends to import a lot of global trees defined there.  We should
     get better about minimizing the function boundary, but until that
     things work smoother if we order in source order.  */
  order.qsort (tp_first_run_node_cmp);
  noreorder.qsort (node_cmp);

  if (dump_file)
    {
      for (unsigned i = 0; i < order.length (); i++)
	fprintf (dump_file, "Balanced map symbol order:%s:%u\n",
		 order[i]->dump_name (), order[i]->tp_first_run);
      for (unsigned i = 0; i < noreorder.length (); i++)
	fprintf (dump_file, "Balanced map symbol no_reorder:%s:%u\n",
		 noreorder[i]->dump_name (), noreorder[i]->tp_first_run);
    }

  /* Collect all variables that should not be reordered.  */
  FOR_EACH_VARIABLE (vnode)
    if (vnode->get_partitioning_class () == SYMBOL_PARTITION
	&& vnode->no_reorder)
      varpool_order.safe_push (vnode);
  n_varpool_nodes = varpool_order.length ();
  varpool_order.qsort (node_cmp);

  /* Compute partition size and create the first partition.  */
  if (param_min_partition_size > max_partition_size)
    fatal_error (input_location, "min partition size cannot be greater "
		 "than max partition size");

  partition_size = total_size / n_lto_partitions;
  if (partition_size < param_min_partition_size)
    partition_size = param_min_partition_size;
  npartitions = 1;
  partition = new_partition ("");
  if (dump_file)
    fprintf (dump_file, "Total unit size: %" PRId64 ", partition size: %" PRId64 "\n",
	     total_size, partition_size);

  auto_vec<symtab_node *> next_nodes;

  for (unsigned i = 0; i < order.length (); i++)
    {
      if (symbol_partitioned_p (order[i]))
	continue;

      current_order = order[i]->order;

      /* Output noreorder and varpool in program order first.  */
      next_nodes.truncate (0);
      while (varpool_pos < n_varpool_nodes
	     && varpool_order[varpool_pos]->order < current_order)
	next_nodes.safe_push (varpool_order[varpool_pos++]);
      while (noreorder_pos < (int)noreorder.length ()
	     && noreorder[noreorder_pos]->order < current_order)
	next_nodes.safe_push (noreorder[noreorder_pos++]);
      add_sorted_nodes (next_nodes, partition);

      if (!symbol_partitioned_p (order[i]))
        add_symbol_to_partition (partition, order[i]);


      /* Once we added a new node to the partition, we also want to add
         all referenced variables unless they were already added into some
         earlier partition.
	 add_symbol_to_partition adds possibly multiple nodes and
	 variables that are needed to satisfy needs of ORDER[i].
         We remember last visited cgraph and varpool node from last iteration
         of outer loop that allows us to process every new addition.

	 At the same time we compute size of the boundary into COST.  Every
         callgraph or IPA reference edge leaving the partition contributes into
         COST.  Every edge inside partition was earlier computed as one leaving
	 it and thus we need to subtract it from COST.  */
      while (last_visited_node < lto_symtab_encoder_size (partition->encoder))
	{
	  int j;
	  struct ipa_ref *ref = NULL;
	  symtab_node *snode = lto_symtab_encoder_deref (partition->encoder,
							last_visited_node);

	  if (cgraph_node *node = dyn_cast <cgraph_node *> (snode))
	    {
	      struct cgraph_edge *edge;


	      last_visited_node++;

	      gcc_assert (node->definition || node->weakref
			  || node->declare_variant_alt);

	      /* Compute boundary cost of callgraph edges.  */
	      for (edge = node->callees; edge; edge = edge->next_callee)
		/* Inline edges will always end up local.  */
		if (edge->inline_failed
		    && account_reference_p (node, edge->callee))
		  {
		    int edge_cost = edge->frequency ();
		    int index;

		    if (!edge_cost)
		      edge_cost = 1;
		    gcc_assert (edge_cost > 0);
		    index = lto_symtab_encoder_lookup (partition->encoder,
						       edge->callee);
		    /* Callee already visited: the edge is internal, so the
		       earlier "leaving" accounting is reverted.  */
		    if (index != LCC_NOT_FOUND
		        && index < last_visited_node - 1)
		      cost -= edge_cost, internal += edge_cost;
		    else
		      cost += edge_cost;
		  }
	      for (edge = node->callers; edge; edge = edge->next_caller)
		if (edge->inline_failed
		    && account_reference_p (edge->caller, node))
		{
		  int edge_cost = edge->frequency ();
		  int index;

		  gcc_assert (edge->caller->definition);
		  if (!edge_cost)
		    edge_cost = 1;
		  gcc_assert (edge_cost > 0);
		  index = lto_symtab_encoder_lookup (partition->encoder,
						     edge->caller);
		  if (index != LCC_NOT_FOUND
		      && index < last_visited_node - 1)
		    cost -= edge_cost, internal += edge_cost;
		  else
		    cost += edge_cost;
		}
	    }
	  else
	    last_visited_node++;

	  /* Compute boundary cost of IPA REF edges and at the same time look into
	     variables referenced from current partition and try to add them.  */
	  for (j = 0; snode->iterate_reference (j, ref); j++)
	    if (!account_reference_p (snode, ref->referred))
	      ;
	    else if (is_a <varpool_node *> (ref->referred))
	      {
		int index;

		vnode = dyn_cast <varpool_node *> (ref->referred);
		if (!symbol_partitioned_p (vnode)
		    && !vnode->no_reorder
		    && vnode->get_partitioning_class () == SYMBOL_PARTITION)
		  add_symbol_to_partition (partition, vnode);
		index = lto_symtab_encoder_lookup (partition->encoder,
						   vnode);
		if (index != LCC_NOT_FOUND
		    && index < last_visited_node - 1)
		  cost--, internal++;
		else
		  cost++;
	      }
	    else
	      {
		int index;

		node = dyn_cast <cgraph_node *> (ref->referred);
		index = lto_symtab_encoder_lookup (partition->encoder,
						   node);
		if (index != LCC_NOT_FOUND
		    && index < last_visited_node - 1)
		  cost--, internal++;
		else
		  cost++;
	      }
	  for (j = 0; snode->iterate_referring (j, ref); j++)
	    if (!account_reference_p (ref->referring, snode))
	      ;
	    else if (is_a <varpool_node *> (ref->referring))
	      {
		int index;

		vnode = dyn_cast <varpool_node *> (ref->referring);
		gcc_assert (vnode->definition);
		/* It is better to couple variables with their users,
		   because it allows them to be removed.  Coupling
		   with objects they refer to only helps to reduce
		   number of symbols promoted to hidden.  */
		if (!symbol_partitioned_p (vnode)
		    && !vnode->no_reorder
		    && !vnode->can_remove_if_no_refs_p ()
		    && vnode->get_partitioning_class () == SYMBOL_PARTITION)
		  add_symbol_to_partition (partition, vnode);
		index = lto_symtab_encoder_lookup (partition->encoder,
						   vnode);
		if (index != LCC_NOT_FOUND
		    && index < last_visited_node - 1)
		  cost--, internal++;
		else
		  cost++;
	      }
	    else
	      {
		int index;

		node = dyn_cast <cgraph_node *> (ref->referring);
		gcc_assert (node->definition || node->declare_variant_alt);
		index = lto_symtab_encoder_lookup (partition->encoder,
						   node);
		if (index != LCC_NOT_FOUND
		    && index < last_visited_node - 1)
		  cost--, internal++;
		else
		  cost++;
	      }
	}

      gcc_assert (cost >= 0 && internal >= 0);

      /* If the partition is large enough, start looking for smallest boundary cost.
         If partition still seems too small (less than 7/8 of target weight) accept
	 any cost.  If partition has right size, optimize for highest internal/cost.
	 Later we stop building partition if its size is 9/8 of the target weight.  */
      if (partition->insns < partition_size * 7 / 8
	  || best_cost == -1
	  || (!cost
	      || ((sreal)best_internal * (sreal) cost
		  < ((sreal) internal * (sreal)best_cost))))
	{
	  /* Remember the current split point as the best one so far.  */
	  best_cost = cost;
	  best_internal = internal;
	  best_size = partition->insns;
	  best_i = i;
	  best_n_nodes = lto_symtab_encoder_size (partition->encoder);
	  best_varpool_pos = varpool_pos;
	  best_noreorder_pos = noreorder_pos;
	}
      if (dump_file)
	fprintf (dump_file, "Step %i: added %s, size %i, "
		 "cost %" PRId64 "/%" PRId64 " "
		 "best %" PRId64 "/%" PRId64", step %i\n", i,
		 order[i]->dump_name (),
		 partition->insns, cost, internal,
		 best_cost, best_internal, best_i);
      /* Partition is too large, unwind into step when best cost was reached and
	 start new partition.  */
      if (partition->insns > 9 * partition_size / 8
	  || partition->insns > max_partition_size)
	{
	  if (best_i != i)
	    {
	      if (dump_file)
		fprintf (dump_file, "Unwinding %i insertions to step %i\n",
			 i - best_i, best_i);
	      undo_partition (partition, best_n_nodes);
	      varpool_pos = best_varpool_pos;
	      noreorder_pos = best_noreorder_pos;
	    }
	  gcc_assert (best_size == partition->insns);
	  i = best_i;
	  if (dump_file)
	    fprintf (dump_file,
		     "Partition insns: %i (want %" PRId64 ")\n",
		     partition->insns, partition_size);
 	  /* When we are finished, avoid creating empty partition.  */
	  while (i < order.length () - 1 && symbol_partitioned_p (order[i + 1]))
	    i++;
	  if (i == order.length () - 1)
	    break;
	  total_size -= partition->insns;
	  partition = new_partition ("");
	  last_visited_node = 0;
	  cost = 0;

	  if (dump_file)
	    fprintf (dump_file, "New partition\n");
	  best_n_nodes = 0;
	  best_cost = -1;

	  /* Since the size of partitions is just approximate, update the size after
	     we finished current one.  */
	  if (npartitions < n_lto_partitions)
	    partition_size = total_size / (n_lto_partitions - npartitions);
	  else
	    /* Watch for overflow.  */
	    partition_size = INT_MAX / 16;

	  if (dump_file)
	    fprintf (dump_file,
		     "Total size: %" PRId64 " partition_size: %" PRId64 "\n",
		     total_size, partition_size);
	  if (partition_size < param_min_partition_size)
	    partition_size = param_min_partition_size;
	  npartitions ++;
	}
    }

  next_nodes.truncate (0);

  /* Variables that are not reachable from the code go into last partition.  */
  FOR_EACH_VARIABLE (vnode)
    if (vnode->get_partitioning_class () == SYMBOL_PARTITION
	&& !symbol_partitioned_p (vnode))
      next_nodes.safe_push (vnode);

  /* Output remaining ordered symbols.  */
  while (varpool_pos < n_varpool_nodes)
    next_nodes.safe_push (varpool_order[varpool_pos++]);
  while (noreorder_pos < (int)noreorder.length ())
    next_nodes.safe_push (noreorder[noreorder_pos++]);
  /* For one partition the cost of boundary should be 0 unless we added final
     symbols here (these are not accounted) or we have accounting bug.  */
  gcc_assert (next_nodes.length () || npartitions != 1 || !best_cost || best_cost == -1);
  add_sorted_nodes (next_nodes, partition);

  if (dump_file)
    {
      fprintf (dump_file, "\nPartition sizes:\n");
      unsigned partitions = ltrans_partitions.length ();

      for (unsigned i = 0; i < partitions ; i++)
	{
	  ltrans_partition p = ltrans_partitions[i];
	  fprintf (dump_file, "partition %d contains %d (%2.2f%%)"
		   " symbols and %d (%2.2f%%) insns\n", i, p->symbols,
		   100.0 * p->symbols / order.length (), p->insns,
		   100.0 * p->insns / original_total_size);
	}

      fprintf (dump_file, "\n");
    }
}
834 
835 /* Return true if we must not change the name of the NODE.  The name as
836    extracted from the corresponding decl should be passed in NAME.  */
837 
838 static bool
must_not_rename(symtab_node * node,const char * name)839 must_not_rename (symtab_node *node, const char *name)
840 {
841   /* Our renaming machinery do not handle more than one change of assembler name.
842      We should not need more than one anyway.  */
843   if (node->lto_file_data
844       && lto_get_decl_name_mapping (node->lto_file_data, name) != name)
845     {
846       if (dump_file)
847 	fprintf (dump_file,
848 		 "Not privatizing symbol name: %s. It privatized already.\n",
849 		 name);
850       return true;
851     }
852   /* Avoid mangling of already mangled clones.
853      ???  should have a flag whether a symbol has a 'private' name already,
854      since we produce some symbols like that i.e. for global constructors
855      that are not really clones.
856      ???  it is what unique_name means.  We only need to set it when doing
857      private symbols.  */
858   if (node->unique_name)
859     {
860       if (dump_file)
861 	fprintf (dump_file,
862 		 "Not privatizing symbol name: %s. Has unique name.\n",
863 		 name);
864       return true;
865     }
866   return false;
867 }
868 
/* If we are an offload compiler, we may have to rewrite symbols to be
   valid on this target.  Return either PTR or a modified version of it.  */

static const char *
maybe_rewrite_identifier (const char *ptr)
{
#if defined ACCEL_COMPILER && (defined NO_DOT_IN_LABEL || defined NO_DOLLAR_IN_LABEL)
  /* Pick the replacement character and the set of characters the target
     cannot accept in labels.  */
#ifndef NO_DOT_IN_LABEL
  char valid = '.';
  const char reject[] = "$";
#elif !defined NO_DOLLAR_IN_LABEL
  char valid = '$';
  const char reject[] = ".";
#else
  char valid = '_';
  const char reject[] = ".$";
#endif

  /* Copy lazily: allocate a writable buffer only upon the first offending
     character, so the common clean case returns PTR untouched.  */
  char *buf = NULL;
  const char *result = ptr;
  size_t pos = strcspn (result, reject);
  while (result[pos] != '\0')
    {
      if (buf == NULL)
	{
	  buf = xstrdup (ptr);
	  result = buf;
	}
      buf[pos] = valid;
      pos = strcspn (result, reject);
    }
  return result;
#else
  /* Not an offload compiler (or no label restrictions): nothing to do.  */
  return ptr;
#endif
}
906 
907 /* Ensure that the symbol in NODE is valid for the target, and if not,
908    rewrite it.  */
909 
910 static void
validize_symbol_for_target(symtab_node * node)911 validize_symbol_for_target (symtab_node *node)
912 {
913   tree decl = node->decl;
914   const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
915 
916   if (must_not_rename (node, name))
917     return;
918 
919   const char *name2 = maybe_rewrite_identifier (name);
920   if (name2 != name)
921     {
922       symtab->change_decl_assembler_name (decl, get_identifier (name2));
923       if (node->lto_file_data)
924 	lto_record_renamed_decl (node->lto_file_data, name,
925 				 IDENTIFIER_POINTER
926 				 (DECL_ASSEMBLER_NAME (decl)));
927     }
928 }
929 
/* Maps symbol names to unique lto clone counters.  Allocated and freed by
   lto_promote_cross_file_statics / lto_promote_statics_nonwpa and consumed
   by privatize_symbol_name_1 to number "lto_priv" clones.  */
static hash_map<const char *, unsigned> *lto_clone_numbers;
932 
/* Helper for privatize_symbol_name.  Mangle NODE symbol name
   represented by DECL.  Return true if a rename was performed,
   false if the name must stay as-is.  */

static bool
privatize_symbol_name_1 (symtab_node *node, tree decl)
{
  const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));

  /* Already-renamed and uniquely-named symbols keep their name.  */
  if (must_not_rename (node, name))
    return false;

  /* For offload compilers the identifier may need target-charset fixups
     before it is used as a key.  */
  name = maybe_rewrite_identifier (name);
  /* Each distinct source name has its own counter, so the generated
     "lto_priv" clone names are unique per original name.  */
  unsigned &clone_number = lto_clone_numbers->get_or_insert (name);
  symtab->change_decl_assembler_name (decl,
				      clone_function_name (
					  name, "lto_priv", clone_number));
  clone_number++;

  /* Record the rename so the decl can be looked up by its prior name.
     NOTE(review): NAME here is the possibly-rewritten identifier, not
     necessarily the original assembler name — confirm readers of this
     mapping expect that.  */
  if (node->lto_file_data)
    lto_record_renamed_decl (node->lto_file_data, name,
			     IDENTIFIER_POINTER
			     (DECL_ASSEMBLER_NAME (decl)));

  if (dump_file)
    fprintf (dump_file,
	     "Privatizing symbol name: %s -> %s\n",
	     name, IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));

  return true;
}
963 
964 /* Mangle NODE symbol name into a local name.
965    This is necessary to do
966    1) if two or more static vars of same assembler name
967       are merged into single ltrans unit.
968    2) if previously static var was promoted hidden to avoid possible conflict
969       with symbols defined out of the LTO world.  */
970 
971 static bool
privatize_symbol_name(symtab_node * node)972 privatize_symbol_name (symtab_node *node)
973 {
974   if (!privatize_symbol_name_1 (node, node->decl))
975     return false;
976 
977   return true;
978 }
979 
980 /* Promote variable VNODE to be static.  */
981 
982 static void
promote_symbol(symtab_node * node)983 promote_symbol (symtab_node *node)
984 {
985   /* We already promoted ... */
986   if (DECL_VISIBILITY (node->decl) == VISIBILITY_HIDDEN
987       && DECL_VISIBILITY_SPECIFIED (node->decl)
988       && TREE_PUBLIC (node->decl))
989     {
990       validize_symbol_for_target (node);
991       return;
992     }
993 
994   gcc_checking_assert (!TREE_PUBLIC (node->decl)
995 		       && !DECL_EXTERNAL (node->decl));
996   /* Be sure that newly public symbol does not conflict with anything already
997      defined by the non-LTO part.  */
998   privatize_symbol_name (node);
999   TREE_PUBLIC (node->decl) = 1;
1000   /* After privatization the node should not conflict with any other symbol,
1001      so it is prevailing.  This is important to keep binds_to_current_def_p
1002      to work across partitions.  */
1003   node->resolution = LDPR_PREVAILING_DEF_IRONLY;
1004   node->semantic_interposition = false;
1005   DECL_VISIBILITY (node->decl) = VISIBILITY_HIDDEN;
1006   DECL_VISIBILITY_SPECIFIED (node->decl) = true;
1007   if (dump_file)
1008     fprintf (dump_file,
1009 	     "Promoting as hidden: %s (%s)\n", node->dump_name (),
1010 	     IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl)));
1011 
1012   /* Promoting a symbol also promotes all transparent aliases with exception
1013      of weakref where the visibility flags are always wrong and set to
1014      !PUBLIC.  */
1015   ipa_ref *ref;
1016   for (unsigned i = 0; node->iterate_direct_aliases (i, ref); i++)
1017     {
1018       struct symtab_node *alias = ref->referring;
1019       if (alias->transparent_alias && !alias->weakref)
1020 	{
1021 	  TREE_PUBLIC (alias->decl) = 1;
1022 	  DECL_VISIBILITY (alias->decl) = VISIBILITY_HIDDEN;
1023 	  DECL_VISIBILITY_SPECIFIED (alias->decl) = true;
1024 	  if (dump_file)
1025 	    fprintf (dump_file,
1026 		     "Promoting alias as hidden: %s\n",
1027 		     IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl)));
1028 	}
1029       gcc_assert (!alias->weakref || TREE_PUBLIC (alias->decl));
1030     }
1031 }
1032 
1033 /* Return true if NODE needs named section even if it won't land in
1034    the partition symbol table.
1035 
1036    FIXME: we should really not use named sections for inline clones
1037    and master clones.  */
1038 
1039 static bool
may_need_named_section_p(lto_symtab_encoder_t encoder,symtab_node * node)1040 may_need_named_section_p (lto_symtab_encoder_t encoder, symtab_node *node)
1041 {
1042   struct cgraph_node *cnode = dyn_cast <cgraph_node *> (node);
1043   if (!cnode)
1044     return false;
1045   if (node->real_symbol_p ())
1046     return false;
1047   return (!encoder
1048 	  || (lto_symtab_encoder_lookup (encoder, node) != LCC_NOT_FOUND
1049               && lto_symtab_encoder_encode_body_p (encoder,
1050 				                   cnode)));
1051 }
1052 
/* If NODE represents a static variable.  See if there are other variables
   of the same name in partition ENCODER (or in whole compilation unit if
   ENCODER is NULL) and if so, mangle the statics.  Always mangle all
   conflicting statics, so we reduce chances of silently miscompiling
   asm statements referring to them by symbol name.  */

static void
rename_statics (lto_symtab_encoder_t encoder, symtab_node *node)
{
  tree decl = node->decl;
  symtab_node *s;
  tree name = DECL_ASSEMBLER_NAME (decl);

  /* See if this is static symbol.  Externally visible non-weakref symbols,
     external declarations and non-real symbols keep their name, unless
     they may need a named section anyway.  */
  if (((node->externally_visible && !node->weakref)
      /* FIXME: externally_visible is somewhat illogically not set for
	 external symbols (i.e. those not defined).  Remove this test
	 once this is fixed.  */
        || DECL_EXTERNAL (node->decl)
	|| !node->real_symbol_p ())
       && !may_need_named_section_p (encoder, node))
    return;

  /* Now walk symbols sharing the same name and see if there are any conflicts.
     (all types of symbols counts here, since we cannot have static of the
     same name as external or public symbol.)  */
  for (s = symtab_node::get_for_asmname (name);
       s; s = s->next_sharing_asm_name)
    if ((s->real_symbol_p () || may_need_named_section_p (encoder, s))
	&& s->decl != node->decl
	&& (!encoder
	    || lto_symtab_encoder_lookup (encoder, s) != LCC_NOT_FOUND))
       break;

  /* OK, no conflict, so we have nothing to do.  */
  if (!s)
    return;

  if (dump_file)
    fprintf (dump_file,
	    "Renaming statics with asm name: %s\n", node->dump_name ());

  /* Assign every symbol in the set that shares the same ASM name an unique
     mangled name.  Privatizing one symbol may rename others sharing the
     list, so after a successful rename we re-walk the chain from its
     head rather than trusting the saved next pointer.  */
  for (s = symtab_node::get_for_asmname (name); s;)
    if ((!s->externally_visible || s->weakref)
	/* Transparent aliases having same name as target are renamed at a
	   time their target gets new name.  Transparent aliases that use
	   separate assembler name require the name to be unique.  */
	&& (!s->transparent_alias || !s->definition || s->weakref
	    || !symbol_table::assembler_names_equal_p
		 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (s->decl)),
		  IDENTIFIER_POINTER
		    (DECL_ASSEMBLER_NAME (s->get_alias_target()->decl))))
	&& ((s->real_symbol_p ()
             && !DECL_EXTERNAL (s->decl)
	     && !TREE_PUBLIC (s->decl))
 	    || may_need_named_section_p (encoder, s))
	&& (!encoder
	    || lto_symtab_encoder_lookup (encoder, s) != LCC_NOT_FOUND))
      {
        if (privatize_symbol_name (s))
	  /* Re-start from beginning since we do not know how many
	     symbols changed a name.  */
	  s = symtab_node::get_for_asmname (name);
        else s = s->next_sharing_asm_name;
      }
    else s = s->next_sharing_asm_name;
}
1122 
1123 /* Find out all static decls that need to be promoted to global because
1124    of cross file sharing.  This function must be run in the WPA mode after
1125    all inlinees are added.  */
1126 
1127 void
lto_promote_cross_file_statics(void)1128 lto_promote_cross_file_statics (void)
1129 {
1130   unsigned i, n_sets;
1131 
1132   gcc_assert (flag_wpa);
1133 
1134   lto_stream_offload_p = false;
1135   select_what_to_stream ();
1136 
1137   /* First compute boundaries.  */
1138   n_sets = ltrans_partitions.length ();
1139   for (i = 0; i < n_sets; i++)
1140     {
1141       ltrans_partition part
1142 	= ltrans_partitions[i];
1143       part->encoder = compute_ltrans_boundary (part->encoder);
1144     }
1145 
1146   lto_clone_numbers = new hash_map<const char *, unsigned>;
1147 
1148   /* Look at boundaries and promote symbols as needed.  */
1149   for (i = 0; i < n_sets; i++)
1150     {
1151       lto_symtab_encoder_iterator lsei;
1152       lto_symtab_encoder_t encoder = ltrans_partitions[i]->encoder;
1153 
1154       for (lsei = lsei_start (encoder); !lsei_end_p (lsei);
1155 	   lsei_next (&lsei))
1156         {
1157           symtab_node *node = lsei_node (lsei);
1158 
1159 	  /* If symbol is static, rename it if its assembler name
1160 	     clashes with anything else in this unit.  */
1161 	  rename_statics (encoder, node);
1162 
1163 	  /* No need to promote if symbol already is externally visible ... */
1164 	  if (node->externally_visible
1165  	      /* ... or if it is part of current partition ... */
1166 	      || lto_symtab_encoder_in_partition_p (encoder, node)
1167 	      /* ... or if we do not partition it. This mean that it will
1168 		 appear in every partition referencing it.  */
1169 	      || node->get_partitioning_class () != SYMBOL_PARTITION)
1170 	    {
1171 	      validize_symbol_for_target (node);
1172 	      continue;
1173 	    }
1174 
1175           promote_symbol (node);
1176         }
1177     }
1178   delete lto_clone_numbers;
1179 }
1180 
1181 /* Rename statics in the whole unit in the case that
1182    we do -flto-partition=none.  */
1183 
1184 void
lto_promote_statics_nonwpa(void)1185 lto_promote_statics_nonwpa (void)
1186 {
1187   symtab_node *node;
1188 
1189   lto_clone_numbers = new hash_map<const char *, unsigned>;
1190   FOR_EACH_SYMBOL (node)
1191     {
1192       rename_statics (NULL, node);
1193       validize_symbol_for_target (node);
1194     }
1195   delete lto_clone_numbers;
1196 }
1197