xref: /freebsd/contrib/llvm-project/lld/MachO/ICF.cpp (revision 716fd348)
1 //===- ICF.cpp ------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ICF.h"
10 #include "ConcatOutputSection.h"
11 #include "InputSection.h"
12 #include "Symbols.h"
13 #include "UnwindInfoSection.h"
14 
15 #include "llvm/Support/Parallel.h"
16 #include "llvm/Support/TimeProfiler.h"
17 
18 #include <atomic>
19 
20 using namespace llvm;
21 using namespace lld;
22 using namespace lld::macho;
23 
24 class ICF {
25 public:
26   ICF(std::vector<ConcatInputSection *> &inputs);
27 
28   void run();
29   void segregate(size_t begin, size_t end,
30                  std::function<bool(const ConcatInputSection *,
31                                     const ConcatInputSection *)>
32                      equals);
33   size_t findBoundary(size_t begin, size_t end);
34   void forEachClassRange(size_t begin, size_t end,
35                          std::function<void(size_t, size_t)> func);
36   void forEachClass(std::function<void(size_t, size_t)> func);
37 
38   // ICF needs a copy of the inputs vector because its equivalence-class
39   // segregation algorithm destroys the proper sequence.
40   std::vector<ConcatInputSection *> icfInputs;
41 };
42 
43 ICF::ICF(std::vector<ConcatInputSection *> &inputs) {
44   icfInputs.assign(inputs.begin(), inputs.end());
45 }
46 
47 // ICF = Identical Code Folding
48 //
49 // We only fold __TEXT,__text, so this is really "code" folding, and not
50 // "COMDAT" folding. String and scalar constant literals are deduplicated
51 // elsewhere.
52 //
53 // Summary of segments & sections:
54 //
55 // The __TEXT segment is readonly at the MMU. Some sections are already
56 // deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are
57 // synthetic and inherently free of duplicates (__TEXT,__stubs &
58 // __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const,
59 // because doing so induces many test failures.
60 //
61 // The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and
62 // thus ineligible for ICF.
63 //
64 // The __DATA_CONST segment is read/write at the MMU, but is logically const to
65 // the application after dyld applies fixups to pointer data. We currently
66 // fold only the __DATA_CONST,__cfstring section.
67 //
68 // The __DATA segment is read/write at the MMU, and as application-writeable
69 // data, none of its sections are eligible for ICF.
70 //
71 // Please see the large block comment in lld/ELF/ICF.cpp for an explanation
72 // of the segregation algorithm.
73 //
74 // FIXME(gkm): implement keep-unique attributes
75 // FIXME(gkm): implement address-significance tables for MachO object files
76 
77 static unsigned icfPass = 0;
78 static std::atomic<bool> icfRepeat{false};
79 
80 // Compare "non-moving" parts of two ConcatInputSections, namely everything
81 // except references to other ConcatInputSections.
82 static bool equalsConstant(const ConcatInputSection *ia,
83                            const ConcatInputSection *ib) {
84   // We can only fold within the same OutputSection.
85   if (ia->parent != ib->parent)
86     return false;
87   if (ia->data.size() != ib->data.size())
88     return false;
89   if (ia->data != ib->data)
90     return false;
91   if (ia->relocs.size() != ib->relocs.size())
92     return false;
93   auto f = [](const Reloc &ra, const Reloc &rb) {
94     if (ra.type != rb.type)
95       return false;
96     if (ra.pcrel != rb.pcrel)
97       return false;
98     if (ra.length != rb.length)
99       return false;
100     if (ra.offset != rb.offset)
101       return false;
102     if (ra.addend != rb.addend)
103       return false;
104     if (ra.referent.is<Symbol *>() != rb.referent.is<Symbol *>())
105       return false;
106 
107     InputSection *isecA, *isecB;
108 
109     uint64_t valueA = 0;
110     uint64_t valueB = 0;
111     if (ra.referent.is<Symbol *>()) {
112       const auto *sa = ra.referent.get<Symbol *>();
113       const auto *sb = rb.referent.get<Symbol *>();
114       if (sa->kind() != sb->kind())
115         return false;
116       if (!isa<Defined>(sa)) {
117         // ICF runs before Undefineds are reported.
118         assert(isa<DylibSymbol>(sa) || isa<Undefined>(sa));
119         return sa == sb;
120       }
121       const auto *da = cast<Defined>(sa);
122       const auto *db = cast<Defined>(sb);
123       if (!da->isec || !db->isec) {
124         assert(da->isAbsolute() && db->isAbsolute());
125         return da->value == db->value;
126       }
127       isecA = da->isec;
128       valueA = da->value;
129       isecB = db->isec;
130       valueB = db->value;
131     } else {
132       isecA = ra.referent.get<InputSection *>();
133       isecB = rb.referent.get<InputSection *>();
134     }
135 
136     if (isecA->parent != isecB->parent)
137       return false;
138     // Sections with identical parents should be of the same kind.
139     assert(isecA->kind() == isecB->kind());
140     // We will compare ConcatInputSection contents in equalsVariable.
141     if (isa<ConcatInputSection>(isecA))
142       return true;
143     // Else we have two literal sections. References to them are equal iff their
144     // offsets in the output section are equal.
145     return isecA->getOffset(valueA + ra.addend) ==
146            isecB->getOffset(valueB + rb.addend);
147   };
148   return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(),
149                     f);
150 }
151 
152 // Compare the "moving" parts of two ConcatInputSections -- i.e. everything not
153 // handled by equalsConstant().
154 static bool equalsVariable(const ConcatInputSection *ia,
155                            const ConcatInputSection *ib) {
156   assert(ia->relocs.size() == ib->relocs.size());
157   auto f = [](const Reloc &ra, const Reloc &rb) {
158     // We already filtered out mismatching values/addends in equalsConstant.
159     if (ra.referent == rb.referent)
160       return true;
161     const ConcatInputSection *isecA, *isecB;
162     if (ra.referent.is<Symbol *>()) {
163       // Matching DylibSymbols are already filtered out by the
164       // identical-referent check above. Non-matching DylibSymbols were filtered
165       // out in equalsConstant(). So we can safely cast to Defined here.
166       const auto *da = cast<Defined>(ra.referent.get<Symbol *>());
167       const auto *db = cast<Defined>(rb.referent.get<Symbol *>());
168       if (da->isAbsolute())
169         return true;
170       isecA = dyn_cast<ConcatInputSection>(da->isec);
171       if (!isecA)
172         return true; // literal sections were checked in equalsConstant.
173       isecB = cast<ConcatInputSection>(db->isec);
174     } else {
175       const auto *sa = ra.referent.get<InputSection *>();
176       const auto *sb = rb.referent.get<InputSection *>();
177       isecA = dyn_cast<ConcatInputSection>(sa);
178       if (!isecA)
179         return true;
180       isecB = cast<ConcatInputSection>(sb);
181     }
182     return isecA->icfEqClass[icfPass % 2] == isecB->icfEqClass[icfPass % 2];
183   };
184   if (!std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), f))
185     return false;
186 
187   // If there are symbols with associated unwind info, check that the unwind
188   // info matches. For simplicity, we only handle the case where there are only
189   // symbols at offset zero within the section (which is typically the case with
190   // .subsections_via_symbols.)
191   auto hasCU = [](Defined *d) { return d->unwindEntry != nullptr; };
192   auto itA = std::find_if(ia->symbols.begin(), ia->symbols.end(), hasCU);
193   auto itB = std::find_if(ib->symbols.begin(), ib->symbols.end(), hasCU);
194   if (itA == ia->symbols.end())
195     return itB == ib->symbols.end();
196   if (itB == ib->symbols.end())
197     return false;
198   const Defined *da = *itA;
199   const Defined *db = *itB;
200   if (da->unwindEntry->icfEqClass[icfPass % 2] !=
201           db->unwindEntry->icfEqClass[icfPass % 2] ||
202       da->value != 0 || db->value != 0)
203     return false;
204   auto isZero = [](Defined *d) { return d->value == 0; };
205   return std::find_if_not(std::next(itA), ia->symbols.end(), isZero) ==
206              ia->symbols.end() &&
207          std::find_if_not(std::next(itB), ib->symbols.end(), isZero) ==
208              ib->symbols.end();
209 }
210 
211 // Find the first InputSection after BEGIN whose equivalence class differs
212 size_t ICF::findBoundary(size_t begin, size_t end) {
213   uint64_t beginHash = icfInputs[begin]->icfEqClass[icfPass % 2];
214   for (size_t i = begin + 1; i < end; ++i)
215     if (beginHash != icfInputs[i]->icfEqClass[icfPass % 2])
216       return i;
217   return end;
218 }
219 
220 // Invoke FUNC on subranges with matching equivalence class
221 void ICF::forEachClassRange(size_t begin, size_t end,
222                             std::function<void(size_t, size_t)> func) {
223   while (begin < end) {
224     size_t mid = findBoundary(begin, end);
225     func(begin, mid);
226     begin = mid;
227   }
228 }
229 
230 // Split icfInputs into shards, then parallelize invocation of FUNC on subranges
231 // with matching equivalence class
232 void ICF::forEachClass(std::function<void(size_t, size_t)> func) {
233   // Only use threads when the benefits outweigh the overhead.
234   const size_t threadingThreshold = 1024;
235   if (icfInputs.size() < threadingThreshold) {
236     forEachClassRange(0, icfInputs.size(), func);
237     ++icfPass;
238     return;
239   }
240 
241   // Shard into non-overlapping intervals, and call FUNC in parallel.  The
242   // sharding must be completed before any calls to FUNC are made so that FUNC
243   // can modify the InputSection in its shard without causing data races.
244   const size_t shards = 256;
245   size_t step = icfInputs.size() / shards;
246   size_t boundaries[shards + 1];
247   boundaries[0] = 0;
248   boundaries[shards] = icfInputs.size();
249   parallelForEachN(1, shards, [&](size_t i) {
250     boundaries[i] = findBoundary((i - 1) * step, icfInputs.size());
251   });
252   parallelForEachN(1, shards + 1, [&](size_t i) {
253     if (boundaries[i - 1] < boundaries[i]) {
254       forEachClassRange(boundaries[i - 1], boundaries[i], func);
255     }
256   });
257   ++icfPass;
258 }
259 
260 void ICF::run() {
261   // Into each origin-section hash, combine all reloc referent section hashes.
262   for (icfPass = 0; icfPass < 2; ++icfPass) {
263     parallelForEach(icfInputs, [&](ConcatInputSection *isec) {
264       uint64_t hash = isec->icfEqClass[icfPass % 2];
265       for (const Reloc &r : isec->relocs) {
266         if (auto *sym = r.referent.dyn_cast<Symbol *>()) {
267           if (auto *dylibSym = dyn_cast<DylibSymbol>(sym))
268             hash += dylibSym->stubsHelperIndex;
269           else if (auto *defined = dyn_cast<Defined>(sym)) {
270             if (defined->isec) {
271               if (auto isec = dyn_cast<ConcatInputSection>(defined->isec))
272                 hash += defined->value + isec->icfEqClass[icfPass % 2];
273               else
274                 hash += defined->isec->kind() +
275                         defined->isec->getOffset(defined->value);
276             } else {
277               hash += defined->value;
278             }
279           } else if (!isa<Undefined>(sym)) // ICF runs before Undefined diags.
280             llvm_unreachable("foldIdenticalSections symbol kind");
281         }
282       }
283       // Set MSB to 1 to avoid collisions with non-hashed classes.
284       isec->icfEqClass[(icfPass + 1) % 2] = hash | (1ull << 63);
285     });
286   }
287 
288   llvm::stable_sort(
289       icfInputs, [](const ConcatInputSection *a, const ConcatInputSection *b) {
290         return a->icfEqClass[0] < b->icfEqClass[0];
291       });
292   forEachClass(
293       [&](size_t begin, size_t end) { segregate(begin, end, equalsConstant); });
294 
295   // Split equivalence groups by comparing relocations until convergence
296   do {
297     icfRepeat = false;
298     forEachClass([&](size_t begin, size_t end) {
299       segregate(begin, end, equalsVariable);
300     });
301   } while (icfRepeat);
302   log("ICF needed " + Twine(icfPass) + " iterations");
303 
304   // Fold sections within equivalence classes
305   forEachClass([&](size_t begin, size_t end) {
306     if (end - begin < 2)
307       return;
308     ConcatInputSection *beginIsec = icfInputs[begin];
309     for (size_t i = begin + 1; i < end; ++i)
310       beginIsec->foldIdentical(icfInputs[i]);
311   });
312 }
313 
314 // Split an equivalence class into smaller classes.
315 void ICF::segregate(
316     size_t begin, size_t end,
317     std::function<bool(const ConcatInputSection *, const ConcatInputSection *)>
318         equals) {
319   while (begin < end) {
320     // Divide [begin, end) into two. Let mid be the start index of the
321     // second group.
322     auto bound = std::stable_partition(icfInputs.begin() + begin + 1,
323                                        icfInputs.begin() + end,
324                                        [&](ConcatInputSection *isec) {
325                                          return equals(icfInputs[begin], isec);
326                                        });
327     size_t mid = bound - icfInputs.begin();
328 
329     // Split [begin, end) into [begin, mid) and [mid, end). We use mid as an
330     // equivalence class ID because every group ends with a unique index.
331     for (size_t i = begin; i < mid; ++i)
332       icfInputs[i]->icfEqClass[(icfPass + 1) % 2] = mid;
333 
334     // If we created a group, we need to iterate the main loop again.
335     if (mid != end)
336       icfRepeat = true;
337 
338     begin = mid;
339   }
340 }
341 
342 void macho::foldIdenticalSections() {
343   TimeTraceScope timeScope("Fold Identical Code Sections");
344   // The ICF equivalence-class segregation algorithm relies on pre-computed
345   // hashes of InputSection::data for the ConcatOutputSection::inputs and all
346   // sections referenced by their relocs. We could recursively traverse the
347   // relocs to find every referenced InputSection, but that precludes easy
348   // parallelization. Therefore, we hash every InputSection here where we have
349   // them all accessible as simple vectors.
350 
351   // If an InputSection is ineligible for ICF, we give it a unique ID to force
352   // it into an unfoldable singleton equivalence class.  Begin the unique-ID
353   // space at inputSections.size(), so that it will never intersect with
354   // equivalence-class IDs which begin at 0. Since hashes & unique IDs never
355   // coexist with equivalence-class IDs, this is not necessary, but might help
356   // someone keep the numbers straight in case we ever need to debug the
357   // ICF::segregate()
358   std::vector<ConcatInputSection *> hashable;
359   uint64_t icfUniqueID = inputSections.size();
360   for (ConcatInputSection *isec : inputSections) {
361     // FIXME: consider non-code __text sections as hashable?
362     bool isHashable = (isCodeSection(isec) || isCfStringSection(isec)) &&
363                       !isec->shouldOmitFromOutput() && isec->isHashableForICF();
364     if (isHashable) {
365       hashable.push_back(isec);
366       for (Defined *d : isec->symbols)
367         if (d->unwindEntry)
368           hashable.push_back(d->unwindEntry);
369     } else {
370       isec->icfEqClass[0] = ++icfUniqueID;
371     }
372   }
373   parallelForEach(hashable,
374                   [](ConcatInputSection *isec) { isec->hashForICF(); });
375   // Now that every input section is either hashed or marked as unique, run the
376   // segregation algorithm to detect foldable subsections.
377   ICF(hashable).run();
378 }
379