1 //===- DylibReader.cpp -------------- TAPI MachO Dylib Reader --*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// Implements the TAPI Reader for Mach-O dynamic libraries.
10 ///
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/TextAPI/DylibReader.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/StringMap.h"
16 #include "llvm/Object/Binary.h"
17 #include "llvm/Object/MachOUniversal.h"
18 #include "llvm/Support/Endian.h"
19 #include "llvm/TargetParser/Triple.h"
20 #include "llvm/TextAPI/RecordsSlice.h"
21 #include "llvm/TextAPI/TextAPIError.h"
22 #include <iomanip>
23 #include <set>
24 #include <sstream>
25 #include <string>
26 #include <tuple>
27 
28 using namespace llvm;
29 using namespace llvm::object;
30 using namespace llvm::MachO;
31 using namespace llvm::MachO::DylibReader;
32 
33 using TripleVec = std::vector<Triple>;
34 static typename TripleVec::iterator emplace(TripleVec &Container, Triple &&T) {
35   auto I = partition_point(Container, [=](const Triple &CT) {
36     return std::forward_as_tuple(CT.getArch(), CT.getOS(),
37                                  CT.getEnvironment()) <
38            std::forward_as_tuple(T.getArch(), T.getOS(), T.getEnvironment());
39   });
40 
41   if (I != Container.end() && *I == T)
42     return I;
43   return Container.emplace(I, T);
44 }
45 
46 static TripleVec constructTriples(MachOObjectFile *Obj,
47                                   const Architecture ArchT) {
48   auto getOSVersionStr = [](uint32_t V) {
49     PackedVersion OSVersion(V);
50     std::string Vers;
51     raw_string_ostream VStream(Vers);
52     VStream << OSVersion;
53     return VStream.str();
54   };
55   auto getOSVersion = [&](const MachOObjectFile::LoadCommandInfo &cmd) {
56     auto Vers = Obj->getVersionMinLoadCommand(cmd);
57     return getOSVersionStr(Vers.version);
58   };
59 
60   TripleVec Triples;
61   bool IsIntel = ArchitectureSet(ArchT).hasX86();
62   auto Arch = getArchitectureName(ArchT);
63 
64   for (const auto &cmd : Obj->load_commands()) {
65     std::string OSVersion;
66     switch (cmd.C.cmd) {
67     case MachO::LC_VERSION_MIN_MACOSX:
68       OSVersion = getOSVersion(cmd);
69       emplace(Triples, {Arch, "apple", "macos" + OSVersion});
70       break;
71     case MachO::LC_VERSION_MIN_IPHONEOS:
72       OSVersion = getOSVersion(cmd);
73       if (IsIntel)
74         emplace(Triples, {Arch, "apple", "ios" + OSVersion, "simulator"});
75       else
76         emplace(Triples, {Arch, "apple", "ios" + OSVersion});
77       break;
78     case MachO::LC_VERSION_MIN_TVOS:
79       OSVersion = getOSVersion(cmd);
80       if (IsIntel)
81         emplace(Triples, {Arch, "apple", "tvos" + OSVersion, "simulator"});
82       else
83         emplace(Triples, {Arch, "apple", "tvos" + OSVersion});
84       break;
85     case MachO::LC_VERSION_MIN_WATCHOS:
86       OSVersion = getOSVersion(cmd);
87       if (IsIntel)
88         emplace(Triples, {Arch, "apple", "watchos" + OSVersion, "simulator"});
89       else
90         emplace(Triples, {Arch, "apple", "watchos" + OSVersion});
91       break;
92     case MachO::LC_BUILD_VERSION: {
93       OSVersion = getOSVersionStr(Obj->getBuildVersionLoadCommand(cmd).minos);
94       switch (Obj->getBuildVersionLoadCommand(cmd).platform) {
95       case MachO::PLATFORM_MACOS:
96         emplace(Triples, {Arch, "apple", "macos" + OSVersion});
97         break;
98       case MachO::PLATFORM_IOS:
99         emplace(Triples, {Arch, "apple", "ios" + OSVersion});
100         break;
101       case MachO::PLATFORM_TVOS:
102         emplace(Triples, {Arch, "apple", "tvos" + OSVersion});
103         break;
104       case MachO::PLATFORM_WATCHOS:
105         emplace(Triples, {Arch, "apple", "watchos" + OSVersion});
106         break;
107       case MachO::PLATFORM_BRIDGEOS:
108         emplace(Triples, {Arch, "apple", "bridgeos" + OSVersion});
109         break;
110       case MachO::PLATFORM_MACCATALYST:
111         emplace(Triples, {Arch, "apple", "ios" + OSVersion, "macabi"});
112         break;
113       case MachO::PLATFORM_IOSSIMULATOR:
114         emplace(Triples, {Arch, "apple", "ios" + OSVersion, "simulator"});
115         break;
116       case MachO::PLATFORM_TVOSSIMULATOR:
117         emplace(Triples, {Arch, "apple", "tvos" + OSVersion, "simulator"});
118         break;
119       case MachO::PLATFORM_WATCHOSSIMULATOR:
120         emplace(Triples, {Arch, "apple", "watchos" + OSVersion, "simulator"});
121         break;
122       case MachO::PLATFORM_DRIVERKIT:
123         emplace(Triples, {Arch, "apple", "driverkit" + OSVersion});
124         break;
125       default:
126         break; // Skip any others.
127       }
128       break;
129     }
130     default:
131       break;
132     }
133   }
134 
135   // Record unknown platform for older binaries that don't enforce platform
136   // load commands.
137   if (Triples.empty())
138     emplace(Triples, {Arch, "apple", "unknown"});
139 
140   return Triples;
141 }
142 
143 static Error readMachOHeader(MachOObjectFile *Obj, RecordsSlice &Slice) {
144   auto H = Obj->getHeader();
145   auto &BA = Slice.getBinaryAttrs();
146 
147   switch (H.filetype) {
148   default:
149     llvm_unreachable("unsupported binary type");
150   case MachO::MH_DYLIB:
151     BA.File = FileType::MachO_DynamicLibrary;
152     break;
153   case MachO::MH_DYLIB_STUB:
154     BA.File = FileType::MachO_DynamicLibrary_Stub;
155     break;
156   case MachO::MH_BUNDLE:
157     BA.File = FileType::MachO_Bundle;
158     break;
159   }
160 
161   if (H.flags & MachO::MH_TWOLEVEL)
162     BA.TwoLevelNamespace = true;
163   if (H.flags & MachO::MH_APP_EXTENSION_SAFE)
164     BA.AppExtensionSafe = true;
165 
166   for (const auto &LCI : Obj->load_commands()) {
167     switch (LCI.C.cmd) {
168     case MachO::LC_ID_DYLIB: {
169       auto DLLC = Obj->getDylibIDLoadCommand(LCI);
170       BA.InstallName = Slice.copyString(LCI.Ptr + DLLC.dylib.name);
171       BA.CurrentVersion = DLLC.dylib.current_version;
172       BA.CompatVersion = DLLC.dylib.compatibility_version;
173       break;
174     }
175     case MachO::LC_REEXPORT_DYLIB: {
176       auto DLLC = Obj->getDylibIDLoadCommand(LCI);
177       BA.RexportedLibraries.emplace_back(
178           Slice.copyString(LCI.Ptr + DLLC.dylib.name));
179       break;
180     }
181     case MachO::LC_SUB_FRAMEWORK: {
182       auto SFC = Obj->getSubFrameworkCommand(LCI);
183       BA.ParentUmbrella = Slice.copyString(LCI.Ptr + SFC.umbrella);
184       break;
185     }
186     case MachO::LC_SUB_CLIENT: {
187       auto SCLC = Obj->getSubClientCommand(LCI);
188       BA.AllowableClients.emplace_back(Slice.copyString(LCI.Ptr + SCLC.client));
189       break;
190     }
191     case MachO::LC_UUID: {
192       auto UUIDLC = Obj->getUuidCommand(LCI);
193       std::stringstream Stream;
194       for (unsigned I = 0; I < 16; ++I) {
195         if (I == 4 || I == 6 || I == 8 || I == 10)
196           Stream << '-';
197         Stream << std::setfill('0') << std::setw(2) << std::uppercase
198                << std::hex << static_cast<int>(UUIDLC.uuid[I]);
199       }
200       BA.UUID = Slice.copyString(Stream.str());
201       break;
202     }
203     case MachO::LC_RPATH: {
204       auto RPLC = Obj->getRpathCommand(LCI);
205       BA.RPaths.emplace_back(Slice.copyString(LCI.Ptr + RPLC.path));
206       break;
207     }
208     case MachO::LC_SEGMENT_SPLIT_INFO: {
209       auto SSILC = Obj->getLinkeditDataLoadCommand(LCI);
210       if (SSILC.datasize == 0)
211         BA.OSLibNotForSharedCache = true;
212       break;
213     }
214     default:
215       break;
216     }
217   }
218 
219   for (auto &Sect : Obj->sections()) {
220     auto SectName = Sect.getName();
221     if (!SectName)
222       return SectName.takeError();
223     if (*SectName != "__objc_imageinfo" && *SectName != "__image_info")
224       continue;
225 
226     auto Content = Sect.getContents();
227     if (!Content)
228       return Content.takeError();
229 
230     if ((Content->size() >= 8) && (Content->front() == 0)) {
231       uint32_t Flags;
232       if (Obj->isLittleEndian()) {
233         auto *p =
234             reinterpret_cast<const support::ulittle32_t *>(Content->data() + 4);
235         Flags = *p;
236       } else {
237         auto *p =
238             reinterpret_cast<const support::ubig32_t *>(Content->data() + 4);
239         Flags = *p;
240       }
241       BA.SwiftABI = (Flags >> 8) & 0xFF;
242     }
243   }
244   return Error::success();
245 }
246 
247 static Error readSymbols(MachOObjectFile *Obj, RecordsSlice &Slice,
248                          const ParseOption &Opt) {
249 
250   auto parseExport = [](const auto ExportFlags,
251                         auto Addr) -> std::tuple<SymbolFlags, RecordLinkage> {
252     SymbolFlags Flags = SymbolFlags::None;
253     switch (ExportFlags & MachO::EXPORT_SYMBOL_FLAGS_KIND_MASK) {
254     case MachO::EXPORT_SYMBOL_FLAGS_KIND_REGULAR:
255       if (ExportFlags & MachO::EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION)
256         Flags |= SymbolFlags::WeakDefined;
257       break;
258     case MachO::EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL:
259       Flags |= SymbolFlags::ThreadLocalValue;
260       break;
261     }
262 
263     RecordLinkage Linkage = (ExportFlags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT)
264                                 ? RecordLinkage::Rexported
265                                 : RecordLinkage::Exported;
266     return {Flags, Linkage};
267   };
268 
269   Error Err = Error::success();
270 
271   StringMap<std::pair<SymbolFlags, RecordLinkage>> Exports;
272   // Collect symbols from export trie first. Sometimes, there are more exports
273   // in the trie than in n-list due to stripping. This is common for swift
274   // mangled symbols.
275   for (auto &Sym : Obj->exports(Err)) {
276     auto [Flags, Linkage] = parseExport(Sym.flags(), Sym.address());
277     Slice.addRecord(Sym.name(), Flags, GlobalRecord::Kind::Unknown, Linkage);
278     Exports[Sym.name()] = {Flags, Linkage};
279   }
280 
281   for (const auto &Sym : Obj->symbols()) {
282     auto FlagsOrErr = Sym.getFlags();
283     if (!FlagsOrErr)
284       return FlagsOrErr.takeError();
285     auto Flags = *FlagsOrErr;
286 
287     auto NameOrErr = Sym.getName();
288     if (!NameOrErr)
289       return NameOrErr.takeError();
290     auto Name = *NameOrErr;
291 
292     RecordLinkage Linkage = RecordLinkage::Unknown;
293     SymbolFlags RecordFlags = SymbolFlags::None;
294 
295     if (Opt.Undefineds && (Flags & SymbolRef::SF_Undefined)) {
296       Linkage = RecordLinkage::Undefined;
297       if (Flags & SymbolRef::SF_Weak)
298         RecordFlags |= SymbolFlags::WeakReferenced;
299     } else if (Flags & SymbolRef::SF_Exported) {
300       auto Exp = Exports.find(Name);
301       // This should never be possible when binaries are produced with Apple
302       // linkers. However it is possible to craft dylibs where the export trie
303       // is either malformed or has conflicting symbols compared to n_list.
304       if (Exp != Exports.end())
305         std::tie(RecordFlags, Linkage) = Exp->second;
306       else
307         Linkage = RecordLinkage::Exported;
308     } else if (Flags & SymbolRef::SF_Hidden) {
309       Linkage = RecordLinkage::Internal;
310     } else
311       continue;
312 
313     auto TypeOrErr = Sym.getType();
314     if (!TypeOrErr)
315       return TypeOrErr.takeError();
316     auto Type = *TypeOrErr;
317 
318     GlobalRecord::Kind GV = (Type & SymbolRef::ST_Function)
319                                 ? GlobalRecord::Kind::Function
320                                 : GlobalRecord::Kind::Variable;
321 
322     if (GV == GlobalRecord::Kind::Function)
323       RecordFlags |= SymbolFlags::Text;
324     else
325       RecordFlags |= SymbolFlags::Data;
326 
327     Slice.addRecord(Name, RecordFlags, GV, Linkage);
328   }
329   return Err;
330 }
331 
332 static Error load(MachOObjectFile *Obj, RecordsSlice &Slice,
333                   const ParseOption &Opt, const Architecture Arch) {
334   if (Arch == AK_unknown)
335     return make_error<TextAPIError>(TextAPIErrorCode::UnsupportedTarget);
336 
337   if (Opt.MachOHeader)
338     if (auto Err = readMachOHeader(Obj, Slice))
339       return Err;
340 
341   if (Opt.SymbolTable)
342     if (auto Err = readSymbols(Obj, Slice, Opt))
343       return Err;
344 
345   return Error::success();
346 }
347 
348 Expected<Records> DylibReader::readFile(MemoryBufferRef Buffer,
349                                         const ParseOption &Opt) {
350   Records Results;
351 
352   auto BinOrErr = createBinary(Buffer);
353   if (!BinOrErr)
354     return BinOrErr.takeError();
355 
356   Binary &Bin = *BinOrErr.get();
357   if (auto *Obj = dyn_cast<MachOObjectFile>(&Bin)) {
358     const auto Arch = getArchitectureFromCpuType(Obj->getHeader().cputype,
359                                                  Obj->getHeader().cpusubtype);
360     if (!Opt.Archs.has(Arch))
361       return make_error<TextAPIError>(TextAPIErrorCode::NoSuchArchitecture);
362 
363     auto Triples = constructTriples(Obj, Arch);
364     for (const auto &T : Triples) {
365       if (mapToPlatformType(T) == PLATFORM_UNKNOWN)
366         return make_error<TextAPIError>(TextAPIErrorCode::UnsupportedTarget);
367       Results.emplace_back(std::make_shared<RecordsSlice>(RecordsSlice({T})));
368       if (auto Err = load(Obj, *Results.back(), Opt, Arch))
369         return std::move(Err);
370       Results.back()->getBinaryAttrs().Path = Buffer.getBufferIdentifier();
371     }
372     return Results;
373   }
374 
375   // Only expect MachO universal binaries at this point.
376   assert(isa<MachOUniversalBinary>(&Bin) &&
377          "Expected a MachO universal binary.");
378   auto *UB = cast<MachOUniversalBinary>(&Bin);
379 
380   for (auto OI = UB->begin_objects(), OE = UB->end_objects(); OI != OE; ++OI) {
381     // Skip architecture if not requested.
382     auto Arch =
383         getArchitectureFromCpuType(OI->getCPUType(), OI->getCPUSubType());
384     if (!Opt.Archs.has(Arch))
385       continue;
386 
387     // Skip unknown architectures.
388     if (Arch == AK_unknown)
389       continue;
390 
391     // This can fail if the object is an archive.
392     auto ObjOrErr = OI->getAsObjectFile();
393 
394     // Skip the archive and consume the error.
395     if (!ObjOrErr) {
396       consumeError(ObjOrErr.takeError());
397       continue;
398     }
399 
400     auto &Obj = *ObjOrErr.get();
401     switch (Obj.getHeader().filetype) {
402     default:
403       break;
404     case MachO::MH_BUNDLE:
405     case MachO::MH_DYLIB:
406     case MachO::MH_DYLIB_STUB:
407       for (const auto &T : constructTriples(&Obj, Arch)) {
408         Results.emplace_back(std::make_shared<RecordsSlice>(RecordsSlice({T})));
409         if (auto Err = load(&Obj, *Results.back(), Opt, Arch))
410           return std::move(Err);
411       }
412       break;
413     }
414   }
415 
416   if (Results.empty())
417     return make_error<TextAPIError>(TextAPIErrorCode::EmptyResults);
418   return Results;
419 }
420 
421 Expected<std::unique_ptr<InterfaceFile>>
422 DylibReader::get(MemoryBufferRef Buffer) {
423   ParseOption Options;
424   auto SlicesOrErr = readFile(Buffer, Options);
425   if (!SlicesOrErr)
426     return SlicesOrErr.takeError();
427 
428   return convertToInterfaceFile(*SlicesOrErr);
429 }
430