1 //===- Archive.h - ar archive file format -----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file declares the ar archive file format class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_OBJECT_ARCHIVE_H
14 #define LLVM_OBJECT_ARCHIVE_H
15 
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/fallible_iterator.h"
18 #include "llvm/ADT/iterator_range.h"
19 #include "llvm/Object/Binary.h"
20 #include "llvm/Support/Chrono.h"
21 #include "llvm/Support/Error.h"
22 #include "llvm/Support/FileSystem.h"
23 #include "llvm/Support/MemoryBuffer.h"
24 #include <cassert>
25 #include <cstdint>
26 #include <memory>
27 #include <string>
28 #include <vector>
29 
30 namespace llvm {
31 namespace object {
32 
/// Magic string identifying a regular archive.
/// Declared constexpr so the characters (not just the size) can be used in
/// constant expressions; the type remains `const char[N]`.
constexpr char ArchiveMagic[] = "!<arch>\n";
/// Magic string identifying a thin archive.
constexpr char ThinArchiveMagic[] = "!<thin>\n";
/// Magic string identifying a big archive; its length also sizes
/// BigArchive::FixLenHdr::Magic.
constexpr char BigArchiveMagic[] = "<bigaf>\n";

class Archive;
38 
39 class AbstractArchiveMemberHeader {
40 protected:
AbstractArchiveMemberHeader(const Archive * Parent)41   AbstractArchiveMemberHeader(const Archive *Parent) : Parent(Parent){};
42 
43 public:
44   friend class Archive;
45   virtual std::unique_ptr<AbstractArchiveMemberHeader> clone() const = 0;
46   virtual ~AbstractArchiveMemberHeader() = default;
47 
48   /// Get the name without looking up long names.
49   virtual Expected<StringRef> getRawName() const = 0;
50   virtual StringRef getRawAccessMode() const = 0;
51   virtual StringRef getRawLastModified() const = 0;
52   virtual StringRef getRawUID() const = 0;
53   virtual StringRef getRawGID() const = 0;
54 
55   /// Get the name looking up long names.
56   virtual Expected<StringRef> getName(uint64_t Size) const = 0;
57   virtual Expected<uint64_t> getSize() const = 0;
58   virtual uint64_t getOffset() const = 0;
59 
60   /// Get next file member location.
61   virtual Expected<const char *> getNextChildLoc() const = 0;
62   virtual Expected<bool> isThin() const = 0;
63 
64   Expected<sys::fs::perms> getAccessMode() const;
65   Expected<sys::TimePoint<std::chrono::seconds>> getLastModified() const;
66   Expected<unsigned> getUID() const;
67   Expected<unsigned> getGID() const;
68 
69   /// Returns the size in bytes of the format-defined member header of the
70   /// concrete archive type.
71   virtual uint64_t getSizeOf() const = 0;
72 
73   const Archive *Parent;
74 };
75 
76 template <typename T>
77 class CommonArchiveMemberHeader : public AbstractArchiveMemberHeader {
78 public:
CommonArchiveMemberHeader(const Archive * Parent,const T * RawHeaderPtr)79   CommonArchiveMemberHeader(const Archive *Parent, const T *RawHeaderPtr)
80       : AbstractArchiveMemberHeader(Parent), ArMemHdr(RawHeaderPtr){};
81   StringRef getRawAccessMode() const override;
82   StringRef getRawLastModified() const override;
83   StringRef getRawUID() const override;
84   StringRef getRawGID() const override;
85 
86   uint64_t getOffset() const override;
getSizeOf()87   uint64_t getSizeOf() const override { return sizeof(T); }
88 
89   T const *ArMemHdr;
90 };
91 
/// Raw layout of a Unix-style archive member header: seven fixed-width
/// character fields, 60 bytes in total.
struct UnixArMemHdrType {
  char Name[16];
  char LastModified[12];
  char UID[6];
  char GID[6];
  char AccessMode[8];
  /// Size of data, not including header or padding.
  char Size[10];
  char Terminator[2];
};
101 
102 class ArchiveMemberHeader : public CommonArchiveMemberHeader<UnixArMemHdrType> {
103 public:
104   ArchiveMemberHeader(const Archive *Parent, const char *RawHeaderPtr,
105                       uint64_t Size, Error *Err);
106 
clone()107   std::unique_ptr<AbstractArchiveMemberHeader> clone() const override {
108     return std::make_unique<ArchiveMemberHeader>(*this);
109   }
110 
111   Expected<StringRef> getRawName() const override;
112 
113   Expected<StringRef> getName(uint64_t Size) const override;
114   Expected<uint64_t> getSize() const override;
115   Expected<const char *> getNextChildLoc() const override;
116   Expected<bool> isThin() const override;
117 };
118 
/// File member header: raw layout of a big-archive member header.
/// All fields are fixed-width character arrays (114 bytes in total).
struct BigArMemHdrType {
  /// File member size in decimal.
  char Size[20];
  /// Next member offset in decimal.
  char NextOffset[20];
  /// Previous member offset in decimal.
  char PrevOffset[20];
  char LastModified[12];
  char UID[12];
  char GID[12];
  char AccessMode[12];
  /// File member name length in decimal.
  char NameLen[4];
  // The last two bytes are reachable under either name.
  union {
    /// Start of member name.
    char Name[2];
    char Terminator[2];
  };
};
134 
135 // Define file member header of AIX big archive.
136 class BigArchiveMemberHeader
137     : public CommonArchiveMemberHeader<BigArMemHdrType> {
138 
139 public:
140   BigArchiveMemberHeader(Archive const *Parent, const char *RawHeaderPtr,
141                          uint64_t Size, Error *Err);
clone()142   std::unique_ptr<AbstractArchiveMemberHeader> clone() const override {
143     return std::make_unique<BigArchiveMemberHeader>(*this);
144   }
145 
146   Expected<StringRef> getRawName() const override;
147   Expected<uint64_t> getRawNameSize() const;
148 
149   Expected<StringRef> getName(uint64_t Size) const override;
150   Expected<uint64_t> getSize() const override;
151   Expected<const char *> getNextChildLoc() const override;
152   Expected<uint64_t> getNextOffset() const;
isThin()153   Expected<bool> isThin() const override { return false; }
154 };
155 
156 class Archive : public Binary {
157   virtual void anchor();
158 
159 public:
160   class Child {
161     friend Archive;
162     friend AbstractArchiveMemberHeader;
163 
164     const Archive *Parent;
165     std::unique_ptr<AbstractArchiveMemberHeader> Header;
166     /// Includes header but not padding byte.
167     StringRef Data;
168     /// Offset from Data to the start of the file.
169     uint16_t StartOfFile;
170 
171     Expected<bool> isThinMember() const;
172 
173   public:
174     Child(const Archive *Parent, const char *Start, Error *Err);
175     Child(const Archive *Parent, StringRef Data, uint16_t StartOfFile);
176 
Child(const Child & C)177     Child(const Child &C)
178         : Parent(C.Parent), Data(C.Data), StartOfFile(C.StartOfFile) {
179       if (C.Header)
180         Header = C.Header->clone();
181     }
182 
Child(Child && C)183     Child(Child &&C) {
184       Parent = std::move(C.Parent);
185       Header = std::move(C.Header);
186       Data = C.Data;
187       StartOfFile = C.StartOfFile;
188     }
189 
190     Child &operator=(Child &&C) noexcept {
191       if (&C == this)
192         return *this;
193 
194       Parent = std::move(C.Parent);
195       Header = std::move(C.Header);
196       Data = C.Data;
197       StartOfFile = C.StartOfFile;
198 
199       return *this;
200     }
201 
202     Child &operator=(const Child &C) {
203       if (&C == this)
204         return *this;
205 
206       Parent = C.Parent;
207       if (C.Header)
208         Header = C.Header->clone();
209       Data = C.Data;
210       StartOfFile = C.StartOfFile;
211 
212       return *this;
213     }
214 
215     bool operator==(const Child &other) const {
216       assert(!Parent || !other.Parent || Parent == other.Parent);
217       return Data.begin() == other.Data.begin();
218     }
219 
getParent()220     const Archive *getParent() const { return Parent; }
221     Expected<Child> getNext() const;
222 
223     Expected<StringRef> getName() const;
224     Expected<std::string> getFullName() const;
getRawName()225     Expected<StringRef> getRawName() const { return Header->getRawName(); }
226 
getLastModified()227     Expected<sys::TimePoint<std::chrono::seconds>> getLastModified() const {
228       return Header->getLastModified();
229     }
230 
getRawLastModified()231     StringRef getRawLastModified() const {
232       return Header->getRawLastModified();
233     }
234 
getUID()235     Expected<unsigned> getUID() const { return Header->getUID(); }
getGID()236     Expected<unsigned> getGID() const { return Header->getGID(); }
237 
getAccessMode()238     Expected<sys::fs::perms> getAccessMode() const {
239       return Header->getAccessMode();
240     }
241 
242     /// \return the size of the archive member without the header or padding.
243     Expected<uint64_t> getSize() const;
244     /// \return the size in the archive header for this member.
245     Expected<uint64_t> getRawSize() const;
246 
247     Expected<StringRef> getBuffer() const;
248     uint64_t getChildOffset() const;
getDataOffset()249     uint64_t getDataOffset() const { return getChildOffset() + StartOfFile; }
250 
251     Expected<MemoryBufferRef> getMemoryBufferRef() const;
252 
253     Expected<std::unique_ptr<Binary>>
254     getAsBinary(LLVMContext *Context = nullptr) const;
255   };
256 
257   class ChildFallibleIterator {
258     Child C;
259 
260   public:
ChildFallibleIterator()261     ChildFallibleIterator() : C(Child(nullptr, nullptr, nullptr)) {}
ChildFallibleIterator(const Child & C)262     ChildFallibleIterator(const Child &C) : C(C) {}
263 
264     const Child *operator->() const { return &C; }
265     const Child &operator*() const { return C; }
266 
267     bool operator==(const ChildFallibleIterator &other) const {
268       // Ignore errors here: If an error occurred during increment then getNext
269       // will have been set to child_end(), and the following comparison should
270       // do the right thing.
271       return C == other.C;
272     }
273 
274     bool operator!=(const ChildFallibleIterator &other) const {
275       return !(*this == other);
276     }
277 
inc()278     Error inc() {
279       auto NextChild = C.getNext();
280       if (!NextChild)
281         return NextChild.takeError();
282       C = std::move(*NextChild);
283       return Error::success();
284     }
285   };
286 
287   using child_iterator = fallible_iterator<ChildFallibleIterator>;
288 
289   class Symbol {
290     const Archive *Parent;
291     uint32_t SymbolIndex;
292     uint32_t StringIndex; // Extra index to the string.
293 
294   public:
Symbol(const Archive * p,uint32_t symi,uint32_t stri)295     Symbol(const Archive *p, uint32_t symi, uint32_t stri)
296         : Parent(p), SymbolIndex(symi), StringIndex(stri) {}
297 
298     bool operator==(const Symbol &other) const {
299       return (Parent == other.Parent) && (SymbolIndex == other.SymbolIndex);
300     }
301 
302     StringRef getName() const;
303     Expected<Child> getMember() const;
304     Symbol getNext() const;
305     bool isECSymbol() const;
306   };
307 
308   class symbol_iterator {
309     Symbol symbol;
310 
311   public:
symbol_iterator(const Symbol & s)312     symbol_iterator(const Symbol &s) : symbol(s) {}
313 
314     const Symbol *operator->() const { return &symbol; }
315     const Symbol &operator*() const { return symbol; }
316 
317     bool operator==(const symbol_iterator &other) const {
318       return symbol == other.symbol;
319     }
320 
321     bool operator!=(const symbol_iterator &other) const {
322       return !(*this == other);
323     }
324 
325     symbol_iterator &operator++() { // Preincrement
326       symbol = symbol.getNext();
327       return *this;
328     }
329   };
330 
331   Archive(MemoryBufferRef Source, Error &Err);
332   static Expected<std::unique_ptr<Archive>> create(MemoryBufferRef Source);
333 
334   /// Size field is 10 decimal digits long
335   static const uint64_t MaxMemberSize = 9999999999;
336 
337   enum Kind { K_GNU, K_GNU64, K_BSD, K_DARWIN, K_DARWIN64, K_COFF, K_AIXBIG };
338 
kind()339   Kind kind() const { return (Kind)Format; }
isThin()340   bool isThin() const { return IsThin; }
341   static object::Archive::Kind getDefaultKindForHost();
342 
343   child_iterator child_begin(Error &Err, bool SkipInternal = true) const;
344   child_iterator child_end() const;
345   iterator_range<child_iterator> children(Error &Err,
346                                           bool SkipInternal = true) const {
347     return make_range(child_begin(Err, SkipInternal), child_end());
348   }
349 
350   symbol_iterator symbol_begin() const;
351   symbol_iterator symbol_end() const;
symbols()352   iterator_range<symbol_iterator> symbols() const {
353     return make_range(symbol_begin(), symbol_end());
354   }
355 
356   Expected<iterator_range<symbol_iterator>> ec_symbols() const;
357 
classof(Binary const * v)358   static bool classof(Binary const *v) { return v->isArchive(); }
359 
360   // check if a symbol is in the archive
361   Expected<std::optional<Child>> findSym(StringRef name) const;
362 
363   virtual bool isEmpty() const;
364   bool hasSymbolTable() const;
getSymbolTable()365   StringRef getSymbolTable() const { return SymbolTable; }
getStringTable()366   StringRef getStringTable() const { return StringTable; }
367   uint32_t getNumberOfSymbols() const;
368   uint32_t getNumberOfECSymbols() const;
getFirstChildOffset()369   virtual uint64_t getFirstChildOffset() const { return getArchiveMagicLen(); }
370 
takeThinBuffers()371   std::vector<std::unique_ptr<MemoryBuffer>> takeThinBuffers() {
372     return std::move(ThinBuffers);
373   }
374 
375   std::unique_ptr<AbstractArchiveMemberHeader>
376   createArchiveMemberHeader(const char *RawHeaderPtr, uint64_t Size,
377                             Error *Err) const;
378 
379 protected:
380   uint64_t getArchiveMagicLen() const;
381   void setFirstRegular(const Child &C);
382 
383   StringRef SymbolTable;
384   StringRef ECSymbolTable;
385   StringRef StringTable;
386 
387 private:
388   StringRef FirstRegularData;
389   uint16_t FirstRegularStartOfFile = -1;
390 
391   unsigned Format : 3;
392   unsigned IsThin : 1;
393   mutable std::vector<std::unique_ptr<MemoryBuffer>> ThinBuffers;
394 };
395 
396 class BigArchive : public Archive {
397 public:
398   /// Fixed-Length Header.
399   struct FixLenHdr {
400     char Magic[sizeof(BigArchiveMagic) - 1]; ///< Big archive magic string.
401     char MemOffset[20];                      ///< Offset to member table.
402     char GlobSymOffset[20];                  ///< Offset to global symbol table.
403     char
404         GlobSym64Offset[20]; ///< Offset global symbol table for 64-bit objects.
405     char FirstChildOffset[20]; ///< Offset to first archive member.
406     char LastChildOffset[20];  ///< Offset to last archive member.
407     char FreeOffset[20];       ///< Offset to first mem on free list.
408   };
409 
410   const FixLenHdr *ArFixLenHdr;
411   uint64_t FirstChildOffset = 0;
412   uint64_t LastChildOffset = 0;
413   std::string MergedGlobalSymtabBuf;
414   bool Has32BitGlobalSymtab = false;
415   bool Has64BitGlobalSymtab = false;
416 
417 public:
418   BigArchive(MemoryBufferRef Source, Error &Err);
getFirstChildOffset()419   uint64_t getFirstChildOffset() const override { return FirstChildOffset; }
getLastChildOffset()420   uint64_t getLastChildOffset() const { return LastChildOffset; }
isEmpty()421   bool isEmpty() const override { return getFirstChildOffset() == 0; }
422 
has32BitGlobalSymtab()423   bool has32BitGlobalSymtab() { return Has32BitGlobalSymtab; }
has64BitGlobalSymtab()424   bool has64BitGlobalSymtab() { return Has64BitGlobalSymtab; }
425 };
426 
427 } // end namespace object
428 } // end namespace llvm
429 
430 #endif // LLVM_OBJECT_ARCHIVE_H
431