1 //===-- ObjectContainerBSDArchive.cpp -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ObjectContainerBSDArchive.h"
10 
11 #if defined(_WIN32) || defined(__ANDROID__)
// Definitions normally provided by <ar.h>, which is missing on Windows and
// Android
13 #define ARMAG "!<arch>\n"
14 #define SARMAG 8
15 #define ARFMAG "`\n"
16 
// Fallback definition of the archive member header for platforms without
// <ar.h>. Every field is a fixed-width character array; the fields add up to
// 60 bytes.
typedef struct ar_hdr {
  char ar_name[16];          // File name
  char ar_date[12];          // File modification time
  char ar_uid[6], ar_gid[6]; // Owner ID and group ID
  char ar_mode[8];           // File mode
  char ar_size[10];          // File byte size
  char ar_fmag[2];           // File magic, compared against ARFMAG
} ar_hdr;
25 #else
26 #include <ar.h>
27 #endif
28 
29 #include "lldb/Core/Module.h"
30 #include "lldb/Core/ModuleSpec.h"
31 #include "lldb/Core/PluginManager.h"
32 #include "lldb/Host/FileSystem.h"
33 #include "lldb/Symbol/ObjectFile.h"
34 #include "lldb/Utility/ArchSpec.h"
35 #include "lldb/Utility/Stream.h"
36 #include "lldb/Utility/Timer.h"
37 
38 #include "llvm/Support/MemoryBuffer.h"
39 
40 using namespace lldb;
41 using namespace lldb_private;
42 
// Plugin boilerplate for this class, produced by the LLDB_PLUGIN_DEFINE macro
// (the macro itself is defined outside this file).
LLDB_PLUGIN_DEFINE(ObjectContainerBSDArchive)
44 
// Default constructor: explicitly value-initializes the member name.
ObjectContainerBSDArchive::Object::Object() : ar_name() {}
46 
47 void ObjectContainerBSDArchive::Object::Clear() {
48   ar_name.Clear();
49   modification_time = 0;
50   uid = 0;
51   gid = 0;
52   mode = 0;
53   size = 0;
54   file_offset = 0;
55   file_size = 0;
56 }
57 
58 lldb::offset_t
59 ObjectContainerBSDArchive::Object::Extract(const DataExtractor &data,
60                                            lldb::offset_t offset) {
61   size_t ar_name_len = 0;
62   std::string str;
63   char *err;
64 
65   // File header
66   //
67   // The common format is as follows.
68   //
69   //  Offset  Length	Name            Format
70   //  0       16      File name       ASCII right padded with spaces (no spaces
71   //  allowed in file name)
72   //  16      12      File mod        Decimal as cstring right padded with
73   //  spaces
74   //  28      6       Owner ID        Decimal as cstring right padded with
75   //  spaces
76   //  34      6       Group ID        Decimal as cstring right padded with
77   //  spaces
78   //  40      8       File mode       Octal   as cstring right padded with
79   //  spaces
80   //  48      10      File byte size  Decimal as cstring right padded with
81   //  spaces
82   //  58      2       File magic      0x60 0x0A
83 
84   // Make sure there is enough data for the file header and bail if not
85   if (!data.ValidOffsetForDataOfSize(offset, 60))
86     return LLDB_INVALID_OFFSET;
87 
88   str.assign((const char *)data.GetData(&offset, 16), 16);
89   if (llvm::StringRef(str).startswith("#1/")) {
90     // If the name is longer than 16 bytes, or contains an embedded space then
91     // it will use this format where the length of the name is here and the
92     // name characters are after this header.
93     ar_name_len = strtoul(str.c_str() + 3, &err, 10);
94   } else {
95     // Strip off any trailing spaces.
96     const size_t last_pos = str.find_last_not_of(' ');
97     if (last_pos != std::string::npos) {
98       if (last_pos + 1 < 16)
99         str.erase(last_pos + 1);
100     }
101     ar_name.SetCString(str.c_str());
102   }
103 
104   str.assign((const char *)data.GetData(&offset, 12), 12);
105   modification_time = strtoul(str.c_str(), &err, 10);
106 
107   str.assign((const char *)data.GetData(&offset, 6), 6);
108   uid = strtoul(str.c_str(), &err, 10);
109 
110   str.assign((const char *)data.GetData(&offset, 6), 6);
111   gid = strtoul(str.c_str(), &err, 10);
112 
113   str.assign((const char *)data.GetData(&offset, 8), 8);
114   mode = strtoul(str.c_str(), &err, 8);
115 
116   str.assign((const char *)data.GetData(&offset, 10), 10);
117   size = strtoul(str.c_str(), &err, 10);
118 
119   str.assign((const char *)data.GetData(&offset, 2), 2);
120   if (str == ARFMAG) {
121     if (ar_name_len > 0) {
122       const void *ar_name_ptr = data.GetData(&offset, ar_name_len);
123       // Make sure there was enough data for the string value and bail if not
124       if (ar_name_ptr == nullptr)
125         return LLDB_INVALID_OFFSET;
126       str.assign((const char *)ar_name_ptr, ar_name_len);
127       ar_name.SetCString(str.c_str());
128     }
129     file_offset = offset;
130     file_size = size - ar_name_len;
131     return offset;
132   }
133   return LLDB_INVALID_OFFSET;
134 }
135 
// Records the architecture, modification time, file offset and data for an
// archive. The object list starts out empty; ParseObjects() populates it.
ObjectContainerBSDArchive::Archive::Archive(const lldb_private::ArchSpec &arch,
                                            const llvm::sys::TimePoint<> &time,
                                            lldb::offset_t file_offset,
                                            lldb_private::DataExtractor &data)
    : m_arch(arch), m_modification_time(time), m_file_offset(file_offset),
      m_objects(), m_data(data) {}
142 
// Compiler-generated destructor, defined out of line in this file.
ObjectContainerBSDArchive::Archive::~Archive() = default;
144 
145 size_t ObjectContainerBSDArchive::Archive::ParseObjects() {
146   DataExtractor &data = m_data;
147   std::string str;
148   lldb::offset_t offset = 0;
149   str.assign((const char *)data.GetData(&offset, SARMAG), SARMAG);
150   if (str == ARMAG) {
151     Object obj;
152     do {
153       offset = obj.Extract(data, offset);
154       if (offset == LLDB_INVALID_OFFSET)
155         break;
156       size_t obj_idx = m_objects.size();
157       m_objects.push_back(obj);
158       // Insert all of the C strings out of order for now...
159       m_object_name_to_index_map.Append(obj.ar_name, obj_idx);
160       offset += obj.file_size;
161       obj.Clear();
162     } while (data.ValidOffset(offset));
163 
164     // Now sort all of the object name pointers
165     m_object_name_to_index_map.Sort();
166   }
167   return m_objects.size();
168 }
169 
170 ObjectContainerBSDArchive::Object *
171 ObjectContainerBSDArchive::Archive::FindObject(
172     ConstString object_name, const llvm::sys::TimePoint<> &object_mod_time) {
173   const ObjectNameToIndexMap::Entry *match =
174       m_object_name_to_index_map.FindFirstValueForName(object_name);
175   if (!match)
176     return nullptr;
177   if (object_mod_time == llvm::sys::TimePoint<>())
178     return &m_objects[match->value];
179 
180   const uint64_t object_modification_date = llvm::sys::toTimeT(object_mod_time);
181   if (m_objects[match->value].modification_time == object_modification_date)
182     return &m_objects[match->value];
183 
184   const ObjectNameToIndexMap::Entry *next_match =
185       m_object_name_to_index_map.FindNextValueForName(match);
186   while (next_match) {
187     if (m_objects[next_match->value].modification_time ==
188         object_modification_date)
189       return &m_objects[next_match->value];
190     next_match = m_object_name_to_index_map.FindNextValueForName(next_match);
191   }
192 
193   return nullptr;
194 }
195 
196 ObjectContainerBSDArchive::Archive::shared_ptr
197 ObjectContainerBSDArchive::Archive::FindCachedArchive(
198     const FileSpec &file, const ArchSpec &arch,
199     const llvm::sys::TimePoint<> &time, lldb::offset_t file_offset) {
200   std::lock_guard<std::recursive_mutex> guard(Archive::GetArchiveCacheMutex());
201   shared_ptr archive_sp;
202   Archive::Map &archive_map = Archive::GetArchiveCache();
203   Archive::Map::iterator pos = archive_map.find(file);
204   // Don't cache a value for "archive_map.end()" below since we might delete an
205   // archive entry...
206   while (pos != archive_map.end() && pos->first == file) {
207     bool match = true;
208     if (arch.IsValid() &&
209         !pos->second->GetArchitecture().IsCompatibleMatch(arch))
210       match = false;
211     else if (file_offset != LLDB_INVALID_OFFSET &&
212              pos->second->GetFileOffset() != file_offset)
213       match = false;
214     if (match) {
215       if (pos->second->GetModificationTime() == time) {
216         return pos->second;
217       } else {
218         // We have a file at the same path with the same architecture whose
219         // modification time doesn't match. It doesn't make sense for us to
220         // continue to use this BSD archive since we cache only the object info
221         // which consists of file time info and also the file offset and file
222         // size of any contained objects. Since this information is now out of
223         // date, we won't get the correct information if we go and extract the
224         // file data, so we should remove the old and outdated entry.
225         archive_map.erase(pos);
226         pos = archive_map.find(file);
227         continue; // Continue to next iteration so we don't increment pos
228                   // below...
229       }
230     }
231     ++pos;
232   }
233   return archive_sp;
234 }
235 
236 ObjectContainerBSDArchive::Archive::shared_ptr
237 ObjectContainerBSDArchive::Archive::ParseAndCacheArchiveForFile(
238     const FileSpec &file, const ArchSpec &arch,
239     const llvm::sys::TimePoint<> &time, lldb::offset_t file_offset,
240     DataExtractor &data) {
241   shared_ptr archive_sp(new Archive(arch, time, file_offset, data));
242   if (archive_sp) {
243     const size_t num_objects = archive_sp->ParseObjects();
244     if (num_objects > 0) {
245       std::lock_guard<std::recursive_mutex> guard(
246           Archive::GetArchiveCacheMutex());
247       Archive::GetArchiveCache().insert(std::make_pair(file, archive_sp));
248     } else {
249       archive_sp.reset();
250     }
251   }
252   return archive_sp;
253 }
254 
// Returns the single shared archive cache, stored as a function-local static.
// The callers in this file hold GetArchiveCacheMutex() while using it.
ObjectContainerBSDArchive::Archive::Map &
ObjectContainerBSDArchive::Archive::GetArchiveCache() {
  static Archive::Map g_archive_map;
  return g_archive_map;
}
260 
// Returns the recursive mutex, stored as a function-local static, that the
// code in this file locks around accesses to the archive cache.
std::recursive_mutex &
ObjectContainerBSDArchive::Archive::GetArchiveCacheMutex() {
  static std::recursive_mutex g_archive_map_mutex;
  return g_archive_map_mutex;
}
266 
// Registers this plugin with the PluginManager, supplying its name, its
// description, the CreateInstance factory and the GetModuleSpecifications
// callback.
void ObjectContainerBSDArchive::Initialize() {
  PluginManager::RegisterPlugin(GetPluginNameStatic(),
                                GetPluginDescriptionStatic(), CreateInstance,
                                GetModuleSpecifications);
}
272 
// Unregisters this plugin from the PluginManager, identifying it by its
// CreateInstance callback.
void ObjectContainerBSDArchive::Terminate() {
  PluginManager::UnregisterPlugin(CreateInstance);
}
276 
// Returns the plugin name, "bsd-archive". The ConstString is a function-local
// static, so it is constructed only on the first call.
lldb_private::ConstString ObjectContainerBSDArchive::GetPluginNameStatic() {
  static ConstString g_name("bsd-archive");
  return g_name;
}
281 
// Returns the human-readable description passed to the PluginManager when the
// plugin is registered.
const char *ObjectContainerBSDArchive::GetPluginDescriptionStatic() {
  return "BSD Archive object container reader.";
}
285 
286 ObjectContainer *ObjectContainerBSDArchive::CreateInstance(
287     const lldb::ModuleSP &module_sp, DataBufferSP &data_sp,
288     lldb::offset_t data_offset, const FileSpec *file,
289     lldb::offset_t file_offset, lldb::offset_t length) {
290   ConstString object_name(module_sp->GetObjectName());
291   if (!object_name)
292     return nullptr;
293 
294   if (data_sp) {
295     // We have data, which means this is the first 512 bytes of the file Check
296     // to see if the magic bytes match and if they do, read the entire table of
297     // contents for the archive and cache it
298     DataExtractor data;
299     data.SetData(data_sp, data_offset, length);
300     if (file && data_sp && ObjectContainerBSDArchive::MagicBytesMatch(data)) {
301       LLDB_SCOPED_TIMERF(
302           "ObjectContainerBSDArchive::CreateInstance (module = %s, file = "
303           "%p, file_offset = 0x%8.8" PRIx64 ", file_size = 0x%8.8" PRIx64 ")",
304           module_sp->GetFileSpec().GetPath().c_str(),
305           static_cast<const void *>(file), static_cast<uint64_t>(file_offset),
306           static_cast<uint64_t>(length));
307 
308       // Map the entire .a file to be sure that we don't lose any data if the
309       // file gets updated by a new build while this .a file is being used for
310       // debugging
311       DataBufferSP archive_data_sp =
312           FileSystem::Instance().CreateDataBuffer(*file, length, file_offset);
313       if (!archive_data_sp)
314         return nullptr;
315 
316       lldb::offset_t archive_data_offset = 0;
317 
318       Archive::shared_ptr archive_sp(Archive::FindCachedArchive(
319           *file, module_sp->GetArchitecture(), module_sp->GetModificationTime(),
320           file_offset));
321       std::unique_ptr<ObjectContainerBSDArchive> container_up(
322           new ObjectContainerBSDArchive(module_sp, archive_data_sp,
323                                         archive_data_offset, file, file_offset,
324                                         length));
325 
326       if (container_up) {
327         if (archive_sp) {
328           // We already have this archive in our cache, use it
329           container_up->SetArchive(archive_sp);
330           return container_up.release();
331         } else if (container_up->ParseHeader())
332           return container_up.release();
333       }
334     }
335   } else {
336     // No data, just check for a cached archive
337     Archive::shared_ptr archive_sp(Archive::FindCachedArchive(
338         *file, module_sp->GetArchitecture(), module_sp->GetModificationTime(),
339         file_offset));
340     if (archive_sp) {
341       std::unique_ptr<ObjectContainerBSDArchive> container_up(
342           new ObjectContainerBSDArchive(module_sp, data_sp, data_offset, file,
343                                         file_offset, length));
344 
345       if (container_up) {
346         // We already have this archive in our cache, use it
347         container_up->SetArchive(archive_sp);
348         return container_up.release();
349       }
350     }
351   }
352   return nullptr;
353 }
354 
355 bool ObjectContainerBSDArchive::MagicBytesMatch(const DataExtractor &data) {
356   uint32_t offset = 0;
357   const char *armag = (const char *)data.PeekData(offset, sizeof(ar_hdr));
358   if (armag && ::strncmp(armag, ARMAG, SARMAG) == 0) {
359     armag += offsetof(struct ar_hdr, ar_fmag) + SARMAG;
360     if (strncmp(armag, ARFMAG, 2) == 0)
361       return true;
362   }
363   return false;
364 }
365 
// Forwards all arguments to the ObjectContainer base class. The archive
// pointer starts out empty; SetArchive() or ParseHeader() fills it in later.
ObjectContainerBSDArchive::ObjectContainerBSDArchive(
    const lldb::ModuleSP &module_sp, DataBufferSP &data_sp,
    lldb::offset_t data_offset, const lldb_private::FileSpec *file,
    lldb::offset_t file_offset, lldb::offset_t size)
    : ObjectContainer(module_sp, file, file_offset, size, data_sp, data_offset),
      m_archive_sp() {}
// Attaches an already parsed archive to this container; once set,
// ParseHeader() returns true without parsing anything.
void ObjectContainerBSDArchive::SetArchive(Archive::shared_ptr &archive_sp) {
  m_archive_sp = archive_sp;
}
375 
// Compiler-generated destructor, defined out of line in this file.
ObjectContainerBSDArchive::~ObjectContainerBSDArchive() = default;
377 
378 bool ObjectContainerBSDArchive::ParseHeader() {
379   if (m_archive_sp.get() == nullptr) {
380     if (m_data.GetByteSize() > 0) {
381       ModuleSP module_sp(GetModule());
382       if (module_sp) {
383         m_archive_sp = Archive::ParseAndCacheArchiveForFile(
384             m_file, module_sp->GetArchitecture(),
385             module_sp->GetModificationTime(), m_offset, m_data);
386       }
387       // Clear the m_data that contains the entire archive data and let our
388       // m_archive_sp hold onto the data.
389       m_data.Clear();
390     }
391   }
392   return m_archive_sp.get() != nullptr;
393 }
394 
395 void ObjectContainerBSDArchive::Dump(Stream *s) const {
396   s->Printf("%p: ", static_cast<const void *>(this));
397   s->Indent();
398   const size_t num_archs = GetNumArchitectures();
399   const size_t num_objects = GetNumObjects();
400   s->Printf("ObjectContainerBSDArchive, num_archs = %" PRIu64
401             ", num_objects = %" PRIu64 "",
402             (uint64_t)num_archs, (uint64_t)num_objects);
403   uint32_t i;
404   ArchSpec arch;
405   s->IndentMore();
406   for (i = 0; i < num_archs; i++) {
407     s->Indent();
408     GetArchitectureAtIndex(i, arch);
409     s->Printf("arch[%u] = %s\n", i, arch.GetArchitectureName());
410   }
411   for (i = 0; i < num_objects; i++) {
412     s->Indent();
413     s->Printf("object[%u] = %s\n", i, GetObjectNameAtIndex(i));
414   }
415   s->IndentLess();
416   s->EOL();
417 }
418 
419 ObjectFileSP ObjectContainerBSDArchive::GetObjectFile(const FileSpec *file) {
420   ModuleSP module_sp(GetModule());
421   if (module_sp) {
422     if (module_sp->GetObjectName() && m_archive_sp) {
423       Object *object = m_archive_sp->FindObject(
424           module_sp->GetObjectName(), module_sp->GetObjectModificationTime());
425       if (object) {
426         lldb::offset_t data_offset = object->file_offset;
427         return ObjectFile::FindPlugin(
428             module_sp, file, m_offset + object->file_offset, object->file_size,
429             m_archive_sp->GetData().GetSharedDataBuffer(), data_offset);
430       }
431     }
432   }
433   return ObjectFileSP();
434 }
435 
436 // PluginInterface protocol
// Instance-level accessor for the plugin name; returns the same value as
// GetPluginNameStatic().
lldb_private::ConstString ObjectContainerBSDArchive::GetPluginName() {
  return GetPluginNameStatic();
}
440 
// Returns the plugin version, which is always 1.
uint32_t ObjectContainerBSDArchive::GetPluginVersion() { return 1; }
442 
443 size_t ObjectContainerBSDArchive::GetModuleSpecifications(
444     const lldb_private::FileSpec &file, lldb::DataBufferSP &data_sp,
445     lldb::offset_t data_offset, lldb::offset_t file_offset,
446     lldb::offset_t file_size, lldb_private::ModuleSpecList &specs) {
447 
448   // We have data, which means this is the first 512 bytes of the file Check to
449   // see if the magic bytes match and if they do, read the entire table of
450   // contents for the archive and cache it
451   DataExtractor data;
452   data.SetData(data_sp, data_offset, data_sp->GetByteSize());
453   if (!file || !data_sp || !ObjectContainerBSDArchive::MagicBytesMatch(data))
454     return 0;
455 
456   const size_t initial_count = specs.GetSize();
457   llvm::sys::TimePoint<> file_mod_time = FileSystem::Instance().GetModificationTime(file);
458   Archive::shared_ptr archive_sp(
459       Archive::FindCachedArchive(file, ArchSpec(), file_mod_time, file_offset));
460   bool set_archive_arch = false;
461   if (!archive_sp) {
462     set_archive_arch = true;
463     data_sp =
464         FileSystem::Instance().CreateDataBuffer(file, file_size, file_offset);
465     if (data_sp) {
466       data.SetData(data_sp, 0, data_sp->GetByteSize());
467       archive_sp = Archive::ParseAndCacheArchiveForFile(
468           file, ArchSpec(), file_mod_time, file_offset, data);
469     }
470   }
471 
472   if (archive_sp) {
473     const size_t num_objects = archive_sp->GetNumObjects();
474     for (size_t idx = 0; idx < num_objects; ++idx) {
475       const Object *object = archive_sp->GetObjectAtIndex(idx);
476       if (object) {
477         const lldb::offset_t object_file_offset =
478             file_offset + object->file_offset;
479         if (object->file_offset < file_size && file_size > object_file_offset) {
480           if (ObjectFile::GetModuleSpecifications(
481                   file, object_file_offset, file_size - object_file_offset,
482                   specs)) {
483             ModuleSpec &spec =
484                 specs.GetModuleSpecRefAtIndex(specs.GetSize() - 1);
485             llvm::sys::TimePoint<> object_mod_time(
486                 std::chrono::seconds(object->modification_time));
487             spec.GetObjectName() = object->ar_name;
488             spec.SetObjectOffset(object_file_offset);
489             spec.SetObjectSize(file_size - object_file_offset);
490             spec.GetObjectModificationTime() = object_mod_time;
491           }
492         }
493       }
494     }
495   }
496   const size_t end_count = specs.GetSize();
497   size_t num_specs_added = end_count - initial_count;
498   if (set_archive_arch && num_specs_added > 0) {
499     // The archive was created but we didn't have an architecture so we need to
500     // set it
501     for (size_t i = initial_count; i < end_count; ++i) {
502       ModuleSpec module_spec;
503       if (specs.GetModuleSpecAtIndex(i, module_spec)) {
504         if (module_spec.GetArchitecture().IsValid()) {
505           archive_sp->SetArchitecture(module_spec.GetArchitecture());
506           break;
507         }
508       }
509     }
510   }
511   return num_specs_added;
512 }
513