1 //===-- Symbol.h ------------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLDB_SYMBOL_SYMBOL_H
10 #define LLDB_SYMBOL_SYMBOL_H
11 
12 #include "lldb/Core/AddressRange.h"
13 #include "lldb/Core/Mangled.h"
14 #include "lldb/Core/Section.h"
15 #include "lldb/Symbol/SymbolContextScope.h"
16 #include "lldb/Utility/Stream.h"
17 #include "lldb/Utility/UserID.h"
18 #include "lldb/lldb-private.h"
19 #include "llvm/Support/JSON.h"
20 
21 namespace lldb_private {
22 
23 struct JSONSymbol {
24   std::optional<uint64_t> address;
25   std::optional<uint64_t> value;
26   std::optional<uint64_t> size;
27   std::optional<uint64_t> id;
28   std::optional<lldb::SymbolType> type;
29   std::string name;
30 };
31 
32 class Symbol : public SymbolContextScope {
33 public:
34   // ObjectFile readers can classify their symbol table entries and searches
35   // can be made on specific types where the symbol values will have
36   // drastically different meanings and sorting requirements.
37   Symbol();
38 
39   Symbol(uint32_t symID, llvm::StringRef name, lldb::SymbolType type,
40          bool external, bool is_debug, bool is_trampoline, bool is_artificial,
41          const lldb::SectionSP &section_sp, lldb::addr_t value,
42          lldb::addr_t size, bool size_is_valid,
43          bool contains_linker_annotations, uint32_t flags);
44 
45   Symbol(uint32_t symID, const Mangled &mangled, lldb::SymbolType type,
46          bool external, bool is_debug, bool is_trampoline, bool is_artificial,
47          const AddressRange &range, bool size_is_valid,
48          bool contains_linker_annotations, uint32_t flags);
49 
50   Symbol(const Symbol &rhs);
51 
52   const Symbol &operator=(const Symbol &rhs);
53 
54   static llvm::Expected<Symbol> FromJSON(const JSONSymbol &symbol,
55                                          SectionList *section_list);
56 
57   void Clear();
58 
59   bool Compare(ConstString name, lldb::SymbolType type) const;
60 
61   void Dump(Stream *s, Target *target, uint32_t index,
62             Mangled::NamePreference name_preference =
63                 Mangled::ePreferDemangled) const;
64 
65   bool ValueIsAddress() const;
66 
67   // The GetAddressRef() accessor functions should only be called if you
68   // previously call ValueIsAddress() otherwise you might get an reference to
69   // an Address object that contains an constant integer value in
70   // m_addr_range.m_base_addr.m_offset which could be incorrectly used to
71   // represent an absolute address since it has no section.
GetAddressRef()72   Address &GetAddressRef() { return m_addr_range.GetBaseAddress(); }
73 
GetAddressRef()74   const Address &GetAddressRef() const { return m_addr_range.GetBaseAddress(); }
75 
76   // Makes sure the symbol's value is an address and returns the file address.
77   // Returns LLDB_INVALID_ADDRESS if the symbol's value isn't an address.
78   lldb::addr_t GetFileAddress() const;
79 
80   // Makes sure the symbol's value is an address and gets the load address
81   // using \a target if it is. Returns LLDB_INVALID_ADDRESS if the symbol's
82   // value isn't an address or if the section isn't loaded in \a target.
83   lldb::addr_t GetLoadAddress(Target *target) const;
84 
85   // Access the address value. Do NOT hand out the AddressRange as an object as
86   // the byte size of the address range may not be filled in and it should be
87   // accessed via GetByteSize().
GetAddress()88   Address GetAddress() const {
89     // Make sure the our value is an address before we hand a copy out. We use
90     // the Address inside m_addr_range to contain the value for symbols that
91     // are not address based symbols so we are using it for more than just
92     // addresses. For example undefined symbols on MacOSX have a nlist.n_value
93     // of 0 (zero) and this will get placed into
94     // m_addr_range.m_base_addr.m_offset and it will have no section. So in the
95     // GetAddress() accessor, we need to hand out an invalid address if the
96     // symbol's value isn't an address.
97     if (ValueIsAddress())
98       return m_addr_range.GetBaseAddress();
99     else
100       return Address();
101   }
102 
103   /// Get the raw value of the symbol from the symbol table.
104   ///
105   /// If the symbol's value is an address, return the file address, else return
106   /// the raw value that is stored in the m_addr_range. If the base address has
107   /// no section, then getting the file address will return the correct value
108   /// as it will return the offset in the base address which is the value.
GetRawValue()109   uint64_t GetRawValue() const {
110     return m_addr_range.GetBaseAddress().GetFileAddress();
111   }
112 
113   // When a symbol's value isn't an address, we need to access the raw value.
114   // This function will ensure this symbol's value isn't an address and return
115   // the integer value if this checks out, otherwise it will return
116   // "fail_value" if the symbol is an address value.
117   uint64_t GetIntegerValue(uint64_t fail_value = 0) const {
118     if (ValueIsAddress()) {
119       // This symbol's value is an address. Use Symbol::GetAddress() to get the
120       // address.
121       return fail_value;
122     } else {
123       // The value is stored in the base address' offset
124       return m_addr_range.GetBaseAddress().GetOffset();
125     }
126   }
127 
128   lldb::addr_t ResolveCallableAddress(Target &target) const;
129 
130   ConstString GetName() const;
131 
132   ConstString GetNameNoArguments() const;
133 
134   ConstString GetDisplayName() const;
135 
GetID()136   uint32_t GetID() const { return m_uid; }
137 
GetLanguage()138   lldb::LanguageType GetLanguage() const {
139     // TODO: See if there is a way to determine the language for a symbol
140     // somehow, for now just return our best guess
141     return GetMangled().GuessLanguage();
142   }
143 
SetID(uint32_t uid)144   void SetID(uint32_t uid) { m_uid = uid; }
145 
GetMangled()146   Mangled &GetMangled() {
147     SynthesizeNameIfNeeded();
148     return m_mangled;
149   }
150 
GetMangled()151   const Mangled &GetMangled() const {
152     SynthesizeNameIfNeeded();
153     return m_mangled;
154   }
155 
156   ConstString GetReExportedSymbolName() const;
157 
158   FileSpec GetReExportedSymbolSharedLibrary() const;
159 
160   void SetReExportedSymbolName(ConstString name);
161 
162   bool SetReExportedSymbolSharedLibrary(const FileSpec &fspec);
163 
164   Symbol *ResolveReExportedSymbol(Target &target) const;
165 
166   uint32_t GetSiblingIndex() const;
167 
GetType()168   lldb::SymbolType GetType() const { return (lldb::SymbolType)m_type; }
169 
SetType(lldb::SymbolType type)170   void SetType(lldb::SymbolType type) { m_type = (lldb::SymbolType)type; }
171 
172   const char *GetTypeAsString() const;
173 
GetFlags()174   uint32_t GetFlags() const { return m_flags; }
175 
SetFlags(uint32_t flags)176   void SetFlags(uint32_t flags) { m_flags = flags; }
177 
178   void GetDescription(
179       Stream *s, lldb::DescriptionLevel level, Target *target,
180       std::optional<Stream::HighlightSettings> settings = std::nullopt) const;
181 
IsSynthetic()182   bool IsSynthetic() const { return m_is_synthetic; }
183 
184   bool IsSyntheticWithAutoGeneratedName() const;
185 
SetIsSynthetic(bool b)186   void SetIsSynthetic(bool b) { m_is_synthetic = b; }
187 
GetSizeIsSynthesized()188   bool GetSizeIsSynthesized() const { return m_size_is_synthesized; }
189 
SetSizeIsSynthesized(bool b)190   void SetSizeIsSynthesized(bool b) { m_size_is_synthesized = b; }
191 
IsDebug()192   bool IsDebug() const { return m_is_debug; }
193 
SetDebug(bool b)194   void SetDebug(bool b) { m_is_debug = b; }
195 
IsExternal()196   bool IsExternal() const { return m_is_external; }
197 
SetExternal(bool b)198   void SetExternal(bool b) { m_is_external = b; }
199 
200   bool IsTrampoline() const;
201 
202   bool IsIndirect() const;
203 
IsWeak()204   bool IsWeak() const { return m_is_weak; }
205 
SetIsWeak(bool b)206   void SetIsWeak(bool b) { m_is_weak = b; }
207 
GetByteSizeIsValid()208   bool GetByteSizeIsValid() const { return m_size_is_valid; }
209 
210   lldb::addr_t GetByteSize() const;
211 
SetByteSize(lldb::addr_t size)212   void SetByteSize(lldb::addr_t size) {
213     m_size_is_valid = size > 0;
214     m_addr_range.SetByteSize(size);
215   }
216 
GetSizeIsSibling()217   bool GetSizeIsSibling() const { return m_size_is_sibling; }
218 
SetSizeIsSibling(bool b)219   void SetSizeIsSibling(bool b) { m_size_is_sibling = b; }
220 
221   // If m_type is "Code" or "Function" then this will return the prologue size
222   // in bytes, else it will return zero.
223   uint32_t GetPrologueByteSize();
224 
GetDemangledNameIsSynthesized()225   bool GetDemangledNameIsSynthesized() const {
226     return m_demangled_is_synthesized;
227   }
228 
SetDemangledNameIsSynthesized(bool b)229   void SetDemangledNameIsSynthesized(bool b) { m_demangled_is_synthesized = b; }
230 
ContainsLinkerAnnotations()231   bool ContainsLinkerAnnotations() const {
232     return m_contains_linker_annotations;
233   }
SetContainsLinkerAnnotations(bool b)234   void SetContainsLinkerAnnotations(bool b) {
235     m_contains_linker_annotations = b;
236   }
237   /// \copydoc SymbolContextScope::CalculateSymbolContext(SymbolContext*)
238   ///
239   /// \see SymbolContextScope
240   void CalculateSymbolContext(SymbolContext *sc) override;
241 
242   lldb::ModuleSP CalculateSymbolContextModule() override;
243 
244   Symbol *CalculateSymbolContextSymbol() override;
245 
246   /// \copydoc SymbolContextScope::DumpSymbolContext(Stream*)
247   ///
248   /// \see SymbolContextScope
249   void DumpSymbolContext(Stream *s) override;
250 
251   lldb::DisassemblerSP GetInstructions(const ExecutionContext &exe_ctx,
252                                        const char *flavor,
253                                        bool prefer_file_cache);
254 
255   bool GetDisassembly(const ExecutionContext &exe_ctx, const char *flavor,
256                       bool prefer_file_cache, Stream &strm);
257 
258   bool ContainsFileAddress(lldb::addr_t file_addr) const;
259 
GetSyntheticSymbolPrefix()260   static llvm::StringRef GetSyntheticSymbolPrefix() {
261     return "___lldb_unnamed_symbol";
262   }
263 
264   /// Decode a serialized version of this object from data.
265   ///
266   /// \param data
267   ///   The decoder object that references the serialized data.
268   ///
269   /// \param offset_ptr
270   ///   A pointer that contains the offset from which the data will be decoded
271   ///   from that gets updated as data gets decoded.
272   ///
273   /// \param section_list
274   ///   A section list that allows lldb_private::Address objects to be filled
275   ///   in. The address information for symbols are serilized as file addresses
276   ///   and must be converted into Address objects with the right section and
277   ///   offset.
278   ///
279   /// \param strtab
280   ///   All strings in cache files are put into string tables for efficiency
281   ///   and cache file size reduction. Strings are stored as uint32_t string
282   ///   table offsets in the cache data.
283   ///
284   /// \return
285   ///   True if the symbol is successfully decoded, false otherwise.
286   bool Decode(const DataExtractor &data, lldb::offset_t *offset_ptr,
287               const SectionList *section_list, const StringTableReader &strtab);
288 
289   /// Encode this object into a data encoder object.
290   ///
291   /// This allows this object to be serialized to disk.
292   ///
293   /// \param encoder
294   ///   A data encoder object that serialized bytes will be encoded into.
295   ///
296   /// \param strtab
297   ///   All strings in cache files are put into string tables for efficiency
298   ///   and cache file size reduction. Strings are stored as uint32_t string
299   ///   table offsets in the cache data.
300   void Encode(DataEncoder &encoder, ConstStringTable &strtab) const;
301 
302   bool operator==(const Symbol &rhs) const;
303 
304 protected:
305   // This is the internal guts of ResolveReExportedSymbol, it assumes
306   // reexport_name is not null, and that module_spec is valid.  We track the
307   // modules we've already seen to make sure we don't get caught in a cycle.
308 
309   Symbol *ResolveReExportedSymbolInModuleSpec(
310       Target &target, ConstString &reexport_name,
311       lldb_private::ModuleSpec &module_spec,
312       lldb_private::ModuleList &seen_modules) const;
313 
314   void SynthesizeNameIfNeeded() const;
315 
316   uint32_t m_uid =
317       UINT32_MAX;           // User ID (usually the original symbol table index)
318   uint16_t m_type_data = 0; // data specific to m_type
319   uint16_t m_type_data_resolved : 1, // True if the data in m_type_data has
320                                      // already been calculated
321       m_is_synthetic : 1, // non-zero if this symbol is not actually in the
322                           // symbol table, but synthesized from other info in
323                           // the object file.
324       m_is_debug : 1,     // non-zero if this symbol is debug information in a
325                           // symbol
326       m_is_external : 1,  // non-zero if this symbol is globally visible
327       m_size_is_sibling : 1,     // m_size contains the index of this symbol's
328                                  // sibling
329       m_size_is_synthesized : 1, // non-zero if this symbol's size was
330                                  // calculated using a delta between this
331                                  // symbol and the next
332       m_size_is_valid : 1,
333       m_demangled_is_synthesized : 1, // The demangled name was created should
334                                       // not be used for expressions or other
335                                       // lookups
336       m_contains_linker_annotations : 1, // The symbol name contains linker
337                                          // annotations, which are optional when
338                                          // doing name lookups
339       m_is_weak : 1,
340       m_type : 6;            // Values from the lldb::SymbolType enum.
341   mutable Mangled m_mangled; // uniqued symbol name/mangled name pair
342   AddressRange m_addr_range; // Contains the value, or the section offset
343                              // address when the value is an address in a
344                              // section, and the size (if any)
345   uint32_t m_flags = 0; // A copy of the flags from the original symbol table,
346                         // the ObjectFile plug-in can interpret these
347 };
348 
349 } // namespace lldb_private
350 
351 namespace llvm {
352 namespace json {
353 
354 bool fromJSON(const llvm::json::Value &value, lldb_private::JSONSymbol &symbol,
355               llvm::json::Path path);
356 
357 bool fromJSON(const llvm::json::Value &value, lldb::SymbolType &type,
358               llvm::json::Path path);
359 
360 } // namespace json
361 } // namespace llvm
362 
363 #endif // LLDB_SYMBOL_SYMBOL_H
364