1 //===- MCSymbol.h - Machine Code Symbols ------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the declaration of the MCSymbol class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_MC_MCSYMBOL_H
14 #define LLVM_MC_MCSYMBOL_H
15 
16 #include "llvm/ADT/PointerIntPair.h"
17 #include "llvm/ADT/StringMapEntry.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/MC/MCExpr.h"
20 #include "llvm/MC/MCFragment.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/MathExtras.h"
23 #include <cassert>
24 #include <cstddef>
25 #include <cstdint>
26 
27 namespace llvm {
28 
29 class MCAsmInfo;
30 class MCContext;
31 class MCSection;
32 class raw_ostream;
33 
34 /// MCSymbol - Instances of this class represent a symbol name in the MC file,
35 /// and MCSymbols are created and uniqued by the MCContext class.  MCSymbols
36 /// should only be constructed with valid names for the object file.
37 ///
38 /// If the symbol is defined/emitted into the current translation unit, the
39 /// Section member is set to indicate what section it lives in.  Otherwise, if
40 /// it is a reference to an external entity, it has a null section.
41 class MCSymbol {
42 protected:
43   /// The kind of the symbol.  If it is any value other than unset then this
44   /// class is actually one of the appropriate subclasses of MCSymbol.
45   enum SymbolKind {
46     SymbolKindUnset,
47     SymbolKindCOFF,
48     SymbolKindELF,
49     SymbolKindGOFF,
50     SymbolKindMachO,
51     SymbolKindWasm,
52     SymbolKindXCOFF,
53   };
54 
55   /// A symbol can contain an Offset, or Value, or be Common, but never more
56   /// than one of these.
57   enum Contents : uint8_t {
58     SymContentsUnset,
59     SymContentsOffset,
60     SymContentsVariable,
61     SymContentsCommon,
62     SymContentsTargetCommon, // Index stores the section index
63   };
64 
65   // Special sentinal value for the absolute pseudo fragment.
66   static MCFragment *AbsolutePseudoFragment;
67 
68   /// If a symbol has a Fragment, the section is implied, so we only need
69   /// one pointer.
70   /// The special AbsolutePseudoFragment value is for absolute symbols.
71   /// If this is a variable symbol, this caches the variable value's fragment.
72   /// FIXME: We might be able to simplify this by having the asm streamer create
73   /// dummy fragments.
74   /// If this is a section, then it gives the symbol is defined in. This is null
75   /// for undefined symbols.
76   ///
77   /// If this is a fragment, then it gives the fragment this symbol's value is
78   /// relative to, if any.
79   mutable MCFragment *Fragment = nullptr;
80 
81   /// True if this symbol is named.  A named symbol will have a pointer to the
82   /// name allocated in the bytes immediately prior to the MCSymbol.
83   unsigned HasName : 1;
84 
85   /// IsTemporary - True if this is an assembler temporary label, which
86   /// typically does not survive in the .o file's symbol table.  Usually
87   /// "Lfoo" or ".foo".
88   unsigned IsTemporary : 1;
89 
90   /// True if this symbol can be redefined.
91   unsigned IsRedefinable : 1;
92 
93   /// IsUsed - True if this symbol has been used.
94   mutable unsigned IsUsed : 1;
95 
96   mutable unsigned IsRegistered : 1;
97 
98   /// True if this symbol is visible outside this translation unit. Note: ELF
99   /// uses binding instead of this bit.
100   mutable unsigned IsExternal : 1;
101 
102   /// This symbol is private extern.
103   mutable unsigned IsPrivateExtern : 1;
104 
105   /// This symbol is weak external.
106   mutable unsigned IsWeakExternal : 1;
107 
108   /// LLVM RTTI discriminator. This is actually a SymbolKind enumerator, but is
109   /// unsigned to avoid sign extension and achieve better bitpacking with MSVC.
110   unsigned Kind : 3;
111 
112   /// True if we have created a relocation that uses this symbol.
113   mutable unsigned IsUsedInReloc : 1;
114 
115   /// This is actually a Contents enumerator, but is unsigned to avoid sign
116   /// extension and achieve better bitpacking with MSVC.
117   unsigned SymbolContents : 3;
118 
119   /// The alignment of the symbol if it is 'common'.
120   ///
121   /// Internally, this is stored as log2(align) + 1.
122   /// We reserve 5 bits to encode this value which allows the following values
123   /// 0b00000 -> unset
124   /// 0b00001 -> 1ULL <<  0 = 1
125   /// 0b00010 -> 1ULL <<  1 = 2
126   /// 0b00011 -> 1ULL <<  2 = 4
127   /// ...
128   /// 0b11111 -> 1ULL << 30 = 1 GiB
129   enum : unsigned { NumCommonAlignmentBits = 5 };
130   unsigned CommonAlignLog2 : NumCommonAlignmentBits;
131 
132   /// The Flags field is used by object file implementations to store
133   /// additional per symbol information which is not easily classified.
134   enum : unsigned { NumFlagsBits = 16 };
135   mutable uint32_t Flags : NumFlagsBits;
136 
137   /// Index field, for use by the object file implementation.
138   mutable uint32_t Index = 0;
139 
140   union {
141     /// The offset to apply to the fragment address to form this symbol's value.
142     uint64_t Offset;
143 
144     /// The size of the symbol, if it is 'common'.
145     uint64_t CommonSize;
146 
147     /// If non-null, the value for a variable symbol.
148     const MCExpr *Value;
149   };
150 
151   // MCContext creates and uniques these.
152   friend class MCExpr;
153   friend class MCContext;
154 
155   /// The name for a symbol.
156   /// MCSymbol contains a uint64_t so is probably aligned to 8.  On a 32-bit
157   /// system, the name is a pointer so isn't going to satisfy the 8 byte
158   /// alignment of uint64_t.  Account for that here.
159   using NameEntryStorageTy = union {
160     const StringMapEntry<bool> *NameEntry;
161     uint64_t AlignmentPadding;
162   };
163 
164   MCSymbol(SymbolKind Kind, const StringMapEntry<bool> *Name, bool isTemporary)
165       : IsTemporary(isTemporary), IsRedefinable(false), IsUsed(false),
166         IsRegistered(false), IsExternal(false), IsPrivateExtern(false),
167         IsWeakExternal(false), Kind(Kind), IsUsedInReloc(false),
168         SymbolContents(SymContentsUnset), CommonAlignLog2(0), Flags(0) {
169     Offset = 0;
170     HasName = !!Name;
171     if (Name)
172       getNameEntryPtr() = Name;
173   }
174 
175   // Provide custom new/delete as we will only allocate space for a name
176   // if we need one.
177   void *operator new(size_t s, const StringMapEntry<bool> *Name,
178                      MCContext &Ctx);
179 
180 private:
181   void operator delete(void *);
182   /// Placement delete - required by std, but never called.
183   void operator delete(void*, unsigned) {
184     llvm_unreachable("Constructor throws?");
185   }
186   /// Placement delete - required by std, but never called.
187   void operator delete(void*, unsigned, bool) {
188     llvm_unreachable("Constructor throws?");
189   }
190 
191   /// Get a reference to the name field.  Requires that we have a name
192   const StringMapEntry<bool> *&getNameEntryPtr() {
193     assert(HasName && "Name is required");
194     NameEntryStorageTy *Name = reinterpret_cast<NameEntryStorageTy *>(this);
195     return (*(Name - 1)).NameEntry;
196   }
197   const StringMapEntry<bool> *&getNameEntryPtr() const {
198     return const_cast<MCSymbol*>(this)->getNameEntryPtr();
199   }
200 
201 public:
202   MCSymbol(const MCSymbol &) = delete;
203   MCSymbol &operator=(const MCSymbol &) = delete;
204 
205   /// getName - Get the symbol name.
206   StringRef getName() const {
207     if (!HasName)
208       return StringRef();
209 
210     return getNameEntryPtr()->first();
211   }
212 
213   bool isRegistered() const { return IsRegistered; }
214   void setIsRegistered(bool Value) const { IsRegistered = Value; }
215 
216   void setUsedInReloc() const { IsUsedInReloc = true; }
217   bool isUsedInReloc() const { return IsUsedInReloc; }
218 
219   /// \name Accessors
220   /// @{
221 
222   /// isTemporary - Check if this is an assembler temporary symbol.
223   bool isTemporary() const { return IsTemporary; }
224 
225   /// isUsed - Check if this is used.
226   bool isUsed() const { return IsUsed; }
227 
228   /// Check if this symbol is redefinable.
229   bool isRedefinable() const { return IsRedefinable; }
230   /// Mark this symbol as redefinable.
231   void setRedefinable(bool Value) { IsRedefinable = Value; }
232   /// Prepare this symbol to be redefined.
233   void redefineIfPossible() {
234     if (IsRedefinable) {
235       if (SymbolContents == SymContentsVariable) {
236         Value = nullptr;
237         SymbolContents = SymContentsUnset;
238       }
239       setUndefined();
240       IsRedefinable = false;
241     }
242   }
243 
244   /// @}
245   /// \name Associated Sections
246   /// @{
247 
248   /// isDefined - Check if this symbol is defined (i.e., it has an address).
249   ///
250   /// Defined symbols are either absolute or in some section.
251   bool isDefined() const { return !isUndefined(); }
252 
253   /// isInSection - Check if this symbol is defined in some section (i.e., it
254   /// is defined but not absolute).
255   bool isInSection() const {
256     return isDefined() && !isAbsolute();
257   }
258 
259   /// isUndefined - Check if this symbol undefined (i.e., implicitly defined).
260   bool isUndefined(bool SetUsed = true) const {
261     return getFragment(SetUsed) == nullptr;
262   }
263 
264   /// isAbsolute - Check if this is an absolute symbol.
265   bool isAbsolute() const {
266     return getFragment() == AbsolutePseudoFragment;
267   }
268 
269   /// Get the section associated with a defined, non-absolute symbol.
270   MCSection &getSection() const {
271     assert(isInSection() && "Invalid accessor!");
272     return *getFragment()->getParent();
273   }
274 
275   /// Mark the symbol as defined in the fragment \p F.
276   void setFragment(MCFragment *F) const {
277     assert(!isVariable() && "Cannot set fragment of variable");
278     Fragment = F;
279   }
280 
281   /// Mark the symbol as undefined.
282   void setUndefined() { Fragment = nullptr; }
283 
284   bool isELF() const { return Kind == SymbolKindELF; }
285 
286   bool isCOFF() const { return Kind == SymbolKindCOFF; }
287 
288   bool isGOFF() const { return Kind == SymbolKindGOFF; }
289 
290   bool isMachO() const { return Kind == SymbolKindMachO; }
291 
292   bool isWasm() const { return Kind == SymbolKindWasm; }
293 
294   bool isXCOFF() const { return Kind == SymbolKindXCOFF; }
295 
296   /// @}
297   /// \name Variable Symbols
298   /// @{
299 
300   /// isVariable - Check if this is a variable symbol.
301   bool isVariable() const {
302     return SymbolContents == SymContentsVariable;
303   }
304 
305   /// getVariableValue - Get the value for variable symbols.
306   const MCExpr *getVariableValue(bool SetUsed = true) const {
307     assert(isVariable() && "Invalid accessor!");
308     IsUsed |= SetUsed;
309     return Value;
310   }
311 
312   void setVariableValue(const MCExpr *Value);
313 
314   /// @}
315 
316   /// Get the (implementation defined) index.
317   uint32_t getIndex() const {
318     return Index;
319   }
320 
321   /// Set the (implementation defined) index.
322   void setIndex(uint32_t Value) const {
323     Index = Value;
324   }
325 
326   bool isUnset() const { return SymbolContents == SymContentsUnset; }
327 
328   uint64_t getOffset() const {
329     assert((SymbolContents == SymContentsUnset ||
330             SymbolContents == SymContentsOffset) &&
331            "Cannot get offset for a common/variable symbol");
332     return Offset;
333   }
334   void setOffset(uint64_t Value) {
335     assert((SymbolContents == SymContentsUnset ||
336             SymbolContents == SymContentsOffset) &&
337            "Cannot set offset for a common/variable symbol");
338     Offset = Value;
339     SymbolContents = SymContentsOffset;
340   }
341 
342   /// Return the size of a 'common' symbol.
343   uint64_t getCommonSize() const {
344     assert(isCommon() && "Not a 'common' symbol!");
345     return CommonSize;
346   }
347 
348   /// Mark this symbol as being 'common'.
349   ///
350   /// \param Size - The size of the symbol.
351   /// \param Alignment - The alignment of the symbol.
352   /// \param Target - Is the symbol a target-specific common-like symbol.
353   void setCommon(uint64_t Size, Align Alignment, bool Target = false) {
354     assert(getOffset() == 0);
355     CommonSize = Size;
356     SymbolContents = Target ? SymContentsTargetCommon : SymContentsCommon;
357 
358     unsigned Log2Align = encode(Alignment);
359     assert(Log2Align < (1U << NumCommonAlignmentBits) &&
360            "Out of range alignment");
361     CommonAlignLog2 = Log2Align;
362   }
363 
364   ///  Return the alignment of a 'common' symbol.
365   MaybeAlign getCommonAlignment() const {
366     assert(isCommon() && "Not a 'common' symbol!");
367     return decodeMaybeAlign(CommonAlignLog2);
368   }
369 
370   /// Declare this symbol as being 'common'.
371   ///
372   /// \param Size - The size of the symbol.
373   /// \param Alignment - The alignment of the symbol.
374   /// \param Target - Is the symbol a target-specific common-like symbol.
375   /// \return True if symbol was already declared as a different type
376   bool declareCommon(uint64_t Size, Align Alignment, bool Target = false) {
377     assert(isCommon() || getOffset() == 0);
378     if(isCommon()) {
379       if (CommonSize != Size || getCommonAlignment() != Alignment ||
380           isTargetCommon() != Target)
381         return true;
382     } else
383       setCommon(Size, Alignment, Target);
384     return false;
385   }
386 
387   /// Is this a 'common' symbol.
388   bool isCommon() const {
389     return SymbolContents == SymContentsCommon ||
390            SymbolContents == SymContentsTargetCommon;
391   }
392 
393   /// Is this a target-specific common-like symbol.
394   bool isTargetCommon() const {
395     return SymbolContents == SymContentsTargetCommon;
396   }
397 
398   MCFragment *getFragment(bool SetUsed = true) const {
399     if (Fragment || !isVariable() || isWeakExternal())
400       return Fragment;
401     // If the symbol is a non-weak alias, get information about
402     // the aliasee. (Don't try to resolve weak aliases.)
403     Fragment = getVariableValue(SetUsed)->findAssociatedFragment();
404     return Fragment;
405   }
406 
407   bool isExternal() const { return IsExternal; }
408   void setExternal(bool Value) const { IsExternal = Value; }
409 
410   bool isPrivateExtern() const { return IsPrivateExtern; }
411   void setPrivateExtern(bool Value) { IsPrivateExtern = Value; }
412 
413   bool isWeakExternal() const { return IsWeakExternal; }
414 
415   /// print - Print the value to the stream \p OS.
416   void print(raw_ostream &OS, const MCAsmInfo *MAI) const;
417 
418   /// dump - Print the value to stderr.
419   void dump() const;
420 
421 protected:
422   /// Get the (implementation defined) symbol flags.
423   uint32_t getFlags() const { return Flags; }
424 
425   /// Set the (implementation defined) symbol flags.
426   void setFlags(uint32_t Value) const {
427     assert(Value < (1U << NumFlagsBits) && "Out of range flags");
428     Flags = Value;
429   }
430 
431   /// Modify the flags via a mask
432   void modifyFlags(uint32_t Value, uint32_t Mask) const {
433     assert(Value < (1U << NumFlagsBits) && "Out of range flags");
434     Flags = (Flags & ~Mask) | Value;
435   }
436 };
437 
438 inline raw_ostream &operator<<(raw_ostream &OS, const MCSymbol &Sym) {
439   Sym.print(OS, nullptr);
440   return OS;
441 }
442 
443 } // end namespace llvm
444 
445 #endif // LLVM_MC_MCSYMBOL_H
446