1 //===- TypeIndexDiscovery.cpp -----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/Support/Endian.h"
#include <cstring>
13 
14 using namespace llvm;
15 using namespace llvm::codeview;
16 
17 static inline MethodKind getMethodKind(uint16_t Attrs) {
18   Attrs &= uint16_t(MethodOptions::MethodKindMask);
19   Attrs >>= 2;
20   return MethodKind(Attrs);
21 }
22 
23 static inline bool isIntroVirtual(uint16_t Attrs) {
24   MethodKind MK = getMethodKind(Attrs);
25   return MK == MethodKind::IntroducingVirtual ||
26          MK == MethodKind::PureIntroducingVirtual;
27 }
28 
29 static inline PointerMode getPointerMode(uint32_t Attrs) {
30   return static_cast<PointerMode>((Attrs >> PointerRecord::PointerModeShift) &
31                                   PointerRecord::PointerModeMask);
32 }
33 
34 static inline bool isMemberPointer(uint32_t Attrs) {
35   PointerMode Mode = getPointerMode(Attrs);
36   return Mode == PointerMode::PointerToDataMember ||
37          Mode == PointerMode::PointerToMemberFunction;
38 }
39 
40 static inline uint32_t getEncodedIntegerLength(ArrayRef<uint8_t> Data) {
41   uint16_t N = support::endian::read16le(Data.data());
42   if (N < LF_NUMERIC)
43     return 2;
44 
45   assert(N <= LF_UQUADWORD);
46 
47   constexpr uint32_t Sizes[] = {
48       1,  // LF_CHAR
49       2,  // LF_SHORT
50       2,  // LF_USHORT
51       4,  // LF_LONG
52       4,  // LF_ULONG
53       4,  // LF_REAL32
54       8,  // LF_REAL64
55       10, // LF_REAL80
56       16, // LF_REAL128
57       8,  // LF_QUADWORD
58       8,  // LF_UQUADWORD
59   };
60 
61   return 2 + Sizes[N - LF_NUMERIC];
62 }
63 
64 static inline uint32_t getCStringLength(ArrayRef<uint8_t> Data) {
65   const char *S = reinterpret_cast<const char *>(Data.data());
66   return strlen(S) + 1;
67 }
68 
69 static void handleMethodOverloadList(ArrayRef<uint8_t> Content,
70                                      SmallVectorImpl<TiReference> &Refs) {
71   uint32_t Offset = 0;
72 
73   while (!Content.empty()) {
74     // Array of:
75     //   0: Attrs
76     //   2: Padding
77     //   4: TypeIndex
78     //   if (isIntroVirtual())
79     //     8: VFTableOffset
80 
81     // At least 8 bytes are guaranteed.  4 extra bytes come iff function is an
82     // intro virtual.
83     uint32_t Len = 8;
84 
85     uint16_t Attrs = support::endian::read16le(Content.data());
86     Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1});
87 
88     if (LLVM_UNLIKELY(isIntroVirtual(Attrs)))
89       Len += 4;
90     Offset += Len;
91     Content = Content.drop_front(Len);
92   }
93 }
94 
95 static uint32_t handleBaseClass(ArrayRef<uint8_t> Data, uint32_t Offset,
96                                 SmallVectorImpl<TiReference> &Refs) {
97   // 0: Kind
98   // 2: Padding
99   // 4: TypeIndex
100   // 8: Encoded Integer
101   Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1});
102   return 8 + getEncodedIntegerLength(Data.drop_front(8));
103 }
104 
105 static uint32_t handleEnumerator(ArrayRef<uint8_t> Data, uint32_t Offset,
106                                  SmallVectorImpl<TiReference> &Refs) {
107   // 0: Kind
108   // 2: Padding
109   // 4: Encoded Integer
110   // <next>: Name
111   uint32_t Size = 4 + getEncodedIntegerLength(Data.drop_front(4));
112   return Size + getCStringLength(Data.drop_front(Size));
113 }
114 
115 static uint32_t handleDataMember(ArrayRef<uint8_t> Data, uint32_t Offset,
116                                  SmallVectorImpl<TiReference> &Refs) {
117   // 0: Kind
118   // 2: Padding
119   // 4: TypeIndex
120   // 8: Encoded Integer
121   // <next>: Name
122   Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1});
123   uint32_t Size = 8 + getEncodedIntegerLength(Data.drop_front(8));
124   return Size + getCStringLength(Data.drop_front(Size));
125 }
126 
127 static uint32_t handleOverloadedMethod(ArrayRef<uint8_t> Data, uint32_t Offset,
128                                        SmallVectorImpl<TiReference> &Refs) {
129   // 0: Kind
130   // 2: Padding
131   // 4: TypeIndex
132   // 8: Name
133   Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1});
134   return 8 + getCStringLength(Data.drop_front(8));
135 }
136 
137 static uint32_t handleOneMethod(ArrayRef<uint8_t> Data, uint32_t Offset,
138                                 SmallVectorImpl<TiReference> &Refs) {
139   // 0: Kind
140   // 2: Attributes
141   // 4: Type
142   // if (isIntroVirtual)
143   //   8: VFTableOffset
144   // <next>: Name
145   uint32_t Size = 8;
146   Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1});
147 
148   uint16_t Attrs = support::endian::read16le(Data.drop_front(2).data());
149   if (LLVM_UNLIKELY(isIntroVirtual(Attrs)))
150     Size += 4;
151 
152   return Size + getCStringLength(Data.drop_front(Size));
153 }
154 
155 static uint32_t handleNestedType(ArrayRef<uint8_t> Data, uint32_t Offset,
156                                  SmallVectorImpl<TiReference> &Refs) {
157   // 0: Kind
158   // 2: Padding
159   // 4: TypeIndex
160   // 8: Name
161   Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1});
162   return 8 + getCStringLength(Data.drop_front(8));
163 }
164 
165 static uint32_t handleStaticDataMember(ArrayRef<uint8_t> Data, uint32_t Offset,
166                                        SmallVectorImpl<TiReference> &Refs) {
167   // 0: Kind
168   // 2: Padding
169   // 4: TypeIndex
170   // 8: Name
171   Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1});
172   return 8 + getCStringLength(Data.drop_front(8));
173 }
174 
175 static uint32_t handleVirtualBaseClass(ArrayRef<uint8_t> Data, uint32_t Offset,
176                                        bool IsIndirect,
177                                        SmallVectorImpl<TiReference> &Refs) {
178   // 0: Kind
179   // 2: Attrs
180   // 4: TypeIndex
181   // 8: TypeIndex
182   // 12: Encoded Integer
183   // <next>: Encoded Integer
184   uint32_t Size = 12;
185   Refs.push_back({TiRefKind::TypeRef, Offset + 4, 2});
186   Size += getEncodedIntegerLength(Data.drop_front(Size));
187   Size += getEncodedIntegerLength(Data.drop_front(Size));
188   return Size;
189 }
190 
191 static uint32_t handleVFPtr(ArrayRef<uint8_t> Data, uint32_t Offset,
192                             SmallVectorImpl<TiReference> &Refs) {
193   // 0: Kind
194   // 2: Padding
195   // 4: TypeIndex
196   Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1});
197   return 8;
198 }
199 
200 static uint32_t handleListContinuation(ArrayRef<uint8_t> Data, uint32_t Offset,
201                                        SmallVectorImpl<TiReference> &Refs) {
202   // 0: Kind
203   // 2: Padding
204   // 4: TypeIndex
205   Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1});
206   return 8;
207 }
208 
209 static void handleFieldList(ArrayRef<uint8_t> Content,
210                             SmallVectorImpl<TiReference> &Refs) {
211   uint32_t Offset = 0;
212   uint32_t ThisLen = 0;
213   while (!Content.empty()) {
214     TypeLeafKind Kind =
215         static_cast<TypeLeafKind>(support::endian::read16le(Content.data()));
216     switch (Kind) {
217     case LF_BCLASS:
218       ThisLen = handleBaseClass(Content, Offset, Refs);
219       break;
220     case LF_ENUMERATE:
221       ThisLen = handleEnumerator(Content, Offset, Refs);
222       break;
223     case LF_MEMBER:
224       ThisLen = handleDataMember(Content, Offset, Refs);
225       break;
226     case LF_METHOD:
227       ThisLen = handleOverloadedMethod(Content, Offset, Refs);
228       break;
229     case LF_ONEMETHOD:
230       ThisLen = handleOneMethod(Content, Offset, Refs);
231       break;
232     case LF_NESTTYPE:
233       ThisLen = handleNestedType(Content, Offset, Refs);
234       break;
235     case LF_STMEMBER:
236       ThisLen = handleStaticDataMember(Content, Offset, Refs);
237       break;
238     case LF_VBCLASS:
239     case LF_IVBCLASS:
240       ThisLen =
241           handleVirtualBaseClass(Content, Offset, Kind == LF_VBCLASS, Refs);
242       break;
243     case LF_VFUNCTAB:
244       ThisLen = handleVFPtr(Content, Offset, Refs);
245       break;
246     case LF_INDEX:
247       ThisLen = handleListContinuation(Content, Offset, Refs);
248       break;
249     default:
250       return;
251     }
252     Content = Content.drop_front(ThisLen);
253     Offset += ThisLen;
254     if (!Content.empty()) {
255       uint8_t Pad = Content.front();
256       if (Pad >= LF_PAD0) {
257         uint32_t Skip = Pad & 0x0F;
258         Content = Content.drop_front(Skip);
259         Offset += Skip;
260       }
261     }
262   }
263 }
264 
265 static void handlePointer(ArrayRef<uint8_t> Content,
266                           SmallVectorImpl<TiReference> &Refs) {
267   Refs.push_back({TiRefKind::TypeRef, 0, 1});
268 
269   uint32_t Attrs = support::endian::read32le(Content.drop_front(4).data());
270   if (isMemberPointer(Attrs))
271     Refs.push_back({TiRefKind::TypeRef, 8, 1});
272 }
273 
274 static void discoverTypeIndices(ArrayRef<uint8_t> Content, TypeLeafKind Kind,
275                                 SmallVectorImpl<TiReference> &Refs) {
276   uint32_t Count;
277   // FIXME: In the future it would be nice if we could avoid hardcoding these
278   // values.  One idea is to define some structures representing these types
279   // that would allow the use of offsetof().
280   switch (Kind) {
281   case TypeLeafKind::LF_FUNC_ID:
282     Refs.push_back({TiRefKind::IndexRef, 0, 1});
283     Refs.push_back({TiRefKind::TypeRef, 4, 1});
284     break;
285   case TypeLeafKind::LF_MFUNC_ID:
286     Refs.push_back({TiRefKind::TypeRef, 0, 2});
287     break;
288   case TypeLeafKind::LF_STRING_ID:
289     Refs.push_back({TiRefKind::IndexRef, 0, 1});
290     break;
291   case TypeLeafKind::LF_SUBSTR_LIST:
292     Count = support::endian::read32le(Content.data());
293     if (Count > 0)
294       Refs.push_back({TiRefKind::IndexRef, 4, Count});
295     break;
296   case TypeLeafKind::LF_BUILDINFO:
297     Count = support::endian::read16le(Content.data());
298     if (Count > 0)
299       Refs.push_back({TiRefKind::IndexRef, 2, Count});
300     break;
301   case TypeLeafKind::LF_UDT_SRC_LINE:
302     Refs.push_back({TiRefKind::TypeRef, 0, 1});
303     Refs.push_back({TiRefKind::IndexRef, 4, 1});
304     break;
305   case TypeLeafKind::LF_UDT_MOD_SRC_LINE:
306     Refs.push_back({TiRefKind::TypeRef, 0, 1});
307     break;
308   case TypeLeafKind::LF_MODIFIER:
309     Refs.push_back({TiRefKind::TypeRef, 0, 1});
310     break;
311   case TypeLeafKind::LF_PROCEDURE:
312     Refs.push_back({TiRefKind::TypeRef, 0, 1});
313     Refs.push_back({TiRefKind::TypeRef, 8, 1});
314     break;
315   case TypeLeafKind::LF_MFUNCTION:
316     Refs.push_back({TiRefKind::TypeRef, 0, 3});
317     Refs.push_back({TiRefKind::TypeRef, 16, 1});
318     break;
319   case TypeLeafKind::LF_ARGLIST:
320     Count = support::endian::read32le(Content.data());
321     if (Count > 0)
322       Refs.push_back({TiRefKind::TypeRef, 4, Count});
323     break;
324   case TypeLeafKind::LF_ARRAY:
325     Refs.push_back({TiRefKind::TypeRef, 0, 2});
326     break;
327   case TypeLeafKind::LF_CLASS:
328   case TypeLeafKind::LF_STRUCTURE:
329   case TypeLeafKind::LF_INTERFACE:
330     Refs.push_back({TiRefKind::TypeRef, 4, 3});
331     break;
332   case TypeLeafKind::LF_UNION:
333     Refs.push_back({TiRefKind::TypeRef, 4, 1});
334     break;
335   case TypeLeafKind::LF_ENUM:
336     Refs.push_back({TiRefKind::TypeRef, 4, 2});
337     break;
338   case TypeLeafKind::LF_BITFIELD:
339     Refs.push_back({TiRefKind::TypeRef, 0, 1});
340     break;
341   case TypeLeafKind::LF_VFTABLE:
342     Refs.push_back({TiRefKind::TypeRef, 0, 2});
343     break;
344   case TypeLeafKind::LF_VTSHAPE:
345     break;
346   case TypeLeafKind::LF_METHODLIST:
347     handleMethodOverloadList(Content, Refs);
348     break;
349   case TypeLeafKind::LF_FIELDLIST:
350     handleFieldList(Content, Refs);
351     break;
352   case TypeLeafKind::LF_POINTER:
353     handlePointer(Content, Refs);
354     break;
355   default:
356     break;
357   }
358 }
359 
360 static bool discoverTypeIndices(ArrayRef<uint8_t> Content, SymbolKind Kind,
361                                 SmallVectorImpl<TiReference> &Refs) {
362   uint32_t Count;
363   // FIXME: In the future it would be nice if we could avoid hardcoding these
364   // values.  One idea is to define some structures representing these types
365   // that would allow the use of offsetof().
366   switch (Kind) {
367   case SymbolKind::S_GPROC32_ID:
368   case SymbolKind::S_LPROC32_ID:
369   case SymbolKind::S_LPROC32_DPC:
370   case SymbolKind::S_LPROC32_DPC_ID:
371     Refs.push_back({TiRefKind::IndexRef, 24, 1}); // LF_FUNC_ID
372     break;
373   case SymbolKind::S_GPROC32:
374   case SymbolKind::S_LPROC32:
375     Refs.push_back({TiRefKind::TypeRef, 24, 1}); // Type
376     break;
377   case SymbolKind::S_UDT:
378     Refs.push_back({TiRefKind::TypeRef, 0, 1}); // UDT
379     break;
380   case SymbolKind::S_GDATA32:
381   case SymbolKind::S_LDATA32:
382     Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type
383     break;
384   case SymbolKind::S_BUILDINFO:
385     Refs.push_back({TiRefKind::IndexRef, 0, 1}); // Compile flags
386     break;
387   case SymbolKind::S_LTHREAD32:
388   case SymbolKind::S_GTHREAD32:
389     Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type
390     break;
391   case SymbolKind::S_FILESTATIC:
392     Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type
393     break;
394   case SymbolKind::S_LOCAL:
395     Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type
396     break;
397   case SymbolKind::S_REGISTER:
398     Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type
399     break;
400   case SymbolKind::S_CONSTANT:
401     Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type
402     break;
403   case SymbolKind::S_BPREL32:
404   case SymbolKind::S_REGREL32:
405     Refs.push_back({TiRefKind::TypeRef, 4, 1}); // Type
406     break;
407   case SymbolKind::S_CALLSITEINFO:
408     Refs.push_back({TiRefKind::TypeRef, 8, 1}); // Call signature
409     break;
410   case SymbolKind::S_CALLERS:
411   case SymbolKind::S_CALLEES:
412   case SymbolKind::S_INLINEES:
413     // The record is a count followed by an array of type indices.
414     Count = *reinterpret_cast<const ulittle32_t *>(Content.data());
415     Refs.push_back({TiRefKind::IndexRef, 4, Count}); // Callees
416     break;
417   case SymbolKind::S_INLINESITE:
418     Refs.push_back({TiRefKind::IndexRef, 8, 1}); // ID of inlinee
419     break;
420   case SymbolKind::S_HEAPALLOCSITE:
421     Refs.push_back({TiRefKind::TypeRef, 8, 1}); // UDT allocated
422     break;
423 
424   // Defranges don't have types, just registers and code offsets.
425   case SymbolKind::S_DEFRANGE_REGISTER:
426   case SymbolKind::S_DEFRANGE_REGISTER_REL:
427   case SymbolKind::S_DEFRANGE_FRAMEPOINTER_REL:
428   case SymbolKind::S_DEFRANGE_FRAMEPOINTER_REL_FULL_SCOPE:
429   case SymbolKind::S_DEFRANGE_SUBFIELD_REGISTER:
430   case SymbolKind::S_DEFRANGE_SUBFIELD:
431     break;
432 
433   // No type references.
434   case SymbolKind::S_LABEL32:
435   case SymbolKind::S_OBJNAME:
436   case SymbolKind::S_COMPILE:
437   case SymbolKind::S_COMPILE2:
438   case SymbolKind::S_COMPILE3:
439   case SymbolKind::S_ENVBLOCK:
440   case SymbolKind::S_BLOCK32:
441   case SymbolKind::S_FRAMEPROC:
442   case SymbolKind::S_THUNK32:
443   case SymbolKind::S_FRAMECOOKIE:
444   case SymbolKind::S_UNAMESPACE:
445   case SymbolKind::S_ARMSWITCHTABLE:
446     break;
447   // Scope ending symbols.
448   case SymbolKind::S_END:
449   case SymbolKind::S_INLINESITE_END:
450   case SymbolKind::S_PROC_ID_END:
451     break;
452   default:
453     return false; // Unknown symbol.
454   }
455   return true;
456 }
457 
458 void llvm::codeview::discoverTypeIndices(const CVType &Type,
459                                          SmallVectorImpl<TiReference> &Refs) {
460   ::discoverTypeIndices(Type.content(), Type.kind(), Refs);
461 }
462 
463 static void resolveTypeIndexReferences(ArrayRef<uint8_t> RecordData,
464                                        ArrayRef<TiReference> Refs,
465                                        SmallVectorImpl<TypeIndex> &Indices) {
466   Indices.clear();
467 
468   if (Refs.empty())
469     return;
470 
471   RecordData = RecordData.drop_front(sizeof(RecordPrefix));
472 
473   BinaryStreamReader Reader(RecordData, llvm::endianness::little);
474   for (const auto &Ref : Refs) {
475     Reader.setOffset(Ref.Offset);
476     FixedStreamArray<TypeIndex> Run;
477     cantFail(Reader.readArray(Run, Ref.Count));
478     Indices.append(Run.begin(), Run.end());
479   }
480 }
481 
482 void llvm::codeview::discoverTypeIndices(const CVType &Type,
483                                          SmallVectorImpl<TypeIndex> &Indices) {
484   return discoverTypeIndices(Type.RecordData, Indices);
485 }
486 
487 void llvm::codeview::discoverTypeIndices(ArrayRef<uint8_t> RecordData,
488                                          SmallVectorImpl<TypeIndex> &Indices) {
489   SmallVector<TiReference, 4> Refs;
490   discoverTypeIndices(RecordData, Refs);
491   resolveTypeIndexReferences(RecordData, Refs, Indices);
492 }
493 
494 void llvm::codeview::discoverTypeIndices(ArrayRef<uint8_t> RecordData,
495                                          SmallVectorImpl<TiReference> &Refs) {
496   const RecordPrefix *P =
497       reinterpret_cast<const RecordPrefix *>(RecordData.data());
498   TypeLeafKind K = static_cast<TypeLeafKind>(uint16_t(P->RecordKind));
499   ::discoverTypeIndices(RecordData.drop_front(sizeof(RecordPrefix)), K, Refs);
500 }
501 
502 bool llvm::codeview::discoverTypeIndicesInSymbol(
503     const CVSymbol &Sym, SmallVectorImpl<TiReference> &Refs) {
504   SymbolKind K = Sym.kind();
505   return ::discoverTypeIndices(Sym.content(), K, Refs);
506 }
507 
508 bool llvm::codeview::discoverTypeIndicesInSymbol(
509     ArrayRef<uint8_t> RecordData, SmallVectorImpl<TiReference> &Refs) {
510   const RecordPrefix *P =
511       reinterpret_cast<const RecordPrefix *>(RecordData.data());
512   SymbolKind K = static_cast<SymbolKind>(uint16_t(P->RecordKind));
513   return ::discoverTypeIndices(RecordData.drop_front(sizeof(RecordPrefix)), K,
514                                Refs);
515 }
516 
517 bool llvm::codeview::discoverTypeIndicesInSymbol(
518     ArrayRef<uint8_t> RecordData, SmallVectorImpl<TypeIndex> &Indices) {
519   SmallVector<TiReference, 2> Refs;
520   if (!discoverTypeIndicesInSymbol(RecordData, Refs))
521     return false;
522   resolveTypeIndexReferences(RecordData, Refs, Indices);
523   return true;
524 }
525