1 /*
2     Copyright (C) 2015 Volker Krause <vkrause@kde.org>
3 
4     This program is free software; you can redistribute it and/or modify it
5     under the terms of the GNU Library General Public License as published by
6     the Free Software Foundation; either version 2 of the License, or (at your
7     option) any later version.
8 
9     This program is distributed in the hope that it will be useful, but WITHOUT
10     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
12     License for more details.
13 
14     You should have received a copy of the GNU General Public License
15     along with this program.  If not, see <https://www.gnu.org/licenses/>.
16 */
17 
18 #include "structurepackingcheck.h"
19 
20 #include <elf/elffileset.h>
21 #include <dwarf/dwarfinfo.h>
22 #include <dwarf/dwarfdie.h>
23 #include <dwarf/dwarfcudie.h>
24 #include <dwarf/dwarfexpression.h>
25 
26 #include <QBitArray>
27 #include <QDebug>
28 #include <QString>
29 #include <QTextStream>
30 
31 #include <dwarf.h>
32 
33 #include <cassert>
34 #include <iostream>
35 
setElfFileSet(ElfFileSet * fileSet)36 void StructurePackingCheck::setElfFileSet(ElfFileSet* fileSet)
37 {
38     m_fileSet = fileSet;
39 }
40 
checkAll(DwarfInfo * info)41 void StructurePackingCheck::checkAll(DwarfInfo* info)
42 {
43     assert(m_fileSet);
44     if (!info)
45         return;
46 
47     foreach (auto die, info->compilationUnits())
48         checkDie(die);
49 }
50 
printSummary(int structSize,int usedBytes,int usedBits,int optimalSize)51 static QString printSummary(int structSize, int usedBytes, int usedBits, int optimalSize)
52 {
53     QString s;
54     s += QLatin1String("Used bytes: ") + QString::number(usedBytes) + QLatin1Char('/') + QString::number(structSize);
55     s += " (" + QString::number(double(usedBytes*100) / double(structSize), 'g', 4) + "%)\n";
56     s += QLatin1String("Used bits: ") + QString::number(usedBits) + QLatin1Char('/') + QString::number(structSize*8);
57     s += " (" + QString::number(double(usedBits*100) / double(structSize*8), 'g', 4) + "%)\n";
58     if (optimalSize < structSize) {
59         s += "Optimal size: " + QString::number(optimalSize) + " bytes (";
60         const int saving = structSize - optimalSize;
61         s += QString::number(-saving) + " bytes, " + QString::number(double(saving*100) / double(structSize), 'g', 4) + "%)\n";
62     }
63     return s;
64 }
65 
dataMemberLocation(DwarfDie * die)66 static int dataMemberLocation(DwarfDie *die)
67 {
68     const auto attr = die->attribute(DW_AT_data_member_location);
69     if (attr.canConvert(QVariant::Int))
70         return attr.toInt();
71     qWarning() << "Cannot convert location of" << die->displayName() << ":" << attr.value<DwarfExpression>().displayString();
72     return 0;
73 }
74 
compareMemberDiesByLocation(DwarfDie * lhs,DwarfDie * rhs)75 static bool compareMemberDiesByLocation(DwarfDie *lhs, DwarfDie *rhs)
76 {
77     const auto lhsLoc = dataMemberLocation(lhs);
78     const auto rhsLoc = dataMemberLocation(rhs);
79     if (lhsLoc == rhsLoc) {
80         return lhs->attribute(DW_AT_bit_offset) > rhs->attribute(DW_AT_bit_offset);
81     }
82     return lhsLoc < rhsLoc;
83 }
84 
checkOneStructure(DwarfDie * structDie) const85 QString StructurePackingCheck::checkOneStructure(DwarfDie* structDie) const
86 {
87     assert(structDie->tag() == DW_TAG_class_type || structDie->tag() == DW_TAG_structure_type);
88 
89     QVector<DwarfDie*> members;
90     foreach (auto child, structDie->children()) {
91         if (child->tag() == DW_TAG_member && !child->isStaticMember())
92             members.push_back(child);
93         else if (child->tag() == DW_TAG_inheritance)
94             members.push_back(child);
95     }
96     std::sort(members.begin(), members.end(), compareMemberDiesByLocation);
97 
98     const int structSize = structDie->typeSize();
99     int usedBytes;
100     int usedBits;
101     std::tie(usedBytes, usedBits) = computeStructureMemoryUsage(structDie, members);
102     const int optimalSize = optimalStructureSize(structDie, members);
103 
104     QString s = printSummary(structSize, usedBytes, usedBits, optimalSize);
105     s += '\n';
106     s += printStructure(structDie, members);
107     return s;
108 }
109 
checkDie(DwarfDie * die)110 void StructurePackingCheck::checkDie(DwarfDie* die)
111 {
112     if (die->tag() == DW_TAG_structure_type || die->tag() == DW_TAG_class_type) {
113         QVector<DwarfDie*> members;
114         foreach (auto child, die->children()) {
115             if (child->tag() == DW_TAG_member && !child->isStaticMember())
116                 members.push_back(child);
117             else if (child->tag() == DW_TAG_inheritance)
118                 members.push_back(child);
119             else
120                 checkDie(child);
121         }
122         std::sort(members.begin(), members.end(), compareMemberDiesByLocation);
123 
124         const int structSize = die->typeSize();
125         if (structSize <= 0)
126             return;
127 
128         int usedBytes;
129         int usedBits;
130         std::tie(usedBytes, usedBits) = computeStructureMemoryUsage(die, members);
131         const int optimalSize = optimalStructureSize(die, members);
132 
133         if ((usedBytes != structSize || usedBits != structSize * 8) && optimalSize != structSize) {
134             const QString loc = die->sourceLocation();
135             if (m_duplicateCheck.contains(loc))
136                 return;
137             std::cout << printSummary(structSize, usedBytes, usedBits, optimalSize).toLocal8Bit().constData();
138             std::cout << printStructure(die, members).toLocal8Bit().constData();
139             std::cout << std::endl;
140             m_duplicateCheck.insert(loc);
141         }
142 
143     } else {
144         foreach (auto child, die->children())
145             checkDie(child);
146     }
147 }
148 
countBytes(const QBitArray & bits)149 static int countBytes(const QBitArray &bits)
150 {
151     int count = 0;
152     for (int byteIndex = 0; byteIndex < bits.size() / 8; ++byteIndex) {
153         for (int bitIndex = 0; bitIndex < 8; ++bitIndex) {
154             if (bits[byteIndex * 8 + bitIndex]) {
155                 ++count;
156                 break;
157             }
158         }
159     }
160     return count;
161 }
162 
countBits(const QBitArray & bits)163 static int countBits(const QBitArray &bits)
164 {
165     int count = 0;
166     for (int i = 0; i < bits.size(); ++i) {
167         if (bits[i])
168             ++count;
169     }
170     return count;
171 }
172 
bitsForEnum(DwarfDie * die)173 static int bitsForEnum(DwarfDie *die)
174 {
175     assert(die->tag() == DW_TAG_enumeration_type);
176 
177     // approach 1: count all covered bits
178     QBitArray bits(die->typeSize() * 8, false);
179     // approach 2: count number of enum values
180     int enumCount = 0;
181 
182     foreach (auto child, die->children()) {
183         if (child->tag() != DW_TAG_enumerator)
184             continue;
185         ++enumCount;
186         const auto enumValue = child->attribute(DW_AT_const_value).toInt();
187         for (int i = 0; i < bits.size(); ++i) {
188             if ((1 << i) & enumValue)
189                 bits[i] = true;
190         }
191     }
192     if (enumCount == 0) {
193         return die->typeSize() * 8; // incomplete information or something went wrong here...
194     }
195 
196     // minimum of the above is our best guess
197     return std::min(enumCount - 1, countBits(bits));
198 }
199 
actualTypeSize(DwarfDie * die)200 static int actualTypeSize(DwarfDie *die)
201 {
202     switch (die->tag()) {
203         case DW_TAG_base_type:
204             if (die->name() == "bool")
205                 return 1;
206             return die->typeSize() * 8;
207         case DW_TAG_enumeration_type:
208             return bitsForEnum(die);
209         case DW_TAG_pointer_type:
210             return die->typeSize() * 8; // TODO pointer alignment can save a few bits
211         case DW_TAG_const_type:
212         case DW_TAG_restrict_type:
213         case DW_TAG_typedef:
214         case DW_TAG_volatile_type:
215         {
216             const auto typeDie = die->attribute(DW_AT_type).value<DwarfDie*>();
217             assert(typeDie);
218             return actualTypeSize(typeDie);
219         }
220         case DW_TAG_array_type:
221         {
222             return die->typeSize() * 8; // TODO the below is correct, but we need to distribute that over the memory bit array below, otherwise usedBytes is wrong
223 /*            const auto typeDie = die->attribute(DW_AT_type).value<DwarfDie*>();
224             assert(typeDie);
225             return die->typeSize() / typeDie->typeSize() * actualTypeSize(typeDie);*/
226         }
227     }
228     return die->typeSize() * 8;
229 }
230 
computeStructureMemoryUsage(DwarfDie * structDie,const QVector<DwarfDie * > & memberDies) const231 std::tuple<int, int> StructurePackingCheck::computeStructureMemoryUsage(DwarfDie* structDie, const QVector< DwarfDie* >& memberDies) const
232 {
233     const auto structSize = structDie->typeSize();
234     if (structSize <= 0)
235         return {};
236 
237     assert(structSize > 0);
238     QBitArray memUsage(structSize * 8);
239 
240     for (DwarfDie *memberDie : memberDies) {
241         const auto memberTypeDie = findTypeDefinition(memberDie->attribute(DW_AT_type).value<DwarfDie*>());
242         assert(memberTypeDie);
243 
244         const auto memberLocation = dataMemberLocation(memberDie);
245         const auto bitSize = memberDie->attribute(DW_AT_bit_size).toInt();
246         const auto bitOffset = memberDie->attribute(DW_AT_bit_offset).toInt();
247 
248         if (bitSize <= 0) {
249             assert((structSize * 8) >= (memberLocation * 8 + memberTypeDie->typeSize() * 8));
250             memUsage.fill(true, memberLocation * 8, memberLocation * 8 + actualTypeSize(memberTypeDie));
251         } else {
252             assert((structSize * 8) >= (memberLocation * 8 + bitOffset + bitSize));
253             memUsage.fill(true, memberLocation * 8 + bitOffset, memberLocation * 8 + bitOffset + bitSize);
254         }
255     }
256 
257     const auto usedBytes = countBytes(memUsage);
258     const auto usedBits = countBits(memUsage);
259     return std::make_tuple(usedBytes, usedBits);
260 }
261 
hasUnknownSize(DwarfDie * typeDie)262 static bool hasUnknownSize(DwarfDie *typeDie)
263 {
264     // 0-size elements can exist, see e.g. __flexarr in inotify.h
265     return typeDie->typeSize() == 0 && (typeDie->tag() == DW_TAG_class_type || typeDie->tag() == DW_TAG_structure_type);
266 }
267 
printStructure(DwarfDie * structDie,const QVector<DwarfDie * > & memberDies) const268 QString StructurePackingCheck::printStructure(DwarfDie* structDie, const QVector<DwarfDie*>& memberDies) const
269 {
270     QString str;
271     QTextStream s(&str);
272 
273     s << (structDie->tag() == DW_TAG_class_type ? "class " : "struct ");
274     s << structDie->fullyQualifiedName();
275     s << " // location: " << structDie->sourceLocation();
276     s << "\n{\n";
277 
278     bool skipPadding = false; // TODO this should not be needed, look outside of the current CU for the real DIE?
279     int nextMemberLocation = 0;
280     for (DwarfDie *memberDie : memberDies) {
281         s << "    ";
282 
283         if (memberDie->tag() == DW_TAG_inheritance)
284             s << "inherits ";
285 
286         DwarfDie *unresolvedTypeDie = memberDie->attribute(DW_AT_type).value<DwarfDie*>();
287         const auto memberTypeDie = findTypeDefinition(unresolvedTypeDie);
288         assert(memberTypeDie);
289 
290         const auto memberLocation = dataMemberLocation(memberDie);
291         if (memberLocation > nextMemberLocation && !skipPadding) {
292             s << "// " << (memberLocation - nextMemberLocation) << " byte(s) padding\n";
293             s << "    ";
294         }
295 
296         // we use the unresolved DIE here to have the user-visible type name, e.g. of a typedef
297         s << unresolvedTypeDie->fullyQualifiedName();
298         s << " ";
299         s << memberDie->name();
300 
301         const auto bitSize = memberDie->attribute(DW_AT_bit_size).toInt();
302         if (bitSize > 0) {
303             s << ':' << bitSize;
304         }
305 
306         s << "; // member offset: " << memberLocation;
307 
308         if (hasUnknownSize(memberTypeDie)) {
309             s << ", unknown size";
310             skipPadding = true;
311         } else {
312             s << ", size: " << memberTypeDie->typeSize();
313             skipPadding = false;
314 
315             const auto actualSize = actualTypeSize(memberTypeDie);
316             if (actualSize != memberTypeDie->typeSize() * 8)
317                 s << " (needed: " << actualSize << " bits)";
318         }
319         s << ", alignment: " << memberTypeDie->typeAlignment();
320 
321         if (bitSize > 0) {
322             const auto bitOffset = memberDie->attribute(DW_AT_bit_offset).toInt();
323             s << ", bit offset: " << bitOffset;
324         }
325 
326         s << "\n";
327 
328         nextMemberLocation = memberLocation + memberTypeDie->typeSize();
329     }
330 
331     if (nextMemberLocation < structDie->typeSize() && !skipPadding)
332         s << "    // " << (structDie->typeSize() - nextMemberLocation) << " byte(s) padding\n";
333 
334     s << "}; // size: " << structDie->typeSize();
335     s << ", alignment: " << structDie->typeAlignment();
336     s << "\n";
337     return str;
338 }
339 
isEmptyBaseClass(DwarfDie * inheritanceDie)340 static bool isEmptyBaseClass(DwarfDie* inheritanceDie)
341 {
342     assert(inheritanceDie->tag() == DW_TAG_inheritance);
343     const auto baseTypeDie = inheritanceDie->attribute(DW_AT_type).value<DwarfDie*>();
344     if (baseTypeDie->typeSize() != 1)
345         return false;
346 
347     foreach (auto d, baseTypeDie->children()) {
348         if (d->tag() == DW_TAG_member)
349             return false;
350         if (d->tag() != DW_TAG_inheritance)
351             continue;
352         if (!isEmptyBaseClass(d))
353             return false;
354     }
355     return true;
356 }
357 
optimalStructureSize(DwarfDie * structDie,const QVector<DwarfDie * > & memberDies) const358 int StructurePackingCheck::optimalStructureSize(DwarfDie* structDie, const QVector< DwarfDie* >& memberDies) const
359 {
360     int size = 0;
361     int alignment = 1;
362     QVector<int> sizes;
363 
364     // TODO: lots of stuff missing to compute optimal bit field layout
365     int prevMemberLocation = -1;
366     bool guessSize = false; // TODO see above, this probably needs better lookup for external types
367     for (DwarfDie* memberDie : memberDies) {
368         // consider empty base class optimization, they don't count, unless we are entirely empty, which is handled at the very end
369         if (memberDie->tag() == DW_TAG_inheritance && isEmptyBaseClass(memberDie))
370             continue;
371 
372         if (prevMemberLocation == dataMemberLocation(memberDie))
373             continue; // skip bit fields for now
374 
375         const auto memberTypeDie = findTypeDefinition(memberDie->attribute(DW_AT_type).value<DwarfDie*>());
376         assert(memberTypeDie);
377 
378         const auto memberLocation = dataMemberLocation(memberDie);
379         if (guessSize)
380             sizes.push_back(memberLocation - prevMemberLocation);
381 
382         guessSize = hasUnknownSize(memberTypeDie);
383         sizes.push_back(memberTypeDie->typeSize());
384         alignment = std::max(alignment, memberTypeDie->typeAlignment());
385 
386         prevMemberLocation = memberLocation;
387     }
388 
389     if (guessSize)
390         sizes.push_back(structDie->typeSize() - prevMemberLocation);
391 
392     // TODO: sort by alignment and add padding
393     foreach (const auto s, sizes)
394         size += s;
395 
396     // align the entire struct to maximum member alignment
397     if (size % alignment)
398         size += alignment - (size % alignment);
399 
400     // structs are always at least 1 byte
401     return std::max(1, size);
402 }
403 
findTypeDefinitionRecursive(DwarfDie * die,const QVector<QByteArray> & fullId)404 static DwarfDie* findTypeDefinitionRecursive(DwarfDie *die, const QVector<QByteArray> &fullId)
405 {
406     // TODO filter to namespace/class/struct tags?
407     if (die->name() != fullId.first())
408         return nullptr;
409     if (fullId.size() == 1)
410         return die;
411 
412     QVector<QByteArray> partialId = fullId;
413     partialId.pop_front();
414     foreach (auto child, die->children()) {
415         DwarfDie *found = findTypeDefinitionRecursive(child, partialId);
416         if (found)
417             return found;
418     }
419     return nullptr;
420 }
421 
findTypeDefinition(DwarfDie * typeDie) const422 DwarfDie* StructurePackingCheck::findTypeDefinition(DwarfDie* typeDie) const
423 {
424     // recurse into typedefs
425     if (typeDie->tag() == DW_TAG_typedef)
426         return findTypeDefinition(typeDie->attribute(DW_AT_type).value<DwarfDie*>());
427 
428     if (!hasUnknownSize(typeDie))
429         return typeDie;
430 
431     // determine the full identifier of the type
432     QVector<QByteArray> fullId;
433     DwarfDie *parentDie = typeDie;
434     do {
435         fullId.prepend(parentDie->name());
436         parentDie = parentDie->parentDie();
437     } while (parentDie && parentDie->tag() != DW_TAG_compile_unit);
438 
439     // sequential search in all CUs for a DIE with the same full id containing the full definition
440     for (int i = 0; i < m_fileSet->size(); ++i) {
441         const auto file = m_fileSet->file(i);
442         if (!file->dwarfInfo())
443             continue;
444 
445         foreach (auto cuDie, file->dwarfInfo()->compilationUnits()) {
446             foreach (auto topDie, cuDie->children()) {
447                 DwarfDie *die = findTypeDefinitionRecursive(topDie, fullId);
448                 if (die && die->typeSize() > 0) {
449                     //qDebug() << "replacing" << typeDie->displayName() << "with" << die->displayName();
450                     return die;
451                 }
452             }
453         }
454     }
455 
456     // no luck
457     qDebug() << "didn't fine a full definition for" << typeDie->displayName();
458     return typeDie;
459 }
460