1 /*
2 * Copyright 2019 WebAssembly Community Group participants
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "wasm-debug.h"
18 #include "wasm.h"
19
20 #ifdef BUILD_LLVM_DWARF
21 #include "llvm/ObjectYAML/DWARFEmitter.h"
22 #include "llvm/ObjectYAML/DWARFYAML.h"
23 #include "llvm/include/llvm/DebugInfo/DWARFContext.h"
24
25 std::error_code dwarf2yaml(llvm::DWARFContext& DCtx, llvm::DWARFYAML::Data& Y);
26 #endif
27
28 #include "wasm-binary.h"
29 #include "wasm-debug.h"
30 #include "wasm.h"
31
32 namespace wasm {
33
34 namespace Debug {
35
isDWARFSection(Name name)36 bool isDWARFSection(Name name) { return name.startsWith(".debug_"); }
37
hasDWARFSections(const Module & wasm)38 bool hasDWARFSections(const Module& wasm) {
39 for (auto& section : wasm.userSections) {
40 if (isDWARFSection(section.name)) {
41 return true;
42 }
43 }
44 return false;
45 }
46
47 #ifdef BUILD_LLVM_DWARF
48
49 // In wasm32 the address size is 32 bits.
50 static const size_t AddressSize = 4;
51
52 struct BinaryenDWARFInfo {
53 llvm::StringMap<std::unique_ptr<llvm::MemoryBuffer>> sections;
54 std::unique_ptr<llvm::DWARFContext> context;
55
BinaryenDWARFInfowasm::Debug::BinaryenDWARFInfo56 BinaryenDWARFInfo(const Module& wasm) {
57 // Get debug sections from the wasm.
58 for (auto& section : wasm.userSections) {
59 if (Name(section.name).startsWith(".debug_") && section.data.data()) {
60 // TODO: efficiency
61 sections[section.name.substr(1)] = llvm::MemoryBuffer::getMemBufferCopy(
62 llvm::StringRef(section.data.data(), section.data.size()));
63 }
64 }
65 // Parse debug sections.
66 uint8_t addrSize = AddressSize;
67 bool isLittleEndian = true;
68 context = llvm::DWARFContext::create(sections, addrSize, isLittleEndian);
69 }
70 };
71
dumpDWARF(const Module & wasm)72 void dumpDWARF(const Module& wasm) {
73 BinaryenDWARFInfo info(wasm);
74 std::cout << "DWARF debug info\n";
75 std::cout << "================\n\n";
76 for (auto& section : wasm.userSections) {
77 if (Name(section.name).startsWith(".debug_")) {
78 std::cout << "Contains section " << section.name << " ("
79 << section.data.size() << " bytes)\n";
80 }
81 }
82 llvm::DIDumpOptions options;
83 options.DumpType = llvm::DIDT_All;
84 options.ShowChildren = true;
85 options.Verbose = true;
86 info.context->dump(llvm::outs(), options);
87 }
88
89 //
90 // Big picture: We use a DWARFContext to read data, then DWARFYAML support
91 // code to write it. That is not the main LLVM Dwarf code used for writing
92 // object files, but it avoids us create a "fake" MC layer, and provides a
93 // simple way to write out the debug info. Likely the level of info represented
94 // in the DWARFYAML::Data object is sufficient for Binaryen's needs, but if not,
95 // we may need a different approach.
96 //
97 // In more detail:
98 //
99 // 1. Binary sections => DWARFContext:
100 //
101 // llvm::DWARFContext::create(sections..)
102 //
103 // 2. DWARFContext => DWARFYAML::Data
104 //
105 // std::error_code dwarf2yaml(DWARFContext &DCtx, DWARFYAML::Data &Y) {
106 //
107 // 3. DWARFYAML::Data => binary sections
108 //
109 // StringMap<std::unique_ptr<MemoryBuffer>>
110 // EmitDebugSections(llvm::DWARFYAML::Data &DI, bool ApplyFixups);
111 //
112
113 // Represents the state when parsing a line table.
114 struct LineState {
115 uint32_t addr = 0;
116 // TODO sectionIndex?
117 uint32_t line = 1;
118 uint32_t col = 0;
119 uint32_t file = 1;
120 uint32_t isa = 0;
121 uint32_t discriminator = 0;
122 bool isStmt;
123 bool basicBlock = false;
124 bool prologueEnd = false;
125 bool epilogueBegin = false;
126 // Each instruction is part of a sequence, all of which get the same ID. The
127 // order within a sequence may change if binaryen reorders things, which means
128 // that we can't track the end_sequence location and assume it is at the end -
129 // we must track sequences and then emit an end for each one.
130 // -1 is an invalid marker value (note that this assumes we can fit all ids
131 // into just under 32 bits).
132 uint32_t sequenceId = -1;
133
134 LineState(const LineState& other) = default;
LineStatewasm::Debug::LineState135 LineState(const llvm::DWARFYAML::LineTable& table, uint32_t sequenceId)
136 : isStmt(table.DefaultIsStmt), sequenceId(sequenceId) {}
137
138 LineState& operator=(const LineState& other) = default;
139
140 // Updates the state, and returns whether a new row is ready to be emitted.
updatewasm::Debug::LineState141 bool update(llvm::DWARFYAML::LineTableOpcode& opcode,
142 const llvm::DWARFYAML::LineTable& table) {
143 switch (opcode.Opcode) {
144 case 0: {
145 // Extended opcodes
146 switch (opcode.SubOpcode) {
147 case llvm::dwarf::DW_LNE_set_address: {
148 addr = opcode.Data;
149 break;
150 }
151 case llvm::dwarf::DW_LNE_end_sequence: {
152 return true;
153 }
154 case llvm::dwarf::DW_LNE_set_discriminator: {
155 discriminator = opcode.Data;
156 break;
157 }
158 case llvm::dwarf::DW_LNE_define_file: {
159 Fatal() << "TODO: DW_LNE_define_file";
160 }
161 default: {
162 // An unknown opcode, ignore.
163 std::cerr << "warning: unknown subopcopde " << opcode.SubOpcode
164 << '\n';
165 }
166 }
167 break;
168 }
169 case llvm::dwarf::DW_LNS_set_column: {
170 col = opcode.Data;
171 break;
172 }
173 case llvm::dwarf::DW_LNS_set_prologue_end: {
174 prologueEnd = true;
175 break;
176 }
177 case llvm::dwarf::DW_LNS_copy: {
178 return true;
179 }
180 case llvm::dwarf::DW_LNS_advance_pc: {
181 assert(table.MinInstLength == 1);
182 addr += opcode.Data;
183 break;
184 }
185 case llvm::dwarf::DW_LNS_advance_line: {
186 line += opcode.SData;
187 break;
188 }
189 case llvm::dwarf::DW_LNS_set_file: {
190 file = opcode.Data;
191 break;
192 }
193 case llvm::dwarf::DW_LNS_negate_stmt: {
194 isStmt = !isStmt;
195 break;
196 }
197 case llvm::dwarf::DW_LNS_set_basic_block: {
198 basicBlock = true;
199 break;
200 }
201 case llvm::dwarf::DW_LNS_const_add_pc: {
202 uint8_t AdjustOpcode = 255 - table.OpcodeBase;
203 uint64_t AddrOffset =
204 (AdjustOpcode / table.LineRange) * table.MinInstLength;
205 addr += AddrOffset;
206 break;
207 }
208 case llvm::dwarf::DW_LNS_fixed_advance_pc: {
209 addr += opcode.Data;
210 break;
211 }
212 case llvm::dwarf::DW_LNS_set_isa: {
213 isa = opcode.Data;
214 break;
215 }
216 default: {
217 if (opcode.Opcode >= table.OpcodeBase) {
218 // Special opcode: adjust line and addr, using some math.
219 uint8_t AdjustOpcode =
220 opcode.Opcode - table.OpcodeBase; // 20 - 13 = 7
221 uint64_t AddrOffset = (AdjustOpcode / table.LineRange) *
222 table.MinInstLength; // (7 / 14) * 1 = 0
223 int32_t LineOffset =
224 table.LineBase +
225 (AdjustOpcode % table.LineRange); // -5 + (7 % 14) = 2
226 line += LineOffset;
227 addr += AddrOffset;
228 return true;
229 } else {
230 Fatal() << "unknown debug line opcode: " << std::hex << opcode.Opcode;
231 }
232 }
233 }
234 return false;
235 }
236
237 // Checks if this starts a new range of addresses. Each range is a set of
238 // related addresses, where in particular, if the first has been zeroed out
239 // by the linker, we must omit the entire range. (If we do not, then the
240 // initial range is 0 and the others are offsets relative to it, which will
241 // look like random addresses, perhaps into the middle of instructions, and
242 // perhaps that happen to collide with real ones.)
startsNewRangewasm::Debug::LineState243 bool startsNewRange(llvm::DWARFYAML::LineTableOpcode& opcode) {
244 return opcode.Opcode == 0 &&
245 opcode.SubOpcode == llvm::dwarf::DW_LNE_set_address;
246 }
247
needToEmitwasm::Debug::LineState248 bool needToEmit() {
249 // Zero values imply we can ignore this line.
250 // https://github.com/WebAssembly/debugging/issues/9#issuecomment-567720872
251 return line != 0 && addr != 0;
252 }
253
254 // Given an old state, emit the diff from it to this state into a new line
255 // table entry (that will be emitted in the updated DWARF debug line section).
emitDiffwasm::Debug::LineState256 void emitDiff(const LineState& old,
257 std::vector<llvm::DWARFYAML::LineTableOpcode>& newOpcodes,
258 const llvm::DWARFYAML::LineTable& table,
259 bool endSequence) const {
260 bool useSpecial = false;
261 if (addr != old.addr || line != old.line) {
262 // Try to use a special opcode TODO
263 }
264 if (addr != old.addr && !useSpecial) {
265 // len = 1 (subopcode) + 4 (wasm32 address)
266 // FIXME: look at AddrSize on the Unit.
267 auto item = makeItem(llvm::dwarf::DW_LNE_set_address, 5);
268 item.Data = addr;
269 newOpcodes.push_back(item);
270 }
271 if (line != old.line && !useSpecial) {
272 auto item = makeItem(llvm::dwarf::DW_LNS_advance_line);
273 // In wasm32 we have 32-bit addresses, and the delta here might be
274 // negative (note that SData is 64-bit, as LLVM supports 64-bit
275 // addresses too).
276 item.SData = int32_t(line - old.line);
277 newOpcodes.push_back(item);
278 }
279 if (col != old.col) {
280 auto item = makeItem(llvm::dwarf::DW_LNS_set_column);
281 item.Data = col;
282 newOpcodes.push_back(item);
283 }
284 if (file != old.file) {
285 auto item = makeItem(llvm::dwarf::DW_LNS_set_file);
286 item.Data = file;
287 newOpcodes.push_back(item);
288 }
289 if (isa != old.isa) {
290 auto item = makeItem(llvm::dwarf::DW_LNS_set_isa);
291 item.Data = isa;
292 newOpcodes.push_back(item);
293 }
294 if (discriminator != old.discriminator) {
295 // len = 1 (subopcode) + 4 (wasm32 address)
296 auto item = makeItem(llvm::dwarf::DW_LNE_set_discriminator, 5);
297 item.Data = discriminator;
298 newOpcodes.push_back(item);
299 }
300 if (isStmt != old.isStmt) {
301 newOpcodes.push_back(makeItem(llvm::dwarf::DW_LNS_negate_stmt));
302 }
303 if (basicBlock != old.basicBlock) {
304 assert(basicBlock);
305 newOpcodes.push_back(makeItem(llvm::dwarf::DW_LNS_set_basic_block));
306 }
307 if (prologueEnd) {
308 newOpcodes.push_back(makeItem(llvm::dwarf::DW_LNS_set_prologue_end));
309 }
310 if (epilogueBegin != old.epilogueBegin) {
311 Fatal() << "eb";
312 }
313 if (useSpecial) {
314 // Emit a special, which emits a line automatically.
315 // TODO
316 } else {
317 // Emit the line manually.
318 if (endSequence) {
319 // len = 1 (subopcode)
320 newOpcodes.push_back(makeItem(llvm::dwarf::DW_LNE_end_sequence, 1));
321 } else {
322 newOpcodes.push_back(makeItem(llvm::dwarf::DW_LNS_copy));
323 }
324 }
325 }
326
327 // Some flags are automatically reset after each debug line.
resetAfterLinewasm::Debug::LineState328 void resetAfterLine() { prologueEnd = false; }
329
330 private:
331 llvm::DWARFYAML::LineTableOpcode
makeItemwasm::Debug::LineState332 makeItem(llvm::dwarf::LineNumberOps opcode) const {
333 llvm::DWARFYAML::LineTableOpcode item = {};
334 item.Opcode = opcode;
335 return item;
336 }
337
338 llvm::DWARFYAML::LineTableOpcode
makeItemwasm::Debug::LineState339 makeItem(llvm::dwarf::LineNumberExtendedOps opcode, uint64_t len) const {
340 auto item = makeItem(llvm::dwarf::LineNumberOps(0));
341 // All the length after the len field itself, including the subopcode
342 // (1 byte).
343 item.ExtLen = len;
344 item.SubOpcode = opcode;
345 return item;
346 }
347 };
348
349 // Represents a mapping of addresses to expressions. We track beginnings and
350 // endings of expressions separately, since the end of one (which is one past
351 // the end in DWARF notation) overlaps with the beginning of the next, and also
352 // to let us use contextual information (we may know we are looking up the end
353 // of an instruction).
354 struct AddrExprMap {
355 std::unordered_map<BinaryLocation, Expression*> startMap;
356 std::unordered_map<BinaryLocation, Expression*> endMap;
357
358 // Some instructions have delimiter binary locations, like the else and end in
359 // and if. Track those separately, including their expression and their id
360 // ("else", "end", etc.), as they are rare, and we don't want to
361 // bloat the common case which is represented in the earlier maps.
362 struct DelimiterInfo {
363 Expression* expr;
364 BinaryLocations::DelimiterId id;
365 };
366 std::unordered_map<BinaryLocation, DelimiterInfo> delimiterMap;
367
368 // Construct the map from the binaryLocations loaded from the wasm.
AddrExprMapwasm::Debug::AddrExprMap369 AddrExprMap(const Module& wasm) {
370 for (auto& func : wasm.functions) {
371 for (auto pair : func->expressionLocations) {
372 add(pair.first, pair.second);
373 }
374 for (auto pair : func->delimiterLocations) {
375 add(pair.first, pair.second);
376 }
377 }
378 }
379
getStartwasm::Debug::AddrExprMap380 Expression* getStart(BinaryLocation addr) const {
381 auto iter = startMap.find(addr);
382 if (iter != startMap.end()) {
383 return iter->second;
384 }
385 return nullptr;
386 }
387
getEndwasm::Debug::AddrExprMap388 Expression* getEnd(BinaryLocation addr) const {
389 auto iter = endMap.find(addr);
390 if (iter != endMap.end()) {
391 return iter->second;
392 }
393 return nullptr;
394 }
395
getDelimiterwasm::Debug::AddrExprMap396 DelimiterInfo getDelimiter(BinaryLocation addr) const {
397 auto iter = delimiterMap.find(addr);
398 if (iter != delimiterMap.end()) {
399 return iter->second;
400 }
401 return DelimiterInfo{nullptr, BinaryLocations::Invalid};
402 }
403
404 private:
addwasm::Debug::AddrExprMap405 void add(Expression* expr, const BinaryLocations::Span span) {
406 assert(startMap.count(span.start) == 0);
407 startMap[span.start] = expr;
408 assert(endMap.count(span.end) == 0);
409 endMap[span.end] = expr;
410 }
411
addwasm::Debug::AddrExprMap412 void add(Expression* expr,
413 const BinaryLocations::DelimiterLocations& delimiter) {
414 for (Index i = 0; i < delimiter.size(); i++) {
415 if (delimiter[i] != 0) {
416 assert(delimiterMap.count(delimiter[i]) == 0);
417 delimiterMap[delimiter[i]] =
418 DelimiterInfo{expr, BinaryLocations::DelimiterId(i)};
419 }
420 }
421 }
422 };
423
424 // Represents a mapping of addresses to expressions. As with expressions, we
425 // track both start and end; here, however, "start" means the "start" and
426 // "declarations" fields in FunctionLocations, and "end" means the two locations
427 // of one past the end, and one before it which is the "end" opcode that is
428 // emitted.
429 struct FuncAddrMap {
430 std::unordered_map<BinaryLocation, Function*> startMap, endMap;
431
432 // Construct the map from the binaryLocations loaded from the wasm.
FuncAddrMapwasm::Debug::FuncAddrMap433 FuncAddrMap(const Module& wasm) {
434 for (auto& func : wasm.functions) {
435 startMap[func->funcLocation.start] = func.get();
436 startMap[func->funcLocation.declarations] = func.get();
437 endMap[func->funcLocation.end - 1] = func.get();
438 endMap[func->funcLocation.end] = func.get();
439 }
440 }
441
getStartwasm::Debug::FuncAddrMap442 Function* getStart(BinaryLocation addr) const {
443 auto iter = startMap.find(addr);
444 if (iter != startMap.end()) {
445 return iter->second;
446 }
447 return nullptr;
448 }
449
getEndwasm::Debug::FuncAddrMap450 Function* getEnd(BinaryLocation addr) const {
451 auto iter = endMap.find(addr);
452 if (iter != endMap.end()) {
453 return iter->second;
454 }
455 return nullptr;
456 }
457 };
458
459 // Track locations from the original binary and the new one we wrote, so that
460 // we can update debug positions.
461 // We track expressions and functions separately, instead of having a single
462 // big map of (oldAddr) => (newAddr) because of the potentially ambiguous case
463 // of the final expression in a function: it's end might be identical in offset
464 // to the end of the function. So we have two different things that map to the
465 // same offset. However, if the context is "the end of the function" then the
466 // updated address is the new end of the function, even if the function ends
467 // with a different instruction now, as the old last instruction might have
468 // moved or been optimized out.
469 struct LocationUpdater {
470 Module& wasm;
471 const BinaryLocations& newLocations;
472
473 AddrExprMap oldExprAddrMap;
474 FuncAddrMap oldFuncAddrMap;
475
476 // Map offsets of location list entries in the debug_loc section to the index
477 // of their compile unit.
478 std::unordered_map<BinaryLocation, size_t> locToUnitMap;
479
480 // Map start of line tables in the debug_line section to their new locations.
481 std::unordered_map<BinaryLocation, BinaryLocation> debugLineMap;
482
483 typedef std::pair<BinaryLocation, BinaryLocation> OldToNew;
484
485 // Map of compile unit index => old and new base offsets (i.e., in the
486 // original binary and in the new one).
487 std::unordered_map<size_t, OldToNew> compileUnitBases;
488
489 // TODO: for memory efficiency, we may want to do this in a streaming manner,
490 // binary to binary, without YAML IR.
491
LocationUpdaterwasm::Debug::LocationUpdater492 LocationUpdater(Module& wasm, const BinaryLocations& newLocations)
493 : wasm(wasm), newLocations(newLocations), oldExprAddrMap(wasm),
494 oldFuncAddrMap(wasm) {}
495
496 // Updates an expression's address. If there was never an instruction at that
497 // address, or if there was but if that instruction no longer exists, return
498 // 0. Otherwise, return the new updated location.
getNewExprStartwasm::Debug::LocationUpdater499 BinaryLocation getNewExprStart(BinaryLocation oldAddr) const {
500 if (auto* expr = oldExprAddrMap.getStart(oldAddr)) {
501 auto iter = newLocations.expressions.find(expr);
502 if (iter != newLocations.expressions.end()) {
503 BinaryLocation newAddr = iter->second.start;
504 return newAddr;
505 }
506 }
507 return 0;
508 }
509
hasOldExprStartwasm::Debug::LocationUpdater510 bool hasOldExprStart(BinaryLocation oldAddr) const {
511 return oldExprAddrMap.getStart(oldAddr);
512 }
513
getNewExprEndwasm::Debug::LocationUpdater514 BinaryLocation getNewExprEnd(BinaryLocation oldAddr) const {
515 if (auto* expr = oldExprAddrMap.getEnd(oldAddr)) {
516 auto iter = newLocations.expressions.find(expr);
517 if (iter != newLocations.expressions.end()) {
518 return iter->second.end;
519 }
520 }
521 return 0;
522 }
523
hasOldExprEndwasm::Debug::LocationUpdater524 bool hasOldExprEnd(BinaryLocation oldAddr) const {
525 return oldExprAddrMap.getEnd(oldAddr);
526 }
527
getNewFuncStartwasm::Debug::LocationUpdater528 BinaryLocation getNewFuncStart(BinaryLocation oldAddr) const {
529 if (auto* func = oldFuncAddrMap.getStart(oldAddr)) {
530 // The function might have been optimized away, check.
531 auto iter = newLocations.functions.find(func);
532 if (iter != newLocations.functions.end()) {
533 auto oldLocations = func->funcLocation;
534 auto newLocations = iter->second;
535 if (oldAddr == oldLocations.start) {
536 return newLocations.start;
537 } else if (oldAddr == oldLocations.declarations) {
538 return newLocations.declarations;
539 } else {
540 WASM_UNREACHABLE("invalid func start");
541 }
542 }
543 }
544 return 0;
545 }
546
hasOldFuncStartwasm::Debug::LocationUpdater547 bool hasOldFuncStart(BinaryLocation oldAddr) const {
548 return oldFuncAddrMap.getStart(oldAddr);
549 }
550
getNewFuncEndwasm::Debug::LocationUpdater551 BinaryLocation getNewFuncEnd(BinaryLocation oldAddr) const {
552 if (auto* func = oldFuncAddrMap.getEnd(oldAddr)) {
553 // The function might have been optimized away, check.
554 auto iter = newLocations.functions.find(func);
555 if (iter != newLocations.functions.end()) {
556 auto oldLocations = func->funcLocation;
557 auto newLocations = iter->second;
558 if (oldAddr == oldLocations.end) {
559 return newLocations.end;
560 } else if (oldAddr == oldLocations.end - 1) {
561 return newLocations.end - 1;
562 } else {
563 WASM_UNREACHABLE("invalid func end");
564 }
565 }
566 }
567 return 0;
568 }
569
570 // Check for either the end opcode, or one past the end.
hasOldFuncEndwasm::Debug::LocationUpdater571 bool hasOldFuncEnd(BinaryLocation oldAddr) const {
572 return oldFuncAddrMap.getEnd(oldAddr);
573 }
574
575 // Check specifically for the end opcode.
hasOldFuncEndOpcodewasm::Debug::LocationUpdater576 bool hasOldFuncEndOpcode(BinaryLocation oldAddr) const {
577 if (auto* func = oldFuncAddrMap.getEnd(oldAddr)) {
578 return oldAddr == func->funcLocation.end - 1;
579 }
580 return false;
581 }
582
getNewDelimiterwasm::Debug::LocationUpdater583 BinaryLocation getNewDelimiter(BinaryLocation oldAddr) const {
584 auto info = oldExprAddrMap.getDelimiter(oldAddr);
585 if (info.expr) {
586 auto iter = newLocations.delimiters.find(info.expr);
587 if (iter != newLocations.delimiters.end()) {
588 return iter->second[info.id];
589 }
590 }
591 return 0;
592 }
593
hasOldDelimiterwasm::Debug::LocationUpdater594 bool hasOldDelimiter(BinaryLocation oldAddr) const {
595 return oldExprAddrMap.getDelimiter(oldAddr).expr;
596 }
597
598 // getNewStart|EndAddr utilities.
599 // TODO: should we track the start and end of delimiters, even though they
600 // are just one byte?
getNewStartwasm::Debug::LocationUpdater601 BinaryLocation getNewStart(BinaryLocation oldStart) const {
602 if (hasOldExprStart(oldStart)) {
603 return getNewExprStart(oldStart);
604 } else if (hasOldFuncStart(oldStart)) {
605 return getNewFuncStart(oldStart);
606 } else if (hasOldDelimiter(oldStart)) {
607 return getNewDelimiter(oldStart);
608 }
609 return 0;
610 }
611
getNewEndwasm::Debug::LocationUpdater612 BinaryLocation getNewEnd(BinaryLocation oldEnd) const {
613 if (hasOldExprEnd(oldEnd)) {
614 return getNewExprEnd(oldEnd);
615 } else if (hasOldFuncEnd(oldEnd)) {
616 return getNewFuncEnd(oldEnd);
617 } else if (hasOldDelimiter(oldEnd)) {
618 return getNewDelimiter(oldEnd);
619 }
620 return 0;
621 }
622
getNewDebugLineLocationwasm::Debug::LocationUpdater623 BinaryLocation getNewDebugLineLocation(BinaryLocation old) const {
624 return debugLineMap.at(old);
625 }
626
627 // Given an offset in .debug_loc, get the old and new compile unit bases.
getCompileUnitBasesForLocwasm::Debug::LocationUpdater628 OldToNew getCompileUnitBasesForLoc(size_t offset) const {
629 auto index = locToUnitMap.at(offset);
630 auto iter = compileUnitBases.find(index);
631 if (iter != compileUnitBases.end()) {
632 return iter->second;
633 }
634 return OldToNew{0, 0};
635 }
636 };
637
638 // Update debug lines, and update the locationUpdater with debug line offset
639 // changes so we can update offsets into the debug line section.
updateDebugLines(llvm::DWARFYAML::Data & data,LocationUpdater & locationUpdater)640 static void updateDebugLines(llvm::DWARFYAML::Data& data,
641 LocationUpdater& locationUpdater) {
642 for (auto& table : data.DebugLines) {
643 uint32_t sequenceId = 0;
644 // Parse the original opcodes and emit new ones.
645 LineState state(table, sequenceId);
646 // All the addresses we need to write out.
647 std::vector<BinaryLocation> newAddrs;
648 std::unordered_map<BinaryLocation, LineState> newAddrInfo;
649 // If the address was zeroed out, we must omit the entire range (we could
650 // also leave it unchanged, so that the debugger ignores it based on the
651 // initial zero; but it's easier and better to just not emit it at all).
652 bool omittingRange = false;
653 for (auto& opcode : table.Opcodes) {
654 // Update the state, and check if we have a new row to emit.
655 if (state.startsNewRange(opcode)) {
656 omittingRange = false;
657 }
658 if (state.update(opcode, table)) {
659 if (state.addr == 0) {
660 omittingRange = true;
661 }
662 if (omittingRange) {
663 state = LineState(table, sequenceId);
664 continue;
665 }
666 // An expression may not exist for this line table item, if we optimized
667 // it away.
668 BinaryLocation oldAddr = state.addr;
669 BinaryLocation newAddr = 0;
670 if (locationUpdater.hasOldExprStart(oldAddr)) {
671 newAddr = locationUpdater.getNewExprStart(oldAddr);
672 }
673 // Test for a function's end address first, as LLVM output appears to
674 // use 1-past-the-end-of-the-function as a location in that function,
675 // and not the next (but the first byte of the next function, which is
676 // ambiguously identical to that value, is used at least in low_pc).
677 else if (locationUpdater.hasOldFuncEnd(oldAddr)) {
678 newAddr = locationUpdater.getNewFuncEnd(oldAddr);
679 } else if (locationUpdater.hasOldFuncStart(oldAddr)) {
680 newAddr = locationUpdater.getNewFuncStart(oldAddr);
681 } else if (locationUpdater.hasOldDelimiter(oldAddr)) {
682 newAddr = locationUpdater.getNewDelimiter(oldAddr);
683 }
684 if (newAddr && state.needToEmit()) {
685 // LLVM sometimes emits the same address more than once. We should
686 // probably investigate that.
687 if (newAddrInfo.count(newAddr)) {
688 continue;
689 }
690 newAddrs.push_back(newAddr);
691 newAddrInfo.emplace(newAddr, state);
692 auto& updatedState = newAddrInfo.at(newAddr);
693 // The only difference is the address TODO other stuff?
694 updatedState.addr = newAddr;
695 // Reset relevant state.
696 state.resetAfterLine();
697 }
698 if (opcode.Opcode == 0 &&
699 opcode.SubOpcode == llvm::dwarf::DW_LNE_end_sequence) {
700 sequenceId++;
701 // We assume the number of sequences can fit in 32 bits, and -1 is
702 // an invalid value.
703 assert(sequenceId != uint32_t(-1));
704 state = LineState(table, sequenceId);
705 }
706 }
707 }
708 // Sort the new addresses (which may be substantially different from the
709 // original layout after optimization).
710 std::sort(newAddrs.begin(), newAddrs.end());
711 // Emit a new line table.
712 {
713 std::vector<llvm::DWARFYAML::LineTableOpcode> newOpcodes;
714 for (size_t i = 0; i < newAddrs.size(); i++) {
715 LineState state = newAddrInfo.at(newAddrs[i]);
716 assert(state.needToEmit());
717 LineState lastState(table, -1);
718 if (i != 0) {
719 lastState = newAddrInfo.at(newAddrs[i - 1]);
720 // If the last line is in another sequence, clear the old state, as
721 // there is nothing to diff to.
722 if (lastState.sequenceId != state.sequenceId) {
723 lastState = LineState(table, -1);
724 }
725 }
726 // This line ends a sequence if there is no next line after it, or if
727 // the next line is in a different sequence.
728 bool endSequence =
729 i + 1 == newAddrs.size() ||
730 newAddrInfo.at(newAddrs[i + 1]).sequenceId != state.sequenceId;
731 state.emitDiff(lastState, newOpcodes, table, endSequence);
732 }
733 table.Opcodes.swap(newOpcodes);
734 }
735 }
736 // After updating the contents, run the emitter in order to update the
737 // lengths of each section. We will use that to update offsets into the
738 // debug_line section.
739 std::vector<size_t> computedLengths;
740 llvm::DWARFYAML::ComputeDebugLine(data, computedLengths);
741 BinaryLocation oldLocation = 0, newLocation = 0;
742 for (size_t i = 0; i < data.DebugLines.size(); i++) {
743 auto& table = data.DebugLines[i];
744 locationUpdater.debugLineMap[oldLocation] = newLocation;
745 oldLocation += table.Length.getLength() + AddressSize;
746 newLocation += computedLengths[i] + AddressSize;
747 table.Length.setLength(computedLengths[i]);
748 }
749 }
750
751 // Iterate in parallel over a DwarfContext representation element and a
752 // YAML element, which parallel each other.
753 template<typename T, typename U, typename W>
iterContextAndYAML(const T & contextList,U & yamlList,W func)754 static void iterContextAndYAML(const T& contextList, U& yamlList, W func) {
755 auto yamlValue = yamlList.begin();
756 for (const auto& contextValue : contextList) {
757 assert(yamlValue != yamlList.end());
758 func(contextValue, *yamlValue);
759 yamlValue++;
760 }
761 assert(yamlValue == yamlList.end());
762 }
763
764 // Updates a YAML entry from a DWARF DIE. Also updates LocationUpdater
765 // associating each .debug_loc entry with the base address of its corresponding
766 // compilation unit.
updateDIE(const llvm::DWARFDebugInfoEntry & DIE,llvm::DWARFYAML::Entry & yamlEntry,const llvm::DWARFAbbreviationDeclaration * abbrevDecl,LocationUpdater & locationUpdater,size_t compileUnitIndex)767 static void updateDIE(const llvm::DWARFDebugInfoEntry& DIE,
768 llvm::DWARFYAML::Entry& yamlEntry,
769 const llvm::DWARFAbbreviationDeclaration* abbrevDecl,
770 LocationUpdater& locationUpdater,
771 size_t compileUnitIndex) {
772 auto tag = DIE.getTag();
773 // Pairs of low/high_pc require some special handling, as the high
774 // may be an offset relative to the low. First, process everything but
775 // the high pcs, so we see the low pcs first.
776 BinaryLocation oldLowPC = 0, newLowPC = 0;
777 iterContextAndYAML(
778 abbrevDecl->attributes(),
779 yamlEntry.Values,
780 [&](const llvm::DWARFAbbreviationDeclaration::AttributeSpec& attrSpec,
781 llvm::DWARFYAML::FormValue& yamlValue) {
782 auto attr = attrSpec.Attr;
783 if (attr == llvm::dwarf::DW_AT_low_pc) {
784 // This is an address.
785 BinaryLocation oldValue = yamlValue.Value, newValue = 0;
786 if (tag == llvm::dwarf::DW_TAG_GNU_call_site ||
787 tag == llvm::dwarf::DW_TAG_inlined_subroutine ||
788 tag == llvm::dwarf::DW_TAG_lexical_block ||
789 tag == llvm::dwarf::DW_TAG_label) {
790 newValue = locationUpdater.getNewStart(oldValue);
791 } else if (tag == llvm::dwarf::DW_TAG_compile_unit) {
792 newValue = locationUpdater.getNewFuncStart(oldValue);
793 // Per the DWARF spec, "The base address of a compile unit is
794 // defined as the value of the DW_AT_low_pc attribute, if present."
795 locationUpdater.compileUnitBases[compileUnitIndex] =
796 LocationUpdater::OldToNew{oldValue, newValue};
797 } else if (tag == llvm::dwarf::DW_TAG_subprogram) {
798 newValue = locationUpdater.getNewFuncStart(oldValue);
799 } else {
800 Fatal() << "unknown tag with low_pc "
801 << llvm::dwarf::TagString(tag).str();
802 }
803 oldLowPC = oldValue;
804 newLowPC = newValue;
805 yamlValue.Value = newValue;
806 } else if (attr == llvm::dwarf::DW_AT_stmt_list) {
807 // This is an offset into the debug line section.
808 yamlValue.Value =
809 locationUpdater.getNewDebugLineLocation(yamlValue.Value);
810 } else if (attr == llvm::dwarf::DW_AT_location &&
811 attrSpec.Form == llvm::dwarf::DW_FORM_sec_offset) {
812 BinaryLocation locOffset = yamlValue.Value;
813 locationUpdater.locToUnitMap[locOffset] = compileUnitIndex;
814 }
815 });
816 // Next, process the high_pcs.
817 // TODO: do this more efficiently, without a second traversal (but that's a
818 // little tricky given the special double-traversal we have).
819 iterContextAndYAML(
820 abbrevDecl->attributes(),
821 yamlEntry.Values,
822 [&](const llvm::DWARFAbbreviationDeclaration::AttributeSpec& attrSpec,
823 llvm::DWARFYAML::FormValue& yamlValue) {
824 auto attr = attrSpec.Attr;
825 if (attr != llvm::dwarf::DW_AT_high_pc) {
826 return;
827 }
828 BinaryLocation oldValue = yamlValue.Value, newValue = 0;
829 bool isRelative = attrSpec.Form == llvm::dwarf::DW_FORM_data4;
830 if (isRelative) {
831 oldValue += oldLowPC;
832 }
833 if (tag == llvm::dwarf::DW_TAG_GNU_call_site ||
834 tag == llvm::dwarf::DW_TAG_inlined_subroutine ||
835 tag == llvm::dwarf::DW_TAG_lexical_block ||
836 tag == llvm::dwarf::DW_TAG_label) {
837 newValue = locationUpdater.getNewExprEnd(oldValue);
838 } else if (tag == llvm::dwarf::DW_TAG_compile_unit ||
839 tag == llvm::dwarf::DW_TAG_subprogram) {
840 newValue = locationUpdater.getNewFuncEnd(oldValue);
841 } else {
842 Fatal() << "unknown tag with low_pc "
843 << llvm::dwarf::TagString(tag).str();
844 }
845 if (isRelative) {
846 newValue -= newLowPC;
847 }
848 yamlValue.Value = newValue;
849 });
850 }
851
updateCompileUnits(const BinaryenDWARFInfo & info,llvm::DWARFYAML::Data & yaml,LocationUpdater & locationUpdater)852 static void updateCompileUnits(const BinaryenDWARFInfo& info,
853 llvm::DWARFYAML::Data& yaml,
854 LocationUpdater& locationUpdater) {
855 // The context has the high-level information we need, and the YAML is where
856 // we write changes. First, iterate over the compile units.
857 size_t compileUnitIndex = 0;
858 iterContextAndYAML(
859 info.context->compile_units(),
860 yaml.CompileUnits,
861 [&](const std::unique_ptr<llvm::DWARFUnit>& CU,
862 llvm::DWARFYAML::Unit& yamlUnit) {
863 // Process the DIEs in each compile unit.
864 iterContextAndYAML(
865 CU->dies(),
866 yamlUnit.Entries,
867 [&](const llvm::DWARFDebugInfoEntry& DIE,
868 llvm::DWARFYAML::Entry& yamlEntry) {
869 // Process the entries in each relevant DIE, looking for attributes to
870 // change.
871 auto abbrevDecl = DIE.getAbbreviationDeclarationPtr();
872 if (abbrevDecl) {
873 // This is relevant; look for things to update.
874 updateDIE(
875 DIE, yamlEntry, abbrevDecl, locationUpdater, compileUnitIndex);
876 }
877 });
878 compileUnitIndex++;
879 });
880 }
881
updateRanges(llvm::DWARFYAML::Data & yaml,const LocationUpdater & locationUpdater)882 static void updateRanges(llvm::DWARFYAML::Data& yaml,
883 const LocationUpdater& locationUpdater) {
884 // In each range section, try to update the start and end. If we no longer
885 // have something to map them to, we must skip that part.
886 size_t skip = 0;
887 for (size_t i = 0; i < yaml.Ranges.size(); i++) {
888 auto& range = yaml.Ranges[i];
889 BinaryLocation oldStart = range.Start, oldEnd = range.End, newStart = 0,
890 newEnd = 0;
891 // If this is an end marker (0, 0), or an invalid range (0, x) or (x, 0)
892 // then just emit it as it is - either to mark the end, or to mark an
893 // invalid entry.
894 if (oldStart == 0 || oldEnd == 0) {
895 newStart = oldStart;
896 newEnd = oldEnd;
897 } else {
898 // This was a valid entry; update it.
899 newStart = locationUpdater.getNewStart(oldStart);
900 newEnd = locationUpdater.getNewEnd(oldEnd);
901 if (newStart == 0 || newEnd == 0) {
902 // This part of the range no longer has a mapping, so we must skip it.
903 // Don't use (0, 0) as that would be an end marker; emit something
904 // invalid for the debugger to ignore.
905 newStart = 0;
906 newEnd = 1;
907 }
908 // TODO even if range start and end markers have been preserved,
909 // instructions in the middle may have moved around, making the range no
910 // longer contiguous. We should check that, and possibly split/merge
911 // the range. Or, we may need to have tracking in the IR for this.
912 }
913 auto& writtenRange = yaml.Ranges[i - skip];
914 writtenRange.Start = newStart;
915 writtenRange.End = newEnd;
916 }
917 }
918
919 // A location that is ignoreable, i.e., not a special value like 0 or -1 (which
920 // would indicate an end or a base in .debug_loc).
921 static const BinaryLocation IGNOREABLE_LOCATION = 1;
922
isNewBaseLoc(const llvm::DWARFYAML::Loc & loc)923 static bool isNewBaseLoc(const llvm::DWARFYAML::Loc& loc) {
924 return loc.Start == BinaryLocation(-1);
925 }
926
isEndMarkerLoc(const llvm::DWARFYAML::Loc & loc)927 static bool isEndMarkerLoc(const llvm::DWARFYAML::Loc& loc) {
928 return loc.Start == 0 && loc.End == 0;
929 }
930
931 // Update the .debug_loc section.
updateLoc(llvm::DWARFYAML::Data & yaml,const LocationUpdater & locationUpdater)932 static void updateLoc(llvm::DWARFYAML::Data& yaml,
933 const LocationUpdater& locationUpdater) {
934 // Similar to ranges, try to update the start and end. Note that here we
935 // can't skip since the location description is a variable number of bytes,
936 // so we mark no longer valid addresses as empty.
937 bool atStart = true;
938 // We need to keep positions in the .debug_loc section identical to before
939 // (or else we'd need to update their positions too) and so we need to keep
940 // base entries around (a base entry is added to every entry after it in the
941 // list). However, we may change the base's value as after moving instructions
942 // around the old base may not be smaller than all the values relative to it.
943 BinaryLocation oldBase, newBase;
944 auto& locs = yaml.Locs;
945 for (size_t i = 0; i < locs.size(); i++) {
946 auto& loc = locs[i];
947 if (atStart) {
948 std::tie(oldBase, newBase) =
949 locationUpdater.getCompileUnitBasesForLoc(loc.CompileUnitOffset);
950 atStart = false;
951 }
952 // By default we copy values over, unless we modify them below.
953 BinaryLocation newStart = loc.Start, newEnd = loc.End;
954 if (isNewBaseLoc(loc)) {
955 // This is a new base.
956 // Note that the base is not the address of an instruction, necessarily -
957 // it's just a number (seems like it could always be an instruction, but
958 // that's not what LLVM emits).
959 // We must look forward at everything relative to this base, so that we
960 // can emit a new proper base (as mentioned earlier, the original base may
961 // not be valid if instructions moved to a position before it - they must
962 // be positive offsets from it).
963 oldBase = newBase = newEnd;
964 BinaryLocation smallest = -1;
965 for (size_t j = i + 1; j < locs.size(); j++) {
966 auto& futureLoc = locs[j];
967 if (isNewBaseLoc(futureLoc) || isEndMarkerLoc(futureLoc)) {
968 break;
969 }
970 auto updatedStart =
971 locationUpdater.getNewStart(futureLoc.Start + oldBase);
972 // If we found a valid mapping, this is a relevant value for us. If the
973 // optimizer removed it, it's a 0, and we can ignore it here - we will
974 // emit IGNOREABLE_LOCATION for it later anyhow.
975 if (updatedStart != 0) {
976 smallest = std::min(smallest, updatedStart);
977 }
978 }
979 // If we found no valid values that will be relativized here, just use 0
980 // as the new (never-to-be-used) base, which is less confusing (otherwise
981 // the value looks like it means something).
982 if (smallest == BinaryLocation(-1)) {
983 smallest = 0;
984 }
985 newBase = newEnd = smallest;
986 } else if (isEndMarkerLoc(loc)) {
987 // This is an end marker, this list is done; reset the base.
988 atStart = true;
989 } else {
990 // This is a normal entry, try to find what it should be updated to. First
991 // de-relativize it to the base to get the absolute address, then look for
992 // a new address for it.
993 newStart = locationUpdater.getNewStart(loc.Start + oldBase);
994 newEnd = locationUpdater.getNewEnd(loc.End + oldBase);
995 if (newStart == 0 || newEnd == 0 || newStart > newEnd) {
996 // This part of the loc no longer has a mapping, or after the mapping
997 // it is no longer a proper span, so we must ignore it.
998 newStart = newEnd = IGNOREABLE_LOCATION;
999 } else {
1000 // We picked a new base that ensures it is smaller than the values we
1001 // will relativize to it.
1002 assert(newStart >= newBase && newEnd >= newBase);
1003 newStart -= newBase;
1004 newEnd -= newBase;
1005 if (newStart == 0 && newEnd == 0) {
1006 // After mapping to the new positions, and after relativizing to the
1007 // base, if we end up with (0, 0) then we must emit something else, as
1008 // that would be interpreted as the end of a list. As it is an empty
1009 // span, the actual value doesn't matter, it just has to be != 0.
1010 // This can happen if the very first span in a compile unit is an
1011 // empty span, in which case relative to the base of the compile unit
1012 // we would have (0, 0).
1013 newStart = newEnd = IGNOREABLE_LOCATION;
1014 }
1015 }
1016 // The loc start and end markers have been preserved. However, TODO
1017 // instructions in the middle may have moved around, making the loc no
1018 // longer contiguous, we should check that, and possibly split/merge
1019 // the loc. Or, we may need to have tracking in the IR for this.
1020 }
1021 loc.Start = newStart;
1022 loc.End = newEnd;
1023 // Note how the ".Location" field is unchanged.
1024 }
1025 }
1026
writeDWARFSections(Module & wasm,const BinaryLocations & newLocations)1027 void writeDWARFSections(Module& wasm, const BinaryLocations& newLocations) {
1028 BinaryenDWARFInfo info(wasm);
1029
1030 // Convert to Data representation, which YAML can use to write.
1031 llvm::DWARFYAML::Data data;
1032 if (dwarf2yaml(*info.context, data)) {
1033 Fatal() << "Failed to parse DWARF to YAML";
1034 }
1035
1036 LocationUpdater locationUpdater(wasm, newLocations);
1037
1038 updateDebugLines(data, locationUpdater);
1039
1040 updateCompileUnits(info, data, locationUpdater);
1041
1042 updateRanges(data, locationUpdater);
1043
1044 updateLoc(data, locationUpdater);
1045
1046 // Convert to binary sections.
1047 auto newSections =
1048 EmitDebugSections(data, false /* EmitFixups for debug_info */);
1049
1050 // Update the custom sections in the wasm.
1051 // TODO: efficiency
1052 for (auto& section : wasm.userSections) {
1053 if (Name(section.name).startsWith(".debug_")) {
1054 auto llvmName = section.name.substr(1);
1055 if (newSections.count(llvmName)) {
1056 auto llvmData = newSections[llvmName]->getBuffer();
1057 section.data.resize(llvmData.size());
1058 std::copy(llvmData.begin(), llvmData.end(), section.data.data());
1059 }
1060 }
1061 }
1062 }
1063
1064 #else // BUILD_LLVM_DWARF
1065
dumpDWARF(const Module & wasm)1066 void dumpDWARF(const Module& wasm) {
1067 std::cerr << "warning: no DWARF dumping support present\n";
1068 }
1069
writeDWARFSections(Module & wasm,const BinaryLocations & newLocations)1070 void writeDWARFSections(Module& wasm, const BinaryLocations& newLocations) {
1071 std::cerr << "warning: no DWARF updating support present\n";
1072 }
1073
1074 #endif // BUILD_LLVM_DWARF
1075
1076 } // namespace Debug
1077
1078 } // namespace wasm
1079