src/wasm/wasm-debug.cpp

/*
 * Copyright 2019 WebAssembly Community Group participants
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "wasm-debug.h"
#include "wasm.h"

#ifdef BUILD_LLVM_DWARF
#include "llvm/ObjectYAML/DWARFEmitter.h"
#include "llvm/ObjectYAML/DWARFYAML.h"
#include "llvm/include/llvm/DebugInfo/DWARFContext.h"

std::error_code dwarf2yaml(llvm::DWARFContext& DCtx, llvm::DWARFYAML::Data& Y);
#endif

#include "wasm-binary.h"
#include "wasm-debug.h"
#include "wasm.h"

namespace wasm {

namespace Debug {

bool isDWARFSection(Name name) { return name.startsWith(".debug_"); }

bool hasDWARFSections(const Module& wasm) {
  for (auto& section : wasm.userSections) {
    if (isDWARFSection(section.name)) {
      return true;
    }
  }
  return false;
}

#ifdef BUILD_LLVM_DWARF

// In wasm32 the address size is 32 bits.
static const size_t AddressSize = 4;

struct BinaryenDWARFInfo {
  llvm::StringMap<std::unique_ptr<llvm::MemoryBuffer>> sections;
  std::unique_ptr<llvm::DWARFContext> context;

  BinaryenDWARFInfo(const Module& wasm) {
    // Get debug sections from the wasm.
    for (auto& section : wasm.userSections) {
      if (Name(section.name).startsWith(".debug_") && section.data.data()) {
        // TODO: efficiency
        sections[section.name.substr(1)] = llvm::MemoryBuffer::getMemBufferCopy(
          llvm::StringRef(section.data.data(), section.data.size()));
      }
    }
    // Parse debug sections.
    uint8_t addrSize = AddressSize;
    bool isLittleEndian = true;
    context = llvm::DWARFContext::create(sections, addrSize, isLittleEndian);
  }
};

void dumpDWARF(const Module& wasm) {
  BinaryenDWARFInfo info(wasm);
  std::cout << "DWARF debug info\n";
  std::cout << "================\n\n";
  for (auto& section : wasm.userSections) {
    if (Name(section.name).startsWith(".debug_")) {
      std::cout << "Contains section " << section.name << " ("
                << section.data.size() << " bytes)\n";
    }
  }
  llvm::DIDumpOptions options;
  options.DumpType = llvm::DIDT_All;
  options.ShowChildren = true;
  options.Verbose = true;
  info.context->dump(llvm::outs(), options);
}

//
// Big picture: We use a DWARFContext to read data, then DWARFYAML support
// code to write it. That is not the main LLVM Dwarf code used for writing
// object files, but it avoids us create a "fake" MC layer, and provides a
// simple way to write out the debug info. Likely the level of info represented
// in the DWARFYAML::Data object is sufficient for Binaryen's needs, but if not,
// we may need a different approach.
//
// In more detail:
//
// 1. Binary sections => DWARFContext:
//
//     llvm::DWARFContext::create(sections..)
//
// 2. DWARFContext => DWARFYAML::Data
//
//     std::error_code dwarf2yaml(DWARFContext &DCtx, DWARFYAML::Data &Y) {
//
// 3. DWARFYAML::Data => binary sections
//
//     StringMap<std::unique_ptr<MemoryBuffer>>
//     EmitDebugSections(llvm::DWARFYAML::Data &DI, bool ApplyFixups);
//

// Represents the state when parsing a line table.
struct LineState {
  uint32_t addr = 0;
  // TODO sectionIndex?
  uint32_t line = 1;
  uint32_t col = 0;
  uint32_t file = 1;
  uint32_t isa = 0;
  uint32_t discriminator = 0;
  bool isStmt;
  bool basicBlock = false;
  bool prologueEnd = false;
  bool epilogueBegin = false;
  // Each instruction is part of a sequence, all of which get the same ID. The
  // order within a sequence may change if binaryen reorders things, which means
  // that we can't track the end_sequence location and assume it is at the end -
  // we must track sequences and then emit an end for each one.
  // -1 is an invalid marker value (note that this assumes we can fit all ids
  // into just under 32 bits).
  uint32_t sequenceId = -1;

  LineState(const LineState& other) = default;
  LineState(const llvm::DWARFYAML::LineTable& table, uint32_t sequenceId)
    : isStmt(table.DefaultIsStmt), sequenceId(sequenceId) {}

  LineState& operator=(const LineState& other) = default;

  // Updates the state, and returns whether a new row is ready to be emitted.
  bool update(llvm::DWARFYAML::LineTableOpcode& opcode,
              const llvm::DWARFYAML::LineTable& table) {
    switch (opcode.Opcode) {
      case 0: {
        // Extended opcodes
        switch (opcode.SubOpcode) {
          case llvm::dwarf::DW_LNE_set_address: {
            addr = opcode.Data;
            break;
          }
          case llvm::dwarf::DW_LNE_end_sequence: {
            return true;
          }
          case llvm::dwarf::DW_LNE_set_discriminator: {
            discriminator = opcode.Data;
            break;
          }
          case llvm::dwarf::DW_LNE_define_file: {
            Fatal() << "TODO: DW_LNE_define_file";
          }
          default: {
            // An unknown opcode, ignore.
            std::cerr << "warning: unknown subopcopde " << opcode.SubOpcode
                      << '\n';
          }
        }
        break;
      }
      case llvm::dwarf::DW_LNS_set_column: {
        col = opcode.Data;
        break;
      }
      case llvm::dwarf::DW_LNS_set_prologue_end: {
        prologueEnd = true;
        break;
      }
      case llvm::dwarf::DW_LNS_copy: {
        return true;
      }
      case llvm::dwarf::DW_LNS_advance_pc: {
        assert(table.MinInstLength == 1);
        addr += opcode.Data;
        break;
      }
      case llvm::dwarf::DW_LNS_advance_line: {
        line += opcode.SData;
        break;
      }
      case llvm::dwarf::DW_LNS_set_file: {
        file = opcode.Data;
        break;
      }
      case llvm::dwarf::DW_LNS_negate_stmt: {
        isStmt = !isStmt;
        break;
      }
      case llvm::dwarf::DW_LNS_set_basic_block: {
        basicBlock = true;
        break;
      }
      case llvm::dwarf::DW_LNS_const_add_pc: {
        uint8_t AdjustOpcode = 255 - table.OpcodeBase;
        uint64_t AddrOffset =
          (AdjustOpcode / table.LineRange) * table.MinInstLength;
        addr += AddrOffset;
        break;
      }
      case llvm::dwarf::DW_LNS_fixed_advance_pc: {
        addr += opcode.Data;
        break;
      }
      case llvm::dwarf::DW_LNS_set_isa: {
        isa = opcode.Data;
        break;
      }
      default: {
        if (opcode.Opcode >= table.OpcodeBase) {
          // Special opcode: adjust line and addr, using some math.
          uint8_t AdjustOpcode =
            opcode.Opcode - table.OpcodeBase; // 20 - 13 = 7
          uint64_t AddrOffset = (AdjustOpcode / table.LineRange) *
                                table.MinInstLength; // (7 / 14) * 1 = 0
          int32_t LineOffset =
            table.LineBase +
            (AdjustOpcode % table.LineRange); // -5 + (7 % 14) = 2
          line += LineOffset;
          addr += AddrOffset;
          return true;
        } else {
          Fatal() << "unknown debug line opcode: " << std::hex << opcode.Opcode;
        }
      }
    }
    return false;
  }

  // Checks if this starts a new range of addresses. Each range is a set of
  // related addresses, where in particular, if the first has been zeroed out
  // by the linker, we must omit the entire range. (If we do not, then the
  // initial range is 0 and the others are offsets relative to it, which will
  // look like random addresses, perhaps into the middle of instructions, and
  // perhaps that happen to collide with real ones.)
  bool startsNewRange(llvm::DWARFYAML::LineTableOpcode& opcode) {
    return opcode.Opcode == 0 &&
           opcode.SubOpcode == llvm::dwarf::DW_LNE_set_address;
  }

  bool needToEmit() {
    // Zero values imply we can ignore this line.
    // https://github.com/WebAssembly/debugging/issues/9#issuecomment-567720872
    return line != 0 && addr != 0;
  }

  // Given an old state, emit the diff from it to this state into a new line
  // table entry (that will be emitted in the updated DWARF debug line section).
  void emitDiff(const LineState& old,
                std::vector<llvm::DWARFYAML::LineTableOpcode>& newOpcodes,
                const llvm::DWARFYAML::LineTable& table,
                bool endSequence) const {
    bool useSpecial = false;
    if (addr != old.addr || line != old.line) {
      // Try to use a special opcode TODO
    }
    if (addr != old.addr && !useSpecial) {
      // len = 1 (subopcode) + 4 (wasm32 address)
      // FIXME: look at AddrSize on the Unit.
      auto item = makeItem(llvm::dwarf::DW_LNE_set_address, 5);
      item.Data = addr;
      newOpcodes.push_back(item);
    }
    if (line != old.line && !useSpecial) {
      auto item = makeItem(llvm::dwarf::DW_LNS_advance_line);
      // In wasm32 we have 32-bit addresses, and the delta here might be
      // negative (note that SData is 64-bit, as LLVM supports 64-bit
      // addresses too).
      item.SData = int32_t(line - old.line);
      newOpcodes.push_back(item);
    }
    if (col != old.col) {
      auto item = makeItem(llvm::dwarf::DW_LNS_set_column);
      item.Data = col;
      newOpcodes.push_back(item);
    }
    if (file != old.file) {
      auto item = makeItem(llvm::dwarf::DW_LNS_set_file);
      item.Data = file;
      newOpcodes.push_back(item);
    }
    if (isa != old.isa) {
      auto item = makeItem(llvm::dwarf::DW_LNS_set_isa);
      item.Data = isa;
      newOpcodes.push_back(item);
    }
    if (discriminator != old.discriminator) {
      // len = 1 (subopcode) + 4 (wasm32 address)
      auto item = makeItem(llvm::dwarf::DW_LNE_set_discriminator, 5);
      item.Data = discriminator;
      newOpcodes.push_back(item);
    }
    if (isStmt != old.isStmt) {
      newOpcodes.push_back(makeItem(llvm::dwarf::DW_LNS_negate_stmt));
    }
    if (basicBlock != old.basicBlock) {
      assert(basicBlock);
      newOpcodes.push_back(makeItem(llvm::dwarf::DW_LNS_set_basic_block));
    }
    if (prologueEnd) {
      newOpcodes.push_back(makeItem(llvm::dwarf::DW_LNS_set_prologue_end));
    }
    if (epilogueBegin != old.epilogueBegin) {
      Fatal() << "eb";
    }
    if (useSpecial) {
      // Emit a special, which emits a line automatically.
      // TODO
    } else {
      // Emit the line manually.
      if (endSequence) {
        // len = 1 (subopcode)
        newOpcodes.push_back(makeItem(llvm::dwarf::DW_LNE_end_sequence, 1));
      } else {
        newOpcodes.push_back(makeItem(llvm::dwarf::DW_LNS_copy));
      }
    }
  }

  // Some flags are automatically reset after each debug line.
  void resetAfterLine() { prologueEnd = false; }

private:
  llvm::DWARFYAML::LineTableOpcode
  makeItem(llvm::dwarf::LineNumberOps opcode) const {
    llvm::DWARFYAML::LineTableOpcode item = {};
    item.Opcode = opcode;
    return item;
  }

  llvm::DWARFYAML::LineTableOpcode
  makeItem(llvm::dwarf::LineNumberExtendedOps opcode, uint64_t len) const {
    auto item = makeItem(llvm::dwarf::LineNumberOps(0));
    // All the length after the len field itself, including the subopcode
    // (1 byte).
    item.ExtLen = len;
    item.SubOpcode = opcode;
    return item;
  }
};

// Represents a mapping of addresses to expressions. We track beginnings and
// endings of expressions separately, since the end of one (which is one past
// the end in DWARF notation) overlaps with the beginning of the next, and also
// to let us use contextual information (we may know we are looking up the end
// of an instruction).
struct AddrExprMap {
  std::unordered_map<BinaryLocation, Expression*> startMap;
  std::unordered_map<BinaryLocation, Expression*> endMap;

  // Some instructions have delimiter binary locations, like the else and end in
  // and if. Track those separately, including their expression and their id
  // ("else", "end", etc.), as they are rare, and we don't want to
  // bloat the common case which is represented in the earlier maps.
  struct DelimiterInfo {
    Expression* expr;
    BinaryLocations::DelimiterId id;
  };
  std::unordered_map<BinaryLocation, DelimiterInfo> delimiterMap;

  // Construct the map from the binaryLocations loaded from the wasm.
  AddrExprMap(const Module& wasm) {
    for (auto& func : wasm.functions) {
      for (auto pair : func->expressionLocations) {
        add(pair.first, pair.second);
      }
      for (auto pair : func->delimiterLocations) {
        add(pair.first, pair.second);
      }
    }
  }

  Expression* getStart(BinaryLocation addr) const {
    auto iter = startMap.find(addr);
    if (iter != startMap.end()) {
      return iter->second;
    }
    return nullptr;
  }

  Expression* getEnd(BinaryLocation addr) const {
    auto iter = endMap.find(addr);
    if (iter != endMap.end()) {
      return iter->second;
    }
    return nullptr;
  }

  DelimiterInfo getDelimiter(BinaryLocation addr) const {
    auto iter = delimiterMap.find(addr);
    if (iter != delimiterMap.end()) {
      return iter->second;
    }
    return DelimiterInfo{nullptr, BinaryLocations::Invalid};
  }

private:
  void add(Expression* expr, const BinaryLocations::Span span) {
    assert(startMap.count(span.start) == 0);
    startMap[span.start] = expr;
    assert(endMap.count(span.end) == 0);
    endMap[span.end] = expr;
  }

  void add(Expression* expr,
           const BinaryLocations::DelimiterLocations& delimiter) {
    for (Index i = 0; i < delimiter.size(); i++) {
      if (delimiter[i] != 0) {
        assert(delimiterMap.count(delimiter[i]) == 0);
        delimiterMap[delimiter[i]] =
          DelimiterInfo{expr, BinaryLocations::DelimiterId(i)};
      }
    }
  }
};

// Represents a mapping of addresses to expressions. As with expressions, we
// track both start and end; here, however, "start" means the "start" and
// "declarations" fields in FunctionLocations, and "end" means the two locations
// of one past the end, and one before it which is the "end" opcode that is
// emitted.
struct FuncAddrMap {
  std::unordered_map<BinaryLocation, Function*> startMap, endMap;

  // Construct the map from the binaryLocations loaded from the wasm.
  FuncAddrMap(const Module& wasm) {
    for (auto& func : wasm.functions) {
      startMap[func->funcLocation.start] = func.get();
      startMap[func->funcLocation.declarations] = func.get();
      endMap[func->funcLocation.end - 1] = func.get();
      endMap[func->funcLocation.end] = func.get();
    }
  }

  Function* getStart(BinaryLocation addr) const {
    auto iter = startMap.find(addr);
    if (iter != startMap.end()) {
      return iter->second;
    }
    return nullptr;
  }

  Function* getEnd(BinaryLocation addr) const {
    auto iter = endMap.find(addr);
    if (iter != endMap.end()) {
      return iter->second;
    }
    return nullptr;
  }
};

// Track locations from the original binary and the new one we wrote, so that
// we can update debug positions.
// We track expressions and functions separately, instead of having a single
// big map of (oldAddr) => (newAddr) because of the potentially ambiguous case
// of the final expression in a function: it's end might be identical in offset
// to the end of the function. So we have two different things that map to the
// same offset. However, if the context is "the end of the function" then the
// updated address is the new end of the function, even if the function ends
// with a different instruction now, as the old last instruction might have
// moved or been optimized out.
struct LocationUpdater {
  Module& wasm;
  const BinaryLocations& newLocations;

  AddrExprMap oldExprAddrMap;
  FuncAddrMap oldFuncAddrMap;

  // Map offsets of location list entries in the debug_loc section to the index
  // of their compile unit.
  std::unordered_map<BinaryLocation, size_t> locToUnitMap;

  // Map start of line tables in the debug_line section to their new locations.
  std::unordered_map<BinaryLocation, BinaryLocation> debugLineMap;

  typedef std::pair<BinaryLocation, BinaryLocation> OldToNew;

  // Map of compile unit index => old and new base offsets (i.e., in the
  // original binary and in the new one).
  std::unordered_map<size_t, OldToNew> compileUnitBases;

  // TODO: for memory efficiency, we may want to do this in a streaming manner,
  //       binary to binary, without YAML IR.

  LocationUpdater(Module& wasm, const BinaryLocations& newLocations)
    : wasm(wasm), newLocations(newLocations), oldExprAddrMap(wasm),
      oldFuncAddrMap(wasm) {}

  // Updates an expression's address. If there was never an instruction at that
  // address, or if there was but if that instruction no longer exists, return
  // 0. Otherwise, return the new updated location.
  BinaryLocation getNewExprStart(BinaryLocation oldAddr) const {
    if (auto* expr = oldExprAddrMap.getStart(oldAddr)) {
      auto iter = newLocations.expressions.find(expr);
      if (iter != newLocations.expressions.end()) {
        BinaryLocation newAddr = iter->second.start;
        return newAddr;
      }
    }
    return 0;
  }

  bool hasOldExprStart(BinaryLocation oldAddr) const {
    return oldExprAddrMap.getStart(oldAddr);
  }

  BinaryLocation getNewExprEnd(BinaryLocation oldAddr) const {
    if (auto* expr = oldExprAddrMap.getEnd(oldAddr)) {
      auto iter = newLocations.expressions.find(expr);
      if (iter != newLocations.expressions.end()) {
        return iter->second.end;
      }
    }
    return 0;
  }

  bool hasOldExprEnd(BinaryLocation oldAddr) const {
    return oldExprAddrMap.getEnd(oldAddr);
  }

  BinaryLocation getNewFuncStart(BinaryLocation oldAddr) const {
    if (auto* func = oldFuncAddrMap.getStart(oldAddr)) {
      // The function might have been optimized away, check.
      auto iter = newLocations.functions.find(func);
      if (iter != newLocations.functions.end()) {
        auto oldLocations = func->funcLocation;
        auto newLocations = iter->second;
        if (oldAddr == oldLocations.start) {
          return newLocations.start;
        } else if (oldAddr == oldLocations.declarations) {
          return newLocations.declarations;
        } else {
          WASM_UNREACHABLE("invalid func start");
        }
      }
    }
    return 0;
  }

  bool hasOldFuncStart(BinaryLocation oldAddr) const {
    return oldFuncAddrMap.getStart(oldAddr);
  }

  BinaryLocation getNewFuncEnd(BinaryLocation oldAddr) const {
    if (auto* func = oldFuncAddrMap.getEnd(oldAddr)) {
      // The function might have been optimized away, check.
      auto iter = newLocations.functions.find(func);
      if (iter != newLocations.functions.end()) {
        auto oldLocations = func->funcLocation;
        auto newLocations = iter->second;
        if (oldAddr == oldLocations.end) {
          return newLocations.end;
        } else if (oldAddr == oldLocations.end - 1) {
          return newLocations.end - 1;
        } else {
          WASM_UNREACHABLE("invalid func end");
        }
      }
    }
    return 0;
  }

  // Check for either the end opcode, or one past the end.
  bool hasOldFuncEnd(BinaryLocation oldAddr) const {
    return oldFuncAddrMap.getEnd(oldAddr);
  }

  // Check specifically for the end opcode.
  bool hasOldFuncEndOpcode(BinaryLocation oldAddr) const {
    if (auto* func = oldFuncAddrMap.getEnd(oldAddr)) {
      return oldAddr == func->funcLocation.end - 1;
    }
    return false;
  }

  BinaryLocation getNewDelimiter(BinaryLocation oldAddr) const {
    auto info = oldExprAddrMap.getDelimiter(oldAddr);
    if (info.expr) {
      auto iter = newLocations.delimiters.find(info.expr);
      if (iter != newLocations.delimiters.end()) {
        return iter->second[info.id];
      }
    }
    return 0;
  }

  bool hasOldDelimiter(BinaryLocation oldAddr) const {
    return oldExprAddrMap.getDelimiter(oldAddr).expr;
  }

  // getNewStart|EndAddr utilities.
  // TODO: should we track the start and end of delimiters, even though they
  //       are just one byte?
  BinaryLocation getNewStart(BinaryLocation oldStart) const {
    if (hasOldExprStart(oldStart)) {
      return getNewExprStart(oldStart);
    } else if (hasOldFuncStart(oldStart)) {
      return getNewFuncStart(oldStart);
    } else if (hasOldDelimiter(oldStart)) {
      return getNewDelimiter(oldStart);
    }
    return 0;
  }

  BinaryLocation getNewEnd(BinaryLocation oldEnd) const {
    if (hasOldExprEnd(oldEnd)) {
      return getNewExprEnd(oldEnd);
    } else if (hasOldFuncEnd(oldEnd)) {
      return getNewFuncEnd(oldEnd);
    } else if (hasOldDelimiter(oldEnd)) {
      return getNewDelimiter(oldEnd);
    }
    return 0;
  }

  BinaryLocation getNewDebugLineLocation(BinaryLocation old) const {
    return debugLineMap.at(old);
  }

  // Given an offset in .debug_loc, get the old and new compile unit bases.
  OldToNew getCompileUnitBasesForLoc(size_t offset) const {
    auto index = locToUnitMap.at(offset);
    auto iter = compileUnitBases.find(index);
    if (iter != compileUnitBases.end()) {
      return iter->second;
    }
    return OldToNew{0, 0};
  }
};

// Update debug lines, and update the locationUpdater with debug line offset
// changes so we can update offsets into the debug line section.
static void updateDebugLines(llvm::DWARFYAML::Data& data,
                             LocationUpdater& locationUpdater) {
  for (auto& table : data.DebugLines) {
    uint32_t sequenceId = 0;
    // Parse the original opcodes and emit new ones.
    LineState state(table, sequenceId);
    // All the addresses we need to write out.
    std::vector<BinaryLocation> newAddrs;
    std::unordered_map<BinaryLocation, LineState> newAddrInfo;
    // If the address was zeroed out, we must omit the entire range (we could
    // also leave it unchanged, so that the debugger ignores it based on the
    // initial zero; but it's easier and better to just not emit it at all).
    bool omittingRange = false;
    for (auto& opcode : table.Opcodes) {
      // Update the state, and check if we have a new row to emit.
      if (state.startsNewRange(opcode)) {
        omittingRange = false;
      }
      if (state.update(opcode, table)) {
        if (state.addr == 0) {
          omittingRange = true;
        }
        if (omittingRange) {
          state = LineState(table, sequenceId);
          continue;
        }
        // An expression may not exist for this line table item, if we optimized
        // it away.
        BinaryLocation oldAddr = state.addr;
        BinaryLocation newAddr = 0;
        if (locationUpdater.hasOldExprStart(oldAddr)) {
          newAddr = locationUpdater.getNewExprStart(oldAddr);
        }
        // Test for a function's end address first, as LLVM output appears to
        // use 1-past-the-end-of-the-function as a location in that function,
        // and not the next (but the first byte of the next function, which is
        // ambiguously identical to that value, is used at least in low_pc).
        else if (locationUpdater.hasOldFuncEnd(oldAddr)) {
          newAddr = locationUpdater.getNewFuncEnd(oldAddr);
        } else if (locationUpdater.hasOldFuncStart(oldAddr)) {
          newAddr = locationUpdater.getNewFuncStart(oldAddr);
        } else if (locationUpdater.hasOldDelimiter(oldAddr)) {
          newAddr = locationUpdater.getNewDelimiter(oldAddr);
        }
        if (newAddr && state.needToEmit()) {
          // LLVM sometimes emits the same address more than once. We should
          // probably investigate that.
          if (newAddrInfo.count(newAddr)) {
            continue;
          }
          newAddrs.push_back(newAddr);
          newAddrInfo.emplace(newAddr, state);
          auto& updatedState = newAddrInfo.at(newAddr);
          // The only difference is the address TODO other stuff?
          updatedState.addr = newAddr;
          // Reset relevant state.
          state.resetAfterLine();
        }
        if (opcode.Opcode == 0 &&
            opcode.SubOpcode == llvm::dwarf::DW_LNE_end_sequence) {
          sequenceId++;
          // We assume the number of sequences can fit in 32 bits, and -1 is
          // an invalid value.
          assert(sequenceId != uint32_t(-1));
          state = LineState(table, sequenceId);
        }
      }
    }
    // Sort the new addresses (which may be substantially different from the
    // original layout after optimization).
    std::sort(newAddrs.begin(), newAddrs.end());
    // Emit a new line table.
    {
      std::vector<llvm::DWARFYAML::LineTableOpcode> newOpcodes;
      for (size_t i = 0; i < newAddrs.size(); i++) {
        LineState state = newAddrInfo.at(newAddrs[i]);
        assert(state.needToEmit());
        LineState lastState(table, -1);
        if (i != 0) {
          lastState = newAddrInfo.at(newAddrs[i - 1]);
          // If the last line is in another sequence, clear the old state, as
          // there is nothing to diff to.
          if (lastState.sequenceId != state.sequenceId) {
            lastState = LineState(table, -1);
          }
        }
        // This line ends a sequence if there is no next line after it, or if
        // the next line is in a different sequence.
        bool endSequence =
          i + 1 == newAddrs.size() ||
          newAddrInfo.at(newAddrs[i + 1]).sequenceId != state.sequenceId;
        state.emitDiff(lastState, newOpcodes, table, endSequence);
      }
      table.Opcodes.swap(newOpcodes);
    }
  }
  // After updating the contents, run the emitter in order to update the
  // lengths of each section. We will use that to update offsets into the
  // debug_line section.
  std::vector<size_t> computedLengths;
  llvm::DWARFYAML::ComputeDebugLine(data, computedLengths);
  BinaryLocation oldLocation = 0, newLocation = 0;
  for (size_t i = 0; i < data.DebugLines.size(); i++) {
    auto& table = data.DebugLines[i];
    locationUpdater.debugLineMap[oldLocation] = newLocation;
    oldLocation += table.Length.getLength() + AddressSize;
    newLocation += computedLengths[i] + AddressSize;
    table.Length.setLength(computedLengths[i]);
  }
}

// Iterate in parallel over a DwarfContext representation element and a
// YAML element, which parallel each other.
template<typename T, typename U, typename W>
static void iterContextAndYAML(const T& contextList, U& yamlList, W func) {
  auto yamlValue = yamlList.begin();
  for (const auto& contextValue : contextList) {
    assert(yamlValue != yamlList.end());
    func(contextValue, *yamlValue);
    yamlValue++;
  }
  assert(yamlValue == yamlList.end());
}

// Updates a YAML entry from a DWARF DIE. Also updates LocationUpdater
// associating each .debug_loc entry with the base address of its corresponding
// compilation unit.
static void updateDIE(const llvm::DWARFDebugInfoEntry& DIE,
                      llvm::DWARFYAML::Entry& yamlEntry,
                      const llvm::DWARFAbbreviationDeclaration* abbrevDecl,
                      LocationUpdater& locationUpdater,
                      size_t compileUnitIndex) {
  auto tag = DIE.getTag();
  // Pairs of low/high_pc require some special handling, as the high
  // may be an offset relative to the low. First, process everything but
  // the high pcs, so we see the low pcs first.
  BinaryLocation oldLowPC = 0, newLowPC = 0;
  iterContextAndYAML(
    abbrevDecl->attributes(),
    yamlEntry.Values,
    [&](const llvm::DWARFAbbreviationDeclaration::AttributeSpec& attrSpec,
        llvm::DWARFYAML::FormValue& yamlValue) {
      auto attr = attrSpec.Attr;
      if (attr == llvm::dwarf::DW_AT_low_pc) {
        // This is an address.
        BinaryLocation oldValue = yamlValue.Value, newValue = 0;
        if (tag == llvm::dwarf::DW_TAG_GNU_call_site ||
            tag == llvm::dwarf::DW_TAG_inlined_subroutine ||
            tag == llvm::dwarf::DW_TAG_lexical_block ||
            tag == llvm::dwarf::DW_TAG_label) {
          newValue = locationUpdater.getNewStart(oldValue);
        } else if (tag == llvm::dwarf::DW_TAG_compile_unit) {
          newValue = locationUpdater.getNewFuncStart(oldValue);
          // Per the DWARF spec, "The base address of a compile unit is
          // defined as the value of the DW_AT_low_pc attribute, if present."
          locationUpdater.compileUnitBases[compileUnitIndex] =
            LocationUpdater::OldToNew{oldValue, newValue};
        } else if (tag == llvm::dwarf::DW_TAG_subprogram) {
          newValue = locationUpdater.getNewFuncStart(oldValue);
        } else {
          Fatal() << "unknown tag with low_pc "
                  << llvm::dwarf::TagString(tag).str();
        }
        oldLowPC = oldValue;
        newLowPC = newValue;
        yamlValue.Value = newValue;
      } else if (attr == llvm::dwarf::DW_AT_stmt_list) {
        // This is an offset into the debug line section.
        yamlValue.Value =
          locationUpdater.getNewDebugLineLocation(yamlValue.Value);
      } else if (attr == llvm::dwarf::DW_AT_location &&
                 attrSpec.Form == llvm::dwarf::DW_FORM_sec_offset) {
        BinaryLocation locOffset = yamlValue.Value;
        locationUpdater.locToUnitMap[locOffset] = compileUnitIndex;
      }
    });
  // Next, process the high_pcs.
  // TODO: do this more efficiently, without a second traversal (but that's a
  //       little tricky given the special double-traversal we have).
  iterContextAndYAML(
    abbrevDecl->attributes(),
    yamlEntry.Values,
    [&](const llvm::DWARFAbbreviationDeclaration::AttributeSpec& attrSpec,
        llvm::DWARFYAML::FormValue& yamlValue) {
      auto attr = attrSpec.Attr;
      if (attr != llvm::dwarf::DW_AT_high_pc) {
        return;
      }
      BinaryLocation oldValue = yamlValue.Value, newValue = 0;
      bool isRelative = attrSpec.Form == llvm::dwarf::DW_FORM_data4;
      if (isRelative) {
        oldValue += oldLowPC;
      }
      if (tag == llvm::dwarf::DW_TAG_GNU_call_site ||
          tag == llvm::dwarf::DW_TAG_inlined_subroutine ||
          tag == llvm::dwarf::DW_TAG_lexical_block ||
          tag == llvm::dwarf::DW_TAG_label) {
        newValue = locationUpdater.getNewExprEnd(oldValue);
      } else if (tag == llvm::dwarf::DW_TAG_compile_unit ||
                 tag == llvm::dwarf::DW_TAG_subprogram) {
        newValue = locationUpdater.getNewFuncEnd(oldValue);
      } else {
        Fatal() << "unknown tag with low_pc "
                << llvm::dwarf::TagString(tag).str();
      }
      if (isRelative) {
        newValue -= newLowPC;
      }
      yamlValue.Value = newValue;
    });
}

static void updateCompileUnits(const BinaryenDWARFInfo& info,
                               llvm::DWARFYAML::Data& yaml,
                               LocationUpdater& locationUpdater) {
  // The context has the high-level information we need, and the YAML is where
  // we write changes. First, iterate over the compile units.
  size_t compileUnitIndex = 0;
  iterContextAndYAML(
    info.context->compile_units(),
    yaml.CompileUnits,
    [&](const std::unique_ptr<llvm::DWARFUnit>& CU,
        llvm::DWARFYAML::Unit& yamlUnit) {
      // Process the DIEs in each compile unit.
      iterContextAndYAML(
        CU->dies(),
        yamlUnit.Entries,
        [&](const llvm::DWARFDebugInfoEntry& DIE,
            llvm::DWARFYAML::Entry& yamlEntry) {
          // Process the entries in each relevant DIE, looking for attributes to
          // change.
          auto abbrevDecl = DIE.getAbbreviationDeclarationPtr();
          if (abbrevDecl) {
            // This is relevant; look for things to update.
            updateDIE(
              DIE, yamlEntry, abbrevDecl, locationUpdater, compileUnitIndex);
          }
        });
      compileUnitIndex++;
    });
}

static void updateRanges(llvm::DWARFYAML::Data& yaml,
                         const LocationUpdater& locationUpdater) {
  // In each range section, try to update the start and end. If we no longer
  // have something to map them to, we must skip that part.
  size_t skip = 0;
  for (size_t i = 0; i < yaml.Ranges.size(); i++) {
    auto& range = yaml.Ranges[i];
    BinaryLocation oldStart = range.Start, oldEnd = range.End, newStart = 0,
                   newEnd = 0;
    // If this is an end marker (0, 0), or an invalid range (0, x) or (x, 0)
    // then just emit it as it is - either to mark the end, or to mark an
    // invalid entry.
    if (oldStart == 0 || oldEnd == 0) {
      newStart = oldStart;
      newEnd = oldEnd;
    } else {
      // This was a valid entry; update it.
      newStart = locationUpdater.getNewStart(oldStart);
      newEnd = locationUpdater.getNewEnd(oldEnd);
      if (newStart == 0 || newEnd == 0) {
        // This part of the range no longer has a mapping, so we must skip it.
        // Don't use (0, 0) as that would be an end marker; emit something
        // invalid for the debugger to ignore.
        newStart = 0;
        newEnd = 1;
      }
      // TODO even if range start and end markers have been preserved,
      // instructions in the middle may have moved around, making the range no
      // longer contiguous. We should check that, and possibly split/merge
      // the range. Or, we may need to have tracking in the IR for this.
    }
    auto& writtenRange = yaml.Ranges[i - skip];
    writtenRange.Start = newStart;
    writtenRange.End = newEnd;
  }
}

// A location that is ignoreable, i.e., not a special value like 0 or -1 (which
// would indicate an end or a base in .debug_loc).
static const BinaryLocation IGNOREABLE_LOCATION = 1;

static bool isNewBaseLoc(const llvm::DWARFYAML::Loc& loc) {
  return loc.Start == BinaryLocation(-1);
}

static bool isEndMarkerLoc(const llvm::DWARFYAML::Loc& loc) {
  return loc.Start == 0 && loc.End == 0;
}

// Update the .debug_loc section.
static void updateLoc(llvm::DWARFYAML::Data& yaml,
                      const LocationUpdater& locationUpdater) {
  // Similar to ranges, try to update the start and end. Note that here we
  // can't skip since the location description is a variable number of bytes,
  // so we mark no longer valid addresses as empty.
  bool atStart = true;
  // We need to keep positions in the .debug_loc section identical to before
  // (or else we'd need to update their positions too) and so we need to keep
  // base entries around (a base entry is added to every entry after it in the
  // list). However, we may change the base's value as after moving instructions
  // around the old base may not be smaller than all the values relative to it.
  BinaryLocation oldBase, newBase;
  auto& locs = yaml.Locs;
  for (size_t i = 0; i < locs.size(); i++) {
    auto& loc = locs[i];
    if (atStart) {
      std::tie(oldBase, newBase) =
        locationUpdater.getCompileUnitBasesForLoc(loc.CompileUnitOffset);
      atStart = false;
    }
    // By default we copy values over, unless we modify them below.
    BinaryLocation newStart = loc.Start, newEnd = loc.End;
    if (isNewBaseLoc(loc)) {
      // This is a new base.
      // Note that the base is not the address of an instruction, necessarily -
      // it's just a number (seems like it could always be an instruction, but
      // that's not what LLVM emits).
      // We must look forward at everything relative to this base, so that we
      // can emit a new proper base (as mentioned earlier, the original base may
      // not be valid if instructions moved to a position before it - they must
      // be positive offsets from it).
      oldBase = newBase = newEnd;
      BinaryLocation smallest = -1;
      for (size_t j = i + 1; j < locs.size(); j++) {
        auto& futureLoc = locs[j];
        if (isNewBaseLoc(futureLoc) || isEndMarkerLoc(futureLoc)) {
          break;
        }
        auto updatedStart =
          locationUpdater.getNewStart(futureLoc.Start + oldBase);
        // If we found a valid mapping, this is a relevant value for us. If the
        // optimizer removed it, it's a 0, and we can ignore it here - we will
        // emit IGNOREABLE_LOCATION for it later anyhow.
        if (updatedStart != 0) {
          smallest = std::min(smallest, updatedStart);
        }
      }
      // If we found no valid values that will be relativized here, just use 0
      // as the new (never-to-be-used) base, which is less confusing (otherwise
      // the value looks like it means something).
      if (smallest == BinaryLocation(-1)) {
        smallest = 0;
      }
      newBase = newEnd = smallest;
    } else if (isEndMarkerLoc(loc)) {
      // This is an end marker, this list is done; reset the base.
      atStart = true;
    } else {
      // This is a normal entry, try to find what it should be updated to. First
      // de-relativize it to the base to get the absolute address, then look for
      // a new address for it.
      newStart = locationUpdater.getNewStart(loc.Start + oldBase);
      newEnd = locationUpdater.getNewEnd(loc.End + oldBase);
      if (newStart == 0 || newEnd == 0 || newStart > newEnd) {
        // This part of the loc no longer has a mapping, or after the mapping
        // it is no longer a proper span, so we must ignore it.
        newStart = newEnd = IGNOREABLE_LOCATION;
      } else {
        // We picked a new base that ensures it is smaller than the values we
        // will relativize to it.
        assert(newStart >= newBase && newEnd >= newBase);
        newStart -= newBase;
        newEnd -= newBase;
        if (newStart == 0 && newEnd == 0) {
          // After mapping to the new positions, and after relativizing to the
          // base, if we end up with (0, 0) then we must emit something else, as
          // that would be interpreted as the end of a list. As it is an empty
          // span, the actual value doesn't matter, it just has to be != 0.
          // This can happen if the very first span in a compile unit is an
          // empty span, in which case relative to the base of the compile unit
          // we would have (0, 0).
          newStart = newEnd = IGNOREABLE_LOCATION;
        }
      }
      // The loc start and end markers have been preserved. However, TODO
      // instructions in the middle may have moved around, making the loc no
      // longer contiguous, we should check that, and possibly split/merge
      // the loc. Or, we may need to have tracking in the IR for this.
    }
    loc.Start = newStart;
    loc.End = newEnd;
    // Note how the ".Location" field is unchanged.
  }
}

void writeDWARFSections(Module& wasm, const BinaryLocations& newLocations) {
  BinaryenDWARFInfo info(wasm);

  // Convert to Data representation, which YAML can use to write.
  llvm::DWARFYAML::Data data;
  if (dwarf2yaml(*info.context, data)) {
    Fatal() << "Failed to parse DWARF to YAML";
  }

  LocationUpdater locationUpdater(wasm, newLocations);

  updateDebugLines(data, locationUpdater);

  updateCompileUnits(info, data, locationUpdater);

  updateRanges(data, locationUpdater);

  updateLoc(data, locationUpdater);

  // Convert to binary sections.
  auto newSections =
    EmitDebugSections(data, false /* EmitFixups for debug_info */);

  // Update the custom sections in the wasm.
  // TODO: efficiency
  for (auto& section : wasm.userSections) {
    if (Name(section.name).startsWith(".debug_")) {
      auto llvmName = section.name.substr(1);
      if (newSections.count(llvmName)) {
        auto llvmData = newSections[llvmName]->getBuffer();
        section.data.resize(llvmData.size());
        std::copy(llvmData.begin(), llvmData.end(), section.data.data());
      }
    }
  }
}

#else // BUILD_LLVM_DWARF

void dumpDWARF(const Module& wasm) {
  std::cerr << "warning: no DWARF dumping support present\n";
}

void writeDWARFSections(Module& wasm, const BinaryLocations& newLocations) {
  std::cerr << "warning: no DWARF updating support present\n";
}

#endif // BUILD_LLVM_DWARF

} // namespace Debug

} // namespace wasm