Add LLDB and swift-reflection-dump support for WebAssembly

This patch adds parsing and extracting of the Swift reflection
metadata data segments from within the WebAssembly DATA section and
tests it using swift-reflection-dump. This is needed to allow LLDB to
access Swift reflection metadata when attached to WebAssembly
processes.

rdar://159217213
This commit is contained in:
Adrian Prantl
2025-08-14 19:37:25 -07:00
parent 730e8e7d0f
commit 75bee892be
11 changed files with 1477 additions and 10 deletions

View File

@@ -83,7 +83,7 @@ public:
}
};
/// Responsible for providing the COFF reflection section identifiers
/// Responsible for providing the COFF reflection section identifiers.
class SwiftObjectFileFormatCOFF : public SwiftObjectFileFormat {
public:
llvm::StringRef getSectionName(ReflectionSectionKind section) override {
@@ -101,5 +101,28 @@ public:
return sectionName.starts_with(".sw5");
}
};
/// Responsible for providing the WebAssembly reflection section identifiers.
/// WebAssembly binaries store all reflection metadata in the DATA
/// section. There are symbols for each reflection section kind in the "name"
/// section that point to the corresponding offset inside DATA.
///
/// The identifiers returned here are the ELF-style names from Swift.def
/// (the third column of HANDLE_SWIFT_SECTION).
class SwiftObjectFileFormatWasm : public SwiftObjectFileFormat {
public:
/// Maps a reflection section kind to its name.
///
/// \param section The reflection section kind to look up.
/// \return The ELF column of the matching HANDLE_SWIFT_SECTION entry in
/// Swift.def. Kinds without an entry reach llvm_unreachable.
llvm::StringRef getSectionName(ReflectionSectionKind section) override {
switch (section) {
// X-macro: every HANDLE_SWIFT_SECTION entry in Swift.def expands to one
// `case` that returns the entry's ELF name.
#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF) \
case KIND: \
return ELF;
#include "llvm/BinaryFormat/Swift.def"
#undef HANDLE_SWIFT_SECTION
}
llvm_unreachable("Section not found.");
}
/// Returns true when \p sectionName starts with the "swift5_" prefix.
bool sectionContainsReflectionData(llvm::StringRef sectionName) override {
return sectionName.starts_with("swift5_");
}
};
} // namespace swift
#endif // SWIFT_ABI_OBJECTFILE_H

View File

@@ -18,13 +18,14 @@
#ifndef SWIFT_REFLECTION_REFLECTIONCONTEXT_H
#define SWIFT_REFLECTION_REFLECTIONCONTEXT_H
#include "llvm/ADT/STLExtras.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Memory.h"
#include "llvm/ADT/STLExtras.h"
#include "swift/ABI/Enum.h"
#include "swift/ABI/ObjectFile.h"
@@ -40,6 +41,8 @@
#include "swift/RemoteInspection/TypeRefBuilder.h"
#include "swift/Basic/Unreachable.h"
#include <cstdint>
#include <map>
#include <set>
#include <utility>
#include <vector>
@@ -72,6 +75,11 @@
#define HAS_DISPATCH_LOCK_IS_LOCKED 1
#endif
#define DEBUG_TYPE "reflection"
#ifdef SWIFT_RUNTIME
#undef LLVM_DEBUG
#define LLVM_DEBUG(IGNORE)
#endif
namespace {
template <unsigned PointerSize> struct MachOTraits;
@@ -799,6 +807,297 @@ public:
}
}
/// Parses metadata information from a WebAssembly image.
///
/// The reflection metadata "sections" of a WebAssembly image are segments of
/// the DATA section. This function walks the section headers, decodes the
/// DATA segments, names them using either the "name" custom section or (if
/// there is none) the "linking" custom section, and registers the segments
/// whose names match the Swift reflection section names.
///
/// \param[in] ImageStart
///     A remote address pointing to the start of the image in the running
///     process.
///
/// \param[in] FileBuffer
///     A buffer which contains the contents of the image's file on disk.
///     Currently unused: all information is read through the instance's
///     memory reader.
///
/// \param[in] PotentialModuleNames
///     Stored unchanged in the registered reflection info.
///
/// \return The newly added reflection info ID if successful, \b std::nullopt
/// otherwise.
std::optional<uint32_t>
readWasm(RemoteAddress ImageStart,
         std::optional<llvm::sys::MemoryBlock> FileBuffer,
         llvm::SmallVector<llvm::StringRef, 1> PotentialModuleNames) {
  (void)FileBuffer;

  /// A WASM data segment. The reflection metadata "sections" are DATA
  /// segments.
  struct Segment {
    /// Address of the payload inside the image.
    RemoteAddress remoteAddr;
    /// Offset taken from the segment's i32.const init expression (0 if the
    /// segment has none).
    uint64_t offset;
    /// Payload size in bytes.
    uint64_t size;
  };
  std::map<std::string, Segment> sections;
  std::vector<Segment> segments;
  auto &reader = getReader();
  RemoteAddress cursor = ImageStart;
  // Skip the module preamble (magic and version).
  cursor += sizeof(llvm::wasm::WasmMagic) + sizeof(llvm::wasm::WasmVersion);

  /// Decode one byte and move the cursor.
  auto decodeU8 = [&reader, &cursor](uint8_t &b) -> bool {
    if (!reader.readInteger(cursor, 1, &b))
      return false;
    cursor += 1;
    return true;
  };

  /// Decode 32-bit ULEB constants.
  auto decodeULEB32 = [&](uint32_t &val) -> bool {
    uint64_t result = 0;
    // A 32-bit ULEB128 value occupies at most 5 bytes.
    for (unsigned n = 0; n < 5; ++n) {
      uint8_t b;
      if (!decodeU8(b))
        return false;
      // Widen before shifting: the fifth group is shifted by 28 bits and
      // would overflow a (promoted) int.
      result |= static_cast<uint64_t>(b & 0x7f) << (7 * n);
      if ((b & 0x80) == 0) {
        if (result > std::numeric_limits<uint32_t>::max())
          return false;
        val = static_cast<uint32_t>(result);
        return true;
      }
    }
    // The continuation bit is still set after 5 bytes: malformed encoding.
    return false;
  };

  /// Decode a string.
  auto decodeString = [&](std::string &str) -> bool {
    uint32_t len;
    if (!decodeULEB32(len))
      return false;
    if (len == 0) {
      // Nothing to read for an empty string.
      str.clear();
      return true;
    }
    auto chars = reader.readBytes(cursor, len);
    if (!chars)
      return false;
    str = std::string(reinterpret_cast<const char *>(chars.get()), len);
    cursor += len;
    // `chars` frees the temporary buffer when it goes out of scope; calling
    // release() here would leak it.
    return true;
  };

  /// Decode one section header and record it in `sections`. Leaves the
  /// cursor at the start of the next section.
  auto decodeSection = [&]() -> bool {
    uint8_t sectionID;
    if (!decodeU8(sectionID))
      return false;
    if (sectionID > llvm::wasm::WASM_SEC_LAST_KNOWN)
      return false;
    uint32_t payloadLen;
    if (!decodeULEB32(payloadLen))
      return false;
    RemoteAddress payloadStart = cursor;
    std::string sectName;
    if (sectionID == llvm::wasm::WASM_SEC_CUSTOM) {
      if (!decodeString(sectName))
        return false;
      // The payload of a custom section starts with its name; the recorded
      // contents begin after it and are shorter by the bytes it occupies.
      uint64_t nameBytes = cursor - payloadStart;
      if (nameBytes > payloadLen)
        return false;
      sections.insert({sectName, {cursor, 0, payloadLen - nameBytes}});
    } else {
      sectName = llvm::wasm::sectionTypeToString(sectionID);
      sections.insert({sectName, {cursor, 0, payloadLen}});
    }
    LLVM_DEBUG(llvm::dbgs()
               << "section " << sectName << " size=" << payloadLen << "\n");
    cursor = payloadStart + payloadLen;
    return true;
  };

  // Decode the DATA segments into `segments`.
  auto decodeData = [&](uint64_t sectLength) -> bool {
    RemoteAddress start = cursor;
    RemoteAddress end = start + sectLength;

    // Decode the init expression of an active segment. Only a sequence of
    // i32.const instructions terminated by `end` is understood.
    auto decodeActiveSegmentOffset = [&]() -> std::optional<uint32_t> {
      uint32_t offset = 0;
      while (true) {
        uint8_t b;
        if (!decodeU8(b))
          return {};
        if (b == llvm::wasm::WASM_OPCODE_I32_CONST) {
          if (!decodeULEB32(offset)) // FIXME: Actually an SLEB.
            return {};
        } else if (b == llvm::wasm::WASM_OPCODE_END) {
          break;
        } else {
          // Unhandled opcode.
          return {};
        }
      }
      return offset;
    };

    uint32_t count;
    if (!decodeULEB32(count))
      return false;
    // Parse the segment headers.
    for (uint32_t i = 0; i < count && cursor < end; ++i) {
      uint32_t flags;
      uint32_t offset = 0;
      if (!decodeULEB32(flags))
        return false;
      if ((flags & llvm::wasm::WASM_DATA_SEGMENT_HAS_MEMINDEX) != 0) {
        uint32_t memidx;
        if (!decodeULEB32(memidx))
          return false;
      }
      if ((flags & llvm::wasm::WASM_DATA_SEGMENT_IS_PASSIVE) == 0) {
        auto offsOrErr = decodeActiveSegmentOffset();
        if (!offsOrErr)
          return false;
        offset = *offsOrErr;
      }
      uint32_t size;
      if (!decodeULEB32(size))
        return false;
      LLVM_DEBUG(llvm::dbgs()
                 << "Segment[" << i << "]: flags=" << flags
                 << " offset=" << offset << " size=" << size << "\n");
      segments.push_back({cursor, offset, size});
      cursor += size;
    }
    return true;
  };

  // Decode the NAMES table pointing to the DATA segments.
  auto decodeNames = [&](uint64_t sectLength) -> bool {
    RemoteAddress start = cursor;
    RemoteAddress end = start + sectLength;
    while (cursor < end) {
      uint8_t type;
      if (!decodeU8(type))
        return false;
      uint32_t len;
      if (!decodeULEB32(len))
        return false;
      // Remember where this subsection ends so that the cursor can be
      // placed there whether or not its contents were consumed.
      RemoteAddress subsectionEnd = cursor + len;
      if (type == llvm::wasm::WASM_NAMES_DATA_SEGMENT) {
        uint32_t count;
        if (!decodeULEB32(count))
          return false;
        for (uint32_t i = 0; i < count; ++i) {
          uint32_t idx;
          if (!decodeULEB32(idx))
            return false;
          std::string sectName;
          if (!decodeString(sectName))
            return false;
          if (idx >= segments.size())
            return false;
          LLVM_DEBUG(llvm::dbgs() << sectName << ": " << idx << "\n");
          sections[sectName] = segments[idx];
        }
      }
      cursor = subsectionEnd;
    }
    return true;
  };

  // Decode the LINK section of object files.
  auto decodeLinking = [&](uint64_t sectLength) -> bool {
    RemoteAddress start = cursor;
    RemoteAddress end = start + sectLength;
    uint32_t version;
    if (!decodeULEB32(version) || version != 2)
      return false;
    while (cursor < end) {
      uint8_t type;
      if (!decodeU8(type))
        return false;
      uint32_t len;
      if (!decodeULEB32(len))
        return false;
      RemoteAddress subsectionEnd = cursor + len;
      if (type == llvm::wasm::WASM_SEGMENT_INFO) {
        uint32_t count;
        if (!decodeULEB32(count))
          return false;
        // The position of an entry is used as the index of the data segment
        // it describes.
        for (uint32_t idx = 0; idx < count; ++idx) {
          std::string sectName;
          if (!decodeString(sectName))
            return false;
          uint32_t align;
          if (!decodeULEB32(align))
            return false;
          uint32_t flags;
          if (!decodeULEB32(flags))
            return false;
          if (idx >= segments.size())
            return false;
          LLVM_DEBUG(llvm::dbgs() << sectName << ": " << idx << "\n");
          sections[sectName] = segments[idx];
        }
      }
      cursor = subsectionEnd;
    }
    return true;
  };

  // Collect section headers until one fails to decode.
  while (decodeSection()) {
  }

  // NOTE: every failure below returns std::nullopt. `return false` would be
  // converted to an engaged optional holding the ID 0.
  auto dataSect = sections.find("DATA");
  if (dataSect == sections.end())
    return std::nullopt;
  cursor = dataSect->second.remoteAddr;
  if (!decodeData(dataSect->second.size))
    return std::nullopt;

  auto nameSect = sections.find("name");
  if (nameSect != sections.end()) {
    cursor = nameSect->second.remoteAddr;
    if (!decodeNames(nameSect->second.size))
      return std::nullopt;
  } else {
    // This may be an object file?
    auto linkingSect = sections.find("linking");
    if (linkingSect == sections.end())
      return std::nullopt;
    cursor = linkingSect->second.remoteAddr;
    if (!decodeLinking(linkingSect->second.size))
      return std::nullopt;
  }

  // Find reflection sections within the data section segments.
  auto lookup =
      [&](const std::string &name) -> std::pair<RemoteRef<void>, uint64_t> {
    auto sectionIt = sections.find(name);
    if (sectionIt == sections.end())
      return {{}, 0};
    auto &section = sectionIt->second;
    RemoteAddress mappedSectionStart(0 + section.offset,
                                     RemoteAddress::DefaultAddressSpace);
    auto secBuf = reader.readBytes(section.remoteAddr, section.size);
    // Treat an unreadable section as absent instead of pairing a null
    // buffer with a non-zero size.
    if (!secBuf)
      return {{}, 0};
    auto secContents = RemoteRef<void>(mappedSectionStart, secBuf.get());
    // Keep the buffer alive for as long as the reflection info is used.
    savedBuffers.push_back(std::move(secBuf));
    LLVM_DEBUG(llvm::dbgs() << name << " @ " << section.offset << "\n");
    return {secContents, section.size};
  };

  auto FieldMdSec = lookup("swift5_fieldmd");
  auto AssocTySec = lookup("swift5_assocty");
  auto BuiltinTySec = lookup("swift5_builtin");
  auto CaptureSec = lookup("swift5_capture");
  auto TypeRefMdSec = lookup("swift5_typeref");
  auto ReflStrMdSec = lookup("swift5_reflstr");
  auto ConformMdSec = lookup("swift5_protocol_conformances");
  auto MPEnumMdSec = lookup("swift5_mpenum");
  ReflectionInfo info = {{FieldMdSec.first, FieldMdSec.second},
                         {AssocTySec.first, AssocTySec.second},
                         {BuiltinTySec.first, BuiltinTySec.second},
                         {CaptureSec.first, CaptureSec.second},
                         {TypeRefMdSec.first, TypeRefMdSec.second},
                         {ReflStrMdSec.first, ReflStrMdSec.second},
                         {ConformMdSec.first, ConformMdSec.second},
                         {MPEnumMdSec.first, MPEnumMdSec.second},
                         PotentialModuleNames};
  return addReflectionInfo(info);
}
/// On success returns the ID of the newly registered Reflection Info.
std::optional<uint32_t>
addImage(RemoteAddress ImageStart,
@@ -827,7 +1126,6 @@ public:
return readPECOFF(ImageStart, PotentialModuleNames);
}
// ELF.
if (MagicBytes[0] == llvm::ELF::ElfMagic[0]
&& MagicBytes[1] == llvm::ELF::ElfMagic[1]
@@ -837,6 +1135,14 @@ public:
PotentialModuleNames);
}
// WASM.
if (MagicBytes[0] == llvm::wasm::WasmMagic[0] &&
MagicBytes[1] == llvm::wasm::WasmMagic[1] &&
MagicBytes[2] == llvm::wasm::WasmMagic[2] &&
MagicBytes[3] == llvm::wasm::WasmMagic[3]) {
return readWasm(ImageStart, std::optional<llvm::sys::MemoryBlock>(),
PotentialModuleNames);
}
// We don't recognize the format.
return std::nullopt;
}

View File

@@ -0,0 +1,581 @@
//===- Wasm.h - Wasm object file format -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines manifest constants for the wasm object file format.
// See: https://github.com/WebAssembly/design/blob/main/BinaryEncoding.md
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_BINARYFORMAT_WASM_H
#define LLVM_BINARYFORMAT_WASM_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include <optional>
#include <utility>
namespace llvm {
namespace wasm {
// Object file magic string.
const char WasmMagic[] = {'\0', 'a', 's', 'm'};
// Wasm binary format version
const uint32_t WasmVersion = 0x1;
// Wasm linking metadata version
const uint32_t WasmMetadataVersion = 0x2;
// Wasm uses a 64k page size by default (but the custom-page-sizes proposal
// allows changing it)
const uint32_t WasmDefaultPageSize = 65536;
enum : unsigned {
WASM_SEC_CUSTOM = 0, // Custom / User-defined section
WASM_SEC_TYPE = 1, // Function signature declarations
WASM_SEC_IMPORT = 2, // Import declarations
WASM_SEC_FUNCTION = 3, // Function declarations
WASM_SEC_TABLE = 4, // Indirect function table and other tables
WASM_SEC_MEMORY = 5, // Memory attributes
WASM_SEC_GLOBAL = 6, // Global declarations
WASM_SEC_EXPORT = 7, // Exports
WASM_SEC_START = 8, // Start function declaration
WASM_SEC_ELEM = 9, // Elements section
WASM_SEC_CODE = 10, // Function bodies (code)
WASM_SEC_DATA = 11, // Data segments
WASM_SEC_DATACOUNT = 12, // Data segment count
WASM_SEC_TAG = 13, // Tag declarations
WASM_SEC_LAST_KNOWN = WASM_SEC_TAG,
};
// Type immediate encodings used in various contexts.
enum : unsigned {
WASM_TYPE_I32 = 0x7F,
WASM_TYPE_I64 = 0x7E,
WASM_TYPE_F32 = 0x7D,
WASM_TYPE_F64 = 0x7C,
WASM_TYPE_V128 = 0x7B,
WASM_TYPE_NULLFUNCREF = 0x73,
WASM_TYPE_NULLEXTERNREF = 0x72,
WASM_TYPE_NULLEXNREF = 0x74,
WASM_TYPE_NULLREF = 0x71,
WASM_TYPE_FUNCREF = 0x70,
WASM_TYPE_EXTERNREF = 0x6F,
WASM_TYPE_EXNREF = 0x69,
WASM_TYPE_ANYREF = 0x6E,
WASM_TYPE_EQREF = 0x6D,
WASM_TYPE_I31REF = 0x6C,
WASM_TYPE_STRUCTREF = 0x6B,
WASM_TYPE_ARRAYREF = 0x6A,
WASM_TYPE_NONNULLABLE = 0x64,
WASM_TYPE_NULLABLE = 0x63,
WASM_TYPE_FUNC = 0x60,
WASM_TYPE_ARRAY = 0x5E,
WASM_TYPE_STRUCT = 0x5F,
WASM_TYPE_SUB = 0x50,
WASM_TYPE_SUB_FINAL = 0x4F,
WASM_TYPE_REC = 0x4E,
WASM_TYPE_NORESULT = 0x40, // for blocks with no result values
};
// Kinds of externals (for imports and exports).
enum : unsigned {
WASM_EXTERNAL_FUNCTION = 0x0,
WASM_EXTERNAL_TABLE = 0x1,
WASM_EXTERNAL_MEMORY = 0x2,
WASM_EXTERNAL_GLOBAL = 0x3,
WASM_EXTERNAL_TAG = 0x4,
};
// Opcodes used in initializer expressions.
enum : unsigned {
WASM_OPCODE_END = 0x0b,
WASM_OPCODE_CALL = 0x10,
WASM_OPCODE_LOCAL_GET = 0x20,
WASM_OPCODE_LOCAL_SET = 0x21,
WASM_OPCODE_LOCAL_TEE = 0x22,
WASM_OPCODE_GLOBAL_GET = 0x23,
WASM_OPCODE_GLOBAL_SET = 0x24,
WASM_OPCODE_I32_STORE = 0x36,
WASM_OPCODE_I64_STORE = 0x37,
WASM_OPCODE_I32_CONST = 0x41,
WASM_OPCODE_I64_CONST = 0x42,
WASM_OPCODE_F32_CONST = 0x43,
WASM_OPCODE_F64_CONST = 0x44,
WASM_OPCODE_I32_ADD = 0x6a,
WASM_OPCODE_I32_SUB = 0x6b,
WASM_OPCODE_I32_MUL = 0x6c,
WASM_OPCODE_I64_ADD = 0x7c,
WASM_OPCODE_I64_SUB = 0x7d,
WASM_OPCODE_I64_MUL = 0x7e,
WASM_OPCODE_REF_NULL = 0xd0,
WASM_OPCODE_REF_FUNC = 0xd2,
WASM_OPCODE_GC_PREFIX = 0xfb,
};
// Opcodes in the GC-prefixed space (0xfb)
enum : unsigned {
WASM_OPCODE_STRUCT_NEW = 0x00,
WASM_OPCODE_STRUCT_NEW_DEFAULT = 0x01,
WASM_OPCODE_ARRAY_NEW = 0x06,
WASM_OPCODE_ARRAY_NEW_DEFAULT = 0x07,
WASM_OPCODE_ARRAY_NEW_FIXED = 0x08,
WASM_OPCODE_REF_I31 = 0x1c,
// any.convert_extern and extern.convert_any don't seem to be supported by
// Binaryen.
};
// Opcodes used in synthetic functions.
enum : unsigned {
WASM_OPCODE_BLOCK = 0x02,
WASM_OPCODE_BR = 0x0c,
WASM_OPCODE_BR_TABLE = 0x0e,
WASM_OPCODE_RETURN = 0x0f,
WASM_OPCODE_DROP = 0x1a,
WASM_OPCODE_MISC_PREFIX = 0xfc,
WASM_OPCODE_MEMORY_INIT = 0x08,
WASM_OPCODE_MEMORY_FILL = 0x0b,
WASM_OPCODE_DATA_DROP = 0x09,
WASM_OPCODE_ATOMICS_PREFIX = 0xfe,
WASM_OPCODE_ATOMIC_NOTIFY = 0x00,
WASM_OPCODE_I32_ATOMIC_WAIT = 0x01,
WASM_OPCODE_I32_ATOMIC_STORE = 0x17,
WASM_OPCODE_I32_RMW_CMPXCHG = 0x48,
};
// Sub-opcodes for catch clauses in a try_table instruction
enum : unsigned {
WASM_OPCODE_CATCH = 0x00,
WASM_OPCODE_CATCH_REF = 0x01,
WASM_OPCODE_CATCH_ALL = 0x02,
WASM_OPCODE_CATCH_ALL_REF = 0x03,
};
// Bit flags for the Flags field of WasmLimits. The WasmLimits comparison
// operator in this file tests HAS_MAX and HAS_PAGE_SIZE to decide whether the
// Maximum and PageSize fields take part in the comparison.
enum : unsigned {
WASM_LIMITS_FLAG_NONE = 0x0,
WASM_LIMITS_FLAG_HAS_MAX = 0x1,
WASM_LIMITS_FLAG_IS_SHARED = 0x2,
WASM_LIMITS_FLAG_IS_64 = 0x4,
WASM_LIMITS_FLAG_HAS_PAGE_SIZE = 0x8,
};
// Bit flags for WasmDataSegment::InitFlags. A segment has a memory index
// when HAS_MEMINDEX is set and an offset expression when IS_PASSIVE is clear
// (see the field comments on WasmDataSegment).
enum : unsigned {
WASM_DATA_SEGMENT_IS_PASSIVE = 0x01,
WASM_DATA_SEGMENT_HAS_MEMINDEX = 0x02,
};
enum : unsigned {
WASM_ELEM_SEGMENT_IS_PASSIVE = 0x01,
WASM_ELEM_SEGMENT_IS_DECLARATIVE = 0x02, // if passive == 1
WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER = 0x02, // if passive == 0
WASM_ELEM_SEGMENT_HAS_INIT_EXPRS = 0x04,
};
const unsigned WASM_ELEM_SEGMENT_MASK_HAS_ELEM_DESC = 0x3;
// Feature policy prefixes used in the custom "target_features" section
enum : uint8_t {
WASM_FEATURE_PREFIX_USED = '+',
WASM_FEATURE_PREFIX_DISALLOWED = '-',
};
// Kind codes used in the custom "name" section
enum : unsigned {
WASM_NAMES_MODULE = 0,
WASM_NAMES_FUNCTION = 1,
WASM_NAMES_LOCAL = 2,
WASM_NAMES_GLOBAL = 7,
WASM_NAMES_DATA_SEGMENT = 9,
};
// Kind codes used in the custom "linking" section
enum : unsigned {
WASM_SEGMENT_INFO = 0x5,
WASM_INIT_FUNCS = 0x6,
WASM_COMDAT_INFO = 0x7,
WASM_SYMBOL_TABLE = 0x8,
};
// Kind codes used in the custom "dylink" section
enum : unsigned {
WASM_DYLINK_MEM_INFO = 0x1,
WASM_DYLINK_NEEDED = 0x2,
WASM_DYLINK_EXPORT_INFO = 0x3,
WASM_DYLINK_IMPORT_INFO = 0x4,
WASM_DYLINK_RUNTIME_PATH = 0x5,
};
// Kind codes used in the custom "linking" section in the WASM_COMDAT_INFO
enum : unsigned {
WASM_COMDAT_DATA = 0x0,
WASM_COMDAT_FUNCTION = 0x1,
// GLOBAL, TAG, and TABLE are in here but LLVM doesn't use them yet.
WASM_COMDAT_SECTION = 0x5,
};
// Kind codes used in the custom "linking" section in the WASM_SYMBOL_TABLE
enum WasmSymbolType : unsigned {
WASM_SYMBOL_TYPE_FUNCTION = 0x0,
WASM_SYMBOL_TYPE_DATA = 0x1,
WASM_SYMBOL_TYPE_GLOBAL = 0x2,
WASM_SYMBOL_TYPE_SECTION = 0x3,
WASM_SYMBOL_TYPE_TAG = 0x4,
WASM_SYMBOL_TYPE_TABLE = 0x5,
};
enum WasmSegmentFlag : unsigned {
WASM_SEG_FLAG_STRINGS = 0x1,
WASM_SEG_FLAG_TLS = 0x2,
WASM_SEG_FLAG_RETAIN = 0x4,
};
// Kinds of tag attributes.
enum WasmTagAttribute : uint8_t {
WASM_TAG_ATTRIBUTE_EXCEPTION = 0x0,
};
const unsigned WASM_SYMBOL_BINDING_MASK = 0x3;
const unsigned WASM_SYMBOL_VISIBILITY_MASK = 0xc;
const unsigned WASM_SYMBOL_BINDING_GLOBAL = 0x0;
const unsigned WASM_SYMBOL_BINDING_WEAK = 0x1;
const unsigned WASM_SYMBOL_BINDING_LOCAL = 0x2;
const unsigned WASM_SYMBOL_VISIBILITY_DEFAULT = 0x0;
const unsigned WASM_SYMBOL_VISIBILITY_HIDDEN = 0x4;
const unsigned WASM_SYMBOL_UNDEFINED = 0x10;
const unsigned WASM_SYMBOL_EXPORTED = 0x20;
const unsigned WASM_SYMBOL_EXPLICIT_NAME = 0x40;
const unsigned WASM_SYMBOL_NO_STRIP = 0x80;
const unsigned WASM_SYMBOL_TLS = 0x100;
const unsigned WASM_SYMBOL_ABSOLUTE = 0x200;
#define WASM_RELOC(name, value) name = value,
enum WasmRelocType : unsigned {
#include "WasmRelocs.def"
};
#undef WASM_RELOC
struct WasmObjectHeader {
StringRef Magic;
uint32_t Version;
};
// Subset of types that a value can have
enum class ValType {
I32 = WASM_TYPE_I32,
I64 = WASM_TYPE_I64,
F32 = WASM_TYPE_F32,
F64 = WASM_TYPE_F64,
V128 = WASM_TYPE_V128,
FUNCREF = WASM_TYPE_FUNCREF,
EXTERNREF = WASM_TYPE_EXTERNREF,
EXNREF = WASM_TYPE_EXNREF,
// Unmodeled value types include ref types with heap types other than
// func, extern or exn, and type-specialized funcrefs
OTHERREF = 0xff,
};
struct WasmDylinkImportInfo {
StringRef Module;
StringRef Field;
uint32_t Flags;
};
struct WasmDylinkExportInfo {
StringRef Name;
uint32_t Flags;
};
struct WasmDylinkInfo {
uint32_t MemorySize; // Memory size in bytes
uint32_t MemoryAlignment; // P2 alignment of memory
uint32_t TableSize; // Table size in elements
uint32_t TableAlignment; // P2 alignment of table
std::vector<StringRef> Needed; // Shared library dependencies
std::vector<WasmDylinkImportInfo> ImportInfo;
std::vector<WasmDylinkExportInfo> ExportInfo;
std::vector<StringRef> RuntimePath;
};
struct WasmProducerInfo {
std::vector<std::pair<std::string, std::string>> Languages;
std::vector<std::pair<std::string, std::string>> Tools;
std::vector<std::pair<std::string, std::string>> SDKs;
};
struct WasmFeatureEntry {
uint8_t Prefix;
std::string Name;
};
struct WasmExport {
StringRef Name;
uint8_t Kind;
uint32_t Index;
};
struct WasmLimits {
uint8_t Flags;
uint64_t Minimum;
uint64_t Maximum;
uint32_t PageSize;
};
struct WasmTableType {
ValType ElemType;
WasmLimits Limits;
};
struct WasmTable {
uint32_t Index;
WasmTableType Type;
StringRef SymbolName; // from the "linking" section
};
struct WasmInitExprMVP {
uint8_t Opcode;
union {
int32_t Int32;
int64_t Int64;
uint32_t Float32;
uint64_t Float64;
uint32_t Global;
} Value;
};
// Extended-const init exprs and exprs with GC types are not explicitly
// modeled, but the raw body of the expr is attached.
struct WasmInitExpr {
uint8_t Extended; // Set to non-zero if extended const is used (i.e. more than
// one instruction)
WasmInitExprMVP Inst;
ArrayRef<uint8_t> Body;
};
struct WasmGlobalType {
uint8_t Type; // TODO: make this a ValType?
bool Mutable;
};
struct WasmGlobal {
uint32_t Index;
WasmGlobalType Type;
WasmInitExpr InitExpr;
StringRef SymbolName; // from the "linking" section
uint32_t Offset; // Offset of the definition in the binary's Global section
uint32_t Size; // Size of the definition in the binary's Global section
};
struct WasmTag {
uint32_t Index;
uint32_t SigIndex;
StringRef SymbolName; // from the "linking" section
};
struct WasmImport {
StringRef Module;
StringRef Field;
uint8_t Kind;
union {
uint32_t SigIndex;
WasmGlobalType Global;
WasmTableType Table;
WasmLimits Memory;
};
};
struct WasmLocalDecl {
uint8_t Type;
uint32_t Count;
};
struct WasmFunction {
uint32_t Index;
uint32_t SigIndex;
std::vector<WasmLocalDecl> Locals;
ArrayRef<uint8_t> Body;
uint32_t CodeSectionOffset;
uint32_t Size;
uint32_t CodeOffset; // start of Locals and Body
std::optional<StringRef> ExportName; // from the "export" section
StringRef SymbolName; // from the "linking" section
StringRef DebugName; // from the "name" section
uint32_t Comdat; // from the "comdat info" section
};
struct WasmDataSegment {
uint32_t InitFlags;
// Present if InitFlags & WASM_DATA_SEGMENT_HAS_MEMINDEX.
uint32_t MemoryIndex;
// Present if InitFlags & WASM_DATA_SEGMENT_IS_PASSIVE == 0.
WasmInitExpr Offset;
ArrayRef<uint8_t> Content;
StringRef Name; // from the "segment info" section
uint32_t Alignment;
uint32_t LinkingFlags;
uint32_t Comdat; // from the "comdat info" section
};
// 3 different element segment modes are encodable. This class is currently
// only used during decoding (see WasmElemSegment below).
enum class ElemSegmentMode { Active, Passive, Declarative };
// Represents a Wasm element segment, with some limitations compared the spec:
// 1) Does not model passive or declarative segments (Segment will end up with
// an Offset field of i32.const 0)
// 2) Does not model init exprs (Segment will get an empty Functions list)
// 3) Does not model types other than basic funcref/externref/exnref (see
// ValType)
struct WasmElemSegment {
uint32_t Flags;
uint32_t TableNumber;
ValType ElemKind;
WasmInitExpr Offset;
std::vector<uint32_t> Functions;
};
// Represents the location of a Wasm data symbol within a WasmDataSegment, as
// the index of the segment, and the offset and size within the segment.
struct WasmDataReference {
uint32_t Segment;
uint64_t Offset;
uint64_t Size;
};
struct WasmRelocation {
uint8_t Type; // The type of the relocation.
uint32_t Index; // Index into either symbol or type index space.
uint64_t Offset; // Offset from the start of the section.
int64_t Addend; // A value to add to the symbol.
WasmRelocType getType() const { return static_cast<WasmRelocType>(Type); }
};
struct WasmInitFunc {
uint32_t Priority;
uint32_t Symbol;
};
struct WasmSymbolInfo {
StringRef Name;
uint8_t Kind;
uint32_t Flags;
// For undefined symbols the module of the import
std::optional<StringRef> ImportModule;
// For undefined symbols the name of the import
std::optional<StringRef> ImportName;
// For symbols to be exported from the final module
std::optional<StringRef> ExportName;
union {
// For function, table, or global symbols, the index in function, table, or
// global index space.
uint32_t ElementIndex;
// For a data symbols, the address of the data relative to segment.
WasmDataReference DataRef;
};
};
enum class NameType {
FUNCTION,
GLOBAL,
DATA_SEGMENT,
};
struct WasmDebugName {
NameType Type;
uint32_t Index;
StringRef Name;
};
// Info from the linking metadata section of a wasm object file.
struct WasmLinkingData {
uint32_t Version;
std::vector<WasmInitFunc> InitFunctions;
std::vector<StringRef> Comdats;
// The linking section also contains a symbol table. This info (represented
// in a WasmSymbolInfo struct) is stored inside the WasmSymbol object instead
// of in this structure; this allows vectors of WasmSymbols and
// WasmLinkingDatas to be reallocated.
};
/// The return and parameter types of a function or tag signature.
struct WasmSignature {
  SmallVector<ValType, 1> Returns;
  SmallVector<ValType, 4> Params;
  // LLVM can parse types other than functions encoded in the type section,
  // but does not actually model them. Instead a placeholder signature is
  // created in the Object's signature list.
  enum { Function, Tag, Placeholder } Kind = Function;
  // Support empty and tombstone instances, needed by DenseMap.
  enum { Plain, Empty, Tombstone } State = Plain;

  /// Takes ownership of the given type lists. The arguments are moved from:
  /// a named rvalue reference is an lvalue, so without std::move the
  /// vectors would be copied.
  WasmSignature(SmallVector<ValType, 1> &&InReturns,
                SmallVector<ValType, 4> &&InParams)
      : Returns(std::move(InReturns)), Params(std::move(InParams)) {}
  WasmSignature() = default;
};
// Equality helpers for the plain-data Wasm description types.

/// Signatures compare equal when State, Returns and Params all match; Kind
/// does not take part in the comparison.
inline bool operator==(const WasmSignature &LHS, const WasmSignature &RHS) {
  if (LHS.State != RHS.State)
    return false;
  if (!(LHS.Returns == RHS.Returns))
    return false;
  return LHS.Params == RHS.Params;
}

inline bool operator!=(const WasmSignature &LHS, const WasmSignature &RHS) {
  const bool Same = (LHS == RHS);
  return !Same;
}

/// Global types compare equal when both the value type and mutability match.
inline bool operator==(const WasmGlobalType &LHS, const WasmGlobalType &RHS) {
  if (LHS.Type != RHS.Type)
    return false;
  return LHS.Mutable == RHS.Mutable;
}

inline bool operator!=(const WasmGlobalType &LHS, const WasmGlobalType &RHS) {
  const bool Same = (LHS == RHS);
  return !Same;
}

/// Limits compare equal when Flags and Minimum match. Maximum and PageSize
/// are only compared when the corresponding flag is set on LHS.
inline bool operator==(const WasmLimits &LHS, const WasmLimits &RHS) {
  if (LHS.Flags != RHS.Flags || LHS.Minimum != RHS.Minimum)
    return false;
  if ((LHS.Flags & WASM_LIMITS_FLAG_HAS_MAX) && !(LHS.Maximum == RHS.Maximum))
    return false;
  if ((LHS.Flags & WASM_LIMITS_FLAG_HAS_PAGE_SIZE) &&
      !(LHS.PageSize == RHS.PageSize))
    return false;
  return true;
}

/// Table types compare equal when the element type and the limits match.
inline bool operator==(const WasmTableType &LHS, const WasmTableType &RHS) {
  if (!(LHS.ElemType == RHS.ElemType))
    return false;
  return LHS.Limits == RHS.Limits;
}
/// Returns the name of a known section ID, i.e. the WASM_SEC_* enumerator
/// name without its prefix ("CUSTOM", "TYPE", ...).
///
/// \param type A section ID; any value without a WASM_SEC_* enumerator
/// reaches llvm_unreachable.
inline llvm::StringRef sectionTypeToString(uint32_t type) {
  switch (type) {
  case wasm::WASM_SEC_CUSTOM:
    return "CUSTOM";
  case wasm::WASM_SEC_TYPE:
    return "TYPE";
  case wasm::WASM_SEC_IMPORT:
    return "IMPORT";
  case wasm::WASM_SEC_FUNCTION:
    return "FUNCTION";
  case wasm::WASM_SEC_TABLE:
    return "TABLE";
  case wasm::WASM_SEC_MEMORY:
    return "MEMORY";
  case wasm::WASM_SEC_GLOBAL:
    return "GLOBAL";
  case wasm::WASM_SEC_EXPORT:
    return "EXPORT";
  case wasm::WASM_SEC_START:
    return "START";
  case wasm::WASM_SEC_ELEM:
    return "ELEM";
  case wasm::WASM_SEC_CODE:
    return "CODE";
  case wasm::WASM_SEC_DATA:
    return "DATA";
  case wasm::WASM_SEC_DATACOUNT:
    return "DATACOUNT";
  case wasm::WASM_SEC_TAG:
    return "TAG";
  default:
    llvm_unreachable("unknown section type");
  }
}
} // end namespace wasm
} // end namespace llvm
#endif

View File

@@ -0,0 +1,31 @@
#ifndef WASM_RELOC
#error "WASM_RELOC must be defined"
#endif
WASM_RELOC(R_WASM_FUNCTION_INDEX_LEB, 0)
WASM_RELOC(R_WASM_TABLE_INDEX_SLEB, 1)
WASM_RELOC(R_WASM_TABLE_INDEX_I32, 2)
WASM_RELOC(R_WASM_MEMORY_ADDR_LEB, 3)
WASM_RELOC(R_WASM_MEMORY_ADDR_SLEB, 4)
WASM_RELOC(R_WASM_MEMORY_ADDR_I32, 5)
WASM_RELOC(R_WASM_TYPE_INDEX_LEB, 6)
WASM_RELOC(R_WASM_GLOBAL_INDEX_LEB, 7)
WASM_RELOC(R_WASM_FUNCTION_OFFSET_I32, 8)
WASM_RELOC(R_WASM_SECTION_OFFSET_I32, 9)
WASM_RELOC(R_WASM_TAG_INDEX_LEB, 10)
WASM_RELOC(R_WASM_MEMORY_ADDR_REL_SLEB, 11)
WASM_RELOC(R_WASM_TABLE_INDEX_REL_SLEB, 12)
WASM_RELOC(R_WASM_GLOBAL_INDEX_I32, 13)
WASM_RELOC(R_WASM_MEMORY_ADDR_LEB64, 14)
WASM_RELOC(R_WASM_MEMORY_ADDR_SLEB64, 15)
WASM_RELOC(R_WASM_MEMORY_ADDR_I64, 16)
WASM_RELOC(R_WASM_MEMORY_ADDR_REL_SLEB64, 17)
WASM_RELOC(R_WASM_TABLE_INDEX_SLEB64, 18)
WASM_RELOC(R_WASM_TABLE_INDEX_I64, 19)
WASM_RELOC(R_WASM_TABLE_NUMBER_LEB, 20)
WASM_RELOC(R_WASM_MEMORY_ADDR_TLS_SLEB, 21)
WASM_RELOC(R_WASM_FUNCTION_OFFSET_I64, 22)
WASM_RELOC(R_WASM_MEMORY_ADDR_LOCREL_I32, 23)
WASM_RELOC(R_WASM_TABLE_INDEX_REL_SLEB64, 24)
WASM_RELOC(R_WASM_MEMORY_ADDR_TLS_SLEB64, 25)
WASM_RELOC(R_WASM_FUNCTION_INDEX_I32, 26)

View File

@@ -57,6 +57,8 @@ private:
void scanCOFF(const llvm::object::COFFObjectFile *O);
void scanWasm(const llvm::object::WasmObjectFile *O);
bool isMachOWithPtrAuth() const;
public:

View File

@@ -2311,14 +2311,28 @@ NodePointer NodePrinter::print(NodePointer Node, unsigned depth,
Printer << "merged ";
}
return nullptr;
case Node::Kind::TypeSymbolicReference:
case Node::Kind::TypeSymbolicReference: {
Printer << "type symbolic reference 0x";
Printer.writeHex(Node->getIndex());
if (Node->hasRemoteAddress()) {
auto ra = Node->getRemoteAddress();
Printer.writeHex(ra.first);
Printer << " (" << ra.second << ")";
} else if (Node->hasIndex()) {
Printer.writeHex(Node->getIndex());
}
return nullptr;
case Node::Kind::OpaqueTypeDescriptorSymbolicReference:
}
case Node::Kind::OpaqueTypeDescriptorSymbolicReference: {
Printer << "opaque type symbolic reference 0x";
Printer.writeHex(Node->getIndex());
if (Node->hasRemoteAddress()) {
auto ra = Node->getRemoteAddress();
Printer.writeHex(ra.first);
Printer << " (" << ra.second << ")";
} else if (Node->hasIndex()) {
Printer.writeHex(Node->getIndex());
}
return nullptr;
}
case Node::Kind::DistributedThunk:
if (!Options.ShortenThunk) {
Printer << "distributed thunk ";

View File

@@ -29,6 +29,7 @@
#include "llvm/Object/ELFTypes.h"
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Object/RelocationResolver.h"
#include "llvm/Object/Wasm.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/StringSaver.h"
@@ -266,6 +267,38 @@ void Image::scanCOFF(const llvm::object::COFFObjectFile *O) {
Segments.push_back({HeaderAddress, O->getData()});
}
/// Scans a WebAssembly object file.
///
/// Sets the header address to 0, records one DynamicRelocations entry for
/// every relocation whose type the relocation resolver supports, and finally
/// registers the whole file contents as a single segment. If no relocation
/// resolver is available the function returns early and registers nothing.
void Image::scanWasm(const llvm::object::WasmObjectFile *O) {
  HeaderAddress = 0;

  auto RelocResolver = getRelocationResolver(*O);
  auto IsSupported = RelocResolver.first;
  auto ApplyReloc = RelocResolver.second;
  if (!(IsSupported && ApplyReloc))
    return;

  for (auto Sec : O->sections()) {
    auto WasmSec = O->getWasmSection(Sec);
    for (auto &Reloc : WasmSec.Relocations) {
      // Step to the symbol at position Reloc.Index; relocations whose index
      // lies past the last symbol are skipped.
      auto SymIt = O->symbol_begin();
      unsigned Steps = 0;
      while (SymIt != O->symbol_end() && Steps < Reloc.Index) {
        ++SymIt;
        ++Steps;
      }
      if (SymIt == O->symbol_end())
        continue;

      auto &WasmSym = O->getWasmSymbol(*SymIt);
      if (IsSupported(Reloc.Type)) {
        uint64_t Resolved =
            ApplyReloc(Reloc.Type, Reloc.Offset, 0, 0, Reloc.Addend);
        DynamicRelocations.insert(
            {Reloc.Offset, {WasmSym.Info.Name, Resolved}});
      } else {
        // Report relocation types the resolver cannot handle and move on.
        llvm::errs() << "Unsupported +" << Reloc.Offset << " "
                     << WasmSym.Info.Name << " +" << Reloc.Addend << "\n";
      }
    }
  }

  Segments.push_back({HeaderAddress, O->getData()});
}
bool Image::isMachOWithPtrAuth() const {
auto macho = dyn_cast<llvm::object::MachOObjectFile>(O);
if (!macho)
@@ -286,6 +319,8 @@ Image::Image(const llvm::object::ObjectFile *O) : O(O) {
scanELF(elf);
} else if (auto coff = dyn_cast<llvm::object::COFFObjectFile>(O)) {
scanCOFF(coff);
} else if (auto wasm = dyn_cast<llvm::object::WasmObjectFile>(O)) {
scanWasm(wasm);
} else {
fputs("unsupported image format\n", stderr);
abort();

File diff suppressed because one or more lines are too long

28
test/Reflection/wasm.test Normal file
View File

@@ -0,0 +1,28 @@
# RUN: %yaml2obj %S/Inputs/wasm.yaml -o %t.wasm
# RUN: %target-swift-reflection-dump --arch=wasi --no-objc-interop %t.wasm | %FileCheck %s
#
# CHECK: FIELDS
# CHECK: t:
# CHECK: ASSOCIATED TYPES
# CHECK: - : simple.P
# CHECK: typealias A = Builtin.Int32
# CHECK: Builtin.Int32
# CHECK: (builtin Builtin.Int32)
#
# simple.swift
# protocol P { associatedtype A }
# struct T {}
# struct S : P {
# typealias A = Builtin.Int32
# let t: T
# }
# sys.c
# void swift_addNewDSOImage() {}
#
# compiled for -target wasm32-unknown-wasip1
# obj2yaml simple.o -o simple.o.yaml
# (replace UNDEFINED symbol with a definition)
# yaml2obj simple.o.yaml -o simple-mod.o
# swiftc simple-mod.o sys.o -o simple.wasm
# obj2yaml simple.wasm -o wasm.yaml
#

View File

@@ -345,6 +345,7 @@ config.llvm_dis = inferSwiftBinary('llvm-dis')
config.llvm_nm = inferSwiftBinary('llvm-nm')
config.llvm_readtapi = inferSwiftBinary('llvm-readtapi')
config.llvm_size = inferSwiftBinary('llvm-size')
config.yaml2obj = inferSwiftBinary('yaml2obj')
config.sourcekitd_test = inferSwiftBinary('sourcekitd-test')
config.complete_test = inferSwiftBinary('complete-test')
config.swift_api_digester = inferSwiftBinary('swift-api-digester')
@@ -745,6 +746,7 @@ config.substitutions.append( ('%llvm-dis', config.llvm_dis) )
config.substitutions.append( ('%llvm-nm', config.llvm_nm) )
config.substitutions.append( ('%llvm-readtapi', config.llvm_readtapi) )
config.substitutions.append( ('%llvm-size', config.llvm_size) )
config.substitutions.append( ('%yaml2obj', config.yaml2obj) )
config.substitutions.append( ('%swift-demangle-yamldump', config.swift_demangle_yamldump) )
config.substitutions.append( ('%swift-demangle', config.swift_demangle) )
config.substitutions.append( ('%Benchmark_O', config.benchmark_o) )

View File

@@ -79,11 +79,13 @@ static llvm::cl::opt<std::string>
llvm::cl::desc("Architecture to inspect in the binary"),
llvm::cl::Required);
#if SWIFT_OBJC_INTEROP
static llvm::cl::opt<bool> DisableObjCInterop(
"no-objc-interop",
llvm::cl::desc("Disable Objective-C interoperability support"));
llvm::cl::desc("Disable Objective-C interoperability support"
#if SWIFT_OBJC_INTEROP
" (not supported)"
#endif
));
} // end namespace options
static int doDumpReflectionSections(ArrayRef<std::string> BinaryFilenames,