Factor out Object File handling from swift-reflection-dump to separate CMake target: StaticMirror

This target can then be shared by a broader set of tools that read metadata from object files.
This commit is contained in:
Artem Chikin
2022-01-18 14:48:56 -08:00
parent d6d4216371
commit f3d784fd0f
8 changed files with 720 additions and 568 deletions

View File

@@ -0,0 +1,149 @@
//===---------------- ObjectFileContext.h - Swift Compiler ---------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2021 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
#ifndef SWIFT_OBJECT_FILE_CONTEXT_H
#define SWIFT_OBJECT_FILE_CONTEXT_H
#include "swift/Reflection/ReflectionContext.h"
namespace llvm {
namespace object {
template <typename Type>
class ELFObjectFile;
class ELFObjectFileBase;
class MachOObjectFile;
} // namespace object
} // namespace llvm
namespace swift {
namespace static_mirror {
using ReadBytesResult = swift::remote::MemoryReader::ReadBytesResult;
/// One object file together with the view of it needed for reading: the
/// file-backed segments at their preferred virtual addresses, and the dynamic
/// relocations collected while scanning the file.
class Image {
private:
/// A run of file contents paired with the virtual address it is mapped at.
struct Segment {
uint64_t Addr;
StringRef Contents;
};
/// The object file this image was built from.
const llvm::object::ObjectFile *O;
/// Address reported by getStartAddress(). The Mach-O and ELF scanners set it
/// to the lowest segment address they record; the COFF scanner sets it to the
/// image base.
uint64_t HeaderAddress;
/// Segments recorded by the format-specific scanners, in scan order.
std::vector<Segment> Segments;
/// What a relocated location refers to: a symbol name plus an offset. ELF
/// relative relocations are recorded with an empty symbol name.
struct DynamicRelocation {
StringRef Symbol;
uint64_t Offset;
};
/// Relocations keyed by the address of the relocated location.
llvm::DenseMap<uint64_t, DynamicRelocation> DynamicRelocations;
// Format-specific scanners invoked by the constructor; each fills in
// HeaderAddress and Segments, and the Mach-O and ELF scanners also fill in
// DynamicRelocations.
void scanMachO(const llvm::object::MachOObjectFile *O);
template <typename ELFT>
void scanELFType(const llvm::object::ELFObjectFile<ELFT> *O);
void scanELF(const llvm::object::ELFObjectFileBase *O);
void scanCOFF(const llvm::object::COFFObjectFile *O);
/// True when the object file is a Mach-O image for arm64e.
bool isMachOWithPtrAuth() const;
public:
/// Scans \p O according to its format. Aborts the process when the format is
/// not Mach-O, ELF or COFF.
explicit Image(const llvm::object::ObjectFile *O);
const llvm::object::ObjectFile *getObjectFile() const { return O; }
unsigned getBytesInAddress() const { return O->getBytesInAddress(); }
uint64_t getStartAddress() const { return HeaderAddress; }
/// Highest end address (segment address plus contents size) over all recorded
/// segments, or 0 when there are none.
uint64_t getEndAddress() const;
/// Returns the contents of the first recorded segment that contains the whole
/// range [Addr, Addr + Size). The result starts at \p Addr and runs to the
/// end of that segment, so it may be longer than \p Size. Returns an empty
/// StringRef when no segment contains the range.
StringRef getContentsAtAddress(uint64_t Addr, uint64_t Size) const;
/// Resolves the pointer stored at \p Addr. When a dynamic relocation was
/// recorded for \p Addr the result carries that relocation's symbol and
/// offset; otherwise it is an unnamed pointer built from \p pointerValue.
remote::RemoteAbsolutePointer resolvePointer(uint64_t Addr,
uint64_t pointerValue) const;
};
/// MemoryReader that reads from the on-disk representation of an executable
/// or dynamic library image.
///
/// Each loaded image is stored with a slide. When more than one image is
/// loaded, the constructor picks slides so that the images occupy disjoint,
/// 16 KiB-aligned ranges of one address space; an address is mapped back to
/// its image by checking which slid [start, end) range contains it.
///
/// NOTE(review): the helper names below still mention an "image index"; the
/// visible implementation in ObjectFileContext.cpp uses slides rather than
/// packing an index into the high bits of the address.
class ObjectMemoryReader : public reflection::MemoryReader {
// An image plus the value added to its preferred addresses to place it in the
// shared address space. TheImage must stay the first member:
// encodeImageIndexAndAddress() casts an Image pointer back to an ImageEntry
// pointer.
struct ImageEntry {
Image TheImage;
uint64_t Slide;
};
std::vector<ImageEntry> Images;
// Finds the image whose slid range contains Addr and returns it together with
// the unslid address, or {nullptr, 0} when no image contains Addr.
std::pair<const Image *, uint64_t>
decodeImageIndexAndAddress(uint64_t Addr) const;
// Adds the slide of the entry holding `image` to `imageAddr`.
uint64_t encodeImageIndexAndAddress(const Image *image,
uint64_t imageAddr) const;
// Looks up the owning image for Addr and forwards to
// Image::getContentsAtAddress; empty when no image contains Addr.
StringRef getContentsAtAddress(uint64_t Addr, uint64_t Size);
public:
/// Builds one Image per object file. Aborts the process when the list is
/// empty, when the files disagree on word size, or when the laid-out images
/// do not fit a 32-bit address space for word sizes below 8.
explicit ObjectMemoryReader(
const std::vector<const llvm::object::ObjectFile *> &ObjectFiles);
ArrayRef<ImageEntry> getImages() const { return Images; }
bool queryDataLayout(DataLayoutQueryType type, void *inBuffer,
void *outBuffer) override;
/// Slid start address of the i-th image; \p i must be a valid index.
reflection::RemoteAddress getImageStartAddress(unsigned i) const;
// TODO: We could consult the dynamic symbol tables of the images to
// implement this.
// Until then every lookup yields a null address.
reflection::RemoteAddress getSymbolAddress(const std::string &name) override {
return reflection::RemoteAddress(nullptr);
}
ReadBytesResult readBytes(reflection::RemoteAddress Addr,
uint64_t Size) override;
/// Appends the NUL-terminated string at \p Addr to \p Dest; returns false
/// when the address is unmapped or no terminator is found.
bool readString(reflection::RemoteAddress Addr, std::string &Dest) override;
remote::RemoteAbsolutePointer resolvePointer(reflection::RemoteAddress Addr,
uint64_t pointerValue) override;
};
/// Type-erased owner of a reflection context: a void pointer plus the function
/// that destroys it.
using ReflectionContextOwner = std::unique_ptr<void, void (*)(void *)>;
/// Bundles an owned reflection context with references to the type-ref builder
/// and the memory reader it was created with.
struct ReflectionContextHolder {
// Keeps the context alive.
ReflectionContextOwner Owner;
// Builder obtained from the owned context.
reflection::TypeRefBuilder &Builder;
// Reader the context was constructed from.
ObjectMemoryReader &Reader;
};
/// Extracts the value held by \p value. When \p value holds an error instead,
/// prints the error to llvm::errs() and terminates the process with
/// EXIT_FAILURE.
template <typename T>
T unwrap(llvm::Expected<T> value) {
  if (!value) {
    llvm::errs() << "swift-reflection-test error: "
                 << toString(value.takeError()) << "\n";
    exit(EXIT_FAILURE);
  }
  return std::move(value.get());
}
/// Creates an ObjectMemoryReader over \p objectFiles and a reflection context
/// matching the pointer size the reader reports.
ReflectionContextHolder makeReflectionContextForObjectFiles(
const std::vector<const llvm::object::ObjectFile *> &objectFiles);
/// Creates a reflection context on top of an existing reader.
// NOTE(review): the definition visible in ObjectFileContext.cpp is a function
// template parameterized on the Runtime type, which this non-template
// declaration does not match - confirm a matching definition exists.
ReflectionContextHolder makeReflectionContextForMetadataReader(
std::shared_ptr<ObjectMemoryReader> reader);
} // end namespace static_mirror
} // end namespace swift
#endif // SWIFT_OBJECT_FILE_CONTEXT_H

View File

@@ -45,6 +45,7 @@ add_subdirectory(SwiftRemoteMirror)
add_subdirectory(SIL) add_subdirectory(SIL)
add_subdirectory(SILGen) add_subdirectory(SILGen)
add_subdirectory(SILOptimizer) add_subdirectory(SILOptimizer)
add_subdirectory(StaticMirror)
add_subdirectory(SymbolGraphGen) add_subdirectory(SymbolGraphGen)
add_subdirectory(Syntax) add_subdirectory(Syntax)
add_subdirectory(SyntaxParse) add_subdirectory(SyntaxParse)

View File

@@ -0,0 +1,8 @@
# Static host library built from the object-file reading code in
# ObjectFileContext.cpp.
add_swift_host_library(swiftStaticMirror STATIC
ObjectFileContext.cpp)
# Libraries the implementation links against; PRIVATE, so they are not
# propagated to targets that link swiftStaticMirror.
target_link_libraries(swiftStaticMirror PRIVATE
swiftFrontend
swiftReflection)

View File

@@ -0,0 +1,547 @@
//===------------ ObjectFileContext.cpp - Swift Compiler ----------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2021 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
#include "swift/StaticMirror/ObjectFileContext.h"
#include "swift/Basic/Unreachable.h"
#include "swift/Demangling/Demangler.h"
#include "swift/Reflection/ReflectionContext.h"
#include "swift/Reflection/TypeLowering.h"
#include "swift/Reflection/TypeRefBuilder.h"
#include "swift/Remote/CMemoryReader.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/ELF.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ELFTypes.h"
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Object/RelocationResolver.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/StringSaver.h"
#include <sstream>
using namespace llvm::object;
namespace swift {
namespace static_mirror {
// Since ObjectMemoryReader maintains ownership of the ObjectFiles and their
// raw data, we can vend ReadBytesResults with no-op destructors.
// This is the deleter passed to ReadBytesResult in readBytes(); it
// intentionally does nothing with the pointer it receives.
static void no_op_destructor(const void *) {}
/// Scans a Mach-O image.
///
/// Records every LC_SEGMENT / LC_SEGMENT_64 load command that has file
/// contents as a segment at its preferred vm address, sets HeaderAddress to
/// the lowest such address (UINT64_MAX when there is none), and records one
/// dynamic relocation per entry of the bind table.
void Image::scanMachO(const llvm::object::MachOObjectFile *O) {
  using namespace llvm::MachO;

  HeaderAddress = UINT64_MAX;

  // Shared by the 32- and 64-bit load commands: map one file-backed segment.
  // Segments without file contents, or whose file range does not lie fully
  // inside the file, are skipped.
  auto mapSegment = [&](uint64_t vmaddr, uint64_t fileoff, uint64_t filesize) {
    if (filesize == 0)
      return;
    auto contents = O->getData().slice(fileoff, fileoff + filesize);
    if (contents.empty() || contents.size() != filesize)
      return;
    Segments.push_back({vmaddr, contents});
    HeaderAddress = std::min(HeaderAddress, vmaddr);
  };

  // Collect the segment preferred vm mappings.
  for (const auto &Load : O->load_commands()) {
    if (Load.C.cmd == LC_SEGMENT_64) {
      auto Seg = O->getSegment64LoadCommand(Load);
      mapSegment(Seg.vmaddr, Seg.fileoff, Seg.filesize);
    } else if (Load.C.cmd == LC_SEGMENT) {
      auto Seg = O->getSegmentLoadCommand(Load);
      mapSegment(Seg.vmaddr, Seg.fileoff, Seg.filesize);
    }
  }

  // Walk through the bindings list to collect all the external references
  // in the image.
  llvm::Error error = llvm::Error::success();
  auto OO = const_cast<llvm::object::MachOObjectFile *>(O);
  for (auto bind : OO->bindTable(error)) {
    if (error) {
      llvm::consumeError(std::move(error));
      break;
    }

    // The offset from the symbol is stored at the target address.
    // Start from zero so the value is well defined even when asserts are
    // compiled out and the word size is unexpected.
    uint64_t Offset = 0;
    auto OffsetContent =
        getContentsAtAddress(bind.address(), O->getBytesInAddress());
    if (OffsetContent.empty())
      continue;

    if (O->getBytesInAddress() == 8) {
      memcpy(&Offset, OffsetContent.data(), sizeof(Offset));
    } else if (O->getBytesInAddress() == 4) {
      uint32_t OffsetValue;
      memcpy(&OffsetValue, OffsetContent.data(), sizeof(OffsetValue));
      Offset = OffsetValue;
    } else {
      assert(false && "unexpected word size?!");
    }

    DynamicRelocations.insert({bind.address(), {bind.symbolName(), Offset}});
  }
  // The error must be consumed even when the loop ended normally.
  if (error) {
    llvm::consumeError(std::move(error));
  }
}
/// Scans an ELF image of one concrete ELF type.
///
/// Records every program header that has file contents as a segment at its
/// virtual address, sets HeaderAddress to the lowest such address (UINT64_MAX
/// when there is none), and records the dynamic relocations the relocation
/// resolver supports plus RELA relative relocations.
template <typename ELFT>
void Image::scanELFType(const llvm::object::ELFObjectFile<ELFT> *O) {
  using namespace llvm::ELF;

  HeaderAddress = UINT64_MAX;

  auto phdrs = O->getELFFile().program_headers();
  if (phdrs) {
    for (auto &ph : *phdrs) {
      if (ph.p_filesz == 0)
        continue;

      auto contents =
          O->getData().slice(ph.p_offset, ph.p_offset + ph.p_filesz);
      if (contents.empty() || contents.size() != ph.p_filesz)
        continue;

      Segments.push_back({ph.p_vaddr, contents});
      HeaderAddress = std::min(HeaderAddress, (uint64_t)ph.p_vaddr);
    }
  } else {
    // An Expected holding an error must not be dereferenced; drop the error
    // and continue with no program-header segments.
    llvm::consumeError(phdrs.takeError());
  }

  // Collect the dynamic relocations.
  auto resolver = getRelocationResolver(*O);
  auto resolverSupports = resolver.first;
  auto resolve = resolver.second;
  if (!resolverSupports || !resolve)
    return;

  auto machine = O->getELFFile().getHeader().e_machine;
  auto relativeRelocType = llvm::object::getELFRelativeRelocationType(machine);

  for (auto &S : static_cast<const llvm::object::ELFObjectFileBase *>(O)
                     ->dynamic_relocation_sections()) {
    bool isRela =
        O->getSection(S.getRawDataRefImpl())->sh_type == llvm::ELF::SHT_RELA;

    for (const llvm::object::RelocationRef &R : S.relocations()) {
      // `getRelocationResolver` doesn't handle RELATIVE relocations, so we
      // have to do that ourselves.
      if (isRela && R.getType() == relativeRelocType) {
        auto rela = O->getRela(R.getRawDataRefImpl());
        DynamicRelocations.insert(
            {R.getOffset(), {{}, HeaderAddress + rela->r_addend}});
        continue;
      }

      if (!resolverSupports(R.getType()))
        continue;
      auto symbol = R.getSymbol();
      auto name = symbol->getName();
      if (!name) {
        // Relocations whose symbol name cannot be read are skipped.
        llvm::consumeError(name.takeError());
        continue;
      }
      uint64_t offset = resolve(R.getType(), R.getOffset(), 0, 0, 0);
      DynamicRelocations.insert({R.getOffset(), {*name, offset}});
    }
  }
}
/// Dispatches to scanELFType for the concrete ELF flavour of \p O (32/64-bit,
/// little/big endian). Does nothing when \p O is none of the four.
void Image::scanELF(const llvm::object::ELFObjectFileBase *O) {
  // Scans the file when the cast succeeded; reports whether it did.
  auto scanIfMatches = [this](auto *typedFile) {
    if (!typedFile)
      return false;
    this->scanELFType(typedFile);
    return true;
  };

  // Short-circuit evaluation keeps the order 32LE, 32BE, 64LE, 64BE and stops
  // at the first match.
  const bool scanned =
      scanIfMatches(
          dyn_cast<llvm::object::ELFObjectFile<llvm::object::ELF32LE>>(O)) ||
      scanIfMatches(
          dyn_cast<llvm::object::ELFObjectFile<llvm::object::ELF32BE>>(O)) ||
      scanIfMatches(
          dyn_cast<llvm::object::ELFObjectFile<llvm::object::ELF64LE>>(O)) ||
      scanIfMatches(
          dyn_cast<llvm::object::ELFObjectFile<llvm::object::ELF64BE>>(O));
  if (!scanned)
    return;

  // FIXME: ReflectionContext tries to read bits of the ELF structure that
  // aren't normally mapped by a phdr. Until that's fixed,
  // allow access to the whole file 1:1 in address space that isn't otherwise
  // mapped.
  Segments.push_back({HeaderAddress, O->getData()});
}
/// Scans a COFF image: uses the image base as HeaderAddress and records every
/// section that has raw data at image base + virtual address.
void Image::scanCOFF(const llvm::object::COFFObjectFile *O) {
  HeaderAddress = O->getImageBase();

  for (auto sectionRef : O->sections()) {
    auto header = O->getCOFFSection(sectionRef);
    if (header->SizeOfRawData == 0)
      continue;

    auto rawData = O->getData().slice(
        header->PointerToRawData,
        header->PointerToRawData + header->SizeOfRawData);
    // Skip sections whose raw data does not lie fully inside the file.
    if (rawData.empty() || rawData.size() != header->SizeOfRawData)
      continue;

    auto mappedAt = O->getImageBase() + header->VirtualAddress;
    Segments.push_back({mappedAt, rawData});
  }

  // FIXME: We need to map the header at least, but how much of it does
  // Windows typically map?
  Segments.push_back({HeaderAddress, O->getData()});
}
/// Returns true when the image is a Mach-O file whose header declares the
/// ARM64 CPU type with the ARM64E subtype.
bool Image::isMachOWithPtrAuth() const {
  if (auto *machO = dyn_cast<llvm::object::MachOObjectFile>(O)) {
    const auto &machHeader = machO->getHeader();
    const bool isArm64 = machHeader.cputype == llvm::MachO::CPU_TYPE_ARM64;
    return isArm64 &&
           machHeader.cpusubtype == llvm::MachO::CPU_SUBTYPE_ARM64E;
  }
  return false;
}
/// Builds the image by running the scanner that matches the object file's
/// format. Any format other than Mach-O, ELF or COFF aborts the process.
Image::Image(const llvm::object::ObjectFile *O) : O(O) {
  // Unfortunately llvm doesn't provide a uniform interface for iterating
  // loadable segments or dynamic relocations in executable images yet.
  if (auto *machO = dyn_cast<llvm::object::MachOObjectFile>(O)) {
    scanMachO(machO);
    return;
  }
  if (auto *elfFile = dyn_cast<llvm::object::ELFObjectFileBase>(O)) {
    scanELF(elfFile);
    return;
  }
  if (auto *coffFile = dyn_cast<llvm::object::COFFObjectFile>(O)) {
    scanCOFF(coffFile);
    return;
  }

  fputs("unsupported image format\n", stderr);
  abort();
}
uint64_t Image::getEndAddress() const {
uint64_t max = 0;
for (auto &Segment : Segments) {
max = std::max(max, Segment.Addr + Segment.Contents.size());
}
return max;
}
/// Returns the contents of the first recorded segment that contains the whole
/// range [Addr, Addr + Size).
///
/// The returned StringRef starts at \p Addr and extends to the end of that
/// segment, so it can be longer than \p Size. An empty StringRef is returned
/// when no segment contains the range.
StringRef Image::getContentsAtAddress(uint64_t Addr, uint64_t Size) const {
  for (const auto &Seg : Segments) {
    if (Addr < Seg.Addr)
      continue;

    // Compare offsets and lengths instead of end addresses so that a huge
    // Addr or Size cannot wrap around and slip past the bounds check.
    const uint64_t offset = Addr - Seg.Addr;
    const uint64_t length = Seg.Contents.size();
    if (offset > length || Size > length - offset)
      continue;

    return Seg.Contents.drop_front(offset);
  }
  return {};
}
/// Resolves the pointer stored at \p Addr whose raw value is \p pointerValue.
///
/// A dynamic relocation recorded for \p Addr wins: the result carries its
/// symbol and offset. Otherwise the result is an unnamed pointer built from
/// \p pointerValue.
remote::RemoteAbsolutePointer
Image::resolvePointer(uint64_t Addr, uint64_t pointerValue) const {
  auto relocation = DynamicRelocations.find(Addr);
  if (relocation != DynamicRelocations.end())
    return remote::RemoteAbsolutePointer(relocation->second.Symbol,
                                         relocation->second.Offset);

  // In Mach-O images with ptrauth, the pointer value has an offset from
  // the base address in the low 32 bits, and ptrauth discriminator info
  // in the top 32 bits.
  if (isMachOWithPtrAuth())
    return remote::RemoteAbsolutePointer(
        "", HeaderAddress + (pointerValue & 0xffffffffull));

  return remote::RemoteAbsolutePointer("", pointerValue);
}
std::pair<const Image *, uint64_t>
ObjectMemoryReader::decodeImageIndexAndAddress(uint64_t Addr) const {
for (auto &Image : Images) {
if (Image.TheImage.getStartAddress() + Image.Slide <= Addr &&
Addr < Image.TheImage.getEndAddress() + Image.Slide) {
return {&Image.TheImage, Addr - Image.Slide};
}
}
return {nullptr, 0};
}
uint64_t
ObjectMemoryReader::encodeImageIndexAndAddress(const Image *image,
uint64_t imageAddr) const {
auto entry = (const ImageEntry *)image;
return imageAddr + entry->Slide;
}
/// Looks up the image containing \p Addr and returns that image's contents at
/// the translated address; empty when no image contains \p Addr.
StringRef ObjectMemoryReader::getContentsAtAddress(uint64_t Addr,
                                                   uint64_t Size) {
  const auto decoded = decodeImageIndexAndAddress(Addr);
  const Image *owner = decoded.first;
  if (owner == nullptr)
    return StringRef();
  return owner->getContentsAtAddress(decoded.second, Size);
}
/// Builds one image per object file and, when several are given, assigns each
/// a slide so that they occupy disjoint 16 KiB-aligned ranges starting at 0.
///
/// Aborts the process when \p ObjectFiles is empty, when the files disagree on
/// word size, or when the layout exceeds 32 bits for word sizes below 8.
ObjectMemoryReader::ObjectMemoryReader(
    const std::vector<const llvm::object::ObjectFile *> &ObjectFiles) {
  if (ObjectFiles.empty()) {
    fputs("no object files provided\n", stderr);
    abort();
  }

  unsigned commonWordSize = 0;
  for (const llvm::object::ObjectFile *file : ObjectFiles) {
    // All the object files we look at should share a word size.
    if (commonWordSize == 0) {
      commonWordSize = file->getBytesInAddress();
    } else if (commonWordSize != file->getBytesInAddress()) {
      fputs("object files must all be for the same architecture\n", stderr);
      abort();
    }
    Images.push_back({Image(file), 0});
  }

  // A single image keeps its preferred addresses (slide 0).
  if (Images.size() <= 1)
    return;

  // If there is more than one image loaded, try to fit them into one address
  // space.
  const uint64_t slotAlignment = 16384;
  uint64_t nextBase = 0;
  for (ImageEntry &entry : Images) {
    const uint64_t imageStart = entry.TheImage.getStartAddress();
    entry.Slide = nextBase - imageStart;
    nextBase += entry.TheImage.getEndAddress() - imageStart;
    // Round up so the next image starts on an aligned boundary.
    nextBase = (nextBase + (slotAlignment - 1)) & ~(slotAlignment - 1);
  }

  if (commonWordSize < 8 && nextBase > 0xFFFFFFFFu) {
    fputs("object files did not fit in address space", stderr);
    abort();
  }
}
/// Answers data-layout queries for the loaded images.
///
/// The word size is taken from the first image. The platform-dependent answers
/// are derived from the platform this code was compiled for, not from the
/// images themselves (see the TODO below).
///
/// \param type the query to answer.
/// \param inBuffer unused by the queries handled here.
/// \param outBuffer receives the answer; its width depends on the query
///        (uint8_t, uint32_t or uint64_t as written in each case).
/// \returns true when the query was answered, false otherwise.
bool ObjectMemoryReader::queryDataLayout(DataLayoutQueryType type,
                                         void *inBuffer, void *outBuffer) {
  auto wordSize = Images.front().TheImage.getBytesInAddress();
  // TODO: The following should be set based on inspecting the image.
  // This code sets it to match the platform this code was compiled for.
#if defined(__APPLE__) && __APPLE__
  auto applePlatform = true;
#else
  auto applePlatform = false;
#endif
  // Each TARGET_OS_* macro is guarded by its own defined() check; the watchOS
  // test previously checked TARGET_OS_IOS instead of TARGET_OS_WATCH.
#if defined(__APPLE__) && __APPLE__ &&                                         \
    ((defined(TARGET_OS_IOS) && TARGET_OS_IOS) ||                              \
     (defined(TARGET_OS_WATCH) && TARGET_OS_WATCH) ||                          \
     (defined(TARGET_OS_TV) && TARGET_OS_TV) || defined(__arm64__))
  auto iosDerivedPlatform = true;
#else
  auto iosDerivedPlatform = false;
#endif

  switch (type) {
  case DLQ_GetPointerSize:
  case DLQ_GetSizeSize: {
    // Pointers and sizes are both one word wide.
    auto result = static_cast<uint8_t *>(outBuffer);
    *result = wordSize;
    return true;
  }
  case DLQ_GetPtrAuthMask: {
    // We don't try to sign pointers at all in our view of the object
    // mapping.
    if (wordSize == 4) {
      auto result = static_cast<uint32_t *>(outBuffer);
      *result = (uint32_t)~0ull;
      return true;
    } else if (wordSize == 8) {
      auto result = static_cast<uint64_t *>(outBuffer);
      *result = (uint64_t)~0ull;
      return true;
    }
    return false;
  }
  case DLQ_GetObjCReservedLowBits: {
    auto result = static_cast<uint8_t *>(outBuffer);
    if (applePlatform && !iosDerivedPlatform && wordSize == 8) {
      // Obj-C reserves low bit on 64-bit macOS only.
      // Other Apple platforms don't reserve this bit (even when
      // running on x86_64-based simulators).
      *result = 1;
    } else {
      *result = 0;
    }
    return true;
  }
  case DLQ_GetLeastValidPointerValue: {
    auto result = static_cast<uint64_t *>(outBuffer);
    if (applePlatform && wordSize == 8) {
      // Swift reserves the first 4GiB on 64-bit Apple platforms
      *result = 0x100000000;
    } else {
      // Swift reserves the first 4KiB everywhere else
      *result = 0x1000;
    }
    return true;
  }
  }

  // Any query not handled above is reported as unanswered.
  return false;
}
/// Returns the start address of the i-th image in the reader's address space
/// (the image's own start address plus its slide). \p i must be a valid index.
reflection::RemoteAddress
ObjectMemoryReader::getImageStartAddress(unsigned i) const {
  assert(i < Images.size());
  const Image &image = Images[i].TheImage;
  const uint64_t slidStart =
      encodeImageIndexAndAddress(&image, image.getStartAddress());
  return reflection::RemoteAddress(slidStart);
}
/// Returns a pointer to the image contents at \p Addr. The result is paired
/// with a no-op deleter, so releasing it frees nothing.
ReadBytesResult ObjectMemoryReader::readBytes(reflection::RemoteAddress Addr,
                                              uint64_t Size) {
  StringRef contents = getContentsAtAddress(Addr.getAddressData(), Size);
  return ReadBytesResult(contents.data(), no_op_destructor);
}
/// Reads the NUL-terminated string stored at \p Addr and appends it, without
/// the terminator, to \p Dest.
///
/// \returns false when \p Addr is not mapped or when no NUL byte is found
/// before the end of the containing segment; \p Dest is left untouched then.
bool ObjectMemoryReader::readString(reflection::RemoteAddress Addr,
                                    std::string &Dest) {
  auto addrValue = Addr.getAddressData();
  // Asking for one byte yields everything from Addr to the end of the segment.
  auto resultBuffer = getContentsAtAddress(addrValue, 1);
  if (resultBuffer.empty())
    return false;

  // Make sure there's a null terminator somewhere in the contents. Searching
  // with the buffer's own size type avoids truncating large buffer lengths.
  const size_t terminator = resultBuffer.find('\0');
  if (terminator == StringRef::npos)
    return false;

  Dest.append(resultBuffer.data(), terminator);
  return true;
}
/// Resolves the pointer stored at \p Addr whose raw value is \p pointerValue.
///
/// Returns a default-constructed pointer when no image contains \p Addr. A
/// pointer the image resolves to an address is translated back into the
/// reader's address space; anything else is returned as the image produced it.
remote::RemoteAbsolutePointer
ObjectMemoryReader::resolvePointer(reflection::RemoteAddress Addr,
                                   uint64_t pointerValue) {
  const auto decoded = decodeImageIndexAndAddress(Addr.getAddressData());
  const Image *owner = decoded.first;
  if (owner == nullptr)
    return remote::RemoteAbsolutePointer();

  auto resolved = owner->resolvePointer(decoded.second, pointerValue);

  // If the pointer is relative to an unresolved relocation, leave it as is.
  if (!resolved || !resolved.isResolved())
    return resolved;

  // Mix in the image index again to produce a remote address pointing into
  // the same image.
  const uint64_t slidAddress = encodeImageIndexAndAddress(
      owner, resolved.getResolvedAddress().getAddressData());
  return remote::RemoteAbsolutePointer("", slidAddress);
}
/// Creates a reflection context for the given Runtime on top of \p reader,
/// registers every image of the reader with it, and returns it together with
/// its builder and the reader.
template <typename Runtime>
ReflectionContextHolder makeReflectionContextForMetadataReader(
    std::shared_ptr<ObjectMemoryReader> reader) {
  using ReflectionContext = reflection::ReflectionContext<Runtime>;

  auto *context = new ReflectionContext(reader);
  auto &builder = context->getBuilder();

  const unsigned imageCount = reader->getImages().size();
  for (unsigned index = 0; index != imageCount; ++index)
    context->addImage(reader->getImageStartAddress(index));

  // The owner is type-erased, so the deleter restores the concrete type
  // before destroying the context.
  ReflectionContextOwner owner(context, [](void *erased) {
    delete static_cast<ReflectionContext *>(erased);
  });
  return {std::move(owner), builder, *reader};
}
/// Returns the address width in bytes of the first object file, or 0 when
/// \p objectFiles is empty.
// NOTE(review): the original body was empty, which is undefined behaviour for
// a function returning a value. Using the first file mirrors how
// queryDataLayout derives the word size - confirm this is the intended result.
uint8_t queryPointerSize(const std::vector<const ObjectFile *> &objectFiles) {
  if (objectFiles.empty())
    return 0;
  return static_cast<uint8_t>(objectFiles.front()->getBytesInAddress());
}
/// Creates an ObjectMemoryReader over \p objectFiles and a reflection context
/// whose runtime target matches the pointer size the reader reports.
///
/// Pointer sizes of 4 and 8 bytes are supported; any other size aborts the
/// process.
ReflectionContextHolder makeReflectionContextForObjectFiles(
    const std::vector<const ObjectFile *> &objectFiles) {
  auto Reader = std::make_shared<ObjectMemoryReader>(objectFiles);

  // Zero-initialized so an unanswered query falls through to the
  // "unsupported word size" path instead of reading an indeterminate value.
  uint8_t pointerSize = 0;
  Reader->queryDataLayout(DataLayoutQueryType::DLQ_GetPointerSize, nullptr,
                          &pointerSize);

  switch (pointerSize) {
  // This label was missing, which left the 32-bit branch unreachable.
  case 4:
    return makeReflectionContextForMetadataReader<
    // FIXME: This could be configurable.
#if SWIFT_OBJC_INTEROP
        External<WithObjCInterop<RuntimeTarget<4>>>
#else
        External<NoObjCInterop<RuntimeTarget<4>>>
#endif
        >(std::move(Reader));
  case 8:
    return makeReflectionContextForMetadataReader<
    // FIXME: This could be configurable.
#if SWIFT_OBJC_INTEROP
        External<WithObjCInterop<RuntimeTarget<8>>>
#else
        External<NoObjCInterop<RuntimeTarget<8>>>
#endif
        >(std::move(Reader));
  default:
    fputs("unsupported word size in object file\n", stderr);
    abort();
  }
}
} // end namespace static_mirror
} // end namespace swift

View File

@@ -497,14 +497,13 @@ void TypeRefBuilder::dumpCaptureSection(std::ostream &stream) {
} }
/// Given the address of a conformance descriptor, attempt to read it. /// Given the address of a conformance descriptor, attempt to read it.
static void readConformanceDescriptor( static void readConformanceDescriptor(const ExternalProtocolConformanceRecord &record) {
const ExternalProtocolConformanceRecord &record) {
// Read the flags to figure out how much space we should read. // Read the flags to figure out how much space we should read.
ContextDescriptorFlags flags; // ContextDescriptorFlags flags;
if (!Reader->readBytes(RemoteAddress(address), (uint8_t*)&flags, // if (!Reader->readBytes(RemoteAddress(address), (uint8_t*)&flags,
sizeof(flags))) // sizeof(flags)))
return nullptr; // return nullptr;
} }
void TypeRefBuilder::dumpConformanceSection(std::ostream &stream) { void TypeRefBuilder::dumpConformanceSection(std::ostream &stream) {

View File

@@ -208,14 +208,16 @@ swift_reflection_addReflectionInfo(SwiftReflectionContextRef ContextRef,
std::cerr << "reserved field in swift_reflection_info_t is not zero\n"; std::cerr << "reserved field in swift_reflection_info_t is not zero\n";
abort(); abort();
} }
ReflectionInfo ContextInfo{ ReflectionInfo ContextInfo{
sectionFromInfo<FieldDescriptorIterator>(Info, Info.field), sectionFromInfo<FieldDescriptorIterator>(Info, Info.field),
sectionFromInfo<AssociatedTypeIterator>(Info, Info.associated_types), sectionFromInfo<AssociatedTypeIterator>(Info, Info.associated_types),
sectionFromInfo<BuiltinTypeDescriptorIterator>(Info, Info.builtin_types), sectionFromInfo<BuiltinTypeDescriptorIterator>(Info, Info.builtin_types),
sectionFromInfo<CaptureDescriptorIterator>(Info, Info.capture), sectionFromInfo<CaptureDescriptorIterator>(Info, Info.capture),
sectionFromInfo<const void *>(Info, Info.type_references), sectionFromInfo<const void *>(Info, Info.type_references),
sectionFromInfo<const void *>(Info, Info.reflection_strings)}; sectionFromInfo<const void *>(Info, Info.reflection_strings),
// TODO: Conformance section
ConformanceSection(RemoteRef<void>(), 0)};
Context->addReflectionInfo(ContextInfo); Context->addReflectionInfo(ContextInfo);
} }
@@ -234,8 +236,9 @@ void swift_reflection_addReflectionMappingInfo(
reflectionSectionFromLocalAndRemote<CaptureDescriptorIterator>( reflectionSectionFromLocalAndRemote<CaptureDescriptorIterator>(
Info.capture), Info.capture),
reflectionSectionFromLocalAndRemote<const void *>(Info.type_references), reflectionSectionFromLocalAndRemote<const void *>(Info.type_references),
reflectionSectionFromLocalAndRemote<const void *>( reflectionSectionFromLocalAndRemote<const void *>(Info.reflection_strings),
Info.reflection_strings)}; // TODO: Conformance section
ConformanceSection(RemoteRef<void>(), 0)};
Context->addReflectionInfo(ContextInfo); Context->addReflectionInfo(ContextInfo);
} }

View File

@@ -5,4 +5,5 @@ add_swift_host_tool(swift-reflection-dump
) )
target_link_libraries(swift-reflection-dump target_link_libraries(swift-reflection-dump
PRIVATE PRIVATE
swiftStaticMirror
swiftReflection) swiftReflection)

View File

@@ -19,6 +19,7 @@
#include "swift/Reflection/ReflectionContext.h" #include "swift/Reflection/ReflectionContext.h"
#include "swift/Reflection/TypeRef.h" #include "swift/Reflection/TypeRef.h"
#include "swift/Reflection/TypeRefBuilder.h" #include "swift/Reflection/TypeRefBuilder.h"
#include "swift/StaticMirror/ObjectFileContext.h"
#include "llvm/ADT/StringSet.h" #include "llvm/ADT/StringSet.h"
#include "llvm/Object/Archive.h" #include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h" #include "llvm/Object/COFF.h"
@@ -50,6 +51,7 @@ using namespace llvm::object;
using namespace swift; using namespace swift;
using namespace swift::reflection; using namespace swift::reflection;
using namespace swift::static_mirror;
using namespace swift::remote; using namespace swift::remote;
using namespace Demangle; using namespace Demangle;
@@ -78,564 +80,6 @@ static llvm::cl::opt<std::string>
llvm::cl::Required); llvm::cl::Required);
} // end namespace options } // end namespace options
template <typename T> static T unwrap(llvm::Expected<T> value) {
if (value)
return std::move(value.get());
llvm::errs() << "swift-reflection-test error: " << toString(value.takeError())
<< "\n";
exit(EXIT_FAILURE);
}
using ReadBytesResult = swift::remote::MemoryReader::ReadBytesResult;
// Since ObjectMemoryReader maintains ownership of the ObjectFiles and their
// raw data, we can vend ReadBytesResults with no-op destructors.
static void no_op_destructor(const void*) {}
class Image {
private:
struct Segment {
uint64_t Addr;
StringRef Contents;
};
const ObjectFile *O;
uint64_t HeaderAddress;
std::vector<Segment> Segments;
struct DynamicRelocation {
StringRef Symbol;
uint64_t Offset;
};
llvm::DenseMap<uint64_t, DynamicRelocation> DynamicRelocations;
void scanMachO(const MachOObjectFile *O) {
using namespace llvm::MachO;
HeaderAddress = UINT64_MAX;
// Collect the segment preferred vm mappings.
for (const auto &Load : O->load_commands()) {
if (Load.C.cmd == LC_SEGMENT_64) {
auto Seg = O->getSegment64LoadCommand(Load);
if (Seg.filesize == 0)
continue;
auto contents = O->getData().slice(Seg.fileoff,
Seg.fileoff + Seg.filesize);
if (contents.empty() || contents.size() != Seg.filesize)
continue;
Segments.push_back({Seg.vmaddr, contents});
HeaderAddress = std::min(HeaderAddress, Seg.vmaddr);
} else if (Load.C.cmd == LC_SEGMENT) {
auto Seg = O->getSegmentLoadCommand(Load);
if (Seg.filesize == 0)
continue;
auto contents = O->getData().slice(Seg.fileoff,
Seg.fileoff + Seg.filesize);
if (contents.empty() || contents.size() != Seg.filesize)
continue;
Segments.push_back({Seg.vmaddr, contents});
HeaderAddress = std::min(HeaderAddress, (uint64_t)Seg.vmaddr);
}
}
// Walk through the bindings list to collect all the external references
// in the image.
llvm::Error error = llvm::Error::success();
auto OO = const_cast<MachOObjectFile*>(O);
for (auto bind : OO->bindTable(error)) {
if (error) {
llvm::consumeError(std::move(error));
break;
}
// The offset from the symbol is stored at the target address.
uint64_t Offset;
auto OffsetContent = getContentsAtAddress(bind.address(),
O->getBytesInAddress());
if (OffsetContent.empty())
continue;
if (O->getBytesInAddress() == 8) {
memcpy(&Offset, OffsetContent.data(), sizeof(Offset));
} else if (O->getBytesInAddress() == 4) {
uint32_t OffsetValue;
memcpy(&OffsetValue, OffsetContent.data(), sizeof(OffsetValue));
Offset = OffsetValue;
} else {
assert(false && "unexpected word size?!");
}
DynamicRelocations.insert({bind.address(), {bind.symbolName(), Offset}});
}
if (error) {
llvm::consumeError(std::move(error));
}
}
template<typename ELFT>
void scanELFType(const ELFObjectFile<ELFT> *O) {
using namespace llvm::ELF;
HeaderAddress = UINT64_MAX;
auto phdrs = O->getELFFile().program_headers();
if (!phdrs) {
llvm::consumeError(phdrs.takeError());
}
for (auto &ph : *phdrs) {
if (ph.p_filesz == 0)
continue;
auto contents = O->getData().slice(ph.p_offset,
ph.p_offset + ph.p_filesz);
if (contents.empty() || contents.size() != ph.p_filesz)
continue;
Segments.push_back({ph.p_vaddr, contents});
HeaderAddress = std::min(HeaderAddress, (uint64_t)ph.p_vaddr);
}
// Collect the dynamic relocations.
auto resolver = getRelocationResolver(*O);
auto resolverSupports = resolver.first;
auto resolve = resolver.second;
if (!resolverSupports || !resolve)
return;
auto machine = O->getELFFile().getHeader().e_machine;
auto relativeRelocType = getELFRelativeRelocationType(machine);
for (auto &S : static_cast<const ELFObjectFileBase*>(O)
->dynamic_relocation_sections()) {
bool isRela = O->getSection(S.getRawDataRefImpl())->sh_type
== llvm::ELF::SHT_RELA;
for (const RelocationRef &R : S.relocations()) {
// `getRelocationResolver` doesn't handle RELATIVE relocations, so we
// have to do that ourselves.
if (isRela && R.getType() == relativeRelocType) {
auto rela = O->getRela(R.getRawDataRefImpl());
DynamicRelocations.insert({R.getOffset(),
{{}, HeaderAddress + rela->r_addend}});
continue;
}
if (!resolverSupports(R.getType()))
continue;
auto symbol = R.getSymbol();
auto name = symbol->getName();
if (!name) {
llvm::consumeError(name.takeError());
continue;
}
uint64_t offset = resolve(R.getType(), R.getOffset(), 0, 0, 0);
DynamicRelocations.insert({R.getOffset(), {*name, offset}});
}
}
}
void scanELF(const ELFObjectFileBase *O) {
if (auto le32 = dyn_cast<ELFObjectFile<ELF32LE>>(O)) {
scanELFType(le32);
} else if (auto be32 = dyn_cast<ELFObjectFile<ELF32BE>>(O)) {
scanELFType(be32);
} else if (auto le64 = dyn_cast<ELFObjectFile<ELF64LE>>(O)) {
scanELFType(le64);
} else if (auto be64 = dyn_cast<ELFObjectFile<ELF64BE>>(O)) {
scanELFType(be64);
} else {
return;
}
// FIXME: ReflectionContext tries to read bits of the ELF structure that
// aren't normally mapped by a phdr. Until that's fixed,
// allow access to the whole file 1:1 in address space that isn't otherwise
// mapped.
Segments.push_back({HeaderAddress, O->getData()});
}
void scanCOFF(const COFFObjectFile *O) {
HeaderAddress = O->getImageBase();
for (auto SectionRef : O->sections()) {
auto Section = O->getCOFFSection(SectionRef);
if (Section->SizeOfRawData == 0)
continue;
auto SectionBase = O->getImageBase() + Section->VirtualAddress;
auto SectionContent =
O->getData().slice(Section->PointerToRawData,
Section->PointerToRawData + Section->SizeOfRawData);
if (SectionContent.empty()
|| SectionContent.size() != Section->SizeOfRawData)
continue;
Segments.push_back({SectionBase, SectionContent});
}
// FIXME: We need to map the header at least, but how much of it does
// Windows typically map?
Segments.push_back({HeaderAddress, O->getData()});
}
bool isMachOWithPtrAuth() const {
auto macho = dyn_cast<MachOObjectFile>(O);
if (!macho)
return false;
auto &header = macho->getHeader();
return header.cputype == llvm::MachO::CPU_TYPE_ARM64
&& header.cpusubtype == llvm::MachO::CPU_SUBTYPE_ARM64E;
}
public:
explicit Image(const ObjectFile *O) : O(O) {
// Unfortunately llvm doesn't provide a uniform interface for iterating
// loadable segments or dynamic relocations in executable images yet.
if (auto macho = dyn_cast<MachOObjectFile>(O)) {
scanMachO(macho);
} else if (auto elf = dyn_cast<ELFObjectFileBase>(O)) {
scanELF(elf);
} else if (auto coff = dyn_cast<COFFObjectFile>(O)) {
scanCOFF(coff);
} else {
fputs("unsupported image format\n", stderr);
abort();
}
}
const ObjectFile *getObjectFile() const { return O; }
unsigned getBytesInAddress() const {
return O->getBytesInAddress();
}
uint64_t getStartAddress() const {
return HeaderAddress;
}
uint64_t getEndAddress() const {
uint64_t max = 0;
for (auto &Segment : Segments) {
max = std::max(max, Segment.Addr + Segment.Contents.size());
}
return max;
}
StringRef getContentsAtAddress(uint64_t Addr, uint64_t Size) const {
for (auto &Segment : Segments) {
auto addrInSegment = Segment.Addr <= Addr
&& Addr + Size <= Segment.Addr + Segment.Contents.size();
if (!addrInSegment)
continue;
auto offset = Addr - Segment.Addr;
auto result = Segment.Contents.drop_front(offset);
return result;
}
return {};
}
/// Resolves the pointer stored at \p Addr whose raw on-disk value is
/// \p pointerValue.
///
/// If a dynamic relocation is recorded for \p Addr, the result names that
/// relocation's symbol and offset. Otherwise the result is an address with an
/// empty symbol name, derived from \p pointerValue.
RemoteAbsolutePointer
resolvePointer(uint64_t Addr, uint64_t pointerValue) const {
  auto relocation = DynamicRelocations.find(Addr);
  if (relocation != DynamicRelocations.end()) {
    return RemoteAbsolutePointer(relocation->second.Symbol,
                                 relocation->second.Offset);
  }

  if (!isMachOWithPtrAuth())
    return RemoteAbsolutePointer("", pointerValue);

  // In Mach-O images with ptrauth, the low 32 bits of the stored value are an
  // offset from the image base, and the top 32 bits hold ptrauth
  // discriminator info, which is dropped here.
  const uint64_t offsetFromBase = pointerValue & 0xffffffffull;
  return RemoteAbsolutePointer("", HeaderAddress + offsetFromBase);
}
};
/// MemoryReader that reads from the on-disk representation of one or more
/// executable or dynamic library images.
///
/// Every image has a "slide" that is added to its image-relative addresses to
/// form the remote addresses handed out by this reader. With a single image
/// the slide is zero. With several images, the slides place the images one
/// after another starting at address 0, each beginning on a 16 KiB boundary.
class ObjectMemoryReader : public MemoryReader {
  /// An image plus the offset that maps its addresses into the reader's
  /// shared address space.
  struct ImageEntry {
    // NOTE: must stay the first member; encodeImageIndexAndAddress converts a
    // pointer to this member back into a pointer to the enclosing entry.
    Image TheImage;
    uint64_t Slide;
  };
  std::vector<ImageEntry> Images;

  /// Finds the image whose slid address range contains \p Addr.
  ///
  /// \returns the image and the corresponding image-relative address, or
  /// {nullptr, 0} if no image covers \p Addr.
  std::pair<const Image *, uint64_t>
  decodeImageIndexAndAddress(uint64_t Addr) const {
    for (auto &Entry : Images) {
      if (Entry.TheImage.getStartAddress() + Entry.Slide <= Addr
          && Addr < Entry.TheImage.getEndAddress() + Entry.Slide) {
        return {&Entry.TheImage, Addr - Entry.Slide};
      }
    }
    return {nullptr, 0};
  }

  /// Converts an image-relative address back into a reader address by adding
  /// the slide of the entry that holds \p image.
  ///
  /// \p image must point at the TheImage member of an element of Images.
  uint64_t
  encodeImageIndexAndAddress(const Image *image, uint64_t imageAddr) const {
    // TheImage is the first member of ImageEntry, so the address of the image
    // is reinterpreted as the address of its enclosing entry.
    auto entry = reinterpret_cast<const ImageEntry *>(image);
    return imageAddr + entry->Slide;
  }

  /// Returns the contents found at reader address \p Addr in the image that
  /// covers it, or an empty StringRef if no image covers \p Addr or the image
  /// cannot supply \p Size bytes there.
  StringRef getContentsAtAddress(uint64_t Addr, uint64_t Size) {
    const Image *image;
    uint64_t imageAddr;
    std::tie(image, imageAddr) = decodeImageIndexAndAddress(Addr);
    if (!image)
      return StringRef();
    return image->getContentsAtAddress(imageAddr, Size);
  }

public:
  /// Builds a reader over \p ObjectFiles.
  ///
  /// Aborts the process (after printing a diagnostic to stderr) if the list
  /// is empty, if the files do not all share one address size, or if several
  /// files with an address size below 8 bytes do not fit into 32 bits of
  /// address space.
  explicit ObjectMemoryReader(
      const std::vector<const ObjectFile *> &ObjectFiles) {
    if (ObjectFiles.empty()) {
      fputs("no object files provided\n", stderr);
      abort();
    }
    unsigned WordSize = 0;
    for (const ObjectFile *O : ObjectFiles) {
      // All the object files we look at should share a word size.
      if (!WordSize) {
        WordSize = O->getBytesInAddress();
      } else if (WordSize != O->getBytesInAddress()) {
        fputs("object files must all be for the same architecture\n", stderr);
        abort();
      }
      Images.push_back({Image(O), 0});
    }
    // If there is more than one image loaded, try to fit them into one
    // address space.
    if (Images.size() > 1) {
      uint64_t NextAddrSpace = 0;
      for (auto &Entry : Images) {
        Entry.Slide = NextAddrSpace - Entry.TheImage.getStartAddress();
        NextAddrSpace +=
            Entry.TheImage.getEndAddress() - Entry.TheImage.getStartAddress();
        // Round up to the next 16 KiB boundary.
        NextAddrSpace = (NextAddrSpace + 16383) & ~16383;
      }
      if (WordSize < 8 && NextAddrSpace > 0xFFFFFFFFu) {
        fputs("object files did not fit in address space\n", stderr);
        abort();
      }
    }
  }

  /// Returns the loaded images together with their slides.
  ArrayRef<ImageEntry> getImages() const { return Images; }

  /// Answers a data layout query by writing the result into \p outBuffer.
  ///
  /// The width written depends on the query: one byte for the pointer size,
  /// size size and Obj-C reserved low bits; a word-sized integer for the
  /// ptrauth mask; and 64 bits for the least valid pointer value.
  /// \p inBuffer is not used.
  ///
  /// \returns true if the query was answered, false otherwise.
  bool queryDataLayout(DataLayoutQueryType type, void *inBuffer,
                       void *outBuffer) override {
    auto wordSize = Images.front().TheImage.getBytesInAddress();
    // TODO: The following should be set based on inspecting the image.
    // This code sets it to match the platform this code was compiled for.
#if defined(__APPLE__) && __APPLE__
    auto applePlatform = true;
#else
    auto applePlatform = false;
#endif
#if defined(__APPLE__) && __APPLE__ && ((defined(TARGET_OS_IOS) && TARGET_OS_IOS) || (defined(TARGET_OS_WATCH) && TARGET_OS_WATCH) || (defined(TARGET_OS_TV) && TARGET_OS_TV) || defined(__arm64__))
    auto iosDerivedPlatform = true;
#else
    auto iosDerivedPlatform = false;
#endif
    switch (type) {
    case DLQ_GetPointerSize: {
      auto result = static_cast<uint8_t *>(outBuffer);
      *result = wordSize;
      return true;
    }
    case DLQ_GetSizeSize: {
      auto result = static_cast<uint8_t *>(outBuffer);
      *result = wordSize;
      return true;
    }
    case DLQ_GetPtrAuthMask: {
      // We don't try to sign pointers at all in our view of the object
      // mapping, so every bit of a pointer is significant.
      if (wordSize == 4) {
        auto result = static_cast<uint32_t *>(outBuffer);
        *result = static_cast<uint32_t>(~0ull);
        return true;
      } else if (wordSize == 8) {
        auto result = static_cast<uint64_t *>(outBuffer);
        *result = static_cast<uint64_t>(~0ull);
        return true;
      }
      return false;
    }
    case DLQ_GetObjCReservedLowBits: {
      auto result = static_cast<uint8_t *>(outBuffer);
      if (applePlatform && !iosDerivedPlatform && wordSize == 8) {
        // Obj-C reserves low bit on 64-bit macOS only.
        // Other Apple platforms don't reserve this bit (even when
        // running on x86_64-based simulators).
        *result = 1;
      } else {
        *result = 0;
      }
      return true;
    }
    case DLQ_GetLeastValidPointerValue: {
      auto result = static_cast<uint64_t *>(outBuffer);
      if (applePlatform && wordSize == 8) {
        // Swift reserves the first 4GiB on 64-bit Apple platforms
        *result = 0x100000000;
      } else {
        // Swift reserves the first 4KiB everywhere else
        *result = 0x1000;
      }
      return true;
    }
    }
    return false;
  }

  /// Returns the reader address at which image \p i starts, i.e. its start
  /// address with the image's slide applied.
  RemoteAddress getImageStartAddress(unsigned i) const {
    assert(i < Images.size());
    return RemoteAddress(
        encodeImageIndexAndAddress(&Images[i].TheImage,
                                   Images[i].TheImage.getStartAddress()));
  }

  // TODO: We could consult the dynamic symbol tables of the images to
  // implement this.
  /// Symbol lookup is not implemented; always returns a null address.
  RemoteAddress getSymbolAddress(const std::string &name) override {
    return RemoteAddress(nullptr);
  }

  /// Returns a buffer that points directly into the image contents at
  /// \p Addr, paired with no_op_destructor. The buffer is null when no image
  /// covers \p Addr or when the requested range is not contained in a single
  /// segment.
  ReadBytesResult readBytes(RemoteAddress Addr, uint64_t Size) override {
    auto addrValue = Addr.getAddressData();
    auto resultBuffer = getContentsAtAddress(addrValue, Size);
    return ReadBytesResult(resultBuffer.data(), no_op_destructor);
  }

  /// Appends the NUL-terminated string found at \p Addr to \p Dest.
  ///
  /// \returns false, leaving \p Dest untouched, if \p Addr is not readable or
  /// no terminator occurs before the end of the containing segment.
  bool readString(RemoteAddress Addr, std::string &Dest) override {
    auto addrValue = Addr.getAddressData();
    auto resultBuffer = getContentsAtAddress(addrValue, 1);
    if (resultBuffer.empty())
      return false;
    // Make sure there's a null terminator somewhere in the contents.
    auto terminator = resultBuffer.find('\0');
    if (terminator == StringRef::npos)
      return false;
    Dest.append(resultBuffer.data(), terminator);
    return true;
  }

  /// Resolves the pointer stored at \p Addr whose raw value is
  /// \p pointerValue, using the image that covers \p Addr.
  ///
  /// \returns a default-constructed pointer if no image covers \p Addr.
  RemoteAbsolutePointer resolvePointer(RemoteAddress Addr,
                                       uint64_t pointerValue) override {
    auto addrValue = Addr.getAddressData();
    const Image *image;
    uint64_t imageAddr;
    std::tie(image, imageAddr) =
        decodeImageIndexAndAddress(addrValue);
    if (!image)
      return RemoteAbsolutePointer();
    auto resolved = image->resolvePointer(imageAddr, pointerValue);
    if (resolved && resolved.isResolved()) {
      // Apply the image's slide again to produce a remote address pointing
      // into the same image.
      return RemoteAbsolutePointer("", encodeImageIndexAndAddress(image,
          resolved.getResolvedAddress().getAddressData()));
    }
    // If the pointer is relative to an unresolved relocation, leave it as is.
    return resolved;
  }
};
/// Type-erased owning pointer: the pointee is held as `void *` and destroyed
/// through the function pointer supplied at construction.
using ReflectionContextOwner
= std::unique_ptr<void, void (*)(void*)>;
/// Bundles an owned reflection context with references to its type ref
/// builder and to the memory reader it was created from.
///
/// makeReflectionContextForMetadataReader brace-initializes this struct
/// positionally, so the member order below must not change.
struct ReflectionContextHolder {
// Owns the type-erased reflection context.
ReflectionContextOwner Owner;
// The builder obtained from the context via getBuilder().
TypeRefBuilder &Builder;
// The reader the context was constructed with.
ObjectMemoryReader &Reader;
};
/// Creates a ReflectionContext<Runtime> that reads through \p reader,
/// registers every image of the reader with it, and returns it wrapped in a
/// ReflectionContextHolder together with its builder and the reader.
template <typename Runtime>
static ReflectionContextHolder makeReflectionContextForMetadataReader(
    std::shared_ptr<ObjectMemoryReader> reader) {
  using ContextTy = ReflectionContext<Runtime>;

  ContextTy *reflectionContext = new ContextTy(reader);
  auto &typeRefBuilder = reflectionContext->getBuilder();

  const unsigned imageCount = reader->getImages().size();
  for (unsigned index = 0; index != imageCount; ++index)
    reflectionContext->addImage(reader->getImageStartAddress(index));

  // The owner stores the context as a void *, so the deleter has to cast it
  // back to the concrete context type before deleting it.
  return {ReflectionContextOwner(reflectionContext,
                                 [](void *erased) {
                                   delete static_cast<ContextTy *>(erased);
                                 }),
          typeRefBuilder, *reader};
}
/// Builds an ObjectMemoryReader over \p objectFiles and a reflection context
/// on top of it, choosing the 4-byte or 8-byte runtime target according to
/// the pointer size the reader reports.
///
/// Whether the Obj-C interop runtime flavor is used is decided at compile
/// time by SWIFT_OBJC_INTEROP. Prints a diagnostic to stderr and aborts if
/// the pointer size is neither 4 nor 8, or if it cannot be queried.
static ReflectionContextHolder makeReflectionContextForObjectFiles(
    const std::vector<const ObjectFile *> &objectFiles) {
  // FIXME: This could be configurable.
#if SWIFT_OBJC_INTEROP
  using Runtime32 = External<WithObjCInterop<RuntimeTarget<4>>>;
  using Runtime64 = External<WithObjCInterop<RuntimeTarget<8>>>;
#else
  using Runtime32 = External<NoObjCInterop<RuntimeTarget<4>>>;
  using Runtime64 = External<NoObjCInterop<RuntimeTarget<8>>>;
#endif

  auto Reader = std::make_shared<ObjectMemoryReader>(objectFiles);

  // Keep the size at 0 unless the query succeeds, so that a failed query ends
  // up in the default case below instead of switching on an indeterminate
  // value.
  uint8_t pointerSize = 0;
  if (!Reader->queryDataLayout(DataLayoutQueryType::DLQ_GetPointerSize,
                               nullptr, &pointerSize))
    pointerSize = 0;

  switch (pointerSize) {
  case 4:
    return makeReflectionContextForMetadataReader<Runtime32>(
        std::move(Reader));
  case 8:
    return makeReflectionContextForMetadataReader<Runtime64>(
        std::move(Reader));
  default:
    fputs("unsupported word size in object file\n", stderr);
    abort();
  }
}
static int doDumpReflectionSections(ArrayRef<std::string> BinaryFilenames, static int doDumpReflectionSections(ArrayRef<std::string> BinaryFilenames,
StringRef Arch, ActionType Action, StringRef Arch, ActionType Action,
std::ostream &stream) { std::ostream &stream) {