Files
swift-mirror/tools/swift-reflection-dump/swift-reflection-dump.cpp
Joe Groff bce163608f swift-reflection-dump: Mach-O binding list support
When resolving a pointer value in a Mach-O image, look through the binding list
to see if a symbol address will be added at a given location, and return
an unresolved `RemoteAbsolutePointer` with the symbol name if so.
2019-10-01 12:36:55 -07:00

492 lines
16 KiB
C++

//===--- swift-reflection-dump.cpp - Reflection testing application -------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
// This is a host-side tool to dump remote reflection sections in swift
// binaries.
//===----------------------------------------------------------------------===//
#include "swift/ABI/MetadataValues.h"
#include "swift/Basic/LLVMInitialize.h"
#include "swift/Demangling/Demangle.h"
#include "swift/Reflection/ReflectionContext.h"
#include "swift/Reflection/TypeRef.h"
#include "swift/Reflection/TypeRefBuilder.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/ELF.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#if defined(_WIN32)
#include <io.h>
#else
#include <unistd.h>
#endif
#include <algorithm>
#include <csignal>
#include <iostream>
using llvm::ArrayRef;
using llvm::dyn_cast;
using llvm::StringRef;
using namespace llvm::object;
using namespace swift;
using namespace swift::reflection;
using namespace swift::remote;
using namespace Demangle;
enum class ActionType { DumpReflectionSections, DumpTypeLowering };
namespace options {
static llvm::cl::opt<ActionType> Action(
llvm::cl::desc("Mode:"),
llvm::cl::values(
clEnumValN(ActionType::DumpReflectionSections,
"dump-reflection-sections",
"Dump the field reflection section"),
clEnumValN(
ActionType::DumpTypeLowering, "dump-type-lowering",
"Dump the field layout for typeref strings read from stdin")),
llvm::cl::init(ActionType::DumpReflectionSections));
static llvm::cl::list<std::string>
BinaryFilename("binary-filename",
llvm::cl::desc("Filenames of the binary files"),
llvm::cl::OneOrMore);
static llvm::cl::opt<std::string>
Architecture("arch",
llvm::cl::desc("Architecture to inspect in the binary"),
llvm::cl::Required);
} // end namespace options
template <typename T> static T unwrap(llvm::Expected<T> value) {
if (value)
return std::move(value.get());
llvm::errs() << "swift-reflection-test error: " << toString(value.takeError())
<< "\n";
exit(EXIT_FAILURE);
}
using NativeReflectionContext =
swift::reflection::ReflectionContext<External<RuntimeTarget<sizeof(uintptr_t)>>>;
using ReadBytesResult = swift::remote::MemoryReader::ReadBytesResult;
// Since ObjectMemoryReader maintains ownership of the ObjectFiles and their
// raw data, we can vend ReadBytesResults with no-op destructors.
static void no_op_destructor(const void*) {}
class Image {
private:
struct Segment {
uint64_t Addr;
StringRef Contents;
};
uint64_t HeaderAddress;
std::vector<Segment> Segments;
llvm::DenseMap<uint64_t, StringRef> DynamicRelocations;
void scanMachO(const MachOObjectFile *O) {
using namespace llvm::MachO;
HeaderAddress = UINT64_MAX;
// Collect the segment preferred vm mappings.
for (const auto &Load : O->load_commands()) {
if (Load.C.cmd == LC_SEGMENT_64) {
auto Seg = O->getSegment64LoadCommand(Load);
if (Seg.filesize == 0)
continue;
auto contents = O->getData().slice(Seg.fileoff,
Seg.fileoff + Seg.filesize);
if (contents.empty() || contents.size() != Seg.filesize)
continue;
Segments.push_back({Seg.vmaddr, contents});
HeaderAddress = std::min(HeaderAddress, Seg.vmaddr);
} else if (Load.C.cmd == LC_SEGMENT) {
auto Seg = O->getSegmentLoadCommand(Load);
if (Seg.filesize == 0)
continue;
auto contents = O->getData().slice(Seg.fileoff,
Seg.fileoff + Seg.filesize);
if (contents.empty() || contents.size() != Seg.filesize)
continue;
Segments.push_back({Seg.vmaddr, contents});
HeaderAddress = std::min(HeaderAddress, (uint64_t)Seg.vmaddr);
}
}
// Walk through the bindings list to collect all the external references
// in the image.
llvm::Error error = llvm::Error::success();
auto OO = const_cast<MachOObjectFile*>(O);
for (auto bind : OO->bindTable(error)) {
if (error) {
llvm::consumeError(std::move(error));
break;
}
DynamicRelocations.insert({bind.address(), bind.symbolName()});
}
if (error) {
llvm::consumeError(std::move(error));
}
}
template<typename ELFT>
void scanELFType(const ELFObjectFile<ELFT> *O) {
using namespace llvm::ELF;
HeaderAddress = UINT64_MAX;
auto phdrs = O->getELFFile()->program_headers();
if (!phdrs) {
llvm::consumeError(phdrs.takeError());
return;
}
for (auto &ph : *phdrs) {
if (ph.p_filesz == 0)
continue;
auto contents = O->getData().slice(ph.p_offset,
ph.p_offset + ph.p_filesz);
if (contents.empty() || contents.size() != ph.p_filesz)
continue;
Segments.push_back({ph.p_vaddr, contents});
HeaderAddress = std::min(HeaderAddress, (uint64_t)ph.p_vaddr);
}
}
void scanELF(const ELFObjectFileBase *O) {
if (auto le32 = dyn_cast<ELFObjectFile<ELF32LE>>(O)) {
scanELFType(le32);
} else if (auto be32 = dyn_cast<ELFObjectFile<ELF32BE>>(O)) {
scanELFType(be32);
} else if (auto le64 = dyn_cast<ELFObjectFile<ELF64LE>>(O)) {
scanELFType(le64);
} else if (auto be64 = dyn_cast<ELFObjectFile<ELF64BE>>(O)) {
scanELFType(be64);
}
// FIXME: ReflectionContext tries to read bits of the ELF structure that
// aren't normally mapped by a phdr. Until that's fixed,
// allow access to the whole file 1:1 in address space that isn't otherwise
// mapped.
Segments.push_back({HeaderAddress, O->getData()});
}
void scanCOFF(const COFFObjectFile *O) {
HeaderAddress = O->getImageBase();
for (auto SectionRef : O->sections()) {
auto Section = O->getCOFFSection(SectionRef);
if (Section->SizeOfRawData == 0)
continue;
auto SectionBase = O->getImageBase() + Section->VirtualAddress;
auto SectionContent =
O->getData().slice(Section->PointerToRawData,
Section->PointerToRawData + Section->SizeOfRawData);
if (SectionContent.empty()
|| SectionContent.size() != Section->SizeOfRawData)
continue;
Segments.push_back({SectionBase, SectionContent});
}
Segments.push_back({HeaderAddress, O->getData()});
}
public:
explicit Image(const ObjectFile *O) {
// Unfortunately llvm doesn't provide a uniform interface for iterating
// loadable segments or dynamic relocations in executable images yet.
if (auto macho = dyn_cast<MachOObjectFile>(O)) {
scanMachO(macho);
} else if (auto elf = dyn_cast<ELFObjectFileBase>(O)) {
scanELF(elf);
} else if (auto coff = dyn_cast<COFFObjectFile>(O)) {
scanCOFF(coff);
} else {
fputs("unsupported image format\n", stderr);
abort();
}
// ObjectMemoryReader uses the most significant 16 bits of the address to
// index multiple images, so if an object maps stuff out of that range
// we won't be able to read it. 2**48 of virtual address space ought to be
// enough for anyone, but warn if we blow that limit.
for (auto Segment : Segments) {
if (Segment.Addr >= 0xFFFFFFFFFFFFull) {
fputs("warning: segment mapped at address above 2**48\n", stderr);
continue;
}
}
}
uint64_t getStartAddress() const {
return HeaderAddress;
}
StringRef getContentsAtAddress(uint64_t Addr, uint64_t Size) const {
for (auto &Segment : Segments) {
auto addrInSegment = Segment.Addr <= Addr
&& Addr + Size <= Segment.Addr + Segment.Contents.size();
if (!addrInSegment)
continue;
auto offset = Addr - Segment.Addr;
return Segment.Contents.drop_front(offset);
}
return {};
}
RemoteAbsolutePointer
resolvePointer(uint64_t Addr, uint64_t pointerValue) const {
auto found = DynamicRelocations.find(Addr);
if (found == DynamicRelocations.end())
return RemoteAbsolutePointer("", pointerValue);
return RemoteAbsolutePointer(found->second, pointerValue);
}
};
/// MemoryReader that reads from the on-disk representation of an executable
/// or dynamic library image.
///
/// This reader uses a remote addressing scheme where the most significant
/// 16 bits of the address value serve as an index into the array of loaded images,
/// and the low 48 bits correspond to the preferred virtual address mapping of
/// the image.
class ObjectMemoryReader : public MemoryReader {
std::vector<Image> Images;
std::pair<const Image *, uint64_t>
decodeImageIndexAndAddress(uint64_t Addr) const {
unsigned index = Addr >> 48;
if (index >= Images.size())
return {nullptr, 0};
return {&Images[index], Addr & ((1ull << 48) - 1)};
}
uint64_t
encodeImageIndexAndAddress(const Image *image, uint64_t imageAddr) const {
unsigned index = image - Images.data();
return imageAddr | ((uint64_t)index << 48);
}
StringRef getContentsAtAddress(uint64_t Addr, uint64_t Size) {
const Image *image;
uint64_t imageAddr;
std::tie(image, imageAddr) = decodeImageIndexAndAddress(Addr);
if (!image)
return StringRef();
return image->getContentsAtAddress(imageAddr, Size);
}
public:
explicit ObjectMemoryReader(
const std::vector<const ObjectFile *> &ObjectFiles) {
// We use a 16-bit index for images, so can't take more than 64K at once.
if (ObjectFiles.size() >= 0x10000) {
fputs("can't dump more than 65,536 images at once", stderr);
abort();
}
for (const ObjectFile *O : ObjectFiles)
Images.emplace_back(O);
}
ArrayRef<Image> getImages() const { return Images; }
bool queryDataLayout(DataLayoutQueryType type, void *inBuffer,
void *outBuffer) override {
switch (type) {
case DLQ_GetPointerSize: {
auto result = static_cast<uint8_t *>(outBuffer);
*result = sizeof(void *);
return true;
}
case DLQ_GetSizeSize: {
auto result = static_cast<uint8_t *>(outBuffer);
*result = sizeof(size_t);
return true;
}
}
return false;
}
RemoteAddress getImageStartAddress(unsigned i) const {
assert(i < Images.size());
return RemoteAddress(
encodeImageIndexAndAddress(&Images[i], Images[i].getStartAddress()));
}
// TODO: We could consult the dynamic symbol tables of the images to
// implement this.
RemoteAddress getSymbolAddress(const std::string &name) override {
return RemoteAddress(nullptr);
}
ReadBytesResult readBytes(RemoteAddress Addr, uint64_t Size) override {
auto addrValue = Addr.getAddressData();
auto resultBuffer = getContentsAtAddress(addrValue, Size);
return ReadBytesResult(resultBuffer.data(), no_op_destructor);
}
bool readString(RemoteAddress Addr, std::string &Dest) override {
auto addrValue = Addr.getAddressData();
auto resultBuffer = getContentsAtAddress(addrValue, 1);
if (resultBuffer.empty())
return false;
// Make sure there's a null terminator somewhere in the contents.
unsigned i = 0;
for (unsigned e = resultBuffer.size(); i < e; ++i) {
if (resultBuffer[i] == 0)
goto found_terminator;
}
return false;
found_terminator:
Dest.append(resultBuffer.begin(), resultBuffer.begin() + i);
return true;
}
RemoteAbsolutePointer resolvePointer(RemoteAddress Addr,
uint64_t pointerValue) override {
auto addrValue = Addr.getAddressData();
const Image *image;
uint64_t imageAddr;
std::tie(image, imageAddr) =
decodeImageIndexAndAddress(addrValue);
if (!image)
return RemoteAbsolutePointer();
auto resolved = image->resolvePointer(imageAddr, pointerValue);
if (resolved && resolved.isResolved()) {
// Mix in the image index again to produce a remote address pointing into
// the same image.
return RemoteAbsolutePointer("", encodeImageIndexAndAddress(image,
resolved.getResolvedAddress().getAddressData()));
}
// If the pointer is relative to an unresolved relocation, leave it as is.
return resolved;
}
};
static int doDumpReflectionSections(ArrayRef<std::string> BinaryFilenames,
StringRef Arch, ActionType Action,
std::ostream &OS) {
// Note: binaryOrError and objectOrError own the memory for our ObjectFile;
// once they go out of scope, we can no longer do anything.
std::vector<OwningBinary<Binary>> BinaryOwners;
std::vector<std::unique_ptr<ObjectFile>> ObjectOwners;
std::vector<const ObjectFile *> ObjectFiles;
for (const std::string &BinaryFilename : BinaryFilenames) {
auto BinaryOwner = unwrap(createBinary(BinaryFilename));
Binary *BinaryFile = BinaryOwner.getBinary();
// The object file we are doing lookups in -- either the binary itself, or
// a particular slice of a universal binary.
std::unique_ptr<ObjectFile> ObjectOwner;
const ObjectFile *O = dyn_cast<ObjectFile>(BinaryFile);
if (!O) {
auto Universal = cast<MachOUniversalBinary>(BinaryFile);
ObjectOwner = unwrap(Universal->getObjectForArch(Arch));
O = ObjectOwner.get();
}
// Retain the objects that own section memory
BinaryOwners.push_back(std::move(BinaryOwner));
ObjectOwners.push_back(std::move(ObjectOwner));
ObjectFiles.push_back(O);
}
auto Reader = std::make_shared<ObjectMemoryReader>(ObjectFiles);
NativeReflectionContext Context(Reader);
for (unsigned i = 0, e = Reader->getImages().size(); i < e; ++i) {
Context.addImage(Reader->getImageStartAddress(i));
}
switch (Action) {
case ActionType::DumpReflectionSections:
// Dump everything
Context.getBuilder().dumpAllSections(OS);
break;
case ActionType::DumpTypeLowering: {
for (std::string Line; std::getline(std::cin, Line);) {
if (Line.empty())
continue;
if (StringRef(Line).startswith("//"))
continue;
Demangle::Demangler Dem;
auto Demangled = Dem.demangleType(Line);
auto *TypeRef =
swift::Demangle::decodeMangledType(Context.getBuilder(), Demangled);
if (TypeRef == nullptr) {
OS << "Invalid typeref: " << Line << "\n";
continue;
}
TypeRef->dump(OS);
auto *TypeInfo =
Context.getBuilder().getTypeConverter().getTypeInfo(TypeRef);
if (TypeInfo == nullptr) {
OS << "Invalid lowering\n";
continue;
}
TypeInfo->dump(OS);
}
break;
}
}
return EXIT_SUCCESS;
}
int main(int argc, char *argv[]) {
PROGRAM_START(argc, argv);
llvm::cl::ParseCommandLineOptions(argc, argv, "Swift Reflection Dump\n");
return doDumpReflectionSections(options::BinaryFilename,
options::Architecture, options::Action,
std::cout);
}