//===------------------ ExternalGenericMetadataBuilder.cpp ----------------===// // // This source file is part of the Swift.org open source project // // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // // See https://swift.org/LICENSE.txt for license information // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors // //===----------------------------------------------------------------------===// #include "ExternalGenericMetadataBuilder.h" #include "swift/ABI/MetadataValues.h" #include "swift/Demangling/Demangle.h" #include "swift/RemoteInspection/Records.h" #include "swift/Runtime/GenericMetadataBuilder.h" #include "swift/Runtime/LibPrespecialized.h" #include "swift/Runtime/PrebuiltStringMap.h" #include "llvm/ADT/StringMap.h" #include "llvm/Object/Binary.h" #include "llvm/Object/MachO.h" #include "llvm/Object/MachOUniversal.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/JSON.h" #include #include #include #include #include #include #include #include "swift/Demangling/Demangler.h" namespace swift { // Logging macro. Currently we always enable logs, but we'll want to make this // conditional eventually. LOG_ENABLED can be used to gate work that's needed // for log statements but not for anything else. enum class LogLevel { Detail, Info, Warning, }; static const LogLevel logLevel = LogLevel::Warning; #pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments" #define LOG(level, fmt, ...) \ do { \ if (level >= logLevel) \ fprintf(stderr, "%s:%d:%s: " fmt "\n", METADATA_BUILDER_LOG_FILE_NAME, \ __LINE__, __func__, __VA_ARGS__); \ } while (0) static const char *nodeKindString(swift::Demangle::Node::Kind k) { switch (k) { #define NODE(ID) \ case Node::Kind::ID: \ return #ID; #include "swift/Demangling/DemangleNodes.def" } return "Demangle::Node::Kind::???"; } // An external runtime target that preserves the static type of pointees. template struct TypedExternal { // using StoredPointer = typename Runtime::StoredPointer; struct StoredPointer { typename Runtime::StoredPointer value; StoredPointer(MetadataKind kind) : value(static_cast(kind)) {} static bool isNull(StoredPointer ptr) { return ptr.value == 0; } }; using StoredSignedPointer = typename Runtime::StoredSignedPointer; using StoredSize = typename Runtime::StoredSize; using StoredPointerDifference = typename Runtime::StoredPointerDifference; static constexpr size_t PointerSize = Runtime::PointerSize; static constexpr bool ObjCInterop = Runtime::ObjCInterop; const StoredPointer PointerValue; template struct TypedInteger { Integer value; }; template struct TypedSignedInteger { Integer SignedValue; }; template using Pointer = TypedInteger; template using SignedPointer = TypedSignedInteger; template using FarRelativeDirectPointer = TypedInteger; struct RelativeIndirectablePointerTag {}; template using RelativeIndirectablePointer = TypedInteger; template using RelativeDirectPointer = TypedInteger; template using CompactFunctionPointer = TypedInteger; StoredPointer getStrippedSignedPointer(const StoredSignedPointer pointer) const { return swift_ptrauth_strip(pointer); } }; template inline MetadataKind getEnumeratedMetadataKind(T kind, decltype(kind.value) dummy = 0) { return getEnumeratedMetadataKind(kind.value); } template NodePointer wrapNode(Demangler &demangler, NodePointer node, Args... kinds) { std::initializer_list kindsList{kinds...}; for (auto iter = std::rbegin(kindsList); iter != std::rend(kindsList); iter++) { auto wrapper = demangler.createNode(*iter); wrapper->addChild(node, demangler); node = wrapper; } return node; } std::string getErrorString(llvm::Error error) { std::string errorString; handleAllErrors(std::move(error), [&](const llvm::ErrorInfoBase &E) { errorString = E.message(); }); return errorString; } struct FixupTarget { // This value in `library` means the target is the containing library. static constexpr int selfLibrary = 0; int library; // Virtual address or symbol name. std::variant target; }; struct MachOFile { std::optional> memoryBuffer; std::unique_ptr objectFile; std::string path; // With the leading '_'. llvm::StringMap symbols; // Guaranteed that getName returns a value. std::vector namedSymbolsSortedByAddress; std::vector exportedSymbolsSortedByAddress; std::vector libraryNames; std::unordered_map fixups; MachOFile(std::optional> &&memoryBuffer, std::unique_ptr &&objectFile, const std::string &path) : memoryBuffer(std::move(memoryBuffer)), objectFile(std::move(objectFile)), path(path) {} }; template class ReaderWriter; template class ExternalGenericMetadataBuilderContext { struct Atom; struct FileTarget { MachOFile *file; uint64_t addressInFile; }; struct AtomTarget { Atom *atom; uint64_t offset; }; struct PointerTarget { template class Buffer; unsigned offset; unsigned size; std::variant fileOrAtom; }; struct Atom { std::string name; std::vector buffer; std::vector pointerTargetsSorted; }; std::vector> atoms; Atom *allocateAtom(size_t size) { auto atom = atoms.emplace_back(new Atom).get(); atom->buffer.resize(size); return atom; } public: using ContextDescriptor = TargetContextDescriptor; using ExtensionContextDescriptor = TargetExtensionContextDescriptor; using ModuleContextDescriptor = TargetModuleContextDescriptor; using ProtocolDescriptor = TargetProtocolDescriptor; using TypeContextDescriptor = TargetTypeContextDescriptor; using Metadata = TargetMetadata; using StoredPointer = typename Runtime::StoredPointer; template using Pointer = typename Runtime::template Pointer; template using SignedPointer = typename Runtime::template SignedPointer; template class Buffer { template friend class Buffer; // Copy constructor, but done as a static function to make errors less // confusing when outside code accidentally tries to initialize a Buffer // with a Buffer. template static Buffer createFrom(const Buffer &other) { Buffer result{other.context}; result.section = other.section; result.ptr = (T *)(void *)other.ptr; result.file = other.file; result.atom = other.atom; return result; } protected: template BuilderErrorOr> resolveVirtualAddressInFile(MachOFile *targetFile, uint64_t vmAddr) { auto section = context->findSectionInFile(targetFile, vmAddr); if (!section) return BuilderError("No section contained resolved address %#" PRIx64, vmAddr); intptr_t targetOffsetInSection = vmAddr - section->getAddress(); auto targetSectionContents = section->getContents(); if (!targetSectionContents) { return BuilderError( "Failed to get target section contents: %s", getErrorString(targetSectionContents.takeError()).c_str()); } intptr_t target = (intptr_t)targetSectionContents->data() + targetOffsetInSection; return Buffer{context, targetFile, *section, (U *)target}; } template BuilderErrorOr> resolveVirtualAddress(uint64_t vmAddr) { return resolveVirtualAddressInFile(file, vmAddr); } template BuilderErrorOr> resolveRelativePointer(const void *ptr, Integer value) { if (Nullable && value == 0) return {{Buffer::null(context)}}; auto pointerVirtualAddress = getVirtualAddress(ptr); if (!pointerVirtualAddress) return *pointerVirtualAddress.getError(); uintptr_t targetVirtualAddress = *pointerVirtualAddress + value; return resolveVirtualAddress(targetVirtualAddress); } BuilderErrorOr getLibraryName(int ordinal) { if (ordinal > 0 && (size_t)ordinal <= file->libraryNames.size()) return file->libraryNames[ordinal - 1]; return BuilderError("ordinal %d is out of bounds", ordinal); } public: ExternalGenericMetadataBuilderContext *context; MachOFile *file = nullptr; llvm::object::SectionRef section; T *ptr; Atom *atom = nullptr; static Buffer null(ExternalGenericMetadataBuilderContext *context) { return Buffer{context}; } Buffer(ExternalGenericMetadataBuilderContext *context) : context(context), ptr(nullptr) {} explicit Buffer(ExternalGenericMetadataBuilderContext *context, MachOFile *file, llvm::object::SectionRef section, T *ptr) : context(context), file(file), section(section), ptr(ptr) {} explicit Buffer(ExternalGenericMetadataBuilderContext *context, MachOFile *file, llvm::object::SectionRef section, llvm::StringRef str) : context(context), file(file), section(section), ptr(reinterpret_cast(str.data())) {} operator bool() { return ptr != nullptr; } bool isNull() { return ptr == nullptr; } Buffer toConst() { return Buffer::createFrom(*this); } template Buffer cast() { return Buffer::createFrom(*this); } Buffer offsetBy(ptrdiff_t offset) const { auto newBuffer = *this; newBuffer.ptr = (T *)((uintptr_t)ptr + offset); return newBuffer; } BuilderErrorOr getVirtualAddress(const void *ptr) { auto sectionContents = section.getContents(); if (!sectionContents) { return BuilderError( "Failed to get section contents: %s", getErrorString(sectionContents.takeError()).c_str()); } intptr_t sectionStartInMemory = (intptr_t)sectionContents->data(); intptr_t pointerAddress = (intptr_t)ptr; uint64_t pointerVirtualAddress = section.getAddress() + pointerAddress - sectionStartInMemory; return pointerVirtualAddress; } // Get the virtual address of ptr, or ~0 if there's an error. Meant for // logging purposes. uint64_t getAddress() { if (auto *address = getVirtualAddress(ptr).getValue()) return *address; return ~0; } template BuilderErrorOr> resolvePointer( const TargetRelativeDirectPointer *ptr) { return resolveRelativePointervalue), Nullable>( ptr, ptr->value); } template BuilderErrorOr> resolvePointer( const TargetRelativeIndirectablePointer *ptr) { bool isDirect = !(ptr->value & 1); auto pointerValue = ptr->value & ~static_castvalue)>(1); auto directBuffer = resolveRelativePointervalue), Nullable>( ptr, pointerValue); if (directBuffer.getError() || isDirect) return directBuffer; // Indirect case: relative pointer to absolute pointer to value. auto indirectAbsolutePointer = directBuffer.getValue()->template cast(); return indirectAbsolutePointer.template resolvePointer( indirectAbsolutePointer.ptr); } template BuilderErrorOr> resolvePointer( const RelativeDirectPointerIntPair *ptr) { return resolveRelativePointer(ptr, ptr->getOffset()); } template BuilderErrorOr> resolvePointer(const StoredPointer *ptr) { if (isNull()) return BuilderError("Tried to resolve pointer %p in null buffer", ptr); if (!file) { return reinterpret_cast *>(this) ->template resolveWritableDataPointer(ptr); } assert(section.getObject()); auto pointerVirtualAddress = getVirtualAddress(ptr); if (!pointerVirtualAddress) return *pointerVirtualAddress.getError(); auto found = file->fixups.find(*pointerVirtualAddress); if (found != file->fixups.end()) { auto target = std::get<1>(*found); if (auto *vmAddr = std::get_if(&target.target)) { if (target.library > 0) { const char *libName = "ordinal too large"; if (auto ptr = getLibraryName(target.library).getValue()) libName = *ptr; BuilderError("Found fixup at %#" PRIx64 " with address target %#" PRIx64 " and library target %s", *pointerVirtualAddress, *vmAddr, libName); } return resolveVirtualAddress(*vmAddr); } auto symbolName = std::get(target.target); MachOFile *targetFile; if (target.library > 0) { auto targetLibraryName = getLibraryName(target.library); if (!targetLibraryName) return *targetLibraryName.getError(); auto found = context->machOFilesByPath.find(*targetLibraryName); if (found == context->machOFilesByPath.end()) return BuilderError("Symbol referenced unknown library '%s'", *targetLibraryName); targetFile = found->getValue(); } else if (target.library == FixupTarget::selfLibrary) { targetFile = file; } else { return BuilderError( "Can't resolve pointer with fixup target ordinal %d", target.library); } auto foundSymbol = targetFile->symbols.find(symbolName); if (foundSymbol == targetFile->symbols.end()) { const char *targetLibraryName; if (auto *ptr = getLibraryName(target.library).getValue()) targetLibraryName = *ptr; else targetLibraryName = file->path.c_str(); return BuilderError( "Symbol referenced unknown symbol '%s' in library '%s'", symbolName.str().c_str(), targetLibraryName); } return context->readerWriter->template getSymbolPointer( symbolName, foundSymbol->getValue(), targetFile); } // We didn't find any fixups for this spot. It might just be NULL. if (ptr->value == 0) return {Buffer::null(context)}; return BuilderError("Attempted to resolve pointer at %#" PRIx64 " with contents %#" PRIx64 " but no fixup exists at this location", *pointerVirtualAddress, (uint64_t)ptr->value); } template BuilderErrorOr> resolvePointer(const SignedPointer *ptr) { return resolvePointer(reinterpret_cast(ptr)); } template BuilderErrorOr> resolvePointer(const Pointer *ptr) { return resolvePointer(reinterpret_cast(ptr)); } template BuilderErrorOr> resolveFunctionPointer(const U *ptr) { static_assert(sizeof(*ptr) == sizeof(StoredPointer)); return resolvePointer(reinterpret_cast(ptr)); } template BuilderErrorOr> resolveFunctionPointer( const TargetCompactFunctionPointer *ptr) { auto result = resolvePointer(ptr); if (auto *error = result.getError()) return *error; return result.getValue()->template cast(); } }; template class WritableData : public Buffer { /// Check that the given pointer lies within memory of this data object. void checkPtr(void *toCheck) { assert((uintptr_t)toCheck - (uintptr_t)this->atom->buffer.data() < this->atom->buffer.size()); } template void writePointerImpl(void *where, Buffer value) { if (!value) { memset(where, 0, sizeof(StoredPointer)); return; } // Write some arbitrary nonzero data so that null checks work with != 0. // This value won't make it out into the serialized data. memset(where, 0x5a, sizeof(StoredPointer)); PointerTarget target = {}; target.offset = (uintptr_t)where - (uintptr_t)this->atom->buffer.data(); assert(target.offset + sizeof(StoredPointer) <= this->atom->buffer.size()); target.size = sizeof(StoredPointer); if (auto *file = value.file) { auto contents = value.section.getContents(); if (!contents) { fprintf(stderr, "section.getContents should never fail, but it did: %s", getErrorString(contents.takeError()).c_str()); abort(); } auto addressInFile = (uintptr_t)value.ptr - (uintptr_t)contents->data() + value.section.getAddress(); target.fileOrAtom = FileTarget{value.file, addressInFile}; } else { auto offsetInValue = (uintptr_t)value.ptr - (uintptr_t)value.atom->buffer.data(); target.fileOrAtom = AtomTarget{value.atom, offsetInValue}; } auto &targets = this->atom->pointerTargetsSorted; auto compare = [](const PointerTarget &a, const PointerTarget &b) { return a.offset < b.offset; }; auto insertionPoint = std::upper_bound(targets.begin(), targets.end(), target, compare); // Check to see if there's already a target at this offset, and overwrite // it if so. if (insertionPoint > targets.begin()) { auto prevPoint = insertionPoint - 1; if (prevPoint->offset == target.offset) { *prevPoint = target; return; } } targets.insert(insertionPoint, target); return; } public: WritableData(ExternalGenericMetadataBuilderContext *context, Atom *atom) : Buffer(context, {}, {}, reinterpret_cast(atom->buffer.data())) { this->atom = atom; } void setName(const std::string &str) { this->atom->name = str; } template BuilderErrorOr> resolveWritableDataPointer(const StoredPointer *ptr) { unsigned offset = (uintptr_t)ptr - (uintptr_t)this->atom->buffer.data(); assert(offset + sizeof(StoredPointer) <= this->atom->buffer.size()); auto &targets = this->atom->pointerTargetsSorted; auto compare = [](const PointerTarget &a, const unsigned &b) { return a.offset < b; }; auto found = std::lower_bound(targets.begin(), targets.end(), offset, compare); if (found == targets.end() || found->offset != offset) { if (ptr->value == 0) return Buffer::null(this->context); return BuilderError( "Asked to resolve non-null pointer %#" PRIx64 " at offset %u in writable data with no target at that offset", (uint64_t)ptr->value, offset); } if (auto *fileTarget = std::get_if(&found->fileOrAtom)) return this->template resolveVirtualAddressInFile( fileTarget->file, fileTarget->addressInFile); auto atomTarget = std::get(found->fileOrAtom); WritableData buffer{this->context, atomTarget.atom}; return buffer.offsetBy(atomTarget.offset); } template void writePointer(Pointer *where, Buffer value) { checkPtr(where); where->value.value = ~(uintptr_t)value.ptr; writePointerImpl(where, value); } // SignedPointer is templated on the pointer type, not the pointee type like // the other pointer templates. template void writePointer(SignedPointer *where, Buffer value) { checkPtr(where); where->SignedValue.value = ~(uintptr_t)value.ptr; writePointerImpl(where, value); } template void writePointer(StoredPointer *where, Buffer value) { checkPtr(where); where->value = ~(uintptr_t)value.ptr; writePointerImpl(where, value); } void writeFunctionPointer(void *where, Buffer target) { writePointer(reinterpret_cast(where), target); } }; ExternalGenericMetadataBuilderContext() { readerWriter.reset(new ReaderWriter{this}); } ExternalGenericMetadataBuilderContext( const ExternalGenericMetadataBuilderContext &) = delete; ExternalGenericMetadataBuilderContext & operator=(const ExternalGenericMetadataBuilderContext &) = delete; template WritableData allocate(size_t size) { auto atom = allocateAtom(size); return WritableData{this, atom}; } template WritableData allocateArray(size_t count) { return allocate(count * sizeof(T)); } void setArch(const char *arch) { this->arch = arch; } void setNamesToBuild(const std::vector &names) { this->mangledNamesToBuild = names; } BuilderErrorOr buildDemanglingForContext(Buffer descriptorBuffer, Demangler &dem); llvm::Error addImage(std::unique_ptr &&binary, std::optional> &&memoryBuffer, const std::string &path); llvm::Error addImageAtPath(const std::string &path); llvm::Error addImageInMemory(const void *start, uint64_t length, const std::string &path); BuilderErrorOr addImagesInPath(std::string path); BuilderErrorOr> metadataForNode(swift::Demangle::NodePointer Node); std::optional> findSymbol(llvm::StringRef name, MachOFile *searchFile); std::optional findSectionInFile(MachOFile *file, uint64_t address); std::optional findPointerInFiles(const void *ptr); void build(); void writeOutput(llvm::json::OStream &J, unsigned platform, const std::string &platformVersion); void logDescriptorMap(); private: using Builder = GenericMetadataBuilder>; using ConstructedMetadata = typename Builder::ConstructedMetadata; void cacheDescriptor(Buffer descriptorBuffer); void addImage(MachOFile *file); void populateMachOSymbols(MachOFile *file); void populateMachOFixups(MachOFile *file); void readMachOSections(MachOFile *file); BuilderErrorOr> constructMetadataForMangledTypeName(llvm::StringRef typeName); BuilderErrorOr> constructMetadataForNode(swift::Demangle::NodePointer Node); Buffer serializeMetadataMapTable(void); template void writeAtomContentsJSON(llvm::json::OStream &J, const Atom &atom, const SymbolCallback &symbolCallback); void writeDylibsJSON( llvm::json::OStream &J, std::unordered_map> &symbolReferences); void writeJSONSerialization(llvm::json::OStream &J, unsigned platform, const std::string &platformVersion); // The architecture being targeted. std::string arch; // The readerWriter and builder helper objects. std::unique_ptr> readerWriter; std::unique_ptr builder; // The mangled names we're building metadata for. std::vector mangledNamesToBuild; // Map from standardized mangled names to the corresponding type descriptors. std::unordered_map> mangledNominalTypeDescriptorMap; // Map from mangled type names to the built metadata. std::unordered_map builtMetadataMap; // All of the MachOFile objects we're working with. std::vector> machOFiles; // A map from paths (install names) to MachOFile objects. llvm::StringMap machOFilesByPath; // A map from symbol names (including _ prefix) to file and SymbolRef. llvm::StringMap> allSymbols; swift::Demangle::Context demangleCtx; }; template class ReaderWriter { ExternalGenericMetadataBuilderContext *context; bool sectionContainsAddress(llvm::object::SectionRef section, uint64_t addr) { return section.getAddress() <= addr && addr < section.getAddress() + section.getSize(); } std::optional getNearestSymbol(MachOFile *file, uint64_t addr, const std::vector &in) { auto comparator = [&](uint64_t addr, const llvm::object::SymbolRef &sym) { auto symAddr = sym.getAddress(); if (!symAddr) return true; return addr < *symAddr; }; auto foundSymbol = std::upper_bound(in.begin(), in.end(), addr, comparator); // We found the first symbol with address greater than our target. Back // up one to find the symbol with address less than or equal to our // target. if (foundSymbol != in.begin()) foundSymbol--; if (foundSymbol == in.end()) return {}; // Make sure the symbol is in the same section as the target. auto section = foundSymbol->getSection(); if (!section) return {}; if (!sectionContainsAddress(*section.get(), addr)) { // Try the following symbol and use it if it's in the same section as the // target. foundSymbol++; if (foundSymbol == in.end()) return {}; auto section = foundSymbol->getSection(); if (!section) return {}; if (!sectionContainsAddress(*section.get(), addr)) return {}; } return *foundSymbol; } public: using Runtime = RuntimeT; using MetadataContext = ExternalGenericMetadataBuilderContext; using Metadata = typename MetadataContext::Metadata; using GenericArgument = typename MetadataContext::template Buffer; template using Buffer = typename MetadataContext::template Buffer; template using WritableData = typename MetadataContext::template WritableData; struct SymbolInfo { std::string symbolName; std::string libraryName; uint64_t pointerOffset; }; class ResolveToDemangling { ReaderWriter &readerWriter; Demangle::Demangler &dem; public: ResolveToDemangling(ReaderWriter &readerWriter, Demangle::Demangler &dem) : readerWriter(readerWriter), dem(dem) {} Demangle::NodePointer operator()(Demangle::SymbolicReferenceKind kind, Demangle::Directness directness, int32_t offset, const void *base) { auto maybeBuffer = readerWriter.getBufferForPointerInFile< TargetRelativeDirectPointer>(base); if (auto *error = maybeBuffer.getError()) { LOG(LogLevel::Warning, "Failed to find buffer for symbolic reference at %p offset " "%" PRId32, base, offset); return nullptr; } auto buffer = maybeBuffer.getValue(); auto target = buffer->resolvePointer(buffer->ptr); if (auto *error = target.getError()) { LOG(LogLevel::Warning, "Failed to resolve symbolic reference at %p offset %" PRId32, base, offset); return nullptr; } if (directness == Directness::Indirect) { auto castBuffer = target.getValue() ->template cast(); target = castBuffer.resolvePointer(castBuffer.ptr); if (auto *error = target.getError()) { LOG(LogLevel::Warning, "Failed to resolve indirect symbolic reference at %p offset " "%" PRId32, base, offset); return nullptr; } } switch (kind) { case Demangle::SymbolicReferenceKind::Context: { auto contextBuffer = target.getValue() ->template cast>(); auto demangleNode = readerWriter.context->buildDemanglingForContext(contextBuffer, dem); if (auto *error = demangleNode.getError()) { LOG(LogLevel::Warning, "Failed to build demangling for symbolic reference at %p offset " "%" PRId32, base, offset); return nullptr; } return *demangleNode.getValue(); } default: LOG(LogLevel::Warning, "Don't know how to handle symbolic reference kind %u", (unsigned)kind); return nullptr; } } }; ReaderWriter(ExternalGenericMetadataBuilderContext *context) : context(context) {} bool isLoggingEnabled() { return true; } SWIFT_FORMAT(5, 6) void log(const char *filename, unsigned line, const char *function, const char *fmt, ...) { va_list args; va_start(args, fmt); fprintf(stderr, "%s:%u:%s: ", filename, line, function); vfprintf(stderr, fmt, args); fputs("\n", stderr); va_end(args); } template BuilderErrorOr> getSymbolPointer(const char *name) { return getSymbolPointer(llvm::StringRef{name}); } template BuilderErrorOr> getSymbolPointer(llvm::StringRef name, MachOFile *searchFile = nullptr) { auto result = context->findSymbol(name, searchFile); if (!result) return BuilderError("Could not find symbol '%s'", name.str().c_str()); auto [file, symbol] = *result; return getSymbolPointer(name, symbol, file); } template BuilderErrorOr> getSymbolPointer(llvm::StringRef name, llvm::object::SymbolRef symbol, MachOFile *file) { auto expectedAddress = symbol.getAddress(); if (!expectedAddress) { return BuilderError("Could not get address of symbol '%s': %s", name.str().c_str(), getErrorString(expectedAddress.takeError()).c_str()); } auto address = expectedAddress.get(); auto expectedSection = symbol.getSection(); if (!expectedSection) { return BuilderError("Could not get section of symbol '%s': %s", name.str().c_str(), getErrorString(expectedSection.takeError()).c_str()); } auto section = expectedSection.get(); if (section == file->objectFile->section_end()) return BuilderError("Could not get section of symbol '%s' (%#" PRIx64 "), it is undefined or absolute", name.str().c_str(), address); auto sectionAddress = section->getAddress(); if (address < sectionAddress) return BuilderError("Symbol '%s' has address %#" PRIx64 " < section address %#" PRIx64, name.str().c_str(), address, sectionAddress); auto expectedSectionContents = section->getContents(); if (!expectedSectionContents) return BuilderError( "Could not get section contents for symbol '%s': %s", name.str().c_str(), getErrorString(expectedSectionContents.takeError()).c_str()); auto sectionContents = expectedSectionContents.get(); auto delta = address - sectionAddress; if (delta >= sectionContents.size()) return BuilderError( "Symbol '%s' has address %#" PRIx64 " past the end of its section, start address %#" PRIx64 " length %zu", name.str().c_str(), address, sectionAddress, sectionContents.size()); auto symbolContents = sectionContents.drop_front(delta); Buffer buffer{context, file, *section, symbolContents}; return {buffer}; } template BuilderErrorOr> getBufferForPointerInFile(const void *ptr) { auto maybeFile = context->findPointerInFiles(ptr); if (!maybeFile) return BuilderError("Pointer %p is not in any loaded mach-o file", ptr); auto file = *maybeFile; auto target = (uintptr_t)ptr; bool hadErrors = false; for (auto §ion : file->objectFile->sections()) { auto contents = section.getContents(); if (!contents) { consumeError(contents.takeError()); hadErrors = true; continue; } auto start = (uintptr_t)contents->begin(); auto end = (uintptr_t)contents->end(); if (start <= target && target < end) return Buffer{context, file, section, (const T *)ptr}; } return BuilderError( "Pointer %p is in file %s but is not in any section%s", ptr, file->path.c_str(), hadErrors ? " (errors occurred when fetching section contents)" : ""); } BuilderErrorOr substituteGenericParameters( NodePointer node, NodePointer metadataMangleNode, WritableData> containingMetadataBuffer) { if (node->getKind() == Demangle::Node::Kind::DependentGenericParamType) { auto depth = node->getChild(0)->getIndex(); auto index = node->getChild(1)->getIndex(); unsigned foundDepth = 0; NodePointer cursor = metadataMangleNode; while (cursor) { switch (cursor->getKind()) { case Node::Kind::BoundGenericClass: case Node::Kind::BoundGenericStructure: case Node::Kind::BoundGenericEnum: case Node::Kind::BoundGenericProtocol: case Node::Kind::BoundGenericTypeAlias: case Node::Kind::BoundGenericFunction: case Node::Kind::BoundGenericOtherNominalType: if (foundDepth == depth) { auto typeList = cursor->getChild(1); if (!typeList) return BuilderError( "Requested generic parameter at depth=%" PRIu64 " index=%" PRIu64 ", BoundGeneric node does not have two children", depth, index); if (typeList->getKind() != Node::Kind::TypeList) return BuilderError( "Requested generic parameter at depth=%" PRIu64 " index=%" PRIu64 ", child 1 of BoundGeneric is %s, not TypeList", depth, index, nodeKindString(typeList->getKind())); auto child = typeList->getChild(index); if (!child) return BuilderError( "Requested generic parameter at depth=%" PRIu64 " index=%" PRIu64 ", but bound generic TypeList only has %zu children", depth, index, typeList->getNumChildren()); return child; } foundDepth++; cursor = cursor->getFirstChild(); break; default: cursor = cursor->getFirstChild(); break; } } return BuilderError("Requested generic parameter at depth=%" PRIu64 " index=%" PRIu64 ", but maximum depth was %u", depth, index, foundDepth); } for (size_t i = 0; i < node->getNumChildren(); i++) { auto child = node->getChild(i); auto newChild = substituteGenericParameters(child, metadataMangleNode, containingMetadataBuffer); if (!newChild) return *newChild.getError(); if (*newChild != child) node->replaceChild(i, *newChild); } return node; } BuilderErrorOr> getTypeByMangledName( WritableData> containingMetadataBuffer, NodePointer metadataMangleNode, llvm::StringRef mangledTypeName) { Demangle::Demangler dem; auto node = dem.demangleType(mangledTypeName, ResolveToDemangling(*this, dem)); LOG(LogLevel::Detail, "%s", getNodeTreeAsString(node).c_str()); auto substituted = substituteGenericParameters(node, metadataMangleNode, containingMetadataBuffer); if (!substituted) return *substituted.getError(); LOG(LogLevel::Detail, "%s", getNodeTreeAsString(*substituted).c_str()); return context->metadataForNode(*substituted); } /// Get info about the symbol corresponding to the given buffer. If no /// information can be retrieved, the result is filled with "" /// strings and a 0 offset. template SymbolInfo getSymbolInfo(Buffer buffer) { SymbolInfo result = {"", "", 0}; if (buffer.file) { result.libraryName = buffer.file->path; if (auto *address = buffer.getVirtualAddress(buffer.ptr).getValue()) { auto symbol = getNearestSymbol(buffer.file, *address); if (symbol) { if (auto name = symbol->getName()) result.symbolName = *name; result.pointerOffset = *address; } } } return result; } // Find the nearest exported symbol before the target, if possible. Finds the // first symbol after the target if there are none before. If the file // contains no symbols, returns an empty SymbolRef. std::optional getNearestExportedSymbol(MachOFile *file, uint64_t addr) { return getNearestSymbol(file, addr, file->exportedSymbolsSortedByAddress); } // Find the nearest symbol before the target, if possible. Finds the first // symbol after the target if there are none before. If the file contains no // symbols, returns an empty SymbolRef. std::optional getNearestSymbol(MachOFile *file, uint64_t addr) { return getNearestSymbol(file, addr, file->namedSymbolsSortedByAddress); } template WritableData allocate(size_t size) { return context->template allocate(size); } }; // Produces the standard mangled name for a prespecialized metadata that we'll // use as the key to the metadata map table. static BuilderErrorOr _standardMangledNameForNode(Demangle::NodePointer typeNode, Demangler &demangler) { // Wrap the type in a global node to match the mangling we get from the input. if (typeNode->getKind() != Node::Kind::Global) { typeNode = wrapNode(demangler, typeNode, Node::Kind::Global); } auto resolver = [](SymbolicReferenceKind kind, const void *ref) -> NodePointer { abort(); }; auto mangling = Demangle::mangleNode(typeNode, resolver, demangler); if (!mangling.isSuccess()) return BuilderError("Failed to mangle node: %s", getNodeTreeAsString(typeNode).c_str()); return mangling.result().str(); } static std::string getInstallName(llvm::object::MachOObjectFile *objectFile) { for (auto &command : objectFile->load_commands()) { if (command.C.cmd == llvm::MachO::LC_ID_DYLIB) { auto dylibCommand = objectFile->getDylibIDLoadCommand(command); return command.Ptr + dylibCommand.dylib.name; } } return ""; } template BuilderErrorOr ExternalGenericMetadataBuilderContext::buildDemanglingForContext( Buffer descriptorBuffer, Demangler &dem) { NodePointer node = nullptr; // Walk up the context tree. llvm::SmallVector, 8> descriptorPath; { Buffer parent = descriptorBuffer; while (parent) { descriptorPath.push_back(parent); // Temporary shenanigans to resolve the weird pointer type until we figure // out the right template whatnot to make it work properly. auto parentFieldPointer = &parent.ptr->Parent; auto parentFieldPointerCast = (const TargetRelativeIndirectablePointer *)parentFieldPointer; auto newParent = parent.resolvePointer(parentFieldPointerCast); if (!newParent) return *newParent.getError(); parent = *newParent; } } for (auto component : llvm::reverse(descriptorPath)) { switch (auto kind = component.ptr->getKind()) { case ContextDescriptorKind::Module: { if (node != nullptr) return BuilderError("Building demangling for context, found module not " "at the top level"); auto moduleDescriptor = component.template cast(); auto name = moduleDescriptor.resolvePointer(&moduleDescriptor.ptr->Name); if (!name) return *name.getError(); node = dem.createNode(Node::Kind::Module, name->ptr); break; } case ContextDescriptorKind::Extension: { auto extension = component.template cast(); auto mangledExtendedContextBuffer = extension.resolvePointer(&extension.ptr->ExtendedContext); if (!mangledExtendedContextBuffer) return *mangledExtendedContextBuffer.getError(); auto mangledExtendedContext = Demangle::makeSymbolicMangledNameStringRef( mangledExtendedContextBuffer->ptr); typename ReaderWriter::ResolveToDemangling resolver{ *readerWriter, dem}; auto selfType = dem.demangleType(mangledExtendedContext, resolver); if (selfType->getKind() == Node::Kind::Type) selfType = selfType->getFirstChild(); if (selfType->getKind() == Node::Kind::BoundGenericEnum || selfType->getKind() == Node::Kind::BoundGenericStructure || selfType->getKind() == Node::Kind::BoundGenericClass || selfType->getKind() == Node::Kind::BoundGenericOtherNominalType) { // TODO: Use the unsubstituted type if we can't handle the // substitutions yet. selfType = selfType->getChild(0)->getChild(0); } auto extNode = dem.createNode(Node::Kind::Extension); extNode->addChild(node, dem); extNode->addChild(selfType, dem); // TODO: Turn the generic signature into a demangling as the third // generic argument. node = extNode; break; } case ContextDescriptorKind::Protocol: { auto protocol = component.template cast(); auto name = protocol.resolvePointer(&protocol.ptr->Name); if (!name) return *name.getError(); auto protocolNode = dem.createNode(Node::Kind::Protocol); protocolNode->addChild(node, dem); auto nameNode = dem.createNode(Node::Kind::Identifier, name->ptr); protocolNode->addChild(nameNode, dem); node = protocolNode; break; } default: // Form a type context demangling for type contexts. if (auto type = llvm::dyn_cast(component.ptr)) { if (type->getTypeContextDescriptorFlags().hasImportInfo()) return BuilderError("Don't know how to build demangling for type " "context descriptor with import info"); Node::Kind nodeKind; switch (kind) { case ContextDescriptorKind::Class: nodeKind = Node::Kind::Class; break; case ContextDescriptorKind::Struct: nodeKind = Node::Kind::Structure; break; case ContextDescriptorKind::Enum: nodeKind = Node::Kind::Enum; break; default: // We don't know about this kind of type. Use an "other type" mangling // for it. nodeKind = Node::Kind::OtherNominalType; break; } auto name = component.resolvePointer(&type->Name); if (!name) return *name.getError(); auto typeNode = dem.createNode(nodeKind); typeNode->addChild(node, dem); auto nameNode = dem.createNode(Node::Kind::Identifier, name->ptr); typeNode->addChild(nameNode, dem); node = typeNode; break; } return BuilderError( "Don't know how to build demangling for descriptor kind %hhu", static_cast(kind)); } } return wrapNode(dem, node, Node::Kind::Type); } template llvm::Error ExternalGenericMetadataBuilderContext::addImage( std::unique_ptr &&binary, std::optional> &&memoryBuffer, const std::string &path) { if (isa(binary.get())) { auto objectFile = cast(std::move(binary)); auto filetype = objectFile->getHeader().filetype; // Do we want to support bundles? if (filetype != llvm::MachO::MH_DYLIB && filetype != llvm::MachO::MH_EXECUTE) return llvm::createStringError(llvm::inconvertibleErrorCode(), "Unsupported mach-o filetype %" PRIu32, filetype); LOG(LogLevel::Info, "Loaded %p from %s", objectFile.get(), path.c_str()); MachOFile *file = new MachOFile{std::move(memoryBuffer), std::move(objectFile), path}; addImage(file); return llvm::Error::success(); } if (auto universal = dyn_cast(binary.get())) { auto objectForArch = universal->getMachOObjectForArch(arch); if (!objectForArch) return objectForArch.takeError(); auto objectFile = objectForArch.get().get(); LOG(LogLevel::Info, "Loaded %p from %s", objectFile, path.c_str()); MachOFile *file = new MachOFile{std::move(memoryBuffer), std::move(objectForArch.get()), path}; addImage(file); return llvm::Error::success(); } return llvm::createStringError(llvm::inconvertibleErrorCode(), "Unknown binary type."); } template llvm::Error ExternalGenericMetadataBuilderContext::addImageAtPath( const std::string &path) { LOG(LogLevel::Info, "Adding %s", path.c_str()); auto binaryOwner = llvm::object::createBinary(path); if (!binaryOwner) { return binaryOwner.takeError(); } auto [binary, buffer] = binaryOwner->takeBinary(); return addImage(std::move(binary), std::optional(std::move(buffer)), path); } template llvm::Error ExternalGenericMetadataBuilderContext::addImageInMemory( const void *start, uint64_t length, const std::string &path) { LOG(LogLevel::Info, "Adding %s in memory", path.c_str()); llvm::StringRef stringRef{reinterpret_cast(start), length}; llvm::MemoryBufferRef bufferRef{stringRef, {}}; auto binary = llvm::object::createBinary(bufferRef); if (!binary) return binary.takeError(); return addImage(std::move(binary.get()), {}, path); } /// Add images in the given path. Returns an error if there was a filesystem /// error when iterating over the directory. Errors reading individual images /// are NOT returned, as it's assumed that there may be non-dylib content in the /// directory as well. template BuilderErrorOr ExternalGenericMetadataBuilderContext::addImagesInPath( std::string path) { auto forEachFile = [&](std::string filepath) { auto error = addImageAtPath(filepath); handleAllErrors(std::move(error), [&](const llvm::ErrorInfoBase &E) { fprintf(stderr, "Could not read dylib at %s: %s\n", filepath.c_str(), E.message().c_str()); }); }; auto result = enumerateFilesInPath(path, forEachFile); if (!result) return *result.getError(); return {{}}; } template BuilderErrorOr< typename ExternalGenericMetadataBuilderContext::template Buffer< const typename ExternalGenericMetadataBuilderContext< Runtime>::Metadata>> ExternalGenericMetadataBuilderContext::metadataForNode( swift::Demangle::NodePointer node) { Demangler demangler; auto symbolNode = wrapNode(demangler, node, Node::Kind::Global, Node::Kind::TypeMetadata); auto resolver = [](SymbolicReferenceKind kind, const void *ref) -> NodePointer { abort(); }; auto symbolMangling = Demangle::mangleNode(symbolNode, resolver, demangler); if (!symbolMangling.isSuccess()) { return BuilderError("symbol mangling failed with code %u", symbolMangling.error().code); } LOG(LogLevel::Detail, "symbol mangling: %s", symbolMangling.result().str().c_str()); auto symbolName = symbolMangling.result(); auto maybeSymbol = readerWriter->template getSymbolPointer(symbolName); auto symbolResult = findSymbol(symbolName, nullptr); if (symbolResult) { auto [file, symbol] = *symbolResult; LOG(LogLevel::Detail, "Found symbol for metadata '%s' in %s", symbolName.str().c_str(), file->path.c_str()); return readerWriter->template getSymbolPointer( symbolName, symbol, file); } auto metadataName = _standardMangledNameForNode(node, demangler); if (!metadataName) return *metadataName.getError(); auto metadata = builtMetadataMap.find(*metadataName); if (metadata != builtMetadataMap.end()) { LOG(LogLevel::Detail, "Found metadata we already built for '%s'", symbolName.str().c_str()); auto constructedMetadata = std::get<1>(*metadata); return constructedMetadata.data.offsetBy(constructedMetadata.offset) .template cast(); } // TODO: need a way to find non-exported metadata with no corresponding // symbol. LOG(LogLevel::Detail, "Did not find metadata '%s', trying to build it", symbolName.str().c_str()); auto constructedMetadata = constructMetadataForNode(node); if (!constructedMetadata) return *constructedMetadata.getError(); if (!*constructedMetadata) return BuilderError("Failed to find non-generic type '%s'", metadataName->c_str()); LOG(LogLevel::Info, "Successfully built metadata for '%s'", symbolName.str().c_str()); auto metadataFromMap = builtMetadataMap.find(*metadataName); if (metadataFromMap == builtMetadataMap.end()) return BuilderError("Successfully constructed metadata for " "'%s', but it was not in the map", metadataName->c_str()); auto constructedMetadataFromMap = std::get<1>(*metadataFromMap); assert((*constructedMetadata)->data.ptr == constructedMetadataFromMap.data.ptr); assert((*constructedMetadata)->offset == constructedMetadataFromMap.offset); return (*constructedMetadata) ->data.offsetBy((*constructedMetadata)->offset) .template cast(); } template std::optional> ExternalGenericMetadataBuilderContext::findSymbol( llvm::StringRef toFind, MachOFile *searchFile) { llvm::SmallString<128> prefixed; prefixed += "_"; prefixed += toFind; if (searchFile == nullptr) { auto it = allSymbols.find(prefixed); if (it != allSymbols.end()) return it->getValue(); return {}; } auto it = searchFile->symbols.find(prefixed); if (it != searchFile->symbols.end()) return {{searchFile, it->getValue()}}; return {}; } template std::optional ExternalGenericMetadataBuilderContext::findSectionInFile( MachOFile *file, uint64_t vmAddr) { // TODO: be smarter than scanning all sections? for (auto §ion : file->objectFile->sections()) { if (section.getAddress() <= vmAddr && vmAddr < section.getAddress() + section.getSize()) { return section; } } return {}; } template std::optional ExternalGenericMetadataBuilderContext::findPointerInFiles( const void *ptr) { auto target = (uintptr_t)ptr; // TODO: we should probably do a binary search or anything better than just // a linear scan. for (auto &file : machOFiles) { auto buffer = file->objectFile->getMemoryBufferRef(); auto start = (uintptr_t)buffer.getBufferStart(); auto end = (uintptr_t)buffer.getBufferEnd(); if (start <= target && target < end) return file.get(); } return {}; } template void ExternalGenericMetadataBuilderContext::build() { // Set up the builder. builder.reset( new GenericMetadataBuilder>{*readerWriter.get()}); // Process all input symbmols. for (auto mangledTypeName : mangledNamesToBuild) { auto result = constructMetadataForMangledTypeName(mangledTypeName); if (auto *error = result.getError()) fprintf(stderr, "Could not construct metadata for '%s': %s\n", mangledTypeName.c_str(), error->cStr()); } auto metadataMap = serializeMetadataMapTable(); using PrespecializedData = LibPrespecializedData; auto topLevelData = allocate(sizeof(PrespecializedData)); topLevelData.setName(LIB_PRESPECIALIZED_TOP_LEVEL_SYMBOL_NAME); topLevelData.ptr->majorVersion = PrespecializedData::currentMajorVersion; topLevelData.ptr->minorVersion = PrespecializedData::currentMinorVersion; topLevelData.writePointer(&topLevelData.ptr->metadataMap, metadataMap.template cast()); } template void ExternalGenericMetadataBuilderContext::writeOutput( llvm::json::OStream &J, unsigned platform, const std::string &platformVersion) { writeJSONSerialization(J, platform, platformVersion); } template void ExternalGenericMetadataBuilderContext::logDescriptorMap() { if (logLevel <= LogLevel::Detail) { LOG(LogLevel::Detail, "Descriptor map (%zu entries):", mangledNominalTypeDescriptorMap.size()); for (auto [name, descriptor] : mangledNominalTypeDescriptorMap) { fprintf(stderr, " %s -> %p\n", name.c_str(), descriptor.ptr); } } } template void ExternalGenericMetadataBuilderContext::cacheDescriptor( Buffer descriptorBuffer) { auto desc = descriptorBuffer.ptr; auto typeDesc = dyn_cast(desc); if (!typeDesc) { return; } // This produces a Type -> {Structure|Class|Enum} Demangle::Demangler Dem; auto demangleNode = buildDemanglingForContext(descriptorBuffer, Dem); if (auto *error = demangleNode.getError()) { auto symbolInfo = readerWriter->getSymbolInfo(descriptorBuffer); LOG(LogLevel::Warning, "Could not build demangling for context %s (%s + %" PRIu64 "): %s", symbolInfo.symbolName.c_str(), symbolInfo.libraryName.c_str(), symbolInfo.pointerOffset, error->cStr()); return; } // ... which needs to be wrapped in Global -> NominalTypeDescriptor -> * to // match what we cache during dylib reading. auto global = wrapNode(Dem, *demangleNode.getValue(), Node::Kind::Global, Node::Kind::NominalTypeDescriptor); auto mangling = Demangle::mangleNode(global); if (!mangling.isSuccess()) { auto symbolInfo = readerWriter->getSymbolInfo(descriptorBuffer); LOG(LogLevel::Warning, "Could not build demangling for context %s (%s + %" PRIu64 "): %s", symbolInfo.symbolName.c_str(), symbolInfo.libraryName.c_str(), symbolInfo.pointerOffset, getNodeTreeAsString(global).c_str()); return; } LOG(LogLevel::Info, "Found descriptor %s", mangling.result().c_str()); auto [iterator, didInsert] = mangledNominalTypeDescriptorMap.try_emplace( mangling.result(), descriptorBuffer.template cast()); if (!didInsert) { auto [name, oldTypeDesc] = *iterator; LOG(LogLevel::Warning, "Descriptor for %s already exists - old: %p - new: %p", name.c_str(), oldTypeDesc.ptr, typeDesc); } } // Perform prep tasks on file, add it to machOFiles, and take ownership of the // object. template void ExternalGenericMetadataBuilderContext::addImage(MachOFile *file) { // Replace the path with the library's install name if possible. auto installName = getInstallName(file->objectFile.get()); if (installName.length() > 0) file->path = installName; populateMachOSymbols(file); populateMachOFixups(file); readMachOSections(file); machOFiles.emplace_back(file); machOFilesByPath.insert({file->path, file}); } template void ExternalGenericMetadataBuilderContext::populateMachOSymbols( MachOFile *file) { // Fill out library names. for (auto &command : file->objectFile->load_commands()) { if (command.C.cmd == llvm::MachO::LC_LOAD_DYLIB || command.C.cmd == llvm::MachO::LC_LOAD_WEAK_DYLIB || command.C.cmd == llvm::MachO::LC_REEXPORT_DYLIB || command.C.cmd == llvm::MachO::LC_LOAD_UPWARD_DYLIB) { auto *dylibCommand = reinterpret_cast(command.Ptr); auto name = reinterpret_cast(dylibCommand) + dylibCommand->dylib.name; file->libraryNames.push_back(name); } } // Find all the symbols. for (auto &sym : file->objectFile->symbols()) { auto type = sym.getType(); if (!type) continue; if (type.get() != llvm::object::SymbolRef::ST_Data && type.get() != llvm::object::SymbolRef::ST_Function) continue; auto name = sym.getName(); if (!name) { consumeError(name.takeError()); continue; } auto flags = sym.getFlags(); if (!flags) { consumeError(flags.takeError()); continue; } auto [iterator, didInsert] = file->symbols.try_emplace(*name, sym); if (!didInsert) { LOG(LogLevel::Detail, "Duplicate symbol name %s in %s", name->str().c_str(), file->path.c_str()); } file->namedSymbolsSortedByAddress.push_back(sym); allSymbols.insert({*name, {file, sym}}); if (*flags & llvm::object::SymbolRef::Flags::SF_Exported) { file->exportedSymbolsSortedByAddress.push_back(sym); } } auto compareAddresses = [&](const llvm::object::SymbolRef &a, const llvm::object::SymbolRef &b) { auto aAddr = a.getAddress(); if (!aAddr) return false; auto bAddr = b.getAddress(); if (!bAddr) return true; return *aAddr < *bAddr; }; std::sort(file->namedSymbolsSortedByAddress.begin(), file->namedSymbolsSortedByAddress.end(), compareAddresses); std::sort(file->exportedSymbolsSortedByAddress.begin(), file->exportedSymbolsSortedByAddress.end(), compareAddresses); LOG(LogLevel::Info, "%s: populated %zu linked libraries and %u symbols", file->path.c_str(), file->libraryNames.size(), file->symbols.size()); } template void ExternalGenericMetadataBuilderContext::populateMachOFixups( MachOFile *file) { llvm::Error err = llvm::Error::success(); auto objectFile = file->objectFile.get(); std::optional> found; for (const auto &bind : objectFile->bindTable(err)) { LOG(LogLevel::Detail, "bind: %s %s %s %s %#" PRIx64, bind.typeName().str().c_str(), bind.symbolName().str().c_str(), bind.segmentName().str().c_str(), bind.sectionName().str().c_str(), bind.address()); FixupTarget target{bind.ordinal(), bind.symbolName()}; file->fixups.insert({bind.address(), target}); } if (err) { LOG(LogLevel::Warning, "Failed to get bind table: %s", getErrorString(std::move(err)).c_str()); } // Ignoring lazyBindTable and weakBindTable, as the symbols we're interested // in shouldn't appear in them. for (const auto &fixup : objectFile->fixupTable(err)) { const char *libName; int ordinal = fixup.ordinal(); if (ordinal > 0 && (size_t)ordinal <= file->libraryNames.size()) libName = file->libraryNames[ordinal - 1]; else if (ordinal == FixupTarget::selfLibrary) libName = ""; else libName = ""; LOG(LogLevel::Detail, "fixup: ordinal=%d type=%s symbol=%s segment=%s section=%s " "address=%#" PRIx64 " lib=%s", ordinal, fixup.typeName().str().c_str(), fixup.symbolName().str().c_str(), fixup.segmentName().str().c_str(), fixup.sectionName().str().c_str(), fixup.address(), libName); FixupTarget target; target.library = fixup.ordinal(); if (auto ptr = fixup.pointerValue()) target.target = ptr; else target.target = fixup.symbolName(); file->fixups.insert({fixup.address(), target}); } if (err) { LOG(LogLevel::Warning, "Failed to get bind table: %s", getErrorString(std::move(err)).c_str()); } } template void ExternalGenericMetadataBuilderContext::readMachOSections( MachOFile *file) { using namespace llvm; // Search for the section for types for (auto &Section : file->objectFile->sections()) { llvm::Expected SectionNameOrErr = Section.getName(); if (!SectionNameOrErr) { llvm::consumeError(SectionNameOrErr.takeError()); continue; } StringRef SectionName = *SectionNameOrErr; if (SectionName == "__swift5_types") { llvm::Expected SectionData = Section.getContents(); if (!SectionData) { LOG(LogLevel::Warning, "Failed to get __swift5_types section data from %s: %s", file->path.c_str(), getErrorString(SectionData.takeError()).c_str()); return; } auto Contents = SectionData.get(); using SectionContentType = const RelativeDirectPointerIntPair; Buffer sectionBuffer{this, file, Section, Contents}; size_t count = Contents.size() / sizeof(SectionContentType); for (size_t i = 0; i < count; i++) { auto &record = sectionBuffer.ptr[i]; if (record.getInt() == TypeReferenceKind::DirectTypeDescriptor) { auto resolved = sectionBuffer.resolvePointer(&record); if (auto *error = resolved.getError()) { LOG(LogLevel::Warning, "Descriptor record %p (__swift5_types + %zu), failed to " "resolve pointer: %s", &record, (size_t)&record - (size_t)sectionBuffer.ptr, error->cStr()); break; } auto descBuffer = *resolved.getValue(); LOG(LogLevel::Info, "Caching descriptor record %p (__swift5_types + %zu) -> %p", &record, (size_t)&record - (size_t)sectionBuffer.ptr, descBuffer.ptr); cacheDescriptor(descBuffer); } else { LOG(LogLevel::Warning, "Descriptor record %p (__swift5_types + %zu) is indirect, " "not yet implemented", &record, (size_t)&record - (size_t)sectionBuffer.ptr); } } break; } } } template BuilderErrorOr::Builder::ConstructedMetadata>> ExternalGenericMetadataBuilderContext< Runtime>::constructMetadataForMangledTypeName(llvm::StringRef typeName) { auto node = demangleCtx.demangleTypeAsNode(typeName); if (!node) { return BuilderError("Failed to demangle '%s'.", typeName.str().c_str()); } LOG(LogLevel::Detail, "Result: %s", nodeToString(node).c_str()); return constructMetadataForNode(node); } static BuilderErrorOr _mangledNominalTypeNameForBoundGenericNode( Demangle::NodePointer BoundGenericNode) { LOG(LogLevel::Detail, "BoundGenericNode:\n%s", getNodeTreeAsString(BoundGenericNode).c_str()); Demangle::Demangler Dem; auto unspecializedOrError = getUnspecialized(BoundGenericNode, Dem); if (!unspecializedOrError.isSuccess()) { return BuilderError("getUnspecialized failed with code %u", unspecializedOrError.error().code); } auto unspecialized = unspecializedOrError.result(); LOG(LogLevel::Detail, "unspecialized:\n%s", getNodeTreeAsString(unspecialized).c_str()); auto globalNode = wrapNode(Dem, unspecialized, Node::Kind::Global, Node::Kind::NominalTypeDescriptor, Node::Kind::Type); LOG(LogLevel::Detail, "globalTypeDescriptor:\n%s", getNodeTreeAsString(globalNode).c_str()); auto mangling = Demangle::mangleNode(globalNode); if (!mangling.isSuccess()) { return BuilderError("Failed to mangle unspecialized node."); } return mangling.result(); } // Returns the constructed metadata, or no value if the node doesn't contain a // bound generic. template BuilderErrorOr::Builder::ConstructedMetadata>> ExternalGenericMetadataBuilderContext::constructMetadataForNode( swift::Demangle::NodePointer node) { LOG(LogLevel::Detail, "\n%s", getNodeTreeAsString(node).c_str()); // Pre-specialize BoundGenericClass|Structure|Enum switch (node->getKind()) { case Node::Kind::Global: case Node::Kind::Type: case Node::Kind::TypeMangling: case Node::Kind::TypeMetadata: if (!node->hasChildren()) return BuilderError("%s node has no children", nodeKindString(node->getKind())); return constructMetadataForNode(node->getFirstChild()); case Node::Kind::BoundGenericClass: case Node::Kind::BoundGenericStructure: case Node::Kind::BoundGenericEnum: break; case Node::Kind::BoundGenericProtocol: case Node::Kind::BoundGenericTypeAlias: case Node::Kind::BoundGenericFunction: case Node::Kind::BoundGenericOtherNominalType: return BuilderError("Can't construct metadata for kind %#hx (%s)", static_cast(node->getKind()), nodeKindString(node->getKind())); default: LOG(LogLevel::Info, "Default success with kind %s", nodeKindString(node->getKind())); return {{}}; } // Get the type descriptor. // Type descriptors were cached from __swift5_types sections during // readMachOSections. If one is missing here, then there was a mismatch // between what bound generic type names we were given, and what dylibs we // have parsed. auto mangledNominalTypeName = _mangledNominalTypeNameForBoundGenericNode(node); if (!mangledNominalTypeName) return *mangledNominalTypeName.getError(); auto foundTypeDescriptor = mangledNominalTypeDescriptorMap.find(*mangledNominalTypeName); if (foundTypeDescriptor == mangledNominalTypeDescriptorMap.end()) { // Either the mangling failed, or we didn't find this type during dylib // ingestion. LOG(LogLevel::Detail, "%s", getNodeTreeAsString(node).c_str()); return BuilderError( "Failed to look up type descriptor for '%s' from string '%s'.", nodeToString(node).c_str(), mangledNominalTypeName->c_str()); } auto typeDescriptor = std::get<1>(*foundTypeDescriptor); LOG(LogLevel::Detail, "Found type descriptor %p\n", typeDescriptor.ptr); // Get the generic arguments. auto genericTypeListNode = node->getChild(1); assert(genericTypeListNode->getKind() == Node::Kind::TypeList); assert(genericTypeListNode->hasChildren()); std::vector> genericTypeMetadatas; for (auto genericTypeNode : *genericTypeListNode) { LOG(LogLevel::Detail, "genericTypeNode:\n%s", getNodeTreeAsString(genericTypeNode).c_str()); auto maybeMetadata = metadataForNode(genericTypeNode); if (auto *error = maybeMetadata.getError()) { return BuilderError("Failed to get generic type reference '%s': %s", nodeToString(genericTypeNode).c_str(), error->cStr()); } genericTypeMetadatas.push_back(*maybeMetadata.getValue()); } // Get the generic metadata pattern. auto &generics = typeDescriptor.ptr->getFullGenericContextHeader(); auto maybePattern = typeDescriptor.resolvePointer(&generics.DefaultInstantiationPattern); if (auto *error = maybePattern.getError()) { return BuilderError("Failed to resolve instantiation pattern pointer: %s", error->cStr()); } auto pattern = *maybePattern.getValue(); LOG(LogLevel::Detail, "pattern: %p", pattern.ptr); auto maybeExtraDataSize = builder->extraDataSize(typeDescriptor, pattern.toConst()); if (auto *error = maybeExtraDataSize.getError()) { return BuilderError("Failed to compute extra data size: %s", error->cStr()); } auto extraDataSize = *maybeExtraDataSize.getValue(); LOG(LogLevel::Detail, "extraDataSize: %zu", extraDataSize); auto maybeMetadata = builder->buildGenericMetadata( typeDescriptor, genericTypeMetadatas, pattern.toConst(), extraDataSize); if (auto *error = maybeMetadata.getError()) { return BuilderError("Failed to build metadata '%s': %s", getNodeTreeAsString(node).c_str(), error->cStr()); } auto metadata = *maybeMetadata.getValue(); LOG(LogLevel::Detail, "metadata: %p", metadata.data.ptr); Demangle::Demangler Dem; auto mangledName = _standardMangledNameForNode(node, Dem); if (!mangledName) return *mangledName.getError(); metadata.data.setName(*mangledName); builtMetadataMap.emplace(*mangledName, metadata); auto initializeResult = builder->initializeGenericMetadata(metadata.data, node); if (auto *error = initializeResult.getError()) { builtMetadataMap.erase(*mangledName); return BuilderError("Failed to build metadata '%s': %s", getNodeTreeAsString(node).c_str(), error->cStr()); } auto optionalMetadata = std::optional{metadata}; return {optionalMetadata}; } /// Make the metadata map table, and return a buffer pointing to it. template typename ExternalGenericMetadataBuilderContext::template Buffer ExternalGenericMetadataBuilderContext::serializeMetadataMapTable( void) { auto numEntries = builtMetadataMap.size(); // Array size must be at least numEntries+1 per the requirements of // PrebuiltStringMap. Aim for a 75% load factor. auto arraySize = std::max(numEntries * 4 / 3, numEntries + 1); using Map = PrebuiltStringMap; auto byteSize = Map::byteSize(arraySize); auto mapData = allocate(byteSize); auto serializedMap = new (mapData.ptr) Map(arraySize); std::vector *> sortedMapElements; sortedMapElements.reserve(numEntries); for (auto &entry : builtMetadataMap) sortedMapElements.push_back(&entry); std::sort( sortedMapElements.begin(), sortedMapElements.end(), [](const std::pair *a, const std::pair *b) { return std::get<0>(*a) < std::get<0>(*b); }); for (auto *entry : sortedMapElements) { auto [key, value] = *entry; auto *elementPtr = serializedMap->insert(key.c_str()); if (!elementPtr) { fprintf(stderr, "PrebuiltStringMap insert failed!\n"); abort(); } auto stringData = allocate(key.size() + 1); memcpy(stringData.ptr, key.c_str(), key.size()); stringData.setName("_cstring_" + key); mapData.writePointer(&elementPtr->key, stringData); auto metadataPointer = value.data.offsetBy(value.offset); mapData.writePointer(&elementPtr->value, metadataPointer); } mapData.setName("_swift_prespecializedMetadataMap"); return mapData; } /// Write the atom's contents as JSON. For each symbol reference emitted, the /// callback is passed the target MachOFile * and StringRef of the symbol's /// name. template template void ExternalGenericMetadataBuilderContext::writeAtomContentsJSON( llvm::json::OStream &J, const typename ExternalGenericMetadataBuilderContext::Atom &atom, const SymbolCallback &symbolCallback) { const char *ptrTargetKind = sizeof(StoredPointer) == 8 ? "ptr64" : "ptr32"; // Maintain cursors in the atom's buffer and targets. In a loop, we write out // any buffer contents between the current point and the next target, then we // write out the next target. size_t bufferCursor = 0; auto targetsCursor = atom.pointerTargetsSorted.begin(); auto targetsEnd = atom.pointerTargetsSorted.end(); auto writeSlice = [&](size_t from, size_t to) { if (from < to) { llvm::StringRef slice{&atom.buffer[from], to - from}; J.value(llvm::toHex(slice)); } }; while (targetsCursor != targetsEnd) { assert(bufferCursor <= targetsCursor->offset); writeSlice(bufferCursor, targetsCursor->offset); J.object([&] { if (auto *fileTarget = std::get_if(&targetsCursor->fileOrAtom)) { std::string foundSymbolName = ""; uint64_t foundSymbolAddress = 0; auto foundSymbol = readerWriter->getNearestExportedSymbol( fileTarget->file, fileTarget->addressInFile); if (foundSymbol) { if (auto name = foundSymbol->getName()) { if (auto address = foundSymbol->getAddress()) { foundSymbolName = *name; foundSymbolAddress = *address; } } } if (foundSymbolName == "") { auto section = findSectionInFile(fileTarget->file, fileTarget->addressInFile); if (section) { auto name = section->getName(); if (name) { const char *segment = section->isText() ? "__TEXT" : "__DATA"; foundSymbolName = "$dylib_segment$" + fileTarget->file->path + "$start$" + segment + "$" + name->str(); } } } if (foundSymbolName == "") { // Couldn't find a symbol or a section, this really shouldn't happen. foundSymbolName = "$dylib_segment$" + fileTarget->file->path + "$start$__TEXT"; foundSymbolAddress = 0; } symbolCallback(fileTarget->file, foundSymbolName); J.attribute("target", foundSymbolName); J.attribute("addend", (int64_t)fileTarget->addressInFile - (int64_t)foundSymbolAddress); J.attribute("kind", ptrTargetKind); } else { auto atomTarget = std::get(targetsCursor->fileOrAtom); J.attribute("self", true); J.attribute("target", "_" + atomTarget.atom->name); J.attribute("addend", atomTarget.offset); J.attribute("kind", ptrTargetKind); } }); bufferCursor = targetsCursor->offset + targetsCursor->size; targetsCursor++; } // Write any remaining data after the last target. writeSlice(bufferCursor, atom.buffer.size()); } template void ExternalGenericMetadataBuilderContext::writeDylibsJSON( llvm::json::OStream &J, std::unordered_map> &symbolReferences) { for (auto &elt : symbolReferences) { auto file = std::get<0>(elt); auto &names = std::get<1>(elt); J.object([&] { J.attribute("installName", file->path); J.attributeArray("exports", [&] { for (auto name : names) { J.object([&] { J.attribute("name", name); }); } }); }); } } template void ExternalGenericMetadataBuilderContext::writeJSONSerialization( llvm::json::OStream &J, unsigned platform, const std::string &platformVersion) { // Name any unnamed atoms. unsigned i = 0; for (auto &atom : atoms) { if (atom->name.length() == 0) { atom->name = "__unnamed_atom_" + std::to_string(i++); } } J.object([&] { J.attribute("version", 1); J.attribute("platform", platform); J.attribute("platformVersion", platformVersion); J.attribute("arch", arch); J.attribute("installName", "/usr/lib/libswiftPrespecialized.dylib"); // Track the referenced libraries and symbol names from all of the atoms. std::unordered_map> symbolReferences; J.attributeArray("atoms", [&] { for (auto &atom : atoms) { J.object([&] { J.attribute("name", "_" + atom->name); J.attribute("contentType", "constData"); J.attributeArray("contents", [&] { writeAtomContentsJSON( J, *atom, [&](MachOFile *file, llvm::StringRef symbolName) { symbolReferences[file].insert(symbolName.str()); }); }); }); } }); J.attributeArray("dylibs", [&] { writeDylibsJSON(J, symbolReferences); }); }); } BuilderErrorOr> readNames(llvm::StringRef contents) { auto json = llvm::json::parse(contents); if (!json) return BuilderError("Failed to parse names JSON: %s", getErrorString(json.takeError()).c_str()); auto topLevel = json->getAsObject(); if (!topLevel) return BuilderError( "Failed to parse names JSON: top level value is not an object"); auto metadataNamesValue = topLevel->get("metadataNames"); if (!metadataNamesValue) return BuilderError( "Failed to parse names JSON: no 'metadataNames' key in top level"); auto metadataNames = metadataNamesValue->getAsArray(); if (!metadataNames) return BuilderError( "Failed to parse names JSON: 'metadataNames' is not an array"); std::vector names; for (auto &entryValue : *metadataNames) { auto entry = entryValue.getAsObject(); if (!entry) return BuilderError("Failed to parse names JSON: 'entries' contains " "value that is not an object"); auto nameValue = entry->get("name"); if (!nameValue) return BuilderError( "Failed to parse names JSON: entry does not contain 'name' key"); auto name = nameValue->getAsString(); if (!name) return BuilderError( "Failed to parse names JSON: 'name' value is not a string"); names.push_back(std::string(*name)); } return {names}; } BuilderErrorOr> readNamesFromFile(const char *path) { auto contents = llvm::MemoryBuffer::getFile(path); if (!contents) return BuilderError("Could not read names file at '%s': %s", path, contents.getError().message().c_str()); return readNames(contents.get()->getBuffer()); } template static BuilderErrorOr enumerateFilesInPath(std::string path, const Fn &callback) { std::error_code err; llvm::sys::fs::recursive_directory_iterator iterator(path, err); llvm::sys::fs::recursive_directory_iterator end; while (iterator != end && !err) { if (iterator->type() != llvm::sys::fs::file_type::directory_file) callback(iterator->path()); iterator.increment(err); } if (err) return BuilderError("Iteration of directory %s failed: %s", path.c_str(), err.message().c_str()); return {{}}; } BuilderErrorOr getPointerWidth(std::string arch) { if (arch == "arm64" || arch == "arm64e") return 8; if (arch == "arm64_32") return 4; if (arch == "x86_64") return 8; return BuilderError("Unknown arch '%s'", arch.c_str()); } } // namespace swift using ExternalRuntime32 = swift::TypedExternal>>; using ExternalRuntime64 = swift::TypedExternal>>; struct SwiftExternalMetadataBuilder { using Builder32 = swift::ExternalGenericMetadataBuilderContext; using Builder64 = swift::ExternalGenericMetadataBuilderContext; std::variant context; int platform; // Storage for an error string returned to the caller. std::string lastErrorString; SwiftExternalMetadataBuilder(int pointerSize) { if (pointerSize == 4) { context.emplace(); } else if (pointerSize == 8) { context.emplace(); } else { fprintf(stderr, "Unsupported pointer size %d\n", pointerSize); abort(); } } template void withContext(const Fn &fn) { if (auto *context32 = std::get_if(&context)) fn(context32); if (auto *context64 = std::get_if(&context)) fn(context64); } }; struct SwiftExternalMetadataBuilder * swift_externalMetadataBuilder_create(int platform, const char *arch) { auto pointerWidth = swift::getPointerWidth(arch); if (auto *error = pointerWidth.getError()) { fprintf(stderr, "%s\n", error->getErrorString().c_str()); return nullptr; } auto *builder = new SwiftExternalMetadataBuilder(*pointerWidth.getValue()); builder->platform = platform; builder->withContext([&](auto *context) { context->setArch(arch); }); return builder; } void swift_externalMetadataBuilder_destroy( struct SwiftExternalMetadataBuilder *builder) { delete builder; } const char *swift_externalMetadataBuilder_readNamesJSON( struct SwiftExternalMetadataBuilder *builder, const char *names_json) { auto names = swift::readNames(names_json); if (auto *error = names.getError()) { builder->lastErrorString = error->cStr(); return builder->lastErrorString.c_str(); } builder->withContext( [&](auto *context) { context->setNamesToBuild(*names.getValue()); }); return nullptr; } const char *swift_externalMetadataBuilder_addDylib( struct SwiftExternalMetadataBuilder *builder, const char *installName, const struct mach_header *mh, uint64_t size) { builder->lastErrorString = ""; builder->withContext([&](auto *context) { auto error = context->addImageInMemory(mh, size, installName); if (error) builder->lastErrorString = swift::getErrorString(std::move(error)); }); if (builder->lastErrorString != "") return builder->lastErrorString.c_str(); return nullptr; } const char *swift_externalMetadataBuilder_buildMetadata( struct SwiftExternalMetadataBuilder *builder) { builder->withContext([&](auto *context) { context->build(); }); return nullptr; } const char *swift_externalMetadataBuilder_getMetadataJSON( struct SwiftExternalMetadataBuilder *builder) { bool prettyPrint = true; unsigned indent = prettyPrint ? 4 : 0; std::string output; llvm::raw_string_ostream ostream(output); llvm::json::OStream J(ostream, indent); builder->withContext([&](auto *context) { context->writeOutput(J, builder->platform, "1.0"); }); return strdup(output.c_str()); } int swift_type_metadata_extract(const char *inputPath, // mangled names const char *dylibSearchPath, // images to add const char *arch, const char *outputPath // json output ) { auto names = swift::readNamesFromFile(inputPath); if (auto *error = names.getError()) { fprintf(stderr, "%s\n", error->getErrorString().c_str()); exit(1); } auto pointerWidth = swift::getPointerWidth(arch); if (auto *error = pointerWidth.getError()) { fprintf(stderr, "%s\n", error->getErrorString().c_str()); exit(1); } SwiftExternalMetadataBuilder builder(*pointerWidth.getValue()); builder.withContext([&](auto *context) { context->setNamesToBuild(*names.getValue()); context->setArch(arch); auto imageAddResult = context->addImagesInPath(dylibSearchPath); if (auto *error = imageAddResult.getError()) { fprintf(stderr, "Error iterating over dylib search path '%s': %s", dylibSearchPath, error->cStr()); exit(1); } context->logDescriptorMap(); bool prettyPrint = true; std::error_code errorCode; llvm::raw_fd_ostream fileStream(outputPath, errorCode); if (errorCode) { fprintf(stderr, "ERROR: Could not open %s for writing.\n", outputPath); exit(1); } context->build(); unsigned indent = prettyPrint ? 4 : 0; llvm::json::OStream J(fileStream, indent); unsigned platform = 1; // PLATFORM_MACOS const char *platformVersion = "14.0"; context->writeOutput(J, platform, platformVersion); }); fprintf(stderr, "Completed!\n"); return 0; }