[ExternalGenericMetadataBuilder] Fix pointer targets, exports, and recursive metadata.

Use exports, not symbols, when emitting a pointer target. Exports are what the linker can actually work with.

When searching for a nearby export to use for a pointer target, accept anything within the same segment, not just the same section. Only segments can be rearranged relative to each other, not sections within a segment, so this is safe and allows for more possible targets.

Disallow pointer targets with no export within the same segment. We attempted to emit a target that's relative to the section starting point in this case, but that didn't work. We'll revisit if it looks useful to do so.

In order to make this work, we resolve the export when writing a pointer instead of when emitting JSON, and make the writePointer functions failable. If writePointer fails, we'll fail to build the metadata and skip it.

Correctly handle the case where the names JSON contains a metadata we already constructed as part of a prior name. Previously we'd emit it twice, now it checks to see if it's already been built and do nothing in that case. Also save errors when a metadata can't be built, so subsequent attempts to build it can fail immediately.

When emitting fixups with ptrauth attributes, use the correct target kind "arm64_auth_ptr".

Fix the VerifyExternalMetadata.swift test not to load an arm64e runtime slice when testing arm64. That's normally fine, but we depend on loading the exact same dylib that we built prespecializations for.

rdar://122968337
This commit is contained in:
Mike Ash
2024-02-14 15:18:11 -05:00
parent 5ce824f042
commit 752f0fb2f8
6 changed files with 369 additions and 269 deletions

View File

@@ -296,9 +296,11 @@ public:
patternPointers->resolvePointer(&patternPointers->ptr[i]);
if (!patternPointer)
return *patternPointer.getError();
data.writePointer(
auto writeResult = data.writePointer(
&metadataExtraData[i + extraDataPattern->OffsetInWords],
patternPointer->template cast<const StoredPointer>());
if (!writeResult)
return *writeResult.getError();
}
}
@@ -310,7 +312,10 @@ public:
if (!valueWitnesses)
return *valueWitnesses.getError();
METADATA_BUILDER_LOG("Setting initial value witnesses");
data.writePointer(&fullMetadata->ValueWitnesses, *valueWitnesses);
auto writeResult =
data.writePointer(&fullMetadata->ValueWitnesses, *valueWitnesses);
if (!writeResult)
return *writeResult.getError();
// Set the metadata kind.
METADATA_BUILDER_LOG("Setting metadata kind %#x",
@@ -319,7 +324,9 @@ public:
// Set the type descriptor.
METADATA_BUILDER_LOG("Setting descriptor");
data.writePointer(&metadata->Description, descriptionBuffer);
writeResult = data.writePointer(&metadata->Description, descriptionBuffer);
if (!writeResult)
return *writeResult.getError();
return {{}};
}
@@ -353,8 +360,11 @@ public:
header.NumKeyArguments,
getGenericArgumentOffset(
descriptionBuffer.template cast<const TypeContextDescriptor>()));
for (unsigned i = 0; i < header.NumKeyArguments; i++)
data.writePointer(&dst[i], arguments[i]);
for (unsigned i = 0; i < header.NumKeyArguments; i++) {
auto writeResult = data.writePointer(&dst[i], arguments[i]);
if (!writeResult)
return *writeResult.getError();
}
// TODO: parameter pack support.
@@ -495,7 +505,11 @@ public:
auto LOWER_ID##_Buffer = from.resolveFunctionPointer(&from.ptr->LOWER_ID); \
if (!LOWER_ID##_Buffer) \
return *LOWER_ID##_Buffer.getError(); \
vwtBuffer.writeFunctionPointer(&vwtBuffer.ptr->LOWER_ID, *LOWER_ID##_Buffer);
if (auto *error = vwtBuffer \
.writeFunctionPointer(&vwtBuffer.ptr->LOWER_ID, \
*LOWER_ID##_Buffer) \
.getError()) \
return *error;
#define DATA_VALUE_WITNESS(LOWER_ID, UPPER_ID, TYPE)
#include "swift/ABI/ValueWitness.def"
@@ -520,7 +534,7 @@ public:
(size_t)flags.getAlignmentMask());
switch (sizeWithAlignmentMask(layout.size, flags.getAlignmentMask(),
hasExtraInhabitants)) {
default:
default: {
// For uncommon layouts, use value witnesses that work with an arbitrary
// size and alignment.
METADATA_BUILDER_LOG("Uncommon layout case, flags.isInlineStorage=%s",
@@ -528,32 +542,48 @@ public:
if (flags.isInlineStorage()) {
if (!pod_direct_initializeBufferWithCopyOfBuffer)
return *pod_direct_initializeBufferWithCopyOfBuffer.getError();
vwtBuffer.writeFunctionPointer(
auto writeResult = vwtBuffer.writeFunctionPointer(
&vwtBuffer.ptr->initializeBufferWithCopyOfBuffer,
*pod_direct_initializeBufferWithCopyOfBuffer);
if (!writeResult)
return *writeResult.getError();
} else {
if (!pod_indirect_initializeBufferWithCopyOfBuffer)
return *pod_indirect_initializeBufferWithCopyOfBuffer.getError();
vwtBuffer.writeFunctionPointer(
auto writeResult = vwtBuffer.writeFunctionPointer(
&vwtBuffer.ptr->initializeBufferWithCopyOfBuffer,
*pod_indirect_initializeBufferWithCopyOfBuffer);
if (!writeResult)
return *writeResult.getError();
}
if (!pod_destroy)
return *pod_destroy.getError();
if (!pod_copy)
return *pod_copy.getError();
vwtBuffer.writeFunctionPointer(&vwtBuffer.ptr->destroy, *pod_destroy);
vwtBuffer.writeFunctionPointer(&vwtBuffer.ptr->initializeWithCopy,
*pod_copy);
vwtBuffer.writeFunctionPointer(&vwtBuffer.ptr->initializeWithTake,
*pod_copy);
vwtBuffer.writeFunctionPointer(&vwtBuffer.ptr->assignWithCopy,
*pod_copy);
vwtBuffer.writeFunctionPointer(&vwtBuffer.ptr->assignWithTake,
*pod_copy);
auto writeResult = vwtBuffer.writeFunctionPointer(
&vwtBuffer.ptr->destroy, *pod_destroy);
if (!writeResult)
return *writeResult.getError();
writeResult = vwtBuffer.writeFunctionPointer(
&vwtBuffer.ptr->initializeWithCopy, *pod_copy);
if (!writeResult)
return *writeResult.getError();
writeResult = vwtBuffer.writeFunctionPointer(
&vwtBuffer.ptr->initializeWithTake, *pod_copy);
if (!writeResult)
return *writeResult.getError();
writeResult = vwtBuffer.writeFunctionPointer(
&vwtBuffer.ptr->assignWithCopy, *pod_copy);
if (!writeResult)
return *writeResult.getError();
writeResult = vwtBuffer.writeFunctionPointer(
&vwtBuffer.ptr->assignWithTake, *pod_copy);
if (!writeResult)
return *writeResult.getError();
// getEnumTagSinglePayload and storeEnumTagSinglePayload are not
// interestingly optimizable based on POD-ness.
return {{}};
}
case sizeWithAlignmentMask(1, 0, 0): {
METADATA_BUILDER_LOG("case sizeWithAlignmentMask(1, 0, 0)");
@@ -629,8 +659,10 @@ public:
// Use POD value witnesses for operations that do an initializeWithTake.
if (!pod_copy)
return *pod_copy.getError();
vwtBuffer.writeFunctionPointer(&vwtBuffer.ptr->initializeWithTake,
*pod_copy);
auto writeResult = vwtBuffer.writeFunctionPointer(
&vwtBuffer.ptr->initializeWithTake, *pod_copy);
if (!writeResult)
return *writeResult.getError();
}
return {{}};
}
@@ -761,7 +793,10 @@ public:
auto fptr = oldVWTBuffer->resolveFunctionPointer(&oldVWT->LOWER_ID); \
if (!fptr) \
return *fptr.getError(); \
newVWTData.writeFunctionPointer(&newVWT->LOWER_ID, *fptr); \
if (auto *error = \
newVWTData.writeFunctionPointer(&newVWT->LOWER_ID, *fptr) \
.getError()) \
return *error; \
}
#include "swift/ABI/ValueWitness.def"
@@ -774,9 +809,11 @@ public:
newVWT->extraInhabitantCount = layout.extraInhabitantCount;
newVWT->flags = layout.flags;
metadataBuffer.writePointer(
auto writeResult = metadataBuffer.writePointer(
&metadataBuffer.ptr->ValueWitnesses,
newVWTData.template cast<const ValueWitnessTable>());
if (!writeResult)
return *writeResult.getError();
return {{}}; // success
}

View File

@@ -211,18 +211,39 @@ struct FixupTarget {
std::variant<uint64_t, llvm::StringRef> target;
};
// An export from a library. Contains a name and address.
struct Export {
std::string name;
uint64_t address;
};
// A segment in a library.
struct Segment {
std::string name;
uint64_t address;
uint64_t size;
Segment(const char name[16], uint64_t address, uint64_t size)
: address(address), size(size) {
size_t length = strnlen(name, 16);
llvm::StringRef nameRef{name, length};
this->name = nameRef.str();
}
};
struct MachOFile {
std::optional<std::unique_ptr<llvm::MemoryBuffer>> memoryBuffer;
std::unique_ptr<llvm::object::MachOObjectFile> objectFile;
std::string path;
// With the leading '_'.
llvm::StringMap<llvm::object::SymbolRef> symbols;
uint64_t baseAddress;
// Guaranteed that getName returns a value.
std::vector<llvm::object::SymbolRef> namedSymbolsSortedByAddress;
std::vector<llvm::object::SymbolRef> exportedSymbolsSortedByAddress;
// With the leading '_'.
llvm::StringMap<Export> symbols;
std::vector<Segment> segments;
std::vector<Export> exportsSortedByAddress;
std::vector<const char *> libraryNames;
std::unordered_map<uint64_t, FixupTarget> fixups;
@@ -263,6 +284,7 @@ class ExternalGenericMetadataBuilderContext {
struct FileTarget {
MachOFile *file;
const Export *nearestExport; // Pointer to entry in exportsSortedByAddress.
uint64_t addressInFile;
};
@@ -293,6 +315,8 @@ class ExternalGenericMetadataBuilderContext {
std::vector<std::unique_ptr<Atom>> atoms;
std::unordered_set<std::string> atomNames;
Atom *allocateAtom(size_t size) {
auto atom = atoms.emplace_back(new Atom).get();
atom->buffer.resize(size);
@@ -537,7 +561,7 @@ public:
"Symbol referenced unknown symbol '%s' in library '%s'",
symbolName.str().c_str(), targetLibraryName);
}
return context->readerWriter->template getSymbolPointer<U>(
return context->readerWriter->template getSymbolPointerInFile<U>(
symbolName, foundSymbol->getValue(), targetFile);
}
@@ -587,13 +611,12 @@ public:
}
template <typename U>
void writePointerImpl(void *where, Buffer<U> value,
PtrauthKey ptrauthKey = PtrauthKey::None,
bool addressDiversified = true,
unsigned discriminator = 0) {
BuilderErrorOr<std::monostate> writePointerImpl(
void *where, Buffer<U> value, PtrauthKey ptrauthKey = PtrauthKey::None,
bool addressDiversified = true, unsigned discriminator = 0) {
if (!value) {
memset(where, 0, sizeof(StoredPointer));
return;
return {{}};
}
// Write some arbitrary nonzero data so that null checks work with != 0.
@@ -624,7 +647,16 @@ public:
auto addressInFile = (uintptr_t)value.ptr -
(uintptr_t)contents->data() +
value.section.getAddress();
target.fileOrAtom = FileTarget{value.file, addressInFile};
auto foundExport =
this->context->readerWriter->getNearestExportedSymbol(
file, addressInFile);
if (!foundExport)
return BuilderError(
"Failed to write pointer to %s %#" PRIx64
" as there are no exported symbols in that segment.",
file->path.c_str(), addressInFile);
target.fileOrAtom = FileTarget{value.file, foundExport, addressInFile};
} else {
auto offsetInValue =
(uintptr_t)value.ptr - (uintptr_t)value.atom->buffer.data();
@@ -644,12 +676,12 @@ public:
auto prevPoint = insertionPoint - 1;
if (prevPoint->offset == target.offset) {
*prevPoint = target;
return;
return {{}};
}
}
targets.insert(insertionPoint, target);
return;
return {{}};
}
public:
@@ -692,44 +724,49 @@ public:
}
template <typename U>
void writePointer(Pointer<U> *where, Buffer<U> value) {
BuilderErrorOr<std::monostate> writePointer(Pointer<U> *where,
Buffer<U> value) {
checkPtr(where);
where->value.value = ~(uintptr_t)value.ptr;
writePointerImpl(where, value);
return writePointerImpl(where, value);
}
// SignedPointer is templated on the pointer type, not the pointee type like
// the other pointer templates.
template <typename U>
void writePointer(SignedPointer<U *> *where, Buffer<U> value) {
BuilderErrorOr<std::monostate> writePointer(SignedPointer<U *> *where,
Buffer<U> value) {
checkPtr(where);
where->SignedValue.value = ~(uintptr_t)value.ptr;
writePointerImpl(where, value, PtrauthInfo<U>::key,
PtrauthInfo<U>::addressDiversified,
PtrauthInfo<U>::discriminator);
return writePointerImpl(where, value, PtrauthInfo<U>::key,
PtrauthInfo<U>::addressDiversified,
PtrauthInfo<U>::discriminator);
}
template <typename U>
void writePointer(StoredPointer *where, Buffer<U> value) {
BuilderErrorOr<std::monostate> writePointer(StoredPointer *where,
Buffer<U> value) {
checkPtr(where);
where->value = ~(uintptr_t)value.ptr;
writePointerImpl(where, value);
return writePointerImpl(where, value);
}
void writeFunctionPointer(void *where, Buffer<const char> target) {
writePointer(reinterpret_cast<StoredPointer *>(where), target);
BuilderErrorOr<std::monostate>
writeFunctionPointer(void *where, Buffer<const char> target) {
return writePointer(reinterpret_cast<StoredPointer *>(where), target);
}
template <unsigned discriminator>
void writeFunctionPointer(
BuilderErrorOr<std::monostate> writeFunctionPointer(
ExternalRuntime64::ValueWitnessFunctionPointer<discriminator> *where,
Buffer<const char> target) {
checkPtr(where);
writePointerImpl(where, target, PtrauthKey::IA, true, discriminator);
return writePointerImpl(where, target, PtrauthKey::IA, true,
discriminator);
}
};
@@ -786,10 +823,11 @@ public:
BuilderErrorOr<Buffer<const Metadata>>
metadataForNode(swift::Demangle::NodePointer Node);
std::optional<std::pair<MachOFile *, llvm::object::SymbolRef>>
std::optional<std::pair<MachOFile *, Export>>
findSymbol(llvm::StringRef name, MachOFile *searchFile);
std::optional<llvm::object::SectionRef> findSectionInFile(MachOFile *file,
uint64_t address);
std::optional<Segment> findSegmentInFile(MachOFile *file, uint64_t address);
std::optional<MachOFile *> findPointerInFiles(const void *ptr);
void build();
@@ -809,8 +847,8 @@ private:
void readMachOSections(MachOFile *file);
BuilderErrorOr<std::string> _mangledNominalTypeNameForBoundGenericNode(Demangle::NodePointer BoundGenericNode);
BuilderErrorOr<std::optional<typename Builder::ConstructedMetadata>>
constructMetadataForMangledTypeName(llvm::StringRef typeName);
BuilderErrorOr<std::monostate>
ensureMetadataForMangledTypeName(llvm::StringRef typeName);
BuilderErrorOr<std::optional<typename Builder::ConstructedMetadata>>
constructMetadataForNode(swift::Demangle::NodePointer Node);
@@ -846,8 +884,10 @@ private:
std::unordered_map<std::string, Buffer<const TypeContextDescriptor>>
mangledNominalTypeDescriptorMap;
// Map from mangled type names to the built metadata.
std::unordered_map<std::string, const typename Builder::ConstructedMetadata>
// Map from mangled type names to the built metadata, or error that occurred
// when building it.
std::unordered_map<
std::string, BuilderErrorOr<const typename Builder::ConstructedMetadata>>
builtMetadataMap;
// All of the MachOFile objects we're working with.
@@ -856,8 +896,8 @@ private:
// A map from paths (install names) to MachOFile objects.
llvm::StringMap<MachOFile *> machOFilesByPath;
// A map from symbol names (including _ prefix) to file and SymbolRef.
llvm::StringMap<std::pair<MachOFile *, llvm::object::SymbolRef>> allSymbols;
// A map from symbol names (including _ prefix) to file and export.
llvm::StringMap<std::pair<MachOFile *, Export>> allSymbols;
swift::Demangle::Context demangleCtx;
};
@@ -866,19 +906,14 @@ template <typename RuntimeT>
class ReaderWriter {
ExternalGenericMetadataBuilderContext<RuntimeT> *context;
bool sectionContainsAddress(llvm::object::SectionRef section, uint64_t addr) {
return section.getAddress() <= addr &&
addr < section.getAddress() + section.getSize();
bool segmentContainsAddress(Segment segment, uint64_t addr) {
return segment.address <= addr && addr < segment.address + segment.size;
}
std::optional<llvm::object::SymbolRef>
getNearestSymbol(MachOFile *file, uint64_t addr,
const std::vector<llvm::object::SymbolRef> &in) {
auto comparator = [&](uint64_t addr, const llvm::object::SymbolRef &sym) {
auto symAddr = sym.getAddress();
if (!symAddr)
return true;
return addr < *symAddr;
const Export *getNearestSymbol(MachOFile *file, uint64_t addr,
const std::vector<Export> &in) {
auto comparator = [&](uint64_t addr, const Export &ex) -> bool {
return addr < ex.address;
};
auto foundSymbol = std::upper_bound(in.begin(), in.end(), addr, comparator);
@@ -891,25 +926,24 @@ class ReaderWriter {
if (foundSymbol == in.end())
return {};
// Make sure the symbol is in the same section as the target.
auto section = foundSymbol->getSection();
if (!section)
// Make sure the symbol is in the same segment as the target.
auto targetSegment = context->findSegmentInFile(file, addr);
// If the target isn't even in a segment, give up. This should never happen.
if (!targetSegment)
return {};
if (!sectionContainsAddress(*section.get(), addr)) {
// Try the following symbol and use it if it's in the same section as the
if (!segmentContainsAddress(*targetSegment, foundSymbol->address)) {
// Try the following symbol and use it if it's in the same segment as the
// target.
foundSymbol++;
if (foundSymbol == in.end())
return {};
auto section = foundSymbol->getSection();
if (!section)
return {};
if (!sectionContainsAddress(*section.get(), addr))
if (!segmentContainsAddress(*targetSegment, foundSymbol->address))
return {};
}
return *foundSymbol;
return &*foundSymbol;
}
public:
@@ -1032,40 +1066,21 @@ public:
}
template <typename T = char>
BuilderErrorOr<Buffer<const T>> getSymbolPointer(const char *name) {
return getSymbolPointer<T>(llvm::StringRef{name});
}
template <typename T = char>
BuilderErrorOr<Buffer<const T>>
getSymbolPointer(llvm::StringRef name, MachOFile *searchFile = nullptr) {
auto result = context->findSymbol(name, searchFile);
BuilderErrorOr<Buffer<const T>> getSymbolPointer(llvm::StringRef name) {
auto result = context->findSymbol(name, nullptr);
if (!result)
return BuilderError("Could not find symbol '%s'", name.str().c_str());
auto [file, symbol] = *result;
return getSymbolPointer<T>(name, symbol, file);
return getSymbolPointerInFile<T>(name, symbol, file);
}
template <typename T = char>
BuilderErrorOr<Buffer<const T>>
getSymbolPointer(llvm::StringRef name, llvm::object::SymbolRef symbol,
MachOFile *file) {
auto expectedAddress = symbol.getAddress();
if (!expectedAddress) {
return BuilderError("Could not get address of symbol '%s': %s",
name.str().c_str(),
getErrorString(expectedAddress.takeError()).c_str());
}
auto address = expectedAddress.get();
auto expectedSection = symbol.getSection();
if (!expectedSection) {
return BuilderError("Could not get section of symbol '%s': %s",
name.str().c_str(),
getErrorString(expectedSection.takeError()).c_str());
}
auto section = expectedSection.get();
if (section == file->objectFile->section_end())
getSymbolPointerInFile(llvm::StringRef name, Export symbol, MachOFile *file) {
auto address = symbol.address;
auto section = context->findSectionInFile(file, address);
if (!section)
return BuilderError("Could not get section of symbol '%s' (%#" PRIx64
"), it is undefined or absolute",
name.str().c_str(), address);
@@ -1225,11 +1240,10 @@ public:
if (buffer.file) {
result.libraryName = buffer.file->path;
if (auto *address = buffer.getVirtualAddress(buffer.ptr).getValue()) {
auto symbol = getNearestSymbol(buffer.file, *address);
auto symbol = getNearestExportedSymbol(buffer.file, *address);
if (symbol) {
if (auto name = symbol->getName())
result.symbolName = *name;
result.pointerOffset = *address;
result.symbolName = symbol->name;
result.pointerOffset = *address - symbol->address;
}
}
}
@@ -1237,19 +1251,9 @@ public:
}
// Find the nearest exported symbol before the target, if possible. Finds the
// first symbol after the target if there are none before. If the file
// contains no symbols, returns an empty SymbolRef.
std::optional<llvm::object::SymbolRef>
getNearestExportedSymbol(MachOFile *file, uint64_t addr) {
return getNearestSymbol(file, addr, file->exportedSymbolsSortedByAddress);
}
// Find the nearest symbol before the target, if possible. Finds the first
// symbol after the target if there are none before. If the file contains no
// symbols, returns an empty SymbolRef.
std::optional<llvm::object::SymbolRef> getNearestSymbol(MachOFile *file,
uint64_t addr) {
return getNearestSymbol(file, addr, file->namedSymbolsSortedByAddress);
// first symbol after the target if there are none before.
const Export *getNearestExportedSymbol(MachOFile *file, uint64_t addr) {
return getNearestSymbol(file, addr, file->exportsSortedByAddress);
}
template <typename T>
@@ -1544,7 +1548,7 @@ ExternalGenericMetadataBuilderContext<Runtime>::metadataForNode(
auto [file, symbol] = *symbolResult;
LOG(LogLevel::Detail, "Found symbol for metadata '%s' in %s",
symbolName.str().c_str(), file->path.c_str());
return readerWriter->template getSymbolPointer<const Metadata>(
return readerWriter->template getSymbolPointerInFile<const Metadata>(
symbolName, symbol, file);
}
@@ -1553,10 +1557,12 @@ ExternalGenericMetadataBuilderContext<Runtime>::metadataForNode(
return *metadataName.getError();
auto metadata = builtMetadataMap.find(*metadataName);
if (metadata != builtMetadataMap.end()) {
auto &[name, constructedMetadata] = *metadata;
if (!constructedMetadata)
return *constructedMetadata.getError();
LOG(LogLevel::Detail, "Found metadata we already built for '%s'",
symbolName.str().c_str());
auto constructedMetadata = std::get<1>(*metadata);
return constructedMetadata.data.offsetBy(constructedMetadata.offset)
return constructedMetadata->data.offsetBy(constructedMetadata->offset)
.template cast<const Metadata>();
}
@@ -1581,9 +1587,14 @@ ExternalGenericMetadataBuilderContext<Runtime>::metadataForNode(
"'%s', but it was not in the map",
metadataName->c_str());
auto constructedMetadataFromMap = std::get<1>(*metadataFromMap);
if (!constructedMetadataFromMap)
return BuilderError("Metadata construction for '%s' indicated success, but "
"the map contains an error: %s",
metadataName->c_str(),
constructedMetadataFromMap.getError()->cStr());
assert((*constructedMetadata)->data.ptr ==
constructedMetadataFromMap.data.ptr);
assert((*constructedMetadata)->offset == constructedMetadataFromMap.offset);
constructedMetadataFromMap->data.ptr);
assert((*constructedMetadata)->offset == constructedMetadataFromMap->offset);
return (*constructedMetadata)
->data.offsetBy((*constructedMetadata)->offset)
@@ -1591,7 +1602,7 @@ ExternalGenericMetadataBuilderContext<Runtime>::metadataForNode(
}
template <typename Runtime>
std::optional<std::pair<MachOFile *, llvm::object::SymbolRef>>
std::optional<std::pair<MachOFile *, Export>>
ExternalGenericMetadataBuilderContext<Runtime>::findSymbol(
llvm::StringRef toFind, MachOFile *searchFile) {
llvm::SmallString<128> prefixed;
@@ -1627,6 +1638,19 @@ ExternalGenericMetadataBuilderContext<Runtime>::findSectionInFile(
return {};
}
template <typename Runtime>
std::optional<Segment>
ExternalGenericMetadataBuilderContext<Runtime>::findSegmentInFile(
MachOFile *file, uint64_t address) {
// A brute force search isn't the most efficient, but we typically have only
// a handful of segments.
for (auto &segment : file->segments) {
if (segment.address <= address && address < segment.address + segment.size)
return {segment};
}
return {};
}
template <typename Runtime>
std::optional<MachOFile *>
ExternalGenericMetadataBuilderContext<Runtime>::findPointerInFiles(
@@ -1654,10 +1678,12 @@ void ExternalGenericMetadataBuilderContext<Runtime>::build() {
// Process all input symbmols.
for (auto mangledTypeName : mangledNamesToBuild) {
auto result = constructMetadataForMangledTypeName(mangledTypeName);
LOG(LogLevel::Info, "Processing JSON requested metadata for %s",
mangledTypeName.c_str());
auto result = ensureMetadataForMangledTypeName(mangledTypeName);
if (auto *error = result.getError())
fprintf(stderr, "Could not construct metadata for '%s': %s\n",
mangledTypeName.c_str(), error->cStr());
LOG(LogLevel::Warning, "Could not construct metadata for '%s': %s\n",
mangledTypeName.c_str(), error->cStr());
}
auto metadataMap = serializeMetadataMapTable();
@@ -1668,8 +1694,14 @@ void ExternalGenericMetadataBuilderContext<Runtime>::build() {
topLevelData.ptr->majorVersion = PrespecializedData::currentMajorVersion;
topLevelData.ptr->minorVersion = PrespecializedData::currentMinorVersion;
topLevelData.writePointer(&topLevelData.ptr->metadataMap,
metadataMap.template cast<const void>());
auto result = topLevelData.writePointer(
&topLevelData.ptr->metadataMap, metadataMap.template cast<const void>());
if (!result) {
fprintf(stderr,
"Fatal error: could not write pointer to top level data: %s\n",
result.getError()->cStr());
abort();
}
}
template <typename Runtime>
@@ -1773,56 +1805,51 @@ void ExternalGenericMetadataBuilderContext<Runtime>::populateMachOSymbols(
}
}
// Find all the symbols.
for (auto &sym : file->objectFile->symbols()) {
auto type = sym.getType();
if (!type)
continue;
if (type.get() != llvm::object::SymbolRef::ST_Data &&
type.get() != llvm::object::SymbolRef::ST_Function)
continue;
auto name = sym.getName();
if (!name) {
consumeError(name.takeError());
continue;
}
auto flags = sym.getFlags();
if (!flags) {
consumeError(flags.takeError());
continue;
}
auto [iterator, didInsert] = file->symbols.try_emplace(*name, sym);
if (!didInsert) {
LOG(LogLevel::Detail, "Duplicate symbol name %s in %s",
name->str().c_str(), file->path.c_str());
}
file->namedSymbolsSortedByAddress.push_back(sym);
allSymbols.insert({*name, {file, sym}});
if (*flags & llvm::object::SymbolRef::Flags::SF_Exported) {
file->exportedSymbolsSortedByAddress.push_back(sym);
// Find all the segments.
for (auto &command : file->objectFile->load_commands()) {
auto addSegment = [&](const auto &segmentCommand) {
Segment segment{segmentCommand.segname, segmentCommand.vmaddr,
segmentCommand.vmsize};
if (segment.name == "__TEXT")
file->baseAddress = segment.address;
file->segments.push_back(segment);
};
if (command.C.cmd == llvm::MachO::LC_SEGMENT_64) {
auto segmentCommand = file->objectFile->getSegment64LoadCommand(command);
addSegment(segmentCommand);
} else if (command.C.cmd == llvm::MachO::LC_SEGMENT) {
auto segmentCommand = file->objectFile->getSegmentLoadCommand(command);
addSegment(segmentCommand);
}
}
auto compareAddresses = [&](const llvm::object::SymbolRef &a,
const llvm::object::SymbolRef &b) {
auto aAddr = a.getAddress();
if (!aAddr)
return false;
auto bAddr = b.getAddress();
if (!bAddr)
return true;
// Find all the exported symbols.
llvm::Error err = llvm::Error::success();
for (auto &ex : file->objectFile->exports(err)) {
// Just in case there's somehow a symbol with a name that's not valid UTF-8,
// ignore it, since we can't represent it in our JSON output.
if (!llvm::json::isUTF8(ex.name()))
continue;
return *aAddr < *bAddr;
Export convertedExport{ex.name().str(), ex.address() + file->baseAddress};
file->exportsSortedByAddress.push_back(convertedExport);
allSymbols.insert({ex.name(), {file, convertedExport}});
auto [iterator, didInsert] =
file->symbols.try_emplace(ex.name(), convertedExport);
if (!didInsert) {
LOG(LogLevel::Detail, "Duplicate symbol name %s in %s",
ex.name().str().c_str(), file->path.c_str());
}
}
if (err)
consumeError(std::move(err));
auto compareAddresses = [&](Export &a, Export &b) {
return a.address < b.address;
};
std::sort(file->namedSymbolsSortedByAddress.begin(),
file->namedSymbolsSortedByAddress.end(), compareAddresses);
std::sort(file->exportedSymbolsSortedByAddress.begin(),
file->exportedSymbolsSortedByAddress.end(), compareAddresses);
std::sort(file->exportsSortedByAddress.begin(),
file->exportsSortedByAddress.end(), compareAddresses);
LOG(LogLevel::Info, "%s: populated %zu linked libraries and %u symbols",
file->path.c_str(), file->libraryNames.size(), file->symbols.size());
@@ -1980,16 +2007,17 @@ BuilderErrorOr<std::string> ExternalGenericMetadataBuilderContext<Runtime>::_man
}
template <typename Runtime>
BuilderErrorOr<std::optional<typename ExternalGenericMetadataBuilderContext<
Runtime>::Builder::ConstructedMetadata>>
ExternalGenericMetadataBuilderContext<
Runtime>::constructMetadataForMangledTypeName(llvm::StringRef typeName) {
BuilderErrorOr<std::monostate> ExternalGenericMetadataBuilderContext<
Runtime>::ensureMetadataForMangledTypeName(llvm::StringRef typeName) {
auto node = demangleCtx.demangleTypeAsNode(typeName);
if (!node) {
return BuilderError("Failed to demangle '%s'.", typeName.str().c_str());
}
LOG(LogLevel::Detail, "Result: %s", nodeToString(node).c_str());
return constructMetadataForNode(node);
auto result = metadataForNode(node);
if (!result)
return *result.getError();
return {{}};
}
// Returns the constructed metadata, or no value if the node doesn't contain a
@@ -2081,6 +2109,7 @@ ExternalGenericMetadataBuilderContext<Runtime>::constructMetadataForNode(
LOG(LogLevel::Detail, "pattern: %p", pattern.ptr);
// Compute extra data size.
auto maybeExtraDataSize =
builder->extraDataSize(typeDescriptor, pattern.toConst());
if (auto *error = maybeExtraDataSize.getError()) {
@@ -2089,9 +2118,17 @@ ExternalGenericMetadataBuilderContext<Runtime>::constructMetadataForNode(
auto extraDataSize = *maybeExtraDataSize.getValue();
LOG(LogLevel::Detail, "extraDataSize: %zu", extraDataSize);
// Get the canonical name for this metadata.
Demangle::Demangler Dem;
auto mangledName = _standardMangledNameForNode(node, Dem);
if (!mangledName)
return *mangledName.getError();
// Build the metadata.
auto maybeMetadata = builder->buildGenericMetadata(
typeDescriptor, genericTypeMetadatas, pattern.toConst(), extraDataSize);
if (auto *error = maybeMetadata.getError()) {
builtMetadataMap.emplace(*mangledName, *error);
return BuilderError("Failed to build metadata '%s': %s",
getNodeTreeAsString(node).c_str(), error->cStr());
}
@@ -2099,20 +2136,22 @@ ExternalGenericMetadataBuilderContext<Runtime>::constructMetadataForNode(
LOG(LogLevel::Detail, "metadata: %p", metadata.data.ptr);
Demangle::Demangler Dem;
auto mangledName = _standardMangledNameForNode(node, Dem);
if (!mangledName)
return *mangledName.getError();
// Place the built metadata in the map.
metadata.data.setName(*mangledName);
builtMetadataMap.emplace(*mangledName, metadata);
BuilderErrorOr<const typename Builder::ConstructedMetadata> mapValue{
metadata};
builtMetadataMap.emplace(*mangledName, mapValue);
// Initialize the built metadata.
auto initializeResult =
builder->initializeGenericMetadata(metadata.data, node);
if (auto *error = initializeResult.getError()) {
builtMetadataMap.erase(*mangledName);
return BuilderError("Failed to build metadata '%s': %s",
getNodeTreeAsString(node).c_str(), error->cStr());
builtMetadataMap.emplace(*mangledName, *error);
auto resultError = BuilderError("Failed to build metadata '%s': %s",
nodeToString(node).c_str(), error->cStr());
LOG(LogLevel::Warning, "%s", resultError.cStr());
return resultError;
}
auto optionalMetadata = std::optional{metadata};
@@ -2124,7 +2163,24 @@ template <typename Runtime>
typename ExternalGenericMetadataBuilderContext<Runtime>::template Buffer<char>
ExternalGenericMetadataBuilderContext<Runtime>::serializeMetadataMapTable(
void) {
auto numEntries = builtMetadataMap.size();
// Build a sorted vector of the constructed metadatas. Skip all entries that
// contain an error.
std::vector<const std::pair<const std::string,
BuilderErrorOr<const ConstructedMetadata>> *>
sortedMapElements;
sortedMapElements.reserve(builtMetadataMap.size());
for (auto &entry : builtMetadataMap) {
auto &[name, metadata] = entry;
if (metadata)
sortedMapElements.push_back(&entry);
}
std::sort(sortedMapElements.begin(), sortedMapElements.end(),
[](const auto *a, const auto *b) {
return std::get<0>(*a) < std::get<0>(*b);
});
auto numEntries = sortedMapElements.size();
// Array size must be at least numEntries+1 per the requirements of
// PrebuiltStringMap. Aim for a 75% load factor.
@@ -2137,19 +2193,6 @@ ExternalGenericMetadataBuilderContext<Runtime>::serializeMetadataMapTable(
auto mapData = allocate<char>(byteSize);
auto serializedMap = new (mapData.ptr) Map(arraySize);
std::vector<const std::pair<const std::string, const ConstructedMetadata> *>
sortedMapElements;
sortedMapElements.reserve(numEntries);
for (auto &entry : builtMetadataMap)
sortedMapElements.push_back(&entry);
std::sort(
sortedMapElements.begin(), sortedMapElements.end(),
[](const std::pair<const std::string, const ConstructedMetadata> *a,
const std::pair<const std::string, const ConstructedMetadata> *b) {
return std::get<0>(*a) < std::get<0>(*b);
});
for (auto *entry : sortedMapElements) {
auto [key, value] = *entry;
auto *elementPtr = serializedMap->insert(key.c_str());
@@ -2161,10 +2204,24 @@ ExternalGenericMetadataBuilderContext<Runtime>::serializeMetadataMapTable(
auto stringData = allocate<char>(key.size() + 1);
memcpy(stringData.ptr, key.c_str(), key.size());
stringData.setName("_cstring_" + key);
mapData.writePointer(&elementPtr->key, stringData);
auto result = mapData.writePointer(&elementPtr->key, stringData);
if (!result) {
fprintf(
stderr,
"Fatal error: could not write pointer to metadata table name: %s\n",
result.getError()->cStr());
abort();
}
auto metadataPointer = value.data.offsetBy(value.offset);
mapData.writePointer(&elementPtr->value, metadataPointer);
auto metadataPointer = value->data.offsetBy(value->offset);
result = mapData.writePointer(&elementPtr->value, metadataPointer);
if (!result) {
fprintf(
stderr,
"Fatal error: could not write pointer to metadata table entry: %s\n",
result.getError()->cStr());
abort();
}
}
mapData.setName("_swift_prespecializedMetadataMap");
@@ -2181,7 +2238,6 @@ void ExternalGenericMetadataBuilderContext<Runtime>::writeAtomContentsJSON(
llvm::json::OStream &J,
const typename ExternalGenericMetadataBuilderContext<Runtime>::Atom &atom,
const SymbolCallback &symbolCallback) {
const char *ptrTargetKind = sizeof(StoredPointer) == 8 ? "ptr64" : "ptr32";
// Maintain cursors in the atom's buffer and targets. In a loop, we write out
// any buffer contents between the current point and the next target, then we
@@ -2204,61 +2260,33 @@ void ExternalGenericMetadataBuilderContext<Runtime>::writeAtomContentsJSON(
J.object([&] {
if (auto *fileTarget =
std::get_if<FileTarget>(&targetsCursor->fileOrAtom)) {
std::string foundSymbolName = "";
uint64_t foundSymbolAddress = 0;
symbolCallback(fileTarget->file, fileTarget->nearestExport->name);
auto foundSymbol = readerWriter->getNearestExportedSymbol(
fileTarget->file, fileTarget->addressInFile);
auto addend = (int64_t)fileTarget->addressInFile -
(int64_t)fileTarget->nearestExport->address;
if (foundSymbol) {
if (auto name = foundSymbol->getName()) {
if (auto address = foundSymbol->getAddress()) {
foundSymbolName = *name;
foundSymbolAddress = *address;
}
}
}
if (foundSymbolName == "") {
auto section =
findSectionInFile(fileTarget->file, fileTarget->addressInFile);
if (section) {
auto name = section->getName();
if (name) {
const char *segment = section->isText() ? "__TEXT" : "__DATA";
foundSymbolName = "$dylib_segment$" + fileTarget->file->path +
"$start$" + segment + "$" + name->str();
}
}
}
if (foundSymbolName == "") {
// Couldn't find a symbol or a section, this really shouldn't happen.
foundSymbolName =
"$dylib_segment$" + fileTarget->file->path + "$start$__TEXT";
foundSymbolAddress = 0;
}
symbolCallback(fileTarget->file, foundSymbolName);
J.attribute("target", foundSymbolName);
J.attribute("addend", (int64_t)fileTarget->addressInFile -
(int64_t)foundSymbolAddress);
J.attribute("kind", ptrTargetKind);
J.attribute("target", fileTarget->nearestExport->name);
J.attribute("addend", addend);
} else {
auto atomTarget = std::get<AtomTarget>(targetsCursor->fileOrAtom);
J.attribute("self", true);
J.attribute("target", "_" + atomTarget.atom->name);
J.attribute("addend", atomTarget.offset);
J.attribute("kind", ptrTargetKind);
}
const char *ptrTargetKind =
sizeof(StoredPointer) == 8 ? "ptr64" : "ptr32";
if (usePtrauth && targetsCursor->ptrauthKey != PtrauthKey::None) {
J.attributeObject("authPtr", [&] {
J.attribute("key", static_cast<uint8_t>(targetsCursor->ptrauthKey));
J.attribute("addr", targetsCursor->addressDiversified);
J.attribute("diversity", targetsCursor->discriminator);
});
ptrTargetKind = "arm64_auth_ptr";
}
J.attribute("kind", ptrTargetKind);
});
bufferCursor = targetsCursor->offset + targetsCursor->size;

View File

@@ -136,42 +136,52 @@ public:
/// abstracted is needed for out-of-process work.
template <typename U>
void writePointer(StoredPointer *to, Buffer<U> target) {
BuilderErrorOr<std::monostate> writePointer(StoredPointer *to,
Buffer<U> target) {
checkPtr(to);
*to = reinterpret_cast<StoredPointer>(target.ptr);
return {{}};
}
template <typename U>
void writePointer(U **to, Buffer<U> target) {
BuilderErrorOr<std::monostate> writePointer(U **to, Buffer<U> target) {
checkPtr(to);
*to = target.ptr;
return {{}};
}
template <typename U>
void writePointer(const U **to, Buffer<U> target) {
BuilderErrorOr<std::monostate> writePointer(const U **to,
Buffer<U> target) {
checkPtr(to);
*to = target.ptr;
return {{}};
}
void writePointer(const Metadata **to, GenericArgument target) {
BuilderErrorOr<std::monostate> writePointer(const Metadata **to,
GenericArgument target) {
checkPtr(to);
*to = reinterpret_cast<const Metadata *>(target);
return {{}};
}
template <typename To, typename From>
void writePointer(To *to, Buffer<From> target) {
BuilderErrorOr<std::monostate> writePointer(To *to, Buffer<From> target) {
checkPtr((void *)to);
*to = target.ptr;
return {{}};
}
template <typename U>
void writeFunctionPointer(U *to, Buffer<const char> target) {
BuilderErrorOr<std::monostate>
writeFunctionPointer(U *to, Buffer<const char> target) {
checkPtr((void *)to);
// This weird double cast handles the case where the function pointer
// type has a custom __ptrauth attribute, which the compiler doesn't like
// casting to.
auto castTarget = (const decltype(&**to))(void *)target.ptr;
*to = castTarget;
return {{}};
}
};

View File

@@ -27,6 +27,11 @@ public struct GenericField<T, U> {
var int: Int
}
// The protocol conformance puts a symbol into __DATA_CONST which the builder
// can use as the base symbol for references to other data.
public protocol PublicProto {}
extension GenericStruct: PublicProto {}
ExternalGenericMetadataBuilderTests.test("JSON output") {
let builder = swift_externalMetadataBuilder_create(1, "arm64")
@@ -206,22 +211,22 @@ var expectedJSON: String {
"contents": [
{
"target": "_$s27ExternalMetadataBuilderTest12GenericFieldVMa",
"addend": 1032,
"addend": 432,
"kind": "ptr64"
},
{
"target": "_$s27ExternalMetadataBuilderTest12GenericFieldVMa",
"addend": 1376,
"addend": 776,
"kind": "ptr64"
},
{
"target": "_$s27ExternalMetadataBuilderTest12GenericFieldVMa",
"addend": 1536,
"addend": 936,
"kind": "ptr64"
},
{
"target": "_$s27ExternalMetadataBuilderTest12GenericFieldVMa",
"addend": 1768,
"addend": 1168,
"kind": "ptr64"
},
{
@@ -231,17 +236,17 @@ var expectedJSON: String {
},
{
"target": "_$s27ExternalMetadataBuilderTest12GenericFieldVMa",
"addend": 2200,
"addend": 1600,
"kind": "ptr64"
},
{
"target": "_$s27ExternalMetadataBuilderTest12GenericFieldVMa",
"addend": 2420,
"addend": 1820,
"kind": "ptr64"
},
{
"target": "_$s27ExternalMetadataBuilderTest12GenericFieldVMa",
"addend": 2864,
"addend": 2264,
"kind": "ptr64"
},
"2000000000000000200000000000000007000300FFFFFF7F"
@@ -253,22 +258,22 @@ var expectedJSON: String {
"contents": [
{
"target": "_$s27ExternalMetadataBuilderTest12GenericFieldVMa",
"addend": 3520,
"addend": 2920,
"kind": "ptr64"
},
{
"target": "_$s27ExternalMetadataBuilderTest12GenericFieldVMa",
"addend": 3892,
"addend": 3292,
"kind": "ptr64"
},
{
"target": "_$s27ExternalMetadataBuilderTest12GenericFieldVMa",
"addend": 4052,
"addend": 3452,
"kind": "ptr64"
},
{
"target": "_$s27ExternalMetadataBuilderTest12GenericFieldVMa",
"addend": 4312,
"addend": 3712,
"kind": "ptr64"
},
{
@@ -278,17 +283,17 @@ var expectedJSON: String {
},
{
"target": "_$s27ExternalMetadataBuilderTest12GenericFieldVMa",
"addend": 4796,
"addend": 4196,
"kind": "ptr64"
},
{
"target": "_$s27ExternalMetadataBuilderTest12GenericFieldVMa",
"addend": 5044,
"addend": 4444,
"kind": "ptr64"
},
{
"target": "_$s27ExternalMetadataBuilderTest12GenericFieldVMa",
"addend": 5160,
"addend": 4560,
"kind": "ptr64"
},
"2800000000000000280000000000000007000300FFFFFF7F"

View File

@@ -14,6 +14,7 @@ struct Ptrauth: Decodable {
}
struct Fixup: Decodable {
var kind: String
var target: String
var addend: Int64
var authPtr: Ptrauth?
@@ -108,7 +109,7 @@ func printDeclarations(_ document: Document) {
print(" uint8_t \(name)[\(bytes.count)];")
case .pointer(let fixup):
let ptrauthQualifier: String
if let ptrauth = fixup.authPtr {
if fixup.kind == "arm64_auth_ptr", let ptrauth = fixup.authPtr {
ptrauthQualifier = """
__ptrauth(\(ptrauth.key),
\(ptrauth.addr ? 1 : 0),

View File

@@ -3,7 +3,6 @@
// RUN: %target-codesign %t/VerifyExternalMetadata
//
// RUN: %host-build-swift -Xfrontend -disable-availability-checking -I %swift-lib-dir -I %swift_src_root/lib/ExternalGenericMetadataBuilder -L%swift-lib-dir -lswiftGenericMetadataBuilder -Xlinker -rpath -Xlinker %swift-lib-dir -enable-experimental-feature Extern %S/Inputs/buildMetadataJSON.swift -o %t/buildMetadataJSON
// no: %target-build-swift -I %swift-lib-dir -I %swift_src_root/lib/ExternalGenericMetadataBuilder -L%swift-lib-dir -lswiftGenericMetadataBuilder -enable-experimental-feature Extern %S/Inputs/buildMetadataJSON.swift -o %t/buildMetadataJSON
// RUN: %target-codesign %t/buildMetadataJSON
//
// RUN: %target-build-swift -Xfrontend -disable-availability-checking %S/Inputs/json2c.swift -o %t/json2c
@@ -14,8 +13,10 @@
// RUN: %target-run %t/json2c %t/libswiftPrespecialized.json > %t/libswiftPrespecialized.c
// RUN: %clang -isysroot %sdk -target %target-triple -bundle %t/libswiftPrespecialized.c -L%stdlib_dir -lswiftCore -bundle_loader %t/VerifyExternalMetadata -o %t/libswiftPrespecialized.bundle
//
//
// RUN: env SWIFT_DEBUG_ENABLE_LIB_PRESPECIALIZED_LOGGING=y SWIFT_DEBUG_LIB_PRESPECIALIZED_PATH=%t/libswiftPrespecialized.bundle %target-run %t/VerifyExternalMetadata
// Set a custom library path because we need to ensure we don't load an arm64e
// dylib into an arm64 test, since the prespecialized metadata depends on the
// exact contents of the library.
// RUN: env SWIFT_DEBUG_ENABLE_LIB_PRESPECIALIZED_LOGGING=y SWIFT_DEBUG_LIB_PRESPECIALIZED_PATH=%t/libswiftPrespecialized.bundle %target-run env DYLD_LIBRARY_PATH=%stdlib_dir/%target-arch %t/VerifyExternalMetadata
// REQUIRES: executable_test
// REQUIRES: OS=macosx && CPU=arm64
@@ -36,6 +37,21 @@ public struct GenericField<T, U> {
var int: Int
}
public struct Box<T> {
var field: T
}
public struct Box3<T, U, V> {
var field1: T
var field2: U
var field3: V
}
// The protocol conformance puts a symbol into __DATA_CONST which the builder
// can use as the base symbol for references to other data.
public protocol PublicProto {}
extension Box3: PublicProto {}
let args = CommandLine.arguments
if args.count > 1 && args[1] == "getJSON" {
@@ -44,6 +60,9 @@ if args.count > 1 && args[1] == "getJSON" {
GenericField<GenericField<Int8, Int16>,
Array<GenericStruct<Double, String, Float>>>.self,
Array<Array<Array<Array<Array<Array<Array<Double>>>>>>>.self,
Box<Int>.self,
Box<String>.self,
Box3<Int, Int, Int>.self,
]
let typeNames = types.map { _mangledTypeName($0)! }
let jsonNames = typeNames.map { [ "name": $0 ] }