//===--- Concurrent.h - Concurrent Data Structures -------------*- C++ -*-===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

#ifndef SWIFT_RUNTIME_CONCURRENTUTILS_H
#define SWIFT_RUNTIME_CONCURRENTUTILS_H

#include "Atomic.h"
#include "Debug.h"
#include "swift/Threading/Mutex.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/Support/Allocator.h"
#include <algorithm>
#include <atomic>
#include <functional>
#include <iterator>
#include <stdint.h>
#include <vector>

#if defined(__FreeBSD__) || defined(__CYGWIN__) || defined(__HAIKU__)
#include <stdio.h>
#endif

#if defined(__APPLE__) && defined(__MACH__)
#include <malloc/malloc.h>
#endif

namespace swift {

/// A simple linked list representing pointers that need to be freed. This is
/// not a concurrent data structure, just a bit of support used in the types
/// below.
struct ConcurrentFreeListNode {
  ConcurrentFreeListNode *Next;
  void *Ptr;

  static void add(ConcurrentFreeListNode **head, void *ptr) {
    auto *newNode = reinterpret_cast<ConcurrentFreeListNode *>(
        malloc(sizeof(ConcurrentFreeListNode)));
    newNode->Next = *head;
    newNode->Ptr = ptr;
    *head = newNode;
  }

  /// Free all nodes in the free list, resetting `head` to `NULL`. Calls
  /// `FreeFn` on the Ptr field of every node.
  template <typename FreeFn>
  static void freeAll(ConcurrentFreeListNode **head, const FreeFn &freeFn) {
    auto *node = *head;
    while (node) {
      auto *next = node->Next;
      freeFn(node->Ptr);
      free(node);
      node = next;
    }
    *head = nullptr;
  }
};
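
// Illustrative sketch (not part of the runtime): how a writer typically uses
// ConcurrentFreeListNode to defer deallocation until no readers remain. The
// `noReadersActive` flag stands in for whatever reader tracking the enclosing
// data structure performs; the function name is hypothetical.
#if 0
void retireBuffer(ConcurrentFreeListNode *&freeList, void *oldBuffer,
                  bool noReadersActive) {
  // Park the old buffer on the free list instead of freeing it immediately.
  ConcurrentFreeListNode::add(&freeList, oldBuffer);

  // Only reclaim the parked buffers once it is known that no reader can
  // still be looking at them.
  if (noReadersActive)
    ConcurrentFreeListNode::freeAll(&freeList, free);
}
#endif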

/// An append-only array that can be read without taking locks. Writes
/// are still locked and serialized, but only with respect to other
/// writes.
template <class ElemTy> struct ConcurrentReadableArray {
private:
  /// The struct used for the array's storage. The `Elem` member is
  /// considered to be the first element of a variable-length array,
  /// whose size is determined by the allocation. The `Capacity` member
  /// from `ConcurrentReadableArray` indicates how large it can be.
  struct Storage {
    std::atomic<size_t> Count;
    typename std::aligned_storage<sizeof(ElemTy), alignof(ElemTy)>::type Elem;

    static Storage *allocate(size_t capacity) {
      auto size = sizeof(Storage) + (capacity - 1) * sizeof(Storage().Elem);
      auto *ptr = reinterpret_cast<Storage *>(malloc(size));
      if (!ptr) swift::crash("Could not allocate memory.");
      ptr->Count.store(0, std::memory_order_relaxed);
      return ptr;
    }

    void deallocate() {
      for (size_t i = 0; i < Count; ++i) {
        data()[i].~ElemTy();
      }
      free(this);
    }

    ElemTy *data() {
      return reinterpret_cast<ElemTy *>(&Elem);
    }
  };

  size_t Capacity;
  std::atomic<size_t> ReaderCount;
  std::atomic<Storage *> Elements;
  Mutex WriterLock;
  ConcurrentFreeListNode *FreeList{nullptr};

  void incrementReaders() {
    ReaderCount.fetch_add(1, std::memory_order_acquire);
  }

  void decrementReaders() {
    ReaderCount.fetch_sub(1, std::memory_order_release);
  }

  void deallocateFreeList() {
    ConcurrentFreeListNode::freeAll(&FreeList, [](void *ptr) {
      reinterpret_cast<Storage *>(ptr)->deallocate();
    });
  }

public:
  struct Snapshot {
    ConcurrentReadableArray *Array;
    const ElemTy *Start;
    size_t Count;

    Snapshot(ConcurrentReadableArray *array, const ElemTy *start, size_t count)
        : Array(array), Start(start), Count(count) {}

    Snapshot(const Snapshot &other)
        : Array(other.Array), Start(other.Start), Count(other.Count) {
      Array->incrementReaders();
    }

    ~Snapshot() {
      Array->decrementReaders();
    }

    // These are marked as ref-qualified (the &) to make sure they can't be
    // called on temporaries, since the temporary would be destroyed before the
    // return value can be used, making it invalid.
    const ElemTy *begin() const& { return Start; }
    const ElemTy *end() const& { return Start + Count; }
    const ElemTy& operator [](size_t index) const& {
      assert(index < count() && "out-of-bounds access to snapshot element");
      return Start[index];
    }

    size_t count() const { return Count; }
  };

  // This type cannot be safely copied or moved.
  ConcurrentReadableArray(const ConcurrentReadableArray &) = delete;
  ConcurrentReadableArray(ConcurrentReadableArray &&) = delete;
  ConcurrentReadableArray &operator=(const ConcurrentReadableArray &) = delete;

  ConcurrentReadableArray() : Capacity(0), ReaderCount(0), Elements(nullptr) {}

  ~ConcurrentReadableArray() {
    assert(ReaderCount.load(std::memory_order_acquire) == 0 &&
           "deallocating ConcurrentReadableArray with outstanding snapshots");
    deallocateFreeList();
  }

  void push_back(const ElemTy &elem) {
    Mutex::ScopedLock guard(WriterLock);

    auto *storage = Elements.load(std::memory_order_relaxed);
    auto count = storage ? storage->Count.load(std::memory_order_relaxed) : 0;
    if (count >= Capacity) {
      auto newCapacity = std::max((size_t)16, count * 2);
      auto *newStorage = Storage::allocate(newCapacity);
      if (storage) {
        std::uninitialized_copy_n(storage->data(), count, newStorage->data());
        newStorage->Count.store(count, std::memory_order_release);
        ConcurrentFreeListNode::add(&FreeList, storage);
      }

      storage = newStorage;
      Capacity = newCapacity;

      Elements.store(storage, std::memory_order_release);
    }

    new(&storage->data()[count]) ElemTy(elem);
    storage->Count.store(count + 1, std::memory_order_release);

    // The standard says that std::memory_order_seq_cst only applies to
    // read-modify-write operations, so we need an explicit fence:
    std::atomic_thread_fence(std::memory_order_seq_cst);
    if (ReaderCount.load(std::memory_order_relaxed) == 0)
      deallocateFreeList();
  }

  Snapshot snapshot() {
    incrementReaders();
    auto *storage = Elements.load(SWIFT_MEMORY_ORDER_CONSUME);
    if (storage == nullptr) {
      return Snapshot(this, nullptr, 0);
    }

    auto count = storage->Count.load(std::memory_order_acquire);
    const auto *ptr = storage->data();
    return Snapshot(this, ptr, count);
  }
};
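
// Illustrative sketch (assumption: a `ConcurrentReadableArray<int>` named
// `Registrations` with suitable storage duration; both names are
// hypothetical). It shows the intended pattern: writers call push_back()
// under the internal writer lock, while readers take a named snapshot and
// iterate it without locking. The snapshot must be bound to a local so it
// outlives the iteration.
#if 0
static ConcurrentReadableArray<int> Registrations;

void recordRegistration(int value) {
  Registrations.push_back(value); // serialized against other writers only
}

size_t countRegistrationsBelow(int limit) {
  auto snapshot = Registrations.snapshot(); // pins the current storage
  size_t result = 0;
  for (int value : snapshot)
    if (value < limit)
      ++result;
  return result; // the reader count is released when the snapshot is destroyed
}
#endif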

using llvm::hash_value;

/// A hash table that can be queried without taking any locks. Writes are still
/// locked and serialized, but only with respect to other writes. Writers can
/// add elements and clear the table, but they cannot remove individual
/// elements. Readers work by taking a snapshot of the table and then querying
/// that snapshot.
///
/// The basic structure of the table consists of two arrays. Elements are stored
/// in a contiguous array, with new elements appended to the end. The second
/// array is the actual hash table, and it contains indices into the elements
/// array. This scheme cuts down on wasted space when the elements are larger
/// than a few bytes: instead of wasting `(1 - loadFactor) * sizeof(element)`
/// bytes on unused space in the hash table, we only waste `(1 - loadFactor) *
/// sizeof(index)`. This scheme also avoids readers seeing partially constructed
/// elements.
///
/// Reader/writer synchronization for new elements is handled by keeping an
/// element count which is only incremented when the element has been fully
/// constructed. A reader which sees an index beyond its view of the current
/// count will ignore it and treat that as if there was no entry.
///
/// Reader/writer synchronization for resizing the arrays is handled by tracking
/// the current number of active readers. When resizing, the new array is
/// allocated, the data copied, and then the old array is placed in a free list.
/// The free list is only deallocated if there are no readers, otherwise freeing
/// is deferred.
///
/// Reader/writer synchronization for clearing the table is a combination of the
/// above. By keeping the old arrays around until all readers are finished, we
/// ensure that readers which started before the clear see valid (pre-clear)
/// data. Readers which see any array as empty will produce no results, thus
/// providing valid post-clear data.
///
/// This is intended to be used for tables that exist for the life of the
/// process. It has no destructor, to avoid generating useless global destructor
/// calls. The memory it allocates can be freed by calling clear() with no
/// outstanding readers, but this won't destroy the static mutex it uses.
template <class ElemTy, class MutexTy = LazyMutex>
struct ConcurrentReadableHashMap {
  // We don't call destructors. Make sure the elements will put up with this.
  static_assert(std::is_trivially_destructible<ElemTy>::value,
                "Elements must not have destructors (they won't be called).");

private:
  /// The reciprocal of the fraction of empty slots at which we expand the
  /// table. A value of 4 means that we resize when only 1/4 of the slots
  /// remain empty, i.e. at 75% load factor.
  static const size_t ResizeProportion = 4;

  /// Get the "good size" for a given allocation size. When available, this
  /// rounds up to the next allocation quantum by calling `malloc_good_size`.
  /// Otherwise, just return the passed-in size, which is always valid even if
  /// not necessarily optimal.
  static size_t goodSize(size_t size) {
#if defined(__APPLE__) && defined(__MACH__) && SWIFT_STDLIB_HAS_DARWIN_LIBMALLOC
    return malloc_good_size(size);
#else
    return size;
#endif
  }

  /// A private class representing the storage of the indices. In order to
  /// ensure that readers can get a consistent view of the indices with a single
  /// atomic read, we store the size of the indices array inline, as the first
  /// element in the array.
  ///
  /// We want the number of indices to be a power of two so that we can use a
  /// bitwise AND to convert a hash code to an index. We want the entire array
  /// to be a power of two in size to be friendly to the allocator, but the size
  /// is stored inline. We work around this contradiction by considering the
  /// first index to always be occupied with a value that never matches any key.
  struct IndexStorage {
    using RawType = uintptr_t;

    RawType Value;

    static constexpr uintptr_t log2(uintptr_t x) {
      return x <= 1 ? 0 : log2(x >> 1) + 1;
    }

    // A crude way to detect trivial use-after-free bugs given that a lot of
    // data structures have a strong bias toward bits that are zero.
#ifndef NDEBUG
    static constexpr uint8_t InlineCapacityDebugBits = 0xC0;
#else
    static constexpr uint8_t InlineCapacityDebugBits = 0;
#endif
    static constexpr uintptr_t InlineIndexBits = 4;
    static constexpr uintptr_t InlineIndexMask = 0xF;
    static constexpr uintptr_t InlineCapacity =
        sizeof(RawType) * CHAR_BIT / InlineIndexBits;
    static constexpr uintptr_t InlineCapacityLog2 = log2(InlineCapacity);

    // Indices can be stored in different ways, depending on how big they need
    // to be. The index mode is stored in the bottom two bits of Value. The
    // meaning of the rest of Value depends on the mode.
    enum class IndexMode {
      // Value is treated as an array of four-bit integers, storing the indices.
      // The first element overlaps with the mode, and is never used.
      Inline,

      // The rest of Value holds a pointer to storage. The first byte of this
      // storage holds the log2 of the storage capacity. The storage is treated
      // as an array of 8, 16, or 32-bit integers. The first element overlaps
      // with the capacity, and is never used.
      Array8,
      Array16,
      Array32,
    };

    IndexStorage() : Value(0) {}
    IndexStorage(RawType value) : Value(value) {}
    IndexStorage(void *ptr, unsigned indexSize, uint8_t capacityLog2) {
      assert(capacityLog2 > InlineCapacityLog2);
      IndexMode mode;
      switch (indexSize) {
      case sizeof(uint8_t):
        mode = IndexMode::Array8;
        break;
      case sizeof(uint16_t):
        mode = IndexMode::Array16;
        break;
      case sizeof(uint32_t):
        mode = IndexMode::Array32;
        break;
      default:
        swift_unreachable("unknown index size");
      }
      Value = reinterpret_cast<uintptr_t>(ptr) | static_cast<uintptr_t>(mode);
      *reinterpret_cast<uint8_t *>(ptr) = capacityLog2 | InlineCapacityDebugBits;
    }

    bool valueIsPointer() { return Value & 3; }

    void *pointer() {
      if (valueIsPointer())
        return (void *)(Value & (RawType)~3);
      return nullptr;
    }

    IndexMode indexMode() { return IndexMode(Value & 3); }

    // Index size is variable based on capacity, either 8, 16, or 32 bits.
    //
    // This is somewhat conservative. We could have, for example, a capacity of
    // 512 but a maximum index of only 200, which would still allow for 8-bit
    // indices. However, taking advantage of this would require reallocating
    // the index storage when the element count crossed a threshold, which is
    // more complex, and the advantages are minimal. This keeps it simple.

    // Get the size, in bytes, of the index needed for the given capacity.
    static unsigned indexSize(uint8_t capacityLog2) {
      if (capacityLog2 <= sizeof(uint8_t) * CHAR_BIT)
        return sizeof(uint8_t);
      if (capacityLog2 <= sizeof(uint16_t) * CHAR_BIT)
        return sizeof(uint16_t);
      return sizeof(uint32_t);
    }

    uint8_t getCapacityLog2() {
      if (auto *ptr = pointer()) {
        auto result = *reinterpret_cast<uint8_t *>(ptr);
        assert((result & InlineCapacityDebugBits) == InlineCapacityDebugBits);
        return result & ~InlineCapacityDebugBits;
      }
      return InlineCapacityLog2;
    }

    static IndexStorage allocate(size_t capacityLog2) {
      assert(capacityLog2 > 0);
      size_t capacity = 1UL << capacityLog2;
      unsigned size = indexSize(capacityLog2);
      auto *ptr = calloc(capacity, size);
      if (!ptr)
        swift::crash("Could not allocate memory.");
      return IndexStorage(ptr, size, capacityLog2);
    }

    unsigned loadIndexAt(size_t i, std::memory_order order) {
      assert(i > 0 && "index zero is off-limits, used to store capacity");
      assert(i < (1 << getCapacityLog2()) &&
             "index is off the end of the indices");

      switch (indexMode()) {
      case IndexMode::Inline:
        return (Value >> (i * InlineIndexBits)) & InlineIndexMask;
      case IndexMode::Array8:
        return ((std::atomic<uint8_t> *)pointer())[i].load(order);
      case IndexMode::Array16:
        return ((std::atomic<uint16_t> *)pointer())[i].load(order);
      case IndexMode::Array32:
        return ((std::atomic<uint32_t> *)pointer())[i].load(order);
      }
    }

    void storeIndexAt(std::atomic<RawType> *inlineStorage, unsigned value,
                      size_t i, std::memory_order order) {
      assert(i > 0 && "index zero is off-limits, used to store capacity");
      assert(i < (1 << getCapacityLog2()) &&
             "index is off the end of the indices");

      switch (indexMode()) {
      case IndexMode::Inline: {
        assert(value == (value & InlineIndexMask) && "value is too big to fit");
        auto shift = i * InlineIndexBits;
        assert((Value & (InlineIndexMask << shift)) == 0 &&
               "can't overwrite an existing index");
        assert(Value == inlineStorage->load(std::memory_order_relaxed) &&
               "writing with a stale IndexStorage");
        auto newStorage = Value | ((RawType)value << shift);
        inlineStorage->store(newStorage, order);
        break;
      }
      case IndexMode::Array8:
        ((std::atomic<uint8_t> *)pointer())[i].store(value, order);
        break;
      case IndexMode::Array16:
        ((std::atomic<uint16_t> *)pointer())[i].store(value, order);
        break;
      case IndexMode::Array32:
        ((std::atomic<uint32_t> *)pointer())[i].store(value, order);
        break;
      }
    }
  };
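
  // Worked example of the encoding above (illustrative, derived from the
  // constants; the exact numbers assume a 64-bit uintptr_t): InlineCapacity is
  // 64 / 4 = 16 four-bit slots, so InlineCapacityLog2 is 4 and up to 15
  // entries (slot 0 is reserved) fit with no separate allocation. The first
  // out-of-line allocation uses capacityLog2 = 5; since 5 <= 8, indexSize()
  // picks 8-bit indices, and 16-bit indices only appear once capacityLog2
  // exceeds 8, i.e. at more than 256 slots.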

  /// The struct used for element storage. The `Elem` member is considered to be
  /// the first element of a variable-length array, whose size is determined by
  /// the allocation.
  struct ElementStorage {
    uintptr_t Capacity : 32;
    ElemTy Elem;

    static ElementStorage *allocate(size_t capacity) {
      auto headerSize = offsetof(ElementStorage, Elem);
      auto size = goodSize(headerSize + capacity * sizeof(ElemTy));

      auto *ptr = reinterpret_cast<ElementStorage *>(malloc(size));
      if (!ptr)
        swift::crash("Could not allocate memory.");

      ptr->Capacity = (size - headerSize) / sizeof(ElemTy);

      return ptr;
    }

    ElemTy *data() { return &Elem; }
  };

  /// The number of readers currently active, equal to the number of snapshot
  /// objects currently alive.
  std::atomic<uint32_t> ReaderCount{0};

  /// The number of elements in the elements array.
  std::atomic<uint32_t> ElementCount{0};

  /// The array of elements.
  std::atomic<ElementStorage *> Elements{nullptr};

  /// The array of indices.
  ///
  /// This has to be stored as an IndexStorage::RawType instead of an
  /// IndexStorage because some of our targets don't support interesting
  /// structs as atomic types. See also MetadataCache::TrackingInfo which uses
  /// the same technique.
  std::atomic<typename IndexStorage::RawType> Indices{0};

  /// The writer lock, which must be taken before any mutation of the table.
  MutexTy WriterLock;

  /// The list of pointers to be freed once no readers are active.
  ConcurrentFreeListNode *FreeList{nullptr};

  void incrementReaders() {
    ReaderCount.fetch_add(1, std::memory_order_acquire);
  }

  void decrementReaders() {
    ReaderCount.fetch_sub(1, std::memory_order_release);
  }

  /// Free all the arrays in the free lists if there are no active readers. If
  /// there are active readers, do nothing.
  void deallocateFreeListIfSafe() {
    // The standard says that std::memory_order_seq_cst only applies to
    // read-modify-write operations, so we need an explicit fence:
    std::atomic_thread_fence(std::memory_order_seq_cst);
    if (ReaderCount.load(std::memory_order_relaxed) == 0)
      ConcurrentFreeListNode::freeAll(&FreeList, free);
  }

  /// Grow the elements array, adding the old array to the free list and
  /// returning the new array with all existing elements copied into it.
  ElementStorage *resize(ElementStorage *elements, size_t elementCount) {
    // Grow capacity by 25%, making sure we grow by at least 1.
    size_t newCapacity =
        std::max(elementCount + (elementCount >> 2), elementCount + 1);
    auto *newElements = ElementStorage::allocate(newCapacity);

    if (elements) {
      if (std::is_trivially_copyable<ElemTy>::value) {
        memcpy(newElements->data(), elements->data(),
               elementCount * sizeof(ElemTy));
      } else {
        std::uninitialized_copy_n(elements->data(), elementCount,
                                  newElements->data());
      }
      ConcurrentFreeListNode::add(&FreeList, elements);
    }

    // Use seq_cst here to ensure that the subsequent load of ReaderCount is
    // ordered after this store. If ReaderCount is loaded first, then a new
    // reader could come in between that load and this store, and then we
    // could end up freeing the old elements pointer while it's still in use.
    Elements.store(newElements, std::memory_order_seq_cst);
    return newElements;
  }

  /// Grow the indices array, adding the old array to the free list and
  /// returning the new array with all existing indices copied into it. This
  /// operation performs a rehash, so that the indices are in the correct
  /// location in the new array.
  IndexStorage resize(IndexStorage indices, uint8_t indicesCapacityLog2,
                      ElemTy *elements) {
    // Double the size.
    size_t newCapacityLog2 = indicesCapacityLog2 + 1;
    size_t newMask = (1UL << newCapacityLog2) - 1;

    IndexStorage newIndices = IndexStorage::allocate(newCapacityLog2);

    size_t indicesCount = 1UL << indicesCapacityLog2;
    for (size_t i = 1; i < indicesCount; i++) {
      unsigned index = indices.loadIndexAt(i, std::memory_order_relaxed);
      if (index == 0)
        continue;

      auto *element = &elements[index - 1];
      auto hash = hash_value(*element);

      size_t newI = hash & newMask;
      // Index 0 is unusable (occupied by the capacity), so always skip it.
      while (newI == 0 ||
             newIndices.loadIndexAt(newI, std::memory_order_relaxed) != 0) {
        newI = (newI + 1) & newMask;
      }
      newIndices.storeIndexAt(nullptr, index, newI, std::memory_order_relaxed);
    }

    // Use seq_cst here to ensure that the subsequent load of ReaderCount is
    // ordered after this store. If ReaderCount is loaded first, then a new
    // reader could come in between that load and this store, and then we
    // could end up freeing the old indices pointer while it's still in use.
    Indices.store(newIndices.Value, std::memory_order_seq_cst);

    if (auto *ptr = indices.pointer())
      ConcurrentFreeListNode::add(&FreeList, ptr);

    return newIndices;
  }

  /// Search for the given key within the given indices and elements arrays. If
  /// an entry already exists for that key, return a pointer to the element. If
  /// no entry exists, return the location in the indices array where the index
  /// of the new element would be stored.
  template <class KeyTy>
  static std::pair<ElemTy *, unsigned>
  find(const KeyTy &key, IndexStorage indices, size_t elementCount,
       ElemTy *elements) {
    auto hash = hash_value(key);
    auto indicesMask = (1UL << indices.getCapacityLog2()) - 1;

    auto i = hash & indicesMask;
    while (true) {
      // Index 0 is used for the mask and is not actually an index.
      if (i == 0)
        i++;

      auto index = indices.loadIndexAt(i, std::memory_order_acquire);
      // Element indices are 1-based, 0 means no entry.
      if (index == 0)
        return {nullptr, i};
      if (index - 1 < elementCount) {
        auto *candidate = &elements[index - 1];
        if (candidate->matchesKey(key))
          return {candidate, 0};
      }

      i = (i + 1) & indicesMask;
    }
  }

  // Common implementation for `getOrInsert` and `GetOrInsertManyScope`
  template <class KeyTy, typename Call>
  void getOrInsertExternallyLocked(KeyTy key, const Call &call) {
    auto indices = IndexStorage{Indices.load(std::memory_order_relaxed)};
    auto indicesCapacityLog2 = indices.getCapacityLog2();
    auto elementCount = ElementCount.load(std::memory_order_relaxed);
    auto *elements = Elements.load(std::memory_order_relaxed);
    auto *elementsPtr = elements ? elements->data() : nullptr;

    auto found = this->find(key, indices, elementCount, elementsPtr);
    if (found.first) {
      call(found.first, false);
      return;
    }

    auto indicesCapacity = 1UL << indicesCapacityLog2;

    // The number of slots in use is elementCount + 1, since the capacity also
    // takes a slot.
    auto emptyCount = indicesCapacity - (elementCount + 1);
    auto proportion = indicesCapacity / emptyCount;
    if (proportion >= ResizeProportion) {
      indices = resize(indices, indicesCapacityLog2, elementsPtr);
      found = find(key, indices, elementCount, elementsPtr);
      assert(!found.first && "Shouldn't suddenly find the key after rehashing");
    }

    if (!elements || elementCount >= elements->Capacity) {
      elements = resize(elements, elementCount);
    }
    auto *element = &elements->data()[elementCount];

    // Order matters: fill out the element, then update the count,
    // then update the index.
    bool keep = call(element, true);
    if (keep) {
      assert(hash_value(key) == hash_value(*element) &&
             "Element must have the same hash code as its key.");
      ElementCount.store(elementCount + 1, std::memory_order_release);
      indices.storeIndexAt(&Indices, elementCount + 1, found.second,
                           std::memory_order_release);
    }
  }

public:
  // Implicitly trivial constructor/destructor.
  ConcurrentReadableHashMap() = default;
  ~ConcurrentReadableHashMap() = default;

  // This type cannot be safely copied or moved.
  ConcurrentReadableHashMap(const ConcurrentReadableHashMap &) = delete;
  ConcurrentReadableHashMap(ConcurrentReadableHashMap &&) = delete;
  ConcurrentReadableHashMap &
  operator=(const ConcurrentReadableHashMap &) = delete;

  /// Returns whether there are outstanding readers. For testing purposes only.
  bool hasActiveReaders() {
    return ReaderCount.load(std::memory_order_relaxed) > 0;
  }

  /// Readers take a snapshot of the hash map, then work with the snapshot.
  class Snapshot {
    ConcurrentReadableHashMap *Map;
    IndexStorage Indices;
    ElemTy *Elements;
    size_t ElementCount;

  public:
    Snapshot(ConcurrentReadableHashMap *map, IndexStorage indices,
             ElemTy *elements, size_t elementCount)
        : Map(map), Indices(indices), Elements(elements),
          ElementCount(elementCount) {}

    Snapshot(const Snapshot &other)
        : Map(other.Map), Indices(other.Indices), Elements(other.Elements),
          ElementCount(other.ElementCount) {
      Map->incrementReaders();
    }

    ~Snapshot() { Map->decrementReaders(); }

    /// Search for an element matching the given key. Returns a pointer to the
    /// found element, or nullptr if no matching element exists.
    //
    // This is marked as ref-qualified (the &) to make sure it can't be called
    // on temporaries, since the temporary would be destroyed before the return
    // value can be used, making it invalid.
    template <class KeyTy> const ElemTy *find(const KeyTy &key) & {
      if (!Indices.Value || !ElementCount || !Elements)
        return nullptr;
      return ConcurrentReadableHashMap::find(key, Indices, ElementCount,
                                             Elements)
          .first;
    }
  };

  /// Take a snapshot of the current state of the hash map.
  Snapshot snapshot() {
    incrementReaders();

    // Carefully loading the indices, element count, and elements pointer in
    // order ensures a consistent view of the table with respect to concurrent
    // inserts. However, this is not sufficient to avoid an inconsistent view
    // with respect to concurrent clears. The danger scenario is:
    //
    // 1. Read indices and elementCount from a table with N entries.
    // 2. Another thread clears the table.
    // 3. Another thread inserts M entries, where M < N.
    // 4. The reader thread reads elements.
    // 5. The reader thread performs a find. The key's hash leads us to an index
    //    I, where I > M.
    // 6. The reader thread reads from element I, which is off the end of the
    //    elements array.
    //
    // To avoid this, read the elements pointer twice, at the beginning and end.
    // If the values are not the same then there may have been a clear in the
    // middle, so we retry. This will have false positives: a new element
    // pointer can just mean a concurrent insert that triggered a resize of the
    // elements array. This is harmless aside from a small performance hit, and
    // should not happen often.
    IndexStorage indices;
    size_t elementCount;
    ElementStorage *elements;
    ElementStorage *elements2;
    do {
      elements = Elements.load(std::memory_order_acquire);
      indices = Indices.load(std::memory_order_acquire);
      elementCount = ElementCount.load(std::memory_order_acquire);
      elements2 = Elements.load(std::memory_order_acquire);
    } while (elements != elements2);

    ElemTy *elementsPtr = elements ? elements->data() : nullptr;
    return Snapshot(this, indices, elementsPtr, elementCount);
  }

  /// A wrapper that allows performing several `getOrInsert` operations under
  /// the same lock.
  class GetOrInsertManyScope {
    GetOrInsertManyScope() = delete;
    GetOrInsertManyScope(const GetOrInsertManyScope &) = delete;
    GetOrInsertManyScope &operator=(const GetOrInsertManyScope &) = delete;
    GetOrInsertManyScope(GetOrInsertManyScope &&) = delete;
    GetOrInsertManyScope &operator=(GetOrInsertManyScope &&) = delete;

    ConcurrentReadableHashMap &Map;

  public:
    GetOrInsertManyScope(ConcurrentReadableHashMap &map) : Map(map) {
      Map.WriterLock.lock();
    }

    ~GetOrInsertManyScope() {
      Map.deallocateFreeListIfSafe();
      Map.WriterLock.unlock();
    }

    /// Get an element by key, or insert a new element for that key if one is
    /// not already present. Invoke `call` with the pointer to the element.
    ///
    /// `call` is passed the following parameters:
    /// - `element`: the pointer to the element corresponding to `key`
    /// - `created`: true if the element is newly created, false if it already
    ///   exists
    ///
    /// `call` returns a `bool`. When `created` is `true`, the return value
    /// means:
    /// - `true`: the new entry is kept
    /// - `false`: the new entry is discarded
    ///
    /// If the new entry is kept, then the new element MUST be initialized, and
    /// have a hash value that matches the hash value of `key`.
    ///
    /// The return value is ignored when `created` is `false`.
    template <class KeyTy, typename Call>
    void getOrInsert(KeyTy key, const Call &call) {
      Map.getOrInsertExternallyLocked(key, call);
    }
  };
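
  // Illustrative sketch of batching insertions under one lock (assumes an
  // element type `Entry` with a matching `matchesKey`/`hash_value` pair and a
  // map instance `Table`; both names are hypothetical):
  //
  //   {
  //     ConcurrentReadableHashMap<Entry>::GetOrInsertManyScope scope(Table);
  //     for (auto &key : keys) {
  //       scope.getOrInsert(key, [&](Entry *entry, bool created) {
  //         if (created)
  //           *entry = Entry(key); // initialize before returning true
  //         return true;           // keep the (possibly new) entry
  //       });
  //     }
  //   } // lock released; deferred frees reclaimed here if no readers remain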

  /// Get an element by key, or insert a new element for that key if one is not
  /// already present. Invoke `call` with the pointer to the element. BEWARE:
  /// `call` is invoked with the internal writer lock held, so keep work to a
  /// minimum.
  ///
  /// `call` is passed the following parameters:
  /// - `element`: the pointer to the element corresponding to `key`
  /// - `created`: true if the element is newly created, false if it already
  ///   exists
  ///
  /// `call` returns a `bool`. When `created` is `true`, the return value means:
  /// - `true`: the new entry is kept
  /// - `false`: the new entry is discarded
  ///
  /// If the new entry is kept, then the new element MUST be initialized, and
  /// have a hash value that matches the hash value of `key`.
  ///
  /// The return value is ignored when `created` is `false`.
  template <class KeyTy, typename Call>
  void getOrInsert(KeyTy key, const Call &call) {
    typename MutexTy::ScopedLock guard(WriterLock);
    getOrInsertExternallyLocked(key, call);
    deallocateFreeListIfSafe();
  }

  /// Clear the hash table, freeing (when safe) all memory currently used for
  /// indices and elements.
  ///
  /// - addFreedNodes: if non-null, this function will be called with the head
  ///   of the free list. The function may add additional nodes to the free
  ///   list, which will be freed when it is safe to do so.
  void clear(std::function<void(ConcurrentFreeListNode *&)> addFreedNodes
                 = nullptr) {
    typename MutexTy::ScopedLock guard(WriterLock);

    IndexStorage indices = Indices.load(std::memory_order_relaxed);
    auto *elements = Elements.load(std::memory_order_relaxed);

    // Order doesn't matter here, snapshots will gracefully handle any field
    // being NULL/0 while the others are not.
    Indices.store(0, std::memory_order_relaxed);
    ElementCount.store(0, std::memory_order_relaxed);
    Elements.store(nullptr, std::memory_order_relaxed);

    if (auto *ptr = indices.pointer())
      ConcurrentFreeListNode::add(&FreeList, ptr);
    ConcurrentFreeListNode::add(&FreeList, elements);

    if (addFreedNodes)
      addFreedNodes(FreeList);

    deallocateFreeListIfSafe();
  }
};
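
// Illustrative sketch of a complete element type and typical reader/writer
// usage (not part of the runtime; `CachedEntry`, `Cache`, and `lookup` are
// hypothetical names). An element must provide `matchesKey` and a `hash_value`
// overload whose result matches the hash of the key it was inserted under, and
// it must be trivially destructible.
#if 0
struct CachedEntry {
  uintptr_t Key;
  uintptr_t Payload;

  bool matchesKey(uintptr_t key) const { return Key == key; }

  friend llvm::hash_code hash_value(const CachedEntry &entry) {
    return llvm::hash_value(entry.Key);
  }
};

static ConcurrentReadableHashMap<CachedEntry> Cache;

uintptr_t lookup(uintptr_t key, uintptr_t fallback) {
  // Readers: take a snapshot and query it without taking the writer lock.
  {
    auto snapshot = Cache.snapshot();
    if (const auto *found = snapshot.find(key))
      return found->Payload;
  }

  // Writers: getOrInsert takes the writer lock and invokes the callback with
  // the element and a flag saying whether it was newly created.
  Cache.getOrInsert(key, [&](CachedEntry *entry, bool created) {
    if (created) {
      entry->Key = key;
      entry->Payload = fallback;
    }
    return true; // keep the entry (ignored when created is false)
  });
  return fallback;
}
#endif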

/// A wrapper type for indirect hash map elements. Stores a pointer to the real
/// element and forwards key matching and hashing.
template <class ElemTy> struct HashMapElementWrapper {
  ElemTy *Ptr;

  template <class KeyTy> bool matchesKey(const KeyTy &key) {
    return Ptr->matchesKey(key);
  }

  friend llvm::hash_code hash_value(const HashMapElementWrapper &wrapper) {
    return hash_value(*wrapper.Ptr);
  }
};

/// A ConcurrentReadableHashMap that provides stable addresses for the elements
/// by allocating them separately and storing pointers to them. The elements of
/// the hash table are instances of HashMapElementWrapper. A new getOrInsert
/// method is provided that directly returns the stable element pointer.
template <class ElemTy, class Allocator, class MutexTy = LazyMutex>
struct StableAddressConcurrentReadableHashMap
    : public ConcurrentReadableHashMap<HashMapElementWrapper<ElemTy>, MutexTy> {
  // Implicitly trivial destructor.
  ~StableAddressConcurrentReadableHashMap() = default;

  std::atomic<ElemTy *> LastFound{nullptr};

  /// Get or insert an element for the given key and arguments. Returns the
  /// pointer to the existing or new element, and a bool indicating whether the
  /// element was created. When false, the element already existed before the
  /// call.
  template <class KeyTy, class... ArgTys>
  std::pair<ElemTy *, bool> getOrInsert(KeyTy key, ArgTys &&...args) {
    // See if this is a repeat of the previous search.
    if (auto lastFound = LastFound.load(std::memory_order_acquire))
      if (lastFound->matchesKey(key))
        return {lastFound, false};

    // Optimize for the case where the value already exists.
    {
      // Tightly scope the snapshot so it's gone before we call getOrInsert
      // below, otherwise that call will always see an outstanding snapshot and
      // never be able to collect garbage.
      auto snapshot = this->snapshot();
      if (auto wrapper = snapshot.find(key)) {
        LastFound.store(wrapper->Ptr, std::memory_order_relaxed);
        return {wrapper->Ptr, false};
      }
    }

    // No such element. Insert if needed. Note: another thread may have inserted
    // it in the meantime, so we still have to handle both cases!
    ElemTy *ptr = nullptr;
    bool outerCreated = false;
    ConcurrentReadableHashMap<HashMapElementWrapper<ElemTy>, MutexTy>::
        getOrInsert(key, [&](HashMapElementWrapper<ElemTy> *wrapper,
                             bool created) {
          if (created) {
            // Created the indirect entry. Allocate the actual storage.
            size_t allocSize =
                sizeof(ElemTy) + ElemTy::getExtraAllocationSize(key, args...);
            void *memory = Allocator().Allocate(allocSize, alignof(ElemTy));
            new (memory) ElemTy(key, std::forward<ArgTys>(args)...);
            wrapper->Ptr = reinterpret_cast<ElemTy *>(memory);
          }
          ptr = wrapper->Ptr;
          outerCreated = created;
          return true; // Keep the new entry.
        });
    LastFound.store(ptr, std::memory_order_relaxed);
    return {ptr, outerCreated};
  }

  template <class KeyTy> ElemTy *find(const KeyTy &key) {
    auto snapshot = this->snapshot();
    auto result = snapshot.find(key);
    if (!result)
      return nullptr;
    return result->Ptr;
  }

private:
  // Clearing would require deallocating elements, which we don't support.
  void clear() = delete;
};
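
// Illustrative sketch of the stable-address variant (hypothetical names:
// `RecordEntry`, `Records`, `getRecord`; llvm::MallocAllocator is just one
// possible choice of allocator). Elements live in memory obtained from the
// Allocator, so the pointer returned by getOrInsert stays valid even when the
// table resizes. The element type must provide `matchesKey`, a matching
// `hash_value`, a `(key, args...)` constructor, and `getExtraAllocationSize`.
#if 0
struct RecordEntry {
  uint32_t Key;

  RecordEntry(uint32_t key) : Key(key) {}

  bool matchesKey(uint32_t key) const { return Key == key; }

  friend llvm::hash_code hash_value(const RecordEntry &entry) {
    return llvm::hash_value(entry.Key);
  }

  // No trailing variable-length data in this example.
  static size_t getExtraAllocationSize(uint32_t) { return 0; }
};

static StableAddressConcurrentReadableHashMap<RecordEntry, llvm::MallocAllocator>
    Records;

RecordEntry *getRecord(uint32_t key) {
  // The returned pointer is stable for the life of the process.
  return Records.getOrInsert(key).first;
}
#endif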

} // end namespace swift

#endif // SWIFT_RUNTIME_CONCURRENTUTILS_H