//===--- Concurrent.h - Concurrent Data Structures -------------*- C++ -*-===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

#ifndef SWIFT_RUNTIME_CONCURRENTUTILS_H
#define SWIFT_RUNTIME_CONCURRENTUTILS_H

#include "Atomic.h"
#include "Debug.h"
#include "swift/Threading/Mutex.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/Support/Allocator.h"

#include <algorithm>
#include <atomic>
#include <cassert>
#include <climits>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <memory>
#include <new>
#include <stdint.h>
#include <type_traits>

#if defined(__FreeBSD__) || defined(__CYGWIN__) || defined(__HAIKU__)
#include <stdio.h>
#endif

#if defined(__APPLE__) && defined(__MACH__)
#include <malloc/malloc.h>
#endif

namespace swift {

/// A simple linked list representing pointers that need to be freed. This is
/// not a concurrent data structure, just a bit of support used in the types
/// below.
struct ConcurrentFreeListNode {
  ConcurrentFreeListNode *Next;
  void *Ptr;

  static void add(ConcurrentFreeListNode **head, void *ptr) {
    auto *newNode = reinterpret_cast<ConcurrentFreeListNode *>(
        malloc(sizeof(ConcurrentFreeListNode)));
    newNode->Next = *head;
    newNode->Ptr = ptr;
    *head = newNode;
  }

  /// Free all nodes in the free list, resetting `head` to `NULL`. Calls
  /// `FreeFn` on the Ptr field of every node.
  template <typename FreeFn>
  static void freeAll(ConcurrentFreeListNode **head, const FreeFn &freeFn) {
    auto *node = *head;
    while (node) {
      auto *next = node->Next;
      freeFn(node->Ptr);
      free(node);
      node = next;
    }
    *head = nullptr;
  }
};
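
// Illustrative sketch (not used by the runtime): how a writer might defer
// freeing a block until it is known that no readers can still reference it.
// The `exampleDeferredFree` helper and its parameters are hypothetical and
// exist only to demonstrate the add()/freeAll() protocol above.
inline void exampleDeferredFree(ConcurrentFreeListNode **freeList,
                                void *retiredBlock, bool noActiveReaders) {
  // Retire the block; it stays on the list until it is safe to free.
  ConcurrentFreeListNode::add(freeList, retiredBlock);

  // Once the caller knows there are no concurrent readers, the whole list can
  // be drained with whatever deallocation function matches the allocations.
  if (noActiveReaders)
    ConcurrentFreeListNode::freeAll(freeList, free);
}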

/// An append-only array that can be read without taking locks. Writes
/// are still locked and serialized, but only with respect to other
/// writes.
template <class ElemTy> struct ConcurrentReadableArray {
private:
  /// The struct used for the array's storage. The `Elem` member is
  /// considered to be the first element of a variable-length array,
  /// whose size is determined by the allocation. The `Capacity` member
  /// from `ConcurrentReadableArray` indicates how large it can be.
  struct Storage {
    std::atomic<size_t> Count;
    typename std::aligned_storage<sizeof(ElemTy), alignof(ElemTy)>::type Elem;

    static Storage *allocate(size_t capacity) {
      auto size = sizeof(Storage) + (capacity - 1) * sizeof(Storage().Elem);
      auto *ptr = reinterpret_cast<Storage *>(malloc(size));
      if (!ptr)
        swift::crash("Could not allocate memory.");
      ptr->Count.store(0, std::memory_order_relaxed);
      return ptr;
    }

    void deallocate() {
      for (size_t i = 0; i < Count; ++i) {
        data()[i].~ElemTy();
      }
      free(this);
    }

    ElemTy *data() { return reinterpret_cast<ElemTy *>(&Elem); }
  };

  size_t Capacity;
  std::atomic<size_t> ReaderCount;
  std::atomic<Storage *> Elements;
  Mutex WriterLock;
  ConcurrentFreeListNode *FreeList{nullptr};

  void incrementReaders() {
    ReaderCount.fetch_add(1, std::memory_order_acquire);
  }

  void decrementReaders() {
    ReaderCount.fetch_sub(1, std::memory_order_release);
  }

  void deallocateFreeList() {
    ConcurrentFreeListNode::freeAll(&FreeList, [](void *ptr) {
      reinterpret_cast<Storage *>(ptr)->deallocate();
    });
  }

public:
  struct Snapshot {
    ConcurrentReadableArray *Array;
    const ElemTy *Start;
    size_t Count;

    Snapshot(ConcurrentReadableArray *array, const ElemTy *start, size_t count)
        : Array(array), Start(start), Count(count) {}

    Snapshot(const Snapshot &other)
        : Array(other.Array), Start(other.Start), Count(other.Count) {
      Array->incrementReaders();
    }

    ~Snapshot() { Array->decrementReaders(); }

    // These are marked as ref-qualified (the &) to make sure they can't be
    // called on temporaries, since the temporary would be destroyed before the
    // return value can be used, making it invalid.
    const ElemTy *begin() const & { return Start; }
    const ElemTy *end() const & { return Start + Count; }

    const ElemTy &operator[](size_t index) const & {
      assert(index < count() && "out-of-bounds access to snapshot element");
      return Start[index];
    }

    size_t count() const { return Count; }
  };

  // This type cannot be safely copied or moved.
  ConcurrentReadableArray(const ConcurrentReadableArray &) = delete;
  ConcurrentReadableArray(ConcurrentReadableArray &&) = delete;
  ConcurrentReadableArray &operator=(const ConcurrentReadableArray &) = delete;

  ConcurrentReadableArray() : Capacity(0), ReaderCount(0), Elements(nullptr) {}

  ~ConcurrentReadableArray() {
    assert(ReaderCount.load(std::memory_order_acquire) == 0 &&
           "deallocating ConcurrentReadableArray with outstanding snapshots");
    deallocateFreeList();
  }

  void push_back(const ElemTy &elem) {
    Mutex::ScopedLock guard(WriterLock);

    auto *storage = Elements.load(std::memory_order_relaxed);
    auto count = storage ? storage->Count.load(std::memory_order_relaxed) : 0;
    if (count >= Capacity) {
      auto newCapacity = std::max((size_t)16, count * 2);
      auto *newStorage = Storage::allocate(newCapacity);
      if (storage) {
        std::uninitialized_copy_n(storage->data(), count, newStorage->data());
        newStorage->Count.store(count, std::memory_order_release);
        ConcurrentFreeListNode::add(&FreeList, storage);
      }

      storage = newStorage;
      Capacity = newCapacity;
      Elements.store(storage, std::memory_order_release);
    }

    new (&storage->data()[count]) ElemTy(elem);
    storage->Count.store(count + 1, std::memory_order_release);

    // The standard says that std::memory_order_seq_cst only applies to
    // read-modify-write operations, so we need an explicit fence:
    std::atomic_thread_fence(std::memory_order_seq_cst);
    if (ReaderCount.load(std::memory_order_relaxed) == 0)
      deallocateFreeList();
  }

  Snapshot snapshot() {
    incrementReaders();
    auto *storage = Elements.load(SWIFT_MEMORY_ORDER_CONSUME);
    if (storage == nullptr) {
      return Snapshot(this, nullptr, 0);
    }

    auto count = storage->Count.load(std::memory_order_acquire);
    const auto *ptr = storage->data();
    return Snapshot(this, ptr, count);
  }
};
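
// Illustrative sketch (not used by the runtime): a writer publishes elements
// with push_back() while readers iterate a Snapshot. The `exampleSum` helper
// and its use of `size_t` elements are hypothetical, chosen only to show the
// reader/writer protocol.
inline size_t exampleSum(ConcurrentReadableArray<size_t> &array) {
  // Taking a snapshot registers this thread as a reader; the Snapshot's
  // destructor deregisters it, which is what allows the writer to eventually
  // free superseded storage.
  auto snapshot = array.snapshot();
  size_t sum = 0;
  for (auto &value : snapshot)
    sum += value;
  return sum;
}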

using llvm::hash_value;
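
// Illustrative sketch (not used by the runtime): the element types stored in
// the ConcurrentReadableHashMap defined below are expected to be trivially
// destructible, to implement matchesKey() for lookups, and to provide a
// hash_value() overload that agrees with the hash of the corresponding key.
// `ExampleHashMapElement` is a hypothetical element type showing the minimum
// required surface; it is referenced again in the usage sketches further down.
struct ExampleHashMapElement {
  uint32_t Key;
  uint32_t Value;

  // Lookups compare an element against the key it was inserted under.
  bool matchesKey(uint32_t key) const { return Key == key; }

  // Must hash exactly the same data that the key hashes, so that an element
  // rehashed during a resize lands where a lookup by key will probe.
  friend llvm::hash_code hash_value(const ExampleHashMapElement &element) {
    return llvm::hash_value(element.Key);
  }
};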

/// A hash table that can be queried without taking any locks. Writes are
/// still locked and serialized, but only with respect to other writes.
/// Writers can add elements and clear the table, but they cannot remove
/// individual elements. Readers work by taking a snapshot of the table and
/// then querying that snapshot.
///
/// The basic structure of the table consists of two arrays. Elements are
/// stored in a contiguous array, with new elements appended to the end. The
/// second array is the actual hash table, and it contains indices into the
/// elements array. This scheme cuts down on wasted space when the elements
/// are larger than a few bytes: instead of wasting `(1 - loadFactor) *
/// sizeof(element)` bytes on unused space in the hash table, we only waste
/// `(1 - loadFactor) * sizeof(index)`. This scheme also avoids readers seeing
/// partially constructed elements.
///
/// Reader/writer synchronization for new elements is handled by keeping an
/// element count which is only incremented when the element has been fully
/// constructed. A reader which sees an index beyond its view of the current
/// count will ignore it and treat that as if there was no entry.
///
/// Reader/writer synchronization for resizing the arrays is handled by
/// tracking the current number of active readers. When resizing, the new
/// array is allocated, the data copied, and then the old array is placed in a
/// free list. The free list is only deallocated if there are no readers,
/// otherwise freeing is deferred.
///
/// Reader/writer synchronization for clearing the table is a combination of
/// the above. By keeping the old arrays around until all readers are
/// finished, we ensure that readers which started before the clear see valid
/// (pre-clear) data. Readers which see any array as empty will produce no
/// results, thus providing valid post-clear data.
///
/// This is intended to be used for tables that exist for the life of the
/// process. It has no destructor, to avoid generating useless global
/// destructor calls. The memory it allocates can be freed by calling clear()
/// with no outstanding readers, but this won't destroy the static mutex it
/// uses.
template <class ElemTy, class MutexTy = LazyMutex>
struct ConcurrentReadableHashMap {
  // We don't call destructors. Make sure the elements will put up with this.
  static_assert(std::is_trivially_destructible<ElemTy>::value,
                "Elements must not have destructors (they won't be called).");

private:
  /// The reciprocal of the empty fraction at which we expand the table. A
  /// value of 4 means that we resize when only 1/4 of the slots are empty,
  /// i.e. at 75% load factor.
  static const size_t ResizeProportion = 4;

  /// Get the "good size" for a given allocation size. When available, this
  /// rounds up to the next allocation quantum by calling `malloc_good_size`.
  /// Otherwise, just return the passed-in size, which is always valid even if
  /// not necessarily optimal.
  static size_t goodSize(size_t size) {
#if defined(__APPLE__) && defined(__MACH__) && SWIFT_STDLIB_HAS_DARWIN_LIBMALLOC
    return malloc_good_size(size);
#else
    return size;
#endif
  }

  /// A private class representing the storage of the indices. In order to
  /// ensure that readers can get a consistent view of the indices with a
  /// single atomic read, we store the size of the indices array inline, as
  /// the first element in the array.
  ///
  /// We want the number of indices to be a power of two so that we can use a
  /// bitwise AND to convert a hash code to an index. We want the entire array
  /// to be a power of two in size to be friendly to the allocator, but the
  /// size is stored inline. We work around this contradiction by considering
  /// the first index to always be occupied with a value that never matches
  /// any key.
  struct IndexStorage {
    using RawType = uintptr_t;

    RawType Value;

    static constexpr uintptr_t log2(uintptr_t x) {
      return x <= 1 ? 0 : log2(x >> 1) + 1;
    }

    // A crude way to detect trivial use-after-free bugs given that a lot of
    // data structures have a strong bias toward bits that are zero.
#ifndef NDEBUG
    static constexpr uint8_t InlineCapacityDebugBits = 0xC0;
#else
    static constexpr uint8_t InlineCapacityDebugBits = 0;
#endif
    static constexpr uintptr_t InlineIndexBits = 4;
    static constexpr uintptr_t InlineIndexMask = 0xF;
    static constexpr uintptr_t InlineCapacity =
        sizeof(RawType) * CHAR_BIT / InlineIndexBits;
    static constexpr uintptr_t InlineCapacityLog2 = log2(InlineCapacity);

    // Indices can be stored in different ways, depending on how big they need
    // to be. The index mode is stored in the bottom two bits of Value. The
    // meaning of the rest of Value depends on the mode.
    enum class IndexMode {
      // Value is treated as an array of four-bit integers, storing the
      // indices. The first element overlaps with the mode, and is never used.
      Inline,

      // The rest of Value holds a pointer to storage. The first byte of this
      // storage holds the log2 of the storage capacity. The storage is
      // treated as an array of 8, 16, or 32-bit integers. The first element
      // overlaps with the capacity, and is never used.
      Array8,
      Array16,
      Array32,
    };

    IndexStorage() : Value(0) {}
    IndexStorage(RawType value) : Value(value) {}
    IndexStorage(void *ptr, unsigned indexSize, uint8_t capacityLog2) {
      assert(capacityLog2 > InlineCapacityLog2);
      IndexMode mode;
      switch (indexSize) {
      case sizeof(uint8_t):
        mode = IndexMode::Array8;
        break;
      case sizeof(uint16_t):
        mode = IndexMode::Array16;
        break;
      case sizeof(uint32_t):
        mode = IndexMode::Array32;
        break;
      default:
        swift_unreachable("unknown index size");
      }
      Value = reinterpret_cast<uintptr_t>(ptr) | static_cast<uintptr_t>(mode);
      *reinterpret_cast<uint8_t *>(ptr) =
          capacityLog2 | InlineCapacityDebugBits;
    }

    bool valueIsPointer() { return Value & 3; }

    void *pointer() {
      if (valueIsPointer())
        return (void *)(Value & (RawType)~3);
      return nullptr;
    }

    IndexMode indexMode() { return IndexMode(Value & 3); }

    // Index size is variable based on capacity, either 8, 16, or 32 bits.
    //
    // This is somewhat conservative. We could have, for example, a capacity
    // of 512 but a maximum index of only 200, which would still allow for
    // 8-bit indices. However, taking advantage of this would require
    // reallocating the index storage when the element count crossed a
    // threshold, which is more complex, and the advantages are minimal. This
    // keeps it simple.

    // Get the size, in bytes, of the index needed for the given capacity.
    static unsigned indexSize(uint8_t capacityLog2) {
      if (capacityLog2 <= sizeof(uint8_t) * CHAR_BIT)
        return sizeof(uint8_t);
      if (capacityLog2 <= sizeof(uint16_t) * CHAR_BIT)
        return sizeof(uint16_t);
      return sizeof(uint32_t);
    }

    uint8_t getCapacityLog2() {
      if (auto *ptr = pointer()) {
        auto result = *reinterpret_cast<uint8_t *>(ptr);
        assert((result & InlineCapacityDebugBits) == InlineCapacityDebugBits);
        return result & ~InlineCapacityDebugBits;
      }
      return InlineCapacityLog2;
    }

    static IndexStorage allocate(size_t capacityLog2) {
      assert(capacityLog2 > 0);
      size_t capacity = 1UL << capacityLog2;
      unsigned size = indexSize(capacityLog2);
      auto *ptr = calloc(capacity, size);
      if (!ptr)
        swift::crash("Could not allocate memory.");
      return IndexStorage(ptr, size, capacityLog2);
    }

    unsigned loadIndexAt(size_t i, std::memory_order order) {
      assert(i > 0 && "index zero is off-limits, used to store capacity");
      assert(i < (1 << getCapacityLog2()) &&
             "index is off the end of the indices");

      switch (indexMode()) {
      case IndexMode::Inline:
        return (Value >> (i * InlineIndexBits)) & InlineIndexMask;
      case IndexMode::Array8:
        return ((std::atomic<uint8_t> *)pointer())[i].load(order);
      case IndexMode::Array16:
        return ((std::atomic<uint16_t> *)pointer())[i].load(order);
      case IndexMode::Array32:
        return ((std::atomic<uint32_t> *)pointer())[i].load(order);
      }
    }

    void storeIndexAt(std::atomic<RawType> *inlineStorage, unsigned value,
                      size_t i, std::memory_order order) {
      assert(i > 0 && "index zero is off-limits, used to store capacity");
      assert(i < (1 << getCapacityLog2()) &&
             "index is off the end of the indices");

      switch (indexMode()) {
      case IndexMode::Inline: {
        assert(value == (value & InlineIndexMask) && "value is too big to fit");
        auto shift = i * InlineIndexBits;
        assert((Value & (InlineIndexMask << shift)) == 0 &&
               "can't overwrite an existing index");
        assert(Value == inlineStorage->load(std::memory_order_relaxed) &&
               "writing with a stale IndexStorage");
        auto newStorage = Value | ((RawType)value << shift);
        inlineStorage->store(newStorage, order);
        break;
      }
      case IndexMode::Array8:
        ((std::atomic<uint8_t> *)pointer())[i].store(value, order);
        break;
      case IndexMode::Array16:
        ((std::atomic<uint16_t> *)pointer())[i].store(value, order);
        break;
      case IndexMode::Array32:
        ((std::atomic<uint32_t> *)pointer())[i].store(value, order);
        break;
      }
    }
  };
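
  // A worked example of the encoding above (illustrative only): on a 64-bit
  // target, a freshly zeroed Value is in Inline mode and packs 16 four-bit
  // slots. Slot 0 overlaps the two mode bits and is never used, so the inline
  // form can hold up to 15 occupied slots, each storing a 1-based element
  // index. Once that is outgrown, an out-of-line array of 8-, 16-, or 32-bit
  // slots is calloc'd; its pointer and mode share Value, its slot 0 holds the
  // capacity's log2 (tagged with InlineCapacityDebugBits in debug builds),
  // and slots 1 and up hold the element indices, with 0 still meaning
  // "empty".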

  /// The struct used for element storage. The `Elem` member is considered to
  /// be the first element of a variable-length array, whose size is
  /// determined by the allocation.
  struct ElementStorage {
    uintptr_t Capacity : 32;
    ElemTy Elem;

    static ElementStorage *allocate(size_t capacity) {
      auto headerSize = offsetof(ElementStorage, Elem);
      auto size = goodSize(headerSize + capacity * sizeof(ElemTy));

      auto *ptr = reinterpret_cast<ElementStorage *>(malloc(size));
      if (!ptr)
        swift::crash("Could not allocate memory.");

      ptr->Capacity = (size - headerSize) / sizeof(ElemTy);

      return ptr;
    }

    ElemTy *data() { return &Elem; }
  };

  /// The number of readers currently active, equal to the number of snapshot
  /// objects currently alive.
  std::atomic<uint32_t> ReaderCount{0};

  /// The number of elements in the elements array.
  std::atomic<uint32_t> ElementCount{0};

  /// The array of elements.
  std::atomic<ElementStorage *> Elements{nullptr};

  /// The array of indices.
  ///
  /// This has to be stored as an IndexStorage::RawType instead of an
  /// IndexStorage because some of our targets don't support interesting
  /// structs as atomic types. See also MetadataCache::TrackingInfo, which
  /// uses the same technique.
  std::atomic<typename IndexStorage::RawType> Indices{0};

  /// The writer lock, which must be taken before any mutation of the table.
  MutexTy WriterLock;

  /// The list of pointers to be freed once no readers are active.
  ConcurrentFreeListNode *FreeList{nullptr};

  void incrementReaders() {
    ReaderCount.fetch_add(1, std::memory_order_acquire);
  }

  void decrementReaders() {
    ReaderCount.fetch_sub(1, std::memory_order_release);
  }

  /// Free all the arrays in the free lists if there are no active readers. If
  /// there are active readers, do nothing.
  void deallocateFreeListIfSafe() {
    // The standard says that std::memory_order_seq_cst only applies to
    // read-modify-write operations, so we need an explicit fence:
    std::atomic_thread_fence(std::memory_order_seq_cst);
    if (ReaderCount.load(std::memory_order_relaxed) == 0)
      ConcurrentFreeListNode::freeAll(&FreeList, free);
  }

  /// Grow the elements array, adding the old array to the free list and
  /// returning the new array with all existing elements copied into it.
  ElementStorage *resize(ElementStorage *elements, size_t elementCount) {
    // Grow capacity by 25%, making sure we grow by at least 1.
    size_t newCapacity =
        std::max(elementCount + (elementCount >> 2), elementCount + 1);
    auto *newElements = ElementStorage::allocate(newCapacity);

    if (elements) {
      if (std::is_trivially_copyable<ElemTy>::value) {
        memcpy(newElements->data(), elements->data(),
               elementCount * sizeof(ElemTy));
      } else {
        std::uninitialized_copy_n(elements->data(), elementCount,
                                  newElements->data());
      }
      ConcurrentFreeListNode::add(&FreeList, elements);
    }

    // Use seq_cst here to ensure that the subsequent load of ReaderCount is
    // ordered after this store. If ReaderCount is loaded first, then a new
    // reader could come in between that load and this store, and then we
    // could end up freeing the old elements pointer while it's still in use.
    Elements.store(newElements, std::memory_order_seq_cst);

    return newElements;
  }

  /// Grow the indices array, adding the old array to the free list and
  /// returning the new array with all existing indices copied into it. This
  /// operation performs a rehash, so that the indices are in the correct
  /// location in the new array.
  IndexStorage resize(IndexStorage indices, uint8_t indicesCapacityLog2,
                      ElemTy *elements) {
    // Double the size.
    size_t newCapacityLog2 = indicesCapacityLog2 + 1;
    size_t newMask = (1UL << newCapacityLog2) - 1;

    IndexStorage newIndices = IndexStorage::allocate(newCapacityLog2);

    size_t indicesCount = 1UL << indicesCapacityLog2;
    for (size_t i = 1; i < indicesCount; i++) {
      unsigned index = indices.loadIndexAt(i, std::memory_order_relaxed);
      if (index == 0)
        continue;

      auto *element = &elements[index - 1];
      auto hash = hash_value(*element);

      size_t newI = hash & newMask;
      // Index 0 is unusable (occupied by the capacity), so always skip it.
      while (newI == 0 ||
             newIndices.loadIndexAt(newI, std::memory_order_relaxed) != 0) {
        newI = (newI + 1) & newMask;
      }
      newIndices.storeIndexAt(nullptr, index, newI, std::memory_order_relaxed);
    }

    // Use seq_cst here to ensure that the subsequent load of ReaderCount is
    // ordered after this store. If ReaderCount is loaded first, then a new
    // reader could come in between that load and this store, and then we
    // could end up freeing the old indices pointer while it's still in use.
    Indices.store(newIndices.Value, std::memory_order_seq_cst);

    if (auto *ptr = indices.pointer())
      ConcurrentFreeListNode::add(&FreeList, ptr);

    return newIndices;
  }

  /// Search for the given key within the given indices and elements arrays.
  /// If an entry already exists for that key, return a pointer to the
  /// element. If no entry exists, return the location in the indices array
  /// where the index of the new element would be stored.
  template <class KeyTy>
  static std::pair<ElemTy *, unsigned> find(const KeyTy &key,
                                            IndexStorage indices,
                                            size_t elementCount,
                                            ElemTy *elements) {
    auto hash = hash_value(key);
    auto indicesMask = (1UL << indices.getCapacityLog2()) - 1;

    auto i = hash & indicesMask;
    while (true) {
      // Index 0 is used for the capacity and is not actually an index.
      if (i == 0)
        i++;

      auto index = indices.loadIndexAt(i, std::memory_order_acquire);
      // Element indices are 1-based, 0 means no entry.
      if (index == 0)
        return {nullptr, i};
      if (index - 1 < elementCount) {
        auto *candidate = &elements[index - 1];
        if (candidate->matchesKey(key))
          return {candidate, 0};
      }

      i = (i + 1) & indicesMask;
    }
  }
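
  // A worked example of the probe sequence above (illustrative only): with 64
  // indices the mask is 63, so a key whose hash is 0x2a41 first probes slot
  // 0x2a41 & 63 = 1. If slot 1 holds 0 the key is absent, and slot 1 is where
  // a new element's 1-based index would go; if it holds some index N, element
  // N-1 is checked with matchesKey(), and on a mismatch the probe continues
  // linearly through slots 2, 3, ... (wrapping around and skipping slot 0)
  // until an empty slot or a match is found.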

public:
  // Implicitly trivial constructor/destructor.
  ConcurrentReadableHashMap() = default;
  ~ConcurrentReadableHashMap() = default;

  // This type cannot be safely copied or moved.
  ConcurrentReadableHashMap(const ConcurrentReadableHashMap &) = delete;
  ConcurrentReadableHashMap(ConcurrentReadableHashMap &&) = delete;
  ConcurrentReadableHashMap &
  operator=(const ConcurrentReadableHashMap &) = delete;

  /// Returns whether there are outstanding readers. For testing purposes only.
  bool hasActiveReaders() {
    return ReaderCount.load(std::memory_order_relaxed) > 0;
  }

  /// Readers take a snapshot of the hash map, then work with the snapshot.
  class Snapshot {
    ConcurrentReadableHashMap *Map;
    IndexStorage Indices;
    ElemTy *Elements;
    size_t ElementCount;

  public:
    Snapshot(ConcurrentReadableHashMap *map, IndexStorage indices,
             ElemTy *elements, size_t elementCount)
        : Map(map), Indices(indices), Elements(elements),
          ElementCount(elementCount) {}

    Snapshot(const Snapshot &other)
        : Map(other.Map), Indices(other.Indices), Elements(other.Elements),
          ElementCount(other.ElementCount) {
      Map->incrementReaders();
    }

    ~Snapshot() { Map->decrementReaders(); }

    /// Search for an element matching the given key. Returns a pointer to the
    /// found element, or nullptr if no matching element exists.
    //
    // This is marked as ref-qualified (the &) to make sure it can't be called
    // on temporaries, since the temporary would be destroyed before the
    // return value can be used, making it invalid.
    template <class KeyTy> const ElemTy *find(const KeyTy &key) & {
      if (!Indices.Value || !ElementCount || !Elements)
        return nullptr;
      return ConcurrentReadableHashMap::find(key, Indices, ElementCount,
                                             Elements)
          .first;
    }
  };

  /// Take a snapshot of the current state of the hash map.
  Snapshot snapshot() {
    incrementReaders();

    // Carefully loading the indices, element count, and elements pointer in
    // order ensures a consistent view of the table with respect to concurrent
    // inserts. However, this is not sufficient to avoid an inconsistent view
    // with respect to concurrent clears. The danger scenario is:
    //
    // 1. Read indices and elementCount from a table with N entries.
    // 2. Another thread clears the table.
    // 3. Another thread inserts M entries, where M < N.
    // 4. The reader thread reads elements.
    // 5. The reader thread performs a find. The key's hash leads us to an
    //    index I, where I > M.
    // 6. The reader thread reads from element I, which is off the end of the
    //    elements array.
    //
    // To avoid this, read the elements pointer twice, at the beginning and
    // end. If the values are not the same then there may have been a clear in
    // the middle, so we retry. This will have false positives: a new elements
    // pointer can just mean a concurrent insert that triggered a resize of
    // the elements array. This is harmless aside from a small performance
    // hit, and should not happen often.
    IndexStorage indices;
    size_t elementCount;
    ElementStorage *elements;
    ElementStorage *elements2;
    do {
      elements = Elements.load(std::memory_order_acquire);
      indices = Indices.load(std::memory_order_acquire);
      elementCount = ElementCount.load(std::memory_order_acquire);
      elements2 = Elements.load(std::memory_order_acquire);
    } while (elements != elements2);

    ElemTy *elementsPtr = elements ? elements->data() : nullptr;
    return Snapshot(this, indices, elementsPtr, elementCount);
  }

  /// Get an element by key, or insert a new element for that key if one is
  /// not already present. Invoke `call` with the pointer to the element.
  /// BEWARE: `call` is invoked with the internal writer lock held, keep work
  /// to a minimum.
  ///
  /// `call` is passed the following parameters:
  /// - `element`: the pointer to the element corresponding to `key`
  /// - `created`: true if the element is newly created, false if it already
  ///   exists
  /// `call` returns a `bool`. When `created` is `true`, the return values
  /// mean:
  /// - `true`: the new entry is kept
  /// - `false`: the new entry is discarded
  /// If the new entry is kept, then the new element MUST be initialized, and
  /// have a hash value that matches the hash value of `key`.
  ///
  /// The return value is ignored when `created` is `false`.
  template <class KeyTy, typename Call>
  void getOrInsert(KeyTy key, const Call &call) {
    typename MutexTy::ScopedLock guard(WriterLock);

    auto indices = IndexStorage{Indices.load(std::memory_order_relaxed)};
    auto indicesCapacityLog2 = indices.getCapacityLog2();
    auto elementCount = ElementCount.load(std::memory_order_relaxed);
    auto *elements = Elements.load(std::memory_order_relaxed);
    auto *elementsPtr = elements ? elements->data() : nullptr;

    auto found = this->find(key, indices, elementCount, elementsPtr);
    if (found.first) {
      call(found.first, false);
      deallocateFreeListIfSafe();
      return;
    }

    auto indicesCapacity = 1UL << indicesCapacityLog2;

    // The number of slots in use is elementCount + 1, since the capacity also
    // takes a slot.
    auto emptyCount = indicesCapacity - (elementCount + 1);
    auto proportion = indicesCapacity / emptyCount;
    if (proportion >= ResizeProportion) {
      indices = resize(indices, indicesCapacityLog2, elementsPtr);
      found = find(key, indices, elementCount, elementsPtr);
      assert(!found.first && "Shouldn't suddenly find the key after rehashing");
    }

    if (!elements || elementCount >= elements->Capacity) {
      elements = resize(elements, elementCount);
    }
    auto *element = &elements->data()[elementCount];

    // Order matters: fill out the element, then update the count,
    // then update the index.
    bool keep = call(element, true);
    if (keep) {
      assert(hash_value(key) == hash_value(*element) &&
             "Element must have the same hash code as its key.");
      ElementCount.store(elementCount + 1, std::memory_order_release);
      indices.storeIndexAt(&Indices, elementCount + 1, found.second,
                           std::memory_order_release);
    }

    deallocateFreeListIfSafe();
  }

  /// Clear the hash table, freeing (when safe) all memory currently used for
  /// indices and elements.
  ///
  /// - addFreedNodes: if non-null, this function will be called with the head
  ///   of the free list. The function may add additional nodes to the free
  ///   list, which will be freed when it is safe to do so.
  void clear(std::function<void(ConcurrentFreeListNode *)> addFreedNodes =
                 nullptr) {
    typename MutexTy::ScopedLock guard(WriterLock);

    IndexStorage indices = Indices.load(std::memory_order_relaxed);
    auto *elements = Elements.load(std::memory_order_relaxed);

    // Order doesn't matter here, snapshots will gracefully handle any field
    // being NULL/0 while the others are not.
    Indices.store(0, std::memory_order_relaxed);
    ElementCount.store(0, std::memory_order_relaxed);
    Elements.store(nullptr, std::memory_order_relaxed);

    if (auto *ptr = indices.pointer())
      ConcurrentFreeListNode::add(&FreeList, ptr);
    ConcurrentFreeListNode::add(&FreeList, elements);

    if (addFreedNodes)
      addFreedNodes(FreeList);

    deallocateFreeListIfSafe();
  }
};
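
// Illustrative sketch (not used by the runtime): inserting into and querying
// a ConcurrentReadableHashMap using the hypothetical ExampleHashMapElement
// type defined above. Both helpers exist only for exposition.
inline void exampleInsert(ConcurrentReadableHashMap<ExampleHashMapElement> &map,
                          uint32_t key, uint32_t value) {
  map.getOrInsert(key, [&](ExampleHashMapElement *element, bool created) {
    if (created) {
      // The element storage is uninitialized; construct it in place. Its hash
      // must match the key's hash, as getOrInsert() asserts.
      new (element) ExampleHashMapElement{key, value};
    }
    // Keep the entry (the return value is ignored when it already existed).
    return true;
  });
}

inline bool exampleLookup(ConcurrentReadableHashMap<ExampleHashMapElement> &map,
                          uint32_t key, uint32_t &value) {
  // snapshot() registers this thread as a reader until `snapshot` goes out of
  // scope; find() probes the pinned index and element arrays without locking.
  auto snapshot = map.snapshot();
  if (const ExampleHashMapElement *found = snapshot.find(key)) {
    value = found->Value;
    return true;
  }
  return false;
}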

/// A wrapper type for indirect hash map elements. Stores a pointer to the
/// real element and forwards key matching and hashing.
template <class ElemTy> struct HashMapElementWrapper {
  ElemTy *Ptr;

  template <class KeyTy> bool matchesKey(const KeyTy &key) {
    return Ptr->matchesKey(key);
  }

  friend llvm::hash_code hash_value(const HashMapElementWrapper &wrapper) {
    return hash_value(*wrapper.Ptr);
  }
};

/// A ConcurrentReadableHashMap that provides stable addresses for the
/// elements by allocating them separately and storing pointers to them. The
/// elements of the hash table are instances of HashMapElementWrapper. A new
/// getOrInsert method is provided that directly returns the stable element
/// pointer.
template <class ElemTy, class Allocator, class MutexTy = LazyMutex>
struct StableAddressConcurrentReadableHashMap
    : public ConcurrentReadableHashMap<HashMapElementWrapper<ElemTy>, MutexTy> {
  // Implicitly trivial destructor.
  ~StableAddressConcurrentReadableHashMap() = default;

  std::atomic<ElemTy *> LastFound{nullptr};

  /// Get or insert an element for the given key and arguments. Returns the
  /// pointer to the existing or new element, and a bool indicating whether
  /// the element was created. When false, the element already existed before
  /// the call.
  template <class KeyTy, class... ArgTys>
  std::pair<ElemTy *, bool> getOrInsert(KeyTy key, ArgTys &&...args) {
    // See if this is a repeat of the previous search.
    if (auto lastFound = LastFound.load(std::memory_order_acquire))
      if (lastFound->matchesKey(key))
        return {lastFound, false};

    // Optimize for the case where the value already exists.
    {
      // Tightly scope the snapshot so it's gone before we call getOrInsert
      // below, otherwise that call will always see an outstanding snapshot
      // and never be able to collect garbage.
      auto snapshot = this->snapshot();
      if (auto wrapper = snapshot.find(key)) {
        LastFound.store(wrapper->Ptr, std::memory_order_relaxed);
        return {wrapper->Ptr, false};
      }
    }

    // No such element. Insert if needed. Note: another thread may have
    // inserted it in the meantime, so we still have to handle both cases!
    ElemTy *ptr = nullptr;
    bool outerCreated = false;
    ConcurrentReadableHashMap<HashMapElementWrapper<ElemTy>, MutexTy>::
        getOrInsert(key, [&](HashMapElementWrapper<ElemTy> *wrapper,
                             bool created) {
          if (created) {
            // Created the indirect entry. Allocate the actual storage.
            size_t allocSize =
                sizeof(ElemTy) + ElemTy::getExtraAllocationSize(key, args...);
            void *memory = Allocator().Allocate(allocSize, alignof(ElemTy));
            new (memory) ElemTy(key, std::forward<ArgTys>(args)...);
            wrapper->Ptr = reinterpret_cast<ElemTy *>(memory);
          }
          ptr = wrapper->Ptr;
          outerCreated = created;
          return true; // Keep the new entry.
        });

    LastFound.store(ptr, std::memory_order_relaxed);
    return {ptr, outerCreated};
  }

  template <class KeyTy> ElemTy *find(const KeyTy &key) {
    auto snapshot = this->snapshot();
    auto result = snapshot.find(key);
    if (!result)
      return nullptr;
    return result->Ptr;
  }

private:
  // Clearing would require deallocating elements, which we don't support.
  void clear() = delete;
};
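
// Illustrative sketch (not used by the runtime): a hypothetical element type
// and allocator for StableAddressConcurrentReadableHashMap. The element owns
// a trailing name buffer, so it defines getExtraAllocationSize(); the
// allocator is a stateless malloc wrapper because a fresh `Allocator()`
// instance is constructed for every insertion. All names below are
// assumptions made for this example.
struct ExampleStableElement {
  uint32_t Key;
  size_t NameLength;

  ExampleStableElement(uint32_t key, const char *name, size_t nameLength)
      : Key(key), NameLength(nameLength) {
    // The extra bytes requested below sit directly after the object.
    memcpy(reinterpret_cast<char *>(this + 1), name, nameLength);
  }

  // Trailing storage requested in addition to sizeof(ExampleStableElement).
  static size_t getExtraAllocationSize(uint32_t key, const char *name,
                                       size_t nameLength) {
    return nameLength;
  }

  const char *name() const { return reinterpret_cast<const char *>(this + 1); }

  bool matchesKey(uint32_t key) const { return Key == key; }

  friend llvm::hash_code hash_value(const ExampleStableElement &element) {
    return llvm::hash_value(element.Key);
  }
};

struct ExampleMallocAllocator {
  void *Allocate(size_t size, size_t alignment) {
    // malloc's alignment is sufficient for ExampleStableElement; a real
    // allocator would honor `alignment`.
    (void)alignment;
    return malloc(size);
  }
};

using ExampleStableTable =
    StableAddressConcurrentReadableHashMap<ExampleStableElement,
                                           ExampleMallocAllocator>;

inline ExampleStableElement *exampleIntern(ExampleStableTable &table,
                                           uint32_t key, const char *name,
                                           size_t nameLength) {
  // getOrInsert() returns a pointer that stays valid for the life of the
  // table, plus whether this call created the entry.
  return table.getOrInsert(key, name, nameLength).first;
}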

} // end namespace swift

#endif // SWIFT_RUNTIME_CONCURRENTUTILS_H