//===--- Concurrent.h - Concurrent Data Structures -------------*- C++ -*-===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

#ifndef SWIFT_RUNTIME_CONCURRENTUTILS_H
#define SWIFT_RUNTIME_CONCURRENTUTILS_H

#include <algorithm>
#include <atomic>
#include <functional>
#include <iterator>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <type_traits>
#include <vector>

#include "llvm/ADT/Hashing.h"
#include "llvm/Support/Allocator.h"

#include "Atomic.h"
#include "Debug.h"
#include "Mutex.h"

#if defined(__FreeBSD__) || defined(__CYGWIN__) || defined(__HAIKU__)
#include <stdio.h>
#endif

#if defined(__APPLE__) && defined(__MACH__)
#include <malloc/malloc.h>
#endif

namespace swift {

/// This is a node in a concurrent linked list.
template <class ElemTy> struct ConcurrentListNode {
  ConcurrentListNode(ElemTy Elem) : Payload(Elem), Next(nullptr) {}
  ConcurrentListNode(const ConcurrentListNode &) = delete;
  ConcurrentListNode &operator=(const ConcurrentListNode &) = delete;

  /// The element.
  ElemTy Payload;
  /// Points to the next link in the chain.
  ConcurrentListNode<ElemTy> *Next;
};

/// This is a concurrent linked list. It supports insertion at the beginning
/// of the list and traversal using iterators.
/// This is a very simple implementation of a concurrent linked list
/// using atomic operations. The 'push_front' method allocates a new link
/// and attempts to compare and swap the old head pointer with a pointer to
/// the new link. This operation may fail many times if there are other
/// contending threads, but eventually the head pointer is set to the new
/// link that already points to the old head value. Notice that the more
/// difficult feature of removing links is not supported.
/// See 'push_front' for more details.
template <class ElemTy> struct ConcurrentList {
  ConcurrentList() : First(nullptr) {}
  ~ConcurrentList() { clear(); }

  /// Remove all of the links in the chain. This method leaves
  /// the list in a usable state and new links can be added.
  /// Notice that this operation is non-concurrent because
  /// we have no way of ensuring that no one is currently
  /// traversing the list.
  void clear() {
    // Iterate over the list and delete all the nodes.
    auto Ptr = First.load(std::memory_order_acquire);
    First.store(nullptr, std::memory_order_release);

    while (Ptr) {
      auto N = Ptr->Next;
      delete Ptr;
      Ptr = N;
    }
  }

  ConcurrentList(const ConcurrentList &) = delete;
  ConcurrentList &operator=(const ConcurrentList &) = delete;

  /// A list iterator.
  struct ConcurrentListIterator
      : public std::iterator<std::forward_iterator_tag, ElemTy> {
    /// Points to the current link.
    ConcurrentListNode<ElemTy> *Ptr;

    /// C'tor.
    ConcurrentListIterator(ConcurrentListNode<ElemTy> *P) : Ptr(P) {}

    /// Move to the next element.
    ConcurrentListIterator &operator++() {
      Ptr = Ptr->Next;
      return *this;
    }

    /// Access the element.
    ElemTy &operator*() { return Ptr->Payload; }

    /// Same?
    bool operator==(const ConcurrentListIterator &o) const {
      return o.Ptr == Ptr;
    }

    /// Not the same?
    bool operator!=(const ConcurrentListIterator &o) const {
      return o.Ptr != Ptr;
    }
  };

  /// Iterator entry point.
  typedef ConcurrentListIterator iterator;

  /// Marks the beginning of the list.
  iterator begin() const {
    return ConcurrentListIterator(First.load(std::memory_order_acquire));
  }

  /// Marks the end of the list.
  iterator end() const { return ConcurrentListIterator(nullptr); }

  /// Add a new item to the list.
  void push_front(ElemTy Elem) {
    /// Allocate a new node.
    ConcurrentListNode<ElemTy> *N = new ConcurrentListNode<ElemTy>(Elem);
    // Point to the first element in the list.
    N->Next = First.load(std::memory_order_acquire);
    auto OldFirst = N->Next;
    // Try to replace the current First with the new node.
    while (!std::atomic_compare_exchange_weak_explicit(
        &First, &OldFirst, N, std::memory_order_release,
        std::memory_order_relaxed)) {
      // If we fail, update the new node to point to the new head and try to
      // insert before the new first element.
      N->Next = OldFirst;
    }
  }

  /// Points to the first link in the list.
  std::atomic<ConcurrentListNode<ElemTy> *> First;
};
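
// Example use of ConcurrentList (an illustrative sketch; `GlobalList` and
// `process` are hypothetical, not part of the runtime):
//
//   static ConcurrentList<int> GlobalList;
//
//   // Any number of threads may push concurrently; each push retries its
//   // compare-and-swap until it wins.
//   GlobalList.push_front(42);
//
//   // Traversal walks whatever links were published by the time each
//   // 'Next' pointer is read. Links are never removed, so iteration is
//   // always safe against concurrent pushes.
//   for (int &value : GlobalList)
//     process(value);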

/// A utility function for ordering two integers, which is useful
/// for implementing compareWithKey.
template <class T>
static inline int compareIntegers(T left, T right) {
  return (left == right ? 0 : left < right ? -1 : 1);
}

/// A utility function for ordering two pointers, which is useful
/// for implementing compareWithKey.
template <class T>
static inline int comparePointers(const T *left, const T *right) {
  return (left == right ? 0 : std::less<const T *>()(left, right) ? -1 : 1);
}

template <class EntryTy, bool ProvideDestructor, class Allocator>
class ConcurrentMapBase;

/// The partial specialization of ConcurrentMapBase whose destructor is
/// trivial. The other implementation inherits from this, so this is a
/// base for all ConcurrentMaps.
template <class EntryTy, class Allocator>
class ConcurrentMapBase<EntryTy, false, Allocator> : protected Allocator {
protected:
  struct Node {
    std::atomic<Node *> Left;
    std::atomic<Node *> Right;
    EntryTy Payload;

    template <class... Args>
    Node(Args &&... args)
        : Left(nullptr), Right(nullptr), Payload(std::forward<Args>(args)...) {}

    Node(const Node &) = delete;
    Node &operator=(const Node &) = delete;

#ifndef NDEBUG
    void dump() const {
      auto L = Left.load(std::memory_order_acquire);
      auto R = Right.load(std::memory_order_acquire);
      printf("\"%p\" [ label = \" { <f0> %08lx | {<f1> | <f2>}}\" "
             "style=\"rounded\" shape=\"record\"];\n",
             this, (long) Payload.getKeyIntValueForDump());
      if (L) {
        L->dump();
        printf("\"%p\":f1 -> \"%p\":f0;\n", this, L);
      }
      if (R) {
        R->dump();
        printf("\"%p\":f2 -> \"%p\":f0;\n", this, R);
      }
    }
#endif
  };

  std::atomic<Node *> Root;

  constexpr ConcurrentMapBase() : Root(nullptr) {}

  // Implicitly trivial destructor.
  ~ConcurrentMapBase() = default;

  void destroyNode(Node *node) {
    assert(node && "destroying null node");
    auto allocSize = sizeof(Node) + node->Payload.getExtraAllocationSize();

    // Destroy the node's payload.
    node->~Node();

    // Deallocate the node. The static_cast here is required
    // because LLVM's allocator API is insane.
    this->Deallocate(static_cast<void *>(node), allocSize, alignof(Node));
  }
};

/// The partial specialization of ConcurrentMapBase which provides a
/// non-trivial destructor.
template <class EntryTy, class Allocator>
class ConcurrentMapBase<EntryTy, true, Allocator>
    : protected ConcurrentMapBase<EntryTy, false, Allocator> {
protected:
  using super = ConcurrentMapBase<EntryTy, false, Allocator>;
  using Node = typename super::Node;

  constexpr ConcurrentMapBase() {}

  ~ConcurrentMapBase() {
    destroyTree(this->Root);
  }

private:
  void destroyTree(const std::atomic<Node *> &edge) {
    // This can be a relaxed load because destruction is not allowed to race
    // with other operations.
    auto node = edge.load(std::memory_order_relaxed);
    if (!node) return;

    // Destroy the node's children.
    destroyTree(node->Left);
    destroyTree(node->Right);

    // Destroy the node itself.
    this->destroyNode(node);
  }
};

/// A concurrent map that is implemented using a binary tree. It supports
/// concurrent insertions but does not support removals or rebalancing of
/// the tree.
///
/// The entry type must provide the following operations:
///
///   /// For debugging purposes only. Summarize this key as an integer value.
///   intptr_t getKeyIntValueForDump() const;
///
///   /// A ternary comparison. KeyTy is the type of the key provided
///   /// to find or getOrInsert.
///   int compareWithKey(KeyTy key) const;
///
///   /// Return the amount of extra trailing space required by an entry,
///   /// where KeyTy is the type of the first argument to getOrInsert and
///   /// ArgTys is the type of the remaining arguments.
///   static size_t getExtraAllocationSize(KeyTy key, ArgTys...)
///
///   /// Return the amount of extra trailing space that was requested for
///   /// this entry. This method is only used to compute the size of the
///   /// object during node deallocation; it does not need to return a
///   /// correct value so long as the allocator's Deallocate implementation
///   /// ignores this argument.
///   size_t getExtraAllocationSize() const;
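///
/// For example, a minimal entry keyed by an integer, with no trailing
/// allocation, might look like this (an illustrative sketch; `IntEntry` is
/// hypothetical, not an entry type used by the runtime):
///
///   struct IntEntry {
///     intptr_t Key;
///     IntEntry(intptr_t key) : Key(key) {}
///     intptr_t getKeyIntValueForDump() const { return Key; }
///     int compareWithKey(intptr_t key) const {
///       return compareIntegers(Key, key);
///     }
///     static size_t getExtraAllocationSize(intptr_t) { return 0; }
///     size_t getExtraAllocationSize() const { return 0; }
///   };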
///
/// If ProvideDestructor is false, the destructor will be trivial. This
/// can be appropriate when the object is declared at global scope.
template <class EntryTy, bool ProvideDestructor = true,
          class Allocator = llvm::MallocAllocator>
class ConcurrentMap
    : private ConcurrentMapBase<EntryTy, ProvideDestructor, Allocator> {
  using super = ConcurrentMapBase<EntryTy, ProvideDestructor, Allocator>;
  using Node = typename super::Node;

  /// Inherited from base class:
  ///   std::atomic<Node *> Root;
  using super::Root;

  /// This member stores the address of the last node that was found by the
  /// search procedure. We cache the last search to accelerate code that
  /// searches the same value in a loop.
  std::atomic<Node *> LastSearch;

public:
  constexpr ConcurrentMap() : LastSearch(nullptr) {}

  ConcurrentMap(const ConcurrentMap &) = delete;
  ConcurrentMap &operator=(const ConcurrentMap &) = delete;

  // ConcurrentMap must have a trivial destructor.
  ~ConcurrentMap() = default;

public:
  Allocator &getAllocator() {
    return *this;
  }

#ifndef NDEBUG
  void dump() const {
    auto R = Root.load(std::memory_order_acquire);
    printf("digraph g {\n"
           "graph [ rankdir = \"TB\"];\n"
           "node [ fontsize = \"16\" ];\n"
           "edge [ ];\n");
    if (R) {
      R->dump();
    }
    printf("\n}\n");
  }
#endif

  /// Search for a value by key \p Key.
  /// \returns a pointer to the value or null if the value is not in the map.
  template <class KeyTy>
  EntryTy *find(const KeyTy &key) {
    // Check if we are looking for the same key that we looked for the last
    // time we called this function.
    if (Node *last = LastSearch.load(std::memory_order_acquire)) {
      if (last->Payload.compareWithKey(key) == 0)
        return &last->Payload;
    }

    // Search the tree, starting from the root.
    Node *node = Root.load(std::memory_order_acquire);
    while (node) {
      int comparisonResult = node->Payload.compareWithKey(key);
      if (comparisonResult == 0) {
        LastSearch.store(node, std::memory_order_release);
        return &node->Payload;
      } else if (comparisonResult < 0) {
        node = node->Left.load(std::memory_order_acquire);
      } else {
        node = node->Right.load(std::memory_order_acquire);
      }
    }
    return nullptr;
  }

  /// Get or create an entry in the map.
  ///
  /// \returns the entry in the map and whether a new node was added (true)
  ///   or already existed (false).
  template <class KeyTy, class... ArgTys>
  std::pair<EntryTy *, bool> getOrInsert(KeyTy key, ArgTys &&... args) {
    // Check if we are looking for the same key that we looked for the
    // last time we called this function.
    if (Node *last = LastSearch.load(std::memory_order_acquire)) {
      if (last->Payload.compareWithKey(key) == 0)
        return { &last->Payload, false };
    }

    // The node we allocated.
    Node *newNode = nullptr;

    // Start from the root.
    auto edge = &Root;

    while (true) {
      // Load the edge.
      Node *node = edge->load(std::memory_order_acquire);

      // If there's a node there, it's either a match or we're going to
      // one of its children.
      if (node) {
      searchFromNode:
        // Compare our key against the node's key.
        int comparisonResult = node->Payload.compareWithKey(key);

        // If it's equal, we can use this node.
        if (comparisonResult == 0) {
          // Destroy the node we allocated before if we're carrying one around.
          if (newNode) this->destroyNode(newNode);

          // Cache and report that we found an existing node.
          LastSearch.store(node, std::memory_order_release);
          return { &node->Payload, false };
        }

        // Otherwise, select the appropriate child edge and descend.
        edge = (comparisonResult < 0 ? &node->Left : &node->Right);
        continue;
      }

      // Create a new node.
      if (!newNode) {
        size_t allocSize =
            sizeof(Node) + EntryTy::getExtraAllocationSize(key, args...);
        void *memory = this->Allocate(allocSize, alignof(Node));
        newNode = ::new (memory) Node(key, std::forward<ArgTys>(args)...);
      }

      // Try to set the edge to the new node.
      if (std::atomic_compare_exchange_strong_explicit(
              edge, &node, newNode, std::memory_order_acq_rel,
              std::memory_order_acquire)) {
        // If that succeeded, cache and report that we created a new node.
        LastSearch.store(newNode, std::memory_order_release);
        return { &newNode->Payload, true };
      }

      // Otherwise, we lost the race because some other thread initialized
      // the edge before us. node will be set to the current value;
      // repeat the search from there.
      assert(node && "spurious failure from compare_exchange_strong?");
      goto searchFromNode;
    }
  }
};
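
// Example use of ConcurrentMap (an illustrative sketch; `IntEntry` is the
// hypothetical entry type from the class documentation above):
//
//   static ConcurrentMap<IntEntry> Map;
//
//   // Threads may insert and search concurrently. Exactly one node per key
//   // is ever published; losers of the insertion race destroy their node.
//   std::pair<IntEntry *, bool> entry = Map.getOrInsert(42);
//   IntEntry *found = Map.find(42);  // may be served from the search cache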

/// An append-only array that can be read without taking locks. Writes
/// are still locked and serialized, but only with respect to other
/// writes.
template <class ElemTy> struct ConcurrentReadableArray {
private:
  /// The struct used for the array's storage. The `Elem` member is
  /// considered to be the first element of a variable-length array,
  /// whose size is determined by the allocation. The `Capacity` member
  /// from `ConcurrentReadableArray` indicates how large it can be.
  struct Storage {
    std::atomic<size_t> Count;
    typename std::aligned_storage<sizeof(ElemTy), alignof(ElemTy)>::type Elem;

    static Storage *allocate(size_t capacity) {
      auto size = sizeof(Storage) + (capacity - 1) * sizeof(Storage().Elem);
      auto *ptr = reinterpret_cast<Storage *>(malloc(size));
      if (!ptr) swift::crash("Could not allocate memory.");
      ptr->Count.store(0, std::memory_order_relaxed);
      return ptr;
    }

    void deallocate() {
      for (size_t i = 0; i < Count; ++i) {
        data()[i].~ElemTy();
      }
      free(this);
    }

    ElemTy *data() {
      return reinterpret_cast<ElemTy *>(&Elem);
    }
  };

  size_t Capacity;
  std::atomic<size_t> ReaderCount;
  std::atomic<Storage *> Elements;
  Mutex WriterLock;
  std::vector<Storage *> FreeList;

  void incrementReaders() {
    ReaderCount.fetch_add(1, std::memory_order_acquire);
  }

  void decrementReaders() {
    ReaderCount.fetch_sub(1, std::memory_order_release);
  }

  void deallocateFreeList() {
    for (Storage *storage : FreeList)
      storage->deallocate();
    FreeList.clear();
    FreeList.shrink_to_fit();
  }

public:
  struct Snapshot {
    ConcurrentReadableArray *Array;
    const ElemTy *Start;
    size_t Count;

    Snapshot(ConcurrentReadableArray *array, const ElemTy *start, size_t count)
        : Array(array), Start(start), Count(count) {}

    Snapshot(const Snapshot &other)
        : Array(other.Array), Start(other.Start), Count(other.Count) {
      Array->incrementReaders();
    }

    ~Snapshot() {
      Array->decrementReaders();
    }

    const ElemTy *begin() { return Start; }
    const ElemTy *end() { return Start + Count; }
    size_t count() { return Count; }
  };

  // This type cannot be safely copied or moved.
  ConcurrentReadableArray(const ConcurrentReadableArray &) = delete;
  ConcurrentReadableArray(ConcurrentReadableArray &&) = delete;
  ConcurrentReadableArray &operator=(const ConcurrentReadableArray &) = delete;

  ConcurrentReadableArray() : Capacity(0), ReaderCount(0), Elements(nullptr) {}

  ~ConcurrentReadableArray() {
    assert(ReaderCount.load(std::memory_order_acquire) == 0 &&
           "deallocating ConcurrentReadableArray with outstanding snapshots");
    deallocateFreeList();
  }

  void push_back(const ElemTy &elem) {
    ScopedLock guard(WriterLock);

    auto *storage = Elements.load(std::memory_order_relaxed);
    auto count = storage ? storage->Count.load(std::memory_order_relaxed) : 0;
    if (count >= Capacity) {
      auto newCapacity = std::max((size_t)16, count * 2);
      auto *newStorage = Storage::allocate(newCapacity);
      if (storage) {
        std::copy(storage->data(), storage->data() + count,
                  newStorage->data());
        newStorage->Count.store(count, std::memory_order_release);
        FreeList.push_back(storage);
      }

      storage = newStorage;
      Capacity = newCapacity;
      Elements.store(storage, std::memory_order_release);
    }

    new (&storage->data()[count]) ElemTy(elem);
    storage->Count.store(count + 1, std::memory_order_release);

    if (ReaderCount.load(std::memory_order_acquire) == 0)
      deallocateFreeList();
  }

  Snapshot snapshot() {
    incrementReaders();
    auto *storage = Elements.load(SWIFT_MEMORY_ORDER_CONSUME);
    if (storage == nullptr) {
      return Snapshot(this, nullptr, 0);
    }

    auto count = storage->Count.load(std::memory_order_acquire);
    const auto *ptr = storage->data();
    return Snapshot(this, ptr, count);
  }
};
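
// Example use of ConcurrentReadableArray (an illustrative sketch; `Numbers`
// and `use` are hypothetical):
//
//   static ConcurrentReadableArray<int> Numbers;
//
//   // Writers are serialized against each other by the internal lock.
//   Numbers.push_back(1);
//
//   // Readers iterate a snapshot without taking any locks. Elements
//   // appended after the snapshot was taken are not visible through it.
//   for (int n : Numbers.snapshot())
//     use(n);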

using llvm::hash_value;

/// A hash table that can be queried without taking any locks. Writes are
/// still locked and serialized, but only with respect to other writes.
/// Writers can add elements and clear the table, but they cannot remove
/// individual elements. Readers work by taking a snapshot of the table and
/// then querying that snapshot.
///
/// The basic structure of the table consists of two arrays. Elements are
/// stored in a contiguous array, with new elements appended to the end. The
/// second array is the actual hash table, and it contains indices into the
/// elements array. This scheme cuts down on wasted space when the elements
/// are larger than a few bytes: instead of wasting `(1 - loadFactor) *
/// sizeof(element)` bytes on unused space in the hash table, we only waste
/// `(1 - loadFactor) * sizeof(index)`. This scheme also avoids readers
/// seeing partially constructed elements.
///
/// Reader/writer synchronization for new elements is handled by keeping an
/// element count which is only incremented when the element has been fully
/// constructed. A reader which sees an index beyond its view of the current
/// count will ignore it and treat that as if there was no entry.
///
/// Reader/writer synchronization for resizing the arrays is handled by
/// tracking the current number of active readers. When resizing, the new
/// array is allocated, the data copied, and then the old array is placed in
/// a free list. The free list is only deallocated if there are no readers,
/// otherwise freeing is deferred.
///
/// Reader/writer synchronization for clearing the table is a combination of
/// the above. By keeping the old arrays around until all readers are
/// finished, we ensure that readers which started before the clear see
/// valid (pre-clear) data. Readers which see any array as empty will
/// produce no results, thus providing valid post-clear data.
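///
/// Elements must be trivially copyable and trivially destructible, must
/// provide a `matchesKey` method, and must be hashable with `hash_value`
/// such that an element hashes identically to its key. A minimal element
/// (an illustrative sketch; `IdElement` is hypothetical, not a type used by
/// the runtime):
///
///   struct IdElement {
///     uint32_t Key;
///     uint32_t Value;
///     bool matchesKey(uint32_t key) const { return key == Key; }
///     friend llvm::hash_code hash_value(const IdElement &elem) {
///       return llvm::hash_value(elem.Key);  // same hash as the key itself
///     }
///   };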
template <class ElemTy> struct ConcurrentReadableHashMap {
  // We use memcpy and don't call destructors. Make sure the elements will
  // put up with this.
  static_assert(std::is_trivially_copyable<ElemTy>::value,
                "Elements must be trivially copyable.");
  static_assert(std::is_trivially_destructible<ElemTy>::value,
                "Elements must not have destructors (they won't be called).");

private:
  /// The type of the elements of the indices array. TODO: use one or two
  /// byte indices for smaller tables to save more memory.
  using Index = unsigned;

  /// The reciprocal of the load factor at which we expand the table. A value
  /// of 4 means that we resize at 1/4 = 75% load factor.
  static const size_t ResizeProportion = 4;

  /// Get the "good size" for a given allocation size. When available, this
  /// rounds up to the next allocation quantum by calling `malloc_good_size`.
  /// Otherwise, just return the passed-in size, which is always valid even
  /// if not necessarily optimal.
  size_t goodSize(size_t size) {
#if defined(__APPLE__) && defined(__MACH__)
    return malloc_good_size(size);
#else
    return size;
#endif
  }

  /// A private class representing the storage of the indices. In order to
  /// ensure that readers can get a consistent view of the indices with a
  /// single atomic read, we store the size of the indices array inline, as
  /// the first element in the array.
  ///
  /// We want the number of indices to be a power of two so that we can use a
  /// bitwise AND to convert a hash code to an index. We want the entire
  /// array to be a power of two in size to be friendly to the allocator, but
  /// the size is stored inline. We work around this contradiction by
  /// considering the first index to always be occupied with a value that
  /// never matches any key.
  struct IndexStorage {
    std::atomic<Index> Mask;

    static IndexStorage *allocate(size_t capacity) {
      assert((capacity & (capacity - 1)) == 0 &&
             "Capacity must be a power of 2");
      auto *ptr =
          reinterpret_cast<IndexStorage *>(calloc(capacity, sizeof(Mask)));
      if (!ptr)
        swift::crash("Could not allocate memory.");
      ptr->Mask.store(capacity - 1, std::memory_order_relaxed);
      return ptr;
    }

    std::atomic<Index> &at(size_t i) { return (&Mask)[i]; }
  };

  /// A simple linked list representing pointers that need to be freed.
  struct FreeListNode {
    FreeListNode *Next;
    void *Ptr;

    static void add(FreeListNode **head, void *ptr) {
      auto *newNode = new FreeListNode{*head, ptr};
      *head = newNode;
    }

    static void freeAll(FreeListNode **head) {
      auto *node = *head;
      while (node) {
        auto *next = node->Next;
        free(node->Ptr);
        delete node;
        node = next;
      }
      *head = nullptr;
    }
  };

  /// The number of readers currently active, equal to the number of snapshot
  /// objects currently alive.
  std::atomic<uint32_t> ReaderCount{0};

  /// The number of elements in the elements array.
  std::atomic<uint32_t> ElementCount{0};

  /// The array of elements.
  std::atomic<ElemTy *> Elements{nullptr};

  /// The array of indices.
  std::atomic<IndexStorage *> Indices{nullptr};

  /// The writer lock, which must be taken before any mutation of the table.
  Mutex WriterLock;

  /// The maximum number of elements that the current elements array can
  /// hold.
  uint32_t ElementCapacity{0};

  /// The list of pointers to be freed once no readers are active.
  FreeListNode *FreeList{nullptr};

  void incrementReaders() {
    ReaderCount.fetch_add(1, std::memory_order_acquire);
  }

  void decrementReaders() {
    ReaderCount.fetch_sub(1, std::memory_order_release);
  }

  /// Free all the arrays in the free lists if there are no active readers.
  /// If there are active readers, do nothing.
  void deallocateFreeListIfSafe() {
    if (ReaderCount.load(std::memory_order_acquire) == 0)
      FreeListNode::freeAll(&FreeList);
  }

  /// Grow the elements array, adding the old array to the free list and
  /// returning the new array with all existing elements copied into it.
  ElemTy *resize(ElemTy *elements, size_t elementCount) {
    // Grow capacity by 25%, making sure we grow by at least 1.
    size_t newCapacity =
        std::max(elementCount + (elementCount >> 2), elementCount + 1);
    size_t newSize = newCapacity * sizeof(ElemTy);

    newSize = goodSize(newSize);
    newCapacity = newSize / sizeof(ElemTy);

    ElemTy *newElements = static_cast<ElemTy *>(malloc(newSize));
    if (elements) {
      memcpy(newElements, elements, elementCount * sizeof(ElemTy));
      FreeListNode::add(&FreeList, elements);
    }

    ElementCapacity = newCapacity;
    Elements.store(newElements, std::memory_order_release);
    return newElements;
  }

  /// Grow the indices array, adding the old array to the free list and
  /// returning the new array with all existing indices copied into it. This
  /// operation performs a rehash, so that the indices are in the correct
  /// location in the new array.
  IndexStorage *resize(IndexStorage *indices, Index indicesMask,
                       ElemTy *elements) {
    // Mask is size - 1. Double the size. Start with 4 (fits into 16-byte
    // malloc bucket).
    size_t newCount = indices ? 2 * (indicesMask + 1) : 4;
    size_t newMask = newCount - 1;

    IndexStorage *newIndices = IndexStorage::allocate(newCount);

    for (size_t i = 1; i <= indicesMask; i++) {
      Index index = indices->at(i).load(std::memory_order_relaxed);
      if (index == 0)
        continue;

      auto *element = &elements[index - 1];
      auto hash = hash_value(*element);

      size_t newI = hash & newMask;
      while (newIndices->at(newI) != 0)
        newI = (newI + 1) & newMask;
      newIndices->at(newI).store(index, std::memory_order_relaxed);
    }

    Indices.store(newIndices, std::memory_order_release);

    FreeListNode::add(&FreeList, indices);

    return newIndices;
  }

  /// Search for the given key within the given indices and elements arrays.
  /// If an entry already exists for that key, return a pointer to the
  /// element. If no entry exists, return a pointer to the location in the
  /// indices array where the index of the new element would be stored.
  template <class KeyTy>
  static std::pair<ElemTy *, std::atomic<Index> *>
  find(const KeyTy &key, IndexStorage *indices, size_t elementCount,
       ElemTy *elements) {
    if (!indices)
      return {nullptr, nullptr};
    auto hash = hash_value(key);
    auto indicesMask = indices->Mask.load(std::memory_order_relaxed);

    auto i = hash & indicesMask;
    while (true) {
      // Index 0 is used for the mask and is not actually an index.
      if (i == 0)
        i++;

      auto *indexPtr = &indices->at(i);
      auto index = indexPtr->load(std::memory_order_acquire);
      // Element indices are 1-based, 0 means no entry.
      if (index == 0)
        return {nullptr, indexPtr};
      if (index - 1 < elementCount) {
        auto *candidate = &elements[index - 1];
        if (candidate->matchesKey(key))
          return {candidate, nullptr};
      }

      i = (i + 1) & indicesMask;
    }
  }

public:
  // This type cannot be safely copied or moved.
  ConcurrentReadableHashMap(const ConcurrentReadableHashMap &) = delete;
  ConcurrentReadableHashMap(ConcurrentReadableHashMap &&) = delete;
  ConcurrentReadableHashMap &
  operator=(const ConcurrentReadableHashMap &) = delete;

  ConcurrentReadableHashMap()
      : ReaderCount(0), ElementCount(0), Elements(nullptr), Indices(nullptr),
        ElementCapacity(0) {}

  ~ConcurrentReadableHashMap() {
    assert(ReaderCount.load(std::memory_order_acquire) == 0 &&
           "deallocating ConcurrentReadableHashMap with outstanding snapshots");
    FreeListNode::freeAll(&FreeList);
  }

  /// Readers take a snapshot of the hash map, then work with the snapshot.
  class Snapshot {
    ConcurrentReadableHashMap *Map;
    IndexStorage *Indices;
    ElemTy *Elements;
    size_t ElementCount;

  public:
    Snapshot(ConcurrentReadableHashMap *map, IndexStorage *indices,
             ElemTy *elements, size_t elementCount)
        : Map(map), Indices(indices), Elements(elements),
          ElementCount(elementCount) {}

    Snapshot(const Snapshot &other)
        : Map(other.Map), Indices(other.Indices), Elements(other.Elements),
          ElementCount(other.ElementCount) {
      Map->incrementReaders();
    }

    ~Snapshot() { Map->decrementReaders(); }

    /// Search for an element matching the given key. Returns a pointer to
    /// the found element, or nullptr if no matching element exists.
    template <class KeyTy> const ElemTy *find(const KeyTy &key) {
      if (!Indices || !ElementCount || !Elements)
        return nullptr;
      return ConcurrentReadableHashMap::find(key, Indices, ElementCount,
                                             Elements)
          .first;
    }
  };

  /// Take a snapshot of the current state of the hash map.
  Snapshot snapshot() {
    incrementReaders();

    // Carefully loading the indices, element count, and elements pointer in
    // order ensures a consistent view of the table with respect to
    // concurrent inserts. However, this is not sufficient to avoid an
    // inconsistent view with respect to concurrent clears. The danger
    // scenario is:
    //
    // 1. Read indices and elementCount from a table with N entries.
    // 2. Another thread clears the table.
    // 3. Another thread inserts M entries, where M < N.
    // 4. The reader thread reads elements.
    // 5. The reader thread performs a find. The key's hash leads us to an
    //    index I, where I > M.
    // 6. The reader thread reads from element I, which is off the end of
    //    the elements array.
    //
    // To avoid this, read the elements pointer twice, at the beginning and
    // end. If the values are not the same then there may have been a clear
    // in the middle, so we retry. This will have false positives: a new
    // elements pointer can just mean a concurrent insert that triggered a
    // resize of the elements array. This is harmless aside from a small
    // performance hit, and should not happen often.
    IndexStorage *indices;
    size_t elementCount;
    ElemTy *elements;
    ElemTy *elements2;
    do {
      elements = Elements.load(std::memory_order_acquire);
      indices = Indices.load(std::memory_order_acquire);
      elementCount = ElementCount.load(std::memory_order_acquire);
      elements2 = Elements.load(std::memory_order_acquire);
    } while (elements != elements2);

    return Snapshot(this, indices, elements, elementCount);
  }

  /// Get an element by key, or insert a new element for that key if one is
  /// not already present. Invoke `call` with the pointer to the element.
  /// BEWARE: `call` is invoked with the internal writer lock held; keep work
  /// to a minimum.
  ///
  /// `call` is passed the following parameters:
  ///   - `element`: the pointer to the element corresponding to `key`
  ///   - `created`: true if the element is newly created, false if it
  ///     already exists
  /// `call` returns a `bool`. When `created` is `true`, the return values
  /// mean:
  ///   - `true` means the new entry is to be kept
  ///   - `false` means the new entry is to be discarded
  /// If the new entry is kept, then the new element MUST be initialized, and
  /// must have a hash value that matches the hash value of `key`.
  ///
  /// The return value is ignored when `created` is `false`.
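  ///
  /// For example (an illustrative sketch; `Table`, `id`, and `computeValue`
  /// are hypothetical, with `IdElement` as sketched in the class
  /// documentation above):
  ///
  ///   Table.getOrInsert(id, [&](IdElement *element, bool created) {
  ///     if (created) {
  ///       element->Key = id;                // must hash the same as `id`
  ///       element->Value = computeValue();  // hypothetical initializer
  ///     }
  ///     return true;  // keep the newly created entry
  ///   });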
  template <class KeyTy, typename Call>
  void getOrInsert(KeyTy key, const Call &call) {
    ScopedLock guard(WriterLock);

    auto *indices = Indices.load(std::memory_order_relaxed);
    if (!indices)
      indices = resize(indices, 0, nullptr);
    auto indicesMask = indices->Mask.load(std::memory_order_relaxed);

    auto elementCount = ElementCount.load(std::memory_order_relaxed);
    auto *elements = Elements.load(std::memory_order_relaxed);

    auto found = find(key, indices, elementCount, elements);
    if (found.first) {
      call(found.first, false);
      deallocateFreeListIfSafe();
      return;
    }

    // The actual capacity is indicesMask + 1. The number of slots in use is
    // elementCount + 1, since the mask also takes a slot.
    auto emptyCount = (indicesMask + 1) - (elementCount + 1);
    auto proportion = (indicesMask + 1) / emptyCount;
    if (proportion >= ResizeProportion) {
      indices = resize(indices, indicesMask, elements);
      found = find(key, indices, elementCount, elements);
      assert(!found.first && "Shouldn't suddenly find the key after rehashing");
    }

    if (elementCount >= ElementCapacity) {
      elements = resize(elements, elementCount);
    }
    auto *element = &elements[elementCount];

    // Order matters: fill out the element, then update the count,
    // then update the index.
    bool keep = call(element, true);
    if (keep) {
      assert(hash_value(key) == hash_value(*element) &&
             "Element must have the same hash code as its key.");
      ElementCount.store(elementCount + 1, std::memory_order_release);
      found.second->store(elementCount + 1, std::memory_order_release);
    }

    deallocateFreeListIfSafe();
  }

  /// Clear the hash table, freeing (when safe) all memory currently used
  /// for indices and elements.
  void clear() {
    ScopedLock guard(WriterLock);

    auto *indices = Indices.load(std::memory_order_relaxed);
    auto *elements = Elements.load(std::memory_order_relaxed);

    // Order doesn't matter here, snapshots will gracefully handle any field
    // being NULL/0 while the others are not.
    Indices.store(nullptr, std::memory_order_relaxed);
    ElementCount.store(0, std::memory_order_relaxed);
    Elements.store(nullptr, std::memory_order_relaxed);
    ElementCapacity = 0;

    FreeListNode::add(&FreeList, indices);
    FreeListNode::add(&FreeList, elements);

    deallocateFreeListIfSafe();
  }
};

} // end namespace swift

#endif // SWIFT_RUNTIME_CONCURRENTUTILS_H