//===--- Atomic.h - Utilities for atomic operations. ------------*- C++ -*-===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
// Utilities for atomic operations, to use with std::atomic.
//
//===----------------------------------------------------------------------===//
#ifndef SWIFT_RUNTIME_ATOMIC_H
#define SWIFT_RUNTIME_ATOMIC_H
#include "swift/Runtime/Config.h"
#include <assert.h>
#include <atomic>
#include <cstdlib>
#if defined(_WIN64)
#include <intrin.h>
#endif
// FIXME: Workaround for rdar://problem/18889711. 'Consume' does not require
// a barrier on ARM64, but LLVM doesn't know that. Although 'relaxed'
// is formally UB by C++11 language rules, we should be OK because neither
// the processor model nor the optimizer can realistically reorder our uses
// of 'consume'.
#if defined(__arm__) || defined(_M_ARM) || defined(__arm64__) || defined(__aarch64__) || defined(_M_ARM64)
# define SWIFT_MEMORY_ORDER_CONSUME (std::memory_order_relaxed)
#else
# define SWIFT_MEMORY_ORDER_CONSUME (std::memory_order_consume)
#endif
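//
// Illustrative sketch of the intended use (the `Node` type and `globalHead`
// variable are hypothetical, not part of this header): a pointer published
// with a release store is loaded with SWIFT_MEMORY_ORDER_CONSUME and then
// dereferenced, relying on the hardware address dependency described above.
//
//   struct Node { int payload; };
//   std::atomic<Node *> globalHead;
//
//   Node *node = globalHead.load(SWIFT_MEMORY_ORDER_CONSUME);
//   if (node) {
//     // The read of node->payload carries an address dependency on the
//     // loaded pointer value, so it is ordered after the load on ARM64
//     // even without an explicit barrier.
//     int value = node->payload;
//   }
//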
#if defined(_M_ARM) || defined(__arm__) || defined(__aarch64__)
#define SWIFT_HAS_MSVC_ARM_ATOMICS 1
#else
#define SWIFT_HAS_MSVC_ARM_ATOMICS 0
#endif
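// This flag gates the _nf/_acq/_rel variants of
// _InterlockedCompareExchange128 used below; MSVC provides those suffixed
// forms only for ARM targets, so other builds use the full-barrier
// _InterlockedCompareExchange128.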
namespace swift {
namespace impl {
/// The default implementation for swift::atomic<T>, which just wraps
/// std::atomic with minor differences.
///
/// TODO: should we make this use non-atomic operations when the runtime
/// is single-threaded?
template <class Value, std::size_t Size = sizeof(Value)>
class alignas(Size) atomic_impl {
  std::atomic<Value> value;
public:
  constexpr atomic_impl(Value value) : value(value) {}

  /// Force clients to always pass an order.
  Value load(std::memory_order order) const {
    return value.load(order);
  }

  /// Force clients to always pass an order.
  void store(Value newValue, std::memory_order order) {
    return value.store(newValue, order);
  }

  /// Force clients to always pass an order.
  bool compare_exchange_weak(Value &oldValue, Value newValue,
                             std::memory_order successOrder,
                             std::memory_order failureOrder) {
    return value.compare_exchange_weak(oldValue, newValue, successOrder,
                                       failureOrder);
  }

  bool compare_exchange_strong(Value &oldValue, Value newValue,
                               std::memory_order successOrder,
                               std::memory_order failureOrder) {
    return value.compare_exchange_strong(oldValue, newValue, successOrder,
                                         failureOrder);
  }
};
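// Illustrative contrast (the `flag` variable is hypothetical): with
// std::atomic, `flag.load()` silently defaults to memory_order_seq_cst;
// with this wrapper, the same call does not compile until an ordering is
// spelled out, e.g. `flag.load(std::memory_order_acquire)`.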
#if defined(_WIN64)
/// MSVC's std::atomic uses an inline spin lock for 16-byte atomics,
/// which is not only unnecessarily inefficient but also doubles the size
/// of the atomic object. We don't care about supporting ancient
/// AMD processors that lack cmpxchg16b, so we just use the intrinsic.
template <class Value>
class alignas(2 * sizeof(void*)) atomic_impl<Value, 2 * sizeof(void*)> {
  mutable volatile Value atomicValue;
public:
  constexpr atomic_impl(Value initialValue) : atomicValue(initialValue) {}

  atomic_impl(const atomic_impl &) = delete;
  atomic_impl &operator=(const atomic_impl &) = delete;

  Value load(std::memory_order order) const {
    assert(order == std::memory_order_relaxed ||
           order == std::memory_order_acquire ||
           order == std::memory_order_consume);

    // Aligned SSE loads are atomic on every known processor, but
    // the only 16-byte access that's architecturally guaranteed to be
    // atomic is lock cmpxchg16b, so we do that with identical comparison
    // and new values purely for the side-effect of updating the old value.
    __int64 resultArray[2] = {};
#if SWIFT_HAS_MSVC_ARM_ATOMICS
    if (order != std::memory_order_acquire) {
      (void) _InterlockedCompareExchange128_nf(
          reinterpret_cast<volatile __int64*>(&atomicValue),
          0, 0, resultArray);
    } else {
#endif
      (void) _InterlockedCompareExchange128(
          reinterpret_cast<volatile __int64*>(&atomicValue),
          0, 0, resultArray);
#if SWIFT_HAS_MSVC_ARM_ATOMICS
    }
#endif
    return reinterpret_cast<Value &>(resultArray);
  }

  void store(Value newValue, std::memory_order order) {
    assert(order == std::memory_order_relaxed ||
           order == std::memory_order_release);
    Value oldValue = load(std::memory_order_relaxed);
    while (!compare_exchange_weak(oldValue, newValue,
                                  /*success*/ order,
                                  /*failure*/ std::memory_order_relaxed)) {
      // try again
    }
  }
  bool compare_exchange_weak(Value &oldValue, Value newValue,
                             std::memory_order successOrder,
                             std::memory_order failureOrder) {
    // We do not have weak CAS intrinsics, so fall back to the strong version.
    return compare_exchange_strong(oldValue, newValue, successOrder,
                                   failureOrder);
  }
  bool compare_exchange_strong(Value &oldValue, Value newValue,
                               std::memory_order successOrder,
                               std::memory_order failureOrder) {
#if SWIFT_HAS_MSVC_ARM_ATOMICS
    if (successOrder == std::memory_order_relaxed &&
        failureOrder != std::memory_order_acquire) {
      return _InterlockedCompareExchange128_nf(
          reinterpret_cast<volatile __int64*>(&atomicValue),
          reinterpret_cast<const __int64*>(&newValue)[1],
          reinterpret_cast<const __int64*>(&newValue)[0],
          reinterpret_cast<__int64*>(&oldValue));
    } else if (successOrder == std::memory_order_relaxed) {
      return _InterlockedCompareExchange128_acq(
          reinterpret_cast<volatile __int64*>(&atomicValue),
          reinterpret_cast<const __int64*>(&newValue)[1],
          reinterpret_cast<const __int64*>(&newValue)[0],
          reinterpret_cast<__int64*>(&oldValue));
    } else if (failureOrder != std::memory_order_acquire) {
      return _InterlockedCompareExchange128_rel(
          reinterpret_cast<volatile __int64*>(&atomicValue),
          reinterpret_cast<const __int64*>(&newValue)[1],
          reinterpret_cast<const __int64*>(&newValue)[0],
          reinterpret_cast<__int64*>(&oldValue));
    } else {
#endif
      return _InterlockedCompareExchange128(
          reinterpret_cast<volatile __int64*>(&atomicValue),
          reinterpret_cast<const __int64*>(&newValue)[1],
          reinterpret_cast<const __int64*>(&newValue)[0],
          reinterpret_cast<__int64*>(&oldValue));
#if SWIFT_HAS_MSVC_ARM_ATOMICS
    }
#endif
  }
};
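// Illustrative sketch of a 16-byte value this specialization could hold on
// Win64 (the `PointerAndCount` type and `slot` variable are hypothetical,
// not types defined in this header):
//
//   struct PointerAndCount {
//     void *pointer;
//     uintptr_t count;
//   };
//   swift::atomic<PointerAndCount> slot(PointerAndCount{nullptr, 0});
//
// Loads and compare-exchanges on `slot` then go through lock cmpxchg16b
// rather than MSVC's spin-lock-based std::atomic.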
#endif
} // end namespace swift::impl
/// A simple wrapper for std::atomic that provides the most important
/// interfaces and fixes the API bug where all of the orderings default
/// to sequentially-consistent.
///
/// It also sometimes uses a different implementation in cases where
/// std::atomic has made unfortunate choices; our uses of this broadly
/// don't have the ABI-compatibility issues that std::atomic faces.
template <class T>
class atomic : public impl::atomic_impl<T> {
public:
  constexpr atomic(T value) : impl::atomic_impl<T>(value) {}
};
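// Minimal usage sketch (the `counter` variable is hypothetical client code,
// not part of the runtime):
//
//   swift::atomic<uint32_t> counter(0);
//
//   uint32_t old = counter.load(std::memory_order_relaxed);
//   while (!counter.compare_exchange_weak(old, old + 1,
//                                         /*success*/ std::memory_order_relaxed,
//                                         /*failure*/ std::memory_order_relaxed)) {
//     // On failure, `old` has been refreshed with the current value; retry.
//   }
//
// Every call site must spell out its ordering, so a sequentially-consistent
// operation is an explicit choice rather than a silent default.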
} // end namespace swift
#endif