| /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
| /* This Source Code Form is subject to the terms of the Mozilla Public |
| * License, v. 2.0. If a copy of the MPL was not distributed with this |
| * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
| |
| /* |
 * Implements (almost always) lock-free atomic operations. The operations here
 * are a subset of those available in C++11's <atomic> header, with a
 * different API to enforce consistent memory ordering constraints.
| * |
| * Anyone caught using |volatile| for inter-thread memory safety needs to be |
| * sent a copy of this header and the C++11 standard. |
| */ |
| |
| #ifndef mozilla_Atomics_h_ |
| #define mozilla_Atomics_h_ |
| |
| #include "mozilla/Assertions.h" |
| #include "mozilla/TypeTraits.h" |
| |
| #include <stdint.h> |
| |
| /* |
| * Our minimum deployment target on clang/OS X is OS X 10.6, whose SDK |
| * does not have <atomic>. So be sure to check for <atomic> support |
| * along with C++0x support. |
| */ |
| #if defined(__clang__) |
| /* |
| * clang doesn't like libstdc++'s version of <atomic> before GCC 4.7, |
| * due to the loose typing of the __sync_* family of functions done by |
| * GCC. We do not have a particularly good way to detect this sort of |
| * case at this point, so just assume that if we're on a Linux system, |
| * we can't use the system's <atomic>. |
| * |
 * OpenBSD uses an old libstdc++ 4.2.1 and thus doesn't have <atomic>.
| */ |
| # if !defined(__linux__) && !defined(__OpenBSD__) && \ |
| (__cplusplus >= 201103L || defined(__GXX_EXPERIMENTAL_CXX0X__)) && \ |
| __has_include(<atomic>) |
| # define MOZ_HAVE_CXX11_ATOMICS |
| # endif |
| /* |
| * Android uses a different C++ standard library that does not provide |
 * support for <atomic>.
| */ |
| #elif defined(__GNUC__) && !defined(__ANDROID__) |
| # include "mozilla/Compiler.h" |
| # if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) && \ |
| MOZ_GCC_VERSION_AT_LEAST(4, 5, 2) |
| # define MOZ_HAVE_CXX11_ATOMICS |
| # endif |
| #elif defined(_MSC_VER) && _MSC_VER >= 1700 |
| # define MOZ_HAVE_CXX11_ATOMICS |
| #endif |
| |
| namespace mozilla { |
| |
| /** |
| * An enum of memory ordering possibilities for atomics. |
| * |
 * Memory ordering constrains the order in which operations on distinct
 * memory locations become observable to other threads. (It's a separate
 * concept from atomicity, which concerns whether an
| * operation can ever be observed in an intermediate state. Don't |
| * conflate the two!) Given a sequence of operations in source code on |
| * memory, it is *not* always the case that, at all times and on all |
| * cores, those operations will appear to have occurred in that exact |
| * sequence. First, the compiler might reorder that sequence, if it |
| * thinks another ordering will be more efficient. Second, the CPU may |
| * not expose so consistent a view of memory. CPUs will often perform |
| * their own instruction reordering, above and beyond that performed by |
| * the compiler. And each core has its own memory caches, and accesses |
| * (reads and writes both) to "memory" may only resolve to out-of-date |
| * cache entries -- not to the "most recently" performed operation in |
| * some global sense. Any access to a value that may be used by |
| * multiple threads, potentially across multiple cores, must therefore |
| * have a memory ordering imposed on it, for all code on all |
| * threads/cores to have a sufficiently coherent worldview. |
| * |
| * http://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync and |
| * http://en.cppreference.com/w/cpp/atomic/memory_order go into more |
| * detail on all this, including examples of how each mode works. |
| * |
| * Note that for simplicity and practicality, not all of the modes in |
| * C++11 are supported. The missing C++11 modes are either subsumed by |
| * the modes we provide below, or not relevant for the CPUs we support |
| * in Gecko. These three modes are confusing enough as it is! |
| */ |
| enum MemoryOrdering { |
| /* |
| * Relaxed ordering is the simplest memory ordering: none at all. |
| * When the result of a write is observed, nothing may be inferred |
| * about other memory. Writes ostensibly performed "before" on the |
| * writing thread may not yet be visible. Writes performed "after" on |
| * the writing thread may already be visible, if the compiler or CPU |
| * reordered them. (The latter can happen if reads and/or writes get |
| * held up in per-processor caches.) Relaxed ordering means |
| * operations can always use cached values (as long as the actual |
| * updates to atomic values actually occur, correctly, eventually), so |
| * it's usually the fastest sort of atomic access. For this reason, |
| * *it's also the most dangerous kind of access*. |
| * |
| * Relaxed ordering is good for things like process-wide statistics |
| * counters that don't need to be consistent with anything else, so |
| * long as updates themselves are atomic. (And so long as any |
| * observations of that value can tolerate being out-of-date -- if you |
| * need some sort of up-to-date value, you need some sort of other |
| * synchronizing operation.) It's *not* good for locks, mutexes, |
| * reference counts, etc. that mediate access to other memory, or must |
| * be observably consistent with other memory. |
| * |
| * x86 architectures don't take advantage of the optimization |
| * opportunities that relaxed ordering permits. Thus it's possible |
| * that using relaxed ordering will "work" on x86 but fail elsewhere |
| * (ARM, say, which *does* implement non-sequentially-consistent |
| * relaxed ordering semantics). Be extra-careful using relaxed |
| * ordering if you can't easily test non-x86 architectures! |
| */ |
| Relaxed, |
| /* |
| * When an atomic value is updated with ReleaseAcquire ordering, and |
| * that new value is observed with ReleaseAcquire ordering, prior |
| * writes (atomic or not) are also observable. What ReleaseAcquire |
| * *doesn't* give you is any observable ordering guarantees for |
| * ReleaseAcquire-ordered operations on different objects. For |
| * example, if there are two cores that each perform ReleaseAcquire |
| * operations on separate objects, each core may or may not observe |
| * the operations made by the other core. The only way the cores can |
| * be synchronized with ReleaseAcquire is if they both |
| * ReleaseAcquire-access the same object. This implies that you can't |
| * necessarily describe some global total ordering of ReleaseAcquire |
| * operations. |
| * |
| * ReleaseAcquire ordering is good for (as the name implies) atomic |
| * operations on values controlling ownership of things: reference |
| * counts, mutexes, and the like. However, if you are thinking about |
| * using these to implement your own locks or mutexes, you should take |
| * a good, hard look at actual lock or mutex primitives first. |
| */ |
| ReleaseAcquire, |
| /* |
| * When an atomic value is updated with SequentiallyConsistent |
 * ordering, all prior writes are observable when the update is observed,
 * just as with ReleaseAcquire ordering. But, furthermore, a global total
| * ordering of SequentiallyConsistent operations *can* be described. |
| * For example, if two cores perform SequentiallyConsistent operations |
| * on separate objects, one core will observably perform its update |
| * (and all previous operations will have completed), then the other |
| * core will observably perform its update (and all previous |
| * operations will have completed). (Although those previous |
| * operations aren't themselves ordered -- they could be intermixed, |
| * or ordered if they occur on atomic values with ordering |
| * requirements.) SequentiallyConsistent is the *simplest and safest* |
| * ordering of atomic operations -- it's always as if one operation |
| * happens, then another, then another, in some order -- and every |
| * core observes updates to happen in that single order. Because it |
| * has the most synchronization requirements, operations ordered this |
| * way also tend to be slowest. |
| * |
| * SequentiallyConsistent ordering can be desirable when multiple |
| * threads observe objects, and they all have to agree on the |
| * observable order of changes to them. People expect |
| * SequentiallyConsistent ordering, even if they shouldn't, when |
| * writing code, atomic or otherwise. SequentiallyConsistent is also |
| * the ordering of choice when designing lockless data structures. If |
| * you don't know what order to use, use this one. |
| */ |
| SequentiallyConsistent, |
| }; |
| |
| } // namespace mozilla |
| |
| // Build up the underlying intrinsics. |
| #ifdef MOZ_HAVE_CXX11_ATOMICS |
| |
| # include <atomic> |
| |
| namespace mozilla { |
| namespace detail { |
| |
| template<MemoryOrdering Order> struct AtomicOrderConstraints; |
| |
| template<> |
| struct AtomicOrderConstraints<Relaxed> |
| { |
| static const std::memory_order AtomicRMWOrder = std::memory_order_relaxed; |
| static const std::memory_order LoadOrder = std::memory_order_relaxed; |
| static const std::memory_order StoreOrder = std::memory_order_relaxed; |
| }; |
| |
| template<> |
| struct AtomicOrderConstraints<ReleaseAcquire> |
| { |
| static const std::memory_order AtomicRMWOrder = std::memory_order_acq_rel; |
| static const std::memory_order LoadOrder = std::memory_order_acquire; |
| static const std::memory_order StoreOrder = std::memory_order_release; |
| }; |
| |
| template<> |
| struct AtomicOrderConstraints<SequentiallyConsistent> |
| { |
| static const std::memory_order AtomicRMWOrder = std::memory_order_seq_cst; |
| static const std::memory_order LoadOrder = std::memory_order_seq_cst; |
| static const std::memory_order StoreOrder = std::memory_order_seq_cst; |
| }; |
| |
| template<typename T, MemoryOrdering Order> |
| struct IntrinsicBase |
| { |
| typedef std::atomic<T> ValueType; |
| typedef AtomicOrderConstraints<Order> OrderedOp; |
| }; |
| |
| template<typename T, MemoryOrdering Order> |
| struct IntrinsicMemoryOps : public IntrinsicBase<T, Order> |
| { |
| typedef IntrinsicBase<T, Order> Base; |
| static T load(const typename Base::ValueType& ptr) { |
| return ptr.load(Base::OrderedOp::LoadOrder); |
| } |
| static void store(typename Base::ValueType& ptr, T val) { |
| ptr.store(val, Base::OrderedOp::StoreOrder); |
| } |
| static T exchange(typename Base::ValueType& ptr, T val) { |
| return ptr.exchange(val, Base::OrderedOp::AtomicRMWOrder); |
| } |
| static bool compareExchange(typename Base::ValueType& ptr, T oldVal, T newVal) { |
| return ptr.compare_exchange_strong(oldVal, newVal, Base::OrderedOp::AtomicRMWOrder); |
| } |
| }; |
| |
| template<typename T, MemoryOrdering Order> |
| struct IntrinsicAddSub : public IntrinsicBase<T, Order> |
| { |
| typedef IntrinsicBase<T, Order> Base; |
| static T add(typename Base::ValueType& ptr, T val) { |
| return ptr.fetch_add(val, Base::OrderedOp::AtomicRMWOrder); |
| } |
| static T sub(typename Base::ValueType& ptr, T val) { |
| return ptr.fetch_sub(val, Base::OrderedOp::AtomicRMWOrder); |
| } |
| }; |
| |
| template<typename T, MemoryOrdering Order> |
| struct IntrinsicAddSub<T*, Order> : public IntrinsicBase<T*, Order> |
| { |
| typedef IntrinsicBase<T*, Order> Base; |
| static T* add(typename Base::ValueType& ptr, ptrdiff_t val) { |
| return ptr.fetch_add(fixupAddend(val), Base::OrderedOp::AtomicRMWOrder); |
| } |
| static T* sub(typename Base::ValueType& ptr, ptrdiff_t val) { |
| return ptr.fetch_sub(fixupAddend(val), Base::OrderedOp::AtomicRMWOrder); |
| } |
| private: |
| /* |
| * GCC 4.6's <atomic> header has a bug where adding X to an |
| * atomic<T*> is not the same as adding X to a T*. Hence the need |
| * for this function to provide the correct addend. |
| */ |
| static ptrdiff_t fixupAddend(ptrdiff_t val) { |
| #if defined(__clang__) || defined(_MSC_VER) |
| return val; |
| #elif defined(__GNUC__) && MOZ_GCC_VERSION_AT_LEAST(4, 6, 0) && \ |
| !MOZ_GCC_VERSION_AT_LEAST(4, 7, 0) |
| return val * sizeof(T); |
| #else |
| return val; |
| #endif |
| } |
| }; |
| |
| template<typename T, MemoryOrdering Order> |
| struct IntrinsicIncDec : public IntrinsicAddSub<T, Order> |
| { |
| typedef IntrinsicBase<T, Order> Base; |
| static T inc(typename Base::ValueType& ptr) { |
| return IntrinsicAddSub<T, Order>::add(ptr, 1); |
| } |
| static T dec(typename Base::ValueType& ptr) { |
| return IntrinsicAddSub<T, Order>::sub(ptr, 1); |
| } |
| }; |
| |
| template<typename T, MemoryOrdering Order> |
| struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>, |
| public IntrinsicIncDec<T, Order> |
| { |
| typedef IntrinsicBase<T, Order> Base; |
| static T or_(typename Base::ValueType& ptr, T val) { |
| return ptr.fetch_or(val, Base::OrderedOp::AtomicRMWOrder); |
| } |
| static T xor_(typename Base::ValueType& ptr, T val) { |
| return ptr.fetch_xor(val, Base::OrderedOp::AtomicRMWOrder); |
| } |
| static T and_(typename Base::ValueType& ptr, T val) { |
| return ptr.fetch_and(val, Base::OrderedOp::AtomicRMWOrder); |
| } |
| }; |
| |
| template<typename T, MemoryOrdering Order> |
| struct AtomicIntrinsics<T*, Order> |
| : public IntrinsicMemoryOps<T*, Order>, public IntrinsicIncDec<T*, Order> |
| { |
| }; |
| |
| } // namespace detail |
| } // namespace mozilla |
| |
| #elif defined(__GNUC__) |
| |
| namespace mozilla { |
| namespace detail { |
| |
| /* |
| * The __sync_* family of intrinsics is documented here: |
| * |
| * http://gcc.gnu.org/onlinedocs/gcc-4.6.4/gcc/Atomic-Builtins.html |
| * |
| * While these intrinsics are deprecated in favor of the newer __atomic_* |
 * family of intrinsics:
| * |
| * http://gcc.gnu.org/onlinedocs/gcc-4.7.3/gcc/_005f_005fatomic-Builtins.html |
| * |
| * any GCC version that supports the __atomic_* intrinsics will also support |
| * the <atomic> header and so will be handled above. We provide a version of |
| * atomics using the __sync_* intrinsics to support older versions of GCC. |
| * |
| * All __sync_* intrinsics that we use below act as full memory barriers, for |
| * both compiler and hardware reordering, except for __sync_lock_test_and_set, |
 * which is only an acquire barrier. When we call __sync_lock_test_and_set,
| * we add a barrier above it as appropriate. |
| */ |
| |
| template<MemoryOrdering Order> struct Barrier; |
| |
| /* |
| * Some processors (in particular, x86) don't require quite so many calls to |
 * __sync_synchronize as our specializations of Barrier produce.  If
| * performance turns out to be an issue, defining these specializations |
| * on a per-processor basis would be a good first tuning step. |
| */ |
| |
| template<> |
| struct Barrier<Relaxed> |
| { |
| static void beforeLoad() {} |
| static void afterLoad() {} |
| static void beforeStore() {} |
| static void afterStore() {} |
| }; |
| |
| template<> |
| struct Barrier<ReleaseAcquire> |
| { |
| static void beforeLoad() {} |
| static void afterLoad() { __sync_synchronize(); } |
| static void beforeStore() { __sync_synchronize(); } |
| static void afterStore() {} |
| }; |
| |
| template<> |
| struct Barrier<SequentiallyConsistent> |
| { |
| static void beforeLoad() { __sync_synchronize(); } |
| static void afterLoad() { __sync_synchronize(); } |
| static void beforeStore() { __sync_synchronize(); } |
| static void afterStore() { __sync_synchronize(); } |
| }; |
| |
| template<typename T, MemoryOrdering Order> |
| struct IntrinsicMemoryOps |
| { |
| static T load(const T& ptr) { |
| Barrier<Order>::beforeLoad(); |
| T val = ptr; |
| Barrier<Order>::afterLoad(); |
| return val; |
| } |
| static void store(T& ptr, T val) { |
| Barrier<Order>::beforeStore(); |
| ptr = val; |
| Barrier<Order>::afterStore(); |
| } |
| static T exchange(T& ptr, T val) { |
| // __sync_lock_test_and_set is only an acquire barrier; loads and stores |
| // can't be moved up from after to before it, but they can be moved down |
| // from before to after it. We may want a stricter ordering, so we need |
| // an explicit barrier. |
| |
| Barrier<Order>::beforeStore(); |
| return __sync_lock_test_and_set(&ptr, val); |
| } |
| static bool compareExchange(T& ptr, T oldVal, T newVal) { |
| return __sync_bool_compare_and_swap(&ptr, oldVal, newVal); |
| } |
| }; |
| |
| template<typename T> |
| struct IntrinsicAddSub |
| { |
| typedef T ValueType; |
| static T add(T& ptr, T val) { |
| return __sync_fetch_and_add(&ptr, val); |
| } |
| static T sub(T& ptr, T val) { |
| return __sync_fetch_and_sub(&ptr, val); |
| } |
| }; |
| |
| template<typename T> |
| struct IntrinsicAddSub<T*> |
| { |
| typedef T* ValueType; |
| /* |
| * The reinterpret_casts are needed so that |
| * __sync_fetch_and_{add,sub} will properly type-check. |
| * |
| * Also, these functions do not provide standard semantics for |
| * pointer types, so we need to adjust the addend. |
| */ |
| static ValueType add(ValueType& ptr, ptrdiff_t val) { |
| ValueType amount = reinterpret_cast<ValueType>(val * sizeof(T)); |
| return __sync_fetch_and_add(&ptr, amount); |
| } |
| static ValueType sub(ValueType& ptr, ptrdiff_t val) { |
| ValueType amount = reinterpret_cast<ValueType>(val * sizeof(T)); |
| return __sync_fetch_and_sub(&ptr, amount); |
| } |
| }; |
| |
| template<typename T> |
| struct IntrinsicIncDec : public IntrinsicAddSub<T> |
| { |
| static T inc(T& ptr) { return IntrinsicAddSub<T>::add(ptr, 1); } |
| static T dec(T& ptr) { return IntrinsicAddSub<T>::sub(ptr, 1); } |
| }; |
| |
| template<typename T, MemoryOrdering Order> |
| struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>, |
| public IntrinsicIncDec<T> |
| { |
| static T or_(T& ptr, T val) { |
| return __sync_fetch_and_or(&ptr, val); |
| } |
| static T xor_(T& ptr, T val) { |
| return __sync_fetch_and_xor(&ptr, val); |
| } |
| static T and_(T& ptr, T val) { |
| return __sync_fetch_and_and(&ptr, val); |
| } |
| }; |
| |
| template<typename T, MemoryOrdering Order> |
| struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>, |
| public IntrinsicIncDec<T*> |
| { |
| }; |
| |
| } // namespace detail |
| } // namespace mozilla |
| |
| #elif defined(_MSC_VER) |
| |
| /* |
| * Windows comes with a full complement of atomic operations. |
| * Unfortunately, most of those aren't available for Windows XP (even if |
| * the compiler supports intrinsics for them), which is the oldest |
| * version of Windows we support. Therefore, we only provide operations |
| * on 32-bit datatypes for 32-bit Windows versions; for 64-bit Windows |
| * versions, we support 64-bit datatypes as well. |
| * |
| * To avoid namespace pollution issues, we declare whatever functions we |
| * need ourselves. |
| */ |
| |
| extern "C" { |
| long __cdecl _InterlockedExchangeAdd(long volatile* dst, long value); |
| long __cdecl _InterlockedOr(long volatile* dst, long value); |
| long __cdecl _InterlockedXor(long volatile* dst, long value); |
| long __cdecl _InterlockedAnd(long volatile* dst, long value); |
| long __cdecl _InterlockedExchange(long volatile *dst, long value); |
| long __cdecl _InterlockedCompareExchange(long volatile *dst, long newVal, long oldVal); |
| } |
| |
| # pragma intrinsic(_InterlockedExchangeAdd) |
| # pragma intrinsic(_InterlockedOr) |
| # pragma intrinsic(_InterlockedXor) |
| # pragma intrinsic(_InterlockedAnd) |
| # pragma intrinsic(_InterlockedExchange) |
| # pragma intrinsic(_InterlockedCompareExchange) |
| |
| namespace mozilla { |
| namespace detail { |
| |
| # if !defined(_M_IX86) && !defined(_M_X64) |
| /* |
| * The implementations below are optimized for x86ish systems. You |
| * will have to modify them if you are porting to Windows on a |
| * different architecture. |
| */ |
| # error "Unknown CPU type" |
| # endif |
| |
| /* |
| * The PrimitiveIntrinsics template should define |Type|, the datatype of size |
| * DataSize upon which we operate, and the following eight functions. |
| * |
| * static Type add(Type* ptr, Type val); |
| * static Type sub(Type* ptr, Type val); |
| * static Type or_(Type* ptr, Type val); |
| * static Type xor_(Type* ptr, Type val); |
| * static Type and_(Type* ptr, Type val); |
| * |
| * These functions perform the obvious operation on the value contained in |
| * |*ptr| combined with |val| and return the value previously stored in |
| * |*ptr|. |
| * |
| * static void store(Type* ptr, Type val); |
| * |
| * This function atomically stores |val| into |*ptr| and must provide a full |
| * memory fence after the store to prevent compiler and hardware instruction |
| * reordering. It should also act as a compiler barrier to prevent reads and |
| * writes from moving to after the store. |
| * |
| * static Type exchange(Type* ptr, Type val); |
| * |
| * This function atomically stores |val| into |*ptr| and returns the previous |
 * contents of |*ptr|.
| * |
| * static bool compareExchange(Type* ptr, Type oldVal, Type newVal); |
| * |
| * This function atomically performs the following operation: |
| * |
| * if (*ptr == oldVal) { |
| * *ptr = newVal; |
| * return true; |
| * } else { |
| * return false; |
| * } |
| * |
| */ |
| template<size_t DataSize> struct PrimitiveIntrinsics; |
| |
| template<> |
| struct PrimitiveIntrinsics<4> |
| { |
| typedef long Type; |
| |
| static Type add(Type* ptr, Type val) { |
| return _InterlockedExchangeAdd(ptr, val); |
| } |
| static Type sub(Type* ptr, Type val) { |
| /* |
| * _InterlockedExchangeSubtract isn't available before Windows 7, |
| * and we must support Windows XP. |
| */ |
| return _InterlockedExchangeAdd(ptr, -val); |
| } |
| static Type or_(Type* ptr, Type val) { |
| return _InterlockedOr(ptr, val); |
| } |
| static Type xor_(Type* ptr, Type val) { |
| return _InterlockedXor(ptr, val); |
| } |
| static Type and_(Type* ptr, Type val) { |
| return _InterlockedAnd(ptr, val); |
| } |
| static void store(Type* ptr, Type val) { |
| _InterlockedExchange(ptr, val); |
| } |
| static Type exchange(Type* ptr, Type val) { |
| return _InterlockedExchange(ptr, val); |
| } |
| static bool compareExchange(Type* ptr, Type oldVal, Type newVal) { |
| return _InterlockedCompareExchange(ptr, newVal, oldVal) == oldVal; |
| } |
| }; |
| |
| # if defined(_M_X64) |
| |
| extern "C" { |
| long long __cdecl _InterlockedExchangeAdd64(long long volatile* dst, |
| long long value); |
| long long __cdecl _InterlockedOr64(long long volatile* dst, |
| long long value); |
| long long __cdecl _InterlockedXor64(long long volatile* dst, |
| long long value); |
| long long __cdecl _InterlockedAnd64(long long volatile* dst, |
| long long value); |
| long long __cdecl _InterlockedExchange64(long long volatile* dst, |
| long long value); |
| long long __cdecl _InterlockedCompareExchange64(long long volatile* dst, |
| long long newVal, |
| long long oldVal); |
| } |
| |
| # pragma intrinsic(_InterlockedExchangeAdd64) |
| # pragma intrinsic(_InterlockedOr64) |
| # pragma intrinsic(_InterlockedXor64) |
| # pragma intrinsic(_InterlockedAnd64) |
| # pragma intrinsic(_InterlockedExchange64) |
| # pragma intrinsic(_InterlockedCompareExchange64) |
| |
| template <> |
| struct PrimitiveIntrinsics<8> |
| { |
| typedef __int64 Type; |
| |
| static Type add(Type* ptr, Type val) { |
| return _InterlockedExchangeAdd64(ptr, val); |
| } |
| static Type sub(Type* ptr, Type val) { |
| /* |
| * There is no _InterlockedExchangeSubtract64. |
| */ |
| return _InterlockedExchangeAdd64(ptr, -val); |
| } |
| static Type or_(Type* ptr, Type val) { |
| return _InterlockedOr64(ptr, val); |
| } |
| static Type xor_(Type* ptr, Type val) { |
| return _InterlockedXor64(ptr, val); |
| } |
| static Type and_(Type* ptr, Type val) { |
| return _InterlockedAnd64(ptr, val); |
| } |
| static void store(Type* ptr, Type val) { |
| _InterlockedExchange64(ptr, val); |
| } |
| static Type exchange(Type* ptr, Type val) { |
| return _InterlockedExchange64(ptr, val); |
| } |
| static bool compareExchange(Type* ptr, Type oldVal, Type newVal) { |
| return _InterlockedCompareExchange64(ptr, newVal, oldVal) == oldVal; |
| } |
| }; |
| |
| # endif |
| |
| extern "C" { void _ReadWriteBarrier(); } |
| |
| # pragma intrinsic(_ReadWriteBarrier) |
| |
| template<MemoryOrdering Order> struct Barrier; |
| |
| /* |
| * We do not provide an afterStore method in Barrier, as Relaxed and |
| * ReleaseAcquire orderings do not require one, and the required barrier |
| * for SequentiallyConsistent is handled by PrimitiveIntrinsics. |
| */ |
| |
| template<> |
| struct Barrier<Relaxed> |
| { |
| static void beforeLoad() {} |
| static void afterLoad() {} |
| static void beforeStore() {} |
| }; |
| |
| template<> |
| struct Barrier<ReleaseAcquire> |
| { |
| static void beforeLoad() {} |
| static void afterLoad() { _ReadWriteBarrier(); } |
| static void beforeStore() { _ReadWriteBarrier(); } |
| }; |
| |
| template<> |
| struct Barrier<SequentiallyConsistent> |
| { |
| static void beforeLoad() { _ReadWriteBarrier(); } |
| static void afterLoad() { _ReadWriteBarrier(); } |
| static void beforeStore() { _ReadWriteBarrier(); } |
| }; |
| |
| template<typename PrimType, typename T> |
| struct CastHelper |
| { |
| static PrimType toPrimType(T val) { return static_cast<PrimType>(val); } |
| static T fromPrimType(PrimType val) { return static_cast<T>(val); } |
| }; |
| |
| template<typename PrimType, typename T> |
| struct CastHelper<PrimType, T*> |
| { |
| static PrimType toPrimType(T* val) { return reinterpret_cast<PrimType>(val); } |
| static T* fromPrimType(PrimType val) { return reinterpret_cast<T*>(val); } |
| }; |
| |
| template<typename T> |
| struct IntrinsicBase |
| { |
| typedef T ValueType; |
| typedef PrimitiveIntrinsics<sizeof(T)> Primitives; |
| typedef typename Primitives::Type PrimType; |
| MOZ_STATIC_ASSERT(sizeof(PrimType) == sizeof(T), |
| "Selection of PrimitiveIntrinsics was wrong"); |
| typedef CastHelper<PrimType, T> Cast; |
| }; |
| |
| template<typename T, MemoryOrdering Order> |
| struct IntrinsicMemoryOps : public IntrinsicBase<T> |
| { |
| static ValueType load(const ValueType& ptr) { |
| Barrier<Order>::beforeLoad(); |
| ValueType val = ptr; |
| Barrier<Order>::afterLoad(); |
| return val; |
| } |
| static void store(ValueType& ptr, ValueType val) { |
| // For SequentiallyConsistent, Primitives::store() will generate the |
| // proper memory fence. Everything else just needs a barrier before |
| // the store. |
| if (Order == SequentiallyConsistent) { |
| Primitives::store(reinterpret_cast<PrimType*>(&ptr), |
| Cast::toPrimType(val)); |
| } else { |
| Barrier<Order>::beforeStore(); |
| ptr = val; |
| } |
| } |
| static ValueType exchange(ValueType& ptr, ValueType val) { |
| PrimType oldval = |
| Primitives::exchange(reinterpret_cast<PrimType*>(&ptr), |
| Cast::toPrimType(val)); |
| return Cast::fromPrimType(oldval); |
| } |
| static bool compareExchange(ValueType& ptr, ValueType oldVal, ValueType newVal) { |
| return Primitives::compareExchange(reinterpret_cast<PrimType*>(&ptr), |
| Cast::toPrimType(oldVal), |
| Cast::toPrimType(newVal)); |
| } |
| }; |
| |
| template<typename T> |
| struct IntrinsicApplyHelper : public IntrinsicBase<T> |
| { |
| typedef PrimType (*BinaryOp)(PrimType*, PrimType); |
| typedef PrimType (*UnaryOp)(PrimType*); |
| |
| static ValueType applyBinaryFunction(BinaryOp op, ValueType& ptr, |
| ValueType val) { |
| PrimType* primTypePtr = reinterpret_cast<PrimType*>(&ptr); |
| PrimType primTypeVal = Cast::toPrimType(val); |
| return Cast::fromPrimType(op(primTypePtr, primTypeVal)); |
| } |
| |
| static ValueType applyUnaryFunction(UnaryOp op, ValueType& ptr) { |
| PrimType* primTypePtr = reinterpret_cast<PrimType*>(&ptr); |
| return Cast::fromPrimType(op(primTypePtr)); |
| } |
| }; |
| |
| template<typename T> |
| struct IntrinsicAddSub : public IntrinsicApplyHelper<T> |
| { |
| static ValueType add(ValueType& ptr, ValueType val) { |
| return applyBinaryFunction(&Primitives::add, ptr, val); |
| } |
| static ValueType sub(ValueType& ptr, ValueType val) { |
| return applyBinaryFunction(&Primitives::sub, ptr, val); |
| } |
| }; |
| |
template<typename T>
struct IntrinsicAddSub<T*> : public IntrinsicApplyHelper<T*>
{
    /*
     * The underlying primitives add raw integral values, so pointer
     * arithmetic must be done by hand: scale the element count by sizeof(T)
     * to get a byte offset, as the __sync_* implementation above does.
     */
    static ValueType add(ValueType& ptr, ptrdiff_t amount) {
      return applyBinaryFunction(&Primitives::add, ptr,
                                 (ValueType)(amount * sizeof(T)));
    }
    static ValueType sub(ValueType& ptr, ptrdiff_t amount) {
      return applyBinaryFunction(&Primitives::sub, ptr,
                                 (ValueType)(amount * sizeof(T)));
    }
};
| |
| template<typename T> |
| struct IntrinsicIncDec : public IntrinsicAddSub<T> |
| { |
| static ValueType inc(ValueType& ptr) { return add(ptr, 1); } |
| static ValueType dec(ValueType& ptr) { return sub(ptr, 1); } |
| }; |
| |
| template<typename T, MemoryOrdering Order> |
| struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>, |
| public IntrinsicIncDec<T> |
| { |
| static ValueType or_(ValueType& ptr, T val) { |
| return applyBinaryFunction(&Primitives::or_, ptr, val); |
| } |
| static ValueType xor_(ValueType& ptr, T val) { |
| return applyBinaryFunction(&Primitives::xor_, ptr, val); |
| } |
| static ValueType and_(ValueType& ptr, T val) { |
| return applyBinaryFunction(&Primitives::and_, ptr, val); |
| } |
| }; |
| |
| template<typename T, MemoryOrdering Order> |
| struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>, |
| public IntrinsicIncDec<T*> |
| { |
| }; |
| |
| } // namespace detail |
| } // namespace mozilla |
| |
| #else |
| # error "Atomic compiler intrinsics are not supported on your platform" |
| #endif |
| |
| namespace mozilla { |
| |
| namespace detail { |
| |
| template<typename T, MemoryOrdering Order> |
| class AtomicBase |
| { |
| protected: |
| typedef typename detail::AtomicIntrinsics<T, Order> Intrinsics; |
| typename Intrinsics::ValueType mValue; |
| |
| public: |
| AtomicBase() : mValue() {} |
| AtomicBase(T aInit) { Intrinsics::store(mValue, aInit); } |
| |
| T operator++(int) { return Intrinsics::inc(mValue); } |
| T operator--(int) { return Intrinsics::dec(mValue); } |
| T operator++() { return Intrinsics::inc(mValue) + 1; } |
| T operator--() { return Intrinsics::dec(mValue) - 1; } |
| |
| operator T() const { return Intrinsics::load(mValue); } |
| |
| /** |
| * Performs an atomic swap operation. aValue is stored and the previous |
| * value of this variable is returned. |
| */ |
| T exchange(T aValue) { |
| return Intrinsics::exchange(mValue, aValue); |
| } |
| /** |
| * Performs an atomic compare-and-swap operation and returns true if it |
| * succeeded. This is equivalent to atomically doing |
| * |
| * if (mValue == aOldValue) { |
| * mValue = aNewValue; |
| * return true; |
| * } else { |
| * return false; |
| * } |
| */ |
| bool compareExchange(T aOldValue, T aNewValue) { |
| return Intrinsics::compareExchange(mValue, aOldValue, aNewValue); |
| } |
| |
| private: |
| template<MemoryOrdering AnyOrder> |
| AtomicBase(const AtomicBase<T, AnyOrder>& aCopy) MOZ_DELETE; |
| }; |
| |
| } // namespace detail |
| |
| /** |
| * A wrapper for a type that enforces that all memory accesses are atomic. |
| * |
| * In general, where a variable |T foo| exists, |Atomic<T> foo| can be |
| * used in its place. In addition to atomic store and load operations, |
| * compound assignment and increment/decrement operators are implemented |
| * which perform the corresponding read-modify-write operation |
| * atomically. Finally, an atomic swap method is provided. |
| * |
| * Atomic accesses are sequentially consistent by default. You should |
| * use the default unless you are tall enough to ride the |
| * memory-ordering roller coaster (if you're not sure, you aren't) and |
| * you have a compelling reason to do otherwise. |
| * |
 * There is one exception to the guarantee of atomic memory accesses:
 * providing an initial value for the atomic variable is not guaranteed to be
 * atomic. This is a deliberate design choice that enables static atomic
 * variables to be declared without introducing extra static constructors.
| */ |
| template<typename T, MemoryOrdering Order = SequentiallyConsistent> |
| class Atomic : public detail::AtomicBase<T, Order> |
| { |
| // We only support 32-bit types on 32-bit Windows, which constrains our |
| // implementation elsewhere. But we support pointer-sized types everywhere. |
| MOZ_STATIC_ASSERT(sizeof(T) == 4 || (sizeof(uintptr_t) == 8 && sizeof(T) == 8), |
| "mozilla/Atomics.h only supports 32-bit and pointer-sized types"); |
| // Regardless of the OS, we only support integral types here. |
| MOZ_STATIC_ASSERT(IsIntegral<T>::value, "can only have integral atomic variables"); |
| |
| typedef typename detail::AtomicBase<T, Order> Base; |
| |
| public: |
| Atomic() : detail::AtomicBase<T, Order>() {} |
| Atomic(T aInit) : detail::AtomicBase<T, Order>(aInit) {} |
| |
| T operator+=(T delta) { return Base::Intrinsics::add(Base::mValue, delta) + delta; } |
| T operator-=(T delta) { return Base::Intrinsics::sub(Base::mValue, delta) - delta; } |
| T operator|=(T val) { return Base::Intrinsics::or_(Base::mValue, val) | val; } |
| T operator^=(T val) { return Base::Intrinsics::xor_(Base::mValue, val) ^ val; } |
| T operator&=(T val) { return Base::Intrinsics::and_(Base::mValue, val) & val; } |
| |
| T operator=(T aValue) { |
| Base::Intrinsics::store(Base::mValue, aValue); |
| return aValue; |
| } |
| |
| private: |
| Atomic(Atomic<T, Order>& aOther) MOZ_DELETE; |
| }; |
| |
| /** |
| * A partial specialization of Atomic for pointer variables. |
| * |
| * Like Atomic<T>, Atomic<T*> is equivalent in most respects to a regular T* |
| * variable. An atomic compare-and-swap primitive for pointer variables is |
 * provided, as are atomic increment and decrement operators. Also provided
| * are the compound assignment operators for addition and subtraction. |
| * Atomic swap (via exchange()) is included as well. |
| * |
| * Atomic accesses are sequentially consistent by default. You should |
| * use the default unless you are tall enough to ride the |
| * memory-ordering roller coaster (if you're not sure, you aren't) and |
| * you have a compelling reason to do otherwise. |
| * |
 * There is one exception to the guarantee of atomic memory accesses:
 * providing an initial value for the atomic variable is not guaranteed to be
 * atomic. This is a deliberate design choice that enables static atomic
 * variables to be declared without introducing extra static constructors.
| */ |
| template<typename T, MemoryOrdering Order> |
| class Atomic<T*, Order> : public detail::AtomicBase<T*, Order> |
| { |
| typedef typename detail::AtomicBase<T*, Order> Base; |
| |
| public: |
| Atomic() : detail::AtomicBase<T*, Order>() {} |
| Atomic(T* aInit) : detail::AtomicBase<T*, Order>(aInit) {} |
| |
| T* operator +=(ptrdiff_t delta) { |
| return Base::Intrinsics::add(Base::mValue, delta) + delta; |
| } |
| T* operator -=(ptrdiff_t delta) { |
| return Base::Intrinsics::sub(Base::mValue, delta) - delta; |
| } |
| |
| T* operator=(T* aValue) { |
| Base::Intrinsics::store(Base::mValue, aValue); |
| return aValue; |
| } |
| |
| private: |
| Atomic(Atomic<T*, Order>& aOther) MOZ_DELETE; |
| }; |
| |
| } // namespace mozilla |
| |
| #endif /* mozilla_Atomics_h_ */ |