/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
* vim: set ts=8 sts=4 et sw=4 tw=99:
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/* For overall documentation, see jit/AtomicOperations.h */
#ifndef jit_shared_AtomicOperations_x86_shared_h
#define jit_shared_AtomicOperations_x86_shared_h
#include "mozilla/Assertions.h"
#include "mozilla/Types.h"
// Lock-freedom on x86 and x64:
//
// On x86 and x64 there are atomic instructions for 8-byte accesses:
//
// Load and stores:
// - Loads and stores are single-copy atomic for up to 8 bytes
// starting with the Pentium; the store requires a post-fence for
// sequential consistency
//
// CompareExchange:
// - On x64 CMPXCHGQ can always be used
// - On x86 CMPXCHG8B can be used starting with the first Pentium
//
// Exchange:
// - On x64 XCHGQ can always be used
// - On x86 one has to use a CompareExchange loop (see the sketch
//   just after this comment)
//
// Observe also that the JIT will not be enabled unless we have SSE2,
// which was introduced with the Pentium 4. Ergo the JIT will be able
// to use atomic instructions for up to 8 bytes on all x86 platforms
// for the primitives we care about.
//
// However, C++ compilers and libraries may not provide access to
// those 8-byte instructions directly. Clang in 32-bit mode does not
// provide 8-byte atomic primitives at all (even with eg -arch i686
// specified). On Windows 32-bit, MSVC does not provide
// _InterlockedExchange64 since it does not map directly to an
// instruction.
//
// There are thus sundry workarounds below to handle known corner
// cases.
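//
// By way of illustration, the CompareExchange loop mentioned above
// for a 64-bit exchange has the following shape.  This is a sketch
// only, with a hypothetical helper name; the real implementations are
// further below, and the MSVC fallback uses exactly this pattern:
//
//   int64_t exchange64(int64_t* addr, int64_t val) {
//       int64_t old;
//       do {
//           old = *addr;  // racy read; the CAS below validates it
//       } while (!__sync_bool_compare_and_swap(addr, old, val));
//       return old;
//   }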
#if defined(__clang__) || defined(__GNUC__)
// The default implementation tactic for gcc/clang is to use the newer
// __atomic intrinsics added for use in C++11 <atomic>. Where that
// isn't available, we use GCC's older __sync functions instead.
//
// ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS is kept as a backward
// compatible option for older compilers: enable this to use GCC's old
// __sync functions instead of the newer __atomic functions. This
// will be required for GCC 4.6.x and earlier, and probably for Clang
// 3.1, should we need to use those versions.
// #define ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
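// For reference, the two families express the same operations; eg,
// for a full fence:
//
//   __sync_synchronize();                     // older __sync style
//   __atomic_thread_fence(__ATOMIC_SEQ_CST);  // newer __atomic style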
// Lock-free 8-byte atomics are assumed on x86 but must be disabled in
// corner cases; see comments below and in isLockfree8().
# define LOCKFREE8
// This pertains to Clang compiling with -m32, in this case the 64-bit
// __atomic builtins are not available (observed on various Mac OS X
// versions with Apple Clang and on Linux with Clang 3.5).
//
// For now just punt: disable lock-free 8-byte data.  The JIT will
// call isLockfree8() to determine what to do and will stay in sync.
// (Bug 1146817 tracks the work to improve on this.)
# if defined(__clang__) && defined(__i386)
# undef LOCKFREE8
# endif
inline bool
js::jit::AtomicOperations::isLockfree8()
{
# ifndef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
MOZ_ASSERT(__atomic_always_lock_free(sizeof(int8_t), 0));
MOZ_ASSERT(__atomic_always_lock_free(sizeof(int16_t), 0));
MOZ_ASSERT(__atomic_always_lock_free(sizeof(int32_t), 0));
# endif
# ifdef LOCKFREE8
# ifndef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
MOZ_ASSERT(__atomic_always_lock_free(sizeof(int64_t), 0));
# endif
return true;
# else
return false;
# endif
}
inline void
js::jit::AtomicOperations::fenceSeqCst()
{
# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
__sync_synchronize();
# else
__atomic_thread_fence(__ATOMIC_SEQ_CST);
# endif
}
template<typename T>
inline T
js::jit::AtomicOperations::loadSeqCst(T* addr)
{
MOZ_ASSERT(sizeof(T) < 8 || isLockfree8());
# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
// Inhibit compiler reordering with a volatile load. The x86 does
// not reorder loads with respect to subsequent loads or stores
// and no ordering barrier is required here. See more elaborate
// comments in storeSeqCst.
T v = *static_cast<T volatile*>(addr);
# else
T v;
__atomic_load(addr, &v, __ATOMIC_SEQ_CST);
# endif
return v;
}
# ifndef LOCKFREE8
template<>
inline int64_t
js::jit::AtomicOperations::loadSeqCst(int64_t* addr)
{
MOZ_CRASH();
}
template<>
inline uint64_t
js::jit::AtomicOperations::loadSeqCst(uint64_t* addr)
{
MOZ_CRASH();
}
# endif // LOCKFREE8
template<typename T>
inline void
js::jit::AtomicOperations::storeSeqCst(T* addr, T val)
{
MOZ_ASSERT(sizeof(T) < 8 || isLockfree8());
# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
// Inhibit compiler reordering with a volatile store. The x86 may
// reorder a store with respect to a subsequent load from a
// different location, hence there is an ordering barrier here to
// prevent that.
//
// By way of background, look to eg
// http://bartoszmilewski.com/2008/11/05/who-ordered-memory-fences-on-an-x86/
//
// Consider:
//
// uint8_t x = 0, y = 0; // to start
//
// thread1:
// sx: AtomicOperations::store(&x, 1);
// gy: uint8_t obs1 = AtomicOperations::loadSeqCst(&y);
//
// thread2:
// sy: AtomicOperations::store(&y, 1);
// gx: uint8_t obs2 = AtomicOperations::loadSeqCst(&x);
//
// Sequential consistency requires a total global ordering of
// operations: sx-gy-sy-gx, sx-sy-gx-gy, sx-sy-gy-gx, sy-gx-sx-gy,
// sy-sx-gy-gx, or sy-sx-gx-gy. In every ordering at least one of
// sx-before-gx or sy-before-gy happens, so *at least one* of
// obs1/obs2 is 1.
//
    // If AtomicOperations::{load,store}SeqCst were just volatile
    // {load,store}, x86 could reorder gx/gy before each thread's
    // prior store.  That would permit the order gx-gy-sx-sy: both
    // loads would be 0!  Thus after a volatile store we must
    // synchronize to ensure the store happens before the load.
*static_cast<T volatile*>(addr) = val;
__sync_synchronize();
# else
__atomic_store(addr, &val, __ATOMIC_SEQ_CST);
# endif
}
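// An alternative that compilers commonly emit for a seq-cst store on
// x86 is a single XCHG, since a memory-operand XCHG is implicitly
// locked and hence both stores and fully fences.  An illustrative
// sketch, not used here (the builtin is documented only as an acquire
// barrier, but on x86 it compiles to XCHG, a full barrier):
//
//   (void)__sync_lock_test_and_set(addr, val);  // XCHG on x86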
# ifndef LOCKFREE8
template<>
inline void
js::jit::AtomicOperations::storeSeqCst(int64_t* addr, int64_t val)
{
MOZ_CRASH();
}
template<>
inline void
js::jit::AtomicOperations::storeSeqCst(uint64_t* addr, uint64_t val)
{
MOZ_CRASH();
}
# endif // LOCKFREE8
template<typename T>
inline T
js::jit::AtomicOperations::exchangeSeqCst(T* addr, T val)
{
MOZ_ASSERT(sizeof(T) < 8 || isLockfree8());
# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
T v;
do {
        // Here I assume the compiler will not hoist the load.  It
        // shouldn't, because the CAS could affect *addr.
v = *addr;
} while (!__sync_bool_compare_and_swap(addr, v, val));
return v;
# else
T v;
__atomic_exchange(addr, &val, &v, __ATOMIC_SEQ_CST);
return v;
# endif
}
# ifndef LOCKFREE8
template<>
inline int64_t
js::jit::AtomicOperations::exchangeSeqCst(int64_t* addr, int64_t val)
{
MOZ_CRASH();
}
template<>
inline uint64_t
js::jit::AtomicOperations::exchangeSeqCst(uint64_t* addr, uint64_t val)
{
MOZ_CRASH();
}
# endif // LOCKFREE8
template<typename T>
inline T
js::jit::AtomicOperations::compareExchangeSeqCst(T* addr, T oldval, T newval)
{
MOZ_ASSERT(sizeof(T) < 8 || isLockfree8());
# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
return __sync_val_compare_and_swap(addr, oldval, newval);
# else
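    // On failure __atomic_compare_exchange stores the value actually
    // observed in *addr into oldval; on success oldval is left holding
    // the expected (old) value.  Either way oldval is the witnessed
    // value, which is what we must return.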
__atomic_compare_exchange(addr, &oldval, &newval, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
return oldval;
# endif
}
# ifndef LOCKFREE8
template<>
inline int64_t
js::jit::AtomicOperations::compareExchangeSeqCst(int64_t* addr, int64_t oldval, int64_t newval)
{
MOZ_CRASH();
}
template<>
inline uint64_t
js::jit::AtomicOperations::compareExchangeSeqCst(uint64_t* addr, uint64_t oldval, uint64_t newval)
{
MOZ_CRASH();
}
# endif // LOCKFREE8
template<typename T>
inline T
js::jit::AtomicOperations::fetchAddSeqCst(T* addr, T val)
{
static_assert(sizeof(T) <= 4, "not available for 8-byte values yet");
# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
return __sync_fetch_and_add(addr, val);
# else
return __atomic_fetch_add(addr, val, __ATOMIC_SEQ_CST);
# endif
}
template<typename T>
inline T
js::jit::AtomicOperations::fetchSubSeqCst(T* addr, T val)
{
static_assert(sizeof(T) <= 4, "not available for 8-byte values yet");
# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
return __sync_fetch_and_sub(addr, val);
# else
return __atomic_fetch_sub(addr, val, __ATOMIC_SEQ_CST);
# endif
}
template<typename T>
inline T
js::jit::AtomicOperations::fetchAndSeqCst(T* addr, T val)
{
static_assert(sizeof(T) <= 4, "not available for 8-byte values yet");
# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
return __sync_fetch_and_and(addr, val);
# else
return __atomic_fetch_and(addr, val, __ATOMIC_SEQ_CST);
# endif
}
template<typename T>
inline T
js::jit::AtomicOperations::fetchOrSeqCst(T* addr, T val)
{
static_assert(sizeof(T) <= 4, "not available for 8-byte values yet");
# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
return __sync_fetch_and_or(addr, val);
# else
return __atomic_fetch_or(addr, val, __ATOMIC_SEQ_CST);
# endif
}
template<typename T>
inline T
js::jit::AtomicOperations::fetchXorSeqCst(T* addr, T val)
{
static_assert(sizeof(T) <= 4, "not available for 8-byte values yet");
# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
return __sync_fetch_and_xor(addr, val);
# else
return __atomic_fetch_xor(addr, val, __ATOMIC_SEQ_CST);
# endif
}
template<typename T>
inline T
js::jit::AtomicOperations::loadSafeWhenRacy(T* addr)
{
return *addr; // FIXME (1208663): not yet safe
}
template<typename T>
inline void
js::jit::AtomicOperations::storeSafeWhenRacy(T* addr, T val)
{
*addr = val; // FIXME (1208663): not yet safe
}
inline void
js::jit::AtomicOperations::memcpySafeWhenRacy(void* dest, const void* src, size_t nbytes)
{
::memcpy(dest, src, nbytes); // FIXME (1208663): not yet safe
}
inline void
js::jit::AtomicOperations::memmoveSafeWhenRacy(void* dest, const void* src, size_t nbytes)
{
::memmove(dest, src, nbytes); // FIXME (1208663): not yet safe
}
template<size_t nbytes>
inline void
js::jit::RegionLock::acquire(void* addr)
{
# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
while (!__sync_bool_compare_and_swap(&spinlock, 0, 1))
continue;
# else
uint32_t zero = 0;
uint32_t one = 1;
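    // On CAS failure __atomic_compare_exchange writes the observed
    // value (here necessarily 1) into 'zero', so it must be reset
    // before retrying.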
while (!__atomic_compare_exchange(&spinlock, &zero, &one, false, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE)) {
zero = 0;
continue;
}
# endif
}
template<size_t nbytes>
inline void
js::jit::RegionLock::release(void* addr)
{
MOZ_ASSERT(AtomicOperations::loadSeqCst(&spinlock) == 1, "releasing unlocked region lock");
# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
__sync_sub_and_fetch(&spinlock, 1); // Should turn into LOCK XADD
# else
uint32_t zero = 0;
__atomic_store(&spinlock, &zero, __ATOMIC_SEQ_CST);
# endif
}
# undef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
# undef LOCKFREE8
#elif defined(_MSC_VER)
// On 32-bit CPUs there is no 64-bit XCHG instruction, one must
// instead use a loop with CMPXCHG8B. Since MSVC provides
// _InterlockedExchange64 only if it maps directly to XCHG, the
// workaround must be manual.
# define HAVE_EXCHANGE64
# if !_WIN64
# undef HAVE_EXCHANGE64
# endif
// Below, _ReadWriteBarrier is a compiler-level barrier only: it
// prevents the compiler from reordering memory accesses and from
// caching memory values across it, but it emits no machine
// instruction.
inline bool
js::jit::AtomicOperations::isLockfree8()
{
// See general comments at the start of this file.
//
    // The MSDN docs suggest very strongly that if code is compiled for
    // Pentium or better the 64-bit primitives will be lock-free; see
    // eg the "Remarks" section of the page for
    // _InterlockedCompareExchange64, currently here:
    // https://msdn.microsoft.com/en-us/library/ttk2z1ws%28v=vs.85%29.aspx
    //
    // But I've found no way to assert that at compile time or run
    // time; there appears to be no WinAPI is_lock_free() test.
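    //
    // (In principle C++11's std::atomic<int64_t>().is_lock_free()
    // could answer this at run time, were <atomic> usable here.)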
return true;
}
inline void
js::jit::AtomicOperations::fenceSeqCst()
{
_ReadWriteBarrier();
# if JS_BITS_PER_WORD == 32
    // If configured for SSE2+ we could use the MFENCE instruction,
    // available through the _mm_mfence intrinsic, but pre-SSE2 systems
    // need something else.  Any LOCK-prefixed instruction acts as a
    // full barrier, and [esp] is virtually always in cache; Linux uses
    // "lock add [esp], 0", so why not?
__asm lock add [esp], 0;
# else
_mm_mfence();
# endif
}
template<typename T>
inline T
js::jit::AtomicOperations::loadSeqCst(T* addr)
{
MOZ_ASSERT(sizeof(T) < 8 || isLockfree8());
_ReadWriteBarrier();
T v = *addr;
_ReadWriteBarrier();
return v;
}
template<typename T>
inline void
js::jit::AtomicOperations::storeSeqCst(T* addr, T val)
{
MOZ_ASSERT(sizeof(T) < 8 || isLockfree8());
_ReadWriteBarrier();
*addr = val;
fenceSeqCst();
}
# define MSC_EXCHANGEOP(T, U, xchgop) \
template<> inline T \
js::jit::AtomicOperations::exchangeSeqCst(T* addr, T val) { \
MOZ_ASSERT(sizeof(T) < 8 || isLockfree8()); \
return (T)xchgop((U volatile*)addr, (U)val); \
}
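// With no 64-bit XCHG available, exchange must be synthesized: read
// the current value, then CAS the new value in with
// _InterlockedCompareExchange64 (CMPXCHG8B), retrying until the value
// the CAS reports finding is the value we read.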
# define MSC_EXCHANGEOP_CAS(T, U, cmpxchg) \
template<> inline T \
js::jit::AtomicOperations::exchangeSeqCst(T* addr, T newval) { \
MOZ_ASSERT(sizeof(T) < 8 || isLockfree8()); \
T oldval; \
do { \
_ReadWriteBarrier(); \
oldval = *addr; \
        } while (cmpxchg((U volatile*)addr, (U)newval, (U)oldval) != (U)oldval); \
return oldval; \
}
MSC_EXCHANGEOP(int8_t, char, _InterlockedExchange8)
MSC_EXCHANGEOP(uint8_t, char, _InterlockedExchange8)
MSC_EXCHANGEOP(int16_t, short, _InterlockedExchange16)
MSC_EXCHANGEOP(uint16_t, short, _InterlockedExchange16)
MSC_EXCHANGEOP(int32_t, long, _InterlockedExchange)
MSC_EXCHANGEOP(uint32_t, long, _InterlockedExchange)
# ifdef HAVE_EXCHANGE64
MSC_EXCHANGEOP(int64_t, __int64, _InterlockedExchange64)
MSC_EXCHANGEOP(uint64_t, __int64, _InterlockedExchange64)
# else
MSC_EXCHANGEOP_CAS(int64_t, __int64, _InterlockedCompareExchange64)
MSC_EXCHANGEOP_CAS(uint64_t, __int64, _InterlockedCompareExchange64)
# endif
# undef MSC_EXCHANGEOP
# undef MSC_EXCHANGEOP_CAS
# define MSC_CAS(T, U, cmpxchg) \
template<> inline T \
js::jit::AtomicOperations::compareExchangeSeqCst(T* addr, T oldval, T newval) { \
MOZ_ASSERT(sizeof(T) < 8 || isLockfree8()); \
return (T)cmpxchg((U volatile*)addr, (U)newval, (U)oldval); \
}
MSC_CAS(int8_t, char, _InterlockedCompareExchange8)
MSC_CAS(uint8_t, char, _InterlockedCompareExchange8)
MSC_CAS(int16_t, short, _InterlockedCompareExchange16)
MSC_CAS(uint16_t, short, _InterlockedCompareExchange16)
MSC_CAS(int32_t, long, _InterlockedCompareExchange)
MSC_CAS(uint32_t, long, _InterlockedCompareExchange)
MSC_CAS(int64_t, __int64, _InterlockedCompareExchange64)
MSC_CAS(uint64_t, __int64, _InterlockedCompareExchange64)
# undef MSC_CAS
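// x86 has XADD but no "XSUB", so fetchSubSeqCst is implemented as a
// fetchAdd of the negated value; both expand from the same macro.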
# define MSC_FETCHADDOP(T, U, xadd) \
template<> inline T \
js::jit::AtomicOperations::fetchAddSeqCst(T* addr, T val) { \
static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); \
return (T)xadd((U volatile*)addr, (U)val); \
} \
template<> inline T \
js::jit::AtomicOperations::fetchSubSeqCst(T* addr, T val) { \
static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); \
return (T)xadd((U volatile*)addr, -(U)val); \
}
MSC_FETCHADDOP(int8_t, char, _InterlockedExchangeAdd8)
MSC_FETCHADDOP(uint8_t, char, _InterlockedExchangeAdd8)
MSC_FETCHADDOP(int16_t, short, _InterlockedExchangeAdd16)
MSC_FETCHADDOP(uint16_t, short, _InterlockedExchangeAdd16)
MSC_FETCHADDOP(int32_t, long, _InterlockedExchangeAdd)
MSC_FETCHADDOP(uint32_t, long, _InterlockedExchangeAdd)
# undef MSC_FETCHADDOP
# define MSC_FETCHBITOP(T, U, andop, orop, xorop) \
template<> inline T \
js::jit::AtomicOperations::fetchAndSeqCst(T* addr, T val) { \
static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); \
return (T)andop((U volatile*)addr, (U)val); \
} \
template<> inline T \
js::jit::AtomicOperations::fetchOrSeqCst(T* addr, T val) { \
static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); \
return (T)orop((U volatile*)addr, (U)val); \
} \
template<> inline T \
js::jit::AtomicOperations::fetchXorSeqCst(T* addr, T val) { \
static_assert(sizeof(T) <= 4, "not available for 8-byte values yet"); \
return (T)xorop((U volatile*)addr, (U)val); \
}
MSC_FETCHBITOP(int8_t, char, _InterlockedAnd8, _InterlockedOr8, _InterlockedXor8)
MSC_FETCHBITOP(uint8_t, char, _InterlockedAnd8, _InterlockedOr8, _InterlockedXor8)
MSC_FETCHBITOP(int16_t, short, _InterlockedAnd16, _InterlockedOr16, _InterlockedXor16)
MSC_FETCHBITOP(uint16_t, short, _InterlockedAnd16, _InterlockedOr16, _InterlockedXor16)
MSC_FETCHBITOP(int32_t, long, _InterlockedAnd, _InterlockedOr, _InterlockedXor)
MSC_FETCHBITOP(uint32_t, long, _InterlockedAnd, _InterlockedOr, _InterlockedXor)
# undef MSC_FETCHBITOP
template<typename T>
inline T
js::jit::AtomicOperations::loadSafeWhenRacy(T* addr)
{
return *addr; // FIXME (1208663): not yet safe
}
template<typename T>
inline void
js::jit::AtomicOperations::storeSafeWhenRacy(T* addr, T val)
{
*addr = val; // FIXME (1208663): not yet safe
}
inline void
js::jit::AtomicOperations::memcpySafeWhenRacy(void* dest, const void* src, size_t nbytes)
{
::memcpy(dest, src, nbytes); // FIXME (1208663): not yet safe
}
inline void
js::jit::AtomicOperations::memmoveSafeWhenRacy(void* dest, const void* src, size_t nbytes)
{
::memmove(dest, src, nbytes); // FIXME (1208663): not yet safe
}
template<size_t nbytes>
inline void
js::jit::RegionLock::acquire(void* addr)
{
while (_InterlockedCompareExchange((long*)&spinlock, /*newval=*/1, /*oldval=*/0) == 1)
continue;
}
template<size_t nbytes>
inline void
js::jit::RegionLock::release(void* addr)
{
MOZ_ASSERT(AtomicOperations::loadSeqCst(&spinlock) == 1, "releasing unlocked region lock");
_InterlockedExchange((long*)&spinlock, 0);
}
# undef HAVE_EXCHANGE64
#elif defined(ENABLE_SHARED_ARRAY_BUFFER)
# error "Either disable JS shared memory, use GCC, Clang, or MSVC, or add code here"
#endif // platform
#endif // jit_shared_AtomicOperations_x86_shared_h