// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_ALLOCATOR_PARTITION_ALLOCATOR_SPINNING_MUTEX_H_
#define BASE_ALLOCATOR_PARTITION_ALLOCATOR_SPINNING_MUTEX_H_
#include <algorithm>
#include <atomic>
#include "base/allocator/partition_allocator/partition_alloc_base/compiler_specific.h"
#include "base/allocator/partition_allocator/partition_alloc_base/component_export.h"
#include "base/allocator/partition_allocator/partition_alloc_base/thread_annotations.h"
#include "base/allocator/partition_allocator/partition_alloc_check.h"
#include "base/allocator/partition_allocator/partition_alloc_config.h"
#include "base/allocator/partition_allocator/yield_processor.h"
#include "build/build_config.h"
#if BUILDFLAG(IS_WIN)
#include "base/allocator/partition_allocator/partition_alloc_base/win/windows_types.h"
#endif
#if BUILDFLAG(IS_POSIX)
#include <errno.h>
#include <pthread.h>
#endif
#if BUILDFLAG(IS_APPLE)
#include <os/lock.h>
#endif // BUILDFLAG(IS_APPLE)
#if BUILDFLAG(IS_FUCHSIA)
#include <lib/sync/mutex.h>
#endif
namespace partition_alloc::internal {
// The behavior of this class depends on whether PA_CONFIG(HAS_FAST_MUTEX) is
// set.
// 1. When it is defined:
//
// Simple spinning lock. It will spin in user space a set number of times before
// going into the kernel to sleep.
//
// This is intended to give "the best of both worlds" between a SpinLock and
// base::Lock:
// - SpinLock: Inlined fast path, no external function calls, just
// compare-and-swap. Short waits do not go into the kernel. Good behavior in
// low contention cases.
// - base::Lock: Good behavior in case of contention.
//
// We don't rely on base::Lock, which we could make spin (by calling Try() in
// a loop), because its performance is below that of a custom spinlock, as
// seen in high-level benchmarks. Instead this implements a simple
// non-recursive mutex on top of the futex() syscall on Linux, SRWLock on
// Windows, os_unfair_lock on macOS, and pthread_mutex elsewhere on POSIX. The
// main difference between this and a libc implementation is that it only
// supports the simplest path: private (to a process), non-recursive mutexes
// with no priority inheritance and no timed waits.
//
// As a side-effect that makes this usable inside the allocator, this code
// does not make any allocations: locks are small, with a constexpr
// constructor and no destructor.
//
// 2. Otherwise: This is a simple SpinLock, in the sense that it does not have
// any awareness of other threads' behavior.
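//
// Example usage (a sketch; allocator code typically goes through the scoped
// lock guard helpers in partition_lock.h rather than using this class
// directly):
//
//   SpinningMutex mutex;  // constexpr constructor, so fine as a global.
//
//   void Foo() {
//     mutex.Acquire();
//     // Critical section. Must not re-acquire: the mutex is non-recursive.
//     mutex.Release();
//   }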
class PA_LOCKABLE PA_COMPONENT_EXPORT(PARTITION_ALLOC) SpinningMutex {
public:
inline constexpr SpinningMutex();
PA_ALWAYS_INLINE void Acquire() PA_EXCLUSIVE_LOCK_FUNCTION();
PA_ALWAYS_INLINE void Release() PA_UNLOCK_FUNCTION();
PA_ALWAYS_INLINE bool Try() PA_EXCLUSIVE_TRYLOCK_FUNCTION(true);
void AssertAcquired() const {} // Not supported.
void Reinit() PA_UNLOCK_FUNCTION();
private:
PA_NOINLINE void AcquireSpinThenBlock() PA_EXCLUSIVE_LOCK_FUNCTION();
#if PA_CONFIG(HAS_FAST_MUTEX)
void LockSlow() PA_EXCLUSIVE_LOCK_FUNCTION();
#else
PA_ALWAYS_INLINE void LockSlow() PA_EXCLUSIVE_LOCK_FUNCTION();
#endif
// The latency of PA_YIELD_PROCESSOR can be as high as ~150 cycles. Meanwhile,
// sleeping costs a few us. Spinning 64 times at 3GHz would cost
// 150 * 64 / 3e9 ~= 3.2us.
//
// This applies to Linux kernels on x86_64. On ARM we might want to spin more.
static constexpr int kSpinCount = 64;
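// A sketch of what the out-of-line AcquireSpinThenBlock() in the .cc file
// does, assuming it follows the spin-then-sleep structure described above
// (the real code may add backoff between attempts):
//
//   void SpinningMutex::AcquireSpinThenBlock() {
//     for (int tries = 0; tries < kSpinCount; tries++) {
//       if (PA_LIKELY(Try()))
//         return;
//       PA_YIELD_PROCESSOR;
//     }
//     LockSlow();  // Gives up spinning, blocks in the kernel.
//   }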
#if PA_CONFIG(HAS_FAST_MUTEX)
#if PA_CONFIG(HAS_LINUX_KERNEL)
void FutexWait();
void FutexWake();
static constexpr int kUnlocked = 0;
static constexpr int kLockedUncontended = 1;
static constexpr int kLockedContended = 2;
std::atomic<int32_t> state_{kUnlocked};
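// State transitions, a sketch following the classic three-state futex
// protocol (as in Ulrich Drepper's "Futexes Are Tricky"):
//   kUnlocked          -> kLockedUncontended  fast-path CAS in Try().
//   kLockedUncontended -> kLockedContended    waiter in LockSlow(), right
//                                             before sleeping in FutexWait().
//   any state          -> kUnlocked           exchange() in Release(), which
//                                             calls FutexWake() if contended.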
#elif BUILDFLAG(IS_WIN)
PA_CHROME_SRWLOCK lock_ = SRWLOCK_INIT;
#elif BUILDFLAG(IS_APPLE)
os_unfair_lock unfair_lock_ = OS_UNFAIR_LOCK_INIT;
#elif BUILDFLAG(IS_POSIX)
pthread_mutex_t lock_ = PTHREAD_MUTEX_INITIALIZER;
#elif BUILDFLAG(IS_FUCHSIA)
sync_mutex lock_;
#endif
#else // PA_CONFIG(HAS_FAST_MUTEX)
std::atomic<bool> lock_{false};
// Spinlock-like, fallback.
PA_ALWAYS_INLINE bool TrySpinLock();
PA_ALWAYS_INLINE void ReleaseSpinLock();
void LockSlowSpinLock();
#endif // PA_CONFIG(HAS_FAST_MUTEX)
};
PA_ALWAYS_INLINE void SpinningMutex::Acquire() {
// Not marked PA_LIKELY(), as:
// 1. We don't know how much contention the lock would experience
// 2. This may lead to weird-looking code layout when inlined into a caller
// with PA_(UN)LIKELY() annotations.
if (Try()) {
return;
}
return AcquireSpinThenBlock();
}
inline constexpr SpinningMutex::SpinningMutex() = default;
#if PA_CONFIG(HAS_FAST_MUTEX)
#if PA_CONFIG(HAS_LINUX_KERNEL)
PA_ALWAYS_INLINE bool SpinningMutex::Try() {
// Using the weak variant of compare_exchange(), which may fail spuriously. On
// some architectures such as ARM, CAS is typically performed as an
// LDREX/STREX
// pair, where the store may fail. In the strong version, there is a loop
// inserted by the compiler to retry in these cases.
//
// Since we are retrying in Lock() anyway, there is no point having two nested
// loops.
int expected = kUnlocked;
return (state_.load(std::memory_order_relaxed) == expected) &&
state_.compare_exchange_weak(expected, kLockedUncontended,
std::memory_order_acquire,
std::memory_order_relaxed);
}
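// For contrast, compare_exchange_strong() behaves roughly like the sketch
// below (names hypothetical): a loop absorbing spurious failures, nested
// inside the retry loop that callers already run around Try().
//
//   bool StrongCas(std::atomic<int32_t>& a, int& expected, int desired) {
//     const int snapshot = expected;
//     while (!a.compare_exchange_weak(expected, desired)) {
//       if (expected != snapshot)
//         return false;  // Genuine failure: the value actually changed.
//       // Spurious failure (e.g. STREX failed): retry with the same value.
//     }
//     return true;
//   }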
PA_ALWAYS_INLINE void SpinningMutex::Release() {
if (PA_UNLIKELY(state_.exchange(kUnlocked, std::memory_order_release) ==
kLockedContended)) {
// |kLockedContended|: there is a waiter to wake up.
//
// Here there is a window where the lock is unlocked, since we just set it
// to |kUnlocked| above. Meaning that another thread can grab the lock
// in-between now and |FutexWake()| waking up a waiter. Aside from a
// potential loss of fairness, this is not an issue, as the newly-awakened
// thread will check that the lock is still free.
//
// There is a small pessimization here though: if we have a single waiter,
// then when it wakes up, the lock will be set to |kLockedContended|, so
// when this waiter releases the lock, it will needlessly call
// |FutexWake()|, even though there are no waiters. This is supported by the
// kernel, and is what bionic (Android's libc) also does.
FutexWake();
}
}
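// A sketch of the matching out-of-line LockSlow() from the .cc file, assuming
// the three-state protocol described in the class definition:
//
//   void SpinningMutex::LockSlow() {
//     // Pessimistically mark the lock as contended, then sleep. If the lock
//     // was released in the meantime, exchange() returns |kUnlocked| and we
//     // own the lock. Otherwise, sleep until Release() calls FutexWake(),
//     // then retry, again advertising contention.
//     while (state_.exchange(kLockedContended, std::memory_order_acquire) !=
//            kUnlocked) {
//       FutexWait();
//     }
//   }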
#elif BUILDFLAG(IS_WIN)
PA_ALWAYS_INLINE bool SpinningMutex::Try() {
return !!::TryAcquireSRWLockExclusive(reinterpret_cast<PSRWLOCK>(&lock_));
}
PA_ALWAYS_INLINE void SpinningMutex::Release() {
::ReleaseSRWLockExclusive(reinterpret_cast<PSRWLOCK>(&lock_));
}
#elif BUILDFLAG(IS_APPLE)
PA_ALWAYS_INLINE bool SpinningMutex::Try() {
return os_unfair_lock_trylock(&unfair_lock_);
}
PA_ALWAYS_INLINE void SpinningMutex::Release() {
return os_unfair_lock_unlock(&unfair_lock_);
}
#elif BUILDFLAG(IS_POSIX)
PA_ALWAYS_INLINE bool SpinningMutex::Try() {
int retval = pthread_mutex_trylock(&lock_);
PA_DCHECK(retval == 0 || retval == EBUSY);
return retval == 0;
}
PA_ALWAYS_INLINE void SpinningMutex::Release() {
int retval = pthread_mutex_unlock(&lock_);
PA_DCHECK(retval == 0);
}
#elif BUILDFLAG(IS_FUCHSIA)
PA_ALWAYS_INLINE bool SpinningMutex::Try() {
return sync_mutex_trylock(&lock_) == ZX_OK;
}
PA_ALWAYS_INLINE void SpinningMutex::Release() {
sync_mutex_unlock(&lock_);
}
#endif
#else // PA_CONFIG(HAS_FAST_MUTEX)
PA_ALWAYS_INLINE bool SpinningMutex::Try() {
// Possibly faster than CAS: thanks to the initial relaxed load, in the
// contended case the cacheline can stay in the shared state, rather than
// bouncing to exclusive ownership on every attempt.
return !lock_.load(std::memory_order_relaxed) &&
!lock_.exchange(true, std::memory_order_acquire);
}
PA_ALWAYS_INLINE void SpinningMutex::Release() {
lock_.store(false, std::memory_order_release);
}
PA_ALWAYS_INLINE void SpinningMutex::LockSlow() {
return LockSlowSpinLock();
}
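// A sketch of the out-of-line LockSlowSpinLock() from the .cc file, using the
// TrySpinLock() helper declared above; the backoff policy shown here is
// illustrative, not the real one:
//
//   void SpinningMutex::LockSlowSpinLock() {
//     int tries = 0;
//     while (!TrySpinLock()) {
//       if (tries++ < kSpinCount) {
//         PA_YIELD_PROCESSOR;  // Stay in user space for short waits.
//       } else {
//         // No futex-like primitive to sleep on; yielding the CPU is the
//         // best we can do to let the lock holder run.
//         sched_yield();  // Illustrative; the real code is platform-specific.
//       }
//     }
//   }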
#endif // PA_CONFIG(HAS_FAST_MUTEX)
} // namespace partition_alloc::internal
#endif // BASE_ALLOCATOR_PARTITION_ALLOCATOR_SPINNING_MUTEX_H_