blob: 9d4918750a9c60775ac141391c3c7800e5356cc4 [file] [log] [blame]
// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_ALLOCATOR_DISPATCHER_TLS_H_
#define BASE_ALLOCATOR_DISPATCHER_TLS_H_
#include "build/build_config.h"
#if BUILDFLAG(IS_POSIX) // the current allocation mechanism (mmap) and TLS
// support (pthread) are both defined by POSIX
#define USE_LOCAL_TLS_EMULATION() true
#else
#define USE_LOCAL_TLS_EMULATION() false
#endif
#if USE_LOCAL_TLS_EMULATION()
#include <algorithm>
#include <atomic>
#include <memory>
#include <mutex>
#include "base/allocator/partition_allocator/partition_alloc_constants.h"
#include "base/base_export.h"
#include "base/check.h"
#include "base/compiler_specific.h"
#include <pthread.h>
#if HAS_FEATURE(thread_sanitizer)
#define DISABLE_TSAN_INSTRUMENTATION __attribute__((no_sanitize("thread")))
#else
#define DISABLE_TSAN_INSTRUMENTATION
#endif
namespace base::allocator::dispatcher {
namespace internal {
// Allocate memory using POSIX' mmap and unmap functionality. The allocator
// implements the allocator interface required by ThreadLocalStorage.
struct BASE_EXPORT MMapAllocator {
// The minimum size of a memory chunk when allocating. Even for chunks with
// fewer bytes, at least AllocationChunkSize bytes are allocated. For mmap, this
// is usually the page size of the system.
// For various OS-CPU combinations, partition_alloc::PartitionPageSize() is not
// constexpr. Hence, we can not use this value but define it locally.
#if defined(PAGE_ALLOCATOR_CONSTANTS_ARE_CONSTEXPR) && \
PAGE_ALLOCATOR_CONSTANTS_ARE_CONSTEXPR
constexpr static size_t AllocationChunkSize =
partition_alloc::PartitionPageSize();
#elif BUILDFLAG(IS_APPLE)
constexpr static size_t AllocationChunkSize = 16384;
#elif BUILDFLAG(IS_LINUX) && defined(ARCH_CPU_ARM64)
constexpr static size_t AllocationChunkSize = 16384;
#else
constexpr static size_t AllocationChunkSize = 4096;
#endif
// Allocate size_in_bytes bytes of raw memory. Return nullptr if allocation
// fails.
void* AllocateMemory(size_t size_in_bytes);
// Free the raw memory pointed to by pointer_to_allocated. Returns a boolean
// value indicating if the free was successful.
bool FreeMemoryForTesting(void* pointer_to_allocated, size_t size_in_bytes);
};
// The allocator used by default for the thread local storage.
using DefaultAllocator = MMapAllocator;
using OnThreadTerminationFunction = void (*)(void*);
// The TLS system used by default for the thread local storage. It stores and
// retrieves thread specific data pointers.
struct BASE_EXPORT PThreadTLSSystem {
// Initialize the TLS system to store a data set for different threads.
// @param thread_termination_function An optional function which will be
// invoked upon termination of a thread.
bool Setup(OnThreadTerminationFunction thread_termination_function);
// Tear down the TLS system. After completing tear down, the thread
// termination function passed to Setup will not be invoked anymore.
bool TearDownForTesting();
// Get the pointer to the data associated to the current thread. Returns
// nullptr if the TLS system is not initialized or no data was set before.
void* GetThreadSpecificData();
// Set the pointer to the data associated to the current thread. Return true
// if stored successfully, false otherwise.
bool SetThreadSpecificData(void* data);
private:
pthread_key_t data_access_key_ = 0;
#if DCHECK_IS_ON()
// From POSIX standard at https://www.open-std.org/jtc1/sc22/open/n4217.pdf:
// The effect of calling pthread_getspecific() or pthread_setspecific() with a
// key value not obtained from pthread_key_create() or after key has been
// deleted with pthread_key_delete() is undefined.
//
// Unfortunately, POSIX doesn't define a special value of pthread_key_t
// indicating an invalid key which would allow us to detect accesses outside
// of initialized state. Hence, to prevent us from drifting into the evil
// realm of undefined behaviour we store whether we're somewhere between Setup
// and Teardown.
std::atomic_bool initialized_{false};
#endif
};
using DefaultTLSSystem = PThreadTLSSystem;
// In some scenarios, most notably when testing, the allocator and TLS system
// passed to |ThreadLocalStorage| are not copyable and have to be wrapped, i.e.
// using std::reference_wrapper. |dereference| is a small helper to retrieve the
// underlying value.
template <typename T>
T& dereference(T& ref) {
return ref;
}
template <typename T>
T& dereference(std::reference_wrapper<T>& ref) {
// std::reference_wrapper requires a valid reference for construction,
// therefore, no need in checking here.
return ref.get();
}
// Store thread local data. The data is organized in chunks, where each chunk
// holds |ItemsPerChunk|. Each item may be free or used.
//
// When a thread requests data, the chunks are searched for a free data item,
// which is registered for this thread and marked as |used|. Further requests by
// this thread will then always return the same item. When a thread terminates,
// the item will be reset and return to the pool of free items.
//
// Upon construction, the first chunk is created. If a thread requests data and
// there is no free item available, another chunk is created. Upon destruction,
// all memory is freed. Pointers to data items become invalid!
//
// Constructor and destructor are not thread safe.
//
// @tparam PayloadType The item type to be stored.
// @tparam AllocatorType The allocator being used. An allocator must provide
// the following interface:
// void* AllocateMemory(size_t size_in_bytes); // Allocate size_in_bytes bytes
// of raw memory.
// void FreeMemory(void* pointer_to_allocated, size_t size_in_bytes); // Free
// the raw memory pointed to by pointer_to_allocated.
// Any failure in allocation or free must terminate the process.
// @tparam TLSSystemType The TLS system being used. A TLS system must provide
// the following interface:
// bool Setup(OnThreadTerminationFunction thread_termination_function);
// bool Destroy();
// void* GetThreadSpecificData();
// bool SetThreadSpecificData(void* data);
// @tparam AllocationChunkSize The minimum size of a memory chunk that the
// allocator can handle. We try to size the chunks so that each chunk uses this
// size to the maximum.
// @tparam IsDestructibleForTesting For testing purposes we allow the destructor
// to perform clean up upon destruction. Otherwise, using the destructor will
// result in a compilation failure.
template <typename PayloadType,
typename AllocatorType,
typename TLSSystemType,
size_t AllocationChunkSize,
bool IsDestructibleForTesting>
struct ThreadLocalStorage {
ThreadLocalStorage() : root_(AllocateAndInitializeChunk()) { Initialize(); }
// Create a new instance of |ThreadLocalStorage| using the passed allocator
// and TLS system. This initializes the underlying TLS system and creates the
// first chunk of data.
ThreadLocalStorage(AllocatorType allocator, TLSSystemType tlsSystem)
: allocator_(std::move(allocator)),
tls_system_(std::move(tlsSystem)),
root_(AllocateAndInitializeChunk()) {
Initialize();
}
// Deletes an instance of |ThreadLocalStorage| and delete all the data chunks
// created.
~ThreadLocalStorage() {
if constexpr (IsDestructibleForTesting) {
TearDownForTesting();
} else if constexpr (!IsDestructibleForTesting) {
static_assert(
IsDestructibleForTesting,
"ThreadLocalStorage cannot be destructed outside of test code.");
}
}
// Explicitly prevent all forms of Copy/Move construction/assignment. For an
// exact copy of ThreadLocalStorage we would need to copy the mapping of
// thread to item, which we can't do at the moment. On the other side, our
// atomic members do not support moving out of the box.
ThreadLocalStorage(const ThreadLocalStorage&) = delete;
ThreadLocalStorage(ThreadLocalStorage&& other) = delete;
ThreadLocalStorage& operator=(const ThreadLocalStorage&) = delete;
ThreadLocalStorage& operator=(ThreadLocalStorage&&) = delete;
// Get the data item for the current thread. If no data is registered so far,
// find a free item in the chunks and register it for the current thread.
PayloadType* GetThreadLocalData() {
auto& tls_system = dereference(tls_system_);
auto* slot = static_cast<SingleSlot*>(tls_system.GetThreadSpecificData());
if (UNLIKELY(slot == nullptr)) {
slot = FindAndAllocateFreeSlot(root_.load(std::memory_order_relaxed));
// We might be called in the course of handling a memory allocation. We do
// not use CHECK since they might allocate and cause a recursion.
RAW_CHECK(tls_system.SetThreadSpecificData(slot));
// Reset the content to wipe out any previous data.
Reset(slot->item);
}
return &(slot->item);
}
private:
// Encapsulate the payload item and some administrative data.
struct SingleSlot {
PayloadType item;
#if !defined(__cpp_lib_atomic_value_initialization) || \
__cpp_lib_atomic_value_initialization < 201911L
std::atomic_flag is_used = ATOMIC_FLAG_INIT;
#else
std::atomic_flag is_used;
#endif
};
template <size_t NumberOfItems>
struct ChunkT {
SingleSlot slots[NumberOfItems];
// Pointer to the next chunk.
std::atomic<ChunkT*> next_chunk = nullptr;
// Helper flag to ensure we create the next chunk only once in a multi
// threaded environment.
std::once_flag create_next_chunk_flag;
};
template <size_t LowerNumberOfItems,
size_t UpperNumberOfItems,
size_t NumberOfBytes>
static constexpr size_t CalculateEffectiveNumberOfItemsBinSearch() {
if constexpr (LowerNumberOfItems == UpperNumberOfItems) {
return LowerNumberOfItems;
}
constexpr size_t CurrentNumberOfItems =
(UpperNumberOfItems - LowerNumberOfItems) / 2 + LowerNumberOfItems;
if constexpr (sizeof(ChunkT<CurrentNumberOfItems>) > NumberOfBytes) {
return CalculateEffectiveNumberOfItemsBinSearch<
LowerNumberOfItems, CurrentNumberOfItems, NumberOfBytes>();
}
if constexpr (sizeof(ChunkT<CurrentNumberOfItems + 1>) < NumberOfBytes) {
return CalculateEffectiveNumberOfItemsBinSearch<
CurrentNumberOfItems + 1, UpperNumberOfItems, NumberOfBytes>();
}
return CurrentNumberOfItems;
}
// Calculate the maximum number of items we can store in one chunk without the
// size of the chunk exceeding NumberOfBytes. To avoid things like alignment
// and packing tampering with the calculation, instead of calculating the
// correct number of items we use sizeof-operator against ChunkT to search for
// the correct size. Unfortunately, the number of recursions is limited by the
// compiler. Therefore, we use a binary search instead of a simple linear
// search.
template <size_t MinimumNumberOfItems, size_t NumberOfBytes>
static constexpr size_t CalculateEffectiveNumberOfItems() {
if constexpr (sizeof(ChunkT<MinimumNumberOfItems>) < NumberOfBytes) {
constexpr size_t LowerNumberOfItems = MinimumNumberOfItems;
constexpr size_t UpperNumberOfItems =
NumberOfBytes / sizeof(PayloadType) + 1;
return CalculateEffectiveNumberOfItemsBinSearch<
LowerNumberOfItems, UpperNumberOfItems, NumberOfBytes>();
}
return MinimumNumberOfItems;
}
public:
// The minimum number of items per chunk. It should be high enough to
// accommodate most items in the root chunk whilst not wasting to much space
// on unnecessary items.
static constexpr size_t MinimumNumberOfItemsPerChunk = 75;
// The effective number of items per chunk. We use the AllocationChunkSize as
// a hint to calculate to effective number of items so we occupy one of these
// memory chunks to the maximum extent possible.
static constexpr size_t ItemsPerChunk =
CalculateEffectiveNumberOfItems<MinimumNumberOfItemsPerChunk,
AllocationChunkSize>();
private:
using Chunk = ChunkT<ItemsPerChunk>;
static_assert(ItemsPerChunk >= MinimumNumberOfItemsPerChunk);
// Mark an item's slot ready for reuse. This function is used as thread
// termination function in the TLS system. We do not destroy anything at this
// point but simply mark the slot as unused.
static void MarkSlotAsFree(void* data) {
// We always store SingleSlots in the TLS system. Therefore, we cast to
// SingleSlot and reset the is_used flag.
auto* const slot = static_cast<SingleSlot*>(data);
// We might be called in the course of handling a memory allocation. We do
// not use CHECK since they might allocate and cause a recursion.
RAW_CHECK(slot && slot->is_used.test_and_set());
slot->is_used.clear(std::memory_order_relaxed);
}
// Perform common initialization during construction of an instance.
void Initialize() {
// The constructor must be called outside of the allocation path. Therefore,
// it is secure to verify with CHECK.
// Passing MarkSlotAsFree as thread_termination_function we ensure the
// slot/item assigned to the finished thread will be returned to the pool of
// unused items.
CHECK(dereference(tls_system_).Setup(&MarkSlotAsFree));
}
Chunk* AllocateAndInitializeChunk() {
void* const uninitialized_memory =
dereference(allocator_).AllocateMemory(sizeof(Chunk));
// We might be called in the course of handling a memory allocation. We do
// not use CHECK since they might allocate and cause a recursion.
RAW_CHECK(uninitialized_memory != nullptr);
return new (uninitialized_memory) Chunk{};
}
void FreeAndDeallocateChunkForTesting(Chunk* chunk_to_erase) {
chunk_to_erase->~Chunk();
// FreeAndDeallocateChunkForTesting must be called outside of the allocation
// path. Therefore, it is secure to verify with CHECK.
CHECK(dereference(allocator_)
.FreeMemoryForTesting(chunk_to_erase, sizeof(Chunk)));
}
// Find a free slot in the passed chunk, reserve it and return it to the
// caller. If no free slot can be found, head on to the next chunk. If the
// next chunk doesn't exist, create it.
SingleSlot* FindAndAllocateFreeSlot(Chunk* const chunk) {
SingleSlot* const slot = std::find_if_not(
std::begin(chunk->slots), std::end(chunk->slots),
[](SingleSlot& candidate_slot) {
return candidate_slot.is_used.test_and_set(std::memory_order_relaxed);
});
// So we found a slot. Happily return it to the caller.
if (slot != std::end(chunk->slots)) {
return slot;
}
// Ok, there are no more free slots in this chunk. First, ensure the next
// chunk is valid and create one if necessary.
std::call_once(chunk->create_next_chunk_flag, [&] {
// From https://eel.is/c++draft/thread.once.callonce#3
//
// Synchronization: For any given once_­flag: all active executions occur
// in a total order; completion of an active execution synchronizes with
// the start of the next one in this total order; and the returning
// execution synchronizes with the return from all passive executions.
//
// Therefore, we do only a relaxed store here, call_once synchronizes with
// other threads.
chunk->next_chunk.store(AllocateAndInitializeChunk(),
std::memory_order_relaxed);
});
return FindAndAllocateFreeSlot(chunk->next_chunk);
}
template <bool IsDestructibleForTestingP = IsDestructibleForTesting>
typename std::enable_if<IsDestructibleForTestingP>::type
TearDownForTesting() {
// The destructor must be called outside of the allocation path. Therefore,
// it is secure to verify with CHECK.
// All accessing threads must be terminated by now. For additional security
// we tear down the TLS system first. This way we ensure that
// MarkSlotAsFree is not called anymore and we have no accesses from the
// TLS system's side.
CHECK(dereference(tls_system_).TearDownForTesting());
// Delete all data chunks.
for (auto* chunk = root_.load(); chunk != nullptr;) {
auto* next_chunk = chunk->next_chunk.load();
FreeAndDeallocateChunkForTesting(chunk);
chunk = next_chunk;
}
}
// Reset a single item to its default value.
// Since items are re-used, they may be accessed from different threads,
// causing TSan to trigger. Therefore, the reset is exempt from TSan
// instrumentation.
DISABLE_TSAN_INSTRUMENTATION void Reset(PayloadType& item) { item = {}; }
AllocatorType allocator_;
TLSSystemType tls_system_;
std::atomic<Chunk*> const root_;
};
} // namespace internal
// The ThreadLocalStorage visible to the user. This uses the internal default
// allocator and TLS system.
template <typename StorageType,
typename AllocatorType = internal::DefaultAllocator,
typename TLSSystemType = internal::DefaultTLSSystem,
size_t AllocationChunkSize = AllocatorType::AllocationChunkSize,
bool IsDestructibleForTesting = false>
using ThreadLocalStorage =
internal::ThreadLocalStorage<StorageType,
AllocatorType,
TLSSystemType,
AllocationChunkSize,
IsDestructibleForTesting>;
} // namespace base::allocator::dispatcher
#endif // USE_LOCAL_TLS_EMULATION()
#endif // BASE_ALLOCATOR_DISPATCHER_TLS_H_