| // Copyright 2022 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifndef BASE_ALLOCATOR_DISPATCHER_TLS_H_ |
| #define BASE_ALLOCATOR_DISPATCHER_TLS_H_ |
| |
| #include "build/build_config.h" |
| |
| #if BUILDFLAG(IS_POSIX) // the current allocation mechanism (mmap) and TLS |
| // support (pthread) are both defined by POSIX |
| #define USE_LOCAL_TLS_EMULATION() true |
| #else |
| #define USE_LOCAL_TLS_EMULATION() false |
| #endif |
| |
| #if USE_LOCAL_TLS_EMULATION() |
| #include <algorithm> |
| #include <atomic> |
| #include <memory> |
| #include <mutex> |
| |
| #include "base/allocator/partition_allocator/partition_alloc_constants.h" |
| #include "base/base_export.h" |
| #include "base/check.h" |
| #include "base/compiler_specific.h" |
| |
| #include <pthread.h> |
| |
| #if HAS_FEATURE(thread_sanitizer) |
| #define DISABLE_TSAN_INSTRUMENTATION __attribute__((no_sanitize("thread"))) |
| #else |
| #define DISABLE_TSAN_INSTRUMENTATION |
| #endif |
| |
| namespace base::allocator::dispatcher { |
| namespace internal { |
| |
// Allocate memory using POSIX' mmap and unmap functionality. The allocator
// implements the allocator interface required by ThreadLocalStorage.
struct BASE_EXPORT MMapAllocator {
// The minimum size of a memory chunk when allocating. Even for chunks with
// fewer bytes, at least AllocationChunkSize bytes are allocated. For mmap, this
// is usually the page size of the system.
// For various OS-CPU combinations, partition_alloc::PartitionPageSize() is not
// constexpr. Hence, we can not use this value but define it locally.
#if defined(PAGE_ALLOCATOR_CONSTANTS_ARE_CONSTEXPR) && \
    PAGE_ALLOCATOR_CONSTANTS_ARE_CONSTEXPR
  constexpr static size_t AllocationChunkSize =
      partition_alloc::PartitionPageSize();
#elif BUILDFLAG(IS_APPLE)
  // 16kiB matches the page size on current Apple platforms.
  constexpr static size_t AllocationChunkSize = 16384;
#elif BUILDFLAG(IS_LINUX) && defined(ARCH_CPU_ARM64)
  // Linux on arm64 can be configured with 16kiB pages as well.
  constexpr static size_t AllocationChunkSize = 16384;
#else
  constexpr static size_t AllocationChunkSize = 4096;
#endif

  // Allocate size_in_bytes bytes of raw memory. Return nullptr if allocation
  // fails.
  void* AllocateMemory(size_t size_in_bytes);
  // Free the raw memory pointed to by pointer_to_allocated. Returns a boolean
  // value indicating if the free was successful.
  bool FreeMemoryForTesting(void* pointer_to_allocated, size_t size_in_bytes);
};
| |
// The allocator used by default for the thread local storage.
using DefaultAllocator = MMapAllocator;

// Signature of the callback a TLS system invokes when a thread terminates;
// it receives the data pointer that was stored for the terminating thread.
using OnThreadTerminationFunction = void (*)(void*);
| |
// The TLS system used by default for the thread local storage. It stores and
// retrieves thread specific data pointers. Implemented on top of
// pthread_key_create/pthread_getspecific/pthread_setspecific.
struct BASE_EXPORT PThreadTLSSystem {
  // Initialize the TLS system to store a data set for different threads.
  // @param thread_termination_function An optional function which will be
  // invoked upon termination of a thread.
  bool Setup(OnThreadTerminationFunction thread_termination_function);
  // Tear down the TLS system. After completing tear down, the thread
  // termination function passed to Setup will not be invoked anymore.
  bool TearDownForTesting();

  // Get the pointer to the data associated to the current thread. Returns
  // nullptr if the TLS system is not initialized or no data was set before.
  void* GetThreadSpecificData();
  // Set the pointer to the data associated to the current thread. Return true
  // if stored successfully, false otherwise.
  bool SetThreadSpecificData(void* data);

 private:
  // Key obtained from pthread_key_create during Setup; 0 is merely the
  // default-initialized value, not a guaranteed "invalid" key (see below).
  pthread_key_t data_access_key_ = 0;
#if DCHECK_IS_ON()
  // From POSIX standard at https://www.open-std.org/jtc1/sc22/open/n4217.pdf:
  // The effect of calling pthread_getspecific() or pthread_setspecific() with a
  // key value not obtained from pthread_key_create() or after key has been
  // deleted with pthread_key_delete() is undefined.
  //
  // Unfortunately, POSIX doesn't define a special value of pthread_key_t
  // indicating an invalid key which would allow us to detect accesses outside
  // of initialized state. Hence, to prevent us from drifting into the evil
  // realm of undefined behaviour we store whether we're somewhere between Setup
  // and Teardown.
  std::atomic_bool initialized_{false};
#endif
};
| |
// The TLS system used by default for the thread local storage.
using DefaultTLSSystem = PThreadTLSSystem;
| |
| // In some scenarios, most notably when testing, the allocator and TLS system |
| // passed to |ThreadLocalStorage| are not copyable and have to be wrapped, i.e. |
| // using std::reference_wrapper. |dereference| is a small helper to retrieve the |
| // underlying value. |
// Identity overload: a plain reference already is the underlying value, so it
// is handed back unchanged.
template <typename ValueType>
ValueType& dereference(ValueType& value) {
  return value;
}
| |
// Unwrapping overload for std::reference_wrapper. Since a reference_wrapper
// can only be constructed from a valid reference, no validity check is needed
// before unwrapping.
template <typename ValueType>
ValueType& dereference(std::reference_wrapper<ValueType>& wrapper) {
  return wrapper.get();
}
| |
// Store thread local data. The data is organized in chunks, where each chunk
// holds |ItemsPerChunk|. Each item may be free or used.
//
// When a thread requests data, the chunks are searched for a free data item,
// which is registered for this thread and marked as |used|. Further requests by
// this thread will then always return the same item. When a thread terminates,
// the item will be reset and return to the pool of free items.
//
// Upon construction, the first chunk is created. If a thread requests data and
// there is no free item available, another chunk is created. Upon destruction,
// all memory is freed. Pointers to data items become invalid!
//
// Constructor and destructor are not thread safe.
//
// @tparam PayloadType The item type to be stored.
// @tparam AllocatorType The allocator being used. An allocator must provide
// the following interface:
// void* AllocateMemory(size_t size_in_bytes); // Allocate size_in_bytes bytes
// of raw memory.
// bool FreeMemoryForTesting(void* pointer_to_allocated, size_t size_in_bytes);
// // Free the raw memory pointed to by pointer_to_allocated.
// Any failure in allocation or free must terminate the process.
// @tparam TLSSystemType The TLS system being used. A TLS system must provide
// the following interface:
// bool Setup(OnThreadTerminationFunction thread_termination_function);
// bool TearDownForTesting();
// void* GetThreadSpecificData();
// bool SetThreadSpecificData(void* data);
// @tparam AllocationChunkSize The minimum size of a memory chunk that the
// allocator can handle. We try to size the chunks so that each chunk uses this
// size to the maximum.
// @tparam IsDestructibleForTesting For testing purposes we allow the destructor
// to perform clean up upon destruction. Otherwise, using the destructor will
// result in a compilation failure.
template <typename PayloadType,
          typename AllocatorType,
          typename TLSSystemType,
          size_t AllocationChunkSize,
          bool IsDestructibleForTesting>
struct ThreadLocalStorage {
  // Create the storage using default constructed allocator and TLS system.
  // This initializes the underlying TLS system and creates the first chunk of
  // data.
  ThreadLocalStorage() : root_(AllocateAndInitializeChunk()) { Initialize(); }

  // Create a new instance of |ThreadLocalStorage| using the passed allocator
  // and TLS system. This initializes the underlying TLS system and creates the
  // first chunk of data.
  ThreadLocalStorage(AllocatorType allocator, TLSSystemType tlsSystem)
      : allocator_(std::move(allocator)),
        tls_system_(std::move(tlsSystem)),
        root_(AllocateAndInitializeChunk()) {
    Initialize();
  }

  // Deletes an instance of |ThreadLocalStorage| and delete all the data chunks
  // created.
  ~ThreadLocalStorage() {
    if constexpr (IsDestructibleForTesting) {
      TearDownForTesting();
    } else if constexpr (!IsDestructibleForTesting) {
      // The condition depends on a template parameter, so the static_assert
      // fires only when this destructor is actually instantiated, i.e. only
      // when non-test code attempts to destroy the storage.
      static_assert(
          IsDestructibleForTesting,
          "ThreadLocalStorage cannot be destructed outside of test code.");
    }
  }

  // Explicitly prevent all forms of Copy/Move construction/assignment. For an
  // exact copy of ThreadLocalStorage we would need to copy the mapping of
  // thread to item, which we can't do at the moment. On the other side, our
  // atomic members do not support moving out of the box.
  ThreadLocalStorage(const ThreadLocalStorage&) = delete;
  ThreadLocalStorage(ThreadLocalStorage&& other) = delete;
  ThreadLocalStorage& operator=(const ThreadLocalStorage&) = delete;
  ThreadLocalStorage& operator=(ThreadLocalStorage&&) = delete;

  // Get the data item for the current thread. If no data is registered so far,
  // find a free item in the chunks and register it for the current thread.
  PayloadType* GetThreadLocalData() {
    auto& tls_system = dereference(tls_system_);

    auto* slot = static_cast<SingleSlot*>(tls_system.GetThreadSpecificData());

    if (UNLIKELY(slot == nullptr)) {
      // First access from this thread: claim a free slot, starting the search
      // at the root chunk.
      slot = FindAndAllocateFreeSlot(root_.load(std::memory_order_relaxed));

      // We might be called in the course of handling a memory allocation. We do
      // not use CHECK since they might allocate and cause a recursion.
      RAW_CHECK(tls_system.SetThreadSpecificData(slot));

      // Reset the content to wipe out any previous data.
      Reset(slot->item);
    }

    return &(slot->item);
  }

 private:
  // Encapsulate the payload item and some administrative data.
  struct SingleSlot {
    PayloadType item;
#if !defined(__cpp_lib_atomic_value_initialization) || \
    __cpp_lib_atomic_value_initialization < 201911L
    // Before C++20's value-initialization of atomics, ATOMIC_FLAG_INIT is
    // required for a defined (clear) initial state.
    std::atomic_flag is_used = ATOMIC_FLAG_INIT;
#else
    std::atomic_flag is_used;
#endif
  };

  // A chunk of NumberOfItems slots plus the link to the next chunk.
  template <size_t NumberOfItems>
  struct ChunkT {
    SingleSlot slots[NumberOfItems];
    // Pointer to the next chunk.
    std::atomic<ChunkT*> next_chunk = nullptr;
    // Helper flag to ensure we create the next chunk only once in a multi
    // threaded environment.
    std::once_flag create_next_chunk_flag;
  };

  // Binary search helper for CalculateEffectiveNumberOfItems below: find the
  // largest item count in [LowerNumberOfItems, UpperNumberOfItems] whose
  // ChunkT still fits into NumberOfBytes.
  template <size_t LowerNumberOfItems,
            size_t UpperNumberOfItems,
            size_t NumberOfBytes>
  static constexpr size_t CalculateEffectiveNumberOfItemsBinSearch() {
    if constexpr (LowerNumberOfItems == UpperNumberOfItems) {
      return LowerNumberOfItems;
    }

    constexpr size_t CurrentNumberOfItems =
        (UpperNumberOfItems - LowerNumberOfItems) / 2 + LowerNumberOfItems;

    if constexpr (sizeof(ChunkT<CurrentNumberOfItems>) > NumberOfBytes) {
      return CalculateEffectiveNumberOfItemsBinSearch<
          LowerNumberOfItems, CurrentNumberOfItems, NumberOfBytes>();
    }

    if constexpr (sizeof(ChunkT<CurrentNumberOfItems + 1>) < NumberOfBytes) {
      return CalculateEffectiveNumberOfItemsBinSearch<
          CurrentNumberOfItems + 1, UpperNumberOfItems, NumberOfBytes>();
    }

    return CurrentNumberOfItems;
  }

  // Calculate the maximum number of items we can store in one chunk without the
  // size of the chunk exceeding NumberOfBytes. To avoid things like alignment
  // and packing tampering with the calculation, instead of calculating the
  // correct number of items we use sizeof-operator against ChunkT to search for
  // the correct size. Unfortunately, the number of recursions is limited by the
  // compiler. Therefore, we use a binary search instead of a simple linear
  // search.
  template <size_t MinimumNumberOfItems, size_t NumberOfBytes>
  static constexpr size_t CalculateEffectiveNumberOfItems() {
    if constexpr (sizeof(ChunkT<MinimumNumberOfItems>) < NumberOfBytes) {
      constexpr size_t LowerNumberOfItems = MinimumNumberOfItems;
      // An upper bound: even with zero overhead, no more than
      // NumberOfBytes / sizeof(PayloadType) + 1 items can fit.
      constexpr size_t UpperNumberOfItems =
          NumberOfBytes / sizeof(PayloadType) + 1;
      return CalculateEffectiveNumberOfItemsBinSearch<
          LowerNumberOfItems, UpperNumberOfItems, NumberOfBytes>();
    }

    return MinimumNumberOfItems;
  }

 public:
  // The minimum number of items per chunk. It should be high enough to
  // accommodate most items in the root chunk whilst not wasting too much space
  // on unnecessary items.
  static constexpr size_t MinimumNumberOfItemsPerChunk = 75;
  // The effective number of items per chunk. We use the AllocationChunkSize as
  // a hint to calculate the effective number of items so we occupy one of these
  // memory chunks to the maximum extent possible.
  static constexpr size_t ItemsPerChunk =
      CalculateEffectiveNumberOfItems<MinimumNumberOfItemsPerChunk,
                                      AllocationChunkSize>();

 private:
  using Chunk = ChunkT<ItemsPerChunk>;

  static_assert(ItemsPerChunk >= MinimumNumberOfItemsPerChunk);

  // Mark an item's slot ready for reuse. This function is used as thread
  // termination function in the TLS system. We do not destroy anything at this
  // point but simply mark the slot as unused.
  static void MarkSlotAsFree(void* data) {
    // We always store SingleSlots in the TLS system. Therefore, we cast to
    // SingleSlot and reset the is_used flag.
    auto* const slot = static_cast<SingleSlot*>(data);

    // We might be called in the course of handling a memory allocation. We do
    // not use CHECK since they might allocate and cause a recursion.
    // test_and_set returns the previous value, which must be true here since
    // the slot was handed out to the terminating thread.
    RAW_CHECK(slot && slot->is_used.test_and_set());

    slot->is_used.clear(std::memory_order_relaxed);
  }

  // Perform common initialization during construction of an instance.
  void Initialize() {
    // The constructor must be called outside of the allocation path. Therefore,
    // it is secure to verify with CHECK.

    // Passing MarkSlotAsFree as thread_termination_function we ensure the
    // slot/item assigned to the finished thread will be returned to the pool of
    // unused items.
    CHECK(dereference(tls_system_).Setup(&MarkSlotAsFree));
  }

  // Allocate raw memory for one Chunk and construct it in place. Terminates
  // (via RAW_CHECK) if the allocation fails.
  Chunk* AllocateAndInitializeChunk() {
    void* const uninitialized_memory =
        dereference(allocator_).AllocateMemory(sizeof(Chunk));

    // We might be called in the course of handling a memory allocation. We do
    // not use CHECK since they might allocate and cause a recursion.
    RAW_CHECK(uninitialized_memory != nullptr);

    return new (uninitialized_memory) Chunk{};
  }

  // Destroy a Chunk and return its memory to the allocator.
  void FreeAndDeallocateChunkForTesting(Chunk* chunk_to_erase) {
    chunk_to_erase->~Chunk();

    // FreeAndDeallocateChunkForTesting must be called outside of the allocation
    // path. Therefore, it is secure to verify with CHECK.
    CHECK(dereference(allocator_)
              .FreeMemoryForTesting(chunk_to_erase, sizeof(Chunk)));
  }

  // Find a free slot in the passed chunk, reserve it and return it to the
  // caller. If no free slot can be found, head on to the next chunk. If the
  // next chunk doesn't exist, create it.
  SingleSlot* FindAndAllocateFreeSlot(Chunk* const chunk) {
    // test_and_set atomically claims a slot; find_if_not stops at the first
    // slot whose flag was previously clear, which is now owned by us.
    SingleSlot* const slot = std::find_if_not(
        std::begin(chunk->slots), std::end(chunk->slots),
        [](SingleSlot& candidate_slot) {
          return candidate_slot.is_used.test_and_set(std::memory_order_relaxed);
        });

    // So we found a slot. Happily return it to the caller.
    if (slot != std::end(chunk->slots)) {
      return slot;
    }

    // Ok, there are no more free slots in this chunk. First, ensure the next
    // chunk is valid and create one if necessary.
    std::call_once(chunk->create_next_chunk_flag, [&] {
      // From https://eel.is/c++draft/thread.once.callonce#3
      //
      // Synchronization: For any given once_flag: all active executions occur
      // in a total order; completion of an active execution synchronizes with
      // the start of the next one in this total order; and the returning
      // execution synchronizes with the return from all passive executions.
      //
      // Therefore, we do only a relaxed store here, call_once synchronizes with
      // other threads.
      chunk->next_chunk.store(AllocateAndInitializeChunk(),
                              std::memory_order_relaxed);
    });

    // Recurse into the next chunk, which is guaranteed to exist after the
    // call_once above returned.
    return FindAndAllocateFreeSlot(chunk->next_chunk);
  }

  // Tear down the TLS system and release all chunks. Only available when
  // IsDestructibleForTesting is set; SFINAE removes it otherwise.
  template <bool IsDestructibleForTestingP = IsDestructibleForTesting>
  typename std::enable_if<IsDestructibleForTestingP>::type
  TearDownForTesting() {
    // The destructor must be called outside of the allocation path. Therefore,
    // it is secure to verify with CHECK.

    // All accessing threads must be terminated by now. For additional security
    // we tear down the TLS system first. This way we ensure that
    // MarkSlotAsFree is not called anymore and we have no accesses from the
    // TLS system's side.
    CHECK(dereference(tls_system_).TearDownForTesting());

    // Delete all data chunks.
    for (auto* chunk = root_.load(); chunk != nullptr;) {
      auto* next_chunk = chunk->next_chunk.load();
      FreeAndDeallocateChunkForTesting(chunk);
      chunk = next_chunk;
    }
  }

  // Reset a single item to its default value.
  // Since items are re-used, they may be accessed from different threads,
  // causing TSan to trigger. Therefore, the reset is exempt from TSan
  // instrumentation.
  DISABLE_TSAN_INSTRUMENTATION void Reset(PayloadType& item) { item = {}; }

  AllocatorType allocator_;
  TLSSystemType tls_system_;
  // Head of the chunk list; set once in the constructor and never re-assigned
  // (the atomic itself is const), only loaded.
  std::atomic<Chunk*> const root_;
};
| |
| } // namespace internal |
| |
// The ThreadLocalStorage visible to the user. This uses the internal default
// allocator and TLS system (mmap-backed memory and pthread TLS keys).
// IsDestructibleForTesting defaults to false, making destruction a compile
// error outside of test code.
template <typename StorageType,
          typename AllocatorType = internal::DefaultAllocator,
          typename TLSSystemType = internal::DefaultTLSSystem,
          size_t AllocationChunkSize = AllocatorType::AllocationChunkSize,
          bool IsDestructibleForTesting = false>
using ThreadLocalStorage =
    internal::ThreadLocalStorage<StorageType,
                                 AllocatorType,
                                 TLSSystemType,
                                 AllocationChunkSize,
                                 IsDestructibleForTesting>;
| |
| } // namespace base::allocator::dispatcher |
| |
| #endif // USE_LOCAL_TLS_EMULATION() |
| #endif // BASE_ALLOCATOR_DISPATCHER_TLS_H_ |