| // Copyright 2014 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "base/threading/thread_local_storage.h" |
| |
| #include "base/atomicops.h" |
| #include "base/logging.h" |
| #include "base/synchronization/lock.h" |
| #include "build/build_config.h" |
| #include "starboard/memory.h" |
| |
| using base::internal::PlatformThreadLocalStorage; |
| |
| // Chrome Thread Local Storage (TLS) |
| // |
| // This TLS system allows Chrome to use a single OS level TLS slot process-wide, |
| // and allows us to control the slot limits instead of being at the mercy of the |
| // platform. To do this, Chrome TLS replicates an array commonly found in the OS |
| // thread metadata. |
| // |
| // Overview: |
| // |
| // OS TLS Slots       Per-Thread                 Per-Process Global |
| //     ... |
| //     []             Chrome TLS Array           Chrome TLS Metadata |
| //     [] ----------> [][][][][ ][][][][]        [][][][][ ][][][][] |
| //     []                      |                          | |
| //     ...                     V                          V |
| //                      Metadata Version           Slot Information |
| //                         Your Data! |
| // |
| // Using a single OS TLS slot, Chrome TLS allocates an array on demand for the |
| // lifetime of each thread that requests Chrome TLS data. Each per-thread TLS |
| // array matches the length of the per-process global metadata array. |
| // |
| // A per-process global TLS metadata array tracks information about each item in |
| // the per-thread array: |
| // * Status: Tracks if the slot is allocated or free to assign. |
| // * Destructor: An optional destructor to call on thread destruction for that |
| // specific slot. |
| // * Version: Tracks the current version of the TLS slot. Each TLS slot |
| // allocation is associated with a unique version number. |
| // |
| // Most OS TLS APIs guarantee that a newly allocated TLS slot is |
| // initialized to 0 for all threads. The Chrome TLS system provides |
| // this guarantee by tracking the version for each TLS slot here |
| // on each per-thread Chrome TLS array entry. Threads that access |
| // a slot with a mismatched version will receive 0 as their value. |
| // The metadata version is incremented when the client frees a |
| // slot. The per-thread metadata version is updated when a client |
| // writes to the slot. This scheme allows for constant time |
| // invalidation and avoids the need to iterate through each Chrome |
| // TLS array to mark the slot as zero. |
| // |
| // Just like an OS TLS API, clients of the Chrome TLS are responsible for |
| // managing any necessary lifetime of the data in their slots. The only |
| // convenience provided is automatic destruction when a thread ends. If a client |
| // frees a slot, that client is responsible for destroying the data in the slot. |
| |
| namespace { |
| // In order to make TLS destructors work, we need to keep around a function |
| // pointer to the destructor for each slot. We keep this array of pointers in a |
| // global (static) array. |
| // We use the single OS-level TLS slot (giving us one pointer per thread) to |
| // hold a pointer to a per-thread array (table) of slots that we allocate to |
| // Chromium consumers. |
| |
| // g_native_tls_key is the one native TLS that we use. It stores our table. |
| #if defined(STARBOARD) |
| base::subtle::AtomicWord g_native_tls_key = |
| reinterpret_cast<base::subtle::AtomicWord>( |
| PlatformThreadLocalStorage::TLS_KEY_OUT_OF_INDEXES); |
| #else |
| base::subtle::Atomic32 g_native_tls_key = |
| PlatformThreadLocalStorage::TLS_KEY_OUT_OF_INDEXES; |
| #endif |
| |
| // The OS TLS slot has three states: |
| // * kUninitialized: Any call to Slot::Get()/Set() will create the base |
| // per-thread TLS state. On POSIX, kUninitialized must be 0. |
| // * [Memory Address]: Raw pointer to the base per-thread TLS state. |
| // * kDestroyed: The base per-thread TLS state has been freed. |
| // |
| // Final States: |
| // * Windows: kDestroyed. Windows does not iterate through the OS TLS to clean |
| // up the values. |
| // * POSIX: kUninitialized. POSIX iterates through TLS until all slots contain |
| // nullptr. |
| // |
| // More details on this design: |
| // We need some type of thread-local state to indicate that the TLS system has |
| // been destroyed. To do so, we leverage the multi-pass nature of destruction |
| // of pthread_key. |
| // |
| // a) After destruction of TLS system, we set the pthread_key to a sentinel |
| // kDestroyed. |
| // b) All calls to Slot::Get() DCHECK that the state is not kDestroyed, and |
| // any system which might potentially invoke Slot::Get() after destruction |
| // of TLS must check ThreadLocalStorage::HasBeenDestroyed(). |
| // c) After a full pass of the pthread_keys, on the next invocation of |
| // OnThreadExitInternal(), we'll then set the key's value back to |
| // kUninitialized (nullptr). |
| // d) At this stage, the TLS system is back in its uninitialized state. |
| // e) If in the second pass of destruction of pthread_keys something were to |
| // re-initialize TLS [this should never happen! Since the only code which |
| // uses Chrome TLS is Chrome controlled, we should really be striving for |
| // single-pass destruction], then TLS will be re-initialized and then go |
| // through the 2-pass destruction system again. Everything should just |
| // work (TM). |
| |
// kUninitialized and kDestroyed are typed as void* because that is the type
// the native TLS get/set API exposes on both POSIX and Windows.

// Value of the native TLS slot before the per-thread table has been created.
void* const kUninitialized = nullptr;

// Sentinel stored in the native TLS slot once the per-thread table has been
// torn down; it marks the TLS system as destroyed for the current thread.
void* const kDestroyed = reinterpret_cast<void*>(1);

// Number of entries in the per-thread table and in the per-process metadata
// array, i.e. the maximum number of Chrome TLS slots.
constexpr int kThreadLocalStorageSize{256};
| |
| enum TlsStatus { |
| FREE, |
| IN_USE, |
| }; |
| |
| struct TlsMetadata { |
| TlsStatus status; |
| base::ThreadLocalStorage::TLSDestructorFunc destructor; |
| uint32_t version; |
| }; |
| |
| struct TlsVectorEntry { |
| void* data; |
| uint32_t version; |
| }; |
| |
| // This lock isn't needed until after we've constructed the per-thread TLS |
| // vector, so it's safe to use. |
| base::Lock* GetTLSMetadataLock() { |
| static auto* lock = new base::Lock(); |
| return lock; |
| } |
| TlsMetadata g_tls_metadata[kThreadLocalStorageSize]; |
| size_t g_last_assigned_slot = 0; |
| |
| // The maximum number of times to try to clear slots by calling destructors. |
| // Use pthread naming convention for clarity. |
| constexpr int kMaxDestructorIterations = kThreadLocalStorageSize; |
| |
| // This function is called to initialize our entire Chromium TLS system. |
| // It may be called very early, and we need to complete most all of the setup |
| // (initialization) before calling *any* memory allocator functions, which may |
| // recursively depend on this initialization. |
| // As a result, we use Atomics, and avoid anything (like a singleton) that might |
| // require memory allocations. |
| TlsVectorEntry* ConstructTlsVector() { |
| PlatformThreadLocalStorage::TLSKey key = |
| reinterpret_cast<PlatformThreadLocalStorage::TLSKey>( |
| base::subtle::NoBarrier_Load(&g_native_tls_key)); |
| if (key == PlatformThreadLocalStorage::TLS_KEY_OUT_OF_INDEXES) { |
| CHECK(PlatformThreadLocalStorage::AllocTLS(&key)); |
| |
| // The TLS_KEY_OUT_OF_INDEXES is used to find out whether the key is set or |
| // not in NoBarrier_CompareAndSwap, but Posix doesn't have invalid key, we |
| // define an almost impossible value be it. |
| // If we really get TLS_KEY_OUT_OF_INDEXES as value of key, just alloc |
| // another TLS slot. |
| if (key == PlatformThreadLocalStorage::TLS_KEY_OUT_OF_INDEXES) { |
| PlatformThreadLocalStorage::TLSKey tmp = key; |
| CHECK(PlatformThreadLocalStorage::AllocTLS(&key) && |
| key != PlatformThreadLocalStorage::TLS_KEY_OUT_OF_INDEXES); |
| PlatformThreadLocalStorage::FreeTLS(tmp); |
| } |
| // Atomically test-and-set the tls_key. If the key is |
| // TLS_KEY_OUT_OF_INDEXES, go ahead and set it. Otherwise, do nothing, as |
| // another thread already did our dirty work. |
| if (PlatformThreadLocalStorage::TLS_KEY_OUT_OF_INDEXES != |
| reinterpret_cast<PlatformThreadLocalStorage::TLSKey>( |
| base::subtle::NoBarrier_CompareAndSwap( |
| &g_native_tls_key, |
| reinterpret_cast<base::subtle::AtomicWord>( |
| PlatformThreadLocalStorage::TLS_KEY_OUT_OF_INDEXES), |
| reinterpret_cast<base::subtle::AtomicWord>(key)))) { |
| // We've been shortcut. Another thread replaced g_native_tls_key first so |
| // we need to destroy our index and use the one the other thread got |
| // first. |
| PlatformThreadLocalStorage::FreeTLS(key); |
| key = reinterpret_cast<PlatformThreadLocalStorage::TLSKey>( |
| base::subtle::NoBarrier_Load(&g_native_tls_key)); |
| } |
| } |
| CHECK_EQ(PlatformThreadLocalStorage::GetTLSValue(key), kUninitialized); |
| |
| // Some allocators, such as TCMalloc, make use of thread local storage. As a |
| // result, any attempt to call new (or malloc) will lazily cause such a system |
| // to initialize, which will include registering for a TLS key. If we are not |
| // careful here, then that request to create a key will call new back, and |
| // we'll have an infinite loop. We avoid that as follows: Use a stack |
| // allocated vector, so that we don't have dependence on our allocator until |
| // our service is in place. (i.e., don't even call new until after we're |
| // setup) |
| TlsVectorEntry stack_allocated_tls_data[kThreadLocalStorageSize]; |
| memset(stack_allocated_tls_data, 0, sizeof(stack_allocated_tls_data)); |
| // Ensure that any rentrant calls change the temp version. |
| PlatformThreadLocalStorage::SetTLSValue(key, stack_allocated_tls_data); |
| |
| // Allocate an array to store our data. |
| TlsVectorEntry* tls_data = new TlsVectorEntry[kThreadLocalStorageSize]; |
| memcpy(tls_data, stack_allocated_tls_data, |
| sizeof(stack_allocated_tls_data)); |
| PlatformThreadLocalStorage::SetTLSValue(key, tls_data); |
| return tls_data; |
| } |
| |
| void OnThreadExitInternal(TlsVectorEntry* tls_data) { |
| // This branch is for POSIX, where this function is called twice. The first |
| // pass calls dtors and sets state to kDestroyed. The second pass sets |
| // kDestroyed to kUninitialized. |
| if (tls_data == kDestroyed) { |
| PlatformThreadLocalStorage::TLSKey key = |
| reinterpret_cast<PlatformThreadLocalStorage::TLSKey>( |
| base::subtle::NoBarrier_Load(&g_native_tls_key)); |
| PlatformThreadLocalStorage::SetTLSValue(key, kUninitialized); |
| return; |
| } |
| |
| DCHECK(tls_data); |
| // Some allocators, such as TCMalloc, use TLS. As a result, when a thread |
| // terminates, one of the destructor calls we make may be to shut down an |
| // allocator. We have to be careful that after we've shutdown all of the known |
| // destructors (perchance including an allocator), that we don't call the |
| // allocator and cause it to resurrect itself (with no possibly destructor |
| // call to follow). We handle this problem as follows: Switch to using a stack |
| // allocated vector, so that we don't have dependence on our allocator after |
| // we have called all g_tls_metadata destructors. (i.e., don't even call |
| // delete[] after we're done with destructors.) |
| TlsVectorEntry stack_allocated_tls_data[kThreadLocalStorageSize]; |
| memcpy(stack_allocated_tls_data, tls_data, |
| sizeof(stack_allocated_tls_data)); |
| // Ensure that any re-entrant calls change the temp version. |
| PlatformThreadLocalStorage::TLSKey key = |
| reinterpret_cast<PlatformThreadLocalStorage::TLSKey>( |
| base::subtle::NoBarrier_Load(&g_native_tls_key)); |
| PlatformThreadLocalStorage::SetTLSValue(key, stack_allocated_tls_data); |
| delete[] tls_data; // Our last dependence on an allocator. |
| |
| // Snapshot the TLS Metadata so we don't have to lock on every access. |
| TlsMetadata tls_metadata[kThreadLocalStorageSize]; |
| { |
| base::AutoLock auto_lock(*GetTLSMetadataLock()); |
| memcpy(tls_metadata, g_tls_metadata, sizeof(g_tls_metadata)); |
| } |
| |
| int remaining_attempts = kMaxDestructorIterations; |
| bool need_to_scan_destructors = true; |
| while (need_to_scan_destructors) { |
| need_to_scan_destructors = false; |
| // Try to destroy the first-created-slot (which is slot 1) in our last |
| // destructor call. That user was able to function, and define a slot with |
| // no other services running, so perhaps it is a basic service (like an |
| // allocator) and should also be destroyed last. If we get the order wrong, |
| // then we'll iterate several more times, so it is really not that critical |
| // (but it might help). |
| for (int slot = 0; slot < kThreadLocalStorageSize ; ++slot) { |
| void* tls_value = stack_allocated_tls_data[slot].data; |
| if (!tls_value || tls_metadata[slot].status == TlsStatus::FREE || |
| stack_allocated_tls_data[slot].version != tls_metadata[slot].version) |
| continue; |
| |
| base::ThreadLocalStorage::TLSDestructorFunc destructor = |
| tls_metadata[slot].destructor; |
| if (!destructor) |
| continue; |
| stack_allocated_tls_data[slot].data = nullptr; // pre-clear the slot. |
| destructor(tls_value); |
| // Any destructor might have called a different service, which then set a |
| // different slot to a non-null value. Hence we need to check the whole |
| // vector again. This is a pthread standard. |
| need_to_scan_destructors = true; |
| } |
| if (--remaining_attempts <= 0) { |
| NOTREACHED(); // Destructors might not have been called. |
| break; |
| } |
| } |
| |
| // Remove our stack allocated vector. |
| PlatformThreadLocalStorage::SetTLSValue(key, kDestroyed); |
| } |
| |
| } // namespace |
| |
| namespace base { |
| |
| namespace internal { |
| |
#if !defined(STARBOARD) && defined(OS_WIN)
// Windows flavour: takes no argument, so the per-thread table is looked up
// through the native key before handing it to OnThreadExitInternal().
void PlatformThreadLocalStorage::OnThreadExit() {
  PlatformThreadLocalStorage::TLSKey key =
      base::subtle::NoBarrier_Load(&g_native_tls_key);
  // No native key was ever allocated, so there is nothing to clean up.
  if (key == PlatformThreadLocalStorage::TLS_KEY_OUT_OF_INDEXES)
    return;
  void* table = GetTLSValue(key);

  // On Windows, thread destruction callbacks are only invoked once per module,
  // so there should be no way that this could be invoked twice.
  DCHECK_NE(table, kDestroyed);

  // Only tear down if TLS was ever initialized for this thread.
  if (table != kUninitialized)
    OnThreadExitInternal(static_cast<TlsVectorEntry*>(table));
}
#elif defined(STARBOARD) || defined(OS_POSIX) || defined(OS_FUCHSIA)
// Starboard / POSIX / Fuchsia flavour: |value| is the content of the native
// TLS slot for the exiting thread and is forwarded as the per-thread table.
void PlatformThreadLocalStorage::OnThreadExit(void* value) {
  OnThreadExitInternal(static_cast<TlsVectorEntry*>(value));
}
#endif
| } // namespace internal |
| |
| bool ThreadLocalStorage::HasBeenDestroyed() { |
| PlatformThreadLocalStorage::TLSKey key = |
| reinterpret_cast<PlatformThreadLocalStorage::TLSKey>( |
| base::subtle::NoBarrier_Load(&g_native_tls_key)); |
| if (key == PlatformThreadLocalStorage::TLS_KEY_OUT_OF_INDEXES) |
| return false; |
| return PlatformThreadLocalStorage::GetTLSValue(key) == kDestroyed; |
| } |
| |
| void ThreadLocalStorage::Slot::Initialize(TLSDestructorFunc destructor) { |
| PlatformThreadLocalStorage::TLSKey key = |
| reinterpret_cast<PlatformThreadLocalStorage::TLSKey>( |
| base::subtle::NoBarrier_Load(&g_native_tls_key)); |
| if (key == PlatformThreadLocalStorage::TLS_KEY_OUT_OF_INDEXES || |
| PlatformThreadLocalStorage::GetTLSValue(key) == kUninitialized) { |
| ConstructTlsVector(); |
| } |
| |
| // Grab a new slot. |
| { |
| base::AutoLock auto_lock(*GetTLSMetadataLock()); |
| for (int i = 0; i < kThreadLocalStorageSize; ++i) { |
| // Tracking the last assigned slot is an attempt to find the next |
| // available slot within one iteration. Under normal usage, slots remain |
| // in use for the lifetime of the process (otherwise before we reclaimed |
| // slots, we would have run out of slots). This makes it highly likely the |
| // next slot is going to be a free slot. |
| size_t slot_candidate = |
| (g_last_assigned_slot + 1 + i) % kThreadLocalStorageSize; |
| if (g_tls_metadata[slot_candidate].status == TlsStatus::FREE) { |
| g_tls_metadata[slot_candidate].status = TlsStatus::IN_USE; |
| g_tls_metadata[slot_candidate].destructor = destructor; |
| g_last_assigned_slot = slot_candidate; |
| DCHECK_EQ(kInvalidSlotValue, slot_); |
| slot_ = slot_candidate; |
| version_ = g_tls_metadata[slot_candidate].version; |
| break; |
| } |
| } |
| } |
| CHECK_NE(slot_, kInvalidSlotValue); |
| CHECK_LT(slot_, kThreadLocalStorageSize); |
| } |
| |
| void ThreadLocalStorage::Slot::Free() { |
| DCHECK_NE(slot_, kInvalidSlotValue); |
| DCHECK_LT(slot_, kThreadLocalStorageSize); |
| { |
| base::AutoLock auto_lock(*GetTLSMetadataLock()); |
| g_tls_metadata[slot_].status = TlsStatus::FREE; |
| g_tls_metadata[slot_].destructor = nullptr; |
| ++(g_tls_metadata[slot_].version); |
| } |
| slot_ = kInvalidSlotValue; |
| } |
| |
| void* ThreadLocalStorage::Slot::Get() const { |
| TlsVectorEntry* tls_data = static_cast<TlsVectorEntry*>( |
| PlatformThreadLocalStorage::GetTLSValue( |
| reinterpret_cast<PlatformThreadLocalStorage::TLSKey>( |
| base::subtle::NoBarrier_Load(&g_native_tls_key)))); |
| DCHECK_NE(tls_data, kDestroyed); |
| if (!tls_data) |
| return nullptr; |
| DCHECK_NE(slot_, kInvalidSlotValue); |
| DCHECK_LT(slot_, kThreadLocalStorageSize); |
| // Version mismatches means this slot was previously freed. |
| if (tls_data[slot_].version != version_) |
| return nullptr; |
| return tls_data[slot_].data; |
| } |
| |
| void ThreadLocalStorage::Slot::Set(void* value) { |
| TlsVectorEntry* tls_data = static_cast<TlsVectorEntry*>( |
| PlatformThreadLocalStorage::GetTLSValue( |
| reinterpret_cast<PlatformThreadLocalStorage::TLSKey>( |
| base::subtle::NoBarrier_Load(&g_native_tls_key)))); |
| DCHECK_NE(tls_data, kDestroyed); |
| if (!tls_data) |
| tls_data = ConstructTlsVector(); |
| DCHECK_NE(slot_, kInvalidSlotValue); |
| DCHECK_LT(slot_, kThreadLocalStorageSize); |
| tls_data[slot_].data = value; |
| tls_data[slot_].version = version_; |
| } |
| |
| ThreadLocalStorage::Slot::Slot(TLSDestructorFunc destructor) { |
| Initialize(destructor); |
| } |
| |
| ThreadLocalStorage::Slot::~Slot() { |
| Free(); |
| } |
| |
| } // namespace base |