/*
* Copyright (C) 2021 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef INCLUDE_PERFETTO_EXT_BASE_FLAT_HASH_MAP_H_
#define INCLUDE_PERFETTO_EXT_BASE_FLAT_HASH_MAP_H_
#include "perfetto/base/compiler.h"
#include "perfetto/base/logging.h"
#include "perfetto/ext/base/hash.h"
#include "perfetto/ext/base/utils.h"
#include <algorithm>
#include <functional>
#include <limits>
namespace perfetto {
namespace base {
// An open-addressing hashmap implementation.
// Pointers are not stable, neither for keys nor for values.
// Has performance similar to a Robin Hood hash map (without the complications)
// and roughly 2x the speed of std::unordered_map.
// Doc: http://go/perfetto-hashtables .
//
// When used to implement a string pool in TraceProcessor, the performance
// characteristics obtained by replaying the set of strings seen in a 4GB trace
// (226M strings, 1M unique) are the following (see flat_hash_map_benchmark.cc):
// This(Linear+AppendOnly) 879,383,676 ns 258.013M insertions/s
// This(LinearProbe): 909,206,047 ns 249.546M insertions/s
// This(QuadraticProbe): 1,083,844,388 ns 209.363M insertions/s
// std::unordered_map: 6,203,351,870 ns 36.5811M insertions/s
// tsl::robin_map: 931,403,397 ns 243.622M insertions/s
// absl::flat_hash_map: 998,013,459 ns 227.379M insertions/s
// FollyF14FastMap: 1,181,480,602 ns 192.074M insertions/s
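//
// Example usage (an illustrative sketch of the API defined below; the
// uint64_t/int types are arbitrary choices, not requirements):
//   FlatHashMap<uint64_t, int> counts;
//   auto res = counts.Insert(42, 1);  // res = {Value*, inserted (true/false)}.
//   int* val = counts.Find(42);       // nullptr if the key is not present.
//   counts[43]++;                     // operator[] default-constructs if absent.
//   for (auto it = counts.GetIterator(); it; ++it) {
//     // it.key() / it.value() refer to the current entry.
//   }
//   counts.Erase(42);  // Allowed only when AppendOnly = false (the default).
//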
// The structs below define the probing algorithm used to probe slots upon a
// collision. They are guaranteed to visit all slots as our table size is always
// a power of two (see https://en.wikipedia.org/wiki/Quadratic_probing).
// Linear probing can be faster if the hashing is well distributed and the load
// is not high. For TraceProcessor's StringPool this is the fastest. It can
// degenerate badly if the hashing doesn't spread (e.g., if using pids directly
// as keys, with a no-op hashing function).
struct LinearProbe {
static inline size_t Calc(size_t key_hash, size_t step, size_t capacity) {
return (key_hash + step) & (capacity - 1); // Linear probe
}
};
// Generates the sequence: 0, 3, 10, 21, 36, 55, ...
// Can be a bit (~5%) slower than LinearProbe because it's less cache hot, but
// avoids degenerating badly if the hash function is bad and causes clusters.
// A good default choice unless benchmarks prove otherwise.
struct QuadraticProbe {
static inline size_t Calc(size_t key_hash, size_t step, size_t capacity) {
return (key_hash + 2 * step * step + step) & (capacity - 1);
}
};
// Tends to perform in between LinearProbe and QuadraticProbe.
// It's a bit more cache-friendly than QuadraticProbe but can create more
// clustering if the hash function doesn't spread well.
// Generates the sequence: 0, 1, 3, 6, 10, 15, 21, ...
struct QuadraticHalfProbe {
static inline size_t Calc(size_t key_hash, size_t step, size_t capacity) {
return (key_hash + (step * step + step) / 2) & (capacity - 1);
}
};
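// Example of the full-coverage guarantee above (an illustrative trace, not
// code in this header): with capacity = 8 and key_hash = 0,
// QuadraticProbe::Calc() visits slots 0, 3, 2, 5, 4, 7, 6, 1 for step = 0..7,
// i.e. each slot exactly once.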
template <typename Key,
typename Value,
typename Hasher = base::Hash<Key>,
typename Probe = QuadraticProbe,
bool AppendOnly = false>
class FlatHashMap {
public:
class Iterator {
public:
explicit Iterator(const FlatHashMap* map) : map_(map) { FindNextNonFree(); }
~Iterator() = default;
Iterator(const Iterator&) = default;
Iterator& operator=(const Iterator&) = default;
Iterator(Iterator&&) noexcept = default;
Iterator& operator=(Iterator&&) noexcept = default;
Key& key() { return map_->keys_[idx_]; }
Value& value() { return map_->values_[idx_]; }
const Key& key() const { return map_->keys_[idx_]; }
const Value& value() const { return map_->values_[idx_]; }
explicit operator bool() const { return idx_ != kEnd; }
Iterator& operator++() {
PERFETTO_DCHECK(idx_ < map_->capacity_);
++idx_;
FindNextNonFree();
return *this;
}
private:
static constexpr size_t kEnd = std::numeric_limits<size_t>::max();
void FindNextNonFree() {
const auto& tags = map_->tags_;
for (; idx_ < map_->capacity_; idx_++) {
if (tags[idx_] != kFreeSlot && (AppendOnly || tags[idx_] != kTombstone))
return;
}
idx_ = kEnd;
}
const FlatHashMap* map_ = nullptr;
size_t idx_ = 0;
}; // Iterator
static constexpr int kDefaultLoadLimitPct = 75;
explicit FlatHashMap(size_t initial_capacity = 0,
int load_limit_pct = kDefaultLoadLimitPct)
: load_limit_percent_(load_limit_pct) {
if (initial_capacity > 0)
Reset(initial_capacity);
}
// We are calling Clear() so that the destructors for the inserted entries are
// called (unless they are trivial, in which case it will be a no-op).
~FlatHashMap() { Clear(); }
FlatHashMap(FlatHashMap&& other) noexcept {
tags_ = std::move(other.tags_);
keys_ = std::move(other.keys_);
values_ = std::move(other.values_);
capacity_ = other.capacity_;
size_ = other.size_;
max_probe_length_ = other.max_probe_length_;
load_limit_ = other.load_limit_;
load_limit_percent_ = other.load_limit_percent_;
new (&other) FlatHashMap();
}
FlatHashMap& operator=(FlatHashMap&& other) noexcept {
this->~FlatHashMap();
new (this) FlatHashMap(std::move(other));
return *this;
}
FlatHashMap(const FlatHashMap&) = delete;
FlatHashMap& operator=(const FlatHashMap&) = delete;
std::pair<Value*, bool> Insert(Key key, Value value) {
const size_t key_hash = Hasher{}(key);
const uint8_t tag = HashToTag(key_hash);
static constexpr size_t kSlotNotFound = std::numeric_limits<size_t>::max();
// In practice this for loop performs at most two attempts:
// The first iteration either:
// - Early-returns, because the key exists already, or
// - Finds an insertion slot and proceeds, because the load is < limit.
// The second iteration is only hit in the unlikely case of this insertion
// bringing the table beyond the target |load_limit_| (or the edge case
// of the HT being full, if |load_limit_percent_| = 100).
// We cannot simply pre-grow the table before insertion, because we must
// guarantee that calling Insert() with a key that already exists doesn't
// invalidate iterators.
size_t insertion_slot;
size_t probe_len;
for (;;) {
PERFETTO_DCHECK((capacity_ & (capacity_ - 1)) == 0); // Must be a pow2.
insertion_slot = kSlotNotFound;
// Start the iteration at the desired slot (key_hash % capacity_)
// searching either for a free slot or a tombstone. In the worst case we
// might end up scanning the whole array of slots. The Probe functions are
// guaranteed to visit all the slots within |capacity_| steps. If we find
// a free slot, we can stop the search immediately (a free slot acts as an
// "end of chain" for entries having the same hash). If we find a
// tombstone (a deleted slot) we remember its position, but have to keep
// searching until we hit a free slot, to make sure we don't insert a
// duplicate key.
for (probe_len = 0; probe_len < capacity_;) {
const size_t idx = Probe::Calc(key_hash, probe_len, capacity_);
PERFETTO_DCHECK(idx < capacity_);
const uint8_t tag_idx = tags_[idx];
++probe_len;
if (tag_idx == kFreeSlot) {
// Rationale for "insertion_slot == kSlotNotFound": if we encountered
// a tombstone while iterating we should reuse that rather than
// taking another slot.
if (AppendOnly || insertion_slot == kSlotNotFound)
insertion_slot = idx;
break;
}
// We should never encounter tombstones in AppendOnly mode.
PERFETTO_DCHECK(!(tag_idx == kTombstone && AppendOnly));
if (!AppendOnly && tag_idx == kTombstone) {
insertion_slot = idx;
continue;
}
if (tag_idx == tag && keys_[idx] == key) {
// The key is already in the map.
return std::make_pair(&values_[idx], false);
}
} // for (idx)
// If we got to this point the key does not exist (otherwise we would have
// hit the return above) and we are going to insert a new entry.
// Before doing so, ensure we stay under the target load limit.
if (PERFETTO_UNLIKELY(size_ >= load_limit_)) {
MaybeGrowAndRehash(/*grow=*/true);
continue;
}
PERFETTO_DCHECK(insertion_slot != kSlotNotFound);
break;
} // for (attempt)
PERFETTO_CHECK(insertion_slot < capacity_);
// We found a free slot (or a tombstone). Proceed with the insertion.
Value* value_idx = &values_[insertion_slot];
new (&keys_[insertion_slot]) Key(std::move(key));
new (value_idx) Value(std::move(value));
tags_[insertion_slot] = tag;
PERFETTO_DCHECK(probe_len > 0 && probe_len <= capacity_);
max_probe_length_ = std::max(max_probe_length_, probe_len);
size_++;
return std::make_pair(value_idx, true);
}
Value* Find(const Key& key) const {
const size_t idx = FindInternal(key);
if (idx == kNotFound)
return nullptr;
return &values_[idx];
}
bool Erase(const Key& key) {
if (AppendOnly)
PERFETTO_FATAL("Erase() not supported because AppendOnly=true");
size_t idx = FindInternal(key);
if (idx == kNotFound)
return false;
EraseInternal(idx);
return true;
}
void Clear() {
// Avoid trivial heap operations on zero-capacity std::move()-d objects.
if (PERFETTO_UNLIKELY(capacity_ == 0))
return;
for (size_t i = 0; i < capacity_; ++i) {
const uint8_t tag = tags_[i];
if (tag != kFreeSlot && tag != kTombstone)
EraseInternal(i);
}
// Clear all tombstones (by rehashing in place). This is required in
// particular for AppendOnly mode, where Insert() asserts that no tombstones
// are ever encountered.
MaybeGrowAndRehash(/*grow=*/false);
}
Value& operator[](Key key) {
auto it_and_inserted = Insert(std::move(key), Value{});
return *it_and_inserted.first;
}
Iterator GetIterator() { return Iterator(this); }
const Iterator GetIterator() const { return Iterator(this); }
size_t size() const { return size_; }
size_t capacity() const { return capacity_; }
// "protected" here is only for the flat_hash_map_benchmark.cc. Everything
// below is by all means private.
protected:
enum ReservedTags : uint8_t { kFreeSlot = 0, kTombstone = 1 };
static constexpr size_t kNotFound = std::numeric_limits<size_t>::max();
size_t FindInternal(const Key& key) const {
const size_t key_hash = Hasher{}(key);
const uint8_t tag = HashToTag(key_hash);
PERFETTO_DCHECK((capacity_ & (capacity_ - 1)) == 0); // Must be a pow2.
PERFETTO_DCHECK(max_probe_length_ <= capacity_);
for (size_t i = 0; i < max_probe_length_; ++i) {
const size_t idx = Probe::Calc(key_hash, i, capacity_);
const uint8_t tag_idx = tags_[idx];
if (tag_idx == kFreeSlot)
return kNotFound;
// If |idx| holds a tombstone, the tag check below cannot match, because
// HashToTag() never returns kTombstone. That is exactly what we want:
// lookups must simply skip tombstones.
if (tag_idx == tag && keys_[idx] == key) {
PERFETTO_DCHECK(tag_idx > kTombstone);
return idx;
}
} // for (idx)
return kNotFound;
}
void EraseInternal(size_t idx) {
PERFETTO_DCHECK(tags_[idx] > kTombstone);
PERFETTO_DCHECK(size_ > 0);
tags_[idx] = kTombstone;
keys_[idx].~Key();
values_[idx].~Value();
size_--;
}
PERFETTO_NO_INLINE void MaybeGrowAndRehash(bool grow) {
PERFETTO_DCHECK(size_ <= capacity_);
const size_t old_capacity = capacity_;
// Grow quickly up to 1MB, then chill.
const size_t old_size_bytes = old_capacity * (sizeof(Key) + sizeof(Value));
const size_t grow_factor = old_size_bytes < (1024u * 1024u) ? 8 : 2;
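// Illustrative progression, assuming sizeof(Key) + sizeof(Value) == 16:
// 1024 -> 8192 -> 65536 slots (8x while the table is under 1MB), then
// 131072, 262144, ... (2x from there on).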
const size_t new_capacity =
grow ? std::max(old_capacity * grow_factor, size_t(1024))
: old_capacity;
auto old_tags(std::move(tags_));
auto old_keys(std::move(keys_));
auto old_values(std::move(values_));
size_t old_size = size_;
// This must be a CHECK (i.e. not just a DCHECK) to prevent use-after-free
// attacks on 32-bit archs, where repeatedly growing the table could make
// |new_capacity| wrap around.
PERFETTO_CHECK(new_capacity >= old_capacity);
Reset(new_capacity);
size_t new_size = 0; // Recompute the size.
for (size_t i = 0; i < old_capacity; ++i) {
const uint8_t old_tag = old_tags[i];
if (old_tag != kFreeSlot && old_tag != kTombstone) {
Insert(std::move(old_keys[i]), std::move(old_values[i]));
old_keys[i].~Key(); // Destroy the old objects.
old_values[i].~Value();
new_size++;
}
}
PERFETTO_DCHECK(new_size == old_size);
size_ = new_size;
}
// Doesn't call destructors. Use Clear() for that.
PERFETTO_NO_INLINE void Reset(size_t n) {
PERFETTO_DCHECK((n & (n - 1)) == 0); // Must be a pow2.
capacity_ = n;
max_probe_length_ = 0;
size_ = 0;
load_limit_ = n * static_cast<size_t>(load_limit_percent_) / 100;
load_limit_ = std::min(load_limit_, n);
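// E.g., with the default 75% limit and n = 1024 slots, |load_limit_| is 768.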
tags_.reset(new uint8_t[n]);
memset(&tags_[0], 0, n); // Clear all tags.
keys_ = AlignedAllocTyped<Key[]>(n); // Deliberately not 0-initialized.
values_ = AlignedAllocTyped<Value[]>(n); // Deliberately not 0-initialized.
}
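// Derives the 1-byte tag stored in |tags_| from the top byte of the full hash.
// Illustrative example: a 64-bit hash with top byte 0x01 (== kTombstone) is
// remapped to tag 3; a hash with top byte 0x7F keeps 0x7F unchanged.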
static inline uint8_t HashToTag(size_t full_hash) {
uint8_t tag = full_hash >> (sizeof(full_hash) * 8 - 8);
// Ensure the tag is always >= 2. We use 0 and 1 for kFreeSlot and kTombstone.
tag += (tag <= kTombstone) << 1;
PERFETTO_DCHECK(tag > kTombstone);
return tag;
}
size_t capacity_ = 0;
size_t size_ = 0;
size_t max_probe_length_ = 0;
size_t load_limit_ = 0; // Updated every time |capacity_| changes.
int load_limit_percent_ =
kDefaultLoadLimitPct; // Load factor limit in % of |capacity_|.
// These arrays always have |capacity_| elements.
// Note: AlignedUniquePtr just allocates memory, doesn't invoke any ctor/dtor.
std::unique_ptr<uint8_t[]> tags_;
AlignedUniquePtr<Key[]> keys_;
AlignedUniquePtr<Value[]> values_;
};
} // namespace base
} // namespace perfetto
#endif // INCLUDE_PERFETTO_EXT_BASE_FLAT_HASH_MAP_H_