/*
* Copyright (C) 2021 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef INCLUDE_PERFETTO_EXT_BASE_FLAT_HASH_MAP_H_
#define INCLUDE_PERFETTO_EXT_BASE_FLAT_HASH_MAP_H_
#include "perfetto/base/compiler.h"
#include "perfetto/base/logging.h"
#include "perfetto/ext/base/hash.h"
#include "perfetto/ext/base/utils.h"
#include <algorithm>
#include <functional>
#include <limits>
namespace perfetto {
namespace base {
// An open-addressing hashmap implementation.
// Pointers are not stable, neither for keys nor for values.
// Has performance similar to a Robin Hood hash map (without the complications)
// and roughly 2x the speed of std::unordered_map.
// Doc: http://go/perfetto-hashtables .
//
// When used to implement a string pool in TraceProcessor, the performance
// characteristics obtained by replaying the set of strings seen in a 4GB trace
// (226M strings, 1M unique) are the following (see flat_hash_map_benchmark.cc):
// This(Linear+AppendOnly) 879,383,676 ns 258.013M insertions/s
// This(LinearProbe): 909,206,047 ns 249.546M insertions/s
// This(QuadraticProbe): 1,083,844,388 ns 209.363M insertions/s
// std::unordered_map: 6,203,351,870 ns 36.5811M insertions/s
// tsl::robin_map: 931,403,397 ns 243.622M insertions/s
// absl::flat_hash_map: 998,013,459 ns 227.379M insertions/s
// FollyF14FastMap: 1,181,480,602 ns 192.074M insertions/s
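//
// Example usage (an illustrative sketch of the API defined below; the
// uint64_t/int types are arbitrary choices, not requirements):
//   FlatHashMap<uint64_t, int> counts;
//   auto res = counts.Insert(42, 1);  // res = {Value*, inserted (true/false)}.
//   int* val = counts.Find(42);       // nullptr if the key is not present.
//   counts[43]++;                     // operator[] default-constructs if absent.
//   for (auto it = counts.GetIterator(); it; ++it) {
//     // it.key() / it.value() refer to the current entry.
//   }
//   counts.Erase(42);  // Allowed only when AppendOnly = false (the default).
//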
// The structs below define the probing algorithm used to probe slots upon a
// collision. They are guaranteed to visit all slots as our table size is always
// a power of two (see https://en.wikipedia.org/wiki/Quadratic_probing).
// Linear probing can be faster if the hashing is well distributed and the load
// is not high. For TraceProcessor's StringPool this is the fastest. It can
// degenerate badly if the hashing doesn't spread (e.g., if using pids directly
// as keys, with a no-op hashing function).
struct LinearProbe {
static inline size_t Calc(size_t key_hash, size_t step, size_t capacity) {
return (key_hash + step) & (capacity - 1); // Linear probe
}
};
// Generates the sequence: 0, 3, 10, 21, 36, 55, ...
// Can be a bit (~5%) slower than LinearProbe because it's less cache hot, but
// avoids degenerating badly if the hash function is bad and causes clusters.
// A good default choice unless benchmarks prove otherwise.
struct QuadraticProbe {
static inline size_t Calc(size_t key_hash, size_t step, size_t capacity) {
return (key_hash + 2 * step * step + step) & (capacity - 1);
}
};
// Tends to perform in between LinearProbe and QuadraticProbe.
// It's a bit more cache-friendly than QuadraticProbe but can create more
// clustering if the hash function doesn't spread well.
// Generates the sequence: 0, 1, 3, 6, 10, 15, 21, ...
struct QuadraticHalfProbe {
static inline size_t Calc(size_t key_hash, size_t step, size_t capacity) {
return (key_hash + (step * step + step) / 2) & (capacity - 1);
}
};
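// Example of the full-coverage guarantee above (an illustrative trace, not
// code in this header): with capacity = 8 and key_hash = 0,
// QuadraticProbe::Calc() visits slots 0, 3, 2, 5, 4, 7, 6, 1 for step = 0..7,
// i.e. each slot exactly once.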
template <typename Key,
typename Value,
typename Hasher = base::Hash<Key>,
typename Probe = QuadraticProbe,
bool AppendOnly = false>
class FlatHashMap {
public:
class Iterator {
public:
explicit Iterator(const FlatHashMap* map) : map_(map) { FindNextNonFree(); }
~Iterator() = default;
Iterator(const Iterator&) = default;
Iterator& operator=(const Iterator&) = default;
Iterator(Iterator&&) noexcept = default;
Iterator& operator=(Iterator&&) noexcept = default;
Key& key() { return map_->keys_[idx_]; }
Value& value() { return map_->values_[idx_]; }
const Key& key() const { return map_->keys_[idx_]; }
const Value& value() const { return map_->values_[idx_]; }
explicit operator bool() const { return idx_ != kEnd; }
Iterator& operator++() {
PERFETTO_DCHECK(idx_ < map_->capacity_);
++idx_;
FindNextNonFree();
return *this;
}
private:
static constexpr size_t kEnd = std::numeric_limits<size_t>::max();
void FindNextNonFree() {
const auto& tags = map_->tags_;
for (; idx_ < map_->capacity_; idx_++) {
if (tags[idx_] != kFreeSlot && (AppendOnly || tags[idx_] != kTombstone))
return;
}
idx_ = kEnd;
}
const FlatHashMap* map_ = nullptr;
size_t idx_ = 0;
}; // Iterator
static constexpr int kDefaultLoadLimitPct = 75;
explicit FlatHashMap(size_t initial_capacity = 0,
int load_limit_pct = kDefaultLoadLimitPct)
: load_limit_percent_(load_limit_pct) {
if (initial_capacity > 0)
Reset(initial_capacity);
}
// We are calling Clear() so that the destructors for the inserted entries are
// called (unless they are trivial, in which case it will be a no-op).
~FlatHashMap() { Clear(); }
FlatHashMap(FlatHashMap&& other) noexcept {
tags_ = std::move(other.tags_);
keys_ = std::move(other.keys_);
values_ = std::move(other.values_);
capacity_ = other.capacity_;
size_ = other.size_;
max_probe_length_ = other.max_probe_length_;
load_limit_ = other.load_limit_;
load_limit_percent_ = other.load_limit_percent_;
new (&other) FlatHashMap();
}
FlatHashMap& operator=(FlatHashMap&& other) noexcept {
this->~FlatHashMap();
new (this) FlatHashMap(std::move(other));
return *this;
}
FlatHashMap(const FlatHashMap&) = delete;
FlatHashMap& operator=(const FlatHashMap&) = delete;
std::pair<Value*, bool> Insert(Key key, Value value) {
const size_t key_hash = Hasher{}(key);
const uint8_t tag = HashToTag(key_hash);
static constexpr size_t kSlotNotFound = std::numeric_limits<size_t>::max();
// In practice this for loop performs at most two attempts:
// The first iteration either:
// - Early-returns, because the key exists already, or
// - Finds an insertion slot and proceeds, because the load is < limit.
// The second iteration is only hit in the unlikely case of this insertion
// bringing the table beyond the target |load_limit_| (or the edge case
// of the HT being full, if |load_limit_percent_| = 100).
// We cannot simply pre-grow the table before insertion, because we must
// guarantee that calling Insert() with a key that already exists doesn't
// invalidate iterators.
size_t insertion_slot;
size_t probe_len;
for (;;) {
PERFETTO_DCHECK((capacity_ & (capacity_ - 1)) == 0); // Must be a pow2.
insertion_slot = kSlotNotFound;
// Start the iteration at the desired slot (key_hash % capacity_)
// searching either for a free slot or a tombstone. In the worst case we
// might end up scanning the whole array of slots. The Probe functions are
// guaranteed to visit all the slots within |capacity_| steps. If we find
// a free slot, we can stop the search immediately (a free slot acts as an
// "end of chain" for entries having the same hash). If we find a
// tombstone (a deleted slot) we remember its position, but have to keep
// searching until we hit a free slot, to make sure we don't insert a
// duplicate key.
for (probe_len = 0; probe_len < capacity_;) {
const size_t idx = Probe::Calc(key_hash, probe_len, capacity_);
PERFETTO_DCHECK(idx < capacity_);
const uint8_t tag_idx = tags_[idx];
++probe_len;
if (tag_idx == kFreeSlot) {
// Rationale for "insertion_slot == kSlotNotFound": if we encountered
// a tombstone while iterating we should reuse that rather than
// taking another slot.
if (AppendOnly || insertion_slot == kSlotNotFound)
insertion_slot = idx;
break;
}
// We should never encounter tombstones in AppendOnly mode.
PERFETTO_DCHECK(!(tag_idx == kTombstone && AppendOnly));
if (!AppendOnly && tag_idx == kTombstone) {
insertion_slot = idx;
continue;
}
if (tag_idx == tag && keys_[idx] == key) {
// The key is already in the map.
return std::make_pair(&values_[idx], false);
}
} // for (idx)
// If we got to this point the key does not exist (otherwise we would have
// hit the return above) and we are going to insert a new entry.
// Before doing so, ensure we stay under the target load limit.
if (PERFETTO_UNLIKELY(size_ >= load_limit_)) {
MaybeGrowAndRehash(/*grow=*/true);
continue;
}
PERFETTO_DCHECK(insertion_slot != kSlotNotFound);
break;
} // for (attempt)
PERFETTO_CHECK(insertion_slot < capacity_);
// We found a free slot (or a tombstone). Proceed with the insertion.
Value* value_idx = &values_[insertion_slot];
new (&keys_[insertion_slot]) Key(std::move(key));
new (value_idx) Value(std::move(value));
tags_[insertion_slot] = tag;
PERFETTO_DCHECK(probe_len > 0 && probe_len <= capacity_);
max_probe_length_ = std::max(max_probe_length_, probe_len);
size_++;
return std::make_pair(value_idx, true);
}
Value* Find(const Key& key) const {
const size_t idx = FindInternal(key);
if (idx == kNotFound)
return nullptr;
return &values_[idx];
}
bool Erase(const Key& key) {
if (AppendOnly)
PERFETTO_FATAL("Erase() not supported because AppendOnly=true");
size_t idx = FindInternal(key);
if (idx == kNotFound)
return false;
EraseInternal(idx);
return true;
}
void Clear() {
// Avoid trivial heap operations on zero-capacity std::move()-d objects.
if (PERFETTO_UNLIKELY(capacity_ == 0))
return;
for (size_t i = 0; i < capacity_; ++i) {
const uint8_t tag = tags_[i];
if (tag != kFreeSlot && tag != kTombstone)
EraseInternal(i);
}
// Clear all tombstones (by rehashing in place). This is required in
// particular for AppendOnly mode, where Insert() asserts that no tombstones
// are ever encountered.
MaybeGrowAndRehash(/*grow=*/false);
}
Value& operator[](Key key) {
auto it_and_inserted = Insert(std::move(key), Value{});
return *it_and_inserted.first;
}
Iterator GetIterator() { return Iterator(this); }
const Iterator GetIterator() const { return Iterator(this); }
size_t size() const { return size_; }
size_t capacity() const { return capacity_; }
// "protected" here is only for the flat_hash_map_benchmark.cc. Everything
// below is by all means private.
protected:
enum ReservedTags : uint8_t { kFreeSlot = 0, kTombstone = 1 };
static constexpr size_t kNotFound = std::numeric_limits<size_t>::max();
size_t FindInternal(const Key& key) const {
const size_t key_hash = Hasher{}(key);
const uint8_t tag = HashToTag(key_hash);
PERFETTO_DCHECK((capacity_ & (capacity_ - 1)) == 0); // Must be a pow2.
PERFETTO_DCHECK(max_probe_length_ <= capacity_);
for (size_t i = 0; i < max_probe_length_; ++i) {
const size_t idx = Probe::Calc(key_hash, i, capacity_);
const uint8_t tag_idx = tags_[idx];
if (tag_idx == kFreeSlot)
return kNotFound;
// If |idx| holds a tombstone, the tag check below cannot match, because
// HashToTag() never returns kTombstone. That is exactly what we want:
// lookups must simply skip tombstones.
if (tag_idx == tag && keys_[idx] == key) {
PERFETTO_DCHECK(tag_idx > kTombstone);
return idx;
}
} // for (idx)
return kNotFound;
}
void EraseInternal(size_t idx) {
PERFETTO_DCHECK(tags_[idx] > kTombstone);
PERFETTO_DCHECK(size_ > 0);
tags_[idx] = kTombstone;
keys_[idx].~Key();
values_[idx].~Value();
size_--;
}
PERFETTO_NO_INLINE void MaybeGrowAndRehash(bool grow) {
PERFETTO_DCHECK(size_ <= capacity_);
const size_t old_capacity = capacity_;
// Grow quickly up to 1MB, then chill.
const size_t old_size_bytes = old_capacity * (sizeof(Key) + sizeof(Value));
const size_t grow_factor = old_size_bytes < (1024u * 1024u) ? 8 : 2;
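// Illustrative progression, assuming sizeof(Key) + sizeof(Value) == 16:
// 1024 -> 8192 -> 65536 slots (8x while the table is under 1MB), then
// 131072, 262144, ... (2x from there on).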
const size_t new_capacity =
grow ? std::max(old_capacity * grow_factor, size_t(1024))
: old_capacity;
auto old_tags(std::move(tags_));
auto old_keys(std::move(keys_));
auto old_values(std::move(values_));
size_t old_size = size_;
// This must be a CHECK (i.e. not just a DCHECK) to prevent use-after-free
// attacks on 32-bit archs, where repeatedly growing the table could make
// |new_capacity| wrap around.
PERFETTO_CHECK(new_capacity >= old_capacity);
Reset(new_capacity);
size_t new_size = 0; // Recompute the size.
for (size_t i = 0; i < old_capacity; ++i) {
const uint8_t old_tag = old_tags[i];
if (old_tag != kFreeSlot && old_tag != kTombstone) {
Insert(std::move(old_keys[i]), std::move(old_values[i]));
old_keys[i].~Key(); // Destroy the old objects.
old_values[i].~Value();
new_size++;
}
}
PERFETTO_DCHECK(new_size == old_size);
size_ = new_size;
}
// Doesn't call destructors. Use Clear() for that.
PERFETTO_NO_INLINE void Reset(size_t n) {
PERFETTO_DCHECK((n & (n - 1)) == 0); // Must be a pow2.
capacity_ = n;
max_probe_length_ = 0;
size_ = 0;
load_limit_ = n * static_cast<size_t>(load_limit_percent_) / 100;
load_limit_ = std::min(load_limit_, n);
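// E.g., with the default 75% limit and n = 1024 slots, |load_limit_| is 768.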
tags_.reset(new uint8_t[n]);
memset(&tags_[0], 0, n); // Clear all tags.
keys_ = AlignedAllocTyped<Key[]>(n); // Deliberately not 0-initialized.
values_ = AlignedAllocTyped<Value[]>(n); // Deliberately not 0-initialized.
}
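// Derives the 1-byte tag stored in |tags_| from the top byte of the full hash.
// Illustrative example: a 64-bit hash with top byte 0x01 (== kTombstone) is
// remapped to tag 3; a hash with top byte 0x7F keeps 0x7F unchanged.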
static inline uint8_t HashToTag(size_t full_hash) {
uint8_t tag = full_hash >> (sizeof(full_hash) * 8 - 8);
// Ensure the tag is always >= 2. We use 0 and 1 for kFreeSlot and kTombstone.
tag += (tag <= kTombstone) << 1;
PERFETTO_DCHECK(tag > kTombstone);
return tag;
}
size_t capacity_ = 0;
size_t size_ = 0;
size_t max_probe_length_ = 0;
size_t load_limit_ = 0; // Updated every time |capacity_| changes.
int load_limit_percent_ =
kDefaultLoadLimitPct; // Load factor limit in % of |capacity_|.
// These arrays always have |capacity_| elements.
// Note: AlignedUniquePtr just allocates memory, doesn't invoke any ctor/dtor.
std::unique_ptr<uint8_t[]> tags_;
AlignedUniquePtr<Key[]> keys_;
AlignedUniquePtr<Value[]> values_;
};
} // namespace base
} // namespace perfetto
#endif // INCLUDE_PERFETTO_EXT_BASE_FLAT_HASH_MAP_H_