blob: 2e04d60bcca7599bc88441a4bcde848e4f7a6fc8 [file] [log] [blame]
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef STARBOARD_COMMON_FLAT_MAP_H_
#define STARBOARD_COMMON_FLAT_MAP_H_
#include <algorithm>
#include <functional>
#include <utility>
#include <vector>
#include "starboard/log.h"
#include "starboard/types.h"
namespace starboard {
namespace flat_map_detail {
// IsPod<> is a white list of common types that are "plain-old-data'.
// Types not registered with IsPod<> will default to non-pod.
// Usage: IsPod<int>::value == true;
// IsPod<std::string>::value == false
// See specializations at the bottom of this file for what has been defined
// as pod.
template <typename T>
struct IsPod {
enum { value = false };
};
} // namespace flat_map_detail.
// FlatMap<key_type, mapped_type> is a sorted vector map of key-value pairs.
// This map has an interface that is largely compatible with std::map<> and
// is designed to be a near drop in replacement for std::map<>.
//
// A main usage difference between FlatMap<> and std::map<> is that FlatMap<>
// will invalidate it's iterators on insert() and erase().
//
// Reasons to use this map include low-level operations which require that
// the map does not allocate memory during runtime (this is achievable by
// invoking FlatMap::reserve()), or to speed up a lot of find() operations
// where the FlatMap() is mutated to very occasionally.
//
// PERFORMANCE
// where n is the number of items in flatmap
// and m is the input size for the operation.
//
// bulk insert | O(m*log(m) + m*log(n) + n+m) (sort input, check dups, merge)
// insert | O(n)
// erase | O(n)
// bulk erase | O(n*m) TODO: Make faster - O(n+m + log(m))
// find | O(log(n))
// clear | O(n)
//
// Performance of FlatMap::find() tends to be about +50% faster than that
// of std::map<> for pod types in optimized builds. However this is balanced
// by the fact that FlatMap::insert() and FlatMap::erase() both operate at
// O(n), where std::map will operate at O(log n) for both operations.
//
// FlatMap<int,int>::find() Performance
// NUMBER OF ELEMENTS | SPEED COMPARSION vs std::map
// -------------------------------------
// 5 | 220.37%
// 10 | 158.602%
// 25 | 87.7049%
// 50 | 91.0873%
// 100 | 96.1367%
// 1000 | 120.588%
// 10000 | 156.969%
// 100000 | 179.55%
//
// When in doubt, use std::map. If you need to use FlatMap<>, then make sure
// that insertion() is done in bulk, and that delete are infrequent, or that
// the maps are small.
//
// Please see unit tests for additional usage.
template <typename Key,
typename Value,
typename LessThan = std::less<Key>,
typename VectorType = std::vector<std::pair<Key, Value> > >
class FlatMap {
public:
// Most typedefs here are to make FlatMap<> compatible with std::map<>.
typedef Key key_type;
typedef Value mapped_type;
typedef LessThan key_compare;
typedef std::pair<key_type, mapped_type> value_type;
typedef typename VectorType::size_type size_type;
typedef typename VectorType::difference_type difference_type;
typedef typename VectorType::iterator iterator;
typedef typename VectorType::const_iterator const_iterator;
FlatMap() {}
FlatMap(const FlatMap& other) : vector_(other.vector_) {}
template <typename ITERATOR>
FlatMap(ITERATOR start, ITERATOR end) {
insert(start, end);
}
void reserve(size_t size) { vector_.reserve(size); }
// Takes the incoming elements and only adds the elements that don't already
// exist in this map.
// Returns the number of elements that were added.
template <typename Iterator>
size_t insert(Iterator begin_it, Iterator end_it) {
const size_t partition_idx = vector_.size();
// PART 1 - Elements are added unsorted into array as a new partition.
//
// Only add elements that don't exist
for (Iterator it = begin_it; it != end_it; ++it) {
// These have to be recomputed every loop iteration because
// vector_.push_back() will invalidate iterators.
const_iterator sorted_begin = vector_.begin();
const_iterator sorted_end = sorted_begin + partition_idx;
const bool already_exists_sorted_part =
exists_in_range(sorted_begin, sorted_end, it->first);
// Okay so it doesn't exist yet so place it in the vector.
if (!already_exists_sorted_part) {
vector_.push_back(*it);
}
}
// No elements added.
if (vector_.size() == partition_idx) {
return 0;
}
iterator unsorted_begin = vector_.begin() + partition_idx;
// std::sort(...) will not maintain the order of values which have
// keys. Therefore InplaceMergeSort(...) is used instead, which has
// the same guarantees as std::stable_sort but with the added addition
// that no memory will be allocated during the operation of
// InplaceMergeSort(...). This is important because when bulk inserting
// elements, the first key-value pair (of duplicate keys) will be the one
// that gets inserted, and the second one is ignored.
InplaceMergeSort(unsorted_begin, vector_.end());
// Corner-case: remove duplicates in the input.
iterator new_end = std::unique(unsorted_begin, vector_.end(), EqualTo);
std::inplace_merge(vector_.begin(), unsorted_begin, new_end, LessThanValue);
vector_.erase(new_end, vector_.end());
return std::distance(vector_.begin() + partition_idx, new_end);
}
std::pair<iterator, bool> insert(const value_type& entry) {
iterator insertion_it =
std::upper_bound(begin(), end(), entry, LessThanValue);
// DUPLICATE CHECK - If the key already exists then return it.
if (insertion_it != begin()) {
// Check for a duplicate value, which will be the preceding value, if
// it exits.
iterator previous_it = (insertion_it - 1);
const key_type& previous_key = previous_it->first;
if (EqualKeyTo(previous_key, entry.first)) {
// Value already exists.
return std::pair<iterator, bool>(previous_it, false);
}
}
iterator inserted_it = vector_.insert(insertion_it, entry);
return std::pair<iterator, bool>(inserted_it, true);
}
iterator find(const key_type& key) {
return find_in_range(vector_.begin(), vector_.end(), key);
}
const_iterator find(const key_type& key) const {
return find_in_range_const(vector_.begin(), vector_.end(), key);
}
bool erase(const key_type& key) {
iterator found_it = find(key);
if (found_it != vector_.end()) {
vector_.erase(found_it); // no resorting necessary.
return true;
} else {
return false;
}
}
void erase(iterator begin_it, iterator end_it) {
vector_.erase(begin_it, end_it);
}
bool empty() const { return vector_.empty(); }
size_t size() const { return vector_.size(); }
iterator begin() { return vector_.begin(); }
const_iterator begin() const { return vector_.begin(); }
const_iterator cbegin() const { return vector_.begin(); }
iterator end() { return vector_.end(); }
const_iterator end() const { return vector_.end(); }
const_iterator cend() const { return vector_.end(); }
void clear() { vector_.clear(); }
size_t count(const key_type& key) const {
if (cend() != find(key)) {
return 1;
} else {
return 0;
}
}
size_type max_size() const { return vector_.max_size(); }
void swap(FlatMap& other) { vector_.swap(other.vector_); }
iterator lower_bound(const key_type& key) {
mapped_type dummy;
value_type key_data(key, dummy); // Second value is ignored.
return std::lower_bound(begin(), end(), key_data, LessThanValue);
}
const_iterator lower_bound(const key_type& key) const {
mapped_type dummy;
value_type key_data(key, dummy); // Second value is ignored.
return std::lower_bound(cbegin(), cend(), key_data, LessThanValue);
}
iterator upper_bound(const key_type& key) {
mapped_type dummy;
value_type key_data(key, dummy); // Second value is ignored.
return std::upper_bound(begin(), end(), key_data, LessThanValue);
}
const_iterator upper_bound(const key_type& key) const {
mapped_type dummy;
value_type key_data(key, dummy); // Second value is ignored.
return std::upper_bound(cbegin(), cend(), key_data, LessThanValue);
}
std::pair<iterator, iterator> equal_range(const key_type& key) {
iterator found = find(key);
if (found == end()) {
return std::pair<iterator, iterator>(end(), end());
}
iterator found_end = found;
++found_end;
return std::pair<iterator, iterator>(found, found_end);
}
std::pair<const_iterator, const_iterator> equal_range(
const key_type& key) const {
const_iterator found = find(key);
if (found == end()) {
return std::pair<const_iterator, const_iterator>(cend(), cend());
}
const_iterator found_end = found;
++found_end;
return std::pair<const_iterator, const_iterator>(found, found_end);
}
key_compare key_comp() const {
key_compare return_value;
return return_value;
}
mapped_type& operator[](const key_type& key) {
std::pair<key_type, mapped_type> entry;
entry.first = key;
std::pair<iterator, bool> result = insert(entry);
iterator found = result.first;
mapped_type& value = found->second;
return value;
}
bool operator==(const FlatMap& other) const {
return vector_ == other.vector_;
}
bool operator!=(const FlatMap& other) const {
return vector_ != other.vector_;
}
private:
static bool LessThanValue(const std::pair<key_type, mapped_type>& a,
const std::pair<key_type, mapped_type>& b) {
return LessThanKey(a.first, b.first);
}
static bool LessThanKey(const key_type& a, const key_type& b) {
key_compare less_than;
return less_than(a, b);
}
static bool NotEqualKeyTo(const key_type& a, const key_type& b) {
key_compare less_than;
return less_than(a, b) || less_than(b, a);
}
static bool EqualKeyTo(const key_type& a, const key_type& b) {
return !NotEqualKeyTo(a, b);
}
static bool EqualTo(const std::pair<key_type, mapped_type>& a,
const std::pair<key_type, mapped_type>& b) {
return EqualKeyTo(a.first, b.first);
}
static iterator find_in_range(iterator begin_it,
iterator end_it,
const key_type& key) {
// Delegate to find_in_range_const().
const_iterator begin_it_const = begin_it;
const_iterator end_it_const = end_it;
const_iterator found_it_const =
find_in_range_const(begin_it_const, end_it_const, key);
const size_t diff = std::distance(begin_it_const, found_it_const);
return begin_it + diff;
}
static inline const_iterator find_in_range_const_linear(
const_iterator begin_it,
const_iterator end_it,
const value_type& key_data) {
SB_DCHECK(end_it >= begin_it);
for (const_iterator it = begin_it; it != end_it; ++it) {
if (LessThanValue(key_data, *it)) {
continue;
}
if (!LessThanValue(*it, key_data)) {
return it;
}
}
return end_it;
}
static inline const_iterator find_in_range_const(const_iterator begin_it,
const_iterator end_it,
const key_type& key) {
// This was tested and found to have a very positive effect on
// performance. The threshold could be a lot higher (~20 elements) but is
// kept at 11 elements to be on the conservative side.
static const difference_type kLinearSearchThreshold = 11;
mapped_type dummy;
value_type key_data(key, dummy); // Second value is ignored.
// Speedup for small maps of pod type: just do a linear search. This was
// tested to be significantly faster - 2x speedup. The conditions for this
// search are very specific so that in many situations the fast path won't
// be taken.
if (flat_map_detail::IsPod<key_type>::value) {
const difference_type range_distance = std::distance(begin_it, end_it);
// Linear search.
if (range_distance < kLinearSearchThreshold) {
return find_in_range_const_linear(begin_it, end_it, key_data);
}
}
const_iterator found_it =
std::lower_bound(begin_it, end_it, key_data, LessThanValue);
if (found_it == end_it) {
return end_it;
}
if (NotEqualKeyTo(found_it->first, key)) { // different keys.
return end_it;
}
size_t dist = std::distance(begin_it, found_it);
return begin_it + dist;
}
static bool exists_in_range(const_iterator begin_it,
const_iterator end_it,
const key_type& key) {
const_iterator result_iterator = find_in_range_const(begin_it, end_it, key);
return result_iterator != end_it;
}
// This is needed as a stable sorting algorithm, which leaves duplicates in
// relative order from which they appeared in the original container.
// Unlike std::stable_sort(...) this function will not allocate memory.
void InplaceMergeSort(iterator begin_it, iterator end_it) {
if (end_it - begin_it > 1) {
iterator middle_it = begin_it + (end_it - begin_it) / 2;
InplaceMergeSort(begin_it, middle_it);
InplaceMergeSort(middle_it, end_it);
std::inplace_merge(begin_it, middle_it, end_it, LessThanValue);
}
}
VectorType vector_;
};
namespace flat_map_detail {
#define STARBOARD_FLATMAP_DEFINE_IS_POD(TYPE) \
template <> \
struct IsPod<TYPE> { \
enum { value = true }; \
}
STARBOARD_FLATMAP_DEFINE_IS_POD(bool);
STARBOARD_FLATMAP_DEFINE_IS_POD(float);
STARBOARD_FLATMAP_DEFINE_IS_POD(double);
STARBOARD_FLATMAP_DEFINE_IS_POD(int8_t);
STARBOARD_FLATMAP_DEFINE_IS_POD(uint8_t);
STARBOARD_FLATMAP_DEFINE_IS_POD(int16_t);
STARBOARD_FLATMAP_DEFINE_IS_POD(uint16_t);
STARBOARD_FLATMAP_DEFINE_IS_POD(int32_t);
STARBOARD_FLATMAP_DEFINE_IS_POD(uint32_t);
STARBOARD_FLATMAP_DEFINE_IS_POD(int64_t);
STARBOARD_FLATMAP_DEFINE_IS_POD(uint64_t);
#undef STARBOARD_FLATMAP_DEFINE_IS_POD
// Specialization - all pointer types are treated as pod.
template <typename T>
struct IsPod<T*> {
enum { value = true };
};
} // namespace flat_map_detail.
} // namespace starboard.
#endif // STARBOARD_COMMON_FLAT_MAP_H_