Reduce RAM usage and improve speed of "gn gen" This CL continues previous optimization work [1] to improve GN's speed and RAM usage, especially for the Zircon build (see [2] for measurements). It consists in the following changes: - Introduce src/gn/hash_table_base.h to generalize the fast hash table used by StringAtom's implementation through a custom template class, and document its usage properly (with a disclaimer not to use it unless profiling shows it's worthwhile). - Use it to simplify and optimize the implementation of the UniqueVector<> template. - Implement SourceDir and Label with StringAtom instead of std::string instances. + Remove a small unwanted const Label copy in src/gn/analyzer.cc [1] https://gn-review.googlesource.com/c/gn/+/7280 [2] Measurement results for "gn gen" runs on my local machine, both using recent Chromium and Fuchsia source checkouts. CHROMIUM: RAM saved: 68 MiB Speedup: 8.4% ZIRCON: RAM saved: 405 MiB Speedup: 24.5% // CHROMIUM BEFORE /work/chromium0/src$ repeat_cmd 5 /tmp/gn-master gen out/default/ Done. Made 12150 targets from 2121 files in 4256ms Done. Made 12150 targets from 2121 files in 4218ms Done. Made 12150 targets from 2121 files in 4286ms Done. Made 12150 targets from 2121 files in 4201ms * Done. Made 12150 targets from 2121 files in 4244ms /work/chromium0/src$ /usr/bin/time -v /tmp/gn-master gen out/default Maximum resident set size (kbytes): 535480 // CHROMIUM AFTER /work/chromium0/src$ repeat_cmd 5 /tmp/gn-after gen out/default/ Done. Made 12150 targets from 2121 files in 3992ms Done. Made 12150 targets from 2121 files in 3874ms * Done. Made 12150 targets from 2121 files in 3951ms Done. Made 12150 targets from 2121 files in 3907ms Done. Made 12150 targets from 2121 files in 3905ms /work/chromium0/src$ /usr/bin/time -v /tmp/gn-after gen out/default Maximum resident set size (kbytes): 466056 // ZIRCON BEFORE /work/fuchsia0$ repeat_cmd 5 /tmp/gn-master gen --root=zircon out/default.zircon Done. Made 47910 targets from 782 files in 8915ms * Done. Made 47910 targets from 782 files in 8945ms Done. Made 47910 targets from 782 files in 9141ms Done. Made 47910 targets from 782 files in 9094ms Done. Made 47910 targets from 782 files in 9512ms /work/fuchsia0$ /usr/bin/time -v /tmp/gn-master gen --root=zircon out/default.zircon Maximum resident set size (kbytes): 1595364 // ZIRCON AFTER /work/fuchsia0$ repeat_cmd 5 /tmp/gn-after gen --root=zircon out/default.zircon Done. Made 47910 targets from 782 files in 7156ms * Done. Made 47910 targets from 782 files in 7715ms Done. Made 47910 targets from 782 files in 7327ms Done. Made 47910 targets from 782 files in 7491ms Done. Made 47910 targets from 782 files in 7573ms /work/fuchsia0$ /usr/bin/time -v /tmp/gn-master gen --root=zircon out/default.zircon Maximum resident set size (kbytes): 1180640 Change-Id: Id315564a9b2285cd89ca07cd69962f008439c9fb Reviewed-on: https://gn-review.googlesource.com/c/gn/+/7340 Reviewed-by: Brett Wilson <brettw@chromium.org> Reviewed-by: Brett Wilson <brettw@google.com> Commit-Queue: Brett Wilson <brettw@chromium.org>

commit: 07854239257cc3eff8759503246595d2629f9367 [log] [tgz]
author: David 'Digit' Turner <digit@google.com> Mon Feb 03 18:12:12 2020 +0100
committer: Commit Bot <commit-bot@chromium.org> Wed Mar 11 17:25:40 2020 +0000
tree: b55ad7e85222381c37bc143d7f45187e9a469be6
parent: 992e927e217baa8a74e6e2c5d7417cb65cf24824 [diff]
diff --git a/build/gen.py b/build/gen.py
index 0911f35..758c51a 100755
--- a/build/gen.py
+++ b/build/gen.py

@@ -620,6 +620,7 @@
         'src/gn/functions_target_rust_unittest.cc',
         'src/gn/functions_target_unittest.cc',
         'src/gn/functions_unittest.cc',
+        'src/gn/hash_table_base_unittest.cc',
         'src/gn/header_checker_unittest.cc',
         'src/gn/inherited_libraries_unittest.cc',
         'src/gn/input_conversion_unittest.cc',

diff --git a/src/gn/analyzer.cc b/src/gn/analyzer.cc
index c269af7..11fb111 100644
--- a/src/gn/analyzer.cc
+++ b/src/gn/analyzer.cc

@@ -101,7 +101,7 @@
                  const std::set<Label>& labels) {
   std::vector<std::string> strings;
   auto value = std::make_unique<base::ListValue>();
-  for (const auto l : labels)
+  for (const auto& l : labels)
     strings.push_back(l.GetUserVisibleName(default_toolchain));
   std::sort(strings.begin(), strings.end());
   value->AppendStrings(strings);

diff --git a/src/gn/filesystem_utils.cc b/src/gn/filesystem_utils.cc
index 4beabc0..fee2596 100644
--- a/src/gn/filesystem_utils.cc
+++ b/src/gn/filesystem_utils.cc

@@ -283,7 +283,7 @@
   path->resize(FindFilenameOffset(*path));
 }
 
-bool EndsWithSlash(const std::string& s) {
+bool EndsWithSlash(const std::string_view s) {
   return !s.empty() && IsSlash(s[s.size() - 1]);
 }
 

diff --git a/src/gn/filesystem_utils.h b/src/gn/filesystem_utils.h
index e359b9f..830478a 100644
--- a/src/gn/filesystem_utils.h
+++ b/src/gn/filesystem_utils.h

@@ -60,7 +60,7 @@
 }
 
 // Returns true if the given path ends with a slash.
-bool EndsWithSlash(const std::string& s);
+bool EndsWithSlash(const std::string_view s);
 
 // Path parts -----------------------------------------------------------------
 

diff --git a/src/gn/hash_table_base.h b/src/gn/hash_table_base.h
new file mode 100644
index 0000000..9e43733
--- /dev/null
+++ b/src/gn/hash_table_base.h

@@ -0,0 +1,502 @@
+// Copyright (c) 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef TOOLS_GN_HASH_TABLE_BASE_H_
+#define TOOLS_GN_HASH_TABLE_BASE_H_
+
+#include "base/compiler_specific.h"
+
+#include <stdlib.h>
+#include <type_traits>
+
+// IMPORTANT DISCLAIMER:
+//
+// THIS IS *NOT* A GENERAL PURPOSE HASH TABLE TEMPLATE. INSTEAD, IT CAN
+// CAN BE USED AS THE BASIS FOR VERY HIGH SPEED AND COMPACT HASH TABLES
+// THAT OBEY VERY STRICT CONDITIONS DESCRIBED BELOW.
+//
+// DO NOT USE THIS UNLESS YOU HAVE A GOOD REASON, I.E. THAT PROFILING
+// SHOWS THERE *IS* AN OVERALL BENEFIT TO DO SO. FOR MOST CASES,
+// std::set<>, std::unordered_set<> and base::flat_set<> ARE PERFECTLY
+// FINE AND SHOULD BE PREFERRED.
+//
+//
+// That being said, this implementation uses a completely typical
+// open-addressing scheme with a buckets array size which is always
+// a power of 2, instead of a prime number. Experience shows this is
+// not detrimental to performance as long as you have a sufficiently
+// good hash function (which is the case of all C++ standard libraries
+// these days for strings and pointers).
+//
+// The reason it is so fast is due to its compactness and the very
+// simple but tight code for a typical lookup / insert / deletion
+// operation.
+//
+// The |buckets_| field holds a pointer to an array of NODE_TYPE
+// instances, called nodes. Each node represents one of the following:
+// a free entry in the table, an inserted item, or a tombstone marking
+// the location of a previously deleted item. Tombstones are only
+// used if the hash table instantiation requires deletion support
+// (see the is_tombstone() description below).
+//
+// The template requires that Node be a type with the following traits:
+//
+//  - It *must* be a trivial type, that is zero-initialized.
+//
+//  - It provides an is_null() const method, which should return true
+//    iff the corresponding node matches a 'free' entry in the hash
+//    table, i.e. one that has not been assigned to an item, or
+//    to a deletion tombstone (see below).
+//
+//    Of course, a default (zeroed) value, should always return true.
+//
+//  - It provides an is_tombstone() const method, which should return
+//    return true iff the corresponding node indicates a previously
+//    deleted item.
+//
+//    Note that if your hash table does not need deletion support,
+//    it is highly recommended to make this a static constexpr method
+//    that always return false. Doing so will optimize the lookup loop
+//    automatically!
+//
+//  - It must provide an is_valid() method, that simply returns
+//    (!is_null() && !is_tombstone()). This is a convenience for this
+//    template, but also for the derived class that will extend it
+//    (more on this below, in the usage description).
+//
+// Note that because Node instances are trivial, std::unique_ptr<>
+// cannot be used in them. Item lifecycle must this be managed
+// explicitly by a derived class of the template instantiation
+// instead.
+//
+// Lookup, insertion and deletion are performed in ways that
+// are *very* different from standard containers, and the reason
+// is, unsuprisingly, performance.
+//
+// Use NodeLookup() to look for an existing item in the hash table.
+// This takes the item's hash value, and a templated callable to
+// compare a node against the search key. This scheme allows
+// heterogeneous lookups, and keeps the node type details
+// out of this header. Any recent C++ optimizer will generate
+// very tight machine code for this call.
+//
+// The NodeLookup() function always returns a non-null and
+// mutable |node| pointer. If |node->is_valid()| is true,
+// then the item was found, and |node| points to it.
+//
+// Otherwise, |node| corresponds to a location that may be
+// used for insertion. To do so, the caller should update the
+// content of |node| appropriately (e.g. writing a pointer and/or
+// hash value to it), then call UpdateAfterInsertion(), which
+// may eventually grow the table and rehash nodes in it.
+//
+// To delete an item, call NodeLookup() first to verify that
+// the item is present, then write a tombstone value to |node|,
+// then call UpdateAfterDeletion().
+//
+// Note that what the tombstone value is doesn't matter to this
+// header, as long as |node->is_tombstone()| returns true for
+// it.
+//
+// Here's an example of a concrete implementation that stores
+// a hash value and an owning pointer in each node:
+//
+//     struct MyFooNode {
+//       size_t hash;
+//       Foo*   foo;
+//
+//       bool is_null() const { return !foo; }
+//       bool is_tombstone() const { return foo == &kTombstone; }
+//       bool is_valid() const { return !is_null() && !is_tombstone(); }
+//
+//       static const Foo kTombstone;
+//     };
+//
+//    class MyFooSet : public HashTableBase<MyFoodNode> {
+//    public:
+//      using BaseType = HashTableBase<MyFooNode>;
+//      using Node = BaseType::Node;
+//
+//      ~MyFooSet() {
+//        // Destroy all items in the set.
+//        // Note that the iterator only parses over valid items.
+//        for (Node* node : *this) {
+//          delete node->foo;
+//        }
+//      }
+//
+//      // Returns true iff this set contains |key|.
+//      bool contains(const Foo& key) const {
+//        Node* node = BaseType::NodeLookup(
+//            MakeHash(key),
+//            [&](const Node* node) { return node->foo == key; });
+//
+//        return node->is_valid();
+//      }
+//
+//      // Try to add |key| to the set. Return true if the insertion
+//      // was succesful, or false if the item was already in the set.
+//      bool add(const Foo& key) {
+//        size_t hash = MakeHash(key);
+//        Node* node = NodeLookup(
+//            hash,
+//            [&](const Node* node) { return node->foo == key; });
+//
+//        if (node->is_valid()) {
+//          // Already in the set.
+//          return false;
+//        }
+//
+//        // Add it.
+//        node->hash = hash;
+//        node->foo  = new Foo(key);
+//        UpdateAfterInsert();
+//        return true;
+//      }
+//
+//      // Try to remove |key| from the set. Return true if the
+//      // item was already in it, false otherwise.
+//      bool erase(const Foo& key) {
+//        Node* node = BaseType::NodeLookup(
+//            MakeHash(key),
+//            [&](const Node* node) { return node->foo == key; });
+//
+//        if (!node->is_valid()) {
+//          // Not in the set.
+//          return false;
+//        }
+//
+//        delete node->foo;
+//        node->foo = Node::kTombstone;
+//        UpdateAfterDeletion().
+//      }
+//
+//      static size_t MakeHash(const Foo& foo) {
+//        return std::hash<Foo>()();
+//      }
+//    };
+//
+// For more concrete examples, see the implementation of
+// StringAtom or UniqueVector<>
+//
+template <typename NODE_TYPE>
+class HashTableBase {
+ public:
+  using Node = NODE_TYPE;
+
+  static_assert(std::is_trivial<NODE_TYPE>::value,
+                "KEY_TYPE should be a trivial type!");
+
+  static_assert(std::is_standard_layout<NODE_TYPE>::value,
+                "KEY_TYPE should be a standard layout type!");
+
+  // Default constructor.
+  HashTableBase() = default;
+
+  // Destructor. This only deals with the |buckets_| array itself.
+  ~HashTableBase() {
+    if (buckets_ != buckets0_)
+      free(buckets_);
+  }
+
+  // Copy and move operations. These only operate on the |buckets_| array
+  // so any owned pointer inside nodes should be handled by custom
+  // constructors and operators in the derived class, if needed.
+  HashTableBase(const HashTableBase& other)
+      : count_(other.count_), size_(other.size_) {
+    if (other.buckets_ != other.buckets0_) {
+      // NOTE: using malloc() here to clarify that no object construction
+      // should occur here.
+      buckets_ = reinterpret_cast<Node*>(malloc(other.size_ * sizeof(Node)));
+    }
+    memcpy(buckets_, other.buckets_, other.size_ * sizeof(Node));
+  }
+
+  HashTableBase& operator=(const HashTableBase& other) {
+    if (this != &other) {
+      this->~HashTableBase();
+      new (this) HashTableBase(other);
+    }
+    return *this;
+  }
+
+  HashTableBase(HashTableBase&& other) noexcept
+      : count_(other.count_), size_(other.size_), buckets_(other.buckets_) {
+    if (buckets_ == other.buckets0_) {
+      buckets0_[0] = other.buckets0_[0];
+      buckets_ = buckets0_;
+    } else {
+      other.buckets_ = other.buckets0_;
+    }
+    other.NodeClear();
+  }
+
+  HashTableBase& operator=(HashTableBase&& other) noexcept {
+    if (this != &other) {
+      this->~HashTableBase();
+      new (this) HashTableBase(std::move(other));
+    }
+    return *this;
+  }
+
+  // Return true iff the table is empty.
+  bool empty() const { return count_ == 0; }
+
+  // Return the number of keys in the set.
+  size_t size() const { return count_; }
+
+ protected:
+  // The following should only be called by derived classes that
+  // extend this template class, and are not available to their
+  // clients. This forces the derived class to implement lookup
+  // insertion and deletion with sane APIs instead.
+
+  // Iteration support note:
+  //
+  // Derived classes, if they wish to support iteration, should provide their
+  // own iterator/const_iterator/begin()/end() types and methods, possibly as
+  // simple wrappers around the NodeIterator/NodeBegin/NodeEnd below.
+  //
+  // The ValidNodesRange() method also returns a object that has begin() and
+  // end() methods to be used in for-range loops as in:
+  //
+  //    for (Node& node : my_table.ValidNodesRange()) { ... }
+  //
+  struct NodeIterator {
+    Node& operator*() { return *node_; }
+    const Node& operator*() const { return *node_; }
+
+    Node* operator->() { return node_; }
+    const Node* operator->() const { return node_; }
+
+    bool operator==(const NodeIterator& other) const {
+      return node_ == other.node_;
+    }
+
+    bool operator!=(const NodeIterator& other) const {
+      return node_ != other.node_;
+    }
+
+    // pre-increment
+    NodeIterator& operator++() {
+      node_++;
+      while (node_ < node_limit_ && !node_->is_valid())
+        node_++;
+
+      return *this;
+    }
+
+    // post-increment
+    NodeIterator operator++(int) {
+      NodeIterator result = *this;
+      ++(*this);
+      return result;
+    }
+
+    Node* node_ = nullptr;
+    Node* node_limit_ = nullptr;
+  };
+
+  // NOTE: This is intentionally not public to avoid exposing
+  // them in derived classes by mistake. If a derived class
+  // wants to support iteration, it provide its own begin() and end()
+  // methods, possibly using NodeBegin() and NodeEnd() below to
+  // implement them.
+  NodeIterator begin() { return NodeBegin(); }
+  NodeIterator end() { return NodeEnd(); }
+
+  // Providing methods named NodeBegin() and NodeEnd(), instead of
+  // just using begin() and end() is a convenience to derived classes
+  // that need to provide their own begin() and end() method, e.g.
+  // consider this class:
+  //
+  //  struct FooNode {
+  //     size_t hash;
+  //     Foo*   foo_ptr;
+  //     ...
+  //  };
+  //
+  //  class FooTable : public HashTableBase<FooNode> {
+  //  public:
+  //     ...
+  //
+  //     // Iterators point to Foo instances, not table nodes.
+  //     struct ConstIterator {
+  //       const Foo* operator->() { return iter_->foo_ptr; }
+  //       const Foo& operator->() { return *(iter_->foo_ptr); }
+  //       ...
+  //       NodeIterator iter_;
+  //     };
+  //
+  // and compare two ways to implement its begin() method:
+  //
+  //    Foo::ConstIterator Foo::begin() const {
+  //      return {
+  //        reinterpret_cast<const HashTableBase<FooNode> *>(this)->begin()
+  //      };
+  //    };
+  //
+  // with:
+  //
+  //    Foo::ConstIterator Foo::begin() const {
+  //      return { NodeBegin(); }
+  //    }
+  //
+  NodeIterator NodeBegin() const {
+    Node* node = buckets_;
+    Node* limit = node + size_;
+    while (node < limit && !node->is_valid())
+      node++;
+
+    return {node, limit};
+  }
+
+  NodeIterator NodeEnd() const {
+    Node* limit = buckets_ + size_;
+    return {limit, limit};
+  }
+
+  // ValidNodeRange() allows a derived-class to use range-based  loops
+  // over valid nodes, even if they have defined their own begin() and
+  // end() methods, e.g. following the same class design as in the
+  // above comment:
+  //
+  //   FooTable::~FooTable() {
+  //     for (const FooNode& node : ValidNodesRange()) {
+  //       delete node->foo_ptr;
+  //     }
+  //   }
+  //
+  // which is a little bit clearer than the (valid) alternative:
+  //
+  //   FooTable::~FooTable() {
+  //     for (Foo& foo : *this) {
+  //       delete &foo;  // WHAT!?
+  //     }
+  //   }
+  //
+  struct NodeIteratorPair {
+    NodeIterator begin() { return begin_; }
+    NodeIterator end() { return end_; }
+
+    NodeIterator begin_;
+    NodeIterator end_;
+  };
+
+  NodeIteratorPair ValidNodesRange() const { return {NodeBegin(), NodeEnd()}; }
+
+  // Clear the nodes table completely.
+  void NodeClear() {
+    if (buckets_ != buckets0_)
+      free(buckets_);
+
+    count_ = 0;
+    size_ = 1;
+    buckets_ = buckets0_;
+    buckets0_[0] = Node{};
+  }
+
+  // Lookup for a node in the hash table that matches the |node_equal|
+  // predicate, which takes a const Node* pointer argument, and returns
+  // true iff this corresponds to a lookup match.
+  //
+  // IMPORTANT: |node_equal| may or may not check the node hash value,
+  // the choice is left to the implementation.
+  //
+  // Returns a non-null *mutable* node pointer. |node->is_valid()| will
+  // be true for matches, and false for misses.
+  //
+  // NOTE: In case of a miss, this will return the location of the first
+  // tombstone encountered during probing, if any, or the first free entry
+  // otherwise. This keeps the table consistent in case the node is overwritten
+  // by the caller in a following insert operation.
+  template <typename NODE_EQUAL>
+  Node* NodeLookup(size_t hash, NODE_EQUAL node_equal) const {
+    size_t mask = size_ - 1;
+    size_t index = hash & mask;
+    Node* tombstone = nullptr;  // First tombstone node found, if any.
+    for (;;) {
+      Node* node = buckets_ + index;
+      if (node->is_null()) {
+        return tombstone ? tombstone : node;
+      }
+      if (node->is_tombstone()) {
+        if (!tombstone)
+          tombstone = node;
+      } else if (node_equal(node)) {
+        return node;
+      }
+      index = (index + 1) & mask;
+    }
+  }
+
+  // Call this method after updating the content of the |node| pointer
+  // returned by an unsucessful NodeLookup(). Return true to indicate that
+  // the table size changed, and that existing iterators were invalidated.
+  bool UpdateAfterInsert() {
+    count_ += 1;
+    if (UNLIKELY(count_ * 4 >= size_ * 3)) {
+      GrowBuckets();
+      return true;
+    }
+    return false;
+  }
+
+  // Call this method after updating the content of the |node| value
+  // returned a by succesful NodeLookup, to the tombstone value, if any.
+  // Return true to indicate a table size change, ie. that existing
+  // iterators where invalidated.
+  bool UpdateAfterRemoval() {
+    count_ -= 1;
+    // For now don't support shrinking since this is not useful for GN.
+    return false;
+  }
+
+ private:
+#if defined(__GNUC__) || defined(__clang__)
+  [[gnu::noinline]]
+#endif
+  void
+  GrowBuckets() {
+    size_t size = size_;
+    size_t new_size = (size == 1) ? 8 : size * 2;
+    size_t new_mask = new_size - 1;
+
+    // NOTE: Using calloc() since no object constructiopn can or should take
+    // place here.
+    Node* new_buckets = reinterpret_cast<Node*>(calloc(new_size, sizeof(Node)));
+
+    for (size_t src_index = 0; src_index < size; ++src_index) {
+      const Node* node = &buckets_[src_index];
+      if (!node->is_valid())
+        continue;
+      size_t dst_index = node->hash_value() & new_mask;
+      for (;;) {
+        Node* node2 = new_buckets + dst_index;
+        if (node2->is_null()) {
+          *node2 = *node;
+          break;
+        }
+        dst_index = (dst_index + 1) & new_mask;
+      }
+    }
+
+    if (buckets_ != buckets0_)
+      free(buckets_);
+
+    buckets_ = new_buckets;
+    buckets0_[0] = Node{};
+    size_ = new_size;
+  }
+
+  // NOTE: The reason for default-initializing |buckets_| to a storage slot
+  // inside the object is to ensure the value is never null. This removes one
+  // nullptr check from each NodeLookup() instantiation.
+  size_t count_ = 0;
+  size_t size_ = 1;
+  Node* buckets_ = buckets0_;
+  Node buckets0_[1] = {{}};
+};
+
+#endif  // TOOLS_GN_HASH_TABLE_BASE_H_

diff --git a/src/gn/hash_table_base_unittest.cc b/src/gn/hash_table_base_unittest.cc
new file mode 100644
index 0000000..a4c3244
--- /dev/null
+++ b/src/gn/hash_table_base_unittest.cc

@@ -0,0 +1,356 @@
+// Copyright (c) 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "hash_table_base.h"
+
+#include "util/test/test.h"
+
+#include <algorithm>
+#include <vector>
+
+// This unit-test is also used to illustrate how to use HashTableBase<>
+// in a concrete way. Here, each node is a simple pointer to a Int class
+// that wraps a simple integer, but also keeps tracks of
+// construction/destruction steps in global counters. This is used by the
+// test to verify that operations like copies or moves do not miss or
+// create allocations/deallocations.
+//
+// Because the derived table HashTableTest owns all pointer objects, it
+// needs to manually create/deallocate them in its destructor, copy/move
+// constructors and operators, as well as insert()/erase()/clear() methods.
+//
+// Finally, iteration support is provided through const_iterator and
+// begin(), end() methods, which are enough for range-based for loops.
+//
+
+// A simple int wrapper class that can also count creation/destruction.
+class Int {
+ public:
+  explicit Int(int x) : x_(x) { creation_counter++; }
+
+  Int(const Int& other) : x_(other.x_) { creation_counter++; }
+
+  ~Int() { destruction_counter++; }
+
+  int& x() { return x_; }
+  const int& x() const { return x_; }
+
+  size_t hash() const { return static_cast<size_t>(x_); }
+
+  static void ResetCounters() {
+    creation_counter = 0;
+    destruction_counter = 0;
+  }
+
+  int x_;
+
+  static size_t creation_counter;
+  static size_t destruction_counter;
+};
+
+size_t Int::creation_counter;
+size_t Int::destruction_counter;
+
+// A HashTableBase<> node type that contains a simple pointer to a Int value.
+struct TestHashNode {
+  Int* int_ptr;
+
+  bool is_null() const { return !int_ptr; }
+  bool is_tombstone() const { return int_ptr == &kTombstone; }
+  bool is_valid() const { return !is_null() && !is_tombstone(); }
+  size_t hash_value() const { return int_ptr->hash(); }
+
+  static Int kTombstone;
+};
+
+// Value is irrelevant since only its address is used.
+Int TestHashNode::kTombstone(-1);
+
+// TestHashTable derives a HashTableBase<> instantiation to demonstrate
+// full uses of the template. This includes:
+//
+//  - Storing a pointer in each node, and managing ownership of pointed
+//    objects explicitly in the destructor, copy/move constructor and
+//    operator, as well as insert() and erase() methods.
+//
+//  - The internal pointed objects are Int instance, but the TestHashTable
+//    API masks that entirely, instead implementing a simple set of integers,
+//    including iteration support.
+//
+//  Note that placing the integers directly in the nodes would be much easier,
+//  but would not allow demonstrating how to manage ownership in thedestructor
+class TestHashTable : public HashTableBase<TestHashNode> {
+ public:
+  using BaseType = HashTableBase<TestHashNode>;
+  using Node = BaseType::Node;
+
+  static_assert(std::is_same<Node, TestHashNode>::value,
+                "HashTableBase<>::Node is not defined properly!");
+
+  BaseType& asBaseType() { return *this; }
+  const BaseType& asBaseType() const { return *this; }
+
+  TestHashTable() = default;
+
+  // IMPORTANT NOTE: Because the table contains bare owning pointers, we
+  // have to use explicit copy and move constructor/operators for things
+  // to work as expected. This is yet another reason why HashTableBase<>
+  // should only be used with care (preferrably with non-owning pointers).
+  //
+  TestHashTable(const TestHashTable& other) : BaseType(other) {
+    // Only node (i.e. pointers) are copied by the base type.
+    for (Node& node : ValidNodesRange()) {
+      node.int_ptr = new Int(*node.int_ptr);
+    }
+  }
+
+  TestHashTable& operator=(const TestHashTable& other) {
+    if (this != &other) {
+      this->~TestHashTable();
+      new (this) TestHashTable(other);
+    }
+    return *this;
+  }
+
+  TestHashTable(TestHashTable&& other) noexcept : BaseType(std::move(other)) {}
+  TestHashTable& operator=(TestHashTable&& other) noexcept {
+    if (this != &other) {
+      this->~TestHashTable();
+      new (this) TestHashTable(std::move(other));
+    }
+    return *this;
+  }
+
+  ~TestHashTable() {
+    // Discard all valid Int pointers in the hash table.
+    for (Node& node : ValidNodesRange())
+      delete node.int_ptr;
+  }
+
+  // Return true iff the table contains |x|.
+  bool contains(int x) const {
+    size_t hash = static_cast<size_t>(x);
+    Node* node = NodeLookup(
+        hash, [&](const Node* node) { return node->int_ptr->x() == x; });
+    return node->is_valid();
+  }
+
+  // Try to insert |x| in the table. Returns true on success, or false if
+  // the value was already in it.
+  bool insert(int x) {
+    size_t hash = static_cast<size_t>(x);
+    Node* node = NodeLookup(
+        hash, [&](const Node* node) { return node->int_ptr->x() == x; });
+    if (node->is_valid())
+      return false;
+
+    node->int_ptr = new Int(x);
+    UpdateAfterInsert();
+    return true;
+  }
+
+  // Try to remove |x| from the table. Return true on success, or false
+  // if the value was not in it.
+  bool erase(int x) {
+    size_t hash = static_cast<size_t>(x);
+    Node* node = NodeLookup(
+        hash, [&](const Node* node) { return node->int_ptr->x() == x; });
+    if (!node->is_valid())
+      return false;
+
+    delete node->int_ptr;
+    node->int_ptr = &TestHashNode::kTombstone;
+    UpdateAfterRemoval();
+    return true;
+  }
+
+  // Remove all items
+  void clear() {
+    // Remove all pointed objects, since NodeClear() will not do it.
+    for (Node& node : ValidNodesRange())
+      delete node.int_ptr;
+
+    NodeClear();
+  }
+
+  // Define const_iterator that point to the integer value instead of the node
+  // of the Int instance to completely hide these from this class' API.
+  struct const_iterator : public BaseType::NodeIterator {
+    const int& operator*() const {
+      return (this->BaseType::NodeIterator::operator*()).int_ptr->x();
+    }
+    const int* operator->() const { return &(this->operator*()); }
+  };
+
+  const_iterator begin() const { return {BaseType::NodeBegin()}; }
+
+  const_iterator end() const { return {BaseType::NodeEnd()}; }
+};
+
+TEST(HashTableBaseTest, Construction) {
+  Int::ResetCounters();
+  {
+    TestHashTable table;
+    EXPECT_TRUE(table.empty());
+    EXPECT_EQ(table.size(), 0u);
+    EXPECT_EQ(table.begin(), table.end());
+  }
+
+  // No item was created or destroyed.
+  EXPECT_EQ(Int::creation_counter, 0u);
+  EXPECT_EQ(Int::destruction_counter, 0u);
+}
+
+TEST(HashTableBaseTest, InsertionsAndLookups) {
+  Int::ResetCounters();
+  {
+    TestHashTable table;
+    table.insert(1);
+    table.insert(5);
+    table.insert(7);
+
+    EXPECT_FALSE(table.empty());
+    EXPECT_EQ(table.size(), 3u);
+    EXPECT_NE(table.begin(), table.end());
+
+    EXPECT_EQ(Int::creation_counter, 3u);
+    EXPECT_EQ(Int::destruction_counter, 0u);
+
+    EXPECT_FALSE(table.contains(0));
+    EXPECT_TRUE(table.contains(1));
+    EXPECT_FALSE(table.contains(2));
+    EXPECT_FALSE(table.contains(3));
+    EXPECT_TRUE(table.contains(5));
+    EXPECT_FALSE(table.contains(6));
+    EXPECT_TRUE(table.contains(7));
+    EXPECT_FALSE(table.contains(8));
+  }
+
+  EXPECT_EQ(Int::creation_counter, 3u);
+  EXPECT_EQ(Int::destruction_counter, 3u);
+}
+
+TEST(HashTableBaseTest, CopyAssignment) {
+  Int::ResetCounters();
+  {
+    TestHashTable table;
+    table.insert(1);
+    table.insert(5);
+    table.insert(7);
+
+    EXPECT_FALSE(table.empty());
+    EXPECT_EQ(table.size(), 3u);
+
+    TestHashTable table2;
+    EXPECT_TRUE(table2.empty());
+    table2 = table;
+    EXPECT_FALSE(table2.empty());
+    EXPECT_EQ(table2.size(), 3u);
+    EXPECT_FALSE(table.empty());
+    EXPECT_EQ(table.size(), 3u);
+
+    EXPECT_EQ(Int::creation_counter, 6u);
+    EXPECT_EQ(Int::destruction_counter, 0u);
+
+    EXPECT_FALSE(table.contains(0));
+    EXPECT_TRUE(table.contains(1));
+    EXPECT_FALSE(table.contains(2));
+    EXPECT_FALSE(table.contains(3));
+    EXPECT_TRUE(table.contains(5));
+    EXPECT_FALSE(table.contains(6));
+    EXPECT_TRUE(table.contains(7));
+    EXPECT_FALSE(table.contains(8));
+
+    EXPECT_FALSE(table2.contains(0));
+    EXPECT_TRUE(table2.contains(1));
+    EXPECT_FALSE(table2.contains(2));
+    EXPECT_FALSE(table2.contains(3));
+    EXPECT_TRUE(table2.contains(5));
+    EXPECT_FALSE(table2.contains(6));
+    EXPECT_TRUE(table2.contains(7));
+    EXPECT_FALSE(table2.contains(8));
+  }
+
+  EXPECT_EQ(Int::creation_counter, 6u);
+  EXPECT_EQ(Int::destruction_counter, 6u);
+}
+
+TEST(HashTableBaseTest, MoveAssignment) {
+  Int::ResetCounters();
+  {
+    TestHashTable table;
+    table.insert(1);
+    table.insert(5);
+    table.insert(7);
+
+    EXPECT_FALSE(table.empty());
+    EXPECT_EQ(table.size(), 3u);
+
+    TestHashTable table2;
+    EXPECT_TRUE(table2.empty());
+    table2 = std::move(table);
+    EXPECT_FALSE(table2.empty());
+    EXPECT_EQ(table2.size(), 3u);
+    EXPECT_TRUE(table.empty());
+    EXPECT_EQ(table.size(), 0u);
+
+    EXPECT_EQ(Int::creation_counter, 3u);
+    EXPECT_EQ(Int::destruction_counter, 0u);
+
+    EXPECT_FALSE(table2.contains(0));
+    EXPECT_TRUE(table2.contains(1));
+    EXPECT_FALSE(table2.contains(2));
+    EXPECT_FALSE(table2.contains(3));
+    EXPECT_TRUE(table2.contains(5));
+    EXPECT_FALSE(table2.contains(6));
+    EXPECT_TRUE(table2.contains(7));
+    EXPECT_FALSE(table2.contains(8));
+  }
+
+  EXPECT_EQ(Int::creation_counter, 3u);
+  EXPECT_EQ(Int::destruction_counter, 3u);
+}
+
+TEST(HashTableBaseTest, Clear) {
+  Int::ResetCounters();
+  {
+    TestHashTable table;
+    table.insert(1);
+    table.insert(5);
+    table.insert(7);
+
+    EXPECT_FALSE(table.empty());
+    EXPECT_EQ(table.size(), 3u);
+
+    table.clear();
+    EXPECT_TRUE(table.empty());
+
+    EXPECT_EQ(Int::creation_counter, 3u);
+    EXPECT_EQ(Int::destruction_counter, 3u);
+  }
+
+  EXPECT_EQ(Int::creation_counter, 3u);
+  EXPECT_EQ(Int::destruction_counter, 3u);
+}
+
+TEST(HashTableBaseTest, Iteration) {
+  TestHashTable table;
+  table.insert(1);
+  table.insert(5);
+  table.insert(7);
+
+  EXPECT_FALSE(table.empty());
+  EXPECT_EQ(table.size(), 3u);
+
+  std::vector<int> values;
+
+  for (const int& x : table)
+    values.push_back(x);
+
+  std::sort(values.begin(), values.end());
+  EXPECT_EQ(values.size(), 3u);
+  EXPECT_EQ(values[0], 1);
+  EXPECT_EQ(values[1], 5);
+  EXPECT_EQ(values[2], 7);
+}

diff --git a/src/gn/label.cc b/src/gn/label.cc
index 4811196..173c3d5 100644
--- a/src/gn/label.cc
+++ b/src/gn/label.cc

@@ -51,11 +51,11 @@
 bool ComputeTargetNameFromDep(const Value& input_value,
                               const SourceDir& computed_location,
                               const std::string_view& input,
-                              std::string* result,
+                              StringAtom* result,
                               Err* err) {
   if (!input.empty()) {
     // Easy case: input is specified, just use it.
-    result->assign(input.data(), input.size());
+    *result = StringAtom(input);
     return true;
   }
 
@@ -69,8 +69,8 @@
 
   size_t next_to_last_slash = loc.rfind('/', loc.size() - 2);
   DCHECK(next_to_last_slash != std::string::npos);
-  result->assign(&loc[next_to_last_slash + 1],
-                 loc.size() - next_to_last_slash - 2);
+  *result = StringAtom(std::string_view{&loc[next_to_last_slash + 1],
+                                        loc.size() - next_to_last_slash - 2});
   return true;
 }
 
@@ -93,9 +93,9 @@
              const Value& original_value,
              const std::string_view& input,
              SourceDir* out_dir,
-             std::string* out_name,
+             StringAtom* out_name,
              SourceDir* out_toolchain_dir,
-             std::string* out_toolchain_name,
+             StringAtom* out_toolchain_name,
              Err* err) {
   // To workaround the problem that std::string_view operator[] doesn't return a
   // ref.
@@ -185,7 +185,7 @@
     // check.
     if (toolchain_piece.empty()) {
       *out_toolchain_dir = current_toolchain.dir();
-      *out_toolchain_name = current_toolchain.name();
+      *out_toolchain_name = current_toolchain.name_atom();
       return true;
     } else {
       return Resolve(current_dir, source_root, current_toolchain,
@@ -253,18 +253,21 @@
     //tools/gn  ->  //tools/gn:gn
 )*";
 
+Label::Label() : hash_(ComputeHash()) {}
+
 Label::Label(const SourceDir& dir,
              const std::string_view& name,
              const SourceDir& toolchain_dir,
              const std::string_view& toolchain_name)
-    : dir_(dir), toolchain_dir_(toolchain_dir) {
-  name_.assign(name.data(), name.size());
-  toolchain_name_.assign(toolchain_name.data(), toolchain_name.size());
-}
+    : dir_(dir),
+      name_(StringAtom(name)),
+      toolchain_dir_(toolchain_dir),
+      toolchain_name_(StringAtom(toolchain_name)),
+      hash_(ComputeHash()) {}
 
-Label::Label(const SourceDir& dir, const std::string_view& name) : dir_(dir) {
-  name_.assign(name.data(), name.size());
-}
+Label::Label(const SourceDir& dir, const std::string_view& name)
+    : dir_(dir), name_(StringAtom(name)),
+      hash_(ComputeHash()) {}
 
 // static
 Label Label::Resolve(const SourceDir& current_dir,
@@ -300,21 +303,21 @@
 
 std::string Label::GetUserVisibleName(bool include_toolchain) const {
   std::string ret;
-  ret.reserve(dir_.value().size() + name_.size() + 1);
+  ret.reserve(dir_.value().size() + name_.str().size() + 1);
 
   if (dir_.is_null())
     return ret;
 
   ret = DirWithNoTrailingSlash(dir_);
   ret.push_back(':');
-  ret.append(name_);
+  ret.append(name_.str());
 
   if (include_toolchain) {
     ret.push_back('(');
     if (!toolchain_dir_.is_null() && !toolchain_name_.empty()) {
       ret.append(DirWithNoTrailingSlash(toolchain_dir_));
       ret.push_back(':');
-      ret.append(toolchain_name_);
+      ret.append(toolchain_name_.str());
     }
     ret.push_back(')');
   }
@@ -323,6 +326,6 @@
 
 std::string Label::GetUserVisibleName(const Label& default_toolchain) const {
   bool include_toolchain = default_toolchain.dir() != toolchain_dir_ ||
-                           default_toolchain.name() != toolchain_name_;
+                           default_toolchain.name_atom() != toolchain_name_;
   return GetUserVisibleName(include_toolchain);
 }

diff --git a/src/gn/label.h b/src/gn/label.h
index c266828..95234c9 100644
--- a/src/gn/label.h
+++ b/src/gn/label.h

@@ -10,6 +10,7 @@
 #include <stddef.h>
 
 #include "gn/source_dir.h"
+#include "gn/string_atom.h"
 
 class Err;
 class Value;
@@ -19,7 +20,7 @@
 // part, so it starts with a slash, and has one colon.
 class Label {
  public:
-  Label() = default;
+  Label();
 
   // Makes a label given an already-separated out path and name.
   // See also Resolve().
@@ -43,10 +44,12 @@
   bool is_null() const { return dir_.is_null(); }
 
   const SourceDir& dir() const { return dir_; }
-  const std::string& name() const { return name_; }
+  const std::string& name() const { return name_.str(); }
+  StringAtom name_atom() const { return name_; }
 
   const SourceDir& toolchain_dir() const { return toolchain_dir_; }
-  const std::string& toolchain_name() const { return toolchain_name_; }
+  const std::string& toolchain_name() const { return toolchain_name_.str(); }
+  StringAtom toolchain_name_atom() const { return toolchain_name_; }
 
   // Returns the current label's toolchain as its own Label.
   Label GetToolchainLabel() const;
@@ -66,9 +69,9 @@
   std::string GetUserVisibleName(const Label& default_toolchain) const;
 
   bool operator==(const Label& other) const {
-    return name_ == other.name_ && dir_ == other.dir_ &&
+    return name_.SameAs(other.name_) && dir_ == other.dir_ &&
            toolchain_dir_ == other.toolchain_dir_ &&
-           toolchain_name_ == other.toolchain_name_;
+           toolchain_name_.SameAs(other.toolchain_name_);
   }
   bool operator!=(const Label& other) const { return !operator==(other); }
   bool operator<(const Label& other) const {
@@ -81,28 +84,46 @@
   // other object.
   bool ToolchainsEqual(const Label& other) const {
     return toolchain_dir_ == other.toolchain_dir_ &&
-           toolchain_name_ == other.toolchain_name_;
+           toolchain_name_.SameAs(other.toolchain_name_);
   }
 
+  size_t hash() const { return hash_; }
+
  private:
+  Label(SourceDir dir, StringAtom name) : dir_(dir), name_(name) {}
+
+  Label(SourceDir dir,
+        StringAtom name,
+        SourceDir toolchain_dir,
+        StringAtom toolchain_name)
+      : dir_(dir),
+        name_(name),
+        toolchain_dir_(toolchain_dir),
+        toolchain_name_(toolchain_name) {}
+
+  size_t ComputeHash() const {
+    size_t h0 = dir_.hash();
+    size_t h1 = name_.hash();
+    size_t h2 = toolchain_dir_.hash();
+    size_t h3 = toolchain_name_.hash();
+    return ((h3 * 131 + h2) * 131 + h1) * 131 + h0;
+  }
+
   SourceDir dir_;
-  std::string name_;
+  StringAtom name_;
 
   SourceDir toolchain_dir_;
-  std::string toolchain_name_;
+  StringAtom toolchain_name_;
+
+  size_t hash_;
+  // NOTE: Must be initialized by constructors with ComputeHash() value.
 };
 
 namespace std {
 
 template <>
 struct hash<Label> {
-  std::size_t operator()(const Label& v) const {
-    hash<std::string> stringhash;
-    return ((stringhash(v.dir().value()) * 131 + stringhash(v.name())) * 131 +
-            stringhash(v.toolchain_dir().value())) *
-               131 +
-           stringhash(v.toolchain_name());
-  }
+  std::size_t operator()(const Label& v) const { return v.hash(); }
 };
 
 }  // namespace std

diff --git a/src/gn/source_dir.cc b/src/gn/source_dir.cc
index dd66688..ce92cf2 100644
--- a/src/gn/source_dir.cc
+++ b/src/gn/source_dir.cc

@@ -13,7 +13,7 @@
 
 namespace {
 
-void AssertValueSourceDirString(const std::string& s) {
+void AssertValueSourceDirString(const std::string_view s) {
   if (!s.empty()) {
 #if defined(OS_WIN)
     DCHECK(s[0] == '/' ||
@@ -57,19 +57,24 @@
   return true;
 }
 
+static StringAtom SourceDirStringAtom(const std::string_view s) {
+  if (EndsWithSlash(s)) {  // Avoid allocation when possible.
+    AssertValueSourceDirString(s);
+    return StringAtom(s);
+  }
+
+  std::string str;
+  str.reserve(s.size() + 1);
+  str += s;
+  str.push_back('/');
+  AssertValueSourceDirString(str);
+  return StringAtom(str);
+}
+
 }  // namespace
 
-SourceDir::SourceDir(const std::string& s) : value_(s) {
-  if (!EndsWithSlash(value_))
-    value_.push_back('/');
-  AssertValueSourceDirString(value_);
-}
-
-SourceDir::SourceDir(std::string&& s) : value_(std::move(s)) {
-  if (!EndsWithSlash(value_))
-    value_.push_back('/');
-  AssertValueSourceDirString(value_);
-}
+SourceDir::SourceDir(const std::string_view s)
+    : value_(SourceDirStringAtom(s)) {}
 
 template <typename StringType>
 std::string SourceDir::ResolveRelativeAs(
@@ -82,7 +87,7 @@
                                         err)) {
     return std::string();
   }
-  return ResolveRelative(input_value, value_, as_file, source_root);
+  return ResolveRelative(input_value, value_.str(), as_file, source_root);
 }
 
 SourceFile SourceDir::ResolveRelativeFile(
@@ -98,7 +103,7 @@
   if (!ValidateResolveInput<std::string>(true, p, input_string, err))
     return ret;
 
-  ret.SetValue(ResolveRelative(input_string, value_, true, source_root));
+  ret.SetValue(ResolveRelative(input_string, value_.str(), true, source_root));
   return ret;
 }
 
@@ -131,7 +136,7 @@
 }
 
 base::FilePath SourceDir::Resolve(const base::FilePath& source_root) const {
-  return ResolvePath(value_, false, source_root);
+  return ResolvePath(value_.str(), false, source_root);
 }
 
 // Explicit template instantiation

diff --git a/src/gn/source_dir.h b/src/gn/source_dir.h
index ed4b1e3..9dd847c 100644
--- a/src/gn/source_dir.h
+++ b/src/gn/source_dir.h

@@ -14,6 +14,8 @@
 #include "base/files/file_path.h"
 #include "base/logging.h"
 
+#include "gn/string_atom.h"
+
 class Err;
 class SourceFile;
 class Value;
@@ -29,8 +31,7 @@
  public:
   SourceDir() = default;
 
-  SourceDir(const std::string& s);
-  explicit SourceDir(std::string&& s);
+  SourceDir(const std::string_view s);
 
   // Resolves a file or dir name (based on as_file parameter) relative
   // to this source directory. Will return an empty string on error
@@ -74,8 +75,8 @@
       Err* err,
       const std::string_view& source_root = std::string_view()) const {
     SourceDir ret;
-    ret.value_ = ResolveRelativeAs<StringType>(false, blame_input_value,
-                                               input_value, err, source_root);
+    ret.value_ = StringAtom(ResolveRelativeAs<StringType>(
+        false, blame_input_value, input_value, err, source_root));
     return ret;
   }
 
@@ -91,12 +92,13 @@
   base::FilePath Resolve(const base::FilePath& source_root) const;
 
   bool is_null() const { return value_.empty(); }
-  const std::string& value() const { return value_; }
+  const std::string& value() const { return value_.str(); }
 
   // Returns true if this path starts with a "//" which indicates a path
   // from the source root.
   bool is_source_absolute() const {
-    return value_.size() >= 2 && value_[0] == '/' && value_[1] == '/';
+    const std::string& v = value_.str();
+    return v.size() >= 2 && v[0] == '/' && v[1] == '/';
   }
 
   // Returns true if this path starts with a single slash which indicates a
@@ -112,7 +114,8 @@
   // return value points into our buffer.
   std::string_view SourceAbsoluteWithOneSlash() const {
     CHECK(is_source_absolute());
-    return std::string_view(&value_[1], value_.size() - 1);
+    const std::string& v = value_.str();
+    return std::string_view(&v[1], v.size() - 1);
   }
 
   // Returns a path that does not end with a slash.
@@ -120,32 +123,30 @@
   // This function simply returns the reference to the value if the path is a
   // root, e.g. "/" or "//".
   std::string_view SourceWithNoTrailingSlash() const {
-    if (value_.size() > 2)
-      return std::string_view(&value_[0], value_.size() - 1);
-    return std::string_view(value_);
+    const std::string& v = value_.str();
+    if (v.size() > 2)
+      return std::string_view(&v[0], v.size() - 1);
+    return std::string_view(v);
   }
 
-  void SwapValue(std::string* v);
-
   bool operator==(const SourceDir& other) const {
-    return value_ == other.value_;
+    return value_.SameAs(other.value_);
   }
   bool operator!=(const SourceDir& other) const { return !operator==(other); }
   bool operator<(const SourceDir& other) const { return value_ < other.value_; }
 
+  size_t hash() const { return value_.hash(); }
+
  private:
   friend class SourceFile;
-  std::string value_;
+  StringAtom value_;
 };
 
 namespace std {
 
 template <>
 struct hash<SourceDir> {
-  std::size_t operator()(const SourceDir& v) const {
-    hash<std::string> h;
-    return h(v.value());
-  }
+  std::size_t operator()(const SourceDir& v) const { return v.hash(); }
 };
 
 }  // namespace std

diff --git a/src/gn/string_atom.cc b/src/gn/string_atom.cc
index c310ffd..fe1ef73 100644
--- a/src/gn/string_atom.cc
+++ b/src/gn/string_atom.cc

@@ -12,7 +12,7 @@
 #include <unordered_set>
 #include <vector>
 
-#include "base/containers/flat_set.h"
+#include "gn/hash_table_base.h"
 
 namespace {
 
@@ -42,6 +42,20 @@
 
 using KeyType = const std::string*;
 
+// A HashTableBase node type that stores one hash value and one string pointer.
+struct KeyNode {
+  size_t hash;
+  KeyType key;
+
+  // The following methods are required by HashTableBase<>
+  bool is_valid() const { return !is_null(); }
+  bool is_null() const { return !key; }
+  size_t hash_value() const { return hash; }
+
+  // No deletion support means faster lookup code.
+  static constexpr bool is_tombstone() { return false; }
+};
+
 // This is a trivial hash table of string pointers, using open addressing.
 // It is faster in practice than using a standard container or even a
 // base::flat_set<>.
@@ -58,11 +72,9 @@
 //        and call Insert(), passing the |node|, |hash| and new string
 //        address as arguments.
 //
-struct KeySet {
-  struct Node {
-    size_t hash = 0;
-    KeyType key = nullptr;
-  };
+struct KeySet : public HashTableBase<KeyNode> {
+  using BaseType = HashTableBase<KeyNode>;
+  using Node = BaseType::Node;
 
   // Compute hash for |str|. Replace with faster hash function if available.
   static size_t Hash(std::string_view str) {
@@ -79,53 +91,19 @@
   //       passed to Insert() in case of a miss.
   //
   Node* Lookup(size_t hash, std::string_view str) const {
-    size_t index = hash & (size_ - 1);
-    const Node* nodes = &buckets_[0];
-    const Node* nodes_limit = nodes + size_;
-    const Node* node = nodes + index;
-    for (;;) {
-      if (!node->key || (node->hash == hash && *node->key == str))
-        return const_cast<Node*>(node);
-      if (++node == nodes_limit)
-        node = nodes;
-    }
+    return BaseType::NodeLookup(hash, [hash, &str](const Node* node) {
+      // NOTE: Only is_valid() node pointers are passed to this function
+      // which means key won't be null, and there are no tombstone values
+      // in this derivation of HashTableBase<>.
+      return node->hash == hash && *node->key == str;
+    });
   }
 
-  // Insert a new key in this set. |node| must be a value returned by
-  // a previous Lookup() call. |hash| is the hash value for |key|.
   void Insert(Node* node, size_t hash, KeyType key) {
     node->hash = hash;
     node->key = key;
-    count_ += 1;
-    if (count_ * 4 >= size_ * 3)  // 75% max load factor
-      GrowBuckets();
+    BaseType::UpdateAfterInsert();
   }
-
-  void GrowBuckets() {
-    size_t size = buckets_.size();
-    size_t new_size = size * 2;
-    size_t new_mask = new_size - 1;
-
-    std::vector<Node> new_buckets;
-    new_buckets.resize(new_size);
-    for (const Node& node : buckets_) {
-      size_t index = node.hash & new_mask;
-      for (;;) {
-        Node& node2 = new_buckets[index];
-        if (!node2.key) {
-          node2 = node;
-          break;
-        }
-        index = (index + 1) & new_mask;
-      }
-    }
-    buckets_ = std::move(new_buckets);
-    size_ = new_size;
-  }
-
-  size_t size_ = 2;
-  size_t count_ = 0;
-  std::vector<Node> buckets_ = {Node{}, Node{}};
 };
 
 class StringAtomSet {

diff --git a/src/gn/unique_vector.h b/src/gn/unique_vector.h
index c53ed3a..ce5380b 100644
--- a/src/gn/unique_vector.h
+++ b/src/gn/unique_vector.h

@@ -11,80 +11,65 @@
 #include <unordered_set>
 #include <vector>
 
-namespace internal {
+#include "hash_table_base.h"
 
-// This class allows us to insert things by reference into a hash table which
-// avoids copies. The hash function of a UniquifyRef is that of the object it
-// points to.
-//
-// There are two ways it can store data: (1) by (vector*, index) which is used
-// to refer to the array in UniqueVector and make it work even when the
-// underlying vector is reallocated; (2) by T* which is used to do lookups
-// into the hash table of things that aren't in a vector yet.
-//
-// It also caches the hash value which allows us to query and then insert
-// without recomputing the hash.
-template <typename T>
-class UniquifyRef {
+// A HashTableBase node type used by all UniqueVector<> instantiations.
+// The node stores the item's hash value and its index plus 1, in order
+// to follow the HashTableBase requirements (i.e. zero-initializable null
+// value).
+struct UniqueVectorNode {
+  size_t hash;
+  size_t index_plus1;
+
+  size_t hash_value() const { return hash; }
+
+  bool is_valid() const { return !is_null(); }
+
+  bool is_null() const { return index_plus1 == 0; }
+
+  // Do not support deletion, making lookup faster.
+  static constexpr bool is_tombstone() { return false; }
+
+  // Return vector index.
+  size_t index() const { return index_plus1 - 1u; }
+
+  // Create new instance from hash value and vector index.
+  static UniqueVectorNode Make(size_t hash, size_t index) {
+    return {hash, index + 1u};
+  }
+};
+
+using UniqueVectorHashTableBase = HashTableBase<UniqueVectorNode>;
+
+// A common HashSet implementation used by all UniqueVector instantiations.
+class UniqueVectorHashSet : public UniqueVectorHashTableBase {
  public:
-  UniquifyRef() = default;
+  using BaseType = UniqueVectorHashTableBase;
+  using Node = BaseType::Node;
 
-  // Initialize with a pointer to a value.
-  explicit UniquifyRef(const T* v) : value_(v) { FillHashValue(); }
-
-  // Initialize with an array + index.
-  UniquifyRef(const std::vector<T>* v, size_t i) : vect_(v), index_(i) {
-    FillHashValue();
+  // Specialized Lookup() template function.
+  // |hash| is the hash value for |item|.
+  // |item| is the item search key being looked up.
+  // |vector| is containing vector for existing items.
+  //
+  // Returns a non-null mutable Node pointer.
+  template <typename T>
+  Node* Lookup(size_t hash, const T& item, const std::vector<T>& vector) const {
+    return BaseType::NodeLookup(hash, [&](const Node* node) {
+      return node->hash == hash && vector[node->index()] == item;
+    });
   }
 
-  // Initialize with an array + index and a known hash value to prevent
-  // re-hashing.
-  UniquifyRef(const std::vector<T>* v, size_t i, size_t hash_value)
-      : vect_(v), index_(i), hash_val_(hash_value) {}
-
-  const T& value() const { return value_ ? *value_ : (*vect_)[index_]; }
-  size_t hash_val() const { return hash_val_; }
-  size_t index() const { return index_; }
-
- private:
-  void FillHashValue() {
-    std::hash<T> h;
-    hash_val_ = h(value());
+  // Specialized Insert() function that converts |index| into the proper
+  // UniqueVectorKey type.
+  void Insert(Node* node, size_t hash, size_t index) {
+    *node = Node::Make(hash, index);
+    BaseType::UpdateAfterInsert();
   }
 
-  // When non-null, points to the object.
-  const T* value_ = nullptr;
-
-  // When value is null these are used.
-  const std::vector<T>* vect_ = nullptr;
-  size_t index_ = static_cast<size_t>(-1);
-
-  size_t hash_val_ = 0;
+  void Clear() { NodeClear(); }
 };
 
-template <typename T>
-inline bool operator==(const UniquifyRef<T>& a, const UniquifyRef<T>& b) {
-  return a.value() == b.value();
-}
-
-template <typename T>
-inline bool operator<(const UniquifyRef<T>& a, const UniquifyRef<T>& b) {
-  return a.value() < b.value();
-}
-
-}  // namespace internal
-
-namespace std {
-
-template <typename T>
-struct hash<internal::UniquifyRef<T>> {
-  std::size_t operator()(const internal::UniquifyRef<T>& v) const {
-    return v.hash_val();
-  }
-};
-
-}  // namespace std
-
 // An ordered set optimized for GN's usage. Such sets are used to store lists
 // of configs and libraries, and are appended to but not randomly inserted
 // into.
@@ -100,38 +85,38 @@
   bool empty() const { return vector_.empty(); }
   void clear() {
     vector_.clear();
-    set_.clear();
+    set_.Clear();
   }
   void reserve(size_t s) { vector_.reserve(s); }
 
   const T& operator[](size_t index) const { return vector_[index]; }
 
   const_iterator begin() const { return vector_.begin(); }
-  iterator begin() { return vector_.begin(); }
   const_iterator end() const { return vector_.end(); }
-  iterator end() { return vector_.end(); }
 
   // Returns true if the item was appended, false if it already existed (and
   // thus the vector was not modified).
   bool push_back(const T& t) {
-    Ref ref(&t);
-    if (set_.find(ref) != set_.end())
+    size_t hash = std::hash<T>()(t);
+    auto* node = set_.Lookup(hash, t, vector_);
+    if (node->is_valid()) {
       return false;  // Already have this one.
+    }
 
     vector_.push_back(t);
-    set_.insert(Ref(&vector_, vector_.size() - 1, ref.hash_val()));
+    set_.Insert(node, hash, vector_.size() - 1);
     return true;
   }
 
   bool push_back(T&& t) {
-    Ref ref(&t);
-    if (set_.find(ref) != set_.end())
+    size_t hash = std::hash<T>()(t);
+    auto* node = set_.Lookup(hash, t, vector_);
+    if (node->is_valid()) {
       return false;  // Already have this one.
+    }
 
-    auto ref_hash_val = ref.hash_val();  // Save across moving t.
-
-    vector_.push_back(std::move(t));  // Invalidates |ref|.
-    set_.insert(Ref(&vector_, vector_.size() - 1, ref_hash_val));
+    vector_.push_back(std::move(t));
+    set_.Insert(node, hash, vector_.size() - 1);
     return true;
   }
 
@@ -145,19 +130,14 @@
   // Returns the index of the item matching the given value in the list, or
   // (size_t)(-1) if it's not found.
   size_t IndexOf(const T& t) const {
-    Ref ref(&t);
-    typename HashSet::const_iterator found = set_.find(ref);
-    if (found == set_.end())
-      return static_cast<size_t>(-1);
-    return found->index();
+    size_t hash = std::hash<T>()(t);
+    auto* node = set_.Lookup(hash, t, vector_);
+    return node->index();
   }
 
  private:
-  using Ref = internal::UniquifyRef<T>;
-  using HashSet = std::unordered_set<Ref>;
-
-  HashSet set_;
   Vector vector_;
+  UniqueVectorHashSet set_;
 };
 
 #endif  // TOOLS_GN_UNIQUE_VECTOR_H_
commit	07854239257cc3eff8759503246595d2629f9367	[log] [tgz]
author	David 'Digit' Turner <digit@google.com>	Mon Feb 03 18:12:12 2020 +0100
committer	Commit Bot <commit-bot@chromium.org>	Wed Mar 11 17:25:40 2020 +0000
tree	b55ad7e85222381c37bc143d7f45187e9a469be6
parent	992e927e217baa8a74e6e2c5d7417cb65cf24824 [diff]