Teach gn to handle systems with > 64 processors

Previously, when run on Windows systems with > 64 logical processors,
gn's calculation of the number of available processors (used to
determine how many worker threads to create) under-counted the number of
processors, thereby not making full use of the system's available
processing power.

This change corrects gn's count of available CPUs so that it can use all
of the system's processors. In addition to creating the correct number
of workers, this change also distributes created workers between
processor groups, a necessary step when utilizing more than 64
processors on Windows.

On the 72-logical-processor P920, this change makes gn gen use all 36
cores, rather than only 18.

Bug: crbug.com/982982
Change-Id: I40b40f4d34b634566991e4bebf686f48836445e8
Reviewed-on: https://gn-review.googlesource.com/c/gn/+/5660
Commit-Queue: Scott Graham <scottmg@chromium.org>
Reviewed-by: Scott Graham <scottmg@chromium.org>
diff --git a/util/sys_info.cc b/util/sys_info.cc
index a1ce3e9..5fefc9b 100644
--- a/util/sys_info.cc
+++ b/util/sys_info.cc
@@ -72,9 +72,7 @@
 
   return static_cast<int>(res);
 #elif defined(OS_WIN)
-  SYSTEM_INFO system_info = {};
-  ::GetNativeSystemInfo(&system_info);
-  return system_info.dwNumberOfProcessors;
+  return ::GetActiveProcessorCount(ALL_PROCESSOR_GROUPS);
 #else
 #error
 #endif
diff --git a/util/worker_pool.cc b/util/worker_pool.cc
index 92fadd4..60bcfbf 100644
--- a/util/worker_pool.cc
+++ b/util/worker_pool.cc
@@ -7,10 +7,50 @@
 #include "base/command_line.h"
 #include "base/strings/string_number_conversions.h"
 #include "tools/gn/switches.h"
+#include "util/build_config.h"
 #include "util/sys_info.h"
 
+#if defined(OS_WIN)
+#include <windows.h>
+#endif
+
 namespace {
 
+#if defined(OS_WIN)
+class ProcessorGroupSetter {
+ public:
+  void SetProcessorGroup(std::thread* thread);
+
+ private:
+  int group_ = 0;
+  GROUP_AFFINITY group_affinity_;
+  int num_available_cores_in_group_ = ::GetActiveProcessorCount(group_) / 2;
+  const int num_groups_ = ::GetActiveProcessorGroupCount();
+};
+
+void ProcessorGroupSetter::SetProcessorGroup(std::thread* thread) {
+  if (num_groups_ <= 1)
+    return;
+
+  const HANDLE thread_handle = thread->native_handle();
+  ::GetThreadGroupAffinity(thread_handle, &group_affinity_);
+  group_affinity_.Group = group_;
+  const bool success =
+      ::SetThreadGroupAffinity(thread_handle, &group_affinity_, nullptr);
+  DCHECK(success);
+
+  // Move to next group once one thread has been assigned per core in |group_|.
+  num_available_cores_in_group_--;
+  if (num_available_cores_in_group_ <= 0) {
+    group_++;
+    if (group_ >= num_groups_) {
+      group_ = 0;
+    }
+    num_available_cores_in_group_ = ::GetActiveProcessorCount(group_) / 2;
+  }
+}
+#endif
+
 int GetThreadCount() {
   std::string thread_count =
       base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
@@ -46,9 +86,22 @@
 WorkerPool::WorkerPool() : WorkerPool(GetThreadCount()) {}
 
 WorkerPool::WorkerPool(size_t thread_count) : should_stop_processing_(false) {
+#if defined(OS_WIN)
+  ProcessorGroupSetter processor_group_setter;
+#endif
+
   threads_.reserve(thread_count);
-  for (size_t i = 0; i < thread_count; ++i)
+  for (size_t i = 0; i < thread_count; ++i) {
     threads_.emplace_back([this]() { Worker(); });
+
+#if defined(OS_WIN)
+    // Set thread processor group. This is needed for systems with more than 64
+    // logical processors, wherein available processors are divided into groups,
+    // and applications that need to use more than one group's processors must
+    // manually assign their threads to groups.
+    processor_group_setter.SetProcessorGroup(&threads_.back());
+#endif
+  }
 }
 
 WorkerPool::~WorkerPool() {