Import Cobalt 24.master.0.1033952
diff --git a/third_party/llvm-project/libcxx/include/__format/unicode.h b/third_party/llvm-project/libcxx/include/__format/unicode.h
index 4327258..12aed50 100644
--- a/third_party/llvm-project/libcxx/include/__format/unicode.h
+++ b/third_party/llvm-project/libcxx/include/__format/unicode.h
@@ -11,11 +11,15 @@
 #define _LIBCPP___FORMAT_UNICODE_H
 
 #include <__assert>
+#include <__bit/countl.h>
+#include <__concepts/same_as.h>
 #include <__config>
 #include <__format/extended_grapheme_cluster_table.h>
+#include <__iterator/concepts.h>
+#include <__iterator/readable_traits.h> // iter_value_t
 #include <__type_traits/make_unsigned.h>
 #include <__utility/unreachable.h>
-#include <bit>
+#include <string_view>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -23,27 +27,32 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-#if _LIBCPP_STD_VER > 17
+#if _LIBCPP_STD_VER >= 20
 
 namespace __unicode {
 
-#  if _LIBCPP_STD_VER > 20
+// Helper struct for the result of a consume operation.
+//
+// The status value for a correct code point is 0. This allows a valid value to
+// be used without masking.
+// When the decoding fails it know the number of code units affected. For the
+// current use-cases that value is not needed, therefore it is not stored.
+// The escape routine needs the number of code units for both a valid and
+// invalid character and keeps track of it itself. Doing it in this result
+// unconditionally would give some overhead when the value is unneeded.
+struct __consume_result {
+  // When __status == __ok it contains the decoded code point.
+  // Else it contains the replacement character U+FFFD
+  char32_t __code_point : 31;
 
-/// The result of consuming a code point using P2286' semantics
-///
-/// TODO FMT Combine __consume and  __consume_p2286 in one function.
-struct __consume_p2286_result {
-  // A size of 0 means well formed. This to differenciate between
-  // a valid code point and a code unit that's invalid like 0b11111xxx.
-  int __ill_formed_size;
-
-  // If well formed the consumed code point.
-  // Otherwise the ill-formed code units as unsigned 8-bit values. They are
-  // stored in reverse order, to make it easier to extract the values.
-  char32_t __value;
+  enum : char32_t {
+    // Consumed a well-formed code point.
+    __ok = 0,
+    // Encountered invalid UTF-8
+    __error = 1
+  } __status : 1 {__ok};
 };
-
-#  endif // _LIBCPP_STD_VER > 20
+static_assert(sizeof(__consume_result) == sizeof(char32_t));
 
 #  ifndef _LIBCPP_HAS_NO_UNICODE
 
@@ -62,7 +71,39 @@
 
 inline constexpr char32_t __replacement_character = U'\ufffd';
 
-_LIBCPP_HIDE_FROM_ABI constexpr bool __is_continuation(const char* __char, int __count) {
+// The error of a consume operation.
+//
+// This sets the code point to the replacement character. This code point does
+// not participate in the grapheme clustering, so grapheme clustering code can
+// ignore the error status and always use the code point.
+inline constexpr __consume_result __consume_result_error{__replacement_character, __consume_result::__error};
+
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool __is_high_surrogate(char32_t __value) {
+  return __value >= 0xd800 && __value <= 0xdbff;
+}
+
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool __is_low_surrogate(char32_t __value) {
+  return __value >= 0xdc00 && __value <= 0xdfff;
+}
+
+// https://www.unicode.org/glossary/#surrogate_code_point
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr bool __is_surrogate(char32_t __value) {
+  return __value >= 0xd800 && __value <= 0xdfff;
+}
+
+// https://www.unicode.org/glossary/#code_point
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr bool __is_code_point(char32_t __value) {
+  return __value <= 0x10ffff;
+}
+
+// https://www.unicode.org/glossary/#unicode_scalar_value
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr bool __is_scalar_value(char32_t __value) {
+  return __unicode::__is_code_point(__value) && !__unicode::__is_surrogate(__value);
+}
+
+template <contiguous_iterator _Iterator>
+  requires same_as<iter_value_t<_Iterator>, char>
+_LIBCPP_HIDE_FROM_ABI constexpr bool __is_continuation(_Iterator __char, int __count) {
   do {
     if ((*__char & 0b1000'0000) != 0b1000'0000)
       return false;
@@ -82,68 +123,38 @@
 /// UTF-8 specialization.
 template <>
 class __code_point_view<char> {
+  using _Iterator = basic_string_view<char>::const_iterator;
+
 public:
-  _LIBCPP_HIDE_FROM_ABI constexpr explicit __code_point_view(const char* __first, const char* __last)
+  _LIBCPP_HIDE_FROM_ABI constexpr explicit __code_point_view(_Iterator __first, _Iterator __last)
       : __first_(__first), __last_(__last) {}
 
   _LIBCPP_HIDE_FROM_ABI constexpr bool __at_end() const noexcept { return __first_ == __last_; }
-  _LIBCPP_HIDE_FROM_ABI constexpr const char* __position() const noexcept { return __first_; }
+  _LIBCPP_HIDE_FROM_ABI constexpr _Iterator __position() const noexcept { return __first_; }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr char32_t __consume() noexcept {
-    _LIBCPP_ASSERT(__first_ != __last_, "can't move beyond the end of input");
-
-    // Based on the number of leading 1 bits the number of code units in the
-    // code point can be determined. See
-    // https://en.wikipedia.org/wiki/UTF-8#Encoding
-    switch (_VSTD::countl_one(static_cast<unsigned char>(*__first_))) {
-    case 0:
-      return *__first_++;
-
-    case 2:
-      if (__last_ - __first_ < 2 || !__unicode::__is_continuation(__first_ + 1, 1)) [[unlikely]]
-        break;
-      else {
-        char32_t __value = static_cast<unsigned char>(*__first_++) & 0x1f;
-        __value <<= 6;
-        __value |= static_cast<unsigned char>(*__first_++) & 0x3f;
-        return __value;
-      }
-
-    case 3:
-      if (__last_ - __first_ < 3 || !__unicode::__is_continuation(__first_ + 1, 2)) [[unlikely]]
-        break;
-      else {
-        char32_t __value = static_cast<unsigned char>(*__first_++) & 0x0f;
-        __value <<= 6;
-        __value |= static_cast<unsigned char>(*__first_++) & 0x3f;
-        __value <<= 6;
-        __value |= static_cast<unsigned char>(*__first_++) & 0x3f;
-        return __value;
-      }
-
-    case 4:
-      if (__last_ - __first_ < 4 || !__unicode::__is_continuation(__first_ + 1, 3)) [[unlikely]]
-        break;
-      else {
-        char32_t __value = static_cast<unsigned char>(*__first_++) & 0x07;
-        __value <<= 6;
-        __value |= static_cast<unsigned char>(*__first_++) & 0x3f;
-        __value <<= 6;
-        __value |= static_cast<unsigned char>(*__first_++) & 0x3f;
-        __value <<= 6;
-        __value |= static_cast<unsigned char>(*__first_++) & 0x3f;
-        return __value;
-      }
-    }
-    // An invalid number of leading ones can be garbage or a code unit in the
-    // middle of a code point. By consuming one code unit the parser may get
-    // "in sync" after a few code units.
-    ++__first_;
-    return __replacement_character;
-  }
-
-#    if _LIBCPP_STD_VER > 20
-  _LIBCPP_HIDE_FROM_ABI constexpr __consume_p2286_result __consume_p2286() noexcept {
+  // https://www.unicode.org/versions/latest/ch03.pdf#G7404
+  // Based on Table 3-7, Well-Formed UTF-8 Byte Sequences
+  //
+  // Code Points        First Byte Second Byte Third Byte Fourth Byte  Remarks
+  // U+0000..U+007F     00..7F                                         U+0000..U+007F 1 code unit range
+  //                    C0..C1     80..BF                              invalid overlong encoding
+  // U+0080..U+07FF     C2..DF     80..BF                              U+0080..U+07FF 2 code unit range
+  //                    E0         80..9F      80..BF                  invalid overlong encoding
+  // U+0800..U+0FFF     E0         A0..BF      80..BF                  U+0800..U+FFFF 3 code unit range
+  // U+1000..U+CFFF     E1..EC     80..BF      80..BF
+  // U+D000..U+D7FF     ED         80..9F      80..BF
+  // U+D800..U+DFFF     ED         A0..BF      80..BF                  invalid encoding of surrogate code point
+  // U+E000..U+FFFF     EE..EF     80..BF      80..BF
+  //                    F0         80..8F      80..BF     80..BF       invalid overlong encoding
+  // U+10000..U+3FFFF   F0         90..BF      80..BF     80..BF       U+10000..U+10FFFF 4 code unit range
+  // U+40000..U+FFFFF   F1..F3     80..BF      80..BF     80..BF
+  // U+100000..U+10FFFF F4         80..8F      80..BF     80..BF
+  //                    F4         90..BF      80..BF     80..BF       U+110000.. invalid code point range
+  //
+  // Unlike other parsers, these invalid entries are tested after decoding.
+  // - The parser always needs to consume these code units
+  // - The code is optimized for well-formed UTF-8
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __consume_result __consume() noexcept {
     _LIBCPP_ASSERT(__first_ != __last_, "can't move beyond the end of input");
 
     // Based on the number of leading 1 bits the number of code units in the
@@ -151,64 +162,77 @@
     // https://en.wikipedia.org/wiki/UTF-8#Encoding
     switch (std::countl_one(static_cast<unsigned char>(*__first_))) {
     case 0:
-      return {0, static_cast<unsigned char>(*__first_++)};
+      return {static_cast<unsigned char>(*__first_++)};
 
-    case 2:
-      if (__last_ - __first_ < 2) [[unlikely]]
+    case 2: {
+      if (__last_ - __first_ < 2 || !__unicode::__is_continuation(__first_ + 1, 1)) [[unlikely]]
         break;
 
-      if (__unicode::__is_continuation(__first_ + 1, 1)) {
-        char32_t __value = static_cast<unsigned char>(*__first_++) & 0x1f;
-        __value <<= 6;
-        __value |= static_cast<unsigned char>(*__first_++) & 0x3f;
-        return {0, __value};
-      }
-      break;
+      char32_t __value = static_cast<unsigned char>(*__first_++) & 0x1f;
+      __value <<= 6;
+      __value |= static_cast<unsigned char>(*__first_++) & 0x3f;
 
-    case 3:
-      if (__last_ - __first_ < 3) [[unlikely]]
+      // These values should be encoded in 1 UTF-8 code unit.
+      if (__value < 0x0080) [[unlikely]]
+        return __consume_result_error;
+
+      return {__value};
+    }
+
+    case 3: {
+      if (__last_ - __first_ < 3 || !__unicode::__is_continuation(__first_ + 1, 2)) [[unlikely]]
         break;
 
-      if (__unicode::__is_continuation(__first_ + 1, 2)) {
-        char32_t __value = static_cast<unsigned char>(*__first_++) & 0x0f;
-        __value <<= 6;
-        __value |= static_cast<unsigned char>(*__first_++) & 0x3f;
-        __value <<= 6;
-        __value |= static_cast<unsigned char>(*__first_++) & 0x3f;
-        return {0, __value};
-      }
-      break;
+      char32_t __value = static_cast<unsigned char>(*__first_++) & 0x0f;
+      __value <<= 6;
+      __value |= static_cast<unsigned char>(*__first_++) & 0x3f;
+      __value <<= 6;
+      __value |= static_cast<unsigned char>(*__first_++) & 0x3f;
 
-    case 4:
-      if (__last_ - __first_ < 4) [[unlikely]]
+      // These values should be encoded in 1 or 2 UTF-8 code units.
+      if (__value < 0x0800) [[unlikely]]
+        return __consume_result_error;
+
+      // A surrogate value is always encoded in 3 UTF-8 code units.
+      if (__unicode::__is_surrogate(__value)) [[unlikely]]
+        return __consume_result_error;
+
+      return {__value};
+    }
+
+    case 4: {
+      if (__last_ - __first_ < 4 || !__unicode::__is_continuation(__first_ + 1, 3)) [[unlikely]]
         break;
 
-      if (__unicode::__is_continuation(__first_ + 1, 3)) {
-        char32_t __value = static_cast<unsigned char>(*__first_++) & 0x07;
-        __value <<= 6;
-        __value |= static_cast<unsigned char>(*__first_++) & 0x3f;
-        __value <<= 6;
-        __value |= static_cast<unsigned char>(*__first_++) & 0x3f;
-        __value <<= 6;
-        __value |= static_cast<unsigned char>(*__first_++) & 0x3f;
+      char32_t __value = static_cast<unsigned char>(*__first_++) & 0x07;
+      __value <<= 6;
+      __value |= static_cast<unsigned char>(*__first_++) & 0x3f;
+      __value <<= 6;
+      __value |= static_cast<unsigned char>(*__first_++) & 0x3f;
+      __value <<= 6;
+      __value |= static_cast<unsigned char>(*__first_++) & 0x3f;
 
-        if (__value > 0x10FFFF) // Outside the valid Unicode range?
-          return {4, __value};
+      // These values should be encoded in 1, 2, or 3 UTF-8 code units.
+      if (__value < 0x10000) [[unlikely]]
+        return __consume_result_error;
 
-        return {0, __value};
-      }
-      break;
+      // A value too large is always encoded in 4 UTF-8 code units.
+      if (!__unicode::__is_code_point(__value)) [[unlikely]]
+        return __consume_result_error;
+
+      return {__value};
+    }
     }
     // An invalid number of leading ones can be garbage or a code unit in the
     // middle of a code point. By consuming one code unit the parser may get
     // "in sync" after a few code units.
-    return {1, static_cast<unsigned char>(*__first_++)};
+    ++__first_;
+    return __consume_result_error;
   }
-#    endif // _LIBCPP_STD_VER > 20
 
 private:
-  const char* __first_;
-  const char* __last_;
+  _Iterator __first_;
+  _Iterator __last_;
 };
 
 #    ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
@@ -225,75 +249,48 @@
 /// - 4 UTF-32 (for example Linux)
 template <>
 class __code_point_view<wchar_t> {
+  using _Iterator = typename basic_string_view<wchar_t>::const_iterator;
+
 public:
   static_assert(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4, "sizeof(wchar_t) has a not implemented value");
 
-  _LIBCPP_HIDE_FROM_ABI constexpr explicit __code_point_view(const wchar_t* __first, const wchar_t* __last)
+  _LIBCPP_HIDE_FROM_ABI constexpr explicit __code_point_view(_Iterator __first, _Iterator __last)
       : __first_(__first), __last_(__last) {}
 
-  _LIBCPP_HIDE_FROM_ABI constexpr const wchar_t* __position() const noexcept { return __first_; }
+  _LIBCPP_HIDE_FROM_ABI constexpr _Iterator __position() const noexcept { return __first_; }
   _LIBCPP_HIDE_FROM_ABI constexpr bool __at_end() const noexcept { return __first_ == __last_; }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr char32_t __consume() noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __consume_result __consume() noexcept {
     _LIBCPP_ASSERT(__first_ != __last_, "can't move beyond the end of input");
 
+    char32_t __value = static_cast<char32_t>(*__first_++);
     if constexpr (sizeof(wchar_t) == 2) {
-      char32_t __result = *__first_++;
-      // Is the code unit part of a surrogate pair? See
-      // https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF
-      if (__result >= 0xd800 && __result <= 0xDfff) {
-        // Malformed Unicode.
-        if (__first_ == __last_) [[unlikely]]
-          return __replacement_character;
+      if (__unicode::__is_low_surrogate(__value)) [[unlikely]]
+        return __consume_result_error;
 
-        __result -= 0xd800;
-        __result <<= 10;
-        __result += *__first_++ - 0xdc00;
-        __result += 0x10000;
+      if (__unicode::__is_high_surrogate(__value)) {
+        if (__first_ == __last_ || !__unicode::__is_low_surrogate(static_cast<char32_t>(*__first_))) [[unlikely]]
+          return __consume_result_error;
+
+        __value -= 0xd800;
+        __value <<= 10;
+        __value += static_cast<char32_t>(*__first_++) - 0xdc00;
+        __value += 0x10000;
+
+        if (!__unicode::__is_code_point(__value)) [[unlikely]]
+          return __consume_result_error;
       }
-      return __result;
-
-    } else if constexpr (sizeof(wchar_t) == 4) {
-      char32_t __result = *__first_++;
-      if (__result > 0x10FFFF) [[unlikely]]
-        return __replacement_character;
-      return __result;
     } else {
-      __libcpp_unreachable();
-    }
-  }
-
-#      if _LIBCPP_STD_VER > 20
-  _LIBCPP_HIDE_FROM_ABI constexpr __consume_p2286_result __consume_p2286() noexcept {
-    _LIBCPP_ASSERT(__first_ != __last_, "can't move beyond the end of input");
-
-    char32_t __result = *__first_++;
-    if constexpr (sizeof(wchar_t) == 2) {
-      // https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF
-      if (__is_surrogate_pair_high(__result)) {
-        // Malformed Unicode.
-        if (__first_ == __last_ || !__is_surrogate_pair_low(*(__first_ + 1))) [[unlikely]]
-          return {1, __result};
-
-        __result -= 0xd800;
-        __result <<= 10;
-        __result += *__first_++ - 0xdc00;
-        __result += 0x10000;
-      } else if (__is_surrogate_pair_low(__result))
-        // A code point shouldn't start with the low surrogate pair
-        return {1, __result};
-    } else {
-      if (__result > 0x10FFFF) [[unlikely]]
-        return {1, __result};
+      if (!__unicode::__is_scalar_value(__value)) [[unlikely]]
+        return __consume_result_error;
     }
 
-    return {0, __result};
+    return {__value};
   }
-#      endif // _LIBCPP_STD_VER > 20
 
 private:
-  const wchar_t* __first_;
-  const wchar_t* __last_;
+  _Iterator __first_;
+  _Iterator __last_;
 };
 #    endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
 
@@ -384,10 +381,12 @@
 /// Therefore only this code point is extracted.
 template <class _CharT>
 class __extended_grapheme_cluster_view {
+  using _Iterator = typename basic_string_view<_CharT>::const_iterator;
+
 public:
-  _LIBCPP_HIDE_FROM_ABI constexpr explicit __extended_grapheme_cluster_view(const _CharT* __first, const _CharT* __last)
+  _LIBCPP_HIDE_FROM_ABI constexpr explicit __extended_grapheme_cluster_view(_Iterator __first, _Iterator __last)
       : __code_point_view_(__first, __last),
-        __next_code_point_(__code_point_view_.__consume()),
+        __next_code_point_(__code_point_view_.__consume().__code_point),
         __next_prop_(__extended_grapheme_custer_property_boundary::__get_property(__next_code_point_)) {}
 
   struct __cluster {
@@ -401,13 +400,14 @@
     ///
     /// It's expected the caller has the start position and thus can determine
     /// the code unit range of the extended grapheme cluster.
-    const _CharT* __last_;
+    _Iterator __last_;
   };
 
   _LIBCPP_HIDE_FROM_ABI constexpr __cluster __consume() {
     _LIBCPP_ASSERT(
         __next_prop_ != __extended_grapheme_custer_property_boundary::__property::__eot,
         "can't move beyond the end of input");
+
     char32_t __code_point = __next_code_point_;
     if (!__code_point_view_.__at_end())
       return {__code_point, __get_break()};
@@ -422,17 +422,17 @@
   char32_t __next_code_point_;
   __extended_grapheme_custer_property_boundary::__property __next_prop_;
 
-  _LIBCPP_HIDE_FROM_ABI constexpr const _CharT* __get_break() {
+  _LIBCPP_HIDE_FROM_ABI constexpr _Iterator __get_break() {
     bool __ri_break_allowed         = true;
     bool __has_extened_pictographic = false;
     while (true) {
-      const _CharT* __result                                          = __code_point_view_.__position();
+      _Iterator __result                                              = __code_point_view_.__position();
       __extended_grapheme_custer_property_boundary::__property __prev = __next_prop_;
       if (__code_point_view_.__at_end()) {
         __next_prop_ = __extended_grapheme_custer_property_boundary::__property::__eot;
         return __result;
       }
-      __next_code_point_ = __code_point_view_.__consume();
+      __next_code_point_ = __code_point_view_.__consume().__code_point;
       __next_prop_       = __extended_grapheme_custer_property_boundary::__get_property(__next_code_point_);
 
       __has_extened_pictographic |=
@@ -444,8 +444,8 @@
   }
 };
 
-template <class _CharT>
-__extended_grapheme_cluster_view(const _CharT*, const _CharT*) -> __extended_grapheme_cluster_view<_CharT>;
+template <contiguous_iterator _Iterator>
+__extended_grapheme_cluster_view(_Iterator, _Iterator) -> __extended_grapheme_cluster_view<iter_value_t<_Iterator>>;
 
 #  else //  _LIBCPP_HAS_NO_UNICODE
 
@@ -453,36 +453,30 @@
 // This makes it easier to write code agnostic of the _LIBCPP_HAS_NO_UNICODE define.
 template <class _CharT>
 class __code_point_view {
+  using _Iterator = typename basic_string_view<_CharT>::const_iterator;
+
 public:
-  _LIBCPP_HIDE_FROM_ABI constexpr explicit __code_point_view(const _CharT* __first, const _CharT* __last)
+  _LIBCPP_HIDE_FROM_ABI constexpr explicit __code_point_view(_Iterator __first, _Iterator __last)
       : __first_(__first), __last_(__last) {}
 
   _LIBCPP_HIDE_FROM_ABI constexpr bool __at_end() const noexcept { return __first_ == __last_; }
-  _LIBCPP_HIDE_FROM_ABI constexpr const _CharT* __position() const noexcept { return __first_; }
+  _LIBCPP_HIDE_FROM_ABI constexpr _Iterator __position() const noexcept { return __first_; }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr char32_t __consume() noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __consume_result __consume() noexcept {
     _LIBCPP_ASSERT(__first_ != __last_, "can't move beyond the end of input");
-    return *__first_++;
+    return {static_cast<char32_t>(*__first_++)};
   }
 
-#    if _LIBCPP_STD_VER > 20
-  _LIBCPP_HIDE_FROM_ABI constexpr __consume_p2286_result __consume_p2286() noexcept {
-    _LIBCPP_ASSERT(__first_ != __last_, "can't move beyond the end of input");
-
-    return {0, std::make_unsigned_t<_CharT>(*__first_++)};
-  }
-#    endif // _LIBCPP_STD_VER > 20
-
 private:
-  const _CharT* __first_;
-  const _CharT* __last_;
+  _Iterator __first_;
+  _Iterator __last_;
 };
 
 #  endif //  _LIBCPP_HAS_NO_UNICODE
 
 } // namespace __unicode
 
-#endif //_LIBCPP_STD_VER > 17
+#endif //_LIBCPP_STD_VER >= 20
 
 _LIBCPP_END_NAMESPACE_STD