Import cobalt 25.master.0.1034729
diff --git a/url/BUILD.gn b/url/BUILD.gn
index a09bbe6..3d27a69 100644
--- a/url/BUILD.gn
+++ b/url/BUILD.gn
@@ -1,18 +1,20 @@
-# Copyright (c) 2013 The Chromium Authors. All rights reserved.
+# Copyright 2013 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import("//build/buildflag_header.gni")
-import("//testing/test.gni")
import("//testing/libfuzzer/fuzzer_test.gni")
+import("//testing/test.gni")
import("features.gni")
-if (is_android) {
+import("//build/config/cronet/config.gni")
+
+if (is_android || is_robolectric) {
import("//build/config/android/rules.gni")
}
-buildflag_header("url_features") {
- header = "url_features.h"
+buildflag_header("buildflags") {
+ header = "buildflags.h"
flags = [ "USE_PLATFORM_ICU_ALTERNATIVES=$use_platform_icu_alternatives" ]
}
@@ -47,7 +49,8 @@
"url_canon_stdurl.cc",
"url_constants.cc",
"url_constants.h",
- "url_export.h",
+ "url_features.cc",
+ "url_features.h",
"url_file.h",
"url_parse_file.cc",
"url_parse_internal.h",
@@ -56,25 +59,11 @@
"url_util_internal.h",
]
- defines = [ "URL_IMPLEMENTATION" ]
+ defines = [ "IS_URL_IMPL" ]
- configs += [
- # TODO(jschuh): crbug.com/167187 fix size_t to int truncations.
- "//build/config/compiler:no_size_t_to_int_warning",
- ]
+ public_deps = [ "//base" ]
- if (is_starboard) {
- configs += [ "//build/config/compiler:chromium_code" ]
- }
-
- deps = [
- "//base",
- "//base/third_party/dynamic_annotations",
- ]
-
- if (is_starboard) {
- deps += [ "//starboard/common" ]
- }
+ deps = [ "//base/third_party/dynamic_annotations" ]
if (is_win) {
# Don't conflict with Windows' "url.dll".
@@ -86,7 +75,7 @@
if (is_android) {
sources += [ "url_idna_icu_alternatives_android.cc" ]
deps += [
- ":url_features",
+ ":buildflags",
":url_java",
":url_jni_headers",
"//base",
@@ -105,30 +94,118 @@
"url_canon_icu.h",
"url_idna_icu.cc",
]
- deps += [ "//third_party/icu" ]
+ deps += [
+ "//base:i18n",
+ "//third_party/icu",
+ ]
}
}
-if (is_android && !is_starboard) {
- android_library("url_java") {
- java_files = [ "android/java/src/org/chromium/url/IDNStringUtil.java" ]
- deps = [
- "//base:base_java",
- ]
- }
-
+if ((is_android || is_robolectric) && !use_cobalt_customizations) {
generate_jni("url_jni_headers") {
sources = [
"android/java/src/org/chromium/url/IDNStringUtil.java",
+ "android/java/src/org/chromium/url/Origin.java",
]
- jni_package = "url"
+ }
+
+ generate_jni("gurl_jni_headers") {
+ sources = [
+ "android/java/src/org/chromium/url/GURL.java",
+ "android/java/src/org/chromium/url/Parsed.java",
+ ]
+ }
+
+ source_set("gurl_android") {
+ sources = [
+ "android/gurl_android.cc",
+ "android/gurl_android.h",
+ "android/parsed_android.cc",
+ "android/parsed_android.h",
+ ]
+
+ deps = [
+ ":gurl_jni_headers",
+ ":url",
+ "//base:base",
+ ]
+
+ if (is_robolectric) {
+ # Make jni.h available.
+ configs += [ "//third_party/jdk" ]
+ }
+ }
+
+ static_library("origin_android") {
+ sources = [ "android/origin_android.cc" ]
+
+ deps = [
+ ":gurl_android",
+ ":url",
+ ":url_jni_headers",
+ "//base",
+ ]
}
}
-if (!is_starboard || !is_win) {
- target(gtest_target_type, "url_unittests") {
- testonly = true
+if (is_android && !use_cobalt_customizations) {
+ android_library("url_java") {
+ sources = [ "android/java/src/org/chromium/url/IDNStringUtil.java" ]
+ deps = [ "//base:jni_java" ]
+ }
+}
+if (is_android && !is_cronet_build && !use_cobalt_customizations) {
+ android_library("gurl_java") {
+ sources = [
+ "android/java/src/org/chromium/url/GURL.java",
+ "android/java/src/org/chromium/url/Parsed.java",
+ "android/java/src/org/chromium/url/URI.java",
+ ]
+ deps = [
+ "//base:base_java",
+ "//base:jni_java",
+ "//build/android:build_java",
+ "//third_party/android_deps:com_google_errorprone_error_prone_annotations_java",
+ "//third_party/androidx:androidx_annotation_annotation_java",
+ "//url/mojom:url_mojom_gurl_java",
+ ]
+ annotation_processor_deps = [ "//base/android/jni_generator:jni_processor" ]
+ }
+
+ android_library("origin_java") {
+ sources = [ "android/java/src/org/chromium/url/Origin.java" ]
+ deps = [
+ ":gurl_java",
+ "//base:jni_java",
+ "//build/android:build_java",
+ "//mojo/public/java:bindings_java",
+ "//mojo/public/mojom/base:base_java",
+ "//url/mojom:url_mojom_origin_java",
+ ]
+ annotation_processor_deps = [ "//base/android/jni_generator:jni_processor" ]
+ }
+}
+
+source_set("url_test_support") {
+ testonly = true
+
+ sources = [
+ "gurl_abstract_tests.h",
+ "origin_abstract_tests.cc",
+ "origin_abstract_tests.h",
+ ]
+
+ public_deps = [
+ ":url",
+ "//base",
+ "//base/test:test_support",
+ "//testing/gmock",
+ "//testing/gtest",
+ ]
+}
+
+test("url_unittests") {
sources = [
"gurl_unittest.cc",
"origin_unittest.cc",
@@ -143,18 +220,16 @@
deps = [
":url",
+ ":url_test_support",
"//base",
"//base/test:test_support",
+ "//testing/gmock",
"//testing/gtest",
]
- if (!is_ios && !is_starboard) {
- sources += [ "mojom/url_gurl_mojom_traits_unittest.cc" ]
- }
-
if (use_platform_icu_alternatives) {
# Unit tests that are not supported by the current ICU alternatives on Android.
- if (is_android) {
+ if (is_android && !use_cobalt_customizations) {
sources -= [
"url_canon_icu_unittest.cc",
"url_canon_unittest.cc",
@@ -175,23 +250,39 @@
deps += [ "//third_party/icu:icuuc" ]
}
- # TODO(jschuh): crbug.com/167187 fix size_t to int truncations.
- configs += [ "//build/config/compiler:no_size_t_to_int_warning" ]
-
- if (!is_ios && !is_starboard) {
+ if (!is_ios && !is_cronet_build && !is_starboard) {
+ sources += [
+ "mojom/scheme_host_port_mojom_traits_unittest.cc",
+ "mojom/url_gurl_mojom_traits_unittest.cc",
+ ]
deps += [
"//mojo/core/embedder",
+ "//mojo/public/cpp/test_support:test_utils",
+ "//url/ipc:url_ipc_unittests",
+ "//url/mojom:mojom_traits",
"//url/mojom:test_url_mojom_gurl",
]
- }
}
}
-if (!is_starboard) {
-fuzzer_test("gurl_fuzzer") {
+if (!use_cobalt_customizations) {
+test("url_perftests") {
sources = [
- "gurl_fuzzer.cc",
+ "run_all_perftests.cc",
+ "url_parse_perftest.cc",
]
+
+ deps = [
+ ":url",
+ "//base",
+ "//base/test:test_support",
+ "//testing/gtest",
+ ]
+}
+}
+
+fuzzer_test("gurl_fuzzer") {
+ sources = [ "gurl_fuzzer.cc" ]
deps = [
":url",
"//base",
@@ -199,4 +290,118 @@
]
dict = "gurl_fuzzer.dict"
}
+
+if (is_android && !is_cronet_build && !use_cobalt_customizations) {
+ source_set("android_test_helper") {
+ testonly = true
+ sources = [
+ "android/gurl_java_test_helper.cc",
+ "android/origin_java_test_helper.cc",
+ ]
+ deps = [
+ ":gurl_android",
+ ":j_test_jni_headers",
+ ":origin_android",
+ ":url",
+ "//base/test:test_support",
+ ]
+ }
+
+ android_library("android_test_helper_java") {
+ testonly = true
+ annotation_processor_deps = [ "//base/android/jni_generator:jni_processor" ]
+ sources = [
+ "android/javatests/src/org/chromium/url/GURLJavaTestHelper.java",
+ "android/javatests/src/org/chromium/url/OriginJavaTestHelper.java",
+ ]
+ deps = [
+ ":gurl_java",
+ ":origin_java",
+ "//base:base_java_test_support",
+ "//base:jni_java",
+ ]
+ }
+
+ # Targets depending on gurl_junit_test_support do not need to bypass platform
+ # checks.
+ android_library("gurl_junit_test_support") {
+ testonly = true
+ sources = [ "android/test/java/src/org/chromium/url/JUnitTestGURLs.java" ]
+ deps = [ ":gurl_java" ]
+ }
+
+  # Unlike gurl_junit_test_support, targets depending on gurl_junit_shadows
+  # must bypass platform checks.
+ robolectric_library("gurl_junit_shadows") {
+ sources = [ "android/test/java/src/org/chromium/url/ShadowGURL.java" ]
+ deps = [
+ ":gurl_java",
+ ":gurl_junit_test_support",
+ ]
+ }
+
+ android_library("url_java_unit_tests") {
+ testonly = true
+ sources = [
+ "android/javatests/src/org/chromium/url/GURLJavaTest.java",
+ "android/javatests/src/org/chromium/url/JUnitTestGURLsTest.java",
+ "android/javatests/src/org/chromium/url/OriginJavaTest.java",
+ ]
+ deps = [
+ ":android_test_helper_java",
+ ":gurl_java",
+ ":gurl_junit_test_support",
+ ":origin_java",
+ "//base:base_java",
+ "//base:base_java_test_support",
+ "//base:jni_java",
+ "//content/public/test/android:content_java_test_support",
+ "//mojo/public/mojom/base:base_java",
+ "//third_party/androidx:androidx_core_core_java",
+ "//third_party/androidx:androidx_test_runner_java",
+ "//third_party/junit",
+ "//third_party/mockito:mockito_java",
+ "//url/mojom:url_mojom_gurl_java",
+ "//url/mojom:url_mojom_origin_java",
+ ]
+ annotation_processor_deps = [ "//base/android/jni_generator:jni_processor" ]
+ }
+
+ # See https://bugs.chromium.org/p/chromium/issues/detail?id=908819 for why we
+ # can't put 'java' in the name here.
+ generate_jni("j_test_jni_headers") {
+ testonly = true
+ sources = [
+ "android/javatests/src/org/chromium/url/GURLJavaTestHelper.java",
+ "android/javatests/src/org/chromium/url/OriginJavaTestHelper.java",
+ ]
+ }
+
+ robolectric_library("gurl_junit_tests") {
+ sources = [ "android/junit/src/org/chromium/url/ShadowGURLTest.java" ]
+ deps = [
+ ":gurl_java",
+ ":gurl_junit_shadows",
+ ":gurl_junit_test_support",
+ "//base:base_java_test_support",
+ "//base:base_junit_test_support",
+ "//base/test:test_support_java",
+ "//third_party/junit",
+ ]
+ }
+}
+
+if (is_robolectric) {
+ # Use this in robolectric_binary() targets if you just need GURL and //base
+ # functionality. Otherwise, define a custom shared_library().
+ shared_library("libgurl_robolectric") {
+ sources = [ "android/robolectric_test_main.cc" ]
+ deps = [
+ "//base",
+ "//url:gurl_android",
+ ]
+
+ # Make jni.h available.
+ configs += [ "//third_party/jdk" ]
+ }
}
diff --git a/url/DEPS b/url/DEPS
index 8ee3d2d..166f6a2 100644
--- a/url/DEPS
+++ b/url/DEPS
@@ -1,6 +1,4 @@
include_rules = [
- "+jni",
-
# Limit files that can depend on icu.
"-base/i18n",
"-third_party/icu",
@@ -11,6 +9,7 @@
"+base/i18n",
],
"url_(canon|idna)_icu(\.cc|_unittest\.cc)": [
+ "+base/i18n",
"+third_party/icu",
],
"run_all_unittests\.cc": [
diff --git a/url/DIR_METADATA b/url/DIR_METADATA
new file mode 100644
index 0000000..16c80be
--- /dev/null
+++ b/url/DIR_METADATA
@@ -0,0 +1,11 @@
+# Metadata information for this directory.
+#
+# For more information on DIR_METADATA files, see:
+# https://source.chromium.org/chromium/infra/infra/+/main:go/src/infra/tools/dirmd/README.md
+#
+# For the schema of this file, see Metadata message:
+# https://source.chromium.org/chromium/infra/infra/+/main:go/src/infra/tools/dirmd/proto/dir_metadata.proto
+
+monorail {
+ component: "Blink>Network"
+}
\ No newline at end of file
diff --git a/url/METADATA b/url/METADATA
index 5d634cf..8985d66 100644
--- a/url/METADATA
+++ b/url/METADATA
@@ -5,20 +5,20 @@
third_party {
identifier {
type: "ChromiumVersion"
- value: "70.0.3538.110" # from https://chromereleases.googleblog.com/2018/11/stable-channel-update-for-desktop_19.html
+ value: "114.0.5735.358" # from https://chromereleases.googleblog.com/2024/03/long-term-support-channel-update-for_26.html
}
identifier {
type: "Git"
value: "https://chromium.googlesource.com/chromium/src.git"
- version: "ec3092c8d21e20e39698f5272c8a7f71c3ee88a2"
+ version: "1759c6ae9316996b9f150c0ce9d0ca78a3d15c02"
}
identifier {
type: "UpstreamSubdir"
value: "url"
}
last_upgrade_date {
- year: 2018
- month: 8
- day: 30
+ year: 2023
+ month: 11
+ day: 1
}
}
diff --git a/url/OWNERS b/url/OWNERS
index 7badfe9..58f2e5f 100644
--- a/url/OWNERS
+++ b/url/OWNERS
@@ -1,4 +1,9 @@
+set noparent
+# NOTE: keep this in sync with lsc-owners-override@chromium.org owners
+# by emailing lsc-policy@chromium.org when this list changes.
csharrison@chromium.org
dcheng@chromium.org
mkwst@chromium.org
-palmer@chromium.org
+timothygu@chromium.org
+# NOTE: keep this in sync with lsc-owners-override@chromium.org owners
+# by emailing lsc-policy@chromium.org when this list changes.
diff --git a/url/README.md b/url/README.md
index 412bd43..8d63fda 100644
--- a/url/README.md
+++ b/url/README.md
@@ -8,7 +8,7 @@
### Parsing
The `url_parse.*` files are the parser. This code does no string
-transformations. Its only job is to take an input string and splits out the
+transformations. Its only job is to take an input string and split out the
components of the URL as best as it can deduce them, for a given type of URL.
Parsing can never fail, it will take its best guess. This layer does not
have logic for determining the type of URL parsing to apply, that needs to
@@ -53,7 +53,7 @@
The main header to include is `url/url_util.h`.
-### GURL and Origin
+### Google URL (GURL) and Origin
At the highest layer, a C++ object for representing URLs is provided. This
object uses STL. Most uses need only this layer. Include `url/gurl.h`.
diff --git a/url/android/OWNERS b/url/android/OWNERS
new file mode 100644
index 0000000..c19374d
--- /dev/null
+++ b/url/android/OWNERS
@@ -0,0 +1 @@
+mthiesse@chromium.org
diff --git a/url/android/gurl_android.cc b/url/android/gurl_android.cc
new file mode 100644
index 0000000..bf398a1
--- /dev/null
+++ b/url/android/gurl_android.cc
@@ -0,0 +1,160 @@
+// Copyright 2019 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/android/gurl_android.h"
+
+#include <jni.h>
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+#include "base/android/jni_android.h"
+#include "base/android/jni_string.h"
+#include "base/functional/bind.h"
+#include "base/functional/callback.h"
+#include "base/memory/ptr_util.h"
+#include "url/android/parsed_android.h"
+#include "url/gurl_jni_headers/GURL_jni.h"
+#include "url/third_party/mozilla/url_parse.h"
+
+using base::android::AttachCurrentThread;
+using base::android::JavaParamRef;
+using base::android::JavaRef;
+using base::android::ScopedJavaLocalRef;
+
+namespace url {
+
+namespace {
+
+// Parses |uri| into a GURL. A null Java string is not converted; it yields a
+// default-constructed GURL instead.
+static GURL FromJString(JNIEnv* env, const JavaRef<jstring>& uri) {
+  return uri ? GURL(base::android::ConvertJavaStringToUTF16(env, uri))
+             : GURL();
+}
+
+// Rebuilds a native GURL from the state cached on the Java side. Takes
+// ownership of the heap-allocated Parsed that |parsed_ptr| points to and
+// frees it; the pointer must not be used again by the caller.
+static std::unique_ptr<GURL> FromJavaGURL(JNIEnv* env,
+                                          const JavaRef<jstring>& j_spec,
+                                          bool is_valid,
+                                          jlong parsed_ptr) {
+  // Adopt the Parsed right away so it is released when this scope exits.
+  std::unique_ptr<Parsed> owned_parsed(reinterpret_cast<Parsed*>(parsed_ptr));
+  const std::string spec_utf8 = ConvertJavaStringToUTF8(env, j_spec);
+  return std::make_unique<GURL>(spec_utf8.data(), owned_parsed->Length(),
+                                *owned_parsed, is_valid);
+}
+
+// Populates the Java GURL |target| from the native |gurl| by invoking the
+// Java-side init() with the (possibly invalid) spec, the validity flag, and a
+// Java Parsed object built from the native parse result.
+static void InitFromGURL(JNIEnv* env,
+                         const GURL& gurl,
+                         const JavaRef<jobject>& target) {
+  Java_GURL_init(
+      env, target,
+      base::android::ConvertUTF8ToJavaString(env, gurl.possibly_invalid_spec()),
+      gurl.is_valid(),
+      ParsedAndroid::InitFromParsed(env,
+                                    gurl.parsed_for_possibly_invalid_spec()));
+}
+
+// |GetArrayLength| gives no guarantee about its result (for instance, it may
+// be -1 when |jarray| is not a valid Java array), so this wrapper clamps the
+// value and always hands back a valid, non-negative size.
+template <typename JavaArrayType>
+size_t SafeGetArrayLength(JNIEnv* env, const JavaRef<JavaArrayType>& jarray) {
+  DCHECK(jarray);
+  const jsize raw_length = env->GetArrayLength(jarray.obj());
+  DCHECK_GE(raw_length, 0) << "Invalid array length: " << raw_length;
+  if (raw_length < 0)
+    return 0;
+  return static_cast<size_t>(raw_length);
+}
+
+} // namespace
+
+// static
+// Converts the Java GURL |j_gurl| into a native GURL. The Java toNativeGURL()
+// call returns (as a jlong) a raw pointer to a GURL that was released from a
+// unique_ptr in JNI_GURL_CreateNative; it is re-wrapped here so the caller
+// owns it and it gets deleted.
+std::unique_ptr<GURL> GURLAndroid::ToNativeGURL(
+    JNIEnv* env,
+    const base::android::JavaRef<jobject>& j_gurl) {
+  return base::WrapUnique<GURL>(
+      reinterpret_cast<GURL*>(Java_GURL_toNativeGURL(env, j_gurl)));
+}
+
+// Appends a native copy of every Java GURL in |array| to |out|, preserving
+// order. |out| must be non-null and empty. A null |array| leaves |out|
+// untouched.
+void GURLAndroid::JavaGURLArrayToGURLVector(
+    JNIEnv* env,
+    const base::android::JavaRef<jobjectArray>& array,
+    std::vector<GURL>* out) {
+  DCHECK(out);
+  DCHECK(out->empty());
+  if (!array)
+    return;
+  size_t len = SafeGetArrayLength(env, array);
+  out->reserve(len);
+  for (size_t i = 0; i < len; ++i) {
+    ScopedJavaLocalRef<jobject> j_gurl(
+        env, static_cast<jobject>(env->GetObjectArrayElement(array.obj(), i)));
+    // Java's toNativeGURL() hands back a heap-allocated GURL that the caller
+    // owns. Go through ToNativeGURL() so the allocation is freed once its
+    // contents have been copied into |out|, rather than leaked per element.
+    out->emplace_back(*ToNativeGURL(env, j_gurl));
+  }
+}
+
+// static
+// Creates a new Java GURL through its no-argument constructor and fills it in
+// from the native |gurl| (spec, validity, and parsed components).
+ScopedJavaLocalRef<jobject> GURLAndroid::FromNativeGURL(JNIEnv* env,
+                                                        const GURL& gurl) {
+  ScopedJavaLocalRef<jobject> j_gurl = Java_GURL_Constructor(env);
+  InitFromGURL(env, gurl, j_gurl);
+  return j_gurl;
+}
+
+// static
+// Returns whatever the Java static GURL.emptyGURL() returns.
+ScopedJavaLocalRef<jobject> GURLAndroid::EmptyGURL(JNIEnv* env) {
+  return Java_GURL_emptyGURL(env);
+}
+
+// static
+// Builds a Java GURL[] with one slot per element of |v|, storing the Java
+// objects in the same order as they appear in |v|.
+ScopedJavaLocalRef<jobjectArray> GURLAndroid::ToJavaArrayOfGURLs(
+    JNIEnv* env,
+    base::span<ScopedJavaLocalRef<jobject>> v) {
+  jclass clazz = org_chromium_url_GURL_clazz(env);
+  DCHECK(clazz);
+  // Slots start out null and are filled in by the loop below.
+  jobjectArray joa = env->NewObjectArray(v.size(), clazz, nullptr);
+  // NOTE(review): presumably surfaces a pending Java exception from the
+  // allocation above — confirm against base/android/jni_android.h.
+  base::android::CheckException(env);
+
+  for (size_t i = 0; i < v.size(); ++i) {
+    env->SetObjectArrayElement(joa, i, v[i].obj());
+  }
+  return ScopedJavaLocalRef<jobjectArray>(env, joa);
+}
+
+// Native side of the Java Natives.getOrigin(): rebuilds the native GURL from
+// the Java-side state and initializes |target| with the result of
+// DeprecatedGetOriginAsURL(). The Parsed behind |parsed_ptr| is deleted by
+// FromJavaGURL.
+static void JNI_GURL_GetOrigin(JNIEnv* env,
+                               const JavaParamRef<jstring>& j_spec,
+                               jboolean is_valid,
+                               jlong parsed_ptr,
+                               const JavaParamRef<jobject>& target) {
+  std::unique_ptr<GURL> gurl = FromJavaGURL(env, j_spec, is_valid, parsed_ptr);
+  InitFromGURL(env, gurl->DeprecatedGetOriginAsURL(), target);
+}
+
+// Native side of the Java Natives.domainIs(): rebuilds the native GURL from
+// the Java-side state (consuming the Parsed behind |parsed_ptr|) and forwards
+// |j_domain|, converted to UTF-8, to GURL::DomainIs().
+static jboolean JNI_GURL_DomainIs(JNIEnv* env,
+                                  const JavaParamRef<jstring>& j_spec,
+                                  jboolean is_valid,
+                                  jlong parsed_ptr,
+                                  const JavaParamRef<jstring>& j_domain) {
+  const std::unique_ptr<GURL> native_url =
+      FromJavaGURL(env, j_spec, is_valid, parsed_ptr);
+  return native_url->DomainIs(ConvertJavaStringToUTF8(env, j_domain));
+}
+
+// Native side of the Java Natives.init(): parses |uri| and writes the
+// resulting spec, validity, and parsed components into the Java |target|.
+static void JNI_GURL_Init(JNIEnv* env,
+                          const base::android::JavaParamRef<jstring>& uri,
+                          const base::android::JavaParamRef<jobject>& target) {
+  InitFromGURL(env, FromJString(env, uri), target);
+}
+
+// Native side of the Java Natives.createNative(): rebuilds a native GURL from
+// the Java-side state and returns a raw pointer to it as an integer. The
+// unique_ptr is release()d, so ownership passes to whoever receives the
+// value; GURLAndroid::ToNativeGURL re-wraps it in a unique_ptr.
+static jlong JNI_GURL_CreateNative(JNIEnv* env,
+                                   const JavaParamRef<jstring>& j_spec,
+                                   jboolean is_valid,
+                                   jlong parsed_ptr) {
+  return reinterpret_cast<intptr_t>(
+      FromJavaGURL(env, j_spec, is_valid, parsed_ptr).release());
+}
+
+} // namespace url
diff --git a/url/android/gurl_android.h b/url/android/gurl_android.h
new file mode 100644
index 0000000..8b35607
--- /dev/null
+++ b/url/android/gurl_android.h
@@ -0,0 +1,36 @@
+// Copyright 2019 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_ANDROID_GURL_ANDROID_H_
+#define URL_ANDROID_GURL_ANDROID_H_
+
+#include <memory>
+
+#include "base/android/scoped_java_ref.h"
+#include "base/containers/span.h"
+#include "url/gurl.h"
+
+namespace url {
+
+// Static helpers for converting between native GURLs and Java GURL objects.
+//
+// NOTE(review): std::vector is used below, but <vector> is not among this
+// header's visible includes — confirm it is not relying on a transitive
+// include.
+class GURLAndroid {
+ public:
+  // Returns a newly allocated native GURL built from the Java GURL |j_gurl|.
+  static std::unique_ptr<GURL> ToNativeGURL(
+      JNIEnv* env,
+      const base::android::JavaRef<jobject>& j_gurl);
+  // Returns a new Java GURL initialized from the native |gurl|.
+  static base::android::ScopedJavaLocalRef<jobject> FromNativeGURL(
+      JNIEnv* env,
+      const GURL& gurl);
+  // Returns the Java-side empty GURL.
+  static base::android::ScopedJavaLocalRef<jobject> EmptyGURL(JNIEnv* env);
+  // Returns a Java array holding the Java GURL objects in |v|, in order.
+  static base::android::ScopedJavaLocalRef<jobjectArray> ToJavaArrayOfGURLs(
+      JNIEnv* env,
+      base::span<base::android::ScopedJavaLocalRef<jobject>> v);
+  // Converts each Java GURL in |gurl_array| to a native GURL and appends it
+  // to |out|.
+  static void JavaGURLArrayToGURLVector(
+      JNIEnv* env,
+      const base::android::JavaRef<jobjectArray>& gurl_array,
+      std::vector<GURL>* out);
+};
+
+} // namespace url
+
+#endif // URL_ANDROID_GURL_ANDROID_H_
diff --git a/url/android/gurl_java_test_helper.cc b/url/android/gurl_java_test_helper.cc
new file mode 100644
index 0000000..5ad1b44
--- /dev/null
+++ b/url/android/gurl_java_test_helper.cc
@@ -0,0 +1,72 @@
+// Copyright 2019 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stddef.h>
+
+#include "base/android/jni_android.h"
+#include "base/android/jni_string.h"
+#include "base/test/icu_test_util.h"
+#include "url/android/gurl_android.h"
+#include "url/gurl.h"
+#include "url/j_test_jni_headers/GURLJavaTestHelper_jni.h"
+
+using base::android::AttachCurrentThread;
+
+namespace url {
+
+// Test-only JNI hook: lets the Java test helper initialize ICU for testing.
+static void JNI_GURLJavaTestHelper_InitializeICU(JNIEnv* env) {
+  base::test::InitializeICUForTesting();
+}
+
+// Test-only JNI hook. For every case below, parses the string natively and
+// also round-trips it through a Java GURL (createGURL() in Java, then
+// ToNativeGURL() back), and raises java.lang.AssertionError in the JVM on the
+// first pair that does not compare equal.
+//
+// NOTE(review): std::stringstream is used here but <sstream> is not included
+// by this file directly — confirm it arrives through another header.
+static void JNI_GURLJavaTestHelper_TestGURLEquivalence(JNIEnv* env) {
+  const char* cases[] = {
+      // Common Standard URLs.
+      "https://www.google.com",
+      "https://www.google.com/",
+      "https://www.google.com/maps.htm",
+      "https://www.google.com/maps/",
+      "https://www.google.com/index.html",
+      "https://www.google.com/index.html?q=maps",
+      "https://www.google.com/index.html#maps/",
+      "https://foo:bar@www.google.com/maps.htm",
+      "https://www.google.com/maps/au/index.html",
+      "https://www.google.com/maps/au/north",
+      "https://www.google.com/maps/au/north/",
+      "https://www.google.com/maps/au/index.html?q=maps#fragment/",
+      "http://www.google.com:8000/maps/au/index.html?q=maps#fragment/",
+      "https://www.google.com/maps/au/north/?q=maps#fragment",
+      "https://www.google.com/maps/au/north?q=maps#fragment",
+      // Less common standard URLs.
+      "filesystem:http://www.google.com/temporary/bar.html?baz=22",
+      "file:///temporary/bar.html?baz=22",
+      "ftp://foo/test/index.html",
+      "gopher://foo/test/index.html",
+      "ws://foo/test/index.html",
+      // Non-standard URLs.
+      "chrome://foo/bar.html",
+      "httpa://foo/test/index.html",
+      "blob:https://foo.bar/test/index.html",
+      "about:blank",
+      "data:foobar",
+      "scheme:opaque_data",
+      // Invalid URLs.
+      "foobar",
+  };
+  for (const char* uri : cases) {
+    // Reference result: parse directly in native code.
+    GURL gurl(uri);
+    // Same input, but constructed in Java and converted back to native.
+    base::android::ScopedJavaLocalRef<jobject> j_gurl =
+        Java_GURLJavaTestHelper_createGURL(
+            env, base::android::ConvertUTF8ToJavaString(env, uri));
+    std::unique_ptr<GURL> gurl2 = GURLAndroid::ToNativeGURL(env, j_gurl);
+    if (gurl != *gurl2) {
+      std::stringstream ss;
+      ss << "GURL not equivalent: " << gurl << ", " << *gurl2;
+      // Report the failure to the Java test as a pending exception and stop
+      // at the first mismatch.
+      env->ThrowNew(env->FindClass("java/lang/AssertionError"),
+                    ss.str().data());
+      return;
+    }
+  }
+}
+
+} // namespace url
diff --git a/url/android/java/src/org/chromium/url/GURL.java b/url/android/java/src/org/chromium/url/GURL.java
new file mode 100644
index 0000000..34bd924
--- /dev/null
+++ b/url/android/java/src/org/chromium/url/GURL.java
@@ -0,0 +1,413 @@
+// Copyright 2019 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package org.chromium.url;
+
+import android.os.SystemClock;
+import android.text.TextUtils;
+
+import androidx.annotation.Nullable;
+import androidx.annotation.VisibleForTesting;
+
+import com.google.errorprone.annotations.DoNotMock;
+
+import org.chromium.base.Log;
+import org.chromium.base.ThreadUtils;
+import org.chromium.base.annotations.CalledByNative;
+import org.chromium.base.annotations.JNINamespace;
+import org.chromium.base.annotations.NativeMethods;
+import org.chromium.base.library_loader.LibraryLoader;
+import org.chromium.base.metrics.RecordHistogram;
+import org.chromium.base.task.PostTask;
+import org.chromium.base.task.TaskTraits;
+import org.chromium.build.annotations.MainDex;
+import org.chromium.url.mojom.Url;
+import org.chromium.url.mojom.UrlConstants;
+
+import java.util.Random;
+
+/**
+ * An immutable Java wrapper for GURL, Chromium's URL parsing library.
+ *
+ * This class is safe to use during startup, but will block on the native library being sufficiently
+ * loaded to use native GURL (and will not wait for content initialization). In practice it's very
+ * unlikely that this will actually block startup unless used extremely early, in which case you
+ * should probably seek an alternative solution to using GURL.
+ *
+ * The design of this class avoids destruction/finalization by caching all values necessary to
+ * reconstruct a GURL in Java, allowing it to be much faster in the common case and easier to use.
+ */
+@JNINamespace("url")
+@MainDex
+@DoNotMock("Create a real instance instead. For Robolectric, see JUnitTestGURLs.java")
+public class GURL {
+ private static final String TAG = "GURL";
+ /* package */ static final int SERIALIZER_VERSION = 1;
+ /* package */ static final char SERIALIZER_DELIMITER = '\0';
+
+ @FunctionalInterface
+ public interface ReportDebugThrowableCallback {
+ void run(Throwable throwable);
+ }
+
+ /**
+ * Exception signalling that a GURL failed to parse due to an unexpected version marker in the
+ * serialized input.
+ */
+ public static class BadSerializerVersionException extends RuntimeException {}
+
+ // Right now this is only collecting reports on Canary which has a relatively small population.
+ private static final int DEBUG_REPORT_PERCENTAGE = 10;
+ private static ReportDebugThrowableCallback sReportCallback;
+
+ // TODO(https://crbug.com/1039841): Right now we return a new String with each request for a
+ // GURL component other than the spec itself. Should we cache return Strings (as
+ // WeakReference?) so that callers can share String memory?
+ private String mSpec;
+ private boolean mIsValid;
+ private Parsed mParsed;
+
+ private static class Holder { private static GURL sEmptyGURL = new GURL(""); }
+
+ @CalledByNative
+ public static GURL emptyGURL() {
+ return Holder.sEmptyGURL;
+ }
+
+ /**
+ * Create a new GURL.
+ *
+ * @param uri The string URI representation to parse into a GURL.
+ */
+ public GURL(String uri) {
+ // Avoid a jni hop (and initializing the native library) for empty GURLs.
+ if (TextUtils.isEmpty(uri)) {
+ mSpec = "";
+ mParsed = Parsed.createEmpty();
+ return;
+ }
+ ensureNativeInitializedForGURL();
+ getNatives().init(uri, this);
+ }
+
+ @CalledByNative
+ protected GURL() {}
+
+ /**
+ * Enables debug stack trace gathering for GURL.
+ */
+ public static void setReportDebugThrowableCallback(ReportDebugThrowableCallback callback) {
+ sReportCallback = callback;
+ }
+
+ /**
+ * Ensures that the native library is sufficiently loaded for GURL usage.
+ *
+ * This function is public so that GURL-related usage like the UrlFormatter also counts towards
+ * the "Startup.Android.GURLEnsureMainDexInitialized" histogram.
+ */
+ public static void ensureNativeInitializedForGURL() {
+ if (LibraryLoader.getInstance().isInitialized()) return;
+ long time = SystemClock.elapsedRealtime();
+ LibraryLoader.getInstance().ensureMainDexInitialized();
+ // Record metrics only for the UI thread where the delay in loading the library is relevant.
+ if (ThreadUtils.runningOnUiThread()) {
+ // "MainDex" in name of histogram is a dated reference to when we used to have 2
+ // sections of the native library, main dex and non-main dex. Maintaining name for
+ // consistency in metrics.
+ RecordHistogram.recordTimesHistogram("Startup.Android.GURLEnsureMainDexInitialized",
+ SystemClock.elapsedRealtime() - time);
+ if (sReportCallback != null && new Random().nextInt(100) < DEBUG_REPORT_PERCENTAGE) {
+ final Throwable throwable =
+ new Throwable("This is not a crash, please ignore. See crbug.com/1065377.");
+ // This isn't an assert, because by design this is possible, but we would prefer
+ // this path does not get hit more than necessary and getting stack traces from the
+ // wild will help find issues.
+ PostTask.postTask(TaskTraits.BEST_EFFORT_MAY_BLOCK,
+ () -> { sReportCallback.run(throwable); });
+ }
+ }
+ }
+
+ /** @return true if the GURL is null, empty, or invalid. */
+ public static boolean isEmptyOrInvalid(@Nullable GURL gurl) {
+ return gurl == null || gurl.isEmpty() || !gurl.isValid();
+ }
+
+    /**
+     * Sets the cached state of this GURL. Called from native code, from deserialization, and
+     * from {@link #initForTesting}.
+     *
+     * @param spec The (possibly invalid) spec; asserted to contain only US-ASCII characters.
+     * @param isValid The validity flag to cache.
+     * @param parsed The component begin/length indices into {@code spec}.
+     */
+    @CalledByNative
+    private void init(String spec, boolean isValid, Parsed parsed) {
+        mSpec = spec;
+        // Ensure that the spec only contains US-ASCII or the parsed indices will be wrong.
+        assert mSpec.matches("\\A\\p{ASCII}*\\z");
+        mIsValid = isValid;
+        mParsed = parsed;
+    }
+
+ @CalledByNative
+ private long toNativeGURL() {
+ return getNatives().createNative(mSpec, mIsValid, mParsed.toNativeParsed());
+ }
+
+ /**
+ * See native GURL::is_valid().
+ */
+ public boolean isValid() {
+ return mIsValid;
+ }
+
+    /**
+     * See native GURL::spec().
+     *
+     * @return The spec if this GURL is valid or the spec is empty. For an invalid, non-empty
+     *         GURL this trips an assert; if asserts are disabled it returns the empty string.
+     */
+    public String getSpec() {
+        if (isValid() || mSpec.isEmpty()) return mSpec;
+        assert false : "Trying to get the spec of an invalid URL!";
+        return "";
+    }
+
+ /**
+ * @return Either a valid Spec (see {@link #getSpec}), or an empty string.
+ */
+ public String getValidSpecOrEmpty() {
+ if (isValid()) return mSpec;
+ return "";
+ }
+
+ /**
+ * See native GURL::possibly_invalid_spec().
+ */
+ public String getPossiblyInvalidSpec() {
+ return mSpec;
+ }
+
+    /**
+     * Extracts one URL component from the cached spec.
+     *
+     * @param begin Index in {@code mSpec} of the component's first character.
+     * @param length Number of characters in the component.
+     * @return The component text, or the empty string when {@code length <= 0}.
+     */
+    private String getComponent(int begin, int length) {
+        if (length <= 0) return "";
+        return mSpec.substring(begin, begin + length);
+    }
+
+ /**
+ * See native GURL::scheme().
+ */
+ public String getScheme() {
+ return getComponent(mParsed.mSchemeBegin, mParsed.mSchemeLength);
+ }
+
+ /**
+ * See native GURL::username().
+ */
+ public String getUsername() {
+ return getComponent(mParsed.mUsernameBegin, mParsed.mUsernameLength);
+ }
+
+ /**
+ * See native GURL::password().
+ */
+ public String getPassword() {
+ return getComponent(mParsed.mPasswordBegin, mParsed.mPasswordLength);
+ }
+
+ /**
+ * See native GURL::host().
+ */
+ public String getHost() {
+ return getComponent(mParsed.mHostBegin, mParsed.mHostLength);
+ }
+
+ /**
+ * See native GURL::port().
+ *
+ * Note: Do not convert this to an integer yourself. See native GURL::IntPort().
+ */
+ public String getPort() {
+ return getComponent(mParsed.mPortBegin, mParsed.mPortLength);
+ }
+
+ /**
+ * See native GURL::path().
+ */
+ public String getPath() {
+ return getComponent(mParsed.mPathBegin, mParsed.mPathLength);
+ }
+
+ /**
+ * See native GURL::query().
+ */
+ public String getQuery() {
+ return getComponent(mParsed.mQueryBegin, mParsed.mQueryLength);
+ }
+
+ /**
+ * See native GURL::ref().
+ */
+ public String getRef() {
+ return getComponent(mParsed.mRefBegin, mParsed.mRefLength);
+ }
+
+ /**
+ * @return Whether the GURL is the empty String.
+ */
+ public boolean isEmpty() {
+ return mSpec.isEmpty();
+ }
+
+ /**
+ * See native GURL::GetOrigin().
+ */
+ public GURL getOrigin() {
+ GURL target = new GURL();
+ getOriginInternal(target);
+ return target;
+ }
+
+ protected void getOriginInternal(GURL target) {
+ getNatives().getOrigin(mSpec, mIsValid, mParsed.toNativeParsed(), target);
+ }
+
+ /**
+ * See native GURL::DomainIs().
+ */
+ public boolean domainIs(String domain) {
+ return getNatives().domainIs(mSpec, mIsValid, mParsed.toNativeParsed(), domain);
+ }
+
+ @Override
+ public final int hashCode() {
+ return mSpec.hashCode();
+ }
+
+ @Override
+ public final boolean equals(Object other) {
+ if (other == this) return true;
+ if (!(other instanceof GURL)) return false;
+ return mSpec.equals(((GURL) other).mSpec);
+ }
+
+    /**
+     * Serialize a GURL to a String, to be used with {@link GURL#deserialize(String)}.
+     *
+     * The output is a sequence of {@link #SERIALIZER_DELIMITER}-separated fields: the length of
+     * everything after the first delimiter, the serializer version, the validity flag, the
+     * serialized {@link Parsed}, and finally the spec.
+     *
+     * Note that a serialized GURL should only be used internally to Chrome, and should *never* be
+     * used if coming from an untrusted source.
+     *
+     * @return A serialized GURL.
+     */
+    public final String serialize() {
+        StringBuilder builder = new StringBuilder();
+        builder.append(SERIALIZER_VERSION).append(SERIALIZER_DELIMITER);
+        builder.append(mIsValid).append(SERIALIZER_DELIMITER);
+        builder.append(mParsed.serialize()).append(SERIALIZER_DELIMITER);
+        builder.append(mSpec);
+        String serialization = builder.toString();
+        // Prefix with the payload length so deserialization can detect truncation/corruption.
+        return Integer.toString(serialization.length()) + SERIALIZER_DELIMITER + serialization;
+    }
+
+    /**
+     * Deserialize a GURL serialized with {@link GURL#serialize()}. This will re-parse in case of
+     * version mismatch, which may trigger undesired native loading. See
+     * {@link #deserializeLatestVersionOnly} if you want to fail in case of version mismatch.
+     *
+     * This function should *never* be used on a String coming from an untrusted source.
+     *
+     * @param gurl The serialized GURL; may be null or empty.
+     * @return The deserialized GURL. Never null: the empty GURL is returned when the input is
+     *         null or empty, or when deserialization fails for any reason other than a version
+     *         mismatch.
+     */
+    public static GURL deserialize(@Nullable String gurl) {
+        try {
+            return deserializeLatestVersionOnly(gurl);
+        } catch (BadSerializerVersionException be) {
+            // Just re-parse the GURL on version changes.
+            String[] tokens = gurl.split(Character.toString(SERIALIZER_DELIMITER));
+            return new GURL(getSpecFromTokens(gurl, tokens));
+        } catch (Exception e) {
+            // This is unexpected, maybe the storage got corrupted somehow?
+            Log.w(TAG, "Exception while deserializing a GURL: " + gurl, e);
+            return emptyGURL();
+        }
+    }
+
+    /**
+     * Deserialize a GURL serialized with {@link #serialize()}, throwing {@link
+     * BadSerializerVersionException} if the serialized input has a version other than the
+     * latest. This function should never be used on a String coming from an untrusted source.
+     *
+     * @param gurl The serialized GURL; null or empty input yields the empty GURL.
+     * @return The deserialized GURL.
+     * @throws IllegalArgumentException if the length prefix does not match the input's length.
+     * @throws BadSerializerVersionException if the version field is not
+     *         {@link #SERIALIZER_VERSION}.
+     */
+    public static GURL deserializeLatestVersionOnly(@Nullable String gurl) {
+        if (TextUtils.isEmpty(gurl)) return emptyGURL();
+        String[] tokens = gurl.split(Character.toString(SERIALIZER_DELIMITER));
+
+        // First token MUST always be the length of the serialized data.
+        String length = tokens[0];
+        // The prefix counts everything after "<length><delimiter>", hence the "+ 1".
+        if (gurl.length() != Integer.parseInt(length) + length.length() + 1) {
+            throw new IllegalArgumentException("Serialized GURL had the wrong length.");
+        }
+
+        String spec = getSpecFromTokens(gurl, tokens);
+        // Second token MUST always be the version number.
+        int version = Integer.parseInt(tokens[1]);
+        if (version != SERIALIZER_VERSION) {
+            throw new BadSerializerVersionException();
+        }
+
+        boolean isValid = Boolean.parseBoolean(tokens[2]);
+        // The Parsed fields start at token index 3.
+        Parsed parsed = Parsed.deserialize(tokens, 3);
+        GURL result = new GURL();
+        result.init(spec, isValid, parsed);
+        return result;
+    }
+
+    /**
+     * Picks the spec out of a tokenized serialization.
+     *
+     * @param gurl The full serialized string.
+     * @param tokens {@code gurl} split on {@link #SERIALIZER_DELIMITER}.
+     * @return The spec, which may be the empty string.
+     */
+    private static String getSpecFromTokens(String gurl, String[] tokens) {
+        // An empty spec leaves the serialization ending in a delimiter and so never gets a
+        // token of its own; handle that case before reading the last token.
+        if (gurl.endsWith(Character.toString(SERIALIZER_DELIMITER))) return "";
+        // Otherwise the last token MUST always be the original spec.
+        return tokens[tokens.length - 1];
+    }
+
+ /**
+ * Returns the instance of {@link Natives}. The Robolectric Shadow intercepts invocations of
+ * this method.
+ *
+ * <p>Unlike {@code GURLJni.TEST_HOOKS.setInstanceForTesting}, shadowing this method doesn't
+ * rely on tests correctly cleaning up global state.
+ */
+ private static Natives getNatives() {
+ return GURLJni.get();
+ }
+
+ /** Inits this GURL with the internal state of another GURL. */
+ @VisibleForTesting
+ /* package */ void initForTesting(GURL gurl) {
+ init(gurl.mSpec, gurl.mIsValid, gurl.mParsed);
+ }
+
+    /** @return A Mojom representation of this URL. */
+    public Url toMojom() {
+        Url url = new Url();
+        // See url/mojom/url_gurl_mojom_traits.cc. Empty, over-long, and invalid URLs are all
+        // represented by the empty string.
+        final String spec = getPossiblyInvalidSpec();
+        final boolean sendAsEmpty = TextUtils.isEmpty(spec)
+                || spec.length() > UrlConstants.MAX_URL_CHARS || !isValid();
+        url.url = sendAsEmpty ? "" : spec;
+        return url;
+    }
+
+ @NativeMethods
+ interface Natives {
+ /**
+ * Initializes the provided |target| by parsing the provided |uri|.
+ */
+ void init(String uri, GURL target);
+
+ /**
+ * Reconstructs the native GURL for this Java GURL and initializes |target| with its Origin.
+ */
+ void getOrigin(String spec, boolean isValid, long nativeParsed, GURL target);
+
+ /**
+ * Reconstructs the native GURL for this Java GURL, and calls GURL.DomainIs.
+ */
+ boolean domainIs(String spec, boolean isValid, long nativeParsed, String domain);
+
+ /**
+ * Reconstructs the native GURL for this Java GURL, returning its native pointer.
+ */
+ long createNative(String spec, boolean isValid, long nativeParsed);
+ }
+}
diff --git a/url/android/java/src/org/chromium/url/IDNStringUtil.java b/url/android/java/src/org/chromium/url/IDNStringUtil.java
index 37d77dc..10957b6 100644
--- a/url/android/java/src/org/chromium/url/IDNStringUtil.java
+++ b/url/android/java/src/org/chromium/url/IDNStringUtil.java
@@ -1,4 +1,4 @@
-// Copyright 2014 The Chromium Authors. All rights reserved.
+// Copyright 2014 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
diff --git a/url/android/java/src/org/chromium/url/Origin.java b/url/android/java/src/org/chromium/url/Origin.java
new file mode 100644
index 0000000..87ce870
--- /dev/null
+++ b/url/android/java/src/org/chromium/url/Origin.java
@@ -0,0 +1,114 @@
+// Copyright 2019 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package org.chromium.url;
+
+import org.chromium.base.annotations.CalledByNative;
+import org.chromium.base.annotations.JNINamespace;
+import org.chromium.base.annotations.NativeMethods;
+
+/** An origin is either a (scheme, host, port) tuple or is opaque. */
+@JNINamespace("url")
+public class Origin {
+ private final String mScheme;
+ private final String mHost;
+ private final short mPort;
+
+ private final boolean mIsOpaque;
+
+ // Serialization of the Unguessable Token. Do not use directly.
+ private final long mTokenHighBits;
+ private final long mTokenLowBits;
+
+ /**
+ * Constructs an opaque origin.
+ */
+ public static Origin createOpaqueOrigin() {
+ return OriginJni.get().createOpaque();
+ }
+
+ /**
+ * See origin.h for many warnings about this method.
+ *
+ * Constructs an Origin from a GURL.
+ */
+ public static Origin create(GURL gurl) {
+ return OriginJni.get().createFromGURL(gurl);
+ }
+
+ /**
+ * Builds the Java analogue of the C++ Origin class from a mojo Origin.
+ *
+ * `org.chromium.url.internal.mojom.Origin`s are provided by Mojo-generated code but are not
+ * intended for direct use (see crbug.com/1156866).
+ *
+ * @param mojoOrigin The mojo Origin whose scheme, host and port are copied. The new origin
+ * is opaque exactly when {@code mojoOrigin.nonceIfOpaque} is non-null, in which case the
+ * nonce's high and low bits are copied as well.
+ */
+ public Origin(org.chromium.url.internal.mojom.Origin mojoOrigin) {
+ mScheme = mojoOrigin.scheme;
+ mHost = mojoOrigin.host;
+ mPort = mojoOrigin.port;
+
+ // A mojo Origin is treated as opaque exactly when it carries a nonce.
+ final boolean hasNonce = mojoOrigin.nonceIfOpaque != null;
+ mIsOpaque = hasNonce;
+ // The token bits are only copied for an opaque origin; otherwise they stay zeroed.
+ mTokenHighBits = hasNonce ? mojoOrigin.nonceIfOpaque.high : 0;
+ mTokenLowBits = hasNonce ? mojoOrigin.nonceIfOpaque.low : 0;
+ }
+
+ @CalledByNative
+ private Origin(String scheme, String host, short port, boolean isOpaque, long tokenHighBits,
+ long tokenLowBits) {
+ mScheme = scheme;
+ mHost = host;
+ mPort = port;
+ mIsOpaque = isOpaque;
+ mTokenHighBits = tokenHighBits;
+ mTokenLowBits = tokenLowBits;
+ }
+
+ /** @return The scheme of the origin. Returns an empty string for an opaque origin. */
+ public String getScheme() {
+ return !isOpaque() ? mScheme : "";
+ }
+
+ /** @return The host of the origin. Returns an empty string for an opaque origin. */
+ public String getHost() {
+ return !isOpaque() ? mHost : "";
+ }
+
+ /** @return The port of the origin. Returns 0 for an opaque origin. */
+ public int getPort() {
+ return !isOpaque() ? Short.toUnsignedInt(mPort) : 0;
+ }
+
+ /** @return Whether the origin is opaque. */
+ public boolean isOpaque() {
+ return mIsOpaque;
+ }
+
+ @CalledByNative
+ private long toNativeOrigin() {
+ return OriginJni.get().createNative(
+ mScheme, mHost, mPort, mIsOpaque, mTokenHighBits, mTokenLowBits);
+ }
+
+ @NativeMethods
+ interface Natives {
+ /** Constructs a new Opaque origin. */
+ Origin createOpaque();
+
+ /** Constructs an Origin from a GURL. */
+ Origin createFromGURL(GURL gurl);
+
+ /**
+ * Reconstructs the native Origin for this Java Origin, returning its native pointer.
+ */
+ long createNative(String scheme, String host, short port, boolean isOpaque,
+ long tokenHighBits, long tokenLowBits);
+ }
+}
diff --git a/url/android/java/src/org/chromium/url/Parsed.java b/url/android/java/src/org/chromium/url/Parsed.java
new file mode 100644
index 0000000..ca41cfb
--- /dev/null
+++ b/url/android/java/src/org/chromium/url/Parsed.java
@@ -0,0 +1,141 @@
+// Copyright 2019 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package org.chromium.url;
+
+import org.chromium.base.annotations.CalledByNative;
+import org.chromium.base.annotations.JNINamespace;
+import org.chromium.base.annotations.NativeMethods;
+import org.chromium.build.annotations.MainDex;
+
+/**
+ * A java wrapper for Parsed, GURL's internal parsed URI representation.
+ */
+@MainDex
+@JNINamespace("url")
+/* package */ class Parsed {
+ /* package */ final int mSchemeBegin;
+ /* package */ final int mSchemeLength;
+ /* package */ final int mUsernameBegin;
+ /* package */ final int mUsernameLength;
+ /* package */ final int mPasswordBegin;
+ /* package */ final int mPasswordLength;
+ /* package */ final int mHostBegin;
+ /* package */ final int mHostLength;
+ /* package */ final int mPortBegin;
+ /* package */ final int mPortLength;
+ /* package */ final int mPathBegin;
+ /* package */ final int mPathLength;
+ /* package */ final int mQueryBegin;
+ /* package */ final int mQueryLength;
+ /* package */ final int mRefBegin;
+ /* package */ final int mRefLength;
+ private final Parsed mInnerUrl;
+ private final boolean mPotentiallyDanglingMarkup;
+
+ /* package */ static Parsed createEmpty() {
+ return new Parsed(0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, false, null);
+ }
+
+ @CalledByNative
+ private Parsed(int schemeBegin, int schemeLength, int usernameBegin, int usernameLength,
+ int passwordBegin, int passwordLength, int hostBegin, int hostLength, int portBegin,
+ int portLength, int pathBegin, int pathLength, int queryBegin, int queryLength,
+ int refBegin, int refLength, boolean potentiallyDanglingMarkup, Parsed innerUrl) {
+ mSchemeBegin = schemeBegin;
+ mSchemeLength = schemeLength;
+ mUsernameBegin = usernameBegin;
+ mUsernameLength = usernameLength;
+ mPasswordBegin = passwordBegin;
+ mPasswordLength = passwordLength;
+ mHostBegin = hostBegin;
+ mHostLength = hostLength;
+ mPortBegin = portBegin;
+ mPortLength = portLength;
+ mPathBegin = pathBegin;
+ mPathLength = pathLength;
+ mQueryBegin = queryBegin;
+ mQueryLength = queryLength;
+ mRefBegin = refBegin;
+ mRefLength = refLength;
+ mPotentiallyDanglingMarkup = potentiallyDanglingMarkup;
+ mInnerUrl = innerUrl;
+ }
+
+ /* package */ long toNativeParsed() {
+ long inner = 0;
+ if (mInnerUrl != null) {
+ inner = mInnerUrl.toNativeParsed();
+ }
+ return ParsedJni.get().createNative(mSchemeBegin, mSchemeLength, mUsernameBegin,
+ mUsernameLength, mPasswordBegin, mPasswordLength, mHostBegin, mHostLength,
+ mPortBegin, mPortLength, mPathBegin, mPathLength, mQueryBegin, mQueryLength,
+ mRefBegin, mRefLength, mPotentiallyDanglingMarkup, inner);
+ }
+
+ /* package */ String serialize() {
+ final char delimiter = GURL.SERIALIZER_DELIMITER;
+ // Begin/length pairs, in the order Parsed.deserialize() reads them back.
+ final int[] componentFields = {
+ mSchemeBegin, mSchemeLength,
+ mUsernameBegin, mUsernameLength,
+ mPasswordBegin, mPasswordLength,
+ mHostBegin, mHostLength,
+ mPortBegin, mPortLength,
+ mPathBegin, mPathLength,
+ mQueryBegin, mQueryLength,
+ mRefBegin, mRefLength,
+ };
+ StringBuilder out = new StringBuilder();
+ for (int field : componentFields) {
+ out.append(field).append(delimiter);
+ }
+ out.append(mPotentiallyDanglingMarkup).append(delimiter);
+ // This flag tells the reader whether a nested Parsed follows.
+ out.append(mInnerUrl != null);
+ if (mInnerUrl != null) {
+ out.append(delimiter).append(mInnerUrl.serialize());
+ }
+ return out.toString();
+ }
+
+ /**
+ * Rebuilds a Parsed from tokens laid out the way {@link #serialize()} writes them,
+ * reading from {@code tokens[startIndex]} onwards.
+ */
+ /* package */ static Parsed deserialize(String[] tokens, int startIndex) {
+ // Sixteen ints come first: a (begin, length) pair for each of scheme, username,
+ // password, host, port, path, query and ref, in that order.
+ final int[] fields = new int[16];
+ int cursor = startIndex;
+ for (int i = 0; i < fields.length; i++) {
+ fields[i] = Integer.parseInt(tokens[cursor++]);
+ }
+ final boolean danglingMarkup = Boolean.parseBoolean(tokens[cursor++]);
+ // The next token says whether a nested Parsed follows immediately afterwards.
+ final boolean hasInner = Boolean.parseBoolean(tokens[cursor++]);
+ final Parsed inner = hasInner ? Parsed.deserialize(tokens, cursor) : null;
+ return new Parsed(
+ /* scheme */ fields[0], fields[1],
+ /* username */ fields[2], fields[3],
+ /* password */ fields[4], fields[5],
+ /* host */ fields[6], fields[7],
+ /* port */ fields[8], fields[9],
+ /* path */ fields[10], fields[11],
+ /* query */ fields[12], fields[13],
+ /* ref */ fields[14], fields[15],
+ danglingMarkup, inner);
+ }
+
+ @NativeMethods
+ interface Natives {
+ /**
+ * Create and return the pointer to a native Parsed.
+ */
+ long createNative(int schemeBegin, int schemeLength, int usernameBegin, int usernameLength,
+ int passwordBegin, int passwordLength, int hostBegin, int hostLength, int portBegin,
+ int portLength, int pathBegin, int pathLength, int queryBegin, int queryLength,
+ int refBegin, int refLength, boolean potentiallyDanglingMarkup, long innerUrl);
+ }
+}
diff --git a/url/android/java/src/org/chromium/url/URI.java b/url/android/java/src/org/chromium/url/URI.java
new file mode 100644
index 0000000..e83d615
--- /dev/null
+++ b/url/android/java/src/org/chromium/url/URI.java
@@ -0,0 +1,61 @@
+// Copyright 2019 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package org.chromium.url;
+
+import java.net.URISyntaxException;
+
+/**
+ * An API shim around GURL that mostly matches the java.net.URI API.
+ *
+ * @deprecated Please use GURL directly in new code.
+ */
+@Deprecated
+public class URI extends GURL {
+ /**
+ * Create a new GURL with a java.net.URI API shim.
+ */
+ public URI(String uri) throws URISyntaxException {
+ super(uri);
+ if (!isValid()) {
+ throw new URISyntaxException(uri, "Uri could not be parsed as a valid GURL");
+ }
+ }
+
+ private URI() {}
+
+ /**
+ * Convenience wrapper around {@link URI#URI(String)} that rethrows the checked
+ * URISyntaxException as an unchecked IllegalArgumentException carrying it as the cause.
+ */
+ public static URI create(String str) {
+ try {
+ return new URI(str);
+ } catch (URISyntaxException e) {
+ throw new IllegalArgumentException(e);
+ }
+ }
+
+ @Override
+ public URI getOrigin() {
+ URI target = new URI();
+ getOriginInternal(target);
+ return target;
+ }
+
+ /** See {@link GURL#getRef()} */
+ public String getFragment() {
+ return getRef();
+ }
+
+ /** See {@link java.net.URI#isAbsolute()} */
+ public boolean isAbsolute() {
+ return !getScheme().isEmpty();
+ }
+
+ @Override
+ public String toString() {
+ return getPossiblyInvalidSpec();
+ }
+}
diff --git a/url/android/javatests/DEPS b/url/android/javatests/DEPS
new file mode 100644
index 0000000..aa93591
--- /dev/null
+++ b/url/android/javatests/DEPS
@@ -0,0 +1,3 @@
+include_rules = [
+ "+content/public/test/android",
+]
diff --git a/url/android/javatests/src/org/chromium/url/GURLJavaTest.java b/url/android/javatests/src/org/chromium/url/GURLJavaTest.java
new file mode 100644
index 0000000..e684e51
--- /dev/null
+++ b/url/android/javatests/src/org/chromium/url/GURLJavaTest.java
@@ -0,0 +1,314 @@
+// Copyright 2019 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package org.chromium.url;
+
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.doThrow;
+
+import androidx.test.filters.SmallTest;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+
+import org.chromium.base.test.BaseJUnit4ClassRunner;
+import org.chromium.base.test.util.Batch;
+import org.chromium.content_public.browser.test.NativeLibraryTestUtils;
+
+import java.net.URISyntaxException;
+
+/**
+ * Tests for {@link GURL}. GURL relies heavily on the native implementation, and the lion's share of
+ * the logic is tested there. This test is primarily to make sure everything is plumbed through
+ * correctly.
+ */
+@RunWith(BaseJUnit4ClassRunner.class)
+@Batch(Batch.UNIT_TESTS)
+public class GURLJavaTest {
+ @Mock
+ GURL.Natives mGURLMocks;
+
+ @Before
+ public void setUp() {
+ MockitoAnnotations.initMocks(this);
+
+ NativeLibraryTestUtils.loadNativeLibraryNoBrowserProcess();
+ GURLJavaTestHelper.nativeInitializeICU();
+ }
+
+ /* package */ static void deepAssertEquals(GURL expected, GURL actual) {
+ Assert.assertEquals(expected, actual);
+ Assert.assertEquals(expected.getScheme(), actual.getScheme());
+ Assert.assertEquals(expected.getUsername(), actual.getUsername());
+ Assert.assertEquals(expected.getPassword(), actual.getPassword());
+ Assert.assertEquals(expected.getHost(), actual.getHost());
+ Assert.assertEquals(expected.getPort(), actual.getPort());
+ Assert.assertEquals(expected.getPath(), actual.getPath());
+ Assert.assertEquals(expected.getQuery(), actual.getQuery());
+ Assert.assertEquals(expected.getRef(), actual.getRef());
+ }
+
+ private String prependLengthToSerialization(String serialization) {
+ return Integer.toString(serialization.length()) + GURL.SERIALIZER_DELIMITER + serialization;
+ }
+
+ @SmallTest
+ @Test
+ public void testGURLEquivalence() {
+ GURLJavaTestHelper.nativeTestGURLEquivalence();
+ }
+
+ // Equivalent of GURLTest.Components
+ @SmallTest
+ @Test
+ @SuppressWarnings(value = "AuthLeak")
+ public void testComponents() {
+ GURL empty = new GURL("");
+ Assert.assertTrue(empty.isEmpty());
+ Assert.assertFalse(empty.isValid());
+
+ GURL url = new GURL("http://user:pass@google.com:99/foo;bar?q=a#ref");
+ Assert.assertFalse(url.isEmpty());
+ Assert.assertTrue(url.isValid());
+ Assert.assertTrue(url.getScheme().equals("http"));
+
+ Assert.assertEquals("http://user:pass@google.com:99/foo;bar?q=a#ref", url.getSpec());
+
+ Assert.assertEquals("http", url.getScheme());
+ Assert.assertEquals("user", url.getUsername());
+ Assert.assertEquals("pass", url.getPassword());
+ Assert.assertEquals("google.com", url.getHost());
+ Assert.assertEquals("99", url.getPort());
+ Assert.assertEquals("/foo;bar", url.getPath());
+ Assert.assertEquals("q=a", url.getQuery());
+ Assert.assertEquals("ref", url.getRef());
+
+ // Test parsing userinfo with special characters.
+ GURL urlSpecialPass = new GURL("http://user:%40!$&'()*+,;=:@google.com:12345");
+ Assert.assertTrue(urlSpecialPass.isValid());
+ // GURL canonicalizes some delimiters.
+ Assert.assertEquals("%40!$&%27()*+,%3B%3D%3A", urlSpecialPass.getPassword());
+ Assert.assertEquals("google.com", urlSpecialPass.getHost());
+ Assert.assertEquals("12345", urlSpecialPass.getPort());
+ }
+
+ // Equivalent of GURLTest.Empty
+ @SmallTest
+ @Test
+ public void testEmpty() {
+ GURLJni.TEST_HOOKS.setInstanceForTesting(mGURLMocks);
+ doThrow(new RuntimeException("Should not need to parse empty URL"))
+ .when(mGURLMocks)
+ .init(any(), any());
+ GURL url = new GURL("");
+ Assert.assertFalse(url.isValid());
+ Assert.assertEquals("", url.getSpec());
+
+ Assert.assertEquals("", url.getScheme());
+ Assert.assertEquals("", url.getUsername());
+ Assert.assertEquals("", url.getPassword());
+ Assert.assertEquals("", url.getHost());
+ Assert.assertEquals("", url.getPort());
+ Assert.assertEquals("", url.getPath());
+ Assert.assertEquals("", url.getQuery());
+ Assert.assertEquals("", url.getRef());
+ GURLJni.TEST_HOOKS.setInstanceForTesting(null);
+ }
+
+ // Test that GURL and URI return the correct Origin.
+ @SmallTest
+ @Test
+ @SuppressWarnings(value = "AuthLeak")
+ public void testOrigin() throws URISyntaxException {
+ final String kExpectedOrigin1 = "http://google.com:21/";
+ final String kExpectedOrigin2 = "";
+ GURL url1 = new GURL("filesystem:http://user:pass@google.com:21/blah#baz");
+ GURL url2 = new GURL("javascript:window.alert(\"hello,world\");");
+ URI uri = new URI("filesystem:http://user:pass@google.com:21/blah#baz");
+
+ Assert.assertEquals(kExpectedOrigin1, url1.getOrigin().getSpec());
+ Assert.assertEquals(kExpectedOrigin2, url2.getOrigin().getSpec());
+ URI origin = uri.getOrigin();
+ Assert.assertEquals(kExpectedOrigin1, origin.getSpec());
+ }
+
+ @SmallTest
+ @Test
+ public void testWideInput() throws URISyntaxException {
+ final String kExpectedSpec = "http://xn--1xa.com/";
+
+ GURL url = new GURL("http://\u03C0.com");
+ Assert.assertEquals(kExpectedSpec, url.getSpec());
+ Assert.assertEquals("http", url.getScheme());
+ Assert.assertEquals("", url.getUsername());
+ Assert.assertEquals("", url.getPassword());
+ Assert.assertEquals("xn--1xa.com", url.getHost());
+ Assert.assertEquals("", url.getPort());
+ Assert.assertEquals("/", url.getPath());
+ Assert.assertEquals("", url.getQuery());
+ Assert.assertEquals("", url.getRef());
+ }
+
+ @SmallTest
+ @Test
+ @SuppressWarnings(value = "AuthLeak")
+ public void testSerialization() {
+ GURL[] cases = {
+ // Common Standard URLs.
+ new GURL("https://www.google.com"),
+ new GURL("https://www.google.com/"),
+ new GURL("https://www.google.com/maps.htm"),
+ new GURL("https://www.google.com/maps/"),
+ new GURL("https://www.google.com/index.html"),
+ new GURL("https://www.google.com/index.html?q=maps"),
+ new GURL("https://www.google.com/index.html#maps/"),
+ new GURL("https://foo:bar@www.google.com/maps.htm"),
+ new GURL("https://www.google.com/maps/au/index.html"),
+ new GURL("https://www.google.com/maps/au/north"),
+ new GURL("https://www.google.com/maps/au/north/"),
+ new GURL("https://www.google.com/maps/au/index.html?q=maps#fragment/"),
+ new GURL("http://www.google.com:8000/maps/au/index.html?q=maps#fragment/"),
+ new GURL("https://www.google.com/maps/au/north/?q=maps#fragment"),
+ new GURL("https://www.google.com/maps/au/north?q=maps#fragment"),
+ // Less common standard URLs.
+ new GURL("filesystem:http://www.google.com/temporary/bar.html?baz=22"),
+ new GURL("file:///temporary/bar.html?baz=22"),
+ new GURL("ftp://foo/test/index.html"),
+ new GURL("gopher://foo/test/index.html"),
+ new GURL("ws://foo/test/index.html"),
+ // Non-standard URLs.
+ new GURL("chrome://foo/bar.html"),
+ new GURL("httpa://foo/test/index.html"),
+ new GURL("blob:https://foo.bar/test/index.html"),
+ new GURL("about:blank"),
+ new GURL("data:foobar"),
+ new GURL("scheme:opaque_data"),
+ // Invalid URLs.
+ new GURL("foobar"),
+ // URLs containing the delimiter.
+ new GURL("https://www.google.ca/" + GURL.SERIALIZER_DELIMITER + ",foo"),
+ new GURL("https://www.foo" + GURL.SERIALIZER_DELIMITER + "bar.com"),
+ };
+ doThrow(new RuntimeException("Should not re-initialize for deserialization when the "
+ + "version hasn't changed."))
+ .when(mGURLMocks).init(any(), any());
+ GURLJni.TEST_HOOKS.setInstanceForTesting(mGURLMocks); // Round trips must not re-parse.
+ try {
+ for (GURL url : cases) {
+ deepAssertEquals(url, GURL.deserialize(url.serialize()));
+ }
+ } finally {
+ GURLJni.TEST_HOOKS.setInstanceForTesting(null); // Never leak the mock on failure.
+ }
+ }
+
+ /**
+ * Tests that we re-parse the URL from the spec, which must always be the last token in the
+ * serialization, if the serialization version differs.
+ */
+ @SmallTest
+ @Test
+ public void testSerializationWithVersionSkew() {
+ GURL url = new GURL("https://www.google.com");
+ String serialization = (GURL.SERIALIZER_VERSION + 1)
+ + ",0,0,0,0,foo,https://url.bad,blah,0,".replace(',', GURL.SERIALIZER_DELIMITER)
+ + url.getSpec();
+ serialization = prependLengthToSerialization(serialization);
+ GURL out = GURL.deserialize(serialization);
+ deepAssertEquals(url, out);
+ }
+
+ /**
+ * Tests that fields that aren't visible to Java code survive a deserialize/serialize round trip.
+ */
+ @SmallTest
+ @Test
+ public void testSerializationOfPrivateFields() {
+ String serialization = GURL.SERIALIZER_VERSION
+ + ",true,"
+ // Outer Parsed.
+ + "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,false,true,"
+ // Inner Parsed.
+ + "17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,true,false,"
+ + "chrome://foo/bar.html";
+ serialization = serialization.replace(',', GURL.SERIALIZER_DELIMITER);
+ serialization = prependLengthToSerialization(serialization);
+ GURL url = GURL.deserialize(serialization);
+ Assert.assertEquals(serialization, url.serialize()); // (expected, actual)
+ }
+
+ /**
+ * Tests that a serialization truncated by storage deserializes to the empty GURL.
+ */
+ @SmallTest
+ @Test
+ public void testTruncatedDeserialization() {
+ String serialization = "123,1,true,1,2,3,4,5,6,7,8,9,10";
+ serialization = serialization.replace(',', GURL.SERIALIZER_DELIMITER);
+ GURL url = GURL.deserialize(serialization);
+ Assert.assertEquals(GURL.emptyGURL(), url); // (expected, actual)
+ }
+
+ /**
+ * Tests that corrupted serializations deserialize to the empty GURL.
+ */
+ @SmallTest
+ @Test
+ public void testCorruptedSerializations() {
+ String serialization = new GURL("https://www.google.ca").serialize();
+ // Replace every '5' (intended: the scheme length) with an extra delimiter.
+ String corruptedParsed = serialization.replace('5', GURL.SERIALIZER_DELIMITER);
+ GURL url = GURL.deserialize(corruptedParsed);
+ Assert.assertEquals(GURL.emptyGURL(), url);
+
+ String corruptedVersion =
+ serialization.replaceFirst(Integer.toString(GURL.SERIALIZER_VERSION), "x");
+ url = GURL.deserialize(corruptedVersion);
+ Assert.assertEquals(GURL.emptyGURL(), url);
+ }
+
+ // Test that domainIs is hooked up correctly.
+ @SmallTest
+ @Test
+ public void testDomainIs() {
+ GURL url1 = new GURL("https://www.google.com");
+ GURL url2 = new GURL("https://www.notgoogle.com");
+
+ Assert.assertTrue(url1.domainIs("com"));
+ Assert.assertTrue(url2.domainIs("com"));
+ Assert.assertTrue(url1.domainIs("google.com"));
+ Assert.assertFalse(url2.domainIs("google.com"));
+
+ Assert.assertTrue(url1.domainIs("www.google.com"));
+ Assert.assertFalse(url1.domainIs("images.google.com"));
+ }
+
+ // Tests Mojom conversion.
+ @SmallTest
+ @Test
+ public void testMojomConvertion() {
+ // Valid:
+ Assert.assertEquals(
+ "https://www.google.com/", new GURL("https://www.google.com/").toMojom().url);
+
+ // Null:
+ Assert.assertEquals("", new GURL(null).toMojom().url);
+
+ // Empty:
+ Assert.assertEquals("", new GURL("").toMojom().url);
+
+ // Invalid:
+ Assert.assertEquals("", new GURL(new String(new byte[] {1, 1, 1})).toMojom().url);
+
+ // Too long.
+ Assert.assertEquals("",
+ new GURL("https://www.google.com/".concat("a".repeat(2 * 1024 * 1024)))
+ .toMojom()
+ .url);
+ }
+}
diff --git a/url/android/javatests/src/org/chromium/url/GURLJavaTestHelper.java b/url/android/javatests/src/org/chromium/url/GURLJavaTestHelper.java
new file mode 100644
index 0000000..975b009
--- /dev/null
+++ b/url/android/javatests/src/org/chromium/url/GURLJavaTestHelper.java
@@ -0,0 +1,34 @@
+// Copyright 2020 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package org.chromium.url;
+
+import org.chromium.base.annotations.CalledByNative;
+import org.chromium.base.annotations.JNINamespace;
+import org.chromium.base.annotations.NativeMethods;
+
+/**
+ * Helpers for GURLJavaTest that need to call into native code.
+ */
+@JNINamespace("url")
+public class GURLJavaTestHelper {
+ @CalledByNative
+ public static GURL createGURL(String uri) {
+ return new GURL(uri);
+ }
+
+ public static void nativeInitializeICU() {
+ GURLJavaTestHelperJni.get().initializeICU();
+ }
+
+ public static void nativeTestGURLEquivalence() {
+ GURLJavaTestHelperJni.get().testGURLEquivalence();
+ }
+
+ @NativeMethods
+ interface Natives {
+ void initializeICU();
+ void testGURLEquivalence();
+ }
+}
diff --git a/url/android/javatests/src/org/chromium/url/JUnitTestGURLsTest.java b/url/android/javatests/src/org/chromium/url/JUnitTestGURLsTest.java
new file mode 100644
index 0000000..a23967c
--- /dev/null
+++ b/url/android/javatests/src/org/chromium/url/JUnitTestGURLsTest.java
@@ -0,0 +1,73 @@
+// Copyright 2020 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package org.chromium.url;
+
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.doThrow;
+
+import androidx.test.filters.SmallTest;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+
+import org.chromium.base.Log;
+import org.chromium.base.test.BaseJUnit4ClassRunner;
+import org.chromium.base.test.util.Batch;
+
+import java.util.Map;
+
+/**
+ * Tests for JUnitTestGURLs.
+ */
+@RunWith(BaseJUnit4ClassRunner.class)
+@Batch(Batch.UNIT_TESTS)
+public class JUnitTestGURLsTest {
+ private static final String TAG = "JUnitTestGURLs";
+
+ @Mock
+ GURL.Natives mGURLMocks;
+
+ @Before
+ public void setUp() {
+ MockitoAnnotations.initMocks(this);
+ }
+
+ private RuntimeException getErrorForGURL(GURL gurl) {
+ String serialized = gurl.serialize();
+ Assert.assertEquals(-1, serialized.indexOf(","));
+ serialized = serialized.replace(GURL.SERIALIZER_DELIMITER, ',');
+
+ return new RuntimeException("Please update the serialization in JUnitTestGURLs.java for "
+ + gurl.getPossiblyInvalidSpec() + " to: '" + serialized + "'");
+ }
+
+ @SmallTest
+ @Test
+ public void testGURLEquivalence() throws Throwable {
+ doThrow(new RuntimeException("Deserialization required re-initialization."))
+ .when(mGURLMocks)
+ .init(any(), any());
+ // Keep going after a failure so every stale serialization is logged in a single run.
+ Throwable exception = null;
+ for (Map.Entry<String, String> entry : JUnitTestGURLs.sGURLMap.entrySet()) {
+ GURL gurl = new GURL(entry.getKey());
+ try {
+ GURLJni.TEST_HOOKS.setInstanceForTesting(mGURLMocks);
+ GURL deserialized = JUnitTestGURLs.getGURL(entry.getKey());
+ GURLJni.TEST_HOOKS.setInstanceForTesting(null);
+ GURLJavaTest.deepAssertEquals(deserialized, gurl);
+ } catch (Throwable e) {
+ GURLJni.TEST_HOOKS.setInstanceForTesting(null);
+ exception = getErrorForGURL(gurl).initCause(e); // Keep the underlying failure.
+ Log.e(TAG, "Error: ", exception);
+ }
+ }
+ if (exception != null) throw exception;
+ }
+}
diff --git a/url/android/javatests/src/org/chromium/url/OriginJavaTest.java b/url/android/javatests/src/org/chromium/url/OriginJavaTest.java
new file mode 100644
index 0000000..3a4665a
--- /dev/null
+++ b/url/android/javatests/src/org/chromium/url/OriginJavaTest.java
@@ -0,0 +1,99 @@
+// Copyright 2022 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package org.chromium.url;
+
+import androidx.test.filters.SmallTest;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+
+import org.chromium.base.test.BaseJUnit4ClassRunner;
+import org.chromium.base.test.util.Batch;
+import org.chromium.content_public.browser.test.NativeLibraryTestUtils;
+import org.chromium.mojo_base.mojom.UnguessableToken;
+
+/**
+ * Tests for {@link Origin}. Origin relies heavily on the native implementation, and the lion's
+ * share of the logic is tested there. This test is primarily to make sure everything is plumbed
+ * through correctly.
+ */
+@RunWith(BaseJUnit4ClassRunner.class)
+@Batch(Batch.UNIT_TESTS)
+public class OriginJavaTest {
+ @Before
+ public void setUp() {
+ NativeLibraryTestUtils.loadNativeLibraryNoBrowserProcess();
+ }
+
+ @SmallTest
+ @Test
+ public void testOriginEquivalence() {
+ OriginJavaTestHelper.testOriginEquivalence();
+ }
+
+ @SmallTest
+ @Test
+ public void testCreateOpaqueOrigin() {
+ Origin opaque = Origin.createOpaqueOrigin();
+ Assert.assertTrue(opaque.isOpaque());
+ Assert.assertEquals("", opaque.getScheme());
+ Assert.assertEquals("", opaque.getHost());
+ Assert.assertEquals(0, opaque.getPort());
+ }
+
+ @SmallTest
+ @Test
+ public void testNonOpaqueMojomConstructor() {
+ String scheme = "http";
+ String host = "host.name";
+ short port = 42;
+ org.chromium.url.internal.mojom.Origin mojom = new org.chromium.url.internal.mojom.Origin();
+ mojom.scheme = scheme;
+ mojom.host = host;
+ mojom.port = port;
+ Origin origin = new Origin(mojom);
+
+ Assert.assertEquals(scheme, origin.getScheme());
+ Assert.assertEquals(host, origin.getHost());
+ Assert.assertEquals(port, origin.getPort());
+ Assert.assertFalse(origin.isOpaque());
+ }
+
+ @SmallTest
+ @Test
+ public void testOpaqueMojomConstructor() {
+ String scheme = "http";
+ String host = "host.name";
+ short port = 42;
+ org.chromium.url.internal.mojom.Origin mojom = new org.chromium.url.internal.mojom.Origin();
+ mojom.scheme = scheme;
+ mojom.host = host;
+ mojom.port = port;
+ UnguessableToken token = new UnguessableToken();
+ token.high = 3;
+ token.low = 4;
+ mojom.nonceIfOpaque = token;
+
+ Origin origin = new Origin(mojom);
+
+ Assert.assertEquals("", origin.getScheme());
+ Assert.assertEquals("", origin.getHost());
+ Assert.assertEquals(0, origin.getPort());
+ Assert.assertTrue(origin.isOpaque());
+ }
+
+ @SmallTest
+ @Test
+ public void testCreateFromGURL() {
+ GURL gurl = new GURL("https://host.name:61234/path");
+ Origin origin = Origin.create(gurl);
+ Assert.assertFalse(origin.isOpaque());
+ Assert.assertEquals("https", origin.getScheme());
+ Assert.assertEquals("host.name", origin.getHost());
+ Assert.assertEquals(61234, origin.getPort());
+ }
+}
diff --git a/url/android/javatests/src/org/chromium/url/OriginJavaTestHelper.java b/url/android/javatests/src/org/chromium/url/OriginJavaTestHelper.java
new file mode 100644
index 0000000..2eb9550
--- /dev/null
+++ b/url/android/javatests/src/org/chromium/url/OriginJavaTestHelper.java
@@ -0,0 +1,23 @@
+// Copyright 2022 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package org.chromium.url;
+
+import org.chromium.base.annotations.JNINamespace;
+import org.chromium.base.annotations.NativeMethods;
+
+/**
+ * Helpers for OriginJavaTest that need to call into native code.
+ */
+@JNINamespace("url")
+public class OriginJavaTestHelper {
+ public static void testOriginEquivalence() {
+ OriginJavaTestHelperJni.get().testOriginEquivalence();
+ }
+
+ @NativeMethods
+ interface Natives {
+ void testOriginEquivalence();
+ }
+}
diff --git a/url/android/junit/src/org/chromium/url/ShadowGURLTest.java b/url/android/junit/src/org/chromium/url/ShadowGURLTest.java
new file mode 100644
index 0000000..a491de1
--- /dev/null
+++ b/url/android/junit/src/org/chromium/url/ShadowGURLTest.java
@@ -0,0 +1,70 @@
+// Copyright 2021 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package org.chromium.url;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.robolectric.annotation.Config;
+
+import org.chromium.base.test.BaseRobolectricTestRunner;
+
+/** Tests of {@link ShadowGURL}. */
+@RunWith(BaseRobolectricTestRunner.class)
+@Config(shadows = {ShadowGURL.class})
+public class ShadowGURLTest {
+    /**
+     * Asserts that two GURLs are equal as a whole and component by component.
+     *
+     * @param expected the reference GURL.
+     * @param actual the GURL under test.
+     */
+    /* package */ static void deepAssertEquals(GURL expected, GURL actual) {
+        Assert.assertEquals(expected, actual);
+        Assert.assertEquals(expected.getScheme(), actual.getScheme());
+        Assert.assertEquals(expected.getUsername(), actual.getUsername());
+        Assert.assertEquals(expected.getPassword(), actual.getPassword());
+        Assert.assertEquals(expected.getHost(), actual.getHost());
+        Assert.assertEquals(expected.getPort(), actual.getPort());
+        Assert.assertEquals(expected.getPath(), actual.getPath());
+        Assert.assertEquals(expected.getQuery(), actual.getQuery());
+        Assert.assertEquals(expected.getRef(), actual.getRef());
+    }
+
+    /** A GURL built from a registered URL string exposes the expected components. */
+    @Test
+    public void testComponents() {
+        GURL url = new GURL(JUnitTestGURLs.SEARCH_URL);
+        Assert.assertFalse(url.isEmpty());
+        Assert.assertTrue(url.isValid());
+
+        Assert.assertEquals(JUnitTestGURLs.SEARCH_URL, url.getSpec());
+        Assert.assertEquals("https", url.getScheme());
+        Assert.assertEquals("", url.getUsername());
+        Assert.assertEquals("", url.getPassword());
+        Assert.assertEquals("www.google.com", url.getHost());
+        Assert.assertEquals("", url.getPort());
+        Assert.assertEquals("/search", url.getPath());
+        Assert.assertEquals("q=test", url.getQuery());
+        Assert.assertEquals("", url.getRef());
+    }
+
+    /** A GURL built from the empty string is invalid and every component is empty. */
+    @Test
+    public void testEmpty() {
+        GURL url = new GURL("");
+        Assert.assertFalse(url.isValid());
+
+        Assert.assertEquals("", url.getSpec());
+        Assert.assertEquals("", url.getScheme());
+        Assert.assertEquals("", url.getUsername());
+        Assert.assertEquals("", url.getPassword());
+        Assert.assertEquals("", url.getHost());
+        Assert.assertEquals("", url.getPort());
+        Assert.assertEquals("", url.getPath());
+        Assert.assertEquals("", url.getQuery());
+        Assert.assertEquals("", url.getRef());
+    }
+
+    /** A GURL survives a serialize/deserialize round trip with all components intact. */
+    @Test
+    public void testSerialization() {
+        GURL gurl = new GURL(JUnitTestGURLs.URL_1_WITH_PATH);
+        GURL deserialized = GURL.deserialize(gurl.serialize());
+
+        // The original GURL is the expected value; the round-tripped copy is the one under test.
+        deepAssertEquals(gurl, deserialized);
+    }
+}
diff --git a/url/android/origin_android.cc b/url/android/origin_android.cc
new file mode 100644
index 0000000..a0dd271
--- /dev/null
+++ b/url/android/origin_android.cc
@@ -0,0 +1,87 @@
+// Copyright 2019 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/origin.h"
+
+#include <cstdint>
+
+#include "base/android/jni_android.h"
+#include "base/android/jni_string.h"
+#include "base/android/scoped_java_ref.h"
+#include "base/memory/ptr_util.h"
+#include "url/android/gurl_android.h"
+#include "url/url_jni_headers/Origin_jni.h"
+
+namespace url {
+
+// Builds the Java-side counterpart of this Origin from its scheme, host, port,
+// opaque flag and the two 64-bit halves of its nonce. Both halves are passed
+// as 0 when GetNonceForSerialization() returns null.
+base::android::ScopedJavaLocalRef<jobject> Origin::CreateJavaObject() const {
+  JNIEnv* env = base::android::AttachCurrentThread();
+
+  const base::UnguessableToken* nonce = Origin::GetNonceForSerialization();
+  const auto nonce_high = nonce ? nonce->GetHighForSerialization() : 0;
+  const auto nonce_low = nonce ? nonce->GetLowForSerialization() : 0;
+
+  const auto j_scheme =
+      base::android::ConvertUTF8ToJavaString(env, tuple_.scheme());
+  const auto j_host =
+      base::android::ConvertUTF8ToJavaString(env, tuple_.host());
+
+  return Java_Origin_Constructor(env, j_scheme, j_host, tuple_.port(), opaque(),
+                                 nonce_high, nonce_low);
+}
+
+// static
+// Converts a Java Origin back into a native one. Java returns a handle to a
+// heap-allocated native Origin; ownership is taken here so the allocation is
+// released once its contents have been moved into the return value.
+Origin Origin::FromJavaObject(
+    const base::android::JavaRef<jobject>& java_origin) {
+  JNIEnv* env = base::android::AttachCurrentThread();
+  const auto native_handle = Java_Origin_toNativeOrigin(env, java_origin);
+  std::unique_ptr<Origin> owned =
+      base::WrapUnique(reinterpret_cast<Origin*>(native_handle));
+  return std::move(*owned);
+}
+
+// static
+// Allocates a native Origin from the fields of a Java Origin and returns it as
+// an integer handle. The handle refers to a heap-allocated Origin; whoever
+// receives it is responsible for deleting it.
+//
+// |token_high_bits| and |token_low_bits| are the two halves of the nonce. They
+// must deserialize to a token exactly when |is_opaque| is true; this is
+// enforced with a CHECK.
+jlong Origin::CreateNative(JNIEnv* env,
+                           const base::android::JavaRef<jstring>& java_scheme,
+                           const base::android::JavaRef<jstring>& java_host,
+                           uint16_t port,
+                           bool is_opaque,
+                           uint64_t token_high_bits,
+                           uint64_t token_low_bits) {
+  // Hold the converted strings by value instead of binding references to
+  // temporaries.
+  const std::string scheme = ConvertJavaStringToUTF8(env, java_scheme);
+  const std::string host = ConvertJavaStringToUTF8(env, java_host);
+
+  absl::optional<base::UnguessableToken> nonce_token =
+      base::UnguessableToken::Deserialize(token_high_bits, token_low_bits);
+  const bool has_nonce = nonce_token.has_value();
+  CHECK(has_nonce == is_opaque);
+  Origin::Nonce nonce;
+  if (has_nonce) {
+    nonce = Origin::Nonce(nonce_token.value());
+  }
+  Origin origin = is_opaque
+                      ? Origin::CreateOpaqueFromNormalizedPrecursorTuple(
+                            scheme, host, port, nonce)
+                      : Origin::CreateFromNormalizedTuple(scheme, host, port);
+  // Move, rather than copy, the finished Origin into the allocation that is
+  // handed across JNI.
+  return reinterpret_cast<intptr_t>(new Origin(std::move(origin)));
+}
+
+// JNI entry point: returns the Java object for a default-constructed native
+// Origin.
+static base::android::ScopedJavaLocalRef<jobject> JNI_Origin_CreateOpaque(
+    JNIEnv* env) {
+  Origin default_origin;
+  return default_origin.CreateJavaObject();
+}
+
+// JNI entry point: converts |j_gurl| to a native GURL, derives its Origin with
+// Origin::Create() and returns that Origin as a Java object.
+static base::android::ScopedJavaLocalRef<jobject> JNI_Origin_CreateFromGURL(
+    JNIEnv* env,
+    const base::android::JavaParamRef<jobject>& j_gurl) {
+  const auto native_gurl = GURLAndroid::ToNativeGURL(env, j_gurl);
+  const Origin origin = Origin::Create(*native_gurl);
+  return origin.CreateJavaObject();
+}
+
+// JNI entry point that forwards to Origin::CreateNative(), spelling out the
+// conversions from JNI types to the native parameter types. In particular the
+// signed 16-bit |port| is converted to uint16_t, so ports above 32767 keep
+// their value.
+static jlong JNI_Origin_CreateNative(
+    JNIEnv* env,
+    const base::android::JavaParamRef<jstring>& java_scheme,
+    const base::android::JavaParamRef<jstring>& java_host,
+    jshort port,
+    jboolean is_opaque,
+    jlong token_high_bits,
+    jlong token_low_bits) {
+  const uint16_t native_port = static_cast<uint16_t>(port);
+  const bool native_is_opaque = static_cast<bool>(is_opaque);
+  const uint64_t high_bits = static_cast<uint64_t>(token_high_bits);
+  const uint64_t low_bits = static_cast<uint64_t>(token_low_bits);
+  return Origin::CreateNative(env, java_scheme, java_host, native_port,
+                              native_is_opaque, high_bits, low_bits);
+}
+
+} // namespace url
diff --git a/url/android/origin_java_test_helper.cc b/url/android/origin_java_test_helper.cc
new file mode 100644
index 0000000..62554d8
--- /dev/null
+++ b/url/android/origin_java_test_helper.cc
@@ -0,0 +1,37 @@
+// Copyright 2022 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stddef.h>
+
+#include <sstream>
+
+#include "base/android/jni_android.h"
+#include "base/android/jni_string.h"
+#include "url/gurl.h"
+#include "url/j_test_jni_headers/OriginJavaTestHelper_jni.h"
+#include "url/origin.h"
+
+namespace url {
+
+// Native half of OriginJavaTestHelper.testOriginEquivalence(). Converts each
+// sample Origin to a Java object and back, and raises
+// java.lang.AssertionError for the first one that does not compare equal to
+// the original.
+static void JNI_OriginJavaTestHelper_TestOriginEquivalence(JNIEnv* env) {
+  const Origin cases[] = {
+      Origin(),
+      Origin::Create(GURL("http://a.com")),
+      Origin::Create(GURL("http://a.com:8000")),
+      Origin::Create(GURL("scheme:host")),
+      Origin::Create(GURL("http://a.com:8000")).DeriveNewOpaqueOrigin(),
+  };
+  for (const Origin& origin : cases) {
+    base::android::ScopedJavaLocalRef<jobject> j_origin =
+        origin.CreateJavaObject();
+    const Origin round_tripped = Origin::FromJavaObject(j_origin);
+    if (origin != round_tripped) {
+      std::stringstream ss;
+      ss << "Origin not equivalent: " << origin << ", " << round_tripped;
+      // ThrowNew() only marks the exception as pending, so return right away
+      // and let it surface once control is back in Java.
+      env->ThrowNew(env->FindClass("java/lang/AssertionError"),
+                    ss.str().c_str());
+      return;
+    }
+  }
+}
+
+} // namespace url
diff --git a/url/android/parsed_android.cc b/url/android/parsed_android.cc
new file mode 100644
index 0000000..36d8aa2
--- /dev/null
+++ b/url/android/parsed_android.cc
@@ -0,0 +1,96 @@
+// Copyright 2019 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/android/parsed_android.h"
+
+#include <jni.h>
+
+#include "base/android/jni_android.h"
+#include "url/gurl_jni_headers/Parsed_jni.h"
+
+using base::android::AttachCurrentThread;
+using base::android::JavaRef;
+using base::android::ScopedJavaLocalRef;
+
+namespace url {
+
+namespace {
+
+// Constructs a Java Parsed object from the begin/length pair of every
+// component of |parsed|, its potentially_dangling_markup flag, and |inner| as
+// the reference to the Java-side inner Parsed.
+ScopedJavaLocalRef<jobject> CreateJavaParsed(JNIEnv* env,
+                                             const Parsed& parsed,
+                                             const JavaRef<jobject>& inner) {
+  // Offsets are passed to Java as jint, so jint must be able to represent
+  // every value of the native offset type: the same width is enough when the
+  // native type is signed, otherwise jint has to be strictly wider.
+  using OffsetType = decltype(parsed.scheme.begin);
+  constexpr bool kOffsetIsSigned = std::is_signed<OffsetType>::value;
+  constexpr size_t kOffsetSize = sizeof(OffsetType);
+  static_assert(kOffsetIsSigned ? sizeof(jint) >= kOffsetSize
+                                : sizeof(jint) > kOffsetSize,
+                "Java size offsets for Parsed Components must be large enough "
+                "to store the full C++ offset.");
+  return Java_Parsed_Constructor(
+      env, parsed.scheme.begin, parsed.scheme.len, parsed.username.begin,
+      parsed.username.len, parsed.password.begin, parsed.password.len,
+      parsed.host.begin, parsed.host.len, parsed.port.begin, parsed.port.len,
+      parsed.path.begin, parsed.path.len, parsed.query.begin, parsed.query.len,
+      parsed.ref.begin, parsed.ref.len, parsed.potentially_dangling_markup,
+      inner);
+}
+
+} // namespace
+
+// static
+// Converts |parsed| to its Java counterpart. When |parsed| has an inner
+// Parsed, that one is converted first (with a null inner reference of its own)
+// and attached to the outer Java object.
+ScopedJavaLocalRef<jobject> ParsedAndroid::InitFromParsed(
+    JNIEnv* env,
+    const Parsed& parsed) {
+  const Parsed* inner_parsed = parsed.inner_parsed();
+  ScopedJavaLocalRef<jobject> j_inner;
+  if (inner_parsed) {
+    j_inner = CreateJavaParsed(env, *inner_parsed, nullptr);
+  }
+  return CreateJavaParsed(env, parsed, j_inner);
+}
+
+// JNI entry point: allocates a native Parsed, fills in every component from
+// the begin/length pairs supplied by Java and returns it as an integer handle.
+// A non-zero |inner_parsed| is treated as a handle to a native Parsed: it is
+// passed to set_inner_parsed() and then deleted here.
+static jlong JNI_Parsed_CreateNative(JNIEnv* env,
+                                     jint scheme_begin,
+                                     jint scheme_length,
+                                     jint username_begin,
+                                     jint username_length,
+                                     jint password_begin,
+                                     jint password_length,
+                                     jint host_begin,
+                                     jint host_length,
+                                     jint port_begin,
+                                     jint port_length,
+                                     jint path_begin,
+                                     jint path_length,
+                                     jint query_begin,
+                                     jint query_length,
+                                     jint ref_begin,
+                                     jint ref_length,
+                                     jboolean potentially_dangling_markup,
+                                     jlong inner_parsed) {
+  Parsed* result = new Parsed();
+
+  // Authority components.
+  result->scheme.begin = scheme_begin;
+  result->scheme.len = scheme_length;
+  result->username.begin = username_begin;
+  result->username.len = username_length;
+  result->password.begin = password_begin;
+  result->password.len = password_length;
+  result->host.begin = host_begin;
+  result->host.len = host_length;
+  result->port.begin = port_begin;
+  result->port.len = port_length;
+
+  // Path, query and fragment.
+  result->path.begin = path_begin;
+  result->path.len = path_length;
+  result->query.begin = query_begin;
+  result->query.len = query_length;
+  result->ref.begin = ref_begin;
+  result->ref.len = ref_length;
+
+  result->potentially_dangling_markup = potentially_dangling_markup;
+
+  if (Parsed* inner = reinterpret_cast<Parsed*>(inner_parsed)) {
+    result->set_inner_parsed(*inner);
+    delete inner;
+  }
+  return reinterpret_cast<intptr_t>(result);
+}
+
+} // namespace url
diff --git a/url/android/parsed_android.h b/url/android/parsed_android.h
new file mode 100644
index 0000000..244ada5
--- /dev/null
+++ b/url/android/parsed_android.h
@@ -0,0 +1,22 @@
+// Copyright 2019 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_ANDROID_PARSED_ANDROID_H_
+#define URL_ANDROID_PARSED_ANDROID_H_
+
+#include "base/android/scoped_java_ref.h"
+#include "url/third_party/mozilla/url_parse.h"
+
+namespace url {
+
+// Helper for converting a native url::Parsed into its Java counterpart.
+class ParsedAndroid {
+ public:
+ // Returns a new Java Parsed object mirroring |parsed|, including its inner
+ // Parsed when one is set.
+ static base::android::ScopedJavaLocalRef<jobject> InitFromParsed(
+ JNIEnv* env,
+ const Parsed& parsed);
+};
+
+} // namespace url
+
+#endif // URL_ANDROID_PARSED_ANDROID_H_
diff --git a/url/android/robolectric_test_main.cc b/url/android/robolectric_test_main.cc
new file mode 100644
index 0000000..28fb4d2
--- /dev/null
+++ b/url/android/robolectric_test_main.cc
@@ -0,0 +1,15 @@
+// Copyright 2022 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+#include <jni.h>
+
+#include "base/android/base_jni_onload.h"
+#include "base/android/jni_android.h"
+
+// JNI library entry point for the Robolectric test library. Registers |vm|
+// with //base, runs base's JNI on-load initialization and reports the JNI
+// version this library uses.
+extern "C" JNI_EXPORT jint JNI_OnLoad(JavaVM* vm, void* reserved) {
+ base::android::InitVM(vm);
+ // NOTE(review): any result of OnJNIOnLoadInit() is ignored here; confirm
+ // that is intended.
+ base::android::OnJNIOnLoadInit();
+ // TODO(1223993): Initialize GURL schemes, like in
+ // content::RegisterContentSchemes().
+ return JNI_VERSION_1_4;
+}
diff --git a/url/android/test/java/src/org/chromium/url/JUnitTestGURLs.java b/url/android/test/java/src/org/chromium/url/JUnitTestGURLs.java
new file mode 100644
index 0000000..9f19c6c
--- /dev/null
+++ b/url/android/test/java/src/org/chromium/url/JUnitTestGURLs.java
@@ -0,0 +1,174 @@
+// Copyright 2020 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package org.chromium.url;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * A helper class that lets JUnit tests use GURLs without requiring native initialization.
+ * This should be used sparingly, when converting junit tests to Batched Instrumentation tests is
+ * not feasible.
+ *
+ * If any more complex GURL behaviour is tested, like comparing Origins, the test should be written
+ * as an Instrumentation test instead - you should never mock GURL.
+ */
+public class JUnitTestGURLs {
+ // In order to add a test URL:
+ // 1. Add the URL String as a constant here.
+ // 2. Add the constant to the map below, with a placeholder string for the GURL serialization.
+ // 3. Run JUnitTestGURLsTest (eg. './tools/autotest.py -C out/Debug JUnitTestGURLsTest').
+ // 4. Check logcat output or test exception for the correct serialization String, and place it
+ // in the map.
+ public static final String EXAMPLE_URL = "https://www.example.com/";
+ public static final String HTTP_URL = "http://www.example.com/";
+ public static final String URL_1 = "https://www.one.com/";
+ public static final String URL_1_NUMERAL = "https://www.1.com/";
+ public static final String URL_1_WITH_PATH = "https://www.one.com/some_path.html";
+ public static final String URL_2 = "https://www.two.com/";
+ public static final String URL_3 = "https://www.three.com/";
+ public static final String MAPS_URL = "https://maps.google.com/";
+ public static final String SEARCH_URL = "https://www.google.com/search?q=test";
+ public static final String SEARCH_2_URL = "https://www.google.com/search?q=query";
+ public static final String INITIAL_URL = "https://initial.com";
+ public static final String SPECULATED_URL = "https://speculated.com";
+ public static final String NTP_URL = "chrome://newtab/";
+ public static final String NTP_NATIVE_URL = "chrome-native://newtab/";
+ public static final String DOM_DISILLER_URL = "chrome-distiller://url";
+ public static final String RED_1 = "https://www.red.com/page1";
+ public static final String RED_2 = "https://www.red.com/page2";
+ public static final String RED_3 = "https://www.red.com/page3";
+ public static final String BLUE_1 = "https://www.blue.com/page1";
+ public static final String BLUE_2 = "https://www.blue.com/page2";
+ public static final String BLUE_3 = "https://www.blue.com/page3";
+ public static final String AMP_URL =
+ "https://www.google.com/amp/www.nyt.com/ampthml/blogs.html";
+ public static final String AMP_CACHE_URL =
+ "https://www.google.com/amp/s/www.nyt.com/ampthml/blogs.html";
+ public static final String TEXT_FRAGMENT_URL = "https://www.example.com/#:~:text=selector";
+ public static final String MULTI_TEXT_FRAGMENT_URL =
+ "https://www.example.com/#:~:text=selector1&text=selector2&text=selector3";
+ public static final String INVALID_URL = "http://0x100.0/";
+ public static final String GOOGLE_URL = "http://www.google.com/";
+ public static final String GOOGLE_URL_DOGS = "http://www.google.com/dogs";
+ public static final String GOOGLE_URL_DOGS_FUN = "http://www.google.com/dogs-are-fun";
+ public static final String GOOGLE_URL_DOG = "http://www.google.com/dog";
+ public static final String GOOGLE_URL_CAT = "http://www.google.com/cat";
+ public static final String GOOGLE_URL_PIG = "http://www.google.com/pig";
+ public static final String ABOUT_BLANK = "about:blank";
+ public static final String CHROME_ABOUT = "chrome://about";
+
+ // Map of URL string to GURL serialization.
+ /* package */ static final Map<String, String> sGURLMap;
+ static {
+ Map<String, String> map = new HashMap<>();
+ map.put(EXAMPLE_URL,
+ "82,1,true,0,5,0,-1,0,-1,8,15,0,-1,23,1,0,-1,0,-1,"
+ + "false,false,https://www.example.com/");
+ map.put(HTTP_URL,
+ "81,1,true,0,4,0,-1,0,-1,7,15,0,-1,22,1,0,-1,0,-1,"
+ + "false,false,http://www.example.com/");
+ map.put(URL_1,
+ "78,1,true,0,5,0,-1,0,-1,8,11,0,-1,19,1,0,-1,0,-1,"
+ + "false,false,https://www.one.com/");
+ map.put(URL_1_NUMERAL,
+ "75,1,true,0,5,0,-1,0,-1,8,9,0,-1,17,1,0,-1,0,-1,"
+ + "false,false,https://www.1.com/");
+ map.put(URL_1_WITH_PATH,
+ "93,1,true,0,5,0,-1,0,-1,8,11,0,-1,19,15,0,-1,0,-1,"
+ + "false,false,https://www.one.com/some_path.html");
+ map.put(URL_2,
+ "78,1,true,0,5,0,-1,0,-1,8,11,0,-1,19,1,0,-1,0,-1,"
+ + "false,false,https://www.two.com/");
+ map.put(URL_3,
+ "80,1,true,0,5,0,-1,0,-1,8,13,0,-1,21,1,0,-1,0,-1,false,false,https://www.three.com/");
+ map.put(RED_1,
+ "83,1,true,0,5,0,-1,0,-1,8,11,0,-1,19,6,0,-1,0,-1,"
+ + "false,false,https://www.red.com/page1");
+ map.put(RED_2,
+ "83,1,true,0,5,0,-1,0,-1,8,11,0,-1,19,6,0,-1,0,-1,"
+ + "false,false,https://www.red.com/page2");
+ map.put(RED_3,
+ "83,1,true,0,5,0,-1,0,-1,8,11,0,-1,19,6,0,-1,0,-1,"
+ + "false,false,https://www.red.com/page3");
+ map.put(BLUE_1,
+ "84,1,true,0,5,0,-1,0,-1,8,12,0,-1,20,6,0,-1,0,-1,"
+ + "false,false,https://www.blue.com/page1");
+ map.put(BLUE_2,
+ "84,1,true,0,5,0,-1,0,-1,8,12,0,-1,20,6,0,-1,0,-1,"
+ + "false,false,https://www.blue.com/page2");
+ map.put(BLUE_3,
+ "84,1,true,0,5,0,-1,0,-1,8,12,0,-1,20,6,0,-1,0,-1,"
+ + "false,false,https://www.blue.com/page3");
+ map.put(SEARCH_URL,
+ "94,1,true,0,5,0,-1,0,-1,8,14,0,-1,22,7,30,6,0,-1,"
+ + "false,false,https://www.google.com/search?q=test");
+ map.put(SEARCH_2_URL,
+ "95,1,true,0,5,0,-1,0,-1,8,14,0,-1,22,7,30,7,0,-1,"
+ + "false,false,https://www.google.com/search?q=query");
+ map.put(INITIAL_URL,
+ "78,1,true,0,5,0,-1,0,-1,8,11,0,-1,19,1,0,-1,0,-1,"
+ + "false,false,https://initial.com/");
+ map.put(SPECULATED_URL,
+ "81,1,true,0,5,0,-1,0,-1,8,14,0,-1,22,1,0,-1,0,-1,"
+ + "false,false,https://speculated.com/");
+ map.put(NTP_URL,
+ "73,1,true,0,6,0,-1,0,-1,9,6,0,-1,15,1,0,-1,0,-1,"
+ + "false,false,chrome://newtab/");
+ map.put(NTP_NATIVE_URL,
+ "82,1,true,0,13,0,-1,0,-1,16,6,0,-1,22,1,0,-1,0,-1,false,false,"
+ + "chrome-native://newtab/");
+ map.put(DOM_DISILLER_URL,
+ "82,1,true,0,16,0,-1,0,-1,19,3,0,-1,22,1,0,-1,0,-1,false,false,"
+ + "chrome-distiller://url/");
+ map.put(MAPS_URL,
+ "82,1,true,0,5,0,-1,0,-1,8,15,0,-1,23,1,0,-1,0,-1,false,false,https://maps.google.com/");
+ map.put(AMP_URL,
+ "116,1,true,0,5,0,-1,0,-1,8,14,0,-1,22,35,0,-1,0,-1,false,false,https://www.google.com/amp/www.nyt.com/ampthml/blogs.html");
+ map.put(AMP_CACHE_URL,
+ "118,1,true,0,5,0,-1,0,-1,8,14,0,-1,22,37,0,-1,0,-1,false,false,https://www.google.com/amp/s/www.nyt.com/ampthml/blogs.html");
+ map.put(TEXT_FRAGMENT_URL,
+ "100,1,true,0,5,0,-1,0,-1,8,15,0,-1,23,1,0,-1,25,16,false,false,https://www.example.com/#:~:text=selector");
+ map.put(MULTI_TEXT_FRAGMENT_URL,
+ "131,1,true,0,5,0,-1,0,-1,8,15,0,-1,23,1,0,-1,25,47,false,false,https://www.example.com/#:~:text=selector1&text=selector2&text=selector3");
+ map.put(INVALID_URL,
+ "73,1,false,0,4,0,-1,0,-1,7,7,0,-1,14,1,0,-1,0,-1,false,false,http://0x100.0/");
+ map.put(GOOGLE_URL,
+ "80,1,true,0,4,0,-1,0,-1,7,14,0,-1,21,1,0,-1,0,-1,false,false,http://www.google.com/");
+ map.put(GOOGLE_URL_DOGS,
+ "84,1,true,0,4,0,-1,0,-1,7,14,0,-1,21,5,0,-1,0,-1,false,false,http://www.google.com/dogs");
+ map.put(GOOGLE_URL_DOGS_FUN,
+ "93,1,true,0,4,0,-1,0,-1,7,14,0,-1,21,13,0,-1,0,-1,false,false,http://www.google.com/dogs-are-fun");
+ map.put(GOOGLE_URL_DOG,
+ "83,1,true,0,4,0,-1,0,-1,7,14,0,-1,21,4,0,-1,0,-1,false,false,http://www.google.com/dog");
+ map.put(GOOGLE_URL_CAT,
+ "83,1,true,0,4,0,-1,0,-1,7,14,0,-1,21,4,0,-1,0,-1,false,false,http://www.google.com/cat");
+ map.put(GOOGLE_URL_PIG,
+ "83,1,true,0,4,0,-1,0,-1,7,14,0,-1,21,4,0,-1,0,-1,false,false,http://www.google.com/pig");
+ map.put(ABOUT_BLANK,
+ "68,1,true,0,5,0,-1,0,-1,0,-1,0,-1,6,5,0,-1,0,-1,false,false,about:blank");
+ map.put(CHROME_ABOUT,
+ "72,1,true,0,6,0,-1,0,-1,9,5,0,-1,14,1,0,-1,0,-1,false,false,chrome://about/");
+ sGURLMap = Collections.unmodifiableMap(map);
+ }
+
+    /**
+     * Looks up the serialization registered for {@code url} and deserializes it into a GURL.
+     *
+     * @param url a URL string that must be registered in |sGURLMap|.
+     * @return the GURL resulting from parsing the provided url.
+     * @throws IllegalArgumentException if {@code url} has no entry in |sGURLMap|.
+     * @throws RuntimeException if the registered serialization yields an empty GURL.
+     */
+    public static GURL getGURL(String url) {
+        final String registered = sGURLMap.get(url);
+        if (registered == null) {
+            throw new IllegalArgumentException("URL " + url + " not found");
+        }
+        // The map uses ',' between fields; swap in the delimiter GURL.deserialize() expects.
+        final String serialized = registered.replace(',', GURL.SERIALIZER_DELIMITER);
+        final GURL gurl = GURL.deserialize(serialized);
+        // If you're here looking to use an empty GURL, just use GURL.emptyGURL() directly.
+        if (gurl.isEmpty()) {
+            throw new RuntimeException("Could not deserialize: " + serialized);
+        }
+        return gurl;
+    }
+}
diff --git a/url/android/test/java/src/org/chromium/url/ShadowGURL.java b/url/android/test/java/src/org/chromium/url/ShadowGURL.java
new file mode 100644
index 0000000..53e1da1
--- /dev/null
+++ b/url/android/test/java/src/org/chromium/url/ShadowGURL.java
@@ -0,0 +1,62 @@
+// Copyright 2021 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package org.chromium.url;
+
+import org.robolectric.annotation.Implementation;
+import org.robolectric.annotation.Implements;
+
+import org.chromium.url.GURL.Natives;
+
+/**
+ * Robolectric shadow of {@link GURL} that lets tests use {@code GURL} without the native
+ * libraries loaded.
+ *
+ * <p>This shadow can create only GURLs listed in {@link JUnitTestGURLs}.
+ */
+@Implements(GURL.class)
+public class ShadowGURL {
+    /** Single natives stand-in handed out by {@link #getNatives()}. */
+    private static final Natives sNatives = new NativesImpl();
+
+    /**
+     * We could instead shadow {@code GURLJni#get}, but that would require tests using this to load
+     * both shadows.
+     */
+    @Implementation
+    protected static Natives getNatives() {
+        return sNatives;
+    }
+
+    @Implementation
+    protected static void ensureNativeInitializedForGURL() {
+        // Skip native initialization.
+    }
+
+    /**
+     * The {@link GURL.Natives} implementation used by a shadowed {@link GURL}. Only
+     * {@link #init} is supported; every other method throws
+     * {@link UnsupportedOperationException}.
+     */
+    private static class NativesImpl implements Natives {
+        /** Initializes {@code target} from the GURL registered for {@code url}. */
+        @Override
+        public void init(String url, GURL target) {
+            GURL registered = JUnitTestGURLs.getGURL(url);
+            target.initForTesting(registered);
+        }
+
+        @Override
+        public void getOrigin(String spec, boolean isValid, long nativeParsed, GURL target) {
+            throw new UnsupportedOperationException(
+                    "ShadowGURL.NativesImpl#getOrigin is not implemented");
+        }
+
+        @Override
+        public boolean domainIs(String spec, boolean isValid, long nativeParsed, String domain) {
+            throw new UnsupportedOperationException(
+                    "ShadowGURL.NativesImpl#domainIs is not implemented");
+        }
+
+        @Override
+        public long createNative(String spec, boolean isValid, long nativeParsed) {
+            throw new UnsupportedOperationException(
+                    "ShadowGURL.NativesImpl#createNative is not implemented");
+        }
+    }
+}
diff --git a/url/features.gni b/url/features.gni
index 31078d7..482d849 100644
--- a/url/features.gni
+++ b/url/features.gni
@@ -1,4 +1,4 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
+# Copyright 2016 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
@@ -13,4 +13,4 @@
# Never use platform icu for host toolchain.
# E.g. Don't apply this for host binaries when target_os = "android".
use_platform_icu_alternatives =
- use_platform_icu_alternatives && is_starboardized_toolchain
+ use_platform_icu_alternatives && current_toolchain == default_toolchain
diff --git a/url/gurl.cc b/url/gurl.cc
index c2ce264..6930f73 100644
--- a/url/gurl.cc
+++ b/url/gurl.cc
@@ -1,27 +1,25 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "url/gurl.h"
-#include <algorithm>
-#include <ostream>
+#include <stddef.h>
-#include "base/lazy_instance.h"
-#include "base/logging.h"
+#include <algorithm>
+#include <memory>
+#include <ostream>
+#include <utility>
+
+#include "base/check_op.h"
+#include "base/no_destructor.h"
#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
+#include "base/trace_event/base_tracing.h"
#include "base/trace_event/memory_usage_estimator.h"
-#include "starboard/types.h"
#include "url/url_canon_stdstring.h"
#include "url/url_util.h"
-namespace {
-
-static base::LazyInstance<GURL>::Leaky empty_gurl = LAZY_INSTANCE_INITIALIZER;
-
-} // namespace
-
GURL::GURL() : is_valid_(false) {
}
@@ -30,7 +28,7 @@
is_valid_(other.is_valid_),
parsed_(other.parsed_) {
if (other.inner_url_)
- inner_url_.reset(new GURL(*other.inner_url_));
+ inner_url_ = std::make_unique<GURL>(*other.inner_url_);
// Valid filesystem urls should always have an inner_url_.
DCHECK(!is_valid_ || !SchemeIsFileSystem() || inner_url_);
}
@@ -53,7 +51,7 @@
}
GURL::GURL(const std::string& url_string, RetainWhiteSpaceSelector) {
- InitCanonical(base::StringPiece(url_string), false);
+ InitCanonical(url_string, false);
}
GURL::GURL(const char* canonical_spec,
@@ -71,9 +69,8 @@
InitializeFromCanonicalSpec();
}
-template<typename STR>
-void GURL::InitCanonical(base::BasicStringPiece<STR> input_spec,
- bool trim_path_end) {
+template <typename T, typename CharT>
+void GURL::InitCanonical(T input_spec, bool trim_path_end) {
url::StdStringCanonOutput output(&spec_);
is_valid_ = url::Canonicalize(
input_spec.data(), static_cast<int>(input_spec.length()), trim_path_end,
@@ -81,8 +78,8 @@
output.Complete(); // Must be done before using string.
if (is_valid_ && SchemeIsFileSystem()) {
- inner_url_.reset(new GURL(spec_.data(), parsed_.Length(),
- *parsed_.inner_parsed(), true));
+ inner_url_ = std::make_unique<GURL>(spec_.data(), parsed_.Length(),
+ *parsed_.inner_parsed(), true);
}
// Valid URLs always have non-empty specs.
DCHECK(!is_valid_ || !spec_.empty());
@@ -90,9 +87,8 @@
void GURL::InitializeFromCanonicalSpec() {
if (is_valid_ && SchemeIsFileSystem()) {
- inner_url_.reset(
- new GURL(spec_.data(), parsed_.Length(),
- *parsed_.inner_parsed(), true));
+ inner_url_ = std::make_unique<GURL>(spec_.data(), parsed_.Length(),
+ *parsed_.inner_parsed(), true);
}
#ifndef NDEBUG
@@ -114,17 +110,17 @@
// removed from a "foo:hello #ref" URL (see http://crbug.com/291747).
GURL test_url(spec_, RETAIN_TRAILING_PATH_WHITEPACE);
- DCHECK(test_url.is_valid_ == is_valid_);
- DCHECK(test_url.spec_ == spec_);
+ DCHECK_EQ(test_url.is_valid_, is_valid_);
+ DCHECK_EQ(test_url.spec_, spec_);
- DCHECK(test_url.parsed_.scheme == parsed_.scheme);
- DCHECK(test_url.parsed_.username == parsed_.username);
- DCHECK(test_url.parsed_.password == parsed_.password);
- DCHECK(test_url.parsed_.host == parsed_.host);
- DCHECK(test_url.parsed_.port == parsed_.port);
- DCHECK(test_url.parsed_.path == parsed_.path);
- DCHECK(test_url.parsed_.query == parsed_.query);
- DCHECK(test_url.parsed_.ref == parsed_.ref);
+ DCHECK_EQ(test_url.parsed_.scheme, parsed_.scheme);
+ DCHECK_EQ(test_url.parsed_.username, parsed_.username);
+ DCHECK_EQ(test_url.parsed_.password, parsed_.password);
+ DCHECK_EQ(test_url.parsed_.host, parsed_.host);
+ DCHECK_EQ(test_url.parsed_.port, parsed_.port);
+ DCHECK_EQ(test_url.parsed_.path, parsed_.path);
+ DCHECK_EQ(test_url.parsed_.query, parsed_.query);
+ DCHECK_EQ(test_url.parsed_.ref, parsed_.ref);
}
}
#endif
@@ -142,7 +138,7 @@
else if (inner_url_)
*inner_url_ = *other.inner_url_;
else
- inner_url_.reset(new GURL(*other.inner_url_));
+ inner_url_ = std::make_unique<GURL>(*other.inner_url_);
return *this;
}
@@ -193,9 +189,9 @@
output.Complete();
result.is_valid_ = true;
if (result.SchemeIsFileSystem()) {
- result.inner_url_.reset(
- new GURL(result.spec_.data(), result.parsed_.Length(),
- *result.parsed_.inner_parsed(), true));
+ result.inner_url_ =
+ std::make_unique<GURL>(result.spec_.data(), result.parsed_.Length(),
+ *result.parsed_.inner_parsed(), true);
}
return result;
}
@@ -219,16 +215,15 @@
output.Complete();
result.is_valid_ = true;
if (result.SchemeIsFileSystem()) {
- result.inner_url_.reset(
- new GURL(result.spec_.data(), result.parsed_.Length(),
- *result.parsed_.inner_parsed(), true));
+ result.inner_url_ =
+ std::make_unique<GURL>(result.spec_.data(), result.parsed_.Length(),
+ *result.parsed_.inner_parsed(), true);
}
return result;
}
// Note: code duplicated below (it's inconvenient to use a template here).
-GURL GURL::ReplaceComponents(
- const url::Replacements<char>& replacements) const {
+GURL GURL::ReplaceComponents(const Replacements& replacements) const {
GURL result;
// Not allowed for invalid URLs.
@@ -241,17 +236,13 @@
NULL, &output, &result.parsed_);
output.Complete();
- if (result.is_valid_ && result.SchemeIsFileSystem()) {
- result.inner_url_.reset(new GURL(result.spec_.data(),
- result.parsed_.Length(),
- *result.parsed_.inner_parsed(), true));
- }
+
+ result.ProcessFileSystemURLAfterReplaceComponents();
return result;
}
// Note: code duplicated above (it's inconvenient to use a template here).
-GURL GURL::ReplaceComponents(
- const url::Replacements<base::char16>& replacements) const {
+GURL GURL::ReplaceComponents(const ReplacementsW& replacements) const {
GURL result;
// Not allowed for invalid URLs.
@@ -264,24 +255,31 @@
NULL, &output, &result.parsed_);
output.Complete();
- if (result.is_valid_ && result.SchemeIsFileSystem()) {
- result.inner_url_.reset(new GURL(result.spec_.data(),
- result.parsed_.Length(),
- *result.parsed_.inner_parsed(), true));
- }
+
+ result.ProcessFileSystemURLAfterReplaceComponents();
+
return result;
}
-GURL GURL::GetOrigin() const {
+// Rebuilds |inner_url_| from the inner parsed data when this URL is valid and
+// has the filesystem scheme; does nothing otherwise.
+void GURL::ProcessFileSystemURLAfterReplaceComponents() {
+  const bool needs_inner_url = is_valid_ && SchemeIsFileSystem();
+  if (!needs_inner_url)
+    return;
+
+  inner_url_ = std::make_unique<GURL>(spec_.data(), parsed_.Length(),
+                                      *parsed_.inner_parsed(), true);
+}
+
+GURL GURL::DeprecatedGetOriginAsURL() const {
// This doesn't make sense for invalid or nonstandard URLs, so return
// the empty URL.
if (!is_valid_ || !IsStandard())
return GURL();
if (SchemeIsFileSystem())
- return inner_url_->GetOrigin();
+ return inner_url_->DeprecatedGetOriginAsURL();
- url::Replacements<char> replacements;
+ Replacements replacements;
replacements.ClearUsername();
replacements.ClearPassword();
replacements.ClearPath();
@@ -292,13 +290,13 @@
}
GURL GURL::GetAsReferrer() const {
- if (!SchemeIsValidForReferrer())
+ if (!is_valid() || !IsReferrerScheme(spec_.data(), parsed_.scheme))
return GURL();
if (!has_ref() && !has_username() && !has_password())
return GURL(*this);
- url::Replacements<char> replacements;
+ Replacements replacements;
replacements.ClearRef();
replacements.ClearUsername();
replacements.ClearPassword();
@@ -333,44 +331,65 @@
return Resolve(".");
}
+// Returns a copy of this URL with the ref component cleared. A URL that has no
+// ref is returned as a plain copy.
+GURL GURL::GetWithoutRef() const {
+  if (has_ref()) {
+    Replacements strip_ref;
+    strip_ref.ClearRef();
+    return ReplaceComponents(strip_ref);
+  }
+  return GURL(*this);
+}
+
bool GURL::IsStandard() const {
return url::IsStandard(spec_.data(), parsed_.scheme);
}
bool GURL::IsAboutBlank() const {
- if (!SchemeIs(url::kAboutScheme))
- return false;
+ return IsAboutUrl(url::kAboutBlankPath);
+}
- if (has_host() || has_username() || has_password() || has_port())
- return false;
-
- if (path() != url::kAboutBlankPath && path() != url::kAboutBlankWithHashPath)
- return false;
-
- return true;
+bool GURL::IsAboutSrcdoc() const {
+ return IsAboutUrl(url::kAboutSrcdocPath);
}
bool GURL::SchemeIs(base::StringPiece lower_ascii_scheme) const {
DCHECK(base::IsStringASCII(lower_ascii_scheme));
DCHECK(base::ToLowerASCII(lower_ascii_scheme) == lower_ascii_scheme);
- if (parsed_.scheme.len <= 0)
+ if (!has_scheme())
return lower_ascii_scheme.empty();
return scheme_piece() == lower_ascii_scheme;
}
bool GURL::SchemeIsHTTPOrHTTPS() const {
- return SchemeIs(url::kHttpScheme) || SchemeIs(url::kHttpsScheme);
-}
-
-bool GURL::SchemeIsValidForReferrer() const {
- return is_valid_ && IsReferrerScheme(spec_.data(), parsed_.scheme);
+ return SchemeIs(url::kHttpsScheme) || SchemeIs(url::kHttpScheme);
}
bool GURL::SchemeIsWSOrWSS() const {
return SchemeIs(url::kWsScheme) || SchemeIs(url::kWssScheme);
}
+bool GURL::SchemeIsCryptographic() const {
+ if (!has_scheme())
+ return false;
+ return SchemeIsCryptographic(scheme_piece());
+}
+
+bool GURL::SchemeIsCryptographic(base::StringPiece lower_ascii_scheme) {
+ DCHECK(base::IsStringASCII(lower_ascii_scheme));
+ DCHECK(base::ToLowerASCII(lower_ascii_scheme) == lower_ascii_scheme);
+
+ return lower_ascii_scheme == url::kHttpsScheme ||
+ lower_ascii_scheme == url::kWssScheme;
+}
+
+bool GURL::SchemeIsLocal() const {
+ // The `filesystem:` scheme is not in the Fetch spec, but Chromium still
+ // supports it in large part. It should be treated as a local scheme too.
+ return SchemeIs(url::kAboutScheme) || SchemeIs(url::kBlobScheme) ||
+ SchemeIs(url::kDataScheme) || SchemeIs(url::kFileSystemScheme);
+}
+
int GURL::IntPort() const {
if (parsed_.port.is_nonempty())
return url::ParsePort(spec_.data(), parsed_.port);
@@ -391,14 +410,14 @@
return ComponentString(file_component);
}
-std::string GURL::PathForRequest() const {
- DCHECK(parsed_.path.len > 0)
+base::StringPiece GURL::PathForRequestPiece() const {
+ DCHECK(parsed_.path.is_nonempty())
<< "Canonical path for requests should be non-empty";
- if (parsed_.ref.len >= 0) {
+ if (parsed_.ref.is_valid()) {
// Clip off the reference when it exists. The reference starts after the
// #-sign, so we have to subtract one to also remove it.
- return std::string(spec_, parsed_.path.begin,
- parsed_.ref.begin - parsed_.path.begin - 1);
+ return base::StringPiece(spec_).substr(
+ parsed_.path.begin, parsed_.ref.begin - parsed_.path.begin - 1);
}
// Compute the actual path length, rather than depending on the spec's
// terminator. If we're an inner_url, our spec continues on into our outer
@@ -407,11 +426,15 @@
if (parsed_.query.is_valid())
path_len = parsed_.query.end() - parsed_.path.begin;
- return std::string(spec_, parsed_.path.begin, path_len);
+ return base::StringPiece(spec_).substr(parsed_.path.begin, path_len);
+}
+
+std::string GURL::PathForRequest() const {
+ return std::string(PathForRequestPiece());
}
std::string GURL::HostNoBrackets() const {
- return HostNoBracketsPiece().as_string();
+ return std::string(HostNoBracketsPiece());
}
base::StringPiece GURL::HostNoBracketsPiece() const {
@@ -425,7 +448,16 @@
}
std::string GURL::GetContent() const {
- return is_valid_ ? ComponentString(parsed_.GetContent()) : std::string();
+ return std::string(GetContentPiece());
+}
+
+base::StringPiece GURL::GetContentPiece() const {
+ if (!is_valid_)
+ return base::StringPiece();
+ url::Component content_component = parsed_.GetContent();
+ if (!SchemeIs(url::kJavaScriptScheme) && parsed_.ref.is_valid())
+ content_component.len -= parsed_.ref.len + 1;
+ return ComponentStringPiece(content_component);
}
bool GURL::HostIsIPAddress() const {
@@ -433,7 +465,8 @@
}
const GURL& GURL::EmptyGURL() {
- return empty_gurl.Get();
+ static base::NoDestructor<GURL> empty_gurl;
+ return *empty_gurl;
}
bool GURL::DomainIs(base::StringPiece canonical_domain) const {
@@ -462,13 +495,43 @@
}
size_t GURL::EstimateMemoryUsage() const {
-#if defined(STARBOARD)
- return 0;
-#else
return base::trace_event::EstimateMemoryUsage(spec_) +
base::trace_event::EstimateMemoryUsage(inner_url_) +
(parsed_.inner_parsed() ? sizeof(url::Parsed) : 0);
-#endif
+}
+
+bool GURL::IsAboutUrl(base::StringPiece allowed_path) const {
+ if (!SchemeIs(url::kAboutScheme))
+ return false;
+
+ if (has_host() || has_username() || has_password() || has_port())
+ return false;
+
+ return IsAboutPath(path_piece(), allowed_path);
+}
+
+// static
+bool GURL::IsAboutPath(base::StringPiece actual_path,
+ base::StringPiece allowed_path) {
+ if (!base::StartsWith(actual_path, allowed_path))
+ return false;
+
+ if (actual_path.size() == allowed_path.size()) {
+ DCHECK_EQ(actual_path, allowed_path);
+ return true;
+ }
+
+ if ((actual_path.size() == allowed_path.size() + 1) &&
+ actual_path.back() == '/') {
+ DCHECK_EQ(actual_path, std::string(allowed_path) + '/');
+ return true;
+ }
+
+ return false;
+}
+
+void GURL::WriteIntoTrace(perfetto::TracedValue context) const {
+ std::move(context).WriteString(possibly_invalid_spec());
}
std::ostream& operator<<(std::ostream& out, const GURL& url) {
@@ -484,7 +547,9 @@
}
bool operator==(const GURL& x, const base::StringPiece& spec) {
- DCHECK_EQ(GURL(spec).possibly_invalid_spec(), spec);
+ DCHECK_EQ(GURL(spec).possibly_invalid_spec(), spec)
+ << "Comparisons of GURLs and strings must ensure as a precondition that "
+ "the string is fully canonicalized.";
return x.possibly_invalid_spec() == spec;
}
@@ -499,3 +564,15 @@
bool operator!=(const base::StringPiece& spec, const GURL& x) {
return !(x == spec);
}
+
+namespace url::debug {
+
+ScopedUrlCrashKey::ScopedUrlCrashKey(base::debug::CrashKeyString* crash_key,
+ const GURL& url)
+ : scoped_string_value_(
+ crash_key,
+ url.is_empty() ? "<empty url>" : url.possibly_invalid_spec()) {}
+
+ScopedUrlCrashKey::~ScopedUrlCrashKey() = default;
+
+} // namespace url::debug
diff --git a/url/gurl.h b/url/gurl.h
index 0f0fd8c..688a101 100644
--- a/url/gurl.h
+++ b/url/gurl.h
@@ -1,25 +1,27 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef URL_GURL_H_
#define URL_GURL_H_
+#include <stddef.h>
+
#include <iosfwd>
#include <memory>
#include <string>
+#include "base/component_export.h"
#include "base/debug/alias.h"
-#include "base/strings/string16.h"
+#include "base/debug/crash_logging.h"
#include "base/strings/string_piece.h"
-#include "starboard/types.h"
+#include "base/trace_event/base_tracing_forward.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
#include "url/url_canon_stdstring.h"
#include "url/url_constants.h"
-#include "url/url_export.h"
-// Represents a URL.
+// Represents a URL. GURL is Google's URL parsing library.
//
// A parsed canonicalized URL is guaranteed to be UTF-8. Any non-ASCII input
// characters are UTF-8 encoded and % escaped to ASCII.
@@ -42,10 +44,10 @@
// path that contains a literal '#'. Using string concatenation will generate a
// URL with a truncated path and a reference fragment, while ReplaceComponents
// will know to escape this and produce the desired result.
-class URL_EXPORT GURL {
+class COMPONENT_EXPORT(URL) GURL {
public:
- typedef url::StringPieceReplacements<std::string> Replacements;
- typedef url::StringPieceReplacements<base::string16> ReplacementsW;
+ typedef url::StringPieceReplacements<char> Replacements;
+ typedef url::StringPieceReplacements<char16_t> ReplacementsW;
// Creates an empty, invalid URL.
GURL();
@@ -162,11 +164,10 @@
// It is an error to replace components of an invalid URL. The result will
// be the empty URL.
//
- // Note that we use the more general url::Replacements type to give
- // callers extra flexibility rather than our override.
- GURL ReplaceComponents(const url::Replacements<char>& replacements) const;
- GURL ReplaceComponents(
- const url::Replacements<base::char16>& replacements) const;
+ // Note that this intentionally disallows direct use of url::Replacements,
+ // which is harder to use correctly.
+ GURL ReplaceComponents(const Replacements& replacements) const;
+ GURL ReplaceComponents(const ReplacementsW& replacements) const;
// A helper function that is equivalent to replacing the path with a slash
// and clearing out everything after that. We sometimes need to know just the
@@ -188,6 +189,14 @@
// scheme, authority or path, it will return an empty, invalid GURL.
GURL GetWithoutFilename() const;
+ // A helper function to return a GURL without the Ref (also named Fragment
+ // Identifier). For example,
+ // GURL("https://www.foo.com/index.html#test").GetWithoutRef().spec()
+ // will return "https://www.foo.com/index.html".
+ // If the GURL has no ref, a copy of it is returned as-is. Otherwise the
+ // result follows ReplaceComponents(): empty and invalid for an invalid GURL.
+ GURL GetWithoutRef() const;
+
// A helper function to return a GURL containing just the scheme, host,
// and port from a URL. Equivalent to clearing any username and password,
// replacing the path with a slash, and clearing everything after that. If
@@ -197,7 +206,13 @@
//
// It is an error to get the origin of an invalid URL. The result
// will be the empty URL.
- GURL GetOrigin() const;
+ //
+ // WARNING: Please avoid converting urls into origins if at all possible!
+ // //docs/security/origin-vs-url.md is a list of gotchas that can result. Such
+ // conversions will likely return a wrong result for about:blank and/or
+ // in the presence of iframe.sandbox attribute. Prefer to get origins directly
+ // from the source (e.g. RenderFrameHost::GetLastCommittedOrigin).
+ GURL DeprecatedGetOriginAsURL() const;
// A helper function to return a GURL stripped from the elements that are not
// supposed to be sent as HTTP referrer: username, password and ref fragment.
@@ -216,6 +231,10 @@
// about:blank/#foo.
bool IsAboutBlank() const;
+ // Returns true when the url is of the form about:srcdoc, about:srcdoc?foo or
+ // about:srcdoc/#foo.
+ bool IsAboutSrcdoc() const;
+
// Returns true if the given parameter (should be lower-case ASCII to match
// the canonicalized scheme) is the scheme for this URL. Do not include a
// colon.
@@ -224,9 +243,6 @@
// Returns true if the scheme is "http" or "https".
bool SchemeIsHTTPOrHTTPS() const;
- // Returns true if the scheme is valid for use as a referrer.
- bool SchemeIsValidForReferrer() const;
-
// Returns true is the scheme is "ws" or "wss".
bool SchemeIsWSOrWSS() const;
@@ -248,28 +264,36 @@
// is minimally trustworthy. For that, see Chromium's |IsOriginSecure| for a
// higher-level and more complete semantics. See that function's documentation
// for more detail.
- bool SchemeIsCryptographic() const {
- return SchemeIs(url::kHttpsScheme) || SchemeIs(url::kWssScheme);
- }
+ bool SchemeIsCryptographic() const;
+
+ // As above, but static. Parameter should be lower-case ASCII.
+ static bool SchemeIsCryptographic(base::StringPiece lower_ascii_scheme);
// Returns true if the scheme is "blob".
bool SchemeIsBlob() const {
return SchemeIs(url::kBlobScheme);
}
- // The "content" of the URL is everything after the scheme (skipping the
- // scheme delimiting colon). It is an error to get the content of an invalid
- // URL: the result will be an empty string.
+ // Returns true if the scheme is a local scheme, as defined in Fetch:
+ // https://fetch.spec.whatwg.org/#local-scheme
+ bool SchemeIsLocal() const;
+
+ // For most URLs, the "content" is everything after the scheme (skipping the
+ // scheme delimiting colon) and before the fragment (skipping the fragment
+ // delimiting octothorpe). For javascript URLs the "content" also includes the
+ // fragment delimiter and fragment.
+ //
+ // It is an error to get the content of an invalid URL: the result will be an
+ // empty string.
std::string GetContent() const;
+ base::StringPiece GetContentPiece() const;
// Returns true if the hostname is an IP address. Note: this function isn't
// as cheap as a simple getter because it re-parses the hostname to verify.
bool HostIsIPAddress() const;
// Not including the colon. If you are comparing schemes, prefer SchemeIs.
- bool has_scheme() const {
- return parsed_.scheme.len >= 0;
- }
+ bool has_scheme() const { return parsed_.scheme.is_valid(); }
std::string scheme() const {
return ComponentString(parsed_.scheme);
}
@@ -277,9 +301,7 @@
return ComponentStringPiece(parsed_.scheme);
}
- bool has_username() const {
- return parsed_.username.len >= 0;
- }
+ bool has_username() const { return parsed_.username.is_valid(); }
std::string username() const {
return ComponentString(parsed_.username);
}
@@ -287,9 +309,7 @@
return ComponentStringPiece(parsed_.username);
}
- bool has_password() const {
- return parsed_.password.len >= 0;
- }
+ bool has_password() const { return parsed_.password.is_valid(); }
std::string password() const {
return ComponentString(parsed_.password);
}
@@ -302,7 +322,7 @@
// HostNoBrackets() below.
bool has_host() const {
// Note that hosts are special, absence of host means length 0.
- return parsed_.host.len > 0;
+ return parsed_.host.is_nonempty();
}
std::string host() const {
return ComponentString(parsed_.host);
@@ -314,9 +334,7 @@
// The port if one is explicitly specified. Most callers will want IntPort()
// or EffectiveIntPort() instead of these. The getters will not include the
// ':'.
- bool has_port() const {
- return parsed_.port.len >= 0;
- }
+ bool has_port() const { return parsed_.port.is_valid(); }
std::string port() const {
return ComponentString(parsed_.port);
}
@@ -326,9 +344,7 @@
// Including first slash following host, up to the query. The URL
// "http://www.google.com/" has a path of "/".
- bool has_path() const {
- return parsed_.path.len >= 0;
- }
+ bool has_path() const { return parsed_.path.is_valid(); }
std::string path() const {
return ComponentString(parsed_.path);
}
@@ -337,9 +353,7 @@
}
// Stuff following '?' up to the ref. The getters will not include the '?'.
- bool has_query() const {
- return parsed_.query.len >= 0;
- }
+ bool has_query() const { return parsed_.query.is_valid(); }
std::string query() const {
return ComponentString(parsed_.query);
}
@@ -349,9 +363,7 @@
// Stuff following '#' to the end of the string. This will be %-escaped UTF-8.
// The getters will not include the '#'.
- bool has_ref() const {
- return parsed_.ref.len >= 0;
- }
+ bool has_ref() const { return parsed_.ref.is_valid(); }
std::string ref() const {
return ComponentString(parsed_.ref);
}
@@ -376,6 +388,9 @@
// parameter, and query portions of the URL. It is guaranteed to be ASCII.
std::string PathForRequest() const;
+ // Returns the same characters as PathForRequest(), avoiding a copy.
+ base::StringPiece PathForRequestPiece() const;
+
// Returns the host, excluding the square brackets surrounding IPv6 address
// literals. This can be useful for passing to getaddrinfo().
std::string HostNoBrackets() const;
@@ -424,6 +439,12 @@
// See base/trace_event/memory_usage_estimator.h for more info.
size_t EstimateMemoryUsage() const;
+ // Helper used by GURL::IsAboutUrl and KURL::IsAboutURL.
+ static bool IsAboutPath(base::StringPiece actual_path,
+ base::StringPiece allowed_path);
+
+ void WriteIntoTrace(perfetto::TracedValue context) const;
+
private:
// Variant of the string parsing constructor that allows the caller to elect
// retain trailing whitespace, if any, on the passed URL spec, but only if
@@ -433,24 +454,27 @@
enum RetainWhiteSpaceSelector { RETAIN_TRAILING_PATH_WHITEPACE };
GURL(const std::string& url_string, RetainWhiteSpaceSelector);
- template<typename STR>
- void InitCanonical(base::BasicStringPiece<STR> input_spec,
- bool trim_path_end);
+ template <typename T, typename CharT = typename T::value_type>
+ void InitCanonical(T input_spec, bool trim_path_end);
void InitializeFromCanonicalSpec();
+ // Helper used by IsAboutBlank and IsAboutSrcdoc.
+ bool IsAboutUrl(base::StringPiece allowed_path) const;
+
// Returns the substring of the input identified by the given component.
std::string ComponentString(const url::Component& comp) const {
- if (comp.len <= 0)
- return std::string();
- return std::string(spec_, comp.begin, comp.len);
+ return std::string(ComponentStringPiece(comp));
}
base::StringPiece ComponentStringPiece(const url::Component& comp) const {
- if (comp.len <= 0)
+ if (comp.is_empty())
return base::StringPiece();
- return base::StringPiece(&spec_[comp.begin], comp.len);
+ return base::StringPiece(spec_).substr(static_cast<size_t>(comp.begin),
+ static_cast<size_t>(comp.len));
}
+ void ProcessFileSystemURLAfterReplaceComponents();
+
// The actual text of the URL, in canonical ASCII form.
std::string spec_;
@@ -467,18 +491,23 @@
};
// Stream operator so GURL can be used in assertion statements.
-URL_EXPORT std::ostream& operator<<(std::ostream& out, const GURL& url);
+COMPONENT_EXPORT(URL)
+std::ostream& operator<<(std::ostream& out, const GURL& url);
-URL_EXPORT bool operator==(const GURL& x, const GURL& y);
-URL_EXPORT bool operator!=(const GURL& x, const GURL& y);
+COMPONENT_EXPORT(URL) bool operator==(const GURL& x, const GURL& y);
+COMPONENT_EXPORT(URL) bool operator!=(const GURL& x, const GURL& y);
// Equality operator for comparing raw spec_. This should be used in place of
// url == GURL(spec) where |spec| is known (i.e. constants). This is to prevent
// needlessly re-parsing |spec| into a temporary GURL.
-URL_EXPORT bool operator==(const GURL& x, const base::StringPiece& spec);
-URL_EXPORT bool operator==(const base::StringPiece& spec, const GURL& x);
-URL_EXPORT bool operator!=(const GURL& x, const base::StringPiece& spec);
-URL_EXPORT bool operator!=(const base::StringPiece& spec, const GURL& x);
+COMPONENT_EXPORT(URL)
+bool operator==(const GURL& x, const base::StringPiece& spec);
+COMPONENT_EXPORT(URL)
+bool operator==(const base::StringPiece& spec, const GURL& x);
+COMPONENT_EXPORT(URL)
+bool operator!=(const GURL& x, const base::StringPiece& spec);
+COMPONENT_EXPORT(URL)
+bool operator!=(const base::StringPiece& spec, const GURL& x);
// DEBUG_ALIAS_FOR_GURL(var_name, url) copies |url| into a new stack-allocated
// variable named |<var_name>|. This helps ensure that the value of |url| gets
@@ -486,4 +515,20 @@
#define DEBUG_ALIAS_FOR_GURL(var_name, url) \
DEBUG_ALIAS_FOR_CSTR(var_name, (url).possibly_invalid_spec().c_str(), 128)
+namespace url::debug {
+
+class COMPONENT_EXPORT(URL) ScopedUrlCrashKey {
+ public:
+ ScopedUrlCrashKey(base::debug::CrashKeyString* crash_key, const GURL& value);
+ ~ScopedUrlCrashKey();
+
+ ScopedUrlCrashKey(const ScopedUrlCrashKey&) = delete;
+ ScopedUrlCrashKey& operator=(const ScopedUrlCrashKey&) = delete;
+
+ private:
+ base::debug::ScopedCrashKeyString scoped_string_value_;
+};
+
+} // namespace url::debug
+
#endif // URL_GURL_H_
diff --git a/url/gurl_abstract_tests.h b/url/gurl_abstract_tests.h
new file mode 100644
index 0000000..3cde842
--- /dev/null
+++ b/url/gurl_abstract_tests.h
@@ -0,0 +1,119 @@
+// Copyright 2021 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_GURL_ABSTRACT_TESTS_H_
+#define URL_GURL_ABSTRACT_TESTS_H_
+
+// Test suite for tests that cover both GURL and blink::KURL.
+//
+// AbstractUrlTest below abstracts away differences between GURL and blink::KURL
+// by parametrizing the tests with a class that has to expose the following
+// members:
+// using UrlType = ...;
+// static UrlType CreateUrlFromString(base::StringPiece s);
+// static bool IsAboutBlank(const UrlType& url);
+// static bool IsAboutSrcdoc(const UrlType& url);
+template <typename TUrlTraits>
+class AbstractUrlTest : public testing::Test {
+ protected:
+ // Wrappers that help elide away TUrlTraits.
+ //
+ // Note that calling the wrappers needs to be prefixed with `this->...` to
+ // avoid hitting: explicit qualification required to use member 'IsAboutBlank'
+ // from dependent base class.
+ using UrlType = typename TUrlTraits::UrlType;
+ UrlType CreateUrlFromString(base::StringPiece s) {
+ return TUrlTraits::CreateUrlFromString(s);
+ }
+ bool IsAboutBlank(const UrlType& url) {
+ return TUrlTraits::IsAboutBlank(url);
+ }
+ bool IsAboutSrcdoc(const UrlType& url) {
+ return TUrlTraits::IsAboutSrcdoc(url);
+ }
+};
+
+TYPED_TEST_SUITE_P(AbstractUrlTest);
+
+TYPED_TEST_P(AbstractUrlTest, IsAboutBlankTest) {
+ // See https://tools.ietf.org/html/rfc6694 which explicitly allows
+ // `about-query` and `about-fragment` parts in about: URLs.
+ const std::string kAboutBlankUrls[] = {"about:blank", "about:blank?foo",
+ "about:blank/#foo",
+ "about:blank?foo#foo"};
+ for (const auto& input : kAboutBlankUrls) {
+ SCOPED_TRACE(testing::Message() << "Test input: " << input);
+ auto url = this->CreateUrlFromString(input);
+ EXPECT_TRUE(this->IsAboutBlank(url));
+ }
+
+ const std::string kNotAboutBlankUrls[] = {"",
+ "about",
+ "about:",
+ "about:blanky",
+ "about:blan",
+ "about:about:blank:",
+ "data:blank",
+ "http:blank",
+ "about://blank",
+ "about:blank/foo",
+ "about://:8000/blank",
+ "about://foo:foo@/blank",
+ "foo@about:blank",
+ "foo:bar@about:blank",
+ "about:blank:8000",
+ "about:blANk"};
+ for (const auto& input : kNotAboutBlankUrls) {
+ SCOPED_TRACE(testing::Message() << "Test input: " << input);
+ auto url = this->CreateUrlFromString(input);
+ EXPECT_FALSE(this->IsAboutBlank(url));
+ }
+}
+
+TYPED_TEST_P(AbstractUrlTest, IsAboutSrcdocTest) {
+ // See https://tools.ietf.org/html/rfc6694 which explicitly allows
+ // `about-query` and `about-fragment` parts in about: URLs.
+ //
+ // `about:srcdoc` is defined in
+ // https://html.spec.whatwg.org/multipage/urls-and-fetching.html#about:srcdoc
+ // which refers to rfc6694 for details.
+ const std::string kAboutSrcdocUrls[] = {
+ "about:srcdoc", "about:srcdoc/", "about:srcdoc?foo", "about:srcdoc/#foo",
+ "about:srcdoc?foo#foo"};
+ for (const auto& input : kAboutSrcdocUrls) {
+ SCOPED_TRACE(testing::Message() << "Test input: " << input);
+ auto url = this->CreateUrlFromString(input);
+ EXPECT_TRUE(this->IsAboutSrcdoc(url));
+ }
+
+ const std::string kNotAboutSrcdocUrls[] = {"",
+ "about",
+ "about:",
+ "about:srcdocx",
+ "about:srcdo",
+ "about:about:srcdoc:",
+ "data:srcdoc",
+ "http:srcdoc",
+ "about:srcdo",
+ "about://srcdoc",
+ "about://srcdoc\\",
+ "about:srcdoc/foo",
+ "about://:8000/srcdoc",
+ "about://foo:foo@/srcdoc",
+ "foo@about:srcdoc",
+ "foo:bar@about:srcdoc",
+ "about:srcdoc:8000",
+ "about:srCDOc"};
+ for (const auto& input : kNotAboutSrcdocUrls) {
+ SCOPED_TRACE(testing::Message() << "Test input: " << input);
+ auto url = this->CreateUrlFromString(input);
+ EXPECT_FALSE(this->IsAboutSrcdoc(url));
+ }
+}
+
+REGISTER_TYPED_TEST_SUITE_P(AbstractUrlTest,
+ IsAboutBlankTest,
+ IsAboutSrcdocTest);
+
+#endif // URL_GURL_ABSTRACT_TESTS_H_
diff --git a/url/gurl_fuzzer.cc b/url/gurl_fuzzer.cc
index 0c6137a..029a387 100644
--- a/url/gurl_fuzzer.cc
+++ b/url/gurl_fuzzer.cc
@@ -1,9 +1,11 @@
-// Copyright 2015 The Chromium Authors. All rights reserved.
+// Copyright 2015 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/at_exit.h"
+#include "base/check_op.h"
#include "base/i18n/icu_util.h"
+#include "base/no_destructor.h"
#include "url/gurl.h"
struct TestCase {
@@ -15,43 +17,73 @@
TestCase* test_case = new TestCase();
+// Checks that GURL's canonicalization is idempotent. This can help discover
+// issues like https://crbug.com/1128999.
+void CheckIdempotency(const GURL& url) {
+ if (!url.is_valid())
+ return;
+ const std::string& spec = url.spec();
+ GURL recanonicalized(spec);
+ CHECK(recanonicalized.is_valid());
+ CHECK_EQ(spec, recanonicalized.spec());
+}
+
+// Checks that |url.spec()| is preserved across a call to ReplaceComponents with
+// zero replacements, which is effectively a copy. This can help discover issues
+// like https://crbug.com/1075515.
+void CheckReplaceComponentsPreservesSpec(const GURL& url) {
+ static const base::NoDestructor<GURL::Replacements> no_op;
+ GURL copy = url.ReplaceComponents(*no_op);
+ CHECK_EQ(url.is_valid(), copy.is_valid());
+ if (url.is_valid()) {
+ CHECK_EQ(url.spec(), copy.spec());
+ }
+}
+
// Entry point for LibFuzzer.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
if (size < 1)
return 0;
-
- base::StringPiece string_piece_input(reinterpret_cast<const char*>(data),
- size);
- GURL url_from_string_piece(string_piece_input);
-
+ {
+ base::StringPiece string_piece_input(reinterpret_cast<const char*>(data),
+ size);
+ const GURL url_from_string_piece(string_piece_input);
+ CheckIdempotency(url_from_string_piece);
+ CheckReplaceComponentsPreservesSpec(url_from_string_piece);
+ }
// Test for StringPiece16 if size is even.
- if (size % 2 == 0) {
+ if (size % sizeof(char16_t) == 0) {
base::StringPiece16 string_piece_input16(
- reinterpret_cast<const base::char16*>(data), size / 2);
-
- GURL url_from_string_piece16(string_piece_input16);
+ reinterpret_cast<const char16_t*>(data), size / sizeof(char16_t));
+ const GURL url_from_string_piece16(string_piece_input16);
+ CheckIdempotency(url_from_string_piece16);
+ CheckReplaceComponentsPreservesSpec(url_from_string_piece16);
}
-
// Resolve relative url tests.
- size_t size_t_bytes = sizeof(size_t);
- if (size < size_t_bytes + 1) {
- return 0;
- }
- size_t relative_size =
- *reinterpret_cast<const size_t*>(data) % (size - size_t_bytes);
- std::string relative_string(
- reinterpret_cast<const char*>(data + size_t_bytes), relative_size);
- base::StringPiece string_piece_part_input(
- reinterpret_cast<const char*>(data + size_t_bytes + relative_size),
- size - relative_size - size_t_bytes);
- GURL url_from_string_piece_part(string_piece_part_input);
- url_from_string_piece_part.Resolve(relative_string);
+ {
+ size_t size_t_bytes = sizeof(size_t);
+ if (size < size_t_bytes + 1) {
+ return 0;
+ }
+ size_t relative_size =
+ *reinterpret_cast<const size_t*>(data) % (size - size_t_bytes);
+ std::string relative_string(
+ reinterpret_cast<const char*>(data + size_t_bytes), relative_size);
+ base::StringPiece string_piece_part_input(
+ reinterpret_cast<const char*>(data + size_t_bytes + relative_size),
+ size - relative_size - size_t_bytes);
+ const GURL url_from_string_piece_part(string_piece_part_input);
+ CheckIdempotency(url_from_string_piece_part);
+ CheckReplaceComponentsPreservesSpec(url_from_string_piece_part);
- if (relative_size % 2 == 0) {
- base::string16 relative_string16(
- reinterpret_cast<const base::char16*>(data + size_t_bytes),
- relative_size / 2);
- url_from_string_piece_part.Resolve(relative_string16);
+ url_from_string_piece_part.Resolve(relative_string);
+
+ if (relative_size % sizeof(char16_t) == 0) {
+ std::u16string relative_string16(
+ reinterpret_cast<const char16_t*>(data + size_t_bytes),
+ relative_size / sizeof(char16_t));
+ url_from_string_piece_part.Resolve(relative_string16);
+ }
}
return 0;
}
diff --git a/url/gurl_fuzzer.dict b/url/gurl_fuzzer.dict
index 302f590..fcf7e03 100644
--- a/url/gurl_fuzzer.dict
+++ b/url/gurl_fuzzer.dict
@@ -1,8 +1,8 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
+# Copyright 2016 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
-# This file has been generated with testing/libfuzzer/dictionary_generator.py
+# This block has been generated with testing/libfuzzer/dictionary_generator.py
# using url_parse_fuzzer binary and RFC 3986.
"DNS"
"text"
@@ -401,3 +401,32 @@
"H.,"
"\"MIME"
+# This comes from https://crbug.com/1075515.
+"FilEsysteM:htTp:E=/."
+
+# This comes from https://crbug.com/1128999.
+"file:///.//"
+"file:////"
+
+# Strings declared in url/url_constants.cc
+"://"
+"about"
+"about:blank"
+"about:srcdoc"
+"blank"
+"blob"
+"cid"
+"content"
+"data"
+"file"
+"filesystem"
+"ftp"
+"http"
+"https"
+"javascript"
+"mailto"
+"quic-transport"
+"srcdoc"
+"tel"
+"ws"
+"wss"
diff --git a/url/gurl_unittest.cc b/url/gurl_unittest.cc
index 68c432d..ee206ab 100644
--- a/url/gurl_unittest.cc
+++ b/url/gurl_unittest.cc
@@ -1,13 +1,16 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "base/macros.h"
-#include "base/strings/utf_string_conversions.h"
-#include "starboard/common/string.h"
-#include "starboard/types.h"
-#include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h"
+
+#include <stddef.h>
+
+#include "base/strings/string_number_conversions.h"
+#include "base/strings/utf_string_conversions.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "url/gurl_abstract_tests.h"
+#include "url/origin.h"
#include "url/url_canon.h"
#include "url/url_test_utils.h"
@@ -15,19 +18,6 @@
namespace {
-template<typename CHAR>
-void SetupReplacement(
- void (Replacements<CHAR>::*func)(const CHAR*, const Component&),
- Replacements<CHAR>* replacements,
- const CHAR* str) {
- if (str) {
- Component comp;
- if (str[0])
- comp.len = static_cast<int>(strlen(str));
- (replacements->*func)(str, comp);
- }
-}
-
// Returns the canonicalized string for the given URL string for the
// GURLTest.Types test.
std::string TypesTestCase(const char* src) {
@@ -65,11 +55,11 @@
// the parser is already tested and works, so we are mostly interested if the
// object does the right thing with the results.
TEST(GURLTest, Components) {
- GURL empty_url(base::UTF8ToUTF16(""));
+ GURL empty_url(u"");
EXPECT_TRUE(empty_url.is_empty());
EXPECT_FALSE(empty_url.is_valid());
- GURL url(base::UTF8ToUTF16("http://user:pass@google.com:99/foo;bar?q=a#ref"));
+ GURL url(u"http://user:pass@google.com:99/foo;bar?q=a#ref");
EXPECT_FALSE(url.is_empty());
EXPECT_TRUE(url.is_valid());
EXPECT_TRUE(url.SchemeIs("http"));
@@ -114,8 +104,7 @@
}
TEST(GURLTest, Copy) {
- GURL url(base::UTF8ToUTF16(
- "http://user:pass@google.com:99/foo;bar?q=a#ref"));
+ GURL url(u"http://user:pass@google.com:99/foo;bar?q=a#ref");
GURL url2(url);
EXPECT_TRUE(url2.is_valid());
@@ -148,8 +137,7 @@
}
TEST(GURLTest, Assign) {
- GURL url(base::UTF8ToUTF16(
- "http://user:pass@google.com:99/foo;bar?q=a#ref"));
+ GURL url(u"http://user:pass@google.com:99/foo;bar?q=a#ref");
GURL url2;
url2 = url;
@@ -191,8 +179,7 @@
}
TEST(GURLTest, CopyFileSystem) {
- GURL url(base::UTF8ToUTF16(
- "filesystem:https://user:pass@google.com:99/t/foo;bar?q=a#ref"));
+ GURL url(u"filesystem:https://user:pass@google.com:99/t/foo;bar?q=a#ref");
GURL url2(url);
EXPECT_TRUE(url2.is_valid());
@@ -223,31 +210,34 @@
TEST(GURLTest, IsValid) {
const char* valid_cases[] = {
- "http://google.com",
- "unknown://google.com",
- "http://user:pass@google.com",
- "http://google.com:12345",
- "http://google.com/path",
- "http://google.com//path",
- "http://google.com?k=v#fragment",
- "http://user:pass@google.com:12345/path?k=v#fragment",
- "http:/path",
- "http:path",
+ "http://google.com",
+ "unknown://google.com",
+ "http://user:pass@google.com",
+ "http://google.com:12345",
+ "http://google.com:0", // 0 is a valid port
+ "http://google.com/path",
+ "http://google.com//path",
+ "http://google.com?k=v#fragment",
+ "http://user:pass@google.com:12345/path?k=v#fragment",
+ "http:/path",
+ "http:path",
};
- for (size_t i = 0; i < arraysize(valid_cases); i++) {
+ for (size_t i = 0; i < std::size(valid_cases); i++) {
EXPECT_TRUE(GURL(valid_cases[i]).is_valid())
<< "Case: " << valid_cases[i];
}
const char* invalid_cases[] = {
- "http://?k=v",
- "http:://google.com",
- "http//google.com",
- "http://google.com:12three45",
- "://google.com",
- "path",
+ "http://?k=v",
+ "http:://google.com",
+ "http//google.com",
+ "http://google.com:12three45",
+ "file://server:123", // file: URLs cannot have a port
+ "file://server:0",
+ "://google.com",
+ "path",
};
- for (size_t i = 0; i < arraysize(invalid_cases); i++) {
+ for (size_t i = 0; i < std::size(invalid_cases); i++) {
EXPECT_FALSE(GURL(invalid_cases[i]).is_valid())
<< "Case: " << invalid_cases[i];
}
@@ -262,21 +252,49 @@
EXPECT_EQ("/", url.path());
}
-// Given an invalid URL, we should still get most of the components.
+// Given invalid URLs, we should still get most of the components.
TEST(GURLTest, ComponentGettersWorkEvenForInvalidURL) {
- GURL url("http:google.com:foo");
- EXPECT_FALSE(url.is_valid());
- EXPECT_EQ("http://google.com:foo/", url.possibly_invalid_spec());
+ constexpr struct InvalidURLTestExpectations {
+ const char* url;
+ const char* spec;
+ const char* scheme;
+ const char* host;
+ const char* port;
+ const char* path;
+ // Extend as needed...
+ } expectations[] = {
+ {
+ "http:google.com:foo",
+ "http://google.com:foo/",
+ "http",
+ "google.com",
+ "foo",
+ "/",
+ },
+ {
+ "https:google.com:foo",
+ "https://google.com:foo/",
+ "https",
+ "google.com",
+ "foo",
+ "/",
+ },
+ };
- EXPECT_EQ("http", url.scheme());
- EXPECT_EQ("", url.username());
- EXPECT_EQ("", url.password());
- EXPECT_EQ("google.com", url.host());
- EXPECT_EQ("foo", url.port());
- EXPECT_EQ(PORT_INVALID, url.IntPort());
- EXPECT_EQ("/", url.path());
- EXPECT_EQ("", url.query());
- EXPECT_EQ("", url.ref());
+ for (const auto& e : expectations) {
+ const GURL url(e.url);
+ EXPECT_FALSE(url.is_valid());
+ EXPECT_EQ(e.spec, url.possibly_invalid_spec());
+ EXPECT_EQ(e.scheme, url.scheme());
+ EXPECT_EQ("", url.username());
+ EXPECT_EQ("", url.password());
+ EXPECT_EQ(e.host, url.host());
+ EXPECT_EQ(e.port, url.port());
+ EXPECT_EQ(PORT_INVALID, url.IntPort());
+ EXPECT_EQ(e.path, url.path());
+ EXPECT_EQ("", url.query());
+ EXPECT_EQ("", url.ref());
+ }
}
TEST(GURLTest, Resolve) {
@@ -289,24 +307,46 @@
bool expected_valid;
const char* expected;
} resolve_cases[] = {
- {"http://www.google.com/", "foo.html", true, "http://www.google.com/foo.html"},
- {"http://www.google.com/foo/", "bar", true, "http://www.google.com/foo/bar"},
- {"http://www.google.com/foo/", "/bar", true, "http://www.google.com/bar"},
- {"http://www.google.com/foo", "bar", true, "http://www.google.com/bar"},
- {"http://www.google.com/", "http://images.google.com/foo.html", true, "http://images.google.com/foo.html"},
- {"http://www.google.com/", "http://images.\tgoogle.\ncom/\rfoo.html", true, "http://images.google.com/foo.html"},
- {"http://www.google.com/blah/bloo?c#d", "../../../hello/./world.html?a#b", true, "http://www.google.com/hello/world.html?a#b"},
- {"http://www.google.com/foo#bar", "#com", true, "http://www.google.com/foo#com"},
- {"http://www.google.com/", "Https:images.google.com", true, "https://images.google.com/"},
+ {"http://www.google.com/", "foo.html", true,
+ "http://www.google.com/foo.html"},
+ {"http://www.google.com/foo/", "bar", true,
+ "http://www.google.com/foo/bar"},
+ {"http://www.google.com/foo/", "/bar", true, "http://www.google.com/bar"},
+ {"http://www.google.com/foo", "bar", true, "http://www.google.com/bar"},
+ {"http://www.google.com/", "http://images.google.com/foo.html", true,
+ "http://images.google.com/foo.html"},
+ {"http://www.google.com/", "http://images.\tgoogle.\ncom/\rfoo.html",
+ true, "http://images.google.com/foo.html"},
+ {"http://www.google.com/blah/bloo?c#d", "../../../hello/./world.html?a#b",
+ true, "http://www.google.com/hello/world.html?a#b"},
+ {"http://www.google.com/foo#bar", "#com", true,
+ "http://www.google.com/foo#com"},
+ {"http://www.google.com/", "Https:images.google.com", true,
+ "https://images.google.com/"},
// A non-standard base can be replaced with a standard absolute URL.
- {"data:blahblah", "http://google.com/", true, "http://google.com/"},
- {"data:blahblah", "http:google.com", true, "http://google.com/"},
+ {"data:blahblah", "http://google.com/", true, "http://google.com/"},
+ {"data:blahblah", "http:google.com", true, "http://google.com/"},
+ {"data:blahblah", "https:google.com", true, "https://google.com/"},
// Filesystem URLs have different paths to test.
- {"filesystem:http://www.google.com/type/", "foo.html", true, "filesystem:http://www.google.com/type/foo.html"},
- {"filesystem:http://www.google.com/type/", "../foo.html", true, "filesystem:http://www.google.com/type/foo.html"},
+ {"filesystem:http://www.google.com/type/", "foo.html", true,
+ "filesystem:http://www.google.com/type/foo.html"},
+ {"filesystem:http://www.google.com/type/", "../foo.html", true,
+ "filesystem:http://www.google.com/type/foo.html"},
+ // https://crbug.com/530123 - scheme validation (e.g. are "10.0.0.7:"
+ // or "x1:" valid schemes) when deciding if |relative| is an absolute url.
+ {"file:///some/dir/ip-relative.html", "10.0.0.7:8080/foo.html", true,
+ "file:///some/dir/10.0.0.7:8080/foo.html"},
+ {"file:///some/dir/", "1://host", true, "file:///some/dir/1://host"},
+ {"file:///some/dir/", "x1://host", true, "x1://host"},
+ {"file:///some/dir/", "X1://host", true, "x1://host"},
+ {"file:///some/dir/", "x.://host", true, "x.://host"},
+ {"file:///some/dir/", "x+://host", true, "x+://host"},
+ {"file:///some/dir/", "x-://host", true, "x-://host"},
+ {"file:///some/dir/", "x!://host", true, "file:///some/dir/x!://host"},
+ {"file:///some/dir/", "://host", true, "file:///some/dir/://host"},
};
- for (size_t i = 0; i < arraysize(resolve_cases); i++) {
+ for (size_t i = 0; i < std::size(resolve_cases); i++) {
// 8-bit code path.
GURL input(resolve_cases[i].base);
GURL output = input.Resolve(resolve_cases[i].relative);
@@ -329,18 +369,23 @@
const char* input;
const char* expected;
} cases[] = {
- {"http://www.google.com", "http://www.google.com/"},
- {"javascript:window.alert(\"hello,world\");", ""},
- {"http://user:pass@www.google.com:21/blah#baz", "http://www.google.com:21/"},
- {"http://user@www.google.com", "http://www.google.com/"},
- {"http://:pass@www.google.com", "http://www.google.com/"},
- {"http://:@www.google.com", "http://www.google.com/"},
- {"filesystem:http://www.google.com/temp/foo?q#b", "http://www.google.com/"},
- {"filesystem:http://user:pass@google.com:21/blah#baz", "http://google.com:21/"},
+ {"http://www.google.com", "http://www.google.com/"},
+ {"javascript:window.alert(\"hello,world\");", ""},
+ {"http://user:pass@www.google.com:21/blah#baz",
+ "http://www.google.com:21/"},
+ {"http://user@www.google.com", "http://www.google.com/"},
+ {"http://:pass@www.google.com", "http://www.google.com/"},
+ {"http://:@www.google.com", "http://www.google.com/"},
+ {"filesystem:http://www.google.com/temp/foo?q#b",
+ "http://www.google.com/"},
+ {"filesystem:http://user:pass@google.com:21/blah#baz",
+ "http://google.com:21/"},
+ {"blob:null/guid-goes-here", ""},
+ {"blob:http://origin/guid-goes-here", "" /* should be http://origin/ */},
};
- for (size_t i = 0; i < arraysize(cases); i++) {
+ for (size_t i = 0; i < std::size(cases); i++) {
GURL url(cases[i].input);
- GURL origin = url.GetOrigin();
+ GURL origin = url.DeprecatedGetOriginAsURL();
EXPECT_EQ(cases[i].expected, origin.spec());
}
}
@@ -361,7 +406,7 @@
{"file:///tmp/test.html", ""},
{"https://www.google.com", "https://www.google.com/"},
};
- for (size_t i = 0; i < arraysize(cases); i++) {
+ for (size_t i = 0; i < std::size(cases); i++) {
GURL url(cases[i].input);
GURL origin = url.GetAsReferrer();
EXPECT_EQ(cases[i].expected, origin.spec());
@@ -380,7 +425,7 @@
{"filesystem:file:///temporary/bar.html?baz=22", "filesystem:file:///temporary/"},
};
- for (size_t i = 0; i < arraysize(cases); i++) {
+ for (size_t i = 0; i < std::size(cases); i++) {
GURL url(cases[i].input);
GURL empty_path = url.GetWithEmptyPath();
EXPECT_EQ(cases[i].expected, empty_path.spec());
@@ -426,70 +471,270 @@
{"foobar", ""},
};
- for (size_t i = 0; i < arraysize(cases); i++) {
+ for (size_t i = 0; i < std::size(cases); i++) {
GURL url(cases[i].input);
GURL without_filename = url.GetWithoutFilename();
EXPECT_EQ(cases[i].expected, without_filename.spec()) << i;
}
}
+TEST(GURLTest, GetWithoutRef) {
+ struct TestCase {
+ const char* input;
+ const char* expected;
+ } cases[] = {
+ // Common Standard URLs.
+ {"https://www.google.com/index.html",
+ "https://www.google.com/index.html"},
+ {"https://www.google.com/index.html#maps/",
+ "https://www.google.com/index.html"},
+
+ {"https://foo:bar@www.google.com/maps.htm",
+ "https://foo:bar@www.google.com/maps.htm"},
+ {"https://foo:bar@www.google.com/maps.htm#fragment",
+ "https://foo:bar@www.google.com/maps.htm"},
+
+ {"https://www.google.com/maps/au/index.html?q=maps",
+ "https://www.google.com/maps/au/index.html?q=maps"},
+ {"https://www.google.com/maps/au/index.html?q=maps#fragment/",
+ "https://www.google.com/maps/au/index.html?q=maps"},
+
+ {"http://www.google.com:8000/maps/au/index.html?q=maps",
+ "http://www.google.com:8000/maps/au/index.html?q=maps"},
+ {"http://www.google.com:8000/maps/au/index.html?q=maps#fragment/",
+ "http://www.google.com:8000/maps/au/index.html?q=maps"},
+
+ {"https://www.google.com/maps/au/north/?q=maps",
+ "https://www.google.com/maps/au/north/?q=maps"},
+ {"https://www.google.com/maps/au/north?q=maps#fragment",
+ "https://www.google.com/maps/au/north?q=maps"},
+
+ // Less common standard URLs.
+ {"filesystem:http://www.google.com/temporary/bar.html?baz=22",
+ "filesystem:http://www.google.com/temporary/bar.html?baz=22"},
+ {"file:///temporary/bar.html?baz=22#fragment",
+ "file:///temporary/bar.html?baz=22"},
+
+ {"ftp://foo/test/index.html", "ftp://foo/test/index.html"},
+ {"ftp://foo/test/index.html#fragment", "ftp://foo/test/index.html"},
+
+ {"gopher://foo/test/index.html", "gopher://foo/test/index.html"},
+ {"gopher://foo/test/index.html#fragment", "gopher://foo/test/index.html"},
+
+ {"ws://foo/test/index.html", "ws://foo/test/index.html"},
+ {"ws://foo/test/index.html#fragment", "ws://foo/test/index.html"},
+
+ // Non-standard, hierarchical URLs.
+ {"chrome://foo/bar.html", "chrome://foo/bar.html"},
+ {"chrome://foo/bar.html#fragment", "chrome://foo/bar.html"},
+
+ {"httpa://foo/test/index.html", "httpa://foo/test/index.html"},
+ {"httpa://foo/test/index.html#fragment", "httpa://foo/test/index.html"},
+
+ // Non-standard, non-hierarchical URLs.
+ {"blob:https://foo.bar/test/index.html",
+ "blob:https://foo.bar/test/index.html"},
+ {"blob:https://foo.bar/test/index.html#fragment",
+ "blob:https://foo.bar/test/index.html"},
+
+ {"about:blank", "about:blank"},
+ {"about:blank#ref", "about:blank"},
+
+ {"data:foobar", "data:foobar"},
+ {"scheme:opaque_data", "scheme:opaque_data"},
+ // Invalid URLs.
+ {"foobar", ""},
+ };
+
+ for (size_t i = 0; i < std::size(cases); i++) {
+ GURL url(cases[i].input);
+ GURL without_ref = url.GetWithoutRef();
+ EXPECT_EQ(cases[i].expected, without_ref.spec());
+ }
+}
+
TEST(GURLTest, Replacements) {
// The URL canonicalizer replacement test will handle most of these case.
// The most important thing to do here is to check that the proper
// canonicalizer gets called based on the scheme of the input.
struct ReplaceCase {
+ using ApplyReplacementsFunc = GURL(const GURL&);
+
const char* base;
- const char* scheme;
- const char* username;
- const char* password;
- const char* host;
- const char* port;
- const char* path;
- const char* query;
- const char* ref;
+ ApplyReplacementsFunc* apply_replacements;
const char* expected;
} replace_cases[] = {
- {"http://www.google.com/foo/bar.html?foo#bar", NULL, NULL, NULL, NULL,
- NULL, "/", "", "", "http://www.google.com/"},
- {"http://www.google.com/foo/bar.html?foo#bar", "javascript", "", "", "",
- "", "window.open('foo');", "", "", "javascript:window.open('foo');"},
- {"file:///C:/foo/bar.txt", "http", NULL, NULL, "www.google.com", "99",
- "/foo", "search", "ref", "http://www.google.com:99/foo?search#ref"},
+#if defined(COBALT_PENDING_CLEAN_UP)
+ {"http://www.google.com/foo/bar.html?foo#bar",
+
+ +[](const GURL& url) {
+ GURL::Replacements replacements;
+ replacements.SetPathStr("/");
+ replacements.ClearQuery();
+ replacements.ClearRef();
+ return url.ReplaceComponents(replacements);
+ },
+ "http://www.google.com/"},
+ {"http://www.google.com/foo/bar.html?foo#bar",
+
+ +[](const GURL& url) {
+ GURL::Replacements replacements;
+ replacements.SetSchemeStr("javascript");
+ replacements.ClearUsername();
+ replacements.ClearPassword();
+ replacements.ClearHost();
+ replacements.ClearPort();
+ replacements.SetPathStr("window.open('foo');");
+ replacements.ClearQuery();
+ replacements.ClearRef();
+ return url.ReplaceComponents(replacements);
+ },
+ "javascript:window.open('foo');"},
+ {"file:///C:/foo/bar.txt",
+
+ +[](const GURL& url) {
+ GURL::Replacements replacements;
+ replacements.SetSchemeStr("http");
+ replacements.SetHostStr("www.google.com");
+ replacements.SetPortStr("99");
+ replacements.SetPathStr("/foo");
+ replacements.SetQueryStr("search");
+ replacements.SetRefStr("ref");
+ return url.ReplaceComponents(replacements);
+ },
+ "http://www.google.com:99/foo?search#ref"},
#ifdef WIN32
- {"http://www.google.com/foo/bar.html?foo#bar", "file", "", "", "", "",
- "c:\\", "", "", "file:///C:/"},
+ {"http://www.google.com/foo/bar.html?foo#bar",
+
+ +[](const GURL& url) {
+ GURL::Replacements replacements;
+ replacements.SetSchemeStr("file");
+ replacements.ClearUsername();
+ replacements.ClearPassword();
+ replacements.ClearHost();
+ replacements.ClearPort();
+ replacements.SetPathStr("c:\\");
+ replacements.ClearQuery();
+ replacements.ClearRef();
+ return url.ReplaceComponents(replacements);
+ },
+ "file:///C:/"},
#endif
- {"filesystem:http://www.google.com/foo/bar.html?foo#bar", NULL, NULL,
- NULL, NULL, NULL, "/", "", "", "filesystem:http://www.google.com/foo/"},
+ {"filesystem:http://www.google.com/foo/bar.html?foo#bar",
+
+ +[](const GURL& url) {
+ GURL::Replacements replacements;
+ replacements.SetPathStr("/");
+ replacements.ClearQuery();
+ replacements.ClearRef();
+ return url.ReplaceComponents(replacements);
+ },
+ "filesystem:http://www.google.com/foo/"},
// Lengthen the URL instead of shortening it, to test creation of
// inner_url.
- {"filesystem:http://www.google.com/foo/", NULL, NULL, NULL, NULL, NULL,
- "bar.html", "foo", "bar",
+ {"filesystem:http://www.google.com/foo/",
+
+ +[](const GURL& url) {
+ GURL::Replacements replacements;
+ replacements.SetPathStr("bar.html");
+ replacements.SetQueryStr("foo");
+ replacements.SetRefStr("bar");
+ return url.ReplaceComponents(replacements);
+ },
"filesystem:http://www.google.com/foo/bar.html?foo#bar"},
};
+#else
+ {.base = "http://www.google.com/foo/bar.html?foo#bar",
+ .apply_replacements =
+ +[](const GURL& url) {
+ GURL::Replacements replacements;
+ replacements.SetPathStr("/");
+ replacements.ClearQuery();
+ replacements.ClearRef();
+ return url.ReplaceComponents(replacements);
+ },
+ .expected = "http://www.google.com/"},
+ {.base = "http://www.google.com/foo/bar.html?foo#bar",
+ .apply_replacements =
+ +[](const GURL& url) {
+ GURL::Replacements replacements;
+ replacements.SetSchemeStr("javascript");
+ replacements.ClearUsername();
+ replacements.ClearPassword();
+ replacements.ClearHost();
+ replacements.ClearPort();
+ replacements.SetPathStr("window.open('foo');");
+ replacements.ClearQuery();
+ replacements.ClearRef();
+ return url.ReplaceComponents(replacements);
+ },
+ .expected = "javascript:window.open('foo');"},
+ {.base = "file:///C:/foo/bar.txt",
+ .apply_replacements =
+ +[](const GURL& url) {
+ GURL::Replacements replacements;
+ replacements.SetSchemeStr("http");
+ replacements.SetHostStr("www.google.com");
+ replacements.SetPortStr("99");
+ replacements.SetPathStr("/foo");
+ replacements.SetQueryStr("search");
+ replacements.SetRefStr("ref");
+ return url.ReplaceComponents(replacements);
+ },
+ .expected = "http://www.google.com:99/foo?search#ref"},
+#ifdef WIN32
+ {.base = "http://www.google.com/foo/bar.html?foo#bar",
+ .apply_replacements =
+ +[](const GURL& url) {
+ GURL::Replacements replacements;
+ replacements.SetSchemeStr("file");
+ replacements.ClearUsername();
+ replacements.ClearPassword();
+ replacements.ClearHost();
+ replacements.ClearPort();
+ replacements.SetPathStr("c:\\");
+ replacements.ClearQuery();
+ replacements.ClearRef();
+ return url.ReplaceComponents(replacements);
+ },
+ .expected = "file:///C:/"},
+#endif
+ {.base = "filesystem:http://www.google.com/foo/bar.html?foo#bar",
+ .apply_replacements =
+ +[](const GURL& url) {
+ GURL::Replacements replacements;
+ replacements.SetPathStr("/");
+ replacements.ClearQuery();
+ replacements.ClearRef();
+ return url.ReplaceComponents(replacements);
+ },
+ .expected = "filesystem:http://www.google.com/foo/"},
+ // Lengthen the URL instead of shortening it, to test creation of
+ // inner_url.
+ {.base = "filesystem:http://www.google.com/foo/",
+ .apply_replacements =
+ +[](const GURL& url) {
+ GURL::Replacements replacements;
+ replacements.SetPathStr("bar.html");
+ replacements.SetQueryStr("foo");
+ replacements.SetRefStr("bar");
+ return url.ReplaceComponents(replacements);
+ },
+ .expected = "filesystem:http://www.google.com/foo/bar.html?foo#bar"},
+ };
+#endif
- for (size_t i = 0; i < arraysize(replace_cases); i++) {
- const ReplaceCase& cur = replace_cases[i];
- GURL url(cur.base);
- GURL::Replacements repl;
- SetupReplacement(&GURL::Replacements::SetScheme, &repl, cur.scheme);
- SetupReplacement(&GURL::Replacements::SetUsername, &repl, cur.username);
- SetupReplacement(&GURL::Replacements::SetPassword, &repl, cur.password);
- SetupReplacement(&GURL::Replacements::SetHost, &repl, cur.host);
- SetupReplacement(&GURL::Replacements::SetPort, &repl, cur.port);
- SetupReplacement(&GURL::Replacements::SetPath, &repl, cur.path);
- SetupReplacement(&GURL::Replacements::SetQuery, &repl, cur.query);
- SetupReplacement(&GURL::Replacements::SetRef, &repl, cur.ref);
- GURL output = url.ReplaceComponents(repl);
+ for (const ReplaceCase& c : replace_cases) {
+ GURL output = c.apply_replacements(GURL(c.base));
- EXPECT_EQ(replace_cases[i].expected, output.spec());
+ EXPECT_EQ(c.expected, output.spec());
EXPECT_EQ(output.SchemeIsFileSystem(), output.inner_url() != NULL);
if (output.SchemeIsFileSystem()) {
// TODO(mmenke): inner_url()->spec() is currently the same as the spec()
// for the GURL itself. This should be fixed.
// See https://crbug.com/619596
- EXPECT_EQ(replace_cases[i].expected, output.inner_url()->spec());
+ EXPECT_EQ(c.expected, output.inner_url()->spec());
}
}
}
@@ -498,23 +743,34 @@
// http://crbug.com/291747 - a data URL may legitimately have trailing
// whitespace in the spec after the ref is cleared. Test this does not trigger
// the Parsed importing validation DCHECK in GURL.
- GURL url(" data: one ? two # three ");
+ GURL url(" data: one # two ");
+ EXPECT_TRUE(url.is_valid());
// By default the trailing whitespace will have been stripped.
- EXPECT_EQ("data: one ? two # three", url.spec());
+ EXPECT_EQ("data: one #%20two", url.spec());
+
+ // Clear the URL's ref and observe the trailing whitespace.
GURL::Replacements repl;
repl.ClearRef();
GURL url_no_ref = url.ReplaceComponents(repl);
-
- EXPECT_EQ("data: one ? two ", url_no_ref.spec());
+ EXPECT_TRUE(url_no_ref.is_valid());
+ EXPECT_EQ("data: one ", url_no_ref.spec());
// Importing a parsed URL via this constructor overload will retain trailing
// whitespace.
GURL import_url(url_no_ref.spec(),
url_no_ref.parsed_for_possibly_invalid_spec(),
url_no_ref.is_valid());
+ EXPECT_TRUE(import_url.is_valid());
EXPECT_EQ(url_no_ref, import_url);
- EXPECT_EQ(import_url.query(), " two ");
+ EXPECT_EQ("data: one ", import_url.spec());
+ EXPECT_EQ(" one ", import_url.path());
+
+ // For completeness, test that re-parsing the same URL rather than importing
+ // it trims the trailing whitespace.
+ GURL reparsed_url(url_no_ref.spec());
+ EXPECT_TRUE(reparsed_url.is_valid());
+ EXPECT_EQ("data: one", reparsed_url.spec());
}
TEST(GURLTest, PathForRequest) {
@@ -523,22 +779,29 @@
const char* expected;
const char* inner_expected;
} cases[] = {
- {"http://www.google.com", "/", NULL},
- {"http://www.google.com/", "/", NULL},
- {"http://www.google.com/foo/bar.html?baz=22", "/foo/bar.html?baz=22", NULL},
- {"http://www.google.com/foo/bar.html#ref", "/foo/bar.html", NULL},
- {"http://www.google.com/foo/bar.html?query#ref", "/foo/bar.html?query", NULL},
- {"filesystem:http://www.google.com/temporary/foo/bar.html?query#ref", "/foo/bar.html?query", "/temporary"},
- {"filesystem:http://www.google.com/temporary/foo/bar.html?query", "/foo/bar.html?query", "/temporary"},
+ {"http://www.google.com", "/", nullptr},
+ {"http://www.google.com/", "/", nullptr},
+ {"http://www.google.com/foo/bar.html?baz=22", "/foo/bar.html?baz=22",
+ nullptr},
+ {"http://www.google.com/foo/bar.html#ref", "/foo/bar.html", nullptr},
+ {"http://www.google.com/foo/bar.html?query#ref", "/foo/bar.html?query",
+ nullptr},
+ {"filesystem:http://www.google.com/temporary/foo/bar.html?query#ref",
+ "/foo/bar.html?query", "/temporary"},
+ {"filesystem:http://www.google.com/temporary/foo/bar.html?query",
+ "/foo/bar.html?query", "/temporary"},
};
- for (size_t i = 0; i < arraysize(cases); i++) {
+ for (size_t i = 0; i < std::size(cases); i++) {
GURL url(cases[i].input);
- std::string path_request = url.PathForRequest();
- EXPECT_EQ(cases[i].expected, path_request);
+ EXPECT_EQ(cases[i].expected, url.PathForRequest());
+ EXPECT_EQ(cases[i].expected, url.PathForRequestPiece());
EXPECT_EQ(cases[i].inner_expected == NULL, url.inner_url() == NULL);
- if (url.inner_url() && cases[i].inner_expected)
+ if (url.inner_url() && cases[i].inner_expected) {
EXPECT_EQ(cases[i].inner_expected, url.inner_url()->PathForRequest());
+ EXPECT_EQ(cases[i].inner_expected,
+ url.inner_url()->PathForRequestPiece());
+ }
}
}
@@ -562,11 +825,6 @@
{"ftp://www.google.com:21/", 21},
{"ftp://www.google.com:80/", 80},
- // gopher
- {"gopher://www.google.com/", 70},
- {"gopher://www.google.com:70/", 70},
- {"gopher://www.google.com:80/", 80},
-
// file - no port
{"file://www.google.com/", PORT_UNSPECIFIED},
{"file://www.google.com:443/", PORT_UNSPECIFIED},
@@ -580,7 +838,7 @@
{"filesystem:file:///t/foo", PORT_UNSPECIFIED},
};
- for (size_t i = 0; i < arraysize(port_tests); i++) {
+ for (size_t i = 0; i < std::size(port_tests); i++) {
GURL url(port_tests[i].spec);
EXPECT_EQ(port_tests[i].expected_int_port, url.EffectiveIntPort());
}
@@ -601,7 +859,7 @@
{"some random input!", false},
};
- for (size_t i = 0; i < arraysize(ip_tests); i++) {
+ for (size_t i = 0; i < std::size(ip_tests); i++) {
GURL url(ip_tests[i].spec);
EXPECT_EQ(ip_tests[i].expected_ip, url.HostIsIPAddress());
}
@@ -626,7 +884,7 @@
{"http://]/", "]", "]"},
{"", "", ""},
};
- for (size_t i = 0; i < arraysize(cases); i++) {
+ for (size_t i = 0; i < std::size(cases); i++) {
GURL url(cases[i].input);
EXPECT_EQ(cases[i].expected_host, url.host());
EXPECT_EQ(cases[i].expected_plainhost, url.HostNoBrackets());
@@ -760,13 +1018,81 @@
EXPECT_FALSE(GURL("ws://foo.bar.com/").SchemeIsCryptographic());
}
+TEST(GURLTest, SchemeIsCryptographicStatic) {
+ EXPECT_TRUE(GURL::SchemeIsCryptographic("https"));
+ EXPECT_TRUE(GURL::SchemeIsCryptographic("wss"));
+ EXPECT_FALSE(GURL::SchemeIsCryptographic("http"));
+ EXPECT_FALSE(GURL::SchemeIsCryptographic("ws"));
+ EXPECT_FALSE(GURL::SchemeIsCryptographic("ftp"));
+}
+
TEST(GURLTest, SchemeIsBlob) {
EXPECT_TRUE(GURL("BLOB://BAR/").SchemeIsBlob());
EXPECT_TRUE(GURL("blob://bar/").SchemeIsBlob());
EXPECT_FALSE(GURL("http://bar/").SchemeIsBlob());
}
-TEST(GURLTest, ContentAndPathForNonStandardURLs) {
+TEST(GURLTest, SchemeIsLocal) {
+ EXPECT_TRUE(GURL("BLOB://BAR/").SchemeIsLocal());
+ EXPECT_TRUE(GURL("blob://bar/").SchemeIsLocal());
+ EXPECT_TRUE(GURL("DATA:TEXT/HTML,BAR").SchemeIsLocal());
+ EXPECT_TRUE(GURL("data:text/html,bar").SchemeIsLocal());
+ EXPECT_TRUE(GURL("ABOUT:BAR").SchemeIsLocal());
+ EXPECT_TRUE(GURL("about:bar").SchemeIsLocal());
+ EXPECT_TRUE(GURL("FILESYSTEM:HTTP://FOO.EXAMPLE/BAR").SchemeIsLocal());
+ EXPECT_TRUE(GURL("filesystem:http://foo.example/bar").SchemeIsLocal());
+
+ EXPECT_FALSE(GURL("http://bar/").SchemeIsLocal());
+ EXPECT_FALSE(GURL("file:///bar").SchemeIsLocal());
+}
+
+// Tests that the 'content' of the URL is properly extracted. This can be
+// complex in cases such as multiple schemes (view-source:http:) or for
+// javascript URLs. See GURL::GetContent for more details.
+TEST(GURLTest, ContentForNonStandardURLs) {
+ struct TestCase {
+ const char* url;
+ const char* expected;
+ } cases[] = {
+ {"null", ""},
+ {"not-a-standard-scheme:this is arbitrary content",
+ "this is arbitrary content"},
+
+ // When there are multiple schemes, only the first is excluded from the
+ // content. Note also that for e.g. 'http://', the '//' is part of the
+ // content not the scheme.
+ {"view-source:http://example.com/path", "http://example.com/path"},
+ {"blob:http://example.com/GUID", "http://example.com/GUID"},
+ {"blob://http://example.com/GUID", "//http://example.com/GUID"},
+ {"blob:http://user:password@example.com/GUID",
+ "http://user:password@example.com/GUID"},
+
+ // The octothorpe character ('#') marks the end of the URL content, and
+ // the start of the fragment. It should not be included in the content.
+ {"http://www.example.com/GUID#ref", "www.example.com/GUID"},
+ {"http://me:secret@example.com/GUID/#ref", "me:secret@example.com/GUID/"},
+ {"data:text/html,Question?<div style=\"color: #bad\">idea</div>",
+ "text/html,Question?%3Cdiv%20style=%22color:%20"},
+
+ // TODO(mkwst): This seems like a bug. https://crbug.com/513600
+ {"filesystem:http://example.com/path", "/"},
+
+ // Javascript URLs include '#' symbols in their content.
+ {"javascript:#", "#"},
+ {"javascript:alert('#');", "alert('#');"},
+ };
+
+ for (const auto& test : cases) {
+ GURL url(test.url);
+ EXPECT_EQ(test.expected, url.GetContent()) << test.url;
+ EXPECT_EQ(test.expected, url.GetContentPiece()) << test.url;
+ }
+}
+
+// Tests that the URL path is properly extracted for unusual URLs. This can be
+// complex in cases such as multiple schemes (view-source:http:) or when
+// octothorpes ('#') are involved.
+TEST(GURLTest, PathForNonStandardURLs) {
struct TestCase {
const char* url;
const char* expected;
@@ -780,6 +1106,11 @@
{"blob:http://user:password@example.com/GUID",
"http://user:password@example.com/GUID"},
+ {"http://www.example.com/GUID#ref", "/GUID"},
+ {"http://me:secret@example.com/GUID/#ref", "/GUID/"},
+ {"data:text/html,Question?<div style=\"color: #bad\">idea</div>",
+ "text/html,Question"},
+
// TODO(mkwst): This seems like a bug. https://crbug.com/513600
{"filesystem:http://example.com/path", "/"},
};
@@ -787,25 +1118,9 @@
for (const auto& test : cases) {
GURL url(test.url);
EXPECT_EQ(test.expected, url.path()) << test.url;
- EXPECT_EQ(test.expected, url.GetContent()) << test.url;
}
}
-TEST(GURLTest, IsAboutBlank) {
- const std::string kAboutBlankUrls[] = {"about:blank", "about:blank?foo",
- "about:blank/#foo",
- "about:blank?foo#foo"};
- for (const auto& url : kAboutBlankUrls)
- EXPECT_TRUE(GURL(url).IsAboutBlank()) << url;
-
- const std::string kNotAboutBlankUrls[] = {
- "http:blank", "about:blan", "about://blank",
- "about:blank/foo", "about://:8000/blank", "about://foo:foo@/blank",
- "foo@about:blank", "foo:bar@about:blank", "about:blank:8000"};
- for (const auto& url : kNotAboutBlankUrls)
- EXPECT_FALSE(GURL(url).IsAboutBlank()) << url;
-}
-
TEST(GURLTest, EqualsIgnoringRef) {
const struct {
const char* url_a;
@@ -875,4 +1190,72 @@
EXPECT_STREQ("https://foo.com/bar", url_debug_alias);
}
+TEST(GURLTest, InvalidHost) {
+ // This contains an invalid percent escape (%T%) and also a valid
+ // percent escape that's not 7-bit ascii (%ae), so that the unescaped
+ // host contains both an invalid percent escape and invalid UTF-8.
+ GURL url("http://%T%Ae");
+
+ EXPECT_FALSE(url.is_valid());
+ EXPECT_TRUE(url.SchemeIs(url::kHttpScheme));
+
+ // The invalid percent escape becomes an escaped percent sign (%25), and the
+ // invalid UTF-8 character becomes REPLACEMENT CHARACTER (U+FFFD) encoded as
+ // UTF-8.
+ EXPECT_EQ(url.host_piece(), "%25t%EF%BF%BD");
+}
+
+TEST(GURLTest, PortZero) {
+ GURL port_zero_url("http://127.0.0.1:0/blah");
+
+ // https://url.spec.whatwg.org/#port-state says that the port 1) consists of
+ // ASCII digits (this excludes negative numbers) and 2) cannot be greater than
+ // 2^16-1. This means that port=0 should be valid.
+ EXPECT_TRUE(port_zero_url.is_valid());
+ EXPECT_EQ("0", port_zero_url.port());
+ EXPECT_EQ("127.0.0.1", port_zero_url.host());
+ EXPECT_EQ("http", port_zero_url.scheme());
+
+ // https://crbug.com/1065532: SchemeHostPort would previously incorrectly
+ // consider port=0 to be invalid.
+ SchemeHostPort scheme_host_port(port_zero_url);
+ EXPECT_TRUE(scheme_host_port.IsValid());
+ EXPECT_EQ(port_zero_url.scheme(), scheme_host_port.scheme());
+ EXPECT_EQ(port_zero_url.host(), scheme_host_port.host());
+ EXPECT_EQ(port_zero_url.port(),
+ base::NumberToString(scheme_host_port.port()));
+
+ // https://crbug.com/1065532: The SchemeHostPort problem above would lead to
+ // bizarre results below - resolved origin would incorrectly be returned as an
+ // opaque origin derived from |another_origin|.
+ url::Origin another_origin = url::Origin::Create(GURL("http://other.com"));
+ url::Origin resolved_origin =
+ url::Origin::Resolve(port_zero_url, another_origin);
+ EXPECT_FALSE(resolved_origin.opaque());
+ EXPECT_EQ(port_zero_url.scheme(), resolved_origin.scheme());
+ EXPECT_EQ(port_zero_url.host(), resolved_origin.host());
+ EXPECT_EQ(port_zero_url.port(), base::NumberToString(resolved_origin.port()));
+
+ // port=0 and default HTTP port are different.
+ GURL default_port("http://127.0.0.1/foo");
+ EXPECT_EQ(0, SchemeHostPort(port_zero_url).port());
+ EXPECT_EQ(80, SchemeHostPort(default_port).port());
+ url::Origin default_port_origin = url::Origin::Create(default_port);
+ EXPECT_FALSE(default_port_origin.IsSameOriginWith(resolved_origin));
+}
+
+class GURLTestTraits {
+ public:
+ using UrlType = GURL;
+
+ static UrlType CreateUrlFromString(base::StringPiece s) { return GURL(s); }
+ static bool IsAboutBlank(const UrlType& url) { return url.IsAboutBlank(); }
+ static bool IsAboutSrcdoc(const UrlType& url) { return url.IsAboutSrcdoc(); }
+
+ // Only static members.
+ GURLTestTraits() = delete;
+};
+
+INSTANTIATE_TYPED_TEST_SUITE_P(GURL, AbstractUrlTest, GURLTestTraits);
+
} // namespace url
diff --git a/url/ipc/BUILD.gn b/url/ipc/BUILD.gn
index 52968a1..d7801af 100644
--- a/url/ipc/BUILD.gn
+++ b/url/ipc/BUILD.gn
@@ -1,4 +1,4 @@
-# Copyright (c) 2016 The Chromium Authors. All rights reserved.
+# Copyright 2016 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
@@ -17,22 +17,19 @@
"//ipc",
"//url",
]
- deps = [
- "//base",
- ]
+ deps = [ "//base" ]
}
# IPC unit tests aren't build on iOS.
if (!is_ios) {
- test("url_ipc_unittests") {
- sources = [
- "url_param_traits_unittest.cc",
- ]
+ source_set("url_ipc_unittests") {
+ testonly = true
+
+ sources = [ "url_param_traits_unittest.cc" ]
deps = [
":url_ipc",
"//base",
- "//base/test:run_all_unittests",
"//ipc:test_support",
"//testing/gtest",
"//url:url",
diff --git a/url/ipc/url_ipc_export.h b/url/ipc/url_ipc_export.h
index 1da0fa6..ca500ab 100644
--- a/url/ipc/url_ipc_export.h
+++ b/url/ipc/url_ipc_export.h
@@ -1,9 +1,9 @@
-// Copyright 2016 The Chromium Authors. All rights reserved.
+// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#ifndef URL_IPC_EXPORT_H_
-#define URL_IPC_EXPORT_H_
+#ifndef URL_IPC_URL_IPC_EXPORT_H_
+#define URL_IPC_URL_IPC_EXPORT_H_
#if defined(COMPONENT_BUILD)
#if defined(WIN32)
@@ -26,4 +26,4 @@
#define URL_IPC_EXPORT
#endif
-#endif // URL_IPC_EXPORT_H_
+#endif // URL_IPC_URL_IPC_EXPORT_H_
diff --git a/url/ipc/url_param_traits.cc b/url/ipc/url_param_traits.cc
index e40ae8f..6999c82 100644
--- a/url/ipc/url_param_traits.cc
+++ b/url/ipc/url_param_traits.cc
@@ -1,10 +1,14 @@
-// Copyright (c) 2016 The Chromium Authors. All rights reserved.
+// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "url/ipc/url_param_traits.h"
+#include <string>
+
+#include "base/pickle.h"
#include "url/gurl.h"
+#include "url/url_constants.h"
namespace IPC {
diff --git a/url/ipc/url_param_traits.h b/url/ipc/url_param_traits.h
index f40150f..cdb5724 100644
--- a/url/ipc/url_param_traits.h
+++ b/url/ipc/url_param_traits.h
@@ -1,14 +1,21 @@
-// Copyright (c) 2016 The Chromium Authors. All rights reserved.
+// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef URL_IPC_URL_PARAM_TRAITS_H_
#define URL_IPC_URL_PARAM_TRAITS_H_
-#include "ipc/ipc_message_utils.h"
+#include <string>
+
+#include "ipc/ipc_param_traits.h"
#include "url/gurl.h"
#include "url/ipc/url_ipc_export.h"
+namespace base {
+class Pickle;
+class PickleIterator;
+} // namespace base
+
namespace IPC {
template <>
diff --git a/url/ipc/url_param_traits_unittest.cc b/url/ipc/url_param_traits_unittest.cc
index 16eeab0..78bed35 100644
--- a/url/ipc/url_param_traits_unittest.cc
+++ b/url/ipc/url_param_traits_unittest.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2016 The Chromium Authors. All rights reserved.
+// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -10,63 +10,150 @@
#include "url/gurl.h"
#include "url/ipc/url_param_traits.h"
+namespace {
+
+GURL BounceUrl(const GURL& input) {
+ IPC::Message msg(1, 2, IPC::Message::PRIORITY_NORMAL);
+ IPC::ParamTraits<GURL>::Write(&msg, input);
+
+ GURL output;
+ base::PickleIterator iter(msg);
+ EXPECT_TRUE(IPC::ParamTraits<GURL>::Read(&msg, &iter, &output));
+
+ return output;
+}
+
+void ExpectSerializationRoundtrips(const GURL& input) {
+ SCOPED_TRACE(testing::Message()
+ << "Input GURL: " << input.possibly_invalid_spec());
+ GURL output = BounceUrl(input);
+
+ // We want to test each component individually to make sure its range was
+ // correctly serialized and deserialized, not just the spec.
+ EXPECT_EQ(input.possibly_invalid_spec(), output.possibly_invalid_spec());
+ EXPECT_EQ(input.is_valid(), output.is_valid());
+ EXPECT_EQ(input.scheme(), output.scheme());
+ EXPECT_EQ(input.username(), output.username());
+ EXPECT_EQ(input.password(), output.password());
+ EXPECT_EQ(input.host(), output.host());
+ EXPECT_EQ(input.port(), output.port());
+ EXPECT_EQ(input.path(), output.path());
+ EXPECT_EQ(input.query(), output.query());
+ EXPECT_EQ(input.ref(), output.ref());
+}
+
+} // namespace
+
// Tests that serialize/deserialize correctly understand each other.
-TEST(IPCMessageTest, Serialize) {
+TEST(IPCMessageTest, SerializeGurl_Basic) {
const char* serialize_cases[] = {
"http://www.google.com/",
"http://user:pass@host.com:888/foo;bar?baz#nop",
};
- for (size_t i = 0; i < arraysize(serialize_cases); i++) {
- GURL input(serialize_cases[i]);
- IPC::Message msg(1, 2, IPC::Message::PRIORITY_NORMAL);
- IPC::ParamTraits<GURL>::Write(&msg, input);
-
- GURL output;
- base::PickleIterator iter(msg);
- EXPECT_TRUE(IPC::ParamTraits<GURL>::Read(&msg, &iter, &output));
-
- // We want to test each component individually to make sure its range was
- // correctly serialized and deserialized, not just the spec.
- EXPECT_EQ(input.possibly_invalid_spec(), output.possibly_invalid_spec());
- EXPECT_EQ(input.is_valid(), output.is_valid());
- EXPECT_EQ(input.scheme(), output.scheme());
- EXPECT_EQ(input.username(), output.username());
- EXPECT_EQ(input.password(), output.password());
- EXPECT_EQ(input.host(), output.host());
- EXPECT_EQ(input.port(), output.port());
- EXPECT_EQ(input.path(), output.path());
- EXPECT_EQ(input.query(), output.query());
- EXPECT_EQ(input.ref(), output.ref());
+ for (const char* test_input : serialize_cases) {
+ SCOPED_TRACE(testing::Message() << "Test input: " << test_input);
+ GURL input(test_input);
+ ExpectSerializationRoundtrips(input);
}
+}
- // Test an excessively long GURL.
- {
- const std::string url = std::string("http://example.org/").append(
- url::kMaxURLChars + 1, 'a');
- GURL input(url.c_str());
- IPC::Message msg(1, 2, IPC::Message::PRIORITY_NORMAL);
- IPC::ParamTraits<GURL>::Write(&msg, input);
+// Test of an excessively long GURL.
+TEST(IPCMessageTest, SerializeGurl_ExcessivelyLong) {
+ const std::string url =
+ std::string("http://example.org/").append(url::kMaxURLChars + 1, 'a');
+ GURL input(url.c_str());
+ GURL output = BounceUrl(input);
+ EXPECT_TRUE(output.is_empty());
+}
- GURL output;
- base::PickleIterator iter(msg);
- EXPECT_TRUE(IPC::ParamTraits<GURL>::Read(&msg, &iter, &output));
- EXPECT_TRUE(output.is_empty());
- }
+// Test of an invalid GURL.
+TEST(IPCMessageTest, SerializeGurl_InvalidUrl) {
+ IPC::Message msg;
+ msg.WriteString("#inva://idurl/");
+ GURL output;
+ base::PickleIterator iter(msg);
+ EXPECT_FALSE(IPC::ParamTraits<GURL>::Read(&msg, &iter, &output));
+}
- // Test an invalid GURL.
- {
- IPC::Message msg;
- msg.WriteString("#inva://idurl/");
- GURL output;
- base::PickleIterator iter(msg);
- EXPECT_FALSE(IPC::ParamTraits<GURL>::Read(&msg, &iter, &output));
- }
-
- // Also test the corrupt case.
+// Test of a corrupt deserialization input.
+TEST(IPCMessageTest, SerializeGurl_CorruptPayload) {
IPC::Message msg(1, 2, IPC::Message::PRIORITY_NORMAL);
msg.WriteInt(99);
GURL output;
base::PickleIterator iter(msg);
EXPECT_FALSE(IPC::ParamTraits<GURL>::Read(&msg, &iter, &output));
}
+
+// Test for the GURL testcase based on https://crbug.com/1214098 (which in turn
+// was based on ContentSecurityPolicyBrowserTest.FileURLs).
+TEST(IPCMessageTest, SerializeGurl_WindowsDriveInPathReplacement) {
+ {
+ // #1: Try creating a file URL with a non-empty hostname.
+ GURL url_without_windows_drive_letter("file://hostname/");
+ EXPECT_EQ("/", url_without_windows_drive_letter.path());
+ EXPECT_EQ("hostname", url_without_windows_drive_letter.host());
+ ExpectSerializationRoundtrips(url_without_windows_drive_letter);
+ }
+
+ {
+ // #2: Use GURL::Replacements to create a GURL that 1) has a path starting
+ // with a Windows drive letter and 2) has a non-empty hostname (inherited
+ // from `url_without_windows_drive_letter` above). This used to not go
+ // through the DoParseUNC path that normally strips the hostname (for more
+ // details, see https://crbug.com/1214098#c4).
+ GURL::Replacements repl;
+ const std::string kNewPath = "/C:/dir/file.txt";
+ repl.SetPathStr(kNewPath);
+ GURL url_made_with_replace_components =
+ GURL("file://hostname/").ReplaceComponents(repl);
+
+ EXPECT_EQ(kNewPath, url_made_with_replace_components.path());
+ EXPECT_EQ("hostname", url_made_with_replace_components.host());
+ EXPECT_EQ("file://hostname/C:/dir/file.txt",
+ url_made_with_replace_components.spec());
+ // This is the MAIN VERIFICATION in this test. This used to fail on Windows;
+ // see https://crbug.com/1214098.
+ ExpectSerializationRoundtrips(url_made_with_replace_components);
+ }
+
+ {
+ // #3: Try to create a URL with a Windows drive letter and a non-empty
+ // hostname directly.
+ GURL url_created_directly("file://hostname/C:/dir/file.txt");
+ EXPECT_EQ("/C:/dir/file.txt", url_created_directly.path());
+ EXPECT_EQ("hostname", url_created_directly.host());
+ EXPECT_EQ("file://hostname/C:/dir/file.txt", url_created_directly.spec());
+ ExpectSerializationRoundtrips(url_created_directly);
+
+ // The URL created directly and the URL created through ReplaceComponents
+ // should be the same.
+ GURL::Replacements repl;
+ const std::string kNewPath = "/C:/dir/file.txt";
+ repl.SetPathStr(kNewPath);
+ GURL url_made_with_replace_components =
+ GURL("file://hostname/").ReplaceComponents(repl);
+ EXPECT_EQ(url_created_directly.spec(),
+ url_made_with_replace_components.spec());
+ }
+
+ {
+ // #4: Try to create a URL with a Windows drive letter and "localhost" as
+ // hostname directly.
+ GURL url_created_directly("file://localhost/C:/dir/file.txt");
+ EXPECT_EQ("/C:/dir/file.txt", url_created_directly.path());
+ EXPECT_EQ("", url_created_directly.host());
+ EXPECT_EQ("file:///C:/dir/file.txt", url_created_directly.spec());
+ ExpectSerializationRoundtrips(url_created_directly);
+
+ // The URL created directly and the URL created through ReplaceComponents
+ // should be the same.
+ GURL::Replacements repl;
+ const std::string kNewPath = "/C:/dir/file.txt";
+ repl.SetPathStr(kNewPath);
+ GURL url_made_with_replace_components =
+ GURL("file://localhost/").ReplaceComponents(repl);
+ EXPECT_EQ(url_created_directly.spec(),
+ url_made_with_replace_components.spec());
+ }
+}
diff --git a/url/mojom/BUILD.gn b/url/mojom/BUILD.gn
index 1f77a2f..a936a43 100644
--- a/url/mojom/BUILD.gn
+++ b/url/mojom/BUILD.gn
@@ -1,34 +1,141 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
+# Copyright 2016 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import("//mojo/public/tools/bindings/mojom.gni")
mojom("url_mojom_gurl") {
- sources = [
- "url.mojom",
+ generate_java = true
+ sources = [ "url.mojom" ]
+
+ cpp_typemaps = [
+ {
+ types = [
+ {
+ mojom = "url.mojom.Url"
+ cpp = "::GURL"
+ },
+ ]
+ traits_headers = [ "//url/mojom/url_gurl_mojom_traits.h" ]
+ traits_public_deps = [
+ ":mojom_traits",
+ "//url",
+ ]
+ },
]
+
+ blink_cpp_typemaps = [
+ {
+ types = [
+ {
+ mojom = "url.mojom.Url"
+ cpp = "::blink::KURL"
+ force_serialize = true
+ },
+ ]
+ traits_headers = [
+ "//third_party/blink/renderer/platform/mojo/kurl_mojom_traits.h",
+ "//third_party/blink/renderer/platform/weborigin/kurl_hash.h",
+ ]
+ traits_public_deps = [ "//url" ]
+ },
+ ]
+
+ webui_module_path = "chrome://resources/mojo/url/mojom"
}
mojom("url_mojom_origin") {
- sources = [
- "origin.mojom",
- ]
+ generate_java = true
+ sources = [ "origin.mojom" ]
public_deps = [
":url_mojom_gurl",
+ "//mojo/public/mojom/base",
]
check_includes_blink = false
+
+ cpp_typemaps = [
+ {
+ types = [
+ {
+ mojom = "url.mojom.Origin"
+ cpp = "::url::Origin"
+ },
+ ]
+ traits_headers = [ "//url/mojom/origin_mojom_traits.h" ]
+ traits_public_deps = [
+ ":mojom_traits",
+ "//url",
+ ]
+ },
+ ]
+
+ blink_cpp_typemaps = [
+ {
+ types = [
+ {
+ mojom = "url.mojom.Origin"
+ cpp = "::scoped_refptr<const ::blink::SecurityOrigin>"
+ nullable_is_same_type = true
+ },
+ ]
+ traits_headers = [ "//third_party/blink/renderer/platform/mojo/security_origin_mojom_traits.h" ]
+ traits_public_deps = [ "//url" ]
+ },
+ ]
+
+ webui_module_path = "chrome://resources/mojo/url/mojom"
+}
+
+mojom("url_mojom_scheme_host_port") {
+ generate_java = true
+ sources = [ "scheme_host_port.mojom" ]
+
+ cpp_typemaps = [
+ {
+ types = [
+ {
+ mojom = "url.mojom.SchemeHostPort"
+ cpp = "::url::SchemeHostPort"
+ },
+ ]
+ traits_headers = [ "//url/mojom/scheme_host_port_mojom_traits.h" ]
+ traits_public_deps = [ "//url" ]
+ },
+ ]
}
mojom("test_url_mojom_gurl") {
- sources = [
- "url_test.mojom",
- ]
+ sources = [ "url_test.mojom" ]
public_deps = [
":url_mojom_gurl",
":url_mojom_origin",
+ ":url_mojom_scheme_host_port",
+ ]
+}
+
+component("mojom_traits") {
+ output_name = "url_mojom_traits"
+
+ sources = [
+ "origin_mojom_traits.cc",
+ "origin_mojom_traits.h",
+ "scheme_host_port_mojom_traits.cc",
+ "scheme_host_port_mojom_traits.h",
+ "url_gurl_mojom_traits.cc",
+ "url_gurl_mojom_traits.h",
+ ]
+
+ defines = [ "IS_URL_MOJOM_TRAITS_IMPL" ]
+
+ public_deps = [
+ ":url_mojom_gurl_shared",
+ ":url_mojom_origin_shared",
+ ":url_mojom_scheme_host_port",
+ "//base",
+ "//mojo/public/cpp/base:shared_typemap_traits",
+ "//url",
]
}
diff --git a/url/mojom/DIR_METADATA b/url/mojom/DIR_METADATA
new file mode 100644
index 0000000..c080aa1
--- /dev/null
+++ b/url/mojom/DIR_METADATA
@@ -0,0 +1,11 @@
+# Metadata information for this directory.
+#
+# For more information on DIR_METADATA files, see:
+# https://source.chromium.org/chromium/infra/infra/+/main:go/src/infra/tools/dirmd/README.md
+#
+# For the schema of this file, see Metadata message:
+# https://source.chromium.org/chromium/infra/infra/+/main:go/src/infra/tools/dirmd/proto/dir_metadata.proto
+
+monorail {
+ component: "Internals>Mojo"
+}
\ No newline at end of file
diff --git a/url/mojom/OWNERS b/url/mojom/OWNERS
index ae29a36..1feb514 100644
--- a/url/mojom/OWNERS
+++ b/url/mojom/OWNERS
@@ -2,5 +2,3 @@
per-file *.mojom=file://ipc/SECURITY_OWNERS
per-file *_mojom_traits*.*=set noparent
per-file *_mojom_traits*.*=file://ipc/SECURITY_OWNERS
-per-file *.typemap=set noparent
-per-file *.typemap=file://ipc/SECURITY_OWNERS
diff --git a/url/mojom/gurl.typemap b/url/mojom/gurl.typemap
deleted file mode 100644
index f94e3e4..0000000
--- a/url/mojom/gurl.typemap
+++ /dev/null
@@ -1,11 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-mojom = "//url/mojom/url.mojom"
-public_headers = [ "//url/gurl.h" ]
-traits_headers = [ "//url/mojom/url_gurl_mojom_traits.h" ]
-deps = [
- "//url",
-]
-type_mappings = [ "url.mojom.Url=GURL" ]
diff --git a/url/mojom/origin.mojom b/url/mojom/origin.mojom
index 884357b..94b6e2d 100644
--- a/url/mojom/origin.mojom
+++ b/url/mojom/origin.mojom
@@ -1,12 +1,19 @@
-// Copyright 2016 The Chromium Authors. All rights reserved.
+// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
+[JavaPackage="org.chromium.url.internal.mojom"]
module url.mojom;
+import "mojo/public/mojom/base/unguessable_token.mojom";
+
struct Origin {
string scheme;
string host;
uint16 port;
- bool unique;
+
+ // When a nonce is provided, this origin is opaque. The scheme/host/port do
+ // not need to be valid, but if they are, they identify the tuple origin
+ // from which this opaque origin is derived.
+ mojo_base.mojom.UnguessableToken? nonce_if_opaque;
};
diff --git a/url/mojom/origin.typemap b/url/mojom/origin.typemap
deleted file mode 100644
index 464375c..0000000
--- a/url/mojom/origin.typemap
+++ /dev/null
@@ -1,11 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-mojom = "//url/mojom/origin.mojom"
-public_headers = [ "//url/origin.h" ]
-traits_headers = [ "//url/mojom/origin_mojom_traits.h" ]
-deps = [
- "//url",
-]
-type_mappings = [ "url.mojom.Origin=url::Origin" ]
diff --git a/url/mojom/origin_mojom_traits.cc b/url/mojom/origin_mojom_traits.cc
new file mode 100644
index 0000000..9e8475a
--- /dev/null
+++ b/url/mojom/origin_mojom_traits.cc
@@ -0,0 +1,34 @@
+// Copyright 2020 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/mojom/origin_mojom_traits.h"
+
+#include "base/strings/string_piece.h"
+
+namespace mojo {
+
+// static
+bool StructTraits<url::mojom::OriginDataView, url::Origin>::Read(
+ url::mojom::OriginDataView data,
+ url::Origin* out) {
+ base::StringPiece scheme, host;
+ absl::optional<base::UnguessableToken> nonce_if_opaque;
+ if (!data.ReadScheme(&scheme) || !data.ReadHost(&host) ||
+ !data.ReadNonceIfOpaque(&nonce_if_opaque))
+ return false;
+
+ absl::optional<url::Origin> creation_result =
+ nonce_if_opaque
+ ? url::Origin::UnsafelyCreateOpaqueOriginWithoutNormalization(
+ scheme, host, data.port(), url::Origin::Nonce(*nonce_if_opaque))
+ : url::Origin::UnsafelyCreateTupleOriginWithoutNormalization(
+ scheme, host, data.port());
+ if (!creation_result)
+ return false;
+
+ *out = std::move(creation_result.value());
+ return true;
+}
+
+} // namespace mojo
diff --git a/url/mojom/origin_mojom_traits.h b/url/mojom/origin_mojom_traits.h
index 8c78093..0d3cbb2 100644
--- a/url/mojom/origin_mojom_traits.h
+++ b/url/mojom/origin_mojom_traits.h
@@ -1,44 +1,39 @@
-// Copyright 2016 The Chromium Authors. All rights reserved.
+// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#ifndef URL_MOJO_ORIGIN_MOJOM_TRAITS_H_
-#define URL_MOJO_ORIGIN_MOJOM_TRAITS_H_
+#ifndef URL_MOJOM_ORIGIN_MOJOM_TRAITS_H_
+#define URL_MOJOM_ORIGIN_MOJOM_TRAITS_H_
-#include "base/strings/string_piece.h"
-#include "url/mojom/origin.mojom.h"
+#include "base/component_export.h"
+#include "base/unguessable_token.h"
+#include "mojo/public/cpp/base/unguessable_token_mojom_traits.h"
+#include "mojo/public/cpp/bindings/optional_as_pointer.h"
+#include "third_party/abseil-cpp/absl/types/optional.h"
+#include "url/mojom/origin.mojom-shared.h"
#include "url/origin.h"
namespace mojo {
template <>
-struct StructTraits<url::mojom::OriginDataView, url::Origin> {
- static const std::string& scheme(const url::Origin& r) { return r.scheme(); }
- static const std::string& host(const url::Origin& r) { return r.host(); }
- static uint16_t port(const url::Origin& r) { return r.port(); }
- static bool unique(const url::Origin& r) { return r.unique(); }
- static bool Read(url::mojom::OriginDataView data, url::Origin* out) {
- if (data.unique()) {
- *out = url::Origin();
- } else {
- base::StringPiece scheme, host;
- if (!data.ReadScheme(&scheme) || !data.ReadHost(&host))
- return false;
-
- *out = url::Origin::UnsafelyCreateOriginWithoutNormalization(scheme, host,
- data.port());
- }
-
- // If a unique origin was created, but the unique flag wasn't set, then
- // the values provided to 'UnsafelyCreateOriginWithoutNormalization' were
- // invalid.
- if (!data.unique() && out->unique())
- return false;
-
- return true;
+struct COMPONENT_EXPORT(URL_MOJOM_TRAITS)
+ StructTraits<url::mojom::OriginDataView, url::Origin> {
+ static const std::string& scheme(const url::Origin& r) {
+ return r.GetTupleOrPrecursorTupleIfOpaque().scheme();
}
+ static const std::string& host(const url::Origin& r) {
+ return r.GetTupleOrPrecursorTupleIfOpaque().host();
+ }
+ static uint16_t port(const url::Origin& r) {
+ return r.GetTupleOrPrecursorTupleIfOpaque().port();
+ }
+ static mojo::OptionalAsPointer<const base::UnguessableToken> nonce_if_opaque(
+ const url::Origin& r) {
+ return mojo::MakeOptionalAsPointer(r.GetNonceForSerialization());
+ }
+ static bool Read(url::mojom::OriginDataView data, url::Origin* out);
};
} // namespace mojo
-#endif // URL_MOJO_ORIGIN_MOJOM_TRAITS_H_
+#endif // URL_MOJOM_ORIGIN_MOJOM_TRAITS_H_
diff --git a/url/mojom/scheme_host_port.mojom b/url/mojom/scheme_host_port.mojom
new file mode 100644
index 0000000..79f3722
--- /dev/null
+++ b/url/mojom/scheme_host_port.mojom
@@ -0,0 +1,13 @@
+// Copyright 2021 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+module url.mojom;
+
+// See url::SchemeHostPort for details, and what differentiates this from an
+// origin.
+struct SchemeHostPort {
+ string scheme;
+ string host;
+ uint16 port;
+};
diff --git a/url/mojom/scheme_host_port_mojom_traits.cc b/url/mojom/scheme_host_port_mojom_traits.cc
new file mode 100644
index 0000000..63f6af4
--- /dev/null
+++ b/url/mojom/scheme_host_port_mojom_traits.cc
@@ -0,0 +1,27 @@
+// Copyright 2021 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/mojom/scheme_host_port_mojom_traits.h"
+
+#include "base/strings/string_piece.h"
+#include "url/mojom/scheme_host_port.mojom-shared.h"
+#include "url/scheme_host_port.h"
+
+namespace mojo {
+
+// static
+bool StructTraits<url::mojom::SchemeHostPortDataView, url::SchemeHostPort>::
+ Read(url::mojom::SchemeHostPortDataView data, url::SchemeHostPort* out) {
+ base::StringPiece scheme, host;
+ if (!data.ReadScheme(&scheme) || !data.ReadHost(&host))
+ return false;
+
+ *out = url::SchemeHostPort(scheme, host, data.port());
+
+ // Consider it an error if the output SchemeHostPort is not valid, but
+ // non-empty values were received over Mojo.
+ return out->IsValid() || (scheme.empty() && host.empty() && data.port() == 0);
+}
+
+} // namespace mojo
diff --git a/url/mojom/scheme_host_port_mojom_traits.h b/url/mojom/scheme_host_port_mojom_traits.h
new file mode 100644
index 0000000..e91ae3a
--- /dev/null
+++ b/url/mojom/scheme_host_port_mojom_traits.h
@@ -0,0 +1,30 @@
+// Copyright 2021 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_MOJOM_SCHEME_HOST_PORT_MOJOM_TRAITS_H_
+#define URL_MOJOM_SCHEME_HOST_PORT_MOJOM_TRAITS_H_
+
+#include "base/component_export.h"
+#include "url/mojom/scheme_host_port.mojom-shared.h"
+#include "url/scheme_host_port.h"
+
+namespace mojo {
+
+template <>
+struct COMPONENT_EXPORT(URL_MOJOM_TRAITS)
+ StructTraits<url::mojom::SchemeHostPortDataView, url::SchemeHostPort> {
+ static const std::string& scheme(const url::SchemeHostPort& r) {
+ return r.scheme();
+ }
+ static const std::string& host(const url::SchemeHostPort& r) {
+ return r.host();
+ }
+ static uint16_t port(const url::SchemeHostPort& r) { return r.port(); }
+ static bool Read(url::mojom::SchemeHostPortDataView data,
+ url::SchemeHostPort* out);
+};
+
+} // namespace mojo
+
+#endif // URL_MOJOM_SCHEME_HOST_PORT_MOJOM_TRAITS_H_
diff --git a/url/mojom/scheme_host_port_mojom_traits_unittest.cc b/url/mojom/scheme_host_port_mojom_traits_unittest.cc
new file mode 100644
index 0000000..7efd5d7
--- /dev/null
+++ b/url/mojom/scheme_host_port_mojom_traits_unittest.cc
@@ -0,0 +1,36 @@
+// Copyright 2021 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/mojom/scheme_host_port_mojom_traits.h"
+
+#include <string>
+
+#include "mojo/public/cpp/test_support/test_utils.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "url/mojom/scheme_host_port.mojom.h"
+#include "url/scheme_host_port.h"
+
+namespace url {
+
+namespace {
+
+void TestRoundTrip(const url::SchemeHostPort& in) {
+ url::SchemeHostPort result;
+ ASSERT_TRUE(
+ mojo::test::SerializeAndDeserialize<mojom::SchemeHostPort>(in, result))
+ << in.Serialize();
+ EXPECT_EQ(in, result) << "Expected " << in.Serialize() << ", but got "
+ << result.Serialize();
+}
+
+} // namespace
+
+TEST(SchemeHostPortMojomTraitsTest, RoundTrip) {
+ TestRoundTrip(url::SchemeHostPort());
+ TestRoundTrip(url::SchemeHostPort("http", "test", 80));
+ TestRoundTrip(url::SchemeHostPort("https", "foo.test", 443));
+ TestRoundTrip(url::SchemeHostPort("file", "", 0));
+}
+
+} // namespace url
diff --git a/url/mojom/typemaps.gni b/url/mojom/typemaps.gni
deleted file mode 100644
index 53875c3..0000000
--- a/url/mojom/typemaps.gni
+++ /dev/null
@@ -1,8 +0,0 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-typemaps = [
- "//url/mojom/origin.typemap",
- "//url/mojom/gurl.typemap",
-]
diff --git a/url/mojom/url.mojom b/url/mojom/url.mojom
index 3022837..e5fbee6 100644
--- a/url/mojom/url.mojom
+++ b/url/mojom/url.mojom
@@ -1,13 +1,13 @@
-// Copyright 2016 The Chromium Authors. All rights reserved.
+// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
module url.mojom;
-// Don't make backwards-incompatible changes to this definition!
-// It's used in PageState serialization, so backwards incompatible changes
-// would cause stored PageState objects to be un-parseable. Please contact the
-// page state serialization owners before making such a change.
+// 2 * 1024 * 1024
+const uint32 kMaxURLChars = 2097152;
+
+[Stable]
struct Url {
string url;
-};
\ No newline at end of file
+};
diff --git a/url/mojom/url_gurl_mojom_traits.cc b/url/mojom/url_gurl_mojom_traits.cc
new file mode 100644
index 0000000..97b301a
--- /dev/null
+++ b/url/mojom/url_gurl_mojom_traits.cc
@@ -0,0 +1,40 @@
+// Copyright 2020 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/mojom/url_gurl_mojom_traits.h"
+
+#include "url/url_constants.h"
+
+namespace mojo {
+
+// static
+base::StringPiece StructTraits<url::mojom::UrlDataView, GURL>::url(
+ const GURL& r) {
+ if (r.possibly_invalid_spec().length() > url::kMaxURLChars || !r.is_valid()) {
+ return base::StringPiece();
+ }
+
+ return base::StringPiece(r.possibly_invalid_spec().c_str(),
+ r.possibly_invalid_spec().length());
+}
+
+// static
+bool StructTraits<url::mojom::UrlDataView, GURL>::Read(
+ url::mojom::UrlDataView data,
+ GURL* out) {
+ base::StringPiece url_string;
+ if (!data.ReadUrl(&url_string))
+ return false;
+
+ if (url_string.length() > url::kMaxURLChars)
+ return false;
+
+ *out = GURL(url_string);
+ if (!url_string.empty() && !out->is_valid())
+ return false;
+
+ return true;
+}
+
+} // namespace mojo
diff --git a/url/mojom/url_gurl_mojom_traits.h b/url/mojom/url_gurl_mojom_traits.h
index 57a36c9..19ac049 100644
--- a/url/mojom/url_gurl_mojom_traits.h
+++ b/url/mojom/url_gurl_mojom_traits.h
@@ -1,42 +1,23 @@
-// Copyright 2016 The Chromium Authors. All rights reserved.
+// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef URL_MOJOM_URL_GURL_MOJOM_TRAITS_H_
#define URL_MOJOM_URL_GURL_MOJOM_TRAITS_H_
+#include "base/component_export.h"
#include "base/strings/string_piece.h"
+#include "mojo/public/cpp/bindings/struct_traits.h"
#include "url/gurl.h"
-#include "url/mojom/url.mojom.h"
-#include "url/url_constants.h"
+#include "url/mojom/url.mojom-shared.h"
namespace mojo {
template <>
-struct StructTraits<url::mojom::UrlDataView, GURL> {
- static base::StringPiece url(const GURL& r) {
- if (r.possibly_invalid_spec().length() > url::kMaxURLChars ||
- !r.is_valid()) {
- return base::StringPiece();
- }
-
- return base::StringPiece(r.possibly_invalid_spec().c_str(),
- r.possibly_invalid_spec().length());
- }
- static bool Read(url::mojom::UrlDataView data, GURL* out) {
- base::StringPiece url_string;
- if (!data.ReadUrl(&url_string))
- return false;
-
- if (url_string.length() > url::kMaxURLChars)
- return false;
-
- *out = GURL(url_string);
- if (!url_string.empty() && !out->is_valid())
- return false;
-
- return true;
- }
+struct COMPONENT_EXPORT(URL_MOJOM_TRAITS)
+ StructTraits<url::mojom::UrlDataView, GURL> {
+ static base::StringPiece url(const GURL& r);
+ static bool Read(url::mojom::UrlDataView data, GURL* out);
};
} // namespace mojo
diff --git a/url/mojom/url_gurl_mojom_traits_unittest.cc b/url/mojom/url_gurl_mojom_traits_unittest.cc
index e11d063..48968d2 100644
--- a/url/mojom/url_gurl_mojom_traits_unittest.cc
+++ b/url/mojom/url_gurl_mojom_traits_unittest.cc
@@ -1,13 +1,13 @@
-// Copyright 2016 The Chromium Authors. All rights reserved.
+// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <utility>
-#include "base/logging.h"
-#include "base/macros.h"
-#include "base/message_loop/message_loop.h"
-#include "mojo/public/cpp/bindings/binding.h"
+#include "base/test/task_environment.h"
+#include "mojo/public/cpp/bindings/pending_receiver.h"
+#include "mojo/public/cpp/bindings/receiver.h"
+#include "mojo/public/cpp/bindings/remote.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/mojom/url_test.mojom.h"
@@ -15,8 +15,8 @@
class UrlTestImpl : public mojom::UrlTest {
public:
- explicit UrlTestImpl(mojo::InterfaceRequest<mojom::UrlTest> request)
- : binding_(this, std::move(request)) {}
+ explicit UrlTestImpl(mojo::PendingReceiver<mojom::UrlTest> receiver)
+ : receiver_(this, std::move(receiver)) {}
// UrlTest:
void BounceUrl(const GURL& in, BounceUrlCallback callback) override {
@@ -28,24 +28,24 @@
}
private:
- mojo::Binding<UrlTest> binding_;
+ mojo::Receiver<UrlTest> receiver_;
};
-// Mojo version of chrome IPC test in url/ipc/url_param_traits_unittest.cc.
-TEST(MojoGURLStructTraitsTest, Basic) {
- base::MessageLoop message_loop;
+class MojoGURLStructTraitsTest : public ::testing::Test {
+ public:
+ MojoGURLStructTraitsTest()
+ : url_test_impl_(url_test_remote_.BindNewPipeAndPassReceiver()) {}
- mojom::UrlTestPtr proxy;
- UrlTestImpl impl(MakeRequest(&proxy));
-
- const char* serialize_cases[] = {
- "http://www.google.com/", "http://user:pass@host.com:888/foo;bar?baz#nop",
- };
-
- for (size_t i = 0; i < arraysize(serialize_cases); i++) {
- GURL input(serialize_cases[i]);
+ GURL BounceUrl(const GURL& input) {
GURL output;
- EXPECT_TRUE(proxy->BounceUrl(input, &output));
+ EXPECT_TRUE(url_test_remote_->BounceUrl(input, &output));
+ return output;
+ }
+
+ void ExpectSerializationRoundtrips(const GURL& input) {
+ SCOPED_TRACE(testing::Message()
+ << "Input GURL: " << input.possibly_invalid_spec());
+ GURL output = BounceUrl(input);
// We want to test each component individually to make sure its range was
// correctly serialized and deserialized, not just the spec.
@@ -61,33 +61,149 @@
EXPECT_EQ(input.ref(), output.ref());
}
- // Test an excessively long GURL.
- {
- const std::string url =
- std::string("http://example.org/").append(kMaxURLChars + 1, 'a');
- GURL input(url.c_str());
- GURL output;
- EXPECT_TRUE(proxy->BounceUrl(input, &output));
- EXPECT_TRUE(output.is_empty());
+ Origin BounceOrigin(const Origin& input) {
+ Origin output;
+ EXPECT_TRUE(url_test_remote_->BounceOrigin(input, &output));
+ return output;
}
- // Test basic Origin serialization.
- Origin non_unique = Origin::UnsafelyCreateOriginWithoutNormalization(
- "http", "www.google.com", 80);
- Origin output;
- EXPECT_TRUE(proxy->BounceOrigin(non_unique, &output));
- EXPECT_EQ(non_unique, output);
- EXPECT_FALSE(output.unique());
+ private:
+ base::test::SingleThreadTaskEnvironment task_environment;
+ mojo::Remote<mojom::UrlTest> url_test_remote_;
+ UrlTestImpl url_test_impl_;
+};
- Origin unique;
- EXPECT_TRUE(proxy->BounceOrigin(unique, &output));
- EXPECT_TRUE(output.unique());
+// Mojo version of chrome IPC test in url/ipc/url_param_traits_unittest.cc.
+TEST_F(MojoGURLStructTraitsTest, Basic) {
+ const char* serialize_cases[] = {
+ "http://www.google.com/",
+ "http://user:pass@host.com:888/foo;bar?baz#nop",
+ };
+
+ for (const char* test_input : serialize_cases) {
+ SCOPED_TRACE(testing::Message() << "Test input: " << test_input);
+ GURL input(test_input);
+ ExpectSerializationRoundtrips(input);
+ }
+}
+
+// Test of an excessively long GURL.
+TEST_F(MojoGURLStructTraitsTest, ExcessivelyLongUrl) {
+ const std::string url =
+ std::string("http://example.org/").append(kMaxURLChars + 1, 'a');
+ GURL input(url.c_str());
+ GURL output = BounceUrl(input);
+ EXPECT_TRUE(output.is_empty());
+}
+
+// Test for the GURL testcase based on https://crbug.com/1214098 (which in turn
+// was based on ContentSecurityPolicyBrowserTest.FileURLs).
+TEST_F(MojoGURLStructTraitsTest, WindowsDriveInPathReplacement) {
+ {
+ // #1: Try creating a file URL with a non-empty hostname.
+ GURL url_without_windows_drive_letter("file://hostname/");
+ EXPECT_EQ("/", url_without_windows_drive_letter.path());
+ EXPECT_EQ("hostname", url_without_windows_drive_letter.host());
+ ExpectSerializationRoundtrips(url_without_windows_drive_letter);
+ }
+
+ {
+ // #2: Use GURL::Replacements to create a GURL that 1) has a path starting
+ // with a Windows drive letter and 2) has a non-empty hostname (inherited
+ // from `url_without_windows_drive_letter` above). This used to not go
+ // through the DoParseUNC path that normally strips the hostname (for more
+ // details, see https://crbug.com/1214098#c4).
+ GURL::Replacements repl;
+ const std::string kNewPath = "/C:/dir/file.txt";
+ repl.SetPathStr(kNewPath);
+ GURL url_made_with_replace_components =
+ GURL("file://hostname/").ReplaceComponents(repl);
+
+ EXPECT_EQ(kNewPath, url_made_with_replace_components.path());
+ EXPECT_EQ("hostname", url_made_with_replace_components.host());
+ EXPECT_EQ("file://hostname/C:/dir/file.txt",
+ url_made_with_replace_components.spec());
+ // This is the MAIN VERIFICATION in this test. This used to fail on Windows,
+ // see https://crbug.com/1214098.
+ ExpectSerializationRoundtrips(url_made_with_replace_components);
+ }
+
+ {
+ // #3: Try to create a URL with a Windows drive letter and a non-empty
+ // hostname directly.
+ GURL url_created_directly("file://hostname/C:/dir/file.txt");
+ EXPECT_EQ("/C:/dir/file.txt", url_created_directly.path());
+ EXPECT_EQ("hostname", url_created_directly.host());
+ EXPECT_EQ("file://hostname/C:/dir/file.txt", url_created_directly.spec());
+ ExpectSerializationRoundtrips(url_created_directly);
+
+ // The URL created directly and the URL created through ReplaceComponents
+ // should be the same.
+ GURL::Replacements repl;
+ const std::string kNewPath = "/C:/dir/file.txt";
+ repl.SetPathStr(kNewPath);
+ GURL url_made_with_replace_components =
+ GURL("file://hostname/").ReplaceComponents(repl);
+ EXPECT_EQ(url_created_directly.spec(),
+ url_made_with_replace_components.spec());
+ }
+
+ {
+ // #4: Try to create a URL with a Windows drive letter and "localhost" as
+ // hostname directly.
+ GURL url_created_directly("file://localhost/C:/dir/file.txt");
+ EXPECT_EQ("/C:/dir/file.txt", url_created_directly.path());
+ EXPECT_EQ("", url_created_directly.host());
+ EXPECT_EQ("file:///C:/dir/file.txt", url_created_directly.spec());
+ ExpectSerializationRoundtrips(url_created_directly);
+
+ // The URL created directly and the URL created through ReplaceComponents
+ // should be the same.
+ GURL::Replacements repl;
+ const std::string kNewPath = "/C:/dir/file.txt";
+ repl.SetPathStr(kNewPath);
+ GURL url_made_with_replace_components =
+ GURL("file://localhost/").ReplaceComponents(repl);
+ EXPECT_EQ(url_created_directly.spec(),
+ url_made_with_replace_components.spec());
+ }
+}
+
+// Test of basic Origin serialization.
+TEST_F(MojoGURLStructTraitsTest, OriginSerialization) {
+ Origin non_unique = Origin::UnsafelyCreateTupleOriginWithoutNormalization(
+ "http", "www.google.com", 80)
+ .value();
+ Origin output = BounceOrigin(non_unique);
+ EXPECT_EQ(non_unique, output);
+ EXPECT_FALSE(output.opaque());
+
+ Origin unique1;
+ Origin unique2 = non_unique.DeriveNewOpaqueOrigin();
+ EXPECT_NE(unique1, unique2);
+ EXPECT_NE(unique2, unique1);
+ EXPECT_NE(unique2, non_unique);
+ output = BounceOrigin(unique1);
+ EXPECT_TRUE(output.opaque());
+ EXPECT_EQ(unique1, output);
+ Origin output2 = BounceOrigin(unique2);
+ EXPECT_EQ(unique2, output2);
+ EXPECT_NE(unique2, output);
+ EXPECT_NE(unique1, output2);
Origin normalized =
Origin::CreateFromNormalizedTuple("http", "www.google.com", 80);
- EXPECT_TRUE(proxy->BounceOrigin(normalized, &output));
+ EXPECT_EQ(normalized, non_unique);
+ output = BounceOrigin(normalized);
EXPECT_EQ(normalized, output);
- EXPECT_FALSE(output.unique());
+ EXPECT_EQ(non_unique, output);
+ EXPECT_FALSE(output.opaque());
+}
+
+// Test that the "kMaxURLChars" values are the same in url.mojom and
+// url_constants.cc.
+TEST_F(MojoGURLStructTraitsTest, TestMaxURLChars) {
+ EXPECT_EQ(kMaxURLChars, mojom::kMaxURLChars);
}
} // namespace url
diff --git a/url/mojom/url_test.mojom b/url/mojom/url_test.mojom
index 5e1a25c..4dc00de 100644
--- a/url/mojom/url_test.mojom
+++ b/url/mojom/url_test.mojom
@@ -1,4 +1,4 @@
-// Copyright 2016 The Chromium Authors. All rights reserved.
+// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
diff --git a/url/origin.cc b/url/origin.cc
index 487be18..38be245 100644
--- a/url/origin.cc
+++ b/url/origin.cc
@@ -1,26 +1,39 @@
-// Copyright 2015 The Chromium Authors. All rights reserved.
+// Copyright 2015 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "url/origin.h"
-#include <string.h>
+#include <stdint.h>
-#include "base/logging.h"
-#include "base/strings/string_number_conversions.h"
-#include "starboard/types.h"
+#include <algorithm>
+#include <ostream>
+#include <string>
+#include <tuple>
+#include <utility>
+
+#include "base/base64.h"
+#include "base/check.h"
+#include "base/check_op.h"
+#include "base/containers/contains.h"
+#include "base/containers/span.h"
+#include "base/debug/crash_logging.h"
+#include "base/pickle.h"
+#include "base/strings/strcat.h"
+#include "base/strings/string_piece.h"
+#include "base/trace_event/base_tracing.h"
+#include "base/unguessable_token.h"
#include "url/gurl.h"
-#include "url/url_canon.h"
-#include "url/url_canon_stdstring.h"
+#include "url/scheme_host_port.h"
#include "url/url_constants.h"
#include "url/url_util.h"
namespace url {
-Origin::Origin() {}
+Origin::Origin() : nonce_(Nonce()) {}
Origin Origin::Create(const GURL& url) {
- if (!url.is_valid() || (!url.IsStandard() && !url.SchemeIsBlob()))
+ if (!url.is_valid())
return Origin();
SchemeHostPort tuple;
@@ -35,71 +48,92 @@
tuple = SchemeHostPort(GURL(url.GetContent()));
} else {
tuple = SchemeHostPort(url);
+
+ // It's SchemeHostPort's responsibility to filter out unrecognized schemes;
+ // sanity check that this is happening.
+ DCHECK(!tuple.IsValid() || url.IsStandard() ||
+ base::Contains(GetLocalSchemes(), url.scheme_piece()) ||
+ AllowNonStandardSchemesForAndroidWebView());
}
- if (tuple.IsInvalid())
+ if (!tuple.IsValid())
return Origin();
-
return Origin(std::move(tuple));
}
-// Note: this is very similar to Create(const GURL&), but opaque origins are
-// created with CreateUniqueOpaque() rather than the default constructor.
-Origin Origin::CreateCanonical(const GURL& url) {
- if (!url.is_valid() || (!url.IsStandard() && !url.SchemeIsBlob()))
- return CreateUniqueOpaque();
-
- SchemeHostPort tuple;
-
- if (url.SchemeIsFileSystem()) {
- tuple = SchemeHostPort(*url.inner_url());
- } else if (url.SchemeIsBlob()) {
- // If we're dealing with a 'blob:' URL, https://url.spec.whatwg.org/#origin
- // defines the origin as the origin of the URL which results from parsing
- // the "path", which boils down to everything after the scheme. GURL's
- // 'GetContent()' gives us exactly that.
- tuple = SchemeHostPort(GURL(url.GetContent()));
- } else {
- tuple = SchemeHostPort(url);
- }
-
- if (tuple.IsInvalid())
- return CreateUniqueOpaque();
-
- return Origin(std::move(tuple));
+Origin Origin::Resolve(const GURL& url, const Origin& base_origin) {
+ if (url.SchemeIs(kAboutScheme) || url.is_empty())
+ return base_origin;
+ Origin result = Origin::Create(url);
+ if (!result.opaque())
+ return result;
+ return base_origin.DeriveNewOpaqueOrigin();
}
-Origin::Origin(const Origin& other) = default;
-Origin& Origin::operator=(const Origin& other) = default;
-Origin::Origin(Origin&& other) = default;
-Origin& Origin::operator=(Origin&& other) = default;
-
+Origin::Origin(const Origin&) = default;
+Origin& Origin::operator=(const Origin&) = default;
+Origin::Origin(Origin&&) noexcept = default;
+Origin& Origin::operator=(Origin&&) noexcept = default;
Origin::~Origin() = default;
// static
-Origin Origin::UnsafelyCreateOriginWithoutNormalization(
+absl::optional<Origin> Origin::UnsafelyCreateTupleOriginWithoutNormalization(
base::StringPiece scheme,
base::StringPiece host,
uint16_t port) {
- SchemeHostPort tuple(scheme.as_string(), host.as_string(), port,
+ SchemeHostPort tuple(std::string(scheme), std::string(host), port,
SchemeHostPort::CHECK_CANONICALIZATION);
- if (tuple.IsInvalid())
- return Origin();
+ if (!tuple.IsValid())
+ return absl::nullopt;
return Origin(std::move(tuple));
}
+// static
+absl::optional<Origin> Origin::UnsafelyCreateOpaqueOriginWithoutNormalization(
+ base::StringPiece precursor_scheme,
+ base::StringPiece precursor_host,
+ uint16_t precursor_port,
+ const Origin::Nonce& nonce) {
+ SchemeHostPort precursor(std::string(precursor_scheme),
+ std::string(precursor_host), precursor_port,
+ SchemeHostPort::CHECK_CANONICALIZATION);
+ // For opaque origins, it is okay for the SchemeHostPort to be invalid;
+ // however, this should only arise when the arguments indicate the
+ // canonical representation of the invalid SchemeHostPort.
+ if (!precursor.IsValid() &&
+ !(precursor_scheme.empty() && precursor_host.empty() &&
+ precursor_port == 0)) {
+ return absl::nullopt;
+ }
+ return Origin(std::move(nonce), std::move(precursor));
+}
+
+// static
Origin Origin::CreateFromNormalizedTuple(std::string scheme,
std::string host,
uint16_t port) {
SchemeHostPort tuple(std::move(scheme), std::move(host), port,
SchemeHostPort::ALREADY_CANONICALIZED);
- if (tuple.IsInvalid())
+ if (!tuple.IsValid())
return Origin();
return Origin(std::move(tuple));
}
+// static
+Origin Origin::CreateOpaqueFromNormalizedPrecursorTuple(
+ std::string precursor_scheme,
+ std::string precursor_host,
+ uint16_t precursor_port,
+ const Origin::Nonce& nonce) {
+ SchemeHostPort precursor(std::move(precursor_scheme),
+ std::move(precursor_host), precursor_port,
+ SchemeHostPort::ALREADY_CANONICALIZED);
+ // For opaque origins, it is okay for the SchemeHostPort to be invalid.
+ return Origin(std::move(nonce), std::move(precursor));
+}
+
std::string Origin::Serialize() const {
- if (unique())
+ if (opaque())
return "null";
if (scheme() == kFileScheme)
@@ -109,7 +143,7 @@
}
GURL Origin::GetURL() const {
- if (unique())
+ if (opaque())
return GURL();
if (scheme() == kFileScheme)
@@ -118,35 +152,331 @@
return tuple_.GetURL();
}
+const base::UnguessableToken* Origin::GetNonceForSerialization() const {
+ return nonce_ ? &nonce_->token() : nullptr;
+}
+
bool Origin::IsSameOriginWith(const Origin& other) const {
- return tuple_.Equals(other.tuple_) &&
- (!unique() || (nonce_ && nonce_ == other.nonce_));
+ // scheme/host/port must match, even for opaque origins where |tuple_| holds
+ // the precursor origin.
+ return std::tie(tuple_, nonce_) == std::tie(other.tuple_, other.nonce_);
+}
+
+bool Origin::IsSameOriginWith(const GURL& url) const {
+ if (opaque())
+ return false;
+
+ // The `url::Origin::Create` call here preserves how IsSameOriginWith was used
+ // historically, even though in some scenarios it is not clearly correct:
+ // - Origin of about:blank and about:srcdoc cannot be correctly
+ // computed/recovered.
+ // - Ideally passing an invalid `url` would be a caller error (e.g. a DCHECK).
+ // - The caller intent is not always clear wrt handling the outer-vs-inner
+ // origins/URLs in blob: and filesystem: schemes.
+ return IsSameOriginWith(url::Origin::Create(url));
+}
+
+bool Origin::CanBeDerivedFrom(const GURL& url) const {
+ DCHECK(url.is_valid());
+
+ // For "no access" schemes, blink's SecurityOrigin will always create an
+ // opaque unique one. However, about: scheme is also registered as such but
+ // does not behave this way, therefore exclude it from this check.
+ if (base::Contains(url::GetNoAccessSchemes(), url.scheme()) &&
+ !url.SchemeIs(kAboutScheme)) {
+ // If |this| is not opaque, definitely return false, as the expectation
+ // is for an opaque origin.
+ if (!opaque())
+ return false;
+
+ // And if it is a unique opaque origin, it is definitely fine. But if a
+ // precursor is stored, we should fall through to compare the tuples.
+ if (!tuple_.IsValid())
+ return true;
+ }
+
+ SchemeHostPort url_tuple;
+
+ // Optimization for the common, success case: Scheme/Host/Port match on the
+ // precursor, and the URL is standard. Opaqueness does not matter as a tuple
+ // origin can always create an opaque tuple origin.
+ if (url.IsStandard()) {
+ // Note: if extra copies of the scheme and host are undesirable, this check
+ // can be implemented using StringPiece comparisons, but it then has to
+ // check the port numbers explicitly.
+ if (url.SchemeIsFileSystem()) {
+ url_tuple = SchemeHostPort(*url.inner_url());
+ } else {
+ url_tuple = SchemeHostPort(url);
+ }
+ return url_tuple == tuple_;
+
+ // Blob URLs still contain an inner origin, however it is not accessible
+ // through inner_url(), therefore it requires specific case to handle it.
+ } else if (url.SchemeIsBlob()) {
+ // If |this| doesn't contain any precursor information, it is a unique
+ // opaque origin. This is a valid case, as any browser-initiated navigation
+ // to an about:blank or data: URL will result in a document with such an
+ // origin, and it is valid for it to create blob: URLs.
+ if (!tuple_.IsValid())
+ return true;
+
+ url_tuple = SchemeHostPort(GURL(url.GetContent()));
+ return url_tuple == tuple_;
+ }
+
+ // At this point, the URL has non-standard scheme.
+ DCHECK(!url.IsStandard());
+
+ // All about: URLs (about:blank, about:srcdoc) inherit their origin from
+ // the context which navigated them, which means that they can be in any
+ // type of origin.
+ if (url.SchemeIs(kAboutScheme))
+ return true;
+
+ // All data: URLs commit in opaque origins, therefore |this| must be opaque
+ // if |url| has data: scheme.
+ if (url.SchemeIs(kDataScheme))
+ return opaque();
+
+ // If |this| does not have a valid precursor tuple, it is a unique opaque
+ // origin, which is what we expect non-standard schemes to get.
+ if (!tuple_.IsValid())
+ return true;
+
+ // However, when there is precursor present, the schemes must match.
+ return url.scheme() == tuple_.scheme();
}
bool Origin::DomainIs(base::StringPiece canonical_domain) const {
- return !unique() && url::DomainIs(tuple_.host(), canonical_domain);
+ return !opaque() && url::DomainIs(tuple_.host(), canonical_domain);
}
bool Origin::operator<(const Origin& other) const {
return std::tie(tuple_, nonce_) < std::tie(other.tuple_, other.nonce_);
}
-Origin Origin::CreateUniqueOpaque() {
- return Origin(ConstructAsOpaque::kTag);
+Origin Origin::DeriveNewOpaqueOrigin() const {
+ return Origin(Nonce(), tuple_);
}
-Origin::Origin(ConstructAsOpaque) : nonce_(base::UnguessableToken::Create()) {}
+std::string Origin::GetDebugString(bool include_nonce) const {
+ // Handle non-opaque origins first, as they are simpler.
+ if (!opaque()) {
+ std::string out = Serialize();
+ if (scheme() == kFileScheme)
+ base::StrAppend(&out, {" [internally: ", tuple_.Serialize(), "]"});
+ return out;
+ }
+
+ // For opaque origins, log the nonce and precursor as well. Without this,
+ // EXPECT_EQ failures between opaque origins are nearly impossible to
+ // understand.
+ std::string out = base::StrCat({Serialize(), " [internally:"});
+ if (include_nonce) {
+ out += " (";
+ if (nonce_->raw_token().is_empty())
+ out += "nonce TBD";
+ else
+ out += nonce_->raw_token().ToString();
+ out += ")";
+ }
+ if (!tuple_.IsValid())
+ base::StrAppend(&out, {" anonymous]"});
+ else
+ base::StrAppend(&out, {" derived from ", tuple_.Serialize(), "]"});
+ return out;
+}
Origin::Origin(SchemeHostPort tuple) : tuple_(std::move(tuple)) {
- DCHECK(!tuple_.IsInvalid());
+ DCHECK(!opaque());
+ DCHECK(tuple_.IsValid());
+}
+
+// Constructs an opaque origin derived from |precursor|.
+Origin::Origin(const Nonce& nonce, SchemeHostPort precursor)
+ : tuple_(std::move(precursor)), nonce_(std::move(nonce)) {
+ DCHECK(opaque());
+ // |precursor| is retained, but not accessible via scheme()/host()/port().
+ DCHECK_EQ("", scheme());
+ DCHECK_EQ("", host());
+ DCHECK_EQ(0U, port());
+}
+
+absl::optional<std::string> Origin::SerializeWithNonce() const {
+ return SerializeWithNonceImpl();
+}
+
+absl::optional<std::string> Origin::SerializeWithNonceAndInitIfNeeded() {
+ GetNonceForSerialization();
+ return SerializeWithNonceImpl();
+}
+
+// The pickle is saved in the following format, in order:
+// string - tuple_.Serialize().
+// uint64_t (if opaque) - high bits of nonce if opaque. 0 if not initialized.
+// uint64_t (if opaque) - low bits of nonce if opaque. 0 if not initialized.
+absl::optional<std::string> Origin::SerializeWithNonceImpl() const {
+ if (!opaque() && !tuple_.IsValid())
+ return absl::nullopt;
+
+ base::Pickle pickle;
+ pickle.WriteString(tuple_.Serialize());
+ if (opaque() && !nonce_->raw_token().is_empty()) {
+ pickle.WriteUInt64(nonce_->token().GetHighForSerialization());
+ pickle.WriteUInt64(nonce_->token().GetLowForSerialization());
+ } else if (opaque()) {
+ // Nonce hasn't been initialized.
+ pickle.WriteUInt64(0);
+ pickle.WriteUInt64(0);
+ }
+
+ base::span<const uint8_t> data(static_cast<const uint8_t*>(pickle.data()),
+ pickle.size());
+ // Base64 encode the data to make it nicer to play with.
+ return base::Base64Encode(data);
+}
+
+// static
+absl::optional<Origin> Origin::Deserialize(const std::string& value) {
+ std::string data;
+ if (!base::Base64Decode(value, &data))
+ return absl::nullopt;
+ base::Pickle pickle(reinterpret_cast<char*>(&data[0]), data.size());
+ base::PickleIterator reader(pickle);
+
+ std::string pickled_url;
+ if (!reader.ReadString(&pickled_url))
+ return absl::nullopt;
+ GURL url(pickled_url);
+
+ // If only a tuple was serialized, then this origin is not opaque. For opaque
+ // origins, we expect two uint64's to be left in the pickle.
+ bool is_opaque = !reader.ReachedEnd();
+
+ // Opaque origins without a tuple are ok.
+ if (!is_opaque && !url.is_valid())
+ return absl::nullopt;
+ SchemeHostPort tuple(url);
+
+ // Possible successful early return if the pickled Origin was not opaque.
+ if (!is_opaque) {
+ Origin origin(tuple);
+ if (origin.opaque())
+ return absl::nullopt; // Something went horribly wrong.
+ return origin;
+ }
+
+ uint64_t nonce_high = 0;
+ if (!reader.ReadUInt64(&nonce_high))
+ return absl::nullopt;
+
+ uint64_t nonce_low = 0;
+ if (!reader.ReadUInt64(&nonce_low))
+ return absl::nullopt;
+
+ absl::optional<base::UnguessableToken> nonce_token =
+ base::UnguessableToken::Deserialize(nonce_high, nonce_low);
+
+ Origin::Nonce nonce;
+ if (nonce_token.has_value()) {
+ // The serialized nonce wasn't empty, so copy it here.
+ nonce = Origin::Nonce(nonce_token.value());
+ }
+ Origin origin;
+ origin.nonce_ = std::move(nonce);
+ origin.tuple_ = tuple;
+ return origin;
+}
+
+void Origin::WriteIntoTrace(perfetto::TracedValue context) const {
+ std::move(context).WriteString(GetDebugString());
}
std::ostream& operator<<(std::ostream& out, const url::Origin& origin) {
- return out << origin.Serialize();
+ out << origin.GetDebugString();
+ return out;
+}
+
+std::ostream& operator<<(std::ostream& out, const url::Origin::Nonce& nonce) {
+ // Subtle: don't let logging trigger lazy-generation of the token value.
+ if (nonce.raw_token().is_empty())
+ return (out << "(nonce TBD)");
+ else
+ return (out << nonce.raw_token());
}
bool IsSameOriginWith(const GURL& a, const GURL& b) {
return Origin::Create(a).IsSameOriginWith(Origin::Create(b));
}
+Origin::Nonce::Nonce() = default;
+Origin::Nonce::Nonce(const base::UnguessableToken& token) : token_(token) {
+ CHECK(!token_.is_empty());
+}
+
+const base::UnguessableToken& Origin::Nonce::token() const {
+ // Inspecting the value of a nonce triggers lazy-generation.
+ // TODO(dcheng): UnguessableToken::is_empty should go away -- what sentinel
+ // value to use instead?
+ if (token_.is_empty())
+ token_ = base::UnguessableToken::Create();
+ return token_;
+}
+
+const base::UnguessableToken& Origin::Nonce::raw_token() const {
+ return token_;
+}
+
+// Copying a Nonce triggers lazy-generation of the token.
+Origin::Nonce::Nonce(const Origin::Nonce& other) : token_(other.token()) {}
+
+Origin::Nonce& Origin::Nonce::operator=(const Origin::Nonce& other) {
+ // Copying a Nonce triggers lazy-generation of the token.
+ token_ = other.token();
+ return *this;
+}
+
+// Moving a nonce does NOT trigger lazy-generation of the token.
+Origin::Nonce::Nonce(Origin::Nonce&& other) noexcept : token_(other.token_) {
+ other.token_ = base::UnguessableToken(); // Reset |other|.
+}
+
+Origin::Nonce& Origin::Nonce::operator=(Origin::Nonce&& other) noexcept {
+ token_ = other.token_;
+ other.token_ = base::UnguessableToken(); // Reset |other|.
+ return *this;
+}
+
+bool Origin::Nonce::operator<(const Origin::Nonce& other) const {
+ // When comparing, lazy-generation is required of both tokens, so that an
+ // ordering is established.
+ return token() < other.token();
+}
+
+bool Origin::Nonce::operator==(const Origin::Nonce& other) const {
+ // Equality testing doesn't actually require that the tokens be generated.
+ // If the tokens are both zero, equality only holds if they're the same
+ // object.
+ return (other.token_ == token_) && !(token_.is_empty() && (&other != this));
+}
+
+bool Origin::Nonce::operator!=(const Origin::Nonce& other) const {
+ return !(*this == other);
+}
+
+namespace debug {
+
+ScopedOriginCrashKey::ScopedOriginCrashKey(
+ base::debug::CrashKeyString* crash_key,
+ const url::Origin* value)
+ : scoped_string_value_(
+ crash_key,
+ value ? value->GetDebugString(false /* include_nonce */)
+ : "nullptr") {}
+
+ScopedOriginCrashKey::~ScopedOriginCrashKey() = default;
+
+} // namespace debug
+
} // namespace url
diff --git a/url/origin.h b/url/origin.h
index 72fe276..a057533 100644
--- a/url/origin.h
+++ b/url/origin.h
@@ -1,33 +1,81 @@
-// Copyright 2015 The Chromium Authors. All rights reserved.
+// Copyright 2015 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef URL_ORIGIN_H_
#define URL_ORIGIN_H_
+#include <stdint.h>
+
+#include <memory>
#include <string>
+#include "base/component_export.h"
#include "base/debug/alias.h"
-#include "base/optional.h"
-#include "base/strings/string16.h"
-#include "base/strings/string_piece.h"
+#include "base/debug/crash_logging.h"
+#include "base/gtest_prod_util.h"
+#include "base/strings/string_piece_forward.h"
#include "base/strings/string_util.h"
+#include "base/trace_event/base_tracing_forward.h"
#include "base/unguessable_token.h"
-#include "starboard/types.h"
+#include "build/build_config.h"
+#include "build/buildflag.h"
+#include "third_party/abseil-cpp/absl/types/optional.h"
#include "url/scheme_host_port.h"
-#include "url/third_party/mozilla/url_parse.h"
-#include "url/url_canon.h"
-#include "url/url_constants.h"
-#include "url/url_export.h"
+
+#if BUILDFLAG(IS_ANDROID)
+#include <jni.h>
+
+namespace base {
+namespace android {
+template <typename>
+class ScopedJavaLocalRef;
+template <typename>
+class JavaRef;
+} // namespace android
+} // namespace base
+#endif // BUILDFLAG(IS_ANDROID)
class GURL;
+namespace blink {
+class SecurityOrigin;
+class SecurityOriginTest;
+class StorageKey;
+class StorageKeyTest;
+} // namespace blink
+
+namespace IPC {
+template <class P>
+struct ParamTraits;
+} // namespace IPC
+
+namespace ipc_fuzzer {
+template <class T>
+struct FuzzTraits;
+} // namespace ipc_fuzzer
+
+namespace mojo {
+template <typename DataViewType, typename T>
+struct StructTraits;
+struct UrlOriginAdapter;
+} // namespace mojo
+
+namespace net {
+class SchemefulSite;
+} // namespace net
+
namespace url {
+namespace mojom {
+class OriginDataView;
+} // namespace mojom
+
// Per https://html.spec.whatwg.org/multipage/origin.html#origin, an origin is
// either:
// - a tuple origin of (scheme, host, port) as described in RFC 6454.
-// - an opaque origin with an internal value
+// - an opaque origin with an internal value, and a memory of the tuple origin
+// from which it was derived.
//
// TL;DR: If you need to make a security-relevant decision, use 'url::Origin'.
// If you only need to extract the bits of a URL which are relevant for a
@@ -46,14 +94,13 @@
// between contexts. Two tuple origins are same-origin if the tuples are equal.
// A tuple origin may also be re-created from its serialization.
//
-// An opaque origin is cross-origin to any origin, including itself and copies
-// of itself. Unlike tuple origins, an opaque origin cannot be re-created from
-// its serialization, which is always the string "null".
-//
-// TODO(https://crbug.com/768460): work is in progress to associate an internal
-// globally unique identifier with an opaque origin: completing this work will
-// allow a copy of an opaque origin to be same-origin to the original instance
-// of that opaque origin.
+// An opaque origin has an internal globally unique identifier. When creating a
+// new opaque origin from a URL, a fresh globally unique identifier is
+// generated. However, if an opaque origin is copied or moved, the internal
+// globally unique identifier is preserved. Two opaque origins are same-origin
+// iff the globally unique identifiers match. Unlike tuple origins, an opaque
+// origin cannot be re-created from its serialization, which is always the
+// string "null".
//
// IMPORTANT: Since opaque origins always serialize as the string "null", it is
// *never* safe to use the serialization for security checks!
@@ -62,9 +109,7 @@
//
// There are a few subtleties to note:
//
-// * A default constructed Origin is opaque, but unlike the spec definition, has
-// no associated identifier. A default constructed Origin is cross-origin to
-// every other Origin object.
+// * A default constructed Origin is opaque, with no precursor origin.
//
// * Invalid and non-standard GURLs are parsed as opaque origins. This includes
// non-hierarchical URLs like 'data:text/html,...' and 'javascript:alert(1)'.
@@ -81,16 +126,9 @@
// * The host component of an IPv6 address includes brackets, just like the URL
// representation.
//
-// Usage:
-//
-// * Origins are generally constructed from an already-canonicalized GURL:
-//
-// GURL url("https://example.com/");
-// url::Origin origin = Origin::Create(url);
-// origin.scheme(); // "https"
-// origin.host(); // "example.com"
-// origin.port(); // 443
-// origin.unique(); // false
+// * Constructing origins from GURLs (or from SchemeHostPort) is typically a red
+// flag (this is true for `url::Origin::Create` but also to some extent for
+// `url::Origin::Resolve`). See docs/security/origin-vs-url.md for more.
//
// * To answer the question "Are |this| and |that| "same-origin" with each
// other?", use |Origin::IsSameOriginWith|:
@@ -98,39 +136,69 @@
// if (this.IsSameOriginWith(that)) {
// // Amazingness goes here.
// }
-class URL_EXPORT Origin {
+class COMPONENT_EXPORT(URL) Origin {
public:
- // Creates an opaque and always unique Origin. The returned Origin is
- // always cross-origin to any Origin, including itself.
+ // Creates an opaque Origin with a nonce that is different from all previously
+ // existing origins.
Origin();
- // Creates an Origin from |url|, as described at
- // https://url.spec.whatwg.org/#origin, with the following additions:
+ // WARNING: Converting a URL into an Origin is usually a red flag. See
+ // //docs/security/origin-vs-url.md for more details. Some discussion about
+ // deprecating the Create method can be found in https://crbug.com/1270878.
//
- // 1. If |url| is invalid or non-standard, an opaque Origin is constructed.
+ // Creates an Origin from `url`, as described at
+ // https://url.spec.whatwg.org/#origin, with the following additions:
+ // 1. If `url` is invalid or non-standard, an opaque Origin is constructed.
// 2. 'filesystem' URLs behave as 'blob' URLs (that is, the origin is parsed
// out of everything in the URL which follows the scheme).
// 3. 'file' URLs all parse as ("file", "", 0).
//
- // If this method returns an opaque Origin, the returned Origin will be
- // cross-origin to any Origin, including itself.
+ // WARNING: `url::Origin::Create(url)` can give unexpected results if:
+ // 1) `url` is "about:blank", or "about:srcdoc" (returning unique, opaque
+ // origin rather than the real origin of the frame)
+ // 2) `url` comes from a sandboxed frame (potentially returning a non-opaque
+ // origin, when an opaque one is needed; see also
+ // https://www.html5rocks.com/en/tutorials/security/sandboxed-iframes/)
+ // 3) Wrong `url` is used - e.g. in some navigations `base_url_for_data_url`
+ // might need to be used instead of relying on
+ // `content::NavigationHandle::GetURL`.
+ //
+ // WARNING: The returned Origin may have a different scheme and host from
+ // `url` (e.g. in case of blob URLs - see OriginTest.ConstructFromGURL).
+ //
+ // WARNING: data: URLs will be correctly translated into opaque origins,
+ // but the precursor origin will be lost (unlike with `url::Origin::Resolve`).
static Origin Create(const GURL& url);
+ // Creates an Origin for the resource `url` as if it were requested
+ // from the context of `base_origin`. If `url` is standard
+ // (in the sense that it embeds a complete origin, like http/https),
+ // this returns the same value as would Create().
+ //
+ // If `url` is "about:blank" or "about:srcdoc", this returns a copy of
+ // `base_origin`.
+ //
+ // Otherwise, returns a new opaque origin derived from `base_origin`.
+ // In this case, the resulting opaque origin will inherit the tuple
+ // (or precursor tuple) of `base_origin`, but will not be same origin
+ // with `base_origin`, even if `base_origin` is already opaque.
+ static Origin Resolve(const GURL& url, const Origin& base_origin);
+
// Copyable and movable.
Origin(const Origin&);
Origin& operator=(const Origin&);
- Origin(Origin&&);
- Origin& operator=(Origin&&);
+ Origin(Origin&&) noexcept;
+ Origin& operator=(Origin&&) noexcept;
// Creates an Origin from a |scheme|, |host|, and |port|. All the parameters
- // must be valid and canonicalized. Do not use this method to create opaque
- // origins. Use Origin() or Origin::CreateOpaque() for that.
+ // must be valid and canonicalized. Returns nullopt if any parameter is not
+ // canonical, or if all the parameters are empty.
//
// This constructor should be used in order to pass 'Origin' objects back and
// forth over IPC (as transitioning through GURL would risk potentially
// dangerous recanonicalization); other potential callers should prefer the
// 'GURL'-based constructor.
- static Origin UnsafelyCreateOriginWithoutNormalization(
+ static absl::optional<Origin> UnsafelyCreateTupleOriginWithoutNormalization(
base::StringPiece scheme,
base::StringPiece host,
uint16_t port);
@@ -147,25 +215,61 @@
// For opaque origins, these return ("", "", 0).
const std::string& scheme() const {
- return !unique() ? tuple_.scheme() : base::EmptyString();
+ return !opaque() ? tuple_.scheme() : base::EmptyString();
}
const std::string& host() const {
- return !unique() ? tuple_.host() : base::EmptyString();
+ return !opaque() ? tuple_.host() : base::EmptyString();
}
- uint16_t port() const { return !unique() ? tuple_.port() : 0; }
+ uint16_t port() const { return !opaque() ? tuple_.port() : 0; }
- // TODO(dcheng): Rename this to opaque().
- bool unique() const { return tuple_.IsInvalid(); }
+ bool opaque() const { return nonce_.has_value(); }
// An ASCII serialization of the Origin as per Section 6.2 of RFC 6454, with
// the addition that all Origins with a 'file' scheme serialize to "file://".
std::string Serialize() const;
- // Two Origins are "same-origin" if their schemes, hosts, and ports are exact
- // matches; and neither is unique.
+ // Two non-opaque Origins are "same-origin" if their schemes, hosts, and ports
+ // are exact matches. Two opaque origins are same-origin only if their
+ // internal nonce values match. A non-opaque origin is never same-origin with
+ // an opaque origin.
bool IsSameOriginWith(const Origin& other) const;
- bool operator==(const Origin& other) const {
- return IsSameOriginWith(other);
+ bool operator==(const Origin& other) const { return IsSameOriginWith(other); }
+ bool operator!=(const Origin& other) const {
+ return !IsSameOriginWith(other);
+ }
+
+ // Non-opaque origin is "same-origin" with `url` if their schemes, hosts, and
+ // ports are exact matches. Opaque origin is never "same-origin" with any
+ // `url`. about:blank, about:srcdoc, and invalid GURLs are never
+ // "same-origin" with any origin. This method is a shorthand for
+ // `origin.IsSameOriginWith(url::Origin::Create(url))`.
+ //
+ // See also CanBeDerivedFrom.
+ bool IsSameOriginWith(const GURL& url) const;
+
+ // This method returns true for any |url| which if navigated to could result
+ // in an origin compatible with |this|.
+ bool CanBeDerivedFrom(const GURL& url) const;
+
+ // Get the scheme, host, and port from which this origin derives. For
+ // a tuple Origin, this gives the same values as calling scheme(), host()
+ // and port(). For an opaque Origin that was created by calling
+ // Origin::DeriveNewOpaqueOrigin() on a precursor or Origin::Resolve(),
+ // this returns the tuple inherited from the precursor.
+ //
+ // If this Origin is opaque and was created via the default constructor or
+ // Origin::Create(), the precursor origin is unknown.
+ //
+ // Use with great caution: opaque origins should generally not inherit
+ // privileges from the origins they derive from. However, in some cases
+ // (such as restrictions on process placement, or determining the http lock
+ // icon) this information may be relevant to ensure that entering an
+ // opaque origin does not grant privileges initially denied to the original
+ // non-opaque origin.
+ //
+ // This method has a deliberately obnoxious name to prompt caution in its use.
+ const SchemeHostPort& GetTupleOrPrecursorTupleIfOpaque() const {
+ return tuple_;
}
// Efficiently returns what GURL(Serialize()) would without re-parsing the
@@ -180,23 +284,21 @@
// URL (e.g. with a path component).
GURL GetURL() const;
- // Same as GURL::DomainIs. If |this| origin is unique, then returns false.
+ // Same as GURL::DomainIs. If |this| origin is opaque, then returns false.
bool DomainIs(base::StringPiece canonical_domain) const;
// Allows Origin to be used as a key in STL (for example, a std::set or
// std::map).
bool operator<(const Origin& other) const;
- private:
- friend class OriginTest;
-
// Creates a new opaque origin that is guaranteed to be cross-origin to all
// currently existing origins. An origin created by this method retains its
// identity across copies. Copies are guaranteed to be same-origin to each
// other, e.g.
//
- // url::Origin a = Origin::CreateUniqueOpaque();
- // url::Origin b = Origin::CreateUniqueOpaque();
+ // url::Origin page = Origin::Create(GURL("http://example.com"));
+ // url::Origin a = page.DeriveNewOpaqueOrigin();
+ // url::Origin b = page.DeriveNewOpaqueOrigin();
// url::Origin c = a;
// url::Origin d = b;
//
@@ -205,44 +307,190 @@
// of origins are considered cross-origin, e.g. |a| is cross-origin to |b| and
// |d|, |b| is cross-origin to |a| and |c|, |c| is cross-origin to |b| and
// |d|, and |d| is cross-origin to |a| and |c|.
+ Origin DeriveNewOpaqueOrigin() const;
+
+ // Creates a string representation of the object that can be used for logging
+ // and debugging. It serializes the internal state, such as the nonce value
+ // and precursor information.
+ std::string GetDebugString(bool include_nonce = true) const;
+
+#if BUILDFLAG(IS_ANDROID)
+ base::android::ScopedJavaLocalRef<jobject> CreateJavaObject() const;
+ static Origin FromJavaObject(
+ const base::android::JavaRef<jobject>& java_origin);
+ static jlong CreateNative(JNIEnv* env,
+ const base::android::JavaRef<jstring>& java_scheme,
+ const base::android::JavaRef<jstring>& java_host,
+ uint16_t port,
+ bool is_opaque,
+ uint64_t tokenHighBits,
+ uint64_t tokenLowBits);
+#endif // BUILDFLAG(IS_ANDROID)
+
+ void WriteIntoTrace(perfetto::TracedValue context) const;
+
+ private:
+ friend class blink::SecurityOrigin;
+ friend class blink::SecurityOriginTest;
+ friend class blink::StorageKey;
+ // SchemefulSite needs access to the serialization/deserialization logic which
+ // includes the nonce.
+ friend class net::SchemefulSite;
+ friend class OriginTest;
+ friend struct mojo::UrlOriginAdapter;
+ friend struct ipc_fuzzer::FuzzTraits<Origin>;
+ friend struct mojo::StructTraits<url::mojom::OriginDataView, url::Origin>;
+ friend IPC::ParamTraits<url::Origin>;
+ friend COMPONENT_EXPORT(URL) std::ostream& operator<<(std::ostream& out,
+ const Origin& origin);
+ friend class blink::StorageKeyTest;
+
+ // Origin::Nonce is a wrapper around base::UnguessableToken that generates
+ // the random value only when the value is first accessed. The lazy generation
+ // allows Origin to be default-constructed quickly, without spending time
+ // in random number generation.
//
- // Note that this is private internal helper, since relatively few locations
- // should be responsible for deriving a canonical origin from a GURL.
- static Origin CreateUniqueOpaque();
+ // TODO(nick): Should this optimization move into UnguessableToken, once it no
+ // longer treats the Null case specially?
+ class COMPONENT_EXPORT(URL) Nonce {
+ public:
+ // Creates a nonce to hold a newly-generated UnguessableToken. The actual
+ // token value will be generated lazily.
+ Nonce();
- // Similar to Create(const GURL&). However, if the returned Origin is an
- // opaque origin, it will be created with CreateUniqueOpaque(), have an
- // associated identity, and be considered same-origin to copies of itself.
- static Origin CreateCanonical(const GURL&);
+ // Creates a nonce to hold an already-generated UnguessableToken value. This
+ // constructor should only be used for IPC serialization and testing --
+ // regular code should never need to touch the UnguessableTokens directly,
+ // and the default constructor is faster.
+ explicit Nonce(const base::UnguessableToken& token);
- enum class ConstructAsOpaque { kTag };
- explicit Origin(ConstructAsOpaque);
+ // Accessor, which lazily initializes the underlying |token_| member.
+ const base::UnguessableToken& token() const;
- // |tuple| must be valid, implying that the created Origin is never an opaque
- // origin.
+ // Do not use in cases where lazy initialization is expected! This
+ // accessor does not initialize the |token_| member.
+ const base::UnguessableToken& raw_token() const;
+
+ // Copyable and movable. Copying a Nonce triggers lazy-initialization,
+ // moving it does not.
+ Nonce(const Nonce&);
+ Nonce& operator=(const Nonce&);
+ Nonce(Nonce&&) noexcept;
+ Nonce& operator=(Nonce&&) noexcept;
+
+ // Note that operator<, used by map-type containers, will trigger |token_|
+ // lazy-initialization. Equality comparisons do not.
+ bool operator<(const Nonce& other) const;
+ bool operator==(const Nonce& other) const;
+ bool operator!=(const Nonce& other) const;
+
+ private:
+ friend class OriginTest;
+
+ // mutable to support lazy generation.
+ mutable base::UnguessableToken token_;
+ };
+
+ // This needs to be friended within Origin as well, since Nonce is a private
+ // nested class of Origin.
+ friend COMPONENT_EXPORT(URL) std::ostream& operator<<(std::ostream& out,
+ const Nonce& nonce);
+
+ // Creates an origin without sanity checking that the host is canonicalized.
+ // This should only be used when converting between already normalized types,
+ // and should NOT be used for IPC. Method takes std::strings for use with move
+ // operators to avoid copies.
+ static Origin CreateOpaqueFromNormalizedPrecursorTuple(
+ std::string precursor_scheme,
+ std::string precursor_host,
+ uint16_t precursor_port,
+ const Nonce& nonce);
+
+ // Creates an opaque Origin with the identity given by |nonce|, and an
+ // optional precursor origin given by |precursor_scheme|, |precursor_host| and
+ // |precursor_port|. Returns nullopt if any parameter is not canonical. When
+ // the precursor is unknown, the precursor parameters should be ("", "", 0).
+ //
+ // This factory method should be used in order to pass opaque Origin objects
+ // back and forth over IPC (as transitioning through GURL would risk
+ // potentially dangerous recanonicalization).
+ static absl::optional<Origin> UnsafelyCreateOpaqueOriginWithoutNormalization(
+ base::StringPiece precursor_scheme,
+ base::StringPiece precursor_host,
+ uint16_t precursor_port,
+ const Nonce& nonce);
+
+ // Constructs a non-opaque tuple origin. |tuple| must be valid.
explicit Origin(SchemeHostPort tuple);
- // Helpers for managing union for destroy, copy, and move.
- // The tuple is used for tuple origins (e.g. https://example.com:80). This
- // is expected to be the common case. |IsInvalid()| will be true for opaque
- // origins.
+ // Constructs an opaque origin derived from the |precursor| tuple, with the
+ // given |nonce|.
+ Origin(const Nonce& nonce, SchemeHostPort precursor);
+
+ // Get the nonce associated with this origin, if it is opaque, or nullptr
+ // otherwise. This should be used only when trying to send an Origin across an
+ // IPC pipe.
+ const base::UnguessableToken* GetNonceForSerialization() const;
+
+ // Serializes this Origin, including its nonce if it is opaque. If an opaque
+ // origin's |tuple_| is invalid, nullopt is returned. If the nonce is not
+ // initialized, a nonce of 0 is used. Use of this method should be limited as
+ // an opaque origin will never be matchable in future browser sessions.
+ absl::optional<std::string> SerializeWithNonce() const;
+
+ // Like SerializeWithNonce(), but forces |nonce_| to be initialized prior to
+ // serializing.
+ absl::optional<std::string> SerializeWithNonceAndInitIfNeeded();
+
+ absl::optional<std::string> SerializeWithNonceImpl() const;
+
+ // Deserializes an origin from the output of |SerializeWithNonce|. Returns
+ // nullopt if the value was invalid in any way.
+ static absl::optional<Origin> Deserialize(const std::string& value);
+
+ // The tuple is used for both tuple origins (e.g. https://example.com:80), as
+ // well as for opaque origins, where it tracks the tuple origin from which
+ // the opaque origin was initially derived (we call this the "precursor"
+ // origin).
SchemeHostPort tuple_;
// The nonce is used for maintaining identity of an opaque origin. This
- // nonce is preserved when an opaque origin is copied or moved.
- base::Optional<base::UnguessableToken> nonce_;
+ // nonce is preserved when an opaque origin is copied or moved. An Origin
+ // is considered opaque if and only if |nonce_| holds a value.
+ absl::optional<Nonce> nonce_;
};
-URL_EXPORT std::ostream& operator<<(std::ostream& out, const Origin& origin);
+// Pretty-printers for logging. These expose the internal state of the nonce.
+COMPONENT_EXPORT(URL)
+std::ostream& operator<<(std::ostream& out, const Origin& origin);
+COMPONENT_EXPORT(URL)
+std::ostream& operator<<(std::ostream& out, const Origin::Nonce& origin);
-URL_EXPORT bool IsSameOriginWith(const GURL& a, const GURL& b);
+COMPONENT_EXPORT(URL) bool IsSameOriginWith(const GURL& a, const GURL& b);
-// DEBUG_ALIAS_FOR_ORIGIN(var_name, origin) copies |origin| into a new
-// stack-allocated variable named |<var_name>|. This helps ensure that the
-// value of |origin| gets preserved in crash dumps.
+// DEBUG_ALIAS_FOR_ORIGIN(var_name, origin) copies `origin` into a new
+// stack-allocated variable named `<var_name>`. This helps ensure that the
+// value of `origin` gets preserved in crash dumps.
#define DEBUG_ALIAS_FOR_ORIGIN(var_name, origin) \
DEBUG_ALIAS_FOR_CSTR(var_name, (origin).Serialize().c_str(), 128)
+namespace debug {
+
+class COMPONENT_EXPORT(URL) ScopedOriginCrashKey {
+ public:
+ ScopedOriginCrashKey(base::debug::CrashKeyString* crash_key,
+ const url::Origin* value);
+ ~ScopedOriginCrashKey();
+
+ ScopedOriginCrashKey(const ScopedOriginCrashKey&) = delete;
+ ScopedOriginCrashKey& operator=(const ScopedOriginCrashKey&) = delete;
+
+ private:
+ base::debug::ScopedCrashKeyString scoped_string_value_;
+};
+
+} // namespace debug
+
} // namespace url
#endif // URL_ORIGIN_H_
diff --git a/url/origin_abstract_tests.cc b/url/origin_abstract_tests.cc
new file mode 100644
index 0000000..1bc032e
--- /dev/null
+++ b/url/origin_abstract_tests.cc
@@ -0,0 +1,104 @@
+// Copyright 2021 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/origin_abstract_tests.h"
+
+namespace url {
+
+void ExpectParsedUrlsEqual(const GURL& a, const GURL& b) {
+ EXPECT_EQ(a, b);
+ const Parsed& a_parsed = a.parsed_for_possibly_invalid_spec();
+ const Parsed& b_parsed = b.parsed_for_possibly_invalid_spec();
+ EXPECT_EQ(a_parsed.scheme.begin, b_parsed.scheme.begin);
+ EXPECT_EQ(a_parsed.scheme.len, b_parsed.scheme.len);
+ EXPECT_EQ(a_parsed.username.begin, b_parsed.username.begin);
+ EXPECT_EQ(a_parsed.username.len, b_parsed.username.len);
+ EXPECT_EQ(a_parsed.password.begin, b_parsed.password.begin);
+ EXPECT_EQ(a_parsed.password.len, b_parsed.password.len);
+ EXPECT_EQ(a_parsed.host.begin, b_parsed.host.begin);
+ EXPECT_EQ(a_parsed.host.len, b_parsed.host.len);
+ EXPECT_EQ(a_parsed.port.begin, b_parsed.port.begin);
+ EXPECT_EQ(a_parsed.port.len, b_parsed.port.len);
+ EXPECT_EQ(a_parsed.path.begin, b_parsed.path.begin);
+ EXPECT_EQ(a_parsed.path.len, b_parsed.path.len);
+ EXPECT_EQ(a_parsed.query.begin, b_parsed.query.begin);
+ EXPECT_EQ(a_parsed.query.len, b_parsed.query.len);
+ EXPECT_EQ(a_parsed.ref.begin, b_parsed.ref.begin);
+ EXPECT_EQ(a_parsed.ref.len, b_parsed.ref.len);
+}
+
+// static
+Origin UrlOriginTestTraits::CreateOriginFromString(base::StringPiece s) {
+ return Origin::Create(GURL(s));
+}
+
+// static
+Origin UrlOriginTestTraits::CreateUniqueOpaqueOrigin() {
+ return Origin();
+}
+
+// static
+Origin UrlOriginTestTraits::CreateWithReferenceOrigin(
+ base::StringPiece url,
+ const Origin& reference_origin) {
+ return Origin::Resolve(GURL(url), reference_origin);
+}
+
+// static
+Origin UrlOriginTestTraits::DeriveNewOpaqueOrigin(
+ const Origin& reference_origin) {
+ return reference_origin.DeriveNewOpaqueOrigin();
+}
+
+// static
+bool UrlOriginTestTraits::IsOpaque(const Origin& origin) {
+ return origin.opaque();
+}
+
+// static
+std::string UrlOriginTestTraits::GetScheme(const Origin& origin) {
+ return origin.scheme();
+}
+
+// static
+std::string UrlOriginTestTraits::GetHost(const Origin& origin) {
+ return origin.host();
+}
+
+// static
+uint16_t UrlOriginTestTraits::GetPort(const Origin& origin) {
+ return origin.port();
+}
+
+// static
+SchemeHostPort UrlOriginTestTraits::GetTupleOrPrecursorTupleIfOpaque(
+ const Origin& origin) {
+ return origin.GetTupleOrPrecursorTupleIfOpaque();
+}
+
+// static
+bool UrlOriginTestTraits::IsSameOrigin(const Origin& a, const Origin& b) {
+ return a.IsSameOriginWith(b);
+}
+
+// static
+std::string UrlOriginTestTraits::Serialize(const Origin& origin) {
+ std::string serialized = origin.Serialize();
+
+ // Extra test assertion for GetURL (which doesn't have an equivalent in
+ // blink::SecurityOrigin).
+ ExpectParsedUrlsEqual(GURL(serialized), origin.GetURL());
+
+ return serialized;
+}
+
+// static
+bool UrlOriginTestTraits::IsValidUrl(base::StringPiece str) {
+ return GURL(str).is_valid();
+}
+
+// This is an abstract test suite which is instantiated by each implementation.
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AbstractOriginTest);
+
+} // namespace url
diff --git a/url/origin_abstract_tests.h b/url/origin_abstract_tests.h
new file mode 100644
index 0000000..63dded6
--- /dev/null
+++ b/url/origin_abstract_tests.h
@@ -0,0 +1,527 @@
+// Copyright 2020 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_ORIGIN_ABSTRACT_TESTS_H_
+#define URL_ORIGIN_ABSTRACT_TESTS_H_
+
+#include <string>
+#include <type_traits>
+
+#include "base/containers/contains.h"
+#include "base/strings/string_piece.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "url/gurl.h"
+#include "url/origin.h"
+#include "url/scheme_host_port.h"
+#include "url/url_util.h"
+
+namespace url {
+
+void ExpectParsedUrlsEqual(const GURL& a, const GURL& b);
+
+// AbstractOriginTest below abstracts away differences between url::Origin and
+// blink::SecurityOrigin by parametrizing the tests with a class that has to
+// expose the same public members as UrlOriginTestTraits below.
+class UrlOriginTestTraits {
+ public:
+ using OriginType = Origin;
+
+ // Constructing an origin.
+ static OriginType CreateOriginFromString(base::StringPiece s);
+ static OriginType CreateUniqueOpaqueOrigin();
+ static OriginType CreateWithReferenceOrigin(
+ base::StringPiece url,
+ const OriginType& reference_origin);
+ static OriginType DeriveNewOpaqueOrigin(const OriginType& reference_origin);
+
+ // Accessors for origin properties.
+ static bool IsOpaque(const OriginType& origin);
+ static std::string GetScheme(const OriginType& origin);
+ static std::string GetHost(const OriginType& origin);
+ static uint16_t GetPort(const OriginType& origin);
+ static SchemeHostPort GetTupleOrPrecursorTupleIfOpaque(
+ const OriginType& origin);
+
+ // Wrappers for other instance methods of OriginType.
+ static bool IsSameOrigin(const OriginType& a, const OriginType& b);
+ static std::string Serialize(const OriginType& origin);
+
+ // "Accessors" of URL properties.
+ //
+ // TODO(lukasza): Consider merging together OriginTraitsBase here and
+ // UrlTraitsBase in //url/gurl_abstract_tests.h.
+ static bool IsValidUrl(base::StringPiece str);
+
+ // Only static members = no constructors are needed.
+ UrlOriginTestTraits() = delete;
+};
+
+// Test suite for tests that cover both url::Origin and blink::SecurityOrigin.
+template <typename TOriginTraits>
+class AbstractOriginTest : public testing::Test {
+ public:
+ void SetUp() override {
+ const char* kSchemesToRegister[] = {
+ "noaccess",
+ "std-with-host",
+ "noaccess-std-with-host",
+ "local",
+ "local-noaccess",
+ "local-std-with-host",
+ "local-noaccess-std-with-host",
+ "also-local",
+ "sec",
+ "sec-std-with-host",
+ "sec-noaccess",
+ };
+ for (const char* kScheme : kSchemesToRegister) {
+ std::string scheme(kScheme);
+ if (base::Contains(scheme, "noaccess"))
+ AddNoAccessScheme(kScheme);
+ if (base::Contains(scheme, "std-with-host"))
+ AddStandardScheme(kScheme, SchemeType::SCHEME_WITH_HOST);
+ if (base::Contains(scheme, "local"))
+ AddLocalScheme(kScheme);
+ if (base::Contains(scheme, "sec"))
+ AddSecureScheme(kScheme);
+ }
+ }
+
+ protected:
+ // Wrappers that help elide away TOriginTraits.
+ //
+ // Note that calling the wrappers needs to be prefixed with `this->...` to
+ // avoid hitting: explicit qualification required to use member 'IsOpaque'
+ // from dependent base class.
+ using OriginType = typename TOriginTraits::OriginType;
+ OriginType CreateOriginFromString(base::StringPiece s) {
+ return TOriginTraits::CreateOriginFromString(s);
+ }
+ OriginType CreateUniqueOpaqueOrigin() {
+ return TOriginTraits::CreateUniqueOpaqueOrigin();
+ }
+ OriginType CreateWithReferenceOrigin(base::StringPiece url,
+ const OriginType& reference_origin) {
+ return TOriginTraits::CreateWithReferenceOrigin(url, reference_origin);
+ }
+ OriginType DeriveNewOpaqueOrigin(const OriginType& reference_origin) {
+ return TOriginTraits::DeriveNewOpaqueOrigin(reference_origin);
+ }
+ bool IsOpaque(const OriginType& origin) {
+ return TOriginTraits::IsOpaque(origin);
+ }
+ std::string GetScheme(const OriginType& origin) {
+ return TOriginTraits::GetScheme(origin);
+ }
+ std::string GetHost(const OriginType& origin) {
+ return TOriginTraits::GetHost(origin);
+ }
+ uint16_t GetPort(const OriginType& origin) {
+ return TOriginTraits::GetPort(origin);
+ }
+ SchemeHostPort GetTupleOrPrecursorTupleIfOpaque(const OriginType& origin) {
+ return TOriginTraits::GetTupleOrPrecursorTupleIfOpaque(origin);
+ }
+ bool IsSameOrigin(const OriginType& a, const OriginType& b) {
+ bool is_a_same_with_b = TOriginTraits::IsSameOrigin(a, b);
+ bool is_b_same_with_a = TOriginTraits::IsSameOrigin(b, a);
+ EXPECT_EQ(is_a_same_with_b, is_b_same_with_a);
+ return is_a_same_with_b;
+ }
+ std::string Serialize(const OriginType& origin) {
+ return TOriginTraits::Serialize(origin);
+ }
+ bool IsValidUrl(base::StringPiece str) {
+ return TOriginTraits::IsValidUrl(str);
+ }
+
+#define EXPECT_SAME_ORIGIN(a, b) \
+ EXPECT_TRUE(this->IsSameOrigin((a), (b))) \
+ << "When checking if \"" << this->Serialize(a) << "\" is " \
+ << "same-origin with \"" << this->Serialize(b) << "\""
+
+#define EXPECT_CROSS_ORIGIN(a, b) \
+ EXPECT_FALSE(this->IsSameOrigin((a), (b))) \
+ << "When checking if \"" << this->Serialize(a) << "\" is " \
+ << "cross-origin from \"" << this->Serialize(b) << "\""
+
+ void VerifyOriginInvariants(const OriginType& origin) {
+ // An origin is always same-origin with itself.
+ EXPECT_SAME_ORIGIN(origin, origin);
+
+ // A copy of |origin| should be same-origin as well.
+ auto origin_copy = origin;
+ EXPECT_EQ(this->GetScheme(origin), this->GetScheme(origin_copy));
+ EXPECT_EQ(this->GetHost(origin), this->GetHost(origin_copy));
+ EXPECT_EQ(this->GetPort(origin), this->GetPort(origin_copy));
+ EXPECT_EQ(this->IsOpaque(origin), this->IsOpaque(origin_copy));
+ EXPECT_SAME_ORIGIN(origin, origin_copy);
+
+ // An origin is always cross-origin from another, unique, opaque origin.
+ EXPECT_CROSS_ORIGIN(origin, this->CreateUniqueOpaqueOrigin());
+
+ // An origin is always cross-origin from another tuple origin.
+ auto different_tuple_origin =
+ this->CreateOriginFromString("https://not-in-the-list.test/");
+ EXPECT_CROSS_ORIGIN(origin, different_tuple_origin);
+
+ // Deriving an origin for "about:blank".
+ auto about_blank_origin1 =
+ this->CreateWithReferenceOrigin("about:blank", origin);
+ auto about_blank_origin2 =
+ this->CreateWithReferenceOrigin("about:blank?bar#foo", origin);
+ EXPECT_SAME_ORIGIN(origin, about_blank_origin1);
+ EXPECT_SAME_ORIGIN(origin, about_blank_origin2);
+
+ // Derived opaque origins.
+ std::vector<OriginType> derived_origins = {
+ this->DeriveNewOpaqueOrigin(origin),
+ this->CreateWithReferenceOrigin("data:text/html,baz", origin),
+ this->DeriveNewOpaqueOrigin(about_blank_origin1),
+ };
+ for (size_t i = 0; i < derived_origins.size(); i++) {
+ SCOPED_TRACE(testing::Message() << "Derived origin #" << i);
+ const OriginType& derived_origin = derived_origins[i];
+ EXPECT_TRUE(this->IsOpaque(derived_origin));
+ EXPECT_SAME_ORIGIN(derived_origin, derived_origin);
+ EXPECT_CROSS_ORIGIN(origin, derived_origin);
+ EXPECT_EQ(this->GetTupleOrPrecursorTupleIfOpaque(origin),
+ this->GetTupleOrPrecursorTupleIfOpaque(derived_origin));
+ }
+ }
+
+ void VerifyUniqueOpaqueOriginInvariants(const OriginType& origin) {
+ if (!this->IsOpaque(origin)) {
+ ADD_FAILURE() << "Got unexpectedly non-opaque origin: "
+ << this->Serialize(origin);
+ return; // Skip other test assertions.
+ }
+
+ // Opaque origins should have an "empty" scheme, host and port.
+ EXPECT_EQ("", this->GetScheme(origin));
+ EXPECT_EQ("", this->GetHost(origin));
+ EXPECT_EQ(0, this->GetPort(origin));
+
+ // Unique opaque origins should have an empty precursor tuple.
+ EXPECT_EQ(SchemeHostPort(), this->GetTupleOrPrecursorTupleIfOpaque(origin));
+
+ // Serialization test.
+ EXPECT_EQ("null", this->Serialize(origin));
+
+ // Invariants that should hold for any origin.
+ VerifyOriginInvariants(origin);
+ }
+
+ void TestUniqueOpaqueOrigin(base::StringPiece test_input) {
+ auto origin = this->CreateOriginFromString(test_input);
+ this->VerifyUniqueOpaqueOriginInvariants(origin);
+
+ // Re-creating from the URL should be cross-origin.
+ auto origin_recreated_from_same_input =
+ this->CreateOriginFromString(test_input);
+ EXPECT_CROSS_ORIGIN(origin, origin_recreated_from_same_input);
+ }
+
+ void VerifyTupleOriginInvariants(const OriginType& origin,
+ const SchemeHostPort& expected_tuple) {
+ if (this->IsOpaque(origin)) {
+ ADD_FAILURE() << "Got unexpectedly opaque origin";
+ return; // Skip other test assertions.
+ }
+ SCOPED_TRACE(testing::Message()
+ << "Actual origin: " << this->Serialize(origin));
+
+ // Compare `origin` against the `expected_tuple`.
+ EXPECT_EQ(expected_tuple.scheme(), this->GetScheme(origin));
+ EXPECT_EQ(expected_tuple.host(), this->GetHost(origin));
+ EXPECT_EQ(expected_tuple.port(), this->GetPort(origin));
+ EXPECT_EQ(expected_tuple, this->GetTupleOrPrecursorTupleIfOpaque(origin));
+
+ // Serialization test.
+ //
+ // TODO(lukasza): Consider preserving the hostname when serializing file:
+ // URLs. Dropping the hostname seems incompatible with section 6 of
+ // rfc6454. Even though section 4 says that "the implementation MAY
+ // return an implementation-defined value", it seems that Chromium
+ // implementation *does* include the hostname in the origin SchemeHostPort
+ // tuple.
+ if (expected_tuple.scheme() != kFileScheme || expected_tuple.host() == "") {
+ EXPECT_SAME_ORIGIN(origin,
+ this->CreateOriginFromString(this->Serialize(origin)));
+ }
+
+ // Invariants that should hold for any origin.
+ VerifyOriginInvariants(origin);
+ }
+
+ private:
+ ScopedSchemeRegistryForTests scoped_scheme_registry_;
+};
+
+TYPED_TEST_SUITE_P(AbstractOriginTest);
+
+TYPED_TEST_P(AbstractOriginTest, NonStandardSchemeWithAndroidWebViewHack) {
+ EnableNonStandardSchemesForAndroidWebView();
+
+ // Regression test for https://crbug.com/896059.
+ auto origin = this->CreateOriginFromString("unknown-scheme://");
+ EXPECT_FALSE(this->IsOpaque(origin));
+ EXPECT_EQ("unknown-scheme", this->GetScheme(origin));
+ EXPECT_EQ("", this->GetHost(origin));
+ EXPECT_EQ(0, this->GetPort(origin));
+
+ // about:blank translates into an opaque origin, even in presence of
+ // EnableNonStandardSchemesForAndroidWebView.
+ origin = this->CreateOriginFromString("about:blank");
+ EXPECT_TRUE(this->IsOpaque(origin));
+}
+
+TYPED_TEST_P(AbstractOriginTest, OpaqueOriginsFromValidUrls) {
+ const char* kTestCases[] = {
+ // Built-in noaccess schemes.
+ "data:text/html,Hello!",
+ "javascript:alert(1)",
+ "about:blank",
+
+ // Opaque blob URLs.
+ "blob:null/foo", // blob:null (actually a valid URL)
+ "blob:data:foo", // blob + data (which is nonstandard)
+ "blob:about://blank/", // blob + about (which is nonstandard)
+ "blob:about:blank/", // blob + about (which is nonstandard)
+ "blob:blob:http://www.example.com/guid-goes-here",
+ "blob:filesystem:ws:b/.",
+ "blob:filesystem:ftp://a/b",
+ "blob:blob:file://localhost/foo/bar",
+ };
+
+ for (const char* test_input : kTestCases) {
+ SCOPED_TRACE(testing::Message() << "Test input: " << test_input);
+
+ // Verify that `origin` is opaque not just because `test_input` results in
+ // an invalid URL (because of a typo in the scheme name, or because of a
+ // technicality like having no host in a noaccess-std-with-host: scheme).
+ EXPECT_TRUE(this->IsValidUrl(test_input));
+
+ this->TestUniqueOpaqueOrigin(test_input);
+ }
+}
+
+TYPED_TEST_P(AbstractOriginTest, OpaqueOriginsFromInvalidUrls) {
+ // TODO(lukasza): Consider moving those to GURL/KURL tests that verify what
+ // inputs are parsed as an invalid URL.
+
+ const char* kTestCases[] = {
+ // Invalid file: URLs.
+ "file://example.com:443/etc/passwd", // No port expected.
+
+ // Invalid HTTP URLs.
+ "http",
+ "http:",
+ "http:/",
+ "http://",
+ "http://:",
+ "http://:1",
+ "http::///invalid.example.com/",
+ "http://example.com:65536/", // Port out of range.
+ "http://example.com:-1/", // Port out of range.
+ "http://example.com:18446744073709551616/", // Port = 2^64.
+ "http://example.com:18446744073709551616999/", // Lots of port digits.
+
+ // Invalid filesystem URLs.
+ "filesystem:http://example.com/", // Missing /type/.
+ "filesystem:local:baz./type/",
+ "filesystem:local://hostname/type/",
+ "filesystem:unknown-scheme://hostname/type/",
+ "filesystem:filesystem:http://example.org:88/foo/bar",
+
+ // Invalid IP addresses
+ "http://[]/",
+ "http://[2001:0db8:0000:0000:0000:0000:0000:0000:0001]/", // 9 groups.
+
+ // Unknown scheme without a colon character (":") gives an invalid URL.
+ "unknown-scheme",
+
+ // Standard schemes require a hostname (and result in an opaque origin if
+ // the hostname is missing).
+ "local-std-with-host:",
+ "noaccess-std-with-host:",
+ };
+
+ for (const char* test_input : kTestCases) {
+ SCOPED_TRACE(testing::Message() << "Test input: " << test_input);
+
+ // All testcases here are expected to represent invalid URLs (e.g. because
+ // of a typo in the scheme name, or because of a technicality like having
+ // no host in a noaccess-std-with-host: scheme).
+ EXPECT_FALSE(this->IsValidUrl(test_input));
+
+ // Invalid URLs should always result in an opaque origin.
+ this->TestUniqueOpaqueOrigin(test_input);
+ }
+}
+
+TYPED_TEST_P(AbstractOriginTest, TupleOrigins) {
+ struct TestCase {
+ const char* input;
+ SchemeHostPort expected_tuple;
+ } kTestCases[] = {
+ // file: URLs
+ {"file:///etc/passwd", {"file", "", 0}},
+ {"file://example.com/etc/passwd", {"file", "example.com", 0}},
+ {"file:///", {"file", "", 0}},
+ {"file://hostname/C:/dir/file.txt", {"file", "hostname", 0}},
+
+ // HTTP URLs
+ {"http://example.com/", {"http", "example.com", 80}},
+ {"http://example.com:80/", {"http", "example.com", 80}},
+ {"http://example.com:123/", {"http", "example.com", 123}},
+ {"http://example.com:0/", {"http", "example.com", 0}},
+ {"http://example.com:65535/", {"http", "example.com", 65535}},
+ {"https://example.com/", {"https", "example.com", 443}},
+ {"https://example.com:443/", {"https", "example.com", 443}},
+ {"https://example.com:123/", {"https", "example.com", 123}},
+ {"https://example.com:0/", {"https", "example.com", 0}},
+ {"https://example.com:65535/", {"https", "example.com", 65535}},
+ {"http://user:pass@example.com/", {"http", "example.com", 80}},
+ {"http://example.com:123/?query", {"http", "example.com", 123}},
+ {"https://example.com/#1234", {"https", "example.com", 443}},
+ {"https://u:p@example.com:123/?query#1234",
+ {"https", "example.com", 123}},
+ {"http://example/", {"http", "example", 80}},
+
+ // Blob URLs.
+ {"blob:http://example.com/guid-goes-here", {"http", "example.com", 80}},
+ {"blob:http://example.com:123/guid-goes-here",
+ {"http", "example.com", 123}},
+ {"blob:https://example.com/guid-goes-here",
+ {"https", "example.com", 443}},
+ {"blob:http://u:p@example.com/guid-goes-here",
+ {"http", "example.com", 80}},
+
+ // Filesystem URLs.
+ {"filesystem:http://example.com/type/", {"http", "example.com", 80}},
+ {"filesystem:http://example.com:123/type/", {"http", "example.com", 123}},
+ {"filesystem:https://example.com/type/", {"https", "example.com", 443}},
+ {"filesystem:https://example.com:123/type/",
+ {"https", "example.com", 123}},
+ {"filesystem:local-std-with-host:baz./type/",
+ {"local-std-with-host", "baz.", 0}},
+
+ // IP Addresses
+ {"http://192.168.9.1/", {"http", "192.168.9.1", 80}},
+ {"http://[2001:db8::1]/", {"http", "[2001:db8::1]", 80}},
+ {"http://[2001:0db8:0000:0000:0000:0000:0000:0001]/",
+ {"http", "[2001:db8::1]", 80}},
+ {"http://1/", {"http", "0.0.0.1", 80}},
+ {"http://1:1/", {"http", "0.0.0.1", 1}},
+ {"http://3232237825/", {"http", "192.168.9.1", 80}},
+
+ // Punycode
+ {"http://☃.net/", {"http", "xn--n3h.net", 80}},
+ {"blob:http://☃.net/", {"http", "xn--n3h.net", 80}},
+ {"local-std-with-host:↑↑↓↓←→←→ba.↑↑↓↓←→←→ba.0.bg",
+ {"local-std-with-host", "xn--ba-rzuadaibfa.xn--ba-rzuadaibfa.0.bg", 0}},
+
+ // Registered URLs
+ {"ftp://example.com/", {"ftp", "example.com", 21}},
+ {"ws://example.com/", {"ws", "example.com", 80}},
+ {"wss://example.com/", {"wss", "example.com", 443}},
+ {"wss://user:pass@example.com/", {"wss", "example.com", 443}},
+ };
+
+ for (const TestCase& test : kTestCases) {
+ SCOPED_TRACE(testing::Message() << "Test input: " << test.input);
+
+ // Only valid URLs should translate into valid, non-opaque origins.
+ EXPECT_TRUE(this->IsValidUrl(test.input));
+
+ auto origin = this->CreateOriginFromString(test.input);
+ this->VerifyTupleOriginInvariants(origin, test.expected_tuple);
+ }
+}
+
+TYPED_TEST_P(AbstractOriginTest, CustomSchemes_OpaqueOrigins) {
+ const char* kTestCases[] = {
+ // Unknown scheme
+ "unknown-scheme:foo",
+ "unknown-scheme://bar",
+
+ // Unknown scheme that is a prefix or suffix of a registered scheme.
+ "loca:foo",
+ "ocal:foo",
+ "local-suffix:foo",
+ "prefix-local:foo",
+
+ // Custom no-access schemes translate into an opaque origin (just like the
+ // built-in no-access schemes such as about:blank or data:).
+ "noaccess-std-with-host:foo",
+ "noaccess-std-with-host://bar",
+ "noaccess://host",
+ "local-noaccess://host",
+ "local-noaccess-std-with-host://host",
+ };
+
+ for (const char* test_input : kTestCases) {
+ SCOPED_TRACE(testing::Message() << "Test input: " << test_input);
+
+ // Verify that `origin` is opaque not just because `test_input` results in
+ // an invalid URL (because of a typo in the scheme name, or because of a
+ // technicality like having no host in a noaccess-std-with-host: scheme).
+ EXPECT_TRUE(this->IsValidUrl(test_input));
+
+ this->TestUniqueOpaqueOrigin(test_input);
+ }
+}
+
+TYPED_TEST_P(AbstractOriginTest, CustomSchemes_TupleOrigins) {
+ struct TestCase {
+ const char* input;
+ SchemeHostPort expected_tuple;
+ } kTestCases[] = {
+ // Scheme (registered in SetUp()) that's both local and standard.
+ // TODO: Is it really appropriate to do network-host canonicalization of
+ // schemes without ports?
+ {"local-std-with-host:20", {"local-std-with-host", "0.0.0.20", 0}},
+ {"local-std-with-host:20.", {"local-std-with-host", "0.0.0.20", 0}},
+ {"local-std-with-host:foo", {"local-std-with-host", "foo", 0}},
+ {"local-std-with-host://bar:20", {"local-std-with-host", "bar", 0}},
+ {"local-std-with-host:baz.", {"local-std-with-host", "baz.", 0}},
+ {"local-std-with-host:baz..", {"local-std-with-host", "baz..", 0}},
+ {"local-std-with-host:baz..bar", {"local-std-with-host", "baz..bar", 0}},
+ {"local-std-with-host:baz...", {"local-std-with-host", "baz...", 0}},
+
+ // Scheme (registered in SetUp()) that's local but nonstandard. These
+ // always have empty hostnames, but are allowed to be url::Origins.
+ {"local:", {"local", "", 0}},
+ {"local:foo", {"local", "", 0}},
+ {"local://bar", {"local", "", 0}},
+ {"also-local://bar", {"also-local", "", 0}},
+
+ {"std-with-host://host", {"std-with-host", "host", 0}},
+ {"local://host", {"local", "", 0}},
+ {"local-std-with-host://host", {"local-std-with-host", "host", 0}},
+ };
+
+ for (const TestCase& test : kTestCases) {
+ SCOPED_TRACE(testing::Message() << "Test input: " << test.input);
+
+ // Only valid URLs should translate into valid, non-opaque origins.
+ EXPECT_TRUE(this->IsValidUrl(test.input));
+
+ auto origin = this->CreateOriginFromString(test.input);
+ this->VerifyTupleOriginInvariants(origin, test.expected_tuple);
+ }
+}
+
+REGISTER_TYPED_TEST_SUITE_P(AbstractOriginTest,
+ NonStandardSchemeWithAndroidWebViewHack,
+ OpaqueOriginsFromValidUrls,
+ OpaqueOriginsFromInvalidUrls,
+ TupleOrigins,
+ CustomSchemes_OpaqueOrigins,
+ CustomSchemes_TupleOrigins);
+
+} // namespace url
+
+#endif // URL_ORIGIN_ABSTRACT_TESTS_H_
diff --git a/url/origin_unittest.cc b/url/origin_unittest.cc
index 99f723f..47cca81 100644
--- a/url/origin_unittest.cc
+++ b/url/origin_unittest.cc
@@ -1,132 +1,182 @@
-// Copyright 2015 The Chromium Authors. All rights reserved.
+// Copyright 2015 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "base/logging.h"
-#include "base/macros.h"
-#include "starboard/types.h"
+#include <stddef.h>
+#include <stdint.h>
+
+#include "base/memory/raw_ptr.h"
+#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h"
#include "url/origin.h"
+#include "url/origin_abstract_tests.h"
+#include "url/url_util.h"
namespace url {
-void ExpectParsedUrlsEqual(const GURL& a, const GURL& b) {
- EXPECT_EQ(a, b);
- const Parsed& a_parsed = a.parsed_for_possibly_invalid_spec();
- const Parsed& b_parsed = b.parsed_for_possibly_invalid_spec();
- EXPECT_EQ(a_parsed.scheme.begin, b_parsed.scheme.begin);
- EXPECT_EQ(a_parsed.scheme.len, b_parsed.scheme.len);
- EXPECT_EQ(a_parsed.username.begin, b_parsed.username.begin);
- EXPECT_EQ(a_parsed.username.len, b_parsed.username.len);
- EXPECT_EQ(a_parsed.password.begin, b_parsed.password.begin);
- EXPECT_EQ(a_parsed.password.len, b_parsed.password.len);
- EXPECT_EQ(a_parsed.host.begin, b_parsed.host.begin);
- EXPECT_EQ(a_parsed.host.len, b_parsed.host.len);
- EXPECT_EQ(a_parsed.port.begin, b_parsed.port.begin);
- EXPECT_EQ(a_parsed.port.len, b_parsed.port.len);
- EXPECT_EQ(a_parsed.path.begin, b_parsed.path.begin);
- EXPECT_EQ(a_parsed.path.len, b_parsed.path.len);
- EXPECT_EQ(a_parsed.query.begin, b_parsed.query.begin);
- EXPECT_EQ(a_parsed.query.len, b_parsed.query.len);
- EXPECT_EQ(a_parsed.ref.begin, b_parsed.ref.begin);
- EXPECT_EQ(a_parsed.ref.len, b_parsed.ref.len);
-}
-
class OriginTest : public ::testing::Test {
- protected:
- Origin CreateUniqueOpaque() { return Origin::CreateUniqueOpaque(); }
+ public:
+ void SetUp() override {
+ // Add two schemes which are local but nonstandard.
+ AddLocalScheme("local-but-nonstandard");
+ AddLocalScheme("also-local-but-nonstandard");
- Origin CreateCanonical(const GURL& url) {
- return Origin::CreateCanonical(url);
+ // Add a scheme that's both local and standard.
+ AddStandardScheme("local-and-standard", SchemeType::SCHEME_WITH_HOST);
+ AddLocalScheme("local-and-standard");
+
+ // Add a scheme that's standard but no-access. We still want these to
+ // form valid SchemeHostPorts, even though they always commit as opaque
+ // origins, so that they can represent the source of the resource even if
+ // it's not committable as a non-opaque origin.
+ AddStandardScheme("standard-but-noaccess", SchemeType::SCHEME_WITH_HOST);
+ AddNoAccessScheme("standard-but-noaccess");
}
+
+ ::testing::AssertionResult DoEqualityComparisons(const url::Origin& a,
+ const url::Origin& b,
+ bool should_compare_equal) {
+ ::testing::AssertionResult failure = ::testing::AssertionFailure();
+ failure << "DoEqualityComparisons failure. Expecting "
+ << (should_compare_equal ? "equality" : "inequality")
+ << " between:\n a\n Which is: " << a
+ << "\n b\n Which is: " << b << "\nThe following check failed: ";
+ if (a.IsSameOriginWith(b) != should_compare_equal)
+ return failure << "a.IsSameOriginWith(b)";
+ if (b.IsSameOriginWith(a) != should_compare_equal)
+ return failure << "b.IsSameOriginWith(a)";
+ if ((a == b) != should_compare_equal)
+ return failure << "(a == b)";
+ if ((b == a) != should_compare_equal)
+ return failure << "(b == a)";
+ if ((b != a) != !should_compare_equal)
+ return failure << "(b != a)";
+ if ((a != b) != !should_compare_equal)
+ return failure << "(a != b)";
+ return ::testing::AssertionSuccess();
+ }
+
+ bool HasNonceTokenBeenInitialized(const url::Origin& origin) {
+ EXPECT_TRUE(origin.opaque());
+ // Avoid calling nonce_.token() here, to not trigger lazy initialization.
+ return !origin.nonce_->token_.is_empty();
+ }
+
+ Origin::Nonce CreateNonce() { return Origin::Nonce(); }
+
+ Origin::Nonce CreateNonce(base::UnguessableToken nonce) {
+ return Origin::Nonce(nonce);
+ }
+
+ const base::UnguessableToken* GetNonce(const Origin& origin) {
+ return origin.GetNonceForSerialization();
+ }
+
+ // Wrappers around url::Origin methods to expose them to tests.
+
+ absl::optional<Origin> UnsafelyCreateOpaqueOriginWithoutNormalization(
+ base::StringPiece precursor_scheme,
+ base::StringPiece precursor_host,
+ uint16_t precursor_port,
+ const Origin::Nonce& nonce) {
+ return Origin::UnsafelyCreateOpaqueOriginWithoutNormalization(
+ precursor_scheme, precursor_host, precursor_port, nonce);
+ }
+
+ absl::optional<std::string> SerializeWithNonce(const Origin& origin) {
+ return origin.SerializeWithNonce();
+ }
+
+ absl::optional<std::string> SerializeWithNonceAndInitIfNeeded(
+ Origin& origin) {
+ return origin.SerializeWithNonceAndInitIfNeeded();
+ }
+
+ absl::optional<Origin> Deserialize(const std::string& value) {
+ return Origin::Deserialize(value);
+ }
+
+ private:
+ ScopedSchemeRegistryForTests scoped_registry_;
};
TEST_F(OriginTest, OpaqueOriginComparison) {
- // A default constructed Origin should be cross origin to everything,
- // including itself.
- Origin unique_origin;
- EXPECT_EQ("", unique_origin.scheme());
- EXPECT_EQ("", unique_origin.host());
- EXPECT_EQ(0, unique_origin.port());
- EXPECT_TRUE(unique_origin.unique());
- EXPECT_FALSE(unique_origin.IsSameOriginWith(unique_origin));
+ // A default-constructed Origin should be cross origin to everything
+ // but itself.
+ url::Origin opaque_a, opaque_b;
+ EXPECT_TRUE(opaque_a.opaque());
+ EXPECT_EQ("", opaque_a.scheme());
+ EXPECT_EQ("", opaque_a.host());
+ EXPECT_EQ(0, opaque_a.port());
+ EXPECT_EQ(SchemeHostPort(), opaque_a.GetTupleOrPrecursorTupleIfOpaque());
+ EXPECT_FALSE(opaque_a.GetTupleOrPrecursorTupleIfOpaque().IsValid());
- // An opaque Origin with a nonce should be same origin to itself though.
- Origin opaque_origin = CreateUniqueOpaque();
- EXPECT_EQ("", opaque_origin.scheme());
- EXPECT_EQ("", opaque_origin.host());
- EXPECT_EQ(0, opaque_origin.port());
- EXPECT_TRUE(opaque_origin.unique());
- EXPECT_TRUE(opaque_origin.IsSameOriginWith(opaque_origin));
+ EXPECT_TRUE(opaque_b.opaque());
+ EXPECT_EQ("", opaque_b.scheme());
+ EXPECT_EQ("", opaque_b.host());
+ EXPECT_EQ(0, opaque_b.port());
+ EXPECT_EQ(SchemeHostPort(), opaque_b.GetTupleOrPrecursorTupleIfOpaque());
+ EXPECT_FALSE(opaque_b.GetTupleOrPrecursorTupleIfOpaque().IsValid());
- // The default constructed Origin and the opaque Origin should always be
- // cross origin to each other.
- EXPECT_FALSE(opaque_origin.IsSameOriginWith(unique_origin));
+ // Two default-constructed Origins should always be cross origin to each
+ // other.
+ EXPECT_TRUE(DoEqualityComparisons(opaque_a, opaque_b, false));
+ EXPECT_TRUE(DoEqualityComparisons(opaque_b, opaque_b, true));
+ EXPECT_TRUE(DoEqualityComparisons(opaque_a, opaque_a, true));
- const char* const urls[] = {"data:text/html,Hello!",
- "javascript:alert(1)",
- "about:blank",
- "file://example.com:443/etc/passwd",
- "yay",
- "http::///invalid.example.com/"};
+ // The streaming operator should not trigger lazy initialization of the token.
+ std::ostringstream stream;
+ stream << opaque_a;
+ EXPECT_STREQ("null [internally: (nonce TBD) anonymous]",
+ stream.str().c_str());
+ EXPECT_FALSE(HasNonceTokenBeenInitialized(opaque_a));
- for (auto* test_url : urls) {
- SCOPED_TRACE(test_url);
- GURL url(test_url);
+ // None of the operations thus far should have triggered lazy-generation of
+ // the UnguessableToken. Copying an origin, however, should trigger this.
+ EXPECT_FALSE(HasNonceTokenBeenInitialized(opaque_a));
+ EXPECT_FALSE(HasNonceTokenBeenInitialized(opaque_b));
+ opaque_b = opaque_a;
- // no nonce mode of opaque origins
- {
- Origin origin = Origin::Create(url);
- EXPECT_EQ("", origin.scheme());
- EXPECT_EQ("", origin.host());
- EXPECT_EQ(0, origin.port());
- EXPECT_TRUE(origin.unique());
- // An opaque Origin with no nonce is always cross-origin to itself.
- EXPECT_FALSE(origin.IsSameOriginWith(origin));
- // A copy of |origin| should be cross-origin as well.
- Origin origin_copy = origin;
- EXPECT_EQ("", origin_copy.scheme());
- EXPECT_EQ("", origin_copy.host());
- EXPECT_EQ(0, origin_copy.port());
- EXPECT_TRUE(origin_copy.unique());
- EXPECT_FALSE(origin.IsSameOriginWith(origin_copy));
- // And it should always be cross-origin to another opaque Origin.
- EXPECT_FALSE(origin.IsSameOriginWith(opaque_origin));
- // As well as the default constructed Origin.
- EXPECT_FALSE(origin.IsSameOriginWith(unique_origin));
- // Re-creating from the URL should also be cross-origin.
- EXPECT_FALSE(origin.IsSameOriginWith(Origin::Create(url)));
+ EXPECT_TRUE(HasNonceTokenBeenInitialized(opaque_a));
+ EXPECT_TRUE(HasNonceTokenBeenInitialized(opaque_b));
+ EXPECT_TRUE(DoEqualityComparisons(opaque_a, opaque_b, true));
+ EXPECT_TRUE(DoEqualityComparisons(opaque_b, opaque_b, true));
+ EXPECT_TRUE(DoEqualityComparisons(opaque_a, opaque_a, true));
- ExpectParsedUrlsEqual(GURL(origin.Serialize()), origin.GetURL());
- }
+ // Move-assigning a fresh Origin should restore the lazy initialization.
+ opaque_a = url::Origin();
+ EXPECT_FALSE(HasNonceTokenBeenInitialized(opaque_a));
+ EXPECT_TRUE(HasNonceTokenBeenInitialized(opaque_b));
+ EXPECT_TRUE(DoEqualityComparisons(opaque_a, opaque_b, false));
+ EXPECT_TRUE(DoEqualityComparisons(opaque_b, opaque_b, true));
+ EXPECT_TRUE(DoEqualityComparisons(opaque_a, opaque_a, true));
- // opaque origins with a nonce
- {
- Origin origin = CreateCanonical(url);
- EXPECT_EQ("", origin.scheme());
- EXPECT_EQ("", origin.host());
- EXPECT_EQ(0, origin.port());
- EXPECT_TRUE(origin.unique());
- // An opaque Origin with a nonce is always same-origin to itself.
- EXPECT_TRUE(origin.IsSameOriginWith(origin));
- // A copy of |origin| should be same-origin as well.
- Origin origin_copy = origin;
- EXPECT_EQ("", origin_copy.scheme());
- EXPECT_EQ("", origin_copy.host());
- EXPECT_EQ(0, origin_copy.port());
- EXPECT_TRUE(origin_copy.unique());
- EXPECT_TRUE(origin.IsSameOriginWith(origin_copy));
- // But it should always be cross origin to another opaque Origin.
- EXPECT_FALSE(origin.IsSameOriginWith(opaque_origin));
- // As well as the default constructed Origin.
- EXPECT_FALSE(origin.IsSameOriginWith(unique_origin));
- // Re-creating from the URL should also be cross origin.
- EXPECT_FALSE(origin.IsSameOriginWith(CreateCanonical(url)));
+ // Comparing two opaque Origins with matching SchemeHostPorts should trigger
+ // lazy initialization.
+ EXPECT_FALSE(HasNonceTokenBeenInitialized(opaque_a));
+ EXPECT_TRUE(HasNonceTokenBeenInitialized(opaque_b));
+ bool should_swap = opaque_b < opaque_a;
+ EXPECT_TRUE(HasNonceTokenBeenInitialized(opaque_a));
+ EXPECT_TRUE(HasNonceTokenBeenInitialized(opaque_b));
- ExpectParsedUrlsEqual(GURL(origin.Serialize()), origin.GetURL());
- }
- }
+ if (should_swap)
+ std::swap(opaque_a, opaque_b);
+ EXPECT_LT(opaque_a, opaque_b);
+ EXPECT_FALSE(opaque_b < opaque_a);
+
+ EXPECT_TRUE(DoEqualityComparisons(opaque_a, opaque_b, false));
+ EXPECT_TRUE(DoEqualityComparisons(opaque_b, opaque_b, true));
+ EXPECT_TRUE(DoEqualityComparisons(opaque_a, opaque_a, true));
+
+ EXPECT_LT(opaque_a, url::Origin::Create(GURL("http://www.google.com")));
+ EXPECT_LT(opaque_b, url::Origin::Create(GURL("http://www.google.com")));
+
+ EXPECT_EQ(opaque_b, url::Origin::Resolve(GURL(), opaque_b));
+ EXPECT_EQ(opaque_b, url::Origin::Resolve(GURL("about:blank"), opaque_b));
+ EXPECT_EQ(opaque_b, url::Origin::Resolve(GURL("about:srcdoc"), opaque_b));
+ EXPECT_EQ(opaque_b,
+ url::Origin::Resolve(GURL("about:blank?hello#whee"), opaque_b));
}
TEST_F(OriginTest, ConstructFromTuple) {
@@ -154,78 +204,11 @@
}
}
-TEST_F(OriginTest, ConstructFromGURL) {
- Origin different_origin =
- Origin::Create(GURL("https://not-in-the-list.test/"));
-
- struct TestCases {
- const char* const url;
- const char* const expected_scheme;
- const char* const expected_host;
- const uint16_t expected_port;
- } cases[] = {
- // IP Addresses
- {"http://192.168.9.1/", "http", "192.168.9.1", 80},
- {"http://[2001:db8::1]/", "http", "[2001:db8::1]", 80},
-
- // Punycode
- {"http://☃.net/", "http", "xn--n3h.net", 80},
- {"blob:http://☃.net/", "http", "xn--n3h.net", 80},
-
- // Generic URLs
- {"http://example.com/", "http", "example.com", 80},
- {"http://example.com:123/", "http", "example.com", 123},
- {"https://example.com/", "https", "example.com", 443},
- {"https://example.com:123/", "https", "example.com", 123},
- {"http://user:pass@example.com/", "http", "example.com", 80},
- {"http://example.com:123/?query", "http", "example.com", 123},
- {"https://example.com/#1234", "https", "example.com", 443},
- {"https://u:p@example.com:123/?query#1234", "https", "example.com", 123},
-
- // Registered URLs
- {"ftp://example.com/", "ftp", "example.com", 21},
- {"gopher://example.com/", "gopher", "example.com", 70},
- {"ws://example.com/", "ws", "example.com", 80},
- {"wss://example.com/", "wss", "example.com", 443},
-
- // file: URLs
- {"file:///etc/passwd", "file", "", 0},
- {"file://example.com/etc/passwd", "file", "example.com", 0},
-
- // Filesystem:
- {"filesystem:http://example.com/type/", "http", "example.com", 80},
- {"filesystem:http://example.com:123/type/", "http", "example.com", 123},
- {"filesystem:https://example.com/type/", "https", "example.com", 443},
- {"filesystem:https://example.com:123/type/", "https", "example.com", 123},
-
- // Blob:
- {"blob:http://example.com/guid-goes-here", "http", "example.com", 80},
- {"blob:http://example.com:123/guid-goes-here", "http", "example.com", 123},
- {"blob:https://example.com/guid-goes-here", "https", "example.com", 443},
- {"blob:http://u:p@example.com/guid-goes-here", "http", "example.com", 80},
- };
-
- for (const auto& test_case : cases) {
- SCOPED_TRACE(test_case.url);
- GURL url(test_case.url);
- EXPECT_TRUE(url.is_valid());
- Origin origin = Origin::Create(url);
- EXPECT_EQ(test_case.expected_scheme, origin.scheme());
- EXPECT_EQ(test_case.expected_host, origin.host());
- EXPECT_EQ(test_case.expected_port, origin.port());
- EXPECT_FALSE(origin.unique());
- EXPECT_TRUE(origin.IsSameOriginWith(origin));
- EXPECT_FALSE(different_origin.IsSameOriginWith(origin));
- EXPECT_FALSE(origin.IsSameOriginWith(different_origin));
-
- ExpectParsedUrlsEqual(GURL(origin.Serialize()), origin.GetURL());
- }
-}
-
TEST_F(OriginTest, Serialization) {
struct TestCases {
const char* const url;
const char* const expected;
+ const char* const expected_log;
} cases[] = {
{"http://192.168.9.1/", "http://192.168.9.1"},
{"http://[2001:db8::1]/", "http://[2001:db8::1]"},
@@ -234,8 +217,10 @@
{"http://example.com:123/", "http://example.com:123"},
{"https://example.com/", "https://example.com"},
{"https://example.com:123/", "https://example.com:123"},
- {"file:///etc/passwd", "file://"},
- {"file://example.com/etc/passwd", "file://"},
+ {"file:///etc/passwd", "file://", "file:// [internally: file://]"},
+ {"file://example.com/etc/passwd", "file://",
+ "file:// [internally: file://example.com]"},
+ {"data:,", "null", "null [internally: (nonce TBD) anonymous]"},
};
for (const auto& test_case : cases) {
@@ -248,44 +233,23 @@
EXPECT_EQ(test_case.expected, serialized);
- // The '<<' operator should produce the same serialization as Serialize().
+ // The '<<' operator sometimes produces additional information.
std::stringstream out;
out << origin;
- EXPECT_EQ(test_case.expected, out.str());
+ if (test_case.expected_log)
+ EXPECT_EQ(test_case.expected_log, out.str());
+ else
+ EXPECT_EQ(test_case.expected, out.str());
}
}
TEST_F(OriginTest, Comparison) {
// These URLs are arranged in increasing order:
const char* const urls[] = {
- "data:uniqueness",
- "http://a:80",
- "http://b:80",
- "https://a:80",
- "https://b:80",
- "http://a:81",
- "http://b:81",
- "https://a:81",
- "https://b:81",
+ "data:uniqueness", "http://a:80", "http://b:80",
+ "https://a:80", "https://b:80", "http://a:81",
+ "http://b:81", "https://a:81", "https://b:81",
};
-
- {
- // Unlike below, pre-creation here isn't necessary, since the old creation
- // path doesn't populate a nonce. It makes for easier copy and paste though.
- std::vector<Origin> origins;
- for (const auto* test_url : urls)
- origins.push_back(CreateCanonical(GURL(test_url)));
-
- for (size_t i = 0; i < origins.size(); i++) {
- const Origin& current = origins[i];
- for (size_t j = i; j < origins.size(); j++) {
- const Origin& to_compare = origins[j];
- EXPECT_EQ(i < j, current < to_compare) << i << " < " << j;
- EXPECT_EQ(j < i, to_compare < current) << j << " < " << i;
- }
- }
- }
-
// Validate the comparison logic still works when creating a canonical origin,
// when any created opaque origins contain a nonce.
{
@@ -293,8 +257,7 @@
// with each freshly-constructed Origin (that's not copied).
std::vector<Origin> origins;
for (const auto* test_url : urls)
- origins.push_back(CreateCanonical(GURL(test_url)));
-
+ origins.push_back(Origin::Create(GURL(test_url)));
for (size_t i = 0; i < origins.size(); i++) {
const Origin& current = origins[i];
for (size_t j = i; j < origins.size(); j++) {
@@ -316,89 +279,117 @@
{"http", "example.com", 123},
{"https", "example.com", 443},
{"https", "example.com", 123},
- {"file", "", 0},
+ {"http", "example.com", 0}, // 0 is a valid port for http.
+ {"file", "", 0}, // 0 indicates "no port" for file: scheme.
{"file", "example.com", 0},
};
for (const auto& test : cases) {
- SCOPED_TRACE(testing::Message() << test.scheme << "://" << test.host << ":"
- << test.port);
- Origin origin = Origin::UnsafelyCreateOriginWithoutNormalization(
- test.scheme, test.host, test.port);
- EXPECT_EQ(test.scheme, origin.scheme());
- EXPECT_EQ(test.host, origin.host());
- EXPECT_EQ(test.port, origin.port());
- EXPECT_FALSE(origin.unique());
- EXPECT_TRUE(origin.IsSameOriginWith(origin));
+ SCOPED_TRACE(testing::Message()
+ << test.scheme << "://" << test.host << ":" << test.port);
+ absl::optional<url::Origin> origin =
+ url::Origin::UnsafelyCreateTupleOriginWithoutNormalization(
+ test.scheme, test.host, test.port);
+ ASSERT_TRUE(origin);
+ EXPECT_EQ(test.scheme, origin->scheme());
+ EXPECT_EQ(test.host, origin->host());
+ EXPECT_EQ(test.port, origin->port());
+ EXPECT_FALSE(origin->opaque());
+ EXPECT_TRUE(origin->IsSameOriginWith(*origin));
- ExpectParsedUrlsEqual(GURL(origin.Serialize()), origin.GetURL());
+ ExpectParsedUrlsEqual(GURL(origin->Serialize()), origin->GetURL());
+
+ base::UnguessableToken nonce = base::UnguessableToken::Create();
+ absl::optional<url::Origin> opaque_origin =
+ UnsafelyCreateOpaqueOriginWithoutNormalization(
+ test.scheme, test.host, test.port, CreateNonce(nonce));
+ ASSERT_TRUE(opaque_origin);
+ EXPECT_TRUE(opaque_origin->opaque());
+ EXPECT_FALSE(*opaque_origin == origin);
+ EXPECT_EQ(opaque_origin->GetTupleOrPrecursorTupleIfOpaque(),
+ origin->GetTupleOrPrecursorTupleIfOpaque());
+ EXPECT_EQ(opaque_origin,
+ UnsafelyCreateOpaqueOriginWithoutNormalization(
+ test.scheme, test.host, test.port, CreateNonce(nonce)));
+ EXPECT_FALSE(*opaque_origin == origin->DeriveNewOpaqueOrigin());
}
}
TEST_F(OriginTest, UnsafelyCreateUniqueOnInvalidInput) {
+ url::AddStandardScheme("host-only", url::SCHEME_WITH_HOST);
+ url::AddStandardScheme("host-port-only", url::SCHEME_WITH_HOST_AND_PORT);
struct TestCases {
const char* scheme;
const char* host;
- uint16_t port;
- } cases[] = {{"", "", 0},
+ uint16_t port = 80;
+ } cases[] = {{"", "", 33},
{"data", "", 0},
{"blob", "", 0},
{"filesystem", "", 0},
- {"data", "example.com", 80},
- {"http", "☃.net", 80},
- {"http\nmore", "example.com", 80},
- {"http\rmore", "example.com", 80},
- {"http\n", "example.com", 80},
- {"http\r", "example.com", 80},
- {"http", "example.com\nnot-example.com", 80},
- {"http", "example.com\rnot-example.com", 80},
- {"http", "example.com\n", 80},
- {"http", "example.com\r", 80},
- {"http", "example.com", 0},
- {"file", "", 80}};
+ {"data", "example.com"},
+ {"http", "☃.net"},
+ {"http\nmore", "example.com"},
+ {"http\rmore", "example.com"},
+ {"http\n", "example.com"},
+ {"http\r", "example.com"},
+ {"http", "example.com\nnot-example.com"},
+ {"http", "example.com\rnot-example.com"},
+ {"http", "example.com\n"},
+ {"http", "example.com\r"},
+ {"unknown-scheme", "example.com"},
+ {"host-only", "\r", 0},
+ {"host-only", "example.com", 22},
+ {"file", "", 123}}; // file: shouldn't have a port.
for (const auto& test : cases) {
- SCOPED_TRACE(testing::Message() << test.scheme << "://" << test.host << ":"
- << test.port);
- Origin origin = Origin::UnsafelyCreateOriginWithoutNormalization(
- test.scheme, test.host, test.port);
- EXPECT_EQ("", origin.scheme());
- EXPECT_EQ("", origin.host());
- EXPECT_EQ(0, origin.port());
- EXPECT_TRUE(origin.unique());
- EXPECT_FALSE(origin.IsSameOriginWith(origin));
-
- ExpectParsedUrlsEqual(GURL(origin.Serialize()), origin.GetURL());
+ SCOPED_TRACE(testing::Message()
+ << test.scheme << "://" << test.host << ":" << test.port);
+ EXPECT_FALSE(UnsafelyCreateOpaqueOriginWithoutNormalization(
+ test.scheme, test.host, test.port, CreateNonce()));
+ EXPECT_FALSE(url::Origin::UnsafelyCreateTupleOriginWithoutNormalization(
+ test.scheme, test.host, test.port));
}
+
+ // An empty scheme/host/port tuple is not a valid tuple origin.
+ EXPECT_FALSE(
+ url::Origin::UnsafelyCreateTupleOriginWithoutNormalization("", "", 0));
+
+ // Opaque origins with unknown precursors are allowed.
+ base::UnguessableToken token = base::UnguessableToken::Create();
+ absl::optional<url::Origin> anonymous_opaque =
+ UnsafelyCreateOpaqueOriginWithoutNormalization("", "", 0,
+ CreateNonce(token));
+ ASSERT_TRUE(anonymous_opaque)
+ << "An invalid tuple is a valid input to "
+ << "UnsafelyCreateOpaqueOriginWithoutNormalization, so long as it is "
+ << "the canonical form of the invalid tuple.";
+ EXPECT_TRUE(anonymous_opaque->opaque());
+ EXPECT_EQ(*GetNonce(anonymous_opaque.value()), token);
+ EXPECT_EQ(anonymous_opaque->GetTupleOrPrecursorTupleIfOpaque(),
+ url::SchemeHostPort());
}
TEST_F(OriginTest, UnsafelyCreateUniqueViaEmbeddedNulls) {
struct TestCases {
- const char* scheme;
- size_t scheme_length;
- const char* host;
- size_t host_length;
- uint16_t port;
- } cases[] = {{"http\0more", 9, "example.com", 11},
- {"http\0", 5, "example.com", 11},
- {"\0http", 5, "example.com", 11},
- {"http", 4, "example.com\0not-example.com", 27},
- {"http", 4, "example.com\0", 12},
- {"http", 4, "\0example.com", 12}};
+ base::StringPiece scheme;
+ base::StringPiece host;
+ uint16_t port = 80;
+ } cases[] = {{{"http\0more", 9}, {"example.com", 11}},
+ {{"http\0", 5}, {"example.com", 11}},
+ {{"\0http", 5}, {"example.com", 11}},
+ {{"http"}, {"example.com\0not-example.com", 27}},
+ {{"http"}, {"example.com\0", 12}},
+ {{"http"}, {"\0example.com", 12}},
+ {{""}, {"\0", 1}, 0},
+ {{"\0", 1}, {""}, 0}};
for (const auto& test : cases) {
- SCOPED_TRACE(testing::Message() << test.scheme << "://" << test.host << ":"
- << test.port);
- Origin origin = Origin::UnsafelyCreateOriginWithoutNormalization(
- std::string(test.scheme, test.scheme_length),
- std::string(test.host, test.host_length), test.port);
- EXPECT_EQ("", origin.scheme());
- EXPECT_EQ("", origin.host());
- EXPECT_EQ(0, origin.port());
- EXPECT_TRUE(origin.unique());
- EXPECT_FALSE(origin.IsSameOriginWith(origin));
-
- ExpectParsedUrlsEqual(GURL(origin.Serialize()), origin.GetURL());
+ SCOPED_TRACE(testing::Message()
+ << test.scheme << "://" << test.host << ":" << test.port);
+ EXPECT_FALSE(url::Origin::UnsafelyCreateTupleOriginWithoutNormalization(
+ test.scheme, test.host, test.port));
+ EXPECT_FALSE(UnsafelyCreateOpaqueOriginWithoutNormalization(
+ test.scheme, test.host, test.port, CreateNonce()));
}
}
@@ -438,15 +429,17 @@
};
for (const auto& test_case : kTestCases) {
- SCOPED_TRACE(testing::Message() << "(url, domain): (" << test_case.url
- << ", " << test_case.lower_ascii_domain
- << ")");
+ SCOPED_TRACE(testing::Message()
+ << "(url, domain): (" << test_case.url << ", "
+ << test_case.lower_ascii_domain << ")");
GURL url(test_case.url);
ASSERT_TRUE(url.is_valid());
Origin origin = Origin::Create(url);
EXPECT_EQ(test_case.expected_domain_is,
origin.DomainIs(test_case.lower_ascii_domain));
+ EXPECT_FALSE(
+ origin.DeriveNewOpaqueOrigin().DomainIs(test_case.lower_ascii_domain));
}
// If the URL is invalid, DomainIs returns false.
@@ -465,4 +458,320 @@
EXPECT_STREQ("https://foo.com", origin1_debug_alias);
}
+TEST_F(OriginTest, CanBeDerivedFrom) {
+ AddStandardScheme("new-standard", SchemeType::SCHEME_WITH_HOST);
+ Origin opaque_unique_origin = Origin();
+
+ Origin regular_origin = Origin::Create(GURL("https://a.com/"));
+ Origin opaque_precursor_origin = regular_origin.DeriveNewOpaqueOrigin();
+
+ Origin file_origin = Origin::Create(GURL("file:///foo/bar"));
+ Origin file_opaque_precursor_origin = file_origin.DeriveNewOpaqueOrigin();
+ Origin file_host_origin = Origin::Create(GURL("file://a.com/foo/bar"));
+ Origin file_host_opaque_precursor_origin =
+ file_host_origin.DeriveNewOpaqueOrigin();
+
+ Origin non_standard_scheme_origin =
+ Origin::Create(GURL("non-standard-scheme:foo"));
+ Origin non_standard_opaque_precursor_origin =
+ non_standard_scheme_origin.DeriveNewOpaqueOrigin();
+
+ // Also, add new standard scheme that is local to the test.
+ Origin new_standard_origin = Origin::Create(GURL("new-standard://host/"));
+ Origin new_standard_opaque_precursor_origin =
+ new_standard_origin.DeriveNewOpaqueOrigin();
+
+ // No access schemes always get unique opaque origins.
+ Origin no_access_origin =
+ Origin::Create(GURL("standard-but-noaccess://b.com"));
+ Origin no_access_opaque_precursor_origin =
+ no_access_origin.DeriveNewOpaqueOrigin();
+
+ Origin local_non_standard_origin =
+ Origin::Create(GURL("local-but-nonstandard://a.com"));
+ Origin local_non_standard_opaque_precursor_origin =
+ local_non_standard_origin.DeriveNewOpaqueOrigin();
+
+ // Call origin.CanBeDerivedFrom(url) for each of the following test cases
+ // and ensure that it returns |expected_value|
+ const struct {
+ const char* url;
+ raw_ptr<Origin> origin;
+ bool expected_value;
+ } kTestCases[] = {
+ {"https://a.com", &regular_origin, true},
+ // Web URL can commit in an opaque origin with precursor information.
+ // Example: iframe sandbox navigated to a.com.
+ {"https://a.com", &opaque_precursor_origin, true},
+ // URL that comes from the web can never commit in an opaque unique
+ // origin. It must have precursor information.
+ {"https://a.com", &opaque_unique_origin, false},
+
+ // Cross-origin URLs should never work.
+ {"https://b.com", &regular_origin, false},
+ {"https://b.com", &opaque_precursor_origin, false},
+
+ // data: URL can never commit in a regular, non-opaque origin.
+ {"data:text/html,foo", &regular_origin, false},
+ // This is the default case: data: URLs commit in opaque origin carrying
+ // precursor information for the origin that created them.
+ {"data:text/html,foo", &opaque_precursor_origin, true},
+ // Browser-initiated navigations can result in data: URL committing in
+ // opaque unique origin.
+ {"data:text/html,foo", &opaque_unique_origin, true},
+
+ // about:blank can commit in regular origin (default case for iframes).
+ {"about:blank", &regular_origin, true},
+ // This can happen if data: URL that originated at a.com creates an
+ // about:blank iframe.
+ {"about:blank", &opaque_precursor_origin, true},
+ // Browser-initiated navigations can result in about:blank URL committing
+ // in opaque unique origin.
+ {"about:blank", &opaque_unique_origin, true},
+
+ // Default behavior of srcdoc is to inherit the origin of the parent
+ // document.
+ {"about:srcdoc", &regular_origin, true},
+ // This happens for sandboxed srcdoc iframe.
+ {"about:srcdoc", &opaque_precursor_origin, true},
+ // This can happen with browser-initiated navigation to about:blank or
+ // data: URL, which in turn add srcdoc iframe.
+ {"about:srcdoc", &opaque_unique_origin, true},
+
+ // Just like srcdoc, blob: URLs can be created in all the cases.
+ {"blob:https://a.com/foo", &regular_origin, true},
+ {"blob:https://a.com/foo", &opaque_precursor_origin, true},
+ {"blob:https://a.com/foo", &opaque_unique_origin, true},
+
+ {"filesystem:https://a.com/foo", &regular_origin, true},
+ {"filesystem:https://a.com/foo", &opaque_precursor_origin, true},
+ // Unlike blob: URLs, filesystem: ones cannot be created in a unique
+ // opaque origin.
+ {"filesystem:https://a.com/foo", &opaque_unique_origin, false},
+
+ // file: URLs cannot result in regular web origins, regardless of
+ // opaqueness.
+ {"file:///etc/passwd", &regular_origin, false},
+ {"file:///etc/passwd", &opaque_precursor_origin, false},
+ // However, they can result in regular file: origin and an opaque one
+ // containing another file: origin as precursor.
+ {"file:///etc/passwd", &file_origin, true},
+ {"file:///etc/passwd", &file_opaque_precursor_origin, true},
+ // It should not be possible to get an opaque unique origin for file:
+ // as it is a standard scheme and will always result in a tuple origin
+ // or will always be derived from another origin.
+ // Note: file:// URLs should become unique opaque origins at some point.
+ {"file:///etc/passwd", &opaque_unique_origin, false},
+
+ // The same set as above, but including a host.
+ {"file://a.com/etc/passwd", &regular_origin, false},
+ {"file://a.com/etc/passwd", &opaque_precursor_origin, false},
+ {"file://a.com/etc/passwd", &file_host_origin, true},
+ {"file://a.com/etc/passwd", &file_host_opaque_precursor_origin, true},
+ {"file://a.com/etc/passwd", &opaque_unique_origin, false},
+
+ // Locally registered standard scheme should behave the same way
+ // as built-in standard schemes.
+ {"new-standard://host/foo", &new_standard_origin, true},
+ {"new-standard://host/foo", &new_standard_opaque_precursor_origin, true},
+ {"new-standard://host/foo", &opaque_unique_origin, false},
+ {"new-standard://host2/foo", &new_standard_origin, false},
+ {"new-standard://host2/foo", &new_standard_opaque_precursor_origin,
+ false},
+
+ // A non-standard scheme should never commit in a standard origin or
+ // opaque origin with standard precursor information.
+ {"non-standard-scheme://a.com/foo", &regular_origin, false},
+ {"non-standard-scheme://a.com/foo", &opaque_precursor_origin, false},
+ // However, it should be fine to commit in unique opaque origins or in its
+ // own origin.
+ // Note: since non-standard scheme URLs don't parse out anything
+ // but the scheme, using a random different hostname here would work.
+ {"non-standard-scheme://b.com/foo2", &opaque_unique_origin, true},
+ {"non-standard-scheme://b.com/foo3", &non_standard_scheme_origin, true},
+ {"non-standard-scheme://b.com/foo4",
+ &non_standard_opaque_precursor_origin, true},
+
+ // No access scheme can only commit in opaque origin.
+ {"standard-but-noaccess://a.com/foo", &regular_origin, false},
+ {"standard-but-noaccess://a.com/foo", &opaque_precursor_origin, false},
+ {"standard-but-noaccess://a.com/foo", &opaque_unique_origin, true},
+ {"standard-but-noaccess://a.com/foo", &no_access_origin, true},
+ {"standard-but-noaccess://a.com/foo", &no_access_opaque_precursor_origin,
+ true},
+ {"standard-but-noaccess://b.com/foo", &no_access_origin, true},
+ {"standard-but-noaccess://b.com/foo", &no_access_opaque_precursor_origin,
+ true},
+
+ // Local schemes can be non-standard, verify they also work as expected.
+ {"local-but-nonstandard://a.com", &regular_origin, false},
+ {"local-but-nonstandard://a.com", &opaque_precursor_origin, false},
+ {"local-but-nonstandard://a.com", &opaque_unique_origin, true},
+ {"local-but-nonstandard://a.com", &local_non_standard_origin, true},
+ {"local-but-nonstandard://a.com",
+ &local_non_standard_opaque_precursor_origin, true},
+ };
+
+ for (const auto& test_case : kTestCases) {
+ SCOPED_TRACE(testing::Message() << "(origin, url): (" << *test_case.origin
+ << ", " << test_case.url << ")");
+ EXPECT_EQ(test_case.expected_value,
+ test_case.origin->CanBeDerivedFrom(GURL(test_case.url)));
+ }
+}
+
+TEST_F(OriginTest, GetDebugString) {
+ Origin http_origin = Origin::Create(GURL("http://192.168.9.1"));
+ EXPECT_STREQ(http_origin.GetDebugString().c_str(), "http://192.168.9.1");
+
+ Origin http_opaque_origin = http_origin.DeriveNewOpaqueOrigin();
+ EXPECT_THAT(
+ http_opaque_origin.GetDebugString().c_str(),
+ ::testing::MatchesRegex(
+ "null \\[internally: \\(\\w*\\) derived from http://192.168.9.1\\]"));
+ EXPECT_THAT(
+ http_opaque_origin.GetDebugString(false /* include_nonce */).c_str(),
+ ::testing::MatchesRegex(
+ "null \\[internally: derived from http://192.168.9.1\\]"));
+
+ Origin data_origin = Origin::Create(GURL("data:"));
+ EXPECT_STREQ(data_origin.GetDebugString().c_str(),
+ "null [internally: (nonce TBD) anonymous]");
+
+ // The nonce of the origin will be initialized if a new opaque origin is
+ // derived.
+ Origin data_derived_origin = data_origin.DeriveNewOpaqueOrigin();
+ EXPECT_THAT(
+ data_derived_origin.GetDebugString().c_str(),
+ ::testing::MatchesRegex("null \\[internally: \\(\\w*\\) anonymous\\]"));
+ EXPECT_THAT(
+ data_derived_origin.GetDebugString(false /* include_nonce */).c_str(),
+ ::testing::MatchesRegex("null \\[internally: anonymous\\]"));
+
+ Origin file_origin = Origin::Create(GURL("file:///etc/passwd"));
+ EXPECT_STREQ(file_origin.GetDebugString().c_str(),
+ "file:// [internally: file://]");
+
+ Origin file_server_origin =
+ Origin::Create(GURL("file://example.com/etc/passwd"));
+ EXPECT_STREQ(file_server_origin.GetDebugString().c_str(),
+ "file:// [internally: file://example.com]");
+}
+
+TEST_F(OriginTest, Deserialize) {
+ std::vector<GURL> valid_urls = {
+ GURL("https://a.com"), GURL("http://a"),
+ GURL("http://a:80"), GURL("file://a.com/etc/passwd"),
+ GURL("file:///etc/passwd"), GURL("http://192.168.1.1"),
+ GURL("http://[2001:db8::1]/"),
+ };
+ for (const GURL& url : valid_urls) {
+ SCOPED_TRACE(url.spec());
+ Origin origin = Origin::Create(url);
+ absl::optional<std::string> serialized = SerializeWithNonce(origin);
+ ASSERT_TRUE(serialized);
+
+ absl::optional<Origin> deserialized = Deserialize(std::move(*serialized));
+ ASSERT_TRUE(deserialized.has_value());
+
+ EXPECT_TRUE(DoEqualityComparisons(origin, deserialized.value(), true));
+ EXPECT_EQ(origin.GetDebugString(), deserialized.value().GetDebugString());
+ }
+}
+
+TEST_F(OriginTest, DeserializeInvalid) {
+ EXPECT_EQ(absl::nullopt, Deserialize(std::string()));
+ EXPECT_EQ(absl::nullopt, Deserialize("deadbeef"));
+ EXPECT_EQ(absl::nullopt, Deserialize("0123456789"));
+ EXPECT_EQ(absl::nullopt, Deserialize("https://a.com"));
+ EXPECT_EQ(absl::nullopt, Deserialize("https://192.168.1.1"));
+}
+
+TEST_F(OriginTest, SerializeTBDNonce) {
+ std::vector<GURL> invalid_urls = {
+ GURL("data:uniqueness"), GURL("data:,"),
+ GURL("data:text/html,Hello!"), GURL("javascript:alert(1)"),
+ GURL("about:blank"), GURL("google.com"),
+ };
+ for (const GURL& url : invalid_urls) {
+ SCOPED_TRACE(url.spec());
+ Origin origin = Origin::Create(url);
+ absl::optional<std::string> serialized = SerializeWithNonce(origin);
+ absl::optional<Origin> deserialized = Deserialize(std::move(*serialized));
+ ASSERT_TRUE(deserialized.has_value());
+
+ // Can't use DoEqualityComparisons here since empty nonces are never ==
+ // unless they are the same object.
+ EXPECT_EQ(origin.GetDebugString(), deserialized.value().GetDebugString());
+ }
+
+ {
+ // Same basic test as above, but without a GURL to create tuple_.
+ Origin opaque;
+ absl::optional<std::string> serialized = SerializeWithNonce(opaque);
+ ASSERT_TRUE(serialized);
+
+ absl::optional<Origin> deserialized = Deserialize(std::move(*serialized));
+ ASSERT_TRUE(deserialized.has_value());
+
+ // Can't use DoEqualityComparisons here since empty nonces are never ==
+ // unless they are the same object.
+ EXPECT_EQ(opaque.GetDebugString(), deserialized.value().GetDebugString());
+ }
+
+ // Now force initialization of the nonce prior to serialization.
+ for (const GURL& url : invalid_urls) {
+ SCOPED_TRACE(url.spec());
+ Origin origin = Origin::Create(url);
+ absl::optional<std::string> serialized =
+ SerializeWithNonceAndInitIfNeeded(origin);
+ absl::optional<Origin> deserialized = Deserialize(std::move(*serialized));
+ ASSERT_TRUE(deserialized.has_value());
+
+ // The nonce should have been initialized prior to Serialization().
+ EXPECT_EQ(origin, deserialized.value());
+ }
+}
+
+TEST_F(OriginTest, DeserializeValidNonce) {
+ Origin opaque;
+ GetNonce(opaque);
+
+ absl::optional<std::string> serialized = SerializeWithNonce(opaque);
+ ASSERT_TRUE(serialized);
+
+ absl::optional<Origin> deserialized = Deserialize(std::move(*serialized));
+ ASSERT_TRUE(deserialized.has_value());
+
+ EXPECT_TRUE(DoEqualityComparisons(opaque, deserialized.value(), true));
+ EXPECT_EQ(opaque.GetDebugString(), deserialized.value().GetDebugString());
+}
+
+TEST_F(OriginTest, IsSameOriginWith) {
+ url::Origin opaque_origin;
+ GURL foo_url = GURL("https://foo.com/path");
+ url::Origin foo_origin = url::Origin::Create(foo_url);
+ GURL bar_url = GURL("https://bar.com/path");
+ url::Origin bar_origin = url::Origin::Create(bar_url);
+
+ EXPECT_FALSE(opaque_origin.IsSameOriginWith(foo_origin));
+ EXPECT_FALSE(opaque_origin.IsSameOriginWith(foo_url));
+
+ EXPECT_TRUE(foo_origin.IsSameOriginWith(foo_origin));
+ EXPECT_TRUE(foo_origin.IsSameOriginWith(foo_url));
+
+ EXPECT_FALSE(foo_origin.IsSameOriginWith(bar_origin));
+ EXPECT_FALSE(foo_origin.IsSameOriginWith(bar_url));
+
+ // Documenting legacy behavior. This doesn't necessarily mean that the legacy
+ // behavior is correct (or desirable in the long-term).
+ EXPECT_FALSE(foo_origin.IsSameOriginWith(GURL("about:blank")));
+ EXPECT_FALSE(foo_origin.IsSameOriginWith(GURL())); // Invalid GURL.
+ EXPECT_TRUE(foo_origin.IsSameOriginWith(GURL("blob:https://foo.com/guid")));
+}
+
+INSTANTIATE_TYPED_TEST_SUITE_P(UrlOrigin,
+ AbstractOriginTest,
+ UrlOriginTestTraits);
+
} // namespace url
diff --git a/url/run_all_perftests.cc b/url/run_all_perftests.cc
new file mode 100644
index 0000000..f11fd29
--- /dev/null
+++ b/url/run_all_perftests.cc
@@ -0,0 +1,14 @@
+// Copyright 2019 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/functional/bind.h"
+#include "base/test/launcher/unit_test_launcher.h"
+#include "base/test/perf_test_suite.h"
+
+int main(int argc, char** argv) {
+ base::PerfTestSuite test_suite(argc, argv);
+ return base::LaunchUnitTestsSerially(
+ argc, argv,
+ base::BindOnce(&base::TestSuite::Run, base::Unretained(&test_suite)));
+}
diff --git a/url/run_all_unittests.cc b/url/run_all_unittests.cc
index 916a3eb..a5a54eb 100644
--- a/url/run_all_unittests.cc
+++ b/url/run_all_unittests.cc
@@ -1,8 +1,12 @@
-// Copyright 2016 The Chromium Authors. All rights reserved.
+// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "base/bind.h"
+#include <memory>
+
+#include "base/functional/bind.h"
+#include "base/test/launcher/unit_test_launcher.h"
+#include "base/test/test_io_thread.h"
#include "base/test/test_suite.h"
#include "build/build_config.h"
@@ -19,18 +23,14 @@
#else
-#include <memory>
-#include "base/test/launcher/unit_test_launcher.h"
-#include "base/test/test_io_thread.h"
-
-#if !defined(OS_IOS)
+#if !BUILDFLAG(IS_IOS)
#include "mojo/core/embedder/embedder.h" // nogncheck
#endif
int main(int argc, char** argv) {
base::TestSuite test_suite(argc, argv);
-#if !defined(OS_IOS)
+#if !BUILDFLAG(IS_IOS)
mojo::core::Init();
#endif
diff --git a/url/scheme_host_port.cc b/url/scheme_host_port.cc
index 5770834..490ae9a 100644
--- a/url/scheme_host_port.cc
+++ b/url/scheme_host_port.cc
@@ -1,17 +1,21 @@
-// Copyright 2015 The Chromium Authors. All rights reserved.
+// Copyright 2015 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "url/scheme_host_port.h"
+#include <stdint.h>
#include <string.h>
+#include <ostream>
#include <tuple>
-#include "base/logging.h"
+#include "base/check_op.h"
+#include "base/containers/contains.h"
+#include "base/notreached.h"
#include "base/numerics/safe_conversions.h"
#include "base/strings/string_number_conversions.h"
-#include "starboard/types.h"
+#include "base/strings/string_piece.h"
#include "url/gurl.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
@@ -47,25 +51,53 @@
return host == canon_host;
}
+// Note: When changing IsValidInput, consider also updating
+// ShouldTreatAsOpaqueOrigin in Blink (there might be existing differences in
+// behavior between these 2 layers, but we should avoid introducing new
+// differences).
bool IsValidInput(const base::StringPiece& scheme,
const base::StringPiece& host,
uint16_t port,
SchemeHostPort::ConstructPolicy policy) {
+ // Empty schemes are never valid.
+ if (scheme.empty())
+ return false;
+
+ // about:blank and other no-access schemes translate into an opaque origin.
+ // This helps consistency with ShouldTreatAsOpaqueOrigin in Blink.
+ if (base::Contains(GetNoAccessSchemes(), scheme))
+ return false;
+
SchemeType scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
bool is_standard = GetStandardSchemeType(
scheme.data(),
Component(0, base::checked_cast<int>(scheme.length())),
&scheme_type);
- if (!is_standard)
- return false;
+ if (!is_standard) {
+ // To be consistent with ShouldTreatAsOpaqueOrigin in Blink, local
+ // non-standard schemes are currently allowed to be tuple origins.
+ // Nonstandard schemes don't have hostnames, so their tuple is just
+ // ("protocol", "", 0).
+ //
+ // TODO: Migrate "content:" and "externalfile:" to be standard schemes, and
+ // remove this local scheme exception.
+ if (base::Contains(GetLocalSchemes(), scheme) && host.empty() && port == 0)
+ return true;
+
+ // Otherwise, allow non-standard schemes only if the Android WebView
+ // workaround is enabled.
+ return AllowNonStandardSchemesForAndroidWebView();
+ }
switch (scheme_type) {
case SCHEME_WITH_HOST_AND_PORT:
case SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION:
- // A URL with |scheme| is required to have the host and port (may be
- // omitted in a serialization if it's the same as the default value).
- // Return an invalid instance if either of them is not given.
- if (host.empty() || port == 0)
+ // A URL with |scheme| is required to have the host and port, so return an
+ // invalid instance if host is not given. Note that a valid port is
+ // always provided by SchemeHostPort(const GURL&) constructor (a missing
+ // port is replaced with a default port if needed by
+ // GURL::EffectiveIntPort()).
+ if (host.empty())
return false;
// Don't do an expensive canonicalization if the host is already
@@ -108,31 +140,33 @@
} // namespace
-SchemeHostPort::SchemeHostPort() : port_(0) {
-}
+SchemeHostPort::SchemeHostPort() = default;
SchemeHostPort::SchemeHostPort(std::string scheme,
std::string host,
uint16_t port,
- ConstructPolicy policy)
- : port_(0) {
- if (!IsValidInput(scheme, host, port, policy))
+ ConstructPolicy policy) {
+ if (!IsValidInput(scheme, host, port, policy)) {
+ DCHECK(!IsValid());
return;
+ }
scheme_ = std::move(scheme);
host_ = std::move(host);
port_ = port;
+ DCHECK(IsValid()) << "Scheme: " << scheme_ << " Host: " << host_
+ << " Port: " << port;
}
SchemeHostPort::SchemeHostPort(base::StringPiece scheme,
base::StringPiece host,
uint16_t port)
- : SchemeHostPort(scheme.as_string(),
- host.as_string(),
+ : SchemeHostPort(std::string(scheme),
+ std::string(host),
port,
ConstructPolicy::CHECK_CANONICALIZATION) {}
-SchemeHostPort::SchemeHostPort(const GURL& url) : port_(0) {
+SchemeHostPort::SchemeHostPort(const GURL& url) {
if (!url.is_valid())
return;
@@ -151,15 +185,19 @@
if (!IsValidInput(scheme, host, port, ALREADY_CANONICALIZED))
return;
- scheme.CopyToString(&scheme_);
- host.CopyToString(&host_);
+ scheme_ = std::string(scheme);
+ host_ = std::string(host);
port_ = port;
}
SchemeHostPort::~SchemeHostPort() = default;
-bool SchemeHostPort::IsInvalid() const {
- return scheme_.empty() && host_.empty() && !port_;
+bool SchemeHostPort::IsValid() const {
+ // It suffices to just check |scheme_| for emptiness; the other fields are
+ // never present without it.
+ DCHECK(!scheme_.empty() || host_.empty());
+ DCHECK(!scheme_.empty() || port_ == 0);
+ return !scheme_.empty();
}
std::string SchemeHostPort::Serialize() const {
@@ -173,7 +211,7 @@
url::Parsed parsed;
std::string serialized = SerializeInternal(&parsed);
- if (IsInvalid())
+ if (!IsValid())
return GURL(std::move(serialized), parsed, false);
// SchemeHostPort does not have enough information to determine if an empty
@@ -191,11 +229,6 @@
return GURL(std::move(serialized), parsed, true);
}
-bool SchemeHostPort::Equals(const SchemeHostPort& other) const {
- return port_ == other.port() && scheme_ == other.scheme() &&
- host_ == other.host();
-}
-
bool SchemeHostPort::operator<(const SchemeHostPort& other) const {
return std::tie(port_, scheme_, host_) <
std::tie(other.port_, other.scheme_, other.host_);
@@ -203,7 +236,7 @@
std::string SchemeHostPort::SerializeInternal(url::Parsed* parsed) const {
std::string result;
- if (IsInvalid())
+ if (!IsValid())
return result;
// Reserve enough space for the "normal" case of scheme://host/.
@@ -221,9 +254,6 @@
result.append(host_);
}
- if (port_ == 0)
- return result;
-
// Omit the port component if the port matches with the default port
// defined for the scheme, if any.
int default_port = DefaultPortForScheme(scheme_.data(),
@@ -232,7 +262,7 @@
return result;
if (port_ != default_port) {
result.push_back(':');
- std::string port(base::UintToString(port_));
+ std::string port(base::NumberToString(port_));
parsed->port = Component(result.length(), port.length());
result.append(std::move(port));
}
@@ -240,4 +270,9 @@
return result;
}
+std::ostream& operator<<(std::ostream& out,
+ const SchemeHostPort& scheme_host_port) {
+ return out << scheme_host_port.Serialize();
+}
+
} // namespace url
diff --git a/url/scheme_host_port.h b/url/scheme_host_port.h
index 956dbcf..a98e7af 100644
--- a/url/scheme_host_port.h
+++ b/url/scheme_host_port.h
@@ -1,15 +1,16 @@
-// Copyright 2015 The Chromium Authors. All rights reserved.
+// Copyright 2015 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef URL_SCHEME_HOST_PORT_H_
#define URL_SCHEME_HOST_PORT_H_
+#include <stdint.h>
+
#include <string>
+#include "base/component_export.h"
#include "base/strings/string_piece.h"
-#include "starboard/types.h"
-#include "url/url_export.h"
class GURL;
@@ -48,9 +49,9 @@
// these constructs.
//
// * SchemeHostPort has no notion of the Origin concept (RFC 6454), and in
-// particular, it has no notion of a "unique" Origin. If you need to take
-// uniqueness into account (and, if you're making security-relevant decisions
-// then you absolutely do), please use 'url::Origin' instead.
+// particular, it has no notion of an opaque Origin. If you need to take
+// opaque origins into account (and, if you're making security-relevant
+// decisions then you absolutely do), please use 'url::Origin' instead.
//
// Usage:
//
@@ -70,8 +71,8 @@
// tuple.port(); // 443
//
// GURL url("https://example.com/");
-// tuple.Equals(url::SchemeHostPort(url)); // true
-class URL_EXPORT SchemeHostPort {
+// tuple == url::SchemeHostPort(url); // true
+class COMPONENT_EXPORT(URL) SchemeHostPort {
public:
// Creates an invalid (scheme, host, port) tuple, which represents an invalid
// or non-standard URL.
@@ -79,8 +80,8 @@
// Creates a (scheme, host, port) tuple. |host| must be a canonicalized
// A-label (that is, '☃.net' must be provided as 'xn--n3h.net'). |scheme|
- // must be a standard scheme. |port| must not be 0, unless |scheme| does not
- // support ports (e.g. 'file'). In that case, |port| must be 0.
+ // must be a standard scheme. |port| must be 0 if |scheme| does not support
+ // ports (e.g. 'file').
//
// Copies the data in |scheme| and |host|.
SchemeHostPort(base::StringPiece scheme,
@@ -110,14 +111,8 @@
// Copyable and movable.
SchemeHostPort(const SchemeHostPort&) = default;
SchemeHostPort& operator=(const SchemeHostPort&) = default;
- SchemeHostPort(SchemeHostPort&&) = default;
- SchemeHostPort& operator=(SchemeHostPort&&) = default;
-#if defined(STARBOARD)
- // Cobalt's compiler can not generate operator== by default yet.
- bool operator==(const SchemeHostPort& rhs) const {
- return scheme_ == rhs.scheme_ && host_ == rhs.host_ && port_ == rhs.port_;
- }
-#endif
+ SchemeHostPort(SchemeHostPort&&) noexcept = default;
+ SchemeHostPort& operator=(SchemeHostPort&&) noexcept = default;
~SchemeHostPort();
@@ -127,18 +122,21 @@
const std::string& host() const { return host_; }
const std::string& scheme() const { return scheme_; }
uint16_t port() const { return port_; }
- bool IsInvalid() const;
+ bool IsValid() const;
// Serializes the SchemeHostPort tuple to a canonical form.
//
// While this string form resembles the Origin serialization specified in
// Section 6.2 of RFC 6454, it is important to note that invalid
// SchemeHostPort tuples serialize to the empty string, rather than being
- // serialized as a unique Origin.
+ // serialized as would an opaque Origin.
std::string Serialize() const;
// Efficiently returns what GURL(Serialize()) would return, without needing to
- // re-parse the URL.
+ // re-parse the URL. Note: this still performs allocations to copy data into
+ // GURL, so please avoid using this method if you only need to work on
+ // schemes, hosts, or ports individually.
+ // For example, see crrev.com/c/3637099/comments/782360d0_e14757be.
GURL GetURL() const;
// Two SchemeHostPort objects are "equal" iff their schemes, hosts, and ports
@@ -146,9 +144,14 @@
//
// Note that this comparison is _not_ the same as an origin-based comparison.
// In particular, invalid SchemeHostPort objects match each other (and
- // themselves). Unique origins, on the other hand, would not.
- bool Equals(const SchemeHostPort& other) const;
-
+ // themselves). Opaque origins, on the other hand, would not.
+ bool operator==(const SchemeHostPort& other) const {
+ return port_ == other.port() && scheme_ == other.scheme() &&
+ host_ == other.host();
+ }
+ bool operator!=(const SchemeHostPort& other) const {
+ return !(*this == other);
+ }
// Allows SchemeHostPort to be used as a key in STL (for example, a std::set
// or std::map).
bool operator<(const SchemeHostPort& other) const;
@@ -158,9 +161,13 @@
std::string scheme_;
std::string host_;
- uint16_t port_;
+ uint16_t port_ = 0;
};
+COMPONENT_EXPORT(URL)
+std::ostream& operator<<(std::ostream& out,
+ const SchemeHostPort& scheme_host_port);
+
} // namespace url
#endif // URL_SCHEME_HOST_PORT_H_
diff --git a/url/scheme_host_port_unittest.cc b/url/scheme_host_port_unittest.cc
index 24c4e0f..49bcf25 100644
--- a/url/scheme_host_port_unittest.cc
+++ b/url/scheme_host_port_unittest.cc
@@ -1,12 +1,14 @@
-// Copyright 2015 The Chromium Authors. All rights reserved.
+// Copyright 2015 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "base/macros.h"
-#include "starboard/types.h"
+#include "url/scheme_host_port.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
#include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h"
-#include "url/scheme_host_port.h"
#include "url/url_util.h"
namespace {
@@ -14,13 +16,14 @@
class SchemeHostPortTest : public testing::Test {
public:
SchemeHostPortTest() = default;
- ~SchemeHostPortTest() override {
- // Reset any added schemes.
- url::Shutdown();
- }
+
+ SchemeHostPortTest(const SchemeHostPortTest&) = delete;
+ SchemeHostPortTest& operator=(const SchemeHostPortTest&) = delete;
+
+ ~SchemeHostPortTest() override = default;
private:
- DISALLOW_COPY_AND_ASSIGN(SchemeHostPortTest);
+ url::ScopedSchemeRegistryForTests scoped_registry_;
};
void ExpectParsedUrlsEqual(const GURL& a, const GURL& b) {
@@ -50,12 +53,19 @@
EXPECT_EQ("", invalid.scheme());
EXPECT_EQ("", invalid.host());
EXPECT_EQ(0, invalid.port());
- EXPECT_TRUE(invalid.IsInvalid());
- EXPECT_TRUE(invalid.Equals(invalid));
+ EXPECT_FALSE(invalid.IsValid());
+ EXPECT_EQ(invalid, invalid);
const char* urls[] = {
- "data:text/html,Hello!", "javascript:alert(1)",
- "file://example.com:443/etc/passwd",
+ // about:, data:, javascript: and other no-access schemes translate into
+ // an invalid SchemeHostPort
+ "about:blank", "about:blank#ref", "about:blank?query=123", "about:srcdoc",
+ "about:srcdoc#ref", "about:srcdoc?query=123", "data:text/html,Hello!",
+ "javascript:alert(1)",
+
+ // GURLs where GURL::is_valid returns false translate into an invalid
+ // SchemeHostPort.
+ "file://example.com:443/etc/passwd", "#!^%!$!&*",
// These schemes do not follow the generic URL syntax, so make sure we
// treat them as invalid (scheme, host, port) tuples (even though such
@@ -74,10 +84,10 @@
EXPECT_EQ("", tuple.scheme());
EXPECT_EQ("", tuple.host());
EXPECT_EQ(0, tuple.port());
- EXPECT_TRUE(tuple.IsInvalid());
- EXPECT_TRUE(tuple.Equals(tuple));
- EXPECT_TRUE(tuple.Equals(invalid));
- EXPECT_TRUE(invalid.Equals(tuple));
+ EXPECT_FALSE(tuple.IsValid());
+ EXPECT_EQ(tuple, tuple);
+ EXPECT_EQ(tuple, invalid);
+ EXPECT_EQ(invalid, tuple);
ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL());
}
}
@@ -90,9 +100,10 @@
} cases[] = {
{"http", "example.com", 80},
{"http", "example.com", 123},
+ {"http", "example.com", 0}, // 0 is a valid port for http.
{"https", "example.com", 443},
{"https", "example.com", 123},
- {"file", "", 0},
+ {"file", "", 0}, // 0 indicates "no port" for file: scheme.
{"file", "example.com", 0},
};
@@ -103,8 +114,8 @@
EXPECT_EQ(test.scheme, tuple.scheme());
EXPECT_EQ(test.host, tuple.host());
EXPECT_EQ(test.port, tuple.port());
- EXPECT_FALSE(tuple.IsInvalid());
- EXPECT_TRUE(tuple.Equals(tuple));
+ EXPECT_TRUE(tuple.IsValid());
+ EXPECT_EQ(tuple, tuple);
ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL());
}
}
@@ -129,8 +140,7 @@
{"http", "example.com\rnot-example.com", 80},
{"http", "example.com\n", 80},
{"http", "example.com\r", 80},
- {"http", "example.com", 0},
- {"file", "", 80}};
+ {"file", "", 80}}; // Can't have a port for file: scheme.
for (const auto& test : cases) {
SCOPED_TRACE(testing::Message() << test.scheme << "://" << test.host << ":"
@@ -139,8 +149,8 @@
EXPECT_EQ("", tuple.scheme());
EXPECT_EQ("", tuple.host());
EXPECT_EQ(0, tuple.port());
- EXPECT_TRUE(tuple.IsInvalid());
- EXPECT_TRUE(tuple.Equals(tuple));
+ EXPECT_FALSE(tuple.IsValid());
+ EXPECT_EQ(tuple, tuple);
ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL());
}
}
@@ -168,7 +178,7 @@
EXPECT_EQ("", tuple.scheme());
EXPECT_EQ("", tuple.host());
EXPECT_EQ(0, tuple.port());
- EXPECT_TRUE(tuple.IsInvalid());
+ EXPECT_FALSE(tuple.IsValid());
ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL());
}
}
@@ -203,8 +213,8 @@
EXPECT_EQ(test.scheme, tuple.scheme());
EXPECT_EQ(test.host, tuple.host());
EXPECT_EQ(test.port, tuple.port());
- EXPECT_FALSE(tuple.IsInvalid());
- EXPECT_TRUE(tuple.Equals(tuple));
+ EXPECT_TRUE(tuple.IsValid());
+ EXPECT_EQ(tuple, tuple);
ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL());
}
}
@@ -223,6 +233,7 @@
{"https://example.com:123/", "https://example.com:123"},
{"file:///etc/passwd", "file://"},
{"file://example.com/etc/passwd", "file://example.com"},
+ {"https://example.com:0/", "https://example.com:0"},
};
for (const auto& test : cases) {
@@ -251,10 +262,10 @@
{"https", "b", 81},
};
- for (size_t i = 0; i < arraysize(tuples); i++) {
+ for (size_t i = 0; i < std::size(tuples); i++) {
url::SchemeHostPort current(tuples[i].scheme, tuples[i].host,
tuples[i].port);
- for (size_t j = i; j < arraysize(tuples); j++) {
+ for (size_t j = i; j < std::size(tuples); j++) {
url::SchemeHostPort to_compare(tuples[j].scheme, tuples[j].host,
tuples[j].port);
EXPECT_EQ(i < j, current < to_compare) << i << " < " << j;
diff --git a/url/third_party/mozilla/url_parse.cc b/url/third_party/mozilla/url_parse.cc
index 278ee05..61fb94e 100644
--- a/url/third_party/mozilla/url_parse.cc
+++ b/url/third_party/mozilla/url_parse.cc
@@ -38,9 +38,9 @@
#include <stdlib.h>
-#include "base/logging.h"
-#include "starboard/common/string.h"
-#include "starboard/types.h"
+#include <ostream>
+
+#include "base/check_op.h"
#include "url/url_parse_internal.h"
#include "url/url_util.h"
#include "url/url_util_internal.h"
@@ -50,14 +50,14 @@
namespace {
// Returns true if the given character is a valid digit to use in a port.
-inline bool IsPortDigit(base::char16 ch) {
+inline bool IsPortDigit(char16_t ch) {
return ch >= '0' && ch <= '9';
}
// Returns the offset of the next authority terminator in the input starting
// from start_offset. If no terminator is found, the return value will be equal
// to spec_len.
-template<typename CHAR>
+template <typename CHAR>
int FindNextAuthorityTerminator(const CHAR* spec,
int start_offset,
int spec_len) {
@@ -68,7 +68,7 @@
return spec_len; // Not found.
}
-template<typename CHAR>
+template <typename CHAR>
void ParseUserInfo(const CHAR* spec,
const Component& user,
Component* username,
@@ -82,8 +82,7 @@
if (colon_offset < user.len) {
// Found separator: <username>:<password>
*username = Component(user.begin, colon_offset);
- *password = MakeRange(user.begin + colon_offset + 1,
- user.begin + user.len);
+ *password = MakeRange(user.begin + colon_offset + 1, user.begin + user.len);
} else {
// No separator, treat everything as the username
*username = user;
@@ -91,7 +90,7 @@
}
}
-template<typename CHAR>
+template <typename CHAR>
void ParseServerInfo(const CHAR* spec,
const Component& serverinfo,
Component* hostname,
@@ -141,7 +140,7 @@
// parts. The port number will be parsed and the resulting integer will be
// filled into the given *port variable, or -1 if there is no port number or it
// is invalid.
-template<typename CHAR>
+template <typename CHAR>
void DoParseAuthority(const CHAR* spec,
const Component& auth,
Component* username,
@@ -165,10 +164,10 @@
if (spec[i] == '@') {
// Found user info: <user-info>@<server-info>
- ParseUserInfo(spec, Component(auth.begin, i - auth.begin),
- username, password);
- ParseServerInfo(spec, MakeRange(i + 1, auth.begin + auth.len),
- hostname, port_num);
+ ParseUserInfo(spec, Component(auth.begin, i - auth.begin), username,
+ password);
+ ParseServerInfo(spec, MakeRange(i + 1, auth.begin + auth.len), hostname,
+ port_num);
} else {
// No user info, everything is server info.
username->reset();
@@ -179,30 +178,47 @@
template <typename CHAR>
inline void FindQueryAndRefParts(const CHAR* spec,
- const Component& path,
- int* query_separator,
- int* ref_separator) {
- int path_end = path.begin + path.len;
- for (int i = path.begin; i < path_end; i++) {
- switch (spec[i]) {
- case '?':
- // Only match the query string if it precedes the reference fragment
- // and when we haven't found one already.
- if (*query_separator < 0)
- *query_separator = i;
- break;
- case '#':
- // Record the first # sign only.
- if (*ref_separator < 0) {
- *ref_separator = i;
- return;
- }
- break;
+ const Component& path,
+ int* query_separator,
+ int* ref_separator) {
+ if constexpr (sizeof(*spec) == 1) {
+ // memchr is much faster than any scalar code we can write.
+ const CHAR* ptr = spec + path.begin;
+ const CHAR* first_hash =
+ reinterpret_cast<const CHAR*>(memchr(ptr, '#', path.len));
+ size_t len_before_fragment =
+ first_hash == nullptr ? path.len : first_hash - ptr;
+ const CHAR* first_question =
+ reinterpret_cast<const CHAR*>(memchr(ptr, '?', len_before_fragment));
+ if (first_hash != nullptr) {
+ *ref_separator = first_hash - spec;
+ }
+ if (first_question != nullptr) {
+ *query_separator = first_question - spec;
+ }
+ } else {
+ int path_end = path.begin + path.len;
+ for (int i = path.begin; i < path_end; i++) {
+ switch (spec[i]) {
+ case '?':
+ // Only match the query string if it precedes the reference fragment
+ // and when we haven't found one already.
+ if (*query_separator < 0)
+ *query_separator = i;
+ break;
+ case '#':
+ // Record the first # sign only.
+ if (*ref_separator < 0) {
+ *ref_separator = i;
+ return;
+ }
+ break;
+ }
}
}
}
-template<typename CHAR>
+template <typename CHAR>
void ParsePath(const CHAR* spec,
const Component& path,
Component* filepath,
@@ -217,7 +233,7 @@
ref->reset();
return;
}
- DCHECK(path.len > 0) << "We should never have 0 length paths";
+ DCHECK(path.is_nonempty()) << "We should never have 0 length paths";
// Search for first occurrence of either ? or #.
int query_separator = -1; // Index of the '?'
@@ -255,10 +271,8 @@
filepath->reset();
}
-template<typename CHAR>
-bool DoExtractScheme(const CHAR* url,
- int url_len,
- Component* scheme) {
+template <typename CHAR>
+bool DoExtractScheme(const CHAR* url, int url_len, Component* scheme) {
// Skip leading whitespace and control characters.
int begin = 0;
while (begin < url_len && ShouldTrimFromURL(url[begin]))
@@ -326,7 +340,7 @@
// The main parsing function for standard URLs. Standard URLs have a scheme,
// host, path, etc.
-template<typename CHAR>
+template <typename CHAR>
void DoParseStandardURL(const CHAR* spec, int spec_len, Parsed* parsed) {
DCHECK(spec_len >= 0);
@@ -347,7 +361,7 @@
DoParseAfterScheme(spec, spec_len, after_scheme, parsed);
}
-template<typename CHAR>
+template <typename CHAR>
void DoParseFileSystemURL(const CHAR* spec, int spec_len, Parsed* parsed) {
DCHECK(spec_len >= 0);
@@ -356,9 +370,9 @@
parsed->password.reset();
parsed->host.reset();
parsed->port.reset();
- parsed->path.reset(); // May use this; reset for convenience.
- parsed->ref.reset(); // May use this; reset for convenience.
- parsed->query.reset(); // May use this; reset for convenience.
+ parsed->path.reset(); // May use this; reset for convenience.
+ parsed->ref.reset(); // May use this; reset for convenience.
+ parsed->query.reset(); // May use this; reset for convenience.
parsed->clear_inner_parsed(); // May use this; reset for convenience.
// Strip leading & trailing spaces and control characters.
@@ -453,8 +467,7 @@
return;
}
int inner_path_end = inner_parsed.path.begin + 1; // skip the leading slash
- while (inner_path_end < spec_len &&
- !IsURLSlash(spec[inner_path_end]))
+ while (inner_path_end < spec_len && !IsURLSlash(spec[inner_path_end]))
++inner_path_end;
parsed->path.begin = inner_path_end;
int new_inner_path_length = inner_path_end - inner_parsed.path.begin;
@@ -464,8 +477,9 @@
// Initializes a path URL which is merely a scheme followed by a path. Examples
// include "about:foo" and "javascript:alert('bar');"
-template<typename CHAR>
-void DoParsePathURL(const CHAR* spec, int spec_len,
+template <typename CHAR>
+void DoParsePathURL(const CHAR* spec,
+ int spec_len,
bool trim_path_end,
Parsed* parsed) {
// Get the non-path and non-scheme parts of the URL out of the way, we never
@@ -507,14 +521,11 @@
return;
DCHECK_LT(path_begin, spec_len);
- ParsePath(spec,
- MakeRange(path_begin, spec_len),
- &parsed->path,
- &parsed->query,
- &parsed->ref);
+ ParsePath(spec, MakeRange(path_begin, spec_len), &parsed->path,
+ &parsed->query, &parsed->ref);
}
-template<typename CHAR>
+template <typename CHAR>
void DoParseMailtoURL(const CHAR* spec, int spec_len, Parsed* parsed) {
DCHECK(spec_len >= 0);
@@ -580,11 +591,11 @@
// sscanf but our input is not NULL-terminated, which sscanf requires. Instead,
// we copy the digits to a small stack buffer (since we know the maximum number
// of digits in a valid port number) that we can NULL terminate.
-template<typename CHAR>
+template <typename CHAR>
int DoParsePort(const CHAR* spec, const Component& component) {
// Easy success case when there is no port.
const int kMaxDigits = 5;
- if (!component.is_nonempty())
+ if (component.is_empty())
return PORT_UNSPECIFIED;
// Skip over any leading 0s.
@@ -623,12 +634,12 @@
return port;
}
-template<typename CHAR>
+template <typename CHAR>
void DoExtractFileName(const CHAR* spec,
const Component& path,
Component* file_name) {
// Handle empty paths: they have no file names.
- if (!path.is_nonempty()) {
+ if (path.is_empty()) {
file_name->reset();
return;
}
@@ -652,7 +663,7 @@
return;
}
-template<typename CHAR>
+template <typename CHAR>
bool DoExtractQueryKeyValue(const CHAR* spec,
Component* query,
Component* key,
@@ -692,6 +703,11 @@
} // namespace
+COMPONENT_EXPORT(URL)
+std::ostream& operator<<(std::ostream& os, const Component& component) {
+ return os << '{' << component.begin << ", " << component.len << "}";
+}
+
Parsed::Parsed() : potentially_dangling_markup(false), inner_parsed_(NULL) {}
Parsed::Parsed(const Parsed& other)
@@ -814,13 +830,13 @@
return DoExtractScheme(url, url_len, scheme);
}
-bool ExtractScheme(const base::char16* url, int url_len, Component* scheme) {
+bool ExtractScheme(const char16_t* url, int url_len, Component* scheme) {
return DoExtractScheme(url, url_len, scheme);
}
// This handles everything that may be an authority terminator, including
// backslash. For special backslash handling see DoParseAfterScheme.
-bool IsAuthorityTerminator(base::char16 ch) {
+bool IsAuthorityTerminator(char16_t ch) {
return IsURLSlash(ch) || ch == '?' || ch == '#';
}
@@ -830,7 +846,7 @@
DoExtractFileName(url, path, file_name);
}
-void ExtractFileName(const base::char16* url,
+void ExtractFileName(const char16_t* url,
const Component& path,
Component* file_name) {
DoExtractFileName(url, path, file_name);
@@ -843,7 +859,7 @@
return DoExtractQueryKeyValue(url, query, key, value);
}
-bool ExtractQueryKeyValue(const base::char16* url,
+bool ExtractQueryKeyValue(const char16_t* url,
Component* query,
Component* key,
Component* value) {
@@ -859,7 +875,7 @@
DoParseAuthority(spec, auth, username, password, hostname, port_num);
}
-void ParseAuthority(const base::char16* spec,
+void ParseAuthority(const char16_t* spec,
const Component& auth,
Component* username,
Component* password,
@@ -872,7 +888,7 @@
return DoParsePort(url, port);
}
-int ParsePort(const base::char16* url, const Component& port) {
+int ParsePort(const char16_t* url, const Component& port) {
return DoParsePort(url, port);
}
@@ -880,7 +896,7 @@
DoParseStandardURL(url, url_len, parsed);
}
-void ParseStandardURL(const base::char16* url, int url_len, Parsed* parsed) {
+void ParseStandardURL(const char16_t* url, int url_len, Parsed* parsed) {
DoParseStandardURL(url, url_len, parsed);
}
@@ -891,7 +907,7 @@
DoParsePathURL(url, url_len, trim_path_end, parsed);
}
-void ParsePathURL(const base::char16* url,
+void ParsePathURL(const char16_t* url,
int url_len,
bool trim_path_end,
Parsed* parsed) {
@@ -902,7 +918,7 @@
DoParseFileSystemURL(url, url_len, parsed);
}
-void ParseFileSystemURL(const base::char16* url, int url_len, Parsed* parsed) {
+void ParseFileSystemURL(const char16_t* url, int url_len, Parsed* parsed) {
DoParseFileSystemURL(url, url_len, parsed);
}
@@ -910,7 +926,7 @@
DoParseMailtoURL(url, url_len, parsed);
}
-void ParseMailtoURL(const base::char16* url, int url_len, Parsed* parsed) {
+void ParseMailtoURL(const char16_t* url, int url_len, Parsed* parsed) {
DoParseMailtoURL(url, url_len, parsed);
}
@@ -922,7 +938,7 @@
ParsePath(spec, path, filepath, query, ref);
}
-void ParsePathInternal(const base::char16* spec,
+void ParsePathInternal(const char16_t* spec,
const Component& path,
Component* filepath,
Component* query,
@@ -937,7 +953,7 @@
DoParseAfterScheme(spec, spec_len, after_scheme, parsed);
}
-void ParseAfterScheme(const base::char16* spec,
+void ParseAfterScheme(const char16_t* spec,
int spec_len,
int after_scheme,
Parsed* parsed) {
diff --git a/url/third_party/mozilla/url_parse.h b/url/third_party/mozilla/url_parse.h
index 6d40d3f..9e824ba 100644
--- a/url/third_party/mozilla/url_parse.h
+++ b/url/third_party/mozilla/url_parse.h
@@ -1,12 +1,13 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef URL_THIRD_PARTY_MOZILLA_URL_PARSE_H_
#define URL_THIRD_PARTY_MOZILLA_URL_PARSE_H_
-#include "base/strings/string16.h"
-#include "url/url_export.h"
+#include <iosfwd>
+
+#include "base/component_export.h"
namespace url {
@@ -23,17 +24,14 @@
return begin + len;
}
- // Returns true if this component is valid, meaning the length is given. Even
- // valid components may be empty to record the fact that they exist.
- bool is_valid() const {
- return (len != -1);
- }
+ // Returns true if this component is valid, meaning the length is given.
+ // Valid components may be empty to record the fact that they exist.
+ bool is_valid() const { return len >= 0; }
- // Returns true if the given component is specified on false, the component
- // is either empty or invalid.
- bool is_nonempty() const {
- return (len > 0);
- }
+ // Determine if the component is empty or not. Empty means the length is
+ // zero or the component is invalid.
+ bool is_empty() const { return len <= 0; }
+ bool is_nonempty() const { return len > 0; }
void reset() {
begin = 0;
@@ -48,6 +46,10 @@
int len; // Will be -1 if the component is unspecified.
};
+// Permit printing Components by CHECK macros.
+COMPONENT_EXPORT(URL)
+std::ostream& operator<<(std::ostream& os, const Component& component);
+
// Helper that returns a component created with the given begin and ending
// points. The ending point is non-inclusive.
inline Component MakeRange(int begin, int end) {
@@ -74,7 +76,7 @@
// else
// ParsePathURL(url, url_len, &parsed);
//
-struct URL_EXPORT Parsed {
+struct COMPONENT_EXPORT(URL) Parsed {
// Identifies different components.
enum ComponentType {
SCHEME,
@@ -202,7 +204,7 @@
void clear_inner_parsed() {
if (inner_parsed_) {
delete inner_parsed_;
- inner_parsed_ = NULL;
+ inner_parsed_ = nullptr;
}
}
@@ -227,46 +229,44 @@
// StandardURL is for when the scheme is known to be one that has an
// authority (host) like "http". This function will not handle weird ones
// like "about:" and "javascript:", or do the right thing for "file:" URLs.
-URL_EXPORT void ParseStandardURL(const char* url,
- int url_len,
- Parsed* parsed);
-URL_EXPORT void ParseStandardURL(const base::char16* url,
- int url_len,
- Parsed* parsed);
+COMPONENT_EXPORT(URL)
+void ParseStandardURL(const char* url, int url_len, Parsed* parsed);
+COMPONENT_EXPORT(URL)
+void ParseStandardURL(const char16_t* url, int url_len, Parsed* parsed);
// PathURL is for when the scheme is known not to have an authority (host)
// section but that aren't file URLs either. The scheme is parsed, and
// everything after the scheme is considered as the path. This is used for
// things like "about:" and "javascript:"
-URL_EXPORT void ParsePathURL(const char* url,
- int url_len,
- bool trim_path_end,
- Parsed* parsed);
-URL_EXPORT void ParsePathURL(const base::char16* url,
- int url_len,
- bool trim_path_end,
- Parsed* parsed);
+COMPONENT_EXPORT(URL)
+void ParsePathURL(const char* url,
+ int url_len,
+ bool trim_path_end,
+ Parsed* parsed);
+COMPONENT_EXPORT(URL)
+void ParsePathURL(const char16_t* url,
+ int url_len,
+ bool trim_path_end,
+ Parsed* parsed);
// FileURL is for file URLs. There are some special rules for interpreting
// these.
-URL_EXPORT void ParseFileURL(const char* url, int url_len, Parsed* parsed);
-URL_EXPORT void ParseFileURL(const base::char16* url,
- int url_len,
- Parsed* parsed);
+COMPONENT_EXPORT(URL)
+void ParseFileURL(const char* url, int url_len, Parsed* parsed);
+COMPONENT_EXPORT(URL)
+void ParseFileURL(const char16_t* url, int url_len, Parsed* parsed);
// Filesystem URLs are structured differently than other URLs.
-URL_EXPORT void ParseFileSystemURL(const char* url,
- int url_len,
- Parsed* parsed);
-URL_EXPORT void ParseFileSystemURL(const base::char16* url,
- int url_len,
- Parsed* parsed);
+COMPONENT_EXPORT(URL)
+void ParseFileSystemURL(const char* url, int url_len, Parsed* parsed);
+COMPONENT_EXPORT(URL)
+void ParseFileSystemURL(const char16_t* url, int url_len, Parsed* parsed);
// MailtoURL is for mailto: urls. They are made up scheme,path,query
-URL_EXPORT void ParseMailtoURL(const char* url, int url_len, Parsed* parsed);
-URL_EXPORT void ParseMailtoURL(const base::char16* url,
- int url_len,
- Parsed* parsed);
+COMPONENT_EXPORT(URL)
+void ParseMailtoURL(const char* url, int url_len, Parsed* parsed);
+COMPONENT_EXPORT(URL)
+void ParseMailtoURL(const char16_t* url, int url_len, Parsed* parsed);
// Helper functions -----------------------------------------------------------
@@ -290,31 +290,31 @@
// end of the string).
//
// The 8-bit version requires UTF-8 encoding.
-URL_EXPORT bool ExtractScheme(const char* url,
- int url_len,
- Component* scheme);
-URL_EXPORT bool ExtractScheme(const base::char16* url,
- int url_len,
- Component* scheme);
+COMPONENT_EXPORT(URL)
+bool ExtractScheme(const char* url, int url_len, Component* scheme);
+COMPONENT_EXPORT(URL)
+bool ExtractScheme(const char16_t* url, int url_len, Component* scheme);
// Returns true if ch is a character that terminates the authority segment
// of a URL.
-URL_EXPORT bool IsAuthorityTerminator(base::char16 ch);
+COMPONENT_EXPORT(URL) bool IsAuthorityTerminator(char16_t ch);
// Does a best effort parse of input |spec|, in range |auth|. If a particular
// component is not found, it will be set to invalid.
-URL_EXPORT void ParseAuthority(const char* spec,
- const Component& auth,
- Component* username,
- Component* password,
- Component* hostname,
- Component* port_num);
-URL_EXPORT void ParseAuthority(const base::char16* spec,
- const Component& auth,
- Component* username,
- Component* password,
- Component* hostname,
- Component* port_num);
+COMPONENT_EXPORT(URL)
+void ParseAuthority(const char* spec,
+ const Component& auth,
+ Component* username,
+ Component* password,
+ Component* hostname,
+ Component* port_num);
+COMPONENT_EXPORT(URL)
+void ParseAuthority(const char16_t* spec,
+ const Component& auth,
+ Component* username,
+ Component* password,
+ Component* hostname,
+ Component* port_num);
// Computes the integer port value from the given port component. The port
// component should have been identified by one of the init functions on
@@ -323,8 +323,9 @@
// The return value will be a positive integer between 0 and 64K, or one of
// the two special values below.
enum SpecialPort { PORT_UNSPECIFIED = -1, PORT_INVALID = -2 };
-URL_EXPORT int ParsePort(const char* url, const Component& port);
-URL_EXPORT int ParsePort(const base::char16* url, const Component& port);
+COMPONENT_EXPORT(URL) int ParsePort(const char* url, const Component& port);
+COMPONENT_EXPORT(URL)
+int ParsePort(const char16_t* url, const Component& port);
// Extracts the range of the file name in the given url. The path must
// already have been computed by the parse function, and the matching URL
@@ -336,12 +337,14 @@
// following the last slash.
//
// The 8-bit version requires UTF-8 encoding.
-URL_EXPORT void ExtractFileName(const char* url,
- const Component& path,
- Component* file_name);
-URL_EXPORT void ExtractFileName(const base::char16* url,
- const Component& path,
- Component* file_name);
+COMPONENT_EXPORT(URL)
+void ExtractFileName(const char* url,
+ const Component& path,
+ Component* file_name);
+COMPONENT_EXPORT(URL)
+void ExtractFileName(const char16_t* url,
+ const Component& path,
+ Component* file_name);
// Extract the first key/value from the range defined by |*query|. Updates
// |*query| to start at the end of the extracted key/value pair. This is
@@ -358,14 +361,16 @@
//
// If no key/value are found |*key| and |*value| will be unchanged and it will
// return false.
-URL_EXPORT bool ExtractQueryKeyValue(const char* url,
- Component* query,
- Component* key,
- Component* value);
-URL_EXPORT bool ExtractQueryKeyValue(const base::char16* url,
- Component* query,
- Component* key,
- Component* value);
+COMPONENT_EXPORT(URL)
+bool ExtractQueryKeyValue(const char* url,
+ Component* query,
+ Component* key,
+ Component* value);
+COMPONENT_EXPORT(URL)
+bool ExtractQueryKeyValue(const char16_t* url,
+ Component* query,
+ Component* key,
+ Component* value);
} // namespace url
diff --git a/url/url_canon.cc b/url/url_canon.cc
index d51a317..bbacaa7 100644
--- a/url/url_canon.cc
+++ b/url/url_canon.cc
@@ -1,12 +1,15 @@
-// Copyright 2017 The Chromium Authors. All rights reserved.
+// Copyright 2017 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "url/url_canon.h"
+#include "base/component_export.h"
+
namespace url {
-template class EXPORT_TEMPLATE_DEFINE(URL_EXPORT) CanonOutputT<char>;
-template class EXPORT_TEMPLATE_DEFINE(URL_EXPORT) CanonOutputT<base::char16>;
+template class EXPORT_TEMPLATE_DEFINE(COMPONENT_EXPORT(URL)) CanonOutputT<char>;
+template class EXPORT_TEMPLATE_DEFINE(COMPONENT_EXPORT(URL))
+ CanonOutputT<char16_t>;
} // namespace url
diff --git a/url/url_canon.h b/url/url_canon.h
index 626196d..94b4442 100644
--- a/url/url_canon.h
+++ b/url/url_canon.h
@@ -1,4 +1,4 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -8,12 +8,11 @@
#include <stdlib.h>
#include <string.h>
+#include "base/component_export.h"
#include "base/export_template.h"
-#include "base/strings/string16.h"
-#include "starboard/memory.h"
-#include "starboard/types.h"
+#include "base/memory/raw_ptr_exclusion.h"
+#include "base/numerics/clamped_math.h"
#include "url/third_party/mozilla/url_parse.h"
-#include "url/url_export.h"
namespace url {
@@ -27,56 +26,42 @@
// resize function that is called when the existing buffer is not big enough.
// The derived class is then in charge of setting up our buffer which we will
// manage.
-template<typename T>
+template <typename T>
class CanonOutputT {
public:
- CanonOutputT() : buffer_(NULL), buffer_len_(0), cur_len_(0) {
- }
- virtual ~CanonOutputT() {
- }
+ CanonOutputT() = default;
+ virtual ~CanonOutputT() = default;
// Implemented to resize the buffer. This function should update the buffer
// pointer to point to the new buffer, and any old data up to |cur_len_| in
// the buffer must be copied over.
//
// The new size |sz| must be larger than buffer_len_.
- virtual void Resize(int sz) = 0;
+ virtual void Resize(size_t sz) = 0;
// Accessor for returning a character at a given position. The input offset
// must be in the valid range.
- inline T at(int offset) const {
- return buffer_[offset];
- }
+ inline T at(size_t offset) const { return buffer_[offset]; }
// Sets the character at the given position. The given position MUST be less
// than the length().
- inline void set(int offset, T ch) {
- buffer_[offset] = ch;
- }
+ inline void set(size_t offset, T ch) { buffer_[offset] = ch; }
// Returns the number of characters currently in the buffer.
- inline int length() const {
- return cur_len_;
- }
+ inline size_t length() const { return cur_len_; }
// Returns the current capacity of the buffer. The length() is the number of
// characters that have been declared to be written, but the capacity() is
// the number that can be written without reallocation. If the caller must
// write many characters at once, it can make sure there is enough capacity,
// write the data, then use set_size() to declare the new length().
- int capacity() const {
- return buffer_len_;
- }
+ size_t capacity() const { return buffer_len_; }
// Called by the user of this class to get the output. The output will NOT
// be NULL-terminated. Call length() to get the
// length.
- const T* data() const {
- return buffer_;
- }
- T* data() {
- return buffer_;
- }
+ const T* data() const { return buffer_; }
+ T* data() { return buffer_; }
// Shortens the URL to the new length. Used for "backing up" when processing
// relative paths. This can also be used if an external function writes a lot
@@ -84,9 +69,7 @@
// to declare the new length.
//
// This MUST NOT be used to expand the size of the buffer beyond capacity().
- void set_length(int new_len) {
- cur_len_ = new_len;
- }
+ void set_length(size_t new_len) { cur_len_ = new_len; }
// This is the most performance critical function, since it is called for
// every character.
@@ -110,28 +93,27 @@
}
// Appends the given string to the output.
- void Append(const T* str, int str_len) {
- if (cur_len_ + str_len > buffer_len_) {
- if (!Grow(cur_len_ + str_len - buffer_len_))
+ void Append(const T* str, size_t str_len) {
+ if (str_len > buffer_len_ - cur_len_) {
+ if (!Grow(str_len - (buffer_len_ - cur_len_)))
return;
}
- for (int i = 0; i < str_len; i++)
- buffer_[cur_len_ + i] = str[i];
+ memcpy(buffer_ + cur_len_, str, str_len * sizeof(T));
cur_len_ += str_len;
}
- void ReserveSizeIfNeeded(int estimated_size) {
+ void ReserveSizeIfNeeded(size_t estimated_size) {
// Reserve a bit extra to account for escaped chars.
if (estimated_size > buffer_len_)
- Resize(estimated_size + 8);
+ Resize((base::ClampedNumeric<size_t>(estimated_size) + 8).RawValue());
}
protected:
// Grows the given buffer so that it can fit at least |min_additional|
// characters. Returns true if the buffer could be resized, false on OOM.
- bool Grow(int min_additional) {
- static const int kMinBufferLen = 16;
- int new_len = (buffer_len_ == 0) ? kMinBufferLen : buffer_len_;
+ bool Grow(size_t min_additional) {
+ static const size_t kMinBufferLen = 16;
+ size_t new_len = (buffer_len_ == 0) ? kMinBufferLen : buffer_len_;
do {
if (new_len >= (1 << 30)) // Prevent overflow below.
return false;
@@ -141,17 +123,19 @@
return true;
}
- T* buffer_;
- int buffer_len_;
+ // `buffer_` is not a raw_ptr<...> for performance reasons (based on analysis
+ // of sampling profiler data).
+ RAW_PTR_EXCLUSION T* buffer_ = nullptr;
+ size_t buffer_len_ = 0;
// Used characters in the buffer.
- int cur_len_;
+ size_t cur_len_ = 0;
};
// Simple implementation of the CanonOutput using new[]. This class
// also supports a static buffer so if it is allocated on the stack, most
// URLs can be canonicalized with no heap allocations.
-template<typename T, int fixed_capacity = 1024>
+template <typename T, int fixed_capacity = 1024>
class RawCanonOutputT : public CanonOutputT<T> {
public:
RawCanonOutputT() : CanonOutputT<T>() {
@@ -163,7 +147,7 @@
delete[] this->buffer_;
}
- void Resize(int sz) override {
+ void Resize(size_t sz) override {
T* new_buf = new T[sz];
memcpy(new_buf, this->buffer_,
sizeof(T) * (this->cur_len_ < sz ? this->cur_len_ : sz));
@@ -178,20 +162,21 @@
};
// Explicitely instantiate commonly used instatiations.
-extern template class EXPORT_TEMPLATE_DECLARE(URL_EXPORT) CanonOutputT<char>;
-extern template class EXPORT_TEMPLATE_DECLARE(URL_EXPORT)
- CanonOutputT<base::char16>;
+extern template class EXPORT_TEMPLATE_DECLARE(COMPONENT_EXPORT(URL))
+ CanonOutputT<char>;
+extern template class EXPORT_TEMPLATE_DECLARE(COMPONENT_EXPORT(URL))
+ CanonOutputT<char16_t>;
// Normally, all canonicalization output is in narrow characters. We support
// the templates so it can also be used internally if a wide buffer is
// required.
typedef CanonOutputT<char> CanonOutput;
-typedef CanonOutputT<base::char16> CanonOutputW;
+typedef CanonOutputT<char16_t> CanonOutputW;
-template<int fixed_capacity>
+template <int fixed_capacity>
class RawCanonOutput : public RawCanonOutputT<char, fixed_capacity> {};
-template<int fixed_capacity>
-class RawCanonOutputW : public RawCanonOutputT<base::char16, fixed_capacity> {};
+template <int fixed_capacity>
+class RawCanonOutputW : public RawCanonOutputT<char16_t, fixed_capacity> {};
// Character set converter ----------------------------------------------------
//
@@ -201,7 +186,7 @@
//
// Embedders will want to see the unit test for the ICU version.
-class URL_EXPORT CharsetConverter {
+class COMPONENT_EXPORT(URL) CharsetConverter {
public:
CharsetConverter() {}
virtual ~CharsetConverter() {}
@@ -217,7 +202,7 @@
// decimal, (such as "你") with escaping of the ampersand, number
// sign, and semicolon (in the previous example it would be
// "%26%2320320%3B"). This rule is based on what IE does in this situation.
- virtual void ConvertFromUTF16(const base::char16* input,
+ virtual void ConvertFromUTF16(const char16_t* input,
int input_len,
CanonOutput* output) = 0;
};
@@ -268,17 +253,18 @@
// If |input| contained both removable whitespace and a raw `<` character,
// |potentially_dangling_markup| will be set to `true`. Otherwise, it will be
// left untouched.
-URL_EXPORT const char* RemoveURLWhitespace(const char* input,
- int input_len,
- CanonOutputT<char>* buffer,
- int* output_len,
- bool* potentially_dangling_markup);
-URL_EXPORT const base::char16* RemoveURLWhitespace(
- const base::char16* input,
- int input_len,
- CanonOutputT<base::char16>* buffer,
- int* output_len,
- bool* potentially_dangling_markup);
+COMPONENT_EXPORT(URL)
+const char* RemoveURLWhitespace(const char* input,
+ int input_len,
+ CanonOutputT<char>* buffer,
+ int* output_len,
+ bool* potentially_dangling_markup);
+COMPONENT_EXPORT(URL)
+const char16_t* RemoveURLWhitespace(const char16_t* input,
+ int input_len,
+ CanonOutputT<char16_t>* buffer,
+ int* output_len,
+ bool* potentially_dangling_markup);
// IDN ------------------------------------------------------------------------
@@ -291,14 +277,13 @@
// the length of the output will be set to the length of the new host name.
//
// On error, returns false. The output in this case is undefined.
-URL_EXPORT bool IDNToASCII(const base::char16* src,
- int src_len,
- CanonOutputW* output);
+COMPONENT_EXPORT(URL)
+bool IDNToASCII(const char16_t* src, int src_len, CanonOutputW* output);
// Piece-by-piece canonicalizers ----------------------------------------------
//
// These individual canonicalizers append the canonicalized versions of the
-// corresponding URL component to the given std::string. The spec and the
+// corresponding URL component to the given CanonOutput. The spec and the
// previously-identified range of that component are the input. The range of
// the canonicalized component will be written to the output component.
//
@@ -319,14 +304,16 @@
// URLs.
//
// The 8-bit version requires UTF-8 encoding.
-URL_EXPORT bool CanonicalizeScheme(const char* spec,
- const Component& scheme,
- CanonOutput* output,
- Component* out_scheme);
-URL_EXPORT bool CanonicalizeScheme(const base::char16* spec,
- const Component& scheme,
- CanonOutput* output,
- Component* out_scheme);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeScheme(const char* spec,
+ const Component& scheme,
+ CanonOutput* output,
+ Component* out_scheme);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeScheme(const char16_t* spec,
+ const Component& scheme,
+ CanonOutput* output,
+ Component* out_scheme);
// User info: username/password. If present, this will add the delimiters so
// the output will be "<username>:<password>@" or "<username>@". Empty
@@ -338,20 +325,22 @@
// is legal as long as the two components don't overlap.
//
// The 8-bit version requires UTF-8 encoding.
-URL_EXPORT bool CanonicalizeUserInfo(const char* username_source,
- const Component& username,
- const char* password_source,
- const Component& password,
- CanonOutput* output,
- Component* out_username,
- Component* out_password);
-URL_EXPORT bool CanonicalizeUserInfo(const base::char16* username_source,
- const Component& username,
- const base::char16* password_source,
- const Component& password,
- CanonOutput* output,
- Component* out_username,
- Component* out_password);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeUserInfo(const char* username_source,
+ const Component& username,
+ const char* password_source,
+ const Component& password,
+ CanonOutput* output,
+ Component* out_username,
+ Component* out_password);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeUserInfo(const char16_t* username_source,
+ const Component& username,
+ const char16_t* password_source,
+ const Component& password,
+ CanonOutput* output,
+ Component* out_username,
+ Component* out_password);
// This structure holds detailed state exported from the IP/Host canonicalizers.
// Additional fields may be added as callers require them.
@@ -363,16 +352,16 @@
// This field summarizes how the input was classified by the canonicalizer.
enum Family {
- NEUTRAL, // - Doesn't resemble an IP address. As far as the IP
- // canonicalizer is concerned, it should be treated as a
- // hostname.
- BROKEN, // - Almost an IP, but was not canonicalized. This could be an
- // IPv4 address where truncation occurred, or something
- // containing the special characters :[] which did not parse
- // as an IPv6 address. Never attempt to connect to this
- // address, because it might actually succeed!
- IPV4, // - Successfully canonicalized as an IPv4 address.
- IPV6, // - Successfully canonicalized as an IPv6 address.
+ NEUTRAL, // - Doesn't resemble an IP address. As far as the IP
+ // canonicalizer is concerned, it should be treated as a
+ // hostname.
+ BROKEN, // - Almost an IP, but was not canonicalized. This could be an
+ // IPv4 address where truncation occurred, or something
+ // containing the special characters :[] which did not parse
+ // as an IPv6 address. Never attempt to connect to this
+ // address, because it might actually succeed!
+ IPV4, // - Successfully canonicalized as an IPv4 address.
+ IPV6, // - Successfully canonicalized as an IPv6 address.
};
Family family;
@@ -398,32 +387,35 @@
}
};
-
// Host.
//
// The 8-bit version requires UTF-8 encoding. Use this version when you only
// need to know whether canonicalization succeeded.
-URL_EXPORT bool CanonicalizeHost(const char* spec,
- const Component& host,
- CanonOutput* output,
- Component* out_host);
-URL_EXPORT bool CanonicalizeHost(const base::char16* spec,
- const Component& host,
- CanonOutput* output,
- Component* out_host);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeHost(const char* spec,
+ const Component& host,
+ CanonOutput* output,
+ Component* out_host);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeHost(const char16_t* spec,
+ const Component& host,
+ CanonOutput* output,
+ Component* out_host);
// Extended version of CanonicalizeHost, which returns additional information.
// Use this when you need to know whether the hostname was an IP address.
// A successful return is indicated by host_info->family != BROKEN. See the
// definition of CanonHostInfo above for details.
-URL_EXPORT void CanonicalizeHostVerbose(const char* spec,
- const Component& host,
- CanonOutput* output,
- CanonHostInfo* host_info);
-URL_EXPORT void CanonicalizeHostVerbose(const base::char16* spec,
- const Component& host,
- CanonOutput* output,
- CanonHostInfo* host_info);
+COMPONENT_EXPORT(URL)
+void CanonicalizeHostVerbose(const char* spec,
+ const Component& host,
+ CanonOutput* output,
+ CanonHostInfo* host_info);
+COMPONENT_EXPORT(URL)
+void CanonicalizeHostVerbose(const char16_t* spec,
+ const Component& host,
+ CanonOutput* output,
+ CanonHostInfo* host_info);
// Canonicalizes a string according to the host canonicalization rules. Unlike
// CanonicalizeHost, this will not check for IP addresses which can change the
@@ -445,12 +437,14 @@
// Returns true if the host was valid. This function will treat a 0-length
// host as valid (because it's designed to be used for substrings) while the
// full version above will mark empty hosts as broken.
-URL_EXPORT bool CanonicalizeHostSubstring(const char* spec,
- const Component& host,
- CanonOutput* output);
-URL_EXPORT bool CanonicalizeHostSubstring(const base::char16* spec,
- const Component& host,
- CanonOutput* output);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeHostSubstring(const char* spec,
+ const Component& host,
+ CanonOutput* output);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeHostSubstring(const char16_t* spec,
+ const Component& host,
+ CanonOutput* output);
// IP addresses.
//
@@ -462,34 +456,39 @@
// This is called AUTOMATICALLY from the host canonicalizer, which ensures that
// the input is unescaped and name-prepped, etc. It should not normally be
// necessary or wise to call this directly.
-URL_EXPORT void CanonicalizeIPAddress(const char* spec,
- const Component& host,
- CanonOutput* output,
- CanonHostInfo* host_info);
-URL_EXPORT void CanonicalizeIPAddress(const base::char16* spec,
- const Component& host,
- CanonOutput* output,
- CanonHostInfo* host_info);
+COMPONENT_EXPORT(URL)
+void CanonicalizeIPAddress(const char* spec,
+ const Component& host,
+ CanonOutput* output,
+ CanonHostInfo* host_info);
+COMPONENT_EXPORT(URL)
+void CanonicalizeIPAddress(const char16_t* spec,
+ const Component& host,
+ CanonOutput* output,
+ CanonHostInfo* host_info);
// Port: this function will add the colon for the port if a port is present.
// The caller can pass PORT_UNSPECIFIED as the
// default_port_for_scheme argument if there is no default port.
//
// The 8-bit version requires UTF-8 encoding.
-URL_EXPORT bool CanonicalizePort(const char* spec,
- const Component& port,
- int default_port_for_scheme,
- CanonOutput* output,
- Component* out_port);
-URL_EXPORT bool CanonicalizePort(const base::char16* spec,
- const Component& port,
- int default_port_for_scheme,
- CanonOutput* output,
- Component* out_port);
+COMPONENT_EXPORT(URL)
+bool CanonicalizePort(const char* spec,
+ const Component& port,
+ int default_port_for_scheme,
+ CanonOutput* output,
+ Component* out_port);
+COMPONENT_EXPORT(URL)
+bool CanonicalizePort(const char16_t* spec,
+ const Component& port,
+ int default_port_for_scheme,
+ CanonOutput* output,
+ Component* out_port);
// Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED
-// if the scheme is unknown.
-URL_EXPORT int DefaultPortForScheme(const char* scheme, int scheme_len);
+// if the scheme is unknown. Based on https://url.spec.whatwg.org/#default-port
+COMPONENT_EXPORT(URL)
+int DefaultPortForScheme(const char* scheme, int scheme_len);
// Path. If the input does not begin in a slash (including if the input is
// empty), we'll prepend a slash to the path to make it canonical.
@@ -500,14 +499,29 @@
// an issue. Somebody giving us an 8-bit path is responsible for generating
// the path that the server expects (we'll escape high-bit characters), so
// if something is invalid, it's their problem.
-URL_EXPORT bool CanonicalizePath(const char* spec,
- const Component& path,
- CanonOutput* output,
- Component* out_path);
-URL_EXPORT bool CanonicalizePath(const base::char16* spec,
- const Component& path,
- CanonOutput* output,
- Component* out_path);
+COMPONENT_EXPORT(URL)
+bool CanonicalizePath(const char* spec,
+ const Component& path,
+ CanonOutput* output,
+ Component* out_path);
+COMPONENT_EXPORT(URL)
+bool CanonicalizePath(const char16_t* spec,
+ const Component& path,
+ CanonOutput* output,
+ Component* out_path);
+
+// Like CanonicalizePath(), but does not assume that it's operating on the
+// entire path. It therefore does not prepend a slash, etc.
+COMPONENT_EXPORT(URL)
+bool CanonicalizePartialPath(const char* spec,
+ const Component& path,
+ CanonOutput* output,
+ Component* out_path);
+COMPONENT_EXPORT(URL)
+bool CanonicalizePartialPath(const char16_t* spec,
+ const Component& path,
+ CanonOutput* output,
+ Component* out_path);
// Canonicalizes the input as a file path. This is like CanonicalizePath except
// that it also handles Windows drive specs. For example, the path can begin
@@ -515,14 +529,16 @@
// The string will be appended to |*output| and |*out_path| will be updated.
//
// The 8-bit version requires UTF-8 encoding.
-URL_EXPORT bool FileCanonicalizePath(const char* spec,
- const Component& path,
- CanonOutput* output,
- Component* out_path);
-URL_EXPORT bool FileCanonicalizePath(const base::char16* spec,
- const Component& path,
- CanonOutput* output,
- Component* out_path);
+COMPONENT_EXPORT(URL)
+bool FileCanonicalizePath(const char* spec,
+ const Component& path,
+ CanonOutput* output,
+ Component* out_path);
+COMPONENT_EXPORT(URL)
+bool FileCanonicalizePath(const char16_t* spec,
+ const Component& path,
+ CanonOutput* output,
+ Component* out_path);
// Query: Prepends the ? if needed.
//
@@ -536,16 +552,18 @@
// if necessary, for ASCII input, no conversions are necessary.
//
// The converter can be NULL. In this case, the output encoding will be UTF-8.
-URL_EXPORT void CanonicalizeQuery(const char* spec,
- const Component& query,
- CharsetConverter* converter,
- CanonOutput* output,
- Component* out_query);
-URL_EXPORT void CanonicalizeQuery(const base::char16* spec,
- const Component& query,
- CharsetConverter* converter,
- CanonOutput* output,
- Component* out_query);
+COMPONENT_EXPORT(URL)
+void CanonicalizeQuery(const char* spec,
+ const Component& query,
+ CharsetConverter* converter,
+ CanonOutput* output,
+ Component* out_query);
+COMPONENT_EXPORT(URL)
+void CanonicalizeQuery(const char16_t* spec,
+ const Component& query,
+ CharsetConverter* converter,
+ CanonOutput* output,
+ Component* out_query);
// Ref: Prepends the # if needed. The output will be UTF-8 (this is the only
// canonicalizer that does not produce ASCII output). The output is
@@ -553,14 +571,16 @@
//
// This function will not fail. If the input is invalid UTF-8/UTF-16, we'll use
// the "Unicode replacement character" for the confusing bits and copy the rest.
-URL_EXPORT void CanonicalizeRef(const char* spec,
- const Component& path,
- CanonOutput* output,
- Component* out_path);
-URL_EXPORT void CanonicalizeRef(const base::char16* spec,
- const Component& path,
- CanonOutput* output,
- Component* out_path);
+COMPONENT_EXPORT(URL)
+void CanonicalizeRef(const char* spec,
+ const Component& path,
+ CanonOutput* output,
+ Component* out_path);
+COMPONENT_EXPORT(URL)
+void CanonicalizeRef(const char16_t* spec,
+ const Component& path,
+ CanonOutput* output,
+ Component* out_path);
// Full canonicalizer ---------------------------------------------------------
//
@@ -573,77 +593,100 @@
// The 8-bit versions require UTF-8 encoding.
// Use for standard URLs with authorities and paths.
-URL_EXPORT bool CanonicalizeStandardURL(const char* spec,
- int spec_len,
- const Parsed& parsed,
- SchemeType scheme_type,
- CharsetConverter* query_converter,
- CanonOutput* output,
- Parsed* new_parsed);
-URL_EXPORT bool CanonicalizeStandardURL(const base::char16* spec,
- int spec_len,
- const Parsed& parsed,
- SchemeType scheme_type,
- CharsetConverter* query_converter,
- CanonOutput* output,
- Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeStandardURL(const char* spec,
+ int spec_len,
+ const Parsed& parsed,
+ SchemeType scheme_type,
+ CharsetConverter* query_converter,
+ CanonOutput* output,
+ Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeStandardURL(const char16_t* spec,
+ int spec_len,
+ const Parsed& parsed,
+ SchemeType scheme_type,
+ CharsetConverter* query_converter,
+ CanonOutput* output,
+ Parsed* new_parsed);
// Use for file URLs.
-URL_EXPORT bool CanonicalizeFileURL(const char* spec,
- int spec_len,
- const Parsed& parsed,
- CharsetConverter* query_converter,
- CanonOutput* output,
- Parsed* new_parsed);
-URL_EXPORT bool CanonicalizeFileURL(const base::char16* spec,
- int spec_len,
- const Parsed& parsed,
- CharsetConverter* query_converter,
- CanonOutput* output,
- Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeFileURL(const char* spec,
+ int spec_len,
+ const Parsed& parsed,
+ CharsetConverter* query_converter,
+ CanonOutput* output,
+ Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeFileURL(const char16_t* spec,
+ int spec_len,
+ const Parsed& parsed,
+ CharsetConverter* query_converter,
+ CanonOutput* output,
+ Parsed* new_parsed);
// Use for filesystem URLs.
-URL_EXPORT bool CanonicalizeFileSystemURL(const char* spec,
- int spec_len,
- const Parsed& parsed,
- CharsetConverter* query_converter,
- CanonOutput* output,
- Parsed* new_parsed);
-URL_EXPORT bool CanonicalizeFileSystemURL(const base::char16* spec,
- int spec_len,
- const Parsed& parsed,
- CharsetConverter* query_converter,
- CanonOutput* output,
- Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeFileSystemURL(const char* spec,
+ int spec_len,
+ const Parsed& parsed,
+ CharsetConverter* query_converter,
+ CanonOutput* output,
+ Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeFileSystemURL(const char16_t* spec,
+ int spec_len,
+ const Parsed& parsed,
+ CharsetConverter* query_converter,
+ CanonOutput* output,
+ Parsed* new_parsed);
// Use for path URLs such as javascript. This does not modify the path in any
// way, for example, by escaping it.
-URL_EXPORT bool CanonicalizePathURL(const char* spec,
- int spec_len,
- const Parsed& parsed,
- CanonOutput* output,
- Parsed* new_parsed);
-URL_EXPORT bool CanonicalizePathURL(const base::char16* spec,
- int spec_len,
- const Parsed& parsed,
- CanonOutput* output,
- Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizePathURL(const char* spec,
+ int spec_len,
+ const Parsed& parsed,
+ CanonOutput* output,
+ Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizePathURL(const char16_t* spec,
+ int spec_len,
+ const Parsed& parsed,
+ CanonOutput* output,
+ Parsed* new_parsed);
+
+// Use to canonicalize just the path component of a "path" URL; e.g. the
+// path of a javascript URL.
+COMPONENT_EXPORT(URL)
+void CanonicalizePathURLPath(const char* source,
+ const Component& component,
+ CanonOutput* output,
+ Component* new_component);
+COMPONENT_EXPORT(URL)
+void CanonicalizePathURLPath(const char16_t* source,
+ const Component& component,
+ CanonOutput* output,
+ Component* new_component);
// Use for mailto URLs. This "canonicalizes" the URL into a path and query
// component. It does not attempt to merge "to" fields. It uses UTF-8 for
// the query encoding if there is a query. This is because a mailto URL is
// really intended for an external mail program, and the encoding of a page,
// etc. which would influence a query encoding normally are irrelevant.
-URL_EXPORT bool CanonicalizeMailtoURL(const char* spec,
- int spec_len,
- const Parsed& parsed,
- CanonOutput* output,
- Parsed* new_parsed);
-URL_EXPORT bool CanonicalizeMailtoURL(const base::char16* spec,
- int spec_len,
- const Parsed& parsed,
- CanonOutput* output,
- Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeMailtoURL(const char* spec,
+ int spec_len,
+ const Parsed& parsed,
+ CanonOutput* output,
+ Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeMailtoURL(const char16_t* spec,
+ int spec_len,
+ const Parsed& parsed,
+ CanonOutput* output,
+ Parsed* new_parsed);
// Part replacer --------------------------------------------------------------
@@ -660,21 +703,20 @@
// This structures does not own any data. It is the caller's responsibility to
// ensure that the data the pointers point to stays in scope and is not
// modified.
-template<typename CHAR>
+template <typename CHAR>
struct URLComponentSource {
// Constructor normally used by callers wishing to replace components. This
// will make them all NULL, which is no replacement. The caller would then
// override the components they want to replace.
URLComponentSource()
- : scheme(NULL),
- username(NULL),
- password(NULL),
- host(NULL),
- port(NULL),
- path(NULL),
- query(NULL),
- ref(NULL) {
- }
+ : scheme(nullptr),
+ username(nullptr),
+ password(nullptr),
+ host(nullptr),
+ port(nullptr),
+ path(nullptr),
+ query(nullptr),
+ ref(nullptr) {}
// Constructor normally used internally to initialize all the components to
// point to the same spec.
@@ -686,17 +728,32 @@
port(default_value),
path(default_value),
query(default_value),
- ref(default_value) {
- }
+ ref(default_value) {}
- const CHAR* scheme;
- const CHAR* username;
- const CHAR* password;
- const CHAR* host;
- const CHAR* port;
- const CHAR* path;
- const CHAR* query;
- const CHAR* ref;
+ // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+ // #addr-of
+ RAW_PTR_EXCLUSION const CHAR* scheme;
+ // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+ // #addr-of
+ RAW_PTR_EXCLUSION const CHAR* username;
+ // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+ // #addr-of
+ RAW_PTR_EXCLUSION const CHAR* password;
+ // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+ // #addr-of
+ RAW_PTR_EXCLUSION const CHAR* host;
+ // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+ // #addr-of
+ RAW_PTR_EXCLUSION const CHAR* port;
+ // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+ // #addr-of
+ RAW_PTR_EXCLUSION const CHAR* path;
+ // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+ // #addr-of
+ RAW_PTR_EXCLUSION const CHAR* query;
+ // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+ // #addr-of
+ RAW_PTR_EXCLUSION const CHAR* ref;
};
// This structure encapsulates information on modifying a URL. Each component
@@ -709,11 +766,10 @@
// IN SCOPE BY THE CALLER for as long as this object exists!
//
// Prefer the 8-bit replacement version if possible since it is more efficient.
-template<typename CHAR>
+template <typename CHAR>
class Replacements {
public:
- Replacements() {
- }
+ Replacements() {}
// Scheme
void SetScheme(const CHAR* s, const Component& comp) {
@@ -828,78 +884,86 @@
};
// The base must be an 8-bit canonical URL.
-URL_EXPORT bool ReplaceStandardURL(const char* base,
- const Parsed& base_parsed,
- const Replacements<char>& replacements,
- SchemeType scheme_type,
- CharsetConverter* query_converter,
- CanonOutput* output,
- Parsed* new_parsed);
-URL_EXPORT bool ReplaceStandardURL(
- const char* base,
- const Parsed& base_parsed,
- const Replacements<base::char16>& replacements,
- SchemeType scheme_type,
- CharsetConverter* query_converter,
- CanonOutput* output,
- Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceStandardURL(const char* base,
+ const Parsed& base_parsed,
+ const Replacements<char>& replacements,
+ SchemeType scheme_type,
+ CharsetConverter* query_converter,
+ CanonOutput* output,
+ Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceStandardURL(const char* base,
+ const Parsed& base_parsed,
+ const Replacements<char16_t>& replacements,
+ SchemeType scheme_type,
+ CharsetConverter* query_converter,
+ CanonOutput* output,
+ Parsed* new_parsed);
// Filesystem URLs can only have the path, query, or ref replaced.
// All other components will be ignored.
-URL_EXPORT bool ReplaceFileSystemURL(const char* base,
- const Parsed& base_parsed,
- const Replacements<char>& replacements,
- CharsetConverter* query_converter,
- CanonOutput* output,
- Parsed* new_parsed);
-URL_EXPORT bool ReplaceFileSystemURL(
- const char* base,
- const Parsed& base_parsed,
- const Replacements<base::char16>& replacements,
- CharsetConverter* query_converter,
- CanonOutput* output,
- Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceFileSystemURL(const char* base,
+ const Parsed& base_parsed,
+ const Replacements<char>& replacements,
+ CharsetConverter* query_converter,
+ CanonOutput* output,
+ Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceFileSystemURL(const char* base,
+ const Parsed& base_parsed,
+ const Replacements<char16_t>& replacements,
+ CharsetConverter* query_converter,
+ CanonOutput* output,
+ Parsed* new_parsed);
// Replacing some parts of a file URL is not permitted. Everything except
// the host, path, query, and ref will be ignored.
-URL_EXPORT bool ReplaceFileURL(const char* base,
- const Parsed& base_parsed,
- const Replacements<char>& replacements,
- CharsetConverter* query_converter,
- CanonOutput* output,
- Parsed* new_parsed);
-URL_EXPORT bool ReplaceFileURL(const char* base,
- const Parsed& base_parsed,
- const Replacements<base::char16>& replacements,
- CharsetConverter* query_converter,
- CanonOutput* output,
- Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceFileURL(const char* base,
+ const Parsed& base_parsed,
+ const Replacements<char>& replacements,
+ CharsetConverter* query_converter,
+ CanonOutput* output,
+ Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceFileURL(const char* base,
+ const Parsed& base_parsed,
+ const Replacements<char16_t>& replacements,
+ CharsetConverter* query_converter,
+ CanonOutput* output,
+ Parsed* new_parsed);
// Path URLs can only have the scheme and path replaced. All other components
// will be ignored.
-URL_EXPORT bool ReplacePathURL(const char* base,
- const Parsed& base_parsed,
- const Replacements<char>& replacements,
- CanonOutput* output,
- Parsed* new_parsed);
-URL_EXPORT bool ReplacePathURL(const char* base,
- const Parsed& base_parsed,
- const Replacements<base::char16>& replacements,
- CanonOutput* output,
- Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplacePathURL(const char* base,
+ const Parsed& base_parsed,
+ const Replacements<char>& replacements,
+ CanonOutput* output,
+ Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplacePathURL(const char* base,
+ const Parsed& base_parsed,
+ const Replacements<char16_t>& replacements,
+ CanonOutput* output,
+ Parsed* new_parsed);
// Mailto URLs can only have the scheme, path, and query replaced.
// All other components will be ignored.
-URL_EXPORT bool ReplaceMailtoURL(const char* base,
- const Parsed& base_parsed,
- const Replacements<char>& replacements,
- CanonOutput* output,
- Parsed* new_parsed);
-URL_EXPORT bool ReplaceMailtoURL(const char* base,
- const Parsed& base_parsed,
- const Replacements<base::char16>& replacements,
- CanonOutput* output,
- Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceMailtoURL(const char* base,
+ const Parsed& base_parsed,
+ const Replacements<char>& replacements,
+ CanonOutput* output,
+ Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceMailtoURL(const char* base,
+ const Parsed& base_parsed,
+ const Replacements<char16_t>& replacements,
+ CanonOutput* output,
+ Parsed* new_parsed);
// Relative URL ---------------------------------------------------------------
@@ -914,20 +978,22 @@
// not). Failure means that the combination of URLs doesn't make any sense.
//
// The base URL should always be canonical, therefore is ASCII.
-URL_EXPORT bool IsRelativeURL(const char* base,
- const Parsed& base_parsed,
- const char* fragment,
- int fragment_len,
- bool is_base_hierarchical,
- bool* is_relative,
- Component* relative_component);
-URL_EXPORT bool IsRelativeURL(const char* base,
- const Parsed& base_parsed,
- const base::char16* fragment,
- int fragment_len,
- bool is_base_hierarchical,
- bool* is_relative,
- Component* relative_component);
+COMPONENT_EXPORT(URL)
+bool IsRelativeURL(const char* base,
+ const Parsed& base_parsed,
+ const char* fragment,
+ int fragment_len,
+ bool is_base_hierarchical,
+ bool* is_relative,
+ Component* relative_component);
+COMPONENT_EXPORT(URL)
+bool IsRelativeURL(const char* base,
+ const Parsed& base_parsed,
+ const char16_t* fragment,
+ int fragment_len,
+ bool is_base_hierarchical,
+ bool* is_relative,
+ Component* relative_component);
// Given a canonical parsed source URL, a URL fragment known to be relative,
// and the identified relevant portion of the relative URL (computed by
@@ -947,22 +1013,24 @@
// Returns true on success. On failure, the output will be "something
// reasonable" that will be consistent and valid, just probably not what
// was intended by the web page author or caller.
-URL_EXPORT bool ResolveRelativeURL(const char* base_url,
- const Parsed& base_parsed,
- bool base_is_file,
- const char* relative_url,
- const Component& relative_component,
- CharsetConverter* query_converter,
- CanonOutput* output,
- Parsed* out_parsed);
-URL_EXPORT bool ResolveRelativeURL(const char* base_url,
- const Parsed& base_parsed,
- bool base_is_file,
- const base::char16* relative_url,
- const Component& relative_component,
- CharsetConverter* query_converter,
- CanonOutput* output,
- Parsed* out_parsed);
+COMPONENT_EXPORT(URL)
+bool ResolveRelativeURL(const char* base_url,
+ const Parsed& base_parsed,
+ bool base_is_file,
+ const char* relative_url,
+ const Component& relative_component,
+ CharsetConverter* query_converter,
+ CanonOutput* output,
+ Parsed* out_parsed);
+COMPONENT_EXPORT(URL)
+bool ResolveRelativeURL(const char* base_url,
+ const Parsed& base_parsed,
+ bool base_is_file,
+ const char16_t* relative_url,
+ const Component& relative_component,
+ CharsetConverter* query_converter,
+ CanonOutput* output,
+ Parsed* out_parsed);
} // namespace url
diff --git a/url/url_canon_etc.cc b/url/url_canon_etc.cc
index dcb5835..bae1ce8 100644
--- a/url/url_canon_etc.cc
+++ b/url/url_canon_etc.cc
@@ -1,4 +1,4 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -6,7 +6,6 @@
#include <string.h>
-#include "starboard/types.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
@@ -32,12 +31,22 @@
// Fast verification that there's nothing that needs removal. This is the 99%
// case, so we want it to be fast and don't care about impacting the speed
// when we do find whitespace.
- int found_whitespace = false;
- for (int i = 0; i < input_len; i++) {
- if (!IsRemovableURLWhitespace(input[i]))
- continue;
- found_whitespace = true;
- break;
+ bool found_whitespace = false;
+ if (sizeof(*input) == 1 && input_len >= kMinimumLengthForSIMD) {
+ // For large strings, memchr is much faster than any scalar code we can
+ // write, even if we need to run it three times. (If this turns out to still
+ // be a bottleneck, we could write our own vector code, but given that
+ // memchr is so fast, it's unlikely to be relevant.)
+ found_whitespace = memchr(input, '\n', input_len) != nullptr ||
+ memchr(input, '\r', input_len) != nullptr ||
+ memchr(input, '\t', input_len) != nullptr;
+ } else {
+ for (int i = 0; i < input_len; i++) {
+ if (!IsRemovableURLWhitespace(input[i]))
+ continue;
+ found_whitespace = true;
+ break;
+ }
}
if (!found_whitespace) {
@@ -73,6 +82,7 @@
// Contains the canonical version of each possible input letter in the scheme
// (basically, lower-cased). The corresponding entry will be 0 if the letter
// is not allowed in a scheme.
+// clang-format off
const char kSchemeCanonical[0x80] = {
// 00-1f: all are invalid
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -89,6 +99,7 @@
0 , 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
// p q r s t u v w x y z { | } ~
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0 , 0 , 0 , 0 , 0 };
+// clang-format on
// This could be a table lookup as well by setting the high bit for each
// valid character, but it's only called once per URL, and it makes the lookup
@@ -97,12 +108,12 @@
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}
-template<typename CHAR, typename UCHAR>
+template <typename CHAR, typename UCHAR>
bool DoScheme(const CHAR* spec,
const Component& scheme,
CanonOutput* output,
Component* out_scheme) {
- if (scheme.len <= 0) {
+ if (scheme.is_empty()) {
// Scheme is unspecified or empty, convert to empty by appending a colon.
*out_scheme = Component(output->length(), 0);
output->push_back(':');
@@ -118,12 +129,13 @@
// FindAndCompareScheme, which could cause some security checks on
// schemes to be incorrect.
bool success = true;
- int end = scheme.end();
- for (int i = scheme.begin; i < end; i++) {
+ size_t begin = static_cast<size_t>(scheme.begin);
+ size_t end = static_cast<size_t>(scheme.end());
+ for (size_t i = begin; i < end; i++) {
UCHAR ch = static_cast<UCHAR>(spec[i]);
char replacement = 0;
if (ch < 0x80) {
- if (i == scheme.begin) {
+ if (i == begin) {
// Need to do a special check for the first letter of the scheme.
if (IsSchemeFirstChar(static_cast<unsigned char>(ch)))
replacement = kSchemeCanonical[ch];
@@ -161,7 +173,7 @@
// *_spec strings. Typically, these specs will be the same (we're
// canonicalizing a single source string), but may be different when
// replacing components.
-template<typename CHAR, typename UCHAR>
+template <typename CHAR, typename UCHAR>
bool DoUserInfo(const CHAR* username_spec,
const Component& username,
const CHAR* password_spec,
@@ -169,7 +181,7 @@
CanonOutput* output,
Component* out_username,
Component* out_password) {
- if (username.len <= 0 && password.len <= 0) {
+ if (username.is_empty() && password.is_empty()) {
// Common case: no user info. We strip empty username/passwords.
*out_username = Component();
*out_password = Component();
@@ -178,20 +190,22 @@
// Write the username.
out_username->begin = output->length();
- if (username.len > 0) {
+ if (username.is_nonempty()) {
// This will escape characters not valid for the username.
- AppendStringOfType(&username_spec[username.begin], username.len,
- CHAR_USERINFO, output);
+ AppendStringOfType(&username_spec[username.begin],
+ static_cast<size_t>(username.len), CHAR_USERINFO,
+ output);
}
out_username->len = output->length() - out_username->begin;
// When there is a password, we need the separator. Note that we strip
// empty but specified passwords.
- if (password.len > 0) {
+ if (password.is_nonempty()) {
output->push_back(':');
out_password->begin = output->length();
- AppendStringOfType(&password_spec[password.begin], password.len,
- CHAR_USERINFO, output);
+ AppendStringOfType(&password_spec[password.begin],
+ static_cast<size_t>(password.len), CHAR_USERINFO,
+ output);
out_password->len = output->length() - out_password->begin;
} else {
*out_password = Component();
@@ -211,7 +225,7 @@
}
// This function will prepend the colon if there will be a port.
-template<typename CHAR, typename UCHAR>
+template <typename CHAR, typename UCHAR>
bool DoPort(const CHAR* spec,
const Component& port,
int default_port_for_scheme,
@@ -228,7 +242,8 @@
// what the error was, and mark the URL as invalid by returning false.
output->push_back(':');
out_port->begin = output->length();
- AppendInvalidNarrowString(spec, port.begin, port.end(), output);
+ AppendInvalidNarrowString(spec, static_cast<size_t>(port.begin),
+ static_cast<size_t>(port.end()), output);
out_port->len = output->length() - out_port->begin;
return false;
}
@@ -250,10 +265,9 @@
}
// clang-format off
-// Percent-escape all "C0 controls" (0x00-0x1F)
-// https://infra.spec.whatwg.org/#c0-control along with the characters ' '
-// (0x20), '"' (0x22), '<' (0x3C), '>' (0x3E), and '`' (0x60):
-const bool kShouldEscapeCharInRef[0x80] = {
+// Percent-escape all characters from the fragment percent-encode set
+// https://url.spec.whatwg.org/#fragment-percent-encode-set
+const bool kShouldEscapeCharInFragment[0x80] = {
// Control characters (0x00-0x1F)
true, true, true, true, true, true, true, true,
true, true, true, true, true, true, true, true,
@@ -281,17 +295,17 @@
false, false, false, false, false, false, false, false,
// p q r s t u v w
false, false, false, false, false, false, false, false,
-// x y z { | } ~
- false, false, false, false, false, false, false
+// x y z { | } ~ DELETE
+ false, false, false, false, false, false, false, true
};
// clang-format on
-template<typename CHAR, typename UCHAR>
+template <typename CHAR, typename UCHAR>
void DoCanonicalizeRef(const CHAR* spec,
const Component& ref,
CanonOutput* output,
Component* out_ref) {
- if (ref.len < 0) {
+ if (!ref.is_valid()) {
// Common case of no ref.
*out_ref = Component();
return;
@@ -303,16 +317,11 @@
out_ref->begin = output->length();
// Now iterate through all the characters, converting to UTF-8 and validating.
- int end = ref.end();
- for (int i = ref.begin; i < end; i++) {
- if (spec[i] == 0) {
- // IE just strips NULLs, so we do too.
- continue;
- }
-
+ size_t end = static_cast<size_t>(ref.end());
+ for (size_t i = static_cast<size_t>(ref.begin); i < end; i++) {
UCHAR current_char = static_cast<UCHAR>(spec[i]);
if (current_char < 0x80) {
- if (kShouldEscapeCharInRef[current_char])
+ if (kShouldEscapeCharInFragment[current_char])
AppendEscapedChar(static_cast<unsigned char>(spec[i]), output);
else
output->push_back(static_cast<char>(spec[i]));
@@ -335,16 +344,16 @@
potentially_dangling_markup);
}
-const base::char16* RemoveURLWhitespace(const base::char16* input,
- int input_len,
- CanonOutputT<base::char16>* buffer,
- int* output_len,
- bool* potentially_dangling_markup) {
+const char16_t* RemoveURLWhitespace(const char16_t* input,
+ int input_len,
+ CanonOutputT<char16_t>* buffer,
+ int* output_len,
+ bool* potentially_dangling_markup) {
return DoRemoveURLWhitespace(input, input_len, buffer, output_len,
potentially_dangling_markup);
}
-char CanonicalSchemeChar(base::char16 ch) {
+char CanonicalSchemeChar(char16_t ch) {
if (ch >= 0x80)
return 0; // Non-ASCII is not supported by schemes.
return kSchemeCanonical[ch];
@@ -357,11 +366,11 @@
return DoScheme<char, unsigned char>(spec, scheme, output, out_scheme);
}
-bool CanonicalizeScheme(const base::char16* spec,
+bool CanonicalizeScheme(const char16_t* spec,
const Component& scheme,
CanonOutput* output,
Component* out_scheme) {
- return DoScheme<base::char16, base::char16>(spec, scheme, output, out_scheme);
+ return DoScheme<char16_t, char16_t>(spec, scheme, output, out_scheme);
}
bool CanonicalizeUserInfo(const char* username_source,
@@ -371,21 +380,21 @@
CanonOutput* output,
Component* out_username,
Component* out_password) {
- return DoUserInfo<char, unsigned char>(
- username_source, username, password_source, password,
- output, out_username, out_password);
+ return DoUserInfo<char, unsigned char>(username_source, username,
+ password_source, password, output,
+ out_username, out_password);
}
-bool CanonicalizeUserInfo(const base::char16* username_source,
+bool CanonicalizeUserInfo(const char16_t* username_source,
const Component& username,
- const base::char16* password_source,
+ const char16_t* password_source,
const Component& password,
CanonOutput* output,
Component* out_username,
Component* out_password) {
- return DoUserInfo<base::char16, base::char16>(
- username_source, username, password_source, password,
- output, out_username, out_password);
+ return DoUserInfo<char16_t, char16_t>(username_source, username,
+ password_source, password, output,
+ out_username, out_password);
}
bool CanonicalizePort(const char* spec,
@@ -393,18 +402,17 @@
int default_port_for_scheme,
CanonOutput* output,
Component* out_port) {
- return DoPort<char, unsigned char>(spec, port,
- default_port_for_scheme,
+ return DoPort<char, unsigned char>(spec, port, default_port_for_scheme,
output, out_port);
}
-bool CanonicalizePort(const base::char16* spec,
+bool CanonicalizePort(const char16_t* spec,
const Component& port,
int default_port_for_scheme,
CanonOutput* output,
Component* out_port) {
- return DoPort<base::char16, base::char16>(spec, port, default_port_for_scheme,
- output, out_port);
+ return DoPort<char16_t, char16_t>(spec, port, default_port_for_scheme, output,
+ out_port);
}
void CanonicalizeRef(const char* spec,
@@ -414,11 +422,11 @@
DoCanonicalizeRef<char, unsigned char>(spec, ref, output, out_ref);
}
-void CanonicalizeRef(const base::char16* spec,
+void CanonicalizeRef(const char16_t* spec,
const Component& ref,
CanonOutput* output,
Component* out_ref) {
- DoCanonicalizeRef<base::char16, base::char16>(spec, ref, output, out_ref);
+ DoCanonicalizeRef<char16_t, char16_t>(spec, ref, output, out_ref);
}
} // namespace url
diff --git a/url/url_canon_filesystemurl.cc b/url/url_canon_filesystemurl.cc
index e9f5b22..0472484 100644
--- a/url/url_canon_filesystemurl.cc
+++ b/url/url_canon_filesystemurl.cc
@@ -1,4 +1,4 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -39,7 +39,7 @@
output->Append("filesystem:", 11);
new_parsed->scheme.len = 10;
- if (!parsed.inner_parsed() || !parsed.inner_parsed()->scheme.is_valid())
+ if (!inner_parsed || !inner_parsed->scheme.is_valid())
return false;
bool success = true;
@@ -57,8 +57,8 @@
inner_scheme_type = SCHEME_WITH_HOST_AND_PORT;
}
success = CanonicalizeStandardURL(
- spec, parsed.inner_parsed()->Length(), *parsed.inner_parsed(),
- inner_scheme_type, charset_converter, output, &new_inner_parsed);
+ spec, inner_parsed->Length(), *inner_parsed, inner_scheme_type,
+ charset_converter, output, &new_inner_parsed);
} else {
// TODO(ericu): The URL is wrong, but should we try to output more of what
// we were given? Echoing back filesystem:mailto etc. doesn't seem all that
@@ -66,7 +66,7 @@
return false;
}
// The filesystem type must be more than just a leading slash for validity.
- success &= parsed.inner_parsed()->path.len > 1;
+ success &= new_inner_parsed.path.len > 1;
success &= CanonicalizePath(source.path, parsed.path, output,
&new_parsed->path);
@@ -94,14 +94,14 @@
new_parsed);
}
-bool CanonicalizeFileSystemURL(const base::char16* spec,
+bool CanonicalizeFileSystemURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
CharsetConverter* charset_converter,
CanonOutput* output,
Parsed* new_parsed) {
- return DoCanonicalizeFileSystemURL<base::char16, base::char16>(
- spec, URLComponentSource<base::char16>(spec), parsed, charset_converter,
+ return DoCanonicalizeFileSystemURL<char16_t, char16_t>(
+ spec, URLComponentSource<char16_t>(spec), parsed, charset_converter,
output, new_parsed);
}
@@ -120,7 +120,7 @@
bool ReplaceFileSystemURL(const char* base,
const Parsed& base_parsed,
- const Replacements<base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
CharsetConverter* charset_converter,
CanonOutput* output,
Parsed* new_parsed) {
diff --git a/url/url_canon_fileurl.cc b/url/url_canon_fileurl.cc
index 6277289..b45114d 100644
--- a/url/url_canon_fileurl.cc
+++ b/url/url_canon_fileurl.cc
@@ -1,9 +1,10 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Functions for canonicalizing "file:" URLs.
+#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
@@ -14,6 +15,44 @@
namespace {
+bool IsLocalhost(const char* spec, int begin, int end) {
+ if (begin > end)
+ return false;
+ return base::StringPiece(&spec[begin], end - begin) == "localhost";
+}
+
+bool IsLocalhost(const char16_t* spec, int begin, int end) {
+ if (begin > end)
+ return false;
+ return base::StringPiece16(&spec[begin], end - begin) == u"localhost";
+}
+
+template <typename CHAR>
+int DoFindWindowsDriveLetter(const CHAR* spec, int begin, int end) {
+ if (begin > end)
+ return -1;
+
+ // First guess the beginning of the drive letter.
+ // If there is something that looks like a drive letter in the spec between
+ // begin and end, store its position in drive_letter_pos.
+ int drive_letter_pos =
+ DoesContainWindowsDriveSpecUntil(spec, begin, end, end);
+ if (drive_letter_pos < begin)
+ return -1;
+
+ // Check if the path up to the drive letter candidate can be canonicalized as
+ // "/".
+ Component sub_path = MakeRange(begin, drive_letter_pos);
+ RawCanonOutput<1024> output;
+ Component output_path;
+ bool success = CanonicalizePath(spec, sub_path, &output, &output_path);
+ if (!success || output_path.len != 1 || output.at(output_path.begin) != '/') {
+ return -1;
+ }
+
+ return drive_letter_pos;
+}
+
#ifdef WIN32
// Given a pointer into the spec, this copies and canonicalizes the drive
@@ -21,33 +60,26 @@
// spec, it won't do anything. The index of the next character in the input
// spec is returned (after the colon when a drive spec is found, the begin
// offset if one is not).
-template<typename CHAR>
-int FileDoDriveSpec(const CHAR* spec, int begin, int end,
- CanonOutput* output) {
- // The path could be one of several things: /foo/bar, c:/foo/bar, /c:/foo,
- // (with backslashes instead of slashes as well).
- int num_slashes = CountConsecutiveSlashes(spec, begin, end);
- int after_slashes = begin + num_slashes;
+template <typename CHAR>
+int FileDoDriveSpec(const CHAR* spec, int begin, int end, CanonOutput* output) {
+ int drive_letter_pos = FindWindowsDriveLetter(spec, begin, end);
+ if (drive_letter_pos < begin)
+ return begin;
- if (!DoesBeginWindowsDriveSpec(spec, after_slashes, end))
- return begin; // Haven't consumed any characters
+ // By now, a valid drive letter is confirmed at position drive_letter_pos,
+ // followed by a valid drive letter separator (a colon or a pipe).
- // A drive spec is the start of a path, so we need to add a slash for the
- // authority terminator (typically the third slash).
output->push_back('/');
- // DoesBeginWindowsDriveSpec will ensure that the drive letter is valid
- // and that it is followed by a colon/pipe.
-
- // Normalize Windows drive letters to uppercase
- if (base::IsAsciiLower(spec[after_slashes]))
- output->push_back(static_cast<char>(spec[after_slashes] - 'a' + 'A'));
+ // Normalize Windows drive letters to uppercase.
+ if (base::IsAsciiLower(spec[drive_letter_pos]))
+ output->push_back(static_cast<char>(spec[drive_letter_pos] - 'a' + 'A'));
else
- output->push_back(static_cast<char>(spec[after_slashes]));
+ output->push_back(static_cast<char>(spec[drive_letter_pos]));
// Normalize the character following it to a colon rather than pipe.
output->push_back(':');
- return after_slashes + 2;
+ return drive_letter_pos + 2;
}
#endif // WIN32
@@ -70,14 +102,17 @@
// drive colon (if any, Windows only), or the first slash of the path.
bool success = true;
if (after_drive < path.end()) {
- // Use the regular path canonicalizer to canonicalize the rest of the
- // path. Give it a fake output component to write into. DoCanonicalizeFile
- // will compute the full path component.
+ // Use the regular path canonicalizer to canonicalize the rest of the path
+ // after the drive.
+ //
+ // Give it a fake output component to write into, since we will be
+ // calculating the out_path ourselves (consisting of both the drive and the
+ // path we canonicalize here).
Component sub_path = MakeRange(after_drive, path.end());
Component fake_output_path;
success = CanonicalizePath(spec, sub_path, output, &fake_output_path);
- } else {
- // No input path, canonicalize to a slash.
+ } else if (after_drive == path.begin) {
+ // No input path and no drive spec, canonicalize to a slash.
output->push_back('/');
}
@@ -102,19 +137,33 @@
output->Append("file://", 7);
new_parsed->scheme.len = 4;
+ // If the host is localhost, and the path starts with a Windows drive letter,
+ // remove the host component. This does the following transformation:
+ // file://localhost/C:/hello.txt -> file:///C:/hello.txt
+ //
+ // Note: we do this on every platform per URL Standard, not just Windows.
+ //
+ // TODO(https://crbug.com/688961): According to the latest URL spec, this
+ // transformation should be done regardless of the path.
+ Component host_range = parsed.host;
+ if (IsLocalhost(source.host, host_range.begin, host_range.end()) &&
+ FindWindowsDriveLetter(source.path, parsed.path.begin,
+ parsed.path.end()) >= parsed.path.begin) {
+ host_range.reset();
+ }
+
// Append the host. For many file URLs, this will be empty. For UNC, this
// will be present.
// TODO(brettw) This doesn't do any checking for host name validity. We
// should probably handle validity checking of UNC hosts differently than
// for regular IP hosts.
- bool success = CanonicalizeHost(source.host, parsed.host,
- output, &new_parsed->host);
+ bool success =
+ CanonicalizeHost(source.host, host_range, output, &new_parsed->host);
success &= DoFileCanonicalizePath<CHAR, UCHAR>(source.path, parsed.path,
output, &new_parsed->path);
+
CanonicalizeQuery(source.query, parsed.query, query_converter,
output, &new_parsed->query);
-
- // Ignore failure for refs since the URL can probably still be loaded.
CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
return success;
@@ -122,6 +171,14 @@
} // namespace
+int FindWindowsDriveLetter(const char* spec, int begin, int end) {
+ return DoFindWindowsDriveLetter(spec, begin, end);
+}
+
+int FindWindowsDriveLetter(const char16_t* spec, int begin, int end) {
+ return DoFindWindowsDriveLetter(spec, begin, end);
+}
+
bool CanonicalizeFileURL(const char* spec,
int spec_len,
const Parsed& parsed,
@@ -133,15 +190,15 @@
output, new_parsed);
}
-bool CanonicalizeFileURL(const base::char16* spec,
+bool CanonicalizeFileURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* new_parsed) {
- return DoCanonicalizeFileURL<base::char16, base::char16>(
- URLComponentSource<base::char16>(spec), parsed, query_converter,
- output, new_parsed);
+ return DoCanonicalizeFileURL<char16_t, char16_t>(
+ URLComponentSource<char16_t>(spec), parsed, query_converter, output,
+ new_parsed);
}
bool FileCanonicalizePath(const char* spec,
@@ -152,12 +209,12 @@
output, out_path);
}
-bool FileCanonicalizePath(const base::char16* spec,
+bool FileCanonicalizePath(const char16_t* spec,
const Component& path,
CanonOutput* output,
Component* out_path) {
- return DoFileCanonicalizePath<base::char16, base::char16>(spec, path,
- output, out_path);
+ return DoFileCanonicalizePath<char16_t, char16_t>(spec, path, output,
+ out_path);
}
bool ReplaceFileURL(const char* base,
@@ -175,7 +232,7 @@
bool ReplaceFileURL(const char* base,
const Parsed& base_parsed,
- const Replacements<base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* new_parsed) {
diff --git a/url/url_canon_host.cc b/url/url_canon_host.cc
index 9f31547..d3b1222 100644
--- a/url/url_canon_host.cc
+++ b/url/url_canon_host.cc
@@ -1,8 +1,9 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "base/logging.h"
+#include "base/check.h"
+#include "base/cpu_reduction_experiment.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
@@ -68,7 +69,7 @@
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',kEsc,kEsc,kEsc, 0 , 0 };
// RFC1034 maximum FQDN length.
-constexpr int kMaxHostLength = 253;
+constexpr size_t kMaxHostLength = 253;
// Generous padding to account for the fact that UTS#46 normalization can cause
// a long string to actually shrink and fit within the 253 character RFC1034
@@ -76,11 +77,11 @@
// cases: An arbitrary number of characters (e.g. U+00AD SOFT HYPHEN) can be
// removed from the input by UTS#46 processing. However, this should be
// sufficient for all normally-encountered, non-abusive hostname strings.
-constexpr int kMaxHostBufferLength = kMaxHostLength*5;
+constexpr size_t kMaxHostBufferLength = kMaxHostLength * 5;
-const int kTempHostBufferLen = 1024;
-typedef RawCanonOutputT<char, kTempHostBufferLen> StackBuffer;
-typedef RawCanonOutputT<base::char16, kTempHostBufferLen> StackBufferW;
+constexpr size_t kTempHostBufferLen = 1024;
+using StackBuffer = RawCanonOutputT<char, kTempHostBufferLen>;
+using StackBufferW = RawCanonOutputT<char16_t, kTempHostBufferLen>;
// Scans a host name and fills in the output flags according to what we find.
// |has_non_ascii| will be true if there are any non-7-bit characters, and
@@ -122,15 +123,15 @@
// |*has_non_ascii| flag.
//
// The return value indicates if the output is a potentially valid host name.
-template<typename INCHAR, typename OUTCHAR>
+template <typename INCHAR, typename OUTCHAR>
bool DoSimpleHost(const INCHAR* host,
- int host_len,
+ size_t host_len,
CanonOutputT<OUTCHAR>* output,
bool* has_non_ascii) {
*has_non_ascii = false;
bool success = true;
- for (int i = 0; i < host_len; ++i) {
+ for (size_t i = 0; i < host_len; ++i) {
unsigned int source = host[i];
if (source == '%') {
// Unescape first, if possible.
@@ -174,7 +175,7 @@
}
// Canonicalizes a host that requires IDN conversion. Returns true on success
-bool DoIDNHost(const base::char16* src, int src_len, CanonOutput* output) {
+bool DoIDNHost(const char16_t* src, size_t src_len, CanonOutput* output) {
int original_output_len = output->length(); // So we can rewind below.
// We need to escape URL before doing IDN conversion, since punicode strings
@@ -200,9 +201,8 @@
// Now we check the ASCII output like a normal host. It will also handle
// unescaping. Although we unescaped everything before this function call, if
// somebody does %00 as fullwidth, ICU will convert this to ASCII.
- bool success = DoSimpleHost(wide_output.data(),
- wide_output.length(),
- output, &has_non_ascii);
+ bool success = DoSimpleHost(wide_output.data(), wide_output.length(), output,
+ &has_non_ascii);
if (has_non_ascii) {
// ICU generated something that DoSimpleHost didn't think looked like
// ASCII. This is quite rare, but ICU might convert some characters to
@@ -229,16 +229,20 @@
// 8-bit convert host to its ASCII version: this converts the UTF-8 input to
// UTF-16. The has_escaped flag should be set if the input string requires
// unescaping.
-bool DoComplexHost(const char* host, int host_len,
- bool has_non_ascii, bool has_escaped, CanonOutput* output) {
+bool DoComplexHost(const char* host,
+ size_t host_len,
+ bool has_non_ascii,
+ bool has_escaped,
+ CanonOutput* output) {
// Save the current position in the output. We may write stuff and rewind it
// below, so we need to know where to rewind to.
- int begin_length = output->length();
+ size_t begin_length = output->length();
// Points to the UTF-8 data we want to convert. This will either be the
// input or the unescaped version written to |*output| if necessary.
const char* utf8_source;
- int utf8_source_len;
+ size_t utf8_source_len;
+ bool are_all_escaped_valid = true;
if (has_escaped) {
// Unescape before converting to UTF-16 for IDN. We write this into the
// output because it most likely does not require IDNization, and we can
@@ -247,14 +251,16 @@
// unescaped input requires IDN.
if (!DoSimpleHost(host, host_len, output, &has_non_ascii)) {
// Error with some escape sequence. We'll call the current output
- // complete. DoSimpleHost will have written some "reasonable" output.
- return false;
+ // complete. DoSimpleHost will have written some "reasonable" output
+ // for the invalid escapes, but the output could be non-ASCII and
+ // needs to go through re-encoding below.
+ are_all_escaped_valid = false;
}
// Unescaping may have left us with ASCII input, in which case the
// unescaped version we wrote to output is complete.
if (!has_non_ascii) {
- return true;
+ return are_all_escaped_valid;
}
// Save the pointer into the data was just converted (it may be appended to
@@ -276,7 +282,7 @@
if (!ConvertUTF8ToUTF16(utf8_source, utf8_source_len, &utf16)) {
// In this error case, the input may or may not be the output.
StackBuffer utf8;
- for (int i = 0; i < utf8_source_len; i++)
+ for (size_t i = 0; i < utf8_source_len; i++)
utf8.push_back(utf8_source[i]);
output->set_length(begin_length);
AppendInvalidNarrowString(utf8.data(), 0, utf8.length(), output);
@@ -286,14 +292,18 @@
// This will call DoSimpleHost which will do normal ASCII canonicalization
// and also check for IP addresses in the outpt.
- return DoIDNHost(utf16.data(), utf16.length(), output);
+ return DoIDNHost(utf16.data(), utf16.length(), output) &&
+ are_all_escaped_valid;
}
// UTF-16 convert host to its ASCII version. The set up is already ready for
// the backend, so we just pass through. The has_escaped flag should be set if
// the input string requires unescaping.
-bool DoComplexHost(const base::char16* host, int host_len,
- bool has_non_ascii, bool has_escaped, CanonOutput* output) {
+bool DoComplexHost(const char16_t* host,
+ size_t host_len,
+ bool has_non_ascii,
+ bool has_escaped,
+ CanonOutput* output) {
if (has_escaped) {
// Yikes, we have escaped characters with wide input. The escaped
// characters should be interpreted as UTF-8. To solve this problem,
@@ -311,8 +321,8 @@
// Once we convert to UTF-8, we can use the 8-bit version of the complex
// host handling code above.
- return DoComplexHost(utf8.data(), utf8.length(), has_non_ascii,
- has_escaped, output);
+ return DoComplexHost(utf8.data(), utf8.length(), has_non_ascii, has_escaped,
+ output);
}
// No unescaping necessary, we can safely pass the input to ICU. This
@@ -326,16 +336,18 @@
bool DoHostSubstring(const CHAR* spec,
const Component& host,
CanonOutput* output) {
+ DCHECK(host.is_valid());
+
bool has_non_ascii, has_escaped;
ScanHostname<CHAR, UCHAR>(spec, host, &has_non_ascii, &has_escaped);
if (has_non_ascii || has_escaped) {
- return DoComplexHost(&spec[host.begin], host.len, has_non_ascii,
- has_escaped, output);
+ return DoComplexHost(&spec[host.begin], static_cast<size_t>(host.len),
+ has_non_ascii, has_escaped, output);
}
- const bool success =
- DoSimpleHost(&spec[host.begin], host.len, output, &has_non_ascii);
+ const bool success = DoSimpleHost(
+ &spec[host.begin], static_cast<size_t>(host.len), output, &has_non_ascii);
DCHECK(!has_non_ascii);
return success;
}
@@ -345,7 +357,7 @@
const Component& host,
CanonOutput* output,
CanonHostInfo* host_info) {
- if (host.len <= 0) {
+ if (host.is_empty()) {
// Empty hosts don't need anything.
host_info->family = CanonHostInfo::NEUTRAL;
host_info->out_host = Component();
@@ -391,12 +403,12 @@
return (host_info.family != CanonHostInfo::BROKEN);
}
-bool CanonicalizeHost(const base::char16* spec,
+bool CanonicalizeHost(const char16_t* spec,
const Component& host,
CanonOutput* output,
Component* out_host) {
CanonHostInfo host_info;
- DoHost<base::char16, base::char16>(spec, host, output, &host_info);
+ DoHost<char16_t, char16_t>(spec, host, output, &host_info);
*out_host = host_info.out_host;
return (host_info.family != CanonHostInfo::BROKEN);
}
@@ -408,11 +420,11 @@
DoHost<char, unsigned char>(spec, host, output, host_info);
}
-void CanonicalizeHostVerbose(const base::char16* spec,
+void CanonicalizeHostVerbose(const char16_t* spec,
const Component& host,
CanonOutput* output,
CanonHostInfo* host_info) {
- DoHost<base::char16, base::char16>(spec, host, output, host_info);
+ DoHost<char16_t, char16_t>(spec, host, output, host_info);
}
bool CanonicalizeHostSubstring(const char* spec,
@@ -421,10 +433,10 @@
return DoHostSubstring<char, unsigned char>(spec, host, output);
}
-bool CanonicalizeHostSubstring(const base::char16* spec,
+bool CanonicalizeHostSubstring(const char16_t* spec,
const Component& host,
CanonOutput* output) {
- return DoHostSubstring<base::char16, base::char16>(spec, host, output);
+ return DoHostSubstring<char16_t, char16_t>(spec, host, output);
}
} // namespace url
diff --git a/url/url_canon_icu.cc b/url/url_canon_icu.cc
index a87235b..5adc187 100644
--- a/url/url_canon_icu.cc
+++ b/url/url_canon_icu.cc
@@ -1,15 +1,16 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// ICU-based character set converter.
+#include <stdint.h>
#include <stdlib.h>
#include <string.h>
-#include "base/logging.h"
-#include "starboard/common/string.h"
-#include "starboard/types.h"
+#include "base/check.h"
+#include "base/memory/raw_ptr.h"
+#include "base/memory/raw_ptr_exclusion.h"
#include "third_party/icu/source/common/unicode/ucnv.h"
#include "third_party/icu/source/common/unicode/ucnv_cb.h"
#include "third_party/icu/source/common/unicode/utypes.h"
@@ -67,10 +68,12 @@
}
private:
- UConverter* converter_;
+ raw_ptr<UConverter> converter_;
UConverterFromUCallback old_callback_;
- const void* old_context_;
+ // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+ // #addr-of
+ RAW_PTR_EXCLUSION const void* old_context_;
};
} // namespace
@@ -81,7 +84,7 @@
ICUCharsetConverter::~ICUCharsetConverter() = default;
-void ICUCharsetConverter::ConvertFromUTF16(const base::char16* input,
+void ICUCharsetConverter::ConvertFromUTF16(const char16_t* input,
int input_len,
CanonOutput* output) {
// Install our error handler. It will be called for character that can not
diff --git a/url/url_canon_icu.h b/url/url_canon_icu.h
index 80d7953..cb5da7d 100644
--- a/url/url_canon_icu.h
+++ b/url/url_canon_icu.h
@@ -1,4 +1,4 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -8,8 +8,9 @@
// ICU integration functions.
#include "base/compiler_specific.h"
+#include "base/component_export.h"
+#include "base/memory/raw_ptr.h"
#include "url/url_canon.h"
-#include "url/url_export.h"
typedef struct UConverter UConverter;
@@ -17,7 +18,7 @@
// An implementation of CharsetConverter that implementations can use to
// interface the canonicalizer with ICU's conversion routines.
-class URL_EXPORT ICUCharsetConverter : public CharsetConverter {
+class COMPONENT_EXPORT(URL) ICUCharsetConverter : public CharsetConverter {
public:
// Constructs a converter using an already-existing ICU character set
// converter. This converter is NOT owned by this object; the lifetime must
@@ -26,13 +27,13 @@
~ICUCharsetConverter() override;
- void ConvertFromUTF16(const base::char16* input,
+ void ConvertFromUTF16(const char16_t* input,
int input_len,
CanonOutput* output) override;
private:
// The ICU converter, not owned by this class.
- UConverter* converter_;
+ raw_ptr<UConverter> converter_;
};
} // namespace url
diff --git a/url/url_canon_icu_unittest.cc b/url/url_canon_icu_unittest.cc
index fa2e8bc..336da3f 100644
--- a/url/url_canon_icu_unittest.cc
+++ b/url/url_canon_icu_unittest.cc
@@ -1,14 +1,16 @@
-// Copyright 2014 The Chromium Authors. All rights reserved.
+// Copyright 2014 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "base/macros.h"
-#include "starboard/common/string.h"
-#include "starboard/types.h"
+#include "url/url_canon_icu.h"
+
+#include <stddef.h>
+
+#include "base/logging.h"
+#include "base/memory/raw_ptr.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/icu/source/common/unicode/ucnv.h"
#include "url/url_canon.h"
-#include "url/url_canon_icu.h"
#include "url/url_canon_stdstring.h"
#include "url/url_test_utils.h"
@@ -22,18 +24,22 @@
explicit UConvScoper(const char* charset_name) {
UErrorCode err = U_ZERO_ERROR;
converter_ = ucnv_open(charset_name, &err);
+ if (!converter_) {
+ LOG(ERROR) << "Failed to open charset " << charset_name << ": "
+ << u_errorName(err);
+ }
}
~UConvScoper() {
if (converter_)
- ucnv_close(converter_);
+ ucnv_close(converter_.ExtractAsDangling());
}
// Returns the converter object, may be NULL.
UConverter* converter() const { return converter_; }
private:
- UConverter* converter_;
+ raw_ptr<UConverter> converter_;
};
TEST(URLCanonIcuTest, ICUCharsetConverter) {
@@ -47,16 +53,14 @@
{L"\x4f60\x597d", "utf-8", "\xe4\xbd\xa0\xe5\xa5\xbd"},
// Non-BMP UTF-8.
{L"!\xd800\xdf00!", "utf-8", "!\xf0\x90\x8c\x80!"},
-#if !defined(STARBOARD)
// Big5
{L"\x4f60\x597d", "big5", "\xa7\x41\xa6\x6e"},
// Unrepresentable character in the destination set.
{L"hello\x4f60\x06de\x597dworld", "big5",
"hello\xa7\x41%26%231758%3B\xa6\x6eworld"},
-#endif
};
- for (size_t i = 0; i < arraysize(icu_cases); i++) {
+ for (size_t i = 0; i < std::size(icu_cases); i++) {
UConvScoper conv(icu_cases[i].encoding);
ASSERT_TRUE(conv.converter() != NULL);
ICUCharsetConverter converter(conv.converter());
@@ -64,7 +68,7 @@
std::string str;
StdStringCanonOutput output(&str);
- base::string16 input_str(
+ std::u16string input_str(
test_utils::TruncateWStringToUTF16(icu_cases[i].input));
int input_len = static_cast<int>(input_str.length());
converter.ConvertFromUTF16(input_str.c_str(), input_len, &output);
@@ -81,14 +85,14 @@
ICUCharsetConverter converter(conv.converter());
for (int i = static_size - 2; i <= static_size + 2; i++) {
// Make a string with the appropriate length.
- base::string16 input;
+ std::u16string input;
for (int ch = 0; ch < i; ch++)
input.push_back('a');
RawCanonOutput<static_size> output;
converter.ConvertFromUTF16(input.c_str(), static_cast<int>(input.length()),
&output);
- EXPECT_EQ(input.length(), static_cast<size_t>(output.length()));
+ EXPECT_EQ(input.length(), output.length());
}
}
@@ -101,7 +105,6 @@
} query_cases[] = {
// Regular ASCII case in some different encodings.
{"foo=bar", L"foo=bar", "utf-8", "?foo=bar"},
-#if !defined(STARBOARD)
{"foo=bar", L"foo=bar", "shift_jis", "?foo=bar"},
{"foo=bar", L"foo=bar", "gb2312", "?foo=bar"},
// Chinese input/output
@@ -113,10 +116,9 @@
// "?q=你"
{"q=Chinese\xef\xbc\xa7", L"q=Chinese\xff27", "iso-8859-1",
"?q=Chinese%26%2365319%3B"},
-#endif
};
- for (size_t i = 0; i < arraysize(query_cases); i++) {
+ for (size_t i = 0; i < std::size(query_cases); i++) {
Component out_comp;
UConvScoper conv(query_cases[i].encoding);
@@ -137,7 +139,7 @@
}
if (query_cases[i].input16) {
- base::string16 input16(
+ std::u16string input16(
test_utils::TruncateWStringToUTF16(query_cases[i].input16));
int len = static_cast<int>(input16.length());
Component in_comp(0, len);
diff --git a/url/url_canon_internal.cc b/url/url_canon_internal.cc
index 7294e35..f621920 100644
--- a/url/url_canon_internal.cc
+++ b/url/url_canon_internal.cc
@@ -1,40 +1,89 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "url/url_canon_internal.h"
-#if defined(STARBOARD)
-#include "starboard/common/string.h"
-#ifndef EINVAL
-#define EINVAL 22
-#endif
-
-#else
#include <errno.h>
+#include <stddef.h>
#include <stdlib.h>
+#ifdef __SSE2__
+#include <immintrin.h>
+#elif defined(__aarch64__)
+#include <arm_neon.h>
+#endif
#include <cstdio>
-#endif
-
#include <string>
+#include "base/bits.h"
+#include "base/numerics/safe_conversions.h"
#include "base/strings/utf_string_conversion_utils.h"
-#include "starboard/types.h"
namespace url {
namespace {
-template<typename CHAR, typename UCHAR>
-void DoAppendStringOfType(const CHAR* source, int length,
+// Find the initial segment of the given string that consists solely
+// of characters valid for CHAR_QUERY. (We can have false negatives in
+// one specific case, namely the exclamation mark 0x21, but false negatives
+// are fine, and it's not worth adding a separate test for.) This is
+// a fast path to speed up checking of very long query strings that are
+// already valid, which happen on some web pages.
+//
+// This has some startup cost to load the constants and such, so it's
+// usually not worth it for short strings.
+size_t FindInitialQuerySafeString(const char* source, size_t length) {
+#if defined(__SSE2__) || defined(__aarch64__)
+ constexpr size_t kChunkSize = 16;
+ size_t i;
+ for (i = 0; i < base::bits::AlignDown(length, kChunkSize); i += kChunkSize) {
+ char b __attribute__((vector_size(16)));
+ memcpy(&b, source + i, sizeof(b));
+
+ // Compare each element with the ranges for CHAR_QUERY
+ // (see kSharedCharTypeTable), vectorized so that it creates
+ // a mask of which elements match. For completeness, we could
+ // have had (...) | b == 0x21 here, but exclamation marks are
+ // rare and the extra test costs us some time.
+ auto mask = b >= 0x24 && b <= 0x7e && b != 0x27 && b != 0x3c && b != 0x3e;
+
+#ifdef __SSE2__
+ if (_mm_movemask_epi8(reinterpret_cast<__m128i>(mask)) != 0xffff) {
+ return i;
+ }
+#else
+ if (vminvq_u8(reinterpret_cast<uint8x16_t>(mask)) == 0) {
+ return i;
+ }
+#endif
+ }
+ return i;
+#else
+ // Need SIMD support (with fast reductions) for this to be efficient.
+ return 0;
+#endif
+}
+
+template <typename CHAR, typename UCHAR>
+void DoAppendStringOfType(const CHAR* source,
+ size_t length,
SharedCharTypes type,
CanonOutput* output) {
- for (int i = 0; i < length; i++) {
+ size_t i = 0;
+ // We only instantiate this for char, to avoid a Clang crash
+ // (and because Append() does not support converting).
+ if constexpr (sizeof(CHAR) == 1) {
+ if (type == CHAR_QUERY && length >= kMinimumLengthForSIMD) {
+ i = FindInitialQuerySafeString(source, length);
+ output->Append(source, i);
+ }
+ }
+ for (; i < length; i++) {
if (static_cast<UCHAR>(source[i]) >= 0x80) {
// ReadChar will fill the code point with kUnicodeReplacementCharacter
// when the input is invalid, which is what we want.
- unsigned code_point;
+ base_icu::UChar32 code_point;
ReadUTFChar(source, &i, length, &code_point);
AppendUTF8EscapedValue(code_point, output);
} else {
@@ -50,10 +99,12 @@
// This function assumes the input values are all contained in 8-bit,
// although it allows any type. Returns true if input is valid, false if not.
-template<typename CHAR, typename UCHAR>
-void DoAppendInvalidNarrowString(const CHAR* spec, int begin, int end,
+template <typename CHAR, typename UCHAR>
+void DoAppendInvalidNarrowString(const CHAR* spec,
+ size_t begin,
+ size_t end,
CanonOutput* output) {
- for (int i = begin; i < end; i++) {
+ for (size_t i = begin; i < end; i++) {
UCHAR uch = static_cast<UCHAR>(spec[i]);
if (uch >= 0x80) {
// Handle UTF-8/16 encodings. This call will correctly handle the error
@@ -94,7 +145,7 @@
// may get resized while we're overriding a subsequent component. Instead, the
// caller should use the beginning of the |utf8_buffer| as the string pointer
// for all components once all overrides have been prepared.
-bool PrepareUTF16OverrideComponent(const base::char16* override_source,
+bool PrepareUTF16OverrideComponent(const char16_t* override_source,
const Component& override_component,
CanonOutput* utf8_buffer,
Component* dest_component) {
@@ -107,7 +158,8 @@
// Convert to UTF-8.
dest_component->begin = utf8_buffer->length();
success = ConvertUTF16ToUTF8(&override_source[override_component.begin],
- override_component.len, utf8_buffer);
+ static_cast<size_t>(override_component.len),
+ utf8_buffer);
dest_component->len = utf8_buffer->length() - dest_component->begin;
}
}
@@ -117,6 +169,7 @@
} // namespace
// See the header file for this array's declaration.
+// clang-format off
const unsigned char kSharedCharTypeTable[0x100] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00 - 0x0f
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 - 0x1f
@@ -225,6 +278,7 @@
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xe0 - 0xef
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xf0 - 0xff
};
+// clang-format on
const char kHexCharLookup[0x10] = {
'0', '1', '2', '3', '4', '5', '6', '7',
@@ -242,26 +296,26 @@
0, // 0xE0 - 0xFF
};
-const base::char16 kUnicodeReplacementCharacter = 0xfffd;
+const base_icu::UChar32 kUnicodeReplacementCharacter = 0xfffd;
-void AppendStringOfType(const char* source, int length,
+void AppendStringOfType(const char* source,
+ size_t length,
SharedCharTypes type,
CanonOutput* output) {
DoAppendStringOfType<char, unsigned char>(source, length, type, output);
}
-void AppendStringOfType(const base::char16* source, int length,
+void AppendStringOfType(const char16_t* source,
+ size_t length,
SharedCharTypes type,
CanonOutput* output) {
- DoAppendStringOfType<base::char16, base::char16>(
- source, length, type, output);
+ DoAppendStringOfType<char16_t, char16_t>(source, length, type, output);
}
-bool ReadUTFChar(const char* str, int* begin, int length,
- unsigned* code_point_out) {
- // This depends on ints and int32s being the same thing. If they're not, it
- // will fail to compile.
- // TODO(mmenke): This should probably be fixed.
+bool ReadUTFChar(const char* str,
+ size_t* begin,
+ size_t length,
+ base_icu::UChar32* code_point_out) {
if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out) ||
!base::IsValidCharacter(*code_point_out)) {
*code_point_out = kUnicodeReplacementCharacter;
@@ -270,11 +324,10 @@
return true;
}
-bool ReadUTFChar(const base::char16* str, int* begin, int length,
- unsigned* code_point_out) {
- // This depends on ints and int32s being the same thing. If they're not, it
- // will fail to compile.
- // TODO(mmenke): This should probably be fixed.
+bool ReadUTFChar(const char16_t* str,
+ size_t* begin,
+ size_t length,
+ base_icu::UChar32* code_point_out) {
if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out) ||
!base::IsValidCharacter(*code_point_out)) {
*code_point_out = kUnicodeReplacementCharacter;
@@ -283,33 +336,38 @@
return true;
}
-void AppendInvalidNarrowString(const char* spec, int begin, int end,
+void AppendInvalidNarrowString(const char* spec,
+ size_t begin,
+ size_t end,
CanonOutput* output) {
DoAppendInvalidNarrowString<char, unsigned char>(spec, begin, end, output);
}
-void AppendInvalidNarrowString(const base::char16* spec, int begin, int end,
+void AppendInvalidNarrowString(const char16_t* spec,
+ size_t begin,
+ size_t end,
CanonOutput* output) {
- DoAppendInvalidNarrowString<base::char16, base::char16>(
- spec, begin, end, output);
+ DoAppendInvalidNarrowString<char16_t, char16_t>(spec, begin, end, output);
}
-bool ConvertUTF16ToUTF8(const base::char16* input, int input_len,
+bool ConvertUTF16ToUTF8(const char16_t* input,
+ size_t input_len,
CanonOutput* output) {
bool success = true;
- for (int i = 0; i < input_len; i++) {
- unsigned code_point;
+ for (size_t i = 0; i < input_len; i++) {
+ base_icu::UChar32 code_point;
success &= ReadUTFChar(input, &i, input_len, &code_point);
AppendUTF8Value(code_point, output);
}
return success;
}
-bool ConvertUTF8ToUTF16(const char* input, int input_len,
- CanonOutputT<base::char16>* output) {
+bool ConvertUTF8ToUTF16(const char* input,
+ size_t input_len,
+ CanonOutputT<char16_t>* output) {
bool success = true;
- for (int i = 0; i < input_len; i++) {
- unsigned code_point;
+ for (size_t i = 0; i < input_len; i++) {
+ base_icu::UChar32 code_point;
success &= ReadUTFChar(input, &i, input_len, &code_point);
AppendUTF16Value(code_point, output);
}
@@ -324,76 +382,78 @@
const URLComponentSource<char>& repl_source = repl.sources();
const Parsed& repl_parsed = repl.components();
- DoOverrideComponent(repl_source.scheme, repl_parsed.scheme,
- &source->scheme, &parsed->scheme);
+ DoOverrideComponent(repl_source.scheme, repl_parsed.scheme, &source->scheme,
+ &parsed->scheme);
DoOverrideComponent(repl_source.username, repl_parsed.username,
&source->username, &parsed->username);
DoOverrideComponent(repl_source.password, repl_parsed.password,
&source->password, &parsed->password);
// Our host should be empty if not present, so override the default setup.
- DoOverrideComponent(repl_source.host, repl_parsed.host,
- &source->host, &parsed->host);
+ DoOverrideComponent(repl_source.host, repl_parsed.host, &source->host,
+ &parsed->host);
if (parsed->host.len == -1)
parsed->host.len = 0;
- DoOverrideComponent(repl_source.port, repl_parsed.port,
- &source->port, &parsed->port);
- DoOverrideComponent(repl_source.path, repl_parsed.path,
- &source->path, &parsed->path);
- DoOverrideComponent(repl_source.query, repl_parsed.query,
- &source->query, &parsed->query);
- DoOverrideComponent(repl_source.ref, repl_parsed.ref,
- &source->ref, &parsed->ref);
+ DoOverrideComponent(repl_source.port, repl_parsed.port, &source->port,
+ &parsed->port);
+ DoOverrideComponent(repl_source.path, repl_parsed.path, &source->path,
+ &parsed->path);
+ DoOverrideComponent(repl_source.query, repl_parsed.query, &source->query,
+ &parsed->query);
+ DoOverrideComponent(repl_source.ref, repl_parsed.ref, &source->ref,
+ &parsed->ref);
}
bool SetupUTF16OverrideComponents(const char* base,
- const Replacements<base::char16>& repl,
+ const Replacements<char16_t>& repl,
CanonOutput* utf8_buffer,
URLComponentSource<char>* source,
Parsed* parsed) {
bool success = true;
// Get the source and parsed structures of the things we are replacing.
- const URLComponentSource<base::char16>& repl_source = repl.sources();
+ const URLComponentSource<char16_t>& repl_source = repl.sources();
const Parsed& repl_parsed = repl.components();
success &= PrepareUTF16OverrideComponent(
- repl_source.scheme, repl_parsed.scheme,
- utf8_buffer, &parsed->scheme);
- success &= PrepareUTF16OverrideComponent(
- repl_source.username, repl_parsed.username,
- utf8_buffer, &parsed->username);
- success &= PrepareUTF16OverrideComponent(
- repl_source.password, repl_parsed.password,
- utf8_buffer, &parsed->password);
- success &= PrepareUTF16OverrideComponent(
- repl_source.host, repl_parsed.host,
- utf8_buffer, &parsed->host);
- success &= PrepareUTF16OverrideComponent(
- repl_source.port, repl_parsed.port,
- utf8_buffer, &parsed->port);
- success &= PrepareUTF16OverrideComponent(
- repl_source.path, repl_parsed.path,
- utf8_buffer, &parsed->path);
- success &= PrepareUTF16OverrideComponent(
- repl_source.query, repl_parsed.query,
- utf8_buffer, &parsed->query);
- success &= PrepareUTF16OverrideComponent(
- repl_source.ref, repl_parsed.ref,
- utf8_buffer, &parsed->ref);
+ repl_source.scheme, repl_parsed.scheme, utf8_buffer, &parsed->scheme);
+ success &=
+ PrepareUTF16OverrideComponent(repl_source.username, repl_parsed.username,
+ utf8_buffer, &parsed->username);
+ success &=
+ PrepareUTF16OverrideComponent(repl_source.password, repl_parsed.password,
+ utf8_buffer, &parsed->password);
+ success &= PrepareUTF16OverrideComponent(repl_source.host, repl_parsed.host,
+ utf8_buffer, &parsed->host);
+ success &= PrepareUTF16OverrideComponent(repl_source.port, repl_parsed.port,
+ utf8_buffer, &parsed->port);
+ success &= PrepareUTF16OverrideComponent(repl_source.path, repl_parsed.path,
+ utf8_buffer, &parsed->path);
+ success &= PrepareUTF16OverrideComponent(repl_source.query, repl_parsed.query,
+ utf8_buffer, &parsed->query);
+ success &= PrepareUTF16OverrideComponent(repl_source.ref, repl_parsed.ref,
+ utf8_buffer, &parsed->ref);
// PrepareUTF16OverrideComponent will not have set the data pointer since the
// buffer could be resized, invalidating the pointers. We set the data
// pointers for affected components now that the buffer is finalized.
- if (repl_source.scheme) source->scheme = utf8_buffer->data();
- if (repl_source.username) source->username = utf8_buffer->data();
- if (repl_source.password) source->password = utf8_buffer->data();
- if (repl_source.host) source->host = utf8_buffer->data();
- if (repl_source.port) source->port = utf8_buffer->data();
- if (repl_source.path) source->path = utf8_buffer->data();
- if (repl_source.query) source->query = utf8_buffer->data();
- if (repl_source.ref) source->ref = utf8_buffer->data();
+ if (repl_source.scheme)
+ source->scheme = utf8_buffer->data();
+ if (repl_source.username)
+ source->username = utf8_buffer->data();
+ if (repl_source.password)
+ source->password = utf8_buffer->data();
+ if (repl_source.host)
+ source->host = utf8_buffer->data();
+ if (repl_source.port)
+ source->port = utf8_buffer->data();
+ if (repl_source.path)
+ source->path = utf8_buffer->data();
+ if (repl_source.query)
+ source->query = utf8_buffer->data();
+ if (repl_source.ref)
+ source->ref = utf8_buffer->data();
return success;
}
@@ -417,7 +477,7 @@
return 0;
}
-int _itow_s(int value, base::char16* buffer, size_t size_in_chars, int radix) {
+int _itow_s(int value, char16_t* buffer, size_t size_in_chars, int radix) {
if (radix != 10)
return EINVAL;
@@ -431,7 +491,7 @@
}
for (int i = 0; i < written; ++i) {
- buffer[i] = static_cast<base::char16>(temp[i]);
+ buffer[i] = static_cast<char16_t>(temp[i]);
}
buffer[written] = '\0';
return 0;
diff --git a/url/url_canon_internal.h b/url/url_canon_internal.h
index 12498d8..13481f5 100644
--- a/url/url_canon_internal.h
+++ b/url/url_canon_internal.h
@@ -1,4 +1,4 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -10,14 +10,12 @@
// template bloat because everything is inlined when anybody calls any of our
// functions.
-#if defined(STARBOARD)
-#include "starboard/common/string.h"
-#else
+#include <stddef.h>
#include <stdlib.h>
-#endif
-#include "base/logging.h"
-#include "starboard/types.h"
+#include "base/component_export.h"
+#include "base/notreached.h"
+#include "base/third_party/icu/icu_utf.h"
#include "url/url_canon.h"
namespace url {
@@ -79,16 +77,18 @@
// Appends the given string to the output, escaping characters that do not
// match the given |type| in SharedCharTypes.
-void AppendStringOfType(const char* source, int length,
+void AppendStringOfType(const char* source,
+ size_t length,
SharedCharTypes type,
CanonOutput* output);
-void AppendStringOfType(const base::char16* source, int length,
+void AppendStringOfType(const char16_t* source,
+ size_t length,
SharedCharTypes type,
CanonOutput* output);
// Maps the hex numerical values 0x0 to 0xf to the corresponding ASCII digit
// that will be used to represent it.
-URL_EXPORT extern const char kHexCharLookup[0x10];
+COMPONENT_EXPORT(URL) extern const char kHexCharLookup[0x10];
// This lookup table allows fast conversion between ASCII hex letters and their
// corresponding numerical value. The 8-bit range is divided up into 8
@@ -101,15 +101,15 @@
extern const char kCharToHexLookup[8];
// Assumes the input is a valid hex digit! Call IsHexChar before using this.
-inline unsigned char HexCharToValue(unsigned char c) {
+inline int HexCharToValue(unsigned char c) {
return c - kCharToHexLookup[c / 0x20];
}
// Indicates if the given character is a dot or dot equivalent, returning the
// number of characters taken by it. This will be one for a literal dot, 3 for
// an escaped dot. If the character is not a dot, this will return 0.
-template<typename CHAR>
-inline int IsDot(const CHAR* spec, int offset, int end) {
+template <typename CHAR>
+inline size_t IsDot(const CHAR* spec, size_t offset, size_t end) {
if (spec[offset] == '.') {
return 1;
} else if (spec[offset] == '%' && offset + 3 <= end &&
@@ -126,22 +126,21 @@
// required for relative URL resolving to test for scheme equality.
//
// Returns 0 if the input character is not a valid scheme character.
-char CanonicalSchemeChar(base::char16 ch);
+char CanonicalSchemeChar(char16_t ch);
// Write a single character, escaped, to the output. This always escapes: it
// does no checking that thee character requires escaping.
// Escaping makes sense only 8 bit chars, so code works in all cases of
// input parameters (8/16bit).
-template<typename UINCHAR, typename OUTCHAR>
-inline void AppendEscapedChar(UINCHAR ch,
- CanonOutputT<OUTCHAR>* output) {
+template <typename UINCHAR, typename OUTCHAR>
+inline void AppendEscapedChar(UINCHAR ch, CanonOutputT<OUTCHAR>* output) {
output->push_back('%');
- output->push_back(kHexCharLookup[(ch >> 4) & 0xf]);
- output->push_back(kHexCharLookup[ch & 0xf]);
+ output->push_back(static_cast<OUTCHAR>(kHexCharLookup[(ch >> 4) & 0xf]));
+ output->push_back(static_cast<OUTCHAR>(kHexCharLookup[ch & 0xf]));
}
// The character we'll substitute for undecodable or invalid characters.
-extern const base::char16 kUnicodeReplacementCharacter;
+extern const base_icu::UChar32 kUnicodeReplacementCharacter;
// UTF-8 functions ------------------------------------------------------------
@@ -153,8 +152,11 @@
// |*begin| will be updated to point to the last character consumed so it
// can be incremented in a loop and will be ready for the next character.
// (for a single-byte ASCII character, it will not be changed).
-URL_EXPORT bool ReadUTFChar(const char* str, int* begin, int length,
- unsigned* code_point_out);
+COMPONENT_EXPORT(URL)
+bool ReadUTFChar(const char* str,
+ size_t* begin,
+ size_t length,
+ base_icu::UChar32* code_point_out);
// Generic To-UTF-8 converter. This will call the given append method for each
// character that should be appended, with the given output method. Wrappers
@@ -162,37 +164,30 @@
//
// The char_value must have already been checked that it's a valid Unicode
// character.
-template<class Output, void Appender(unsigned char, Output*)>
-inline void DoAppendUTF8(unsigned char_value, Output* output) {
+template <class Output, void Appender(unsigned char, Output*)>
+inline void DoAppendUTF8(base_icu::UChar32 char_value, Output* output) {
+ DCHECK(char_value >= 0);
+ DCHECK(char_value <= 0x10FFFF);
if (char_value <= 0x7f) {
Appender(static_cast<unsigned char>(char_value), output);
} else if (char_value <= 0x7ff) {
// 110xxxxx 10xxxxxx
- Appender(static_cast<unsigned char>(0xC0 | (char_value >> 6)),
- output);
- Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),
- output);
+ Appender(static_cast<unsigned char>(0xC0 | (char_value >> 6)), output);
+ Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)), output);
} else if (char_value <= 0xffff) {
// 1110xxxx 10xxxxxx 10xxxxxx
- Appender(static_cast<unsigned char>(0xe0 | (char_value >> 12)),
- output);
+ Appender(static_cast<unsigned char>(0xe0 | (char_value >> 12)), output);
Appender(static_cast<unsigned char>(0x80 | ((char_value >> 6) & 0x3f)),
output);
- Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),
- output);
- } else if (char_value <= 0x10FFFF) { // Max Unicode code point.
+ Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)), output);
+ } else {
// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
- Appender(static_cast<unsigned char>(0xf0 | (char_value >> 18)),
- output);
+ Appender(static_cast<unsigned char>(0xf0 | (char_value >> 18)), output);
Appender(static_cast<unsigned char>(0x80 | ((char_value >> 12) & 0x3f)),
output);
Appender(static_cast<unsigned char>(0x80 | ((char_value >> 6) & 0x3f)),
output);
- Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),
- output);
- } else {
- // Invalid UTF-8 character (>20 bits).
- NOTREACHED();
+ Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)), output);
}
}
@@ -206,7 +201,7 @@
// Writes the given character to the output as UTF-8. This does NO checking
// of the validity of the Unicode characters; the caller should ensure that
// the value it is appending is valid to append.
-inline void AppendUTF8Value(unsigned char_value, CanonOutput* output) {
+inline void AppendUTF8Value(base_icu::UChar32 char_value, CanonOutput* output) {
DoAppendUTF8<CanonOutput, AppendCharToOutput>(char_value, output);
}
@@ -214,7 +209,8 @@
// characters (even when they are ASCII). This does NO checking of the
// validity of the Unicode characters; the caller should ensure that the value
// it is appending is valid to append.
-inline void AppendUTF8EscapedValue(unsigned char_value, CanonOutput* output) {
+inline void AppendUTF8EscapedValue(base_icu::UChar32 char_value,
+ CanonOutput* output) {
DoAppendUTF8<CanonOutput, AppendEscapedChar>(char_value, output);
}
@@ -228,17 +224,20 @@
// |*begin| will be updated to point to the last character consumed so it
// can be incremented in a loop and will be ready for the next character.
// (for a single-16-bit-word character, it will not be changed).
-URL_EXPORT bool ReadUTFChar(const base::char16* str, int* begin, int length,
- unsigned* code_point_out);
+COMPONENT_EXPORT(URL)
+bool ReadUTFChar(const char16_t* str,
+ size_t* begin,
+ size_t length,
+ base_icu::UChar32* code_point_out);
// Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method.
-inline void AppendUTF16Value(unsigned code_point,
- CanonOutputT<base::char16>* output) {
+inline void AppendUTF16Value(base_icu::UChar32 code_point,
+ CanonOutputT<char16_t>* output) {
if (code_point > 0xffff) {
- output->push_back(static_cast<base::char16>((code_point >> 10) + 0xd7c0));
- output->push_back(static_cast<base::char16>((code_point & 0x3ff) | 0xdc00));
+ output->push_back(static_cast<char16_t>((code_point >> 10) + 0xd7c0));
+ output->push_back(static_cast<char16_t>((code_point & 0x3ff) | 0xdc00));
} else {
- output->push_back(static_cast<base::char16>(code_point));
+ output->push_back(static_cast<char16_t>(code_point));
}
}
@@ -263,24 +262,28 @@
//
// Assumes that ch[begin] is within range in the array, but does not assume
// that any following characters are.
-inline bool AppendUTF8EscapedChar(const base::char16* str, int* begin,
- int length, CanonOutput* output) {
+inline bool AppendUTF8EscapedChar(const char16_t* str,
+ size_t* begin,
+ size_t length,
+ CanonOutput* output) {
// UTF-16 input. ReadUTFChar will handle invalid characters for us and give
// us the kUnicodeReplacementCharacter, so we don't have to do special
// checking after failure, just pass through the failure to the caller.
- unsigned char_value;
+ base_icu::UChar32 char_value;
bool success = ReadUTFChar(str, begin, length, &char_value);
AppendUTF8EscapedValue(char_value, output);
return success;
}
// Handles UTF-8 input. See the wide version above for usage.
-inline bool AppendUTF8EscapedChar(const char* str, int* begin, int length,
+inline bool AppendUTF8EscapedChar(const char* str,
+ size_t* begin,
+ size_t length,
CanonOutput* output) {
// ReadUTF8Char will handle invalid characters for us and give us the
// kUnicodeReplacementCharacter, so we don't have to do special checking
// after failure, just pass through the failure to the caller.
- unsigned ch;
+ base_icu::UChar32 ch;
bool success = ReadUTFChar(str, begin, length, &ch);
AppendUTF8EscapedValue(ch, output);
return success;
@@ -298,15 +301,17 @@
inline bool Is8BitChar(char c) {
return true; // this case is specialized to avoid a warning
}
-inline bool Is8BitChar(base::char16 c) {
+inline bool Is8BitChar(char16_t c) {
return c <= 255;
}
-template<typename CHAR>
-inline bool DecodeEscaped(const CHAR* spec, int* begin, int end,
+template <typename CHAR>
+inline bool DecodeEscaped(const CHAR* spec,
+ size_t* begin,
+ size_t end,
unsigned char* unescaped_value) {
- if (*begin + 3 > end ||
- !Is8BitChar(spec[*begin + 1]) || !Is8BitChar(spec[*begin + 2])) {
+ if (*begin + 3 > end || !Is8BitChar(spec[*begin + 1]) ||
+ !Is8BitChar(spec[*begin + 2])) {
// Invalid escape sequence because there's not enough room, or the
// digits are not ASCII.
return false;
@@ -320,7 +325,8 @@
}
// Valid escape sequence.
- *unescaped_value = (HexCharToValue(first) << 4) + HexCharToValue(second);
+ *unescaped_value = static_cast<unsigned char>((HexCharToValue(first) << 4) +
+ HexCharToValue(second));
*begin += 2;
return true;
}
@@ -332,9 +338,13 @@
// This is used in error cases to append invalid output so that it looks
// approximately correct. Non-error cases should not call this function since
// the escaping rules are not guaranteed!
-void AppendInvalidNarrowString(const char* spec, int begin, int end,
+void AppendInvalidNarrowString(const char* spec,
+ size_t begin,
+ size_t end,
CanonOutput* output);
-void AppendInvalidNarrowString(const base::char16* spec, int begin, int end,
+void AppendInvalidNarrowString(const char16_t* spec,
+ size_t begin,
+ size_t end,
CanonOutput* output);
// Misc canonicalization helpers ----------------------------------------------
@@ -347,14 +357,18 @@
// replacing the invalid characters with the "invalid character". It will
// return false in the failure case, and the caller should not continue as
// normal.
-URL_EXPORT bool ConvertUTF16ToUTF8(const base::char16* input, int input_len,
- CanonOutput* output);
-URL_EXPORT bool ConvertUTF8ToUTF16(const char* input, int input_len,
- CanonOutputT<base::char16>* output);
+COMPONENT_EXPORT(URL)
+bool ConvertUTF16ToUTF8(const char16_t* input,
+ size_t input_len,
+ CanonOutput* output);
+COMPONENT_EXPORT(URL)
+bool ConvertUTF8ToUTF16(const char* input,
+ size_t input_len,
+ CanonOutputT<char16_t>* output);
// Converts from UTF-16 to 8-bit using the character set converter. If the
// converter is NULL, this will use UTF-8.
-void ConvertUTF16ToQueryEncoding(const base::char16* input,
+void ConvertUTF16ToQueryEncoding(const char16_t* input,
const Component& query,
CharsetConverter* converter,
CanonOutput* output);
@@ -390,49 +404,68 @@
// although we will have still done the override with "invalid characters" in
// place of errors.
bool SetupUTF16OverrideComponents(const char* base,
- const Replacements<base::char16>& repl,
+ const Replacements<char16_t>& repl,
CanonOutput* utf8_buffer,
URLComponentSource<char>* source,
Parsed* parsed);
// Implemented in url_canon_path.cc, these are required by the relative URL
// resolver as well, so we declare them here.
-bool CanonicalizePartialPath(const char* spec,
- const Component& path,
- int path_begin_in_output,
- CanonOutput* output);
-bool CanonicalizePartialPath(const base::char16* spec,
- const Component& path,
- int path_begin_in_output,
- CanonOutput* output);
+bool CanonicalizePartialPathInternal(const char* spec,
+ const Component& path,
+ size_t path_begin_in_output,
+ CanonOutput* output);
+bool CanonicalizePartialPathInternal(const char16_t* spec,
+ const Component& path,
+ size_t path_begin_in_output,
+ CanonOutput* output);
-#if !defined(WIN32) || defined(STARBOARD)
+// Find the position of a bona fide Windows drive letter in the given path. If
+// no leading drive letter is found, -1 is returned. This function correctly
+// treats /c:/foo and /./c:/foo as having drive letters, and /def/c:/foo as not
+// having a drive letter.
+//
+// Exported for tests.
+COMPONENT_EXPORT(URL)
+int FindWindowsDriveLetter(const char* spec, int begin, int end);
+COMPONENT_EXPORT(URL)
+int FindWindowsDriveLetter(const char16_t* spec, int begin, int end);
+
+#ifndef WIN32
// Implementations of Windows' int-to-string conversions
-URL_EXPORT int _itoa_s(int value, char* buffer, size_t size_in_chars,
- int radix);
-URL_EXPORT int _itow_s(int value, base::char16* buffer, size_t size_in_chars,
- int radix);
+COMPONENT_EXPORT(URL)
+int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix);
+COMPONENT_EXPORT(URL)
+int _itow_s(int value, char16_t* buffer, size_t size_in_chars, int radix);
// Secure template overloads for these functions
-template<size_t N>
+template <size_t N>
inline int _itoa_s(int value, char (&buffer)[N], int radix) {
return _itoa_s(value, buffer, N, radix);
}
-template<size_t N>
-inline int _itow_s(int value, base::char16 (&buffer)[N], int radix) {
+template <size_t N>
+inline int _itow_s(int value, char16_t (&buffer)[N], int radix) {
return _itow_s(value, buffer, N, radix);
}
// _strtoui64 and strtoull behave the same
inline unsigned long long _strtoui64(const char* nptr,
- char** endptr, int base) {
+ char** endptr,
+ int base) {
return strtoull(nptr, endptr, base);
}
#endif // WIN32
+// The threshold we set to consider SIMD processing, in bytes; there is
+// no deep theory here, it's just set empirically to a value that seems
+// to be good. (We don't really know why there's a slowdown for zero;
+// but a guess would be that there's no point in going into a complex loop
+// with a lot of setup for a five-byte string.)
+static constexpr int kMinimumLengthForSIMD = 50;
+
} // namespace url
#endif // URL_URL_CANON_INTERNAL_H_
diff --git a/url/url_canon_internal_file.h b/url/url_canon_internal_file.h
index 61b99cc..32cb840 100644
--- a/url/url_canon_internal_file.h
+++ b/url/url_canon_internal_file.h
@@ -1,4 +1,4 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
diff --git a/url/url_canon_ip.cc b/url/url_canon_ip.cc
index 23ae7fc..82ed16e 100644
--- a/url/url_canon_ip.cc
+++ b/url/url_canon_ip.cc
@@ -1,16 +1,17 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "url/url_canon_ip.h"
+#include <stdint.h>
#include <stdlib.h>
+
#include <limits>
-#include "base/logging.h"
-#include "starboard/common/string.h"
-#include "starboard/types.h"
+#include "base/check.h"
#include "url/url_canon_internal.h"
+#include "url/url_features.h"
namespace url {
@@ -31,56 +32,6 @@
}
}
-template<typename CHAR, typename UCHAR>
-bool DoFindIPv4Components(const CHAR* spec,
- const Component& host,
- Component components[4]) {
- if (!host.is_nonempty())
- return false;
-
- int cur_component = 0; // Index of the component we're working on.
- int cur_component_begin = host.begin; // Start of the current component.
- int end = host.end();
- for (int i = host.begin; /* nothing */; i++) {
- if (i >= end || spec[i] == '.') {
- // Found the end of the current component.
- int component_len = i - cur_component_begin;
- components[cur_component] = Component(cur_component_begin, component_len);
-
- // The next component starts after the dot.
- cur_component_begin = i + 1;
- cur_component++;
-
- // Don't allow empty components (two dots in a row), except we may
- // allow an empty component at the end (this would indicate that the
- // input ends in a dot). We also want to error if the component is
- // empty and it's the only component (cur_component == 1).
- if (component_len == 0 && (i < end || cur_component == 1))
- return false;
-
- if (i >= end)
- break; // End of the input.
-
- if (cur_component == 4) {
- // Anything else after the 4th component is an error unless it is a
- // dot that would otherwise be treated as the end of input.
- if (spec[i] == '.' && i + 1 == end)
- break;
- return false;
- }
- } else if (static_cast<UCHAR>(spec[i]) >= 0x80 ||
- !IsIPv4Char(static_cast<unsigned char>(spec[i]))) {
- // Invalid character for an IPv4 address.
- return false;
- }
- }
-
- // Fill in any unused components.
- while (cur_component < 4)
- components[cur_component++] = Component();
- return true;
-}
-
// Converts an IPv4 component to a 32-bit number, while checking for overflow.
//
// Possible return values:
@@ -88,13 +39,15 @@
// - BROKEN - The input was numeric, but too large for a 32-bit field.
// - NEUTRAL - Input was not numeric.
//
-// The input is assumed to be ASCII. FindIPv4Components should have stripped
-// out any input that is greater than 7 bits. The components are assumed
-// to be non-empty.
+// The input is assumed to be ASCII. Empty components are reported as NEUTRAL.
template<typename CHAR>
CanonHostInfo::Family IPv4ComponentToNumber(const CHAR* spec,
const Component& component,
uint32_t* number) {
+ // Empty components are considered non-numeric.
+ if (component.is_empty())
+ return CanonHostInfo::NEUTRAL;
+
// Figure out the base
SharedCharTypes base;
int base_prefix_len = 0; // Size of the prefix for this base.
@@ -126,14 +79,25 @@
const int kMaxComponentLen = 16;
char buf[kMaxComponentLen + 1]; // digits + '\0'
int dest_i = 0;
+ bool may_be_broken_octal = false;
for (int i = component.begin + base_prefix_len; i < component.end(); i++) {
+ if (spec[i] >= 0x80)
+ return CanonHostInfo::NEUTRAL;
+
// We know the input is 7-bit, so convert to narrow (if this is the wide
// version of the template) by casting.
char input = static_cast<char>(spec[i]);
// Validate that this character is OK for the given base.
- if (!IsCharOfType(input, base))
- return CanonHostInfo::NEUTRAL;
+ if (!IsCharOfType(input, base)) {
+ if (IsCharOfType(input, CHAR_DEC)) {
+ // Entirely numeric components with leading 0s that aren't octal are
+ // considered broken.
+ may_be_broken_octal = true;
+ } else {
+ return CanonHostInfo::NEUTRAL;
+ }
+ }
// Fill the buffer, if there's space remaining. This check allows us to
// verify that all characters are numeric, even those that don't fit.
@@ -141,6 +105,9 @@
buf[dest_i++] = input;
}
+ if (may_be_broken_octal)
+ return CanonHostInfo::BROKEN;
+
buf[dest_i] = '\0';
// Use the 64-bit strtoi so we get a big number (no hex, decimal, or octal
@@ -157,64 +124,76 @@
}
// See declaration of IPv4AddressToNumber for documentation.
-template<typename CHAR>
+template <typename CHAR, typename UCHAR>
CanonHostInfo::Family DoIPv4AddressToNumber(const CHAR* spec,
- const Component& host,
+ Component host,
unsigned char address[4],
int* num_ipv4_components) {
- // The identified components. Not all may exist.
- Component components[4];
- if (!FindIPv4Components(spec, host, components))
+ // Ignore terminal dot, if present.
+ if (host.is_nonempty() && spec[host.end() - 1] == '.')
+ --host.len;
+
+ // Do nothing if empty.
+ if (host.is_empty())
return CanonHostInfo::NEUTRAL;
- // Convert existing components to digits. Values up to
- // |existing_components| will be valid.
+ // Read component values. The first `existing_components` of them are
+ // populated front to back, with the first one corresponding to the last
+ // component, which allows for early exit if the last component isn't a
+ // number.
uint32_t component_values[4];
int existing_components = 0;
- // Set to true if one or more components are BROKEN. BROKEN is only
- // returned if all components are IPV4 or BROKEN, so, for example,
- // 12345678912345.de returns NEUTRAL rather than broken.
- bool broken = false;
- for (int i = 0; i < 4; i++) {
- if (components[i].len <= 0)
+ int current_component_end = host.end();
+ int current_position = current_component_end;
+ while (true) {
+ // If this is not the first character of a component, keep scanning
+ // backwards toward the start of the component.
+ if (current_position != host.begin && spec[current_position - 1] != '.') {
+ --current_position;
continue;
- CanonHostInfo::Family family = IPv4ComponentToNumber(
- spec, components[i], &component_values[existing_components]);
-
- if (family == CanonHostInfo::BROKEN) {
- broken = true;
- } else if (family != CanonHostInfo::IPV4) {
- // Stop if we hit a non-BROKEN invalid non-empty component.
- return family;
}
- existing_components++;
+ CanonHostInfo::Family family = IPv4ComponentToNumber(
+ spec,
+ Component(current_position, current_component_end - current_position),
+ &component_values[existing_components]);
+
+ // If `family` is NEUTRAL and this is the last component, return NEUTRAL. If
+ // `family` is NEUTRAL but not the last component, this is considered a
+ // BROKEN IPv4 address, as opposed to a non-IPv4 hostname.
+ if (family == CanonHostInfo::NEUTRAL && existing_components == 0)
+ return CanonHostInfo::NEUTRAL;
+
+ if (family != CanonHostInfo::IPV4)
+ return CanonHostInfo::BROKEN;
+
+ ++existing_components;
+
+ // If this is the final component, nothing else to do.
+ if (current_position == host.begin)
+ break;
+
+ // If there are more than 4 components, fail.
+ if (existing_components == 4)
+ return CanonHostInfo::BROKEN;
+
+ current_component_end = current_position - 1;
+ --current_position;
}
- if (broken)
- return CanonHostInfo::BROKEN;
-
- // Use that sequence of numbers to fill out the 4-component IP address.
+ // Use `component_values` to fill out the 4-component IP address.
// First, process all components but the last, while making sure each fits
// within an 8-bit field.
- for (int i = 0; i < existing_components - 1; i++) {
+ for (int i = existing_components - 1; i > 0; i--) {
if (component_values[i] > std::numeric_limits<uint8_t>::max())
return CanonHostInfo::BROKEN;
- address[i] = static_cast<unsigned char>(component_values[i]);
+ address[existing_components - i - 1] =
+ static_cast<unsigned char>(component_values[i]);
}
- // Next, consume the last component to fill in the remaining bytes.
- // Work around a gcc 4.9 bug. crbug.com/392872
-#if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4)
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Warray-bounds"
-#endif
- uint32_t last_value = component_values[existing_components - 1];
-#if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4)
-#pragma GCC diagnostic pop
-#endif
+ uint32_t last_value = component_values[0];
for (int i = 3; i >= existing_components - 1; i--) {
address[i] = static_cast<unsigned char>(last_value);
last_value >>= 8;
@@ -324,7 +303,7 @@
// Zero-out the info.
parsed->reset();
- if (!host.is_nonempty())
+ if (host.is_empty())
return false;
// The index for start and end of address range (no brackets).
@@ -469,7 +448,7 @@
unsigned char address[16]) {
// Make sure the component is bounded by '[' and ']'.
int end = host.end();
- if (!host.is_nonempty() || spec[host.begin] != '[' || spec[end - 1] != ']')
+ if (host.is_empty() || spec[host.begin] != '[' || spec[end - 1] != ']')
return false;
// Exclude the square brackets.
@@ -512,13 +491,24 @@
// it to |address|.
if (ipv6_parsed.ipv4_component.is_valid()) {
// Append the 32-bit number to |address|.
- int ignored_num_ipv4_components;
+ int num_ipv4_components = 0;
+ // IPv4AddressToNumber will remove the trailing dot from the component.
+ bool trailing_dot = ipv6_parsed.ipv4_component.is_nonempty() &&
+ spec[ipv6_parsed.ipv4_component.end() - 1] == '.';
+ // The URL standard requires the embedded IPv4 address to consist of
+ // exactly 4 parts and disallows terminal dots.
+ // See https://url.spec.whatwg.org/#concept-ipv6-parser
if (CanonHostInfo::IPV4 !=
- IPv4AddressToNumber(spec,
- ipv6_parsed.ipv4_component,
- &address[cur_index_in_address],
- &ignored_num_ipv4_components))
+ IPv4AddressToNumber(spec, ipv6_parsed.ipv4_component,
+ &address[cur_index_in_address],
+ &num_ipv4_components)) {
return false;
+ }
+ if ((num_ipv4_components != 4 || trailing_dot) &&
+ base::FeatureList::IsEnabled(
+ url::kStrictIPv4EmbeddedIPv6AddressParsing)) {
+ return false;
+ }
}
return true;
@@ -653,19 +643,6 @@
}
}
-bool FindIPv4Components(const char* spec,
- const Component& host,
- Component components[4]) {
- return DoFindIPv4Components<char, unsigned char>(spec, host, components);
-}
-
-bool FindIPv4Components(const base::char16* spec,
- const Component& host,
- Component components[4]) {
- return DoFindIPv4Components<base::char16, base::char16>(
- spec, host, components);
-}
-
void CanonicalizeIPAddress(const char* spec,
const Component& host,
CanonOutput* output,
@@ -678,15 +655,15 @@
return;
}
-void CanonicalizeIPAddress(const base::char16* spec,
+void CanonicalizeIPAddress(const char16_t* spec,
const Component& host,
CanonOutput* output,
CanonHostInfo* host_info) {
- if (DoCanonicalizeIPv4Address<base::char16, base::char16>(
- spec, host, output, host_info))
+ if (DoCanonicalizeIPv4Address<char16_t, char16_t>(spec, host, output,
+ host_info))
return;
- if (DoCanonicalizeIPv6Address<base::char16, base::char16>(
- spec, host, output, host_info))
+ if (DoCanonicalizeIPv6Address<char16_t, char16_t>(spec, host, output,
+ host_info))
return;
}
@@ -694,15 +671,16 @@
const Component& host,
unsigned char address[4],
int* num_ipv4_components) {
- return DoIPv4AddressToNumber<char>(spec, host, address, num_ipv4_components);
+ return DoIPv4AddressToNumber<char, unsigned char>(spec, host, address,
+ num_ipv4_components);
}
-CanonHostInfo::Family IPv4AddressToNumber(const base::char16* spec,
+CanonHostInfo::Family IPv4AddressToNumber(const char16_t* spec,
const Component& host,
unsigned char address[4],
int* num_ipv4_components) {
- return DoIPv4AddressToNumber<base::char16>(
- spec, host, address, num_ipv4_components);
+ return DoIPv4AddressToNumber<char16_t, char16_t>(spec, host, address,
+ num_ipv4_components);
}
bool IPv6AddressToNumber(const char* spec,
@@ -711,10 +689,10 @@
return DoIPv6AddressToNumber<char, unsigned char>(spec, host, address);
}
-bool IPv6AddressToNumber(const base::char16* spec,
+bool IPv6AddressToNumber(const char16_t* spec,
const Component& host,
unsigned char address[16]) {
- return DoIPv6AddressToNumber<base::char16, base::char16>(spec, host, address);
+ return DoIPv6AddressToNumber<char16_t, char16_t>(spec, host, address);
}
} // namespace url
diff --git a/url/url_canon_ip.h b/url/url_canon_ip.h
index 937bd46..86be08a 100644
--- a/url/url_canon_ip.h
+++ b/url/url_canon_ip.h
@@ -1,49 +1,23 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef URL_URL_CANON_IP_H_
#define URL_URL_CANON_IP_H_
-#include "base/strings/string16.h"
+#include "base/component_export.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
-#include "url/url_export.h"
namespace url {
// Writes the given IPv4 address to |output|.
-URL_EXPORT void AppendIPv4Address(const unsigned char address[4],
- CanonOutput* output);
+COMPONENT_EXPORT(URL)
+void AppendIPv4Address(const unsigned char address[4], CanonOutput* output);
// Writes the given IPv6 address to |output|.
-URL_EXPORT void AppendIPv6Address(const unsigned char address[16],
- CanonOutput* output);
-
-// Searches the host name for the portions of the IPv4 address. On success,
-// each component will be placed into |components| and it will return true.
-// It will return false if the host can not be separated as an IPv4 address
-// or if there are any non-7-bit characters or other characters that can not
-// be in an IP address. (This is important so we fail as early as possible for
-// common non-IP hostnames.)
-//
-// Not all components may exist. If there are only 3 components, for example,
-// the last one will have a length of -1 or 0 to indicate it does not exist.
-//
-// Note that many platforms' inet_addr will ignore everything after a space
-// in certain circumstances if the stuff before the space looks like an IP
-// address. IE6 is included in this. We do NOT handle this case. In many cases,
-// the browser's canonicalization will get run before this which converts
-// spaces to %20 (in the case of IE7) or rejects them (in the case of Mozilla),
-// so this code path never gets hit. Our host canonicalization will notice
-// these spaces and escape them, which will make IP address finding fail. This
-// seems like better behavior than stripping after a space.
-URL_EXPORT bool FindIPv4Components(const char* spec,
- const Component& host,
- Component components[4]);
-URL_EXPORT bool FindIPv4Components(const base::char16* spec,
- const Component& host,
- Component components[4]);
+COMPONENT_EXPORT(URL)
+void AppendIPv6Address(const unsigned char address[16], CanonOutput* output);
// Converts an IPv4 address to a 32-bit number (network byte order).
//
@@ -56,26 +30,30 @@
//
// On success, |num_ipv4_components| will be populated with the number of
// components in the IPv4 address.
-URL_EXPORT CanonHostInfo::Family IPv4AddressToNumber(const char* spec,
- const Component& host,
- unsigned char address[4],
- int* num_ipv4_components);
-URL_EXPORT CanonHostInfo::Family IPv4AddressToNumber(const base::char16* spec,
- const Component& host,
- unsigned char address[4],
- int* num_ipv4_components);
+COMPONENT_EXPORT(URL)
+CanonHostInfo::Family IPv4AddressToNumber(const char* spec,
+ const Component& host,
+ unsigned char address[4],
+ int* num_ipv4_components);
+COMPONENT_EXPORT(URL)
+CanonHostInfo::Family IPv4AddressToNumber(const char16_t* spec,
+ const Component& host,
+ unsigned char address[4],
+ int* num_ipv4_components);
// Converts an IPv6 address to a 128-bit number (network byte order), returning
// true on success. False means that the input was not a valid IPv6 address.
//
// NOTE that |host| is expected to be surrounded by square brackets.
// i.e. "[::1]" rather than "::1".
-URL_EXPORT bool IPv6AddressToNumber(const char* spec,
- const Component& host,
- unsigned char address[16]);
-URL_EXPORT bool IPv6AddressToNumber(const base::char16* spec,
- const Component& host,
- unsigned char address[16]);
+COMPONENT_EXPORT(URL)
+bool IPv6AddressToNumber(const char* spec,
+ const Component& host,
+ unsigned char address[16]);
+COMPONENT_EXPORT(URL)
+bool IPv6AddressToNumber(const char16_t* spec,
+ const Component& host,
+ unsigned char address[16]);
} // namespace url
diff --git a/url/url_canon_mailtourl.cc b/url/url_canon_mailtourl.cc
index 8a7ff1a..e48b642 100644
--- a/url/url_canon_mailtourl.cc
+++ b/url/url_canon_mailtourl.cc
@@ -1,4 +1,4 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -57,8 +57,8 @@
// Copy the path using path URL's more lax escaping rules.
// We convert to UTF-8 and escape non-ASCII, but leave most
// ASCII characters alone.
- int end = parsed.path.end();
- for (int i = parsed.path.begin; i < end; ++i) {
+ size_t end = static_cast<size_t>(parsed.path.end());
+ for (size_t i = static_cast<size_t>(parsed.path.begin); i < end; ++i) {
UCHAR uch = static_cast<UCHAR>(source.path[i]);
if (ShouldEncodeMailboxCharacter<UCHAR>(uch))
success &= AppendUTF8EscapedChar(source.path, &i, end, output);
@@ -90,13 +90,13 @@
URLComponentSource<char>(spec), parsed, output, new_parsed);
}
-bool CanonicalizeMailtoURL(const base::char16* spec,
+bool CanonicalizeMailtoURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
CanonOutput* output,
Parsed* new_parsed) {
- return DoCanonicalizeMailtoURL<base::char16, base::char16>(
- URLComponentSource<base::char16>(spec), parsed, output, new_parsed);
+ return DoCanonicalizeMailtoURL<char16_t, char16_t>(
+ URLComponentSource<char16_t>(spec), parsed, output, new_parsed);
}
bool ReplaceMailtoURL(const char* base,
@@ -113,7 +113,7 @@
bool ReplaceMailtoURL(const char* base,
const Parsed& base_parsed,
- const Replacements<base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
CanonOutput* output,
Parsed* new_parsed) {
RawCanonOutput<1024> utf8;
diff --git a/url/url_canon_path.cc b/url/url_canon_path.cc
index 543d15a..676468d 100644
--- a/url/url_canon_path.cc
+++ b/url/url_canon_path.cc
@@ -1,11 +1,12 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <limits.h>
-#include "base/logging.h"
-#include "starboard/types.h"
+#include "base/check.h"
+#include "base/check_op.h"
+#include "third_party/abseil-cpp/absl/types/optional.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
#include "url/url_parse_internal.h"
@@ -20,7 +21,8 @@
// table below more clear when neither ESCAPE or UNESCAPE is set.
PASS = 0,
- // This character requires special handling in DoPartialPath. Doing this test
+ // This character requires special handling in DoPartialPathInternal. Doing
+ // this test
// first allows us to filter out the common cases of regular characters that
// can be directly copied.
SPECIAL = 1,
@@ -100,9 +102,11 @@
// If the input is "../foo", |after_dot| = 1, |end| = 6, and
// at the end, |*consumed_len| = 2 for the "./" this function consumed. The
// original dot length should be handled by the caller.
-template<typename CHAR>
-DotDisposition ClassifyAfterDot(const CHAR* spec, int after_dot,
- int end, int* consumed_len) {
+template <typename CHAR>
+DotDisposition ClassifyAfterDot(const CHAR* spec,
+ size_t after_dot,
+ size_t end,
+ size_t* consumed_len) {
if (after_dot == end) {
// Single dot at the end.
*consumed_len = 0;
@@ -114,9 +118,9 @@
return DIRECTORY_CUR;
}
- int second_dot_len = IsDot(spec, after_dot, end);
+ size_t second_dot_len = IsDot(spec, after_dot, end);
if (second_dot_len) {
- int after_second_dot = after_dot + second_dot_len;
+ size_t after_second_dot = after_dot + second_dot_len;
if (after_second_dot == end) {
// Double dot at the end.
*consumed_len = second_dot_len;
@@ -147,19 +151,19 @@
// because it is run only on the canonical output.
//
// The output is guaranteed to end in a slash when this function completes.
-void BackUpToPreviousSlash(int path_begin_in_output,
- CanonOutput* output) {
- DCHECK(output->length() > 0);
+void BackUpToPreviousSlash(size_t path_begin_in_output, CanonOutput* output) {
+ CHECK(output->length() > 0);
+ CHECK(path_begin_in_output < output->length());
- int i = output->length() - 1;
+ size_t i = output->length() - 1;
DCHECK(output->at(i) == '/');
if (i == path_begin_in_output)
return; // We're at the first slash, nothing to do.
// Now back up (skipping the trailing slash) until we find another slash.
- i--;
- while (output->at(i) != '/' && i > path_begin_in_output)
- i--;
+ do {
+ --i;
+ } while (output->at(i) != '/' && i > path_begin_in_output);
// Now shrink the output to just include that last slash we found.
output->set_length(i + 1);
@@ -192,13 +196,13 @@
// ends with a '%' followed by one or two characters, and the '%' is the one
// pointed to by |last_invalid_percent_index|. The last character in the string
// was just unescaped.
-template<typename CHAR>
+template <typename CHAR>
void CheckForNestedEscapes(const CHAR* spec,
- int next_input_index,
- int input_len,
- int last_invalid_percent_index,
+ size_t next_input_index,
+ size_t input_len,
+ size_t last_invalid_percent_index,
CanonOutput* output) {
- const int length = output->length();
+ const size_t length = output->length();
const char last_unescaped_char = output->at(length - 1);
// If |output| currently looks like "%c", we need to try appending the next
@@ -217,7 +221,7 @@
}
// Now output ends like "%cc". Try to unescape this.
- int begin = last_invalid_percent_index;
+ size_t begin = last_invalid_percent_index;
unsigned char temp;
if (DecodeEscaped(output->data(), &begin, output->length(), &temp)) {
// New escape sequence found. Overwrite the characters following the '%'
@@ -235,10 +239,8 @@
}
}
-// Appends the given path to the output. It assumes that if the input path
-// starts with a slash, it should be copied to the output. If no path has
-// already been appended to the output (the case when not resolving
-// relative URLs), the path should begin with a slash.
+// Canonicalizes and appends the given path to the output. It assumes that if
+// the input path starts with a slash, it should be copied to the output.
//
// If there are already path components (this mode is used when appending
// relative paths for resolving), it assumes that the output already has
@@ -248,20 +250,23 @@
// We do not collapse multiple slashes in a row to a single slash. It seems
// no web browsers do this, and we don't want incompatibilities, even though
// it would be correct for most systems.
-template<typename CHAR, typename UCHAR>
-bool DoPartialPath(const CHAR* spec,
- const Component& path,
- int path_begin_in_output,
- CanonOutput* output) {
- int end = path.end();
+template <typename CHAR, typename UCHAR>
+bool DoPartialPathInternal(const CHAR* spec,
+ const Component& path,
+ size_t path_begin_in_output,
+ CanonOutput* output) {
+ if (path.is_empty())
+ return true;
+
+ size_t end = static_cast<size_t>(path.end());
// We use this variable to minimize the amount of work done when unescaping --
// we'll only call CheckForNestedEscapes() when this points at one of the last
// couple of characters in |output|.
- int last_invalid_percent_index = INT_MIN;
+ absl::optional<size_t> last_invalid_percent_index;
bool success = true;
- for (int i = path.begin; i < end; i++) {
+ for (size_t i = static_cast<size_t>(path.begin); i < end; i++) {
UCHAR uch = static_cast<UCHAR>(spec[i]);
if (sizeof(CHAR) > 1 && uch >= 0x80) {
// We only need to test wide input for having non-ASCII characters. For
@@ -276,22 +281,18 @@
unsigned char flags = kPathCharLookup[out_ch];
if (flags & SPECIAL) {
// Needs special handling of some sort.
- int dotlen;
+ size_t dotlen;
if ((dotlen = IsDot(spec, i, end)) > 0) {
- // See if this dot was preceded by a slash in the output. We
- // assume that when canonicalizing paths, they will always
- // start with a slash and not a dot, so we don't have to
- // bounds check the output.
+ // See if this dot was preceded by a slash in the output.
//
// Note that we check this in the case of dots so we don't have to
// special case slashes. Since slashes are much more common than
// dots, this actually increases performance measurably (though
// slightly).
- DCHECK(output->length() > path_begin_in_output);
if (output->length() > path_begin_in_output &&
output->at(output->length() - 1) == '/') {
// Slash followed by a dot, check to see if this is means relative
- int consumed_len;
+ size_t consumed_len;
switch (ClassifyAfterDot<CHAR>(spec, i + dotlen, end,
&consumed_len)) {
case NOT_A_DIRECTORY:
@@ -304,6 +305,9 @@
break;
case DIRECTORY_UP:
BackUpToPreviousSlash(path_begin_in_output, output);
+ if (last_invalid_percent_index >= output->length()) {
+ last_invalid_percent_index = absl::nullopt;
+ }
i += dotlen + consumed_len - 1;
break;
}
@@ -334,9 +338,12 @@
// '%' from a previously-detected invalid escape sequence, we
// might have an input string with problematic nested escape
// sequences; detect and fix them.
- if (last_invalid_percent_index >= (output->length() - 3)) {
+ if (last_invalid_percent_index.has_value() &&
+ ((last_invalid_percent_index.value() + 3) >=
+ output->length())) {
CheckForNestedEscapes(spec, i + 1, end,
- last_invalid_percent_index, output);
+ last_invalid_percent_index.value(),
+ output);
}
} else {
// Either this is an invalid escaped character, or it's a valid
@@ -378,6 +385,21 @@
return success;
}
+// Performs the same logic as DoPartialPathInternal(), but also records the
+// range written to |output| in |out_path|, similar to DoPath(). Returns
+// true if successful.
+template <typename CHAR, typename UCHAR>
+bool DoPartialPath(const CHAR* spec,
+ const Component& path,
+ CanonOutput* output,
+ Component* out_path) {
+ out_path->begin = output->length();
+ bool success =
+ DoPartialPathInternal<CHAR, UCHAR>(spec, path, out_path->begin, output);
+ out_path->len = output->length() - out_path->begin;
+ return success;
+}
+
template<typename CHAR, typename UCHAR>
bool DoPath(const CHAR* spec,
const Component& path,
@@ -385,7 +407,7 @@
Component* out_path) {
bool success = true;
out_path->begin = output->length();
- if (path.len > 0) {
+ if (path.is_nonempty()) {
// Write out an initial slash if the input has none. If we just parse a URL
// and then canonicalize it, it will of course have a slash already. This
// check is for the replacement and relative URL resolving cases of file
@@ -393,7 +415,8 @@
if (!IsURLSlash(spec[path.begin]))
output->push_back('/');
- success = DoPartialPath<CHAR, UCHAR>(spec, path, out_path->begin, output);
+ success =
+ DoPartialPathInternal<CHAR, UCHAR>(spec, path, out_path->begin, output);
} else {
// No input, canonical path is a slash.
output->push_back('/');
@@ -411,28 +434,41 @@
return DoPath<char, unsigned char>(spec, path, output, out_path);
}
-bool CanonicalizePath(const base::char16* spec,
+bool CanonicalizePath(const char16_t* spec,
const Component& path,
CanonOutput* output,
Component* out_path) {
- return DoPath<base::char16, base::char16>(spec, path, output, out_path);
+ return DoPath<char16_t, char16_t>(spec, path, output, out_path);
}
bool CanonicalizePartialPath(const char* spec,
const Component& path,
- int path_begin_in_output,
- CanonOutput* output) {
- return DoPartialPath<char, unsigned char>(spec, path, path_begin_in_output,
- output);
+ CanonOutput* output,
+ Component* out_path) {
+ return DoPartialPath<char, unsigned char>(spec, path, output, out_path);
}
-bool CanonicalizePartialPath(const base::char16* spec,
+bool CanonicalizePartialPath(const char16_t* spec,
const Component& path,
- int path_begin_in_output,
- CanonOutput* output) {
- return DoPartialPath<base::char16, base::char16>(spec, path,
- path_begin_in_output,
- output);
+ CanonOutput* output,
+ Component* out_path) {
+ return DoPartialPath<char16_t, char16_t>(spec, path, output, out_path);
+}
+
+bool CanonicalizePartialPathInternal(const char* spec,
+ const Component& path,
+ size_t path_begin_in_output,
+ CanonOutput* output) {
+ return DoPartialPathInternal<char, unsigned char>(
+ spec, path, path_begin_in_output, output);
+}
+
+bool CanonicalizePartialPathInternal(const char16_t* spec,
+ const Component& path,
+ size_t path_begin_in_output,
+ CanonOutput* output) {
+ return DoPartialPathInternal<char16_t, char16_t>(
+ spec, path, path_begin_in_output, output);
}
} // namespace url
diff --git a/url/url_canon_pathurl.cc b/url/url_canon_pathurl.cc
index 494fbda..85983a8 100644
--- a/url/url_canon_pathurl.cc
+++ b/url/url_canon_pathurl.cc
@@ -1,4 +1,4 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -16,25 +16,27 @@
// Canonicalize the given |component| from |source| into |output| and
// |new_component|. If |separator| is non-zero, it is pre-pended to |output|
// prior to the canonicalized component; i.e. for the '?' or '#' characters.
-template<typename CHAR, typename UCHAR>
-bool DoCanonicalizePathComponent(const CHAR* source,
+template <typename CHAR, typename UCHAR>
+void DoCanonicalizePathComponent(const CHAR* source,
const Component& component,
char separator,
CanonOutput* output,
Component* new_component) {
- bool success = true;
if (component.is_valid()) {
if (separator)
output->push_back(separator);
// Copy the path using path URL's more lax escaping rules (think for
- // javascript:). We convert to UTF-8 and escape non-ASCII, but leave all
- // ASCII characters alone. This helps readability of JavaStript.
+ // javascript:). We convert to UTF-8 and escape characters from the
+ // C0 control percent-encode set, but leave all other characters alone.
+ // This helps readability of JavaScript.
+ // https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state
+ // https://url.spec.whatwg.org/#c0-control-percent-encode-set
new_component->begin = output->length();
- int end = component.end();
- for (int i = component.begin; i < end; i++) {
+ size_t end = static_cast<size_t>(component.end());
+ for (size_t i = static_cast<size_t>(component.begin); i < end; i++) {
UCHAR uch = static_cast<UCHAR>(source[i]);
- if (uch < 0x20 || uch >= 0x80)
- success &= AppendUTF8EscapedChar(source, &i, end, output);
+ if (uch < 0x20 || uch > 0x7E)
+ AppendUTF8EscapedChar(source, &i, end, output);
else
output->push_back(static_cast<char>(uch));
}
@@ -43,7 +45,6 @@
// Empty part.
new_component->reset();
}
- return success;
}
template <typename CHAR, typename UCHAR>
@@ -61,14 +62,20 @@
new_parsed->password.reset();
new_parsed->host.reset();
new_parsed->port.reset();
- // We allow path URLs to have the path, query and fragment components, but we
- // will canonicalize each of the via the weaker path URL rules.
- success &= DoCanonicalizePathComponent<CHAR, UCHAR>(
- source.path, parsed.path, '\0', output, &new_parsed->path);
- success &= DoCanonicalizePathComponent<CHAR, UCHAR>(
- source.query, parsed.query, '?', output, &new_parsed->query);
- success &= DoCanonicalizePathComponent<CHAR, UCHAR>(
- source.ref, parsed.ref, '#', output, &new_parsed->ref);
+
+ // Canonicalize path via the weaker path URL rules.
+ //
+ // Note: parsing the path part should never cause a failure, see
+ // https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state
+ DoCanonicalizePathComponent<CHAR, UCHAR>(source.path, parsed.path, '\0',
+ output, &new_parsed->path);
+
+ // Similar to mailto:, always use the default UTF-8 charset converter for
+ // query.
+ CanonicalizeQuery(source.query, parsed.query, nullptr, output,
+ &new_parsed->query);
+
+ CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
return success;
}
@@ -84,13 +91,29 @@
URLComponentSource<char>(spec), parsed, output, new_parsed);
}
-bool CanonicalizePathURL(const base::char16* spec,
+bool CanonicalizePathURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
CanonOutput* output,
Parsed* new_parsed) {
- return DoCanonicalizePathURL<base::char16, base::char16>(
- URLComponentSource<base::char16>(spec), parsed, output, new_parsed);
+ return DoCanonicalizePathURL<char16_t, char16_t>(
+ URLComponentSource<char16_t>(spec), parsed, output, new_parsed);
+}
+
+void CanonicalizePathURLPath(const char* source,
+ const Component& component,
+ CanonOutput* output,
+ Component* new_component) {
+ DoCanonicalizePathComponent<char, unsigned char>(source, component, '\0',
+ output, new_component);
+}
+
+void CanonicalizePathURLPath(const char16_t* source,
+ const Component& component,
+ CanonOutput* output,
+ Component* new_component) {
+ DoCanonicalizePathComponent<char16_t, char16_t>(source, component, '\0',
+ output, new_component);
}
bool ReplacePathURL(const char* base,
@@ -107,7 +130,7 @@
bool ReplacePathURL(const char* base,
const Parsed& base_parsed,
- const Replacements<base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
CanonOutput* output,
Parsed* new_parsed) {
RawCanonOutput<1024> utf8;
diff --git a/url/url_canon_query.cc b/url/url_canon_query.cc
index bf59d10..47d20d1 100644
--- a/url/url_canon_query.cc
+++ b/url/url_canon_query.cc
@@ -1,4 +1,4 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -39,18 +39,6 @@
namespace {
-// Returns true if the characters starting at |begin| and going until |end|
-// (non-inclusive) are all representable in 7-bits.
-template<typename CHAR, typename UCHAR>
-bool IsAllASCII(const CHAR* spec, const Component& query) {
- int end = query.end();
- for (int i = query.begin; i < end; i++) {
- if (static_cast<UCHAR>(spec[i]) >= 0x80)
- return false;
- }
- return true;
-}
-
// Appends the given string to the output, escaping characters that do not
// match the given |type| in SharedCharTypes. This version will accept 8 or 16
// bit characters, but assumes that they have only 7-bit values. It also assumes
@@ -72,45 +60,43 @@
const Component& query,
CharsetConverter* converter,
CanonOutput* output) {
+ DCHECK(query.is_valid());
// This function will replace any misencoded values with the invalid
// character. This is what we want so we don't have to check for error.
RawCanonOutputW<1024> utf16;
- ConvertUTF8ToUTF16(&spec[query.begin], query.len, &utf16);
+ ConvertUTF8ToUTF16(&spec[query.begin], static_cast<size_t>(query.len),
+ &utf16);
converter->ConvertFromUTF16(utf16.data(), utf16.length(), output);
}
// Runs the converter with the given UTF-16 input. We don't have to do
// anything, but this overridden function allows us to use the same code
// for both UTF-8 and UTF-16 input.
-void RunConverter(const base::char16* spec,
+void RunConverter(const char16_t* spec,
const Component& query,
CharsetConverter* converter,
CanonOutput* output) {
- converter->ConvertFromUTF16(&spec[query.begin], query.len, output);
+ DCHECK(query.is_valid());
+ converter->ConvertFromUTF16(&spec[query.begin],
+ static_cast<size_t>(query.len), output);
}
-template<typename CHAR, typename UCHAR>
+template <typename CHAR, typename UCHAR>
void DoConvertToQueryEncoding(const CHAR* spec,
const Component& query,
CharsetConverter* converter,
CanonOutput* output) {
- if (IsAllASCII<CHAR, UCHAR>(spec, query)) {
- // Easy: the input can just appended with no character set conversions.
- AppendRaw8BitQueryString(&spec[query.begin], query.len, output);
+ if (converter) {
+ // Run the converter to get an 8-bit string, then append it, escaping
+ // necessary values.
+ RawCanonOutput<1024> eight_bit;
+ RunConverter(spec, query, converter, &eight_bit);
+ AppendRaw8BitQueryString(eight_bit.data(), eight_bit.length(), output);
} else {
- // Harder: convert to the proper encoding first.
- if (converter) {
- // Run the converter to get an 8-bit string, then append it, escaping
- // necessary values.
- RawCanonOutput<1024> eight_bit;
- RunConverter(spec, query, converter, &eight_bit);
- AppendRaw8BitQueryString(eight_bit.data(), eight_bit.length(), output);
-
- } else {
- // No converter, do our own UTF-8 conversion.
- AppendStringOfType(&spec[query.begin], query.len, CHAR_QUERY, output);
- }
+ // No converter, do our own UTF-8 conversion.
+ AppendStringOfType(&spec[query.begin], static_cast<size_t>(query.len),
+ CHAR_QUERY, output);
}
}
@@ -120,7 +106,7 @@
CharsetConverter* converter,
CanonOutput* output,
Component* out_query) {
- if (query.len < 0) {
+ if (!query.is_valid()) {
*out_query = Component();
return;
}
@@ -144,21 +130,20 @@
output, out_query);
}
-void CanonicalizeQuery(const base::char16* spec,
+void CanonicalizeQuery(const char16_t* spec,
const Component& query,
CharsetConverter* converter,
CanonOutput* output,
Component* out_query) {
- DoCanonicalizeQuery<base::char16, base::char16>(spec, query, converter,
- output, out_query);
+ DoCanonicalizeQuery<char16_t, char16_t>(spec, query, converter, output,
+ out_query);
}
-void ConvertUTF16ToQueryEncoding(const base::char16* input,
+void ConvertUTF16ToQueryEncoding(const char16_t* input,
const Component& query,
CharsetConverter* converter,
CanonOutput* output) {
- DoConvertToQueryEncoding<base::char16, base::char16>(input, query,
- converter, output);
+ DoConvertToQueryEncoding<char16_t, char16_t>(input, query, converter, output);
}
} // namespace url
diff --git a/url/url_canon_relative.cc b/url/url_canon_relative.cc
index 7ef5ee3..d8ea528 100644
--- a/url/url_canon_relative.cc
+++ b/url/url_canon_relative.cc
@@ -1,15 +1,18 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Canonicalizer functions for working with and resolving relative URLs.
#include <algorithm>
+#include <ostream>
-#include "base/logging.h"
+#include "base/check_op.h"
+#include "base/strings/string_util.h"
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
#include "url/url_constants.h"
+#include "url/url_features.h"
#include "url/url_file.h"
#include "url/url_parse_internal.h"
#include "url/url_util.h"
@@ -62,6 +65,39 @@
#endif // WIN32
+template <typename CHAR>
+bool IsValidScheme(const CHAR* url, const Component& scheme) {
+ // Caller should ensure that the |scheme| is not empty.
+ DCHECK_NE(0, scheme.len);
+
+ // From https://url.spec.whatwg.org/#scheme-start-state:
+ // scheme start state:
+ // 1. If c is an ASCII alpha, append c, lowercased, to buffer, and set
+ // state to scheme state.
+ // 2. Otherwise, if state override is not given, set state to no scheme
+ // state, and decrease pointer by one.
+ // 3. Otherwise, validation error, return failure.
+ // Note that both step 2 and step 3 mean that the scheme was not valid.
+ if (!base::IsAsciiAlpha(url[scheme.begin]))
+ return false;
+
+ // From https://url.spec.whatwg.org/#scheme-state:
+ // scheme state:
+ // 1. If c is an ASCII alphanumeric, U+002B (+), U+002D (-), or U+002E
+ // (.), append c, lowercased, to buffer.
+ // 2. Otherwise, if c is U+003A (:), then [...]
+ //
+ // We begin at |scheme.begin + 1|, because the character at |scheme.begin| has
+ // already been checked by base::IsAsciiAlpha above.
+ int scheme_end = scheme.end();
+ for (int i = scheme.begin + 1; i < scheme_end; i++) {
+ if (!CanonicalSchemeChar(url[i]))
+ return false;
+ }
+
+ return true;
+}
+
// See IsRelativeURL in the header file for usage.
template<typename CHAR>
bool DoIsRelativeURL(const char* base,
@@ -126,17 +162,19 @@
}
// If the scheme isn't valid, then it's relative.
- int scheme_end = scheme.end();
- for (int i = scheme.begin; i < scheme_end; i++) {
- if (!CanonicalSchemeChar(url[i])) {
- if (!is_base_hierarchical) {
- // Don't allow relative URLs if the base scheme doesn't support it.
- return false;
- }
- *relative_component = MakeRange(begin, url_len);
- *is_relative = true;
- return true;
+ if (!IsValidScheme(url, scheme)) {
+ if (url[begin] == '#' &&
+ base::FeatureList::IsEnabled(
+ kResolveBareFragmentWithColonOnNonHierarchical)) {
+ // |url| is a bare fragment (e.g. "#foo:bar"). This can be resolved
+ // against any base. Fall-through.
+ } else if (!is_base_hierarchical) {
+ // Don't allow relative URLs if the base scheme doesn't support it.
+ return false;
}
+ *relative_component = MakeRange(begin, url_len);
+ *is_relative = true;
+ return true;
}
// If the scheme is not the same, then we can't count it as relative.
@@ -207,7 +245,7 @@
const Component& source_component,
CanonOutput* output,
Component* output_component) {
- if (source_component.len < 0) {
+ if (!source_component.is_valid()) {
// This component is not present.
*output_component = Component();
return;
@@ -287,12 +325,11 @@
// Canonical URLs always have a path, so we can use that offset. Reserve
// enough room for the base URL, the new path, and some extra bytes for
// possible escaped characters.
- output->ReserveSizeIfNeeded(
- base_parsed.path.begin +
- std::max(path.end(), std::max(query.end(), ref.end())));
+ output->ReserveSizeIfNeeded(base_parsed.path.begin +
+ std::max({path.end(), query.end(), ref.end()}));
output->Append(base_url, base_parsed.path.begin);
- if (path.len > 0) {
+ if (path.is_nonempty()) {
// The path is replaced or modified.
int true_path_begin = output->length();
@@ -324,11 +361,11 @@
// Relative path, replace the query, and reference. We take the
// original path with the file part stripped, and append the new path.
// The canonicalizer will take care of resolving ".." and "."
- int path_begin = output->length();
+ size_t path_begin = output->length();
CopyToLastSlash(base_url, base_path_begin, base_parsed.path.end(),
output);
- success &= CanonicalizePartialPath(relative_url, path, path_begin,
- output);
+ success &= CanonicalizePartialPathInternal(relative_url, path, path_begin,
+ output);
out_parsed->path = MakeRange(path_begin, output->length());
// Copy the rest of the stuff after the path from the relative path.
@@ -461,7 +498,7 @@
// paths (even the default path of "/" is OK).
//
// We allow hosts with no length so we can handle file URLs, for example.
- if (base_parsed.path.len <= 0) {
+ if (base_parsed.path.is_empty()) {
// On error, return the input (resolving a relative URL on a non-relative
// base = the base).
int base_len = base_parsed.Length();
@@ -470,7 +507,7 @@
return false;
}
- if (relative_component.len <= 0) {
+ if (relative_component.is_empty()) {
// Empty relative URL, leave unchanged, only removing the ref component.
int base_len = base_parsed.Length();
base_len -= base_parsed.ref.len + 1;
@@ -512,10 +549,7 @@
// have a host, we want to use the special host detection logic for file
// URLs provided by DoResolveAbsoluteFile(), as opposed to the generic host
// detection logic, for consistency with parsing file URLs from scratch.
- // This also handles the special case where the URL is only slashes,
- // since that doesn't have a host part either.
- if (base_is_file &&
- (num_slashes >= 2 || num_slashes == relative_component.len)) {
+ if (base_is_file && num_slashes >= 2) {
return DoResolveAbsoluteFile(relative_url, relative_component,
query_converter, output, out_parsed);
}
@@ -550,14 +584,14 @@
bool IsRelativeURL(const char* base,
const Parsed& base_parsed,
- const base::char16* fragment,
+ const char16_t* fragment,
int fragment_len,
bool is_base_hierarchical,
bool* is_relative,
Component* relative_component) {
- return DoIsRelativeURL<base::char16>(
- base, base_parsed, fragment, fragment_len, is_base_hierarchical,
- is_relative, relative_component);
+ return DoIsRelativeURL<char16_t>(base, base_parsed, fragment, fragment_len,
+ is_base_hierarchical, is_relative,
+ relative_component);
}
bool ResolveRelativeURL(const char* base_url,
@@ -576,14 +610,14 @@
bool ResolveRelativeURL(const char* base_url,
const Parsed& base_parsed,
bool base_is_file,
- const base::char16* relative_url,
+ const char16_t* relative_url,
const Component& relative_component,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* out_parsed) {
- return DoResolveRelativeURL<base::char16>(
- base_url, base_parsed, base_is_file, relative_url,
- relative_component, query_converter, output, out_parsed);
+ return DoResolveRelativeURL<char16_t>(base_url, base_parsed, base_is_file,
+ relative_url, relative_component,
+ query_converter, output, out_parsed);
}
} // namespace url
diff --git a/url/url_canon_stdstring.cc b/url/url_canon_stdstring.cc
index c81a0a9..60e2a26 100644
--- a/url/url_canon_stdstring.cc
+++ b/url/url_canon_stdstring.cc
@@ -1,4 +1,4 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -6,11 +6,10 @@
namespace url {
-StdStringCanonOutput::StdStringCanonOutput(std::string* str)
- : CanonOutput(), str_(str) {
- cur_len_ = static_cast<int>(str_->size()); // Append to existing data.
- buffer_ = str_->empty() ? NULL : &(*str_)[0];
- buffer_len_ = static_cast<int>(str_->size());
+StdStringCanonOutput::StdStringCanonOutput(std::string* str) : str_(str) {
+ cur_len_ = str_->size(); // Append to existing data.
+ buffer_ = str_->empty() ? nullptr : &(*str_)[0];
+ buffer_len_ = str_->size();
}
StdStringCanonOutput::~StdStringCanonOutput() {
@@ -22,9 +21,9 @@
buffer_len_ = cur_len_;
}
-void StdStringCanonOutput::Resize(int sz) {
+void StdStringCanonOutput::Resize(size_t sz) {
str_->resize(sz);
- buffer_ = str_->empty() ? NULL : &(*str_)[0];
+ buffer_ = str_->empty() ? nullptr : &(*str_)[0];
buffer_len_ = sz;
}
diff --git a/url/url_canon_stdstring.h b/url/url_canon_stdstring.h
index f36f3a9..528f91f 100644
--- a/url/url_canon_stdstring.h
+++ b/url/url_canon_stdstring.h
@@ -1,4 +1,4 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -12,9 +12,10 @@
#include <string>
#include "base/compiler_specific.h"
+#include "base/component_export.h"
+#include "base/memory/raw_ptr_exclusion.h"
#include "base/strings/string_piece.h"
#include "url/url_canon.h"
-#include "url/url_export.h"
namespace url {
@@ -33,18 +34,24 @@
//
// Therefore, the user should call Complete() before using the string that
// this class wrote into.
-class URL_EXPORT StdStringCanonOutput : public CanonOutput {
+class COMPONENT_EXPORT(URL) StdStringCanonOutput : public CanonOutput {
public:
StdStringCanonOutput(std::string* str);
+
+ StdStringCanonOutput(const StdStringCanonOutput&) = delete;
+ StdStringCanonOutput& operator=(const StdStringCanonOutput&) = delete;
+
~StdStringCanonOutput() override;
// Must be called after writing has completed but before the string is used.
void Complete();
- void Resize(int sz) override;
+ void Resize(size_t sz) override;
protected:
- std::string* str_;
+ // `str_` is not a raw_ptr<...> for performance reasons (based on analysis of
+ // sampling profiler data and tab_search:top100:2020).
+ RAW_PTR_EXCLUSION std::string* str_;
};
// An extension of the Replacements class that allows the setters to use
@@ -52,33 +59,72 @@
//
// The contents of the StringPieces are not copied and must remain valid until
// the StringPieceReplacements object goes out of scope.
-template<typename STR>
-class StringPieceReplacements : public Replacements<typename STR::value_type> {
+//
+// In order to make it harder to misuse the API, the setters do not accept
+// rvalue references to std::strings.
+// Note: Extra const CharT* overloads are necessary to break ambiguities that
+// would otherwise exist for string literals.
+template <typename CharT>
+class StringPieceReplacements : public Replacements<CharT> {
+ private:
+ using StringT = std::basic_string<CharT>;
+ using StringPieceT = base::BasicStringPiece<CharT>;
+ using ParentT = Replacements<CharT>;
+ using SetterFun = void (ParentT::*)(const CharT*, const Component&);
+
+ void SetImpl(SetterFun fun, StringPieceT str) {
+ (this->*fun)(str.data(), Component(0, static_cast<int>(str.size())));
+ }
+
public:
- void SetSchemeStr(const base::BasicStringPiece<STR>& s) {
- this->SetScheme(s.data(), Component(0, static_cast<int>(s.length())));
- }
- void SetUsernameStr(const base::BasicStringPiece<STR>& s) {
- this->SetUsername(s.data(), Component(0, static_cast<int>(s.length())));
- }
- void SetPasswordStr(const base::BasicStringPiece<STR>& s) {
- this->SetPassword(s.data(), Component(0, static_cast<int>(s.length())));
- }
- void SetHostStr(const base::BasicStringPiece<STR>& s) {
- this->SetHost(s.data(), Component(0, static_cast<int>(s.length())));
- }
- void SetPortStr(const base::BasicStringPiece<STR>& s) {
- this->SetPort(s.data(), Component(0, static_cast<int>(s.length())));
- }
- void SetPathStr(const base::BasicStringPiece<STR>& s) {
- this->SetPath(s.data(), Component(0, static_cast<int>(s.length())));
- }
- void SetQueryStr(const base::BasicStringPiece<STR>& s) {
- this->SetQuery(s.data(), Component(0, static_cast<int>(s.length())));
- }
- void SetRefStr(const base::BasicStringPiece<STR>& s) {
- this->SetRef(s.data(), Component(0, static_cast<int>(s.length())));
- }
+ void SetSchemeStr(const CharT* str) { SetImpl(&ParentT::SetScheme, str); }
+ void SetSchemeStr(StringPieceT str) { SetImpl(&ParentT::SetScheme, str); }
+ void SetSchemeStr(const StringT&&) = delete;
+
+ void SetUsernameStr(const CharT* str) { SetImpl(&ParentT::SetUsername, str); }
+ void SetUsernameStr(StringPieceT str) { SetImpl(&ParentT::SetUsername, str); }
+ void SetUsernameStr(const StringT&&) = delete;
+ using ParentT::ClearUsername;
+
+ void SetPasswordStr(const CharT* str) { SetImpl(&ParentT::SetPassword, str); }
+ void SetPasswordStr(StringPieceT str) { SetImpl(&ParentT::SetPassword, str); }
+ void SetPasswordStr(const StringT&&) = delete;
+ using ParentT::ClearPassword;
+
+ void SetHostStr(const CharT* str) { SetImpl(&ParentT::SetHost, str); }
+ void SetHostStr(StringPieceT str) { SetImpl(&ParentT::SetHost, str); }
+ void SetHostStr(const StringT&&) = delete;
+ using ParentT::ClearHost;
+
+ void SetPortStr(const CharT* str) { SetImpl(&ParentT::SetPort, str); }
+ void SetPortStr(StringPieceT str) { SetImpl(&ParentT::SetPort, str); }
+ void SetPortStr(const StringT&&) = delete;
+ using ParentT::ClearPort;
+
+ void SetPathStr(const CharT* str) { SetImpl(&ParentT::SetPath, str); }
+ void SetPathStr(StringPieceT str) { SetImpl(&ParentT::SetPath, str); }
+ void SetPathStr(const StringT&&) = delete;
+ using ParentT::ClearPath;
+
+ void SetQueryStr(const CharT* str) { SetImpl(&ParentT::SetQuery, str); }
+ void SetQueryStr(StringPieceT str) { SetImpl(&ParentT::SetQuery, str); }
+ void SetQueryStr(const StringT&&) = delete;
+ using ParentT::ClearQuery;
+
+ void SetRefStr(const CharT* str) { SetImpl(&ParentT::SetRef, str); }
+ void SetRefStr(StringPieceT str) { SetImpl(&ParentT::SetRef, str); }
+ void SetRefStr(const StringT&&) = delete;
+ using ParentT::ClearRef;
+
+ private:
+ using ParentT::SetHost;
+ using ParentT::SetPassword;
+ using ParentT::SetPath;
+ using ParentT::SetPort;
+ using ParentT::SetQuery;
+ using ParentT::SetRef;
+ using ParentT::SetScheme;
+ using ParentT::SetUsername;
};
} // namespace url
diff --git a/url/url_canon_stdurl.cc b/url/url_canon_stdurl.cc
index c619322..8096b56 100644
--- a/url/url_canon_stdurl.cc
+++ b/url/url_canon_stdurl.cc
@@ -1,4 +1,4 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -58,7 +58,7 @@
output, &new_parsed->host);
// Host must not be empty for standard URLs.
- if (!parsed.host.is_nonempty())
+ if (parsed.host.is_empty())
success = false;
// Port: the port canonicalizer will handle the colon.
@@ -103,14 +103,20 @@
// Ref: ignore failure for this, since the page can probably still be loaded.
CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
+ // Carry over the flag for potentially dangling markup:
+ if (parsed.potentially_dangling_markup)
+ new_parsed->potentially_dangling_markup = true;
+
return success;
}
} // namespace
-
// Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED
// if the scheme is unknown.
+//
+// Please keep blink::DefaultPortForProtocol and url::DefaultPortForScheme in
+// sync.
int DefaultPortForScheme(const char* scheme, int scheme_len) {
int default_port = PORT_UNSPECIFIED;
switch (scheme_len) {
@@ -128,10 +134,6 @@
else if (!strncmp(scheme, kWssScheme, scheme_len))
default_port = 443;
break;
- case 6:
- if (!strncmp(scheme, kGopherScheme, scheme_len))
- default_port = 70;
- break;
case 2:
if (!strncmp(scheme, kWsScheme, scheme_len))
default_port = 80;
@@ -152,16 +154,16 @@
output, new_parsed);
}
-bool CanonicalizeStandardURL(const base::char16* spec,
+bool CanonicalizeStandardURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
SchemeType scheme_type,
CharsetConverter* query_converter,
CanonOutput* output,
Parsed* new_parsed) {
- return DoCanonicalizeStandardURL<base::char16, base::char16>(
- URLComponentSource<base::char16>(spec), parsed, scheme_type,
- query_converter, output, new_parsed);
+ return DoCanonicalizeStandardURL<char16_t, char16_t>(
+ URLComponentSource<char16_t>(spec), parsed, scheme_type, query_converter,
+ output, new_parsed);
}
// It might be nice in the future to optimize this so unchanged components don't
@@ -191,7 +193,7 @@
// regular code path can be used.
bool ReplaceStandardURL(const char* base,
const Parsed& base_parsed,
- const Replacements<base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
SchemeType scheme_type,
CharsetConverter* query_converter,
CanonOutput* output,
diff --git a/url/url_canon_unittest.cc b/url/url_canon_unittest.cc
index 0a0567e..dee00d8 100644
--- a/url/url_canon_unittest.cc
+++ b/url/url_canon_unittest.cc
@@ -1,19 +1,21 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include <errno.h>
+#include "url/url_canon.h"
-#include "base/macros.h"
+#include <errno.h>
+#include <stddef.h>
+
+#include "base/strings/string_piece.h"
#include "base/strings/utf_string_conversions.h"
-#include "starboard/common/string.h"
-#include "starboard/memory.h"
-#include "starboard/types.h"
+#include "base/test/gtest_util.h"
+#include "base/test/scoped_feature_list.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/third_party/mozilla/url_parse.h"
-#include "url/url_canon.h"
#include "url/url_canon_internal.h"
#include "url/url_canon_stdstring.h"
+#include "url/url_features.h"
#include "url/url_test_utils.h"
namespace url {
@@ -116,7 +118,7 @@
{0x10FFFF, "\xF4\x8F\xBF\xBF"},
};
std::string out_str;
- for (size_t i = 0; i < arraysize(utf_cases); i++) {
+ for (size_t i = 0; i < std::size(utf_cases); i++) {
out_str.clear();
StdStringCanonOutput output(&out_str);
AppendUTF8Value(utf_cases[i].input, &output);
@@ -125,26 +127,15 @@
}
}
-#if defined(GTEST_HAS_DEATH_TEST)
-// TODO(mattm): Can't run this in debug mode for now, since the DCHECK will
-// cause the Chromium stack trace dialog to appear and hang the test.
-// See http://crbug.com/49580.
-#if defined(NDEBUG) && !defined(DCHECK_ALWAYS_ON)
-#define MAYBE_DoAppendUTF8Invalid DoAppendUTF8Invalid
-#else
-#define MAYBE_DoAppendUTF8Invalid DISABLED_DoAppendUTF8Invalid
-#endif
-TEST(URLCanonTest, MAYBE_DoAppendUTF8Invalid) {
+TEST(URLCanonTest, DoAppendUTF8Invalid) {
std::string out_str;
StdStringCanonOutput output(&out_str);
// Invalid code point (too large).
- ASSERT_DEBUG_DEATH({
+ EXPECT_DCHECK_DEATH({
AppendUTF8Value(0x110000, &output);
output.Complete();
- EXPECT_EQ("", out_str);
- }, "");
+ });
}
-#endif // defined(GTEST_HAS_DEATH_TEST)
TEST(URLCanonTest, UTF) {
// Low-level test that we handle reading, canonicalization, and writing
@@ -156,37 +147,37 @@
const char* output;
} utf_cases[] = {
// Valid canonical input should get passed through & escaped.
- {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true, "%E4%BD%A0%E5%A5%BD"},
+ {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true, "%E4%BD%A0%E5%A5%BD"},
// Test a character that takes > 16 bits (U+10300 = old italic letter A)
- {"\xF0\x90\x8C\x80", L"\xd800\xdf00", true, "%F0%90%8C%80"},
+ {"\xF0\x90\x8C\x80", L"\xd800\xdf00", true, "%F0%90%8C%80"},
// Non-shortest-form UTF-8 characters are invalid. The bad bytes should
// each be replaced with the invalid character (EF BF DB in UTF-8).
- {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", NULL, false,
- "%EF%BF%BD%EF%BF%BD%EF%BF%BD%EF%BF%BD%E5%A5%BD"},
+ {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", nullptr, false,
+ "%EF%BF%BD%EF%BF%BD%EF%BF%BD%EF%BF%BD%E5%A5%BD"},
// Invalid UTF-8 sequences should be marked as invalid (the first
// sequence is truncated).
- {"\xe4\xa0\xe5\xa5\xbd", L"\xd800\x597d", false, "%EF%BF%BD%E5%A5%BD"},
+ {"\xe4\xa0\xe5\xa5\xbd", L"\xd800\x597d", false, "%EF%BF%BD%E5%A5%BD"},
// Character going off the end.
- {"\xe4\xbd\xa0\xe5\xa5", L"\x4f60\xd800", false, "%E4%BD%A0%EF%BF%BD"},
+ {"\xe4\xbd\xa0\xe5\xa5", L"\x4f60\xd800", false, "%E4%BD%A0%EF%BF%BD"},
// ...same with low surrogates with no high surrogate.
- {nullptr, L"\xdc00", false, "%EF%BF%BD"},
+ {nullptr, L"\xdc00", false, "%EF%BF%BD"},
// Test a UTF-8 encoded surrogate value is marked as invalid.
// ED A0 80 = U+D800
- {"\xed\xa0\x80", NULL, false, "%EF%BF%BD%EF%BF%BD%EF%BF%BD"},
+ {"\xed\xa0\x80", nullptr, false, "%EF%BF%BD%EF%BF%BD%EF%BF%BD"},
// ...even when paired.
- {"\xed\xa0\x80\xed\xb0\x80", nullptr, false,
- "%EF%BF%BD%EF%BF%BD%EF%BF%BD%EF%BF%BD%EF%BF%BD%EF%BF%BD"},
+ {"\xed\xa0\x80\xed\xb0\x80", nullptr, false,
+ "%EF%BF%BD%EF%BF%BD%EF%BF%BD%EF%BF%BD%EF%BF%BD%EF%BF%BD"},
};
std::string out_str;
- for (size_t i = 0; i < arraysize(utf_cases); i++) {
+ for (size_t i = 0; i < std::size(utf_cases); i++) {
if (utf_cases[i].input8) {
out_str.clear();
StdStringCanonOutput output(&out_str);
- int input_len = static_cast<int>(strlen(utf_cases[i].input8));
+ size_t input_len = strlen(utf_cases[i].input8);
bool success = true;
- for (int ch = 0; ch < input_len; ch++) {
+ for (size_t ch = 0; ch < input_len; ch++) {
success &= AppendUTF8EscapedChar(utf_cases[i].input8, &ch, input_len,
&output);
}
@@ -198,11 +189,11 @@
out_str.clear();
StdStringCanonOutput output(&out_str);
- base::string16 input_str(
+ std::u16string input_str(
test_utils::TruncateWStringToUTF16(utf_cases[i].input16));
- int input_len = static_cast<int>(input_str.length());
+ size_t input_len = input_str.length();
bool success = true;
- for (int ch = 0; ch < input_len; ch++) {
+ for (size_t ch = 0; ch < input_len; ch++) {
success &= AppendUTF8EscapedChar(input_str.c_str(), &ch, input_len,
&output);
}
@@ -217,7 +208,7 @@
// UTF-16 -> UTF-8
std::string input8_str(utf_cases[i].input8);
- base::string16 input16_str(
+ std::u16string input16_str(
test_utils::TruncateWStringToUTF16(utf_cases[i].input16));
EXPECT_EQ(input8_str, base::UTF16ToUTF8(input16_str));
@@ -250,7 +241,7 @@
std::string out_str;
- for (size_t i = 0; i < arraysize(scheme_cases); i++) {
+ for (size_t i = 0; i < std::size(scheme_cases); i++) {
int url_len = static_cast<int>(strlen(scheme_cases[i].input));
Component in_comp(0, url_len);
Component out_comp;
@@ -270,7 +261,7 @@
out_str.clear();
StdStringCanonOutput output2(&out_str);
- base::string16 wide_input(base::UTF8ToUTF16(scheme_cases[i].input));
+ std::u16string wide_input(base::UTF8ToUTF16(scheme_cases[i].input));
in_comp.len = static_cast<int>(wide_input.length());
success = CanonicalizeScheme(wide_input.c_str(), in_comp, &output2,
&out_comp);
@@ -296,38 +287,79 @@
EXPECT_EQ(0, out_comp.len);
}
-TEST(URLCanonTest, Host) {
+// IDNA mode to use in CanonHost tests.
+enum class IDNAMode { kTransitional, kNonTransitional };
+
+class URLCanonHostTest
+ : public ::testing::Test,
+ public ::testing::WithParamInterface<IDNAMode> {
+ public:
+ URLCanonHostTest() {
+ if (GetParam() == IDNAMode::kNonTransitional) {
+ scoped_feature_list_.InitAndEnableFeature(kUseIDNA2008NonTransitional);
+ } else {
+ scoped_feature_list_.InitAndDisableFeature(kUseIDNA2008NonTransitional);
+ }
+ }
+
+ private:
+ base::test::ScopedFeatureList scoped_feature_list_;
+};
+
+INSTANTIATE_TEST_SUITE_P(All,
+ URLCanonHostTest,
+ ::testing::Values(IDNAMode::kTransitional,
+ IDNAMode::kNonTransitional));
+
+TEST_P(URLCanonHostTest, Host) {
+ bool use_idna_non_transitional = IsUsingIDNA2008NonTransitional();
+
IPAddressCase host_cases[] = {
- // Basic canonicalization, uppercase should be converted to lowercase.
- {"GoOgLe.CoM", L"GoOgLe.CoM", "google.com", Component(0, 10), CanonHostInfo::NEUTRAL, -1, ""},
+ // Basic canonicalization, uppercase should be converted to lowercase.
+ {"GoOgLe.CoM", L"GoOgLe.CoM", "google.com", Component(0, 10),
+ CanonHostInfo::NEUTRAL, -1, ""},
// Spaces and some other characters should be escaped.
- {"Goo%20 goo%7C|.com", L"Goo%20 goo%7C|.com", "goo%20%20goo%7C%7C.com", Component(0, 22), CanonHostInfo::NEUTRAL, -1, ""},
+ {"Goo%20 goo%7C|.com", L"Goo%20 goo%7C|.com", "goo%20%20goo%7C%7C.com",
+ Component(0, 22), CanonHostInfo::NEUTRAL, -1, ""},
// Exciting different types of spaces!
- {NULL, L"GOO\x00a0\x3000goo.com", "goo%20%20goo.com", Component(0, 16), CanonHostInfo::NEUTRAL, -1, ""},
+ {NULL, L"GOO\x00a0\x3000goo.com", "goo%20%20goo.com", Component(0, 16),
+ CanonHostInfo::NEUTRAL, -1, ""},
// Other types of space (no-break, zero-width, zero-width-no-break) are
// name-prepped away to nothing.
- {NULL, L"GOO\x200b\x2060\xfeffgoo.com", "googoo.com", Component(0, 10), CanonHostInfo::NEUTRAL, -1, ""},
+ {NULL, L"GOO\x200b\x2060\xfeffgoo.com", "googoo.com", Component(0, 10),
+ CanonHostInfo::NEUTRAL, -1, ""},
// Ideographic full stop (full-width period for Chinese, etc.) should be
// treated as a dot.
- {NULL, L"www.foo\x3002" L"bar.com", "www.foo.bar.com", Component(0, 15), CanonHostInfo::NEUTRAL, -1, ""},
+ {NULL,
+ L"www.foo\x3002"
+ L"bar.com",
+ "www.foo.bar.com", Component(0, 15), CanonHostInfo::NEUTRAL, -1, ""},
// Invalid unicode characters should fail...
// ...In wide input, ICU will barf and we'll end up with the input as
// escaped UTF-8 (the invalid character should be replaced with the
// replacement character).
- {"\xef\xb7\x90zyx.com", L"\xfdd0zyx.com", "%EF%BF%BDzyx.com", Component(0, 16), CanonHostInfo::BROKEN, -1, ""},
+ {"\xef\xb7\x90zyx.com", L"\xfdd0zyx.com", "%EF%BF%BDzyx.com",
+ Component(0, 16), CanonHostInfo::BROKEN, -1, ""},
// ...This is the same as previous but with with escaped.
- {"%ef%b7%90zyx.com", L"%ef%b7%90zyx.com", "%EF%BF%BDzyx.com", Component(0, 16), CanonHostInfo::BROKEN, -1, ""},
- // Test name prepping, fullwidth input should be converted to ASCII and NOT
+ {"%ef%b7%90zyx.com", L"%ef%b7%90zyx.com", "%EF%BF%BDzyx.com",
+ Component(0, 16), CanonHostInfo::BROKEN, -1, ""},
+ // Test name prepping, fullwidth input should be converted to ASCII and
+ // NOT
// IDN-ized. This is "Go" in fullwidth UTF-8/UTF-16.
- {"\xef\xbc\xa7\xef\xbd\x8f.com", L"\xff27\xff4f.com", "go.com", Component(0, 6), CanonHostInfo::NEUTRAL, -1, ""},
+ {"\xef\xbc\xa7\xef\xbd\x8f.com", L"\xff27\xff4f.com", "go.com",
+ Component(0, 6), CanonHostInfo::NEUTRAL, -1, ""},
// Test that fullwidth escaped values are properly name-prepped,
// then converted or rejected.
// ...%41 in fullwidth = 'A' (also as escaped UTF-8 input)
- {"\xef\xbc\x85\xef\xbc\x94\xef\xbc\x91.com", L"\xff05\xff14\xff11.com", "a.com", Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""},
- {"%ef%bc%85%ef%bc%94%ef%bc%91.com", L"%ef%bc%85%ef%bc%94%ef%bc%91.com", "a.com", Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""},
+ {"\xef\xbc\x85\xef\xbc\x94\xef\xbc\x91.com", L"\xff05\xff14\xff11.com",
+ "a.com", Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""},
+ {"%ef%bc%85%ef%bc%94%ef%bc%91.com", L"%ef%bc%85%ef%bc%94%ef%bc%91.com",
+ "a.com", Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""},
// ...%00 in fullwidth should fail (also as escaped UTF-8 input)
- {"\xef\xbc\x85\xef\xbc\x90\xef\xbc\x90.com", L"\xff05\xff10\xff10.com", "%00.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""},
- {"%ef%bc%85%ef%bc%90%ef%bc%90.com", L"%ef%bc%85%ef%bc%90%ef%bc%90.com", "%00.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""},
+ {"\xef\xbc\x85\xef\xbc\x90\xef\xbc\x90.com", L"\xff05\xff10\xff10.com",
+ "%00.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""},
+ {"%ef%bc%85%ef%bc%90%ef%bc%90.com", L"%ef%bc%85%ef%bc%90%ef%bc%90.com",
+ "%00.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""},
// ICU will convert weird percents into ASCII percents, but not unescape
// further. A weird percent is U+FE6A (EF B9 AA in UTF-8) which is a
// "small percent". At this point we should be within our rights to mark
@@ -335,12 +367,30 @@
// happens to allow ASCII characters (%41 = "A" -> 'a') to be unescaped
// and kept as valid, so we validate that behavior here, but this level
// of fixing the input shouldn't be seen as required. "%81" is invalid.
- {"\xef\xb9\xaa" "41.com", L"\xfe6a" L"41.com", "a.com", Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""},
- {"%ef%b9%aa" "41.com", L"\xfe6a" L"41.com", "a.com", Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""},
- {"\xef\xb9\xaa" "81.com", L"\xfe6a" L"81.com", "%81.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""},
- {"%ef%b9%aa" "81.com", L"\xfe6a" L"81.com", "%81.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""},
+ {"\xef\xb9\xaa"
+ "41.com",
+ L"\xfe6a"
+ L"41.com",
+ "a.com", Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""},
+ {"%ef%b9%aa"
+ "41.com",
+ L"\xfe6a"
+ L"41.com",
+ "a.com", Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""},
+ {"\xef\xb9\xaa"
+ "81.com",
+ L"\xfe6a"
+ L"81.com",
+ "%81.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""},
+ {"%ef%b9%aa"
+ "81.com",
+ L"\xfe6a"
+ L"81.com",
+ "%81.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""},
// Basic IDN support, UTF-8 and UTF-16 input should be converted to IDN
- {"\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d\x4f60\x597d", "xn--6qqa088eba", Component(0, 14), CanonHostInfo::NEUTRAL, -1, ""},
+ {"\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd",
+ L"\x4f60\x597d\x4f60\x597d", "xn--6qqa088eba", Component(0, 14),
+ CanonHostInfo::NEUTRAL, -1, ""},
// See http://unicode.org/cldr/utility/idna.jsp for other
// examples/experiments and http://goo.gl/7yG11o
// for the full list of characters handled differently by
@@ -348,174 +398,211 @@
// 4 Deviation characters are mapped/ignored in UTS 46 transitional
// mechansm. UTS 46, table 4 row (g).
- // Sharp-s is mapped to 'ss' in UTS 46 and IDNA 2003.
- // Otherwise, it'd be "xn--fuball-cta.de".
- {"fu\xc3\x9f" "ball.de", L"fu\x00df" L"ball.de", "fussball.de",
- Component(0, 11), CanonHostInfo::NEUTRAL, -1, ""},
- // Final-sigma (U+03C3) is mapped to regular sigma (U+03C2).
- // Otherwise, it'd be "xn--wxaijb9b".
- {"\xcf\x83\xcf\x8c\xce\xbb\xce\xbf\xcf\x82", L"\x3c3\x3cc\x3bb\x3bf\x3c2",
- "xn--wxaikc6b", Component(0, 12),
- CanonHostInfo::NEUTRAL, -1, ""},
+ // Sharp-s is mapped to 'ss' in IDNA 2003, not in IDNA 2008 or UTS 46
+ // after transitional period.
+ // Previously, it'd be "fussball.de".
+ {"fu\xc3\x9f"
+ "ball.de",
+ L"fu\x00df"
+ L"ball.de",
+ use_idna_non_transitional ? "xn--fuball-cta.de" : "fussball.de",
+ use_idna_non_transitional ? Component(0, 17) : Component(0, 11),
+ CanonHostInfo::NEUTRAL, -1, ""},
+
+ // Final-sigma (U+03C2) was mapped to regular sigma (U+03C3).
+ // Previously, it'd be "xn--wxaikc6b".
+ {"\xcf\x83\xcf\x8c\xce\xbb\xce\xbf\xcf\x82", L"\x3c3\x3cc\x3bb\x3bf\x3c2",
+ use_idna_non_transitional ? "xn--wxaijb9b" : "xn--wxaikc6b",
+ Component(0, 12), CanonHostInfo::NEUTRAL, -1, ""},
+
// ZWNJ (U+200C) and ZWJ (U+200D) are mapped away in UTS 46 transitional
- // handling as well as in IDNA 2003.
- {"a\xe2\x80\x8c" "b\xe2\x80\x8d" "c", L"a\x200c" L"b\x200d" L"c", "abc",
- Component(0, 3), CanonHostInfo::NEUTRAL, -1, ""},
- // ZWJ between Devanagari characters is still mapped away in UTS 46
- // transitional handling. IDNA 2008 would give xn--11bo0mv54g.
- {"\xe0\xa4\x95\xe0\xa5\x8d\xe2\x80\x8d\xe0\xa4\x9c",
- L"\x915\x94d\x200d\x91c", "xn--11bo0m",
- Component(0, 10), CanonHostInfo::NEUTRAL, -1, ""},
+ // handling as well as in IDNA 2003, but not thereafter.
+ {"a\xe2\x80\x8c"
+ "b\xe2\x80\x8d"
+ "c",
+ L"a\x200c"
+ L"b\x200d"
+ L"c",
+ use_idna_non_transitional ? "xn--abc-9m0ag" : "abc",
+ use_idna_non_transitional ? Component(0, 13) : Component(0, 3),
+ CanonHostInfo::NEUTRAL, -1, ""},
+
+ // ZWJ between Devanagari characters was still mapped away in UTS 46
+ // transitional handling. IDNA 2008 gives xn--11bo0mv54g.
+ // Previously "xn--11bo0m".
+ {"\xe0\xa4\x95\xe0\xa5\x8d\xe2\x80\x8d\xe0\xa4\x9c",
+ L"\x915\x94d\x200d\x91c",
+ use_idna_non_transitional ? "xn--11bo0mv54g" : "xn--11bo0m",
+ use_idna_non_transitional ? Component(0, 14) : Component(0, 10),
+ CanonHostInfo::NEUTRAL, -1, ""},
+
// Fullwidth exclamation mark is disallowed. UTS 46, table 4, row (b)
// However, we do allow this at the moment because we don't use
// STD3 rules and canonicalize full-width ASCII to ASCII.
- {"wow\xef\xbc\x81", L"wow\xff01", "wow%21",
- Component(0, 6), CanonHostInfo::NEUTRAL, -1, ""},
+ {"wow\xef\xbc\x81", L"wow\xff01", "wow%21", Component(0, 6),
+ CanonHostInfo::NEUTRAL, -1, ""},
// U+2132 (turned capital F) is disallowed. UTS 46, table 4, row (c)
// Allowed in IDNA 2003, but the mapping changed after Unicode 3.2
- {"\xe2\x84\xb2oo", L"\x2132oo", "%E2%84%B2oo",
- Component(0, 11), CanonHostInfo::BROKEN, -1, ""},
+ {"\xe2\x84\xb2oo", L"\x2132oo", "%E2%84%B2oo", Component(0, 11),
+ CanonHostInfo::BROKEN, -1, ""},
// U+2F868 (CJK Comp) is disallowed. UTS 46, table 4, row (d)
// Allowed in IDNA 2003, but the mapping changed after Unicode 3.2
- {"\xf0\xaf\xa1\xa8\xe5\xa7\xbb.cn", L"\xd87e\xdc68\x59fb.cn",
- "%F0%AF%A1%A8%E5%A7%BB.cn",
- Component(0, 24), CanonHostInfo::BROKEN, -1, ""},
+ {"\xf0\xaf\xa1\xa8\xe5\xa7\xbb.cn", L"\xd87e\xdc68\x59fb.cn",
+ "%F0%AF%A1%A8%E5%A7%BB.cn", Component(0, 24), CanonHostInfo::BROKEN, -1,
+ ""},
// Maps uppercase letters to lower case letters. UTS 46 table 4 row (e)
- {"M\xc3\x9cNCHEN", L"M\xdcNCHEN", "xn--mnchen-3ya",
- Component(0, 14), CanonHostInfo::NEUTRAL, -1, ""},
+ {"M\xc3\x9cNCHEN", L"M\xdcNCHEN", "xn--mnchen-3ya", Component(0, 14),
+ CanonHostInfo::NEUTRAL, -1, ""},
// An already-IDNA host is not modified.
- {"xn--mnchen-3ya", L"xn--mnchen-3ya", "xn--mnchen-3ya",
- Component(0, 14), CanonHostInfo::NEUTRAL, -1, ""},
+ {"xn--mnchen-3ya", L"xn--mnchen-3ya", "xn--mnchen-3ya", Component(0, 14),
+ CanonHostInfo::NEUTRAL, -1, ""},
// Symbol/punctuations are allowed in IDNA 2003/UTS46.
// Not allowed in IDNA 2008. UTS 46 table 4 row (f).
- {"\xe2\x99\xa5ny.us", L"\x2665ny.us", "xn--ny-s0x.us",
- Component(0, 13), CanonHostInfo::NEUTRAL, -1, ""},
+ {"\xe2\x99\xa5ny.us", L"\x2665ny.us", "xn--ny-s0x.us", Component(0, 13),
+ CanonHostInfo::NEUTRAL, -1, ""},
// U+11013 is new in Unicode 6.0 and is allowed. UTS 46 table 4, row (h)
// We used to allow it because we passed through unassigned code points.
- {"\xf0\x91\x80\x93.com", L"\xd804\xdc13.com", "xn--n00d.com",
- Component(0, 12), CanonHostInfo::NEUTRAL, -1, ""},
+ {"\xf0\x91\x80\x93.com", L"\xd804\xdc13.com", "xn--n00d.com",
+ Component(0, 12), CanonHostInfo::NEUTRAL, -1, ""},
// U+0602 is disallowed in UTS46/IDNA 2008. UTS 46 table 4, row(i)
// Used to be allowed in INDA 2003.
- {"\xd8\x82.eg", L"\x602.eg", "%D8%82.eg",
- Component(0, 9), CanonHostInfo::BROKEN, -1, ""},
+ {"\xd8\x82.eg", L"\x602.eg", "%D8%82.eg", Component(0, 9),
+ CanonHostInfo::BROKEN, -1, ""},
// U+20B7 is new in Unicode 5.2 (not a part of IDNA 2003 based
// on Unicode 3.2). We did allow it in the past because we let unassigned
// code point pass. We continue to allow it even though it's a
// "punctuation and symbol" blocked in IDNA 2008.
// UTS 46 table 4, row (j)
- {"\xe2\x82\xb7.com", L"\x20b7.com", "xn--wzg.com",
- Component(0, 11), CanonHostInfo::NEUTRAL, -1, ""},
+ {"\xe2\x82\xb7.com", L"\x20b7.com", "xn--wzg.com", Component(0, 11),
+ CanonHostInfo::NEUTRAL, -1, ""},
// Maps uppercase letters to lower case letters.
// In IDNA 2003, it's allowed without case-folding
// ( xn--bc-7cb.com ) because it's not defined in Unicode 3.2
// (added in Unicode 4.1). UTS 46 table 4 row (k)
- {"bc\xc8\xba.com", L"bc\x23a.com", "xn--bc-is1a.com",
- Component(0, 15), CanonHostInfo::NEUTRAL, -1, ""},
+ {"bc\xc8\xba.com", L"bc\x23a.com", "xn--bc-is1a.com", Component(0, 15),
+ CanonHostInfo::NEUTRAL, -1, ""},
// Maps U+FF43 (Full Width Small Letter C) to 'c'.
- {"ab\xef\xbd\x83.xyz", L"ab\xff43.xyz", "abc.xyz",
- Component(0, 7), CanonHostInfo::NEUTRAL, -1, ""},
+ {"ab\xef\xbd\x83.xyz", L"ab\xff43.xyz", "abc.xyz", Component(0, 7),
+ CanonHostInfo::NEUTRAL, -1, ""},
// Maps U+1D68C (Math Monospace Small C) to 'c'.
// U+1D68C = \xD835\xDE8C in UTF-16
- {"ab\xf0\x9d\x9a\x8c.xyz", L"ab\xd835\xde8c.xyz", "abc.xyz",
- Component(0, 7), CanonHostInfo::NEUTRAL, -1, ""},
+ {"ab\xf0\x9d\x9a\x8c.xyz", L"ab\xd835\xde8c.xyz", "abc.xyz",
+ Component(0, 7), CanonHostInfo::NEUTRAL, -1, ""},
// BiDi check test
// "Divehi" in Divehi (Thaana script) ends with BidiClass=NSM.
// Disallowed in IDNA 2003 but now allowed in UTS 46/IDNA 2008.
- {"\xde\x8b\xde\xa8\xde\x88\xde\xac\xde\x80\xde\xa8",
- L"\x78b\x7a8\x788\x7ac\x780\x7a8", "xn--hqbpi0jcw",
- Component(0, 13), CanonHostInfo::NEUTRAL, -1, ""},
+ {"\xde\x8b\xde\xa8\xde\x88\xde\xac\xde\x80\xde\xa8",
+ L"\x78b\x7a8\x788\x7ac\x780\x7a8", "xn--hqbpi0jcw", Component(0, 13),
+ CanonHostInfo::NEUTRAL, -1, ""},
// Disallowed in both IDNA 2003 and 2008 with BiDi check.
// Labels starting with a RTL character cannot end with a LTR character.
- {"\xd8\xac\xd8\xa7\xd8\xb1xyz", L"\x62c\x627\x631xyz",
- "%D8%AC%D8%A7%D8%B1xyz", Component(0, 21),
- CanonHostInfo::BROKEN, -1, ""},
+ {"\xd8\xac\xd8\xa7\xd8\xb1xyz", L"\x62c\x627\x631xyz",
+ "%D8%AC%D8%A7%D8%B1xyz", Component(0, 21), CanonHostInfo::BROKEN, -1,
+ ""},
// Labels starting with a RTL character can end with BC=EN (European
// number). Disallowed in IDNA 2003 but now allowed.
- {"\xd8\xac\xd8\xa7\xd8\xb1" "2", L"\x62c\x627\x631" L"2",
- "xn--2-ymcov", Component(0, 11),
- CanonHostInfo::NEUTRAL, -1, ""},
+ {"\xd8\xac\xd8\xa7\xd8\xb1"
+ "2",
+ L"\x62c\x627\x631"
+ L"2",
+ "xn--2-ymcov", Component(0, 11), CanonHostInfo::NEUTRAL, -1, ""},
// Labels starting with a RTL character cannot have "L" characters
// even if it ends with an BC=EN. Disallowed in both IDNA 2003/2008.
- {"\xd8\xac\xd8\xa7\xd8\xb1xy2", L"\x62c\x627\x631xy2",
- "%D8%AC%D8%A7%D8%B1xy2", Component(0, 21),
- CanonHostInfo::BROKEN, -1, ""},
+ {"\xd8\xac\xd8\xa7\xd8\xb1xy2", L"\x62c\x627\x631xy2",
+ "%D8%AC%D8%A7%D8%B1xy2", Component(0, 21), CanonHostInfo::BROKEN, -1,
+ ""},
// Labels starting with a RTL character can end with BC=AN (Arabic number)
// Disallowed in IDNA 2003, but now allowed.
- {"\xd8\xac\xd8\xa7\xd8\xb1\xd9\xa2", L"\x62c\x627\x631\x662",
- "xn--mgbjq0r", Component(0, 11),
- CanonHostInfo::NEUTRAL, -1, ""},
+ {"\xd8\xac\xd8\xa7\xd8\xb1\xd9\xa2", L"\x62c\x627\x631\x662",
+ "xn--mgbjq0r", Component(0, 11), CanonHostInfo::NEUTRAL, -1, ""},
// Labels starting with a RTL character cannot have "L" characters
// even if it ends with an BC=AN (Arabic number).
// Disallowed in both IDNA 2003/2008.
- {"\xd8\xac\xd8\xa7\xd8\xb1xy\xd9\xa2", L"\x62c\x627\x631xy\x662",
- "%D8%AC%D8%A7%D8%B1xy%D9%A2", Component(0, 26),
- CanonHostInfo::BROKEN, -1, ""},
+ {"\xd8\xac\xd8\xa7\xd8\xb1xy\xd9\xa2", L"\x62c\x627\x631xy\x662",
+ "%D8%AC%D8%A7%D8%B1xy%D9%A2", Component(0, 26), CanonHostInfo::BROKEN,
+ -1, ""},
// Labels starting with a RTL character cannot mix BC=EN and BC=AN
- {"\xd8\xac\xd8\xa7\xd8\xb1xy2\xd9\xa2", L"\x62c\x627\x631xy2\x662",
- "%D8%AC%D8%A7%D8%B1xy2%D9%A2", Component(0, 27),
- CanonHostInfo::BROKEN, -1, ""},
+ {"\xd8\xac\xd8\xa7\xd8\xb1xy2\xd9\xa2", L"\x62c\x627\x631xy2\x662",
+ "%D8%AC%D8%A7%D8%B1xy2%D9%A2", Component(0, 27), CanonHostInfo::BROKEN,
+ -1, ""},
// As of Unicode 6.2, U+20CF is not assigned. We do not allow it.
- {"\xe2\x83\x8f.com", L"\x20cf.com", "%E2%83%8F.com",
- Component(0, 13), CanonHostInfo::BROKEN, -1, ""},
+ {"\xe2\x83\x8f.com", L"\x20cf.com", "%E2%83%8F.com", Component(0, 13),
+ CanonHostInfo::BROKEN, -1, ""},
// U+0080 is not allowed.
- {"\xc2\x80.com", L"\x80.com", "%C2%80.com",
- Component(0, 10), CanonHostInfo::BROKEN, -1, ""},
+ {"\xc2\x80.com", L"\x80.com", "%C2%80.com", Component(0, 10),
+ CanonHostInfo::BROKEN, -1, ""},
// Mixed UTF-8 and escaped UTF-8 (narrow case) and UTF-16 and escaped
// Mixed UTF-8 and escaped UTF-8 (narrow case) and UTF-16 and escaped
// UTF-8 (wide case). The output should be equivalent to the true wide
// character input above).
- {"%E4%BD%A0%E5%A5%BD\xe4\xbd\xa0\xe5\xa5\xbd",
- L"%E4%BD%A0%E5%A5%BD\x4f60\x597d", "xn--6qqa088eba",
- Component(0, 14), CanonHostInfo::NEUTRAL, -1, ""},
+ {"%E4%BD%A0%E5%A5%BD\xe4\xbd\xa0\xe5\xa5\xbd",
+ L"%E4%BD%A0%E5%A5%BD\x4f60\x597d", "xn--6qqa088eba", Component(0, 14),
+ CanonHostInfo::NEUTRAL, -1, ""},
// Invalid escaped characters should fail and the percents should be
// escaped.
- {"%zz%66%a", L"%zz%66%a", "%25zzf%25a", Component(0, 10),
- CanonHostInfo::BROKEN, -1, ""},
+ {"%zz%66%a", L"%zz%66%a", "%25zzf%25a", Component(0, 10),
+ CanonHostInfo::BROKEN, -1, ""},
// If we get an invalid character that has been escaped.
- {"%25", L"%25", "%25", Component(0, 3),
- CanonHostInfo::BROKEN, -1, ""},
- {"hello%00", L"hello%00", "hello%00", Component(0, 8),
- CanonHostInfo::BROKEN, -1, ""},
+ {"%25", L"%25", "%25", Component(0, 3), CanonHostInfo::BROKEN, -1, ""},
+ {"hello%00", L"hello%00", "hello%00", Component(0, 8),
+ CanonHostInfo::BROKEN, -1, ""},
// Escaped numbers should be treated like IP addresses if they are.
- {"%30%78%63%30%2e%30%32%35%30.01", L"%30%78%63%30%2e%30%32%35%30.01",
- "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3,
- "C0A80001"},
- {"%30%78%63%30%2e%30%32%35%30.01%2e", L"%30%78%63%30%2e%30%32%35%30.01%2e",
- "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3,
- "C0A80001"},
+ {"%30%78%63%30%2e%30%32%35%30.01", L"%30%78%63%30%2e%30%32%35%30.01",
+ "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"},
+ {"%30%78%63%30%2e%30%32%35%30.01%2e",
+ L"%30%78%63%30%2e%30%32%35%30.01%2e", "192.168.0.1", Component(0, 11),
+ CanonHostInfo::IPV4, 3, "C0A80001"},
// Invalid escaping should trigger the regular host error handling.
- {"%3g%78%63%30%2e%30%32%35%30%2E.01", L"%3g%78%63%30%2e%30%32%35%30%2E.01", "%253gxc0.0250..01", Component(0, 17), CanonHostInfo::BROKEN, -1, ""},
+ {"%3g%78%63%30%2e%30%32%35%30%2E.01",
+ L"%3g%78%63%30%2e%30%32%35%30%2E.01", "%253gxc0.0250..01",
+ Component(0, 17), CanonHostInfo::BROKEN, -1, ""},
// Something that isn't exactly an IP should get treated as a host and
// spaces escaped.
- {"192.168.0.1 hello", L"192.168.0.1 hello", "192.168.0.1%20hello", Component(0, 19), CanonHostInfo::NEUTRAL, -1, ""},
+ {"192.168.0.1 hello", L"192.168.0.1 hello", "192.168.0.1%20hello",
+ Component(0, 19), CanonHostInfo::NEUTRAL, -1, ""},
// Fullwidth and escaped UTF-8 fullwidth should still be treated as IP.
// These are "0Xc0.0250.01" in fullwidth.
- {"\xef\xbc\x90%Ef%bc\xb8%ef%Bd%83\xef\xbc\x90%EF%BC%8E\xef\xbc\x90\xef\xbc\x92\xef\xbc\x95\xef\xbc\x90\xef\xbc%8E\xef\xbc\x90\xef\xbc\x91", L"\xff10\xff38\xff43\xff10\xff0e\xff10\xff12\xff15\xff10\xff0e\xff10\xff11", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"},
+ {"\xef\xbc\x90%Ef%bc\xb8%ef%Bd%83\xef\xbc\x90%EF%BC%"
+ "8E\xef\xbc\x90\xef\xbc\x92\xef\xbc\x95\xef\xbc\x90\xef\xbc%"
+ "8E\xef\xbc\x90\xef\xbc\x91",
+ L"\xff10\xff38\xff43\xff10\xff0e\xff10\xff12\xff15\xff10\xff0e\xff10"
+ L"\xff11",
+ "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"},
// Broken IP addresses get marked as such.
- {"192.168.0.257", L"192.168.0.257", "192.168.0.257", Component(0, 13), CanonHostInfo::BROKEN, -1, ""},
- {"[google.com]", L"[google.com]", "[google.com]", Component(0, 12), CanonHostInfo::BROKEN, -1, ""},
+ {"192.168.0.257", L"192.168.0.257", "192.168.0.257", Component(0, 13),
+ CanonHostInfo::BROKEN, -1, ""},
+ {"[google.com]", L"[google.com]", "[google.com]", Component(0, 12),
+ CanonHostInfo::BROKEN, -1, ""},
// Cyrillic letter followed by '(' should return punycode for '(' escaped
// before punycode string was created. I.e.
// if '(' is escaped after punycode is created we would get xn--%28-8tb
// (incorrect).
- {"\xd1\x82(", L"\x0442(", "xn--%28-7ed", Component(0, 11),
- CanonHostInfo::NEUTRAL, -1, ""},
- // Address with all hexidecimal characters with leading number of 1<<32
+ {"\xd1\x82(", L"\x0442(", "xn--%28-7ed", Component(0, 11),
+ CanonHostInfo::NEUTRAL, -1, ""},
+ // Address with all hexadecimal characters with leading number of 1<<32
// or greater and should return NEUTRAL rather than BROKEN if not all
// components are numbers.
- {"12345678912345.de", L"12345678912345.de", "12345678912345.de", Component(0, 17), CanonHostInfo::NEUTRAL, -1, ""},
- {"1.12345678912345.de", L"1.12345678912345.de", "1.12345678912345.de", Component(0, 19), CanonHostInfo::NEUTRAL, -1, ""},
- {"12345678912345.12345678912345.de", L"12345678912345.12345678912345.de", "12345678912345.12345678912345.de", Component(0, 32), CanonHostInfo::NEUTRAL, -1, ""},
- {"1.2.0xB3A73CE5B59.de", L"1.2.0xB3A73CE5B59.de", "1.2.0xb3a73ce5b59.de", Component(0, 20), CanonHostInfo::NEUTRAL, -1, ""},
- {"12345678912345.0xde", L"12345678912345.0xde", "12345678912345.0xde", Component(0, 19), CanonHostInfo::BROKEN, -1, ""},
- // A label that starts with "xn--" but contains non-ASCII characters should
- // be an error. Escape the invalid characters.
- {"xn--m\xc3\xbcnchen", L"xn--m\xfcnchen", "xn--m%C3%BCnchen", Component(0, 16), CanonHostInfo::BROKEN, -1, ""},
+ {"12345678912345.de", L"12345678912345.de", "12345678912345.de",
+ Component(0, 17), CanonHostInfo::NEUTRAL, -1, ""},
+ {"1.12345678912345.de", L"1.12345678912345.de", "1.12345678912345.de",
+ Component(0, 19), CanonHostInfo::NEUTRAL, -1, ""},
+ {"12345678912345.12345678912345.de", L"12345678912345.12345678912345.de",
+ "12345678912345.12345678912345.de", Component(0, 32),
+ CanonHostInfo::NEUTRAL, -1, ""},
+ {"1.2.0xB3A73CE5B59.de", L"1.2.0xB3A73CE5B59.de", "1.2.0xb3a73ce5b59.de",
+ Component(0, 20), CanonHostInfo::NEUTRAL, -1, ""},
+ {"12345678912345.0xde", L"12345678912345.0xde", "12345678912345.0xde",
+ Component(0, 19), CanonHostInfo::BROKEN, -1, ""},
+ // A label that starts with "xn--" but contains non-ASCII characters
+ // should be an error.
+ // Escape the invalid characters.
+ {"xn--m\xc3\xbcnchen", L"xn--m\xfcnchen", "xn--m%C3%BCnchen",
+ Component(0, 16), CanonHostInfo::BROKEN, -1, ""},
};
// CanonicalizeHost() non-verbose.
std::string out_str;
- for (size_t i = 0; i < arraysize(host_cases); i++) {
+ for (size_t i = 0; i < std::size(host_cases); i++) {
// Narrow version.
if (host_cases[i].input8) {
int host_len = static_cast<int>(strlen(host_cases[i].input8));
@@ -541,7 +628,7 @@
// Wide version.
if (host_cases[i].input16) {
- base::string16 input16(
+ std::u16string input16(
test_utils::TruncateWStringToUTF16(host_cases[i].input16));
int host_len = static_cast<int>(input16.length());
Component in_comp(0, host_len);
@@ -563,7 +650,7 @@
}
// CanonicalizeHostVerbose()
- for (size_t i = 0; i < arraysize(host_cases); i++) {
+ for (size_t i = 0; i < std::size(host_cases); i++) {
// Narrow version.
if (host_cases[i].input8) {
int host_len = static_cast<int>(strlen(host_cases[i].input8));
@@ -592,7 +679,7 @@
// Wide version.
if (host_cases[i].input16) {
- base::string16 input16(
+ std::u16string input16(
test_utils::TruncateWStringToUTF16(host_cases[i].input16));
int host_len = static_cast<int>(input16.length());
Component in_comp(0, host_len);
@@ -620,21 +707,36 @@
}
TEST(URLCanonTest, IPv4) {
+ // clang-format off
IPAddressCase cases[] = {
- // Empty is not an IP address.
+ // Empty is not an IP address.
{"", L"", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
{".", L".", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
- // Regular IP addresses in different bases.
+ // Regular IP addresses in different bases.
{"192.168.0.1", L"192.168.0.1", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"},
{"0300.0250.00.01", L"0300.0250.00.01", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"},
{"0xC0.0Xa8.0x0.0x1", L"0xC0.0Xa8.0x0.0x1", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"},
- // Non-IP addresses due to invalid characters.
+ // Non-IP addresses due to invalid characters.
{"192.168.9.com", L"192.168.9.com", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
- // Invalid characters for the base should be rejected.
- {"19a.168.0.1", L"19a.168.0.1", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
- {"0308.0250.00.01", L"0308.0250.00.01", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
- {"0xCG.0xA8.0x0.0x1", L"0xCG.0xA8.0x0.0x1", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
- // If there are not enough components, the last one should fill them out.
+ // Hostnames with a numeric final component but other components that don't
+ // parse as numbers should be considered broken.
+ {"19a.168.0.1", L"19a.168.0.1", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"19a.168.0.1.", L"19a.168.0.1.", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"0308.0250.00.01", L"0308.0250.00.01", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"0308.0250.00.01.", L"0308.0250.00.01.", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"0xCG.0xA8.0x0.0x1", L"0xCG.0xA8.0x0.0x1", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"0xCG.0xA8.0x0.0x1.", L"0xCG.0xA8.0x0.0x1.", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ // Hostnames with a non-numeric terminal component should be considered valid non-IPv4 hostnames.
+ {"19.168.0.1a", L"19.168.0.1a", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+ {"0xC.0xA8.0x0.0x1G", L"0xC.0xA8.0x0.0x1G", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+ // Hostnames that would be considered broken IPv4 hostnames should be considered valid non-IPv4 hostnames if they end with two dots instead of 0 or 1.
+ {"19a.168.0.1..", L"19a.168.0.1..", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+ {"0308.0250.00.01..", L"0308.0250.00.01..", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+ {"0xCG.0xA8.0x0.0x1..", L"0xCG.0xA8.0x0.0x1..", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+ // Hosts with components that aren't considered valid IPv4 numbers but are entirely numeric should be considered invalid.
+ {"1.2.3.08", L"1.2.3.08", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"1.2.3.08.", L"1.2.3.08.", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ // If there are not enough components, the last one should fill them out.
{"192", L"192", "0.0.0.192", Component(0, 9), CanonHostInfo::IPV4, 1, "000000C0"},
{"0xC0a80001", L"0xC0a80001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 1, "C0A80001"},
{"030052000001", L"030052000001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 1, "C0A80001"},
@@ -643,15 +745,16 @@
{"192.0x00A80001", L"192.0x000A80001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 2, "C0A80001"},
{"0xc0.052000001", L"0xc0.052000001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 2, "C0A80001"},
{"192.168.1", L"192.168.1", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"},
- // Too many components means not an IP address.
- {"192.168.0.0.1", L"192.168.0.0.1", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
- // We allow a single trailing dot.
+ // Hostnames with too many components, but a numeric final component, are invalid.
+ {"192.168.0.0.1", L"192.168.0.0.1", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ // We allow a single trailing dot.
{"192.168.0.1.", L"192.168.0.1.", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"},
{"192.168.0.1. hello", L"192.168.0.1. hello", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
{"192.168.0.1..", L"192.168.0.1..", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
- // Two dots in a row means not an IP address.
- {"192.168..1", L"192.168..1", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
- // Any numerical overflow should be marked as BROKEN.
+ // Hosts with two dots in a row with a final numeric component are considered invalid.
+ {"192.168..1", L"192.168..1", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"192.168..1.", L"192.168..1.", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ // Any numerical overflow should be marked as BROKEN.
{"0x100.0", L"0x100.0", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"0x100.0.0", L"0x100.0.0", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"0x100.0.0.0", L"0x100.0.0.0", "", Component(), CanonHostInfo::BROKEN, -1, ""},
@@ -661,7 +764,7 @@
{"0.0.0x10000", L"0.0.0x10000", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"0.0x1000000", L"0.0x1000000", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"0x100000000", L"0x100000000", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- // Repeat the previous tests, minus 1, to verify boundaries.
+ // Repeat the previous tests, minus 1, to verify boundaries.
{"0xFF.0", L"0xFF.0", "255.0.0.0", Component(0, 9), CanonHostInfo::IPV4, 2, "FF000000"},
{"0xFF.0.0", L"0xFF.0.0", "255.0.0.0", Component(0, 9), CanonHostInfo::IPV4, 3, "FF000000"},
{"0xFF.0.0.0", L"0xFF.0.0.0", "255.0.0.0", Component(0, 9), CanonHostInfo::IPV4, 4, "FF000000"},
@@ -671,52 +774,69 @@
{"0.0.0xFFFF", L"0.0.0xFFFF", "0.0.255.255", Component(0, 11), CanonHostInfo::IPV4, 3, "0000FFFF"},
{"0.0xFFFFFF", L"0.0xFFFFFF", "0.255.255.255", Component(0, 13), CanonHostInfo::IPV4, 2, "00FFFFFF"},
{"0xFFFFFFFF", L"0xFFFFFFFF", "255.255.255.255", Component(0, 15), CanonHostInfo::IPV4, 1, "FFFFFFFF"},
- // Old trunctations tests. They're all "BROKEN" now.
+ // Old truncation tests. They're all "BROKEN" now.
{"276.256.0xf1a2.077777", L"276.256.0xf1a2.077777", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"192.168.0.257", L"192.168.0.257", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"192.168.0xa20001", L"192.168.0xa20001", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"192.015052000001", L"192.015052000001", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"0X12C0a80001", L"0X12C0a80001", "", Component(), CanonHostInfo::BROKEN, -1, ""},
{"276.1.2", L"276.1.2", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- // Spaces should be rejected.
+ // Too many components should be rejected, in valid ranges or not.
+ {"255.255.255.255.255", L"255.255.255.255.255", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"256.256.256.256.256", L"256.256.256.256.256", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ // Spaces should be rejected.
{"192.168.0.1 hello", L"192.168.0.1 hello", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
- // Very large numbers.
+ // Very large numbers.
{"0000000000000300.0x00000000000000fF.00000000000000001", L"0000000000000300.0x00000000000000fF.00000000000000001", "192.255.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, "C0FF0001"},
{"0000000000000300.0xffffffffFFFFFFFF.3022415481470977", L"0000000000000300.0xffffffffFFFFFFFF.3022415481470977", "", Component(0, 11), CanonHostInfo::BROKEN, -1, ""},
- // A number has no length limit, but long numbers can still overflow.
+ // A number has no length limit, but long numbers can still overflow.
{"00000000000000000001", L"00000000000000000001", "0.0.0.1", Component(0, 7), CanonHostInfo::IPV4, 1, "00000001"},
{"0000000000000000100000000000000001", L"0000000000000000100000000000000001", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- // If a long component is non-numeric, it's a hostname, *not* a broken IP.
+ // If a long component is non-numeric, it's a hostname, *not* a broken IP.
{"0.0.0.000000000000000000z", L"0.0.0.000000000000000000z", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
{"0.0.0.100000000000000000z", L"0.0.0.100000000000000000z", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
- // Truncation of all zeros should still result in 0.
+ // Truncation of all zeros should still result in 0.
{"0.00.0x.0x0", L"0.00.0x.0x0", "0.0.0.0", Component(0, 7), CanonHostInfo::IPV4, 4, "00000000"},
+ // Non-ASCII characters in final component should return NEUTRAL.
+ {"1.2.3.\xF0\x9F\x92\xA9", L"1.2.3.\xD83D\xDCA9", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+ {"1.2.3.4\xF0\x9F\x92\xA9", L"1.2.3.4\xD83D\xDCA9", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+ {"1.2.3.0x\xF0\x9F\x92\xA9", L"1.2.3.0x\xD83D\xDCA9", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+ {"1.2.3.0\xF0\x9F\x92\xA9", L"1.2.3.0\xD83D\xDCA9", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+ // Non-ASCII characters in other components should result in broken IPs when final component is numeric.
+ {"1.2.\xF0\x9F\x92\xA9.4", L"1.2.\xD83D\xDCA9.4", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"1.2.3\xF0\x9F\x92\xA9.4", L"1.2.3\xD83D\xDCA9.4", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"1.2.0x\xF0\x9F\x92\xA9.4", L"1.2.0x\xD83D\xDCA9.4", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"1.2.0\xF0\x9F\x92\xA9.4", L"1.2.0\xD83D\xDCA9.4", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+ {"\xF0\x9F\x92\xA9.2.3.4", L"\xD83D\xDCA9.2.3.4", "", Component(), CanonHostInfo::BROKEN, -1, ""},
};
+ // clang-format on
- for (size_t i = 0; i < arraysize(cases); i++) {
+ for (const auto& test_case : cases) {
+ SCOPED_TRACE(test_case.input8);
+
// 8-bit version.
- Component component(0, static_cast<int>(strlen(cases[i].input8)));
+ Component component(0, static_cast<int>(strlen(test_case.input8)));
std::string out_str1;
StdStringCanonOutput output1(&out_str1);
CanonHostInfo host_info;
- CanonicalizeIPAddress(cases[i].input8, component, &output1, &host_info);
+ CanonicalizeIPAddress(test_case.input8, component, &output1, &host_info);
output1.Complete();
- EXPECT_EQ(cases[i].expected_family, host_info.family);
- EXPECT_EQ(std::string(cases[i].expected_address_hex),
+ EXPECT_EQ(test_case.expected_family, host_info.family);
+ EXPECT_EQ(std::string(test_case.expected_address_hex),
BytesToHexString(host_info.address, host_info.AddressLength()));
if (host_info.family == CanonHostInfo::IPV4) {
- EXPECT_STREQ(cases[i].expected, out_str1.c_str());
- EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin);
- EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len);
- EXPECT_EQ(cases[i].expected_num_ipv4_components,
+ EXPECT_STREQ(test_case.expected, out_str1.c_str());
+ EXPECT_EQ(test_case.expected_component.begin, host_info.out_host.begin);
+ EXPECT_EQ(test_case.expected_component.len, host_info.out_host.len);
+ EXPECT_EQ(test_case.expected_num_ipv4_components,
host_info.num_ipv4_components);
}
// 16-bit version.
- base::string16 input16(
- test_utils::TruncateWStringToUTF16(cases[i].input16));
+ std::u16string input16(
+ test_utils::TruncateWStringToUTF16(test_case.input16));
component = Component(0, static_cast<int>(input16.length()));
std::string out_str2;
@@ -724,20 +844,43 @@
CanonicalizeIPAddress(input16.c_str(), component, &output2, &host_info);
output2.Complete();
- EXPECT_EQ(cases[i].expected_family, host_info.family);
- EXPECT_EQ(std::string(cases[i].expected_address_hex),
+ EXPECT_EQ(test_case.expected_family, host_info.family);
+ EXPECT_EQ(std::string(test_case.expected_address_hex),
BytesToHexString(host_info.address, host_info.AddressLength()));
if (host_info.family == CanonHostInfo::IPV4) {
- EXPECT_STREQ(cases[i].expected, out_str2.c_str());
- EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin);
- EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len);
- EXPECT_EQ(cases[i].expected_num_ipv4_components,
+ EXPECT_STREQ(test_case.expected, out_str2.c_str());
+ EXPECT_EQ(test_case.expected_component.begin, host_info.out_host.begin);
+ EXPECT_EQ(test_case.expected_component.len, host_info.out_host.len);
+ EXPECT_EQ(test_case.expected_num_ipv4_components,
host_info.num_ipv4_components);
}
}
}
-TEST(URLCanonTest, IPv6) {
+class URLCanonIPv6Test
+ : public ::testing::Test,
+ public ::testing::WithParamInterface<bool> {
+ public:
+ URLCanonIPv6Test() {
+ if (GetParam()) {
+ scoped_feature_list_.InitAndEnableFeature(kStrictIPv4EmbeddedIPv6AddressParsing);
+ } else {
+ scoped_feature_list_.InitAndDisableFeature(kStrictIPv4EmbeddedIPv6AddressParsing);
+ }
+ }
+
+ private:
+ base::test::ScopedFeatureList scoped_feature_list_;
+};
+
+INSTANTIATE_TEST_SUITE_P(All,
+ URLCanonIPv6Test,
+ ::testing::Bool());
+
+TEST_P(URLCanonIPv6Test, IPv6) {
+ bool strict_ipv4_embedded_ipv6_parsing =
+ base::FeatureList::IsEnabled(url::kStrictIPv4EmbeddedIPv6AddressParsing);
+
IPAddressCase cases[] = {
// Empty is not an IP address.
{"", L"", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
@@ -776,8 +919,24 @@
{"[2001::192.168.0.1]", L"[2001::192.168.0.1]", "[2001::c0a8:1]", Component(0, 14), CanonHostInfo::IPV6, -1, "200100000000000000000000C0A80001"},
{"[1:2:192.168.0.1:5:6]", L"[1:2:192.168.0.1:5:6]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
- // IPv4 with last component missing.
- {"[::ffff:192.1.2]", L"[::ffff:192.1.2]", "[::ffff:c001:2]", Component(0,15), CanonHostInfo::IPV6, -1, "00000000000000000000FFFFC0010002"},
+ // IPv4 embedded IPv6 addresses
+ {"[::ffff:192.1.2]",
+ L"[::ffff:192.1.2]",
+ "[::ffff:c001:2]",
+ strict_ipv4_embedded_ipv6_parsing ? Component() : Component(0,15),
+ strict_ipv4_embedded_ipv6_parsing ? CanonHostInfo::BROKEN : CanonHostInfo::IPV6,
+ -1,
+ (strict_ipv4_embedded_ipv6_parsing ? "" : "00000000000000000000FFFFC0010002")},
+ {"[::ffff:192.1]",
+ L"[::ffff:192.1]",
+ "[::ffff:c000:1]",
+ strict_ipv4_embedded_ipv6_parsing ? Component() : Component(0,15),
+ strict_ipv4_embedded_ipv6_parsing ? CanonHostInfo::BROKEN : CanonHostInfo::IPV6,
+ -1,
+ (strict_ipv4_embedded_ipv6_parsing ? "" : "00000000000000000000FFFFC0000001")},
+ {"[::ffff:192.1.2.3.4]",
+ L"[::ffff:192.1.2.3.4]",
+ "", Component(), CanonHostInfo::BROKEN, -1, ""},
// IPv4 using hex.
// TODO(eroman): Should this format be disallowed?
@@ -847,7 +1006,7 @@
{"[::1 hello]", L"[::1 hello]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
};
- for (size_t i = 0; i < arraysize(cases); i++) {
+ for (size_t i = 0; i < std::size(cases); i++) {
// 8-bit version.
Component component(0, static_cast<int>(strlen(cases[i].input8)));
@@ -868,7 +1027,7 @@
}
// 16-bit version.
- base::string16 input16(
+ std::u16string input16(
test_utils::TruncateWStringToUTF16(cases[i].input16));
component = Component(0, static_cast<int>(input16.length()));
@@ -973,7 +1132,7 @@
{"ftp://me\\mydomain:pass@foo.com/", "", Component(0, -1), Component(0, -1), true},
};
- for (size_t i = 0; i < arraysize(user_info_cases); i++) {
+ for (size_t i = 0; i < std::size(user_info_cases); i++) {
int url_len = static_cast<int>(strlen(user_info_cases[i].input));
Parsed parsed;
ParseStandardURL(user_info_cases[i].input, url_len, &parsed);
@@ -1000,7 +1159,7 @@
// Now try the wide version
out_str.clear();
StdStringCanonOutput output2(&out_str);
- base::string16 wide_input(base::UTF8ToUTF16(user_info_cases[i].input));
+ std::u16string wide_input(base::UTF8ToUTF16(user_info_cases[i].input));
success = CanonicalizeUserInfo(wide_input.c_str(),
parsed.username,
wide_input.c_str(),
@@ -1042,7 +1201,7 @@
{"80", PORT_UNSPECIFIED, ":80", Component(1, 2), true},
};
- for (size_t i = 0; i < arraysize(port_cases); i++) {
+ for (size_t i = 0; i < std::size(port_cases); i++) {
int url_len = static_cast<int>(strlen(port_cases[i].input));
Component in_comp(0, url_len);
Component out_comp;
@@ -1063,7 +1222,7 @@
// Now try the wide version
out_str.clear();
StdStringCanonOutput output2(&out_str);
- base::string16 wide_input(base::UTF8ToUTF16(port_cases[i].input));
+ std::u16string wide_input(base::UTF8ToUTF16(port_cases[i].input));
success = CanonicalizePort(wide_input.c_str(),
in_comp,
port_cases[i].default_port,
@@ -1078,94 +1237,117 @@
}
}
-TEST(URLCanonTest, Path) {
- DualComponentCase path_cases[] = {
+DualComponentCase kCommonPathCases[] = {
// ----- path collapsing tests -----
{"/././foo", L"/././foo", "/foo", Component(0, 4), true},
{"/./.foo", L"/./.foo", "/.foo", Component(0, 5), true},
{"/foo/.", L"/foo/.", "/foo/", Component(0, 5), true},
{"/foo/./", L"/foo/./", "/foo/", Component(0, 5), true},
- // double dots followed by a slash or the end of the string count
+ // double dots followed by a slash or the end of the string count
{"/foo/bar/..", L"/foo/bar/..", "/foo/", Component(0, 5), true},
{"/foo/bar/../", L"/foo/bar/../", "/foo/", Component(0, 5), true},
- // don't count double dots when they aren't followed by a slash
+ // don't count double dots when they aren't followed by a slash
{"/foo/..bar", L"/foo/..bar", "/foo/..bar", Component(0, 10), true},
- // some in the middle
+ // some in the middle
{"/foo/bar/../ton", L"/foo/bar/../ton", "/foo/ton", Component(0, 8), true},
- {"/foo/bar/../ton/../../a", L"/foo/bar/../ton/../../a", "/a", Component(0, 2), true},
- // we should not be able to go above the root
+ {"/foo/bar/../ton/../../a", L"/foo/bar/../ton/../../a", "/a",
+ Component(0, 2), true},
+ // we should not be able to go above the root
{"/foo/../../..", L"/foo/../../..", "/", Component(0, 1), true},
{"/foo/../../../ton", L"/foo/../../../ton", "/ton", Component(0, 4), true},
- // escaped dots should be unescaped and treated the same as dots
+ // escaped dots should be unescaped and treated the same as dots
{"/foo/%2e", L"/foo/%2e", "/foo/", Component(0, 5), true},
{"/foo/%2e%2", L"/foo/%2e%2", "/foo/.%2", Component(0, 8), true},
- {"/foo/%2e./%2e%2e/.%2e/%2e.bar", L"/foo/%2e./%2e%2e/.%2e/%2e.bar", "/..bar", Component(0, 6), true},
- // Multiple slashes in a row should be preserved and treated like empty
- // directory names.
+ {"/foo/%2e./%2e%2e/.%2e/%2e.bar", L"/foo/%2e./%2e%2e/.%2e/%2e.bar",
+ "/..bar", Component(0, 6), true},
+ // Multiple slashes in a row should be preserved and treated like empty
+ // directory names.
{"////../..", L"////../..", "//", Component(0, 2), true},
// ----- escaping tests -----
{"/foo", L"/foo", "/foo", Component(0, 4), true},
- // Valid escape sequence
+ // Valid escape sequence
{"/%20foo", L"/%20foo", "/%20foo", Component(0, 7), true},
- // Invalid escape sequence we should pass through unchanged.
+ // Invalid escape sequence we should pass through unchanged.
{"/foo%", L"/foo%", "/foo%", Component(0, 5), true},
{"/foo%2", L"/foo%2", "/foo%2", Component(0, 6), true},
- // Invalid escape sequence: bad characters should be treated the same as
- // the sourrounding text, not as escaped (in this case, UTF-8).
+ // Invalid escape sequence: bad characters should be treated the same as
+ // the surrounding text, not as escaped (in this case, UTF-8).
{"/foo%2zbar", L"/foo%2zbar", "/foo%2zbar", Component(0, 10), true},
- {"/foo%2\xc2\xa9zbar", NULL, "/foo%2%C2%A9zbar", Component(0, 16), true},
- {NULL, L"/foo%2\xc2\xa9zbar", "/foo%2%C3%82%C2%A9zbar", Component(0, 22), true},
- // Regular characters that are escaped should be unescaped
+ {"/foo%2\xc2\xa9zbar", nullptr, "/foo%2%C2%A9zbar", Component(0, 16), true},
+ {nullptr, L"/foo%2\xc2\xa9zbar", "/foo%2%C3%82%C2%A9zbar", Component(0, 22),
+ true},
+ // Regular characters that are escaped should be unescaped
{"/foo%41%7a", L"/foo%41%7a", "/fooAz", Component(0, 6), true},
- // Funny characters that are unescaped should be escaped
- {"/foo\x09\x91%91", NULL, "/foo%09%91%91", Component(0, 13), true},
- {NULL, L"/foo\x09\x91%91", "/foo%09%C2%91%91", Component(0, 16), true},
- // Invalid characters that are escaped should cause a failure.
+ // Funny characters that are unescaped should be escaped
+ {"/foo\x09\x91%91", nullptr, "/foo%09%91%91", Component(0, 13), true},
+ {nullptr, L"/foo\x09\x91%91", "/foo%09%C2%91%91", Component(0, 16), true},
+ // Invalid characters that are escaped should cause a failure.
{"/foo%00%51", L"/foo%00%51", "/foo%00Q", Component(0, 8), false},
- // Some characters should be passed through unchanged regardless of esc.
- {"/(%28:%3A%29)", L"/(%28:%3A%29)", "/(%28:%3A%29)", Component(0, 13), true},
- // Characters that are properly escaped should not have the case changed
- // of hex letters.
- {"/%3A%3a%3C%3c", L"/%3A%3a%3C%3c", "/%3A%3a%3C%3c", Component(0, 13), true},
- // Funny characters that are unescaped should be escaped
+ // Some characters should be passed through unchanged regardless of esc.
+ {"/(%28:%3A%29)", L"/(%28:%3A%29)", "/(%28:%3A%29)", Component(0, 13),
+ true},
+ // Characters that are properly escaped should not have the case changed
+ // of hex letters.
+ {"/%3A%3a%3C%3c", L"/%3A%3a%3C%3c", "/%3A%3a%3C%3c", Component(0, 13),
+ true},
+ // Funny characters that are unescaped should be escaped
{"/foo\tbar", L"/foo\tbar", "/foo%09bar", Component(0, 10), true},
- // Backslashes should get converted to forward slashes
+ // Backslashes should get converted to forward slashes
{"\\foo\\bar", L"\\foo\\bar", "/foo/bar", Component(0, 8), true},
- // Hashes found in paths (possibly only when the caller explicitly sets
- // the path on an already-parsed URL) should be escaped.
+ // Hashes found in paths (possibly only when the caller explicitly sets
+ // the path on an already-parsed URL) should be escaped.
{"/foo#bar", L"/foo#bar", "/foo%23bar", Component(0, 10), true},
- // %7f should be allowed and %3D should not be unescaped (these were wrong
- // in a previous version).
- {"/%7Ffp3%3Eju%3Dduvgw%3Dd", L"/%7Ffp3%3Eju%3Dduvgw%3Dd", "/%7Ffp3%3Eju%3Dduvgw%3Dd", Component(0, 24), true},
- // @ should be passed through unchanged (escaped or unescaped).
+ // %7f should be allowed and %3D should not be unescaped (these were wrong
+ // in a previous version).
+ {"/%7Ffp3%3Eju%3Dduvgw%3Dd", L"/%7Ffp3%3Eju%3Dduvgw%3Dd",
+ "/%7Ffp3%3Eju%3Dduvgw%3Dd", Component(0, 24), true},
+ // @ should be passed through unchanged (escaped or unescaped).
{"/@asdf%40", L"/@asdf%40", "/@asdf%40", Component(0, 9), true},
- // Nested escape sequences should result in escaping the leading '%' if
- // unescaping would result in a new escape sequence.
+ // Nested escape sequences should result in escaping the leading '%' if
+ // unescaping would result in a new escape sequence.
{"/%A%42", L"/%A%42", "/%25AB", Component(0, 6), true},
{"/%%41B", L"/%%41B", "/%25AB", Component(0, 6), true},
{"/%%41%42", L"/%%41%42", "/%25AB", Component(0, 6), true},
- // Make sure truncated "nested" escapes don't result in reading off the
- // string end.
+ // Make sure truncated "nested" escapes don't result in reading off the
+ // string end.
{"/%%41", L"/%%41", "/%A", Component(0, 3), true},
- // Don't unescape the leading '%' if unescaping doesn't result in a valid
- // new escape sequence.
+ // Don't unescape the leading '%' if unescaping doesn't result in a valid
+ // new escape sequence.
{"/%%470", L"/%%470", "/%G0", Component(0, 4), true},
{"/%%2D%41", L"/%%2D%41", "/%-A", Component(0, 4), true},
- // Don't erroneously downcast a UTF-16 charater in a way that makes it
- // look like part of an escape sequence.
- {NULL, L"/%%41\x0130", "/%A%C4%B0", Component(0, 9), true},
+ // Don't erroneously downcast a UTF-16 character in a way that makes it
+ // look like part of an escape sequence.
+ {nullptr, L"/%%41\x0130", "/%A%C4%B0", Component(0, 9), true},
// ----- encoding tests -----
- // Basic conversions
- {"/\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd", L"/\x4f60\x597d\x4f60\x597d", "/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD", Component(0, 37), true},
- // Invalid unicode characters should fail. We only do validation on
- // UTF-16 input, so this doesn't happen on 8-bit.
- {"/\xef\xb7\x90zyx", NULL, "/%EF%B7%90zyx", Component(0, 13), true},
- {NULL, L"/\xfdd0zyx", "/%EF%BF%BDzyx", Component(0, 13), false},
- };
+ // Basic conversions
+ {"/\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd",
+ L"/\x4f60\x597d\x4f60\x597d", "/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD",
+ Component(0, 37), true},
+ // Invalid unicode characters should fail. We only do validation on
+ // UTF-16 input, so this doesn't happen on 8-bit.
+ {"/\xef\xb7\x90zyx", nullptr, "/%EF%B7%90zyx", Component(0, 13), true},
+ {nullptr, L"/\xfdd0zyx", "/%EF%BF%BDzyx", Component(0, 13), false},
+};
- for (size_t i = 0; i < arraysize(path_cases); i++) {
+// Pointer to an 8-bit path canonicalizer: (input characters, input component,
+// output buffer, resulting component) -> success flag.
+using CanonFunc8Bit =
+    bool (*)(const char*, const Component&, CanonOutput*, Component*);
+// Same shape as CanonFunc8Bit, but for canonicalizers that take char16_t
+// input.
+using CanonFunc16Bit =
+    bool (*)(const char16_t*, const Component&, CanonOutput*, Component*);
+
+void DoPathTest(const DualComponentCase* path_cases,
+ size_t num_cases,
+ CanonFunc8Bit canon_func_8,
+ CanonFunc16Bit canon_func_16) {
+ for (size_t i = 0; i < num_cases; i++) {
+ testing::Message scope_message;
+ scope_message << path_cases[i].input8 << "," << path_cases[i].input16;
+ SCOPED_TRACE(scope_message);
if (path_cases[i].input8) {
int len = static_cast<int>(strlen(path_cases[i].input8));
Component in_comp(0, len);
@@ -1173,7 +1355,7 @@
std::string out_str;
StdStringCanonOutput output(&out_str);
bool success =
- CanonicalizePath(path_cases[i].input8, in_comp, &output, &out_comp);
+ canon_func_8(path_cases[i].input8, in_comp, &output, &out_comp);
output.Complete();
EXPECT_EQ(path_cases[i].expected_success, success);
@@ -1183,7 +1365,7 @@
}
if (path_cases[i].input16) {
- base::string16 input16(
+ std::u16string input16(
test_utils::TruncateWStringToUTF16(path_cases[i].input16));
int len = static_cast<int>(input16.length());
Component in_comp(0, len);
@@ -1192,7 +1374,7 @@
StdStringCanonOutput output(&out_str);
bool success =
- CanonicalizePath(input16.c_str(), in_comp, &output, &out_comp);
+ canon_func_16(input16.c_str(), in_comp, &output, &out_comp);
output.Complete();
EXPECT_EQ(path_cases[i].expected_success, success);
@@ -1201,6 +1383,11 @@
EXPECT_EQ(path_cases[i].expected, out_str);
}
}
+}
+
+TEST(URLCanonTest, Path) {
+ DoPathTest(kCommonPathCases, std::size(kCommonPathCases), CanonicalizePath,
+ CanonicalizePath);
// Manual test: embedded NULLs should be escaped and the URL should be marked
// as invalid.
@@ -1216,6 +1403,18 @@
EXPECT_EQ("/ab%00c", out_str);
}
+// Runs the shared full-path cases, then inputs with no leading slash.
+TEST(URLCanonTest, PartialPath) {
+  DualComponentCase slashless_cases[] = {
+      {".html", L".html", ".html", Component(0, 5), true},
+      {"", L"", "", Component(0, 0), true},
+  };
+  DoPathTest(kCommonPathCases, std::size(kCommonPathCases),
+             CanonicalizePartialPath, CanonicalizePartialPath);
+  DoPathTest(slashless_cases, std::size(slashless_cases),
+             CanonicalizePartialPath, CanonicalizePartialPath);
+}
+
TEST(URLCanonTest, Query) {
struct QueryCase {
const char* input8;
@@ -1242,7 +1441,7 @@
{"q=\"asdf\"", L"q=\"asdf\"", "?q=%22asdf%22"},
};
- for (size_t i = 0; i < arraysize(query_cases); i++) {
+ for (size_t i = 0; i < std::size(query_cases); i++) {
Component out_comp;
if (query_cases[i].input8) {
@@ -1259,7 +1458,7 @@
}
if (query_cases[i].input16) {
- base::string16 input16(
+ std::u16string input16(
test_utils::TruncateWStringToUTF16(query_cases[i].input16));
int len = static_cast<int>(input16.length());
Component in_comp(0, len);
@@ -1305,8 +1504,8 @@
// Escaping should be preserved unchanged, even invalid ones
{"%41%a", L"%41%a", "#%41%a", Component(1, 5), true},
// Invalid UTF-8/16 input should be flagged and the input made valid
- {"\xc2", NULL, "#%EF%BF%BD", Component(1, 9), true},
- {NULL, L"\xd800\x597d", "#%EF%BF%BD%E5%A5%BD", Component(1, 18), true},
+ {"\xc2", nullptr, "#%EF%BF%BD", Component(1, 9), true},
+ {nullptr, L"\xd800\x597d", "#%EF%BF%BD%E5%A5%BD", Component(1, 18), true},
// Test a Unicode invalid character.
{"a\xef\xb7\x90", L"a\xfdd0", "#a%EF%BF%BD", Component(1, 10), true},
// Refs can have # signs and we should preserve them.
@@ -1314,7 +1513,7 @@
{"#asdf", L"#asdf", "##asdf", Component(1, 5), true},
};
- for (size_t i = 0; i < arraysize(ref_cases); i++) {
+ for (size_t i = 0; i < std::size(ref_cases); i++) {
// 8-bit input
if (ref_cases[i].input8) {
int len = static_cast<int>(strlen(ref_cases[i].input8));
@@ -1333,7 +1532,7 @@
// 16-bit input
if (ref_cases[i].input16) {
- base::string16 input16(
+ std::u16string input16(
test_utils::TruncateWStringToUTF16(ref_cases[i].input16));
int len = static_cast<int>(input16.length());
Component in_comp(0, len);
@@ -1361,8 +1560,8 @@
output.Complete();
EXPECT_EQ(1, out_comp.begin);
- EXPECT_EQ(3, out_comp.len);
- EXPECT_EQ("#abz", out_str);
+ EXPECT_EQ(6, out_comp.len);
+ EXPECT_EQ("#ab%00z", out_str);
}
TEST(URLCanonTest, CanonicalizeStandardURL) {
@@ -1405,7 +1604,7 @@
{"https://foo:80/", "https://foo:80/", true},
{"ftp://foo:21/", "ftp://foo/", true},
{"ftp://foo:80/", "ftp://foo:80/", true},
- {"gopher://foo:70/", "gopher://foo/", true},
+ {"gopher://foo:70/", "gopher://foo:70/", true},
{"gopher://foo:443/", "gopher://foo:443/", true},
{"ws://foo:80/", "ws://foo/", true},
{"ws://foo:81/", "ws://foo:81/", true},
@@ -1424,9 +1623,12 @@
{"ws:)W\x1eW\xef\xb9\xaa"
"81:80/",
"ws://%29w%1ew%81/", false},
+ // Regression test for the last_invalid_percent_index bug described in
+ // https://crbug.com/1080890#c10.
+ {R"(HTTP:S/5%\../>%41)", "http://s/%3EA", true},
};
- for (size_t i = 0; i < arraysize(cases); i++) {
+ for (size_t i = 0; i < std::size(cases); i++) {
int url_len = static_cast<int>(strlen(cases[i].input));
Parsed parsed;
ParseStandardURL(cases[i].input, url_len, &parsed);
@@ -1449,17 +1651,25 @@
TEST(URLCanonTest, ReplaceStandardURL) {
ReplaceCase replace_cases[] = {
// Common case of truncating the path.
- {"http://www.google.com/foo?bar=baz#ref", NULL, NULL, NULL, NULL, NULL, "/", kDeleteComp, kDeleteComp, "http://www.google.com/"},
+ {"http://www.google.com/foo?bar=baz#ref", nullptr, nullptr, nullptr,
+ nullptr, nullptr, "/", kDeleteComp, kDeleteComp,
+ "http://www.google.com/"},
// Replace everything
- {"http://a:b@google.com:22/foo;bar?baz@cat", "https", "me", "pw", "host.com", "99", "/path", "query", "ref", "https://me:pw@host.com:99/path?query#ref"},
+ {"http://a:b@google.com:22/foo;bar?baz@cat", "https", "me", "pw",
+ "host.com", "99", "/path", "query", "ref",
+ "https://me:pw@host.com:99/path?query#ref"},
// Replace nothing
- {"http://a:b@google.com:22/foo?baz@cat", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "http://a:b@google.com:22/foo?baz@cat"},
+ {"http://a:b@google.com:22/foo?baz@cat", nullptr, nullptr, nullptr,
+ nullptr, nullptr, nullptr, nullptr, nullptr,
+ "http://a:b@google.com:22/foo?baz@cat"},
// Replace scheme with filesystem. The result is garbage, but you asked
// for it.
- {"http://a:b@google.com:22/foo?baz@cat", "filesystem", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "filesystem://a:b@google.com:22/foo?baz@cat"},
+ {"http://a:b@google.com:22/foo?baz@cat", "filesystem", nullptr, nullptr,
+ nullptr, nullptr, nullptr, nullptr, nullptr,
+ "filesystem://a:b@google.com:22/foo?baz@cat"},
};
- for (size_t i = 0; i < arraysize(replace_cases); i++) {
+ for (size_t i = 0; i < std::size(replace_cases); i++) {
const ReplaceCase& cur = replace_cases[i];
int base_len = static_cast<int>(strlen(cur.base));
Parsed parsed;
@@ -1526,24 +1736,42 @@
TEST(URLCanonTest, ReplaceFileURL) {
ReplaceCase replace_cases[] = {
// Replace everything
- {"file:///C:/gaba?query#ref", NULL, NULL, NULL, "filer", NULL, "/foo", "b", "c", "file://filer/foo?b#c"},
+ {"file:///C:/gaba?query#ref", nullptr, nullptr, nullptr, "filer", nullptr,
+ "/foo", "b", "c", "file://filer/foo?b#c"},
// Replace nothing
- {"file:///C:/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "file:///C:/gaba?query#ref"},
+ {"file:///C:/gaba?query#ref", nullptr, nullptr, nullptr, nullptr, nullptr,
+ nullptr, nullptr, nullptr, "file:///C:/gaba?query#ref"},
+ {"file:///Y:", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
+ nullptr, nullptr, "file:///Y:"},
+ {"file:///Y:/", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
+ nullptr, nullptr, "file:///Y:/"},
+ {"file:///./Y", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
+ nullptr, nullptr, "file:///Y"},
+ {"file:///./Y:", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
+ nullptr, nullptr, "file:///Y:"},
// Clear non-path components (common)
- {"file:///C:/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, kDeleteComp, kDeleteComp, "file:///C:/gaba"},
+ {"file:///C:/gaba?query#ref", nullptr, nullptr, nullptr, nullptr, nullptr,
+ nullptr, kDeleteComp, kDeleteComp, "file:///C:/gaba"},
// Replace path with something that doesn't begin with a slash and make
// sure it gets added properly.
- {"file:///C:/gaba", NULL, NULL, NULL, NULL, NULL, "interesting/", NULL, NULL, "file:///interesting/"},
- {"file:///home/gaba?query#ref", NULL, NULL, NULL, "filer", NULL, "/foo", "b", "c", "file://filer/foo?b#c"},
- {"file:///home/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "file:///home/gaba?query#ref"},
- {"file:///home/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, kDeleteComp, kDeleteComp, "file:///home/gaba"},
- {"file:///home/gaba", NULL, NULL, NULL, NULL, NULL, "interesting/", NULL, NULL, "file:///interesting/"},
+ {"file:///C:/gaba", nullptr, nullptr, nullptr, nullptr, nullptr,
+ "interesting/", nullptr, nullptr, "file:///interesting/"},
+ {"file:///home/gaba?query#ref", nullptr, nullptr, nullptr, "filer",
+ nullptr, "/foo", "b", "c", "file://filer/foo?b#c"},
+ {"file:///home/gaba?query#ref", nullptr, nullptr, nullptr, nullptr,
+ nullptr, nullptr, nullptr, nullptr, "file:///home/gaba?query#ref"},
+ {"file:///home/gaba?query#ref", nullptr, nullptr, nullptr, nullptr,
+ nullptr, nullptr, kDeleteComp, kDeleteComp, "file:///home/gaba"},
+ {"file:///home/gaba", nullptr, nullptr, nullptr, nullptr, nullptr,
+ "interesting/", nullptr, nullptr, "file:///interesting/"},
// Replace scheme -- shouldn't do anything.
- {"file:///C:/gaba?query#ref", "http", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "file:///C:/gaba?query#ref"},
+ {"file:///C:/gaba?query#ref", "http", nullptr, nullptr, nullptr, nullptr,
+ nullptr, nullptr, nullptr, "file:///C:/gaba?query#ref"},
};
- for (size_t i = 0; i < arraysize(replace_cases); i++) {
+ for (size_t i = 0; i < std::size(replace_cases); i++) {
const ReplaceCase& cur = replace_cases[i];
+ SCOPED_TRACE(cur.base);
int base_len = static_cast<int>(strlen(cur.base));
Parsed parsed;
ParseFileURL(cur.base, base_len, &parsed);
@@ -1572,42 +1800,45 @@
TEST(URLCanonTest, ReplaceFileSystemURL) {
ReplaceCase replace_cases[] = {
// Replace everything in the outer URL.
- {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL,
- NULL, "/foo", "b", "c", "filesystem:file:///temporary/foo?b#c"},
+ {"filesystem:file:///temporary/gaba?query#ref", nullptr, nullptr, nullptr,
+ nullptr, nullptr, "/foo", "b", "c",
+ "filesystem:file:///temporary/foo?b#c"},
// Replace nothing
- {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, "filesystem:file:///temporary/gaba?query#ref"},
+ {"filesystem:file:///temporary/gaba?query#ref", nullptr, nullptr, nullptr,
+ nullptr, nullptr, nullptr, nullptr, nullptr,
+ "filesystem:file:///temporary/gaba?query#ref"},
// Clear non-path components (common)
- {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL,
- NULL, NULL, kDeleteComp, kDeleteComp,
+ {"filesystem:file:///temporary/gaba?query#ref", nullptr, nullptr, nullptr,
+ nullptr, nullptr, nullptr, kDeleteComp, kDeleteComp,
"filesystem:file:///temporary/gaba"},
// Replace path with something that doesn't begin with a slash and make
// sure it gets added properly.
- {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL,
- NULL, "interesting/", NULL, NULL,
+ {"filesystem:file:///temporary/gaba?query#ref", nullptr, nullptr, nullptr,
+ nullptr, nullptr, "interesting/", nullptr, nullptr,
"filesystem:file:///temporary/interesting/?query#ref"},
// Replace scheme -- shouldn't do anything except canonicalize.
- {"filesystem:http://u:p@bar.com/t/gaba?query#ref", "http", NULL, NULL,
- NULL, NULL, NULL, NULL, NULL,
+ {"filesystem:http://u:p@bar.com/t/gaba?query#ref", "http", nullptr,
+ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
"filesystem:http://bar.com/t/gaba?query#ref"},
// Replace username -- shouldn't do anything except canonicalize.
- {"filesystem:http://u:p@bar.com/t/gaba?query#ref", NULL, "u2", NULL, NULL,
- NULL, NULL, NULL, NULL, "filesystem:http://bar.com/t/gaba?query#ref"},
+ {"filesystem:http://u:p@bar.com/t/gaba?query#ref", nullptr, "u2", nullptr,
+ nullptr, nullptr, nullptr, nullptr, nullptr,
+ "filesystem:http://bar.com/t/gaba?query#ref"},
// Replace password -- shouldn't do anything except canonicalize.
- {"filesystem:http://u:p@bar.com/t/gaba?query#ref", NULL, NULL, "pw2",
- NULL, NULL, NULL, NULL, NULL,
+ {"filesystem:http://u:p@bar.com/t/gaba?query#ref", nullptr, nullptr,
+ "pw2", nullptr, nullptr, nullptr, nullptr, nullptr,
"filesystem:http://bar.com/t/gaba?query#ref"},
// Replace host -- shouldn't do anything except canonicalize.
- {"filesystem:http://u:p@bar.com:80/t/gaba?query#ref", NULL, NULL, NULL,
- "foo.com", NULL, NULL, NULL, NULL,
+ {"filesystem:http://u:p@bar.com:80/t/gaba?query#ref", nullptr, nullptr,
+ nullptr, "foo.com", nullptr, nullptr, nullptr, nullptr,
"filesystem:http://bar.com/t/gaba?query#ref"},
// Replace port -- shouldn't do anything except canonicalize.
- {"filesystem:http://u:p@bar.com:40/t/gaba?query#ref", NULL, NULL, NULL,
- NULL, "41", NULL, NULL, NULL,
+ {"filesystem:http://u:p@bar.com:40/t/gaba?query#ref", nullptr, nullptr,
+ nullptr, nullptr, "41", nullptr, nullptr, nullptr,
"filesystem:http://bar.com:40/t/gaba?query#ref"},
};
- for (size_t i = 0; i < arraysize(replace_cases); i++) {
+ for (size_t i = 0; i < std::size(replace_cases); i++) {
const ReplaceCase& cur = replace_cases[i];
int base_len = static_cast<int>(strlen(cur.base));
Parsed parsed;
@@ -1637,16 +1868,21 @@
TEST(URLCanonTest, ReplacePathURL) {
ReplaceCase replace_cases[] = {
// Replace everything
- {"data:foo", "javascript", NULL, NULL, NULL, NULL, "alert('foo?');", NULL, NULL, "javascript:alert('foo?');"},
+ {"data:foo", "javascript", nullptr, nullptr, nullptr, nullptr,
+ "alert('foo?');", nullptr, nullptr, "javascript:alert('foo?');"},
// Replace nothing
- {"data:foo", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "data:foo"},
+ {"data:foo", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
+ nullptr, nullptr, "data:foo"},
// Replace one or the other
- {"data:foo", "javascript", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "javascript:foo"},
- {"data:foo", NULL, NULL, NULL, NULL, NULL, "bar", NULL, NULL, "data:bar"},
- {"data:foo", NULL, NULL, NULL, NULL, NULL, kDeleteComp, NULL, NULL, "data:"},
+ {"data:foo", "javascript", nullptr, nullptr, nullptr, nullptr, nullptr,
+ nullptr, nullptr, "javascript:foo"},
+ {"data:foo", nullptr, nullptr, nullptr, nullptr, nullptr, "bar", nullptr,
+ nullptr, "data:bar"},
+ {"data:foo", nullptr, nullptr, nullptr, nullptr, nullptr, kDeleteComp,
+ nullptr, nullptr, "data:"},
};
- for (size_t i = 0; i < arraysize(replace_cases); i++) {
+ for (size_t i = 0; i < std::size(replace_cases); i++) {
const ReplaceCase& cur = replace_cases[i];
int base_len = static_cast<int>(strlen(cur.base));
Parsed parsed;
@@ -1697,7 +1933,7 @@
{"mailto:addr1", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "BLAH", "mailto:addr1"},
};
- for (size_t i = 0; i < arraysize(replace_cases); i++) {
+ for (size_t i = 0; i < std::size(replace_cases); i++) {
const ReplaceCase& cur = replace_cases[i];
int base_len = static_cast<int>(strlen(cur.base));
Parsed parsed;
@@ -1785,24 +2021,32 @@
// Busted refs shouldn't make the whole thing fail.
{"file:///C:/asdf#\xc2", "file:///C:/asdf#%EF%BF%BD", true, Component(),
Component(7, 8)},
+ {"file:///./s:", "file:///S:", true, Component(), Component(7, 3)},
#else
// Unix-style paths
- {"file:///home/me", "file:///home/me", true, Component(), Component(7, 8)},
+ {"file:///home/me", "file:///home/me", true, Component(),
+ Component(7, 8)},
// Windowsy ones should get still treated as Unix-style.
- {"file:c:\\foo\\bar.html", "file:///c:/foo/bar.html", true, Component(), Component(7, 16)},
- {"file:c|//foo\\bar.html", "file:///c%7C//foo/bar.html", true, Component(), Component(7, 19)},
+ {"file:c:\\foo\\bar.html", "file:///c:/foo/bar.html", true, Component(),
+ Component(7, 16)},
+ {"file:c|//foo\\bar.html", "file:///c%7C//foo/bar.html", true,
+ Component(), Component(7, 19)},
+ {"file:///./s:", "file:///s:", true, Component(), Component(7, 3)},
// file: tests from WebKit (LayoutTests/fast/loader/url-parse-1.html)
- {"//", "file:///", true, Component(), Component(7, 1)},
- {"///", "file:///", true, Component(), Component(7, 1)},
- {"///test", "file:///test", true, Component(), Component(7, 5)},
- {"file://test", "file://test/", true, Component(7, 4), Component(11, 1)},
- {"file://localhost", "file://localhost/", true, Component(7, 9), Component(16, 1)},
- {"file://localhost/", "file://localhost/", true, Component(7, 9), Component(16, 1)},
- {"file://localhost/test", "file://localhost/test", true, Component(7, 9), Component(16, 5)},
+ {"//", "file:///", true, Component(), Component(7, 1)},
+ {"///", "file:///", true, Component(), Component(7, 1)},
+ {"///test", "file:///test", true, Component(), Component(7, 5)},
+ {"file://test", "file://test/", true, Component(7, 4), Component(11, 1)},
+ {"file://localhost", "file://localhost/", true, Component(7, 9),
+ Component(16, 1)},
+ {"file://localhost/", "file://localhost/", true, Component(7, 9),
+ Component(16, 1)},
+ {"file://localhost/test", "file://localhost/test", true, Component(7, 9),
+ Component(16, 5)},
#endif // _WIN32
};
- for (size_t i = 0; i < arraysize(cases); i++) {
+ for (size_t i = 0; i < std::size(cases); i++) {
int url_len = static_cast<int>(strlen(cases[i].input));
Parsed parsed;
ParseFileURL(cases[i].input, url_len, &parsed);
@@ -1836,16 +2080,22 @@
const char* expected;
bool expected_success;
} cases[] = {
- {"Filesystem:htTp://www.Foo.com:80/tempoRary", "filesystem:http://www.foo.com/tempoRary/", true},
- {"filesystem:httpS://www.foo.com/temporary/", "filesystem:https://www.foo.com/temporary/", true},
- {"filesystem:http://www.foo.com//", "filesystem:http://www.foo.com//", false},
- {"filesystem:http://www.foo.com/persistent/bob?query#ref", "filesystem:http://www.foo.com/persistent/bob?query#ref", true},
- {"filesystem:fIle://\\temporary/", "filesystem:file:///temporary/", true},
- {"filesystem:fiLe:///temporary", "filesystem:file:///temporary/", true},
- {"filesystem:File:///temporary/Bob?qUery#reF", "filesystem:file:///temporary/Bob?qUery#reF", true},
+ {"Filesystem:htTp://www.Foo.com:80/tempoRary",
+ "filesystem:http://www.foo.com/tempoRary/", true},
+ {"filesystem:httpS://www.foo.com/temporary/",
+ "filesystem:https://www.foo.com/temporary/", true},
+ {"filesystem:http://www.foo.com//", "filesystem:http://www.foo.com//",
+ false},
+ {"filesystem:http://www.foo.com/persistent/bob?query#ref",
+ "filesystem:http://www.foo.com/persistent/bob?query#ref", true},
+ {"filesystem:fIle://\\temporary/", "filesystem:file:///temporary/", true},
+ {"filesystem:fiLe:///temporary", "filesystem:file:///temporary/", true},
+ {"filesystem:File:///temporary/Bob?qUery#reF",
+ "filesystem:file:///temporary/Bob?qUery#reF", true},
+ {"FilEsysteM:htTp:E=/.", "filesystem:http://e%3D//", false},
};
- for (size_t i = 0; i < arraysize(cases); i++) {
+ for (size_t i = 0; i < std::size(cases); i++) {
int url_len = static_cast<int>(strlen(cases[i].input));
Parsed parsed;
ParseFileSystemURL(cases[i].input, url_len, &parsed);
@@ -1875,12 +2125,16 @@
const char* input;
const char* expected;
} path_cases[] = {
- {"javascript:", "javascript:"},
- {"JavaScript:Foo", "javascript:Foo"},
- {"Foo:\":This /is interesting;?#", "foo:\":This /is interesting;?#"},
+ {"javascript:", "javascript:"},
+ {"JavaScript:Foo", "javascript:Foo"},
+ {"Foo:\":This /is interesting;?#", "foo:\":This /is interesting;?#"},
+
+ // Validation errors should not cause failure. See
+ // https://crbug.com/925614.
+ {"javascript:\uFFFF", "javascript:%EF%BF%BD"},
};
- for (size_t i = 0; i < arraysize(path_cases); i++) {
+ for (size_t i = 0; i < std::size(path_cases); i++) {
int url_len = static_cast<int>(strlen(path_cases[i].input));
Parsed parsed;
ParsePathURL(path_cases[i].input, url_len, true, &parsed);
@@ -1906,6 +2160,53 @@
}
}
+TEST(URLCanonTest, CanonicalizePathURLPath) {
+  // Each case is run through both the 8-bit and the 16-bit overload of
+  // CanonicalizePathURLPath(); both must produce |expected|.
+  struct PathCase {
+    std::string input;
+    std::wstring input16;
+    std::string expected;
+  } path_cases[] = {
+      {"Foo", L"Foo", "Foo"},
+      {"\":This /is interesting;?#", L"\":This /is interesting;?#",
+       "\":This /is interesting;?#"},
+      // U+FFFF is expected to come out as percent-escaped U+FFFD.
+      {"\uFFFF", L"\uFFFF", "%EF%BF%BD"},
+  };
+  for (const PathCase& path_case : path_cases) {
+    SCOPED_TRACE(path_case.input);
+    const int expected_len = static_cast<int>(path_case.expected.size());
+    // 8-bit string input
+    std::string out_str;
+    StdStringCanonOutput output(&out_str);
+    Component out_component;
+    CanonicalizePathURLPath(
+        path_case.input.data(),
+        Component(0, static_cast<int>(path_case.input.size())), &output,
+        &out_component);
+    output.Complete();
+    EXPECT_EQ(path_case.expected, out_str);
+    EXPECT_EQ(0, out_component.begin);
+    EXPECT_EQ(expected_len, out_component.len);
+
+    // 16-bit string input. The component length comes from the converted
+    // buffer that is actually passed in, not from the wide source string.
+    std::string out_str16;
+    StdStringCanonOutput output16(&out_str16);
+    Component out_component16;
+    const std::u16string input16(
+        test_utils::TruncateWStringToUTF16(path_case.input16.data()));
+    CanonicalizePathURLPath(
+        input16.c_str(), Component(0, static_cast<int>(input16.length())),
+        &output16, &out_component16);
+    output16.Complete();
+    EXPECT_EQ(path_case.expected, out_str16);
+    EXPECT_EQ(0, out_component16.begin);
+    EXPECT_EQ(expected_len, out_component16.len);
+  }
+}
+
TEST(URLCanonTest, CanonicalizeMailtoURL) {
struct URLCase {
const char* input;
@@ -1965,7 +2266,7 @@
Parsed parsed;
Parsed out_parsed;
- for (size_t i = 0; i < arraysize(cases); i++) {
+ for (size_t i = 0; i < std::size(cases); i++) {
int url_len = static_cast<int>(strlen(cases[i].input));
if (i == 0) {
// The first test case purposely has a '\0' in it -- don't count it
@@ -2040,17 +2341,17 @@
// We fill the buffer with 0xff to ensure that it's getting properly
// null-terminated. We also allocate one byte more than what we tell
// _itoa_s about, and ensure that the extra byte is untouched.
- base::char16 buf[6];
+ char16_t buf[6];
const char fill_mem = 0xff;
- const base::char16 fill_char = 0xffff;
+ const char16_t fill_char = 0xffff;
memset(buf, fill_mem, sizeof(buf));
EXPECT_EQ(0, _itow_s(12, buf, sizeof(buf) / 2 - 1, 10));
- EXPECT_EQ(base::UTF8ToUTF16("12"), base::string16(buf));
+ EXPECT_EQ(u"12", std::u16string(buf));
EXPECT_EQ(fill_char, buf[3]);
// Test the edge cases - exactly the buffer size and one over
EXPECT_EQ(0, _itow_s(1234, buf, sizeof(buf) / 2 - 1, 10));
- EXPECT_EQ(base::UTF8ToUTF16("1234"), base::string16(buf));
+ EXPECT_EQ(u"1234", std::u16string(buf));
EXPECT_EQ(fill_char, buf[5]);
memset(buf, fill_mem, sizeof(buf));
@@ -2060,13 +2361,12 @@
// Test the template overload (note that this will see the full buffer)
memset(buf, fill_mem, sizeof(buf));
EXPECT_EQ(0, _itow_s(12, buf, 10));
- EXPECT_EQ(base::UTF8ToUTF16("12"),
- base::string16(buf));
+ EXPECT_EQ(u"12", std::u16string(buf));
EXPECT_EQ(fill_char, buf[3]);
memset(buf, fill_mem, sizeof(buf));
EXPECT_EQ(0, _itow_s(12345, buf, 10));
- EXPECT_EQ(base::UTF8ToUTF16("12345"), base::string16(buf));
+ EXPECT_EQ(u"12345", std::u16string(buf));
EXPECT_EQ(EINVAL, _itow_s(123456, buf, 10));
}
@@ -2211,6 +2511,11 @@
// is not file.
{"http://host/a", true, false, "/c:\\foo", true, true, true, "http://host/c:/foo"},
{"http://host/a", true, false, "//c:\\foo", true, true, true, "http://c/foo"},
+ // Cross-platform relative file: resolution behavior.
+ {"file://host/a", true, true, "/", true, true, true, "file://host/"},
+ {"file://host/a", true, true, "//", true, true, true, "file:///"},
+ {"file://host/a", true, true, "/b", true, true, true, "file://host/b"},
+ {"file://host/a", true, true, "//b", true, true, true, "file://b/"},
// Ensure that ports aren't allowed for hosts relative to a file url.
// Although the result string shows a host:port portion, the call to
// resolve the relative URL returns false, indicating parse failure,
@@ -2231,7 +2536,7 @@
{"about:blank", false, false, "content://content.Provider/", true, false, true, ""},
};
- for (size_t i = 0; i < arraysize(rel_cases); i++) {
+ for (size_t i = 0; i < std::size(rel_cases); i++) {
const RelativeCase& cur_case = rel_cases[i];
Parsed parsed;
@@ -2297,12 +2602,12 @@
// Override two components, the path with something short, and the query with
// something long enough to trigger the bug.
- Replacements<base::char16> repl;
- base::string16 new_query;
+ Replacements<char16_t> repl;
+ std::u16string new_query;
for (int i = 0; i < 4800; i++)
new_query.push_back('a');
- base::string16 new_path(test_utils::TruncateWStringToUTF16(L"/foo"));
+ std::u16string new_path(test_utils::TruncateWStringToUTF16(L"/foo"));
repl.SetPath(new_path.c_str(), Component(0, 4));
repl.SetQuery(new_query.c_str(),
Component(0, static_cast<int>(new_query.length())));
@@ -2333,14 +2638,12 @@
{"ftp", 21},
{"ws", 80},
{"wss", 443},
- {"gopher", 70},
{"fake-scheme", PORT_UNSPECIFIED},
{"HTTP", PORT_UNSPECIFIED},
{"HTTPS", PORT_UNSPECIFIED},
{"FTP", PORT_UNSPECIFIED},
{"WS", PORT_UNSPECIFIED},
{"WSS", PORT_UNSPECIFIED},
- {"GOPHER", PORT_UNSPECIFIED},
};
for (auto& test_case : cases) {
@@ -2350,45 +2653,86 @@
}
}
+TEST(URLCanonTest, FindWindowsDriveLetter) {
+ struct TestCase {
+ base::StringPiece spec;
+ int begin;
+ int end; // -1 for end of spec
+ int expected_drive_letter_pos;
+ } cases[] = {
+ {"/", 0, -1, -1},
+
+ {"c:/foo", 0, -1, 0},
+ {"/c:/foo", 0, -1, 1},
+ {"//c:/foo", 0, -1, -1}, // "//" does not canonicalize to "/"
+ {"\\C|\\foo", 0, -1, 1},
+ {"/cd:/foo", 0, -1, -1}, // "/c" does not canonicalize to "/"
+ {"/./c:/foo", 0, -1, 3},
+ {"/.//c:/foo", 0, -1, -1}, // "/.//" does not canonicalize to "/"
+ {"/././c:/foo", 0, -1, 5},
+ {"/abc/c:/foo", 0, -1, -1}, // "/abc/" does not canonicalize to "/"
+ {"/abc/./../c:/foo", 0, -1, 10},
+
+ {"/c:/c:/foo", 3, -1, 4}, // actual input is "/c:/foo"
+ {"/c:/foo", 3, -1, -1}, // actual input is "/foo"
+ {"/c:/foo", 0, 1, -1}, // actual input is "/"
+ };
+
+ for (const auto& c : cases) {
+ int end = c.end;
+ if (end == -1)
+ end = c.spec.size();
+
+ EXPECT_EQ(c.expected_drive_letter_pos,
+ FindWindowsDriveLetter(c.spec.data(), c.begin, end))
+ << "for " << c.spec << "[" << c.begin << ":" << end << "] (UTF-8)";
+
+ std::u16string spec16 = base::ASCIIToUTF16(c.spec);
+ EXPECT_EQ(c.expected_drive_letter_pos,
+ FindWindowsDriveLetter(spec16.data(), c.begin, end))
+ << "for " << c.spec << "[" << c.begin << ":" << end << "] (UTF-16)";
+ }
+}
+
TEST(URLCanonTest, IDNToASCII) {
RawCanonOutputW<1024> output;
// Basic ASCII test.
- base::string16 str = base::UTF8ToUTF16("hello");
+ std::u16string str = u"hello";
EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output));
- EXPECT_EQ(base::UTF8ToUTF16("hello"), base::string16(output.data()));
+ EXPECT_EQ(u"hello", std::u16string(output.data()));
output.set_length(0);
// Mixed ASCII/non-ASCII.
- str = base::UTF8ToUTF16("hellö");
+ str = u"hellö";
EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output));
- EXPECT_EQ(base::UTF8ToUTF16("xn--hell-8qa"), base::string16(output.data()));
+ EXPECT_EQ(u"xn--hell-8qa", std::u16string(output.data()));
output.set_length(0);
// All non-ASCII.
- str = base::UTF8ToUTF16("ä½ å¥½");
+ str = u"ä½ å¥½";
EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output));
- EXPECT_EQ(base::UTF8ToUTF16("xn--6qq79v"), base::string16(output.data()));
+ EXPECT_EQ(u"xn--6qq79v", std::u16string(output.data()));
output.set_length(0);
// Characters that need mapping (the resulting Punycode is the encoding for
// "1⁄4").
- str = base::UTF8ToUTF16("¼");
+ str = u"¼";
EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output));
- EXPECT_EQ(base::UTF8ToUTF16("xn--14-c6t"), base::string16(output.data()));
+ EXPECT_EQ(u"xn--14-c6t", std::u16string(output.data()));
output.set_length(0);
// String to encode already starts with "xn--", and all ASCII. Should not
// modify the string.
- str = base::UTF8ToUTF16("xn--hell-8qa");
+ str = u"xn--hell-8qa";
EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output));
- EXPECT_EQ(base::UTF8ToUTF16("xn--hell-8qa"), base::string16(output.data()));
+ EXPECT_EQ(u"xn--hell-8qa", std::u16string(output.data()));
output.set_length(0);
// String to encode already starts with "xn--", and mixed ASCII/non-ASCII.
// Should fail, due to a special case: if the label starts with "xn--", it
// should be parsed as Punycode, which must be all ASCII.
- str = base::UTF8ToUTF16("xn--hellö");
+ str = u"xn--hellö";
EXPECT_FALSE(IDNToASCII(str.data(), str.length(), &output));
output.set_length(0);
@@ -2396,7 +2740,7 @@
// This tests that there is still an error for the character '⁄' (U+2044),
// which would be a valid ASCII character, U+0044, if the high byte were
// ignored.
- str = base::UTF8ToUTF16("xn--1⁄4");
+ str = u"xn--1⁄4";
EXPECT_FALSE(IDNToASCII(str.data(), str.length(), &output));
output.set_length(0);
}
diff --git a/url/url_constants.cc b/url/url_constants.cc
index 110c6a7..850a31c 100644
--- a/url/url_constants.cc
+++ b/url/url_constants.cc
@@ -1,4 +1,4 @@
-// Copyright 2014 The Chromium Authors. All rights reserved.
+// Copyright 2014 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -7,27 +7,54 @@
namespace url {
const char kAboutBlankURL[] = "about:blank";
+const char16_t kAboutBlankURL16[] = u"about:blank";
+const char kAboutSrcdocURL[] = "about:srcdoc";
+const char16_t kAboutSrcdocURL16[] = u"about:srcdoc";
const char kAboutBlankPath[] = "blank";
-const char kAboutBlankWithHashPath[] = "blank/";
+const char16_t kAboutBlankPath16[] = u"blank";
+const char kAboutSrcdocPath[] = "srcdoc";
+const char16_t kAboutSrcdocPath16[] = u"srcdoc";
const char kAboutScheme[] = "about";
+const char16_t kAboutScheme16[] = u"about";
const char kBlobScheme[] = "blob";
+const char16_t kBlobScheme16[] = u"blob";
const char kContentScheme[] = "content";
+const char16_t kContentScheme16[] = u"content";
const char kContentIDScheme[] = "cid";
+const char16_t kContentIDScheme16[] = u"cid";
const char kDataScheme[] = "data";
+const char16_t kDataScheme16[] = u"data";
const char kFileScheme[] = "file";
+const char16_t kFileScheme16[] = u"file";
const char kFileSystemScheme[] = "filesystem";
+const char16_t kFileSystemScheme16[] = u"filesystem";
const char kFtpScheme[] = "ftp";
-const char kGopherScheme[] = "gopher";
+const char16_t kFtpScheme16[] = u"ftp";
const char kHttpScheme[] = "http";
+const char16_t kHttpScheme16[] = u"http";
const char kHttpsScheme[] = "https";
+const char16_t kHttpsScheme16[] = u"https";
const char kJavaScriptScheme[] = "javascript";
+const char16_t kJavaScriptScheme16[] = u"javascript";
const char kMailToScheme[] = "mailto";
+const char16_t kMailToScheme16[] = u"mailto";
+const char kTelScheme[] = "tel";
+const char16_t kTelScheme16[] = u"tel";
+const char kUrnScheme[] = "urn";
+const char16_t kUrnScheme16[] = u"urn";
+const char kUuidInPackageScheme[] = "uuid-in-package";
+const char16_t kUuidInPackageScheme16[] = u"uuid-in-package";
+const char kWebcalScheme[] = "webcal";
+const char16_t kWebcalScheme16[] = u"webcal";
const char kWsScheme[] = "ws";
+const char16_t kWsScheme16[] = u"ws";
const char kWssScheme[] = "wss";
+const char16_t kWssScheme16[] = u"wss";
const char kStandardSchemeSeparator[] = "://";
+const char16_t kStandardSchemeSeparator16[] = u"://";
const size_t kMaxURLChars = 2 * 1024 * 1024;
diff --git a/url/url_constants.h b/url/url_constants.h
index 84a5ba0..5eda4e8 100644
--- a/url/url_constants.h
+++ b/url/url_constants.h
@@ -1,42 +1,69 @@
-// Copyright 2014 The Chromium Authors. All rights reserved.
+// Copyright 2014 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef URL_URL_CONSTANTS_H_
#define URL_URL_CONSTANTS_H_
-#include "starboard/types.h"
+#include <stddef.h>
-#include "url/url_export.h"
+#include "base/component_export.h"
namespace url {
-URL_EXPORT extern const char kAboutBlankURL[];
+COMPONENT_EXPORT(URL) extern const char kAboutBlankURL[];
+COMPONENT_EXPORT(URL) extern const char16_t kAboutBlankURL16[];
+COMPONENT_EXPORT(URL) extern const char kAboutSrcdocURL[];
+COMPONENT_EXPORT(URL) extern const char16_t kAboutSrcdocURL16[];
-URL_EXPORT extern const char kAboutBlankPath[];
-URL_EXPORT extern const char kAboutBlankWithHashPath[];
+COMPONENT_EXPORT(URL) extern const char kAboutBlankPath[];
+COMPONENT_EXPORT(URL) extern const char16_t kAboutBlankPath16[];
+COMPONENT_EXPORT(URL) extern const char kAboutSrcdocPath[];
+COMPONENT_EXPORT(URL) extern const char16_t kAboutSrcdocPath16[];
-URL_EXPORT extern const char kAboutScheme[];
-URL_EXPORT extern const char kBlobScheme[];
+COMPONENT_EXPORT(URL) extern const char kAboutScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kAboutScheme16[];
+COMPONENT_EXPORT(URL) extern const char kBlobScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kBlobScheme16[];
// The content scheme is specific to Android for identifying a stored file.
-URL_EXPORT extern const char kContentScheme[];
-URL_EXPORT extern const char kContentIDScheme[];
-URL_EXPORT extern const char kDataScheme[];
-URL_EXPORT extern const char kFileScheme[];
-URL_EXPORT extern const char kFileSystemScheme[];
-URL_EXPORT extern const char kFtpScheme[];
-URL_EXPORT extern const char kGopherScheme[];
-URL_EXPORT extern const char kHttpScheme[];
-URL_EXPORT extern const char kHttpsScheme[];
-URL_EXPORT extern const char kJavaScriptScheme[];
-URL_EXPORT extern const char kMailToScheme[];
-URL_EXPORT extern const char kWsScheme[];
-URL_EXPORT extern const char kWssScheme[];
+COMPONENT_EXPORT(URL) extern const char kContentScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kContentScheme16[];
+COMPONENT_EXPORT(URL) extern const char kContentIDScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kContentIDScheme16[];
+COMPONENT_EXPORT(URL) extern const char kDataScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kDataScheme16[];
+COMPONENT_EXPORT(URL) extern const char kFileScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kFileScheme16[];
+COMPONENT_EXPORT(URL) extern const char kFileSystemScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kFileSystemScheme16[];
+COMPONENT_EXPORT(URL) extern const char kFtpScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kFtpScheme16[];
+COMPONENT_EXPORT(URL) extern const char kHttpScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kHttpScheme16[];
+COMPONENT_EXPORT(URL) extern const char kHttpsScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kHttpsScheme16[];
+COMPONENT_EXPORT(URL) extern const char kJavaScriptScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kJavaScriptScheme16[];
+COMPONENT_EXPORT(URL) extern const char kMailToScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kMailToScheme16[];
+COMPONENT_EXPORT(URL) extern const char kTelScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kTelScheme16[];
+COMPONENT_EXPORT(URL) extern const char kUrnScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kUrnScheme16[];
+COMPONENT_EXPORT(URL) extern const char kUuidInPackageScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kUuidInPackageScheme16[];
+COMPONENT_EXPORT(URL) extern const char kWebcalScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kWebcalScheme16[];
+COMPONENT_EXPORT(URL) extern const char kWsScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kWsScheme16[];
+COMPONENT_EXPORT(URL) extern const char kWssScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kWssScheme16[];
// Used to separate a standard scheme and the hostname: "://".
-URL_EXPORT extern const char kStandardSchemeSeparator[];
+COMPONENT_EXPORT(URL) extern const char kStandardSchemeSeparator[];
+COMPONENT_EXPORT(URL) extern const char16_t kStandardSchemeSeparator16[];
-URL_EXPORT extern const size_t kMaxURLChars;
+COMPONENT_EXPORT(URL) extern const size_t kMaxURLChars;
} // namespace url
diff --git a/url/url_export.h b/url/url_export.h
deleted file mode 100644
index 15ef19e..0000000
--- a/url/url_export.h
+++ /dev/null
@@ -1,33 +0,0 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef URL_URL_EXPORT_H_
-#define URL_URL_EXPORT_H_
-
-#if defined(COMPONENT_BUILD)
-#if defined(WIN32)
-
-#if defined(URL_IMPLEMENTATION)
-#define URL_EXPORT __declspec(dllexport)
-#else
-#define URL_EXPORT __declspec(dllimport)
-#endif // defined(URL_IMPLEMENTATION)
-
-#else // !defined(WIN32)
-
-#if defined(URL_IMPLEMENTATION)
-#define URL_EXPORT __attribute__((visibility("default")))
-#else
-#define URL_EXPORT
-#endif // defined(URL_IMPLEMENTATION)
-
-#endif // defined(WIN32)
-
-#else // !defined(COMPONENT_BUILD)
-
-#define URL_EXPORT
-
-#endif // define(COMPONENT_BUILD)
-
-#endif // URL_URL_EXPORT_H_
diff --git a/url/url_features.cc b/url/url_features.cc
new file mode 100644
index 0000000..8f38ff2
--- /dev/null
+++ b/url/url_features.cc
@@ -0,0 +1,35 @@
+// Copyright 2022 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/url_features.h"
+
+namespace url {
+
+BASE_FEATURE(kUseIDNA2008NonTransitional,
+ "UseIDNA2008NonTransitional",
+ base::FEATURE_ENABLED_BY_DEFAULT);
+
+// Kill switch for crbug.com/1362507.
+BASE_FEATURE(kRecordIDNA2008Metrics,
+ "RecordIDNA2008Metrics",
+ base::FEATURE_ENABLED_BY_DEFAULT);
+
+BASE_FEATURE(kStrictIPv4EmbeddedIPv6AddressParsing,
+ "StrictIPv4EmbeddedIPv6AddressParsing",
+ base::FEATURE_DISABLED_BY_DEFAULT);
+
+// Kill switch for crbug.com/1220361.
+BASE_FEATURE(kResolveBareFragmentWithColonOnNonHierarchical,
+ "ResolveBareFragmentWithColonOnNonHierarchical",
+ base::FEATURE_ENABLED_BY_DEFAULT);
+
+bool IsUsingIDNA2008NonTransitional() {
+ return base::FeatureList::IsEnabled(kUseIDNA2008NonTransitional);
+}
+
+bool IsRecordingIDNA2008Metrics() {
+ return base::FeatureList::IsEnabled(kRecordIDNA2008Metrics);
+}
+
+} // namespace url
diff --git a/url/url_features.h b/url/url_features.h
new file mode 100644
index 0000000..e957521
--- /dev/null
+++ b/url/url_features.h
@@ -0,0 +1,33 @@
+// Copyright 2022 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_URL_FEATURES_H_
+#define URL_URL_FEATURES_H_
+
+#include "base/component_export.h"
+#include "base/feature_list.h"
+
+namespace url {
+
+COMPONENT_EXPORT(URL) BASE_DECLARE_FEATURE(kUseIDNA2008NonTransitional);
+
+// Returns true if Chrome is using IDNA 2008 in Non-Transitional mode.
+COMPONENT_EXPORT(URL) bool IsUsingIDNA2008NonTransitional();
+
+// Returns true if Chrome is recording IDNA 2008 related metrics.
+COMPONENT_EXPORT(URL) bool IsRecordingIDNA2008Metrics();
+
+// When enabled, Chrome enforces the 4 part check for IPv4 embedded IPv6
+// addresses.
+COMPONENT_EXPORT(URL)
+BASE_DECLARE_FEATURE(kStrictIPv4EmbeddedIPv6AddressParsing);
+
+// When enabled, allows resolving of a bare fragment containing a colon against
+// a non-hierarchical URL. (For example '#foo:bar' against 'about:blank'.)
+COMPONENT_EXPORT(URL)
+BASE_DECLARE_FEATURE(kResolveBareFragmentWithColonOnNonHierarchical);
+
+} // namespace url
+
+#endif // URL_URL_FEATURES_H_
diff --git a/url/url_file.h b/url/url_file.h
index 796d12c..65ce98a 100644
--- a/url/url_file.h
+++ b/url/url_file.h
@@ -1,4 +1,4 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -13,14 +13,13 @@
namespace url {
-#ifdef WIN32
-
// We allow both "c:" and "c|" as drive identifiers.
-inline bool IsWindowsDriveSeparator(base::char16 ch) {
+inline bool IsWindowsDriveSeparator(char16_t ch) {
return ch == ':' || ch == '|';
}
-
-#endif // WIN32
+inline bool IsWindowsDriveSeparator(char ch) {
+ return IsWindowsDriveSeparator(static_cast<char16_t>(ch));
+}
// Returns the index of the next slash in the input after the given index, or
// spec_len if the end of the input is reached.
@@ -32,27 +31,48 @@
return idx;
}
-#ifdef WIN32
+// DoesContainWindowsDriveSpecUntil returns the least number between
+// start_offset and max_offset such that the spec has a valid drive
+// specification starting at that offset. Otherwise it returns -1. This function
+// gracefully handles, by returning -1, start_offset values that are equal to or
+// larger than the spec_len, and caps max_offset appropriately to simplify
+// callers. max_offset must be at least start_offset.
+template <typename CHAR>
+inline int DoesContainWindowsDriveSpecUntil(const CHAR* spec,
+ int start_offset,
+ int max_offset,
+ int spec_len) {
+ CHECK_LE(start_offset, max_offset);
+ if (start_offset > spec_len - 2)
+ return -1; // Not enough room.
+ if (max_offset > spec_len - 2)
+ max_offset = spec_len - 2;
+ for (int offset = start_offset; offset <= max_offset; ++offset) {
+ if (!base::IsAsciiAlpha(spec[offset]))
+ continue; // Doesn't contain a valid drive letter.
+ if (!IsWindowsDriveSeparator(spec[offset + 1]))
+ continue; // Isn't followed with a drive separator.
+ return offset;
+ }
+ return -1;
+}
// Returns true if the start_offset in the given spec looks like it begins a
// drive spec, for example "c:". This function explicitly handles start_offset
// values that are equal to or larger than the spec_len to simplify callers.
//
// If this returns true, the spec is guaranteed to have a valid drive letter
-// plus a colon starting at |start_offset|.
-template<typename CHAR>
-inline bool DoesBeginWindowsDriveSpec(const CHAR* spec, int start_offset,
+// plus a drive letter separator (a colon or a pipe) starting at |start_offset|.
+template <typename CHAR>
+inline bool DoesBeginWindowsDriveSpec(const CHAR* spec,
+ int start_offset,
int spec_len) {
- int remaining_len = spec_len - start_offset;
- if (remaining_len < 2)
- return false; // Not enough room.
- if (!base::IsAsciiAlpha(spec[start_offset]))
- return false; // Doesn't start with a valid drive letter.
- if (!IsWindowsDriveSeparator(spec[start_offset + 1]))
- return false; // Isn't followed with a drive separator.
- return true;
+ return DoesContainWindowsDriveSpecUntil(spec, start_offset, start_offset,
+ spec_len) == start_offset;
}
+#ifdef WIN32
+
// Returns true if the start_offset in the given text looks like it begins a
// UNC path, for example "\\". This function explicitly handles start_offset
// values that are equal to or larger than the spec_len to simplify callers.
diff --git a/url/url_idna_icu.cc b/url/url_idna_icu.cc
index f90b9a8..0a552a8 100644
--- a/url/url_idna_icu.cc
+++ b/url/url_idna_icu.cc
@@ -1,36 +1,39 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// ICU-based IDNA converter.
+#include <stdint.h>
#include <stdlib.h>
#include <string.h>
-#include "base/lazy_instance.h"
-#include "base/logging.h"
-#include "starboard/types.h"
+#include <ostream>
+
+#include "base/check_op.h"
#include "third_party/icu/source/common/unicode/uidna.h"
#include "third_party/icu/source/common/unicode/utypes.h"
#include "url/url_canon_icu.h"
#include "url/url_canon_internal.h" // for _itoa_s
+#include "url/url_features.h"
namespace url {
namespace {
-// A wrapper to use LazyInstance<>::Leaky with ICU's UIDNA, a C pointer to
-// a UTS46/IDNA 2008 handling object opened with uidna_openUTS46().
+// Use UIDNA, a C pointer to a UTS46/IDNA 2008 handling object opened with
+// uidna_openUTS46().
//
// We use UTS46 with BiDiCheck to migrate from IDNA 2003 (with unassigned
-// code points allowed) to IDNA 2008 with
-// the backward compatibility in mind. What it does:
+// code points allowed) to IDNA 2008 with the backward compatibility in mind.
+// What it does:
//
// 1. Use the up-to-date Unicode data.
// 2. Define a case folding/mapping with the up-to-date Unicode data as
// in IDNA 2003.
-// 3. Use transitional mechanism for 4 deviation characters (sharp-s,
-// final sigma, ZWJ and ZWNJ) for now.
+// 3. If `use_idna_non_transitional` is true, use non-transitional mechanism for
+// 4 deviation characters (sharp-s, final sigma, ZWJ and ZWNJ) per
+// url.spec.whatwg.org.
// 4. Continue to allow symbols and punctuations.
// 5. Apply new BiDi check rules more permissive than the IDNA 2003 BiDI rules.
// 6. Do not apply STD3 rules
@@ -40,30 +43,39 @@
// http://goo.gl/3XBhqw ).
// See http://unicode.org/reports/tr46/ and references therein
// for more details.
-struct UIDNAWrapper {
- UIDNAWrapper() {
- UErrorCode err = U_ZERO_ERROR;
- // TODO(jungshik): Change options as different parties (browsers,
- // registrars, search engines) converge toward a consensus.
- value = uidna_openUTS46(UIDNA_CHECK_BIDI, &err);
- if (U_FAILURE(err)) {
- CHECK(false) << "failed to open UTS46 data with error: "
- << u_errorName(err)
- << ". If you see this error message in a test environment "
- << "your test environment likely lacks the required data "
- << "tables for libicu. See https://crbug.com/778929.";
- value = NULL;
- }
+UIDNA* CreateIDNA(bool use_idna_non_transitional) {
+ uint32_t options = UIDNA_CHECK_BIDI;
+ if (use_idna_non_transitional) {
+ // Use non-transitional processing if enabled. See
+ // https://url.spec.whatwg.org/#idna for details.
+ options |=
+ UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE;
}
+ UErrorCode err = U_ZERO_ERROR;
+ UIDNA* idna = uidna_openUTS46(options, &err);
+ if (U_FAILURE(err)) {
+ CHECK(false) << "failed to open UTS46 data with error: " << u_errorName(err)
+ << ". If you see this error message in a test environment "
+ << "your test environment likely lacks the required data "
+ << "tables for libicu. See https://crbug.com/778929.";
+ idna = nullptr;
+ }
+ return idna;
+}
- UIDNA* value;
-};
+UIDNA* GetUIDNA() {
+ // This logic results in having two UIDNA instances in tests. This is okay.
+ if (IsUsingIDNA2008NonTransitional()) {
+ static UIDNA* uidna = CreateIDNA(/*use_idna_non_transitional=*/true);
+ return uidna;
+ } else {
+ static UIDNA* uidna = CreateIDNA(/*use_idna_non_transitional=*/false);
+ return uidna;
+ }
+}
} // namespace
-static base::LazyInstance<UIDNAWrapper>::Leaky g_uidna =
- LAZY_INSTANCE_INITIALIZER;
-
// Converts the Unicode input representing a hostname to ASCII using IDN rules.
// The output must be ASCII, but is represented as wide characters.
//
@@ -78,23 +90,49 @@
// conversions in our code. In addition, consider using icu::IDNA's UTF-8/ASCII
// version with StringByteSink. That way, we can avoid C wrappers and additional
// string conversion.
-bool IDNToASCII(const base::char16* src, int src_len, CanonOutputW* output) {
+bool IDNToASCII(const char16_t* src, int src_len, CanonOutputW* output) {
DCHECK(output->length() == 0); // Output buffer is assumed empty.
- UIDNA* uidna = g_uidna.Get().value;
- DCHECK(uidna != NULL);
+ UIDNA* uidna = GetUIDNA();
+ DCHECK(uidna != nullptr);
while (true) {
UErrorCode err = U_ZERO_ERROR;
UIDNAInfo info = UIDNA_INFO_INITIALIZER;
int output_length = uidna_nameToASCII(uidna, src, src_len, output->data(),
output->capacity(), &info, &err);
+
+ // Ignore various errors for web compatibility. The options are specified
+ // by the WHATWG URL Standard. See
+ // - https://unicode.org/reports/tr46/
+ // - https://url.spec.whatwg.org/#concept-domain-to-ascii
+ // (we set beStrict to false)
+
+ // Disable the "CheckHyphens" option in UTS #46. See
+ // - https://crbug.com/804688
+ // - https://github.com/whatwg/url/issues/267
+ info.errors &= ~UIDNA_ERROR_HYPHEN_3_4;
+ info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN;
+ info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN;
+
+ // Disable the "VerifyDnsLength" option in UTS #46.
+ info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
+ info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
+ info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
+
if (U_SUCCESS(err) && info.errors == 0) {
+ // Per WHATWG URL, it is a failure if the ToASCII output is empty.
+ //
+ // ICU would usually return UIDNA_ERROR_EMPTY_LABEL in this case, but we
+ // want to continue allowing http://abc..def/ while forbidding http:///.
+ //
+ if (output_length == 0) {
+ return false;
+ }
+
output->set_length(output_length);
return true;
}
- // TODO(jungshik): Look at info.errors to handle them case-by-case basis
- // if necessary.
if (err != U_BUFFER_OVERFLOW_ERROR || info.errors != 0)
return false; // Unknown error, give up.
diff --git a/url/url_idna_icu_alternatives_android.cc b/url/url_idna_icu_alternatives_android.cc
index d844be1..9faf571 100644
--- a/url/url_idna_icu_alternatives_android.cc
+++ b/url/url_idna_icu_alternatives_android.cc
@@ -1,16 +1,16 @@
-// Copyright 2014 The Chromium Authors. All rights reserved.
+// Copyright 2014 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <string.h>
+#include <string>
+
#include "base/android/jni_android.h"
#include "base/android/jni_string.h"
-#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
-#include "jni/IDNStringUtil_jni.h"
-#include "starboard/types.h"
#include "url/url_canon_internal.h"
+#include "url/url_jni_headers/IDNStringUtil_jni.h"
using base::android::ScopedJavaLocalRef;
@@ -18,8 +18,8 @@
// This uses the JDK's conversion function, which uses IDNA 2003, unlike the
// ICU implementation.
-bool IDNToASCII(const base::char16* src, int src_len, CanonOutputW* output) {
- DCHECK_EQ(0, output->length()); // Output buffer is assumed empty.
+bool IDNToASCII(const char16_t* src, int src_len, CanonOutputW* output) {
+ DCHECK_EQ(0u, output->length()); // Output buffer is assumed empty.
JNIEnv* env = base::android::AttachCurrentThread();
base::android::ScopedJavaLocalRef<jstring> java_src =
@@ -31,9 +31,9 @@
if (java_result.is_null())
return false;
- base::string16 utf16_result =
+ std::u16string utf16_result =
base::android::ConvertJavaStringToUTF16(java_result);
- output->Append(utf16_result.data(), static_cast<int>(utf16_result.size()));
+ output->Append(utf16_result.data(), utf16_result.size());
return true;
}
diff --git a/url/url_idna_icu_alternatives_ios.mm b/url/url_idna_icu_alternatives_ios.mm
index 66b844e..d604b35 100644
--- a/url/url_idna_icu_alternatives_ios.mm
+++ b/url/url_idna_icu_alternatives_ios.mm
@@ -1,10 +1,12 @@
-// Copyright 2016 The Chromium Authors. All rights reserved.
+// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <string.h>
-#include "base/strings/string16.h"
+#include <ostream>
+#include <string>
+
#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
@@ -14,7 +16,7 @@
// Only allow ASCII to avoid ICU dependency. Use NSString+IDN
// to convert non-ASCII URL prior to passing to API.
-bool IDNToASCII(const base::char16* src, int src_len, CanonOutputW* output) {
+bool IDNToASCII(const char16_t* src, int src_len, CanonOutputW* output) {
if (base::IsStringASCII(base::StringPiece16(src, src_len))) {
output->Append(src, src_len);
return true;
@@ -23,4 +25,4 @@
return false;
}
-} // namespace url
\ No newline at end of file
+} // namespace url
diff --git a/url/url_parse_file.cc b/url/url_parse_file.cc
index fcbb12d..979ec82 100644
--- a/url/url_parse_file.cc
+++ b/url/url_parse_file.cc
@@ -1,8 +1,8 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "base/logging.h"
+#include "base/check.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_file.h"
#include "url/url_parse_internal.h"
@@ -42,48 +42,24 @@
namespace {
-// A subcomponent of DoInitFileURL, the input of this function should be a UNC
+// A subcomponent of DoParseFileURL, the input of this function should be a UNC
// path name, with the index of the first character after the slashes following
// the scheme given in |after_slashes|. This will initialize the host, path,
// query, and ref, and leave the other output components untouched
-// (DoInitFileURL handles these for us).
-template<typename CHAR>
+// (DoParseFileURL handles these for us).
+template <typename CHAR>
void DoParseUNC(const CHAR* spec,
int after_slashes,
int spec_len,
- Parsed* parsed) {
+ Parsed* parsed) {
int next_slash = FindNextSlash(spec, after_slashes, spec_len);
- if (next_slash == spec_len) {
- // No additional slash found, as in "file://foo", treat the text as the
- // host with no path (this will end up being UNC to server "foo").
- int host_len = spec_len - after_slashes;
- if (host_len)
- parsed->host = Component(after_slashes, host_len);
- else
- parsed->host.reset();
- parsed->path.reset();
- return;
- }
-#ifdef WIN32
- // See if we have something that looks like a path following the first
- // component. As in "file://localhost/c:/", we get "c:/" out. We want to
- // treat this as a having no host but the path given. Works on Windows only.
- if (DoesBeginWindowsDriveSpec(spec, next_slash + 1, spec_len)) {
- parsed->host.reset();
- ParsePathInternal(spec, MakeRange(next_slash, spec_len),
- &parsed->path, &parsed->query, &parsed->ref);
- return;
- }
-#endif
-
- // Otherwise, everything up until that first slash we found is the host name,
- // which will end up being the UNC host. For example "file://foo/bar.txt"
- // will get a server name of "foo" and a path of "/bar". Later, on Windows,
- // this should be treated as the filename "\\foo\bar.txt" in proper UNC
- // notation.
- int host_len = next_slash - after_slashes;
- if (host_len)
+ // Everything up until that first slash we found (or end of string) is the
+ // host name, which will end up being the UNC host. For example,
+ // "file://foo/bar.txt" will get a server name of "foo" and a path of
+ // "/bar.txt". Later, on Windows, this should be treated as the filename
+ // "\\foo\bar.txt" in proper UNC notation.
+ if (after_slashes < next_slash)
parsed->host = MakeRange(after_slashes, next_slash);
else
parsed->host.reset();
@@ -98,7 +74,7 @@
// A subcomponent of DoParseFileURL, the input should be a local file, with the
// beginning of the path indicated by the index in |path_begin|. This will
// initialize the host, path, query, and ref, and leave the other output
-// components untouched (DoInitFileURL handles these for us).
+// components untouched (DoParseFileURL handles these for us).
template<typename CHAR>
void DoParseLocalFile(const CHAR* spec,
int path_begin,
@@ -215,7 +191,7 @@
DoParseFileURL(url, url_len, parsed);
}
-void ParseFileURL(const base::char16* url, int url_len, Parsed* parsed) {
+void ParseFileURL(const char16_t* url, int url_len, Parsed* parsed) {
DoParseFileURL(url, url_len, parsed);
}
diff --git a/url/url_parse_internal.h b/url/url_parse_internal.h
index 7630878..a73f13b 100644
--- a/url/url_parse_internal.h
+++ b/url/url_parse_internal.h
@@ -1,4 +1,4 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -12,15 +12,21 @@
namespace url {
// We treat slashes and backslashes the same for IE compatibility.
-inline bool IsURLSlash(base::char16 ch) {
+inline bool IsURLSlash(char16_t ch) {
return ch == '/' || ch == '\\';
}
+inline bool IsURLSlash(char ch) {
+ return IsURLSlash(static_cast<char16_t>(ch));
+}
// Returns true if we should trim this character from the URL because it is a
// space or a control character.
-inline bool ShouldTrimFromURL(base::char16 ch) {
+inline bool ShouldTrimFromURL(char16_t ch) {
return ch <= ' ';
}
+inline bool ShouldTrimFromURL(char ch) {  // 8-bit overload.
+ return ShouldTrimFromURL(static_cast<char16_t>(ch));  // Where char is signed, a negative ch converts to a large char16_t, so it is not <= ' '.
+}
// Given an already-initialized begin index and length, this shrinks the range
// to eliminate "should-be-trimmed" characters. Note that the length does *not*
@@ -67,13 +73,12 @@
Component* filepath,
Component* query,
Component* ref);
-void ParsePathInternal(const base::char16* spec,
+void ParsePathInternal(const char16_t* spec,
const Component& path,
Component* filepath,
Component* query,
Component* ref);
-
// Given a spec and a pointer to the character after the colon following the
// scheme, this parses it and fills in the structure, Every item in the parsed
// structure is filled EXCEPT for the scheme, which is untouched.
@@ -81,7 +86,7 @@
int spec_len,
int after_scheme,
Parsed* parsed);
-void ParseAfterScheme(const base::char16* spec,
+void ParseAfterScheme(const char16_t* spec,
int spec_len,
int after_scheme,
Parsed* parsed);
diff --git a/url/url_parse_perftest.cc b/url/url_parse_perftest.cc
new file mode 100644
index 0000000..7fe1d39
--- /dev/null
+++ b/url/url_parse_perftest.cc
@@ -0,0 +1,135 @@
+// Copyright 2006-2008 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/strings/string_piece.h"
+#include "base/test/perf_time_logger.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "url/gurl.h"
+#include "url/third_party/mozilla/url_parse.h"
+#include "url/url_canon.h"
+#include "url/url_canon_stdstring.h"
+
+namespace {
+
+TEST(URLParse, FullURL) {  // Runs one million parses of a single URL.
+ constexpr base::StringPiece kUrl =  // Has user:pass, host, path, ;param, query and ref.
+ "http://me:pass@host/foo/bar.html;param?query=yes#ref";
+
+ url::Parsed parsed;  // Overwritten by every iteration.
+ base::PerfTimeLogger timer("Full_URL_Parse_AMillion");
+
+ for (int i = 0; i < 1000000; i++)
+ url::ParseStandardURL(kUrl.data(), kUrl.size(), &parsed);
+ timer.Done();  // Presumably stops and logs the timer; see base/test/perf_time_logger.h.
+}
+
+constexpr base::StringPiece kTypicalUrl1 =  // Shared test input: search URL with a long query.
+ "http://www.google.com/"
+ "search?q=url+parsing&ie=utf-8&oe=utf-8&aq=t&rls=org.mozilla:en-US:"
+ "official&client=firefox-a";
+
+constexpr base::StringPiece kTypicalUrl2 =  // Shared test input: deep path plus a query.
+ "http://www.amazon.com/Stephen-King-Thrillers-Horror-People/dp/0766012336/"
+ "ref=sr_1_2/133-4144931-4505264?ie=UTF8&s=books&qid=2144880915&sr=8-2";
+
+constexpr base::StringPiece kTypicalUrl3 =  // Shared test input: mixed-case path plus a query.
+ "http://store.apple.com/1-800-MY-APPLE/WebObjects/AppleStore.woa/wa/"
+ "RSLID?nnmm=browse&mco=578E9744&node=home/desktop/mac_pro";
+
+TEST(URLParse, TypicalURLParse) {  // Parsing only, over the three typical URLs.
+ url::Parsed parsed1;
+ url::Parsed parsed2;
+ url::Parsed parsed3;
+
+ // 333333 iterations x 3 URLs gives roughly one million ParseStandardURL calls.
+ base::PerfTimeLogger parse_timer("Typical_URL_Parse_AMillion");
+ for (int i = 0; i < 333333; i++) {
+ url::ParseStandardURL(kTypicalUrl1.data(), kTypicalUrl1.size(), &parsed1);
+ url::ParseStandardURL(kTypicalUrl2.data(), kTypicalUrl2.size(), &parsed2);
+ url::ParseStandardURL(kTypicalUrl3.data(), kTypicalUrl3.size(), &parsed3);
+ }
+ parse_timer.Done();
+}
+
+// Parses and canonicalizes each typical URL into one reused RawCanonOutput (no mallocs).
+TEST(URLParse, TypicalURLParseCanon) {
+ url::Parsed parsed1;
+ url::Parsed parsed2;
+ url::Parsed parsed3;
+
+ base::PerfTimeLogger canon_timer("Typical_Parse_Canon_AMillion");
+ url::Parsed out_parsed;  // Overwritten by every canonicalization.
+ url::RawCanonOutput<1024> output;  // One buffer shared by all iterations.
+ for (int i = 0; i < 333333; i++) { // 333333 iterations x 3 URLs ~= 1M
+ url::ParseStandardURL(kTypicalUrl1.data(), kTypicalUrl1.size(), &parsed1);
+ output.set_length(0);  // Discard the previous URL's output before reusing the buffer.
+ url::CanonicalizeStandardURL(
+ kTypicalUrl1.data(), kTypicalUrl1.size(), parsed1,
+ url::SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, nullptr, &output,
+ &out_parsed);
+
+ url::ParseStandardURL(kTypicalUrl2.data(), kTypicalUrl2.size(), &parsed2);
+ output.set_length(0);
+ url::CanonicalizeStandardURL(
+ kTypicalUrl2.data(), kTypicalUrl2.size(), parsed2,
+ url::SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, nullptr, &output,
+ &out_parsed);
+
+ url::ParseStandardURL(kTypicalUrl3.data(), kTypicalUrl3.size(), &parsed3);
+ output.set_length(0);
+ url::CanonicalizeStandardURL(
+ kTypicalUrl3.data(), kTypicalUrl3.size(), parsed3,
+ url::SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, nullptr, &output,
+ &out_parsed);
+ }
+ canon_timer.Done();
+}
+
+// Parses and canonicalizes each typical URL into a fresh std::string, so output mallocs count.
+TEST(URLParse, TypicalURLParseCanonStdString) {
+ url::Parsed parsed1;
+ url::Parsed parsed2;
+ url::Parsed parsed3;
+
+ base::PerfTimeLogger canon_timer("Typical_Parse_Canon_AMillion");  // NOTE(review): same label as TypicalURLParseCanon; confirm this is intended.
+ url::Parsed out_parsed;  // Overwritten by every canonicalization.
+ for (int i = 0; i < 333333; i++) { // 333333 iterations x 3 URLs ~= 1M
+ url::ParseStandardURL(kTypicalUrl1.data(), kTypicalUrl1.size(), &parsed1);
+ std::string out1;  // New string each iteration, unlike the reused buffer in the test above.
+ url::StdStringCanonOutput output1(&out1);
+ url::CanonicalizeStandardURL(
+ kTypicalUrl1.data(), kTypicalUrl1.size(), parsed1,
+ url::SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, nullptr, &output1,
+ &out_parsed);
+
+ url::ParseStandardURL(kTypicalUrl2.data(), kTypicalUrl2.size(), &parsed2);
+ std::string out2;
+ url::StdStringCanonOutput output2(&out2);
+ url::CanonicalizeStandardURL(
+ kTypicalUrl2.data(), kTypicalUrl2.size(), parsed2,
+ url::SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, nullptr, &output2,
+ &out_parsed);
+
+ url::ParseStandardURL(kTypicalUrl3.data(), kTypicalUrl3.size(), &parsed3);
+ std::string out3;
+ url::StdStringCanonOutput output3(&out3);
+ url::CanonicalizeStandardURL(
+ kTypicalUrl3.data(), kTypicalUrl3.size(), parsed3,
+ url::SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, nullptr, &output3,
+ &out_parsed);
+ }
+ canon_timer.Done();
+}
+
+TEST(URLParse, GURL) {  // Constructs GURL objects from the three typical URLs.
+ base::PerfTimeLogger gurl_timer("Typical_GURL_AMillion");
+ for (int i = 0; i < 333333; i++) { // 333333 iterations x 3 URLs ~= 1M GURLs
+ GURL gurl1(kTypicalUrl1);
+ GURL gurl2(kTypicalUrl2);
+ GURL gurl3(kTypicalUrl3);
+ }
+ gurl_timer.Done();
+}
+
+} // namespace
diff --git a/url/url_parse_unittest.cc b/url/url_parse_unittest.cc
index 3d71415..88b6f05 100644
--- a/url/url_parse_unittest.cc
+++ b/url/url_parse_unittest.cc
@@ -1,18 +1,12 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "url/third_party/mozilla/url_parse.h"
+#include <stddef.h>
-#include "base/macros.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/third_party/mozilla/url_parse.h"
-#if defined(STARBOARD)
-#include "starboard/common/string.h"
-#include "starboard/types.h"
-#endif
-
// Interesting IE file:isms...
//
// file:/foo/bar file:///foo/bar
@@ -95,8 +89,8 @@
bool ComponentMatches(const char* input,
const char* reference,
const Component& component) {
- // If the component is nonexistent (length == -1), it should begin at 0.
- EXPECT_TRUE(component.len >= 0 || component.len == -1);
+ // Check that the -1 sentinel is the only allowed negative value.
+ EXPECT_TRUE(component.is_valid() || component.len == -1);
// Begin should be valid.
EXPECT_LE(0, component.begin);
@@ -104,7 +98,7 @@
// A NULL reference means the component should be nonexistent.
if (!reference)
return component.len == -1;
- if (component.len < 0)
+ if (!component.is_valid())
return false; // Reference is not NULL but we don't have anything
if (strlen(reference) != static_cast<size_t>(component.len))
@@ -140,7 +134,7 @@
"http://user@",
"http:",
};
- for (size_t i = 0; i < arraysize(length_cases); i++) {
+ for (size_t i = 0; i < std::size(length_cases); i++) {
int true_length = static_cast<int>(strlen(length_cases[i]));
Parsed parsed;
@@ -199,7 +193,7 @@
{"file:///c:/foo", Parsed::HOST, true, 7},
{"file:///c:/foo", Parsed::PATH, true, 7},
};
- for (size_t i = 0; i < arraysize(count_cases); i++) {
+ for (size_t i = 0; i < std::size(count_cases); i++) {
int length = static_cast<int>(strlen(count_cases[i].url));
// Simple test to distinguish file and standard URLs.
@@ -317,7 +311,7 @@
// Declared outside for loop to try to catch cases in init() where we forget
// to reset something that is reset by the constructor.
Parsed parsed;
- for (size_t i = 0; i < arraysize(cases); i++) {
+ for (size_t i = 0; i < std::size(cases); i++) {
const char* url = cases[i].input;
ParseStandardURL(url, static_cast<int>(strlen(url)), &parsed);
int port = ParsePort(url, parsed.port);
@@ -352,7 +346,7 @@
// Declared outside for loop to try to catch cases in init() where we forget
// to reset something that is reset by the constructor.
Parsed parsed;
- for (size_t i = 0; i < arraysize(path_cases); i++) {
+ for (size_t i = 0; i < std::size(path_cases); i++) {
const char* url = path_cases[i].input;
ParsePathURL(url, static_cast<int>(strlen(url)), false, &parsed);
@@ -377,8 +371,8 @@
{"FiLe:c|", "FiLe", NULL, NULL, NULL, -1, "c|", NULL, NULL},
{"FILE:/\\\\/server/file", "FILE", NULL, NULL, "server", -1, "/file", NULL, NULL},
{"file://server/", "file", NULL, NULL, "server", -1, "/", NULL, NULL},
-{"file://localhost/c:/", "file", NULL, NULL, NULL, -1, "/c:/", NULL, NULL},
-{"file://127.0.0.1/c|\\", "file", NULL, NULL, NULL, -1, "/c|\\", NULL, NULL},
+{"file://localhost/c:/", "file", NULL, NULL, "localhost", -1, "/c:/", NULL, NULL},
+{"file://127.0.0.1/c|\\", "file", NULL, NULL, "127.0.0.1", -1, "/c|\\", NULL, NULL},
{"file:/", "file", NULL, NULL, NULL, -1, NULL, NULL, NULL},
{"file:", "file", NULL, NULL, NULL, -1, NULL, NULL, NULL},
// If there is a Windows drive letter, treat any number of slashes as the
@@ -451,7 +445,7 @@
// Declared outside for loop to try to catch cases in init() where we forget
// to reset something that is reset by the construtor.
Parsed parsed;
- for (size_t i = 0; i < arraysize(file_cases); i++) {
+ for (size_t i = 0; i < std::size(file_cases); i++) {
const char* url = file_cases[i].input;
ParseFileURL(url, static_cast<int>(strlen(url)), &parsed);
int port = ParsePort(url, parsed.port);
@@ -494,26 +488,26 @@
struct FileCase {
const char* input;
const char* expected;
- } file_cases[] = {
- {"http://www.google.com", NULL},
- {"http://www.google.com/", ""},
- {"http://www.google.com/search", "search"},
- {"http://www.google.com/search/", ""},
- {"http://www.google.com/foo/bar.html?baz=22", "bar.html"},
- {"http://www.google.com/foo/bar.html#ref", "bar.html"},
- {"http://www.google.com/search/;param", ""},
- {"http://www.google.com/foo/bar.html;param#ref", "bar.html"},
- {"http://www.google.com/foo/bar.html;foo;param#ref", "bar.html"},
- {"http://www.google.com/foo/bar.html?query#ref", "bar.html"},
- {"http://www.google.com/foo;/bar.html", "bar.html"},
- {"http://www.google.com/foo;/", ""},
- {"http://www.google.com/foo;", "foo"},
- {"http://www.google.com/;", ""},
- {"http://www.google.com/foo;bar;html", "foo"},
+ } extract_cases[] = {
+ {"http://www.google.com", nullptr},
+ {"http://www.google.com/", ""},
+ {"http://www.google.com/search", "search"},
+ {"http://www.google.com/search/", ""},
+ {"http://www.google.com/foo/bar.html?baz=22", "bar.html"},
+ {"http://www.google.com/foo/bar.html#ref", "bar.html"},
+ {"http://www.google.com/search/;param", ""},
+ {"http://www.google.com/foo/bar.html;param#ref", "bar.html"},
+ {"http://www.google.com/foo/bar.html;foo;param#ref", "bar.html"},
+ {"http://www.google.com/foo/bar.html?query#ref", "bar.html"},
+ {"http://www.google.com/foo;/bar.html", "bar.html"},
+ {"http://www.google.com/foo;/", ""},
+ {"http://www.google.com/foo;", "foo"},
+ {"http://www.google.com/;", ""},
+ {"http://www.google.com/foo;bar;html", "foo"},
};
- for (size_t i = 0; i < arraysize(file_cases); i++) {
- const char* url = file_cases[i].input;
+ for (size_t i = 0; i < std::size(extract_cases); i++) {
+ const char* url = extract_cases[i].input;
int len = static_cast<int>(strlen(url));
Parsed parsed;
@@ -522,7 +516,7 @@
Component file_name;
ExtractFileName(url, parsed.path, &file_name);
- EXPECT_TRUE(ComponentMatches(url, file_cases[i].expected, file_name));
+ EXPECT_TRUE(ComponentMatches(url, extract_cases[i].expected, file_name));
}
}
@@ -620,7 +614,7 @@
// Declared outside for loop to try to catch cases in init() where we forget
// to reset something that is reset by the constructor.
Parsed parsed;
- for (size_t i = 0; i < arraysize(mailto_cases); ++i) {
+ for (size_t i = 0; i < std::size(mailto_cases); ++i) {
const char* url = mailto_cases[i].input;
ParseMailtoURL(url, static_cast<int>(strlen(url)), &parsed);
int port = ParsePort(url, parsed.port);
@@ -652,7 +646,7 @@
// Declared outside for loop to try to catch cases in init() where we forget
// to reset something that is reset by the constructor.
Parsed parsed;
- for (size_t i = 0; i < arraysize(filesystem_cases); i++) {
+ for (size_t i = 0; i < std::size(filesystem_cases); i++) {
const FileSystemURLParseCase* parsecase = &filesystem_cases[i];
const char* url = parsecase->input;
ParseFileSystemURL(url, static_cast<int>(strlen(url)), &parsed);
diff --git a/url/url_test_utils.h b/url/url_test_utils.h
index f4f51da..e1be7fc 100644
--- a/url/url_test_utils.h
+++ b/url/url_test_utils.h
@@ -1,4 +1,4 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -10,7 +10,6 @@
#include <string>
-#include "base/strings/string16.h"
#include "base/strings/utf_string_conversions.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/url_canon_internal.h"
@@ -24,11 +23,11 @@
// in base bacause it passes invalid UTF-16 characters which is important for
// test purposes. As a result, this is not meant to handle true UTF-32 encoded
// strings.
-inline base::string16 TruncateWStringToUTF16(const wchar_t* src) {
- base::string16 str;
+inline std::u16string TruncateWStringToUTF16(const wchar_t* src) {
+ std::u16string str;
int length = static_cast<int>(wcslen(src));
for (int i = 0; i < length; ++i) {
- str.push_back(static_cast<base::char16>(src[i]));
+ str.push_back(static_cast<char16_t>(src[i]));
}
return str;
}
diff --git a/url/url_util.cc b/url/url_util.cc
index 9b2f2fb..67913eb 100644
--- a/url/url_util.cc
+++ b/url/url_util.cc
@@ -1,18 +1,20 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "url/url_util.h"
+#include <stddef.h>
#include <string.h>
-#include "base/debug/leak_annotations.h"
-#include "base/logging.h"
+#include <atomic>
+#include <ostream>
+
+#include "base/check_op.h"
+#include "base/compiler_specific.h"
+#include "base/containers/contains.h"
+#include "base/no_destructor.h"
#include "base/strings/string_util.h"
-#include "starboard/common/string.h"
-#include "starboard/configuration.h"
-#include "starboard/memory.h"
-#include "starboard/types.h"
#include "url/url_canon_internal.h"
#include "url/url_constants.h"
#include "url/url_file.h"
@@ -22,6 +24,122 @@
namespace {
+// Pairs a scheme name with the SchemeType that describes its URL structure.
+struct SchemeWithType {
+ std::string scheme;  // Matched case-insensitively (ASCII) against the URL's scheme in DoIsInSchemes().
+ SchemeType type;  // Reported to the caller when |scheme| matches.
+};
+
+// A pair for representing a scheme and a custom protocol handler for it.
+//
+// This pair of strings must be normalized protocol handler parameters as
+// described in the Custom Handler specification.
+// https://html.spec.whatwg.org/multipage/system-state.html#normalize-protocol-handler-parameters
+struct SchemeWithHandler {
+ std::string scheme;  // DoAddSchemeWithHandler() DCHECKs non-empty, lower-case ASCII, unique.
+ std::string handler;  // DoAddSchemeWithHandler() DCHECKs non-empty.
+};
+
+// Every registered scheme list plus one flag; the initializers below are the built-in defaults.
+struct SchemeRegistry {
+ // Standard format schemes (see header for details). Searched by DoIsStandard().
+ std::vector<SchemeWithType> standard_schemes = {
+ {kHttpsScheme, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION},
+ {kHttpScheme, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION},
+ // Yes, file URLs can have a hostname, so file URLs should be handled as
+ // "standard". File URLs never have a port as specified by the SchemeType
+ // field. Unlike other SCHEME_WITH_HOST schemes, the 'host' in a file
+ // URL may be empty, a behavior which is special-cased during
+ // canonicalization.
+ {kFileScheme, SCHEME_WITH_HOST},
+ {kFtpScheme, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION},
+ {kWssScheme,
+ SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION}, // WebSocket secure.
+ {kWsScheme, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION}, // WebSocket.
+ {kFileSystemScheme, SCHEME_WITHOUT_AUTHORITY},
+ };
+
+ // Schemes that are allowed for referrers.
+ //
+ // WARNING: Adding (1) a non-"standard" scheme or (2) a scheme whose URLs have
+ // opaque origins could lead to surprising behavior in some of the referrer
+ // generation logic. In order to avoid surprises, be sure to have adequate
+ // test coverage in each of the multiple code locations that compute
+ // referrers.
+ std::vector<SchemeWithType> referrer_schemes = {
+ {kHttpsScheme, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION},
+ {kHttpScheme, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION},
+ };
+
+ // Schemes that do not trigger mixed content warning.
+ std::vector<std::string> secure_schemes = {
+ kHttpsScheme,
+ kWssScheme,
+ kDataScheme,
+ kAboutScheme,
+ };
+
+ // Schemes that normal pages cannot link to or access (i.e., with the same
+ // security rules as those applied to "file" URLs).
+ std::vector<std::string> local_schemes = {
+ kFileScheme,
+ };
+
+ // Schemes that cause pages loaded with them to not have access to pages
+ // loaded with any other URL scheme.
+ std::vector<std::string> no_access_schemes = {
+ kAboutScheme,
+ kJavaScriptScheme,
+ kDataScheme,
+ };
+
+ // Schemes that can be sent CORS requests.
+ std::vector<std::string> cors_enabled_schemes = {
+ kHttpsScheme,
+ kHttpScheme,
+ kDataScheme,
+ };
+
+ // Schemes that can be used by web to store data (local storage, etc).
+ std::vector<std::string> web_storage_schemes = {
+ kHttpsScheme, kHttpScheme, kFileScheme, kFtpScheme, kWssScheme, kWsScheme,
+ };
+
+ // Schemes that can bypass the Content-Security-Policy (CSP) checks.
+ std::vector<std::string> csp_bypassing_schemes = {};  // Empty by default.
+
+ // Schemes that are strictly empty documents, allowing them to commit
+ // synchronously.
+ std::vector<std::string> empty_document_schemes = {
+ kAboutScheme,
+ };
+
+ // Schemes with a predefined default custom handler.
+ std::vector<SchemeWithHandler> predefined_handler_schemes;  // Empty by default.
+
+ bool allow_non_standard_schemes = false;  // NOTE(review): readers of this flag are not visible in this hunk.
+};
+
+// See the LockSchemeRegistries declaration in the header.
+bool scheme_registries_locked = false;
+
+// Set by GetSchemeRegistry() in DCHECK builds; schemes must not be added afterwards.
+static std::atomic<bool> g_scheme_registries_used{false};
+
+// Returns the mutable registry without marking it as used. This should *only* be
+// used for adding schemes to the registry.
+SchemeRegistry* GetSchemeRegistryWithoutLocking() {
+ static base::NoDestructor<SchemeRegistry> registry;  // Function-local static: built on first call.
+ return registry.get();
+}
+
+const SchemeRegistry& GetSchemeRegistry() {  // Read-only accessor for scheme lookups.
+#if DCHECK_IS_ON()
+ g_scheme_registries_used.store(true);  // Record first use so DoSchemeModificationPreamble() can DCHECK on late additions.
+#endif
+ return *GetSchemeRegistryWithoutLocking();
+}
+
// Pass this enum through for methods which would like to know if whitespace
// removal is necessary.
enum WhitespaceRemovalPolicy {
@@ -29,84 +147,6 @@
DO_NOT_REMOVE_WHITESPACE,
};
-const SchemeWithType kStandardURLSchemes[] = {
- {kHttpsScheme, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION},
- {kHttpScheme, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION},
- // Yes, file URLs can have a hostname, so file URLs should be handled as
- // "standard". File URLs never have a port as specified by the SchemeType
- // field. Unlike other SCHEME_WITH_HOST schemes, the 'host' in a file
- // URL may be empty, a behavior which is special-cased during
- // canonicalization.
- {kFileScheme, SCHEME_WITH_HOST},
- {kFtpScheme, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION},
- {kGopherScheme, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION},
- {kWssScheme,
- SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION}, // WebSocket secure.
- {kWsScheme, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION}, // WebSocket.
- {kFileSystemScheme, SCHEME_WITHOUT_AUTHORITY},
-};
-
-const SchemeWithType kReferrerURLSchemes[] = {
- {kHttpsScheme, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION},
- {kHttpScheme, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION},
-};
-
-const char* kSecureSchemes[] = {
- kHttpsScheme,
- kAboutScheme,
- kDataScheme,
- kWssScheme,
-};
-
-const char* kLocalSchemes[] = {
- kFileScheme,
-};
-
-const char* kNoAccessSchemes[] = {
- kAboutScheme,
- kJavaScriptScheme,
- kDataScheme,
-};
-
-const char* kCORSEnabledSchemes[] = {
- kHttpsScheme,
- kHttpScheme,
- kDataScheme,
-};
-
-const char* kWebStorageSchemes[] = {
- kHttpsScheme,
- kHttpScheme,
- kFileScheme,
- kFtpScheme,
- kWssScheme,
- kWsScheme,
-};
-
-const char* kEmptyDocumentSchemes[] = {
- kAboutScheme,
-};
-
-bool initialized = false;
-
-// Lists of the currently installed standard and referrer schemes. These lists
-// are lazily initialized by Initialize and are leaked on shutdown to prevent
-// any destructors from being called that will slow us down or cause problems.
-std::vector<SchemeWithType>* standard_schemes = nullptr;
-std::vector<SchemeWithType>* referrer_schemes = nullptr;
-
-// Similar to above, initialized by the Init*Schemes methods.
-std::vector<std::string>* secure_schemes = nullptr;
-std::vector<std::string>* local_schemes = nullptr;
-std::vector<std::string>* no_access_schemes = nullptr;
-std::vector<std::string>* cors_enabled_schemes = nullptr;
-std::vector<std::string>* web_storage_schemes = nullptr;
-std::vector<std::string>* csp_bypassing_schemes = nullptr;
-std::vector<std::string>* empty_document_schemes = nullptr;
-
-// See the LockSchemeRegistries declaration in the header.
-bool scheme_registries_locked = false;
-
// This template converts a given character type to the corresponding
// StringPiece type.
template<typename CHAR> struct CharToStringPiece {
@@ -114,39 +154,22 @@
template<> struct CharToStringPiece<char> {
typedef base::StringPiece Piece;
};
-template<> struct CharToStringPiece<base::char16> {
+template <>
+struct CharToStringPiece<char16_t> {
typedef base::StringPiece16 Piece;
};
-void InitSchemes(std::vector<std::string>** schemes,
- const char** initial_schemes,
- size_t size) {
- *schemes = new std::vector<std::string>(size);
- for (size_t i = 0; i < size; i++) {
- (*(*schemes))[i] = initial_schemes[i];
- }
-}
-
-void InitSchemesWithType(std::vector<SchemeWithType>** schemes,
- const SchemeWithType* initial_schemes,
- size_t size) {
- *schemes = new std::vector<SchemeWithType>(size);
- for (size_t i = 0; i < size; i++) {
- (*(*schemes))[i] = initial_schemes[i];
- }
-}
-
// Given a string and a range inside the string, compares it to the given
// lower-case |compare_to| buffer.
template<typename CHAR>
inline bool DoCompareSchemeComponent(const CHAR* spec,
const Component& component,
const char* compare_to) {
- if (!component.is_nonempty())
+ if (component.is_empty())
return compare_to[0] == 0; // When component is empty, match empty scheme.
- return base::LowerCaseEqualsASCII(
- typename CharToStringPiece<CHAR>::Piece(
- &spec[component.begin], component.len),
+ return base::EqualsCaseInsensitiveASCII(
+ typename CharToStringPiece<CHAR>::Piece(&spec[component.begin],
+ component.len),
compare_to);
}
@@ -157,13 +180,14 @@
const Component& scheme,
SchemeType* type,
const std::vector<SchemeWithType>& schemes) {
- if (!scheme.is_nonempty())
+ if (scheme.is_empty())
return false; // Empty or invalid schemes are non-standard.
for (const SchemeWithType& scheme_with_type : schemes) {
- if (base::LowerCaseEqualsASCII(typename CharToStringPiece<CHAR>::Piece(
- &spec[scheme.begin], scheme.len),
- scheme_with_type.scheme)) {
+ if (base::EqualsCaseInsensitiveASCII(
+ typename CharToStringPiece<CHAR>::Piece(&spec[scheme.begin],
+ scheme.len),
+ scheme_with_type.scheme)) {
*type = scheme_with_type.type;
return true;
}
@@ -173,8 +197,8 @@
template<typename CHAR>
bool DoIsStandard(const CHAR* spec, const Component& scheme, SchemeType* type) {
- Initialize();
- return DoIsInSchemes(spec, scheme, type, *standard_schemes);
+ return DoIsInSchemes(spec, scheme, type,
+ GetSchemeRegistry().standard_schemes);
}
@@ -185,7 +209,7 @@
Component* found_scheme) {
// Before extracting scheme, canonicalize the URL to remove any whitespace.
// This matches the canonicalization done in DoCanonicalize function.
- RawCanonOutputT<CHAR> whitespace_buffer;
+ STACK_UNINITIALIZED RawCanonOutputT<CHAR> whitespace_buffer;
int spec_len;
const CHAR* spec =
RemoveURLWhitespace(str, str_len, &whitespace_buffer, &spec_len, nullptr);
@@ -210,18 +234,24 @@
CharsetConverter* charset_converter,
CanonOutput* output,
Parsed* output_parsed) {
+ // Trim leading C0 control characters and spaces.
+ int begin = 0;
+ TrimURL(spec, &begin, &spec_len, trim_path_end);
+ DCHECK(0 <= begin && begin <= spec_len);
+ spec += begin;
+ spec_len -= begin;
+
output->ReserveSizeIfNeeded(spec_len);
// Remove any whitespace from the middle of the relative URL if necessary.
// Possibly this will result in copying to the new buffer.
- RawCanonOutputT<CHAR> whitespace_buffer;
+ STACK_UNINITIALIZED RawCanonOutputT<CHAR> whitespace_buffer;
if (whitespace_policy == REMOVE_WHITESPACE) {
spec = RemoveURLWhitespace(spec, spec_len, &whitespace_buffer, &spec_len,
&output_parsed->potentially_dangling_markup);
}
Parsed parsed_input;
-#ifndef STARBOARD
#ifdef WIN32
// For Windows, we allow things that look like absolute Windows paths to be
// fixed up magically to file URLs. This is done for IE compatibility. For
@@ -240,7 +270,6 @@
output, output_parsed);
}
#endif
-#endif
Component scheme;
if (!ExtractScheme(spec, spec_len, &scheme))
@@ -295,7 +324,7 @@
Parsed* output_parsed) {
// Remove any whitespace from the middle of the relative URL, possibly
// copying to the new buffer.
- RawCanonOutputT<CHAR> whitespace_buffer;
+ STACK_UNINITIALIZED RawCanonOutputT<CHAR> whitespace_buffer;
int relative_length;
const CHAR* relative = RemoveURLWhitespace(
in_relative, in_relative_length, &whitespace_buffer, &relative_length,
@@ -336,7 +365,7 @@
Parsed base_parsed_authority;
ParseStandardURL(base_spec, base_spec_len, &base_parsed_authority);
if (base_parsed_authority.host.is_nonempty()) {
- RawCanonOutputT<char> temporary_output;
+ STACK_UNINITIALIZED RawCanonOutputT<char> temporary_output;
bool did_resolve_succeed =
ResolveRelativeURL(base_spec, base_parsed_authority, false, relative,
relative_component, charset_converter,
@@ -388,7 +417,7 @@
if (replacements.IsSchemeOverridden()) {
// Canonicalize the new scheme so it is 8-bit and can be concatenated with
// the existing spec.
- RawCanonOutput<128> scheme_replaced;
+ STACK_UNINITIALIZED RawCanonOutput<128> scheme_replaced;
Component scheme_replaced_parsed;
CanonicalizeScheme(replacements.sources().scheme,
replacements.components().scheme,
@@ -405,7 +434,7 @@
// We now need to completely re-parse the resulting string since its meaning
// may have changed with the different scheme.
- RawCanonOutput<128> recanonicalized;
+ STACK_UNINITIALIZED RawCanonOutput<128> recanonicalized;
Parsed recanonicalized_parsed;
DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), true,
REMOVE_WHITESPACE, charset_converter, &recanonicalized,
@@ -426,6 +455,13 @@
// ref).
Replacements<CHAR> replacements_no_scheme = replacements;
replacements_no_scheme.SetScheme(NULL, Component());
+ // If the input URL has potentially dangling markup, set the flag on the
+ // output too. Note that in some cases the replacement gets rid of the
+ // potentially dangling markup, but this is ok since the check will fail
+ // closed.
+ if (parsed.potentially_dangling_markup) {
+ out_parsed->potentially_dangling_markup = true;
+ }
return DoReplaceComponents(recanonicalized.data(), recanonicalized.length(),
recanonicalized_parsed, replacements_no_scheme,
charset_converter, output, out_parsed);
@@ -460,8 +496,16 @@
return ReplacePathURL(spec, parsed, replacements, output, out_parsed);
}
-void DoAddScheme(const char* new_scheme, std::vector<std::string>* schemes) {
- DCHECK(schemes);
+void DoSchemeModificationPreamble() {
+ // If this assert triggers, it means you've called Add*Scheme after
+ // the SchemeRegistry has been used.
+ //
+ // This normally means you're trying to set up a new scheme too late or using
+ // the SchemeRegistry too early in your application's init process.
+ DCHECK(!g_scheme_registries_used.load())
+ << "Trying to add a scheme after the lists have been used. "
+ "Make sure that you haven't added any static GURL initializers in tests.";
+
// If this assert triggers, it means you've called Add*Scheme after
// LockSchemeRegistries has been called (see the header file for
// LockSchemeRegistries for more).
@@ -471,169 +515,178 @@
// and calls LockSchemeRegistries, and add your new scheme there.
DCHECK(!scheme_registries_locked)
<< "Trying to add a scheme after the lists have been locked.";
+}
- size_t scheme_len = strlen(new_scheme);
- if (scheme_len == 0)
- return;
-
+void DoAddSchemeWithHandler(const char* new_scheme,
+ const char* handler,
+ std::vector<SchemeWithHandler>* schemes) {
+ DoSchemeModificationPreamble();
+ DCHECK(schemes);
+ DCHECK(strlen(new_scheme) > 0);
+ DCHECK(strlen(handler) > 0);
DCHECK_EQ(base::ToLowerASCII(new_scheme), new_scheme);
- schemes->push_back(std::string(new_scheme));
+ DCHECK(!base::Contains(*schemes, new_scheme, &SchemeWithHandler::scheme));
+ schemes->push_back({new_scheme, handler});
+}
+
+void DoAddScheme(const char* new_scheme, std::vector<std::string>* schemes) {  // Appends |new_scheme| to |schemes|.
+ DoSchemeModificationPreamble();  // DCHECKs that the registry is neither used nor locked yet.
+ DCHECK(schemes);
+ DCHECK(strlen(new_scheme) > 0);  // An empty scheme is a programming error.
+ DCHECK_EQ(base::ToLowerASCII(new_scheme), new_scheme);  // Must already be lower-case ASCII.
+ DCHECK(!base::Contains(*schemes, new_scheme));  // No duplicate registrations.
+ schemes->push_back(new_scheme);
}
void DoAddSchemeWithType(const char* new_scheme,
SchemeType type,
std::vector<SchemeWithType>* schemes) {
+ DoSchemeModificationPreamble();
DCHECK(schemes);
- // If this assert triggers, it means you've called Add*Scheme after
- // LockSchemeRegistries has been called (see the header file for
- // LockSchemeRegistries for more).
- //
- // This normally means you're trying to set up a new scheme too late in your
- // application's init process. Locate where your app does this initialization
- // and calls LockSchemeRegistries, and add your new scheme there.
- DCHECK(!scheme_registries_locked)
- << "Trying to add a scheme after the lists have been locked.";
-
- size_t scheme_len = strlen(new_scheme);
- if (scheme_len == 0)
- return;
-
+ DCHECK(strlen(new_scheme) > 0);
DCHECK_EQ(base::ToLowerASCII(new_scheme), new_scheme);
- // Duplicate the scheme into a new buffer and add it to the list of standard
- // schemes. This pointer will be leaked on shutdown.
- char* dup_scheme = new char[scheme_len + 1];
- ANNOTATE_LEAKING_OBJECT_PTR(dup_scheme);
- memcpy(dup_scheme, new_scheme, scheme_len + 1);
-
- SchemeWithType scheme_with_type;
- scheme_with_type.scheme = dup_scheme;
- scheme_with_type.type = type;
- schemes->push_back(scheme_with_type);
+ DCHECK(!base::Contains(*schemes, new_scheme, &SchemeWithType::scheme));
+ schemes->push_back({new_scheme, type});
}
} // namespace
-void Initialize() {
- if (initialized)
- return;
- InitSchemesWithType(&standard_schemes, kStandardURLSchemes,
- arraysize(kStandardURLSchemes));
- InitSchemesWithType(&referrer_schemes, kReferrerURLSchemes,
- arraysize(kReferrerURLSchemes));
- InitSchemes(&secure_schemes, kSecureSchemes, arraysize(kSecureSchemes));
- InitSchemes(&local_schemes, kLocalSchemes, arraysize(kLocalSchemes));
- InitSchemes(&no_access_schemes, kNoAccessSchemes,
- arraysize(kNoAccessSchemes));
- InitSchemes(&cors_enabled_schemes, kCORSEnabledSchemes,
- arraysize(kCORSEnabledSchemes));
- InitSchemes(&web_storage_schemes, kWebStorageSchemes,
- arraysize(kWebStorageSchemes));
- InitSchemes(&csp_bypassing_schemes, nullptr, 0);
- InitSchemes(&empty_document_schemes, kEmptyDocumentSchemes,
- arraysize(kEmptyDocumentSchemes));
- initialized = true;
+void ClearSchemesForTests() {
+ DCHECK(!g_scheme_registries_used.load())
+ << "Schemes already used "
+ << "(use ScopedSchemeRegistryForTests to relax for tests).";
+ DCHECK(!scheme_registries_locked)
+ << "Schemes already locked "
+ << "(use ScopedSchemeRegistryForTests to relax for tests).";
+ *GetSchemeRegistryWithoutLocking() = SchemeRegistry();
}
-void Shutdown() {
- initialized = false;
- delete standard_schemes;
- standard_schemes = nullptr;
- delete referrer_schemes;
- referrer_schemes = nullptr;
- delete secure_schemes;
- secure_schemes = nullptr;
- delete local_schemes;
- local_schemes = nullptr;
- delete no_access_schemes;
- no_access_schemes = nullptr;
- delete cors_enabled_schemes;
- cors_enabled_schemes = nullptr;
- delete web_storage_schemes;
- web_storage_schemes = nullptr;
- delete csp_bypassing_schemes;
- csp_bypassing_schemes = nullptr;
- delete empty_document_schemes;
- empty_document_schemes = nullptr;
+class ScopedSchemeRegistryInternal {
+ public:
+ ScopedSchemeRegistryInternal()
+ : registry_(std::make_unique<SchemeRegistry>(
+ *GetSchemeRegistryWithoutLocking())) {
+ g_scheme_registries_used.store(false);
+ scheme_registries_locked = false;
+ }
+ ~ScopedSchemeRegistryInternal() {
+ *GetSchemeRegistryWithoutLocking() = *registry_;
+ g_scheme_registries_used.store(true);
+ scheme_registries_locked = true;
+ }
+
+ private:
+ std::unique_ptr<SchemeRegistry> registry_;
+};
+
+ScopedSchemeRegistryForTests::ScopedSchemeRegistryForTests()
+ : internal_(std::make_unique<ScopedSchemeRegistryInternal>()) {}
+
+ScopedSchemeRegistryForTests::~ScopedSchemeRegistryForTests() = default;
+
+void EnableNonStandardSchemesForAndroidWebView() {
+ DoSchemeModificationPreamble();
+ GetSchemeRegistryWithoutLocking()->allow_non_standard_schemes = true;
+}
+
+bool AllowNonStandardSchemesForAndroidWebView() {
+ return GetSchemeRegistry().allow_non_standard_schemes;
}
void AddStandardScheme(const char* new_scheme, SchemeType type) {
- Initialize();
- DoAddSchemeWithType(new_scheme, type, standard_schemes);
+ DoAddSchemeWithType(new_scheme, type,
+ &GetSchemeRegistryWithoutLocking()->standard_schemes);
+}
+
+std::vector<std::string> GetStandardSchemes() {
+ std::vector<std::string> result;
+ result.reserve(GetSchemeRegistry().standard_schemes.size());
+ for (const auto& entry : GetSchemeRegistry().standard_schemes) {
+ result.push_back(entry.scheme);
+ }
+ return result;
}
void AddReferrerScheme(const char* new_scheme, SchemeType type) {
- Initialize();
- DoAddSchemeWithType(new_scheme, type, referrer_schemes);
+ DoAddSchemeWithType(new_scheme, type,
+ &GetSchemeRegistryWithoutLocking()->referrer_schemes);
}
void AddSecureScheme(const char* new_scheme) {
- Initialize();
- DoAddScheme(new_scheme, secure_schemes);
+ DoAddScheme(new_scheme, &GetSchemeRegistryWithoutLocking()->secure_schemes);
}
const std::vector<std::string>& GetSecureSchemes() {
- Initialize();
- return *secure_schemes;
+ return GetSchemeRegistry().secure_schemes;
}
void AddLocalScheme(const char* new_scheme) {
- Initialize();
- DoAddScheme(new_scheme, local_schemes);
+ DoAddScheme(new_scheme, &GetSchemeRegistryWithoutLocking()->local_schemes);
}
const std::vector<std::string>& GetLocalSchemes() {
- Initialize();
- return *local_schemes;
+ return GetSchemeRegistry().local_schemes;
}
void AddNoAccessScheme(const char* new_scheme) {
- Initialize();
- DoAddScheme(new_scheme, no_access_schemes);
+ DoAddScheme(new_scheme,
+ &GetSchemeRegistryWithoutLocking()->no_access_schemes);
}
const std::vector<std::string>& GetNoAccessSchemes() {
- Initialize();
- return *no_access_schemes;
+ return GetSchemeRegistry().no_access_schemes;
}
-void AddCORSEnabledScheme(const char* new_scheme) {
- Initialize();
- DoAddScheme(new_scheme, cors_enabled_schemes);
+void AddCorsEnabledScheme(const char* new_scheme) {
+ DoAddScheme(new_scheme,
+ &GetSchemeRegistryWithoutLocking()->cors_enabled_schemes);
}
-const std::vector<std::string>& GetCORSEnabledSchemes() {
- Initialize();
- return *cors_enabled_schemes;
+const std::vector<std::string>& GetCorsEnabledSchemes() {
+ return GetSchemeRegistry().cors_enabled_schemes;
}
void AddWebStorageScheme(const char* new_scheme) {
- Initialize();
- DoAddScheme(new_scheme, web_storage_schemes);
+ DoAddScheme(new_scheme,
+ &GetSchemeRegistryWithoutLocking()->web_storage_schemes);
}
const std::vector<std::string>& GetWebStorageSchemes() {
- Initialize();
- return *web_storage_schemes;
+ return GetSchemeRegistry().web_storage_schemes;
}
void AddCSPBypassingScheme(const char* new_scheme) {
- Initialize();
- DoAddScheme(new_scheme, csp_bypassing_schemes);
+ DoAddScheme(new_scheme,
+ &GetSchemeRegistryWithoutLocking()->csp_bypassing_schemes);
}
const std::vector<std::string>& GetCSPBypassingSchemes() {
- Initialize();
- return *csp_bypassing_schemes;
+ return GetSchemeRegistry().csp_bypassing_schemes;
}
void AddEmptyDocumentScheme(const char* new_scheme) {
- Initialize();
- DoAddScheme(new_scheme, empty_document_schemes);
+ DoAddScheme(new_scheme,
+ &GetSchemeRegistryWithoutLocking()->empty_document_schemes);
}
const std::vector<std::string>& GetEmptyDocumentSchemes() {
- Initialize();
- return *empty_document_schemes;
+ return GetSchemeRegistry().empty_document_schemes;
+}
+
+void AddPredefinedHandlerScheme(const char* new_scheme, const char* handler) {
+ DoAddSchemeWithHandler(
+ new_scheme, handler,
+ &GetSchemeRegistryWithoutLocking()->predefined_handler_schemes);
+}
+
+std::vector<std::pair<std::string, std::string>> GetPredefinedHandlerSchemes() {
+ std::vector<std::pair<std::string, std::string>> result;
+ result.reserve(GetSchemeRegistry().predefined_handler_schemes.size());
+ for (const SchemeWithHandler& entry :
+ GetSchemeRegistry().predefined_handler_schemes) {
+ result.emplace_back(entry.scheme, entry.handler);
+ }
+ return result;
}
void LockSchemeRegistries() {
@@ -651,21 +704,21 @@
return DoIsStandard(spec, scheme, type);
}
-bool GetStandardSchemeType(const base::char16* spec,
+bool GetStandardSchemeType(const char16_t* spec,
const Component& scheme,
SchemeType* type) {
return DoIsStandard(spec, scheme, type);
}
-bool IsStandard(const base::char16* spec, const Component& scheme) {
+bool IsStandard(const char16_t* spec, const Component& scheme) {
SchemeType unused_scheme_type;
return DoIsStandard(spec, scheme, &unused_scheme_type);
}
bool IsReferrerScheme(const char* spec, const Component& scheme) {
- Initialize();
SchemeType unused_scheme_type;
- return DoIsInSchemes(spec, scheme, &unused_scheme_type, *referrer_schemes);
+ return DoIsInSchemes(spec, scheme, &unused_scheme_type,
+ GetSchemeRegistry().referrer_schemes);
}
bool FindAndCompareScheme(const char* str,
@@ -675,7 +728,7 @@
return DoFindAndCompareScheme(str, str_len, compare, found_scheme);
}
-bool FindAndCompareScheme(const base::char16* str,
+bool FindAndCompareScheme(const char16_t* str,
int str_len,
const char* compare,
Component* found_scheme) {
@@ -719,7 +772,7 @@
}
bool HostIsIPAddress(base::StringPiece host) {
- url::RawCanonOutputT<char, 128> ignored_output;
+ STACK_UNINITIALIZED url::RawCanonOutputT<char, 128> ignored_output;
url::CanonHostInfo host_info;
url::CanonicalizeIPAddress(host.data(), Component(0, host.length()),
&ignored_output, &host_info);
@@ -736,7 +789,7 @@
charset_converter, output, output_parsed);
}
-bool Canonicalize(const base::char16* spec,
+bool Canonicalize(const char16_t* spec,
int spec_len,
bool trim_path_end,
CharsetConverter* charset_converter,
@@ -762,7 +815,7 @@
bool ResolveRelative(const char* base_spec,
int base_spec_len,
const Parsed& base_parsed,
- const base::char16* relative,
+ const char16_t* relative,
int relative_length,
CharsetConverter* charset_converter,
CanonOutput* output,
@@ -786,7 +839,7 @@
bool ReplaceComponents(const char* spec,
int spec_len,
const Parsed& parsed,
- const Replacements<base::char16>& replacements,
+ const Replacements<char16_t>& replacements,
CharsetConverter* charset_converter,
CanonOutput* output,
Parsed* out_parsed) {
@@ -794,14 +847,19 @@
charset_converter, output, out_parsed);
}
-DecodeURLResult DecodeURLEscapeSequences(const char* input,
- int length,
- CanonOutputW* output) {
- RawCanonOutputT<char> unescaped_chars;
- for (int i = 0; i < length; i++) {
+void DecodeURLEscapeSequences(const char* input,
+ int length,
+ DecodeURLMode mode,
+ CanonOutputW* output) {
+ if (length <= 0)
+ return;
+
+ STACK_UNINITIALIZED RawCanonOutputT<char> unescaped_chars;
+ size_t length_size_t = static_cast<size_t>(length);
+ for (size_t i = 0; i < length_size_t; i++) {
if (input[i] == '%') {
unsigned char ch;
- if (DecodeEscaped(input, &i, length, &ch)) {
+ if (DecodeEscaped(input, &i, length_size_t, &ch)) {
unescaped_chars.push_back(ch);
} else {
// Invalid escape sequence, copy the percent literal.
@@ -814,11 +872,10 @@
}
int output_initial_length = output->length();
- bool did_utf8_decode = false;
- bool did_isomorphic_decode = false;
// Convert that 8-bit to UTF-16. It's not clear IE does this at all to
// JavaScript URLs, but Firefox and Safari do.
- for (int i = 0; i < unescaped_chars.length(); i++) {
+ size_t unescaped_length = unescaped_chars.length();
+ for (size_t i = 0; i < unescaped_length; i++) {
unsigned char uch = static_cast<unsigned char>(unescaped_chars.at(i));
if (uch < 0x80) {
// Non-UTF-8, just append directly
@@ -826,35 +883,29 @@
} else {
// next_ch will point to the last character of the decoded
// character.
- int next_character = i;
- unsigned code_point;
- if (ReadUTFChar(unescaped_chars.data(), &next_character,
- unescaped_chars.length(), &code_point)) {
+ size_t next_character = i;
+ base_icu::UChar32 code_point;
+ if (ReadUTFChar(unescaped_chars.data(), &next_character, unescaped_length,
+ &code_point)) {
// Valid UTF-8 character, convert to UTF-16.
AppendUTF16Value(code_point, output);
i = next_character;
- did_utf8_decode = true;
+ } else if (mode == DecodeURLMode::kUTF8) {
+ DCHECK_EQ(code_point, 0xFFFD);
+ AppendUTF16Value(code_point, output);
+ i = next_character;
} else {
// If there are any sequences that are not valid UTF-8, we
// revert |output| changes, and promote any bytes to UTF-16. We
// copy all characters from the beginning to the end of the
// identified sequence.
output->set_length(output_initial_length);
- did_utf8_decode = false;
- for (int j = 0; j < unescaped_chars.length(); ++j)
+ for (size_t j = 0; j < unescaped_chars.length(); ++j)
output->push_back(static_cast<unsigned char>(unescaped_chars.at(j)));
- did_isomorphic_decode = true;
break;
}
}
}
-
- DCHECK(!(did_utf8_decode && did_isomorphic_decode));
- if (did_isomorphic_decode)
- return DecodeURLResult::kIsomorphic;
- if (did_utf8_decode)
- return DecodeURLResult::kUTF8;
- return DecodeURLResult::kAsciiOnly;
}
void EncodeURIComponent(const char* input, int length, CanonOutput* output) {
@@ -873,7 +924,7 @@
return DoCompareSchemeComponent(spec, component, compare_to);
}
-bool CompareSchemeComponent(const base::char16* spec,
+bool CompareSchemeComponent(const char16_t* spec,
const Component& component,
const char* compare_to) {
return DoCompareSchemeComponent(spec, component, compare_to);
diff --git a/url/url_util.h b/url/url_util.h
index 32e7f0d..670552a 100644
--- a/url/url_util.h
+++ b/url/url_util.h
@@ -1,105 +1,129 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef URL_URL_UTIL_H_
#define URL_URL_UTIL_H_
+#include <memory>
#include <string>
#include <vector>
-#include "base/strings/string16.h"
+#include "base/component_export.h"
#include "base/strings/string_piece.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
#include "url/url_constants.h"
-#include "url/url_export.h"
namespace url {
// Init ------------------------------------------------------------------------
-// Initialization is NOT required, it will be implicitly initialized when first
-// used. However, this implicit initialization is NOT threadsafe. If you are
-// using this library in a threaded environment and don't have a consistent
-// "first call" (an example might be calling Add*Scheme with your special
-// application-specific schemes) then you will want to call initialize before
-// spawning any threads.
-//
-// It is OK to call this function more than once, subsequent calls will be
-// no-ops, unless Shutdown was called in the mean time. This will also be a
-// no-op if other calls to the library have forced an initialization beforehand.
-URL_EXPORT void Initialize();
+// Used for tests that need to reset schemes. Note that this can only be used
+// in conjunction with ScopedSchemeRegistryForTests.
+COMPONENT_EXPORT(URL) void ClearSchemesForTests();
-// Cleanup is not required, except some strings may leak. For most user
-// applications, this is fine. If you're using it in a library that may get
-// loaded and unloaded, you'll want to unload to properly clean up your
-// library.
-URL_EXPORT void Shutdown();
+class ScopedSchemeRegistryInternal;
+
+// Stores the SchemeRegistry upon creation, allowing tests to modify a copy of
+// it, and restores the original SchemeRegistry when deleted.
+class COMPONENT_EXPORT(URL) ScopedSchemeRegistryForTests {
+ public:
+ ScopedSchemeRegistryForTests();
+ ~ScopedSchemeRegistryForTests();
+
+ private:
+ std::unique_ptr<ScopedSchemeRegistryInternal> internal_;
+};
// Schemes ---------------------------------------------------------------------
-// A pair for representing a standard scheme name and the SchemeType for it.
-struct URL_EXPORT SchemeWithType {
- const char* scheme;
- SchemeType type;
-};
+// Changes the behavior of SchemeHostPort / Origin to allow non-standard schemes
+// to be specified, instead of canonicalizing them to an invalid SchemeHostPort
+// or opaque Origin, respectively. This is used for Android WebView backwards
+// compatibility, which allows the use of custom schemes: content hosted in
+// Android WebView assumes that one URL with a non-standard scheme will be
+// same-origin to another URL with the same non-standard scheme.
+//
+// Not thread-safe.
+COMPONENT_EXPORT(URL) void EnableNonStandardSchemesForAndroidWebView();
+
+// Whether or not SchemeHostPort and Origin allow non-standard schemes.
+COMPONENT_EXPORT(URL) bool AllowNonStandardSchemesForAndroidWebView();
// The following Add*Scheme method are not threadsafe and can not be called
// concurrently with any other url_util function. They will assert if the lists
-// of schemes have been locked (see LockSchemeRegistries).
+// of schemes have been locked (see LockSchemeRegistries), or used.
// Adds an application-defined scheme to the internal list of "standard-format"
// URL schemes. A standard-format scheme adheres to what RFC 3986 calls "generic
// URI syntax" (https://tools.ietf.org/html/rfc3986#section-3).
-URL_EXPORT void AddStandardScheme(const char* new_scheme,
- SchemeType scheme_type);
+COMPONENT_EXPORT(URL)
+void AddStandardScheme(const char* new_scheme, SchemeType scheme_type);
+
+// Returns the list of schemes registered for "standard" URLs. Note, this
+// should not be used if you just need to check if your protocol is standard
+// or not. Instead use the IsStandard() function below as it's much more
+// efficient. This function should only be used where you need to perform
+// other operations against the standard scheme list.
+COMPONENT_EXPORT(URL)
+std::vector<std::string> GetStandardSchemes();
// Adds an application-defined scheme to the internal list of schemes allowed
// for referrers.
-URL_EXPORT void AddReferrerScheme(const char* new_scheme,
- SchemeType scheme_type);
+COMPONENT_EXPORT(URL)
+void AddReferrerScheme(const char* new_scheme, SchemeType scheme_type);
// Adds an application-defined scheme to the list of schemes that do not trigger
// mixed content warnings.
-URL_EXPORT void AddSecureScheme(const char* new_scheme);
-URL_EXPORT const std::vector<std::string>& GetSecureSchemes();
+COMPONENT_EXPORT(URL) void AddSecureScheme(const char* new_scheme);
+COMPONENT_EXPORT(URL) const std::vector<std::string>& GetSecureSchemes();
// Adds an application-defined scheme to the list of schemes that normal pages
// cannot link to or access (i.e., with the same security rules as those applied
// to "file" URLs).
-URL_EXPORT void AddLocalScheme(const char* new_scheme);
-URL_EXPORT const std::vector<std::string>& GetLocalSchemes();
+COMPONENT_EXPORT(URL) void AddLocalScheme(const char* new_scheme);
+COMPONENT_EXPORT(URL) const std::vector<std::string>& GetLocalSchemes();
// Adds an application-defined scheme to the list of schemes that cause pages
// loaded with them to not have access to pages loaded with any other URL
// scheme.
-URL_EXPORT void AddNoAccessScheme(const char* new_scheme);
-URL_EXPORT const std::vector<std::string>& GetNoAccessSchemes();
+COMPONENT_EXPORT(URL) void AddNoAccessScheme(const char* new_scheme);
+COMPONENT_EXPORT(URL) const std::vector<std::string>& GetNoAccessSchemes();
// Adds an application-defined scheme to the list of schemes that can be sent
// CORS requests.
-URL_EXPORT void AddCORSEnabledScheme(const char* new_scheme);
-URL_EXPORT const std::vector<std::string>& GetCORSEnabledSchemes();
+COMPONENT_EXPORT(URL) void AddCorsEnabledScheme(const char* new_scheme);
+COMPONENT_EXPORT(URL) const std::vector<std::string>& GetCorsEnabledSchemes();
// Adds an application-defined scheme to the list of web schemes that can be
// used by web to store data (e.g. cookies, local storage, ...). This is
// to differentiate them from schemes that can store data but are not used on
// web (e.g. application's internal schemes) or schemes that are used on web but
// cannot store data.
-URL_EXPORT void AddWebStorageScheme(const char* new_scheme);
-URL_EXPORT const std::vector<std::string>& GetWebStorageSchemes();
+COMPONENT_EXPORT(URL) void AddWebStorageScheme(const char* new_scheme);
+COMPONENT_EXPORT(URL) const std::vector<std::string>& GetWebStorageSchemes();
// Adds an application-defined scheme to the list of schemes that can bypass the
-// Content-Security-Policy(CSP) checks.
-URL_EXPORT void AddCSPBypassingScheme(const char* new_scheme);
-URL_EXPORT const std::vector<std::string>& GetCSPBypassingSchemes();
+// Content-Security-Policy (CSP) checks.
+COMPONENT_EXPORT(URL) void AddCSPBypassingScheme(const char* new_scheme);
+COMPONENT_EXPORT(URL) const std::vector<std::string>& GetCSPBypassingSchemes();
// Adds an application-defined scheme to the list of schemes that are strictly
// empty documents, allowing them to commit synchronously.
-URL_EXPORT void AddEmptyDocumentScheme(const char* new_scheme);
-URL_EXPORT const std::vector<std::string>& GetEmptyDocumentSchemes();
+COMPONENT_EXPORT(URL) void AddEmptyDocumentScheme(const char* new_scheme);
+COMPONENT_EXPORT(URL) const std::vector<std::string>& GetEmptyDocumentSchemes();
+
+// Adds a scheme with a predefined default handler.
+//
+// This pair of strings must be normalized protocol handler parameters as
+// described in the Custom Handler specification.
+// https://html.spec.whatwg.org/multipage/system-state.html#normalize-protocol-handler-parameters
+COMPONENT_EXPORT(URL)
+void AddPredefinedHandlerScheme(const char* new_scheme, const char* handler);
+COMPONENT_EXPORT(URL)
+std::vector<std::pair<std::string, std::string>> GetPredefinedHandlerSchemes();
// Sets a flag to prevent future calls to Add*Scheme from succeeding.
//
@@ -113,7 +137,7 @@
// We could have had Add*Scheme use a lock instead, but that would add
// some platform-specific dependencies we don't otherwise have now, and is
// overkill considering the normal usage is so simple.
-URL_EXPORT void LockSchemeRegistries();
+COMPONENT_EXPORT(URL) void LockSchemeRegistries();
// Locates the scheme in the given string and places it into |found_scheme|,
// which may be NULL to indicate the caller does not care about the range.
@@ -121,21 +145,23 @@
// Returns whether the given |compare| scheme matches the scheme found in the
// input (if any). The |compare| scheme must be a valid canonical scheme or
// the result of the comparison is undefined.
-URL_EXPORT bool FindAndCompareScheme(const char* str,
- int str_len,
- const char* compare,
- Component* found_scheme);
-URL_EXPORT bool FindAndCompareScheme(const base::char16* str,
- int str_len,
- const char* compare,
- Component* found_scheme);
+COMPONENT_EXPORT(URL)
+bool FindAndCompareScheme(const char* str,
+ int str_len,
+ const char* compare,
+ Component* found_scheme);
+COMPONENT_EXPORT(URL)
+bool FindAndCompareScheme(const char16_t* str,
+ int str_len,
+ const char* compare,
+ Component* found_scheme);
inline bool FindAndCompareScheme(const std::string& str,
const char* compare,
Component* found_scheme) {
return FindAndCompareScheme(str.data(), static_cast<int>(str.size()),
compare, found_scheme);
}
-inline bool FindAndCompareScheme(const base::string16& str,
+inline bool FindAndCompareScheme(const std::u16string& str,
const char* compare,
Component* found_scheme) {
return FindAndCompareScheme(str.data(), static_cast<int>(str.size()),
@@ -144,22 +170,27 @@
// Returns true if the given scheme identified by |scheme| within |spec| is in
// the list of known standard-format schemes (see AddStandardScheme).
-URL_EXPORT bool IsStandard(const char* spec, const Component& scheme);
-URL_EXPORT bool IsStandard(const base::char16* spec, const Component& scheme);
+COMPONENT_EXPORT(URL)
+bool IsStandard(const char* spec, const Component& scheme);
+COMPONENT_EXPORT(URL)
+bool IsStandard(const char16_t* spec, const Component& scheme);
// Returns true if the given scheme identified by |scheme| within |spec| is in
// the list of allowed schemes for referrers (see AddReferrerScheme).
-URL_EXPORT bool IsReferrerScheme(const char* spec, const Component& scheme);
+COMPONENT_EXPORT(URL)
+bool IsReferrerScheme(const char* spec, const Component& scheme);
// Returns true and sets |type| to the SchemeType of the given scheme
// identified by |scheme| within |spec| if the scheme is in the list of known
// standard-format schemes (see AddStandardScheme).
-URL_EXPORT bool GetStandardSchemeType(const char* spec,
- const Component& scheme,
- SchemeType* type);
-URL_EXPORT bool GetStandardSchemeType(const base::char16* spec,
- const Component& scheme,
- SchemeType* type);
+COMPONENT_EXPORT(URL)
+bool GetStandardSchemeType(const char* spec,
+ const Component& scheme,
+ SchemeType* type);
+COMPONENT_EXPORT(URL)
+bool GetStandardSchemeType(const char16_t* spec,
+ const Component& scheme,
+ SchemeType* type);
// Hosts ----------------------------------------------------------------------
@@ -171,12 +202,13 @@
// If either of the input StringPieces is empty, the return value is false. The
// input domain should match host canonicalization rules. i.e. it should be
// lowercase except for escape chars.
-URL_EXPORT bool DomainIs(base::StringPiece canonical_host,
- base::StringPiece canonical_domain);
+COMPONENT_EXPORT(URL)
+bool DomainIs(base::StringPiece canonical_host,
+ base::StringPiece canonical_domain);
// Returns true if the hostname is an IP address. Note: this function isn't very
// cheap, as it must re-parse the host to verify.
-URL_EXPORT bool HostIsIPAddress(base::StringPiece host);
+COMPONENT_EXPORT(URL) bool HostIsIPAddress(base::StringPiece host);
// URL library wrappers --------------------------------------------------------
@@ -190,18 +222,20 @@
// Returns true if a valid URL was produced, false if not. On failure, the
// output and parsed structures will still be filled and will be consistent,
// but they will not represent a loadable URL.
-URL_EXPORT bool Canonicalize(const char* spec,
- int spec_len,
- bool trim_path_end,
- CharsetConverter* charset_converter,
- CanonOutput* output,
- Parsed* output_parsed);
-URL_EXPORT bool Canonicalize(const base::char16* spec,
- int spec_len,
- bool trim_path_end,
- CharsetConverter* charset_converter,
- CanonOutput* output,
- Parsed* output_parsed);
+COMPONENT_EXPORT(URL)
+bool Canonicalize(const char* spec,
+ int spec_len,
+ bool trim_path_end,
+ CharsetConverter* charset_converter,
+ CanonOutput* output,
+ Parsed* output_parsed);
+COMPONENT_EXPORT(URL)
+bool Canonicalize(const char16_t* spec,
+ int spec_len,
+ bool trim_path_end,
+ CharsetConverter* charset_converter,
+ CanonOutput* output,
+ Parsed* output_parsed);
// Resolves a potentially relative URL relative to the given parsed base URL.
// The base MUST be valid. The resulting canonical URL and parsed information
@@ -213,67 +247,67 @@
//
// Returns true if the output is valid, false if the input could not produce
// a valid URL.
-URL_EXPORT bool ResolveRelative(const char* base_spec,
- int base_spec_len,
- const Parsed& base_parsed,
- const char* relative,
- int relative_length,
- CharsetConverter* charset_converter,
- CanonOutput* output,
- Parsed* output_parsed);
-URL_EXPORT bool ResolveRelative(const char* base_spec,
- int base_spec_len,
- const Parsed& base_parsed,
- const base::char16* relative,
- int relative_length,
- CharsetConverter* charset_converter,
- CanonOutput* output,
- Parsed* output_parsed);
+COMPONENT_EXPORT(URL)
+bool ResolveRelative(const char* base_spec,
+ int base_spec_len,
+ const Parsed& base_parsed,
+ const char* relative,
+ int relative_length,
+ CharsetConverter* charset_converter,
+ CanonOutput* output,
+ Parsed* output_parsed);
+COMPONENT_EXPORT(URL)
+bool ResolveRelative(const char* base_spec,
+ int base_spec_len,
+ const Parsed& base_parsed,
+ const char16_t* relative,
+ int relative_length,
+ CharsetConverter* charset_converter,
+ CanonOutput* output,
+ Parsed* output_parsed);
// Replaces components in the given VALID input URL. The new canonical URL info
// is written to output and out_parsed.
//
// Returns true if the resulting URL is valid.
-URL_EXPORT bool ReplaceComponents(const char* spec,
- int spec_len,
- const Parsed& parsed,
- const Replacements<char>& replacements,
- CharsetConverter* charset_converter,
- CanonOutput* output,
- Parsed* out_parsed);
-URL_EXPORT bool ReplaceComponents(
- const char* spec,
- int spec_len,
- const Parsed& parsed,
- const Replacements<base::char16>& replacements,
- CharsetConverter* charset_converter,
- CanonOutput* output,
- Parsed* out_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceComponents(const char* spec,
+ int spec_len,
+ const Parsed& parsed,
+ const Replacements<char>& replacements,
+ CharsetConverter* charset_converter,
+ CanonOutput* output,
+ Parsed* out_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceComponents(const char* spec,
+ int spec_len,
+ const Parsed& parsed,
+ const Replacements<char16_t>& replacements,
+ CharsetConverter* charset_converter,
+ CanonOutput* output,
+ Parsed* out_parsed);
// String helper functions -----------------------------------------------------
-enum class DecodeURLResult {
- // Did not contain code points greater than 0x7F.
- kAsciiOnly,
- // Did UTF-8 decode only.
+enum class DecodeURLMode {
+ // UTF-8 decode only. Invalid byte sequences are replaced with U+FFFD.
kUTF8,
- // Did byte to Unicode mapping only.
- // https://infra.spec.whatwg.org/#isomorphic-decode
- kIsomorphic,
+ // Try UTF-8 decoding. If the input contains byte sequences invalid
+ // for UTF-8, apply byte to Unicode mapping.
+ kUTF8OrIsomorphic,
};
// Unescapes the given string using URL escaping rules.
-// This function tries to decode non-ASCII characters in UTF-8 first,
-// then in isomorphic encoding if UTF-8 decoding failed.
-URL_EXPORT DecodeURLResult DecodeURLEscapeSequences(const char* input,
- int length,
- CanonOutputW* output);
+COMPONENT_EXPORT(URL)
+void DecodeURLEscapeSequences(const char* input,
+ int length,
+ DecodeURLMode mode,
+ CanonOutputW* output);
// Escapes the given string as defined by the JS method encodeURIComponent. See
// https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/encodeURIComponent
-URL_EXPORT void EncodeURIComponent(const char* input,
- int length,
- CanonOutput* output);
+COMPONENT_EXPORT(URL)
+void EncodeURIComponent(const char* input, int length, CanonOutput* output);
} // namespace url
diff --git a/url/url_util_internal.h b/url/url_util_internal.h
index 756c736..fe2a4d9 100644
--- a/url/url_util_internal.h
+++ b/url/url_util_internal.h
@@ -1,13 +1,10 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef URL_URL_UTIL_INTERNAL_H_
#define URL_URL_UTIL_INTERNAL_H_
-#include <string>
-
-#include "base/strings/string16.h"
#include "url/third_party/mozilla/url_parse.h"
namespace url {
@@ -17,7 +14,7 @@
bool CompareSchemeComponent(const char* spec,
const Component& component,
const char* compare_to);
-bool CompareSchemeComponent(const base::char16* spec,
+bool CompareSchemeComponent(const char16_t* spec,
const Component& component,
const char* compare_to);
diff --git a/url/url_util_unittest.cc b/url/url_util_unittest.cc
index 35b86e7..e1d7801 100644
--- a/url/url_util_unittest.cc
+++ b/url/url_util_unittest.cc
@@ -1,29 +1,34 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
+// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "base/macros.h"
-#include "starboard/common/string.h"
-#include "starboard/types.h"
+#include "url/url_util.h"
+
+#include <stddef.h>
+
+#include "base/strings/string_piece.h"
+#include "build/build_config.h"
+#include "testing/gtest/include/gtest/gtest-message.h"
#include "testing/gtest/include/gtest/gtest.h"
+#include "third_party/abseil-cpp/absl/types/optional.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
#include "url/url_canon_stdstring.h"
#include "url/url_test_utils.h"
-#include "url/url_util.h"
namespace url {
class URLUtilTest : public testing::Test {
public:
URLUtilTest() = default;
- ~URLUtilTest() override {
- // Reset any added schemes.
- Shutdown();
- }
+
+ URLUtilTest(const URLUtilTest&) = delete;
+ URLUtilTest& operator=(const URLUtilTest&) = delete;
+
+ ~URLUtilTest() override = default;
private:
- DISALLOW_COPY_AND_ASSIGN(URLUtilTest);
+ ScopedSchemeRegistryForTests scoped_registry_;  // Presumably undoes scheme registrations made by a test once the fixture is destroyed.
};
TEST_F(URLUtilTest, FindAndCompareScheme) {
@@ -92,12 +97,27 @@
}
TEST_F(URLUtilTest, AddReferrerScheme) {
- const char kFooScheme[] = "foo";
+ static const char kFooScheme[] = "foo";
EXPECT_FALSE(IsReferrerScheme(kFooScheme, Component(0, strlen(kFooScheme))));
+
+ url::ScopedSchemeRegistryForTests scoped_registry;  // Scoped so the "foo" registration below should not outlive this test.
AddReferrerScheme(kFooScheme, url::SCHEME_WITH_HOST);
EXPECT_TRUE(IsReferrerScheme(kFooScheme, Component(0, strlen(kFooScheme))));
}
+TEST_F(URLUtilTest, ShutdownCleansUpSchemes) {
+  // "foo" must count as a referrer scheme only inside the scoped registry.
+  static const char kScheme[] = "foo";
+  const Component scheme_component(0, strlen(kScheme));
+  EXPECT_FALSE(IsReferrerScheme(kScheme, scheme_component));
+  {
+    url::ScopedSchemeRegistryForTests registry_scope;
+    AddReferrerScheme(kScheme, url::SCHEME_WITH_HOST);
+    EXPECT_TRUE(IsReferrerScheme(kScheme, scheme_component));
+  }  // Leaving the scope is expected to undo the registration.
+  EXPECT_FALSE(IsReferrerScheme(kScheme, scheme_component));
+}
+
TEST_F(URLUtilTest, GetStandardSchemeType) {
url::SchemeType scheme_type;
@@ -122,6 +142,15 @@
&scheme_type));
}
+TEST_F(URLUtilTest, GetStandardSchemes) {
+  // Expect the seven predefined scheme constants followed by the added "foo".
+  AddStandardScheme("foo", url::SCHEME_WITHOUT_AUTHORITY);
+  const std::vector<std::string> expected_schemes{
+      kHttpsScheme, kHttpScheme, kFileScheme,       kFtpScheme,
+      kWssScheme,   kWsScheme,   kFileSystemScheme, "foo"};
+  EXPECT_EQ(expected_schemes, GetStandardSchemes());
+}
+
TEST_F(URLUtilTest, ReplaceComponents) {
Parsed parsed;
RawCanonOutputT<char> output;
@@ -199,71 +228,82 @@
struct DecodeCase {
const char* input;
const char* output;
- DecodeURLResult result;
} decode_cases[] = {
- {"hello, world", "hello, world", DecodeURLResult::kAsciiOnly},
+ {"hello, world", "hello, world"},
{"%01%02%03%04%05%06%07%08%09%0a%0B%0C%0D%0e%0f/",
- "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0B\x0C\x0D\x0e\x0f/",
- DecodeURLResult::kAsciiOnly},
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0B\x0C\x0D\x0e\x0f/"},
{"%10%11%12%13%14%15%16%17%18%19%1a%1B%1C%1D%1e%1f/",
- "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1B\x1C\x1D\x1e\x1f/",
- DecodeURLResult::kAsciiOnly},
+ "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1B\x1C\x1D\x1e\x1f/"},
{"%20%21%22%23%24%25%26%27%28%29%2a%2B%2C%2D%2e%2f/",
- " !\"#$%&'()*+,-.//", DecodeURLResult::kAsciiOnly},
- {"%30%31%32%33%34%35%36%37%38%39%3a%3B%3C%3D%3e%3f/", "0123456789:;<=>?/",
- DecodeURLResult::kAsciiOnly},
- {"%40%41%42%43%44%45%46%47%48%49%4a%4B%4C%4D%4e%4f/", "@ABCDEFGHIJKLMNO/",
- DecodeURLResult::kAsciiOnly},
+ " !\"#$%&'()*+,-.//"},
+ {"%30%31%32%33%34%35%36%37%38%39%3a%3B%3C%3D%3e%3f/",
+ "0123456789:;<=>?/"},
+ {"%40%41%42%43%44%45%46%47%48%49%4a%4B%4C%4D%4e%4f/",
+ "@ABCDEFGHIJKLMNO/"},
{"%50%51%52%53%54%55%56%57%58%59%5a%5B%5C%5D%5e%5f/",
- "PQRSTUVWXYZ[\\]^_/", DecodeURLResult::kAsciiOnly},
- {"%60%61%62%63%64%65%66%67%68%69%6a%6B%6C%6D%6e%6f/", "`abcdefghijklmno/",
- DecodeURLResult::kAsciiOnly},
+ "PQRSTUVWXYZ[\\]^_/"},
+ {"%60%61%62%63%64%65%66%67%68%69%6a%6B%6C%6D%6e%6f/",
+ "`abcdefghijklmno/"},
{"%70%71%72%73%74%75%76%77%78%79%7a%7B%7C%7D%7e%7f/",
- "pqrstuvwxyz{|}~\x7f/", DecodeURLResult::kAsciiOnly},
- // Test un-UTF-8-ization.
- {"%e4%bd%a0%e5%a5%bd", "\xe4\xbd\xa0\xe5\xa5\xbd",
- DecodeURLResult::kUTF8},
+ "pqrstuvwxyz{|}~\x7f/"},
+ {"%e4%bd%a0%e5%a5%bd", "\xe4\xbd\xa0\xe5\xa5\xbd"},
};
- for (size_t i = 0; i < arraysize(decode_cases); i++) {
+ for (size_t i = 0; i < std::size(decode_cases); i++) {
const char* input = decode_cases[i].input;
- RawCanonOutputT<base::char16> output;
- EXPECT_EQ(decode_cases[i].result,
- DecodeURLEscapeSequences(input, strlen(input), &output));
+ RawCanonOutputT<char16_t> output;
+ DecodeURLEscapeSequences(input, strlen(input),
+ DecodeURLMode::kUTF8OrIsomorphic, &output);
+ EXPECT_EQ(decode_cases[i].output, base::UTF16ToUTF8(std::u16string(
+ output.data(), output.length())));
+
+ RawCanonOutputT<char16_t> output_utf8;
+ DecodeURLEscapeSequences(input, strlen(input), DecodeURLMode::kUTF8,
+ &output_utf8);
EXPECT_EQ(decode_cases[i].output,
- base::UTF16ToUTF8(base::string16(output.data(),
- output.length())));
+ base::UTF16ToUTF8(
+ std::u16string(output_utf8.data(), output_utf8.length())));
}
// Our decode should decode %00
const char zero_input[] = "%00";
- RawCanonOutputT<base::char16> zero_output;
- DecodeURLEscapeSequences(zero_input, strlen(zero_input), &zero_output);
- EXPECT_NE("%00", base::UTF16ToUTF8(
- base::string16(zero_output.data(), zero_output.length())));
+ RawCanonOutputT<char16_t> zero_output;
+ DecodeURLEscapeSequences(zero_input, strlen(zero_input), DecodeURLMode::kUTF8,
+ &zero_output);
+ EXPECT_NE("%00", base::UTF16ToUTF8(std::u16string(zero_output.data(),
+ zero_output.length())));
// Test the error behavior for invalid UTF-8.
- {
- const char invalid_input[] = "%e4%a0%e5%a5%bd";
- const base::char16 invalid_expected[6] = {0x00e4, 0x00a0, 0x00e5,
- 0x00a5, 0x00bd, 0};
- RawCanonOutputT<base::char16> invalid_output;
- EXPECT_EQ(DecodeURLResult::kIsomorphic,
- DecodeURLEscapeSequences(invalid_input, strlen(invalid_input),
- &invalid_output));
- EXPECT_EQ(base::string16(invalid_expected),
- base::string16(invalid_output.data(), invalid_output.length()));
- }
- {
- const char invalid_input[] = "%e4%a0%e5%bd";
- const base::char16 invalid_expected[5] = {0x00e4, 0x00a0, 0x00e5, 0x00bd,
- 0};
- RawCanonOutputT<base::char16> invalid_output;
- EXPECT_EQ(DecodeURLResult::kIsomorphic,
- DecodeURLEscapeSequences(invalid_input, strlen(invalid_input),
- &invalid_output));
- EXPECT_EQ(base::string16(invalid_expected),
- base::string16(invalid_output.data(), invalid_output.length()));
+ struct Utf8DecodeCase {
+ const char* input;
+ std::vector<char16_t> expected_iso;
+ std::vector<char16_t> expected_utf8;
+ } utf8_decode_cases[] = {
+ // %e5%a5%bd is a valid UTF-8 sequence. U+597D
+ {"%e4%a0%e5%a5%bd",
+ {0x00e4, 0x00a0, 0x00e5, 0x00a5, 0x00bd, 0},
+ {0xfffd, 0x597d, 0}},
+ {"%e5%a5%bd%e4%a0",
+ {0x00e5, 0x00a5, 0x00bd, 0x00e4, 0x00a0, 0},
+ {0x597d, 0xfffd, 0}},
+ {"%e4%a0%e5%bd",
+ {0x00e4, 0x00a0, 0x00e5, 0x00bd, 0},
+ {0xfffd, 0xfffd, 0}},
+ };
+
+ for (const auto& test : utf8_decode_cases) {
+ const char* input = test.input;
+ RawCanonOutputT<char16_t> output_iso;
+ DecodeURLEscapeSequences(input, strlen(input),
+ DecodeURLMode::kUTF8OrIsomorphic, &output_iso);
+ EXPECT_EQ(std::u16string(test.expected_iso.data()),
+ std::u16string(output_iso.data(), output_iso.length()));
+
+ RawCanonOutputT<char16_t> output_utf8;
+ DecodeURLEscapeSequences(input, strlen(input), DecodeURLMode::kUTF8,
+ &output_utf8);
+ EXPECT_EQ(std::u16string(test.expected_utf8.data()),
+ std::u16string(output_utf8.data(), output_utf8.length()));
}
}
@@ -291,7 +331,7 @@
"pqrstuvwxyz%7B%7C%7D~%7F"},
};
- for (size_t i = 0; i < arraysize(encode_cases); i++) {
+ for (size_t i = 0; i < std::size(encode_cases); i++) {
const char* input = encode_cases[i].input;
RawCanonOutputT<char> buffer;
EncodeURIComponent(input, strlen(input), &buffer);
@@ -354,6 +394,7 @@
{"about:blank", "#id42", true, "about:blank#id42"},
{"about:blank", " #id42", true, "about:blank#id42"},
{"about:blank#oldfrag", "#newfrag", true, "about:blank#newfrag"},
+ {"about:blank", " #id:42", true, "about:blank#id:42"},
// A surprising side effect of allowing fragments to resolve against
// any URL scheme is we might break javascript: URLs by doing so...
{"javascript:alert('foo#bar')", "#badfrag", true,
@@ -368,7 +409,7 @@
// adding the requested dot doesn't seem wrong either.
{"aaa://a\\", "aaa:.", true, "aaa://a\\."}};
- for (size_t i = 0; i < arraysize(resolve_non_standard_cases); i++) {
+ for (size_t i = 0; i < std::size(resolve_non_standard_cases); i++) {
const ResolveRelativeCase& test_data = resolve_non_standard_cases[i];
Parsed base_parsed;
ParsePathURL(test_data.base, strlen(test_data.base), false, &base_parsed);
@@ -457,6 +498,45 @@
}
}
+TEST_F(URLUtilTest, PotentiallyDanglingMarkupAfterReplacement) {
+  // The spec embeds a newline and a '<'; canonicalizing it is expected to set
+  // the potentially_dangling_markup flag.
+  const char* const spec = "htt\nps://example.com/<path";
+  RawCanonOutput<32> canon;
+  Parsed canon_parsed;
+  Canonicalize(spec, strlen(spec), false, nullptr, &canon, &canon_parsed);
+  ASSERT_TRUE(canon_parsed.potentially_dangling_markup);
+
+  // Clearing the ref must leave the flag set on the resulting Parsed.
+  Replacements<char> clear_ref;
+  clear_ref.ClearRef();
+  RawCanonOutput<32> result;
+  Parsed result_parsed;
+  ReplaceComponents(canon.data(), canon.length(), canon_parsed, clear_ref,
+                    nullptr, &result, &result_parsed);
+  EXPECT_TRUE(result_parsed.potentially_dangling_markup);
+}
+
+TEST_F(URLUtilTest, PotentiallyDanglingMarkupAfterSchemeOnlyReplacement) {
+  // The spec embeds a newline and a '<'; canonicalizing it is expected to set
+  // the potentially_dangling_markup flag.
+  const char* const spec = "http://example.com/\n/<path";
+  RawCanonOutput<32> canon;
+  Parsed canon_parsed;
+  Canonicalize(spec, strlen(spec), false, nullptr, &canon, &canon_parsed);
+  ASSERT_TRUE(canon_parsed.potentially_dangling_markup);
+
+  // Replacing only the scheme must leave the flag set on the resulting Parsed.
+  const char* const https = "https";
+  Replacements<char> scheme_only;
+  scheme_only.SetScheme(https, Component(0, strlen(https)));
+  RawCanonOutput<32> result;
+  Parsed result_parsed;
+  ReplaceComponents(canon.data(), canon.length(), canon_parsed, scheme_only,
+                    nullptr, &result, &result_parsed);
+  EXPECT_TRUE(result_parsed.potentially_dangling_markup);
+}
+
TEST_F(URLUtilTest, TestDomainIs) {
const struct {
const char* canonicalized_host;
@@ -500,4 +580,52 @@
}
}
+namespace {
+// Canonicalizes `spec`; yields nullopt when Canonicalize() reports failure.
+absl::optional<std::string> CanonicalizeSpec(base::StringPiece spec,
+                                             bool trim_path_end) {
+  std::string result;
+  StdStringCanonOutput sink(&result);
+  Parsed parsed;
+  const bool ok = Canonicalize(spec.data(), spec.size(), trim_path_end,
+                               /*charset_converter=*/nullptr, &sink, &parsed);
+  if (!ok) return absl::nullopt;
+  sink.Complete();  // Required before `result` may be read.
+  return result;
+}
+}  // namespace
+
+#if BUILDFLAG(IS_WIN)
+// Regression test for https://crbug.com/1252658.
+TEST_F(URLUtilTest, TestCanonicalizeWindowsPathWithLeadingNUL) {
+  const std::string nul(1, '\0');  // Single NUL byte prefixed to each spec.
+  const auto drive = CanonicalizeSpec(nul + "w:", /*trim_path_end=*/false);
+  EXPECT_EQ(drive, absl::make_optional("file:///W:"));
+  const auto unc =
+      CanonicalizeSpec(nul + "\\\\server\\share", /*trim_path_end=*/false);
+  EXPECT_EQ(unc, absl::make_optional("file://server/share"));
+}
+#endif
+
+// Canonicalization must be idempotent for specs starting with a control char.
+TEST_F(URLUtilTest, TestCanonicalizeIdempotencyWithLeadingControlCharacters) {
+  std::string spec = "_w:";
+  // Loop over all C0 control characters and the space character.
+  for (char c = '\0'; c <= ' '; c++) {
+    // Trace the numeric value: most of these characters are unprintable.
+    SCOPED_TRACE(testing::Message() << "c: " << static_cast<int>(c));
+    // Overwrite the first character of `spec`. Note that replacing the first
+    // character with NUL will not change the length!
+    spec[0] = c;
+
+    for (bool trim_path_end : {false, true}) {
+      SCOPED_TRACE(testing::Message() << "trim_path_end: " << trim_path_end);
+      absl::optional<std::string> canonicalized =
+          CanonicalizeSpec(spec, trim_path_end);
+      ASSERT_TRUE(canonicalized);
+      EXPECT_EQ(canonicalized, CanonicalizeSpec(*canonicalized, trim_path_end));
+    }
+  }
+}
+
} // namespace url