|  | // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style license that can be | 
|  | // found in the LICENSE file. | 
|  |  | 
#include <algorithm>
#include <string>
#include <vector>

#include "net/base/escape.h"

#include "base/basictypes.h"
#include "base/i18n/icu_string_conversions.h"
#include "base/string_util.h"
#include "base/stringprintf.h"
#include "base/utf_string_conversions.h"
#include "testing/gtest/include/gtest/gtest.h"
|  |  | 
|  | namespace net { | 
|  | namespace { | 
|  |  | 
|  | const size_t kNpos = string16::npos; | 
|  |  | 
// One row of an escaping test table.
struct EscapeCase {
  // Raw text handed to the escaping function under test.
  const char* input;

  // Exact escaped text the function is expected to return.
  const char* output;
};
|  |  | 
|  | struct UnescapeURLCase { | 
|  | const wchar_t* input; | 
|  | UnescapeRule::Type rules; | 
|  | const wchar_t* output; | 
|  | }; | 
|  |  | 
|  | struct UnescapeURLCaseASCII { | 
|  | const char* input; | 
|  | UnescapeRule::Type rules; | 
|  | const char* output; | 
|  | }; | 
|  |  | 
// One row of the UnescapeAndDecodeUTF8URLComponent test table. The same
// |input| is run through three calls, each with its own expected result.
struct UnescapeAndDecodeCase {
  // Escaped text fed to every call.
  const char* input;

  // The expected output of UnescapeURLComponent() with UnescapeRule::NORMAL.
  const char* url_unescaped;

  // The expected output of UnescapeURLComponent() with
  // UnescapeRule::REPLACE_PLUS_WITH_SPACE.
  const char* query_unescaped;

  // The expected output of UnescapeAndDecodeUTF8URLComponent() with
  // UnescapeRule::NORMAL, as a wide literal converted with WideToUTF16().
  const wchar_t* decoded;
};
|  |  | 
// One row of the offset-adjustment test table.
struct AdjustOffsetCase {
  // Escaped text passed to UnescapeAndDecodeUTF8URLComponent().
  const char* input;

  // Offset into |input| handed to the call through its offset pointer.
  size_t input_offset;

  // Value the offset is expected to hold after the call; std::string::npos
  // is used in the table where no valid offset is expected.
  size_t output_offset;
};
|  |  | 
// One row of an HTML escaping/unescaping test table. Both the EscapeForHTML
// and the UnescapeForHTML tests use it, so |input| is simply whatever is fed
// to the function under test.
struct EscapeForHTMLCase {
  // Text handed to the function under test.
  const char* input;

  // Exact text the function is expected to return.
  const char* expected_output;
};
|  |  | 
|  | TEST(EscapeTest, EscapeTextForFormSubmission) { | 
|  | const EscapeCase escape_cases[] = { | 
|  | {"foo", "foo"}, | 
|  | {"foo bar", "foo+bar"}, | 
|  | {"foo++", "foo%2B%2B"} | 
|  | }; | 
|  | for (size_t i = 0; i < arraysize(escape_cases); ++i) { | 
|  | EscapeCase value = escape_cases[i]; | 
|  | EXPECT_EQ(value.output, EscapeQueryParamValue(value.input, true)); | 
|  | } | 
|  |  | 
|  | const EscapeCase escape_cases_no_plus[] = { | 
|  | {"foo", "foo"}, | 
|  | {"foo bar", "foo%20bar"}, | 
|  | {"foo++", "foo%2B%2B"} | 
|  | }; | 
|  | for (size_t i = 0; i < arraysize(escape_cases_no_plus); ++i) { | 
|  | EscapeCase value = escape_cases_no_plus[i]; | 
|  | EXPECT_EQ(value.output, EscapeQueryParamValue(value.input, false)); | 
|  | } | 
|  |  | 
|  | // Test all the values in we're supposed to be escaping. | 
|  | const std::string no_escape( | 
|  | "abcdefghijklmnopqrstuvwxyz" | 
|  | "ABCDEFGHIJKLMNOPQRSTUVWXYZ" | 
|  | "0123456789" | 
|  | "!'()*-._~"); | 
|  | for (int i = 0; i < 256; ++i) { | 
|  | std::string in; | 
|  | in.push_back(i); | 
|  | std::string out = EscapeQueryParamValue(in, true); | 
|  | if (0 == i) { | 
|  | EXPECT_EQ(out, std::string("%00")); | 
|  | } else if (32 == i) { | 
|  | // Spaces are plus escaped like web forms. | 
|  | EXPECT_EQ(out, std::string("+")); | 
|  | } else if (no_escape.find(in) == std::string::npos) { | 
|  | // Check %hex escaping | 
|  | std::string expected = base::StringPrintf("%%%02X", i); | 
|  | EXPECT_EQ(expected, out); | 
|  | } else { | 
|  | // No change for things in the no_escape list. | 
|  | EXPECT_EQ(out, in); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | TEST(EscapeTest, EscapePath) { | 
|  | ASSERT_EQ( | 
|  | // Most of the character space we care about, un-escaped | 
|  | EscapePath( | 
|  | "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;" | 
|  | "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ" | 
|  | "[\\]^_`abcdefghijklmnopqrstuvwxyz" | 
|  | "{|}~\x7f\x80\xff"), | 
|  | // Escaped | 
|  | "%02%0A%1D%20!%22%23$%25&'()*+,-./0123456789%3A;" | 
|  | "%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ" | 
|  | "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz" | 
|  | "%7B%7C%7D~%7F%80%FF"); | 
|  | } | 
|  |  | 
|  | TEST(EscapeTest, EscapeUrlEncodedData) { | 
|  | ASSERT_EQ( | 
|  | // Most of the character space we care about, un-escaped | 
|  | EscapeUrlEncodedData( | 
|  | "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;" | 
|  | "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ" | 
|  | "[\\]^_`abcdefghijklmnopqrstuvwxyz" | 
|  | "{|}~\x7f\x80\xff", true), | 
|  | // Escaped | 
|  | "%02%0A%1D+!%22%23%24%25%26%27()*%2B,-./0123456789:%3B" | 
|  | "%3C%3D%3E%3F%40ABCDEFGHIJKLMNOPQRSTUVWXYZ" | 
|  | "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz" | 
|  | "%7B%7C%7D~%7F%80%FF"); | 
|  | } | 
|  |  | 
|  | TEST(EscapeTest, EscapeUrlEncodedDataSpace) { | 
|  | ASSERT_EQ(EscapeUrlEncodedData("a b", true), "a+b"); | 
|  | ASSERT_EQ(EscapeUrlEncodedData("a b", false), "a%20b"); | 
|  | } | 
|  |  | 
|  | TEST(EscapeTest, UnescapeURLComponentASCII) { | 
|  | const UnescapeURLCaseASCII unescape_cases[] = { | 
|  | {"", UnescapeRule::NORMAL, ""}, | 
|  | {"%2", UnescapeRule::NORMAL, "%2"}, | 
|  | {"%%%%%%", UnescapeRule::NORMAL, "%%%%%%"}, | 
|  | {"Don't escape anything", UnescapeRule::NORMAL, "Don't escape anything"}, | 
|  | {"Invalid %escape %2", UnescapeRule::NORMAL, "Invalid %escape %2"}, | 
|  | {"Some%20random text %25%2dOK", UnescapeRule::NONE, | 
|  | "Some%20random text %25%2dOK"}, | 
|  | {"Some%20random text %25%2dOK", UnescapeRule::NORMAL, | 
|  | "Some%20random text %25-OK"}, | 
|  | {"Some%20random text %25%2dOK", UnescapeRule::SPACES, | 
|  | "Some random text %25-OK"}, | 
|  | {"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS, | 
|  | "Some%20random text %-OK"}, | 
|  | {"Some%20random text %25%2dOK", | 
|  | UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, | 
|  | "Some random text %-OK"}, | 
|  | {"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, "\xA0\xB1\xC2\xD3\xE4\xF5"}, | 
|  | {"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, "\xAa\xBb\xCc\xDd\xEe\xFf"}, | 
|  | // Certain URL-sensitive characters should not be unescaped unless asked. | 
|  | {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES, | 
|  | "Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"}, | 
|  | {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", | 
|  | UnescapeRule::URL_SPECIAL_CHARS, | 
|  | "Hello%20%13%10world ## ?? == && %% ++"}, | 
|  | // We can neither escape nor unescape '@' since some websites expect it to | 
|  | // be preserved as either '@' or "%40". | 
|  | // See http://b/996720 and http://crbug.com/23933 . | 
|  | {"me@my%40example", UnescapeRule::NORMAL, "me@my%40example"}, | 
|  | // Control characters. | 
|  | {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS, | 
|  | "%01%02%03%04%05%06%07%08%09 %"}, | 
|  | {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS, | 
|  | "\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"}, | 
|  | {"Hello%20%13%10%02", UnescapeRule::SPACES, "Hello %13%10%02"}, | 
|  | {"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS, "Hello%20\x13\x10\x02"}, | 
|  | }; | 
|  |  | 
|  | for (size_t i = 0; i < arraysize(unescape_cases); i++) { | 
|  | std::string str(unescape_cases[i].input); | 
|  | EXPECT_EQ(std::string(unescape_cases[i].output), | 
|  | UnescapeURLComponent(str, unescape_cases[i].rules)); | 
|  | } | 
|  |  | 
|  | // Test the NULL character unescaping (which wouldn't work above since those | 
|  | // are just char pointers). | 
|  | std::string input("Null"); | 
|  | input.push_back(0);  // Also have a NULL in the input. | 
|  | input.append("%00%39Test"); | 
|  |  | 
|  | // When we're unescaping NULLs | 
|  | std::string expected("Null"); | 
|  | expected.push_back(0); | 
|  | expected.push_back(0); | 
|  | expected.append("9Test"); | 
|  | EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS)); | 
|  |  | 
|  | // When we're not unescaping NULLs. | 
|  | expected = "Null"; | 
|  | expected.push_back(0); | 
|  | expected.append("%009Test"); | 
|  | EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL)); | 
|  | } | 
|  |  | 
|  | TEST(EscapeTest, UnescapeURLComponent) { | 
|  | const UnescapeURLCase unescape_cases[] = { | 
|  | {L"", UnescapeRule::NORMAL, L""}, | 
|  | {L"%2", UnescapeRule::NORMAL, L"%2"}, | 
|  | {L"%%%%%%", UnescapeRule::NORMAL, L"%%%%%%"}, | 
|  | {L"Don't escape anything", UnescapeRule::NORMAL, L"Don't escape anything"}, | 
|  | {L"Invalid %escape %2", UnescapeRule::NORMAL, L"Invalid %escape %2"}, | 
|  | {L"Some%20random text %25%2dOK", UnescapeRule::NONE, | 
|  | L"Some%20random text %25%2dOK"}, | 
|  | {L"Some%20random text %25%2dOK", UnescapeRule::NORMAL, | 
|  | L"Some%20random text %25-OK"}, | 
|  | {L"Some%20random text %25%2dOK", UnescapeRule::SPACES, | 
|  | L"Some random text %25-OK"}, | 
|  | {L"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS, | 
|  | L"Some%20random text %-OK"}, | 
|  | {L"Some%20random text %25%2dOK", | 
|  | UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, | 
|  | L"Some random text %-OK"}, | 
|  | {L"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, L"\xA0\xB1\xC2\xD3\xE4\xF5"}, | 
|  | {L"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, L"\xAa\xBb\xCc\xDd\xEe\xFf"}, | 
|  | // Certain URL-sensitive characters should not be unescaped unless asked. | 
|  | {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES, | 
|  | L"Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"}, | 
|  | {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", | 
|  | UnescapeRule::URL_SPECIAL_CHARS, | 
|  | L"Hello%20%13%10world ## ?? == && %% ++"}, | 
|  | // We can neither escape nor unescape '@' since some websites expect it to | 
|  | // be preserved as either '@' or "%40". | 
|  | // See http://b/996720 and http://crbug.com/23933 . | 
|  | {L"me@my%40example", UnescapeRule::NORMAL, L"me@my%40example"}, | 
|  | // Control characters. | 
|  | {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS, | 
|  | L"%01%02%03%04%05%06%07%08%09 %"}, | 
|  | {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS, | 
|  | L"\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"}, | 
|  | {L"Hello%20%13%10%02", UnescapeRule::SPACES, L"Hello %13%10%02"}, | 
|  | {L"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS, | 
|  | L"Hello%20\x13\x10\x02"}, | 
|  | {L"Hello\x9824\x9827", UnescapeRule::CONTROL_CHARS, | 
|  | L"Hello\x9824\x9827"}, | 
|  | }; | 
|  |  | 
|  | for (size_t i = 0; i < arraysize(unescape_cases); i++) { | 
|  | string16 str(WideToUTF16(unescape_cases[i].input)); | 
|  | EXPECT_EQ(WideToUTF16(unescape_cases[i].output), | 
|  | UnescapeURLComponent(str, unescape_cases[i].rules)); | 
|  | } | 
|  |  | 
|  | // Test the NULL character unescaping (which wouldn't work above since those | 
|  | // are just char pointers). | 
|  | string16 input(WideToUTF16(L"Null")); | 
|  | input.push_back(0);  // Also have a NULL in the input. | 
|  | input.append(WideToUTF16(L"%00%39Test")); | 
|  |  | 
|  | // When we're unescaping NULLs | 
|  | string16 expected(WideToUTF16(L"Null")); | 
|  | expected.push_back(0); | 
|  | expected.push_back(0); | 
|  | expected.append(ASCIIToUTF16("9Test")); | 
|  | EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS)); | 
|  |  | 
|  | // When we're not unescaping NULLs. | 
|  | expected = WideToUTF16(L"Null"); | 
|  | expected.push_back(0); | 
|  | expected.append(WideToUTF16(L"%009Test")); | 
|  | EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL)); | 
|  | } | 
|  |  | 
|  | TEST(EscapeTest, UnescapeAndDecodeUTF8URLComponent) { | 
|  | const UnescapeAndDecodeCase unescape_cases[] = { | 
|  | { "%", | 
|  | "%", | 
|  | "%", | 
|  | L"%"}, | 
|  | { "+", | 
|  | "+", | 
|  | " ", | 
|  | L"+"}, | 
|  | { "%2+", | 
|  | "%2+", | 
|  | "%2 ", | 
|  | L"%2+"}, | 
|  | { "+%%%+%%%", | 
|  | "+%%%+%%%", | 
|  | " %%% %%%", | 
|  | L"+%%%+%%%"}, | 
|  | { "Don't escape anything", | 
|  | "Don't escape anything", | 
|  | "Don't escape anything", | 
|  | L"Don't escape anything"}, | 
|  | { "+Invalid %escape %2+", | 
|  | "+Invalid %escape %2+", | 
|  | " Invalid %escape %2 ", | 
|  | L"+Invalid %escape %2+"}, | 
|  | { "Some random text %25%2dOK", | 
|  | "Some random text %25-OK", | 
|  | "Some random text %25-OK", | 
|  | L"Some random text %25-OK"}, | 
|  | { "%01%02%03%04%05%06%07%08%09", | 
|  | "%01%02%03%04%05%06%07%08%09", | 
|  | "%01%02%03%04%05%06%07%08%09", | 
|  | L"%01%02%03%04%05%06%07%08%09"}, | 
|  | { "%E4%BD%A0+%E5%A5%BD", | 
|  | "\xE4\xBD\xA0+\xE5\xA5\xBD", | 
|  | "\xE4\xBD\xA0 \xE5\xA5\xBD", | 
|  | L"\x4f60+\x597d"}, | 
|  | { "%ED%ED",  // Invalid UTF-8. | 
|  | "\xED\xED", | 
|  | "\xED\xED", | 
|  | L"%ED%ED"},  // Invalid UTF-8 -> kept unescaped. | 
|  | }; | 
|  |  | 
|  | for (size_t i = 0; i < arraysize(unescape_cases); i++) { | 
|  | std::string unescaped = UnescapeURLComponent(unescape_cases[i].input, | 
|  | UnescapeRule::NORMAL); | 
|  | EXPECT_EQ(std::string(unescape_cases[i].url_unescaped), unescaped); | 
|  |  | 
|  | unescaped = UnescapeURLComponent(unescape_cases[i].input, | 
|  | UnescapeRule::REPLACE_PLUS_WITH_SPACE); | 
|  | EXPECT_EQ(std::string(unescape_cases[i].query_unescaped), unescaped); | 
|  |  | 
|  | // TODO: Need to test unescape_spaces and unescape_percent. | 
|  | string16 decoded = UnescapeAndDecodeUTF8URLComponent( | 
|  | unescape_cases[i].input, UnescapeRule::NORMAL, NULL); | 
|  | EXPECT_EQ(WideToUTF16(unescape_cases[i].decoded), decoded); | 
|  | } | 
|  | } | 
|  |  | 
|  | TEST(EscapeTest, AdjustOffset) { | 
|  | const AdjustOffsetCase adjust_cases[] = { | 
|  | {"", 0, std::string::npos}, | 
|  | {"test", 0, 0}, | 
|  | {"test", 2, 2}, | 
|  | {"test", 4, std::string::npos}, | 
|  | {"test", std::string::npos, std::string::npos}, | 
|  | {"%2dtest", 6, 4}, | 
|  | {"%2dtest", 2, std::string::npos}, | 
|  | {"test%2d", 2, 2}, | 
|  | {"%E4%BD%A0+%E5%A5%BD", 9, 1}, | 
|  | {"%E4%BD%A0+%E5%A5%BD", 6, std::string::npos}, | 
|  | {"%ED%B0%80+%E5%A5%BD", 6, 6}, | 
|  | }; | 
|  |  | 
|  | for (size_t i = 0; i < arraysize(adjust_cases); i++) { | 
|  | size_t offset = adjust_cases[i].input_offset; | 
|  | UnescapeAndDecodeUTF8URLComponent(adjust_cases[i].input, | 
|  | UnescapeRule::NORMAL, &offset); | 
|  | EXPECT_EQ(adjust_cases[i].output_offset, offset); | 
|  | } | 
|  | } | 
|  |  | 
|  | TEST(EscapeTest, EscapeForHTML) { | 
|  | const EscapeForHTMLCase tests[] = { | 
|  | { "hello", "hello" }, | 
|  | { "<hello>", "<hello>" }, | 
|  | { "don\'t mess with me", "don't mess with me" }, | 
|  | }; | 
|  | for (size_t i = 0; i < arraysize(tests); ++i) { | 
|  | std::string result = EscapeForHTML(std::string(tests[i].input)); | 
|  | EXPECT_EQ(std::string(tests[i].expected_output), result); | 
|  | } | 
|  | } | 
|  |  | 
|  | TEST(EscapeTest, UnescapeForHTML) { | 
|  | const EscapeForHTMLCase tests[] = { | 
|  | { "", "" }, | 
|  | { "<hello>", "<hello>" }, | 
|  | { "don't mess with me", "don\'t mess with me" }, | 
|  | { "<>&"'", "<>&\"'" }, | 
|  | { "& lt; & ; &; '", "& lt; & ; &; '" }, | 
|  | { "&", "&" }, | 
|  | { """, "\"" }, | 
|  | { "'", "'" }, | 
|  | { "<", "<" }, | 
|  | { ">", ">" }, | 
|  | { "& &", "& &" }, | 
|  | }; | 
|  | for (size_t i = 0; i < arraysize(tests); ++i) { | 
|  | string16 result = UnescapeForHTML(ASCIIToUTF16(tests[i].input)); | 
|  | EXPECT_EQ(ASCIIToUTF16(tests[i].expected_output), result); | 
|  | } | 
|  | } | 
|  |  | 
|  | TEST(EscapeTest, AdjustEncodingOffset) { | 
|  | // Imagine we have strings as shown in the following cases where the | 
|  | // %XX's represent encoded characters | 
|  |  | 
|  | // 1: abc%ECdef ==> abcXdef | 
|  | std::vector<size_t> offsets; | 
|  | for (size_t t = 0; t < 9; ++t) | 
|  | offsets.push_back(t); | 
|  | internal::AdjustEncodingOffset::Adjustments adjustments; | 
|  | adjustments.push_back(3); | 
|  | std::for_each(offsets.begin(), offsets.end(), | 
|  | internal::AdjustEncodingOffset(adjustments)); | 
|  | size_t expected_1[] = {0, 1, 2, 3, kNpos, kNpos, 4, 5, 6}; | 
|  | EXPECT_EQ(offsets.size(), arraysize(expected_1)); | 
|  | for (size_t i = 0; i < arraysize(expected_1); ++i) | 
|  | EXPECT_EQ(expected_1[i], offsets[i]); | 
|  |  | 
|  |  | 
|  | // 2: %ECabc%EC%ECdef%EC ==> XabcXXdefX | 
|  | offsets.clear(); | 
|  | for (size_t t = 0; t < 18; ++t) | 
|  | offsets.push_back(t); | 
|  | adjustments.clear(); | 
|  | adjustments.push_back(0); | 
|  | adjustments.push_back(6); | 
|  | adjustments.push_back(9); | 
|  | adjustments.push_back(15); | 
|  | std::for_each(offsets.begin(), offsets.end(), | 
|  | internal::AdjustEncodingOffset(adjustments)); | 
|  | size_t expected_2[] = {0, kNpos, kNpos, 1, 2, 3, 4, kNpos, kNpos, 5, kNpos, | 
|  | kNpos, 6, 7, 8, 9, kNpos, kNpos}; | 
|  | EXPECT_EQ(offsets.size(), arraysize(expected_2)); | 
|  | for (size_t i = 0; i < arraysize(expected_2); ++i) | 
|  | EXPECT_EQ(expected_2[i], offsets[i]); | 
|  | } | 
|  |  | 
|  | }  // namespace | 
|  | }  // namespace net |