| // Copyright 2015 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "net/base/lookup_string_in_fixed_set.h" |
| |
| #include <string.h> |
| |
| #include <algorithm> |
| #include <limits> |
| #include <ostream> |
| #include <utility> |
| #include <vector> |
| |
| #include "base/base_paths.h" |
| #include "base/files/file_path.h" |
| #include "base/files/file_util.h" |
| #include "base/path_service.h" |
| #include "base/strings/string_util.h" |
| #include "base/strings/stringprintf.h" |
| #include "testing/gtest/include/gtest/gtest.h" |
| |
| namespace net { |
| namespace { |
| namespace test1 { |
| #include "net/base/registry_controlled_domains/effective_tld_names_unittest1-inc.cc" |
| } |
| namespace test3 { |
| #include "net/base/registry_controlled_domains/effective_tld_names_unittest3-inc.cc" |
| } |
| namespace test4 { |
| #include "net/base/registry_controlled_domains/effective_tld_names_unittest4-inc.cc" |
| } |
| namespace test5 { |
| #include "net/base/registry_controlled_domains/effective_tld_names_unittest5-inc.cc" |
| } |
| namespace test6 { |
| #include "net/base/registry_controlled_domains/effective_tld_names_unittest6-inc.cc" |
| } |
| |
| struct Expectation { |
| const char* const key; |
| int value; |
| }; |
| |
| void PrintTo(const Expectation& expectation, std::ostream* os) { |
| *os << "{\"" << expectation.key << "\", " << expectation.value << "}"; |
| } |
| |
| class LookupStringInFixedSetTest : public testing::TestWithParam<Expectation> { |
| protected: |
| template <size_t N> |
| int LookupInGraph(const unsigned char(&graph)[N], const char* key) { |
| return LookupStringInFixedSet(graph, N, key, strlen(key)); |
| } |
| }; |
| |
| class Dafsa1Test : public LookupStringInFixedSetTest {}; |
| |
| TEST_P(Dafsa1Test, BasicTest) { |
| const Expectation& param = GetParam(); |
| EXPECT_EQ(param.value, LookupInGraph(test1::kDafsa, param.key)); |
| } |
| |
| const Expectation kBasicTestCases[] = { |
| {"", -1}, {"j", -1}, {"jp", 0}, {"jjp", -1}, {"jpp", -1}, |
| {"bar.jp", 2}, {"pref.bar.jp", 1}, {"c", 2}, {"b.c", 1}, {"priv.no", 4}, |
| }; |
| |
| // Helper function for EnumerateDafsaLanaguage. |
| void RecursivelyEnumerateDafsaLanguage(const FixedSetIncrementalLookup& lookup, |
| std::vector<char>* sequence, |
| std::vector<std::string>* language) { |
| int result = lookup.GetResultForCurrentSequence(); |
| if (result != kDafsaNotFound) { |
| std::string line(sequence->begin(), sequence->end()); |
| line += base::StringPrintf(", %d", result); |
| language->emplace_back(std::move(line)); |
| } |
| // Try appending each char value. |
| for (char c = std::numeric_limits<char>::min();; ++c) { |
| FixedSetIncrementalLookup continued_lookup = lookup; |
| if (continued_lookup.Advance(c)) { |
| sequence->push_back(c); |
| size_t saved_language_size = language->size(); |
| RecursivelyEnumerateDafsaLanguage(continued_lookup, sequence, language); |
| CHECK_LT(saved_language_size, language->size()) |
| << "DAFSA includes a branch to nowhere at node: " |
| << std::string(sequence->begin(), sequence->end()); |
| sequence->pop_back(); |
| } |
| if (c == std::numeric_limits<char>::max()) |
| break; |
| } |
| } |
| |
| // Uses FixedSetIncrementalLookup to build a vector of every string in the |
| // language of the DAFSA. |
| template <typename Graph> |
| std::vector<std::string> EnumerateDafsaLanguage(const Graph& graph) { |
| FixedSetIncrementalLookup query(graph, sizeof(Graph)); |
| std::vector<char> sequence; |
| std::vector<std::string> language; |
| RecursivelyEnumerateDafsaLanguage(query, &sequence, &language); |
| return language; |
| } |
| |
| INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest, |
| Dafsa1Test, |
| ::testing::ValuesIn(kBasicTestCases)); |
| |
| class Dafsa3Test : public LookupStringInFixedSetTest {}; |
| |
| // This DAFSA is constructed so that labels begin and end with unique |
| // characters, which makes it impossible to merge labels. Each inner node |
| // is about 100 bytes and a one byte offset can at most add 64 bytes to |
| // previous offset. Thus the paths must go over two byte offsets. |
| TEST_P(Dafsa3Test, TestDafsaTwoByteOffsets) { |
| const Expectation& param = GetParam(); |
| EXPECT_EQ(param.value, LookupInGraph(test3::kDafsa, param.key)); |
| } |
| |
| const Expectation kTwoByteOffsetTestCases[] = { |
| {"0________________________________________________________________________" |
| "____________________________0", |
| 0}, |
| {"7________________________________________________________________________" |
| "____________________________7", |
| 4}, |
| {"a________________________________________________________________________" |
| "____________________________8", |
| -1}, |
| }; |
| |
| INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest, |
| Dafsa3Test, |
| ::testing::ValuesIn(kTwoByteOffsetTestCases)); |
| |
| class Dafsa4Test : public LookupStringInFixedSetTest {}; |
| |
| // This DAFSA is constructed so that labels begin and end with unique |
| // characters, which makes it impossible to merge labels. The byte array |
| // has a size of ~54k. A two byte offset can add at most add 8k to the |
| // previous offset. Since we can skip only forward in memory, the nodes |
| // representing the return values must be located near the end of the byte |
| // array. The probability that we can reach from an arbitrary inner node to |
| // a return value without using a three byte offset is small (but not zero). |
| // The test is repeated with some different keys and with a reasonable |
| // probability at least one of the tested paths has go over a three byte |
| // offset. |
| TEST_P(Dafsa4Test, TestDafsaThreeByteOffsets) { |
| const Expectation& param = GetParam(); |
| EXPECT_EQ(param.value, LookupInGraph(test4::kDafsa, param.key)); |
| } |
| |
| const Expectation kThreeByteOffsetTestCases[] = { |
| {"Z6_______________________________________________________________________" |
| "_____________________________Z6", |
| 0}, |
| {"Z7_______________________________________________________________________" |
| "_____________________________Z7", |
| 4}, |
| {"Za_______________________________________________________________________" |
| "_____________________________Z8", |
| -1}, |
| }; |
| |
| INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest, |
| Dafsa4Test, |
| ::testing::ValuesIn(kThreeByteOffsetTestCases)); |
| |
| class Dafsa5Test : public LookupStringInFixedSetTest {}; |
| |
| // This DAFSA is constructed from words with similar prefixes but distinct |
| // suffixes. The DAFSA will then form a trie with the implicit source node |
| // as root. |
| TEST_P(Dafsa5Test, TestDafsaJoinedPrefixes) { |
| const Expectation& param = GetParam(); |
| EXPECT_EQ(param.value, LookupInGraph(test5::kDafsa, param.key)); |
| } |
| |
| const Expectation kJoinedPrefixesTestCases[] = { |
| {"ai", 0}, {"bj", 4}, {"aak", 0}, {"bbl", 4}, |
| {"aaa", -1}, {"bbb", -1}, {"aaaam", 0}, {"bbbbn", 0}, |
| }; |
| |
| INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest, |
| Dafsa5Test, |
| ::testing::ValuesIn(kJoinedPrefixesTestCases)); |
| |
| class Dafsa6Test : public LookupStringInFixedSetTest {}; |
| |
| // This DAFSA is constructed from words with similar suffixes but distinct |
| // prefixes. The DAFSA will then form a trie with the implicit sink node as |
| // root. |
| TEST_P(Dafsa6Test, TestDafsaJoinedSuffixes) { |
| const Expectation& param = GetParam(); |
| EXPECT_EQ(param.value, LookupInGraph(test6::kDafsa, param.key)); |
| } |
| |
| const Expectation kJoinedSuffixesTestCases[] = { |
| {"ia", 0}, {"jb", 4}, {"kaa", 0}, {"lbb", 4}, |
| {"aaa", -1}, {"bbb", -1}, {"maaaa", 0}, {"nbbbb", 0}, |
| }; |
| |
| INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest, |
| Dafsa6Test, |
| ::testing::ValuesIn(kJoinedSuffixesTestCases)); |
| |
| // Validates that the generated DAFSA contains exactly the same information as |
| // effective_tld_names_unittest1.gperf. |
| TEST(LookupStringInFixedSetTest, Dafsa1EnumerateLanguage) { |
| auto language = EnumerateDafsaLanguage(test1::kDafsa); |
| |
| // These are the lines of effective_tld_names_unittest1.gperf, in sorted |
| // order. |
| std::vector<std::string> expected_language = { |
| "ac.jp, 0", "b.c, 1", "bar.baz.com, 0", "bar.jp, 2", |
| "baz.bar.jp, 2", "c, 2", "jp, 0", "no, 0", |
| "pref.bar.jp, 1", "priv.no, 4", "private, 4", "xn--fiqs8s, 0", |
| }; |
| |
| EXPECT_EQ(expected_language, language); |
| } |
| |
| // Validates that the generated DAFSA contains exactly the same information as |
| // effective_tld_names_unittest5.gperf. |
| TEST(LookupStringInFixedSetTest, Dafsa5EnumerateLanguage) { |
| auto language = EnumerateDafsaLanguage(test5::kDafsa); |
| |
| std::vector<std::string> expected_language = { |
| "aaaam, 0", "aak, 0", "ai, 0", "bbbbn, 0", "bbl, 4", "bj, 4", |
| }; |
| |
| EXPECT_EQ(expected_language, language); |
| } |
| |
| // Validates that the generated DAFSA contains exactly the same information as |
| // effective_tld_names_unittest6.gperf. |
| TEST(LookupStringInFixedSetTest, Dafsa6EnumerateLanguage) { |
| auto language = EnumerateDafsaLanguage(test6::kDafsa); |
| |
| std::vector<std::string> expected_language = { |
| "ia, 0", "jb, 4", "kaa, 0", "lbb, 4", "maaaa, 0", "nbbbb, 0", |
| }; |
| |
| EXPECT_EQ(expected_language, language); |
| } |
| |
| } // namespace |
| } // namespace net |