net/base/lookup_string_in_fixed_set_unittest.cc - cobalt - Git at Google

 // Copyright 2015 The Chromium Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "net/base/lookup_string_in_fixed_set.h"

 #include <string.h>

 #include <algorithm>
 #include <limits>
 #include <ostream>
 #include <utility>
 #include <vector>

 #include "base/base_paths.h"
 #include "base/files/file_path.h"
 #include "base/files/file_util.h"
 #include "base/path_service.h"
 #include "base/strings/string_util.h"
 #include "base/strings/stringprintf.h"
 #include "testing/gtest/include/gtest/gtest.h"

 namespace net {
 namespace {
 namespace test1 {
 #include "net/base/registry_controlled_domains/effective_tld_names_unittest1-inc.cc"
 }
 namespace test3 {
 #include "net/base/registry_controlled_domains/effective_tld_names_unittest3-inc.cc"
 }
 namespace test4 {
 #include "net/base/registry_controlled_domains/effective_tld_names_unittest4-inc.cc"
 }
 namespace test5 {
 #include "net/base/registry_controlled_domains/effective_tld_names_unittest5-inc.cc"
 }
 namespace test6 {
 #include "net/base/registry_controlled_domains/effective_tld_names_unittest6-inc.cc"
 }

 struct Expectation {
   const char* const key;
   int value;
 };

 void PrintTo(const Expectation& expectation, std::ostream* os) {
   *os << "{\"" << expectation.key << "\", " << expectation.value << "}";
 }

 class LookupStringInFixedSetTest : public testing::TestWithParam<Expectation> {
  protected:
   template <size_t N>
   int LookupInGraph(const unsigned char(&graph)[N], const char* key) {
     return LookupStringInFixedSet(graph, N, key, strlen(key));
   }
 };

 class Dafsa1Test : public LookupStringInFixedSetTest {};

 TEST_P(Dafsa1Test, BasicTest) {
   const Expectation& param = GetParam();
   EXPECT_EQ(param.value, LookupInGraph(test1::kDafsa, param.key));
 }

 const Expectation kBasicTestCases[] = {
     {"", -1},      {"j", -1},          {"jp", 0}, {"jjp", -1}, {"jpp", -1},
     {"bar.jp", 2}, {"pref.bar.jp", 1}, {"c", 2},  {"b.c", 1},  {"priv.no", 4},
 };

 // Helper function for EnumerateDafsaLanaguage.
 void RecursivelyEnumerateDafsaLanguage(const FixedSetIncrementalLookup& lookup,
                                        std::vector<char>* sequence,
                                        std::vector<std::string>* language) {
   int result = lookup.GetResultForCurrentSequence();
   if (result != kDafsaNotFound) {
     std::string line(sequence->begin(), sequence->end());
     line += base::StringPrintf(", %d", result);
     language->emplace_back(std::move(line));
   }
   // Try appending each char value.
   for (char c = std::numeric_limits<char>::min();; ++c) {
     FixedSetIncrementalLookup continued_lookup = lookup;
     if (continued_lookup.Advance(c)) {
       sequence->push_back(c);
       size_t saved_language_size = language->size();
       RecursivelyEnumerateDafsaLanguage(continued_lookup, sequence, language);
       CHECK_LT(saved_language_size, language->size())
           << "DAFSA includes a branch to nowhere at node: "
           << std::string(sequence->begin(), sequence->end());
       sequence->pop_back();
     }
     if (c == std::numeric_limits<char>::max())
       break;
   }
 }

 // Uses FixedSetIncrementalLookup to build a vector of every string in the
 // language of the DAFSA.
 template <typename Graph>
 std::vector<std::string> EnumerateDafsaLanguage(const Graph& graph) {
   FixedSetIncrementalLookup query(graph, sizeof(Graph));
   std::vector<char> sequence;
   std::vector<std::string> language;
   RecursivelyEnumerateDafsaLanguage(query, &sequence, &language);
   return language;
 }

 INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest,
                          Dafsa1Test,
                          ::testing::ValuesIn(kBasicTestCases));

 class Dafsa3Test : public LookupStringInFixedSetTest {};

 // This DAFSA is constructed so that labels begin and end with unique
 // characters, which makes it impossible to merge labels. Each inner node
 // is about 100 bytes and a one byte offset can at most add 64 bytes to
 // previous offset. Thus the paths must go over two byte offsets.
 TEST_P(Dafsa3Test, TestDafsaTwoByteOffsets) {
   const Expectation& param = GetParam();
   EXPECT_EQ(param.value, LookupInGraph(test3::kDafsa, param.key));
 }

 const Expectation kTwoByteOffsetTestCases[] = {
     {"0________________________________________________________________________"
      "____________________________0",
      0},
     {"7________________________________________________________________________"
      "____________________________7",
      4},
     {"a________________________________________________________________________"
      "____________________________8",
      -1},
 };

 INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest,
                          Dafsa3Test,
                          ::testing::ValuesIn(kTwoByteOffsetTestCases));

 class Dafsa4Test : public LookupStringInFixedSetTest {};

 // This DAFSA is constructed so that labels begin and end with unique
 // characters, which makes it impossible to merge labels. The byte array
 // has a size of ~54k. A two byte offset can add at most add 8k to the
 // previous offset. Since we can skip only forward in memory, the nodes
 // representing the return values must be located near the end of the byte
 // array. The probability that we can reach from an arbitrary inner node to
 // a return value without using a three byte offset is small (but not zero).
 // The test is repeated with some different keys and with a reasonable
 // probability at least one of the tested paths has go over a three byte
 // offset.
 TEST_P(Dafsa4Test, TestDafsaThreeByteOffsets) {
   const Expectation& param = GetParam();
   EXPECT_EQ(param.value, LookupInGraph(test4::kDafsa, param.key));
 }

 const Expectation kThreeByteOffsetTestCases[] = {
     {"Z6_______________________________________________________________________"
      "_____________________________Z6",
      0},
     {"Z7_______________________________________________________________________"
      "_____________________________Z7",
      4},
     {"Za_______________________________________________________________________"
      "_____________________________Z8",
      -1},
 };

 INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest,
                          Dafsa4Test,
                          ::testing::ValuesIn(kThreeByteOffsetTestCases));

 class Dafsa5Test : public LookupStringInFixedSetTest {};

 // This DAFSA is constructed from words with similar prefixes but distinct
 // suffixes. The DAFSA will then form a trie with the implicit source node
 // as root.
 TEST_P(Dafsa5Test, TestDafsaJoinedPrefixes) {
   const Expectation& param = GetParam();
   EXPECT_EQ(param.value, LookupInGraph(test5::kDafsa, param.key));
 }

 const Expectation kJoinedPrefixesTestCases[] = {
     {"ai", 0},   {"bj", 4},   {"aak", 0},   {"bbl", 4},
     {"aaa", -1}, {"bbb", -1}, {"aaaam", 0}, {"bbbbn", 0},
 };

 INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest,
                          Dafsa5Test,
                          ::testing::ValuesIn(kJoinedPrefixesTestCases));

 class Dafsa6Test : public LookupStringInFixedSetTest {};

 // This DAFSA is constructed from words with similar suffixes but distinct
 // prefixes. The DAFSA will then form a trie with the implicit sink node as
 // root.
 TEST_P(Dafsa6Test, TestDafsaJoinedSuffixes) {
   const Expectation& param = GetParam();
   EXPECT_EQ(param.value, LookupInGraph(test6::kDafsa, param.key));
 }

 const Expectation kJoinedSuffixesTestCases[] = {
     {"ia", 0},   {"jb", 4},   {"kaa", 0},   {"lbb", 4},
     {"aaa", -1}, {"bbb", -1}, {"maaaa", 0}, {"nbbbb", 0},
 };

 INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest,
                          Dafsa6Test,
                          ::testing::ValuesIn(kJoinedSuffixesTestCases));

 // Validates that the generated DAFSA contains exactly the same information as
 // effective_tld_names_unittest1.gperf.
 TEST(LookupStringInFixedSetTest, Dafsa1EnumerateLanguage) {
   auto language = EnumerateDafsaLanguage(test1::kDafsa);

   // These are the lines of effective_tld_names_unittest1.gperf, in sorted
   // order.
   std::vector<std::string> expected_language = {
       "ac.jp, 0",       "b.c, 1",     "bar.baz.com, 0", "bar.jp, 2",
       "baz.bar.jp, 2",  "c, 2",       "jp, 0",          "no, 0",
       "pref.bar.jp, 1", "priv.no, 4", "private, 4",     "xn--fiqs8s, 0",
   };

   EXPECT_EQ(expected_language, language);
 }

 // Validates that the generated DAFSA contains exactly the same information as
 // effective_tld_names_unittest5.gperf.
 TEST(LookupStringInFixedSetTest, Dafsa5EnumerateLanguage) {
   auto language = EnumerateDafsaLanguage(test5::kDafsa);

   std::vector<std::string> expected_language = {
       "aaaam, 0", "aak, 0", "ai, 0", "bbbbn, 0", "bbl, 4", "bj, 4",
   };

   EXPECT_EQ(expected_language, language);
 }

 // Validates that the generated DAFSA contains exactly the same information as
 // effective_tld_names_unittest6.gperf.
 TEST(LookupStringInFixedSetTest, Dafsa6EnumerateLanguage) {
   auto language = EnumerateDafsaLanguage(test6::kDafsa);

   std::vector<std::string> expected_language = {
       "ia, 0", "jb, 4", "kaa, 0", "lbb, 4", "maaaa, 0", "nbbbb, 0",
   };

   EXPECT_EQ(expected_language, language);
 }

 }  // namespace
 }  // namespace net
	// Copyright 2015 The Chromium Authors
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "net/base/lookup_string_in_fixed_set.h"

	#include <string.h>

	#include <algorithm>
	#include <limits>
	#include <ostream>
	#include <utility>
	#include <vector>

	#include "base/base_paths.h"
	#include "base/files/file_path.h"
	#include "base/files/file_util.h"
	#include "base/path_service.h"
	#include "base/strings/string_util.h"
	#include "base/strings/stringprintf.h"
	#include "testing/gtest/include/gtest/gtest.h"

	namespace net {
	namespace {
	namespace test1 {
	#include "net/base/registry_controlled_domains/effective_tld_names_unittest1-inc.cc"
	}
	namespace test3 {
	#include "net/base/registry_controlled_domains/effective_tld_names_unittest3-inc.cc"
	}
	namespace test4 {
	#include "net/base/registry_controlled_domains/effective_tld_names_unittest4-inc.cc"
	}
	namespace test5 {
	#include "net/base/registry_controlled_domains/effective_tld_names_unittest5-inc.cc"
	}
	namespace test6 {
	#include "net/base/registry_controlled_domains/effective_tld_names_unittest6-inc.cc"
	}

	struct Expectation {
	const char* const key;
	int value;
	};

	void PrintTo(const Expectation& expectation, std::ostream* os) {
	*os << "{\"" << expectation.key << "\", " << expectation.value << "}";
	}

	class LookupStringInFixedSetTest : public testing::TestWithParam<Expectation> {
	protected:
	template <size_t N>
	int LookupInGraph(const unsigned char(&graph)[N], const char* key) {
	return LookupStringInFixedSet(graph, N, key, strlen(key));
	}
	};

	class Dafsa1Test : public LookupStringInFixedSetTest {};

	TEST_P(Dafsa1Test, BasicTest) {
	const Expectation& param = GetParam();
	EXPECT_EQ(param.value, LookupInGraph(test1::kDafsa, param.key));
	}

	const Expectation kBasicTestCases[] = {
	{"", -1}, {"j", -1}, {"jp", 0}, {"jjp", -1}, {"jpp", -1},
	{"bar.jp", 2}, {"pref.bar.jp", 1}, {"c", 2}, {"b.c", 1}, {"priv.no", 4},
	};

	// Helper function for EnumerateDafsaLanaguage.
	void RecursivelyEnumerateDafsaLanguage(const FixedSetIncrementalLookup& lookup,
	std::vector<char>* sequence,
	std::vector<std::string>* language) {
	int result = lookup.GetResultForCurrentSequence();
	if (result != kDafsaNotFound) {
	std::string line(sequence->begin(), sequence->end());
	line += base::StringPrintf(", %d", result);
	language->emplace_back(std::move(line));
	}
	// Try appending each char value.
	for (char c = std::numeric_limits<char>::min();; ++c) {
	FixedSetIncrementalLookup continued_lookup = lookup;
	if (continued_lookup.Advance(c)) {
	sequence->push_back(c);
	size_t saved_language_size = language->size();
	RecursivelyEnumerateDafsaLanguage(continued_lookup, sequence, language);
	CHECK_LT(saved_language_size, language->size())
	<< "DAFSA includes a branch to nowhere at node: "
	<< std::string(sequence->begin(), sequence->end());
	sequence->pop_back();
	}
	if (c == std::numeric_limits<char>::max())
	break;
	}
	}

	// Uses FixedSetIncrementalLookup to build a vector of every string in the
	// language of the DAFSA.
	template <typename Graph>
	std::vector<std::string> EnumerateDafsaLanguage(const Graph& graph) {
	FixedSetIncrementalLookup query(graph, sizeof(Graph));
	std::vector<char> sequence;
	std::vector<std::string> language;
	RecursivelyEnumerateDafsaLanguage(query, &sequence, &language);
	return language;
	}

	INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest,
	Dafsa1Test,
	::testing::ValuesIn(kBasicTestCases));

	class Dafsa3Test : public LookupStringInFixedSetTest {};

	// This DAFSA is constructed so that labels begin and end with unique
	// characters, which makes it impossible to merge labels. Each inner node
	// is about 100 bytes and a one byte offset can at most add 64 bytes to
	// previous offset. Thus the paths must go over two byte offsets.
	TEST_P(Dafsa3Test, TestDafsaTwoByteOffsets) {
	const Expectation& param = GetParam();
	EXPECT_EQ(param.value, LookupInGraph(test3::kDafsa, param.key));
	}

	const Expectation kTwoByteOffsetTestCases[] = {
	{"0________________________________________________________________________"
	"____________________________0",
	0},
	{"7________________________________________________________________________"
	"____________________________7",
	4},
	{"a________________________________________________________________________"
	"____________________________8",
	-1},
	};

	INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest,
	Dafsa3Test,
	::testing::ValuesIn(kTwoByteOffsetTestCases));

	class Dafsa4Test : public LookupStringInFixedSetTest {};

	// This DAFSA is constructed so that labels begin and end with unique
	// characters, which makes it impossible to merge labels. The byte array
	// has a size of ~54k. A two byte offset can add at most add 8k to the
	// previous offset. Since we can skip only forward in memory, the nodes
	// representing the return values must be located near the end of the byte
	// array. The probability that we can reach from an arbitrary inner node to
	// a return value without using a three byte offset is small (but not zero).
	// The test is repeated with some different keys and with a reasonable
	// probability at least one of the tested paths has go over a three byte
	// offset.
	TEST_P(Dafsa4Test, TestDafsaThreeByteOffsets) {
	const Expectation& param = GetParam();
	EXPECT_EQ(param.value, LookupInGraph(test4::kDafsa, param.key));
	}

	const Expectation kThreeByteOffsetTestCases[] = {
	{"Z6_______________________________________________________________________"
	"_____________________________Z6",
	0},
	{"Z7_______________________________________________________________________"
	"_____________________________Z7",
	4},
	{"Za_______________________________________________________________________"
	"_____________________________Z8",
	-1},
	};

	INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest,
	Dafsa4Test,
	::testing::ValuesIn(kThreeByteOffsetTestCases));

	class Dafsa5Test : public LookupStringInFixedSetTest {};

	// This DAFSA is constructed from words with similar prefixes but distinct
	// suffixes. The DAFSA will then form a trie with the implicit source node
	// as root.
	TEST_P(Dafsa5Test, TestDafsaJoinedPrefixes) {
	const Expectation& param = GetParam();
	EXPECT_EQ(param.value, LookupInGraph(test5::kDafsa, param.key));
	}

	const Expectation kJoinedPrefixesTestCases[] = {
	{"ai", 0}, {"bj", 4}, {"aak", 0}, {"bbl", 4},
	{"aaa", -1}, {"bbb", -1}, {"aaaam", 0}, {"bbbbn", 0},
	};

	INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest,
	Dafsa5Test,
	::testing::ValuesIn(kJoinedPrefixesTestCases));

	class Dafsa6Test : public LookupStringInFixedSetTest {};

	// This DAFSA is constructed from words with similar suffixes but distinct
	// prefixes. The DAFSA will then form a trie with the implicit sink node as
	// root.
	TEST_P(Dafsa6Test, TestDafsaJoinedSuffixes) {
	const Expectation& param = GetParam();
	EXPECT_EQ(param.value, LookupInGraph(test6::kDafsa, param.key));
	}

	const Expectation kJoinedSuffixesTestCases[] = {
	{"ia", 0}, {"jb", 4}, {"kaa", 0}, {"lbb", 4},
	{"aaa", -1}, {"bbb", -1}, {"maaaa", 0}, {"nbbbb", 0},
	};

	INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest,
	Dafsa6Test,
	::testing::ValuesIn(kJoinedSuffixesTestCases));

	// Validates that the generated DAFSA contains exactly the same information as
	// effective_tld_names_unittest1.gperf.
	TEST(LookupStringInFixedSetTest, Dafsa1EnumerateLanguage) {
	auto language = EnumerateDafsaLanguage(test1::kDafsa);

	// These are the lines of effective_tld_names_unittest1.gperf, in sorted
	// order.
	std::vector<std::string> expected_language = {
	"ac.jp, 0", "b.c, 1", "bar.baz.com, 0", "bar.jp, 2",
	"baz.bar.jp, 2", "c, 2", "jp, 0", "no, 0",
	"pref.bar.jp, 1", "priv.no, 4", "private, 4", "xn--fiqs8s, 0",
	};

	EXPECT_EQ(expected_language, language);
	}

	// Validates that the generated DAFSA contains exactly the same information as
	// effective_tld_names_unittest5.gperf.
	TEST(LookupStringInFixedSetTest, Dafsa5EnumerateLanguage) {
	auto language = EnumerateDafsaLanguage(test5::kDafsa);

	std::vector<std::string> expected_language = {
	"aaaam, 0", "aak, 0", "ai, 0", "bbbbn, 0", "bbl, 4", "bj, 4",
	};

	EXPECT_EQ(expected_language, language);
	}

	// Validates that the generated DAFSA contains exactly the same information as
	// effective_tld_names_unittest6.gperf.
	TEST(LookupStringInFixedSetTest, Dafsa6EnumerateLanguage) {
	auto language = EnumerateDafsaLanguage(test6::kDafsa);

	std::vector<std::string> expected_language = {
	"ia, 0", "jb, 4", "kaa, 0", "lbb, 4", "maaaa, 0", "nbbbb, 0",
	};

	EXPECT_EQ(expected_language, language);
	}

	} // namespace
	} // namespace net