| // Copyright 2012 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "net/dns/dns_hosts.h" |
| |
| #include <string> |
| #include <utility> |
| |
| #include "base/check.h" |
| #include "base/files/file_path.h" |
| #include "base/files/file_util.h" |
| #include "base/metrics/histogram_functions.h" |
| #include "base/strings/string_piece.h" |
| #include "base/strings/string_util.h" |
| #include "base/trace_event/memory_usage_estimator.h" |
| #include "base/tracing_buildflags.h" |
| #include "build/build_config.h" |
| #include "net/base/cronet_buildflags.h" |
| #include "net/base/url_util.h" |
| #include "net/dns/dns_util.h" |
| #include "url/url_canon.h" |
| |
| using base::StringPiece; |
| |
| namespace net { |
| |
| namespace { |
| |
| // Parses the contents of a hosts file. Returns one token (IP or hostname) at |
| // a time. Doesn't copy anything; accepts the file as a StringPiece and |
| // returns tokens as StringPieces. |
| class HostsParser { |
| public: |
| explicit HostsParser(const StringPiece& text, ParseHostsCommaMode comma_mode) |
| : text_(text), |
| data_(text.data()), |
| end_(text.size()), |
| comma_mode_(comma_mode) {} |
| |
| HostsParser(const HostsParser&) = delete; |
| HostsParser& operator=(const HostsParser&) = delete; |
| |
| // Advances to the next token (IP or hostname). Returns whether another |
| // token was available. |token_is_ip| and |token| can be used to find out |
| // the type and text of the token. |
| bool Advance() { |
| bool next_is_ip = (pos_ == 0); |
| while (pos_ < end_ && pos_ != std::string::npos) { |
| switch (text_[pos_]) { |
| case ' ': |
| case '\t': |
| SkipWhitespace(); |
| break; |
| |
| case '\r': |
| case '\n': |
| next_is_ip = true; |
| pos_++; |
| break; |
| |
| case '#': |
| SkipRestOfLine(); |
| break; |
| |
| case ',': |
| if (comma_mode_ == PARSE_HOSTS_COMMA_IS_WHITESPACE) { |
| SkipWhitespace(); |
| break; |
| } |
| |
| // If comma_mode_ is COMMA_IS_TOKEN, fall through: |
| [[fallthrough]]; |
| |
| default: { |
| size_t token_start = pos_; |
| SkipToken(); |
| size_t token_end = (pos_ == std::string::npos) ? end_ : pos_; |
| |
| token_ = StringPiece(data_ + token_start, token_end - token_start); |
| token_is_ip_ = next_is_ip; |
| |
| return true; |
| } |
| } |
| } |
| |
| return false; |
| } |
| |
| // Fast-forwards the parser to the next line. Should be called if an IP |
| // address doesn't parse, to avoid wasting time tokenizing hostnames that |
| // will be ignored. |
| void SkipRestOfLine() { pos_ = text_.find("\n", pos_); } |
| |
| // Returns whether the last-parsed token is an IP address (true) or a |
| // hostname (false). |
| bool token_is_ip() { return token_is_ip_; } |
| |
| // Returns the text of the last-parsed token as a StringPiece referencing |
| // the same underlying memory as the StringPiece passed to the constructor. |
| // Returns an empty StringPiece if no token has been parsed or the end of |
| // the input string has been reached. |
| const StringPiece& token() { return token_; } |
| |
| private: |
| void SkipToken() { |
| switch (comma_mode_) { |
| case PARSE_HOSTS_COMMA_IS_TOKEN: |
| pos_ = text_.find_first_of(" \t\n\r#", pos_); |
| break; |
| case PARSE_HOSTS_COMMA_IS_WHITESPACE: |
| pos_ = text_.find_first_of(" ,\t\n\r#", pos_); |
| break; |
| } |
| } |
| |
| void SkipWhitespace() { |
| switch (comma_mode_) { |
| case PARSE_HOSTS_COMMA_IS_TOKEN: |
| pos_ = text_.find_first_not_of(" \t", pos_); |
| break; |
| case PARSE_HOSTS_COMMA_IS_WHITESPACE: |
| pos_ = text_.find_first_not_of(" ,\t", pos_); |
| break; |
| } |
| } |
| |
| const StringPiece text_; |
| const char* data_; |
| const size_t end_; |
| |
| size_t pos_ = 0; |
| StringPiece token_; |
| bool token_is_ip_ = false; |
| |
| const ParseHostsCommaMode comma_mode_; |
| }; |
| |
| void ParseHostsWithCommaMode(const std::string& contents, |
| DnsHosts* dns_hosts, |
| ParseHostsCommaMode comma_mode) { |
| CHECK(dns_hosts); |
| |
| StringPiece ip_text; |
| IPAddress ip; |
| AddressFamily family = ADDRESS_FAMILY_IPV4; |
| HostsParser parser(contents, comma_mode); |
| while (parser.Advance()) { |
| if (parser.token_is_ip()) { |
| StringPiece new_ip_text = parser.token(); |
| // Some ad-blocking hosts files contain thousands of entries pointing to |
| // the same IP address (usually 127.0.0.1). Don't bother parsing the IP |
| // again if it's the same as the one above it. |
| if (new_ip_text != ip_text) { |
| IPAddress new_ip; |
| if (new_ip.AssignFromIPLiteral(parser.token())) { |
| ip_text = new_ip_text; |
| ip = new_ip; |
| family = (ip.IsIPv4()) ? ADDRESS_FAMILY_IPV4 : ADDRESS_FAMILY_IPV6; |
| } else { |
| parser.SkipRestOfLine(); |
| } |
| } |
| } else { |
| url::CanonHostInfo canonicalization_info; |
| std::string canonicalized_host = |
| CanonicalizeHost(parser.token(), &canonicalization_info); |
| |
| // Skip if token is invalid for host canonicalization, or if it |
| // canonicalizes as an IP address. |
| if (canonicalization_info.family != url::CanonHostInfo::NEUTRAL) |
| continue; |
| |
| DnsHostsKey key(std::move(canonicalized_host), family); |
| if (!IsCanonicalizedHostCompliant(key.first)) |
| continue; |
| IPAddress* mapped_ip = &(*dns_hosts)[key]; |
| if (mapped_ip->empty()) |
| *mapped_ip = ip; |
| // else ignore this entry (first hit counts) |
| } |
| } |
| } |
| |
| } // namespace |
| |
| void ParseHostsWithCommaModeForTesting(const std::string& contents, |
| DnsHosts* dns_hosts, |
| ParseHostsCommaMode comma_mode) { |
| ParseHostsWithCommaMode(contents, dns_hosts, comma_mode); |
| } |
| |
| void ParseHosts(const std::string& contents, DnsHosts* dns_hosts) { |
| ParseHostsCommaMode comma_mode; |
| #if BUILDFLAG(IS_APPLE) |
| // Mac OS X allows commas to separate hostnames. |
| comma_mode = PARSE_HOSTS_COMMA_IS_WHITESPACE; |
| #else |
| // Linux allows commas in hostnames. |
| comma_mode = PARSE_HOSTS_COMMA_IS_TOKEN; |
| #endif |
| |
| ParseHostsWithCommaMode(contents, dns_hosts, comma_mode); |
| |
| // TODO(crbug.com/1377305): Remove this when we have enough data. |
| base::UmaHistogramCounts100000("Net.DNS.DnsHosts.Count", dns_hosts->size()); |
| |
| #if BUILDFLAG(ENABLE_BASE_TRACING) |
| // Cronet disables tracing and doesn't provide an implementation of |
| // base::trace_event::EstimateMemoryUsage for DnsHosts. Having this |
| // conditional is preferred over a fake implementation to avoid reporting fake |
| // metrics. |
| base::UmaHistogramMemoryKB( |
| "Net.DNS.DnsHosts.EstimateMemoryUsage", |
| base::trace_event::EstimateMemoryUsage(*dns_hosts)); |
| #endif // !BUILDFLAG(CRONET_BUILD) |
| } |
| |
| DnsHostsParser::~DnsHostsParser() = default; |
| |
| DnsHostsFileParser::DnsHostsFileParser(base::FilePath hosts_file_path) |
| : hosts_file_path_(std::move(hosts_file_path)) {} |
| |
| DnsHostsFileParser::~DnsHostsFileParser() = default; |
| |
| bool DnsHostsFileParser::ParseHosts(DnsHosts* dns_hosts) const { |
| dns_hosts->clear(); |
| // Missing file indicates empty HOSTS. |
| if (!base::PathExists(hosts_file_path_)) |
| return true; |
| |
| int64_t size; |
| if (!base::GetFileSize(hosts_file_path_, &size)) |
| return false; |
| |
| // Reject HOSTS files larger than |kMaxHostsSize| bytes. |
| const int64_t kMaxHostsSize = 1 << 25; // 32MB |
| |
| // TODO(crbug.com/1377305): Remove this when we have enough data. |
| base::UmaHistogramCustomCounts("Net.DNS.DnsHosts.FileSize", size, 1, |
| kMaxHostsSize * 2, 50); |
| if (size > kMaxHostsSize) |
| return false; |
| |
| std::string contents; |
| if (!base::ReadFileToString(hosts_file_path_, &contents)) |
| return false; |
| |
| net::ParseHosts(contents, dns_hosts); |
| return true; |
| } |
| |
| } // namespace net |