| // Copyright 2017 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "components/variations/service/safe_seed_manager.h" |
| |
| #include "base/base_switches.h" |
| #include "base/command_line.h" |
| #include "base/metrics/histogram_functions.h" |
| #include "base/metrics/histogram_macros.h" |
| #include "base/numerics/ranges.h" |
| #include "components/prefs/pref_registry_simple.h" |
| #include "components/prefs/pref_service.h" |
| #include "components/variations/client_filterable_state.h" |
| #include "components/variations/pref_names.h" |
| #include "components/variations/variations_seed_store.h" |
| |
| namespace variations { |
| |
| // As of the time of this writing, January 2018, users at the 99.5th percentile, |
| // across all platforms, tend to experience fewer than 3 consecutive crashes: |
| // [1], [2], [3], [4]. Note, however, that this is less true for the less-stable |
| // channels on some platforms. |
| // [1] All platforms, stable channel (consistently stable): |
| // https://uma.googleplex.com/timeline_v2?sid=90ac80f4573249fb341a8e49501bfcfd |
| // [2] Most platforms, all channels (consistently stable other than occasional |
| // spikes on Canary): |
| // https://uma.googleplex.com/timeline_v2?sid=7af5ba1969db76689a401f982a1db539 |
| // [3] A less stable platform, all channels: |
| // https://uma.googleplex.com/timeline_v2?sid=07dbc8e4fa9f08e332fb609309a21882 |
| // [4] Another less stable platform, all channels: |
| // https://uma.googleplex.com/timeline_v2?sid=a7b529ef5d52863fae2d216e963c4cbc |
| // Overall, the only {platform, channel} combinations that spike above 3 |
| // consecutive crashes are ones with very few users, plus Canary. It's probably |
| // not realistic to avoid false positives for these less-stable configurations. |
| constexpr int kCrashStreakThreshold = 3; |
| |
| // Consecutive seed fetch failures are, unfortunately, a bit more common. As of |
| // January 2018, users at the 99.5th percentile tend to see fewer than 4 |
| // consecutive fetch failures on mobile platforms; and users at the 99th |
| // percentile tend to see fewer than 5 or 6 consecutive failures on desktop |
| // platforms. It makes sense that the characteristics differ on mobile |
| // vs. desktop platforms, given that the two use different scheduling algorithms |
| // for the fetches. Graphs: |
| // [1] Android, all channels (consistently connected): |
| // https://uma.googleplex.com/timeline_v2?sid=99d1d4c2490c60bcbde7afeb77c12a28 |
| // [2] High-connectivity platforms, Stable and Beta channel (consistently |
| // connected): |
| // https://uma.googleplex.com/timeline_v2?sid=2db5b7278dad41cbf349f5f2cb30efd9 |
| // [3] Other platforms, Stable and Beta channel (slightly less connected): |
| // https://uma.googleplex.com/timeline_v2?sid=d4ba2f3751d211898f8e69214147c2ec |
| // [4] All platforms, Dev (even less connected): |
| // https://uma.googleplex.com/timeline_v2?sid=5740fb22b17faa823822adfd8e00ec1a |
| // [5] All platforms, Canary (actually fairly well-connected!): |
| // https://uma.googleplex.com/timeline_v2?sid=3e14d3e4887792bb614db9f3f2c1d48c |
| // Note the all of the graphs show a spike on a particular day, presumably due |
| // to server-side instability. Moreover, the Dev channel on desktop is an |
| // outlier – users on the Dev channel can experience just shy of 9 consecutive |
| // failures on some platforms. |
| // Decision: There is not an obvious threshold that both achieves a low |
| // false-positive rate and provides good coverage for true positives. For now, |
| // set a threshold that should minimize false-positives. |
| // TODO(isherman): Check in with the networking team about their thoughts on how |
| // to find a better balance here. |
| constexpr int kFetchFailureStreakThreshold = 25; |
| |
| SafeSeedManager::SafeSeedManager(bool did_previous_session_exit_cleanly, |
| PrefService* local_state) |
| : local_state_(local_state) { |
| // Increment the crash streak if the previous session crashed. |
| // Note that the streak is not cleared if the previous run didn’t crash. |
| // Instead, it’s incremented on each crash until Chrome is able to |
| // successfully fetch a new seed. This way, a seed update that mostly |
| // destabilizes Chrome will still result in a fallback to safe mode. |
| int num_crashes = local_state->GetInteger(prefs::kVariationsCrashStreak); |
| if (!did_previous_session_exit_cleanly) { |
| ++num_crashes; |
| local_state->SetInteger(prefs::kVariationsCrashStreak, num_crashes); |
| } |
| |
| int num_failed_fetches = |
| local_state->GetInteger(prefs::kVariationsFailedToFetchSeedStreak); |
| base::UmaHistogramSparse("Variations.SafeMode.Streak.Crashes", |
| base::ClampToRange(num_crashes, 0, 100)); |
| base::UmaHistogramSparse("Variations.SafeMode.Streak.FetchFailures", |
| base::ClampToRange(num_failed_fetches, 0, 100)); |
| } |
| |
| SafeSeedManager::~SafeSeedManager() = default; |
| |
| // static |
| void SafeSeedManager::RegisterPrefs(PrefRegistrySimple* registry) { |
| // Prefs tracking failures along the way to fetching a seed. |
| registry->RegisterIntegerPref(prefs::kVariationsCrashStreak, 0); |
| registry->RegisterIntegerPref(prefs::kVariationsFailedToFetchSeedStreak, 0); |
| } |
| |
| bool SafeSeedManager::ShouldRunInSafeMode() const { |
| // Ignore any number of failures if the --force-fieldtrials flag is set. This |
| // flag is only used by developers, and there's no need to make the |
| // development process flakier. |
| if (base::CommandLine::ForCurrentProcess()->HasSwitch( |
| ::switches::kForceFieldTrials)) { |
| return false; |
| } |
| |
| int num_crashes = local_state_->GetInteger(prefs::kVariationsCrashStreak); |
| int num_failed_fetches = |
| local_state_->GetInteger(prefs::kVariationsFailedToFetchSeedStreak); |
| return num_crashes >= kCrashStreakThreshold || |
| num_failed_fetches >= kFetchFailureStreakThreshold; |
| } |
| |
| void SafeSeedManager::SetActiveSeedState( |
| const std::string& seed_data, |
| const std::string& base64_seed_signature, |
| std::unique_ptr<ClientFilterableState> client_filterable_state, |
| base::Time seed_fetch_time) { |
| DCHECK(!has_set_active_seed_state_); |
| has_set_active_seed_state_ = true; |
| |
| active_seed_state_ = std::make_unique<ActiveSeedState>( |
| seed_data, base64_seed_signature, std::move(client_filterable_state), |
| seed_fetch_time); |
| } |
| |
| void SafeSeedManager::RecordFetchStarted() { |
| // Pessimistically assume the fetch will fail. The failure streak will be |
| // reset upon success. |
| int num_failures_to_fetch = |
| local_state_->GetInteger(prefs::kVariationsFailedToFetchSeedStreak); |
| local_state_->SetInteger(prefs::kVariationsFailedToFetchSeedStreak, |
| num_failures_to_fetch + 1); |
| } |
| |
| void SafeSeedManager::RecordSuccessfulFetch(VariationsSeedStore* seed_store) { |
| // The first time a fetch succeeds for a given run of Chrome, save the active |
| // seed+filter configuration as safe. Note that it's sufficient to do this |
| // only on the first successful fetch because the active configuration does |
| // not change while Chrome is running. Also, note that it's fine to do this |
| // even if running in safe mode, as the saved seed in that case will just be |
| // the existing safe seed. |
| if (active_seed_state_) { |
| seed_store->StoreSafeSeed(active_seed_state_->seed_data, |
| active_seed_state_->base64_seed_signature, |
| *active_seed_state_->client_filterable_state, |
| active_seed_state_->seed_fetch_time); |
| |
| // The active seed state is only needed for the first time this code path is |
| // reached, so free up its memory once the data is no longer needed. |
| active_seed_state_.reset(); |
| } |
| |
| // Note: It's important to clear the crash streak as well as the fetch |
| // failures streak. Crashes that occur after a successful seed fetch do not |
| // prevent updating to a new seed, and therefore do not necessitate falling |
| // back to a safe seed. |
| local_state_->SetInteger(prefs::kVariationsCrashStreak, 0); |
| local_state_->SetInteger(prefs::kVariationsFailedToFetchSeedStreak, 0); |
| } |
| |
| SafeSeedManager::ActiveSeedState::ActiveSeedState( |
| const std::string& seed_data, |
| const std::string& base64_seed_signature, |
| std::unique_ptr<ClientFilterableState> client_filterable_state, |
| base::Time seed_fetch_time) |
| : seed_data(seed_data), |
| base64_seed_signature(base64_seed_signature), |
| client_filterable_state(std::move(client_filterable_state)), |
| seed_fetch_time(seed_fetch_time) {} |
| |
| SafeSeedManager::ActiveSeedState::~ActiveSeedState() = default; |
| |
| } // namespace variations |