blob: b2741f70b0465ce3b53d6f3f3afa333183cffdcc [file] [log] [blame]
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/ukm/content/source_url_recorder.h"
#include <utility>
#include "base/containers/flat_map.h"
#include "base/macros.h"
#include "content/public/browser/navigation_handle.h"
#include "content/public/browser/web_contents.h"
#include "content/public/browser/web_contents_binding_set.h"
#include "content/public/browser/web_contents_observer.h"
#include "content/public/browser/web_contents_user_data.h"
#include "services/metrics/public/cpp/delegating_ukm_recorder.h"
#include "services/metrics/public/cpp/ukm_builders.h"
#include "services/metrics/public/cpp/ukm_source_id.h"
#include "third_party/blink/public/platform/ukm.mojom.h"
#include "url/gurl.h"
namespace ukm {
namespace internal {
// Hands out tab ids from a function-local counter: the first call returns 1
// and every later call returns one more than the call before it.
int64_t CreateUniqueTabId() {
  static int64_t next_tab_id = 1;
  return next_tab_id++;
}
// SourceUrlRecorderWebContentsObserver is responsible for recording UKM source
// URLs, for all (and only) main frame navigations in a given WebContents.
// SourceUrlRecorderWebContentsObserver records both the final URL for a
// navigation, and, if the navigation was redirected, the initial URL as well.
//
// It additionally implements blink::mojom::UkmSourceIdFrameHost: document
// source ids reported through SetDocumentSourceId() are queued and recorded as
// DocumentCreated events against the last committed navigation source id.
class SourceUrlRecorderWebContentsObserver
: public blink::mojom::UkmSourceIdFrameHost,
public content::WebContentsObserver,
public content::WebContentsUserData<
SourceUrlRecorderWebContentsObserver> {
public:
// Creates a SourceUrlRecorderWebContentsObserver for the given
// WebContents. If a SourceUrlRecorderWebContentsObserver is already
// associated with the WebContents, this method is a no-op.
static void CreateForWebContents(content::WebContents* web_contents);
// content::WebContentsObserver:
// Starts tracking main-frame, cross-document navigations; everything else is
// ignored.
void DidStartNavigation(
content::NavigationHandle* navigation_handle) override;
// Stops tracking the navigation, updates the committed source ids if it
// committed, and records its URL(s) unless it turned out to be a download.
void DidFinishNavigation(
content::NavigationHandle* navigation_handle) override;
// Stores this tab's last committed source id as the opener source id of the
// recorder attached to |new_contents|, if there is one.
void DidOpenRequestedURL(content::WebContents* new_contents,
content::RenderFrameHost* source_render_frame_host,
const GURL& url,
const content::Referrer& referrer,
WindowOpenDisposition disposition,
ui::PageTransition transition,
bool started_from_context_menu,
bool renderer_initiated) override;
// Returns the source id of the most recently committed tracked navigation,
// or ukm::kInvalidSourceId if none has committed yet.
ukm::SourceId GetLastCommittedSourceId() const;
// blink::mojom::UkmSourceIdFrameHost
// Queues a DocumentCreated event for |source_id| and tries to flush the queue.
void SetDocumentSourceId(int64_t source_id) override;
private:
explicit SourceUrlRecorderWebContentsObserver(
content::WebContents* web_contents);
friend class content::WebContentsUserData<
SourceUrlRecorderWebContentsObserver>;
// Record any pending DocumentCreated events to UKM. Does nothing (and keeps
// the events queued) while there is no committed source id or no recorder.
void MaybeFlushPendingEvents();
// Records the URL(s) of |navigation_handle| to UKM: |initial_url| first when
// it differs from the handle's current URL, then the current URL.
void MaybeRecordUrl(content::NavigationHandle* navigation_handle,
const GURL& initial_url);
// Receives document source IDs from the renderer.
content::WebContentsFrameBindingSet<blink::mojom::UkmSourceIdFrameHost>
bindings_;
// Map from navigation ID to the initial URL for that navigation. Entries are
// added in DidStartNavigation() and removed in DidFinishNavigation().
base::flat_map<int64_t, GURL> pending_navigations_;
// Holds pending DocumentCreated events.
struct PendingEvent {
PendingEvent() = delete;
PendingEvent(int64_t source_id,
bool is_main_frame,
bool is_cross_origin_frame)
: source_id(source_id),
is_main_frame(is_main_frame),
is_cross_origin_frame(is_cross_origin_frame) {}
// Document source id as passed to SetDocumentSourceId().
int64_t source_id;
// Reported via DocumentCreated::SetIsMainFrame.
bool is_main_frame;
// Reported via DocumentCreated::SetIsCrossOriginFrame.
bool is_cross_origin_frame;
};
// Events queued by SetDocumentSourceId() that have not been recorded yet.
// Cleared whenever a new tracked navigation starts.
std::vector<PendingEvent> pending_document_created_events_;
// The source id of the most recently committed tracked navigation.
SourceId last_committed_source_id_;
// The source id before |last_committed_source_id_|.
SourceId previous_committed_source_id_;
// The source id of the last committed source in the tab that opened this tab.
// Written by the opener's DidOpenRequestedURL(). Will be set to
// kInvalidSourceId after the first navigation in this tab is finished.
SourceId opener_source_id_;
// Id obtained from CreateUniqueTabId() at construction; reported as the
// tab_id of every navigation recorded by this object.
const int64_t tab_id_;
DISALLOW_COPY_AND_ASSIGN(SourceUrlRecorderWebContentsObserver);
};
// Starts observing |web_contents| and hands |this| to |bindings_| as the
// UkmSourceIdFrameHost implementation. Every source id starts out invalid and
// the tab receives a fresh id from CreateUniqueTabId().
SourceUrlRecorderWebContentsObserver::SourceUrlRecorderWebContentsObserver(
    content::WebContents* web_contents)
    : content::WebContentsObserver(web_contents),
      bindings_(web_contents, this),
      last_committed_source_id_(kInvalidSourceId),
      previous_committed_source_id_(kInvalidSourceId),
      opener_source_id_(kInvalidSourceId),
      tab_id_(CreateUniqueTabId()) {}
// Remembers the URL that a main-frame, cross-document navigation started with
// so that it can be recorded once the navigation has finished.
void SourceUrlRecorderWebContentsObserver::DidStartNavigation(
    content::NavigationHandle* navigation_handle) {
  // UKM only records URLs for main frame (web page) navigations.
  if (!navigation_handle->IsInMainFrame())
    return;

  // At least for the time being, same-document navigations (e.g. changes in
  // URL fragment, or URL changes due to history.pushState) are not tracked.
  if (navigation_handle->IsSameDocument())
    return;

  // Downloads must not be recorded, but whether this navigation ends up as a
  // download is not known yet. Stash the starting URL now; the decision to
  // record it is taken when the navigation finishes.
  const int64_t navigation_id = navigation_handle->GetNavigationId();
  pending_navigations_.emplace(navigation_id, navigation_handle->GetURL());

  // Document events still queued at this point were never associated with a
  // source, so drop them.
  pending_document_created_events_.clear();
}
// Finishes tracking a navigation registered in DidStartNavigation(): updates
// the committed source ids, then records the navigation's URL(s) and flushes
// queued document events unless the navigation was a download.
void SourceUrlRecorderWebContentsObserver::DidFinishNavigation(
    content::NavigationHandle* navigation_handle) {
  const int64_t navigation_id = navigation_handle->GetNavigationId();
  auto pending = pending_navigations_.find(navigation_id);
  // Navigations that were never registered are not ours to record.
  if (pending == pending_navigations_.end())
    return;

  DCHECK(navigation_handle->IsInMainFrame());
  DCHECK(!navigation_handle->IsSameDocument());

  // Shift the committed ids before recording: MaybeRecordUrl() reads
  // |previous_committed_source_id_| for committed navigations.
  if (navigation_handle->HasCommitted()) {
    previous_committed_source_id_ = last_committed_source_id_;
    last_committed_source_id_ =
        ukm::ConvertToSourceId(navigation_id, ukm::SourceIdType::NAVIGATION_ID);
  }

  // Take the stored URL out of the map before the entry is erased.
  const GURL initial_url = std::move(pending->second);
  pending_navigations_.erase(pending);

  // UKM doesn't want to record URLs for navigations that result in downloads.
  // Note that the opener source id is left untouched in that case.
  if (navigation_handle->IsDownload())
    return;

  MaybeRecordUrl(navigation_handle, initial_url);
  MaybeFlushPendingEvents();

  // Only the first source in a tab should have an opener, so forget it now.
  opener_source_id_ = kInvalidSourceId;
}
// Tells the recorder attached to |new_contents| (when it has one) which source
// was last committed in this tab, so it can be reported as the opener.
void SourceUrlRecorderWebContentsObserver::DidOpenRequestedURL(
    content::WebContents* new_contents,
    content::RenderFrameHost* source_render_frame_host,
    const GURL& url,
    const content::Referrer& referrer,
    WindowOpenDisposition disposition,
    ui::PageTransition transition,
    bool started_from_context_menu,
    bool renderer_initiated) {
  if (auto* opened_recorder =
          SourceUrlRecorderWebContentsObserver::FromWebContents(
              new_contents)) {
    opened_recorder->opener_source_id_ = GetLastCommittedSourceId();
  }
}
// Accessor for the source id stored by the most recent committed navigation
// seen in DidFinishNavigation().
ukm::SourceId
SourceUrlRecorderWebContentsObserver::GetLastCommittedSourceId() const {
  return last_committed_source_id_;
}
void SourceUrlRecorderWebContentsObserver::SetDocumentSourceId(
int64_t source_id) {
content::RenderFrameHost* main_frame = web_contents()->GetMainFrame();
content::RenderFrameHost* current_frame = bindings_.GetCurrentTargetFrame();
bool is_main_frame = main_frame == current_frame;
bool is_cross_origin_frame =
is_main_frame ? false
: !main_frame->GetLastCommittedOrigin().IsSameOriginWith(
current_frame->GetLastCommittedOrigin());
pending_document_created_events_.emplace_back(
source_id, !bindings_.GetCurrentTargetFrame()->GetParent(),
is_cross_origin_frame);
MaybeFlushPendingEvents();
}
// Records every queued DocumentCreated event against the last committed
// navigation source and empties the queue. The queue is left untouched when
// there is no committed source id yet or no UKM recorder is available.
void SourceUrlRecorderWebContentsObserver::MaybeFlushPendingEvents() {
  if (!last_committed_source_id_)
    return;

  ukm::DelegatingUkmRecorder* ukm_recorder = ukm::DelegatingUkmRecorder::Get();
  if (!ukm_recorder)
    return;

  // Walk the queue newest-first, i.e. in the order in which draining it from
  // the back would visit the events.
  for (auto event = pending_document_created_events_.rbegin();
       event != pending_document_created_events_.rend(); ++event) {
    ukm::builders::DocumentCreated(event->source_id)
        .SetNavigationSourceId(last_committed_source_id_)
        .SetIsMainFrame(event->is_main_frame)
        .SetIsCrossOriginFrame(event->is_cross_origin_frame)
        .Record(ukm_recorder);
  }
  pending_document_created_events_.clear();
}
// Builds the NavigationData for |navigation_handle| and hands it to the UKM
// recorder, if one is available. |initial_url| is the URL the navigation
// started with; it is reported ahead of the handle's current URL whenever the
// two differ.
void SourceUrlRecorderWebContentsObserver::MaybeRecordUrl(
    content::NavigationHandle* navigation_handle,
    const GURL& initial_url) {
  DCHECK(navigation_handle->IsInMainFrame());
  DCHECK(!navigation_handle->IsSameDocument());

  ukm::DelegatingUkmRecorder* ukm_recorder = ukm::DelegatingUkmRecorder::Get();
  if (!ukm_recorder)
    return;

  UkmSource::NavigationData navigation_data;
  navigation_data.tab_id = tab_id_;
  navigation_data.opener_source_id = opener_source_id_;

  // Careful note: the current navigation may have failed. When it committed,
  // |last_committed_source_id_| already refers to this navigation, so the
  // previous source is one step further back.
  if (navigation_handle->HasCommitted()) {
    navigation_data.previous_source_id = previous_committed_source_id_;
  } else {
    navigation_data.previous_source_id = last_committed_source_id_;
  }

  const GURL& final_url = navigation_handle->GetURL();
  // TODO(crbug.com/869123): This check isn't quite correct, as self redirecting
  // is possible. This may also be changed to include the entire redirect chain.
  if (final_url != initial_url)
    navigation_data.urls = {initial_url};
  navigation_data.urls.push_back(final_url);

  ukm_recorder->RecordNavigation(
      ukm::ConvertToSourceId(navigation_handle->GetNavigationId(),
                             ukm::SourceIdType::NAVIGATION_ID),
      navigation_data);
}
// static
// Attaches a new recorder to |web_contents| as user data, unless one is
// already attached.
void SourceUrlRecorderWebContentsObserver::CreateForWebContents(
    content::WebContents* web_contents) {
  if (SourceUrlRecorderWebContentsObserver::FromWebContents(web_contents))
    return;

  // The constructor is private, hence WrapUnique(new ...).
  auto recorder =
      base::WrapUnique(new SourceUrlRecorderWebContentsObserver(web_contents));
  web_contents->SetUserData(SourceUrlRecorderWebContentsObserver::UserDataKey(),
                            std::move(recorder));
}
} // namespace internal
// Public entry point: makes sure |web_contents| has a source URL recorder
// attached. Calling it again for the same WebContents has no further effect.
void InitializeSourceUrlRecorderForWebContents(
    content::WebContents* web_contents) {
  using Recorder = internal::SourceUrlRecorderWebContentsObserver;
  Recorder::CreateForWebContents(web_contents);
}
// Returns the last committed source id known to the recorder attached to
// |web_contents|, or kInvalidSourceId when no recorder is attached.
SourceId GetSourceIdForWebContentsDocument(
    const content::WebContents* web_contents) {
  using Recorder = internal::SourceUrlRecorderWebContentsObserver;
  const Recorder* recorder = Recorder::FromWebContents(web_contents);
  if (!recorder)
    return kInvalidSourceId;
  return recorder->GetLastCommittedSourceId();
}
} // namespace ukm