blob: f07804fcb7c2761b484546f4ea1af6805a828b62 [file] [log] [blame]
//===--- FileDistance.h - File proximity scoring -----------------*- C++-*-===//
// The LLVM Compiler Infrastructure
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
// This library measures the distance between file paths.
// It's used for ranking symbols, e.g. in code completion.
// |foo/bar.h -> foo/bar.h| = 0.
// |foo/bar.h -> foo/baz.h| < |foo/bar.h -> baz.h|.
// This is an edit-distance, where edits go up or down the directory tree.
// It's not symmetrical, the costs of going up and down may not match.
// Dealing with multiple sources:
// In practice we care about the distance from a source file, but files near
// its main-header and #included files are considered "close".
// So we start with a set of (anchor, cost) pairs, and call the distance to a
// path the minimum of `cost + |source -> path|`.
// We allow each source to limit the number of up-traversals paths may start
// with. Up-traversals may reach things that are not "semantically near".
// Symbol URI schemes:
// Symbol locations may be represented by URIs rather than file paths directly.
// In this case we want to perform distance computations in URI space rather
// than in file-space, without performing redundant conversions.
// Therefore we have a lookup structure that accepts URIs, so that intermediate
// calculations for the same scheme can be reused.
// Caveats:
// Assuming up and down traversals each have uniform costs is simplistic.
// Often there are "semantic roots" whose children are almost unrelated.
// (e.g. /usr/include/, or / in an umbrella repository). We ignore this.
#include "URI.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/StringSaver.h"
namespace clang {
namespace clangd {
struct FileDistanceOptions {
unsigned UpCost = 2; // |foo/bar.h -> foo|
unsigned DownCost = 1; // |foo -> foo/bar.h|
unsigned IncludeCost = 2; // | -> included_header.h|
struct SourceParams {
// Base cost for paths starting at this source.
unsigned Cost = 0;
// Limits the number of upwards traversals allowed from this source.
unsigned MaxUpTraversals = std::numeric_limits<unsigned>::max();
// Supports lookups to find the minimum distance to a file from any source.
// This object should be reused, it memoizes intermediate computations.
class FileDistance {
static constexpr unsigned kUnreachable = std::numeric_limits<unsigned>::max();
FileDistance(llvm::StringMap<SourceParams> Sources,
const FileDistanceOptions &Opts = {});
// Computes the minimum distance from any source to the file path.
unsigned distance(llvm::StringRef Path);
// Costs computed so far. Always contains sources and their ancestors.
// We store hash codes only. Collisions are rare and consequences aren't dire.
llvm::DenseMap<llvm::hash_code, unsigned> Cache;
FileDistanceOptions Opts;
// Supports lookups like FileDistance, but the lookup keys are URIs.
// We convert each of the sources to the scheme of the URI and do a FileDistance
// comparison on the bodies.
class URIDistance {
URIDistance(llvm::StringMap<SourceParams> Sources,
const FileDistanceOptions &Opts = {})
: Sources(Sources), Opts(Opts) {}
// Computes the minimum distance from any source to the URI.
// Only sources that can be mapped into the URI's scheme are considered.
unsigned distance(llvm::StringRef URI);
// Returns the FileDistance for a URI scheme, creating it if needed.
FileDistance &forScheme(llvm::StringRef Scheme);
// We cache the results using the original strings so we can skip URI parsing.
llvm::DenseMap<llvm::hash_code, unsigned> Cache;
llvm::StringMap<SourceParams> Sources;
llvm::StringMap<std::unique_ptr<FileDistance>> ByScheme;
FileDistanceOptions Opts;
} // namespace clangd
} // namespace clang