|  | //===--- FileDistance.cpp - File contents container -------------*- C++ -*-===// | 
|  | // | 
|  | //                     The LLVM Compiler Infrastructure | 
|  | // | 
|  | // This file is distributed under the University of Illinois Open Source | 
|  | // License. See LICENSE.TXT for details. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | // The FileDistance structure allows calculating the minimum distance to paths | 
|  | // in a single tree. | 
|  | // We simply walk up the path's ancestors until we find a node whose cost is | 
|  | // known, and add the cost of walking back down. Initialization ensures this | 
|  | // gives the correct path to the roots. | 
|  | // We cache the results, so that the runtime is O(|A|), where A is the set of | 
|  | // all distinct ancestors of visited paths. | 
|  | // | 
|  | // Example after initialization with /=2, /bar=0, DownCost = 1: | 
|  | //  / = 2 | 
|  | //    /bar = 0 | 
|  | // | 
|  | // After querying /foo/bar and /bar/foo: | 
|  | //  / = 2 | 
|  | //    /bar = 0 | 
|  | //      /bar/foo = 1 | 
|  | //    /foo = 3 | 
|  | //      /foo/bar = 4 | 
|  | // | 
|  | // URIDistance creates FileDistance lazily for each URI scheme encountered. In | 
|  | // practice this is a small constant factor. | 
|  | // | 
|  | //===-------------------------------------------------------------------------// | 
|  |  | 
|  | #include "FileDistance.h" | 
|  | #include "Logger.h" | 
|  | #include "llvm/ADT/STLExtras.h" | 
|  | #include <queue> | 
|  |  | 
|  | namespace clang { | 
|  | namespace clangd { | 
|  | using namespace llvm; | 
|  |  | 
|  | // Convert a path into the canonical form. | 
|  | // Canonical form is either "/", or "/segment" * N: | 
|  | //   C:\foo\bar --> /c:/foo/bar | 
|  | //   /foo/      --> /foo | 
|  | //   a/b/c      --> /a/b/c | 
|  | static SmallString<128> canonicalize(StringRef Path) { | 
|  | SmallString<128> Result = Path.rtrim('/'); | 
|  | native(Result, sys::path::Style::posix); | 
|  | if (Result.empty() || Result.front() != '/') | 
|  | Result.insert(Result.begin(), '/'); | 
|  | return Result; | 
|  | } | 
|  |  | 
|  | constexpr const unsigned FileDistance::kUnreachable; | 
|  |  | 
|  | FileDistance::FileDistance(StringMap<SourceParams> Sources, | 
|  | const FileDistanceOptions &Opts) | 
|  | : Opts(Opts) { | 
|  | llvm::DenseMap<hash_code, SmallVector<hash_code, 4>> DownEdges; | 
|  | // Compute the best distance following only up edges. | 
|  | // Keep track of down edges, in case we can use them to improve on this. | 
|  | for (const auto &S : Sources) { | 
|  | auto Canonical = canonicalize(S.getKey()); | 
|  | dlog("Source {0} = {1}, MaxUp = {2}", Canonical, S.second.Cost, | 
|  | S.second.MaxUpTraversals); | 
|  | // Walk up to ancestors of this source, assigning cost. | 
|  | StringRef Rest = Canonical; | 
|  | llvm::hash_code Hash = hash_value(Rest); | 
|  | for (unsigned I = 0; !Rest.empty(); ++I) { | 
|  | Rest = parent_path(Rest, sys::path::Style::posix); | 
|  | auto NextHash = hash_value(Rest); | 
|  | auto &Down = DownEdges[NextHash]; | 
|  | if (std::find(Down.begin(), Down.end(), Hash) == Down.end()) | 
|  | DownEdges[NextHash].push_back(Hash); | 
|  | // We can't just break after MaxUpTraversals, must still set DownEdges. | 
|  | if (I > S.getValue().MaxUpTraversals) { | 
|  | if (Cache.find(Hash) != Cache.end()) | 
|  | break; | 
|  | } else { | 
|  | unsigned Cost = S.getValue().Cost + I * Opts.UpCost; | 
|  | auto R = Cache.try_emplace(Hash, Cost); | 
|  | if (!R.second) { | 
|  | if (Cost < R.first->second) { | 
|  | R.first->second = Cost; | 
|  | } else { | 
|  | // If we're not the best way to get to this path, stop assigning. | 
|  | break; | 
|  | } | 
|  | } | 
|  | } | 
|  | Hash = NextHash; | 
|  | } | 
|  | } | 
|  | // Now propagate scores parent -> child if that's an improvement. | 
|  | // BFS ensures we propagate down chains (must visit parents before children). | 
|  | std::queue<hash_code> Next; | 
|  | for (auto Child : DownEdges.lookup(hash_value(llvm::StringRef("")))) | 
|  | Next.push(Child); | 
|  | while (!Next.empty()) { | 
|  | auto ParentCost = Cache.lookup(Next.front()); | 
|  | for (auto Child : DownEdges.lookup(Next.front())) { | 
|  | auto &ChildCost = | 
|  | Cache.try_emplace(Child, kUnreachable).first->getSecond(); | 
|  | if (ParentCost + Opts.DownCost < ChildCost) | 
|  | ChildCost = ParentCost + Opts.DownCost; | 
|  | Next.push(Child); | 
|  | } | 
|  | Next.pop(); | 
|  | } | 
|  | } | 
|  |  | 
|  | unsigned FileDistance::distance(StringRef Path) { | 
|  | auto Canonical = canonicalize(Path); | 
|  | unsigned Cost = kUnreachable; | 
|  | SmallVector<hash_code, 16> Ancestors; | 
|  | // Walk up ancestors until we find a path we know the distance for. | 
|  | for (StringRef Rest = Canonical; !Rest.empty(); | 
|  | Rest = parent_path(Rest, sys::path::Style::posix)) { | 
|  | auto Hash = hash_value(Rest); | 
|  | auto It = Cache.find(Hash); | 
|  | if (It != Cache.end()) { | 
|  | Cost = It->second; | 
|  | break; | 
|  | } | 
|  | Ancestors.push_back(Hash); | 
|  | } | 
|  | // Now we know the costs for (known node, queried node]. | 
|  | // Fill these in, walking down the directory tree. | 
|  | for (hash_code Hash : reverse(Ancestors)) { | 
|  | if (Cost != kUnreachable) | 
|  | Cost += Opts.DownCost; | 
|  | Cache.try_emplace(Hash, Cost); | 
|  | } | 
|  | dlog("distance({0} = {1})", Path, Cost); | 
|  | return Cost; | 
|  | } | 
|  |  | 
|  | unsigned URIDistance::distance(llvm::StringRef URI) { | 
|  | auto R = Cache.try_emplace(llvm::hash_value(URI), FileDistance::kUnreachable); | 
|  | if (!R.second) | 
|  | return R.first->getSecond(); | 
|  | if (auto U = clangd::URI::parse(URI)) { | 
|  | dlog("distance({0} = {1})", URI, U->body()); | 
|  | R.first->second = forScheme(U->scheme()).distance(U->body()); | 
|  | } else { | 
|  | log("URIDistance::distance() of unparseable {0}: {1}", URI, U.takeError()); | 
|  | } | 
|  | return R.first->second; | 
|  | } | 
|  |  | 
|  | FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) { | 
|  | auto &Delegate = ByScheme[Scheme]; | 
|  | if (!Delegate) { | 
|  | llvm::StringMap<SourceParams> SchemeSources; | 
|  | for (const auto &Source : Sources) { | 
|  | if (auto U = clangd::URI::create(Source.getKey(), Scheme)) | 
|  | SchemeSources.try_emplace(U->body(), Source.getValue()); | 
|  | else | 
|  | consumeError(U.takeError()); | 
|  | } | 
|  | dlog("FileDistance for scheme {0}: {1}/{2} sources", Scheme, | 
|  | SchemeSources.size(), Sources.size()); | 
|  | Delegate.reset(new FileDistance(std::move(SchemeSources), Opts)); | 
|  | } | 
|  | return *Delegate; | 
|  | } | 
|  |  | 
|  | } // namespace clangd | 
|  | } // namespace clang |