llvm-project
506 строк · 18.0 Кб
1//===--- FileIndex.cpp - Indexes for files. ------------------------ C++-*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "FileIndex.h"10#include "CollectMacros.h"11#include "ParsedAST.h"12#include "clang-include-cleaner/Record.h"13#include "index/Index.h"14#include "index/MemIndex.h"15#include "index/Merge.h"16#include "index/Ref.h"17#include "index/Relation.h"18#include "index/Serialization.h"19#include "index/Symbol.h"20#include "index/SymbolCollector.h"21#include "index/SymbolID.h"22#include "index/SymbolOrigin.h"23#include "index/dex/Dex.h"24#include "support/Logger.h"25#include "support/MemoryTree.h"26#include "support/Path.h"27#include "clang/AST/ASTContext.h"28#include "clang/Index/IndexingAction.h"29#include "clang/Index/IndexingOptions.h"30#include "clang/Lex/Preprocessor.h"31#include "llvm/ADT/DenseMap.h"32#include "llvm/ADT/STLExtras.h"33#include "llvm/ADT/StringMap.h"34#include "llvm/ADT/StringRef.h"35#include <algorithm>36#include <memory>37#include <optional>38#include <tuple>39#include <utility>40#include <vector>41
42namespace clang {43namespace clangd {44namespace {45
46SlabTuple indexSymbols(ASTContext &AST, Preprocessor &PP,47llvm::ArrayRef<Decl *> DeclsToIndex,48const MainFileMacros *MacroRefsToIndex,49const include_cleaner::PragmaIncludes &PI,50bool IsIndexMainAST, llvm::StringRef Version,51bool CollectMainFileRefs) {52SymbolCollector::Options CollectorOpts;53CollectorOpts.CollectIncludePath = true;54CollectorOpts.PragmaIncludes = &PI;55CollectorOpts.CountReferences = false;56CollectorOpts.Origin =57IsIndexMainAST ? SymbolOrigin::Open : SymbolOrigin::Preamble;58CollectorOpts.CollectMainFileRefs = CollectMainFileRefs;59// We want stdlib implementation details in the index only if we've opened the60// file in question. This does means xrefs won't work, though.61CollectorOpts.CollectReserved = IsIndexMainAST;62
63index::IndexingOptions IndexOpts;64// We only need declarations, because we don't count references.65IndexOpts.SystemSymbolFilter =66index::IndexingOptions::SystemSymbolFilterKind::DeclarationsOnly;67// We index function-local classes and its member functions only.68IndexOpts.IndexFunctionLocals = true;69if (IsIndexMainAST) {70// We only collect refs when indexing main AST.71CollectorOpts.RefFilter = RefKind::All;72// Comments for main file can always be obtained from sema, do not store73// them in the index.74CollectorOpts.StoreAllDocumentation = false;75} else {76IndexOpts.IndexMacrosInPreprocessor = true;77CollectorOpts.CollectMacro = true;78CollectorOpts.StoreAllDocumentation = true;79}80
81SymbolCollector Collector(std::move(CollectorOpts));82Collector.setPreprocessor(PP);83index::indexTopLevelDecls(AST, PP, DeclsToIndex, Collector, IndexOpts);84if (MacroRefsToIndex)85Collector.handleMacros(*MacroRefsToIndex);86
87const auto &SM = AST.getSourceManager();88const auto MainFileEntry = SM.getFileEntryRefForID(SM.getMainFileID());89std::string FileName =90std::string(MainFileEntry ? MainFileEntry->getName() : "");91
92auto Syms = Collector.takeSymbols();93auto Refs = Collector.takeRefs();94auto Relations = Collector.takeRelations();95
96vlog("indexed {0} AST for {1} version {2}:\n"97" symbol slab: {3} symbols, {4} bytes\n"98" ref slab: {5} symbols, {6} refs, {7} bytes\n"99" relations slab: {8} relations, {9} bytes",100IsIndexMainAST ? "file" : "preamble", FileName, Version, Syms.size(),101Syms.bytes(), Refs.size(), Refs.numRefs(), Refs.bytes(),102Relations.size(), Relations.bytes());103return std::make_tuple(std::move(Syms), std::move(Refs),104std::move(Relations));105}
106
107// We keep only the node "U" and its edges. Any node other than "U" will be
108// empty in the resultant graph.
109IncludeGraph getSubGraph(llvm::StringRef URI, const IncludeGraph &FullGraph) {110IncludeGraph IG;111
112auto Entry = IG.try_emplace(URI).first;113auto &Node = Entry->getValue();114Node = FullGraph.lookup(Entry->getKey());115Node.URI = Entry->getKey();116
117// URIs inside nodes must point into the keys of the same IncludeGraph.118for (auto &Include : Node.DirectIncludes) {119auto I = IG.try_emplace(Include).first;120I->getValue().URI = I->getKey();121Include = I->getKey();122}123return IG;124}
125} // namespace126
127FileShardedIndex::FileShardedIndex(IndexFileIn Input)128: Index(std::move(Input)) {129// Used to build RelationSlabs.130llvm::DenseMap<SymbolID, FileShard *> SymbolIDToFile;131
132// Attribute each Symbol to both their declaration and definition locations.133if (Index.Symbols) {134for (const auto &S : *Index.Symbols) {135auto It = Shards.try_emplace(S.CanonicalDeclaration.FileURI);136It.first->getValue().Symbols.insert(&S);137SymbolIDToFile[S.ID] = &It.first->getValue();138// Only bother if definition file is different than declaration file.139if (S.Definition &&140S.Definition.FileURI != S.CanonicalDeclaration.FileURI) {141auto It = Shards.try_emplace(S.Definition.FileURI);142It.first->getValue().Symbols.insert(&S);143}144}145}146// Attribute references into each file they occured in.147if (Index.Refs) {148for (const auto &SymRefs : *Index.Refs) {149for (const auto &R : SymRefs.second) {150const auto It = Shards.try_emplace(R.Location.FileURI);151It.first->getValue().Refs.insert(&R);152RefToSymID[&R] = SymRefs.first;153}154}155}156// The Subject and/or Object shards might be part of multiple TUs. In157// such cases there will be a race and the last TU to write the shard158// will win and all the other relations will be lost. To avoid this,159// we store relations in both shards. A race might still happen if the160// same translation unit produces different relations under different161// configurations, but that's something clangd doesn't handle in general.162if (Index.Relations) {163for (const auto &R : *Index.Relations) {164// FIXME: RelationSlab shouldn't contain dangling relations.165FileShard *SubjectFile = SymbolIDToFile.lookup(R.Subject);166FileShard *ObjectFile = SymbolIDToFile.lookup(R.Object);167if (SubjectFile)168SubjectFile->Relations.insert(&R);169if (ObjectFile && ObjectFile != SubjectFile)170ObjectFile->Relations.insert(&R);171}172}173// Store only the direct includes of a file in a shard.174if (Index.Sources) {175const auto &FullGraph = *Index.Sources;176for (const auto &It : FullGraph) {177auto ShardIt = Shards.try_emplace(It.first());178ShardIt.first->getValue().IG = getSubGraph(It.first(), FullGraph);179}180}181}
182std::vector<llvm::StringRef> FileShardedIndex::getAllSources() const {183// It should be enough to construct a vector with {Shards.keys().begin(),184// Shards.keys().end()} but MSVC fails to compile that.185std::vector<PathRef> Result;186Result.reserve(Shards.size());187for (auto Key : Shards.keys())188Result.push_back(Key);189return Result;190}
191
192std::optional<IndexFileIn>193FileShardedIndex::getShard(llvm::StringRef Uri) const {194auto It = Shards.find(Uri);195if (It == Shards.end())196return std::nullopt;197
198IndexFileIn IF;199IF.Sources = It->getValue().IG;200IF.Cmd = Index.Cmd;201
202SymbolSlab::Builder SymB;203for (const auto *S : It->getValue().Symbols)204SymB.insert(*S);205IF.Symbols = std::move(SymB).build();206
207RefSlab::Builder RefB;208for (const auto *Ref : It->getValue().Refs) {209auto SID = RefToSymID.lookup(Ref);210RefB.insert(SID, *Ref);211}212IF.Refs = std::move(RefB).build();213
214RelationSlab::Builder RelB;215for (const auto *Rel : It->getValue().Relations) {216RelB.insert(*Rel);217}218IF.Relations = std::move(RelB).build();219// Explicit move here is needed by some compilers.220return std::move(IF);221}
222
223SlabTuple indexMainDecls(ParsedAST &AST) {224return indexSymbols(225AST.getASTContext(), AST.getPreprocessor(), AST.getLocalTopLevelDecls(),226&AST.getMacros(), AST.getPragmaIncludes(),227/*IsIndexMainAST=*/true, AST.version(), /*CollectMainFileRefs=*/true);228}
229
230SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST,231Preprocessor &PP,232const include_cleaner::PragmaIncludes &PI) {233std::vector<Decl *> DeclsToIndex(234AST.getTranslationUnitDecl()->decls().begin(),235AST.getTranslationUnitDecl()->decls().end());236return indexSymbols(AST, PP, DeclsToIndex,237/*MainFileMacros=*/nullptr, PI,238/*IsIndexMainAST=*/false, Version,239/*CollectMainFileRefs=*/false);240}
241
242FileSymbols::FileSymbols(IndexContents IdxContents)243: IdxContents(IdxContents) {}244
245void FileSymbols::update(llvm::StringRef Key,246std::unique_ptr<SymbolSlab> Symbols,247std::unique_ptr<RefSlab> Refs,248std::unique_ptr<RelationSlab> Relations,249bool CountReferences) {250std::lock_guard<std::mutex> Lock(Mutex);251++Version;252if (!Symbols)253SymbolsSnapshot.erase(Key);254else255SymbolsSnapshot[Key] = std::move(Symbols);256if (!Refs) {257RefsSnapshot.erase(Key);258} else {259RefSlabAndCountReferences Item;260Item.CountReferences = CountReferences;261Item.Slab = std::move(Refs);262RefsSnapshot[Key] = std::move(Item);263}264if (!Relations)265RelationsSnapshot.erase(Key);266else267RelationsSnapshot[Key] = std::move(Relations);268}
269
270std::unique_ptr<SymbolIndex>271FileSymbols::buildIndex(IndexType Type, DuplicateHandling DuplicateHandle,272size_t *Version) {273std::vector<std::shared_ptr<SymbolSlab>> SymbolSlabs;274std::vector<std::shared_ptr<RefSlab>> RefSlabs;275std::vector<std::shared_ptr<RelationSlab>> RelationSlabs;276llvm::StringSet<> Files;277std::vector<RefSlab *> MainFileRefs;278{279std::lock_guard<std::mutex> Lock(Mutex);280for (const auto &FileAndSymbols : SymbolsSnapshot) {281SymbolSlabs.push_back(FileAndSymbols.second);282Files.insert(FileAndSymbols.first());283}284for (const auto &FileAndRefs : RefsSnapshot) {285RefSlabs.push_back(FileAndRefs.second.Slab);286Files.insert(FileAndRefs.first());287if (FileAndRefs.second.CountReferences)288MainFileRefs.push_back(RefSlabs.back().get());289}290for (const auto &FileAndRelations : RelationsSnapshot) {291Files.insert(FileAndRelations.first());292RelationSlabs.push_back(FileAndRelations.second);293}294
295if (Version)296*Version = this->Version;297}298std::vector<const Symbol *> AllSymbols;299std::vector<Symbol> SymsStorage;300switch (DuplicateHandle) {301case DuplicateHandling::Merge: {302llvm::DenseMap<SymbolID, Symbol> Merged;303for (const auto &Slab : SymbolSlabs) {304for (const auto &Sym : *Slab) {305assert(Sym.References == 0 &&306"Symbol with non-zero references sent to FileSymbols");307auto I = Merged.try_emplace(Sym.ID, Sym);308if (!I.second)309I.first->second = mergeSymbol(I.first->second, Sym);310}311}312for (const RefSlab *Refs : MainFileRefs)313for (const auto &Sym : *Refs) {314auto It = Merged.find(Sym.first);315// This might happen while background-index is still running.316if (It == Merged.end())317continue;318It->getSecond().References += Sym.second.size();319}320SymsStorage.reserve(Merged.size());321for (auto &Sym : Merged) {322SymsStorage.push_back(std::move(Sym.second));323AllSymbols.push_back(&SymsStorage.back());324}325break;326}327case DuplicateHandling::PickOne: {328llvm::DenseSet<SymbolID> AddedSymbols;329for (const auto &Slab : SymbolSlabs)330for (const auto &Sym : *Slab) {331assert(Sym.References == 0 &&332"Symbol with non-zero references sent to FileSymbols");333if (AddedSymbols.insert(Sym.ID).second)334AllSymbols.push_back(&Sym);335}336break;337}338}339
340std::vector<Ref> RefsStorage; // Contiguous ranges for each SymbolID.341llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> AllRefs;342{343llvm::DenseMap<SymbolID, llvm::SmallVector<Ref, 4>> MergedRefs;344size_t Count = 0;345for (const auto &RefSlab : RefSlabs)346for (const auto &Sym : *RefSlab) {347MergedRefs[Sym.first].append(Sym.second.begin(), Sym.second.end());348Count += Sym.second.size();349}350RefsStorage.reserve(Count);351AllRefs.reserve(MergedRefs.size());352for (auto &Sym : MergedRefs) {353auto &SymRefs = Sym.second;354// Sorting isn't required, but yields more stable results over rebuilds.355llvm::sort(SymRefs);356llvm::copy(SymRefs, back_inserter(RefsStorage));357AllRefs.try_emplace(358Sym.first,359llvm::ArrayRef<Ref>(&RefsStorage[RefsStorage.size() - SymRefs.size()],360SymRefs.size()));361}362}363
364std::vector<Relation> AllRelations;365for (const auto &RelationSlab : RelationSlabs) {366for (const auto &R : *RelationSlab)367AllRelations.push_back(R);368}369// Sort relations and remove duplicates that could arise due to370// relations being stored in both the shards containing their371// subject and object.372llvm::sort(AllRelations);373AllRelations.erase(std::unique(AllRelations.begin(), AllRelations.end()),374AllRelations.end());375
376size_t StorageSize =377RefsStorage.size() * sizeof(Ref) + SymsStorage.size() * sizeof(Symbol);378for (const auto &Slab : SymbolSlabs)379StorageSize += Slab->bytes();380for (const auto &RefSlab : RefSlabs)381StorageSize += RefSlab->bytes();382
383// Index must keep the slabs and contiguous ranges alive.384switch (Type) {385case IndexType::Light:386return std::make_unique<MemIndex>(387llvm::make_pointee_range(AllSymbols), std::move(AllRefs),388std::move(AllRelations), std::move(Files), IdxContents,389std::make_tuple(std::move(SymbolSlabs), std::move(RefSlabs),390std::move(RefsStorage), std::move(SymsStorage)),391StorageSize);392case IndexType::Heavy:393return std::make_unique<dex::Dex>(394llvm::make_pointee_range(AllSymbols), std::move(AllRefs),395std::move(AllRelations), std::move(Files), IdxContents,396std::make_tuple(std::move(SymbolSlabs), std::move(RefSlabs),397std::move(RefsStorage), std::move(SymsStorage)),398StorageSize);399}400llvm_unreachable("Unknown clangd::IndexType");401}
402
403void FileSymbols::profile(MemoryTree &MT) const {404std::lock_guard<std::mutex> Lock(Mutex);405for (const auto &SymSlab : SymbolsSnapshot) {406MT.detail(SymSlab.first())407.child("symbols")408.addUsage(SymSlab.second->bytes());409}410for (const auto &RefSlab : RefsSnapshot) {411MT.detail(RefSlab.first())412.child("references")413.addUsage(RefSlab.second.Slab->bytes());414}415for (const auto &RelSlab : RelationsSnapshot) {416MT.detail(RelSlab.first())417.child("relations")418.addUsage(RelSlab.second->bytes());419}420}
421
422FileIndex::FileIndex()423: MergedIndex(&MainFileIndex, &PreambleIndex),424PreambleSymbols(IndexContents::Symbols | IndexContents::Relations),425PreambleIndex(std::make_unique<MemIndex>()),426MainFileSymbols(IndexContents::All),427MainFileIndex(std::make_unique<MemIndex>()) {}428
429void FileIndex::updatePreamble(IndexFileIn IF) {430FileShardedIndex ShardedIndex(std::move(IF));431for (auto Uri : ShardedIndex.getAllSources()) {432auto IF = ShardedIndex.getShard(Uri);433// We are using the key received from ShardedIndex, so it should always434// exist.435assert(IF);436PreambleSymbols.update(437Uri, std::make_unique<SymbolSlab>(std::move(*IF->Symbols)),438std::make_unique<RefSlab>(),439std::make_unique<RelationSlab>(std::move(*IF->Relations)),440/*CountReferences=*/false);441}442size_t IndexVersion = 0;443auto NewIndex = PreambleSymbols.buildIndex(444IndexType::Heavy, DuplicateHandling::PickOne, &IndexVersion);445{446std::lock_guard<std::mutex> Lock(UpdateIndexMu);447if (IndexVersion <= PreambleIndexVersion) {448// We lost the race, some other thread built a later version.449return;450}451PreambleIndexVersion = IndexVersion;452PreambleIndex.reset(std::move(NewIndex));453vlog(454"Build dynamic index for header symbols with estimated memory usage of "455"{0} bytes",456PreambleIndex.estimateMemoryUsage());457}458}
459
460void FileIndex::updatePreamble(PathRef Path, llvm::StringRef Version,461ASTContext &AST, Preprocessor &PP,462const include_cleaner::PragmaIncludes &PI) {463IndexFileIn IF;464std::tie(IF.Symbols, std::ignore, IF.Relations) =465indexHeaderSymbols(Version, AST, PP, PI);466updatePreamble(std::move(IF));467}
468
469void FileIndex::updateMain(PathRef Path, ParsedAST &AST) {470auto Contents = indexMainDecls(AST);471MainFileSymbols.update(472URI::create(Path).toString(),473std::make_unique<SymbolSlab>(std::move(std::get<0>(Contents))),474std::make_unique<RefSlab>(std::move(std::get<1>(Contents))),475std::make_unique<RelationSlab>(std::move(std::get<2>(Contents))),476/*CountReferences=*/true);477size_t IndexVersion = 0;478auto NewIndex = MainFileSymbols.buildIndex(479IndexType::Light, DuplicateHandling::Merge, &IndexVersion);480{481std::lock_guard<std::mutex> Lock(UpdateIndexMu);482if (IndexVersion <= MainIndexVersion) {483// We lost the race, some other thread built a later version.484return;485}486MainIndexVersion = IndexVersion;487MainFileIndex.reset(std::move(NewIndex));488vlog(489"Build dynamic index for main-file symbols with estimated memory usage "490"of {0} bytes",491MainFileIndex.estimateMemoryUsage());492}493}
494
495void FileIndex::profile(MemoryTree &MT) const {496PreambleSymbols.profile(MT.child("preamble").child("slabs"));497MT.child("preamble")498.child("index")499.addUsage(PreambleIndex.estimateMemoryUsage());500MainFileSymbols.profile(MT.child("main_file").child("slabs"));501MT.child("main_file")502.child("index")503.addUsage(MainFileIndex.estimateMemoryUsage());504}
505} // namespace clangd506} // namespace clang507