// llvm-project
// 1121 lines · 43.2 KB
//===--- SymbolCollector.cpp -------------------------------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SymbolCollector.h"
#include "AST.h"
#include "CodeComplete.h"
#include "CodeCompletionStrings.h"
#include "ExpectedTypes.h"
#include "SourceCode.h"
#include "URI.h"
#include "clang-include-cleaner/Analysis.h"
#include "clang-include-cleaner/IncludeSpeller.h"
#include "clang-include-cleaner/Record.h"
#include "clang-include-cleaner/Types.h"
#include "index/CanonicalIncludes.h"
#include "index/Relation.h"
#include "index/Symbol.h"
#include "index/SymbolID.h"
#include "index/SymbolLocation.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclBase.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/DeclarationName.h"
#include "clang/AST/Expr.h"
#include "clang/Basic/FileEntry.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Index/IndexSymbol.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/Token.h"
#include "clang/Tooling/Inclusions/HeaderAnalysis.h"
#include "clang/Tooling/Inclusions/StandardLibrary.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include <cassert>
#include <memory>
#include <optional>
#include <string>
#include <utility>

namespace clang {
namespace clangd {
namespace {

57/// If \p ND is a template specialization, returns the described template.
58/// Otherwise, returns \p ND.
59const NamedDecl &getTemplateOrThis(const NamedDecl &ND) {60if (auto *T = ND.getDescribedTemplate())61return *T;62return ND;63}
64
65// Checks whether the decl is a private symbol in a header generated by
66// protobuf compiler.
67// FIXME: make filtering extensible when there are more use cases for symbol
68// filters.
69bool isPrivateProtoDecl(const NamedDecl &ND) {70const auto &SM = ND.getASTContext().getSourceManager();71if (!isProtoFile(nameLocation(ND, SM), SM))72return false;73
74// ND without identifier can be operators.75if (ND.getIdentifier() == nullptr)76return false;77auto Name = ND.getIdentifier()->getName();78if (!Name.contains('_'))79return false;80// Nested proto entities (e.g. Message::Nested) have top-level decls81// that shouldn't be used (Message_Nested). Ignore them completely.82// The nested entities are dangling type aliases, we may want to reconsider83// including them in the future.84// For enum constants, SOME_ENUM_CONSTANT is not private and should be85// indexed. Outer_INNER is private. This heuristic relies on naming style, it86// will include OUTER_INNER and exclude some_enum_constant.87// FIXME: the heuristic relies on naming style (i.e. no underscore in88// user-defined names) and can be improved.89return (ND.getKind() != Decl::EnumConstant) || llvm::any_of(Name, islower);90}
91
92// We only collect #include paths for symbols that are suitable for global code
93// completion, except for namespaces since #include path for a namespace is hard
94// to define.
95Symbol::IncludeDirective shouldCollectIncludePath(index::SymbolKind Kind) {96using SK = index::SymbolKind;97switch (Kind) {98case SK::Macro:99case SK::Enum:100case SK::Struct:101case SK::Class:102case SK::Union:103case SK::TypeAlias:104case SK::Using:105case SK::Function:106case SK::Variable:107case SK::EnumConstant:108case SK::Concept:109return Symbol::Include | Symbol::Import;110case SK::Protocol:111return Symbol::Import;112default:113return Symbol::Invalid;114}115}
116
117// Return the symbol range of the token at \p TokLoc.
118std::pair<SymbolLocation::Position, SymbolLocation::Position>119getTokenRange(SourceLocation TokLoc, const SourceManager &SM,120const LangOptions &LangOpts) {121auto CreatePosition = [&SM](SourceLocation Loc) {122auto LSPLoc = sourceLocToPosition(SM, Loc);123SymbolLocation::Position Pos;124Pos.setLine(LSPLoc.line);125Pos.setColumn(LSPLoc.character);126return Pos;127};128
129auto TokenLength = clang::Lexer::MeasureTokenLength(TokLoc, SM, LangOpts);130return {CreatePosition(TokLoc),131CreatePosition(TokLoc.getLocWithOffset(TokenLength))};132}
133
134// Checks whether \p ND is a good candidate to be the *canonical* declaration of
135// its symbol (e.g. a go-to-declaration target). This overrides the default of
136// using Clang's canonical declaration, which is the first in the TU.
137//
138// Example: preferring a class declaration over its forward declaration.
139bool isPreferredDeclaration(const NamedDecl &ND, index::SymbolRoleSet Roles) {140const auto &SM = ND.getASTContext().getSourceManager();141if (isa<TagDecl>(ND))142return (Roles & static_cast<unsigned>(index::SymbolRole::Definition)) &&143!isInsideMainFile(ND.getLocation(), SM);144if (const auto *ID = dyn_cast<ObjCInterfaceDecl>(&ND))145return ID->isThisDeclarationADefinition();146if (const auto *PD = dyn_cast<ObjCProtocolDecl>(&ND))147return PD->isThisDeclarationADefinition();148return false;149}
150
151RefKind toRefKind(index::SymbolRoleSet Roles, bool Spelled = false) {152RefKind Result = RefKind::Unknown;153if (Roles & static_cast<unsigned>(index::SymbolRole::Declaration))154Result |= RefKind::Declaration;155if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))156Result |= RefKind::Definition;157if (Roles & static_cast<unsigned>(index::SymbolRole::Reference))158Result |= RefKind::Reference;159if (Spelled)160Result |= RefKind::Spelled;161return Result;162}
163
164std::optional<RelationKind> indexableRelation(const index::SymbolRelation &R) {165if (R.Roles & static_cast<unsigned>(index::SymbolRole::RelationBaseOf))166return RelationKind::BaseOf;167if (R.Roles & static_cast<unsigned>(index::SymbolRole::RelationOverrideOf))168return RelationKind::OverriddenBy;169return std::nullopt;170}
171
172// Check if there is an exact spelling of \p ND at \p Loc.
173bool isSpelled(SourceLocation Loc, const NamedDecl &ND) {174auto Name = ND.getDeclName();175const auto NameKind = Name.getNameKind();176if (NameKind != DeclarationName::Identifier &&177NameKind != DeclarationName::CXXConstructorName &&178NameKind != DeclarationName::ObjCZeroArgSelector &&179NameKind != DeclarationName::ObjCOneArgSelector &&180NameKind != DeclarationName::ObjCMultiArgSelector)181return false;182const auto &AST = ND.getASTContext();183const auto &SM = AST.getSourceManager();184const auto &LO = AST.getLangOpts();185clang::Token Tok;186if (clang::Lexer::getRawToken(Loc, Tok, SM, LO))187return false;188auto TokSpelling = clang::Lexer::getSpelling(Tok, SM, LO);189if (const auto *MD = dyn_cast<ObjCMethodDecl>(&ND))190return TokSpelling == MD->getSelector().getNameForSlot(0);191return TokSpelling == Name.getAsString();192}
} // namespace

195// Encapsulates decisions about how to record header paths in the index,
196// including filename normalization, URI conversion etc.
197// Expensive checks are cached internally.
198class SymbolCollector::HeaderFileURICache {199struct FrameworkUmbrellaSpelling {200// Spelling for the public umbrella header, e.g. <Foundation/Foundation.h>201std::optional<std::string> PublicHeader;202// Spelling for the private umbrella header, e.g.203// <Foundation/Foundation_Private.h>204std::optional<std::string> PrivateHeader;205};206// Weird double-indirect access to PP, which might not be ready yet when207// HeaderFiles is created but will be by the time it's used.208// (IndexDataConsumer::setPreprocessor can happen before or after initialize)209Preprocessor *&PP;210const SourceManager &SM;211const include_cleaner::PragmaIncludes *PI;212llvm::StringRef FallbackDir;213llvm::DenseMap<const FileEntry *, const std::string *> CacheFEToURI;214llvm::StringMap<std::string> CachePathToURI;215llvm::DenseMap<FileID, llvm::StringRef> CacheFIDToInclude;216llvm::StringMap<std::string> CachePathToFrameworkSpelling;217llvm::StringMap<FrameworkUmbrellaSpelling>218CacheFrameworkToUmbrellaHeaderSpelling;219
220public:221HeaderFileURICache(Preprocessor *&PP, const SourceManager &SM,222const SymbolCollector::Options &Opts)223: PP(PP), SM(SM), PI(Opts.PragmaIncludes), FallbackDir(Opts.FallbackDir) {224}225
226// Returns a canonical URI for the file \p FE.227// We attempt to make the path absolute first.228const std::string &toURI(const FileEntryRef FE) {229auto R = CacheFEToURI.try_emplace(FE);230if (R.second) {231auto CanonPath = getCanonicalPath(FE, SM.getFileManager());232R.first->second = &toURIInternal(CanonPath ? *CanonPath : FE.getName());233}234return *R.first->second;235}236
237// Returns a canonical URI for \p Path.238// If the file is in the FileManager, use that to canonicalize the path.239// We attempt to make the path absolute in any case.240const std::string &toURI(llvm::StringRef Path) {241if (auto File = SM.getFileManager().getFileRef(Path))242return toURI(*File);243return toURIInternal(Path);244}245
246// Gets a canonical include (URI of the header or <header> or "header") for247// header of \p FID (which should usually be the *expansion* file).248// This does not account for any per-symbol overrides!249// Returns "" if includes should not be inserted for this file.250llvm::StringRef getIncludeHeader(FileID FID) {251auto R = CacheFIDToInclude.try_emplace(FID);252if (R.second)253R.first->second = getIncludeHeaderUncached(FID);254return R.first->second;255}256
257// If a file is mapped by canonical headers, use that mapping, regardless258// of whether it's an otherwise-good header (header guards etc).259llvm::StringRef mapCanonical(llvm::StringRef HeaderPath) {260if (!PP)261return "";262// Populate the system header mapping as late as possible to263// ensure the preprocessor has been set already.264CanonicalIncludes SysHeaderMapping;265SysHeaderMapping.addSystemHeadersMapping(PP->getLangOpts());266auto Canonical = SysHeaderMapping.mapHeader(HeaderPath);267if (Canonical.empty())268return "";269// If we had a mapping, always use it.270assert(Canonical.starts_with("<") || Canonical.starts_with("\""));271return Canonical;272}273
274private:275// This takes care of making paths absolute and path->URI caching, but no276// FileManager-based canonicalization.277const std::string &toURIInternal(llvm::StringRef Path) {278auto R = CachePathToURI.try_emplace(Path);279if (R.second) {280llvm::SmallString<256> AbsPath = Path;281if (!llvm::sys::path::is_absolute(AbsPath) && !FallbackDir.empty())282llvm::sys::fs::make_absolute(FallbackDir, AbsPath);283assert(llvm::sys::path::is_absolute(AbsPath) &&284"If the VFS can't make paths absolute, a FallbackDir must be "285"provided");286llvm::sys::path::remove_dots(AbsPath, /*remove_dot_dot=*/true);287R.first->second = URI::create(AbsPath).toString();288}289return R.first->second;290}291
292struct FrameworkHeaderPath {293// Path to the framework directory containing the Headers/PrivateHeaders294// directories e.g. /Frameworks/Foundation.framework/295llvm::StringRef HeadersParentDir;296// Subpath relative to the Headers or PrivateHeaders dir, e.g. NSObject.h297// Note: This is NOT relative to the `HeadersParentDir`.298llvm::StringRef HeaderSubpath;299// Whether this header is under the PrivateHeaders dir300bool IsPrivateHeader;301};302
303std::optional<FrameworkHeaderPath>304splitFrameworkHeaderPath(llvm::StringRef Path) {305using namespace llvm::sys;306path::reverse_iterator I = path::rbegin(Path);307path::reverse_iterator Prev = I;308path::reverse_iterator E = path::rend(Path);309while (I != E) {310if (*I == "Headers") {311FrameworkHeaderPath HeaderPath;312HeaderPath.HeadersParentDir = Path.substr(0, I - E);313HeaderPath.HeaderSubpath = Path.substr(Prev - E);314HeaderPath.IsPrivateHeader = false;315return HeaderPath;316}317if (*I == "PrivateHeaders") {318FrameworkHeaderPath HeaderPath;319HeaderPath.HeadersParentDir = Path.substr(0, I - E);320HeaderPath.HeaderSubpath = Path.substr(Prev - E);321HeaderPath.IsPrivateHeader = true;322return HeaderPath;323}324Prev = I;325++I;326}327// Unexpected, must not be a framework header.328return std::nullopt;329}330
331// Frameworks typically have an umbrella header of the same name, e.g.332// <Foundation/Foundation.h> instead of <Foundation/NSObject.h> or333// <Foundation/Foundation_Private.h> instead of334// <Foundation/NSObject_Private.h> which should be used instead of directly335// importing the header.336std::optional<std::string>337getFrameworkUmbrellaSpelling(llvm::StringRef Framework,338const HeaderSearch &HS,339FrameworkHeaderPath &HeaderPath) {340auto Res = CacheFrameworkToUmbrellaHeaderSpelling.try_emplace(Framework);341auto *CachedSpelling = &Res.first->second;342if (!Res.second) {343return HeaderPath.IsPrivateHeader ? CachedSpelling->PrivateHeader344: CachedSpelling->PublicHeader;345}346SmallString<256> UmbrellaPath(HeaderPath.HeadersParentDir);347llvm::sys::path::append(UmbrellaPath, "Headers", Framework + ".h");348
349llvm::vfs::Status Status;350auto StatErr = HS.getFileMgr().getNoncachedStatValue(UmbrellaPath, Status);351if (!StatErr)352CachedSpelling->PublicHeader = llvm::formatv("<{0}/{0}.h>", Framework);353
354UmbrellaPath = HeaderPath.HeadersParentDir;355llvm::sys::path::append(UmbrellaPath, "PrivateHeaders",356Framework + "_Private.h");357
358StatErr = HS.getFileMgr().getNoncachedStatValue(UmbrellaPath, Status);359if (!StatErr)360CachedSpelling->PrivateHeader =361llvm::formatv("<{0}/{0}_Private.h>", Framework);362
363return HeaderPath.IsPrivateHeader ? CachedSpelling->PrivateHeader364: CachedSpelling->PublicHeader;365}366
367// Compute the framework include spelling for `FE` which is in a framework368// named `Framework`, e.g. `NSObject.h` in framework `Foundation` would369// give <Foundation/Foundation.h> if the umbrella header exists, otherwise370// <Foundation/NSObject.h>.371std::optional<llvm::StringRef>372getFrameworkHeaderIncludeSpelling(FileEntryRef FE, llvm::StringRef Framework,373HeaderSearch &HS) {374auto Res = CachePathToFrameworkSpelling.try_emplace(FE.getName());375auto *CachedHeaderSpelling = &Res.first->second;376if (!Res.second)377return llvm::StringRef(*CachedHeaderSpelling);378
379auto HeaderPath = splitFrameworkHeaderPath(FE.getName());380if (!HeaderPath) {381// Unexpected: must not be a proper framework header, don't cache the382// failure.383CachePathToFrameworkSpelling.erase(Res.first);384return std::nullopt;385}386if (auto UmbrellaSpelling =387getFrameworkUmbrellaSpelling(Framework, HS, *HeaderPath)) {388*CachedHeaderSpelling = *UmbrellaSpelling;389return llvm::StringRef(*CachedHeaderSpelling);390}391
392*CachedHeaderSpelling =393llvm::formatv("<{0}/{1}>", Framework, HeaderPath->HeaderSubpath).str();394return llvm::StringRef(*CachedHeaderSpelling);395}396
397llvm::StringRef getIncludeHeaderUncached(FileID FID) {398const auto FE = SM.getFileEntryRefForID(FID);399if (!FE || FE->getName().empty())400return "";401
402if (auto Verbatim = PI->getPublic(*FE); !Verbatim.empty())403return Verbatim;404
405llvm::StringRef Filename = FE->getName();406if (auto Canonical = mapCanonical(Filename); !Canonical.empty())407return Canonical;408
409// Framework headers are spelled as <FrameworkName/Foo.h>, not410// "path/FrameworkName.framework/Headers/Foo.h".411auto &HS = PP->getHeaderSearchInfo();412if (const auto *HFI = HS.getExistingFileInfo(*FE))413if (!HFI->Framework.empty())414if (auto Spelling =415getFrameworkHeaderIncludeSpelling(*FE, HFI->Framework, HS))416return *Spelling;417
418if (!tooling::isSelfContainedHeader(*FE, PP->getSourceManager(),419PP->getHeaderSearchInfo())) {420// A .inc or .def file is often included into a real header to define421// symbols (e.g. LLVM tablegen files).422if (Filename.ends_with(".inc") || Filename.ends_with(".def"))423// Don't use cache reentrantly due to iterator invalidation.424return getIncludeHeaderUncached(SM.getFileID(SM.getIncludeLoc(FID)));425// Conservatively refuse to insert #includes to files without guards.426return "";427}428// Standard case: just insert the file itself.429return toURI(*FE);430}431};432
433// Return the symbol location of the token at \p TokLoc.
434std::optional<SymbolLocation>435SymbolCollector::getTokenLocation(SourceLocation TokLoc) {436const auto &SM = ASTCtx->getSourceManager();437const auto FE = SM.getFileEntryRefForID(SM.getFileID(TokLoc));438if (!FE)439return std::nullopt;440
441SymbolLocation Result;442Result.FileURI = HeaderFileURIs->toURI(*FE).c_str();443auto Range = getTokenRange(TokLoc, SM, ASTCtx->getLangOpts());444Result.Start = Range.first;445Result.End = Range.second;446
447return Result;448}
449
450SymbolCollector::SymbolCollector(Options Opts) : Opts(std::move(Opts)) {}451SymbolCollector::~SymbolCollector() = default;452
453void SymbolCollector::initialize(ASTContext &Ctx) {454ASTCtx = &Ctx;455HeaderFileURIs = std::make_unique<HeaderFileURICache>(456this->PP, ASTCtx->getSourceManager(), Opts);457CompletionAllocator = std::make_shared<GlobalCodeCompletionAllocator>();458CompletionTUInfo =459std::make_unique<CodeCompletionTUInfo>(CompletionAllocator);460}
461
462bool SymbolCollector::shouldCollectSymbol(const NamedDecl &ND,463const ASTContext &ASTCtx,464const Options &Opts,465bool IsMainFileOnly) {466// Skip anonymous declarations, e.g (anonymous enum/class/struct).467if (ND.getDeclName().isEmpty())468return false;469
470// Skip main-file symbols if we are not collecting them.471if (IsMainFileOnly && !Opts.CollectMainFileSymbols)472return false;473
474// Skip symbols in anonymous namespaces in header files.475if (!IsMainFileOnly && ND.isInAnonymousNamespace())476return false;477
478// For function local symbols, index only classes and its member functions.479if (index::isFunctionLocalSymbol(&ND))480return isa<RecordDecl>(ND) ||481(ND.isCXXInstanceMember() && ND.isFunctionOrFunctionTemplate());482
483// We want most things but not "local" symbols such as symbols inside484// FunctionDecl, BlockDecl, ObjCMethodDecl and OMPDeclareReductionDecl.485// FIXME: Need a matcher for ExportDecl in order to include symbols declared486// within an export.487const auto *DeclCtx = ND.getDeclContext();488switch (DeclCtx->getDeclKind()) {489case Decl::TranslationUnit:490case Decl::Namespace:491case Decl::LinkageSpec:492case Decl::Enum:493case Decl::ObjCProtocol:494case Decl::ObjCInterface:495case Decl::ObjCCategory:496case Decl::ObjCCategoryImpl:497case Decl::ObjCImplementation:498break;499default:500// Record has a few derivations (e.g. CXXRecord, Class specialization), it's501// easier to cast.502if (!isa<RecordDecl>(DeclCtx))503return false;504}505
506// Avoid indexing internal symbols in protobuf generated headers.507if (isPrivateProtoDecl(ND))508return false;509if (!Opts.CollectReserved &&510(hasReservedName(ND) || hasReservedScope(*ND.getDeclContext())) &&511ASTCtx.getSourceManager().isInSystemHeader(ND.getLocation()))512return false;513
514return true;515}
516
517const Decl *518SymbolCollector::getRefContainer(const Decl *Enclosing,519const SymbolCollector::Options &Opts) {520while (Enclosing) {521const auto *ND = dyn_cast<NamedDecl>(Enclosing);522if (ND && shouldCollectSymbol(*ND, ND->getASTContext(), Opts, true)) {523break;524}525Enclosing = dyn_cast_or_null<Decl>(Enclosing->getDeclContext());526}527return Enclosing;528}
529
530// Always return true to continue indexing.
531bool SymbolCollector::handleDeclOccurrence(532const Decl *D, index::SymbolRoleSet Roles,533llvm::ArrayRef<index::SymbolRelation> Relations, SourceLocation Loc,534index::IndexDataConsumer::ASTNodeInfo ASTNode) {535assert(ASTCtx && PP && HeaderFileURIs);536assert(CompletionAllocator && CompletionTUInfo);537assert(ASTNode.OrigD);538// Indexing API puts canonical decl into D, which might not have a valid539// source location for implicit/built-in decls. Fallback to original decl in540// such cases.541if (D->getLocation().isInvalid())542D = ASTNode.OrigD;543// If OrigD is an declaration associated with a friend declaration and it's544// not a definition, skip it. Note that OrigD is the occurrence that the545// collector is currently visiting.546if ((ASTNode.OrigD->getFriendObjectKind() !=547Decl::FriendObjectKind::FOK_None) &&548!(Roles & static_cast<unsigned>(index::SymbolRole::Definition)))549return true;550// A declaration created for a friend declaration should not be used as the551// canonical declaration in the index. 
Use OrigD instead, unless we've already552// picked a replacement for D553if (D->getFriendObjectKind() != Decl::FriendObjectKind::FOK_None)554D = CanonicalDecls.try_emplace(D, ASTNode.OrigD).first->second;555// Flag to mark that D should be considered canonical meaning its declaration556// will override any previous declaration for the Symbol.557bool DeclIsCanonical = false;558// Avoid treating ObjCImplementationDecl as a canonical declaration if it has559// a corresponding non-implicit and non-forward declared ObjcInterfaceDecl.560if (const auto *IID = dyn_cast<ObjCImplementationDecl>(D)) {561DeclIsCanonical = true;562if (const auto *CID = IID->getClassInterface())563if (const auto *DD = CID->getDefinition())564if (!DD->isImplicitInterfaceDecl())565D = DD;566}567// Avoid treating ObjCCategoryImplDecl as a canonical declaration in favor of568// its ObjCCategoryDecl if it has one.569if (const auto *CID = dyn_cast<ObjCCategoryImplDecl>(D)) {570DeclIsCanonical = true;571if (const auto *CD = CID->getCategoryDecl())572D = CD;573}574const NamedDecl *ND = dyn_cast<NamedDecl>(D);575if (!ND)576return true;577
578auto ID = getSymbolIDCached(ND);579if (!ID)580return true;581
582// Mark D as referenced if this is a reference coming from the main file.583// D may not be an interesting symbol, but it's cheaper to check at the end.584auto &SM = ASTCtx->getSourceManager();585if (Opts.CountReferences &&586(Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&587SM.getFileID(SM.getSpellingLoc(Loc)) == SM.getMainFileID())588ReferencedSymbols.insert(ID);589
590// ND is the canonical (i.e. first) declaration. If it's in the main file591// (which is not a header), then no public declaration was visible, so assume592// it's main-file only.593bool IsMainFileOnly =594SM.isWrittenInMainFile(SM.getExpansionLoc(ND->getBeginLoc())) &&595!isHeaderFile(SM.getFileEntryRefForID(SM.getMainFileID())->getName(),596ASTCtx->getLangOpts());597// In C, printf is a redecl of an implicit builtin! So check OrigD instead.598if (ASTNode.OrigD->isImplicit() ||599!shouldCollectSymbol(*ND, *ASTCtx, Opts, IsMainFileOnly))600return true;601
602// Note: we need to process relations for all decl occurrences, including603// refs, because the indexing code only populates relations for specific604// occurrences. For example, RelationBaseOf is only populated for the605// occurrence inside the base-specifier.606processRelations(*ND, ID, Relations);607
608bool CollectRef = static_cast<bool>(Opts.RefFilter & toRefKind(Roles));609// Unlike other fields, e.g. Symbols (which use spelling locations), we use610// file locations for references (as it aligns the behavior of clangd's611// AST-based xref).612// FIXME: we should try to use the file locations for other fields.613if (CollectRef &&614(!IsMainFileOnly || Opts.CollectMainFileRefs ||615ND->isExternallyVisible()) &&616!isa<NamespaceDecl>(ND)) {617auto FileLoc = SM.getFileLoc(Loc);618auto FID = SM.getFileID(FileLoc);619if (Opts.RefsInHeaders || FID == SM.getMainFileID()) {620addRef(ID, SymbolRef{FileLoc, FID, Roles,621getRefContainer(ASTNode.Parent, Opts),622isSpelled(FileLoc, *ND)});623}624}625// Don't continue indexing if this is a mere reference.626if (!(Roles & (static_cast<unsigned>(index::SymbolRole::Declaration) |627static_cast<unsigned>(index::SymbolRole::Definition))))628return true;629
630// FIXME: ObjCPropertyDecl are not properly indexed here:631// - ObjCPropertyDecl may have an OrigD of ObjCPropertyImplDecl, which is632// not a NamedDecl.633auto *OriginalDecl = dyn_cast<NamedDecl>(ASTNode.OrigD);634if (!OriginalDecl)635return true;636
637const Symbol *BasicSymbol = Symbols.find(ID);638if (isPreferredDeclaration(*OriginalDecl, Roles))639// If OriginalDecl is preferred, replace/create the existing canonical640// declaration (e.g. a class forward declaration). There should be at most641// one duplicate as we expect to see only one preferred declaration per642// TU, because in practice they are definitions.643BasicSymbol = addDeclaration(*OriginalDecl, std::move(ID), IsMainFileOnly);644else if (!BasicSymbol || DeclIsCanonical)645BasicSymbol = addDeclaration(*ND, std::move(ID), IsMainFileOnly);646
647if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))648addDefinition(*OriginalDecl, *BasicSymbol);649
650return true;651}
652
653void SymbolCollector::handleMacros(const MainFileMacros &MacroRefsToIndex) {654assert(HeaderFileURIs && PP);655const auto &SM = PP->getSourceManager();656const auto MainFileEntryRef = SM.getFileEntryRefForID(SM.getMainFileID());657assert(MainFileEntryRef);658
659const std::string &MainFileURI = HeaderFileURIs->toURI(*MainFileEntryRef);660// Add macro references.661for (const auto &IDToRefs : MacroRefsToIndex.MacroRefs) {662for (const auto &MacroRef : IDToRefs.second) {663const auto &Range = MacroRef.toRange(SM);664bool IsDefinition = MacroRef.IsDefinition;665Ref R;666R.Location.Start.setLine(Range.start.line);667R.Location.Start.setColumn(Range.start.character);668R.Location.End.setLine(Range.end.line);669R.Location.End.setColumn(Range.end.character);670R.Location.FileURI = MainFileURI.c_str();671R.Kind = IsDefinition ? RefKind::Definition : RefKind::Reference;672Refs.insert(IDToRefs.first, R);673if (IsDefinition) {674Symbol S;675S.ID = IDToRefs.first;676auto StartLoc = cantFail(sourceLocationInMainFile(SM, Range.start));677auto EndLoc = cantFail(sourceLocationInMainFile(SM, Range.end));678S.Name = toSourceCode(SM, SourceRange(StartLoc, EndLoc));679S.SymInfo.Kind = index::SymbolKind::Macro;680S.SymInfo.SubKind = index::SymbolSubKind::None;681S.SymInfo.Properties = index::SymbolPropertySet();682S.SymInfo.Lang = index::SymbolLanguage::C;683S.Origin = Opts.Origin;684S.CanonicalDeclaration = R.Location;685// Make the macro visible for code completion if main file is an686// include-able header.687if (!HeaderFileURIs->getIncludeHeader(SM.getMainFileID()).empty()) {688S.Flags |= Symbol::IndexedForCodeCompletion;689S.Flags |= Symbol::VisibleOutsideFile;690}691Symbols.insert(S);692}693}694}695}
696
697bool SymbolCollector::handleMacroOccurrence(const IdentifierInfo *Name,698const MacroInfo *MI,699index::SymbolRoleSet Roles,700SourceLocation Loc) {701assert(PP);702// Builtin macros don't have useful locations and aren't needed in completion.703if (MI->isBuiltinMacro())704return true;705
706const auto &SM = PP->getSourceManager();707auto DefLoc = MI->getDefinitionLoc();708// Also avoid storing macros that aren't defined in any file, i.e. predefined709// macros like __DBL_MIN__ and those defined on the command line.710if (SM.isWrittenInBuiltinFile(DefLoc) ||711SM.isWrittenInCommandLineFile(DefLoc) ||712Name->getName() == "__GCC_HAVE_DWARF2_CFI_ASM")713return true;714
715auto ID = getSymbolIDCached(Name->getName(), MI, SM);716if (!ID)717return true;718
719auto SpellingLoc = SM.getSpellingLoc(Loc);720bool IsMainFileOnly =721SM.isInMainFile(SM.getExpansionLoc(DefLoc)) &&722!isHeaderFile(SM.getFileEntryRefForID(SM.getMainFileID())->getName(),723ASTCtx->getLangOpts());724// Do not store references to main-file macros.725if ((static_cast<unsigned>(Opts.RefFilter) & Roles) && !IsMainFileOnly &&726(Opts.RefsInHeaders || SM.getFileID(SpellingLoc) == SM.getMainFileID())) {727// FIXME: Populate container information for macro references.728// FIXME: All MacroRefs are marked as Spelled now, but this should be729// checked.730addRef(ID, SymbolRef{Loc, SM.getFileID(Loc), Roles, /*Container=*/nullptr,731/*Spelled=*/true});732}733
734// Collect symbols.735if (!Opts.CollectMacro)736return true;737
738// Skip main-file macros if we are not collecting them.739if (IsMainFileOnly && !Opts.CollectMainFileSymbols)740return false;741
742// Mark the macro as referenced if this is a reference coming from the main743// file. The macro may not be an interesting symbol, but it's cheaper to check744// at the end.745if (Opts.CountReferences &&746(Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&747SM.getFileID(SpellingLoc) == SM.getMainFileID())748ReferencedSymbols.insert(ID);749
750// Don't continue indexing if this is a mere reference.751// FIXME: remove macro with ID if it is undefined.752if (!(Roles & static_cast<unsigned>(index::SymbolRole::Declaration) ||753Roles & static_cast<unsigned>(index::SymbolRole::Definition)))754return true;755
756// Only collect one instance in case there are multiple.757if (Symbols.find(ID) != nullptr)758return true;759
760Symbol S;761S.ID = std::move(ID);762S.Name = Name->getName();763if (!IsMainFileOnly) {764S.Flags |= Symbol::IndexedForCodeCompletion;765S.Flags |= Symbol::VisibleOutsideFile;766}767S.SymInfo = index::getSymbolInfoForMacro(*MI);768S.Origin = Opts.Origin;769// FIXME: use the result to filter out symbols.770shouldIndexFile(SM.getFileID(Loc));771if (auto DeclLoc = getTokenLocation(DefLoc))772S.CanonicalDeclaration = *DeclLoc;773
774CodeCompletionResult SymbolCompletion(Name);775const auto *CCS = SymbolCompletion.CreateCodeCompletionStringForMacro(776*PP, *CompletionAllocator, *CompletionTUInfo);777std::string Signature;778std::string SnippetSuffix;779getSignature(*CCS, &Signature, &SnippetSuffix, SymbolCompletion.Kind,780SymbolCompletion.CursorKind);781S.Signature = Signature;782S.CompletionSnippetSuffix = SnippetSuffix;783
784IndexedMacros.insert(Name);785
786setIncludeLocation(S, DefLoc, include_cleaner::Macro{Name, DefLoc});787Symbols.insert(S);788return true;789}
790
791void SymbolCollector::processRelations(792const NamedDecl &ND, const SymbolID &ID,793ArrayRef<index::SymbolRelation> Relations) {794for (const auto &R : Relations) {795auto RKind = indexableRelation(R);796if (!RKind)797continue;798const Decl *Object = R.RelatedSymbol;799
800auto ObjectID = getSymbolIDCached(Object);801if (!ObjectID)802continue;803
804// Record the relation.805// TODO: There may be cases where the object decl is not indexed for some806// reason. Those cases should probably be removed in due course, but for807// now there are two possible ways to handle it:808// (A) Avoid storing the relation in such cases.809// (B) Store it anyways. Clients will likely lookup() the SymbolID810// in the index and find nothing, but that's a situation they811// probably need to handle for other reasons anyways.812// We currently do (B) because it's simpler.813if (*RKind == RelationKind::BaseOf)814this->Relations.insert({ID, *RKind, ObjectID});815else if (*RKind == RelationKind::OverriddenBy)816this->Relations.insert({ObjectID, *RKind, ID});817}818}
819
820void SymbolCollector::setIncludeLocation(const Symbol &S, SourceLocation DefLoc,821const include_cleaner::Symbol &Sym) {822const auto &SM = PP->getSourceManager();823if (!Opts.CollectIncludePath ||824shouldCollectIncludePath(S.SymInfo.Kind) == Symbol::Invalid)825return;826
827// Use the expansion location to get the #include header since this is828// where the symbol is exposed.829if (FileID FID = SM.getDecomposedExpansionLoc(DefLoc).first; FID.isValid())830IncludeFiles[S.ID] = FID;831
832// We update providers for a symbol with each occurence, as SymbolCollector833// might run while parsing, rather than at the end of a translation unit.834// Hence we see more and more redecls over time.835SymbolProviders[S.ID] =836include_cleaner::headersForSymbol(Sym, SM, Opts.PragmaIncludes);837}
838
839llvm::StringRef getStdHeader(const Symbol *S, const LangOptions &LangOpts) {840tooling::stdlib::Lang Lang = tooling::stdlib::Lang::CXX;841if (LangOpts.C11)842Lang = tooling::stdlib::Lang::C;843else if(!LangOpts.CPlusPlus)844return "";845
846if (S->Scope == "std::" && S->Name == "move") {847if (!S->Signature.contains(','))848return "<utility>";849return "<algorithm>";850}851
852if (auto StdSym = tooling::stdlib::Symbol::named(S->Scope, S->Name, Lang))853if (auto Header = StdSym->header())854return Header->name();855return "";856}
857
858void SymbolCollector::finish() {859// At the end of the TU, add 1 to the refcount of all referenced symbols.860for (const auto &ID : ReferencedSymbols) {861if (const auto *S = Symbols.find(ID)) {862// SymbolSlab::Builder returns const symbols because strings are interned863// and modifying returned symbols without inserting again wouldn't go864// well. const_cast is safe here as we're modifying a data owned by the865// Symbol. This reduces time spent in SymbolCollector by ~1%.866++const_cast<Symbol *>(S)->References;867}868}869if (Opts.CollectMacro) {870assert(PP);871// First, drop header guards. We can't identify these until EOF.872for (const IdentifierInfo *II : IndexedMacros) {873if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo())874if (auto ID =875getSymbolIDCached(II->getName(), MI, PP->getSourceManager()))876if (MI->isUsedForHeaderGuard())877Symbols.erase(ID);878}879}880llvm::DenseMap<FileID, bool> FileToContainsImportsOrObjC;881llvm::DenseMap<include_cleaner::Header, std::string> HeaderSpelling;882// Fill in IncludeHeaders.883// We delay this until end of TU so header guards are all resolved.884for (const auto &[SID, Providers] : SymbolProviders) {885const Symbol *S = Symbols.find(SID);886if (!S)887continue;888
889FileID FID = IncludeFiles.lookup(SID);890// Determine if the FID is #include'd or #import'ed.891Symbol::IncludeDirective Directives = Symbol::Invalid;892auto CollectDirectives = shouldCollectIncludePath(S->SymInfo.Kind);893if ((CollectDirectives & Symbol::Include) != 0)894Directives |= Symbol::Include;895// Only allow #import for symbols from ObjC-like files.896if ((CollectDirectives & Symbol::Import) != 0 && FID.isValid()) {897auto [It, Inserted] = FileToContainsImportsOrObjC.try_emplace(FID);898if (Inserted)899It->second = FilesWithObjCConstructs.contains(FID) ||900tooling::codeContainsImports(901ASTCtx->getSourceManager().getBufferData(FID));902if (It->second)903Directives |= Symbol::Import;904}905
906if (Directives == Symbol::Invalid)907continue;908
909// Use the include location-based logic for Objective-C symbols.910if (Directives & Symbol::Import) {911llvm::StringRef IncludeHeader = getStdHeader(S, ASTCtx->getLangOpts());912if (IncludeHeader.empty())913IncludeHeader = HeaderFileURIs->getIncludeHeader(FID);914
915if (!IncludeHeader.empty()) {916auto NewSym = *S;917NewSym.IncludeHeaders.push_back({IncludeHeader, 1, Directives});918Symbols.insert(NewSym);919}920// FIXME: use providers from include-cleaner library once it's polished921// for Objective-C.922continue;923}924
925// For #include's, use the providers computed by the include-cleaner926// library.927assert(Directives == Symbol::Include);928// Ignore providers that are not self-contained, this is especially929// important for symbols defined in the main-file. We want to prefer the930// header, if possible.931// TODO: Limit this to specifically ignore main file, when we're indexing a932// non-header file?933auto SelfContainedProvider =934[this](llvm::ArrayRef<include_cleaner::Header> Providers)935-> std::optional<include_cleaner::Header> {936for (const auto &H : Providers) {937if (H.kind() != include_cleaner::Header::Physical)938return H;939if (tooling::isSelfContainedHeader(H.physical(), PP->getSourceManager(),940PP->getHeaderSearchInfo()))941return H;942}943return std::nullopt;944};945const auto OptionalProvider = SelfContainedProvider(Providers);946if (!OptionalProvider)947continue;948const auto &H = *OptionalProvider;949const auto [SpellingIt, Inserted] = HeaderSpelling.try_emplace(H);950if (Inserted) {951auto &SM = ASTCtx->getSourceManager();952if (H.kind() == include_cleaner::Header::Kind::Physical) {953// FIXME: Get rid of this once include-cleaner has support for system954// headers.955if (auto Canonical =956HeaderFileURIs->mapCanonical(H.physical().getName());957!Canonical.empty())958SpellingIt->second = Canonical;959// For physical files, prefer URIs as spellings might change960// depending on the translation unit.961else if (tooling::isSelfContainedHeader(H.physical(), SM,962PP->getHeaderSearchInfo()))963SpellingIt->second =964HeaderFileURIs->toURI(H.physical());965} else {966SpellingIt->second = include_cleaner::spellHeader(967{H, PP->getHeaderSearchInfo(),968SM.getFileEntryForID(SM.getMainFileID())});969}970}971
972if (!SpellingIt->second.empty()) {973auto NewSym = *S;974NewSym.IncludeHeaders.push_back({SpellingIt->second, 1, Directives});975Symbols.insert(NewSym);976}977}978
979ReferencedSymbols.clear();980IncludeFiles.clear();981SymbolProviders.clear();982FilesWithObjCConstructs.clear();983}
984
985const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, SymbolID ID,986bool IsMainFileOnly) {987auto &Ctx = ND.getASTContext();988auto &SM = Ctx.getSourceManager();989
990Symbol S;991S.ID = std::move(ID);992std::string QName = printQualifiedName(ND);993// FIXME: this returns foo:bar: for objective-C methods, we prefer only foo:994// for consistency with CodeCompletionString and a clean name/signature split.995std::tie(S.Scope, S.Name) = splitQualifiedName(QName);996std::string TemplateSpecializationArgs = printTemplateSpecializationArgs(ND);997S.TemplateSpecializationArgs = TemplateSpecializationArgs;998
999// We collect main-file symbols, but do not use them for code completion.1000if (!IsMainFileOnly && isIndexedForCodeCompletion(ND, Ctx))1001S.Flags |= Symbol::IndexedForCodeCompletion;1002if (isImplementationDetail(&ND))1003S.Flags |= Symbol::ImplementationDetail;1004if (!IsMainFileOnly)1005S.Flags |= Symbol::VisibleOutsideFile;1006S.SymInfo = index::getSymbolInfo(&ND);1007auto Loc = nameLocation(ND, SM);1008assert(Loc.isValid() && "Invalid source location for NamedDecl");1009// FIXME: use the result to filter out symbols.1010auto FID = SM.getFileID(Loc);1011shouldIndexFile(FID);1012if (auto DeclLoc = getTokenLocation(Loc))1013S.CanonicalDeclaration = *DeclLoc;1014
1015S.Origin = Opts.Origin;1016if (ND.getAvailability() == AR_Deprecated)1017S.Flags |= Symbol::Deprecated;1018
1019// Add completion info.1020// FIXME: we may want to choose a different redecl, or combine from several.1021assert(ASTCtx && PP && "ASTContext and Preprocessor must be set.");1022// We use the primary template, as clang does during code completion.1023CodeCompletionResult SymbolCompletion(&getTemplateOrThis(ND), 0);1024const auto *CCS = SymbolCompletion.CreateCodeCompletionString(1025*ASTCtx, *PP, CodeCompletionContext::CCC_Symbol, *CompletionAllocator,1026*CompletionTUInfo,1027/*IncludeBriefComments*/ false);1028std::string Documentation =1029formatDocumentation(*CCS, getDocComment(Ctx, SymbolCompletion,1030/*CommentsFromHeaders=*/true));1031if (!(S.Flags & Symbol::IndexedForCodeCompletion)) {1032if (Opts.StoreAllDocumentation)1033S.Documentation = Documentation;1034Symbols.insert(S);1035return Symbols.find(S.ID);1036}1037S.Documentation = Documentation;1038std::string Signature;1039std::string SnippetSuffix;1040getSignature(*CCS, &Signature, &SnippetSuffix, SymbolCompletion.Kind,1041SymbolCompletion.CursorKind);1042S.Signature = Signature;1043S.CompletionSnippetSuffix = SnippetSuffix;1044std::string ReturnType = getReturnType(*CCS);1045S.ReturnType = ReturnType;1046
1047std::optional<OpaqueType> TypeStorage;1048if (S.Flags & Symbol::IndexedForCodeCompletion) {1049TypeStorage = OpaqueType::fromCompletionResult(*ASTCtx, SymbolCompletion);1050if (TypeStorage)1051S.Type = TypeStorage->raw();1052}1053
1054Symbols.insert(S);1055setIncludeLocation(S, ND.getLocation(), include_cleaner::Symbol{ND});1056if (S.SymInfo.Lang == index::SymbolLanguage::ObjC)1057FilesWithObjCConstructs.insert(FID);1058return Symbols.find(S.ID);1059}
1060
1061void SymbolCollector::addDefinition(const NamedDecl &ND,1062const Symbol &DeclSym) {1063if (DeclSym.Definition)1064return;1065const auto &SM = ND.getASTContext().getSourceManager();1066auto Loc = nameLocation(ND, SM);1067shouldIndexFile(SM.getFileID(Loc));1068auto DefLoc = getTokenLocation(Loc);1069// If we saw some forward declaration, we end up copying the symbol.1070// This is not ideal, but avoids duplicating the "is this a definition" check1071// in clang::index. We should only see one definition.1072if (!DefLoc)1073return;1074Symbol S = DeclSym;1075// FIXME: use the result to filter out symbols.1076S.Definition = *DefLoc;1077Symbols.insert(S);1078}
1079
1080bool SymbolCollector::shouldIndexFile(FileID FID) {1081if (!Opts.FileFilter)1082return true;1083auto I = FilesToIndexCache.try_emplace(FID);1084if (I.second)1085I.first->second = Opts.FileFilter(ASTCtx->getSourceManager(), FID);1086return I.first->second;1087}
1088
1089void SymbolCollector::addRef(SymbolID ID, const SymbolRef &SR) {1090const auto &SM = ASTCtx->getSourceManager();1091// FIXME: use the result to filter out references.1092shouldIndexFile(SR.FID);1093if (const auto FE = SM.getFileEntryRefForID(SR.FID)) {1094auto Range = getTokenRange(SR.Loc, SM, ASTCtx->getLangOpts());1095Ref R;1096R.Location.Start = Range.first;1097R.Location.End = Range.second;1098R.Location.FileURI = HeaderFileURIs->toURI(*FE).c_str();1099R.Kind = toRefKind(SR.Roles, SR.Spelled);1100R.Container = getSymbolIDCached(SR.Container);1101Refs.insert(ID, R);1102}1103}
1104
1105SymbolID SymbolCollector::getSymbolIDCached(const Decl *D) {1106auto It = DeclToIDCache.try_emplace(D, SymbolID{});1107if (It.second)1108It.first->second = getSymbolID(D);1109return It.first->second;1110}
1111
1112SymbolID SymbolCollector::getSymbolIDCached(const llvm::StringRef MacroName,1113const MacroInfo *MI,1114const SourceManager &SM) {1115auto It = MacroToIDCache.try_emplace(MI, SymbolID{});1116if (It.second)1117It.first->second = getSymbolID(MacroName, MI, SM);1118return It.first->second;1119}
1120} // namespace clangd1121} // namespace clang1122