llvm-project
1137 строк · 45.7 Кб
1//===--- Selection.cpp ----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "Selection.h"
10#include "AST.h"
11#include "support/Logger.h"
12#include "support/Trace.h"
13#include "clang/AST/ASTConcept.h"
14#include "clang/AST/ASTTypeTraits.h"
15#include "clang/AST/Decl.h"
16#include "clang/AST/DeclCXX.h"
17#include "clang/AST/Expr.h"
18#include "clang/AST/ExprCXX.h"
19#include "clang/AST/PrettyPrinter.h"
20#include "clang/AST/RecursiveASTVisitor.h"
21#include "clang/AST/TypeLoc.h"
22#include "clang/Basic/OperatorKinds.h"
23#include "clang/Basic/SourceLocation.h"
24#include "clang/Basic/SourceManager.h"
25#include "clang/Basic/TokenKinds.h"
26#include "clang/Lex/Lexer.h"
27#include "clang/Tooling/Syntax/Tokens.h"
28#include "llvm/ADT/BitVector.h"
29#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/StringExtras.h"
31#include "llvm/Support/Casting.h"
32#include "llvm/Support/raw_ostream.h"
33#include <algorithm>
34#include <optional>
35#include <set>
36#include <string>
37
38namespace clang {
39namespace clangd {
40namespace {
41using Node = SelectionTree::Node;
42
43// Measure the fraction of selections that were enabled by recovery AST.
44void recordMetrics(const SelectionTree &S, const LangOptions &Lang) {
45if (!trace::enabled())
46return;
47const char *LanguageLabel = Lang.CPlusPlus ? "C++" : Lang.ObjC ? "ObjC" : "C";
48static constexpr trace::Metric SelectionUsedRecovery(
49"selection_recovery", trace::Metric::Distribution, "language");
50static constexpr trace::Metric RecoveryType(
51"selection_recovery_type", trace::Metric::Distribution, "language");
52const auto *Common = S.commonAncestor();
53for (const auto *N = Common; N; N = N->Parent) {
54if (const auto *RE = N->ASTNode.get<RecoveryExpr>()) {
55SelectionUsedRecovery.record(1, LanguageLabel); // used recovery ast.
56RecoveryType.record(RE->isTypeDependent() ? 0 : 1, LanguageLabel);
57return;
58}
59}
60if (Common)
61SelectionUsedRecovery.record(0, LanguageLabel); // unused.
62}
63
64// Return the range covering a node and all its children.
65SourceRange getSourceRange(const DynTypedNode &N) {
66// MemberExprs to implicitly access anonymous fields should not claim any
67// tokens for themselves. Given:
68// struct A { struct { int b; }; };
69// The clang AST reports the following nodes for an access to b:
70// A().b;
71// [----] MemberExpr, base = A().<anonymous>, member = b
72// [----] MemberExpr: base = A(), member = <anonymous>
73// [-] CXXConstructExpr
74// For our purposes, we don't want the second MemberExpr to own any tokens,
75// so we reduce its range to match the CXXConstructExpr.
76// (It's not clear that changing the clang AST would be correct in general).
77if (const auto *ME = N.get<MemberExpr>()) {
78if (!ME->getMemberDecl()->getDeclName())
79return ME->getBase()
80? getSourceRange(DynTypedNode::create(*ME->getBase()))
81: SourceRange();
82}
83return N.getSourceRange();
84}
85
86// An IntervalSet maintains a set of disjoint subranges of an array.
87//
88// Initially, it contains the entire array.
89// [-----------------------------------------------------------]
90//
91// When a range is erased(), it will typically split the array in two.
92// Claim: [--------------------]
93// after: [----------------] [-------------------]
94//
95// erase() returns the segments actually erased. Given the state above:
96// Claim: [---------------------------------------]
97// Out: [---------] [------]
98// After: [-----] [-----------]
99//
100// It is used to track (expanded) tokens not yet associated with an AST node.
101// On traversing an AST node, its token range is erased from the unclaimed set.
102// The tokens actually removed are associated with that node, and hit-tested
103// against the selection to determine whether the node is selected.
104template <typename T> class IntervalSet {
105public:
106IntervalSet(llvm::ArrayRef<T> Range) { UnclaimedRanges.insert(Range); }
107
108// Removes the elements of Claim from the set, modifying or removing ranges
109// that overlap it.
110// Returns the continuous subranges of Claim that were actually removed.
111llvm::SmallVector<llvm::ArrayRef<T>> erase(llvm::ArrayRef<T> Claim) {
112llvm::SmallVector<llvm::ArrayRef<T>> Out;
113if (Claim.empty())
114return Out;
115
116// General case:
117// Claim: [-----------------]
118// UnclaimedRanges: [-A-] [-B-] [-C-] [-D-] [-E-] [-F-] [-G-]
119// Overlap: ^first ^second
120// Ranges C and D are fully included. Ranges B and E must be trimmed.
121auto Overlap = std::make_pair(
122UnclaimedRanges.lower_bound({Claim.begin(), Claim.begin()}), // C
123UnclaimedRanges.lower_bound({Claim.end(), Claim.end()})); // F
124// Rewind to cover B.
125if (Overlap.first != UnclaimedRanges.begin()) {
126--Overlap.first;
127// ...unless B isn't selected at all.
128if (Overlap.first->end() <= Claim.begin())
129++Overlap.first;
130}
131if (Overlap.first == Overlap.second)
132return Out;
133
134// First, copy all overlapping ranges into the output.
135auto OutFirst = Out.insert(Out.end(), Overlap.first, Overlap.second);
136// If any of the overlapping ranges were sliced by the claim, split them:
137// - restrict the returned range to the claimed part
138// - save the unclaimed part so it can be reinserted
139llvm::ArrayRef<T> RemainingHead, RemainingTail;
140if (Claim.begin() > OutFirst->begin()) {
141RemainingHead = {OutFirst->begin(), Claim.begin()};
142*OutFirst = {Claim.begin(), OutFirst->end()};
143}
144if (Claim.end() < Out.back().end()) {
145RemainingTail = {Claim.end(), Out.back().end()};
146Out.back() = {Out.back().begin(), Claim.end()};
147}
148
149// Erase all the overlapping ranges (invalidating all iterators).
150UnclaimedRanges.erase(Overlap.first, Overlap.second);
151// Reinsert ranges that were merely trimmed.
152if (!RemainingHead.empty())
153UnclaimedRanges.insert(RemainingHead);
154if (!RemainingTail.empty())
155UnclaimedRanges.insert(RemainingTail);
156
157return Out;
158}
159
160private:
161using TokenRange = llvm::ArrayRef<T>;
162struct RangeLess {
163bool operator()(llvm::ArrayRef<T> L, llvm::ArrayRef<T> R) const {
164return L.begin() < R.begin();
165}
166};
167
168// Disjoint sorted unclaimed ranges of expanded tokens.
169std::set<llvm::ArrayRef<T>, RangeLess> UnclaimedRanges;
170};
171
172// Sentinel value for the selectedness of a node where we've seen no tokens yet.
173// This resolves to Unselected if no tokens are ever seen.
174// But Unselected + Complete -> Partial, while NoTokens + Complete --> Complete.
175// This value is never exposed publicly.
176constexpr SelectionTree::Selection NoTokens =
177static_cast<SelectionTree::Selection>(
178static_cast<unsigned char>(SelectionTree::Complete + 1));
179
180// Nodes start with NoTokens, and then use this function to aggregate the
181// selectedness as more tokens are found.
182void update(SelectionTree::Selection &Result, SelectionTree::Selection New) {
183if (New == NoTokens)
184return;
185if (Result == NoTokens)
186Result = New;
187else if (Result != New)
188// Can only be completely selected (or unselected) if all tokens are.
189Result = SelectionTree::Partial;
190}
191
192// As well as comments, don't count semicolons as real tokens.
193// They're not properly claimed as expr-statement is missing from the AST.
194bool shouldIgnore(const syntax::Token &Tok) {
195switch (Tok.kind()) {
196// Even "attached" comments are not considered part of a node's range.
197case tok::comment:
198// The AST doesn't directly store locations for terminating semicolons.
199case tok::semi:
200// We don't have locations for cvr-qualifiers: see QualifiedTypeLoc.
201case tok::kw_const:
202case tok::kw_volatile:
203case tok::kw_restrict:
204return true;
205default:
206return false;
207}
208}
209
210// Determine whether 'Target' is the first expansion of the macro
211// argument whose top-level spelling location is 'SpellingLoc'.
212bool isFirstExpansion(FileID Target, SourceLocation SpellingLoc,
213const SourceManager &SM) {
214SourceLocation Prev = SpellingLoc;
215while (true) {
216// If the arg is expanded multiple times, getMacroArgExpandedLocation()
217// returns the first expansion.
218SourceLocation Next = SM.getMacroArgExpandedLocation(Prev);
219// So if we reach the target, target is the first-expansion of the
220// first-expansion ...
221if (SM.getFileID(Next) == Target)
222return true;
223
224// Otherwise, if the FileID stops changing, we've reached the innermost
225// macro expansion, and Target was on a different branch.
226if (SM.getFileID(Next) == SM.getFileID(Prev))
227return false;
228
229Prev = Next;
230}
231return false;
232}
233
// SelectionTester can determine whether a range of tokens from the PP-expanded
// stream (corresponding to an AST node) is considered selected.
//
// When the tokens result from macro expansions, the appropriate tokens in the
// main file are examined (macro invocation or args). Similarly for #includes.
// However, only the first expansion of a given spelled token is considered
// selected.
//
// It tests each token in the range (not just the endpoints) as contiguous
// expanded tokens may not have contiguous spellings (with macros).
//
// Non-token text, and tokens not modeled in the AST (comments, semicolons)
// are ignored when determining selectedness.
247class SelectionTester {
248public:
249// The selection is offsets [SelBegin, SelEnd) in SelFile.
250SelectionTester(const syntax::TokenBuffer &Buf, FileID SelFile,
251unsigned SelBegin, unsigned SelEnd, const SourceManager &SM)
252: SelFile(SelFile), SelFileBounds(SM.getLocForStartOfFile(SelFile),
253SM.getLocForEndOfFile(SelFile)),
254SM(SM) {
255// Find all tokens (partially) selected in the file.
256auto AllSpelledTokens = Buf.spelledTokens(SelFile);
257const syntax::Token *SelFirst =
258llvm::partition_point(AllSpelledTokens, [&](const syntax::Token &Tok) {
259return SM.getFileOffset(Tok.endLocation()) <= SelBegin;
260});
261const syntax::Token *SelLimit = std::partition_point(
262SelFirst, AllSpelledTokens.end(), [&](const syntax::Token &Tok) {
263return SM.getFileOffset(Tok.location()) < SelEnd;
264});
265auto Sel = llvm::ArrayRef(SelFirst, SelLimit);
266// Find which of these are preprocessed to nothing and should be ignored.
267llvm::BitVector PPIgnored(Sel.size(), false);
268for (const syntax::TokenBuffer::Expansion &X :
269Buf.expansionsOverlapping(Sel)) {
270if (X.Expanded.empty()) {
271for (const syntax::Token &Tok : X.Spelled) {
272if (&Tok >= SelFirst && &Tok < SelLimit)
273PPIgnored[&Tok - SelFirst] = true;
274}
275}
276}
277// Precompute selectedness and offset for selected spelled tokens.
278for (unsigned I = 0; I < Sel.size(); ++I) {
279if (shouldIgnore(Sel[I]) || PPIgnored[I])
280continue;
281SelectedSpelled.emplace_back();
282Tok &S = SelectedSpelled.back();
283S.Offset = SM.getFileOffset(Sel[I].location());
284if (S.Offset >= SelBegin && S.Offset + Sel[I].length() <= SelEnd)
285S.Selected = SelectionTree::Complete;
286else
287S.Selected = SelectionTree::Partial;
288}
289MaybeSelectedExpanded = computeMaybeSelectedExpandedTokens(Buf);
290}
291
292// Test whether a consecutive range of tokens is selected.
293// The tokens are taken from the expanded token stream.
294SelectionTree::Selection
295test(llvm::ArrayRef<syntax::Token> ExpandedTokens) const {
296if (ExpandedTokens.empty())
297return NoTokens;
298if (SelectedSpelled.empty())
299return SelectionTree::Unselected;
300// Cheap (pointer) check whether any of the tokens could touch selection.
301// In most cases, the node's overall source range touches ExpandedTokens,
302// or we would have failed mayHit(). However now we're only considering
303// the *unclaimed* spans of expanded tokens.
304// This is a significant performance improvement when a lot of nodes
305// surround the selection, including when generated by macros.
306if (MaybeSelectedExpanded.empty() ||
307&ExpandedTokens.front() > &MaybeSelectedExpanded.back() ||
308&ExpandedTokens.back() < &MaybeSelectedExpanded.front()) {
309return SelectionTree::Unselected;
310}
311
312// The eof token is used as a sentinel.
313// In general, source range from an AST node should not claim the eof token,
314// but it could occur for unmatched-bracket cases.
315// FIXME: fix it in TokenBuffer, expandedTokens(SourceRange) should not
316// return the eof token.
317if (ExpandedTokens.back().kind() == tok::eof)
318ExpandedTokens = ExpandedTokens.drop_back();
319
320SelectionTree::Selection Result = NoTokens;
321while (!ExpandedTokens.empty()) {
322// Take consecutive tokens from the same context together for efficiency.
323SourceLocation Start = ExpandedTokens.front().location();
324FileID FID = SM.getFileID(Start);
325// Comparing SourceLocations against bounds is cheaper than getFileID().
326SourceLocation Limit = SM.getComposedLoc(FID, SM.getFileIDSize(FID));
327auto Batch = ExpandedTokens.take_while([&](const syntax::Token &T) {
328return T.location() >= Start && T.location() < Limit;
329});
330assert(!Batch.empty());
331ExpandedTokens = ExpandedTokens.drop_front(Batch.size());
332
333update(Result, testChunk(FID, Batch));
334}
335return Result;
336}
337
338// Cheap check whether any of the tokens in R might be selected.
339// If it returns false, test() will return NoTokens or Unselected.
340// If it returns true, test() may return any value.
341bool mayHit(SourceRange R) const {
342if (SelectedSpelled.empty() || MaybeSelectedExpanded.empty())
343return false;
344// If the node starts after the selection ends, it is not selected.
345// Tokens a macro location might claim are >= its expansion start.
346// So if the expansion start > last selected token, we can prune it.
347// (This is particularly helpful for GTest's TEST macro).
348if (auto B = offsetInSelFile(getExpansionStart(R.getBegin())))
349if (*B > SelectedSpelled.back().Offset)
350return false;
351// If the node ends before the selection begins, it is not selected.
352SourceLocation EndLoc = R.getEnd();
353while (EndLoc.isMacroID())
354EndLoc = SM.getImmediateExpansionRange(EndLoc).getEnd();
355// In the rare case that the expansion range is a char range, EndLoc is
356// ~one token too far to the right. We may fail to prune, that's OK.
357if (auto E = offsetInSelFile(EndLoc))
358if (*E < SelectedSpelled.front().Offset)
359return false;
360return true;
361}
362
363private:
364// Plausible expanded tokens that might be affected by the selection.
365// This is an overestimate, it may contain tokens that are not selected.
366// The point is to allow cheap pruning in test()
367llvm::ArrayRef<syntax::Token>
368computeMaybeSelectedExpandedTokens(const syntax::TokenBuffer &Toks) {
369if (SelectedSpelled.empty())
370return {};
371
372auto LastAffectedToken = [&](SourceLocation Loc) {
373auto Offset = offsetInSelFile(Loc);
374while (Loc.isValid() && !Offset) {
375Loc = Loc.isMacroID() ? SM.getImmediateExpansionRange(Loc).getEnd()
376: SM.getIncludeLoc(SM.getFileID(Loc));
377Offset = offsetInSelFile(Loc);
378}
379return Offset;
380};
381auto FirstAffectedToken = [&](SourceLocation Loc) {
382auto Offset = offsetInSelFile(Loc);
383while (Loc.isValid() && !Offset) {
384Loc = Loc.isMacroID() ? SM.getImmediateExpansionRange(Loc).getBegin()
385: SM.getIncludeLoc(SM.getFileID(Loc));
386Offset = offsetInSelFile(Loc);
387}
388return Offset;
389};
390
391const syntax::Token *Start = llvm::partition_point(
392Toks.expandedTokens(),
393[&, First = SelectedSpelled.front().Offset](const syntax::Token &Tok) {
394if (Tok.kind() == tok::eof)
395return false;
396// Implausible if upperbound(Tok) < First.
397if (auto Offset = LastAffectedToken(Tok.location()))
398return *Offset < First;
399// A prefix of the expanded tokens may be from an implicit
400// inclusion (e.g. preamble patch, or command-line -include).
401return true;
402});
403
404bool EndInvalid = false;
405const syntax::Token *End = std::partition_point(
406Start, Toks.expandedTokens().end(),
407[&, Last = SelectedSpelled.back().Offset](const syntax::Token &Tok) {
408if (Tok.kind() == tok::eof)
409return false;
410// Plausible if lowerbound(Tok) <= Last.
411if (auto Offset = FirstAffectedToken(Tok.location()))
412return *Offset <= Last;
413// Shouldn't happen: once we've seen tokens traceable to the main
414// file, there shouldn't be any more implicit inclusions.
415assert(false && "Expanded token could not be resolved to main file!");
416EndInvalid = true;
417return true; // conservatively assume this token can overlap
418});
419if (EndInvalid)
420End = Toks.expandedTokens().end();
421
422return llvm::ArrayRef(Start, End);
423}
424
425// Hit-test a consecutive range of tokens from a single file ID.
426SelectionTree::Selection
427testChunk(FileID FID, llvm::ArrayRef<syntax::Token> Batch) const {
428assert(!Batch.empty());
429SourceLocation StartLoc = Batch.front().location();
430// There are several possible categories of FileID depending on how the
431// preprocessor was used to generate these tokens:
432// main file, #included file, macro args, macro bodies.
433// We need to identify the main-file tokens that represent Batch, and
434// determine whether we want to exclusively claim them. Regular tokens
435// represent one AST construct, but a macro invocation can represent many.
436
437// Handle tokens written directly in the main file.
438if (FID == SelFile) {
439return testTokenRange(*offsetInSelFile(Batch.front().location()),
440*offsetInSelFile(Batch.back().location()));
441}
442
443// Handle tokens in another file #included into the main file.
444// Check if the #include is selected, but don't claim it exclusively.
445if (StartLoc.isFileID()) {
446for (SourceLocation Loc = Batch.front().location(); Loc.isValid();
447Loc = SM.getIncludeLoc(SM.getFileID(Loc))) {
448if (auto Offset = offsetInSelFile(Loc))
449// FIXME: use whole #include directive, not just the filename string.
450return testToken(*Offset);
451}
452return NoTokens;
453}
454
455assert(StartLoc.isMacroID());
456// Handle tokens that were passed as a macro argument.
457SourceLocation ArgStart = SM.getTopMacroCallerLoc(StartLoc);
458if (auto ArgOffset = offsetInSelFile(ArgStart)) {
459if (isFirstExpansion(FID, ArgStart, SM)) {
460SourceLocation ArgEnd =
461SM.getTopMacroCallerLoc(Batch.back().location());
462return testTokenRange(*ArgOffset, *offsetInSelFile(ArgEnd));
463} else { // NOLINT(llvm-else-after-return)
464/* fall through and treat as part of the macro body */
465}
466}
467
468// Handle tokens produced by non-argument macro expansion.
469// Check if the macro name is selected, don't claim it exclusively.
470if (auto ExpansionOffset = offsetInSelFile(getExpansionStart(StartLoc)))
471// FIXME: also check ( and ) for function-like macros?
472return testToken(*ExpansionOffset);
473return NoTokens;
474}
475
476// Is the closed token range [Begin, End] selected?
477SelectionTree::Selection testTokenRange(unsigned Begin, unsigned End) const {
478assert(Begin <= End);
479// Outside the selection entirely?
480if (End < SelectedSpelled.front().Offset ||
481Begin > SelectedSpelled.back().Offset)
482return SelectionTree::Unselected;
483
484// Compute range of tokens.
485auto B = llvm::partition_point(
486SelectedSpelled, [&](const Tok &T) { return T.Offset < Begin; });
487auto E = std::partition_point(B, SelectedSpelled.end(), [&](const Tok &T) {
488return T.Offset <= End;
489});
490
491// Aggregate selectedness of tokens in range.
492bool ExtendsOutsideSelection = Begin < SelectedSpelled.front().Offset ||
493End > SelectedSpelled.back().Offset;
494SelectionTree::Selection Result =
495ExtendsOutsideSelection ? SelectionTree::Unselected : NoTokens;
496for (auto It = B; It != E; ++It)
497update(Result, It->Selected);
498return Result;
499}
500
501// Is the token at `Offset` selected?
502SelectionTree::Selection testToken(unsigned Offset) const {
503// Outside the selection entirely?
504if (Offset < SelectedSpelled.front().Offset ||
505Offset > SelectedSpelled.back().Offset)
506return SelectionTree::Unselected;
507// Find the token, if it exists.
508auto It = llvm::partition_point(
509SelectedSpelled, [&](const Tok &T) { return T.Offset < Offset; });
510if (It != SelectedSpelled.end() && It->Offset == Offset)
511return It->Selected;
512return NoTokens;
513}
514
515// Decomposes Loc and returns the offset if the file ID is SelFile.
516std::optional<unsigned> offsetInSelFile(SourceLocation Loc) const {
517// Decoding Loc with SM.getDecomposedLoc is relatively expensive.
518// But SourceLocations for a file are numerically contiguous, so we
519// can use cheap integer operations instead.
520if (Loc < SelFileBounds.getBegin() || Loc >= SelFileBounds.getEnd())
521return std::nullopt;
522// FIXME: subtracting getRawEncoding() is dubious, move this logic into SM.
523return Loc.getRawEncoding() - SelFileBounds.getBegin().getRawEncoding();
524}
525
526SourceLocation getExpansionStart(SourceLocation Loc) const {
527while (Loc.isMacroID())
528Loc = SM.getImmediateExpansionRange(Loc).getBegin();
529return Loc;
530}
531
// One spelled token touched by the selection.
struct Tok {
  // File offset of the token's start location.
  unsigned Offset;
  // Complete if the token lies entirely inside the selection, else Partial.
  SelectionTree::Selection Selected;
};
// Spelled tokens touched by the selection, excluding ignored token kinds and
// tokens that preprocess to nothing. Searched by Offset with partition_point.
std::vector<Tok> SelectedSpelled;
// Overestimate of the expanded tokens the selection might affect.
llvm::ArrayRef<syntax::Token> MaybeSelectedExpanded;
// The file containing the selection.
FileID SelFile;
// Locations of the start and end of SelFile.
SourceRange SelFileBounds;
const SourceManager &SM;
541};
542
543// Show the type of a node for debugging.
544void printNodeKind(llvm::raw_ostream &OS, const DynTypedNode &N) {
545if (const TypeLoc *TL = N.get<TypeLoc>()) {
546// TypeLoc is a hierarchy, but has only a single ASTNodeKind.
547// Synthesize the name from the Type subclass (except for QualifiedTypeLoc).
548if (TL->getTypeLocClass() == TypeLoc::Qualified)
549OS << "QualifiedTypeLoc";
550else
551OS << TL->getType()->getTypeClassName() << "TypeLoc";
552} else {
553OS << N.getNodeKind().asStringRef();
554}
555}
556
557#ifndef NDEBUG
558std::string printNodeToString(const DynTypedNode &N, const PrintingPolicy &PP) {
559std::string S;
560llvm::raw_string_ostream OS(S);
561printNodeKind(OS, N);
562return std::move(OS.str());
563}
564#endif
565
566bool isImplicit(const Stmt *S) {
567// Some Stmts are implicit and shouldn't be traversed, but there's no
568// "implicit" attribute on Stmt/Expr.
569// Unwrap implicit casts first if present (other nodes too?).
570if (auto *ICE = llvm::dyn_cast<ImplicitCastExpr>(S))
571S = ICE->getSubExprAsWritten();
572// Implicit this in a MemberExpr is not filtered out by RecursiveASTVisitor.
573// It would be nice if RAV handled this (!shouldTraverseImplicitCode()).
574if (auto *CTI = llvm::dyn_cast<CXXThisExpr>(S))
575if (CTI->isImplicit())
576return true;
577// Make sure implicit access of anonymous structs don't end up owning tokens.
578if (auto *ME = llvm::dyn_cast<MemberExpr>(S)) {
579if (auto *FD = llvm::dyn_cast<FieldDecl>(ME->getMemberDecl()))
580if (FD->isAnonymousStructOrUnion())
581// If Base is an implicit CXXThis, then the whole MemberExpr has no
582// tokens. If it's a normal e.g. DeclRef, we treat the MemberExpr like
583// an implicit cast.
584return isImplicit(ME->getBase());
585}
586// Refs to operator() and [] are (almost?) always implicit as part of calls.
587if (auto *DRE = llvm::dyn_cast<DeclRefExpr>(S)) {
588if (auto *FD = llvm::dyn_cast<FunctionDecl>(DRE->getDecl())) {
589switch (FD->getOverloadedOperator()) {
590case OO_Call:
591case OO_Subscript:
592return true;
593default:
594break;
595}
596}
597}
598return false;
599}
600
// We find the selection by visiting written nodes in the AST, looking for nodes
// that intersect with the selected character range.
//
// While traversing, we maintain a parent stack. As nodes pop off the stack,
// we decide whether to keep them or not. To be kept, they must either be
// selected or contain some nodes that are.
//
// For simple cases (not inside macros) we prune subtrees that don't intersect.
609class SelectionVisitor : public RecursiveASTVisitor<SelectionVisitor> {
610public:
611// Runs the visitor to gather selected nodes and their ancestors.
612// If there is any selection, the root (TUDecl) is the first node.
613static std::deque<Node> collect(ASTContext &AST,
614const syntax::TokenBuffer &Tokens,
615const PrintingPolicy &PP, unsigned Begin,
616unsigned End, FileID File) {
617SelectionVisitor V(AST, Tokens, PP, Begin, End, File);
618V.TraverseAST(AST);
619assert(V.Stack.size() == 1 && "Unpaired push/pop?");
620assert(V.Stack.top() == &V.Nodes.front());
621return std::move(V.Nodes);
622}
623
624// We traverse all "well-behaved" nodes the same way:
625// - push the node onto the stack
626// - traverse its children recursively
627// - pop it from the stack
628// - hit testing: is intersection(node, selection) - union(children) empty?
629// - attach it to the tree if it or any children hit the selection
630//
631// Two categories of nodes are not "well-behaved":
632// - those without source range information, we don't record those
633// - those that can't be stored in DynTypedNode.
634bool TraverseDecl(Decl *X) {
635if (llvm::isa_and_nonnull<TranslationUnitDecl>(X))
636return Base::TraverseDecl(X); // Already pushed by constructor.
637// Base::TraverseDecl will suppress children, but not this node itself.
638if (X && X->isImplicit()) {
639// Most implicit nodes have only implicit children and can be skipped.
640// However there are exceptions (`void foo(Concept auto x)`), and
641// the base implementation knows how to find them.
642return Base::TraverseDecl(X);
643}
644return traverseNode(X, [&] { return Base::TraverseDecl(X); });
645}
646bool TraverseTypeLoc(TypeLoc X) {
647return traverseNode(&X, [&] { return Base::TraverseTypeLoc(X); });
648}
649bool TraverseTemplateArgumentLoc(const TemplateArgumentLoc &X) {
650return traverseNode(&X,
651[&] { return Base::TraverseTemplateArgumentLoc(X); });
652}
653bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc X) {
654return traverseNode(
655&X, [&] { return Base::TraverseNestedNameSpecifierLoc(X); });
656}
657bool TraverseConstructorInitializer(CXXCtorInitializer *X) {
658return traverseNode(
659X, [&] { return Base::TraverseConstructorInitializer(X); });
660}
661bool TraverseCXXBaseSpecifier(const CXXBaseSpecifier &X) {
662return traverseNode(&X, [&] { return Base::TraverseCXXBaseSpecifier(X); });
663}
664bool TraverseAttr(Attr *X) {
665return traverseNode(X, [&] { return Base::TraverseAttr(X); });
666}
667bool TraverseConceptReference(ConceptReference *X) {
668return traverseNode(X, [&] { return Base::TraverseConceptReference(X); });
669}
670// Stmt is the same, but this form allows the data recursion optimization.
671bool dataTraverseStmtPre(Stmt *X) {
672if (!X || isImplicit(X))
673return false;
674auto N = DynTypedNode::create(*X);
675if (canSafelySkipNode(N))
676return false;
677push(std::move(N));
678if (shouldSkipChildren(X)) {
679pop();
680return false;
681}
682return true;
683}
684bool dataTraverseStmtPost(Stmt *X) {
685pop();
686return true;
687}
688// QualifiedTypeLoc is handled strangely in RecursiveASTVisitor: the derived
689// TraverseTypeLoc is not called for the inner UnqualTypeLoc.
690// This means we'd never see 'int' in 'const int'! Work around that here.
691// (The reason for the behavior is to avoid traversing the nested Type twice,
692// but we ignore TraverseType anyway).
693bool TraverseQualifiedTypeLoc(QualifiedTypeLoc QX) {
694return traverseNode<TypeLoc>(
695&QX, [&] { return TraverseTypeLoc(QX.getUnqualifiedLoc()); });
696}
697bool TraverseObjCProtocolLoc(ObjCProtocolLoc PL) {
698return traverseNode(&PL, [&] { return Base::TraverseObjCProtocolLoc(PL); });
699}
// Uninteresting parts of the AST that don't have locations within them.
// Both overrides do nothing and report success, so these nodes are never
// descended into.
bool TraverseNestedNameSpecifier(NestedNameSpecifier *) { return true; }
bool TraverseType(QualType) { return true; }
703
704// The DeclStmt for the loop variable claims to cover the whole range
705// inside the parens, this causes the range-init expression to not be hit.
706// Traverse the loop VarDecl instead, which has the right source range.
707bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) {
708return traverseNode(S, [&] {
709return TraverseStmt(S->getInit()) && TraverseDecl(S->getLoopVariable()) &&
710TraverseStmt(S->getRangeInit()) && TraverseStmt(S->getBody());
711});
712}
713// OpaqueValueExpr blocks traversal, we must explicitly traverse it.
714bool TraverseOpaqueValueExpr(OpaqueValueExpr *E) {
715return traverseNode(E, [&] { return TraverseStmt(E->getSourceExpr()); });
716}
717// We only want to traverse the *syntactic form* to understand the selection.
718bool TraversePseudoObjectExpr(PseudoObjectExpr *E) {
719return traverseNode(E, [&] { return TraverseStmt(E->getSyntacticForm()); });
720}
721bool TraverseTypeConstraint(const TypeConstraint *C) {
722if (auto *E = C->getImmediatelyDeclaredConstraint()) {
723// Technically this expression is 'implicit' and not traversed by the RAV.
724// However, the range is correct, so we visit expression to avoid adding
725// an extra kind to 'DynTypeNode' that hold 'TypeConstraint'.
726return TraverseStmt(E);
727}
728return Base::TraverseTypeConstraint(C);
729}
730
731// Override child traversal for certain node types.
732using RecursiveASTVisitor::getStmtChildren;
733// PredefinedExpr like __func__ has a StringLiteral child for its value.
734// It's not written, so don't traverse it.
735Stmt::child_range getStmtChildren(PredefinedExpr *) {
736return {StmtIterator{}, StmtIterator{}};
737}
738
739private:
740using Base = RecursiveASTVisitor<SelectionVisitor>;
741
742SelectionVisitor(ASTContext &AST, const syntax::TokenBuffer &Tokens,
743const PrintingPolicy &PP, unsigned SelBegin, unsigned SelEnd,
744FileID SelFile)
745: SM(AST.getSourceManager()), LangOpts(AST.getLangOpts()),
746#ifndef NDEBUG
747PrintPolicy(PP),
748#endif
749TokenBuf(Tokens), SelChecker(Tokens, SelFile, SelBegin, SelEnd, SM),
750UnclaimedExpandedTokens(Tokens.expandedTokens()) {
751// Ensure we have a node for the TU decl, regardless of traversal scope.
752Nodes.emplace_back();
753Nodes.back().ASTNode = DynTypedNode::create(*AST.getTranslationUnitDecl());
754Nodes.back().Parent = nullptr;
755Nodes.back().Selected = SelectionTree::Unselected;
756Stack.push(&Nodes.back());
757}
758
759// Generic case of TraverseFoo. Func should be the call to Base::TraverseFoo.
760// Node is always a pointer so the generic code can handle any null checks.
761template <typename T, typename Func>
762bool traverseNode(T *Node, const Func &Body) {
763if (Node == nullptr)
764return true;
765auto N = DynTypedNode::create(*Node);
766if (canSafelySkipNode(N))
767return true;
768push(DynTypedNode::create(*Node));
769bool Ret = Body();
770pop();
771return Ret;
772}
773
// HIT TESTING
//
// We do rough hit testing on the way down the tree to avoid traversing
// subtrees that don't touch the selection (canSafelySkipNode), but
// fine-grained hit-testing is mostly done on the way back up (in pop()).
// This means children get to claim parts of the selection first, and parents
// are only selected if they own tokens that no child owned.
//
// Nodes *usually* nest nicely: a child's getSourceRange() lies within the
// parent's, and a node (transitively) owns all tokens in its range.
//
// Exception 1: when declarators nest, *inner* declarator is the *outer* type.
//              e.g. void foo[5](int) is an array of functions.
// To handle this case, declarators are careful to only claim the tokens they
// own, rather than claim a range and rely on claim ordering.
//
// Exception 2: siblings both claim the same node.
//              e.g. `int x, y;` produces two sibling VarDecls.
//                    ~~~~~ x
//                    ~~~~~~~~ y
// Here the first ("leftmost") sibling claims the tokens it wants, and the
// other sibling gets what's left. So selecting "int" only includes the left
// VarDecl in the selection tree.
797
798// An optimization for a common case: nodes outside macro expansions that
799// don't intersect the selection may be recursively skipped.
800bool canSafelySkipNode(const DynTypedNode &N) {
801SourceRange S = getSourceRange(N);
802if (auto *TL = N.get<TypeLoc>()) {
803// FIXME: TypeLoc::getBeginLoc()/getEndLoc() are pretty fragile
804// heuristics. We should consider only pruning critical TypeLoc nodes, to
805// be more robust.
806
807// AttributedTypeLoc may point to the attribute's range, NOT the modified
808// type's range.
809if (auto AT = TL->getAs<AttributedTypeLoc>())
810S = AT.getModifiedLoc().getSourceRange();
811}
812// SourceRange often doesn't manage to accurately cover attributes.
813// Fortunately, attributes are rare.
814if (llvm::any_of(getAttributes(N),
815[](const Attr *A) { return !A->isImplicit(); }))
816return false;
817if (!SelChecker.mayHit(S)) {
818dlog("{2}skip: {0} {1}", printNodeToString(N, PrintPolicy),
819S.printToString(SM), indent());
820return true;
821}
822return false;
823}
824
825// There are certain nodes we want to treat as leaves in the SelectionTree,
826// although they do have children.
827bool shouldSkipChildren(const Stmt *X) const {
828// UserDefinedLiteral (e.g. 12_i) has two children (12 and _i).
829// Unfortunately TokenBuffer sees 12_i as one token and can't split it.
830// So we treat UserDefinedLiteral as a leaf node, owning the token.
831return llvm::isa<UserDefinedLiteral>(X);
832}
833
834// Pushes a node onto the ancestor stack. Pairs with pop().
835// Performs early hit detection for some nodes (on the earlySourceRange).
836void push(DynTypedNode Node) {
837SourceRange Early = earlySourceRange(Node);
838dlog("{2}push: {0} {1}", printNodeToString(Node, PrintPolicy),
839Node.getSourceRange().printToString(SM), indent());
840Nodes.emplace_back();
841Nodes.back().ASTNode = std::move(Node);
842Nodes.back().Parent = Stack.top();
843Nodes.back().Selected = NoTokens;
844Stack.push(&Nodes.back());
845claimRange(Early, Nodes.back().Selected);
846}
847
848// Pops a node off the ancestor stack, and finalizes it. Pairs with push().
849// Performs primary hit detection.
850void pop() {
851Node &N = *Stack.top();
852dlog("{1}pop: {0}", printNodeToString(N.ASTNode, PrintPolicy), indent(-1));
853claimTokensFor(N.ASTNode, N.Selected);
854if (N.Selected == NoTokens)
855N.Selected = SelectionTree::Unselected;
856if (N.Selected || !N.Children.empty()) {
857// Attach to the tree.
858N.Parent->Children.push_back(&N);
859} else {
860// Neither N any children are selected, it doesn't belong in the tree.
861assert(&N == &Nodes.back());
862Nodes.pop_back();
863}
864Stack.pop();
865}
866
867// Returns the range of tokens that this node will claim directly, and
868// is not available to the node's children.
869// Usually empty, but sometimes children cover tokens but shouldn't own them.
870SourceRange earlySourceRange(const DynTypedNode &N) {
871if (const Decl *VD = N.get<VarDecl>()) {
872// We want the name in the var-decl to be claimed by the decl itself and
873// not by any children. Ususally, we don't need this, because source
874// ranges of children are not overlapped with their parent's.
875// An exception is lambda captured var decl, where AutoTypeLoc is
876// overlapped with the name loc.
877// auto fun = [bar = foo]() { ... }
878// ~~~~~~~~~ VarDecl
879// ~~~ |- AutoTypeLoc
880return VD->getLocation();
881}
882
883// When referring to a destructor ~Foo(), attribute Foo to the destructor
884// rather than the TypeLoc nested inside it.
885// We still traverse the TypeLoc, because it may contain other targeted
886// things like the T in ~Foo<T>().
887if (const auto *CDD = N.get<CXXDestructorDecl>())
888return CDD->getNameInfo().getNamedTypeInfo()->getTypeLoc().getBeginLoc();
889if (const auto *ME = N.get<MemberExpr>()) {
890auto NameInfo = ME->getMemberNameInfo();
891if (NameInfo.getName().getNameKind() ==
892DeclarationName::CXXDestructorName)
893return NameInfo.getNamedTypeInfo()->getTypeLoc().getBeginLoc();
894}
895
896return SourceRange();
897}
898
899// Claim tokens for N, after processing its children.
900// By default this claims all unclaimed tokens in getSourceRange().
901// We override this if we want to claim fewer tokens (e.g. there are gaps).
902void claimTokensFor(const DynTypedNode &N, SelectionTree::Selection &Result) {
903// CXXConstructExpr often shows implicit construction, like `string s;`.
904// Don't associate any tokens with it unless there's some syntax like {}.
905// This prevents it from claiming 's', its primary location.
906if (const auto *CCE = N.get<CXXConstructExpr>()) {
907claimRange(CCE->getParenOrBraceRange(), Result);
908return;
909}
910// ExprWithCleanups is always implicit. It often wraps CXXConstructExpr.
911// Prevent it claiming 's' in the case above.
912if (N.get<ExprWithCleanups>())
913return;
914
915// Declarators nest "inside out", with parent types inside child ones.
916// Instead of claiming the whole range (clobbering parent tokens), carefully
917// claim the tokens owned by this node and non-declarator children.
918// (We could manipulate traversal order instead, but this is easier).
919//
920// Non-declarator types nest normally, and are handled like other nodes.
921//
922// Example:
923// Vec<R<int>(*[2])(A<char>)> is a Vec of arrays of pointers to functions,
924// which accept A<char> and return R<int>.
925// The TypeLoc hierarchy:
926// Vec<R<int>(*[2])(A<char>)> m;
927// Vec<#####################> TemplateSpecialization Vec
928// --------[2]---------- `-Array
929// -------*------------- `-Pointer
930// ------(----)--------- `-Paren
931// ------------(#######) `-Function
932// R<###> |-TemplateSpecialization R
933// int | `-Builtin int
934// A<####> `-TemplateSpecialization A
935// char `-Builtin char
936//
937// In each row
938// --- represents unclaimed parts of the SourceRange.
939// ### represents parts that children already claimed.
940if (const auto *TL = N.get<TypeLoc>()) {
941if (auto PTL = TL->getAs<ParenTypeLoc>()) {
942claimRange(PTL.getLParenLoc(), Result);
943claimRange(PTL.getRParenLoc(), Result);
944return;
945}
946if (auto ATL = TL->getAs<ArrayTypeLoc>()) {
947claimRange(ATL.getBracketsRange(), Result);
948return;
949}
950if (auto PTL = TL->getAs<PointerTypeLoc>()) {
951claimRange(PTL.getStarLoc(), Result);
952return;
953}
954if (auto FTL = TL->getAs<FunctionTypeLoc>()) {
955claimRange(SourceRange(FTL.getLParenLoc(), FTL.getEndLoc()), Result);
956return;
957}
958}
959claimRange(getSourceRange(N), Result);
960}
961
962// Perform hit-testing of a complete Node against the selection.
963// This runs for every node in the AST, and must be fast in common cases.
964// This is usually called from pop(), so we can take children into account.
965// The existing state of Result is relevant.
966void claimRange(SourceRange S, SelectionTree::Selection &Result) {
967for (const auto &ClaimedRange :
968UnclaimedExpandedTokens.erase(TokenBuf.expandedTokens(S)))
969update(Result, SelChecker.test(ClaimedRange));
970
971if (Result && Result != NoTokens)
972dlog("{1}hit selection: {0}", S.printToString(SM), indent());
973}
974
975std::string indent(int Offset = 0) {
976// Cast for signed arithmetic.
977int Amount = int(Stack.size()) + Offset;
978assert(Amount >= 0);
979return std::string(Amount, ' ');
980}
981
982SourceManager &SM;
983const LangOptions &LangOpts;
984#ifndef NDEBUG
985const PrintingPolicy &PrintPolicy;
986#endif
987const syntax::TokenBuffer &TokenBuf;
988std::stack<Node *> Stack;
989SelectionTester SelChecker;
990IntervalSet<syntax::Token> UnclaimedExpandedTokens;
991std::deque<Node> Nodes; // Stable pointers as we add more nodes.
992};
993
994} // namespace
995
996llvm::SmallString<256> abbreviatedString(DynTypedNode N,
997const PrintingPolicy &PP) {
998llvm::SmallString<256> Result;
999{
1000llvm::raw_svector_ostream OS(Result);
1001N.print(OS, PP);
1002}
1003auto Pos = Result.find('\n');
1004if (Pos != llvm::StringRef::npos) {
1005bool MoreText = !llvm::all_of(Result.str().drop_front(Pos), llvm::isSpace);
1006Result.resize(Pos);
1007if (MoreText)
1008Result.append(" …");
1009}
1010return Result;
1011}
1012
1013void SelectionTree::print(llvm::raw_ostream &OS, const SelectionTree::Node &N,
1014int Indent) const {
1015if (N.Selected)
1016OS.indent(Indent - 1) << (N.Selected == SelectionTree::Complete ? '*'
1017: '.');
1018else
1019OS.indent(Indent);
1020printNodeKind(OS, N.ASTNode);
1021OS << ' ' << abbreviatedString(N.ASTNode, PrintPolicy) << "\n";
1022for (const Node *Child : N.Children)
1023print(OS, *Child, Indent + 2);
1024}
1025
1026std::string SelectionTree::Node::kind() const {
1027std::string S;
1028llvm::raw_string_ostream OS(S);
1029printNodeKind(OS, ASTNode);
1030return std::move(OS.str());
1031}
1032
1033// Decide which selections emulate a "point" query in between characters.
1034// If it's ambiguous (the neighboring characters are selectable tokens), returns
1035// both possibilities in preference order.
// Always returns at least one range - if no tokens touched, an empty range.
1037static llvm::SmallVector<std::pair<unsigned, unsigned>, 2>
1038pointBounds(unsigned Offset, const syntax::TokenBuffer &Tokens) {
1039const auto &SM = Tokens.sourceManager();
1040SourceLocation Loc = SM.getComposedLoc(SM.getMainFileID(), Offset);
1041llvm::SmallVector<std::pair<unsigned, unsigned>, 2> Result;
1042// Prefer right token over left.
1043for (const syntax::Token &Tok :
1044llvm::reverse(spelledTokensTouching(Loc, Tokens))) {
1045if (shouldIgnore(Tok))
1046continue;
1047unsigned Offset = Tokens.sourceManager().getFileOffset(Tok.location());
1048Result.emplace_back(Offset, Offset + Tok.length());
1049}
1050if (Result.empty())
1051Result.emplace_back(Offset, Offset);
1052return Result;
1053}
1054
1055bool SelectionTree::createEach(ASTContext &AST,
1056const syntax::TokenBuffer &Tokens,
1057unsigned Begin, unsigned End,
1058llvm::function_ref<bool(SelectionTree)> Func) {
1059if (Begin != End)
1060return Func(SelectionTree(AST, Tokens, Begin, End));
1061for (std::pair<unsigned, unsigned> Bounds : pointBounds(Begin, Tokens))
1062if (Func(SelectionTree(AST, Tokens, Bounds.first, Bounds.second)))
1063return true;
1064return false;
1065}
1066
1067SelectionTree SelectionTree::createRight(ASTContext &AST,
1068const syntax::TokenBuffer &Tokens,
1069unsigned int Begin, unsigned int End) {
1070std::optional<SelectionTree> Result;
1071createEach(AST, Tokens, Begin, End, [&](SelectionTree T) {
1072Result = std::move(T);
1073return true;
1074});
1075return std::move(*Result);
1076}
1077
1078SelectionTree::SelectionTree(ASTContext &AST, const syntax::TokenBuffer &Tokens,
1079unsigned Begin, unsigned End)
1080: PrintPolicy(AST.getLangOpts()) {
1081// No fundamental reason the selection needs to be in the main file,
1082// but that's all clangd has needed so far.
1083const SourceManager &SM = AST.getSourceManager();
1084FileID FID = SM.getMainFileID();
1085PrintPolicy.TerseOutput = true;
1086PrintPolicy.IncludeNewlines = false;
1087
1088dlog("Computing selection for {0}",
1089SourceRange(SM.getComposedLoc(FID, Begin), SM.getComposedLoc(FID, End))
1090.printToString(SM));
1091Nodes = SelectionVisitor::collect(AST, Tokens, PrintPolicy, Begin, End, FID);
1092Root = Nodes.empty() ? nullptr : &Nodes.front();
1093recordMetrics(*this, AST.getLangOpts());
1094dlog("Built selection tree\n{0}", *this);
1095}
1096
1097const Node *SelectionTree::commonAncestor() const {
1098const Node *Ancestor = Root;
1099while (Ancestor->Children.size() == 1 && !Ancestor->Selected)
1100Ancestor = Ancestor->Children.front();
1101// Returning nullptr here is a bit unprincipled, but it makes the API safer:
1102// the TranslationUnitDecl contains all of the preamble, so traversing it is a
1103// performance cliff. Callers can check for null and use root() if they want.
1104return Ancestor != Root ? Ancestor : nullptr;
1105}
1106
1107const DeclContext &SelectionTree::Node::getDeclContext() const {
1108for (const Node *CurrentNode = this; CurrentNode != nullptr;
1109CurrentNode = CurrentNode->Parent) {
1110if (const Decl *Current = CurrentNode->ASTNode.get<Decl>()) {
1111if (CurrentNode != this)
1112if (auto *DC = dyn_cast<DeclContext>(Current))
1113return *DC;
1114return *Current->getLexicalDeclContext();
1115}
1116if (const auto *LE = CurrentNode->ASTNode.get<LambdaExpr>())
1117if (CurrentNode != this)
1118return *LE->getCallOperator();
1119}
1120llvm_unreachable("A tree must always be rooted at TranslationUnitDecl.");
1121}
1122
1123const SelectionTree::Node &SelectionTree::Node::ignoreImplicit() const {
1124if (Children.size() == 1 &&
1125getSourceRange(Children.front()->ASTNode) == getSourceRange(ASTNode))
1126return Children.front()->ignoreImplicit();
1127return *this;
1128}
1129
1130const SelectionTree::Node &SelectionTree::Node::outerImplicit() const {
1131if (Parent && getSourceRange(Parent->ASTNode) == getSourceRange(ASTNode))
1132return Parent->outerImplicit();
1133return *this;
1134}
1135
1136} // namespace clangd
1137} // namespace clang
1138