llvm-project
572 строки · 20.0 Кб
1//===--- Quality.cpp ---------------------------------------------*- C++-*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "Quality.h"
10#include "AST.h"
11#include "ASTSignals.h"
12#include "FileDistance.h"
13#include "SourceCode.h"
14#include "index/Symbol.h"
15#include "clang/AST/ASTContext.h"
16#include "clang/AST/Decl.h"
17#include "clang/AST/DeclCXX.h"
18#include "clang/AST/DeclTemplate.h"
19#include "clang/AST/DeclVisitor.h"
20#include "clang/Basic/SourceManager.h"
21#include "clang/Sema/CodeCompleteConsumer.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Support/Casting.h"
24#include "llvm/Support/FormatVariadic.h"
25#include "llvm/Support/MathExtras.h"
26#include "llvm/Support/raw_ostream.h"
27#include <algorithm>
28#include <cmath>
29#include <optional>
30
31namespace clang {
32namespace clangd {
33
34static bool hasDeclInMainFile(const Decl &D) {
35auto &SourceMgr = D.getASTContext().getSourceManager();
36for (auto *Redecl : D.redecls()) {
37if (isInsideMainFile(Redecl->getLocation(), SourceMgr))
38return true;
39}
40return false;
41}
42
43static bool hasUsingDeclInMainFile(const CodeCompletionResult &R) {
44const auto &Context = R.Declaration->getASTContext();
45const auto &SourceMgr = Context.getSourceManager();
46if (R.ShadowDecl) {
47if (isInsideMainFile(R.ShadowDecl->getLocation(), SourceMgr))
48return true;
49}
50return false;
51}
52
53static SymbolQualitySignals::SymbolCategory categorize(const NamedDecl &ND) {
54if (const auto *FD = dyn_cast<FunctionDecl>(&ND)) {
55if (FD->isOverloadedOperator())
56return SymbolQualitySignals::Operator;
57}
58class Switch
59: public ConstDeclVisitor<Switch, SymbolQualitySignals::SymbolCategory> {
60public:
61#define MAP(DeclType, Category) \
62SymbolQualitySignals::SymbolCategory Visit##DeclType(const DeclType *) { \
63return SymbolQualitySignals::Category; \
64}
65MAP(NamespaceDecl, Namespace);
66MAP(NamespaceAliasDecl, Namespace);
67MAP(TypeDecl, Type);
68MAP(TypeAliasTemplateDecl, Type);
69MAP(ClassTemplateDecl, Type);
70MAP(CXXConstructorDecl, Constructor);
71MAP(CXXDestructorDecl, Destructor);
72MAP(ValueDecl, Variable);
73MAP(VarTemplateDecl, Variable);
74MAP(FunctionDecl, Function);
75MAP(FunctionTemplateDecl, Function);
76MAP(Decl, Unknown);
77#undef MAP
78};
79return Switch().Visit(&ND);
80}
81
82static SymbolQualitySignals::SymbolCategory
83categorize(const CodeCompletionResult &R) {
84if (R.Declaration)
85return categorize(*R.Declaration);
86if (R.Kind == CodeCompletionResult::RK_Macro)
87return SymbolQualitySignals::Macro;
88// Everything else is a keyword or a pattern. Patterns are mostly keywords
89// too, except a few which we recognize by cursor kind.
90switch (R.CursorKind) {
91case CXCursor_CXXMethod:
92return SymbolQualitySignals::Function;
93case CXCursor_ModuleImportDecl:
94return SymbolQualitySignals::Namespace;
95case CXCursor_MacroDefinition:
96return SymbolQualitySignals::Macro;
97case CXCursor_TypeRef:
98return SymbolQualitySignals::Type;
99case CXCursor_MemberRef:
100return SymbolQualitySignals::Variable;
101case CXCursor_Constructor:
102return SymbolQualitySignals::Constructor;
103default:
104return SymbolQualitySignals::Keyword;
105}
106}
107
108static SymbolQualitySignals::SymbolCategory
109categorize(const index::SymbolInfo &D) {
110switch (D.Kind) {
111case index::SymbolKind::Namespace:
112case index::SymbolKind::NamespaceAlias:
113return SymbolQualitySignals::Namespace;
114case index::SymbolKind::Macro:
115return SymbolQualitySignals::Macro;
116case index::SymbolKind::Enum:
117case index::SymbolKind::Struct:
118case index::SymbolKind::Class:
119case index::SymbolKind::Protocol:
120case index::SymbolKind::Extension:
121case index::SymbolKind::Union:
122case index::SymbolKind::TypeAlias:
123case index::SymbolKind::TemplateTypeParm:
124case index::SymbolKind::TemplateTemplateParm:
125case index::SymbolKind::Concept:
126return SymbolQualitySignals::Type;
127case index::SymbolKind::Function:
128case index::SymbolKind::ClassMethod:
129case index::SymbolKind::InstanceMethod:
130case index::SymbolKind::StaticMethod:
131case index::SymbolKind::InstanceProperty:
132case index::SymbolKind::ClassProperty:
133case index::SymbolKind::StaticProperty:
134case index::SymbolKind::ConversionFunction:
135return SymbolQualitySignals::Function;
136case index::SymbolKind::Destructor:
137return SymbolQualitySignals::Destructor;
138case index::SymbolKind::Constructor:
139return SymbolQualitySignals::Constructor;
140case index::SymbolKind::Variable:
141case index::SymbolKind::Field:
142case index::SymbolKind::EnumConstant:
143case index::SymbolKind::Parameter:
144case index::SymbolKind::NonTypeTemplateParm:
145return SymbolQualitySignals::Variable;
146case index::SymbolKind::Using:
147case index::SymbolKind::Module:
148case index::SymbolKind::Unknown:
149return SymbolQualitySignals::Unknown;
150}
151llvm_unreachable("Unknown index::SymbolKind");
152}
153
154static bool isInstanceMember(const NamedDecl *ND) {
155if (!ND)
156return false;
157if (const auto *TP = dyn_cast<FunctionTemplateDecl>(ND))
158ND = TP->TemplateDecl::getTemplatedDecl();
159if (const auto *CM = dyn_cast<CXXMethodDecl>(ND))
160return !CM->isStatic();
161return isa<FieldDecl>(ND); // Note that static fields are VarDecl.
162}
163
164static bool isInstanceMember(const index::SymbolInfo &D) {
165switch (D.Kind) {
166case index::SymbolKind::InstanceMethod:
167case index::SymbolKind::InstanceProperty:
168case index::SymbolKind::Field:
169return true;
170default:
171return false;
172}
173}
174
175void SymbolQualitySignals::merge(const CodeCompletionResult &SemaCCResult) {
176Deprecated |= (SemaCCResult.Availability == CXAvailability_Deprecated);
177Category = categorize(SemaCCResult);
178
179if (SemaCCResult.Declaration) {
180ImplementationDetail |= isImplementationDetail(SemaCCResult.Declaration);
181if (auto *ID = SemaCCResult.Declaration->getIdentifier())
182ReservedName = ReservedName || isReservedName(ID->getName());
183} else if (SemaCCResult.Kind == CodeCompletionResult::RK_Macro)
184ReservedName =
185ReservedName || isReservedName(SemaCCResult.Macro->getName());
186}
187
188void SymbolQualitySignals::merge(const Symbol &IndexResult) {
189Deprecated |= (IndexResult.Flags & Symbol::Deprecated);
190ImplementationDetail |= (IndexResult.Flags & Symbol::ImplementationDetail);
191References = std::max(IndexResult.References, References);
192Category = categorize(IndexResult.SymInfo);
193ReservedName = ReservedName || isReservedName(IndexResult.Name);
194}
195
196float SymbolQualitySignals::evaluateHeuristics() const {
197float Score = 1;
198
199// This avoids a sharp gradient for tail symbols, and also neatly avoids the
200// question of whether 0 references means a bad symbol or missing data.
201if (References >= 10) {
202// Use a sigmoid style boosting function, which flats out nicely for large
203// numbers (e.g. 2.58 for 1M references).
204// The following boosting function is equivalent to:
205// m = 0.06
206// f = 12.0
207// boost = f * sigmoid(m * std::log(References)) - 0.5 * f + 0.59
208// Sample data points: (10, 1.00), (100, 1.41), (1000, 1.82),
209// (10K, 2.21), (100K, 2.58), (1M, 2.94)
210float S = std::pow(References, -0.06);
211Score *= 6.0 * (1 - S) / (1 + S) + 0.59;
212}
213
214if (Deprecated)
215Score *= 0.1f;
216if (ReservedName)
217Score *= 0.1f;
218if (ImplementationDetail)
219Score *= 0.2f;
220
221switch (Category) {
222case Keyword: // Often relevant, but misses most signals.
223Score *= 4; // FIXME: important keywords should have specific boosts.
224break;
225case Type:
226case Function:
227case Variable:
228Score *= 1.1f;
229break;
230case Namespace:
231Score *= 0.8f;
232break;
233case Macro:
234case Destructor:
235case Operator:
236Score *= 0.5f;
237break;
238case Constructor: // No boost constructors so they are after class types.
239case Unknown:
240break;
241}
242
243return Score;
244}
245
246llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
247const SymbolQualitySignals &S) {
248OS << llvm::formatv("=== Symbol quality: {0}\n", S.evaluateHeuristics());
249OS << llvm::formatv("\tReferences: {0}\n", S.References);
250OS << llvm::formatv("\tDeprecated: {0}\n", S.Deprecated);
251OS << llvm::formatv("\tReserved name: {0}\n", S.ReservedName);
252OS << llvm::formatv("\tImplementation detail: {0}\n", S.ImplementationDetail);
253OS << llvm::formatv("\tCategory: {0}\n", static_cast<int>(S.Category));
254return OS;
255}
256
257static SymbolRelevanceSignals::AccessibleScope
258computeScope(const NamedDecl *D) {
259// Injected "Foo" within the class "Foo" has file scope, not class scope.
260const DeclContext *DC = D->getDeclContext();
261if (auto *R = dyn_cast_or_null<RecordDecl>(D))
262if (R->isInjectedClassName())
263DC = DC->getParent();
264// Class constructor should have the same scope as the class.
265if (isa<CXXConstructorDecl>(D))
266DC = DC->getParent();
267bool InClass = false;
268for (; !DC->isFileContext(); DC = DC->getParent()) {
269if (DC->isFunctionOrMethod())
270return SymbolRelevanceSignals::FunctionScope;
271InClass = InClass || DC->isRecord();
272}
273if (InClass)
274return SymbolRelevanceSignals::ClassScope;
275// ExternalLinkage threshold could be tweaked, e.g. module-visible as global.
276// Avoid caching linkage if it may change after enclosing code completion.
277if (hasUnstableLinkage(D) || llvm::to_underlying(D->getLinkageInternal()) <
278llvm::to_underlying(Linkage::External))
279return SymbolRelevanceSignals::FileScope;
280return SymbolRelevanceSignals::GlobalScope;
281}
282
283void SymbolRelevanceSignals::merge(const Symbol &IndexResult) {
284SymbolURI = IndexResult.CanonicalDeclaration.FileURI;
285SymbolScope = IndexResult.Scope;
286IsInstanceMember |= isInstanceMember(IndexResult.SymInfo);
287if (!(IndexResult.Flags & Symbol::VisibleOutsideFile)) {
288Scope = AccessibleScope::FileScope;
289}
290if (MainFileSignals) {
291MainFileRefs =
292std::max(MainFileRefs,
293MainFileSignals->ReferencedSymbols.lookup(IndexResult.ID));
294ScopeRefsInFile =
295std::max(ScopeRefsInFile,
296MainFileSignals->RelatedNamespaces.lookup(IndexResult.Scope));
297}
298}
299
300void SymbolRelevanceSignals::computeASTSignals(
301const CodeCompletionResult &SemaResult) {
302if (!MainFileSignals)
303return;
304if ((SemaResult.Kind != CodeCompletionResult::RK_Declaration) &&
305(SemaResult.Kind != CodeCompletionResult::RK_Pattern))
306return;
307if (const NamedDecl *ND = SemaResult.getDeclaration()) {
308if (hasUnstableLinkage(ND))
309return;
310auto ID = getSymbolID(ND);
311if (!ID)
312return;
313MainFileRefs =
314std::max(MainFileRefs, MainFileSignals->ReferencedSymbols.lookup(ID));
315if (const auto *NSD = dyn_cast<NamespaceDecl>(ND->getDeclContext())) {
316if (NSD->isAnonymousNamespace())
317return;
318std::string Scope = printNamespaceScope(*NSD);
319if (!Scope.empty())
320ScopeRefsInFile = std::max(
321ScopeRefsInFile, MainFileSignals->RelatedNamespaces.lookup(Scope));
322}
323}
324}
325
326void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) {
327if (SemaCCResult.Availability == CXAvailability_NotAvailable ||
328SemaCCResult.Availability == CXAvailability_NotAccessible)
329Forbidden = true;
330
331if (SemaCCResult.Declaration) {
332SemaSaysInScope = true;
333// We boost things that have decls in the main file. We give a fixed score
334// for all other declarations in sema as they are already included in the
335// translation unit.
336float DeclProximity = (hasDeclInMainFile(*SemaCCResult.Declaration) ||
337hasUsingDeclInMainFile(SemaCCResult))
338? 1.0
339: 0.6;
340SemaFileProximityScore = std::max(DeclProximity, SemaFileProximityScore);
341IsInstanceMember |= isInstanceMember(SemaCCResult.Declaration);
342InBaseClass |= SemaCCResult.InBaseClass;
343}
344
345computeASTSignals(SemaCCResult);
346// Declarations are scoped, others (like macros) are assumed global.
347if (SemaCCResult.Declaration)
348Scope = std::min(Scope, computeScope(SemaCCResult.Declaration));
349
350NeedsFixIts = !SemaCCResult.FixIts.empty();
351}
352
353static float fileProximityScore(unsigned FileDistance) {
354// Range: [0, 1]
355// FileDistance = [0, 1, 2, 3, 4, .., FileDistance::Unreachable]
356// Score = [1, 0.82, 0.67, 0.55, 0.45, .., 0]
357if (FileDistance == FileDistance::Unreachable)
358return 0;
359// Assume approximately default options are used for sensible scoring.
360return std::exp(FileDistance * -0.4f / FileDistanceOptions().UpCost);
361}
362
363static float scopeProximityScore(unsigned ScopeDistance) {
364// Range: [0.6, 2].
365// ScopeDistance = [0, 1, 2, 3, 4, 5, 6, 7, .., FileDistance::Unreachable]
366// Score = [2.0, 1.55, 1.2, 0.93, 0.72, 0.65, 0.65, 0.65, .., 0.6]
367if (ScopeDistance == FileDistance::Unreachable)
368return 0.6f;
369return std::max(0.65, 2.0 * std::pow(0.6, ScopeDistance / 2.0));
370}
371
372static std::optional<llvm::StringRef>
373wordMatching(llvm::StringRef Name, const llvm::StringSet<> *ContextWords) {
374if (ContextWords)
375for (const auto &Word : ContextWords->keys())
376if (Name.contains_insensitive(Word))
377return Word;
378return std::nullopt;
379}
380
381SymbolRelevanceSignals::DerivedSignals
382SymbolRelevanceSignals::calculateDerivedSignals() const {
383DerivedSignals Derived;
384Derived.NameMatchesContext = wordMatching(Name, ContextWords).has_value();
385Derived.FileProximityDistance = !FileProximityMatch || SymbolURI.empty()
386? FileDistance::Unreachable
387: FileProximityMatch->distance(SymbolURI);
388if (ScopeProximityMatch) {
389// For global symbol, the distance is 0.
390Derived.ScopeProximityDistance =
391SymbolScope ? ScopeProximityMatch->distance(*SymbolScope) : 0;
392}
393return Derived;
394}
395
396float SymbolRelevanceSignals::evaluateHeuristics() const {
397DerivedSignals Derived = calculateDerivedSignals();
398float Score = 1;
399
400if (Forbidden)
401return 0;
402
403Score *= NameMatch;
404
405// File proximity scores are [0,1] and we translate them into a multiplier in
406// the range from 1 to 3.
407Score *= 1 + 2 * std::max(fileProximityScore(Derived.FileProximityDistance),
408SemaFileProximityScore);
409
410if (ScopeProximityMatch)
411// Use a constant scope boost for sema results, as scopes of sema results
412// can be tricky (e.g. class/function scope). Set to the max boost as we
413// don't load top-level symbols from the preamble and sema results are
414// always in the accessible scope.
415Score *= SemaSaysInScope
416? 2.0
417: scopeProximityScore(Derived.ScopeProximityDistance);
418
419if (Derived.NameMatchesContext)
420Score *= 1.5;
421
422// Symbols like local variables may only be referenced within their scope.
423// Conversely if we're in that scope, it's likely we'll reference them.
424if (Query == CodeComplete) {
425// The narrower the scope where a symbol is visible, the more likely it is
426// to be relevant when it is available.
427switch (Scope) {
428case GlobalScope:
429break;
430case FileScope:
431Score *= 1.5f;
432break;
433case ClassScope:
434Score *= 2;
435break;
436case FunctionScope:
437Score *= 4;
438break;
439}
440} else {
441// For non-completion queries, the wider the scope where a symbol is
442// visible, the more likely it is to be relevant.
443switch (Scope) {
444case GlobalScope:
445break;
446case FileScope:
447Score *= 0.5f;
448break;
449default:
450// TODO: Handle other scopes as we start to use them for index results.
451break;
452}
453}
454
455if (TypeMatchesPreferred)
456Score *= 5.0;
457
458// Penalize non-instance members when they are accessed via a class instance.
459if (!IsInstanceMember &&
460(Context == CodeCompletionContext::CCC_DotMemberAccess ||
461Context == CodeCompletionContext::CCC_ArrowMemberAccess)) {
462Score *= 0.2f;
463}
464
465if (InBaseClass)
466Score *= 0.5f;
467
468// Penalize for FixIts.
469if (NeedsFixIts)
470Score *= 0.5f;
471
472// Use a sigmoid style boosting function similar to `References`, which flats
473// out nicely for large values. This avoids a sharp gradient for heavily
474// referenced symbols. Use smaller gradient for ScopeRefsInFile since ideally
475// MainFileRefs <= ScopeRefsInFile.
476if (MainFileRefs >= 2) {
477// E.g.: (2, 1.12), (9, 2.0), (48, 3.0).
478float S = std::pow(MainFileRefs, -0.11);
479Score *= 11.0 * (1 - S) / (1 + S) + 0.7;
480}
481if (ScopeRefsInFile >= 2) {
482// E.g.: (2, 1.04), (14, 2.0), (109, 3.0), (400, 3.6).
483float S = std::pow(ScopeRefsInFile, -0.10);
484Score *= 10.0 * (1 - S) / (1 + S) + 0.7;
485}
486
487return Score;
488}
489
490llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
491const SymbolRelevanceSignals &S) {
492OS << llvm::formatv("=== Symbol relevance: {0}\n", S.evaluateHeuristics());
493OS << llvm::formatv("\tName: {0}\n", S.Name);
494OS << llvm::formatv("\tName match: {0}\n", S.NameMatch);
495if (S.ContextWords)
496OS << llvm::formatv(
497"\tMatching context word: {0}\n",
498wordMatching(S.Name, S.ContextWords).value_or("<none>"));
499OS << llvm::formatv("\tForbidden: {0}\n", S.Forbidden);
500OS << llvm::formatv("\tNeedsFixIts: {0}\n", S.NeedsFixIts);
501OS << llvm::formatv("\tIsInstanceMember: {0}\n", S.IsInstanceMember);
502OS << llvm::formatv("\tInBaseClass: {0}\n", S.InBaseClass);
503OS << llvm::formatv("\tContext: {0}\n", getCompletionKindString(S.Context));
504OS << llvm::formatv("\tQuery type: {0}\n", static_cast<int>(S.Query));
505OS << llvm::formatv("\tScope: {0}\n", static_cast<int>(S.Scope));
506
507OS << llvm::formatv("\tSymbol URI: {0}\n", S.SymbolURI);
508OS << llvm::formatv("\tSymbol scope: {0}\n",
509S.SymbolScope ? *S.SymbolScope : "<None>");
510
511SymbolRelevanceSignals::DerivedSignals Derived = S.calculateDerivedSignals();
512if (S.FileProximityMatch) {
513unsigned Score = fileProximityScore(Derived.FileProximityDistance);
514OS << llvm::formatv("\tIndex URI proximity: {0} (distance={1})\n", Score,
515Derived.FileProximityDistance);
516}
517OS << llvm::formatv("\tSema file proximity: {0}\n", S.SemaFileProximityScore);
518
519OS << llvm::formatv("\tSema says in scope: {0}\n", S.SemaSaysInScope);
520if (S.ScopeProximityMatch)
521OS << llvm::formatv("\tIndex scope boost: {0}\n",
522scopeProximityScore(Derived.ScopeProximityDistance));
523
524OS << llvm::formatv(
525"\tType matched preferred: {0} (Context type: {1}, Symbol type: {2}\n",
526S.TypeMatchesPreferred, S.HadContextType, S.HadSymbolType);
527
528return OS;
529}
530
/// Combines a symbol quality score and a relevance score into one score by
/// multiplying them.
float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance) {
  const float Combined = SymbolQuality * SymbolRelevance;
  return Combined;
}
534
535// Produces an integer that sorts in the same order as F.
536// That is: a < b <==> encodeFloat(a) < encodeFloat(b).
537static uint32_t encodeFloat(float F) {
538static_assert(std::numeric_limits<float>::is_iec559);
539constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1);
540
541// Get the bits of the float. Endianness is the same as for integers.
542uint32_t U = llvm::bit_cast<uint32_t>(F);
543// IEEE 754 floats compare like sign-magnitude integers.
544if (U & TopBit) // Negative float.
545return 0 - U; // Map onto the low half of integers, order reversed.
546return U + TopBit; // Positive floats map onto the high half of integers.
547}
548
549std::string sortText(float Score, llvm::StringRef Name) {
550// We convert -Score to an integer, and hex-encode for readability.
551// Example: [0.5, "foo"] -> "41000000foo"
552std::string S;
553llvm::raw_string_ostream OS(S);
554llvm::write_hex(OS, encodeFloat(-Score), llvm::HexPrintStyle::Lower,
555/*Width=*/2 * sizeof(Score));
556OS << Name;
557OS.flush();
558return S;
559}
560
561llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
562const SignatureQualitySignals &S) {
563OS << llvm::formatv("=== Signature Quality:\n");
564OS << llvm::formatv("\tNumber of parameters: {0}\n", S.NumberOfParameters);
565OS << llvm::formatv("\tNumber of optional parameters: {0}\n",
566S.NumberOfOptionalParameters);
567OS << llvm::formatv("\tKind: {0}\n", S.Kind);
568return OS;
569}
570
571} // namespace clangd
572} // namespace clang
573