llvm-project
601 строка · 22.8 Кб
1//===--- SortJavaScriptImports.cpp - Sort ES6 Imports -----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements a sort operation for JavaScript ES6 imports.
11///
12//===----------------------------------------------------------------------===//
13
14#include "SortJavaScriptImports.h"
15#include "TokenAnalyzer.h"
16#include "TokenAnnotator.h"
17#include "clang/Basic/Diagnostic.h"
18#include "clang/Basic/DiagnosticOptions.h"
19#include "clang/Basic/LLVM.h"
20#include "clang/Basic/SourceLocation.h"
21#include "clang/Basic/SourceManager.h"
22#include "clang/Basic/TokenKinds.h"
23#include "clang/Format/Format.h"
24#include "llvm/ADT/STLExtras.h"
25#include "llvm/ADT/SmallVector.h"
26#include "llvm/Support/Debug.h"
27#include <algorithm>
28#include <string>
29
30#define DEBUG_TYPE "format-formatter"
31
32namespace clang {
33namespace format {
34
35class FormatTokenLexer;
36
37// An imported symbol in a JavaScript ES6 import/export, possibly aliased.
38struct JsImportedSymbol {
39StringRef Symbol;
40StringRef Alias;
41SourceRange Range;
42
43bool operator==(const JsImportedSymbol &RHS) const {
44// Ignore Range for comparison, it is only used to stitch code together,
45// but imports at different code locations are still conceptually the same.
46return Symbol == RHS.Symbol && Alias == RHS.Alias;
47}
48};
49
50// An ES6 module reference.
51//
52// ES6 implements a module system, where individual modules (~= source files)
53// can reference other modules, either importing symbols from them, or exporting
54// symbols from them:
55// import {foo} from 'foo';
56// export {foo};
57// export {bar} from 'bar';
58//
59// `export`s with URLs are syntactic sugar for an import of the symbol from the
60// URL, followed by an export of the symbol, allowing this code to treat both
61// statements more or less identically, with the exception being that `export`s
62// are sorted last.
63//
64// imports and exports support individual symbols, but also a wildcard syntax:
65// import * as prefix from 'foo';
66// export * from 'bar';
67//
68// This struct represents both exports and imports to build up the information
69// required for sorting module references.
70struct JsModuleReference {
71bool FormattingOff = false;
72bool IsExport = false;
73bool IsTypeOnly = false;
74// Module references are sorted into these categories, in order.
75enum ReferenceCategory {
76SIDE_EFFECT, // "import 'something';"
77ABSOLUTE, // from 'something'
78RELATIVE_PARENT, // from '../*'
79RELATIVE, // from './*'
80ALIAS, // import X = A.B;
81};
82ReferenceCategory Category = ReferenceCategory::SIDE_EFFECT;
83// The URL imported, e.g. `import .. from 'url';`. Empty for `export {a, b};`.
84StringRef URL;
85// Prefix from "import * as prefix". Empty for symbol imports and `export *`.
86// Implies an empty names list.
87StringRef Prefix;
88// Default import from "import DefaultName from '...';".
89StringRef DefaultImport;
90// Symbols from `import {SymbolA, SymbolB, ...} from ...;`.
91SmallVector<JsImportedSymbol, 1> Symbols;
92// Whether some symbols were merged into this one. Controls if the module
93// reference needs re-formatting.
94bool SymbolsMerged = false;
95// The source location just after { and just before } in the import.
96// Extracted eagerly to allow modification of Symbols later on.
97SourceLocation SymbolsStart, SymbolsEnd;
98// Textual position of the import/export, including preceding and trailing
99// comments.
100SourceRange Range;
101};
102
103bool operator<(const JsModuleReference &LHS, const JsModuleReference &RHS) {
104if (LHS.IsExport != RHS.IsExport)
105return LHS.IsExport < RHS.IsExport;
106if (LHS.Category != RHS.Category)
107return LHS.Category < RHS.Category;
108if (LHS.Category == JsModuleReference::ReferenceCategory::SIDE_EFFECT ||
109LHS.Category == JsModuleReference::ReferenceCategory::ALIAS) {
110// Side effect imports and aliases might be ordering sensitive. Consider
111// them equal so that they maintain their relative order in the stable sort
112// below. This retains transitivity because LHS.Category == RHS.Category
113// here.
114return false;
115}
116// Empty URLs sort *last* (for export {...};).
117if (LHS.URL.empty() != RHS.URL.empty())
118return LHS.URL.empty() < RHS.URL.empty();
119if (int Res = LHS.URL.compare_insensitive(RHS.URL))
120return Res < 0;
121// '*' imports (with prefix) sort before {a, b, ...} imports.
122if (LHS.Prefix.empty() != RHS.Prefix.empty())
123return LHS.Prefix.empty() < RHS.Prefix.empty();
124if (LHS.Prefix != RHS.Prefix)
125return LHS.Prefix > RHS.Prefix;
126return false;
127}
128
129// JavaScriptImportSorter sorts JavaScript ES6 imports and exports. It is
130// implemented as a TokenAnalyzer because ES6 imports have substantial syntactic
131// structure, making it messy to sort them using regular expressions.
132class JavaScriptImportSorter : public TokenAnalyzer {
133public:
134JavaScriptImportSorter(const Environment &Env, const FormatStyle &Style)
135: TokenAnalyzer(Env, Style),
136FileContents(Env.getSourceManager().getBufferData(Env.getFileID())) {
137// FormatToken.Tok starts out in an uninitialized state.
138invalidToken.Tok.startToken();
139}
140
141std::pair<tooling::Replacements, unsigned>
142analyze(TokenAnnotator &Annotator,
143SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
144FormatTokenLexer &Tokens) override {
145tooling::Replacements Result;
146AffectedRangeMgr.computeAffectedLines(AnnotatedLines);
147
148const AdditionalKeywords &Keywords = Tokens.getKeywords();
149SmallVector<JsModuleReference, 16> References;
150AnnotatedLine *FirstNonImportLine;
151std::tie(References, FirstNonImportLine) =
152parseModuleReferences(Keywords, AnnotatedLines);
153
154if (References.empty())
155return {Result, 0};
156
157// The text range of all parsed imports, to be replaced later.
158SourceRange InsertionPoint = References[0].Range;
159InsertionPoint.setEnd(References[References.size() - 1].Range.getEnd());
160
161References = sortModuleReferences(References);
162
163std::string ReferencesText;
164for (unsigned I = 0, E = References.size(); I != E; ++I) {
165JsModuleReference Reference = References[I];
166appendReference(ReferencesText, Reference);
167if (I + 1 < E) {
168// Insert breaks between imports and exports.
169ReferencesText += "\n";
170// Separate imports groups with two line breaks, but keep all exports
171// in a single group.
172if (!Reference.IsExport &&
173(Reference.IsExport != References[I + 1].IsExport ||
174Reference.Category != References[I + 1].Category)) {
175ReferencesText += "\n";
176}
177}
178}
179StringRef PreviousText = getSourceText(InsertionPoint);
180if (ReferencesText == PreviousText)
181return {Result, 0};
182
183// The loop above might collapse previously existing line breaks between
184// import blocks, and thus shrink the file. SortIncludes must not shrink
185// overall source length as there is currently no re-calculation of ranges
186// after applying source sorting.
187// This loop just backfills trailing spaces after the imports, which are
188// harmless and will be stripped by the subsequent formatting pass.
189// FIXME: A better long term fix is to re-calculate Ranges after sorting.
190unsigned PreviousSize = PreviousText.size();
191while (ReferencesText.size() < PreviousSize)
192ReferencesText += " ";
193
194// Separate references from the main code body of the file.
195if (FirstNonImportLine && FirstNonImportLine->First->NewlinesBefore < 2 &&
196!(FirstNonImportLine->First->is(tok::comment) &&
197isClangFormatOn(FirstNonImportLine->First->TokenText.trim()))) {
198ReferencesText += "\n";
199}
200
201LLVM_DEBUG(llvm::dbgs() << "Replacing imports:\n"
202<< PreviousText << "\nwith:\n"
203<< ReferencesText << "\n");
204auto Err = Result.add(tooling::Replacement(
205Env.getSourceManager(), CharSourceRange::getCharRange(InsertionPoint),
206ReferencesText));
207// FIXME: better error handling. For now, just print error message and skip
208// the replacement for the release version.
209if (Err) {
210llvm::errs() << toString(std::move(Err)) << "\n";
211assert(false);
212}
213
214return {Result, 0};
215}
216
217private:
218FormatToken *Current = nullptr;
219FormatToken *LineEnd = nullptr;
220
221FormatToken invalidToken;
222
223StringRef FileContents;
224
225void skipComments() { Current = skipComments(Current); }
226
227FormatToken *skipComments(FormatToken *Tok) {
228while (Tok && Tok->is(tok::comment))
229Tok = Tok->Next;
230return Tok;
231}
232
233void nextToken() {
234Current = Current->Next;
235skipComments();
236if (!Current || Current == LineEnd->Next) {
237// Set the current token to an invalid token, so that further parsing on
238// this line fails.
239Current = &invalidToken;
240}
241}
242
243StringRef getSourceText(SourceRange Range) {
244return getSourceText(Range.getBegin(), Range.getEnd());
245}
246
247StringRef getSourceText(SourceLocation Begin, SourceLocation End) {
248const SourceManager &SM = Env.getSourceManager();
249return FileContents.substr(SM.getFileOffset(Begin),
250SM.getFileOffset(End) - SM.getFileOffset(Begin));
251}
252
253// Sorts the given module references.
254// Imports can have formatting disabled (FormattingOff), so the code below
255// skips runs of "no-formatting" module references, and sorts/merges the
256// references that have formatting enabled in individual chunks.
257SmallVector<JsModuleReference, 16>
258sortModuleReferences(const SmallVector<JsModuleReference, 16> &References) {
259// Sort module references.
260// Imports can have formatting disabled (FormattingOff), so the code below
261// skips runs of "no-formatting" module references, and sorts other
262// references per group.
263const auto *Start = References.begin();
264SmallVector<JsModuleReference, 16> ReferencesSorted;
265while (Start != References.end()) {
266while (Start != References.end() && Start->FormattingOff) {
267// Skip over all imports w/ disabled formatting.
268ReferencesSorted.push_back(*Start);
269++Start;
270}
271SmallVector<JsModuleReference, 16> SortChunk;
272while (Start != References.end() && !Start->FormattingOff) {
273// Skip over all imports w/ disabled formatting.
274SortChunk.push_back(*Start);
275++Start;
276}
277stable_sort(SortChunk);
278mergeModuleReferences(SortChunk);
279ReferencesSorted.insert(ReferencesSorted.end(), SortChunk.begin(),
280SortChunk.end());
281}
282return ReferencesSorted;
283}
284
285// Merge module references.
286// After sorting, find all references that import named symbols from the
287// same URL and merge their names. E.g.
288// import {X} from 'a';
289// import {Y} from 'a';
290// should be rewritten to:
291// import {X, Y} from 'a';
292// Note: this modifies the passed in ``References`` vector (by removing no
293// longer needed references).
294void mergeModuleReferences(SmallVector<JsModuleReference, 16> &References) {
295if (References.empty())
296return;
297JsModuleReference *PreviousReference = References.begin();
298auto *Reference = std::next(References.begin());
299while (Reference != References.end()) {
300// Skip:
301// import 'foo';
302// import * as foo from 'foo'; on either previous or this.
303// import Default from 'foo'; on either previous or this.
304// mismatching
305if (Reference->Category == JsModuleReference::SIDE_EFFECT ||
306PreviousReference->Category == JsModuleReference::SIDE_EFFECT ||
307Reference->IsExport != PreviousReference->IsExport ||
308Reference->IsTypeOnly != PreviousReference->IsTypeOnly ||
309!PreviousReference->Prefix.empty() || !Reference->Prefix.empty() ||
310!PreviousReference->DefaultImport.empty() ||
311!Reference->DefaultImport.empty() || Reference->Symbols.empty() ||
312PreviousReference->URL != Reference->URL) {
313PreviousReference = Reference;
314++Reference;
315continue;
316}
317// Merge symbols from identical imports.
318PreviousReference->Symbols.append(Reference->Symbols);
319PreviousReference->SymbolsMerged = true;
320// Remove the merged import.
321Reference = References.erase(Reference);
322}
323}
324
325// Appends ``Reference`` to ``Buffer``.
326void appendReference(std::string &Buffer, JsModuleReference &Reference) {
327if (Reference.FormattingOff) {
328Buffer +=
329getSourceText(Reference.Range.getBegin(), Reference.Range.getEnd());
330return;
331}
332// Sort the individual symbols within the import.
333// E.g. `import {b, a} from 'x';` -> `import {a, b} from 'x';`
334SmallVector<JsImportedSymbol, 1> Symbols = Reference.Symbols;
335stable_sort(Symbols,
336[&](const JsImportedSymbol &LHS, const JsImportedSymbol &RHS) {
337return LHS.Symbol.compare_insensitive(RHS.Symbol) < 0;
338});
339if (!Reference.SymbolsMerged && Symbols == Reference.Symbols) {
340// Symbols didn't change, just emit the entire module reference.
341StringRef ReferenceStmt = getSourceText(Reference.Range);
342Buffer += ReferenceStmt;
343return;
344}
345// Stitch together the module reference start...
346Buffer += getSourceText(Reference.Range.getBegin(), Reference.SymbolsStart);
347// ... then the references in order ...
348if (!Symbols.empty()) {
349Buffer += getSourceText(Symbols.front().Range);
350for (const JsImportedSymbol &Symbol : drop_begin(Symbols)) {
351Buffer += ",";
352Buffer += getSourceText(Symbol.Range);
353}
354}
355// ... followed by the module reference end.
356Buffer += getSourceText(Reference.SymbolsEnd, Reference.Range.getEnd());
357}
358
359// Parses module references in the given lines. Returns the module references,
360// and a pointer to the first "main code" line if that is adjacent to the
361// affected lines of module references, nullptr otherwise.
362std::pair<SmallVector<JsModuleReference, 16>, AnnotatedLine *>
363parseModuleReferences(const AdditionalKeywords &Keywords,
364SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
365SmallVector<JsModuleReference, 16> References;
366SourceLocation Start;
367AnnotatedLine *FirstNonImportLine = nullptr;
368bool AnyImportAffected = false;
369bool FormattingOff = false;
370for (auto *Line : AnnotatedLines) {
371assert(Line->First);
372Current = Line->First;
373LineEnd = Line->Last;
374// clang-format comments toggle formatting on/off.
375// This is tracked in FormattingOff here and on JsModuleReference.
376while (Current && Current->is(tok::comment)) {
377StringRef CommentText = Current->TokenText.trim();
378if (isClangFormatOff(CommentText)) {
379FormattingOff = true;
380} else if (isClangFormatOn(CommentText)) {
381FormattingOff = false;
382// Special case: consider a trailing "clang-format on" line to be part
383// of the module reference, so that it gets moved around together with
384// it (as opposed to the next module reference, which might get sorted
385// around).
386if (!References.empty()) {
387References.back().Range.setEnd(Current->Tok.getEndLoc());
388Start = Current->Tok.getEndLoc().getLocWithOffset(1);
389}
390}
391// Handle all clang-format comments on a line, e.g. for an empty block.
392Current = Current->Next;
393}
394skipComments();
395if (Start.isInvalid() || References.empty()) {
396// After the first file level comment, consider line comments to be part
397// of the import that immediately follows them by using the previously
398// set Start.
399Start = Line->First->Tok.getLocation();
400}
401if (!Current) {
402// Only comments on this line. Could be the first non-import line.
403FirstNonImportLine = Line;
404continue;
405}
406JsModuleReference Reference;
407Reference.FormattingOff = FormattingOff;
408Reference.Range.setBegin(Start);
409// References w/o a URL, e.g. export {A}, groups with RELATIVE.
410Reference.Category = JsModuleReference::ReferenceCategory::RELATIVE;
411if (!parseModuleReference(Keywords, Reference)) {
412if (!FirstNonImportLine)
413FirstNonImportLine = Line; // if no comment before.
414break;
415}
416FirstNonImportLine = nullptr;
417AnyImportAffected = AnyImportAffected || Line->Affected;
418Reference.Range.setEnd(LineEnd->Tok.getEndLoc());
419LLVM_DEBUG({
420llvm::dbgs() << "JsModuleReference: {"
421<< "formatting_off: " << Reference.FormattingOff
422<< ", is_export: " << Reference.IsExport
423<< ", cat: " << Reference.Category
424<< ", url: " << Reference.URL
425<< ", prefix: " << Reference.Prefix;
426for (const JsImportedSymbol &Symbol : Reference.Symbols)
427llvm::dbgs() << ", " << Symbol.Symbol << " as " << Symbol.Alias;
428llvm::dbgs() << ", text: " << getSourceText(Reference.Range);
429llvm::dbgs() << "}\n";
430});
431References.push_back(Reference);
432Start = SourceLocation();
433}
434// Sort imports if any import line was affected.
435if (!AnyImportAffected)
436References.clear();
437return std::make_pair(References, FirstNonImportLine);
438}
439
440// Parses a JavaScript/ECMAScript 6 module reference.
441// See http://www.ecma-international.org/ecma-262/6.0/#sec-scripts-and-modules
442// for grammar EBNF (production ModuleItem).
443bool parseModuleReference(const AdditionalKeywords &Keywords,
444JsModuleReference &Reference) {
445if (!Current || !Current->isOneOf(Keywords.kw_import, tok::kw_export))
446return false;
447Reference.IsExport = Current->is(tok::kw_export);
448
449nextToken();
450if (Current->isStringLiteral() && !Reference.IsExport) {
451// "import 'side-effect';"
452Reference.Category = JsModuleReference::ReferenceCategory::SIDE_EFFECT;
453Reference.URL =
454Current->TokenText.substr(1, Current->TokenText.size() - 2);
455return true;
456}
457
458if (!parseModuleBindings(Keywords, Reference))
459return false;
460
461if (Current->is(Keywords.kw_from)) {
462// imports have a 'from' clause, exports might not.
463nextToken();
464if (!Current->isStringLiteral())
465return false;
466// URL = TokenText without the quotes.
467Reference.URL =
468Current->TokenText.substr(1, Current->TokenText.size() - 2);
469if (Reference.URL.starts_with("..")) {
470Reference.Category =
471JsModuleReference::ReferenceCategory::RELATIVE_PARENT;
472} else if (Reference.URL.starts_with(".")) {
473Reference.Category = JsModuleReference::ReferenceCategory::RELATIVE;
474} else {
475Reference.Category = JsModuleReference::ReferenceCategory::ABSOLUTE;
476}
477}
478return true;
479}
480
481bool parseModuleBindings(const AdditionalKeywords &Keywords,
482JsModuleReference &Reference) {
483if (parseStarBinding(Keywords, Reference))
484return true;
485return parseNamedBindings(Keywords, Reference);
486}
487
488bool parseStarBinding(const AdditionalKeywords &Keywords,
489JsModuleReference &Reference) {
490// * as prefix from '...';
491if (Current->is(Keywords.kw_type) && Current->Next &&
492Current->Next->is(tok::star)) {
493Reference.IsTypeOnly = true;
494nextToken();
495}
496if (Current->isNot(tok::star))
497return false;
498nextToken();
499if (Current->isNot(Keywords.kw_as))
500return false;
501nextToken();
502if (Current->isNot(tok::identifier))
503return false;
504Reference.Prefix = Current->TokenText;
505nextToken();
506return true;
507}
508
509bool parseNamedBindings(const AdditionalKeywords &Keywords,
510JsModuleReference &Reference) {
511if (Current->is(Keywords.kw_type) && Current->Next &&
512Current->Next->isOneOf(tok::identifier, tok::l_brace)) {
513Reference.IsTypeOnly = true;
514nextToken();
515}
516
517// eat a potential "import X, " prefix.
518if (!Reference.IsExport && Current->is(tok::identifier)) {
519Reference.DefaultImport = Current->TokenText;
520nextToken();
521if (Current->is(Keywords.kw_from))
522return true;
523// import X = A.B.C;
524if (Current->is(tok::equal)) {
525Reference.Category = JsModuleReference::ReferenceCategory::ALIAS;
526nextToken();
527while (Current->is(tok::identifier)) {
528nextToken();
529if (Current->is(tok::semi))
530return true;
531if (Current->isNot(tok::period))
532return false;
533nextToken();
534}
535}
536if (Current->isNot(tok::comma))
537return false;
538nextToken(); // eat comma.
539}
540if (Current->isNot(tok::l_brace))
541return false;
542
543// {sym as alias, sym2 as ...} from '...';
544Reference.SymbolsStart = Current->Tok.getEndLoc();
545while (Current->isNot(tok::r_brace)) {
546nextToken();
547if (Current->is(tok::r_brace))
548break;
549auto IsIdentifier = [](const auto *Tok) {
550return Tok->isOneOf(tok::identifier, tok::kw_default, tok::kw_template);
551};
552bool isTypeOnly = Current->is(Keywords.kw_type) && Current->Next &&
553IsIdentifier(Current->Next);
554if (!isTypeOnly && !IsIdentifier(Current))
555return false;
556
557JsImportedSymbol Symbol;
558// Make sure to include any preceding comments.
559Symbol.Range.setBegin(
560Current->getPreviousNonComment()->Next->WhitespaceRange.getBegin());
561if (isTypeOnly)
562nextToken();
563Symbol.Symbol = Current->TokenText;
564nextToken();
565
566if (Current->is(Keywords.kw_as)) {
567nextToken();
568if (!IsIdentifier(Current))
569return false;
570Symbol.Alias = Current->TokenText;
571nextToken();
572}
573Symbol.Range.setEnd(Current->Tok.getLocation());
574Reference.Symbols.push_back(Symbol);
575
576if (!Current->isOneOf(tok::r_brace, tok::comma))
577return false;
578}
579Reference.SymbolsEnd = Current->Tok.getLocation();
580// For named imports with a trailing comma ("import {X,}"), consider the
581// comma to be the end of the import list, so that it doesn't get removed.
582if (Current->Previous->is(tok::comma))
583Reference.SymbolsEnd = Current->Previous->Tok.getLocation();
584nextToken(); // consume r_brace
585return true;
586}
587};
588
589tooling::Replacements sortJavaScriptImports(const FormatStyle &Style,
590StringRef Code,
591ArrayRef<tooling::Range> Ranges,
592StringRef FileName) {
593// FIXME: Cursor support.
594auto Env = Environment::make(Code, FileName, Ranges);
595if (!Env)
596return {};
597return JavaScriptImportSorter(*Env, Style).process().first;
598}
599
600} // end namespace format
601} // end namespace clang
602