llvm-project

Форк
0
408 строк · 16.8 Кб
1
//===--- Format.cpp -----------------------------------------*- C++-*------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
#include "Format.h"
9
#include "support/Logger.h"
10
#include "clang/Basic/SourceManager.h"
11
#include "clang/Format/Format.h"
12
#include "clang/Lex/Lexer.h"
13
#include "clang/Tooling/Core/Replacement.h"
14
#include "llvm/Support/Unicode.h"
15

16
namespace clang {
17
namespace clangd {
18
namespace {
19

20
/// Append closing brackets )]} to \p Code to make it well-formed.
21
/// Clang-format conservatively refuses to format files with unmatched brackets
22
/// as it isn't sure where the errors are and so can't correct.
23
/// When editing, it's reasonable to assume code before the cursor is complete.
24
void closeBrackets(std::string &Code, const format::FormatStyle &Style) {
25
  SourceManagerForFile FileSM("mock_file.cpp", Code);
26
  auto &SM = FileSM.get();
27
  FileID FID = SM.getMainFileID();
28
  LangOptions LangOpts = format::getFormattingLangOpts(Style);
29
  Lexer Lex(FID, SM.getBufferOrFake(FID), SM, LangOpts);
30
  Token Tok;
31
  std::vector<char> Brackets;
32
  while (!Lex.LexFromRawLexer(Tok)) {
33
    switch(Tok.getKind()) {
34
      case tok::l_paren:
35
        Brackets.push_back(')');
36
        break;
37
      case tok::l_brace:
38
        Brackets.push_back('}');
39
        break;
40
      case tok::l_square:
41
        Brackets.push_back(']');
42
        break;
43
      case tok::r_paren:
44
        if (!Brackets.empty() && Brackets.back() == ')')
45
          Brackets.pop_back();
46
        break;
47
      case tok::r_brace:
48
        if (!Brackets.empty() && Brackets.back() == '}')
49
          Brackets.pop_back();
50
        break;
51
      case tok::r_square:
52
        if (!Brackets.empty() && Brackets.back() == ']')
53
          Brackets.pop_back();
54
        break;
55
      default:
56
        continue;
57
    }
58
  }
59
  // Attempt to end any open comments first.
60
  Code.append("\n// */\n");
61
  Code.append(Brackets.rbegin(), Brackets.rend());
62
}
63

64
static StringRef commentMarker(llvm::StringRef Line) {
65
  for (StringRef Marker : {"///", "//"}){
66
    auto I = Line.rfind(Marker);
67
    if (I != StringRef::npos)
68
      return Line.substr(I, Marker.size());
69
  }
70
  return "";
71
}
72

73
llvm::StringRef firstLine(llvm::StringRef Code) {
74
  return Code.take_until([](char C) { return C == '\n'; });
75
}
76

77
llvm::StringRef lastLine(llvm::StringRef Code) {
78
  llvm::StringRef Rest = Code;
79
  while (!Rest.empty() && Rest.back() != '\n')
80
    Rest = Rest.drop_back();
81
  return Code.substr(Rest.size());
82
}
83

84
// Filename is needed for tooling::Replacement and some overloads of reformat().
85
// Its value should not affect the outcome. We use the default from reformat().
86
llvm::StringRef Filename = "<stdin>";
87

88
// tooling::Replacement from overlapping StringRefs: From must be part of Code.
89
tooling::Replacement replacement(llvm::StringRef Code, llvm::StringRef From,
90
                                 llvm::StringRef To) {
91
  assert(From.begin() >= Code.begin() && From.end() <= Code.end());
92
  // The filename is required but ignored.
93
  return tooling::Replacement(Filename, From.data() - Code.data(),
94
                              From.size(), To);
95
}
96

97
// High-level representation of incremental formatting changes.
98
// The changes are made in two steps.
99
// 1) a (possibly-empty) set of changes synthesized by clangd (e.g. adding
100
//    comment markers when splitting a line comment with a newline).
101
// 2) a selective clang-format run:
102
//    - the "source code" passed to clang format is the code up to the cursor,
103
//      a placeholder for the cursor, and some closing brackets
104
//    - the formatting is restricted to the cursor and (possibly) other ranges
105
//      (e.g. the old line when inserting a newline).
106
//    - changes before the cursor are applied, those after are discarded.
107
struct IncrementalChanges {
108
  // Changes that should be applied before running clang-format.
109
  tooling::Replacements Changes;
110
  // Ranges of the original source code that should be clang-formatted.
111
  // The CursorProxyText will also be formatted.
112
  std::vector<tooling::Range> FormatRanges;
113
  // The source code that should stand in for the cursor when clang-formatting.
114
  // e.g. after inserting a newline, a line-comment at the cursor is used to
115
  // ensure that the newline is preserved.
116
  std::string CursorPlaceholder;
117
};
118

119
// The two functions below, columnWidth() and columnWidthWithTabs(), were
120
// adapted from similar functions in clang/lib/Format/Encoding.h.
121
// FIXME: Move those functions to clang/include/clang/Format.h and reuse them?
122

123
// Helper function for columnWidthWithTabs().
124
inline unsigned columnWidth(StringRef Text) {
125
  int ContentWidth = llvm::sys::unicode::columnWidthUTF8(Text);
126
  if (ContentWidth < 0)
127
    return Text.size(); // fallback for unprintable characters
128
  return ContentWidth;
129
}
130

131
// Returns the number of columns required to display the \p Text on a terminal
132
// with the \p TabWidth.
133
inline unsigned columnWidthWithTabs(StringRef Text, unsigned TabWidth) {
134
  unsigned TotalWidth = 0;
135
  StringRef Tail = Text;
136
  for (;;) {
137
    StringRef::size_type TabPos = Tail.find('\t');
138
    if (TabPos == StringRef::npos)
139
      return TotalWidth + columnWidth(Tail);
140
    TotalWidth += columnWidth(Tail.substr(0, TabPos));
141
    if (TabWidth)
142
      TotalWidth += TabWidth - TotalWidth % TabWidth;
143
    Tail = Tail.substr(TabPos + 1);
144
  }
145
}
146

147
// After a newline:
148
//  - we continue any line-comment that was split
149
//  - we format the old line in addition to the cursor
150
//  - we represent the cursor with a line comment to preserve the newline
151
IncrementalChanges getIncrementalChangesAfterNewline(llvm::StringRef Code,
152
                                                     unsigned Cursor,
153
                                                     unsigned TabWidth) {
154
  IncrementalChanges Result;
155
  // Before newline, code looked like:
156
  //    leading^trailing
157
  // After newline, code looks like:
158
  //    leading
159
  //    indentation^trailing
160
  // Where indentation was added by the editor.
161
  StringRef Trailing = firstLine(Code.substr(Cursor));
162
  StringRef Indentation = lastLine(Code.take_front(Cursor));
163
  if (Indentation.data() == Code.data()) {
164
    vlog("Typed a newline, but we're still on the first line!");
165
    return Result;
166
  }
167
  StringRef Leading =
168
      lastLine(Code.take_front(Indentation.data() - Code.data() - 1));
169
  StringRef NextLine = firstLine(Code.substr(Cursor + Trailing.size() + 1));
170

171
  // Strip leading whitespace on trailing line.
172
  StringRef TrailingTrim = Trailing.ltrim();
173
  if (unsigned TrailWS = Trailing.size() - TrailingTrim.size())
174
    cantFail(Result.Changes.add(
175
        replacement(Code, StringRef(Trailing.begin(), TrailWS), "")));
176

177
  // If we split a comment, replace indentation with a comment marker.
178
  // If the editor made the new line a comment, also respect that.
179
  StringRef CommentMarker = commentMarker(Leading);
180
  bool NewLineIsComment = !commentMarker(Indentation).empty();
181
  if (!CommentMarker.empty() &&
182
      (NewLineIsComment || !commentMarker(NextLine).empty() ||
183
       (!TrailingTrim.empty() && !TrailingTrim.starts_with("//")))) {
184
    // We indent the new comment to match the previous one.
185
    StringRef PreComment =
186
        Leading.take_front(CommentMarker.data() - Leading.data());
187
    std::string IndentAndComment =
188
        (std::string(columnWidthWithTabs(PreComment, TabWidth), ' ') +
189
         CommentMarker + " ")
190
            .str();
191
    cantFail(
192
        Result.Changes.add(replacement(Code, Indentation, IndentAndComment)));
193
  } else {
194
    // Remove any indentation and let clang-format re-add it.
195
    // This prevents the cursor marker dragging e.g. an aligned comment with it.
196
    cantFail(Result.Changes.add(replacement(Code, Indentation, "")));
197
  }
198

199
  // If we put a the newline inside a {} pair, put } on its own line...
200
  if (CommentMarker.empty() && Leading.ends_with("{") &&
201
      Trailing.starts_with("}")) {
202
    cantFail(
203
        Result.Changes.add(replacement(Code, Trailing.take_front(1), "\n}")));
204
    // ...and format it.
205
    Result.FormatRanges.push_back(
206
        tooling::Range(Trailing.data() - Code.data() + 1, 1));
207
  }
208

209
  // Format the whole leading line.
210
  Result.FormatRanges.push_back(
211
      tooling::Range(Leading.data() - Code.data(), Leading.size()));
212

213
  // We use a comment to represent the cursor, to preserve the newline.
214
  // A trailing identifier improves parsing of e.g. for without braces.
215
  // Exception: if the previous line has a trailing comment, we can't use one
216
  // as the cursor (they will be aligned). But in this case we don't need to.
217
  Result.CursorPlaceholder = !CommentMarker.empty() ? "ident" : "//==\nident";
218

219
  return Result;
220
}
221

222
IncrementalChanges getIncrementalChanges(llvm::StringRef Code, unsigned Cursor,
223
                                         llvm::StringRef InsertedText,
224
                                         unsigned TabWidth) {
225
  IncrementalChanges Result;
226
  if (InsertedText == "\n")
227
    return getIncrementalChangesAfterNewline(Code, Cursor, TabWidth);
228

229
  Result.CursorPlaceholder = " /**/";
230
  return Result;
231
}
232

233
// Returns equivalent replacements that preserve the correspondence between
234
// OldCursor and NewCursor. If OldCursor lies in a replaced region, that
235
// replacement will be split.
236
std::vector<tooling::Replacement>
237
split(const tooling::Replacements &Replacements, unsigned OldCursor,
238
      unsigned NewCursor) {
239
  std::vector<tooling::Replacement> Result;
240
  int LengthChange = 0;
241
  for (const tooling::Replacement &R : Replacements) {
242
    if (R.getOffset() + R.getLength() <= OldCursor) { // before cursor
243
      Result.push_back(R);
244
      LengthChange += R.getReplacementText().size() - R.getLength();
245
    } else if (R.getOffset() < OldCursor) { // overlaps cursor
246
      int ReplacementSplit = NewCursor - LengthChange - R.getOffset();
247
      assert(ReplacementSplit >= 0 &&
248
             ReplacementSplit <= int(R.getReplacementText().size()) &&
249
             "NewCursor incompatible with OldCursor!");
250
      Result.push_back(tooling::Replacement(
251
          R.getFilePath(), R.getOffset(), OldCursor - R.getOffset(),
252
          R.getReplacementText().take_front(ReplacementSplit)));
253
      Result.push_back(tooling::Replacement(
254
          R.getFilePath(), OldCursor,
255
          R.getLength() - (OldCursor - R.getOffset()),
256
          R.getReplacementText().drop_front(ReplacementSplit)));
257
    } else if (R.getOffset() >= OldCursor) { // after cursor
258
      Result.push_back(R);
259
    }
260
  }
261
  return Result;
262
}
263

264
} // namespace
265

266
// We're simulating the following sequence of changes:
267
//   - apply the pre-formatting edits (see getIncrementalChanges)
268
//   - insert a placeholder for the cursor
269
//   - format some of the resulting code
270
//   - remove the cursor placeholder again
271
// The replacements we return are produced by composing these.
272
//
273
// The text we actually pass to clang-format is slightly different from this,
274
// e.g. we have to close brackets. We ensure these differences are *after*
275
// all the regions we want to format, and discard changes in them.
276
std::vector<tooling::Replacement>
277
formatIncremental(llvm::StringRef OriginalCode, unsigned OriginalCursor,
278
                  llvm::StringRef InsertedText, format::FormatStyle Style) {
279
  IncrementalChanges Incremental = getIncrementalChanges(
280
      OriginalCode, OriginalCursor, InsertedText, Style.TabWidth);
281
  // Never *remove* lines in response to pressing enter! This annoys users.
282
  if (InsertedText == "\n") {
283
    Style.MaxEmptyLinesToKeep = 1000;
284
    Style.KeepEmptyLines.AtStartOfBlock = true;
285
  }
286

287
  // Compute the code we want to format:
288
  // 1) Start with code after the pre-formatting edits.
289
  std::string CodeToFormat = cantFail(
290
      tooling::applyAllReplacements(OriginalCode, Incremental.Changes));
291
  unsigned Cursor = Incremental.Changes.getShiftedCodePosition(OriginalCursor);
292
  // 2) Truncate code after the last interesting range.
293
  unsigned FormatLimit = Cursor;
294
  for (tooling::Range &R : Incremental.FormatRanges)
295
    FormatLimit = std::max(FormatLimit, R.getOffset() + R.getLength());
296
  CodeToFormat.resize(FormatLimit);
297
  // 3) Insert a placeholder for the cursor.
298
  CodeToFormat.insert(Cursor, Incremental.CursorPlaceholder);
299
  // 4) Append brackets after FormatLimit so the code is well-formed.
300
  closeBrackets(CodeToFormat, Style);
301

302
  // Determine the ranges to format:
303
  std::vector<tooling::Range> RangesToFormat = Incremental.FormatRanges;
304
  // Ranges after the cursor need to be adjusted for the placeholder.
305
  for (auto &R : RangesToFormat) {
306
    if (R.getOffset() > Cursor)
307
      R = tooling::Range(R.getOffset() + Incremental.CursorPlaceholder.size(),
308
                         R.getLength());
309
  }
310
  // We also format the cursor.
311
  RangesToFormat.push_back(
312
      tooling::Range(Cursor, Incremental.CursorPlaceholder.size()));
313
  // Also update FormatLimit for the placeholder, we'll use this later.
314
  FormatLimit += Incremental.CursorPlaceholder.size();
315

316
  // Run clang-format, and truncate changes at FormatLimit.
317
  tooling::Replacements FormattingChanges;
318
  format::FormattingAttemptStatus Status;
319
  for (const tooling::Replacement &R : format::reformat(
320
           Style, CodeToFormat, RangesToFormat, Filename, &Status)) {
321
    if (R.getOffset() + R.getLength() <= FormatLimit) // Before limit.
322
      cantFail(FormattingChanges.add(R));
323
    else if(R.getOffset() < FormatLimit) { // Overlaps limit.
324
      if (R.getReplacementText().empty()) // Deletions are easy to handle.
325
        cantFail(FormattingChanges.add(tooling::Replacement(Filename,
326
            R.getOffset(), FormatLimit - R.getOffset(), "")));
327
      else
328
        // Hopefully won't happen in practice?
329
        elog("Incremental clang-format edit overlapping cursor @ {0}!\n{1}",
330
             Cursor, CodeToFormat);
331
    }
332
  }
333
  if (!Status.FormatComplete)
334
    vlog("Incremental format incomplete at line {0}", Status.Line);
335

336
  // Now we are ready to compose the changes relative to OriginalCode.
337
  //   edits -> insert placeholder -> format -> remove placeholder.
338
  // We must express insert/remove as Replacements.
339
  tooling::Replacements InsertCursorPlaceholder(
340
      tooling::Replacement(Filename, Cursor, 0, Incremental.CursorPlaceholder));
341
  unsigned FormattedCursorStart =
342
               FormattingChanges.getShiftedCodePosition(Cursor),
343
           FormattedCursorEnd = FormattingChanges.getShiftedCodePosition(
344
               Cursor + Incremental.CursorPlaceholder.size());
345
  tooling::Replacements RemoveCursorPlaceholder(
346
      tooling::Replacement(Filename, FormattedCursorStart,
347
                           FormattedCursorEnd - FormattedCursorStart, ""));
348

349
  // We can't simply merge() and return: tooling::Replacements will combine
350
  // adjacent edits left and right of the cursor. This gives the right source
351
  // code, but loses information about where the cursor is!
352
  // Fortunately, none of the individual passes lose information, so:
353
  //  - we use merge() to compute the final Replacements
354
  //  - we chain getShiftedCodePosition() to compute final cursor position
355
  //  - we split the final Replacements at the cursor position, so that
356
  //    each Replacement lies either before or after the cursor.
357
  tooling::Replacements Final;
358
  unsigned FinalCursor = OriginalCursor;
359
#ifndef NDEBUG
360
  std::string FinalCode = std::string(OriginalCode);
361
  dlog("Initial code: {0}", FinalCode);
362
#endif
363
  for (auto Pass :
364
       std::vector<std::pair<const char *, const tooling::Replacements *>>{
365
           {"Pre-formatting changes", &Incremental.Changes},
366
           {"Insert placeholder", &InsertCursorPlaceholder},
367
           {"clang-format", &FormattingChanges},
368
           {"Remove placeholder", &RemoveCursorPlaceholder}}) {
369
    Final = Final.merge(*Pass.second);
370
    FinalCursor = Pass.second->getShiftedCodePosition(FinalCursor);
371
#ifndef NDEBUG
372
    FinalCode =
373
        cantFail(tooling::applyAllReplacements(FinalCode, *Pass.second));
374
    dlog("After {0}:\n{1}^{2}", Pass.first,
375
         StringRef(FinalCode).take_front(FinalCursor),
376
         StringRef(FinalCode).drop_front(FinalCursor));
377
#endif
378
  }
379
  return split(Final, OriginalCursor, FinalCursor);
380
}
381

382
unsigned
383
transformCursorPosition(unsigned Offset,
384
                        const std::vector<tooling::Replacement> &Replacements) {
385
  unsigned OriginalOffset = Offset;
386
  for (const auto &R : Replacements) {
387
    if (R.getOffset() + R.getLength() <= OriginalOffset) {
388
      // Replacement is before cursor.
389
      Offset += R.getReplacementText().size();
390
      Offset -= R.getLength();
391
    } else if (R.getOffset() < OriginalOffset) {
392
      // Replacement overlaps cursor.
393
      // Preserve position within replacement text, as far as possible.
394
      unsigned PositionWithinReplacement = Offset - R.getOffset();
395
      if (PositionWithinReplacement > R.getReplacementText().size()) {
396
        Offset += R.getReplacementText().size();
397
        Offset -= PositionWithinReplacement;
398
      }
399
    } else {
400
      // Replacement after cursor.
401
      break; // Replacements are sorted, the rest are also after the cursor.
402
    }
403
  }
404
  return Offset;
405
}
406

407
} // namespace clangd
408
} // namespace clang
409

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.