llvm-project
408 строк · 16.8 Кб
//===--- Format.cpp -----------------------------------------*- C++-*------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8#include "Format.h"9#include "support/Logger.h"10#include "clang/Basic/SourceManager.h"11#include "clang/Format/Format.h"12#include "clang/Lex/Lexer.h"13#include "clang/Tooling/Core/Replacement.h"14#include "llvm/Support/Unicode.h"15
16namespace clang {17namespace clangd {18namespace {19
20/// Append closing brackets )]} to \p Code to make it well-formed.
21/// Clang-format conservatively refuses to format files with unmatched brackets
22/// as it isn't sure where the errors are and so can't correct.
23/// When editing, it's reasonable to assume code before the cursor is complete.
24void closeBrackets(std::string &Code, const format::FormatStyle &Style) {25SourceManagerForFile FileSM("mock_file.cpp", Code);26auto &SM = FileSM.get();27FileID FID = SM.getMainFileID();28LangOptions LangOpts = format::getFormattingLangOpts(Style);29Lexer Lex(FID, SM.getBufferOrFake(FID), SM, LangOpts);30Token Tok;31std::vector<char> Brackets;32while (!Lex.LexFromRawLexer(Tok)) {33switch(Tok.getKind()) {34case tok::l_paren:35Brackets.push_back(')');36break;37case tok::l_brace:38Brackets.push_back('}');39break;40case tok::l_square:41Brackets.push_back(']');42break;43case tok::r_paren:44if (!Brackets.empty() && Brackets.back() == ')')45Brackets.pop_back();46break;47case tok::r_brace:48if (!Brackets.empty() && Brackets.back() == '}')49Brackets.pop_back();50break;51case tok::r_square:52if (!Brackets.empty() && Brackets.back() == ']')53Brackets.pop_back();54break;55default:56continue;57}58}59// Attempt to end any open comments first.60Code.append("\n// */\n");61Code.append(Brackets.rbegin(), Brackets.rend());62}
63
64static StringRef commentMarker(llvm::StringRef Line) {65for (StringRef Marker : {"///", "//"}){66auto I = Line.rfind(Marker);67if (I != StringRef::npos)68return Line.substr(I, Marker.size());69}70return "";71}
72
73llvm::StringRef firstLine(llvm::StringRef Code) {74return Code.take_until([](char C) { return C == '\n'; });75}
76
77llvm::StringRef lastLine(llvm::StringRef Code) {78llvm::StringRef Rest = Code;79while (!Rest.empty() && Rest.back() != '\n')80Rest = Rest.drop_back();81return Code.substr(Rest.size());82}
83
84// Filename is needed for tooling::Replacement and some overloads of reformat().
85// Its value should not affect the outcome. We use the default from reformat().
86llvm::StringRef Filename = "<stdin>";87
88// tooling::Replacement from overlapping StringRefs: From must be part of Code.
89tooling::Replacement replacement(llvm::StringRef Code, llvm::StringRef From,90llvm::StringRef To) {91assert(From.begin() >= Code.begin() && From.end() <= Code.end());92// The filename is required but ignored.93return tooling::Replacement(Filename, From.data() - Code.data(),94From.size(), To);95}
96
97// High-level representation of incremental formatting changes.
98// The changes are made in two steps.
99// 1) a (possibly-empty) set of changes synthesized by clangd (e.g. adding
100// comment markers when splitting a line comment with a newline).
101// 2) a selective clang-format run:
102// - the "source code" passed to clang format is the code up to the cursor,
103// a placeholder for the cursor, and some closing brackets
104// - the formatting is restricted to the cursor and (possibly) other ranges
105// (e.g. the old line when inserting a newline).
106// - changes before the cursor are applied, those after are discarded.
107struct IncrementalChanges {108// Changes that should be applied before running clang-format.109tooling::Replacements Changes;110// Ranges of the original source code that should be clang-formatted.111// The CursorProxyText will also be formatted.112std::vector<tooling::Range> FormatRanges;113// The source code that should stand in for the cursor when clang-formatting.114// e.g. after inserting a newline, a line-comment at the cursor is used to115// ensure that the newline is preserved.116std::string CursorPlaceholder;117};118
// The two functions below, columnWidth() and columnWidthWithTabs(), were
// adapted from similar functions in clang/lib/Format/Encoding.h.
// FIXME: Move those functions to clang/include/clang/Format.h and reuse them?
122
123// Helper function for columnWidthWithTabs().
124inline unsigned columnWidth(StringRef Text) {125int ContentWidth = llvm::sys::unicode::columnWidthUTF8(Text);126if (ContentWidth < 0)127return Text.size(); // fallback for unprintable characters128return ContentWidth;129}
130
131// Returns the number of columns required to display the \p Text on a terminal
132// with the \p TabWidth.
133inline unsigned columnWidthWithTabs(StringRef Text, unsigned TabWidth) {134unsigned TotalWidth = 0;135StringRef Tail = Text;136for (;;) {137StringRef::size_type TabPos = Tail.find('\t');138if (TabPos == StringRef::npos)139return TotalWidth + columnWidth(Tail);140TotalWidth += columnWidth(Tail.substr(0, TabPos));141if (TabWidth)142TotalWidth += TabWidth - TotalWidth % TabWidth;143Tail = Tail.substr(TabPos + 1);144}145}
146
// Computes the incremental changes to make after the user typed a newline.
// After a newline:
//  - we continue any line-comment that was split
//  - we format the old line in addition to the cursor
//  - we represent the cursor with a line comment to preserve the newline
//
// \p Code is the buffer after the newline was inserted, \p Cursor the offset
// of the cursor in it, and \p TabWidth is used to measure the indentation of
// a comment marker on the previous line.
// If the cursor is still on the first line of \p Code, an empty result is
// returned.
IncrementalChanges getIncrementalChangesAfterNewline(llvm::StringRef Code,
                                                     unsigned Cursor,
                                                     unsigned TabWidth) {
  IncrementalChanges Result;
  // Before newline, code looked like:
  //    leading^trailing
  // After newline, code looks like:
  //    leading
  //    indentation^trailing
  // Where indentation was added by the editor.
  StringRef Trailing = firstLine(Code.substr(Cursor));
  StringRef Indentation = lastLine(Code.take_front(Cursor));
  // lastLine() returns a slice starting at Code's first byte only when there
  // is no newline before the cursor.
  if (Indentation.data() == Code.data()) {
    vlog("Typed a newline, but we're still on the first line!");
    return Result;
  }
  // The "- 1" steps over the newline that precedes Indentation.
  StringRef Leading =
      lastLine(Code.take_front(Indentation.data() - Code.data() - 1));
  // The "+ 1" steps over the newline that ends the cursor's line.
  StringRef NextLine = firstLine(Code.substr(Cursor + Trailing.size() + 1));

  // Strip leading whitespace on trailing line.
  StringRef TrailingTrim = Trailing.ltrim();
  if (unsigned TrailWS = Trailing.size() - TrailingTrim.size())
    cantFail(Result.Changes.add(
        replacement(Code, StringRef(Trailing.begin(), TrailWS), "")));

  // If we split a comment, replace indentation with a comment marker.
  // If the editor made the new line a comment, also respect that.
  StringRef CommentMarker = commentMarker(Leading);
  bool NewLineIsComment = !commentMarker(Indentation).empty();
  if (!CommentMarker.empty() &&
      (NewLineIsComment || !commentMarker(NextLine).empty() ||
       (!TrailingTrim.empty() && !TrailingTrim.starts_with("//")))) {
    // We indent the new comment to match the previous one.
    StringRef PreComment =
        Leading.take_front(CommentMarker.data() - Leading.data());
    std::string IndentAndComment =
        (std::string(columnWidthWithTabs(PreComment, TabWidth), ' ') +
         CommentMarker + " ")
            .str();
    cantFail(
        Result.Changes.add(replacement(Code, Indentation, IndentAndComment)));
  } else {
    // Remove any indentation and let clang-format re-add it.
    // This prevents the cursor marker dragging e.g. an aligned comment with it.
    cantFail(Result.Changes.add(replacement(Code, Indentation, "")));
  }

  // If we put the newline inside a {} pair, put } on its own line...
  if (CommentMarker.empty() && Leading.ends_with("{") &&
      Trailing.starts_with("}")) {
    cantFail(
        Result.Changes.add(replacement(Code, Trailing.take_front(1), "\n}")));
    // ...and format it.
    Result.FormatRanges.push_back(
        tooling::Range(Trailing.data() - Code.data() + 1, 1));
  }

  // Format the whole leading line.
  Result.FormatRanges.push_back(
      tooling::Range(Leading.data() - Code.data(), Leading.size()));

  // We use a comment to represent the cursor, to preserve the newline.
  // A trailing identifier improves parsing of e.g. for without braces.
  // Exception: if the previous line has a trailing comment, we can't use one
  // as the cursor (they will be aligned). But in this case we don't need to.
  Result.CursorPlaceholder = !CommentMarker.empty() ? "ident" : "//==\nident";

  return Result;
}
221
222IncrementalChanges getIncrementalChanges(llvm::StringRef Code, unsigned Cursor,223llvm::StringRef InsertedText,224unsigned TabWidth) {225IncrementalChanges Result;226if (InsertedText == "\n")227return getIncrementalChangesAfterNewline(Code, Cursor, TabWidth);228
229Result.CursorPlaceholder = " /**/";230return Result;231}
232
233// Returns equivalent replacements that preserve the correspondence between
234// OldCursor and NewCursor. If OldCursor lies in a replaced region, that
235// replacement will be split.
236std::vector<tooling::Replacement>237split(const tooling::Replacements &Replacements, unsigned OldCursor,238unsigned NewCursor) {239std::vector<tooling::Replacement> Result;240int LengthChange = 0;241for (const tooling::Replacement &R : Replacements) {242if (R.getOffset() + R.getLength() <= OldCursor) { // before cursor243Result.push_back(R);244LengthChange += R.getReplacementText().size() - R.getLength();245} else if (R.getOffset() < OldCursor) { // overlaps cursor246int ReplacementSplit = NewCursor - LengthChange - R.getOffset();247assert(ReplacementSplit >= 0 &&248ReplacementSplit <= int(R.getReplacementText().size()) &&249"NewCursor incompatible with OldCursor!");250Result.push_back(tooling::Replacement(251R.getFilePath(), R.getOffset(), OldCursor - R.getOffset(),252R.getReplacementText().take_front(ReplacementSplit)));253Result.push_back(tooling::Replacement(254R.getFilePath(), OldCursor,255R.getLength() - (OldCursor - R.getOffset()),256R.getReplacementText().drop_front(ReplacementSplit)));257} else if (R.getOffset() >= OldCursor) { // after cursor258Result.push_back(R);259}260}261return Result;262}
263
264} // namespace265
// Computes the replacements to apply to \p OriginalCode after the user typed
// \p InsertedText, leaving the cursor at \p OriginalCursor. \p Style is taken
// by value because it is adjusted locally when a newline was typed.
// Every returned replacement lies either before or after the cursor (the
// result is passed through split()).
//
// We're simulating the following sequence of changes:
//   - apply the pre-formatting edits (see getIncrementalChanges)
//   - insert a placeholder for the cursor
//   - format some of the resulting code
//   - remove the cursor placeholder again
// The replacements we return are produced by composing these.
//
// The text we actually pass to clang-format is slightly different from this,
// e.g. we have to close brackets. We ensure these differences are *after*
// all the regions we want to format, and discard changes in them.
std::vector<tooling::Replacement>
formatIncremental(llvm::StringRef OriginalCode, unsigned OriginalCursor,
                  llvm::StringRef InsertedText, format::FormatStyle Style) {
  IncrementalChanges Incremental = getIncrementalChanges(
      OriginalCode, OriginalCursor, InsertedText, Style.TabWidth);
  // Never *remove* lines in response to pressing enter! This annoys users.
  if (InsertedText == "\n") {
    Style.MaxEmptyLinesToKeep = 1000;
    Style.KeepEmptyLines.AtStartOfBlock = true;
  }

  // Compute the code we want to format:
  // 1) Start with code after the pre-formatting edits.
  std::string CodeToFormat = cantFail(
      tooling::applyAllReplacements(OriginalCode, Incremental.Changes));
  unsigned Cursor = Incremental.Changes.getShiftedCodePosition(OriginalCursor);
  // 2) Truncate code after the last interesting range.
  unsigned FormatLimit = Cursor;
  for (tooling::Range &R : Incremental.FormatRanges)
    FormatLimit = std::max(FormatLimit, R.getOffset() + R.getLength());
  CodeToFormat.resize(FormatLimit);
  // 3) Insert a placeholder for the cursor.
  CodeToFormat.insert(Cursor, Incremental.CursorPlaceholder);
  // 4) Append brackets after FormatLimit so the code is well-formed.
  closeBrackets(CodeToFormat, Style);

  // Determine the ranges to format:
  std::vector<tooling::Range> RangesToFormat = Incremental.FormatRanges;
  // Ranges after the cursor need to be adjusted for the placeholder.
  for (auto &R : RangesToFormat) {
    if (R.getOffset() > Cursor)
      R = tooling::Range(R.getOffset() + Incremental.CursorPlaceholder.size(),
                         R.getLength());
  }
  // We also format the cursor.
  RangesToFormat.push_back(
      tooling::Range(Cursor, Incremental.CursorPlaceholder.size()));
  // Also update FormatLimit for the placeholder, we'll use this later.
  FormatLimit += Incremental.CursorPlaceholder.size();

  // Run clang-format, and truncate changes at FormatLimit.
  // Anything past FormatLimit touches only the text appended by
  // closeBrackets(), which does not exist in the real document.
  tooling::Replacements FormattingChanges;
  format::FormattingAttemptStatus Status;
  for (const tooling::Replacement &R : format::reformat(
           Style, CodeToFormat, RangesToFormat, Filename, &Status)) {
    if (R.getOffset() + R.getLength() <= FormatLimit) // Before limit.
      cantFail(FormattingChanges.add(R));
    else if(R.getOffset() < FormatLimit) { // Overlaps limit.
      if (R.getReplacementText().empty()) // Deletions are easy to handle.
        cantFail(FormattingChanges.add(tooling::Replacement(Filename,
            R.getOffset(), FormatLimit - R.getOffset(), "")));
      else
        // Hopefully won't happen in practice?
        elog("Incremental clang-format edit overlapping cursor @ {0}!\n{1}",
             Cursor, CodeToFormat);
    }
    // Replacements starting at or after FormatLimit are dropped silently.
  }
  if (!Status.FormatComplete)
    vlog("Incremental format incomplete at line {0}", Status.Line);

  // Now we are ready to compose the changes relative to OriginalCode.
  //   edits -> insert placeholder -> format -> remove placeholder.
  // We must express insert/remove as Replacements.
  tooling::Replacements InsertCursorPlaceholder(
      tooling::Replacement(Filename, Cursor, 0, Incremental.CursorPlaceholder));
  // Where the placeholder starts and ends once the formatting changes have
  // been applied.
  unsigned FormattedCursorStart =
               FormattingChanges.getShiftedCodePosition(Cursor),
           FormattedCursorEnd = FormattingChanges.getShiftedCodePosition(
               Cursor + Incremental.CursorPlaceholder.size());
  tooling::Replacements RemoveCursorPlaceholder(
      tooling::Replacement(Filename, FormattedCursorStart,
                           FormattedCursorEnd - FormattedCursorStart, ""));

  // We can't simply merge() and return: tooling::Replacements will combine
  // adjacent edits left and right of the cursor. This gives the right source
  // code, but loses information about where the cursor is!
  // Fortunately, none of the individual passes lose information, so:
  //  - we use merge() to compute the final Replacements
  //  - we chain getShiftedCodePosition() to compute final cursor position
  //  - we split the final Replacements at the cursor position, so that
  //    each Replacement lies either before or after the cursor.
  tooling::Replacements Final;
  unsigned FinalCursor = OriginalCursor;
#ifndef NDEBUG
  // Debug builds only: replay each pass on a copy of the code for logging.
  std::string FinalCode = std::string(OriginalCode);
  dlog("Initial code: {0}", FinalCode);
#endif
  for (auto Pass :
       std::vector<std::pair<const char *, const tooling::Replacements *>>{
           {"Pre-formatting changes", &Incremental.Changes},
           {"Insert placeholder", &InsertCursorPlaceholder},
           {"clang-format", &FormattingChanges},
           {"Remove placeholder", &RemoveCursorPlaceholder}}) {
    Final = Final.merge(*Pass.second);
    FinalCursor = Pass.second->getShiftedCodePosition(FinalCursor);
#ifndef NDEBUG
    FinalCode =
        cantFail(tooling::applyAllReplacements(FinalCode, *Pass.second));
    dlog("After {0}:\n{1}^{2}", Pass.first,
         StringRef(FinalCode).take_front(FinalCursor),
         StringRef(FinalCode).drop_front(FinalCursor));
#endif
  }
  return split(Final, OriginalCursor, FinalCursor);
}
381
382unsigned
383transformCursorPosition(unsigned Offset,384const std::vector<tooling::Replacement> &Replacements) {385unsigned OriginalOffset = Offset;386for (const auto &R : Replacements) {387if (R.getOffset() + R.getLength() <= OriginalOffset) {388// Replacement is before cursor.389Offset += R.getReplacementText().size();390Offset -= R.getLength();391} else if (R.getOffset() < OriginalOffset) {392// Replacement overlaps cursor.393// Preserve position within replacement text, as far as possible.394unsigned PositionWithinReplacement = Offset - R.getOffset();395if (PositionWithinReplacement > R.getReplacementText().size()) {396Offset += R.getReplacementText().size();397Offset -= PositionWithinReplacement;398}399} else {400// Replacement after cursor.401break; // Replacements are sorted, the rest are also after the cursor.402}403}404return Offset;405}
406
407} // namespace clangd408} // namespace clang409