llvm-project
326 строк · 11.8 Кб
//===--- FormatToken.cpp - Format C++ code --------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements specific functions of \c FormatTokens and their
/// roles.
///
//===----------------------------------------------------------------------===//
14
15#include "FormatToken.h"16#include "ContinuationIndenter.h"17#include "llvm/ADT/SmallVector.h"18#include "llvm/Support/Debug.h"19#include <climits>20
21namespace clang {22namespace format {23
24const char *getTokenTypeName(TokenType Type) {25static const char *const TokNames[] = {26#define TYPE(X) #X,27LIST_TOKEN_TYPES
28#undef TYPE29nullptr};30
31if (Type < NUM_TOKEN_TYPES)32return TokNames[Type];33llvm_unreachable("unknown TokenType");34return nullptr;35}
36
37// Sorted common C++ non-keyword types.
38static SmallVector<StringRef> CppNonKeywordTypes = {39"clock_t", "int16_t", "int32_t", "int64_t", "int8_t",40"intptr_t", "ptrdiff_t", "size_t", "time_t", "uint16_t",41"uint32_t", "uint64_t", "uint8_t", "uintptr_t",42};43
44bool FormatToken::isTypeName(const LangOptions &LangOpts) const {45const bool IsCpp = LangOpts.CXXOperatorNames;46return is(TT_TypeName) || Tok.isSimpleTypeSpecifier(LangOpts) ||47(IsCpp && is(tok::identifier) &&48std::binary_search(CppNonKeywordTypes.begin(),49CppNonKeywordTypes.end(), TokenText));50}
51
52bool FormatToken::isTypeOrIdentifier(const LangOptions &LangOpts) const {53return isTypeName(LangOpts) || isOneOf(tok::kw_auto, tok::identifier);54}
55
56bool FormatToken::isBlockIndentedInitRBrace(const FormatStyle &Style) const {57assert(is(tok::r_brace));58if (!Style.Cpp11BracedListStyle ||59Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent) {60return false;61}62const auto *LBrace = MatchingParen;63assert(LBrace && LBrace->is(tok::l_brace));64if (LBrace->is(BK_BracedInit))65return true;66if (LBrace->Previous && LBrace->Previous->is(tok::equal))67return true;68return false;69}
70
71bool FormatToken::opensBlockOrBlockTypeList(const FormatStyle &Style) const {72// C# Does not indent object initialisers as continuations.73if (is(tok::l_brace) && getBlockKind() == BK_BracedInit && Style.isCSharp())74return true;75if (is(TT_TemplateString) && opensScope())76return true;77return is(TT_ArrayInitializerLSquare) || is(TT_ProtoExtensionLSquare) ||78(is(tok::l_brace) &&79(getBlockKind() == BK_Block || is(TT_DictLiteral) ||80(!Style.Cpp11BracedListStyle && NestingLevel == 0))) ||81(is(tok::less) && Style.isProto());82}
83
84TokenRole::~TokenRole() {}85
86void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {}87
88unsigned CommaSeparatedList::formatAfterToken(LineState &State,89ContinuationIndenter *Indenter,90bool DryRun) {91if (!State.NextToken || !State.NextToken->Previous)92return 0;93
94if (Formats.size() <= 1)95return 0; // Handled by formatFromToken (1) or avoid severe penalty (0).96
97// Ensure that we start on the opening brace.98const FormatToken *LBrace =99State.NextToken->Previous->getPreviousNonComment();100if (!LBrace || !LBrace->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||101LBrace->is(BK_Block) || LBrace->is(TT_DictLiteral) ||102LBrace->Next->is(TT_DesignatedInitializerPeriod)) {103return 0;104}105
106// Calculate the number of code points we have to format this list. As the107// first token is already placed, we have to subtract it.108unsigned RemainingCodePoints =109Style.ColumnLimit - State.Column + State.NextToken->Previous->ColumnWidth;110
111// Find the best ColumnFormat, i.e. the best number of columns to use.112const ColumnFormat *Format = getColumnFormat(RemainingCodePoints);113
114// If no ColumnFormat can be used, the braced list would generally be115// bin-packed. Add a severe penalty to this so that column layouts are116// preferred if possible.117if (!Format)118return 10'000;119
120// Format the entire list.121unsigned Penalty = 0;122unsigned Column = 0;123unsigned Item = 0;124while (State.NextToken != LBrace->MatchingParen) {125bool NewLine = false;126unsigned ExtraSpaces = 0;127
128// If the previous token was one of our commas, we are now on the next item.129if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) {130if (!State.NextToken->isTrailingComment()) {131ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item];132++Column;133}134++Item;135}136
137if (Column == Format->Columns || State.NextToken->MustBreakBefore) {138Column = 0;139NewLine = true;140}141
142// Place token using the continuation indenter and store the penalty.143Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces);144}145return Penalty;146}
147
148unsigned CommaSeparatedList::formatFromToken(LineState &State,149ContinuationIndenter *Indenter,150bool DryRun) {151// Formatting with 1 Column isn't really a column layout, so we don't need the152// special logic here. We can just avoid bin packing any of the parameters.153if (Formats.size() == 1 || HasNestedBracedList)154State.Stack.back().AvoidBinPacking = true;155return 0;156}
157
158// Returns the lengths in code points between Begin and End (both included),
159// assuming that the entire sequence is put on a single line.
160static unsigned CodePointsBetween(const FormatToken *Begin,161const FormatToken *End) {162assert(End->TotalLength >= Begin->TotalLength);163return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth;164}
165
166void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {167// FIXME: At some point we might want to do this for other lists, too.168if (!Token->MatchingParen ||169!Token->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare)) {170return;171}172
173// In C++11 braced list style, we should not format in columns unless they174// have many items (20 or more) or we allow bin-packing of function call175// arguments.176if (Style.Cpp11BracedListStyle && !Style.BinPackArguments &&177Commas.size() < 19) {178return;179}180
181// Limit column layout for JavaScript array initializers to 20 or more items182// for now to introduce it carefully. We can become more aggressive if this183// necessary.184if (Token->is(TT_ArrayInitializerLSquare) && Commas.size() < 19)185return;186
187// Column format doesn't really make sense if we don't align after brackets.188if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign)189return;190
191FormatToken *ItemBegin = Token->Next;192while (ItemBegin->isTrailingComment())193ItemBegin = ItemBegin->Next;194SmallVector<bool, 8> MustBreakBeforeItem;195
196// The lengths of an item if it is put at the end of the line. This includes197// trailing comments which are otherwise ignored for column alignment.198SmallVector<unsigned, 8> EndOfLineItemLength;199MustBreakBeforeItem.reserve(Commas.size() + 1);200EndOfLineItemLength.reserve(Commas.size() + 1);201ItemLengths.reserve(Commas.size() + 1);202
203bool HasSeparatingComment = false;204for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) {205assert(ItemBegin);206// Skip comments on their own line.207while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) {208ItemBegin = ItemBegin->Next;209HasSeparatingComment = i > 0;210}211
212MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore);213if (ItemBegin->is(tok::l_brace))214HasNestedBracedList = true;215const FormatToken *ItemEnd = nullptr;216if (i == Commas.size()) {217ItemEnd = Token->MatchingParen;218const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment();219ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd));220if (Style.Cpp11BracedListStyle &&221!ItemEnd->Previous->isTrailingComment()) {222// In Cpp11 braced list style, the } and possibly other subsequent223// tokens will need to stay on a line with the last element.224while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore)225ItemEnd = ItemEnd->Next;226} else {227// In other braced lists styles, the "}" can be wrapped to the new line.228ItemEnd = Token->MatchingParen->Previous;229}230} else {231ItemEnd = Commas[i];232// The comma is counted as part of the item when calculating the length.233ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd));234
235// Consume trailing comments so the are included in EndOfLineItemLength.236if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline &&237ItemEnd->Next->isTrailingComment()) {238ItemEnd = ItemEnd->Next;239}240}241EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd));242// If there is a trailing comma in the list, the next item will start at the243// closing brace. Don't create an extra item for this.244if (ItemEnd->getNextNonComment() == Token->MatchingParen)245break;246ItemBegin = ItemEnd->Next;247}248
249// Don't use column layout for lists with few elements and in presence of250// separating comments.251if (Commas.size() < 5 || HasSeparatingComment)252return;253
254if (Token->NestingLevel != 0 && Token->is(tok::l_brace) && Commas.size() < 19)255return;256
257// We can never place more than ColumnLimit / 3 items in a row (because of the258// spaces and the comma).259unsigned MaxItems = Style.ColumnLimit / 3;260SmallVector<unsigned> MinSizeInColumn;261MinSizeInColumn.reserve(MaxItems);262for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) {263ColumnFormat Format;264Format.Columns = Columns;265Format.ColumnSizes.resize(Columns);266MinSizeInColumn.assign(Columns, UINT_MAX);267Format.LineCount = 1;268bool HasRowWithSufficientColumns = false;269unsigned Column = 0;270for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) {271assert(i < MustBreakBeforeItem.size());272if (MustBreakBeforeItem[i] || Column == Columns) {273++Format.LineCount;274Column = 0;275}276if (Column == Columns - 1)277HasRowWithSufficientColumns = true;278unsigned Length =279(Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i];280Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length);281MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length);282++Column;283}284// If all rows are terminated early (e.g. by trailing comments), we don't285// need to look further.286if (!HasRowWithSufficientColumns)287break;288Format.TotalWidth = Columns - 1; // Width of the N-1 spaces.289
290for (unsigned i = 0; i < Columns; ++i)291Format.TotalWidth += Format.ColumnSizes[i];292
293// Don't use this Format, if the difference between the longest and shortest294// element in a column exceeds a threshold to avoid excessive spaces.295if ([&] {296for (unsigned i = 0; i < Columns - 1; ++i)297if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10)298return true;299return false;300}()) {301continue;302}303
304// Ignore layouts that are bound to violate the column limit.305if (Format.TotalWidth > Style.ColumnLimit && Columns > 1)306continue;307
308Formats.push_back(Format);309}310}
311
312const CommaSeparatedList::ColumnFormat *313CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const {314const ColumnFormat *BestFormat = nullptr;315for (const ColumnFormat &Format : llvm::reverse(Formats)) {316if (Format.TotalWidth <= RemainingCharacters || Format.Columns == 1) {317if (BestFormat && Format.LineCount > BestFormat->LineCount)318break;319BestFormat = &Format;320}321}322return BestFormat;323}
324
325} // namespace format326} // namespace clang327