1bfef3995SDimitry Andric //===--- FormatToken.cpp - Format C++ code --------------------------------===//
2bfef3995SDimitry Andric //
322989816SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
422989816SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
522989816SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6bfef3995SDimitry Andric //
7bfef3995SDimitry Andric //===----------------------------------------------------------------------===//
8bfef3995SDimitry Andric ///
9bfef3995SDimitry Andric /// \file
1048675466SDimitry Andric /// This file implements specific functions of \c FormatTokens and their
11bfef3995SDimitry Andric /// roles.
12bfef3995SDimitry Andric ///
13bfef3995SDimitry Andric //===----------------------------------------------------------------------===//
14bfef3995SDimitry Andric
1545b53394SDimitry Andric #include "FormatToken.h"
16bab175ecSDimitry Andric #include "ContinuationIndenter.h"
17bfef3995SDimitry Andric #include "llvm/ADT/SmallVector.h"
18bfef3995SDimitry Andric #include "llvm/Support/Debug.h"
195e20cdd8SDimitry Andric #include <climits>
20bfef3995SDimitry Andric
21bfef3995SDimitry Andric namespace clang {
22bfef3995SDimitry Andric namespace format {
23bfef3995SDimitry Andric
getTokenTypeName(TokenType Type)2451ece4aaSDimitry Andric const char *getTokenTypeName(TokenType Type) {
2551ece4aaSDimitry Andric static const char *const TokNames[] = {
2651ece4aaSDimitry Andric #define TYPE(X) #X,
2751ece4aaSDimitry Andric LIST_TOKEN_TYPES
2851ece4aaSDimitry Andric #undef TYPE
29461a67faSDimitry Andric nullptr};
3051ece4aaSDimitry Andric
3151ece4aaSDimitry Andric if (Type < NUM_TOKEN_TYPES)
3251ece4aaSDimitry Andric return TokNames[Type];
3351ece4aaSDimitry Andric llvm_unreachable("unknown TokenType");
3451ece4aaSDimitry Andric return nullptr;
3551ece4aaSDimitry Andric }
3651ece4aaSDimitry Andric
37ac9a064cSDimitry Andric // Sorted common C++ non-keyword types.
38ac9a064cSDimitry Andric static SmallVector<StringRef> CppNonKeywordTypes = {
39ac9a064cSDimitry Andric "clock_t", "int16_t", "int32_t", "int64_t", "int8_t",
40ac9a064cSDimitry Andric "intptr_t", "ptrdiff_t", "size_t", "time_t", "uint16_t",
41ac9a064cSDimitry Andric "uint32_t", "uint64_t", "uint8_t", "uintptr_t",
42ac9a064cSDimitry Andric };
43ac9a064cSDimitry Andric
isTypeName(const LangOptions & LangOpts) const44ac9a064cSDimitry Andric bool FormatToken::isTypeName(const LangOptions &LangOpts) const {
45ac9a064cSDimitry Andric const bool IsCpp = LangOpts.CXXOperatorNames;
46ac9a064cSDimitry Andric return is(TT_TypeName) || Tok.isSimpleTypeSpecifier(LangOpts) ||
47ac9a064cSDimitry Andric (IsCpp && is(tok::identifier) &&
48ac9a064cSDimitry Andric std::binary_search(CppNonKeywordTypes.begin(),
49ac9a064cSDimitry Andric CppNonKeywordTypes.end(), TokenText));
509f4dbff6SDimitry Andric }
519f4dbff6SDimitry Andric
isTypeOrIdentifier(const LangOptions & LangOpts) const52ac9a064cSDimitry Andric bool FormatToken::isTypeOrIdentifier(const LangOptions &LangOpts) const {
53ac9a064cSDimitry Andric return isTypeName(LangOpts) || isOneOf(tok::kw_auto, tok::identifier);
5477fc4c14SDimitry Andric }
5577fc4c14SDimitry Andric
isBlockIndentedInitRBrace(const FormatStyle & Style) const567fa27ce4SDimitry Andric bool FormatToken::isBlockIndentedInitRBrace(const FormatStyle &Style) const {
577fa27ce4SDimitry Andric assert(is(tok::r_brace));
587fa27ce4SDimitry Andric if (!Style.Cpp11BracedListStyle ||
597fa27ce4SDimitry Andric Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent) {
607fa27ce4SDimitry Andric return false;
617fa27ce4SDimitry Andric }
627fa27ce4SDimitry Andric const auto *LBrace = MatchingParen;
637fa27ce4SDimitry Andric assert(LBrace && LBrace->is(tok::l_brace));
647fa27ce4SDimitry Andric if (LBrace->is(BK_BracedInit))
657fa27ce4SDimitry Andric return true;
667fa27ce4SDimitry Andric if (LBrace->Previous && LBrace->Previous->is(tok::equal))
677fa27ce4SDimitry Andric return true;
687fa27ce4SDimitry Andric return false;
697fa27ce4SDimitry Andric }
707fa27ce4SDimitry Andric
opensBlockOrBlockTypeList(const FormatStyle & Style) const71145449b1SDimitry Andric bool FormatToken::opensBlockOrBlockTypeList(const FormatStyle &Style) const {
72145449b1SDimitry Andric // C# Does not indent object initialisers as continuations.
73145449b1SDimitry Andric if (is(tok::l_brace) && getBlockKind() == BK_BracedInit && Style.isCSharp())
74145449b1SDimitry Andric return true;
75145449b1SDimitry Andric if (is(TT_TemplateString) && opensScope())
76145449b1SDimitry Andric return true;
77145449b1SDimitry Andric return is(TT_ArrayInitializerLSquare) || is(TT_ProtoExtensionLSquare) ||
78145449b1SDimitry Andric (is(tok::l_brace) &&
79145449b1SDimitry Andric (getBlockKind() == BK_Block || is(TT_DictLiteral) ||
80145449b1SDimitry Andric (!Style.Cpp11BracedListStyle && NestingLevel == 0))) ||
81b1c73532SDimitry Andric (is(tok::less) && Style.isProto());
82145449b1SDimitry Andric }
83145449b1SDimitry Andric
~TokenRole()84bfef3995SDimitry Andric TokenRole::~TokenRole() {}
85bfef3995SDimitry Andric
precomputeFormattingInfos(const FormatToken * Token)86bfef3995SDimitry Andric void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {}
87bfef3995SDimitry Andric
formatAfterToken(LineState & State,ContinuationIndenter * Indenter,bool DryRun)889f4dbff6SDimitry Andric unsigned CommaSeparatedList::formatAfterToken(LineState &State,
89bfef3995SDimitry Andric ContinuationIndenter *Indenter,
90bfef3995SDimitry Andric bool DryRun) {
917fa27ce4SDimitry Andric if (!State.NextToken || !State.NextToken->Previous)
92bfef3995SDimitry Andric return 0;
93bfef3995SDimitry Andric
94950076cdSDimitry Andric if (Formats.size() <= 1)
95950076cdSDimitry Andric return 0; // Handled by formatFromToken (1) or avoid severe penalty (0).
96bab175ecSDimitry Andric
97bfef3995SDimitry Andric // Ensure that we start on the opening brace.
985e20cdd8SDimitry Andric const FormatToken *LBrace =
995e20cdd8SDimitry Andric State.NextToken->Previous->getPreviousNonComment();
10045b53394SDimitry Andric if (!LBrace || !LBrace->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
101b60736ecSDimitry Andric LBrace->is(BK_Block) || LBrace->is(TT_DictLiteral) ||
102145449b1SDimitry Andric LBrace->Next->is(TT_DesignatedInitializerPeriod)) {
103bfef3995SDimitry Andric return 0;
104145449b1SDimitry Andric }
105bfef3995SDimitry Andric
106bfef3995SDimitry Andric // Calculate the number of code points we have to format this list. As the
107bfef3995SDimitry Andric // first token is already placed, we have to subtract it.
1089f4dbff6SDimitry Andric unsigned RemainingCodePoints =
1099f4dbff6SDimitry Andric Style.ColumnLimit - State.Column + State.NextToken->Previous->ColumnWidth;
110bfef3995SDimitry Andric
111bfef3995SDimitry Andric // Find the best ColumnFormat, i.e. the best number of columns to use.
112bfef3995SDimitry Andric const ColumnFormat *Format = getColumnFormat(RemainingCodePoints);
113bab175ecSDimitry Andric
1149f4dbff6SDimitry Andric // If no ColumnFormat can be used, the braced list would generally be
1159f4dbff6SDimitry Andric // bin-packed. Add a severe penalty to this so that column layouts are
1169f4dbff6SDimitry Andric // preferred if possible.
117bfef3995SDimitry Andric if (!Format)
118ac9a064cSDimitry Andric return 10'000;
119bfef3995SDimitry Andric
120bfef3995SDimitry Andric // Format the entire list.
121bfef3995SDimitry Andric unsigned Penalty = 0;
122bfef3995SDimitry Andric unsigned Column = 0;
123bfef3995SDimitry Andric unsigned Item = 0;
124bfef3995SDimitry Andric while (State.NextToken != LBrace->MatchingParen) {
125bfef3995SDimitry Andric bool NewLine = false;
126bfef3995SDimitry Andric unsigned ExtraSpaces = 0;
127bfef3995SDimitry Andric
128bfef3995SDimitry Andric // If the previous token was one of our commas, we are now on the next item.
129bfef3995SDimitry Andric if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) {
130bfef3995SDimitry Andric if (!State.NextToken->isTrailingComment()) {
131bfef3995SDimitry Andric ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item];
132bfef3995SDimitry Andric ++Column;
133bfef3995SDimitry Andric }
134bfef3995SDimitry Andric ++Item;
135bfef3995SDimitry Andric }
136bfef3995SDimitry Andric
137bfef3995SDimitry Andric if (Column == Format->Columns || State.NextToken->MustBreakBefore) {
138bfef3995SDimitry Andric Column = 0;
139bfef3995SDimitry Andric NewLine = true;
140bfef3995SDimitry Andric }
141bfef3995SDimitry Andric
142bfef3995SDimitry Andric // Place token using the continuation indenter and store the penalty.
143bfef3995SDimitry Andric Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces);
144bfef3995SDimitry Andric }
145bfef3995SDimitry Andric return Penalty;
146bfef3995SDimitry Andric }
147bfef3995SDimitry Andric
formatFromToken(LineState & State,ContinuationIndenter * Indenter,bool DryRun)1489f4dbff6SDimitry Andric unsigned CommaSeparatedList::formatFromToken(LineState &State,
1499f4dbff6SDimitry Andric ContinuationIndenter *Indenter,
1509f4dbff6SDimitry Andric bool DryRun) {
151bab175ecSDimitry Andric // Formatting with 1 Column isn't really a column layout, so we don't need the
152bab175ecSDimitry Andric // special logic here. We can just avoid bin packing any of the parameters.
153bab175ecSDimitry Andric if (Formats.size() == 1 || HasNestedBracedList)
1549f4dbff6SDimitry Andric State.Stack.back().AvoidBinPacking = true;
1559f4dbff6SDimitry Andric return 0;
1569f4dbff6SDimitry Andric }
1579f4dbff6SDimitry Andric
158bfef3995SDimitry Andric // Returns the lengths in code points between Begin and End (both included),
159bfef3995SDimitry Andric // assuming that the entire sequence is put on a single line.
CodePointsBetween(const FormatToken * Begin,const FormatToken * End)160bfef3995SDimitry Andric static unsigned CodePointsBetween(const FormatToken *Begin,
161bfef3995SDimitry Andric const FormatToken *End) {
162bfef3995SDimitry Andric assert(End->TotalLength >= Begin->TotalLength);
163bfef3995SDimitry Andric return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth;
164bfef3995SDimitry Andric }
165bfef3995SDimitry Andric
precomputeFormattingInfos(const FormatToken * Token)166bfef3995SDimitry Andric void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
167bfef3995SDimitry Andric // FIXME: At some point we might want to do this for other lists, too.
16845b53394SDimitry Andric if (!Token->MatchingParen ||
169145449b1SDimitry Andric !Token->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare)) {
170bfef3995SDimitry Andric return;
171145449b1SDimitry Andric }
172bfef3995SDimitry Andric
17306d4ba38SDimitry Andric // In C++11 braced list style, we should not format in columns unless they
1745e20cdd8SDimitry Andric // have many items (20 or more) or we allow bin-packing of function call
1755e20cdd8SDimitry Andric // arguments.
1765e20cdd8SDimitry Andric if (Style.Cpp11BracedListStyle && !Style.BinPackArguments &&
177145449b1SDimitry Andric Commas.size() < 19) {
17806d4ba38SDimitry Andric return;
179145449b1SDimitry Andric }
18006d4ba38SDimitry Andric
18145b53394SDimitry Andric // Limit column layout for JavaScript array initializers to 20 or more items
18245b53394SDimitry Andric // for now to introduce it carefully. We can become more aggressive if this
18345b53394SDimitry Andric // necessary.
18445b53394SDimitry Andric if (Token->is(TT_ArrayInitializerLSquare) && Commas.size() < 19)
18545b53394SDimitry Andric return;
18645b53394SDimitry Andric
18706d4ba38SDimitry Andric // Column format doesn't really make sense if we don't align after brackets.
18845b53394SDimitry Andric if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign)
1899f4dbff6SDimitry Andric return;
1909f4dbff6SDimitry Andric
191bfef3995SDimitry Andric FormatToken *ItemBegin = Token->Next;
1925e20cdd8SDimitry Andric while (ItemBegin->isTrailingComment())
1935e20cdd8SDimitry Andric ItemBegin = ItemBegin->Next;
194bfef3995SDimitry Andric SmallVector<bool, 8> MustBreakBeforeItem;
195bfef3995SDimitry Andric
196bfef3995SDimitry Andric // The lengths of an item if it is put at the end of the line. This includes
197bfef3995SDimitry Andric // trailing comments which are otherwise ignored for column alignment.
198bfef3995SDimitry Andric SmallVector<unsigned, 8> EndOfLineItemLength;
199145449b1SDimitry Andric MustBreakBeforeItem.reserve(Commas.size() + 1);
200145449b1SDimitry Andric EndOfLineItemLength.reserve(Commas.size() + 1);
201145449b1SDimitry Andric ItemLengths.reserve(Commas.size() + 1);
202bfef3995SDimitry Andric
2035e20cdd8SDimitry Andric bool HasSeparatingComment = false;
204bfef3995SDimitry Andric for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) {
2056f8fc217SDimitry Andric assert(ItemBegin);
206bfef3995SDimitry Andric // Skip comments on their own line.
2075e20cdd8SDimitry Andric while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) {
208bfef3995SDimitry Andric ItemBegin = ItemBegin->Next;
2095e20cdd8SDimitry Andric HasSeparatingComment = i > 0;
2105e20cdd8SDimitry Andric }
211bfef3995SDimitry Andric
212bfef3995SDimitry Andric MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore);
213bfef3995SDimitry Andric if (ItemBegin->is(tok::l_brace))
214bfef3995SDimitry Andric HasNestedBracedList = true;
2159f4dbff6SDimitry Andric const FormatToken *ItemEnd = nullptr;
216bfef3995SDimitry Andric if (i == Commas.size()) {
217bfef3995SDimitry Andric ItemEnd = Token->MatchingParen;
218bfef3995SDimitry Andric const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment();
219bfef3995SDimitry Andric ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd));
22045b53394SDimitry Andric if (Style.Cpp11BracedListStyle &&
22145b53394SDimitry Andric !ItemEnd->Previous->isTrailingComment()) {
222bfef3995SDimitry Andric // In Cpp11 braced list style, the } and possibly other subsequent
223bfef3995SDimitry Andric // tokens will need to stay on a line with the last element.
224bfef3995SDimitry Andric while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore)
225bfef3995SDimitry Andric ItemEnd = ItemEnd->Next;
226bfef3995SDimitry Andric } else {
227bfef3995SDimitry Andric // In other braced lists styles, the "}" can be wrapped to the new line.
228bfef3995SDimitry Andric ItemEnd = Token->MatchingParen->Previous;
229bfef3995SDimitry Andric }
230bfef3995SDimitry Andric } else {
231bfef3995SDimitry Andric ItemEnd = Commas[i];
232bfef3995SDimitry Andric // The comma is counted as part of the item when calculating the length.
233bfef3995SDimitry Andric ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd));
23406d4ba38SDimitry Andric
235bfef3995SDimitry Andric // Consume trailing comments so the are included in EndOfLineItemLength.
236bfef3995SDimitry Andric if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline &&
237145449b1SDimitry Andric ItemEnd->Next->isTrailingComment()) {
238bfef3995SDimitry Andric ItemEnd = ItemEnd->Next;
239bfef3995SDimitry Andric }
240145449b1SDimitry Andric }
241bfef3995SDimitry Andric EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd));
242bfef3995SDimitry Andric // If there is a trailing comma in the list, the next item will start at the
243bfef3995SDimitry Andric // closing brace. Don't create an extra item for this.
244bfef3995SDimitry Andric if (ItemEnd->getNextNonComment() == Token->MatchingParen)
245bfef3995SDimitry Andric break;
246bfef3995SDimitry Andric ItemBegin = ItemEnd->Next;
247bfef3995SDimitry Andric }
248bfef3995SDimitry Andric
24997b17066SDimitry Andric // Don't use column layout for lists with few elements and in presence of
25097b17066SDimitry Andric // separating comments.
25197b17066SDimitry Andric if (Commas.size() < 5 || HasSeparatingComment)
25297b17066SDimitry Andric return;
25397b17066SDimitry Andric
25497b17066SDimitry Andric if (Token->NestingLevel != 0 && Token->is(tok::l_brace) && Commas.size() < 19)
2559f4dbff6SDimitry Andric return;
2569f4dbff6SDimitry Andric
257bfef3995SDimitry Andric // We can never place more than ColumnLimit / 3 items in a row (because of the
258bfef3995SDimitry Andric // spaces and the comma).
2592e645aa5SDimitry Andric unsigned MaxItems = Style.ColumnLimit / 3;
2601f917f69SDimitry Andric SmallVector<unsigned> MinSizeInColumn;
2612e645aa5SDimitry Andric MinSizeInColumn.reserve(MaxItems);
2622e645aa5SDimitry Andric for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) {
263bfef3995SDimitry Andric ColumnFormat Format;
264bfef3995SDimitry Andric Format.Columns = Columns;
265bfef3995SDimitry Andric Format.ColumnSizes.resize(Columns);
2662e645aa5SDimitry Andric MinSizeInColumn.assign(Columns, UINT_MAX);
267bfef3995SDimitry Andric Format.LineCount = 1;
268bfef3995SDimitry Andric bool HasRowWithSufficientColumns = false;
269bfef3995SDimitry Andric unsigned Column = 0;
270bfef3995SDimitry Andric for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) {
271bfef3995SDimitry Andric assert(i < MustBreakBeforeItem.size());
272bfef3995SDimitry Andric if (MustBreakBeforeItem[i] || Column == Columns) {
273bfef3995SDimitry Andric ++Format.LineCount;
274bfef3995SDimitry Andric Column = 0;
275bfef3995SDimitry Andric }
276bfef3995SDimitry Andric if (Column == Columns - 1)
277bfef3995SDimitry Andric HasRowWithSufficientColumns = true;
2785e20cdd8SDimitry Andric unsigned Length =
279bfef3995SDimitry Andric (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i];
2805e20cdd8SDimitry Andric Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length);
2815e20cdd8SDimitry Andric MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length);
282bfef3995SDimitry Andric ++Column;
283bfef3995SDimitry Andric }
284bfef3995SDimitry Andric // If all rows are terminated early (e.g. by trailing comments), we don't
285bfef3995SDimitry Andric // need to look further.
286bfef3995SDimitry Andric if (!HasRowWithSufficientColumns)
287bfef3995SDimitry Andric break;
288bfef3995SDimitry Andric Format.TotalWidth = Columns - 1; // Width of the N-1 spaces.
2895e20cdd8SDimitry Andric
2905e20cdd8SDimitry Andric for (unsigned i = 0; i < Columns; ++i)
291bfef3995SDimitry Andric Format.TotalWidth += Format.ColumnSizes[i];
2925e20cdd8SDimitry Andric
2935e20cdd8SDimitry Andric // Don't use this Format, if the difference between the longest and shortest
2945e20cdd8SDimitry Andric // element in a column exceeds a threshold to avoid excessive spaces.
2955e20cdd8SDimitry Andric if ([&] {
2965e20cdd8SDimitry Andric for (unsigned i = 0; i < Columns - 1; ++i)
2975e20cdd8SDimitry Andric if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10)
2985e20cdd8SDimitry Andric return true;
2995e20cdd8SDimitry Andric return false;
300145449b1SDimitry Andric }()) {
3015e20cdd8SDimitry Andric continue;
302145449b1SDimitry Andric }
303bfef3995SDimitry Andric
304bfef3995SDimitry Andric // Ignore layouts that are bound to violate the column limit.
305bab175ecSDimitry Andric if (Format.TotalWidth > Style.ColumnLimit && Columns > 1)
306bfef3995SDimitry Andric continue;
307bfef3995SDimitry Andric
308bfef3995SDimitry Andric Formats.push_back(Format);
309bfef3995SDimitry Andric }
310bfef3995SDimitry Andric }
311bfef3995SDimitry Andric
312bfef3995SDimitry Andric const CommaSeparatedList::ColumnFormat *
getColumnFormat(unsigned RemainingCharacters) const313bfef3995SDimitry Andric CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const {
3149f4dbff6SDimitry Andric const ColumnFormat *BestFormat = nullptr;
3156f8fc217SDimitry Andric for (const ColumnFormat &Format : llvm::reverse(Formats)) {
3166f8fc217SDimitry Andric if (Format.TotalWidth <= RemainingCharacters || Format.Columns == 1) {
3176f8fc217SDimitry Andric if (BestFormat && Format.LineCount > BestFormat->LineCount)
318bfef3995SDimitry Andric break;
3196f8fc217SDimitry Andric BestFormat = &Format;
320bfef3995SDimitry Andric }
321bfef3995SDimitry Andric }
322bfef3995SDimitry Andric return BestFormat;
323bfef3995SDimitry Andric }
324bfef3995SDimitry Andric
325bfef3995SDimitry Andric } // namespace format
326bfef3995SDimitry Andric } // namespace clang
327