clang 22.0.0git
BreakableToken.cpp
Go to the documentation of this file.
1//===--- BreakableToken.cpp - Format C++ code -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Contains implementation of BreakableToken class and classes derived
11/// from it.
12///
13//===----------------------------------------------------------------------===//
14
15#include "BreakableToken.h"
18#include "clang/Format/Format.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/Support/Debug.h"
21#include <algorithm>
22
23#define DEBUG_TYPE "format-token-breaker"
24
25namespace clang {
26namespace format {
27
/// The set of characters this file treats as blank when trimming comment text
/// and when searching for positions at which a comment may be split: space,
/// horizontal tab, vertical tab, form feed and carriage return. The newline
/// character is not part of this set.
static constexpr StringRef Blanks(" \t\v\f\r");
29
30static StringRef getLineCommentIndentPrefix(StringRef Comment,
31 const FormatStyle &Style) {
32 static constexpr StringRef KnownCStylePrefixes[] = {"///<", "//!<", "///",
33 "//!", "//:", "//"};
34 static constexpr StringRef KnownTextProtoPrefixes[] = {"####", "###", "##",
35 "//", "#"};
36 ArrayRef<StringRef> KnownPrefixes(KnownCStylePrefixes);
37 if (Style.isTextProto())
38 KnownPrefixes = KnownTextProtoPrefixes;
39
40 assert(
41 llvm::is_sorted(KnownPrefixes, [](StringRef Lhs, StringRef Rhs) noexcept {
42 return Lhs.size() > Rhs.size();
43 }));
44
45 for (StringRef KnownPrefix : KnownPrefixes) {
46 if (Comment.starts_with(KnownPrefix)) {
47 const auto PrefixLength =
48 Comment.find_first_not_of(' ', KnownPrefix.size());
49 return Comment.substr(0, PrefixLength);
50 }
51 }
52 return {};
53}
54
56getCommentSplit(StringRef Text, unsigned ContentStartColumn,
57 unsigned ColumnLimit, unsigned TabWidth,
58 encoding::Encoding Encoding, const FormatStyle &Style,
59 bool DecorationEndsWithStar = false) {
60 LLVM_DEBUG(llvm::dbgs() << "Comment split: \"" << Text
61 << "\", Column limit: " << ColumnLimit
62 << ", Content start: " << ContentStartColumn << "\n");
63 if (ColumnLimit <= ContentStartColumn + 1)
64 return BreakableToken::Split(StringRef::npos, 0);
65
66 unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
67 unsigned MaxSplitBytes = 0;
68
69 for (unsigned NumChars = 0;
70 NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
71 unsigned BytesInChar =
72 encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);
74 Text.substr(MaxSplitBytes, BytesInChar), ContentStartColumn + NumChars,
75 TabWidth, Encoding);
76 MaxSplitBytes += BytesInChar;
77 }
78
79 // In JavaScript, some @tags can be followed by {, and machinery that parses
80 // these comments will fail to understand the comment if followed by a line
81 // break. So avoid ever breaking before a {.
82 if (Style.isJavaScript()) {
83 StringRef::size_type SpaceOffset =
84 Text.find_first_of(Blanks, MaxSplitBytes);
85 if (SpaceOffset != StringRef::npos && SpaceOffset + 1 < Text.size() &&
86 Text[SpaceOffset + 1] == '{') {
87 MaxSplitBytes = SpaceOffset + 1;
88 }
89 }
90
91 StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);
92
93 static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\.");
94 // Some spaces are unacceptable to break on, rewind past them.
95 while (SpaceOffset != StringRef::npos) {
96 // If a line-comment ends with `\`, the next line continues the comment,
97 // whether or not it starts with `//`. This is confusing and triggers
98 // -Wcomment.
99 // Avoid introducing multiline comments by not allowing a break right
100 // after '\'.
101 if (Style.isCpp()) {
102 StringRef::size_type LastNonBlank =
103 Text.find_last_not_of(Blanks, SpaceOffset);
104 if (LastNonBlank != StringRef::npos && Text[LastNonBlank] == '\\') {
105 SpaceOffset = Text.find_last_of(Blanks, LastNonBlank);
106 continue;
107 }
108 }
109
110 // Do not split before a number followed by a dot: this would be interpreted
111 // as a numbered list, which would prevent re-flowing in subsequent passes.
112 if (kNumberedListRegexp.match(Text.substr(SpaceOffset).ltrim(Blanks))) {
113 SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
114 continue;
115 }
116
117 // Avoid ever breaking before a @tag or a { in JavaScript.
118 if (Style.isJavaScript() && SpaceOffset + 1 < Text.size() &&
119 (Text[SpaceOffset + 1] == '{' || Text[SpaceOffset + 1] == '@')) {
120 SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
121 continue;
122 }
123
124 break;
125 }
126
127 if (SpaceOffset == StringRef::npos ||
128 // Don't break at leading whitespace.
129 Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) {
130 // Make sure that we don't break at leading whitespace that
131 // reaches past MaxSplit.
132 StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks);
133 if (FirstNonWhitespace == StringRef::npos) {
134 // If the comment is only whitespace, we cannot split.
135 return BreakableToken::Split(StringRef::npos, 0);
136 }
137 SpaceOffset = Text.find_first_of(
138 Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace));
139 }
140 if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
141 // adaptStartOfLine will break after lines starting with /** if the comment
142 // is broken anywhere. Avoid emitting this break twice here.
143 // Example: in /** longtextcomesherethatbreaks */ (with ColumnLimit 20) will
144 // insert a break after /**, so this code must not insert the same break.
145 if (SpaceOffset == 1 && Text[SpaceOffset - 1] == '*')
146 return BreakableToken::Split(StringRef::npos, 0);
147 StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks);
148 StringRef AfterCut = Text.substr(SpaceOffset);
149 if (!DecorationEndsWithStar)
150 AfterCut = AfterCut.ltrim(Blanks);
151 return BreakableToken::Split(BeforeCut.size(),
152 AfterCut.begin() - BeforeCut.end());
153 }
154 return BreakableToken::Split(StringRef::npos, 0);
155}
156
158getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit,
159 unsigned TabWidth, encoding::Encoding Encoding) {
160 // FIXME: Reduce unit test case.
161 if (Text.empty())
162 return BreakableToken::Split(StringRef::npos, 0);
163 if (ColumnLimit <= UsedColumns)
164 return BreakableToken::Split(StringRef::npos, 0);
165 unsigned MaxSplit = ColumnLimit - UsedColumns;
166 StringRef::size_type SpaceOffset = 0;
167 StringRef::size_type SlashOffset = 0;
168 StringRef::size_type WordStartOffset = 0;
169 StringRef::size_type SplitPoint = 0;
170 for (unsigned Chars = 0;;) {
171 unsigned Advance;
172 if (Text[0] == '\\') {
174 Chars += Advance;
175 } else {
176 Advance = encoding::getCodePointNumBytes(Text[0], Encoding);
178 Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding);
179 }
180
181 if (Chars > MaxSplit || Text.size() <= Advance)
182 break;
183
184 if (Blanks.contains(Text[0]))
185 SpaceOffset = SplitPoint;
186 if (Text[0] == '/')
187 SlashOffset = SplitPoint;
188 if (Advance == 1 && !isAlphanumeric(Text[0]))
189 WordStartOffset = SplitPoint;
190
191 SplitPoint += Advance;
192 Text = Text.substr(Advance);
193 }
194
195 if (SpaceOffset != 0)
196 return BreakableToken::Split(SpaceOffset + 1, 0);
197 if (SlashOffset != 0)
198 return BreakableToken::Split(SlashOffset + 1, 0);
199 if (WordStartOffset != 0)
200 return BreakableToken::Split(WordStartOffset + 1, 0);
201 if (SplitPoint != 0)
202 return BreakableToken::Split(SplitPoint, 0);
203 return BreakableToken::Split(StringRef::npos, 0);
204}
205
207 assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) &&
208 "formatting regions are switched by comment tokens");
209 StringRef Content = Token.TokenText.substr(2).ltrim();
210 return Content.starts_with("clang-format on") ||
211 Content.starts_with("clang-format off");
212}
213
214unsigned
215BreakableToken::getLengthAfterCompression(unsigned RemainingTokenColumns,
216 Split Split) const {
217 // Example: consider the content
218 // lala lala
219 // - RemainingTokenColumns is the original number of columns, 10;
220 // - Split is (4, 2), denoting the two spaces between the two words;
221 //
222 // We compute the number of columns when the split is compressed into a single
223 // space, like:
224 // lala lala
225 //
226 // FIXME: Correctly measure the length of whitespace in Split.second so it
227 // works with tabs.
228 return RemainingTokenColumns + 1 - Split.second;
229}
230
231unsigned BreakableStringLiteral::getLineCount() const { return 1; }
232
233unsigned BreakableStringLiteral::getRangeLength(unsigned LineIndex,
234 unsigned Offset,
235 StringRef::size_type Length,
236 unsigned StartColumn) const {
237 llvm_unreachable("Getting the length of a part of the string literal "
238 "indicates that the code tries to reflow it.");
239}
240
241unsigned
242BreakableStringLiteral::getRemainingLength(unsigned LineIndex, unsigned Offset,
243 unsigned StartColumn) const {
244 return UnbreakableTailLength + Postfix.size() +
247}
248
250 bool Break) const {
251 return StartColumn + Prefix.size();
252}
253
255 const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
256 StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective,
257 encoding::Encoding Encoding, const FormatStyle &Style)
258 : BreakableToken(Tok, InPPDirective, Encoding, Style),
259 StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix),
260 UnbreakableTailLength(UnbreakableTailLength) {
261 assert(Tok.TokenText.starts_with(Prefix) && Tok.TokenText.ends_with(Postfix));
262 Line = Tok.TokenText.substr(
263 Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
264}
265
267 unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
268 unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
269 return getStringSplit(Line.substr(TailOffset), ContentStartColumn,
270 ColumnLimit - Postfix.size(), Style.TabWidth, Encoding);
271}
272
274 unsigned TailOffset, Split Split,
275 unsigned ContentIndent,
276 WhitespaceManager &Whitespaces) const {
277 Whitespaces.replaceWhitespaceInToken(
278 Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,
280}
281
283 const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus,
284 unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective,
285 encoding::Encoding Encoding, const FormatStyle &Style)
287 Tok, StartColumn, /*Prefix=*/QuoteStyle == SingleQuotes ? "'"
288 : QuoteStyle == AtDoubleQuotes ? "@\""
289 : "\"",
290 /*Postfix=*/QuoteStyle == SingleQuotes ? "'" : "\"",
291 UnbreakableTailLength, InPPDirective, Encoding, Style),
292 BracesNeeded(Tok.isNot(TT_StringInConcatenation)),
293 QuoteStyle(QuoteStyle) {
294 // Find the replacement text for inserting braces and quotes and line breaks.
295 // We don't create an allocated string concatenated from parts here because it
296 // has to outlive the BreakableStringliteral object. The brace replacements
297 // include a quote so that WhitespaceManager can tell it apart from whitespace
298 // replacements between the string and surrounding tokens.
299
300 // The option is not implemented in JavaScript.
301 bool SignOnNewLine =
302 !Style.isJavaScript() &&
304
305 if (Style.isVerilog()) {
306 // In Verilog, all strings are quoted by double quotes, joined by commas,
307 // and wrapped in braces. The comma is always before the newline.
308 assert(QuoteStyle == DoubleQuotes);
309 LeftBraceQuote = Style.Cpp11BracedListStyle ? "{\"" : "{ \"";
310 RightBraceQuote = Style.Cpp11BracedListStyle ? "\"}" : "\" }";
311 Postfix = "\",";
312 Prefix = "\"";
313 } else {
314 // The plus sign may be on either line. And also C# and JavaScript have
315 // several quoting styles.
316 if (QuoteStyle == SingleQuotes) {
319 Postfix = SignOnNewLine ? "'" : "' +";
320 Prefix = SignOnNewLine ? "+ '" : "'";
321 } else {
322 if (QuoteStyle == AtDoubleQuotes) {
324 Prefix = SignOnNewLine ? "+ @\"" : "@\"";
325 } else {
327 Prefix = SignOnNewLine ? "+ \"" : "\"";
328 }
330 Postfix = SignOnNewLine ? "\"" : "\" +";
331 }
332 }
333
334 // Following lines are indented by the width of the brace and space if any.
336 // The plus sign may need to be unindented depending on the style.
337 // FIXME: Add support for DontAlign.
338 if (!Style.isVerilog() && SignOnNewLine && !BracesNeeded && UnindentPlus &&
341 }
342}
343
345 unsigned LineIndex, unsigned Offset, unsigned StartColumn) const {
346 return UnbreakableTailLength + (BracesNeeded ? RightBraceQuote.size() : 1) +
349}
350
351unsigned
353 bool Break) const {
354 return std::max(
355 0,
356 static_cast<int>(StartColumn) +
357 (Break ? ContinuationIndent + static_cast<int>(Prefix.size())
358 : (BracesNeeded ? static_cast<int>(LeftBraceQuote.size()) - 1
359 : 0) +
360 (QuoteStyle == AtDoubleQuotes ? 2 : 1)));
361}
362
364 unsigned LineIndex, unsigned TailOffset, Split Split,
365 unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
366 Whitespaces.replaceWhitespaceInToken(
367 Tok, /*Offset=*/(QuoteStyle == AtDoubleQuotes ? 2 : 1) + TailOffset +
368 Split.first,
369 /*ReplaceChars=*/Split.second, /*PreviousPostfix=*/Postfix,
370 /*CurrentPrefix=*/Prefix, InPPDirective, /*NewLines=*/1,
371 /*Spaces=*/
372 std::max(0, static_cast<int>(StartColumn) + ContinuationIndent));
373}
374
376 WhitespaceManager &Whitespaces) const {
377 // Add the braces required for breaking the token if they are needed.
378 if (!BracesNeeded)
379 return;
380
381 // To add a brace or parenthesis, we replace the quote (or the at sign) with a
382 // brace and another quote. This is because the rest of the program requires
383 // one replacement for each source range. If we replace the empty strings
384 // around the string, it may conflict with whitespace replacements between the
385 // string and adjacent tokens.
386 Whitespaces.replaceWhitespaceInToken(
387 Tok, /*Offset=*/0, /*ReplaceChars=*/1, /*PreviousPostfix=*/"",
388 /*CurrentPrefix=*/LeftBraceQuote, InPPDirective, /*NewLines=*/0,
389 /*Spaces=*/0);
390 Whitespaces.replaceWhitespaceInToken(
391 Tok, /*Offset=*/Tok.TokenText.size() - 1, /*ReplaceChars=*/1,
392 /*PreviousPostfix=*/RightBraceQuote,
393 /*CurrentPrefix=*/"", InPPDirective, /*NewLines=*/0, /*Spaces=*/0);
394}
395
397 unsigned StartColumn, bool InPPDirective,
398 encoding::Encoding Encoding,
399 const FormatStyle &Style)
400 : BreakableToken(Token, InPPDirective, Encoding, Style),
401 StartColumn(StartColumn) {}
402
403unsigned BreakableComment::getLineCount() const { return Lines.size(); }
404
406BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset,
407 unsigned ColumnLimit, unsigned ContentStartColumn,
408 const llvm::Regex &CommentPragmasRegex) const {
409 // Don't break lines matching the comment pragmas regex.
410 if (!AlwaysReflow || CommentPragmasRegex.match(Content[LineIndex]))
411 return Split(StringRef::npos, 0);
412 return getCommentSplit(Content[LineIndex].substr(TailOffset),
413 ContentStartColumn, ColumnLimit, Style.TabWidth,
414 Encoding, Style);
415}
416
418 unsigned LineIndex, unsigned TailOffset, Split Split,
419 WhitespaceManager &Whitespaces) const {
420 StringRef Text = Content[LineIndex].substr(TailOffset);
421 // Text is relative to the content line, but Whitespaces operates relative to
422 // the start of the corresponding token, so compute the start of the Split
423 // that needs to be compressed into a single space relative to the start of
424 // its token.
425 unsigned BreakOffsetInToken =
426 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
427 unsigned CharsToRemove = Split.second;
428 Whitespaces.replaceWhitespaceInToken(
429 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "",
430 /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);
431}
432
433const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
434 return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok;
435}
436
437static bool mayReflowContent(StringRef Content) {
438 Content = Content.trim(Blanks);
439 // Lines starting with '@' or '\' commonly have special meaning.
440 // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists.
441 bool hasSpecialMeaningPrefix = false;
442 for (StringRef Prefix :
443 {"@", "\\", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* "}) {
444 if (Content.starts_with(Prefix)) {
445 hasSpecialMeaningPrefix = true;
446 break;
447 }
448 }
449
450 // Numbered lists may also start with a number followed by '.'
451 // To avoid issues if a line starts with a number which is actually the end
452 // of a previous line, we only consider numbers with up to 2 digits.
453 static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. ");
454 hasSpecialMeaningPrefix =
455 hasSpecialMeaningPrefix || kNumberedListRegexp.match(Content);
456
457 // Simple heuristic for what to reflow: content should contain at least two
458 // characters and either the first or second character must be
459 // non-punctuation.
460 return Content.size() >= 2 && !hasSpecialMeaningPrefix &&
461 !Content.ends_with("\\") &&
462 // Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is
463 // true, then the first code point must be 1 byte long.
464 (!isPunctuation(Content[0]) || !isPunctuation(Content[1]));
465}
466
468 const FormatToken &Token, unsigned StartColumn,
469 unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
470 encoding::Encoding Encoding, const FormatStyle &Style, bool UseCRLF)
471 : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style),
472 DelimitersOnNewline(false),
473 UnbreakableTailLength(Token.UnbreakableTailLength) {
474 assert(Tok.is(TT_BlockComment) &&
475 "block comment section must start with a block comment");
476
477 StringRef TokenText(Tok.TokenText);
478 assert(TokenText.starts_with("/*") && TokenText.ends_with("*/"));
479 TokenText.substr(2, TokenText.size() - 4)
480 .split(Lines, UseCRLF ? "\r\n" : "\n");
481
482 int IndentDelta = StartColumn - OriginalStartColumn;
483 Content.resize(Lines.size());
484 Content[0] = Lines[0];
485 ContentColumn.resize(Lines.size());
486 // Account for the initial '/*'.
487 ContentColumn[0] = StartColumn + 2;
488 Tokens.resize(Lines.size());
489 for (size_t i = 1; i < Lines.size(); ++i)
490 adjustWhitespace(i, IndentDelta);
491
492 // Align decorations with the column of the star on the first line,
493 // that is one column after the start "/*".
494 DecorationColumn = StartColumn + 1;
495
496 // Account for comment decoration patterns like this:
497 //
498 // /*
499 // ** blah blah blah
500 // */
501 if (Lines.size() >= 2 && Content[1].starts_with("**") &&
502 static_cast<unsigned>(ContentColumn[1]) == StartColumn) {
503 DecorationColumn = StartColumn;
504 }
505
506 Decoration = "* ";
507 if (Lines.size() == 1 && !FirstInLine) {
508 // Comments for which FirstInLine is false can start on arbitrary column,
509 // and available horizontal space can be too small to align consecutive
510 // lines with the first one.
511 // FIXME: We could, probably, align them to current indentation level, but
512 // now we just wrap them without stars.
513 Decoration = "";
514 }
515 for (size_t i = 1, e = Content.size(); i < e && !Decoration.empty(); ++i) {
516 const StringRef Text(Content[i]);
517 if (i + 1 == e) {
518 // If the last line is empty, the closing "*/" will have a star.
519 if (Text.empty())
520 break;
521 } else if (!Text.empty() && Decoration.starts_with(Text)) {
522 continue;
523 }
524 while (!Text.starts_with(Decoration))
525 Decoration = Decoration.drop_back(1);
526 }
527
528 LastLineNeedsDecoration = true;
529 IndentAtLineBreak = ContentColumn[0] + 1;
530 for (size_t i = 1, e = Lines.size(); i < e; ++i) {
531 if (Content[i].empty()) {
532 if (i + 1 == e) {
533 // Empty last line means that we already have a star as a part of the
534 // trailing */. We also need to preserve whitespace, so that */ is
535 // correctly indented.
536 LastLineNeedsDecoration = false;
537 // Align the star in the last '*/' with the stars on the previous lines.
538 if (e >= 2 && !Decoration.empty())
539 ContentColumn[i] = DecorationColumn;
540 } else if (Decoration.empty()) {
541 // For all other lines, set the start column to 0 if they're empty, so
542 // we do not insert trailing whitespace anywhere.
543 ContentColumn[i] = 0;
544 }
545 continue;
546 }
547
548 // The first line already excludes the star.
549 // The last line excludes the star if LastLineNeedsDecoration is false.
550 // For all other lines, adjust the line to exclude the star and
551 // (optionally) the first whitespace.
552 unsigned DecorationSize = Decoration.starts_with(Content[i])
553 ? Content[i].size()
554 : Decoration.size();
555 if (DecorationSize)
556 ContentColumn[i] = DecorationColumn + DecorationSize;
557 Content[i] = Content[i].substr(DecorationSize);
558 if (!Decoration.starts_with(Content[i])) {
559 IndentAtLineBreak =
560 std::min<int>(IndentAtLineBreak, std::max(0, ContentColumn[i]));
561 }
562 }
563 IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size());
564
565 // Detect a multiline jsdoc comment and set DelimitersOnNewline in that case.
566 if (Style.isJavaScript() || Style.isJava()) {
567 if ((Lines[0] == "*" || Lines[0].starts_with("* ")) && Lines.size() > 1) {
568 // This is a multiline jsdoc comment.
569 DelimitersOnNewline = true;
570 } else if (Lines[0].starts_with("* ") && Lines.size() == 1) {
571 // Detect a long single-line comment, like:
572 // /** long long long */
573 // Below, '2' is the width of '*/'.
574 unsigned EndColumn =
575 ContentColumn[0] +
578 2;
579 DelimitersOnNewline = EndColumn > Style.ColumnLimit;
580 }
581 }
582
583 LLVM_DEBUG({
584 llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
585 llvm::dbgs() << "DelimitersOnNewline " << DelimitersOnNewline << "\n";
586 for (size_t i = 0; i < Lines.size(); ++i) {
587 llvm::dbgs() << i << " |" << Content[i] << "| "
588 << "CC=" << ContentColumn[i] << "| "
589 << "IN=" << (Content[i].data() - Lines[i].data()) << "\n";
590 }
591 });
592}
593
595 unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
596 unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
597 // Don't break lines matching the comment pragmas regex.
598 if (!AlwaysReflow || CommentPragmasRegex.match(Content[LineIndex]))
599 return Split(StringRef::npos, 0);
600 return getCommentSplit(Content[LineIndex].substr(TailOffset),
601 ContentStartColumn, ColumnLimit, Style.TabWidth,
602 Encoding, Style, Decoration.ends_with("*"));
603}
604
605void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
606 int IndentDelta) {
607 // When in a preprocessor directive, the trailing backslash in a block comment
608 // is not needed, but can serve a purpose of uniformity with necessary escaped
609 // newlines outside the comment. In this case we remove it here before
610 // trimming the trailing whitespace. The backslash will be re-added later when
611 // inserting a line break.
612 size_t EndOfPreviousLine = Lines[LineIndex - 1].size();
613 if (InPPDirective && Lines[LineIndex - 1].ends_with("\\"))
614 --EndOfPreviousLine;
615
616 // Calculate the end of the non-whitespace text in the previous line.
617 EndOfPreviousLine =
618 Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine);
619 if (EndOfPreviousLine == StringRef::npos)
620 EndOfPreviousLine = 0;
621 else
622 ++EndOfPreviousLine;
623 // Calculate the start of the non-whitespace text in the current line.
624 size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks);
625 if (StartOfLine == StringRef::npos)
626 StartOfLine = Lines[LineIndex].size();
627
628 StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);
629 // Adjust Lines to only contain relevant text.
630 size_t PreviousContentOffset =
631 Content[LineIndex - 1].data() - Lines[LineIndex - 1].data();
632 Content[LineIndex - 1] = Lines[LineIndex - 1].substr(
633 PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset);
634 Content[LineIndex] = Lines[LineIndex].substr(StartOfLine);
635
636 // Adjust the start column uniformly across all lines.
637 ContentColumn[LineIndex] =
639 IndentDelta;
640}
641
642unsigned BreakableBlockComment::getRangeLength(unsigned LineIndex,
643 unsigned Offset,
644 StringRef::size_type Length,
645 unsigned StartColumn) const {
647 Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth,
648 Encoding);
649}
650
652 unsigned Offset,
653 unsigned StartColumn) const {
654 unsigned LineLength =
655 UnbreakableTailLength +
656 getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn);
657 if (LineIndex + 1 == Lines.size()) {
658 LineLength += 2;
659 // We never need a decoration when breaking just the trailing "*/" postfix.
660 bool HasRemainingText = Offset < Content[LineIndex].size();
661 if (!HasRemainingText) {
662 bool HasDecoration = Lines[LineIndex].ltrim().starts_with(Decoration);
663 if (HasDecoration)
664 LineLength -= Decoration.size();
665 }
666 }
667 return LineLength;
668}
669
671 bool Break) const {
672 if (Break)
673 return IndentAtLineBreak;
674 return std::max(0, ContentColumn[LineIndex]);
675}
676
677const llvm::StringSet<>
679 "@param", "@return", "@returns", "@throws", "@type", "@template",
680 "@see", "@deprecated", "@define", "@exports", "@mods", "@private",
681};
682
683unsigned BreakableBlockComment::getContentIndent(unsigned LineIndex) const {
684 if (!Style.isJava() && !Style.isJavaScript())
685 return 0;
686 // The content at LineIndex 0 of a comment like:
687 // /** line 0 */
688 // is "* line 0", so we need to skip over the decoration in that case.
689 StringRef ContentWithNoDecoration = Content[LineIndex];
690 if (LineIndex == 0 && ContentWithNoDecoration.starts_with("*"))
691 ContentWithNoDecoration = ContentWithNoDecoration.substr(1).ltrim(Blanks);
692 StringRef FirstWord = ContentWithNoDecoration.substr(
693 0, ContentWithNoDecoration.find_first_of(Blanks));
694 if (ContentIndentingJavadocAnnotations.contains(FirstWord))
696 return 0;
697}
698
699void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
700 Split Split, unsigned ContentIndent,
701 WhitespaceManager &Whitespaces) const {
702 StringRef Text = Content[LineIndex].substr(TailOffset);
703 StringRef Prefix = Decoration;
704 // We need this to account for the case when we have a decoration "* " for all
705 // the lines except for the last one, where the star in "*/" acts as a
706 // decoration.
707 unsigned LocalIndentAtLineBreak = IndentAtLineBreak;
708 if (LineIndex + 1 == Lines.size() &&
709 Text.size() == Split.first + Split.second) {
710 // For the last line we need to break before "*/", but not to add "* ".
711 Prefix = "";
712 if (LocalIndentAtLineBreak >= 2)
713 LocalIndentAtLineBreak -= 2;
714 }
715 // The split offset is from the beginning of the line. Convert it to an offset
716 // from the beginning of the token text.
717 unsigned BreakOffsetInToken =
718 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
719 unsigned CharsToRemove = Split.second;
720 assert(LocalIndentAtLineBreak >= Prefix.size());
721 std::string PrefixWithTrailingIndent = std::string(Prefix);
722 PrefixWithTrailingIndent.append(ContentIndent, ' ');
723 Whitespaces.replaceWhitespaceInToken(
724 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
725 PrefixWithTrailingIndent, InPPDirective, /*Newlines=*/1,
726 /*Spaces=*/LocalIndentAtLineBreak + ContentIndent -
727 PrefixWithTrailingIndent.size());
728}
729
731 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
732 if (!mayReflow(LineIndex, CommentPragmasRegex))
733 return Split(StringRef::npos, 0);
734
735 // If we're reflowing into a line with content indent, only reflow the next
736 // line if its starting whitespace matches the content indent.
737 size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
738 if (LineIndex) {
739 unsigned PreviousContentIndent = getContentIndent(LineIndex - 1);
740 if (PreviousContentIndent && Trimmed != StringRef::npos &&
741 Trimmed != PreviousContentIndent) {
742 return Split(StringRef::npos, 0);
743 }
744 }
745
746 return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
747}
748
750 // A break is introduced when we want delimiters on newline.
751 return DelimitersOnNewline &&
752 Lines[0].substr(1).find_first_not_of(Blanks) != StringRef::npos;
753}
754
755void BreakableBlockComment::reflow(unsigned LineIndex,
756 WhitespaceManager &Whitespaces) const {
757 StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
758 // Here we need to reflow.
759 assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&
760 "Reflowing whitespace within a token");
761 // This is the offset of the end of the last line relative to the start of
762 // the token text in the token.
763 unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
764 Content[LineIndex - 1].size() -
765 tokenAt(LineIndex).TokenText.data();
766 unsigned WhitespaceLength = TrimmedContent.data() -
767 tokenAt(LineIndex).TokenText.data() -
768 WhitespaceOffsetInToken;
769 Whitespaces.replaceWhitespaceInToken(
770 tokenAt(LineIndex), WhitespaceOffsetInToken,
771 /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"",
772 /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0,
773 /*Spaces=*/0);
774}
775
777 unsigned LineIndex, WhitespaceManager &Whitespaces) const {
778 if (LineIndex == 0) {
779 if (DelimitersOnNewline) {
780 // Since we're breaking at index 1 below, the break position and the
781 // break length are the same.
782 // Note: this works because getCommentSplit is careful never to split at
783 // the beginning of a line.
784 size_t BreakLength = Lines[0].substr(1).find_first_not_of(Blanks);
785 if (BreakLength != StringRef::npos) {
786 insertBreak(LineIndex, 0, Split(1, BreakLength), /*ContentIndent=*/0,
787 Whitespaces);
788 }
789 }
790 return;
791 }
792 // Here no reflow with the previous line will happen.
793 // Fix the decoration of the line at LineIndex.
794 StringRef Prefix = Decoration;
795 if (Content[LineIndex].empty()) {
796 if (LineIndex + 1 == Lines.size()) {
797 if (!LastLineNeedsDecoration) {
798 // If the last line was empty, we don't need a prefix, as the */ will
799 // line up with the decoration (if it exists).
800 Prefix = "";
801 }
802 } else if (!Decoration.empty()) {
803 // For other empty lines, if we do have a decoration, adapt it to not
804 // contain a trailing whitespace.
805 Prefix = Prefix.substr(0, 1);
806 }
807 } else if (ContentColumn[LineIndex] == 1) {
808 // This line starts immediately after the decorating *.
809 Prefix = Prefix.substr(0, 1);
810 }
811 // This is the offset of the end of the last line relative to the start of the
812 // token text in the token.
813 unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
814 Content[LineIndex - 1].size() -
815 tokenAt(LineIndex).TokenText.data();
816 unsigned WhitespaceLength = Content[LineIndex].data() -
817 tokenAt(LineIndex).TokenText.data() -
818 WhitespaceOffsetInToken;
819 Whitespaces.replaceWhitespaceInToken(
820 tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "", Prefix,
821 InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size());
822}
823
826 if (DelimitersOnNewline) {
827 // Replace the trailing whitespace of the last line with a newline.
828 // In case the last line is empty, the ending '*/' is already on its own
829 // line.
830 StringRef Line = Content.back().substr(TailOffset);
831 StringRef TrimmedLine = Line.rtrim(Blanks);
832 if (!TrimmedLine.empty())
833 return Split(TrimmedLine.size(), Line.size() - TrimmedLine.size());
834 }
835 return Split(StringRef::npos, 0);
836}
837
839 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
// NOTE(review): the first line of this signature is missing from this
// listing; given the '*' decoration handling below this is presumably
// BreakableBlockComment::mayReflow -- confirm.
//
// Returns true only if all of the following hold: this is not the first line
// (LineIndex > 0), AlwaysReflow is set, the line does not match
// CommentPragmasRegex, mayReflowContent() accepts the line's content, the
// token is not finalized, and the token at LineIndex does not switch
// formatting.
840 // Content[LineIndex] may exclude the indent after the '*' decoration. In that
841 // case, we compute the start of the comment pragma manually.
842 StringRef IndentContent = Content[LineIndex];
// When the raw line starts with '*' after leading blanks, match the pragma
// regex against everything following that '*'.
843 if (Lines[LineIndex].ltrim(Blanks).starts_with("*"))
844 IndentContent = Lines[LineIndex].ltrim(Blanks).substr(1);
845 return LineIndex > 0 && AlwaysReflow &&
846 !CommentPragmasRegex.match(IndentContent) &&
847 mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
848 !switchesFormatting(tokenAt(LineIndex));
849}
850
852 const FormatToken &Token, unsigned StartColumn, bool InPPDirective,
853 encoding::Encoding Encoding, const FormatStyle &Style)
854 : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) {
// NOTE(review): the first line of this definition (the qualified constructor
// name) is missing from this listing; listing lines 890, 921 and 965-966 are
// missing further down as well and are flagged where they occur.
//
// Builds the per-line tables for a section of consecutive line comments.
// Starting at Tok, the loop follows ->Next while the token is a
// TT_LineComment and stops after a token whose successor does not have
// ContinuesLineCommentSection set. For every physical line of every visited
// token it fills Lines, OriginalPrefix, Prefix, PrefixSpaceChange, Tokens,
// Content and ContentColumn.
855 assert(Tok.is(TT_LineComment) &&
856 "line comment section must start with a line comment");
// Token recorded in Tokens[] for the lines of the token currently being
// processed; it stays null while the first token is processed.
// NOTE(review): tokenAt() presumably maps a null entry back to Tok -- confirm.
857 FormatToken *LineTok = nullptr;
858 const int Minimum = Style.SpacesInLineCommentPrefix.Minimum;
859 // How many spaces we changed in the first line of the section; this will be
860 // applied in all following lines.
861 int FirstLineSpaceChange = 0;
862 for (const FormatToken *CurrentTok = &Tok;
863 CurrentTok && CurrentTok->is(TT_LineComment);
864 CurrentTok = CurrentTok->Next) {
// LastLineTok tracks the LineTok value in effect for the token being
// processed, so after the loop it refers to the last token that was handled.
865 LastLineTok = LineTok;
866 StringRef TokenText(CurrentTok->TokenText);
867 assert((TokenText.starts_with("//") || TokenText.starts_with("#")) &&
868 "unsupported line comment prefix, '//' and '#' are supported");
// A single token can contain several physical lines; append them to Lines and
// grow every per-line table to match. Only the newly appended lines
// [FirstLineIndex, Lines.size()) are processed below.
869 size_t FirstLineIndex = Lines.size();
870 TokenText.split(Lines, "\n");
871 Content.resize(Lines.size());
872 ContentColumn.resize(Lines.size());
873 PrefixSpaceChange.resize(Lines.size());
874 Tokens.resize(Lines.size());
875 Prefix.resize(Lines.size());
876 OriginalPrefix.resize(Lines.size());
877 for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
878 Lines[i] = Lines[i].ltrim(Blanks);
879 StringRef IndentPrefix = getLineCommentIndentPrefix(Lines[i], Style);
880 OriginalPrefix[i] = IndentPrefix;
881 const int SpacesInPrefix = llvm::count(IndentPrefix, ' ');
882
883 // This lambda also considers multibyte characters that are not handled in
884 // functions like isPunctuation provided by CharInfo.
885 const auto NoSpaceBeforeFirstCommentChar = [&]() {
886 assert(Lines[i].size() > IndentPrefix.size());
887 const char FirstCommentChar = Lines[i][IndentPrefix.size()];
888 const unsigned FirstCharByteSize =
889 encoding::getCodePointNumBytes(FirstCommentChar, Encoding);
// NOTE(review): listing line 890 is missing here; it presumably opens an
// `if (` whose condition compares a width computed from the first code point
// against 1 -- confirm against the upstream file.
891 Lines[i].substr(IndentPrefix.size(), FirstCharByteSize),
892 Encoding) != 1) {
893 return false;
894 }
895 // In C-like comments, add a space before #. For example this is useful
896 // to preserve the relative indentation when commenting out code with
897 // #includes.
898 //
899 // In languages using # as the comment leader such as proto, don't
900 // add a space to support patterns like:
901 // #########
902 // # section
903 // #########
904 if (FirstCommentChar == '#' && !TokenText.starts_with("#"))
905 return false;
906 return FirstCommentChar == '\\' || isPunctuation(FirstCommentChar) ||
907 isHorizontalWhitespace(FirstCommentChar);
908 };
909
910 // On the first line of the comment section we calculate how many spaces
911 // are to be added or removed, all lines after that just get only the
912 // change and we will not look at the maximum anymore. Additionally to the
913 // actual first line, we calculate that when the non space Prefix changes,
914 // e.g. from "///" to "//".
915 if (i == 0 || OriginalPrefix[i].rtrim(Blanks) !=
916 OriginalPrefix[i - 1].rtrim(Blanks)) {
917 if (SpacesInPrefix < Minimum && Lines[i].size() > IndentPrefix.size() &&
918 !NoSpaceBeforeFirstCommentChar()) {
919 FirstLineSpaceChange = Minimum - SpacesInPrefix;
920 } else if (static_cast<unsigned>(SpacesInPrefix) >
// NOTE(review): listing line 921 is missing here; judging by the assignment
// below, the right-hand operand is presumably
// Style.SpacesInLineCommentPrefix.Maximum -- confirm.
922 FirstLineSpaceChange =
923 Style.SpacesInLineCommentPrefix.Maximum - SpacesInPrefix;
924 } else {
925 FirstLineSpaceChange = 0;
926 }
927 }
928
929 if (Lines[i].size() != IndentPrefix.size()) {
930 assert(Lines[i].size() > IndentPrefix.size());
931
// Apply the section-wide change, unless that would leave this line with fewer
// than Minimum spaces; in that case pad this line up to Minimum instead.
932 PrefixSpaceChange[i] = SpacesInPrefix + FirstLineSpaceChange < Minimum
933 ? Minimum - SpacesInPrefix
934 : FirstLineSpaceChange;
935
936 const auto FirstNonSpace = Lines[i][IndentPrefix.size()];
937 const bool IsFormatComment = LineTok && switchesFormatting(*LineTok);
938 const bool LineRequiresLeadingSpace =
939 !NoSpaceBeforeFirstCommentChar() ||
940 (FirstNonSpace == '}' && FirstLineSpaceChange != 0);
// The prefix is only modified for lines that do not switch formatting and
// that either already contain a space in the prefix or require one.
941 const bool AllowsSpaceChange =
942 !IsFormatComment &&
943 (SpacesInPrefix != 0 || LineRequiresLeadingSpace);
944
945 if (PrefixSpaceChange[i] > 0 && AllowsSpaceChange) {
946 Prefix[i] = IndentPrefix.str();
947 Prefix[i].append(PrefixSpaceChange[i], ' ');
948 } else if (PrefixSpaceChange[i] < 0 && AllowsSpaceChange) {
// Never drop more characters than there are spaces in the prefix.
949 Prefix[i] = IndentPrefix
950 .drop_back(std::min<std::size_t>(
951 -PrefixSpaceChange[i], SpacesInPrefix))
952 .str();
953 } else {
954 Prefix[i] = IndentPrefix.str();
955 }
956 } else {
957 // If the IndentPrefix is the whole line, there is no content and we
958 // just drop all spaces.
959 Prefix[i] = IndentPrefix.drop_back(SpacesInPrefix).str();
960 }
961
962 Tokens[i] = LineTok;
963 Content[i] = Lines[i].substr(IndentPrefix.size());
964 ContentColumn[i] =
// NOTE(review): listing lines 965-966 (the right-hand side of the assignment
// above) are missing here -- restore them from the upstream file.
967
968 // Calculate the end of the non-whitespace text in this line.
969 size_t EndOfLine = Content[i].find_last_not_of(Blanks);
970 if (EndOfLine == StringRef::npos)
971 EndOfLine = Content[i].size();
972 else
973 ++EndOfLine;
974 Content[i] = Content[i].substr(0, EndOfLine);
975 }
976 LineTok = CurrentTok->Next;
977 if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) {
978 // A line comment section needs to be broken by a line comment that is
979 // preceded by at least two newlines. Note that we put this break here
980 // instead of breaking at a previous stage during parsing, since that
981 // would split the contents of the enum into two unwrapped lines in this
982 // example, which is undesirable:
983 // enum A {
984 // a, // comment about a
985 //
986 // // comment about b
987 // b
988 // };
989 //
990 // FIXME: Consider putting separate line comment sections as children to
991 // the unwrapped line instead.
992 break;
993 }
994 }
995}
996
// Returns the number of columns needed for the byte range
// [Offset, Offset + Length) of Content[LineIndex] when it starts at
// StartColumn, using Style.TabWidth and the section's Encoding.
997unsigned
998BreakableLineCommentSection::getRangeLength(unsigned LineIndex, unsigned Offset,
999 StringRef::size_type Length,
1000 unsigned StartColumn) const {
// NOTE(review): listing line 1001 (the start of the return statement) is
// missing here; given the argument list it is presumably
// `return encoding::columnWidthWithTabs(` -- confirm.
1002 Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth,
1003 Encoding);
1004}
1005
// NOTE(review): listing line 1007 (the qualified function name and first
// parameter) is missing between the return type and the parameter below;
// presumably BreakableLineCommentSection::getContentStartColumn(
// unsigned LineIndex, -- confirm.
//
// Returns the stored ContentColumn entry for LineIndex. The Break flag is
// ignored.
1006unsigned
1008 bool /*Break*/) const {
1009 return ContentColumn[LineIndex];
1010}
1011
1013 unsigned LineIndex, unsigned TailOffset, Split Split,
1014 unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
// NOTE(review): the first line of this signature is missing from this
// listing; presumably BreakableLineCommentSection::insertBreak -- confirm.
//
// Emits a line break inside line LineIndex at the position described by Split
// (first = offset relative to TailOffset, second = number of characters to
// remove). The continuation line gets Prefix[LineIndex] and is indented so
// that its content starts at ContentColumn[LineIndex]. ContentIndent is not
// used by this body.
1015 StringRef Text = Content[LineIndex].substr(TailOffset);
1016 // Compute the offset of the split relative to the beginning of the token
1017 // text.
1018 unsigned BreakOffsetInToken =
1019 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
1020 unsigned CharsToRemove = Split.second;
// Spaces is the indent emitted before the prefix, hence the prefix length is
// subtracted from the content column.
1021 Whitespaces.replaceWhitespaceInToken(
1022 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
1023 Prefix[LineIndex], InPPDirective, /*Newlines=*/1,
1024 /*Spaces=*/ContentColumn[LineIndex] - Prefix[LineIndex].size());
1025}
1026
1028 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
// NOTE(review): the first line of this signature is missing from this
// listing; presumably BreakableLineCommentSection::getReflowSplit -- confirm.
//
// Returns the leading range of Content[LineIndex] to drop when this line is
// reflowed into the previous one, or the (npos, 0) split when mayReflow()
// rejects the line.
1029 if (!mayReflow(LineIndex, CommentPragmasRegex))
1030 return Split(StringRef::npos, 0);
1031
// Index of the first non-blank character of the content; npos if the content
// is empty or consists of blanks only.
1032 size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
1033
1034 // In a line comment section each line is a separate token; thus, after a
1035 // split we replace all whitespace before the current line comment token
1036 // (which does not need to be included in the split), plus the start of the
1037 // line up to where the content starts.
1038 return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
1039}
1040
1042 WhitespaceManager &Whitespaces) const {
// NOTE(review): the first line of this signature is missing from this
// listing; presumably BreakableLineCommentSection::reflow(unsigned LineIndex,
// -- confirm.
//
// Joins line LineIndex onto the end of the previous line. For LineIndex > 0
// the whitespace separating the two lines is removed first (between tokens,
// or inside a single token for an escaped newline); afterwards the line's own
// comment prefix is replaced by ReflowPrefix.
1043 if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
1044 // Reflow happens between tokens. Replace the whitespace between the
1045 // tokens by the empty string.
1046 Whitespaces.replaceWhitespace(
1047 *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0,
1048 /*StartOfTokenColumn=*/StartColumn, /*IsAligned=*/true,
1049 /*InPPDirective=*/false);
1050 } else if (LineIndex > 0) {
1051 // In case we're reflowing after the '\' in:
1052 //
1053 // // line comment \
1054 // // line 2
1055 //
1056 // the reflow happens inside the single comment token (it is a single line
1057 // comment with an unescaped newline).
1058 // Replace the whitespace between the '\' and '//' with the empty string.
1059 //
1060 // Offset points to after the '\' relative to start of the token.
1061 unsigned Offset = Lines[LineIndex - 1].data() +
1062 Lines[LineIndex - 1].size() -
1063 tokenAt(LineIndex - 1).TokenText.data();
1064 // WhitespaceLength is the number of chars between the '\' and the '//' on
1065 // the next line.
1066 unsigned WhitespaceLength =
1067 Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data() - Offset;
1068 Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset,
1069 /*ReplaceChars=*/WhitespaceLength,
1070 /*PreviousPostfix=*/"",
1071 /*CurrentPrefix=*/"",
1072 /*InPPDirective=*/false,
1073 /*Newlines=*/0,
1074 /*Spaces=*/0);
1075 }
1076 // Replace the indent and prefix of the token with the reflow prefix.
// The replaced span runs from the start of Lines[LineIndex] up to the start
// of Content[LineIndex].
1077 unsigned Offset =
1078 Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data();
1079 unsigned WhitespaceLength =
1080 Content[LineIndex].data() - Lines[LineIndex].data();
1081 Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset,
1082 /*ReplaceChars=*/WhitespaceLength,
1083 /*PreviousPostfix=*/"",
1084 /*CurrentPrefix=*/ReflowPrefix,
1085 /*InPPDirective=*/false,
1086 /*Newlines=*/0,
1087 /*Spaces=*/0);
1088}
1089
1091 unsigned LineIndex, WhitespaceManager &Whitespaces) const {
// NOTE(review): the first line of this signature is missing from this
// listing; presumably BreakableLineCommentSection::adaptStartOfLine --
// confirm.
//
// Prepares the start of line LineIndex when it is not reflowed: registers a
// whitespace replacement in front of a line that begins a new token, and
// rewrites the comment prefix if the computed Prefix differs from the
// original one.
1092 // If this is the first line of a token, we need to inform Whitespace Manager
1093 // about it: either adapt the whitespace range preceding it, or mark it as an
1094 // untouchable token.
1095 // This happens for instance here:
1096 // // line 1 \
1097 // // line 2
1098 if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
1099 // This is the first line for the current token, but no reflow with the
1100 // previous token is necessary. However, we still may need to adjust the
1101 // start column. Note that ContentColumn[LineIndex] is the expected
1102 // content column after a possible update to the prefix, hence the prefix
1103 // length change is included.
1104 unsigned LineColumn =
1105 ContentColumn[LineIndex] -
1106 (Content[LineIndex].data() - Lines[LineIndex].data()) +
1107 (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());
1108
1109 // We always want to create a replacement instead of adding an untouchable
1110 // token, even if LineColumn is the same as the original column of the
1111 // token. This is because WhitespaceManager doesn't align trailing
1112 // comments if they are untouchable.
1113 Whitespaces.replaceWhitespace(*Tokens[LineIndex],
1114 /*Newlines=*/1,
1115 /*Spaces=*/LineColumn,
1116 /*StartOfTokenColumn=*/LineColumn,
1117 /*IsAligned=*/true,
1118 /*InPPDirective=*/false);
1119 }
1120 if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) {
1121 // Adjust the prefix if necessary.
// SpacesToRemove is the magnitude of a negative PrefixSpaceChange and
// SpacesToAdd the value of a positive one, so at most one of them is
// non-zero. The edit is anchored at the tail of the original prefix: removed
// spaces are taken from its end, added spaces are emitted at its end.
1122 const auto SpacesToRemove = -std::min(PrefixSpaceChange[LineIndex], 0);
1123 const auto SpacesToAdd = std::max(PrefixSpaceChange[LineIndex], 0);
1124 Whitespaces.replaceWhitespaceInToken(
1125 tokenAt(LineIndex), OriginalPrefix[LineIndex].size() - SpacesToRemove,
1126 /*ReplaceChars=*/SpacesToRemove, "", "", /*InPPDirective=*/false,
1127 /*Newlines=*/0, /*Spaces=*/SpacesToAdd);
1128 }
1129}
1130
// NOTE(review): the signature line of this definition is missing from this
// listing; the body writes State.NextToken, so it is presumably
// BreakableLineCommentSection::updateNextToken(LineState &State) const --
// confirm.
//
// Advances State.NextToken to the token following LastLineTok. When
// LastLineTok is null, State is left unchanged.
1132 if (LastLineTok)
1133 State.NextToken = LastLineTok->Next;
1134}
1135
1137 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
// NOTE(review): the first line of this signature is missing from this
// listing; given the "//" handling and the OriginalPrefix comparison below
// this is presumably BreakableLineCommentSection::mayReflow -- confirm.
//
// Returns true only if all of the following hold: this is not the first line
// (LineIndex > 0), AlwaysReflow is set, the line does not match
// CommentPragmasRegex, mayReflowContent() accepts the line's content, the
// token is not finalized, the token at LineIndex does not switch formatting,
// and the line's original prefix equals the previous line's original prefix.
1138 // Line comments have the indent as part of the prefix, so we need to
1139 // recompute the start of the line.
1140 StringRef IndentContent = Content[LineIndex];
// For "//"-style lines the pragma regex is matched against everything after
// the first two characters of the line.
1141 if (Lines[LineIndex].starts_with("//"))
1142 IndentContent = Lines[LineIndex].substr(2);
1143 // FIXME: Decide whether we want to reflow non-regular indents:
1144 // Currently, we only reflow when the OriginalPrefix[LineIndex] matches the
1145 // OriginalPrefix[LineIndex-1]. That means we don't reflow
1146 // // text that protrudes
1147 // // into text with different indent
1148 // We do reflow in that case in block comments.
1149 return LineIndex > 0 && AlwaysReflow &&
1150 !CommentPragmasRegex.match(IndentContent) &&
1151 mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
1152 !switchesFormatting(tokenAt(LineIndex)) &&
1153 OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1];
1154}
1155
1156} // namespace format
1157} // namespace clang
Declares BreakableToken, BreakableStringLiteral, BreakableComment, BreakableBlockComment and Breakabl...
This file implements an indenter that manages the indentation of continuations.
Various functions to configurably format source code.
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition: Token.h:102
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
Split getReflowSplit(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
Returns a whitespace range (offset, length) of the content at LineIndex such that the content of that...
bool mayReflow(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
Split getSplitAfterLastLine(unsigned TailOffset) const override
Returns a whitespace range (offset, length) of the content at the last line that needs to be reformat...
BreakableBlockComment(const FormatToken &Token, unsigned StartColumn, unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style, bool UseCRLF)
static const llvm::StringSet ContentIndentingJavadocAnnotations
unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const override
Returns the number of columns required to format the text following the byte Offset in the line LineI...
unsigned getContentIndent(unsigned LineIndex) const override
Returns additional content indent required for the second line after the content at line LineIndex is...
void adaptStartOfLine(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Replaces the whitespace between LineIndex-1 and LineIndex.
void reflow(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Reflows the current line into the end of the previous one.
unsigned getRangeLength(unsigned LineIndex, unsigned Offset, StringRef::size_type Length, unsigned StartColumn) const override
Returns the number of columns required to format the text in the byte range [Offset,...
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const override
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
bool introducesBreakBeforeToken() const override
Returns whether there will be a line break at the start of the token.
unsigned getLineCount() const override
Returns the number of lines in this token in the original code.
SmallVector< StringRef, 16 > Lines
SmallVector< int, 16 > ContentColumn
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const override
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
SmallVector< FormatToken *, 16 > Tokens
SmallVector< StringRef, 16 > Content
void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) const override
Replaces the whitespace range described by Split with a single space.
const FormatToken & tokenAt(unsigned LineIndex) const
BreakableComment(const FormatToken &Token, unsigned StartColumn, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Creates a breakable token for a comment.
BreakableLineCommentSection(const FormatToken &Token, unsigned StartColumn, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
void reflow(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Reflows the current line into the end of the previous one.
Split getReflowSplit(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
Returns a whitespace range (offset, length) of the content at LineIndex such that the content of that...
void updateNextToken(LineState &State) const override
Updates the next token of State to the next token after this one.
bool mayReflow(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
void adaptStartOfLine(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Replaces the whitespace between LineIndex-1 and LineIndex.
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
unsigned getRangeLength(unsigned LineIndex, unsigned Offset, StringRef::size_type Length, unsigned StartColumn) const override
Returns the number of columns required to format the text in the byte range [Offset,...
BreakableStringLiteralUsingOperators(const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus, unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Creates a breakable token for a single line string literal for C#, Java, JavaScript,...
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
void updateAfterBroken(WhitespaceManager &Whitespaces) const override
Adds replacements that are needed when the token is broken.
unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const override
Returns the number of columns required to format the text following the byte Offset in the line LineI...
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const override
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
unsigned getRangeLength(unsigned LineIndex, unsigned Offset, StringRef::size_type Length, unsigned StartColumn) const override
Returns the number of columns required to format the text in the byte range [Offset,...
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
unsigned getLineCount() const override
Returns the number of lines in this token in the original code.
unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const override
Returns the number of columns required to format the text following the byte Offset in the line LineI...
BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Creates a breakable token for a single line string literal.
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
Base class for tokens / ranges of tokens that can allow breaking within the tokens - for example,...
std::pair< StringRef::size_type, unsigned > Split
Contains starting character index and length of split.
unsigned getLengthAfterCompression(unsigned RemainingTokenColumns, Split Split) const
Returns the number of columns needed to format RemainingTokenColumns, assuming that Split is within t...
const encoding::Encoding Encoding
Manages the whitespaces around tokens and their replacements.
unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn, unsigned TabWidth, Encoding Encoding)
Returns the number of columns required to display the Text, starting from the StartColumn on a termin...
Definition: Encoding.h:60
unsigned getEscapeSequenceLength(StringRef Text)
Gets the length of an escape sequence inside a C++ string literal.
Definition: Encoding.h:96
unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding)
Gets the number of bytes in a sequence representing a single codepoint and starting with FirstChar in...
Definition: Encoding.h:77
unsigned columnWidth(StringRef Text, Encoding Encoding)
Returns the number of columns required to display the Text on a generic Unicode-capable terminal.
Definition: Encoding.h:44
bool switchesFormatting(const FormatToken &Token)
Checks if Token switches formatting, like /* clang-format off *‍/.
static BreakableToken::Split getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit, unsigned TabWidth, encoding::Encoding Encoding)
static StringRef getLineCommentIndentPrefix(StringRef Comment, const FormatStyle &Style)
static bool mayReflowContent(StringRef Content)
static constexpr StringRef Blanks(" \t\v\f\r")
static BreakableToken::Split getCommentSplit(StringRef Text, unsigned ContentStartColumn, unsigned ColumnLimit, unsigned TabWidth, encoding::Encoding Encoding, const FormatStyle &Style, bool DecorationEndsWithStar=false)
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READONLY bool isAlphanumeric(unsigned char c)
Return true if this character is an ASCII letter or digit: [a-zA-Z0-9].
Definition: CharInfo.h:138
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
Definition: CharInfo.h:91
LLVM_READONLY bool isPunctuation(unsigned char c)
Return true if this character is an ASCII punctuation character.
Definition: CharInfo.h:152
#define false
Definition: stdbool.h:26
unsigned Maximum
The maximum number of spaces at the start of the comment.
Definition: Format.h:4934
unsigned Minimum
The minimum number of spaces at the start of the comment.
Definition: Format.h:4932
bool Other
Put a space in parentheses not covered by preceding options.
Definition: Format.h:5052
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
unsigned ContinuationIndentWidth
Indent width for line continuations.
Definition: Format.h:2550
bool Cpp11BracedListStyle
If true, format braced lists as best suited for C++11 braced lists.
Definition: Format.h:2574
BinaryOperatorStyle BreakBeforeBinaryOperators
The way to wrap binary operators.
Definition: Format.h:1781
@ BOS_None
Break after operators.
Definition: Format.h:1752
unsigned TabWidth
The number of columns used for tab stops.
Definition: Format.h:5222
OperandAlignmentStyle AlignOperands
If true, horizontally align operands of binary and ternary expressions.
Definition: Format.h:565
SpacesInParensCustom SpacesInParensOptions
Control of individual spaces in parentheses.
Definition: Format.h:5091
bool isTextProto() const
Definition: Format.h:3389
bool isVerilog() const
Definition: Format.h:3388
SpacesInLineComment SpacesInLineCommentPrefix
How many spaces are allowed at the start of a line comment.
Definition: Format.h:4969
bool isJavaScript() const
Definition: Format.h:3387
@ OAS_AlignAfterOperator
Horizontally align operands of binary and ternary expressions.
Definition: Format.h:559
unsigned ColumnLimit
The column limit.
Definition: Format.h:2451
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:300
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:320
unsigned Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside),...
Definition: FormatToken.h:379
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:572
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:618
The current state when indenting an unwrapped line.