proxy/index.php

//===--- BreakableToken.cpp - Format C++ code -----------------------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

///

/// \file

/// Contains implementation of BreakableToken class and classes derived

/// from it.

///

//===----------------------------------------------------------------------===//


#include "BreakableToken.h"

#include "ContinuationIndenter.h"

#include "clang/Basic/CharInfo.h"

#include "clang/Format/Format.h"

#include "llvm/ADT/STLExtras.h"

#include "llvm/Support/Debug.h"

#include <algorithm>


#define DEBUG_TYPE "format-token-breaker"


namespace clang {

namespace format {


// The blank characters this file trims and searches for when looking for
// split points: space, tab, vertical tab, form feed and carriage return.
// Note that '\n' is not part of the set.
static constexpr StringRef Blanks = " \t\v\f\r";


/// Returns the indent prefix of a line comment: the first known comment
/// marker that \p Comment starts with, extended over the spaces that directly
/// follow it. Returns an empty StringRef if \p Comment starts with none of the
/// known markers. Text proto styles use a marker list that also contains
/// '#'-based markers.
static StringRef getLineCommentIndentPrefix(StringRef Comment,
                                            const FormatStyle &Style) {
  static constexpr StringRef KnownCStylePrefixes[] = {"///<", "//!<", "///",
                                                      "//!",  "//:",  "//"};
  static constexpr StringRef KnownTextProtoPrefixes[] = {"####", "###", "##",
                                                         "//", "#"};
  const ArrayRef<StringRef> KnownPrefixes =
      Style.isTextProto() ? ArrayRef<StringRef>(KnownTextProtoPrefixes)
                          : ArrayRef<StringRef>(KnownCStylePrefixes);

  // The markers are tried in order, so longer ones have to come first;
  // otherwise "//" would win over "///".
  assert(llvm::is_sorted(KnownPrefixes,
                         [](StringRef Lhs, StringRef Rhs) noexcept {
                           return Lhs.size() > Rhs.size();
                         }));

  for (StringRef Candidate : KnownPrefixes) {
    if (!Comment.starts_with(Candidate))
      continue;
    // Extend the marker over the run of spaces that follows it.
    const auto EndOfPrefix = Comment.find_first_not_of(' ', Candidate.size());
    return Comment.substr(0, EndOfPrefix);
  }
  return {};
}


/// Finds a position at which the comment text \p Text can be broken.
///
/// \p Text is assumed to start at column \p ContentStartColumn. The function
/// prefers the last acceptable blank within the column budget derived from
/// \p ColumnLimit; if there is none, it falls back to the first blank after
/// the budget (and after any leading whitespace), which may leave the first
/// part longer than the limit.
///
/// \returns a Split whose first member is the length of the text kept before
/// the break (trailing blanks dropped) and whose second member is the number
/// of characters between that point and the start of the remaining text, or
/// Split(StringRef::npos, 0) if no break position was found. When
/// \p DecorationEndsWithStar is true, the blanks after the cut are not
/// trimmed from the remaining text.
static BreakableToken::Split

getCommentSplit(StringRef Text, unsigned ContentStartColumn,

                unsigned ColumnLimit, unsigned TabWidth,

                encoding::Encoding Encoding, const FormatStyle &Style,

                bool DecorationEndsWithStar = false) {

  LLVM_DEBUG(llvm::dbgs() << "Comment split: \"" << Text

                          << "\", Column limit: " << ColumnLimit

                          << ", Content start: " << ContentStartColumn << "\n");

  // Nothing can be placed on the line if the content already starts at (or
  // right before) the column limit.
  if (ColumnLimit <= ContentStartColumn + 1)

    return BreakableToken::Split(StringRef::npos, 0);


  unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;

  unsigned MaxSplitBytes = 0;


  // Translate the column budget MaxSplit into a byte offset, advancing one
  // code point at a time and measuring its width with tabs taken into account.
  for (unsigned NumChars = 0;

       NumChars < MaxSplit && MaxSplitBytes < Text.size();) {

    unsigned BytesInChar =

        encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);

    NumChars += encoding::columnWidthWithTabs(

        Text.substr(MaxSplitBytes, BytesInChar), ContentStartColumn + NumChars,

        TabWidth, Encoding);

    MaxSplitBytes += BytesInChar;

  }


  // In JavaScript, some @tags can be followed by {, and machinery that parses

  // these comments will fail to understand the comment if followed by a line

  // break. So avoid ever breaking before a {.

  if (Style.isJavaScript()) {

    StringRef::size_type SpaceOffset =

        Text.find_first_of(Blanks, MaxSplitBytes);

    if (SpaceOffset != StringRef::npos && SpaceOffset + 1 < Text.size() &&

        Text[SpaceOffset + 1] == '{') {

      MaxSplitBytes = SpaceOffset + 1;

    }

  }


  // Start from the last blank within the byte budget.
  StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);


  static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\.");

  // Some spaces are unacceptable to break on, rewind past them.

  while (SpaceOffset != StringRef::npos) {

    // If a line-comment ends with `\`, the next line continues the comment,

    // whether or not it starts with `//`. This is confusing and triggers

    // -Wcomment.

    // Avoid introducing multiline comments by not allowing a break right

    // after '\'.

    if (Style.isCpp()) {

      StringRef::size_type LastNonBlank =

          Text.find_last_not_of(Blanks, SpaceOffset);

      if (LastNonBlank != StringRef::npos && Text[LastNonBlank] == '\\') {

        SpaceOffset = Text.find_last_of(Blanks, LastNonBlank);

        continue;

      }

    }


    // Do not split before a number followed by a dot: this would be interpreted

    // as a numbered list, which would prevent re-flowing in subsequent passes.

    if (kNumberedListRegexp.match(Text.substr(SpaceOffset).ltrim(Blanks))) {

      SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);

      continue;

    }


    // Avoid ever breaking before a @tag or a { in JavaScript.

    if (Style.isJavaScript() && SpaceOffset + 1 < Text.size() &&

        (Text[SpaceOffset + 1] == '{' || Text[SpaceOffset + 1] == '@')) {

      SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);

      continue;

    }


    break;

  }


  if (SpaceOffset == StringRef::npos ||

      // Don't break at leading whitespace.

      Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) {

    // Make sure that we don't break at leading whitespace that

    // reaches past MaxSplit.

    StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks);

    if (FirstNonWhitespace == StringRef::npos) {

      // If the comment is only whitespace, we cannot split.

      return BreakableToken::Split(StringRef::npos, 0);

    }

    // Fall back to the first blank that lies after both the byte budget and
    // the leading whitespace.
    SpaceOffset = Text.find_first_of(

        Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace));

  }

  if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {

    // adaptStartOfLine will break after lines starting with /** if the comment

    // is broken anywhere. Avoid emitting this break twice here.

    // Example: in /** longtextcomesherethatbreaks */ (with ColumnLimit 20) will

    // insert a break after /**, so this code must not insert the same break.

    if (SpaceOffset == 1 && Text[SpaceOffset - 1] == '*')

      return BreakableToken::Split(StringRef::npos, 0);

    // The split is reported as (length of the text before the cut without its
    // trailing blanks, distance from there to the start of the remaining
    // text).
    StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks);

    StringRef AfterCut = Text.substr(SpaceOffset);

    if (!DecorationEndsWithStar)

      AfterCut = AfterCut.ltrim(Blanks);

    return BreakableToken::Split(BeforeCut.size(),

                                 AfterCut.begin() - BeforeCut.end());

  }

  return BreakableToken::Split(StringRef::npos, 0);

}


/// Finds a position at which the string literal content \p Text can be broken
/// so that the first part uses at most ColumnLimit - UsedColumns columns.
///
/// While scanning, the offsets of the last blank, the last '/' and the last
/// single-byte non-alphanumeric character that still fit are recorded. The
/// result prefers, in this order: right after the last blank, right after the
/// last '/', right after the last non-alphanumeric character, and finally the
/// last scanned position. An offset of 0 counts as "not recorded" for each
/// category, and the scan stops before the final character of \p Text, so the
/// split is never placed after the very last character.
///
/// \returns a Split whose second member is always 0 (no characters are
/// removed), or Split(StringRef::npos, 0) if \p Text is empty, there is no
/// room left on the line, or no position was found.
static BreakableToken::Split

getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit,

               unsigned TabWidth, encoding::Encoding Encoding) {

  // FIXME: Reduce unit test case.

  if (Text.empty())

    return BreakableToken::Split(StringRef::npos, 0);

  if (ColumnLimit <= UsedColumns)

    return BreakableToken::Split(StringRef::npos, 0);

  unsigned MaxSplit = ColumnLimit - UsedColumns;

  StringRef::size_type SpaceOffset = 0;

  StringRef::size_type SlashOffset = 0;

  StringRef::size_type WordStartOffset = 0;

  StringRef::size_type SplitPoint = 0;

  // Text is consumed from the front; SplitPoint tracks how many bytes of the
  // original text have been consumed and Chars how many columns they occupy.
  for (unsigned Chars = 0;;) {

    unsigned Advance;

    if (Text[0] == '\\') {

      // A backslash starts an escape sequence, which is advanced over as one
      // unit and counted with one column per byte.
      Advance = encoding::getEscapeSequenceLength(Text);

      Chars += Advance;

    } else {

      Advance = encoding::getCodePointNumBytes(Text[0], Encoding);

      Chars += encoding::columnWidthWithTabs(

          Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding);

    }


    // Stop once the current unit no longer fits or it is the last one.
    if (Chars > MaxSplit || Text.size() <= Advance)

      break;


    if (Blanks.contains(Text[0]))

      SpaceOffset = SplitPoint;

    if (Text[0] == '/')

      SlashOffset = SplitPoint;

    if (Advance == 1 && !isAlphanumeric(Text[0]))

      WordStartOffset = SplitPoint;


    SplitPoint += Advance;

    Text = Text.substr(Advance);

  }


  // The recorded offsets point at the character itself; adding 1 puts the
  // split right after it.
  if (SpaceOffset != 0)

    return BreakableToken::Split(SpaceOffset + 1, 0);

  if (SlashOffset != 0)

    return BreakableToken::Split(SlashOffset + 1, 0);

  if (WordStartOffset != 0)

    return BreakableToken::Split(WordStartOffset + 1, 0);

  if (SplitPoint != 0)

    return BreakableToken::Split(SplitPoint, 0);

  return BreakableToken::Split(StringRef::npos, 0);

}


bool switchesFormatting(const FormatToken &Token) {

  assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) &&

         "formatting regions are switched by comment tokens");

  StringRef Content = Token.TokenText.substr(2).ltrim();

  return Content.starts_with("clang-format on") ||

         Content.starts_with("clang-format off");

}


/// Returns the number of columns the remaining token text occupies once the
/// whitespace described by \p Split has been compressed into a single space.
unsigned
BreakableToken::getLengthAfterCompression(unsigned RemainingTokenColumns,
                                          Split Split) const {
  // Worked example for the content
  //   lala  lala
  // - RemainingTokenColumns is 10, the original width;
  // - Split is (4, 2): the two spaces separating the words.
  // Replacing those two spaces by one yields
  //   lala lala
  // which is 10 + 1 - 2 = 9 columns wide.
  //
  // FIXME: Correctly measure the length of whitespace in Split.second so it
  // works with tabs.
  const unsigned ColumnsWithSingleSpace = RemainingTokenColumns + 1;
  return ColumnsWithSingleSpace - Split.second;
}


/// A breakable string literal always reports exactly one line.
unsigned BreakableStringLiteral::getLineCount() const {
  return 1;
}


/// Must never be called: asking for the length of a part of a string literal
/// means the caller is trying to reflow it, which the message below reports
/// as a logic error.
unsigned BreakableStringLiteral::getRangeLength(unsigned LineIndex,
                                                unsigned Offset,
                                                StringRef::size_type Length,
                                                unsigned StartColumn) const {
  llvm_unreachable(
      "Getting the length of a part of the string literal indicates that the "
      "code tries to reflow it.");
}


/// Returns the number of columns needed for the literal's content starting at
/// \p Offset when placed at \p StartColumn, plus the postfix and the
/// unbreakable tail that follows the token.
unsigned
BreakableStringLiteral::getRemainingLength(unsigned LineIndex, unsigned Offset,
                                           unsigned StartColumn) const {
  const unsigned ContentColumns = encoding::columnWidthWithTabs(
      Line.substr(Offset), StartColumn, Style.TabWidth, Encoding);
  return UnbreakableTailLength + Postfix.size() + ContentColumns;
}


/// Returns the column at which the literal's content begins: the start column
/// of the token plus the length of the prefix. \p LineIndex and \p Break do
/// not influence the result.
unsigned BreakableStringLiteral::getContentStartColumn(unsigned LineIndex,
                                                       bool Break) const {
  const unsigned PrefixColumns = Prefix.size();
  return StartColumn + PrefixColumns;
}


/// Creates a breakable string literal for \p Tok. \p Prefix and \p Postfix
/// are the delimiters the token text starts and ends with; the text between
/// them is stored as the single content line.
BreakableStringLiteral::BreakableStringLiteral(
    const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
    StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective,
    encoding::Encoding Encoding, const FormatStyle &Style)
    : BreakableToken(Tok, InPPDirective, Encoding, Style),
      StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix),
      UnbreakableTailLength(UnbreakableTailLength) {
  assert(Tok.TokenText.starts_with(Prefix) && Tok.TokenText.ends_with(Postfix));
  // Keep only what lies between the prefix and the postfix.
  const StringRef WholeText = Tok.TokenText;
  const size_t ContentLength =
      WholeText.size() - Prefix.size() - Postfix.size();
  Line = WholeText.substr(Prefix.size(), ContentLength);
}


/// Computes a split for the literal's content starting at \p TailOffset. The
/// column limit passed on is reduced by the size of the postfix.
/// \p LineIndex and \p CommentPragmasRegex are not used.
BreakableToken::Split BreakableStringLiteral::getSplit(
    unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
    unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
  const StringRef Remaining = Line.substr(TailOffset);
  const unsigned LimitBeforePostfix = ColumnLimit - Postfix.size();
  return getStringSplit(Remaining, ContentStartColumn, LimitBeforePostfix,
                        Style.TabWidth, Encoding);
}


/// Registers a line break inside the literal at \p Split (relative to
/// \p TailOffset): the current line is closed with the postfix and the next
/// one is opened with the prefix at the token's start column.
void BreakableStringLiteral::insertBreak(unsigned LineIndex,
                                         unsigned TailOffset, Split Split,
                                         unsigned ContentIndent,
                                         WhitespaceManager &Whitespaces) const {
  // Split offsets are relative to the content; shift them past the prefix to
  // get an offset into the token text.
  const unsigned BreakOffsetInToken =
      Prefix.size() + TailOffset + Split.first;
  Whitespaces.replaceWhitespaceInToken(
      Tok, BreakOffsetInToken, /*ReplaceChars=*/Split.second,
      /*PreviousPostfix=*/Postfix, /*CurrentPrefix=*/Prefix, InPPDirective,
      /*NewLines=*/1, /*Spaces=*/StartColumn);
}


/// Creates a breakable string literal whose pieces are joined by an operator
/// ('+', or ',' inside braces for Verilog). Selects the quote, brace and
/// operator texts to insert at breaks based on \p QuoteStyle and \p Style,
/// and computes the indent of continuation lines.
BreakableStringLiteralUsingOperators::BreakableStringLiteralUsingOperators(
    const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus,
    unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective,
    encoding::Encoding Encoding, const FormatStyle &Style)
    : BreakableStringLiteral(
          Tok, StartColumn, /*Prefix=*/QuoteStyle == SingleQuotes ? "'"
                            : QuoteStyle == AtDoubleQuotes        ? "@\""
                                                                  : "\"",
          /*Postfix=*/QuoteStyle == SingleQuotes ? "'" : "\"",
          UnbreakableTailLength, InPPDirective, Encoding, Style),
      BracesNeeded(Tok.isNot(TT_StringInConcatenation)),
      QuoteStyle(QuoteStyle) {
  // The replacement texts chosen below are string literals rather than an
  // allocated concatenation, because they have to outlive the
  // BreakableStringliteral object.  The brace replacements include a quote so
  // that WhitespaceManager can tell them apart from whitespace replacements
  // between the string and surrounding tokens.

  // Placing the sign on the new line is not implemented in JavaScript.
  const bool SignOnNewLine =
      !Style.isJavaScript() &&
      Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;

  if (Style.isVerilog()) {
    // Verilog strings are always double-quoted, joined by commas and wrapped
    // in braces.  The comma always precedes the newline.
    assert(QuoteStyle == DoubleQuotes);
    if (Style.Cpp11BracedListStyle) {
      LeftBraceQuote = "{\"";
      RightBraceQuote = "\"}";
    } else {
      LeftBraceQuote = "{ \"";
      RightBraceQuote = "\" }";
    }
    Postfix = "\",";
    Prefix = "\"";
  } else if (QuoteStyle == SingleQuotes) {
    // The plus sign may be on either line; C# and JavaScript have several
    // quoting styles.
    if (Style.SpacesInParensOptions.Other) {
      LeftBraceQuote = "( '";
      RightBraceQuote = "' )";
    } else {
      LeftBraceQuote = "('";
      RightBraceQuote = "')";
    }
    if (SignOnNewLine) {
      Postfix = "'";
      Prefix = "+ '";
    } else {
      Postfix = "' +";
      Prefix = "'";
    }
  } else {
    // Double-quoted strings, with or without a leading at sign.
    if (QuoteStyle == AtDoubleQuotes) {
      LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( @" : "(@";
      Prefix = SignOnNewLine ? "+ @\"" : "@\"";
    } else {
      LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( \"" : "(\"";
      Prefix = SignOnNewLine ? "+ \"" : "\"";
    }
    RightBraceQuote = Style.SpacesInParensOptions.Other ? "\" )" : "\")";
    Postfix = SignOnNewLine ? "\"" : "\" +";
  }

  // Following lines are indented by the width of the brace and space if any.
  ContinuationIndent = BracesNeeded ? LeftBraceQuote.size() - 1 : 0;

  // The plus sign may need to be unindented depending on the style.
  // FIXME: Add support for DontAlign.
  const bool UnindentSign =
      !Style.isVerilog() && SignOnNewLine && !BracesNeeded && UnindentPlus &&
      Style.AlignOperands == FormatStyle::OAS_AlignAfterOperator;
  if (UnindentSign)
    ContinuationIndent -= 2;
}


/// Returns the number of columns needed for the content starting at
/// \p Offset when placed at \p StartColumn, plus the closing text (the right
/// brace and quote if braces are needed, otherwise one column) and the
/// unbreakable tail.
unsigned BreakableStringLiteralUsingOperators::getRemainingLength(
    unsigned LineIndex, unsigned Offset, unsigned StartColumn) const {
  const unsigned ContentColumns = encoding::columnWidthWithTabs(
      Line.substr(Offset), StartColumn, Style.TabWidth, Encoding);
  return UnbreakableTailLength + (BracesNeeded ? RightBraceQuote.size() : 1) +
         ContentColumns;
}


/// Returns the column at which content starts. After a break this is the
/// start column plus the continuation indent and the prefix; otherwise it is
/// the start column plus the opening brace text (if braces are needed) and
/// the opening quote. The result is clamped at 0.
unsigned
BreakableStringLiteralUsingOperators::getContentStartColumn(unsigned LineIndex,
                                                            bool Break) const {
  int Offset = 0;
  if (Break) {
    Offset = ContinuationIndent + static_cast<int>(Prefix.size());
  } else {
    // The brace text minus one, because its last character is the quote that
    // is accounted for below.
    const int BraceColumns =
        BracesNeeded ? static_cast<int>(LeftBraceQuote.size()) - 1 : 0;
    // An at-prefixed double quote takes two columns, any other quote one.
    const int QuoteColumns = QuoteStyle == AtDoubleQuotes ? 2 : 1;
    Offset = BraceColumns + QuoteColumns;
  }
  return std::max(0, static_cast<int>(StartColumn) + Offset);
}


/// Registers a line break inside the literal at \p Split (relative to
/// \p TailOffset), closing the current piece with the postfix and opening the
/// next one with the prefix at the continuation indent.
void BreakableStringLiteralUsingOperators::insertBreak(
    unsigned LineIndex, unsigned TailOffset, Split Split,
    unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
  // The content starts after the opening quote, which is two characters long
  // when it carries an at sign and one otherwise.
  const unsigned OpeningQuoteLength = QuoteStyle == AtDoubleQuotes ? 2 : 1;
  const unsigned BreakOffsetInToken =
      OpeningQuoteLength + TailOffset + Split.first;
  // The continuation indent may be negative, so clamp the result at 0.
  const int Indent =
      std::max(0, static_cast<int>(StartColumn) + ContinuationIndent);
  Whitespaces.replaceWhitespaceInToken(
      Tok, /*Offset=*/BreakOffsetInToken, /*ReplaceChars=*/Split.second,
      /*PreviousPostfix=*/Postfix, /*CurrentPrefix=*/Prefix, InPPDirective,
      /*NewLines=*/1, /*Spaces=*/Indent);
}


/// Adds the brace or parenthesis around the broken literal when one is
/// needed; does nothing otherwise.
void BreakableStringLiteralUsingOperators::updateAfterBroken(
    WhitespaceManager &Whitespaces) const {
  if (BracesNeeded) {
    // A brace or parenthesis is added by replacing the quote (or the at sign)
    // with a brace plus another quote.  The rest of the program requires one
    // replacement for each source range, and replacing the empty strings
    // around the string instead could conflict with whitespace replacements
    // between the string and adjacent tokens.
    const unsigned LastCharOffset = Tok.TokenText.size() - 1;

    // Opening side: the first character of the token.
    Whitespaces.replaceWhitespaceInToken(
        Tok, /*Offset=*/0, /*ReplaceChars=*/1, /*PreviousPostfix=*/"",
        /*CurrentPrefix=*/LeftBraceQuote, InPPDirective, /*NewLines=*/0,
        /*Spaces=*/0);

    // Closing side: the last character of the token.
    Whitespaces.replaceWhitespaceInToken(
        Tok, /*Offset=*/LastCharOffset, /*ReplaceChars=*/1,
        /*PreviousPostfix=*/RightBraceQuote,
        /*CurrentPrefix=*/"", InPPDirective, /*NewLines=*/0, /*Spaces=*/0);
  }
}


/// Initializes the state shared by the comment kinds: forwards the token,
/// preprocessor flag, encoding and style to BreakableToken and records the
/// start column.
BreakableComment::BreakableComment(const FormatToken &Token,
                                   unsigned StartColumn, bool InPPDirective,
                                   encoding::Encoding Encoding,
                                   const FormatStyle &Style)
    : BreakableToken(Token, InPPDirective, Encoding, Style),
      StartColumn(StartColumn) {
}


/// Returns the number of lines the comment was split into.
unsigned BreakableComment::getLineCount() const {
  return Lines.size();
}


/// Computes a split for the content of line \p LineIndex starting at
/// \p TailOffset. Returns Split(StringRef::npos, 0) when reflowing is
/// disabled or the line matches \p CommentPragmasRegex.
BreakableToken::Split
BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset,
                           unsigned ColumnLimit, unsigned ContentStartColumn,
                           const llvm::Regex &CommentPragmasRegex) const {
  if (!AlwaysReflow)
    return Split(StringRef::npos, 0);
  // Lines matching the comment pragmas regex must not be broken.
  if (CommentPragmasRegex.match(Content[LineIndex]))
    return Split(StringRef::npos, 0);

  const StringRef Remaining = Content[LineIndex].substr(TailOffset);
  return getCommentSplit(Remaining, ContentStartColumn, ColumnLimit,
                         Style.TabWidth, Encoding, Style);
}


void BreakableComment::compressWhitespace(

    unsigned LineIndex, unsigned TailOffset, Split Split,

    WhitespaceManager &Whitespaces) const {

  StringRef Text = Content[LineIndex].substr(TailOffset);

  // Text is relative to the content line, but Whitespaces operates relative to

  // the start of the corresponding token, so compute the start of the Split

  // that needs to be compressed into a single space relative to the start of

  // its token.

  unsigned BreakOffsetInToken =

      Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;

  unsigned CharsToRemove = Split.second;

  Whitespaces.replaceWhitespaceInToken(

      tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "",

      /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);

}


/// Returns the token recorded for line \p LineIndex, or the comment's main
/// token if none is recorded for that line.
const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
  if (Tokens[LineIndex])
    return *Tokens[LineIndex];
  return Tok;
}


/// Heuristically decides whether a comment line's content may be joined with
/// the previous line. Content is rejected if, after trimming blanks, it
/// starts with a marker that commonly has special meaning, looks like a
/// numbered list item, ends with a backslash, is shorter than two characters,
/// or begins with two punctuation characters.
static bool mayReflowContent(StringRef Content) {
  Content = Content.trim(Blanks);

  // Lines starting with '@' or '\' commonly have special meaning.
  // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists.
  for (StringRef Marker :
       {"@", "\\", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* "}) {
    if (Content.starts_with(Marker))
      return false;
  }

  // Numbered lists may also start with a number followed by '.'.
  // Only numbers with up to 2 digits are considered, to avoid issues when a
  // line starts with a number that is actually the end of a previous line.
  static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. ");
  if (kNumberedListRegexp.match(Content))
    return false;

  // Simple heuristic for what to reflow: at least two characters, no trailing
  // backslash, and the first or the second character is non-punctuation.
  if (Content.size() < 2 || Content.ends_with("\\"))
    return false;

  // Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is true,
  // then the first code point must be 1 byte long.
  return !isPunctuation(Content[0]) || !isPunctuation(Content[1]);
}


/// Splits the block comment token into lines and precomputes the per-line
/// content (without leading whitespace and decoration) and content start
/// columns. Also determines the decoration shared by the lines, the column at
/// which decorations are aligned, the indent to use at inserted line breaks,
/// and, for JavaScript and Java, whether the comment delimiters are to be put
/// on lines of their own.
BreakableBlockComment::BreakableBlockComment(

    const FormatToken &Token, unsigned StartColumn,

    unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,

    encoding::Encoding Encoding, const FormatStyle &Style, bool UseCRLF)

    : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style),

      DelimitersOnNewline(false),

      UnbreakableTailLength(Token.UnbreakableTailLength) {

  assert(Tok.is(TT_BlockComment) &&

         "block comment section must start with a block comment");


  StringRef TokenText(Tok.TokenText);

  assert(TokenText.starts_with("/*") && TokenText.ends_with("*/"));

  // Drop the leading "/*" and the trailing "*/", then split what remains into
  // lines.
  TokenText.substr(2, TokenText.size() - 4)

      .split(Lines, UseCRLF ? "\r\n" : "\n");


  int IndentDelta = StartColumn - OriginalStartColumn;

  Content.resize(Lines.size());

  Content[0] = Lines[0];

  ContentColumn.resize(Lines.size());

  // Account for the initial '/*'.

  ContentColumn[0] = StartColumn + 2;

  Tokens.resize(Lines.size());

  // For every line after the first, strip the surrounding whitespace and
  // record the column at which its content starts, shifted by IndentDelta.
  for (size_t i = 1; i < Lines.size(); ++i)

    adjustWhitespace(i, IndentDelta);


  // Align decorations with the column of the star on the first line,

  // that is one column after the start "/*".

  DecorationColumn = StartColumn + 1;


  // Account for comment decoration patterns like this:

  //

  // /*

  // ** blah blah blah

  // */

  if (Lines.size() >= 2 && Content[1].starts_with("**") &&

      static_cast<unsigned>(ContentColumn[1]) == StartColumn) {

    DecorationColumn = StartColumn;

  }


  Decoration = "* ";

  if (Lines.size() == 1 && !FirstInLine) {

    // Comments for which FirstInLine is false can start on arbitrary column,

    // and available horizontal space can be too small to align consecutive

    // lines with the first one.

    // FIXME: We could, probably, align them to current indentation level, but

    // now we just wrap them without stars.

    Decoration = "";

  }

  // Reduce the decoration to the longest prefix of "* " that all relevant
  // lines after the first start with.
  for (size_t i = 1, e = Content.size(); i < e && !Decoration.empty(); ++i) {

    const StringRef Text(Content[i]);

    if (i + 1 == e) {

      // If the last line is empty, the closing "*/" will have a star.

      if (Text.empty())

        break;

    } else if (!Text.empty() && Decoration.starts_with(Text)) {

      // A non-last line that consists only of a prefix of the decoration does
      // not shorten it.
      continue;

    }

    while (!Text.starts_with(Decoration))

      Decoration = Decoration.drop_back(1);

  }


  LastLineNeedsDecoration = true;

  IndentAtLineBreak = ContentColumn[0] + 1;

  for (size_t i = 1, e = Lines.size(); i < e; ++i) {

    if (Content[i].empty()) {

      if (i + 1 == e) {

        // Empty last line means that we already have a star as a part of the

        // trailing */. We also need to preserve whitespace, so that */ is

        // correctly indented.

        LastLineNeedsDecoration = false;

        // Align the star in the last '*/' with the stars on the previous lines.

        if (e >= 2 && !Decoration.empty())

          ContentColumn[i] = DecorationColumn;

      } else if (Decoration.empty()) {

        // For all other lines, set the start column to 0 if they're empty, so

        // we do not insert trailing whitespace anywhere.

        ContentColumn[i] = 0;

      }

      continue;

    }


    // The first line already excludes the star.

    // The last line excludes the star if LastLineNeedsDecoration is false.

    // For all other lines, adjust the line to exclude the star and

    // (optionally) the first whitespace.

    unsigned DecorationSize = Decoration.starts_with(Content[i])

                                  ? Content[i].size()

                                  : Decoration.size();

    if (DecorationSize)

      ContentColumn[i] = DecorationColumn + DecorationSize;

    Content[i] = Content[i].substr(DecorationSize);

    // Lines with content beyond the decoration bound the break indent by the
    // smallest content column among them.
    if (!Decoration.starts_with(Content[i])) {

      IndentAtLineBreak =

          std::min<int>(IndentAtLineBreak, std::max(0, ContentColumn[i]));

    }

  }

  // The break indent must leave room for the decoration itself.
  IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size());


  // Detect a multiline jsdoc comment and set DelimitersOnNewline in that case.

  if (Style.isJavaScript() || Style.isJava()) {

    if ((Lines[0] == "*" || Lines[0].starts_with("* ")) && Lines.size() > 1) {

      // This is a multiline jsdoc comment.

      DelimitersOnNewline = true;

    } else if (Lines[0].starts_with("* ") && Lines.size() == 1) {

      // Detect a long single-line comment, like:

      // /** long long long */

      // Below, '2' is the width of '*/'.

      unsigned EndColumn =

          ContentColumn[0] +

          encoding::columnWidthWithTabs(Lines[0], ContentColumn[0],

                                        Style.TabWidth, Encoding) +

          2;

      DelimitersOnNewline = EndColumn > Style.ColumnLimit;

    }

  }


  LLVM_DEBUG({

    llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";

    llvm::dbgs() << "DelimitersOnNewline " << DelimitersOnNewline << "\n";

    for (size_t i = 0; i < Lines.size(); ++i) {

      llvm::dbgs() << i << " |" << Content[i] << "| "

                   << "CC=" << ContentColumn[i] << "| "

                   << "IN=" << (Content[i].data() - Lines[i].data()) << "\n";

    }

  });

}


/// Computes a split for the content of line \p LineIndex starting at
/// \p TailOffset, telling getCommentSplit whether the decoration ends with a
/// star. Returns Split(StringRef::npos, 0) when reflowing is disabled or the
/// line matches \p CommentPragmasRegex.
BreakableToken::Split BreakableBlockComment::getSplit(
    unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
    unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
  if (!AlwaysReflow)
    return Split(StringRef::npos, 0);
  // Lines matching the comment pragmas regex must not be broken.
  if (CommentPragmasRegex.match(Content[LineIndex]))
    return Split(StringRef::npos, 0);

  const StringRef Remaining = Content[LineIndex].substr(TailOffset);
  const bool DecorationEndsWithStar = Decoration.ends_with("*");
  return getCommentSplit(Remaining, ContentStartColumn, ColumnLimit,
                         Style.TabWidth, Encoding, Style,
                         DecorationEndsWithStar);
}


void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,

                                             int IndentDelta) {

  // When in a preprocessor directive, the trailing backslash in a block comment

  // is not needed, but can serve a purpose of uniformity with necessary escaped

  // newlines outside the comment. In this case we remove it here before

  // trimming the trailing whitespace. The backslash will be re-added later when

  // inserting a line break.

  size_t EndOfPreviousLine = Lines[LineIndex - 1].size();

  if (InPPDirective && Lines[LineIndex - 1].ends_with("\\"))

    --EndOfPreviousLine;


  // Calculate the end of the non-whitespace text in the previous line.

  EndOfPreviousLine =

      Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine);

  if (EndOfPreviousLine == StringRef::npos)

    EndOfPreviousLine = 0;

  else

    ++EndOfPreviousLine;

  // Calculate the start of the non-whitespace text in the current line.

  size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks);

  if (StartOfLine == StringRef::npos)

    StartOfLine = Lines[LineIndex].size();


  StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);

  // Adjust Lines to only contain relevant text.

  size_t PreviousContentOffset =

      Content[LineIndex - 1].data() - Lines[LineIndex - 1].data();

  Content[LineIndex - 1] = Lines[LineIndex - 1].substr(

      PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset);

  Content[LineIndex] = Lines[LineIndex].substr(StartOfLine);


  // Adjust the start column uniformly across all lines.

  ContentColumn[LineIndex] =

      encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) +

      IndentDelta;

}


unsigned BreakableBlockComment::getRangeLength(unsigned LineIndex,

                                               unsigned Offset,

                                               StringRef::size_type Length,

                                               unsigned StartColumn) const {

  return encoding::columnWidthWithTabs(

      Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth,

      Encoding);

}


unsigned BreakableBlockComment::getRemainingLength(unsigned LineIndex,

                                                   unsigned Offset,

                                                   unsigned StartColumn) const {

  unsigned LineLength =

      UnbreakableTailLength +

      getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn);

  if (LineIndex + 1 == Lines.size()) {

    LineLength += 2;

    // We never need a decoration when breaking just the trailing "*/" postfix.

    bool HasRemainingText = Offset < Content[LineIndex].size();

    if (!HasRemainingText) {

      bool HasDecoration = Lines[LineIndex].ltrim().starts_with(Decoration);

      if (HasDecoration)

        LineLength -= Decoration.size();

    }

  }

  return LineLength;

}


/// Returns the column at which content starts: the break indent if the line
/// results from an inserted break, otherwise the recorded content column of
/// line \p LineIndex, clamped at 0.
unsigned BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
                                                      bool Break) const {
  if (!Break)
    return std::max(0, ContentColumn[LineIndex]);
  return IndentAtLineBreak;
}


// Annotations for which getContentIndent reports a content indent of
// Style.ContinuationIndentWidth when they are the first word of a line
// (Java and JavaScript only).
const llvm::StringSet<>

    BreakableBlockComment::ContentIndentingJavadocAnnotations = {

        "@param", "@return",     "@returns", "@throws",  "@type", "@template",

        "@see",   "@deprecated", "@define",  "@exports", "@mods", "@private",

};


unsigned BreakableBlockComment::getContentIndent(unsigned LineIndex) const {

  if (!Style.isJava() && !Style.isJavaScript())

    return 0;

  // The content at LineIndex 0 of a comment like:

  // /** line 0 */

  // is "* line 0", so we need to skip over the decoration in that case.

  StringRef ContentWithNoDecoration = Content[LineIndex];

  if (LineIndex == 0 && ContentWithNoDecoration.starts_with("*"))

    ContentWithNoDecoration = ContentWithNoDecoration.substr(1).ltrim(Blanks);

  StringRef FirstWord = ContentWithNoDecoration.substr(

      0, ContentWithNoDecoration.find_first_of(Blanks));

  if (ContentIndentingJavadocAnnotations.contains(FirstWord))

    return Style.ContinuationIndentWidth;

  return 0;

}


void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,

                                        Split Split, unsigned ContentIndent,

                                        WhitespaceManager &Whitespaces) const {

  StringRef Text = Content[LineIndex].substr(TailOffset);

  StringRef Prefix = Decoration;

  // We need this to account for the case when we have a decoration "* " for all

  // the lines except for the last one, where the star in "*/" acts as a

  // decoration.

  unsigned LocalIndentAtLineBreak = IndentAtLineBreak;

  if (LineIndex + 1 == Lines.size() &&

      Text.size() == Split.first + Split.second) {

    // For the last line we need to break before "*/", but not to add "* ".

    Prefix = "";

    if (LocalIndentAtLineBreak >= 2)

      LocalIndentAtLineBreak -= 2;

  }

  // The split offset is from the beginning of the line. Convert it to an offset

  // from the beginning of the token text.

  unsigned BreakOffsetInToken =

      Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;

  unsigned CharsToRemove = Split.second;

  assert(LocalIndentAtLineBreak >= Prefix.size());

  std::string PrefixWithTrailingIndent = std::string(Prefix);

  PrefixWithTrailingIndent.append(ContentIndent, ' ');

  Whitespaces.replaceWhitespaceInToken(

      tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",

      PrefixWithTrailingIndent, InPPDirective, /*Newlines=*/1,

      /*Spaces=*/LocalIndentAtLineBreak + ContentIndent -

          PrefixWithTrailingIndent.size());

}


/// Returns the split describing the leading whitespace of line \p LineIndex
/// that is removed when the line is joined with the previous one, or
/// Split(StringRef::npos, 0) if the line must not be reflowed.
BreakableToken::Split BreakableBlockComment::getReflowSplit(
    unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
  if (!mayReflow(LineIndex, CommentPragmasRegex))
    return Split(StringRef::npos, 0);

  const size_t FirstNonBlank = Content[LineIndex].find_first_not_of(Blanks);
  const bool HasText = FirstNonBlank != StringRef::npos;

  // If we're reflowing into a line with content indent, only reflow the next
  // line if its starting whitespace matches the content indent.
  if (LineIndex != 0) {
    const unsigned IndentOfPreviousLine = getContentIndent(LineIndex - 1);
    if (IndentOfPreviousLine != 0 && HasText &&
        FirstNonBlank != IndentOfPreviousLine) {
      return Split(StringRef::npos, 0);
    }
  }

  return Split(0, HasText ? FirstNonBlank : 0);
}


bool BreakableBlockComment::introducesBreakBeforeToken() const {

  // A break is introduced when we want delimiters on newline.

  return DelimitersOnNewline &&

         Lines[0].substr(1).find_first_not_of(Blanks) != StringRef::npos;

}


void BreakableBlockComment::reflow(unsigned LineIndex,

                                   WhitespaceManager &Whitespaces) const {

  StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);

  // Here we need to reflow.

  assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&

         "Reflowing whitespace within a token");

  // This is the offset of the end of the last line relative to the start of

  // the token text in the token.

  unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +

                                     Content[LineIndex - 1].size() -

                                     tokenAt(LineIndex).TokenText.data();

  unsigned WhitespaceLength = TrimmedContent.data() -

                              tokenAt(LineIndex).TokenText.data() -

                              WhitespaceOffsetInToken;

  Whitespaces.replaceWhitespaceInToken(

      tokenAt(LineIndex), WhitespaceOffsetInToken,

      /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"",

      /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0,

      /*Spaces=*/0);

}


void BreakableBlockComment::adaptStartOfLine(

    unsigned LineIndex, WhitespaceManager &Whitespaces) const {

  if (LineIndex == 0) {

    if (DelimitersOnNewline) {

      // Since we're breaking at index 1 below, the break position and the

      // break length are the same.

      // Note: this works because getCommentSplit is careful never to split at

      // the beginning of a line.

      size_t BreakLength = Lines[0].substr(1).find_first_not_of(Blanks);

      if (BreakLength != StringRef::npos) {

        insertBreak(LineIndex, 0, Split(1, BreakLength), /*ContentIndent=*/0,

                    Whitespaces);

      }

    }

    return;

  }

  // Here no reflow with the previous line will happen.

  // Fix the decoration of the line at LineIndex.

  StringRef Prefix = Decoration;

  if (Content[LineIndex].empty()) {

    if (LineIndex + 1 == Lines.size()) {

      if (!LastLineNeedsDecoration) {

        // If the last line was empty, we don't need a prefix, as the */ will

        // line up with the decoration (if it exists).

        Prefix = "";

      }

    } else if (!Decoration.empty()) {

      // For other empty lines, if we do have a decoration, adapt it to not

      // contain a trailing whitespace.

      Prefix = Prefix.substr(0, 1);

    }

  } else if (ContentColumn[LineIndex] == 1) {

    // This line starts immediately after the decorating *.

    Prefix = Prefix.substr(0, 1);

  }

  // This is the offset of the end of the last line relative to the start of the

  // token text in the token.

  unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +

                                     Content[LineIndex - 1].size() -

                                     tokenAt(LineIndex).TokenText.data();

  unsigned WhitespaceLength = Content[LineIndex].data() -

                              tokenAt(LineIndex).TokenText.data() -

                              WhitespaceOffsetInToken;

  Whitespaces.replaceWhitespaceInToken(

      tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "", Prefix,

      InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size());

}


// Returns the range of trailing blanks at the end of the last line (starting
// the search at TailOffset) that should be turned into a newline so that the
// closing delimiter ends up on its own line. Yields a split with offset
// StringRef::npos when DelimitersOnNewline is off or when the remaining part
// of the last line holds nothing but blanks.
BreakableToken::Split
BreakableBlockComment::getSplitAfterLastLine(unsigned TailOffset) const {
  const Split NoSplit(StringRef::npos, 0);
  if (!DelimitersOnNewline)
    return NoSplit;

  const StringRef Tail = Content.back().substr(TailOffset);
  const StringRef TailWithoutTrailingBlanks = Tail.rtrim(Blanks);
  // An empty remainder means the ending "*/" is already on its own line.
  if (TailWithoutTrailingBlanks.empty())
    return NoSplit;

  const size_t KeptLength = TailWithoutTrailingBlanks.size();
  return Split(KeptLength, Tail.size() - KeptLength);
}


// Decides whether line LineIndex may be reflowed into the previous line.
// Reflowing is refused for the first line, when AlwaysReflow is off, when the
// line matches CommentPragmasRegex, when mayReflowContent rejects its
// content, when the token is finalized, or when the line switches formatting.
bool BreakableBlockComment::mayReflow(
    unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
  // Content[LineIndex] may exclude the indent after the '*' decoration, so
  // for decorated lines the text checked against the pragma regex is taken
  // from the raw line right after the '*'.
  const StringRef LineWithoutIndent = Lines[LineIndex].ltrim(Blanks);
  const StringRef PragmaCandidate = LineWithoutIndent.starts_with("*")
                                        ? LineWithoutIndent.substr(1)
                                        : Content[LineIndex];

  if (LineIndex == 0 || !AlwaysReflow)
    return false;
  if (CommentPragmasRegex.match(PragmaCandidate))
    return false;
  if (!mayReflowContent(Content[LineIndex]))
    return false;
  if (Tok.Finalized)
    return false;
  return !switchesFormatting(tokenAt(LineIndex));
}


// Builds the per-line tables for a section of consecutive line comments.
//
// Starting at Token, the constructor follows the Next pointers for as long as
// the tokens are line comments and stops after the first token whose successor
// does not have ContinuesLineCommentSection set. Each token's text is split on
// '\n' and, for every resulting line, the following parallel entries are
// filled in: Lines (line without leading blanks), OriginalPrefix (comment
// leader plus following spaces as written), PrefixSpaceChange and Prefix (the
// leader after applying Style.SpacesInLineCommentPrefix), Tokens, Content
// (text after the original prefix, without trailing blanks) and
// ContentColumn.
BreakableLineCommentSection::BreakableLineCommentSection(
    const FormatToken &Token, unsigned StartColumn, bool InPPDirective,
    encoding::Encoding Encoding, const FormatStyle &Style)
    : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) {
  assert(Tok.is(TT_LineComment) &&
         "line comment section must start with a line comment");
  // Token recorded in Tokens[] for the lines currently being processed. It
  // stays null while the first token of the section is handled and is set to
  // the following token at the end of each outer iteration.
  FormatToken *LineTok = nullptr;
  const int Minimum = Style.SpacesInLineCommentPrefix.Minimum;
  // How many spaces we changed in the first line of the section, this will be
  // applied in all following lines
  int FirstLineSpaceChange = 0;
  for (const FormatToken *CurrentTok = &Tok;
       CurrentTok && CurrentTok->is(TT_LineComment);
       CurrentTok = CurrentTok->Next) {
    // After the loop, LastLineTok holds the value LineTok had when the last
    // processed token was entered (null if only the first token was visited).
    LastLineTok = LineTok;
    StringRef TokenText(CurrentTok->TokenText);
    assert((TokenText.starts_with("//") || TokenText.starts_with("#")) &&
           "unsupported line comment prefix, '//' and '#' are supported");
    // Lines of this token are appended after those of the previous tokens;
    // all parallel vectors are grown to the new line count.
    size_t FirstLineIndex = Lines.size();
    TokenText.split(Lines, "\n");
    Content.resize(Lines.size());
    ContentColumn.resize(Lines.size());
    PrefixSpaceChange.resize(Lines.size());
    Tokens.resize(Lines.size());
    Prefix.resize(Lines.size());
    OriginalPrefix.resize(Lines.size());
    for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
      Lines[i] = Lines[i].ltrim(Blanks);
      StringRef IndentPrefix = getLineCommentIndentPrefix(Lines[i], Style);
      OriginalPrefix[i] = IndentPrefix;
      const int SpacesInPrefix = llvm::count(IndentPrefix, ' ');

      // This lambda also considers multibyte character that is not handled in
      // functions like isPunctuation provided by CharInfo.
      // It returns true when the first character after the prefix is one in
      // front of which no space should be inserted.
      const auto NoSpaceBeforeFirstCommentChar = [&]() {
        assert(Lines[i].size() > IndentPrefix.size());
        const char FirstCommentChar = Lines[i][IndentPrefix.size()];
        const unsigned FirstCharByteSize =
            encoding::getCodePointNumBytes(FirstCommentChar, Encoding);
        // A first code point whose column width is not 1 never suppresses
        // the space.
        if (encoding::columnWidth(
                Lines[i].substr(IndentPrefix.size(), FirstCharByteSize),
                Encoding) != 1) {
          return false;
        }
        // In C-like comments, add a space before #. For example this is useful
        // to preserve the relative indentation when commenting out code with
        // #includes.
        //
        // In languages using # as the comment leader such as proto, don't
        // add a space to support patterns like:
        // #########
        // # section
        // #########
        if (FirstCommentChar == '#' && !TokenText.starts_with("#"))
          return false;
        return FirstCommentChar == '\\' || isPunctuation(FirstCommentChar) ||
               isHorizontalWhitespace(FirstCommentChar);
      };

      // On the first line of the comment section we calculate how many spaces
      // are to be added or removed, all lines after that just get only the
      // change and we will not look at the maximum anymore. Additionally to the
      // actual first line, we calculate that when the non space Prefix changes,
      // e.g. from "///" to "//".
      if (i == 0 || OriginalPrefix[i].rtrim(Blanks) !=
                        OriginalPrefix[i - 1].rtrim(Blanks)) {
        if (SpacesInPrefix < Minimum && Lines[i].size() > IndentPrefix.size() &&
            !NoSpaceBeforeFirstCommentChar()) {
          FirstLineSpaceChange = Minimum - SpacesInPrefix;
        } else if (static_cast<unsigned>(SpacesInPrefix) >
                   Style.SpacesInLineCommentPrefix.Maximum) {
          FirstLineSpaceChange =
              Style.SpacesInLineCommentPrefix.Maximum - SpacesInPrefix;
        } else {
          FirstLineSpaceChange = 0;
        }
      }

      if (Lines[i].size() != IndentPrefix.size()) {
        assert(Lines[i].size() > IndentPrefix.size());

        // Apply the section-wide change, but never let this line end up with
        // fewer than Minimum spaces.
        PrefixSpaceChange[i] = SpacesInPrefix + FirstLineSpaceChange < Minimum
                                   ? Minimum - SpacesInPrefix
                                   : FirstLineSpaceChange;

        const auto FirstNonSpace = Lines[i][IndentPrefix.size()];
        const bool IsFormatComment = LineTok && switchesFormatting(*LineTok);
        const bool LineRequiresLeadingSpace =
            !NoSpaceBeforeFirstCommentChar() ||
            (FirstNonSpace == '}' && FirstLineSpaceChange != 0);
        // The prefix is only modified when the comment does not switch
        // formatting and the line either already has spaces in its prefix or
        // requires a leading space.
        const bool AllowsSpaceChange =
            !IsFormatComment &&
            (SpacesInPrefix != 0 || LineRequiresLeadingSpace);

        if (PrefixSpaceChange[i] > 0 && AllowsSpaceChange) {
          Prefix[i] = IndentPrefix.str();
          Prefix[i].append(PrefixSpaceChange[i], ' ');
        } else if (PrefixSpaceChange[i] < 0 && AllowsSpaceChange) {
          // Remove at most as many characters as the prefix has spaces.
          Prefix[i] = IndentPrefix
                          .drop_back(std::min<std::size_t>(
                              -PrefixSpaceChange[i], SpacesInPrefix))
                          .str();
        } else {
          Prefix[i] = IndentPrefix.str();
        }
      } else {
        // If the IndentPrefix is the whole line, there is no content and we
        // drop just all space
        Prefix[i] = IndentPrefix.drop_back(SpacesInPrefix).str();
      }

      Tokens[i] = LineTok;
      // Content starts after the original prefix; its column is derived from
      // the adjusted prefix.
      Content[i] = Lines[i].substr(IndentPrefix.size());
      ContentColumn[i] =
          StartColumn + encoding::columnWidthWithTabs(Prefix[i], StartColumn,
                                                      Style.TabWidth, Encoding);

      // Calculate the end of the non-whitespace text in this line.
      size_t EndOfLine = Content[i].find_last_not_of(Blanks);
      if (EndOfLine == StringRef::npos)
        EndOfLine = Content[i].size();
      else
        ++EndOfLine;
      Content[i] = Content[i].substr(0, EndOfLine);
    }
    LineTok = CurrentTok->Next;
    if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) {
      // A line comment section needs to be broken by a line comment that is
      // preceded by at least two newlines. Note that we put this break here
      // instead of breaking at a previous stage during parsing, since that
      // would split the contents of the enum into two unwrapped lines in this
      // example, which is undesirable:
      // enum A {
      //   a, // comment about a
      //
      //   // comment about b
      //   b
      // };
      //
      // FIXME: Consider putting separate line comment sections as children to
      // the unwrapped line instead.
      break;
    }
  }
}


unsigned

BreakableLineCommentSection::getRangeLength(unsigned LineIndex, unsigned Offset,

                                            StringRef::size_type Length,

                                            unsigned StartColumn) const {

  return encoding::columnWidthWithTabs(

      Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth,

      Encoding);

}


// Returns the column at which the content of line LineIndex starts. The
// second parameter is ignored by this implementation.
unsigned
BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex,
                                                   bool /*Break*/) const {
  const unsigned Column = ContentColumn[LineIndex];
  return Column;
}


void BreakableLineCommentSection::insertBreak(

    unsigned LineIndex, unsigned TailOffset, Split Split,

    unsigned ContentIndent, WhitespaceManager &Whitespaces) const {

  StringRef Text = Content[LineIndex].substr(TailOffset);

  // Compute the offset of the split relative to the beginning of the token

  // text.

  unsigned BreakOffsetInToken =

      Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;

  unsigned CharsToRemove = Split.second;

  Whitespaces.replaceWhitespaceInToken(

      tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",

      Prefix[LineIndex], InPPDirective, /*Newlines=*/1,

      /*Spaces=*/ContentColumn[LineIndex] - Prefix[LineIndex].size());

}


// Returns the leading-whitespace range of line LineIndex's content that is
// dropped when the line is reflowed onto the previous one, or a split with
// offset StringRef::npos when the line may not be reflowed.
BreakableComment::Split BreakableLineCommentSection::getReflowSplit(
    unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
  if (!mayReflow(LineIndex, CommentPragmasRegex))
    return Split(StringRef::npos, 0);

  // In a line comment section each line is a separate token; thus, after a
  // split we replace all whitespace before the current line comment token
  // (which does not need to be included in the split), plus the start of the
  // line up to where the content starts.
  const size_t FirstNonBlank = Content[LineIndex].find_first_not_of(Blanks);
  if (FirstNonBlank == StringRef::npos)
    return Split(0, 0);
  return Split(0, FirstNonBlank);
}


// Reflows line LineIndex onto the end of the previous line.
//
// First the whitespace separating the two lines is removed: either the
// whitespace in front of the token that starts at LineIndex, or, when both
// lines belong to the same token, the characters between them inside that
// token. Then the original indent and comment prefix of the line are replaced
// by ReflowPrefix.
void BreakableLineCommentSection::reflow(unsigned LineIndex,
                                         WhitespaceManager &Whitespaces) const {
  if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
    // Reflow happens between tokens. Replace the whitespace between the
    // tokens by the empty string. Tokens[LineIndex] differs from its
    // predecessor here, so it is not one of the null entries recorded for the
    // first token of the section.
    Whitespaces.replaceWhitespace(
        *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0,
        /*StartOfTokenColumn=*/StartColumn, /*IsAligned=*/true,
        /*InPPDirective=*/false);
  } else if (LineIndex > 0) {
    // In case we're reflowing after the '\' in a comment such as
    // `// line comment \` followed by `// line 2`,
    // the reflow happens inside the single comment token (it is a single line
    // comment with an unescaped newline).
    // Replace the whitespace between the '\' and '//' with the empty string.
    //
    // Offset points to after the '\' relative to start of the token.
    unsigned Offset = Lines[LineIndex - 1].data() +
                      Lines[LineIndex - 1].size() -
                      tokenAt(LineIndex - 1).TokenText.data();
    // WhitespaceLength is the number of chars between the '\' and the '//' on
    // the next line.
    unsigned WhitespaceLength =
        Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data() - Offset;
    // The constructor records a null pointer in Tokens[] for every line of
    // the section's first token, so Tokens[LineIndex] must not be
    // dereferenced here. The offsets above are relative to
    // tokenAt(LineIndex).TokenText, hence the edit is applied to that token.
    Whitespaces.replaceWhitespaceInToken(tokenAt(LineIndex), Offset,
                                         /*ReplaceChars=*/WhitespaceLength,
                                         /*PreviousPostfix=*/"",
                                         /*CurrentPrefix=*/"",
                                         /*InPPDirective=*/false,
                                         /*Newlines=*/0,
                                         /*Spaces=*/0);
  }
  // Replace the indent and prefix of the token with the reflow prefix.
  unsigned Offset =
      Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data();
  unsigned WhitespaceLength =
      Content[LineIndex].data() - Lines[LineIndex].data();
  // As above, go through tokenAt() so that lines of the first token (whose
  // Tokens[] entry is null) are handled, and so that the token matches the
  // one Offset was computed against.
  Whitespaces.replaceWhitespaceInToken(tokenAt(LineIndex), Offset,
                                       /*ReplaceChars=*/WhitespaceLength,
                                       /*PreviousPostfix=*/"",
                                       /*CurrentPrefix=*/ReflowPrefix,
                                       /*InPPDirective=*/false,
                                       /*Newlines=*/0,
                                       /*Spaces=*/0);
}


void BreakableLineCommentSection::adaptStartOfLine(

    unsigned LineIndex, WhitespaceManager &Whitespaces) const {

  // If this is the first line of a token, we need to inform Whitespace Manager

  // about it: either adapt the whitespace range preceding it, or mark it as an

  // untouchable token.

  // This happens for instance here:

  // // line 1 \

  // // line 2

  if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {

    // This is the first line for the current token, but no reflow with the

    // previous token is necessary. However, we still may need to adjust the

    // start column. Note that ContentColumn[LineIndex] is the expected

    // content column after a possible update to the prefix, hence the prefix

    // length change is included.

    unsigned LineColumn =

        ContentColumn[LineIndex] -

        (Content[LineIndex].data() - Lines[LineIndex].data()) +

        (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());


    // We always want to create a replacement instead of adding an untouchable

    // token, even if LineColumn is the same as the original column of the

    // token. This is because WhitespaceManager doesn't align trailing

    // comments if they are untouchable.

    Whitespaces.replaceWhitespace(*Tokens[LineIndex],

                                  /*Newlines=*/1,

                                  /*Spaces=*/LineColumn,

                                  /*StartOfTokenColumn=*/LineColumn,

                                  /*IsAligned=*/true,

                                  /*InPPDirective=*/false);

  }

  if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) {

    // Adjust the prefix if necessary.

    const auto SpacesToRemove = -std::min(PrefixSpaceChange[LineIndex], 0);

    const auto SpacesToAdd = std::max(PrefixSpaceChange[LineIndex], 0);

    Whitespaces.replaceWhitespaceInToken(

        tokenAt(LineIndex), OriginalPrefix[LineIndex].size() - SpacesToRemove,

        /*ReplaceChars=*/SpacesToRemove, "", "", /*InPPDirective=*/false,

        /*Newlines=*/0, /*Spaces=*/SpacesToAdd);

  }

}


// Advances State.NextToken to the token following LastLineTok. State is left
// untouched when LastLineTok is null.
void BreakableLineCommentSection::updateNextToken(LineState &State) const {
  if (!LastLineTok)
    return;
  State.NextToken = LastLineTok->Next;
}


// Decides whether line LineIndex may be reflowed into the previous line.
// Reflowing is refused for the first line, when AlwaysReflow is off, when the
// line matches CommentPragmasRegex, when mayReflowContent rejects its
// content, when the token is finalized, when the line switches formatting,
// or when its original prefix differs from the previous line's.
bool BreakableLineCommentSection::mayReflow(
    unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
  // Line comments have the indent as part of the prefix, so the text checked
  // against the pragma regex is recomputed from the raw line: everything
  // after a leading "//" when there is one, the content otherwise.
  const StringRef RawLine = Lines[LineIndex];
  const StringRef PragmaCandidate =
      RawLine.starts_with("//") ? RawLine.substr(2) : Content[LineIndex];

  if (LineIndex == 0 || !AlwaysReflow)
    return false;
  if (CommentPragmasRegex.match(PragmaCandidate))
    return false;
  if (!mayReflowContent(Content[LineIndex]) || Tok.Finalized)
    return false;
  if (switchesFormatting(tokenAt(LineIndex)))
    return false;

  // FIXME: Decide whether we want to reflow non-regular indents:
  // Currently, we only reflow when the OriginalPrefix[LineIndex] matches the
  // OriginalPrefix[LineIndex-1]. That means we don't reflow
  // // text that protrudes
  // //    into text with different indent
  // We do reflow in that case in block comments.
  return OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1];
}


} // namespace format

} // namespace clang

BreakableToken.h
Declares BreakableToken, BreakableStringLiteral, BreakableComment, BreakableBlockComment and Breakabl...

CharInfo.h

ContinuationIndenter.h
This file implements an indenter that manages the indentation of continuations.

Format.h
Various functions to configurably format source code.

clang::Token
Token - This structure provides full information about a lexed token.
Definition: Token.h:36

clang::Token::is
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition: Token.h:102

clang::format::BreakableBlockComment::insertBreak
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
Definition: BreakableToken.cpp:699

clang::format::BreakableBlockComment::getContentStartColumn
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
Definition: BreakableToken.cpp:670

clang::format::BreakableBlockComment::getReflowSplit
Split getReflowSplit(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
Returns a whitespace range (offset, length) of the content at LineIndex such that the content of that...
Definition: BreakableToken.cpp:730

clang::format::BreakableBlockComment::mayReflow
bool mayReflow(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
Definition: BreakableToken.cpp:838

clang::format::BreakableBlockComment::getSplitAfterLastLine
Split getSplitAfterLastLine(unsigned TailOffset) const override
Returns a whitespace range (offset, length) of the content at the last line that needs to be reformat...
Definition: BreakableToken.cpp:825

clang::format::BreakableBlockComment::BreakableBlockComment
BreakableBlockComment(const FormatToken &Token, unsigned StartColumn, unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style, bool UseCRLF)
Definition: BreakableToken.cpp:467

clang::format::BreakableBlockComment::ContentIndentingJavadocAnnotations
static const llvm::StringSet ContentIndentingJavadocAnnotations
Definition: BreakableToken.h:434

clang::format::BreakableBlockComment::getRemainingLength
unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const override
Returns the number of columns required to format the text following the byte Offset in the line LineI...
Definition: BreakableToken.cpp:651

clang::format::BreakableBlockComment::getContentIndent
unsigned getContentIndent(unsigned LineIndex) const override
Returns additional content indent required for the second line after the content at line LineIndex is...
Definition: BreakableToken.cpp:683

clang::format::BreakableBlockComment::adaptStartOfLine
void adaptStartOfLine(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Replaces the whitespace between LineIndex-1 and LineIndex.
Definition: BreakableToken.cpp:776

clang::format::BreakableBlockComment::reflow
void reflow(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Reflows the current line into the end of the previous one.
Definition: BreakableToken.cpp:755

clang::format::BreakableBlockComment::getRangeLength
unsigned getRangeLength(unsigned LineIndex, unsigned Offset, StringRef::size_type Length, unsigned StartColumn) const override
Returns the number of columns required to format the text in the byte range [Offset,...
Definition: BreakableToken.cpp:642

clang::format::BreakableBlockComment::getSplit
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const override
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
Definition: BreakableToken.cpp:594

clang::format::BreakableBlockComment::introducesBreakBeforeToken
bool introducesBreakBeforeToken() const override
Returns whether there will be a line break at the start of the token.
Definition: BreakableToken.cpp:749

clang::format::BreakableComment
Definition: BreakableToken.h:327

clang::format::BreakableComment::getLineCount
unsigned getLineCount() const override
Returns the number of lines in this token in the original code.
Definition: BreakableToken.cpp:403

clang::format::BreakableComment::Lines
SmallVector< StringRef, 16 > Lines
Definition: BreakableToken.h:360

clang::format::BreakableComment::ReflowPrefix
StringRef ReflowPrefix
Definition: BreakableToken.h:397

clang::format::BreakableComment::ContentColumn
SmallVector< int, 16 > ContentColumn
Definition: BreakableToken.h:382

clang::format::BreakableComment::getSplit
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const override
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
Definition: BreakableToken.cpp:406

clang::format::BreakableComment::Tokens
SmallVector< FormatToken *, 16 > Tokens
Definition: BreakableToken.h:370

clang::format::BreakableComment::Content
SmallVector< StringRef, 16 > Content
Definition: BreakableToken.h:365

clang::format::BreakableComment::AlwaysReflow
const bool AlwaysReflow
Definition: BreakableToken.h:387

clang::format::BreakableComment::StartColumn
unsigned StartColumn
Definition: BreakableToken.h:385

clang::format::BreakableComment::compressWhitespace
void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) const override
Replaces the whitespace range described by Split with a single space.
Definition: BreakableToken.cpp:417

clang::format::BreakableComment::tokenAt
const FormatToken & tokenAt(unsigned LineIndex) const
Definition: BreakableToken.cpp:433

clang::format::BreakableComment::BreakableComment
BreakableComment(const FormatToken &Token, unsigned StartColumn, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Creates a breakable token for a comment.
Definition: BreakableToken.cpp:396

clang::format::BreakableLineCommentSection::BreakableLineCommentSection
BreakableLineCommentSection(const FormatToken &Token, unsigned StartColumn, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Definition: BreakableToken.cpp:851

clang::format::BreakableLineCommentSection::getContentStartColumn
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
Definition: BreakableToken.cpp:1007

clang::format::BreakableLineCommentSection::reflow
void reflow(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Reflows the current line into the end of the previous one.
Definition: BreakableToken.cpp:1041

clang::format::BreakableLineCommentSection::getReflowSplit
Split getReflowSplit(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
Returns a whitespace range (offset, length) of the content at LineIndex such that the content of that...
Definition: BreakableToken.cpp:1027

clang::format::BreakableLineCommentSection::updateNextToken
void updateNextToken(LineState &State) const override
Updates the next token of State to the next token after this one.
Definition: BreakableToken.cpp:1131

clang::format::BreakableLineCommentSection::mayReflow
bool mayReflow(unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const override
Definition: BreakableToken.cpp:1136

clang::format::BreakableLineCommentSection::adaptStartOfLine
void adaptStartOfLine(unsigned LineIndex, WhitespaceManager &Whitespaces) const override
Replaces the whitespace between LineIndex-1 and LineIndex.
Definition: BreakableToken.cpp:1090

clang::format::BreakableLineCommentSection::insertBreak
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
Definition: BreakableToken.cpp:1012

clang::format::BreakableLineCommentSection::getRangeLength
unsigned getRangeLength(unsigned LineIndex, unsigned Offset, StringRef::size_type Length, unsigned StartColumn) const override
Returns the number of columns required to format the text in the byte range [Offset,...
Definition: BreakableToken.cpp:998

clang::format::BreakableStringLiteralUsingOperators::BracesNeeded
bool BracesNeeded
Definition: BreakableToken.h:316

clang::format::BreakableStringLiteralUsingOperators::BreakableStringLiteralUsingOperators
BreakableStringLiteralUsingOperators(const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus, unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Creates a breakable token for a single line string literal for C#, Java, JavaScript,...
Definition: BreakableToken.cpp:282

clang::format::BreakableStringLiteralUsingOperators::QuoteStyle
QuoteStyleType QuoteStyle
Definition: BreakableToken.h:317

clang::format::BreakableStringLiteralUsingOperators::ContinuationIndent
int ContinuationIndent
Definition: BreakableToken.h:324

clang::format::BreakableStringLiteralUsingOperators::LeftBraceQuote
StringRef LeftBraceQuote
Definition: BreakableToken.h:320

clang::format::BreakableStringLiteralUsingOperators::insertBreak
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
Definition: BreakableToken.cpp:363

clang::format::BreakableStringLiteralUsingOperators::RightBraceQuote
StringRef RightBraceQuote
Definition: BreakableToken.h:321

clang::format::BreakableStringLiteralUsingOperators::getContentStartColumn
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
Definition: BreakableToken.cpp:352

clang::format::BreakableStringLiteralUsingOperators::QuoteStyleType
QuoteStyleType
Definition: BreakableToken.h:290

clang::format::BreakableStringLiteralUsingOperators::SingleQuotes
@ SingleQuotes
Definition: BreakableToken.h:292

clang::format::BreakableStringLiteralUsingOperators::AtDoubleQuotes
@ AtDoubleQuotes
Definition: BreakableToken.h:293

clang::format::BreakableStringLiteralUsingOperators::DoubleQuotes
@ DoubleQuotes
Definition: BreakableToken.h:291

clang::format::BreakableStringLiteralUsingOperators::updateAfterBroken
void updateAfterBroken(WhitespaceManager &Whitespaces) const override
Adds replacements that are needed when the token is broken.
Definition: BreakableToken.cpp:375

clang::format::BreakableStringLiteralUsingOperators::getRemainingLength
unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const override
Returns the number of columns required to format the text following the byte Offset in the line LineI...
Definition: BreakableToken.cpp:344

clang::format::BreakableStringLiteral
Definition: BreakableToken.h:247

clang::format::BreakableStringLiteral::Prefix
StringRef Prefix
Definition: BreakableToken.h:278

clang::format::BreakableStringLiteral::getSplit
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const override
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
Definition: BreakableToken.cpp:266

clang::format::BreakableStringLiteral::Line
StringRef Line
Definition: BreakableToken.h:282

clang::format::BreakableStringLiteral::UnbreakableTailLength
unsigned UnbreakableTailLength
Definition: BreakableToken.h:285

clang::format::BreakableStringLiteral::getRangeLength
unsigned getRangeLength(unsigned LineIndex, unsigned Offset, StringRef::size_type Length, unsigned StartColumn) const override
Returns the number of columns required to format the text in the byte range [Offset,...
Definition: BreakableToken.cpp:233

clang::format::BreakableStringLiteral::getContentStartColumn
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
Definition: BreakableToken.cpp:249

clang::format::BreakableStringLiteral::getLineCount
unsigned getLineCount() const override
Returns the number of lines in this token in the original code.
Definition: BreakableToken.cpp:231

clang::format::BreakableStringLiteral::StartColumn
unsigned StartColumn
Definition: BreakableToken.h:276

clang::format::BreakableStringLiteral::getRemainingLength
unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const override
Returns the number of columns required to format the text following the byte Offset in the line LineI...
Definition: BreakableToken.cpp:242

clang::format::BreakableStringLiteral::BreakableStringLiteral
BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Creates a breakable token for a single line string literal.
Definition: BreakableToken.cpp:254

clang::format::BreakableStringLiteral::insertBreak
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
Definition: BreakableToken.cpp:273

clang::format::BreakableStringLiteral::Postfix
StringRef Postfix
Definition: BreakableToken.h:280

clang::format::BreakableToken
Base class for tokens / ranges of tokens that can allow breaking within the tokens - for example,...
Definition: BreakableToken.h:86

clang::format::BreakableToken::Split
std::pair< StringRef::size_type, unsigned > Split
Contains starting character index and length of split.
Definition: BreakableToken.h:89

clang::format::BreakableToken::Style
const FormatStyle & Style
Definition: BreakableToken.h:244

clang::format::BreakableToken::getLengthAfterCompression
unsigned getLengthAfterCompression(unsigned RemainingTokenColumns, Split Split) const
Returns the number of columns needed to format RemainingTokenColumns, assuming that Split is within t...
Definition: BreakableToken.cpp:215

clang::format::BreakableToken::InPPDirective
const bool InPPDirective
Definition: BreakableToken.h:242

clang::format::BreakableToken::Tok
const FormatToken & Tok
Definition: BreakableToken.h:241

clang::format::BreakableToken::Encoding
const encoding::Encoding Encoding
Definition: BreakableToken.h:243

clang::format::WhitespaceManager
Manages the whitespaces around tokens and their replacements.
Definition: WhitespaceManager.h:35

llvm::ArrayRef
Definition: LLVM.h:31

clang::format::encoding::columnWidthWithTabs
unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn, unsigned TabWidth, Encoding Encoding)
Returns the number of columns required to display the Text, starting from the StartColumn on a termin...
Definition: Encoding.h:60

clang::format::encoding::getEscapeSequenceLength
unsigned getEscapeSequenceLength(StringRef Text)
Gets the length of an escape sequence inside a C++ string literal.
Definition: Encoding.h:96

clang::format::encoding::Encoding
Encoding
Definition: Encoding.h:26

clang::format::encoding::getCodePointNumBytes
unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding)
Gets the number of bytes in a sequence representing a single codepoint and starting with FirstChar in...
Definition: Encoding.h:77

clang::format::encoding::columnWidth
unsigned columnWidth(StringRef Text, Encoding Encoding)
Returns the number of columns required to display the Text on a generic Unicode-capable terminal.
Definition: Encoding.h:44

clang::format::switchesFormatting
bool switchesFormatting(const FormatToken &Token)
Checks if Token switches formatting, like /* clang-format off */.
Definition: BreakableToken.cpp:206

clang::format::getStringSplit
static BreakableToken::Split getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit, unsigned TabWidth, encoding::Encoding Encoding)
Definition: BreakableToken.cpp:158

clang::format::getLineCommentIndentPrefix
static StringRef getLineCommentIndentPrefix(StringRef Comment, const FormatStyle &Style)
Definition: BreakableToken.cpp:30

clang::format::mayReflowContent
static bool mayReflowContent(StringRef Content)
Definition: BreakableToken.cpp:437

clang::format::Blanks
static constexpr StringRef Blanks(" \t\v\f\r")

clang::format::getCommentSplit
static BreakableToken::Split getCommentSplit(StringRef Text, unsigned ContentStartColumn, unsigned ColumnLimit, unsigned TabWidth, encoding::Encoding Encoding, const FormatStyle &Style, bool DecorationEndsWithStar=false)
Definition: BreakableToken.cpp:56

clang
The JSON file list parser is used to communicate input to InstallAPI.
Definition: CalledOnceCheck.h:17

clang::isAlphanumeric
LLVM_READONLY bool isAlphanumeric(unsigned char c)
Return true if this character is an ASCII letter or digit: [a-zA-Z0-9].
Definition: CharInfo.h:138

clang::isHorizontalWhitespace
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
Definition: CharInfo.h:91

clang::PragmaClangSectionKind::Text
@ Text

clang::isPunctuation
LLVM_READONLY bool isPunctuation(unsigned char c)
Return true if this character is an ASCII punctuation character.
Definition: CharInfo.h:152

clang::SourceLocIdentKind::Line
@ Line

false
#define false
Definition: stdbool.h:26

clang::format::FormatStyle::SpacesInLineComment::Maximum
unsigned Maximum
The maximum number of spaces at the start of the comment.
Definition: Format.h:4934

clang::format::FormatStyle::SpacesInLineComment::Minimum
unsigned Minimum
The minimum number of spaces at the start of the comment.
Definition: Format.h:4932

clang::format::FormatStyle::SpacesInParensCustom::Other
bool Other
Put a space in parentheses not covered by preceding options.
Definition: Format.h:5052

clang::format::FormatStyle
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55

clang::format::FormatStyle::ContinuationIndentWidth
unsigned ContinuationIndentWidth
Indent width for line continuations.
Definition: Format.h:2550

clang::format::FormatStyle::Cpp11BracedListStyle
bool Cpp11BracedListStyle
If true, format braced lists as best suited for C++11 braced lists.
Definition: Format.h:2574

clang::format::FormatStyle::BreakBeforeBinaryOperators
BinaryOperatorStyle BreakBeforeBinaryOperators
The way to wrap binary operators.
Definition: Format.h:1781

clang::format::FormatStyle::isCpp
bool isCpp() const
Definition: Format.h:3381

clang::format::FormatStyle::BOS_None
@ BOS_None
Break after operators.
Definition: Format.h:1752

clang::format::FormatStyle::TabWidth
unsigned TabWidth
The number of columns used for tab stops.
Definition: Format.h:5222

clang::format::FormatStyle::AlignOperands
OperandAlignmentStyle AlignOperands
If true, horizontally align operands of binary and ternary expressions.
Definition: Format.h:565

clang::format::FormatStyle::SpacesInParensOptions
SpacesInParensCustom SpacesInParensOptions
Control of individual spaces in parentheses.
Definition: Format.h:5091

clang::format::FormatStyle::isTextProto
bool isTextProto() const
Definition: Format.h:3389

clang::format::FormatStyle::isVerilog
bool isVerilog() const
Definition: Format.h:3388

clang::format::FormatStyle::SpacesInLineCommentPrefix
SpacesInLineComment SpacesInLineCommentPrefix
How many spaces are allowed at the start of a line comment.
Definition: Format.h:4969

clang::format::FormatStyle::isJavaScript
bool isJavaScript() const
Definition: Format.h:3387

clang::format::FormatStyle::isJava
bool isJava() const
Definition: Format.h:3386

clang::format::FormatStyle::OAS_AlignAfterOperator
@ OAS_AlignAfterOperator
Horizontally align operands of binary and ternary expressions.
Definition: Format.h:559

clang::format::FormatStyle::ColumnLimit
unsigned ColumnLimit
The column limit.
Definition: Format.h:2451

clang::format::FormatToken
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:300

clang::format::FormatToken::TokenText
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:320

clang::format::FormatToken::Finalized
unsigned Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside),...
Definition: FormatToken.h:379

clang::format::FormatToken::Next
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:572

clang::format::FormatToken::is
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:618

clang::format::LineState
The current state when indenting an unwrapped line.
Definition: ContinuationIndenter.h:416