clang 22.0.0git
DefinitionBlockSeparator.cpp
Go to the documentation of this file.
1//===--- DefinitionBlockSeparator.cpp ---------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements DefinitionBlockSeparator, a TokenAnalyzer that inserts
11/// or removes empty lines separating definition blocks like classes, structs,
12/// functions, enums, and namespaces in between.
13///
14//===----------------------------------------------------------------------===//
15
17#define DEBUG_TYPE "definition-block-separator"
18
19namespace clang {
20namespace format {
/// Analyzer entry point: hands the annotated lines and the lexer to
/// separateBlocks() and returns the replacements it collected.
///
/// \param Annotator not referenced in the lines visible in this listing.
/// \param AnnotatedLines the lines forwarded to separateBlocks().
/// \param Tokens the lexer forwarded to separateBlocks().
/// \returns the pair {Result, 0}.
///
/// NOTE(review): listing lines 24-26 are missing from this extract. They
/// presumably declare `Result` and do the set-up that precedes the
/// separateBlocks() call -- confirm against the original source.
21std::pair<tooling::Replacements, unsigned> DefinitionBlockSeparator::analyze(
22 TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
23 FormatTokenLexer &Tokens) {
27 separateBlocks(AnnotatedLines, Result, Tokens);
28 return {Result, 0};
29}
30
/// Walks the annotated lines, works out where definition blocks begin and
/// end, and records whitespace replacements that set the number of newlines
/// in front of the first token of the lines concerned.
///
/// Lines inside preprocessor directives are skipped. The EOF token, access
/// specifiers and lines that are not marked `Affected` are never changed.
/// The replacements generated by the WhitespaceManager are added to `Result`
/// one by one; the function returns at the first one `Result.add` reports as
/// failed.
///
/// NOTE(review): this extract omits listing lines 32, 35, 56, 68 and 70-75
/// (part of the parameter list and several initializers), so the statements
/// beginning on listing lines 31, 34, 54-55, 67 and 69 are incomplete as
/// shown here.
31void DefinitionBlockSeparator::separateBlocks(
33 FormatTokenLexer &Tokens) {
34 const bool IsNeverStyle =
36 const AdditionalKeywords &ExtraKeywords = Tokens.getKeywords();
// Nesting delta of a token: +1 for an opening brace/paren/square bracket,
// -1 for a closing one, 0 for anything else.
37 auto GetBracketLevelChange = [](const FormatToken *Tok) {
38 if (Tok->isOneOf(tok::l_brace, tok::l_paren, tok::l_square))
39 return 1;
40 if (Tok->isOneOf(tok::r_brace, tok::r_paren, tok::r_square))
41 return -1;
42 return 0;
43 };
// True if the line looks like the start of a definition: a possible function
// definition, a namespace, or -- scanning only tokens at bracket level 0 -- a
// class/struct/union keyword, or `enum` unless ExcludeEnum is set.
// NOTE(review): the `kw_function` alternative is guarded by a condition on
// the missing listing line 56 -- confirm what that condition is.
44 auto LikelyDefinition = [&](const AnnotatedLine *Line,
45 bool ExcludeEnum = false) {
46 if ((Line->MightBeFunctionDecl && Line->mightBeFunctionDefinition()) ||
47 Line->startsWithNamespace()) {
48 return true;
49 }
50 int BracketLevel = 0;
51 for (const FormatToken *CurrentToken = Line->First; CurrentToken;
52 CurrentToken = CurrentToken->Next) {
53 if (BracketLevel == 0) {
54 if (CurrentToken->isOneOf(tok::kw_class, tok::kw_struct,
55 tok::kw_union) ||
57 CurrentToken->is(ExtraKeywords.kw_function))) {
58 return true;
59 }
60 if (!ExcludeEnum && CurrentToken->is(tok::kw_enum))
61 return true;
62 }
63 BracketLevel += GetBracketLevelChange(CurrentToken);
64 }
65 return false;
66 };
67 unsigned NewlineCount =
69 WhitespaceManager Whitespaces(
// Visit every line. Note that I can also be advanced inside the body, when
// the enum branch skips forward to the line holding the closing brace.
76 for (unsigned I = 0; I < Lines.size(); ++I) {
77 const auto &CurrentLine = Lines[I];
78 if (CurrentLine->InPPDirective)
79 continue;
80 FormatToken *TargetToken = nullptr;
// Declared without an initializer; every InsertReplacement call below is
// preceded by an assignment to TargetLine.
81 AnnotatedLine *TargetLine;
82 auto OpeningLineIndex = CurrentLine->MatchingOpeningBlockLineIndex;
83 AnnotatedLine *OpeningLine = nullptr;
84 const auto IsAccessSpecifierToken = [](const FormatToken *Token) {
85 return Token->isAccessSpecifier() || Token->isObjCAccessSpecifier();
86 };
// Requests NewlineToInsert newlines in front of TargetToken, passing the
// token's original column for both column arguments. Does nothing for the
// EOF token, when the target token or the first token of the line just above
// OpeningLineIndex is an access specifier, or when TargetLine is not affected.
87 const auto InsertReplacement = [&](const int NewlineToInsert) {
88 assert(TargetLine);
89 assert(TargetToken);
90
91 // Do not handle EOF newlines.
92 if (TargetToken->is(tok::eof))
93 return;
94 if (IsAccessSpecifierToken(TargetToken) ||
95 (OpeningLineIndex > 0 &&
96 IsAccessSpecifierToken(Lines[OpeningLineIndex - 1]->First))) {
97 return;
98 }
99 if (!TargetLine->Affected)
100 return;
101 Whitespaces.replaceWhitespace(*TargetToken, NewlineToInsert,
102 TargetToken->OriginalColumn,
103 TargetToken->OriginalColumn);
104 };
// True if the line is a preprocessor conditional: `#` followed by if, ifdef,
// ifndef, elif, elifdef, elifndef, else or endif.
105 const auto IsPPConditional = [&](const size_t LineIndex) {
106 const auto &Line = Lines[LineIndex];
107 return Line->First->is(tok::hash) && Line->First->Next &&
108 Line->First->Next->isOneOf(tok::pp_if, tok::pp_ifdef, tok::pp_else,
109 tok::pp_ifndef, tok::pp_elifndef,
110 tok::pp_elifdef, tok::pp_elif,
111 tok::pp_endif);
112 };
// True if the opening line is the very first line, or the line above it ends
// with a scope-opening token or is a preprocessor conditional.
113 const auto FollowingOtherOpening = [&]() {
114 return OpeningLineIndex == 0 ||
115 Lines[OpeningLineIndex - 1]->Last->opensScope() ||
116 IsPPConditional(OpeningLineIndex - 1);
117 };
// True if `enum` occurs at bracket level 0 on the current line and is either
// followed by `{` at level 0 on that same line, or the next line starts with
// `{`.
118 const auto HasEnumOnLine = [&]() {
119 bool FoundEnumKeyword = false;
120 int BracketLevel = 0;
121 for (const FormatToken *CurrentToken = CurrentLine->First; CurrentToken;
122 CurrentToken = CurrentToken->Next) {
123 if (BracketLevel == 0) {
124 if (CurrentToken->is(tok::kw_enum))
125 FoundEnumKeyword = true;
126 else if (FoundEnumKeyword && CurrentToken->is(tok::l_brace))
127 return true;
128 }
129 BracketLevel += GetBracketLevelChange(CurrentToken);
130 }
131 return FoundEnumKeyword && I + 1 < Lines.size() &&
132 Lines[I + 1]->First->is(tok::l_brace);
133 };
134
135 bool IsDefBlock = false;
// Decides whether the line at OpeningLineIndex + Direction can sit in front
// of a definition. Returns true straight away if the line at OpeningLineIndex
// starts with a C# generic type constraint, and false if the examined line is
// itself a likely definition. Otherwise true for: a leading comment for which
// isClangFormatOn() is false, a lone identifier line handled by the macro
// case below, or (C# only) a line starting with an attribute square bracket.
136 const auto MayPrecedeDefinition = [&](const int Direction = -1) {
137 assert(Direction >= -1);
138 assert(Direction <= 1);
139
140 if (Lines[OpeningLineIndex]->First->is(TT_CSharpGenericTypeConstraint))
141 return true;
142
143 const size_t OperateIndex = OpeningLineIndex + Direction;
144 assert(OperateIndex < Lines.size());
145 const auto &OperateLine = Lines[OperateIndex];
146 if (LikelyDefinition(OperateLine))
147 return false;
148
149 if (const auto *Tok = OperateLine->First;
150 Tok->is(tok::comment) && !isClangFormatOn(Tok->TokenText)) {
151 return true;
152 }
153
154 // A single line identifier that is not in the last line.
155 if (OperateLine->First->is(tok::identifier) &&
156 OperateLine->First == OperateLine->Last &&
157 OperateIndex + 1 < Lines.size()) {
158 // UnwrappedLineParser's recognition of free-standing macro like
159 // Q_OBJECT may also recognize some uppercased type names that may be
160 // used as return type as that kind of macros, which is a bit hard to
161 // distinguish one from another purely from token patterns. Here, we
162 // try not to add new lines below those identifiers.
163 AnnotatedLine *NextLine = Lines[OperateIndex + 1];
164 if (NextLine->MightBeFunctionDecl &&
165 NextLine->mightBeFunctionDefinition() &&
166 NextLine->First->NewlinesBefore == 1 &&
167 OperateLine->First->is(TT_FunctionLikeOrFreestandingMacro)) {
168 return true;
169 }
170 }
171
172 if (Style.isCSharp() && OperateLine->First->is(TT_AttributeSquare))
173 return true;
174 return false;
175 };
176
177 if (HasEnumOnLine() &&
178 !LikelyDefinition(CurrentLine, /*ExcludeEnum=*/true)) {
179 // We have no scope opening/closing information for enum.
180 IsDefBlock = true;
181 OpeningLineIndex = I;
// Move upward past lines that may precede the definition, so the newline
// change lands in front of the first of them.
182 while (OpeningLineIndex > 0 && MayPrecedeDefinition())
183 --OpeningLineIndex;
184 OpeningLine = Lines[OpeningLineIndex];
185 TargetLine = OpeningLine;
186 TargetToken = TargetLine->First;
// Under IsNeverStyle the bool converts to the count: 0 newlines for the
// first line of the file, 1 otherwise.
187 if (!FollowingOtherOpening())
188 InsertReplacement(NewlineCount);
189 else if (IsNeverStyle)
190 InsertReplacement(OpeningLineIndex != 0);
// Look for the closing brace on the current line; if there is none, advance
// I to the first following line that starts with `}`.
191 TargetLine = CurrentLine;
192 TargetToken = TargetLine->First;
193 while (TargetToken && TargetToken->isNot(tok::r_brace))
194 TargetToken = TargetToken->Next;
195 if (!TargetToken)
196 while (I < Lines.size() && Lines[I]->First->isNot(tok::r_brace))
197 ++I;
198 } else if (CurrentLine->First->closesScope()) {
// NOTE(review): an index equal to Lines.size() passes this guard and is then
// used to subscript Lines below -- confirm that unmatched lines always carry
// an index strictly greater than the size, or whether `>=` was intended.
199 if (OpeningLineIndex > Lines.size())
200 continue;
201 // Handling the case that opening brace has its own line, with checking
202 // whether the last line already had an opening brace to guard against
203 // misrecognition.
204 if (OpeningLineIndex > 0 &&
205 Lines[OpeningLineIndex]->First->is(tok::l_brace) &&
206 Lines[OpeningLineIndex - 1]->Last->isNot(tok::l_brace)) {
207 --OpeningLineIndex;
208 }
209 OpeningLine = Lines[OpeningLineIndex];
210 // Closing a function definition.
211 if (LikelyDefinition(OpeningLine)) {
212 IsDefBlock = true;
213 while (OpeningLineIndex > 0 && MayPrecedeDefinition())
214 --OpeningLineIndex;
215 OpeningLine = Lines[OpeningLineIndex];
216 TargetLine = OpeningLine;
217 TargetToken = TargetLine->First;
218 if (!FollowingOtherOpening()) {
219 // Avoid duplicated replacement.
220 if (TargetToken->isNot(tok::l_brace))
221 InsertReplacement(NewlineCount);
222 } else if (IsNeverStyle) {
223 InsertReplacement(OpeningLineIndex != 0);
224 }
225 }
226 }
227
228 // Not the last token.
229 if (IsDefBlock && I + 1 < Lines.size()) {
230 OpeningLineIndex = I + 1;
231 TargetLine = Lines[OpeningLineIndex];
232 TargetToken = TargetLine->First;
233
234 // No empty line for continuously closing scopes. The token will be
235 // handled in another case if the line following is opening a
236 // definition.
237 if (!TargetToken->closesScope() && !IsPPConditional(OpeningLineIndex)) {
238 // Check whether current line may precede a definition line.
239 while (OpeningLineIndex + 1 < Lines.size() &&
240 MayPrecedeDefinition(/*Direction=*/0)) {
241 ++OpeningLineIndex;
242 }
243 TargetLine = Lines[OpeningLineIndex];
// Only when the lines after the block do not lead into another definition is
// the newline change applied here, on the line right after the block.
244 if (!LikelyDefinition(TargetLine)) {
245 OpeningLineIndex = I + 1;
246 TargetLine = Lines[I + 1];
247 TargetToken = TargetLine->First;
248 InsertReplacement(NewlineCount);
249 }
250 } else if (IsNeverStyle) {
251 InsertReplacement(/*NewlineToInsert=*/1);
252 }
253 }
254 }
// NOTE(review): when add() fails, the function returns without reporting or
// explicitly consuming the returned error object -- confirm that silently
// dropping the remaining replacements (and the error itself) is acceptable.
255 for (const auto &R : Whitespaces.generateReplacements()) {
256 // The add method returns an Error instance which simulates program exit
257 // code through overloading boolean operator, thus false here indicates
258 // success.
259 if (Result.add(R))
260 return;
261 }
262}
263} // namespace format
264} // namespace clang
This file declares DefinitionBlockSeparator, a TokenAnalyzer that inserts or removes empty lines separating definition blocks like classes, structs, functions, enums, and namespaces.
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
bool computeAffectedLines(SmallVectorImpl< AnnotatedLine * > &Lines)
std::pair< tooling::Replacements, unsigned > analyze(TokenAnnotator &Annotator, SmallVectorImpl< AnnotatedLine * > &AnnotatedLines, FormatTokenLexer &Tokens) override
SourceManager & getSourceManager() const
Definition: TokenAnalyzer.h:38
AffectedRangeManager AffectedRangeMgr
Definition: TokenAnalyzer.h:99
const Environment & Env
Definition: TokenAnalyzer.h:97
Determines extra information about the tokens comprising an UnwrappedLine.
static bool inputUsesCRLF(StringRef Text, bool DefaultToCRLF)
Infers whether the input is using CRLF.
Maintains a set of replacements that are conflict-free.
Definition: Replacement.h:212
bool isClangFormatOn(StringRef Comment)
Definition: Format.cpp:4432
The JSON file list parser is used to communicate input to InstallAPI.
@ Result
The result type of a method or function.
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's lexer.
Definition: FormatToken.h:1026
@ SDS_Never
Remove any empty line between definition blocks.
Definition: Format.h:4294
@ SDS_Always
Insert an empty line between definition blocks.
Definition: Format.h:4292
@ SDS_Leave
Leave definition blocks as they are.
Definition: Format.h:4290
@ LE_DeriveCRLF
Use \r\n unless the input has more lines ending in \n.
Definition: Format.h:3411
bool isCSharp() const
Definition: Format.h:3384
bool isJavaScript() const
Definition: Format.h:3387
LineEndingStyle LineEnding
Line ending style (\n or \r\n) to use.
Definition: Format.h:3416
SeparateDefinitionStyle SeparateDefinitionBlocks
Specifies the use of empty lines to separate definition blocks, including classes, structs, enums, and functions.
Definition: Format.h:4343
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:300