clang 22.0.0git
UnwrappedLineParser.cpp
Go to the documentation of this file.
1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains the implementation of the UnwrappedLineParser,
11/// which turns a stream of tokens into UnwrappedLines.
12///
13//===----------------------------------------------------------------------===//
14
15#include "UnwrappedLineParser.h"
16#include "FormatToken.h"
17#include "FormatTokenSource.h"
18#include "Macros.h"
19#include "TokenAnnotator.h"
21#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/raw_os_ostream.h"
25#include "llvm/Support/raw_ostream.h"
26
27#include <utility>
28
29#define DEBUG_TYPE "format-parser"
30
31namespace clang {
32namespace format {
33
34namespace {
35
36void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
37 StringRef Prefix = "", bool PrintText = false) {
38 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
39 << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
40 bool NewLine = false;
41 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
42 E = Line.Tokens.end();
43 I != E; ++I) {
44 if (NewLine) {
45 OS << Prefix;
46 NewLine = false;
47 }
48 OS << I->Tok->Tok.getName() << "["
49 << "T=" << (unsigned)I->Tok->getType()
50 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
51 << "\"] ";
52 for (const auto *CI = I->Children.begin(), *CE = I->Children.end();
53 CI != CE; ++CI) {
54 OS << "\n";
55 printLine(OS, *CI, (Prefix + " ").str());
56 NewLine = true;
57 }
58 }
59 if (!NewLine)
60 OS << "\n";
61}
62
63LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
64 printLine(llvm::dbgs(), Line);
65}
66
67class ScopedDeclarationState {
68public:
69 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
70 bool MustBeDeclaration)
71 : Line(Line), Stack(Stack) {
72 Line.MustBeDeclaration = MustBeDeclaration;
73 Stack.push_back(MustBeDeclaration);
74 }
75 ~ScopedDeclarationState() {
76 Stack.pop_back();
77 if (!Stack.empty())
78 Line.MustBeDeclaration = Stack.back();
79 else
80 Line.MustBeDeclaration = true;
81 }
82
83private:
84 UnwrappedLine &Line;
85 llvm::BitVector &Stack;
86};
87
88} // end anonymous namespace
89
90std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line) {
91 llvm::raw_os_ostream OS(Stream);
92 printLine(OS, Line);
93 return Stream;
94}
95
97public:
99 bool SwitchToPreprocessorLines = false)
100 : Parser(Parser), OriginalLines(Parser.CurrentLines) {
101 if (SwitchToPreprocessorLines)
102 Parser.CurrentLines = &Parser.PreprocessorDirectives;
103 else if (!Parser.Line->Tokens.empty())
104 Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
105 PreBlockLine = std::move(Parser.Line);
106 Parser.Line = std::make_unique<UnwrappedLine>();
107 Parser.Line->Level = PreBlockLine->Level;
108 Parser.Line->PPLevel = PreBlockLine->PPLevel;
109 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
110 Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
111 Parser.Line->UnbracedBodyLevel = PreBlockLine->UnbracedBodyLevel;
112 }
113
115 if (!Parser.Line->Tokens.empty())
116 Parser.addUnwrappedLine();
117 assert(Parser.Line->Tokens.empty());
118 Parser.Line = std::move(PreBlockLine);
119 if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
120 Parser.AtEndOfPPLine = true;
121 Parser.CurrentLines = OriginalLines;
122 }
123
124private:
126
127 std::unique_ptr<UnwrappedLine> PreBlockLine;
128 SmallVectorImpl<UnwrappedLine> *OriginalLines;
129};
130
132public:
134 const FormatStyle &Style, unsigned &LineLevel)
136 Style.BraceWrapping.AfterControlStatement ==
137 FormatStyle::BWACS_Always,
138 Style.BraceWrapping.IndentBraces) {}
140 bool WrapBrace, bool IndentBrace)
141 : LineLevel(LineLevel), OldLineLevel(LineLevel) {
142 if (WrapBrace)
143 Parser->addUnwrappedLine();
144 if (IndentBrace)
145 ++LineLevel;
146 }
147 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
148
149private:
150 unsigned &LineLevel;
151 unsigned OldLineLevel;
152};
153
155 SourceManager &SourceMgr, const FormatStyle &Style,
156 const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
158 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
159 IdentifierTable &IdentTable)
160 : Line(new UnwrappedLine), AtEndOfPPLine(false), CurrentLines(&Lines),
161 Style(Style), IsCpp(Style.isCpp()),
162 LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords),
163 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
164 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
165 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
166 ? IG_Rejected
167 : IG_Inited),
168 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
169 Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
170
171void UnwrappedLineParser::reset() {
172 PPBranchLevel = -1;
173 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
174 ? IG_Rejected
175 : IG_Inited;
176 IncludeGuardToken = nullptr;
177 Line.reset(new UnwrappedLine);
178 CommentsBeforeNextToken.clear();
179 FormatTok = nullptr;
180 AtEndOfPPLine = false;
181 IsDecltypeAutoFunction = false;
182 PreprocessorDirectives.clear();
183 CurrentLines = &Lines;
184 DeclarationScopeStack.clear();
185 NestedTooDeep.clear();
186 NestedLambdas.clear();
187 PPStack.clear();
188 Line->FirstStartColumn = FirstStartColumn;
189
190 if (!Unexpanded.empty())
191 for (FormatToken *Token : AllTokens)
192 Token->MacroCtx.reset();
193 CurrentExpandedLines.clear();
194 ExpandedLines.clear();
195 Unexpanded.clear();
196 InExpansion = false;
197 Reconstruct.reset();
198}
199
201 IndexedTokenSource TokenSource(AllTokens);
202 Line->FirstStartColumn = FirstStartColumn;
203 do {
204 LLVM_DEBUG(llvm::dbgs() << "----\n");
205 reset();
206 Tokens = &TokenSource;
207 TokenSource.reset();
208
209 readToken();
210 parseFile();
211
212 // If we found an include guard then all preprocessor directives (other than
213 // the guard) are over-indented by one.
214 if (IncludeGuard == IG_Found) {
215 for (auto &Line : Lines)
216 if (Line.InPPDirective && Line.Level > 0)
217 --Line.Level;
218 }
219
220 // Create line with eof token.
221 assert(eof());
222 pushToken(FormatTok);
223 addUnwrappedLine();
224
225 // In a first run, format everything with the lines containing macro calls
226 // replaced by the expansion.
227 if (!ExpandedLines.empty()) {
228 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
229 for (const auto &Line : Lines) {
230 if (!Line.Tokens.empty()) {
231 auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
232 if (it != ExpandedLines.end()) {
233 for (const auto &Expanded : it->second) {
234 LLVM_DEBUG(printDebugInfo(Expanded));
235 Callback.consumeUnwrappedLine(Expanded);
236 }
237 continue;
238 }
239 }
240 LLVM_DEBUG(printDebugInfo(Line));
241 Callback.consumeUnwrappedLine(Line);
242 }
243 Callback.finishRun();
244 }
245
246 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
247 for (const UnwrappedLine &Line : Lines) {
248 LLVM_DEBUG(printDebugInfo(Line));
249 Callback.consumeUnwrappedLine(Line);
250 }
251 Callback.finishRun();
252 Lines.clear();
253 while (!PPLevelBranchIndex.empty() &&
254 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
255 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
256 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
257 }
258 if (!PPLevelBranchIndex.empty()) {
259 ++PPLevelBranchIndex.back();
260 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
261 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
262 }
263 } while (!PPLevelBranchIndex.empty());
264}
265
266void UnwrappedLineParser::parseFile() {
267 // The top-level context in a file always has declarations, except for pre-
268 // processor directives and JavaScript files.
269 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
270 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
271 MustBeDeclaration);
272 if (Style.isTextProto() || (Style.isJson() && FormatTok->IsFirst))
273 parseBracedList();
274 else
275 parseLevel();
276 // Make sure to format the remaining tokens.
277 //
278 // LK_TextProto is special since its top-level is parsed as the body of a
279 // braced list, which does not necessarily have natural line separators such
280 // as a semicolon. Comments after the last entry that have been determined to
281 // not belong to that line, as in:
282 // key: value
283 // // endfile comment
284 // do not have a chance to be put on a line of their own until this point.
285 // Here we add this newline before end-of-file comments.
286 if (Style.isTextProto() && !CommentsBeforeNextToken.empty())
287 addUnwrappedLine();
288 flushComments(true);
289 addUnwrappedLine();
290}
291
292void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
293 do {
294 switch (FormatTok->Tok.getKind()) {
295 case tok::l_brace:
296 case tok::semi:
297 return;
298 default:
299 if (FormatTok->is(Keywords.kw_where)) {
300 addUnwrappedLine();
301 nextToken();
302 parseCSharpGenericTypeConstraint();
303 break;
304 }
305 nextToken();
306 break;
307 }
308 } while (!eof());
309}
310
311void UnwrappedLineParser::parseCSharpAttribute() {
312 int UnpairedSquareBrackets = 1;
313 do {
314 switch (FormatTok->Tok.getKind()) {
315 case tok::r_square:
316 nextToken();
317 --UnpairedSquareBrackets;
318 if (UnpairedSquareBrackets == 0) {
319 addUnwrappedLine();
320 return;
321 }
322 break;
323 case tok::l_square:
324 ++UnpairedSquareBrackets;
325 nextToken();
326 break;
327 default:
328 nextToken();
329 break;
330 }
331 } while (!eof());
332}
333
334bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
335 if (!Lines.empty() && Lines.back().InPPDirective)
336 return true;
337
338 const FormatToken *Previous = Tokens->getPreviousToken();
339 return Previous && Previous->is(tok::comment) &&
340 (Previous->IsMultiline || Previous->NewlinesBefore > 0);
341}
342
343/// Parses a level, that is ???.
344/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
345/// \param IfKind The \p if statement kind in the level.
346/// \param IfLeftBrace The left brace of the \p if block in the level.
347/// \returns true if a simple block of if/else/for/while, or false otherwise.
348/// (A simple block has a single statement.)
349bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
350 IfStmtKind *IfKind,
351 FormatToken **IfLeftBrace) {
352 const bool InRequiresExpression =
353 OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
354 const bool IsPrecededByCommentOrPPDirective =
355 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
356 FormatToken *IfLBrace = nullptr;
357 bool HasDoWhile = false;
358 bool HasLabel = false;
359 unsigned StatementCount = 0;
360 bool SwitchLabelEncountered = false;
361
362 do {
363 if (FormatTok->isAttribute()) {
364 nextToken();
365 if (FormatTok->is(tok::l_paren))
366 parseParens();
367 continue;
368 }
369 tok::TokenKind Kind = FormatTok->Tok.getKind();
370 if (FormatTok->is(TT_MacroBlockBegin))
371 Kind = tok::l_brace;
372 else if (FormatTok->is(TT_MacroBlockEnd))
373 Kind = tok::r_brace;
374
375 auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
376 &HasLabel, &StatementCount] {
377 parseStructuralElement(OpeningBrace, IfKind, &IfLBrace,
378 HasDoWhile ? nullptr : &HasDoWhile,
379 HasLabel ? nullptr : &HasLabel);
380 ++StatementCount;
381 assert(StatementCount > 0 && "StatementCount overflow!");
382 };
383
384 switch (Kind) {
385 case tok::comment:
386 nextToken();
387 addUnwrappedLine();
388 break;
389 case tok::l_brace:
390 if (InRequiresExpression) {
391 FormatTok->setFinalizedType(TT_CompoundRequirementLBrace);
392 } else if (FormatTok->Previous &&
393 FormatTok->Previous->ClosesRequiresClause) {
394 // We need the 'default' case here to correctly parse a function
395 // l_brace.
396 ParseDefault();
397 continue;
398 }
399 if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin)) {
400 if (tryToParseBracedList())
401 continue;
402 FormatTok->setFinalizedType(TT_BlockLBrace);
403 }
404 parseBlock();
405 ++StatementCount;
406 assert(StatementCount > 0 && "StatementCount overflow!");
407 addUnwrappedLine();
408 break;
409 case tok::r_brace:
410 if (OpeningBrace) {
411 if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
412 !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
413 return false;
414 }
415 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
416 HasDoWhile || IsPrecededByCommentOrPPDirective ||
417 precededByCommentOrPPDirective()) {
418 return false;
419 }
420 const FormatToken *Next = Tokens->peekNextToken();
421 if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
422 return false;
423 if (IfLeftBrace)
424 *IfLeftBrace = IfLBrace;
425 return true;
426 }
427 nextToken();
428 addUnwrappedLine();
429 break;
430 case tok::kw_default: {
431 unsigned StoredPosition = Tokens->getPosition();
432 auto *Next = Tokens->getNextNonComment();
433 FormatTok = Tokens->setPosition(StoredPosition);
434 if (!Next->isOneOf(tok::colon, tok::arrow)) {
435 // default not followed by `:` or `->` is not a case label; treat it
436 // like an identifier.
437 parseStructuralElement();
438 break;
439 }
440 // Else, if it is 'default:', fall through to the case handling.
441 [[fallthrough]];
442 }
443 case tok::kw_case:
444 if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
445 (Style.isJavaScript() && Line->MustBeDeclaration)) {
446 // Proto: there are no switch/case statements
447 // Verilog: Case labels don't have this word. We handle case
448 // labels including default in TokenAnnotator.
449 // JavaScript: A 'case: string' style field declaration.
450 ParseDefault();
451 break;
452 }
453 if (!SwitchLabelEncountered &&
454 (Style.IndentCaseLabels ||
455 (OpeningBrace && OpeningBrace->is(TT_SwitchExpressionLBrace)) ||
456 (Line->InPPDirective && Line->Level == 1))) {
457 ++Line->Level;
458 }
459 SwitchLabelEncountered = true;
460 parseStructuralElement();
461 break;
462 case tok::l_square:
463 if (Style.isCSharp()) {
464 nextToken();
465 parseCSharpAttribute();
466 break;
467 }
468 if (handleCppAttributes())
469 break;
470 [[fallthrough]];
471 default:
472 ParseDefault();
473 break;
474 }
475 } while (!eof());
476
477 return false;
478}
479
480void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
481 // We'll parse forward through the tokens until we hit
482 // a closing brace or eof - note that getNextToken() will
483 // parse macros, so this will magically work inside macro
484 // definitions, too.
485 unsigned StoredPosition = Tokens->getPosition();
486 FormatToken *Tok = FormatTok;
487 const FormatToken *PrevTok = Tok->Previous;
488 // Keep a stack of positions of lbrace tokens. We will
489 // update information about whether an lbrace starts a
490 // braced init list or a different block during the loop.
491 struct StackEntry {
492 FormatToken *Tok;
493 const FormatToken *PrevTok;
494 };
495 SmallVector<StackEntry, 8> LBraceStack;
496 assert(Tok->is(tok::l_brace));
497
498 do {
499 auto *NextTok = Tokens->getNextNonComment();
500
501 if (!Line->InMacroBody && !Style.isTableGen()) {
502 // Skip PPDirective lines (except macro definitions) and comments.
503 while (NextTok->is(tok::hash)) {
504 NextTok = Tokens->getNextToken();
505 if (NextTok->isOneOf(tok::pp_not_keyword, tok::pp_define))
506 break;
507 do {
508 NextTok = Tokens->getNextToken();
509 } while (!NextTok->HasUnescapedNewline && NextTok->isNot(tok::eof));
510
511 while (NextTok->is(tok::comment))
512 NextTok = Tokens->getNextToken();
513 }
514 }
515
516 switch (Tok->Tok.getKind()) {
517 case tok::l_brace:
518 if (Style.isJavaScript() && PrevTok) {
519 if (PrevTok->isOneOf(tok::colon, tok::less)) {
520 // A ':' indicates this code is in a type, or a braced list
521 // following a label in an object literal ({a: {b: 1}}).
522 // A '<' could be an object used in a comparison, but that is nonsense
523 // code (can never return true), so more likely it is a generic type
524 // argument (`X<{a: string; b: number}>`).
525 // The code below could be confused by semicolons between the
526 // individual members in a type member list, which would normally
527 // trigger BK_Block. In both cases, this must be parsed as an inline
528 // braced init.
530 } else if (PrevTok->is(tok::r_paren)) {
531 // `) { }` can only occur in function or method declarations in JS.
532 Tok->setBlockKind(BK_Block);
533 }
534 } else {
535 Tok->setBlockKind(BK_Unknown);
536 }
537 LBraceStack.push_back({Tok, PrevTok});
538 break;
539 case tok::r_brace:
540 if (LBraceStack.empty())
541 break;
542 if (auto *LBrace = LBraceStack.back().Tok; LBrace->is(BK_Unknown)) {
543 bool ProbablyBracedList = false;
544 if (Style.Language == FormatStyle::LK_Proto) {
545 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
546 } else if (LBrace->isNot(TT_EnumLBrace)) {
547 // Using OriginalColumn to distinguish between ObjC methods and
548 // binary operators is a bit hacky.
549 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
550 NextTok->OriginalColumn == 0;
551
552 // Try to detect a braced list. Note that regardless how we mark inner
553 // braces here, we will overwrite the BlockKind later if we parse a
554 // braced list (where all blocks inside are by default braced lists),
555 // or when we explicitly detect blocks (for example while parsing
556 // lambdas).
557
558 // If we already marked the opening brace as braced list, the closing
559 // must also be part of it.
560 ProbablyBracedList = LBrace->is(TT_BracedListLBrace);
561
562 ProbablyBracedList = ProbablyBracedList ||
563 (Style.isJavaScript() &&
564 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
565 Keywords.kw_as));
566 ProbablyBracedList =
567 ProbablyBracedList ||
568 (IsCpp && (PrevTok->Tok.isLiteral() ||
569 NextTok->isOneOf(tok::l_paren, tok::arrow)));
570
571 // If there is a comma, semicolon or right paren after the closing
572 // brace, we assume this is a braced initializer list.
573 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
574 // braced list in JS.
575 ProbablyBracedList =
576 ProbablyBracedList ||
577 NextTok->isOneOf(tok::comma, tok::period, tok::colon,
578 tok::r_paren, tok::r_square, tok::ellipsis);
579
580 // Distinguish between braced list in a constructor initializer list
581 // followed by constructor body, or just adjacent blocks.
582 ProbablyBracedList =
583 ProbablyBracedList ||
584 (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
585 LBraceStack.back().PrevTok->isOneOf(tok::identifier,
586 tok::greater));
587
588 ProbablyBracedList =
589 ProbablyBracedList ||
590 (NextTok->is(tok::identifier) &&
591 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
592
593 ProbablyBracedList = ProbablyBracedList ||
594 (NextTok->is(tok::semi) &&
595 (!ExpectClassBody || LBraceStack.size() != 1));
596
597 ProbablyBracedList =
598 ProbablyBracedList ||
599 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
600
601 if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
602 // We can have an array subscript after a braced init
603 // list, but C++11 attributes are expected after blocks.
604 NextTok = Tokens->getNextToken();
605 ProbablyBracedList = NextTok->isNot(tok::l_square);
606 }
607
608 // Cpp macro definition body that is a nonempty braced list or block:
609 if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
610 !FormatTok->Previous && NextTok->is(tok::eof) &&
611 // A statement can end with only `;` (simple statement), a block
612 // closing brace (compound statement), or `:` (label statement).
613 // If PrevTok is a block opening brace, Tok ends an empty block.
614 !PrevTok->isOneOf(tok::semi, BK_Block, tok::colon)) {
615 ProbablyBracedList = true;
616 }
617 }
618 const auto BlockKind = ProbablyBracedList ? BK_BracedInit : BK_Block;
619 Tok->setBlockKind(BlockKind);
620 LBrace->setBlockKind(BlockKind);
621 }
622 LBraceStack.pop_back();
623 break;
624 case tok::identifier:
625 if (Tok->isNot(TT_StatementMacro))
626 break;
627 [[fallthrough]];
628 case tok::at:
629 case tok::semi:
630 case tok::kw_if:
631 case tok::kw_while:
632 case tok::kw_for:
633 case tok::kw_switch:
634 case tok::kw_try:
635 case tok::kw___try:
636 if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
637 LBraceStack.back().Tok->setBlockKind(BK_Block);
638 break;
639 default:
640 break;
641 }
642
643 PrevTok = Tok;
644 Tok = NextTok;
645 } while (Tok->isNot(tok::eof) && !LBraceStack.empty());
646
647 // Assume other blocks for all unclosed opening braces.
648 for (const auto &Entry : LBraceStack)
649 if (Entry.Tok->is(BK_Unknown))
650 Entry.Tok->setBlockKind(BK_Block);
651
652 FormatTok = Tokens->setPosition(StoredPosition);
653}
654
655// Sets the token type of the directly previous right brace.
656void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
657 if (auto Prev = FormatTok->getPreviousNonComment();
658 Prev && Prev->is(tok::r_brace)) {
659 Prev->setFinalizedType(Type);
660 }
661}
662
/// Mixes the std::hash of \p v into the running hash value \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
668
669size_t UnwrappedLineParser::computePPHash() const {
670 size_t h = 0;
671 for (const auto &i : PPStack) {
672 hash_combine(h, size_t(i.Kind));
673 hash_combine(h, i.Line);
674 }
675 return h;
676}
677
678// Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
679// is not null, subtracts its length (plus the preceding space) when computing
680// the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
681// running the token annotator on it so that we can restore them afterward.
682bool UnwrappedLineParser::mightFitOnOneLine(
683 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
684 const auto ColumnLimit = Style.ColumnLimit;
685 if (ColumnLimit == 0)
686 return true;
687
688 auto &Tokens = ParsedLine.Tokens;
689 assert(!Tokens.empty());
690
691 const auto *LastToken = Tokens.back().Tok;
692 assert(LastToken);
693
694 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
695
696 int Index = 0;
697 for (const auto &Token : Tokens) {
698 assert(Token.Tok);
699 auto &SavedToken = SavedTokens[Index++];
700 SavedToken.Tok = new FormatToken;
701 SavedToken.Tok->copyFrom(*Token.Tok);
702 SavedToken.Children = std::move(Token.Children);
703 }
704
705 AnnotatedLine Line(ParsedLine);
706 assert(Line.Last == LastToken);
707
708 TokenAnnotator Annotator(Style, Keywords);
709 Annotator.annotate(Line);
710 Annotator.calculateFormattingInformation(Line);
711
712 auto Length = LastToken->TotalLength;
713 if (OpeningBrace) {
714 assert(OpeningBrace != Tokens.front().Tok);
715 if (auto Prev = OpeningBrace->Previous;
716 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
717 Length -= ColumnLimit;
718 }
719 Length -= OpeningBrace->TokenText.size() + 1;
720 }
721
722 if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
723 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
724 Length -= FirstToken->TokenText.size() + 1;
725 }
726
727 Index = 0;
728 for (auto &Token : Tokens) {
729 const auto &SavedToken = SavedTokens[Index++];
730 Token.Tok->copyFrom(*SavedToken.Tok);
731 Token.Children = std::move(SavedToken.Children);
732 delete SavedToken.Tok;
733 }
734
735 // If these change PPLevel needs to be used for get correct indentation.
736 assert(!Line.InMacroBody);
737 assert(!Line.InPPDirective);
738 return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
739}
740
741FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
742 unsigned AddLevels, bool MunchSemi,
743 bool KeepBraces,
744 IfStmtKind *IfKind,
745 bool UnindentWhitesmithsBraces) {
746 auto HandleVerilogBlockLabel = [this]() {
747 // ":" name
748 if (Style.isVerilog() && FormatTok->is(tok::colon)) {
749 nextToken();
750 if (Keywords.isVerilogIdentifier(*FormatTok))
751 nextToken();
752 }
753 };
754
755 // Whether this is a Verilog-specific block that has a special header like a
756 // module.
757 const bool VerilogHierarchy =
758 Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
759 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
760 (Style.isVerilog() &&
761 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
762 "'{' or macro block token expected");
763 FormatToken *Tok = FormatTok;
764 const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
765 auto Index = CurrentLines->size();
766 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
767 FormatTok->setBlockKind(BK_Block);
768
769 // For Whitesmiths mode, jump to the next level prior to skipping over the
770 // braces.
771 if (!VerilogHierarchy && AddLevels > 0 &&
773 ++Line->Level;
774 }
775
776 size_t PPStartHash = computePPHash();
777
778 const unsigned InitialLevel = Line->Level;
779 if (VerilogHierarchy) {
780 AddLevels += parseVerilogHierarchyHeader();
781 } else {
782 nextToken(/*LevelDifference=*/AddLevels);
783 HandleVerilogBlockLabel();
784 }
785
786 // Bail out if there are too many levels. Otherwise, the stack might overflow.
787 if (Line->Level > 300)
788 return nullptr;
789
790 if (MacroBlock && FormatTok->is(tok::l_paren))
791 parseParens();
792
793 size_t NbPreprocessorDirectives =
794 !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
795 addUnwrappedLine();
796 size_t OpeningLineIndex =
797 CurrentLines->empty()
799 : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
800
801 // Whitesmiths is weird here. The brace needs to be indented for the namespace
802 // block, but the block itself may not be indented depending on the style
803 // settings. This allows the format to back up one level in those cases.
804 if (UnindentWhitesmithsBraces)
805 --Line->Level;
806
807 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
808 MustBeDeclaration);
809 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
810 Line->Level += AddLevels;
811
812 FormatToken *IfLBrace = nullptr;
813 const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace);
814
815 if (eof())
816 return IfLBrace;
817
818 if (MacroBlock ? FormatTok->isNot(TT_MacroBlockEnd)
819 : FormatTok->isNot(tok::r_brace)) {
820 Line->Level = InitialLevel;
821 FormatTok->setBlockKind(BK_Block);
822 return IfLBrace;
823 }
824
825 if (FormatTok->is(tok::r_brace)) {
826 FormatTok->setBlockKind(BK_Block);
827 if (Tok->is(TT_NamespaceLBrace))
828 FormatTok->setFinalizedType(TT_NamespaceRBrace);
829 }
830
831 const bool IsFunctionRBrace =
832 FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);
833
834 auto RemoveBraces = [=]() mutable {
835 if (!SimpleBlock)
836 return false;
837 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
838 assert(FormatTok->is(tok::r_brace));
839 const bool WrappedOpeningBrace = !Tok->Previous;
840 if (WrappedOpeningBrace && FollowedByComment)
841 return false;
842 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
843 if (KeepBraces && !HasRequiredIfBraces)
844 return false;
845 if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
846 const FormatToken *Previous = Tokens->getPreviousToken();
847 assert(Previous);
848 if (Previous->is(tok::r_brace) && !Previous->Optional)
849 return false;
850 }
851 assert(!CurrentLines->empty());
852 auto &LastLine = CurrentLines->back();
853 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
854 return false;
855 if (Tok->is(TT_ElseLBrace))
856 return true;
857 if (WrappedOpeningBrace) {
858 assert(Index > 0);
859 --Index; // The line above the wrapped l_brace.
860 Tok = nullptr;
861 }
862 return mightFitOnOneLine((*CurrentLines)[Index], Tok);
863 };
864 if (RemoveBraces()) {
865 Tok->MatchingParen = FormatTok;
866 FormatTok->MatchingParen = Tok;
867 }
868
869 size_t PPEndHash = computePPHash();
870
871 // Munch the closing brace.
872 nextToken(/*LevelDifference=*/-AddLevels);
873
874 // When this is a function block and there is an unnecessary semicolon
875 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
876 // it later).
877 if (Style.RemoveSemicolon && IsFunctionRBrace) {
878 while (FormatTok->is(tok::semi)) {
879 FormatTok->Optional = true;
880 nextToken();
881 }
882 }
883
884 HandleVerilogBlockLabel();
885
886 if (MacroBlock && FormatTok->is(tok::l_paren))
887 parseParens();
888
889 Line->Level = InitialLevel;
890
891 if (FormatTok->is(tok::kw_noexcept)) {
892 // A noexcept in a requires expression.
893 nextToken();
894 }
895
896 if (FormatTok->is(tok::arrow)) {
897 // Following the } or noexcept we can find a trailing return type arrow
898 // as part of an implicit conversion constraint.
899 nextToken();
900 parseStructuralElement();
901 }
902
903 if (MunchSemi && FormatTok->is(tok::semi))
904 nextToken();
905
906 if (PPStartHash == PPEndHash) {
907 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
908 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
909 // Update the opening line to add the forward reference as well
910 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
911 CurrentLines->size() - 1;
912 }
913 }
914
915 return IfLBrace;
916}
917
918static bool isGoogScope(const UnwrappedLine &Line) {
919 // FIXME: Closure-library specific stuff should not be hard-coded but be
920 // configurable.
921 if (Line.Tokens.size() < 4)
922 return false;
923 auto I = Line.Tokens.begin();
924 if (I->Tok->TokenText != "goog")
925 return false;
926 ++I;
927 if (I->Tok->isNot(tok::period))
928 return false;
929 ++I;
930 if (I->Tok->TokenText != "scope")
931 return false;
932 ++I;
933 return I->Tok->is(tok::l_paren);
934}
935
936static bool isIIFE(const UnwrappedLine &Line,
937 const AdditionalKeywords &Keywords) {
938 // Look for the start of an immediately invoked anonymous function.
939 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
940 // This is commonly done in JavaScript to create a new, anonymous scope.
941 // Example: (function() { ... })()
942 if (Line.Tokens.size() < 3)
943 return false;
944 auto I = Line.Tokens.begin();
945 if (I->Tok->isNot(tok::l_paren))
946 return false;
947 ++I;
948 if (I->Tok->isNot(Keywords.kw_function))
949 return false;
950 ++I;
951 return I->Tok->is(tok::l_paren);
952}
953
954static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
955 const FormatToken &InitialToken) {
956 tok::TokenKind Kind = InitialToken.Tok.getKind();
957 if (InitialToken.is(TT_NamespaceMacro))
958 Kind = tok::kw_namespace;
959
960 switch (Kind) {
961 case tok::kw_namespace:
962 return Style.BraceWrapping.AfterNamespace;
963 case tok::kw_class:
964 return Style.BraceWrapping.AfterClass;
965 case tok::kw_union:
966 return Style.BraceWrapping.AfterUnion;
967 case tok::kw_struct:
968 return Style.BraceWrapping.AfterStruct;
969 case tok::kw_enum:
970 return Style.BraceWrapping.AfterEnum;
971 default:
972 return false;
973 }
974}
975
976void UnwrappedLineParser::parseChildBlock() {
977 assert(FormatTok->is(tok::l_brace));
978 FormatTok->setBlockKind(BK_Block);
979 const FormatToken *OpeningBrace = FormatTok;
980 nextToken();
981 {
982 bool SkipIndent = (Style.isJavaScript() &&
983 (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
984 ScopedLineState LineState(*this);
985 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
986 /*MustBeDeclaration=*/false);
987 Line->Level += SkipIndent ? 0 : 1;
988 parseLevel(OpeningBrace);
989 flushComments(isOnNewLine(*FormatTok));
990 Line->Level -= SkipIndent ? 0 : 1;
991 }
992 nextToken();
993}
994
995void UnwrappedLineParser::parsePPDirective() {
996 assert(FormatTok->is(tok::hash) && "'#' expected");
997 ScopedMacroState MacroState(*Line, Tokens, FormatTok);
998
999 nextToken();
1000
1001 if (!FormatTok->Tok.getIdentifierInfo()) {
1002 parsePPUnknown();
1003 return;
1004 }
1005
1006 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
1007 case tok::pp_define:
1008 parsePPDefine();
1009 return;
1010 case tok::pp_if:
1011 parsePPIf(/*IfDef=*/false);
1012 break;
1013 case tok::pp_ifdef:
1014 case tok::pp_ifndef:
1015 parsePPIf(/*IfDef=*/true);
1016 break;
1017 case tok::pp_else:
1018 case tok::pp_elifdef:
1019 case tok::pp_elifndef:
1020 case tok::pp_elif:
1021 parsePPElse();
1022 break;
1023 case tok::pp_endif:
1024 parsePPEndIf();
1025 break;
1026 case tok::pp_pragma:
1027 parsePPPragma();
1028 break;
1029 case tok::pp_error:
1030 case tok::pp_warning:
1031 nextToken();
1032 if (!eof() && Style.isCpp())
1033 FormatTok->setFinalizedType(TT_AfterPPDirective);
1034 [[fallthrough]];
1035 default:
1036 parsePPUnknown();
1037 break;
1038 }
1039}
1040
1041void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1042 size_t Line = CurrentLines->size();
1043 if (CurrentLines == &PreprocessorDirectives)
1044 Line += Lines.size();
1045
1046 if (Unreachable ||
1047 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1048 PPStack.push_back({PP_Unreachable, Line});
1049 } else {
1050 PPStack.push_back({PP_Conditional, Line});
1051 }
1052}
1053
1054void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1055 ++PPBranchLevel;
1056 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1057 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1058 PPLevelBranchIndex.push_back(0);
1059 PPLevelBranchCount.push_back(0);
1060 }
1061 PPChainBranchIndex.push(Unreachable ? -1 : 0);
1062 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1063 conditionalCompilationCondition(Unreachable || Skip);
1064}
1065
1066void UnwrappedLineParser::conditionalCompilationAlternative() {
1067 if (!PPStack.empty())
1068 PPStack.pop_back();
1069 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1070 if (!PPChainBranchIndex.empty())
1071 ++PPChainBranchIndex.top();
1072 conditionalCompilationCondition(
1073 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1074 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1075}
1076
1077void UnwrappedLineParser::conditionalCompilationEnd() {
1078 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1079 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1080 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1081 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1082 }
1083 // Guard against #endif's without #if.
1084 if (PPBranchLevel > -1)
1085 --PPBranchLevel;
1086 if (!PPChainBranchIndex.empty())
1087 PPChainBranchIndex.pop();
1088 if (!PPStack.empty())
1089 PPStack.pop_back();
1090}
1091
1092void UnwrappedLineParser::parsePPIf(bool IfDef) {
1093 bool IfNDef = FormatTok->is(tok::pp_ifndef);
1094 nextToken();
1095 bool Unreachable = false;
1096 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1097 Unreachable = true;
1098 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1099 Unreachable = true;
1100 conditionalCompilationStart(Unreachable);
1101 FormatToken *IfCondition = FormatTok;
1102 // If there's a #ifndef on the first line, and the only lines before it are
1103 // comments, it could be an include guard.
1104 bool MaybeIncludeGuard = IfNDef;
1105 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1106 for (auto &Line : Lines) {
1107 if (Line.Tokens.front().Tok->isNot(tok::comment)) {
1108 MaybeIncludeGuard = false;
1109 IncludeGuard = IG_Rejected;
1110 break;
1111 }
1112 }
1113 }
1114 --PPBranchLevel;
1115 parsePPUnknown();
1116 ++PPBranchLevel;
1117 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1118 IncludeGuard = IG_IfNdefed;
1119 IncludeGuardToken = IfCondition;
1120 }
1121}
1122
1123void UnwrappedLineParser::parsePPElse() {
1124 // If a potential include guard has an #else, it's not an include guard.
1125 if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1126 IncludeGuard = IG_Rejected;
1127 // Don't crash when there is an #else without an #if.
1128 assert(PPBranchLevel >= -1);
1129 if (PPBranchLevel == -1)
1130 conditionalCompilationStart(/*Unreachable=*/true);
1131 conditionalCompilationAlternative();
1132 --PPBranchLevel;
1133 parsePPUnknown();
1134 ++PPBranchLevel;
1135}
1136
1137void UnwrappedLineParser::parsePPEndIf() {
1138 conditionalCompilationEnd();
1139 parsePPUnknown();
1140 // If the #endif of a potential include guard is the last thing in the file,
1141 // then we found an include guard.
1142 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1144 IncludeGuard = IG_Found;
1145 }
1146}
1147
1148void UnwrappedLineParser::parsePPDefine() {
1149 nextToken();
1150
1151 if (!FormatTok->Tok.getIdentifierInfo()) {
1152 IncludeGuard = IG_Rejected;
1153 IncludeGuardToken = nullptr;
1154 parsePPUnknown();
1155 return;
1156 }
1157
1158 bool MaybeIncludeGuard = false;
1159 if (IncludeGuard == IG_IfNdefed &&
1160 IncludeGuardToken->TokenText == FormatTok->TokenText) {
1161 IncludeGuard = IG_Defined;
1162 IncludeGuardToken = nullptr;
1163 for (auto &Line : Lines) {
1164 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1165 IncludeGuard = IG_Rejected;
1166 break;
1167 }
1168 }
1169 MaybeIncludeGuard = IncludeGuard == IG_Defined;
1170 }
1171
1172 // In the context of a define, even keywords should be treated as normal
1173 // identifiers. Setting the kind to identifier is not enough, because we need
1174 // to treat additional keywords like __except as well, which are already
1175 // identifiers. Setting the identifier info to null interferes with include
1176 // guard processing above, and changes preprocessing nesting.
1177 FormatTok->Tok.setKind(tok::identifier);
1179 nextToken();
1180
1181 // IncludeGuard can't have a non-empty macro definition.
1182 if (MaybeIncludeGuard && !eof())
1183 IncludeGuard = IG_Rejected;
1184
1185 if (FormatTok->is(tok::l_paren) && !FormatTok->hasWhitespaceBefore())
1186 parseParens();
1188 Line->Level += PPBranchLevel + 1;
1189 addUnwrappedLine();
1190 ++Line->Level;
1191
1192 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1193 assert((int)Line->PPLevel >= 0);
1194
1195 if (eof())
1196 return;
1197
1198 Line->InMacroBody = true;
1199
1200 if (!Style.SkipMacroDefinitionBody) {
1201 // Errors during a preprocessor directive can only affect the layout of the
1202 // preprocessor directive, and thus we ignore them. An alternative approach
1203 // would be to use the same approach we use on the file level (no
1204 // re-indentation if there was a structural error) within the macro
1205 // definition.
1206 parseFile();
1207 return;
1208 }
1209
1210 for (auto *Comment : CommentsBeforeNextToken)
1211 Comment->Finalized = true;
1212
1213 do {
1214 FormatTok->Finalized = true;
1215 FormatTok = Tokens->getNextToken();
1216 } while (!eof());
1217
1218 addUnwrappedLine();
1219}
1220
1221void UnwrappedLineParser::parsePPPragma() {
1222 Line->InPragmaDirective = true;
1223 parsePPUnknown();
1224}
1225
1226void UnwrappedLineParser::parsePPUnknown() {
1227 while (!eof())
1228 nextToken();
1230 Line->Level += PPBranchLevel + 1;
1231 addUnwrappedLine();
1232}
1233
1234// Here we exclude certain tokens that are not usually the first token in an
1235// unwrapped line. This is used in attempt to distinguish macro calls without
1236// trailing semicolons from other constructs split to several lines.
1237static bool tokenCanStartNewLine(const FormatToken &Tok) {
1238 // Semicolon can be a null-statement, l_square can be a start of a macro or
1239 // a C++11 attribute, but this doesn't seem to be common.
1240 return !Tok.isOneOf(tok::semi, tok::l_brace,
1241 // Tokens that can only be used as binary operators and a
1242 // part of overloaded operator names.
1243 tok::period, tok::periodstar, tok::arrow, tok::arrowstar,
1244 tok::less, tok::greater, tok::slash, tok::percent,
1245 tok::lessless, tok::greatergreater, tok::equal,
1246 tok::plusequal, tok::minusequal, tok::starequal,
1247 tok::slashequal, tok::percentequal, tok::ampequal,
1248 tok::pipeequal, tok::caretequal, tok::greatergreaterequal,
1249 tok::lesslessequal,
1250 // Colon is used in labels, base class lists, initializer
1251 // lists, range-based for loops, ternary operator, but
1252 // should never be the first token in an unwrapped line.
1253 tok::colon,
1254 // 'noexcept' is a trailing annotation.
1255 tok::kw_noexcept);
1256}
1257
1258static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1259 const FormatToken *FormatTok) {
1260 // FIXME: This returns true for C/C++ keywords like 'struct'.
1261 return FormatTok->is(tok::identifier) &&
1262 (!FormatTok->Tok.getIdentifierInfo() ||
1263 !FormatTok->isOneOf(
1264 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1265 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1266 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1267 Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1268 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1269 Keywords.kw_instanceof, Keywords.kw_interface,
1270 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1271}
1272
1273static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1274 const FormatToken *FormatTok) {
1275 return FormatTok->Tok.isLiteral() ||
1276 FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1277 mustBeJSIdent(Keywords, FormatTok);
1278}
1279
1280// isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1281// when encountered after a value (see mustBeJSIdentOrValue).
1282static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1283 const FormatToken *FormatTok) {
1284 return FormatTok->isOneOf(
1285 tok::kw_return, Keywords.kw_yield,
1286 // conditionals
1287 tok::kw_if, tok::kw_else,
1288 // loops
1289 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1290 // switch/case
1291 tok::kw_switch, tok::kw_case,
1292 // exceptions
1293 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1294 // declaration
1295 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1296 Keywords.kw_async, Keywords.kw_function,
1297 // import/export
1298 Keywords.kw_import, tok::kw_export);
1299}
1300
1301// Checks whether a token is a type in K&R C (aka C78).
1302static bool isC78Type(const FormatToken &Tok) {
1303 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1304 tok::kw_unsigned, tok::kw_float, tok::kw_double,
1305 tok::identifier);
1306}
1307
1308// This function checks whether a token starts the first parameter declaration
1309// in a K&R C (aka C78) function definition, e.g.:
1310// int f(a, b)
1311// short a, b;
1312// {
1313// return a + b;
1314// }
1315static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1316 const FormatToken *FuncName) {
1317 assert(Tok);
1318 assert(Next);
1319 assert(FuncName);
1320
1321 if (FuncName->isNot(tok::identifier))
1322 return false;
1323
1324 const FormatToken *Prev = FuncName->Previous;
1325 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1326 return false;
1327
1328 if (!isC78Type(*Tok) &&
1329 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1330 return false;
1331 }
1332
1333 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1334 return false;
1335
1336 Tok = Tok->Previous;
1337 if (!Tok || Tok->isNot(tok::r_paren))
1338 return false;
1339
1340 Tok = Tok->Previous;
1341 if (!Tok || Tok->isNot(tok::identifier))
1342 return false;
1343
1344 return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1345}
1346
1347bool UnwrappedLineParser::parseModuleImport() {
1348 assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1349
1350 if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1351 !Token->Tok.getIdentifierInfo() &&
1352 !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
1353 return false;
1354 }
1355
1356 nextToken();
1357 while (!eof()) {
1358 if (FormatTok->is(tok::colon)) {
1359 FormatTok->setFinalizedType(TT_ModulePartitionColon);
1360 }
1361 // Handle import <foo/bar.h> as we would an include statement.
1362 else if (FormatTok->is(tok::less)) {
1363 nextToken();
1364 while (!FormatTok->isOneOf(tok::semi, tok::greater) && !eof()) {
1365 // Mark tokens up to the trailing line comments as implicit string
1366 // literals.
1367 if (FormatTok->isNot(tok::comment) &&
1368 !FormatTok->TokenText.starts_with("//")) {
1369 FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1370 }
1371 nextToken();
1372 }
1373 }
1374 if (FormatTok->is(tok::semi)) {
1375 nextToken();
1376 break;
1377 }
1378 nextToken();
1379 }
1380
1381 addUnwrappedLine();
1382 return true;
1383}
1384
1385// readTokenWithJavaScriptASI reads the next token and terminates the current
1386// line if JavaScript Automatic Semicolon Insertion must
1387// happen between the current token and the next token.
1388//
1389// This method is conservative - it cannot cover all edge cases of JavaScript,
1390// but only aims to correctly handle certain well known cases. It *must not*
1391// return true in speculative cases.
1392void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1393 FormatToken *Previous = FormatTok;
1394 readToken();
1395 FormatToken *Next = FormatTok;
1396
1397 bool IsOnSameLine =
1398 CommentsBeforeNextToken.empty()
1399 ? Next->NewlinesBefore == 0
1400 : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1401 if (IsOnSameLine)
1402 return;
1403
1404 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1405 bool PreviousStartsTemplateExpr =
1406 Previous->is(TT_TemplateString) && Previous->TokenText.ends_with("${");
1407 if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1408 // If the line contains an '@' sign, the previous token might be an
1409 // annotation, which can precede another identifier/value.
1410 bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1411 return LineNode.Tok->is(tok::at);
1412 });
1413 if (HasAt)
1414 return;
1415 }
1416 if (Next->is(tok::exclaim) && PreviousMustBeValue)
1417 return addUnwrappedLine();
1418 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1419 bool NextEndsTemplateExpr =
1420 Next->is(TT_TemplateString) && Next->TokenText.starts_with("}");
1421 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1422 (PreviousMustBeValue ||
1423 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1424 tok::minusminus))) {
1425 return addUnwrappedLine();
1426 }
1427 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1428 isJSDeclOrStmt(Keywords, Next)) {
1429 return addUnwrappedLine();
1430 }
1431}
1432
1433void UnwrappedLineParser::parseStructuralElement(
1434 const FormatToken *OpeningBrace, IfStmtKind *IfKind,
1435 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1436 if (Style.isTableGen() && FormatTok->is(tok::pp_include)) {
1437 nextToken();
1438 if (FormatTok->is(tok::string_literal))
1439 nextToken();
1440 addUnwrappedLine();
1441 return;
1442 }
1443
1444 if (IsCpp) {
1445 while (FormatTok->is(tok::l_square) && handleCppAttributes()) {
1446 }
1447 } else if (Style.isVerilog()) {
1448 if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
1449 parseForOrWhileLoop(/*HasParens=*/false);
1450 return;
1451 }
1452 if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
1453 parseForOrWhileLoop();
1454 return;
1455 }
1456 if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
1457 Keywords.kw_assume, Keywords.kw_cover)) {
1458 parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1459 return;
1460 }
1461
1462 // Skip things that can exist before keywords like 'if' and 'case'.
1463 while (true) {
1464 if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
1465 Keywords.kw_unique0)) {
1466 nextToken();
1467 } else if (FormatTok->is(tok::l_paren) &&
1468 Tokens->peekNextToken()->is(tok::star)) {
1469 parseParens();
1470 } else {
1471 break;
1472 }
1473 }
1474 }
1475
1476 // Tokens that only make sense at the beginning of a line.
1477 if (FormatTok->isAccessSpecifierKeyword()) {
1478 if (Style.isJava() || Style.isJavaScript() || Style.isCSharp())
1479 nextToken();
1480 else
1481 parseAccessSpecifier();
1482 return;
1483 }
1484 switch (FormatTok->Tok.getKind()) {
1485 case tok::kw_asm:
1486 nextToken();
1487 if (FormatTok->is(tok::l_brace)) {
1488 FormatTok->setFinalizedType(TT_InlineASMBrace);
1489 nextToken();
1490 while (FormatTok && !eof()) {
1491 if (FormatTok->is(tok::r_brace)) {
1492 FormatTok->setFinalizedType(TT_InlineASMBrace);
1493 nextToken();
1494 addUnwrappedLine();
1495 break;
1496 }
1497 FormatTok->Finalized = true;
1498 nextToken();
1499 }
1500 }
1501 break;
1502 case tok::kw_namespace:
1503 parseNamespace();
1504 return;
1505 case tok::kw_if: {
1506 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1507 // field/method declaration.
1508 break;
1509 }
1510 FormatToken *Tok = parseIfThenElse(IfKind);
1511 if (IfLeftBrace)
1512 *IfLeftBrace = Tok;
1513 return;
1514 }
1515 case tok::kw_for:
1516 case tok::kw_while:
1517 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1518 // field/method declaration.
1519 break;
1520 }
1521 parseForOrWhileLoop();
1522 return;
1523 case tok::kw_do:
1524 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1525 // field/method declaration.
1526 break;
1527 }
1528 parseDoWhile();
1529 if (HasDoWhile)
1530 *HasDoWhile = true;
1531 return;
1532 case tok::kw_switch:
1533 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1534 // 'switch: string' field declaration.
1535 break;
1536 }
1537 parseSwitch(/*IsExpr=*/false);
1538 return;
1539 case tok::kw_default: {
1540 // In Verilog default along with other labels are handled in the next loop.
1541 if (Style.isVerilog())
1542 break;
1543 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1544 // 'default: string' field declaration.
1545 break;
1546 }
1547 auto *Default = FormatTok;
1548 nextToken();
1549 if (FormatTok->is(tok::colon)) {
1550 FormatTok->setFinalizedType(TT_CaseLabelColon);
1551 parseLabel();
1552 return;
1553 }
1554 if (FormatTok->is(tok::arrow)) {
1555 FormatTok->setFinalizedType(TT_CaseLabelArrow);
1556 Default->setFinalizedType(TT_SwitchExpressionLabel);
1557 parseLabel();
1558 return;
1559 }
1560 // e.g. "default void f() {}" in a Java interface.
1561 break;
1562 }
1563 case tok::kw_case:
1564 // Proto: there are no switch/case statements.
1565 if (Style.Language == FormatStyle::LK_Proto) {
1566 nextToken();
1567 return;
1568 }
1569 if (Style.isVerilog()) {
1570 parseBlock();
1571 addUnwrappedLine();
1572 return;
1573 }
1574 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1575 // 'case: string' field declaration.
1576 nextToken();
1577 break;
1578 }
1579 parseCaseLabel();
1580 return;
1581 case tok::kw_goto:
1582 nextToken();
1583 if (FormatTok->is(tok::kw_case))
1584 nextToken();
1585 break;
1586 case tok::kw_try:
1587 case tok::kw___try:
1588 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1589 // field/method declaration.
1590 break;
1591 }
1592 parseTryCatch();
1593 return;
1594 case tok::kw_extern:
1595 nextToken();
1596 if (Style.isVerilog()) {
1597 // In Verilog and extern module declaration looks like a start of module.
1598 // But there is no body and endmodule. So we handle it separately.
1599 if (Keywords.isVerilogHierarchy(*FormatTok)) {
1600 parseVerilogHierarchyHeader();
1601 return;
1602 }
1603 } else if (FormatTok->is(tok::string_literal)) {
1604 nextToken();
1605 if (FormatTok->is(tok::l_brace)) {
1607 addUnwrappedLine();
1608 // Either we indent or for backwards compatibility we follow the
1609 // AfterExternBlock style.
1610 unsigned AddLevels =
1613 Style.IndentExternBlock ==
1615 ? 1u
1616 : 0u;
1617 parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1618 addUnwrappedLine();
1619 return;
1620 }
1621 }
1622 break;
1623 case tok::kw_export:
1624 if (Style.isJavaScript()) {
1625 parseJavaScriptEs6ImportExport();
1626 return;
1627 }
1628 if (IsCpp) {
1629 nextToken();
1630 if (FormatTok->is(tok::kw_namespace)) {
1631 parseNamespace();
1632 return;
1633 }
1634 if (FormatTok->is(tok::l_brace)) {
1635 parseCppExportBlock();
1636 return;
1637 }
1638 if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
1639 return;
1640 }
1641 break;
1642 case tok::kw_inline:
1643 nextToken();
1644 if (FormatTok->is(tok::kw_namespace)) {
1645 parseNamespace();
1646 return;
1647 }
1648 break;
1649 case tok::identifier:
1650 if (FormatTok->is(TT_ForEachMacro)) {
1651 parseForOrWhileLoop();
1652 return;
1653 }
1654 if (FormatTok->is(TT_MacroBlockBegin)) {
1655 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1656 /*MunchSemi=*/false);
1657 return;
1658 }
1659 if (FormatTok->is(Keywords.kw_import)) {
1660 if (Style.isJavaScript()) {
1661 parseJavaScriptEs6ImportExport();
1662 return;
1663 }
1664 if (Style.Language == FormatStyle::LK_Proto) {
1665 nextToken();
1666 if (FormatTok->is(tok::kw_public))
1667 nextToken();
1668 if (FormatTok->isNot(tok::string_literal))
1669 return;
1670 nextToken();
1671 if (FormatTok->is(tok::semi))
1672 nextToken();
1673 addUnwrappedLine();
1674 return;
1675 }
1676 if (IsCpp && parseModuleImport())
1677 return;
1678 }
1679 if (IsCpp && FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1680 Keywords.kw_slots, Keywords.kw_qslots)) {
1681 nextToken();
1682 if (FormatTok->is(tok::colon)) {
1683 nextToken();
1684 addUnwrappedLine();
1685 return;
1686 }
1687 }
1688 if (IsCpp && FormatTok->is(TT_StatementMacro)) {
1689 parseStatementMacro();
1690 return;
1691 }
1692 if (IsCpp && FormatTok->is(TT_NamespaceMacro)) {
1693 parseNamespace();
1694 return;
1695 }
1696 // In Verilog labels can be any expression, so we don't do them here.
1697 // JS doesn't have macros, and within classes colons indicate fields, not
1698 // labels.
1699 // TableGen doesn't have labels.
1700 if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() &&
1701 Tokens->peekNextToken()->is(tok::colon) && !Line->MustBeDeclaration) {
1702 nextToken();
1703 if (!Line->InMacroBody || CurrentLines->size() > 1)
1704 Line->Tokens.begin()->Tok->MustBreakBefore = true;
1705 FormatTok->setFinalizedType(TT_GotoLabelColon);
1706 parseLabel(!Style.IndentGotoLabels);
1707 if (HasLabel)
1708 *HasLabel = true;
1709 return;
1710 }
1711 if (Style.isJava() && FormatTok->is(Keywords.kw_record)) {
1712 parseRecord(/*ParseAsExpr=*/false, /*IsJavaRecord=*/true);
1713 addUnwrappedLine();
1714 return;
1715 }
1716 // In all other cases, parse the declaration.
1717 break;
1718 default:
1719 break;
1720 }
1721
1722 bool SeenEqual = false;
1723 for (const bool InRequiresExpression =
1724 OpeningBrace && OpeningBrace->isOneOf(TT_RequiresExpressionLBrace,
1725 TT_CompoundRequirementLBrace);
1726 !eof();) {
1727 const FormatToken *Previous = FormatTok->Previous;
1728 switch (FormatTok->Tok.getKind()) {
1729 case tok::at:
1730 nextToken();
1731 if (FormatTok->is(tok::l_brace)) {
1732 nextToken();
1733 parseBracedList();
1734 break;
1735 }
1736 if (Style.isJava() && FormatTok->is(Keywords.kw_interface)) {
1737 nextToken();
1738 break;
1739 }
1740 switch (bool IsAutoRelease = false; FormatTok->Tok.getObjCKeywordID()) {
1741 case tok::objc_public:
1742 case tok::objc_protected:
1743 case tok::objc_package:
1744 case tok::objc_private:
1745 return parseAccessSpecifier();
1746 case tok::objc_interface:
1747 case tok::objc_implementation:
1748 return parseObjCInterfaceOrImplementation();
1749 case tok::objc_protocol:
1750 if (parseObjCProtocol())
1751 return;
1752 break;
1753 case tok::objc_end:
1754 return; // Handled by the caller.
1755 case tok::objc_optional:
1756 case tok::objc_required:
1757 nextToken();
1758 addUnwrappedLine();
1759 return;
1760 case tok::objc_autoreleasepool:
1761 IsAutoRelease = true;
1762 [[fallthrough]];
1763 case tok::objc_synchronized:
1764 nextToken();
1765 if (!IsAutoRelease && FormatTok->is(tok::l_paren)) {
1766 // Skip synchronization object
1767 parseParens();
1768 }
1769 if (FormatTok->is(tok::l_brace)) {
1772 addUnwrappedLine();
1773 }
1774 parseBlock();
1775 }
1776 addUnwrappedLine();
1777 return;
1778 case tok::objc_try:
1779 // This branch isn't strictly necessary (the kw_try case below would
1780 // do this too after the tok::at is parsed above). But be explicit.
1781 parseTryCatch();
1782 return;
1783 default:
1784 break;
1785 }
1786 break;
1787 case tok::kw_requires: {
1788 if (IsCpp) {
1789 bool ParsedClause = parseRequires(SeenEqual);
1790 if (ParsedClause)
1791 return;
1792 } else {
1793 nextToken();
1794 }
1795 break;
1796 }
1797 case tok::kw_enum:
1798 // Ignore if this is part of "template <enum ..." or "... -> enum" or
1799 // "template <..., enum ...>".
1800 if (Previous && Previous->isOneOf(tok::less, tok::arrow, tok::comma)) {
1801 nextToken();
1802 break;
1803 }
1804
1805 // parseEnum falls through and does not yet add an unwrapped line as an
1806 // enum definition can start a structural element.
1807 if (!parseEnum())
1808 break;
1809 // This only applies to C++ and Verilog.
1810 if (!IsCpp && !Style.isVerilog()) {
1811 addUnwrappedLine();
1812 return;
1813 }
1814 break;
1815 case tok::kw_typedef:
1816 nextToken();
1817 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1818 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1819 Keywords.kw_CF_CLOSED_ENUM,
1820 Keywords.kw_NS_CLOSED_ENUM)) {
1821 parseEnum();
1822 }
1823 break;
1824 case tok::kw_class:
1825 if (Style.isVerilog()) {
1826 parseBlock();
1827 addUnwrappedLine();
1828 return;
1829 }
1830 if (Style.isTableGen()) {
1831 // Do nothing special. In this case the l_brace becomes FunctionLBrace.
1832 // This is same as def and so on.
1833 nextToken();
1834 break;
1835 }
1836 [[fallthrough]];
1837 case tok::kw_struct:
1838 case tok::kw_union:
1839 if (parseStructLike())
1840 return;
1841 break;
1842 case tok::kw_decltype:
1843 nextToken();
1844 if (FormatTok->is(tok::l_paren)) {
1845 parseParens();
1846 if (FormatTok->Previous &&
1847 FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto,
1848 tok::l_paren)) {
1849 Line->SeenDecltypeAuto = true;
1850 }
1851 }
1852 break;
1853 case tok::period:
1854 nextToken();
1855 // In Java, classes have an implicit static member "class".
1856 if (Style.isJava() && FormatTok && FormatTok->is(tok::kw_class))
1857 nextToken();
1858 if (Style.isJavaScript() && FormatTok &&
1859 FormatTok->Tok.getIdentifierInfo()) {
1860 // JavaScript only has pseudo keywords, all keywords are allowed to
1861 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1862 nextToken();
1863 }
1864 break;
1865 case tok::semi:
1866 nextToken();
1867 addUnwrappedLine();
1868 return;
1869 case tok::r_brace:
1870 addUnwrappedLine();
1871 return;
1872 case tok::l_paren: {
1873 parseParens();
1874 // Break the unwrapped line if a K&R C function definition has a parameter
1875 // declaration.
1876 if (OpeningBrace || !IsCpp || !Previous || eof())
1877 break;
1878 if (isC78ParameterDecl(FormatTok,
1879 Tokens->peekNextToken(/*SkipComment=*/true),
1880 Previous)) {
1881 addUnwrappedLine();
1882 return;
1883 }
1884 break;
1885 }
1886 case tok::kw_operator:
1887 nextToken();
1888 if (FormatTok->isBinaryOperator())
1889 nextToken();
1890 break;
1891 case tok::caret: {
1892 const auto *Prev = FormatTok->getPreviousNonComment();
1893 nextToken();
1894 if (Prev && Prev->is(tok::identifier))
1895 break;
1896 // Block return type.
1897 if (FormatTok->Tok.isAnyIdentifier() || FormatTok->isTypeName(LangOpts)) {
1898 nextToken();
1899 // Return types: pointers are ok too.
1900 while (FormatTok->is(tok::star))
1901 nextToken();
1902 }
1903 // Block argument list.
1904 if (FormatTok->is(tok::l_paren))
1905 parseParens();
1906 // Block body.
1907 if (FormatTok->is(tok::l_brace))
1908 parseChildBlock();
1909 break;
1910 }
1911 case tok::l_brace:
1912 if (InRequiresExpression)
1913 FormatTok->setFinalizedType(TT_BracedListLBrace);
1914 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1915 IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
1916 // A block outside of parentheses must be the last part of a
1917 // structural element.
1918 // FIXME: Figure out cases where this is not true, and add projections
1919 // for them (the one we know is missing are lambdas).
1920 if (Style.isJava() &&
1921 Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1922 // If necessary, we could set the type to something different than
1923 // TT_FunctionLBrace.
1926 addUnwrappedLine();
1927 }
1928 } else if (Style.BraceWrapping.AfterFunction) {
1929 addUnwrappedLine();
1930 }
1931 if (!Previous || Previous->isNot(TT_TypeDeclarationParen))
1932 FormatTok->setFinalizedType(TT_FunctionLBrace);
1933 parseBlock();
1934 IsDecltypeAutoFunction = false;
1935 addUnwrappedLine();
1936 return;
1937 }
1938 // Otherwise this was a braced init list, and the structural
1939 // element continues.
1940 break;
1941 case tok::kw_try:
1942 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1943 // field/method declaration.
1944 nextToken();
1945 break;
1946 }
1947 // We arrive here when parsing function-try blocks.
1948 if (Style.BraceWrapping.AfterFunction)
1949 addUnwrappedLine();
1950 parseTryCatch();
1951 return;
1952 case tok::identifier: {
1953 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1954 Line->MustBeDeclaration) {
1955 addUnwrappedLine();
1956 parseCSharpGenericTypeConstraint();
1957 break;
1958 }
1959 if (FormatTok->is(TT_MacroBlockEnd)) {
1960 addUnwrappedLine();
1961 return;
1962 }
1963
1964 // Function declarations (as opposed to function expressions) are parsed
1965 // on their own unwrapped line by continuing this loop. Function
1966 // expressions (functions that are not on their own line) must not create
1967 // a new unwrapped line, so they are special cased below.
1968 size_t TokenCount = Line->Tokens.size();
1969 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1970 (TokenCount > 1 ||
1971 (TokenCount == 1 &&
1972 Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) {
1973 tryToParseJSFunction();
1974 break;
1975 }
1976 if ((Style.isJavaScript() || Style.isJava()) &&
1977 FormatTok->is(Keywords.kw_interface)) {
1978 if (Style.isJavaScript()) {
1979 // In JavaScript/TypeScript, "interface" can be used as a standalone
1980 // identifier, e.g. in `var interface = 1;`. If "interface" is
1981 // followed by another identifier, it is very like to be an actual
1982 // interface declaration.
1983 unsigned StoredPosition = Tokens->getPosition();
1984 FormatToken *Next = Tokens->getNextToken();
1985 FormatTok = Tokens->setPosition(StoredPosition);
1986 if (!mustBeJSIdent(Keywords, Next)) {
1987 nextToken();
1988 break;
1989 }
1990 }
1991 parseRecord();
1992 addUnwrappedLine();
1993 return;
1994 }
1995
1996 if (Style.isVerilog()) {
1997 if (FormatTok->is(Keywords.kw_table)) {
1998 parseVerilogTable();
1999 return;
2000 }
2001 if (Keywords.isVerilogBegin(*FormatTok) ||
2002 Keywords.isVerilogHierarchy(*FormatTok)) {
2003 parseBlock();
2004 addUnwrappedLine();
2005 return;
2006 }
2007 }
2008
2009 if (!IsCpp && FormatTok->is(Keywords.kw_interface)) {
2010 if (parseStructLike())
2011 return;
2012 break;
2013 }
2014
2015 if (IsCpp && FormatTok->is(TT_StatementMacro)) {
2016 parseStatementMacro();
2017 return;
2018 }
2019
2020 // See if the following token should start a new unwrapped line.
2021 StringRef Text = FormatTok->TokenText;
2022
2023 FormatToken *PreviousToken = FormatTok;
2024 nextToken();
2025
2026 // JS doesn't have macros, and within classes colons indicate fields, not
2027 // labels.
2028 if (Style.isJavaScript())
2029 break;
2030
2031 auto OneTokenSoFar = [&]() {
2032 auto I = Line->Tokens.begin(), E = Line->Tokens.end();
2033 while (I != E && I->Tok->is(tok::comment))
2034 ++I;
2035 if (Style.isVerilog())
2036 while (I != E && I->Tok->is(tok::hash))
2037 ++I;
2038 return I != E && (++I == E);
2039 };
2040 if (OneTokenSoFar()) {
2041 // Recognize function-like macro usages without trailing semicolon as
2042 // well as free-standing macros like Q_OBJECT.
2043 bool FunctionLike = FormatTok->is(tok::l_paren);
2044 if (FunctionLike)
2045 parseParens();
2046
2047 bool FollowedByNewline =
2048 CommentsBeforeNextToken.empty()
2049 ? FormatTok->NewlinesBefore > 0
2050 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
2051
2052 if (FollowedByNewline &&
2053 (Text.size() >= 5 ||
2054 (FunctionLike && FormatTok->isNot(tok::l_paren))) &&
2055 tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
2056 if (PreviousToken->isNot(TT_UntouchableMacroFunc))
2057 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
2058 addUnwrappedLine();
2059 return;
2060 }
2061 }
2062 break;
2063 }
2064 case tok::equal:
2065 if ((Style.isJavaScript() || Style.isCSharp()) &&
2066 FormatTok->is(TT_FatArrow)) {
2067 tryToParseChildBlock();
2068 break;
2069 }
2070
2071 SeenEqual = true;
2072 nextToken();
2073 if (FormatTok->is(tok::l_brace)) {
2074 // Block kind should probably be set to BK_BracedInit for any language.
2075 // C# needs this change to ensure that array initialisers and object
2076 // initialisers are indented the same way.
2077 if (Style.isCSharp())
2078 FormatTok->setBlockKind(BK_BracedInit);
2079 // TableGen's defset statement has syntax of the form,
2080 // `defset <type> <name> = { <statement>... }`
2081 if (Style.isTableGen() &&
2082 Line->Tokens.begin()->Tok->is(Keywords.kw_defset)) {
2083 FormatTok->setFinalizedType(TT_FunctionLBrace);
2084 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2085 /*MunchSemi=*/false);
2086 addUnwrappedLine();
2087 break;
2088 }
2089 nextToken();
2090 parseBracedList();
2091 } else if (Style.Language == FormatStyle::LK_Proto &&
2092 FormatTok->is(tok::less)) {
2093 nextToken();
2094 parseBracedList(/*IsAngleBracket=*/true);
2095 }
2096 break;
2097 case tok::l_square:
2098 parseSquare();
2099 break;
2100 case tok::kw_new:
2101 if (Style.isCSharp() &&
2102 (Tokens->peekNextToken()->isAccessSpecifierKeyword() ||
2103 (Previous && Previous->isAccessSpecifierKeyword()))) {
2104 nextToken();
2105 } else {
2106 parseNew();
2107 }
2108 break;
2109 case tok::kw_switch:
2110 if (Style.isJava())
2111 parseSwitch(/*IsExpr=*/true);
2112 else
2113 nextToken();
2114 break;
2115 case tok::kw_case:
2116 // Proto: there are no switch/case statements.
2117 if (Style.Language == FormatStyle::LK_Proto) {
2118 nextToken();
2119 return;
2120 }
2121 // In Verilog switch is called case.
2122 if (Style.isVerilog()) {
2123 parseBlock();
2124 addUnwrappedLine();
2125 return;
2126 }
2127 if (Style.isJavaScript() && Line->MustBeDeclaration) {
2128 // 'case: string' field declaration.
2129 nextToken();
2130 break;
2131 }
2132 parseCaseLabel();
2133 break;
2134 case tok::kw_default:
2135 nextToken();
2136 if (Style.isVerilog()) {
2137 if (FormatTok->is(tok::colon)) {
2138 // The label will be handled in the next iteration.
2139 break;
2140 }
2141 if (FormatTok->is(Keywords.kw_clocking)) {
2142 // A default clocking block.
2143 parseBlock();
2144 addUnwrappedLine();
2145 return;
2146 }
2147 parseVerilogCaseLabel();
2148 return;
2149 }
2150 break;
2151 case tok::colon:
2152 nextToken();
2153 if (Style.isVerilog()) {
2154 parseVerilogCaseLabel();
2155 return;
2156 }
2157 break;
2158 case tok::greater:
2159 nextToken();
2160 if (FormatTok->is(tok::l_brace))
2161 FormatTok->Previous->setFinalizedType(TT_TemplateCloser);
2162 break;
2163 default:
2164 nextToken();
2165 break;
2166 }
2167 }
2168}
2169
2170bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2171 assert(FormatTok->is(tok::l_brace));
2172 if (!Style.isCSharp())
2173 return false;
2174 // See if it's a property accessor.
2175 if (!FormatTok->Previous || FormatTok->Previous->isNot(tok::identifier))
2176 return false;
2177
2178 // See if we are inside a property accessor.
2179 //
2180 // Record the current tokenPosition so that we can advance and
2181 // reset the current token. `Next` is not set yet so we need
2182 // another way to advance along the token stream.
2183 unsigned int StoredPosition = Tokens->getPosition();
2184 FormatToken *Tok = Tokens->getNextToken();
2185
2186 // A trivial property accessor is of the form:
2187 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2188 // Track these as they do not require line breaks to be introduced.
2189 bool HasSpecialAccessor = false;
2190 bool IsTrivialPropertyAccessor = true;
2191 bool HasAttribute = false;
2192 while (!eof()) {
2193 if (const bool IsAccessorKeyword =
2194 Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set);
2195 IsAccessorKeyword || Tok->isAccessSpecifierKeyword() ||
2196 Tok->isOneOf(tok::l_square, tok::semi, Keywords.kw_internal)) {
2197 if (IsAccessorKeyword)
2198 HasSpecialAccessor = true;
2199 else if (Tok->is(tok::l_square))
2200 HasAttribute = true;
2201 Tok = Tokens->getNextToken();
2202 continue;
2203 }
2204 if (Tok->isNot(tok::r_brace))
2205 IsTrivialPropertyAccessor = false;
2206 break;
2207 }
2208
2209 if (!HasSpecialAccessor || HasAttribute) {
2210 Tokens->setPosition(StoredPosition);
2211 return false;
2212 }
2213
2214 // Try to parse the property accessor:
2215 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2216 Tokens->setPosition(StoredPosition);
2217 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2218 addUnwrappedLine();
2219 nextToken();
2220 do {
2221 switch (FormatTok->Tok.getKind()) {
2222 case tok::r_brace:
2223 nextToken();
2224 if (FormatTok->is(tok::equal)) {
2225 while (!eof() && FormatTok->isNot(tok::semi))
2226 nextToken();
2227 nextToken();
2228 }
2229 addUnwrappedLine();
2230 return true;
2231 case tok::l_brace:
2232 ++Line->Level;
2233 parseBlock(/*MustBeDeclaration=*/true);
2234 addUnwrappedLine();
2235 --Line->Level;
2236 break;
2237 case tok::equal:
2238 if (FormatTok->is(TT_FatArrow)) {
2239 ++Line->Level;
2240 do {
2241 nextToken();
2242 } while (!eof() && FormatTok->isNot(tok::semi));
2243 nextToken();
2244 addUnwrappedLine();
2245 --Line->Level;
2246 break;
2247 }
2248 nextToken();
2249 break;
2250 default:
2251 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2252 Keywords.kw_set) &&
2253 !IsTrivialPropertyAccessor) {
2254 // Non-trivial get/set needs to be on its own line.
2255 addUnwrappedLine();
2256 }
2257 nextToken();
2258 }
2259 } while (!eof());
2260
2261 // Unreachable for well-formed code (paired '{' and '}').
2262 return true;
2263}
2264
2265bool UnwrappedLineParser::tryToParseLambda() {
2266 assert(FormatTok->is(tok::l_square));
2267 if (!IsCpp) {
2268 nextToken();
2269 return false;
2270 }
2271 FormatToken &LSquare = *FormatTok;
2272 if (!tryToParseLambdaIntroducer())
2273 return false;
2274
2275 bool SeenArrow = false;
2276 bool InTemplateParameterList = false;
2277
2278 while (FormatTok->isNot(tok::l_brace)) {
2279 if (FormatTok->isTypeName(LangOpts) || FormatTok->isAttribute()) {
2280 nextToken();
2281 continue;
2282 }
2283 switch (FormatTok->Tok.getKind()) {
2284 case tok::l_brace:
2285 break;
2286 case tok::l_paren:
2287 parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
2288 break;
2289 case tok::l_square:
2290 parseSquare();
2291 break;
2292 case tok::less:
2293 assert(FormatTok->Previous);
2294 if (FormatTok->Previous->is(tok::r_square))
2295 InTemplateParameterList = true;
2296 nextToken();
2297 break;
2298 case tok::kw_auto:
2299 case tok::kw_class:
2300 case tok::kw_struct:
2301 case tok::kw_union:
2302 case tok::kw_template:
2303 case tok::kw_typename:
2304 case tok::amp:
2305 case tok::star:
2306 case tok::kw_const:
2307 case tok::kw_constexpr:
2308 case tok::kw_consteval:
2309 case tok::comma:
2310 case tok::greater:
2311 case tok::identifier:
2312 case tok::numeric_constant:
2313 case tok::coloncolon:
2314 case tok::kw_mutable:
2315 case tok::kw_noexcept:
2316 case tok::kw_static:
2317 nextToken();
2318 break;
2319 // Specialization of a template with an integer parameter can contain
2320 // arithmetic, logical, comparison and ternary operators.
2321 //
2322 // FIXME: This also accepts sequences of operators that are not in the scope
2323 // of a template argument list.
2324 //
2325 // In a C++ lambda a template type can only occur after an arrow. We use
2326 // this as an heuristic to distinguish between Objective-C expressions
2327 // followed by an `a->b` expression, such as:
2328 // ([obj func:arg] + a->b)
2329 // Otherwise the code below would parse as a lambda.
2330 case tok::plus:
2331 case tok::minus:
2332 case tok::exclaim:
2333 case tok::tilde:
2334 case tok::slash:
2335 case tok::percent:
2336 case tok::lessless:
2337 case tok::pipe:
2338 case tok::pipepipe:
2339 case tok::ampamp:
2340 case tok::caret:
2341 case tok::equalequal:
2342 case tok::exclaimequal:
2343 case tok::greaterequal:
2344 case tok::lessequal:
2345 case tok::question:
2346 case tok::colon:
2347 case tok::ellipsis:
2348 case tok::kw_true:
2349 case tok::kw_false:
2350 if (SeenArrow || InTemplateParameterList) {
2351 nextToken();
2352 break;
2353 }
2354 return true;
2355 case tok::arrow:
2356 // This might or might not actually be a lambda arrow (this could be an
2357 // ObjC method invocation followed by a dereferencing arrow). We might
2358 // reset this back to TT_Unknown in TokenAnnotator.
2359 FormatTok->setFinalizedType(TT_LambdaArrow);
2360 SeenArrow = true;
2361 nextToken();
2362 break;
2363 case tok::kw_requires: {
2364 auto *RequiresToken = FormatTok;
2365 nextToken();
2366 parseRequiresClause(RequiresToken);
2367 break;
2368 }
2369 case tok::equal:
2370 if (!InTemplateParameterList)
2371 return true;
2372 nextToken();
2373 break;
2374 default:
2375 return true;
2376 }
2377 }
2378
2379 FormatTok->setFinalizedType(TT_LambdaLBrace);
2380 LSquare.setFinalizedType(TT_LambdaLSquare);
2381
2382 NestedLambdas.push_back(Line->SeenDecltypeAuto);
2383 parseChildBlock();
2384 assert(!NestedLambdas.empty());
2385 NestedLambdas.pop_back();
2386
2387 return true;
2388}
2389
2390bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2391 const FormatToken *Previous = FormatTok->Previous;
2392 const FormatToken *LeftSquare = FormatTok;
2393 nextToken();
2394 if (Previous) {
2395 if (Previous->Tok.getIdentifierInfo() &&
2396 !Previous->isOneOf(tok::kw_return, tok::kw_co_await, tok::kw_co_yield,
2397 tok::kw_co_return)) {
2398 return false;
2399 }
2400 if (Previous->closesScope()) {
2401 // Not a potential C-style cast.
2402 if (Previous->isNot(tok::r_paren))
2403 return false;
2404 const auto *BeforeRParen = Previous->getPreviousNonComment();
2405 // Lambdas can be cast to function types only, e.g. `std::function<int()>`
2406 // and `int (*)()`.
2407 if (!BeforeRParen || !BeforeRParen->isOneOf(tok::greater, tok::r_paren))
2408 return false;
2409 }
2410 }
2411 if (LeftSquare->isCppStructuredBinding(IsCpp))
2412 return false;
2413 if (FormatTok->is(tok::l_square) || tok::isLiteral(FormatTok->Tok.getKind()))
2414 return false;
2415 if (FormatTok->is(tok::r_square)) {
2416 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2417 if (Next->is(tok::greater))
2418 return false;
2419 }
2420 parseSquare(/*LambdaIntroducer=*/true);
2421 return true;
2422}
2423
2424void UnwrappedLineParser::tryToParseJSFunction() {
2425 assert(FormatTok->is(Keywords.kw_function));
2426 if (FormatTok->is(Keywords.kw_async))
2427 nextToken();
2428 // Consume "function".
2429 nextToken();
2430
2431 // Consume * (generator function). Treat it like C++'s overloaded operators.
2432 if (FormatTok->is(tok::star)) {
2433 FormatTok->setFinalizedType(TT_OverloadedOperator);
2434 nextToken();
2435 }
2436
2437 // Consume function name.
2438 if (FormatTok->is(tok::identifier))
2439 nextToken();
2440
2441 if (FormatTok->isNot(tok::l_paren))
2442 return;
2443
2444 // Parse formal parameter list.
2445 parseParens();
2446
2447 if (FormatTok->is(tok::colon)) {
2448 // Parse a type definition.
2449 nextToken();
2450
2451 // Eat the type declaration. For braced inline object types, balance braces,
2452 // otherwise just parse until finding an l_brace for the function body.
2453 if (FormatTok->is(tok::l_brace))
2454 tryToParseBracedList();
2455 else
2456 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2457 nextToken();
2458 }
2459
2460 if (FormatTok->is(tok::semi))
2461 return;
2462
2463 parseChildBlock();
2464}
2465
2466bool UnwrappedLineParser::tryToParseBracedList() {
2467 if (FormatTok->is(BK_Unknown))
2468 calculateBraceTypes();
2469 assert(FormatTok->isNot(BK_Unknown));
2470 if (FormatTok->is(BK_Block))
2471 return false;
2472 nextToken();
2473 parseBracedList();
2474 return true;
2475}
2476
2477bool UnwrappedLineParser::tryToParseChildBlock() {
2478 assert(Style.isJavaScript() || Style.isCSharp());
2479 assert(FormatTok->is(TT_FatArrow));
2480 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2481 // They always start an expression or a child block if followed by a curly
2482 // brace.
2483 nextToken();
2484 if (FormatTok->isNot(tok::l_brace))
2485 return false;
2486 parseChildBlock();
2487 return true;
2488}
2489
2490bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) {
2491 assert(!IsAngleBracket || !IsEnum);
2492 bool HasError = false;
2493
2494 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2495 // replace this by using parseAssignmentExpression() inside.
2496 do {
2497 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2498 tryToParseChildBlock()) {
2499 continue;
2500 }
2501 if (Style.isJavaScript()) {
2502 if (FormatTok->is(Keywords.kw_function)) {
2503 tryToParseJSFunction();
2504 continue;
2505 }
2506 if (FormatTok->is(tok::l_brace)) {
2507 // Could be a method inside of a braced list `{a() { return 1; }}`.
2508 if (tryToParseBracedList())
2509 continue;
2510 parseChildBlock();
2511 }
2512 }
2513 if (FormatTok->is(IsAngleBracket ? tok::greater : tok::r_brace)) {
2514 if (IsEnum) {
2515 FormatTok->setBlockKind(BK_Block);
2517 addUnwrappedLine();
2518 }
2519 nextToken();
2520 return !HasError;
2521 }
2522 switch (FormatTok->Tok.getKind()) {
2523 case tok::l_square:
2524 if (Style.isCSharp())
2525 parseSquare();
2526 else
2527 tryToParseLambda();
2528 break;
2529 case tok::l_paren:
2530 parseParens();
2531 // JavaScript can just have free standing methods and getters/setters in
2532 // object literals. Detect them by a "{" following ")".
2533 if (Style.isJavaScript()) {
2534 if (FormatTok->is(tok::l_brace))
2535 parseChildBlock();
2536 break;
2537 }
2538 break;
2539 case tok::l_brace:
2540 // Assume there are no blocks inside a braced init list apart
2541 // from the ones we explicitly parse out (like lambdas).
2542 FormatTok->setBlockKind(BK_BracedInit);
2543 if (!IsAngleBracket) {
2544 auto *Prev = FormatTok->Previous;
2545 if (Prev && Prev->is(tok::greater))
2546 Prev->setFinalizedType(TT_TemplateCloser);
2547 }
2548 nextToken();
2549 parseBracedList();
2550 break;
2551 case tok::less:
2552 nextToken();
2553 if (IsAngleBracket)
2554 parseBracedList(/*IsAngleBracket=*/true);
2555 break;
2556 case tok::semi:
2557 // JavaScript (or more precisely TypeScript) can have semicolons in braced
2558 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2559 // used for error recovery if we have otherwise determined that this is
2560 // a braced list.
2561 if (Style.isJavaScript()) {
2562 nextToken();
2563 break;
2564 }
2565 HasError = true;
2566 if (!IsEnum)
2567 return false;
2568 nextToken();
2569 break;
2570 case tok::comma:
2571 nextToken();
2572 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2573 addUnwrappedLine();
2574 break;
2575 default:
2576 nextToken();
2577 break;
2578 }
2579 } while (!eof());
2580 return false;
2581}
2582
2583/// Parses a pair of parentheses (and everything between them).
2584/// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2585/// double ampersands. This applies for all nested scopes as well.
2586///
2587/// Returns whether there is a `=` token between the parentheses.
2588bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType,
2589 bool InMacroCall) {
2590 assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2591 auto *LParen = FormatTok;
2592 auto *Prev = FormatTok->Previous;
2593 bool SeenComma = false;
2594 bool SeenEqual = false;
2595 bool MightBeFoldExpr = false;
2596 nextToken();
2597 const bool MightBeStmtExpr = FormatTok->is(tok::l_brace);
2598 if (!InMacroCall && Prev && Prev->is(TT_FunctionLikeMacro))
2599 InMacroCall = true;
2600 do {
2601 switch (FormatTok->Tok.getKind()) {
2602 case tok::l_paren:
2603 if (parseParens(AmpAmpTokenType, InMacroCall))
2604 SeenEqual = true;
2605 if (Style.isJava() && FormatTok->is(tok::l_brace))
2606 parseChildBlock();
2607 break;
2608 case tok::r_paren: {
2609 auto *RParen = FormatTok;
2610 nextToken();
2611 if (Prev) {
2612 auto OptionalParens = [&] {
2613 if (MightBeStmtExpr || MightBeFoldExpr || SeenComma || InMacroCall ||
2614 Line->InMacroBody ||
2616 RParen->getPreviousNonComment() == LParen) {
2617 return false;
2618 }
2619 const bool DoubleParens =
2620 Prev->is(tok::l_paren) && FormatTok->is(tok::r_paren);
2621 if (DoubleParens) {
2622 const auto *PrevPrev = Prev->getPreviousNonComment();
2623 const bool Excluded =
2624 PrevPrev &&
2625 (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) ||
2626 (SeenEqual &&
2627 (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) ||
2628 PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if))));
2629 if (!Excluded)
2630 return true;
2631 } else {
2632 const bool CommaSeparated =
2633 Prev->isOneOf(tok::l_paren, tok::comma) &&
2634 FormatTok->isOneOf(tok::comma, tok::r_paren);
2635 if (CommaSeparated &&
2636 // LParen is not preceded by ellipsis, comma.
2637 !Prev->endsSequence(tok::comma, tok::ellipsis) &&
2638 // RParen is not followed by comma, ellipsis.
2639 !(FormatTok->is(tok::comma) &&
2640 Tokens->peekNextToken()->is(tok::ellipsis))) {
2641 return true;
2642 }
2643 const bool ReturnParens =
2645 ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
2646 (!NestedLambdas.empty() && !NestedLambdas.back())) &&
2647 Prev->isOneOf(tok::kw_return, tok::kw_co_return) &&
2648 FormatTok->is(tok::semi);
2649 if (ReturnParens)
2650 return true;
2651 }
2652 return false;
2653 };
2654 if (Prev->is(TT_TypenameMacro)) {
2655 LParen->setFinalizedType(TT_TypeDeclarationParen);
2656 RParen->setFinalizedType(TT_TypeDeclarationParen);
2657 } else if (Prev->is(tok::greater) && RParen->Previous == LParen) {
2658 Prev->setFinalizedType(TT_TemplateCloser);
2659 } else if (OptionalParens()) {
2660 LParen->Optional = true;
2661 RParen->Optional = true;
2662 }
2663 }
2664 return SeenEqual;
2665 }
2666 case tok::r_brace:
2667 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2668 return SeenEqual;
2669 case tok::l_square:
2670 tryToParseLambda();
2671 break;
2672 case tok::l_brace:
2673 if (!tryToParseBracedList())
2674 parseChildBlock();
2675 break;
2676 case tok::at:
2677 nextToken();
2678 if (FormatTok->is(tok::l_brace)) {
2679 nextToken();
2680 parseBracedList();
2681 }
2682 break;
2683 case tok::comma:
2684 SeenComma = true;
2685 nextToken();
2686 break;
2687 case tok::ellipsis:
2688 MightBeFoldExpr = true;
2689 nextToken();
2690 break;
2691 case tok::equal:
2692 SeenEqual = true;
2693 if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2694 tryToParseChildBlock();
2695 else
2696 nextToken();
2697 break;
2698 case tok::kw_class:
2699 if (Style.isJavaScript())
2700 parseRecord(/*ParseAsExpr=*/true);
2701 else
2702 nextToken();
2703 break;
2704 case tok::identifier:
2705 if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function)))
2706 tryToParseJSFunction();
2707 else
2708 nextToken();
2709 break;
2710 case tok::kw_switch:
2711 if (Style.isJava())
2712 parseSwitch(/*IsExpr=*/true);
2713 else
2714 nextToken();
2715 break;
2716 case tok::kw_requires: {
2717 auto RequiresToken = FormatTok;
2718 nextToken();
2719 parseRequiresExpression(RequiresToken);
2720 break;
2721 }
2722 case tok::ampamp:
2723 if (AmpAmpTokenType != TT_Unknown)
2724 FormatTok->setFinalizedType(AmpAmpTokenType);
2725 [[fallthrough]];
2726 default:
2727 nextToken();
2728 break;
2729 }
2730 } while (!eof());
2731 return SeenEqual;
2732}
2733
2734void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2735 if (!LambdaIntroducer) {
2736 assert(FormatTok->is(tok::l_square) && "'[' expected.");
2737 if (tryToParseLambda())
2738 return;
2739 }
2740 do {
2741 switch (FormatTok->Tok.getKind()) {
2742 case tok::l_paren:
2743 parseParens();
2744 break;
2745 case tok::r_square:
2746 nextToken();
2747 return;
2748 case tok::r_brace:
2749 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2750 return;
2751 case tok::l_square:
2752 parseSquare();
2753 break;
2754 case tok::l_brace: {
2755 if (!tryToParseBracedList())
2756 parseChildBlock();
2757 break;
2758 }
2759 case tok::at:
2760 case tok::colon:
2761 nextToken();
2762 if (FormatTok->is(tok::l_brace)) {
2763 nextToken();
2764 parseBracedList();
2765 }
2766 break;
2767 default:
2768 nextToken();
2769 break;
2770 }
2771 } while (!eof());
2772}
2773
2774void UnwrappedLineParser::keepAncestorBraces() {
2775 if (!Style.RemoveBracesLLVM)
2776 return;
2777
2778 const int MaxNestingLevels = 2;
2779 const int Size = NestedTooDeep.size();
2780 if (Size >= MaxNestingLevels)
2781 NestedTooDeep[Size - MaxNestingLevels] = true;
2782 NestedTooDeep.push_back(false);
2783}
2784
2786 for (const auto &Token : llvm::reverse(Line.Tokens))
2787 if (Token.Tok->isNot(tok::comment))
2788 return Token.Tok;
2789
2790 return nullptr;
2791}
2792
2793void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2794 FormatToken *Tok = nullptr;
2795
2796 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2797 PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
2799 ? getLastNonComment(*Line)
2800 : Line->Tokens.back().Tok;
2801 assert(Tok);
2802 if (Tok->BraceCount < 0) {
2803 assert(Tok->BraceCount == -1);
2804 Tok = nullptr;
2805 } else {
2806 Tok->BraceCount = -1;
2807 }
2808 }
2809
2810 addUnwrappedLine();
2811 ++Line->Level;
2812 ++Line->UnbracedBodyLevel;
2813 parseStructuralElement();
2814 --Line->UnbracedBodyLevel;
2815
2816 if (Tok) {
2817 assert(!Line->InPPDirective);
2818 Tok = nullptr;
2819 for (const auto &L : llvm::reverse(*CurrentLines)) {
2820 if (!L.InPPDirective && getLastNonComment(L)) {
2821 Tok = L.Tokens.back().Tok;
2822 break;
2823 }
2824 }
2825 assert(Tok);
2826 ++Tok->BraceCount;
2827 }
2828
2829 if (CheckEOF && eof())
2830 addUnwrappedLine();
2831
2832 --Line->Level;
2833}
2834
2835static void markOptionalBraces(FormatToken *LeftBrace) {
2836 if (!LeftBrace)
2837 return;
2838
2839 assert(LeftBrace->is(tok::l_brace));
2840
2841 FormatToken *RightBrace = LeftBrace->MatchingParen;
2842 if (!RightBrace) {
2843 assert(!LeftBrace->Optional);
2844 return;
2845 }
2846
2847 assert(RightBrace->is(tok::r_brace));
2848 assert(RightBrace->MatchingParen == LeftBrace);
2849 assert(LeftBrace->Optional == RightBrace->Optional);
2850
2851 LeftBrace->Optional = true;
2852 RightBrace->Optional = true;
2853}
2854
2855void UnwrappedLineParser::handleAttributes() {
2856 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2857 if (FormatTok->isAttribute())
2858 nextToken();
2859 else if (FormatTok->is(tok::l_square))
2860 handleCppAttributes();
2861}
2862
2863bool UnwrappedLineParser::handleCppAttributes() {
2864 // Handle [[likely]] / [[unlikely]] attributes.
2865 assert(FormatTok->is(tok::l_square));
2866 if (!tryToParseSimpleAttribute())
2867 return false;
2868 parseSquare();
2869 return true;
2870}
2871
2872/// Returns whether \c Tok begins a block.
2873bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2874 // FIXME: rename the function or make
2875 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2876 return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2877 : Tok.is(tok::l_brace);
2878}
2879
2880FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2881 bool KeepBraces,
2882 bool IsVerilogAssert) {
2883 assert((FormatTok->is(tok::kw_if) ||
2884 (Style.isVerilog() &&
2885 FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
2886 Keywords.kw_assume, Keywords.kw_cover))) &&
2887 "'if' expected");
2888 nextToken();
2889
2890 if (IsVerilogAssert) {
2891 // Handle `assert #0` and `assert final`.
2892 if (FormatTok->is(Keywords.kw_verilogHash)) {
2893 nextToken();
2894 if (FormatTok->is(tok::numeric_constant))
2895 nextToken();
2896 } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
2897 Keywords.kw_sequence)) {
2898 nextToken();
2899 }
2900 }
2901
2902 // TableGen's if statement has the form of `if <cond> then { ... }`.
2903 if (Style.isTableGen()) {
2904 while (!eof() && FormatTok->isNot(Keywords.kw_then)) {
2905 // Simply skip until then. This range only contains a value.
2906 nextToken();
2907 }
2908 }
2909
2910 // Handle `if !consteval`.
2911 if (FormatTok->is(tok::exclaim))
2912 nextToken();
2913
2914 bool KeepIfBraces = true;
2915 if (FormatTok->is(tok::kw_consteval)) {
2916 nextToken();
2917 } else {
2918 KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
2919 if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
2920 nextToken();
2921 if (FormatTok->is(tok::l_paren)) {
2922 FormatTok->setFinalizedType(TT_ConditionLParen);
2923 parseParens();
2924 }
2925 }
2926 handleAttributes();
2927 // The then action is optional in Verilog assert statements.
2928 if (IsVerilogAssert && FormatTok->is(tok::semi)) {
2929 nextToken();
2930 addUnwrappedLine();
2931 return nullptr;
2932 }
2933
2934 bool NeedsUnwrappedLine = false;
2935 keepAncestorBraces();
2936
2937 FormatToken *IfLeftBrace = nullptr;
2938 IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2939
2940 if (isBlockBegin(*FormatTok)) {
2941 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2942 IfLeftBrace = FormatTok;
2943 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2944 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2945 /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
2946 setPreviousRBraceType(TT_ControlStatementRBrace);
2947 if (Style.BraceWrapping.BeforeElse)
2948 addUnwrappedLine();
2949 else
2950 NeedsUnwrappedLine = true;
2951 } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
2952 addUnwrappedLine();
2953 } else {
2954 parseUnbracedBody();
2955 }
2956
2957 if (Style.RemoveBracesLLVM) {
2958 assert(!NestedTooDeep.empty());
2959 KeepIfBraces = KeepIfBraces ||
2960 (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2961 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2962 IfBlockKind == IfStmtKind::IfElseIf;
2963 }
2964
2965 bool KeepElseBraces = KeepIfBraces;
2966 FormatToken *ElseLeftBrace = nullptr;
2967 IfStmtKind Kind = IfStmtKind::IfOnly;
2968
2969 if (FormatTok->is(tok::kw_else)) {
2970 if (Style.RemoveBracesLLVM) {
2971 NestedTooDeep.back() = false;
2972 Kind = IfStmtKind::IfElse;
2973 }
2974 nextToken();
2975 handleAttributes();
2976 if (isBlockBegin(*FormatTok)) {
2977 const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
2978 FormatTok->setFinalizedType(TT_ElseLBrace);
2979 ElseLeftBrace = FormatTok;
2980 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2981 IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
2982 FormatToken *IfLBrace =
2983 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2984 /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
2985 setPreviousRBraceType(TT_ElseRBrace);
2986 if (FormatTok->is(tok::kw_else)) {
2987 KeepElseBraces = KeepElseBraces ||
2988 ElseBlockKind == IfStmtKind::IfOnly ||
2989 ElseBlockKind == IfStmtKind::IfElseIf;
2990 } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
2991 KeepElseBraces = true;
2992 assert(ElseLeftBrace->MatchingParen);
2993 markOptionalBraces(ElseLeftBrace);
2994 }
2995 addUnwrappedLine();
2996 } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
2997 const FormatToken *Previous = Tokens->getPreviousToken();
2998 assert(Previous);
2999 const bool IsPrecededByComment = Previous->is(tok::comment);
3000 if (IsPrecededByComment) {
3001 addUnwrappedLine();
3002 ++Line->Level;
3003 }
3004 bool TooDeep = true;
3005 if (Style.RemoveBracesLLVM) {
3006 Kind = IfStmtKind::IfElseIf;
3007 TooDeep = NestedTooDeep.pop_back_val();
3008 }
3009 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
3010 if (Style.RemoveBracesLLVM)
3011 NestedTooDeep.push_back(TooDeep);
3012 if (IsPrecededByComment)
3013 --Line->Level;
3014 } else {
3015 parseUnbracedBody(/*CheckEOF=*/true);
3016 }
3017 } else {
3018 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
3019 if (NeedsUnwrappedLine)
3020 addUnwrappedLine();
3021 }
3022
3023 if (!Style.RemoveBracesLLVM)
3024 return nullptr;
3025
3026 assert(!NestedTooDeep.empty());
3027 KeepElseBraces = KeepElseBraces ||
3028 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
3029 NestedTooDeep.back();
3030
3031 NestedTooDeep.pop_back();
3032
3033 if (!KeepIfBraces && !KeepElseBraces) {
3034 markOptionalBraces(IfLeftBrace);
3035 markOptionalBraces(ElseLeftBrace);
3036 } else if (IfLeftBrace) {
3037 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
3038 if (IfRightBrace) {
3039 assert(IfRightBrace->MatchingParen == IfLeftBrace);
3040 assert(!IfLeftBrace->Optional);
3041 assert(!IfRightBrace->Optional);
3042 IfLeftBrace->MatchingParen = nullptr;
3043 IfRightBrace->MatchingParen = nullptr;
3044 }
3045 }
3046
3047 if (IfKind)
3048 *IfKind = Kind;
3049
3050 return IfLeftBrace;
3051}
3052
3053void UnwrappedLineParser::parseTryCatch() {
3054 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
3055 nextToken();
3056 bool NeedsUnwrappedLine = false;
3057 bool HasCtorInitializer = false;
3058 if (FormatTok->is(tok::colon)) {
3059 auto *Colon = FormatTok;
3060 // We are in a function try block, what comes is an initializer list.
3061 nextToken();
3062 if (FormatTok->is(tok::identifier)) {
3063 HasCtorInitializer = true;
3064 Colon->setFinalizedType(TT_CtorInitializerColon);
3065 }
3066
3067 // In case identifiers were removed by clang-tidy, what might follow is
3068 // multiple commas in sequence - before the first identifier.
3069 while (FormatTok->is(tok::comma))
3070 nextToken();
3071
3072 while (FormatTok->is(tok::identifier)) {
3073 nextToken();
3074 if (FormatTok->is(tok::l_paren)) {
3075 parseParens();
3076 } else if (FormatTok->is(tok::l_brace)) {
3077 nextToken();
3078 parseBracedList();
3079 }
3080
3081 // In case identifiers were removed by clang-tidy, what might follow is
3082 // multiple commas in sequence - after the first identifier.
3083 while (FormatTok->is(tok::comma))
3084 nextToken();
3085 }
3086 }
3087 // Parse try with resource.
3088 if (Style.isJava() && FormatTok->is(tok::l_paren))
3089 parseParens();
3090
3091 keepAncestorBraces();
3092
3093 if (FormatTok->is(tok::l_brace)) {
3094 if (HasCtorInitializer)
3095 FormatTok->setFinalizedType(TT_FunctionLBrace);
3096 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3097 parseBlock();
3098 if (Style.BraceWrapping.BeforeCatch)
3099 addUnwrappedLine();
3100 else
3101 NeedsUnwrappedLine = true;
3102 } else if (FormatTok->isNot(tok::kw_catch)) {
3103 // The C++ standard requires a compound-statement after a try.
3104 // If there's none, we try to assume there's a structuralElement
3105 // and try to continue.
3106 addUnwrappedLine();
3107 ++Line->Level;
3108 parseStructuralElement();
3109 --Line->Level;
3110 }
3111 for (bool SeenCatch = false;;) {
3112 if (FormatTok->is(tok::at))
3113 nextToken();
3114 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
3115 tok::kw___finally, tok::objc_catch,
3116 tok::objc_finally) ||
3117 ((Style.isJava() || Style.isJavaScript()) &&
3118 FormatTok->is(Keywords.kw_finally)))) {
3119 break;
3120 }
3121 if (FormatTok->is(tok::kw_catch))
3122 SeenCatch = true;
3123 nextToken();
3124 while (FormatTok->isNot(tok::l_brace)) {
3125 if (FormatTok->is(tok::l_paren)) {
3126 parseParens();
3127 continue;
3128 }
3129 if (FormatTok->isOneOf(tok::semi, tok::r_brace) || eof()) {
3130 if (Style.RemoveBracesLLVM)
3131 NestedTooDeep.pop_back();
3132 return;
3133 }
3134 nextToken();
3135 }
3136 if (SeenCatch) {
3137 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3138 SeenCatch = false;
3139 }
3140 NeedsUnwrappedLine = false;
3141 Line->MustBeDeclaration = false;
3142 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3143 parseBlock();
3144 if (Style.BraceWrapping.BeforeCatch)
3145 addUnwrappedLine();
3146 else
3147 NeedsUnwrappedLine = true;
3148 }
3149
3150 if (Style.RemoveBracesLLVM)
3151 NestedTooDeep.pop_back();
3152
3153 if (NeedsUnwrappedLine)
3154 addUnwrappedLine();
3155}
3156
3157void UnwrappedLineParser::parseNamespaceOrExportBlock(unsigned AddLevels) {
3158 bool ManageWhitesmithsBraces =
3159 AddLevels == 0u && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3160
3161 // If we're in Whitesmiths mode, indent the brace if we're not indenting
3162 // the whole block.
3163 if (ManageWhitesmithsBraces)
3164 ++Line->Level;
3165
3166 // Munch the semicolon after the block. This is more common than one would
3167 // think. Putting the semicolon into its own line is very ugly.
3168 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
3169 /*KeepBraces=*/true, /*IfKind=*/nullptr, ManageWhitesmithsBraces);
3170
3171 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
3172
3173 if (ManageWhitesmithsBraces)
3174 --Line->Level;
3175}
3176
3177void UnwrappedLineParser::parseNamespace() {
3178 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
3179 "'namespace' expected");
3180
3181 const FormatToken &InitialToken = *FormatTok;
3182 nextToken();
3183 if (InitialToken.is(TT_NamespaceMacro)) {
3184 parseParens();
3185 } else {
3186 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
3187 tok::l_square, tok::period, tok::l_paren) ||
3188 (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
3189 if (FormatTok->is(tok::l_square))
3190 parseSquare();
3191 else if (FormatTok->is(tok::l_paren))
3192 parseParens();
3193 else
3194 nextToken();
3195 }
3196 }
3197 if (FormatTok->is(tok::l_brace)) {
3198 FormatTok->setFinalizedType(TT_NamespaceLBrace);
3199
3200 if (ShouldBreakBeforeBrace(Style, InitialToken))
3201 addUnwrappedLine();
3202
3203 unsigned AddLevels =
3206 DeclarationScopeStack.size() > 1)
3207 ? 1u
3208 : 0u;
3209 parseNamespaceOrExportBlock(AddLevels);
3210 }
3211 // FIXME: Add error handling.
3212}
3213
3214void UnwrappedLineParser::parseCppExportBlock() {
3215 parseNamespaceOrExportBlock(/*AddLevels=*/Style.IndentExportBlock ? 1 : 0);
3216}
3217
3218void UnwrappedLineParser::parseNew() {
3219 assert(FormatTok->is(tok::kw_new) && "'new' expected");
3220 nextToken();
3221
3222 if (Style.isCSharp()) {
3223 do {
3224 // Handle constructor invocation, e.g. `new(field: value)`.
3225 if (FormatTok->is(tok::l_paren))
3226 parseParens();
3227
3228 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3229 if (FormatTok->is(tok::l_brace))
3230 parseBracedList();
3231
3232 if (FormatTok->isOneOf(tok::semi, tok::comma))
3233 return;
3234
3235 nextToken();
3236 } while (!eof());
3237 }
3238
3239 if (!Style.isJava())
3240 return;
3241
3242 // In Java, we can parse everything up to the parens, which aren't optional.
3243 do {
3244 // There should not be a ;, { or } before the new's open paren.
3245 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
3246 return;
3247
3248 // Consume the parens.
3249 if (FormatTok->is(tok::l_paren)) {
3250 parseParens();
3251
3252 // If there is a class body of an anonymous class, consume that as child.
3253 if (FormatTok->is(tok::l_brace))
3254 parseChildBlock();
3255 return;
3256 }
3257 nextToken();
3258 } while (!eof());
3259}
3260
3261void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3262 keepAncestorBraces();
3263
3264 if (isBlockBegin(*FormatTok)) {
3265 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3266 FormatToken *LeftBrace = FormatTok;
3267 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3268 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3269 /*MunchSemi=*/true, KeepBraces);
3270 setPreviousRBraceType(TT_ControlStatementRBrace);
3271 if (!KeepBraces) {
3272 assert(!NestedTooDeep.empty());
3273 if (!NestedTooDeep.back())
3274 markOptionalBraces(LeftBrace);
3275 }
3276 if (WrapRightBrace)
3277 addUnwrappedLine();
3278 } else {
3279 parseUnbracedBody();
3280 }
3281
3282 if (!KeepBraces)
3283 NestedTooDeep.pop_back();
3284}
3285
3286void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3287 assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3288 (Style.isVerilog() &&
3289 FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3290 Keywords.kw_always_ff, Keywords.kw_always_latch,
3291 Keywords.kw_final, Keywords.kw_initial,
3292 Keywords.kw_foreach, Keywords.kw_forever,
3293 Keywords.kw_repeat))) &&
3294 "'for', 'while' or foreach macro expected");
3295 const bool KeepBraces = !Style.RemoveBracesLLVM ||
3296 !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
3297
3298 nextToken();
3299 // JS' for await ( ...
3300 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
3301 nextToken();
3302 if (IsCpp && FormatTok->is(tok::kw_co_await))
3303 nextToken();
3304 if (HasParens && FormatTok->is(tok::l_paren)) {
3305 // The type is only set for Verilog basically because we were afraid to
3306 // change the existing behavior for loops. See the discussion on D121756 for
3307 // details.
3308 if (Style.isVerilog())
3309 FormatTok->setFinalizedType(TT_ConditionLParen);
3310 parseParens();
3311 }
3312
3313 if (Style.isVerilog()) {
3314 // Event control.
3315 parseVerilogSensitivityList();
3316 } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) &&
3317 Tokens->getPreviousToken()->is(tok::r_paren)) {
3318 nextToken();
3319 addUnwrappedLine();
3320 return;
3321 }
3322
3323 handleAttributes();
3324 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3325}
3326
3327void UnwrappedLineParser::parseDoWhile() {
3328 assert(FormatTok->is(tok::kw_do) && "'do' expected");
3329 nextToken();
3330
3331 parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
3332
3333 // FIXME: Add error handling.
3334 if (FormatTok->isNot(tok::kw_while)) {
3335 addUnwrappedLine();
3336 return;
3337 }
3338
3339 FormatTok->setFinalizedType(TT_DoWhile);
3340
3341 // If in Whitesmiths mode, the line with the while() needs to be indented
3342 // to the same level as the block.
3344 ++Line->Level;
3345
3346 nextToken();
3347 parseStructuralElement();
3348}
3349
3350void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3351 nextToken();
3352 unsigned OldLineLevel = Line->Level;
3353
3354 if (LeftAlignLabel)
3355 Line->Level = 0;
3356 else if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3357 --Line->Level;
3358
3359 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3360 FormatTok->is(tok::l_brace)) {
3361
3362 CompoundStatementIndenter Indenter(this, Line->Level,
3365 parseBlock();
3366 if (FormatTok->is(tok::kw_break)) {
3369 addUnwrappedLine();
3370 if (!Style.IndentCaseBlocks &&
3372 ++Line->Level;
3373 }
3374 }
3375 parseStructuralElement();
3376 }
3377 addUnwrappedLine();
3378 } else {
3379 if (FormatTok->is(tok::semi))
3380 nextToken();
3381 addUnwrappedLine();
3382 }
3383 Line->Level = OldLineLevel;
3384 if (FormatTok->isNot(tok::l_brace)) {
3385 parseStructuralElement();
3386 addUnwrappedLine();
3387 }
3388}
3389
3390void UnwrappedLineParser::parseCaseLabel() {
3391 assert(FormatTok->is(tok::kw_case) && "'case' expected");
3392 auto *Case = FormatTok;
3393
3394 // FIXME: fix handling of complex expressions here.
3395 do {
3396 nextToken();
3397 if (FormatTok->is(tok::colon)) {
3398 FormatTok->setFinalizedType(TT_CaseLabelColon);
3399 break;
3400 }
3401 if (Style.isJava() && FormatTok->is(tok::arrow)) {
3402 FormatTok->setFinalizedType(TT_CaseLabelArrow);
3403 Case->setFinalizedType(TT_SwitchExpressionLabel);
3404 break;
3405 }
3406 } while (!eof());
3407 parseLabel();
3408}
3409
3410void UnwrappedLineParser::parseSwitch(bool IsExpr) {
3411 assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3412 nextToken();
3413 if (FormatTok->is(tok::l_paren))
3414 parseParens();
3415
3416 keepAncestorBraces();
3417
3418 if (FormatTok->is(tok::l_brace)) {
3419 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3420 FormatTok->setFinalizedType(IsExpr ? TT_SwitchExpressionLBrace
3421 : TT_ControlStatementLBrace);
3422 if (IsExpr)
3423 parseChildBlock();
3424 else
3425 parseBlock();
3426 setPreviousRBraceType(TT_ControlStatementRBrace);
3427 if (!IsExpr)
3428 addUnwrappedLine();
3429 } else {
3430 addUnwrappedLine();
3431 ++Line->Level;
3432 parseStructuralElement();
3433 --Line->Level;
3434 }
3435
3436 if (Style.RemoveBracesLLVM)
3437 NestedTooDeep.pop_back();
3438}
3439
3440void UnwrappedLineParser::parseAccessSpecifier() {
3441 nextToken();
3442 // Understand Qt's slots.
3443 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3444 nextToken();
3445 // Otherwise, we don't know what it is, and we'd better keep the next token.
3446 if (FormatTok->is(tok::colon))
3447 nextToken();
3448 addUnwrappedLine();
3449}
3450
3451/// Parses a requires, decides if it is a clause or an expression.
3452/// \pre The current token has to be the requires keyword.
3453/// \returns true if it parsed a clause.
3454bool UnwrappedLineParser::parseRequires(bool SeenEqual) {
3455 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3456 auto RequiresToken = FormatTok;
3457
3458 // We try to guess if it is a requires clause, or a requires expression. For
3459 // that we first consume the keyword and check the next token.
3460 nextToken();
3461
3462 switch (FormatTok->Tok.getKind()) {
3463 case tok::l_brace:
3464 // This can only be an expression, never a clause.
3465 parseRequiresExpression(RequiresToken);
3466 return false;
3467 case tok::l_paren:
3468 // Clauses and expression can start with a paren, it's unclear what we have.
3469 break;
3470 default:
3471 // All other tokens can only be a clause.
3472 parseRequiresClause(RequiresToken);
3473 return true;
3474 }
3475
3476 // Looking forward we would have to decide if there are function declaration
3477 // like arguments to the requires expression:
3478 // requires (T t) {
3479 // Or there is a constraint expression for the requires clause:
3480 // requires (C<T> && ...
3481
3482 // But first let's look behind.
3483 auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3484
3485 if (!PreviousNonComment ||
3486 PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3487 // If there is no token, or an expression left brace, we are a requires
3488 // clause within a requires expression.
3489 parseRequiresClause(RequiresToken);
3490 return true;
3491 }
3492
3493 switch (PreviousNonComment->Tok.getKind()) {
3494 case tok::greater:
3495 case tok::r_paren:
3496 case tok::kw_noexcept:
3497 case tok::kw_const:
3498 case tok::star:
3499 case tok::amp:
3500 // This is a requires clause.
3501 parseRequiresClause(RequiresToken);
3502 return true;
3503 case tok::ampamp: {
3504 // This can be either:
3505 // if (... && requires (T t) ...)
3506 // Or
3507 // void member(...) && requires (C<T> ...
3508 // We check the one token before that for a const:
3509 // void member(...) const && requires (C<T> ...
3510 auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3511 if ((PrevPrev && PrevPrev->is(tok::kw_const)) || !SeenEqual) {
3512 parseRequiresClause(RequiresToken);
3513 return true;
3514 }
3515 break;
3516 }
3517 default:
3518 if (PreviousNonComment->isTypeOrIdentifier(LangOpts)) {
3519 // This is a requires clause.
3520 parseRequiresClause(RequiresToken);
3521 return true;
3522 }
3523 // It's an expression.
3524 parseRequiresExpression(RequiresToken);
3525 return false;
3526 }
3527
3528 // Now we look forward and try to check if the paren content is a parameter
3529 // list. The parameters can be cv-qualified and contain references or
3530 // pointers.
3531 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3532 // of stuff: typename, const, *, &, &&, ::, identifiers.
3533
3534 unsigned StoredPosition = Tokens->getPosition();
3535 FormatToken *NextToken = Tokens->getNextToken();
3536 int Lookahead = 0;
3537 auto PeekNext = [&Lookahead, &NextToken, this] {
3538 ++Lookahead;
3539 NextToken = Tokens->getNextToken();
3540 };
3541
3542 bool FoundType = false;
3543 bool LastWasColonColon = false;
3544 int OpenAngles = 0;
3545
3546 for (; Lookahead < 50; PeekNext()) {
3547 switch (NextToken->Tok.getKind()) {
3548 case tok::kw_volatile:
3549 case tok::kw_const:
3550 case tok::comma:
3551 if (OpenAngles == 0) {
3552 FormatTok = Tokens->setPosition(StoredPosition);
3553 parseRequiresExpression(RequiresToken);
3554 return false;
3555 }
3556 break;
3557 case tok::eof:
3558 // Break out of the loop.
3559 Lookahead = 50;
3560 break;
3561 case tok::coloncolon:
3562 LastWasColonColon = true;
3563 break;
3564 case tok::kw_decltype:
3565 case tok::identifier:
3566 if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3567 FormatTok = Tokens->setPosition(StoredPosition);
3568 parseRequiresExpression(RequiresToken);
3569 return false;
3570 }
3571 FoundType = true;
3572 LastWasColonColon = false;
3573 break;
3574 case tok::less:
3575 ++OpenAngles;
3576 break;
3577 case tok::greater:
3578 --OpenAngles;
3579 break;
3580 default:
3581 if (NextToken->isTypeName(LangOpts)) {
3582 FormatTok = Tokens->setPosition(StoredPosition);
3583 parseRequiresExpression(RequiresToken);
3584 return false;
3585 }
3586 break;
3587 }
3588 }
3589 // This seems to be a complicated expression, just assume it's a clause.
3590 FormatTok = Tokens->setPosition(StoredPosition);
3591 parseRequiresClause(RequiresToken);
3592 return true;
3593}
3594
3595/// Parses a requires clause.
3596/// \param RequiresToken The requires keyword token, which starts this clause.
3597/// \pre We need to be on the next token after the requires keyword.
3598/// \sa parseRequiresExpression
3599///
3600/// Returns if it either has finished parsing the clause, or it detects, that
3601/// the clause is incorrect.
3602void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3603 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3604 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3605
3606 // If there is no previous token, we are within a requires expression,
3607 // otherwise we will always have the template or function declaration in front
3608 // of it.
3609 bool InRequiresExpression =
3610 !RequiresToken->Previous ||
3611 RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3612
3613 RequiresToken->setFinalizedType(InRequiresExpression
3614 ? TT_RequiresClauseInARequiresExpression
3615 : TT_RequiresClause);
3616
3617 // NOTE: parseConstraintExpression is only ever called from this function.
3618 // It could be inlined into here.
3619 parseConstraintExpression();
3620
3621 if (!InRequiresExpression && FormatTok->Previous)
3622 FormatTok->Previous->ClosesRequiresClause = true;
3623}
3624
3625/// Parses a requires expression.
3626/// \param RequiresToken The requires keyword token, which starts this clause.
3627/// \pre We need to be on the next token after the requires keyword.
3628/// \sa parseRequiresClause
3629///
3630/// Returns if it either has finished parsing the expression, or it detects,
3631/// that the expression is incorrect.
3632void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3633 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3634 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3635
3636 RequiresToken->setFinalizedType(TT_RequiresExpression);
3637
3638 if (FormatTok->is(tok::l_paren)) {
3639 FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3640 parseParens();
3641 }
3642
3643 if (FormatTok->is(tok::l_brace)) {
3644 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3645 parseChildBlock();
3646 }
3647}
3648
3649/// Parses a constraint expression.
3650///
3651/// This is the body of a requires clause. It returns, when the parsing is
3652/// complete, or the expression is incorrect.
 ///
 /// The loop consumes one token (or one bracketed group) per iteration and
 /// returns as soon as it meets a token that cannot continue the expression.
3653void UnwrappedLineParser::parseConstraintExpression() {
3654 // The special handling for lambdas is needed since tryToParseLambda() eats a
3655 // token and if a requires expression is the last part of a requires clause
3656 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3657 // not set on the correct token. Thus we need to be aware if we even expect a
3658 // lambda to be possible.
3659 // template <typename T> requires requires { ... } [[nodiscard]] ...;
3660 bool LambdaNextTimeAllowed = true;
3661
3662 // Within lambda declarations, it is permitted to put a requires clause after
3663 // its template parameter list, which would place the requires clause right
3664 // before the parentheses of the parameters of the lambda declaration. Thus,
3665 // we track if we expect to see grouping parentheses at all.
3666 // Without this check, `requires foo<T> (T t)` in the below example would be
3667 // seen as the whole requires clause, accidentally eating the parameters of
3668 // the lambda.
3669 // [&]<typename T> requires foo<T> (T t) { ... };
3670 bool TopLevelParensAllowed = true;
3671
3672 do {
 // Take the flag for this iteration and reset it; cases that allow a lambda
 // to follow set it again below.
3673 bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
3674
3675 switch (FormatTok->Tok.getKind()) {
 // A nested requires keyword is always parsed as a requires expression.
3676 case tok::kw_requires: {
3677 auto RequiresToken = FormatTok;
3678 nextToken();
3679 parseRequiresExpression(RequiresToken);
3680 break;
3681 }
3682
 // A parenthesized group; `&&` inside it is typed as a binary operator.
3683 case tok::l_paren:
3684 if (!TopLevelParensAllowed)
3685 return;
3686 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3687 TopLevelParensAllowed = false;
3688 break;
3689
 // `[` continues the expression only as a lambda, and only where one is
 // currently allowed.
3690 case tok::l_square:
3691 if (!LambdaThisTimeAllowed || !tryToParseLambda())
3692 return;
3693 break;
3694
 // Tokens that end the constraint expression.
3695 case tok::kw_const:
3696 case tok::semi:
3697 case tok::kw_class:
3698 case tok::kw_struct:
3699 case tok::kw_union:
3700 return;
3701
3702 case tok::l_brace:
3703 // Potential function body.
3704 return;
3705
 // Logical operators: a lambda or a parenthesized group may follow.
3706 case tok::ampamp:
3707 case tok::pipepipe:
3708 FormatTok->setFinalizedType(TT_BinaryOperator);
3709 nextToken();
3710 LambdaNextTimeAllowed = true;
3711 TopLevelParensAllowed = true;
3712 break;
3713
 // Commas and comments carry the lambda permission over unchanged.
3714 case tok::comma:
3715 case tok::comment:
3716 LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3717 nextToken();
3718 break;
3719
3720 case tok::kw_sizeof:
3721 case tok::greater:
3722 case tok::greaterequal:
3723 case tok::greatergreater:
3724 case tok::less:
3725 case tok::lessequal:
3726 case tok::lessless:
3727 case tok::equalequal:
3728 case tok::exclaim:
3729 case tok::exclaimequal:
3730 case tok::plus:
3731 case tok::minus:
3732 case tok::star:
3733 case tok::slash:
3734 LambdaNextTimeAllowed = true;
3735 TopLevelParensAllowed = true;
3736 // Just eat them.
3737 nextToken();
3738 break;
3739
3740 case tok::numeric_constant:
3741 case tok::coloncolon:
3742 case tok::kw_true:
3743 case tok::kw_false:
3744 TopLevelParensAllowed = false;
3745 // Just eat them.
3746 nextToken();
3747 break;
3748
 // Named casts must be followed by `<...>`, which is parsed as an
 // angle-bracketed list.
3749 case tok::kw_static_cast:
3750 case tok::kw_const_cast:
3751 case tok::kw_reinterpret_cast:
3752 case tok::kw_dynamic_cast:
3753 nextToken();
3754 if (FormatTok->isNot(tok::less))
3755 return;
3756
3757 nextToken();
3758 parseBracedList(/*IsAngleBracket=*/true);
3759 break;
3760
3761 default:
3762 if (!FormatTok->Tok.getIdentifierInfo()) {
3763 // Identifiers are part of the default case, we check for more than
3764 // tok::identifier to handle builtin type traits.
3765 return;
3766 }
3767
3768 // We need to differentiate identifiers for a template deduction guide,
3769 // variables, or function return types (the constraint expression has
3770 // ended before that), and basically all other cases. But it's easier to
3771 // check the other way around.
3772 assert(FormatTok->Previous);
3773 switch (FormatTok->Previous->Tok.getKind()) {
3774 case tok::coloncolon: // Nested identifier.
3775 case tok::ampamp: // Start of a function or variable for the
3776 case tok::pipepipe: // constraint expression. (binary)
3777 case tok::exclaim: // The same as above, but unary.
3778 case tok::kw_requires: // Initial identifier of a requires clause.
3779 case tok::equal: // Initial identifier of a concept declaration.
3780 break;
3781 default:
3782 return;
3783 }
3784
3785 // Read identifier with optional template declaration.
3786 nextToken();
3787 if (FormatTok->is(tok::less)) {
3788 nextToken();
3789 parseBracedList(/*IsAngleBracket=*/true);
3790 }
3791 TopLevelParensAllowed = false;
3792 break;
3793 }
3794 } while (!eof());
3795}
3796
3797bool UnwrappedLineParser::parseEnum() {
3798 const FormatToken &InitialToken = *FormatTok;
3799
3800 // Won't be 'enum' for NS_ENUMs.
3801 if (FormatTok->is(tok::kw_enum))
3802 nextToken();
3803
3804 // In TypeScript, "enum" can also be used as property name, e.g. in interface
3805 // declarations. An "enum" keyword followed by a colon would be a syntax
3806 // error and thus assume it is just an identifier.
3807 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3808 return false;
3809
3810 // In protobuf, "enum" can be used as a field name.
3811 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3812 return false;
3813
3814 if (IsCpp) {
3815 // Eat up enum class ...
3816 if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3817 nextToken();
3818 while (FormatTok->is(tok::l_square))
3819 if (!handleCppAttributes())
3820 return false;
3821 }
3822
3823 while (FormatTok->Tok.getIdentifierInfo() ||
3824 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3825 tok::greater, tok::comma, tok::question,
3826 tok::l_square)) {
3827 if (Style.isVerilog()) {
3828 FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
3829 nextToken();
3830 // In Verilog the base type can have dimensions.
3831 while (FormatTok->is(tok::l_square))
3832 parseSquare();
3833 } else {
3834 nextToken();
3835 }
3836 // We can have macros or attributes in between 'enum' and the enum name.
3837 if (FormatTok->is(tok::l_paren))
3838 parseParens();
3839 if (FormatTok->is(tok::identifier)) {
3840 nextToken();
3841 // If there are two identifiers in a row, this is likely an elaborate
3842 // return type. In Java, this can be "implements", etc.
3843 if (IsCpp && FormatTok->is(tok::identifier))
3844 return false;
3845 }
3846 }
3847
3848 // Just a declaration or something is wrong.
3849 if (FormatTok->isNot(tok::l_brace))
3850 return true;
3851 FormatTok->setFinalizedType(TT_EnumLBrace);
3852 FormatTok->setBlockKind(BK_Block);
3853
3854 if (Style.isJava()) {
3855 // Java enums are different.
3856 parseJavaEnumBody();
3857 return true;
3858 }
3859 if (Style.Language == FormatStyle::LK_Proto) {
3860 parseBlock(/*MustBeDeclaration=*/true);
3861 return true;
3862 }
3863
3864 if (!Style.AllowShortEnumsOnASingleLine &&
3865 ShouldBreakBeforeBrace(Style, InitialToken)) {
3866 addUnwrappedLine();
3867 }
3868 // Parse enum body.
3869 nextToken();
3870 if (!Style.AllowShortEnumsOnASingleLine) {
3871 addUnwrappedLine();
3872 Line->Level += 1;
3873 }
3874 bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
3876 Line->Level -= 1;
3877 if (HasError) {
3878 if (FormatTok->is(tok::semi))
3879 nextToken();
3880 addUnwrappedLine();
3881 }
3882 setPreviousRBraceType(TT_EnumRBrace);
3883 return true;
3884
3885 // There is no addUnwrappedLine() here so that we fall through to parsing a
3886 // structural element afterwards. Thus, in "enum A {} n, m;",
3887 // "} n, m;" will end up in one unwrapped line.
3888}
3889
3890bool UnwrappedLineParser::parseStructLike() {
3891 // parseRecord falls through and does not yet add an unwrapped line as a
3892 // record declaration or definition can start a structural element.
3893 parseRecord();
3894 // This does not apply to Java, JavaScript and C#.
3895 if (Style.isJava() || Style.isJavaScript() || Style.isCSharp()) {
3896 if (FormatTok->is(tok::semi))
3897 nextToken();
3898 addUnwrappedLine();
3899 return true;
3900 }
3901 return false;
3902}
3903
3904namespace {
3905// A class used to set and restore the Token position when peeking
3906// ahead in the token source.
3907class ScopedTokenPosition {
3908 unsigned StoredPosition;
3909 FormatTokenSource *Tokens;
3910
3911public:
3912 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3913 assert(Tokens && "Tokens expected to not be null");
3914 StoredPosition = Tokens->getPosition();
3915 }
3916
3917 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3918};
3919} // namespace
3920
3921// Look to see if we have [[ by looking ahead, if
3922// its not then rewind to the original position.
3923bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3924 ScopedTokenPosition AutoPosition(Tokens);
3925 FormatToken *Tok = Tokens->getNextToken();
3926 // We already read the first [ check for the second.
3927 if (Tok->isNot(tok::l_square))
3928 return false;
3929 // Double check that the attribute is just something
3930 // fairly simple.
3931 while (Tok->isNot(tok::eof)) {
3932 if (Tok->is(tok::r_square))
3933 break;
3934 Tok = Tokens->getNextToken();
3935 }
3936 if (Tok->is(tok::eof))
3937 return false;
3938 Tok = Tokens->getNextToken();
3939 if (Tok->isNot(tok::r_square))
3940 return false;
3941 Tok = Tokens->getNextToken();
3942 if (Tok->is(tok::semi))
3943 return false;
3944 return true;
3945}
3946
3947void UnwrappedLineParser::parseJavaEnumBody() {
3948 assert(FormatTok->is(tok::l_brace));
3949 const FormatToken *OpeningBrace = FormatTok;
3950
3951 // Determine whether the enum is simple, i.e. does not have a semicolon or
3952 // constants with class bodies. Simple enums can be formatted like braced
3953 // lists, contracted to a single line, etc.
3954 unsigned StoredPosition = Tokens->getPosition();
3955 bool IsSimple = true;
3956 FormatToken *Tok = Tokens->getNextToken();
3957 while (Tok->isNot(tok::eof)) {
3958 if (Tok->is(tok::r_brace))
3959 break;
3960 if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3961 IsSimple = false;
3962 break;
3963 }
3964 // FIXME: This will also mark enums with braces in the arguments to enum
3965 // constants as "not simple". This is probably fine in practice, though.
3966 Tok = Tokens->getNextToken();
3967 }
3968 FormatTok = Tokens->setPosition(StoredPosition);
3969
3970 if (IsSimple) {
3971 nextToken();
3972 parseBracedList();
3973 addUnwrappedLine();
3974 return;
3975 }
3976
3977 // Parse the body of a more complex enum.
3978 // First add a line for everything up to the "{".
3979 nextToken();
3980 addUnwrappedLine();
3981 ++Line->Level;
3982
3983 // Parse the enum constants.
3984 while (!eof()) {
3985 if (FormatTok->is(tok::l_brace)) {
3986 // Parse the constant's class body.
3987 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3988 /*MunchSemi=*/false);
3989 } else if (FormatTok->is(tok::l_paren)) {
3990 parseParens();
3991 } else if (FormatTok->is(tok::comma)) {
3992 nextToken();
3993 addUnwrappedLine();
3994 } else if (FormatTok->is(tok::semi)) {
3995 nextToken();
3996 addUnwrappedLine();
3997 break;
3998 } else if (FormatTok->is(tok::r_brace)) {
3999 addUnwrappedLine();
4000 break;
4001 } else {
4002 nextToken();
4003 }
4004 }
4005
4006 // Parse the class body after the enum's ";" if any.
4007 parseLevel(OpeningBrace);
4008 nextToken();
4009 --Line->Level;
4010 addUnwrappedLine();
4011}
4012
/// Parses a record-like declaration, starting at its introducing keyword:
/// the name (with attributes, macros and nested name specifiers), an optional
/// base or template clause, and the braced body if one follows.
///
/// \param ParseAsExpr If true, the body is parsed with parseChildBlock()
/// instead of parseBlock(), and no line break is added before the brace.
/// \param IsJavaRecord If true, the current token must be the `record`
/// keyword; an identifier directly after it is taken as the class name and a
/// following parenthesized list is always parsed.
///
/// Returns early, without parsing a body, when a `;` is found in the base
/// clause or when the braces turn out to be a list initialization.
4013void UnwrappedLineParser::parseRecord(bool ParseAsExpr, bool IsJavaRecord) {
4014 assert(!IsJavaRecord || FormatTok->is(Keywords.kw_record));
4015 const FormatToken &InitialToken = *FormatTok;
4016 nextToken();
4017
4018 FormatToken *ClassName =
4019 IsJavaRecord && FormatTok->is(tok::identifier) ? FormatTok : nullptr;
4020 bool IsDerived = false;
 // An identifier whose text is not entirely upper case.
4021 auto IsNonMacroIdentifier = [](const FormatToken *Tok) {
4022 return Tok->is(tok::identifier) && Tok->TokenText != Tok->TokenText.upper();
4023 };
4024 // JavaScript/TypeScript supports anonymous classes like:
4025 // a = class extends foo { }
4026 bool JSPastExtendsOrImplements = false;
4027 // The actual identifier can be a nested name specifier, and in macros
4028 // it is often token-pasted.
4029 // An [[attribute]] can be before the identifier.
4030 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
4031 tok::kw_alignas, tok::l_square) ||
4032 FormatTok->isAttribute() ||
4033 ((Style.isJava() || Style.isJavaScript()) &&
4034 FormatTok->isOneOf(tok::period, tok::comma))) {
4035 if (Style.isJavaScript() &&
4036 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
4037 JSPastExtendsOrImplements = true;
4038 // JavaScript/TypeScript supports inline object types in
4039 // extends/implements positions:
4040 // class Foo implements {bar: number} { }
4041 nextToken();
4042 if (FormatTok->is(tok::l_brace)) {
4043 tryToParseBracedList();
4044 continue;
4045 }
4046 }
4047 if (FormatTok->is(tok::l_square) && handleCppAttributes())
4048 continue;
 // Remember the token just consumed; the token after it decides how that
 // one is interpreted.
4049 auto *Previous = FormatTok;
4050 nextToken();
4051 switch (FormatTok->Tok.getKind()) {
4052 case tok::l_paren:
4053 // We can have macros in between 'class' and the class name.
4054 if (IsJavaRecord || !IsNonMacroIdentifier(Previous) ||
4055 // e.g. `struct macro(a) S { int i; };`
4056 Previous->Previous == &InitialToken) {
4057 parseParens();
4058 }
4059 break;
4060 case tok::coloncolon:
4061 case tok::hashhash:
4062 break;
4063 default:
4064 if (JSPastExtendsOrImplements || ClassName ||
4065 Previous->isNot(tok::identifier) || Previous->is(TT_AttributeMacro)) {
4066 break;
4067 }
 // Take the identifier as the class name if it is a single character or
 // not entirely upper case.
4068 if (const auto Text = Previous->TokenText;
4069 Text.size() == 1 || Text != Text.upper()) {
4070 ClassName = Previous;
4071 }
4072 }
4073 }
4074
 // With the current token at `{`: true if the brace was successfully parsed
 // as a braced list following an identifier other than the class name (and
 // other than `final`), for a non-derived, named record.
4075 auto IsListInitialization = [&] {
4076 if (!ClassName || IsDerived || JSPastExtendsOrImplements)
4077 return false;
4078 assert(FormatTok->is(tok::l_brace));
4079 const auto *Prev = FormatTok->getPreviousNonComment();
4080 assert(Prev);
4081 return Prev != ClassName && Prev->is(tok::identifier) &&
4082 Prev->isNot(Keywords.kw_final) && tryToParseBracedList();
4083 };
4084
 // Scan a base clause (`:`) or template argument list (`<`) while tracking
 // the nesting depth of angle brackets.
4085 if (FormatTok->isOneOf(tok::colon, tok::less)) {
4086 int AngleNestingLevel = 0;
4087 do {
4088 if (FormatTok->is(tok::less))
4089 ++AngleNestingLevel;
4090 else if (FormatTok->is(tok::greater))
4091 --AngleNestingLevel;
4092
4093 if (AngleNestingLevel == 0) {
4094 if (FormatTok->is(tok::colon)) {
4095 IsDerived = true;
4096 } else if (!IsDerived && FormatTok->is(tok::identifier) &&
4097 FormatTok->Previous->is(tok::coloncolon)) {
4098 ClassName = FormatTok;
4099 } else if (FormatTok->is(tok::l_paren) &&
4100 IsNonMacroIdentifier(FormatTok->Previous)) {
4101 break;
4102 }
4103 }
4104 if (FormatTok->is(tok::l_brace)) {
4105 if (AngleNestingLevel == 0 && IsListInitialization())
4106 return;
4107 calculateBraceTypes(/*ExpectClassBody=*/true);
4108 if (!tryToParseBracedList())
4109 break;
4110 }
4111 if (FormatTok->is(tok::l_square)) {
4112 FormatToken *Previous = FormatTok->Previous;
4113 if (!Previous || (Previous->isNot(tok::r_paren) &&
4114 !Previous->isTypeOrIdentifier(LangOpts))) {
4115 // Don't try parsing a lambda if we had a closing parenthesis before,
4116 // it was probably a pointer to an array: int (*)[].
4117 if (!tryToParseLambda())
4118 continue;
4119 } else {
4120 parseSquare();
4121 continue;
4122 }
4123 }
4124 if (FormatTok->is(tok::semi))
4125 return;
4126 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
4127 addUnwrappedLine();
4128 nextToken();
4129 parseCSharpGenericTypeConstraint();
4130 break;
4131 }
4132 nextToken();
4133 } while (!eof());
4134 }
4135
 // Maps the introducing keyword to the token types for the body's braces.
4136 auto GetBraceTypes =
4137 [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
4138 switch (RecordTok.Tok.getKind()) {
4139 case tok::kw_class:
4140 return {TT_ClassLBrace, TT_ClassRBrace};
4141 case tok::kw_struct:
4142 return {TT_StructLBrace, TT_StructRBrace};
4143 case tok::kw_union:
4144 return {TT_UnionLBrace, TT_UnionRBrace};
4145 default:
4146 // Useful for e.g. interface.
4147 return {TT_RecordLBrace, TT_RecordRBrace};
4148 }
4149 };
4150 if (FormatTok->is(tok::l_brace)) {
4151 if (IsListInitialization())
4152 return;
4153 if (ClassName)
4154 ClassName->setFinalizedType(TT_ClassHeadName);
4155 auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
4156 FormatTok->setFinalizedType(OpenBraceType);
4157 if (ParseAsExpr) {
4158 parseChildBlock();
4159 } else {
4160 if (ShouldBreakBeforeBrace(Style, InitialToken))
4161 addUnwrappedLine();
4162
 // Members get one extra level when access modifiers are indented.
4163 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
4164 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
4165 }
4166 setPreviousRBraceType(ClosingBraceType);
4167 }
4168 // There is no addUnwrappedLine() here so that we fall through to parsing a
4169 // structural element afterwards. Thus, in "class A {} n, m;",
4170 // "} n, m;" will end up in one unwrapped line.
4171}
4172
4173void UnwrappedLineParser::parseObjCMethod() {
4174 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
4175 "'(' or identifier expected.");
4176 do {
4177 if (FormatTok->is(tok::semi)) {
4178 nextToken();
4179 addUnwrappedLine();
4180 return;
4181 } else if (FormatTok->is(tok::l_brace)) {
4182 if (Style.BraceWrapping.AfterFunction)
4183 addUnwrappedLine();
4184 parseBlock();
4185 addUnwrappedLine();
4186 return;
4187 } else {
4188 nextToken();
4189 }
4190 } while (!eof());
4191}
4192
4193void UnwrappedLineParser::parseObjCProtocolList() {
4194 assert(FormatTok->is(tok::less) && "'<' expected.");
4195 do {
4196 nextToken();
4197 // Early exit in case someone forgot a close angle.
4198 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::objc_end))
4199 return;
4200 } while (!eof() && FormatTok->isNot(tok::greater));
4201 nextToken(); // Skip '>'.
4202}
4203
4204void UnwrappedLineParser::parseObjCUntilAtEnd() {
4205 do {
4206 if (FormatTok->is(tok::objc_end)) {
4207 nextToken();
4208 addUnwrappedLine();
4209 break;
4210 }
4211 if (FormatTok->is(tok::l_brace)) {
4212 parseBlock();
4213 // In ObjC interfaces, nothing should be following the "}".
4214 addUnwrappedLine();
4215 } else if (FormatTok->is(tok::r_brace)) {
4216 // Ignore stray "}". parseStructuralElement doesn't consume them.
4217 nextToken();
4218 addUnwrappedLine();
4219 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
4220 nextToken();
4221 parseObjCMethod();
4222 } else {
4223 parseStructuralElement();
4224 }
4225 } while (!eof());
4226}
4227
4228void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4229 assert(FormatTok->isOneOf(tok::objc_interface, tok::objc_implementation));
4230 nextToken();
4231 nextToken(); // interface name
4232
4233 // @interface can be followed by a lightweight generic
4234 // specialization list, then either a base class or a category.
4235 if (FormatTok->is(tok::less))
4236 parseObjCLightweightGenerics();
4237 if (FormatTok->is(tok::colon)) {
4238 nextToken();
4239 nextToken(); // base class name
4240 // The base class can also have lightweight generics applied to it.
4241 if (FormatTok->is(tok::less))
4242 parseObjCLightweightGenerics();
4243 } else if (FormatTok->is(tok::l_paren)) {
4244 // Skip category, if present.
4245 parseParens();
4246 }
4247
4248 if (FormatTok->is(tok::less))
4249 parseObjCProtocolList();
4250
4251 if (FormatTok->is(tok::l_brace)) {
4253 addUnwrappedLine();
4254 parseBlock(/*MustBeDeclaration=*/true);
4255 }
4256
4257 // With instance variables, this puts '}' on its own line. Without instance
4258 // variables, this ends the @interface line.
4259 addUnwrappedLine();
4260
4261 parseObjCUntilAtEnd();
4262}
4263
4264void UnwrappedLineParser::parseObjCLightweightGenerics() {
4265 assert(FormatTok->is(tok::less));
4266 // Unlike protocol lists, generic parameterizations support
4267 // nested angles:
4268 //
4269 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4270 // NSObject <NSCopying, NSSecureCoding>
4271 //
4272 // so we need to count how many open angles we have left.
4273 unsigned NumOpenAngles = 1;
4274 do {
4275 nextToken();
4276 // Early exit in case someone forgot a close angle.
4277 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::objc_end))
4278 break;
4279 if (FormatTok->is(tok::less)) {
4280 ++NumOpenAngles;
4281 } else if (FormatTok->is(tok::greater)) {
4282 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4283 --NumOpenAngles;
4284 }
4285 } while (!eof() && NumOpenAngles != 0);
4286 nextToken(); // Skip '>'.
4287}
4288
4289// Returns true for the declaration/definition form of @protocol,
4290// false for the expression form.
4291bool UnwrappedLineParser::parseObjCProtocol() {
4292 assert(FormatTok->is(tok::objc_protocol));
4293 nextToken();
4294
4295 if (FormatTok->is(tok::l_paren)) {
4296 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4297 return false;
4298 }
4299
4300 // The definition/declaration form,
4301 // @protocol Foo
4302 // - (int)someMethod;
4303 // @end
4304
4305 nextToken(); // protocol name
4306
4307 if (FormatTok->is(tok::less))
4308 parseObjCProtocolList();
4309
4310 // Check for protocol declaration.
4311 if (FormatTok->is(tok::semi)) {
4312 nextToken();
4313 addUnwrappedLine();
4314 return true;
4315 }
4316
4317 addUnwrappedLine();
4318 parseObjCUntilAtEnd();
4319 return true;
4320}
4321
4322void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4323 bool IsImport = FormatTok->is(Keywords.kw_import);
4324 assert(IsImport || FormatTok->is(tok::kw_export));
4325 nextToken();
4326
4327 // Consume the "default" in "export default class/function".
4328 if (FormatTok->is(tok::kw_default))
4329 nextToken();
4330
4331 // Consume "async function", "function" and "default function", so that these
4332 // get parsed as free-standing JS functions, i.e. do not require a trailing
4333 // semicolon.
4334 if (FormatTok->is(Keywords.kw_async))
4335 nextToken();
4336 if (FormatTok->is(Keywords.kw_function)) {
4337 nextToken();
4338 return;
4339 }
4340
4341 // For imports, `export *`, `export {...}`, consume the rest of the line up
4342 // to the terminating `;`. For everything else, just return and continue
4343 // parsing the structural element, i.e. the declaration or expression for
4344 // `export default`.
4345 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
4346 !FormatTok->isStringLiteral() &&
4347 !(FormatTok->is(Keywords.kw_type) &&
4348 Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) {
4349 return;
4350 }
4351
4352 while (!eof()) {
4353 if (FormatTok->is(tok::semi))
4354 return;
4355 if (Line->Tokens.empty()) {
4356 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4357 // import statement should terminate.
4358 return;
4359 }
4360 if (FormatTok->is(tok::l_brace)) {
4361 FormatTok->setBlockKind(BK_Block);
4362 nextToken();
4363 parseBracedList();
4364 } else {
4365 nextToken();
4366 }
4367 }
4368}
4369
4370void UnwrappedLineParser::parseStatementMacro() {
4371 nextToken();
4372 if (FormatTok->is(tok::l_paren))
4373 parseParens();
4374 if (FormatTok->is(tok::semi))
4375 nextToken();
4376 addUnwrappedLine();
4377}
4378
4379void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4380 // consume things like a::`b.c[d:e] or a::*
4381 while (true) {
4382 if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
4383 tok::coloncolon, tok::hash) ||
4384 Keywords.isVerilogIdentifier(*FormatTok)) {
4385 nextToken();
4386 } else if (FormatTok->is(tok::l_square)) {
4387 parseSquare();
4388 } else {
4389 break;
4390 }
4391 }
4392}
4393
4394void UnwrappedLineParser::parseVerilogSensitivityList() {
4395 if (FormatTok->isNot(tok::at))
4396 return;
4397 nextToken();
4398 // A block event expression has 2 at signs.
4399 if (FormatTok->is(tok::at))
4400 nextToken();
4401 switch (FormatTok->Tok.getKind()) {
4402 case tok::star:
4403 nextToken();
4404 break;
4405 case tok::l_paren:
4406 parseParens();
4407 break;
4408 default:
4409 parseVerilogHierarchyIdentifier();
4410 break;
4411 }
4412}
4413
4414unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4415 unsigned AddLevels = 0;
4416
4417 if (FormatTok->is(Keywords.kw_clocking)) {
4418 nextToken();
4419 if (Keywords.isVerilogIdentifier(*FormatTok))
4420 nextToken();
4421 parseVerilogSensitivityList();
4422 if (FormatTok->is(tok::semi))
4423 nextToken();
4424 } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
4425 Keywords.kw_casez, Keywords.kw_randcase,
4426 Keywords.kw_randsequence)) {
4427 if (Style.IndentCaseLabels)
4428 AddLevels++;
4429 nextToken();
4430 if (FormatTok->is(tok::l_paren)) {
4431 FormatTok->setFinalizedType(TT_ConditionLParen);
4432 parseParens();
4433 }
4434 if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
4435 nextToken();
4436 // The case header has no semicolon.
4437 } else {
4438 // "module" etc.
4439 nextToken();
4440 // all the words like the name of the module and specifiers like
4441 // "automatic" and the width of function return type
4442 while (true) {
4443 if (FormatTok->is(tok::l_square)) {
4444 auto Prev = FormatTok->getPreviousNonComment();
4445 if (Prev && Keywords.isVerilogIdentifier(*Prev))
4446 Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
4447 parseSquare();
4448 } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
4449 FormatTok->isOneOf(tok::hash, tok::hashhash, tok::coloncolon,
4450 Keywords.kw_automatic, tok::kw_static)) {
4451 nextToken();
4452 } else {
4453 break;
4454 }
4455 }
4456
4457 auto NewLine = [this]() {
4458 addUnwrappedLine();
4459 Line->IsContinuation = true;
4460 };
4461
4462 // package imports
4463 while (FormatTok->is(Keywords.kw_import)) {
4464 NewLine();
4465 nextToken();
4466 parseVerilogHierarchyIdentifier();
4467 if (FormatTok->is(tok::semi))
4468 nextToken();
4469 }
4470
4471 // parameters and ports
4472 if (FormatTok->is(Keywords.kw_verilogHash)) {
4473 NewLine();
4474 nextToken();
4475 if (FormatTok->is(tok::l_paren)) {
4476 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4477 parseParens();
4478 }
4479 }
4480 if (FormatTok->is(tok::l_paren)) {
4481 NewLine();
4482 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4483 parseParens();
4484 }
4485
4486 // extends and implements
4487 if (FormatTok->is(Keywords.kw_extends)) {
4488 NewLine();
4489 nextToken();
4490 parseVerilogHierarchyIdentifier();
4491 if (FormatTok->is(tok::l_paren))
4492 parseParens();
4493 }
4494 if (FormatTok->is(Keywords.kw_implements)) {
4495 NewLine();
4496 do {
4497 nextToken();
4498 parseVerilogHierarchyIdentifier();
4499 } while (FormatTok->is(tok::comma));
4500 }
4501
4502 // Coverage event for cover groups.
4503 if (FormatTok->is(tok::at)) {
4504 NewLine();
4505 parseVerilogSensitivityList();
4506 }
4507
4508 if (FormatTok->is(tok::semi))
4509 nextToken(/*LevelDifference=*/1);
4510 addUnwrappedLine();
4511 }
4512
4513 return AddLevels;
4514}
4515
4516void UnwrappedLineParser::parseVerilogTable() {
4517 assert(FormatTok->is(Keywords.kw_table));
4518 nextToken(/*LevelDifference=*/1);
4519 addUnwrappedLine();
4520
4521 auto InitialLevel = Line->Level++;
4522 while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
4523 FormatToken *Tok = FormatTok;
4524 nextToken();
4525 if (Tok->is(tok::semi))
4526 addUnwrappedLine();
4527 else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
4528 Tok->setFinalizedType(TT_VerilogTableItem);
4529 }
4530 Line->Level = InitialLevel;
4531 nextToken(/*LevelDifference=*/-1);
4532 addUnwrappedLine();
4533}
4534
4535void UnwrappedLineParser::parseVerilogCaseLabel() {
4536 // The label will get unindented in AnnotatingParser. If there are no leading
4537 // spaces, indent the rest here so that things inside the block will be
4538 // indented relative to things outside. We don't use parseLabel because we
4539 // don't know whether this colon is a label or a ternary expression at this
4540 // point.
4541 auto OrigLevel = Line->Level;
4542 auto FirstLine = CurrentLines->size();
4543 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4544 ++Line->Level;
4545 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
4546 --Line->Level;
4547 parseStructuralElement();
4548 // Restore the indentation in both the new line and the line that has the
4549 // label.
4550 if (CurrentLines->size() > FirstLine)
4551 (*CurrentLines)[FirstLine].Level = OrigLevel;
4552 Line->Level = OrigLevel;
4553}
4554
4555bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4556 for (const auto &N : Line.Tokens) {
4557 if (N.Tok->MacroCtx)
4558 return true;
4559 for (const UnwrappedLine &Child : N.Children)
4560 if (containsExpansion(Child))
4561 return true;
4562 }
4563 return false;
4564}
4565
4566void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4567 if (Line->Tokens.empty())
4568 return;
4569 LLVM_DEBUG({
4570 if (!parsingPPDirective()) {
4571 llvm::dbgs() << "Adding unwrapped line:\n";
4572 printDebugInfo(*Line);
4573 }
4574 });
4575
4576 // If this line closes a block when in Whitesmiths mode, remember that
4577 // information so that the level can be decreased after the line is added.
4578 // This has to happen after the addition of the line since the line itself
4579 // needs to be indented.
4580 bool ClosesWhitesmithsBlock =
4581 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4583
4584 // If the current line was expanded from a macro call, we use it to
4585 // reconstruct an unwrapped line from the structure of the expanded unwrapped
4586 // line and the unexpanded token stream.
4587 if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) {
4588 if (!Reconstruct)
4589 Reconstruct.emplace(Line->Level, Unexpanded);
4590 Reconstruct->addLine(*Line);
4591
4592 // While the reconstructed unexpanded lines are stored in the normal
4593 // flow of lines, the expanded lines are stored on the side to be analyzed
4594 // in an extra step.
4595 CurrentExpandedLines.push_back(std::move(*Line));
4596
4597 if (Reconstruct->finished()) {
4598 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4599 assert(!Reconstructed.Tokens.empty() &&
4600 "Reconstructed must at least contain the macro identifier.");
4601 assert(!parsingPPDirective());
4602 LLVM_DEBUG({
4603 llvm::dbgs() << "Adding unexpanded line:\n";
4604 printDebugInfo(Reconstructed);
4605 });
4606 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4607 Lines.push_back(std::move(Reconstructed));
4608 CurrentExpandedLines.clear();
4609 Reconstruct.reset();
4610 }
4611 } else {
4612 // At the top level we only get here when no unexpansion is going on, or
4613 // when conditional formatting led to unfinished macro reconstructions.
4614 assert(!Reconstruct || (CurrentLines != &Lines) || !PPStack.empty());
4615 CurrentLines->push_back(std::move(*Line));
4616 }
4617 Line->Tokens.clear();
4618 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4619 Line->FirstStartColumn = 0;
4620 Line->IsContinuation = false;
4621 Line->SeenDecltypeAuto = false;
4622
4623 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4624 --Line->Level;
4625 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4626 CurrentLines->append(
4627 std::make_move_iterator(PreprocessorDirectives.begin()),
4628 std::make_move_iterator(PreprocessorDirectives.end()));
4629 PreprocessorDirectives.clear();
4630 }
4631 // Disconnect the current token from the last token on the previous line.
4632 FormatTok->Previous = nullptr;
4633}
4634
4635bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
4636
4637bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4638 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4639 FormatTok.NewlinesBefore > 0;
4640}
4641
4642// Checks if \p FormatTok is a line comment that continues the line comment
4643// section on \p Line.
4644static bool
4646 const UnwrappedLine &Line, const FormatStyle &Style,
4647 const llvm::Regex &CommentPragmasRegex) {
4648 if (Line.Tokens.empty() || Style.ReflowComments != FormatStyle::RCS_Always)
4649 return false;
4650
4651 StringRef IndentContent = FormatTok.TokenText;
4652 if (FormatTok.TokenText.starts_with("//") ||
4653 FormatTok.TokenText.starts_with("/*")) {
4654 IndentContent = FormatTok.TokenText.substr(2);
4655 }
4656 if (CommentPragmasRegex.match(IndentContent))
4657 return false;
4658
4659 // If Line starts with a line comment, then FormatTok continues the comment
4660 // section if its original column is greater or equal to the original start
4661 // column of the line.
4662 //
4663 // Define the min column token of a line as follows: if a line ends in '{' or
4664 // contains a '{' followed by a line comment, then the min column token is
4665 // that '{'. Otherwise, the min column token of the line is the first token of
4666 // the line.
4667 //
4668 // If Line starts with a token other than a line comment, then FormatTok
4669 // continues the comment section if its original column is greater than the
4670 // original start column of the min column token of the line.
4671 //
4672 // For example, the second line comment continues the first in these cases:
4673 //
4674 // // first line
4675 // // second line
4676 //
4677 // and:
4678 //
4679 // // first line
4680 // // second line
4681 //
4682 // and:
4683 //
4684 // int i; // first line
4685 // // second line
4686 //
4687 // and:
4688 //
4689 // do { // first line
4690 // // second line
4691 // int i;
4692 // } while (true);
4693 //
4694 // and:
4695 //
4696 // enum {
4697 // a, // first line
4698 // // second line
4699 // b
4700 // };
4701 //
4702 // The second line comment doesn't continue the first in these cases:
4703 //
4704 // // first line
4705 // // second line
4706 //
4707 // and:
4708 //
4709 // int i; // first line
4710 // // second line
4711 //
4712 // and:
4713 //
4714 // do { // first line
4715 // // second line
4716 // int i;
4717 // } while (true);
4718 //
4719 // and:
4720 //
4721 // enum {
4722 // a, // first line
4723 // // second line
4724 // };
4725 const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4726
4727 // Scan for '{//'. If found, use the column of '{' as a min column for line
4728 // comment section continuation.
4729 const FormatToken *PreviousToken = nullptr;
4730 for (const UnwrappedLineNode &Node : Line.Tokens) {
4731 if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4732 isLineComment(*Node.Tok)) {
4733 MinColumnToken = PreviousToken;
4734 break;
4735 }
4736 PreviousToken = Node.Tok;
4737
4738 // Grab the last newline preceding a token in this unwrapped line.
4739 if (Node.Tok->NewlinesBefore > 0)
4740 MinColumnToken = Node.Tok;
4741 }
4742 if (PreviousToken && PreviousToken->is(tok::l_brace))
4743 MinColumnToken = PreviousToken;
4744
4745 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4746 MinColumnToken);
4747}
4748
4749void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4750 bool JustComments = Line->Tokens.empty();
4751 for (FormatToken *Tok : CommentsBeforeNextToken) {
4752 // Line comments that belong to the same line comment section are put on the
4753 // same line since later we might want to reflow content between them.
4754 // Additional fine-grained breaking of line comment sections is controlled
4755 // by the class BreakableLineCommentSection in case it is desirable to keep
4756 // several line comment sections in the same unwrapped line.
4757 //
4758 // FIXME: Consider putting separate line comment sections as children to the
4759 // unwrapped line instead.
4760 Tok->ContinuesLineCommentSection =
4761 continuesLineCommentSection(*Tok, *Line, Style, CommentPragmasRegex);
4762 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4763 addUnwrappedLine();
4764 pushToken(Tok);
4765 }
4766 if (NewlineBeforeNext && JustComments)
4767 addUnwrappedLine();
4768 CommentsBeforeNextToken.clear();
4769}
4770
4771void UnwrappedLineParser::nextToken(int LevelDifference) {
4772 if (eof())
4773 return;
4774 flushComments(isOnNewLine(*FormatTok));
4775 pushToken(FormatTok);
4776 FormatToken *Previous = FormatTok;
4777 if (!Style.isJavaScript())
4778 readToken(LevelDifference);
4779 else
4780 readTokenWithJavaScriptASI();
4781 FormatTok->Previous = Previous;
4782 if (Style.isVerilog()) {
4783 // Blocks in Verilog can have `begin` and `end` instead of braces. For
4784 // keywords like `begin`, we can't treat them the same as left braces
4785 // because some contexts require one of them. For example structs use
4786 // braces and if blocks use keywords, and a left brace can occur in an if
4787 // statement, but it is not a block. For keywords like `end`, we simply
4788 // treat them the same as right braces.
4789 if (Keywords.isVerilogEnd(*FormatTok))
4790 FormatTok->Tok.setKind(tok::r_brace);
4791 }
4792}
4793
4794void UnwrappedLineParser::distributeComments(
4795 const ArrayRef<FormatToken *> &Comments, const FormatToken *NextTok) {
4796 // Whether or not a line comment token continues a line is controlled by
4797 // the method continuesLineCommentSection, with the following caveat:
4798 //
4799 // Define a trail of Comments to be a nonempty proper postfix of Comments such
4800 // that each comment line from the trail is aligned with the next token, if
4801 // the next token exists. If a trail exists, the beginning of the maximal
4802 // trail is marked as a start of a new comment section.
4803 //
4804 // For example in this code:
4805 //
4806 // int a; // line about a
4807 // // line 1 about b
4808 // // line 2 about b
4809 // int b;
4810 //
4811 // the two lines about b form a maximal trail, so there are two sections, the
4812 // first one consisting of the single comment "// line about a" and the
4813 // second one consisting of the next two comments.
4814 if (Comments.empty())
4815 return;
4816 bool ShouldPushCommentsInCurrentLine = true;
4817 bool HasTrailAlignedWithNextToken = false;
4818 unsigned StartOfTrailAlignedWithNextToken = 0;
4819 if (NextTok) {
4820 // We are skipping the first element intentionally.
4821 for (unsigned i = Comments.size() - 1; i > 0; --i) {
4822 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4823 HasTrailAlignedWithNextToken = true;
4824 StartOfTrailAlignedWithNextToken = i;
4825 }
4826 }
4827 }
4828 for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4829 FormatToken *FormatTok = Comments[i];
4830 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4831 FormatTok->ContinuesLineCommentSection = false;
4832 } else {
4833 FormatTok->ContinuesLineCommentSection = continuesLineCommentSection(
4834 *FormatTok, *Line, Style, CommentPragmasRegex);
4835 }
4836 if (!FormatTok->ContinuesLineCommentSection &&
4837 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4838 ShouldPushCommentsInCurrentLine = false;
4839 }
4840 if (ShouldPushCommentsInCurrentLine)
4841 pushToken(FormatTok);
4842 else
4843 CommentsBeforeNextToken.push_back(FormatTok);
4844 }
4845}
4846
4847void UnwrappedLineParser::readToken(int LevelDifference) {
4848 SmallVector<FormatToken *, 1> Comments;
4849 bool PreviousWasComment = false;
4850 bool FirstNonCommentOnLine = false;
4851 do {
4852 FormatTok = Tokens->getNextToken();
4853 assert(FormatTok);
4854 while (FormatTok->isOneOf(TT_ConflictStart, TT_ConflictEnd,
4855 TT_ConflictAlternative)) {
4856 if (FormatTok->is(TT_ConflictStart))
4857 conditionalCompilationStart(/*Unreachable=*/false);
4858 else if (FormatTok->is(TT_ConflictAlternative))
4859 conditionalCompilationAlternative();
4860 else if (FormatTok->is(TT_ConflictEnd))
4861 conditionalCompilationEnd();
4862 FormatTok = Tokens->getNextToken();
4863 FormatTok->MustBreakBefore = true;
4864 FormatTok->MustBreakBeforeFinalized = true;
4865 }
4866
4867 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4868 const FormatToken &Tok,
4869 bool PreviousWasComment) {
4870 auto IsFirstOnLine = [](const FormatToken &Tok) {
4871 return Tok.HasUnescapedNewline || Tok.IsFirst;
4872 };
4873
4874 // Consider preprocessor directives preceded by block comments as first
4875 // on line.
4876 if (PreviousWasComment)
4877 return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4878 return IsFirstOnLine(Tok);
4879 };
4880
4881 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4882 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4883 PreviousWasComment = FormatTok->is(tok::comment);
4884
4885 while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4886 FirstNonCommentOnLine) {
4887 // In Verilog, the backtick is used for macro invocations. In TableGen,
4888 // the single hash is used for the paste operator.
4889 const auto *Next = Tokens->peekNextToken();
4890 if ((Style.isVerilog() && !Keywords.isVerilogPPDirective(*Next)) ||
4891 (Style.isTableGen() &&
4892 !Next->isOneOf(tok::kw_else, tok::pp_define, tok::pp_ifdef,
4893 tok::pp_ifndef, tok::pp_endif))) {
4894 break;
4895 }
4896 distributeComments(Comments, FormatTok);
4897 Comments.clear();
4898 // If there is an unfinished unwrapped line, we flush the preprocessor
4899 // directives only after that unwrapped line was finished later.
4900 bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4901 ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4902 assert((LevelDifference >= 0 ||
4903 static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4904 "LevelDifference makes Line->Level negative");
4905 Line->Level += LevelDifference;
4906 // Comments stored before the preprocessor directive need to be output
4907 // before the preprocessor directive, at the same level as the
4908 // preprocessor directive, as we consider them to apply to the directive.
4910 PPBranchLevel > 0) {
4911 Line->Level += PPBranchLevel;
4912 }
4913 assert(Line->Level >= Line->UnbracedBodyLevel);
4914 Line->Level -= Line->UnbracedBodyLevel;
4915 flushComments(isOnNewLine(*FormatTok));
4916 parsePPDirective();
4917 PreviousWasComment = FormatTok->is(tok::comment);
4918 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4919 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4920 }
4921
4922 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4923 !Line->InPPDirective) {
4924 continue;
4925 }
4926
4927 if (FormatTok->is(tok::identifier) &&
4928 Macros.defined(FormatTok->TokenText) &&
4929 // FIXME: Allow expanding macros in preprocessor directives.
4930 !Line->InPPDirective) {
4931 FormatToken *ID = FormatTok;
4932 unsigned Position = Tokens->getPosition();
4933
4934 // To correctly parse the code, we need to replace the tokens of the macro
4935 // call with its expansion.
4936 auto PreCall = std::move(Line);
4937 Line.reset(new UnwrappedLine);
4938 bool OldInExpansion = InExpansion;
4939 InExpansion = true;
4940 // We parse the macro call into a new line.
4941 auto Args = parseMacroCall();
4942 InExpansion = OldInExpansion;
4943 assert(Line->Tokens.front().Tok == ID);
4944 // And remember the unexpanded macro call tokens.
4945 auto UnexpandedLine = std::move(Line);
4946 // Reset to the old line.
4947 Line = std::move(PreCall);
4948
4949 LLVM_DEBUG({
4950 llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
4951 if (Args) {
4952 llvm::dbgs() << "(";
4953 for (const auto &Arg : Args.value())
4954 for (const auto &T : Arg)
4955 llvm::dbgs() << T->TokenText << " ";
4956 llvm::dbgs() << ")";
4957 }
4958 llvm::dbgs() << "\n";
4959 });
4960 if (Macros.objectLike(ID->TokenText) && Args &&
4961 !Macros.hasArity(ID->TokenText, Args->size())) {
4962 // The macro is either
4963 // - object-like, but we got argumnets, or
4964 // - overloaded to be both object-like and function-like, but none of
4965 // the function-like arities match the number of arguments.
4966 // Thus, expand as object-like macro.
4967 LLVM_DEBUG(llvm::dbgs()
4968 << "Macro \"" << ID->TokenText
4969 << "\" not overloaded for arity " << Args->size()
4970 << "or not function-like, using object-like overload.");
4971 Args.reset();
4972 UnexpandedLine->Tokens.resize(1);
4973 Tokens->setPosition(Position);
4974 nextToken();
4975 assert(!Args && Macros.objectLike(ID->TokenText));
4976 }
4977 if ((!Args && Macros.objectLike(ID->TokenText)) ||
4978 (Args && Macros.hasArity(ID->TokenText, Args->size()))) {
4979 // Next, we insert the expanded tokens in the token stream at the
4980 // current position, and continue parsing.
4981 Unexpanded[ID] = std::move(UnexpandedLine);
4982 SmallVector<FormatToken *, 8> Expansion =
4983 Macros.expand(ID, std::move(Args));
4984 if (!Expansion.empty())
4985 FormatTok = Tokens->insertTokens(Expansion);
4986
4987 LLVM_DEBUG({
4988 llvm::dbgs() << "Expanded: ";
4989 for (const auto &T : Expansion)
4990 llvm::dbgs() << T->TokenText << " ";
4991 llvm::dbgs() << "\n";
4992 });
4993 } else {
4994 LLVM_DEBUG({
4995 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
4996 << "\", because it was used ";
4997 if (Args)
4998 llvm::dbgs() << "with " << Args->size();
4999 else
5000 llvm::dbgs() << "without";
5001 llvm::dbgs() << " arguments, which doesn't match any definition.\n";
5002 });
5003 Tokens->setPosition(Position);
5004 FormatTok = ID;
5005 }
5006 }
5007
5008 if (FormatTok->isNot(tok::comment)) {
5009 distributeComments(Comments, FormatTok);
5010 Comments.clear();
5011 return;
5012 }
5013
5014 Comments.push_back(FormatTok);
5015 } while (!eof());
5016
5017 distributeComments(Comments, nullptr);
5018 Comments.clear();
5019}
5020
5021namespace {
5022template <typename Iterator>
5023void pushTokens(Iterator Begin, Iterator End,
5024 SmallVectorImpl<FormatToken *> &Into) {
5025 for (auto I = Begin; I != End; ++I) {
5026 Into.push_back(I->Tok);
5027 for (const auto &Child : I->Children)
5028 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
5029 }
5030}
5031} // namespace
5032
5033std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
5034UnwrappedLineParser::parseMacroCall() {
5035 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
5036 assert(Line->Tokens.empty());
5037 nextToken();
5038 if (FormatTok->isNot(tok::l_paren))
5039 return Args;
5040 unsigned Position = Tokens->getPosition();
5041 FormatToken *Tok = FormatTok;
5042 nextToken();
5043 Args.emplace();
5044 auto ArgStart = std::prev(Line->Tokens.end());
5045
5046 int Parens = 0;
5047 do {
5048 switch (FormatTok->Tok.getKind()) {
5049 case tok::l_paren:
5050 ++Parens;
5051 nextToken();
5052 break;
5053 case tok::r_paren: {
5054 if (Parens > 0) {
5055 --Parens;
5056 nextToken();
5057 break;
5058 }
5059 Args->push_back({});
5060 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
5061 nextToken();
5062 return Args;
5063 }
5064 case tok::comma: {
5065 if (Parens > 0) {
5066 nextToken();
5067 break;
5068 }
5069 Args->push_back({});
5070 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
5071 nextToken();
5072 ArgStart = std::prev(Line->Tokens.end());
5073 break;
5074 }
5075 default:
5076 nextToken();
5077 break;
5078 }
5079 } while (!eof());
5080 Line->Tokens.resize(1);
5081 Tokens->setPosition(Position);
5082 FormatTok = Tok;
5083 return {};
5084}
5085
5086void UnwrappedLineParser::pushToken(FormatToken *Tok) {
5087 Line->Tokens.push_back(UnwrappedLineNode(Tok));
5088 if (AtEndOfPPLine) {
5089 auto &Tok = *Line->Tokens.back().Tok;
5090 Tok.MustBreakBefore = true;
5091 Tok.MustBreakBeforeFinalized = true;
5092 Tok.FirstAfterPPLine = true;
5093 AtEndOfPPLine = false;
5094 }
5095}
5096
5097} // end namespace format
5098} // end namespace clang
DynTypedNode Node
static char ID
Definition: Arena.cpp:183
enum clang::sema::@1840::IndirectLocalPathEntry::EntryKind Kind
Expr * E
This file defines the FormatTokenSource interface, which provides a token stream as well as the abili...
This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...
This file contains the main building blocks of macro support in clang-format.
static bool HasAttribute(const QualType &T)
This file implements a token annotator, i.e.
Defines the clang::TokenKind enum and support functions.
SourceLocation Begin
StateNode * Previous
ContinuationIndenter * Indenter
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into Un...
tok::PPKeywordKind getPPKeywordID() const
Return the preprocessor keyword ID for this identifier.
Implements an efficient mapping from strings to IdentifierInfo nodes.
Parser - This implements a parser for the C family of languages.
Definition: Parser.h:171
This class handles loading and caching of source files into memory.
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:189
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when l...
Definition: Token.h:112
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:118
void setKind(tok::TokenKind K)
Definition: Token.h:98
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
Definition: Lexer.cpp:69
tok::TokenKind getKind() const
Definition: Token.h:97
bool isOneOf(Ts... Ks) const
Definition: Token.h:104
bool isNot(tok::TokenKind K) const
Definition: Token.h:103
void setIdentifierInfo(IdentifierInfo *II)
Definition: Token.h:198
CompoundStatementIndenter(UnwrappedLineParser *Parser, const FormatStyle &Style, unsigned &LineLevel)
CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, bool WrapBrace, bool IndentBrace)
virtual FormatToken * peekNextToken(bool SkipComment=false)=0
virtual unsigned getPosition()=0
virtual FormatToken * getPreviousToken()=0
virtual FormatToken * setPosition(unsigned Position)=0
virtual FormatToken * getNextToken()=0
bool objectLike(StringRef Name) const
Returns whether there is an object-like overload, i.e.
SmallVector< FormatToken *, 8 > expand(FormatToken *ID, std::optional< ArgsList > OptionalArgs) const
Returns the expanded stream of format tokens for ID, where each element in Args is a positional argum...
bool hasArity(StringRef Name, unsigned Arity) const
Returns whether macro Name provides an overload with the given arity.
bool defined(StringRef Name) const
Returns whether any macro Name is defined, regardless of overloads.
ScopedLineState(UnwrappedLineParser &Parser, bool SwitchToPreprocessorLines=false)
Interface for users of the UnwrappedLineParser to receive the parsed lines.
virtual void consumeUnwrappedLine(const UnwrappedLine &Line)=0
UnwrappedLineParser(SourceManager &SourceMgr, const FormatStyle &Style, const AdditionalKeywords &Keywords, unsigned FirstStartColumn, ArrayRef< FormatToken * > Tokens, UnwrappedLineConsumer &Callback, llvm::SpecificBumpPtrAllocator< FormatToken > &Allocator, IdentifierTable &IdentTable)
static void hash_combine(std::size_t &seed, const T &v)
static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
std::ostream & operator<<(std::ostream &Stream, const UnwrappedLine &Line)
bool continuesLineComment(const FormatToken &FormatTok, const FormatToken *Previous, const FormatToken *MinColumnToken)
Definition: FormatToken.h:1946
static bool tokenCanStartNewLine(const FormatToken &Tok)
static bool continuesLineCommentSection(const FormatToken &FormatTok, const UnwrappedLine &Line, const FormatStyle &Style, const llvm::Regex &CommentPragmasRegex)
static bool isC78Type(const FormatToken &Tok)
bool isLineComment(const FormatToken &FormatTok)
Definition: FormatToken.h:1939
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
static bool ShouldBreakBeforeBrace(const FormatStyle &Style, const FormatToken &InitialToken)
LangOptions getFormattingLangOpts(const FormatStyle &Style=getLLVMStyle())
Returns the LangOpts that the formatter expects you to set.
Definition: Format.cpp:4077
static void markOptionalBraces(FormatToken *LeftBrace)
static bool mustBeJSIdent(const AdditionalKeywords &Keywords, const FormatToken *FormatTok)
static bool isIIFE(const UnwrappedLine &Line, const AdditionalKeywords &Keywords)
static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, const FormatToken *FuncName)
static bool isGoogScope(const UnwrappedLine &Line)
static FormatToken * getLastNonComment(const UnwrappedLine &Line)
TokenType
Determines the semantic type of a syntactic token, e.g.
Definition: FormatToken.h:215
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
bool isLiteral(TokenKind K)
Return true if this is a "literal" kind, like a numeric constant, string, etc.
Definition: TokenKinds.h:97
The JSON file list parser is used to communicate input to InstallAPI.
const FunctionProtoType * T
@ Parens
New-expression has a C++98 paren-delimited initializer.
#define false
Definition: stdbool.h:26
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:1026
bool isVerilogEnd(const FormatToken &Tok) const
Returns whether Tok is a Verilog keyword that closes a block.
Definition: FormatToken.h:1840
bool isVerilogBegin(const FormatToken &Tok) const
Returns whether Tok is a Verilog keyword that opens a block.
Definition: FormatToken.h:1829
bool isVerilogStructuredProcedure(const FormatToken &Tok) const
Returns whether Tok is a Verilog keyword that starts a structured procedure like 'always'.
Definition: FormatToken.h:1878
bool isVerilogHierarchy(const FormatToken &Tok) const
Returns whether Tok is a Verilog keyword that opens a module, etc.
Definition: FormatToken.h:1852
bool isVerilogPPDirective(const FormatToken &Tok) const
Returns whether Tok is a Verilog preprocessor directive.
Definition: FormatToken.h:1802
IdentifierInfo * kw_internal_ident_after_define
Definition: FormatToken.h:1435
bool isVerilogIdentifier(const FormatToken &Tok) const
Definition: FormatToken.h:1766
bool AfterClass
Wrap class definitions.
Definition: Format.h:1391
bool AfterStruct
Wrap struct definitions.
Definition: Format.h:1458
bool AfterUnion
Wrap union definitions.
Definition: Format.h:1472
bool AfterEnum
Wrap enum definitions.
Definition: Format.h:1406
bool IndentBraces
Indent the wrapped braces themselves.
Definition: Format.h:1549
bool AfterObjCDeclaration
Wrap ObjC definitions (interfaces, implementations...).
Definition: Format.h:1444
bool AfterNamespace
Wrap namespace definitions.
Definition: Format.h:1438
BraceWrappingAfterControlStatementStyle AfterControlStatement
Wrap control statements (if/for/while/switch/..).
Definition: Format.h:1394
bool AfterFunction
Wrap function definitions.
Definition: Format.h:1422
bool AfterExternBlock
Wrap extern blocks.
Definition: Format.h:1486
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
bool isTableGen() const
Definition: Format.h:3391
@ LK_Proto
Should be used for Protocol Buffers (https://developers.google.com/protocol-buffers/).
Definition: Format.h:3370
unsigned IndentWidth
The number of columns to use for indentation.
Definition: Format.h:3021
bool IndentCaseLabels
Indent case labels one level from the switch statement.
Definition: Format.h:2879
PPDirectiveIndentStyle IndentPPDirectives
The preprocessor directive indenting style to use.
Definition: Format.h:2984
bool RemoveSemicolon
Remove semicolons after the closing braces of functions and constructors/destructors.
Definition: Format.h:4156
bool IndentExportBlock
If true, clang-format will indent the body of an export { ... } block.
Definition: Format.h:2892
@ RCS_Always
Apply indentation rules and reflow long comments into new lines, trying to obey the ColumnLimit.
Definition: Format.h:4019
@ IEBS_AfterExternBlock
Backwards compatible with AfterExternBlock's indenting.
Definition: Format.h:2913
@ IEBS_Indent
Indents extern blocks.
Definition: Format.h:2927
bool IndentCaseBlocks
Indent case label blocks one level from the case label.
Definition: Format.h:2860
bool InsertBraces
Insert braces after control statements (if, else, for, do, and while) in C++ unless the control state...
Definition: Format.h:3067
RemoveParenthesesStyle RemoveParentheses
Remove redundant parentheses.
Definition: Format.h:4138
LanguageKind Language
The language that this format style targets.
Definition: Format.h:3400
bool RemoveBracesLLVM
Remove optional braces of control statements (if, else, for, and while) in C++ according to the LLVM ...
Definition: Format.h:4079
@ PPDIS_BeforeHash
Indents directives before the hash.
Definition: Format.h:2979
@ PPDIS_None
Does not indent any directives.
Definition: Format.h:2961
bool AllowShortLoopsOnASingleLine
If true, while (true) continue; can be put on a single line.
Definition: Format.h:989
bool AllowShortEnumsOnASingleLine
Allow short enums on a single line.
Definition: Format.h:826
NamespaceIndentationKind NamespaceIndentation
The indentation used for namespaces.
Definition: Format.h:3546
bool isTextProto() const
Definition: Format.h:3389
BraceBreakingStyle BreakBeforeBraces
The brace breaking style to use.
Definition: Format.h:2216
bool isCSharp() const
Definition: Format.h:3384
@ BWACS_Always
Always wrap braces after a control statement.
Definition: Format.h:1355
@ BWACS_Never
Never wrap braces after a control statement.
Definition: Format.h:1334
@ BS_Whitesmiths
Like Allman but always indent braces and line up code with braces.
Definition: Format.h:2099
ReflowCommentsStyle ReflowComments
Comment reformatting style.
Definition: Format.h:4025
bool isVerilog() const
Definition: Format.h:3388
bool isJavaScript() const
Definition: Format.h:3387
bool IndentGotoLabels
Indent goto labels.
Definition: Format.h:2949
BraceWrappingFlags BraceWrapping
Control of individual brace wrapping cases.
Definition: Format.h:1603
@ RPS_Leave
Do not remove parentheses.
Definition: Format.h:4112
@ RPS_ReturnStatement
Also remove parentheses enclosing the expression in a return/co_return statement.
Definition: Format.h:4127
bool SkipMacroDefinitionBody
Do not format macro definition body.
Definition: Format.h:4369
@ NI_All
Indent in all namespaces.
Definition: Format.h:3541
@ NI_Inner
Indent only in inner namespaces (nested in other namespaces).
Definition: Format.h:3531
bool IndentAccessModifiers
Specify whether access modifiers should have their own indentation level.
Definition: Format.h:2837
IndentExternBlockStyle IndentExternBlock
IndentExternBlockStyle is the type of indenting of extern blocks.
Definition: Format.h:2932
unsigned ColumnLimit
The column limit.
Definition: Format.h:2451
A wrapper around a Token storing information about the whitespace characters preceding it.
Definition: FormatToken.h:300
bool Optional
Is optional and can be removed.
Definition: FormatToken.h:584
bool isTypeName(const LangOptions &LangOpts) const
Definition: FormatToken.cpp:43
bool isNot(T Kind) const
Definition: FormatToken.h:640
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:320
FormatToken * getPreviousNonComment() const
Returns the previous token ignoring comments.
Definition: FormatToken.h:834
unsigned Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside),...
Definition: FormatToken.h:379
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:469
void setBlockKind(BraceBlockKind BBK)
Definition: FormatToken.h:395
bool isStringLiteral() const
Definition: FormatToken.h:673
bool isBinaryOperator() const
Definition: FormatToken.h:771
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:618
bool hasWhitespaceBefore() const
Returns true if the range of whitespace immediately preceding the Token is not empty.
Definition: FormatToken.h:822
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:633
unsigned IsFirst
Indicates that this is the first token of the file.
Definition: FormatToken.h:339
unsigned ClosesRequiresClause
true if this is the last token within requires clause.
Definition: FormatToken.h:382
bool isAccessSpecifierKeyword() const
Definition: FormatToken.h:679
FormatToken * MatchingParen
If this is a bracket, this points to the matching one.
Definition: FormatToken.h:566
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:569
bool endsSequence(A K1, Ts... Tokens) const
true if this token ends a sequence with the given tokens in order, following the Previous pointers,...
Definition: FormatToken.h:669
void setFinalizedType(TokenType T)
Sets the type and also the finalized flag.
Definition: FormatToken.h:448
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...
static const size_t kInvalidIndex