clang 22.0.0git
TokenAnalyzer.cpp
Go to the documentation of this file.
1//===--- TokenAnalyzer.cpp - Analyze Token Streams --------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements an abstract TokenAnalyzer and associated helper
11/// classes. TokenAnalyzer can be extended to generate replacements based on
12/// an annotated and pre-processed token stream.
13///
14//===----------------------------------------------------------------------===//
15
16#include "TokenAnalyzer.h"
18#include "Encoding.h"
19#include "FormatToken.h"
20#include "FormatTokenLexer.h"
21#include "TokenAnnotator.h"
22#include "UnwrappedLineParser.h"
26#include "clang/Format/Format.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/Support/Debug.h"
29
30#define DEBUG_TYPE "format-formatter"
31
32namespace clang {
33namespace format {
34
35// FIXME: Instead of printing the diagnostic we should store it and have a
36// better way to return errors through the format APIs.
// Diagnostic consumer that records whether a fatal diagnostic was reported.
// NOTE(review): the class head (listing line 37), the first line of the
// HandleDiagnostic signature (listing line 39) and the declaration of
// `Message` (listing line 43) are absent from this listing.
38public:
// Reacts only to diagnostics at DiagnosticsEngine::Fatal level: sets the
// Fatal flag, formats the diagnostic into `Message` and prints it to
// llvm::errs(). Diagnostics at any other level are ignored.
40 const Diagnostic &Info) override {
41 if (DiagLevel == DiagnosticsEngine::Fatal) {
42 Fatal = true;
44 Info.FormatDiagnostic(Message);
45 llvm::errs() << Message << "\n";
46 }
47 }
48
// Returns true once at least one fatal diagnostic has been handled.
49 bool fatalError() const { return Fatal; }
50
51private:
// Set by HandleDiagnostic and never reset within this class.
52 bool Fatal = false;
53};
54
/// Creates an Environment for \p Code / \p FileName and records \p Ranges as
/// character ranges relative to the start of the file.
///
/// \returns the new Environment, or nullptr if a fatal diagnostic was
/// reported while constructing it or while fetching the buffer data.
55std::unique_ptr<Environment>
56Environment::make(StringRef Code, StringRef FileName,
57 ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn,
58 unsigned NextStartColumn, unsigned LastStartColumn) {
59 auto Env = std::make_unique<Environment>(Code, FileName, FirstStartColumn,
60 NextStartColumn, LastStartColumn);
// NOTE(review): the declaration of `Diags` (listing line 61) is absent from
// this listing. The client is installed without transferring ownership; if
// `Diags` is a local of this function, the diagnostics engine would still
// point at it after return - confirm against the full source.
62 Env->SM.getDiagnostics().setClient(&Diags, /*ShouldOwnClient=*/false);
63 SourceLocation StartOfFile = Env->SM.getLocForStartOfFile(Env->ID);
// Each range becomes [file start + offset, file start + offset + length).
64 for (const tooling::Range &Range : Ranges) {
65 SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
66 SourceLocation End = Start.getLocWithOffset(Range.getLength());
67 Env->CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
68 }
69 // Validate that we can get the buffer data without a fatal error.
// The returned data is discarded; the call is made only so that any fatal
// diagnostic reaches `Diags` before the check below.
70 Env->SM.getBufferData(Env->ID);
71 if (Diags.fatalError())
72 return nullptr;
73 return Env;
74}
75
/// Builds an Environment backed by a newly allocated SourceManagerForFile
/// created from \p FileName and \p Code, and stores the three start columns.
///
/// SM and ID are both initialized from VirtualSM, so VirtualSM must be
/// initialized first. NOTE(review): member initialization follows declaration
/// order in the class, which is not visible here - confirm VirtualSM is
/// declared before SM and ID in TokenAnalyzer.h.
76Environment::Environment(StringRef Code, StringRef FileName,
77 unsigned FirstStartColumn, unsigned NextStartColumn,
78 unsigned LastStartColumn)
79 : VirtualSM(new SourceManagerForFile(FileName, Code)), SM(VirtualSM->get()),
80 ID(VirtualSM->get().getMainFileID()), FirstStartColumn(FirstStartColumn),
81 NextStartColumn(NextStartColumn), LastStartColumn(LastStartColumn) {}
82
// Constructor initializer list and body of TokenAnalyzer.
// NOTE(review): the signature line (listing line 83) is absent from this
// listing; the initializers below use the names `Env` and `Style`.
//
// - LangOpts is derived from Style via getFormattingLangOpts().
// - AffectedRangeMgr is built from Env's source manager and character ranges.
// - UnwrappedLines is constructed with one element, so back() is valid
//   immediately after construction.
// - Encoding is detected from the buffer data of Env's file.
84 : Style(Style), LangOpts(getFormattingLangOpts(Style)), Env(Env),
85 AffectedRangeMgr(Env.getSourceManager(), Env.getCharRanges()),
86 UnwrappedLines(1),
87 Encoding(encoding::detectEncoding(
88 Env.getSourceManager().getBufferData(Env.getFileID()))) {
// Debug-only output: reports the detected encoding (anything other than
// Encoding_UTF8 is printed as "unknown") and the language name of the style.
89 LLVM_DEBUG(
90 llvm::dbgs() << "File encoding: "
91 << (Encoding == encoding::Encoding_UTF8 ? "UTF8" : "unknown")
92 << "\n");
93 LLVM_DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
94 << "\n");
95}
96
/// Lexes and parses the input, then calls analyze() once per run of unwrapped
/// lines and merges the per-run results.
///
/// \param SkipAnnotation when true, AnnotatedLines are still created but
/// Annotator.annotate() is not called on them.
/// \returns the merged replacements together with the sum of the per-run
/// penalties; if merging a replacement fails, an empty Replacements with
/// penalty 0 is returned instead.
///
/// NOTE(review): the declarations of `Result` (listing line 99), `Lex` (102),
/// `Tokens` (106), `Parser` (107) and `AnnotatedLines` (116) are absent from
/// this listing.
97std::pair<tooling::Replacements, unsigned>
98TokenAnalyzer::process(bool SkipAnnotation) {
100 llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
101 IdentifierTable IdentTable(LangOpts);
103 Env.getFirstStartColumn(), Style, Encoding, Allocator,
104 IdentTable);
105 ArrayRef<FormatToken *> Toks(Lex.lex());
108 Env.getFirstStartColumn(), Tokens, *this,
109 Allocator, IdentTable);
// *this is handed to the parser; consumeUnwrappedLine() in this file appends
// to UnwrappedLines.back().
110 Parser.parse();
// The last run is expected to be empty here; the loop below skips it.
111 assert(UnwrappedLines.back().empty());
112 unsigned Penalty = 0;
// Iterates over every run except the last one (Run + 1 != RunE).
113 for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; ++Run) {
114 const auto &Lines = UnwrappedLines[Run];
115 LLVM_DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
117 AnnotatedLines.reserve(Lines.size());
118
119 TokenAnnotator Annotator(Style, Lex.getKeywords());
// AnnotatedLine objects are heap-allocated here and deleted after analyze().
120 for (const UnwrappedLine &Line : Lines) {
121 AnnotatedLines.push_back(new AnnotatedLine(Line));
122 if (!SkipAnnotation)
123 Annotator.annotate(*AnnotatedLines.back());
124 }
125
126 std::pair<tooling::Replacements, unsigned> RunResult =
127 analyze(Annotator, AnnotatedLines, Lex);
128
129 LLVM_DEBUG({
130 llvm::dbgs() << "Replacements for run " << Run << ":\n";
131 for (const tooling::Replacement &Fix : RunResult.first)
132 llvm::dbgs() << Fix.toString() << "\n";
133 });
// Lines are released before merging, so the early return below happens after
// this run's lines have been deleted.
134 for (AnnotatedLine *Line : AnnotatedLines)
135 delete Line;
136
137 Penalty += RunResult.second;
138 for (const auto &R : RunResult.first) {
139 auto Err = Result.add(R);
140 // FIXME: better error handling here. For now, simply return an empty
141 // Replacements to indicate failure.
142 if (Err) {
143 llvm::errs() << llvm::toString(std::move(Err)) << "\n";
144 return {tooling::Replacements(), 0};
145 }
146 }
147 }
148 return {Result, Penalty};
149}
150
// Body of the unwrapped-line callback: appends TheLine to the last run in
// UnwrappedLines, which must already contain at least one run.
// NOTE(review): the signature line (listing line 151) is absent from this
// listing.
152 assert(!UnwrappedLines.empty());
153 UnwrappedLines.back().push_back(TheLine);
154}
155
158}
159
160} // end namespace format
161} // end namespace clang
AffectedRangeManager class manages affected ranges in the code.
static char ID
Definition: Arena.cpp:183
Defines the Diagnostic-related interfaces.
Contains functions for text encoding manipulation.
This file contains FormatTokenLexer, which tokenizes a source file into a token stream suitable for ClangFormat.
This file contains the declaration of the FormatToken, a wrapper around Token with additional information related to formatting.
Various functions to configurably format source code.
const Environment & Env
Definition: HTMLLogger.cpp:147
#define SM(sm)
Definition: OffloadArch.cpp:16
SourceRange Range
Definition: SemaObjC.cpp:753
Defines the SourceManager interface.
This file declares an abstract TokenAnalyzer, and associated helper classes.
This file implements a token annotator, i.e. creates AnnotatedTokens out of FormatTokens with required extra information.
This file contains the declaration of the UnwrappedLineParser, which turns a stream of tokens into UnwrappedLines.
static CharSourceRange getCharRange(SourceRange R)
Abstract interface, implemented by clients of the front-end, which formats and prints fully processed diagnostics.
Definition: Diagnostic.h:1722
A little helper class (which is basically a smart pointer that forwards info from DiagnosticsEngine a...
Definition: Diagnostic.h:1548
void FormatDiagnostic(SmallVectorImpl< char > &OutStr) const
Format this diagnostic into a string, substituting the formal arguments into the %0 slots.
Level
The level of the diagnostic, after it has been through mapping.
Definition: Diagnostic.h:236
Implements an efficient mapping from strings to IdentifierInfo nodes.
Parser - This implements a parser for the C family of languages.
Definition: Parser.h:171
Encodes a location in the source.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
SourceManager and necessary dependencies (e.g. VFS, FileManager) for a single file with in-memory content.
SourceManager & getSourceManager() const
Definition: TokenAnalyzer.h:38
Environment(StringRef Code, StringRef FileName, unsigned FirstStartColumn=0, unsigned NextStartColumn=0, unsigned LastStartColumn=0)
static std::unique_ptr< Environment > make(StringRef Code, StringRef FileName, ArrayRef< tooling::Range > Ranges, unsigned FirstStartColumn=0, unsigned NextStartColumn=0, unsigned LastStartColumn=0)
unsigned getFirstStartColumn() const
Definition: TokenAnalyzer.h:44
void HandleDiagnostic(DiagnosticsEngine::Level DiagLevel, const Diagnostic &Info) override
Handle this diagnostic, reporting it to the user or capturing it to a log as needed.
const AdditionalKeywords & getKeywords()
ArrayRef< FormatToken * > lex()
encoding::Encoding Encoding
virtual std::pair< tooling::Replacements, unsigned > analyze(TokenAnnotator &Annotator, SmallVectorImpl< AnnotatedLine * > &AnnotatedLines, FormatTokenLexer &Tokens)=0
const Environment & Env
Definition: TokenAnalyzer.h:97
SmallVector< SmallVector< UnwrappedLine, 16 >, 2 > UnwrappedLines
TokenAnalyzer(const Environment &Env, const FormatStyle &Style)
std::pair< tooling::Replacements, unsigned > process(bool SkipAnnotation=false)
void consumeUnwrappedLine(const UnwrappedLine &TheLine) override
Determines extra information about the tokens comprising an UnwrappedLine.
void annotate(AnnotatedLine &Line)
A source range independent of the SourceManager.
Definition: Replacement.h:44
A text replacement.
Definition: Replacement.h:83
std::string toString() const
Returns a human readable string representation.
Definition: Replacement.cpp:87
Maintains a set of replacements that are conflict-free.
Definition: Replacement.h:212
LangOptions getFormattingLangOpts(const FormatStyle &Style=getLLVMStyle())
Returns the LangOpts that the formatter expects you to set.
Definition: Format.cpp:4077
StringRef getLanguageName(FormatStyle::LanguageKind Language)
Definition: Format.h:5835
The JSON file list parser is used to communicate input to InstallAPI.
@ Result
The result type of a method or function.
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
LanguageKind Language
The language that this format style targets.
Definition: Format.h:3400
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no column limit.