clang 22.0.0git
HeaderIncludes.cpp
Go to the documentation of this file.
1//===--- HeaderIncludes.cpp - Insert/Delete #includes --*- C++ -*----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
11#include "clang/Lex/Lexer.h"
12#include "llvm/Support/FormatVariadic.h"
13#include "llvm/Support/Path.h"
14#include <optional>
15
16namespace clang {
17namespace tooling {
18namespace {
19
20LangOptions createLangOpts() {
21 LangOptions LangOpts;
22 LangOpts.CPlusPlus = 1;
23 LangOpts.CPlusPlus11 = 1;
24 LangOpts.CPlusPlus14 = 1;
25 LangOpts.LineComment = 1;
26 LangOpts.CXXOperatorNames = 1;
27 LangOpts.Bool = 1;
28 LangOpts.ObjC = 1;
29 LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally.
30 LangOpts.DeclSpecKeyword = 1; // To get __declspec.
31 LangOpts.WChar = 1; // To get wchar_t
32 return LangOpts;
33}
34
35// Returns the offset after skipping a sequence of tokens, matched by \p
36// GetOffsetAfterSequence, from the start of the code.
37// \p GetOffsetAfterSequence should be a function that matches a sequence of
38// tokens and returns an offset after the sequence.
39unsigned getOffsetAfterTokenSequence(
40 StringRef FileName, StringRef Code, const IncludeStyle &Style,
41 llvm::function_ref<unsigned(const SourceManager &, Lexer &, Token &)>
42 GetOffsetAfterSequence) {
43 SourceManagerForFile VirtualSM(FileName, Code);
44 SourceManager &SM = VirtualSM.get();
45 LangOptions LangOpts = createLangOpts();
46 Lexer Lex(SM.getMainFileID(), SM.getBufferOrFake(SM.getMainFileID()), SM,
47 LangOpts);
48 Token Tok;
49 // Get the first token.
50 Lex.LexFromRawLexer(Tok);
51 return GetOffsetAfterSequence(SM, Lex, Tok);
52}
53
54// Check if a sequence of tokens is like "#<Name> <raw_identifier>". If it is,
55// \p Tok will be the token after this directive; otherwise, it can be any token
56// after the given \p Tok (including \p Tok). If \p RawIDName is provided, the
57// (second) raw_identifier name is checked.
58bool checkAndConsumeDirectiveWithName(
59 Lexer &Lex, StringRef Name, Token &Tok,
60 std::optional<StringRef> RawIDName = std::nullopt) {
61 bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
62 Tok.is(tok::raw_identifier) &&
63 Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Tok) &&
64 Tok.is(tok::raw_identifier) &&
65 (!RawIDName || Tok.getRawIdentifier() == *RawIDName);
66 if (Matched)
67 Lex.LexFromRawLexer(Tok);
68 return Matched;
69}
70
71void skipComments(Lexer &Lex, Token &Tok) {
72 while (Tok.is(tok::comment))
73 if (Lex.LexFromRawLexer(Tok))
74 return;
75}
76
77bool checkAndConsumeModuleDecl(const SourceManager &SM, Lexer &Lex,
78 Token &Tok) {
79 bool Matched = Tok.is(tok::raw_identifier) &&
80 Tok.getRawIdentifier() == "module" &&
81 !Lex.LexFromRawLexer(Tok) && Tok.is(tok::semi) &&
82 !Lex.LexFromRawLexer(Tok);
83 return Matched;
84}
85
86// Determines the minimum offset into the file where we want to insert header
87// includes. This will be put (when available):
88// - after `#pragma once`
89// - after header guards (`#ifdef` and `#define`)
90// - after opening global module (`module;`)
91// - after any comments at the start of the file or immediately following one of
92// the above constructs
93unsigned getMinHeaderInsertionOffset(StringRef FileName, StringRef Code,
94 const IncludeStyle &Style) {
95 // \p Consume returns location after header guard or 0 if no header guard is
96 // found.
97 auto ConsumeHeaderGuardAndComment =
98 [&](std::function<unsigned(const SourceManager &SM, Lexer &Lex,
99 Token Tok)>
100 Consume) {
101 return getOffsetAfterTokenSequence(
102 FileName, Code, Style,
103 [&Consume](const SourceManager &SM, Lexer &Lex, Token Tok) {
104 skipComments(Lex, Tok);
105 unsigned InitialOffset = SM.getFileOffset(Tok.getLocation());
106 return std::max(InitialOffset, Consume(SM, Lex, Tok));
107 });
108 };
109
110 auto ModuleDecl = ConsumeHeaderGuardAndComment(
111 [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
112 if (checkAndConsumeModuleDecl(SM, Lex, Tok)) {
113 skipComments(Lex, Tok);
114 return SM.getFileOffset(Tok.getLocation());
115 }
116 return 0;
117 });
118
119 auto HeaderAndPPOffset = std::max(
120 // #ifndef/#define
121 ConsumeHeaderGuardAndComment(
122 [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
123 if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) {
124 skipComments(Lex, Tok);
125 if (checkAndConsumeDirectiveWithName(Lex, "define", Tok) &&
126 Tok.isAtStartOfLine())
127 return SM.getFileOffset(Tok.getLocation());
128 }
129 return 0;
130 }),
131 // #pragma once
132 ConsumeHeaderGuardAndComment(
133 [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
134 if (checkAndConsumeDirectiveWithName(Lex, "pragma", Tok,
135 StringRef("once")))
136 return SM.getFileOffset(Tok.getLocation());
137 return 0;
138 }));
139 return std::max(HeaderAndPPOffset, ModuleDecl);
140}
141
142// Check if a sequence of tokens is like
143// "#include ("header.h" | <header.h>)".
144// If it is, \p Tok will be the token after this directive; otherwise, it can be
145// any token after the given \p Tok (including \p Tok).
146bool checkAndConsumeInclusiveDirective(Lexer &Lex, Token &Tok) {
147 auto Matched = [&]() {
148 Lex.LexFromRawLexer(Tok);
149 return true;
150 };
151 if (Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
152 Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == "include") {
153 if (Lex.LexFromRawLexer(Tok))
154 return false;
155 if (Tok.is(tok::string_literal))
156 return Matched();
157 if (Tok.is(tok::less)) {
158 while (!Lex.LexFromRawLexer(Tok) && Tok.isNot(tok::greater)) {
159 }
160 if (Tok.is(tok::greater))
161 return Matched();
162 }
163 }
164 return false;
165}
166
167// Returns the offset of the last #include directive after which a new
168// #include can be inserted. This ignores #include's after the #include block(s)
169// in the beginning of a file to avoid inserting headers into code sections
170// where new #include's should not be added by default.
171// These code sections include:
172// - raw string literals (containing #include).
173// - #if blocks.
174// - Special #include's among declarations (e.g. functions).
175//
176// If no #include after which a new #include can be inserted, this returns the
177// offset after skipping all comments from the start of the code.
178// Inserting after an #include is not allowed if it comes after code that is not
179// #include (e.g. pre-processing directive that is not #include, declarations).
180unsigned getMaxHeaderInsertionOffset(StringRef FileName, StringRef Code,
181 const IncludeStyle &Style) {
182 return getOffsetAfterTokenSequence(
183 FileName, Code, Style,
184 [](const SourceManager &SM, Lexer &Lex, Token Tok) {
185 skipComments(Lex, Tok);
186 unsigned MaxOffset = SM.getFileOffset(Tok.getLocation());
187 while (checkAndConsumeInclusiveDirective(Lex, Tok))
188 MaxOffset = SM.getFileOffset(Tok.getLocation());
189 return MaxOffset;
190 });
191}
192
193inline StringRef trimInclude(StringRef IncludeName) {
194 return IncludeName.trim("\"<>");
195}
196
// Matches a line that holds an #include or #import directive.
// Capture group 1 is the directive keyword ("import" or "include"); capture
// group 2 is the header name together with its surrounding quotes or angle
// brackets.
197const char IncludeRegexPattern[] =
198 R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))";
199
200// The filename of Path excluding extension.
201// Used to match implementation with headers, this differs from sys::path::stem:
202// - in names with multiple dots (foo.cu.cc) it terminates at the *first*
203// - an empty stem is never returned: /foo/.bar.x => .bar
204// - we don't bother to handle . and .. specially
205StringRef matchingStem(llvm::StringRef Path) {
206 StringRef Name = llvm::sys::path::filename(Path);
207 return Name.substr(0, Name.find('.', 1));
208}
209
210} // anonymous namespace
211
213 StringRef FileName)
214 : Style(Style), FileName(FileName) {
215 for (const auto &Category : Style.IncludeCategories) {
216 CategoryRegexs.emplace_back(Category.Regex, Category.RegexIsCaseSensitive
217 ? llvm::Regex::NoFlags
218 : llvm::Regex::IgnoreCase);
219 }
220 IsMainFile = FileName.ends_with(".c") || FileName.ends_with(".cc") ||
221 FileName.ends_with(".cpp") || FileName.ends_with(".c++") ||
222 FileName.ends_with(".cxx") || FileName.ends_with(".m") ||
223 FileName.ends_with(".mm");
224 if (!Style.IncludeIsMainSourceRegex.empty()) {
225 llvm::Regex MainFileRegex(Style.IncludeIsMainSourceRegex);
226 IsMainFile |= MainFileRegex.match(FileName);
227 }
228}
229
231 bool CheckMainHeader) const {
232 int Ret = INT_MAX;
233 for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i)
234 if (CategoryRegexs[i].match(IncludeName)) {
235 Ret = Style.IncludeCategories[i].Priority;
236 break;
237 }
238 if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName))
239 Ret = 0;
240 return Ret;
241}
242
244 bool CheckMainHeader) const {
245 int Ret = INT_MAX;
246 for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i)
247 if (CategoryRegexs[i].match(IncludeName)) {
248 Ret = Style.IncludeCategories[i].SortPriority;
249 if (Ret == 0)
250 Ret = Style.IncludeCategories[i].Priority;
251 break;
252 }
253 if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName))
254 Ret = 0;
255 return Ret;
256}
// Decides whether \p IncludeName (spelled with its surrounding quotes or angle
// brackets) names the main header of FileName.
// The switch on Style.MainIncludeChar first rejects names whose opening
// delimiter is not the accepted one. The header's stem is then compared
// case-insensitively with the stem of FileName, and a candidate is accepted
// only if it also matches the header stem followed by
// Style.IncludeIsMainRegex.
257bool IncludeCategoryManager::isMainHeader(StringRef IncludeName) const {
258 switch (Style.MainIncludeChar) {
// NOTE(review): the case labels of this switch are not visible in this
// listing; only the bodies of the three cases are.
260 if (!IncludeName.starts_with("\""))
261 return false;
262 break;
264 if (!IncludeName.starts_with("<"))
265 return false;
266 break;
268 break;
269 }
270
// NOTE(review): presumably IncludeName always carries both delimiters at this
// point (at least two characters) - confirm against the callers.
271 IncludeName =
272 IncludeName.drop_front(1).drop_back(1); // remove the surrounding "" or <>
273 // Not matchingStem: implementation files may have compound extensions but
274 // headers may not.
275 StringRef HeaderStem = llvm::sys::path::stem(IncludeName);
276 StringRef FileStem = llvm::sys::path::stem(FileName); // foo.cu for foo.cu.cc
277 StringRef MatchingFileStem = matchingStem(FileName); // foo for foo.cu.cc
278 // main-header examples:
279 // 1) foo.h => foo.cc
280 // 2) foo.h => foo.cu.cc
281 // 3) foo.proto.h => foo.proto.cc
282 //
283 // non-main-header examples:
284 // 1) foo.h => bar.cc
285 // 2) foo.proto.h => foo.cc
// Matching stays empty when neither stem comparison succeeds, which makes the
// function fall through to `return false`.
286 StringRef Matching;
287 if (MatchingFileStem.starts_with_insensitive(HeaderStem))
288 Matching = MatchingFileStem; // example 1), 2)
289 else if (FileStem.equals_insensitive(HeaderStem))
290 Matching = FileStem; // example 3)
291 if (!Matching.empty()) {
// The pattern is the header stem immediately followed by
// Style.IncludeIsMainRegex, matched case-insensitively.
// NOTE(review): HeaderStem is spliced into the pattern unescaped, so regex
// metacharacters in a header name (e.g. '+') are read as regex syntax -
// confirm whether it should be escaped first.
292 llvm::Regex MainIncludeRegex(HeaderStem.str() + Style.IncludeIsMainRegex,
293 llvm::Regex::IgnoreCase);
294 if (MainIncludeRegex.match(Matching))
295 return true;
296 }
297 return false;
298}
299
// Regex built from IncludeRegexPattern; the HeaderIncludes constructor matches
// each scanned line of the code against it.
300const llvm::Regex HeaderIncludes::IncludeRegex(IncludeRegexPattern);
301
// Scans \p Code for existing #include/#import lines and records the offsets at
// which new directives may be inserted.
// MinInsertOffset comes from getMinHeaderInsertionOffset. MaxInsertOffset adds
// to it the result of getMaxHeaderInsertionOffset run on the code that follows
// MinInsertOffset.
// NOTE(review): a few lines of this definition (local declarations and parts
// of the addExistingInclude call) are not visible in this listing.
302HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code,
303 const IncludeStyle &Style)
304 : FileName(FileName), Code(Code), FirstIncludeOffset(-1),
305 MinInsertOffset(getMinHeaderInsertionOffset(FileName, Code, Style)),
306 MaxInsertOffset(MinInsertOffset +
307 getMaxHeaderInsertionOffset(
308 FileName, Code.drop_front(MinInsertOffset), Style)),
309 MainIncludeFound(false), Categories(Style, FileName) {
310 // Add 0 for main header and INT_MAX for headers that are not in any
311 // category.
312 Priorities = {0, INT_MAX};
313 for (const auto &Category : Style.IncludeCategories)
314 Priorities.insert(Category.Priority);
// Only the text from MinInsertOffset onwards is split into lines and scanned.
316 Code.drop_front(MinInsertOffset).split(Lines, "\n");
317
// Offset tracks the start of the current line within the whole of Code.
318 unsigned Offset = MinInsertOffset;
319 unsigned NextLineOffset;
321 for (auto Line : Lines) {
// The +1 accounts for the "\n" separator; the result is clamped so that the
// last line cannot point past the end of Code.
322 NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1);
323 if (IncludeRegex.match(Line, &Matches)) {
324 // If this is the last line without trailing newline, we need to make
325 // sure we don't delete across the file boundary.
326 addExistingInclude(
327 Include(Matches[2],
329 Offset, std::min(Line.size() + 1, Code.size() - Offset)),
330 Matches[1] == "import" ? tooling::IncludeDirective::Import
332 NextLineOffset);
333 }
334 Offset = NextLineOffset;
335 }
336
337 // Populate CategoryEndOffsets:
338 // - Ensure that CategoryEndOffset[Highest] is always populated.
339 // - If CategoryEndOffset[Priority] isn't set, use the next higher value
340 // that is set, up to CategoryEndOffset[Highest].
341 auto Highest = Priorities.begin();
342 auto [It, Inserted] = CategoryEndOffsets.try_emplace(*Highest);
343 if (Inserted)
344 It->second = FirstIncludeOffset >= 0 ? FirstIncludeOffset : MinInsertOffset;
345 // By this point, CategoryEndOffset[Highest] is always set appropriately:
346 // - to an appropriate location before/after existing #includes, or
347 // - to right after the header guard, or
348 // - to the beginning of the file.
// Every remaining priority without an entry inherits the entry of the
// priority just before it in Priorities.
349 for (auto I = ++Priorities.begin(), E = Priorities.end(); I != E; ++I)
350 if (CategoryEndOffsets.find(*I) == CategoryEndOffsets.end())
351 CategoryEndOffsets[*I] = CategoryEndOffsets[*std::prev(I)];
352}
353
354// \p Offset: the start of the line following this include directive.
355void HeaderIncludes::addExistingInclude(Include IncludeToAdd,
356 unsigned NextLineOffset) {
357 auto &Incs = ExistingIncludes[trimInclude(IncludeToAdd.Name)];
358 Incs.push_back(std::move(IncludeToAdd));
359 auto &CurInclude = Incs.back();
360 // The header name with quotes or angle brackets.
361 // Only record the offset of current #include if we can insert after it.
362 if (CurInclude.R.getOffset() <= MaxInsertOffset) {
363 int Priority = Categories.getIncludePriority(
364 CurInclude.Name, /*CheckMainHeader=*/!MainIncludeFound);
365 if (Priority == 0)
366 MainIncludeFound = true;
367 CategoryEndOffsets[Priority] = NextLineOffset;
368 IncludesByPriority[Priority].push_back(&CurInclude);
369 if (FirstIncludeOffset < 0)
370 FirstIncludeOffset = CurInclude.R.getOffset();
371 }
372}
373
// Builds the replacement that inserts an #include or #import of
// \p IncludeName, quoted with <> when \p IsAngled is true and with ""
// otherwise. \p IncludeName must already be free of quotes/angle brackets.
// Returns std::nullopt when an existing directive for the same name already
// uses the same directive kind and the same quoting.
// NOTE(review): the line of the signature that declares `Directive` is not
// visible in this listing.
374std::optional<tooling::Replacement>
375HeaderIncludes::insert(llvm::StringRef IncludeName, bool IsAngled,
377 assert(IncludeName == trimInclude(IncludeName));
378 // If a <header> ("header") already exists in code, "header" (<header>) with
379 // different quotation and/or directive will still be inserted.
380 // FIXME: figure out if this is the best behavior.
381 auto It = ExistingIncludes.find(IncludeName);
382 if (It != ExistingIncludes.end()) {
383 for (const auto &Inc : It->second)
384 if (Inc.Directive == Directive &&
385 ((IsAngled && StringRef(Inc.Name).starts_with("<")) ||
386 (!IsAngled && StringRef(Inc.Name).starts_with("\""))))
387 return std::nullopt;
388 }
// The priority is computed on the quoted spelling of the name.
389 std::string Quoted =
390 std::string(llvm::formatv(IsAngled ? "<{0}>" : "\"{0}\"", IncludeName));
391 StringRef QuotedName = Quoted;
392 int Priority = Categories.getIncludePriority(
393 QuotedName, /*CheckMainHeader=*/!MainIncludeFound);
394 auto CatOffset = CategoryEndOffsets.find(Priority);
395 assert(CatOffset != CategoryEndOffsets.end());
396 unsigned InsertOffset = CatOffset->second; // Fall back offset
// Within the category, insert in front of the first recorded include whose
// name compares greater than the new one; otherwise keep the fall back offset.
397 auto Iter = IncludesByPriority.find(Priority);
398 if (Iter != IncludesByPriority.end()) {
399 for (const auto *Inc : Iter->second) {
400 if (QuotedName < Inc->Name) {
401 InsertOffset = Inc->R.getOffset();
402 break;
403 }
404 }
405 }
406 assert(InsertOffset <= Code.size());
407 llvm::StringRef DirectiveSpelling =
408 Directive == IncludeDirective::Include ? "include" : "import";
409 std::string NewInclude =
410 llvm::formatv("#{0} {1}\n", DirectiveSpelling, QuotedName);
411 // When inserting headers at end of the code, also append '\n' to the code
412 // if it does not end with '\n'.
413 // FIXME: when inserting multiple #includes at the end of code, only one
414 // newline should be added.
415 if (InsertOffset == Code.size() && (!Code.empty() && Code.back() != '\n'))
416 NewInclude = "\n" + NewInclude;
// A zero-length replacement at InsertOffset, i.e. a pure insertion.
417 return tooling::Replacement(FileName, InsertOffset, 0, NewInclude);
418}
419
421 bool IsAngled) const {
422 assert(IncludeName == trimInclude(IncludeName));
424 auto Iter = ExistingIncludes.find(IncludeName);
425 if (Iter == ExistingIncludes.end())
426 return Result;
427 for (const auto &Inc : Iter->second) {
428 if ((IsAngled && StringRef(Inc.Name).starts_with("\"")) ||
429 (!IsAngled && StringRef(Inc.Name).starts_with("<")))
430 continue;
431 llvm::Error Err = Result.add(tooling::Replacement(
432 FileName, Inc.R.getOffset(), Inc.R.getLength(), ""));
433 if (Err) {
434 auto ErrMsg = "Unexpected conflicts in #include deletions: " +
435 llvm::toString(std::move(Err));
436 llvm_unreachable(ErrMsg.c_str());
437 }
438 }
439 return Result;
440}
441
442} // namespace tooling
443} // namespace clang
IndirectLocalPath & Path
Expr * E
int Priority
Definition: Format.cpp:3181
int Category
Definition: Format.cpp:3180
unsigned Iter
Definition: HTMLLogger.cpp:153
#define SM(sm)
Definition: OffloadArch.cpp:16
Defines the SourceManager interface.
Directive - Abstract class representing a parsed verify directive.
tooling::Replacements remove(llvm::StringRef Header, bool IsAngled) const
Removes all existing #includes and #imports of Header quoted with <> if IsAngled is true or "" if IsA...
static const llvm::Regex IncludeRegex
HeaderIncludes(llvm::StringRef FileName, llvm::StringRef Code, const IncludeStyle &Style)
std::optional< tooling::Replacement > insert(llvm::StringRef Header, bool IsAngled, IncludeDirective Directive) const
Inserts an #include or #import directive of Header into the code.
int getIncludePriority(StringRef IncludeName, bool CheckMainHeader) const
Returns the priority of the category which IncludeName belongs to.
IncludeCategoryManager(const IncludeStyle &Style, StringRef FileName)
int getSortIncludePriority(StringRef IncludeName, bool CheckMainHeader) const
A source range independent of the SourceManager.
Definition: Replacement.h:44
A text replacement.
Definition: Replacement.h:83
Maintains a set of replacements that are conflict-free.
Definition: Replacement.h:212
#define INT_MAX
Definition: limits.h:50
SmallVector< BoundNodes, 1 > match(MatcherT Matcher, const NodeT &Node, ASTContext &Context)
Returns the results of matching Matcher on Node.
The JSON file list parser is used to communicate input to InstallAPI.
@ Result
The result type of a method or function.
int const char * function
Definition: c++config.h:31
#define false
Definition: stdbool.h:26
Style for sorting and grouping C++ #include directives.
Definition: IncludeStyle.h:20
@ MICD_Quote
Main include uses quotes: #include "foo.hpp" (the default).
Definition: IncludeStyle.h:158
@ MICD_AngleBracket
Main include uses angle brackets: #include <foo.hpp>.
Definition: IncludeStyle.h:160
@ MICD_Any
Main include uses either quotes or angle brackets.
Definition: IncludeStyle.h:162