clang 22.0.0git
PPLexerChange.cpp
Go to the documentation of this file.
1//===--- PPLexerChange.cpp - Handle changing lexers in the preprocessor ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements pieces of the Preprocessor interface that manage the
10// current lexer stack.
11//
12//===----------------------------------------------------------------------===//
13
18#include "clang/Lex/MacroInfo.h"
21#include "llvm/ADT/StringSwitch.h"
22#include "llvm/Support/MemoryBufferRef.h"
23#include "llvm/Support/Path.h"
24#include <optional>
25
26using namespace clang;
27
28//===----------------------------------------------------------------------===//
29// Miscellaneous Methods.
30//===----------------------------------------------------------------------===//
31
32/// isInPrimaryFile - Return true if we're in the top-level file, not in a
33/// \#include. This looks through macro expansions and active _Pragma lexers.
35 if (IsFileLexer())
36 return IncludeMacroStack.empty();
37
38 // If there are any stacked lexers, we're in a #include.
39 assert(IsFileLexer(IncludeMacroStack[0]) &&
40 "Top level include stack isn't our primary lexer?");
41 return llvm::none_of(
42 llvm::drop_begin(IncludeMacroStack),
43 [&](const IncludeStackInfo &ISI) -> bool { return IsFileLexer(ISI); });
44}
45
46/// getCurrentLexer - Return the current file lexer being lexed from. Note
47/// that this ignores any potentially active macro expansions and _Pragma
48/// expansions going on at the time.
50 if (IsFileLexer())
51 return CurPPLexer;
52
53 // Look for a stacked lexer.
54 for (const IncludeStackInfo &ISI : llvm::reverse(IncludeMacroStack)) {
55 if (IsFileLexer(ISI))
56 return ISI.ThePPLexer;
57 }
58 return nullptr;
59}
60
61
62//===----------------------------------------------------------------------===//
63// Methods for Entering and Callbacks for leaving various contexts
64//===----------------------------------------------------------------------===//
65
66/// EnterSourceFile - Add a source file to the top of the include stack and
67/// start lexing tokens from it instead of the current buffer.
70 bool IsFirstIncludeOfFile) {
71 assert(!CurTokenLexer && "Cannot #include a file inside a macro!");
72 ++NumEnteredSourceFiles;
73
74 if (MaxIncludeStackDepth < IncludeMacroStack.size())
75 MaxIncludeStackDepth = IncludeMacroStack.size();
76
77 // Get the MemoryBuffer for this FID, if it fails, we fail.
78 std::optional<llvm::MemoryBufferRef> InputFile =
80 if (!InputFile) {
81 SourceLocation FileStart = SourceMgr.getLocForStartOfFile(FID);
82 Diag(Loc, diag::err_pp_error_opening_file)
83 << std::string(SourceMgr.getBufferName(FileStart)) << "";
84 return true;
85 }
86
88 SourceMgr.getFileEntryForID(FID) == CodeCompletionFile) {
89 CodeCompletionFileLoc = SourceMgr.getLocForStartOfFile(FID);
90 CodeCompletionLoc =
91 CodeCompletionFileLoc.getLocWithOffset(CodeCompletionOffset);
92 }
93
94 Lexer *TheLexer = new Lexer(FID, *InputFile, *this, IsFirstIncludeOfFile);
95 if (GetDependencyDirectives && FID != PredefinesFileID)
97 if (auto MaybeDepDirectives = (*GetDependencyDirectives)(*File))
98 TheLexer->DepDirectives = *MaybeDepDirectives;
99
100 EnterSourceFileWithLexer(TheLexer, CurDir);
101 return false;
102}
103
104/// EnterSourceFileWithLexer - Add a source file to the top of the include stack
105/// and start lexing tokens from it instead of the current buffer.
106void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer,
107 ConstSearchDirIterator CurDir) {
108 PreprocessorLexer *PrevPPLexer = CurPPLexer;
109
110 // Add the current lexer to the include stack.
111 if (CurPPLexer || CurTokenLexer)
112 PushIncludeMacroStack();
113
114 CurLexer.reset(TheLexer);
115 CurPPLexer = TheLexer;
116 CurDirLookup = CurDir;
117 CurLexerSubmodule = nullptr;
118 if (CurLexerCallback != CLK_LexAfterModuleImport)
119 CurLexerCallback = TheLexer->isDependencyDirectivesLexer()
120 ? CLK_DependencyDirectivesLexer
121 : CLK_Lexer;
122
123 // Notify the client, if desired, that we are in a new source file.
124 if (Callbacks && !CurLexer->Is_PragmaLexer) {
126 SourceMgr.getFileCharacteristic(CurLexer->getFileLoc());
127
128 FileID PrevFID;
129 SourceLocation EnterLoc;
130 if (PrevPPLexer) {
131 PrevFID = PrevPPLexer->getFileID();
132 EnterLoc = PrevPPLexer->getSourceLocation();
133 }
134 Callbacks->FileChanged(CurLexer->getFileLoc(), PPCallbacks::EnterFile,
135 FileType, PrevFID);
136 Callbacks->LexedFileChanged(CurLexer->getFileID(),
138 FileType, PrevFID, EnterLoc);
139 }
140}
141
142/// EnterMacro - Add a Macro to the top of the include stack and start lexing
143/// tokens from it instead of the current buffer.
145 MacroInfo *Macro, MacroArgs *Args) {
146 std::unique_ptr<TokenLexer> TokLexer;
147 if (NumCachedTokenLexers == 0) {
148 TokLexer = std::make_unique<TokenLexer>(Tok, ILEnd, Macro, Args, *this);
149 } else {
150 TokLexer = std::move(TokenLexerCache[--NumCachedTokenLexers]);
151 TokLexer->Init(Tok, ILEnd, Macro, Args);
152 }
153
154 PushIncludeMacroStack();
155 CurDirLookup = nullptr;
156 CurTokenLexer = std::move(TokLexer);
157 if (CurLexerCallback != CLK_LexAfterModuleImport)
158 CurLexerCallback = CLK_TokenLexer;
159}
160
161/// EnterTokenStream - Add a "macro" context to the top of the include stack,
162/// which will cause the lexer to start returning the specified tokens.
163///
164/// If DisableMacroExpansion is true, tokens lexed from the token stream will
165/// not be subject to further macro expansion. Otherwise, these tokens will
166/// be re-macro-expanded when/if expansion is enabled.
167///
168/// If OwnsTokens is false, this method assumes that the specified stream of
169/// tokens has a permanent owner somewhere, so they do not need to be copied.
170/// If it is true, it assumes the array of tokens is allocated with new[] and
171/// must be freed.
172///
173void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks,
174 bool DisableMacroExpansion, bool OwnsTokens,
175 bool IsReinject) {
176 if (CurLexerCallback == CLK_CachingLexer) {
177 if (CachedLexPos < CachedTokens.size()) {
178 assert(IsReinject && "new tokens in the middle of cached stream");
179 // We're entering tokens into the middle of our cached token stream. We
180 // can't represent that, so just insert the tokens into the buffer.
181 CachedTokens.insert(CachedTokens.begin() + CachedLexPos,
182 Toks, Toks + NumToks);
183 if (OwnsTokens)
184 delete [] Toks;
185 return;
186 }
187
188 // New tokens are at the end of the cached token sequnece; insert the
189 // token stream underneath the caching lexer.
190 ExitCachingLexMode();
191 EnterTokenStream(Toks, NumToks, DisableMacroExpansion, OwnsTokens,
192 IsReinject);
193 EnterCachingLexMode();
194 return;
195 }
196
197 // Create a macro expander to expand from the specified token stream.
198 std::unique_ptr<TokenLexer> TokLexer;
199 if (NumCachedTokenLexers == 0) {
200 TokLexer = std::make_unique<TokenLexer>(
201 Toks, NumToks, DisableMacroExpansion, OwnsTokens, IsReinject, *this);
202 } else {
203 TokLexer = std::move(TokenLexerCache[--NumCachedTokenLexers]);
204 TokLexer->Init(Toks, NumToks, DisableMacroExpansion, OwnsTokens,
205 IsReinject);
206 }
207
208 // Save our current state.
209 PushIncludeMacroStack();
210 CurDirLookup = nullptr;
211 CurTokenLexer = std::move(TokLexer);
212 if (CurLexerCallback != CLK_LexAfterModuleImport)
213 CurLexerCallback = CLK_TokenLexer;
214}
215
216/// Compute the relative path that names the given file relative to
217/// the given directory.
220 Result.clear();
221
222 StringRef FilePath = File.getDir().getName();
223 StringRef Path = FilePath;
224 while (!Path.empty()) {
225 if (auto CurDir = FM.getOptionalDirectoryRef(Path)) {
226 if (*CurDir == Dir) {
227 Result = FilePath.substr(Path.size());
228 llvm::sys::path::append(Result,
229 llvm::sys::path::filename(File.getName()));
230 return;
231 }
232 }
233
234 Path = llvm::sys::path::parent_path(Path);
235 }
236
237 Result = File.getName();
238}
239
240void Preprocessor::PropagateLineStartLeadingSpaceInfo(Token &Result) {
241 if (CurTokenLexer) {
242 CurTokenLexer->PropagateLineStartLeadingSpaceInfo(Result);
243 return;
244 }
245 if (CurLexer) {
246 CurLexer->PropagateLineStartLeadingSpaceInfo(Result);
247 return;
248 }
249 // FIXME: Handle other kinds of lexers? It generally shouldn't matter,
250 // but it might if they're empty?
251}
252
253/// Determine the location to use as the end of the buffer for a lexer.
254///
255/// If the file ends with a newline, form the EOF token on the newline itself,
256/// rather than "on the line following it", which doesn't exist. This makes
257/// diagnostics relating to the end of file include the last file that the user
258/// actually typed, which is goodness.
259const char *Preprocessor::getCurLexerEndPos() {
260 const char *EndPos = CurLexer->BufferEnd;
261 if (EndPos != CurLexer->BufferStart &&
262 (EndPos[-1] == '\n' || EndPos[-1] == '\r')) {
263 --EndPos;
264
265 // Handle \n\r and \r\n:
266 if (EndPos != CurLexer->BufferStart &&
267 (EndPos[-1] == '\n' || EndPos[-1] == '\r') &&
268 EndPos[-1] != EndPos[0])
269 --EndPos;
270 }
271
272 return EndPos;
273}
274
276 const Module &Mod, SmallVectorImpl<const Module *> &SubMods) {
278 SubMods.push_back(&Mod);
279 for (auto *M : Mod.submodules())
281}
282
283void Preprocessor::diagnoseMissingHeaderInUmbrellaDir(const Module &Mod) {
284 std::optional<Module::Header> UmbrellaHeader =
286 assert(UmbrellaHeader && "Module must use umbrella header");
287 const FileID &File = SourceMgr.translateFile(UmbrellaHeader->Entry);
288 SourceLocation ExpectedHeadersLoc = SourceMgr.getLocForEndOfFile(File);
289 if (getDiagnostics().isIgnored(diag::warn_uncovered_module_header,
290 ExpectedHeadersLoc))
291 return;
292
295 llvm::vfs::FileSystem &FS = FileMgr.getVirtualFileSystem();
296 std::error_code EC;
297 for (llvm::vfs::recursive_directory_iterator Entry(FS, Dir->getName(), EC),
298 End;
299 Entry != End && !EC; Entry.increment(EC)) {
300 using llvm::StringSwitch;
301
302 // Check whether this entry has an extension typically associated with
303 // headers.
304 if (!StringSwitch<bool>(llvm::sys::path::extension(Entry->path()))
305 .Cases(".h", ".H", ".hh", ".hpp", true)
306 .Default(false))
307 continue;
308
309 if (auto Header = getFileManager().getOptionalFileRef(Entry->path()))
310 if (!getSourceManager().hasFileInfo(*Header)) {
311 if (!ModMap.isHeaderInUnavailableModule(*Header)) {
312 // Find the relative path that would access this header.
313 SmallString<128> RelativePath;
314 computeRelativePath(FileMgr, *Dir, *Header, RelativePath);
315 Diag(ExpectedHeadersLoc, diag::warn_uncovered_module_header)
316 << Mod.getFullModuleName() << RelativePath;
317 }
318 }
319 }
320}
321
322/// HandleEndOfFile - This callback is invoked when the lexer hits the end of
323/// the current file. This either returns the EOF token or pops a level off
324/// the include stack and keeps going.
325bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
326 assert(!CurTokenLexer &&
327 "Ending a file when currently in a macro!");
328
329 SourceLocation UnclosedSafeBufferOptOutLoc;
330
331 if (IncludeMacroStack.empty() &&
332 isPPInSafeBufferOptOutRegion(UnclosedSafeBufferOptOutLoc)) {
333 // To warn if a "-Wunsafe-buffer-usage" opt-out region is still open by the
334 // end of a file.
335 Diag(UnclosedSafeBufferOptOutLoc,
336 diag::err_pp_unclosed_pragma_unsafe_buffer_usage);
337 }
338 // If we have an unclosed module region from a pragma at the end of a
339 // module, complain and close it now.
340 const bool LeavingSubmodule = CurLexer && CurLexerSubmodule;
341 if ((LeavingSubmodule || IncludeMacroStack.empty()) &&
342 !BuildingSubmoduleStack.empty() &&
343 BuildingSubmoduleStack.back().IsPragma) {
344 Diag(BuildingSubmoduleStack.back().ImportLoc,
345 diag::err_pp_module_begin_without_module_end);
346 Module *M = LeaveSubmodule(/*ForPragma*/true);
347
348 Result.startToken();
349 const char *EndPos = getCurLexerEndPos();
350 CurLexer->BufferPtr = EndPos;
351 CurLexer->FormTokenWithChars(Result, EndPos, tok::annot_module_end);
352 Result.setAnnotationEndLoc(Result.getLocation());
353 Result.setAnnotationValue(M);
354 return true;
355 }
356
357 // See if this file had a controlling macro.
358 if (CurPPLexer) { // Not ending a macro, ignore it.
359 if (const IdentifierInfo *ControllingMacro =
361 // Okay, this has a controlling macro, remember in HeaderFileInfo.
362 if (OptionalFileEntryRef FE = CurPPLexer->getFileEntry()) {
363 HeaderInfo.SetFileControllingMacro(*FE, ControllingMacro);
364 if (MacroInfo *MI = getMacroInfo(ControllingMacro))
365 MI->setUsedForHeaderGuard(true);
366 if (const IdentifierInfo *DefinedMacro =
367 CurPPLexer->MIOpt.GetDefinedMacro()) {
368 if (!isMacroDefined(ControllingMacro) &&
369 DefinedMacro != ControllingMacro &&
370 CurLexer->isFirstTimeLexingFile()) {
371
372 // If the edit distance between the two macros is more than 50%,
373 // DefinedMacro may not be a header guard, or may be the header guard of
374 // another header file. Therefore, it may be defining something
375 // completely different. This can be observed in the wild when
376 // handling feature macros or header guards in different files.
377
378 const StringRef ControllingMacroName = ControllingMacro->getName();
379 const StringRef DefinedMacroName = DefinedMacro->getName();
380 const size_t MaxHalfLength = std::max(ControllingMacroName.size(),
381 DefinedMacroName.size()) / 2;
382 const unsigned ED = ControllingMacroName.edit_distance(
383 DefinedMacroName, true, MaxHalfLength);
384 if (ED <= MaxHalfLength) {
385 // Emit a warning for a bad header guard.
386 Diag(CurPPLexer->MIOpt.GetMacroLocation(),
387 diag::warn_header_guard)
388 << CurPPLexer->MIOpt.GetMacroLocation() << ControllingMacro;
389 Diag(CurPPLexer->MIOpt.GetDefinedLocation(),
390 diag::note_header_guard)
391 << CurPPLexer->MIOpt.GetDefinedLocation() << DefinedMacro
392 << ControllingMacro
394 CurPPLexer->MIOpt.GetDefinedLocation(),
395 ControllingMacro->getName());
396 }
397 }
398 }
399 }
400 }
401 }
402
403 // Complain about reaching a true EOF within arc_cf_code_audited.
404 // We don't want to complain about reaching the end of a macro
405 // instantiation or a _Pragma.
406 if (PragmaARCCFCodeAuditedInfo.getLoc().isValid() && !isEndOfMacro &&
407 !(CurLexer && CurLexer->Is_PragmaLexer)) {
408 Diag(PragmaARCCFCodeAuditedInfo.getLoc(),
409 diag::err_pp_eof_in_arc_cf_code_audited);
410
411 // Recover by leaving immediately.
412 PragmaARCCFCodeAuditedInfo = IdentifierLoc();
413 }
414
415 // Complain about reaching a true EOF within assume_nonnull.
416 // We don't want to complain about reaching the end of a macro
417 // instantiation or a _Pragma.
418 if (PragmaAssumeNonNullLoc.isValid() &&
419 !isEndOfMacro && !(CurLexer && CurLexer->Is_PragmaLexer)) {
420 // If we're at the end of generating a preamble, we should record the
421 // unterminated \#pragma clang assume_nonnull so we can restore it later
422 // when the preamble is loaded into the main file.
424 PreambleRecordedPragmaAssumeNonNullLoc = PragmaAssumeNonNullLoc;
425 else
426 Diag(PragmaAssumeNonNullLoc, diag::err_pp_eof_in_assume_nonnull);
427 // Recover by leaving immediately.
428 PragmaAssumeNonNullLoc = SourceLocation();
429 }
430
431 bool LeavingPCHThroughHeader = false;
432
433 // If this is a #include'd file, pop it off the include stack and continue
434 // lexing the #includer file.
435 if (!IncludeMacroStack.empty()) {
436
437 // If we lexed the code-completion file, act as if we reached EOF.
438 if (isCodeCompletionEnabled() && CurPPLexer &&
439 SourceMgr.getLocForStartOfFile(CurPPLexer->getFileID()) ==
440 CodeCompletionFileLoc) {
441 assert(CurLexer && "Got EOF but no current lexer set!");
442 Result.startToken();
443 CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof);
444 CurLexer.reset();
445
446 CurPPLexer = nullptr;
448 return true;
449 }
450
451 if (!isEndOfMacro && CurPPLexer &&
452 (SourceMgr.getIncludeLoc(CurPPLexer->getFileID()).isValid() ||
453 // Predefines file doesn't have a valid include location.
454 (PredefinesFileID.isValid() &&
455 CurPPLexer->getFileID() == PredefinesFileID))) {
456 // Notify SourceManager to record the number of FileIDs that were created
457 // during lexing of the #include'd file.
458 unsigned NumFIDs =
459 SourceMgr.local_sloc_entry_size() -
460 CurPPLexer->getInitialNumSLocEntries() + 1/*#include'd file*/;
461 SourceMgr.setNumCreatedFIDsForFileID(CurPPLexer->getFileID(), NumFIDs);
462 }
463
464 bool ExitedFromPredefinesFile = false;
465 FileID ExitedFID;
466 if (!isEndOfMacro && CurPPLexer) {
467 ExitedFID = CurPPLexer->getFileID();
468
469 assert(PredefinesFileID.isValid() &&
470 "HandleEndOfFile is called before PredefinesFileId is set");
471 ExitedFromPredefinesFile = (PredefinesFileID == ExitedFID);
472 }
473
474 if (LeavingSubmodule) {
475 // We're done with this submodule.
476 Module *M = LeaveSubmodule(/*ForPragma*/false);
477
478 // Notify the parser that we've left the module.
479 const char *EndPos = getCurLexerEndPos();
480 Result.startToken();
481 CurLexer->BufferPtr = EndPos;
482 CurLexer->FormTokenWithChars(Result, EndPos, tok::annot_module_end);
483 Result.setAnnotationEndLoc(Result.getLocation());
484 Result.setAnnotationValue(M);
485 }
486
487 bool FoundPCHThroughHeader = false;
488 if (CurPPLexer && creatingPCHWithThroughHeader() &&
490 SourceMgr.getFileEntryForID(CurPPLexer->getFileID())))
491 FoundPCHThroughHeader = true;
492
493 // We're done with the #included file.
495
496 // Propagate info about start-of-line/leading white-space/etc.
497 PropagateLineStartLeadingSpaceInfo(Result);
498
499 // Notify the client, if desired, that we are in a new source file.
500 if (Callbacks && !isEndOfMacro && CurPPLexer) {
501 SourceLocation Loc = CurPPLexer->getSourceLocation();
503 SourceMgr.getFileCharacteristic(Loc);
504 Callbacks->FileChanged(Loc, PPCallbacks::ExitFile, FileType, ExitedFID);
505 Callbacks->LexedFileChanged(CurPPLexer->getFileID(),
507 FileType, ExitedFID, Loc);
508 }
509
510 // Restore conditional stack as well as the recorded
511 // \#pragma clang assume_nonnull from the preamble right after exiting
512 // from the predefines file.
513 if (ExitedFromPredefinesFile) {
514 replayPreambleConditionalStack();
515 if (PreambleRecordedPragmaAssumeNonNullLoc.isValid())
516 PragmaAssumeNonNullLoc = PreambleRecordedPragmaAssumeNonNullLoc;
517 }
518
519 if (!isEndOfMacro && CurPPLexer && FoundPCHThroughHeader &&
520 (isInPrimaryFile() ||
521 CurPPLexer->getFileID() == getPredefinesFileID())) {
522 // Leaving the through header. Continue directly to end of main file
523 // processing.
524 LeavingPCHThroughHeader = true;
525 } else {
526 // Client should lex another token unless we generated an EOM.
527 return LeavingSubmodule;
528 }
529 }
530 // If this is the end of the main file, form an EOF token.
531 assert(CurLexer && "Got EOF but no current lexer set!");
532 const char *EndPos = getCurLexerEndPos();
533 Result.startToken();
534 CurLexer->BufferPtr = EndPos;
535
536 if (getLangOpts().IncrementalExtensions) {
537 CurLexer->FormTokenWithChars(Result, EndPos, tok::annot_repl_input_end);
538 Result.setAnnotationEndLoc(Result.getLocation());
539 Result.setAnnotationValue(nullptr);
540 } else {
541 CurLexer->FormTokenWithChars(Result, EndPos, tok::eof);
542 }
543
545 // Inserting the code-completion point increases the source buffer by 1,
546 // but the main FileID was created before inserting the point.
547 // Compensate by reducing the EOF location by 1, otherwise the location
548 // will point to the next FileID.
549 // FIXME: This is hacky, the code-completion point should probably be
550 // inserted before the main FileID is created.
551 if (CurLexer->getFileLoc() == CodeCompletionFileLoc)
552 Result.setLocation(Result.getLocation().getLocWithOffset(-1));
553 }
554
555 if (creatingPCHWithThroughHeader() && !LeavingPCHThroughHeader) {
556 // Reached the end of the compilation without finding the through header.
557 Diag(CurLexer->getFileLoc(), diag::err_pp_through_header_not_seen)
558 << PPOpts.PCHThroughHeader << 0;
559 }
560
562 // We're done with lexing.
563 CurLexer.reset();
564
566 CurPPLexer = nullptr;
567
568 if (TUKind == TU_Complete) {
569 // This is the end of the top-level file. 'WarnUnusedMacroLocs' has
570 // collected all macro locations that we need to warn because they are not
571 // used.
572 for (WarnUnusedMacroLocsTy::iterator
573 I=WarnUnusedMacroLocs.begin(), E=WarnUnusedMacroLocs.end();
574 I!=E; ++I)
575 Diag(*I, diag::pp_macro_not_used);
576 }
577
578 // If we are building a module that has an umbrella header, make sure that
579 // each of the headers within the directory, including those belonging to
580 // submodules, is covered by the umbrella header, i.e. was actually included
581 // by the umbrella header.
582 if (Module *Mod = getCurrentModule()) {
585 for (auto *M : AllMods)
586 diagnoseMissingHeaderInUmbrellaDir(*M);
587 }
588
589 return true;
590}
591
592/// HandleEndOfTokenLexer - This callback is invoked when the current TokenLexer
593/// hits the end of its token stream.
595 assert(CurTokenLexer && !CurPPLexer &&
596 "Ending a macro when currently in a #include file!");
597
598 if (!MacroExpandingLexersStack.empty() &&
599 MacroExpandingLexersStack.back().first == CurTokenLexer.get())
600 removeCachedMacroExpandedTokensOfLastLexer();
601
602 // Delete or cache the now-dead macro expander.
603 if (NumCachedTokenLexers == TokenLexerCacheSize)
604 CurTokenLexer.reset();
605 else
606 TokenLexerCache[NumCachedTokenLexers++] = std::move(CurTokenLexer);
607
608 // Handle this like a #include file being popped off the stack.
609 return HandleEndOfFile(Result, true);
610}
611
612/// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the
613/// lexer stack. This should only be used in situations where the current
614/// state of the top-of-stack lexer is unknown.
616 assert(!IncludeMacroStack.empty() && "Ran out of stack entries to load");
617
618 if (CurTokenLexer) {
619 // Delete or cache the now-dead macro expander.
620 if (NumCachedTokenLexers == TokenLexerCacheSize)
621 CurTokenLexer.reset();
622 else
623 TokenLexerCache[NumCachedTokenLexers++] = std::move(CurTokenLexer);
624 }
625
626 PopIncludeMacroStack();
627}
628
629/// HandleMicrosoftCommentPaste - When the macro expander pastes together a
630/// comment (/##/) in microsoft mode, this method handles updating the current
631/// state, returning the token on the next source line.
633 assert(CurTokenLexer && !CurPPLexer &&
634 "Pasted comment can only be formed from macro");
635 // We handle this by scanning for the closest real lexer, switching it to
636 // raw mode and preprocessor mode. This will cause it to return \n as an
637 // explicit EOD token.
638 PreprocessorLexer *FoundLexer = nullptr;
639 bool LexerWasInPPMode = false;
640 for (const IncludeStackInfo &ISI : llvm::reverse(IncludeMacroStack)) {
641 if (ISI.ThePPLexer == nullptr) continue; // Scan for a real lexer.
642
643 // Once we find a real lexer, mark it as raw mode (disabling macro
644 // expansions) and preprocessor mode (return EOD). We know that the lexer
645 // was *not* in raw mode before, because the macro that the comment came
646 // from was expanded. However, it could have already been in preprocessor
647 // mode (#if COMMENT) in which case we have to return it to that mode and
648 // return EOD.
649 FoundLexer = ISI.ThePPLexer;
650 FoundLexer->LexingRawMode = true;
651 LexerWasInPPMode = FoundLexer->ParsingPreprocessorDirective;
652 FoundLexer->ParsingPreprocessorDirective = true;
653 break;
654 }
655
656 // Okay, we either found and switched over the lexer, or we didn't find a
657 // lexer. In either case, finish off the macro the comment came from, getting
658 // the next token.
659 if (!HandleEndOfTokenLexer(Tok)) Lex(Tok);
660
661 // Discarding comments as long as we don't have EOF or EOD. This 'comments
662 // out' the rest of the line, including any tokens that came from other macros
663 // that were active, as in:
664 // #define submacro a COMMENT b
665 // submacro c
666 // which should lex to 'a' only: 'b' and 'c' should be removed.
667 while (Tok.isNot(tok::eod) && Tok.isNot(tok::eof))
668 Lex(Tok);
669
670 // If we got an eod token, then we successfully found the end of the line.
671 if (Tok.is(tok::eod)) {
672 assert(FoundLexer && "Can't get end of line without an active lexer");
673 // Restore the lexer back to normal mode instead of raw mode.
674 FoundLexer->LexingRawMode = false;
675
676 // If the lexer was already in preprocessor mode, just return the EOD token
677 // to finish the preprocessor line.
678 if (LexerWasInPPMode) return;
679
680 // Otherwise, switch out of PP mode and return the next lexed token.
681 FoundLexer->ParsingPreprocessorDirective = false;
682 return Lex(Tok);
683 }
684
685 // If we got an EOF token, then we reached the end of the token stream but
686 // didn't find an explicit \n. This can only happen if there was no lexer
687 // active (an active lexer would return EOD at EOF if there was no \n in
688 // preprocessor directive mode), so just return EOF as our token.
689 assert(!FoundLexer && "Lexer should return EOD before EOF in PP mode");
690}
691
693 bool ForPragma) {
694 if (!getLangOpts().ModulesLocalVisibility) {
695 // Just track that we entered this submodule.
696 BuildingSubmoduleStack.push_back(
697 BuildingSubmoduleInfo(M, ImportLoc, ForPragma, CurSubmoduleState,
698 PendingModuleMacroNames.size()));
699 if (Callbacks)
700 Callbacks->EnteredSubmodule(M, ImportLoc, ForPragma);
701 return;
702 }
703
704 // Resolve as much of the module definition as we can now, before we enter
705 // one of its headers.
706 // FIXME: Can we enable Complain here?
707 // FIXME: Can we do this when local visibility is disabled?
709 ModMap.resolveExports(M, /*Complain=*/false);
710 ModMap.resolveUses(M, /*Complain=*/false);
711 ModMap.resolveConflicts(M, /*Complain=*/false);
712
713 // If this is the first time we've entered this module, set up its state.
714 auto R = Submodules.try_emplace(M);
715 auto &State = R.first->second;
716 bool FirstTime = R.second;
717 if (FirstTime) {
718 // Determine the set of starting macros for this submodule; take these
719 // from the "null" module (the predefines buffer).
720 //
721 // FIXME: If we have local visibility but not modules enabled, the
722 // NullSubmoduleState is polluted by #defines in the top-level source
723 // file.
724 auto &StartingMacros = NullSubmoduleState.Macros;
725
726 // Restore to the starting state.
727 // FIXME: Do this lazily, when each macro name is first referenced.
728 for (auto &Macro : StartingMacros) {
729 // Skip uninteresting macros.
730 if (!Macro.second.getLatest() &&
731 Macro.second.getOverriddenMacros().empty())
732 continue;
733
734 MacroState MS(Macro.second.getLatest());
735 MS.setOverriddenMacros(*this, Macro.second.getOverriddenMacros());
736 State.Macros.insert(std::make_pair(Macro.first, std::move(MS)));
737 }
738 }
739
740 // Track that we entered this module.
741 BuildingSubmoduleStack.push_back(
742 BuildingSubmoduleInfo(M, ImportLoc, ForPragma, CurSubmoduleState,
743 PendingModuleMacroNames.size()));
744
745 if (Callbacks)
746 Callbacks->EnteredSubmodule(M, ImportLoc, ForPragma);
747
748 // Switch to this submodule as the current submodule.
749 CurSubmoduleState = &State;
750
751 // This module is visible to itself, but exports should not be made visible
752 // until they are imported.
753 if (FirstTime)
754 makeModuleVisible(M, ImportLoc, /*IncludeExports=*/false);
755}
756
757bool Preprocessor::needModuleMacros() const {
758 // If we're not within a submodule, we never need to create ModuleMacros.
759 if (BuildingSubmoduleStack.empty())
760 return false;
761 // If we are tracking module macro visibility even for textually-included
762 // headers, we need ModuleMacros.
763 if (getLangOpts().ModulesLocalVisibility)
764 return true;
765 // Otherwise, we only need module macros if we're actually compiling a module
766 // interface.
768}
769
771 if (BuildingSubmoduleStack.empty() ||
772 BuildingSubmoduleStack.back().IsPragma != ForPragma) {
773 assert(ForPragma && "non-pragma module enter/leave mismatch");
774 return nullptr;
775 }
776
777 auto &Info = BuildingSubmoduleStack.back();
778
779 Module *LeavingMod = Info.M;
780 SourceLocation ImportLoc = Info.ImportLoc;
781
782 if (!needModuleMacros() ||
783 (!getLangOpts().ModulesLocalVisibility &&
784 LeavingMod->getTopLevelModuleName() != getLangOpts().CurrentModule)) {
785 // If we don't need module macros, or this is not a module for which we
786 // are tracking macro visibility, don't build any, and preserve the list
787 // of pending names for the surrounding submodule.
788 BuildingSubmoduleStack.pop_back();
789
790 if (Callbacks)
791 Callbacks->LeftSubmodule(LeavingMod, ImportLoc, ForPragma);
792
793 makeModuleVisible(LeavingMod, ImportLoc);
794 return LeavingMod;
795 }
796
797 // Create ModuleMacros for any macros defined in this submodule.
799 for (unsigned I = Info.OuterPendingModuleMacroNames;
800 I != PendingModuleMacroNames.size(); ++I) {
801 auto *II = PendingModuleMacroNames[I];
802 if (!VisitedMacros.insert(II).second)
803 continue;
804
805 auto MacroIt = CurSubmoduleState->Macros.find(II);
806 if (MacroIt == CurSubmoduleState->Macros.end())
807 continue;
808 auto &Macro = MacroIt->second;
809
810 // Find the starting point for the MacroDirective chain in this submodule.
811 MacroDirective *OldMD = nullptr;
812 auto *OldState = Info.OuterSubmoduleState;
813 if (getLangOpts().ModulesLocalVisibility)
814 OldState = &NullSubmoduleState;
815 if (OldState && OldState != CurSubmoduleState) {
816 // FIXME: It'd be better to start at the state from when we most recently
817 // entered this submodule, but it doesn't really matter.
818 auto &OldMacros = OldState->Macros;
819 auto OldMacroIt = OldMacros.find(II);
820 if (OldMacroIt == OldMacros.end())
821 OldMD = nullptr;
822 else
823 OldMD = OldMacroIt->second.getLatest();
824 }
825
826 // This module may have exported a new macro. If so, create a ModuleMacro
827 // representing that fact.
828 bool ExplicitlyPublic = false;
829 for (auto *MD = Macro.getLatest(); MD != OldMD; MD = MD->getPrevious()) {
830 assert(MD && "broken macro directive chain");
831
832 if (auto *VisMD = dyn_cast<VisibilityMacroDirective>(MD)) {
833 // The latest visibility directive for a name in a submodule affects
834 // all the directives that come before it.
835 if (VisMD->isPublic())
836 ExplicitlyPublic = true;
837 else if (!ExplicitlyPublic)
838 // Private with no following public directive: not exported.
839 break;
840 } else {
841 MacroInfo *Def = nullptr;
842 if (DefMacroDirective *DefMD = dyn_cast<DefMacroDirective>(MD))
843 Def = DefMD->getInfo();
844
845 // FIXME: Issue a warning if multiple headers for the same submodule
846 // define a macro, rather than silently ignoring all but the first.
847 bool IsNew;
848 // Don't bother creating a module macro if it would represent a #undef
849 // that doesn't override anything.
850 if (Def || !Macro.getOverriddenMacros().empty())
851 addModuleMacro(LeavingMod, II, Def, Macro.getOverriddenMacros(),
852 IsNew);
853
854 if (!getLangOpts().ModulesLocalVisibility) {
855 // This macro is exposed to the rest of this compilation as a
856 // ModuleMacro; we don't need to track its MacroDirective any more.
857 Macro.setLatest(nullptr);
858 Macro.setOverriddenMacros(*this, {});
859 }
860 break;
861 }
862 }
863 }
864 PendingModuleMacroNames.resize(Info.OuterPendingModuleMacroNames);
865
866 // FIXME: Before we leave this submodule, we should parse all the other
867 // headers within it. Otherwise, we're left with an inconsistent state
868 // where we've made the module visible but don't yet have its complete
869 // contents.
870
871 // Put back the outer module's state, if we're tracking it.
872 if (getLangOpts().ModulesLocalVisibility)
873 CurSubmoduleState = Info.OuterSubmoduleState;
874
875 BuildingSubmoduleStack.pop_back();
876
877 if (Callbacks)
878 Callbacks->LeftSubmodule(LeavingMod, ImportLoc, ForPragma);
879
880 // A nested #include makes the included submodule visible.
881 makeModuleVisible(LeavingMod, ImportLoc);
882 return LeavingMod;
883}
IndirectLocalPath & Path
Expr * E
Defines the clang::FileManager interface and associated types.
llvm::MachO::FileType FileType
Definition: MachO.h:46
Defines the clang::MacroInfo and clang::MacroDirective classes.
static void collectAllSubModulesWithUmbrellaHeader(const Module &Mod, SmallVectorImpl< const Module * > &SubMods)
static void computeRelativePath(FileManager &FM, const DirectoryEntry *Dir, FileEntryRef File, SmallString< 128 > &Result)
Compute the relative path that names the given file relative to the given directory.
Defines the clang::Preprocessor interface.
SourceLocation Loc
Definition: SemaObjC.cpp:754
Defines the SourceManager interface.
A directive for a defined macro or a macro imported from a module.
Definition: MacroInfo.h:432
StringRef getName() const
Cached information about one directory (either on disk or in the virtual file system).
A reference to a FileEntry that includes the name of the file as it was accessed by the FileManager's client.
Definition: FileEntry.h:57
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with its #include path and #line data.
bool isValid() const
Implements support for file system lookup, file system caching, and directory search management.
Definition: FileManager.h:53
llvm::vfs::FileSystem & getVirtualFileSystem() const
Definition: FileManager.h:219
OptionalDirectoryEntryRef getOptionalDirectoryRef(StringRef DirName, bool CacheFailure=true)
Get a DirectoryEntryRef if it exists, without doing anything on error.
Definition: FileManager.h:175
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string.
Definition: Diagnostic.h:139
void SetFileControllingMacro(FileEntryRef File, const IdentifierInfo *ControllingMacro)
Mark the specified file as having a controlling macro.
Definition: HeaderSearch.h:574
ModuleMap & getModuleMap()
Retrieve the module map.
Definition: HeaderSearch.h:831
One of these records is kept for each identifier that is lexed.
A simple pair of identifier info and location.
SourceLocation getLoc() const
bool isCompilingModule() const
Are we compiling a module?
Definition: LangOptions.h:596
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
Definition: Lexer.h:78
MacroArgs - An instance of this class captures information about the formal arguments specified to a function-like macro invocation.
Definition: MacroArgs.h:30
Encapsulates changes to the "macros namespace" (the location where the macro name became active, the location where it was undefined, etc.).
Definition: MacroInfo.h:313
const MacroDirective * getPrevious() const
Get previous definition of the macro with the same name.
Definition: MacroInfo.h:354
Encapsulates the data about a macro definition (e.g. its tokens).
Definition: MacroInfo.h:39
bool resolveExports(Module *Mod, bool Complain)
Resolve all of the unresolved exports in the given module.
Definition: ModuleMap.cpp:1491
bool resolveConflicts(Module *Mod, bool Complain)
Resolve all of the unresolved conflicts in the given module.
Definition: ModuleMap.cpp:1518
bool isHeaderInUnavailableModule(FileEntryRef Header) const
Determine whether the given header is part of a module marked 'unavailable'.
Definition: ModuleMap.cpp:714
bool resolveUses(Module *Mod, bool Complain)
Resolve all of the unresolved uses in the given module.
Definition: ModuleMap.cpp:1504
Describes a module or submodule.
Definition: Module.h:144
StringRef getTopLevelModuleName() const
Retrieve the name of the top-level module.
Definition: Module.h:732
llvm::iterator_range< submodule_iterator > submodules()
Definition: Module.h:838
std::optional< Header > getUmbrellaHeaderAsWritten() const
Retrieve the umbrella header as written.
Definition: Module.h:756
std::string getFullModuleName(bool AllowStringLiterals=false) const
Retrieve the full name of this module, including the path from its top-level module.
Definition: Module.cpp:239
OptionalDirectoryEntryRef getEffectiveUmbrellaDir() const
Get the effective umbrella directory for this module: either the one explicitly written in the module map file, or the parent of the umbrella header.
Definition: Module.cpp:263
const IdentifierInfo * GetDefinedMacro() const
If the ControllingMacro is followed by a macro definition, return the macro that was defined.
const IdentifierInfo * GetControllingMacroAtEndOfFile() const
Once the entire file has been lexed, if there is a controlling macro, return it.
SourceLocation GetDefinedLocation() const
SourceLocation GetMacroLocation() const
unsigned getInitialNumSLocEntries() const
Number of SLocEntries before lexing the file.
bool LexingRawMode
True if in raw mode.
bool ParsingPreprocessorDirective
True when parsing #XXX; turns '\n' into a tok::eod token.
MultipleIncludeOpt MIOpt
A state machine that detects the #ifndef-wrapping a file idiom for the multiple-include optimization.
OptionalFileEntryRef getFileEntry() const
getFileEntry - Return the FileEntry corresponding to this FileID.
virtual SourceLocation getSourceLocation()=0
Return the source location for the next observable location.
std::string PCHThroughHeader
If non-empty, the filename used in an #include directive in the primary source file (or command-line preinclude) that is used to implement #pragma hdrstop for creating and using a PCH file.
bool creatingPCHWithThroughHeader()
True if creating a PCH with a through header.
ModuleMacro * addModuleMacro(Module *Mod, IdentifierInfo *II, MacroInfo *Macro, ArrayRef< ModuleMacro * > Overrides, bool &IsNew)
Register an exported macro for a module and identifier.
bool isIncrementalProcessingEnabled() const
Returns true if incremental processing is enabled.
const MacroInfo * getMacroInfo(const IdentifierInfo *II) const
bool isRecordingPreamble() const
bool isInPrimaryFile() const
Return true if we're in the top-level file, not in a #include.
void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma)
bool HandleEndOfTokenLexer(Token &Result)
Callback invoked when the current TokenLexer hits the end of its token stream.
void Lex(Token &Result)
Lex the next token for this preprocessor.
const TranslationUnitKind TUKind
The kind of translation unit we are processing.
Definition: Preprocessor.h:309
bool EnterSourceFile(FileID FID, ConstSearchDirIterator Dir, SourceLocation Loc, bool IsFirstIncludeOfFile=true)
Add a source file to the top of the include stack and start lexing tokens from it instead of the current buffer.
SourceManager & getSourceManager() const
bool isMacroDefined(StringRef Id)
Module * getCurrentModule()
Retrieves the module that we're currently building, if any.
bool isPPInSafeBufferOptOutRegion()
void makeModuleVisible(Module *M, SourceLocation Loc, bool IncludeExports=true)
FileManager & getFileManager() const
bool isPCHThroughHeader(const FileEntry *FE)
Returns true if the FileEntry is the PCH through header.
FileID getPredefinesFileID() const
Returns the FileID for the preprocessor predefines.
bool isCodeCompletionEnabled() const
Determine if we are performing code completion.
PreprocessorLexer * getCurrentFileLexer() const
Return the current file lexer being lexed from.
HeaderSearch & getHeaderSearchInfo() const
Module * LeaveSubmodule(bool ForPragma)
void recomputeCurLexerKind()
Recompute the current lexer kind based on the CurLexer/ CurTokenLexer pointers.
const LangOptions & getLangOpts() const
void RemoveTopOfLexerStack()
Pop the current lexer/macro exp off the top of the lexer stack.
bool HandleEndOfFile(Token &Result, bool isEndOfMacro=false)
Callback invoked when the lexer hits the end of the current file.
DiagnosticsEngine & getDiagnostics() const
void EnterMacro(Token &Tok, SourceLocation ILEnd, MacroInfo *Macro, MacroArgs *Args)
Add a Macro to the top of the include stack and start lexing tokens from it instead of the current buffer.
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
void HandleMicrosoftCommentPaste(Token &Tok)
When the macro expander pastes together a comment (/##/) in Microsoft mode, this method handles updating the current state, returning the token on the next source line.
Encodes a location in the source.
bool isValid() const
Return true if this is a valid SourceLocation object.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
OptionalFileEntryRef getFileEntryRefForID(FileID FID) const
Returns the FileEntryRef for the provided FileID.
StringRef getBufferName(SourceLocation Loc, bool *Invalid=nullptr) const
Return the filename or buffer identifier of the buffer the location is in.
FileID translateFile(const FileEntry *SourceFile) const
Get the FileID for the given file.
unsigned local_sloc_entry_size() const
Get the number of local SLocEntries we have.
SourceLocation getLocForEndOfFile(FileID FID) const
Return the source location corresponding to the last byte of the specified file.
SourceLocation getIncludeLoc(FileID FID) const
Returns the include location if FID is a #include'd file otherwise it returns an invalid location.
void setNumCreatedFIDsForFileID(FileID FID, unsigned NumFIDs, bool Force=false)
Set the number of FileIDs (files and macros) that were created during preprocessing of FID, including it.
const FileEntry * getFileEntryForID(FileID FID) const
Returns the FileEntry record for the provided FileID.
SourceLocation getLocForStartOfFile(FileID FID) const
Return the source location corresponding to the first byte of the specified file.
SrcMgr::CharacteristicKind getFileCharacteristic(SourceLocation Loc) const
Return the file characteristic of the specified source location, indicating whether this is a normal file, a system header, or an "extern C" system header.
std::optional< llvm::MemoryBufferRef > getBufferOrNone(FileID FID, SourceLocation Loc=SourceLocation()) const
Return the buffer for the specified FileID.
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...}".
Definition: Token.h:102
bool isNot(tok::TokenKind K) const
Definition: Token.h:103
CharacteristicKind
Indicates whether a file or directory holds normal user code, system code, or system code which is implicitly 'extern "C"' in C++ mode.
Definition: SourceManager.h:81
The JSON file list parser is used to communicate input to InstallAPI.
@ Result
The result type of a method or function.
@ TU_Complete
The translation unit is a complete translation unit.
Definition: LangOptions.h:1099