clang 22.0.0git
Preprocessor.cpp
Go to the documentation of this file.
1//===- Preprocessor.cpp - C Language Family Preprocessor Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the Preprocessor interface.
10//
11//===----------------------------------------------------------------------===//
12//
13// Options to support:
14// -H - Print the name of each header file used.
15// -d[DNI] - Dump various things.
16// -fworking-directory - #line's with preprocessor's working dir.
17// -fpreprocessed
18// -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
19// -W*
20// -w
21//
22// Messages to emit:
23// "Multiple include guards may be useful for:\n"
24//
25//===----------------------------------------------------------------------===//
26
31#include "clang/Basic/LLVM.h"
33#include "clang/Basic/Module.h"
41#include "clang/Lex/Lexer.h"
43#include "clang/Lex/MacroArgs.h"
44#include "clang/Lex/MacroInfo.h"
47#include "clang/Lex/Pragma.h"
52#include "clang/Lex/Token.h"
54#include "llvm/ADT/APInt.h"
55#include "llvm/ADT/ArrayRef.h"
56#include "llvm/ADT/DenseMap.h"
57#include "llvm/ADT/STLExtras.h"
58#include "llvm/ADT/SmallVector.h"
59#include "llvm/ADT/StringRef.h"
60#include "llvm/Support/Capacity.h"
61#include "llvm/Support/ErrorHandling.h"
62#include "llvm/Support/MemoryBuffer.h"
63#include "llvm/Support/raw_ostream.h"
64#include <algorithm>
65#include <cassert>
66#include <memory>
67#include <optional>
68#include <string>
69#include <utility>
70#include <vector>
71
72using namespace clang;
73
74/// Minimum distance between two check points, in tokens.
// NOTE(review): no use of this constant appears in the portion of the file
// visible here; its consumers are presumably further down — confirm.
75static constexpr unsigned CheckPointStepSize = 1024;
76
// Expands the registry-instantiation macro for PragmaHandlerRegistry. The
// macro itself is defined outside this file.
77LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)
78
80
82 DiagnosticsEngine &diags, const LangOptions &opts,
83 SourceManager &SM, HeaderSearch &Headers,
84 ModuleLoader &TheModuleLoader,
85 IdentifierInfoLookup *IILookup, bool OwnsHeaders,
87 : PPOpts(PPOpts), Diags(&diags), LangOpts(opts),
88 FileMgr(Headers.getFileMgr()), SourceMgr(SM),
89 ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
90 TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
91 // As the language options may have not been loaded yet (when
92 // deserializing an ASTUnit), adding keywords to the identifier table is
93 // deferred to Preprocessor::Initialize().
94 Identifiers(IILookup), PragmaHandlers(new PragmaNamespace(StringRef())),
95 TUKind(TUKind), SkipMainFilePreamble(0, true),
96 CurSubmoduleState(&NullSubmoduleState) {
97 OwnsHeaderSearch = OwnsHeaders;
98
99 // Default to discarding comments.
100 KeepComments = false;
101 KeepMacroComments = false;
102 SuppressIncludeNotFoundError = false;
103
104 // Macro expansion is enabled.
105 DisableMacroExpansion = false;
106 MacroExpansionInDirectivesOverride = false;
107 InMacroArgs = false;
108 ArgMacro = nullptr;
109 InMacroArgPreExpansion = false;
110 NumCachedTokenLexers = 0;
111 PragmasEnabled = true;
112 ParsingIfOrElifDirective = false;
113 PreprocessedOutput = false;
114
115 // We haven't read anything from the external source.
116 ReadMacrosFromExternalSource = false;
117
118 BuiltinInfo = std::make_unique<Builtin::Context>();
119
120 // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of
121 // a macro. They get unpoisoned where it is allowed.
122 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
123 SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
124 (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned();
125 SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use);
126
127 // Initialize the pragma handlers.
128 RegisterBuiltinPragmas();
129
130 // Initialize builtin macros like __LINE__ and friends.
131 RegisterBuiltinMacros();
132
133 if(LangOpts.Borland) {
134 Ident__exception_info = getIdentifierInfo("_exception_info");
135 Ident___exception_info = getIdentifierInfo("__exception_info");
136 Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation");
137 Ident__exception_code = getIdentifierInfo("_exception_code");
138 Ident___exception_code = getIdentifierInfo("__exception_code");
139 Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode");
140 Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination");
141 Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
142 Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination");
143 } else {
144 Ident__exception_info = Ident__exception_code = nullptr;
145 Ident__abnormal_termination = Ident___exception_info = nullptr;
146 Ident___exception_code = Ident___abnormal_termination = nullptr;
147 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
148 Ident_AbnormalTermination = nullptr;
149 }
150
151 // Default incremental processing to -fincremental-extensions, clients can
152 // override with `enableIncrementalProcessing` if desired.
153 IncrementalProcessing = LangOpts.IncrementalExtensions;
154
155 // If using a PCH where a #pragma hdrstop is expected, start skipping tokens.
157 SkippingUntilPragmaHdrStop = true;
158
159 // If using a PCH with a through header, start skipping tokens.
160 if (!this->PPOpts.PCHThroughHeader.empty() &&
161 !this->PPOpts.ImplicitPCHInclude.empty())
162 SkippingUntilPCHThroughHeader = true;
163
164 if (this->PPOpts.GeneratePreamble)
165 PreambleConditionalStack.startRecording();
166
167 MaxTokens = LangOpts.MaxTokens;
168}
169
171 assert(!isBacktrackEnabled() && "EnableBacktrack/Backtrack imbalance!");
172
173 IncludeMacroStack.clear();
174
175 // Free any cached macro expanders.
176 // This populates MacroArgCache, so all TokenLexers need to be destroyed
177 // before the code below that frees up the MacroArgCache list.
178 std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
179 CurTokenLexer.reset();
180
181 // Free any cached MacroArgs.
182 for (MacroArgs *ArgList = MacroArgCache; ArgList;)
183 ArgList = ArgList->deallocate();
184
185 // Delete the header search info, if we own it.
186 if (OwnsHeaderSearch)
187 delete &HeaderInfo;
188}
189
191 const TargetInfo *AuxTarget) {
192 assert((!this->Target || this->Target == &Target) &&
193 "Invalid override of target information");
194 this->Target = &Target;
195
196 assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
197 "Invalid override of aux target information.");
198 this->AuxTarget = AuxTarget;
199
200 // Initialize information about built-ins.
201 BuiltinInfo->InitializeTarget(Target, AuxTarget);
202 HeaderInfo.setTarget(Target);
203
204 // Populate the identifier table with info about keywords for the current language.
205 Identifiers.AddKeywords(LangOpts);
206
207 // Initialize the __FLT_EVAL_METHOD__ macro to the TargetInfo.
208 setTUFPEvalMethod(getTargetInfo().getFPEvalMethod());
209
210 if (getLangOpts().getFPEvalMethod() == LangOptions::FEM_UnsetOnCommandLine)
211 // Use setting from TargetInfo.
212 setCurrentFPEvalMethod(SourceLocation(), Target.getFPEvalMethod());
213 else
214 // Set initial value of __FLT_EVAL_METHOD__ from the command line.
215 setCurrentFPEvalMethod(SourceLocation(), getLangOpts().getFPEvalMethod());
216}
217
219 NumEnteredSourceFiles = 0;
220
221 // Reset pragmas
222 PragmaHandlersBackup = std::move(PragmaHandlers);
223 PragmaHandlers = std::make_unique<PragmaNamespace>(StringRef());
224 RegisterBuiltinPragmas();
225
226 // Reset PredefinesFileID
227 PredefinesFileID = FileID();
228}
229
231 NumEnteredSourceFiles = 1;
232
233 PragmaHandlers = std::move(PragmaHandlersBackup);
234}
235
/// Writes a debugging description of \p Tok to llvm::errs().
///
/// The token kind name is always printed, followed by the quoted spelling
/// for non-annotation tokens. When \p DumpFlags is true, a tab-separated
/// list of the token's flags and a "Loc=<...>" section are appended.
///
/// \param Tok       the token to describe.
/// \param DumpFlags if false, only the kind name (and spelling) is printed.
236void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
237 llvm::errs() << tok::getTokenName(Tok.getKind());
238
// The spelling is only printed for tokens that are not annotations.
239 if (!Tok.isAnnotation())
240 llvm::errs() << " '" << getSpelling(Tok) << "'";
241
242 if (!DumpFlags) return;
243
// One bracketed marker per flag that is set on the token.
244 llvm::errs() << "\t";
245 if (Tok.isAtStartOfLine())
246 llvm::errs() << " [StartOfLine]";
247 if (Tok.hasLeadingSpace())
248 llvm::errs() << " [LeadingSpace]";
249 if (Tok.isExpandDisabled())
250 llvm::errs() << " [ExpandDisabled]";
251 if (Tok.needsCleaning()) {
// For tokens that need cleaning, show the raw source characters
// (Tok.getLength() bytes starting at the token's location).
252 const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
253 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
254 << "']";
255 }
256
257 llvm::errs() << "\tLoc=<";
// NOTE(review): listing line 258 is absent from this copy; presumably it
// prints the token's location between the angle brackets — confirm against
// the upstream file.
259 llvm::errs() << ">";
260}
261
263 Loc.print(llvm::errs(), SourceMgr);
264}
265
/// Writes a one-line debugging description of the macro \p MI to
/// llvm::errs(): the prefix "MACRO: ", one space-terminated entry per token
/// counted by MI.getNumTokens(), then a newline.
266void Preprocessor::DumpMacro(const MacroInfo &MI) const {
267 llvm::errs() << "MACRO: ";
268 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
// NOTE(review): listing line 269 is absent from this copy; presumably it
// dumps the i-th replacement token before the separator — confirm against
// the upstream file.
270 llvm::errs() << " ";
271 }
272 llvm::errs() << "\n";
273}
274
276 llvm::errs() << "\n*** Preprocessor Stats:\n";
277 llvm::errs() << NumDirectives << " directives found:\n";
278 llvm::errs() << " " << NumDefined << " #define.\n";
279 llvm::errs() << " " << NumUndefined << " #undef.\n";
280 llvm::errs() << " #include/#include_next/#import:\n";
281 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n";
282 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n";
283 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n";
284 llvm::errs() << " " << NumElse << " #else/#elif/#elifdef/#elifndef.\n";
285 llvm::errs() << " " << NumEndif << " #endif.\n";
286 llvm::errs() << " " << NumPragma << " #pragma.\n";
287 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
288
289 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
290 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
291 << NumFastMacroExpanded << " on the fast path.\n";
292 llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
293 << " token paste (##) operations performed, "
294 << NumFastTokenPaste << " on the fast path.\n";
295
296 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
297
298 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory();
299 llvm::errs() << "\n Macro Expanded Tokens: "
300 << llvm::capacity_in_bytes(MacroExpandedTokens);
301 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity();
302 // FIXME: List information for all submodules.
303 llvm::errs() << "\n Macros: "
304 << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
305 llvm::errs() << "\n #pragma push_macro Info: "
306 << llvm::capacity_in_bytes(PragmaPushMacroInfo);
307 llvm::errs() << "\n Poison Reasons: "
308 << llvm::capacity_in_bytes(PoisonReasons);
309 llvm::errs() << "\n Comment Handlers: "
310 << llvm::capacity_in_bytes(CommentHandlers) << "\n";
311}
312
314Preprocessor::macro_begin(bool IncludeExternalMacros) const {
// Returns the begin iterator of the current submodule state's macro table.
// NOTE(review): the return-type line (listing line 313) is absent from this
// copy of the file.
//
// If the caller asked for external macros, an external source is attached,
// and its macros have not been read yet, read them now. The flag is set
// before the call is made.
315 if (IncludeExternalMacros && ExternalSource &&
316 !ReadMacrosFromExternalSource) {
317 ReadMacrosFromExternalSource = true;
318 ExternalSource->ReadDefinedMacros();
319 }
320
321 // Make sure we cover all macros in visible modules.
// try_emplace is called with the identifier of every ModuleMacros entry, so
// each such identifier has an entry in the table before iteration starts.
322 for (const ModuleMacro &Macro : ModuleMacros)
323 CurSubmoduleState->Macros.try_emplace(Macro.II);
324
325 return CurSubmoduleState->Macros.begin();
326}
327
329 return BP.getTotalMemory()
330 + llvm::capacity_in_bytes(MacroExpandedTokens)
331 + Predefines.capacity() /* Predefines buffer. */
332 // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
333 // and ModuleMacros.
334 + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
335 + llvm::capacity_in_bytes(PragmaPushMacroInfo)
336 + llvm::capacity_in_bytes(PoisonReasons)
337 + llvm::capacity_in_bytes(CommentHandlers);
338}
339
341Preprocessor::macro_end(bool IncludeExternalMacros) const {
// Returns the end iterator of the current submodule state's macro table.
// NOTE(review): the return-type line (listing line 340) is absent from this
// copy of the file.
//
// Same lazy load as in macro_begin: when external macros are requested, an
// external source is attached, and nothing has been read from it yet, read
// its defined macros. The flag is set before the call is made.
342 if (IncludeExternalMacros && ExternalSource &&
343 !ReadMacrosFromExternalSource) {
344 ReadMacrosFromExternalSource = true;
345 ExternalSource->ReadDefinedMacros();
346 }
347
348 return CurSubmoduleState->Macros.end();
349}
350
351/// Compares macro tokens with a specified token value sequence.
352static bool MacroDefinitionEquals(const MacroInfo *MI,
353 ArrayRef<TokenValue> Tokens) {
354 return Tokens.size() == MI->getNumTokens() &&
355 std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
356}
357
360 ArrayRef<TokenValue> Tokens) const {
361 SourceLocation BestLocation;
362 StringRef BestSpelling;
364 I != E; ++I) {
366 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
367 if (!Def || !Def.getMacroInfo())
368 continue;
369 if (!Def.getMacroInfo()->isObjectLike())
370 continue;
371 if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
372 continue;
373 SourceLocation Location = Def.getLocation();
374 // Choose the macro defined latest.
375 if (BestLocation.isInvalid() ||
376 (Location.isValid() &&
377 SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
378 BestLocation = Location;
379 BestSpelling = I->first->getName();
380 }
381 }
382 return BestSpelling;
383}
384
386 if (CurLexer)
387 CurLexerCallback = CurLexer->isDependencyDirectivesLexer()
388 ? CLK_DependencyDirectivesLexer
389 : CLK_Lexer;
390 else if (CurTokenLexer)
391 CurLexerCallback = CLK_TokenLexer;
392 else
393 CurLexerCallback = CLK_CachingLexer;
394}
395
397 unsigned CompleteLine,
398 unsigned CompleteColumn) {
399 assert(CompleteLine && CompleteColumn && "Starts from 1:1");
400 assert(!CodeCompletionFile && "Already set");
401
402 // Load the actual file's contents.
403 std::optional<llvm::MemoryBufferRef> Buffer =
405 if (!Buffer)
406 return true;
407
408 // Find the byte position of the truncation point.
409 const char *Position = Buffer->getBufferStart();
410 for (unsigned Line = 1; Line < CompleteLine; ++Line) {
411 for (; *Position; ++Position) {
412 if (*Position != '\r' && *Position != '\n')
413 continue;
414
415 // Eat \r\n or \n\r as a single line.
416 if ((Position[1] == '\r' || Position[1] == '\n') &&
417 Position[0] != Position[1])
418 ++Position;
419 ++Position;
420 break;
421 }
422 }
423
424 Position += CompleteColumn - 1;
425
426 // If pointing inside the preamble, adjust the position at the beginning of
427 // the file after the preamble.
428 if (SkipMainFilePreamble.first &&
429 SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
430 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
431 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
432 }
433
434 if (Position > Buffer->getBufferEnd())
435 Position = Buffer->getBufferEnd();
436
437 CodeCompletionFile = File;
438 CodeCompletionOffset = Position - Buffer->getBufferStart();
439
440 auto NewBuffer = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
441 Buffer->getBufferSize() + 1, Buffer->getBufferIdentifier());
442 char *NewBuf = NewBuffer->getBufferStart();
443 char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
444 *NewPos = '\0';
445 std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
446 SourceMgr.overrideFileContents(File, std::move(NewBuffer));
447
448 return false;
449}
450
452 bool IsAngled) {
454 if (CodeComplete)
455 CodeComplete->CodeCompleteIncludedFile(Dir, IsAngled);
456}
457
460 if (CodeComplete)
461 CodeComplete->CodeCompleteNaturalLanguage();
462}
463
464/// getSpelling - This method is used to get the spelling of a token into a
465/// SmallVector. Note that the returned StringRef may not point to the
466/// supplied buffer if a copy can be avoided.
467StringRef Preprocessor::getSpelling(const Token &Tok,
468 SmallVectorImpl<char> &Buffer,
469 bool *Invalid) const {
470 // NOTE: this has to be checked *before* testing for an IdentifierInfo.
471 if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
472 // Try the fast path.
473 if (const IdentifierInfo *II = Tok.getIdentifierInfo())
474 return II->getName();
475 }
476
477 // Resize the buffer if we need to copy into it.
478 if (Tok.needsCleaning())
479 Buffer.resize(Tok.getLength());
480
481 const char *Ptr = Buffer.data();
482 unsigned Len = getSpelling(Tok, Ptr, Invalid);
483 return StringRef(Ptr, Len);
484}
485
486/// CreateString - Plop the specified string into a scratch buffer and return a
487/// location for it. If specified, the source location provides a source
488/// location for the token.
489void Preprocessor::CreateString(StringRef Str, Token &Tok,
490 SourceLocation ExpansionLocStart,
491 SourceLocation ExpansionLocEnd) {
492 Tok.setLength(Str.size());
493
494 const char *DestPtr;
495 SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
496
497 if (ExpansionLocStart.isValid())
498 Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
499 ExpansionLocEnd, Str.size());
500 Tok.setLocation(Loc);
501
502 // If this is a raw identifier or a literal token, set the pointer data.
503 if (Tok.is(tok::raw_identifier))
504 Tok.setRawIdentifierData(DestPtr);
505 else if (Tok.isLiteral())
506 Tok.setLiteralData(DestPtr);
507}
508
510 auto &SM = getSourceManager();
511 SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
512 FileIDAndOffset LocInfo = SM.getDecomposedLoc(SpellingLoc);
513 bool Invalid = false;
514 StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
515 if (Invalid)
516 return SourceLocation();
517
518 // FIXME: We could consider re-using spelling for tokens we see repeatedly.
519 const char *DestPtr;
520 SourceLocation Spelling =
521 ScratchBuf->getToken(Buffer.data() + LocInfo.second, Length, DestPtr);
522 return SM.createTokenSplitLoc(Spelling, Loc, Loc.getLocWithOffset(Length));
523}
524
526 if (!getLangOpts().isCompilingModule())
527 return nullptr;
528
529 return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
530}
531
533 if (!getLangOpts().isCompilingModuleImplementation())
534 return nullptr;
535
536 return getHeaderSearchInfo().lookupModule(getLangOpts().ModuleName);
537}
538
539//===----------------------------------------------------------------------===//
540// Preprocessor Initialization Methods
541//===----------------------------------------------------------------------===//
542
543/// EnterMainSourceFile - Enter the specified FileID as the main source file,
544/// which implicitly adds the builtin defines etc.
546 // We do not allow the preprocessor to reenter the main file. Doing so will
547 // cause FileID's to accumulate information from both runs (e.g. #line
548 // information) and predefined macros aren't guaranteed to be set properly.
549 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
550 FileID MainFileID = SourceMgr.getMainFileID();
551
552 // If MainFileID is loaded it means we loaded an AST file, no need to enter
553 // a main file.
554 if (!SourceMgr.isLoadedFileID(MainFileID)) {
555 // Enter the main file source buffer.
556 EnterSourceFile(MainFileID, nullptr, SourceLocation());
557
558 // If we've been asked to skip bytes in the main file (e.g., as part of a
559 // precompiled preamble), do so now.
560 if (SkipMainFilePreamble.first > 0)
561 CurLexer->SetByteOffset(SkipMainFilePreamble.first,
562 SkipMainFilePreamble.second);
563
564 // Tell the header info that the main file was entered. If the file is later
565 // #imported, it won't be re-entered.
566 if (OptionalFileEntryRef FE = SourceMgr.getFileEntryRefForID(MainFileID))
567 markIncluded(*FE);
568
569 // Record the first PP token in the main file. This is used to generate
570 // better diagnostics for C++ modules.
571 //
572 // // This is a comment.
573 // #define FOO int // note: add 'module;' to the start of the file
574 // ^ FirstPPToken // to introduce a global module fragment.
575 //
576 // export module M; // error: module declaration must occur
577 // // at the start of the translation unit.
578 if (getLangOpts().CPlusPlusModules) {
579 auto Tracer = std::make_unique<NoTrivialPPDirectiveTracer>(*this);
580 DirTracer = Tracer.get();
581 addPPCallbacks(std::move(Tracer));
582 std::optional<Token> FirstPPTok = CurLexer->peekNextPPToken();
583 if (FirstPPTok)
584 FirstPPTokenLoc = FirstPPTok->getLocation();
585 }
586 }
587
588 // Preprocess Predefines to populate the initial preprocessor state.
589 std::unique_ptr<llvm::MemoryBuffer> SB =
590 llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
591 assert(SB && "Cannot create predefined source buffer");
592 FileID FID = SourceMgr.createFileID(std::move(SB));
593 assert(FID.isValid() && "Could not create FileID for predefines?");
594 setPredefinesFileID(FID);
595
596 // Start parsing the predefines.
597 EnterSourceFile(FID, nullptr, SourceLocation());
598
599 if (!PPOpts.PCHThroughHeader.empty()) {
600 // Lookup and save the FileID for the through header. If it isn't found
601 // in the search path, it's a fatal error.
604 /*isAngled=*/false, /*FromDir=*/nullptr, /*FromFile=*/nullptr,
605 /*CurDir=*/nullptr, /*SearchPath=*/nullptr, /*RelativePath=*/nullptr,
606 /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr,
607 /*IsFrameworkFound=*/nullptr);
608 if (!File) {
609 Diag(SourceLocation(), diag::err_pp_through_header_not_found)
610 << PPOpts.PCHThroughHeader;
611 return;
612 }
613 setPCHThroughHeaderFileID(
615 }
616
617 // Skip tokens from the Predefines and if needed the main file.
618 if ((usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) ||
619 (usingPCHWithPragmaHdrStop() && SkippingUntilPragmaHdrStop))
621}
622
623void Preprocessor::setPCHThroughHeaderFileID(FileID FID) {
624 assert(PCHThroughHeaderFileID.isInvalid() &&
625 "PCHThroughHeaderFileID already set!");
626 PCHThroughHeaderFileID = FID;
627}
628
630 assert(PCHThroughHeaderFileID.isValid() &&
631 "Invalid PCH through header FileID");
632 return FE == SourceMgr.getFileEntryForID(PCHThroughHeaderFileID);
633}
634
636 return TUKind == TU_Prefix && !PPOpts.PCHThroughHeader.empty() &&
637 PCHThroughHeaderFileID.isValid();
638}
639
641 return TUKind != TU_Prefix && !PPOpts.PCHThroughHeader.empty() &&
642 PCHThroughHeaderFileID.isValid();
643}
644
646 return TUKind == TU_Prefix && PPOpts.PCHWithHdrStop;
647}
648
650 return TUKind != TU_Prefix && PPOpts.PCHWithHdrStop;
651}
652
653/// Skip tokens until after the #include of the through header or
654/// until after a #pragma hdrstop is seen. Tokens in the predefines file
655/// and the main file may be skipped. If the end of the predefines file
656/// is reached, skipping continues into the main file. If the end of the
657/// main file is reached, it's a fatal error.
659 bool ReachedMainFileEOF = false;
660 bool UsingPCHThroughHeader = SkippingUntilPCHThroughHeader;
661 bool UsingPragmaHdrStop = SkippingUntilPragmaHdrStop;
662 Token Tok;
663 while (true) {
664 bool InPredefines =
665 (CurLexer && CurLexer->getFileID() == getPredefinesFileID());
666 CurLexerCallback(*this, Tok);
667 if (Tok.is(tok::eof) && !InPredefines) {
668 ReachedMainFileEOF = true;
669 break;
670 }
671 if (UsingPCHThroughHeader && !SkippingUntilPCHThroughHeader)
672 break;
673 if (UsingPragmaHdrStop && !SkippingUntilPragmaHdrStop)
674 break;
675 }
676 if (ReachedMainFileEOF) {
677 if (UsingPCHThroughHeader)
678 Diag(SourceLocation(), diag::err_pp_through_header_not_seen)
679 << PPOpts.PCHThroughHeader << 1;
680 else if (!PPOpts.PCHWithHdrStopCreate)
681 Diag(SourceLocation(), diag::err_pp_pragma_hdrstop_not_seen);
682 }
683}
684
685void Preprocessor::replayPreambleConditionalStack() {
686 // Restore the conditional stack from the preamble, if there is one.
687 if (PreambleConditionalStack.isReplaying()) {
688 assert(CurPPLexer &&
689 "CurPPLexer is null when calling replayPreambleConditionalStack.");
690 CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack());
691 PreambleConditionalStack.doneReplaying();
692 if (PreambleConditionalStack.reachedEOFWhileSkipping())
693 SkipExcludedConditionalBlock(
694 PreambleConditionalStack.SkipInfo->HashTokenLoc,
695 PreambleConditionalStack.SkipInfo->IfTokenLoc,
696 PreambleConditionalStack.SkipInfo->FoundNonSkipPortion,
697 PreambleConditionalStack.SkipInfo->FoundElse,
698 PreambleConditionalStack.SkipInfo->ElseLoc);
699 }
700}
701
703 // Notify the client that we reached the end of the source file.
704 if (Callbacks)
705 Callbacks->EndOfMainFile();
706}
707
708//===----------------------------------------------------------------------===//
709// Lexer Event Handling.
710//===----------------------------------------------------------------------===//
711
712/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
713/// identifier information for the token and install it into the token,
714/// updating the token kind accordingly.
716 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
717
718 // Look up this token, see if it is a macro, or if it is a language keyword.
719 IdentifierInfo *II;
720 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
721 // No cleaning needed, just use the characters from the lexed buffer.
722 II = getIdentifierInfo(Identifier.getRawIdentifier());
723 } else {
724 // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
725 SmallString<64> IdentifierBuffer;
726 StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
727
728 if (Identifier.hasUCN()) {
729 SmallString<64> UCNIdentifierBuffer;
730 expandUCNs(UCNIdentifierBuffer, CleanedStr);
731 II = getIdentifierInfo(UCNIdentifierBuffer);
732 } else {
733 II = getIdentifierInfo(CleanedStr);
734 }
735 }
736
737 // Update the token info (identifier info and appropriate token kind).
738 // FIXME: the raw_identifier may contain leading whitespace which is removed
739 // from the cleaned identifier token. The SourceLocation should be updated to
740 // refer to the non-whitespace character. For instance, the text "\\\nB" (a
741 // line continuation before 'B') is parsed as a single tok::raw_identifier and
742 // is cleaned to tok::identifier "B". After cleaning the token's length is
743 // still 3 and the SourceLocation refers to the location of the backslash.
744 Identifier.setIdentifierInfo(II);
745 Identifier.setKind(II->getTokenID());
746
747 return II;
748}
749
751 PoisonReasons[II] = DiagID;
752}
753
755 assert(Ident__exception_code && Ident__exception_info);
756 assert(Ident___exception_code && Ident___exception_info);
757 Ident__exception_code->setIsPoisoned(Poison);
758 Ident___exception_code->setIsPoisoned(Poison);
759 Ident_GetExceptionCode->setIsPoisoned(Poison);
760 Ident__exception_info->setIsPoisoned(Poison);
761 Ident___exception_info->setIsPoisoned(Poison);
762 Ident_GetExceptionInfo->setIsPoisoned(Poison);
763 Ident__abnormal_termination->setIsPoisoned(Poison);
764 Ident___abnormal_termination->setIsPoisoned(Poison);
765 Ident_AbnormalTermination->setIsPoisoned(Poison);
766}
767
769 assert(Identifier.getIdentifierInfo() &&
770 "Can't handle identifiers without identifier info!");
771 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
772 PoisonReasons.find(Identifier.getIdentifierInfo());
773 if(it == PoisonReasons.end())
774 Diag(Identifier, diag::err_pp_used_poisoned_id);
775 else
776 Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
777}
778
/// Refreshes the out-of-date identifier \p II.
///
/// Preconditions (asserted): \p II is marked out of date, and
/// getExternalSource() returns a non-null source.
779void Preprocessor::updateOutOfDateIdentifier(const IdentifierInfo &II) const {
780 assert(II.isOutOfDate() && "not out of date");
781 assert(getExternalSource() &&
782 "getExternalSource() should not return nullptr");
// NOTE(review): listing line 783 is absent from this copy; presumably it asks
// the external source to update II — confirm against the upstream file.
784}
785
786/// HandleIdentifier - This callback is invoked when the lexer reads an
787/// identifier. This callback looks up the identifier in the map and/or
788/// potentially macro expands it or turns it into a named token (like 'for').
789///
790/// Note that callers of this method are guarded by checking the
791/// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the
792/// IdentifierInfo methods that compute these properties will need to change to
793/// match.
795 assert(Identifier.getIdentifierInfo() &&
796 "Can't handle identifiers without identifier info!");
797
798 IdentifierInfo &II = *Identifier.getIdentifierInfo();
799
800 // If the information about this identifier is out of date, update it from
801 // the external source.
802 // We have to treat __VA_ARGS__ in a special way, since it gets
803 // serialized with isPoisoned = true, but our preprocessor may have
804 // unpoisoned it if we're defining a C99 macro.
805 if (II.isOutOfDate()) {
806 bool CurrentIsPoisoned = false;
807 const bool IsSpecialVariadicMacro =
808 &II == Ident__VA_ARGS__ || &II == Ident__VA_OPT__;
809 if (IsSpecialVariadicMacro)
810 CurrentIsPoisoned = II.isPoisoned();
811
812 updateOutOfDateIdentifier(II);
813 Identifier.setKind(II.getTokenID());
814
815 if (IsSpecialVariadicMacro)
816 II.setIsPoisoned(CurrentIsPoisoned);
817 }
818
819 // If this identifier was poisoned, and if it was not produced from a macro
820 // expansion, emit an error.
821 if (II.isPoisoned() && CurPPLexer) {
823 }
824
825 // If this is a macro to be expanded, do it.
826 if (const MacroDefinition MD = getMacroDefinition(&II)) {
827 const auto *MI = MD.getMacroInfo();
828 assert(MI && "macro definition with no macro info?");
829 if (!DisableMacroExpansion) {
830 if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
831 // C99 6.10.3p10: If the preprocessing token immediately after the
832 // macro name isn't a '(', this macro should not be expanded.
833 if (!MI->isFunctionLike() || isNextPPTokenOneOf(tok::l_paren))
834 return HandleMacroExpandedIdentifier(Identifier, MD);
835 } else {
836 // C99 6.10.3.4p2 says that a disabled macro may never again be
837 // expanded, even if it's in a context where it could be expanded in the
838 // future.
840 if (MI->isObjectLike() || isNextPPTokenOneOf(tok::l_paren))
841 Diag(Identifier, diag::pp_disabled_macro_expansion);
842 }
843 }
844 }
845
846 // If this identifier is a keyword in a newer Standard or proposed Standard,
847 // produce a warning. Don't warn if we're not considering macro expansion,
848 // since this identifier might be the name of a macro.
849 // FIXME: This warning is disabled in cases where it shouldn't be, like
850 // "#define constexpr constexpr", "int constexpr;"
851 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
852 Diag(Identifier, getIdentifierTable().getFutureCompatDiagKind(II, getLangOpts()))
853 << II.getName();
854 // Don't diagnose this keyword again in this translation unit.
855 II.setIsFutureCompatKeyword(false);
856 }
857
858 // If this identifier would be a keyword in C++, diagnose as a compatibility
859 // issue.
860 if (II.IsKeywordInCPlusPlus() && !DisableMacroExpansion)
861 Diag(Identifier, diag::warn_pp_identifier_is_cpp_keyword) << &II;
862
863 // If this is an extension token, diagnose its use.
864 // We avoid diagnosing tokens that originate from macro definitions.
865 // FIXME: This warning is disabled in cases where it shouldn't be,
866 // like "#define TY typeof", "TY(1) x".
867 if (II.isExtensionToken() && !DisableMacroExpansion)
868 Diag(Identifier, diag::ext_token_used);
869
870 // If this is the 'import' contextual keyword following an '@', note
871 // that the next token indicates a module name.
872 //
873 // Note that we do not treat 'import' as a contextual
874 // keyword when we're in a caching lexer, because caching lexers only get
875 // used in contexts where import declarations are disallowed.
876 //
877 // Likewise if this is the standard C++ import keyword.
878 if (((LastTokenWasAt && II.isModulesImport()) ||
879 Identifier.is(tok::kw_import)) &&
880 !InMacroArgs && !DisableMacroExpansion &&
881 (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
882 CurLexerCallback != CLK_CachingLexer) {
883 ModuleImportLoc = Identifier.getLocation();
884 NamedModuleImportPath.clear();
885 IsAtImport = true;
886 ModuleImportExpectsIdentifier = true;
887 CurLexerCallback = CLK_LexAfterModuleImport;
888 }
889 return true;
890}
891
893 ++LexLevel;
894
895 // We loop here until a lex function returns a token; this avoids recursion.
896 while (!CurLexerCallback(*this, Result))
897 ;
898
899 if (Result.is(tok::unknown) && TheModuleLoader.HadFatalFailure)
900 return;
901
902 if (Result.is(tok::code_completion) && Result.getIdentifierInfo()) {
903 // Remember the identifier before code completion token.
904 setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());
905 setCodeCompletionTokenRange(Result.getLocation(), Result.getEndLoc());
906 // Set IdentifierInfo to null to avoid confusing code that handles both
907 // identifiers and completion tokens.
908 Result.setIdentifierInfo(nullptr);
909 }
910
911 // Update StdCXXImportSeqState to track our position within a C++20 import-seq
912 // if this token is being produced as a result of phase 4 of translation.
913 // Update TrackGMFState to decide if we are currently in a Global Module
914 // Fragment. GMF state updates should precede StdCXXImportSeq ones, since GMF state
915 // depends on the prevailing StdCXXImportSeq state in two cases.
916 if (getLangOpts().CPlusPlusModules && LexLevel == 1 &&
917 !Result.getFlag(Token::IsReinjected)) {
918 switch (Result.getKind()) {
919 case tok::l_paren: case tok::l_square: case tok::l_brace:
920 StdCXXImportSeqState.handleOpenBracket();
921 break;
922 case tok::r_paren: case tok::r_square:
923 StdCXXImportSeqState.handleCloseBracket();
924 break;
925 case tok::r_brace:
926 StdCXXImportSeqState.handleCloseBrace();
927 break;
928#define PRAGMA_ANNOTATION(X) case tok::annot_##X:
929// For `#pragma ...` mimic ';'.
930#include "clang/Basic/TokenKinds.def"
931#undef PRAGMA_ANNOTATION
932 // This token is injected to represent the translation of '#include "a.h"'
933 // into "import a.h;". Mimic the notional ';'.
934 case tok::annot_module_include:
935 case tok::semi:
936 TrackGMFState.handleSemi();
937 StdCXXImportSeqState.handleSemi();
938 ModuleDeclState.handleSemi();
939 break;
940 case tok::header_name:
941 case tok::annot_header_unit:
942 StdCXXImportSeqState.handleHeaderName();
943 break;
944 case tok::kw_export:
947 TrackGMFState.handleExport();
948 StdCXXImportSeqState.handleExport();
949 ModuleDeclState.handleExport();
950 break;
951 case tok::colon:
952 ModuleDeclState.handleColon();
953 break;
954 case tok::period:
955 ModuleDeclState.handlePeriod();
956 break;
957 case tok::eod:
958 break;
959 case tok::identifier:
960 // Check "import" and "module" when there is no open bracket. The two
961 // identifiers are not meaningful with open brackets.
962 if (StdCXXImportSeqState.atTopLevel()) {
963 if (Result.getIdentifierInfo()->isModulesImport()) {
964 TrackGMFState.handleImport(StdCXXImportSeqState.afterTopLevelSeq());
965 StdCXXImportSeqState.handleImport();
966 if (StdCXXImportSeqState.afterImportSeq()) {
967 ModuleImportLoc = Result.getLocation();
968 NamedModuleImportPath.clear();
969 IsAtImport = false;
970 ModuleImportExpectsIdentifier = true;
971 CurLexerCallback = CLK_LexAfterModuleImport;
972 }
973 break;
974 } else if (Result.getIdentifierInfo() == getIdentifierInfo("module")) {
977 TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq());
978 ModuleDeclState.handleModule();
979 break;
980 }
981 }
982 ModuleDeclState.handleIdentifier(Result.getIdentifierInfo());
983 if (ModuleDeclState.isModuleCandidate())
984 break;
985 [[fallthrough]];
986 default:
987 TrackGMFState.handleMisc();
988 StdCXXImportSeqState.handleMisc();
989 ModuleDeclState.handleMisc();
990 break;
991 }
992 }
993
994 if (CurLexer && ++CheckPointCounter == CheckPointStepSize) {
995 CheckPoints[CurLexer->getFileID()].push_back(CurLexer->BufferPtr);
996 CheckPointCounter = 0;
997 }
998
999 LastTokenWasAt = Result.is(tok::at);
1000 --LexLevel;
1001
1002 if ((LexLevel == 0 || PreprocessToken) &&
1003 !Result.getFlag(Token::IsReinjected)) {
1004 if (LexLevel == 0)
1005 ++TokenCount;
1006 if (OnToken)
1007 OnToken(Result);
1008 }
1009}
1010
1011void Preprocessor::LexTokensUntilEOF(std::vector<Token> *Tokens) {
1012 while (1) {
1013 Token Tok;
1014 Lex(Tok);
1015 if (Tok.isOneOf(tok::unknown, tok::eof, tok::eod,
1016 tok::annot_repl_input_end))
1017 break;
1018 if (Tokens != nullptr)
1019 Tokens->push_back(Tok);
1020 }
1021}
1022
1023/// Lex a header-name token (including one formed from header-name-tokens if
1024/// \p AllowMacroExpansion is \c true).
1025///
1026/// \param FilenameTok Filled in with the next token. On success, this will
1027/// be a header_name token. On failure, it will be whatever other
1028/// token was found instead.
1029/// \param AllowMacroExpansion If \c true, allow the header name to be formed
1030/// by macro expansion (concatenating tokens as necessary if the first
1031/// token is a '<').
1032/// \return \c true if we reached EOD or EOF while looking for a > token in
1033/// a concatenated header name and diagnosed it. \c false otherwise.
1034bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) {
1035 // Lex using header-name tokenization rules if tokens are being lexed from
1036 // a file. Just grab a token normally if we're in a macro expansion.
1037 if (CurPPLexer)
1038 CurPPLexer->LexIncludeFilename(FilenameTok);
1039 else
1040 Lex(FilenameTok);
1041
1042 // This could be a <foo/bar.h> file coming from a macro expansion. In this
1043 // case, glue the tokens together into an angle_string_literal token.
1044 SmallString<128> FilenameBuffer;
1045 if (FilenameTok.is(tok::less) && AllowMacroExpansion) {
1046 bool StartOfLine = FilenameTok.isAtStartOfLine();
1047 bool LeadingSpace = FilenameTok.hasLeadingSpace();
1048 bool LeadingEmptyMacro = FilenameTok.hasLeadingEmptyMacro();
1049
1050 SourceLocation Start = FilenameTok.getLocation();
1051 SourceLocation End;
1052 FilenameBuffer.push_back('<');
1053
1054 // Consume tokens until we find a '>'.
1055 // FIXME: A header-name could be formed starting or ending with an
1056 // alternative token. It's not clear whether that's ill-formed in all
1057 // cases.
1058 while (FilenameTok.isNot(tok::greater)) {
1059 Lex(FilenameTok);
1060 if (FilenameTok.isOneOf(tok::eod, tok::eof)) {
1061 Diag(FilenameTok.getLocation(), diag::err_expected) << tok::greater;
1062 Diag(Start, diag::note_matching) << tok::less;
1063 return true;
1064 }
1065
1066 End = FilenameTok.getLocation();
1067
1068 // FIXME: Provide code completion for #includes.
1069 if (FilenameTok.is(tok::code_completion)) {
1071 Lex(FilenameTok);
1072 continue;
1073 }
1074
1075 // Append the spelling of this token to the buffer. If there was a space
1076 // before it, add it now.
1077 if (FilenameTok.hasLeadingSpace())
1078 FilenameBuffer.push_back(' ');
1079
1080 // Get the spelling of the token, directly into FilenameBuffer if
1081 // possible.
1082 size_t PreAppendSize = FilenameBuffer.size();
1083 FilenameBuffer.resize(PreAppendSize + FilenameTok.getLength());
1084
1085 const char *BufPtr = &FilenameBuffer[PreAppendSize];
1086 unsigned ActualLen = getSpelling(FilenameTok, BufPtr);
1087
1088 // If the token was spelled somewhere else, copy it into FilenameBuffer.
1089 if (BufPtr != &FilenameBuffer[PreAppendSize])
1090 memcpy(&FilenameBuffer[PreAppendSize], BufPtr, ActualLen);
1091
1092 // Resize FilenameBuffer to the correct size.
1093 if (FilenameTok.getLength() != ActualLen)
1094 FilenameBuffer.resize(PreAppendSize + ActualLen);
1095 }
1096
1097 FilenameTok.startToken();
1098 FilenameTok.setKind(tok::header_name);
1099 FilenameTok.setFlagValue(Token::StartOfLine, StartOfLine);
1100 FilenameTok.setFlagValue(Token::LeadingSpace, LeadingSpace);
1101 FilenameTok.setFlagValue(Token::LeadingEmptyMacro, LeadingEmptyMacro);
1102 CreateString(FilenameBuffer, FilenameTok, Start, End);
1103 } else if (FilenameTok.is(tok::string_literal) && AllowMacroExpansion) {
1104 // Convert a string-literal token of the form " h-char-sequence "
1105 // (produced by macro expansion) into a header-name token.
1106 //
1107 // The rules for header-names don't quite match the rules for
1108 // string-literals, but all the places where they differ result in
1109 // undefined behavior, so we can and do treat them the same.
1110 //
1111 // A string-literal with a prefix or suffix is not translated into a
1112 // header-name. This could theoretically be observable via the C++20
1113 // context-sensitive header-name formation rules.
1114 StringRef Str = getSpelling(FilenameTok, FilenameBuffer);
1115 if (Str.size() >= 2 && Str.front() == '"' && Str.back() == '"')
1116 FilenameTok.setKind(tok::header_name);
1117 }
1118
1119 return false;
1120}
1121
1122/// Collect the tokens of a C++20 pp-import-suffix.
1124 // FIXME: For error recovery, consider recognizing attribute syntax here
1125 // and terminating / diagnosing a missing semicolon if we find anything
1126 // else? (Can we leave that to the parser?)
1127 unsigned BracketDepth = 0;
1128 while (true) {
1129 Toks.emplace_back();
1130 Lex(Toks.back());
1131
1132 switch (Toks.back().getKind()) {
1133 case tok::l_paren: case tok::l_square: case tok::l_brace:
1134 ++BracketDepth;
1135 break;
1136
1137 case tok::r_paren: case tok::r_square: case tok::r_brace:
1138 if (BracketDepth == 0)
1139 return;
1140 --BracketDepth;
1141 break;
1142
1143 case tok::semi:
1144 if (BracketDepth == 0)
1145 return;
1146 break;
1147
1148 case tok::eof:
1149 return;
1150
1151 default:
1152 break;
1153 }
1154 }
1155}
1156
1157
1158/// Lex a token following the 'import' contextual keyword.
1159///
1160/// pp-import: [C++20]
1161/// import header-name pp-import-suffix[opt] ;
1162/// import header-name-tokens pp-import-suffix[opt] ;
1163/// [ObjC] @ import module-name ;
1164/// [Clang] import module-name ;
1165///
1166/// header-name-tokens:
1167/// string-literal
1168/// < [any sequence of preprocessing-tokens other than >] >
1169///
1170/// module-name:
1171/// module-name-qualifier[opt] identifier
1172///
1173/// module-name-qualifier
1174/// module-name-qualifier[opt] identifier .
1175///
1176/// We respond to a pp-import by importing macros from the named module.
1178 // Figure out what kind of lexer we actually have.
1180
1181 // Lex the next token. The header-name lexing rules are used at the start of
1182 // a pp-import.
1183 //
1184 // For now, we only support header-name imports in C++20 mode.
1185 // FIXME: Should we allow this in all language modes that support an import
1186 // declaration as an extension?
1187 if (NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules) {
1188 if (LexHeaderName(Result))
1189 return true;
1190
1191 if (Result.is(tok::colon) && ModuleDeclState.isNamedModule()) {
1192 std::string Name = ModuleDeclState.getPrimaryName().str();
1193 Name += ":";
1194 NamedModuleImportPath.emplace_back(Result.getLocation(),
1195 getIdentifierInfo(Name));
1196 CurLexerCallback = CLK_LexAfterModuleImport;
1197 return true;
1198 }
1199 } else {
1200 Lex(Result);
1201 }
1202
1203 // Allocate a holding buffer for a sequence of tokens and introduce it into
1204 // the token stream.
1205 auto EnterTokens = [this](ArrayRef<Token> Toks) {
1206 auto ToksCopy = std::make_unique<Token[]>(Toks.size());
1207 std::copy(Toks.begin(), Toks.end(), ToksCopy.get());
1208 EnterTokenStream(std::move(ToksCopy), Toks.size(),
1209 /*DisableMacroExpansion*/ true, /*IsReinject*/ false);
1210 };
1211
1212 bool ImportingHeader = Result.is(tok::header_name);
1213 // Check for a header-name.
1215 if (ImportingHeader) {
1216 // Enter the header-name token into the token stream; a Lex action cannot
1217 // both return a token and cache tokens (doing so would corrupt the token
1218 // cache if the call to Lex comes from CachingLex / PeekAhead).
1219 Suffix.push_back(Result);
1220
1221 // Consume the pp-import-suffix and expand any macros in it now. We'll add
1222 // it back into the token stream later.
1223 CollectPpImportSuffix(Suffix);
1224 if (Suffix.back().isNot(tok::semi)) {
1225 // This is not a pp-import after all.
1226 EnterTokens(Suffix);
1227 return false;
1228 }
1229
1230 // C++2a [cpp.module]p1:
1231 // The ';' preprocessing-token terminating a pp-import shall not have
1232 // been produced by macro replacement.
1233 SourceLocation SemiLoc = Suffix.back().getLocation();
1234 if (SemiLoc.isMacroID())
1235 Diag(SemiLoc, diag::err_header_import_semi_in_macro);
1236
1237 // Reconstitute the import token.
1238 Token ImportTok;
1239 ImportTok.startToken();
1240 ImportTok.setKind(tok::kw_import);
1241 ImportTok.setLocation(ModuleImportLoc);
1242 ImportTok.setIdentifierInfo(getIdentifierInfo("import"));
1243 ImportTok.setLength(6);
1244
1245 auto Action = HandleHeaderIncludeOrImport(
1246 /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc);
1247 switch (Action.Kind) {
1248 case ImportAction::None:
1249 break;
1250
1251 case ImportAction::ModuleBegin:
1252 // Let the parser know we're textually entering the module.
1253 Suffix.emplace_back();
1254 Suffix.back().startToken();
1255 Suffix.back().setKind(tok::annot_module_begin);
1256 Suffix.back().setLocation(SemiLoc);
1257 Suffix.back().setAnnotationEndLoc(SemiLoc);
1258 Suffix.back().setAnnotationValue(Action.ModuleForHeader);
1259 [[fallthrough]];
1260
1261 case ImportAction::ModuleImport:
1262 case ImportAction::HeaderUnitImport:
1263 case ImportAction::SkippedModuleImport:
1264 // We chose to import (or textually enter) the file. Convert the
1265 // header-name token into a header unit annotation token.
1266 Suffix[0].setKind(tok::annot_header_unit);
1267 Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation());
1268 Suffix[0].setAnnotationValue(Action.ModuleForHeader);
1269 // FIXME: Call the moduleImport callback?
1270 break;
1271 case ImportAction::Failure:
1272 assert(TheModuleLoader.HadFatalFailure &&
1273 "This should be an early exit only to a fatal error");
1274 Result.setKind(tok::eof);
1275 CurLexer->cutOffLexing();
1276 EnterTokens(Suffix);
1277 return true;
1278 }
1279
1280 EnterTokens(Suffix);
1281 return false;
1282 }
1283
1284 // The token sequence
1285 //
1286 // import identifier (. identifier)*
1287 //
1288 // indicates a module import directive. We already saw the 'import'
1289 // contextual keyword, so now we're looking for the identifiers.
1290 if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
1291 // We expected to see an identifier here, and we did; continue handling
1292 // identifiers.
1293 NamedModuleImportPath.emplace_back(Result.getLocation(),
1294 Result.getIdentifierInfo());
1295 ModuleImportExpectsIdentifier = false;
1296 CurLexerCallback = CLK_LexAfterModuleImport;
1297 return true;
1298 }
1299
1300 // If we're expecting a '.' or a ';', and we got a '.', then wait until we
1301 // see the next identifier. (We can also see a '[[' that begins an
1302 // attribute-specifier-seq here under the Standard C++ Modules.)
1303 if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
1304 ModuleImportExpectsIdentifier = true;
1305 CurLexerCallback = CLK_LexAfterModuleImport;
1306 return true;
1307 }
1308
1309 // If we didn't recognize a module name at all, this is not a (valid) import.
1310 if (NamedModuleImportPath.empty() || Result.is(tok::eof))
1311 return true;
1312
1313 // Consume the pp-import-suffix and expand any macros in it now, if we're not
1314 // at the semicolon already.
1315 SourceLocation SemiLoc = Result.getLocation();
1316 if (Result.isNot(tok::semi)) {
1317 Suffix.push_back(Result);
1318 CollectPpImportSuffix(Suffix);
1319 if (Suffix.back().isNot(tok::semi)) {
1320 // This is not an import after all.
1321 EnterTokens(Suffix);
1322 return false;
1323 }
1324 SemiLoc = Suffix.back().getLocation();
1325 }
1326
1327 // Under the standard C++ Modules, the dot is just part of the module name,
1328 // and not a real hierarchy separator. Flatten such module names now.
1329 //
1330 // FIXME: Is this the right level to be performing this transformation?
1331 std::string FlatModuleName;
1332 if (getLangOpts().CPlusPlusModules) {
1333 for (auto &Piece : NamedModuleImportPath) {
1334 // If the FlatModuleName ends with colon, it implies it is a partition.
1335 if (!FlatModuleName.empty() && FlatModuleName.back() != ':')
1336 FlatModuleName += ".";
1337 FlatModuleName += Piece.getIdentifierInfo()->getName();
1338 }
1339 SourceLocation FirstPathLoc = NamedModuleImportPath[0].getLoc();
1340 NamedModuleImportPath.clear();
1341 NamedModuleImportPath.emplace_back(FirstPathLoc,
1342 getIdentifierInfo(FlatModuleName));
1343 }
1344
1345 Module *Imported = nullptr;
1346 // We don't/shouldn't load the standard c++20 modules when preprocessing.
1347 if (getLangOpts().Modules && !isInImportingCXXNamedModules()) {
1348 Imported = TheModuleLoader.loadModule(ModuleImportLoc,
1349 NamedModuleImportPath,
1351 /*IsInclusionDirective=*/false);
1352 if (Imported)
1353 makeModuleVisible(Imported, SemiLoc);
1354 }
1355
1356 if (Callbacks)
1357 Callbacks->moduleImport(ModuleImportLoc, NamedModuleImportPath, Imported);
1358
1359 if (!Suffix.empty()) {
1360 EnterTokens(Suffix);
1361 return false;
1362 }
1363 return true;
1364}
1365
1367 bool IncludeExports) {
1368 CurSubmoduleState->VisibleModules.setVisible(
1369 M, Loc, IncludeExports, [](Module *) {},
1370 [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
1371 // FIXME: Include the path in the diagnostic.
1372 // FIXME: Include the import location for the conflicting module.
1373 Diag(ModuleImportLoc, diag::warn_module_conflict)
1374 << Path[0]->getFullModuleName()
1375 << Conflict->getFullModuleName()
1376 << Message;
1377 });
1378
1379 // Add this module to the imports list of the currently-built submodule.
1380 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
1381 BuildingSubmoduleStack.back().M->Imports.insert(M);
1382}
1383
1385 const char *DiagnosticTag,
1386 bool AllowMacroExpansion) {
1387 // We need at least one string literal.
1388 if (Result.isNot(tok::string_literal)) {
1389 Diag(Result, diag::err_expected_string_literal)
1390 << /*Source='in...'*/0 << DiagnosticTag;
1391 return false;
1392 }
1393
1394 // Lex string literal tokens, optionally with macro expansion.
1395 SmallVector<Token, 4> StrToks;
1396 do {
1397 StrToks.push_back(Result);
1398
1399 if (Result.hasUDSuffix())
1400 Diag(Result, diag::err_invalid_string_udl);
1401
1402 if (AllowMacroExpansion)
1403 Lex(Result);
1404 else
1406 } while (Result.is(tok::string_literal));
1407
1408 // Concatenate and parse the strings.
1409 StringLiteralParser Literal(StrToks, *this);
1410 assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1411
1412 if (Literal.hadError)
1413 return false;
1414
1415 if (Literal.Pascal) {
1416 Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
1417 << /*Source='in...'*/0 << DiagnosticTag;
1418 return false;
1419 }
1420
1421 String = std::string(Literal.GetString());
1422 return true;
1423}
1424
1426 assert(Tok.is(tok::numeric_constant));
1427 SmallString<8> IntegerBuffer;
1428 bool NumberInvalid = false;
1429 StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
1430 if (NumberInvalid)
1431 return false;
1432 NumericLiteralParser Literal(Spelling, Tok.getLocation(), getSourceManager(),
1434 getDiagnostics());
1435 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
1436 return false;
1437 llvm::APInt APVal(64, 0);
1438 if (Literal.GetIntegerValue(APVal))
1439 return false;
1440 Lex(Tok);
1441 Value = APVal.getLimitedValue();
1442 return true;
1443}
1444
1446 assert(Handler && "NULL comment handler");
1447 assert(!llvm::is_contained(CommentHandlers, Handler) &&
1448 "Comment handler already registered");
1449 CommentHandlers.push_back(Handler);
1450}
1451
1453 std::vector<CommentHandler *>::iterator Pos =
1454 llvm::find(CommentHandlers, Handler);
1455 assert(Pos != CommentHandlers.end() && "Comment handler not registered");
1456 CommentHandlers.erase(Pos);
1457}
1458
1460 bool AnyPendingTokens = false;
1461 for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
1462 HEnd = CommentHandlers.end();
1463 H != HEnd; ++H) {
1464 if ((*H)->HandleComment(*this, Comment))
1465 AnyPendingTokens = true;
1466 }
1467 if (!AnyPendingTokens || getCommentRetentionState())
1468 return false;
1469 Lex(result);
1470 return true;
1471}
1472
1473void Preprocessor::emitMacroDeprecationWarning(const Token &Identifier) const {
1474 const MacroAnnotations &A =
1475 getMacroAnnotations(Identifier.getIdentifierInfo());
1476 assert(A.DeprecationInfo &&
1477 "Macro deprecation warning without recorded annotation!");
1478 const MacroAnnotationInfo &Info = *A.DeprecationInfo;
1479 if (Info.Message.empty())
1480 Diag(Identifier, diag::warn_pragma_deprecated_macro_use)
1481 << Identifier.getIdentifierInfo() << 0;
1482 else
1483 Diag(Identifier, diag::warn_pragma_deprecated_macro_use)
1484 << Identifier.getIdentifierInfo() << 1 << Info.Message;
1485 Diag(Info.Location, diag::note_pp_macro_annotation) << 0;
1486}
1487
1488void Preprocessor::emitRestrictExpansionWarning(const Token &Identifier) const {
1489 const MacroAnnotations &A =
1490 getMacroAnnotations(Identifier.getIdentifierInfo());
1491 assert(A.RestrictExpansionInfo &&
1492 "Macro restricted expansion warning without recorded annotation!");
1493 const MacroAnnotationInfo &Info = *A.RestrictExpansionInfo;
1494 if (Info.Message.empty())
1495 Diag(Identifier, diag::warn_pragma_restrict_expansion_macro_use)
1496 << Identifier.getIdentifierInfo() << 0;
1497 else
1498 Diag(Identifier, diag::warn_pragma_restrict_expansion_macro_use)
1499 << Identifier.getIdentifierInfo() << 1 << Info.Message;
1500 Diag(Info.Location, diag::note_pp_macro_annotation) << 1;
1501}
1502
1503void Preprocessor::emitRestrictInfNaNWarning(const Token &Identifier,
1504 unsigned DiagSelection) const {
1505 Diag(Identifier, diag::warn_fp_nan_inf_when_disabled) << DiagSelection << 1;
1506}
1507
1508void Preprocessor::emitFinalMacroWarning(const Token &Identifier,
1509 bool IsUndef) const {
1510 const MacroAnnotations &A =
1511 getMacroAnnotations(Identifier.getIdentifierInfo());
1512 assert(A.FinalAnnotationLoc &&
1513 "Final macro warning without recorded annotation!");
1514
1515 Diag(Identifier, diag::warn_pragma_final_macro)
1516 << Identifier.getIdentifierInfo() << (IsUndef ? 0 : 1);
1517 Diag(*A.FinalAnnotationLoc, diag::note_pp_macro_annotation) << 2;
1518}
1519
1521 const SourceLocation &Loc) const {
1522 // The lambda that tests if a `Loc` is in an opt-out region given one opt-out
1523 // region map:
1524 auto TestInMap = [&SourceMgr](const SafeBufferOptOutRegionsTy &Map,
1525 const SourceLocation &Loc) -> bool {
1526 // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
1527 auto FirstRegionEndingAfterLoc = llvm::partition_point(
1528 Map, [&SourceMgr,
1529 &Loc](const std::pair<SourceLocation, SourceLocation> &Region) {
1530 return SourceMgr.isBeforeInTranslationUnit(Region.second, Loc);
1531 });
1532
1533 if (FirstRegionEndingAfterLoc != Map.end()) {
1534 // To test if the start location of the found region precedes `Loc`:
1535 return SourceMgr.isBeforeInTranslationUnit(
1536 FirstRegionEndingAfterLoc->first, Loc);
1537 }
1538 // If we do not find a region whose end location passes `Loc`, we want to
1539 // check if the current region is still open:
1540 if (!Map.empty() && Map.back().first == Map.back().second)
1541 return SourceMgr.isBeforeInTranslationUnit(Map.back().first, Loc);
1542 return false;
1543 };
1544
1545 // What the following does:
1546 //
1547 // If `Loc` belongs to the local TU, we just look up `SafeBufferOptOutMap`.
1548 // Otherwise, `Loc` is from a loaded AST. We look up the
1549 // `LoadedSafeBufferOptOutMap` first to get the opt-out region map of the
1550 // loaded AST where `Loc` is at. Then we find if `Loc` is in an opt-out
1551 // region w.r.t. the region map. If the region map is absent, it means there
1552 // is no opt-out pragma in that loaded AST.
1553 //
1554 // Opt-out pragmas in the local TU or a loaded AST are not visible to another
1555 // one of them. That means if you put the pragmas around a `#include
1556 // "module.h"`, where module.h is a module, it is not actually suppressing
1557 // warnings in module.h. This is fine because warnings in module.h will be
1558 // reported when module.h is compiled in isolation and nothing in module.h
1559 // will be analyzed ever again. So you will not see warnings from the file
1560 // that imports module.h anyway. And you can't even do the same thing for PCHs
1561 // because they can only be included from the command line.
1562
1563 if (SourceMgr.isLocalSourceLocation(Loc))
1564 return TestInMap(SafeBufferOptOutMap, Loc);
1565
1567 LoadedSafeBufferOptOutMap.lookupLoadedOptOutMap(Loc, SourceMgr);
1568
1569 if (LoadedRegions)
1570 return TestInMap(*LoadedRegions, Loc);
1571 return false;
1572}
1573
1575 bool isEnter, const SourceLocation &Loc) {
1576 if (isEnter) {
1578 return true; // invalid enter action
1579 InSafeBufferOptOutRegion = true;
1580 CurrentSafeBufferOptOutStart = Loc;
1581
1582 // To set the start location of a new region:
1583
1584 if (!SafeBufferOptOutMap.empty()) {
1585 [[maybe_unused]] auto *PrevRegion = &SafeBufferOptOutMap.back();
1586 assert(PrevRegion->first != PrevRegion->second &&
1587 "Shall not begin a safe buffer opt-out region before closing the "
1588 "previous one.");
1589 }
1590 // If the start location equals the end location, we call the region an
1591 // open region or an unclosed region (i.e., end location has not been set
1592 // yet).
1593 SafeBufferOptOutMap.emplace_back(Loc, Loc);
1594 } else {
1596 return true; // invalid enter action
1597 InSafeBufferOptOutRegion = false;
1598
1599 // To set the end location of the current open region:
1600
1601 assert(!SafeBufferOptOutMap.empty() &&
1602 "Misordered safe buffer opt-out regions");
1603 auto *CurrRegion = &SafeBufferOptOutMap.back();
1604 assert(CurrRegion->first == CurrRegion->second &&
1605 "Set end location to a closed safe buffer opt-out region");
1606 CurrRegion->second = Loc;
1607 }
1608 return false;
1609}
1610
1612 return InSafeBufferOptOutRegion;
1613}
1615 StartLoc = CurrentSafeBufferOptOutStart;
1616 return InSafeBufferOptOutRegion;
1617}
1618
1621 assert(!InSafeBufferOptOutRegion &&
1622 "Attempt to serialize safe buffer opt-out regions before file being "
1623 "completely preprocessed");
1624
1626
1627 for (const auto &[begin, end] : SafeBufferOptOutMap) {
1628 SrcSeq.push_back(begin);
1629 SrcSeq.push_back(end);
1630 }
1631 // Only `SafeBufferOptOutMap` gets serialized. No need to serialize
1632 // `LoadedSafeBufferOptOutMap` because if this TU loads a pch/module, every
1633 // pch/module in the pch-chain/module-DAG will be loaded one by one in order.
1634 // It means that for each loading pch/module m, it just needs to load m's own
1635 // `SafeBufferOptOutMap`.
1636 return SrcSeq;
1637}
1638
1640 const SmallVectorImpl<SourceLocation> &SourceLocations) {
1641 if (SourceLocations.size() == 0)
1642 return false;
1643
1644 assert(SourceLocations.size() % 2 == 0 &&
1645 "ill-formed SourceLocation sequence");
1646
1647 auto It = SourceLocations.begin();
1648 SafeBufferOptOutRegionsTy &Regions =
1649 LoadedSafeBufferOptOutMap.findAndConsLoadedOptOutMap(*It, SourceMgr);
1650
1651 do {
1652 SourceLocation Begin = *It++;
1653 SourceLocation End = *It++;
1654
1655 Regions.emplace_back(Begin, End);
1656 } while (It != SourceLocations.end());
1657 return true;
1658}
1659
1660ModuleLoader::~ModuleLoader() = default;
1661
1663
1665
1667
1669 if (Record)
1670 return;
1671
1673 addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
1674}
1675
1676const char *Preprocessor::getCheckPoint(FileID FID, const char *Start) const {
1677 if (auto It = CheckPoints.find(FID); It != CheckPoints.end()) {
1678 const SmallVector<const char *> &FileCheckPoints = It->second;
1679 const char *Last = nullptr;
1680 // FIXME: Do better than a linear search.
1681 for (const char *P : FileCheckPoints) {
1682 if (P > Start)
1683 break;
1684 Last = P;
1685 }
1686 return Last;
1687 }
1688
1689 return nullptr;
1690}
1691
1693 return DirTracer && DirTracer->hasSeenNoTrivialPPDirective();
1694}
1695
1697 return SeenNoTrivialPPDirective;
1698}
1699
1700void NoTrivialPPDirectiveTracer::setSeenNoTrivialPPDirective() {
1701 if (InMainFile && !SeenNoTrivialPPDirective)
1702 SeenNoTrivialPPDirective = true;
1703}
1704
1706 FileID FID, LexedFileChangeReason Reason,
1708 InMainFile = (FID == PP.getSourceManager().getMainFileID());
1709}
1710
1712 const MacroDefinition &MD,
1714 const MacroArgs *Args) {
1715 // FIXME: Does only enable builtin macro expansion make sense?
1716 if (!MD.getMacroInfo()->isBuiltinMacro())
1717 setSeenNoTrivialPPDirective();
1718}
StringRef P
Defines enum values for all the target-independent builtin functions.
IndirectLocalPath & Path
Expr * E
Defines the clang::FileManager interface and associated types.
StringRef Identifier
Definition: Format.cpp:3185
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
Defines the clang::LangOptions interface.
llvm::MachO::FileType FileType
Definition: MachO.h:46
llvm::MachO::Target Target
Definition: MachO.h:51
llvm::MachO::Record Record
Definition: MachO.h:31
Defines the clang::MacroInfo and clang::MacroDirective classes.
Defines the clang::Module class, which describes a module in the source code.
#define SM(sm)
Definition: OffloadArch.cpp:16
Defines the PreprocessorLexer interface.
static bool MacroDefinitionEquals(const MacroInfo *MI, ArrayRef< TokenValue > Tokens)
Compares macro tokens with a specified token value sequence.
static constexpr unsigned CheckPointStepSize
Minimum distance between two check points, in tokens.
Defines the clang::Preprocessor interface.
SourceRange Range
Definition: SemaObjC.cpp:753
SourceLocation Loc
Definition: SemaObjC.cpp:754
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
SourceLocation Begin
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
virtual void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled)
Callback invoked when performing code completion inside the filename part of an #include directive.
virtual void CodeCompleteNaturalLanguage()
Callback invoked when performing code completion in a part of the file where we expect natural langua...
Abstract base class that describes a handler that will receive source ranges for each of the comments...
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:231
virtual void updateOutOfDateIdentifier(const IdentifierInfo &II)=0
Update an out-of-date identifier.
A reference to a FileEntry that includes the name of the file as it was accessed by the FileManager's...
Definition: FileEntry.h:57
Cached information about one file (either on disk or in the virtual file system).
Definition: FileEntry.h:306
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
bool isValid() const
bool isInvalid() const
Encapsulates the information needed to find the file referenced by a #include or #include_next,...
Definition: HeaderSearch.h:237
Module * lookupModule(StringRef ModuleName, SourceLocation ImportLoc=SourceLocation(), bool AllowSearch=true, bool AllowExtraModuleMapSearch=false)
Lookup a module Search for a module with the given name.
void setTarget(const TargetInfo &Target)
Set the target information for the header search, if not already known.
Provides lookups to, and iteration over, IdentiferInfo objects.
One of these records is kept for each identifier that is lexed.
bool isModulesImport() const
Determine whether this is the contextual keyword import.
bool IsKeywordInCPlusPlus() const
Return true if this identifier would be a keyword in C++ mode.
tok::TokenKind getTokenID() const
If this is a source-language token (e.g.
void setIsPoisoned(bool Value=true)
setIsPoisoned - Mark this identifier as poisoned.
bool isPoisoned() const
Return true if this token has been poisoned.
bool isOutOfDate() const
Determine whether the information for this identifier is out of date with respect to the external sou...
void setIsFutureCompatKeyword(bool Val)
StringRef getName() const
Return the actual identifier string.
bool isFutureCompatKeyword() const
is/setIsFutureCompatKeyword - Initialize information about whether or not this language token is a ke...
bool isExtensionToken() const
get/setExtension - Initialize information about whether or not this language token is an extension.
void AddKeywords(const LangOptions &LangOpts)
Populate the identifier table with info about the language keywords for the language specified by Lan...
@ FEM_UnsetOnCommandLine
Used only for FE option processing; this is only used to indicate that the user did not specify an ex...
Definition: LangOptions.h:250
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:434
MacroArgs - An instance of this class captures information about the formal arguments specified to a ...
Definition: MacroArgs.h:30
A description of the current definition of a macro.
Definition: MacroInfo.h:590
MacroInfo * getMacroInfo() const
Get the MacroInfo that should be used for this definition.
Definition: MacroInfo.h:606
SourceLocation getLocation() const
Definition: MacroInfo.h:488
Encapsulates the data about a macro definition (e.g.
Definition: MacroInfo.h:39
const_tokens_iterator tokens_begin() const
Definition: MacroInfo.h:244
unsigned getNumTokens() const
Return the number of tokens that this macro expands to.
Definition: MacroInfo.h:235
const Token & getReplacementToken(unsigned Tok) const
Definition: MacroInfo.h:237
bool isBuiltinMacro() const
Return true if this macro requires processing before expansion.
Definition: MacroInfo.h:217
bool isObjectLike() const
Definition: MacroInfo.h:202
Abstract interface for a module loader.
Definition: ModuleLoader.h:83
virtual ModuleLoadResult loadModule(SourceLocation ImportLoc, ModuleIdPath Path, Module::NameVisibilityKind Visibility, bool IsInclusionDirective)=0
Attempt to load the given module.
virtual ~ModuleLoader()
Represents a macro directive exported by a module.
Definition: MacroInfo.h:514
Describes a module or submodule.
Definition: Module.h:144
@ Hidden
All of the names in this module are hidden.
Definition: Module.h:445
void MacroExpands(const Token &MacroNameTok, const MacroDefinition &MD, SourceRange Range, const MacroArgs *Args) override
Called by Preprocessor::HandleMacroExpandedIdentifier when a macro invocation is found.
void LexedFileChanged(FileID FID, LexedFileChangeReason Reason, SrcMgr::CharacteristicKind FileType, FileID PrevFID, SourceLocation Loc) override
Callback invoked whenever the Lexer moves to a different file for lexing.
NumericLiteralParser - This performs strict semantic analysis of the content of a ppnumber,...
PragmaNamespace - This PragmaHandler subdivides the namespace of pragmas, allowing hierarchical pragm...
Definition: Pragma.h:96
A record of the steps taken while preprocessing a source file, including the various preprocessing di...
void setConditionalLevels(ArrayRef< PPConditionalInfo > CL)
void LexIncludeFilename(Token &FilenameTok)
Lex a token, producing a header-name token if possible.
PreprocessorOptions - This class is used for passing the various options used in preprocessor initial...
bool PCHWithHdrStopCreate
When true, we are creating a PCH or creating the PCH object while expecting a #pragma hdrstop to sepa...
std::string PCHThroughHeader
If non-empty, the filename used in an #include directive in the primary source file (or command-line ...
bool PCHWithHdrStop
When true, we are creating or using a PCH where a #pragma hdrstop is expected to indicate the beginni...
bool GeneratePreamble
True indicates that a preamble is being generated.
bool markIncluded(FileEntryRef File)
Mark the file as included.
void FinalizeForModelFile()
Cleanup after model file parsing.
bool FinishLexStringLiteral(Token &Result, std::string &String, const char *DiagnosticTag, bool AllowMacroExpansion)
Complete the lexing of a string literal where the first token has already been lexed (see LexStringLi...
bool creatingPCHWithThroughHeader()
True if creating a PCH with a through header.
void DumpToken(const Token &Tok, bool DumpFlags=false) const
Print the token to stderr, used for debugging.
void InitializeForModelFile()
Initialize the preprocessor to parse a model file.
void CollectPpImportSuffix(SmallVectorImpl< Token > &Toks)
Collect the tokens of a C++20 pp-import-suffix.
void setCodeCompletionTokenRange(const SourceLocation Start, const SourceLocation End)
Set the code completion token range for detecting replacement range later on.
bool LexAfterModuleImport(Token &Result)
Lex a token following the 'import' contextual keyword.
macro_iterator macro_begin(bool IncludeExternalMacros=true) const
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
bool isSafeBufferOptOut(const SourceManager &SourceMgr, const SourceLocation &Loc) const
const char * getCheckPoint(FileID FID, const char *Start) const
Returns a pointer into the given file's buffer that's guaranteed to be between tokens.
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it in...
void DumpMacro(const MacroInfo &MI) const
void setCodeCompletionReached()
Note that we hit the code-completion point.
bool SetCodeCompletionPoint(FileEntryRef File, unsigned Line, unsigned Column)
Specify the point at which code-completion will be performed.
bool isInImportingCXXNamedModules() const
Whether we're importing a standard C++20 named module.
void Lex(Token &Result)
Lex the next token for this preprocessor.
const TranslationUnitKind TUKind
The kind of translation unit we are processing.
Definition: Preprocessor.h:309
bool EnterSourceFile(FileID FID, ConstSearchDirIterator Dir, SourceLocation Loc, bool IsFirstIncludeOfFile=true)
Add a source file to the top of the include stack and start lexing tokens from it instead of the curr...
void addCommentHandler(CommentHandler *Handler)
Add the specified comment handler to the preprocessor.
void removeCommentHandler(CommentHandler *Handler)
Remove the specified comment handler.
void HandlePoisonedIdentifier(Token &Identifier)
Display reason for poisoned identifier.
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the token's IdentifierInfo membe...
void addPPCallbacks(std::unique_ptr< PPCallbacks > C)
bool enterOrExitSafeBufferOptOutRegion(bool isEnter, const SourceLocation &Loc)
Alter the state of whether this PP currently is in a "-Wunsafe-buffer-usage" opt-out region.
void EnterMainSourceFile()
Enter the specified FileID as the main source file, which implicitly adds the builtin defines etc.
const MacroAnnotations & getMacroAnnotations(const IdentifierInfo *II) const
IdentifierInfo * getIdentifierInfo(StringRef Name) const
Return information about the specified preprocessor identifier token.
macro_iterator macro_end(bool IncludeExternalMacros=true) const
SourceManager & getSourceManager() const
bool isBacktrackEnabled() const
True if EnableBacktrackAtThisPos() was called and caching of tokens is on.
MacroDefinition getMacroDefinition(const IdentifierInfo *II)
void SetPoisonReason(IdentifierInfo *II, unsigned DiagID)
Specifies the reason for poisoning an identifier.
bool getCommentRetentionState() const
Module * getCurrentModuleImplementation()
Retrieves the module whose implementation we're currently compiling, if any.
bool isNextPPTokenOneOf(Ts... Ks)
Check whether the next pp-token is one of the specified token kinds.
MacroMap::const_iterator macro_iterator
void createPreprocessingRecord()
Create a new preprocessing record, which will keep track of all macro expansions, macro definitions,...
SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length)
Split the first Length characters out of the token starting at TokLoc and return a location pointing ...
Module * getCurrentModule()
Retrieves the module that we're currently building, if any.
bool isPPInSafeBufferOptOutRegion()
void makeModuleVisible(Module *M, SourceLocation Loc, bool IncludeExports=true)
void setCurrentFPEvalMethod(SourceLocation PragmaLoc, LangOptions::FPEvalMethodKind Val)
const TargetInfo & getTargetInfo() const
bool LexHeaderName(Token &Result, bool AllowMacroExpansion=true)
Lex a token, forming a header-name token if possible.
bool isPCHThroughHeader(const FileEntry *FE)
Returns true if the FileEntry is the PCH through header.
void DumpLocation(SourceLocation Loc) const
bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value)
Parses a simple integer literal to get its numeric value.
void LexUnexpandedToken(Token &Result)
Just like Lex, but disables macro expansion of identifier tokens.
bool creatingPCHWithPragmaHdrStop()
True if creating a PCH with a #pragma hdrstop.
void Initialize(const TargetInfo &Target, const TargetInfo *AuxTarget=nullptr)
Initialize the preprocessor using information about the target.
FileID getPredefinesFileID() const
Returns the FileID for the preprocessor predefines.
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
bool HandleComment(Token &result, SourceRange Comment)
HeaderSearch & getHeaderSearchInfo() const
bool setDeserializedSafeBufferOptOutMap(const SmallVectorImpl< SourceLocation > &SrcLocSeqs)
ExternalPreprocessorSource * getExternalSource() const
SmallVector< SourceLocation, 64 > serializeSafeBufferOptOutMap() const
void recomputeCurLexerKind()
Recompute the current lexer kind based on the CurLexer/ CurTokenLexer pointers.
OptionalFileEntryRef LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled, ConstSearchDirIterator FromDir, const FileEntry *FromFile, ConstSearchDirIterator *CurDir, SmallVectorImpl< char > *SearchPath, SmallVectorImpl< char > *RelativePath, ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped, bool *IsFrameworkFound, bool SkipCache=false, bool OpenFile=true, bool CacheFailures=true)
Given a "foo" or <foo> reference, look up the indicated file.
IdentifierTable & getIdentifierTable()
const LangOptions & getLangOpts() const
void setTUFPEvalMethod(LangOptions::FPEvalMethodKind Val)
void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled)
Hook used by the lexer to invoke the "included file" code completion point.
llvm::DenseMap< FileID, SafeBufferOptOutRegionsTy > LoadedRegions
void PoisonSEHIdentifiers(bool Poison=true)
size_t getTotalMemory() const
void LexTokensUntilEOF(std::vector< Token > *Tokens=nullptr)
Lex all tokens for this preprocessor until (and excluding) end of file.
bool usingPCHWithPragmaHdrStop()
True if using a PCH with a #pragma hdrstop.
void CodeCompleteNaturalLanguage()
Hook used by the lexer to invoke the "natural language" code completion point.
void EndSourceFile()
Inform the preprocessor callbacks that processing is complete.
DiagnosticsEngine & getDiagnostics() const
bool hasSeenNoTrivialPPDirective() const
Whether we've seen pp-directives which may have changed the preprocessing state.
StringRef getLastMacroWithSpelling(SourceLocation Loc, ArrayRef< TokenValue > Tokens) const
Return the name of the macro defined before Loc that has spelling Tokens.
void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter)
Set the code completion token for filtering purposes.
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
void SkipTokensWhileUsingPCH()
Skip tokens until after the #include of the through header or until after a #pragma hdrstop.
bool usingPCHWithThroughHeader()
True if using a PCH with a through header.
Preprocessor(const PreprocessorOptions &PPOpts, DiagnosticsEngine &diags, const LangOptions &LangOpts, SourceManager &SM, HeaderSearch &Headers, ModuleLoader &TheModuleLoader, IdentifierInfoLookup *IILookup=nullptr, bool OwnsHeaderSearch=false, TranslationUnitKind TUKind=TU_Complete)
ScratchBuffer - This class exposes a simple interface for the dynamic construction of tokens.
Definition: ScratchBuffer.h:24
Encodes a location in the source.
bool isValid() const
Return true if this is a valid SourceLocation object.
void print(raw_ostream &OS, const SourceManager &SM) const
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
This class handles loading and caching of source files into memory.
bool isLocalSourceLocation(SourceLocation Loc) const
Returns true if Loc did not come from a PCH/Module.
OptionalFileEntryRef getFileEntryRefForID(FileID FID) const
Returns the FileEntryRef for the provided FileID.
FileID createFileID(FileEntryRef SourceFile, SourceLocation IncludePos, SrcMgr::CharacteristicKind FileCharacter, int LoadedID=0, SourceLocation::UIntTy LoadedOffset=0)
Create a new FileID that represents the specified file being #included from the specified IncludePosi...
FileID getMainFileID() const
Returns the FileID of the main source file.
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer.
void overrideFileContents(FileEntryRef SourceFile, const llvm::MemoryBufferRef &Buffer)
Override the contents of the given source file by providing an already-allocated buffer.
bool isLoadedFileID(FileID FID) const
Returns true if FID came from a PCH/Module.
const FileEntry * getFileEntryForID(FileID FID) const
Returns the FileEntry record for the provided FileID.
SourceLocation createExpansionLoc(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned Length, bool ExpansionIsTokenRange=true, int LoadedID=0, SourceLocation::UIntTy LoadedOffset=0)
Creates an expansion SLocEntry for a macro use.
bool isBeforeInTranslationUnit(SourceLocation LHS, SourceLocation RHS) const
Determines the order of 2 source locations in the translation unit.
std::optional< llvm::MemoryBufferRef > getMemoryBufferForFileOrNone(FileEntryRef File)
Retrieve the memory buffer associated with the given file.
A trivial tuple used to represent a source range.
StringLiteralParser - This decodes string escape characters and performs wide string analysis and Tra...
Exposes information about the current target.
Definition: TargetInfo.h:226
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:189
void setLiteralData(const char *Ptr)
Definition: Token.h:231
bool hasUCN() const
Returns true if this token contains a universal character name.
Definition: Token.h:308
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:118
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
Definition: Token.h:134
unsigned getLength() const
Definition: Token.h:137
void setLength(unsigned Len)
Definition: Token.h:143
bool isExpandDisabled() const
Return true if this identifier token should never be expanded in the future, due to C99 6....
Definition: Token.h:286
void setKind(tok::TokenKind K)
Definition: Token.h:98
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition: Token.h:102
tok::TokenKind getKind() const
Definition: Token.h:97
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
Definition: Token.h:278
bool isOneOf(Ts... Ks) const
Definition: Token.h:104
@ DisableExpand
Definition: Token.h:79
@ HasSeenNoTrivialPPDirective
Definition: Token.h:92
@ IsReinjected
Definition: Token.h:89
@ LeadingEmptyMacro
Definition: Token.h:81
@ LeadingSpace
Definition: Token.h:77
@ StartOfLine
Definition: Token.h:75
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition: Token.h:282
void setLocation(SourceLocation L)
Definition: Token.h:142
bool hasLeadingEmptyMacro() const
Return true if this token has an empty macro before it.
Definition: Token.h:301
void setRawIdentifierData(const char *Ptr)
Definition: Token.h:219
bool isNot(tok::TokenKind K) const
Definition: Token.h:103
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
Definition: Token.h:123
void startToken()
Reset all flags to cleared.
Definition: Token.h:179
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
Definition: Token.h:297
void setIdentifierInfo(IdentifierInfo *II)
Definition: Token.h:198
void setFlagValue(TokenFlags Flag, bool Val)
Set a flag to either true or false.
Definition: Token.h:269
Defines the clang::TargetInfo interface.
CharacteristicKind
Indicates whether a file or directory holds normal user code, system code, or system code which is im...
Definition: SourceManager.h:81
const char * getTokenName(TokenKind Kind) LLVM_READNONE
Determines the name of a token as used within the front end.
Definition: TokenKinds.cpp:24
The JSON file list parser is used to communicate input to InstallAPI.
std::pair< FileID, unsigned > FileIDAndOffset
void expandUCNs(SmallVectorImpl< char > &Buf, StringRef Input)
Copy characters from Input to Buf, expanding any UCNs.
llvm::Registry< PragmaHandler > PragmaHandlerRegistry
Registry of pragma handlers added by plugins.
@ Result
The result type of a method or function.
TranslationUnitKind
Describes the kind of translation unit being processed.
Definition: LangOptions.h:1097
@ TU_Prefix
The translation unit is a prefix to a translation unit, and is not complete.
Definition: LangOptions.h:1103
#define true
Definition: stdbool.h:25