27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/SmallVector.h"
30#include "llvm/ADT/iterator_range.h"
53 NextTokGetsSpace =
false;
56 DisableMacroExpansion =
false;
62 MacroStartSLocOffset =
SM.getNextLocalOffset();
65 assert(Tokens[0].getLocation().isValid());
66 assert((Tokens[0].getLocation().isFileID() || Tokens[0].is(tok::comment)) &&
67 "Macro defined in macro?");
68 assert(ExpandLocStart.
isValid());
74 MacroDefStart =
SM.getExpansionLoc(Tokens[0].getLocation());
76 MacroExpansionStart =
SM.createExpansionLoc(MacroDefStart,
85 ExpandFunctionArguments();
96 bool disableMacroExpansion,
bool ownsTokens,
98 assert(!isReinject || disableMacroExpansion);
104 ActualArgs =
nullptr;
106 OwnsTokens = ownsTokens;
107 DisableMacroExpansion = disableMacroExpansion;
108 IsReinject = isReinject;
112 AtStartOfLine =
false;
113 HasLeadingSpace =
false;
114 NextTokGetsSpace =
false;
125void TokenLexer::destroy() {
135 if (ActualArgs) ActualArgs->
destroy(PP);
138bool TokenLexer::MaybeRemoveCommaBeforeVaArgs(
142 if (!
Macro->isVariadic() || MacroArgNo !=
Macro->getNumParams()-1)
148 if (!HasPasteOperator && !PP.
getLangOpts().MSVCCompat)
157 &&
Macro->getNumParams() < 2)
161 if (ResultToks.empty() || !ResultToks.back().is(tok::comma))
165 if (HasPasteOperator)
166 PP.
Diag(ResultToks.back().getLocation(), diag::ext_paste_comma);
169 ResultToks.pop_back();
171 if (!ResultToks.empty()) {
176 if (ResultToks.back().is(tok::hashhash))
177 ResultToks.pop_back();
184 NextTokGetsSpace =
false;
188void TokenLexer::stringifyVAOPTContents(
192 const unsigned int NumVAOptTokens = ResultToks.size() - NumToksPriorToVAOpt;
193 Token *
const VAOPTTokens =
194 NumVAOptTokens ? &ResultToks[NumToksPriorToVAOpt] :
nullptr;
201 for (
unsigned int CurTokenIdx = 0; CurTokenIdx != NumVAOptTokens;
203 if (VAOPTTokens[CurTokenIdx].is(tok::hashhash)) {
204 assert(CurTokenIdx != 0 &&
205 "Can not have __VAOPT__ contents begin with a ##");
206 Token &LHS = VAOPTTokens[CurTokenIdx - 1];
210 ConcatenatedVAOPTResultToks.back() = LHS;
211 if (CurTokenIdx == NumVAOptTokens)
214 ConcatenatedVAOPTResultToks.push_back(VAOPTTokens[CurTokenIdx]);
217 ConcatenatedVAOPTResultToks.push_back(VCtx.
getEOFTok());
226 getExpansionLocForMacroDefLoc(VAOPTClosingParenLoc);
230 ExpansionLocStartWithinMacro, ExpansionLocEndWithinMacro);
237 ResultToks.resize(NumToksPriorToVAOpt + 1);
238 ResultToks.back() = StringifiedVAOPT;
243void TokenLexer::ExpandFunctionArguments() {
249 bool MadeChange =
false;
251 std::optional<bool> CalledWithVariadicArguments;
255 for (
unsigned I = 0,
E = NumTokens; I !=
E; ++I) {
256 const Token &CurTok = Tokens[I];
263 if (I != 0 && !Tokens[I-1].is(tok::hashhash) && CurTok.
hasLeadingSpace())
264 NextTokGetsSpace =
true;
268 assert(Tokens[I + 1].is(tok::l_paren) &&
269 "__VA_OPT__ must be followed by '('");
291 if (Tokens[I].is(tok::l_paren))
299 if (!CalledWithVariadicArguments) {
300 CalledWithVariadicArguments =
303 if (!*CalledWithVariadicArguments) {
323 stringifyVAOPTContents(ResultToks, VCtx,
324 Tokens[I].getLocation());
333 if (ResultToks.size() && ResultToks.back().is(tok::hashhash)) {
334 ResultToks.pop_back();
335 }
else if ((I + 1 !=
E) && Tokens[I + 1].is(tok::hashhash)) {
348 "no token paste before __VA_OPT__");
349 ResultToks.erase(ResultToks.begin() +
355 Tokens[I + 1].is(tok::hashhash)) {
370 if (CurTok.
isOneOf(tok::hash, tok::hashat)) {
371 int ArgNo =
Macro->getParameterNum(Tokens[I+1].getIdentifierInfo());
372 assert((ArgNo != -1 || VCtx.
isVAOptToken(Tokens[I + 1])) &&
373 "Token following # is not an argument or __VA_OPT__!");
378 CurTok.
is(tok::hashat));
383 getExpansionLocForMacroDefLoc(CurTok.
getLocation());
385 getExpansionLocForMacroDefLoc(Tokens[I+1].getLocation());
387 bool Charify = CurTok.
is(tok::hashat);
390 UnexpArg, PP, Charify, ExpansionLocStart, ExpansionLocEnd);
395 if (NextTokGetsSpace)
398 ResultToks.push_back(Res);
401 NextTokGetsSpace =
false;
406 bool NonEmptyPasteBefore =
407 !ResultToks.empty() && ResultToks.back().is(tok::hashhash);
408 bool PasteBefore = I != 0 && Tokens[I-1].
is(tok::hashhash);
409 bool PasteAfter = I+1 !=
E && Tokens[I+1].
is(tok::hashhash);
410 bool RParenAfter = I+1 !=
E && Tokens[I+1].
is(tok::r_paren);
412 assert((!NonEmptyPasteBefore || PasteBefore || VCtx.
isInVAOpt()) &&
413 "unexpected ## in ResultToks");
418 int ArgNo = II ?
Macro->getParameterNum(II) : -1;
421 ResultToks.push_back(CurTok);
423 if (NextTokGetsSpace) {
425 NextTokGetsSpace =
false;
426 }
else if (PasteBefore && !NonEmptyPasteBefore)
441 MaybeRemoveCommaBeforeVaArgs(ResultToks,
449 if (!PasteBefore && !PasteAfter) {
450 const Token *ResultArgToks;
458 ResultArgToks = ArgTok;
461 if (ResultArgToks->
isNot(tok::eof)) {
462 size_t FirstResult = ResultToks.size();
464 ResultToks.append(ResultArgToks, ResultArgToks+NumToks);
471 ResultToks.back().is(tok::comma))
476 for (
Token &Tok : llvm::drop_begin(ResultToks, FirstResult))
477 if (Tok.is(tok::hashhash))
478 Tok.setKind(tok::unknown);
482 ResultToks.begin()+FirstResult,
492 NextTokGetsSpace =
false;
497 if (NonEmptyPasteBefore) {
500 assert(VCtx.
isInVAOpt() &&
"should only happen inside a __VA_OPT__");
502 }
else if (RParenAfter)
513 bool VaArgsPseudoPaste =
false;
518 if (NonEmptyPasteBefore && ResultToks.size() >= 2 &&
519 ResultToks[ResultToks.size()-2].is(tok::comma) &&
520 (
unsigned)ArgNo ==
Macro->getNumParams()-1 &&
521 Macro->isVariadic()) {
522 VaArgsPseudoPaste =
true;
524 PP.
Diag(ResultToks.pop_back_val().getLocation(), diag::ext_paste_comma);
527 ResultToks.append(ArgToks, ArgToks+NumToks);
531 for (
Token &Tok : llvm::make_range(ResultToks.end() - NumToks,
533 if (Tok.is(tok::hashhash))
534 Tok.setKind(tok::unknown);
537 if (ExpandLocStart.
isValid()) {
539 ResultToks.end()-NumToks, ResultToks.end());
546 if (!VaArgsPseudoPaste) {
549 ResultToks[ResultToks.size() - NumToks].setFlagValue(
553 NextTokGetsSpace =
false;
568 if (RParenAfter && !NonEmptyPasteBefore)
575 if (NonEmptyPasteBefore) {
576 assert(ResultToks.back().is(tok::hashhash));
583 ResultToks.pop_back();
593 MaybeRemoveCommaBeforeVaArgs(ResultToks,
600 assert(!OwnsTokens &&
"This would leak if we already own the token list");
602 NumTokens = ResultToks.size();
605 Tokens = PP.cacheMacroExpandedTokens(
this, ResultToks);
614 const Token &SecondTok) {
615 return FirstTok.
is(tok::identifier) &&
626 if (Macro) Macro->EnableMacro();
631 if (CurTokenIdx == 0)
640 bool isFirstToken = CurTokenIdx == 0;
643 Tok = Tokens[CurTokenIdx++];
647 bool TokenIsFromPaste =
false;
651 if (!isAtEnd() && Macro &&
652 (Tokens[CurTokenIdx].is(tok::hashhash) ||
660 if (pasteTokens(Tok))
663 TokenIsFromPaste =
true;
671 if (ExpandLocStart.
isValid() &&
673 SM.isBeforeInSLocAddrSpace(Tok.
getLocation(), MacroStartSLocOffset)) {
675 if (Tok.
is(tok::comment)) {
681 instLoc = getExpansionLocForMacroDefLoc(Tok.
getLocation());
698 AtStartOfLine =
false;
699 HasLeadingSpace =
false;
723bool TokenLexer::pasteTokens(
Token &Tok) {
724 return pasteTokens(Tok,
llvm::ArrayRef(Tokens, NumTokens), CurTokenIdx);
732 unsigned int &CurIdx) {
733 assert(CurIdx > 0 &&
"## can not be the first token within tokens");
734 assert((TokenStream[CurIdx].is(tok::hashhash) ||
737 "Token at this Index must be ## or part of the MSVC 'L "
738 "#macro-arg' pasting pair");
743 if (PP.
getLangOpts().MicrosoftExt && (CurIdx >= 2) &&
744 TokenStream[CurIdx - 2].is(tok::hashhash))
748 const char *ResultTokStrPtr =
nullptr;
751 bool HasUCNs =
false;
753 auto IsAtEnd = [&TokenStream, &CurIdx] {
754 return TokenStream.size() == CurIdx;
759 PasteOpLoc = TokenStream[CurIdx].getLocation();
760 if (TokenStream[CurIdx].is(tok::hashhash))
762 assert(!IsAtEnd() &&
"No token on the RHS of a paste operator!");
765 const Token &RHS = TokenStream[CurIdx];
772 const char *BufPtr = &Buffer[0];
775 if (BufPtr != &Buffer[0])
776 memcpy(&Buffer[0], BufPtr, LHSLen);
780 BufPtr = Buffer.data() + LHSLen;
784 if (RHSLen && BufPtr != &Buffer[LHSLen])
786 memcpy(&Buffer[LHSLen], BufPtr, RHSLen);
789 Buffer.resize(LHSLen+RHSLen);
798 ResultTokTmp.
setKind(tok::string_literal);
811 Result.setKind(tok::raw_identifier);
812 Result.setRawIdentifierData(ResultTokStrPtr);
813 Result.setLocation(ResultTokLoc);
814 Result.setLength(LHSLen+RHSLen);
819 "Should be a raw location into scratch buffer");
824 const char *ScratchBufStart
833 ResultTokStrPtr, ResultTokStrPtr+LHSLen+RHSLen);
853 SM.createExpansionLoc(PasteOpLoc, ExpandLocStart, ExpandLocEnd, 2);
857 if (PP.
getLangOpts().MicrosoftExt && LHSTok.
is(tok::slash) &&
858 RHS.
is(tok::slash)) {
859 HandleMicrosoftCommentPaste(LHSTok,
Loc);
869 : diag::err_pp_bad_paste)
879 if (
Result.is(tok::hashhash))
880 Result.setKind(tok::unknown);
893 }
while (!IsAtEnd() && TokenStream[CurIdx].is(tok::hashhash));
904 StartLoc = getExpansionLocForMacroDefLoc(StartLoc);
906 EndLoc = getExpansionLocForMacroDefLoc(EndLoc);
907 FileID MacroFID =
SM.getFileID(MacroExpansionStart);
908 while (
SM.getFileID(StartLoc) != MacroFID)
909 StartLoc =
SM.getImmediateExpansionRange(StartLoc).getBegin();
910 while (
SM.getFileID(EndLoc) != MacroFID)
911 EndLoc =
SM.getImmediateExpansionRange(EndLoc).getEnd();
919 if (LHSTok.
is(tok::raw_identifier)) {
941 return Tokens[CurTokenIdx];
947 return Tokens[NumTokens-1].
is(tok::eod) && !isAtEnd();
956 PP.
Diag(OpLoc, diag::ext_comment_paste_microsoft);
963 assert(Macro &&
"Token streams can't paste comments");
964 Macro->EnableMacro();
974TokenLexer::getExpansionLocForMacroDefLoc(
SourceLocation loc)
const {
975 assert(ExpandLocStart.
isValid() && MacroExpansionStart.
isValid() &&
976 "Not appropriate for token streams");
980 assert(
SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength) &&
981 "Expected loc to come from the macro definition");
984 SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength, &relativeOffset);
999 Token *&begin_tokens,
1000 Token * end_tokens) {
1001 assert(begin_tokens + 1 < end_tokens);
1013 return Distance <= MaxDistance;
1022 Partition =
All.take_while([&](
const Token &
T) {
1023 return T.getLocation().isFileID() && NearLast(
T.getLocation());
1028 FileID BeginFID =
SM.getFileID(BeginLoc);
1030 SM.getComposedLoc(BeginFID,
SM.getFileIDSize(BeginFID));
1031 Partition =
All.take_while([&](
const Token &
T) {
1040 return T.getLocation() >= BeginLoc &&
T.getLocation() <= Limit
1041 && NearLast(
T.getLocation());
1044 assert(!Partition.empty());
1049 Partition.back().getEndLoc().getRawEncoding() -
1050 Partition.front().getLocation().getRawEncoding();
1053 SM.createMacroArgExpansionLoc(BeginLoc, ExpandLoc, FullLength);
1055#ifdef EXPENSIVE_CHECKS
1056 assert(llvm::all_of(Partition.drop_front(),
1057 [&
SM, ID =
SM.getFileID(Partition.front().getLocation())](
1059 return ID == SM.getFileID(T.getLocation());
1061 "Must have the same FIleID!");
1065 for (
Token&
T : Partition) {
1070 begin_tokens = &Partition.back() + 1;
1078void TokenLexer::updateLocForMacroArgTokens(
SourceLocation ArgIdSpellLoc,
1079 Token *begin_tokens,
1080 Token *end_tokens) {
1084 getExpansionLocForMacroDefLoc(ArgIdSpellLoc);
1086 while (begin_tokens < end_tokens) {
1088 if (end_tokens - begin_tokens == 1) {
1089 Token &Tok = *begin_tokens;
1100void TokenLexer::PropagateLineStartLeadingSpaceInfo(
Token &
Result) {
1101 AtStartOfLine =
Result.isAtStartOfLine();
1102 HasLeadingSpace =
Result.hasLeadingSpace();
Defines the Diagnostic-related interfaces.
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Defines the clang::LangOptions interface.
Defines the clang::MacroInfo and clang::MacroDirective classes.
Defines the clang::Preprocessor interface.
Defines the clang::SourceLocation class and associated facilities.
static bool isInvalid(LocType Loc, bool *Invalid)
Defines the SourceManager interface.
Defines the clang::TokenKind enum and support functions.
static bool isWideStringLiteralFromMacro(const Token &FirstTok, const Token &SecondTok)
Checks if two tokens form wide string literal.
static void updateConsecutiveMacroArgTokens(SourceManager &SM, SourceLocation ExpandLoc, Token *&begin_tokens, Token *end_tokens)
Finds the tokens that are consecutive (from the same FileID), creates a single SLocEntry for them, and assigns each token a SourceLocation that points into that SLocEntry.
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with its #include path and #line data.
One of these records is kept for each identifier that is lexed.
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier.
tok::TokenKind getTokenID() const
If this is a source-language token (e.g. 'for'), this API can be used to cause the lexer to map identifiers to source-language tokens.
bool isPoisoned() const
Return true if this token has been poisoned.
bool isStr(const char(&Str)[StrLen]) const
Return true if this is the identifier for the specified string.
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
MacroArgs - An instance of this class captures information about the formal arguments specified to a function-like macro invocation.
const Token * getUnexpArgument(unsigned Arg) const
getUnexpArgument - Return a pointer to the first token of the unexpanded token list for the specified formal.
const std::vector< Token > & getPreExpArgument(unsigned Arg, Preprocessor &PP)
getPreExpArgument - Return the pre-expanded form of the specified argument.
static unsigned getArgLength(const Token *ArgPtr)
getArgLength - Given a pointer to an expanded or unexpanded argument, return the number of tokens, not counting the EOF, that make up the argument.
bool ArgNeedsPreexpansion(const Token *ArgTok, Preprocessor &PP) const
ArgNeedsPreexpansion - If we can prove that the argument won't be affected by pre-expansion, return false. Otherwise, conservatively return true.
bool invokedWithVariadicArgument(const MacroInfo *const MI, Preprocessor &PP)
Returns true if the macro was defined with a variadic (ellipsis) parameter AND was invoked with at least one token supplied as a variadic argument (after pre-expansion).
bool isVarargsElidedUse() const
isVarargsElidedUse - Return true if this is a C99 style varargs macro invocation and there was no argument specified for the "..." argument.
static Token StringifyArgument(const Token *ArgToks, Preprocessor &PP, bool Charify, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd)
StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of tokens into the literal string token that should be produced by the C # preprocessor operator.
void destroy(Preprocessor &PP)
destroy - Destroy and deallocate the memory for this object.
Encapsulates the data about a macro definition (e.g. its tokens).
bool isFunctionLike() const
const_tokens_iterator tokens_begin() const
const_tokens_iterator tokens_end() const
unsigned getNumParams() const
unsigned getDefinitionLength(const SourceManager &SM) const
Get length in characters of the macro definition.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to point to it.
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it into the token, updating the token kind accordingly.
bool HandleEndOfTokenLexer(Token &Result)
Callback invoked when the current TokenLexer hits the end of its token stream.
void HandlePoisonedIdentifier(Token &Identifier)
Display reason for poisoned identifier.
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the token's IdentifierInfo member.
void IncrementPasteCounter(bool isFast)
Increment the counters for the number of token paste operations performed.
SourceManager & getSourceManager() const
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or down to the expansion location.
const LangOptions & getLangOpts() const
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
void HandleMicrosoftCommentPaste(Token &Tok)
When the macro expander pastes together a comment (/##/) in Microsoft mode, this method handles updating the current state, returning the token on the next source line.
Encodes a location in the source.
bool isValid() const
Return true if this is a valid SourceLocation object.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
FileID getFileID(SourceLocation SpellingLoc) const
Return the FileID for a SourceLocation.
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
SourceLocation getLocForStartOfFile(FileID FID) const
Return the source location corresponding to the first byte of the specified file.
bool isParsingPreprocessorDirective() const
isParsingPreprocessorDirective - Return true if we are in the middle of a preprocessor directive.
bool Lex(Token &Tok)
Lex and return a token from this macro stream.
std::optional< Token > peekNextPPToken() const
If TokenLexer::isAtEnd returns true (the next token lexed will pop this macro off the expansion stack), return std::nullopt; otherwise, return the next unexpanded token.
void Init(Token &Tok, SourceLocation ELEnd, MacroInfo *MI, MacroArgs *Actuals)
Initialize this TokenLexer to expand from the specified macro with the specified argument information.
Token - This structure provides full information about a lexed token.
IdentifierInfo * getIdentifierInfo() const
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when lexing in non-raw mode).
bool hasUCN() const
Returns true if this token contains a universal character name.
void clearFlag(TokenFlags Flag)
Unset the specified flag.
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
unsigned getLength() const
void setKind(tok::TokenKind K)
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...}".
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
bool isOneOf(Ts... Ks) const
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
void setLocation(SourceLocation L)
bool isNot(tok::TokenKind K) const
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
bool stringifiedInMacro() const
Returns true if this token is formed by macro by stringizing or charizing operator.
void startToken()
Reset all flags to cleared.
void setFlagValue(TokenFlags Flag, bool Val)
Set a flag to either true or false.
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the start of it in the text buffer if known, null otherwise.
void setFlag(TokenFlags Flag)
Set the specified flag.
A class for tracking whether we're inside a VA_OPT during a traversal of the tokens of a macro during macro expansion.
void hasPlaceholderAfterHashhashAtStart()
bool isInVAOpt() const
Returns true if we have seen the VA_OPT and '(' but before having seen the matching ')'.
bool isVAOptToken(const Token &T) const
void sawHashOrHashAtBefore(const bool HasLeadingSpace, const bool IsHashAt)
SourceLocation getVAOptLoc() const
unsigned int getNumberOfTokensPriorToVAOpt() const
bool getLeadingSpaceForStringifiedToken() const
bool hasStringifyOrCharifyBefore() const
bool hasCharifyBefore() const
void sawOpeningParen(SourceLocation LParenLoc)
Call this function each time an lparen is seen.
void hasPlaceholderBeforeRParen()
const Token & getEOFTok() const
bool sawClosingParen()
Call this function each time an rparen is seen.
bool beginsWithPlaceholder() const
bool endsWithPlaceholder() const
void sawVAOptFollowedByOpeningParens(const SourceLocation VAOptLoc, const unsigned int NumPriorTokens)
The JSON file list parser is used to communicate input to InstallAPI.
@ Result
The result type of a method or function.
const FunctionProtoType * T