29#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/StringExtras.h"
31#include "llvm/ADT/StringRef.h"
32#include "llvm/ADT/StringSwitch.h"
33#include "llvm/Support/Compiler.h"
34#include "llvm/Support/ConvertUTF.h"
35#include "llvm/Support/MemoryBufferRef.h"
36#include "llvm/Support/NativeFormatting.h"
37#include "llvm/Support/Unicode.h"
38#include "llvm/Support/UnicodeCharRanges.h"
64 return II->getObjCKeywordID() == objcKey;
71 return tok::objc_not_keyword;
79 case tok::annot_typename:
80 case tok::annot_decltype:
81 case tok::annot_pack_indexing_type:
87 case tok::kw___int128:
89 case tok::kw_unsigned:
97 case tok::kw__Float16:
98 case tok::kw___float128:
99 case tok::kw___ibm128:
100 case tok::kw_wchar_t:
106#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
107#include "clang/Basic/TransformTypeTraits.def"
108 case tok::kw___auto_type:
109 case tok::kw_char16_t:
110 case tok::kw_char32_t:
112 case tok::kw_decltype:
113 case tok::kw_char8_t:
125void Lexer::anchor() {}
127void Lexer::InitLexer(
const char *BufStart,
const char *BufPtr,
128 const char *BufEnd) {
129 BufferStart = BufStart;
133 assert(BufEnd[0] == 0 &&
134 "We assume that the input buffer has a null character at the end"
135 " to simplify lexing!");
140 if (BufferStart == BufferPtr) {
142 StringRef Buf(BufferStart, BufferEnd - BufferStart);
143 size_t BOMLength = llvm::StringSwitch<size_t>(Buf)
144 .StartsWith(
"\xEF\xBB\xBF", 3)
148 BufferPtr += BOMLength;
151 Is_PragmaLexer =
false;
152 CurrentConflictMarkerState =
CMK_None;
155 IsAtStartOfLine =
true;
156 IsAtPhysicalStartOfLine =
true;
158 HasLeadingSpace =
false;
159 HasLeadingEmptyMacro =
false;
174 ExtendedTokenMode = 0;
176 NewLinePtr =
nullptr;
186 FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)),
188 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
189 InitLexer(InputFile.getBufferStart(), InputFile.getBufferStart(),
190 InputFile.getBufferEnd());
199 const char *BufStart,
const char *BufPtr,
const char *BufEnd,
200 bool IsFirstIncludeOfFile)
202 IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
203 InitLexer(BufStart, BufPtr, BufEnd);
214 bool IsFirstIncludeOfFile)
215 :
Lexer(
SM.getLocForStartOfFile(FID), langOpts, FromFile.getBufferStart(),
216 FromFile.getBufferStart(), FromFile.getBufferEnd(),
217 IsFirstIncludeOfFile) {}
220 assert(
PP &&
"Cannot reset token mode without a preprocessor");
221 if (LangOpts.TraditionalCPP)
249 FileID SpellingFID =
SM.getFileID(SpellingLoc);
250 llvm::MemoryBufferRef InputFile =
SM.getBufferOrFake(SpellingFID);
256 const char *StrData =
SM.getCharacterData(SpellingLoc);
258 L->BufferPtr = StrData;
259 L->BufferEnd = StrData+TokLen;
260 assert(L->BufferEnd[0] == 0 &&
"Buffer is not nul terminated!");
264 L->FileLoc =
SM.createExpansionLoc(
SM.getLocForStartOfFile(SpellingFID),
266 ExpansionLocEnd, TokLen);
273 L->Is_PragmaLexer =
true;
278 this->IsAtPhysicalStartOfLine = IsAtStartOfLine;
279 this->IsAtStartOfLine = IsAtStartOfLine;
280 assert((BufferStart + Offset) <= BufferEnd);
281 BufferPtr = BufferStart + Offset;
285 typename T::size_type i = 0, e = Str.size();
287 if (Str[i] ==
'\\' || Str[i] == Quote) {
288 Str.insert(Str.begin() + i,
'\\');
291 }
else if (Str[i] ==
'\n' || Str[i] ==
'\r') {
293 if ((i < e - 1) && (Str[i + 1] ==
'\n' || Str[i + 1] ==
'\r') &&
294 Str[i] != Str[i + 1]) {
300 Str.insert(Str.begin() + i + 1,
'n');
310 std::string
Result = std::string(Str);
311 char Quote = Charify ?
'\'' :
'"';
326 assert(Tok.
needsCleaning() &&
"getSpellingSlow called on simple token");
329 const char *BufEnd = BufPtr + Tok.
getLength();
333 while (BufPtr < BufEnd) {
335 Spelling[Length++] = CharAndSize.Char;
336 BufPtr += CharAndSize.Size;
338 if (Spelling[Length - 1] ==
'"')
346 Spelling[Length - 2] ==
'R' && Spelling[Length - 1] ==
'"') {
349 const char *RawEnd = BufEnd;
350 do --RawEnd;
while (*RawEnd !=
'"');
351 size_t RawLength = RawEnd - BufPtr + 1;
354 memcpy(Spelling + Length, BufPtr, RawLength);
362 while (BufPtr < BufEnd) {
364 Spelling[Length++] = CharAndSize.Char;
365 BufPtr += CharAndSize.Size;
369 "NeedsCleaning flag set on token that didn't need cleaning!");
387 bool invalidTemp =
false;
388 StringRef file =
SM.getBufferData(locInfo.first, &invalidTemp);
390 if (invalid) *invalid =
true;
394 const char *tokenBegin = file.data() + locInfo.second;
397 Lexer lexer(
SM.getLocForStartOfFile(locInfo.first), options,
398 file.begin(), tokenBegin, file.end());
406 return StringRef(tokenBegin,
length);
410 buffer.resize(
getSpellingSlow(token, tokenBegin, options, buffer.data()));
411 return StringRef(buffer.data(), buffer.size());
421 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
423 bool CharDataInvalid =
false;
433 return std::string(TokStart, TokStart + Tok.
getLength());
454 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
456 const char *TokStart =
nullptr;
458 if (Tok.
is(tok::raw_identifier))
463 Buffer = II->getNameStart();
464 return II->getLength();
474 bool CharDataInvalid =
false;
478 if (CharDataInvalid) {
491 return getSpellingSlow(Tok, TokStart, LangOpts,
const_cast<char*
>(Buffer));
512 bool IgnoreWhiteSpace) {
524 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
528 const char *StrData = Buffer.data()+LocInfo.second;
530 if (!IgnoreWhiteSpace &&
isWhitespace(SkipEscapedNewLines(StrData)[0]))
534 Lexer TheLexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
535 Buffer.begin(), StrData, Buffer.end());
544 const char *BufStart = Buffer.data();
545 if (Offset >= Buffer.size())
548 const char *LexStart = BufStart + Offset;
549 for (; LexStart != BufStart; --LexStart) {
565 if (LocInfo.first.isInvalid())
569 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
575 const char *StrData = Buffer.data() + LocInfo.second;
577 if (!LexStart || LexStart == StrData)
582 Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,
602 }
while (TheTok.
getKind() != tok::eof);
614 if (!
SM.isMacroArgExpansion(
Loc))
621 assert(FileLocInfo.first == BeginFileLocInfo.first &&
622 FileLocInfo.second >= BeginFileLocInfo.second);
628enum PreambleDirectiveKind {
643 Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),
647 bool InPreprocessorDirective =
false;
651 unsigned MaxLineOffset = 0;
653 const char *CurPtr = Buffer.begin();
654 unsigned CurLine = 0;
655 while (CurPtr != Buffer.end()) {
659 if (CurLine == MaxLines)
663 if (CurPtr != Buffer.end())
664 MaxLineOffset = CurPtr - Buffer.begin();
670 if (InPreprocessorDirective) {
672 if (TheTok.
getKind() == tok::eof) {
683 InPreprocessorDirective =
false;
692 if (MaxLineOffset && TokOffset >= MaxLineOffset)
697 if (TheTok.
getKind() == tok::comment) {
705 Token HashTok = TheTok;
706 InPreprocessorDirective =
true;
715 PreambleDirectiveKind PDK
716 = llvm::StringSwitch<PreambleDirectiveKind>(
Keyword)
717 .Case(
"include", PDK_Skipped)
718 .Case(
"__include_macros", PDK_Skipped)
719 .Case(
"define", PDK_Skipped)
720 .Case(
"undef", PDK_Skipped)
721 .Case(
"line", PDK_Skipped)
722 .Case(
"error", PDK_Skipped)
723 .Case(
"pragma", PDK_Skipped)
724 .Case(
"import", PDK_Skipped)
725 .Case(
"include_next", PDK_Skipped)
726 .Case(
"warning", PDK_Skipped)
727 .Case(
"ident", PDK_Skipped)
728 .Case(
"sccs", PDK_Skipped)
729 .Case(
"assert", PDK_Skipped)
730 .Case(
"unassert", PDK_Skipped)
731 .Case(
"if", PDK_Skipped)
732 .Case(
"ifdef", PDK_Skipped)
733 .Case(
"ifndef", PDK_Skipped)
734 .Case(
"elif", PDK_Skipped)
735 .Case(
"elifdef", PDK_Skipped)
736 .Case(
"elifndef", PDK_Skipped)
737 .Case(
"else", PDK_Skipped)
738 .Case(
"endif", PDK_Skipped)
739 .Default(PDK_Unknown);
756 TheTok.
getKind() == tok::raw_identifier &&
758 LangOpts.CPlusPlusModules) {
761 Token ModuleTok = TheTok;
764 }
while (TheTok.
getKind() == tok::comment);
765 if (TheTok.
getKind() != tok::semi) {
780 if (ActiveCommentLoc.
isValid())
781 End = ActiveCommentLoc;
796 const char *TokPtr =
SM.getCharacterData(TokStart, &
Invalid);
799 if (
Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
802 unsigned PhysOffset = 0;
807 while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
817 for (; CharNo; --CharNo) {
819 TokPtr += CharAndSize.Size;
820 PhysOffset += CharAndSize.Size;
827 if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
828 PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
877 if (!
SM.isAtStartOfImmediateMacroExpansion(loc, &expansionLoc))
883 *MacroBegin = expansionLoc;
905 if (!
SM.isAtEndOfImmediateMacroExpansion(afterLoc, &expansionLoc))
911 *MacroEnd = expansionLoc;
924 if (
Range.isTokenRange()) {
931 auto [FID, BeginOffs] =
SM.getDecomposedLoc(
Begin);
936 if (!
SM.isInFileID(End, FID, &EndOffs) ||
946 return SM.getSLocEntry(
SM.getFileID(
Loc))
948 .isExpansionTokenRange();
970 if (
Range.isTokenRange()) {
991 if (
Range.isTokenRange())
1032 if (beginInfo.first.isInvalid()) {
1038 if (!
SM.isInFileID(
Range.
getEnd(), beginInfo.first, &EndOffs) ||
1039 beginInfo.second > EndOffs) {
1045 bool invalidTemp =
false;
1046 StringRef file =
SM.getBufferData(beginInfo.first, &invalidTemp);
1053 return file.substr(beginInfo.second, EndOffs - beginInfo.second);
1059 assert(
Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1075 Loc =
SM.getImmediateExpansionRange(
Loc).getBegin();
1083 if (
SM.isInFileID(SpellLoc, MacroFID))
1099 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1100 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1105 assert(
Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1107 while (
SM.isMacroArgExpansion(
Loc))
1108 Loc =
SM.getImmediateExpansionRange(
Loc).getBegin();
1114 if (!SpellLoc.
isFileID() ||
SM.isWrittenInScratchSpace(SpellLoc))
1120 Loc =
SM.getSpellingLoc(
SM.getImmediateExpansionRange(
Loc).getBegin());
1126 StringRef ExpansionBuffer =
SM.getBufferData(ExpansionInfo.first);
1127 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1136 if (Str - 1 < BufferStart)
1139 if ((Str[0] ==
'\n' && Str[-1] ==
'\r') ||
1140 (Str[0] ==
'\r' && Str[-1] ==
'\n')) {
1141 if (Str - 2 < BufferStart)
1151 return *Str ==
'\\';
1159 if (LocInfo.first.isInvalid())
1162 StringRef Buffer =
SM.getBufferData(LocInfo.first, &
Invalid);
1168 StringRef Rest = Buffer.substr(
Line - Buffer.data());
1169 size_t NumWhitespaceChars = Rest.find_first_not_of(
" \t");
1170 return NumWhitespaceChars == StringRef::npos
1172 : Rest.take_front(NumWhitespaceChars);
1187 unsigned CharNo,
unsigned TokLen) {
1188 assert(FileLoc.
isMacroID() &&
"Must be a macro expansion");
1204 return SM.createExpansionLoc(SpellingLoc, II.
getBegin(), II.
getEnd(), TokLen);
1210 unsigned TokLen)
const {
1211 assert(
Loc >= BufferStart &&
Loc <= BufferEnd &&
1212 "Location out of range for this buffer!");
1216 unsigned CharNo =
Loc-BufferStart;
1222 assert(
PP &&
"This doesn't work on raw lexers");
1241 case '=':
return '#';
1242 case ')':
return ']';
1243 case '(':
return '[';
1244 case '!':
return '|';
1245 case '\'':
return '^';
1246 case '>':
return '}';
1247 case '/':
return '\\';
1248 case '<':
return '{';
1249 case '-':
return '~';
1264 L->
Diag(CP-2, diag::trigraph_ignored);
1269 L->
Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
1281 if (Ptr[Size-1] !=
'\n' && Ptr[Size-1] !=
'\r')
1285 if ((Ptr[Size] ==
'\r' || Ptr[Size] ==
'\n') &&
1286 Ptr[Size-1] != Ptr[Size])
1299const char *Lexer::SkipEscapedNewLines(
const char *
P) {
1301 const char *AfterEscape;
1304 }
else if (*
P ==
'?') {
1306 if (
P[1] !=
'?' ||
P[2] !=
'/')
1316 if (NewLineSize == 0)
return P;
1317 P = AfterEscape+NewLineSize;
1324 bool IncludeComments) {
1327 return std::nullopt;
1335 bool InvalidTemp =
false;
1336 StringRef
File =
SM.getBufferData(LocInfo.first, &InvalidTemp);
1338 return std::nullopt;
1340 const char *TokenBegin =
File.data() + LocInfo.second;
1343 Lexer lexer(
SM.getLocForStartOfFile(LocInfo.first), LangOpts,
File.begin(),
1344 TokenBegin,
File.end());
1355 bool IncludeComments) {
1356 const auto StartOfFile =
SM.getLocForStartOfFile(
SM.getFileID(
Loc));
1357 while (
Loc != StartOfFile) {
1360 return std::nullopt;
1366 if (!Tok.
is(tok::comment) || IncludeComments) {
1370 return std::nullopt;
1379 const LangOptions &LangOpts,
bool SkipTrailingWhitespaceAndNewLine) {
1381 if (!Tok || Tok->isNot(TKind))
1386 unsigned NumWhitespaceChars = 0;
1387 if (SkipTrailingWhitespaceAndNewLine) {
1388 const char *TokenEnd =
SM.getCharacterData(TokenLoc) + Tok->getLength();
1389 unsigned char C = *TokenEnd;
1392 NumWhitespaceChars++;
1396 if (
C ==
'\n' ||
C ==
'\r') {
1399 NumWhitespaceChars++;
1400 if ((
C ==
'\n' ||
C ==
'\r') &&
C != PrevC)
1401 NumWhitespaceChars++;
1426 if (Ptr[0] ==
'\\') {
1432 return {
'\\', Size};
1442 Diag(Ptr, diag::backslash_newline_space);
1445 Size += EscapedNewLineSize;
1446 Ptr += EscapedNewLineSize;
1449 auto CharAndSize = getCharAndSizeSlow(Ptr, Tok);
1450 CharAndSize.Size += Size;
1455 return {
'\\',
Size};
1459 if (Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1463 LangOpts.Trigraphs)) {
1469 if (
C ==
'\\')
goto Slash;
1475 return {*Ptr,
Size + 1u};
1489 if (Ptr[0] ==
'\\') {
1495 return {
'\\',
Size};
1500 Size += EscapedNewLineSize;
1501 Ptr += EscapedNewLineSize;
1504 auto CharAndSize = getCharAndSizeSlowNoWarn(Ptr, LangOpts);
1505 CharAndSize.Size +=
Size;
1510 return {
'\\',
Size};
1514 if (LangOpts.Trigraphs && Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1520 if (
C ==
'\\')
goto Slash;
1526 return {*Ptr,
Size + 1u};
1534void Lexer::SetByteOffset(
unsigned Offset,
bool StartOfLine) {
1535 BufferPtr = BufferStart + Offset;
1536 if (BufferPtr > BufferEnd)
1537 BufferPtr = BufferEnd;
1541 IsAtStartOfLine = StartOfLine;
1542 IsAtPhysicalStartOfLine = StartOfLine;
1546 static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
1548 return UnicodeWhitespaceChars.contains(Codepoint);
1553 llvm::raw_svector_ostream CharOS(CharBuf);
1554 llvm::write_hex(CharOS,
C, llvm::HexPrintStyle::Upper, 4);
1565 bool IsStart,
bool &IsExtension) {
1566 static const llvm::sys::UnicodeCharSet MathStartChars(
1568 static const llvm::sys::UnicodeCharSet MathContinueChars(
1570 if (MathStartChars.contains(
C) ||
1571 (!IsStart && MathContinueChars.contains(
C))) {
1579 bool &IsExtension) {
1580 if (LangOpts.AsmPreprocessor) {
1582 }
else if (LangOpts.DollarIdents &&
'$' ==
C) {
1584 }
else if (LangOpts.CPlusPlus || LangOpts.C23) {
1589 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1591 if (
C ==
'_' || XIDStartChars.contains(
C) || XIDContinueChars.contains(
C))
1595 }
else if (LangOpts.C11) {
1596 static const llvm::sys::UnicodeCharSet C11AllowedIDChars(
1598 return C11AllowedIDChars.contains(
C);
1600 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1602 return C99AllowedIDChars.contains(
C);
1607 bool &IsExtension) {
1608 assert(
C > 0x7F &&
"isAllowedInitiallyIDChar called with an ASCII codepoint");
1609 IsExtension =
false;
1610 if (LangOpts.AsmPreprocessor) {
1613 if (LangOpts.CPlusPlus || LangOpts.C23) {
1614 static const llvm::sys::UnicodeCharSet XIDStartChars(
XIDStartRanges);
1615 if (XIDStartChars.contains(
C))
1623 static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars(
1625 return !C11DisallowedInitialIDChars.contains(
C);
1627 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1629 return !C99DisallowedInitialIDChars.contains(
C);
1635 static const llvm::sys::UnicodeCharSet MathStartChars(
1637 static const llvm::sys::UnicodeCharSet MathContinueChars(
1640 (void)MathStartChars;
1641 (void)MathContinueChars;
1642 assert((MathStartChars.contains(
C) || MathContinueChars.contains(
C)) &&
1643 "Unexpected mathematical notation codepoint");
1659 CannotAppearInIdentifier = 0,
1660 CannotStartIdentifier
1663 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1665 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1667 if (!C99AllowedIDChars.contains(
C)) {
1670 << CannotAppearInIdentifier;
1671 }
else if (IsFirst && C99DisallowedInitialIDChars.contains(
C)) {
1674 << CannotStartIdentifier;
1686 struct HomoglyphPair {
1689 bool operator<(HomoglyphPair R)
const {
return Character < R.Character; }
1691 static constexpr HomoglyphPair SortedHomoglyphs[] = {
1744 std::lower_bound(std::begin(SortedHomoglyphs),
1745 std::end(SortedHomoglyphs) - 1, HomoglyphPair{
C,
'\0'});
1746 if (Homoglyph->Character ==
C) {
1747 if (Homoglyph->LooksLike) {
1748 const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};
1769 if ((IsFirst && IsIDStart) || (!IsFirst && IsIDContinue))
1772 bool InvalidOnlyAtStart = IsFirst && !IsIDStart && IsIDContinue;
1774 if (!IsFirst || InvalidOnlyAtStart) {
1785bool Lexer::tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
1787 const char *UCNPtr = CurPtr +
Size;
1788 uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr,
nullptr);
1789 if (CodePoint == 0) {
1792 bool IsExtension =
false;
1817 if ((UCNPtr - CurPtr == 6 && CurPtr[1] ==
'u') ||
1818 (UCNPtr - CurPtr == 10 && CurPtr[1] ==
'U'))
1821 while (CurPtr != UCNPtr)
1822 (void)getAndAdvanceChar(CurPtr,
Result);
1826bool Lexer::tryConsumeIdentifierUTF8Char(
const char *&CurPtr,
Token &
Result) {
1827 llvm::UTF32 CodePoint;
1832 unsigned FirstCodeUnitSize;
1833 getCharAndSize(CurPtr, FirstCodeUnitSize);
1834 const char *CharStart = CurPtr + FirstCodeUnitSize - 1;
1835 const char *UnicodePtr = CharStart;
1837 llvm::ConversionResult ConvResult = llvm::convertUTF8Sequence(
1838 (
const llvm::UTF8 **)&UnicodePtr, (
const llvm::UTF8 *)BufferEnd,
1839 &CodePoint, llvm::strictConversion);
1840 if (ConvResult != llvm::conversionOK)
1843 bool IsExtension =
false;
1872 ConsumeChar(CurPtr, FirstCodeUnitSize,
Result);
1873 CurPtr = UnicodePtr;
1877bool Lexer::LexUnicodeIdentifierStart(
Token &
Result, uint32_t
C,
1878 const char *CurPtr) {
1879 bool IsExtension =
false;
1894 return LexIdentifierContinue(
Result, CurPtr);
1919 FormTokenWithChars(
Result, CurPtr, tok::unknown);
1925 [[maybe_unused]]
const char *BufferEnd) {
1927 alignas(16)
static constexpr char AsciiIdentifierRange[16] = {
1928 '_',
'_',
'A',
'Z',
'a',
'z',
'0',
'9',
1930 constexpr ssize_t BytesPerRegister = 16;
1932 __m128i AsciiIdentifierRangeV =
1935 while (LLVM_LIKELY(BufferEnd - CurPtr >= BytesPerRegister)) {
1942 if (Consumed == BytesPerRegister)
1948 unsigned char C = *CurPtr;
1954bool Lexer::LexIdentifierContinue(
Token &
Result,
const char *CurPtr) {
1963 unsigned char C = getCharAndSize(CurPtr, Size);
1965 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1970 if (!LangOpts.DollarIdents)
1974 Diag(CurPtr, diag::ext_dollar_in_identifier);
1975 CurPtr = ConsumeChar(CurPtr, Size,
Result);
1978 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
1986 const char *IdStart = BufferPtr;
1987 FormTokenWithChars(
Result, CurPtr, tok::raw_identifier);
1988 Result.setRawIdentifierData(IdStart);
2004 if (isCodeCompletionPoint(CurPtr)) {
2006 Result.setKind(tok::code_completion);
2012 assert(*CurPtr == 0 &&
"Completion character must be 0");
2017 if (CurPtr < BufferEnd) {
2035bool Lexer::isHexaLiteral(
const char *Start,
const LangOptions &LangOpts) {
2037 char C1 = CharAndSize1.Char;
2043 char C2 = CharAndSize2.Char;
2044 return (C2 ==
'x' || C2 ==
'X');
2050bool Lexer::LexNumericConstant(
Token &
Result,
const char *CurPtr) {
2052 char C = getCharAndSize(CurPtr, Size);
2055 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2057 if (LangOpts.HLSL &&
C ==
'.' && (*CurPtr ==
'x' || *CurPtr ==
'r')) {
2061 C = getCharAndSize(CurPtr, Size);
2065 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'E' || PrevCh ==
'e')) {
2068 if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts))
2069 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
2073 if ((
C ==
'-' ||
C ==
'+') && (PrevCh ==
'P' || PrevCh ==
'p')) {
2077 bool IsHexFloat =
true;
2078 if (!LangOpts.C99) {
2079 if (!isHexaLiteral(BufferPtr, LangOpts))
2081 else if (!LangOpts.CPlusPlus17 &&
2082 std::find(BufferPtr, CurPtr,
'_') != CurPtr)
2086 return LexNumericConstant(
Result, ConsumeChar(CurPtr, Size,
Result));
2090 if (
C ==
'\'' && (LangOpts.CPlusPlus14 || LangOpts.C23)) {
2094 Diag(CurPtr, LangOpts.CPlusPlus
2095 ? diag::warn_cxx11_compat_digit_separator
2096 : diag::warn_c23_compat_digit_separator);
2097 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2098 CurPtr = ConsumeChar(CurPtr, NextSize,
Result);
2099 return LexNumericConstant(
Result, CurPtr);
2104 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2105 return LexNumericConstant(
Result, CurPtr);
2107 return LexNumericConstant(
Result, CurPtr);
2110 const char *TokStart = BufferPtr;
2111 FormTokenWithChars(
Result, CurPtr, tok::numeric_constant);
2112 Result.setLiteralData(TokStart);
2118const char *Lexer::LexUDSuffix(
Token &
Result,
const char *CurPtr,
2119 bool IsStringLiteral) {
2120 assert(LangOpts.CPlusPlus);
2124 char C = getCharAndSize(CurPtr, Size);
2125 bool Consumed =
false;
2128 if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result))
2130 else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result))
2136 if (!LangOpts.CPlusPlus11) {
2139 C ==
'_' ? diag::warn_cxx11_compat_user_defined_literal
2140 : diag::warn_cxx11_compat_reserved_user_defined_literal)
2151 bool IsUDSuffix =
false;
2154 else if (IsStringLiteral && LangOpts.CPlusPlus14) {
2158 const unsigned MaxStandardSuffixLength = 3;
2159 char Buffer[MaxStandardSuffixLength] = {
C };
2160 unsigned Consumed =
Size;
2163 auto [Next, NextSize] =
2167 const StringRef CompleteSuffix(Buffer, Chars);
2173 if (Chars == MaxStandardSuffixLength)
2177 Buffer[Chars++] = Next;
2178 Consumed += NextSize;
2184 Diag(CurPtr, LangOpts.MSVCCompat
2185 ? diag::ext_ms_reserved_user_defined_literal
2186 : diag::ext_reserved_user_defined_literal)
2191 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2196 C = getCharAndSize(CurPtr, Size);
2198 CurPtr = ConsumeChar(CurPtr, Size,
Result);
2199 }
else if (
C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size,
Result)) {
2200 }
else if (!
isASCII(
C) && tryConsumeIdentifierUTF8Char(CurPtr,
Result)) {
2210bool Lexer::LexStringLiteral(
Token &
Result,
const char *CurPtr,
2212 const char *AfterQuote = CurPtr;
2214 const char *NulCharacter =
nullptr;
2217 (Kind == tok::utf8_string_literal ||
2218 Kind == tok::utf16_string_literal ||
2219 Kind == tok::utf32_string_literal))
2220 Diag(BufferPtr, LangOpts.CPlusPlus ? diag::warn_cxx98_compat_unicode_literal
2221 : diag::warn_c99_compat_unicode_literal);
2223 char C = getAndAdvanceChar(CurPtr,
Result);
2228 C = getAndAdvanceChar(CurPtr,
Result);
2230 if (
C ==
'\n' ||
C ==
'\r' ||
2231 (
C == 0 && CurPtr-1 == BufferEnd)) {
2233 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
2234 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2239 if (isCodeCompletionPoint(CurPtr-1)) {
2241 codeCompleteIncludedFile(AfterQuote, CurPtr - 1,
false);
2244 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2249 NulCharacter = CurPtr-1;
2251 C = getAndAdvanceChar(CurPtr,
Result);
2255 if (LangOpts.CPlusPlus)
2256 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2260 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2263 const char *TokStart = BufferPtr;
2264 FormTokenWithChars(
Result, CurPtr, Kind);
2265 Result.setLiteralData(TokStart);
2271bool Lexer::LexRawStringLiteral(
Token &
Result,
const char *CurPtr,
2279 Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal);
2281 unsigned PrefixLen = 0;
2285 llvm::is_contained({
'$',
'@',
'`'}, CurPtr[PrefixLen])) {
2286 const char *Pos = &CurPtr[PrefixLen];
2287 Diag(Pos, LangOpts.CPlusPlus26
2288 ? diag::warn_cxx26_compat_raw_string_literal_character_set
2289 : diag::ext_cxx26_raw_string_literal_character_set)
2290 << StringRef(Pos, 1);
2296 if (CurPtr[PrefixLen] !=
'(') {
2298 const char *PrefixEnd = &CurPtr[PrefixLen];
2299 if (PrefixLen == 16) {
2300 Diag(PrefixEnd, diag::err_raw_delim_too_long);
2301 }
else if (*PrefixEnd ==
'\n') {
2302 Diag(PrefixEnd, diag::err_invalid_newline_raw_delim);
2304 Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
2305 << StringRef(PrefixEnd, 1);
2317 if (
C == 0 && CurPtr-1 == BufferEnd) {
2323 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2328 const char *Prefix = CurPtr;
2329 CurPtr += PrefixLen + 1;
2336 if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] ==
'"') {
2337 CurPtr += PrefixLen + 1;
2340 }
else if (
C == 0 && CurPtr-1 == BufferEnd) {
2342 Diag(BufferPtr, diag::err_unterminated_raw_string)
2343 << StringRef(Prefix, PrefixLen);
2344 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2350 if (LangOpts.CPlusPlus)
2351 CurPtr = LexUDSuffix(
Result, CurPtr,
true);
2354 const char *TokStart = BufferPtr;
2355 FormTokenWithChars(
Result, CurPtr, Kind);
2356 Result.setLiteralData(TokStart);
2362bool Lexer::LexAngledStringLiteral(
Token &
Result,
const char *CurPtr) {
2364 const char *NulCharacter =
nullptr;
2365 const char *AfterLessPos = CurPtr;
2366 char C = getAndAdvanceChar(CurPtr,
Result);
2371 C = getAndAdvanceChar(CurPtr,
Result);
2374 (
C == 0 && (CurPtr - 1 == BufferEnd))) {
2377 FormTokenWithChars(
Result, AfterLessPos, tok::less);
2382 if (isCodeCompletionPoint(CurPtr - 1)) {
2383 codeCompleteIncludedFile(AfterLessPos, CurPtr - 1,
true);
2385 FormTokenWithChars(
Result, CurPtr - 1, tok::unknown);
2388 NulCharacter = CurPtr-1;
2390 C = getAndAdvanceChar(CurPtr,
Result);
2395 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2398 const char *TokStart = BufferPtr;
2399 FormTokenWithChars(
Result, CurPtr, tok::header_name);
2400 Result.setLiteralData(TokStart);
2404void Lexer::codeCompleteIncludedFile(
const char *PathStart,
2405 const char *CompletionPoint,
2408 StringRef PartialPath(PathStart, CompletionPoint - PathStart);
2409 llvm::StringRef SlashChars = LangOpts.MSVCCompat ?
"/\\" :
"/";
2410 auto Slash = PartialPath.find_last_of(SlashChars);
2412 (Slash == StringRef::npos) ?
"" : PartialPath.take_front(Slash);
2413 const char *StartOfFilename =
2414 (Slash == StringRef::npos) ? PathStart : PathStart + Slash + 1;
2417 StringRef(StartOfFilename, CompletionPoint - StartOfFilename)));
2420 while (CompletionPoint < BufferEnd) {
2421 char Next = *(CompletionPoint + 1);
2422 if (Next == 0 || Next ==
'\r' || Next ==
'\n')
2425 if (Next == (IsAngled ?
'>' :
'"'))
2427 if (SlashChars.contains(Next))
2439bool Lexer::LexCharConstant(
Token &
Result,
const char *CurPtr,
2442 const char *NulCharacter =
nullptr;
2445 if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
2446 Diag(BufferPtr, LangOpts.CPlusPlus
2447 ? diag::warn_cxx98_compat_unicode_literal
2448 : diag::warn_c99_compat_unicode_literal);
2449 else if (Kind == tok::utf8_char_constant)
2450 Diag(BufferPtr, LangOpts.CPlusPlus
2451 ? diag::warn_cxx14_compat_u8_character_literal
2452 : diag::warn_c17_compat_u8_character_literal);
2455 char C = getAndAdvanceChar(CurPtr,
Result);
2458 Diag(BufferPtr, diag::ext_empty_character);
2459 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2466 C = getAndAdvanceChar(CurPtr,
Result);
2468 if (
C ==
'\n' ||
C ==
'\r' ||
2469 (
C == 0 && CurPtr-1 == BufferEnd)) {
2471 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0;
2472 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2477 if (isCodeCompletionPoint(CurPtr-1)) {
2479 FormTokenWithChars(
Result, CurPtr-1, tok::unknown);
2484 NulCharacter = CurPtr-1;
2486 C = getAndAdvanceChar(CurPtr,
Result);
2490 if (LangOpts.CPlusPlus)
2491 CurPtr = LexUDSuffix(
Result, CurPtr,
false);
2495 Diag(NulCharacter, diag::null_in_char_or_string) << 0;
2498 const char *TokStart = BufferPtr;
2499 FormTokenWithChars(
Result, CurPtr, Kind);
2500 Result.setLiteralData(TokStart);
2508bool Lexer::SkipWhitespace(
Token &
Result,
const char *CurPtr,
2509 bool &TokAtPhysicalStartOfLine) {
2513 unsigned char Char = *CurPtr;
2515 const char *lastNewLine =
nullptr;
2516 auto setLastNewLine = [&](
const char *Ptr) {
2522 setLastNewLine(CurPtr - 1);
2541 if (*CurPtr ==
'\n')
2542 setLastNewLine(CurPtr);
2549 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2551 IsAtStartOfLine =
true;
2552 IsAtPhysicalStartOfLine =
true;
2559 char PrevChar = CurPtr[-1];
2565 TokAtPhysicalStartOfLine =
true;
2567 if (NewLinePtr && lastNewLine && NewLinePtr != lastNewLine &&
PP) {
2584bool Lexer::SkipLineComment(
Token &
Result,
const char *CurPtr,
2585 bool &TokAtPhysicalStartOfLine) {
2590 Diag(BufferPtr, diag::ext_line_comment);
2608 bool UnicodeDecodingAlreadyDiagnosed =
false;
2615 C !=
'\n' &&
C !=
'\r') {
2617 UnicodeDecodingAlreadyDiagnosed =
false;
2621 unsigned Length = llvm::getUTF8SequenceSize(
2622 (
const llvm::UTF8 *)CurPtr, (
const llvm::UTF8 *)BufferEnd);
2625 Diag(CurPtr, diag::warn_invalid_utf8_in_comment);
2626 UnicodeDecodingAlreadyDiagnosed =
true;
2629 UnicodeDecodingAlreadyDiagnosed =
false;
2635 const char *NextLine = CurPtr;
2638 const char *EscapePtr = CurPtr-1;
2639 bool HasSpace =
false;
2645 if (*EscapePtr ==
'\\')
2648 else if (EscapePtr[0] ==
'/' && EscapePtr[-1] ==
'?' &&
2649 EscapePtr[-2] ==
'?' && LangOpts.Trigraphs)
2651 CurPtr = EscapePtr-2;
2657 Diag(EscapePtr, diag::backslash_newline_space);
2664 const char *OldPtr = CurPtr;
2667 C = getAndAdvanceChar(CurPtr,
Result);
2672 if (
C != 0 && CurPtr == OldPtr+1) {
2680 if (CurPtr != OldPtr + 1 &&
C !=
'/' &&
2681 (CurPtr == BufferEnd + 1 || CurPtr[0] !=
'/')) {
2682 for (; OldPtr != CurPtr; ++OldPtr)
2683 if (OldPtr[0] ==
'\n' || OldPtr[0] ==
'\r') {
2687 const char *ForwardPtr = CurPtr;
2690 if (ForwardPtr[0] ==
'/' && ForwardPtr[1] ==
'/')
2695 Diag(OldPtr-1, diag::ext_multi_line_line_comment);
2700 if (
C ==
'\r' ||
C ==
'\n' || CurPtr == BufferEnd + 1) {
2705 if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2723 return SaveLineComment(
Result, CurPtr);
2737 NewLinePtr = CurPtr++;
2741 TokAtPhysicalStartOfLine =
true;
2750bool Lexer::SaveLineComment(
Token &
Result,
const char *CurPtr) {
2753 FormTokenWithChars(
Result, CurPtr, tok::comment);
2765 assert(Spelling[0] ==
'/' && Spelling[1] ==
'/' &&
"Not line comment?");
2769 Result.setKind(tok::comment);
2780 assert(CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r');
2783 const char *TrigraphPos =
nullptr;
2785 const char *SpacePos =
nullptr;
2792 if (CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r') {
2794 if (CurPtr[0] == CurPtr[1])
2808 if (*CurPtr ==
'\\') {
2810 }
else if (CurPtr[0] ==
'/' && CurPtr[-1] ==
'?' && CurPtr[-2] ==
'?') {
2812 TrigraphPos = CurPtr - 2;
2823 if (*CurPtr !=
'\n' && *CurPtr !=
'\r')
2832 L->
Diag(TrigraphPos, diag::trigraph_ignored_block_comment);
2836 L->
Diag(TrigraphPos, diag::trigraph_ends_block_comment);
2841 L->
Diag(CurPtr + 1, diag::escaped_newline_block_comment_end);
2845 L->
Diag(SpacePos, diag::backslash_newline_space);
2851#include <emmintrin.h>
2866bool Lexer::SkipBlockComment(
Token &
Result,
const char *CurPtr,
2867 bool &TokAtPhysicalStartOfLine) {
2877 unsigned char C = getCharAndSize(CurPtr, CharSize);
2879 if (
C == 0 && CurPtr == BufferEnd+1) {
2881 Diag(BufferPtr, diag::err_unterminated_block_comment);
2887 FormTokenWithChars(
Result, CurPtr, tok::unknown);
2904 bool UnicodeDecodingAlreadyDiagnosed =
false;
2909 if (CurPtr + 24 < BufferEnd &&
2914 while (
C !=
'/' && (
intptr_t)CurPtr % 16 != 0) {
2919 if (
C ==
'/')
goto FoundSlash;
2923 while (CurPtr + 16 < BufferEnd) {
2925 if (LLVM_UNLIKELY(Mask != 0)) {
2935 CurPtr += llvm::countr_zero<unsigned>(cmp) + 1;
2941 __vector
unsigned char LongUTF = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2942 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
2943 0x80, 0x80, 0x80, 0x80};
2944 __vector
unsigned char Slashes = {
2945 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/',
2946 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/'
2948 while (CurPtr + 16 < BufferEnd) {
2950 vec_any_ge(*(
const __vector
unsigned char *)CurPtr, LongUTF)))
2952 if (
vec_any_eq(*(
const __vector
unsigned char *)CurPtr, Slashes)) {
2959 while (CurPtr + 16 < BufferEnd) {
2960 bool HasNonASCII =
false;
2961 for (
unsigned I = 0; I < 16; ++I)
2962 HasNonASCII |= !
isASCII(CurPtr[I]);
2964 if (LLVM_UNLIKELY(HasNonASCII))
2967 bool HasSlash =
false;
2968 for (
unsigned I = 0; I < 16; ++I)
2969 HasSlash |= CurPtr[I] ==
'/';
2983 while (
C !=
'/' &&
C !=
'\0') {
2985 UnicodeDecodingAlreadyDiagnosed =
false;
2992 unsigned Length = llvm::getUTF8SequenceSize(
2993 (
const llvm::UTF8 *)CurPtr - 1, (
const llvm::UTF8 *)BufferEnd);
2996 Diag(CurPtr - 1, diag::warn_invalid_utf8_in_comment);
2997 UnicodeDecodingAlreadyDiagnosed =
true;
2999 UnicodeDecodingAlreadyDiagnosed =
false;
3000 CurPtr += Length - 1;
3007 if (CurPtr[-2] ==
'*')
3010 if ((CurPtr[-2] ==
'\n' || CurPtr[-2] ==
'\r')) {
3012 LangOpts.Trigraphs)) {
3018 if (CurPtr[0] ==
'*' && CurPtr[1] !=
'/') {
3023 Diag(CurPtr-1, diag::warn_nested_block_comment);
3025 }
else if (
C == 0 && CurPtr == BufferEnd+1) {
3027 Diag(BufferPtr, diag::err_unterminated_block_comment);
3036 FormTokenWithChars(
Result, CurPtr, tok::unknown);
3042 }
else if (
C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
3061 FormTokenWithChars(
Result, CurPtr, tok::comment);
3070 SkipWhitespace(
Result, CurPtr+1, TokAtPhysicalStartOfLine);
3088 "Must be in a preprocessing directive!");
3093 const char *CurPtr = BufferPtr;
3095 char Char = getAndAdvanceChar(CurPtr, Tmp);
3103 if (CurPtr-1 != BufferEnd) {
3104 if (isCodeCompletionPoint(CurPtr-1)) {
3120 assert(CurPtr[-1] == Char &&
"Trigraphs for newline?");
3121 BufferPtr = CurPtr-1;
3125 if (Tmp.
is(tok::code_completion)) {
3130 assert(Tmp.
is(tok::eod) &&
"Unexpected token!");
3142bool Lexer::LexEndOfFile(
Token &
Result,
const char *CurPtr) {
3150 FormTokenWithChars(
Result, CurPtr, tok::eod);
3162 BufferPtr = BufferEnd;
3163 FormTokenWithChars(
Result, BufferEnd, tok::eof);
3183 diag::err_pp_unterminated_conditional);
3190 if (CurPtr != BufferStart && (CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r'))
3191 Diag(BufferEnd, diag::warn_no_newline_eof)
3203std::optional<Token> Lexer::peekNextPPToken() {
3204 assert(!
LexingRawMode &&
"How can we expand a macro from a skipping buffer?");
3206 if (isDependencyDirectivesLexer()) {
3207 if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size())
3208 return std::nullopt;
3210 (void)convertDependencyDirectiveToken(
3211 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex],
Result);
3221 const char *TmpBufferPtr = BufferPtr;
3223 bool atStartOfLine = IsAtStartOfLine;
3224 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3225 bool leadingSpace = HasLeadingSpace;
3231 BufferPtr = TmpBufferPtr;
3233 HasLeadingSpace = leadingSpace;
3234 IsAtStartOfLine = atStartOfLine;
3235 IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
3239 if (Tok.
is(tok::eof))
3240 return std::nullopt;
3247 const char *Terminator = CMK ==
CMK_Perforce ?
"<<<<\n" :
">>>>>>>";
3249 auto RestOfBuffer = StringRef(CurPtr, BufferEnd - CurPtr).substr(TermLen);
3250 size_t Pos = RestOfBuffer.find(Terminator);
3251 while (Pos != StringRef::npos) {
3254 (RestOfBuffer[Pos - 1] !=
'\r' && RestOfBuffer[Pos - 1] !=
'\n')) {
3255 RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
3256 Pos = RestOfBuffer.find(Terminator);
3259 return RestOfBuffer.data()+Pos;
3268bool Lexer::IsStartOfConflictMarker(
const char *CurPtr) {
3270 if (CurPtr != BufferStart &&
3271 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3275 if (!StringRef(CurPtr, BufferEnd - CurPtr).starts_with(
"<<<<<<<") &&
3276 !StringRef(CurPtr, BufferEnd - CurPtr).starts_with(
">>>> "))
3291 Diag(CurPtr, diag::err_conflict_marker);
3292 CurrentConflictMarkerState =
Kind;
3296 while (*CurPtr !=
'\r' && *CurPtr !=
'\n') {
3297 assert(CurPtr != BufferEnd &&
"Didn't find end of line");
3312bool Lexer::HandleEndOfConflictMarker(
const char *CurPtr) {
3314 if (CurPtr != BufferStart &&
3315 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
3324 for (
unsigned i = 1; i != 4; ++i)
3325 if (CurPtr[i] != CurPtr[0])
3332 CurrentConflictMarkerState)) {
3336 while (CurPtr != BufferEnd && *CurPtr !=
'\r' && *CurPtr !=
'\n')
3342 CurrentConflictMarkerState =
CMK_None;
3350 const char *BufferEnd) {
3351 if (CurPtr == BufferEnd)
3354 for (; CurPtr != BufferEnd; ++CurPtr) {
3355 if (CurPtr[0] ==
'#' && CurPtr[1] ==
'>')
3361bool Lexer::lexEditorPlaceholder(
Token &
Result,
const char *CurPtr) {
3362 assert(CurPtr[-1] ==
'<' && CurPtr[0] ==
'#' &&
"Not a placeholder!");
3368 const char *Start = CurPtr - 1;
3369 if (!LangOpts.AllowEditorPlaceholders)
3370 Diag(Start, diag::err_placeholder_in_source);
3372 FormTokenWithChars(
Result, End, tok::raw_identifier);
3373 Result.setRawIdentifierData(Start);
3380bool Lexer::isCodeCompletionPoint(
const char *CurPtr)
const {
3394 if (Opts.CPlusPlus23)
3395 DiagId = diag::warn_cxx23_delimited_escape_sequence;
3396 else if (Opts.C2y && !Named)
3397 DiagId = diag::warn_c2y_delimited_escape_sequence;
3399 DiagId = diag::ext_delimited_escape_sequence;
3405 if (!Opts.CPlusPlus)
3406 Ext = Named ? 2 : 1 ;
3410 Diags.
Report(
Loc, DiagId) << Named << Ext;
3413std::optional<uint32_t> Lexer::tryReadNumericUCN(
const char *&StartPtr,
3414 const char *SlashLoc,
3417 char Kind = getCharAndSize(StartPtr, CharSize);
3418 assert((Kind ==
'u' || Kind ==
'U') &&
"expected a UCN");
3420 unsigned NumHexDigits;
3423 else if (Kind ==
'U')
3426 bool Delimited =
false;
3427 bool FoundEndDelimiter =
false;
3431 if (!LangOpts.CPlusPlus && !LangOpts.C99) {
3433 Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
3434 return std::nullopt;
3437 const char *CurPtr = StartPtr + CharSize;
3438 const char *KindLoc = &CurPtr[-1];
3440 uint32_t CodePoint = 0;
3441 while (Count != NumHexDigits || Delimited) {
3442 char C = getCharAndSize(CurPtr, CharSize);
3443 if (!Delimited && Count == 0 &&
C ==
'{') {
3449 if (Delimited &&
C ==
'}') {
3451 FoundEndDelimiter =
true;
3455 unsigned Value = llvm::hexDigitValue(
C);
3456 if (
Value == std::numeric_limits<unsigned>::max()) {
3460 Diag(SlashLoc, diag::warn_delimited_ucn_incomplete)
3461 << StringRef(KindLoc, 1);
3462 return std::nullopt;
3465 if (CodePoint & 0xF000'0000) {
3467 Diag(KindLoc, diag::err_escape_too_large) << 0;
3468 return std::nullopt;
3479 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3480 : diag::warn_ucn_escape_no_digits)
3481 << StringRef(KindLoc, 1);
3482 return std::nullopt;
3485 if (Delimited && Kind ==
'U') {
3487 Diag(SlashLoc, diag::err_hex_escape_no_digits) << StringRef(KindLoc, 1);
3488 return std::nullopt;
3491 if (!Delimited && Count != NumHexDigits) {
3493 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3495 if (Count == 4 && NumHexDigits == 8) {
3497 Diag(KindLoc, diag::note_ucn_four_not_eight)
3501 return std::nullopt;
3504 if (Delimited &&
PP)
3514 if (CurPtr - StartPtr == (
ptrdiff_t)(Count + 1 + (Delimited ? 2 : 0)))
3517 while (StartPtr != CurPtr)
3518 (void)getAndAdvanceChar(StartPtr, *
Result);
3525std::optional<uint32_t> Lexer::tryReadNamedUCN(
const char *&StartPtr,
3526 const char *SlashLoc,
3531 char C = getCharAndSize(StartPtr, CharSize);
3532 assert(
C ==
'N' &&
"expected \\N{...}");
3534 const char *CurPtr = StartPtr + CharSize;
3535 const char *KindLoc = &CurPtr[-1];
3537 C = getCharAndSize(CurPtr, CharSize);
3540 Diag(SlashLoc, diag::warn_ucn_escape_incomplete);
3541 return std::nullopt;
3544 const char *StartName = CurPtr;
3545 bool FoundEndDelimiter =
false;
3548 C = getCharAndSize(CurPtr, CharSize);
3551 FoundEndDelimiter =
true;
3557 Buffer.push_back(
C);
3560 if (!FoundEndDelimiter || Buffer.empty()) {
3562 Diag(SlashLoc, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
3563 : diag::warn_delimited_ucn_incomplete)
3564 << StringRef(KindLoc, 1);
3565 return std::nullopt;
3568 StringRef Name(Buffer.data(), Buffer.size());
3569 std::optional<char32_t>
Match =
3570 llvm::sys::unicode::nameToCodepointStrict(Name);
3571 std::optional<llvm::sys::unicode::LooseMatchingResult> LooseMatch;
3573 LooseMatch = llvm::sys::unicode::nameToCodepointLooseMatching(Name);
3575 Diag(StartName, diag::err_invalid_ucn_name)
3576 << StringRef(Buffer.data(), Buffer.size())
3579 Diag(StartName, diag::note_invalid_ucn_name_loose_matching)
3590 if (Diagnose &&
Match)
3599 if (LooseMatch && Diagnose)
3600 Match = LooseMatch->CodePoint;
3607 if (CurPtr - StartPtr == (
ptrdiff_t)(Buffer.size() + 3))
3610 while (StartPtr != CurPtr)
3611 (void)getAndAdvanceChar(StartPtr, *
Result);
3615 return Match ? std::optional<uint32_t>(*
Match) :
std::nullopt;
3618uint32_t Lexer::tryReadUCN(
const char *&StartPtr,
const char *SlashLoc,
3622 std::optional<uint32_t> CodePointOpt;
3623 char Kind = getCharAndSize(StartPtr, CharSize);
3624 if (Kind ==
'u' || Kind ==
'U')
3625 CodePointOpt = tryReadNumericUCN(StartPtr, SlashLoc,
Result);
3626 else if (Kind ==
'N')
3627 CodePointOpt = tryReadNamedUCN(StartPtr, SlashLoc,
Result);
3632 uint32_t CodePoint = *CodePointOpt;
3635 if (LangOpts.AsmPreprocessor)
3654 if (CodePoint < 0xA0) {
3658 if (CodePoint < 0x20 || CodePoint >= 0x7F)
3659 Diag(BufferPtr, diag::err_ucn_control_character);
3661 char C =
static_cast<char>(CodePoint);
3662 Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&
C, 1);
3667 }
else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
3672 if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11)
3673 Diag(BufferPtr, diag::warn_ucn_escape_surrogate);
3675 Diag(BufferPtr, diag::err_ucn_escape_invalid);
3683bool Lexer::CheckUnicodeWhitespace(
Token &
Result, uint32_t
C,
3684 const char *CurPtr) {
3687 Diag(BufferPtr, diag::ext_unicode_whitespace)
3696void Lexer::PropagateLineStartLeadingSpaceInfo(
Token &
Result) {
3697 IsAtStartOfLine =
Result.isAtStartOfLine();
3698 HasLeadingSpace =
Result.hasLeadingSpace();
3699 HasLeadingEmptyMacro =
Result.hasLeadingEmptyMacro();
3704 assert(!isDependencyDirectivesLexer());
3710 if (IsAtStartOfLine) {
3712 IsAtStartOfLine =
false;
3715 if (HasLeadingSpace) {
3717 HasLeadingSpace =
false;
3720 if (HasLeadingEmptyMacro) {
3722 HasLeadingEmptyMacro =
false;
3725 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3726 IsAtPhysicalStartOfLine =
false;
3729 bool returnedToken = LexTokenInternal(
Result, atPhysicalStartOfLine);
3731 assert((returnedToken || !isRawLex) &&
"Raw lex must succeed");
3732 return returnedToken;
3740bool Lexer::LexTokenInternal(
Token &
Result,
bool TokAtPhysicalStartOfLine) {
3742 assert(!
Result.needsCleaning() &&
"Result needs cleaning");
3743 assert(!
Result.hasPtrData() &&
"Result has not been reset");
3746 const char *CurPtr = BufferPtr;
3758 FormTokenWithChars(
Result, CurPtr, tok::unknown);
3767 unsigned SizeTmp, SizeTmp2;
3770 char Char = getAndAdvanceChar(CurPtr,
Result);
3774 NewLinePtr =
nullptr;
3779 if (CurPtr-1 == BufferEnd)
3780 return LexEndOfFile(
Result, CurPtr-1);
3783 if (isCodeCompletionPoint(CurPtr-1)) {
3786 FormTokenWithChars(
Result, CurPtr, tok::code_completion);
3791 Diag(CurPtr-1, diag::null_in_file);
3793 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3802 if (LangOpts.MicrosoftExt) {
3804 Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
3805 return LexEndOfFile(
Result, CurPtr-1);
3809 Kind = tok::unknown;
3813 if (CurPtr[0] ==
'\n')
3814 (void)getAndAdvanceChar(CurPtr,
Result);
3828 IsAtStartOfLine =
true;
3829 IsAtPhysicalStartOfLine =
true;
3830 NewLinePtr = CurPtr - 1;
3839 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3849 SkipHorizontalWhitespace:
3851 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
3860 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
3861 if (SkipLineComment(
Result, CurPtr+2, TokAtPhysicalStartOfLine))
3863 goto SkipIgnoredUnits;
3865 if (SkipBlockComment(
Result, CurPtr+2, TokAtPhysicalStartOfLine))
3867 goto SkipIgnoredUnits;
3869 goto SkipHorizontalWhitespace;
3877 case '0':
case '1':
case '2':
case '3':
case '4':
3878 case '5':
case '6':
case '7':
case '8':
case '9':
3881 return LexNumericConstant(
Result, CurPtr);
3890 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3891 Char = getCharAndSize(CurPtr, SizeTmp);
3895 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3896 tok::utf16_string_literal);
3900 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3901 tok::utf16_char_constant);
3904 if (Char ==
'R' && LangOpts.RawStringLiterals &&
3905 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3906 return LexRawStringLiteral(
Result,
3907 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3909 tok::utf16_string_literal);
3912 char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
3916 return LexStringLiteral(
Result,
3917 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3919 tok::utf8_string_literal);
3920 if (Char2 ==
'\'' && (LangOpts.CPlusPlus17 || LangOpts.C23))
3921 return LexCharConstant(
3922 Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3924 tok::utf8_char_constant);
3926 if (Char2 ==
'R' && LangOpts.RawStringLiterals) {
3928 char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3931 return LexRawStringLiteral(
Result,
3932 ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3935 tok::utf8_string_literal);
3942 return LexIdentifierContinue(
Result, CurPtr);
3948 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3949 Char = getCharAndSize(CurPtr, SizeTmp);
3953 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3954 tok::utf32_string_literal);
3958 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3959 tok::utf32_char_constant);
3962 if (Char ==
'R' && LangOpts.RawStringLiterals &&
3963 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3964 return LexRawStringLiteral(
Result,
3965 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
3967 tok::utf32_string_literal);
3971 return LexIdentifierContinue(
Result, CurPtr);
3977 if (LangOpts.RawStringLiterals) {
3978 Char = getCharAndSize(CurPtr, SizeTmp);
3981 return LexRawStringLiteral(
Result,
3982 ConsumeChar(CurPtr, SizeTmp,
Result),
3983 tok::string_literal);
3987 return LexIdentifierContinue(
Result, CurPtr);
3992 Char = getCharAndSize(CurPtr, SizeTmp);
3996 return LexStringLiteral(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
3997 tok::wide_string_literal);
4000 if (LangOpts.RawStringLiterals && Char ==
'R' &&
4001 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
4002 return LexRawStringLiteral(
Result,
4003 ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4005 tok::wide_string_literal);
4009 return LexCharConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4010 tok::wide_char_constant);
4015 case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
4016 case 'H':
case 'I':
case 'J':
case 'K':
case 'M':
case 'N':
4017 case 'O':
case 'P':
case 'Q':
case 'S':
case 'T':
4018 case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
4019 case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
4020 case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
4021 case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
4022 case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
4026 return LexIdentifierContinue(
Result, CurPtr);
4029 if (LangOpts.DollarIdents) {
4031 Diag(CurPtr-1, diag::ext_dollar_in_identifier);
4034 return LexIdentifierContinue(
Result, CurPtr);
4037 Kind = tok::unknown;
4044 return LexCharConstant(
Result, CurPtr, tok::char_constant);
4050 return LexStringLiteral(
Result, CurPtr,
4052 : tok::string_literal);
4056 Kind = tok::question;
4059 Kind = tok::l_square;
4062 Kind = tok::r_square;
4065 Kind = tok::l_paren;
4068 Kind = tok::r_paren;
4071 Kind = tok::l_brace;
4074 Kind = tok::r_brace;
4077 Char = getCharAndSize(CurPtr, SizeTmp);
4078 if (Char >=
'0' && Char <=
'9') {
4082 return LexNumericConstant(
Result, ConsumeChar(CurPtr, SizeTmp,
Result));
4083 }
else if (LangOpts.CPlusPlus && Char ==
'*') {
4084 Kind = tok::periodstar;
4086 }
else if (Char ==
'.' &&
4087 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'.') {
4088 Kind = tok::ellipsis;
4089 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4096 Char = getCharAndSize(CurPtr, SizeTmp);
4099 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4100 }
else if (Char ==
'=') {
4101 Kind = tok::ampequal;
4102 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4108 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4109 Kind = tok::starequal;
4110 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4116 Char = getCharAndSize(CurPtr, SizeTmp);
4118 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4119 Kind = tok::plusplus;
4120 }
else if (Char ==
'=') {
4121 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4122 Kind = tok::plusequal;
4128 Char = getCharAndSize(CurPtr, SizeTmp);
4130 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4131 Kind = tok::minusminus;
4132 }
else if (Char ==
'>' && LangOpts.CPlusPlus &&
4133 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'*') {
4134 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4136 Kind = tok::arrowstar;
4137 }
else if (Char ==
'>') {
4138 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4140 }
else if (Char ==
'=') {
4141 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4142 Kind = tok::minusequal;
4151 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
4152 Kind = tok::exclaimequal;
4153 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4155 Kind = tok::exclaim;
4160 Char = getCharAndSize(CurPtr, SizeTmp);
4170 bool TreatAsComment =
4171 LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP);
4172 if (!TreatAsComment)
4174 TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) !=
'*';
4176 if (TreatAsComment) {
4177 if (SkipLineComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4178 TokAtPhysicalStartOfLine))
4184 goto SkipIgnoredUnits;
4189 if (SkipBlockComment(
Result, ConsumeChar(CurPtr, SizeTmp,
Result),
4190 TokAtPhysicalStartOfLine))
4199 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4200 Kind = tok::slashequal;
4206 Char = getCharAndSize(CurPtr, SizeTmp);
4208 Kind = tok::percentequal;
4209 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4210 }
else if (LangOpts.Digraphs && Char ==
'>') {
4211 Kind = tok::r_brace;
4212 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4213 }
else if (LangOpts.Digraphs && Char ==
':') {
4214 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4215 Char = getCharAndSize(CurPtr, SizeTmp);
4216 if (Char ==
'%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
':') {
4217 Kind = tok::hashhash;
4218 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4220 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4221 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4223 Diag(BufferPtr, diag::ext_charize_microsoft);
4230 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
4231 goto HandleDirective;
4236 Kind = tok::percent;
4240 Char = getCharAndSize(CurPtr, SizeTmp);
4242 return LexAngledStringLiteral(
Result, CurPtr);
4243 }
else if (Char ==
'<') {
4244 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4246 Kind = tok::lesslessequal;
4247 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4249 }
else if (After ==
'<' && IsStartOfConflictMarker(CurPtr-1)) {
4253 }
else if (After ==
'<' && HandleEndOfConflictMarker(CurPtr-1)) {
4257 }
else if (LangOpts.CUDA && After ==
'<') {
4258 Kind = tok::lesslessless;
4259 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4262 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4263 Kind = tok::lessless;
4265 }
else if (Char ==
'=') {
4266 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4268 if (LangOpts.CPlusPlus20) {
4270 Diag(BufferPtr, diag::warn_cxx17_compat_spaceship);
4271 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4273 Kind = tok::spaceship;
4279 Diag(BufferPtr, diag::warn_cxx20_compat_spaceship)
4284 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4285 Kind = tok::lessequal;
4286 }
else if (LangOpts.Digraphs && Char ==
':') {
4287 if (LangOpts.CPlusPlus11 &&
4288 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
':') {
4295 char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
4296 if (After !=
':' && After !=
'>') {
4299 Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon);
4304 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4305 Kind = tok::l_square;
4306 }
else if (LangOpts.Digraphs && Char ==
'%') {
4307 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4308 Kind = tok::l_brace;
4309 }
else if (Char ==
'#' && SizeTmp == 1 &&
4310 lexEditorPlaceholder(
Result, CurPtr)) {
4317 Char = getCharAndSize(CurPtr, SizeTmp);
4319 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4320 Kind = tok::greaterequal;
4321 }
else if (Char ==
'>') {
4322 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
4324 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4326 Kind = tok::greatergreaterequal;
4327 }
else if (After ==
'>' && IsStartOfConflictMarker(CurPtr-1)) {
4331 }
else if (After ==
'>' && HandleEndOfConflictMarker(CurPtr-1)) {
4334 }
else if (LangOpts.CUDA && After ==
'>') {
4335 Kind = tok::greatergreatergreater;
4336 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp,
Result),
4339 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4340 Kind = tok::greatergreater;
4343 Kind = tok::greater;
4347 Char = getCharAndSize(CurPtr, SizeTmp);
4349 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4350 Kind = tok::caretequal;
4352 if (LangOpts.OpenCL && Char ==
'^')
4353 Diag(CurPtr, diag::err_opencl_logical_exclusive_or);
4358 Char = getCharAndSize(CurPtr, SizeTmp);
4360 Kind = tok::pipeequal;
4361 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4362 }
else if (Char ==
'|') {
4364 if (CurPtr[1] ==
'|' && HandleEndOfConflictMarker(CurPtr-1))
4366 Kind = tok::pipepipe;
4367 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4373 Char = getCharAndSize(CurPtr, SizeTmp);
4374 if (LangOpts.Digraphs && Char ==
'>') {
4375 Kind = tok::r_square;
4376 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4377 }
else if (Char ==
':') {
4378 Kind = tok::coloncolon;
4379 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4388 Char = getCharAndSize(CurPtr, SizeTmp);
4391 if (CurPtr[1] ==
'=' && HandleEndOfConflictMarker(CurPtr-1))
4394 Kind = tok::equalequal;
4395 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4404 Char = getCharAndSize(CurPtr, SizeTmp);
4406 Kind = tok::hashhash;
4407 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4408 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
4411 Diag(BufferPtr, diag::ext_charize_microsoft);
4412 CurPtr = ConsumeChar(CurPtr, SizeTmp,
Result);
4418 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
4419 goto HandleDirective;
4427 if (CurPtr[-1] ==
'@' && LangOpts.ObjC)
4430 Kind = tok::unknown;
4435 if (!LangOpts.AsmPreprocessor) {
4436 if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &
Result)) {
4437 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4438 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
4446 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4450 Kind = tok::unknown;
4455 Kind = tok::unknown;
4459 llvm::UTF32 CodePoint;
4464 llvm::ConversionResult Status =
4465 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&CurPtr,
4466 (
const llvm::UTF8 *)BufferEnd,
4468 llvm::strictConversion);
4469 if (Status == llvm::conversionOK) {
4470 if (CheckUnicodeWhitespace(
Result, CodePoint, CurPtr)) {
4471 if (SkipWhitespace(
Result, CurPtr, TokAtPhysicalStartOfLine))
4478 return LexUnicodeIdentifierStart(
Result, CodePoint, CurPtr);
4484 Kind = tok::unknown;
4491 Diag(CurPtr, diag::err_invalid_utf8);
4493 BufferPtr = CurPtr+1;
4505 FormTokenWithChars(
Result, CurPtr, Kind);
4511 FormTokenWithChars(
Result, CurPtr, tok::hash);
4526const char *Lexer::convertDependencyDirectiveToken(
4528 const char *TokPtr = BufferStart + DDTok.
Offset;
4534 BufferPtr = TokPtr + DDTok.
Length;
4538bool Lexer::LexDependencyDirectiveToken(
Token &
Result) {
4539 assert(isDependencyDirectivesLexer());
4541 using namespace dependency_directives_scan;
4543 if (BufferPtr == BufferEnd)
4544 return LexEndOfFile(
Result, BufferPtr);
4546 while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) {
4547 if (DepDirectives.front().Kind == pp_eof)
4548 return LexEndOfFile(
Result, BufferEnd);
4549 if (DepDirectives.front().Kind == tokens_present_before_eof)
4551 NextDepDirectiveTokenIndex = 0;
4552 DepDirectives = DepDirectives.drop_front();
4556 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex++];
4557 if (NextDepDirectiveTokenIndex > 1 || DDTok.
Kind != tok::hash) {
4563 BufferPtr = BufferStart + DDTok.
Offset;
4564 LexAngledStringLiteral(
Result, BufferPtr + 1);
4565 if (
Result.isNot(tok::header_name))
4570 DepDirectives.front().Tokens[NextDepDirectiveTokenIndex];
4571 if (BufferStart + NextTok.
Offset >= BufferPtr)
4573 ++NextDepDirectiveTokenIndex;
4578 const char *TokPtr = convertDependencyDirectiveToken(DDTok,
Result);
4580 if (
Result.is(tok::hash) &&
Result.isAtStartOfLine()) {
4587 if (
Result.is(tok::raw_identifier)) {
4588 Result.setRawIdentifierData(TokPtr);
4596 if (
Result.isLiteral()) {
4597 Result.setLiteralData(TokPtr);
4600 if (
Result.is(tok::colon)) {
4602 if (*BufferPtr ==
':') {
4603 assert(DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
4605 ++NextDepDirectiveTokenIndex;
4606 Result.setKind(tok::coloncolon);
4616bool Lexer::LexDependencyDirectiveTokenWhileSkipping(
Token &
Result) {
4617 assert(isDependencyDirectivesLexer());
4619 using namespace dependency_directives_scan;
4622 unsigned NestedIfs = 0;
4624 DepDirectives = DepDirectives.drop_front();
4625 switch (DepDirectives.front().Kind) {
4627 llvm_unreachable(
"unexpected 'pp_none'");
4668 NextDepDirectiveTokenIndex = 0;
4669 return LexEndOfFile(
Result, BufferEnd);
4674 DepDirectives.front().Tokens.front();
4675 assert(DDTok.
is(tok::hash));
4676 NextDepDirectiveTokenIndex = 1;
4678 convertDependencyDirectiveToken(DDTok,
Result);
Defines the Diagnostic-related interfaces.
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
Defines the clang::LangOptions interface.
static bool isInExpansionTokenRange(const SourceLocation Loc, const SourceManager &SM)
static bool isMathematicalExtensionID(uint32_t C, const LangOptions &LangOpts, bool IsStart, bool &IsExtension)
static void diagnoseInvalidUnicodeCodepointInIdentifier(DiagnosticsEngine &Diags, const LangOptions &LangOpts, uint32_t CodePoint, CharSourceRange Range, bool IsFirst)
static char DecodeTrigraphChar(const char *CP, Lexer *L, bool Trigraphs)
DecodeTrigraphChar - If the specified character is a legal trigraph when prefixed with ?...
static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, const LangOptions &LangOpts, char *Spelling)
Slow case of getSpelling.
static const char * FindConflictEnd(const char *CurPtr, const char *BufferEnd, ConflictMarkerKind CMK)
Find the end of a version control conflict marker.
static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
After encountering UTF-8 character C and interpreting it as an identifier character,...
static SourceLocation getBeginningOfFileToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
static void StringifyImpl(T &Str, char Quote)
static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen)
GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the lexer buffer was all exp...
static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static CharSourceRange makeCharRange(Lexer &L, const char *Begin, const char *End)
static bool isUnicodeWhitespace(uint32_t Codepoint)
static void diagnoseExtensionInIdentifier(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
static const char * findPlaceholderEnd(const char *CurPtr, const char *BufferEnd)
static llvm::SmallString< 5 > codepointAsHexString(uint32_t C)
static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L, bool Trigraphs)
isEndOfBlockCommentWithEscapedNewLine - Return true if the specified newline character (either \n or \r) ...
static const char * fastParseASCIIIdentifier(const char *CurPtr, const char *BufferEnd)
static char GetTrigraphCharForLetter(char Letter)
GetTrigraphCharForLetter - Given a character that occurs after a ?? pair, return the decoded trigraph...
static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts, bool &IsExtension)
static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range, bool IsFirst)
static const char * findBeginningOfLine(StringRef Buffer, unsigned Offset)
Returns the pointer that points to the beginning of the line that contains the given offset,...
Defines the MultipleIncludeOpt interface.
Defines the clang::Preprocessor interface.
Defines the clang::SourceLocation class and associated facilities.
Defines the SourceManager interface.
Defines the clang::TokenKind enum and support functions.
static const llvm::sys::UnicodeCharRange C11DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange C99DisallowedInitialIDCharRanges[]
static const llvm::sys::UnicodeCharRange UnicodeWhitespaceCharRanges[]
static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDStartRanges[]
static const llvm::sys::UnicodeCharRange MathematicalNotationProfileIDContinueRanges[]
static const llvm::sys::UnicodeCharRange XIDStartRanges[]
static const llvm::sys::UnicodeCharRange XIDContinueRanges[]
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
__device__ __2f16 float c
__PTRDIFF_TYPE__ ptrdiff_t
static __inline__ int __ATTRS_o_ai vec_any_ge(vector signed char __a, vector signed char __b)
static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed char __a, vector signed char __b)
Represents a character-granular source range.
static CharSourceRange getCharRange(SourceRange R)
SourceLocation getEnd() const
SourceLocation getBegin() const
A little helper class used to produce diagnostics.
Concrete class used by the front-end to report problems and issues.
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
bool isIgnored(unsigned DiagID, SourceLocation Loc) const
Determine whether the diagnostic is known to be ignored.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string.
static FixItHint CreateRemoval(CharSourceRange RemoveRange)
Create a code modification hint that removes the given source range.
static FixItHint CreateInsertion(SourceLocation InsertionLoc, StringRef Code, bool BeforePreviousInsertions=false)
Create a code modification hint that inserts the given code string at a specific location.
One of these records is kept for each identifier that is lexed.
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier.
bool isKeyword(const LangOptions &LangOpts) const
Return true if this token is a keyword in the specified language.
tok::ObjCKeywordKind getObjCKeywordID() const
Return the Objective-C keyword ID for this identifier.
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode.
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
static unsigned getEscapedNewLineSize(const char *P)
getEscapedNewLineSize - Return the size of the specified escaped newline, or 0 if it is not an escape...
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode.
static std::optional< Token > findPreviousToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments)
Finds the token that comes before the given location.
void seek(unsigned Offset, bool IsAtStartOfLine)
Set the lexer's buffer pointer to Offset.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string.
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion.
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
const char * getBufferLocation() const
Return the current location in the buffer.
bool Lex(Token &Result)
Lex - Return the next token in the file.
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
static void DiagnoseDelimitedOrNamedEscapeSequence(SourceLocation Loc, bool Named, const LangOptions &Opts, DiagnosticsEngine &Diags)
Diagnose use of a delimited or named escape sequence.
static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)
Get the physical length (including trigraphs and escaped newlines) of the first Characters characters...
Lexer(FileID FID, const llvm::MemoryBufferRef &InputFile, Preprocessor &PP, bool IsFirstIncludeOfFile=true)
Lexer constructor - Create a new lexer object for the specified buffer with the specified preprocesso...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether the new line pointed to by Str is preceded by an escape sequence.
SourceLocation getSourceLocation(const char *Loc, unsigned TokLen=1) const
getSourceLocation - Return a source location identifier for the specified offset in the current file.
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for the line that corresponds to the given location Loc.
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file,...
static bool isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
static std::optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments=false)
Finds the token that comes right after the given location.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
static SizedChar getCharAndSizeNoWarn(const char *Ptr, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
void ExitTopLevelConditional()
Called when the lexer exits the top-level conditional.
bool LexingRawMode
True if in raw mode.
SmallVector< PPConditionalInfo, 4 > ConditionalStack
Information about the set of #if/#ifdef/#ifndef blocks we are currently in.
bool ParsingPreprocessorDirective
True when parsing #XXX; turns '\n' into a tok::eod token.
MultipleIncludeOpt MIOpt
A state machine that detects the #ifndef-wrapping a file idiom for the multiple-include optimization.
bool ParsingFilename
True after #include; turns <xx> or "xxx" into a tok::header_name token.
bool isLexingRawMode() const
Return true if this lexer is in raw mode or not.
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
bool LexEditorPlaceholders
When enabled, the preprocessor will construct editor placeholder tokens.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
SourceLocation getCodeCompletionLoc() const
Returns the location of the code-completion point.
SourceLocation getCodeCompletionFileLoc() const
Returns the start location of the file of code-completion point.
void setCodeCompletionTokenRange(const SourceLocation Start, const SourceLocation End)
Set the code completion token range for detecting replacement range later on.
bool isRecordingPreamble() const
void setRecordedPreambleConditionalStack(ArrayRef< PPConditionalInfo > s)
bool isInPrimaryFile() const
Return true if we're in the top-level file, not in a #include.
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it in...
bool isPreprocessedOutput() const
Returns true if the preprocessor is responsible for generating output, false if it is producing token...
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the token's IdentifierInfo membe...
SourceManager & getSourceManager() const
EmptylineHandler * getEmptylineHandler() const
bool getCommentRetentionState() const
bool hadModuleLoaderFatalFailure() const
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
bool HandleComment(Token &result, SourceRange Comment)
bool isCodeCompletionEnabled() const
Determine if we are performing code completion.
void HandleDirective(Token &Result)
Callback invoked when the lexer sees a # token at the start of a line.
IdentifierTable & getIdentifierTable()
const PreprocessorOptions & getPreprocessorOpts() const
Retrieve the preprocessor options used to initialize this preprocessor.
const LangOptions & getLangOpts() const
void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled)
Hook used by the lexer to invoke the "included file" code completion point.
void CodeCompleteNaturalLanguage()
Hook used by the lexer to invoke the "natural language" code completion point.
bool HandleEndOfFile(Token &Result, bool isEndOfMacro=false)
Callback invoked when the lexer hits the end of the current file.
DiagnosticsEngine & getDiagnostics() const
void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter)
Set the code completion token for filtering purposes.
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer.
A trivial tuple used to represent a source range.
void setBegin(SourceLocation b)
SourceLocation getEnd() const
SourceLocation getBegin() const
void setEnd(SourceLocation e)
Each ExpansionInfo encodes the expansion location - where the token was ultimately expanded,...
SourceLocation getExpansionLocStart() const
SourceLocation getSpellingLoc() const
bool isMacroArgExpansion() const
This is a discriminated union of FileInfo and ExpansionInfo.
const ExpansionInfo & getExpansion() const
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
Token - This structure provides full information about a lexed token.
IdentifierInfo * getIdentifierInfo() const
bool hasUCN() const
Returns true if this token contains a universal character name.
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
unsigned getLength() const
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
tok::TokenKind getKind() const
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const
Determine whether the token kind starts a simple-type-specifier.
void startToken()
Reset all flags to cleared.
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode),...
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
void setFlag(TokenFlags Flag)
Set the specified flag.
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a)
Copies the values of the most significant bits from each 8-bit element in a 128-bit integer vector of...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si128(__m128i_u const *__p)
Moves packed integer values from an unaligned 128-bit memory location to elements in a 128-bit intege...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_load_si128(__m128i const *__p)
Moves packed integer values from an aligned 128-bit memory location to elements in a 128-bit integer ...
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
@ tokens_present_before_eof
Indicates that there are tokens present between the last scanned directive and eof.
@ pp_pragma_system_header
@ pp_pragma_include_alias
@ After
Like System, but searched after the system directories.
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token.
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READNONE bool isASCII(char c)
Returns true if a byte is an ASCII character.
@ Match
This is not an overload because the signature exactly matches an existing declaration.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from.
@ CMK_Perforce
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s.
@ CMK_None
Not within a conflict marker.
@ CMK_Normal
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
std::pair< FileID, unsigned > FileIDAndOffset
LLVM_READONLY bool isAsciiIdentifierContinue(unsigned char c)
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
@ Result
The result type of a method or function.
LLVM_READONLY bool isRawStringDelimBody(unsigned char c)
Return true if this is the body character of a C++ raw string delimiter.
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t',...
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
@ Keyword
The name has been typo-corrected to a keyword.
const FunctionProtoType * T
LLVM_READONLY bool isAsciiIdentifierStart(unsigned char c, bool AllowDollar=false)
Returns true if this is a valid first character of a C identifier, which is [a-zA-Z_].
__INTPTR_TYPE__ intptr_t
A signed integer type with the property that any valid pointer to void can be converted to this type,...
float __ovld __cnfn length(float)
Return the length of vector p, i.e., sqrt(p.x^2 + p.y^2 + ...)
#define _mm_cmpistri(A, B, M)
Uses the immediate operand M to perform a comparison of string data with implicitly defined lengths t...
#define _SIDD_LEAST_SIGNIFICANT
#define _SIDD_NEGATIVE_POLARITY
Represents a char and the number of bytes parsed to produce it.
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
Token lexed as part of dependency directive scanning.
unsigned Offset
Offset into the original source input.
bool is(tok::TokenKind K) const