13#include "llvm/ADT/StringExtras.h"
14#include "llvm/ADT/StringSwitch.h"
15#include "llvm/Support/ConvertUTF.h"
16#include "llvm/Support/ErrorHandling.h"
22 llvm::errs() <<
"comments::Token Kind=" << Kind <<
" ";
24 llvm::errs() <<
" " << Length <<
" \"" << L.
getSpelling(*
this,
SM) <<
"\"\n";
40 llvm::BumpPtrAllocator &Allocator,
42 char *Resolved = Allocator.Allocate<
char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
43 char *ResolvedPtr = Resolved;
44 if (llvm::ConvertCodePointToUTF8(CodePoint, ResolvedPtr))
45 return StringRef(Resolved, ResolvedPtr - Resolved);
52#include "clang/AST/CommentHTMLTags.inc"
53#include "clang/AST/CommentHTMLNamedCharacterReferences.inc"
57StringRef Lexer::resolveHTMLNamedCharacterReference(StringRef Name)
const {
59 return llvm::StringSwitch<StringRef>(Name)
66 .Default(translateHTMLNamedCharacterReferenceToUTF8(Name));
69StringRef Lexer::resolveHTMLDecimalCharacterReference(StringRef Name)
const {
70 unsigned CodePoint = 0;
71 for (
unsigned i = 0, e = Name.size(); i != e; ++i) {
74 CodePoint += Name[i] -
'0';
79StringRef Lexer::resolveHTMLHexCharacterReference(StringRef Name)
const {
80 unsigned CodePoint = 0;
81 for (
unsigned i = 0, e = Name.size(); i != e; ++i) {
83 const char C = Name[i];
85 CodePoint += llvm::hexDigitValue(
C);
90void Lexer::skipLineStartingDecorations() {
92 assert(CommentState == LCS_InsideCComment);
94 if (BufferPtr == CommentEnd)
97 const char *NewBufferPtr = BufferPtr;
99 if (++NewBufferPtr == CommentEnd)
101 if (*NewBufferPtr ==
'*')
102 BufferPtr = NewBufferPtr + 1;
107const char *findNewline(
const char *BufferPtr,
const char *BufferEnd) {
108 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
115const char *
skipNewline(
const char *BufferPtr,
const char *BufferEnd) {
116 if (BufferPtr == BufferEnd)
119 if (*BufferPtr ==
'\n')
122 assert(*BufferPtr ==
'\r');
124 if (BufferPtr != BufferEnd && *BufferPtr ==
'\n')
130const char *skipNamedCharacterReference(
const char *BufferPtr,
131 const char *BufferEnd) {
132 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
139const char *skipDecimalCharacterReference(
const char *BufferPtr,
140 const char *BufferEnd) {
141 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
148const char *skipHexCharacterReference(
const char *BufferPtr,
149 const char *BufferEnd) {
150 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
157bool isHTMLIdentifierStartingCharacter(
char C) {
161bool isHTMLIdentifierCharacter(
char C) {
165const char *skipHTMLIdentifier(
const char *BufferPtr,
const char *BufferEnd) {
166 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
167 if (!isHTMLIdentifierCharacter(*BufferPtr))
177const char *skipHTMLQuotedString(
const char *BufferPtr,
const char *BufferEnd)
179 const char Quote = *BufferPtr;
180 assert(Quote ==
'\"' || Quote ==
'\'');
183 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
184 const char C = *BufferPtr;
185 if (
C == Quote && BufferPtr[-1] !=
'\\')
191const char *
skipWhitespace(
const char *BufferPtr,
const char *BufferEnd) {
192 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
199const char *skipHorizontalWhitespace(
const char *BufferPtr,
200 const char *BufferEnd) {
201 for (; BufferPtr != BufferEnd; ++BufferPtr) {
208bool isWhitespace(
const char *BufferPtr,
const char *BufferEnd) {
212bool isCommandNameStartCharacter(
char C) {
216bool isCommandNameCharacter(
char C) {
220const char *skipCommandName(
const char *BufferPtr,
const char *BufferEnd) {
221 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
222 if (!isCommandNameCharacter(*BufferPtr))
230const char *findBCPLCommentEnd(
const char *BufferPtr,
const char *BufferEnd) {
231 const char *CurPtr = BufferPtr;
232 while (CurPtr != BufferEnd) {
235 if (CurPtr == BufferEnd)
239 const char *EscapePtr = CurPtr - 1;
243 if (*EscapePtr ==
'\\' ||
244 (EscapePtr - 2 >= BufferPtr && EscapePtr[0] ==
'/' &&
245 EscapePtr[-1] ==
'?' && EscapePtr[-2] ==
'?')) {
256const char *findCCommentEnd(
const char *BufferPtr,
const char *BufferEnd) {
257 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
258 if (*BufferPtr ==
'*') {
259 assert(BufferPtr + 1 != BufferEnd);
260 if (*(BufferPtr + 1) ==
'/')
264 llvm_unreachable(
"buffer end hit before '*/' was seen");
269void Lexer::formTokenWithChars(Token &
Result,
const char *TokEnd,
271 const unsigned TokLen = TokEnd - BufferPtr;
272 Result.setLocation(getSourceLocation(BufferPtr));
276 Result.TextPtr =
"<UNSET>";
282const char *Lexer::skipTextToken() {
283 const char *TokenPtr = BufferPtr;
284 assert(TokenPtr < CommentEnd);
285 StringRef TokStartSymbols = ParseCommands ?
"\n\r\\@\"&<" :
"\n\r";
289 StringRef(TokenPtr, CommentEnd - TokenPtr).find_first_of(TokStartSymbols);
290 if (End == StringRef::npos)
295 if (*(TokenPtr + End) ==
'\"') {
297 End = StringRef(TokenPtr, CommentEnd - TokenPtr).find_first_of(
"\n\r\"");
298 if (End != StringRef::npos && *(TokenPtr + End) ==
'\"')
302 return TokenPtr + End;
305void Lexer::lexCommentText(Token &
T) {
306 assert(CommentState == LCS_InsideBCPLComment ||
307 CommentState == LCS_InsideCComment);
310 auto HandleNonCommandToken = [&]() ->
void {
311 assert(State == LS_Normal);
313 const char *TokenPtr = BufferPtr;
314 assert(TokenPtr < CommentEnd);
321 if (CommentState == LCS_InsideCComment)
322 skipLineStartingDecorations();
326 return formTextToken(
T, skipTextToken());
331 return HandleNonCommandToken();
336 case LS_VerbatimBlockFirstLine:
337 lexVerbatimBlockFirstLine(
T);
339 case LS_VerbatimBlockBody:
340 lexVerbatimBlockBody(
T);
342 case LS_VerbatimLineText:
343 lexVerbatimLineText(
T);
345 case LS_HTMLStartTag:
353 assert(State == LS_Normal);
354 const char *TokenPtr = BufferPtr;
355 assert(TokenPtr < CommentEnd);
365 if (TokenPtr == CommentEnd) {
366 formTextToken(
T, TokenPtr);
374 case '\\':
case '@':
case '&':
case '$':
375 case '#':
case '<':
case '>':
case '%':
376 case '\"':
case '.':
case ':':
379 if (
C ==
':' && TokenPtr != CommentEnd && *TokenPtr ==
':') {
383 StringRef UnescapedText(BufferPtr + 1, TokenPtr - (BufferPtr + 1));
385 T.setText(UnescapedText);
390 if (!isCommandNameStartCharacter(*TokenPtr)) {
391 formTextToken(
T, TokenPtr);
395 TokenPtr = skipCommandName(TokenPtr, CommentEnd);
396 unsigned Length = TokenPtr - (BufferPtr + 1);
400 if (Length == 1 && TokenPtr[-1] ==
'f' && TokenPtr != CommentEnd) {
402 if (
C ==
'$' ||
C ==
'(' ||
C ==
')' ||
C ==
'[' ||
C ==
']' ||
403 C ==
'{' ||
C ==
'}') {
409 StringRef CommandName(BufferPtr + 1, Length);
414 StringRef CorrectedName = Info->
Name;
415 SourceLocation
Loc = getSourceLocation(BufferPtr);
416 SourceLocation EndLoc = getSourceLocation(TokenPtr);
417 SourceRange FullRange = SourceRange(
Loc, EndLoc);
418 SourceRange CommandRange(
Loc.getLocWithOffset(1), EndLoc);
419 Diag(
Loc, diag::warn_correct_comment_command_name)
420 << FullRange << CommandName << CorrectedName
424 T.setUnknownCommandName(CommandName);
425 Diag(
T.getLocation(), diag::warn_unknown_comment_command_name)
426 << SourceRange(
T.getLocation(),
T.getEndLocation());
430 if (Info->IsVerbatimBlockCommand) {
431 setupAndLexVerbatimBlock(
T, TokenPtr, *BufferPtr, Info);
434 if (Info->IsVerbatimLineCommand) {
435 setupAndLexVerbatimLine(
T, TokenPtr, Info);
438 formTokenWithChars(
T, TokenPtr, CommandKind);
439 T.setCommandID(Info->getID());
444 lexHTMLCharacterReference(
T);
449 if (TokenPtr == CommentEnd) {
450 formTextToken(
T, TokenPtr);
453 const char C = *TokenPtr;
454 if (isHTMLIdentifierStartingCharacter(
C))
455 setupAndLexHTMLStartTag(
T);
457 setupAndLexHTMLEndTag(
T);
459 formTextToken(
T, TokenPtr);
464 return HandleNonCommandToken();
468void Lexer::setupAndLexVerbatimBlock(Token &
T,
469 const char *TextBegin,
470 char Marker,
const CommandInfo *Info) {
471 assert(Info->IsVerbatimBlockCommand);
473 VerbatimBlockEndCommandName.clear();
474 VerbatimBlockEndCommandName.append(Marker ==
'\\' ?
"\\" :
"@");
475 VerbatimBlockEndCommandName.append(Info->EndCommandName);
478 T.setVerbatimBlockID(Info->getID());
483 if (BufferPtr != CommentEnd &&
486 State = LS_VerbatimBlockBody;
490 State = LS_VerbatimBlockFirstLine;
493void Lexer::lexVerbatimBlockFirstLine(Token &
T) {
495 assert(BufferPtr < CommentEnd);
501 const char *Newline = findNewline(BufferPtr, CommentEnd);
502 StringRef
Line(BufferPtr, Newline - BufferPtr);
505 size_t Pos =
Line.find(VerbatimBlockEndCommandName);
507 const char *NextLine;
508 if (Pos == StringRef::npos) {
512 }
else if (Pos == 0) {
514 const char *End = BufferPtr + VerbatimBlockEndCommandName.size();
515 StringRef Name(BufferPtr + 1, End - (BufferPtr + 1));
522 TextEnd = BufferPtr + Pos;
531 StringRef
Text(BufferPtr, TextEnd - BufferPtr);
533 T.setVerbatimBlockText(
Text);
535 State = LS_VerbatimBlockBody;
538void Lexer::lexVerbatimBlockBody(Token &
T) {
539 assert(State == LS_VerbatimBlockBody);
541 if (CommentState == LCS_InsideCComment)
542 skipLineStartingDecorations();
544 if (BufferPtr == CommentEnd) {
546 T.setVerbatimBlockText(
"");
550 lexVerbatimBlockFirstLine(
T);
553void Lexer::setupAndLexVerbatimLine(Token &
T,
const char *TextBegin,
554 const CommandInfo *Info) {
555 assert(Info->IsVerbatimLineCommand);
557 T.setVerbatimLineID(Info->getID());
559 State = LS_VerbatimLineText;
562void Lexer::lexVerbatimLineText(Token &
T) {
563 assert(State == LS_VerbatimLineText);
566 const char *Newline = findNewline(BufferPtr, CommentEnd);
567 StringRef
Text(BufferPtr, Newline - BufferPtr);
569 T.setVerbatimLineText(
Text);
574void Lexer::lexHTMLCharacterReference(Token &
T) {
575 const char *TokenPtr = BufferPtr;
576 assert(*TokenPtr ==
'&');
578 if (TokenPtr == CommentEnd) {
579 formTextToken(
T, TokenPtr);
584 bool isDecimal =
false;
588 TokenPtr = skipNamedCharacterReference(TokenPtr, CommentEnd);
590 }
else if (
C ==
'#') {
592 if (TokenPtr == CommentEnd) {
593 formTextToken(
T, TokenPtr);
599 TokenPtr = skipDecimalCharacterReference(TokenPtr, CommentEnd);
601 }
else if (
C ==
'x' ||
C ==
'X') {
604 TokenPtr = skipHexCharacterReference(TokenPtr, CommentEnd);
606 formTextToken(
T, TokenPtr);
610 formTextToken(
T, TokenPtr);
613 if (NamePtr == TokenPtr || TokenPtr == CommentEnd ||
615 formTextToken(
T, TokenPtr);
618 StringRef Name(NamePtr, TokenPtr - NamePtr);
622 Resolved = resolveHTMLNamedCharacterReference(Name);
624 Resolved = resolveHTMLDecimalCharacterReference(Name);
626 Resolved = resolveHTMLHexCharacterReference(Name);
628 if (Resolved.empty()) {
629 formTextToken(
T, TokenPtr);
636void Lexer::setupAndLexHTMLStartTag(Token &
T) {
637 assert(BufferPtr[0] ==
'<' &&
638 isHTMLIdentifierStartingCharacter(BufferPtr[1]));
639 const char *TagNameEnd = skipHTMLIdentifier(BufferPtr + 2, CommentEnd);
640 StringRef Name(BufferPtr + 1, TagNameEnd - (BufferPtr + 1));
641 if (!isHTMLTagName(Name)) {
642 formTextToken(
T, TagNameEnd);
647 T.setHTMLTagStartName(Name);
649 BufferPtr = skipHorizontalWhitespace(BufferPtr, CommentEnd);
650 if (BufferPtr == CommentEnd) {
651 State = LS_HTMLStartTag;
655 const char C = *BufferPtr;
656 if (BufferPtr != CommentEnd &&
658 isHTMLIdentifierStartingCharacter(
C)))
659 State = LS_HTMLStartTag;
662void Lexer::lexHTMLStartTag(Token &
T) {
663 assert(State == LS_HTMLStartTag);
669 if (CommentState == LCS_InsideCComment)
670 skipLineStartingDecorations();
672 BufferPtr = skipHorizontalWhitespace(BufferPtr, CommentEnd);
673 if (BufferPtr == CommentEnd) {
684 const char *TokenPtr = BufferPtr;
686 if (isHTMLIdentifierCharacter(
C)) {
687 TokenPtr = skipHTMLIdentifier(TokenPtr, CommentEnd);
688 StringRef Ident(BufferPtr, TokenPtr - BufferPtr);
690 T.setHTMLIdent(Ident);
699 const char *OpenQuote = TokenPtr;
700 TokenPtr = skipHTMLQuotedString(TokenPtr, CommentEnd);
701 const char *ClosingQuote = TokenPtr;
702 if (TokenPtr != CommentEnd)
705 T.setHTMLQuotedString(StringRef(OpenQuote + 1,
706 ClosingQuote - (OpenQuote + 1)));
716 if (TokenPtr != CommentEnd && *TokenPtr ==
'>') {
720 formTextToken(
T, TokenPtr);
729 BufferPtr = skipHorizontalWhitespace(BufferPtr, CommentEnd);
730 if (BufferPtr == CommentEnd) {
736 C !=
'=' &&
C !=
'\"' &&
C !=
'\'' &&
C !=
'>' &&
C !=
'/') {
742void Lexer::setupAndLexHTMLEndTag(Token &
T) {
743 assert(BufferPtr[0] ==
'<' && BufferPtr[1] ==
'/');
745 const char *TagNameBegin =
skipWhitespace(BufferPtr + 2, CommentEnd);
746 const char *TagNameEnd = skipHTMLIdentifier(TagNameBegin, CommentEnd);
747 StringRef Name(TagNameBegin, TagNameEnd - TagNameBegin);
748 if (!isHTMLTagName(Name)) {
749 formTextToken(
T, TagNameEnd);
756 T.setHTMLTagEndName(Name);
758 if (BufferPtr != CommentEnd && *BufferPtr ==
'>')
759 State = LS_HTMLEndTag;
762void Lexer::lexHTMLEndTag(Token &
T) {
763 assert(BufferPtr != CommentEnd && *BufferPtr ==
'>');
771 const char *BufferStart,
const char *BufferEnd,
bool ParseCommands)
772 : Allocator(Allocator), Diags(Diags), Traits(Traits),
773 BufferStart(BufferStart), BufferEnd(BufferEnd), BufferPtr(BufferStart),
774 FileLoc(FileLoc), ParseCommands(ParseCommands),
775 CommentState(LCS_BeforeComment), State(LS_Normal) {}
779 switch (CommentState) {
780 case LCS_BeforeComment:
781 if (BufferPtr == BufferEnd) {
782 formTokenWithChars(
T, BufferPtr,
tok::eof);
786 assert(*BufferPtr ==
'/');
792 if (BufferPtr != BufferEnd) {
797 const char C = *BufferPtr;
798 if (
C ==
'/' ||
C ==
'!')
805 if (BufferPtr != BufferEnd && *BufferPtr ==
'<')
808 CommentState = LCS_InsideBCPLComment;
810 case LS_VerbatimBlockFirstLine:
811 case LS_VerbatimBlockBody:
813 case LS_HTMLStartTag:
814 BufferPtr = skipHorizontalWhitespace(BufferPtr, BufferEnd);
820 CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd);
827 const char C = *BufferPtr;
828 if ((
C ==
'*' && *(BufferPtr + 1) !=
'/') ||
C ==
'!')
832 if (BufferPtr != BufferEnd && *BufferPtr ==
'<')
835 CommentState = LCS_InsideCComment;
837 CommentEnd = findCCommentEnd(BufferPtr, BufferEnd);
841 llvm_unreachable(
"second character of comment should be '/' or '*'");
844 case LCS_BetweenComments: {
847 const char *EndWhitespace = BufferPtr;
848 while(EndWhitespace != BufferEnd && *EndWhitespace !=
'/')
853 if (State == LS_HTMLStartTag && EndWhitespace != BufferEnd) {
854 CommentState = LCS_BeforeComment;
855 BufferPtr = EndWhitespace;
865 CommentState = LCS_BeforeComment;
869 case LCS_InsideBCPLComment:
870 case LCS_InsideCComment:
871 if (BufferPtr != CommentEnd) {
876 if (CommentState == LCS_InsideCComment) {
877 assert(BufferPtr[0] ==
'*' && BufferPtr[1] ==
'/');
879 assert(BufferPtr <= BufferEnd);
884 if (State == LS_HTMLStartTag && BufferPtr != BufferEnd) {
885 CommentState = LCS_BetweenComments;
893 CommentState = LCS_BetweenComments;
897 CommentState = LCS_BetweenComments;
909 bool InvalidTemp =
false;
914 const char *
Begin =
File.data() + LocInfo.second;
enum clang::sema::@1840::IndirectLocalPathEntry::EntryKind Kind
static bool isNamed(const NamedDecl *ND, const char(&Str)[Len])
static unsigned skipNewline(const char *&First, const char *End)
static unsigned skipWhitespace(unsigned Idx, StringRef Str, unsigned Length)
Skip over whitespace in the string, starting at the given index.
Concrete class used by the front-end to report problems and issues.
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string.
Encodes a location in the source.
void print(raw_ostream &OS, const SourceManager &SM) const
This class handles loading and caching of source files into memory.
FileIDAndOffset getDecomposedLoc(SourceLocation Loc) const
Decompose the specified location into a raw FileID + Offset pair.
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
std::pair< FileID, unsigned > FileIDAndOffset
LLVM_READONLY bool isAsciiIdentifierContinue(unsigned char c)
LLVM_READONLY bool isLetter(unsigned char c)
Return true if this character is an ASCII letter: [a-zA-Z].
LLVM_READONLY bool isAlphanumeric(unsigned char c)
Return true if this character is an ASCII letter or digit: [a-zA-Z0-9].
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
@ Result
The result type of a method or function.
LLVM_READONLY bool isDigit(unsigned char c)
Return true if this character is an ASCII digit: [0-9].
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t',...
LLVM_READONLY bool isHexDigit(unsigned char c)
Return true if this character is an ASCII hex digit: [0-9a-fA-F].
const FunctionProtoType * T