clang 22.0.0git
Token.h
Go to the documentation of this file.
1//===--- Token.h - Token interface ------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the Token interface.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_CLANG_LEX_TOKEN_H
14#define LLVM_CLANG_LEX_TOKEN_H
15
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/StringRef.h"
20#include <cassert>
21
22namespace clang {
23
24class IdentifierInfo;
25class LangOptions;
26
27/// Token - This structure provides full information about a lexed token.
28/// It is not intended to be space efficient, it is intended to return as much
29/// information as possible about each returned token. This is expected to be
30/// compressed into a smaller form if memory footprint is important.
31///
32/// The parser can create a special "annotation token" representing a stream of
33/// tokens that were parsed and semantically resolved, e.g.: "foo::MyClass<int>"
34/// can be represented by a single typename annotation token that carries
35/// information about the SourceRange of the tokens and the type object.
36class Token {
37 /// The location of the token. This is actually a SourceLocation.
39
40 // Conceptually these next two fields could be in a union. However, this
41 // causes gcc 4.2 to pessimize LexTokenInternal, a very performance critical
42 // routine. Keeping as separate members with casts until a more beautiful fix
43 // presents itself.
44
45 /// UintData - This holds either the length of the token text, when
46 /// a normal token, or the end of the SourceRange when an annotation
47 /// token.
49
50 /// PtrData - This is a union of four different pointer types, which depends
51 /// on what type of token this is:
52 /// Identifiers, keywords, etc:
53 /// This is an IdentifierInfo*, which contains the uniqued identifier
54 /// spelling.
55 /// Literals: isLiteral() returns true.
56 /// This is a pointer to the start of the token in a text buffer, which
57 /// may be dirty (have trigraphs / escaped newlines).
58 /// Annotations (resolved type names, C++ scopes, etc): isAnnotation().
59 /// This is a pointer to sema-specific data for the annotation token.
60 /// Eof:
61 /// This is a pointer to a Decl.
62 /// Other:
63 /// This is null.
64 void *PtrData;
65
66 /// Kind - The actual flavor of token this is.
67 tok::TokenKind Kind;
68
69 /// Flags - Bits we track about this token, members of the TokenFlags enum.
70 unsigned short Flags;
71
72public:
73 // Various flags set per token:
75 StartOfLine = 0x01, // At start of line or only after whitespace
76 // (considering the line after macro expansion).
77 LeadingSpace = 0x02, // Whitespace exists before this token (considering
78 // whitespace after macro expansion).
79 DisableExpand = 0x04, // This identifier may never be macro expanded.
80 NeedsCleaning = 0x08, // Contained an escaped newline or trigraph.
81 LeadingEmptyMacro = 0x10, // Empty macro exists before this token.
82 HasUDSuffix = 0x20, // This string or character literal has a ud-suffix.
83 HasUCN = 0x40, // This identifier contains a UCN.
84 IgnoredComma = 0x80, // This comma is not a macro argument separator (MS).
85 StringifiedInMacro = 0x100, // This string or character literal is formed by
86 // macro stringizing or charizing operator.
87 CommaAfterElided = 0x200, // The comma following this token was elided (MS).
88 IsEditorPlaceholder = 0x400, // This identifier is a placeholder.
89 IsReinjected = 0x800, // A phase 4 token that was produced before and
90 // re-added, e.g. via EnterTokenStream. Annotation
91 // tokens are *not* reinjected.
93 0x1000, // Whether we've seen any 'no-trivial' pp-directives before
94 // current position.
95 };
96
/// Return the kind of this token.
97 tok::TokenKind getKind() const { return Kind; }
/// Set the kind of this token to \p K. No other field is touched.
98 void setKind(tok::TokenKind K) { Kind = K; }
99
100 /// is/isNot - Predicates to check if this token is a specific kind, as in
101 /// "if (Tok.is(tok::l_brace)) {...}". is() returns true when the stored
/// kind equals \p K.
102 bool is(tok::TokenKind K) const { return Kind == K; }
/// Return true if this token's kind differs from \p K.
103 bool isNot(tok::TokenKind K) const { return Kind != K; }
/// Return true if this token's kind matches any of the kinds in \p Ks.
/// At least one kind must be supplied; an empty list is rejected at
/// compile time.
104 template <typename... Ts> bool isOneOf(Ts... Ks) const {
105 static_assert(sizeof...(Ts) > 0,
106 "requires at least one tok::TokenKind specified");
// Fold expression: is(K) is evaluated for each kind, joined with ||.
107 return (is(Ks) || ...);
108 }
109
110 /// Return true if this is a raw identifier (when lexing
111 /// in raw mode) or a non-keyword identifier (when lexing in non-raw mode).
112 bool isAnyIdentifier() const {
114 }
115
116 /// Return true if this is a "literal", like a numeric
117 /// constant, string, etc. Forwards to tok::isLiteral() on this token's kind.
118 bool isLiteral() const {
119 return tok::isLiteral(getKind());
120 }
121
122 /// Return true if this token is of any of the tok::annot_* kinds.
/// Forwards to tok::isAnnotation() on this token's kind.
123 bool isAnnotation() const { return tok::isAnnotation(getKind()); }
124
125 /// Return true if the token is a keyword that is parsed in the same
126 /// position as a standard attribute, but that has semantic meaning
127 /// and so cannot be a true attribute.
130 }
131
132 /// Return a source location identifier for the specified
133 /// offset in the current file.
136 }
/// Return the value stored in UintData as the token's length.
/// Must not be called on an annotation token (asserted): annotation tokens
/// have no length field.
137 unsigned getLength() const {
138 assert(!isAnnotation() && "Annotation tokens have no length field");
139 return UintData;
140 }
141
/// Store \p Len in UintData as the token's length.
/// Must not be called on an annotation token (asserted): annotation tokens
/// have no length field.
143 void setLength(unsigned Len) {
144 assert(!isAnnotation() && "Annotation tokens have no length field");
145 UintData = Len;
146 }
147
149 assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
150 return SourceLocation::getFromRawEncoding(UintData ? UintData : Loc);
151 }
153 assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
154 UintData = L.getRawEncoding();
155 }
156
159 }
160
164 }
165
166 /// SourceRange of the group of tokens that this annotation token
167 /// represents.
170 }
174 }
175
/// Return the name of this token's kind, as reported by tok::getTokenName().
176 const char *getName() const { return tok::getTokenName(Kind); }
177
179 /// Reset the token to a cleared state: unknown kind, no flags, and zeroed data fields.
179 void startToken() {
180 Kind = tok::unknown;
181 Flags = 0;
182 PtrData = nullptr;
183 UintData = 0;
185 }
186
/// Return true if PtrData is non-null, whatever kind of pointer it holds.
187 bool hasPtrData() const { return PtrData != nullptr; }
188
190 assert(isNot(tok::raw_identifier) &&
191 "getIdentifierInfo() on a tok::raw_identifier token!");
192 assert(!isAnnotation() &&
193 "getIdentifierInfo() on an annotation token!");
194 if (isLiteral()) return nullptr;
195 if (is(tok::eof)) return nullptr;
196 return (IdentifierInfo*) PtrData;
197 }
199 PtrData = (void*) II;
200 }
201
/// Return the opaque pointer stored in PtrData of an eof token.
/// Only valid when this token is tok::eof (asserted).
202 const void *getEofData() const {
203 assert(is(tok::eof));
204 return reinterpret_cast<const void *>(PtrData);
205 }
/// Store the opaque pointer \p D in PtrData of an eof token.
/// The token must be tok::eof and must not already carry pointer data
/// (both asserted).
206 void setEofData(const void *D) {
207 assert(is(tok::eof));
208 assert(!PtrData);
// PtrData is a plain void*, so constness is cast away for storage;
// getEofData() hands the pointer back as const void*.
209 PtrData = const_cast<void *>(D);
210 }
211
212 /// getRawIdentifier - For a raw identifier token (i.e., an identifier
213 /// lexed in raw mode), returns a reference to the text substring in the
214 /// buffer if known.
/// The token must be tok::raw_identifier (asserted). The returned StringRef
/// starts at PtrData and is getLength() characters long.
215 StringRef getRawIdentifier() const {
216 assert(is(tok::raw_identifier));
217 return StringRef(reinterpret_cast<const char *>(PtrData), getLength());
218 }
/// Record \p Ptr as the start of a raw identifier's text.
/// The token must be tok::raw_identifier (asserted). Only the pointer is
/// stored; the length is kept separately (see getRawIdentifier()).
219 void setRawIdentifierData(const char *Ptr) {
220 assert(is(tok::raw_identifier));
221 PtrData = const_cast<char*>(Ptr);
222 }
223
224 /// getLiteralData - For a literal token (numeric constant, string, etc), this
225 /// returns a pointer to the start of it in the text buffer if known, null
226 /// otherwise. Must only be called when isLiteral() is true (asserted).
227 const char *getLiteralData() const {
228 assert(isLiteral() && "Cannot get literal data of non-literal");
229 return reinterpret_cast<const char*>(PtrData);
230 }
/// Record \p Ptr as the start of a literal token's text.
/// Must only be called when isLiteral() is true (asserted).
231 void setLiteralData(const char *Ptr) {
232 assert(isLiteral() && "Cannot set literal data of non-literal");
233 PtrData = const_cast<char*>(Ptr);
234 }
235
/// Return the pointer stored in PtrData of an annotation token.
/// Must only be called when isAnnotation() is true (asserted).
236 void *getAnnotationValue() const {
237 assert(isAnnotation() && "Used AnnotVal on non-annotation token");
238 return PtrData;
239 }
/// Store \p val in PtrData of an annotation token.
/// Must only be called when isAnnotation() is true (asserted).
240 void setAnnotationValue(void *val) {
241 assert(isAnnotation() && "Used AnnotVal on non-annotation token");
242 PtrData = val;
243 }
244
245 /// Set the specified flag by OR-ing \p Flag into Flags; other bits are kept.
246 void setFlag(TokenFlags Flag) {
247 Flags |= Flag;
248 }
249
250 /// Get the specified flag: true if any bit of \p Flag is set in Flags.
251 bool getFlag(TokenFlags Flag) const {
252 return (Flags & Flag) != 0;
253 }
254
255 /// Unset the specified flag.
257 Flags &= ~Flag;
258 }
259
260 /// Return the internal representation of the flags.
261 ///
262 /// This is only intended for low-level operations such as writing tokens to
263 /// disk. The unsigned short Flags field is returned widened to unsigned.
264 unsigned getFlags() const {
265 return Flags;
266 }
267
268 /// Set a flag to either true or false: \p Flag is set when \p Val is true
/// and cleared otherwise.
269 void setFlagValue(TokenFlags Flag, bool Val) {
270 if (Val)
271 setFlag(Flag);
272 else
273 clearFlag(Flag);
274 }
275
276 /// isAtStartOfLine - Return true if this token is at the start of a line,
277 /// i.e. the StartOfLine flag is set.
278 bool isAtStartOfLine() const { return getFlag(StartOfLine); }
279
280 /// Return true if this token has whitespace before it, i.e. the
281 /// LeadingSpace flag is set.
282 bool hasLeadingSpace() const { return getFlag(LeadingSpace); }
283
284 /// Return true if this identifier token should never be macro expanded in
285 /// the future, due to C99 6.10.3.4p2 (the DisableExpand flag is set).
286 bool isExpandDisabled() const { return getFlag(DisableExpand); }
287
288 /// Return true if we have an ObjC keyword identifier.
/// Declared here only; the definition is out of line.
/// NOTE(review): presumably the check is against \p objcKey specifically --
/// confirm against the definition.
289 bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const;
290
291 /// Return the ObjC keyword kind.
293
/// Determine whether the token kind starts a simple-type-specifier.
/// Declared here only; the definition is out of line.
294 bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const;
295
296 /// Return true if this token has trigraphs or escaped newlines in it.
/// This reports the NeedsCleaning flag.
297 bool needsCleaning() const { return getFlag(NeedsCleaning); }
298
299 /// Return true if this token has an empty macro before it.
300 ///
302
303 /// Return true if this token is a string or character literal which
304 /// has a ud-suffix. This reports the HasUDSuffix flag.
305 bool hasUDSuffix() const { return getFlag(HasUDSuffix); }
306
307 /// Returns true if this token contains a universal character name (UCN).
/// This reports the HasUCN flag.
308 bool hasUCN() const { return getFlag(HasUCN); }
309
310 /// Returns true if this token was formed in a macro by the stringizing or
311 /// charizing operator.
313
314 /// Returns true if the comma after this token was elided.
/// This reports the CommaAfterElided flag.
315 bool commaAfterElided() const { return getFlag(CommaAfterElided); }
316
317 /// Returns true if this token is an editor placeholder.
318 ///
319 /// Editor placeholders are produced by the code-completion engine and are
320 /// represented as characters between '<#' and '#>' in the source code. The
321 /// lexer uses identifier tokens to represent placeholders.
323
326 }
327};
328
329/// Information about the conditional stack (\#if directives)
330/// currently active.
332 /// Location where the conditional started.
334
335 /// True if this was contained in a skipping directive, e.g.,
336 /// in a "\#if 0" block.
338
339 /// True if we have emitted tokens already, and now we're in
340 /// an \#else block or something. Only useful in Skipping blocks.
342
343 /// True if we've seen a \#else in this block. If so,
344 /// \#elif/\#else directives are not allowed.
346};
347
348// Extra information needed for annotation tokens.
353};
354} // end namespace clang
355
356#endif // LLVM_CLANG_LEX_TOKEN_H
const Decl * D
SourceLocation Loc
Definition: SemaObjC.cpp:754
Defines the clang::SourceLocation class and associated facilities.
Defines the clang::TokenKind enum and support functions.
One of these records is kept for each identifier that is lexed.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:434
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
A trivial tuple used to represent a source range.
SourceLocation getEnd() const
SourceLocation getBegin() const
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:189
void setLiteralData(const char *Ptr)
Definition: Token.h:231
bool isAnyIdentifier() const
Return true if this is a raw identifier (when lexing in raw mode) or a non-keyword identifier (when l...
Definition: Token.h:112
SourceLocation getEndLoc() const
Definition: Token.h:161
unsigned getFlags() const
Return the internal representation of the flags.
Definition: Token.h:264
void setAnnotationEndLoc(SourceLocation L)
Definition: Token.h:152
bool hasUCN() const
Returns true if this token contains a universal character name.
Definition: Token.h:308
void clearFlag(TokenFlags Flag)
Unset the specified flag.
Definition: Token.h:256
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Definition: Token.h:118
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
Definition: Token.h:134
const char * getName() const
Definition: Token.h:176
unsigned getLength() const
Definition: Token.h:137
void setLength(unsigned Len)
Definition: Token.h:143
bool isEditorPlaceholder() const
Returns true if this token is an editor placeholder.
Definition: Token.h:322
bool isExpandDisabled() const
Return true if this identifier token should never be expanded in the future, due to C99 6....
Definition: Token.h:286
void setKind(tok::TokenKind K)
Definition: Token.h:98
bool commaAfterElided() const
Returns true if the comma after this token was elided.
Definition: Token.h:315
SourceLocation getAnnotationEndLoc() const
Definition: Token.h:148
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
Definition: Lexer.cpp:69
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition: Token.h:102
void * getAnnotationValue() const
Definition: Token.h:236
tok::TokenKind getKind() const
Definition: Token.h:97
bool isRegularKeywordAttribute() const
Return true if the token is a keyword that is parsed in the same position as a standard attribute,...
Definition: Token.h:128
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
Definition: Token.h:278
bool isOneOf(Ts... Ks) const
Definition: Token.h:104
void setEofData(const void *D)
Definition: Token.h:206
bool getFlag(TokenFlags Flag) const
Get the specified flag.
Definition: Token.h:251
@ DisableExpand
Definition: Token.h:79
@ HasUCN
Definition: Token.h:83
@ IsEditorPlaceholder
Definition: Token.h:88
@ IgnoredComma
Definition: Token.h:84
@ HasSeenNoTrivialPPDirective
Definition: Token.h:92
@ IsReinjected
Definition: Token.h:89
@ LeadingEmptyMacro
Definition: Token.h:81
@ LeadingSpace
Definition: Token.h:77
@ StartOfLine
Definition: Token.h:75
@ StringifiedInMacro
Definition: Token.h:85
@ HasUDSuffix
Definition: Token.h:82
@ CommaAfterElided
Definition: Token.h:87
@ NeedsCleaning
Definition: Token.h:80
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition: Token.h:282
SourceRange getAnnotationRange() const
SourceRange of the group of tokens that this annotation token represents.
Definition: Token.h:168
void setLocation(SourceLocation L)
Definition: Token.h:142
bool hasLeadingEmptyMacro() const
Return true if this token has an empty macro before it.
Definition: Token.h:301
void setRawIdentifierData(const char *Ptr)
Definition: Token.h:219
bool isNot(tok::TokenKind K) const
Definition: Token.h:103
bool hasPtrData() const
Definition: Token.h:187
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
Definition: Token.h:123
void setAnnotationValue(void *val)
Definition: Token.h:240
const void * getEofData() const
Definition: Token.h:202
bool hasUDSuffix() const
Return true if this token is a string or character literal which has a ud-suffix.
Definition: Token.h:305
bool stringifiedInMacro() const
Returns true if this token was formed in a macro by the stringizing or charizing operator.
Definition: Token.h:312
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
Definition: Lexer.cpp:60
void setAnnotationRange(SourceRange R)
Definition: Token.h:171
bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const
Determine whether the token kind starts a simple-type-specifier.
Definition: Lexer.cpp:77
void startToken()
Reset the token to a cleared state: unknown kind, no flags, and zeroed data fields.
Definition: Token.h:179
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
Definition: Token.h:297
bool hasSeenNoTrivialPPDirective() const
Definition: Token.h:324
void setIdentifierInfo(IdentifierInfo *II)
Definition: Token.h:198
SourceLocation getLastLoc() const
Definition: Token.h:157
void setFlagValue(TokenFlags Flag, bool Val)
Set a flag to either true or false.
Definition: Token.h:269
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode),...
Definition: Token.h:215
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
Definition: Token.h:227
void setFlag(TokenFlags Flag)
Set the specified flag.
Definition: Token.h:246
const char * getTokenName(TokenKind Kind) LLVM_READNONE
Determines the name of a token as used within the front end.
Definition: TokenKinds.cpp:24
bool isAnyIdentifier(TokenKind K)
Return true if this is a raw identifier or an identifier kind.
Definition: TokenKinds.h:83
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
Definition: TokenKinds.h:41
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
constexpr bool isRegularKeywordAttribute(TokenKind K)
Definition: TokenKinds.h:120
bool isLiteral(TokenKind K)
Return true if this is a "literal" kind, like a numeric constant, string, etc.
Definition: TokenKinds.h:97
bool isAnnotation(TokenKind K)
Return true if this is any of tok::annot_* kinds.
Definition: TokenKinds.cpp:58
The JSON file list parser is used to communicate input to InstallAPI.
Information about the conditional stack (#if directives) currently active.
Definition: Token.h:331
bool FoundNonSkip
True if we have emitted tokens already, and now we're in an #else block or something.
Definition: Token.h:341
SourceLocation IfLoc
Location where the conditional started.
Definition: Token.h:333
bool WasSkipping
True if this was contained in a skipping directive, e.g., in a "\#if 0" block.
Definition: Token.h:337
bool FoundElse
True if we've seen a #else in this block.
Definition: Token.h:345
ArrayRef< Token > Toks
Definition: Token.h:352