clang 22.0.0git
IdentifierTable.cpp
Go to the documentation of this file.
1//===- IdentifierTable.cpp - Hash table for identifier lookup -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the IdentifierInfo, IdentifierVisitor, and
10// IdentifierTable interfaces.
11//
12//===----------------------------------------------------------------------===//
13
22#include "llvm/ADT/DenseMapInfo.h"
23#include "llvm/ADT/FoldingSet.h"
24#include "llvm/ADT/StringMap.h"
25#include "llvm/ADT/StringRef.h"
26#include "llvm/Support/Allocator.h"
27#include "llvm/Support/raw_ostream.h"
28#include <cassert>
29#include <cstdio>
30#include <cstring>
31#include <string>
32
33using namespace clang;
34
35// A check to make sure the ObjCOrBuiltinID has sufficient room to store the
36// largest possible target/aux-target combination. If we exceed this, we likely
37// need to just change the ObjCOrBuiltinIDBits value in IdentifierTable.h.
38static_assert(2 * LargestBuiltinID < (2 << (InterestingIdentifierBits - 1)),
39 "Insufficient ObjCOrBuiltinID Bits");
40
41//===----------------------------------------------------------------------===//
42// IdentifierTable Implementation
43//===----------------------------------------------------------------------===//
44
46
48
49namespace {
50
51/// A simple identifier lookup iterator that represents an
52/// empty sequence of identifiers.
53class EmptyLookupIterator : public IdentifierIterator {
54public:
55 StringRef Next() override { return StringRef(); }
56};
57
58} // namespace
59
61 return new EmptyLookupIterator();
62}
63
65 : HashTable(8192), // Start with space for 8K identifiers.
66 ExternalLookup(ExternalLookup) {}
67
69 IdentifierInfoLookup *ExternalLookup)
70 : IdentifierTable(ExternalLookup) {
71 // Populate the identifier table with info about keywords for the current
72 // language.
73 AddKeywords(LangOpts);
74}
75
76//===----------------------------------------------------------------------===//
77// Language Keyword Implementation
78//===----------------------------------------------------------------------===//
79
80// Constants for TokenKinds.def
namespace {

/// Bit flags used in the FLAGS column of TokenKinds.def. Each named flag
/// occupies exactly one bit; the composite values at the end are built from
/// those bits.
enum TokenKey : unsigned {
  KEYC99 = 1u << 0,
  KEYCXX = 1u << 1,
  KEYCXX11 = 1u << 2,
  KEYGNU = 1u << 3,
  KEYMS = 1u << 4,
  BOOLSUPPORT = 1u << 5,
  KEYALTIVEC = 1u << 6,
  KEYNOCXX = 1u << 7,
  KEYBORLAND = 1u << 8,
  KEYOPENCLC = 1u << 9,
  KEYC23 = 1u << 10,
  KEYNOMS18 = 1u << 11,
  KEYNOOPENCL = 1u << 12,
  WCHARSUPPORT = 1u << 13,
  HALFSUPPORT = 1u << 14,
  CHAR8SUPPORT = 1u << 15,
  KEYOBJC = 1u << 16,
  KEYZVECTOR = 1u << 17,
  KEYCOROUTINES = 1u << 18,
  KEYMODULES = 1u << 19,
  KEYCXX20 = 1u << 20,
  KEYOPENCLCXX = 1u << 21,
  KEYMSCOMPAT = 1u << 22,
  KEYSYCL = 1u << 23,
  KEYCUDA = 1u << 24,
  KEYZOS = 1u << 25,
  KEYNOZOS = 1u << 26,
  KEYHLSL = 1u << 27,
  KEYFIXEDPOINT = 1u << 28,

  // The highest single-bit key.
  KEYMAX = KEYFIXEDPOINT,

  // Every C++ dialect key.
  KEYALLCXX = KEYCXX | KEYCXX11 | KEYCXX20,

  // Every bit up to and including KEYMAX, minus the three "disable" keys
  // KEYNOMS18, KEYNOOPENCL and KEYNOZOS.
  KEYALL = (KEYMAX | (KEYMAX - 1)) & ~(KEYNOMS18 | KEYNOOPENCL | KEYNOZOS)
};

/// How a keyword is treated in the selected standard. The enumerators are
/// deliberately listed from least to most permissive so that taking the
/// maximum of two statuses picks the one that 'wins'.
enum KeywordStatus {
  /// Not yet calculated; used while the status is being worked out.
  KS_Unknown,
  /// The keyword is disabled.
  KS_Disabled,
  /// The keyword belongs to a future standard.
  KS_Future,
  /// The keyword is an extension.
  KS_Extension,
  /// The keyword is enabled.
  KS_Enabled,
};

} // namespace
130
131// This works on a single TokenKey flag and checks the LangOpts to get the
132// KeywordStatus based exclusively on this flag, so that it can be merged in
133// getKeywordStatus. Most should be enabled/disabled, but some might imply
134// 'future' versions, or extensions. Returns 'unknown' unless this is KNOWN to
135// be disabled, and the calling function makes it 'disabled' if no other flag
136// changes it. This is necessary for the KEYNOCXX and KEYNOOPENCL flags.
137static KeywordStatus getKeywordStatusHelper(const LangOptions &LangOpts,
138 TokenKey Flag) {
139 // Flag is a single bit version of TokenKey (that is, not
140 // KEYALL/KEYALLCXX/etc), so we can check with == throughout this function.
141 assert((Flag & ~(Flag - 1)) == Flag && "Multiple bits set?");
142
143 switch (Flag) {
144 case KEYC99:
145 if (LangOpts.C99)
146 return KS_Enabled;
147 return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
148 case KEYC23:
149 if (LangOpts.C23)
150 return KS_Enabled;
151 return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
152 case KEYCXX:
153 return LangOpts.CPlusPlus ? KS_Enabled : KS_Unknown;
154 case KEYCXX11:
155 if (LangOpts.CPlusPlus11)
156 return KS_Enabled;
157 return LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
158 case KEYCXX20:
159 if (LangOpts.CPlusPlus20)
160 return KS_Enabled;
161 return LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
162 case KEYGNU:
163 return LangOpts.GNUKeywords ? KS_Extension : KS_Unknown;
164 case KEYMS:
165 return LangOpts.MicrosoftExt ? KS_Extension : KS_Unknown;
166 case BOOLSUPPORT:
167 if (LangOpts.Bool) return KS_Enabled;
168 return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
169 case KEYALTIVEC:
170 return LangOpts.AltiVec ? KS_Enabled : KS_Unknown;
171 case KEYBORLAND:
172 return LangOpts.Borland ? KS_Extension : KS_Unknown;
173 case KEYOPENCLC:
174 return LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus ? KS_Enabled
175 : KS_Unknown;
176 case WCHARSUPPORT:
177 return LangOpts.WChar ? KS_Enabled : KS_Unknown;
178 case HALFSUPPORT:
179 return LangOpts.Half ? KS_Enabled : KS_Unknown;
180 case CHAR8SUPPORT:
181 if (LangOpts.Char8) return KS_Enabled;
182 if (LangOpts.CPlusPlus20) return KS_Unknown;
183 if (LangOpts.CPlusPlus) return KS_Future;
184 return KS_Unknown;
185 case KEYOBJC:
186 // We treat bridge casts as objective-C keywords so we can warn on them
187 // in non-arc mode.
188 return LangOpts.ObjC ? KS_Enabled : KS_Unknown;
189 case KEYZVECTOR:
190 return LangOpts.ZVector ? KS_Enabled : KS_Unknown;
191 case KEYCOROUTINES:
192 return LangOpts.Coroutines ? KS_Enabled : KS_Unknown;
193 case KEYMODULES:
194 return KS_Unknown;
195 case KEYOPENCLCXX:
196 return LangOpts.OpenCLCPlusPlus ? KS_Enabled : KS_Unknown;
197 case KEYMSCOMPAT:
198 return LangOpts.MSVCCompat ? KS_Enabled : KS_Unknown;
199 case KEYSYCL:
200 return LangOpts.isSYCL() ? KS_Enabled : KS_Unknown;
201 case KEYCUDA:
202 return LangOpts.CUDA ? KS_Enabled : KS_Unknown;
203 case KEYZOS:
204 return LangOpts.ZOSExt ? KS_Enabled : KS_Unknown;
205 case KEYHLSL:
206 return LangOpts.HLSL ? KS_Enabled : KS_Unknown;
207 case KEYNOCXX:
208 // This is enabled in all non-C++ modes, but might be enabled for other
209 // reasons as well.
210 return LangOpts.CPlusPlus ? KS_Unknown : KS_Enabled;
211 case KEYNOOPENCL:
212 case KEYNOMS18:
213 case KEYNOZOS:
214 // The disable behavior for this is handled in getKeywordStatus.
215 return KS_Unknown;
216 case KEYFIXEDPOINT:
217 return LangOpts.FixedPoint ? KS_Enabled : KS_Disabled;
218 default:
219 llvm_unreachable("Unknown KeywordStatus flag");
220 }
221}
222
223/// Translates flags as specified in TokenKinds.def into keyword status
224/// in the given language standard.
225static KeywordStatus getKeywordStatus(const LangOptions &LangOpts,
226 unsigned Flags) {
227 // KEYALL means always enabled, so special case this one.
228 if (Flags == KEYALL) return KS_Enabled;
229 // These are tests that need to 'always win', as they are special in that they
230 // disable based on certain conditions.
231 if (LangOpts.OpenCL && (Flags & KEYNOOPENCL)) return KS_Disabled;
232 if (LangOpts.MSVCCompat && (Flags & KEYNOMS18) &&
234 return KS_Disabled;
235 if (LangOpts.ZOSExt && (Flags & KEYNOZOS))
236 return KS_Disabled;
237 KeywordStatus CurStatus = KS_Unknown;
238
239 while (Flags != 0) {
240 unsigned CurFlag = Flags & ~(Flags - 1);
241 Flags = Flags & ~CurFlag;
242 CurStatus = std::max(
243 CurStatus,
244 getKeywordStatusHelper(LangOpts, static_cast<TokenKey>(CurFlag)));
245 }
246
247 if (CurStatus == KS_Unknown)
248 return KS_Disabled;
249 return CurStatus;
250}
251
252static bool IsKeywordInCpp(unsigned Flags) {
253 return (Flags & (KEYCXX | KEYCXX11 | KEYCXX20 | BOOLSUPPORT | WCHARSUPPORT |
254 CHAR8SUPPORT)) != 0;
255}
256
258 StringRef Name) {
259 IdentifierInfo &II = Table.get(Name, tok::identifier);
262}
263
264/// AddKeyword - This method is used to associate a token ID with specific
265/// identifiers because they are language keywords. This causes the lexer to
266/// automatically map matching identifiers to specialized token codes.
267static void AddKeyword(StringRef Keyword,
268 tok::TokenKind TokenCode, unsigned Flags,
269 const LangOptions &LangOpts, IdentifierTable &Table) {
270 KeywordStatus AddResult = getKeywordStatus(LangOpts, Flags);
271
272 // Don't add this keyword if disabled in this language and isn't otherwise
273 // special.
274 if (AddResult == KS_Disabled) {
275 // We do not consider any identifiers to be C++ keywords when in
276 // Objective-C because @ effectively introduces a custom grammar where C++
277 // keywords can be used (and similar for selectors). We could enable this
278 // for Objective-C, but it would require more logic to ensure we do not
279 // issue compatibility diagnostics in these cases.
280 if (!LangOpts.ObjC && IsKeywordInCpp(Flags))
282 return;
283 }
284
285 IdentifierInfo &Info =
286 Table.get(Keyword, AddResult == KS_Future ? tok::identifier : TokenCode);
287 Info.setIsExtensionToken(AddResult == KS_Extension);
288 Info.setIsFutureCompatKeyword(AddResult == KS_Future);
289}
290
291/// AddCXXOperatorKeyword - Register a C++ operator keyword alternative
292/// representations.
293static void AddCXXOperatorKeyword(StringRef Keyword,
294 tok::TokenKind TokenCode,
295 IdentifierTable &Table) {
296 IdentifierInfo &Info = Table.get(Keyword, TokenCode);
298}
299
300/// AddObjCKeyword - Register an Objective-C \@keyword like "class" "selector"
301/// or "property".
302static void AddObjCKeyword(StringRef Name,
304 IdentifierTable &Table) {
305 Table.get(Name).setObjCKeywordID(ObjCID);
306}
307
308static void AddNotableIdentifier(StringRef Name,
310 IdentifierTable &Table) {
311 // Don't add 'not_notable' identifier.
312 if (BTID != tok::not_notable) {
313 IdentifierInfo &Info = Table.get(Name, tok::identifier);
314 Info.setNotableIdentifierID(BTID);
315 }
316}
317
318/// AddKeywords - Add all keywords to the symbol table.
319///
321 // Add keywords and tokens for the current language.
322#define KEYWORD(NAME, FLAGS) \
323 AddKeyword(StringRef(#NAME), tok::kw_ ## NAME, \
324 FLAGS, LangOpts, *this);
325#define ALIAS(NAME, TOK, FLAGS) \
326 AddKeyword(StringRef(NAME), tok::kw_ ## TOK, \
327 FLAGS, LangOpts, *this);
328#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \
329 if (LangOpts.CXXOperatorNames) \
330 AddCXXOperatorKeyword(StringRef(#NAME), tok::ALIAS, *this); \
331 else \
332 MarkIdentifierAsKeywordInCpp(*this, StringRef(#NAME));
333#define OBJC_AT_KEYWORD(NAME) \
334 if (LangOpts.ObjC) \
335 AddObjCKeyword(StringRef(#NAME), tok::objc_##NAME, *this);
336#define NOTABLE_IDENTIFIER(NAME) \
337 AddNotableIdentifier(StringRef(#NAME), tok::NAME, *this);
338
339#define TESTING_KEYWORD(NAME, FLAGS)
340#include "clang/Basic/TokenKinds.def"
341
342 if (LangOpts.ParseUnknownAnytype)
343 AddKeyword("__unknown_anytype", tok::kw___unknown_anytype, KEYALL,
344 LangOpts, *this);
345
346 if (LangOpts.DeclSpecKeyword)
347 AddKeyword("__declspec", tok::kw___declspec, KEYALL, LangOpts, *this);
348
349 if (LangOpts.IEEE128)
350 AddKeyword("__ieee128", tok::kw___float128, KEYALL, LangOpts, *this);
351
352 // Add the 'import' contextual keyword.
353 get("import").setModulesImport(true);
354}
355
356/// Checks if the specified token kind represents a keyword in the
357/// specified language.
358/// \returns Status of the keyword in the language.
359static KeywordStatus getTokenKwStatus(const LangOptions &LangOpts,
360 tok::TokenKind K) {
361 switch (K) {
362#define KEYWORD(NAME, FLAGS) \
363 case tok::kw_##NAME: return getKeywordStatus(LangOpts, FLAGS);
364#include "clang/Basic/TokenKinds.def"
365 default: return KS_Disabled;
366 }
367}
368
369/// Returns true if the identifier represents a keyword in the
370/// specified language.
371bool IdentifierInfo::isKeyword(const LangOptions &LangOpts) const {
372 switch (getTokenKwStatus(LangOpts, getTokenID())) {
373 case KS_Enabled:
374 case KS_Extension:
375 return true;
376 default:
377 return false;
378 }
379}
380
381/// Returns true if the identifier represents a C++ keyword in the
382/// specified language.
384 if (!LangOpts.CPlusPlus || !isKeyword(LangOpts))
385 return false;
386 // This is a C++ keyword if this identifier is not a keyword when checked
387 // using LangOptions without C++ support.
388 LangOptions LangOptsNoCPP = LangOpts;
389 LangOptsNoCPP.CPlusPlus = false;
390 LangOptsNoCPP.CPlusPlus11 = false;
391 LangOptsNoCPP.CPlusPlus20 = false;
392 return !isKeyword(LangOptsNoCPP);
393}
394
397 StringRef Name = getName();
398
399 // '_' is a reserved identifier, but its use is so common (e.g. to store
400 // ignored values) that we don't warn on it.
401 if (Name.size() <= 1)
403
404 // [lex.name] p3
405 if (Name[0] == '_') {
406
407 // Each name that begins with an underscore followed by an uppercase letter
408 // or another underscore is reserved.
409 if (Name[1] == '_')
411
412 if ('A' <= Name[1] && Name[1] <= 'Z')
415
416 // This is a bit misleading: it actually means it's only reserved if we're
417 // at global scope because it starts with an underscore.
419 }
420
421 // Each name that contains a double underscore (__) is reserved.
422 if (LangOpts.CPlusPlus && Name.contains("__"))
424
426}
427
430 StringRef Name = getName();
431
432 // Note: the diag::warn_deprecated_literal_operator_id diagnostic depends on
433 // this being the first check we do, so if this order changes, we have to fix
434 // that as well.
435 if (Name[0] != '_')
437
438 if (Name.contains("__"))
440
442}
443
445 StringRef Name = getName();
446 if (Name.size() >= 2 && Name.front() == '_' &&
447 (Name[1] == '_' || (Name[1] >= 'A' && Name[1] <= 'Z')))
448 return Name.ltrim('_');
449 return Name;
450}
451
453 // We use a perfect hash function here involving the length of the keyword,
454 // the first and third character. For preprocessor ID's there are no
455 // collisions (if there were, the switch below would complain about duplicate
456 // case values). Note that this depends on 'if' being null terminated.
457
458#define HASH(LEN, FIRST, THIRD) \
459 (LEN << 6) + (((FIRST - 'a') - (THIRD - 'a')) & 63)
460#define CASE(LEN, FIRST, THIRD, NAME) \
461 case HASH(LEN, FIRST, THIRD): \
462 return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME
463
464 unsigned Len = getLength();
465 if (Len < 2) return tok::pp_not_keyword;
466 const char *Name = getNameStart();
467 switch (HASH(Len, Name[0], Name[2])) {
468 default: return tok::pp_not_keyword;
469 CASE( 2, 'i', '\0', if);
470 CASE( 4, 'e', 'i', elif);
471 CASE( 4, 'e', 's', else);
472 CASE( 4, 'l', 'n', line);
473 CASE( 4, 's', 'c', sccs);
474 CASE( 5, 'e', 'b', embed);
475 CASE( 5, 'e', 'd', endif);
476 CASE( 5, 'e', 'r', error);
477 CASE( 5, 'i', 'e', ident);
478 CASE( 5, 'i', 'd', ifdef);
479 CASE( 5, 'u', 'd', undef);
480
481 CASE( 6, 'a', 's', assert);
482 CASE( 6, 'd', 'f', define);
483 CASE( 6, 'i', 'n', ifndef);
484 CASE( 6, 'i', 'p', import);
485 CASE( 6, 'p', 'a', pragma);
486
487 CASE( 7, 'd', 'f', defined);
488 CASE( 7, 'e', 'i', elifdef);
489 CASE( 7, 'i', 'c', include);
490 CASE( 7, 'w', 'r', warning);
491
492 CASE( 8, 'e', 'i', elifndef);
493 CASE( 8, 'u', 'a', unassert);
494 CASE(12, 'i', 'c', include_next);
495
496 CASE(14, '_', 'p', __public_macro);
497
498 CASE(15, '_', 'p', __private_macro);
499
500 CASE(16, '_', 'i', __include_macros);
501#undef CASE
502#undef HASH
503 }
504}
505
506//===----------------------------------------------------------------------===//
507// Stats Implementation
508//===----------------------------------------------------------------------===//
509
510/// PrintStats - Print statistics about how well the identifier table is doing
511/// at hashing identifiers.
513 unsigned NumBuckets = HashTable.getNumBuckets();
514 unsigned NumIdentifiers = HashTable.getNumItems();
515 unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers;
516 unsigned AverageIdentifierSize = 0;
517 unsigned MaxIdentifierLength = 0;
518
519 // TODO: Figure out maximum times an identifier had to probe for -stats.
520 for (llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator>::const_iterator
521 I = HashTable.begin(), E = HashTable.end(); I != E; ++I) {
522 unsigned IdLen = I->getKeyLength();
523 AverageIdentifierSize += IdLen;
524 if (MaxIdentifierLength < IdLen)
525 MaxIdentifierLength = IdLen;
526 }
527
528 fprintf(stderr, "\n*** Identifier Table Stats:\n");
529 fprintf(stderr, "# Identifiers: %d\n", NumIdentifiers);
530 fprintf(stderr, "# Empty Buckets: %d\n", NumEmptyBuckets);
531 fprintf(stderr, "Hash density (#identifiers per bucket): %f\n",
532 NumIdentifiers/(double)NumBuckets);
533 fprintf(stderr, "Ave identifier length: %f\n",
534 (AverageIdentifierSize/(double)NumIdentifiers));
535 fprintf(stderr, "Max identifier length: %d\n", MaxIdentifierLength);
536
537 // Compute statistics about the memory allocated for identifiers.
538 HashTable.getAllocator().PrintStats();
539}
540
541//===----------------------------------------------------------------------===//
542// SelectorTable Implementation
543//===----------------------------------------------------------------------===//
544
545unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) {
546 return DenseMapInfo<void*>::getHashValue(S.getAsOpaquePtr());
547}
548
550 assert(!Names.empty() && "must have >= 1 selector slots");
551 if (getNumArgs() != Names.size())
552 return false;
553 for (unsigned I = 0, E = Names.size(); I != E; ++I) {
554 if (getNameForSlot(I) != Names[I])
555 return false;
556 }
557 return true;
558}
559
560bool Selector::isUnarySelector(StringRef Name) const {
561 return isUnarySelector() && getNameForSlot(0) == Name;
562}
563
564unsigned Selector::getNumArgs() const {
565 unsigned IIF = getIdentifierInfoFlag();
566 if (IIF <= ZeroArg)
567 return 0;
568 if (IIF == OneArg)
569 return 1;
570 // We point to a MultiKeywordSelector.
571 MultiKeywordSelector *SI = getMultiKeywordSelector();
572 return SI->getNumArgs();
573}
574
575const IdentifierInfo *
576Selector::getIdentifierInfoForSlot(unsigned argIndex) const {
577 if (getIdentifierInfoFlag() < MultiArg) {
578 assert(argIndex == 0 && "illegal keyword index");
579 return getAsIdentifierInfo();
580 }
581
582 // We point to a MultiKeywordSelector.
583 MultiKeywordSelector *SI = getMultiKeywordSelector();
584 return SI->getIdentifierInfoForSlot(argIndex);
585}
586
587StringRef Selector::getNameForSlot(unsigned int argIndex) const {
588 const IdentifierInfo *II = getIdentifierInfoForSlot(argIndex);
589 return II ? II->getName() : StringRef();
590}
591
592std::string MultiKeywordSelector::getName() const {
594 llvm::raw_svector_ostream OS(Str);
595 for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
596 if (*I)
597 OS << (*I)->getName();
598 OS << ':';
599 }
600
601 return std::string(OS.str());
602}
603
604std::string Selector::getAsString() const {
605 if (isNull())
606 return "<null selector>";
607
608 if (getIdentifierInfoFlag() < MultiArg) {
609 const IdentifierInfo *II = getAsIdentifierInfo();
610
611 if (getNumArgs() == 0) {
612 assert(II && "If the number of arguments is 0 then II is guaranteed to "
613 "not be null.");
614 return std::string(II->getName());
615 }
616
617 if (!II)
618 return ":";
619
620 return II->getName().str() + ":";
621 }
622
623 // We have a multiple keyword selector.
624 return getMultiKeywordSelector()->getName();
625}
626
627void Selector::print(llvm::raw_ostream &OS) const {
628 OS << getAsString();
629}
630
631LLVM_DUMP_METHOD void Selector::dump() const { print(llvm::errs()); }
632
633/// Interpreting the given string using the normal CamelCase
634/// conventions, determine whether the given string starts with the
635/// given "word", which is assumed to end in a lowercase letter.
636static bool startsWithWord(StringRef name, StringRef word) {
637 if (name.size() < word.size()) return false;
638 return ((name.size() == word.size() || !isLowercase(name[word.size()])) &&
639 name.starts_with(word));
640}
641
642ObjCMethodFamily Selector::getMethodFamilyImpl(Selector sel) {
643 const IdentifierInfo *first = sel.getIdentifierInfoForSlot(0);
644 if (!first) return OMF_None;
645
646 StringRef name = first->getName();
647 if (sel.isUnarySelector()) {
648 if (name == "autorelease") return OMF_autorelease;
649 if (name == "dealloc") return OMF_dealloc;
650 if (name == "finalize") return OMF_finalize;
651 if (name == "release") return OMF_release;
652 if (name == "retain") return OMF_retain;
653 if (name == "retainCount") return OMF_retainCount;
654 if (name == "self") return OMF_self;
655 if (name == "initialize") return OMF_initialize;
656 }
657
658 if (name == "performSelector" || name == "performSelectorInBackground" ||
659 name == "performSelectorOnMainThread")
660 return OMF_performSelector;
661
662 // The other method families may begin with a prefix of underscores.
663 name = name.ltrim('_');
664
665 if (name.empty()) return OMF_None;
666 switch (name.front()) {
667 case 'a':
668 if (startsWithWord(name, "alloc")) return OMF_alloc;
669 break;
670 case 'c':
671 if (startsWithWord(name, "copy")) return OMF_copy;
672 break;
673 case 'i':
674 if (startsWithWord(name, "init")) return OMF_init;
675 break;
676 case 'm':
677 if (startsWithWord(name, "mutableCopy")) return OMF_mutableCopy;
678 break;
679 case 'n':
680 if (startsWithWord(name, "new")) return OMF_new;
681 break;
682 default:
683 break;
684 }
685
686 return OMF_None;
687}
688
690 const IdentifierInfo *first = sel.getIdentifierInfoForSlot(0);
691 if (!first) return OIT_None;
692
693 StringRef name = first->getName();
694
695 if (name.empty()) return OIT_None;
696 switch (name.front()) {
697 case 'a':
698 if (startsWithWord(name, "array")) return OIT_Array;
699 break;
700 case 'd':
701 if (startsWithWord(name, "default")) return OIT_ReturnsSelf;
702 if (startsWithWord(name, "dictionary")) return OIT_Dictionary;
703 break;
704 case 's':
705 if (startsWithWord(name, "shared")) return OIT_ReturnsSelf;
706 if (startsWithWord(name, "standard")) return OIT_Singleton;
707 break;
708 case 'i':
709 if (startsWithWord(name, "init")) return OIT_Init;
710 break;
711 default:
712 break;
713 }
714 return OIT_None;
715}
716
717ObjCStringFormatFamily Selector::getStringFormatFamilyImpl(Selector sel) {
718 const IdentifierInfo *first = sel.getIdentifierInfoForSlot(0);
719 if (!first) return SFF_None;
720
721 StringRef name = first->getName();
722
723 switch (name.front()) {
724 case 'a':
725 if (name == "appendFormat") return SFF_NSString;
726 break;
727
728 case 'i':
729 if (name == "initWithFormat") return SFF_NSString;
730 break;
731
732 case 'l':
733 if (name == "localizedStringWithFormat") return SFF_NSString;
734 break;
735
736 case 's':
737 if (name == "stringByAppendingFormat" ||
738 name == "stringWithFormat") return SFF_NSString;
739 break;
740 }
741 return SFF_None;
742}
743
744namespace {
745
746struct SelectorTableImpl {
747 llvm::FoldingSet<MultiKeywordSelector> Table;
748 llvm::BumpPtrAllocator Allocator;
749};
750
751} // namespace
752
753static SelectorTableImpl &getSelectorTableImpl(void *P) {
754 return *static_cast<SelectorTableImpl*>(P);
755}
756
759 SmallString<64> SetterName("set");
760 SetterName += Name;
761 SetterName[3] = toUppercase(SetterName[3]);
762 return SetterName;
763}
764
767 SelectorTable &SelTable,
768 const IdentifierInfo *Name) {
769 IdentifierInfo *SetterName =
770 &Idents.get(constructSetterName(Name->getName()));
771 return SelTable.getUnarySelector(SetterName);
772}
773
775 StringRef Name = Sel.getNameForSlot(0);
776 assert(Name.starts_with("set") && "invalid setter name");
777 return (Twine(toLowercase(Name[3])) + Name.drop_front(4)).str();
778}
779
781 SelectorTableImpl &SelTabImpl = getSelectorTableImpl(Impl);
782 return SelTabImpl.Allocator.getTotalMemory();
783}
784
786 const IdentifierInfo **IIV) {
787 if (nKeys < 2)
788 return Selector(IIV[0], nKeys);
789
790 SelectorTableImpl &SelTabImpl = getSelectorTableImpl(Impl);
791
792 // Unique selector, to guarantee there is one per name.
793 llvm::FoldingSetNodeID ID;
794 MultiKeywordSelector::Profile(ID, IIV, nKeys);
795
796 void *InsertPos = nullptr;
797 if (MultiKeywordSelector *SI =
798 SelTabImpl.Table.FindNodeOrInsertPos(ID, InsertPos))
799 return Selector(SI);
800
801 // MultiKeywordSelector objects are not allocated with new because they have a
802 // variable size array (for parameter types) at the end of them.
803 unsigned Size = sizeof(MultiKeywordSelector) + nKeys*sizeof(IdentifierInfo *);
805 (MultiKeywordSelector *)SelTabImpl.Allocator.Allocate(
806 Size, alignof(MultiKeywordSelector));
807 new (SI) MultiKeywordSelector(nKeys, IIV);
808 SelTabImpl.Table.InsertNode(SI, InsertPos);
809 return Selector(SI);
810}
811
813 Impl = new SelectorTableImpl();
814}
815
817 delete &getSelectorTableImpl(Impl);
818}
819
821 switch (Operator) {
822 case OO_None:
824 return nullptr;
825
826#define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \
827 case OO_##Name: return Spelling;
828#include "clang/Basic/OperatorKinds.def"
829 }
830
831 llvm_unreachable("Invalid OverloadedOperatorKind!");
832}
833
835 bool isContextSensitive) {
836 switch (kind) {
838 return isContextSensitive ? "nonnull" : "_Nonnull";
839
841 return isContextSensitive ? "nullable" : "_Nullable";
842
844 assert(!isContextSensitive &&
845 "_Nullable_result isn't supported as context-sensitive keyword");
846 return "_Nullable_result";
847
849 return isContextSensitive ? "null_unspecified" : "_Null_unspecified";
850 }
851 llvm_unreachable("Unknown nullability kind.");
852}
853
854llvm::raw_ostream &clang::operator<<(llvm::raw_ostream &OS,
855 NullabilityKind NK) {
856 switch (NK) {
858 return OS << "NonNull";
860 return OS << "Nullable";
862 return OS << "NullableResult";
864 return OS << "Unspecified";
865 }
866 llvm_unreachable("Unknown nullability kind.");
867}
868
871 const LangOptions &LangOpts) {
872 assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");
873
874 unsigned Flags = llvm::StringSwitch<unsigned>(II.getName())
875#define KEYWORD(NAME, FLAGS) .Case(#NAME, FLAGS)
876#include "clang/Basic/TokenKinds.def"
877#undef KEYWORD
878 ;
879
880 if (LangOpts.CPlusPlus) {
881 if ((Flags & KEYCXX11) == KEYCXX11)
882 return diag::warn_cxx11_keyword;
883
884 // char8_t is not modeled as a CXX20_KEYWORD because it's not
885 // unconditionally enabled in C++20 mode. (It can be disabled
886 // by -fno-char8_t.)
887 if (((Flags & KEYCXX20) == KEYCXX20) ||
888 ((Flags & CHAR8SUPPORT) == CHAR8SUPPORT))
889 return diag::warn_cxx20_keyword;
890 } else {
891 if ((Flags & KEYC99) == KEYC99)
892 return diag::warn_c99_keyword;
893 if ((Flags & KEYC23) == KEYC23)
894 return diag::warn_c23_keyword;
895 }
896
897 llvm_unreachable(
898 "Keyword not known to come from a newer Standard or proposed Standard");
899}
StringRef P
Expr * E
static void AddObjCKeyword(StringRef Name, tok::ObjCKeywordKind ObjCID, IdentifierTable &Table)
AddObjCKeyword - Register an Objective-C @keyword like "class" "selector" or "property".
static bool IsKeywordInCpp(unsigned Flags)
static void AddCXXOperatorKeyword(StringRef Keyword, tok::TokenKind TokenCode, IdentifierTable &Table)
AddCXXOperatorKeyword - Register a C++ operator keyword alternative representations.
static void AddNotableIdentifier(StringRef Name, tok::NotableIdentifierKind BTID, IdentifierTable &Table)
static void MarkIdentifierAsKeywordInCpp(IdentifierTable &Table, StringRef Name)
static KeywordStatus getKeywordStatusHelper(const LangOptions &LangOpts, TokenKey Flag)
static KeywordStatus getTokenKwStatus(const LangOptions &LangOpts, tok::TokenKind K)
Checks if the specified token kind represents a keyword in the specified language.
static bool startsWithWord(StringRef name, StringRef word)
Interpreting the given string using the normal CamelCase conventions, determine whether the given str...
#define CASE(LEN, FIRST, THIRD, NAME)
static void AddKeyword(StringRef Keyword, tok::TokenKind TokenCode, unsigned Flags, const LangOptions &LangOpts, IdentifierTable &Table)
AddKeyword - This method is used to associate a token ID with specific identifiers because they are l...
static SelectorTableImpl & getSelectorTableImpl(void *P)
static KeywordStatus getKeywordStatus(const LangOptions &LangOpts, unsigned Flags)
Translates flags as specified in TokenKinds.def into keyword status in the given language standard.
#define HASH(LEN, FIRST, THIRD)
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Defines the clang::LangOptions interface.
Defines an enumeration for C++ overloaded operators.
Defines various enumerations that describe declaration and type specifiers.
Enumerates target-specific builtins in their own namespaces within namespace clang.
Defines the clang::TokenKind enum and support functions.
Provides lookups to, and iteration over, IdentifierInfo objects.
virtual IdentifierIterator * getIdentifiers()
Retrieve an iterator into the set of all identifiers known to this identifier lookup source.
One of these records is kept for each identifier that is lexed.
bool isCPlusPlusKeyword(const LangOptions &LangOpts) const
Return true if this token is a C++ keyword in the specified language.
unsigned getLength() const
Efficiently return the length of this identifier info.
void setModulesImport(bool I)
Set whether this identifier is the contextual keyword import.
void setNotableIdentifierID(unsigned ID)
void setIsExtensionToken(bool Val)
tok::PPKeywordKind getPPKeywordID() const
Return the preprocessor keyword ID for this identifier.
tok::TokenKind getTokenID() const
If this is a source-language token (e.g.
void setObjCKeywordID(tok::ObjCKeywordKind ID)
void setHandleIdentifierCase(bool Val=true)
void setIsKeywordInCPlusPlus(bool Val=true)
const char * getNameStart() const
Return the beginning of the actual null-terminated string for this identifier.
bool isKeyword(const LangOptions &LangOpts) const
Return true if this token is a keyword in the specified language.
ReservedIdentifierStatus isReserved(const LangOptions &LangOpts) const
Determine whether this is a name reserved for the implementation (C99 7.1.3, C++ [lib....
void setIsCPlusPlusOperatorKeyword(bool Val=true)
isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether this identifier is a C++ al...
ReservedLiteralSuffixIdStatus isReservedLiteralSuffixId() const
Determine whether this is a name reserved for future standardization or the implementation (C++ [usrl...
void setIsFutureCompatKeyword(bool Val)
StringRef deuglifiedName() const
If the identifier is an "uglified" reserved name, return a cleaned form.
StringRef getName() const
Return the actual identifier string.
bool isFutureCompatKeyword() const
is/setIsFutureCompatKeyword - Initialize information about whether or not this language token is a ke...
An iterator that walks over all of the known identifiers in the lookup table.
virtual StringRef Next()=0
Retrieve the next string in the identifier table and advances the iterator for the following string.
Implements an efficient mapping from strings to IdentifierInfo nodes.
IdentifierTable(IdentifierInfoLookup *ExternalLookup=nullptr)
Create the identifier table.
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
void PrintStats() const
Print some statistics to stderr that indicate how well the hashing is doing.
void AddKeywords(const LangOptions &LangOpts)
Populate the identifier table with info about the language keywords for the language specified by LangOpts.
diag::kind getFutureCompatDiagKind(const IdentifierInfo &II, const LangOptions &LangOpts)
Returns the correct diagnostic to issue for a future-compat diagnostic warning.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that is accepted.
Definition: LangOptions.h:434
bool isCompatibleWithMSVC(MSVCMajorVersion MajorVersion) const
Definition: LangOptions.h:619
bool isSYCL() const
Definition: LangOptions.h:702
One of these variable length records is kept for each selector containing more than one keyword.
keyword_iterator keyword_end() const
const IdentifierInfo *const * keyword_iterator
static void Profile(llvm::FoldingSetNodeID &ID, keyword_iterator ArgTys, unsigned NumArgs)
keyword_iterator keyword_begin() const
const IdentifierInfo * getIdentifierInfoForSlot(unsigned i) const
This table allows us to fully hide how we implement multi-keyword caching.
static std::string getPropertyNameFromSetterSelector(Selector Sel)
Return the property name for the given setter selector.
static Selector constructSetterSelector(IdentifierTable &Idents, SelectorTable &SelTable, const IdentifierInfo *Name)
Return the default setter selector for the given identifier.
size_t getTotalMemory() const
Return the total amount of memory allocated for managing selectors.
Selector getSelector(unsigned NumArgs, const IdentifierInfo **IIV)
Can create any sort of selector.
Selector getUnarySelector(const IdentifierInfo *ID)
static SmallString< 64 > constructSetterName(StringRef Name)
Return the default setter name for the given identifier.
Smart pointer class that efficiently represents Objective-C method names.
StringRef getNameForSlot(unsigned argIndex) const
Retrieve the name at a given position in the selector.
const IdentifierInfo * getIdentifierInfoForSlot(unsigned argIndex) const
Retrieve the identifier at a given position in the selector.
std::string getAsString() const
Derive the full selector name (e.g. "foo:bar:") and return it as an std::string.
void print(llvm::raw_ostream &OS) const
Prints the full selector name (e.g. "foo:bar:").
bool isKeywordSelector() const
static ObjCInstanceTypeFamily getInstTypeMethodFamily(Selector sel)
bool isUnarySelector() const
bool isNull() const
Determine whether this is the empty selector.
unsigned getNumArgs() const
unsigned getNumArgs() const
Return the number of arguments in an ObjC selector.
unsigned kind
All of the diagnostics that can be emitted by the frontend.
Definition: DiagnosticIDs.h:76
NotableIdentifierKind
Provides a namespace for notable identifiers such as float_t and double_t.
Definition: TokenKinds.h:49
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
Definition: TokenKinds.h:41
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:25
PPKeywordKind
Provides a namespace for preprocessor keywords which start with a '#' at the beginning of the line.
Definition: TokenKinds.h:33
RangeSelector name(std::string ID)
Given a node with a "name", (like NamedDecl, DeclRefExpr, CxxCtorInitializer, and TypeLoc) selects the name's token.
The JSON file list parser is used to communicate input to InstallAPI.
OverloadedOperatorKind
Enumeration specifying the different kinds of C++ overloaded operators.
Definition: OperatorKinds.h:21
@ OO_None
Not an overloaded operator.
Definition: OperatorKinds.h:22
@ NUM_OVERLOADED_OPERATORS
Definition: OperatorKinds.h:26
ObjCStringFormatFamily
NullabilityKind
Describes the nullability of a particular type.
Definition: Specifiers.h:348
@ Nullable
Values of this type can be null.
@ Unspecified
Whether values of this type can be null is (explicitly) unspecified.
@ NonNull
Values of this type can never be null.
LLVM_READONLY char toLowercase(char c)
Converts the given ASCII character to its lowercase equivalent.
Definition: CharInfo.h:224
ObjCMethodFamily
A family of Objective-C methods.
@ OMF_initialize
@ OMF_autorelease
@ OMF_mutableCopy
@ OMF_performSelector
@ OMF_None
No particular method family.
@ OMF_retainCount
const StreamingDiagnostic & operator<<(const StreamingDiagnostic &DB, const ASTContext::SectionInfo &Section)
Insertion operator for diagnostics.
llvm::StringRef getNullabilitySpelling(NullabilityKind kind, bool isContextSensitive=false)
Retrieve the spelling of the given nullability kind.
ObjCInstanceTypeFamily
A family of Objective-C methods.
@ OIT_Dictionary
@ OIT_ReturnsSelf
ReservedLiteralSuffixIdStatus
LLVM_READONLY bool isLowercase(unsigned char c)
Return true if this character is a lowercase ASCII letter: [a-z].
Definition: CharInfo.h:120
LLVM_READONLY char toUppercase(char c)
Converts the given ASCII character to its uppercase equivalent.
Definition: CharInfo.h:233
@ Keyword
The name has been typo-corrected to a keyword.
static constexpr int InterestingIdentifierBits
static constexpr uint64_t LargestBuiltinID
const char * getOperatorSpelling(OverloadedOperatorKind Operator)
Retrieve the spelling of the given overloaded operator, without the preceding "operator" keyword.
ReservedIdentifierStatus