|
495 | 495 | \indextext{token!preprocessing|(}%
|
496 | 496 | \begin{bnf}
|
497 | 497 | \nontermdef{preprocessing-token}\br
|
498 |
| - header-name\br |
499 | 498 | import-keyword\br
|
500 | 499 | module-keyword\br
|
501 | 500 | export-keyword\br
|
502 |
| - identifier\br |
| 501 | + header-name\br |
503 | 502 | pp-number\br
|
| 503 | + preprocessing-op-or-punc\br |
| 504 | + identifier\br |
504 | 505 | character-literal\br
|
505 | 506 | user-defined-character-literal\br
|
506 | 507 | string-literal\br
|
507 | 508 | user-defined-string-literal\br
|
508 |
| - preprocessing-op-or-punc\br |
509 | 509 | \textnormal{each non-whitespace character that cannot be one of the above}
|
510 | 510 | \end{bnf}
|
511 | 511 |
|
512 |
| -\pnum |
513 |
| -Each preprocessing token that is converted to a token\iref{lex.token} |
514 |
| -shall have the lexical form of a keyword, an identifier, a literal, |
515 |
| -or an operator or punctuator. |
516 |
| - |
517 | 512 | \pnum
|
518 | 513 | A preprocessing token is the minimal lexical element of the language in translation
|
519 | 514 | phases 3 through 6.
|
|
548 | 543 | between the quotation characters in a character literal or
|
549 | 544 | string literal.
|
550 | 545 |
|
| 546 | +\pnum |
| 547 | +Each preprocessing token that is converted to a token\iref{lex.token} |
| 548 | +shall have the lexical form of a keyword, an identifier, a literal, |
| 549 | +or an operator or punctuator. |
| 550 | + |
| 551 | +\pnum |
| 552 | +The \grammarterm{import-keyword} is produced |
| 553 | +by preprocessing an \keyword{import} directive\iref{cpp.import}, |
| 554 | +the \grammarterm{module-keyword} is produced |
| 555 | +by preprocessing a \keyword{module} directive\iref{cpp.module}, and |
| 556 | +the \grammarterm{export-keyword} is produced |
| 557 | +by preprocessing either of the previous two directives. |
| 558 | +\begin{note} |
| 559 | +None has any observable spelling. |
| 560 | +\end{note} |
| 561 | + |
551 | 562 | \pnum
|
552 | 563 | If the input stream has been parsed into preprocessing tokens up to a
|
553 | 564 | given character:
|
|
587 | 598 | \end{itemize}
|
588 | 599 | \end{itemize}
|
589 | 600 |
|
| 601 | +\pnum |
590 | 602 | \begin{example}
|
591 | 603 | \begin{codeblock}
|
592 | 604 | #define R "x"
|
593 | 605 | const char* s = R"y"; // ill-formed raw string, not \tcode{"x" "y"}
|
594 | 606 | \end{codeblock}
|
595 | 607 | \end{example}
|
596 | 608 |
|
597 |
| -\pnum |
598 |
| -The \grammarterm{import-keyword} is produced |
599 |
| -by preprocessing an \keyword{import} directive\iref{cpp.import}, |
600 |
| -the \grammarterm{module-keyword} is produced |
601 |
| -by preprocessing a \keyword{module} directive\iref{cpp.module}, and |
602 |
| -the \grammarterm{export-keyword} is produced |
603 |
| -by preprocessing either of the previous two directives. |
604 |
| -\begin{note} |
605 |
| -None has any observable spelling. |
606 |
| -\end{note} |
607 |
| - |
608 | 609 | \pnum
|
609 | 610 | \begin{example}
|
610 | 611 | The program fragment \tcode{0xe+foo} is parsed as a
|
|
627 | 628 | \end{example}
|
628 | 629 | \indextext{token!preprocessing|)}
|
629 | 630 |
|
630 |
| -\rSec1[lex.digraph]{Alternative tokens} |
631 |
| - |
632 |
| -\pnum |
633 |
| -\indextext{token!alternative|(}% |
634 |
| -Alternative token representations are provided for some operators and |
635 |
| -punctuators. |
636 |
| -\begin{footnote} |
637 |
| -\indextext{digraph}% |
638 |
| -These include ``digraphs'' and additional reserved words. The term |
639 |
| -``digraph'' (token consisting of two characters) is not perfectly |
640 |
| -descriptive, since one of the alternative \grammarterm{preprocessing-token}s is |
641 |
| -\tcode{\%:\%:} and of course several primary tokens contain two |
642 |
| -characters. Nonetheless, those alternative tokens that aren't lexical |
643 |
| -keywords are colloquially known as ``digraphs''. |
644 |
| -\end{footnote} |
645 |
| - |
646 |
| -\pnum |
647 |
| -In all respects of the language, each alternative token behaves the |
648 |
| -same, respectively, as its primary token, except for its spelling. |
649 |
| -\begin{footnote} |
650 |
| -Thus the ``stringized'' values\iref{cpp.stringize} of |
651 |
| -\tcode{[} and \tcode{<:} will be different, maintaining the source |
652 |
| -spelling, but the tokens can otherwise be freely interchanged. |
653 |
| -\end{footnote} |
654 |
| -The set of alternative tokens is defined in |
655 |
| -\tref{lex.digraph}. |
656 |
| - |
657 |
| -\begin{tokentable}{Alternative tokens}{lex.digraph}{Alternative}{Primary} |
658 |
| -\tcode{<\%} & \tcode{\{} & |
659 |
| -\keyword{and} & \tcode{\&\&} & |
660 |
| -\keyword{and_eq} & \tcode{\&=} \\ \rowsep |
661 |
| -\tcode{\%>} & \tcode{\}} & |
662 |
| -\keyword{bitor} & \tcode{|} & |
663 |
| -\keyword{or_eq} & \tcode{|=} \\ \rowsep |
664 |
| -\tcode{<:} & \tcode{[} & |
665 |
| -\keyword{or} & \tcode{||} & |
666 |
| -\keyword{xor_eq} & \tcode{\caret=} \\ \rowsep |
667 |
| -\tcode{:>} & \tcode{]} & |
668 |
| -\keyword{xor} & \tcode{\caret} & |
669 |
| -\keyword{not} & \tcode{!} \\ \rowsep |
670 |
| -\tcode{\%:} & \tcode{\#} & |
671 |
| -\keyword{compl} & \tcode{\~} & |
672 |
| -\keyword{not_eq} & \tcode{!=} \\ \rowsep |
673 |
| -\tcode{\%:\%:} & \tcode{\#\#} & |
674 |
| -\keyword{bitand} & \tcode{\&} & |
675 |
| - & \\ |
676 |
| -\end{tokentable}% |
677 |
| -\indextext{token!alternative|)} |
678 |
| - |
679 |
| -\rSec1[lex.token]{Tokens} |
680 |
| - |
681 |
| -\indextext{token|(}% |
682 |
| -\begin{bnf} |
683 |
| -\nontermdef{token}\br |
684 |
| - identifier\br |
685 |
| - keyword\br |
686 |
| - literal\br |
687 |
| - operator-or-punctuator |
688 |
| -\end{bnf} |
689 |
| - |
690 |
| -\pnum |
691 |
| -\indextext{\idxgram{token}}% |
692 |
| -There are five kinds of tokens: identifiers, keywords, literals,% |
693 |
| -\begin{footnote} |
694 |
| -Literals include strings and character and numeric literals. |
695 |
| -\end{footnote} |
696 |
| -operators, and other separators. |
697 |
| -\indextext{whitespace}% |
698 |
| -Blanks, horizontal and vertical tabs, newlines, formfeeds, and comments |
699 |
| -(collectively, ``whitespace''), as described below, are ignored except |
700 |
| -as they serve to separate tokens. |
701 |
| -\begin{note} |
702 |
| -Whitespace can separate otherwise adjacent identifiers, keywords, numeric |
703 |
| -literals, and alternative tokens containing alphabetic characters. |
704 |
| -\end{note} |
705 |
| -\indextext{token|)} |
706 |
| - |
707 | 631 | \rSec1[lex.header]{Header names}
|
708 | 632 |
|
709 | 633 | \indextext{header!name|(}%
|
|
793 | 717 | a \grammarterm{floating-point-literal} token.%
|
794 | 718 | \indextext{number!preprocessing|)}
|
795 | 719 |
|
| 720 | +\rSec1[lex.operators]{Operators and punctuators} |
| 721 | + |
| 722 | +\pnum |
| 723 | +\indextext{operator|(}% |
| 724 | +\indextext{punctuator|(}% |
| 725 | +The lexical representation of \Cpp{} programs includes a number of |
| 726 | +preprocessing tokens that are used in the syntax of the preprocessor or |
| 727 | +are converted into tokens for operators and punctuators: |
| 728 | + |
| 729 | +\begin{bnf} |
| 730 | +\nontermdef{preprocessing-op-or-punc}\br |
| 731 | + preprocessing-operator\br |
| 732 | + operator-or-punctuator |
| 733 | +\end{bnf} |
| 734 | + |
| 735 | +\begin{bnf} |
| 736 | +%% Ed. note: character protrusion would misalign various operators. |
| 737 | +\microtypesetup{protrusion=false}\obeyspaces |
| 738 | +\nontermdef{preprocessing-operator} \textnormal{one of}\br |
| 739 | + \terminal{\# \#\# \%: \%:\%:} |
| 740 | +\end{bnf} |
| 741 | + |
| 742 | +\begin{bnf} |
| 743 | +\microtypesetup{protrusion=false}\obeyspaces |
| 744 | +\nontermdef{operator-or-punctuator} \textnormal{one of}\br |
| 745 | + \terminal{\{ \} [ ] ( )}\br |
| 746 | + \terminal{<: :> <\% \%> ; : ...}\br |
| 747 | + \terminal{? :: . .* -> ->* \~}\br |
| 748 | + \terminal{! + - * / \% \caret{} \& |}\br |
| 749 | + \terminal{= += -= *= /= \%= \caret{}= \&= |=}\br |
| 750 | + \terminal{== != < > <= >= <=> \&\& ||}\br |
| 751 | + \terminal{<< >> <<= >>= ++ -- ,}\br |
| 752 | + \terminal{\keyword{and} \keyword{or} \keyword{xor} \keyword{not} \keyword{bitand} \keyword{bitor} \keyword{compl}}\br |
| 753 | + \terminal{\keyword{and_eq} \keyword{or_eq} \keyword{xor_eq} \keyword{not_eq}} |
| 754 | +\end{bnf} |
| 755 | + |
| 756 | +Each \grammarterm{operator-or-punctuator} is converted to a single token |
| 757 | +in translation phase 7\iref{lex.phases}.% |
| 758 | +\indextext{punctuator|)}% |
| 759 | +\indextext{operator|)} |
| 760 | + |
| 761 | +\rSec1[lex.digraph]{Alternative tokens} |
| 762 | + |
| 763 | +\pnum |
| 764 | +\indextext{token!alternative|(}% |
| 765 | +Alternative token representations are provided for some operators and |
| 766 | +punctuators. |
| 767 | +\begin{footnote} |
| 768 | +\indextext{digraph}% |
| 769 | +These include ``digraphs'' and additional reserved words. The term |
| 770 | +``digraph'' (token consisting of two characters) is not perfectly |
| 771 | +descriptive, since one of the alternative \grammarterm{preprocessing-token}s is |
| 772 | +\tcode{\%:\%:} and of course several primary tokens contain two |
| 773 | +characters. Nonetheless, those alternative tokens that aren't lexical |
| 774 | +keywords are colloquially known as ``digraphs''. |
| 775 | +\end{footnote} |
| 776 | + |
| 777 | +\pnum |
| 778 | +In all respects of the language, each alternative token behaves the |
| 779 | +same, respectively, as its primary token, except for its spelling. |
| 780 | +\begin{footnote} |
| 781 | +Thus the ``stringized'' values\iref{cpp.stringize} of |
| 782 | +\tcode{[} and \tcode{<:} will be different, maintaining the source |
| 783 | +spelling, but the tokens can otherwise be freely interchanged. |
| 784 | +\end{footnote} |
| 785 | +The set of alternative tokens is defined in |
| 786 | +\tref{lex.digraph}. |
| 787 | + |
| 788 | +\begin{tokentable}{Alternative tokens}{lex.digraph}{Alternative}{Primary} |
| 789 | +\tcode{<\%} & \tcode{\{} & |
| 790 | +\keyword{and} & \tcode{\&\&} & |
| 791 | +\keyword{and_eq} & \tcode{\&=} \\ \rowsep |
| 792 | +\tcode{\%>} & \tcode{\}} & |
| 793 | +\keyword{bitor} & \tcode{|} & |
| 794 | +\keyword{or_eq} & \tcode{|=} \\ \rowsep |
| 795 | +\tcode{<:} & \tcode{[} & |
| 796 | +\keyword{or} & \tcode{||} & |
| 797 | +\keyword{xor_eq} & \tcode{\caret=} \\ \rowsep |
| 798 | +\tcode{:>} & \tcode{]} & |
| 799 | +\keyword{xor} & \tcode{\caret} & |
| 800 | +\keyword{not} & \tcode{!} \\ \rowsep |
| 801 | +\tcode{\%:} & \tcode{\#} & |
| 802 | +\keyword{compl} & \tcode{\~} & |
| 803 | +\keyword{not_eq} & \tcode{!=} \\ \rowsep |
| 804 | +\tcode{\%:\%:} & \tcode{\#\#} & |
| 805 | +\keyword{bitand} & \tcode{\&} & |
| 806 | + & \\ |
| 807 | +\end{tokentable}% |
| 808 | +\indextext{token!alternative|)} |
| 809 | + |
| 810 | +\rSec1[lex.token]{Tokens} |
| 811 | + |
| 812 | +\indextext{token|(}% |
| 813 | +\begin{bnf} |
| 814 | +\nontermdef{token}\br |
| 815 | + identifier\br |
| 816 | + keyword\br |
| 817 | + literal\br |
| 818 | + operator-or-punctuator |
| 819 | +\end{bnf} |
| 820 | + |
| 821 | +\pnum |
| 822 | +\indextext{\idxgram{token}}% |
| 823 | +There are five kinds of tokens: identifiers, keywords, literals,% |
| 824 | +\begin{footnote} |
| 825 | +Literals include strings and character and numeric literals. |
| 826 | +\end{footnote} |
| 827 | +operators, and other separators. |
| 828 | +\indextext{whitespace}% |
| 829 | +Blanks, horizontal and vertical tabs, newlines, formfeeds, and comments |
| 830 | +(collectively, ``whitespace''), as described below, are ignored except |
| 831 | +as they serve to separate tokens. |
| 832 | +\begin{note} |
| 833 | +Whitespace can separate otherwise adjacent identifiers, keywords, numeric |
| 834 | +literals, and alternative tokens containing alphabetic characters. |
| 835 | +\end{note} |
| 836 | +\indextext{token|)} |
| 837 | + |
796 | 838 | \rSec1[lex.name]{Identifiers}
|
797 | 839 |
|
798 | 840 | \indextext{identifier|(}%
|
|
1038 | 1080 | \indextext{keyword|)}%
|
1039 | 1081 |
|
1040 | 1082 |
|
1041 |
| -\rSec1[lex.operators]{Operators and punctuators} |
1042 |
| - |
1043 |
| -\pnum |
1044 |
| -\indextext{operator|(}% |
1045 |
| -\indextext{punctuator|(}% |
1046 |
| -The lexical representation of \Cpp{} programs includes a number of |
1047 |
| -preprocessing tokens that are used in the syntax of the preprocessor or |
1048 |
| -are converted into tokens for operators and punctuators: |
1049 |
| - |
1050 |
| -\begin{bnf} |
1051 |
| -\nontermdef{preprocessing-op-or-punc}\br |
1052 |
| - preprocessing-operator\br |
1053 |
| - operator-or-punctuator |
1054 |
| -\end{bnf} |
1055 |
| - |
1056 |
| -\begin{bnf} |
1057 |
| -%% Ed. note: character protrusion would misalign various operators. |
1058 |
| -\microtypesetup{protrusion=false}\obeyspaces |
1059 |
| -\nontermdef{preprocessing-operator} \textnormal{one of}\br |
1060 |
| - \terminal{\# \#\# \%: \%:\%:} |
1061 |
| -\end{bnf} |
1062 |
| - |
1063 |
| -\begin{bnf} |
1064 |
| -\microtypesetup{protrusion=false}\obeyspaces |
1065 |
| -\nontermdef{operator-or-punctuator} \textnormal{one of}\br |
1066 |
| - \terminal{\{ \} [ ] ( )}\br |
1067 |
| - \terminal{<: :> <\% \%> ; : ...}\br |
1068 |
| - \terminal{? :: . .* -> ->* \~}\br |
1069 |
| - \terminal{! + - * / \% \caret{} \& |}\br |
1070 |
| - \terminal{= += -= *= /= \%= \caret{}= \&= |=}\br |
1071 |
| - \terminal{== != < > <= >= <=> \&\& ||}\br |
1072 |
| - \terminal{<< >> <<= >>= ++ -- ,}\br |
1073 |
| - \terminal{\keyword{and} \keyword{or} \keyword{xor} \keyword{not} \keyword{bitand} \keyword{bitor} \keyword{compl}}\br |
1074 |
| - \terminal{\keyword{and_eq} \keyword{or_eq} \keyword{xor_eq} \keyword{not_eq}} |
1075 |
| -\end{bnf} |
1076 |
| - |
1077 |
| -Each \grammarterm{operator-or-punctuator} is converted to a single token |
1078 |
| -in translation phase 7\iref{lex.phases}.% |
1079 |
| -\indextext{punctuator|)}% |
1080 |
| -\indextext{operator|)} |
1081 |
| - |
1082 | 1083 | \rSec1[lex.literal]{Literals}%
|
1083 | 1084 | \indextext{literal|(}
|
1084 | 1085 |
|
|
0 commit comments