rustdoc/html/
highlight.rs

//! Basic syntax highlighting functionality.
//!
//! This module uses the `rustc_lexer` crate to provide token-based highlighting for
//! the HTML documentation generated by rustdoc.
//!
//! Use `render_example_with_highlighting` to highlight a Rust code example.
7
8use std::collections::VecDeque;
9use std::fmt::{self, Display, Write};
10
11use rustc_data_structures::fx::FxIndexMap;
12use rustc_lexer::{Cursor, FrontmatterAllowed, LiteralKind, TokenKind};
13use rustc_span::edition::Edition;
14use rustc_span::symbol::Symbol;
15use rustc_span::{BytePos, DUMMY_SP, Span};
16
17use super::format::{self, write_str};
18use crate::clean::PrimitiveType;
19use crate::html::escape::EscapeBodyText;
20use crate::html::render::{Context, LinkFromSrc};
21
22/// This type is needed in case we want to render links on items to allow to go to their definition.
23pub(crate) struct HrefContext<'a, 'tcx> {
24    pub(crate) context: &'a Context<'tcx>,
25    /// This span contains the current file we're going through.
26    pub(crate) file_span: Span,
27    /// This field is used to know "how far" from the top of the directory we are to link to either
28    /// documentation pages or other source pages.
29    pub(crate) root_path: &'a str,
30    /// This field is used to calculate precise local URLs.
31    pub(crate) current_href: String,
32}
33
34/// Decorations are represented as a map from CSS class to vector of character ranges.
35/// Each range will be wrapped in a span with that class.
36#[derive(Default)]
37pub(crate) struct DecorationInfo(pub(crate) FxIndexMap<&'static str, Vec<(u32, u32)>>);
38
39#[derive(Eq, PartialEq, Clone)]
40pub(crate) enum Tooltip {
41    IgnoreAll,
42    IgnoreSome(Vec<String>),
43    CompileFail,
44    ShouldPanic,
45    Edition(Edition),
46    None,
47}
48
49/// Highlights `src` as an inline example, returning the HTML output.
50pub(crate) fn render_example_with_highlighting(
51    src: &str,
52    out: &mut String,
53    tooltip: Tooltip,
54    playground_button: Option<&str>,
55    extra_classes: &[String],
56) {
57    write_header(out, "rust-example-rendered", None, tooltip, extra_classes);
58    write_code(out, src, None, None, None);
59    write_footer(out, playground_button);
60}
61
62fn write_header(
63    out: &mut String,
64    class: &str,
65    extra_content: Option<&str>,
66    tooltip: Tooltip,
67    extra_classes: &[String],
68) {
69    write_str(
70        out,
71        format_args!(
72            "<div class=\"example-wrap{}\">",
73            match tooltip {
74                Tooltip::IgnoreAll | Tooltip::IgnoreSome(_) => " ignore",
75                Tooltip::CompileFail => " compile_fail",
76                Tooltip::ShouldPanic => " should_panic",
77                Tooltip::Edition(_) => " edition",
78                Tooltip::None => "",
79            }
80        ),
81    );
82
83    if tooltip != Tooltip::None {
84        let tooltip = fmt::from_fn(|f| match &tooltip {
85            Tooltip::IgnoreAll => f.write_str("This example is not tested"),
86            Tooltip::IgnoreSome(platforms) => {
87                f.write_str("This example is not tested on ")?;
88                match &platforms[..] {
89                    [] => unreachable!(),
90                    [platform] => f.write_str(platform)?,
91                    [first, second] => write!(f, "{first} or {second}")?,
92                    [platforms @ .., last] => {
93                        for platform in platforms {
94                            write!(f, "{platform}, ")?;
95                        }
96                        write!(f, "or {last}")?;
97                    }
98                }
99                Ok(())
100            }
101            Tooltip::CompileFail => f.write_str("This example deliberately fails to compile"),
102            Tooltip::ShouldPanic => f.write_str("This example panics"),
103            Tooltip::Edition(edition) => write!(f, "This example runs with edition {edition}"),
104            Tooltip::None => unreachable!(),
105        });
106        write_str(out, format_args!("<a href=\"#\" class=\"tooltip\" title=\"{tooltip}\">ⓘ</a>"));
107    }
108
109    if let Some(extra) = extra_content {
110        out.push_str(extra);
111    }
112    if class.is_empty() {
113        write_str(
114            out,
115            format_args!(
116                "<pre class=\"rust{}{}\">",
117                if extra_classes.is_empty() { "" } else { " " },
118                extra_classes.join(" ")
119            ),
120        );
121    } else {
122        write_str(
123            out,
124            format_args!(
125                "<pre class=\"rust {class}{}{}\">",
126                if extra_classes.is_empty() { "" } else { " " },
127                extra_classes.join(" ")
128            ),
129        );
130    }
131    write_str(out, format_args!("<code>"));
132}
133
134/// Check if two `Class` can be merged together. In the following rules, "unclassified" means `None`
135/// basically (since it's `Option<Class>`). The following rules apply:
136///
137/// * If two `Class` have the same variant, then they can be merged.
138/// * If the other `Class` is unclassified and only contains white characters (backline,
139///   whitespace, etc), it can be merged.
140/// * `Class::Ident` is considered the same as unclassified (because it doesn't have an associated
141///   CSS class).
142fn can_merge(class1: Option<Class>, class2: Option<Class>, text: &str) -> bool {
143    match (class1, class2) {
144        (Some(c1), Some(c2)) => c1.is_equal_to(c2),
145        (Some(Class::Ident(_)), None) | (None, Some(Class::Ident(_))) => true,
146        (Some(Class::Macro(_)), _) => false,
147        (Some(_), None) | (None, Some(_)) => text.trim().is_empty(),
148        (None, None) => true,
149    }
150}
151
152/// This type is used as a conveniency to prevent having to pass all its fields as arguments into
153/// the various functions (which became its methods).
154struct TokenHandler<'a, 'tcx, F: Write> {
155    out: &'a mut F,
156    /// It contains the closing tag and the associated `Class`.
157    closing_tags: Vec<(&'static str, Class)>,
158    /// This is used because we don't automatically generate the closing tag on `ExitSpan` in
159    /// case an `EnterSpan` event with the same class follows.
160    pending_exit_span: Option<Class>,
161    /// `current_class` and `pending_elems` are used to group HTML elements with same `class`
162    /// attributes to reduce the DOM size.
163    current_class: Option<Class>,
164    /// We need to keep the `Class` for each element because it could contain a `Span` which is
165    /// used to generate links.
166    pending_elems: Vec<(&'a str, Option<Class>)>,
167    href_context: Option<HrefContext<'a, 'tcx>>,
168    write_line_number: fn(&mut F, u32, &'static str),
169}
170
171impl<F: Write> TokenHandler<'_, '_, F> {
172    fn handle_exit_span(&mut self) {
173        // We can't get the last `closing_tags` element using `pop()` because `closing_tags` is
174        // being used in `write_pending_elems`.
175        let class = self.closing_tags.last().expect("ExitSpan without EnterSpan").1;
176        // We flush everything just in case...
177        self.write_pending_elems(Some(class));
178
179        exit_span(self.out, self.closing_tags.pop().expect("ExitSpan without EnterSpan").0);
180        self.pending_exit_span = None;
181    }
182
183    /// Write all the pending elements sharing a same (or at mergeable) `Class`.
184    ///
185    /// If there is a "parent" (if a `EnterSpan` event was encountered) and the parent can be merged
186    /// with the elements' class, then we simply write the elements since the `ExitSpan` event will
187    /// close the tag.
188    ///
189    /// Otherwise, if there is only one pending element, we let the `string` function handle both
190    /// opening and closing the tag, otherwise we do it into this function.
191    ///
192    /// It returns `true` if `current_class` must be set to `None` afterwards.
193    fn write_pending_elems(&mut self, current_class: Option<Class>) -> bool {
194        if self.pending_elems.is_empty() {
195            return false;
196        }
197        if let Some((_, parent_class)) = self.closing_tags.last()
198            && can_merge(current_class, Some(*parent_class), "")
199        {
200            for (text, class) in self.pending_elems.iter() {
201                string(
202                    self.out,
203                    EscapeBodyText(text),
204                    *class,
205                    &self.href_context,
206                    false,
207                    self.write_line_number,
208                );
209            }
210        } else {
211            // We only want to "open" the tag ourselves if we have more than one pending and if the
212            // current parent tag is not the same as our pending content.
213            let close_tag = if self.pending_elems.len() > 1
214                && let Some(current_class) = current_class
215                // `PreludeTy` can never include more than an ident so it should not generate
216                // a wrapping `span`.
217                && !matches!(current_class, Class::PreludeTy(_))
218            {
219                Some(enter_span(self.out, current_class, &self.href_context))
220            } else {
221                None
222            };
223            for (text, class) in self.pending_elems.iter() {
224                string(
225                    self.out,
226                    EscapeBodyText(text),
227                    *class,
228                    &self.href_context,
229                    close_tag.is_none(),
230                    self.write_line_number,
231                );
232            }
233            if let Some(close_tag) = close_tag {
234                exit_span(self.out, close_tag);
235            }
236        }
237        self.pending_elems.clear();
238        true
239    }
240
241    #[inline]
242    fn write_line_number(&mut self, line: u32, extra: &'static str) {
243        (self.write_line_number)(self.out, line, extra);
244    }
245}
246
247impl<F: Write> Drop for TokenHandler<'_, '_, F> {
248    /// When leaving, we need to flush all pending data to not have missing content.
249    fn drop(&mut self) {
250        if self.pending_exit_span.is_some() {
251            self.handle_exit_span();
252        } else {
253            self.write_pending_elems(self.current_class);
254        }
255    }
256}
257
/// Writes `extra` followed by `line` wrapped in a `<span data-nosnippet>`.
///
/// Panics if writing to `out` fails.
fn write_scraped_line_number(out: &mut impl Write, line: u32, extra: &'static str) {
    // `data-nosnippet` prevents "1 2 3 4 5 ..." from showing up in web search results, see
    // https://developers.google.com/search/docs/crawling-indexing/robots-meta-tag#data-nosnippet-attr
    out.write_fmt(format_args!("{extra}<span data-nosnippet>{line}</span>")).unwrap();
}
263
/// Writes `extra` followed by an anchor for `line`: the `<a>` links to `#line`, uses `line` as
/// its `id` and displays `line`.
///
/// Panics if writing to `out` fails.
fn write_line_number(out: &mut impl Write, line: u32, extra: &'static str) {
    // `data-nosnippet` prevents "1 2 3 4 5 ..." from showing up in web search results, see
    // https://developers.google.com/search/docs/crawling-indexing/robots-meta-tag#data-nosnippet-attr
    out.write_fmt(format_args!("{extra}<a href=#{line} id={line} data-nosnippet>{line}</a>"))
        .unwrap();
}
269
/// Line number writer used when no line number is wanted: only `extra` is written, the line
/// is ignored.
///
/// Panics if writing to `out` fails.
fn empty_line_number(out: &mut impl Write, _line: u32, extra: &'static str) {
    Write::write_str(out, extra).unwrap();
}
273
274#[derive(Clone, Copy)]
275pub(super) struct LineInfo {
276    pub(super) start_line: u32,
277    max_lines: u32,
278    pub(super) is_scraped_example: bool,
279}
280
281impl LineInfo {
282    pub(super) fn new(max_lines: u32) -> Self {
283        Self { start_line: 1, max_lines: max_lines + 1, is_scraped_example: false }
284    }
285
286    pub(super) fn new_scraped(max_lines: u32, start_line: u32) -> Self {
287        Self {
288            start_line: start_line + 1,
289            max_lines: max_lines + start_line + 1,
290            is_scraped_example: true,
291        }
292    }
293}
294
295/// Convert the given `src` source code into HTML by adding classes for highlighting.
296///
297/// This code is used to render code blocks (in the documentation) as well as the source code pages.
298///
299/// Some explanations on the last arguments:
300///
301/// In case we are rendering a code block and not a source code file, `href_context` will be `None`.
302/// To put it more simply: if `href_context` is `None`, the code won't try to generate links to an
303/// item definition.
304///
305/// More explanations about spans and how we use them here are provided in the
306pub(super) fn write_code(
307    out: &mut impl Write,
308    src: &str,
309    href_context: Option<HrefContext<'_, '_>>,
310    decoration_info: Option<&DecorationInfo>,
311    line_info: Option<LineInfo>,
312) {
313    // This replace allows to fix how the code source with DOS backline characters is displayed.
314    let src = src.replace("\r\n", "\n");
315    let mut token_handler = TokenHandler {
316        out,
317        closing_tags: Vec::new(),
318        pending_exit_span: None,
319        current_class: None,
320        pending_elems: Vec::new(),
321        href_context,
322        write_line_number: match line_info {
323            Some(line_info) => {
324                if line_info.is_scraped_example {
325                    write_scraped_line_number
326                } else {
327                    write_line_number
328                }
329            }
330            None => empty_line_number,
331        },
332    };
333
334    let (mut line, max_lines) = if let Some(line_info) = line_info {
335        token_handler.write_line_number(line_info.start_line, "");
336        (line_info.start_line, line_info.max_lines)
337    } else {
338        (0, u32::MAX)
339    };
340
341    Classifier::new(
342        &src,
343        token_handler.href_context.as_ref().map(|c| c.file_span).unwrap_or(DUMMY_SP),
344        decoration_info,
345    )
346    .highlight(&mut |highlight| {
347        match highlight {
348            Highlight::Token { text, class } => {
349                // If we received a `ExitSpan` event and then have a non-compatible `Class`, we
350                // need to close the `<span>`.
351                let need_current_class_update = if let Some(pending) =
352                    token_handler.pending_exit_span
353                    && !can_merge(Some(pending), class, text)
354                {
355                    token_handler.handle_exit_span();
356                    true
357                // If the two `Class` are different, time to flush the current content and start
358                // a new one.
359                } else if !can_merge(token_handler.current_class, class, text) {
360                    token_handler.write_pending_elems(token_handler.current_class);
361                    true
362                } else {
363                    token_handler.current_class.is_none()
364                };
365
366                if need_current_class_update {
367                    token_handler.current_class = class.map(Class::dummy);
368                }
369                if text == "\n" {
370                    line += 1;
371                    if line < max_lines {
372                        token_handler.pending_elems.push((text, Some(Class::Backline(line))));
373                    }
374                } else {
375                    token_handler.pending_elems.push((text, class));
376                }
377            }
378            Highlight::EnterSpan { class } => {
379                let mut should_add = true;
380                if let Some(pending_exit_span) = token_handler.pending_exit_span {
381                    if class.is_equal_to(pending_exit_span) {
382                        should_add = false;
383                    } else {
384                        token_handler.handle_exit_span();
385                    }
386                } else {
387                    // We flush everything just in case...
388                    if token_handler.write_pending_elems(token_handler.current_class) {
389                        token_handler.current_class = None;
390                    }
391                }
392                if should_add {
393                    let closing_tag =
394                        enter_span(token_handler.out, class, &token_handler.href_context);
395                    token_handler.closing_tags.push((closing_tag, class));
396                }
397
398                token_handler.current_class = None;
399                token_handler.pending_exit_span = None;
400            }
401            Highlight::ExitSpan => {
402                token_handler.current_class = None;
403                token_handler.pending_exit_span = Some(
404                    token_handler
405                        .closing_tags
406                        .last()
407                        .as_ref()
408                        .expect("ExitSpan without EnterSpan")
409                        .1,
410                );
411            }
412        };
413    });
414}
415
416fn write_footer(out: &mut String, playground_button: Option<&str>) {
417    write_str(out, format_args_nl!("</code></pre>{}</div>", playground_button.unwrap_or_default()));
418}
419
420/// How a span of text is classified. Mostly corresponds to token kinds.
421#[derive(Clone, Copy, Debug, Eq, PartialEq)]
422enum Class {
423    Comment,
424    DocComment,
425    Attribute,
426    KeyWord,
427    /// Keywords that do pointer/reference stuff.
428    RefKeyWord,
429    Self_(Span),
430    Macro(Span),
431    MacroNonTerminal,
432    String,
433    Number,
434    Bool,
435    /// `Ident` isn't rendered in the HTML but we still need it for the `Span` it contains.
436    Ident(Span),
437    Lifetime,
438    PreludeTy(Span),
439    PreludeVal(Span),
440    QuestionMark,
441    Decoration(&'static str),
442    Backline(u32),
443}
444
445impl Class {
446    /// It is only looking at the variant, not the variant content.
447    ///
448    /// It is used mostly to group multiple similar HTML elements into one `<span>` instead of
449    /// multiple ones.
450    fn is_equal_to(self, other: Self) -> bool {
451        match (self, other) {
452            (Self::Self_(_), Self::Self_(_))
453            | (Self::Macro(_), Self::Macro(_))
454            | (Self::Ident(_), Self::Ident(_)) => true,
455            (Self::Decoration(c1), Self::Decoration(c2)) => c1 == c2,
456            (x, y) => x == y,
457        }
458    }
459
460    /// If `self` contains a `Span`, it'll be replaced with `DUMMY_SP` to prevent creating links
461    /// on "empty content" (because of the attributes merge).
462    fn dummy(self) -> Self {
463        match self {
464            Self::Self_(_) => Self::Self_(DUMMY_SP),
465            Self::Macro(_) => Self::Macro(DUMMY_SP),
466            Self::Ident(_) => Self::Ident(DUMMY_SP),
467            s => s,
468        }
469    }
470
471    /// Returns the css class expected by rustdoc for each `Class`.
472    fn as_html(self) -> &'static str {
473        match self {
474            Class::Comment => "comment",
475            Class::DocComment => "doccomment",
476            Class::Attribute => "attr",
477            Class::KeyWord => "kw",
478            Class::RefKeyWord => "kw-2",
479            Class::Self_(_) => "self",
480            Class::Macro(_) => "macro",
481            Class::MacroNonTerminal => "macro-nonterminal",
482            Class::String => "string",
483            Class::Number => "number",
484            Class::Bool => "bool-val",
485            Class::Ident(_) => "",
486            Class::Lifetime => "lifetime",
487            Class::PreludeTy(_) => "prelude-ty",
488            Class::PreludeVal(_) => "prelude-val",
489            Class::QuestionMark => "question-mark",
490            Class::Decoration(kind) => kind,
491            Class::Backline(_) => "",
492        }
493    }
494
495    /// In case this is an item which can be converted into a link to a definition, it'll contain
496    /// a "span" (a tuple representing `(lo, hi)` equivalent of `Span`).
497    fn get_span(self) -> Option<Span> {
498        match self {
499            Self::Ident(sp)
500            | Self::Self_(sp)
501            | Self::Macro(sp)
502            | Self::PreludeTy(sp)
503            | Self::PreludeVal(sp) => Some(sp),
504            Self::Comment
505            | Self::DocComment
506            | Self::Attribute
507            | Self::KeyWord
508            | Self::RefKeyWord
509            | Self::MacroNonTerminal
510            | Self::String
511            | Self::Number
512            | Self::Bool
513            | Self::Lifetime
514            | Self::QuestionMark
515            | Self::Decoration(_)
516            | Self::Backline(_) => None,
517        }
518    }
519}
520
521#[derive(Debug)]
522enum Highlight<'a> {
523    Token { text: &'a str, class: Option<Class> },
524    EnterSpan { class: Class },
525    ExitSpan,
526}
527
528struct TokenIter<'a> {
529    src: &'a str,
530    cursor: Cursor<'a>,
531}
532
533impl<'a> Iterator for TokenIter<'a> {
534    type Item = (TokenKind, &'a str);
535    fn next(&mut self) -> Option<(TokenKind, &'a str)> {
536        let token = self.cursor.advance_token();
537        if token.kind == TokenKind::Eof {
538            return None;
539        }
540        let (text, rest) = self.src.split_at(token.len as usize);
541        self.src = rest;
542        Some((token.kind, text))
543    }
544}
545
546/// Classifies into identifier class; returns `None` if this is a non-keyword identifier.
547fn get_real_ident_class(text: &str, allow_path_keywords: bool) -> Option<Class> {
548    let ignore: &[&str] =
549        if allow_path_keywords { &["self", "Self", "super", "crate"] } else { &["self", "Self"] };
550    if ignore.iter().any(|k| *k == text) {
551        return None;
552    }
553    Some(match text {
554        "ref" | "mut" => Class::RefKeyWord,
555        "false" | "true" => Class::Bool,
556        _ if Symbol::intern(text).is_reserved(|| Edition::Edition2021) => Class::KeyWord,
557        _ => return None,
558    })
559}
560
561/// This iterator comes from the same idea than "Peekable" except that it allows to "peek" more than
562/// just the next item by using `peek_next`. The `peek` method always returns the next item after
563/// the current one whereas `peek_next` will return the next item after the last one peeked.
564///
565/// You can use both `peek` and `peek_next` at the same time without problem.
566struct PeekIter<'a> {
567    stored: VecDeque<(TokenKind, &'a str)>,
568    /// This position is reinitialized when using `next`. It is used in `peek_next`.
569    peek_pos: usize,
570    iter: TokenIter<'a>,
571}
572
573impl<'a> PeekIter<'a> {
574    fn new(iter: TokenIter<'a>) -> Self {
575        Self { stored: VecDeque::new(), peek_pos: 0, iter }
576    }
577    /// Returns the next item after the current one. It doesn't interfere with `peek_next` output.
578    fn peek(&mut self) -> Option<&(TokenKind, &'a str)> {
579        if self.stored.is_empty()
580            && let Some(next) = self.iter.next()
581        {
582            self.stored.push_back(next);
583        }
584        self.stored.front()
585    }
586    /// Returns the next item after the last one peeked. It doesn't interfere with `peek` output.
587    fn peek_next(&mut self) -> Option<&(TokenKind, &'a str)> {
588        self.peek_pos += 1;
589        if self.peek_pos - 1 < self.stored.len() {
590            self.stored.get(self.peek_pos - 1)
591        } else if let Some(next) = self.iter.next() {
592            self.stored.push_back(next);
593            self.stored.back()
594        } else {
595            None
596        }
597    }
598}
599
600impl<'a> Iterator for PeekIter<'a> {
601    type Item = (TokenKind, &'a str);
602    fn next(&mut self) -> Option<Self::Item> {
603        self.peek_pos = 0;
604        if let Some(first) = self.stored.pop_front() { Some(first) } else { self.iter.next() }
605    }
606}
607
608/// Custom spans inserted into the source. Eg --scrape-examples uses this to highlight function calls
609struct Decorations {
610    starts: Vec<(u32, &'static str)>,
611    ends: Vec<u32>,
612}
613
614impl Decorations {
615    fn new(info: &DecorationInfo) -> Self {
616        // Extract tuples (start, end, kind) into separate sequences of (start, kind) and (end).
617        let (mut starts, mut ends): (Vec<_>, Vec<_>) = info
618            .0
619            .iter()
620            .flat_map(|(&kind, ranges)| ranges.iter().map(move |&(lo, hi)| ((lo, kind), hi)))
621            .unzip();
622
623        // Sort the sequences in document order.
624        starts.sort_by_key(|(lo, _)| *lo);
625        ends.sort();
626
627        Decorations { starts, ends }
628    }
629}
630
631/// Processes program tokens, classifying strings of text by highlighting
632/// category (`Class`).
633struct Classifier<'src> {
634    tokens: PeekIter<'src>,
635    in_attribute: bool,
636    in_macro: bool,
637    in_macro_nonterminal: bool,
638    byte_pos: u32,
639    file_span: Span,
640    src: &'src str,
641    decorations: Option<Decorations>,
642}
643
644impl<'src> Classifier<'src> {
645    /// Takes as argument the source code to HTML-ify, the rust edition to use and the source code
646    /// file span which will be used later on by the `span_correspondence_map`.
647    fn new(src: &'src str, file_span: Span, decoration_info: Option<&DecorationInfo>) -> Self {
648        let tokens =
649            PeekIter::new(TokenIter { src, cursor: Cursor::new(src, FrontmatterAllowed::Yes) });
650        let decorations = decoration_info.map(Decorations::new);
651        Classifier {
652            tokens,
653            in_attribute: false,
654            in_macro: false,
655            in_macro_nonterminal: false,
656            byte_pos: 0,
657            file_span,
658            src,
659            decorations,
660        }
661    }
662
663    /// Convenient wrapper to create a [`Span`] from a position in the file.
664    fn new_span(&self, lo: u32, text: &str) -> Span {
665        let hi = lo + text.len() as u32;
666        let file_lo = self.file_span.lo();
667        self.file_span.with_lo(file_lo + BytePos(lo)).with_hi(file_lo + BytePos(hi))
668    }
669
670    /// Concatenate colons and idents as one when possible.
671    fn get_full_ident_path(&mut self) -> Vec<(TokenKind, usize, usize)> {
672        let start = self.byte_pos as usize;
673        let mut pos = start;
674        let mut has_ident = false;
675
676        loop {
677            let mut nb = 0;
678            while let Some((TokenKind::Colon, _)) = self.tokens.peek() {
679                self.tokens.next();
680                nb += 1;
681            }
682            // Ident path can start with "::" but if we already have content in the ident path,
683            // the "::" is mandatory.
684            if has_ident && nb == 0 {
685                return vec![(TokenKind::Ident, start, pos)];
686            } else if nb != 0 && nb != 2 {
687                if has_ident {
688                    return vec![(TokenKind::Ident, start, pos), (TokenKind::Colon, pos, pos + nb)];
689                } else {
690                    return vec![(TokenKind::Colon, start, pos + nb)];
691                }
692            }
693
694            if let Some((None, text)) = self.tokens.peek().map(|(token, text)| {
695                if *token == TokenKind::Ident {
696                    let class = get_real_ident_class(text, true);
697                    (class, text)
698                } else {
699                    // Doesn't matter which Class we put in here...
700                    (Some(Class::Comment), text)
701                }
702            }) {
703                // We only "add" the colon if there is an ident behind.
704                pos += text.len() + nb;
705                has_ident = true;
706                self.tokens.next();
707            } else if nb > 0 && has_ident {
708                return vec![(TokenKind::Ident, start, pos), (TokenKind::Colon, pos, pos + nb)];
709            } else if nb > 0 {
710                return vec![(TokenKind::Colon, start, start + nb)];
711            } else if has_ident {
712                return vec![(TokenKind::Ident, start, pos)];
713            } else {
714                return Vec::new();
715            }
716        }
717    }
718
719    /// Wraps the tokens iteration to ensure that the `byte_pos` is always correct.
720    ///
721    /// It returns the token's kind, the token as a string and its byte position in the source
722    /// string.
723    fn next(&mut self) -> Option<(TokenKind, &'src str, u32)> {
724        if let Some((kind, text)) = self.tokens.next() {
725            let before = self.byte_pos;
726            self.byte_pos += text.len() as u32;
727            Some((kind, text, before))
728        } else {
729            None
730        }
731    }
732
733    /// Exhausts the `Classifier` writing the output into `sink`.
734    ///
735    /// The general structure for this method is to iterate over each token,
736    /// possibly giving it an HTML span with a class specifying what flavor of
737    /// token is used.
738    fn highlight(mut self, sink: &mut dyn FnMut(Highlight<'src>)) {
739        loop {
740            if let Some(decs) = self.decorations.as_mut() {
741                let byte_pos = self.byte_pos;
742                let n_starts = decs.starts.iter().filter(|(i, _)| byte_pos >= *i).count();
743                for (_, kind) in decs.starts.drain(0..n_starts) {
744                    sink(Highlight::EnterSpan { class: Class::Decoration(kind) });
745                }
746
747                let n_ends = decs.ends.iter().filter(|i| byte_pos >= **i).count();
748                for _ in decs.ends.drain(0..n_ends) {
749                    sink(Highlight::ExitSpan);
750                }
751            }
752
753            if self
754                .tokens
755                .peek()
756                .map(|t| matches!(t.0, TokenKind::Colon | TokenKind::Ident))
757                .unwrap_or(false)
758            {
759                let tokens = self.get_full_ident_path();
760                for (token, start, end) in &tokens {
761                    let text = &self.src[*start..*end];
762                    self.advance(*token, text, sink, *start as u32);
763                    self.byte_pos += text.len() as u32;
764                }
765                if !tokens.is_empty() {
766                    continue;
767                }
768            }
769            if let Some((token, text, before)) = self.next() {
770                self.advance(token, text, sink, before);
771            } else {
772                break;
773            }
774        }
775    }
776
777    /// Single step of highlighting. This will classify `token`, but maybe also a couple of
778    /// following ones as well.
779    ///
780    /// `before` is the position of the given token in the `source` string and is used as "lo" byte
781    /// in case we want to try to generate a link for this token using the
782    /// `span_correspondence_map`.
783    fn advance(
784        &mut self,
785        token: TokenKind,
786        text: &'src str,
787        sink: &mut dyn FnMut(Highlight<'src>),
788        before: u32,
789    ) {
790        let lookahead = self.peek();
791        let no_highlight = |sink: &mut dyn FnMut(_)| sink(Highlight::Token { text, class: None });
792        let whitespace = |sink: &mut dyn FnMut(_)| {
793            for part in text.split('\n').intersperse("\n").filter(|s| !s.is_empty()) {
794                sink(Highlight::Token { text: part, class: None });
795            }
796        };
797        let class = match token {
798            TokenKind::Whitespace => return whitespace(sink),
799            TokenKind::LineComment { doc_style } | TokenKind::BlockComment { doc_style, .. } => {
800                if doc_style.is_some() {
801                    Class::DocComment
802                } else {
803                    Class::Comment
804                }
805            }
806            // Consider this as part of a macro invocation if there was a
807            // leading identifier.
808            TokenKind::Bang if self.in_macro => {
809                self.in_macro = false;
810                sink(Highlight::Token { text, class: None });
811                sink(Highlight::ExitSpan);
812                return;
813            }
814
815            // Assume that '&' or '*' is the reference or dereference operator
816            // or a reference or pointer type. Unless, of course, it looks like
817            // a logical and or a multiplication operator: `&&` or `* `.
818            TokenKind::Star => match self.tokens.peek() {
819                Some((TokenKind::Whitespace, _)) => return whitespace(sink),
820                Some((TokenKind::Ident, "mut")) => {
821                    self.next();
822                    sink(Highlight::Token { text: "*mut", class: Some(Class::RefKeyWord) });
823                    return;
824                }
825                Some((TokenKind::Ident, "const")) => {
826                    self.next();
827                    sink(Highlight::Token { text: "*const", class: Some(Class::RefKeyWord) });
828                    return;
829                }
830                _ => Class::RefKeyWord,
831            },
832            TokenKind::And => match self.tokens.peek() {
833                Some((TokenKind::And, _)) => {
834                    self.next();
835                    sink(Highlight::Token { text: "&&", class: None });
836                    return;
837                }
838                Some((TokenKind::Eq, _)) => {
839                    self.next();
840                    sink(Highlight::Token { text: "&=", class: None });
841                    return;
842                }
843                Some((TokenKind::Whitespace, _)) => return whitespace(sink),
844                Some((TokenKind::Ident, "mut")) => {
845                    self.next();
846                    sink(Highlight::Token { text: "&mut", class: Some(Class::RefKeyWord) });
847                    return;
848                }
849                _ => Class::RefKeyWord,
850            },
851
852            // These can either be operators, or arrows.
853            TokenKind::Eq => match lookahead {
854                Some(TokenKind::Eq) => {
855                    self.next();
856                    sink(Highlight::Token { text: "==", class: None });
857                    return;
858                }
859                Some(TokenKind::Gt) => {
860                    self.next();
861                    sink(Highlight::Token { text: "=>", class: None });
862                    return;
863                }
864                _ => return no_highlight(sink),
865            },
866            TokenKind::Minus if lookahead == Some(TokenKind::Gt) => {
867                self.next();
868                sink(Highlight::Token { text: "->", class: None });
869                return;
870            }
871
872            // Other operators.
873            TokenKind::Minus
874            | TokenKind::Plus
875            | TokenKind::Or
876            | TokenKind::Slash
877            | TokenKind::Caret
878            | TokenKind::Percent
879            | TokenKind::Bang
880            | TokenKind::Lt
881            | TokenKind::Gt => return no_highlight(sink),
882
883            // Miscellaneous, no highlighting.
884            TokenKind::Dot
885            | TokenKind::Semi
886            | TokenKind::Comma
887            | TokenKind::OpenParen
888            | TokenKind::CloseParen
889            | TokenKind::OpenBrace
890            | TokenKind::CloseBrace
891            | TokenKind::OpenBracket
892            | TokenKind::At
893            | TokenKind::Tilde
894            | TokenKind::Colon
895            | TokenKind::Frontmatter { .. }
896            | TokenKind::Unknown => return no_highlight(sink),
897
898            TokenKind::Question => Class::QuestionMark,
899
900            TokenKind::Dollar => match lookahead {
901                Some(TokenKind::Ident) => {
902                    self.in_macro_nonterminal = true;
903                    Class::MacroNonTerminal
904                }
905                _ => return no_highlight(sink),
906            },
907
908            // This might be the start of an attribute. We're going to want to
909            // continue highlighting it as an attribute until the ending ']' is
910            // seen, so skip out early. Down below we terminate the attribute
911            // span when we see the ']'.
912            TokenKind::Pound => {
913                match lookahead {
914                    // Case 1: #![inner_attribute]
915                    Some(TokenKind::Bang) => {
916                        self.next();
917                        if let Some(TokenKind::OpenBracket) = self.peek() {
918                            self.in_attribute = true;
919                            sink(Highlight::EnterSpan { class: Class::Attribute });
920                        }
921                        sink(Highlight::Token { text: "#", class: None });
922                        sink(Highlight::Token { text: "!", class: None });
923                        return;
924                    }
925                    // Case 2: #[outer_attribute]
926                    Some(TokenKind::OpenBracket) => {
927                        self.in_attribute = true;
928                        sink(Highlight::EnterSpan { class: Class::Attribute });
929                    }
930                    _ => (),
931                }
932                return no_highlight(sink);
933            }
934            TokenKind::CloseBracket => {
935                if self.in_attribute {
936                    self.in_attribute = false;
937                    sink(Highlight::Token { text: "]", class: None });
938                    sink(Highlight::ExitSpan);
939                    return;
940                }
941                return no_highlight(sink);
942            }
943            TokenKind::Literal { kind, .. } => match kind {
944                // Text literals.
945                LiteralKind::Byte { .. }
946                | LiteralKind::Char { .. }
947                | LiteralKind::Str { .. }
948                | LiteralKind::ByteStr { .. }
949                | LiteralKind::RawStr { .. }
950                | LiteralKind::RawByteStr { .. }
951                | LiteralKind::CStr { .. }
952                | LiteralKind::RawCStr { .. } => Class::String,
953                // Number literals.
954                LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number,
955            },
956            TokenKind::GuardedStrPrefix => return no_highlight(sink),
957            TokenKind::Ident | TokenKind::RawIdent if lookahead == Some(TokenKind::Bang) => {
958                self.in_macro = true;
959                sink(Highlight::EnterSpan { class: Class::Macro(self.new_span(before, text)) });
960                sink(Highlight::Token { text, class: None });
961                return;
962            }
963            TokenKind::Ident => match get_real_ident_class(text, false) {
964                None => match text {
965                    "Option" | "Result" => Class::PreludeTy(self.new_span(before, text)),
966                    "Some" | "None" | "Ok" | "Err" => {
967                        Class::PreludeVal(self.new_span(before, text))
968                    }
969                    // "union" is a weak keyword and is only considered as a keyword when declaring
970                    // a union type.
971                    "union" if self.check_if_is_union_keyword() => Class::KeyWord,
972                    _ if self.in_macro_nonterminal => {
973                        self.in_macro_nonterminal = false;
974                        Class::MacroNonTerminal
975                    }
976                    "self" | "Self" => Class::Self_(self.new_span(before, text)),
977                    _ => Class::Ident(self.new_span(before, text)),
978                },
979                Some(c) => c,
980            },
981            TokenKind::RawIdent | TokenKind::UnknownPrefix | TokenKind::InvalidIdent => {
982                Class::Ident(self.new_span(before, text))
983            }
984            TokenKind::Lifetime { .. }
985            | TokenKind::RawLifetime
986            | TokenKind::UnknownPrefixLifetime => Class::Lifetime,
987            TokenKind::Eof => panic!("Eof in advance"),
988        };
989        // Anything that didn't return above is the simple case where we the
990        // class just spans a single token, so we can use the `string` method.
991        for part in text.split('\n').intersperse("\n").filter(|s| !s.is_empty()) {
992            sink(Highlight::Token { text: part, class: Some(class) });
993        }
994    }
995
996    fn peek(&mut self) -> Option<TokenKind> {
997        self.tokens.peek().map(|(token_kind, _text)| *token_kind)
998    }
999
1000    fn check_if_is_union_keyword(&mut self) -> bool {
1001        while let Some(kind) = self.tokens.peek_next().map(|(token_kind, _text)| token_kind) {
1002            if *kind == TokenKind::Whitespace {
1003                continue;
1004            }
1005            return *kind == TokenKind::Ident;
1006        }
1007        false
1008    }
1009}
1010
1011/// Called when we start processing a span of text that should be highlighted.
1012/// The `Class` argument specifies how it should be highlighted.
1013fn enter_span(
1014    out: &mut impl Write,
1015    klass: Class,
1016    href_context: &Option<HrefContext<'_, '_>>,
1017) -> &'static str {
1018    string_without_closing_tag(out, "", Some(klass), href_context, true).expect(
1019        "internal error: enter_span was called with Some(klass) but did not return a \
1020            closing HTML tag",
1021    )
1022}
1023
/// Closes a span of highlighted text by appending `closing_tag` to `out`.
///
/// # Panics
///
/// Panics if writing to `out` fails.
fn exit_span(out: &mut impl Write, closing_tag: &str) {
    Write::write_str(out, closing_tag).unwrap();
}
1028
1029/// Called for a span of text. If the text should be highlighted differently
1030/// from the surrounding text, then the `Class` argument will be a value other
1031/// than `None`.
1032///
1033/// The following sequences of callbacks are equivalent:
1034/// ```plain
1035///     enter_span(Foo), string("text", None), exit_span()
1036///     string("text", Foo)
1037/// ```
1038///
1039/// The latter can be thought of as a shorthand for the former, which is more
1040/// flexible.
1041///
1042/// Note that if `context` is not `None` and that the given `klass` contains a `Span`, the function
1043/// will then try to find this `span` in the `span_correspondence_map`. If found, it'll then
1044/// generate a link for this element (which corresponds to where its definition is located).
1045fn string<T: Display, W: Write>(
1046    out: &mut W,
1047    text: T,
1048    klass: Option<Class>,
1049    href_context: &Option<HrefContext<'_, '_>>,
1050    open_tag: bool,
1051    write_line_number_callback: fn(&mut W, u32, &'static str),
1052) {
1053    if let Some(Class::Backline(line)) = klass {
1054        write_line_number_callback(out, line, "\n");
1055    } else if let Some(closing_tag) =
1056        string_without_closing_tag(out, text, klass, href_context, open_tag)
1057    {
1058        out.write_str(closing_tag).unwrap();
1059    }
1060}
1061
1062/// This function writes `text` into `out` with some modifications depending on `klass`:
1063///
1064/// * If `klass` is `None`, `text` is written into `out` with no modification.
1065/// * If `klass` is `Some` but `klass.get_span()` is `None`, it writes the text wrapped in a
1066///   `<span>` with the provided `klass`.
1067/// * If `klass` is `Some` and has a [`rustc_span::Span`], it then tries to generate a link (`<a>`
1068///   element) by retrieving the link information from the `span_correspondence_map` that was filled
1069///   in `span_map.rs::collect_spans_and_sources`. If it cannot retrieve the information, then it's
1070///   the same as the second point (`klass` is `Some` but doesn't have a [`rustc_span::Span`]).
1071fn string_without_closing_tag<T: Display>(
1072    out: &mut impl Write,
1073    text: T,
1074    klass: Option<Class>,
1075    href_context: &Option<HrefContext<'_, '_>>,
1076    open_tag: bool,
1077) -> Option<&'static str> {
1078    let Some(klass) = klass else {
1079        write!(out, "{text}").unwrap();
1080        return None;
1081    };
1082    let Some(def_span) = klass.get_span() else {
1083        if !open_tag {
1084            write!(out, "{text}").unwrap();
1085            return None;
1086        }
1087        write!(out, "<span class=\"{klass}\">{text}", klass = klass.as_html()).unwrap();
1088        return Some("</span>");
1089    };
1090
1091    let mut text_s = text.to_string();
1092    if text_s.contains("::") {
1093        text_s = text_s.split("::").intersperse("::").fold(String::new(), |mut path, t| {
1094            match t {
1095                "self" | "Self" => write!(
1096                    &mut path,
1097                    "<span class=\"{klass}\">{t}</span>",
1098                    klass = Class::Self_(DUMMY_SP).as_html(),
1099                ),
1100                "crate" | "super" => {
1101                    write!(
1102                        &mut path,
1103                        "<span class=\"{klass}\">{t}</span>",
1104                        klass = Class::KeyWord.as_html(),
1105                    )
1106                }
1107                t => write!(&mut path, "{t}"),
1108            }
1109            .expect("Failed to build source HTML path");
1110            path
1111        });
1112    }
1113
1114    if let Some(href_context) = href_context
1115        && let Some(href) = href_context.context.shared.span_correspondence_map.get(&def_span)
1116        && let Some(href) = {
1117            let context = href_context.context;
1118            // FIXME: later on, it'd be nice to provide two links (if possible) for all items:
1119            // one to the documentation page and one to the source definition.
1120            // FIXME: currently, external items only generate a link to their documentation,
1121            // a link to their definition can be generated using this:
1122            // https://github.com/rust-lang/rust/blob/60f1a2fc4b535ead9c85ce085fdce49b1b097531/src/librustdoc/html/render/context.rs#L315-L338
1123            match href {
1124                LinkFromSrc::Local(span) => {
1125                    context.href_from_span_relative(*span, &href_context.current_href)
1126                }
1127                LinkFromSrc::External(def_id) => {
1128                    format::href_with_root_path(*def_id, context, Some(href_context.root_path))
1129                        .ok()
1130                        .map(|(url, _, _)| url)
1131                }
1132                LinkFromSrc::Primitive(prim) => format::href_with_root_path(
1133                    PrimitiveType::primitive_locations(context.tcx())[prim],
1134                    context,
1135                    Some(href_context.root_path),
1136                )
1137                .ok()
1138                .map(|(url, _, _)| url),
1139                LinkFromSrc::Doc(def_id) => {
1140                    format::href_with_root_path(*def_id, context, Some(href_context.root_path))
1141                        .ok()
1142                        .map(|(doc_link, _, _)| doc_link)
1143                }
1144            }
1145        }
1146    {
1147        if !open_tag {
1148            // We're already inside an element which has the same klass, no need to give it
1149            // again.
1150            write!(out, "<a href=\"{href}\">{text_s}").unwrap();
1151        } else {
1152            let klass_s = klass.as_html();
1153            if klass_s.is_empty() {
1154                write!(out, "<a href=\"{href}\">{text_s}").unwrap();
1155            } else {
1156                write!(out, "<a class=\"{klass_s}\" href=\"{href}\">{text_s}").unwrap();
1157            }
1158        }
1159        return Some("</a>");
1160    }
1161    if !open_tag {
1162        write!(out, "{}", text_s).unwrap();
1163        return None;
1164    }
1165    let klass_s = klass.as_html();
1166    if klass_s.is_empty() {
1167        out.write_str(&text_s).unwrap();
1168        Some("")
1169    } else {
1170        write!(out, "<span class=\"{klass_s}\">{text_s}").unwrap();
1171        Some("</span>")
1172    }
1173}
1174
1175#[cfg(test)]
1176mod tests;