1use std::borrow::Cow;
29use std::collections::VecDeque;
30use std::fmt::Write;
31use std::iter::Peekable;
32use std::ops::{ControlFlow, Range};
33use std::path::PathBuf;
34use std::str::{self, CharIndices};
35use std::sync::atomic::AtomicUsize;
36use std::sync::{Arc, Weak};
37
38use pulldown_cmark::{
39 BrokenLink, CodeBlockKind, CowStr, Event, LinkType, Options, Parser, Tag, TagEnd, html,
40};
41use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
42use rustc_errors::{Diag, DiagMessage};
43use rustc_hir::def_id::LocalDefId;
44use rustc_middle::ty::TyCtxt;
45pub(crate) use rustc_resolve::rustdoc::main_body_opts;
46use rustc_resolve::rustdoc::may_be_doc_link;
47use rustc_span::edition::Edition;
48use rustc_span::{Span, Symbol};
49use tracing::{debug, trace};
50
51use crate::clean::RenderedLink;
52use crate::doctest;
53use crate::doctest::GlobalTestOptions;
54use crate::html::escape::{Escape, EscapeBodyText};
55use crate::html::highlight;
56use crate::html::length_limit::HtmlWithLimit;
57use crate::html::render::small_url_encode;
58use crate::html::toc::{Toc, TocBuilder};
59
60mod footnotes;
61#[cfg(test)]
62mod tests;
63
/// Upper bound for generated heading levels: the heading iterator in this file clamps
/// `level + offset` to this value before emitting `<hN>` tags.
const MAX_HEADER_LEVEL: u32 = 6;
65
66pub(crate) fn summary_opts() -> Options {
68 Options::ENABLE_TABLES
69 | Options::ENABLE_FOOTNOTES
70 | Options::ENABLE_STRIKETHROUGH
71 | Options::ENABLE_TASKLISTS
72 | Options::ENABLE_SMART_PUNCTUATION
73}
74
/// Offset applied to the level of rendered Markdown headings.
///
/// The discriminant is cast to `u32` and added to a heading's own level when headings are
/// rendered, so `H1` (value `0`) leaves levels unchanged and each following variant shifts
/// headings one level deeper.
#[derive(Debug, Clone, Copy)]
pub enum HeadingOffset {
    H1 = 0,
    H2,
    H3,
    H4,
    H5,
    H6,
}
84
/// A Markdown document together with everything needed to render it to HTML.
pub struct Markdown<'a> {
    /// The Markdown source text.
    pub content: &'a str,
    /// Resolved links, used to rewrite link destinations and to resolve broken link references.
    pub links: &'a [RenderedLink],
    /// Id map used to derive the `id` attribute of headings.
    pub ids: &'a mut IdMap,
    /// Whether error-code tags (`E0123`) in code block info strings are recognized.
    pub error_codes: ErrorCodes,
    /// Edition used for code blocks that do not specify one explicitly.
    pub edition: Edition,
    /// Playground configuration used to build "Run code" links, if any.
    pub playground: &'a Option<Playground>,
    /// Offset added to the level of every heading in the document.
    pub heading_offset: HeadingOffset,
}
/// A Markdown document to be rendered along with a table of contents.
///
/// NOTE(review): the rendering code for this type is outside the visible part of the file;
/// the fields presumably have the same meaning as the identically named fields of
/// `Markdown` — confirm against the `impl`.
pub(crate) struct MarkdownWithToc<'a> {
    pub(crate) content: &'a str,
    pub(crate) links: &'a [RenderedLink],
    pub(crate) ids: &'a mut IdMap,
    pub(crate) error_codes: ErrorCodes,
    pub(crate) edition: Edition,
    pub(crate) playground: &'a Option<Playground>,
}
/// A Markdown string paired with the id map to use while rendering it.
///
/// NOTE(review): judging by the name this is used for item-info snippets — confirm at the
/// use sites.
pub(crate) struct MarkdownItemInfo<'a>(pub(crate) &'a str, pub(crate) &'a mut IdMap);
/// A Markdown string paired with its resolved links.
///
/// NOTE(review): judging by the name only a summary line of the text is rendered — confirm
/// at the use sites.
pub(crate) struct MarkdownSummaryLine<'a>(pub &'a str, pub &'a [RenderedLink]);
116
/// Two-state flag saying whether error codes are taken into account.
#[derive(Copy, Clone, PartialEq, Debug)]
pub enum ErrorCodes {
    Yes,
    No,
}

impl ErrorCodes {
    /// Converts a boolean into the matching variant (`true` maps to `Yes`).
    pub(crate) fn from(b: bool) -> Self {
        if b { ErrorCodes::Yes } else { ErrorCodes::No }
    }

    /// Converts the flag back into a boolean (`Yes` maps to `true`).
    pub(crate) fn as_bool(self) -> bool {
        matches!(self, ErrorCodes::Yes)
    }
}
138
/// One line of a code block, classified by whether it appears in rendered output.
pub(crate) enum Line<'a> {
    /// A line that is left out of the rendered HTML but kept in the code itself.
    Hidden(&'a str),
    /// A line that is rendered.
    Shown(Cow<'a, str>),
}

impl<'a> Line<'a> {
    /// Returns the text to render, or `None` for a hidden line.
    fn for_html(self) -> Option<Cow<'a, str>> {
        if let Line::Shown(text) = self { Some(text) } else { None }
    }

    /// Returns the text of the line as code, whether it is hidden or shown.
    pub(crate) fn for_code(self) -> Cow<'a, str> {
        match self {
            Line::Hidden(text) => Cow::Borrowed(text),
            Line::Shown(text) => text,
        }
    }
}
162
163pub(crate) fn map_line(s: &str) -> Line<'_> {
171 let trimmed = s.trim();
172 if trimmed.starts_with("##") {
173 Line::Shown(Cow::Owned(s.replacen("##", "#", 1)))
174 } else if let Some(stripped) = trimmed.strip_prefix("# ") {
175 Line::Hidden(stripped)
177 } else if trimmed == "#" {
178 Line::Hidden("")
180 } else {
181 Line::Shown(Cow::Borrowed(s))
182 }
183}
184
/// Maps a character of heading text to the character used in the heading's id.
///
/// Alphanumeric characters, `-` and `_` are kept (ASCII letters are lowercased), ASCII
/// whitespace becomes `-`, and every other character is dropped (`None`).
fn slugify(c: char) -> Option<char> {
    match c {
        '-' | '_' => Some(c),
        // `to_ascii_lowercase` leaves non-ASCII characters untouched.
        _ if c.is_alphanumeric() => Some(c.to_ascii_lowercase()),
        _ if c.is_ascii() && c.is_whitespace() => Some('-'),
        _ => None,
    }
}
197
/// Configuration used to generate "Run code" links for code examples.
#[derive(Debug)]
pub struct Playground {
    /// Name of the documented crate, if known; forwarded to the doctest builder when set.
    pub crate_name: Option<Symbol>,
    /// Base URL of the playground. No link is generated when it is empty.
    pub url: String,
}
203
204struct CodeBlocks<'p, 'a, I: Iterator<Item = Event<'a>>> {
206 inner: I,
207 check_error_codes: ErrorCodes,
208 edition: Edition,
209 playground: &'p Option<Playground>,
212}
213
214impl<'p, 'a, I: Iterator<Item = Event<'a>>> CodeBlocks<'p, 'a, I> {
215 fn new(
216 iter: I,
217 error_codes: ErrorCodes,
218 edition: Edition,
219 playground: &'p Option<Playground>,
220 ) -> Self {
221 CodeBlocks { inner: iter, check_error_codes: error_codes, edition, playground }
222 }
223}
224
225impl<'a, I: Iterator<Item = Event<'a>>> Iterator for CodeBlocks<'_, 'a, I> {
226 type Item = Event<'a>;
227
228 fn next(&mut self) -> Option<Self::Item> {
229 let event = self.inner.next();
230 let Some(Event::Start(Tag::CodeBlock(kind))) = event else {
231 return event;
232 };
233
234 let mut original_text = String::new();
235 for event in &mut self.inner {
236 match event {
237 Event::End(TagEnd::CodeBlock) => break,
238 Event::Text(ref s) => {
239 original_text.push_str(s);
240 }
241 _ => {}
242 }
243 }
244
245 let LangString { added_classes, compile_fail, should_panic, ignore, edition, .. } =
246 match kind {
247 CodeBlockKind::Fenced(ref lang) => {
248 let parse_result =
249 LangString::parse_without_check(lang, self.check_error_codes);
250 if !parse_result.rust {
251 let added_classes = parse_result.added_classes;
252 let lang_string = if let Some(lang) = parse_result.unknown.first() {
253 format!("language-{}", lang)
254 } else {
255 String::new()
256 };
257 let whitespace = if added_classes.is_empty() { "" } else { " " };
258 return Some(Event::Html(
259 format!(
260 "<div class=\"example-wrap\">\
261 <pre class=\"{lang_string}{whitespace}{added_classes}\">\
262 <code>{text}</code>\
263 </pre>\
264 </div>",
265 added_classes = added_classes.join(" "),
266 text = Escape(
267 original_text.strip_suffix('\n').unwrap_or(&original_text)
268 ),
269 )
270 .into(),
271 ));
272 }
273 parse_result
274 }
275 CodeBlockKind::Indented => Default::default(),
276 };
277
278 let lines = original_text.lines().filter_map(|l| map_line(l).for_html());
279 let text = lines.intersperse("\n".into()).collect::<String>();
280
281 let explicit_edition = edition.is_some();
282 let edition = edition.unwrap_or(self.edition);
283
284 let playground_button = self.playground.as_ref().and_then(|playground| {
285 let krate = &playground.crate_name;
286 let url = &playground.url;
287 if url.is_empty() {
288 return None;
289 }
290 let test = original_text
291 .lines()
292 .map(|l| map_line(l).for_code())
293 .intersperse("\n".into())
294 .collect::<String>();
295 let krate = krate.as_ref().map(|s| s.as_str());
296
297 let opts = GlobalTestOptions {
300 crate_name: krate.map(String::from).unwrap_or_default(),
301 no_crate_inject: false,
302 insert_indent_space: true,
303 args_file: PathBuf::new(),
304 };
305 let mut builder = doctest::BuildDocTestBuilder::new(&test).edition(edition);
306 if let Some(krate) = krate {
307 builder = builder.crate_name(krate);
308 }
309 let doctest = builder.build(None);
310 let (test, _) = doctest.generate_unique_doctest(&test, false, &opts, krate);
311 let channel = if test.contains("#![feature(") { "&version=nightly" } else { "" };
312
313 let test_escaped = small_url_encode(test);
314 Some(format!(
315 "<a class=\"test-arrow\" \
316 target=\"_blank\" \
317 title=\"Run code\" \
318 href=\"{url}?code={test_escaped}{channel}&edition={edition}\"></a>",
319 ))
320 });
321
322 let tooltip = if ignore == Ignore::All {
323 highlight::Tooltip::IgnoreAll
324 } else if let Ignore::Some(platforms) = ignore {
325 highlight::Tooltip::IgnoreSome(platforms)
326 } else if compile_fail {
327 highlight::Tooltip::CompileFail
328 } else if should_panic {
329 highlight::Tooltip::ShouldPanic
330 } else if explicit_edition {
331 highlight::Tooltip::Edition(edition)
332 } else {
333 highlight::Tooltip::None
334 };
335
336 let mut s = String::new();
339 s.push('\n');
340
341 highlight::render_example_with_highlighting(
342 &text,
343 &mut s,
344 tooltip,
345 playground_button.as_deref(),
346 &added_classes,
347 );
348 Some(Event::Html(s.into()))
349 }
350}
351
/// State shared by `LinkReplacer` and `SpannedLinkReplacer`.
struct LinkReplacerInner<'a> {
    /// All resolved links of the document being rendered.
    links: &'a [RenderedLink],
    /// The resolved link matching the shortcut link currently being traversed, if any.
    shortcut_link: Option<&'a RenderedLink>,
}
357
358struct LinkReplacer<'a, I: Iterator<Item = Event<'a>>> {
359 iter: I,
360 inner: LinkReplacerInner<'a>,
361}
362
363impl<'a, I: Iterator<Item = Event<'a>>> LinkReplacer<'a, I> {
364 fn new(iter: I, links: &'a [RenderedLink]) -> Self {
365 LinkReplacer { iter, inner: { LinkReplacerInner { links, shortcut_link: None } } }
366 }
367}
368
369struct SpannedLinkReplacer<'a, I: Iterator<Item = SpannedEvent<'a>>> {
372 iter: I,
373 inner: LinkReplacerInner<'a>,
374}
375
376impl<'a, I: Iterator<Item = SpannedEvent<'a>>> SpannedLinkReplacer<'a, I> {
377 fn new(iter: I, links: &'a [RenderedLink]) -> Self {
378 SpannedLinkReplacer { iter, inner: { LinkReplacerInner { links, shortcut_link: None } } }
379 }
380}
381
382impl<'a> LinkReplacerInner<'a> {
383 fn handle_event(&mut self, event: &mut Event<'a>) {
384 match event {
386 Event::Start(Tag::Link {
389 link_type: LinkType::ShortcutUnknown | LinkType::CollapsedUnknown,
391 dest_url,
392 title,
393 ..
394 }) => {
395 debug!("saw start of shortcut link to {dest_url} with title {title}");
396 let link = self.links.iter().find(|&link| *link.href == **dest_url);
399 if let Some(link) = link {
402 trace!("it matched");
403 assert!(self.shortcut_link.is_none(), "shortcut links cannot be nested");
404 self.shortcut_link = Some(link);
405 if title.is_empty() && !link.tooltip.is_empty() {
406 *title = CowStr::Borrowed(link.tooltip.as_ref());
407 }
408 }
409 }
410 Event::End(TagEnd::Link) if self.shortcut_link.is_some() => {
412 debug!("saw end of shortcut link");
413 self.shortcut_link = None;
414 }
415 Event::Code(text) => {
418 trace!("saw code {text}");
419 if let Some(link) = self.shortcut_link {
420 if let Some(link) = self.links.iter().find(|l| {
430 l.href == link.href
431 && Some(&**text) == l.original_text.get(1..l.original_text.len() - 1)
432 }) {
433 debug!("replacing {text} with {new_text}", new_text = link.new_text);
434 *text = CowStr::Borrowed(&link.new_text);
435 }
436 }
437 }
438 Event::Text(text) => {
441 trace!("saw text {text}");
442 if let Some(link) = self.shortcut_link {
443 if let Some(link) = self
445 .links
446 .iter()
447 .find(|l| l.href == link.href && **text == *l.original_text)
448 {
449 debug!("replacing {text} with {new_text}", new_text = link.new_text);
450 *text = CowStr::Borrowed(&link.new_text);
451 }
452 }
453 }
454 Event::Start(Tag::Link { dest_url, title, .. }) => {
457 if let Some(link) =
458 self.links.iter().find(|&link| *link.original_text == **dest_url)
459 {
460 *dest_url = CowStr::Borrowed(link.href.as_ref());
461 if title.is_empty() && !link.tooltip.is_empty() {
462 *title = CowStr::Borrowed(link.tooltip.as_ref());
463 }
464 }
465 }
466 _ => {}
468 }
469 }
470}
471
472impl<'a, I: Iterator<Item = Event<'a>>> Iterator for LinkReplacer<'a, I> {
473 type Item = Event<'a>;
474
475 fn next(&mut self) -> Option<Self::Item> {
476 let mut event = self.iter.next();
477 if let Some(ref mut event) = event {
478 self.inner.handle_event(event);
479 }
480 event
482 }
483}
484
485impl<'a, I: Iterator<Item = SpannedEvent<'a>>> Iterator for SpannedLinkReplacer<'a, I> {
486 type Item = SpannedEvent<'a>;
487
488 fn next(&mut self) -> Option<Self::Item> {
489 let (mut event, range) = self.iter.next()?;
490 self.inner.handle_event(&mut event);
491 Some((event, range))
493 }
494}
495
496struct TableWrapper<'a, I: Iterator<Item = Event<'a>>> {
498 inner: I,
499 stored_events: VecDeque<Event<'a>>,
500}
501
502impl<'a, I: Iterator<Item = Event<'a>>> TableWrapper<'a, I> {
503 fn new(iter: I) -> Self {
504 Self { inner: iter, stored_events: VecDeque::new() }
505 }
506}
507
508impl<'a, I: Iterator<Item = Event<'a>>> Iterator for TableWrapper<'a, I> {
509 type Item = Event<'a>;
510
511 fn next(&mut self) -> Option<Self::Item> {
512 if let Some(first) = self.stored_events.pop_front() {
513 return Some(first);
514 }
515
516 let event = self.inner.next()?;
517
518 Some(match event {
519 Event::Start(Tag::Table(t)) => {
520 self.stored_events.push_back(Event::Start(Tag::Table(t)));
521 Event::Html(CowStr::Borrowed("<div>"))
522 }
523 Event::End(TagEnd::Table) => {
524 self.stored_events.push_back(Event::Html(CowStr::Borrowed("</div>")));
525 Event::End(TagEnd::Table)
526 }
527 e => e,
528 })
529 }
530}
531
/// A parser event paired with a `Range<usize>`; the parser's offset iterator yields items
/// of this shape, and events synthesized in this file use the empty range `0..0`.
type SpannedEvent<'a> = (Event<'a>, Range<usize>);
533
534struct HeadingLinks<'a, 'b, 'ids, I> {
536 inner: I,
537 toc: Option<&'b mut TocBuilder>,
538 buf: VecDeque<SpannedEvent<'a>>,
539 id_map: &'ids mut IdMap,
540 heading_offset: HeadingOffset,
541}
542
543impl<'b, 'ids, I> HeadingLinks<'_, 'b, 'ids, I> {
544 fn new(
545 iter: I,
546 toc: Option<&'b mut TocBuilder>,
547 ids: &'ids mut IdMap,
548 heading_offset: HeadingOffset,
549 ) -> Self {
550 HeadingLinks { inner: iter, toc, buf: VecDeque::new(), id_map: ids, heading_offset }
551 }
552}
553
554impl<'a, I: Iterator<Item = SpannedEvent<'a>>> Iterator for HeadingLinks<'a, '_, '_, I> {
555 type Item = SpannedEvent<'a>;
556
557 fn next(&mut self) -> Option<Self::Item> {
558 if let Some(e) = self.buf.pop_front() {
559 return Some(e);
560 }
561
562 let event = self.inner.next();
563 if let Some((Event::Start(Tag::Heading { level, .. }), _)) = event {
564 let mut id = String::new();
565 for event in &mut self.inner {
566 match &event.0 {
567 Event::End(TagEnd::Heading(_)) => break,
568 Event::Text(text) | Event::Code(text) => {
569 id.extend(text.chars().filter_map(slugify));
570 self.buf.push_back(event);
571 }
572 _ => self.buf.push_back(event),
573 }
574 }
575 let id = self.id_map.derive(id);
576
577 if let Some(ref mut builder) = self.toc {
578 let mut text_header = String::new();
579 plain_text_from_events(self.buf.iter().map(|(ev, _)| ev.clone()), &mut text_header);
580 let mut html_header = String::new();
581 html_text_from_events(self.buf.iter().map(|(ev, _)| ev.clone()), &mut html_header);
582 let sec = builder.push(level as u32, text_header, html_header, id.clone());
583 self.buf.push_front((Event::Html(format!("{sec} ").into()), 0..0));
584 }
585
586 let level =
587 std::cmp::min(level as u32 + (self.heading_offset as u32), MAX_HEADER_LEVEL);
588 self.buf.push_back((Event::Html(format!("</h{level}>").into()), 0..0));
589
590 let start_tags =
591 format!("<h{level} id=\"{id}\"><a class=\"doc-anchor\" href=\"#{id}\">§</a>");
592 return Some((Event::Html(start_tags.into()), 0..0));
593 }
594 event
595 }
596}
597
598struct SummaryLine<'a, I: Iterator<Item = Event<'a>>> {
600 inner: I,
601 started: bool,
602 depth: u32,
603 skipped_tags: u32,
604}
605
606impl<'a, I: Iterator<Item = Event<'a>>> SummaryLine<'a, I> {
607 fn new(iter: I) -> Self {
608 SummaryLine { inner: iter, started: false, depth: 0, skipped_tags: 0 }
609 }
610}
611
612fn check_if_allowed_tag(t: &TagEnd) -> bool {
613 matches!(
614 t,
615 TagEnd::Paragraph
616 | TagEnd::Emphasis
617 | TagEnd::Strong
618 | TagEnd::Strikethrough
619 | TagEnd::Link
620 | TagEnd::BlockQuote
621 )
622}
623
624fn is_forbidden_tag(t: &TagEnd) -> bool {
625 matches!(
626 t,
627 TagEnd::CodeBlock
628 | TagEnd::Table
629 | TagEnd::TableHead
630 | TagEnd::TableRow
631 | TagEnd::TableCell
632 | TagEnd::FootnoteDefinition
633 )
634}
635
636impl<'a, I: Iterator<Item = Event<'a>>> Iterator for SummaryLine<'a, I> {
637 type Item = Event<'a>;
638
639 fn next(&mut self) -> Option<Self::Item> {
640 if self.started && self.depth == 0 {
641 return None;
642 }
643 if !self.started {
644 self.started = true;
645 }
646 if let Some(event) = self.inner.next() {
647 let mut is_start = true;
648 let is_allowed_tag = match event {
649 Event::Start(ref c) => {
650 if is_forbidden_tag(&c.to_end()) {
651 self.skipped_tags += 1;
652 return None;
653 }
654 self.depth += 1;
655 check_if_allowed_tag(&c.to_end())
656 }
657 Event::End(ref c) => {
658 if is_forbidden_tag(c) {
659 self.skipped_tags += 1;
660 return None;
661 }
662 self.depth -= 1;
663 is_start = false;
664 check_if_allowed_tag(c)
665 }
666 Event::FootnoteReference(_) => {
667 self.skipped_tags += 1;
668 false
669 }
670 _ => true,
671 };
672 if !is_allowed_tag {
673 self.skipped_tags += 1;
674 }
675 return if !is_allowed_tag {
676 if is_start {
677 Some(Event::Start(Tag::Paragraph))
678 } else {
679 Some(Event::End(TagEnd::Paragraph))
680 }
681 } else {
682 Some(event)
683 };
684 }
685 None
686 }
687}
688
/// Wrapper around a line offset; the code-block scanner in this file builds it from the
/// number of lines that precede a code block in the Markdown text.
pub(crate) struct MdRelLine {
    offset: usize,
}

impl MdRelLine {
    /// Wraps the given line offset.
    pub(crate) const fn new(offset: usize) -> Self {
        MdRelLine { offset }
    }

    /// Returns the wrapped line offset.
    pub(crate) const fn offset(self) -> usize {
        let MdRelLine { offset } = self;
        offset
    }
}
710
711pub(crate) fn find_testable_code<T: doctest::DocTestVisitor>(
712 doc: &str,
713 tests: &mut T,
714 error_codes: ErrorCodes,
715 extra_info: Option<&ExtraInfo<'_>>,
716) {
717 find_codes(doc, tests, error_codes, extra_info, false)
718}
719
720pub(crate) fn find_codes<T: doctest::DocTestVisitor>(
721 doc: &str,
722 tests: &mut T,
723 error_codes: ErrorCodes,
724 extra_info: Option<&ExtraInfo<'_>>,
725 include_non_rust: bool,
726) {
727 let mut parser = Parser::new_ext(doc, main_body_opts()).into_offset_iter();
728 let mut prev_offset = 0;
729 let mut nb_lines = 0;
730 let mut register_header = None;
731 while let Some((event, offset)) = parser.next() {
732 match event {
733 Event::Start(Tag::CodeBlock(kind)) => {
734 let block_info = match kind {
735 CodeBlockKind::Fenced(ref lang) => {
736 if lang.is_empty() {
737 Default::default()
738 } else {
739 LangString::parse(lang, error_codes, extra_info)
740 }
741 }
742 CodeBlockKind::Indented => Default::default(),
743 };
744 if !include_non_rust && !block_info.rust {
745 continue;
746 }
747
748 let mut test_s = String::new();
749
750 while let Some((Event::Text(s), _)) = parser.next() {
751 test_s.push_str(&s);
752 }
753 let text = test_s
754 .lines()
755 .map(|l| map_line(l).for_code())
756 .collect::<Vec<Cow<'_, str>>>()
757 .join("\n");
758
759 nb_lines += doc[prev_offset..offset.start].lines().count();
760 if nb_lines != 0 && !&doc[prev_offset..offset.start].ends_with('\n') {
764 nb_lines -= 1;
765 }
766 let line = MdRelLine::new(nb_lines);
767 tests.visit_test(text, block_info, line);
768 prev_offset = offset.start;
769 }
770 Event::Start(Tag::Heading { level, .. }) => {
771 register_header = Some(level as u32);
772 }
773 Event::Text(ref s) if register_header.is_some() => {
774 let level = register_header.unwrap();
775 tests.visit_header(s, level);
776 register_header = None;
777 }
778 _ => {}
779 }
780 }
781}
782
783pub(crate) struct ExtraInfo<'tcx> {
784 def_id: LocalDefId,
785 sp: Span,
786 tcx: TyCtxt<'tcx>,
787}
788
789impl<'tcx> ExtraInfo<'tcx> {
790 pub(crate) fn new(tcx: TyCtxt<'tcx>, def_id: LocalDefId, sp: Span) -> ExtraInfo<'tcx> {
791 ExtraInfo { def_id, sp, tcx }
792 }
793
794 fn error_invalid_codeblock_attr(&self, msg: impl Into<DiagMessage>) {
795 self.tcx.node_span_lint(
796 crate::lint::INVALID_CODEBLOCK_ATTRIBUTES,
797 self.tcx.local_def_id_to_hir_id(self.def_id),
798 self.sp,
799 |lint| {
800 lint.primary_message(msg);
801 },
802 );
803 }
804
805 fn error_invalid_codeblock_attr_with_help(
806 &self,
807 msg: impl Into<DiagMessage>,
808 f: impl for<'a, 'b> FnOnce(&'b mut Diag<'a, ()>),
809 ) {
810 self.tcx.node_span_lint(
811 crate::lint::INVALID_CODEBLOCK_ATTRIBUTES,
812 self.tcx.local_def_id_to_hir_id(self.def_id),
813 self.sp,
814 |lint| {
815 lint.primary_message(msg);
816 f(lint);
817 },
818 );
819 }
820}
821
/// Parsed form of a code block's info string.
#[derive(Eq, PartialEq, Clone, Debug)]
pub(crate) struct LangString {
    /// The info string exactly as it was passed to the parser.
    pub(crate) original: String,
    /// Set by the `should_panic` tag.
    pub(crate) should_panic: bool,
    /// Set by the `no_run` tag, and also by `compile_fail`.
    pub(crate) no_run: bool,
    /// Set by the `ignore` and `ignore-*` tags.
    pub(crate) ignore: Ignore,
    /// Whether the block is considered Rust code. Defaults to `true`; cleared when the
    /// `custom` tag is present, when unknown tags appear without a Rust tag, or when the
    /// info string could not be parsed.
    pub(crate) rust: bool,
    /// Set by the `test_harness` tag.
    pub(crate) test_harness: bool,
    /// Set by the `compile_fail` tag.
    pub(crate) compile_fail: bool,
    /// Set by the `standalone_crate` tag.
    pub(crate) standalone_crate: bool,
    /// Error-code tags (`E` followed by four digits), collected only when error-code
    /// checking is enabled.
    pub(crate) error_codes: Vec<String>,
    /// Edition given by an `edition*` tag, if it could be parsed.
    pub(crate) edition: Option<Edition>,
    /// Classes given as `.class` or `class=value` attributes.
    pub(crate) added_classes: Vec<String>,
    /// Tags that were not recognized.
    pub(crate) unknown: Vec<String>,
}
837
/// Result of the `ignore` family of code block tags.
#[derive(Eq, PartialEq, Clone, Debug)]
pub(crate) enum Ignore {
    /// The plain `ignore` tag was given.
    All,
    /// No `ignore` tag was given (the default).
    None,
    /// One or more `ignore-<x>` tags were given; holds the `<x>` parts.
    Some(Vec<String>),
}
844
/// Tokenizer for code block info strings; yields `LangStringToken`s.
pub(crate) struct TagIterator<'a, 'tcx> {
    /// Remaining characters of `data`, with their byte positions.
    inner: Peekable<CharIndices<'a>>,
    /// The complete info string being tokenized.
    data: &'a str,
    /// Whether the tokenizer is currently between `{` and `}`.
    is_in_attribute_block: bool,
    /// Where to report errors, if anywhere.
    extra: Option<&'a ExtraInfo<'tcx>>,
    /// Set once an error was emitted; the iterator yields nothing afterwards.
    is_error: bool,
}
891
/// A token of a code block info string.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) enum LangStringToken<'a> {
    /// A bare or quoted word found outside an attribute block.
    LangToken(&'a str),
    /// A `.class` entry of an attribute block (without the leading dot).
    ClassAttribute(&'a str),
    /// A `key=value` entry of an attribute block.
    KeyValueAttribute(&'a str, &'a str),
}
898
/// Returns `true` if `c` may start a bare word: `_`, `-`, `:` or an ASCII letter or digit.
fn is_leading_char(c: char) -> bool {
    matches!(c, '_' | '-' | ':') || c.is_ascii_alphanumeric()
}
/// Returns `true` if `c` may appear inside a bare word: any leading character or one of
/// a fixed set of punctuation characters.
fn is_bareword_char(c: char) -> bool {
    ".!#$%&*+/;<>?@^|~".contains(c) || is_leading_char(c)
}
/// Returns `true` if `c` separates tokens: space, comma or tab.
fn is_separator(c: char) -> bool {
    matches!(c, ' ' | ',' | '\t')
}
908
/// A `start..end` pair of byte positions into the info string being tokenized.
struct Indices {
    start: usize,
    end: usize,
}
913
impl<'a, 'tcx> TagIterator<'a, 'tcx> {
    /// Creates a tokenizer over `data`. Errors are reported through `extra` when it is
    /// provided; either way they are recorded in `is_error`.
    pub(crate) fn new(data: &'a str, extra: Option<&'a ExtraInfo<'tcx>>) -> Self {
        Self {
            inner: data.char_indices().peekable(),
            data,
            is_in_attribute_block: false,
            extra,
            is_error: false,
        }
    }

    /// Reports `err` (if a reporting context is available) and marks the tokenizer as failed.
    fn emit_error(&mut self, err: impl Into<DiagMessage>) {
        if let Some(extra) = self.extra {
            extra.error_invalid_codeblock_attr(err);
        }
        self.is_error = true;
    }

    /// Consumes separator characters and returns the position of the first character that
    /// is not a separator (which is left unconsumed), or `None` at the end of input.
    fn skip_separators(&mut self) -> Option<usize> {
        while let Some((pos, c)) = self.inner.peek() {
            if !is_separator(*c) {
                return Some(*pos);
            }
            self.inner.next();
        }
        None
    }

    /// Consumes characters up to and including the next `"` and returns the indices of the
    /// text between `start + 1` and that quote. `start` is the position of the opening quote,
    /// which the caller has already consumed. Emits an error if there is no closing quote.
    fn parse_string(&mut self, start: usize) -> Option<Indices> {
        for (pos, c) in self.inner.by_ref() {
            if c == '"' {
                return Some(Indices { start: start + 1, end: pos });
            }
        }
        self.emit_error("unclosed quote string `\"`");
        None
    }

    /// Parses a `.class` attribute. `start` is the position of the already consumed `.`.
    /// The class name must not be empty and must be followed by a character accepted by
    /// `check_after_token` (or by the end of input).
    fn parse_class(&mut self, start: usize) -> Option<LangStringToken<'a>> {
        while let Some((pos, c)) = self.inner.peek().copied() {
            if is_bareword_char(c) {
                self.inner.next();
            } else {
                let class = &self.data[start + 1..pos];
                if class.is_empty() {
                    self.emit_error(format!("unexpected `{c}` character after `.`"));
                    return None;
                } else if self.check_after_token() {
                    return Some(LangStringToken::ClassAttribute(class));
                } else {
                    return None;
                }
            }
        }
        // End of input: the class name runs to the end of the string.
        let class = &self.data[start + 1..];
        if class.is_empty() {
            self.emit_error("missing character after `.`");
            None
        } else if self.check_after_token() {
            Some(LangStringToken::ClassAttribute(class))
        } else {
            None
        }
    }

    /// Consumes bare-word characters and returns the indices from `start` to the first
    /// character that is not one (left unconsumed). Reaching the end of input is an error.
    fn parse_token(&mut self, start: usize) -> Option<Indices> {
        while let Some((pos, c)) = self.inner.peek() {
            if !is_bareword_char(*c) {
                return Some(Indices { start, end: *pos });
            }
            self.inner.next();
        }
        self.emit_error("unexpected end");
        None
    }

    /// Parses a `key=value` attribute whose first character `c` (at position `start`) has
    /// already been consumed. Key and value may each be a bare word or a quoted string, and
    /// neither may be empty.
    fn parse_key_value(&mut self, c: char, start: usize) -> Option<LangStringToken<'a>> {
        let key_indices =
            if c == '"' { self.parse_string(start)? } else { self.parse_token(start)? };
        if key_indices.start == key_indices.end {
            self.emit_error("unexpected empty string as key");
            return None;
        }

        // The key must be followed directly by `=`.
        if let Some((_, c)) = self.inner.next() {
            if c != '=' {
                self.emit_error(format!("expected `=`, found `{}`", c));
                return None;
            }
        } else {
            self.emit_error("unexpected end");
            return None;
        }
        let value_indices = match self.inner.next() {
            Some((pos, '"')) => self.parse_string(pos)?,
            Some((pos, c)) if is_bareword_char(c) => self.parse_token(pos)?,
            Some((_, c)) => {
                self.emit_error(format!("unexpected `{c}` character after `=`"));
                return None;
            }
            None => {
                self.emit_error("expected value after `=`");
                return None;
            }
        };
        if value_indices.start == value_indices.end {
            self.emit_error("unexpected empty string as value");
            None
        } else if self.check_after_token() {
            Some(LangStringToken::KeyValueAttribute(
                &self.data[key_indices.start..key_indices.end],
                &self.data[value_indices.start..value_indices.end],
            ))
        } else {
            None
        }
    }

    /// Checks that the next character (if any) may follow a token inside an attribute
    /// block: `}`, a separator or `(`. Returns `false` if an error was emitted.
    fn check_after_token(&mut self) -> bool {
        if let Some((_, c)) = self.inner.peek().copied() {
            if c == '}' || is_separator(c) || c == '(' {
                true
            } else {
                self.emit_error(format!("unexpected `{c}` character"));
                false
            }
        } else {
            // End of input is accepted here; no error is emitted by this check.
            true
        }
    }

    /// Parses the next entry inside an attribute block: `}` closes the block and
    /// tokenization continues outside of it, `.` starts a class, and a quote or leading
    /// character starts a key/value pair.
    fn parse_in_attribute_block(&mut self) -> Option<LangStringToken<'a>> {
        if let Some((pos, c)) = self.inner.next() {
            if c == '}' {
                self.is_in_attribute_block = false;
                return self.next();
            } else if c == '.' {
                return self.parse_class(pos);
            } else if c == '"' || is_leading_char(c) {
                return self.parse_key_value(c, pos);
            } else {
                self.emit_error(format!("unexpected character `{c}`"));
                return None;
            }
        }
        self.emit_error("unclosed attribute block (`{}`): missing `}` at the end");
        None
    }

    /// Consumes characters up to and including the next `)`.
    /// Returns `false` (after emitting an error) if there is none.
    fn skip_paren_block(&mut self) -> bool {
        for (_, c) in self.inner.by_ref() {
            if c == ')' {
                return true;
            }
        }
        self.emit_error("unclosed comment: missing `)` at the end");
        false
    }

    /// Parses the next token outside an attribute block, starting at position `start`.
    ///
    /// A token is either a quoted string or a bare word; it ends at a separator, at a
    /// parenthesized block (which is skipped) or at the end of input. A `{` switches to
    /// attribute-block parsing.
    fn parse_outside_attribute_block(&mut self, start: usize) -> Option<LangStringToken<'a>> {
        while let Some((pos, c)) = self.inner.next() {
            if c == '"' {
                // A quoted token must begin with the quote.
                if pos != start {
                    self.emit_error("expected ` `, `{` or `,` found `\"`");
                    return None;
                }
                let indices = self.parse_string(pos)?;
                if let Some((_, c)) = self.inner.peek().copied()
                    && c != '{'
                    && !is_separator(c)
                    && c != '('
                {
                    self.emit_error(format!("expected ` `, `{{` or `,` after `\"`, found `{c}`"));
                    return None;
                }
                return Some(LangStringToken::LangToken(&self.data[indices.start..indices.end]));
            } else if c == '{' {
                self.is_in_attribute_block = true;
                return self.next();
            } else if is_separator(c) {
                if pos != start {
                    return Some(LangStringToken::LangToken(&self.data[start..pos]));
                }
                return self.next();
            } else if c == '(' {
                if !self.skip_paren_block() {
                    return None;
                }
                if pos != start {
                    return Some(LangStringToken::LangToken(&self.data[start..pos]));
                }
                return self.next();
            } else if (pos == start && is_leading_char(c)) || (pos != start && is_bareword_char(c))
            {
                // Still inside a bare word.
                continue;
            } else {
                self.emit_error(format!("unexpected character `{c}`"));
                return None;
            }
        }
        // End of input: whatever is left from `start` is the final token.
        let token = &self.data[start..];
        if token.is_empty() { None } else { Some(LangStringToken::LangToken(&self.data[start..])) }
    }
}
1121
1122impl<'a> Iterator for TagIterator<'a, '_> {
1123 type Item = LangStringToken<'a>;
1124
1125 fn next(&mut self) -> Option<Self::Item> {
1126 if self.is_error {
1127 return None;
1128 }
1129 let Some(start) = self.skip_separators() else {
1130 if self.is_in_attribute_block {
1131 self.emit_error("unclosed attribute block (`{}`): missing `}` at the end");
1132 }
1133 return None;
1134 };
1135 if self.is_in_attribute_block {
1136 self.parse_in_attribute_block()
1137 } else {
1138 self.parse_outside_attribute_block(start)
1139 }
1140 }
1141}
1142
1143impl Default for LangString {
1144 fn default() -> Self {
1145 Self {
1146 original: String::new(),
1147 should_panic: false,
1148 no_run: false,
1149 ignore: Ignore::None,
1150 rust: true,
1151 test_harness: false,
1152 compile_fail: false,
1153 standalone_crate: false,
1154 error_codes: Vec::new(),
1155 edition: None,
1156 added_classes: Vec::new(),
1157 unknown: Vec::new(),
1158 }
1159 }
1160}
1161
1162impl LangString {
1163 fn parse_without_check(string: &str, allow_error_code_check: ErrorCodes) -> Self {
1164 Self::parse(string, allow_error_code_check, None)
1165 }
1166
1167 fn parse(
1168 string: &str,
1169 allow_error_code_check: ErrorCodes,
1170 extra: Option<&ExtraInfo<'_>>,
1171 ) -> Self {
1172 let allow_error_code_check = allow_error_code_check.as_bool();
1173 let mut seen_rust_tags = false;
1174 let mut seen_other_tags = false;
1175 let mut seen_custom_tag = false;
1176 let mut data = LangString::default();
1177 let mut ignores = vec![];
1178
1179 data.original = string.to_owned();
1180
1181 let mut call = |tokens: &mut dyn Iterator<Item = LangStringToken<'_>>| {
1182 for token in tokens {
1183 match token {
1184 LangStringToken::LangToken("should_panic") => {
1185 data.should_panic = true;
1186 seen_rust_tags = !seen_other_tags;
1187 }
1188 LangStringToken::LangToken("no_run") => {
1189 data.no_run = true;
1190 seen_rust_tags = !seen_other_tags;
1191 }
1192 LangStringToken::LangToken("ignore") => {
1193 data.ignore = Ignore::All;
1194 seen_rust_tags = !seen_other_tags;
1195 }
1196 LangStringToken::LangToken(x)
1197 if let Some(ignore) = x.strip_prefix("ignore-") =>
1198 {
1199 ignores.push(ignore.to_owned());
1200 seen_rust_tags = !seen_other_tags;
1201 }
1202 LangStringToken::LangToken("rust") => {
1203 data.rust = true;
1204 seen_rust_tags = true;
1205 }
1206 LangStringToken::LangToken("custom") => {
1207 seen_custom_tag = true;
1208 }
1209 LangStringToken::LangToken("test_harness") => {
1210 data.test_harness = true;
1211 seen_rust_tags = !seen_other_tags || seen_rust_tags;
1212 }
1213 LangStringToken::LangToken("compile_fail") => {
1214 data.compile_fail = true;
1215 seen_rust_tags = !seen_other_tags || seen_rust_tags;
1216 data.no_run = true;
1217 }
1218 LangStringToken::LangToken("standalone_crate") => {
1219 data.standalone_crate = true;
1220 seen_rust_tags = !seen_other_tags || seen_rust_tags;
1221 }
1222 LangStringToken::LangToken(x)
1223 if let Some(edition) = x.strip_prefix("edition") =>
1224 {
1225 data.edition = edition.parse::<Edition>().ok();
1226 }
1227 LangStringToken::LangToken(x)
1228 if let Some(edition) = x.strip_prefix("rust")
1229 && edition.parse::<Edition>().is_ok()
1230 && let Some(extra) = extra =>
1231 {
1232 extra.error_invalid_codeblock_attr_with_help(
1233 format!("unknown attribute `{x}`"),
1234 |lint| {
1235 lint.help(format!(
1236 "there is an attribute with a similar name: `edition{edition}`"
1237 ));
1238 },
1239 );
1240 }
1241 LangStringToken::LangToken(x)
1242 if allow_error_code_check
1243 && let Some(error_code) = x.strip_prefix('E')
1244 && error_code.len() == 4 =>
1245 {
1246 if error_code.parse::<u32>().is_ok() {
1247 data.error_codes.push(x.to_owned());
1248 seen_rust_tags = !seen_other_tags || seen_rust_tags;
1249 } else {
1250 seen_other_tags = true;
1251 }
1252 }
1253 LangStringToken::LangToken(x) if let Some(extra) = extra => {
1254 if let Some(help) = match x.to_lowercase().as_str() {
1255 "compile-fail" | "compile_fail" | "compilefail" => Some(
1256 "use `compile_fail` to invert the results of this test, so that it \
1257 passes if it cannot be compiled and fails if it can",
1258 ),
1259 "should-panic" | "should_panic" | "shouldpanic" => Some(
1260 "use `should_panic` to invert the results of this test, so that if \
1261 passes if it panics and fails if it does not",
1262 ),
1263 "no-run" | "no_run" | "norun" => Some(
1264 "use `no_run` to compile, but not run, the code sample during \
1265 testing",
1266 ),
1267 "test-harness" | "test_harness" | "testharness" => Some(
1268 "use `test_harness` to run functions marked `#[test]` instead of a \
1269 potentially-implicit `main` function",
1270 ),
1271 "standalone" | "standalone_crate" | "standalone-crate"
1272 if extra.sp.at_least_rust_2024() =>
1273 {
1274 Some(
1275 "use `standalone_crate` to compile this code block \
1276 separately",
1277 )
1278 }
1279 _ => None,
1280 } {
1281 extra.error_invalid_codeblock_attr_with_help(
1282 format!("unknown attribute `{x}`"),
1283 |lint| {
1284 lint.help(help).help(
1285 "this code block may be skipped during testing, \
1286 because unknown attributes are treated as markers for \
1287 code samples written in other programming languages, \
1288 unless it is also explicitly marked as `rust`",
1289 );
1290 },
1291 );
1292 }
1293 seen_other_tags = true;
1294 data.unknown.push(x.to_owned());
1295 }
1296 LangStringToken::LangToken(x) => {
1297 seen_other_tags = true;
1298 data.unknown.push(x.to_owned());
1299 }
1300 LangStringToken::KeyValueAttribute("class", value) => {
1301 data.added_classes.push(value.to_owned());
1302 }
1303 LangStringToken::KeyValueAttribute(key, ..) if let Some(extra) = extra => {
1304 extra
1305 .error_invalid_codeblock_attr(format!("unsupported attribute `{key}`"));
1306 }
1307 LangStringToken::ClassAttribute(class) => {
1308 data.added_classes.push(class.to_owned());
1309 }
1310 _ => {}
1311 }
1312 }
1313 };
1314
1315 let mut tag_iter = TagIterator::new(string, extra);
1316 call(&mut tag_iter);
1317
1318 if !ignores.is_empty() {
1320 data.ignore = Ignore::Some(ignores);
1321 }
1322
1323 data.rust &= !seen_custom_tag && (!seen_other_tags || seen_rust_tags) && !tag_iter.is_error;
1324
1325 data
1326 }
1327}
1328
1329impl<'a> Markdown<'a> {
1330 pub fn into_string(self) -> String {
1331 if self.content.is_empty() {
1333 return String::new();
1334 }
1335
1336 let mut s = String::with_capacity(self.content.len() * 3 / 2);
1337 html::push_html(&mut s, self.into_iter());
1338
1339 s
1340 }
1341
1342 fn into_iter(self) -> CodeBlocks<'a, 'a, impl Iterator<Item = Event<'a>>> {
1343 let Markdown {
1344 content: md,
1345 links,
1346 ids,
1347 error_codes: codes,
1348 edition,
1349 playground,
1350 heading_offset,
1351 } = self;
1352
1353 let replacer = move |broken_link: BrokenLink<'_>| {
1354 links
1355 .iter()
1356 .find(|link| *link.original_text == *broken_link.reference)
1357 .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1358 };
1359
1360 let p = Parser::new_with_broken_link_callback(md, main_body_opts(), Some(replacer));
1361 let p = p.into_offset_iter();
1362
1363 ids.handle_footnotes(|ids, existing_footnotes| {
1364 let p = HeadingLinks::new(p, None, ids, heading_offset);
1365 let p = SpannedLinkReplacer::new(p, links);
1366 let p = footnotes::Footnotes::new(p, existing_footnotes);
1367 let p = TableWrapper::new(p.map(|(ev, _)| ev));
1368 CodeBlocks::new(p, codes, edition, playground)
1369 })
1370 }
1371
1372 pub(crate) fn split_summary_and_content(self) -> (Option<String>, Option<String>) {
1378 if self.content.is_empty() {
1379 return (None, None);
1380 }
1381 let mut p = self.into_iter();
1382
1383 let mut event_level = 0;
1384 let mut summary_events = Vec::new();
1385 let mut get_next_tag = false;
1386
1387 let mut end_of_summary = false;
1388 while let Some(event) = p.next() {
1389 match event {
1390 Event::Start(_) => event_level += 1,
1391 Event::End(kind) => {
1392 event_level -= 1;
1393 if event_level == 0 {
1394 end_of_summary = true;
1396 get_next_tag = kind == TagEnd::Table;
1398 }
1399 }
1400 _ => {}
1401 }
1402 summary_events.push(event);
1403 if end_of_summary {
1404 if get_next_tag && let Some(event) = p.next() {
1405 summary_events.push(event);
1406 }
1407 break;
1408 }
1409 }
1410 let mut summary = String::new();
1411 html::push_html(&mut summary, summary_events.into_iter());
1412 if summary.is_empty() {
1413 return (None, None);
1414 }
1415 let mut content = String::new();
1416 html::push_html(&mut content, p);
1417
1418 if content.is_empty() { (Some(summary), None) } else { (Some(summary), Some(content)) }
1419 }
1420}
1421
1422impl MarkdownWithToc<'_> {
1423 pub(crate) fn into_parts(self) -> (Toc, String) {
1424 let MarkdownWithToc { content: md, links, ids, error_codes: codes, edition, playground } =
1425 self;
1426
1427 if md.is_empty() {
1429 return (Toc { entries: Vec::new() }, String::new());
1430 }
1431 let mut replacer = |broken_link: BrokenLink<'_>| {
1432 links
1433 .iter()
1434 .find(|link| *link.original_text == *broken_link.reference)
1435 .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1436 };
1437
1438 let p = Parser::new_with_broken_link_callback(md, main_body_opts(), Some(&mut replacer));
1439 let p = p.into_offset_iter();
1440
1441 let mut s = String::with_capacity(md.len() * 3 / 2);
1442
1443 let mut toc = TocBuilder::new();
1444
1445 ids.handle_footnotes(|ids, existing_footnotes| {
1446 let p = HeadingLinks::new(p, Some(&mut toc), ids, HeadingOffset::H1);
1447 let p = footnotes::Footnotes::new(p, existing_footnotes);
1448 let p = TableWrapper::new(p.map(|(ev, _)| ev));
1449 let p = CodeBlocks::new(p, codes, edition, playground);
1450 html::push_html(&mut s, p);
1451 });
1452
1453 (toc.into_toc(), s)
1454 }
1455 pub(crate) fn into_string(self) -> String {
1456 let (toc, s) = self.into_parts();
1457 format!("<nav id=\"rustdoc\">{toc}</nav>{s}", toc = toc.print())
1458 }
1459}
1460
1461impl MarkdownItemInfo<'_> {
1462 pub(crate) fn into_string(self) -> String {
1463 let MarkdownItemInfo(md, ids) = self;
1464
1465 if md.is_empty() {
1467 return String::new();
1468 }
1469 let p = Parser::new_ext(md, main_body_opts()).into_offset_iter();
1470
1471 let p = p.map(|event| match event.0 {
1473 Event::Html(text) | Event::InlineHtml(text) => (Event::Text(text), event.1),
1474 _ => event,
1475 });
1476
1477 let mut s = String::with_capacity(md.len() * 3 / 2);
1478
1479 ids.handle_footnotes(|ids, existing_footnotes| {
1480 let p = HeadingLinks::new(p, None, ids, HeadingOffset::H1);
1481 let p = footnotes::Footnotes::new(p, existing_footnotes);
1482 let p = TableWrapper::new(p.map(|(ev, _)| ev));
1483 let p = p.filter(|event| {
1484 !matches!(event, Event::Start(Tag::Paragraph) | Event::End(TagEnd::Paragraph))
1485 });
1486 html::push_html(&mut s, p);
1487 });
1488
1489 s
1490 }
1491}
1492
1493impl MarkdownSummaryLine<'_> {
1494 pub(crate) fn into_string_with_has_more_content(self) -> (String, bool) {
1495 let MarkdownSummaryLine(md, links) = self;
1496 if md.is_empty() {
1498 return (String::new(), false);
1499 }
1500
1501 let mut replacer = |broken_link: BrokenLink<'_>| {
1502 links
1503 .iter()
1504 .find(|link| *link.original_text == *broken_link.reference)
1505 .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1506 };
1507
1508 let p = Parser::new_with_broken_link_callback(md, summary_opts(), Some(&mut replacer))
1509 .peekable();
1510 let mut summary = SummaryLine::new(p);
1511
1512 let mut s = String::new();
1513
1514 let without_paragraphs = LinkReplacer::new(&mut summary, links).filter(|event| {
1515 !matches!(event, Event::Start(Tag::Paragraph) | Event::End(TagEnd::Paragraph))
1516 });
1517
1518 html::push_html(&mut s, without_paragraphs);
1519
1520 let has_more_content =
1521 matches!(summary.inner.peek(), Some(Event::Start(_))) || summary.skipped_tags > 0;
1522
1523 (s, has_more_content)
1524 }
1525
1526 pub(crate) fn into_string(self) -> String {
1527 self.into_string_with_has_more_content().0
1528 }
1529}
1530
1531fn markdown_summary_with_limit(
1540 md: &str,
1541 link_names: &[RenderedLink],
1542 length_limit: usize,
1543) -> (String, bool) {
1544 if md.is_empty() {
1545 return (String::new(), false);
1546 }
1547
1548 let mut replacer = |broken_link: BrokenLink<'_>| {
1549 link_names
1550 .iter()
1551 .find(|link| *link.original_text == *broken_link.reference)
1552 .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1553 };
1554
1555 let p = Parser::new_with_broken_link_callback(md, summary_opts(), Some(&mut replacer));
1556 let mut p = LinkReplacer::new(p, link_names);
1557
1558 let mut buf = HtmlWithLimit::new(length_limit);
1559 let mut stopped_early = false;
1560 let _ = p.try_for_each(|event| {
1561 match &event {
1562 Event::Text(text) => {
1563 let r =
1564 text.split_inclusive(char::is_whitespace).try_for_each(|word| buf.push(word));
1565 if r.is_break() {
1566 stopped_early = true;
1567 }
1568 return r;
1569 }
1570 Event::Code(code) => {
1571 buf.open_tag("code");
1572 let r = buf.push(code);
1573 if r.is_break() {
1574 stopped_early = true;
1575 } else {
1576 buf.close_tag();
1577 }
1578 return r;
1579 }
1580 Event::Start(tag) => match tag {
1581 Tag::Emphasis => buf.open_tag("em"),
1582 Tag::Strong => buf.open_tag("strong"),
1583 Tag::CodeBlock(..) => return ControlFlow::Break(()),
1584 _ => {}
1585 },
1586 Event::End(tag) => match tag {
1587 TagEnd::Emphasis | TagEnd::Strong => buf.close_tag(),
1588 TagEnd::Paragraph | TagEnd::Heading(_) => return ControlFlow::Break(()),
1589 _ => {}
1590 },
1591 Event::HardBreak | Event::SoftBreak => buf.push(" ")?,
1592 _ => {}
1593 };
1594 ControlFlow::Continue(())
1595 });
1596
1597 (buf.finish(), stopped_early)
1598}
1599
1600pub(crate) fn short_markdown_summary(markdown: &str, link_names: &[RenderedLink]) -> String {
1607 let (mut s, was_shortened) = markdown_summary_with_limit(markdown, link_names, 59);
1608
1609 if was_shortened {
1610 s.push('…');
1611 }
1612
1613 s
1614}
1615
1616pub(crate) fn plain_text_summary(md: &str, link_names: &[RenderedLink]) -> String {
1623 if md.is_empty() {
1624 return String::new();
1625 }
1626
1627 let mut s = String::with_capacity(md.len() * 3 / 2);
1628
1629 let mut replacer = |broken_link: BrokenLink<'_>| {
1630 link_names
1631 .iter()
1632 .find(|link| *link.original_text == *broken_link.reference)
1633 .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1634 };
1635
1636 let p = Parser::new_with_broken_link_callback(md, summary_opts(), Some(&mut replacer));
1637
1638 plain_text_from_events(p, &mut s);
1639
1640 s
1641}
1642
1643pub(crate) fn plain_text_from_events<'a>(
1644 events: impl Iterator<Item = pulldown_cmark::Event<'a>>,
1645 s: &mut String,
1646) {
1647 for event in events {
1648 match &event {
1649 Event::Text(text) => s.push_str(text),
1650 Event::Code(code) => {
1651 s.push('`');
1652 s.push_str(code);
1653 s.push('`');
1654 }
1655 Event::HardBreak | Event::SoftBreak => s.push(' '),
1656 Event::Start(Tag::CodeBlock(..)) => break,
1657 Event::End(TagEnd::Paragraph) => break,
1658 Event::End(TagEnd::Heading(..)) => break,
1659 _ => (),
1660 }
1661 }
1662}
1663
1664pub(crate) fn html_text_from_events<'a>(
1665 events: impl Iterator<Item = pulldown_cmark::Event<'a>>,
1666 s: &mut String,
1667) {
1668 for event in events {
1669 match &event {
1670 Event::Text(text) => {
1671 write!(s, "{}", EscapeBodyText(text)).expect("string alloc infallible")
1672 }
1673 Event::Code(code) => {
1674 s.push_str("<code>");
1675 write!(s, "{}", EscapeBodyText(code)).expect("string alloc infallible");
1676 s.push_str("</code>");
1677 }
1678 Event::HardBreak | Event::SoftBreak => s.push(' '),
1679 Event::Start(Tag::CodeBlock(..)) => break,
1680 Event::End(TagEnd::Paragraph) => break,
1681 Event::End(TagEnd::Heading(..)) => break,
1682 _ => (),
1683 }
1684 }
1685}
1686
/// A link found in Markdown text, as passed to the `preprocess_link` callback
/// of `markdown_links`.
#[derive(Debug)]
pub(crate) struct MarkdownLink {
    /// The `pulldown_cmark` link type of this link.
    pub kind: LinkType,
    /// The link's destination text.
    pub link: String,
    /// Byte range in the Markdown source associated with this link.
    pub range: MarkdownLinkRange,
}
1693
/// Byte range in the Markdown source that a link is attributed to.
#[derive(Clone, Debug)]
pub(crate) enum MarkdownLinkRange {
    /// The range covers only the link's destination.
    Destination(Range<usize>),
    /// The destination could not be located, so the range covers the whole
    /// link (or reference definition) instead.
    WholeLink(Range<usize>),
}

impl MarkdownLinkRange {
    /// Returns the byte range stored in this value, whichever variant it is.
    pub fn inner_range(&self) -> &Range<usize> {
        // Both variants carry exactly one range.
        let (Self::Destination(range) | Self::WholeLink(range)) = self;
        range
    }
}
1713
1714pub(crate) fn markdown_links<'md, R>(
1715 md: &'md str,
1716 preprocess_link: impl Fn(MarkdownLink) -> Option<R>,
1717) -> Vec<R> {
1718 use itertools::Itertools;
1719 if md.is_empty() {
1720 return vec![];
1721 }
1722
1723 let locate = |s: &str, fallback: Range<usize>| unsafe {
1725 let s_start = s.as_ptr();
1726 let s_end = s_start.add(s.len());
1727 let md_start = md.as_ptr();
1728 let md_end = md_start.add(md.len());
1729 if md_start <= s_start && s_end <= md_end {
1730 let start = s_start.offset_from(md_start) as usize;
1731 let end = s_end.offset_from(md_start) as usize;
1732 MarkdownLinkRange::Destination(start..end)
1733 } else {
1734 MarkdownLinkRange::WholeLink(fallback)
1735 }
1736 };
1737
1738 let span_for_link = |link: &CowStr<'_>, span: Range<usize>| {
1739 match link {
1744 CowStr::Borrowed(s) => locate(s, span),
1749
1750 CowStr::Boxed(_) | CowStr::Inlined(_) => MarkdownLinkRange::WholeLink(span),
1752 }
1753 };
1754
1755 let span_for_refdef = |link: &CowStr<'_>, span: Range<usize>| {
1756 let mut square_brace_count = 0;
1759 let mut iter = md.as_bytes()[span.start..span.end].iter().copied().enumerate();
1760 for (_i, c) in &mut iter {
1761 match c {
1762 b':' if square_brace_count == 0 => break,
1763 b'[' => square_brace_count += 1,
1764 b']' => square_brace_count -= 1,
1765 _ => {}
1766 }
1767 }
1768 while let Some((i, c)) = iter.next() {
1769 if c == b'<' {
1770 while let Some((j, c)) = iter.next() {
1771 match c {
1772 b'\\' => {
1773 let _ = iter.next();
1774 }
1775 b'>' => {
1776 return MarkdownLinkRange::Destination(
1777 i + 1 + span.start..j + span.start,
1778 );
1779 }
1780 _ => {}
1781 }
1782 }
1783 } else if !c.is_ascii_whitespace() {
1784 for (j, c) in iter.by_ref() {
1785 if c.is_ascii_whitespace() {
1786 return MarkdownLinkRange::Destination(i + span.start..j + span.start);
1787 }
1788 }
1789 return MarkdownLinkRange::Destination(i + span.start..span.end);
1790 }
1791 }
1792 span_for_link(link, span)
1793 };
1794
1795 let span_for_offset_backward = |span: Range<usize>, open: u8, close: u8| {
1796 let mut open_brace = !0;
1797 let mut close_brace = !0;
1798 for (i, b) in md.as_bytes()[span.clone()].iter().copied().enumerate().rev() {
1799 let i = i + span.start;
1800 if b == close {
1801 close_brace = i;
1802 break;
1803 }
1804 }
1805 if close_brace < span.start || close_brace >= span.end {
1806 return MarkdownLinkRange::WholeLink(span);
1807 }
1808 let mut nesting = 1;
1809 for (i, b) in md.as_bytes()[span.start..close_brace].iter().copied().enumerate().rev() {
1810 let i = i + span.start;
1811 if b == close {
1812 nesting += 1;
1813 }
1814 if b == open {
1815 nesting -= 1;
1816 }
1817 if nesting == 0 {
1818 open_brace = i;
1819 break;
1820 }
1821 }
1822 assert!(open_brace != close_brace);
1823 if open_brace < span.start || open_brace >= span.end {
1824 return MarkdownLinkRange::WholeLink(span);
1825 }
1826 let range = (open_brace + 1)..close_brace;
1828 MarkdownLinkRange::Destination(range)
1829 };
1830
1831 let span_for_offset_forward = |span: Range<usize>, open: u8, close: u8| {
1832 let mut open_brace = !0;
1833 let mut close_brace = !0;
1834 for (i, b) in md.as_bytes()[span.clone()].iter().copied().enumerate() {
1835 let i = i + span.start;
1836 if b == open {
1837 open_brace = i;
1838 break;
1839 }
1840 }
1841 if open_brace < span.start || open_brace >= span.end {
1842 return MarkdownLinkRange::WholeLink(span);
1843 }
1844 let mut nesting = 0;
1845 for (i, b) in md.as_bytes()[open_brace..span.end].iter().copied().enumerate() {
1846 let i = i + open_brace;
1847 if b == close {
1848 nesting -= 1;
1849 }
1850 if b == open {
1851 nesting += 1;
1852 }
1853 if nesting == 0 {
1854 close_brace = i;
1855 break;
1856 }
1857 }
1858 assert!(open_brace != close_brace);
1859 if open_brace < span.start || open_brace >= span.end {
1860 return MarkdownLinkRange::WholeLink(span);
1861 }
1862 let range = (open_brace + 1)..close_brace;
1864 MarkdownLinkRange::Destination(range)
1865 };
1866
1867 let mut broken_link_callback = |link: BrokenLink<'md>| Some((link.reference, "".into()));
1868 let event_iter = Parser::new_with_broken_link_callback(
1869 md,
1870 main_body_opts(),
1871 Some(&mut broken_link_callback),
1872 )
1873 .into_offset_iter();
1874 let mut links = Vec::new();
1875
1876 let mut refdefs = FxIndexMap::default();
1877 for (label, refdef) in event_iter.reference_definitions().iter().sorted_by_key(|x| x.0) {
1878 refdefs.insert(label.to_string(), (false, refdef.dest.to_string(), refdef.span.clone()));
1879 }
1880
1881 for (event, span) in event_iter {
1882 match event {
1883 Event::Start(Tag::Link { link_type, dest_url, id, .. })
1884 if may_be_doc_link(link_type) =>
1885 {
1886 let range = match link_type {
1887 LinkType::ReferenceUnknown | LinkType::ShortcutUnknown => {
1889 span_for_offset_backward(span, b'[', b']')
1890 }
1891 LinkType::CollapsedUnknown => span_for_offset_forward(span, b'[', b']'),
1892 LinkType::Inline => span_for_offset_backward(span, b'(', b')'),
1893 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut => {
1895 if let Some((is_used, dest_url, span)) = refdefs.get_mut(&id[..]) {
1896 *is_used = true;
1897 span_for_refdef(&CowStr::from(&dest_url[..]), span.clone())
1898 } else {
1899 span_for_link(&dest_url, span)
1900 }
1901 }
1902 LinkType::Autolink | LinkType::Email => unreachable!(),
1903 };
1904
1905 if let Some(link) = preprocess_link(MarkdownLink {
1906 kind: link_type,
1907 link: dest_url.into_string(),
1908 range,
1909 }) {
1910 links.push(link);
1911 }
1912 }
1913 _ => {}
1914 }
1915 }
1916
1917 for (_label, (is_used, dest_url, span)) in refdefs.into_iter() {
1918 if !is_used
1919 && let Some(link) = preprocess_link(MarkdownLink {
1920 kind: LinkType::Reference,
1921 range: span_for_refdef(&CowStr::from(&dest_url[..]), span),
1922 link: dest_url,
1923 })
1924 {
1925 links.push(link);
1926 }
1927 }
1928
1929 links
1930}
1931
/// A Rust code block located in Markdown text, as produced by `rust_code_blocks`.
#[derive(Debug)]
pub(crate) struct RustCodeBlock {
    /// Byte range of the whole code block in the Markdown source.
    pub(crate) range: Range<usize>,
    /// Byte range of the code inside the block in the Markdown source.
    pub(crate) code: Range<usize>,
    /// Whether the block is a fenced code block (as opposed to an indented one).
    pub(crate) is_fenced: bool,
    /// The parsed language string of the block.
    pub(crate) lang_string: LangString,
}
1942
1943pub(crate) fn rust_code_blocks(md: &str, extra_info: &ExtraInfo<'_>) -> Vec<RustCodeBlock> {
1946 let mut code_blocks = vec![];
1947
1948 if md.is_empty() {
1949 return code_blocks;
1950 }
1951
1952 let mut p = Parser::new_ext(md, main_body_opts()).into_offset_iter();
1953
1954 while let Some((event, offset)) = p.next() {
1955 if let Event::Start(Tag::CodeBlock(syntax)) = event {
1956 let (lang_string, code_start, code_end, range, is_fenced) = match syntax {
1957 CodeBlockKind::Fenced(syntax) => {
1958 let syntax = syntax.as_ref();
1959 let lang_string = if syntax.is_empty() {
1960 Default::default()
1961 } else {
1962 LangString::parse(syntax, ErrorCodes::Yes, Some(extra_info))
1963 };
1964 if !lang_string.rust {
1965 continue;
1966 }
1967 let (code_start, mut code_end) = match p.next() {
1968 Some((Event::Text(_), offset)) => (offset.start, offset.end),
1969 Some((_, sub_offset)) => {
1970 let code = Range { start: sub_offset.start, end: sub_offset.start };
1971 code_blocks.push(RustCodeBlock {
1972 is_fenced: true,
1973 range: offset,
1974 code,
1975 lang_string,
1976 });
1977 continue;
1978 }
1979 None => {
1980 let code = Range { start: offset.end, end: offset.end };
1981 code_blocks.push(RustCodeBlock {
1982 is_fenced: true,
1983 range: offset,
1984 code,
1985 lang_string,
1986 });
1987 continue;
1988 }
1989 };
1990 while let Some((Event::Text(_), offset)) = p.next() {
1991 code_end = offset.end;
1992 }
1993 (lang_string, code_start, code_end, offset, true)
1994 }
1995 CodeBlockKind::Indented => {
1996 if offset.end > offset.start && md.get(offset.end..=offset.end) == Some("\n") {
1999 (
2000 LangString::default(),
2001 offset.start,
2002 offset.end,
2003 Range { start: offset.start, end: offset.end - 1 },
2004 false,
2005 )
2006 } else {
2007 (LangString::default(), offset.start, offset.end, offset, false)
2008 }
2009 }
2010 };
2011
2012 code_blocks.push(RustCodeBlock {
2013 is_fenced,
2014 range,
2015 code: Range { start: code_start, end: code_end },
2016 lang_string,
2017 });
2018 }
2019 }
2020
2021 code_blocks
2022}
2023
/// Bookkeeping used to derive unique ids and to share a footnote counter.
#[derive(Clone, Default, Debug)]
pub struct IdMap {
    /// Maps an id to the numeric suffix `derive` will append the next time
    /// that same id is requested.
    map: FxHashMap<String, usize>,
    /// Shared counter; a `Weak` handle to it is passed to the closure given
    /// to `handle_footnotes`.
    existing_footnotes: Arc<AtomicUsize>,
}
2029
/// Returns `true` when `id` is one of the built-in ids listed below.
///
/// The comparison is exact and case-sensitive. These are presumably ids that
/// rustdoc's generated pages already use themselves.
fn is_default_id(id: &str) -> bool {
    const DEFAULT_IDS: &[&str] = &[
        "help",
        "settings",
        "not-displayed",
        "alternative-display",
        "search",
        "crate-search",
        "crate-search-div",
        "themeStyle",
        "settings-menu",
        "help-button",
        "sidebar-button",
        "main-content",
        "toggle-all-docs",
        "all-types",
        "default-settings",
        "sidebar-vars",
        "copy-path",
        "rustdoc-toc",
        "rustdoc-modnav",
        "fields",
        "variants",
        "implementors-list",
        "synthetic-implementors-list",
        "foreign-impls",
        "implementations",
        "trait-implementations",
        "synthetic-implementations",
        "blanket-implementations",
        "required-associated-types",
        "provided-associated-types",
        "provided-associated-consts",
        "required-associated-consts",
        "required-methods",
        "provided-methods",
        "dyn-compatibility",
        "implementors",
        "synthetic-implementors",
        "implementations-list",
        "trait-implementations-list",
        "synthetic-implementations-list",
        "blanket-implementations-list",
        "deref-methods",
        "layout",
        "aliased-type",
    ];

    DEFAULT_IDS.contains(&id)
}
2084
2085impl IdMap {
2086 pub fn new() -> Self {
2087 IdMap { map: FxHashMap::default(), existing_footnotes: Arc::new(AtomicUsize::new(0)) }
2088 }
2089
2090 pub(crate) fn derive<S: AsRef<str> + ToString>(&mut self, candidate: S) -> String {
2091 let id = match self.map.get_mut(candidate.as_ref()) {
2092 None => {
2093 let candidate = candidate.to_string();
2094 if is_default_id(&candidate) {
2095 let id = format!("{}-{}", candidate, 1);
2096 self.map.insert(candidate, 2);
2097 id
2098 } else {
2099 candidate
2100 }
2101 }
2102 Some(a) => {
2103 let id = format!("{}-{}", candidate.as_ref(), *a);
2104 *a += 1;
2105 id
2106 }
2107 };
2108
2109 self.map.insert(id.clone(), 1);
2110 id
2111 }
2112
2113 pub(crate) fn handle_footnotes<'a, T, F: FnOnce(&'a mut Self, Weak<AtomicUsize>) -> T>(
2116 &'a mut self,
2117 closure: F,
2118 ) -> T {
2119 let existing_footnotes = Arc::downgrade(&self.existing_footnotes);
2120
2121 closure(self, existing_footnotes)
2122 }
2123
2124 pub(crate) fn clear(&mut self) {
2125 self.map.clear();
2126 self.existing_footnotes = Arc::new(AtomicUsize::new(0));
2127 }
2128}