1use std::borrow::Cow;
29use std::collections::VecDeque;
30use std::fmt::Write;
31use std::iter::Peekable;
32use std::ops::{ControlFlow, Range};
33use std::path::PathBuf;
34use std::str::{self, CharIndices};
35use std::sync::atomic::AtomicUsize;
36use std::sync::{Arc, Weak};
37
38use pulldown_cmark::{
39 BrokenLink, CodeBlockKind, CowStr, Event, LinkType, Options, Parser, Tag, TagEnd, html,
40};
41use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
42use rustc_errors::{Diag, DiagMessage};
43use rustc_hir::def_id::LocalDefId;
44use rustc_middle::ty::TyCtxt;
45pub(crate) use rustc_resolve::rustdoc::main_body_opts;
46use rustc_resolve::rustdoc::may_be_doc_link;
47use rustc_span::edition::Edition;
48use rustc_span::{Span, Symbol};
49use tracing::{debug, trace};
50
51use crate::clean::RenderedLink;
52use crate::doctest;
53use crate::doctest::GlobalTestOptions;
54use crate::html::escape::{Escape, EscapeBodyText};
55use crate::html::highlight;
56use crate::html::length_limit::HtmlWithLimit;
57use crate::html::render::small_url_encode;
58use crate::html::toc::{Toc, TocBuilder};
59
60mod footnotes;
61#[cfg(test)]
62mod tests;
63
/// Deepest HTML heading level that is ever emitted: a heading level shifted by a
/// `HeadingOffset` is clamped to this value, so nothing deeper than `<h6>` is produced.
const MAX_HEADER_LEVEL: u32 = 6;
65
66pub(crate) fn summary_opts() -> Options {
68 Options::ENABLE_TABLES
69 | Options::ENABLE_FOOTNOTES
70 | Options::ENABLE_STRIKETHROUGH
71 | Options::ENABLE_TASKLISTS
72 | Options::ENABLE_SMART_PUNCTUATION
73}
74
/// Amount by which Markdown heading levels are shifted when rendered.
///
/// The discriminant is the numeric offset (`H1` is 0, `H2` is 1, ...): the heading renderer
/// casts it to `u32`, adds it to the parsed heading level and caps the sum at
/// `MAX_HEADER_LEVEL`. With `HeadingOffset::H2`, `# title` therefore renders as `<h2>`.
#[derive(Debug, Clone, Copy)]
pub enum HeadingOffset {
    H1 = 0,
    H2,
    H3,
    H4,
    H5,
    H6,
}
84
/// Inputs needed to render a Markdown string to HTML.
///
/// Calling `into_string` consumes the struct and returns the rendered HTML.
pub struct Markdown<'a> {
    /// The Markdown source text.
    pub content: &'a str,
    /// Link replacements applied to link destinations, titles and link text.
    pub links: &'a [RenderedLink],
    /// Map of already-used IDs; heading IDs are derived through it.
    pub ids: &'a mut IdMap,
    /// Whether explicit error codes in code block lang strings are recognized.
    pub error_codes: ErrorCodes,
    /// Edition used for a code block that does not name one itself.
    pub edition: Edition,
    /// Playground configuration; when present with a non-empty URL, Rust code blocks get a
    /// "Run code" link.
    pub playground: &'a Option<Playground>,
    /// Offset added to heading levels.
    /// E.g. with `heading_offset: HeadingOffset::H2`, `# something` renders an `<h2>`.
    pub heading_offset: HeadingOffset,
}
/// Rendering inputs equivalent to `Markdown` but without a heading offset.
///
/// NOTE(review): the name suggests the output includes a table of contents; the rendering
/// code is not part of this view — confirm against the `impl`.
pub(crate) struct MarkdownWithToc<'a> {
    /// The Markdown source text.
    pub(crate) content: &'a str,
    /// Link replacements to apply.
    pub(crate) links: &'a [RenderedLink],
    /// Map of already-used IDs.
    pub(crate) ids: &'a mut IdMap,
    /// Whether explicit error codes in code block lang strings are recognized.
    pub(crate) error_codes: ErrorCodes,
    /// Edition used for a code block that does not name one itself.
    pub(crate) edition: Edition,
    /// Playground configuration, if any.
    pub(crate) playground: &'a Option<Playground>,
}
/// Markdown source text paired with the ID map to use while rendering it.
///
/// NOTE(review): presumably used for item-info snippets (per the name); the rendering code is
/// not part of this view.
pub(crate) struct MarkdownItemInfo<'a>(pub(crate) &'a str, pub(crate) &'a mut IdMap);
/// Markdown source text paired with the link replacements to apply to it.
///
/// NOTE(review): presumably rendered as a one-line summary (per the name); the rendering code
/// is not part of this view.
pub(crate) struct MarkdownSummaryLine<'a>(pub &'a str, pub &'a [RenderedLink]);
116
/// Two-state flag telling the lang-string parser whether explicit error codes
/// (tokens such as `E0308`) should be recognized.
#[derive(Copy, Clone, PartialEq, Debug)]
pub enum ErrorCodes {
    Yes,
    No,
}

impl ErrorCodes {
    /// Converts a boolean into the flag: `true` becomes `Yes`, `false` becomes `No`.
    pub(crate) fn from(b: bool) -> Self {
        if b { ErrorCodes::Yes } else { ErrorCodes::No }
    }

    /// Converts the flag back into a boolean: `Yes` is `true`, `No` is `false`.
    pub(crate) fn as_bool(self) -> bool {
        matches!(self, ErrorCodes::Yes)
    }
}
138
/// A single line of a code example, classified by whether it appears in rendered output.
pub(crate) enum Line<'a> {
    /// A line that is left out of the rendered HTML but kept in the code itself.
    Hidden(&'a str),
    /// A line that is displayed; owned when the text had to be rewritten.
    Shown(Cow<'a, str>),
}

impl<'a> Line<'a> {
    /// Returns the text to display in HTML, or `None` for a hidden line.
    fn for_html(self) -> Option<Cow<'a, str>> {
        if let Line::Shown(text) = self { Some(text) } else { None }
    }

    /// Returns the text to use as code, whether the line is hidden or shown.
    pub(crate) fn for_code(self) -> Cow<'a, str> {
        match self {
            Line::Hidden(text) => Cow::Borrowed(text),
            Line::Shown(text) => text,
        }
    }
}
162
163pub(crate) fn map_line(s: &str) -> Line<'_> {
171 let trimmed = s.trim();
172 if trimmed.starts_with("##") {
173 Line::Shown(Cow::Owned(s.replacen("##", "#", 1)))
174 } else if let Some(stripped) = trimmed.strip_prefix("# ") {
175 Line::Hidden(stripped)
177 } else if trimmed == "#" {
178 Line::Hidden("")
180 } else {
181 Line::Shown(Cow::Borrowed(s))
182 }
183}
184
/// Maps one character of heading text to its contribution to the heading's ID.
///
/// - `-`, `_` and alphanumeric characters are kept; ASCII letters are lowercased, non-ASCII
///   characters are kept as they are;
/// - ASCII whitespace becomes `-`;
/// - every other character (including non-ASCII whitespace) is dropped (`None`).
fn slugify(c: char) -> Option<char> {
    match c {
        '-' | '_' => Some(c),
        // `to_ascii_lowercase` leaves non-ASCII characters untouched.
        _ if c.is_alphanumeric() => Some(c.to_ascii_lowercase()),
        _ if c.is_ascii() && c.is_whitespace() => Some('-'),
        _ => None,
    }
}
197
/// Configuration for the "Run code" links attached to Rust code examples.
#[derive(Debug)]
pub struct Playground {
    /// Name of the crate being documented, if known; it is passed on when the runnable
    /// doctest source is generated.
    pub crate_name: Option<Symbol>,
    /// Base URL of the playground. An empty URL disables the "Run code" link.
    pub url: String,
}
203
/// Event-stream adapter that replaces each Markdown code block with a single pre-rendered
/// HTML event (highlighted Rust, or an escaped `<pre>` for other languages).
struct CodeBlocks<'p, 'a, I: Iterator<Item = Event<'a>>> {
    /// The wrapped event stream.
    inner: I,
    /// Whether explicit error codes in lang strings are recognized.
    check_error_codes: ErrorCodes,
    /// Edition used for a code block that does not name one in its lang string.
    edition: Edition,
    // Information about the playground if one has been configured, containing an
    // optional crate name and the URL.
    playground: &'p Option<Playground>,
}
213
214impl<'p, 'a, I: Iterator<Item = Event<'a>>> CodeBlocks<'p, 'a, I> {
215 fn new(
216 iter: I,
217 error_codes: ErrorCodes,
218 edition: Edition,
219 playground: &'p Option<Playground>,
220 ) -> Self {
221 CodeBlocks { inner: iter, check_error_codes: error_codes, edition, playground }
222 }
223}
224
225impl<'a, I: Iterator<Item = Event<'a>>> Iterator for CodeBlocks<'_, 'a, I> {
226 type Item = Event<'a>;
227
228 fn next(&mut self) -> Option<Self::Item> {
229 let event = self.inner.next();
230 let Some(Event::Start(Tag::CodeBlock(kind))) = event else {
231 return event;
232 };
233
234 let mut original_text = String::new();
235 for event in &mut self.inner {
236 match event {
237 Event::End(TagEnd::CodeBlock) => break,
238 Event::Text(ref s) => {
239 original_text.push_str(s);
240 }
241 _ => {}
242 }
243 }
244
245 let LangString { added_classes, compile_fail, should_panic, ignore, edition, .. } =
246 match kind {
247 CodeBlockKind::Fenced(ref lang) => {
248 let parse_result =
249 LangString::parse_without_check(lang, self.check_error_codes);
250 if !parse_result.rust {
251 let added_classes = parse_result.added_classes;
252 let lang_string = if let Some(lang) = parse_result.unknown.first() {
253 format!("language-{}", lang)
254 } else {
255 String::new()
256 };
257 let whitespace = if added_classes.is_empty() { "" } else { " " };
258 return Some(Event::Html(
259 format!(
260 "<div class=\"example-wrap\">\
261 <pre class=\"{lang_string}{whitespace}{added_classes}\">\
262 <code>{text}</code>\
263 </pre>\
264 </div>",
265 added_classes = added_classes.join(" "),
266 text = Escape(
267 original_text.strip_suffix('\n').unwrap_or(&original_text)
268 ),
269 )
270 .into(),
271 ));
272 }
273 parse_result
274 }
275 CodeBlockKind::Indented => Default::default(),
276 };
277
278 let lines = original_text.lines().filter_map(|l| map_line(l).for_html());
279 let text = lines.intersperse("\n".into()).collect::<String>();
280
281 let explicit_edition = edition.is_some();
282 let edition = edition.unwrap_or(self.edition);
283
284 let playground_button = self.playground.as_ref().and_then(|playground| {
285 let krate = &playground.crate_name;
286 let url = &playground.url;
287 if url.is_empty() {
288 return None;
289 }
290 let test = original_text
291 .lines()
292 .map(|l| map_line(l).for_code())
293 .intersperse("\n".into())
294 .collect::<String>();
295 let krate = krate.as_ref().map(|s| s.as_str());
296
297 let opts = GlobalTestOptions {
300 crate_name: krate.map(String::from).unwrap_or_default(),
301 no_crate_inject: false,
302 insert_indent_space: true,
303 attrs: vec![],
304 args_file: PathBuf::new(),
305 };
306 let mut builder = doctest::BuildDocTestBuilder::new(&test).edition(edition);
307 if let Some(krate) = krate {
308 builder = builder.crate_name(krate);
309 }
310 let doctest = builder.build(None);
311 let (test, _) = doctest.generate_unique_doctest(&test, false, &opts, krate);
312 let channel = if test.contains("#![feature(") { "&version=nightly" } else { "" };
313
314 let test_escaped = small_url_encode(test);
315 Some(format!(
316 "<a class=\"test-arrow\" \
317 target=\"_blank\" \
318 title=\"Run code\" \
319 href=\"{url}?code={test_escaped}{channel}&edition={edition}\"></a>",
320 ))
321 });
322
323 let tooltip = if ignore == Ignore::All {
324 highlight::Tooltip::IgnoreAll
325 } else if let Ignore::Some(platforms) = ignore {
326 highlight::Tooltip::IgnoreSome(platforms)
327 } else if compile_fail {
328 highlight::Tooltip::CompileFail
329 } else if should_panic {
330 highlight::Tooltip::ShouldPanic
331 } else if explicit_edition {
332 highlight::Tooltip::Edition(edition)
333 } else {
334 highlight::Tooltip::None
335 };
336
337 let mut s = String::new();
340 s.push('\n');
341
342 highlight::render_example_with_highlighting(
343 &text,
344 &mut s,
345 tooltip,
346 playground_button.as_deref(),
347 &added_classes,
348 );
349 Some(Event::Html(s.into()))
350 }
351}
352
/// State shared by `LinkReplacer` and `SpannedLinkReplacer`.
struct LinkReplacerInner<'a> {
    /// All known link replacements.
    links: &'a [RenderedLink],
    /// The shortcut/collapsed link currently being traversed, if any: set when its start
    /// event matches a known link, cleared at the matching end-of-link event.
    shortcut_link: Option<&'a RenderedLink>,
}
358
/// Event-stream adapter that rewrites links (destination, title and text) using a list of
/// `RenderedLink`s.
struct LinkReplacer<'a, I: Iterator<Item = Event<'a>>> {
    /// The wrapped event stream.
    iter: I,
    /// Replacement state and logic.
    inner: LinkReplacerInner<'a>,
}
363
364impl<'a, I: Iterator<Item = Event<'a>>> LinkReplacer<'a, I> {
365 fn new(iter: I, links: &'a [RenderedLink]) -> Self {
366 LinkReplacer { iter, inner: { LinkReplacerInner { links, shortcut_link: None } } }
367 }
368}
369
/// Same as `LinkReplacer`, but for an event stream whose items also carry their source
/// range (`SpannedEvent`); the range is passed through untouched.
struct SpannedLinkReplacer<'a, I: Iterator<Item = SpannedEvent<'a>>> {
    /// The wrapped event stream.
    iter: I,
    /// Replacement state and logic.
    inner: LinkReplacerInner<'a>,
}
376
377impl<'a, I: Iterator<Item = SpannedEvent<'a>>> SpannedLinkReplacer<'a, I> {
378 fn new(iter: I, links: &'a [RenderedLink]) -> Self {
379 SpannedLinkReplacer { iter, inner: { LinkReplacerInner { links, shortcut_link: None } } }
380 }
381}
382
impl<'a> LinkReplacerInner<'a> {
    /// Rewrites one event in place according to `self.links`.
    ///
    /// Shortcut/collapsed links with an unknown reference (`[text]` / `[text][]`) are tracked
    /// from their start event to their end event so that the text *inside* them can be
    /// replaced by the link's `new_text`. Every other link only gets its destination (and
    /// possibly title) replaced. The order of the match arms matters: the shortcut arm must
    /// come before the generic link arm.
    fn handle_event(&mut self, event: &mut Event<'a>) {
        match event {
            // Start of a shortcut or collapsed link whose reference was not defined in the
            // document.
            Event::Start(Tag::Link {
                // `[text]` or `[text][]`
                link_type: LinkType::ShortcutUnknown | LinkType::CollapsedUnknown,
                dest_url,
                title,
                ..
            }) => {
                debug!("saw start of shortcut link to {dest_url} with title {title}");
                // The destination is compared against `href`, i.e. it is expected to have
                // been resolved already (presumably by the parser's broken-link callback).
                let link = self.links.iter().find(|&link| *link.href == **dest_url);
                // The inner text arrives as later events, so only remember the link here;
                // the text is replaced when those events are seen.
                if let Some(link) = link {
                    trace!("it matched");
                    assert!(self.shortcut_link.is_none(), "shortcut links cannot be nested");
                    self.shortcut_link = Some(link);
                    // Use the link's tooltip only when the author gave no title.
                    if title.is_empty() && !link.tooltip.is_empty() {
                        *title = CowStr::Borrowed(link.tooltip.as_ref());
                    }
                }
            }
            // End of the tracked shortcut link: stop replacing text.
            Event::End(TagEnd::Link) if self.shortcut_link.is_some() => {
                debug!("saw end of shortcut link");
                self.shortcut_link = None;
            }
            // Inline code; only touched while inside a tracked shortcut link.
            Event::Code(text) => {
                trace!("saw code {text}");
                if let Some(link) = self.shortcut_link {
                    // Replacement happens only when the code span is the *entire* original
                    // text: `.get(1..len - 1)` drops the first and last byte of
                    // `original_text` (the surrounding backticks) before comparing.
                    if let Some(link) = self.links.iter().find(|l| {
                        l.href == link.href
                            && Some(&**text) == l.original_text.get(1..l.original_text.len() - 1)
                    }) {
                        debug!("replacing {text} with {new_text}", new_text = link.new_text);
                        *text = CowStr::Borrowed(&link.new_text);
                    }
                }
            }
            // Plain text; only touched while inside a tracked shortcut link.
            Event::Text(text) => {
                trace!("saw text {text}");
                if let Some(link) = self.shortcut_link {
                    // The text must match a link's original text exactly.
                    if let Some(link) = self
                        .links
                        .iter()
                        .find(|l| l.href == link.href && **text == *l.original_text)
                    {
                        debug!("replacing {text} with {new_text}", new_text = link.new_text);
                        *text = CowStr::Borrowed(&link.new_text);
                    }
                }
            }
            // Any other kind of link: the destination still holds the original text, so
            // replace it with the resolved `href` here.
            Event::Start(Tag::Link { dest_url, title, .. }) => {
                if let Some(link) =
                    self.links.iter().find(|&link| *link.original_text == **dest_url)
                {
                    *dest_url = CowStr::Borrowed(link.href.as_ref());
                    if title.is_empty() && !link.tooltip.is_empty() {
                        *title = CowStr::Borrowed(link.tooltip.as_ref());
                    }
                }
            }
            // All remaining events are left untouched.
            _ => {}
        }
    }
}
472
473impl<'a, I: Iterator<Item = Event<'a>>> Iterator for LinkReplacer<'a, I> {
474 type Item = Event<'a>;
475
476 fn next(&mut self) -> Option<Self::Item> {
477 let mut event = self.iter.next();
478 if let Some(ref mut event) = event {
479 self.inner.handle_event(event);
480 }
481 event
483 }
484}
485
486impl<'a, I: Iterator<Item = SpannedEvent<'a>>> Iterator for SpannedLinkReplacer<'a, I> {
487 type Item = SpannedEvent<'a>;
488
489 fn next(&mut self) -> Option<Self::Item> {
490 let (mut event, range) = self.iter.next()?;
491 self.inner.handle_event(&mut event);
492 Some((event, range))
494 }
495}
496
/// Event-stream adapter that wraps every table in a `<div>` ... `</div>` pair.
struct TableWrapper<'a, I: Iterator<Item = Event<'a>>> {
    /// The wrapped event stream.
    inner: I,
    /// Events queued to be returned before the next event is pulled from `inner`.
    stored_events: VecDeque<Event<'a>>,
}
502
503impl<'a, I: Iterator<Item = Event<'a>>> TableWrapper<'a, I> {
504 fn new(iter: I) -> Self {
505 Self { inner: iter, stored_events: VecDeque::new() }
506 }
507}
508
509impl<'a, I: Iterator<Item = Event<'a>>> Iterator for TableWrapper<'a, I> {
510 type Item = Event<'a>;
511
512 fn next(&mut self) -> Option<Self::Item> {
513 if let Some(first) = self.stored_events.pop_front() {
514 return Some(first);
515 }
516
517 let event = self.inner.next()?;
518
519 Some(match event {
520 Event::Start(Tag::Table(t)) => {
521 self.stored_events.push_back(Event::Start(Tag::Table(t)));
522 Event::Html(CowStr::Borrowed("<div>"))
523 }
524 Event::End(TagEnd::Table) => {
525 self.stored_events.push_back(Event::Html(CowStr::Borrowed("</div>")));
526 Event::End(TagEnd::Table)
527 }
528 e => e,
529 })
530 }
531}
532
/// A Markdown event paired with a `Range<usize>`, as yielded by the parser's offset iterator.
/// Synthetic events created by the adapters in this file use the empty range `0..0`.
type SpannedEvent<'a> = (Event<'a>, Range<usize>);
534
/// Event-stream adapter that turns headings into `<hN id="...">` elements with an anchor
/// link, and optionally records them in a table-of-contents builder.
struct HeadingLinks<'a, 'b, 'ids, I> {
    /// The wrapped event stream.
    inner: I,
    /// Table-of-contents builder that receives each heading, if one was supplied.
    toc: Option<&'b mut TocBuilder>,
    /// Events of the heading being processed, queued to be returned on later calls.
    buf: VecDeque<SpannedEvent<'a>>,
    /// Map used to derive the `id` of each heading from its slugified text.
    id_map: &'ids mut IdMap,
    /// Offset added to every heading level.
    heading_offset: HeadingOffset,
}
543
544impl<'b, 'ids, I> HeadingLinks<'_, 'b, 'ids, I> {
545 fn new(
546 iter: I,
547 toc: Option<&'b mut TocBuilder>,
548 ids: &'ids mut IdMap,
549 heading_offset: HeadingOffset,
550 ) -> Self {
551 HeadingLinks { inner: iter, toc, buf: VecDeque::new(), id_map: ids, heading_offset }
552 }
553}
554
impl<'a, I: Iterator<Item = SpannedEvent<'a>>> Iterator for HeadingLinks<'a, '_, '_, I> {
    type Item = SpannedEvent<'a>;

    /// Returns buffered events first; otherwise pulls the next event and, if it starts a
    /// heading, replaces the heading's start and end tags with raw HTML.
    ///
    /// For a heading, the events up to (but excluding) its end tag are buffered, and the
    /// heading's ID is built from the slugified characters of its text and inline code.
    /// The returned event is the opening `<hN id="..."><a class="doc-anchor" ...>§</a>`;
    /// the buffered content and the closing `</hN>` follow on later calls.
    fn next(&mut self) -> Option<Self::Item> {
        if let Some(e) = self.buf.pop_front() {
            return Some(e);
        }

        let event = self.inner.next();
        if let Some((Event::Start(Tag::Heading { level, .. }), _)) = event {
            let mut id = String::new();
            // Buffer the heading's content; the end tag is consumed and dropped.
            for event in &mut self.inner {
                match &event.0 {
                    Event::End(TagEnd::Heading(_)) => break,
                    Event::Text(text) | Event::Code(text) => {
                        id.extend(text.chars().filter_map(slugify));
                        self.buf.push_back(event);
                    }
                    _ => self.buf.push_back(event),
                }
            }
            let id = self.id_map.derive(id);

            if let Some(ref mut builder) = self.toc {
                // The TOC entry gets both a plain-text and an HTML rendering of the
                // heading; this must happen before anything else is added to `buf`.
                let mut text_header = String::new();
                plain_text_from_events(self.buf.iter().map(|(ev, _)| ev.clone()), &mut text_header);
                let mut html_header = String::new();
                html_text_from_events(self.buf.iter().map(|(ev, _)| ev.clone()), &mut html_header);
                // The TOC is given the original (un-offset) level.
                let sec = builder.push(level as u32, text_header, html_header, id.clone());
                // The value returned by the builder is placed in front of the heading text.
                self.buf.push_front((Event::Html(format!("{sec} ").into()), 0..0));
            }

            // Apply the heading offset, never going deeper than `MAX_HEADER_LEVEL`.
            let level =
                std::cmp::min(level as u32 + (self.heading_offset as u32), MAX_HEADER_LEVEL);
            self.buf.push_back((Event::Html(format!("</h{level}>").into()), 0..0));

            let start_tags =
                format!("<h{level} id=\"{id}\"><a class=\"doc-anchor\" href=\"#{id}\">§</a>");
            return Some((Event::Html(start_tags.into()), 0..0));
        }
        event
    }
}
598
/// Event-stream adapter that yields only the first top-level element of the document and
/// stops once that element is closed (or a forbidden tag is met).
struct SummaryLine<'a, I: Iterator<Item = Event<'a>>> {
    /// The wrapped event stream.
    inner: I,
    /// Whether `next` has been called at least once.
    started: bool,
    /// Current nesting depth of open tags.
    depth: u32,
    /// Counter incremented whenever a tag or footnote reference is dropped or replaced.
    skipped_tags: u32,
}
606
607impl<'a, I: Iterator<Item = Event<'a>>> SummaryLine<'a, I> {
608 fn new(iter: I) -> Self {
609 SummaryLine { inner: iter, started: false, depth: 0, skipped_tags: 0 }
610 }
611}
612
613fn check_if_allowed_tag(t: &TagEnd) -> bool {
614 matches!(
615 t,
616 TagEnd::Paragraph
617 | TagEnd::Emphasis
618 | TagEnd::Strong
619 | TagEnd::Strikethrough
620 | TagEnd::Link
621 | TagEnd::BlockQuote
622 )
623}
624
625fn is_forbidden_tag(t: &TagEnd) -> bool {
626 matches!(
627 t,
628 TagEnd::CodeBlock
629 | TagEnd::Table
630 | TagEnd::TableHead
631 | TagEnd::TableRow
632 | TagEnd::TableCell
633 | TagEnd::FootnoteDefinition
634 )
635}
636
637impl<'a, I: Iterator<Item = Event<'a>>> Iterator for SummaryLine<'a, I> {
638 type Item = Event<'a>;
639
640 fn next(&mut self) -> Option<Self::Item> {
641 if self.started && self.depth == 0 {
642 return None;
643 }
644 if !self.started {
645 self.started = true;
646 }
647 if let Some(event) = self.inner.next() {
648 let mut is_start = true;
649 let is_allowed_tag = match event {
650 Event::Start(ref c) => {
651 if is_forbidden_tag(&c.to_end()) {
652 self.skipped_tags += 1;
653 return None;
654 }
655 self.depth += 1;
656 check_if_allowed_tag(&c.to_end())
657 }
658 Event::End(ref c) => {
659 if is_forbidden_tag(c) {
660 self.skipped_tags += 1;
661 return None;
662 }
663 self.depth -= 1;
664 is_start = false;
665 check_if_allowed_tag(c)
666 }
667 Event::FootnoteReference(_) => {
668 self.skipped_tags += 1;
669 false
670 }
671 _ => true,
672 };
673 if !is_allowed_tag {
674 self.skipped_tags += 1;
675 }
676 return if !is_allowed_tag {
677 if is_start {
678 Some(Event::Start(Tag::Paragraph))
679 } else {
680 Some(Event::End(TagEnd::Paragraph))
681 }
682 } else {
683 Some(event)
684 };
685 }
686 None
687 }
688}
689
/// A line position expressed relative to the start of a Markdown text.
///
/// The wrapped value is a 0-based line offset: an offset of 3 denotes the line three lines
/// after the first Markdown line.
pub(crate) struct MdRelLine {
    offset: usize,
}

impl MdRelLine {
    /// Creates a relative line from a 0-based line offset.
    pub(crate) const fn new(offset: usize) -> Self {
        MdRelLine { offset }
    }

    /// Returns the 0-based line offset.
    pub(crate) const fn offset(self) -> usize {
        let MdRelLine { offset } = self;
        offset
    }
}
711
712pub(crate) fn find_testable_code<T: doctest::DocTestVisitor>(
713 doc: &str,
714 tests: &mut T,
715 error_codes: ErrorCodes,
716 extra_info: Option<&ExtraInfo<'_>>,
717) {
718 find_codes(doc, tests, error_codes, extra_info, false)
719}
720
721pub(crate) fn find_codes<T: doctest::DocTestVisitor>(
722 doc: &str,
723 tests: &mut T,
724 error_codes: ErrorCodes,
725 extra_info: Option<&ExtraInfo<'_>>,
726 include_non_rust: bool,
727) {
728 let mut parser = Parser::new_ext(doc, main_body_opts()).into_offset_iter();
729 let mut prev_offset = 0;
730 let mut nb_lines = 0;
731 let mut register_header = None;
732 while let Some((event, offset)) = parser.next() {
733 match event {
734 Event::Start(Tag::CodeBlock(kind)) => {
735 let block_info = match kind {
736 CodeBlockKind::Fenced(ref lang) => {
737 if lang.is_empty() {
738 Default::default()
739 } else {
740 LangString::parse(lang, error_codes, extra_info)
741 }
742 }
743 CodeBlockKind::Indented => Default::default(),
744 };
745 if !include_non_rust && !block_info.rust {
746 continue;
747 }
748
749 let mut test_s = String::new();
750
751 while let Some((Event::Text(s), _)) = parser.next() {
752 test_s.push_str(&s);
753 }
754 let text = test_s
755 .lines()
756 .map(|l| map_line(l).for_code())
757 .collect::<Vec<Cow<'_, str>>>()
758 .join("\n");
759
760 nb_lines += doc[prev_offset..offset.start].lines().count();
761 if nb_lines != 0 && !&doc[prev_offset..offset.start].ends_with('\n') {
765 nb_lines -= 1;
766 }
767 let line = MdRelLine::new(nb_lines);
768 tests.visit_test(text, block_info, line);
769 prev_offset = offset.start;
770 }
771 Event::Start(Tag::Heading { level, .. }) => {
772 register_header = Some(level as u32);
773 }
774 Event::Text(ref s) if register_header.is_some() => {
775 let level = register_header.unwrap();
776 tests.visit_header(s, level);
777 register_header = None;
778 }
779 _ => {}
780 }
781 }
782}
783
/// Context needed to report lints about invalid code block attributes.
pub(crate) struct ExtraInfo<'tcx> {
    /// Local definition whose HIR id the lint is attached to.
    def_id: LocalDefId,
    /// Span at which the lint is reported.
    sp: Span,
    /// Type context used to emit the lint.
    tcx: TyCtxt<'tcx>,
}
789
790impl<'tcx> ExtraInfo<'tcx> {
791 pub(crate) fn new(tcx: TyCtxt<'tcx>, def_id: LocalDefId, sp: Span) -> ExtraInfo<'tcx> {
792 ExtraInfo { def_id, sp, tcx }
793 }
794
795 fn error_invalid_codeblock_attr(&self, msg: impl Into<DiagMessage>) {
796 self.tcx.node_span_lint(
797 crate::lint::INVALID_CODEBLOCK_ATTRIBUTES,
798 self.tcx.local_def_id_to_hir_id(self.def_id),
799 self.sp,
800 |lint| {
801 lint.primary_message(msg);
802 },
803 );
804 }
805
806 fn error_invalid_codeblock_attr_with_help(
807 &self,
808 msg: impl Into<DiagMessage>,
809 f: impl for<'a, 'b> FnOnce(&'b mut Diag<'a, ()>),
810 ) {
811 self.tcx.node_span_lint(
812 crate::lint::INVALID_CODEBLOCK_ATTRIBUTES,
813 self.tcx.local_def_id_to_hir_id(self.def_id),
814 self.sp,
815 |lint| {
816 lint.primary_message(msg);
817 f(lint);
818 },
819 );
820 }
821}
822
/// Parsed form of a fenced code block's lang string (the text after the opening fence).
#[derive(Eq, PartialEq, Clone, Debug)]
pub(crate) struct LangString {
    /// The lang string exactly as written.
    pub(crate) original: String,
    /// Set by the `should_panic` token.
    pub(crate) should_panic: bool,
    /// Set by the `no_run` token, and also by `compile_fail`.
    pub(crate) no_run: bool,
    /// Which targets the block is ignored on, from `ignore` / `ignore-<x>` tokens.
    pub(crate) ignore: Ignore,
    /// Whether the block is treated as Rust code.
    pub(crate) rust: bool,
    /// Set by the `test_harness` token.
    pub(crate) test_harness: bool,
    /// Set by the `compile_fail` token.
    pub(crate) compile_fail: bool,
    /// Set by the `standalone_crate` token.
    pub(crate) standalone_crate: bool,
    /// Accepted error-code tokens (`E` followed by four digits), when error codes are enabled.
    pub(crate) error_codes: Vec<String>,
    /// Edition parsed from an `edition<x>` token, if it was valid.
    pub(crate) edition: Option<Edition>,
    /// CSS classes from `.class` and `class=value` attributes.
    pub(crate) added_classes: Vec<String>,
    /// Tokens that were not recognized, in order of appearance.
    pub(crate) unknown: Vec<String>,
}
838
/// Whether, and where, a code block is ignored.
#[derive(Eq, PartialEq, Clone, Debug)]
pub(crate) enum Ignore {
    /// Ignored everywhere (bare `ignore` token).
    All,
    /// Not ignored (the default).
    None,
    /// Ignored for the listed suffixes of `ignore-<x>` tokens.
    Some(Vec<String>),
}
845
/// Tokenizer for code block lang strings.
///
/// Outside of `{...}` it yields language tokens (barewords or `"quoted strings"`) separated
/// by spaces, tabs or commas, skipping `(...)` comments. Inside `{...}` it yields `.class`
/// and `key=value` attributes. After the first error, iteration stops.
pub(crate) struct TagIterator<'a, 'tcx> {
    /// Character cursor over `data`.
    inner: Peekable<CharIndices<'a>>,
    /// The full lang string; tokens are slices of it.
    data: &'a str,
    /// Whether the cursor is currently between `{` and `}`.
    is_in_attribute_block: bool,
    /// Lint context; when `None`, errors are recorded but not reported.
    extra: Option<&'a ExtraInfo<'tcx>>,
    /// Set once an error has been encountered.
    is_error: bool,
}
892
/// One token produced by `TagIterator`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) enum LangStringToken<'a> {
    /// A bareword or quoted string found outside an attribute block.
    LangToken(&'a str),
    /// A `.class` attribute found inside `{...}`; the leading dot is not included.
    ClassAttribute(&'a str),
    /// A `key=value` attribute found inside `{...}`; quotes are not included.
    KeyValueAttribute(&'a str, &'a str),
}
899
/// Returns whether `c` may start a bareword: an ASCII letter or digit, `_`, `-` or `:`.
fn is_leading_char(c: char) -> bool {
    matches!(c, '_' | '-' | ':') || c.is_ascii_alphanumeric()
}
903fn is_bareword_char(c: char) -> bool {
904 is_leading_char(c) || ".!#$%&*+/;<>?@^|~".contains(c)
905}
/// Returns whether `c` separates tokens: a space, a comma or a tab.
fn is_separator(c: char) -> bool {
    matches!(c, ' ' | ',' | '\t')
}
909
/// Half-open byte range (`start..end`) into the lang string being tokenized.
struct Indices {
    start: usize,
    end: usize,
}
914
impl<'a, 'tcx> TagIterator<'a, 'tcx> {
    /// Creates a tokenizer over `data`. Errors are reported through `extra` when it is
    /// `Some`; otherwise they only stop the iteration.
    pub(crate) fn new(data: &'a str, extra: Option<&'a ExtraInfo<'tcx>>) -> Self {
        Self {
            inner: data.char_indices().peekable(),
            data,
            is_in_attribute_block: false,
            extra,
            is_error: false,
        }
    }

    /// Reports `err` (if a lint context is available) and marks the tokenizer as failed.
    fn emit_error(&mut self, err: impl Into<DiagMessage>) {
        if let Some(extra) = self.extra {
            extra.error_invalid_codeblock_attr(err);
        }
        self.is_error = true;
    }

    /// Consumes separators and returns the position of the next non-separator character
    /// (without consuming it), or `None` at the end of input.
    fn skip_separators(&mut self) -> Option<usize> {
        while let Some((pos, c)) = self.inner.peek() {
            if !is_separator(*c) {
                return Some(*pos);
            }
            self.inner.next();
        }
        None
    }

    /// Consumes up to and including the closing `"` and returns the range of the string's
    /// content. `start` is the position of the opening quote, which was already consumed.
    /// Emits an error and returns `None` if the string is never closed.
    fn parse_string(&mut self, start: usize) -> Option<Indices> {
        for (pos, c) in self.inner.by_ref() {
            if c == '"' {
                return Some(Indices { start: start + 1, end: pos });
            }
        }
        self.emit_error("unclosed quote string `\"`");
        None
    }

    /// Parses a `.class` attribute. `start` is the position of the already-consumed `.`.
    /// The class name is made of bareword characters and must not be empty.
    fn parse_class(&mut self, start: usize) -> Option<LangStringToken<'a>> {
        while let Some((pos, c)) = self.inner.peek().copied() {
            if is_bareword_char(c) {
                self.inner.next();
            } else {
                // First character that cannot be part of the class name: the name ends here.
                let class = &self.data[start + 1..pos];
                if class.is_empty() {
                    self.emit_error(format!("unexpected `{c}` character after `.`"));
                    return None;
                } else if self.check_after_token() {
                    return Some(LangStringToken::ClassAttribute(class));
                } else {
                    return None;
                }
            }
        }
        // End of input reached while reading the class name.
        let class = &self.data[start + 1..];
        if class.is_empty() {
            self.emit_error("missing character after `.`");
            None
        } else if self.check_after_token() {
            Some(LangStringToken::ClassAttribute(class))
        } else {
            None
        }
    }

    /// Consumes bareword characters and returns the range from `start` to the first
    /// character that is not one (left unconsumed). Reaching the end of input is an error.
    fn parse_token(&mut self, start: usize) -> Option<Indices> {
        while let Some((pos, c)) = self.inner.peek() {
            if !is_bareword_char(*c) {
                return Some(Indices { start, end: *pos });
            }
            self.inner.next();
        }
        self.emit_error("unexpected end");
        None
    }

    /// Parses a `key=value` attribute. `c` is the already-consumed first character of the
    /// key, located at `start`. Key and value may each be a bareword or a quoted string,
    /// and neither may be empty.
    fn parse_key_value(&mut self, c: char, start: usize) -> Option<LangStringToken<'a>> {
        let key_indices =
            if c == '"' { self.parse_string(start)? } else { self.parse_token(start)? };
        if key_indices.start == key_indices.end {
            self.emit_error("unexpected empty string as key");
            return None;
        }

        // The key must be followed immediately by `=`.
        if let Some((_, c)) = self.inner.next() {
            if c != '=' {
                self.emit_error(format!("expected `=`, found `{}`", c));
                return None;
            }
        } else {
            self.emit_error("unexpected end");
            return None;
        }
        let value_indices = match self.inner.next() {
            Some((pos, '"')) => self.parse_string(pos)?,
            Some((pos, c)) if is_bareword_char(c) => self.parse_token(pos)?,
            Some((_, c)) => {
                self.emit_error(format!("unexpected `{c}` character after `=`"));
                return None;
            }
            None => {
                self.emit_error("expected value after `=`");
                return None;
            }
        };
        if value_indices.start == value_indices.end {
            self.emit_error("unexpected empty string as value");
            None
        } else if self.check_after_token() {
            Some(LangStringToken::KeyValueAttribute(
                &self.data[key_indices.start..key_indices.end],
                &self.data[value_indices.start..value_indices.end],
            ))
        } else {
            None
        }
    }

    /// Checks (without consuming) that the character after an attribute is `}`, a separator
    /// or `(`. Returns `false` after emitting an error otherwise.
    fn check_after_token(&mut self) -> bool {
        if let Some((_, c)) = self.inner.peek().copied() {
            if c == '}' || is_separator(c) || c == '(' {
                true
            } else {
                self.emit_error(format!("unexpected `{c}` character"));
                false
            }
        } else {
            // End of input: accepted here; a missing `}` is reported by the next call to
            // `next`.
            true
        }
    }

    /// Parses the next item inside `{...}`: the closing `}`, a `.class` or a `key=value`.
    fn parse_in_attribute_block(&mut self) -> Option<LangStringToken<'a>> {
        if let Some((pos, c)) = self.inner.next() {
            if c == '}' {
                // Block closed: continue with whatever follows.
                self.is_in_attribute_block = false;
                return self.next();
            } else if c == '.' {
                return self.parse_class(pos);
            } else if c == '"' || is_leading_char(c) {
                return self.parse_key_value(c, pos);
            } else {
                self.emit_error(format!("unexpected character `{c}`"));
                return None;
            }
        }
        self.emit_error("unclosed attribute block (`{}`): missing `}` at the end");
        None
    }

    /// Consumes characters up to and including the next `)`. Returns `false` after emitting
    /// an error if none is found. The opening `(` has already been consumed.
    fn skip_paren_block(&mut self) -> bool {
        for (_, c) in self.inner.by_ref() {
            if c == ')' {
                return true;
            }
        }
        self.emit_error("unclosed comment: missing `)` at the end");
        false
    }

    /// Parses the next item outside of `{...}`, starting at position `start`: a language
    /// token, the opening of an attribute block, or a `(...)` comment.
    fn parse_outside_attribute_block(&mut self, start: usize) -> Option<LangStringToken<'a>> {
        while let Some((pos, c)) = self.inner.next() {
            if c == '"' {
                // A quoted string is only valid at the very start of a token.
                if pos != start {
                    self.emit_error("expected ` `, `{` or `,` found `\"`");
                    return None;
                }
                let indices = self.parse_string(pos)?;
                if let Some((_, c)) = self.inner.peek().copied()
                    && c != '{'
                    && !is_separator(c)
                    && c != '('
                {
                    self.emit_error(format!("expected ` `, `{{` or `,` after `\"`, found `{c}`"));
                    return None;
                }
                return Some(LangStringToken::LangToken(&self.data[indices.start..indices.end]));
            } else if c == '{' {
                self.is_in_attribute_block = true;
                return self.next();
            } else if is_separator(c) {
                // A separator ends the current token, if one was started.
                if pos != start {
                    return Some(LangStringToken::LangToken(&self.data[start..pos]));
                }
                return self.next();
            } else if c == '(' {
                // A comment also ends the current token, if one was started.
                if !self.skip_paren_block() {
                    return None;
                }
                if pos != start {
                    return Some(LangStringToken::LangToken(&self.data[start..pos]));
                }
                return self.next();
            } else if (pos == start && is_leading_char(c)) || (pos != start && is_bareword_char(c))
            {
                // Still inside a bareword: the first character must be a leading character,
                // the following ones any bareword character.
                continue;
            } else {
                self.emit_error(format!("unexpected character `{c}`"));
                return None;
            }
        }
        // End of input: whatever was read since `start` is the last token.
        let token = &self.data[start..];
        if token.is_empty() { None } else { Some(LangStringToken::LangToken(&self.data[start..])) }
    }
}
1122
1123impl<'a> Iterator for TagIterator<'a, '_> {
1124 type Item = LangStringToken<'a>;
1125
1126 fn next(&mut self) -> Option<Self::Item> {
1127 if self.is_error {
1128 return None;
1129 }
1130 let Some(start) = self.skip_separators() else {
1131 if self.is_in_attribute_block {
1132 self.emit_error("unclosed attribute block (`{}`): missing `}` at the end");
1133 }
1134 return None;
1135 };
1136 if self.is_in_attribute_block {
1137 self.parse_in_attribute_block()
1138 } else {
1139 self.parse_outside_attribute_block(start)
1140 }
1141 }
1142}
1143
1144impl Default for LangString {
1145 fn default() -> Self {
1146 Self {
1147 original: String::new(),
1148 should_panic: false,
1149 no_run: false,
1150 ignore: Ignore::None,
1151 rust: true,
1152 test_harness: false,
1153 compile_fail: false,
1154 standalone_crate: false,
1155 error_codes: Vec::new(),
1156 edition: None,
1157 added_classes: Vec::new(),
1158 unknown: Vec::new(),
1159 }
1160 }
1161}
1162
1163impl LangString {
1164 fn parse_without_check(string: &str, allow_error_code_check: ErrorCodes) -> Self {
1165 Self::parse(string, allow_error_code_check, None)
1166 }
1167
1168 fn parse(
1169 string: &str,
1170 allow_error_code_check: ErrorCodes,
1171 extra: Option<&ExtraInfo<'_>>,
1172 ) -> Self {
1173 let allow_error_code_check = allow_error_code_check.as_bool();
1174 let mut seen_rust_tags = false;
1175 let mut seen_other_tags = false;
1176 let mut seen_custom_tag = false;
1177 let mut data = LangString::default();
1178 let mut ignores = vec![];
1179
1180 data.original = string.to_owned();
1181
1182 let mut call = |tokens: &mut dyn Iterator<Item = LangStringToken<'_>>| {
1183 for token in tokens {
1184 match token {
1185 LangStringToken::LangToken("should_panic") => {
1186 data.should_panic = true;
1187 seen_rust_tags = !seen_other_tags;
1188 }
1189 LangStringToken::LangToken("no_run") => {
1190 data.no_run = true;
1191 seen_rust_tags = !seen_other_tags;
1192 }
1193 LangStringToken::LangToken("ignore") => {
1194 data.ignore = Ignore::All;
1195 seen_rust_tags = !seen_other_tags;
1196 }
1197 LangStringToken::LangToken(x)
1198 if let Some(ignore) = x.strip_prefix("ignore-") =>
1199 {
1200 ignores.push(ignore.to_owned());
1201 seen_rust_tags = !seen_other_tags;
1202 }
1203 LangStringToken::LangToken("rust") => {
1204 data.rust = true;
1205 seen_rust_tags = true;
1206 }
1207 LangStringToken::LangToken("custom") => {
1208 seen_custom_tag = true;
1209 }
1210 LangStringToken::LangToken("test_harness") => {
1211 data.test_harness = true;
1212 seen_rust_tags = !seen_other_tags || seen_rust_tags;
1213 }
1214 LangStringToken::LangToken("compile_fail") => {
1215 data.compile_fail = true;
1216 seen_rust_tags = !seen_other_tags || seen_rust_tags;
1217 data.no_run = true;
1218 }
1219 LangStringToken::LangToken("standalone_crate") => {
1220 data.standalone_crate = true;
1221 seen_rust_tags = !seen_other_tags || seen_rust_tags;
1222 }
1223 LangStringToken::LangToken(x)
1224 if let Some(edition) = x.strip_prefix("edition") =>
1225 {
1226 data.edition = edition.parse::<Edition>().ok();
1227 }
1228 LangStringToken::LangToken(x)
1229 if let Some(edition) = x.strip_prefix("rust")
1230 && edition.parse::<Edition>().is_ok()
1231 && let Some(extra) = extra =>
1232 {
1233 extra.error_invalid_codeblock_attr_with_help(
1234 format!("unknown attribute `{x}`"),
1235 |lint| {
1236 lint.help(format!(
1237 "there is an attribute with a similar name: `edition{edition}`"
1238 ));
1239 },
1240 );
1241 }
1242 LangStringToken::LangToken(x)
1243 if allow_error_code_check
1244 && let Some(error_code) = x.strip_prefix('E')
1245 && error_code.len() == 4 =>
1246 {
1247 if error_code.parse::<u32>().is_ok() {
1248 data.error_codes.push(x.to_owned());
1249 seen_rust_tags = !seen_other_tags || seen_rust_tags;
1250 } else {
1251 seen_other_tags = true;
1252 }
1253 }
1254 LangStringToken::LangToken(x) if let Some(extra) = extra => {
1255 if let Some(help) = match x.to_lowercase().as_str() {
1256 "compile-fail" | "compile_fail" | "compilefail" => Some(
1257 "use `compile_fail` to invert the results of this test, so that it \
1258 passes if it cannot be compiled and fails if it can",
1259 ),
1260 "should-panic" | "should_panic" | "shouldpanic" => Some(
1261 "use `should_panic` to invert the results of this test, so that if \
1262 passes if it panics and fails if it does not",
1263 ),
1264 "no-run" | "no_run" | "norun" => Some(
1265 "use `no_run` to compile, but not run, the code sample during \
1266 testing",
1267 ),
1268 "test-harness" | "test_harness" | "testharness" => Some(
1269 "use `test_harness` to run functions marked `#[test]` instead of a \
1270 potentially-implicit `main` function",
1271 ),
1272 "standalone" | "standalone_crate" | "standalone-crate"
1273 if extra.sp.at_least_rust_2024() =>
1274 {
1275 Some(
1276 "use `standalone_crate` to compile this code block \
1277 separately",
1278 )
1279 }
1280 _ => None,
1281 } {
1282 extra.error_invalid_codeblock_attr_with_help(
1283 format!("unknown attribute `{x}`"),
1284 |lint| {
1285 lint.help(help).help(
1286 "this code block may be skipped during testing, \
1287 because unknown attributes are treated as markers for \
1288 code samples written in other programming languages, \
1289 unless it is also explicitly marked as `rust`",
1290 );
1291 },
1292 );
1293 }
1294 seen_other_tags = true;
1295 data.unknown.push(x.to_owned());
1296 }
1297 LangStringToken::LangToken(x) => {
1298 seen_other_tags = true;
1299 data.unknown.push(x.to_owned());
1300 }
1301 LangStringToken::KeyValueAttribute("class", value) => {
1302 data.added_classes.push(value.to_owned());
1303 }
1304 LangStringToken::KeyValueAttribute(key, ..) if let Some(extra) = extra => {
1305 extra
1306 .error_invalid_codeblock_attr(format!("unsupported attribute `{key}`"));
1307 }
1308 LangStringToken::ClassAttribute(class) => {
1309 data.added_classes.push(class.to_owned());
1310 }
1311 _ => {}
1312 }
1313 }
1314 };
1315
1316 let mut tag_iter = TagIterator::new(string, extra);
1317 call(&mut tag_iter);
1318
1319 if !ignores.is_empty() {
1321 data.ignore = Ignore::Some(ignores);
1322 }
1323
1324 data.rust &= !seen_custom_tag && (!seen_other_tags || seen_rust_tags) && !tag_iter.is_error;
1325
1326 data
1327 }
1328}
1329
1330impl<'a> Markdown<'a> {
1331 pub fn into_string(self) -> String {
1332 if self.content.is_empty() {
1334 return String::new();
1335 }
1336
1337 let mut s = String::with_capacity(self.content.len() * 3 / 2);
1338 html::push_html(&mut s, self.into_iter());
1339
1340 s
1341 }
1342
1343 fn into_iter(self) -> CodeBlocks<'a, 'a, impl Iterator<Item = Event<'a>>> {
1344 let Markdown {
1345 content: md,
1346 links,
1347 ids,
1348 error_codes: codes,
1349 edition,
1350 playground,
1351 heading_offset,
1352 } = self;
1353
1354 let replacer = move |broken_link: BrokenLink<'_>| {
1355 links
1356 .iter()
1357 .find(|link| *link.original_text == *broken_link.reference)
1358 .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1359 };
1360
1361 let p = Parser::new_with_broken_link_callback(md, main_body_opts(), Some(replacer));
1362 let p = p.into_offset_iter();
1363
1364 ids.handle_footnotes(|ids, existing_footnotes| {
1365 let p = HeadingLinks::new(p, None, ids, heading_offset);
1366 let p = SpannedLinkReplacer::new(p, links);
1367 let p = footnotes::Footnotes::new(p, existing_footnotes);
1368 let p = TableWrapper::new(p.map(|(ev, _)| ev));
1369 CodeBlocks::new(p, codes, edition, playground)
1370 })
1371 }
1372
1373 pub(crate) fn split_summary_and_content(self) -> (Option<String>, Option<String>) {
1379 if self.content.is_empty() {
1380 return (None, None);
1381 }
1382 let mut p = self.into_iter();
1383
1384 let mut event_level = 0;
1385 let mut summary_events = Vec::new();
1386 let mut get_next_tag = false;
1387
1388 let mut end_of_summary = false;
1389 while let Some(event) = p.next() {
1390 match event {
1391 Event::Start(_) => event_level += 1,
1392 Event::End(kind) => {
1393 event_level -= 1;
1394 if event_level == 0 {
1395 end_of_summary = true;
1397 get_next_tag = kind == TagEnd::Table;
1399 }
1400 }
1401 _ => {}
1402 }
1403 summary_events.push(event);
1404 if end_of_summary {
1405 if get_next_tag && let Some(event) = p.next() {
1406 summary_events.push(event);
1407 }
1408 break;
1409 }
1410 }
1411 let mut summary = String::new();
1412 html::push_html(&mut summary, summary_events.into_iter());
1413 if summary.is_empty() {
1414 return (None, None);
1415 }
1416 let mut content = String::new();
1417 html::push_html(&mut content, p);
1418
1419 if content.is_empty() { (Some(summary), None) } else { (Some(summary), Some(content)) }
1420 }
1421}
1422
1423impl MarkdownWithToc<'_> {
1424 pub(crate) fn into_parts(self) -> (Toc, String) {
1425 let MarkdownWithToc { content: md, links, ids, error_codes: codes, edition, playground } =
1426 self;
1427
1428 if md.is_empty() {
1430 return (Toc { entries: Vec::new() }, String::new());
1431 }
1432 let mut replacer = |broken_link: BrokenLink<'_>| {
1433 links
1434 .iter()
1435 .find(|link| *link.original_text == *broken_link.reference)
1436 .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1437 };
1438
1439 let p = Parser::new_with_broken_link_callback(md, main_body_opts(), Some(&mut replacer));
1440 let p = p.into_offset_iter();
1441
1442 let mut s = String::with_capacity(md.len() * 3 / 2);
1443
1444 let mut toc = TocBuilder::new();
1445
1446 ids.handle_footnotes(|ids, existing_footnotes| {
1447 let p = HeadingLinks::new(p, Some(&mut toc), ids, HeadingOffset::H1);
1448 let p = footnotes::Footnotes::new(p, existing_footnotes);
1449 let p = TableWrapper::new(p.map(|(ev, _)| ev));
1450 let p = CodeBlocks::new(p, codes, edition, playground);
1451 html::push_html(&mut s, p);
1452 });
1453
1454 (toc.into_toc(), s)
1455 }
1456 pub(crate) fn into_string(self) -> String {
1457 let (toc, s) = self.into_parts();
1458 format!("<nav id=\"rustdoc\">{toc}</nav>{s}", toc = toc.print())
1459 }
1460}
1461
1462impl MarkdownItemInfo<'_> {
1463 pub(crate) fn into_string(self) -> String {
1464 let MarkdownItemInfo(md, ids) = self;
1465
1466 if md.is_empty() {
1468 return String::new();
1469 }
1470 let p = Parser::new_ext(md, main_body_opts()).into_offset_iter();
1471
1472 let p = p.map(|event| match event.0 {
1474 Event::Html(text) | Event::InlineHtml(text) => (Event::Text(text), event.1),
1475 _ => event,
1476 });
1477
1478 let mut s = String::with_capacity(md.len() * 3 / 2);
1479
1480 ids.handle_footnotes(|ids, existing_footnotes| {
1481 let p = HeadingLinks::new(p, None, ids, HeadingOffset::H1);
1482 let p = footnotes::Footnotes::new(p, existing_footnotes);
1483 let p = TableWrapper::new(p.map(|(ev, _)| ev));
1484 let p = p.filter(|event| {
1485 !matches!(event, Event::Start(Tag::Paragraph) | Event::End(TagEnd::Paragraph))
1486 });
1487 html::push_html(&mut s, p);
1488 });
1489
1490 s
1491 }
1492}
1493
1494impl MarkdownSummaryLine<'_> {
1495 pub(crate) fn into_string_with_has_more_content(self) -> (String, bool) {
1496 let MarkdownSummaryLine(md, links) = self;
1497 if md.is_empty() {
1499 return (String::new(), false);
1500 }
1501
1502 let mut replacer = |broken_link: BrokenLink<'_>| {
1503 links
1504 .iter()
1505 .find(|link| *link.original_text == *broken_link.reference)
1506 .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1507 };
1508
1509 let p = Parser::new_with_broken_link_callback(md, summary_opts(), Some(&mut replacer))
1510 .peekable();
1511 let mut summary = SummaryLine::new(p);
1512
1513 let mut s = String::new();
1514
1515 let without_paragraphs = LinkReplacer::new(&mut summary, links).filter(|event| {
1516 !matches!(event, Event::Start(Tag::Paragraph) | Event::End(TagEnd::Paragraph))
1517 });
1518
1519 html::push_html(&mut s, without_paragraphs);
1520
1521 let has_more_content =
1522 matches!(summary.inner.peek(), Some(Event::Start(_))) || summary.skipped_tags > 0;
1523
1524 (s, has_more_content)
1525 }
1526
1527 pub(crate) fn into_string(self) -> String {
1528 self.into_string_with_has_more_content().0
1529 }
1530}
1531
1532fn markdown_summary_with_limit(
1541 md: &str,
1542 link_names: &[RenderedLink],
1543 length_limit: usize,
1544) -> (String, bool) {
1545 if md.is_empty() {
1546 return (String::new(), false);
1547 }
1548
1549 let mut replacer = |broken_link: BrokenLink<'_>| {
1550 link_names
1551 .iter()
1552 .find(|link| *link.original_text == *broken_link.reference)
1553 .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1554 };
1555
1556 let p = Parser::new_with_broken_link_callback(md, summary_opts(), Some(&mut replacer));
1557 let mut p = LinkReplacer::new(p, link_names);
1558
1559 let mut buf = HtmlWithLimit::new(length_limit);
1560 let mut stopped_early = false;
1561 let _ = p.try_for_each(|event| {
1562 match &event {
1563 Event::Text(text) => {
1564 let r =
1565 text.split_inclusive(char::is_whitespace).try_for_each(|word| buf.push(word));
1566 if r.is_break() {
1567 stopped_early = true;
1568 }
1569 return r;
1570 }
1571 Event::Code(code) => {
1572 buf.open_tag("code");
1573 let r = buf.push(code);
1574 if r.is_break() {
1575 stopped_early = true;
1576 } else {
1577 buf.close_tag();
1578 }
1579 return r;
1580 }
1581 Event::Start(tag) => match tag {
1582 Tag::Emphasis => buf.open_tag("em"),
1583 Tag::Strong => buf.open_tag("strong"),
1584 Tag::CodeBlock(..) => return ControlFlow::Break(()),
1585 _ => {}
1586 },
1587 Event::End(tag) => match tag {
1588 TagEnd::Emphasis | TagEnd::Strong => buf.close_tag(),
1589 TagEnd::Paragraph | TagEnd::Heading(_) => return ControlFlow::Break(()),
1590 _ => {}
1591 },
1592 Event::HardBreak | Event::SoftBreak => buf.push(" ")?,
1593 _ => {}
1594 };
1595 ControlFlow::Continue(())
1596 });
1597
1598 (buf.finish(), stopped_early)
1599}
1600
1601pub(crate) fn short_markdown_summary(markdown: &str, link_names: &[RenderedLink]) -> String {
1608 let (mut s, was_shortened) = markdown_summary_with_limit(markdown, link_names, 59);
1609
1610 if was_shortened {
1611 s.push('…');
1612 }
1613
1614 s
1615}
1616
1617pub(crate) fn plain_text_summary(md: &str, link_names: &[RenderedLink]) -> String {
1624 if md.is_empty() {
1625 return String::new();
1626 }
1627
1628 let mut s = String::with_capacity(md.len() * 3 / 2);
1629
1630 let mut replacer = |broken_link: BrokenLink<'_>| {
1631 link_names
1632 .iter()
1633 .find(|link| *link.original_text == *broken_link.reference)
1634 .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1635 };
1636
1637 let p = Parser::new_with_broken_link_callback(md, summary_opts(), Some(&mut replacer));
1638
1639 plain_text_from_events(p, &mut s);
1640
1641 s
1642}
1643
1644pub(crate) fn plain_text_from_events<'a>(
1645 events: impl Iterator<Item = pulldown_cmark::Event<'a>>,
1646 s: &mut String,
1647) {
1648 for event in events {
1649 match &event {
1650 Event::Text(text) => s.push_str(text),
1651 Event::Code(code) => {
1652 s.push('`');
1653 s.push_str(code);
1654 s.push('`');
1655 }
1656 Event::HardBreak | Event::SoftBreak => s.push(' '),
1657 Event::Start(Tag::CodeBlock(..)) => break,
1658 Event::End(TagEnd::Paragraph) => break,
1659 Event::End(TagEnd::Heading(..)) => break,
1660 _ => (),
1661 }
1662 }
1663}
1664
1665pub(crate) fn html_text_from_events<'a>(
1666 events: impl Iterator<Item = pulldown_cmark::Event<'a>>,
1667 s: &mut String,
1668) {
1669 for event in events {
1670 match &event {
1671 Event::Text(text) => {
1672 write!(s, "{}", EscapeBodyText(text)).expect("string alloc infallible")
1673 }
1674 Event::Code(code) => {
1675 s.push_str("<code>");
1676 write!(s, "{}", EscapeBodyText(code)).expect("string alloc infallible");
1677 s.push_str("</code>");
1678 }
1679 Event::HardBreak | Event::SoftBreak => s.push(' '),
1680 Event::Start(Tag::CodeBlock(..)) => break,
1681 Event::End(TagEnd::Paragraph) => break,
1682 Event::End(TagEnd::Heading(..)) => break,
1683 _ => (),
1684 }
1685 }
1686}
1687
/// A link found in a markdown string by `markdown_links`.
#[derive(Debug)]
pub(crate) struct MarkdownLink {
    /// The kind of link as reported by the markdown parser.
    pub kind: LinkType,
    /// The link's destination text.
    pub link: String,
    /// Where the link sits in the markdown source.
    pub range: MarkdownLinkRange,
}
1694
/// A byte range in a markdown string describing where a link was found.
#[derive(Clone, Debug)]
pub(crate) enum MarkdownLinkRange {
    /// The range covers just the link's destination text.
    Destination(Range<usize>),
    /// The destination could not be pinpointed, so the range is the whole span reported
    /// for the link (or reference definition).
    WholeLink(Range<usize>),
}
1704
1705impl MarkdownLinkRange {
1706 pub fn inner_range(&self) -> &Range<usize> {
1708 match self {
1709 MarkdownLinkRange::Destination(range) => range,
1710 MarkdownLinkRange::WholeLink(range) => range,
1711 }
1712 }
1713}
1714
1715pub(crate) fn markdown_links<'md, R>(
1716 md: &'md str,
1717 preprocess_link: impl Fn(MarkdownLink) -> Option<R>,
1718) -> Vec<R> {
1719 use itertools::Itertools;
1720 if md.is_empty() {
1721 return vec![];
1722 }
1723
1724 let locate = |s: &str, fallback: Range<usize>| unsafe {
1726 let s_start = s.as_ptr();
1727 let s_end = s_start.add(s.len());
1728 let md_start = md.as_ptr();
1729 let md_end = md_start.add(md.len());
1730 if md_start <= s_start && s_end <= md_end {
1731 let start = s_start.offset_from(md_start) as usize;
1732 let end = s_end.offset_from(md_start) as usize;
1733 MarkdownLinkRange::Destination(start..end)
1734 } else {
1735 MarkdownLinkRange::WholeLink(fallback)
1736 }
1737 };
1738
1739 let span_for_link = |link: &CowStr<'_>, span: Range<usize>| {
1740 match link {
1745 CowStr::Borrowed(s) => locate(s, span),
1750
1751 CowStr::Boxed(_) | CowStr::Inlined(_) => MarkdownLinkRange::WholeLink(span),
1753 }
1754 };
1755
1756 let span_for_refdef = |link: &CowStr<'_>, span: Range<usize>| {
1757 let mut square_brace_count = 0;
1760 let mut iter = md.as_bytes()[span.start..span.end].iter().copied().enumerate();
1761 for (_i, c) in &mut iter {
1762 match c {
1763 b':' if square_brace_count == 0 => break,
1764 b'[' => square_brace_count += 1,
1765 b']' => square_brace_count -= 1,
1766 _ => {}
1767 }
1768 }
1769 while let Some((i, c)) = iter.next() {
1770 if c == b'<' {
1771 while let Some((j, c)) = iter.next() {
1772 match c {
1773 b'\\' => {
1774 let _ = iter.next();
1775 }
1776 b'>' => {
1777 return MarkdownLinkRange::Destination(
1778 i + 1 + span.start..j + span.start,
1779 );
1780 }
1781 _ => {}
1782 }
1783 }
1784 } else if !c.is_ascii_whitespace() {
1785 for (j, c) in iter.by_ref() {
1786 if c.is_ascii_whitespace() {
1787 return MarkdownLinkRange::Destination(i + span.start..j + span.start);
1788 }
1789 }
1790 return MarkdownLinkRange::Destination(i + span.start..span.end);
1791 }
1792 }
1793 span_for_link(link, span)
1794 };
1795
1796 let span_for_offset_backward = |span: Range<usize>, open: u8, close: u8| {
1797 let mut open_brace = !0;
1798 let mut close_brace = !0;
1799 for (i, b) in md.as_bytes()[span.clone()].iter().copied().enumerate().rev() {
1800 let i = i + span.start;
1801 if b == close {
1802 close_brace = i;
1803 break;
1804 }
1805 }
1806 if close_brace < span.start || close_brace >= span.end {
1807 return MarkdownLinkRange::WholeLink(span);
1808 }
1809 let mut nesting = 1;
1810 for (i, b) in md.as_bytes()[span.start..close_brace].iter().copied().enumerate().rev() {
1811 let i = i + span.start;
1812 if b == close {
1813 nesting += 1;
1814 }
1815 if b == open {
1816 nesting -= 1;
1817 }
1818 if nesting == 0 {
1819 open_brace = i;
1820 break;
1821 }
1822 }
1823 assert!(open_brace != close_brace);
1824 if open_brace < span.start || open_brace >= span.end {
1825 return MarkdownLinkRange::WholeLink(span);
1826 }
1827 let range = (open_brace + 1)..close_brace;
1829 MarkdownLinkRange::Destination(range)
1830 };
1831
1832 let span_for_offset_forward = |span: Range<usize>, open: u8, close: u8| {
1833 let mut open_brace = !0;
1834 let mut close_brace = !0;
1835 for (i, b) in md.as_bytes()[span.clone()].iter().copied().enumerate() {
1836 let i = i + span.start;
1837 if b == open {
1838 open_brace = i;
1839 break;
1840 }
1841 }
1842 if open_brace < span.start || open_brace >= span.end {
1843 return MarkdownLinkRange::WholeLink(span);
1844 }
1845 let mut nesting = 0;
1846 for (i, b) in md.as_bytes()[open_brace..span.end].iter().copied().enumerate() {
1847 let i = i + open_brace;
1848 if b == close {
1849 nesting -= 1;
1850 }
1851 if b == open {
1852 nesting += 1;
1853 }
1854 if nesting == 0 {
1855 close_brace = i;
1856 break;
1857 }
1858 }
1859 assert!(open_brace != close_brace);
1860 if open_brace < span.start || open_brace >= span.end {
1861 return MarkdownLinkRange::WholeLink(span);
1862 }
1863 let range = (open_brace + 1)..close_brace;
1865 MarkdownLinkRange::Destination(range)
1866 };
1867
1868 let mut broken_link_callback = |link: BrokenLink<'md>| Some((link.reference, "".into()));
1869 let event_iter = Parser::new_with_broken_link_callback(
1870 md,
1871 main_body_opts(),
1872 Some(&mut broken_link_callback),
1873 )
1874 .into_offset_iter();
1875 let mut links = Vec::new();
1876
1877 let mut refdefs = FxIndexMap::default();
1878 for (label, refdef) in event_iter.reference_definitions().iter().sorted_by_key(|x| x.0) {
1879 refdefs.insert(label.to_string(), (false, refdef.dest.to_string(), refdef.span.clone()));
1880 }
1881
1882 for (event, span) in event_iter {
1883 match event {
1884 Event::Start(Tag::Link { link_type, dest_url, id, .. })
1885 if may_be_doc_link(link_type) =>
1886 {
1887 let range = match link_type {
1888 LinkType::ReferenceUnknown | LinkType::ShortcutUnknown => {
1890 span_for_offset_backward(span, b'[', b']')
1891 }
1892 LinkType::CollapsedUnknown => span_for_offset_forward(span, b'[', b']'),
1893 LinkType::Inline => span_for_offset_backward(span, b'(', b')'),
1894 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut => {
1896 if let Some((is_used, dest_url, span)) = refdefs.get_mut(&id[..]) {
1897 *is_used = true;
1898 span_for_refdef(&CowStr::from(&dest_url[..]), span.clone())
1899 } else {
1900 span_for_link(&dest_url, span)
1901 }
1902 }
1903 LinkType::Autolink | LinkType::Email => unreachable!(),
1904 };
1905
1906 if let Some(link) = preprocess_link(MarkdownLink {
1907 kind: link_type,
1908 link: dest_url.into_string(),
1909 range,
1910 }) {
1911 links.push(link);
1912 }
1913 }
1914 _ => {}
1915 }
1916 }
1917
1918 for (_label, (is_used, dest_url, span)) in refdefs.into_iter() {
1919 if !is_used
1920 && let Some(link) = preprocess_link(MarkdownLink {
1921 kind: LinkType::Reference,
1922 range: span_for_refdef(&CowStr::from(&dest_url[..]), span),
1923 link: dest_url,
1924 })
1925 {
1926 links.push(link);
1927 }
1928 }
1929
1930 links
1931}
1932
/// A code block found in a markdown string by `rust_code_blocks`.
#[derive(Debug)]
pub(crate) struct RustCodeBlock {
    /// The range reported by the parser for the whole code block. For an indented block
    /// that is directly followed by a newline, the end is pulled back by one.
    pub(crate) range: Range<usize>,
    /// The range covered by the block's text events. Empty when a fenced block is not
    /// followed by a text event.
    pub(crate) code: Range<usize>,
    /// `true` for fenced code blocks, `false` for indented ones.
    pub(crate) is_fenced: bool,
    /// The parsed info string of a fenced block; the default value for indented blocks
    /// and for fenced blocks with an empty info string.
    pub(crate) lang_string: LangString,
}
1943
1944pub(crate) fn rust_code_blocks(md: &str, extra_info: &ExtraInfo<'_>) -> Vec<RustCodeBlock> {
1947 let mut code_blocks = vec![];
1948
1949 if md.is_empty() {
1950 return code_blocks;
1951 }
1952
1953 let mut p = Parser::new_ext(md, main_body_opts()).into_offset_iter();
1954
1955 while let Some((event, offset)) = p.next() {
1956 if let Event::Start(Tag::CodeBlock(syntax)) = event {
1957 let (lang_string, code_start, code_end, range, is_fenced) = match syntax {
1958 CodeBlockKind::Fenced(syntax) => {
1959 let syntax = syntax.as_ref();
1960 let lang_string = if syntax.is_empty() {
1961 Default::default()
1962 } else {
1963 LangString::parse(syntax, ErrorCodes::Yes, Some(extra_info))
1964 };
1965 if !lang_string.rust {
1966 continue;
1967 }
1968 let (code_start, mut code_end) = match p.next() {
1969 Some((Event::Text(_), offset)) => (offset.start, offset.end),
1970 Some((_, sub_offset)) => {
1971 let code = Range { start: sub_offset.start, end: sub_offset.start };
1972 code_blocks.push(RustCodeBlock {
1973 is_fenced: true,
1974 range: offset,
1975 code,
1976 lang_string,
1977 });
1978 continue;
1979 }
1980 None => {
1981 let code = Range { start: offset.end, end: offset.end };
1982 code_blocks.push(RustCodeBlock {
1983 is_fenced: true,
1984 range: offset,
1985 code,
1986 lang_string,
1987 });
1988 continue;
1989 }
1990 };
1991 while let Some((Event::Text(_), offset)) = p.next() {
1992 code_end = offset.end;
1993 }
1994 (lang_string, code_start, code_end, offset, true)
1995 }
1996 CodeBlockKind::Indented => {
1997 if offset.end > offset.start && md.get(offset.end..=offset.end) == Some("\n") {
2000 (
2001 LangString::default(),
2002 offset.start,
2003 offset.end,
2004 Range { start: offset.start, end: offset.end - 1 },
2005 false,
2006 )
2007 } else {
2008 (LangString::default(), offset.start, offset.end, offset, false)
2009 }
2010 }
2011 };
2012
2013 code_blocks.push(RustCodeBlock {
2014 is_fenced,
2015 range,
2016 code: Range { start: code_start, end: code_end },
2017 lang_string,
2018 });
2019 }
2020 }
2021
2022 code_blocks
2023}
2024
/// Hands out HTML ids derived from candidate strings (see `IdMap::derive`).
#[derive(Clone, Default, Debug)]
pub struct IdMap {
    /// For every id seen so far, the numeric suffix `derive` will append the next time
    /// that same id is requested.
    map: FxHashMap<String, usize>,
    /// Counter handed (as a `Weak`) to the closure passed to `handle_footnotes`.
    /// NOTE(review): presumably the number of footnotes rendered so far — confirm
    /// against the `footnotes` module.
    existing_footnotes: Arc<AtomicUsize>,
}
2030
/// Returns `true` if `id` is one of the ids in the fixed list below.
///
/// `IdMap::derive` never hands these out unsuffixed.
/// NOTE(review): presumably these are ids already used by the generated page chrome —
/// confirm against the templates.
fn is_default_id(id: &str) -> bool {
    const DEFAULT_IDS: &[&str] = &[
        "help",
        "settings",
        "not-displayed",
        "alternative-display",
        "search",
        "crate-search",
        "crate-search-div",
        "themeStyle",
        "settings-menu",
        "help-button",
        "sidebar-button",
        "main-content",
        "toggle-all-docs",
        "all-types",
        "default-settings",
        "sidebar-vars",
        "copy-path",
        "rustdoc-toc",
        "rustdoc-modnav",
        "fields",
        "variants",
        "implementors-list",
        "synthetic-implementors-list",
        "foreign-impls",
        "implementations",
        "trait-implementations",
        "synthetic-implementations",
        "blanket-implementations",
        "required-associated-types",
        "provided-associated-types",
        "provided-associated-consts",
        "required-associated-consts",
        "required-methods",
        "provided-methods",
        "dyn-compatibility",
        "implementors",
        "synthetic-implementors",
        "implementations-list",
        "trait-implementations-list",
        "synthetic-implementations-list",
        "blanket-implementations-list",
        "deref-methods",
        "layout",
        "aliased-type",
    ];

    DEFAULT_IDS.contains(&id)
}
2085
2086impl IdMap {
2087 pub fn new() -> Self {
2088 IdMap { map: FxHashMap::default(), existing_footnotes: Arc::new(AtomicUsize::new(0)) }
2089 }
2090
2091 pub(crate) fn derive<S: AsRef<str> + ToString>(&mut self, candidate: S) -> String {
2092 let id = match self.map.get_mut(candidate.as_ref()) {
2093 None => {
2094 let candidate = candidate.to_string();
2095 if is_default_id(&candidate) {
2096 let id = format!("{}-{}", candidate, 1);
2097 self.map.insert(candidate, 2);
2098 id
2099 } else {
2100 candidate
2101 }
2102 }
2103 Some(a) => {
2104 let id = format!("{}-{}", candidate.as_ref(), *a);
2105 *a += 1;
2106 id
2107 }
2108 };
2109
2110 self.map.insert(id.clone(), 1);
2111 id
2112 }
2113
2114 pub(crate) fn handle_footnotes<'a, T, F: FnOnce(&'a mut Self, Weak<AtomicUsize>) -> T>(
2117 &'a mut self,
2118 closure: F,
2119 ) -> T {
2120 let existing_footnotes = Arc::downgrade(&self.existing_footnotes);
2121
2122 closure(self, existing_footnotes)
2123 }
2124
2125 pub(crate) fn clear(&mut self) {
2126 self.map.clear();
2127 self.existing_footnotes = Arc::new(AtomicUsize::new(0));
2128 }
2129}