Skip to content

rustdoc: check parsing diffs between pulldown-cmark 0.9.6 and 0.10 #121659

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
@@ -4704,6 +4704,7 @@ dependencies = [
"itertools",
"minifier",
"once_cell",
"pulldown-cmark 0.10.0",
"regex",
"rustdoc-json-types",
"serde",
1 change: 1 addition & 0 deletions src/librustdoc/Cargo.toml
Original file line number Diff line number Diff line change
@@ -13,6 +13,7 @@ itertools = "0.11"
indexmap = "2"
minifier = "0.3.0"
once_cell = "1.10.0"
pulldown-cmark-new = { version = "0.10", package = "pulldown-cmark", default-features = false }
regex = "1"
rustdoc-json-types = { path = "../rustdoc-json-types" }
serde_json = "1.0"
9 changes: 9 additions & 0 deletions src/librustdoc/lint.rs
Original file line number Diff line number Diff line change
@@ -196,6 +196,14 @@ declare_rustdoc_lint! {
"detects redundant explicit links in doc comments"
}

declare_rustdoc_lint! {
/// This compatibility lint checks for Markdown syntax that works in the old engine but not
/// the new one.
UNPORTABLE_MARKDOWN,
Deny,
"detects markdown that is interpreted differently in different parser"
}

pub(crate) static RUSTDOC_LINTS: Lazy<Vec<&'static Lint>> = Lazy::new(|| {
vec![
BROKEN_INTRA_DOC_LINKS,
@@ -209,6 +217,7 @@ pub(crate) static RUSTDOC_LINTS: Lazy<Vec<&'static Lint>> = Lazy::new(|| {
MISSING_CRATE_LEVEL_DOCS,
UNESCAPED_BACKTICKS,
REDUNDANT_EXPLICIT_LINKS,
UNPORTABLE_MARKDOWN,
]
});

2 changes: 2 additions & 0 deletions src/librustdoc/passes/lint.rs
Original file line number Diff line number Diff line change
@@ -6,6 +6,7 @@ mod check_code_block_syntax;
mod html_tags;
mod redundant_explicit_links;
mod unescaped_backticks;
mod unportable_markdown;

use super::Pass;
use crate::clean::*;
@@ -31,6 +32,7 @@ impl<'a, 'tcx> DocVisitor for Linter<'a, 'tcx> {
html_tags::visit_item(self.cx, item);
unescaped_backticks::visit_item(self.cx, item);
redundant_explicit_links::visit_item(self.cx, item);
unportable_markdown::visit_item(self.cx, item);

self.visit_item_recur(item)
}
316 changes: 316 additions & 0 deletions src/librustdoc/passes/lint/unportable_markdown.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,316 @@
//! Detects markdown syntax that's different between pulldown-cmark
//! 0.9 and 0.10.
use crate::clean::Item;
use crate::core::DocContext;
use crate::html::markdown::main_body_opts;
use pulldown_cmark as cmarko;
use pulldown_cmark_new as cmarkn;
use rustc_resolve::rustdoc::source_span_for_markdown_range;

pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item) {
let tcx = cx.tcx;
let Some(hir_id) = DocContext::as_local_hir_id(tcx, item.item_id) else {
// If non-local, no need to check anything.
return;
};

let dox = item.doc_value();
if dox.is_empty() {
return;
}

let link_names = item.link_names(&cx.cache);
let mut replacer_old = |broken_link: cmarko::BrokenLink<'_>| {
link_names
.iter()
.find(|link| *link.original_text == *broken_link.reference)
.map(|link| ((*link.href).into(), (*link.new_text).into()))
};
let parser_old = cmarko::Parser::new_with_broken_link_callback(
&dox,
main_body_opts(),
Some(&mut replacer_old),
)
.into_offset_iter()
// Not worth cleaning up minor "distinctions without difference" in the AST.
// Text events get chopped up differently between versions.
// <html> and `code` mistakes are usually covered by unescaped_backticks and html_tags lints.
.filter(|(event, _event_range)| {
!matches!(
event,
cmarko::Event::Code(_)
| cmarko::Event::Text(_)
| cmarko::Event::Html(_)
| cmarko::Event::SoftBreak
)
});

pub fn main_body_opts_new() -> cmarkn::Options {
cmarkn::Options::ENABLE_TABLES
| cmarkn::Options::ENABLE_FOOTNOTES
| cmarkn::Options::ENABLE_STRIKETHROUGH
| cmarkn::Options::ENABLE_TASKLISTS
| cmarkn::Options::ENABLE_SMART_PUNCTUATION
}
let mut replacer_new = |broken_link: cmarkn::BrokenLink<'_>| {
link_names
.iter()
.find(|link| *link.original_text.trim() == *broken_link.reference.trim())
.map(|link| ((*link.href).into(), (*link.new_text).into()))
};
let parser_new = cmarkn::Parser::new_with_broken_link_callback(
&dox,
main_body_opts_new(),
Some(&mut replacer_new),
)
.into_offset_iter()
.filter(|(event, _event_range)| {
!matches!(
event,
cmarkn::Event::Code(_)
| cmarkn::Event::Text(_)
| cmarkn::Event::Html(_)
| cmarkn::Event::InlineHtml(_)
| cmarkn::Event::Start(cmarkn::Tag::HtmlBlock)
| cmarkn::Event::End(cmarkn::TagEnd::HtmlBlock)
| cmarkn::Event::SoftBreak
)
});

let mut reported_an_error = false;
for ((event_old, event_range_old), (event_new, event_range_new)) in parser_old.zip(parser_new) {
match (event_old, event_new) {
(
cmarko::Event::Start(cmarko::Tag::Emphasis),
cmarkn::Event::Start(cmarkn::Tag::Emphasis),
)
| (
cmarko::Event::Start(cmarko::Tag::Strong),
cmarkn::Event::Start(cmarkn::Tag::Strong),
)
| (
cmarko::Event::Start(cmarko::Tag::Strikethrough),
cmarkn::Event::Start(cmarkn::Tag::Strikethrough),
)
| (
cmarko::Event::End(cmarko::Tag::Emphasis),
cmarkn::Event::End(cmarkn::TagEnd::Emphasis),
)
| (
cmarko::Event::End(cmarko::Tag::Strong),
cmarkn::Event::End(cmarkn::TagEnd::Strong),
)
| (
cmarko::Event::End(cmarko::Tag::Strikethrough),
cmarkn::Event::End(cmarkn::TagEnd::Strikethrough),
)
| (
cmarko::Event::End(cmarko::Tag::Link(..)),
cmarkn::Event::End(cmarkn::TagEnd::Link),
)
| (
cmarko::Event::End(cmarko::Tag::Image(..)),
cmarkn::Event::End(cmarkn::TagEnd::Image),
)
| (cmarko::Event::FootnoteReference(..), cmarkn::Event::FootnoteReference(..))
| (cmarko::Event::TaskListMarker(false), cmarkn::Event::TaskListMarker(false))
| (cmarko::Event::TaskListMarker(true), cmarkn::Event::TaskListMarker(true))
if event_range_old == event_range_new =>
{
// Matching tags. Do nothing.
}
(
cmarko::Event::Start(cmarko::Tag::Link(_, old_dest_url, old_title)),
cmarkn::Event::Start(cmarkn::Tag::Link { dest_url, title, .. }),
)
| (
cmarko::Event::Start(cmarko::Tag::Image(_, old_dest_url, old_title)),
cmarkn::Event::Start(cmarkn::Tag::Image { dest_url, title, .. }),
) if event_range_old == event_range_new
&& &old_dest_url[..] == &dest_url[..]
&& &old_title[..] == &title[..] =>
{
// Matching tags. Do nothing.
}
(cmarko::Event::SoftBreak, cmarkn::Event::SoftBreak)
| (cmarko::Event::HardBreak, cmarkn::Event::HardBreak)
| (cmarko::Event::Rule, cmarkn::Event::Rule)
| (
cmarko::Event::Start(cmarko::Tag::Paragraph),
cmarkn::Event::Start(cmarkn::Tag::Paragraph),
)
| (
cmarko::Event::Start(cmarko::Tag::Heading(..)),
cmarkn::Event::Start(cmarkn::Tag::Heading { .. }),
)
| (
cmarko::Event::Start(cmarko::Tag::BlockQuote),
cmarkn::Event::Start(cmarkn::Tag::BlockQuote),
)
| (
cmarko::Event::Start(cmarko::Tag::CodeBlock(..)),
cmarkn::Event::Start(cmarkn::Tag::CodeBlock(..)),
)
| (
cmarko::Event::Start(cmarko::Tag::List(..)),
cmarkn::Event::Start(cmarkn::Tag::List(..)),
)
| (cmarko::Event::Start(cmarko::Tag::Item), cmarkn::Event::Start(cmarkn::Tag::Item))
| (
cmarko::Event::Start(cmarko::Tag::FootnoteDefinition(..)),
cmarkn::Event::Start(cmarkn::Tag::FootnoteDefinition(..)),
)
| (
cmarko::Event::Start(cmarko::Tag::Table(..)),
cmarkn::Event::Start(cmarkn::Tag::Table(..)),
)
| (
cmarko::Event::Start(cmarko::Tag::TableHead),
cmarkn::Event::Start(cmarkn::Tag::TableHead),
)
| (
cmarko::Event::Start(cmarko::Tag::TableRow),
cmarkn::Event::Start(cmarkn::Tag::TableRow),
)
| (
cmarko::Event::Start(cmarko::Tag::TableCell),
cmarkn::Event::Start(cmarkn::Tag::TableCell),
)
| (
cmarko::Event::End(cmarko::Tag::Paragraph),
cmarkn::Event::End(cmarkn::TagEnd::Paragraph),
)
| (
cmarko::Event::End(cmarko::Tag::Heading(..)),
cmarkn::Event::End(cmarkn::TagEnd::Heading(_)),
)
| (
cmarko::Event::End(cmarko::Tag::BlockQuote),
cmarkn::Event::End(cmarkn::TagEnd::BlockQuote),
)
| (
cmarko::Event::End(cmarko::Tag::CodeBlock(..)),
cmarkn::Event::End(cmarkn::TagEnd::CodeBlock),
)
| (
cmarko::Event::End(cmarko::Tag::List(..)),
cmarkn::Event::End(cmarkn::TagEnd::List(_)),
)
| (cmarko::Event::End(cmarko::Tag::Item), cmarkn::Event::End(cmarkn::TagEnd::Item))
| (
cmarko::Event::End(cmarko::Tag::FootnoteDefinition(..)),
cmarkn::Event::End(cmarkn::TagEnd::FootnoteDefinition),
)
| (
cmarko::Event::End(cmarko::Tag::Table(..)),
cmarkn::Event::End(cmarkn::TagEnd::Table),
)
| (
cmarko::Event::End(cmarko::Tag::TableHead),
cmarkn::Event::End(cmarkn::TagEnd::TableHead),
)
| (
cmarko::Event::End(cmarko::Tag::TableRow),
cmarkn::Event::End(cmarkn::TagEnd::TableRow),
)
| (
cmarko::Event::End(cmarko::Tag::TableCell),
cmarkn::Event::End(cmarkn::TagEnd::TableCell),
) => {
// Matching tags. Do nothing.
//
// Parsers sometimes differ in what they consider the "range of an event,"
// even though the event is really the same. Inlines are pretty consistent,
// but stuff like list items? Not really.
//
// Mismatched block elements will usually nest differently, so ignoring it
// works good enough.
}
// If we've already reported an error on the start tag, don't bother on the end tag.
(cmarko::Event::End(_), _) | (_, cmarkn::Event::End(_)) if reported_an_error => {}
// Non-matching inline.
(cmarko::Event::Start(cmarko::Tag::Link(..)), cmarkn::Event::FootnoteReference(..))
| (
cmarko::Event::Start(cmarko::Tag::Image(..)),
cmarkn::Event::FootnoteReference(..),
)
| (
cmarko::Event::FootnoteReference(..),
cmarkn::Event::Start(cmarkn::Tag::Link { .. }),
)
| (
cmarko::Event::FootnoteReference(..),
cmarkn::Event::Start(cmarkn::Tag::Image { .. }),
) if event_range_old == event_range_new => {
reported_an_error = true;
// If we can't get a span of the backtick, because it is in a `#[doc = ""]` attribute,
// use the span of the entire attribute as a fallback.
let span = source_span_for_markdown_range(
tcx,
&dox,
&event_range_old,
&item.attrs.doc_strings,
)
.unwrap_or_else(|| item.attr_span(tcx));
tcx.node_span_lint(
crate::lint::UNPORTABLE_MARKDOWN,
hir_id,
span,
"unportable markdown",
|lint| {
lint.help(format!("syntax ambiguous between footnote and link"));
},
);
}
// Non-matching results.
(event_old, event_new) => {
reported_an_error = true;
let (range, range_other, desc, desc_other, tag, tag_other) = if event_range_old.end
- event_range_old.start
< event_range_new.end - event_range_new.start
{
(
event_range_old,
event_range_new,
"old",
"new",
format!("{event_old:?}"),
format!("{event_new:?}"),
)
} else {
(
event_range_new,
event_range_old,
"new",
"old",
format!("{event_new:?}"),
format!("{event_old:?}"),
)
};
let (range, tag_other) =
if range_other.start <= range.start && range_other.end <= range.end {
(range_other.start..range.end, tag_other)
} else {
(range, format!("nothing"))
};
// If we can't get a span of the backtick, because it is in a `#[doc = ""]` attribute,
// use the span of the entire attribute as a fallback.
let span =
source_span_for_markdown_range(tcx, &dox, &range, &item.attrs.doc_strings)
.unwrap_or_else(|| item.attr_span(tcx));
tcx.node_span_lint(
crate::lint::UNPORTABLE_MARKDOWN,
hir_id,
span,
"unportable markdown",
|lint| {
lint.help(format!(
"{desc} parser sees {tag}, {desc_other} sees {tag_other}"
));
},
);
}
}
}
}
43 changes: 43 additions & 0 deletions tests/rustdoc-ui/unportable-markdown.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// https://internals.rust-lang.org/t/proposal-migrate-the-syntax-of-rustdoc-markdown-footnotes-to-be-compatible-with-the-syntax-used-in-github/18929
//
// A series of test cases for CommonMark corner cases that pulldown-cmark 0.10 fixes.

/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/654>
///
/// Test footnote [^foot].
///
/// [^foot]: This is nested within the footnote now, but didn't used to be.
//~^ ERROR unportable markdown
///
/// This is a multi-paragraph footnote.
pub struct GfmFootnotes;

/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/750>
///
/// test [^]
//~^ ERROR unportable markdown
///
/// [^]: test2
pub struct FootnoteEmptyName;

/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/829>
///
/// - _t
/// # test
//~^ ERROR unportable markdown
/// t_
pub struct NestingCornerCase;

/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/650>
///
/// *~~__emphasis strike strong__~~* ~~*__strike emphasis strong__*~~
//~^ ERROR unportable markdown
//~| ERROR unportable markdown
pub struct Emphasis1;

/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/732>
///
/// |
//~^ ERROR unportable markdown
/// |
pub struct NotEnoughTable;
61 changes: 61 additions & 0 deletions tests/rustdoc-ui/unportable-markdown.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
error: unportable markdown
--> $DIR/unportable-markdown.rs:9:5
|
LL | /// [^foot]: This is nested within the footnote now, but didn't used to be.
| _____^
LL | |
LL | | ///
LL | | /// This is a multi-paragraph footnote.
| |___________________________________________^
|
= help: new parser sees Start(Paragraph), old sees End(FootnoteDefinition(Borrowed("foot")))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see why it's needed, but it also displays internal stuff. Not sure how helpful it would be for users...

= note: `#[deny(rustdoc::unportable_markdown)]` on by default

error: unportable markdown
--> $DIR/unportable-markdown.rs:17:10
|
LL | /// test [^]
| ^^^
|
= help: new parser sees Start(Link { link_type: Shortcut, dest_url: Borrowed("test2"), title: Borrowed(""), id: Borrowed("^") }), old sees nothing

error: unportable markdown
--> $DIR/unportable-markdown.rs:26:7
|
LL | /// # test
| _______^
LL | |
LL | | /// t_
| |____^
|
= help: new parser sees Start(Heading { level: H1, id: None, classes: [], attrs: [] }), old sees nothing

error: unportable markdown
--> $DIR/unportable-markdown.rs:33:40
|
LL | /// *~~__emphasis strike strong__~~* ~~*__strike emphasis strong__*~~
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
= help: old parser sees Start(Emphasis), new sees nothing

error: unportable markdown
--> $DIR/unportable-markdown.rs:33:41
|
LL | /// *~~__emphasis strike strong__~~* ~~*__strike emphasis strong__*~~
| ^^^^^^^^^^^^^^^^^^^^^^^^^^
|
= help: old parser sees Start(Strong), new sees nothing

error: unportable markdown
--> $DIR/unportable-markdown.rs:40:5
|
LL | /// |
| _____^
LL | |
LL | | /// |
| |_____^
|
= help: new parser sees Start(Paragraph), old sees Start(Table([]))

error: aborting due to 6 previous errors