Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 88 additions & 44 deletions crates/mdbook-html/src/html/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use pulldown_cmark::{Alignment, CodeBlockKind, CowStr, Event, LinkType, Tag, Tag
use std::borrow::Cow;
use std::collections::{HashMap, HashSet};
use std::ops::Deref;
use tracing::{error, trace, warn};
use tracing::{trace, warn};

/// Helper to create a [`QualName`].
macro_rules! attr_qual_name {
Expand Down Expand Up @@ -307,6 +307,8 @@ where
match event {
Event::Start(tag) => self.start_tag(tag),
Event::End(tag) => {
// TODO: This should validate that the event stack is
// properly synchronized with the tag stack.
self.pop();
match tag {
TagEnd::TableHead => {
Expand Down Expand Up @@ -378,6 +380,7 @@ where
}
}
}
self.finish_stack();
self.collect_footnote_defs();
}

Expand Down Expand Up @@ -606,40 +609,10 @@ where
trace!("html token={token:?}");
match token {
Token::DoctypeToken(_) => {}
Token::TagToken(tag) => {
match tag.kind {
TagKind::StartTag => {
let is_closed = is_void_element(&tag.name) || tag.self_closing;
is_raw = matches!(&*tag.name, "script" | "style");
let name = QualName::new(None, html5ever::ns!(html), tag.name);
let attrs = tag
.attrs
.into_iter()
.map(|attr| (attr.name, attr.value))
.collect();
let mut el = Element {
name,
attrs,
self_closing: tag.self_closing,
was_raw: true,
};
fix_html_link(&mut el);
self.push(Node::Element(el));
if is_closed {
// No end element.
self.pop();
}
}
TagKind::EndTag => {
is_raw = false;
if self.is_html_tag_matching(&tag.name) {
self.pop();
}
// else the stack is corrupt. I'm not really sure
// what to do here...
}
}
}
Token::TagToken(tag) => match tag.kind {
TagKind::StartTag => self.start_html_tag(tag, &mut is_raw),
TagKind::EndTag => self.end_html_tag(tag, &mut is_raw),
},
Token::CommentToken(comment) => {
self.append(Node::Comment(comment));
}
Expand All @@ -664,22 +637,59 @@ where
}
}

/// Adds an open HTML tag.
fn start_html_tag(&mut self, tag: html5ever::tokenizer::Tag, is_raw: &mut bool) {
let is_closed = is_void_element(&tag.name) || tag.self_closing;
*is_raw = matches!(&*tag.name, "script" | "style");
let name = QualName::new(None, html5ever::ns!(html), tag.name);
let attrs = tag
.attrs
.into_iter()
.map(|attr| (attr.name, attr.value))
.collect();
let mut el = Element {
name,
attrs,
self_closing: tag.self_closing,
was_raw: true,
};
fix_html_link(&mut el);
self.push(Node::Element(el));
if is_closed {
// No end element.
self.pop();
}
}

/// Closes the given HTML tag.
fn end_html_tag(&mut self, tag: html5ever::tokenizer::Tag, is_raw: &mut bool) {
*is_raw = false;
if self.is_html_tag_matching(&tag.name) {
self.pop();
} else {
// The proper thing to do here is to recover. However, the HTML
// parsing algorithm for that is quite complex. See
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody
// and the adoption agency algorithm.
warn!(
"unexpected HTML end tag `</{}>` found in `{}`\n\
Check that the HTML tags are properly balanced.",
tag.name,
self.options.path.display()
);
}
}

/// This is used to verify HTML parsing keeps the stack of tags in sync.
fn is_html_tag_matching(&self, name: &str) -> bool {
let current = self.tree.get(self.current_node).unwrap().value();
if let Node::Element(el) = current
&& el.name() == name
{
return true;
true
} else {
false
}
error!(
"internal error: HTML tag stack out of sync.\n
path: `{}`\n\
current={current:?}\n\
pop name: {name}",
self.options.path.display()
);
false
}

/// Eats all pulldown-cmark events until the next `End` matching the
Expand Down Expand Up @@ -736,6 +746,40 @@ where
output
}

/// Deals with any unclosed elements on the stack.
fn finish_stack(&mut self) {
while let Some(node_id) = self.tag_stack.pop() {
let node = self.tree.get(node_id).unwrap().value();
match node {
Node::Fragment => {}
Node::Element(el) => {
if el.was_raw {
warn!(
"unclosed HTML tag `<{}>` found in `{}`",
el.name.local,
self.options.path.display()
);
} else {
panic!(
"internal error: expected empty tag stack.\n
path: `{}`\n\
element={el:?}",
self.options.path.display()
);
}
}
node => {
panic!(
"internal error: expected empty tag stack.\n
path: `{}`\n\
node={node:?}",
self.options.path.display()
);
}
}
}
}

/// Appends a new footnote reference.
fn footnote_reference(&mut self, name: CowStr<'event>) {
let len = self.footnote_numbers.len() + 1;
Expand Down
44 changes: 44 additions & 0 deletions tests/testsuite/rendering.rs
Original file line number Diff line number Diff line change
Expand Up @@ -239,3 +239,47 @@ fn html_blocks() {
fn code_block_fenced_with_indent() {
BookTest::from_dir("rendering/code_blocks_fenced_with_indent").check_all_main_files();
}

// Unclosed HTML tags.
//
// Note that the HTML parsing algorithm is much more complicated than what
// this is checking.
#[test]
fn unclosed_html_tags() {
BookTest::init(|_| {})
.change_file("src/chapter_1.md", "<div>x<span>foo<i>xyz")
.run("build", |cmd| {
cmd.expect_stderr(str![[r#"
INFO Book building has started
INFO Running the html backend
WARN unclosed HTML tag `<i>` found in `chapter_1.md`
WARN unclosed HTML tag `<span>` found in `chapter_1.md`
WARN unclosed HTML tag `<div>` found in `chapter_1.md`
INFO HTML book written to `[ROOT]/book`

"#]]);
})
.check_main_file(
"book/chapter_1.html",
str!["<div>x<span>foo<i>xyz</i></span></div>"],
);
}

// Test for HTML tags out of sync.
#[test]
fn unbalanced_html_tags() {
BookTest::init(|_| {})
.change_file("src/chapter_1.md", "<div>x<span>foo</div></span>")
.run("build", |cmd| {
cmd.expect_stderr(str![[r#"
INFO Book building has started
INFO Running the html backend
WARN unexpected HTML end tag `</div>` found in `chapter_1.md`
Check that the HTML tags are properly balanced.
WARN unclosed HTML tag `<div>` found in `chapter_1.md`
INFO HTML book written to `[ROOT]/book`

"#]]);
})
.check_main_file("book/chapter_1.html", str!["<div>x<span>foo</span></div>"]);
}