diff --git a/crates/mdbook-html/src/html/tree.rs b/crates/mdbook-html/src/html/tree.rs index 4184740d09..432cf7bea0 100644 --- a/crates/mdbook-html/src/html/tree.rs +++ b/crates/mdbook-html/src/html/tree.rs @@ -19,7 +19,7 @@ use pulldown_cmark::{Alignment, CodeBlockKind, CowStr, Event, LinkType, Tag, Tag use std::borrow::Cow; use std::collections::{HashMap, HashSet}; use std::ops::Deref; -use tracing::{error, trace, warn}; +use tracing::{trace, warn}; /// Helper to create a [`QualName`]. macro_rules! attr_qual_name { @@ -307,6 +307,8 @@ where match event { Event::Start(tag) => self.start_tag(tag), Event::End(tag) => { + // TODO: This should validate that the event stack is + // properly synchronized with the tag stack. self.pop(); match tag { TagEnd::TableHead => { @@ -378,6 +380,7 @@ where } } } + self.finish_stack(); self.collect_footnote_defs(); } @@ -606,40 +609,10 @@ where trace!("html token={token:?}"); match token { Token::DoctypeToken(_) => {} - Token::TagToken(tag) => { - match tag.kind { - TagKind::StartTag => { - let is_closed = is_void_element(&tag.name) || tag.self_closing; - is_raw = matches!(&*tag.name, "script" | "style"); - let name = QualName::new(None, html5ever::ns!(html), tag.name); - let attrs = tag - .attrs - .into_iter() - .map(|attr| (attr.name, attr.value)) - .collect(); - let mut el = Element { - name, - attrs, - self_closing: tag.self_closing, - was_raw: true, - }; - fix_html_link(&mut el); - self.push(Node::Element(el)); - if is_closed { - // No end element. - self.pop(); - } - } - TagKind::EndTag => { - is_raw = false; - if self.is_html_tag_matching(&tag.name) { - self.pop(); - } - // else the stack is corrupt. I'm not really sure - // what to do here... - } - } - } + Token::TagToken(tag) => match tag.kind { + TagKind::StartTag => self.start_html_tag(tag, &mut is_raw), + TagKind::EndTag => self.end_html_tag(tag, &mut is_raw), + }, Token::CommentToken(comment) => { self.append(Node::Comment(comment)); } @@ -664,22 +637,59 @@ where } } + /// Adds an open HTML tag. + fn start_html_tag(&mut self, tag: html5ever::tokenizer::Tag, is_raw: &mut bool) { + let is_closed = is_void_element(&tag.name) || tag.self_closing; + *is_raw = matches!(&*tag.name, "script" | "style"); + let name = QualName::new(None, html5ever::ns!(html), tag.name); + let attrs = tag + .attrs + .into_iter() + .map(|attr| (attr.name, attr.value)) + .collect(); + let mut el = Element { + name, + attrs, + self_closing: tag.self_closing, + was_raw: true, + }; + fix_html_link(&mut el); + self.push(Node::Element(el)); + if is_closed { + // No end element. + self.pop(); + } + } + + /// Closes the given HTML tag. + fn end_html_tag(&mut self, tag: html5ever::tokenizer::Tag, is_raw: &mut bool) { + *is_raw = false; + if self.is_html_tag_matching(&tag.name) { + self.pop(); + } else { + // The proper thing to do here is to recover. However, the HTML + // parsing algorithm for that is quite complex. See + // https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody + // and the adoption agency algorithm. + warn!( + "unexpected HTML end tag `` found in `{}`\n\ + Check that the HTML tags are properly balanced.", + tag.name, + self.options.path.display() + ); + } + } + /// This is used to verify HTML parsing keeps the stack of tags in sync. fn is_html_tag_matching(&self, name: &str) -> bool { let current = self.tree.get(self.current_node).unwrap().value(); if let Node::Element(el) = current && el.name() == name { - return true; + true + } else { + false } - error!( - "internal error: HTML tag stack out of sync.\n - path: `{}`\n\ - current={current:?}\n\ - pop name: {name}", - self.options.path.display() - ); - false } /// Eats all pulldown-cmark events until the next `End` matching the @@ -736,6 +746,40 @@ where output } + /// Deals with any unclosed elements on the stack. + fn finish_stack(&mut self) { + while let Some(node_id) = self.tag_stack.pop() { + let node = self.tree.get(node_id).unwrap().value(); + match node { + Node::Fragment => {} + Node::Element(el) => { + if el.was_raw { + warn!( + "unclosed HTML tag `<{}>` found in `{}`", + el.name.local, + self.options.path.display() + ); + } else { + panic!( + "internal error: expected empty tag stack.\n + path: `{}`\n\ + element={el:?}", + self.options.path.display() + ); + } + } + node => { + panic!( + "internal error: expected empty tag stack.\n + path: `{}`\n\ + node={node:?}", + self.options.path.display() + ); + } + } + } + } + /// Appends a new footnote reference. fn footnote_reference(&mut self, name: CowStr<'event>) { let len = self.footnote_numbers.len() + 1; diff --git a/tests/testsuite/rendering.rs b/tests/testsuite/rendering.rs index 7f2fffa92a..e0e770398d 100644 --- a/tests/testsuite/rendering.rs +++ b/tests/testsuite/rendering.rs @@ -239,3 +239,47 @@ fn html_blocks() { fn code_block_fenced_with_indent() { BookTest::from_dir("rendering/code_blocks_fenced_with_indent").check_all_main_files(); } + +// Unclosed HTML tags. +// +// Note that the HTML parsing algorithm is much more complicated than what +// this is checking. +#[test] +fn unclosed_html_tags() { + BookTest::init(|_| {}) + .change_file("src/chapter_1.md", "
xfooxyz") + .run("build", |cmd| { + cmd.expect_stderr(str![[r#" + INFO Book building has started + INFO Running the html backend + WARN unclosed HTML tag `` found in `chapter_1.md` + WARN unclosed HTML tag `` found in `chapter_1.md` + WARN unclosed HTML tag `
` found in `chapter_1.md` + INFO HTML book written to `[ROOT]/book` + +"#]]); + }) + .check_main_file( + "book/chapter_1.html", + str!["
xfooxyz
"], + ); +} + +// Test for HTML tags out of sync. +#[test] +fn unbalanced_html_tags() { + BookTest::init(|_| {}) + .change_file("src/chapter_1.md", "
xfoo
") + .run("build", |cmd| { + cmd.expect_stderr(str![[r#" + INFO Book building has started + INFO Running the html backend + WARN unexpected HTML end tag `
` found in `chapter_1.md` +Check that the HTML tags are properly balanced. + WARN unclosed HTML tag `
` found in `chapter_1.md` + INFO HTML book written to `[ROOT]/book` + +"#]]); + }) + .check_main_file("book/chapter_1.html", str!["
xfoo
"]); +}