diff --git a/CHANGELOG.md b/CHANGELOG.md index 856201265c..8db03518b5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -58,8 +58,11 @@ The following is a summary of the changes that may require your attention when u [#2847](https://github.com/rust-lang/mdBook/pull/2847) - Added support for admonitions. These are enabled by default, with the option `output.html.admonitions` to disable it. [#2851](https://github.com/rust-lang/mdBook/pull/2851) -- Headers that start or end with HTML characters like `<`, `&`, or `>` now replace those characters in the link ID with `-` instead of being stripped. This brings the header ID generation closer to other tools and sites. - [#2844](https://github.com/rust-lang/mdBook/pull/2844) +- Header ID generation has some minor changes to bring the ID generation closer to other tools and sites: + - IDs now use Unicode lowercase instead of ASCII lowercase. + [#2922](https://github.com/rust-lang/mdBook/pull/2922) + - Headers that start or end with HTML characters like `<`, `&`, or `>` now replace those characters in the link ID with `-` instead of being stripped. + [#2844](https://github.com/rust-lang/mdBook/pull/2844) ### CLI changes diff --git a/crates/mdbook-html/src/utils.rs b/crates/mdbook-html/src/utils.rs index 6c17b8d5a3..68f42a4094 100644 --- a/crates/mdbook-html/src/utils.rs +++ b/crates/mdbook-html/src/utils.rs @@ -74,12 +74,22 @@ pub(crate) fn unique_id(id: &str, used: &mut HashSet<String>) -> String { /// Generates an HTML id from the given text. pub(crate) fn id_from_content(content: &str) -> String { + // This is intended to be close to how header ID generation is done in + // other sites and tools, but is not 100% the same. Not all sites and + // tools use the same algorithm. 
See these for more information: + // + // - https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#section-links + // - https://docs.gitlab.com/user/markdown/#heading-ids-and-links + // - https://pandoc.org/MANUAL.html#extension-auto_identifiers + // - https://kramdown.gettalong.org/converter/html#auto-ids + // - https://docs.rs/comrak/latest/comrak/options/struct.Extension.html#structfield.header_ids content .trim() + .to_lowercase() .chars() .filter_map(|ch| { if ch.is_alphanumeric() || ch == '_' || ch == '-' { - Some(ch.to_ascii_lowercase()) + Some(ch) } else if ch.is_whitespace() { Some('-') } else { @@ -120,6 +130,6 @@ mod tests { assert_eq!(id_from_content("한국어"), "한국어"); assert_eq!(id_from_content(""), ""); assert_eq!(id_from_content("中文標題 CJK title"), "中文標題-cjk-title"); - assert_eq!(id_from_content("Über"), "Über"); + assert_eq!(id_from_content("Über"), "über"); } }