//! A representation of HTML using Rust types. #![deny(missing_docs)] use html_escape::{encode_double_quoted_attribute, encode_text}; use log::{debug, trace}; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::fmt::Write as _; use std::io::Write; use std::path::{Path, PathBuf}; const DOCTYPE: &str = ""; /// A HTML page, consisting of a head and a body. #[derive(Debug)] pub struct HtmlPage { head: Element, body: Element, } impl Default for HtmlPage { fn default() -> Self { Self { head: Element::new(ElementTag::Head), body: Element::new(ElementTag::Body), } } } impl HtmlPage { /// Create a new HTML page from a head and a body element. pub fn new(head: Element, body: Element) -> Self { Self { head, body } } /// Return the page's head element. pub fn head(&self) -> &Element { &self.head } /// Return the page's body element. pub fn body(&self) -> &Element { &self.body } /// Try to serialize an HTML page into HTML text. pub fn serialize(&self) -> Result { let mut html = Element::new(ElementTag::Html); html.push_child(Content::Elt(self.head.clone())); let mut body = Element::new(ElementTag::Body); body.push_child(Content::Elt(self.body.clone())); html.push_child(Content::Elt(body)); let html = html.serialize()?; Ok(format!("{}\n{}", DOCTYPE, html)) } /// Try to write an HTML page as text into a file. pub fn write(&self, filename: &Path) -> Result<(), HtmlError> { if let Some(parent) = filename.parent() { trace!("parent: {}", parent.display()); if !parent.exists() { debug!("creating directory {}", parent.display()); std::fs::create_dir_all(parent) .map_err(|e| HtmlError::CreateDir(parent.into(), e))?; } } trace!("writing HTML: {}", filename.display()); let mut f = std::fs::File::create(filename) .map_err(|e| HtmlError::CreateFile(filename.into(), e))?; let html = self.serialize()?; f.write_all(html.as_bytes()) .map_err(|e| HtmlError::FileWrite(filename.into(), e))?; Ok(()) } } /// Return text of a sequence of contents as a string. pub fn as_plain_text(content: &[Content]) -> String { let mut buf = String::new(); for c in content { if let Content::Text(s) = c { buf.push_str(s); } } buf } /// An HTML element. #[derive(Debug, Clone)] pub struct Element { loc: Option, tag: ElementTag, attrs: Vec, children: Vec, } impl Element { /// Create a new element. pub fn new(tag: ElementTag) -> Self { Self { loc: None, tag, attrs: vec![], children: vec![], } } /// Add location to an element. pub fn with_location(mut self, loc: Location) -> Self { self.loc = Some(loc); self } /// Set location. pub fn set_location(&mut self, loc: Location) { self.loc = Some(loc); } /// Get location. pub fn location(&self) -> Location { if let Some(loc) = &self.loc { loc.clone() } else { Location::unknown() } } /// Set the block attributes for an element. pub fn set_block_attributes(&mut self, block_attrs: Vec) { for block_attr in block_attrs { let attr = Attribute::from(block_attr); self.attrs.push(attr); } } /// Add a new attribute. pub fn push_attribute(&mut self, attr: Attribute) { self.attrs.push(attr); } /// Drop all attributes with a given name. pub fn drop_attributes(&mut self, unwanted: &[&str]) { for uw in unwanted { self.attrs.retain(|a| a.name() != *uw); } } /// Append a new child to the element. pub fn push_child(&mut self, child: Content) { self.children.push(child); } /// Return an element's tag. pub fn tag(&self) -> ElementTag { self.tag } /// All attributes. pub fn all_attrs(&self) -> &[Attribute] { &self.attrs } /// Return value of a named attribute, if any. pub fn attr(&self, name: &str) -> Option<&Attribute> { self.attrs.iter().find(|a| a.name() == name) } /// Has an attribute with a specific value? pub fn has_attr(&self, name: &str, wanted: &str) -> bool { self.attrs .iter() .filter(|a| a.name() == name && a.value() == Some(wanted)) .count() > 0 } /// Compute a short name, called a slug, for a heading element. pub fn heading_slug(&self) -> String { const SAFE: &str = "abcdefghijklmnopqrstuvwxyz"; let mut slug = String::new(); for s in self.content().to_lowercase().split_whitespace() { for c in s.chars() { if SAFE.contains(c) { slug.push(c); } } } slug } /// Return the concatenated text content of direct children, /// ignoring any elements. pub fn content(&self) -> String { let mut buf = String::new(); for child in self.children() { buf.push_str(&child.content()); } buf } /// Return all the children of an element. pub fn children(&self) -> &[Content] { &self.children } /// Try to add an alt attribute to an img element. pub fn fix_up_img_alt(&mut self) { if self.tag == ElementTag::Img { if !self.attrs.iter().any(|a| a.name() == "alt") { let alt = as_plain_text(self.children()); self.push_attribute(Attribute::new("alt", &alt)); self.children.clear(); } } else { for child in self.children.iter_mut() { if let Content::Elt(kid) = child { kid.fix_up_img_alt(); } } } } /// Serialize an element into HTML text. pub fn serialize(&self) -> Result { let mut buf = String::new(); self.serialize_to_buf_without_added_newlines(&mut buf) .map_err(HtmlError::Format)?; Ok(buf) } fn serialize_to_buf_without_added_newlines( &self, buf: &mut String, ) -> Result<(), std::fmt::Error> { if self.children.is_empty() { write!(buf, "<{}", self.tag.name())?; self.serialize_attrs_to_buf(buf)?; write!(buf, "/>")?; } else { write!(buf, "<{}", self.tag.name())?; self.serialize_attrs_to_buf(buf)?; write!(buf, ">")?; for c in self.children() { match c { Content::Text(s) => buf.push_str(&encode_text(s)), Content::Elt(e) => e.serialize_to_buf_adding_block_newline(buf)?, Content::Html(s) => buf.push_str(s), } } write!(buf, "", self.tag.name())?; } Ok(()) } fn serialize_to_buf_adding_block_newline( &self, buf: &mut String, ) -> Result<(), std::fmt::Error> { if self.tag.is_block() { writeln!(buf)?; } self.serialize_to_buf_without_added_newlines(buf) } fn serialize_attrs_to_buf(&self, buf: &mut String) -> Result<(), std::fmt::Error> { let mut attrs = Attributes::default(); for attr in self.attrs.iter() { attrs.push(attr); } for (name, value) in attrs.iter() { write!(buf, " {}", name)?; if !value.is_empty() { write!(buf, "=\"{}\"", encode_double_quoted_attribute(value))?; } } Ok(()) } } /// The tag of an HTML element. #[derive(Copy, Clone, Debug, Eq, PartialEq)] #[allow(missing_docs)] pub enum ElementTag { Html, Head, Meta, Body, Div, H1, H2, H3, H4, H5, H6, P, Ol, Ul, Li, Link, Blockquote, Pre, Em, Strong, Del, A, Img, Table, Title, Th, Tr, Td, Br, Hr, Code, Span, Style, } impl ElementTag { /// Name of the tag. pub fn name(&self) -> &str { match self { Self::Html => "html", Self::Head => "head", Self::Meta => "meta", Self::Body => "body", Self::Div => "div", Self::H1 => "h1", Self::H2 => "h2", Self::H3 => "h3", Self::H4 => "h4", Self::H5 => "h5", Self::H6 => "h6", Self::P => "p", Self::Ol => "ol", Self::Ul => "ul", Self::Li => "li", Self::Link => "link", Self::Blockquote => "blockquote", Self::Pre => "pre", Self::Em => "em", Self::Strong => "strong", Self::Del => "del", Self::A => "a", Self::Img => "img", Self::Table => "table", Self::Th => "th", Self::Title => "title", Self::Tr => "tr", Self::Td => "td", Self::Br => "br", Self::Hr => "hr", Self::Code => "code", Self::Span => "span", Self::Style => "style", } } fn is_block(&self) -> bool { matches!( self, Self::Html | Self::Head | Self::Meta | Self::Body | Self::Div | Self::H1 | Self::H2 | Self::H3 | Self::H4 | Self::H5 | Self::H6 | Self::P | Self::Ol | Self::Ul | Self::Li | Self::Blockquote | Self::Table | Self::Th | Self::Tr | Self::Br | Self::Hr ) } } #[derive(Debug, Default, Clone)] struct Attributes { attrs: HashMap, } impl Attributes { fn push(&mut self, attr: &Attribute) { if let Some(new_value) = attr.value() { if let Some(old_value) = self.attrs.get_mut(attr.name()) { assert!(!old_value.is_empty()); old_value.push(' '); old_value.push_str(new_value); } else { self.attrs.insert(attr.name().into(), new_value.into()); } } else { assert!(!self.attrs.contains_key(attr.name())); self.attrs.insert(attr.name().into(), "".into()); } } fn iter(&self) -> impl Iterator { self.attrs.iter() } } /// An attribute of an HTML element. #[derive(Clone, Debug)] pub struct Attribute { name: String, value: Option, } impl Attribute { /// Create a new element attribute. pub fn new(name: &str, value: &str) -> Self { Self { name: name.into(), value: Some(value.into()), } } /// Return the name of the attribute. pub fn name(&self) -> &str { &self.name } /// Return the value of the attribute, if any. pub fn value(&self) -> Option<&str> { self.value.as_deref() } } impl From for Attribute { fn from(block_attr: BlockAttr) -> Self { match block_attr { BlockAttr::Id(v) => Self::new("id", &v), BlockAttr::Class(v) => Self::new("class", &v), BlockAttr::KeyValue(k, v) => Self::new(&k, &v), } } } /// Content in HTML. #[derive(Clone, Debug)] pub enum Content { /// Arbitrary text. Text(String), /// An HTML element. Elt(Element), /// Arbitrary HTML text. Html(String), } impl Content { fn content(&self) -> String { match self { Self::Text(s) => s.clone(), Self::Elt(e) => e.content(), Self::Html(h) => h.clone(), } } } /// Location of element in source file. #[derive(Debug, Clone, Eq, Serialize, Deserialize, PartialEq)] #[serde(untagged)] pub enum Location { /// A known location. Known { /// Name of file. filename: PathBuf, /// Line in file. line: usize, /// Column in line. col: usize, }, /// An unknown location. Unknown, } impl Location { /// Create a new location. pub fn new(filename: &Path, line: usize, col: usize) -> Self { Self::Known { filename: filename.into(), line, col, } } /// Create an unknown location. pub fn unknown() -> Self { Self::Unknown } /// Report name of source file from where this element comes from. pub fn filename(&self) -> &Path { if let Self::Known { filename, line: _, col: _, } = self { filename } else { Path::new("") } } /// Report row and column in source where this element comes from. pub fn rowcol(&self) -> (usize, usize) { if let Self::Known { filename: _, line, col, } = self { (*line, *col) } else { (0, 0) } } } impl std::fmt::Display for Location { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { if let Self::Known { filename, line, col, } = self { write!(f, "{}:{}:{}", filename.display(), line, col) } else { write!(f, "(unknown location)") } } } /// Errors from the `html` module. #[derive(Debug, thiserror::Error)] pub enum HtmlError { /// Failed to create a directory. #[error("failed to create directory {0}")] CreateDir(PathBuf, #[source] std::io::Error), /// Failed to create a file. #[error("failed to create file {0}")] CreateFile(PathBuf, #[source] std::io::Error), /// Failed to write to a file. #[error("failed to write to file {0}")] FileWrite(PathBuf, #[source] std::io::Error), /// Input contains an attempt to use a definition list in /// Markdown. #[error("{0}: attempt to use definition lists in Markdown")] DefinitionList(Location), /// String formatting error. This is likely a programming error. #[error("string formatting error: {0}")] Format(#[source] std::fmt::Error), } /// Code block attribute. #[derive(Debug, Clone, Eq, PartialEq)] pub enum BlockAttr { /// An identifier. Id(String), /// A class. Class(String), /// A key/value pair. KeyValue(String, String), } impl BlockAttr { fn id(s: &str) -> Self { Self::Id(s.into()) } fn class(s: &str) -> Self { Self::Class(s.into()) } fn key_value(k: &str, v: &str) -> Self { Self::KeyValue(k.into(), v.into()) } /// Parse a fenced code block tag. pub fn parse(attrs: &str) -> Vec { let mut result = vec![]; for word in Self::parse_words(attrs) { let attr = Self::parse_word(word); result.push(attr); } result } fn parse_words(attrs: &str) -> impl Iterator { if attrs.starts_with('{') && attrs.ends_with('}') { attrs[1..attrs.len() - 1].split_ascii_whitespace() } else { attrs.split_ascii_whitespace() } } fn parse_word(word: &str) -> Self { if let Some(id) = word.strip_prefix('#') { Self::id(id) } else if let Some(class) = word.strip_prefix('.') { Self::class(class) } else if let Some((key, value)) = word.split_once('=') { Self::key_value(key, value) } else { Self::class(word) } } } #[cfg(test)] mod test_block_attr { use super::BlockAttr; #[test] fn empty_string() { assert_eq!(BlockAttr::parse(""), vec![]); } #[test] fn plain_word() { assert_eq!( BlockAttr::parse("foo"), vec![BlockAttr::Class("foo".into())] ); } #[test] fn dot_word() { assert_eq!( BlockAttr::parse(".foo"), vec![BlockAttr::Class("foo".into())] ); } #[test] fn hash_word() { assert_eq!(BlockAttr::parse("#foo"), vec![BlockAttr::Id("foo".into())]); } #[test] fn key_value() { assert_eq!( BlockAttr::parse("foo=bar"), vec![BlockAttr::KeyValue("foo".into(), "bar".into())] ); } #[test] fn several() { assert_eq!( BlockAttr::parse("{#foo .bar foobar yo=yoyo}"), vec![ BlockAttr::Id("foo".into()), BlockAttr::Class("bar".into()), BlockAttr::Class("foobar".into()), BlockAttr::KeyValue("yo".into(), "yoyo".into()), ] ); } }