summary | refs | log | tree | commit | diff
path: root/src/html.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/html.rs')
-rw-r--r-- src/html.rs 682
1 files changed, 682 insertions, 0 deletions
diff --git a/src/html.rs b/src/html.rs
new file mode 100644
index 0000000..9365eb9
--- /dev/null
+++ b/src/html.rs
@@ -0,0 +1,682 @@
+//! A representation of HTML using Rust types.
+
+#![deny(missing_docs)]
+
+use html_escape::{encode_double_quoted_attribute, encode_text};
+use log::{debug, trace};
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+use std::fmt::Write as _;
+use std::io::Write;
+use std::path::{Path, PathBuf};
+
+// Document type declaration placed on the first line of every serialized page.
+const DOCTYPE: &str = "<!DOCTYPE html>";
+
+/// An HTML page, made up of one head element and one body element.
+#[derive(Debug)]
+pub struct HtmlPage {
+    // Element holding the page metadata.
+    head: Element,
+    // Element holding the visible content.
+    body: Element,
+}
+
+impl Default for HtmlPage {
+    /// Build a page whose head is an empty `head` element and whose
+    /// body is an empty `body` element.
+    fn default() -> Self {
+        let head = Element::new(ElementTag::Head);
+        let body = Element::new(ElementTag::Body);
+        Self { head, body }
+    }
+}
+
+impl HtmlPage {
+    /// Create a new HTML page from a head and a body element.
+    pub fn new(head: Element, body: Element) -> Self {
+        Self { head, body }
+    }
+
+    /// Return the page's head element.
+    pub fn head(&self) -> &Element {
+        &self.head
+    }
+
+    /// Return the page's body element.
+    pub fn body(&self) -> &Element {
+        &self.body
+    }
+
+    /// Try to serialize an HTML page into HTML text.
+    ///
+    /// The result is the doctype line followed by an `html` element
+    /// that contains the head and then the body. If the stored body
+    /// element is not itself a `body` element, it is wrapped in one.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`HtmlError::Format`] if formatting into the output
+    /// string fails.
+    pub fn serialize(&self) -> Result<String, HtmlError> {
+        let mut html = Element::new(ElementTag::Html);
+        html.push_child(Content::Elt(self.head.clone()));
+        if self.body.tag() == ElementTag::Body {
+            // Already a <body>: wrapping it again would produce a
+            // nested <body><body>…</body></body>, which is not valid.
+            html.push_child(Content::Elt(self.body.clone()));
+        } else {
+            let mut body = Element::new(ElementTag::Body);
+            body.push_child(Content::Elt(self.body.clone()));
+            html.push_child(Content::Elt(body));
+        }
+        let html = html.serialize()?;
+        Ok(format!("{}\n{}", DOCTYPE, html))
+    }
+
+    /// Try to write an HTML page as text into a file.
+    ///
+    /// Missing parent directories of `filename` are created first.
+    ///
+    /// # Errors
+    ///
+    /// Returns the serialization error if the page can't be turned
+    /// into text, [`HtmlError::CreateDir`] if a parent directory can't
+    /// be created, [`HtmlError::CreateFile`] if the file can't be
+    /// created, and [`HtmlError::FileWrite`] if writing to it fails.
+    pub fn write(&self, filename: &Path) -> Result<(), HtmlError> {
+        // Serialize before touching the file system, so that a failure
+        // here doesn't leave a freshly truncated, empty file behind.
+        let html = self.serialize()?;
+
+        if let Some(parent) = filename.parent() {
+            trace!("parent: {}", parent.display());
+            if !parent.exists() {
+                debug!("creating directory {}", parent.display());
+                std::fs::create_dir_all(parent)
+                    .map_err(|e| HtmlError::CreateDir(parent.into(), e))?;
+            }
+        }
+
+        trace!("writing HTML: {}", filename.display());
+        let mut f = std::fs::File::create(filename)
+            .map_err(|e| HtmlError::CreateFile(filename.into(), e))?;
+        f.write_all(html.as_bytes())
+            .map_err(|e| HtmlError::FileWrite(filename.into(), e))?;
+        Ok(())
+    }
+}
+
+/// Concatenate the text items of a content sequence into one string.
+///
+/// Only [`Content::Text`] items contribute; elements and raw HTML
+/// items are skipped, and nothing is inserted between the pieces.
+pub fn as_plain_text(content: &[Content]) -> String {
+    content
+        .iter()
+        .filter_map(|item| match item {
+            Content::Text(text) => Some(text.as_str()),
+            Content::Elt(_) | Content::Html(_) => None,
+        })
+        .collect()
+}
+
+/// A single HTML element: a tag with attributes and child content,
+/// optionally annotated with where in the source it came from.
+#[derive(Debug, Clone)]
+pub struct Element {
+    // Source location, if one has been recorded.
+    loc: Option<Location>,
+    // Which kind of element this is.
+    tag: ElementTag,
+    // Attributes in the order they were added; names may repeat.
+    attrs: Vec<Attribute>,
+    // Child content in document order.
+    children: Vec<Content>,
+}
+
+impl Element {
+    /// Create a new element with the given tag, no attributes, no
+    /// children and no recorded location.
+    pub fn new(tag: ElementTag) -> Self {
+        Self {
+            loc: None,
+            tag,
+            attrs: vec![],
+            children: vec![],
+        }
+    }
+
+    /// Add location to an element, returning the element.
+    pub fn with_location(mut self, loc: Location) -> Self {
+        self.loc = Some(loc);
+        self
+    }
+
+    /// Set location.
+    pub fn set_location(&mut self, loc: Location) {
+        self.loc = Some(loc);
+    }
+
+    /// Get a copy of the location, or the unknown location if none
+    /// has been set.
+    pub fn location(&self) -> Location {
+        self.loc.clone().unwrap_or_else(Location::unknown)
+    }
+
+    /// Add the block attributes to an element.
+    ///
+    /// Each block attribute is converted to an [`Attribute`] and
+    /// appended; attributes already on the element are kept.
+    pub fn set_block_attributes(&mut self, block_attrs: Vec<BlockAttr>) {
+        self.attrs
+            .extend(block_attrs.into_iter().map(Attribute::from));
+    }
+
+    /// Add a new attribute.
+    pub fn push_attribute(&mut self, attr: Attribute) {
+        self.attrs.push(attr);
+    }
+
+    /// Drop all attributes whose name is one of `unwanted`.
+    pub fn drop_attributes(&mut self, unwanted: &[&str]) {
+        self.attrs
+            .retain(|a| !unwanted.iter().any(|uw| a.name() == *uw));
+    }
+
+    /// Append a new child to the element.
+    pub fn push_child(&mut self, child: Content) {
+        self.children.push(child);
+    }
+
+    /// Return an element's tag.
+    pub fn tag(&self) -> ElementTag {
+        self.tag
+    }
+
+    /// All attributes, in the order they were added.
+    pub fn all_attrs(&self) -> &[Attribute] {
+        &self.attrs
+    }
+
+    /// Return the first attribute with the given name, if any.
+    pub fn attr(&self, name: &str) -> Option<&Attribute> {
+        self.attrs.iter().find(|a| a.name() == name)
+    }
+
+    /// Has an attribute with a specific name and value?
+    pub fn has_attr(&self, name: &str, wanted: &str) -> bool {
+        self.attrs
+            .iter()
+            .any(|a| a.name() == name && a.value() == Some(wanted))
+    }
+
+    /// Compute a short name, called a slug, for a heading element.
+    ///
+    /// The slug is the lowercased content of the element with
+    /// everything except the ASCII letters `a` to `z` removed.
+    pub fn heading_slug(&self) -> String {
+        self.content()
+            .to_lowercase()
+            .chars()
+            .filter(char::is_ascii_lowercase)
+            .collect()
+    }
+
+    /// Return the concatenated content of all children.
+    ///
+    /// Text and raw HTML children are included verbatim; a child
+    /// element contributes its own content, computed the same way.
+    pub fn content(&self) -> String {
+        self.children.iter().map(Content::content).collect()
+    }
+
+    /// Return all the children of an element.
+    pub fn children(&self) -> &[Content] {
+        &self.children
+    }
+
+    /// Try to add an alt attribute to an img element.
+    ///
+    /// For an `img` element without an `alt` attribute, the text
+    /// children become the value of a new `alt` attribute and all
+    /// children are removed. For any other element, the fix is applied
+    /// recursively to its child elements.
+    pub fn fix_up_img_alt(&mut self) {
+        if self.tag == ElementTag::Img {
+            if self.attr("alt").is_none() {
+                let alt = as_plain_text(self.children());
+                self.push_attribute(Attribute::new("alt", &alt));
+                self.children.clear();
+            }
+        } else {
+            for child in self.children.iter_mut() {
+                if let Content::Elt(kid) = child {
+                    kid.fix_up_img_alt();
+                }
+            }
+        }
+    }
+
+    /// Serialize an element into HTML text.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`HtmlError::Format`] if formatting into the output
+    /// string fails.
+    pub fn serialize(&self) -> Result<String, HtmlError> {
+        let mut buf = String::new();
+        self.serialize_to_buf_without_added_newlines(&mut buf)
+            .map_err(HtmlError::Format)?;
+        Ok(buf)
+    }
+
+    // Is this one of the HTML void elements, which never have an end
+    // tag?
+    fn is_void(&self) -> bool {
+        matches!(
+            self.tag,
+            ElementTag::Meta
+                | ElementTag::Link
+                | ElementTag::Img
+                | ElementTag::Br
+                | ElementTag::Hr
+        )
+    }
+
+    // Append the element, its attributes and its children to `buf`,
+    // without putting a newline in front of the element itself.
+    fn serialize_to_buf_without_added_newlines(
+        &self,
+        buf: &mut String,
+    ) -> Result<(), std::fmt::Error> {
+        write!(buf, "<{}", self.tag.name())?;
+        self.serialize_attrs_to_buf(buf)?;
+
+        // Only void elements may be written in the self-closing form.
+        // On any other element an HTML parser ignores the slash and
+        // treats the tag as a start tag that is never closed, so a
+        // childless non-void element needs an explicit end tag.
+        if self.children.is_empty() && self.is_void() {
+            buf.push_str("/>");
+            return Ok(());
+        }
+
+        buf.push('>');
+        for c in self.children() {
+            match c {
+                Content::Text(s) => buf.push_str(&encode_text(s)),
+                Content::Elt(e) => e.serialize_to_buf_adding_block_newline(buf)?,
+                // Raw HTML is emitted as is, without escaping.
+                Content::Html(s) => buf.push_str(s),
+            }
+        }
+        write!(buf, "</{}>", self.tag.name())?;
+        Ok(())
+    }
+
+    // Like `serialize_to_buf_without_added_newlines`, but block
+    // elements are preceded by a newline.
+    fn serialize_to_buf_adding_block_newline(
+        &self,
+        buf: &mut String,
+    ) -> Result<(), std::fmt::Error> {
+        if self.tag.is_block() {
+            writeln!(buf)?;
+        }
+        self.serialize_to_buf_without_added_newlines(buf)
+    }
+
+    // Append the attributes to `buf`, each preceded by a space.
+    // Attributes sharing a name are merged first; an attribute with an
+    // empty value is written as the bare name.
+    fn serialize_attrs_to_buf(&self, buf: &mut String) -> Result<(), std::fmt::Error> {
+        let mut attrs = Attributes::default();
+        for attr in self.attrs.iter() {
+            attrs.push(attr);
+        }
+
+        for (name, value) in attrs.iter() {
+            write!(buf, " {}", name)?;
+            if !value.is_empty() {
+                write!(buf, "=\"{}\"", encode_double_quoted_attribute(value))?;
+            }
+        }
+        Ok(())
+    }
+}
+
+/// The tag of an HTML element.
+///
+/// Only the tags listed here can be represented.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+#[allow(missing_docs)] // Each variant is named after the HTML tag it stands for.
+pub enum ElementTag {
+    Html,
+    Head,
+    Meta,
+    Body,
+    Div,
+    H1,
+    H2,
+    H3,
+    H4,
+    H5,
+    H6,
+    P,
+    Ol,
+    Ul,
+    Li,
+    Link,
+    Blockquote,
+    Pre,
+    Em,
+    Strong,
+    Del,
+    A,
+    Img,
+    Table,
+    Title,
+    Th,
+    Tr,
+    Td,
+    Br,
+    Hr,
+    Code,
+    Span,
+    Style,
+}
+
+impl ElementTag {
+    /// Name of the tag, as it is written in HTML.
+    ///
+    /// The names are string literals, so the returned string is not
+    /// tied to the lifetime of the tag value.
+    pub fn name(&self) -> &'static str {
+        match self {
+            Self::Html => "html",
+            Self::Head => "head",
+            Self::Meta => "meta",
+            Self::Body => "body",
+            Self::Div => "div",
+            Self::H1 => "h1",
+            Self::H2 => "h2",
+            Self::H3 => "h3",
+            Self::H4 => "h4",
+            Self::H5 => "h5",
+            Self::H6 => "h6",
+            Self::P => "p",
+            Self::Ol => "ol",
+            Self::Ul => "ul",
+            Self::Li => "li",
+            Self::Link => "link",
+            Self::Blockquote => "blockquote",
+            Self::Pre => "pre",
+            Self::Em => "em",
+            Self::Strong => "strong",
+            Self::Del => "del",
+            Self::A => "a",
+            Self::Img => "img",
+            Self::Table => "table",
+            Self::Title => "title",
+            Self::Th => "th",
+            Self::Tr => "tr",
+            Self::Td => "td",
+            Self::Br => "br",
+            Self::Hr => "hr",
+            Self::Code => "code",
+            Self::Span => "span",
+            Self::Style => "style",
+        }
+    }
+
+    // Should the element be put on a line of its own when it is
+    // serialized as a child of another element?
+    fn is_block(&self) -> bool {
+        matches!(
+            self,
+            Self::Html
+                | Self::Head
+                | Self::Meta
+                | Self::Body
+                | Self::Div
+                | Self::H1
+                | Self::H2
+                | Self::H3
+                | Self::H4
+                | Self::H5
+                | Self::H6
+                | Self::P
+                | Self::Ol
+                | Self::Ul
+                | Self::Li
+                | Self::Blockquote
+                | Self::Table
+                | Self::Th
+                | Self::Tr
+                | Self::Br
+                | Self::Hr
+        )
+    }
+}
+
+// Attributes of one element, merged by name in preparation for
+// serialization: values that share a name are joined with spaces.
+#[derive(Debug, Default, Clone)]
+struct Attributes {
+    // Attribute name to merged value. An empty value stands for an
+    // attribute that is written as a bare name.
+    attrs: HashMap<String, String>,
+}
+
+impl Attributes {
+    // Merge one attribute into the collection.
+    //
+    // A non-empty value is appended to whatever is already stored
+    // under the same name, separated by a space. A missing or empty
+    // value only makes sure the name is present. This never panics:
+    // attribute names and values ultimately come from the input
+    // document, so odd combinations such as an empty class followed by
+    // a real one must be tolerated.
+    fn push(&mut self, attr: &Attribute) {
+        let merged = self.attrs.entry(attr.name().into()).or_default();
+        if let Some(new_value) = attr.value() {
+            if !new_value.is_empty() {
+                if !merged.is_empty() {
+                    merged.push(' ');
+                }
+                merged.push_str(new_value);
+            }
+        }
+    }
+
+    // Iterate over the (name, merged value) pairs, sorted by name so
+    // that the same element always serializes to the same text.
+    fn iter(&self) -> impl Iterator<Item = (&String, &String)> {
+        let mut pairs: Vec<(&String, &String)> = self.attrs.iter().collect();
+        pairs.sort_unstable();
+        pairs.into_iter()
+    }
+}
+
+/// A name and an optional value attached to an HTML element.
+#[derive(Clone, Debug)]
+pub struct Attribute {
+    // Attribute name, e.g. "class".
+    name: String,
+    // Attribute value; `None` means the attribute has no value.
+    value: Option<String>,
+}
+
+impl Attribute {
+    /// Create a new element attribute with the given name and value.
+    pub fn new(name: &str, value: &str) -> Self {
+        let name = name.to_string();
+        let value = Some(value.to_string());
+        Self { name, value }
+    }
+
+    /// Return the name of the attribute.
+    pub fn name(&self) -> &str {
+        self.name.as_str()
+    }
+
+    /// Return the value of the attribute, if any.
+    pub fn value(&self) -> Option<&str> {
+        self.value.as_deref()
+    }
+}
+
+impl From<BlockAttr> for Attribute {
+    /// Turn a block attribute into an element attribute: an identifier
+    /// becomes `id`, a class becomes `class`, and a key/value pair is
+    /// used as it is.
+    fn from(block_attr: BlockAttr) -> Self {
+        let (name, value) = match block_attr {
+            BlockAttr::Id(id) => (String::from("id"), id),
+            BlockAttr::Class(class) => (String::from("class"), class),
+            BlockAttr::KeyValue(key, value) => (key, value),
+        };
+        Self {
+            name,
+            value: Some(value),
+        }
+    }
+}
+
+/// One piece of content inside an HTML element.
+#[derive(Clone, Debug)]
+pub enum Content {
+    /// Arbitrary text. It is escaped when the parent is serialized.
+    Text(String),
+
+    /// An HTML element.
+    Elt(Element),
+
+    /// Arbitrary HTML text. It is emitted as is when the parent is
+    /// serialized.
+    Html(String),
+}
+
+impl Content {
+    // Content as a string: text and raw HTML are returned verbatim, an
+    // element returns the content of its children.
+    fn content(&self) -> String {
+        match self {
+            Self::Text(text) | Self::Html(text) => text.clone(),
+            Self::Elt(element) => element.content(),
+        }
+    }
+}
+
+/// Where in a source file an element comes from.
+#[derive(Debug, Clone, Eq, Serialize, Deserialize, PartialEq)]
+#[serde(untagged)]
+pub enum Location {
+    /// The position is known.
+    Known {
+        /// Name of the source file.
+        filename: PathBuf,
+        /// Line within the file.
+        line: usize,
+        /// Column within the line.
+        col: usize,
+    },
+    /// The position is not known.
+    Unknown,
+}
+
+impl Location {
+    /// Create a known location from a file name, a line and a column.
+    pub fn new(filename: &Path, line: usize, col: usize) -> Self {
+        let filename = filename.to_path_buf();
+        Self::Known {
+            filename,
+            line,
+            col,
+        }
+    }
+
+    /// Create an unknown location.
+    pub fn unknown() -> Self {
+        Self::Unknown
+    }
+
+    /// Report the name of the source file this element comes from.
+    /// For an unknown location this is the empty path.
+    pub fn filename(&self) -> &Path {
+        match self {
+            Self::Known { filename, .. } => filename.as_path(),
+            Self::Unknown => Path::new(""),
+        }
+    }
+
+    /// Report the row and column in the source where this element
+    /// comes from. For an unknown location this is `(0, 0)`.
+    pub fn rowcol(&self) -> (usize, usize) {
+        match self {
+            Self::Known { line, col, .. } => (*line, *col),
+            Self::Unknown => (0, 0),
+        }
+    }
+}
+
+impl std::fmt::Display for Location {
+    /// Format a known location as `filename:line:col`, and an unknown
+    /// one as a fixed placeholder text.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
+        match self {
+            Self::Known {
+                filename,
+                line,
+                col,
+            } => write!(f, "{}:{}:{}", filename.display(), line, col),
+            Self::Unknown => write!(f, "(unknown location)"),
+        }
+    }
+}
+
+/// Everything that can go wrong in the `html` module.
+#[derive(Debug, thiserror::Error)]
+pub enum HtmlError {
+    /// A directory could not be created. Carries the directory name
+    /// and the underlying I/O error.
+    #[error("failed to create directory {0}")]
+    CreateDir(PathBuf, #[source] std::io::Error),
+
+    /// A file could not be created. Carries the file name and the
+    /// underlying I/O error.
+    #[error("failed to create file {0}")]
+    CreateFile(PathBuf, #[source] std::io::Error),
+
+    /// Writing to a file failed. Carries the file name and the
+    /// underlying I/O error.
+    #[error("failed to write to file {0}")]
+    FileWrite(PathBuf, #[source] std::io::Error),
+
+    /// The Markdown input tries to use a definition list. Carries the
+    /// location of the attempt.
+    #[error("{0}: attempt to use definition lists in Markdown")]
+    DefinitionList(Location),
+
+    /// Formatting into a string failed. This is likely a programming
+    /// error.
+    #[error("string formatting error: {0}")]
+    Format(#[source] std::fmt::Error),
+}
+
+/// Code block attribute.
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub enum BlockAttr {
+    /// An identifier.
+    Id(String),
+    /// A class.
+    Class(String),
+    /// A key/value pair.
+    KeyValue(String, String),
+}
+
+impl BlockAttr {
+    // Build an identifier attribute.
+    fn id(s: &str) -> Self {
+        Self::Id(s.into())
+    }
+
+    // Build a class attribute.
+    fn class(s: &str) -> Self {
+        Self::Class(s.into())
+    }
+
+    // Build a key/value attribute.
+    fn key_value(k: &str, v: &str) -> Self {
+        Self::KeyValue(k.into(), v.into())
+    }
+
+    /// Parse a fenced code block tag.
+    ///
+    /// The tag is a list of words separated by ASCII whitespace,
+    /// optionally enclosed in a pair of braces. Whitespace around the
+    /// whole tag is ignored. Each word becomes one attribute, in
+    /// order:
+    ///
+    /// * `#foo` is the identifier `foo`
+    /// * `.foo` is the class `foo`
+    /// * `key=value` is a key/value pair, split at the first `=`
+    /// * any other word is a class
+    pub fn parse(attrs: &str) -> Vec<Self> {
+        Self::parse_words(attrs).map(Self::parse_word).collect()
+    }
+
+    // Split a tag into words, after removing surrounding whitespace
+    // and the enclosing braces, if both are present.
+    fn parse_words(attrs: &str) -> impl Iterator<Item = &str> {
+        // Trim before looking for the braces, so that whitespace
+        // around a braced tag doesn't make the braces part of the
+        // first and last words.
+        let trimmed = attrs.trim_matches(|c: char| c.is_ascii_whitespace());
+        trimmed
+            .strip_prefix('{')
+            .and_then(|rest| rest.strip_suffix('}'))
+            .unwrap_or(trimmed)
+            .split_ascii_whitespace()
+    }
+
+    // Classify a single word of a tag.
+    fn parse_word(word: &str) -> Self {
+        if let Some(id) = word.strip_prefix('#') {
+            Self::id(id)
+        } else if let Some(class) = word.strip_prefix('.') {
+            Self::class(class)
+        } else if let Some((key, value)) = word.split_once('=') {
+            Self::key_value(key, value)
+        } else {
+            Self::class(word)
+        }
+    }
+}
+
+#[cfg(test)]
+mod test_block_attr {
+    use super::BlockAttr;
+
+    // Parse `input` and check that the result is exactly `expected`.
+    fn check(input: &str, expected: Vec<BlockAttr>) {
+        assert_eq!(BlockAttr::parse(input), expected);
+    }
+
+    // Shorthand for a class attribute.
+    fn class(name: &str) -> BlockAttr {
+        BlockAttr::Class(name.into())
+    }
+
+    // Shorthand for an identifier attribute.
+    fn id(name: &str) -> BlockAttr {
+        BlockAttr::Id(name.into())
+    }
+
+    // Shorthand for a key/value attribute.
+    fn key_value_pair(key: &str, value: &str) -> BlockAttr {
+        BlockAttr::KeyValue(key.into(), value.into())
+    }
+
+    #[test]
+    fn empty_string() {
+        check("", vec![]);
+    }
+
+    #[test]
+    fn plain_word() {
+        check("foo", vec![class("foo")]);
+    }
+
+    #[test]
+    fn dot_word() {
+        check(".foo", vec![class("foo")]);
+    }
+
+    #[test]
+    fn hash_word() {
+        check("#foo", vec![id("foo")]);
+    }
+
+    #[test]
+    fn key_value() {
+        check("foo=bar", vec![key_value_pair("foo", "bar")]);
+    }
+
+    #[test]
+    fn several() {
+        check(
+            "{#foo .bar foobar yo=yoyo}",
+            vec![
+                id("foo"),
+                class("bar"),
+                class("foobar"),
+                key_value_pair("yo", "yoyo"),
+            ],
+        );
+    }
+}