use crate::error::SiteError; use crate::util::mkdir; use html_escape::{encode_double_quoted_attribute, encode_text}; use log::trace; use pulldown_cmark::{Event, HeadingLevel, Options, Parser, Tag}; use std::fmt::Write as _; use std::io::Write; use std::path::Path; #[derive(Debug)] pub struct HtmlPage { head: Element, body: Element, } impl Default for HtmlPage { fn default() -> Self { Self { head: Element::new(ElementTag::Head), body: Element::new(ElementTag::Body), } } } impl HtmlPage { pub fn new(head: Element, body: Element) -> Self { Self { head, body } } pub fn head(&self) -> &Element { &self.head } pub fn body(&self) -> &Element { &self.body } pub fn serialize(&self) -> Result { let mut html = Element::new(ElementTag::Html); html.push_child(Content::Elt(self.head.clone())); html.push_child(Content::Elt(self.body.clone())); html.serialize() } pub fn write(&self, filename: &Path) -> Result<(), SiteError> { if let Some(parent) = filename.parent() { trace!("parent: {}", parent.display()); if !parent.exists() { mkdir(parent)?; } } trace!("writing HTML: {}", filename.display()); let mut f = std::fs::File::create(filename) .map_err(|e| SiteError::CreateFile(filename.into(), e))?; let html = self.serialize()?; f.write_all(html.as_bytes()) .map_err(|e| SiteError::FileWrite(filename.into(), e))?; Ok(()) } } pub fn parse(markdown: &str) -> Result { let mut options = Options::empty(); options.insert(Options::ENABLE_HEADING_ATTRIBUTES); options.insert(Options::ENABLE_STRIKETHROUGH); options.insert(Options::ENABLE_TABLES); options.insert(Options::ENABLE_TASKLISTS); let p = Parser::new_ext(markdown, options); let mut stack = Stack::new(); stack.push(Element::new(ElementTag::Body)); for event in p { trace!("event {:?}", event); match event { Event::Start(tag) => match tag { Tag::Paragraph => stack.push_tag(ElementTag::P), Tag::Heading(level, id, classes) => { let tag = match level { HeadingLevel::H1 => ElementTag::H1, HeadingLevel::H2 => ElementTag::H2, HeadingLevel::H3 => ElementTag::H3, HeadingLevel::H4 => ElementTag::H4, HeadingLevel::H5 => ElementTag::H5, HeadingLevel::H6 => ElementTag::H6, }; let mut h = Element::new(tag); if let Some(id) = id { h.push_attribute(Attribute::new("id", id)); } if !classes.is_empty() { let mut names = String::new(); for c in classes { if !names.is_empty() { names.push(' '); } names.push_str(c); } h.push_attribute(Attribute::new("class", &names)); } stack.push(h); } Tag::BlockQuote => stack.push_tag(ElementTag::Blockquote), Tag::CodeBlock(_) => stack.push_tag(ElementTag::Pre), Tag::List(None) => stack.push_tag(ElementTag::Ul), Tag::List(Some(start)) => { let mut e = Element::new(ElementTag::Ol); e.push_attribute(Attribute::new("start", &format!("{}", start))); stack.push(e); } Tag::Item => stack.push_tag(ElementTag::Li), Tag::FootnoteDefinition(_) => unreachable!("{:?}", tag), Tag::Table(_) => stack.push_tag(ElementTag::Table), Tag::TableHead => stack.push_tag(ElementTag::Th), Tag::TableRow => stack.push_tag(ElementTag::Tr), Tag::TableCell => stack.push_tag(ElementTag::Td), Tag::Emphasis => stack.push_tag(ElementTag::Em), Tag::Strong => stack.push_tag(ElementTag::Strong), Tag::Strikethrough => stack.push_tag(ElementTag::Del), Tag::Link(_, url, title) => { let mut link = Element::new(ElementTag::A); link.push_attribute(Attribute::new("href", url.as_ref())); if !title.is_empty() { link.push_attribute(Attribute::new("title", title.as_ref())); } stack.push(link); } Tag::Image(_, url, title) => { let mut e = Element::new(ElementTag::Img); e.push_attribute(Attribute::new("src", url.as_ref())); if !title.is_empty() { e.push_attribute(Attribute::new("title", title.as_ref())); } stack.push(e); } }, Event::End(tag) => match &tag { Tag::Paragraph | Tag::Heading(_, _, _) | Tag::List(_) | Tag::Item | Tag::Link(_, _, _) | Tag::Image(_, _, _) | Tag::Emphasis | Tag::Table(_) | Tag::TableHead | Tag::TableRow | Tag::TableCell | Tag::Strong | Tag::Strikethrough | Tag::BlockQuote | Tag::CodeBlock(_) => { let e = stack.pop(); stack.append_child(Content::Elt(e)); } Tag::FootnoteDefinition(_) => unreachable!("{:?}", tag), }, Event::Text(s) => stack.append_str(s.as_ref()), Event::Code(s) => { let mut code = Element::new(ElementTag::Code); code.push_child(Content::Text(s.to_string())); stack.append_element(code); } Event::Html(s) => stack.append_child(Content::Html(s.to_string())), Event::FootnoteReference(s) => trace!("footnote ref {:?}", s), Event::SoftBreak => stack.append_str(" "), Event::HardBreak => stack.append_element(Element::new(ElementTag::Br)), Event::Rule => stack.append_element(Element::new(ElementTag::Hr)), Event::TaskListMarker(done) => { let marker = if done { "\u{2612} " // Unicode for box with X } else { "\u{2610} " // Unicode for empty box }; stack.append_str(marker); } } } let mut body = stack.pop(); assert!(stack.is_empty()); body.fix_up_img_alt(); Ok(body) } fn as_plain_text(content: &[Content]) -> String { let mut buf = String::new(); for c in content { if let Content::Text(s) = c { buf.push_str(s); } } buf } #[derive(Debug, Clone)] pub struct Element { tag: ElementTag, attrs: Vec, children: Vec, } impl Element { pub fn new(tag: ElementTag) -> Self { Self { tag, attrs: vec![], children: vec![], } } fn push_attribute(&mut self, attr: Attribute) { self.attrs.push(attr); } pub fn push_child(&mut self, child: Content) { self.children.push(child); } fn children(&self) -> &[Content] { &self.children } fn fix_up_img_alt(&mut self) { if self.tag == ElementTag::Img { let alt = as_plain_text(self.children()); self.push_attribute(Attribute::new("alt", &alt)); self.children.clear(); } else { for child in self.children.iter_mut() { if let Content::Elt(kid) = child { kid.fix_up_img_alt(); } } } } pub fn serialize(&self) -> Result { let mut buf = String::new(); self.serialize_to_buf_without_added_newlines(&mut buf) .map_err(SiteError::Format)?; Ok(buf) } fn serialize_to_buf_without_added_newlines( &self, buf: &mut String, ) -> Result<(), std::fmt::Error> { if self.children.is_empty() { write!(buf, "<{}", self.tag.name())?; self.serialize_attrs_to_buf(buf)?; write!(buf, "/>")?; } else { write!(buf, "<{}", self.tag.name())?; self.serialize_attrs_to_buf(buf)?; write!(buf, ">")?; for c in self.children() { match c { Content::Text(s) => buf.push_str(&encode_text(s)), Content::Elt(e) => e.serialize_to_buf_adding_block_newline(buf)?, Content::Html(s) => buf.push_str(s), } } write!(buf, "", self.tag.name())?; } Ok(()) } fn serialize_to_buf_adding_block_newline( &self, buf: &mut String, ) -> Result<(), std::fmt::Error> { if self.tag.is_block() { writeln!(buf)?; } self.serialize_to_buf_without_added_newlines(buf) } fn serialize_attrs_to_buf(&self, buf: &mut String) -> Result<(), std::fmt::Error> { for attr in self.attrs.iter() { write!(buf, " {}", attr.name())?; if let Some(value) = attr.value() { write!(buf, "=\"{}\"", encode_double_quoted_attribute(value))?; } } Ok(()) } } #[derive(Clone, Debug, Eq, PartialEq)] pub enum ElementTag { Html, Head, Body, H1, H2, H3, H4, H5, H6, P, Ol, Ul, Li, Blockquote, Pre, Em, Strong, Del, A, Img, Table, Title, Th, Tr, Td, Br, Hr, Code, } impl ElementTag { fn name(&self) -> &str { match self { Self::Html => "html", Self::Head => "head", Self::Body => "body", Self::H1 => "h1", Self::H2 => "h2", Self::H3 => "h3", Self::H4 => "h4", Self::H5 => "h5", Self::H6 => "h6", Self::P => "p", Self::Ol => "ol", Self::Ul => "ul", Self::Li => "li", Self::Blockquote => "blockquote", Self::Pre => "pre", Self::Em => "em", Self::Strong => "strong", Self::Del => "del", Self::A => "a", Self::Img => "img", Self::Table => "table", Self::Th => "th", Self::Title => "title", Self::Tr => "tr", Self::Td => "td", Self::Br => "br", Self::Hr => "hr", Self::Code => "code", } } fn is_block(&self) -> bool { matches!( self, Self::Html | Self::Head | Self::Body | Self::H1 | Self::H2 | Self::H3 | Self::H4 | Self::H5 | Self::H6 | Self::P | Self::Ol | Self::Ul | Self::Li | Self::Blockquote | Self::Table | Self::Th | Self::Tr | Self::Br | Self::Hr ) } } #[derive(Clone, Debug)] pub struct Attribute { name: String, value: Option, } impl Attribute { fn new(name: &str, value: &str) -> Self { Self { name: name.into(), value: Some(value.into()), } } pub fn name(&self) -> &str { &self.name } pub fn value(&self) -> Option<&str> { self.value.as_deref() } } #[derive(Clone, Debug)] pub enum Content { Text(String), Elt(Element), Html(String), } struct Stack { stack: Vec, } impl Stack { fn new() -> Self { Self { stack: vec![] } } fn is_empty(&self) -> bool { self.stack.is_empty() } fn push(&mut self, e: Element) { trace!("pushed {:?}", e); self.stack.push(e); } fn push_tag(&mut self, tag: ElementTag) { self.push(Element::new(tag)); } fn pop(&mut self) -> Element { let e = self.stack.pop().unwrap(); trace!("popped {:?}", e); e } fn append_child(&mut self, child: Content) { trace!("appended {:?}", child); let mut parent = self.stack.pop().unwrap(); parent.push_child(child); self.stack.push(parent); } fn append_str(&mut self, text: &str) { self.append_child(Content::Text(text.into())); } fn append_element(&mut self, e: Element) { self.append_child(Content::Elt(e)); } }