summary | refs | log | tree | commit | diff
path: root/src/html.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/html.rs')
-rw-r--r-- src/html.rs 485
1 files changed, 78 insertions, 407 deletions
diff --git a/src/html.rs b/src/html.rs
index e550ba7..adba870 100644
--- a/src/html.rs
+++ b/src/html.rs
@@ -2,74 +2,10 @@
#![deny(missing_docs)]
-use html_escape::{encode_double_quoted_attribute, encode_text};
+use html_page::Element;
use line_col::LineColLookup;
-use log::{debug, trace};
+use log::trace;
use pulldown_cmark::{Event, HeadingLevel, Options, Parser, Tag};
-use std::fmt::Write as _;
-use std::io::Write;
-use std::path::{Path, PathBuf};
-
-/// A HTML page, consisting of a head and a body.
-#[derive(Debug)]
-pub struct HtmlPage {
- head: Element,
- body: Element,
-}
-
-impl Default for HtmlPage {
- fn default() -> Self {
- Self {
- head: Element::new(ElementTag::Head),
- body: Element::new(ElementTag::Body),
- }
- }
-}
-
-impl HtmlPage {
- /// Create a new HTML page from a head and a body element.
- pub fn new(head: Element, body: Element) -> Self {
- Self { head, body }
- }
-
- /// Return the page's head element.
- pub fn head(&self) -> &Element {
- &self.head
- }
-
- /// Return the page's body element.
- pub fn body(&self) -> &Element {
- &self.body
- }
-
- /// Try to serialize an HTML page into HTML text.
- pub fn serialize(&self) -> Result<String, HtmlError> {
- let mut html = Element::new(ElementTag::Html);
- html.push_child(Content::Elt(self.head.clone()));
- html.push_child(Content::Elt(self.body.clone()));
- html.serialize()
- }
-
- /// Try to write an HTML page as text into a file.
- pub fn write(&self, filename: &Path) -> Result<(), HtmlError> {
- if let Some(parent) = filename.parent() {
- trace!("parent: {}", parent.display());
- if !parent.exists() {
- debug!("creating directory {}", parent.display());
- std::fs::create_dir_all(parent)
- .map_err(|e| HtmlError::CreateDir(parent.into(), e))?;
- }
- }
-
- trace!("writing HTML: {}", filename.display());
- let mut f = std::fs::File::create(filename)
- .map_err(|e| HtmlError::CreateFile(filename.into(), e))?;
- let html = self.serialize()?;
- f.write_all(html.as_bytes())
- .map_err(|e| HtmlError::FileWrite(filename.into(), e))?;
- Ok(())
- }
-}
/// Parse Markdown text into an HTML element.
pub fn parse(markdown: &str) -> Result<Element, HtmlError> {
@@ -80,27 +16,26 @@ pub fn parse(markdown: &str) -> Result<Element, HtmlError> {
options.insert(Options::ENABLE_TASKLISTS);
let p = Parser::new_ext(markdown, options).into_offset_iter();
let linecol = LineColLookup::new(markdown);
- let mut stack = Stack::new();
- stack.push(Element::new(ElementTag::Body));
+ let mut stack = Stack::default();
+ stack.push(Element::new(html_page::Tag::Body));
for (event, loc) in p {
trace!("event {:?}", event);
let (line, col) = linecol.get(loc.start);
- let loc = Location::new(line, col);
match event {
Event::Start(tag) => match tag {
- Tag::Paragraph => stack.push_tag(ElementTag::P, loc),
+ Tag::Paragraph => stack.push_tag(html_page::Tag::P, line, col),
Tag::Heading(level, id, classes) => {
let tag = match level {
- HeadingLevel::H1 => ElementTag::H1,
- HeadingLevel::H2 => ElementTag::H2,
- HeadingLevel::H3 => ElementTag::H3,
- HeadingLevel::H4 => ElementTag::H4,
- HeadingLevel::H5 => ElementTag::H5,
- HeadingLevel::H6 => ElementTag::H6,
+ HeadingLevel::H1 => html_page::Tag::H1,
+ HeadingLevel::H2 => html_page::Tag::H2,
+ HeadingLevel::H3 => html_page::Tag::H3,
+ HeadingLevel::H4 => html_page::Tag::H4,
+ HeadingLevel::H5 => html_page::Tag::H5,
+ HeadingLevel::H6 => html_page::Tag::H6,
};
let mut h = Element::new(tag);
if let Some(id) = id {
- h.push_attribute(Attribute::new("id", id));
+ h.set_attribute("id", id);
}
if !classes.is_empty() {
let mut names = String::new();
@@ -110,40 +45,40 @@ pub fn parse(markdown: &str) -> Result<Element, HtmlError> {
}
names.push_str(c);
}
- h.push_attribute(Attribute::new("class", &names));
+ h.set_attribute("class", &names);
}
stack.push(h);
}
- Tag::BlockQuote => stack.push_tag(ElementTag::Blockquote, loc),
- Tag::CodeBlock(_) => stack.push_tag(ElementTag::Pre, loc),
- Tag::List(None) => stack.push_tag(ElementTag::Ul, loc),
+ Tag::BlockQuote => stack.push_tag(html_page::Tag::Blockquote, line, col),
+ Tag::CodeBlock(_) => stack.push_tag(html_page::Tag::Pre, line, col),
+ Tag::List(None) => stack.push_tag(html_page::Tag::Ul, line, col),
Tag::List(Some(start)) => {
- let mut e = Element::new(ElementTag::Ol).with_location(loc);
- e.push_attribute(Attribute::new("start", &format!("{}", start)));
+ let mut e = Element::new(html_page::Tag::Ol).with_location(line, col);
+ e.set_attribute("start", &format!("{}", start));
stack.push(e);
}
- Tag::Item => stack.push_tag(ElementTag::Li, loc),
+ Tag::Item => stack.push_tag(html_page::Tag::Li, line, col),
Tag::FootnoteDefinition(_) => unreachable!("{:?}", tag),
- Tag::Table(_) => stack.push_tag(ElementTag::Table, loc),
- Tag::TableHead => stack.push_tag(ElementTag::Th, loc),
- Tag::TableRow => stack.push_tag(ElementTag::Tr, loc),
- Tag::TableCell => stack.push_tag(ElementTag::Td, loc),
- Tag::Emphasis => stack.push_tag(ElementTag::Em, loc),
- Tag::Strong => stack.push_tag(ElementTag::Strong, loc),
- Tag::Strikethrough => stack.push_tag(ElementTag::Del, loc),
+ Tag::Table(_) => stack.push_tag(html_page::Tag::Table, line, col),
+ Tag::TableHead => stack.push_tag(html_page::Tag::Th, line, col),
+ Tag::TableRow => stack.push_tag(html_page::Tag::Tr, line, col),
+ Tag::TableCell => stack.push_tag(html_page::Tag::Td, line, col),
+ Tag::Emphasis => stack.push_tag(html_page::Tag::Em, line, col),
+ Tag::Strong => stack.push_tag(html_page::Tag::Strong, line, col),
+ Tag::Strikethrough => stack.push_tag(html_page::Tag::Del, line, col),
Tag::Link(_, url, title) => {
- let mut link = Element::new(ElementTag::A);
- link.push_attribute(Attribute::new("href", url.as_ref()));
+ let mut link = Element::new(html_page::Tag::A);
+ link.set_attribute("href", url.as_ref());
if !title.is_empty() {
- link.push_attribute(Attribute::new("title", title.as_ref()));
+ link.set_attribute("title", title.as_ref());
}
stack.push(link);
}
Tag::Image(_, url, title) => {
- let mut e = Element::new(ElementTag::Img);
- e.push_attribute(Attribute::new("src", url.as_ref()));
+ let mut e = Element::new(html_page::Tag::Img);
+ e.set_attribute("src", url.as_ref());
if !title.is_empty() {
- e.push_attribute(Attribute::new("title", title.as_ref()));
+ e.set_attribute("title", title.as_ref());
}
stack.push(e);
}
@@ -152,18 +87,32 @@ pub fn parse(markdown: &str) -> Result<Element, HtmlError> {
Tag::Paragraph => {
trace!("at end of paragraph, looking for definition list use");
let e = stack.pop();
- let s = as_plain_text(e.children());
+ let s = e.plain_text();
trace!("paragraph text: {:?}", s);
if s.starts_with(": ") || s.contains("\n: ") {
- return Err(HtmlError::DefinitionList(loc.line, loc.col));
+ return Err(HtmlError::DefinitionList(line, col));
}
- stack.append_child(Content::Elt(e));
+ stack.append_child(e);
+ }
+ Tag::Image(_, _, _) => {
+ // The way pulldown_cmark feeds us events, the alt
+ // text of an image ends up being the content of
+ // the img element. That's wrong for HTML, so we
+ // remove the content, and use it as the alt
+ // attribute instead.
+ let mut img = stack.pop();
+ eprintln!("IMAGE: {img:#?}");
+ assert_eq!(img.tag(), html_page::Tag::Img);
+ let alt_text = img.plain_text();
+ img.clear_children();
+ img.set_attribute("alt", &alt_text);
+ eprintln!("IMAGE after: {img:#?}");
+ stack.append_child(img);
}
Tag::Heading(_, _, _)
| Tag::List(_)
| Tag::Item
| Tag::Link(_, _, _)
- | Tag::Image(_, _, _)
| Tag::Emphasis
| Tag::Table(_)
| Tag::TableHead
@@ -174,313 +123,45 @@ pub fn parse(markdown: &str) -> Result<Element, HtmlError> {
| Tag::BlockQuote
| Tag::CodeBlock(_) => {
let e = stack.pop();
- stack.append_child(Content::Elt(e));
+ stack.append_child(e);
}
Tag::FootnoteDefinition(_) => unreachable!("{:?}", tag),
},
- Event::Text(s) => stack.append_str(s.as_ref()),
+ Event::Text(s) => stack.append_text(s.as_ref()),
Event::Code(s) => {
- let mut code = Element::new(ElementTag::Code);
- code.push_child(Content::Text(s.to_string()));
- stack.append_element(code);
+ let mut code = Element::new(html_page::Tag::Code);
+ code.push_text(s.to_string().as_ref());
+ stack.append_child(code);
}
- Event::Html(s) => stack.append_child(Content::Html(s.to_string())),
+ Event::Html(s) => stack.append_html(s.as_ref()),
Event::FootnoteReference(s) => trace!("footnote ref {:?}", s),
- Event::SoftBreak => stack.append_str("\n"),
- Event::HardBreak => stack.append_element(Element::new(ElementTag::Br)),
- Event::Rule => stack.append_element(Element::new(ElementTag::Hr)),
+ Event::SoftBreak => stack.append_text("\n"),
+ Event::HardBreak => stack.append_child(Element::new(html_page::Tag::Br)),
+ Event::Rule => stack.append_child(Element::new(html_page::Tag::Hr)),
Event::TaskListMarker(done) => {
let marker = if done {
"\u{2612} " // Unicode for box with X
} else {
"\u{2610} " // Unicode for empty box
};
- stack.append_str(marker);
+ stack.append_text(marker);
}
}
}
- let mut body = stack.pop();
+ eprintln!("STACK: {stack:#?}");
+ let body = stack.pop();
assert!(stack.is_empty());
- body.fix_up_img_alt();
+ // body.fix_up_img_alt();
Ok(body)
}
-fn as_plain_text(content: &[Content]) -> String {
- let mut buf = String::new();
- for c in content {
- if let Content::Text(s) = c {
- buf.push_str(s);
- }
- }
- buf
-}
-
-/// An HTML element.
-#[derive(Debug, Clone)]
-pub struct Element {
- loc: Option<Location>,
- tag: ElementTag,
- attrs: Vec<Attribute>,
- children: Vec<Content>,
-}
-
-impl Element {
- /// Create a new element.
- pub fn new(tag: ElementTag) -> Self {
- Self {
- loc: None,
- tag,
- attrs: vec![],
- children: vec![],
- }
- }
-
- fn with_location(mut self, loc: Location) -> Self {
- self.loc = Some(loc);
- self
- }
-
- fn push_attribute(&mut self, attr: Attribute) {
- self.attrs.push(attr);
- }
-
- /// Append a new child to the element.
- pub fn push_child(&mut self, child: Content) {
- self.children.push(child);
- }
-
- /// Return an element's tag.
- pub fn tag(&self) -> ElementTag {
- self.tag
- }
-
- /// Return all the children of an element.
- pub fn children(&self) -> &[Content] {
- &self.children
- }
-
- fn fix_up_img_alt(&mut self) {
- if self.tag == ElementTag::Img {
- let alt = as_plain_text(self.children());
- self.push_attribute(Attribute::new("alt", &alt));
- self.children.clear();
- } else {
- for child in self.children.iter_mut() {
- if let Content::Elt(kid) = child {
- kid.fix_up_img_alt();
- }
- }
- }
- }
-
- /// Serialize an element into HTML text.
- pub fn serialize(&self) -> Result<String, HtmlError> {
- let mut buf = String::new();
- self.serialize_to_buf_without_added_newlines(&mut buf)
- .map_err(HtmlError::Format)?;
- Ok(buf)
- }
-
- fn serialize_to_buf_without_added_newlines(
- &self,
- buf: &mut String,
- ) -> Result<(), std::fmt::Error> {
- if self.children.is_empty() {
- write!(buf, "<{}", self.tag.name())?;
- self.serialize_attrs_to_buf(buf)?;
- write!(buf, "/>")?;
- } else {
- write!(buf, "<{}", self.tag.name())?;
- self.serialize_attrs_to_buf(buf)?;
- write!(buf, ">")?;
- for c in self.children() {
- match c {
- Content::Text(s) => buf.push_str(&encode_text(s)),
- Content::Elt(e) => e.serialize_to_buf_adding_block_newline(buf)?,
- Content::Html(s) => buf.push_str(s),
- }
- }
- write!(buf, "</{}>", self.tag.name())?;
- }
- Ok(())
- }
-
- fn serialize_to_buf_adding_block_newline(
- &self,
- buf: &mut String,
- ) -> Result<(), std::fmt::Error> {
- if self.tag.is_block() {
- writeln!(buf)?;
- }
- self.serialize_to_buf_without_added_newlines(buf)
- }
-
- fn serialize_attrs_to_buf(&self, buf: &mut String) -> Result<(), std::fmt::Error> {
- for attr in self.attrs.iter() {
- write!(buf, " {}", attr.name())?;
- if let Some(value) = attr.value() {
- write!(buf, "=\"{}\"", encode_double_quoted_attribute(value))?;
- }
- }
- Ok(())
- }
-}
-
-/// The tag of an HTML element.
-#[derive(Copy, Clone, Debug, Eq, PartialEq)]
-#[allow(missing_docs)]
-pub enum ElementTag {
- Html,
- Head,
- Body,
- H1,
- H2,
- H3,
- H4,
- H5,
- H6,
- P,
- Ol,
- Ul,
- Li,
- Blockquote,
- Pre,
- Em,
- Strong,
- Del,
- A,
- Img,
- Table,
- Title,
- Th,
- Tr,
- Td,
- Br,
- Hr,
- Code,
-}
-
-impl ElementTag {
- fn name(&self) -> &str {
- match self {
- Self::Html => "html",
- Self::Head => "head",
- Self::Body => "body",
- Self::H1 => "h1",
- Self::H2 => "h2",
- Self::H3 => "h3",
- Self::H4 => "h4",
- Self::H5 => "h5",
- Self::H6 => "h6",
- Self::P => "p",
- Self::Ol => "ol",
- Self::Ul => "ul",
- Self::Li => "li",
- Self::Blockquote => "blockquote",
- Self::Pre => "pre",
- Self::Em => "em",
- Self::Strong => "strong",
- Self::Del => "del",
- Self::A => "a",
- Self::Img => "img",
- Self::Table => "table",
- Self::Th => "th",
- Self::Title => "title",
- Self::Tr => "tr",
- Self::Td => "td",
- Self::Br => "br",
- Self::Hr => "hr",
- Self::Code => "code",
- }
- }
-
- fn is_block(&self) -> bool {
- matches!(
- self,
- Self::Html
- | Self::Head
- | Self::Body
- | Self::H1
- | Self::H2
- | Self::H3
- | Self::H4
- | Self::H5
- | Self::H6
- | Self::P
- | Self::Ol
- | Self::Ul
- | Self::Li
- | Self::Blockquote
- | Self::Table
- | Self::Th
- | Self::Tr
- | Self::Br
- | Self::Hr
- )
- }
-}
-
-/// An attribute of an HTML element.
-#[derive(Clone, Debug)]
-pub struct Attribute {
- name: String,
- value: Option<String>,
-}
-
-impl Attribute {
- fn new(name: &str, value: &str) -> Self {
- Self {
- name: name.into(),
- value: Some(value.into()),
- }
- }
-
- /// Return the name of the attribute.
- pub fn name(&self) -> &str {
- &self.name
- }
-
- /// Return the value of the attribute, if any.
- pub fn value(&self) -> Option<&str> {
- self.value.as_deref()
- }
-}
-
-/// Content in HTML.
-#[derive(Clone, Debug)]
-pub enum Content {
- /// Arbitrary text.
- Text(String),
-
- /// An HTML element.
- Elt(Element),
-
- /// Arbitrary HTML text.
- Html(String),
-}
-
-#[derive(Debug, Clone, Copy)]
-struct Location {
- line: usize,
- col: usize,
-}
-
-impl Location {
- fn new(line: usize, col: usize) -> Self {
- Self { line, col }
- }
-}
-
+#[derive(Debug, Default)]
struct Stack {
stack: Vec<Element>,
}
impl Stack {
- fn new() -> Self {
- Self { stack: vec![] }
- }
-
fn is_empty(&self) -> bool {
self.stack.is_empty()
}
@@ -490,8 +171,8 @@ impl Stack {
self.stack.push(e);
}
- fn push_tag(&mut self, tag: ElementTag, loc: Location) {
- self.push(Element::new(tag).with_location(loc));
+ fn push_tag(&mut self, tag: html_page::Tag, line: usize, col: usize) {
+ self.push(Element::new(tag).with_location(line, col));
}
fn pop(&mut self) -> Element {
@@ -500,43 +181,33 @@ impl Stack {
e
}
- fn append_child(&mut self, child: Content) {
+ fn append_child(&mut self, child: Element) {
trace!("appended {:?}", child);
let mut parent = self.stack.pop().unwrap();
- parent.push_child(child);
+ parent.push_child(&child);
self.stack.push(parent);
}
- fn append_str(&mut self, text: &str) {
- self.append_child(Content::Text(text.into()));
+ fn append_text(&mut self, child: &str) {
+ trace!("appended {:?}", child);
+ let mut parent = self.stack.pop().unwrap();
+ parent.push_text(child);
+ self.stack.push(parent);
}
- fn append_element(&mut self, e: Element) {
- self.append_child(Content::Elt(e));
+ fn append_html(&mut self, child: &str) {
+ trace!("appended {:?}", child);
+ let mut parent = self.stack.pop().unwrap();
+ parent.push_html(child);
+ self.stack.push(parent);
}
}
/// Errors from the `html` module.
#[derive(Debug, thiserror::Error)]
pub enum HtmlError {
- /// Failed to create a directory.
- #[error("failed to create directory {0}")]
- CreateDir(PathBuf, #[source] std::io::Error),
-
- /// Failed to create a file.
- #[error("failed to create file {0}")]
- CreateFile(PathBuf, #[source] std::io::Error),
-
- /// Failed to write to a file.
- #[error("failed to write to file {0}")]
- FileWrite(PathBuf, #[source] std::io::Error),
-
/// Input contains an attempt to use a definition list in
/// Markdown.
#[error("attempt to use definition lists in Markdown: line {0}, column {1}")]
DefinitionList(usize, usize),
-
- /// String formatting error. This is likely a programming error.
- #[error("string formatting error: {0}")]
- Format(#[source] std::fmt::Error),
}