//! A representation of HTML using Rust types.
#![deny(missing_docs)]
use html_page::Element;
use line_col::LineColLookup;
use log::trace;
use pulldown_cmark::{Event, HeadingLevel, Options, Parser, Tag};
/// Parse Markdown text into an HTML element.
///
/// The Markdown is parsed with `pulldown_cmark`, with heading
/// attributes, strikethrough, tables, and task lists enabled. The
/// parser events are folded into a tree of elements using a stack of
/// the currently open elements. The root of the tree is a `body`
/// element, which is returned.
///
/// # Errors
///
/// Returns [`HtmlError::DefinitionList`] if the plain text of a
/// paragraph starts with `": "` or contains `"\n: "`, which is taken
/// to be an attempt to use a definition list. The error carries the
/// line and column looked up for the event that ended the paragraph.
///
/// # Panics
///
/// Panics if the element stack is empty when an element is expected,
/// if the element popped at the end of an image is not an `img`
/// element, if anything is left on the stack after the `body` element
/// has been popped, or if the parser emits a footnote definition
/// (footnotes are not among the options enabled here, so that is
/// presumably never emitted).
pub fn parse(markdown: &str) -> Result<Element, HtmlError> {
    let mut options = Options::empty();
    options.insert(Options::ENABLE_HEADING_ATTRIBUTES);
    options.insert(Options::ENABLE_STRIKETHROUGH);
    options.insert(Options::ENABLE_TABLES);
    options.insert(Options::ENABLE_TASKLISTS);
    let events = Parser::new_ext(markdown, options).into_offset_iter();
    let linecol = LineColLookup::new(markdown);

    // The bottom of the stack is always the body element; everything
    // else gets attached to it, directly or indirectly.
    let mut stack = Stack::default();
    stack.push(Element::new(html_page::Tag::Body));

    for (event, loc) in events {
        trace!("event {:?}", event);
        let (line, col) = linecol.get(loc.start);
        match event {
            // A start event opens a new element, which stays on the
            // stack until the matching end event.
            Event::Start(tag) => match tag {
                Tag::Paragraph => stack.push_tag(html_page::Tag::P, line, col),
                Tag::Heading(level, id, classes) => {
                    let tag = match level {
                        HeadingLevel::H1 => html_page::Tag::H1,
                        HeadingLevel::H2 => html_page::Tag::H2,
                        HeadingLevel::H3 => html_page::Tag::H3,
                        HeadingLevel::H4 => html_page::Tag::H4,
                        HeadingLevel::H5 => html_page::Tag::H5,
                        HeadingLevel::H6 => html_page::Tag::H6,
                    };
                    // NOTE(review): headings, links, and images are
                    // created without a source location, unlike the
                    // other elements opened here. Confirm that this
                    // is intentional.
                    let mut h = Element::new(tag);
                    if let Some(id) = id {
                        h.set_attribute("id", id);
                    }
                    if !classes.is_empty() {
                        h.set_attribute("class", &classes.join(" "));
                    }
                    stack.push(h);
                }
                Tag::BlockQuote => stack.push_tag(html_page::Tag::Blockquote, line, col),
                Tag::CodeBlock(_) => stack.push_tag(html_page::Tag::Pre, line, col),
                Tag::List(None) => stack.push_tag(html_page::Tag::Ul, line, col),
                Tag::List(Some(start)) => {
                    let mut e = Element::new(html_page::Tag::Ol).with_location(line, col);
                    e.set_attribute("start", &start.to_string());
                    stack.push(e);
                }
                Tag::Item => stack.push_tag(html_page::Tag::Li, line, col),
                Tag::FootnoteDefinition(_) => unreachable!("{:?}", tag),
                Tag::Table(_) => stack.push_tag(html_page::Tag::Table, line, col),
                // NOTE(review): the table head becomes a `th` element
                // here; presumably its cells then end up as `td`
                // children of that `th`. Confirm this is the intended
                // HTML structure.
                Tag::TableHead => stack.push_tag(html_page::Tag::Th, line, col),
                Tag::TableRow => stack.push_tag(html_page::Tag::Tr, line, col),
                Tag::TableCell => stack.push_tag(html_page::Tag::Td, line, col),
                Tag::Emphasis => stack.push_tag(html_page::Tag::Em, line, col),
                Tag::Strong => stack.push_tag(html_page::Tag::Strong, line, col),
                Tag::Strikethrough => stack.push_tag(html_page::Tag::Del, line, col),
                Tag::Link(_, url, title) => {
                    let mut link = Element::new(html_page::Tag::A);
                    link.set_attribute("href", url.as_ref());
                    if !title.is_empty() {
                        link.set_attribute("title", title.as_ref());
                    }
                    stack.push(link);
                }
                Tag::Image(_, url, title) => {
                    let mut e = Element::new(html_page::Tag::Img);
                    e.set_attribute("src", url.as_ref());
                    if !title.is_empty() {
                        e.set_attribute("title", title.as_ref());
                    }
                    stack.push(e);
                }
            },
            // An end event closes the element on top of the stack and
            // attaches it to its parent, which is the next element
            // down.
            Event::End(tag) => match &tag {
                Tag::Paragraph => {
                    trace!("at end of paragraph, looking for definition list use");
                    let e = stack.pop();
                    let s = e.plain_text();
                    trace!("paragraph text: {:?}", s);
                    if s.starts_with(": ") || s.contains("\n: ") {
                        return Err(HtmlError::DefinitionList(line, col));
                    }
                    stack.append_child(e);
                }
                Tag::Image(_, _, _) => {
                    // The way pulldown_cmark feeds us events, the alt
                    // text of an image ends up being the content of
                    // the img element. That's wrong for HTML, so we
                    // remove the content, and use it as the alt
                    // attribute instead.
                    let mut img = stack.pop();
                    trace!("image element before alt fix-up: {:?}", img);
                    assert_eq!(img.tag(), html_page::Tag::Img);
                    let alt_text = img.plain_text();
                    img.clear_children();
                    img.set_attribute("alt", &alt_text);
                    trace!("image element after alt fix-up: {:?}", img);
                    stack.append_child(img);
                }
                Tag::Heading(_, _, _)
                | Tag::List(_)
                | Tag::Item
                | Tag::Link(_, _, _)
                | Tag::Emphasis
                | Tag::Table(_)
                | Tag::TableHead
                | Tag::TableRow
                | Tag::TableCell
                | Tag::Strong
                | Tag::Strikethrough
                | Tag::BlockQuote
                | Tag::CodeBlock(_) => {
                    let e = stack.pop();
                    stack.append_child(e);
                }
                Tag::FootnoteDefinition(_) => unreachable!("{:?}", tag),
            },
            // The remaining events have no children of their own, so
            // they are attached directly to the element on top of the
            // stack.
            Event::Text(s) => stack.append_text(s.as_ref()),
            Event::Code(s) => {
                let mut code = Element::new(html_page::Tag::Code);
                code.push_text(s.as_ref());
                stack.append_child(code);
            }
            Event::Html(s) => stack.append_html(s.as_ref()),
            // Footnote references are only logged; nothing is added
            // to the output for them.
            Event::FootnoteReference(s) => trace!("footnote ref {:?}", s),
            Event::SoftBreak => stack.append_text("\n"),
            Event::HardBreak => stack.append_child(Element::new(html_page::Tag::Br)),
            Event::Rule => stack.append_child(Element::new(html_page::Tag::Hr)),
            Event::TaskListMarker(done) => {
                let marker = if done {
                    "\u{2612} " // Unicode for box with X
                } else {
                    "\u{2610} " // Unicode for empty box
                };
                stack.append_text(marker);
            }
        }
    }

    // Only the body element should remain once all events have been
    // handled.
    let body = stack.pop();
    assert!(stack.is_empty());
    Ok(body)
}
#[derive(Debug, Default)]
struct Stack {
stack: Vec,
}
impl Stack {
    /// Return true if there are no elements on the stack.
    fn is_empty(&self) -> bool {
        self.stack.is_empty()
    }

    /// Put an element on top of the stack.
    fn push(&mut self, e: Element) {
        trace!("pushed {:?}", e);
        self.stack.push(e);
    }

    /// Create a new, empty element with the given tag and source
    /// location, and put it on top of the stack.
    fn push_tag(&mut self, tag: html_page::Tag, line: usize, col: usize) {
        let element = Element::new(tag).with_location(line, col);
        self.push(element);
    }

    /// Remove the element on top of the stack and return it.
    ///
    /// Panics if the stack is empty.
    fn pop(&mut self) -> Element {
        let popped = self.stack.pop().unwrap();
        trace!("popped {:?}", popped);
        popped
    }

    /// Give mutable access to the element on top of the stack,
    /// without removing it.
    ///
    /// Panics if the stack is empty.
    fn top_mut(&mut self) -> &mut Element {
        self.stack.last_mut().unwrap()
    }

    /// Add an element as the last child of the element on top of the
    /// stack.
    ///
    /// Panics if the stack is empty.
    fn append_child(&mut self, child: Element) {
        trace!("appended {:?}", child);
        self.top_mut().push_child(&child);
    }

    /// Add text to the element on top of the stack.
    ///
    /// Panics if the stack is empty.
    fn append_text(&mut self, child: &str) {
        trace!("appended {:?}", child);
        self.top_mut().push_text(child);
    }

    /// Add HTML, given as a string, to the element on top of the
    /// stack.
    ///
    /// Panics if the stack is empty.
    fn append_html(&mut self, child: &str) {
        trace!("appended {:?}", child);
        self.top_mut().push_html(child);
    }
}
/// Errors from the `html` module.
#[derive(Debug, thiserror::Error)]
pub enum HtmlError {
/// Input contains an attempt to use a definition list in
/// Markdown.
///
/// The fields are, in order, the line and the column that are
/// reported in the error message.
#[error("attempt to use definition lists in Markdown: line {0}, column {1}")]
DefinitionList(usize, usize),
}