//! A representation of HTML using Rust types. #![deny(missing_docs)] use html_page::Element; use line_col::LineColLookup; use log::trace; use pulldown_cmark::{Event, HeadingLevel, Options, Parser, Tag}; /// Parse Markdown text into an HTML element. pub fn parse(markdown: &str) -> Result { let mut options = Options::empty(); options.insert(Options::ENABLE_HEADING_ATTRIBUTES); options.insert(Options::ENABLE_STRIKETHROUGH); options.insert(Options::ENABLE_TABLES); options.insert(Options::ENABLE_TASKLISTS); let p = Parser::new_ext(markdown, options).into_offset_iter(); let linecol = LineColLookup::new(markdown); let mut stack = Stack::default(); stack.push(Element::new(html_page::Tag::Body)); for (event, loc) in p { trace!("event {:?}", event); let (line, col) = linecol.get(loc.start); match event { Event::Start(tag) => match tag { Tag::Paragraph => stack.push_tag(html_page::Tag::P, line, col), Tag::Heading(level, id, classes) => { let tag = match level { HeadingLevel::H1 => html_page::Tag::H1, HeadingLevel::H2 => html_page::Tag::H2, HeadingLevel::H3 => html_page::Tag::H3, HeadingLevel::H4 => html_page::Tag::H4, HeadingLevel::H5 => html_page::Tag::H5, HeadingLevel::H6 => html_page::Tag::H6, }; let mut h = Element::new(tag); if let Some(id) = id { h.set_attribute("id", id); } if !classes.is_empty() { let mut names = String::new(); for c in classes { if !names.is_empty() { names.push(' '); } names.push_str(c); } h.set_attribute("class", &names); } stack.push(h); } Tag::BlockQuote => stack.push_tag(html_page::Tag::Blockquote, line, col), Tag::CodeBlock(_) => stack.push_tag(html_page::Tag::Pre, line, col), Tag::List(None) => stack.push_tag(html_page::Tag::Ul, line, col), Tag::List(Some(start)) => { let mut e = Element::new(html_page::Tag::Ol).with_location(line, col); e.set_attribute("start", &format!("{}", start)); stack.push(e); } Tag::Item => stack.push_tag(html_page::Tag::Li, line, col), Tag::FootnoteDefinition(_) => unreachable!("{:?}", tag), Tag::Table(_) => stack.push_tag(html_page::Tag::Table, line, col), Tag::TableHead => stack.push_tag(html_page::Tag::Th, line, col), Tag::TableRow => stack.push_tag(html_page::Tag::Tr, line, col), Tag::TableCell => stack.push_tag(html_page::Tag::Td, line, col), Tag::Emphasis => stack.push_tag(html_page::Tag::Em, line, col), Tag::Strong => stack.push_tag(html_page::Tag::Strong, line, col), Tag::Strikethrough => stack.push_tag(html_page::Tag::Del, line, col), Tag::Link(_, url, title) => { let mut link = Element::new(html_page::Tag::A); link.set_attribute("href", url.as_ref()); if !title.is_empty() { link.set_attribute("title", title.as_ref()); } stack.push(link); } Tag::Image(_, url, title) => { let mut e = Element::new(html_page::Tag::Img); e.set_attribute("src", url.as_ref()); if !title.is_empty() { e.set_attribute("title", title.as_ref()); } stack.push(e); } }, Event::End(tag) => match &tag { Tag::Paragraph => { trace!("at end of paragraph, looking for definition list use"); let e = stack.pop(); let s = e.plain_text(); trace!("paragraph text: {:?}", s); if s.starts_with(": ") || s.contains("\n: ") { return Err(HtmlError::DefinitionList(line, col)); } stack.append_child(e); } Tag::Image(_, _, _) => { // The way pulldown_cmark feeds us events, the alt // text of an image ends up being the content of // the img element. That's wrong for HTML, so we // remove the content, and use it as the alt // attribute instead. let mut img = stack.pop(); eprintln!("IMAGE: {img:#?}"); assert_eq!(img.tag(), html_page::Tag::Img); let alt_text = img.plain_text(); img.clear_children(); img.set_attribute("alt", &alt_text); eprintln!("IMAGE after: {img:#?}"); stack.append_child(img); } Tag::Heading(_, _, _) | Tag::List(_) | Tag::Item | Tag::Link(_, _, _) | Tag::Emphasis | Tag::Table(_) | Tag::TableHead | Tag::TableRow | Tag::TableCell | Tag::Strong | Tag::Strikethrough | Tag::BlockQuote | Tag::CodeBlock(_) => { let e = stack.pop(); stack.append_child(e); } Tag::FootnoteDefinition(_) => unreachable!("{:?}", tag), }, Event::Text(s) => stack.append_text(s.as_ref()), Event::Code(s) => { let mut code = Element::new(html_page::Tag::Code); code.push_text(s.to_string().as_ref()); stack.append_child(code); } Event::Html(s) => stack.append_html(s.as_ref()), Event::FootnoteReference(s) => trace!("footnote ref {:?}", s), Event::SoftBreak => stack.append_text("\n"), Event::HardBreak => stack.append_child(Element::new(html_page::Tag::Br)), Event::Rule => stack.append_child(Element::new(html_page::Tag::Hr)), Event::TaskListMarker(done) => { let marker = if done { "\u{2612} " // Unicode for box with X } else { "\u{2610} " // Unicode for empty box }; stack.append_text(marker); } } } let body = stack.pop(); assert!(stack.is_empty()); // body.fix_up_img_alt(); Ok(body) } #[derive(Debug, Default)] struct Stack { stack: Vec, } impl Stack { fn is_empty(&self) -> bool { self.stack.is_empty() } fn push(&mut self, e: Element) { trace!("pushed {:?}", e); self.stack.push(e); } fn push_tag(&mut self, tag: html_page::Tag, line: usize, col: usize) { self.push(Element::new(tag).with_location(line, col)); } fn pop(&mut self) -> Element { let e = self.stack.pop().unwrap(); trace!("popped {:?}", e); e } fn append_child(&mut self, child: Element) { trace!("appended {:?}", child); let mut parent = self.stack.pop().unwrap(); parent.push_child(&child); self.stack.push(parent); } fn append_text(&mut self, child: &str) { trace!("appended {:?}", child); let mut parent = self.stack.pop().unwrap(); parent.push_text(child); self.stack.push(parent); } fn append_html(&mut self, child: &str) { trace!("appended {:?}", child); let mut parent = self.stack.pop().unwrap(); parent.push_html(child); self.stack.push(parent); } } /// Errors from the `html` module. #[derive(Debug, thiserror::Error)] pub enum HtmlError { /// Input contains an attempt to use a definition list in /// Markdown. #[error("attempt to use definition lists in Markdown: line {0}, column {1}")] DefinitionList(usize, usize), }