diff options
author | Daniel Silverstone <dsilvers+gitlab@digital-scurf.org> | 2024-04-29 17:51:07 +0000 |
---|---|---|
committer | Daniel Silverstone <dsilvers+gitlab@digital-scurf.org> | 2024-04-29 17:51:07 +0000 |
commit | 789cb00bf01f8f9311a37f8f1cb71a8e5b2a62ea (patch) | |
tree | 1fdab7b5e668cc572d310f1f62c2211f36a4b44e /src | |
parent | 08cc51767573af3e98e5b594fc8978cd889f31fa (diff) | |
parent | f90f9e6de282159df40e5c720ba0b70bcff1ef5c (diff) | |
download | subplot-main.tar.gz |
refactor: move markdown-to-html parser into mdparse.rs
See merge request subplot/subplot!377
Diffstat (limited to 'src')
-rw-r--r-- | src/html.rs | 258 | ||||
-rw-r--r-- | src/lib.rs | 1 | ||||
-rw-r--r-- | src/md.rs | 3 | ||||
-rw-r--r-- | src/mdparse.rs | 250 |
4 files changed, 264 insertions, 248 deletions
diff --git a/src/html.rs b/src/html.rs index b76276b..9365eb9 100644 --- a/src/html.rs +++ b/src/html.rs @@ -3,11 +3,9 @@ #![deny(missing_docs)] use html_escape::{encode_double_quoted_attribute, encode_text}; -use line_col::LineColLookup; use log::{debug, trace}; -use pulldown_cmark::{CodeBlockKind, Event, HeadingLevel, Options, Parser, Tag}; use serde::{Deserialize, Serialize}; -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use std::fmt::Write as _; use std::io::Write; use std::path::{Path, PathBuf}; @@ -78,160 +76,8 @@ impl HtmlPage { } } -/// Parse Markdown text into an HTML element. -pub fn parse(filename: &Path, markdown: &str) -> Result<Element, HtmlError> { - let mut options = Options::empty(); - options.insert(Options::ENABLE_HEADING_ATTRIBUTES); - options.insert(Options::ENABLE_STRIKETHROUGH); - options.insert(Options::ENABLE_TABLES); - options.insert(Options::ENABLE_TASKLISTS); - let p = Parser::new_ext(markdown, options).into_offset_iter(); - let linecol = LineColLookup::new(markdown); - let mut stack = Stack::new(); - stack.push(Element::new(ElementTag::Div)); - let mut slugs = Slugs::default(); - for (event, loc) in p { - trace!("event {:?}", event); - let (line, col) = linecol.get(loc.start); - let loc = Location::new(filename, line, col); - match event { - Event::Start(tag) => match tag { - Tag::Paragraph => stack.push_tag(ElementTag::P, loc), - Tag::Heading(level, id, classes) => { - let tag = match level { - HeadingLevel::H1 => ElementTag::H1, - HeadingLevel::H2 => ElementTag::H2, - HeadingLevel::H3 => ElementTag::H3, - HeadingLevel::H4 => ElementTag::H4, - HeadingLevel::H5 => ElementTag::H5, - HeadingLevel::H6 => ElementTag::H6, - }; - let mut h = Element::new(tag).with_location(loc); - if let Some(id) = id { - h.push_attribute(Attribute::new("id", id)); - slugs.remember(id); - } - if !classes.is_empty() { - let mut names = String::new(); - for c in classes { - if !names.is_empty() { - names.push(' '); - } - names.push_str(c); - } - h.push_attribute(Attribute::new("class", &names)); - } - stack.push(h); - } - Tag::BlockQuote => stack.push_tag(ElementTag::Blockquote, loc), - Tag::CodeBlock(kind) => { - stack.push_tag(ElementTag::Pre, loc); - if let CodeBlockKind::Fenced(attrs) = kind { - let mut e = stack.pop(); - e.set_block_attributes(BlockAttr::parse(&attrs)); - stack.push(e); - } - } - Tag::List(None) => stack.push_tag(ElementTag::Ul, loc), - Tag::List(Some(start)) => { - let mut e = Element::new(ElementTag::Ol).with_location(loc); - e.push_attribute(Attribute::new("start", &format!("{}", start))); - stack.push(e); - } - Tag::Item => stack.push_tag(ElementTag::Li, loc), - Tag::FootnoteDefinition(_) => unreachable!("{:?}", tag), - Tag::Table(_) => stack.push_tag(ElementTag::Table, loc), - Tag::TableHead => stack.push_tag(ElementTag::Th, loc), - Tag::TableRow => stack.push_tag(ElementTag::Tr, loc), - Tag::TableCell => stack.push_tag(ElementTag::Td, loc), - Tag::Emphasis => stack.push_tag(ElementTag::Em, loc), - Tag::Strong => stack.push_tag(ElementTag::Strong, loc), - Tag::Strikethrough => stack.push_tag(ElementTag::Del, loc), - Tag::Link(_, url, title) => { - let mut link = Element::new(ElementTag::A); - link.push_attribute(Attribute::new("href", url.as_ref())); - if !title.is_empty() { - link.push_attribute(Attribute::new("title", title.as_ref())); - } - stack.push(link); - } - Tag::Image(_, url, title) => { - let mut e = Element::new(ElementTag::Img); - e.push_attribute(Attribute::new("src", url.as_ref())); - e.push_attribute(Attribute::new("alt", title.as_ref())); - if !title.is_empty() { - e.push_attribute(Attribute::new("title", title.as_ref())); - } - stack.push(e); - } - }, - Event::End(tag) => match &tag { - Tag::Paragraph => { - trace!("at end of paragraph, looking for definition list use"); - let e = stack.pop(); - let s = as_plain_text(e.children()); - trace!("paragraph text: {:?}", s); - if s.starts_with(": ") || s.contains("\n: ") { - return Err(HtmlError::DefinitionList(loc)); - } - stack.append_child(Content::Elt(e)); - } - Tag::Heading(_, _, _) => { - let mut e = stack.pop(); - if e.attr("id").is_none() { - let slug = slugs.unique(&e.heading_slug()); - let id = Attribute::new("id", &slug); - e.push_attribute(id); - } - stack.append_child(Content::Elt(e)); - } - Tag::List(_) - | Tag::Item - | Tag::Link(_, _, _) - | Tag::Image(_, _, _) - | Tag::Emphasis - | Tag::Table(_) - | Tag::TableHead - | Tag::TableRow - | Tag::TableCell - | Tag::Strong - | Tag::Strikethrough - | Tag::BlockQuote - | Tag::CodeBlock(_) => { - let e = stack.pop(); - stack.append_child(Content::Elt(e)); - } - Tag::FootnoteDefinition(_) => unreachable!("{:?}", tag), - }, - Event::Text(s) => stack.append_str(s.as_ref()), - Event::Code(s) => { - let mut code = Element::new(ElementTag::Code); - code.push_child(Content::Text(s.to_string())); - stack.append_element(code); - } - Event::Html(s) => stack.append_child(Content::Html(s.to_string())), - Event::FootnoteReference(s) => trace!("footnote ref {:?}", s), - Event::SoftBreak => stack.append_str("\n"), - Event::HardBreak => stack.append_element(Element::new(ElementTag::Br)), - Event::Rule => stack.append_element(Element::new(ElementTag::Hr)), - Event::TaskListMarker(done) => { - let marker = if done { - "\u{2612} " // Unicode for box with X - } else { - "\u{2610} " // Unicode for empty box - }; - stack.append_str(marker); - } - } - } - - let mut body = stack.pop(); - assert!(stack.is_empty()); - body.fix_up_img_alt(); - Ok(body) -} - -fn as_plain_text(content: &[Content]) -> String { +/// Return text of a sequence of contents as a string. +pub fn as_plain_text(content: &[Content]) -> String { let mut buf = String::new(); for c in content { if let Content::Text(s) = c { @@ -261,7 +107,8 @@ impl Element { } } - fn with_location(mut self, loc: Location) -> Self { + /// Add location to an element. + pub fn with_location(mut self, loc: Location) -> Self { self.loc = Some(loc); self } @@ -280,7 +127,8 @@ impl Element { } } - fn set_block_attributes(&mut self, block_attrs: Vec<BlockAttr>) { + /// Set the block attributes for an element. + pub fn set_block_attributes(&mut self, block_attrs: Vec<BlockAttr>) { for block_attr in block_attrs { let attr = Attribute::from(block_attr); self.attrs.push(attr); @@ -328,7 +176,8 @@ impl Element { > 0 } - fn heading_slug(&self) -> String { + /// Compute a short name, called a slug, for a heading element. + pub fn heading_slug(&self) -> String { const SAFE: &str = "abcdefghijklmnopqrstuvwxyz"; let mut slug = String::new(); for s in self.content().to_lowercase().split_whitespace() { @@ -356,7 +205,8 @@ impl Element { &self.children } - fn fix_up_img_alt(&mut self) { + /// Try to add an alt attribute to an img element. + pub fn fix_up_img_alt(&mut self) { if self.tag == ElementTag::Img { if !self.attrs.iter().any(|a| a.name() == "alt") { let alt = as_plain_text(self.children()); @@ -699,50 +549,6 @@ impl std::fmt::Display for Location { } } -struct Stack { - stack: Vec<Element>, -} - -impl Stack { - fn new() -> Self { - Self { stack: vec![] } - } - - fn is_empty(&self) -> bool { - self.stack.is_empty() - } - - fn push(&mut self, e: Element) { - trace!("pushed {:?}", e); - self.stack.push(e); - } - - fn push_tag(&mut self, tag: ElementTag, loc: Location) { - self.push(Element::new(tag).with_location(loc)); - } - - fn pop(&mut self) -> Element { - let e = self.stack.pop().unwrap(); - trace!("popped {:?}", e); - e - } - - fn append_child(&mut self, child: Content) { - trace!("appended {:?}", child); - let mut parent = self.stack.pop().unwrap(); - parent.push_child(child); - self.stack.push(parent); - } - - fn append_str(&mut self, text: &str) { - self.append_child(Content::Text(text.into())); - } - - fn append_element(&mut self, e: Element) { - self.append_child(Content::Elt(e)); - } -} - /// Errors from the `html` module. #[derive(Debug, thiserror::Error)] pub enum HtmlError { @@ -874,45 +680,3 @@ mod test_block_attr { ); } } - -#[derive(Debug, Default)] -struct Slugs { - slugs: HashSet<String>, -} - -impl Slugs { - const MAX: usize = 8; - - fn remember(&mut self, slug: &str) { - self.slugs.insert(slug.into()); - } - - fn unique(&mut self, candidate: &str) -> String { - let slug = self.helper(candidate); - self.remember(&slug); - slug - } - - fn helper(&mut self, candidate: &str) -> String { - let mut slug0 = String::new(); - for c in candidate.chars() { - if slug0.len() >= Self::MAX { - break; - } - slug0.push(c); - } - - if !self.slugs.contains(&slug0) { - return slug0.to_string(); - } - - let mut i = 0; - loop { - i += 1; - let slug = format!("{}{}", slug0, i); - if !self.slugs.contains(&slug) { - return slug; - } - } - } -} @@ -29,6 +29,7 @@ pub use metadata::{Metadata, YamlMetadata}; mod doc; pub mod html; pub mod md; +pub mod mdparse; pub use doc::Document; pub use doc::{codegen, load_document, load_document_with_pullmark}; @@ -1,7 +1,8 @@ //! A parsed Markdown document. use crate::{ - html::{parse, Attribute, Content, Element, ElementTag, Location}, + html::{Attribute, Content, Element, ElementTag, Location}, + mdparse::parse, steps::parse_scenario_snippet, Bindings, EmbeddedFile, EmbeddedFiles, Scenario, Style, SubplotError, Warnings, }; diff --git a/src/mdparse.rs b/src/mdparse.rs new file mode 100644 index 0000000..e1c89c9 --- /dev/null +++ b/src/mdparse.rs @@ -0,0 +1,250 @@ +//! Parse markdown into an HTML representation. + +use std::{collections::HashSet, path::Path}; + +use line_col::LineColLookup; +use log::trace; +use pulldown_cmark::{CodeBlockKind, Event, HeadingLevel, Options, Parser, Tag}; + +use crate::html::{ + as_plain_text, Attribute, BlockAttr, Content, Element, ElementTag, HtmlError, Location, +}; + +/// Parse Markdown text into an HTML element. +pub fn parse(filename: &Path, markdown: &str) -> Result<Element, HtmlError> { + let mut options = Options::empty(); + options.insert(Options::ENABLE_HEADING_ATTRIBUTES); + options.insert(Options::ENABLE_STRIKETHROUGH); + options.insert(Options::ENABLE_TABLES); + options.insert(Options::ENABLE_TASKLISTS); + let p = Parser::new_ext(markdown, options).into_offset_iter(); + let linecol = LineColLookup::new(markdown); + let mut stack = Stack::new(); + stack.push(Element::new(ElementTag::Div)); + let mut slugs = Slugs::default(); + for (event, loc) in p { + trace!("event {:?}", event); + let (line, col) = linecol.get(loc.start); + let loc = Location::new(filename, line, col); + match event { + Event::Start(tag) => match tag { + Tag::Paragraph => stack.push_tag(ElementTag::P, loc), + Tag::Heading(level, id, classes) => { + let tag = match level { + HeadingLevel::H1 => ElementTag::H1, + HeadingLevel::H2 => ElementTag::H2, + HeadingLevel::H3 => ElementTag::H3, + HeadingLevel::H4 => ElementTag::H4, + HeadingLevel::H5 => ElementTag::H5, + HeadingLevel::H6 => ElementTag::H6, + }; + let mut h = Element::new(tag).with_location(loc); + if let Some(id) = id { + h.push_attribute(Attribute::new("id", id)); + slugs.remember(id); + } + if !classes.is_empty() { + let mut names = String::new(); + for c in classes { + if !names.is_empty() { + names.push(' '); + } + names.push_str(c); + } + h.push_attribute(Attribute::new("class", &names)); + } + stack.push(h); + } + Tag::BlockQuote => stack.push_tag(ElementTag::Blockquote, loc), + Tag::CodeBlock(kind) => { + stack.push_tag(ElementTag::Pre, loc); + if let CodeBlockKind::Fenced(attrs) = kind { + let mut e = stack.pop(); + e.set_block_attributes(BlockAttr::parse(&attrs)); + stack.push(e); + } + } + Tag::List(None) => stack.push_tag(ElementTag::Ul, loc), + Tag::List(Some(start)) => { + let mut e = Element::new(ElementTag::Ol).with_location(loc); + e.push_attribute(Attribute::new("start", &format!("{}", start))); + stack.push(e); + } + Tag::Item => stack.push_tag(ElementTag::Li, loc), + Tag::FootnoteDefinition(_) => unreachable!("{:?}", tag), + Tag::Table(_) => stack.push_tag(ElementTag::Table, loc), + Tag::TableHead => stack.push_tag(ElementTag::Th, loc), + Tag::TableRow => stack.push_tag(ElementTag::Tr, loc), + Tag::TableCell => stack.push_tag(ElementTag::Td, loc), + Tag::Emphasis => stack.push_tag(ElementTag::Em, loc), + Tag::Strong => stack.push_tag(ElementTag::Strong, loc), + Tag::Strikethrough => stack.push_tag(ElementTag::Del, loc), + Tag::Link(_, url, title) => { + let mut link = Element::new(ElementTag::A); + link.push_attribute(Attribute::new("href", url.as_ref())); + if !title.is_empty() { + link.push_attribute(Attribute::new("title", title.as_ref())); + } + stack.push(link); + } + Tag::Image(_, url, title) => { + let mut e = Element::new(ElementTag::Img); + e.push_attribute(Attribute::new("src", url.as_ref())); + e.push_attribute(Attribute::new("alt", title.as_ref())); + if !title.is_empty() { + e.push_attribute(Attribute::new("title", title.as_ref())); + } + stack.push(e); + } + }, + Event::End(tag) => match &tag { + Tag::Paragraph => { + trace!("at end of paragraph, looking for definition list use"); + let e = stack.pop(); + let s = as_plain_text(e.children()); + trace!("paragraph text: {:?}", s); + if s.starts_with(": ") || s.contains("\n: ") { + return Err(HtmlError::DefinitionList(loc)); + } + stack.append_child(Content::Elt(e)); + } + Tag::Heading(_, _, _) => { + let mut e = stack.pop(); + if e.attr("id").is_none() { + let slug = slugs.unique(&e.heading_slug()); + let id = Attribute::new("id", &slug); + e.push_attribute(id); + } + stack.append_child(Content::Elt(e)); + } + Tag::List(_) + | Tag::Item + | Tag::Link(_, _, _) + | Tag::Image(_, _, _) + | Tag::Emphasis + | Tag::Table(_) + | Tag::TableHead + | Tag::TableRow + | Tag::TableCell + | Tag::Strong + | Tag::Strikethrough + | Tag::BlockQuote + | Tag::CodeBlock(_) => { + let e = stack.pop(); + stack.append_child(Content::Elt(e)); + } + Tag::FootnoteDefinition(_) => unreachable!("{:?}", tag), + }, + Event::Text(s) => stack.append_str(s.as_ref()), + Event::Code(s) => { + let mut code = Element::new(ElementTag::Code); + code.push_child(Content::Text(s.to_string())); + stack.append_element(code); + } + Event::Html(s) => stack.append_child(Content::Html(s.to_string())), + Event::FootnoteReference(s) => trace!("footnote ref {:?}", s), + Event::SoftBreak => stack.append_str("\n"), + Event::HardBreak => stack.append_element(Element::new(ElementTag::Br)), + Event::Rule => stack.append_element(Element::new(ElementTag::Hr)), + Event::TaskListMarker(done) => { + let marker = if done { + "\u{2612} " // Unicode for box with X + } else { + "\u{2610} " // Unicode for empty box + }; + stack.append_str(marker); + } + } + } + + let mut body = stack.pop(); + assert!(stack.is_empty()); + body.fix_up_img_alt(); + Ok(body) +} + +struct Stack { + stack: Vec<Element>, +} + +impl Stack { + fn new() -> Self { + Self { stack: vec![] } + } + + fn is_empty(&self) -> bool { + self.stack.is_empty() + } + + fn push(&mut self, e: Element) { + trace!("pushed {:?}", e); + self.stack.push(e); + } + + fn push_tag(&mut self, tag: ElementTag, loc: Location) { + self.push(Element::new(tag).with_location(loc)); + } + + fn pop(&mut self) -> Element { + let e = self.stack.pop().unwrap(); + trace!("popped {:?}", e); + e + } + + fn append_child(&mut self, child: Content) { + trace!("appended {:?}", child); + let mut parent = self.stack.pop().unwrap(); + parent.push_child(child); + self.stack.push(parent); + } + + fn append_str(&mut self, text: &str) { + self.append_child(Content::Text(text.into())); + } + + fn append_element(&mut self, e: Element) { + self.append_child(Content::Elt(e)); + } +} + +#[derive(Debug, Default)] +struct Slugs { + slugs: HashSet<String>, +} + +impl Slugs { + const MAX: usize = 8; + + fn remember(&mut self, slug: &str) { + self.slugs.insert(slug.into()); + } + + fn unique(&mut self, candidate: &str) -> String { + let slug = self.helper(candidate); + self.remember(&slug); + slug + } + + fn helper(&mut self, candidate: &str) -> String { + let mut slug0 = String::new(); + for c in candidate.chars() { + if slug0.len() >= Self::MAX { + break; + } + slug0.push(c); + } + + if !self.slugs.contains(&slug0) { + return slug0.to_string(); + } + + let mut i = 0; + loop { + i += 1; + let slug = format!("{}{}", slug0, i); + if !self.slugs.contains(&slug) { + return slug; + } + } + } +} |