summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Daniel Silverstone <dsilvers+gitlab@digital-scurf.org> 2024-04-29 17:51:07 +0000
committer: Daniel Silverstone <dsilvers+gitlab@digital-scurf.org> 2024-04-29 17:51:07 +0000
commit: 789cb00bf01f8f9311a37f8f1cb71a8e5b2a62ea (patch)
tree: 1fdab7b5e668cc572d310f1f62c2211f36a4b44e /src
parent: 08cc51767573af3e98e5b594fc8978cd889f31fa (diff)
parent: f90f9e6de282159df40e5c720ba0b70bcff1ef5c (diff)
download: subplot-main.tar.gz
Merge branch 'mdparse' into 'main' (HEAD, main)
refactor: move markdown-to-html parser into mdparse.rs
See merge request subplot/subplot!377
Diffstat (limited to 'src')
-rw-r--r-- src/html.rs | 258
-rw-r--r-- src/lib.rs | 1
-rw-r--r-- src/md.rs | 3
-rw-r--r-- src/mdparse.rs | 250
4 files changed, 264 insertions, 248 deletions
diff --git a/src/html.rs b/src/html.rs
index b76276b..9365eb9 100644
--- a/src/html.rs
+++ b/src/html.rs
@@ -3,11 +3,9 @@
#![deny(missing_docs)]
use html_escape::{encode_double_quoted_attribute, encode_text};
-use line_col::LineColLookup;
use log::{debug, trace};
-use pulldown_cmark::{CodeBlockKind, Event, HeadingLevel, Options, Parser, Tag};
use serde::{Deserialize, Serialize};
-use std::collections::{HashMap, HashSet};
+use std::collections::HashMap;
use std::fmt::Write as _;
use std::io::Write;
use std::path::{Path, PathBuf};
@@ -78,160 +76,8 @@ impl HtmlPage {
}
}
-/// Parse Markdown text into an HTML element.
-pub fn parse(filename: &Path, markdown: &str) -> Result<Element, HtmlError> {
- let mut options = Options::empty();
- options.insert(Options::ENABLE_HEADING_ATTRIBUTES);
- options.insert(Options::ENABLE_STRIKETHROUGH);
- options.insert(Options::ENABLE_TABLES);
- options.insert(Options::ENABLE_TASKLISTS);
- let p = Parser::new_ext(markdown, options).into_offset_iter();
- let linecol = LineColLookup::new(markdown);
- let mut stack = Stack::new();
- stack.push(Element::new(ElementTag::Div));
- let mut slugs = Slugs::default();
- for (event, loc) in p {
- trace!("event {:?}", event);
- let (line, col) = linecol.get(loc.start);
- let loc = Location::new(filename, line, col);
- match event {
- Event::Start(tag) => match tag {
- Tag::Paragraph => stack.push_tag(ElementTag::P, loc),
- Tag::Heading(level, id, classes) => {
- let tag = match level {
- HeadingLevel::H1 => ElementTag::H1,
- HeadingLevel::H2 => ElementTag::H2,
- HeadingLevel::H3 => ElementTag::H3,
- HeadingLevel::H4 => ElementTag::H4,
- HeadingLevel::H5 => ElementTag::H5,
- HeadingLevel::H6 => ElementTag::H6,
- };
- let mut h = Element::new(tag).with_location(loc);
- if let Some(id) = id {
- h.push_attribute(Attribute::new("id", id));
- slugs.remember(id);
- }
- if !classes.is_empty() {
- let mut names = String::new();
- for c in classes {
- if !names.is_empty() {
- names.push(' ');
- }
- names.push_str(c);
- }
- h.push_attribute(Attribute::new("class", &names));
- }
- stack.push(h);
- }
- Tag::BlockQuote => stack.push_tag(ElementTag::Blockquote, loc),
- Tag::CodeBlock(kind) => {
- stack.push_tag(ElementTag::Pre, loc);
- if let CodeBlockKind::Fenced(attrs) = kind {
- let mut e = stack.pop();
- e.set_block_attributes(BlockAttr::parse(&attrs));
- stack.push(e);
- }
- }
- Tag::List(None) => stack.push_tag(ElementTag::Ul, loc),
- Tag::List(Some(start)) => {
- let mut e = Element::new(ElementTag::Ol).with_location(loc);
- e.push_attribute(Attribute::new("start", &format!("{}", start)));
- stack.push(e);
- }
- Tag::Item => stack.push_tag(ElementTag::Li, loc),
- Tag::FootnoteDefinition(_) => unreachable!("{:?}", tag),
- Tag::Table(_) => stack.push_tag(ElementTag::Table, loc),
- Tag::TableHead => stack.push_tag(ElementTag::Th, loc),
- Tag::TableRow => stack.push_tag(ElementTag::Tr, loc),
- Tag::TableCell => stack.push_tag(ElementTag::Td, loc),
- Tag::Emphasis => stack.push_tag(ElementTag::Em, loc),
- Tag::Strong => stack.push_tag(ElementTag::Strong, loc),
- Tag::Strikethrough => stack.push_tag(ElementTag::Del, loc),
- Tag::Link(_, url, title) => {
- let mut link = Element::new(ElementTag::A);
- link.push_attribute(Attribute::new("href", url.as_ref()));
- if !title.is_empty() {
- link.push_attribute(Attribute::new("title", title.as_ref()));
- }
- stack.push(link);
- }
- Tag::Image(_, url, title) => {
- let mut e = Element::new(ElementTag::Img);
- e.push_attribute(Attribute::new("src", url.as_ref()));
- e.push_attribute(Attribute::new("alt", title.as_ref()));
- if !title.is_empty() {
- e.push_attribute(Attribute::new("title", title.as_ref()));
- }
- stack.push(e);
- }
- },
- Event::End(tag) => match &tag {
- Tag::Paragraph => {
- trace!("at end of paragraph, looking for definition list use");
- let e = stack.pop();
- let s = as_plain_text(e.children());
- trace!("paragraph text: {:?}", s);
- if s.starts_with(": ") || s.contains("\n: ") {
- return Err(HtmlError::DefinitionList(loc));
- }
- stack.append_child(Content::Elt(e));
- }
- Tag::Heading(_, _, _) => {
- let mut e = stack.pop();
- if e.attr("id").is_none() {
- let slug = slugs.unique(&e.heading_slug());
- let id = Attribute::new("id", &slug);
- e.push_attribute(id);
- }
- stack.append_child(Content::Elt(e));
- }
- Tag::List(_)
- | Tag::Item
- | Tag::Link(_, _, _)
- | Tag::Image(_, _, _)
- | Tag::Emphasis
- | Tag::Table(_)
- | Tag::TableHead
- | Tag::TableRow
- | Tag::TableCell
- | Tag::Strong
- | Tag::Strikethrough
- | Tag::BlockQuote
- | Tag::CodeBlock(_) => {
- let e = stack.pop();
- stack.append_child(Content::Elt(e));
- }
- Tag::FootnoteDefinition(_) => unreachable!("{:?}", tag),
- },
- Event::Text(s) => stack.append_str(s.as_ref()),
- Event::Code(s) => {
- let mut code = Element::new(ElementTag::Code);
- code.push_child(Content::Text(s.to_string()));
- stack.append_element(code);
- }
- Event::Html(s) => stack.append_child(Content::Html(s.to_string())),
- Event::FootnoteReference(s) => trace!("footnote ref {:?}", s),
- Event::SoftBreak => stack.append_str("\n"),
- Event::HardBreak => stack.append_element(Element::new(ElementTag::Br)),
- Event::Rule => stack.append_element(Element::new(ElementTag::Hr)),
- Event::TaskListMarker(done) => {
- let marker = if done {
- "\u{2612} " // Unicode for box with X
- } else {
- "\u{2610} " // Unicode for empty box
- };
- stack.append_str(marker);
- }
- }
- }
-
- let mut body = stack.pop();
- assert!(stack.is_empty());
- body.fix_up_img_alt();
- Ok(body)
-}
-
-fn as_plain_text(content: &[Content]) -> String {
+/// Return text of a sequence of contents as a string.
+pub fn as_plain_text(content: &[Content]) -> String {
let mut buf = String::new();
for c in content {
if let Content::Text(s) = c {
@@ -261,7 +107,8 @@ impl Element {
}
}
- fn with_location(mut self, loc: Location) -> Self {
+ /// Add location to an element.
+ pub fn with_location(mut self, loc: Location) -> Self {
self.loc = Some(loc);
self
}
@@ -280,7 +127,8 @@ impl Element {
}
}
- fn set_block_attributes(&mut self, block_attrs: Vec<BlockAttr>) {
+ /// Set the block attributes for an element.
+ pub fn set_block_attributes(&mut self, block_attrs: Vec<BlockAttr>) {
for block_attr in block_attrs {
let attr = Attribute::from(block_attr);
self.attrs.push(attr);
@@ -328,7 +176,8 @@ impl Element {
> 0
}
- fn heading_slug(&self) -> String {
+ /// Compute a short name, called a slug, for a heading element.
+ pub fn heading_slug(&self) -> String {
const SAFE: &str = "abcdefghijklmnopqrstuvwxyz";
let mut slug = String::new();
for s in self.content().to_lowercase().split_whitespace() {
@@ -356,7 +205,8 @@ impl Element {
&self.children
}
- fn fix_up_img_alt(&mut self) {
+ /// Try to add an alt attribute to an img element.
+ pub fn fix_up_img_alt(&mut self) {
if self.tag == ElementTag::Img {
if !self.attrs.iter().any(|a| a.name() == "alt") {
let alt = as_plain_text(self.children());
@@ -699,50 +549,6 @@ impl std::fmt::Display for Location {
}
}
-struct Stack {
- stack: Vec<Element>,
-}
-
-impl Stack {
- fn new() -> Self {
- Self { stack: vec![] }
- }
-
- fn is_empty(&self) -> bool {
- self.stack.is_empty()
- }
-
- fn push(&mut self, e: Element) {
- trace!("pushed {:?}", e);
- self.stack.push(e);
- }
-
- fn push_tag(&mut self, tag: ElementTag, loc: Location) {
- self.push(Element::new(tag).with_location(loc));
- }
-
- fn pop(&mut self) -> Element {
- let e = self.stack.pop().unwrap();
- trace!("popped {:?}", e);
- e
- }
-
- fn append_child(&mut self, child: Content) {
- trace!("appended {:?}", child);
- let mut parent = self.stack.pop().unwrap();
- parent.push_child(child);
- self.stack.push(parent);
- }
-
- fn append_str(&mut self, text: &str) {
- self.append_child(Content::Text(text.into()));
- }
-
- fn append_element(&mut self, e: Element) {
- self.append_child(Content::Elt(e));
- }
-}
-
/// Errors from the `html` module.
#[derive(Debug, thiserror::Error)]
pub enum HtmlError {
@@ -874,45 +680,3 @@ mod test_block_attr {
);
}
}
-
-#[derive(Debug, Default)]
-struct Slugs {
- slugs: HashSet<String>,
-}
-
-impl Slugs {
- const MAX: usize = 8;
-
- fn remember(&mut self, slug: &str) {
- self.slugs.insert(slug.into());
- }
-
- fn unique(&mut self, candidate: &str) -> String {
- let slug = self.helper(candidate);
- self.remember(&slug);
- slug
- }
-
- fn helper(&mut self, candidate: &str) -> String {
- let mut slug0 = String::new();
- for c in candidate.chars() {
- if slug0.len() >= Self::MAX {
- break;
- }
- slug0.push(c);
- }
-
- if !self.slugs.contains(&slug0) {
- return slug0.to_string();
- }
-
- let mut i = 0;
- loop {
- i += 1;
- let slug = format!("{}{}", slug0, i);
- if !self.slugs.contains(&slug) {
- return slug;
- }
- }
- }
-}
diff --git a/src/lib.rs b/src/lib.rs
index 2f55ede..702330a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -29,6 +29,7 @@ pub use metadata::{Metadata, YamlMetadata};
mod doc;
pub mod html;
pub mod md;
+pub mod mdparse;
pub use doc::Document;
pub use doc::{codegen, load_document, load_document_with_pullmark};
diff --git a/src/md.rs b/src/md.rs
index b8f9beb..09fe880 100644
--- a/src/md.rs
+++ b/src/md.rs
@@ -1,7 +1,8 @@
//! A parsed Markdown document.
use crate::{
- html::{parse, Attribute, Content, Element, ElementTag, Location},
+ html::{Attribute, Content, Element, ElementTag, Location},
+ mdparse::parse,
steps::parse_scenario_snippet,
Bindings, EmbeddedFile, EmbeddedFiles, Scenario, Style, SubplotError, Warnings,
};
diff --git a/src/mdparse.rs b/src/mdparse.rs
new file mode 100644
index 0000000..e1c89c9
--- /dev/null
+++ b/src/mdparse.rs
@@ -0,0 +1,250 @@
+//! Parse markdown into an HTML representation.
+
+use std::{collections::HashSet, path::Path};
+
+use line_col::LineColLookup;
+use log::trace;
+use pulldown_cmark::{CodeBlockKind, Event, HeadingLevel, Options, Parser, Tag};
+
+use crate::html::{
+ as_plain_text, Attribute, BlockAttr, Content, Element, ElementTag, HtmlError, Location,
+};
+
+/// Parse Markdown text into an HTML element.
+pub fn parse(filename: &Path, markdown: &str) -> Result<Element, HtmlError> {
+ let mut options = Options::empty();
+ options.insert(Options::ENABLE_HEADING_ATTRIBUTES);
+ options.insert(Options::ENABLE_STRIKETHROUGH);
+ options.insert(Options::ENABLE_TABLES);
+ options.insert(Options::ENABLE_TASKLISTS);
+ let p = Parser::new_ext(markdown, options).into_offset_iter();
+ let linecol = LineColLookup::new(markdown);
+ let mut stack = Stack::new();
+ stack.push(Element::new(ElementTag::Div));
+ let mut slugs = Slugs::default();
+ for (event, loc) in p {
+ trace!("event {:?}", event);
+ let (line, col) = linecol.get(loc.start);
+ let loc = Location::new(filename, line, col);
+ match event {
+ Event::Start(tag) => match tag {
+ Tag::Paragraph => stack.push_tag(ElementTag::P, loc),
+ Tag::Heading(level, id, classes) => {
+ let tag = match level {
+ HeadingLevel::H1 => ElementTag::H1,
+ HeadingLevel::H2 => ElementTag::H2,
+ HeadingLevel::H3 => ElementTag::H3,
+ HeadingLevel::H4 => ElementTag::H4,
+ HeadingLevel::H5 => ElementTag::H5,
+ HeadingLevel::H6 => ElementTag::H6,
+ };
+ let mut h = Element::new(tag).with_location(loc);
+ if let Some(id) = id {
+ h.push_attribute(Attribute::new("id", id));
+ slugs.remember(id);
+ }
+ if !classes.is_empty() {
+ let mut names = String::new();
+ for c in classes {
+ if !names.is_empty() {
+ names.push(' ');
+ }
+ names.push_str(c);
+ }
+ h.push_attribute(Attribute::new("class", &names));
+ }
+ stack.push(h);
+ }
+ Tag::BlockQuote => stack.push_tag(ElementTag::Blockquote, loc),
+ Tag::CodeBlock(kind) => {
+ stack.push_tag(ElementTag::Pre, loc);
+ if let CodeBlockKind::Fenced(attrs) = kind {
+ let mut e = stack.pop();
+ e.set_block_attributes(BlockAttr::parse(&attrs));
+ stack.push(e);
+ }
+ }
+ Tag::List(None) => stack.push_tag(ElementTag::Ul, loc),
+ Tag::List(Some(start)) => {
+ let mut e = Element::new(ElementTag::Ol).with_location(loc);
+ e.push_attribute(Attribute::new("start", &format!("{}", start)));
+ stack.push(e);
+ }
+ Tag::Item => stack.push_tag(ElementTag::Li, loc),
+ Tag::FootnoteDefinition(_) => unreachable!("{:?}", tag),
+ Tag::Table(_) => stack.push_tag(ElementTag::Table, loc),
+ Tag::TableHead => stack.push_tag(ElementTag::Th, loc),
+ Tag::TableRow => stack.push_tag(ElementTag::Tr, loc),
+ Tag::TableCell => stack.push_tag(ElementTag::Td, loc),
+ Tag::Emphasis => stack.push_tag(ElementTag::Em, loc),
+ Tag::Strong => stack.push_tag(ElementTag::Strong, loc),
+ Tag::Strikethrough => stack.push_tag(ElementTag::Del, loc),
+ Tag::Link(_, url, title) => {
+ let mut link = Element::new(ElementTag::A);
+ link.push_attribute(Attribute::new("href", url.as_ref()));
+ if !title.is_empty() {
+ link.push_attribute(Attribute::new("title", title.as_ref()));
+ }
+ stack.push(link);
+ }
+ Tag::Image(_, url, title) => {
+ let mut e = Element::new(ElementTag::Img);
+ e.push_attribute(Attribute::new("src", url.as_ref()));
+ e.push_attribute(Attribute::new("alt", title.as_ref()));
+ if !title.is_empty() {
+ e.push_attribute(Attribute::new("title", title.as_ref()));
+ }
+ stack.push(e);
+ }
+ },
+ Event::End(tag) => match &tag {
+ Tag::Paragraph => {
+ trace!("at end of paragraph, looking for definition list use");
+ let e = stack.pop();
+ let s = as_plain_text(e.children());
+ trace!("paragraph text: {:?}", s);
+ if s.starts_with(": ") || s.contains("\n: ") {
+ return Err(HtmlError::DefinitionList(loc));
+ }
+ stack.append_child(Content::Elt(e));
+ }
+ Tag::Heading(_, _, _) => {
+ let mut e = stack.pop();
+ if e.attr("id").is_none() {
+ let slug = slugs.unique(&e.heading_slug());
+ let id = Attribute::new("id", &slug);
+ e.push_attribute(id);
+ }
+ stack.append_child(Content::Elt(e));
+ }
+ Tag::List(_)
+ | Tag::Item
+ | Tag::Link(_, _, _)
+ | Tag::Image(_, _, _)
+ | Tag::Emphasis
+ | Tag::Table(_)
+ | Tag::TableHead
+ | Tag::TableRow
+ | Tag::TableCell
+ | Tag::Strong
+ | Tag::Strikethrough
+ | Tag::BlockQuote
+ | Tag::CodeBlock(_) => {
+ let e = stack.pop();
+ stack.append_child(Content::Elt(e));
+ }
+ Tag::FootnoteDefinition(_) => unreachable!("{:?}", tag),
+ },
+ Event::Text(s) => stack.append_str(s.as_ref()),
+ Event::Code(s) => {
+ let mut code = Element::new(ElementTag::Code);
+ code.push_child(Content::Text(s.to_string()));
+ stack.append_element(code);
+ }
+ Event::Html(s) => stack.append_child(Content::Html(s.to_string())),
+ Event::FootnoteReference(s) => trace!("footnote ref {:?}", s),
+ Event::SoftBreak => stack.append_str("\n"),
+ Event::HardBreak => stack.append_element(Element::new(ElementTag::Br)),
+ Event::Rule => stack.append_element(Element::new(ElementTag::Hr)),
+ Event::TaskListMarker(done) => {
+ let marker = if done {
+ "\u{2612} " // Unicode for box with X
+ } else {
+ "\u{2610} " // Unicode for empty box
+ };
+ stack.append_str(marker);
+ }
+ }
+ }
+
+ let mut body = stack.pop();
+ assert!(stack.is_empty());
+ body.fix_up_img_alt();
+ Ok(body)
+}
+
+struct Stack {
+ stack: Vec<Element>,
+}
+
+impl Stack {
+ fn new() -> Self {
+ Self { stack: vec![] }
+ }
+
+ fn is_empty(&self) -> bool {
+ self.stack.is_empty()
+ }
+
+ fn push(&mut self, e: Element) {
+ trace!("pushed {:?}", e);
+ self.stack.push(e);
+ }
+
+ fn push_tag(&mut self, tag: ElementTag, loc: Location) {
+ self.push(Element::new(tag).with_location(loc));
+ }
+
+ fn pop(&mut self) -> Element {
+ let e = self.stack.pop().unwrap();
+ trace!("popped {:?}", e);
+ e
+ }
+
+ fn append_child(&mut self, child: Content) {
+ trace!("appended {:?}", child);
+ let mut parent = self.stack.pop().unwrap();
+ parent.push_child(child);
+ self.stack.push(parent);
+ }
+
+ fn append_str(&mut self, text: &str) {
+ self.append_child(Content::Text(text.into()));
+ }
+
+ fn append_element(&mut self, e: Element) {
+ self.append_child(Content::Elt(e));
+ }
+}
+
+#[derive(Debug, Default)]
+struct Slugs {
+ slugs: HashSet<String>,
+}
+
+impl Slugs {
+ const MAX: usize = 8;
+
+ fn remember(&mut self, slug: &str) {
+ self.slugs.insert(slug.into());
+ }
+
+ fn unique(&mut self, candidate: &str) -> String {
+ let slug = self.helper(candidate);
+ self.remember(&slug);
+ slug
+ }
+
+ fn helper(&mut self, candidate: &str) -> String {
+ let mut slug0 = String::new();
+ for c in candidate.chars() {
+ if slug0.len() >= Self::MAX {
+ break;
+ }
+ slug0.push(c);
+ }
+
+ if !self.slugs.contains(&slug0) {
+ return slug0.to_string();
+ }
+
+ let mut i = 0;
+ loop {
+ i += 1;
+ let slug = format!("{}{}", slug0, i);
+ if !self.slugs.contains(&slug) {
+ return slug;
+ }
+ }
+ }
+}