diff options
author | Lars Wirzenius <liw@liw.fi> | 2023-04-05 18:51:14 +0300 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2023-04-08 16:51:58 +0300 |
commit | 81a5a3d01b814afe773837a20f1d01bb586193e7 (patch) | |
tree | c2b9abf30df0322a1f8677c030a6852a9ef5451a | |
parent | 14cc8a66d199cab7b9407ee27a4fd352ba3baf38 (diff) | |
download | subplot-81a5a3d01b814afe773837a20f1d01bb586193e7.tar.gz |
feat! convert markdown parsing to use pulldown_cmark
This doesn't compile yet.
Sponsored-by: author
-rw-r--r-- | src/error.rs | 14 | ||||
-rw-r--r-- | src/md.rs | 550 | ||||
-rw-r--r-- | src/md/panhelper.rs | 26 | ||||
-rw-r--r-- | src/md/typeset.rs | 229 | ||||
-rw-r--r-- | src/md/visitor/block_class.rs | 25 | ||||
-rw-r--r-- | src/md/visitor/embedded.rs | 35 | ||||
-rw-r--r-- | src/md/visitor/image.rs | 25 | ||||
-rw-r--r-- | src/md/visitor/linting.rs | 40 | ||||
-rw-r--r-- | src/md/visitor/mod.rs | 17 | ||||
-rw-r--r-- | src/md/visitor/structure.rs | 100 | ||||
-rw-r--r-- | src/md/visitor/typesetting.rs | 85 |
11 files changed, 405 insertions, 741 deletions
diff --git a/src/error.rs b/src/error.rs index 2469a5b..9409fcf 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,4 +1,6 @@ +use crate::html::HtmlError; use crate::matches::MatchedSteps; +use crate::md::MdError; use std::path::PathBuf; use std::process::Output; @@ -287,6 +289,10 @@ pub enum SubplotError { #[error("Pandoc failed")] Pandoc(#[source] pandoc::PandocError), + /// Error parsing markdown into HTML. + #[error(transparent)] + ParseMarkdown(#[from] HtmlError), + /// Regular expression error /// /// Subplot uses regular expressions. This is a generic wrapper for @@ -318,6 +324,10 @@ pub enum SubplotError { #[error(transparent)] Utf8Error(#[from] std::str::Utf8Error), + /// Markdown errors. + #[error(transparent)] + MdError(#[from] MdError), + /// String formatting failed. #[error("Failed in string formattiing: {0}")] StringFormat(std::fmt::Error), @@ -329,6 +339,10 @@ pub enum SubplotError { /// Input file mtime lookup. #[error("Failed to get modification time of {0}")] InputFileMtime(PathBuf, #[source] std::io::Error), + + /// Error typesetting a roadmap diagram. + #[error(transparent)] + Roadmap(#[from] roadmap::RoadmapError), } impl SubplotError { @@ -1,26 +1,19 @@ //! A parsed Markdown document. use crate::{ - parse_scenario_snippet, Bindings, EmbeddedFiles, Scenario, ScenarioStep, Style, SubplotError, - Warnings, YamlMetadata, + html::{parse, Attribute, Content, Element, ElementTag}, + parse_scenario_snippet, Bindings, EmbeddedFile, EmbeddedFiles, Scenario, ScenarioStep, Style, + SubplotError, Warnings, YamlMetadata, }; use log::trace; -use pandoc_ast::{Map, MetaValue, MutVisitor, Pandoc}; -use serde_yaml::{Mapping, Value}; -use std::cell::RefCell; use std::collections::HashSet; use std::path::{Path, PathBuf}; -mod panhelper; -mod typeset; - -mod visitor; -use visitor::LintingVisitor; - /// A parsed Markdown document. #[derive(Debug)] pub struct Markdown { - pandoc: RefCell<Pandoc>, + html: Element, + meta: Option<YamlMetadata>, } impl Markdown { @@ -34,96 +27,143 @@ impl Markdown { } fn new_from_str(text: &str) -> Result<Self, SubplotError> { - let mut pandoc = pandoc::new(); - pandoc.set_input(pandoc::InputKind::Pipe(text.into())); - pandoc.set_input_format( - pandoc::InputFormat::Markdown, - vec![pandoc::MarkdownExtension::Citations], - ); - pandoc.set_output_format(pandoc::OutputFormat::Json, vec![]); - pandoc.set_output(pandoc::OutputKind::Pipe); + let html = parse(text)?; + Ok(Self::new(html)) + } - // Add external Pandoc filters. - crate::policy::add_citeproc(&mut pandoc); + fn new(html: Element) -> Self { + Self { html, meta: None } + } - let json = match pandoc.execute().map_err(SubplotError::Pandoc)? { - pandoc::PandocOutput::ToBuffer(o) => o, - _ => return Err(SubplotError::NotJson), - }; + /// Set document metadata from subplot. + pub fn set_metadata(&mut self, meta: &YamlMetadata) { + self.meta = Some(meta.clone()); + } - let ast: Pandoc = serde_json::from_str(&json).map_err(SubplotError::AstJson)?; - Ok(Self::new(ast)) + /// Set date. + pub fn set_date(&mut self, date: String) { + if let Some(meta) = &mut self.meta { + meta.set_date(date); + } } - fn new(pandoc: Pandoc) -> Self { - Self { - pandoc: RefCell::new(pandoc), + /// Return parsed HTML of the markdown. + pub fn to_html(&self) -> Element { + if let Some(meta) = &self.meta { + let mut div = Element::new(ElementTag::Div); + div.push_child(Content::Elt(Self::title(meta.title()))); + if let Some(authors) = meta.authors() { + div.push_child(Content::Elt(Self::authors(authors))); + } + if let Some(date) = meta.date() { + div.push_child(Content::Elt(Self::date(date))); + } + div.push_child(Content::Elt(self.html.clone())); + div + } else { + self.html.clone() } } - /// Set document metadata from subplot. - pub fn set_metadata(&mut self, meta: &YamlMetadata) { - self.pandoc.borrow_mut().meta = to_pandoc_meta(meta); + fn title(title: &str) -> Element { + let mut e = Element::new(ElementTag::H1); + e.push_child(Content::Text(title.into())); + e } - /// JSON representation of Pandoc AST. - pub fn to_json(&self) -> Result<String, SubplotError> { - let json = serde_json::to_string(&self.pandoc).map_err(SubplotError::AstJson)?; - Ok(json) + fn authors(authors: &[String]) -> Element { + let mut list = Element::new(ElementTag::P); + list.push_child(Content::Text("By: ".into())); + let mut first = true; + for a in authors { + if !first { + list.push_child(Content::Text(", ".into())); + } + list.push_child(Content::Text(a.into())); + first = false; + } + list } - /// Find problems. - pub fn lint(&self) -> Vec<SubplotError> { - let mut linter = LintingVisitor::default(); - linter.walk_pandoc(&mut self.pandoc.borrow_mut()); - linter.issues + fn date(date: &str) -> Element { + let mut e = Element::new(ElementTag::P); + e.push_child(Content::Text(date.into())); + e } /// Find included images. pub fn images(&self) -> Vec<PathBuf> { let mut names = vec![]; - let mut visitor = visitor::ImageVisitor::new(); - visitor.walk_pandoc(&mut self.pandoc.borrow_mut()); - for x in visitor.images().iter() { - names.push(x.to_path_buf()); + for e in Self::visit(&self.html) { + if e.tag() == ElementTag::Img { + if let Some(attr) = e.attr("src") { + if let Some(href) = attr.value() { + names.push(PathBuf::from(&href)); + } + } + } } names } + fn visit(e: &Element) -> Vec<&Element> { + let mut elements = vec![]; + Self::visit_helper(e, &mut elements); + elements + } + + fn visit_helper<'a>(e: &'a Element, elements: &mut Vec<&'a Element>) { + elements.push(e); + for child in e.children() { + if let Content::Elt(ee) = child { + Self::visit_helper(ee, elements); + } + } + } + /// Find classes used for fenced blocks. pub fn block_classes(&self) -> HashSet<String> { - let mut visitor = visitor::BlockClassVisitor::default(); - visitor.walk_pandoc(&mut self.pandoc.borrow_mut()); - visitor.classes + let mut classes: HashSet<String> = HashSet::new(); + + for e in Self::visit(&self.html) { + if e.tag() == ElementTag::Pre { + if let Some(attr) = e.attr("class") { + if let Some(value) = attr.value() { + classes.insert(value.into()); + } + } + } + } + + classes } /// Typeset. - pub fn typeset(&mut self, style: Style, bindings: &Bindings) -> Warnings { - let mut visitor = visitor::TypesettingVisitor::new(style, bindings); - visitor.walk_pandoc(&mut self.pandoc.borrow_mut()); - visitor.warnings() + pub fn typeset(&mut self, _style: Style, _bindings: &Bindings) -> Warnings { + let result = typeset::typeset_element(&self.html); + if let Ok(html) = result { + self.html = html; + Warnings::default() + } else { + // FIXME: handle warnings in some way + Warnings::default() + } } /// Find scenarios. pub fn scenarios(&self) -> Result<Vec<Scenario>, SubplotError> { - trace!( - "Metadata::scenarios: looking for scenarios: {:#?}", - self.pandoc - ); - - let mut visitor = visitor::StructureVisitor::new(); - visitor.walk_pandoc(&mut self.pandoc.borrow_mut()); - trace!( - "Metadata::scenarios: visitor found {} elements: {:#?}", - visitor.elements.len(), - visitor.elements - ); + let mut elements = vec![]; + for e in Self::visit(&self.html) { + if let Some(se) = Self::is_structure_element(e) { + elements.push(se); + } + } let mut scenarios: Vec<Scenario> = vec![]; let mut i = 0; - while i < visitor.elements.len() { - let (maybe, new_i) = extract_scenario(&visitor.elements[i..])?; + while i < elements.len() { + let (maybe, new_i) = extract_scenario(&elements[i..])?; if let Some(scen) = maybe { scenarios.push(scen); } @@ -133,128 +173,151 @@ impl Markdown { Ok(scenarios) } + fn is_structure_element(e: &Element) -> Option<StructureElement> { + match e.tag() { + ElementTag::H1 => Some(StructureElement::heading(e, 1)), + ElementTag::H2 => Some(StructureElement::heading(e, 2)), + ElementTag::H3 => Some(StructureElement::heading(e, 3)), + ElementTag::H4 => Some(StructureElement::heading(e, 4)), + ElementTag::H5 => Some(StructureElement::heading(e, 5)), + ElementTag::H6 => Some(StructureElement::heading(e, 6)), + ElementTag::Pre => { + if e.has_attr("class", "scenario") { + Some(StructureElement::snippet(e)) + } else { + None + } + } + _ => None, + } + } + /// Find embedded files. + // FIXME: this should return a result pub fn embedded_files(&self) -> EmbeddedFiles { let mut files = EmbeddedFiles::default(); - files.walk_pandoc(&mut self.pandoc.borrow_mut()); + + for e in Self::visit(&self.html) { + if let Ok(file) = embedded_file(e) { + files.push(file); + } + } + files } } -fn to_pandoc_meta(yaml: &YamlMetadata) -> Map<String, MetaValue> { - trace!("Creating metadata map from parsed YAML: {:#?}", yaml); - - let mut map: Map<String, MetaValue> = Map::new(); +// A structure element in the document: a heading or a scenario snippet. +#[derive(Debug)] +enum StructureElement { + // Headings consist of the text and the level of the heading. + Heading(String, i64), - map.insert("title".into(), meta_string(yaml.title())); + // Scenario snippets consist just of the unparsed text. + Snippet(String), +} - if let Some(v) = &yaml.subtitle() { - map.insert("subtitle".into(), meta_string(v)); +impl StructureElement { + fn heading(e: &Element, level: i64) -> Self { + Self::Heading(e.content(), level) } - if let Some(authors) = yaml.authors() { - let authors: Vec<MetaValue> = authors - .iter() - .map(|s| MetaValue::MetaString(s.into())) - .collect(); - map.insert("author".into(), MetaValue::MetaList(authors)); + fn snippet(e: &Element) -> Self { + Self::Snippet(e.content()) } +} - if let Some(v) = yaml.date() { - map.insert("date".into(), meta_string(v)); +fn embedded_file(e: &Element) -> Result<EmbeddedFile, MdError> { + if e.tag() != ElementTag::Pre { + return Err(MdError::NotCodeBlockElement(e.tag().name().to_string())); } - if let Some(classes) = yaml.classes() { - map.insert("classes".into(), meta_strings(classes)); + if !e.has_attr("class", "file") { + return Err(MdError::NotFile); } - if !yaml.impls().is_empty() { - let impls = yaml - .impls() - .iter() - .map(|(k, v)| (k.to_owned(), Box::new(meta_path_bufs(v)))) - .collect(); - map.insert("impls".into(), MetaValue::MetaMap(impls)); + let id = e.attr("id"); + if id.is_none() { + return Err(MdError::NoId); } - - if let Some(v) = yaml.bibliographies() { - map.insert("bibliography".into(), meta_path_bufs(v)); + let id = id.unwrap(); + if id.value().is_none() { + return Err(MdError::NoIdValue); } - - if let Some(v) = yaml.bindings_filenames() { - map.insert("bindings".into(), meta_path_bufs(v)); + let id = id.value().unwrap(); + if id.is_empty() { + return Err(MdError::NoIdValue); } - if let Some(v) = yaml.documentclass() { - map.insert("documentclass".into(), meta_string(v)); + // The contents we get from the pulldown_cmark parser for a code + // block will always end in a newline, unless the block is empty. + // This is different from the parser we previously used, which + // didn't end in a newline, if the contents is exactly one line. + // The add-newline attribute was designed for the previous parser + // behavior, and so its interpretations for new new parser is a + // little less straightforward. To avoid convoluted logic, we + // remove the newline if it's there before obeying add-newline. + let mut contents = e.content(); + if contents.ends_with('\n') { + contents.truncate(contents.len() - 1); } - - if let Some(pandoc) = yaml.pandoc() { - for (key, value) in pandoc.iter() { - map.insert(key.to_string(), value_to_pandoc(value)); + match AddNewline::parse(e.attr("add-newline"))? { + AddNewline::No => { + // Newline already isn't there. } - } + AddNewline::Yes => { + // Add newline. + contents.push('\n'); + } + AddNewline::Auto => { + // Add newline if not there. + if !contents.ends_with('\n') { + contents.push('\n'); + } + } + }; - trace!("Created metadata map from parsed YAML"); - map + Ok(EmbeddedFile::new(id.into(), contents)) } -fn mapping_to_pandoc(mapping: &Mapping) -> MetaValue { - let mut map = Map::new(); - for (key, value) in mapping.iter() { - let key = if let MetaValue::MetaString(s) = value_to_pandoc(key) { - s - } else { - panic!("key not a string: {:?}", key); - }; - map.insert(key, Box::new(value_to_pandoc(value))); - } - - MetaValue::MetaMap(map) +#[derive(Debug, Eq, PartialEq, Copy, Clone)] +enum AddNewline { + Auto, + Yes, + No, } -fn value_to_pandoc(data: &Value) -> MetaValue { - match data { - Value::Null => unreachable!("null not OK"), - Value::Number(_) => unreachable!("number not OK"), - Value::Sequence(_) => unreachable!("sequence not OK"), - - Value::Bool(b) => MetaValue::MetaBool(*b), - Value::String(s) => MetaValue::MetaString(s.clone()), - Value::Mapping(mapping) => mapping_to_pandoc(mapping), +impl AddNewline { + fn parse(attr: Option<&Attribute>) -> Result<Self, MdError> { + if let Some(attr) = attr { + if let Some(value) = attr.value() { + let value = match value { + "yes" => Self::Yes, + "no" => Self::No, + "auto" => Self::Auto, + _ => return Err(MdError::BadAddNewline(value.into())), + }; + return Ok(value); + } + }; + Ok(Self::Auto) } } -fn meta_string(s: &str) -> MetaValue { - MetaValue::MetaString(s.to_string()) -} - -fn meta_strings(v: &[String]) -> MetaValue { - MetaValue::MetaList(v.iter().map(|s| meta_string(s)).collect()) -} - -fn meta_path_buf(p: &Path) -> MetaValue { - meta_string(&p.display().to_string()) -} - -fn meta_path_bufs(v: &[PathBuf]) -> MetaValue { - MetaValue::MetaList(v.iter().map(|p| meta_path_buf(p)).collect()) -} - -fn extract_scenario(e: &[visitor::Element]) -> Result<(Option<Scenario>, usize), SubplotError> { +fn extract_scenario(e: &[StructureElement]) -> Result<(Option<Scenario>, usize), SubplotError> { if e.is_empty() { // If we get here, it's a programming error. panic!("didn't expect empty list of elements"); } match &e[0] { - visitor::Element::Snippet(_) => Err(SubplotError::ScenarioBeforeHeading), - visitor::Element::Heading(title, level) => { + StructureElement::Snippet(_) => Err(SubplotError::ScenarioBeforeHeading), + StructureElement::Heading(title, level) => { let mut scen = Scenario::new(title); let mut prevkind = None; for (i, item) in e.iter().enumerate().skip(1) { match item { - visitor::Element::Heading(_, level2) => { + StructureElement::Heading(_, level2) => { let is_subsection = *level2 > *level; if is_subsection { if scen.has_steps() { @@ -267,7 +330,7 @@ fn extract_scenario(e: &[visitor::Element]) -> Result<(Option<Scenario>, usize), return Ok((None, i)); } } - visitor::Element::Snippet(text) => { + StructureElement::Snippet(text) => { for line in parse_scenario_snippet(text) { let step = ScenarioStep::new_from_str(line, prevkind)?; scen.add(&step); @@ -285,19 +348,148 @@ fn extract_scenario(e: &[visitor::Element]) -> Result<(Option<Scenario>, usize), } } +mod typeset { + + use crate::html::{Attribute, Content, Element, ElementTag}; + // use crate::parser::parse_scenario_snippet; + // use crate::Bindings; + // use crate::PartialStep; + // use crate::ScenarioStep; + // use crate::StepKind; + use crate::SubplotError; + use crate::{DiagramMarkup, DotMarkup, PikchrMarkup, PlantumlMarkup, Svg}; + // use crate::{Warning, Warnings}; + + pub(crate) fn typeset_element(e: &Element) -> Result<Element, SubplotError> { + match e.tag() { + ElementTag::Pre if e.has_attr("class", "scenario") => typeset_scenario(e), + ElementTag::Pre if e.has_attr("class", "file") => typeset_file(e), + ElementTag::Pre if e.has_attr("class", "example") => typeset_example(e), + ElementTag::Pre if e.has_attr("class", "dot") => typeset_dot(e), + ElementTag::Pre if e.has_attr("class", "plantuml") => typeset_plantuml(e), + ElementTag::Pre if e.has_attr("class", "roadmap") => typeset_roadmap(e), + ElementTag::Pre if e.has_attr("class", "pikchr") => typeset_pikchr(e), + _ => { + let mut new = Element::new(e.tag()); + for attr in e.all_attrs() { + new.push_attribute(attr.clone()); + } + for child in e.children() { + if let Content::Elt(ce) = child { + new.push_child(Content::Elt(typeset_element(ce)?)); + } else { + new.push_child(child.clone()); + } + } + Ok(new) + } + } + } + + fn typeset_scenario(e: &Element) -> Result<Element, SubplotError> { + Ok(e.clone()) // FIXME + } + + fn typeset_file(e: &Element) -> Result<Element, SubplotError> { + Ok(e.clone()) // FIXME + } + + fn typeset_example(e: &Element) -> Result<Element, SubplotError> { + Ok(e.clone()) // FIXME + } + + fn typeset_dot(e: &Element) -> Result<Element, SubplotError> { + let dot = e.content(); + let svg = DotMarkup::new(&dot).as_svg()?; + Ok(svg_to_element(svg)) + } + + fn typeset_plantuml(e: &Element) -> Result<Element, SubplotError> { + let markup = e.content(); + let svg = PlantumlMarkup::new(&markup).as_svg()?; + Ok(svg_to_element(svg)) + } + + fn typeset_pikchr(e: &Element) -> Result<Element, SubplotError> { + let markup = e.content(); + // FIXME: is there ever a need to use classes other than .pikchr? + let svg = PikchrMarkup::new(&markup, None).as_svg()?; + Ok(svg_to_element(svg)) + } + + fn typeset_roadmap(e: &Element) -> Result<Element, SubplotError> { + const WIDTH: usize = 50; + + let yaml = e.content(); + let roadmap = roadmap::from_yaml(&yaml)?; + let dot = roadmap.format_as_dot(WIDTH)?; + let svg = DotMarkup::new(&dot).as_svg()?; + Ok(svg_to_element(svg)) + } + + fn svg_to_element(svg: Svg) -> Element { + let url = svg_as_data_url(svg); + let img = html_img(&url); + html_p(vec![Content::Elt(img)]) + } + + fn svg_as_data_url(svg: Svg) -> String { + let svg = base64::encode(svg.data()); + format!("data:image/svg+xml;base64,{svg}") + } + + fn html_p(children: Vec<Content>) -> Element { + let mut new = Element::new(ElementTag::P); + for child in children { + new.push_child(child); + } + new + } + + fn html_img(src: &str) -> Element { + let mut new = Element::new(ElementTag::Img); + new.push_attribute(Attribute::new("src", src)); + new + } +} + +/// Errors returned from the module. +#[derive(Debug, thiserror::Error, Eq, PartialEq)] +pub enum MdError { + /// Trie to treat a non-PRE element as an embedded file. + #[error("tried to treat wrong element as an embedded file: {0}")] + NotCodeBlockElement(String), + + /// Code block lacks the "file" attribute. + #[error("code block is not a file")] + NotFile, + + /// Code block lacks an identifile to use as th filename. + #[error("code block lacks a filename identifier")] + NoId, + + /// Identifier is empty. + #[error("code block has an empty filename identifier")] + NoIdValue, + + /// Value ofv add-newline attribute ie not understood. + #[error("value of add-newline attirubte is not understood: {0}")] + BadAddNewline(String), +} + #[cfg(test)] mod test_extract { use super::extract_scenario; - use super::visitor::Element; + use super::StructureElement; use crate::Scenario; use crate::SubplotError; - fn h(title: &str, level: i64) -> Element { - Element::Heading(title.to_string(), level) + fn h(title: &str, level: i64) -> StructureElement { + StructureElement::Heading(title.to_string(), level) } - fn s(text: &str) -> Element { - Element::Snippet(text.to_string()) + fn s(text: &str) -> StructureElement { + StructureElement::Snippet(text.to_string()) } fn check_result( @@ -319,7 +511,7 @@ mod test_extract { #[test] fn returns_nothing_if_there_is_no_scenario() { - let elements: Vec<Element> = vec![h("title", 1)]; + let elements: Vec<StructureElement> = vec![h("title", 1)]; let r = extract_scenario(&elements); check_result(r, None, 1); } @@ -396,14 +588,13 @@ mod test_extract { #[cfg(test)] mod test { - use super::Markdown; + use super::{AddNewline, Attribute, Markdown, MdError}; use std::path::PathBuf; #[test] fn loads_empty_doc() { let md = Markdown::new_from_str("").unwrap(); - let ast = md.pandoc.borrow(); - assert!(ast.blocks.is_empty()); + assert!(md.html.content().is_empty()); } #[test] @@ -507,4 +698,45 @@ hello, world assert_eq!(file.filename(), "fileid"); assert_eq!(file.contents(), "hello, world\n"); } + + #[test] + fn parses_no_auto_newline_as_auto() { + assert_eq!(AddNewline::parse(None).unwrap(), AddNewline::Auto); + } + + #[test] + fn parses_auto_as_auto() { + let attr = Attribute::new("add-newline", "auto"); + assert_eq!(AddNewline::parse(Some(&attr)).unwrap(), AddNewline::Auto); + } + + #[test] + fn parses_yes_as_yes() { + let attr = Attribute::new("add-newline", "yes"); + assert_eq!(AddNewline::parse(Some(&attr)).unwrap(), AddNewline::Yes); + } + + #[test] + fn parses_no_as_no() { + let attr = Attribute::new("add-newline", "no"); + assert_eq!(AddNewline::parse(Some(&attr)).unwrap(), AddNewline::No); + } + + #[test] + fn parses_empty_as_error() { + let attr = Attribute::new("add-newline", ""); + assert_eq!( + AddNewline::parse(Some(&attr)), + Err(MdError::BadAddNewline("".into())) + ); + } + + #[test] + fn parses_garbage_as_error() { + let attr = Attribute::new("add-newline", "garbage"); + assert_eq!( + AddNewline::parse(Some(&attr)), + Err(MdError::BadAddNewline("garbage".into())) + ); + } } diff --git a/src/md/panhelper.rs b/src/md/panhelper.rs deleted file mode 100644 index f7ab801..0000000 --- a/src/md/panhelper.rs +++ /dev/null @@ -1,26 +0,0 @@ -use pandoc_ast::Attr; - -/// Is a code block marked as being of a given type? -pub fn is_class(attr: &Attr, class: &str) -> bool { - let (_id, classes, _kvpairs) = attr; - classes.iter().any(|s| s == class) -} - -/// Utility function to find key/value pairs from an attribute -pub fn find_attr_kv<'a>(attr: &'a Attr, key: &'static str) -> impl Iterator<Item = &'a str> { - attr.2.iter().flat_map(move |(key_, value)| { - if key == key_ { - Some(value.as_ref()) - } else { - None - } - }) -} - -/// Get the filename for a fenced code block tagged .file. -/// -/// The filename is the first (and presumably only) identifier for the -/// block. -pub fn get_filename(attr: &Attr) -> String { - attr.0.to_string() -} diff --git a/src/md/typeset.rs b/src/md/typeset.rs deleted file mode 100644 index aa6528d..0000000 --- a/src/md/typeset.rs +++ /dev/null @@ -1,229 +0,0 @@ -use crate::parser::parse_scenario_snippet; -use crate::Bindings; -use crate::PartialStep; -use crate::ScenarioStep; -use crate::StepKind; -use crate::SubplotError; -use crate::{DiagramMarkup, DotMarkup, PikchrMarkup, PlantumlMarkup, Svg}; -use crate::{Warning, Warnings}; - -use pandoc_ast::Attr; -use pandoc_ast::Block; -use pandoc_ast::Inline; -use pandoc_ast::Target; - -/// Typeset an error as a Pandoc AST Block element. -pub fn error(err: SubplotError) -> Block { - let msg = format!("ERROR: {err}"); - Block::Para(error_msg(&msg)) -} - -/// Typeset an error message a vector of inlines. -pub fn error_msg(msg: &str) -> Vec<Inline> { - vec![Inline::Strong(vec![inlinestr(msg)])] -} - -/// Typeset a string as an inline element. -pub fn inlinestr(s: &str) -> Inline { - Inline::Str(String::from(s)) -} - -/// Typeset a code block tagged as a file. -pub fn file_block(attr: &Attr, text: &str) -> Block { - let filename = inlinestr(&attr.0); - let filename = Inline::Strong(vec![filename]); - let intro = Block::Para(vec![inlinestr("File:"), space(), filename]); - let mut cbattrs = attr.clone(); - if cbattrs.1.iter().any(|s| s == "noNumberLines") { - // If the block says "noNumberLines" we remove that class - cbattrs.1.retain(|s| s != "noNumberLines"); - } else if cbattrs.1.iter().all(|s| s != "numberLines") { - // Otherwise if it doesn't say numberLines we add that in. - cbattrs.1.push("numberLines".to_string()); - } - // If this was an `example`, convert that class to `file` - if cbattrs.1.iter().any(|s| s == "example") { - cbattrs.1.retain(|s| s != "example"); - cbattrs.1.push("file".into()); - } - let codeblock = Block::CodeBlock(cbattrs, text.to_string()); - let noattr = ("".to_string(), vec![], vec![]); - Block::Div(noattr, vec![intro, codeblock]) -} - -/// Typeset a scenario snippet as a Pandoc AST Block. -/// -/// Typesetting here means producing the Pandoc abstract syntax tree -/// nodes that result in the desired output, when Pandoc processes -/// them. -/// -/// The snippet is given as a text string, which is parsed. It need -/// not be a complete scenario, but it should consist of complete steps. -pub fn scenario_snippet(bindings: &Bindings, snippet: &str, warnings: &mut Warnings) -> Block { - let lines = parse_scenario_snippet(snippet); - let mut steps = vec![]; - let mut prevkind: Option<StepKind> = None; - - for line in lines { - let (this, thiskind) = step(bindings, line, prevkind, warnings); - steps.push(this); - prevkind = thiskind; - } - Block::LineBlock(steps) -} - -// Typeset a single scenario step as a sequence of Pandoc AST Inlines. -fn step( - bindings: &Bindings, - text: &str, - prevkind: Option<StepKind>, - warnings: &mut Warnings, -) -> (Vec<Inline>, Option<StepKind>) { - let step = ScenarioStep::new_from_str(text, prevkind); - if step.is_err() { - return ( - error_msg(&format!("Could not parse step: {text}")), - prevkind, - ); - } - let step = step.unwrap(); - - let m = match bindings.find("", &step) { - Ok(m) => m, - Err(e) => { - let w = Warning::UnknownBinding(format!("{e}")); - warnings.push(w.clone()); - return (error_msg(&format!("{w}")), prevkind); - } - }; - - let mut inlines = vec![keyword(&step, prevkind), space()]; - - for part in m.parts() { - match part { - PartialStep::UncapturedText(s) => inlines.push(uncaptured(s.text())), - PartialStep::CapturedText { text, .. } => inlines.push(captured(text)), - } - } - - (inlines, Some(step.kind())) -} - -// Typeset first word, which is assumed to be a keyword, of a scenario -// step. -fn keyword(step: &ScenarioStep, prevkind: Option<StepKind>) -> Inline { - let actual = inlinestr(&format!("{}", step.kind())); - let and = inlinestr("and"); - let keyword = if let Some(prevkind) = prevkind { - if prevkind == step.kind() { - and - } else { - actual - } - } else { - actual - }; - Inline::Emph(vec![keyword]) -} - -// Typeset a space between words. -fn space() -> Inline { - Inline::Space -} - -// Typeset an uncaptured part of a step. -fn uncaptured(s: &str) -> Inline { - inlinestr(s) -} - -// Typeset a captured part of a step. -fn captured(s: &str) -> Inline { - Inline::Strong(vec![inlinestr(s)]) -} - -/// Typeset a link as a note. -pub fn link_as_note(attr: Attr, text: Vec<Inline>, target: Target) -> Inline { - let (url, _) = target.clone(); - let url = Inline::Code(attr.clone(), url); - let link = Inline::Link(attr.clone(), vec![url], target); - let note = Inline::Note(vec![Block::Para(vec![link])]); - let mut text = text; - text.push(note); - Inline::Span(attr, text) -} - -/// Take a pikchr diagram, render it as SVG, and return an AST block element. -/// -/// The `Block` will contain the SVG data. This allows the diagram to -/// be rendered without referencing external entities. -/// -/// If the code block which contained the pikchr contains other classes, they -/// can be added to the SVG for use in later typesetting etc. -pub fn pikchr_to_block(pikchr: &str, class: Option<&str>, warnings: &mut Warnings) -> Block { - match PikchrMarkup::new(pikchr, class).as_svg() { - Ok(svg) => typeset_svg(svg), - Err(err) => { - warnings.push(Warning::Pikchr(format!("{err}"))); - error(err) - } - } -} - -// Take a dot diagram, render it as SVG, and return an AST Block -// element. The Block will contain the SVG data. This allows the -// diagram to be rendered without referending external entities. -pub fn dot_to_block(dot: &str, warnings: &mut Warnings) -> Block { - match DotMarkup::new(dot).as_svg() { - Ok(svg) => typeset_svg(svg), - Err(err) => { - warnings.push(Warning::Dot(format!("{err}"))); - error(err) - } - } -} - -// Take a PlantUML diagram, render it as SVG, and return an AST Block -// element. The Block will contain the SVG data. This allows the -// diagram to be rendered without referending external entities. -pub fn plantuml_to_block(markup: &str, warnings: &mut Warnings) -> Block { - match PlantumlMarkup::new(markup).as_svg() { - Ok(svg) => typeset_svg(svg), - Err(err) => { - warnings.push(Warning::Plantuml(format!("{err}"))); - error(err) - } - } -} - -/// Typeset a project roadmap expressed as textual YAML, and render it -/// as an SVG image. -pub fn roadmap_to_block(yaml: &str, warnings: &mut Warnings) -> Block { - match roadmap::from_yaml(yaml) { - Ok(ref mut roadmap) => { - roadmap.set_missing_statuses(); - let width = 50; - match roadmap.format_as_dot(width) { - Ok(dot) => dot_to_block(&dot, warnings), - Err(e) => Block::Para(vec![inlinestr(&e.to_string())]), - } - } - Err(e) => Block::Para(vec![inlinestr(&e.to_string())]), - } -} - -// Typeset an SVG, represented as a byte vector, as a Pandoc AST Block -// element. -fn typeset_svg(svg: Svg) -> Block { - let url = svg_as_data_url(svg); - let attr = ("".to_string(), vec![], vec![]); - let img = Inline::Image(attr, vec![], (url, "".to_string())); - Block::Para(vec![img]) -} - -// Convert an SVG, represented as a byte vector, into a data: URL, -// which can be inlined so the image can be rendered without -// referencing external files. -fn svg_as_data_url(svg: Svg) -> String { - let svg = base64::encode(svg.data()); - format!("data:image/svg+xml;base64,{svg}") -} diff --git a/src/md/visitor/block_class.rs b/src/md/visitor/block_class.rs deleted file mode 100644 index 303616b..0000000 --- a/src/md/visitor/block_class.rs +++ /dev/null @@ -1,25 +0,0 @@ -use std::collections::HashSet; - -use pandoc_ast::{Block, MutVisitor}; - -#[derive(Default)] -pub struct BlockClassVisitor { - pub classes: HashSet<String>, -} - -impl MutVisitor for BlockClassVisitor { - fn visit_vec_block(&mut self, vec_block: &mut Vec<Block>) { - for block in vec_block { - match block { - Block::CodeBlock(attr, _) => { - for class in &attr.1 { - self.classes.insert(class.to_string()); - } - } - _ => { - self.visit_block(block); - } - } - } - } -} diff --git a/src/md/visitor/embedded.rs b/src/md/visitor/embedded.rs deleted file mode 100644 index 68a4118..0000000 --- a/src/md/visitor/embedded.rs +++ /dev/null @@ -1,35 +0,0 @@ -use crate::md::panhelper; -use crate::EmbeddedFile; -use crate::EmbeddedFiles; - -use pandoc_ast::{Block, MutVisitor}; - -impl MutVisitor for EmbeddedFiles { - fn visit_vec_block(&mut self, vec_block: &mut Vec<Block>) { - use panhelper::is_class; - for block in vec_block { - match block { - Block::CodeBlock(attr, contents) => { - if is_class(attr, "file") { - let add_newline = match panhelper::find_attr_kv(attr, "add-newline").next() - { - None | Some("auto") => !contents.ends_with('\n'), - Some("yes") => true, - Some("no") => false, - _ => unreachable!(), - }; - let contents = if add_newline { - format!("{contents}\n") - } else { - contents.clone() - }; - self.push(EmbeddedFile::new(panhelper::get_filename(attr), contents)); - } - } - _ => { - self.visit_block(block); - } - } - } - } -} diff --git a/src/md/visitor/image.rs b/src/md/visitor/image.rs deleted file mode 100644 index be49d66..0000000 --- a/src/md/visitor/image.rs +++ /dev/null @@ -1,25 +0,0 @@ -use std::path::PathBuf; - -use pandoc_ast::{Inline, MutVisitor}; - -pub struct ImageVisitor { - images: Vec<PathBuf>, -} - -impl ImageVisitor { - pub fn new() -> Self { - ImageVisitor { images: vec![] } - } - - pub fn images(&self) -> Vec<PathBuf> { - self.images.clone() - } -} - -impl MutVisitor for ImageVisitor { - fn visit_inline(&mut self, inline: &mut Inline) { - if let Inline::Image(_attr, _inlines, target) = inline { - self.images.push(PathBuf::from(&target.0)); - } - } -} diff --git a/src/md/visitor/linting.rs b/src/md/visitor/linting.rs deleted file mode 100644 index d64b03e..0000000 --- a/src/md/visitor/linting.rs +++ /dev/null @@ -1,40 +0,0 @@ -use crate::md::panhelper; -use crate::SubplotError; - -use pandoc_ast::{Block, MutVisitor}; - -#[derive(Default)] -pub struct LintingVisitor { - pub issues: Vec<SubplotError>, -} - -impl MutVisitor for LintingVisitor { - fn visit_vec_block(&mut self, vec_block: &mut Vec<Block>) { - for block in vec_block { - match block { - Block::CodeBlock(attr, _) => { - if panhelper::is_class(attr, "file") || panhelper::is_class(attr, "example") { - let newlines: Vec<_> = - panhelper::find_attr_kv(attr, "add-newline").collect(); - match newlines.len() { - 0 => {} - 1 => match newlines[0].to_ascii_lowercase().as_ref() { - "auto" | "yes" | "no" => {} - _ => self.issues.push(SubplotError::UnrecognisedAddNewline( - panhelper::get_filename(attr), - newlines[0].to_owned(), - )), - }, - _ => self.issues.push(SubplotError::RepeatedAddNewlineAttribute( - panhelper::get_filename(attr), - )), - } - } - } - _ => { - self.visit_block(block); - } - } - } - } -} diff --git a/src/md/visitor/mod.rs b/src/md/visitor/mod.rs deleted file mode 100644 index 1c095ac..0000000 --- a/src/md/visitor/mod.rs +++ /dev/null @@ -1,17 +0,0 @@ -mod block_class; -pub use block_class::BlockClassVisitor; - -mod embedded; - -mod image; -pub use image::ImageVisitor; - -mod linting; -pub use linting::LintingVisitor; - -mod structure; -pub use structure::Element; -pub use structure::StructureVisitor; - -mod typesetting; -pub use typesetting::TypesettingVisitor; diff --git a/src/md/visitor/structure.rs b/src/md/visitor/structure.rs deleted file mode 100644 index d8faef6..0000000 --- a/src/md/visitor/structure.rs +++ /dev/null @@ -1,100 +0,0 @@ -use crate::md::panhelper; - -use pandoc_ast::{Block, Inline, MutVisitor}; - -// A structure element in the document: a heading or a scenario snippet. -#[derive(Debug)] -pub enum Element { - // Headings consist of the text and the level of the heading. - Heading(String, i64), - - // Scenario snippets consist just of the unparsed text. - Snippet(String), -} - -impl Element { - pub fn heading(text: &str, level: i64) -> Element { - Element::Heading(text.to_string(), level) - } - - pub fn snippet(text: &str) -> Element { - Element::Snippet(text.to_string()) - } -} - -// A MutVisitor for extracting document structure. -pub struct StructureVisitor { - pub elements: Vec<Element>, -} - -impl StructureVisitor { - pub fn new() -> Self { - Self { elements: vec![] } - } -} - -impl MutVisitor for StructureVisitor { - fn visit_vec_block(&mut self, vec_block: &mut Vec<Block>) { - use panhelper::is_class; - for block in vec_block { - match block { - Block::Header(level, _attr, inlines) => { - let text = join(inlines); - let heading = Element::heading(&text, *level); - self.elements.push(heading); - } - Block::CodeBlock(attr, s) => { - if is_class(attr, "scenario") { - let snippet = Element::snippet(s); - self.elements.push(snippet); - } - } - _ => { - self.visit_block(block); - } - } - } - } -} - -fn join(vec: &[Inline]) -> String { - let mut buf = String::new(); - join_into_buffer(vec, &mut buf); - buf -} - -fn join_into_buffer(vec: &[Inline], buf: &mut String) { - for item in vec { - match item { - Inline::Str(s) => buf.push_str(s), - Inline::Emph(v) => join_into_buffer(v, buf), - Inline::Strong(v) => join_into_buffer(v, buf), - Inline::Strikeout(v) => join_into_buffer(v, buf), - Inline::Superscript(v) => join_into_buffer(v, buf), - Inline::Subscript(v) => join_into_buffer(v, buf), - Inline::SmallCaps(v) => join_into_buffer(v, buf), - Inline::Quoted(qt, v) => { - let q = match qt { - pandoc_ast::QuoteType::SingleQuote => "'", - pandoc_ast::QuoteType::DoubleQuote => "\"", - }; - buf.push_str(q); - join_into_buffer(v, buf); - buf.push_str(q); - } - Inline::Cite(_, v) => join_into_buffer(v, buf), - Inline::Code(_attr, s) => buf.push_str(s), - Inline::Space => buf.push(' '), - Inline::SoftBreak => buf.push(' '), - Inline::LineBreak => buf.push(' '), - Inline::Math(_, s) => buf.push_str(s), - Inline::RawInline(_, s) => buf.push_str(s), - Inline::Link(_, v, _) => join_into_buffer(v, buf), - Inline::Image(_, v, _) => join_into_buffer(v, buf), - Inline::Note(_) => buf.push_str(""), - Inline::Span(_attr, v) => join_into_buffer(v, buf), - #[cfg(feature = "pandoc_ast_08")] - Inline::Underline(v) => join_into_buffer(v, buf), - } - } -} diff --git a/src/md/visitor/typesetting.rs b/src/md/visitor/typesetting.rs deleted file mode 100644 index 2405c03..0000000 --- a/src/md/visitor/typesetting.rs +++ /dev/null @@ -1,85 +0,0 @@ -use crate::md::panhelper; -use crate::md::typeset; -use crate::{Bindings, Style, Warnings}; - -use pandoc_ast::{Block, Inline, MutVisitor}; - -/// Visitor for the pandoc AST. -/// -/// This includes rendering stuff which we find as we go -pub struct TypesettingVisitor<'a> { - style: Style, - bindings: &'a Bindings, - warnings: Warnings, -} - -impl<'a> TypesettingVisitor<'a> { - pub fn new(style: Style, bindings: &'a Bindings) -> Self { - TypesettingVisitor { - style, - bindings, - warnings: Warnings::default(), - } - } - - pub fn warnings(self) -> Warnings { - self.warnings - } -} - -// Visit interesting parts of the Pandoc abstract syntax tree. The -// document top level is a vector of blocks and we visit that and -// replace any fenced code block with the scenario tag with a typeset -// paragraph. Also, replace fenced code blocks with known diagram -// markup with the rendered SVG image. -impl<'a> MutVisitor for TypesettingVisitor<'a> { - fn visit_vec_block(&mut self, vec_block: &mut Vec<Block>) { - use panhelper::is_class; - for block in vec_block { - match block { - Block::CodeBlock(attr, s) => { - if is_class(attr, "scenario") { - *block = typeset::scenario_snippet(self.bindings, s, &mut self.warnings) - } else if is_class(attr, "file") || is_class(attr, "example") { - *block = typeset::file_block(attr, s) - } else if is_class(attr, "dot") { - *block = typeset::dot_to_block(s, &mut self.warnings) - } else if is_class(attr, "plantuml") { - *block = typeset::plantuml_to_block(s, &mut self.warnings) - } else if is_class(attr, "roadmap") { - *block = typeset::roadmap_to_block(s, &mut self.warnings) - } else if is_class(attr, "pikchr") { - let other_classes: Vec<_> = attr - .1 - .iter() - .map(String::as_str) - .filter(|s| *s != "pikchr") - .collect(); - let class = if other_classes.is_empty() { - None - } else { - Some(other_classes.join(" ")) - }; - let class = class.as_deref(); - *block = typeset::pikchr_to_block(s, class, &mut self.warnings) - } - } - _ => { - self.visit_block(block); - } - } - } - } - fn visit_vec_inline(&mut self, vec_inline: &mut Vec<Inline>) { - for inline in vec_inline { - match inline { - Inline::Link(attr, vec, target) if self.style.links_as_notes() => { - *inline = typeset::link_as_note(attr.clone(), vec.to_vec(), target.clone()); - } - _ => { - self.visit_inline(inline); - } - } - } - } -} |