//! A parsed Markdown document. use crate::{ html::{Attribute, Content, Element, ElementTag, Location}, mdparse::parse, steps::parse_scenario_snippet, Bindings, EmbeddedFile, EmbeddedFiles, Scenario, Style, SubplotError, Warnings, }; use log::trace; use std::collections::HashSet; use std::path::{Path, PathBuf}; /// A parsed Markdown document. #[derive(Debug)] pub struct Markdown { html: Element, } impl Markdown { /// Load a Markdown file. pub fn load_file(filename: &Path) -> Result { trace!("parsing file as markdown: {}", filename.display()); let text = std::fs::read(filename) .map_err(|e| SubplotError::InputFileUnreadable(filename.into(), e))?; let text = std::str::from_utf8(&text).map_err(SubplotError::Utf8Error)?; Self::new_from_str(filename, text) } fn new_from_str(filename: &Path, text: &str) -> Result { let html = parse(filename, text)?; Ok(Self::new(html)) } fn new(html: Element) -> Self { Self { html } } /// Return root element of markdown. pub fn root_element(&self) -> &Element { &self.html } /// Find included images. pub fn images(&self) -> Vec { let mut names = vec![]; for e in Self::visit(&self.html) { if e.tag() == ElementTag::Img { if let Some(attr) = e.attr("src") { if let Some(href) = attr.value() { names.push(PathBuf::from(&href)); } } } } names } /// Turn an element tree into a flat vector. pub fn visit(e: &Element) -> Vec<&Element> { let mut elements = vec![]; Self::visit_helper(e, &mut elements); elements } fn visit_helper<'a>(e: &'a Element, elements: &mut Vec<&'a Element>) { elements.push(e); for child in e.children() { if let Content::Elt(ee) = child { Self::visit_helper(ee, elements); } } } /// Find classes used for fenced blocks. pub fn block_classes(&self) -> HashSet { let mut classes: HashSet = HashSet::new(); for e in Self::visit(&self.html) { if e.tag() == ElementTag::Pre { if let Some(attr) = e.attr("class") { if let Some(value) = attr.value() { classes.insert(value.into()); } } } } classes } /// Typeset. pub fn typeset( &mut self, _style: Style, template: Option<&str>, bindings: &Bindings, ) -> Warnings { let result = typeset::typeset_element(&self.html, template, bindings); if let Ok(html) = result { self.html = html; Warnings::default() } else { // FIXME: handle warnings in some way Warnings::default() } } /// Find scenarios. pub fn scenarios(&self) -> Result, SubplotError> { let mut elements = vec![]; for e in Self::visit(&self.html) { if let Some(se) = Self::is_structure_element(e) { elements.push(se); } } let mut scenarios: Vec = vec![]; let mut i = 0; while i < elements.len() { let (maybe, new_i) = extract_scenario(&elements[i..])?; if let Some(scen) = maybe { scenarios.push(scen); } i += new_i; } trace!("Metadata::scenarios: found {} scenarios", scenarios.len()); Ok(scenarios) } fn is_structure_element(e: &Element) -> Option { match e.tag() { ElementTag::H1 => Some(StructureElement::heading(e, 1)), ElementTag::H2 => Some(StructureElement::heading(e, 2)), ElementTag::H3 => Some(StructureElement::heading(e, 3)), ElementTag::H4 => Some(StructureElement::heading(e, 4)), ElementTag::H5 => Some(StructureElement::heading(e, 5)), ElementTag::H6 => Some(StructureElement::heading(e, 6)), ElementTag::Pre => { if e.has_attr("class", "scenario") { Some(StructureElement::snippet(e)) } else { None } } _ => None, } } /// Find embedded files. pub fn embedded_files(&self) -> Result { let mut files = EmbeddedFiles::default(); for e in Self::visit(&self.html) { if let MaybeEmbeddedFile::IsFile(file) = embedded_file(e)? { files.push(file); } } Ok(files) } /// Find all code blocks which have identifiers and return them pub fn named_blocks(&self) -> impl Iterator { Self::visit(&self.html) .into_iter() .filter(|e| e.tag() == ElementTag::Pre && e.attr("id").is_some()) } } // A structure element in the document: a heading or a scenario snippet. #[derive(Debug)] enum StructureElement { // Headings consist of the text and the level of the heading. Heading(String, i64, Location), // Scenario snippets consist just of the unparsed text. Snippet(String, Location), } impl StructureElement { fn heading(e: &Element, level: i64) -> Self { Self::Heading(e.content(), level, e.location()) } fn snippet(e: &Element) -> Self { Self::Snippet(e.content(), e.location()) } } enum MaybeEmbeddedFile { IsFile(EmbeddedFile), NotFile, } fn embedded_file(e: &Element) -> Result { if e.tag() != ElementTag::Pre { return Ok(MaybeEmbeddedFile::NotFile); } if !e.has_attr("class", "file") { return Ok(MaybeEmbeddedFile::NotFile); } let id = e.attr("id"); if id.is_none() { return Ok(MaybeEmbeddedFile::NotFile); } let id = id.unwrap(); if id.value().is_none() { return Err(MdError::NoIdValue(e.location())); } let id = id.value().unwrap(); if id.is_empty() { return Err(MdError::NoIdValue(e.location())); } // The contents we get from the pulldown_cmark parser for a code // block will always end in a newline, unless the block is empty. // This is different from the parser we previously used, which // didn't end in a newline, if the contents is exactly one line. // The add-newline attribute was designed for the previous parser // behavior, and so its interpretations for new new parser is a // little less straightforward. To avoid convoluted logic, we // remove the newline if it's there before obeying add-newline. let mut contents = e.content(); if contents.ends_with('\n') { contents.truncate(contents.len() - 1); } let addnl = AddNewline::parse(e.attr("add-newline"), e.location()); match addnl? { AddNewline::No => { // Newline already isn't there. } AddNewline::Yes => { // Add newline. contents.push('\n'); } AddNewline::Auto => { // Add newline if not there. if !contents.ends_with('\n') { contents.push('\n'); } } }; Ok(MaybeEmbeddedFile::IsFile(EmbeddedFile::new( id.into(), contents, ))) } #[derive(Debug, Eq, PartialEq, Copy, Clone)] enum AddNewline { Auto, Yes, No, } impl AddNewline { fn parse(attr: Option<&Attribute>, loc: Location) -> Result { if let Some(attr) = attr { if let Some(value) = attr.value() { let value = match value { "yes" => Self::Yes, "no" => Self::No, "auto" => Self::Auto, _ => return Err(MdError::BadAddNewline(value.into(), loc)), }; return Ok(value); } }; Ok(Self::Auto) } } fn extract_scenario(e: &[StructureElement]) -> Result<(Option, usize), SubplotError> { if e.is_empty() { // If we get here, it's a programming error. panic!("didn't expect empty list of elements"); } match &e[0] { StructureElement::Snippet(_, loc) => Err(SubplotError::ScenarioBeforeHeading(loc.clone())), StructureElement::Heading(title, level, loc) => { let mut scen = Scenario::new(title, loc.clone()); for (i, item) in e.iter().enumerate().skip(1) { match item { StructureElement::Heading(_, level2, _loc) => { let is_subsection = *level2 > *level; if is_subsection { if scen.has_steps() { } else { return Ok((None, i)); } } else if scen.has_steps() { return Ok((Some(scen), i)); } else { return Ok((None, i)); } } StructureElement::Snippet(text, loc) => { let steps = parse_scenario_snippet(text, loc)?; for step in steps { scen.add(&step); } } } } if scen.has_steps() { Ok((Some(scen), e.len())) } else { Ok((None, e.len())) } } } } mod typeset { const UNWANTED_ATTRS: &[&str] = &["add-newline"]; use crate::{ html::{Attribute, Content, Element, ElementTag, Location}, Bindings, PartialStep, }; // use crate::parser::parse_scenario_snippet; // use crate::Bindings; // use crate::PartialStep; // use crate::ScenarioStep; // use crate::StepKind; use crate::SubplotError; use crate::{DiagramMarkup, DotMarkup, MatchedStep, PikchrMarkup, PlantumlMarkup, Svg}; // use crate::{Warning, Warnings}; use base64::prelude::{Engine as _, BASE64_STANDARD}; pub(crate) fn typeset_element( e: &Element, template: Option<&str>, bindings: &Bindings, ) -> Result { let new = match e.tag() { ElementTag::Pre if e.has_attr("class", "scenario") => { typeset_scenario(e, template, bindings) } ElementTag::Pre if e.has_attr("class", "file") => typeset_file(e), ElementTag::Pre if e.has_attr("class", "example") => typeset_example(e), ElementTag::Pre if e.has_attr("class", "dot") => typeset_dot(e), ElementTag::Pre if e.has_attr("class", "plantuml") => typeset_plantuml(e), ElementTag::Pre if e.has_attr("class", "roadmap") => typeset_roadmap(e), ElementTag::Pre if e.has_attr("class", "pikchr") => typeset_pikchr(e), _ => { let mut new = Element::new(e.tag()); for attr in e.all_attrs() { new.push_attribute(attr.clone()); } for child in e.children() { if let Content::Elt(ce) = child { new.push_child(Content::Elt(typeset_element(ce, template, bindings)?)); } else { new.push_child(child.clone()); } } Ok(new) } }; let mut new = new?; new.drop_attributes(UNWANTED_ATTRS); Ok(new) } fn typeset_scenario( e: &Element, template: Option<&str>, bindings: &Bindings, ) -> Result { let template = template.unwrap_or("python"); // FIXME let text = e.content(); let steps = crate::steps::parse_scenario_snippet(&text, &Location::Unknown)?; let mut scenario = Element::new(ElementTag::Div); scenario.push_attribute(Attribute::new("class", "scenario")); for step in steps { if let Ok(matched) = bindings.find(template, &step) { scenario.push_child(Content::Elt(typeset_step(&matched))); } else { scenario.push_child(Content::Text(step.text().into())); } } Ok(scenario) } fn typeset_step(matched: &MatchedStep) -> Element { let mut e = Element::new(ElementTag::Div); let mut keyword = Element::new(ElementTag::Span); keyword.push_attribute(Attribute::new("class", "keyword")); keyword.push_child(Content::Text(matched.kind().to_string())); keyword.push_child(Content::Text(" ".into())); e.push_child(Content::Elt(keyword)); for part in matched.parts() { match part { PartialStep::UncapturedText(snippet) => { let text = snippet.text(); if !text.trim().is_empty() { let mut estep = Element::new(ElementTag::Span); estep.push_attribute(Attribute::new("class", "uncaptured")); estep.push_child(Content::Text(text.into())); e.push_child(Content::Elt(estep)); } } PartialStep::CapturedText { name: _, text, kind, } => { if !text.trim().is_empty() { let mut estep = Element::new(ElementTag::Span); let class = format!("capture-{}", kind.as_str()); estep.push_attribute(Attribute::new("class", &class)); estep.push_child(Content::Text(text.into())); e.push_child(Content::Elt(estep)); } } } } e } fn typeset_file(e: &Element) -> Result { Ok(e.clone()) // FIXME } fn typeset_example(e: &Element) -> Result { Ok(e.clone()) // FIXME } fn typeset_dot(e: &Element) -> Result { let dot = e.content(); let svg = DotMarkup::new(&dot).as_svg()?; Ok(svg_to_element(svg, "Dot diagram")) } fn typeset_plantuml(e: &Element) -> Result { let markup = e.content(); let svg = PlantumlMarkup::new(&markup).as_svg()?; Ok(svg_to_element(svg, "UML diagram")) } fn typeset_pikchr(e: &Element) -> Result { let markup = e.content(); let svg = PikchrMarkup::new(&markup, None).as_svg()?; Ok(svg_to_element(svg, "Pikchr diagram")) } fn typeset_roadmap(e: &Element) -> Result { const WIDTH: usize = 50; let yaml = e.content(); let roadmap = roadmap::from_yaml(&yaml)?; let dot = roadmap.format_as_dot(WIDTH)?; let svg = DotMarkup::new(&dot).as_svg()?; Ok(svg_to_element(svg, "Road map")) } fn svg_to_element(svg: Svg, alt: &str) -> Element { let url = svg_as_data_url(svg); let img = html_img(&url, alt); html_p(vec![Content::Elt(img)]) } fn svg_as_data_url(svg: Svg) -> String { let svg = BASE64_STANDARD.encode(svg.data()); format!("data:image/svg+xml;base64,{svg}") } fn html_p(children: Vec) -> Element { let mut new = Element::new(ElementTag::P); for child in children { new.push_child(child); } new } fn html_img(src: &str, alt: &str) -> Element { let mut new = Element::new(ElementTag::Img); new.push_attribute(Attribute::new("src", src)); new.push_attribute(Attribute::new("alt", alt)); new } } /// Errors returned from the module. #[derive(Debug, thiserror::Error, Eq, PartialEq)] pub enum MdError { /// Tried to treat a non-PRE element as an embedded file. #[error("{1}: tried to treat wrong kind of element as an embedded file: {0}")] NotCodeBlockElement(String, Location), /// Code block lacks the "file" attribute. #[error("{0}; code block is not a file")] NotFile(Location), /// Code block lacks an identifier to use as the filename. #[error("{0}: code block lacks a filename identifier")] NoId(Location), /// Identifier is empty. #[error("{0}: code block has an empty filename identifier")] NoIdValue(Location), /// Value of add-newline attribute is not understood. #[error("{1}: value of add-newline attribute is not understood: {0}")] BadAddNewline(String, Location), } #[cfg(test)] mod test_extract { use super::extract_scenario; use super::Location; use super::StructureElement; use crate::Scenario; use crate::SubplotError; fn h(title: &str, level: i64) -> StructureElement { StructureElement::Heading(title.to_string(), level, Location::unknown()) } fn s(text: &str) -> StructureElement { StructureElement::Snippet(text.to_string(), Location::unknown()) } fn check_result( r: Result<(Option, usize), SubplotError>, title: Option<&str>, i: usize, ) { assert!(r.is_ok()); let (actual_scen, actual_i) = r.unwrap(); if title.is_none() { assert!(actual_scen.is_none()); } else { assert!(actual_scen.is_some()); let scen = actual_scen.unwrap(); assert_eq!(scen.title(), title.unwrap()); } assert_eq!(actual_i, i); } #[test] fn returns_nothing_if_there_is_no_scenario() { let elements: Vec = vec![h("title", 1)]; let r = extract_scenario(&elements); check_result(r, None, 1); } #[test] fn returns_scenario_if_there_is_one() { let elements = vec![h("title", 1), s("given something")]; let r = extract_scenario(&elements); check_result(r, Some("title"), 2); } #[test] fn skips_scenarioless_section_in_favour_of_same_level() { let elements = vec![h("first", 1), h("second", 1), s("given something")]; let r = extract_scenario(&elements); check_result(r, None, 1); let r = extract_scenario(&elements[1..]); check_result(r, Some("second"), 2); } #[test] fn returns_parent_section_with_scenario_snippet() { let elements = vec![ h("1", 1), s("given something"), h("1.1", 2), s("when something"), h("2", 1), ]; let r = extract_scenario(&elements); check_result(r, Some("1"), 4); let r = extract_scenario(&elements[4..]); check_result(r, None, 1); } #[test] fn skips_scenarioless_parent_heading() { let elements = vec![h("1", 1), h("1.1", 2), s("given something"), h("2", 1)]; let r = extract_scenario(&elements); check_result(r, None, 1); let r = extract_scenario(&elements[1..]); check_result(r, Some("1.1"), 2); let r = extract_scenario(&elements[3..]); check_result(r, None, 1); } #[test] fn skips_scenarioless_deeper_headings() { let elements = vec![h("1", 1), h("1.1", 2), h("2", 1), s("given something")]; let r = extract_scenario(&elements); check_result(r, None, 1); let r = extract_scenario(&elements[1..]); check_result(r, None, 1); let r = extract_scenario(&elements[2..]); check_result(r, Some("2"), 2); } #[test] fn returns_error_if_scenario_has_no_title() { let elements = vec![s("given something")]; let r = extract_scenario(&elements); match r { Err(SubplotError::ScenarioBeforeHeading(_)) => (), _ => panic!("unexpected result {:?}", r), } } } #[cfg(test)] mod test { use super::{AddNewline, Attribute, Location, Markdown, MdError}; use std::path::{Path, PathBuf}; #[test] fn loads_empty_doc() { let md = Markdown::new_from_str(Path::new(""), "").unwrap(); assert!(md.html.content().is_empty()); } #[test] fn finds_no_images_in_empty_doc() { let md = Markdown::new_from_str(Path::new(""), "").unwrap(); assert!(md.images().is_empty()); } #[test] fn finds_images() { let md = Markdown::new_from_str( Path::new(""), r#" ![alt text](filename.jpg) "#, ) .unwrap(); assert_eq!(md.images(), vec![PathBuf::from("filename.jpg")]); } #[test] fn finds_no_blocks_in_empty_doc() { let md = Markdown::new_from_str(Path::new(""), "").unwrap(); assert!(md.block_classes().is_empty()); } #[test] fn finds_no_classes_when_no_blocks_have_them() { let md = Markdown::new_from_str( Path::new(""), r#" ~~~ ~~~ "#, ) .unwrap(); assert!(md.block_classes().is_empty()); } #[test] fn finds_block_classes() { let md = Markdown::new_from_str( Path::new(""), r#" ~~~scenario ~~~ "#, ) .unwrap(); let classes: Vec = md.block_classes().iter().map(|s| s.into()).collect(); assert_eq!(classes, vec!["scenario"]); } #[test] fn finds_no_scenarios_in_empty_doc() { let md = Markdown::new_from_str(Path::new(""), "").unwrap(); let scenarios = md.scenarios().unwrap(); assert!(scenarios.is_empty()); } #[test] fn finds_scenarios() { let md = Markdown::new_from_str( Path::new(""), r#" # Super trooper ~~~scenario given ABBA ~~~ "#, ) .unwrap(); let scenarios = md.scenarios().unwrap(); assert_eq!(scenarios.len(), 1); let scen = scenarios.first().unwrap(); assert_eq!(scen.title(), "Super trooper"); let steps = scen.steps(); assert_eq!(steps.len(), 1); let step = steps.first().unwrap(); assert_eq!(step.kind(), crate::StepKind::Given); assert_eq!(step.text(), "ABBA"); } #[test] fn finds_no_embedded_files_in_empty_doc() { let md = Markdown::new_from_str(Path::new(""), "").unwrap(); let files = md.embedded_files(); assert!(files.unwrap().files().is_empty()); } #[test] fn finds_embedded_files() { let md = Markdown::new_from_str( Path::new(""), r#" ~~~{#fileid .file .text} hello, world ~~~ "#, ) .unwrap(); let files = md.embedded_files().unwrap(); assert_eq!(files.files().len(), 1); let file = files.files().first().unwrap(); assert_eq!(file.filename(), "fileid"); assert_eq!(file.contents(), "hello, world\n"); } #[test] fn parses_no_auto_newline_as_auto() { assert_eq!( AddNewline::parse(None, Location::unknown()).unwrap(), AddNewline::Auto ); } #[test] fn parses_auto_as_auto() { let attr = Attribute::new("add-newline", "auto"); assert_eq!( AddNewline::parse(Some(&attr), Location::unknown()).unwrap(), AddNewline::Auto ); } #[test] fn parses_yes_as_yes() { let attr = Attribute::new("add-newline", "yes"); assert_eq!( AddNewline::parse(Some(&attr), Location::unknown()).unwrap(), AddNewline::Yes ); } #[test] fn parses_no_as_no() { let attr = Attribute::new("add-newline", "no"); assert_eq!( AddNewline::parse(Some(&attr), Location::unknown()).unwrap(), AddNewline::No ); } #[test] fn parses_empty_as_error() { let attr = Attribute::new("add-newline", ""); assert_eq!( AddNewline::parse(Some(&attr), Location::unknown()), Err(MdError::BadAddNewline("".into(), Location::unknown())) ); } #[test] fn parses_garbage_as_error() { let attr = Attribute::new("add-newline", "garbage"); assert_eq!( AddNewline::parse(Some(&attr), Location::unknown()), Err(MdError::BadAddNewline( "garbage".into(), Location::unknown() )) ); } }