From d00d3013168f39c136bd877ff3d4328e29e3d991 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sun, 22 Jan 2023 11:34:56 +0200 Subject: refactor: use new Markdown type instead of pandoc_ast visitors Sponsored-by: author --- src/doc.rs | 321 +++++++------------------------------------------------- src/embedded.rs | 10 +- 2 files changed, 39 insertions(+), 292 deletions(-) diff --git a/src/doc.rs b/src/doc.rs index cc6a616..7a7e64c 100644 --- a/src/doc.rs +++ b/src/doc.rs @@ -1,30 +1,27 @@ use crate::ast; +use crate::bindings::CaptureType; use crate::generate_test_program; use crate::get_basedir_from; -use crate::visitor; +use crate::md::Markdown; use crate::EmbeddedFile; use crate::EmbeddedFiles; -use crate::LintingVisitor; use crate::MatchedScenario; use crate::Metadata; use crate::PartialStep; use crate::Scenario; -use crate::ScenarioStep; use crate::Style; use crate::SubplotError; use crate::YamlMetadata; -use crate::{bindings::CaptureType, parser::parse_scenario_snippet}; use crate::{Warning, Warnings}; use std::collections::HashSet; +use std::convert::TryFrom; use std::default::Default; use std::fmt::Debug; use std::fs::read; use std::ops::Deref; use std::path::{Path, PathBuf}; -use pandoc_ast::{MutVisitor, Pandoc}; - use log::{error, trace}; /// The set of known (special) classes which subplot will always recognise @@ -44,19 +41,6 @@ static KNOWN_PANDOC_CLASSES: &[&str] = &["numberLines", "noNumberLines"]; /// A parsed Subplot document. /// -/// Pandoc works by parsing its various input files and constructing -/// an abstract syntax tree or AST. When Pandoc generates output, it -/// works based on the AST. This way, the input parsing and output -/// generation are cleanly separated. -/// -/// A Pandoc filter can modify the AST before output generation -/// starts working. This allows the filter to make changes to what -/// gets output, without having to understand the input documents at -/// all. -/// -/// This function is a Pandoc filter, to be use with -/// pandoc_ast::filter, for typesetting Subplot documents. -/// /// # Example /// /// fix this example; @@ -84,7 +68,7 @@ static KNOWN_PANDOC_CLASSES: &[&str] = &["numberLines", "noNumberLines"]; pub struct Document { subplot: PathBuf, markdowns: Vec, - ast: Pandoc, + md: Markdown, meta: Metadata, files: EmbeddedFiles, style: Style, @@ -95,20 +79,22 @@ impl Document { fn new( subplot: PathBuf, markdowns: Vec, - ast: Pandoc, + md: Markdown, meta: Metadata, files: EmbeddedFiles, style: Style, ) -> Document { - Document { + let doc = Document { subplot, markdowns, - ast, + md, meta, files, style, warnings: Warnings::default(), - } + }; + trace!("Document::new -> {:#?}", doc); + doc } /// Return all warnings about this document. @@ -121,23 +107,22 @@ impl Document { subplot: PathBuf, markdowns: Vec, yamlmeta: &ast::YamlMetadata, - mut ast: Pandoc, + mut md: Markdown, style: Style, template: Option<&str>, ) -> Result where P: AsRef + Debug, { - let meta = Metadata::new(basedir, yamlmeta, template)?; - let mut linter = LintingVisitor::default(); - trace!("Walking AST for linting..."); - linter.walk_pandoc(&mut ast); - if !linter.issues.is_empty() { + let meta = Metadata::from_yaml_metadata(basedir, yamlmeta, template)?; + trace!("metadata from YAML: {:#?}", meta); + let mut issues = md.lint(); + if !issues.is_empty() { // Currently we can't really return more than one error so return one - return Err(linter.issues.remove(0)); + return Err(issues.remove(0)); } - let files = EmbeddedFiles::new(&mut ast); - let doc = Document::new(subplot, markdowns, ast, meta, files, style); + let files = md.embedded_files(); + let doc = Document::new(subplot, markdowns, md, meta, files, style); trace!("Loaded from JSON OK"); Ok(doc) } @@ -160,42 +145,20 @@ impl Document { ); let meta = load_metadata_from_yaml_file(filename)?; + trace!("metadata from YAML file: {:#?}", meta); let mdfile = meta.markdown(); let mdfile = basedir.join(mdfile); - let markdowns = vec![mdfile.clone()]; + let mut md = Markdown::try_from(mdfile.as_path())?; + md.set_metadata(&meta); + let markdowns = vec![mdfile]; - let mut pandoc = pandoc::new(); - pandoc.add_input(&mdfile); - pandoc.set_input_format( - pandoc::InputFormat::Markdown, - vec![pandoc::MarkdownExtension::Citations], - ); - pandoc.set_output_format(pandoc::OutputFormat::Json, vec![]); - pandoc.set_output(pandoc::OutputKind::Pipe); - - // Add external Pandoc filters. - crate::policy::add_citeproc(&mut pandoc); - - trace!( - "Invoking Pandoc to parse document {:?} into AST as JSON", - mdfile, - ); - let json = match pandoc.execute().map_err(SubplotError::Pandoc)? { - pandoc::PandocOutput::ToBuffer(o) => o, - _ => return Err(SubplotError::NotJson), - }; - trace!("Pandoc was happy"); - - trace!("Parsing document AST as JSON..."); - let mut ast: Pandoc = serde_json::from_str(&json).map_err(SubplotError::AstJson)?; - ast.meta = meta.to_map(); let doc = Self::from_ast( basedir, filename.into(), markdowns, &meta, - ast, + md, style, template, )?; @@ -204,44 +167,12 @@ impl Document { Ok(doc) } - /// Construct a Document from a named file, using the pullmark_cmark crate. - /// - /// The file can be in the CommonMark format, with some - /// extensions. This uses the pulldown-cmark crate to parse the - /// file into an AST. - pub fn from_file_with_pullmark( - basedir: &Path, - filename: &Path, - style: Style, - template: Option<&str>, - ) -> Result { - trace!("Parsing document with pullmark-cmark from {:?}", filename); - let meta = load_metadata_from_yaml_file(filename)?; - let mdfile = meta.markdown(); - let mdfile = basedir.join(mdfile); - let markdown = std::fs::read_to_string(&mdfile) - .map_err(|err| SubplotError::ReadFile(mdfile.clone(), err))?; - let ast = ast::AbstractSyntaxTree::new(meta.clone(), &markdown); - - trace!("Parsed document OK"); - Self::from_ast( - basedir, - filename.into(), - vec![mdfile], - &meta, - ast.to_pandoc(), - style, - template, - ) - } - /// Return the AST of a Document, serialized as JSON. /// /// This is useful in a Pandoc filter, so that the filter can give /// it back to Pandoc for typesetting. - pub fn ast(&self) -> Result { - let json = serde_json::to_string(&self.ast).map_err(SubplotError::AstJson)?; - Ok(json) + pub fn ast(&mut self) -> Result { + self.md.to_json() } /// Return the document's metadata. @@ -284,11 +215,8 @@ impl Document { names.push(x.to_path_buf()); } - let mut visitor = visitor::ImageVisitor::new(); - visitor.walk_pandoc(&mut self.ast); - for x in visitor.images().iter() { - names.push(x.to_path_buf()); - } + let mut images = self.md.images(); + names.append(&mut images); names } @@ -299,7 +227,7 @@ impl Document { } /// Check the document for common problems. - pub fn lint(&self) -> Result<(), SubplotError> { + pub fn lint(&mut self) -> Result<(), SubplotError> { trace!("Linting document"); self.check_doc_has_title()?; self.check_filenames_are_unique()?; @@ -330,12 +258,9 @@ impl Document { } /// Check that all the block classes in the document are known - fn check_block_classes(&self) -> Result<(), SubplotError> { - let mut visitor = visitor::BlockClassVisitor::default(); - // Irritatingly we can't immutably visit the AST for some reason - // This clone() is expensive and unwanted, but I'm not sure how - // to get around it for now - visitor.walk_pandoc(&mut self.ast.clone()); + fn check_block_classes(&mut self) -> Result<(), SubplotError> { + let classes_in_doc = self.md.block_classes(); + // Build the set of known good classes let mut known_classes: HashSet = HashSet::new(); for class in std::iter::empty() @@ -347,11 +272,7 @@ impl Document { known_classes.insert(class.to_string()); } // Acquire the set of used names which are not known - let unknown_classes: Vec<_> = visitor - .classes - .difference(&known_classes) - .cloned() - .collect(); + let unknown_classes: Vec<_> = classes_in_doc.difference(&known_classes).cloned().collect(); // If the unknown classes list is not empty, we had a problem and // we will report it to the user. if !unknown_classes.is_empty() { @@ -457,28 +378,15 @@ impl Document { /// Typeset a Subplot document. pub fn typeset(&mut self) { - let mut visitor = - visitor::TypesettingVisitor::new(self.style.clone(), self.meta.bindings()); - visitor.walk_pandoc(&mut self.ast); - self.warnings.push_all(visitor.warnings()); + let warnings = self.md.typeset(self.style.clone(), self.meta.bindings()); + for w in warnings { + self.warnings.push(w); + } } /// Return all scenarios in a document. pub fn scenarios(&mut self) -> Result, SubplotError> { - let mut visitor = visitor::StructureVisitor::new(); - visitor.walk_pandoc(&mut self.ast); - - let mut scenarios: Vec = vec![]; - - let mut i = 0; - while i < visitor.elements.len() { - let (maybe, new_i) = extract_scenario(&visitor.elements[i..])?; - if let Some(scen) = maybe { - scenarios.push(scen); - } - i += new_i; - } - Ok(scenarios) + self.md.scenarios() } /// Return matched scenarios in a document. @@ -564,7 +472,7 @@ where style ); crate::resource::add_search_path(filename.parent().unwrap()); - let doc = Document::from_file_with_pullmark(&base_path, filename, style, template)?; + let doc = Document::from_file(&base_path, filename, style, template)?; trace!("Loaded doc from file OK"); Ok(doc) } @@ -617,156 +525,3 @@ impl CodegenOutput { Self { template, doc } } } - -fn extract_scenario(e: &[visitor::Element]) -> Result<(Option, usize), SubplotError> { - if e.is_empty() { - // If we get here, it's a programming error. - panic!("didn't expect empty list of elements"); - } - - match &e[0] { - visitor::Element::Snippet(_) => Err(SubplotError::ScenarioBeforeHeading), - visitor::Element::Heading(title, level) => { - let mut scen = Scenario::new(title); - let mut prevkind = None; - for (i, item) in e.iter().enumerate().skip(1) { - match item { - visitor::Element::Heading(_, level2) => { - let is_subsection = *level2 > *level; - if is_subsection { - if scen.has_steps() { - } else { - return Ok((None, i)); - } - } else if scen.has_steps() { - return Ok((Some(scen), i)); - } else { - return Ok((None, i)); - } - } - visitor::Element::Snippet(text) => { - for line in parse_scenario_snippet(text) { - let step = ScenarioStep::new_from_str(line, prevkind)?; - scen.add(&step); - prevkind = Some(step.kind()); - } - } - } - } - if scen.has_steps() { - Ok((Some(scen), e.len())) - } else { - Ok((None, e.len())) - } - } - } -} - -#[cfg(test)] -mod test_extract { - use super::extract_scenario; - use super::visitor::Element; - use crate::Scenario; - use crate::SubplotError; - - fn h(title: &str, level: i64) -> Element { - Element::Heading(title.to_string(), level) - } - - fn s(text: &str) -> Element { - Element::Snippet(text.to_string()) - } - - fn check_result( - r: Result<(Option, usize), SubplotError>, - title: Option<&str>, - i: usize, - ) { - assert!(r.is_ok()); - let (actual_scen, actual_i) = r.unwrap(); - if title.is_none() { - assert!(actual_scen.is_none()); - } else { - assert!(actual_scen.is_some()); - let scen = actual_scen.unwrap(); - assert_eq!(scen.title(), title.unwrap()); - } - assert_eq!(actual_i, i); - } - - #[test] - fn returns_nothing_if_there_is_no_scenario() { - let elements: Vec = vec![h("title", 1)]; - let r = extract_scenario(&elements); - check_result(r, None, 1); - } - - #[test] - fn returns_scenario_if_there_is_one() { - let elements = vec![h("title", 1), s("given something")]; - let r = extract_scenario(&elements); - check_result(r, Some("title"), 2); - } - - #[test] - fn skips_scenarioless_section_in_favour_of_same_level() { - let elements = vec![h("first", 1), h("second", 1), s("given something")]; - let r = extract_scenario(&elements); - check_result(r, None, 1); - let r = extract_scenario(&elements[1..]); - check_result(r, Some("second"), 2); - } - - #[test] - fn returns_parent_section_with_scenario_snippet() { - let elements = vec![ - h("1", 1), - s("given something"), - h("1.1", 2), - s("when something"), - h("2", 1), - ]; - let r = extract_scenario(&elements); - check_result(r, Some("1"), 4); - let r = extract_scenario(&elements[4..]); - check_result(r, None, 1); - } - - #[test] - fn skips_scenarioless_parent_heading() { - let elements = vec![h("1", 1), h("1.1", 2), s("given something"), h("2", 1)]; - - let r = extract_scenario(&elements); - check_result(r, None, 1); - - let r = extract_scenario(&elements[1..]); - check_result(r, Some("1.1"), 2); - - let r = extract_scenario(&elements[3..]); - check_result(r, None, 1); - } - - #[test] - fn skips_scenarioless_deeper_headings() { - let elements = vec![h("1", 1), h("1.1", 2), h("2", 1), s("given something")]; - - let r = extract_scenario(&elements); - check_result(r, None, 1); - - let r = extract_scenario(&elements[1..]); - check_result(r, None, 1); - - let r = extract_scenario(&elements[2..]); - check_result(r, Some("2"), 2); - } - - #[test] - fn returns_error_if_scenario_has_no_title() { - let elements = vec![s("given something")]; - let r = extract_scenario(&elements); - match r { - Err(SubplotError::ScenarioBeforeHeading) => (), - _ => panic!("unexpected result {:?}", r), - } - } -} diff --git a/src/embedded.rs b/src/embedded.rs index c868054..e71fa54 100644 --- a/src/embedded.rs +++ b/src/embedded.rs @@ -1,4 +1,3 @@ -use pandoc_ast::{MutVisitor, Pandoc}; use serde::{Deserialize, Serialize}; /// A data file embedded in the document. @@ -26,19 +25,12 @@ impl EmbeddedFile { } /// A collection of data files embedded in document. -#[derive(Debug, Eq, PartialEq, Clone, Serialize, Deserialize)] +#[derive(Debug, Default, Eq, PartialEq, Clone, Serialize, Deserialize)] pub struct EmbeddedFiles { files: Vec, } impl EmbeddedFiles { - /// Create new set of data files. - pub fn new(ast: &mut Pandoc) -> EmbeddedFiles { - let mut files = EmbeddedFiles { files: vec![] }; - files.walk_pandoc(ast); - files - } - /// Return slice of all data files. pub fn files(&self) -> &[EmbeddedFile] { &self.files -- cgit v1.2.1