diff options
author | Daniel Silverstone <dsilvers+gitlab@digital-scurf.org> | 2023-04-10 09:35:39 +0000 |
---|---|---|
committer | Daniel Silverstone <dsilvers+gitlab@digital-scurf.org> | 2023-04-10 09:35:39 +0000 |
commit | 4d43a0b7eb0bbe46347d08f7d7c50a619f702061 (patch) | |
tree | 1f3cf140e4a1789cdb3f660f275514ec1d642f3f | |
parent | 103e14b2da6aa0d2f5597beff128e630ab2fb292 (diff) | |
parent | b526ed863de0d1138315cc8b41e456b9925ef3e3 (diff) | |
download | subplot-4d43a0b7eb0bbe46347d08f7d7c50a619f702061.tar.gz |
Merge branch 'liw/drop-pandoc' into 'main'
Drop use of Pandoc, drop support for PDF output
See merge request subplot/subplot!319
-rw-r--r-- | Cargo.lock | 72 | ||||
-rw-r--r-- | Cargo.toml | 10 | ||||
-rw-r--r-- | README.md | 4 | ||||
-rwxr-xr-x | check | 13 | ||||
-rw-r--r-- | debian/control | 16 | ||||
-rw-r--r-- | flake.nix | 2 | ||||
-rw-r--r-- | src/bin/subplot.rs | 24 | ||||
-rw-r--r-- | src/diagrams.rs | 2 | ||||
-rw-r--r-- | src/doc.rs | 45 | ||||
-rw-r--r-- | src/error.rs | 32 | ||||
-rw-r--r-- | src/html.rs | 724 | ||||
-rw-r--r-- | src/lib.rs | 8 | ||||
-rw-r--r-- | src/md.rs | 550 | ||||
-rw-r--r-- | src/md/panhelper.rs | 26 | ||||
-rw-r--r-- | src/md/typeset.rs | 229 | ||||
-rw-r--r-- | src/md/visitor/block_class.rs | 25 | ||||
-rw-r--r-- | src/md/visitor/embedded.rs | 35 | ||||
-rw-r--r-- | src/md/visitor/image.rs | 25 | ||||
-rw-r--r-- | src/md/visitor/linting.rs | 40 | ||||
-rw-r--r-- | src/md/visitor/mod.rs | 17 | ||||
-rw-r--r-- | src/md/visitor/structure.rs | 100 | ||||
-rw-r--r-- | src/md/visitor/typesetting.rs | 85 | ||||
-rw-r--r-- | src/metadata.rs | 21 | ||||
-rw-r--r-- | src/policy.rs | 23 | ||||
-rw-r--r-- | subplot.md | 155 |
25 files changed, 1247 insertions, 1036 deletions
@@ -242,12 +242,6 @@ dependencies = [ ] [[package]] -name = "either" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" - -[[package]] name = "env_logger" version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -486,6 +480,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" [[package]] +name = "html-escape" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d1ad449764d627e22bfd7cd5e8868264fc9236e07c752972b4080cd351cb476" +dependencies = [ + "utf8-width", +] + +[[package]] name = "humansize" version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -557,15 +560,6 @@ dependencies = [ ] [[package]] -name = "itertools" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f56a2d0bc861f9165be4eb3442afd3c236d8a98afd426f65d92324ae1091a484" -dependencies = [ - "either", -] - -[[package]] name = "itoa" version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -584,6 +578,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" [[package]] +name = "line-col" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e69cdf6b85b5c8dce514f694089a2cf8b1a702f6cd28607bcb3cf296c9778db" + +[[package]] name = "linked-hash-map" version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -699,37 +699,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" [[package]] -name = "pandoc" -version = "0.8.10" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2eb8469d27ed9fd7925629076a3675fea964c3f44c49662bdf549a8b7ddf0820" -dependencies = [ - "itertools", -] - -[[package]] -name = "pandoc_ast" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b960d9b78f94feb2a43ace4dda1d2b924a0d5a0639f399620fb54fe2943a9e7" -dependencies = [ - "serde", - "serde_derive", - "serde_json", -] - -[[package]] -name = "pandoc_ast" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b89d3de9c9dd508abc39cd5c0ab022b49ef5b18145ce30abacf00333bba1a74" -dependencies = [ - "serde", - "serde_derive", - "serde_json", -] - -[[package]] name = "percent-encoding" version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1139,11 +1108,10 @@ dependencies = [ "env_logger", "file_diff", "git-testament", + "html-escape", "lazy_static", + "line-col", "log", - "pandoc", - "pandoc_ast 0.7.3", - "pandoc_ast 0.8.4", "pikchr", "pulldown-cmark", "regex", @@ -1501,6 +1469,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" [[package]] +name = "utf8-width" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5190c9442dcdaf0ddd50f37420417d219ae5261bbf5db120d0f9bab996c9cba1" + +[[package]] name = "valuable" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -17,11 +17,6 @@ default-run = "subplot" [workspace] members = ["subplotlib", "subplotlib-derive", "subplot-build", "examples/seq"] -[features] -default = ["ast_07"] -ast_07 = ["pandoc_ast_07"] -ast_08 = ["pandoc_ast_08"] - [dependencies] anyhow = "1" base64 = "0.13.0" @@ -30,9 +25,6 @@ file_diff = "1" git-testament = "0.2" lazy_static = "1" log = "0.4.16" -pandoc = "0.8.0" -pandoc_ast_07 = { package = "pandoc_ast", version = "0.7", optional = true } 
-pandoc_ast_08 = { package = "pandoc_ast", version = "0.8", optional = true } pikchr = "0.1" pulldown-cmark = "0.9.0" regex = "1" @@ -46,6 +38,8 @@ tempfile-fast = "0.3.1" thiserror = "1" time = { version = "0.3", features = ["formatting", "macros"] } env_logger = "0.9.0" +html-escape = "0.2.13" +line-col = "0.2.1" [dependencies.tera] version = "1" @@ -49,9 +49,7 @@ Debian or a derivative of it: ~~~sh $ sudo apt-get install build-essential git debhelper dh-cargo python3 \ - pandoc texlive-latex-base texlive-latex-recommended \ - texlive-fonts-recommended librsvg2-bin graphviz pandoc-citeproc \ - plantuml daemonize lmodern procps + librsvg2-bin graphviz plantuml daemonize procps ~~~ Additionally, any packages reported by running the following command: @@ -94,11 +94,6 @@ class Runcmd: p = self.runcmd_unchecked(["which", name], stdout=DEVNULL) return p.returncode == 0 - def pandoc_is_newer(self): - """Is pandoc new enough for --citeproc""" - p = self.runcmd(["pandoc", "--help"], stdout=PIPE) - return "--citeproc" in p.stdout.decode("UTF-8") - def cargo(self, args, **kwargs): """Run cargo with arguments.""" return self.runcmd(["cargo"] + args, **kwargs) @@ -341,20 +336,12 @@ def check_tooling(r): "bash", "cargo", "dot", - "pandoc", - "pandoc-citeproc", "plantuml", "rustc", "rustfmt", ] for command in commands: if not r.got_command(command): - if command == "pandoc-citeproc": - if r.pandoc_is_newer(): - r.msg( - " Fortunately pandoc is new enough for --citeproc, no need for pandoc-citeproc" - ) - continue sys.exit(f"can't find {command}, which is needed for test suite") if not r.got_command("daemonize") and not r.got_command("/usr/sbin/daemonize"): diff --git a/debian/control b/debian/control index c44f536..3c5fe2d 100644 --- a/debian/control +++ b/debian/control @@ -3,21 +3,15 @@ Maintainer: Lars Wirzenius <liw@liw.fi> Section: utils Priority: optional Standards-Version: 4.2.0 -Build-Depends: debhelper (>= 10~), dh-cargo, python3, python3-requests, pandoc, 
texlive-latex-base, - texlive-latex-recommended, texlive-fonts-recommended, texlive-plain-generic, librsvg2-bin, graphviz, - pandoc-citeproc, plantuml, daemonize, lmodern, procps +Build-Depends: debhelper (>= 10~), dh-cargo, python3, python3-requests, + librsvg2-bin, graphviz, plantuml, daemonize, procps Homepage: https://subplot.liw.fi Package: subplot Architecture: any -Depends: ${misc:Depends}, ${shlibs:Depends}, pandoc, pandoc-citeproc, lmodern -Recommends: librsvg2-bin, - graphviz -Suggests: texlive-latex-base, - texlive-latex-recommended, - texlive-fonts-recommended, - texlive-plain-generic, - plantuml +Depends: ${misc:Depends}, ${shlibs:Depends} +Recommends: librsvg2-bin, graphviz +Suggests: plantuml Built-Using: ${cargo:Built-Using} Description: automatic tool for acceptance testing Capture and communicate acceptance criteria for software and systems, @@ -23,8 +23,6 @@ stdenv graphviz plantuml - pandoc - texlive.combined.scheme-medium daemonize librsvg (python3.withPackages test-python-packages) diff --git a/src/bin/subplot.rs b/src/bin/subplot.rs index 7998416..41d8894 100644 --- a/src/bin/subplot.rs +++ b/src/bin/subplot.rs @@ -221,7 +221,7 @@ impl Metadata { #[derive(Debug, Parser)] /// Typeset subplot document /// -/// Process a subplot document and typeset it using Pandoc. +/// Process a subplot document and typeset it. struct Docgen { /// Allow warnings in document? #[clap(long)] @@ -254,7 +254,6 @@ impl Docgen { let style = Style::default(); let mut doc = load_linted_doc(&self.input, style, self.template.as_deref(), self.merciful)?; - let mut pandoc = pandoc::new(); // Metadata date from command line or file mtime. However, we // can't set it directly, since we don't want to override the date // in the actual document, if given, so we only set @@ -268,28 +267,11 @@ impl Docgen { let filename = doc.meta().basedir().join(doc.meta().markdown_filename()); Self::mtime_formatted(Self::mtime(&filename)?) 
}; - pandoc.add_option(pandoc::PandocOption::Meta("date".to_string(), Some(date))); - pandoc.add_option(pandoc::PandocOption::TableOfContents); - pandoc.add_option(pandoc::PandocOption::Standalone); - pandoc.add_option(pandoc::PandocOption::NumberSections); if Self::need_output(&mut doc, self.template.as_deref(), &self.output) { doc.typeset(&mut Warnings::default()); - pandoc.set_input_format(pandoc::InputFormat::Json, vec![]); - pandoc.set_input(pandoc::InputKind::Pipe(doc.ast()?)); - pandoc.set_output(pandoc::OutputKind::File(self.output.clone())); - - debug!("Executing pandoc to produce {}", self.output.display()); - let r = pandoc.execute(); - if let Err(pandoc::PandocError::Err(output)) = r { - let code = output.status.code().unwrap_or(127); - let stderr = String::from_utf8_lossy(&output.stderr); - error!("Failed to execute Pandoc: exit code {}", code); - error!("{}", stderr.strip_suffix('\n').unwrap()); - - return Err(anyhow::Error::msg("Pandoc failed")); - } - r?; + std::fs::write(&self.output, doc.to_html(&date)) + .map_err(|e| SubplotError::WriteFile(self.output.clone(), e))?; } Ok(()) diff --git a/src/diagrams.rs b/src/diagrams.rs index a62553f..5d91c2e 100644 --- a/src/diagrams.rs +++ b/src/diagrams.rs @@ -217,8 +217,6 @@ impl PlantumlMarkup { } env::join_paths(Some(java_bin).iter().chain(cur_path.iter())).ok() } - - // Acquire path to JAR for pandoc } impl DiagramMarkup for PlantumlMarkup { @@ -1,6 +1,8 @@ use crate::bindings::CaptureType; use crate::generate_test_program; use crate::get_basedir_from; +use crate::html::Element; +use crate::html::HtmlPage; use crate::md::Markdown; use crate::EmbeddedFile; use crate::EmbeddedFiles; @@ -31,10 +33,10 @@ static SPECIAL_CLASSES: &[&str] = &[ /// as being valid. static KNOWN_FILE_CLASSES: &[&str] = &["rust", "yaml", "python", "sh", "shell", "markdown", "bash"]; -/// The set of known (special-to-pandoc) classes which subplot will always recognise -/// as being valid. 
We include the subplot-specific noNumberLines class which we use -/// to invert the default numberLines on .file blocks. -static KNOWN_PANDOC_CLASSES: &[&str] = &["numberLines", "noNumberLines"]; +/// The set of known classes which subplot will always recognise as +/// being valid. We include the subplot-specific noNumberLines class +/// which we use to invert the default numberLines on .file blocks. +static KNOWN_BLOCK_CLASSES: &[&str] = &["numberLines", "noNumberLines"]; /// A parsed Subplot document. /// @@ -102,11 +104,6 @@ impl Document { { let meta = Metadata::from_yaml_metadata(basedir, yamlmeta, template)?; trace!("metadata from YAML: {:#?}", meta); - let mut issues = md.lint(); - if !issues.is_empty() { - // Currently we can't really return more than one error so return one - return Err(issues.remove(0)); - } let files = md.embedded_files(); let doc = Document::new(subplot, md, meta, files, style); trace!("Loaded from JSON OK"); @@ -114,10 +111,6 @@ impl Document { } /// Construct a Document from a named file. - /// - /// The file can be in any format Pandoc understands. This runs - /// Pandoc to parse the file into an AST, so it can be a little - /// slow. pub fn from_file( basedir: &Path, filename: &Path, @@ -144,12 +137,17 @@ impl Document { Ok(doc) } - /// Return the AST of a Document, serialized as JSON. - /// - /// This is useful in a Pandoc filter, so that the filter can give - /// it back to Pandoc for typesetting. 
- pub fn ast(&self) -> Result<String, SubplotError> { - self.md.to_json() + /// Return Document as an HTML page serialized into HTML text + pub fn to_html(&mut self, date: &str) -> String { + let mut head = Element::new(crate::html::ElementTag::Head); + let mut title = Element::new(crate::html::ElementTag::Title); + title.push_child(crate::html::Content::Text(self.meta().title().into())); + head.push_child(crate::html::Content::Elt(title)); + + self.md.set_date(date.into()); + + let page = HtmlPage::new(head, self.md.to_html()); + page.serialize().unwrap() // FIXME } /// Return the document's metadata. @@ -157,6 +155,11 @@ impl Document { &self.meta } + /// Set document date. + pub fn set_date(&mut self, date: String) { + self.meta.set_date(date); + } + /// Return all source filenames for the document. /// /// The sources are any files that affect the output so that if @@ -241,7 +244,7 @@ impl Document { for class in std::iter::empty() .chain(SPECIAL_CLASSES.iter().map(Deref::deref)) .chain(KNOWN_FILE_CLASSES.iter().map(Deref::deref)) - .chain(KNOWN_PANDOC_CLASSES.iter().map(Deref::deref)) + .chain(KNOWN_BLOCK_CLASSES.iter().map(Deref::deref)) .chain(self.meta().classes()) { known_classes.insert(class.to_string()); @@ -404,8 +407,6 @@ fn load_metadata_from_yaml_file(filename: &Path) -> Result<YamlMetadata, Subplot } /// Load a `Document` from a file. -/// -/// This version uses Pandoc to parse the Markdown. 
pub fn load_document<P>( filename: P, style: Style, diff --git a/src/error.rs b/src/error.rs index 2469a5b..a322bd6 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,4 +1,6 @@ +use crate::html::HtmlError; use crate::matches::MatchedSteps; +use crate::md::MdError; use std::path::PathBuf; use std::process::Output; @@ -138,18 +140,6 @@ pub enum SubplotError { #[error("document lacks specified template support")] TemplateSupportNotPresent, - /// Pandoc AST is not JSON - /// - /// Subplot acts as a Pandoc filter, and as part of that Pandoc - /// constructs an _abstract syntax tree_ from the input document, - /// and feeds it to the filter as JSON. However, when Subplot was - /// parsing the AST, it wasn't JSON. - /// - /// This probably means there's something wrong with Pandoc, it's - /// Rust bindings, or Subplot. - #[error("Pandoc produce AST not in JSON")] - NotJson, - /// First scenario is before first heading /// /// Subplot scenarios are group by the input document's structure. @@ -283,9 +273,9 @@ pub enum SubplotError { #[error("Error when writing to {0}")] WriteFile(PathBuf, #[source] std::io::Error), - /// Error executing Pandoc. - #[error("Pandoc failed")] - Pandoc(#[source] pandoc::PandocError), + /// Error parsing markdown into HTML. + #[error(transparent)] + ParseMarkdown(#[from] HtmlError), /// Regular expression error /// @@ -294,10 +284,6 @@ pub enum SubplotError { #[error("Failed to compile regular expression: {0:?}")] Regex(String, #[source] regex::Error), - /// Error parsing the Pandoc abstract syntax tree as JSON. - #[error("Failed to parse document AST as JSON")] - AstJson(#[source] serde_json::Error), - /// Error parsing YAML metadata for document. #[error("Failed to parse YAML metadata")] Metadata(#[source] serde_yaml::Error), @@ -318,6 +304,10 @@ pub enum SubplotError { #[error(transparent)] Utf8Error(#[from] std::str::Utf8Error), + /// Markdown errors. + #[error(transparent)] + MdError(#[from] MdError), + /// String formatting failed. 
#[error("Failed in string formattiing: {0}")] StringFormat(std::fmt::Error), @@ -329,6 +319,10 @@ pub enum SubplotError { /// Input file mtime lookup. #[error("Failed to get modification time of {0}")] InputFileMtime(PathBuf, #[source] std::io::Error), + + /// Error typesetting a roadmap diagram. + #[error(transparent)] + Roadmap(#[from] roadmap::RoadmapError), } impl SubplotError { diff --git a/src/html.rs b/src/html.rs new file mode 100644 index 0000000..bff9c75 --- /dev/null +++ b/src/html.rs @@ -0,0 +1,724 @@ +//! A representation of HTML using Rust types. + +#![deny(missing_docs)] + +use html_escape::{encode_double_quoted_attribute, encode_text}; +use line_col::LineColLookup; +use log::{debug, trace}; +use pulldown_cmark::{CodeBlockKind, Event, HeadingLevel, Options, Parser, Tag}; +use std::fmt::Write as _; +use std::io::Write; +use std::path::{Path, PathBuf}; + +/// A HTML page, consisting of a head and a body. +#[derive(Debug)] +pub struct HtmlPage { + head: Element, + body: Element, +} + +impl Default for HtmlPage { + fn default() -> Self { + Self { + head: Element::new(ElementTag::Head), + body: Element::new(ElementTag::Body), + } + } +} + +impl HtmlPage { + /// Create a new HTML page from a head and a body element. + pub fn new(head: Element, body: Element) -> Self { + Self { head, body } + } + + /// Return the page's head element. + pub fn head(&self) -> &Element { + &self.head + } + + /// Return the page's body element. + pub fn body(&self) -> &Element { + &self.body + } + + /// Try to serialize an HTML page into HTML text. + pub fn serialize(&self) -> Result<String, HtmlError> { + let mut html = Element::new(ElementTag::Html); + html.push_child(Content::Elt(self.head.clone())); + html.push_child(Content::Elt(self.body.clone())); + html.serialize() + } + + /// Try to write an HTML page as text into a file. 
+ pub fn write(&self, filename: &Path) -> Result<(), HtmlError> { + if let Some(parent) = filename.parent() { + trace!("parent: {}", parent.display()); + if !parent.exists() { + debug!("creating directory {}", parent.display()); + std::fs::create_dir_all(parent) + .map_err(|e| HtmlError::CreateDir(parent.into(), e))?; + } + } + + trace!("writing HTML: {}", filename.display()); + let mut f = std::fs::File::create(filename) + .map_err(|e| HtmlError::CreateFile(filename.into(), e))?; + let html = self.serialize()?; + f.write_all(html.as_bytes()) + .map_err(|e| HtmlError::FileWrite(filename.into(), e))?; + Ok(()) + } +} + +/// Parse Markdown text into an HTML element. +pub fn parse(markdown: &str) -> Result<Element, HtmlError> { + let mut options = Options::empty(); + options.insert(Options::ENABLE_HEADING_ATTRIBUTES); + options.insert(Options::ENABLE_STRIKETHROUGH); + options.insert(Options::ENABLE_TABLES); + options.insert(Options::ENABLE_TASKLISTS); + let p = Parser::new_ext(markdown, options).into_offset_iter(); + let linecol = LineColLookup::new(markdown); + let mut stack = Stack::new(); + stack.push(Element::new(ElementTag::Body)); + for (event, loc) in p { + trace!("event {:?}", event); + let (line, col) = linecol.get(loc.start); + let loc = Location::new(line, col); + match event { + Event::Start(tag) => match tag { + Tag::Paragraph => stack.push_tag(ElementTag::P, loc), + Tag::Heading(level, id, classes) => { + let tag = match level { + HeadingLevel::H1 => ElementTag::H1, + HeadingLevel::H2 => ElementTag::H2, + HeadingLevel::H3 => ElementTag::H3, + HeadingLevel::H4 => ElementTag::H4, + HeadingLevel::H5 => ElementTag::H5, + HeadingLevel::H6 => ElementTag::H6, + }; + let mut h = Element::new(tag); + if let Some(id) = id { + h.push_attribute(Attribute::new("id", id)); + } + if !classes.is_empty() { + let mut names = String::new(); + for c in classes { + if !names.is_empty() { + names.push(' '); + } + names.push_str(c); + } + 
h.push_attribute(Attribute::new("class", &names)); + } + stack.push(h); + } + Tag::BlockQuote => stack.push_tag(ElementTag::Blockquote, loc), + Tag::CodeBlock(kind) => { + stack.push_tag(ElementTag::Pre, loc); + if let CodeBlockKind::Fenced(attrs) = kind { + let mut e = stack.pop(); + e.set_block_attributes(BlockAttr::parse(&attrs)); + stack.push(e); + } + } + Tag::List(None) => stack.push_tag(ElementTag::Ul, loc), + Tag::List(Some(start)) => { + let mut e = Element::new(ElementTag::Ol).with_location(loc); + e.push_attribute(Attribute::new("start", &format!("{}", start))); + stack.push(e); + } + Tag::Item => stack.push_tag(ElementTag::Li, loc), + Tag::FootnoteDefinition(_) => unreachable!("{:?}", tag), + Tag::Table(_) => stack.push_tag(ElementTag::Table, loc), + Tag::TableHead => stack.push_tag(ElementTag::Th, loc), + Tag::TableRow => stack.push_tag(ElementTag::Tr, loc), + Tag::TableCell => stack.push_tag(ElementTag::Td, loc), + Tag::Emphasis => stack.push_tag(ElementTag::Em, loc), + Tag::Strong => stack.push_tag(ElementTag::Strong, loc), + Tag::Strikethrough => stack.push_tag(ElementTag::Del, loc), + Tag::Link(_, url, title) => { + let mut link = Element::new(ElementTag::A); + link.push_attribute(Attribute::new("href", url.as_ref())); + if !title.is_empty() { + link.push_attribute(Attribute::new("title", title.as_ref())); + } + stack.push(link); + } + Tag::Image(_, url, title) => { + let mut e = Element::new(ElementTag::Img); + e.push_attribute(Attribute::new("src", url.as_ref())); + if !title.is_empty() { + e.push_attribute(Attribute::new("title", title.as_ref())); + } + stack.push(e); + } + }, + Event::End(tag) => match &tag { + Tag::Paragraph => { + trace!("at end of paragraph, looking for definition list use"); + let e = stack.pop(); + let s = as_plain_text(e.children()); + trace!("paragraph text: {:?}", s); + if s.starts_with(": ") || s.contains("\n: ") { + return Err(HtmlError::DefinitionList(loc.line, loc.col)); + } + stack.append_child(Content::Elt(e)); + 
} + Tag::Heading(_, _, _) + | Tag::List(_) + | Tag::Item + | Tag::Link(_, _, _) + | Tag::Image(_, _, _) + | Tag::Emphasis + | Tag::Table(_) + | Tag::TableHead + | Tag::TableRow + | Tag::TableCell + | Tag::Strong + | Tag::Strikethrough + | Tag::BlockQuote + | Tag::CodeBlock(_) => { + let e = stack.pop(); + stack.append_child(Content::Elt(e)); + } + Tag::FootnoteDefinition(_) => unreachable!("{:?}", tag), + }, + Event::Text(s) => stack.append_str(s.as_ref()), + Event::Code(s) => { + let mut code = Element::new(ElementTag::Code); + code.push_child(Content::Text(s.to_string())); + stack.append_element(code); + } + Event::Html(s) => stack.append_child(Content::Html(s.to_string())), + Event::FootnoteReference(s) => trace!("footnote ref {:?}", s), + Event::SoftBreak => stack.append_str("\n"), + Event::HardBreak => stack.append_element(Element::new(ElementTag::Br)), + Event::Rule => stack.append_element(Element::new(ElementTag::Hr)), + Event::TaskListMarker(done) => { + let marker = if done { + "\u{2612} " // Unicode for box with X + } else { + "\u{2610} " // Unicode for empty box + }; + stack.append_str(marker); + } + } + } + + let mut body = stack.pop(); + assert!(stack.is_empty()); + body.fix_up_img_alt(); + Ok(body) +} + +fn as_plain_text(content: &[Content]) -> String { + let mut buf = String::new(); + for c in content { + if let Content::Text(s) = c { + buf.push_str(s); + } + } + buf +} + +/// An HTML element. +#[derive(Debug, Clone)] +pub struct Element { + loc: Option<Location>, + tag: ElementTag, + attrs: Vec<Attribute>, + children: Vec<Content>, +} + +impl Element { + /// Create a new element. + pub fn new(tag: ElementTag) -> Self { + Self { + loc: None, + tag, + attrs: vec![], + children: vec![], + } + } + + fn with_location(mut self, loc: Location) -> Self { + self.loc = Some(loc); + self + } + + /// Set location. + pub fn set_location(&mut self, loc: Location) { + self.loc = Some(loc); + } + + /// Get location. 
+ pub fn location(&self) -> &Option<Location> { + &self.loc + } + + fn set_block_attributes(&mut self, block_attrs: Vec<BlockAttr>) { + for block_attr in block_attrs { + let attr = Attribute::from(block_attr); + self.attrs.push(attr); + } + } + + /// Add a new attribute. + pub fn push_attribute(&mut self, attr: Attribute) { + self.attrs.push(attr); + } + + /// Append a new child to the element. + pub fn push_child(&mut self, child: Content) { + self.children.push(child); + } + + /// Return an element's tag. + pub fn tag(&self) -> ElementTag { + self.tag + } + + /// All attributes. + pub fn all_attrs(&self) -> &[Attribute] { + &self.attrs + } + + /// Return value of a named attribute, if any. + pub fn attr(&self, name: &str) -> Option<&Attribute> { + self.attrs.iter().find(|a| a.name() == name) + } + + /// Has an attribute with a specific value? + pub fn has_attr(&self, name: &str, wanted: &str) -> bool { + self.attrs + .iter() + .filter(|a| a.name() == name && a.value() == Some(wanted)) + .count() + > 0 + } + + /// Return the concatenated text content of direct children, + /// ignoring any elements. + pub fn content(&self) -> String { + let mut buf = String::new(); + for child in self.children() { + if let Content::Text(s) = child { + buf.push_str(s) + } + } + buf + } + + /// Return all the children of an element. + pub fn children(&self) -> &[Content] { + &self.children + } + + fn fix_up_img_alt(&mut self) { + if self.tag == ElementTag::Img { + let alt = as_plain_text(self.children()); + self.push_attribute(Attribute::new("alt", &alt)); + self.children.clear(); + } else { + for child in self.children.iter_mut() { + if let Content::Elt(kid) = child { + kid.fix_up_img_alt(); + } + } + } + } + + /// Serialize an element into HTML text. 
+ pub fn serialize(&self) -> Result<String, HtmlError> { + let mut buf = String::new(); + self.serialize_to_buf_without_added_newlines(&mut buf) + .map_err(HtmlError::Format)?; + Ok(buf) + } + + fn serialize_to_buf_without_added_newlines( + &self, + buf: &mut String, + ) -> Result<(), std::fmt::Error> { + if self.children.is_empty() { + write!(buf, "<{}", self.tag.name())?; + self.serialize_attrs_to_buf(buf)?; + write!(buf, "/>")?; + } else { + write!(buf, "<{}", self.tag.name())?; + self.serialize_attrs_to_buf(buf)?; + write!(buf, ">")?; + for c in self.children() { + match c { + Content::Text(s) => buf.push_str(&encode_text(s)), + Content::Elt(e) => e.serialize_to_buf_adding_block_newline(buf)?, + Content::Html(s) => buf.push_str(s), + } + } + write!(buf, "</{}>", self.tag.name())?; + } + Ok(()) + } + + fn serialize_to_buf_adding_block_newline( + &self, + buf: &mut String, + ) -> Result<(), std::fmt::Error> { + if self.tag.is_block() { + writeln!(buf)?; + } + self.serialize_to_buf_without_added_newlines(buf) + } + + fn serialize_attrs_to_buf(&self, buf: &mut String) -> Result<(), std::fmt::Error> { + for attr in self.attrs.iter() { + write!(buf, " {}", attr.name())?; + if let Some(value) = attr.value() { + write!(buf, "=\"{}\"", encode_double_quoted_attribute(value))?; + } + } + Ok(()) + } +} + +/// The tag of an HTML element. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[allow(missing_docs)] +pub enum ElementTag { + Html, + Head, + Meta, + Body, + Div, + H1, + H2, + H3, + H4, + H5, + H6, + P, + Ol, + Ul, + Li, + Blockquote, + Pre, + Em, + Strong, + Del, + A, + Img, + Table, + Title, + Th, + Tr, + Td, + Br, + Hr, + Code, +} + +impl ElementTag { + /// Name of the tag. 
+ pub fn name(&self) -> &str { + match self { + Self::Html => "html", + Self::Head => "head", + Self::Meta => "meta", + Self::Body => "body", + Self::Div => "div", + Self::H1 => "h1", + Self::H2 => "h2", + Self::H3 => "h3", + Self::H4 => "h4", + Self::H5 => "h5", + Self::H6 => "h6", + Self::P => "p", + Self::Ol => "ol", + Self::Ul => "ul", + Self::Li => "li", + Self::Blockquote => "blockquote", + Self::Pre => "pre", + Self::Em => "em", + Self::Strong => "strong", + Self::Del => "del", + Self::A => "a", + Self::Img => "img", + Self::Table => "table", + Self::Th => "th", + Self::Title => "title", + Self::Tr => "tr", + Self::Td => "td", + Self::Br => "br", + Self::Hr => "hr", + Self::Code => "code", + } + } + + fn is_block(&self) -> bool { + matches!( + self, + Self::Html + | Self::Head + | Self::Meta + | Self::Body + | Self::Div + | Self::H1 + | Self::H2 + | Self::H3 + | Self::H4 + | Self::H5 + | Self::H6 + | Self::P + | Self::Ol + | Self::Ul + | Self::Li + | Self::Blockquote + | Self::Table + | Self::Th + | Self::Tr + | Self::Br + | Self::Hr + ) + } +} + +/// An attribute of an HTML element. +#[derive(Clone, Debug)] +pub struct Attribute { + name: String, + value: Option<String>, +} + +impl Attribute { + /// Create a new element attribute. + pub fn new(name: &str, value: &str) -> Self { + Self { + name: name.into(), + value: Some(value.into()), + } + } + + /// Return the name of the attribute. + pub fn name(&self) -> &str { + &self.name + } + + /// Return the value of the attribute, if any. + pub fn value(&self) -> Option<&str> { + self.value.as_deref() + } +} + +impl From<BlockAttr> for Attribute { + fn from(block_attr: BlockAttr) -> Self { + match block_attr { + BlockAttr::Id(v) => Self::new("id", &v), + BlockAttr::Class(v) => Self::new("class", &v), + BlockAttr::KeyValue(k, v) => Self::new(&k, &v), + } + } +} + +/// Content in HTML. +#[derive(Clone, Debug)] +pub enum Content { + /// Arbitrary text. + Text(String), + + /// An HTML element. 
+ Elt(Element), + + /// Arbitrary HTML text. + Html(String), +} + +/// Location of element in source file. +#[derive(Debug, Clone, Copy)] +pub struct Location { + line: usize, + col: usize, +} + +impl Location { + fn new(line: usize, col: usize) -> Self { + Self { line, col } + } +} + +struct Stack { + stack: Vec<Element>, +} + +impl Stack { + fn new() -> Self { + Self { stack: vec![] } + } + + fn is_empty(&self) -> bool { + self.stack.is_empty() + } + + fn push(&mut self, e: Element) { + trace!("pushed {:?}", e); + self.stack.push(e); + } + + fn push_tag(&mut self, tag: ElementTag, loc: Location) { + self.push(Element::new(tag).with_location(loc)); + } + + fn pop(&mut self) -> Element { + let e = self.stack.pop().unwrap(); + trace!("popped {:?}", e); + e + } + + fn append_child(&mut self, child: Content) { + trace!("appended {:?}", child); + let mut parent = self.stack.pop().unwrap(); + parent.push_child(child); + self.stack.push(parent); + } + + fn append_str(&mut self, text: &str) { + self.append_child(Content::Text(text.into())); + } + + fn append_element(&mut self, e: Element) { + self.append_child(Content::Elt(e)); + } +} + +/// Errors from the `html` module. +#[derive(Debug, thiserror::Error)] +pub enum HtmlError { + /// Failed to create a directory. + #[error("failed to create directory {0}")] + CreateDir(PathBuf, #[source] std::io::Error), + + /// Failed to create a file. + #[error("failed to create file {0}")] + CreateFile(PathBuf, #[source] std::io::Error), + + /// Failed to write to a file. + #[error("failed to write to file {0}")] + FileWrite(PathBuf, #[source] std::io::Error), + + /// Input contains an attempt to use a definition list in + /// Markdown. + #[error("attempt to use definition lists in Markdown: line {0}, column {1}")] + DefinitionList(usize, usize), + + /// String formatting error. This is likely a programming error. + #[error("string formatting error: {0}")] + Format(#[source] std::fmt::Error), +} + +/// Code block attribute. 
+#[derive(Debug, Clone, Eq, PartialEq)] +pub enum BlockAttr { + /// An identifier. + Id(String), + /// A class. + Class(String), + /// A key/value pair. + KeyValue(String, String), +} + +impl BlockAttr { + fn id(s: &str) -> Self { + Self::Id(s.into()) + } + + fn class(s: &str) -> Self { + Self::Class(s.into()) + } + + fn key_value(k: &str, v: &str) -> Self { + Self::KeyValue(k.into(), v.into()) + } + + /// Parse a fenced code block tag. + pub fn parse(attrs: &str) -> Vec<Self> { + let mut result = vec![]; + for word in Self::parse_words(attrs) { + let attr = Self::parse_word(word); + result.push(attr); + } + result + } + + fn parse_words(attrs: &str) -> impl Iterator<Item = &str> { + if attrs.starts_with('{') && attrs.ends_with('}') { + attrs[1..attrs.len() - 1].split_ascii_whitespace() + } else { + attrs.split_ascii_whitespace() + } + } + + fn parse_word(word: &str) -> Self { + if let Some(id) = word.strip_prefix('#') { + Self::id(id) + } else if let Some(class) = word.strip_prefix('.') { + Self::class(class) + } else if let Some((key, value)) = word.split_once('=') { + Self::key_value(key, value) + } else { + Self::class(word) + } + } +} + +#[cfg(test)] +mod test_block_attr { + use super::BlockAttr; + + #[test] + fn empty_string() { + assert_eq!(BlockAttr::parse(""), vec![]); + } + + #[test] + fn plain_word() { + assert_eq!( + BlockAttr::parse("foo"), + vec![BlockAttr::Class("foo".into())] + ); + } + + #[test] + fn dot_word() { + assert_eq!( + BlockAttr::parse(".foo"), + vec![BlockAttr::Class("foo".into())] + ); + } + + #[test] + fn hash_word() { + assert_eq!(BlockAttr::parse("#foo"), vec![BlockAttr::Id("foo".into())]); + } + + #[test] + fn key_value() { + assert_eq!( + BlockAttr::parse("foo=bar"), + vec![BlockAttr::KeyValue("foo".into(), "bar".into())] + ); + } + + #[test] + fn several() { + assert_eq!( + BlockAttr::parse("{#foo .bar foobar yo=yoyo}"), + vec![ + BlockAttr::Id("foo".into()), + BlockAttr::Class("bar".into()), + BlockAttr::Class("foobar".into()), + 
BlockAttr::KeyValue("yo".into(), "yoyo".into()), + ] + ); + } +} @@ -6,13 +6,6 @@ #![deny(missing_docs)] -// Handle the multiple pandoc_ast versions - -#[cfg(feature = "pandoc_ast_07")] -extern crate pandoc_ast_07 as pandoc_ast; -#[cfg(feature = "pandoc_ast_08")] -extern crate pandoc_ast_08 as pandoc_ast; - mod error; pub use error::SubplotError; pub use error::Warning; @@ -34,6 +27,7 @@ mod metadata; pub use metadata::{Metadata, YamlMetadata}; mod doc; +pub mod html; pub mod md; pub use doc::Document; pub use doc::{codegen, load_document, load_document_with_pullmark}; @@ -1,26 +1,19 @@ //! A parsed Markdown document. use crate::{ - parse_scenario_snippet, Bindings, EmbeddedFiles, Scenario, ScenarioStep, Style, SubplotError, - Warnings, YamlMetadata, + html::{parse, Attribute, Content, Element, ElementTag}, + parse_scenario_snippet, Bindings, EmbeddedFile, EmbeddedFiles, Scenario, ScenarioStep, Style, + SubplotError, Warnings, YamlMetadata, }; use log::trace; -use pandoc_ast::{Map, MetaValue, MutVisitor, Pandoc}; -use serde_yaml::{Mapping, Value}; -use std::cell::RefCell; use std::collections::HashSet; use std::path::{Path, PathBuf}; -mod panhelper; -mod typeset; - -mod visitor; -use visitor::LintingVisitor; - /// A parsed Markdown document. #[derive(Debug)] pub struct Markdown { - pandoc: RefCell<Pandoc>, + html: Element, + meta: Option<YamlMetadata>, } impl Markdown { @@ -34,96 +27,143 @@ impl Markdown { } fn new_from_str(text: &str) -> Result<Self, SubplotError> { - let mut pandoc = pandoc::new(); - pandoc.set_input(pandoc::InputKind::Pipe(text.into())); - pandoc.set_input_format( - pandoc::InputFormat::Markdown, - vec![pandoc::MarkdownExtension::Citations], - ); - pandoc.set_output_format(pandoc::OutputFormat::Json, vec![]); - pandoc.set_output(pandoc::OutputKind::Pipe); + let html = parse(text)?; + Ok(Self::new(html)) + } - // Add external Pandoc filters. 
- crate::policy::add_citeproc(&mut pandoc); + fn new(html: Element) -> Self { + Self { html, meta: None } + } - let json = match pandoc.execute().map_err(SubplotError::Pandoc)? { - pandoc::PandocOutput::ToBuffer(o) => o, - _ => return Err(SubplotError::NotJson), - }; + /// Set document metadata from subplot. + pub fn set_metadata(&mut self, meta: &YamlMetadata) { + self.meta = Some(meta.clone()); + } - let ast: Pandoc = serde_json::from_str(&json).map_err(SubplotError::AstJson)?; - Ok(Self::new(ast)) + /// Set date. + pub fn set_date(&mut self, date: String) { + if let Some(meta) = &mut self.meta { + meta.set_date(date); + } } - fn new(pandoc: Pandoc) -> Self { - Self { - pandoc: RefCell::new(pandoc), + /// Return parsed HTML of the markdown. + pub fn to_html(&self) -> Element { + if let Some(meta) = &self.meta { + let mut div = Element::new(ElementTag::Div); + div.push_child(Content::Elt(Self::title(meta.title()))); + if let Some(authors) = meta.authors() { + div.push_child(Content::Elt(Self::authors(authors))); + } + if let Some(date) = meta.date() { + div.push_child(Content::Elt(Self::date(date))); + } + div.push_child(Content::Elt(self.html.clone())); + div + } else { + self.html.clone() } } - /// Set document metadata from subplot. - pub fn set_metadata(&mut self, meta: &YamlMetadata) { - self.pandoc.borrow_mut().meta = to_pandoc_meta(meta); + fn title(title: &str) -> Element { + let mut e = Element::new(ElementTag::H1); + e.push_child(Content::Text(title.into())); + e } - /// JSON representation of Pandoc AST. 
- pub fn to_json(&self) -> Result<String, SubplotError> { - let json = serde_json::to_string(&self.pandoc).map_err(SubplotError::AstJson)?; - Ok(json) + fn authors(authors: &[String]) -> Element { + let mut list = Element::new(ElementTag::P); + list.push_child(Content::Text("By: ".into())); + let mut first = true; + for a in authors { + if !first { + list.push_child(Content::Text(", ".into())); + } + list.push_child(Content::Text(a.into())); + first = false; + } + list } - /// Find problems. - pub fn lint(&self) -> Vec<SubplotError> { - let mut linter = LintingVisitor::default(); - linter.walk_pandoc(&mut self.pandoc.borrow_mut()); - linter.issues + fn date(date: &str) -> Element { + let mut e = Element::new(ElementTag::P); + e.push_child(Content::Text(date.into())); + e } /// Find included images. pub fn images(&self) -> Vec<PathBuf> { let mut names = vec![]; - let mut visitor = visitor::ImageVisitor::new(); - visitor.walk_pandoc(&mut self.pandoc.borrow_mut()); - for x in visitor.images().iter() { - names.push(x.to_path_buf()); + for e in Self::visit(&self.html) { + if e.tag() == ElementTag::Img { + if let Some(attr) = e.attr("src") { + if let Some(href) = attr.value() { + names.push(PathBuf::from(&href)); + } + } + } } names } + fn visit(e: &Element) -> Vec<&Element> { + let mut elements = vec![]; + Self::visit_helper(e, &mut elements); + elements + } + + fn visit_helper<'a>(e: &'a Element, elements: &mut Vec<&'a Element>) { + elements.push(e); + for child in e.children() { + if let Content::Elt(ee) = child { + Self::visit_helper(ee, elements); + } + } + } + /// Find classes used for fenced blocks. 
pub fn block_classes(&self) -> HashSet<String> { - let mut visitor = visitor::BlockClassVisitor::default(); - visitor.walk_pandoc(&mut self.pandoc.borrow_mut()); - visitor.classes + let mut classes: HashSet<String> = HashSet::new(); + + for e in Self::visit(&self.html) { + if e.tag() == ElementTag::Pre { + if let Some(attr) = e.attr("class") { + if let Some(value) = attr.value() { + classes.insert(value.into()); + } + } + } + } + + classes } /// Typeset. - pub fn typeset(&mut self, style: Style, bindings: &Bindings) -> Warnings { - let mut visitor = visitor::TypesettingVisitor::new(style, bindings); - visitor.walk_pandoc(&mut self.pandoc.borrow_mut()); - visitor.warnings() + pub fn typeset(&mut self, _style: Style, _bindings: &Bindings) -> Warnings { + let result = typeset::typeset_element(&self.html); + if let Ok(html) = result { + self.html = html; + Warnings::default() + } else { + // FIXME: handle warnings in some way + Warnings::default() + } } /// Find scenarios. pub fn scenarios(&self) -> Result<Vec<Scenario>, SubplotError> { - trace!( - "Metadata::scenarios: looking for scenarios: {:#?}", - self.pandoc - ); - - let mut visitor = visitor::StructureVisitor::new(); - visitor.walk_pandoc(&mut self.pandoc.borrow_mut()); - trace!( - "Metadata::scenarios: visitor found {} elements: {:#?}", - visitor.elements.len(), - visitor.elements - ); + let mut elements = vec![]; + for e in Self::visit(&self.html) { + if let Some(se) = Self::is_structure_element(e) { + elements.push(se); + } + } let mut scenarios: Vec<Scenario> = vec![]; let mut i = 0; - while i < visitor.elements.len() { - let (maybe, new_i) = extract_scenario(&visitor.elements[i..])?; + while i < elements.len() { + let (maybe, new_i) = extract_scenario(&elements[i..])?; if let Some(scen) = maybe { scenarios.push(scen); } @@ -133,128 +173,151 @@ impl Markdown { Ok(scenarios) } + fn is_structure_element(e: &Element) -> Option<StructureElement> { + match e.tag() { + ElementTag::H1 => 
Some(StructureElement::heading(e, 1)), + ElementTag::H2 => Some(StructureElement::heading(e, 2)), + ElementTag::H3 => Some(StructureElement::heading(e, 3)), + ElementTag::H4 => Some(StructureElement::heading(e, 4)), + ElementTag::H5 => Some(StructureElement::heading(e, 5)), + ElementTag::H6 => Some(StructureElement::heading(e, 6)), + ElementTag::Pre => { + if e.has_attr("class", "scenario") { + Some(StructureElement::snippet(e)) + } else { + None + } + } + _ => None, + } + } + /// Find embedded files. + // FIXME: this should return a result pub fn embedded_files(&self) -> EmbeddedFiles { let mut files = EmbeddedFiles::default(); - files.walk_pandoc(&mut self.pandoc.borrow_mut()); + + for e in Self::visit(&self.html) { + if let Ok(file) = embedded_file(e) { + files.push(file); + } + } + files } } -fn to_pandoc_meta(yaml: &YamlMetadata) -> Map<String, MetaValue> { - trace!("Creating metadata map from parsed YAML: {:#?}", yaml); - - let mut map: Map<String, MetaValue> = Map::new(); +// A structure element in the document: a heading or a scenario snippet. +#[derive(Debug)] +enum StructureElement { + // Headings consist of the text and the level of the heading. + Heading(String, i64), - map.insert("title".into(), meta_string(yaml.title())); + // Scenario snippets consist just of the unparsed text. 
+ Snippet(String), +} - if let Some(v) = &yaml.subtitle() { - map.insert("subtitle".into(), meta_string(v)); +impl StructureElement { + fn heading(e: &Element, level: i64) -> Self { + Self::Heading(e.content(), level) } - if let Some(authors) = yaml.authors() { - let authors: Vec<MetaValue> = authors - .iter() - .map(|s| MetaValue::MetaString(s.into())) - .collect(); - map.insert("author".into(), MetaValue::MetaList(authors)); + fn snippet(e: &Element) -> Self { + Self::Snippet(e.content()) } +} - if let Some(v) = yaml.date() { - map.insert("date".into(), meta_string(v)); +fn embedded_file(e: &Element) -> Result<EmbeddedFile, MdError> { + if e.tag() != ElementTag::Pre { + return Err(MdError::NotCodeBlockElement(e.tag().name().to_string())); } - if let Some(classes) = yaml.classes() { - map.insert("classes".into(), meta_strings(classes)); + if !e.has_attr("class", "file") { + return Err(MdError::NotFile); } - if !yaml.impls().is_empty() { - let impls = yaml - .impls() - .iter() - .map(|(k, v)| (k.to_owned(), Box::new(meta_path_bufs(v)))) - .collect(); - map.insert("impls".into(), MetaValue::MetaMap(impls)); + let id = e.attr("id"); + if id.is_none() { + return Err(MdError::NoId); } - - if let Some(v) = yaml.bibliographies() { - map.insert("bibliography".into(), meta_path_bufs(v)); + let id = id.unwrap(); + if id.value().is_none() { + return Err(MdError::NoIdValue); } - - if let Some(v) = yaml.bindings_filenames() { - map.insert("bindings".into(), meta_path_bufs(v)); + let id = id.value().unwrap(); + if id.is_empty() { + return Err(MdError::NoIdValue); } - if let Some(v) = yaml.documentclass() { - map.insert("documentclass".into(), meta_string(v)); + // The contents we get from the pulldown_cmark parser for a code + // block will always end in a newline, unless the block is empty. + // This is different from the parser we previously used, which + // didn't end in a newline, if the contents is exactly one line. 
+ // The add-newline attribute was designed for the previous parser + // behavior, and so its interpretations for new new parser is a + // little less straightforward. To avoid convoluted logic, we + // remove the newline if it's there before obeying add-newline. + let mut contents = e.content(); + if contents.ends_with('\n') { + contents.truncate(contents.len() - 1); } - - if let Some(pandoc) = yaml.pandoc() { - for (key, value) in pandoc.iter() { - map.insert(key.to_string(), value_to_pandoc(value)); + match AddNewline::parse(e.attr("add-newline"))? { + AddNewline::No => { + // Newline already isn't there. } - } + AddNewline::Yes => { + // Add newline. + contents.push('\n'); + } + AddNewline::Auto => { + // Add newline if not there. + if !contents.ends_with('\n') { + contents.push('\n'); + } + } + }; - trace!("Created metadata map from parsed YAML"); - map + Ok(EmbeddedFile::new(id.into(), contents)) } -fn mapping_to_pandoc(mapping: &Mapping) -> MetaValue { - let mut map = Map::new(); - for (key, value) in mapping.iter() { - let key = if let MetaValue::MetaString(s) = value_to_pandoc(key) { - s - } else { - panic!("key not a string: {:?}", key); - }; - map.insert(key, Box::new(value_to_pandoc(value))); - } - - MetaValue::MetaMap(map) +#[derive(Debug, Eq, PartialEq, Copy, Clone)] +enum AddNewline { + Auto, + Yes, + No, } -fn value_to_pandoc(data: &Value) -> MetaValue { - match data { - Value::Null => unreachable!("null not OK"), - Value::Number(_) => unreachable!("number not OK"), - Value::Sequence(_) => unreachable!("sequence not OK"), - - Value::Bool(b) => MetaValue::MetaBool(*b), - Value::String(s) => MetaValue::MetaString(s.clone()), - Value::Mapping(mapping) => mapping_to_pandoc(mapping), +impl AddNewline { + fn parse(attr: Option<&Attribute>) -> Result<Self, MdError> { + if let Some(attr) = attr { + if let Some(value) = attr.value() { + let value = match value { + "yes" => Self::Yes, + "no" => Self::No, + "auto" => Self::Auto, + _ => return 
Err(MdError::BadAddNewline(value.into())), + }; + return Ok(value); + } + }; + Ok(Self::Auto) } } -fn meta_string(s: &str) -> MetaValue { - MetaValue::MetaString(s.to_string()) -} - -fn meta_strings(v: &[String]) -> MetaValue { - MetaValue::MetaList(v.iter().map(|s| meta_string(s)).collect()) -} - -fn meta_path_buf(p: &Path) -> MetaValue { - meta_string(&p.display().to_string()) -} - -fn meta_path_bufs(v: &[PathBuf]) -> MetaValue { - MetaValue::MetaList(v.iter().map(|p| meta_path_buf(p)).collect()) -} - -fn extract_scenario(e: &[visitor::Element]) -> Result<(Option<Scenario>, usize), SubplotError> { +fn extract_scenario(e: &[StructureElement]) -> Result<(Option<Scenario>, usize), SubplotError> { if e.is_empty() { // If we get here, it's a programming error. panic!("didn't expect empty list of elements"); } match &e[0] { - visitor::Element::Snippet(_) => Err(SubplotError::ScenarioBeforeHeading), - visitor::Element::Heading(title, level) => { + StructureElement::Snippet(_) => Err(SubplotError::ScenarioBeforeHeading), + StructureElement::Heading(title, level) => { let mut scen = Scenario::new(title); let mut prevkind = None; for (i, item) in e.iter().enumerate().skip(1) { match item { - visitor::Element::Heading(_, level2) => { + StructureElement::Heading(_, level2) => { let is_subsection = *level2 > *level; if is_subsection { if scen.has_steps() { @@ -267,7 +330,7 @@ fn extract_scenario(e: &[visitor::Element]) -> Result<(Option<Scenario>, usize), return Ok((None, i)); } } - visitor::Element::Snippet(text) => { + StructureElement::Snippet(text) => { for line in parse_scenario_snippet(text) { let step = ScenarioStep::new_from_str(line, prevkind)?; scen.add(&step); @@ -285,19 +348,148 @@ fn extract_scenario(e: &[visitor::Element]) -> Result<(Option<Scenario>, usize), } } +mod typeset { + + use crate::html::{Attribute, Content, Element, ElementTag}; + // use crate::parser::parse_scenario_snippet; + // use crate::Bindings; + // use crate::PartialStep; + // use 
crate::ScenarioStep; + // use crate::StepKind; + use crate::SubplotError; + use crate::{DiagramMarkup, DotMarkup, PikchrMarkup, PlantumlMarkup, Svg}; + // use crate::{Warning, Warnings}; + + pub(crate) fn typeset_element(e: &Element) -> Result<Element, SubplotError> { + match e.tag() { + ElementTag::Pre if e.has_attr("class", "scenario") => typeset_scenario(e), + ElementTag::Pre if e.has_attr("class", "file") => typeset_file(e), + ElementTag::Pre if e.has_attr("class", "example") => typeset_example(e), + ElementTag::Pre if e.has_attr("class", "dot") => typeset_dot(e), + ElementTag::Pre if e.has_attr("class", "plantuml") => typeset_plantuml(e), + ElementTag::Pre if e.has_attr("class", "roadmap") => typeset_roadmap(e), + ElementTag::Pre if e.has_attr("class", "pikchr") => typeset_pikchr(e), + _ => { + let mut new = Element::new(e.tag()); + for attr in e.all_attrs() { + new.push_attribute(attr.clone()); + } + for child in e.children() { + if let Content::Elt(ce) = child { + new.push_child(Content::Elt(typeset_element(ce)?)); + } else { + new.push_child(child.clone()); + } + } + Ok(new) + } + } + } + + fn typeset_scenario(e: &Element) -> Result<Element, SubplotError> { + Ok(e.clone()) // FIXME + } + + fn typeset_file(e: &Element) -> Result<Element, SubplotError> { + Ok(e.clone()) // FIXME + } + + fn typeset_example(e: &Element) -> Result<Element, SubplotError> { + Ok(e.clone()) // FIXME + } + + fn typeset_dot(e: &Element) -> Result<Element, SubplotError> { + let dot = e.content(); + let svg = DotMarkup::new(&dot).as_svg()?; + Ok(svg_to_element(svg)) + } + + fn typeset_plantuml(e: &Element) -> Result<Element, SubplotError> { + let markup = e.content(); + let svg = PlantumlMarkup::new(&markup).as_svg()?; + Ok(svg_to_element(svg)) + } + + fn typeset_pikchr(e: &Element) -> Result<Element, SubplotError> { + let markup = e.content(); + // FIXME: is there ever a need to use classes other than .pikchr? 
+ let svg = PikchrMarkup::new(&markup, None).as_svg()?; + Ok(svg_to_element(svg)) + } + + fn typeset_roadmap(e: &Element) -> Result<Element, SubplotError> { + const WIDTH: usize = 50; + + let yaml = e.content(); + let roadmap = roadmap::from_yaml(&yaml)?; + let dot = roadmap.format_as_dot(WIDTH)?; + let svg = DotMarkup::new(&dot).as_svg()?; + Ok(svg_to_element(svg)) + } + + fn svg_to_element(svg: Svg) -> Element { + let url = svg_as_data_url(svg); + let img = html_img(&url); + html_p(vec![Content::Elt(img)]) + } + + fn svg_as_data_url(svg: Svg) -> String { + let svg = base64::encode(svg.data()); + format!("data:image/svg+xml;base64,{svg}") + } + + fn html_p(children: Vec<Content>) -> Element { + let mut new = Element::new(ElementTag::P); + for child in children { + new.push_child(child); + } + new + } + + fn html_img(src: &str) -> Element { + let mut new = Element::new(ElementTag::Img); + new.push_attribute(Attribute::new("src", src)); + new + } +} + +/// Errors returned from the module. +#[derive(Debug, thiserror::Error, Eq, PartialEq)] +pub enum MdError { + /// Trie to treat a non-PRE element as an embedded file. + #[error("tried to treat wrong element as an embedded file: {0}")] + NotCodeBlockElement(String), + + /// Code block lacks the "file" attribute. + #[error("code block is not a file")] + NotFile, + + /// Code block lacks an identifile to use as th filename. + #[error("code block lacks a filename identifier")] + NoId, + + /// Identifier is empty. + #[error("code block has an empty filename identifier")] + NoIdValue, + + /// Value ofv add-newline attribute ie not understood. 
+ #[error("value of add-newline attirubte is not understood: {0}")] + BadAddNewline(String), +} + #[cfg(test)] mod test_extract { use super::extract_scenario; - use super::visitor::Element; + use super::StructureElement; use crate::Scenario; use crate::SubplotError; - fn h(title: &str, level: i64) -> Element { - Element::Heading(title.to_string(), level) + fn h(title: &str, level: i64) -> StructureElement { + StructureElement::Heading(title.to_string(), level) } - fn s(text: &str) -> Element { - Element::Snippet(text.to_string()) + fn s(text: &str) -> StructureElement { + StructureElement::Snippet(text.to_string()) } fn check_result( @@ -319,7 +511,7 @@ mod test_extract { #[test] fn returns_nothing_if_there_is_no_scenario() { - let elements: Vec<Element> = vec![h("title", 1)]; + let elements: Vec<StructureElement> = vec![h("title", 1)]; let r = extract_scenario(&elements); check_result(r, None, 1); } @@ -396,14 +588,13 @@ mod test_extract { #[cfg(test)] mod test { - use super::Markdown; + use super::{AddNewline, Attribute, Markdown, MdError}; use std::path::PathBuf; #[test] fn loads_empty_doc() { let md = Markdown::new_from_str("").unwrap(); - let ast = md.pandoc.borrow(); - assert!(ast.blocks.is_empty()); + assert!(md.html.content().is_empty()); } #[test] @@ -507,4 +698,45 @@ hello, world assert_eq!(file.filename(), "fileid"); assert_eq!(file.contents(), "hello, world\n"); } + + #[test] + fn parses_no_auto_newline_as_auto() { + assert_eq!(AddNewline::parse(None).unwrap(), AddNewline::Auto); + } + + #[test] + fn parses_auto_as_auto() { + let attr = Attribute::new("add-newline", "auto"); + assert_eq!(AddNewline::parse(Some(&attr)).unwrap(), AddNewline::Auto); + } + + #[test] + fn parses_yes_as_yes() { + let attr = Attribute::new("add-newline", "yes"); + assert_eq!(AddNewline::parse(Some(&attr)).unwrap(), AddNewline::Yes); + } + + #[test] + fn parses_no_as_no() { + let attr = Attribute::new("add-newline", "no"); + assert_eq!(AddNewline::parse(Some(&attr)).unwrap(), 
AddNewline::No); + } + + #[test] + fn parses_empty_as_error() { + let attr = Attribute::new("add-newline", ""); + assert_eq!( + AddNewline::parse(Some(&attr)), + Err(MdError::BadAddNewline("".into())) + ); + } + + #[test] + fn parses_garbage_as_error() { + let attr = Attribute::new("add-newline", "garbage"); + assert_eq!( + AddNewline::parse(Some(&attr)), + Err(MdError::BadAddNewline("garbage".into())) + ); + } } diff --git a/src/md/panhelper.rs b/src/md/panhelper.rs deleted file mode 100644 index f7ab801..0000000 --- a/src/md/panhelper.rs +++ /dev/null @@ -1,26 +0,0 @@ -use pandoc_ast::Attr; - -/// Is a code block marked as being of a given type? -pub fn is_class(attr: &Attr, class: &str) -> bool { - let (_id, classes, _kvpairs) = attr; - classes.iter().any(|s| s == class) -} - -/// Utility function to find key/value pairs from an attribute -pub fn find_attr_kv<'a>(attr: &'a Attr, key: &'static str) -> impl Iterator<Item = &'a str> { - attr.2.iter().flat_map(move |(key_, value)| { - if key == key_ { - Some(value.as_ref()) - } else { - None - } - }) -} - -/// Get the filename for a fenced code block tagged .file. -/// -/// The filename is the first (and presumably only) identifier for the -/// block. -pub fn get_filename(attr: &Attr) -> String { - attr.0.to_string() -} diff --git a/src/md/typeset.rs b/src/md/typeset.rs deleted file mode 100644 index aa6528d..0000000 --- a/src/md/typeset.rs +++ /dev/null @@ -1,229 +0,0 @@ -use crate::parser::parse_scenario_snippet; -use crate::Bindings; -use crate::PartialStep; -use crate::ScenarioStep; -use crate::StepKind; -use crate::SubplotError; -use crate::{DiagramMarkup, DotMarkup, PikchrMarkup, PlantumlMarkup, Svg}; -use crate::{Warning, Warnings}; - -use pandoc_ast::Attr; -use pandoc_ast::Block; -use pandoc_ast::Inline; -use pandoc_ast::Target; - -/// Typeset an error as a Pandoc AST Block element. 
-pub fn error(err: SubplotError) -> Block { - let msg = format!("ERROR: {err}"); - Block::Para(error_msg(&msg)) -} - -/// Typeset an error message a vector of inlines. -pub fn error_msg(msg: &str) -> Vec<Inline> { - vec![Inline::Strong(vec![inlinestr(msg)])] -} - -/// Typeset a string as an inline element. -pub fn inlinestr(s: &str) -> Inline { - Inline::Str(String::from(s)) -} - -/// Typeset a code block tagged as a file. -pub fn file_block(attr: &Attr, text: &str) -> Block { - let filename = inlinestr(&attr.0); - let filename = Inline::Strong(vec![filename]); - let intro = Block::Para(vec![inlinestr("File:"), space(), filename]); - let mut cbattrs = attr.clone(); - if cbattrs.1.iter().any(|s| s == "noNumberLines") { - // If the block says "noNumberLines" we remove that class - cbattrs.1.retain(|s| s != "noNumberLines"); - } else if cbattrs.1.iter().all(|s| s != "numberLines") { - // Otherwise if it doesn't say numberLines we add that in. - cbattrs.1.push("numberLines".to_string()); - } - // If this was an `example`, convert that class to `file` - if cbattrs.1.iter().any(|s| s == "example") { - cbattrs.1.retain(|s| s != "example"); - cbattrs.1.push("file".into()); - } - let codeblock = Block::CodeBlock(cbattrs, text.to_string()); - let noattr = ("".to_string(), vec![], vec![]); - Block::Div(noattr, vec![intro, codeblock]) -} - -/// Typeset a scenario snippet as a Pandoc AST Block. -/// -/// Typesetting here means producing the Pandoc abstract syntax tree -/// nodes that result in the desired output, when Pandoc processes -/// them. -/// -/// The snippet is given as a text string, which is parsed. It need -/// not be a complete scenario, but it should consist of complete steps. 
-pub fn scenario_snippet(bindings: &Bindings, snippet: &str, warnings: &mut Warnings) -> Block { - let lines = parse_scenario_snippet(snippet); - let mut steps = vec![]; - let mut prevkind: Option<StepKind> = None; - - for line in lines { - let (this, thiskind) = step(bindings, line, prevkind, warnings); - steps.push(this); - prevkind = thiskind; - } - Block::LineBlock(steps) -} - -// Typeset a single scenario step as a sequence of Pandoc AST Inlines. -fn step( - bindings: &Bindings, - text: &str, - prevkind: Option<StepKind>, - warnings: &mut Warnings, -) -> (Vec<Inline>, Option<StepKind>) { - let step = ScenarioStep::new_from_str(text, prevkind); - if step.is_err() { - return ( - error_msg(&format!("Could not parse step: {text}")), - prevkind, - ); - } - let step = step.unwrap(); - - let m = match bindings.find("", &step) { - Ok(m) => m, - Err(e) => { - let w = Warning::UnknownBinding(format!("{e}")); - warnings.push(w.clone()); - return (error_msg(&format!("{w}")), prevkind); - } - }; - - let mut inlines = vec![keyword(&step, prevkind), space()]; - - for part in m.parts() { - match part { - PartialStep::UncapturedText(s) => inlines.push(uncaptured(s.text())), - PartialStep::CapturedText { text, .. } => inlines.push(captured(text)), - } - } - - (inlines, Some(step.kind())) -} - -// Typeset first word, which is assumed to be a keyword, of a scenario -// step. -fn keyword(step: &ScenarioStep, prevkind: Option<StepKind>) -> Inline { - let actual = inlinestr(&format!("{}", step.kind())); - let and = inlinestr("and"); - let keyword = if let Some(prevkind) = prevkind { - if prevkind == step.kind() { - and - } else { - actual - } - } else { - actual - }; - Inline::Emph(vec![keyword]) -} - -// Typeset a space between words. -fn space() -> Inline { - Inline::Space -} - -// Typeset an uncaptured part of a step. -fn uncaptured(s: &str) -> Inline { - inlinestr(s) -} - -// Typeset a captured part of a step. 
-fn captured(s: &str) -> Inline { - Inline::Strong(vec![inlinestr(s)]) -} - -/// Typeset a link as a note. -pub fn link_as_note(attr: Attr, text: Vec<Inline>, target: Target) -> Inline { - let (url, _) = target.clone(); - let url = Inline::Code(attr.clone(), url); - let link = Inline::Link(attr.clone(), vec![url], target); - let note = Inline::Note(vec![Block::Para(vec![link])]); - let mut text = text; - text.push(note); - Inline::Span(attr, text) -} - -/// Take a pikchr diagram, render it as SVG, and return an AST block element. -/// -/// The `Block` will contain the SVG data. This allows the diagram to -/// be rendered without referencing external entities. -/// -/// If the code block which contained the pikchr contains other classes, they -/// can be added to the SVG for use in later typesetting etc. -pub fn pikchr_to_block(pikchr: &str, class: Option<&str>, warnings: &mut Warnings) -> Block { - match PikchrMarkup::new(pikchr, class).as_svg() { - Ok(svg) => typeset_svg(svg), - Err(err) => { - warnings.push(Warning::Pikchr(format!("{err}"))); - error(err) - } - } -} - -// Take a dot diagram, render it as SVG, and return an AST Block -// element. The Block will contain the SVG data. This allows the -// diagram to be rendered without referending external entities. -pub fn dot_to_block(dot: &str, warnings: &mut Warnings) -> Block { - match DotMarkup::new(dot).as_svg() { - Ok(svg) => typeset_svg(svg), - Err(err) => { - warnings.push(Warning::Dot(format!("{err}"))); - error(err) - } - } -} - -// Take a PlantUML diagram, render it as SVG, and return an AST Block -// element. The Block will contain the SVG data. This allows the -// diagram to be rendered without referending external entities. 
-pub fn plantuml_to_block(markup: &str, warnings: &mut Warnings) -> Block { - match PlantumlMarkup::new(markup).as_svg() { - Ok(svg) => typeset_svg(svg), - Err(err) => { - warnings.push(Warning::Plantuml(format!("{err}"))); - error(err) - } - } -} - -/// Typeset a project roadmap expressed as textual YAML, and render it -/// as an SVG image. -pub fn roadmap_to_block(yaml: &str, warnings: &mut Warnings) -> Block { - match roadmap::from_yaml(yaml) { - Ok(ref mut roadmap) => { - roadmap.set_missing_statuses(); - let width = 50; - match roadmap.format_as_dot(width) { - Ok(dot) => dot_to_block(&dot, warnings), - Err(e) => Block::Para(vec![inlinestr(&e.to_string())]), - } - } - Err(e) => Block::Para(vec![inlinestr(&e.to_string())]), - } -} - -// Typeset an SVG, represented as a byte vector, as a Pandoc AST Block -// element. -fn typeset_svg(svg: Svg) -> Block { - let url = svg_as_data_url(svg); - let attr = ("".to_string(), vec![], vec![]); - let img = Inline::Image(attr, vec![], (url, "".to_string())); - Block::Para(vec![img]) -} - -// Convert an SVG, represented as a byte vector, into a data: URL, -// which can be inlined so the image can be rendered without -// referencing external files. 
-fn svg_as_data_url(svg: Svg) -> String { - let svg = base64::encode(svg.data()); - format!("data:image/svg+xml;base64,{svg}") -} diff --git a/src/md/visitor/block_class.rs b/src/md/visitor/block_class.rs deleted file mode 100644 index 303616b..0000000 --- a/src/md/visitor/block_class.rs +++ /dev/null @@ -1,25 +0,0 @@ -use std::collections::HashSet; - -use pandoc_ast::{Block, MutVisitor}; - -#[derive(Default)] -pub struct BlockClassVisitor { - pub classes: HashSet<String>, -} - -impl MutVisitor for BlockClassVisitor { - fn visit_vec_block(&mut self, vec_block: &mut Vec<Block>) { - for block in vec_block { - match block { - Block::CodeBlock(attr, _) => { - for class in &attr.1 { - self.classes.insert(class.to_string()); - } - } - _ => { - self.visit_block(block); - } - } - } - } -} diff --git a/src/md/visitor/embedded.rs b/src/md/visitor/embedded.rs deleted file mode 100644 index 68a4118..0000000 --- a/src/md/visitor/embedded.rs +++ /dev/null @@ -1,35 +0,0 @@ -use crate::md::panhelper; -use crate::EmbeddedFile; -use crate::EmbeddedFiles; - -use pandoc_ast::{Block, MutVisitor}; - -impl MutVisitor for EmbeddedFiles { - fn visit_vec_block(&mut self, vec_block: &mut Vec<Block>) { - use panhelper::is_class; - for block in vec_block { - match block { - Block::CodeBlock(attr, contents) => { - if is_class(attr, "file") { - let add_newline = match panhelper::find_attr_kv(attr, "add-newline").next() - { - None | Some("auto") => !contents.ends_with('\n'), - Some("yes") => true, - Some("no") => false, - _ => unreachable!(), - }; - let contents = if add_newline { - format!("{contents}\n") - } else { - contents.clone() - }; - self.push(EmbeddedFile::new(panhelper::get_filename(attr), contents)); - } - } - _ => { - self.visit_block(block); - } - } - } - } -} diff --git a/src/md/visitor/image.rs b/src/md/visitor/image.rs deleted file mode 100644 index be49d66..0000000 --- a/src/md/visitor/image.rs +++ /dev/null @@ -1,25 +0,0 @@ -use std::path::PathBuf; - -use pandoc_ast::{Inline, 
MutVisitor}; - -pub struct ImageVisitor { - images: Vec<PathBuf>, -} - -impl ImageVisitor { - pub fn new() -> Self { - ImageVisitor { images: vec![] } - } - - pub fn images(&self) -> Vec<PathBuf> { - self.images.clone() - } -} - -impl MutVisitor for ImageVisitor { - fn visit_inline(&mut self, inline: &mut Inline) { - if let Inline::Image(_attr, _inlines, target) = inline { - self.images.push(PathBuf::from(&target.0)); - } - } -} diff --git a/src/md/visitor/linting.rs b/src/md/visitor/linting.rs deleted file mode 100644 index d64b03e..0000000 --- a/src/md/visitor/linting.rs +++ /dev/null @@ -1,40 +0,0 @@ -use crate::md::panhelper; -use crate::SubplotError; - -use pandoc_ast::{Block, MutVisitor}; - -#[derive(Default)] -pub struct LintingVisitor { - pub issues: Vec<SubplotError>, -} - -impl MutVisitor for LintingVisitor { - fn visit_vec_block(&mut self, vec_block: &mut Vec<Block>) { - for block in vec_block { - match block { - Block::CodeBlock(attr, _) => { - if panhelper::is_class(attr, "file") || panhelper::is_class(attr, "example") { - let newlines: Vec<_> = - panhelper::find_attr_kv(attr, "add-newline").collect(); - match newlines.len() { - 0 => {} - 1 => match newlines[0].to_ascii_lowercase().as_ref() { - "auto" | "yes" | "no" => {} - _ => self.issues.push(SubplotError::UnrecognisedAddNewline( - panhelper::get_filename(attr), - newlines[0].to_owned(), - )), - }, - _ => self.issues.push(SubplotError::RepeatedAddNewlineAttribute( - panhelper::get_filename(attr), - )), - } - } - } - _ => { - self.visit_block(block); - } - } - } - } -} diff --git a/src/md/visitor/mod.rs b/src/md/visitor/mod.rs deleted file mode 100644 index 1c095ac..0000000 --- a/src/md/visitor/mod.rs +++ /dev/null @@ -1,17 +0,0 @@ -mod block_class; -pub use block_class::BlockClassVisitor; - -mod embedded; - -mod image; -pub use image::ImageVisitor; - -mod linting; -pub use linting::LintingVisitor; - -mod structure; -pub use structure::Element; -pub use structure::StructureVisitor; - -mod 
typesetting; -pub use typesetting::TypesettingVisitor; diff --git a/src/md/visitor/structure.rs b/src/md/visitor/structure.rs deleted file mode 100644 index d8faef6..0000000 --- a/src/md/visitor/structure.rs +++ /dev/null @@ -1,100 +0,0 @@ -use crate::md::panhelper; - -use pandoc_ast::{Block, Inline, MutVisitor}; - -// A structure element in the document: a heading or a scenario snippet. -#[derive(Debug)] -pub enum Element { - // Headings consist of the text and the level of the heading. - Heading(String, i64), - - // Scenario snippets consist just of the unparsed text. - Snippet(String), -} - -impl Element { - pub fn heading(text: &str, level: i64) -> Element { - Element::Heading(text.to_string(), level) - } - - pub fn snippet(text: &str) -> Element { - Element::Snippet(text.to_string()) - } -} - -// A MutVisitor for extracting document structure. -pub struct StructureVisitor { - pub elements: Vec<Element>, -} - -impl StructureVisitor { - pub fn new() -> Self { - Self { elements: vec![] } - } -} - -impl MutVisitor for StructureVisitor { - fn visit_vec_block(&mut self, vec_block: &mut Vec<Block>) { - use panhelper::is_class; - for block in vec_block { - match block { - Block::Header(level, _attr, inlines) => { - let text = join(inlines); - let heading = Element::heading(&text, *level); - self.elements.push(heading); - } - Block::CodeBlock(attr, s) => { - if is_class(attr, "scenario") { - let snippet = Element::snippet(s); - self.elements.push(snippet); - } - } - _ => { - self.visit_block(block); - } - } - } - } -} - -fn join(vec: &[Inline]) -> String { - let mut buf = String::new(); - join_into_buffer(vec, &mut buf); - buf -} - -fn join_into_buffer(vec: &[Inline], buf: &mut String) { - for item in vec { - match item { - Inline::Str(s) => buf.push_str(s), - Inline::Emph(v) => join_into_buffer(v, buf), - Inline::Strong(v) => join_into_buffer(v, buf), - Inline::Strikeout(v) => join_into_buffer(v, buf), - Inline::Superscript(v) => join_into_buffer(v, buf), - 
Inline::Subscript(v) => join_into_buffer(v, buf), - Inline::SmallCaps(v) => join_into_buffer(v, buf), - Inline::Quoted(qt, v) => { - let q = match qt { - pandoc_ast::QuoteType::SingleQuote => "'", - pandoc_ast::QuoteType::DoubleQuote => "\"", - }; - buf.push_str(q); - join_into_buffer(v, buf); - buf.push_str(q); - } - Inline::Cite(_, v) => join_into_buffer(v, buf), - Inline::Code(_attr, s) => buf.push_str(s), - Inline::Space => buf.push(' '), - Inline::SoftBreak => buf.push(' '), - Inline::LineBreak => buf.push(' '), - Inline::Math(_, s) => buf.push_str(s), - Inline::RawInline(_, s) => buf.push_str(s), - Inline::Link(_, v, _) => join_into_buffer(v, buf), - Inline::Image(_, v, _) => join_into_buffer(v, buf), - Inline::Note(_) => buf.push_str(""), - Inline::Span(_attr, v) => join_into_buffer(v, buf), - #[cfg(feature = "pandoc_ast_08")] - Inline::Underline(v) => join_into_buffer(v, buf), - } - } -} diff --git a/src/md/visitor/typesetting.rs b/src/md/visitor/typesetting.rs deleted file mode 100644 index 2405c03..0000000 --- a/src/md/visitor/typesetting.rs +++ /dev/null @@ -1,85 +0,0 @@ -use crate::md::panhelper; -use crate::md::typeset; -use crate::{Bindings, Style, Warnings}; - -use pandoc_ast::{Block, Inline, MutVisitor}; - -/// Visitor for the pandoc AST. -/// -/// This includes rendering stuff which we find as we go -pub struct TypesettingVisitor<'a> { - style: Style, - bindings: &'a Bindings, - warnings: Warnings, -} - -impl<'a> TypesettingVisitor<'a> { - pub fn new(style: Style, bindings: &'a Bindings) -> Self { - TypesettingVisitor { - style, - bindings, - warnings: Warnings::default(), - } - } - - pub fn warnings(self) -> Warnings { - self.warnings - } -} - -// Visit interesting parts of the Pandoc abstract syntax tree. The -// document top level is a vector of blocks and we visit that and -// replace any fenced code block with the scenario tag with a typeset -// paragraph. 
Also, replace fenced code blocks with known diagram -// markup with the rendered SVG image. -impl<'a> MutVisitor for TypesettingVisitor<'a> { - fn visit_vec_block(&mut self, vec_block: &mut Vec<Block>) { - use panhelper::is_class; - for block in vec_block { - match block { - Block::CodeBlock(attr, s) => { - if is_class(attr, "scenario") { - *block = typeset::scenario_snippet(self.bindings, s, &mut self.warnings) - } else if is_class(attr, "file") || is_class(attr, "example") { - *block = typeset::file_block(attr, s) - } else if is_class(attr, "dot") { - *block = typeset::dot_to_block(s, &mut self.warnings) - } else if is_class(attr, "plantuml") { - *block = typeset::plantuml_to_block(s, &mut self.warnings) - } else if is_class(attr, "roadmap") { - *block = typeset::roadmap_to_block(s, &mut self.warnings) - } else if is_class(attr, "pikchr") { - let other_classes: Vec<_> = attr - .1 - .iter() - .map(String::as_str) - .filter(|s| *s != "pikchr") - .collect(); - let class = if other_classes.is_empty() { - None - } else { - Some(other_classes.join(" ")) - }; - let class = class.as_deref(); - *block = typeset::pikchr_to_block(s, class, &mut self.warnings) - } - } - _ => { - self.visit_block(block); - } - } - } - } - fn visit_vec_inline(&mut self, vec_inline: &mut Vec<Inline>) { - for inline in vec_inline { - match inline { - Inline::Link(attr, vec, target) if self.style.links_as_notes() => { - *inline = typeset::link_as_note(attr.clone(), vec.to_vec(), target.clone()); - } - _ => { - self.visit_inline(inline); - } - } - } - } -} diff --git a/src/metadata.rs b/src/metadata.rs index b840633..9f25621 100644 --- a/src/metadata.rs +++ b/src/metadata.rs @@ -4,7 +4,6 @@ use lazy_static::lazy_static; use log::trace; use regex::Regex; use serde::Deserialize; -use serde_yaml::Value; use std::collections::{BTreeMap, HashMap}; use std::fmt::Debug; use std::ops::Deref; @@ -55,7 +54,6 @@ pub struct YamlMetadata { documentclass: Option<String>, #[serde(default)] impls: 
BTreeMap<String, Vec<PathBuf>>, - pandoc: Option<HashMap<String, Value>>, } impl YamlMetadata { @@ -85,6 +83,11 @@ impl YamlMetadata { self.date.as_deref() } + /// Set date. + pub fn set_date(&mut self, date: String) { + self.date = Some(date); + } + /// Authors. pub fn authors(&self) -> Option<&[String]> { self.authors.as_deref() @@ -114,15 +117,6 @@ impl YamlMetadata { pub fn documentclass(&self) -> Option<&str> { self.documentclass.as_deref() } - - /// Pandoc metadata. - pub fn pandoc(&self) -> Option<&HashMap<String, Value>> { - if let Some(x) = &self.pandoc { - Some(x) - } else { - None - } - } } #[cfg(test)] @@ -262,6 +256,11 @@ impl Metadata { self.date.as_deref() } + /// Set date. + pub fn set_date(&mut self, date: String) { + self.date = Some(date); + } + /// Return base dir for all relative filenames. pub fn basedir(&self) -> &Path { &self.basedir diff --git a/src/policy.rs b/src/policy.rs index 972d081..e24bf8f 100644 --- a/src/policy.rs +++ b/src/policy.rs @@ -1,8 +1,5 @@ use std::path::{Component, Path, PathBuf}; -use log::trace; -use pandoc::{InputFormat, InputKind, OutputFormat, OutputKind, Pandoc, PandocOption}; - /// Get the base directory given the name of the markdown file. /// /// All relative filename, such as bindings files, are resolved @@ -17,23 +14,3 @@ pub fn get_basedir_from(filename: &Path) -> PathBuf { Some(x) => x.to_path_buf(), } } - -/// Add 'citeproc' to a Pandoc instance. -/// -/// This attempts to determine if `--citeproc` or `--filter pandoc-citeproc` -/// is needed, and then does that specific thing. 
-pub fn add_citeproc(pandoc: &mut Pandoc) { - let mut guesser = Pandoc::new(); - guesser.set_input(InputKind::Pipe("".to_string())); - guesser.set_input_format(InputFormat::Markdown, vec![]); - guesser.set_output_format(OutputFormat::Markdown, vec![]); - guesser.set_output(OutputKind::Pipe); - guesser.add_option(PandocOption::Citeproc); - if guesser.execute().is_ok() { - trace!("Discovered --citeproc"); - pandoc.add_option(PandocOption::Citeproc); - } else { - trace!("Discovered --filter pandoc-citeproc"); - pandoc.add_option(PandocOption::Filter("pandoc-citeproc".into())); - } -} @@ -20,20 +20,20 @@ document. We define the various concepts relevant to Subplot as follows: -* **Acceptance criteria**: What the stakeholders require of the system +* **Acceptance criteria:** What the stakeholders require of the system for them to be happy with it and use it. -* **Stakeholder**: Someone with a keen interest in the success of a +* **Stakeholder:** Someone with a keen interest in the success of a system. They might be a paying client, someone who uses the system, or someone involved in developing the system. Depending on the system and project, some stakeholders may have a bigger say than others. -* **Acceptance test**: How stakeholders verify that the system +* **Acceptance test:** How stakeholders verify that the system fulfills the acceptance criteria, in an automated way. Some criteria may not be possible to verify automatically. -* **Scenario**: In Subplot, the acceptance criteria are written as +* **Scenario:** In Subplot, the acceptance criteria are written as freeform prose, with diagrams, etc. The scenarios, which are embedded blocks of Subplot scenario language, capture the mechanisms of verifying that criteria are met - the acceptance tests - showing @@ -103,13 +103,7 @@ testprog -> report; } ``` -[Pandoc]: https://pandoc.org/ - -Subplot uses the [Pandoc][] software for generating HTML -output documents. 
In fact, any output format supported by Pandoc can -be requested by the user. Depending on the output format, Pandoc may -use, for example, LaTeX. Subplot interprets parts of the Markdown -input file itself. +Subplot generates HTML itself. Subplot actually consists mainly of two separate programs: **subplot docgen** for generating output documents, and **subplot codegen** for @@ -307,56 +301,56 @@ tests for Subplot](#acceptance). Each requirement here is given a unique mnemonic id for easier reference in discussions. -**UnderstandableTests** +* **UnderstandableTests** -: Acceptance tests should be possible to express in a way that's - easily understood by all stakeholders, including those who are - not software developers. + Acceptance tests should be possible to express in a way that's + easily understood by all stakeholders, including those who are not + software developers. _Done_ but requires the Subplot document to be written with care. -**EasyToWriteDocs** +* **EasyToWriteDocs** -: The markup language for writing documentation should be easy to + The markup language for writing documentation should be easy to write. _Done_ by using Markdown. -**AidsComprehension** +* **AidsComprehension** -: The formatted human-readable documentation should use good layout + The formatted human-readable documentation should use good layout and typography to enhance comprehension. - _In progress_ — typesetting via Pandoc works, but may need - review and improvement. + _In progress_ — we currently only output HTML, but may add + PDF output back later. -**CodeSeparately** +* **CodeSeparately** -: The code to implement the acceptance criteria should not be + The code to implement the acceptance criteria should not be embedded in the documentation source, but be in separate files. This makes it easier to edit without specialised tooling. _Done_ by keeping scenario step implementations in a separate file. 
-**AnyProgammingLanguage** +* **AnyProgammingLanguage** -: The developers implementing the acceptance tests should be free to + The developers implementing the acceptance tests should be free to use a language they're familiar and comfortable with. Subplot should not require them to use a specific language. _Not done_ — only Python supported at the moment. -**FastTestExecution** +* **FastTestExecution** -: Executing the acceptance tests should be fast. + Executing the acceptance tests should be fast. _Not done_ — the generated Python test program is simplistic and linear. -**NoDeployment** +* **NoDeployment** -: The acceptance test tooling should assume the system under test is + The acceptance test tooling should assume the system under test is already deployed and available. Deploying is too big of a problem space to bring into the scope of acceptance testing, and there are already good tools for deployment. @@ -364,9 +358,9 @@ reference in discussions. _Done_ by virtue of letting those who implement the scenario steps worry about it. -**MachineParseableResults** +* **MachineParseableResults** -: The tests should produce a machine parseable result that can be + The tests should produce a machine parseable result that can be archived, post-processed, and analyzed in ways that are of interest to the project using Subplot. For example, to see trends in how long tests take, how often tests fail, to find regressions, @@ -379,16 +373,13 @@ reference in discussions. Subplot reads three input files, each in a different format: -* The document file, which uses the Markdown dialects understood by - Pandoc. +* The document file in [GitHub Flavored Markdown](https://github.github.com/gfm/). * The bindings file, in YAML. * The functions file, in Bash or Python. Subplot interprets marked parts of the input document -specially. It does this via the Pandoc abstract syntax tree, rather -than text manipulation, and thus anything that Pandoc understands is -understood by Subplot. 
We will not specify Pandoc's dialect of -Markdown here, only the parts Subplot pays attention to. +specially. These are fenced code blocks tagged with the `scenario`, +`file`, or `example` classes. ## Scenario language @@ -513,14 +504,9 @@ will deal with formatting that nicely for you. ## Document markup -[Pandoc]: https://pandoc.org/ - -Subplot uses [Pandoc][], the universal document converter, to parse -the Markdown file, and thus understands the variants of Markdown that -Pandoc supports. This includes traditional Markdown, CommonMark, and -GitHub-flavored Markdown. +Subplot parses Markdown input files using GitHub-flavored Markdown. -[fenced code blocks]: https://pandoc.org/MANUAL.html#fenced-code-blocks +[fenced code blocks]: https://github.github.com/gfm/#fenced-code-blocks Subplot extends Markdown by treating certain tags for [fenced code blocks][] specially. A scenario, for example, would look like @@ -553,11 +539,7 @@ This data is accessible to the test program as 'filename'. The `.file` attribute is necessary, as is the identifier, here `#filename`. The generated test program can access the data using the -identifier (without the #). The mechanism used is generic to Pandoc, -and can be used to affect the typesetting by adding more attributes. -For example, Pandoc can typeset the data in the code block using -syntax highlighting, if the language is specified: `.markdown`, -`.yaml`, or `.python`, for example. +identifier (without the #). Subplot also understands the `dot` and `roadmap` tags, and can use the Graphviz dot program, or the [roadmap][] Rust crate, to produce @@ -613,31 +595,23 @@ given file not-numbered-lines.txt ## Document metadata -Pandoc supports, and Subplot makes use of, a [YAML metadata block][] in a -Markdown document. This can and should be used to set the document -title, authors, date (version), and can be used to control some of the -typesetting. 
Crucially for Subplot, the bindings and functions files -are named in the metadata block, rather than Subplot deriving them -from the input file name. - -[YAML metadata block]: https://pandoc.org/MANUAL.html#extension-yaml_metadata_block +Document metadata is read from a YAML file. This can be used to set the +document title, authors, date (version), and more. Crucially for +Subplot, the bindings and functions files are named in the metadata +block, rather than Subplot deriving them from the input file name. -As an example, the metadata block for the Subplot document might look -as follows. The `---` before and `...` after the block are mandatory: -they are how Pandoc recongizes the block. - -~~~{.yaml .numberLines} ---- +~~~{.file .yaml .numberLines} title: "Subplot" authors: - The Subplot project date: work in progress +markdowns: +- subplot.md bindings: - subplot.yaml impls: python: - subplot.py -... ~~~ There can be more than one bindings or functions file: use a YAML @@ -1193,6 +1167,7 @@ but foobar was done ``` ~~~ +<!-- disabled until Lars fixes typesetting of scenarios ### Keyword aliases in output We support **and** and **but** in input lines, and we always render @@ -1234,6 +1209,7 @@ then bar was done then foobar was done ``` ~~~ +--> ### Misuse of continuation keywords @@ -2174,9 +2150,7 @@ given precondition foo The document and code generators require a document title, because it's a common user error to not have one, and Subplot should help make -good documents. The Pandoc filter, however, mustn't require a document -title, because it's used for things like formatting websites using -ikiwiki, and ikiwiki has a different way of specifying page titles. +good documents. 
#### Document generator gives an error if input document lacks title @@ -2496,32 +2470,6 @@ and file mtime.html contains "Geoffrey Butler" and file mtime.html contains "2020-02-26 07:53" ~~~ -### Pandoc metadata - -~~~scenario -given file pandoc.subplot -given file pandoc.md -and an installed subplot -when I run subplot docgen pandoc.subplot -o pandoc.html -when I run cat pandoc.html -then file pandoc.html exists -and file pandoc.html contains "<title>The Fabulous Title</title>" -and file pandoc.html contains "Superlative Subtitle" -~~~ - -~~~{#pandoc.subplot .file .yaml .numberLines} -title: The Fabulous Title -markdowns: -- pandoc.md -pandoc: - subtitle: Superlative Subtitle -~~~ - -~~~{#pandoc.md .file .markdown .numberLines} -# Introduction -This is a test document. That's all. -~~~ - ### Missing bindings file If a bindings file is missing, the error message should name the @@ -2708,12 +2656,11 @@ bibliography: [foo.bib, bar.bib] Subplot allows data files to be embedded in the input document. This is handy for small test files and the like. -Handling of a newline character on the last line is tricky. Pandoc -doesn't include a newline on the last line. Sometimes one is -needed—but sometimes it's not wanted. A newline can be added by -having an empty line at the end, but that is subtle and easy to miss. -Subplot helps the situation by allowing a `add-newline=` class to be added -to the code blocks, with one of three allowed cases: +Handling of a newline character on the last line is tricky. The block +ends in a newline on the last line. Sometimes one is needed—but +sometimes it's not wanted. 
Subplot helps the situation by allowing a +`add-newline=` class to be added to the code blocks, with one of three +allowed cases: * no `add-newline` class—default handling: same as `add-newline=auto` * `add-newline=auto`—add a newline, if one isn't there @@ -3186,7 +3133,7 @@ when I run subplot docgen pikchr.subplot -o pikchr.html then file pikchr.html matches regex /src="data:image/svg\+xml;base64,/ ~~~ -The sample input file **pikchr.md**: +The sample input file **pikchr.md:** ~~~~~~~~{#pikchr.md .file .markdown .numberLines} --- @@ -3233,7 +3180,7 @@ when I run subplot docgen dot.subplot -o dot.html then file dot.html matches regex /src="data:image/svg\+xml;base64,/ ~~~ -The sample input file **dot.md**: +The sample input file **dot.md:** ~~~~~~~~{#dot.md .file .markdown .numberLines} This is an example Markdown file, which embeds a diagram using dot markup. @@ -3282,7 +3229,7 @@ when I run subplot docgen plantuml.subplot -o plantuml.html then file plantuml.html matches regex /src="data:image/svg\+xml;base64,/ ~~~ -The sample input file **plantuml.md**: +The sample input file **plantuml.md:** ~~~~~~~~{#plantuml.md .file .markdown .numberLines} This is an example Markdown file, which embeds a diagram using @@ -3370,7 +3317,7 @@ when I run subplot docgen roadmap.subplot -o roadmap.html then file roadmap.html matches regex /src="data:image/svg\+xml;base64,/ ~~~ -The sample input file **roadmap.md**: +The sample input file **roadmap.md:** ~~~~~~~~{#roadmap.md .file .markdown .numberLines} This is an example Markdown file, which embeds a roadmap. @@ -3430,7 +3377,7 @@ markdowns: When Subplot loads a document it will validate that the block classes match a known set. Subplot has a built-in set which it treats as special, -and it knows some pandoc-specific classes and a number of file type classes. +and it knows some custom classes and a number of file type classes. 
If the author of a document wishes to use additional class names then they can include a `classes` list in the document metadata which subplot will treat |