summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Silverstone <dsilvers+gitlab@digital-scurf.org>2021-09-16 20:05:40 +0000
committerDaniel Silverstone <dsilvers+gitlab@digital-scurf.org>2021-09-16 20:05:40 +0000
commit85e1d8fe7e0b2a6e020ec1d1319b3759c5ae0c2d (patch)
treeac8b166cf83c922aa3657046db72bdcc14a1493f
parent9198b69c561e582704dbd99000d59e474c406a67 (diff)
parent53dad377a388b55b4ec74613ae01143e7ba62562 (diff)
downloadsubplot-85e1d8fe7e0b2a6e020ec1d1319b3759c5ae0c2d.tar.gz
Merge branch 'cmark-parser-cleanly' into 'main'
feat! parse Markdown input with pull-cmark instead of Pandoc See merge request subplot/subplot!213
-rw-r--r--Cargo.lock31
-rw-r--r--Cargo.toml3
-rw-r--r--examples/echo/echo.md6
-rw-r--r--examples/muck/muck.md4
-rw-r--r--examples/website/website.md4
-rw-r--r--share/bash/template/template.sh.tera1
-rw-r--r--src/ast.rs455
-rw-r--r--src/bin/cli/mod.rs22
-rw-r--r--src/bin/subplot.rs2
-rw-r--r--src/doc.rs51
-rw-r--r--src/error.rs10
-rw-r--r--src/lib.rs3
-rw-r--r--src/visitor/datafiles.rs2
-rw-r--r--subplot.md36
-rw-r--r--subplotlib/files.md2
-rw-r--r--subplotlib/runcmd.md2
16 files changed, 597 insertions, 37 deletions
diff --git a/Cargo.lock b/Cargo.lock
index b8d5b5e..b3e61d5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -297,6 +297,15 @@ dependencies = [
]
[[package]]
+name = "getopts"
+version = "0.2.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5"
+dependencies = [
+ "unicode-width",
+]
+
+[[package]]
name = "getrandom"
version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -671,6 +680,18 @@ dependencies = [
]
[[package]]
+name = "pulldown-cmark"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ffade02495f22453cd593159ea2f59827aae7f53fa8323f756799b670881dcf8"
+dependencies = [
+ "bitflags",
+ "getopts",
+ "memchr",
+ "unicase",
+]
+
+[[package]]
name = "quote"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1022,6 +1043,7 @@ dependencies = [
"pandoc_ast 0.7.3",
"pandoc_ast 0.8.0",
"pikchr",
+ "pulldown-cmark",
"regex",
"roadmap",
"serde",
@@ -1329,6 +1351,15 @@ dependencies = [
]
[[package]]
+name = "unicase"
+version = "2.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6"
+dependencies = [
+ "version_check",
+]
+
+[[package]]
name = "unicode-segmentation"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/Cargo.toml b/Cargo.toml
index 451c378..5753f63 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -46,6 +46,7 @@ git-testament = "0.2"
tracing = "0.1"
tracing-appender = "0.1"
tracing-subscriber = "0.2"
+pulldown-cmark = "0.8.0"
[dependencies.tera]
version = "1"
@@ -53,4 +54,4 @@ default-features = true
[build-dependencies]
walkdir = "2"
-anyhow = "1" \ No newline at end of file
+anyhow = "1"
diff --git a/examples/echo/echo.md b/examples/echo/echo.md
index c335f53..8bf5405 100644
--- a/examples/echo/echo.md
+++ b/examples/echo/echo.md
@@ -2,9 +2,9 @@
title: "**echo**(1) acceptance tests"
author: The Subplot project
template: bash
-bindings: echo.yaml
-functions: echo.sh
-bibliography: echo.bib
+bindings: [echo.yaml]
+functions: [echo.sh]
+bibliography: [echo.bib]
...
Introduction
diff --git a/examples/muck/muck.md b/examples/muck/muck.md
index d5470a0..e02e522 100644
--- a/examples/muck/muck.md
+++ b/examples/muck/muck.md
@@ -2,8 +2,8 @@
title: Muck JSON storage server and API
author: Lars Wirzenius
date: work in progress
-bindings: muck.yaml
-functions: muck.py
+bindings: [muck.yaml]
+functions: [muck.py]
template: python
...
diff --git a/examples/website/website.md b/examples/website/website.md
index 89b94bb..5d4f61e 100644
--- a/examples/website/website.md
+++ b/examples/website/website.md
@@ -180,6 +180,6 @@ achieve this, you should make the following changes:
title: Subplot website tutorial
author: The Subplot project
template: python
-bindings: website.yaml
-functions: website.py
+bindings: [website.yaml]
+functions: [website.py]
...
diff --git a/share/bash/template/template.sh.tera b/share/bash/template/template.sh.tera
index 67134f3..da748be 100644
--- a/share/bash/template/template.sh.tera
+++ b/share/bash/template/template.sh.tera
@@ -191,6 +191,7 @@ done
if [ "$#" = 0 ]
then {% for scenario in scenarios %}
scenario_{{ loop.index }}{% endfor %}
+ : # Avoid empty then-branch if no scenarios
else
diff --git a/src/ast.rs b/src/ast.rs
new file mode 100644
index 0000000..60d234d
--- /dev/null
+++ b/src/ast.rs
@@ -0,0 +1,455 @@
+use lazy_static::lazy_static;
+use pandoc_ast::{Attr, Block, Inline, Map, MetaValue, Pandoc};
+use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag};
+use regex::Regex;
+use serde::Deserialize;
+use std::path::{Path, PathBuf};
+use tracing::{event, span, Level};
+
+lazy_static! {
+ // Pattern that recognises a YAML block at the beginning of a file.
+ static ref LEADING_YAML_PATTERN: Regex = Regex::new(r"^(?:\S*\n)*-{3,}\n(?P<yaml>([^.]+.*?\n)*)\.{3,}\n(?P<text>(.*\n)*)$").unwrap();
+
+
+ // Pattern that recognises a YAML block at the end of a file.
+ static ref TRAILING_YAML_PATTERN: Regex = Regex::new(r"(?P<text>(.*\n)*)\n*-{3,}\n(?P<yaml>(.*?\n)*)\.{3,}\n(?:\S*\n)*$").unwrap();
+}
+
+/// An abstract syntax tree representation of a Markdown file.
+///
+/// This represents a Markdown file as an abstract syntax tree
+/// compatible with Pandoc's AST. The document YAML metadata MUST be
+/// at the top or bottom of the file, excluding leading or trailing
+/// empty lines.
+#[derive(Debug)]
+pub struct AbstractSyntaxTree {
+ blocks: Vec<Block>,
+ meta: Map<String, MetaValue>,
+}
+
+impl AbstractSyntaxTree {
+ // Create a new AST.
+ //
+ // Note that this is not public.
+ fn new(meta: Map<String, MetaValue>, blocks: Vec<Block>) -> Self {
+ Self { blocks, meta }
+ }
+
+ /// Return a Pandoc-compatible AST.
+ pub fn to_pandoc(&self) -> Pandoc {
+ Pandoc {
+ meta: self.meta.clone(),
+ blocks: self.blocks.clone(),
+ pandoc_api_version: vec![1, 20],
+ }
+ }
+}
+
+impl std::str::FromStr for AbstractSyntaxTree {
+    type Err = Error;
+
+    /// Parse Markdown, with optional leading or trailing YAML metadata, into an AST.
+    fn from_str(markdown: &str) -> Result<Self, Self::Err> {
+        let span = span!(Level::TRACE, "parse markdown");
+        let _enter = span.enter(); // a `let _ =` binding would drop the guard at once
+        event!(Level::TRACE, "Parsing markdown");
+        let ast = if let Some((yaml, markdown)) = get_yaml(&LEADING_YAML_PATTERN, markdown) {
+            event!(Level::TRACE, ?yaml, "Found leading YAML");
+            let meta = Metadata::new(yaml)?.to_map();
+            let blocks = parse_blocks(markdown);
+            AbstractSyntaxTree::new(meta, blocks)
+        } else if let Some((yaml, markdown)) = get_yaml(&TRAILING_YAML_PATTERN, markdown) {
+            event!(Level::TRACE, ?yaml, "Found trailing YAML");
+            let meta = Metadata::new(yaml)?.to_map();
+            let blocks = parse_blocks(markdown); // shadowed: only the text before the YAML
+            AbstractSyntaxTree::new(meta, blocks)
+        } else {
+            event!(Level::TRACE, "No YAML to be found");
+            let blocks = parse_blocks(markdown);
+            AbstractSyntaxTree::new(Map::new(), blocks)
+        };
+        event!(Level::TRACE, "Parsing markdown: OK");
+        Ok(ast)
+    }
+}
+
+// Extract a YAML metadata block using a given regex.
+fn get_yaml<'a>(pat: &Regex, markdown: &'a str) -> Option<(&'a str, &'a str)> {
+ if let Some(c) = pat.captures(markdown) {
+ event!(Level::TRACE, "YAML regex matches");
+ let yaml = c.name("yaml");
+ let text = c.name("text");
+ if yaml.is_some() && text.is_some() {
+ event!(Level::TRACE, "YAML regex captures YAML and text");
+ let yaml = &markdown[yaml?.start()..yaml?.end()];
+
+ let text = &markdown[text?.start()..text?.end()];
+ return Some((yaml, text));
+ } else {
+ event!(Level::TRACE, ?c, "YAML regex fails to capture YAML");
+ }
+ } else {
+ event!(Level::TRACE, ?pat, "YAML regex does not match");
+ }
+ None
+}
+
+// Parse Markdown into a sequence of Blocks.
+fn parse_blocks(markdown: &str) -> Vec<Block> {
+ event!(Level::TRACE, "Parsing blocks");
+
+ // Define the Markdown parser.
+ let mut options = Options::empty();
+ options.insert(Options::ENABLE_TABLES);
+ options.insert(Options::ENABLE_FOOTNOTES);
+ options.insert(Options::ENABLE_STRIKETHROUGH);
+ options.insert(Options::ENABLE_TASKLISTS);
+ options.insert(Options::ENABLE_SMART_PUNCTUATION);
+ let parser = Parser::new_ext(markdown, options);
+
+ // The sequence of blocks that represents the parsed document.
+ let mut blocks = vec![];
+
+ // The current set of inline elements we've collected. This gets
+ // emptied whenever we finish a block.
+ let mut inlines: Vec<Inline> = vec![];
+
+ for event in parser {
+ event!(Level::TRACE, ?event);
+ match event {
+ // We ignore these for now. They're not needed for codegen.
+ Event::Html(_)
+ | Event::FootnoteReference(_)
+ | Event::SoftBreak
+ | Event::HardBreak
+ | Event::Rule
+ | Event::TaskListMarker(_) => (),
+
+ // Inline text of various kinds.
+ Event::Text(text) => inlines.push(inline_text(&text)),
+ Event::Code(text) => inlines.push(inline_code(&text)),
+
+ // We only handle the end events.
+ Event::Start(_) => (),
+
+ // End of a block or inline.
+ Event::End(tag) => match tag {
+ // Collect inline elements for later inclusion in a block.
+ Tag::Emphasis | Tag::Strong | Tag::Strikethrough => {
+ inline_from_inlines(&tag, &mut inlines)
+ }
+ Tag::Paragraph => blocks.push(paragraph(&mut inlines)),
+ Tag::Heading(level) => blocks.push(heading(level as i64, &mut inlines)),
+ Tag::CodeBlock(kind) => blocks.push(code_block(&kind, &mut inlines)),
+
+ // We don't handle anything else yet.
+ _ => (),
+ },
+ }
+ }
+
+ // We MUST have emptied all inline elements.
+ // assert!(inlines.is_empty());
+
+ event!(Level::TRACE, "Parsing blocks: OK");
+ blocks
+}
+
+fn inline_text(text: &str) -> Inline {
+ Inline::Str(text.to_string())
+}
+
+fn inline_code(text: &str) -> Inline {
+ let attr = ("".to_string(), vec![], vec![]);
+ Inline::Code(attr, text.to_string())
+}
+
+fn paragraph(inlines: &mut Vec<Inline>) -> Block {
+ Block::Para(std::mem::take(inlines))
+}
+
+fn heading(level: i64, inlines: &mut Vec<Inline>) -> Block {
+ let attr = ("".to_string(), vec![], vec![]);
+ Block::Header(level, attr, std::mem::take(inlines))
+}
+
+fn code_block(kind: &CodeBlockKind, inlines: &mut Vec<Inline>) -> Block {
+ event!(Level::TRACE, ?kind, "code block");
+ let attr = if let CodeBlockKind::Fenced(lang) = kind {
+ event!(Level::TRACE, ?lang, "fenced code block");
+ parse_code_block_attrs(lang)
+ } else {
+ event!(Level::TRACE, "indented code block");
+ parse_code_block_attrs("")
+ };
+ event!(Level::TRACE, ?attr, "code block attrs");
+ let mut code = String::new();
+ for inline in inlines.drain(0..) {
+ let text = plain_text_inline(inline);
+ code.push_str(&text);
+ }
+ Block::CodeBlock(attr, code)
+}
+
+fn plain_text_inline(inline: Inline) -> String {
+ match inline {
+ Inline::Str(text) => text,
+ Inline::Code(_, text) => text,
+ Inline::Emph(inlines) => {
+ let mut text = String::new();
+ for inline in inlines {
+ text.push_str(&plain_text_inline(inline));
+ }
+ text
+ }
+ _ => panic!("not text in code block: {:?}", inline),
+ }
+}
+
+fn parse_code_block_attrs(attrs: &str) -> Attr {
+ event!(Level::TRACE, ?attrs, "parsing code block attrs");
+ let mut id = "".to_string();
+ let mut classes = vec![];
+ let mut keyvalues = vec![];
+ if attrs.starts_with('{') && attrs.ends_with('}') {
+ let attrs = &attrs[1..attrs.len() - 1];
+ for word in attrs.split_ascii_whitespace() {
+ if let Some(x) = word.strip_prefix('#') {
+ id = x.to_string();
+ } else if let Some(x) = word.strip_prefix('.') {
+ classes.push(x.to_string());
+ } else if let Some((k, v)) = word.split_once('=') {
+ keyvalues.push((k.to_string(), v.to_string()));
+ }
+ }
+ } else {
+ id = attrs.to_string();
+ }
+ (id, classes, keyvalues)
+}
+
+/// Replace every inline collected so far with one Emph/Strong/Strikeout wrapping them.
+fn inline_from_inlines(tag: &Tag, inlines: &mut Vec<Inline>) {
+    let new_inlines = std::mem::take(inlines); // move out, leaving `inlines` empty
+
+    let inline = match tag {
+        Tag::Emphasis => Inline::Emph(new_inlines),
+        Tag::Strong => Inline::Strong(new_inlines),
+        Tag::Strikethrough => Inline::Strikeout(new_inlines),
+        _ => unreachable!(),
+    };
+
+    inlines.push(inline);
+}
+
+/// Errors from Markdown parsing.
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+ #[error(transparent)]
+ Regex(#[from] regex::Error),
+
+ #[error(transparent)]
+ Yaml(#[from] serde_yaml::Error),
+}
+
+// Document metadata.
+//
+// This is expressed in the Markdown input file as an embedded YAML
+// block.
+//
+// Note that this structure needs to be able to capture any metadata
+// block we can work with, in any input file. By being strict here we
+// make it easier to tell the user when a metadata block has, say, a
+// misspelled field.
+#[derive(Debug, Default, Deserialize)]
+#[serde(deny_unknown_fields)]
+struct Metadata {
+ title: String,
+ subtitle: Option<String>,
+ author: Option<String>,
+ date: Option<String>,
+ classes: Option<Vec<String>>,
+ template: Option<String>,
+ bibliography: Option<Vec<PathBuf>>,
+ bindings: Option<Vec<PathBuf>>,
+ functions: Option<Vec<PathBuf>>,
+}
+
+impl Metadata {
+ fn new(yaml_text: &str) -> Result<Self, Error> {
+ event!(Level::TRACE, "Parsing YAML");
+ let meta: Self = serde_yaml::from_str(yaml_text)?;
+ Ok(meta)
+ }
+
+ fn to_map(&self) -> Map<String, MetaValue> {
+ event!(Level::TRACE, "Creating metadata map from parsed YAML");
+ let mut map: Map<String, MetaValue> = Map::new();
+ map.insert(s("title"), meta_string(&self.title));
+ if let Some(v) = &self.subtitle {
+ map.insert(s("subtitle"), meta_string(v));
+ }
+ if let Some(v) = &self.author {
+ map.insert(s("author"), meta_string(v));
+ }
+ if let Some(v) = &self.date {
+ map.insert(s("date"), meta_string(v));
+ }
+ if let Some(v) = &self.classes {
+ map.insert(s("classes"), meta_strings(v));
+ }
+ if let Some(v) = &self.template {
+ map.insert(s("template"), meta_string(v));
+ }
+ if let Some(v) = &self.bibliography {
+ map.insert(s("bibliographies"), meta_path_bufs(v));
+ }
+ if let Some(v) = &self.bindings {
+ map.insert(s("bindings"), meta_path_bufs(v));
+ }
+ if let Some(v) = &self.functions {
+ map.insert(s("functions"), meta_path_bufs(v));
+ }
+ event!(
+ Level::TRACE,
+ ?self,
+ ?map,
+ "Created metadata map from parsed YAML"
+ );
+ map
+ }
+}
+
+fn s(s: &str) -> String {
+ s.to_string()
+}
+
+fn meta_string(s: &str) -> MetaValue {
+ MetaValue::MetaString(s.to_string())
+}
+
+fn meta_strings(v: &[String]) -> MetaValue {
+ MetaValue::MetaList(v.iter().map(|s| meta_string(s)).collect())
+}
+
+fn meta_path_buf(p: &Path) -> MetaValue {
+ meta_string(&p.display().to_string())
+}
+
+fn meta_path_bufs(v: &[PathBuf]) -> MetaValue {
+ MetaValue::MetaList(v.iter().map(|p| meta_path_buf(p)).collect())
+}
+
+#[cfg(test)]
+mod test {
+ use super::{parse_code_block_attrs, AbstractSyntaxTree, Metadata};
+ use super::{Block, Inline};
+ use std::path::PathBuf;
+ use std::str::FromStr;
+
+ #[test]
+ fn code_block_attrs() {
+ assert_eq!(
+ parse_code_block_attrs("foo"),
+ ("foo".to_string(), vec![], vec![])
+ );
+ assert_eq!(
+ parse_code_block_attrs("{#foo}"),
+ ("foo".to_string(), vec![], vec![])
+ );
+ assert_eq!(
+ parse_code_block_attrs("{#foo .file bar=yo}"),
+ (
+ "foo".to_string(),
+ vec!["file".to_string()],
+ vec![("bar".to_string(), "yo".to_string())]
+ )
+ );
+ }
+
+ #[test]
+ fn empty_input() {
+ let ast = AbstractSyntaxTree::from_str("").unwrap();
+ let doc = ast.to_pandoc();
+ assert!(doc.blocks.is_empty());
+ assert!(doc.meta.is_empty());
+ assert!(!doc.pandoc_api_version.is_empty());
+ }
+
+ #[test]
+ fn simple() {
+ let ast = AbstractSyntaxTree::from_str(
+ "\
+ # Introduction \n\
+ \n\
+ First paragraph.\n\
+ ",
+ )
+ .unwrap();
+ let doc = ast.to_pandoc();
+ assert!(doc.meta.is_empty());
+ assert!(!doc.pandoc_api_version.is_empty());
+
+ let attr = ("".to_string(), vec![], vec![]);
+ let h = Block::Header(1, attr, vec![Inline::Str("Introduction".to_string())]);
+ let para = Block::Para(vec![Inline::Str("First paragraph.".to_string())]);
+ assert_eq!(doc.blocks, &[h, para]);
+ }
+
+ #[test]
+ fn parses_leading_meta() {
+ let markdown = "\n\n---\ntitle: Foo Bar\n...\nfoobar\n";
+ let ast = AbstractSyntaxTree::from_str(markdown).unwrap();
+ let doc = ast.to_pandoc();
+ let keys: Vec<String> = doc.meta.keys().cloned().collect();
+ assert_eq!(keys, ["title"]);
+ }
+
+ #[test]
+ fn parses_trailing_meta() {
+ let markdown = "foobar\n---\ntitle: Foo Bar\n...\n\n\n";
+ let ast = AbstractSyntaxTree::from_str(markdown).unwrap();
+ let doc = ast.to_pandoc();
+ let keys: Vec<String> = doc.meta.keys().cloned().collect();
+ assert_eq!(keys, ["title"]);
+ }
+
+ #[test]
+ fn full_meta() {
+ let meta = Metadata::new(
+ "\
+title: Foo Bar
+date: today
+classes: [json, text]
+template: rust
+bibliography:
+- foo.bib
+- bar.bib
+bindings:
+- foo.yaml
+- bar.yaml
+functions:
+- foo.py
+- bar.py
+",
+ )
+ .unwrap();
+ assert_eq!(meta.title, "Foo Bar");
+ assert_eq!(meta.date.unwrap(), "today");
+ assert_eq!(meta.classes.unwrap(), &["json", "text"]);
+ assert_eq!(meta.template.unwrap(), "rust");
+ assert_eq!(
+ meta.bibliography.unwrap(),
+ &[path("foo.bib"), path("bar.bib")]
+ );
+ assert_eq!(
+ meta.bindings.unwrap(),
+ &[path("foo.yaml"), path("bar.yaml")]
+ );
+ assert_eq!(meta.functions.unwrap(), &[path("foo.py"), path("bar.py")]);
+ }
+
+ fn path(s: &str) -> PathBuf {
+ PathBuf::from(s)
+ }
+}
diff --git a/src/bin/cli/mod.rs b/src/bin/cli/mod.rs
index 30f5601..c53da0f 100644
--- a/src/bin/cli/mod.rs
+++ b/src/bin/cli/mod.rs
@@ -33,6 +33,28 @@ where
Ok(doc)
}
+#[instrument(level = "trace")]
+pub fn load_document_with_pullmark<P>(filename: P, style: Style) -> Result<Document>
+where
+ P: AsRef<Path> + Debug,
+{
+ let filename = filename.as_ref();
+ let base_path = subplot::get_basedir_from(filename);
+ event!(
+ Level::TRACE,
+ ?filename,
+ ?base_path,
+ "Loading document based at `{}` called `{}` with {:?} using pullmark-cmark",
+ base_path.display(),
+ filename.display(),
+ style
+ );
+ let doc = Document::from_file_with_pullmark(&base_path, filename, style)?;
+ event!(Level::TRACE, "Loaded doc from file OK");
+
+ Ok(doc)
+}
+
pub fn extract_file<'a>(doc: &'a Document, filename: &str) -> Result<&'a DataFile> {
for file in doc.files() {
if file.filename() == filename {
diff --git a/src/bin/subplot.rs b/src/bin/subplot.rs
index 48406f1..3e865fb 100644
--- a/src/bin/subplot.rs
+++ b/src/bin/subplot.rs
@@ -415,7 +415,7 @@ impl Codegen {
fn run(&self) -> Result<()> {
let span = span!(Level::TRACE, "codegen");
let _enter = span.enter();
- let mut doc = cli::load_document(&self.filename, Style::default())?;
+ let mut doc = cli::load_document_with_pullmark(&self.filename, Style::default())?;
doc.lint()?;
let template = doc
.meta()
diff --git a/src/doc.rs b/src/doc.rs
index 5294738..10ac20f 100644
--- a/src/doc.rs
+++ b/src/doc.rs
@@ -1,3 +1,4 @@
+use crate::ast;
use crate::visitor;
use crate::DataFile;
use crate::DataFiles;
@@ -16,6 +17,7 @@ use std::default::Default;
use std::fmt::Debug;
use std::ops::Deref;
use std::path::{Path, PathBuf};
+use std::str::FromStr;
use pandoc_ast::{MutVisitor, Pandoc};
@@ -98,19 +100,16 @@ impl<'a> Document {
}
}
- /// Construct a Document from a JSON AST
- #[instrument(level = "trace", skip(json))]
- pub fn from_json<P>(
+ #[instrument(level = "trace", skip(ast))]
+ fn from_ast<P>(
basedir: P,
markdowns: Vec<PathBuf>,
- json: &str,
+ mut ast: Pandoc,
style: Style,
) -> Result<Document>
where
P: AsRef<Path> + Debug,
{
- event!(Level::TRACE, "Parsing document...");
- let mut ast: Pandoc = serde_json::from_str(json)?;
let meta = Metadata::new(basedir, &ast)?;
let mut linter = LintingVisitor::default();
event!(Level::TRACE, "Walking AST for linting...");
@@ -125,6 +124,22 @@ impl<'a> Document {
Ok(doc)
}
+ /// Construct a Document from a JSON AST
+ #[instrument(level = "trace", skip(json))]
+ pub fn from_json<P>(
+ basedir: P,
+ markdowns: Vec<PathBuf>,
+ json: &str,
+ style: Style,
+ ) -> Result<Document>
+ where
+ P: AsRef<Path> + Debug,
+ {
+ event!(Level::TRACE, "Parsing document...");
+ let ast: Pandoc = serde_json::from_str(json)?;
+ Self::from_ast(basedir, markdowns, ast, style)
+ }
+
/// Construct a Document from a named file.
///
/// The file can be in any format Pandoc understands. This runs
@@ -157,6 +172,30 @@ impl<'a> Document {
Ok(doc)
}
+ /// Construct a Document from a named file, using the pulldown-cmark crate.
+ ///
+ /// The file can be in the CommonMark format, with some
+ /// extensions. This uses the pulldown-cmark crate to parse the
+ /// file into an AST.
+ #[instrument(level = "trace")]
+ pub fn from_file_with_pullmark(
+ basedir: &Path,
+ filename: &Path,
+ style: Style,
+ ) -> Result<Document> {
+ event!(
+ Level::TRACE,
+ ?filename,
+ "Parsing document with pullmark-cmark"
+ );
+ let filename = filename.to_path_buf();
+ let markdown = std::fs::read_to_string(&filename)?;
+ let ast = ast::AbstractSyntaxTree::from_str(&markdown)?;
+
+ event!(Level::TRACE, "Parsed document OK");
+ Self::from_ast(basedir, vec![filename], ast.to_pandoc(), style)
+ }
+
/// Return the AST of a Document, serialized as JSON.
///
/// This is useful in a Pandoc filter, so that the filter can give
diff --git a/src/error.rs b/src/error.rs
index 92e8edb..e69bffe 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -165,7 +165,7 @@ pub enum SubplotError {
///
/// Names of embedded files must be unique in the input document,
/// but Subplot found at least one with the same name as another.
- #[error("Duplicate embedded file name: {0}")]
+ #[error("Duplicate embedded file name: {0:?}")]
DuplicateEmbeddedFilename(String),
/// Embedded file has more than one `add-newline` attribute
@@ -287,6 +287,14 @@ pub enum SubplotError {
#[from]
source: serde_yaml::Error,
},
+
+ /// Abstract syntax tree error.
+ #[error(transparent)]
+ Ast(#[from] crate::ast::Error),
+
+ /// UTF8 conversion error.
+ #[error(transparent)]
+ Utf8Error(#[from] std::str::Utf8Error),
}
impl SubplotError {
diff --git a/src/lib.rs b/src/lib.rs
index e2b1431..e818c5a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -69,3 +69,6 @@ pub use templatespec::TemplateSpec;
mod codegen;
pub use codegen::{generate_test_program, template_spec};
+
+mod ast;
+pub use ast::AbstractSyntaxTree;
diff --git a/src/visitor/datafiles.rs b/src/visitor/datafiles.rs
index 53ab7bb..09d9245 100644
--- a/src/visitor/datafiles.rs
+++ b/src/visitor/datafiles.rs
@@ -3,6 +3,7 @@ use crate::DataFile;
use crate::DataFiles;
use pandoc_ast::{Block, MutVisitor};
+use tracing::{event, Level};
impl MutVisitor for DataFiles {
fn visit_vec_block(&mut self, vec_block: &mut Vec<Block>) {
@@ -11,6 +12,7 @@ impl MutVisitor for DataFiles {
match block {
Block::CodeBlock(attr, contents) => {
if is_class(attr, "file") {
+ event!(Level::TRACE, ?attr, "visiting code block");
let add_newline = match panhelper::find_attr_kv(attr, "add-newline").next()
{
None | Some("auto") => !contents.ends_with('\n'),
diff --git a/subplot.md b/subplot.md
index 83fbda3..824e44a 100644
--- a/subplot.md
+++ b/subplot.md
@@ -1,3 +1,20 @@
+---
+title: "Subplot"
+author: The Subplot project
+template: python
+bindings:
+- subplot.yaml
+- lib/runcmd.yaml
+- lib/files.yaml
+functions:
+- subplot.py
+- lib/files.py
+- lib/runcmd.py
+classes:
+- json
+...
+
+
# Introduction
Subplot is software to help capture and communicate acceptance
@@ -3153,22 +3170,3 @@ This is a test file.
~~~{#expected.txt .file}
This is a test file.
~~~
-
-
-
-
----
-title: "Subplot"
-author: The Subplot project
-template: python
-bindings:
-- subplot.yaml
-- lib/runcmd.yaml
-- lib/files.yaml
-functions:
-- subplot.py
-- lib/files.py
-- lib/runcmd.py
-classes:
-- json
-...
diff --git a/subplotlib/files.md b/subplotlib/files.md
index 56bea41..a5b54fc 100644
--- a/subplotlib/files.md
+++ b/subplotlib/files.md
@@ -4,7 +4,7 @@ author: The Subplot project
template: rust
bindings:
- lib/files.yaml
----
+...
# Introduction
diff --git a/subplotlib/runcmd.md b/subplotlib/runcmd.md
index 83ba5cb..d6210fb 100644
--- a/subplotlib/runcmd.md
+++ b/subplotlib/runcmd.md
@@ -5,7 +5,7 @@ template: rust
bindings:
- lib/runcmd.yaml
- lib/files.yaml
----
+...
# Introduction