summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <liw@liw.fi>2021-09-14 14:14:38 +0300
committerLars Wirzenius <liw@liw.fi>2021-09-16 16:40:18 +0300
commit458525d21c9960d98fa932be06cf133cd910ce7b (patch)
tree08f3d913e388c5870231b757f64ed0fe157ba105
parent9198b69c561e582704dbd99000d59e474c406a67 (diff)
downloadsubplot-458525d21c9960d98fa932be06cf133cd910ce7b.tar.gz
feat! parse Markdown input with pulldown-cmark instead of Pandoc
This is a first step towards being able to use Subplot codegen from a project's build.rs, and with only pure-Rust build dependencies. Replace Pandoc for parsing Markdown input with pulldown-cmark. This is mostly a drop-in replacement, but not entirely. The YAML parsing is more strict now. Note that this is a breaking change. Some subplots that used to work, and still work with docgen, no longer work with the new parser. Major differences are: * Only specific fields are supported. All the Markdown files in the Subplot source tree work. If anything else is needed, and it likely is, the new parser needs to be extended. * The bindings, functions, classes, and bibliography fields MUST be lists of strings. A single string value will no longer work. Sponsored-by: pep.foundation
-rw-r--r--Cargo.lock31
-rw-r--r--Cargo.toml3
-rw-r--r--src/ast.rs455
-rw-r--r--src/bin/cli/mod.rs22
-rw-r--r--src/bin/subplot.rs2
-rw-r--r--src/doc.rs51
-rw-r--r--src/error.rs10
-rw-r--r--src/lib.rs3
-rw-r--r--src/visitor/datafiles.rs2
-rw-r--r--subplot.md36
10 files changed, 587 insertions, 28 deletions
diff --git a/Cargo.lock b/Cargo.lock
index b8d5b5e..b3e61d5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -297,6 +297,15 @@ dependencies = [
]
[[package]]
+name = "getopts"
+version = "0.2.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5"
+dependencies = [
+ "unicode-width",
+]
+
+[[package]]
name = "getrandom"
version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -671,6 +680,18 @@ dependencies = [
]
[[package]]
+name = "pulldown-cmark"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ffade02495f22453cd593159ea2f59827aae7f53fa8323f756799b670881dcf8"
+dependencies = [
+ "bitflags",
+ "getopts",
+ "memchr",
+ "unicase",
+]
+
+[[package]]
name = "quote"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1022,6 +1043,7 @@ dependencies = [
"pandoc_ast 0.7.3",
"pandoc_ast 0.8.0",
"pikchr",
+ "pulldown-cmark",
"regex",
"roadmap",
"serde",
@@ -1329,6 +1351,15 @@ dependencies = [
]
[[package]]
+name = "unicase"
+version = "2.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6"
+dependencies = [
+ "version_check",
+]
+
+[[package]]
name = "unicode-segmentation"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/Cargo.toml b/Cargo.toml
index 451c378..5753f63 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -46,6 +46,7 @@ git-testament = "0.2"
tracing = "0.1"
tracing-appender = "0.1"
tracing-subscriber = "0.2"
+pulldown-cmark = "0.8.0"
[dependencies.tera]
version = "1"
@@ -53,4 +54,4 @@ default-features = true
[build-dependencies]
walkdir = "2"
-anyhow = "1" \ No newline at end of file
+anyhow = "1"
diff --git a/src/ast.rs b/src/ast.rs
new file mode 100644
index 0000000..60d234d
--- /dev/null
+++ b/src/ast.rs
@@ -0,0 +1,455 @@
+use lazy_static::lazy_static;
+use pandoc_ast::{Attr, Block, Inline, Map, MetaValue, Pandoc};
+use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag};
+use regex::Regex;
+use serde::Deserialize;
+use std::path::{Path, PathBuf};
+use tracing::{event, span, Level};
+
+lazy_static! {
+ // Pattern that recognises a YAML block at the beginning of a file.
+ static ref LEADING_YAML_PATTERN: Regex = Regex::new(r"^(?:\S*\n)*-{3,}\n(?P<yaml>([^.]+.*?\n)*)\.{3,}\n(?P<text>(.*\n)*)$").unwrap();
+
+
+ // Pattern that recognises a YAML block at the end of a file.
+ static ref TRAILING_YAML_PATTERN: Regex = Regex::new(r"(?P<text>(.*\n)*)\n*-{3,}\n(?P<yaml>(.*?\n)*)\.{3,}\n(?:\S*\n)*$").unwrap();
+}
+
+/// An abstract syntax tree representation of a Markdown file.
+///
+/// This represents a Markdown file as an abstract syntax tree
+/// compatible with Pandoc's AST. The document YAML metadata MUST be
+/// at the top or bottom of the file, excluding leading or trailing
+/// empty lines.
+#[derive(Debug)]
+pub struct AbstractSyntaxTree {
+ blocks: Vec<Block>,
+ meta: Map<String, MetaValue>,
+}
+
+impl AbstractSyntaxTree {
+    // Build an AST from already-parsed metadata and blocks.
+    //
+    // Deliberately private: this file constructs values through the
+    // `FromStr` implementation.
+    fn new(meta: Map<String, MetaValue>, blocks: Vec<Block>) -> Self {
+        AbstractSyntaxTree { meta, blocks }
+    }
+
+    /// Convert this tree into a `pandoc_ast::Pandoc` document.
+    ///
+    /// The metadata and the blocks are cloned, so `self` remains usable
+    /// afterwards.
+    pub fn to_pandoc(&self) -> Pandoc {
+        let meta = self.meta.clone();
+        let blocks = self.blocks.clone();
+        Pandoc {
+            meta,
+            blocks,
+            pandoc_api_version: vec![1, 20],
+        }
+    }
+}
+
+impl std::str::FromStr for AbstractSyntaxTree {
+    type Err = Error;
+
+    /// Create an abstract syntax tree from a string.
+    ///
+    /// A YAML metadata block is looked for with the leading pattern first
+    /// and the trailing pattern second. When one matches, the YAML is
+    /// parsed into document metadata and only the text captured next to
+    /// it is parsed as Markdown. When neither matches, the whole input is
+    /// parsed as Markdown and the metadata is empty.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::Yaml` if a metadata block is found but cannot be
+    /// deserialized.
+    fn from_str(markdown: &str) -> Result<Self, Self::Err> {
+        let span = span!(Level::TRACE, "parse markdown");
+        // The guard must be bound to a named variable: `let _ = ...` drops
+        // it immediately, which exits the span before any work is done.
+        let _enter = span.enter();
+        event!(Level::TRACE, "Parsing markdown");
+        let (meta, text) = if let Some((yaml, text)) = get_yaml(&LEADING_YAML_PATTERN, markdown) {
+            event!(Level::TRACE, ?yaml, "Found leading YAML");
+            (Metadata::new(yaml)?.to_map(), text)
+        } else if let Some((yaml, text)) = get_yaml(&TRAILING_YAML_PATTERN, markdown) {
+            event!(Level::TRACE, ?yaml, "Found trailing YAML");
+            // Parse only the text in front of the YAML block; feeding the
+            // whole input to the Markdown parser would turn the metadata
+            // into spurious document content.
+            (Metadata::new(yaml)?.to_map(), text)
+        } else {
+            event!(Level::TRACE, "No YAML to be found");
+            (Map::new(), markdown)
+        };
+        let ast = AbstractSyntaxTree::new(meta, parse_blocks(text));
+        event!(Level::TRACE, "Parsing markdown: OK");
+        Ok(ast)
+    }
+}
+
+// Extract a YAML metadata block using a given regex.
+//
+// The regex is expected to have the named groups `yaml` and `text`.
+// Returns the two captured slices of `markdown` as `(yaml, text)`, or
+// `None` if the regex does not match or either group is missing from
+// the match.
+fn get_yaml<'a>(pat: &Regex, markdown: &'a str) -> Option<(&'a str, &'a str)> {
+    let c = match pat.captures(markdown) {
+        Some(c) => c,
+        None => {
+            event!(Level::TRACE, ?pat, "YAML regex does not match");
+            return None;
+        }
+    };
+    event!(Level::TRACE, "YAML regex matches");
+    match (c.name("yaml"), c.name("text")) {
+        (Some(yaml), Some(text)) => {
+            event!(Level::TRACE, "YAML regex captures YAML and text");
+            // `as_str` yields the matched slice of the input directly, so
+            // there is no need to re-slice `markdown` by offsets.
+            Some((yaml.as_str(), text.as_str()))
+        }
+        _ => {
+            event!(Level::TRACE, ?c, "YAML regex fails to capture YAML");
+            None
+        }
+    }
+}
+
+// Parse Markdown into a sequence of Blocks.
+//
+// Only paragraphs, headings, and code blocks are turned into blocks, and
+// only emphasis, strong, and strikethrough are turned into inline
+// containers. Events for everything else are ignored for now, as they
+// are not needed for codegen.
+fn parse_blocks(markdown: &str) -> Vec<Block> {
+    event!(Level::TRACE, "Parsing blocks");
+
+    // Define the Markdown parser.
+    let mut options = Options::empty();
+    options.insert(Options::ENABLE_TABLES);
+    options.insert(Options::ENABLE_FOOTNOTES);
+    options.insert(Options::ENABLE_STRIKETHROUGH);
+    options.insert(Options::ENABLE_TASKLISTS);
+    options.insert(Options::ENABLE_SMART_PUNCTUATION);
+    let parser = Parser::new_ext(markdown, options);
+
+    // The sequence of blocks that represents the parsed document.
+    let mut blocks = vec![];
+
+    // The current set of inline elements we've collected. This gets
+    // emptied whenever we finish a block.
+    let mut inlines: Vec<Inline> = vec![];
+
+    // For each inline container that is currently open, the length of
+    // `inlines` when it was opened. When the container ends, only the
+    // inlines collected since then become its children; without this,
+    // text preceding the opening marker would be swallowed as well.
+    let mut open_containers: Vec<usize> = vec![];
+
+    for event in parser {
+        event!(Level::TRACE, ?event);
+        match event {
+            // We ignore these for now. They're not needed for codegen.
+            Event::Html(_)
+            | Event::FootnoteReference(_)
+            | Event::SoftBreak
+            | Event::HardBreak
+            | Event::Rule
+            | Event::TaskListMarker(_) => (),
+
+            // Inline text of various kinds.
+            Event::Text(text) => inlines.push(inline_text(&text)),
+            Event::Code(text) => inlines.push(inline_code(&text)),
+
+            // Of the start events, only the inline containers matter:
+            // remember where their children begin.
+            Event::Start(tag) => {
+                if matches!(tag, Tag::Emphasis | Tag::Strong | Tag::Strikethrough) {
+                    open_containers.push(inlines.len());
+                }
+            }
+
+            // End of a block or inline.
+            Event::End(tag) => match tag {
+                // Wrap the children collected since the matching start
+                // event into a single inline, kept for a later block.
+                Tag::Emphasis | Tag::Strong | Tag::Strikethrough => {
+                    // Clamp defensively so `split_off` cannot panic.
+                    let start = open_containers.pop().unwrap_or(0).min(inlines.len());
+                    let mut children = inlines.split_off(start);
+                    inline_from_inlines(&tag, &mut children);
+                    inlines.append(&mut children);
+                }
+                Tag::Paragraph => blocks.push(paragraph(&mut inlines)),
+                Tag::Heading(level) => blocks.push(heading(level as i64, &mut inlines)),
+                Tag::CodeBlock(kind) => blocks.push(code_block(&kind, &mut inlines)),
+
+                // We don't handle anything else yet.
+                _ => (),
+            },
+        }
+    }
+
+    // Note: `inlines` is not necessarily empty here. Text belonging to a
+    // construct whose end tag is not handled above is never drained, so
+    // asserting emptiness would fail on such input.
+
+    event!(Level::TRACE, "Parsing blocks: OK");
+    blocks
+}
+
+// Wrap plain text in a `Str` inline.
+fn inline_text(text: &str) -> Inline {
+    let owned = String::from(text);
+    Inline::Str(owned)
+}
+
+// Wrap inline code in a `Code` inline with empty attributes.
+fn inline_code(text: &str) -> Inline {
+    let no_attrs: Attr = (String::new(), Vec::new(), Vec::new());
+    Inline::Code(no_attrs, String::from(text))
+}
+
+// Turn the collected inlines into a paragraph, leaving `inlines` empty.
+fn paragraph(inlines: &mut Vec<Inline>) -> Block {
+    let content = std::mem::take(inlines);
+    Block::Para(content)
+}
+
+// Turn the collected inlines into a heading of the given level, with
+// empty attributes, leaving `inlines` empty.
+fn heading(level: i64, inlines: &mut Vec<Inline>) -> Block {
+    let no_attrs: Attr = (String::new(), Vec::new(), Vec::new());
+    let content = std::mem::take(inlines);
+    Block::Header(level, no_attrs, content)
+}
+
+// Turn the collected inlines into a code block, leaving `inlines` empty.
+//
+// For a fenced block the attributes come from the text on the fence
+// line; any other kind of block gets the attributes of an empty string.
+fn code_block(kind: &CodeBlockKind, inlines: &mut Vec<Inline>) -> Block {
+    event!(Level::TRACE, ?kind, "code block");
+    let attr = match kind {
+        CodeBlockKind::Fenced(lang) => {
+            event!(Level::TRACE, ?lang, "fenced code block");
+            parse_code_block_attrs(lang)
+        }
+        _ => {
+            event!(Level::TRACE, "indented code block");
+            parse_code_block_attrs("")
+        }
+    };
+    event!(Level::TRACE, ?attr, "code block attrs");
+    // Concatenate the plain text of every pending inline, in order.
+    let code: String = inlines.drain(..).map(plain_text_inline).collect();
+    Block::CodeBlock(attr, code)
+}
+
+// Flatten an inline element into its plain text.
+//
+// Handles every kind of inline this parser creates: plain text, inline
+// code, and the emphasis, strong, and strikeout containers, whose
+// children are flattened recursively.
+//
+// Panics on any other kind of inline; this file never constructs one.
+fn plain_text_inline(inline: Inline) -> String {
+    match inline {
+        Inline::Str(text) => text,
+        Inline::Code(_, text) => text,
+        // Strong and strikeout inlines can be pending when a code block
+        // ends (left over from constructs the block parser does not
+        // drain), so they must be flattened like emphasis, not panic.
+        Inline::Emph(inlines) | Inline::Strong(inlines) | Inline::Strikeout(inlines) => {
+            inlines.into_iter().map(plain_text_inline).collect()
+        }
+        other => panic!("not text in code block: {:?}", other),
+    }
+}
+
+// Parse the attribute text of a code block into a Pandoc `Attr`.
+//
+// Text of the form `{...}` is split on ASCII whitespace: a word starting
+// with `#` sets the identifier (the last one wins), a word starting with
+// `.` adds a class, and a `key=value` word adds a key/value pair. Other
+// words are ignored. Text not wrapped in braces becomes the identifier
+// as a whole.
+fn parse_code_block_attrs(attrs: &str) -> Attr {
+    event!(Level::TRACE, ?attrs, "parsing code block attrs");
+    let braced = attrs
+        .strip_prefix('{')
+        .and_then(|rest| rest.strip_suffix('}'));
+    let inner = match braced {
+        Some(inner) => inner,
+        None => return (attrs.to_string(), vec![], vec![]),
+    };
+
+    let mut id = String::new();
+    let mut classes = Vec::new();
+    let mut keyvalues = Vec::new();
+    for word in inner.split_ascii_whitespace() {
+        if let Some(name) = word.strip_prefix('#') {
+            id = name.to_string();
+        } else if let Some(class) = word.strip_prefix('.') {
+            classes.push(class.to_string());
+        } else if let Some((key, value)) = word.split_once('=') {
+            keyvalues.push((key.to_string(), value.to_string()));
+        }
+    }
+    (id, classes, keyvalues)
+}
+
+// Replace the contents of `inlines` with a single container inline that
+// holds them all as children.
+//
+// `tag` selects the container: emphasis, strong, or strikethrough.
+// Calling this with any other tag is a programming error and panics.
+fn inline_from_inlines(tag: &Tag, inlines: &mut Vec<Inline>) {
+    // Move the children out instead of cloning them and then clearing.
+    let children = std::mem::take(inlines);
+
+    let wrapped = match tag {
+        Tag::Emphasis => Inline::Emph(children),
+        Tag::Strong => Inline::Strong(children),
+        Tag::Strikethrough => Inline::Strikeout(children),
+        _ => unreachable!(),
+    };
+
+    inlines.push(wrapped);
+}
+
+/// Errors from parsing a Markdown file and its embedded YAML metadata.
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+ #[error(transparent)]
+ Regex(#[from] regex::Error), // NOTE(review): no code visible in this file produces a regex::Error — confirm this variant is needed
+
+ #[error(transparent)]
+ Yaml(#[from] serde_yaml::Error), // the YAML metadata block could not be deserialized
+}
+
+// Document metadata.
+//
+// This is expressed in the Markdown input file as an embedded YAML
+// block.
+//
+// Note that this structure needs to be able to capture any metadata
+// block we can work with, in any input file. By being strict here we
+// make it easier to tell the user when a metadata block has, say, a
+// misspelled field.
+#[derive(Debug, Default, Deserialize)]
+#[serde(deny_unknown_fields)] // a field not listed below makes deserialization fail
+struct Metadata {
+ title: String, // the only field that is not optional
+ subtitle: Option<String>,
+ author: Option<String>,
+ date: Option<String>,
+ classes: Option<Vec<String>>, // a list of strings when present
+ template: Option<String>,
+ bibliography: Option<Vec<PathBuf>>, // a list of paths when present
+ bindings: Option<Vec<PathBuf>>, // a list of paths when present
+ functions: Option<Vec<PathBuf>>, // a list of paths when present
+}
+
+impl Metadata {
+    // Parse the text of a YAML metadata block.
+    //
+    // Fails with `Error::Yaml` if the text cannot be deserialized into
+    // this structure.
+    fn new(yaml_text: &str) -> Result<Self, Error> {
+        event!(Level::TRACE, "Parsing YAML");
+        let meta: Self = serde_yaml::from_str(yaml_text)?;
+        Ok(meta)
+    }
+
+    // Convert the metadata into a Pandoc metadata map.
+    //
+    // Every field that is set is stored under the name it has in the
+    // YAML block: single values as strings, lists as lists of strings.
+    fn to_map(&self) -> Map<String, MetaValue> {
+        event!(Level::TRACE, "Creating metadata map from parsed YAML");
+        let mut map: Map<String, MetaValue> = Map::new();
+        map.insert(s("title"), meta_string(&self.title));
+        if let Some(v) = &self.subtitle {
+            map.insert(s("subtitle"), meta_string(v));
+        }
+        if let Some(v) = &self.author {
+            map.insert(s("author"), meta_string(v));
+        }
+        if let Some(v) = &self.date {
+            map.insert(s("date"), meta_string(v));
+        }
+        if let Some(v) = &self.classes {
+            map.insert(s("classes"), meta_strings(v));
+        }
+        if let Some(v) = &self.template {
+            map.insert(s("template"), meta_string(v));
+        }
+        if let Some(v) = &self.bibliography {
+            // The key must be the YAML field name, like all the others;
+            // "bibliographies" is not a name the metadata block uses.
+            map.insert(s("bibliography"), meta_path_bufs(v));
+        }
+        if let Some(v) = &self.bindings {
+            map.insert(s("bindings"), meta_path_bufs(v));
+        }
+        if let Some(v) = &self.functions {
+            map.insert(s("functions"), meta_path_bufs(v));
+        }
+        event!(
+            Level::TRACE,
+            ?self,
+            ?map,
+            "Created metadata map from parsed YAML"
+        );
+        map
+    }
+}
+
+// Shorthand for turning a string slice into an owned `String`.
+fn s(s: &str) -> String {
+    String::from(s)
+}
+
+// Wrap a string in a `MetaString` metadata value.
+fn meta_string(s: &str) -> MetaValue {
+    let owned = String::from(s);
+    MetaValue::MetaString(owned)
+}
+
+// Wrap a list of strings in a `MetaList` of `MetaString` values,
+// keeping their order.
+fn meta_strings(v: &[String]) -> MetaValue {
+    let mut items = Vec::with_capacity(v.len());
+    for item in v {
+        items.push(meta_string(item));
+    }
+    MetaValue::MetaList(items)
+}
+
+// Wrap the displayed form of a path in a `MetaString` metadata value.
+fn meta_path_buf(p: &Path) -> MetaValue {
+    let shown = format!("{}", p.display());
+    meta_string(&shown)
+}
+
+// Wrap a list of paths in a `MetaList` of `MetaString` values, keeping
+// their order.
+fn meta_path_bufs(v: &[PathBuf]) -> MetaValue {
+    let mut items = Vec::with_capacity(v.len());
+    for path in v {
+        items.push(meta_path_buf(path));
+    }
+    MetaValue::MetaList(items)
+}
+
+#[cfg(test)]
+mod test {
+ use super::{parse_code_block_attrs, AbstractSyntaxTree, Metadata};
+ use super::{Block, Inline};
+ use std::path::PathBuf;
+ use std::str::FromStr;
+
+ #[test]
+ fn code_block_attrs() {
+ assert_eq!(
+ parse_code_block_attrs("foo"),
+ ("foo".to_string(), vec![], vec![])
+ );
+ assert_eq!(
+ parse_code_block_attrs("{#foo}"),
+ ("foo".to_string(), vec![], vec![])
+ );
+ assert_eq!(
+ parse_code_block_attrs("{#foo .file bar=yo}"),
+ (
+ "foo".to_string(),
+ vec!["file".to_string()],
+ vec![("bar".to_string(), "yo".to_string())]
+ )
+ );
+ }
+
+ #[test]
+ fn empty_input() {
+ let ast = AbstractSyntaxTree::from_str("").unwrap();
+ let doc = ast.to_pandoc();
+ assert!(doc.blocks.is_empty());
+ assert!(doc.meta.is_empty());
+ assert!(!doc.pandoc_api_version.is_empty());
+ }
+
+ #[test]
+ fn simple() {
+ let ast = AbstractSyntaxTree::from_str(
+ "\
+ # Introduction \n\
+ \n\
+ First paragraph.\n\
+ ",
+ )
+ .unwrap();
+ let doc = ast.to_pandoc();
+ assert!(doc.meta.is_empty());
+ assert!(!doc.pandoc_api_version.is_empty());
+
+ let attr = ("".to_string(), vec![], vec![]);
+ let h = Block::Header(1, attr, vec![Inline::Str("Introduction".to_string())]);
+ let para = Block::Para(vec![Inline::Str("First paragraph.".to_string())]);
+ assert_eq!(doc.blocks, &[h, para]);
+ }
+
+ #[test]
+ fn parses_leading_meta() {
+ let markdown = "\n\n---\ntitle: Foo Bar\n...\nfoobar\n";
+ let ast = AbstractSyntaxTree::from_str(markdown).unwrap();
+ let doc = ast.to_pandoc();
+ let keys: Vec<String> = doc.meta.keys().cloned().collect();
+ assert_eq!(keys, ["title"]);
+ }
+
+ #[test]
+ fn parses_trailing_meta() {
+ let markdown = "foobar\n---\ntitle: Foo Bar\n...\n\n\n";
+ let ast = AbstractSyntaxTree::from_str(markdown).unwrap();
+ let doc = ast.to_pandoc();
+ let keys: Vec<String> = doc.meta.keys().cloned().collect();
+ assert_eq!(keys, ["title"]);
+ }
+
+ #[test]
+ fn full_meta() {
+ let meta = Metadata::new(
+ "\
+title: Foo Bar
+date: today
+classes: [json, text]
+template: rust
+bibliography:
+- foo.bib
+- bar.bib
+bindings:
+- foo.yaml
+- bar.yaml
+functions:
+- foo.py
+- bar.py
+",
+ )
+ .unwrap();
+ assert_eq!(meta.title, "Foo Bar");
+ assert_eq!(meta.date.unwrap(), "today");
+ assert_eq!(meta.classes.unwrap(), &["json", "text"]);
+ assert_eq!(meta.template.unwrap(), "rust");
+ assert_eq!(
+ meta.bibliography.unwrap(),
+ &[path("foo.bib"), path("bar.bib")]
+ );
+ assert_eq!(
+ meta.bindings.unwrap(),
+ &[path("foo.yaml"), path("bar.yaml")]
+ );
+ assert_eq!(meta.functions.unwrap(), &[path("foo.py"), path("bar.py")]);
+ }
+
+ fn path(s: &str) -> PathBuf {
+ PathBuf::from(s)
+ }
+}
diff --git a/src/bin/cli/mod.rs b/src/bin/cli/mod.rs
index 30f5601..c53da0f 100644
--- a/src/bin/cli/mod.rs
+++ b/src/bin/cli/mod.rs
@@ -33,6 +33,28 @@ where
Ok(doc)
}
+/// Load a document from a file, parsing its Markdown with pulldown-cmark.
+///
+/// The base directory is derived from `filename`. Any error from
+/// loading the document is returned to the caller.
+#[instrument(level = "trace")]
+pub fn load_document_with_pullmark<P>(filename: P, style: Style) -> Result<Document>
+where
+    P: AsRef<Path> + Debug,
+{
+    let filename = filename.as_ref();
+    let base_path = subplot::get_basedir_from(filename);
+    // The crate is called pulldown-cmark; name it correctly in the trace.
+    event!(
+        Level::TRACE,
+        ?filename,
+        ?base_path,
+        "Loading document based at `{}` called `{}` with {:?} using pulldown-cmark",
+        base_path.display(),
+        filename.display(),
+        style
+    );
+    let doc = Document::from_file_with_pullmark(&base_path, filename, style)?;
+    event!(Level::TRACE, "Loaded doc from file OK");
+
+    Ok(doc)
+}
+
pub fn extract_file<'a>(doc: &'a Document, filename: &str) -> Result<&'a DataFile> {
for file in doc.files() {
if file.filename() == filename {
diff --git a/src/bin/subplot.rs b/src/bin/subplot.rs
index 48406f1..3e865fb 100644
--- a/src/bin/subplot.rs
+++ b/src/bin/subplot.rs
@@ -415,7 +415,7 @@ impl Codegen {
fn run(&self) -> Result<()> {
let span = span!(Level::TRACE, "codegen");
let _enter = span.enter();
- let mut doc = cli::load_document(&self.filename, Style::default())?;
+ let mut doc = cli::load_document_with_pullmark(&self.filename, Style::default())?;
doc.lint()?;
let template = doc
.meta()
diff --git a/src/doc.rs b/src/doc.rs
index 5294738..10ac20f 100644
--- a/src/doc.rs
+++ b/src/doc.rs
@@ -1,3 +1,4 @@
+use crate::ast;
use crate::visitor;
use crate::DataFile;
use crate::DataFiles;
@@ -16,6 +17,7 @@ use std::default::Default;
use std::fmt::Debug;
use std::ops::Deref;
use std::path::{Path, PathBuf};
+use std::str::FromStr;
use pandoc_ast::{MutVisitor, Pandoc};
@@ -98,19 +100,16 @@ impl<'a> Document {
}
}
- /// Construct a Document from a JSON AST
- #[instrument(level = "trace", skip(json))]
- pub fn from_json<P>(
+ #[instrument(level = "trace", skip(ast))]
+ fn from_ast<P>(
basedir: P,
markdowns: Vec<PathBuf>,
- json: &str,
+ mut ast: Pandoc,
style: Style,
) -> Result<Document>
where
P: AsRef<Path> + Debug,
{
- event!(Level::TRACE, "Parsing document...");
- let mut ast: Pandoc = serde_json::from_str(json)?;
let meta = Metadata::new(basedir, &ast)?;
let mut linter = LintingVisitor::default();
event!(Level::TRACE, "Walking AST for linting...");
@@ -125,6 +124,22 @@ impl<'a> Document {
Ok(doc)
}
+    /// Construct a Document from a Pandoc AST serialized as JSON.
+    ///
+    /// The JSON text is deserialized and the resulting AST is turned
+    /// into a Document; a deserialization failure is returned as an
+    /// error.
+    #[instrument(level = "trace", skip(json))]
+    pub fn from_json<P>(
+        basedir: P,
+        markdowns: Vec<PathBuf>,
+        json: &str,
+        style: Style,
+    ) -> Result<Document>
+    where
+        P: AsRef<Path> + Debug,
+    {
+        event!(Level::TRACE, "Parsing document...");
+        let parsed = serde_json::from_str::<Pandoc>(json)?;
+        Self::from_ast(basedir, markdowns, parsed, style)
+    }
+
/// Construct a Document from a named file.
///
/// The file can be in any format Pandoc understands. This runs
@@ -157,6 +172,30 @@ impl<'a> Document {
Ok(doc)
}
+    /// Construct a Document from a named file, using the pulldown-cmark crate.
+    ///
+    /// The file can be in the CommonMark format, with some
+    /// extensions. This uses the pulldown-cmark crate to parse the
+    /// file into an AST.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the file cannot be read as text, if its content cannot
+    /// be parsed into an AST, or if the Document cannot be built from
+    /// that AST.
+    #[instrument(level = "trace")]
+    pub fn from_file_with_pullmark(
+        basedir: &Path,
+        filename: &Path,
+        style: Style,
+    ) -> Result<Document> {
+        event!(
+            Level::TRACE,
+            ?filename,
+            "Parsing document with pulldown-cmark"
+        );
+        let filename = filename.to_path_buf();
+        let markdown = std::fs::read_to_string(&filename)?;
+        let ast = ast::AbstractSyntaxTree::from_str(&markdown)?;
+
+        event!(Level::TRACE, "Parsed document OK");
+        Self::from_ast(basedir, vec![filename], ast.to_pandoc(), style)
+    }
+
/// Return the AST of a Document, serialized as JSON.
///
/// This is useful in a Pandoc filter, so that the filter can give
diff --git a/src/error.rs b/src/error.rs
index 92e8edb..e69bffe 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -165,7 +165,7 @@ pub enum SubplotError {
///
/// Names of embedded files must be unique in the input document,
/// but Subplot found at least one with the same name as another.
- #[error("Duplicate embedded file name: {0}")]
+ #[error("Duplicate embedded file name: {0:?}")]
DuplicateEmbeddedFilename(String),
/// Embedded file has more than one `add-newline` attribute
@@ -287,6 +287,14 @@ pub enum SubplotError {
#[from]
source: serde_yaml::Error,
},
+
+ /// Abstract syntax tree error.
+ #[error(transparent)]
+ Ast(#[from] crate::ast::Error),
+
+ /// UTF8 conversion error.
+ #[error(transparent)]
+ Utf8Error(#[from] std::str::Utf8Error),
}
impl SubplotError {
diff --git a/src/lib.rs b/src/lib.rs
index e2b1431..e818c5a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -69,3 +69,6 @@ pub use templatespec::TemplateSpec;
mod codegen;
pub use codegen::{generate_test_program, template_spec};
+
+mod ast;
+pub use ast::AbstractSyntaxTree;
diff --git a/src/visitor/datafiles.rs b/src/visitor/datafiles.rs
index 53ab7bb..09d9245 100644
--- a/src/visitor/datafiles.rs
+++ b/src/visitor/datafiles.rs
@@ -3,6 +3,7 @@ use crate::DataFile;
use crate::DataFiles;
use pandoc_ast::{Block, MutVisitor};
+use tracing::{event, Level};
impl MutVisitor for DataFiles {
fn visit_vec_block(&mut self, vec_block: &mut Vec<Block>) {
@@ -11,6 +12,7 @@ impl MutVisitor for DataFiles {
match block {
Block::CodeBlock(attr, contents) => {
if is_class(attr, "file") {
+ event!(Level::TRACE, ?attr, "visiting code block");
let add_newline = match panhelper::find_attr_kv(attr, "add-newline").next()
{
None | Some("auto") => !contents.ends_with('\n'),
diff --git a/subplot.md b/subplot.md
index 83fbda3..824e44a 100644
--- a/subplot.md
+++ b/subplot.md
@@ -1,3 +1,20 @@
+---
+title: "Subplot"
+author: The Subplot project
+template: python
+bindings:
+- subplot.yaml
+- lib/runcmd.yaml
+- lib/files.yaml
+functions:
+- subplot.py
+- lib/files.py
+- lib/runcmd.py
+classes:
+- json
+...
+
+
# Introduction
Subplot is software to help capture and communicate acceptance
@@ -3153,22 +3170,3 @@ This is a test file.
~~~{#expected.txt .file}
This is a test file.
~~~
-
-
-
-
----
-title: "Subplot"
-author: The Subplot project
-template: python
-bindings:
-- subplot.yaml
-- lib/runcmd.yaml
-- lib/files.yaml
-functions:
-- subplot.py
-- lib/files.py
-- lib/runcmd.py
-classes:
-- json
-...