summaryrefslogtreecommitdiff
path: root/src/ast.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/ast.rs')
-rw-r--r--src/ast.rs483
1 files changed, 0 insertions, 483 deletions
diff --git a/src/ast.rs b/src/ast.rs
deleted file mode 100644
index 14a57be..0000000
--- a/src/ast.rs
+++ /dev/null
@@ -1,483 +0,0 @@
-use lazy_static::lazy_static;
-use log::trace;
-use pandoc_ast::{Attr, Block, Inline, Map, MetaValue, Pandoc};
-use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag};
-use regex::Regex;
-use serde::Deserialize;
-use serde_yaml::{Mapping, Value};
-use std::collections::{BTreeMap, HashMap};
-use std::path::{Path, PathBuf};
-
-lazy_static! {
- // Pattern that recognises a YAML block at the beginning of a file.
- static ref LEADING_YAML_PATTERN: Regex = Regex::new(r"^(?:\S*\n)*(?P<yaml>-{3,}\n([^.].*\n)*\.{3,}\n)(?P<text>(.*\n)*)$").unwrap();
-
-
- // Pattern that recognises a YAML block at the end of a file.
- static ref TRAILING_YAML_PATTERN: Regex = Regex::new(r"(?P<text>(.*\n)*)\n*(?P<yaml>-{3,}\n([^.].*\n)*\.{3,}\n)(?:\S*\n)*$").unwrap();
-}
-
-/// An abstract syntax tree representation of a Markdown file.
-///
-/// This represents a Markdown file as an abstract syntax tree
-/// compatible with Pandoc's AST. The document YAML metadata MUST be
-/// at the top or bottom of the file, excluding leading or trailing
-/// empty lines.
-#[derive(Debug)]
-pub struct AbstractSyntaxTree {
- blocks: Vec<Block>,
- meta: YamlMetadata,
-}
-
-impl AbstractSyntaxTree {
- /// Create a new AST.
- pub fn new(meta: YamlMetadata, markdown: &str) -> Self {
- let blocks = parse_blocks(markdown);
- Self { blocks, meta }
- }
-
- /// Return a Pandoc-compatible AST.
- pub fn to_pandoc(&self) -> Pandoc {
- Pandoc {
- meta: self.meta.to_map(),
- blocks: self.blocks.clone(),
- pandoc_api_version: vec![1, 20],
- }
- }
-}
-
-/// Extract YAML metadata from a Markdown document.
-pub fn extract_metadata(markdown: &str) -> Result<(YamlMetadata, &str), Error> {
- trace!("Extracting YAML from Markdown");
- let (yaml, md) = if let Some((yaml, markdown)) = get_yaml(&LEADING_YAML_PATTERN, markdown) {
- trace!("Found leading YAML: {:?}", yaml);
- (yaml, markdown)
- } else if let Some((yaml, _markdown)) = get_yaml(&TRAILING_YAML_PATTERN, markdown) {
- trace!("Found trailing YAML: {:?}", yaml);
- (yaml, markdown)
- } else {
- trace!("No YAML to be found");
- return Err(Error::NoMetadata);
- };
- let meta = YamlMetadata::new(yaml)?;
- trace!("Parsing markdown: OK");
- Ok((meta, md))
-}
-
-// Extract a YAML metadata block using a given regex.
-fn get_yaml<'a>(pat: &Regex, markdown: &'a str) -> Option<(&'a str, &'a str)> {
- trace!("Markdown: {:?}", markdown);
- if let Some(c) = pat.captures(markdown) {
- trace!("YAML regex matches: {:?}", c);
- let yaml = c.name("yaml");
- let text = c.name("text");
- trace!("YAML metadata: {:?}", yaml);
- trace!("markdown: {:?}", text);
- if yaml.is_some() && text.is_some() {
- trace!("YAML regex captures YAML and text");
- let yaml = yaml?;
- let text = text?;
- let yaml = &markdown[yaml.start()..yaml.end()];
- let text = &markdown[text.start()..text.end()];
- assert!(yaml.starts_with("---"));
- assert!(yaml.ends_with("...\n"));
- return Some((yaml, text));
- } else {
- trace!("YAML regex fails to capture YAML");
- }
- } else {
- trace!("YAML regex does not match");
- }
- None
-}
-
-// Parse Markdown into a sequence of Blocks.
-fn parse_blocks(markdown: &str) -> Vec<Block> {
- trace!("Parsing blocks");
-
- // Define the Markdown parser.
- let mut options = Options::empty();
- options.insert(Options::ENABLE_TABLES);
- options.insert(Options::ENABLE_FOOTNOTES);
- options.insert(Options::ENABLE_STRIKETHROUGH);
- options.insert(Options::ENABLE_TASKLISTS);
- options.insert(Options::ENABLE_SMART_PUNCTUATION);
- let parser = Parser::new_ext(markdown, options);
-
- // The sequence of blocks that represents the parsed document.
- let mut blocks = vec![];
-
- // The current set of inline elements we've collected. This gets
- // emptied whenever we finish a block.
- let mut inlines: Vec<Inline> = vec![];
-
- for event in parser {
- trace!("Parsing event: {:?}", event);
- match event {
- // We ignore these for now. They're not needed for codegen.
- Event::Html(_)
- | Event::FootnoteReference(_)
- | Event::SoftBreak
- | Event::HardBreak
- | Event::Rule
- | Event::TaskListMarker(_) => (),
-
- // Inline text of various kinds.
- Event::Text(text) => inlines.push(inline_text(&text)),
- Event::Code(text) => inlines.push(inline_code(&text)),
-
- // We only handle the end events.
- Event::Start(_) => (),
-
- // End of a block or inline.
- Event::End(tag) => match tag {
- // Collect inline elements for later inclusion in a block.
- Tag::Emphasis | Tag::Strong | Tag::Strikethrough => {
- inline_from_inlines(&tag, &mut inlines)
- }
- Tag::Paragraph => blocks.push(paragraph(&mut inlines)),
- Tag::Heading(level, _fragment, _classes) => {
- blocks.push(heading(level as i64, &mut inlines))
- }
- Tag::CodeBlock(kind) => blocks.push(code_block(&kind, &mut inlines)),
- Tag::Image(_link, dest, title) => blocks.push(image_block(&dest, &title)),
- // We don't handle anything else yet.
- _ => (),
- },
- }
- }
-
- // We MUST have emptied all inline elements.
- // assert!(inlines.is_empty());
-
- trace!("Parsing blocks: OK");
- blocks
-}
-
-fn inline_text(text: &str) -> Inline {
- Inline::Str(text.to_string())
-}
-
-fn inline_code(text: &str) -> Inline {
- let attr = ("".to_string(), vec![], vec![]);
- Inline::Code(attr, text.to_string())
-}
-
-fn paragraph(inlines: &mut Vec<Inline>) -> Block {
- Block::Para(std::mem::take(inlines))
-}
-
-fn heading(level: i64, inlines: &mut Vec<Inline>) -> Block {
- let attr = ("".to_string(), vec![], vec![]);
- Block::Header(level, attr, std::mem::take(inlines))
-}
-
-fn image_block(dest: &str, title: &str) -> Block {
- let attr = ("".to_string(), vec![], vec![]);
- Block::Para(vec![Inline::Image(
- attr,
- vec![],
- (dest.to_string(), title.to_string()),
- )])
-}
-
-fn code_block(kind: &CodeBlockKind, inlines: &mut Vec<Inline>) -> Block {
- trace!("code block: {:?}", kind);
- let attr = if let CodeBlockKind::Fenced(lang) = kind {
- trace!("fenced code block, lang={:?}", lang);
- parse_code_block_attrs(lang)
- } else {
- trace!("indented code block");
- parse_code_block_attrs("")
- };
- trace!("code block attrs: {:?}", attr);
- let mut code = String::new();
- for inline in inlines.drain(0..) {
- let text = plain_text_inline(inline);
- code.push_str(&text);
- }
- // pulldown_cmark and pandoc differ in their codeblock handling,
- // pulldown_cmark has an extra newline which we trim for now to be
- // compatible with pandoc's parsing
- if !code.is_empty() {
- assert_eq!(code.pop(), Some('\n'));
- }
- Block::CodeBlock(attr, code)
-}
-
-fn plain_text_inline(inline: Inline) -> String {
- match inline {
- Inline::Str(text) => text,
- Inline::Code(_, text) => text,
- Inline::Emph(inlines) => {
- let mut text = String::new();
- for inline in inlines {
- text.push_str(&plain_text_inline(inline));
- }
- text
- }
- _ => panic!("not text in code block: {:?}", inline),
- }
-}
-
-fn parse_code_block_attrs(attrs: &str) -> Attr {
- trace!("parsing code block attrs: {:?}", attrs);
- let mut id = "".to_string();
- let mut classes = vec![];
- let mut keyvalues = vec![];
- if attrs.starts_with('{') && attrs.ends_with('}') {
- let attrs = &attrs[1..attrs.len() - 1];
- for word in attrs.split_ascii_whitespace() {
- if let Some(x) = word.strip_prefix('#') {
- id = x.to_string();
- } else if let Some(x) = word.strip_prefix('.') {
- classes.push(x.to_string());
- } else if let Some(i) = word.find('=') {
- let k = &word[..i];
- let v = &word[i + 1..];
- keyvalues.push((k.to_string(), v.to_string()));
- }
- }
- } else if !attrs.is_empty() {
- classes.push(attrs.to_string());
- }
- (id, classes, keyvalues)
-}
-
-fn inline_from_inlines(tag: &Tag, inlines: &mut Vec<Inline>) {
- let new_inlines = inlines.clone();
- inlines.clear();
-
- let inline = match tag {
- Tag::Emphasis => Inline::Emph(new_inlines),
- Tag::Strong => Inline::Strong(new_inlines),
- Tag::Strikethrough => Inline::Strikeout(new_inlines),
- _ => unreachable!(),
- };
-
- inlines.push(inline);
-}
-
-/// Errors from Markdown parsing.
-#[derive(Debug, thiserror::Error)]
-pub enum Error {
- #[error(transparent)]
- Regex(#[from] regex::Error),
-
- #[error("Markdown doesn't contain a YAML block for document metadata")]
- NoMetadata,
-
- #[error(transparent)]
- Yaml(#[from] serde_yaml::Error),
-}
-
-/// Document metadata.
-///
-/// This is expressed in the Markdown input file as an embedded YAML
-/// block.
-///
-/// Note that this structure needs to be able to capture any metadata
-/// block we can work with, in any input file. By being strict here we
-/// make it easier to tell the user when a metadata block has, say, a
-/// misspelled field.
-#[derive(Debug, Default, Clone, Deserialize)]
-#[serde(deny_unknown_fields)]
-pub struct YamlMetadata {
- title: String,
- subtitle: Option<String>,
- authors: Option<Vec<String>>,
- date: Option<String>,
- classes: Option<Vec<String>>,
- bibliography: Option<Vec<PathBuf>>,
- markdowns: Vec<PathBuf>,
- bindings: Option<Vec<PathBuf>>,
- documentclass: Option<String>,
- #[serde(default)]
- impls: BTreeMap<String, Vec<PathBuf>>,
- pandoc: Option<HashMap<String, Value>>,
-}
-
-impl YamlMetadata {
- fn new(yaml_text: &str) -> Result<Self, Error> {
- trace!("Parsing YAML");
- let meta: Self = serde_yaml::from_str(yaml_text)?;
- Ok(meta)
- }
-
- /// Name of file with the Markdown for the subplot document.
- pub fn markdown(&self) -> &Path {
- &self.markdowns[0]
- }
-
- /// Convert into a pandoc_ast::Map.
- pub fn to_map(&self) -> Map<String, MetaValue> {
- trace!("Creating metadata map from parsed YAML");
- let mut map: Map<String, MetaValue> = Map::new();
-
- map.insert("title".into(), meta_string(&self.title));
-
- if let Some(v) = &self.subtitle {
- map.insert("subtitle".into(), meta_string(v));
- }
-
- if let Some(authors) = &self.authors {
- let authors: Vec<MetaValue> = authors
- .iter()
- .map(|s| MetaValue::MetaString(s.into()))
- .collect();
- map.insert("author".into(), MetaValue::MetaList(authors));
- }
-
- if let Some(v) = &self.date {
- map.insert("date".into(), meta_string(v));
- }
-
- if let Some(v) = &self.classes {
- map.insert("classes".into(), meta_strings(v));
- }
-
- if !self.impls.is_empty() {
- let impls = self
- .impls
- .iter()
- .map(|(k, v)| (k.to_owned(), Box::new(meta_path_bufs(v))))
- .collect();
- map.insert("impls".into(), MetaValue::MetaMap(impls));
- }
-
- if let Some(v) = &self.bibliography {
- map.insert("bibliography".into(), meta_path_bufs(v));
- }
-
- if let Some(v) = &self.bindings {
- map.insert("bindings".into(), meta_path_bufs(v));
- }
-
- if let Some(v) = &self.documentclass {
- map.insert("documentclass".into(), meta_string(v));
- }
-
- if let Some(pandoc) = &self.pandoc {
- for (key, value) in pandoc.iter() {
- map.insert(key.to_string(), value_to_pandoc(value));
- }
- }
-
- trace!("Created metadata map from parsed YAML");
- map
- }
-}
-
-fn mapping_to_pandoc(mapping: &Mapping) -> MetaValue {
- let mut map = Map::new();
- for (key, value) in mapping.iter() {
- let key = if let MetaValue::MetaString(s) = value_to_pandoc(key) {
- s
- } else {
- panic!("key not a string: {:?}", key);
- };
- map.insert(key, Box::new(value_to_pandoc(value)));
- }
-
- MetaValue::MetaMap(map)
-}
-
-fn value_to_pandoc(data: &Value) -> MetaValue {
- match data {
- Value::Null => unreachable!("null not OK"),
- Value::Number(_) => unreachable!("number not OK"),
- Value::Sequence(_) => unreachable!("sequence not OK"),
-
- Value::Bool(b) => MetaValue::MetaBool(*b),
- Value::String(s) => MetaValue::MetaString(s.clone()),
- Value::Mapping(mapping) => mapping_to_pandoc(mapping),
- }
-}
-
-fn meta_string(s: &str) -> MetaValue {
- MetaValue::MetaString(s.to_string())
-}
-
-fn meta_strings(v: &[String]) -> MetaValue {
- MetaValue::MetaList(v.iter().map(|s| meta_string(s)).collect())
-}
-
-fn meta_path_buf(p: &Path) -> MetaValue {
- meta_string(&p.display().to_string())
-}
-
-fn meta_path_bufs(v: &[PathBuf]) -> MetaValue {
- MetaValue::MetaList(v.iter().map(|p| meta_path_buf(p)).collect())
-}
-
#[cfg(test)]
mod test {
    use super::{parse_code_block_attrs, YamlMetadata};
    use std::path::{Path, PathBuf};

    // Shorthand for building an owned path in assertions.
    fn pb(s: &str) -> PathBuf {
        PathBuf::from(s)
    }

    // Info strings and the attributes they must parse into.
    #[test]
    fn code_block_attrs() {
        let cases: Vec<(&str, (String, Vec<String>, Vec<(String, String)>))> = vec![
            ("", ("".to_string(), vec![], vec![])),
            ("foo", ("".to_string(), vec!["foo".to_string()], vec![])),
            ("{#foo}", ("foo".to_string(), vec![], vec![])),
            (
                "{#foo .file bar=yo}",
                (
                    "foo".to_string(),
                    vec!["file".to_string()],
                    vec![("bar".to_string(), "yo".to_string())],
                ),
            ),
        ];
        for (input, expected) in cases {
            assert_eq!(parse_code_block_attrs(input), expected);
        }
    }

    // A metadata block using many of the supported fields parses, and
    // each field ends up with the expected value.
    #[test]
    fn full_meta() {
        let meta = YamlMetadata::new(
            "\
title: Foo Bar
date: today
classes: [json, text]
impls:
 python:
 - foo.py
 - bar.py
bibliography:
- foo.bib
- bar.bib
markdowns:
- test.md
bindings:
- foo.yaml
- bar.yaml
",
        )
        .unwrap();

        assert_eq!(meta.title, "Foo Bar");
        assert_eq!(meta.date.unwrap(), "today");
        assert_eq!(meta.classes.unwrap(), &["json", "text"]);
        assert_eq!(meta.bibliography.unwrap(), &[pb("foo.bib"), pb("bar.bib")]);
        assert_eq!(meta.markdowns, vec![Path::new("test.md")]);
        assert_eq!(meta.bindings.unwrap(), &[pb("foo.yaml"), pb("bar.yaml")]);

        assert!(!meta.impls.is_empty());
        for (k, v) in meta.impls.iter() {
            assert_eq!(k, "python");
            assert_eq!(v, &[pb("foo.py"), pb("bar.py")]);
        }
    }
}