summary refs log tree commit diff
path: root/src/ast.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/ast.rs')
-rw-r--r-- src/ast.rs 354
1 files changed, 44 insertions, 310 deletions
diff --git a/src/ast.rs b/src/ast.rs
index e638464..ed163f0 100644
--- a/src/ast.rs
+++ b/src/ast.rs
@@ -1,10 +1,7 @@
use lazy_static::lazy_static;
-use log::trace;
-use pandoc_ast::{Attr, Block, Inline, Map, MetaValue, Pandoc};
-use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag};
use regex::Regex;
use serde::Deserialize;
-use serde_yaml::{Mapping, Value};
+use serde_yaml::Value;
use std::collections::{BTreeMap, HashMap};
use std::path::{Path, PathBuf};
@@ -17,202 +14,6 @@ lazy_static! {
static ref TRAILING_YAML_PATTERN: Regex = Regex::new(r"(?P<text>(.*\n)*)\n*(?P<yaml>-{3,}\n([^.].*\n)*\.{3,}\n)(?:\S*\n)*$").unwrap();
}
-/// An abstract syntax tree representation of a Markdown file.
-///
-/// This represents a Markdown file as an abstract syntax tree
-/// compatible with Pandoc's AST. The document YAML metadata MUST be
-/// at the top or bottom of the file, excluding leading or trailing
-/// empty lines.
-#[derive(Debug)]
-pub struct AbstractSyntaxTree {
- blocks: Vec<Block>,
- meta: YamlMetadata,
-}
-
-impl AbstractSyntaxTree {
- /// Create a new AST.
- pub fn new(meta: YamlMetadata, markdown: &str) -> Self {
- let blocks = parse_blocks(markdown);
- Self { blocks, meta }
- }
-
- /// Return a Pandoc-compatible AST.
- pub fn to_pandoc(&self) -> Pandoc {
- Pandoc {
- meta: self.meta.to_map(),
- blocks: self.blocks.clone(),
- pandoc_api_version: vec![1, 20],
- }
- }
-}
-
-// Parse Markdown into a sequence of Blocks.
-fn parse_blocks(markdown: &str) -> Vec<Block> {
- trace!("Parsing blocks");
-
- // Define the Markdown parser.
- let mut options = Options::empty();
- options.insert(Options::ENABLE_TABLES);
- options.insert(Options::ENABLE_FOOTNOTES);
- options.insert(Options::ENABLE_STRIKETHROUGH);
- options.insert(Options::ENABLE_TASKLISTS);
- options.insert(Options::ENABLE_SMART_PUNCTUATION);
- let parser = Parser::new_ext(markdown, options);
-
- // The sequence of blocks that represents the parsed document.
- let mut blocks = vec![];
-
- // The current set of inline elements we've collected. This gets
- // emptied whenever we finish a block.
- let mut inlines: Vec<Inline> = vec![];
-
- for event in parser {
- trace!("Parsing event: {:?}", event);
- match event {
- // We ignore these for now. They're not needed for codegen.
- Event::Html(_)
- | Event::FootnoteReference(_)
- | Event::SoftBreak
- | Event::HardBreak
- | Event::Rule
- | Event::TaskListMarker(_) => (),
-
- // Inline text of various kinds.
- Event::Text(text) => inlines.push(inline_text(&text)),
- Event::Code(text) => inlines.push(inline_code(&text)),
-
- // We only handle the end events.
- Event::Start(_) => (),
-
- // End of a block or inline.
- Event::End(tag) => match tag {
- // Collect inline elements for later inclusion in a block.
- Tag::Emphasis | Tag::Strong | Tag::Strikethrough => {
- inline_from_inlines(&tag, &mut inlines)
- }
- Tag::Paragraph => blocks.push(paragraph(&mut inlines)),
- Tag::Heading(level, _fragment, _classes) => {
- blocks.push(heading(level as i64, &mut inlines))
- }
- Tag::CodeBlock(kind) => blocks.push(code_block(&kind, &mut inlines)),
- Tag::Image(_link, dest, title) => blocks.push(image_block(&dest, &title)),
- // We don't handle anything else yet.
- _ => (),
- },
- }
- }
-
- // We MUST have emptied all inline elements.
- // assert!(inlines.is_empty());
-
- trace!("Parsing blocks: OK");
- blocks
-}
-
-fn inline_text(text: &str) -> Inline {
- Inline::Str(text.to_string())
-}
-
-fn inline_code(text: &str) -> Inline {
- let attr = ("".to_string(), vec![], vec![]);
- Inline::Code(attr, text.to_string())
-}
-
-fn paragraph(inlines: &mut Vec<Inline>) -> Block {
- Block::Para(std::mem::take(inlines))
-}
-
-fn heading(level: i64, inlines: &mut Vec<Inline>) -> Block {
- let attr = ("".to_string(), vec![], vec![]);
- Block::Header(level, attr, std::mem::take(inlines))
-}
-
-fn image_block(dest: &str, title: &str) -> Block {
- let attr = ("".to_string(), vec![], vec![]);
- Block::Para(vec![Inline::Image(
- attr,
- vec![],
- (dest.to_string(), title.to_string()),
- )])
-}
-
-fn code_block(kind: &CodeBlockKind, inlines: &mut Vec<Inline>) -> Block {
- trace!("code block: {:?}", kind);
- let attr = if let CodeBlockKind::Fenced(lang) = kind {
- trace!("fenced code block, lang={:?}", lang);
- parse_code_block_attrs(lang)
- } else {
- trace!("indented code block");
- parse_code_block_attrs("")
- };
- trace!("code block attrs: {:?}", attr);
- let mut code = String::new();
- for inline in inlines.drain(0..) {
- let text = plain_text_inline(inline);
- code.push_str(&text);
- }
- // pulldown_cmark and pandoc differ in their codeblock handling,
- // pulldown_cmark has an extra newline which we trim for now to be
- // compatible with pandoc's parsing
- if !code.is_empty() {
- assert_eq!(code.pop(), Some('\n'));
- }
- Block::CodeBlock(attr, code)
-}
-
-fn plain_text_inline(inline: Inline) -> String {
- match inline {
- Inline::Str(text) => text,
- Inline::Code(_, text) => text,
- Inline::Emph(inlines) => {
- let mut text = String::new();
- for inline in inlines {
- text.push_str(&plain_text_inline(inline));
- }
- text
- }
- _ => panic!("not text in code block: {:?}", inline),
- }
-}
-
-fn parse_code_block_attrs(attrs: &str) -> Attr {
- trace!("parsing code block attrs: {:?}", attrs);
- let mut id = "".to_string();
- let mut classes = vec![];
- let mut keyvalues = vec![];
- if attrs.starts_with('{') && attrs.ends_with('}') {
- let attrs = &attrs[1..attrs.len() - 1];
- for word in attrs.split_ascii_whitespace() {
- if let Some(x) = word.strip_prefix('#') {
- id = x.to_string();
- } else if let Some(x) = word.strip_prefix('.') {
- classes.push(x.to_string());
- } else if let Some(i) = word.find('=') {
- let k = &word[..i];
- let v = &word[i + 1..];
- keyvalues.push((k.to_string(), v.to_string()));
- }
- }
- } else if !attrs.is_empty() {
- classes.push(attrs.to_string());
- }
- (id, classes, keyvalues)
-}
-
-fn inline_from_inlines(tag: &Tag, inlines: &mut Vec<Inline>) {
- let new_inlines = inlines.clone();
- inlines.clear();
-
- let inline = match tag {
- Tag::Emphasis => Inline::Emph(new_inlines),
- Tag::Strong => Inline::Strong(new_inlines),
- Tag::Strikethrough => Inline::Strikeout(new_inlines),
- _ => unreachable!(),
- };
-
- inlines.push(inline);
-}
-
/// Errors from Markdown parsing.
#[derive(Debug, thiserror::Error)]
pub enum Error {
@@ -253,8 +54,8 @@ pub struct YamlMetadata {
}
impl YamlMetadata {
+ #[cfg(test)]
fn new(yaml_text: &str) -> Result<Self, Error> {
- trace!("Parsing YAML");
let meta: Self = serde_yaml::from_str(yaml_text)?;
Ok(meta)
}
@@ -264,134 +65,67 @@ impl YamlMetadata {
&self.markdowns[0]
}
- /// Convert into a pandoc_ast::Map.
- pub fn to_map(&self) -> Map<String, MetaValue> {
- trace!("Creating metadata map from parsed YAML");
- let mut map: Map<String, MetaValue> = Map::new();
-
- map.insert("title".into(), meta_string(&self.title));
-
- if let Some(v) = &self.subtitle {
- map.insert("subtitle".into(), meta_string(v));
- }
-
- if let Some(authors) = &self.authors {
- let authors: Vec<MetaValue> = authors
- .iter()
- .map(|s| MetaValue::MetaString(s.into()))
- .collect();
- map.insert("author".into(), MetaValue::MetaList(authors));
- }
-
- if let Some(v) = &self.date {
- map.insert("date".into(), meta_string(v));
- }
-
- if let Some(v) = &self.classes {
- map.insert("classes".into(), meta_strings(v));
- }
-
- if !self.impls.is_empty() {
- let impls = self
- .impls
- .iter()
- .map(|(k, v)| (k.to_owned(), Box::new(meta_path_bufs(v))))
- .collect();
- map.insert("impls".into(), MetaValue::MetaMap(impls));
- }
-
- if let Some(v) = &self.bibliography {
- map.insert("bibliography".into(), meta_path_bufs(v));
- }
-
- if let Some(v) = &self.bindings {
- map.insert("bindings".into(), meta_path_bufs(v));
- }
-
- if let Some(v) = &self.documentclass {
- map.insert("documentclass".into(), meta_string(v));
- }
-
- if let Some(pandoc) = &self.pandoc {
- for (key, value) in pandoc.iter() {
- map.insert(key.to_string(), value_to_pandoc(value));
- }
- }
+ /// Title.
+ pub fn title(&self) -> &str {
+ &self.title
+ }
- trace!("Created metadata map from parsed YAML");
- map
+ /// Subtitle.
+ pub fn subtitle(&self) -> Option<&str> {
+ self.subtitle.as_deref()
}
-}
-fn mapping_to_pandoc(mapping: &Mapping) -> MetaValue {
- let mut map = Map::new();
- for (key, value) in mapping.iter() {
- let key = if let MetaValue::MetaString(s) = value_to_pandoc(key) {
- s
- } else {
- panic!("key not a string: {:?}", key);
- };
- map.insert(key, Box::new(value_to_pandoc(value)));
+ /// Date.
+ pub fn date(&self) -> Option<&str> {
+ self.date.as_deref()
}
- MetaValue::MetaMap(map)
-}
+ /// Authors.
+ pub fn authors(&self) -> Option<&[String]> {
+ self.authors.as_deref()
+ }
-fn value_to_pandoc(data: &Value) -> MetaValue {
- match data {
- Value::Null => unreachable!("null not OK"),
- Value::Number(_) => unreachable!("number not OK"),
- Value::Sequence(_) => unreachable!("sequence not OK"),
+ /// Names of bindings files.
+ pub fn bindings_filenames(&self) -> Option<&[PathBuf]> {
+ self.bindings.as_deref()
+ }
- Value::Bool(b) => MetaValue::MetaBool(*b),
- Value::String(s) => MetaValue::MetaString(s.clone()),
- Value::Mapping(mapping) => mapping_to_pandoc(mapping),
+ /// Impls section.
+ pub fn impls(&self) -> &BTreeMap<String, Vec<PathBuf>> {
+ &self.impls
}
-}
-fn meta_string(s: &str) -> MetaValue {
- MetaValue::MetaString(s.to_string())
-}
+ /// Bibliographies.
+ pub fn bibliographies(&self) -> Option<&[PathBuf]> {
+ self.bibliography.as_deref()
+ }
-fn meta_strings(v: &[String]) -> MetaValue {
- MetaValue::MetaList(v.iter().map(|s| meta_string(s)).collect())
-}
+ /// Classes.
+ pub fn classes(&self) -> Option<&[String]> {
+ self.classes.as_deref()
+ }
-fn meta_path_buf(p: &Path) -> MetaValue {
- meta_string(&p.display().to_string())
-}
+ /// Documentclass.
+ pub fn documentclass(&self) -> Option<&str> {
+ self.documentclass.as_deref()
+ }
-fn meta_path_bufs(v: &[PathBuf]) -> MetaValue {
- MetaValue::MetaList(v.iter().map(|p| meta_path_buf(p)).collect())
+ /// Pandoc metadata.
+ pub fn pandoc(&self) -> Option<&HashMap<String, Value>> {
+ if let Some(x) = &self.pandoc {
+ Some(x)
+ } else {
+ None
+ }
+ }
}
#[cfg(test)]
mod test {
- use super::{parse_code_block_attrs, YamlMetadata};
+ use super::YamlMetadata;
use std::path::{Path, PathBuf};
#[test]
- fn code_block_attrs() {
- assert_eq!(parse_code_block_attrs(""), ("".to_string(), vec![], vec![]));
- assert_eq!(
- parse_code_block_attrs("foo"),
- ("".to_string(), vec!["foo".to_string()], vec![])
- );
- assert_eq!(
- parse_code_block_attrs("{#foo}"),
- ("foo".to_string(), vec![], vec![])
- );
- assert_eq!(
- parse_code_block_attrs("{#foo .file bar=yo}"),
- (
- "foo".to_string(),
- vec!["file".to_string()],
- vec![("bar".to_string(), "yo".to_string())]
- )
- );
- }
-
- #[test]
fn full_meta() {
let meta = YamlMetadata::new(
"\