refactor: drop abstract syntax tree built using pulldown_cmark

We'll want to use the new Markdown type instead.

Sponsored-by: author
author: Lars Wirzenius <liw@liw.fi> 2023-01-22 11:32:12 +0200
committer: Lars Wirzenius <liw@liw.fi> 2023-01-22 11:35:58 +0200
commit: 27fac843cd0475026e96545b645c11f15a71d432 (patch)
tree: 073bad11a601df79c5ff1ddf24af1ad033094ad0
parent: b940075214d31b632f62a0ba723e63b857f5e5f1 (diff)
download: subplot-27fac843cd0475026e96545b645c11f15a71d432.tar.gz
2 files changed, 3 insertions, 221 deletions
diff --git a/src/ast.rs b/src/ast.rs
index 9e50af1..c48a1e7 100644
--- a/src/ast.rs
+++ b/src/ast.rs
@@ -1,7 +1,6 @@
 use lazy_static::lazy_static;
 use log::trace;
-use pandoc_ast::{Attr, Block, Inline, Map, MetaValue, Pandoc};
-use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag};
+use pandoc_ast::{Map, MetaValue};
 use regex::Regex;
 use serde::Deserialize;
 use serde_yaml::{Mapping, Value};
@@ -17,202 +16,6 @@ lazy_static! {
     static ref TRAILING_YAML_PATTERN: Regex = Regex::new(r"(?P<text>(.*\n)*)\n*(?P<yaml>-{3,}\n([^.].*\n)*\.{3,}\n)(?:\S*\n)*$").unwrap();
 }
 
-/// An abstract syntax tree representation of a Markdown file.
-///
-/// This represents a Markdown file as an abstract syntax tree
-/// compatible with Pandoc's AST. The document YAML metadata MUST be
-/// at the top or bottom of the file, excluding leading or trailing
-/// empty lines.
-#[derive(Debug)]
-pub struct AbstractSyntaxTree {
-    blocks: Vec<Block>,
-    meta: YamlMetadata,
-}
-
-impl AbstractSyntaxTree {
-    /// Create a new AST.
-    pub fn new(meta: YamlMetadata, markdown: &str) -> Self {
-        let blocks = parse_blocks(markdown);
-        Self { blocks, meta }
-    }
-
-    /// Return a Pandoc-compatible AST.
-    pub fn to_pandoc(&self) -> Pandoc {
-        Pandoc {
-            meta: self.meta.to_map(),
-            blocks: self.blocks.clone(),
-            pandoc_api_version: vec![1, 20],
-        }
-    }
-}
-
-// Parse Markdown into a sequence of Blocks.
-fn parse_blocks(markdown: &str) -> Vec<Block> {
-    trace!("Parsing blocks");
-
-    // Define the Markdown parser.
-    let mut options = Options::empty();
-    options.insert(Options::ENABLE_TABLES);
-    options.insert(Options::ENABLE_FOOTNOTES);
-    options.insert(Options::ENABLE_STRIKETHROUGH);
-    options.insert(Options::ENABLE_TASKLISTS);
-    options.insert(Options::ENABLE_SMART_PUNCTUATION);
-    let parser = Parser::new_ext(markdown, options);
-
-    // The sequence of blocks that represents the parsed document.
-    let mut blocks = vec![];
-
-    // The current set of inline elements we've collected. This gets
-    // emptied whenever we finish a block.
-    let mut inlines: Vec<Inline> = vec![];
-
-    for event in parser {
-        trace!("Parsing event: {:?}", event);
-        match event {
-            // We ignore these for now. They're not needed for codegen.
-            Event::Html(_)
-            | Event::FootnoteReference(_)
-            | Event::SoftBreak
-            | Event::HardBreak
-            | Event::Rule
-            | Event::TaskListMarker(_) => (),
-
-            // Inline text of various kinds.
-            Event::Text(text) => inlines.push(inline_text(&text)),
-            Event::Code(text) => inlines.push(inline_code(&text)),
-
-            // We only handle the end events.
-            Event::Start(_) => (),
-
-            // End of a block or inline.
-            Event::End(tag) => match tag {
-                // Collect inline elements for later inclusion in a block.
-                Tag::Emphasis | Tag::Strong | Tag::Strikethrough => {
-                    inline_from_inlines(&tag, &mut inlines)
-                }
-                Tag::Paragraph => blocks.push(paragraph(&mut inlines)),
-                Tag::Heading(level, _fragment, _classes) => {
-                    blocks.push(heading(level as i64, &mut inlines))
-                }
-                Tag::CodeBlock(kind) => blocks.push(code_block(&kind, &mut inlines)),
-                Tag::Image(_link, dest, title) => blocks.push(image_block(&dest, &title)),
-                // We don't handle anything else yet.
-                _ => (),
-            },
-        }
-    }
-
-    // We MUST have emptied all inline elements.
-    // assert!(inlines.is_empty());
-
-    trace!("Parsing blocks: OK");
-    blocks
-}
-
-fn inline_text(text: &str) -> Inline {
-    Inline::Str(text.to_string())
-}
-
-fn inline_code(text: &str) -> Inline {
-    let attr = ("".to_string(), vec![], vec![]);
-    Inline::Code(attr, text.to_string())
-}
-
-fn paragraph(inlines: &mut Vec<Inline>) -> Block {
-    Block::Para(std::mem::take(inlines))
-}
-
-fn heading(level: i64, inlines: &mut Vec<Inline>) -> Block {
-    let attr = ("".to_string(), vec![], vec![]);
-    Block::Header(level, attr, std::mem::take(inlines))
-}
-
-fn image_block(dest: &str, title: &str) -> Block {
-    let attr = ("".to_string(), vec![], vec![]);
-    Block::Para(vec![Inline::Image(
-        attr,
-        vec![],
-        (dest.to_string(), title.to_string()),
-    )])
-}
-
-fn code_block(kind: &CodeBlockKind, inlines: &mut Vec<Inline>) -> Block {
-    trace!("code block: {:?}", kind);
-    let attr = if let CodeBlockKind::Fenced(lang) = kind {
-        trace!("fenced code block, lang={:?}", lang);
-        parse_code_block_attrs(lang)
-    } else {
-        trace!("indented code block");
-        parse_code_block_attrs("")
-    };
-    trace!("code block attrs: {:?}", attr);
-    let mut code = String::new();
-    for inline in inlines.drain(0..) {
-        let text = plain_text_inline(inline);
-        code.push_str(&text);
-    }
-    // pulldown_cmark and pandoc differ in their codeblock handling,
-    // pulldown_cmark has an extra newline which we trim for now to be
-    // compatible with pandoc's parsing
-    if !code.is_empty() {
-        assert_eq!(code.pop(), Some('\n'));
-    }
-    Block::CodeBlock(attr, code)
-}
-
-fn plain_text_inline(inline: Inline) -> String {
-    match inline {
-        Inline::Str(text) => text,
-        Inline::Code(_, text) => text,
-        Inline::Emph(inlines) => {
-            let mut text = String::new();
-            for inline in inlines {
-                text.push_str(&plain_text_inline(inline));
-            }
-            text
-        }
-        _ => panic!("not text in code block: {:?}", inline),
-    }
-}
-
-fn parse_code_block_attrs(attrs: &str) -> Attr {
-    trace!("parsing code block attrs: {:?}", attrs);
-    let mut id = "".to_string();
-    let mut classes = vec![];
-    let mut keyvalues = vec![];
-    if attrs.starts_with('{') && attrs.ends_with('}') {
-        let attrs = &attrs[1..attrs.len() - 1];
-        for word in attrs.split_ascii_whitespace() {
-            if let Some(x) = word.strip_prefix('#') {
-                id = x.to_string();
-            } else if let Some(x) = word.strip_prefix('.') {
-                classes.push(x.to_string());
-            } else if let Some(i) = word.find('=') {
-                let k = &word[..i];
-                let v = &word[i + 1..];
-                keyvalues.push((k.to_string(), v.to_string()));
-            }
-        }
-    } else if !attrs.is_empty() {
-        classes.push(attrs.to_string());
-    }
-    (id, classes, keyvalues)
-}
-
-fn inline_from_inlines(tag: &Tag, inlines: &mut Vec<Inline>) {
-    let new_inlines = inlines.clone();
-    inlines.clear();
-
-    let inline = match tag {
-        Tag::Emphasis => Inline::Emph(new_inlines),
-        Tag::Strong => Inline::Strong(new_inlines),
-        Tag::Strikethrough => Inline::Strikeout(new_inlines),
-        _ => unreachable!(),
-    };
-
-    inlines.push(inline);
-}
-
 /// Errors from Markdown parsing.
 #[derive(Debug, thiserror::Error)]
 pub enum Error {
@@ -368,31 +171,10 @@ fn meta_path_bufs(v: &[PathBuf]) -> MetaValue {
 
 #[cfg(test)]
 mod test {
-    use super::{parse_code_block_attrs, YamlMetadata};
+    use super::YamlMetadata;
     use std::path::{Path, PathBuf};
 
     #[test]
-    fn code_block_attrs() {
-        assert_eq!(parse_code_block_attrs(""), ("".to_string(), vec![], vec![]));
-        assert_eq!(
-            parse_code_block_attrs("foo"),
-            ("".to_string(), vec!["foo".to_string()], vec![])
-        );
-        assert_eq!(
-            parse_code_block_attrs("{#foo}"),
-            ("foo".to_string(), vec![], vec![])
-        );
-        assert_eq!(
-            parse_code_block_attrs("{#foo .file bar=yo}"),
-            (
-                "foo".to_string(),
-                vec!["file".to_string()],
-                vec![("bar".to_string(), "yo".to_string())]
-            )
-        );
-    }
-
-    #[test]
     fn full_meta() {
         let meta = YamlMetadata::new(
             "\
diff --git a/src/lib.rs b/src/lib.rs
index 966118d..747b375 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -74,4 +74,4 @@ mod codegen;
 pub use codegen::generate_test_program;
 
 mod ast;
-pub use ast::{AbstractSyntaxTree, YamlMetadata};
+pub use ast::YamlMetadata;
author	Lars Wirzenius <liw@liw.fi>	2023-01-22 11:32:12 +0200
committer	Lars Wirzenius <liw@liw.fi>	2023-01-22 11:35:58 +0200
commit	27fac843cd0475026e96545b645c11f15a71d432 (patch)
tree	073bad11a601df79c5ff1ddf24af1ad033094ad0
parent	b940075214d31b632f62a0ba723e63b857f5e5f1 (diff)
download	subplot-27fac843cd0475026e96545b645c11f15a71d432.tar.gz