diff options
Diffstat (limited to 'src/ast.rs')
-rw-r--r-- | src/ast.rs | 354 |
1 files changed, 44 insertions, 310 deletions
@@ -1,10 +1,7 @@ use lazy_static::lazy_static; -use log::trace; -use pandoc_ast::{Attr, Block, Inline, Map, MetaValue, Pandoc}; -use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag}; use regex::Regex; use serde::Deserialize; -use serde_yaml::{Mapping, Value}; +use serde_yaml::Value; use std::collections::{BTreeMap, HashMap}; use std::path::{Path, PathBuf}; @@ -17,202 +14,6 @@ lazy_static! { static ref TRAILING_YAML_PATTERN: Regex = Regex::new(r"(?P<text>(.*\n)*)\n*(?P<yaml>-{3,}\n([^.].*\n)*\.{3,}\n)(?:\S*\n)*$").unwrap(); } -/// An abstract syntax tree representation of a Markdown file. -/// -/// This represents a Markdown file as an abstract syntax tree -/// compatible with Pandoc's AST. The document YAML metadata MUST be -/// at the top or bottom of the file, excluding leading or trailing -/// empty lines. -#[derive(Debug)] -pub struct AbstractSyntaxTree { - blocks: Vec<Block>, - meta: YamlMetadata, -} - -impl AbstractSyntaxTree { - /// Create a new AST. - pub fn new(meta: YamlMetadata, markdown: &str) -> Self { - let blocks = parse_blocks(markdown); - Self { blocks, meta } - } - - /// Return a Pandoc-compatible AST. - pub fn to_pandoc(&self) -> Pandoc { - Pandoc { - meta: self.meta.to_map(), - blocks: self.blocks.clone(), - pandoc_api_version: vec![1, 20], - } - } -} - -// Parse Markdown into a sequence of Blocks. -fn parse_blocks(markdown: &str) -> Vec<Block> { - trace!("Parsing blocks"); - - // Define the Markdown parser. - let mut options = Options::empty(); - options.insert(Options::ENABLE_TABLES); - options.insert(Options::ENABLE_FOOTNOTES); - options.insert(Options::ENABLE_STRIKETHROUGH); - options.insert(Options::ENABLE_TASKLISTS); - options.insert(Options::ENABLE_SMART_PUNCTUATION); - let parser = Parser::new_ext(markdown, options); - - // The sequence of blocks that represents the parsed document. - let mut blocks = vec![]; - - // The current set of inline elements we've collected. This gets - // emptied whenever we finish a block. - let mut inlines: Vec<Inline> = vec![]; - - for event in parser { - trace!("Parsing event: {:?}", event); - match event { - // We ignore these for now. They're not needed for codegen. - Event::Html(_) - | Event::FootnoteReference(_) - | Event::SoftBreak - | Event::HardBreak - | Event::Rule - | Event::TaskListMarker(_) => (), - - // Inline text of various kinds. - Event::Text(text) => inlines.push(inline_text(&text)), - Event::Code(text) => inlines.push(inline_code(&text)), - - // We only handle the end events. - Event::Start(_) => (), - - // End of a block or inline. - Event::End(tag) => match tag { - // Collect inline elements for later inclusion in a block. - Tag::Emphasis | Tag::Strong | Tag::Strikethrough => { - inline_from_inlines(&tag, &mut inlines) - } - Tag::Paragraph => blocks.push(paragraph(&mut inlines)), - Tag::Heading(level, _fragment, _classes) => { - blocks.push(heading(level as i64, &mut inlines)) - } - Tag::CodeBlock(kind) => blocks.push(code_block(&kind, &mut inlines)), - Tag::Image(_link, dest, title) => blocks.push(image_block(&dest, &title)), - // We don't handle anything else yet. - _ => (), - }, - } - } - - // We MUST have emptied all inline elements. - // assert!(inlines.is_empty()); - - trace!("Parsing blocks: OK"); - blocks -} - -fn inline_text(text: &str) -> Inline { - Inline::Str(text.to_string()) -} - -fn inline_code(text: &str) -> Inline { - let attr = ("".to_string(), vec![], vec![]); - Inline::Code(attr, text.to_string()) -} - -fn paragraph(inlines: &mut Vec<Inline>) -> Block { - Block::Para(std::mem::take(inlines)) -} - -fn heading(level: i64, inlines: &mut Vec<Inline>) -> Block { - let attr = ("".to_string(), vec![], vec![]); - Block::Header(level, attr, std::mem::take(inlines)) -} - -fn image_block(dest: &str, title: &str) -> Block { - let attr = ("".to_string(), vec![], vec![]); - Block::Para(vec![Inline::Image( - attr, - vec![], - (dest.to_string(), title.to_string()), - )]) -} - -fn code_block(kind: &CodeBlockKind, inlines: &mut Vec<Inline>) -> Block { - trace!("code block: {:?}", kind); - let attr = if let CodeBlockKind::Fenced(lang) = kind { - trace!("fenced code block, lang={:?}", lang); - parse_code_block_attrs(lang) - } else { - trace!("indented code block"); - parse_code_block_attrs("") - }; - trace!("code block attrs: {:?}", attr); - let mut code = String::new(); - for inline in inlines.drain(0..) { - let text = plain_text_inline(inline); - code.push_str(&text); - } - // pulldown_cmark and pandoc differ in their codeblock handling, - // pulldown_cmark has an extra newline which we trim for now to be - // compatible with pandoc's parsing - if !code.is_empty() { - assert_eq!(code.pop(), Some('\n')); - } - Block::CodeBlock(attr, code) -} - -fn plain_text_inline(inline: Inline) -> String { - match inline { - Inline::Str(text) => text, - Inline::Code(_, text) => text, - Inline::Emph(inlines) => { - let mut text = String::new(); - for inline in inlines { - text.push_str(&plain_text_inline(inline)); - } - text - } - _ => panic!("not text in code block: {:?}", inline), - } -} - -fn parse_code_block_attrs(attrs: &str) -> Attr { - trace!("parsing code block attrs: {:?}", attrs); - let mut id = "".to_string(); - let mut classes = vec![]; - let mut keyvalues = vec![]; - if attrs.starts_with('{') && attrs.ends_with('}') { - let attrs = &attrs[1..attrs.len() - 1]; - for word in attrs.split_ascii_whitespace() { - if let Some(x) = word.strip_prefix('#') { - id = x.to_string(); - } else if let Some(x) = word.strip_prefix('.') { - classes.push(x.to_string()); - } else if let Some(i) = word.find('=') { - let k = &word[..i]; - let v = &word[i + 1..]; - keyvalues.push((k.to_string(), v.to_string())); - } - } - } else if !attrs.is_empty() { - classes.push(attrs.to_string()); - } - (id, classes, keyvalues) -} - -fn inline_from_inlines(tag: &Tag, inlines: &mut Vec<Inline>) { - let new_inlines = inlines.clone(); - inlines.clear(); - - let inline = match tag { - Tag::Emphasis => Inline::Emph(new_inlines), - Tag::Strong => Inline::Strong(new_inlines), - Tag::Strikethrough => Inline::Strikeout(new_inlines), - _ => unreachable!(), - }; - - inlines.push(inline); -} - /// Errors from Markdown parsing. #[derive(Debug, thiserror::Error)] pub enum Error { @@ -253,8 +54,8 @@ pub struct YamlMetadata { } impl YamlMetadata { + #[cfg(test)] fn new(yaml_text: &str) -> Result<Self, Error> { - trace!("Parsing YAML"); let meta: Self = serde_yaml::from_str(yaml_text)?; Ok(meta) } @@ -264,134 +65,67 @@ impl YamlMetadata { &self.markdowns[0] } - /// Convert into a pandoc_ast::Map. - pub fn to_map(&self) -> Map<String, MetaValue> { - trace!("Creating metadata map from parsed YAML"); - let mut map: Map<String, MetaValue> = Map::new(); - - map.insert("title".into(), meta_string(&self.title)); - - if let Some(v) = &self.subtitle { - map.insert("subtitle".into(), meta_string(v)); - } - - if let Some(authors) = &self.authors { - let authors: Vec<MetaValue> = authors - .iter() - .map(|s| MetaValue::MetaString(s.into())) - .collect(); - map.insert("author".into(), MetaValue::MetaList(authors)); - } - - if let Some(v) = &self.date { - map.insert("date".into(), meta_string(v)); - } - - if let Some(v) = &self.classes { - map.insert("classes".into(), meta_strings(v)); - } - - if !self.impls.is_empty() { - let impls = self - .impls - .iter() - .map(|(k, v)| (k.to_owned(), Box::new(meta_path_bufs(v)))) - .collect(); - map.insert("impls".into(), MetaValue::MetaMap(impls)); - } - - if let Some(v) = &self.bibliography { - map.insert("bibliography".into(), meta_path_bufs(v)); - } - - if let Some(v) = &self.bindings { - map.insert("bindings".into(), meta_path_bufs(v)); - } - - if let Some(v) = &self.documentclass { - map.insert("documentclass".into(), meta_string(v)); - } - - if let Some(pandoc) = &self.pandoc { - for (key, value) in pandoc.iter() { - map.insert(key.to_string(), value_to_pandoc(value)); - } - } + /// Title. + pub fn title(&self) -> &str { + &self.title + } - trace!("Created metadata map from parsed YAML"); - map + /// Subtitle. + pub fn subtitle(&self) -> Option<&str> { + self.subtitle.as_deref() } -} -fn mapping_to_pandoc(mapping: &Mapping) -> MetaValue { - let mut map = Map::new(); - for (key, value) in mapping.iter() { - let key = if let MetaValue::MetaString(s) = value_to_pandoc(key) { - s - } else { - panic!("key not a string: {:?}", key); - }; - map.insert(key, Box::new(value_to_pandoc(value))); + /// Date. + pub fn date(&self) -> Option<&str> { + self.date.as_deref() } - MetaValue::MetaMap(map) -} + /// Authors. + pub fn authors(&self) -> Option<&[String]> { + self.authors.as_deref() + } -fn value_to_pandoc(data: &Value) -> MetaValue { - match data { - Value::Null => unreachable!("null not OK"), - Value::Number(_) => unreachable!("number not OK"), - Value::Sequence(_) => unreachable!("sequence not OK"), + /// Names of bindings files. + pub fn bindings_filenames(&self) -> Option<&[PathBuf]> { + self.bindings.as_deref() + } - Value::Bool(b) => MetaValue::MetaBool(*b), - Value::String(s) => MetaValue::MetaString(s.clone()), - Value::Mapping(mapping) => mapping_to_pandoc(mapping), + /// Impls section. + pub fn impls(&self) -> &BTreeMap<String, Vec<PathBuf>> { + &self.impls } -} -fn meta_string(s: &str) -> MetaValue { - MetaValue::MetaString(s.to_string()) -} + /// Bibliographies. + pub fn bibliographies(&self) -> Option<&[PathBuf]> { + self.bibliography.as_deref() + } -fn meta_strings(v: &[String]) -> MetaValue { - MetaValue::MetaList(v.iter().map(|s| meta_string(s)).collect()) -} + /// Classes.. + pub fn classes(&self) -> Option<&[String]> { + self.classes.as_deref() + } -fn meta_path_buf(p: &Path) -> MetaValue { - meta_string(&p.display().to_string()) -} + /// Documentclass. + pub fn documentclass(&self) -> Option<&str> { + self.documentclass.as_deref() + } -fn meta_path_bufs(v: &[PathBuf]) -> MetaValue { - MetaValue::MetaList(v.iter().map(|p| meta_path_buf(p)).collect()) + /// Pandoc metadata. + pub fn pandoc(&self) -> Option<&HashMap<String, Value>> { + if let Some(x) = &self.pandoc { + Some(x) + } else { + None + } + } } #[cfg(test)] mod test { - use super::{parse_code_block_attrs, YamlMetadata}; + use super::YamlMetadata; use std::path::{Path, PathBuf}; #[test] - fn code_block_attrs() { - assert_eq!(parse_code_block_attrs(""), ("".to_string(), vec![], vec![])); - assert_eq!( - parse_code_block_attrs("foo"), - ("".to_string(), vec!["foo".to_string()], vec![]) - ); - assert_eq!( - parse_code_block_attrs("{#foo}"), - ("foo".to_string(), vec![], vec![]) - ); - assert_eq!( - parse_code_block_attrs("{#foo .file bar=yo}"), - ( - "foo".to_string(), - vec!["file".to_string()], - vec![("bar".to_string(), "yo".to_string())] - ) - ); - } - - #[test] fn full_meta() { let meta = YamlMetadata::new( "\ |