diff options
Diffstat (limited to 'src/metadata.rs')
-rw-r--r-- | src/metadata.rs | 435 |
1 files changed, 238 insertions, 197 deletions
diff --git a/src/metadata.rs b/src/metadata.rs index 5f5e183..e382840 100644 --- a/src/metadata.rs +++ b/src/metadata.rs @@ -1,25 +1,176 @@ use crate::{Bindings, SubplotError, TemplateSpec}; -use std::collections::HashMap; +use lazy_static::lazy_static; +use log::trace; +use regex::Regex; +use serde::Deserialize; +use std::collections::{BTreeMap, HashMap}; use std::fmt::Debug; use std::ops::Deref; use std::path::{Path, PathBuf}; -use pandoc_ast::{Inline, Map, MetaValue, Pandoc}; +lazy_static! { + // Pattern that recognises a YAML block at the beginning of a file. + static ref LEADING_YAML_PATTERN: Regex = Regex::new(r"^(?:\S*\n)*(?P<yaml>-{3,}\n([^.].*\n)*\.{3,}\n)(?P<text>(.*\n)*)$").unwrap(); -use log::trace; + + // Pattern that recognises a YAML block at the end of a file. + static ref TRAILING_YAML_PATTERN: Regex = Regex::new(r"(?P<text>(.*\n)*)\n*(?P<yaml>-{3,}\n([^.].*\n)*\.{3,}\n)(?:\S*\n)*$").unwrap(); +} + +/// Errors from Markdown parsing. +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error(transparent)] + Regex(#[from] regex::Error), + + #[error(transparent)] + Yaml(#[from] serde_yaml::Error), +} + +/// Document metadata. +/// +/// This is expressed in the Markdown input file as an embedded YAML +/// block. +/// +/// Note that this structure needs to be able to capture any metadata +/// block we can work with, in any input file. By being strict here we +/// make it easier to tell the user when a metadata block has, say, a +/// misspelled field. +#[derive(Debug, Default, Clone, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct YamlMetadata { + title: String, + subtitle: Option<String>, + authors: Option<Vec<String>>, + date: Option<String>, + classes: Option<Vec<String>>, + markdowns: Vec<PathBuf>, + bindings: Option<Vec<PathBuf>>, + documentclass: Option<String>, + #[serde(default)] + impls: BTreeMap<String, Vec<PathBuf>>, + css_embed: Option<Vec<PathBuf>>, + css_urls: Option<Vec<String>>, +} + +impl YamlMetadata { + #[cfg(test)] + fn new(yaml_text: &str) -> Result<Self, Error> { + let meta: Self = serde_yaml::from_str(yaml_text)?; + Ok(meta) + } + + /// Names of files with the Markdown for the subplot document. + pub fn markdowns(&self) -> &[PathBuf] { + &self.markdowns + } + + /// Title. + pub fn title(&self) -> &str { + &self.title + } + + /// Subtitle. + pub fn subtitle(&self) -> Option<&str> { + self.subtitle.as_deref() + } + + /// Date. + pub fn date(&self) -> Option<&str> { + self.date.as_deref() + } + + /// Set date. + pub fn set_date(&mut self, date: String) { + self.date = Some(date); + } + + /// Authors. + pub fn authors(&self) -> Option<&[String]> { + self.authors.as_deref() + } + + /// Names of bindings files. + pub fn bindings_filenames(&self) -> Option<&[PathBuf]> { + self.bindings.as_deref() + } + + /// Impls section. + pub fn impls(&self) -> &BTreeMap<String, Vec<PathBuf>> { + &self.impls + } + + /// Classes.. + pub fn classes(&self) -> Option<&[String]> { + self.classes.as_deref() + } + + /// Documentclass. + pub fn documentclass(&self) -> Option<&str> { + self.documentclass.as_deref() + } +} + +#[cfg(test)] +mod test { + use super::YamlMetadata; + use std::path::{Path, PathBuf}; + + #[test] + fn full_meta() { + let meta = YamlMetadata::new( + "\ +title: Foo Bar +date: today +classes: [json, text] +impls: + python: + - foo.py + - bar.py +markdowns: +- test.md +bindings: +- foo.yaml +- bar.yaml +", + ) + .unwrap(); + assert_eq!(meta.title, "Foo Bar"); + assert_eq!(meta.date.unwrap(), "today"); + assert_eq!(meta.classes.unwrap(), &["json", "text"]); + assert_eq!(meta.markdowns, vec![Path::new("test.md")]); + assert_eq!( + meta.bindings.unwrap(), + &[path("foo.yaml"), path("bar.yaml")] + ); + assert!(!meta.impls.is_empty()); + for (k, v) in meta.impls.iter() { + assert_eq!(k, "python"); + assert_eq!(v, &[path("foo.py"), path("bar.py")]); + } + } + + fn path(s: &str) -> PathBuf { + PathBuf::from(s) + } +} /// Metadata of a document, as needed by Subplot. #[derive(Debug)] pub struct Metadata { + basedir: PathBuf, title: String, date: Option<String>, + authors: Option<Vec<String>>, + markdown_filenames: Vec<PathBuf>, bindings_filenames: Vec<PathBuf>, bindings: Bindings, impls: HashMap<String, DocumentImpl>, - bibliographies: Vec<PathBuf>, /// Extra class names which should be considered 'correct' for this document classes: Vec<String>, + css_embed: Vec<String>, + css_urls: Vec<String>, } #[derive(Debug)] @@ -29,57 +180,71 @@ pub struct DocumentImpl { } impl Metadata { - /// Construct a Metadata from a Document, if possible. - pub fn new<P>( + /// Create from YamlMetadata. + pub fn from_yaml_metadata<P>( basedir: P, - doc: &Pandoc, + yaml: &YamlMetadata, template: Option<&str>, - ) -> Result<Metadata, SubplotError> + ) -> Result<Self, SubplotError> where P: AsRef<Path> + Debug, { - let title = get_title(&doc.meta); - let date = get_date(&doc.meta); - let bindings_filenames = get_bindings_filenames(&doc.meta); - let bibliographies = get_bibliographies(basedir.as_ref(), &doc.meta); - let classes = get_classes(&doc.meta); - trace!("Loaded basic metadata"); + let mut bindings = Bindings::new(); + let bindings_filenames = if let Some(filenames) = yaml.bindings_filenames() { + get_bindings(filenames, &mut bindings, template)?; + filenames.iter().map(|p| p.to_path_buf()).collect() + } else { + vec![] + }; let mut impls = HashMap::new(); - if let Some(raw_impls) = doc.meta.get("impls") { - match raw_impls { - MetaValue::MetaMap(raw_impls) => { - for (impl_name, functions_filenames) in raw_impls.iter() { - let template_spec = load_template_spec(impl_name)?; - let filenames = pathbufs("", functions_filenames); - let docimpl = DocumentImpl::new(template_spec, filenames); - impls.insert(impl_name.to_string(), docimpl); - } - } - _ => { - trace!("Ignoring unknown raw implementation value"); - } - } + for (impl_name, functions_filenames) in yaml.impls().iter() { + let template_spec = load_template_spec(impl_name)?; + let filenames = pathbufs("", functions_filenames); + let docimpl = DocumentImpl::new(template_spec, filenames); + impls.insert(impl_name.to_string(), docimpl); } - let template = template.or_else(|| impls.keys().next().map(String::as_str)); - - let mut bindings = Bindings::new(); - - get_bindings(&bindings_filenames, &mut bindings, template)?; - - trace!("Loaded all metadata successfully"); + let classes = if let Some(v) = yaml.classes() { + v.iter().map(|s| s.to_string()).collect() + } else { + vec![] + }; + + let mut css_embed = vec![]; + if let Some(filenames) = &yaml.css_embed { + for filename in filenames.iter() { + let css = std::fs::read(filename) + .map_err(|e| SubplotError::ReadFile(filename.into(), e))?; + let css = String::from_utf8(css) + .map_err(|e| SubplotError::FileUtf8(filename.into(), e))?; + css_embed.push(css); + } + } - Ok(Metadata { - title, - date, + let css_urls = if let Some(urls) = &yaml.css_urls { + urls.clone() + } else { + vec![] + }; + + let meta = Self { + basedir: basedir.as_ref().to_path_buf(), + title: yaml.title().into(), + date: yaml.date().map(|s| s.into()), + authors: yaml.authors().map(|a| a.into()), + markdown_filenames: yaml.markdowns().into(), bindings_filenames, bindings, impls, - bibliographies, classes, - }) + css_embed, + css_urls, + }; + trace!("metadata: {:#?}", meta); + + Ok(meta) } /// Return title of document. @@ -92,6 +257,26 @@ impl Metadata { self.date.as_deref() } + /// Set date. + pub fn set_date(&mut self, date: String) { + self.date = Some(date); + } + + /// Authors. + pub fn authors(&self) -> Option<&[String]> { + self.authors.as_deref() + } + + /// Return base dir for all relative filenames. + pub fn basedir(&self) -> &Path { + &self.basedir + } + + /// Return filenames of the markdown files. + pub fn markdown_filenames(&self) -> &[PathBuf] { + &self.markdown_filenames + } + /// Return filename where bindings are specified. pub fn bindings_filenames(&self) -> Vec<&Path> { self.bindings_filenames.iter().map(|f| f.as_ref()).collect() @@ -112,15 +297,20 @@ impl Metadata { &self.bindings } - /// Return the bibliographies. - pub fn bibliographies(&self) -> Vec<&Path> { - self.bibliographies.iter().map(|x| x.as_path()).collect() - } - /// The classes which this document also claims are valid pub fn classes(&self) -> impl Iterator<Item = &str> { self.classes.iter().map(Deref::deref) } + + /// Contents of CSS files to embed into the HTML output. + pub fn css_embed(&self) -> impl Iterator<Item = &str> { + self.css_embed.iter().map(Deref::deref) + } + + /// List of CSS urls to add to the HTML output. + pub fn css_urls(&self) -> impl Iterator<Item = &str> { + self.css_urls.iter().map(Deref::deref) + } } impl DocumentImpl { @@ -137,24 +327,6 @@ impl DocumentImpl { } } -type Mapp = Map<String, MetaValue>; - -fn get_title(map: &Mapp) -> String { - if let Some(s) = get_string(map, "title") { - s - } else { - "".to_string() - } -} - -fn get_date(map: &Mapp) -> Option<String> { - get_string(map, "date") -} - -fn get_bindings_filenames(map: &Mapp) -> Vec<PathBuf> { - get_paths("", map, "bindings") -} - fn load_template_spec(template: &str) -> Result<TemplateSpec, SubplotError> { let mut spec_path = PathBuf::from(template); spec_path.push("template"); @@ -162,143 +334,12 @@ fn load_template_spec(template: &str) -> Result<TemplateSpec, SubplotError> { TemplateSpec::from_file(&spec_path) } -fn get_paths<P>(basedir: P, map: &Mapp, field: &str) -> Vec<PathBuf> +fn pathbufs<P>(basedir: P, v: &[PathBuf]) -> Vec<PathBuf> where P: AsRef<Path>, { - match map.get(field) { - None => vec![], - Some(v) => pathbufs(basedir, v), - } -} - -fn get_string(map: &Mapp, field: &str) -> Option<String> { - let v = match map.get(field) { - None => return None, - Some(s) => s, - }; - let v = match v { - pandoc_ast::MetaValue::MetaString(s) => s.to_string(), - pandoc_ast::MetaValue::MetaInlines(vec) => join(vec), - _ => panic!("don't know how to handle: {:?}", v), - }; - Some(v) -} - -fn get_bibliographies<P>(basedir: P, map: &Mapp) -> Vec<PathBuf> -where - P: AsRef<Path>, -{ - let v = match map.get("bibliography") { - None => return vec![], - Some(s) => s, - }; - pathbufs(basedir, v) -} - -fn pathbufs<P>(basedir: P, v: &MetaValue) -> Vec<PathBuf> -where - P: AsRef<Path>, -{ - let mut bufs = vec![]; - push_pathbufs(basedir, v, &mut bufs); - bufs -} - -fn get_classes(map: &Mapp) -> Vec<String> { - let mut ret = Vec::new(); - if let Some(classes) = map.get("classes") { - push_strings(classes, &mut ret); - } - ret -} - -fn push_strings(v: &MetaValue, strings: &mut Vec<String>) { - match v { - MetaValue::MetaString(s) => strings.push(s.to_string()), - MetaValue::MetaInlines(vec) => strings.push(join(vec)), - MetaValue::MetaList(values) => { - for value in values { - push_strings(value, strings); - } - } - _ => panic!("don't know how to handle: {:?}", v), - }; -} - -fn push_pathbufs<P>(basedir: P, v: &MetaValue, bufs: &mut Vec<PathBuf>) -where - P: AsRef<Path>, -{ - match v { - MetaValue::MetaString(s) => bufs.push(basedir.as_ref().join(Path::new(s))), - MetaValue::MetaInlines(vec) => bufs.push(basedir.as_ref().join(Path::new(&join(vec)))), - MetaValue::MetaList(values) => { - for value in values { - push_pathbufs(basedir.as_ref(), value, bufs); - } - } - _ => panic!("don't know how to handle: {:?}", v), - }; -} - -fn join(vec: &[Inline]) -> String { - let mut buf = String::new(); - join_into_buffer(vec, &mut buf); - buf -} - -fn join_into_buffer(vec: &[Inline], buf: &mut String) { - for item in vec { - match item { - pandoc_ast::Inline::Str(s) => buf.push_str(s), - pandoc_ast::Inline::Code(_, s) => buf.push_str(s), - pandoc_ast::Inline::Emph(v) => join_into_buffer(v, buf), - pandoc_ast::Inline::Strong(v) => join_into_buffer(v, buf), - pandoc_ast::Inline::Strikeout(v) => join_into_buffer(v, buf), - pandoc_ast::Inline::Superscript(v) => join_into_buffer(v, buf), - pandoc_ast::Inline::Subscript(v) => join_into_buffer(v, buf), - pandoc_ast::Inline::SmallCaps(v) => join_into_buffer(v, buf), - pandoc_ast::Inline::Space => buf.push(' '), - pandoc_ast::Inline::SoftBreak => buf.push(' '), - pandoc_ast::Inline::LineBreak => buf.push(' '), - pandoc_ast::Inline::Quoted(qtype, v) => { - let quote = match qtype { - pandoc_ast::QuoteType::SingleQuote => '\'', - pandoc_ast::QuoteType::DoubleQuote => '"', - }; - buf.push(quote); - join_into_buffer(v, buf); - buf.push(quote); - } - _ => panic!("unknown pandoc_ast::Inline component {:?}", item), - } - } -} - -#[cfg(test)] -mod test_join { - use super::join; - use pandoc_ast::{Inline, QuoteType}; - - #[test] - fn join_all_kinds() { - let v = vec![ - Inline::Str("a".to_string()), - Inline::Emph(vec![Inline::Str("b".to_string())]), - Inline::Strong(vec![Inline::Str("c".to_string())]), - Inline::Strikeout(vec![Inline::Str("d".to_string())]), - Inline::Superscript(vec![Inline::Str("e".to_string())]), - Inline::Subscript(vec![Inline::Str("f".to_string())]), - Inline::SmallCaps(vec![Inline::Str("g".to_string())]), - Inline::Space, - Inline::SoftBreak, - Inline::Quoted(QuoteType::SingleQuote, vec![Inline::Str("h".to_string())]), - Inline::LineBreak, - Inline::Quoted(QuoteType::DoubleQuote, vec![Inline::Str("i".to_string())]), - ]; - assert_eq!(join(&v), r#"abcdefg 'h' "i""#); - } + let basedir = basedir.as_ref(); + v.iter().map(|p| basedir.join(p)).collect() } fn get_bindings<P>( |