diff options
author | Lars Wirzenius <liw@liw.fi> | 2022-10-16 08:22:04 +0000 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2022-10-16 08:22:04 +0000 |
commit | f9df90b851ca273e4af44cc726d5814131e0d6a8 (patch) | |
tree | e123fe9ee4c41229b6e7a7164503b3242cf48336 | |
parent | 0583399d1a3fbe702591a673872a815dc47b8d96 (diff) | |
parent | 90103712b62feec5788acff3865b1a587d780c21 (diff) | |
download | riki-f9df90b851ca273e4af44cc726d5814131e0d6a8.tar.gz |
Merge branch 'refactor-pages-and-files-unification' into 'main'
refactor to simplify site processing, tidy up some things
See merge request larswirzenius/riki!57
-rw-r--r-- | .cargo/config.toml | 5 | ||||
-rw-r--r-- | build.rs | 1 | ||||
-rw-r--r-- | riki.md | 82 | ||||
-rw-r--r-- | src/directive/calendar.rs | 8 | ||||
-rw-r--r-- | src/directive/format.rs | 8 | ||||
-rw-r--r-- | src/directive/graph.rs | 8 | ||||
-rw-r--r-- | src/directive/map.rs | 8 | ||||
-rw-r--r-- | src/directive/meta.rs | 5 | ||||
-rw-r--r-- | src/directive/mod.rs | 44 | ||||
-rw-r--r-- | src/directive/sidebar.rs | 8 | ||||
-rw-r--r-- | src/lib.rs | 9 | ||||
-rw-r--r-- | src/name.rs | 33 | ||||
-rw-r--r-- | src/pagespec.rs | 12 | ||||
-rw-r--r-- | src/site.rs | 167 | ||||
-rw-r--r-- | src/srcdir.rs | 140 | ||||
-rw-r--r-- | src/time.rs | 12 | ||||
-rw-r--r-- | src/token.rs | 6 | ||||
-rw-r--r-- | src/wikitext.rs | 1 |
18 files changed, 377 insertions, 180 deletions
diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..6748679 --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,5 @@ +[target.'cfg(all())'] +rustflags = [ + "-Aclippy::just_underscores_and_digits", + "-Aclippy::needless_lifetimes", +] @@ -1,4 +1,5 @@ fn main() { + println!("cargo:rerun-if-changed=src/pagespec.lalrpop"); lalrpop::process_root().unwrap(); subplot_build::codegen("riki.subplot").expect("failed to generate code with Subplot"); } @@ -8,6 +8,88 @@ speed. This document describes the requirements and acceptance criteria for the software, and how to verify that riki meets them in an automated way. This is done using the [Subplot][] software. +# Software architecture + +`riki` converts files in a source tree into files that form a static +website in an output, or target, tree. The files in the source tree +are either "pages" or "blobs". The files in the target tree are HTML +files or blobs. + +Source pages contain "wiki text", which +adds, on top of Markdown, syntax for *wiki links* and *directives*: + +* plain wiki link: `[[pagename]]` + - this corresponds to Markdown: `[pagename](pagename)` + - or HTML: `<a href="pagename">pagename</a>` +* wiki link with link text: `[[link text|pagename]]` + - this corresponds to Markdown: `[link text](pagename)` + - or HTML: `<a href="pagename">link text</a>` +* directive: `[[!foo arg other="value for other" more="""value for + more"""]]` + - directive arguments may contain values + - values may be single or triple quoted: triple quoted may span + multiple lines + +Directives cause some processing to be done. That processing may take +as its input only values of arguments, or the page where the +directive is used, or all other files in the site. + +Wiki text is converted into plain Markdown by replacing wiki links and +directives with Markdown text, before the whole page is parsed into +HTML, which gets written to the target directory. 
+ +Blobs are copied to the target directory as-is, without any +processing. + +## Processing pipeline + +~~~dot +digraph "processing" { + source [shape=folder] + target [shape=folder] + blob [shape=note] + wikitext [shape=note] + html [shape=note] + + source -> blob + source -> wikitext + + wikitext -> markdown + wikitext -> wikilink + wikitext -> directive + wikilink -> markdown + directive -> markdown + + markdown -> html + html -> target + + blob -> target +} +~~~ + +When the site is processed from source to target, the processing +pipeline is roughly like this: + +* read in all files in the source tree +* parse each page of wiki text into snippets + - a snippet is plain markdown, a wiki link, or a directive +* prepare directives in each page + - this collects or produces data needed for processing the + directive, but doesn't produce output +* process directives in each page + - this produces markdown output +* process wiki links in each page +* combine all processed snippets into one markdown string for each + page and parse that into HTML +* write HTML files to target tree +* copy all blobs to target tree + +Note that when preparing or processing directives, directives on all +pages are prepared first, before any directives are processed. This +allows things like defining shortcuts on any page of the site: the +shortcut definitions are recognized and obeyed during the preparation +stage. 
+ # Verification scenarios The approach used for verifying acceptance criteria is to run `riki` diff --git a/src/directive/calendar.rs b/src/directive/calendar.rs index 1c02e16..b24f39b 100644 --- a/src/directive/calendar.rs +++ b/src/directive/calendar.rs @@ -3,7 +3,7 @@ use crate::page::PageMeta; use crate::site::Site; use crate::wikitext::ParsedDirective; -#[derive(Debug, Eq, PartialEq)] +#[derive(Debug, Default, Eq, PartialEq)] pub struct Calendar {} impl Calendar { @@ -19,10 +19,6 @@ impl Calendar { ]; pub const ALLOW_ANY_UNNAMED: bool = true; - pub fn new() -> Self { - Self {} - } - pub fn process(&self, _site: &Site, _meta: &mut PageMeta) -> Result<String, SiteError> { Ok("FIXME:graph".into()) } @@ -30,6 +26,6 @@ impl Calendar { impl From<&ParsedDirective> for Calendar { fn from(_: &ParsedDirective) -> Self { - Calendar::new() + Calendar::default() } } diff --git a/src/directive/format.rs b/src/directive/format.rs index 6483310..a40c439 100644 --- a/src/directive/format.rs +++ b/src/directive/format.rs @@ -3,7 +3,7 @@ use crate::page::PageMeta; use crate::site::Site; use crate::wikitext::ParsedDirective; -#[derive(Debug, Eq, PartialEq)] +#[derive(Debug, Default, Eq, PartialEq)] pub struct Format {} impl Format { @@ -11,10 +11,6 @@ impl Format { pub const ALLOWED: &'static [&'static str] = &[]; pub const ALLOW_ANY_UNNAMED: bool = true; - pub fn new() -> Self { - Self {} - } - pub fn process(&self, _site: &Site, _meta: &mut PageMeta) -> Result<String, SiteError> { Ok("FIXME:format".into()) } @@ -22,6 +18,6 @@ impl Format { impl From<&ParsedDirective> for Format { fn from(_: &ParsedDirective) -> Self { - Format::new() + Format::default() } } diff --git a/src/directive/graph.rs b/src/directive/graph.rs index a3d15ef..c8c7480 100644 --- a/src/directive/graph.rs +++ b/src/directive/graph.rs @@ -3,7 +3,7 @@ use crate::page::PageMeta; use crate::site::Site; use crate::wikitext::ParsedDirective; -#[derive(Debug, Eq, PartialEq)] +#[derive(Debug, Default, Eq, 
PartialEq)] pub struct Graph {} impl Graph { @@ -11,10 +11,6 @@ impl Graph { pub const ALLOWED: &'static [&'static str] = &["src", "type"]; pub const ALLOW_ANY_UNNAMED: bool = true; - pub fn new() -> Self { - Self {} - } - pub fn process(&self, _site: &Site, _meta: &mut PageMeta) -> Result<String, SiteError> { Ok("FIXME:graph".into()) } @@ -22,6 +18,6 @@ impl Graph { impl From<&ParsedDirective> for Graph { fn from(_: &ParsedDirective) -> Self { - Graph::new() + Graph::default() } } diff --git a/src/directive/map.rs b/src/directive/map.rs index 32b56cc..20bd222 100644 --- a/src/directive/map.rs +++ b/src/directive/map.rs @@ -3,7 +3,7 @@ use crate::page::PageMeta; use crate::site::Site; use crate::wikitext::ParsedDirective; -#[derive(Debug, Eq, PartialEq)] +#[derive(Debug, Default, Eq, PartialEq)] pub struct Map {} impl Map { @@ -11,10 +11,6 @@ impl Map { pub const ALLOWED: &'static [&'static str] = &["show"]; pub const ALLOW_ANY_UNNAMED: bool = true; - pub fn new() -> Self { - Self {} - } - pub fn process(&self, _site: &Site, _meta: &mut PageMeta) -> Result<String, SiteError> { Ok("FIXME:map".into()) } @@ -22,6 +18,6 @@ impl Map { impl From<&ParsedDirective> for Map { fn from(_: &ParsedDirective) -> Self { - Map::new() + Map::default() } } diff --git a/src/directive/meta.rs b/src/directive/meta.rs index ddbdeaa..490ffb1 100644 --- a/src/directive/meta.rs +++ b/src/directive/meta.rs @@ -1,8 +1,8 @@ use crate::error::SiteError; use crate::page::PageMeta; use crate::site::Site; -use crate::wikitext::ParsedDirective; use crate::time::parse_timestamp; +use crate::wikitext::ParsedDirective; #[derive(Default, Debug, Eq, PartialEq)] pub struct Meta { @@ -49,5 +49,4 @@ impl From<&ParsedDirective> for Meta { } #[cfg(test)] -mod test { -} +mod test {} diff --git a/src/directive/mod.rs b/src/directive/mod.rs index 261de36..89b5b8a 100644 --- a/src/directive/mod.rs +++ b/src/directive/mod.rs @@ -36,7 +36,7 @@ impl TryFrom<ParsedDirective> for Directive { fn try_from(p: 
ParsedDirective) -> Result<Self, Self::Error> { Self::try_from(&p) - } + } } impl TryFrom<&ParsedDirective> for Directive { @@ -66,15 +66,30 @@ impl TryFrom<&ParsedDirective> for Directive { } "brokenlinks" => { - Self::check_args(p, BrokenLinks::REQUIRED, BrokenLinks::ALLOWED, BrokenLinks::ALLOW_ANY_UNNAMED)?; + Self::check_args( + p, + BrokenLinks::REQUIRED, + BrokenLinks::ALLOWED, + BrokenLinks::ALLOW_ANY_UNNAMED, + )?; Directive::BrokenLinks(BrokenLinks::from(p)) } "calendar" => { - Self::check_args(p, Calendar::REQUIRED, Calendar::ALLOWED, Calendar::ALLOW_ANY_UNNAMED)?; + Self::check_args( + p, + Calendar::REQUIRED, + Calendar::ALLOWED, + Calendar::ALLOW_ANY_UNNAMED, + )?; Directive::Calendar(Calendar::from(p)) } "format" => { - Self::check_args(p, Format::REQUIRED, Format::ALLOWED, Format::ALLOW_ANY_UNNAMED)?; + Self::check_args( + p, + Format::REQUIRED, + Format::ALLOWED, + Format::ALLOW_ANY_UNNAMED, + )?; Directive::Format(Format::from(p)) } "graph" => { @@ -112,11 +127,21 @@ impl TryFrom<&ParsedDirective> for Directive { Directive::PageStats(PageStats::from(p)) } "shortcut" => { - Self::check_args(p, Shortcut::REQUIRED, Shortcut::ALLOWED, Shortcut::ALLOW_ANY_UNNAMED)?; + Self::check_args( + p, + Shortcut::REQUIRED, + Shortcut::ALLOWED, + Shortcut::ALLOW_ANY_UNNAMED, + )?; Directive::Shortcut(Shortcut::from(p)) } "sidebar" => { - Self::check_args(p, Sidebar::REQUIRED, Sidebar::ALLOWED, Sidebar::ALLOW_ANY_UNNAMED)?; + Self::check_args( + p, + Sidebar::REQUIRED, + Sidebar::ALLOWED, + Sidebar::ALLOW_ANY_UNNAMED, + )?; Directive::Sidebar(Sidebar::from(p)) } "tag" => { @@ -132,7 +157,12 @@ impl TryFrom<&ParsedDirective> for Directive { Directive::Toc(Toc::from(p)) } "traillink" => { - Self::check_args(p, TrailLink::REQUIRED, TrailLink::ALLOWED, TrailLink::ALLOW_ANY_UNNAMED)?; + Self::check_args( + p, + TrailLink::REQUIRED, + TrailLink::ALLOWED, + TrailLink::ALLOW_ANY_UNNAMED, + )?; Directive::TrailLink(TrailLink::from(p)) } _ => return 
Err(SiteError::UnknownDirective(p.name().into())), diff --git a/src/directive/sidebar.rs b/src/directive/sidebar.rs index d68f9be..b80bfe9 100644 --- a/src/directive/sidebar.rs +++ b/src/directive/sidebar.rs @@ -3,7 +3,7 @@ use crate::page::PageMeta; use crate::site::Site; use crate::wikitext::ParsedDirective; -#[derive(Debug, Eq, PartialEq)] +#[derive(Debug, Default, Eq, PartialEq)] pub struct Sidebar {} impl Sidebar { @@ -11,10 +11,6 @@ impl Sidebar { pub const ALLOWED: &'static [&'static str] = &["content"]; pub const ALLOW_ANY_UNNAMED: bool = true; - pub fn new() -> Self { - Self {} - } - pub fn process(&self, _site: &Site, _meta: &mut PageMeta) -> Result<String, SiteError> { Ok("FIXME:sidebar".into()) } @@ -22,6 +18,6 @@ impl Sidebar { impl From<&ParsedDirective> for Sidebar { fn from(_: &ParsedDirective) -> Self { - Sidebar::new() + Sidebar::default() } } @@ -7,16 +7,17 @@ //! little slow. This care implements a subset of the functionality of //! ikiwiki in Rust, for speed. -pub mod name; pub mod directive; pub mod error; +pub mod git; pub mod html; +pub mod name; pub mod page; +pub mod pagespec; pub mod parser; pub mod site; +pub mod srcdir; +pub mod time; pub mod token; -pub mod git; pub mod util; -pub mod time; pub mod wikitext; -pub mod pagespec; diff --git a/src/name.rs b/src/name.rs index 3cb0157..2388e4b 100644 --- a/src/name.rs +++ b/src/name.rs @@ -4,6 +4,7 @@ use std::cmp::Ordering; use std::ffi::OsStr; use std::fmt; use std::path::{Path, PathBuf}; +use std::time::SystemTime; #[derive(Debug, Clone, Eq, PartialEq)] pub struct Name { @@ -12,10 +13,17 @@ pub struct Name { dest: PathBuf, page: PathBuf, page_name: String, + mtime: SystemTime, } impl Name { - fn new(is_wikitext: bool, src: PathBuf, dest: PathBuf, page: PathBuf) -> Self { + fn new( + is_wikitext: bool, + src: PathBuf, + dest: PathBuf, + page: PathBuf, + mtime: SystemTime, + ) -> Self { trace!( "Name::new: is_wikitext={} src={} dest={} page={}", is_wikitext, @@ -34,6 +42,7 @@ impl Name { 
dest, page, page_name, + mtime, } } @@ -103,7 +112,7 @@ impl NameBuilder { &self.destdir } - pub fn page(&self, path: &Path) -> Name { + pub fn page(&self, path: &Path, mtime: SystemTime) -> Name { assert!(path.starts_with(&self.srcdir)); let src = path.into(); let relative = make_path_relative_to(&self.srcdir, path); @@ -128,16 +137,16 @@ impl NameBuilder { (dest, page) }; - Name::new(true, src, dest, page) + Name::new(true, src, dest, page, mtime) } - pub fn file(&self, path: &Path) -> Name { + pub fn file(&self, path: &Path, mtime: SystemTime) -> Name { assert!(path.starts_with(&self.srcdir)); let src = path.into(); let relative = make_path_relative_to(&self.srcdir, path); let page = make_path_absolute(&relative); let dest = join_subpath(&self.destdir, &relative); - Name::new(false, src, dest, page) + Name::new(false, src, dest, page, mtime) } } @@ -179,7 +188,7 @@ impl Names { #[cfg(test)] mod test { use super::{Name, NameBuilder, Names}; - use std::path::Path; + use std::{path::Path, time::UNIX_EPOCH}; fn builder() -> NameBuilder { NameBuilder::new(Path::new("/src"), Path::new("/dest")) @@ -187,7 +196,7 @@ mod test { #[test] fn builds_page_name() { - let name = builder().page(Path::new("/src/foo/bar.mdwn")); + let name = builder().page(Path::new("/src/foo/bar.mdwn"), UNIX_EPOCH); assert_eq!(name.source_path(), Path::new("/src/foo/bar.mdwn")); assert_eq!( name.destination_path(), @@ -199,7 +208,7 @@ mod test { #[test] fn builds_page_name_for_index_mdwn() { - let name = builder().page(Path::new("/src/foo/index.mdwn")); + let name = builder().page(Path::new("/src/foo/index.mdwn"), UNIX_EPOCH); assert_eq!(name.source_path(), Path::new("/src/foo/index.mdwn")); assert_eq!(name.destination_path(), Path::new("/dest/foo/index.html")); assert_eq!(name.page_path(), Path::new("/foo")); @@ -208,7 +217,7 @@ mod test { #[test] fn builds_file_name() { - let name = builder().file(Path::new("/src/foo/bar.jpg")); + let name = builder().file(Path::new("/src/foo/bar.jpg"), 
UNIX_EPOCH); assert_eq!(name.source_path(), Path::new("/src/foo/bar.jpg")); assert_eq!(name.destination_path(), Path::new("/dest/foo/bar.jpg")); assert_eq!(name.page_path(), Path::new("/foo/bar.jpg")); @@ -224,7 +233,7 @@ mod test { #[test] fn names_remembers_inserted() { let mut names = Names::default(); - let name = builder().page(Path::new("/src/foo/bar.mdwn")); + let name = builder().page(Path::new("/src/foo/bar.mdwn"), UNIX_EPOCH); names.insert(name.clone()); assert_eq!( names.get_source_path(Path::new("/src/foo/bar.mdwn")), @@ -236,8 +245,8 @@ mod test { #[test] fn names_remembers_inserted_pages_and_files() { let mut names = Names::default(); - let page = builder().page(Path::new("/src/foo/bar.mdwn")); - let file = builder().file(Path::new("/src/foo/bar.jpg")); + let page = builder().page(Path::new("/src/foo/bar.mdwn"), UNIX_EPOCH); + let file = builder().file(Path::new("/src/foo/bar.jpg"), UNIX_EPOCH); names.insert(page.clone()); names.insert(file.clone()); let pages: Vec<&Name> = names.pages().collect(); diff --git a/src/pagespec.rs b/src/pagespec.rs index de365a9..6a4b35d 100644 --- a/src/pagespec.rs +++ b/src/pagespec.rs @@ -34,7 +34,8 @@ impl PageSpec { pub fn matches(&self, page_path: &Path) -> bool { trace!( "PageSpec::matches: container={} page_path={}", - self.container.display(), page_path.display() + self.container.display(), + page_path.display() ); assert!(page_path.is_absolute()); if let Ok(path) = page_path.strip_prefix(&self.container) { @@ -100,12 +101,11 @@ fn glob_matches_helper(mut glob: &[char], mut path: &[char]) -> bool { if glob_remain.is_empty() { return true; } - let mut path_remain = &path[..]; - while !path_remain.is_empty() { - if glob_matches_helper(&glob[1..], path_remain) { + while !path.is_empty() { + if glob_matches_helper(&glob[1..], path) { return true; } - path_remain = &path_remain[1..]; + path = &path[1..]; } return false; } @@ -120,7 +120,7 @@ fn glob_matches_helper(mut glob: &[char], mut path: &[char]) -> bool { } } - 
while let Some('*') = glob.get(0) { + while let Some('*') = glob.first() { glob = &glob[1..]; } diff --git a/src/site.rs b/src/site.rs index 571bc11..874f3c8 100644 --- a/src/site.rs +++ b/src/site.rs @@ -3,25 +3,22 @@ use crate::git::git_whatchanged; use crate::name::{Name, NameBuilder, Names}; use crate::page::{MarkdownPage, UnprocessedPage, WikitextPage}; use crate::parser::WikitextParser; +use crate::srcdir::{PathFilter, SourceDir}; use crate::token::TokenPatterns; use crate::util::make_relative_link; use log::{debug, info, trace}; -use std::collections::{BinaryHeap, HashMap}; +use std::collections::HashMap; use std::path::{Path, PathBuf}; -use std::time::SystemTime; -use walkdir::WalkDir; +use std::time::UNIX_EPOCH; pub struct Site { + patterns: TokenPatterns, + shortcuts: HashMap<String, Shortcut>, builder: NameBuilder, - wikitext_pages: Vec<WikitextPage>, unprocessed_pages: Vec<UnprocessedPage>, markdown_pages: Vec<MarkdownPage>, + pages_that_will_exist: PageSet, files: Names, - patterns: TokenPatterns, - name_queue: BinaryHeap<Name>, - page_queue: PageSet, - whatchanged: HashMap<PathBuf, SystemTime>, - shortcuts: HashMap<String, Shortcut>, } impl Site { @@ -34,25 +31,24 @@ impl Site { { Self { builder: NameBuilder::new(srcdir.as_ref(), destdir.as_ref()), - wikitext_pages: vec![], unprocessed_pages: vec![], markdown_pages: vec![], files: Names::default(), patterns: TokenPatterns::default(), - name_queue: BinaryHeap::new(), - page_queue: PageSet::default(), - whatchanged: HashMap::new(), + pages_that_will_exist: PageSet::default(), shortcuts: HashMap::new(), } } pub fn scan(&mut self) -> Result<(), SiteError> { - self.whatchanged = git_whatchanged(self.builder.srcdir())?; for name in self.all_files()? 
{ trace!("scan: name={}", name); if name.is_wikitext_page() { trace!("scan: it's a page"); - self.name_queue.push(name); + debug!("loading wikitext page {}", name.source_path().display()); + let page = WikitextPage::read(&name)?; + self.files.insert(name); + self.add_wikitextpage(page)?; } else { trace!("scan: it's a non-page file"); let filename = name.source_path(); @@ -64,10 +60,18 @@ impl Site { Ok(()) } - fn add_wikitextpage(&mut self, page: WikitextPage) { + fn add_wikitextpage(&mut self, page: WikitextPage) -> Result<(), SiteError> { info!("add wikitext page {}", page.meta().path().display()); - self.page_queue.insert(&page); - self.wikitext_pages.push(page); + self.pages_that_will_exist.insert(&page); + + debug!("parsing wikitext page {}", page.meta().path().display()); + let mut parser = WikitextParser::new(page.wikitext(), &self.patterns); + let page = UnprocessedPage::new(page.meta().clone(), &mut parser)?; + page.prepare(self)?; + + self.unprocessed_pages.push(page); + + Ok(()) } fn add_other_file(&mut self, name: Name) { @@ -78,10 +82,7 @@ impl Site { pub fn process(&mut self) -> Result<(), SiteError> { trace!("processing queues"); loop { - if !self.process_name()? - && !self.process_wikipage()? - && !self.process_unrocessed_page()? - { + if !self.process_page()? 
{ trace!("processing queues done"); break; } @@ -89,45 +90,7 @@ impl Site { Ok(()) } - fn process_name(&mut self) -> Result<bool, SiteError> { - if let Some(name) = self.name_queue.pop() { - debug!("loading wikitext page {}", name.source_path().display()); - let mut page = WikitextPage::read(&name)?; - if let Some(mtime) = self.git_commit_timestamp(&name) { - page.meta_mut().set_mtime(mtime); - } - self.files.insert(name); - self.add_wikitextpage(page); - Ok(true) - } else { - trace!("name_queue was empty"); - Ok(false) - } - } - - fn git_commit_timestamp(&self, name: &Name) -> Option<SystemTime> { - let relative = name - .source_path() - .strip_prefix(&self.builder.srcdir()) - .unwrap(); - self.whatchanged.get(relative).copied() - } - - fn process_wikipage(&mut self) -> Result<bool, SiteError> { - if let Some(page) = self.wikitext_pages.pop() { - debug!("processing wikitext page {}", page.meta().path().display()); - let mut parser = WikitextParser::new(page.wikitext(), &self.patterns); - let page = UnprocessedPage::new(page.meta().clone(), &mut parser)?; - page.prepare(self)?; - self.unprocessed_pages.push(page); - Ok(true) - } else { - trace!("wikitext_ages was empty"); - Ok(false) - } - } - - fn process_unrocessed_page(&mut self) -> Result<bool, SiteError> { + fn process_page(&mut self) -> Result<bool, SiteError> { if let Some(page) = self.unprocessed_pages.pop() { debug!( "processing unprocessed page {}", @@ -137,7 +100,7 @@ impl Site { self.markdown_pages.push(page); Ok(true) } else { - trace!("unprocessed_ages was empty"); + trace!("no pages to process"); Ok(false) } } @@ -151,50 +114,32 @@ impl Site { } pub fn pages_and_files(&self) -> impl Iterator<Item = &Name> { - self.files.iter().chain(self.name_queue.iter()) + self.files.iter() } fn all_files(&self) -> Result<Vec<Name>, SiteError> { + let whatchanged = git_whatchanged(self.builder.srcdir())?; + + let mut srcdir = SourceDir::new(self.builder.srcdir()); + srcdir.scan()?; + + let filter = 
PathFilter::new(Self::EXCLUDE_SUBSTRINGS, Self::EXCLUDE_ENDS); + let mut names = vec![]; - let root = self.builder.srcdir(); - trace!("all_files: root={}", root.display()); - for e in WalkDir::new(root) { - let e = e.map_err(|err| SiteError::WalkDir(root.to_path_buf(), err))?; - let path = e.path(); - trace!("all_files: path={}", path.display()); - if Self::is_excluded(path) { - debug!("exclude {}", path.display()); + for path in srcdir.files().iter().filter(|x| filter.is_included(x)) { + let relative = path.strip_prefix(&self.builder.srcdir()).unwrap(); + let mtime = whatchanged.get(relative).copied().unwrap_or(UNIX_EPOCH); + if Self::is_markdown(path) { + names.push(self.builder.page(path, mtime)); + } else if path.is_file() { + names.push(self.builder.file(path, mtime)); } else { - debug!("include {}", path.display()); - if Self::is_markdown(path) { - trace!("it's markdown"); - names.push(self.builder.page(path)); - } else if path.is_file() { - trace!("it's not markdown"); - names.push(self.builder.file(path)); - } else { - trace!("it's not a file"); - } + trace!("not a file, ignoring: {}", path.display()); } } Ok(names) } - fn is_excluded(path: &Path) -> bool { - let path = path.to_string_lossy(); - for pat in Self::EXCLUDE_ENDS { - if path.ends_with(pat) { - return true; - } - } - for pat in Self::EXCLUDE_SUBSTRINGS { - if path.contains(pat) { - return true; - } - } - false - } - fn is_markdown(path: &Path) -> bool { if let Some(ext) = path.extension() { ext == "mdwn" @@ -228,7 +173,7 @@ impl Site { // Is target absolute? if target.starts_with("/") { - if let Some(path) = self.page_queue.get(target) { + if let Some(path) = self.pages_that_will_exist.get(target) { trace!("absolute target exists"); return Ok(path.into()); } else { @@ -240,7 +185,7 @@ impl Site { // Does a sub-page or file exist? 
let wanted = page.join(target); trace!("checking for subpage or file {}", wanted.display()); - if let Some(path) = self.page_queue.get(&wanted) { + if let Some(path) = self.pages_that_will_exist.get(&wanted) { trace!("subpage exists: {}", path.display()); return Ok(path.into()); } else if self.file_exists(&wanted) { @@ -256,7 +201,7 @@ impl Site { parent.display(), path.display() ); - if let Some(path) = self.page_queue.get(path.as_path()) { + if let Some(path) = self.pages_that_will_exist.get(path.as_path()) { trace!("sibling page exists: {}", path.display()); return Ok(path.into()); } @@ -270,7 +215,7 @@ impl Site { // Does target exist relative to root? let wanted = Path::new("/").join(target); trace!("checking for absolute path {}", wanted.display()); - if let Some(path) = self.page_queue.get(&wanted) { + if let Some(path) = self.pages_that_will_exist.get(&wanted) { trace!("page at absolute path exists: {}", path.display()); return Ok(path.into()); } else if self.file_exists(&wanted) { @@ -361,7 +306,7 @@ mod test { use crate::page::MetaBuilder; use std::{ path::{Path, PathBuf}, - time::SystemTime, + time::{SystemTime, UNIX_EPOCH}, }; fn site() -> Site { @@ -373,7 +318,7 @@ mod test { } fn page(path: &str) -> WikitextPage { - let name = builder().page(Path::new(path)); + let name = builder().page(Path::new(path), UNIX_EPOCH); let mtime = SystemTime::now(); let meta = MetaBuilder::default().name(name).mtime(mtime).build(); WikitextPage::new(meta, "".into()) @@ -387,7 +332,7 @@ mod test { #[test] fn absolute_link_resolves_to_link_relative_root_of_site() { let mut site = site(); - site.add_wikitextpage(page("/src/yo/yoyo")); + site.add_wikitextpage(page("/src/yo/yoyo")).unwrap(); assert_eq!( site.resolve("/foo/bar", "/yo/yoyo").unwrap(), Path::new("../yo/yoyo") @@ -409,7 +354,7 @@ mod test { #[test] fn link_to_sibling_resolves_to_it() { let mut site = site(); - site.add_wikitextpage(page("/src/foo/yo")); + site.add_wikitextpage(page("/src/foo/yo")).unwrap(); 
site.process().unwrap(); assert_eq!(site.resolve("/foo/bar", "yo").unwrap(), Path::new("yo")); } @@ -417,7 +362,7 @@ mod test { #[test] fn link_using_other_casing_is_resolved() { let mut site = site(); - site.add_wikitextpage(page("/src/foo/yo")); + site.add_wikitextpage(page("/src/foo/yo")).unwrap(); site.process().unwrap(); assert_eq!(site.resolve("/foo/bar", "YO").unwrap(), Path::new("yo")); } @@ -425,7 +370,7 @@ mod test { #[test] fn link_to_sublpage_resolves_to_it() { let mut site = site(); - site.add_wikitextpage(page("/src/foo/bar/yo")); + site.add_wikitextpage(page("/src/foo/bar/yo")).unwrap(); site.process().unwrap(); assert_eq!(site.resolve("/foo/bar", "yo").unwrap(), Path::new("bar/yo")); } @@ -433,8 +378,8 @@ mod test { #[test] fn link_to_sublpage_resolves_to_it_and_not_sibling() { let mut site = site(); - site.add_wikitextpage(page("/src/foo/bar/yo")); - site.add_wikitextpage(page("/src/foo/yo")); + site.add_wikitextpage(page("/src/foo/bar/yo")).unwrap(); + site.add_wikitextpage(page("/src/foo/yo")).unwrap(); site.process().unwrap(); assert_eq!(site.resolve("/foo/bar", "yo").unwrap(), Path::new("bar/yo")); } @@ -455,7 +400,7 @@ mod test { #[test] fn link_to_subsubpage_resolves_to_it() { let mut site = site(); - site.add_wikitextpage(page("/src/foo/bar/yo/yoyo")); + site.add_wikitextpage(page("/src/foo/bar/yo/yoyo")).unwrap(); site.process().unwrap(); assert_eq!( site.resolve("/foo/bar", "yo/yoyo").unwrap(), @@ -466,7 +411,7 @@ mod test { #[test] fn link_to_sibling_file_resolves_to_it() { let mut site = site(); - let name = builder().file(Path::new("/src/foo/bar.jpg")); + let name = builder().file(Path::new("/src/foo/bar.jpg"), UNIX_EPOCH); site.add_other_file(name); site.process().unwrap(); assert_eq!( @@ -478,7 +423,7 @@ mod test { #[test] fn link_relative_to_root_as_last_resort() { let mut site = site(); - site.add_wikitextpage(page("/src/foo.mdwn")); + site.add_wikitextpage(page("/src/foo.mdwn")).unwrap(); site.process().unwrap(); assert_eq!( 
site.resolve("/yo/yoyo/yoyoyo", "foo").unwrap(), diff --git a/src/srcdir.rs b/src/srcdir.rs new file mode 100644 index 0000000..66d2a04 --- /dev/null +++ b/src/srcdir.rs @@ -0,0 +1,140 @@ +use crate::error::SiteError; +use log::trace; +use std::path::{Path, PathBuf}; +use walkdir::WalkDir; + +pub struct SourceDir { + path: PathBuf, + files: Vec<PathBuf>, + artificial: bool, +} + +impl SourceDir { + pub fn new(path: &Path) -> Self { + Self { + path: path.into(), + files: vec![], + artificial: false, + } + } + + pub fn insert_for_tests<P>(&mut self, path: P) + where + P: AsRef<Path>, + { + self.artificial = true; + self.insert(path.as_ref()); + } + + pub fn insert(&mut self, path: &Path) { + trace!("Source Dir::insert: path={}", path.display()); + self.files.push(path.into()); + } + + pub fn scan(&mut self) -> Result<(), SiteError> { + if self.artificial { + trace!("SourceDir::scan: artificial mode, not actually scanning"); + } else { + trace!("SourceDir::scan: find files in {}", self.path.display()); + for e in WalkDir::new(&self.path) { + let e = e.map_err(|err| SiteError::WalkDir(self.path.clone(), err))?; + let path = e.path(); + trace!("SourceDir::scan: found {}", path.display()); + self.insert(path); + } + } + Ok(()) + } + + pub fn files(&self) -> &[PathBuf] { + &self.files + } +} + +#[derive(Default)] +pub struct PathFilter { + excluded_substrings: Vec<&'static str>, + excluded_suffixes: Vec<&'static str>, +} + +impl PathFilter { + pub fn new(subs: &[&'static str], suffixes: &[&'static str]) -> Self { + Self { + excluded_substrings: subs.to_vec(), + excluded_suffixes: suffixes.to_vec(), + } + } + + pub fn exclude_substring(&mut self, s: &'static str) { + self.excluded_substrings.push(s); + } + + pub fn exclude_suffix(&mut self, s: &'static str) { + self.excluded_suffixes.push(s); + } + + pub fn is_included<P>(&self, path: P) -> bool + where + P: AsRef<Path>, + { + let path = path.as_ref(); + let include = { + let path = path.to_string_lossy(); + for pat in 
self.excluded_suffixes.iter() { + if path.ends_with(pat) { + return false; + } + } + for pat in self.excluded_substrings.iter() { + if path.contains(pat) { + return false; + } + } + true + }; + if include { + trace!("include {}", path.display()); + } else { + trace!("exclude {}", path.display()); + } + include + } +} + +#[cfg(test)] +mod test { + use super::PathFilter; + + #[test] + fn includes_dotgit_by_default() { + let filter = PathFilter::default(); + assert!(filter.is_included(".git")); + } + + #[test] + fn excludes_dotgit_if_requested() { + let mut filter = PathFilter::default(); + filter.exclude_substring(".git"); + assert!(!filter.is_included(".git")); + } + + #[test] + fn includes_footilde_by_default() { + let filter = PathFilter::default(); + assert!(filter.is_included("foo~")); + } + + #[test] + fn includes_footildebar_if_tilde_suffix_is_excluded() { + let mut filter = PathFilter::default(); + filter.exclude_suffix("~"); + assert!(filter.is_included("foo~bar")); + } + + #[test] + fn excludes_footilde_if_tilde_suffix_is_excluded() { + let mut filter = PathFilter::default(); + filter.exclude_suffix("~"); + assert!(!filter.is_included("foo~")); + } +} diff --git a/src/time.rs b/src/time.rs index 2728701..dce3ecc 100644 --- a/src/time.rs +++ b/src/time.rs @@ -29,18 +29,22 @@ fn parse(timestamp: &str) -> Result<OffsetDateTime, SiteError> { format_description!("[year]-[month]-[day] [hour]:[minute]:[second]"); const SIMPLIFIED_ISO9601_MIN: &[FormatItem<'static>] = format_description!("[year]-[month]-[day] [hour]:[minute]"); - const SIMPLIFIED_ISO9601_TZ: &[FormatItem<'static>] = - format_description!("[year]-[month]-[day] [hour]:[minute]:[second] [offset_hour][offset_minute]"); + const SIMPLIFIED_ISO9601_TZ: &[FormatItem<'static>] = format_description!( + "[year]-[month]-[day] [hour]:[minute]:[second] [offset_hour][offset_minute]" + ); const SIMPLIFIED_ISO9601_MIN_TZ: &[FormatItem<'static>] = format_description!("[year]-[month]-[day] [hour]:[minute] 
[offset_hour][offset_minute]"); if let Ok(t) = parse_one_time_format(timestamp, "simplified", SIMPLIFIED_ISO9601) { Ok(t) - } else if let Ok(t) = parse_one_time_format(timestamp, "simplified-min", SIMPLIFIED_ISO9601_MIN) { + } else if let Ok(t) = parse_one_time_format(timestamp, "simplified-min", SIMPLIFIED_ISO9601_MIN) + { Ok(t) } else if let Ok(t) = parse_one_time_format(timestamp, "simplified-tz", SIMPLIFIED_ISO9601_TZ) { Ok(t) - } else if let Ok(t) = parse_one_time_format(timestamp, "simplified-tz", SIMPLIFIED_ISO9601_MIN_TZ) { + } else if let Ok(t) = + parse_one_time_format(timestamp, "simplified-tz", SIMPLIFIED_ISO9601_MIN_TZ) + { Ok(t) } else if let Ok(t) = parse_one_time_format(timestamp, "ISO8601", &Iso8601::PARSING) { Ok(t) diff --git a/src/token.rs b/src/token.rs index 0190edc..f6fb92c 100644 --- a/src/token.rs +++ b/src/token.rs @@ -106,10 +106,10 @@ impl<'a> TokenParser<'a> { } fn regex(&mut self, pattern: &Regex) -> Option<String> { -// trace!("matching regex {}", pattern.as_str()); + // trace!("matching regex {}", pattern.as_str()); if let Some(m) = pattern.find(self.input) { if m.start() == 0 { -// trace!("match at beginning"); + // trace!("match at beginning"); let captures = pattern.captures(self.input).unwrap(); let m = if let Some(value) = captures.name("value") { self.input = &self.input[m.end()..]; @@ -121,7 +121,7 @@ impl<'a> TokenParser<'a> { return Some(m.as_str().to_string()); } } -// trace!("no match at beginning"); + // trace!("no match at beginning"); None } } diff --git a/src/wikitext.rs b/src/wikitext.rs index 79f311f..81514f0 100644 --- a/src/wikitext.rs +++ b/src/wikitext.rs @@ -5,6 +5,7 @@ use crate::site::Site; use log::trace; use std::collections::HashMap; use std::path::Path; + #[derive(Debug, Eq, PartialEq)] pub enum Snippet { Markdown(String), |