author    Lars Wirzenius <liw@liw.fi>  2022-10-16 08:22:04 +0000
committer Lars Wirzenius <liw@liw.fi>  2022-10-16 08:22:04 +0000
commit    f9df90b851ca273e4af44cc726d5814131e0d6a8 (patch)
tree      e123fe9ee4c41229b6e7a7164503b3242cf48336
parent    0583399d1a3fbe702591a673872a815dc47b8d96 (diff)
parent    90103712b62feec5788acff3865b1a587d780c21 (diff)
download  riki-f9df90b851ca273e4af44cc726d5814131e0d6a8.tar.gz

Merge branch 'refactor-pages-and-files-unification' into 'main'

refactor to simplify site processing, tidy up some things

See merge request larswirzenius/riki!57
-rw-r--r--  .cargo/config.toml          5
-rw-r--r--  build.rs                    1
-rw-r--r--  riki.md                    82
-rw-r--r--  src/directive/calendar.rs   8
-rw-r--r--  src/directive/format.rs     8
-rw-r--r--  src/directive/graph.rs      8
-rw-r--r--  src/directive/map.rs        8
-rw-r--r--  src/directive/meta.rs       5
-rw-r--r--  src/directive/mod.rs       44
-rw-r--r--  src/directive/sidebar.rs    8
-rw-r--r--  src/lib.rs                  9
-rw-r--r--  src/name.rs                33
-rw-r--r--  src/pagespec.rs            12
-rw-r--r--  src/site.rs               167
-rw-r--r--  src/srcdir.rs             140
-rw-r--r--  src/time.rs                12
-rw-r--r--  src/token.rs                6
-rw-r--r--  src/wikitext.rs             1
18 files changed, 377 insertions, 180 deletions
diff --git a/.cargo/config.toml b/.cargo/config.toml
new file mode 100644
index 0000000..6748679
--- /dev/null
+++ b/.cargo/config.toml
@@ -0,0 +1,5 @@
+[target.'cfg(all())']
+rustflags = [
+ "-Aclippy::just_underscores_and_digits",
+ "-Aclippy::needless_lifetimes",
+]
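
(Note: `[target.'cfg(all())']` matches every build target, so these `rustflags` apply to all compilation under this directory; the `-A` flags allow, i.e. silence, the two named clippy lints.)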
diff --git a/build.rs b/build.rs
index bc369d8..52d9f76 100644
--- a/build.rs
+++ b/build.rs
@@ -1,4 +1,5 @@
fn main() {
+ println!("cargo:rerun-if-changed=src/pagespec.lalrpop");
lalrpop::process_root().unwrap();
subplot_build::codegen("riki.subplot").expect("failed to generate code with Subplot");
}
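
Emitting a `cargo:rerun-if-changed` directive switches Cargo from its default of re-running the build script whenever anything in the package changes to re-running it only when a named path changes (plus whatever directives the called generators emit themselves), so editing `src/pagespec.lalrpop` regenerates the parser without the script running on every unrelated edit.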
diff --git a/riki.md b/riki.md
index b7551e1..4c78f11 100644
--- a/riki.md
+++ b/riki.md
@@ -8,6 +8,88 @@ speed. This document describes the requirements and acceptance
criteria for the software, and how to verify that riki meets them in
an automated way. This is done using the [Subplot][] software.
+# Software architecture
+
+`riki` converts files in a source tree into files that form a static
+website in an output, or target, tree. The files in the source tree
+are either "pages" or "blobs". The files in the target tree are HTML
+files or blobs.
+
+Source pages contain "wiki text", which extends Markdown with syntax
+for *wiki links* and *directives*:
+
+* plain wiki link: `[[pagename]]`
+ - this corresponds to Markdown: `[pagename](pagename)`
+ - or HTML: `<a href="pagename">pagename</a>`
+* wiki link with link text: `[[link text|pagename]]`
+ - this corresponds to Markdown: `[link text](pagename)`
+ - or HTML: `<a href="pagename">link text</a>`
+* directive: `[[!foo arg other="value for other" more="""value for
+ more"""]]`
+ - directive arguments may contain values
+ - values may be single or triple quoted: triple quoted may span
+ multiple lines
+
+Directives cause some processing to be done. That processing may take
+as input only the values of the directive's arguments, or the page
+where the directive is used, or all other files in the site.
+
+Wiki text is converted into plain Markdown by replacing wiki links and
+directives with Markdown text, before the whole page is parsed into
+HTML, which gets written to the target directory.
+
+Blobs are copied to the target directory as-is, without any
+processing.
+
+## Processing pipeline
+
+~~~dot
+digraph "processing" {
+ source [shape=folder]
+ target [shape=folder]
+ blob [shape=note]
+ wikitext [shape=note]
+ html [shape=note]
+
+ source -> blob
+ source -> wikitext
+
+ wikitext -> markdown
+ wikitext -> wikilink
+ wikitext -> directive
+ wikilink -> markdown
+ directive -> markdown
+
+ markdown -> html
+ html -> target
+
+ blob -> target
+}
+~~~
+
+When the site is processed from source to target, the processing
+pipeline is roughly like this:
+
+* read in all files in the source tree
+* parse each page of wiki text into snippets
+ - a snippet is plain markdown, a wiki link, or a directive
+* prepare directives in each page
+ - this collects or produces data needed for processing the
+ directive, but doesn't produce output
+* process directives in each page
+ - this produces markdown output
+* process wiki links in each page
+* combine all processed snippets into one markdown string for each
+ page and parse that into HTML
+* write HTML files to target tree
+* copy all blobs to target tree
+
+Note that directives on all pages are prepared before any directive
+is processed. This
+allows things like defining shortcuts on any page of the site: the
+shortcut definitions are recognized and obeyed during the preparation
+stage.
+
# Verification scenarios
The approach used for verifying acceptance criteria is to run `riki`
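
The snippet pipeline the new architecture section describes can be made concrete with a short sketch. This is illustrative only: the variant payloads below are assumed simplifications, not the crate's real `Snippet` enum from `src/wikitext.rs`.

```rust
// A sketch of the wikitext -> markdown step, with assumed simplified
// types; the real Snippet enum and directive processing carry more data.
enum Snippet {
    Markdown(String),
    WikiLink { text: String, target: String },
    Directive { name: String },
}

fn snippet_to_markdown(snippet: &Snippet) -> String {
    match snippet {
        // plain markdown passes through unchanged
        Snippet::Markdown(text) => text.clone(),
        // [[text|target]] becomes [text](target)
        Snippet::WikiLink { text, target } => format!("[{text}]({target})"),
        // a directive is replaced by whatever markdown its processing yields
        Snippet::Directive { name } => format!("(output of directive {name})"),
    }
}

// Combine processed snippets into one markdown string per page.
fn page_to_markdown(snippets: &[Snippet]) -> String {
    snippets.iter().map(snippet_to_markdown).collect()
}
```

Because every page is prepared before any page is processed, a shortcut defined on one page is already known by the time any other page's directives produce their markdown.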
diff --git a/src/directive/calendar.rs b/src/directive/calendar.rs
index 1c02e16..b24f39b 100644
--- a/src/directive/calendar.rs
+++ b/src/directive/calendar.rs
@@ -3,7 +3,7 @@ use crate::page::PageMeta;
use crate::site::Site;
use crate::wikitext::ParsedDirective;
-#[derive(Debug, Eq, PartialEq)]
+#[derive(Debug, Default, Eq, PartialEq)]
pub struct Calendar {}
impl Calendar {
@@ -19,10 +19,6 @@ impl Calendar {
];
pub const ALLOW_ANY_UNNAMED: bool = true;
- pub fn new() -> Self {
- Self {}
- }
-
pub fn process(&self, _site: &Site, _meta: &mut PageMeta) -> Result<String, SiteError> {
Ok("FIXME:graph".into())
}
@@ -30,6 +26,6 @@ impl Calendar {
impl From<&ParsedDirective> for Calendar {
fn from(_: &ParsedDirective) -> Self {
- Calendar::new()
+ Calendar::default()
}
}
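
The same mechanical cleanup recurs below in `format.rs`, `graph.rs`, `map.rs`, and `sidebar.rs`: for a field-less struct, deriving `Default` provides the constructor, so the hand-written `new()` is dead weight (clippy's `new_without_default` lint flags exactly this shape). The resulting pattern, sketched:

```rust
use crate::wikitext::ParsedDirective;

// Field-less directive structs get their constructor from Default.
#[derive(Debug, Default, Eq, PartialEq)]
pub struct Calendar {}

impl From<&ParsedDirective> for Calendar {
    fn from(_: &ParsedDirective) -> Self {
        Calendar::default() // replaces the removed Calendar::new()
    }
}
```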
diff --git a/src/directive/format.rs b/src/directive/format.rs
index 6483310..a40c439 100644
--- a/src/directive/format.rs
+++ b/src/directive/format.rs
@@ -3,7 +3,7 @@ use crate::page::PageMeta;
use crate::site::Site;
use crate::wikitext::ParsedDirective;
-#[derive(Debug, Eq, PartialEq)]
+#[derive(Debug, Default, Eq, PartialEq)]
pub struct Format {}
impl Format {
@@ -11,10 +11,6 @@ impl Format {
pub const ALLOWED: &'static [&'static str] = &[];
pub const ALLOW_ANY_UNNAMED: bool = true;
- pub fn new() -> Self {
- Self {}
- }
-
pub fn process(&self, _site: &Site, _meta: &mut PageMeta) -> Result<String, SiteError> {
Ok("FIXME:format".into())
}
@@ -22,6 +18,6 @@ impl Format {
impl From<&ParsedDirective> for Format {
fn from(_: &ParsedDirective) -> Self {
- Format::new()
+ Format::default()
}
}
diff --git a/src/directive/graph.rs b/src/directive/graph.rs
index a3d15ef..c8c7480 100644
--- a/src/directive/graph.rs
+++ b/src/directive/graph.rs
@@ -3,7 +3,7 @@ use crate::page::PageMeta;
use crate::site::Site;
use crate::wikitext::ParsedDirective;
-#[derive(Debug, Eq, PartialEq)]
+#[derive(Debug, Default, Eq, PartialEq)]
pub struct Graph {}
impl Graph {
@@ -11,10 +11,6 @@ impl Graph {
pub const ALLOWED: &'static [&'static str] = &["src", "type"];
pub const ALLOW_ANY_UNNAMED: bool = true;
- pub fn new() -> Self {
- Self {}
- }
-
pub fn process(&self, _site: &Site, _meta: &mut PageMeta) -> Result<String, SiteError> {
Ok("FIXME:graph".into())
}
@@ -22,6 +18,6 @@ impl Graph {
impl From<&ParsedDirective> for Graph {
fn from(_: &ParsedDirective) -> Self {
- Graph::new()
+ Graph::default()
}
}
diff --git a/src/directive/map.rs b/src/directive/map.rs
index 32b56cc..20bd222 100644
--- a/src/directive/map.rs
+++ b/src/directive/map.rs
@@ -3,7 +3,7 @@ use crate::page::PageMeta;
use crate::site::Site;
use crate::wikitext::ParsedDirective;
-#[derive(Debug, Eq, PartialEq)]
+#[derive(Debug, Default, Eq, PartialEq)]
pub struct Map {}
impl Map {
@@ -11,10 +11,6 @@ impl Map {
pub const ALLOWED: &'static [&'static str] = &["show"];
pub const ALLOW_ANY_UNNAMED: bool = true;
- pub fn new() -> Self {
- Self {}
- }
-
pub fn process(&self, _site: &Site, _meta: &mut PageMeta) -> Result<String, SiteError> {
Ok("FIXME:map".into())
}
@@ -22,6 +18,6 @@ impl Map {
impl From<&ParsedDirective> for Map {
fn from(_: &ParsedDirective) -> Self {
- Map::new()
+ Map::default()
}
}
diff --git a/src/directive/meta.rs b/src/directive/meta.rs
index ddbdeaa..490ffb1 100644
--- a/src/directive/meta.rs
+++ b/src/directive/meta.rs
@@ -1,8 +1,8 @@
use crate::error::SiteError;
use crate::page::PageMeta;
use crate::site::Site;
-use crate::wikitext::ParsedDirective;
use crate::time::parse_timestamp;
+use crate::wikitext::ParsedDirective;
#[derive(Default, Debug, Eq, PartialEq)]
pub struct Meta {
@@ -49,5 +49,4 @@ impl From<&ParsedDirective> for Meta {
}
#[cfg(test)]
-mod test {
-}
+mod test {}
diff --git a/src/directive/mod.rs b/src/directive/mod.rs
index 261de36..89b5b8a 100644
--- a/src/directive/mod.rs
+++ b/src/directive/mod.rs
@@ -36,7 +36,7 @@ impl TryFrom<ParsedDirective> for Directive {
fn try_from(p: ParsedDirective) -> Result<Self, Self::Error> {
Self::try_from(&p)
- }
+ }
}
impl TryFrom<&ParsedDirective> for Directive {
@@ -66,15 +66,30 @@ impl TryFrom<&ParsedDirective> for Directive {
}
"brokenlinks" => {
- Self::check_args(p, BrokenLinks::REQUIRED, BrokenLinks::ALLOWED, BrokenLinks::ALLOW_ANY_UNNAMED)?;
+ Self::check_args(
+ p,
+ BrokenLinks::REQUIRED,
+ BrokenLinks::ALLOWED,
+ BrokenLinks::ALLOW_ANY_UNNAMED,
+ )?;
Directive::BrokenLinks(BrokenLinks::from(p))
}
"calendar" => {
- Self::check_args(p, Calendar::REQUIRED, Calendar::ALLOWED, Calendar::ALLOW_ANY_UNNAMED)?;
+ Self::check_args(
+ p,
+ Calendar::REQUIRED,
+ Calendar::ALLOWED,
+ Calendar::ALLOW_ANY_UNNAMED,
+ )?;
Directive::Calendar(Calendar::from(p))
}
"format" => {
- Self::check_args(p, Format::REQUIRED, Format::ALLOWED, Format::ALLOW_ANY_UNNAMED)?;
+ Self::check_args(
+ p,
+ Format::REQUIRED,
+ Format::ALLOWED,
+ Format::ALLOW_ANY_UNNAMED,
+ )?;
Directive::Format(Format::from(p))
}
"graph" => {
@@ -112,11 +127,21 @@ impl TryFrom<&ParsedDirective> for Directive {
Directive::PageStats(PageStats::from(p))
}
"shortcut" => {
- Self::check_args(p, Shortcut::REQUIRED, Shortcut::ALLOWED, Shortcut::ALLOW_ANY_UNNAMED)?;
+ Self::check_args(
+ p,
+ Shortcut::REQUIRED,
+ Shortcut::ALLOWED,
+ Shortcut::ALLOW_ANY_UNNAMED,
+ )?;
Directive::Shortcut(Shortcut::from(p))
}
"sidebar" => {
- Self::check_args(p, Sidebar::REQUIRED, Sidebar::ALLOWED, Sidebar::ALLOW_ANY_UNNAMED)?;
+ Self::check_args(
+ p,
+ Sidebar::REQUIRED,
+ Sidebar::ALLOWED,
+ Sidebar::ALLOW_ANY_UNNAMED,
+ )?;
Directive::Sidebar(Sidebar::from(p))
}
"tag" => {
@@ -132,7 +157,12 @@ impl TryFrom<&ParsedDirective> for Directive {
Directive::Toc(Toc::from(p))
}
"traillink" => {
- Self::check_args(p, TrailLink::REQUIRED, TrailLink::ALLOWED, TrailLink::ALLOW_ANY_UNNAMED)?;
+ Self::check_args(
+ p,
+ TrailLink::REQUIRED,
+ TrailLink::ALLOWED,
+ TrailLink::ALLOW_ANY_UNNAMED,
+ )?;
Directive::TrailLink(TrailLink::from(p))
}
_ => return Err(SiteError::UnknownDirective(p.name().into())),
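
Each arm above validates before constructing. Presumably `check_args` enforces that every required argument is present and every present argument is either required or allowed; a sketch of that contract under those assumptions (not the crate's actual `check_args`):

```rust
use std::collections::HashSet;

// Assumed semantics of the per-directive argument check: all REQUIRED
// names must be present, and nothing outside REQUIRED + ALLOWED may be.
fn args_acceptable(present: &HashSet<&str>, required: &[&str], allowed: &[&str]) -> bool {
    required.iter().all(|r| present.contains(r))
        && present
            .iter()
            .all(|a| required.contains(a) || allowed.contains(a))
}
```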
diff --git a/src/directive/sidebar.rs b/src/directive/sidebar.rs
index d68f9be..b80bfe9 100644
--- a/src/directive/sidebar.rs
+++ b/src/directive/sidebar.rs
@@ -3,7 +3,7 @@ use crate::page::PageMeta;
use crate::site::Site;
use crate::wikitext::ParsedDirective;
-#[derive(Debug, Eq, PartialEq)]
+#[derive(Debug, Default, Eq, PartialEq)]
pub struct Sidebar {}
impl Sidebar {
@@ -11,10 +11,6 @@ impl Sidebar {
pub const ALLOWED: &'static [&'static str] = &["content"];
pub const ALLOW_ANY_UNNAMED: bool = true;
- pub fn new() -> Self {
- Self {}
- }
-
pub fn process(&self, _site: &Site, _meta: &mut PageMeta) -> Result<String, SiteError> {
Ok("FIXME:sidebar".into())
}
@@ -22,6 +18,6 @@ impl Sidebar {
impl From<&ParsedDirective> for Sidebar {
fn from(_: &ParsedDirective) -> Self {
- Sidebar::new()
+ Sidebar::default()
}
}
diff --git a/src/lib.rs b/src/lib.rs
index 9f955d8..04dffa6 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -7,16 +7,17 @@
//! little slow. This crate implements a subset of the functionality of
//! ikiwiki in Rust, for speed.
-pub mod name;
pub mod directive;
pub mod error;
+pub mod git;
pub mod html;
+pub mod name;
pub mod page;
+pub mod pagespec;
pub mod parser;
pub mod site;
+pub mod srcdir;
+pub mod time;
pub mod token;
-pub mod git;
pub mod util;
-pub mod time;
pub mod wikitext;
-pub mod pagespec;
diff --git a/src/name.rs b/src/name.rs
index 3cb0157..2388e4b 100644
--- a/src/name.rs
+++ b/src/name.rs
@@ -4,6 +4,7 @@ use std::cmp::Ordering;
use std::ffi::OsStr;
use std::fmt;
use std::path::{Path, PathBuf};
+use std::time::SystemTime;
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Name {
@@ -12,10 +13,17 @@ pub struct Name {
dest: PathBuf,
page: PathBuf,
page_name: String,
+ mtime: SystemTime,
}
impl Name {
- fn new(is_wikitext: bool, src: PathBuf, dest: PathBuf, page: PathBuf) -> Self {
+ fn new(
+ is_wikitext: bool,
+ src: PathBuf,
+ dest: PathBuf,
+ page: PathBuf,
+ mtime: SystemTime,
+ ) -> Self {
trace!(
"Name::new: is_wikitext={} src={} dest={} page={}",
is_wikitext,
@@ -34,6 +42,7 @@ impl Name {
dest,
page,
page_name,
+ mtime,
}
}
@@ -103,7 +112,7 @@ impl NameBuilder {
&self.destdir
}
- pub fn page(&self, path: &Path) -> Name {
+ pub fn page(&self, path: &Path, mtime: SystemTime) -> Name {
assert!(path.starts_with(&self.srcdir));
let src = path.into();
let relative = make_path_relative_to(&self.srcdir, path);
@@ -128,16 +137,16 @@ impl NameBuilder {
(dest, page)
};
- Name::new(true, src, dest, page)
+ Name::new(true, src, dest, page, mtime)
}
- pub fn file(&self, path: &Path) -> Name {
+ pub fn file(&self, path: &Path, mtime: SystemTime) -> Name {
assert!(path.starts_with(&self.srcdir));
let src = path.into();
let relative = make_path_relative_to(&self.srcdir, path);
let page = make_path_absolute(&relative);
let dest = join_subpath(&self.destdir, &relative);
- Name::new(false, src, dest, page)
+ Name::new(false, src, dest, page, mtime)
}
}
@@ -179,7 +188,7 @@ impl Names {
#[cfg(test)]
mod test {
use super::{Name, NameBuilder, Names};
- use std::path::Path;
+ use std::{path::Path, time::UNIX_EPOCH};
fn builder() -> NameBuilder {
NameBuilder::new(Path::new("/src"), Path::new("/dest"))
@@ -187,7 +196,7 @@ mod test {
#[test]
fn builds_page_name() {
- let name = builder().page(Path::new("/src/foo/bar.mdwn"));
+ let name = builder().page(Path::new("/src/foo/bar.mdwn"), UNIX_EPOCH);
assert_eq!(name.source_path(), Path::new("/src/foo/bar.mdwn"));
assert_eq!(
name.destination_path(),
@@ -199,7 +208,7 @@ mod test {
#[test]
fn builds_page_name_for_index_mdwn() {
- let name = builder().page(Path::new("/src/foo/index.mdwn"));
+ let name = builder().page(Path::new("/src/foo/index.mdwn"), UNIX_EPOCH);
assert_eq!(name.source_path(), Path::new("/src/foo/index.mdwn"));
assert_eq!(name.destination_path(), Path::new("/dest/foo/index.html"));
assert_eq!(name.page_path(), Path::new("/foo"));
@@ -208,7 +217,7 @@ mod test {
#[test]
fn builds_file_name() {
- let name = builder().file(Path::new("/src/foo/bar.jpg"));
+ let name = builder().file(Path::new("/src/foo/bar.jpg"), UNIX_EPOCH);
assert_eq!(name.source_path(), Path::new("/src/foo/bar.jpg"));
assert_eq!(name.destination_path(), Path::new("/dest/foo/bar.jpg"));
assert_eq!(name.page_path(), Path::new("/foo/bar.jpg"));
@@ -224,7 +233,7 @@ mod test {
#[test]
fn names_remembers_inserted() {
let mut names = Names::default();
- let name = builder().page(Path::new("/src/foo/bar.mdwn"));
+ let name = builder().page(Path::new("/src/foo/bar.mdwn"), UNIX_EPOCH);
names.insert(name.clone());
assert_eq!(
names.get_source_path(Path::new("/src/foo/bar.mdwn")),
@@ -236,8 +245,8 @@ mod test {
#[test]
fn names_remembers_inserted_pages_and_files() {
let mut names = Names::default();
- let page = builder().page(Path::new("/src/foo/bar.mdwn"));
- let file = builder().file(Path::new("/src/foo/bar.jpg"));
+ let page = builder().page(Path::new("/src/foo/bar.mdwn"), UNIX_EPOCH);
+ let file = builder().file(Path::new("/src/foo/bar.jpg"), UNIX_EPOCH);
names.insert(page.clone());
names.insert(file.clone());
let pages: Vec<&Name> = names.pages().collect();
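
A usage sketch of the widened constructors, mirroring the updated tests and assuming a `NameBuilder` rooted at `/src` and `/dest` as they do (`UNIX_EPOCH` stands in for a real commit timestamp):

```rust
use std::path::Path;
use std::time::UNIX_EPOCH;

use riki::name::NameBuilder;

fn demo(builder: &NameBuilder) {
    // The mtime now travels with the Name from construction onward.
    let page = builder.page(Path::new("/src/foo/index.mdwn"), UNIX_EPOCH);
    assert_eq!(page.destination_path(), Path::new("/dest/foo/index.html"));

    let file = builder.file(Path::new("/src/foo/bar.jpg"), UNIX_EPOCH);
    assert_eq!(file.destination_path(), Path::new("/dest/foo/bar.jpg"));
}
```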
diff --git a/src/pagespec.rs b/src/pagespec.rs
index de365a9..6a4b35d 100644
--- a/src/pagespec.rs
+++ b/src/pagespec.rs
@@ -34,7 +34,8 @@ impl PageSpec {
pub fn matches(&self, page_path: &Path) -> bool {
trace!(
"PageSpec::matches: container={} page_path={}",
- self.container.display(), page_path.display()
+ self.container.display(),
+ page_path.display()
);
assert!(page_path.is_absolute());
if let Ok(path) = page_path.strip_prefix(&self.container) {
@@ -100,12 +101,11 @@ fn glob_matches_helper(mut glob: &[char], mut path: &[char]) -> bool {
if glob_remain.is_empty() {
return true;
}
- let mut path_remain = &path[..];
- while !path_remain.is_empty() {
- if glob_matches_helper(&glob[1..], path_remain) {
+ while !path.is_empty() {
+ if glob_matches_helper(&glob[1..], path) {
return true;
}
- path_remain = &path_remain[1..];
+ path = &path[1..];
}
return false;
}
@@ -120,7 +120,7 @@ fn glob_matches_helper(mut glob: &[char], mut path: &[char]) -> bool {
}
}
- while let Some('*') = glob.get(0) {
+ while let Some('*') = glob.first() {
glob = &glob[1..];
}
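
The `*` handling above is classic backtracking: collapse consecutive stars, then try the remaining glob against every suffix of the path. A self-contained sketch of that core idea (the crate's `glob_matches_helper` interleaves this with its literal matching):

```rust
// Minimal glob matcher over chars: '*' matches any run of characters,
// anything else must match literally.
fn glob_matches(glob: &[char], path: &[char]) -> bool {
    match glob.first() {
        None => path.is_empty(),
        Some('*') => {
            // Consecutive stars are equivalent to one.
            let mut rest = glob;
            while let Some('*') = rest.first() {
                rest = &rest[1..];
            }
            if rest.is_empty() {
                return true; // trailing '*' matches everything
            }
            // Backtracking: let '*' absorb 0..=path.len() characters.
            let mut path = path;
            loop {
                if glob_matches(rest, path) {
                    return true;
                }
                if path.is_empty() {
                    return false;
                }
                path = &path[1..];
            }
        }
        Some(c) => match path.first() {
            Some(p) if p == c => glob_matches(&glob[1..], &path[1..]),
            _ => false,
        },
    }
}

fn main() {
    let glob: Vec<char> = "foo/*.mdwn".chars().collect();
    let path: Vec<char> = "foo/bar.mdwn".chars().collect();
    assert!(glob_matches(&glob, &path));
}
```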
diff --git a/src/site.rs b/src/site.rs
index 571bc11..874f3c8 100644
--- a/src/site.rs
+++ b/src/site.rs
@@ -3,25 +3,22 @@ use crate::git::git_whatchanged;
use crate::name::{Name, NameBuilder, Names};
use crate::page::{MarkdownPage, UnprocessedPage, WikitextPage};
use crate::parser::WikitextParser;
+use crate::srcdir::{PathFilter, SourceDir};
use crate::token::TokenPatterns;
use crate::util::make_relative_link;
use log::{debug, info, trace};
-use std::collections::{BinaryHeap, HashMap};
+use std::collections::HashMap;
use std::path::{Path, PathBuf};
-use std::time::SystemTime;
-use walkdir::WalkDir;
+use std::time::UNIX_EPOCH;
pub struct Site {
+ patterns: TokenPatterns,
+ shortcuts: HashMap<String, Shortcut>,
builder: NameBuilder,
- wikitext_pages: Vec<WikitextPage>,
unprocessed_pages: Vec<UnprocessedPage>,
markdown_pages: Vec<MarkdownPage>,
+ pages_that_will_exist: PageSet,
files: Names,
- patterns: TokenPatterns,
- name_queue: BinaryHeap<Name>,
- page_queue: PageSet,
- whatchanged: HashMap<PathBuf, SystemTime>,
- shortcuts: HashMap<String, Shortcut>,
}
impl Site {
@@ -34,25 +31,24 @@ impl Site {
{
Self {
builder: NameBuilder::new(srcdir.as_ref(), destdir.as_ref()),
- wikitext_pages: vec![],
unprocessed_pages: vec![],
markdown_pages: vec![],
files: Names::default(),
patterns: TokenPatterns::default(),
- name_queue: BinaryHeap::new(),
- page_queue: PageSet::default(),
- whatchanged: HashMap::new(),
+ pages_that_will_exist: PageSet::default(),
shortcuts: HashMap::new(),
}
}
pub fn scan(&mut self) -> Result<(), SiteError> {
- self.whatchanged = git_whatchanged(self.builder.srcdir())?;
for name in self.all_files()? {
trace!("scan: name={}", name);
if name.is_wikitext_page() {
trace!("scan: it's a page");
- self.name_queue.push(name);
+ debug!("loading wikitext page {}", name.source_path().display());
+ let page = WikitextPage::read(&name)?;
+ self.files.insert(name);
+ self.add_wikitextpage(page)?;
} else {
trace!("scan: it's a non-page file");
let filename = name.source_path();
@@ -64,10 +60,18 @@ impl Site {
Ok(())
}
- fn add_wikitextpage(&mut self, page: WikitextPage) {
+ fn add_wikitextpage(&mut self, page: WikitextPage) -> Result<(), SiteError> {
info!("add wikitext page {}", page.meta().path().display());
- self.page_queue.insert(&page);
- self.wikitext_pages.push(page);
+ self.pages_that_will_exist.insert(&page);
+
+ debug!("parsing wikitext page {}", page.meta().path().display());
+ let mut parser = WikitextParser::new(page.wikitext(), &self.patterns);
+ let page = UnprocessedPage::new(page.meta().clone(), &mut parser)?;
+ page.prepare(self)?;
+
+ self.unprocessed_pages.push(page);
+
+ Ok(())
}
fn add_other_file(&mut self, name: Name) {
@@ -78,10 +82,7 @@ impl Site {
pub fn process(&mut self) -> Result<(), SiteError> {
trace!("processing queues");
loop {
- if !self.process_name()?
- && !self.process_wikipage()?
- && !self.process_unrocessed_page()?
- {
+ if !self.process_page()? {
trace!("processing queues done");
break;
}
@@ -89,45 +90,7 @@ impl Site {
Ok(())
}
- fn process_name(&mut self) -> Result<bool, SiteError> {
- if let Some(name) = self.name_queue.pop() {
- debug!("loading wikitext page {}", name.source_path().display());
- let mut page = WikitextPage::read(&name)?;
- if let Some(mtime) = self.git_commit_timestamp(&name) {
- page.meta_mut().set_mtime(mtime);
- }
- self.files.insert(name);
- self.add_wikitextpage(page);
- Ok(true)
- } else {
- trace!("name_queue was empty");
- Ok(false)
- }
- }
-
- fn git_commit_timestamp(&self, name: &Name) -> Option<SystemTime> {
- let relative = name
- .source_path()
- .strip_prefix(&self.builder.srcdir())
- .unwrap();
- self.whatchanged.get(relative).copied()
- }
-
- fn process_wikipage(&mut self) -> Result<bool, SiteError> {
- if let Some(page) = self.wikitext_pages.pop() {
- debug!("processing wikitext page {}", page.meta().path().display());
- let mut parser = WikitextParser::new(page.wikitext(), &self.patterns);
- let page = UnprocessedPage::new(page.meta().clone(), &mut parser)?;
- page.prepare(self)?;
- self.unprocessed_pages.push(page);
- Ok(true)
- } else {
- trace!("wikitext_ages was empty");
- Ok(false)
- }
- }
-
- fn process_unrocessed_page(&mut self) -> Result<bool, SiteError> {
+ fn process_page(&mut self) -> Result<bool, SiteError> {
if let Some(page) = self.unprocessed_pages.pop() {
debug!(
"processing unprocessed page {}",
@@ -137,7 +100,7 @@ impl Site {
self.markdown_pages.push(page);
Ok(true)
} else {
- trace!("unprocessed_ages was empty");
+ trace!("no pages to process");
Ok(false)
}
}
@@ -151,50 +114,32 @@ impl Site {
}
pub fn pages_and_files(&self) -> impl Iterator<Item = &Name> {
- self.files.iter().chain(self.name_queue.iter())
+ self.files.iter()
}
fn all_files(&self) -> Result<Vec<Name>, SiteError> {
+ let whatchanged = git_whatchanged(self.builder.srcdir())?;
+
+ let mut srcdir = SourceDir::new(self.builder.srcdir());
+ srcdir.scan()?;
+
+ let filter = PathFilter::new(Self::EXCLUDE_SUBSTRINGS, Self::EXCLUDE_ENDS);
+
let mut names = vec![];
- let root = self.builder.srcdir();
- trace!("all_files: root={}", root.display());
- for e in WalkDir::new(root) {
- let e = e.map_err(|err| SiteError::WalkDir(root.to_path_buf(), err))?;
- let path = e.path();
- trace!("all_files: path={}", path.display());
- if Self::is_excluded(path) {
- debug!("exclude {}", path.display());
+ for path in srcdir.files().iter().filter(|x| filter.is_included(x)) {
+ let relative = path.strip_prefix(&self.builder.srcdir()).unwrap();
+ let mtime = whatchanged.get(relative).copied().unwrap_or(UNIX_EPOCH);
+ if Self::is_markdown(path) {
+ names.push(self.builder.page(path, mtime));
+ } else if path.is_file() {
+ names.push(self.builder.file(path, mtime));
} else {
- debug!("include {}", path.display());
- if Self::is_markdown(path) {
- trace!("it's markdown");
- names.push(self.builder.page(path));
- } else if path.is_file() {
- trace!("it's not markdown");
- names.push(self.builder.file(path));
- } else {
- trace!("it's not a file");
- }
+ trace!("not a file, ignoring: {}", path.display());
}
}
Ok(names)
}
- fn is_excluded(path: &Path) -> bool {
- let path = path.to_string_lossy();
- for pat in Self::EXCLUDE_ENDS {
- if path.ends_with(pat) {
- return true;
- }
- }
- for pat in Self::EXCLUDE_SUBSTRINGS {
- if path.contains(pat) {
- return true;
- }
- }
- false
- }
-
fn is_markdown(path: &Path) -> bool {
if let Some(ext) = path.extension() {
ext == "mdwn"
@@ -228,7 +173,7 @@ impl Site {
// Is target absolute?
if target.starts_with("/") {
- if let Some(path) = self.page_queue.get(target) {
+ if let Some(path) = self.pages_that_will_exist.get(target) {
trace!("absolute target exists");
return Ok(path.into());
} else {
@@ -240,7 +185,7 @@ impl Site {
// Does a sub-page or file exist?
let wanted = page.join(target);
trace!("checking for subpage or file {}", wanted.display());
- if let Some(path) = self.page_queue.get(&wanted) {
+ if let Some(path) = self.pages_that_will_exist.get(&wanted) {
trace!("subpage exists: {}", path.display());
return Ok(path.into());
} else if self.file_exists(&wanted) {
@@ -256,7 +201,7 @@ impl Site {
parent.display(),
path.display()
);
- if let Some(path) = self.page_queue.get(path.as_path()) {
+ if let Some(path) = self.pages_that_will_exist.get(path.as_path()) {
trace!("sibling page exists: {}", path.display());
return Ok(path.into());
}
@@ -270,7 +215,7 @@ impl Site {
// Does target exist relative to root?
let wanted = Path::new("/").join(target);
trace!("checking for absolute path {}", wanted.display());
- if let Some(path) = self.page_queue.get(&wanted) {
+ if let Some(path) = self.pages_that_will_exist.get(&wanted) {
trace!("page at absolute path exists: {}", path.display());
return Ok(path.into());
} else if self.file_exists(&wanted) {
@@ -361,7 +306,7 @@ mod test {
use crate::page::MetaBuilder;
use std::{
path::{Path, PathBuf},
- time::SystemTime,
+ time::{SystemTime, UNIX_EPOCH},
};
fn site() -> Site {
@@ -373,7 +318,7 @@ mod test {
}
fn page(path: &str) -> WikitextPage {
- let name = builder().page(Path::new(path));
+ let name = builder().page(Path::new(path), UNIX_EPOCH);
let mtime = SystemTime::now();
let meta = MetaBuilder::default().name(name).mtime(mtime).build();
WikitextPage::new(meta, "".into())
@@ -387,7 +332,7 @@ mod test {
#[test]
fn absolute_link_resolves_to_link_relative_root_of_site() {
let mut site = site();
- site.add_wikitextpage(page("/src/yo/yoyo"));
+ site.add_wikitextpage(page("/src/yo/yoyo")).unwrap();
assert_eq!(
site.resolve("/foo/bar", "/yo/yoyo").unwrap(),
Path::new("../yo/yoyo")
@@ -409,7 +354,7 @@ mod test {
#[test]
fn link_to_sibling_resolves_to_it() {
let mut site = site();
- site.add_wikitextpage(page("/src/foo/yo"));
+ site.add_wikitextpage(page("/src/foo/yo")).unwrap();
site.process().unwrap();
assert_eq!(site.resolve("/foo/bar", "yo").unwrap(), Path::new("yo"));
}
@@ -417,7 +362,7 @@ mod test {
#[test]
fn link_using_other_casing_is_resolved() {
let mut site = site();
- site.add_wikitextpage(page("/src/foo/yo"));
+ site.add_wikitextpage(page("/src/foo/yo")).unwrap();
site.process().unwrap();
assert_eq!(site.resolve("/foo/bar", "YO").unwrap(), Path::new("yo"));
}
@@ -425,7 +370,7 @@ mod test {
#[test]
fn link_to_sublpage_resolves_to_it() {
let mut site = site();
- site.add_wikitextpage(page("/src/foo/bar/yo"));
+ site.add_wikitextpage(page("/src/foo/bar/yo")).unwrap();
site.process().unwrap();
assert_eq!(site.resolve("/foo/bar", "yo").unwrap(), Path::new("bar/yo"));
}
@@ -433,8 +378,8 @@ mod test {
#[test]
fn link_to_sublpage_resolves_to_it_and_not_sibling() {
let mut site = site();
- site.add_wikitextpage(page("/src/foo/bar/yo"));
- site.add_wikitextpage(page("/src/foo/yo"));
+ site.add_wikitextpage(page("/src/foo/bar/yo")).unwrap();
+ site.add_wikitextpage(page("/src/foo/yo")).unwrap();
site.process().unwrap();
assert_eq!(site.resolve("/foo/bar", "yo").unwrap(), Path::new("bar/yo"));
}
@@ -455,7 +400,7 @@ mod test {
#[test]
fn link_to_subsubpage_resolves_to_it() {
let mut site = site();
- site.add_wikitextpage(page("/src/foo/bar/yo/yoyo"));
+ site.add_wikitextpage(page("/src/foo/bar/yo/yoyo")).unwrap();
site.process().unwrap();
assert_eq!(
site.resolve("/foo/bar", "yo/yoyo").unwrap(),
@@ -466,7 +411,7 @@ mod test {
#[test]
fn link_to_sibling_file_resolves_to_it() {
let mut site = site();
- let name = builder().file(Path::new("/src/foo/bar.jpg"));
+ let name = builder().file(Path::new("/src/foo/bar.jpg"), UNIX_EPOCH);
site.add_other_file(name);
site.process().unwrap();
assert_eq!(
@@ -478,7 +423,7 @@ mod test {
#[test]
fn link_relative_to_root_as_last_resort() {
let mut site = site();
- site.add_wikitextpage(page("/src/foo.mdwn"));
+ site.add_wikitextpage(page("/src/foo.mdwn")).unwrap();
site.process().unwrap();
assert_eq!(
site.resolve("/yo/yoyo/yoyoyo", "foo").unwrap(),
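
One consequence of the new `all_files` worth spelling out: a file with no entry in the `git_whatchanged` output (for example, one not yet committed) now gets `UNIX_EPOCH` as its mtime instead of being handled in a separate queue pass. A sketch of that lookup, using the map shape the removed `whatchanged` field had:

```rust
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::time::{SystemTime, UNIX_EPOCH};

// Repository-relative path -> last commit time, as git reported it.
fn mtime_for(whatchanged: &HashMap<PathBuf, SystemTime>, relative: &Path) -> SystemTime {
    // Files git knows nothing about fall back to the epoch.
    whatchanged.get(relative).copied().unwrap_or(UNIX_EPOCH)
}
```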
diff --git a/src/srcdir.rs b/src/srcdir.rs
new file mode 100644
index 0000000..66d2a04
--- /dev/null
+++ b/src/srcdir.rs
@@ -0,0 +1,140 @@
+use crate::error::SiteError;
+use log::trace;
+use std::path::{Path, PathBuf};
+use walkdir::WalkDir;
+
+pub struct SourceDir {
+ path: PathBuf,
+ files: Vec<PathBuf>,
+ artificial: bool,
+}
+
+impl SourceDir {
+ pub fn new(path: &Path) -> Self {
+ Self {
+ path: path.into(),
+ files: vec![],
+ artificial: false,
+ }
+ }
+
+ pub fn insert_for_tests<P>(&mut self, path: P)
+ where
+ P: AsRef<Path>,
+ {
+ self.artificial = true;
+ self.insert(path.as_ref());
+ }
+
+ pub fn insert(&mut self, path: &Path) {
+ trace!("Source Dir::insert: path={}", path.display());
+ self.files.push(path.into());
+ }
+
+ pub fn scan(&mut self) -> Result<(), SiteError> {
+ if self.artificial {
+ trace!("SourceDir::scan: artificial mode, not actually scanning");
+ } else {
+ trace!("SourceDir::scan: find files in {}", self.path.display());
+ for e in WalkDir::new(&self.path) {
+ let e = e.map_err(|err| SiteError::WalkDir(self.path.clone(), err))?;
+ let path = e.path();
+ trace!("SourceDir::scan: found {}", path.display());
+ self.insert(path);
+ }
+ }
+ Ok(())
+ }
+
+ pub fn files(&self) -> &[PathBuf] {
+ &self.files
+ }
+}
+
+#[derive(Default)]
+pub struct PathFilter {
+ excluded_substrings: Vec<&'static str>,
+ excluded_suffixes: Vec<&'static str>,
+}
+
+impl PathFilter {
+ pub fn new(subs: &[&'static str], suffixes: &[&'static str]) -> Self {
+ Self {
+ excluded_substrings: subs.to_vec(),
+ excluded_suffixes: suffixes.to_vec(),
+ }
+ }
+
+ pub fn exclude_substring(&mut self, s: &'static str) {
+ self.excluded_substrings.push(s);
+ }
+
+ pub fn exclude_suffix(&mut self, s: &'static str) {
+ self.excluded_suffixes.push(s);
+ }
+
+ pub fn is_included<P>(&self, path: P) -> bool
+ where
+ P: AsRef<Path>,
+ {
+ let path = path.as_ref();
+ let include = {
+ let path = path.to_string_lossy();
+ for pat in self.excluded_suffixes.iter() {
+ if path.ends_with(pat) {
+ return false;
+ }
+ }
+ for pat in self.excluded_substrings.iter() {
+ if path.contains(pat) {
+ return false;
+ }
+ }
+ true
+ };
+ if include {
+ trace!("include {}", path.display());
+ } else {
+ trace!("exclude {}", path.display());
+ }
+ include
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::PathFilter;
+
+ #[test]
+ fn includes_dotgit_by_default() {
+ let filter = PathFilter::default();
+ assert!(filter.is_included(".git"));
+ }
+
+ #[test]
+ fn excludes_dotgit_if_requested() {
+ let mut filter = PathFilter::default();
+ filter.exclude_substring(".git");
+ assert!(!filter.is_included(".git"));
+ }
+
+ #[test]
+ fn includes_footilde_by_default() {
+ let filter = PathFilter::default();
+ assert!(filter.is_included("foo~"));
+ }
+
+ #[test]
+ fn includes_footildebar_if_tilde_suffix_is_excluded() {
+ let mut filter = PathFilter::default();
+ filter.exclude_suffix("~");
+ assert!(filter.is_included("foo~bar"));
+ }
+
+ #[test]
+ fn excludes_footilde_if_tilde_suffix_is_excluded() {
+ let mut filter = PathFilter::default();
+ filter.exclude_suffix("~");
+ assert!(!filter.is_included("foo~"));
+ }
+}
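
A usage sketch of `PathFilter`, mirroring how `Site::all_files` now constructs it. The exclusion lists here are hypothetical; the real ones are `Site::EXCLUDE_SUBSTRINGS` and `Site::EXCLUDE_ENDS`.

```rust
use riki::srcdir::PathFilter;

fn main() {
    // Hypothetical exclusion lists for illustration.
    let filter = PathFilter::new(&["/.git/"], &["~"]);
    assert!(filter.is_included("src/site.rs"));
    assert!(!filter.is_included("notes.mdwn~")); // excluded suffix
    assert!(!filter.is_included("repo/.git/config")); // excluded substring
}
```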
diff --git a/src/time.rs b/src/time.rs
index 2728701..dce3ecc 100644
--- a/src/time.rs
+++ b/src/time.rs
@@ -29,18 +29,22 @@ fn parse(timestamp: &str) -> Result<OffsetDateTime, SiteError> {
format_description!("[year]-[month]-[day] [hour]:[minute]:[second]");
const SIMPLIFIED_ISO9601_MIN: &[FormatItem<'static>] =
format_description!("[year]-[month]-[day] [hour]:[minute]");
- const SIMPLIFIED_ISO9601_TZ: &[FormatItem<'static>] =
- format_description!("[year]-[month]-[day] [hour]:[minute]:[second] [offset_hour][offset_minute]");
+ const SIMPLIFIED_ISO9601_TZ: &[FormatItem<'static>] = format_description!(
+ "[year]-[month]-[day] [hour]:[minute]:[second] [offset_hour][offset_minute]"
+ );
const SIMPLIFIED_ISO9601_MIN_TZ: &[FormatItem<'static>] =
format_description!("[year]-[month]-[day] [hour]:[minute] [offset_hour][offset_minute]");
if let Ok(t) = parse_one_time_format(timestamp, "simplified", SIMPLIFIED_ISO9601) {
Ok(t)
- } else if let Ok(t) = parse_one_time_format(timestamp, "simplified-min", SIMPLIFIED_ISO9601_MIN) {
+ } else if let Ok(t) = parse_one_time_format(timestamp, "simplified-min", SIMPLIFIED_ISO9601_MIN)
+ {
Ok(t)
} else if let Ok(t) = parse_one_time_format(timestamp, "simplified-tz", SIMPLIFIED_ISO9601_TZ) {
Ok(t)
- } else if let Ok(t) = parse_one_time_format(timestamp, "simplified-tz", SIMPLIFIED_ISO9601_MIN_TZ) {
+ } else if let Ok(t) =
+ parse_one_time_format(timestamp, "simplified-tz", SIMPLIFIED_ISO9601_MIN_TZ)
+ {
Ok(t)
} else if let Ok(t) = parse_one_time_format(timestamp, "ISO8601", &Iso8601::PARSING) {
Ok(t)
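
The cascade tries each format description in turn, first match wins. A condensed, self-contained sketch of the idea with the `time` crate; the real code also handles the minute-only variants, the ISO 8601 fallback, and `SiteError`, and the UTC assumption for offsetless timestamps is this sketch's simplification:

```rust
use time::format_description::FormatItem;
use time::macros::format_description;
use time::{OffsetDateTime, PrimitiveDateTime};

// First-match-wins over progressively looser formats.
fn parse_simplified(timestamp: &str) -> Option<OffsetDateTime> {
    const WITH_TZ: &[FormatItem<'static>] = format_description!(
        "[year]-[month]-[day] [hour]:[minute]:[second] [offset_hour][offset_minute]"
    );
    const PLAIN: &[FormatItem<'static>] =
        format_description!("[year]-[month]-[day] [hour]:[minute]:[second]");
    if let Ok(t) = OffsetDateTime::parse(timestamp, WITH_TZ) {
        return Some(t);
    }
    PrimitiveDateTime::parse(timestamp, PLAIN)
        .map(|t| t.assume_utc()) // assumption: offsetless timestamps are UTC
        .ok()
}
```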
diff --git a/src/token.rs b/src/token.rs
index 0190edc..f6fb92c 100644
--- a/src/token.rs
+++ b/src/token.rs
@@ -106,10 +106,10 @@ impl<'a> TokenParser<'a> {
}
fn regex(&mut self, pattern: &Regex) -> Option<String> {
-// trace!("matching regex {}", pattern.as_str());
+ // trace!("matching regex {}", pattern.as_str());
if let Some(m) = pattern.find(self.input) {
if m.start() == 0 {
-// trace!("match at beginning");
+ // trace!("match at beginning");
let captures = pattern.captures(self.input).unwrap();
let m = if let Some(value) = captures.name("value") {
self.input = &self.input[m.end()..];
@@ -121,7 +121,7 @@ impl<'a> TokenParser<'a> {
return Some(m.as_str().to_string());
}
}
-// trace!("no match at beginning");
+ // trace!("no match at beginning");
None
}
}
diff --git a/src/wikitext.rs b/src/wikitext.rs
index 79f311f..81514f0 100644
--- a/src/wikitext.rs
+++ b/src/wikitext.rs
@@ -5,6 +5,7 @@ use crate::site::Site;
use log::trace;
use std::collections::HashMap;
use std::path::Path;
+
#[derive(Debug, Eq, PartialEq)]
pub enum Snippet {
Markdown(String),