From e849cf9d8f5d93166722f47891813cd99ef8f352 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sun, 14 May 2023 11:10:25 +0300 Subject: refactor: make Site::markdown_pages be a hash map instead of vector Sponsored-by: author --- src/bin/riki.rs | 2 +- src/directive/inline.rs | 1 - src/directive/map.rs | 1 - src/directive/pagestats.rs | 1 - src/site.rs | 15 ++++++++------- 5 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/bin/riki.rs b/src/bin/riki.rs index d219803..946051f 100644 --- a/src/bin/riki.rs +++ b/src/bin/riki.rs @@ -109,7 +109,7 @@ impl Build { let mut site = Site::new(&srcdir, &destdir); site.scan()?; site.process()?; - debug!("markdown file count: {}", site.markdown_pages().len()); + debug!("markdown file count: {}", site.markdown_pages().count()); for page in site.markdown_pages() { let htmlpage = if self.plain_body { diff --git a/src/directive/inline.rs b/src/directive/inline.rs index 973890b..7c0114b 100644 --- a/src/directive/inline.rs +++ b/src/directive/inline.rs @@ -41,7 +41,6 @@ impl DirectiveImplementation for Inline { let pagespec = PageSpec::new(meta.path(), &self.pages).map_err(DirectiveError::PageSpec)?; let matches: Vec = site .markdown_pages() - .iter() .filter(|page| pagespec.matches(site, page.meta().path())) .map(|page| format!("* {}\n", Self::link(meta.path(), page.meta()))) .collect(); diff --git a/src/directive/map.rs b/src/directive/map.rs index a2f43a6..a7ee3b2 100644 --- a/src/directive/map.rs +++ b/src/directive/map.rs @@ -26,7 +26,6 @@ impl DirectiveImplementation for Map { let pagespec = PageSpec::new(meta.path(), &self.pages).map_err(DirectiveError::PageSpec)?; let matches: Vec = site .markdown_pages() - .iter() .filter(|page| pagespec.matches(site, page.meta().path())) .map(|page| format!("* {}\n", Self::link(meta.path(), page.meta()))) .collect(); diff --git a/src/directive/pagestats.rs b/src/directive/pagestats.rs index 8c7feec..0d34a77 100644 --- a/src/directive/pagestats.rs +++ b/src/directive/pagestats.rs @@ -26,7 +26,6 @@ impl DirectiveImplementation for PageStats { let pagespec = PageSpec::new(meta.path(), &self.pages).map_err(DirectiveError::PageSpec)?; let matches: Vec = site .markdown_pages() - .iter() .filter(|page| pagespec.matches(site, page.meta().path())) .map(|page| format!("* {}\n", Self::link(meta.path(), page.meta()))) .collect(); diff --git a/src/site.rs b/src/site.rs index cfd5017..64b9602 100644 --- a/src/site.rs +++ b/src/site.rs @@ -45,7 +45,7 @@ pub struct Site { shortcuts: HashMap, name_builder: NameBuilder, pages: Vec, - markdown_pages: Vec, + markdown_pages: HashMap, pages_that_will_exist: PageSet, files: Names, } @@ -61,7 +61,7 @@ impl Site { Self { name_builder: NameBuilder::new(srcdir.as_ref(), destdir.as_ref()), pages: vec![], - markdown_pages: vec![], + markdown_pages: HashMap::new(), files: Names::default(), patterns: TokenPatterns::default(), pages_that_will_exist: PageSet::default(), @@ -127,7 +127,7 @@ impl Site { page.meta().path().display() ); let page = page.markdown(self)?; - self.markdown_pages.push(page); + self.markdown_pages.insert(page.meta().path().into(), page); Ok(true) } else { trace!("no pages to process"); @@ -135,8 +135,8 @@ impl Site { } } - pub fn markdown_pages(&self) -> &[MarkdownPage] { - &self.markdown_pages + pub fn markdown_pages(&self) -> impl Iterator { + self.markdown_pages.iter().map(|(_, md)| md) } pub fn files_only(&self) -> impl Iterator { @@ -154,7 +154,8 @@ impl Site { pub fn page(&self, path: &Path) -> Option<&MarkdownPage> { self.markdown_pages .iter() - .find(|&page| page.meta().path() == path) + .find(|(page_path, _)| page_path.as_path() == path) + .map(|(_, md)| md) } fn all_files(&self) -> Result, SiteError> { @@ -371,7 +372,7 @@ mod test { #[test] fn has_no_pages_initially() { - assert_eq!(site().markdown_pages().to_vec(), vec![]); + assert_eq!(site().markdown_pages().count(), 0); } #[test] -- cgit v1.2.1 From 5401d34c7975dbbbbbd833240bcab98dc3e38f65 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sun, 14 May 2023 11:23:42 +0300 Subject: refactor: rename and turn Site::pages into HashMap Sponsored-by: author --- src/lib.rs | 2 +- src/site.rs | 22 ++++++++++++++++------ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index be6e4a9..bca0992 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,5 +20,5 @@ pub mod srcdir; pub mod time; pub mod token; pub mod util; -pub mod wikitext; pub mod version; +pub mod wikitext; diff --git a/src/site.rs b/src/site.rs index 64b9602..ebbd339 100644 --- a/src/site.rs +++ b/src/site.rs @@ -44,7 +44,7 @@ pub struct Site { patterns: TokenPatterns, shortcuts: HashMap, name_builder: NameBuilder, - pages: Vec, + unprocessed_pages: HashMap, markdown_pages: HashMap, pages_that_will_exist: PageSet, files: Names, @@ -60,7 +60,7 @@ impl Site { { Self { name_builder: NameBuilder::new(srcdir.as_ref(), destdir.as_ref()), - pages: vec![], + unprocessed_pages: HashMap::new(), markdown_pages: HashMap::new(), files: Names::default(), patterns: TokenPatterns::default(), @@ -99,7 +99,8 @@ impl Site { page.prepare(self)?; let page = Page::new(page.meta().clone(), page); - self.pages.push(page); + self.unprocessed_pages + .insert(page.meta().path().into(), page); Ok(()) } @@ -121,7 +122,7 @@ impl Site { } fn process_page(&mut self) -> Result { - if let Some(page) = self.pages.pop() { + if let Some(page) = self.pick_unprocessed_page() { debug!( "processing unprocessed page {}", page.meta().path().display() @@ -135,8 +136,17 @@ impl Site { } } - pub fn markdown_pages(&self) -> impl Iterator { - self.markdown_pages.iter().map(|(_, md)| md) + fn pick_unprocessed_page(&mut self) -> Option { + let mut keys: Vec = self.unprocessed_pages.keys().map(|k| k.into()).collect(); + if let Some(path) = keys.pop() { + self.unprocessed_pages.remove(&path) + } else { + None + } + } + + pub fn markdown_pages(&self) -> impl Iterator { + self.markdown_pages.values() } pub fn files_only(&self) -> impl Iterator { -- cgit v1.2.1 From e5eee5a3f3327bb9248c9ca47f7e96e3883c965d Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sun, 14 May 2023 11:25:06 +0300 Subject: refactor: use other page collections for Site::is_page Sponsored-by: author --- src/site.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/site.rs b/src/site.rs index ebbd339..58fb374 100644 --- a/src/site.rs +++ b/src/site.rs @@ -158,7 +158,7 @@ impl Site { } pub fn is_page(&self, path: &Path) -> bool { - self.pages_that_will_exist.get_path(path).is_some() + self.unprocessed_pages.contains_key(path) || self.markdown_pages.contains_key(path) } pub fn page(&self, path: &Path) -> Option<&MarkdownPage> { -- cgit v1.2.1 From dd5e6c9bab1926836d5989dd9fb153674e8b2ac8 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sun, 14 May 2023 12:00:15 +0300 Subject: refactor: add a new set-of-pages abstraction Sponsored-by: author --- src/page.rs | 4 +-- src/site.rs | 109 +++++++++++++++++++++++++++++++++----------------------- src/wikitext.rs | 6 ++-- 3 files changed, 69 insertions(+), 50 deletions(-) diff --git a/src/page.rs b/src/page.rs index 485632d..77a06e6 100644 --- a/src/page.rs +++ b/src/page.rs @@ -31,7 +31,7 @@ pub enum PageError { Parser(#[from] crate::parser::ParserError), } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct Page { meta: PageMeta, unprocessed: UnprocessedPage, @@ -90,7 +90,7 @@ impl WikitextPage { } } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct UnprocessedPage { meta: PageMeta, snippets: Vec, diff --git a/src/site.rs b/src/site.rs index 58fb374..5196b5c 100644 --- a/src/site.rs +++ b/src/site.rs @@ -44,9 +44,8 @@ pub struct Site { patterns: TokenPatterns, shortcuts: HashMap, name_builder: NameBuilder, - unprocessed_pages: HashMap, - markdown_pages: HashMap, - pages_that_will_exist: PageSet, + unprocessed_pages: PageSet, + markdown_pages: PageSet, files: Names, } @@ -60,11 +59,10 @@ impl Site { { Self { name_builder: NameBuilder::new(srcdir.as_ref(), destdir.as_ref()), - unprocessed_pages: HashMap::new(), - markdown_pages: HashMap::new(), + unprocessed_pages: PageSet::default(), + markdown_pages: PageSet::default(), files: Names::default(), patterns: TokenPatterns::default(), - pages_that_will_exist: PageSet::default(), shortcuts: HashMap::new(), } } @@ -91,7 +89,6 @@ impl Site { fn add_wikitextpage(&mut self, page: WikitextPage) -> Result<(), SiteError> { info!("add wikitext page {}", page.meta().path().display()); - self.pages_that_will_exist.insert(&page); trace!("parsing wikitext page {}", page.meta().path().display()); let mut parser = WikitextParser::new(page.wikitext(), &self.patterns); @@ -100,7 +97,7 @@ impl Site { let page = Page::new(page.meta().clone(), page); self.unprocessed_pages - .insert(page.meta().path().into(), page); + .insert(page.meta().path(), page.clone()); Ok(()) } @@ -122,13 +119,13 @@ impl Site { } fn process_page(&mut self) -> Result { - if let Some(page) = self.pick_unprocessed_page() { + if let Some(page) = self.unprocessed_pages.remove_random_page() { debug!( "processing unprocessed page {}", page.meta().path().display() ); let page = page.markdown(self)?; - self.markdown_pages.insert(page.meta().path().into(), page); + self.markdown_pages.insert(page.meta().path(), page.clone()); Ok(true) } else { trace!("no pages to process"); @@ -136,17 +133,8 @@ impl Site { } } - fn pick_unprocessed_page(&mut self) -> Option { - let mut keys: Vec = self.unprocessed_pages.keys().map(|k| k.into()).collect(); - if let Some(path) = keys.pop() { - self.unprocessed_pages.remove(&path) - } else { - None - } - } - pub fn markdown_pages(&self) -> impl Iterator { - self.markdown_pages.values() + self.markdown_pages.pages() } pub fn files_only(&self) -> impl Iterator { @@ -158,14 +146,11 @@ impl Site { } pub fn is_page(&self, path: &Path) -> bool { - self.unprocessed_pages.contains_key(path) || self.markdown_pages.contains_key(path) + self.unprocessed_pages.contains(path) || self.markdown_pages.contains(path) } pub fn page(&self, path: &Path) -> Option<&MarkdownPage> { - self.markdown_pages - .iter() - .find(|(page_path, _)| page_path.as_path() == path) - .map(|(_, md)| md) + self.markdown_pages.get_page(path) } fn all_files(&self) -> Result, SiteError> { @@ -224,9 +209,9 @@ impl Site { // Is target absolute? if target.starts_with("/") { - if let Some(path) = self.pages_that_will_exist.get_path(target) { + if self.is_page(target) { trace!("absolute target exists"); - return Ok(path.into()); + return Ok(target.into()); } else { trace!("absolute target does not exist"); return Err(SiteError::PageMissing(page.into(), target.into())); @@ -236,9 +221,9 @@ impl Site { // Does a sub-page or file exist? let wanted = page.join(target); trace!("checking for subpage or file {}", wanted.display()); - if let Some(path) = self.pages_that_will_exist.get_path(&wanted) { - trace!("subpage exists: {}", path.display()); - return Ok(path.into()); + if self.is_page(&wanted) { + trace!("subpage exists: {}", wanted.display()); + return Ok(wanted); } else if self.file_exists(&wanted) { trace!("subpage file exists: {}", wanted.display()); return Ok(wanted); @@ -252,9 +237,13 @@ impl Site { parent.display(), path.display() ); - if let Some(path) = self.pages_that_will_exist.get_path(path.as_path()) { - trace!("sibling page exists: {}", path.display()); - return Ok(path.into()); + if let Some(actual) = self.unprocessed_pages.get_path(&path) { + trace!("sibling page exists: {}", actual.display()); + return Ok(actual.into()); + } + if let Some(actual) = self.markdown_pages.get_path(&path) { + trace!("sibling page exists: {}", actual.display()); + return Ok(actual.into()); } // trace!("consider files: {:?}", self.files); if self.file_exists(&path) { @@ -266,9 +255,9 @@ impl Site { // Does target exist relative to root? let wanted = Path::new("/").join(target); trace!("checking for absolute path {}", wanted.display()); - if let Some(path) = self.pages_that_will_exist.get_path(&wanted) { - trace!("page at absolute path exists: {}", path.display()); - return Ok(path.into()); + if self.is_page(&wanted) { + trace!("page at absolute path exists: {}", wanted.display()); + return Ok(wanted); } else if self.file_exists(&wanted) { trace!("file at absolute path exists: {}", wanted.display()); return Ok(wanted); @@ -301,20 +290,42 @@ impl Site { } } -#[derive(Default, Debug)] -struct PageSet { - map: HashMap, +#[derive(Debug)] +struct PageSet { + map: HashMap, } -impl PageSet { - fn insert(&mut self, page: &WikitextPage) { - let path = page.meta().path(); +impl PageSet { + fn insert(&mut self, path: &Path, page: T) { let key = Self::normalize(path); - self.map.insert(key, path.into()); + self.map.insert(key, (path.into(), page)); + } + + fn contains(&self, path: &Path) -> bool { + self.map.contains_key(&Self::normalize(path)) + } + + fn get_page(&self, path: &Path) -> Option<&T> { + self.map.get(&Self::normalize(path)).map(|(_, page)| page) } - fn get_path(&self, path: &Path) -> Option<&Path> { - self.map.get(&Self::normalize(path)).map(|x| x.as_ref()) + fn get_path(&self, wanted: &Path) -> Option<&Path> { + self.map + .get(&Self::normalize(wanted)) + .map(|(path, _)| path.as_path()) + } + + fn pages(&self) -> impl Iterator { + self.map.values().map(|(_, page)| page) + } + + fn remove_random_page(&mut self) -> Option { + let mut keys: Vec = self.map.keys().take(1).map(|k| k.into()).collect(); + if let Some(key) = keys.pop() { + self.map.remove(&key).map(|(_, page)| page) + } else { + None + } } fn normalize(path: &Path) -> String { @@ -322,6 +333,14 @@ impl PageSet { } } +impl Default for PageSet { + fn default() -> Self { + Self { + map: HashMap::default(), + } + } +} + #[derive(Debug, Clone, Eq, PartialEq)] pub struct Shortcut { name: String, diff --git a/src/wikitext.rs b/src/wikitext.rs index 57dff88..4192c85 100644 --- a/src/wikitext.rs +++ b/src/wikitext.rs @@ -14,7 +14,7 @@ pub enum WikitextError { Site(#[from] crate::site::SiteError), } -#[derive(Debug, Eq, PartialEq)] +#[derive(Debug, Clone, Eq, PartialEq)] pub enum Snippet { Markdown(String), WikiLink(WikiLink), @@ -69,7 +69,7 @@ impl Snippet { } } -#[derive(Debug, Eq, PartialEq)] +#[derive(Debug, Clone, Eq, PartialEq)] pub struct WikiLink { link_text: String, target: String, @@ -92,7 +92,7 @@ impl WikiLink { } } -#[derive(Debug, Eq, PartialEq)] +#[derive(Debug, Clone, Eq, PartialEq)] pub struct ParsedDirective { name: String, args: HashMap, -- cgit v1.2.1