diff options
Diffstat (limited to 'src/fsiter.rs')
-rw-r--r-- | src/fsiter.rs | 158 |
1 files changed, 139 insertions, 19 deletions
diff --git a/src/fsiter.rs b/src/fsiter.rs index a40ad34..ef2886d 100644 --- a/src/fsiter.rs +++ b/src/fsiter.rs @@ -1,37 +1,157 @@ -use crate::fsentry::FilesystemEntry; -use log::info; -use std::path::Path; -use walkdir::{IntoIter, WalkDir}; +//! Iterate over directory tree. + +use crate::fsentry::{FilesystemEntry, FsEntryError}; +use log::warn; +use std::path::{Path, PathBuf}; +use users::UsersCache; +use walkdir::{DirEntry, IntoIter, WalkDir}; + +/// Filesystem entry along with additional info about it. +pub struct AnnotatedFsEntry { + /// The file system entry being annotated. + pub inner: FilesystemEntry, + /// Is `entry` a valid CACHEDIR.TAG? + pub is_cachedir_tag: bool, +} /// Iterator over file system entries in a directory tree. pub struct FsIterator { - iter: IntoIter, + iter: SkipCachedirs, +} + +/// Possible errors from iterating over a directory tree. +#[derive(Debug, thiserror::Error)] +pub enum FsIterError { + /// Error from the walkdir crate. + #[error("walkdir failed: {0}")] + WalkDir(walkdir::Error), + + /// Error reading a file's metadata. + #[error("failed to get file system metadata for {0}: {1}")] + Metadata(PathBuf, std::io::Error), + + /// Error related to file system entries. + #[error(transparent)] + FsEntryError(#[from] FsEntryError), } impl FsIterator { - pub fn new(root: &Path) -> Self { + /// Create a new iterator. + pub fn new(root: &Path, exclude_cache_tag_directories: bool) -> Self { Self { - iter: WalkDir::new(root).into_iter(), + iter: SkipCachedirs::new( + WalkDir::new(root).into_iter(), + exclude_cache_tag_directories, + ), } } } impl Iterator for FsIterator { - type Item = Result<FilesystemEntry, anyhow::Error>; + type Item = Result<AnnotatedFsEntry, FsIterError>; fn next(&mut self) -> Option<Self::Item> { - match self.iter.next() { - None => None, - Some(Ok(entry)) => { - info!("found {}", entry.path().display()); - Some(new_entry(&entry)) - } - Some(Err(err)) => Some(Err(err.into())), + self.iter.next() + } +} + +/// Cachedir-aware adaptor for WalkDir: it skips the contents of dirs that contain CACHEDIR.TAG, +/// but still yields entries for the dir and the tag themselves. +struct SkipCachedirs { + cache: UsersCache, + iter: IntoIter, + exclude_cache_tag_directories: bool, + // This is the last tag we've found. `next()` will yield it before asking `iter` for more + // entries. + cachedir_tag: Option<Result<AnnotatedFsEntry, FsIterError>>, +} + +impl SkipCachedirs { + fn new(iter: IntoIter, exclude_cache_tag_directories: bool) -> Self { + Self { + cache: UsersCache::new(), + iter, + exclude_cache_tag_directories, + cachedir_tag: None, + } + } + + fn try_enqueue_cachedir_tag(&mut self, entry: &DirEntry) { + if !self.exclude_cache_tag_directories { + return; + } + + // If this entry is not a directory, it means we already processed its + // parent dir and decided that it's not cached. + if !entry.file_type().is_dir() { + return; + } + + let mut tag_path = entry.path().to_owned(); + tag_path.push("CACHEDIR.TAG"); + + // Tags are required to be regular files -- not even symlinks are allowed. + if !tag_path.is_file() { + return; + }; + + const CACHEDIR_TAG: &[u8] = b"Signature: 8a477f597d28d172789f06886806bc55"; + let mut content = [0u8; CACHEDIR_TAG.len()]; + + let mut file = if let Ok(file) = std::fs::File::open(&tag_path) { + file + } else { + return; + }; + + use std::io::Read; + match file.read_exact(&mut content) { + Ok(_) => (), + // If we can't read the tag file, proceed as if's not there + Err(_) => return, + } + + if content == CACHEDIR_TAG { + self.iter.skip_current_dir(); + self.cachedir_tag = Some(new_entry(&tag_path, true, &mut self.cache)); } } } -fn new_entry(e: &walkdir::DirEntry) -> anyhow::Result<FilesystemEntry> { - let meta = e.metadata()?; - let entry = FilesystemEntry::from_metadata(e.path(), &meta)?; - Ok(entry) +impl Iterator for SkipCachedirs { + type Item = Result<AnnotatedFsEntry, FsIterError>; + + fn next(&mut self) -> Option<Self::Item> { + self.cachedir_tag.take().or_else(|| { + let next = self.iter.next(); + match next { + None => None, + Some(Err(err)) => Some(Err(FsIterError::WalkDir(err))), + Some(Ok(entry)) => { + self.try_enqueue_cachedir_tag(&entry); + Some(new_entry(entry.path(), false, &mut self.cache)) + } + } + }) + } +} + +fn new_entry( + path: &Path, + is_cachedir_tag: bool, + cache: &mut UsersCache, +) -> Result<AnnotatedFsEntry, FsIterError> { + let meta = std::fs::symlink_metadata(path); + let meta = match meta { + Ok(meta) => meta, + Err(err) => { + warn!("failed to get metadata for {}: {}", path.display(), err); + return Err(FsIterError::Metadata(path.to_path_buf(), err)); + } + }; + let entry = FilesystemEntry::from_metadata(path, &meta, cache)?; + let annotated = AnnotatedFsEntry { + inner: entry, + is_cachedir_tag, + }; + Ok(annotated) } |